def test_sine_coulomb_matrix(self):
    """Exercise SineCoulombMatrix with flatten=True and with flatten=False."""
    # --- flatten=True: dataframe featurization requires a prior fit ---
    flat_scm = SineCoulombMatrix(flatten=True)
    df = pd.DataFrame({"s": [self.sc, self.ni3al]})
    with self.assertRaises(NotFittedError):
        # Using the featurizer before fitting must raise.
        flat_scm.featurize_dataframe(df, "s")
    df = flat_scm.fit_featurize_dataframe(df, "s")

    labels = flat_scm.feature_labels()
    self.assertEqual(labels[0], "sine coulomb matrix eig 0")

    # Reference feature rows, in the same order as the structures in df.
    features = df[labels]
    expected_rows = (
        [235.740418, 0.0, 0.0, 0.0],
        [232.578562, 1656.288171, 1403.106576, 1403.106576],
    )
    for position, expected in enumerate(expected_rows):
        self.assertArrayAlmostEqual(
            features.iloc[position], expected, decimal=5)

    # --- flatten=False: compare the diamond result to reference values ---
    matrix_scm = SineCoulombMatrix(flatten=False)
    sin_mat = matrix_scm.featurize(self.diamond)
    mtarget = np.array([[36.8581, 6.147068], [6.147068, 36.8581]])
    deviation = np.linalg.norm(sin_mat - mtarget)
    self.assertAlmostEqual(deviation, 0.0, places=4)

    # --- diag_elems=False: both diagonal entries are expected to be 0 ---
    no_diag_scm = SineCoulombMatrix(diag_elems=False, flatten=False)
    sin_mat = no_diag_scm.featurize(self.diamond)[0]
    for idx in (0, 1):
        self.assertEqual(sin_mat[idx][idx], 0)
def test_sine_coulomb_matrix(self):
    """Check SineCoulombMatrix output in flattened and in matrix form."""
    # Flattened form. The featurizer is used on a two-structure dataframe
    # and must refuse to featurize until it has been fitted.
    featurizer = SineCoulombMatrix(flatten=True)
    df = pd.DataFrame({"s": [self.sc, self.ni3al]})
    with self.assertRaises(NotFittedError):
        df = featurizer.featurize_dataframe(df, "s")
    df = featurizer.fit_featurize_dataframe(df, "s")

    labels = featurizer.feature_labels()
    first_label = labels[0]
    self.assertEqual(first_label, "sine coulomb matrix eig 0")

    # Expected feature values for the first and second dataframe rows.
    expected_first = [235.740418, 0.0, 0.0, 0.0]
    expected_second = [232.578562, 1656.288171, 1403.106576, 1403.106576]
    self.assertArrayAlmostEqual(
        df[labels].iloc[0], expected_first, decimal=5)
    self.assertArrayAlmostEqual(
        df[labels].iloc[1], expected_second, decimal=5)

    # Matrix form. The norm of the difference to the reference 2x2 matrix
    # must vanish to four places.
    featurizer = SineCoulombMatrix(flatten=False)
    sin_mat = featurizer.featurize(self.diamond)
    mtarget = [[36.8581, 6.147068], [6.147068, 36.8581]]
    difference = sin_mat - np.array(mtarget)
    self.assertAlmostEqual(np.linalg.norm(difference), 0.0, places=4)

    # Matrix form with diag_elems=False: the diagonal must be exactly zero.
    featurizer = SineCoulombMatrix(diag_elems=False, flatten=False)
    sin_mat = featurizer.featurize(self.diamond)[0]
    upper_left = sin_mat[0][0]
    lower_right = sin_mat[1][1]
    self.assertEqual(upper_left, 0)
    self.assertEqual(lower_right, 0)
# df.index)
# Report how long the featurization step (started before this point) took.
finish = time.monotonic()
print("TIME TO FEATURIZE SCM %f SECONDS" % (finish - start))
print()

# KRR model; its parameter names are printed for reference.
krr = KrrScm()
print(krr.get_params().keys())

# Hyperparameter grid search over the 'sine coulomb matrix' parameter grid,
# using the inner cross-validation splitter.
hpsel = GridSearchCV(
    krr,
    params['sine coulomb matrix'],
    cv=inner_cv,
    refit=True,
)

# Feature matrix taken from the featurizer's label columns
# (earlier variant: X = df['sine coulomb matrix'].to_numpy()).
X = df[scm.feature_labels()].to_numpy()

# Zero-pad every descriptor row up to length nt so all rows share one size.
XLIST = [np.append(row, np.zeros(nt - row.shape[0])) for row in X]
X = np.array(XLIST)
print(X.shape)

# Targets, site counts, and the error accumulators (all start at zero).
Y = df['formation_energy'].to_numpy()
N = df['nsites'].to_numpy()
mae = rmse = r2 = 0

# Evaluate SCM and time it
start = time.monotonic()
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
# Take the eigenvalues of the SCMs to form vector descriptors
# df['sine coulomb matrix'] = pd.Series([np.sort(np.linalg.eigvals(s))[::-1]
#                                        for s in df['sine coulomb matrix']],
#                                       df.index)
# Report how long the featurization step (started before this point) took.
finish = time.monotonic()
print("TIME TO FEATURIZE SCM %f SECONDS" % (finish - start))
print()

# Set up KRR model; its parameter names are printed for reference.
krr = KrrScm()
print(krr.get_params().keys())

# Initialize hyperparameter grid search over the 'sine coulomb matrix'
# parameter grid, using the inner cross-validation splitter.
hpsel = GridSearchCV(krr, params['sine coulomb matrix'], cv=inner_cv,
                     refit=True)

# NOTE: DataFrame/Series.as_matrix() was deprecated in pandas 0.23 and
# removed in pandas 1.0; to_numpy() (pandas >= 0.24) is the replacement.
# X = df['sine coulomb matrix'].to_numpy()
X = df[scm.feature_labels()].to_numpy()

# Zero-pad every descriptor row up to length nt so all rows share one size.
XLIST = [np.append(row, np.zeros(nt - row.shape[0])) for row in X]
X = np.array(XLIST)
print(X.shape)

# Targets, site counts, and the error accumulators (all start at zero).
Y = df['formation_energy'].to_numpy()
N = df['nsites'].to_numpy()
mae, rmse, r2 = 0, 0, 0

# Evaluate SCM and time it
start = time.monotonic()
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
data = {'structures': centrosymmetric_structures, 'ids' : task_ids} df = pd.DataFrame(data) #soap representation #from matminer.featurizers.structure import SOAP #soap = SOAP(periodic=True) #soap=soap.fit(data['structures']) #labels=soap.feature_labels() #df = soap.featurize_dataframe(df,'structures') from matminer.featurizers.structure import SineCoulombMatrix sine_coulomb = SineCoulombMatrix() sine_coulomb.set_n_jobs(28) sine_coulomb.fit(centrosymmetric_structures)#data['structures']) labels=sine_coulomb.feature_labels() df = sine_coulomb.featurize_dataframe(df, 'structures')#,ignore_errors=True) #agni #from matminer.featurizers.site import AGNIFingerprints #agni=AGNIFingerprints(directions=['x','y','z']) #agni.set_n_jobs(28) #labels=agni.feature_labels() #df = agni.featurize(df['structures'],0) #df = agni.featurize_dataframe(df, ['structures', 'site'])#,ignore_errors=True) #get s_vs_ep ec_list=[] for item in centro_elastic_compliance: obj=dict(item) if obj['elasticity.compliance_tensor'] != None: