Ejemplo n.º 1
0
    def test_sine_coulomb_matrix(self):
        """Exercise SineCoulombMatrix with flattened and unflattened output."""
        # Flattened output: featurizing before fitting is expected to raise.
        flat_scm = SineCoulombMatrix(flatten=True)
        frame = pd.DataFrame({"s": [self.sc, self.ni3al]})
        with self.assertRaises(NotFittedError):
            frame = flat_scm.featurize_dataframe(frame, "s")
        frame = flat_scm.fit_featurize_dataframe(frame, "s")
        columns = flat_scm.feature_labels()
        self.assertEqual(columns[0], "sine coulomb matrix eig 0")

        # Reference feature rows, listed in the same order as the input rows.
        expected_rows = (
            [235.740418, 0.0, 0.0, 0.0],
            [232.578562, 1656.288171, 1403.106576, 1403.106576],
        )
        for position, expected in enumerate(expected_rows):
            self.assertArrayAlmostEqual(
                frame[columns].iloc[position], expected, decimal=5)

        # Unflattened output, compared against a reference 2x2 matrix.
        matrix_scm = SineCoulombMatrix(flatten=False)
        computed = matrix_scm.featurize(self.diamond)
        reference = np.array([[36.8581, 6.147068], [6.147068, 36.8581]])
        deviation = np.linalg.norm(computed - reference)
        self.assertAlmostEqual(deviation, 0.0, places=4)

        # With diag_elems=False both diagonal entries must equal zero.
        hollow_scm = SineCoulombMatrix(diag_elems=False, flatten=False)
        hollow = hollow_scm.featurize(self.diamond)[0]
        for k in (0, 1):
            self.assertEqual(hollow[k][k], 0)
Ejemplo n.º 2
0
    def test_sine_coulomb_matrix(self):
        """Check SineCoulombMatrix: flat feature rows first, then the matrix."""
        # --- flat ---
        featurizer = SineCoulombMatrix(flatten=True)
        structures = pd.DataFrame({"s": [self.sc, self.ni3al]})

        # An unfitted featurizer must refuse to featurize.
        with self.assertRaises(NotFittedError):
            structures = featurizer.featurize_dataframe(structures, "s")

        structures = featurizer.fit_featurize_dataframe(structures, "s")
        names = featurizer.feature_labels()
        self.assertEqual(names[0], "sine coulomb matrix eig 0")

        first_row = structures[names].iloc[0]
        self.assertArrayAlmostEqual(
            first_row, [235.740418, 0.0, 0.0, 0.0], decimal=5)
        second_row = structures[names].iloc[1]
        self.assertArrayAlmostEqual(
            second_row,
            [232.578562, 1656.288171, 1403.106576, 1403.106576],
            decimal=5)

        # --- matrix ---
        featurizer = SineCoulombMatrix(flatten=False)
        result = featurizer.featurize(self.diamond)
        target = np.array([[36.8581, 6.147068], [6.147068, 36.8581]])
        residual = np.linalg.norm(result - target)
        self.assertAlmostEqual(residual, 0.0, places=4)

        # Diagonal elements switched off: both diagonal entries are zero.
        featurizer = SineCoulombMatrix(diag_elems=False, flatten=False)
        result = featurizer.featurize(self.diamond)[0]
        self.assertEqual(result[0][0], 0)
        self.assertEqual(result[1][1], 0)
#                                       df.index)
finish = time.monotonic()
print("TIME TO FEATURIZE SCM %f SECONDS" % (finish - start))
print()

# Instantiate the KRR estimator and print the names of its parameters.
krr = KrrScm()
print(krr.get_params().keys())

# Grid search over the 'sine coulomb matrix' entry of the parameter table.
hpsel = GridSearchCV(
    krr, params['sine coulomb matrix'], cv=inner_cv, refit=True)
# X = df['sine coulomb matrix'].to_numpy()
X = df[scm.feature_labels()].to_numpy()

# Zero-pad every descriptor row up to length nt so all rows share one size.
XLIST = [np.append(row, np.zeros(nt - row.shape[0])) for row in X]
X = np.array(XLIST)
print(X.shape)
Y = df['formation_energy'].to_numpy()
N = df['nsites'].to_numpy()
mae = rmse = r2 = 0

# Restart the timer, then split X into train/test parts for each fold.
start = time.monotonic()
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
# Take the eigenvalues of the SCMs to form vector descriptors
# df['sine coulomb matrix'] = pd.Series([np.sort(np.linalg.eigvals(s))[::-1]
#                                        for s in df['sine coulomb matrix']],
#                                       df.index)
finish = time.monotonic()
print("TIME TO FEATURIZE SCM %f SECONDS" % (finish - start))
print()

# Set up KRR model and print the names of its parameters.
krr = KrrScm()
print(krr.get_params().keys())

# Initialize hyperparameter grid search
hpsel = GridSearchCV(krr,
                     params['sine coulomb matrix'],
                     cv=inner_cv,
                     refit=True)
# NOTE: DataFrame.as_matrix()/Series.as_matrix() were removed in pandas 1.0;
# to_numpy() is the supported way to get the underlying ndarray.
# X = df['sine coulomb matrix'].to_numpy()
X = df[scm.feature_labels()].to_numpy()

# Append each vector descriptor with zeroes to make them all the same size
# (every row is padded up to length nt).
XLIST = [np.append(row, np.zeros(nt - row.shape[0])) for row in X]
X = np.array(XLIST)
print(X.shape)
Y = df['formation_energy'].to_numpy()
N = df['nsites'].to_numpy()
mae, rmse, r2 = 0, 0, 0

# Evaluate SCM and time it
start = time.monotonic()
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
Ejemplo n.º 5
0
data = {'structures': centrosymmetric_structures, 'ids': task_ids}
df = pd.DataFrame(data)


# SOAP representation (currently disabled)
# from matminer.featurizers.structure import SOAP
# soap = SOAP(periodic=True)
# soap = soap.fit(data['structures'])
# labels = soap.feature_labels()
# df = soap.featurize_dataframe(df, 'structures')

# Sine Coulomb matrix: fit on the structures, then add the resulting
# feature columns to df (n_jobs is set to 28 on the featurizer).
from matminer.featurizers.structure import SineCoulombMatrix
sine_coulomb = SineCoulombMatrix()
sine_coulomb.set_n_jobs(28)
sine_coulomb.fit(centrosymmetric_structures)  # data['structures'])
labels = sine_coulomb.feature_labels()
df = sine_coulomb.featurize_dataframe(df, 'structures')  # , ignore_errors=True)

# AGNI fingerprints (currently disabled)
# from matminer.featurizers.site import AGNIFingerprints
# agni = AGNIFingerprints(directions=['x', 'y', 'z'])
# agni.set_n_jobs(28)
# labels = agni.feature_labels()
# df = agni.featurize(df['structures'], 0)
# df = agni.featurize_dataframe(df, ['structures', 'site'])  # , ignore_errors=True)

# get s_vs_ep
ec_list = []
for item in centro_elastic_compliance:
	obj=dict(item)
	if obj['elasticity.compliance_tensor'] != None: