def test_sine_coulomb_matrix(self):
    # Exercises SineCoulombMatrix in both output modes: flattened
    # eigenvalue features through the dataframe API, and the raw matrix
    # returned by featurize() when flatten=False.

    # --- flattened (eigenvalue) mode ---
    featurizer = SineCoulombMatrix(flatten=True)
    frame = pd.DataFrame({"s": [self.sc, self.ni3al]})

    # With flatten=True, featurizing before fitting must be rejected.
    with self.assertRaises(NotFittedError):
        featurizer.featurize_dataframe(frame, "s")

    frame = featurizer.fit_featurize_dataframe(frame, "s")
    columns = featurizer.feature_labels()
    self.assertEqual(columns[0], "sine coulomb matrix eig 0")

    # Expected feature vectors, one per input structure, in row order.
    # NOTE(review): the trailing zeros in the first row are presumably
    # padding up to the size of the largest fitted structure -- confirm.
    expected_rows = (
        [235.740418, 0.0, 0.0, 0.0],
        [232.578562, 1656.288171, 1403.106576, 1403.106576],
    )
    for row, expected in enumerate(expected_rows):
        self.assertArrayAlmostEqual(
            frame[columns].iloc[row], expected, decimal=5)

    # --- matrix mode ---
    featurizer = SineCoulombMatrix(flatten=False)
    featurized = featurizer.featurize(self.diamond)
    expected_matrix = [[36.8581, 6.147068],
                       [6.147068, 36.8581]]
    # The featurize() output is compared as a whole; the subtraction
    # broadcasts it against the 2x2 reference.
    residual = featurized - np.array(expected_matrix)
    self.assertAlmostEqual(np.linalg.norm(residual), 0.0, places=4)

    # With diag_elems=False the diagonal entries must be exactly zero.
    featurizer = SineCoulombMatrix(diag_elems=False, flatten=False)
    no_diag = featurizer.featurize(self.diamond)[0]
    for idx in (0, 1):
        self.assertEqual(no_diag[idx][idx], 0)
def test_sine_coulomb_matrix(self):
    # Checks the sine Coulomb matrix featurizer twice over: first the
    # flattened eigenvalue output produced via a dataframe, then the
    # unflattened matrix output of featurize().

    # Flattened output: the featurizer has to be fitted first.
    flat_scm = SineCoulombMatrix(flatten=True)
    structures = pd.DataFrame({"s": [self.sc, self.ni3al]})
    with self.assertRaises(NotFittedError):
        flat_scm.featurize_dataframe(structures, "s")

    structures = flat_scm.fit_featurize_dataframe(structures, "s")
    eig_labels = flat_scm.feature_labels()
    self.assertEqual(eig_labels[0], "sine coulomb matrix eig 0")

    # Row 0 corresponds to self.sc, row 1 to self.ni3al.
    sc_expected = [235.740418, 0.0, 0.0, 0.0]
    self.assertArrayAlmostEqual(
        structures[eig_labels].iloc[0], sc_expected, decimal=5)
    ni3al_expected = [232.578562, 1656.288171, 1403.106576, 1403.106576]
    self.assertArrayAlmostEqual(
        structures[eig_labels].iloc[1], ni3al_expected, decimal=5)

    # Matrix output: compare against the reference 2x2 matrix for diamond
    # by requiring the norm of the difference to vanish to 4 places.
    reference = np.array([[36.8581, 6.147068], [6.147068, 36.8581]])
    full_scm = SineCoulombMatrix(flatten=False)
    difference = full_scm.featurize(self.diamond) - reference
    self.assertAlmostEqual(np.linalg.norm(difference), 0.0, places=4)

    # Disabling diagonal elements must leave zeros on the diagonal.
    offdiag_scm = SineCoulombMatrix(diag_elems=False, flatten=False)
    offdiag_mat = offdiag_scm.featurize(self.diamond)[0]
    self.assertEqual(offdiag_mat[0][0], 0)
    self.assertEqual(offdiag_mat[1][1], 0)
return sklearn.metrics.r2_score(y_true, y_pred) def length(self, vec): return vec[vec != 0].shape[0] # SCM evaluation DIAG = True print("DIAG ELEMS", DIAG) # Featurize dataframe with sine coulomb matrix and time it start = time.monotonic() scm = SineCoulombMatrix(diag_elems=DIAG, flatten=True) # Set the number of jobs for parallelization scm.set_n_jobs(NJOBS) df = scm.fit_featurize_dataframe(df, 'structure') # Take the eigenvalues of the SCMs to form vector descriptors # df['sine coulomb matrix'] = pd.Series([np.sort(np.linalg.eigvals(s))[::-1] # for s in df['sine coulomb matrix']], # df.index) finish = time.monotonic() print("TIME TO FEATURIZE SCM %f SECONDS" % (finish - start)) print() # Set up KRR model krr = KrrScm() print(krr.get_params().keys()) # Initialize hyperparameter grid search hpsel = GridSearchCV(krr, params['sine coulomb matrix'],