예제 #1
0
    def test_sine_coulomb_matrix(self):
        """Exercise SineCoulombMatrix in flattened and full-matrix modes.

        The flattened featurizer is checked through the dataframe API
        (including the requirement that it be fitted first); the matrix
        featurizer is checked against reference values for diamond, with
        and without diagonal elements.
        """
        # Flattened mode: featurizing before fitting must be rejected.
        featurizer = SineCoulombMatrix(flatten=True)
        frame = pd.DataFrame({"s": [self.sc, self.ni3al]})
        with self.assertRaises(NotFittedError):
            featurizer.featurize_dataframe(frame, "s")

        frame = featurizer.fit_featurize_dataframe(frame, "s")
        columns = featurizer.feature_labels()
        self.assertEqual(columns[0], "sine coulomb matrix eig 0")

        # Reference feature vectors, one per dataframe row (sc, then ni3al).
        expected_rows = (
            [235.740418, 0.0, 0.0, 0.0],
            [232.578562, 1656.288171, 1403.106576, 1403.106576],
        )
        features = frame[columns]
        for row, expected in enumerate(expected_rows):
            self.assertArrayAlmostEqual(features.iloc[row], expected,
                                        decimal=5)

        # Matrix mode: the deviation from the reference matrix must vanish.
        matrix_featurizer = SineCoulombMatrix(flatten=False)
        computed = matrix_featurizer.featurize(self.diamond)
        reference = np.array([[36.8581, 6.147068], [6.147068, 36.8581]])
        deviation = np.linalg.norm(computed - reference)
        self.assertAlmostEqual(deviation, 0.0, places=4)

        # Without diagonal elements both diagonal entries must be zero.
        no_diag_featurizer = SineCoulombMatrix(diag_elems=False,
                                               flatten=False)
        off_diagonal = no_diag_featurizer.featurize(self.diamond)[0]
        for idx in (0, 1):
            self.assertEqual(off_diagonal[idx][idx], 0)
예제 #2
0
    def test_sine_coulomb_matrix(self):
        """Verify SineCoulombMatrix output for flattened and matrix modes."""
        # --- flattened mode -------------------------------------------------
        flat_scm = SineCoulombMatrix(flatten=True)
        structures = pd.DataFrame({"s": [self.sc, self.ni3al]})

        # An unfitted featurizer must refuse to featurize a dataframe.
        with self.assertRaises(NotFittedError):
            flat_scm.featurize_dataframe(structures, "s")

        featurized = flat_scm.fit_featurize_dataframe(structures, "s")
        label_names = flat_scm.feature_labels()
        self.assertEqual(label_names[0], "sine coulomb matrix eig 0")

        feature_table = featurized[label_names]
        reference_vectors = [
            [235.740418, 0.0, 0.0, 0.0],
            [232.578562, 1656.288171, 1403.106576, 1403.106576],
        ]
        # Row order follows the dataframe built above: sc first, ni3al second.
        for position, reference in zip(range(2), reference_vectors):
            self.assertArrayAlmostEqual(
                feature_table.iloc[position], reference, decimal=5)

        # --- matrix mode ----------------------------------------------------
        full_scm = SineCoulombMatrix(flatten=False)
        diamond_result = full_scm.featurize(self.diamond)
        target = np.array([[36.8581, 6.147068], [6.147068, 36.8581]])
        residual_norm = np.linalg.norm(diamond_result - target)
        self.assertAlmostEqual(residual_norm, 0.0, places=4)

        # Dropping the diagonal must leave zeros at [0][0] and [1][1].
        hollow_scm = SineCoulombMatrix(diag_elems=False, flatten=False)
        hollow = hollow_scm.featurize(self.diamond)[0]
        self.assertEqual(hollow[0][0], 0)
        self.assertEqual(hollow[1][1], 0)
        return sklearn.metrics.r2_score(y_true, y_pred)

    def length(self, vec):
        """Return the number of non-zero entries in ``vec``.

        ``vec`` must support element-wise comparison and boolean-mask
        indexing (e.g. a NumPy array); the count is the first dimension
        of the masked selection.
        """
        nonzero_mask = vec != 0
        nonzero_entries = vec[nonzero_mask]
        return nonzero_entries.shape[0]


# SCM evaluation
# DIAG is forwarded to the featurizer below as `diag_elems`; it is printed
# first so the run's output records which setting was used.
DIAG = True
print("DIAG ELEMS", DIAG)

# Featurize dataframe with sine coulomb matrix and time it.
# time.monotonic() is used for the interval so the measurement is not
# affected by system clock adjustments.
start = time.monotonic()
scm = SineCoulombMatrix(diag_elems=DIAG, flatten=True)
# Set the number of jobs for parallelization
scm.set_n_jobs(NJOBS)
# Fit and featurize in a single call, reading structures from the
# 'structure' column; the result replaces `df`.
df = scm.fit_featurize_dataframe(df, 'structure')
# Disabled alternative: take the eigenvalues of the SCMs manually to form
# vector descriptors.
# NOTE(review): presumably superseded by flatten=True above -- confirm
# before deleting.
# df['sine coulomb matrix'] = pd.Series([np.sort(np.linalg.eigvals(s))[::-1]
#                                        for s in df['sine coulomb matrix']],
#                                       df.index)
finish = time.monotonic()
# Report the elapsed featurization time, followed by a blank separator line.
print("TIME TO FEATURIZE SCM %f SECONDS" % (finish - start))
print()

# Set up KRR model
# NOTE(review): KrrScm is defined outside this section; presumably a
# kernel ridge regression estimator for SCM features -- confirm.
krr = KrrScm()
# Print the estimator's parameter names (the keys of get_params()).
print(krr.get_params().keys())

# Initialize hyperparameter grid search
hpsel = GridSearchCV(krr,
                     params['sine coulomb matrix'],