Esempio n. 1
0
 def test_ValueError_full(self):
     """Fitting the whole-matrix (non column-wise) scaler on a constant
     matrix is expected to raise ``ValueError``."""
     # A (3, 1) matrix whose entries are all equal, i.e. with no variation.
     constant_matrix = np.full((3, 1), 2)
     scaler = StandardFlexibleScaler(column_wise=False)
     self.assertRaises(ValueError, scaler.fit, constant_matrix)
Esempio n. 2
0
 def test_ValueError_column_wise(self):
     """Fitting the column-wise scaler is expected to raise ``ValueError``
     when one column of the matrix has no variation."""
     data = self.random_state.uniform(0, 100, size=(3, 3))
     # Overwrite the whole first column with a constant value.
     data[:, 0] = 2
     scaler = StandardFlexibleScaler(column_wise=True)
     self.assertRaises(ValueError, scaler.fit, data)
Esempio n. 3
0
 def test_NotFittedError_inverse(self):
     """Calling ``inverse_transform`` on a scaler that has not been fitted
     is expected to raise ``NotFittedError``."""
     data = self.random_state.uniform(0, 100, size=(3, 3))
     unfitted_scaler = StandardFlexibleScaler()
     self.assertRaises(
         sklearn.exceptions.NotFittedError,
         unfitted_scaler.inverse_transform,
         data,
     )
Esempio n. 4
0
 def test_fit_transform_pf(self):
     """Column-wise scaling must agree with sklearn's ``StandardScaler``
     on the same input (absolute tolerance 1e-12)."""
     data = self.random_state.uniform(0, 100, size=(3, 3))
     scaled_skcosmo = StandardFlexibleScaler(column_wise=True).fit_transform(data)
     scaled_sklearn = StandardScaler().fit_transform(data)
     # allclose(a, b) is all(isclose(a, b)); argument order is kept because
     # the relative tolerance is measured against the second argument.
     self.assertTrue(np.allclose(scaled_sklearn, scaled_skcosmo, atol=1e-12))
Esempio n. 5
0
 def test_fit_transform_npf(self):
     """Whole-matrix (non column-wise) scaling must match the closed-form
     result: center each column, then divide everything by the square root
     of the summed per-column variances.
     """
     data = self.random_state.uniform(0, 100, size=(3, 3))
     scaled = StandardFlexibleScaler(column_wise=False).fit_transform(data)

     # Reference computed directly from the equation.
     centered = data - data.mean(axis=0)
     column_variances = (centered**2).mean(axis=0)
     expected = centered / np.sqrt(column_variances.sum())

     self.assertTrue(np.allclose(expected, scaled, atol=1e-12))
Esempio n. 6
0
 def test_transform(self):
     """A column-wise scaler fitted on one matrix must transform a second
     matrix using the mean and standard deviation of the first.
     """
     fit_data = self.random_state.uniform(0, 100, size=(3, 3))
     scaler = StandardFlexibleScaler(column_wise=True)
     scaler.fit(fit_data)

     new_data = self.random_state.uniform(0, 100, size=(3, 3))
     transformed = scaler.transform(new_data)

     # Reference statistics come from the fitting matrix only.
     column_means = fit_data.mean(axis=0)
     column_stds = np.sqrt(((fit_data - column_means) ** 2).mean(axis=0))
     expected = (new_data - column_means) / column_stds

     self.assertTrue(np.allclose(transformed, expected, atol=1e-12))
Esempio n. 7
0
 def test_atol(self):
     """Checks that the absolute tolerance controls the minimal variance
     allowed for individual columns or for the whole matrix.

     In both modes ``atol`` is set just above the variance present in ``X``
     (with ``rtol=0``), so fitting is expected to raise ``ValueError``.
     """
     X = self.random_state.uniform(0, 100, size=(3, 3))

     # Column-wise mode: a tolerance slightly above the variance of the
     # first column is enough to make that column count as "zero variance".
     atol = ((X[:, 0] - X[:, 0].mean(axis=0)) ** 2).mean(axis=0) + 1e-8
     model = StandardFlexibleScaler(column_wise=True, atol=atol, rtol=0)
     with self.assertRaises(ValueError):
         model.fit(X)

     # Whole-matrix mode: the data must be centered *before* squaring, and
     # the per-column variances are summed into one scalar so the tolerance
     # is a single number rather than an array.
     # NOTE(review): assumes the whole-matrix variance is the sum of the
     # per-column variances -- confirm against the scaler implementation.
     atol = ((X - X.mean(axis=0)) ** 2).mean(axis=0).sum() + 1e-8
     model = StandardFlexibleScaler(column_wise=False, atol=atol, rtol=0)
     with self.assertRaises(ValueError):
         model.fit(X)
Esempio n. 8
0
 def test_rtol(self):
     """Checks that the relative tolerance controls the minimal variance
     allowed for individual columns or for the whole matrix.

     In both modes ``rtol`` is derived from a variance-to-mean ratio plus a
     small margin (with ``atol=0``), and fitting is expected to raise
     ``ValueError``.
     """
     X = self.random_state.uniform(0, 100, size=(3, 3))

     # Column-wise mode: ratio computed from the first column only.
     first_column = X[:, 0]
     first_mean = first_column.mean(axis=0)
     rtol = ((first_column - first_mean) ** 2).mean(axis=0) / first_mean + 1e-8
     scaler = StandardFlexibleScaler(column_wise=True, atol=0, rtol=rtol)
     self.assertRaises(ValueError, scaler.fit, X)

     # Whole-matrix mode.
     # NOTE(review): ``rtol`` here is an array with one entry per column,
     # not a scalar -- confirm this is what the scaler expects.
     column_means = X.mean(axis=0)
     rtol = ((X - column_means) ** 2).mean(axis=0) / column_means + 1e-8
     scaler = StandardFlexibleScaler(column_wise=False, atol=0, rtol=rtol)
     self.assertRaises(ValueError, scaler.fit, X)
Esempio n. 9
0
 def test_shape_inconsistent_inverse(self):
     """``inverse_transform`` is expected to raise ``ValueError`` when given
     a matrix whose shape differs from the one used for fitting."""
     fit_data = self.random_state.uniform(0, 100, size=(3, 3))
     mismatched = self.random_state.uniform(0, 100, size=(4, 4))
     scaler = StandardFlexibleScaler(column_wise=True)
     scaler.fit(fit_data)
     self.assertRaises(ValueError, scaler.inverse_transform, mismatched)
Esempio n. 10
0
 def test_invalid_sample_weights(self):
     """Sample weights with the wrong length or with two dimensions are
     expected to be rejected with ``ValueError``."""
     X = self.random_state.uniform(0, 100, size=(3, 3))
     n_samples = len(X)
     wrong_length = np.ones(n_samples + 1)
     wrong_ndim = np.ones((n_samples, 2))
     scaler = StandardFlexibleScaler()
     # Same scaler instance is reused for both invalid inputs.
     for bad_weights in (wrong_length, wrong_ndim):
         with self.assertRaises(ValueError):
             scaler.fit_transform(X, sample_weight=bad_weights)
Esempio n. 11
0
 def test_sample_weights(self):
     """Uniform sample weights must reproduce the unweighted result, while
     non-uniform weights must give a different result."""
     X = self.random_state.uniform(0, 100, size=(3, 3))
     n_samples = len(X)
     uniform_weights = np.ones(n_samples)
     random_weights = self.random_state.uniform(0, 100, size=(n_samples,))

     plain_scaler = StandardFlexibleScaler()
     weighted_scaler = StandardFlexibleScaler()

     baseline = plain_scaler.fit_transform(X)

     with_uniform = weighted_scaler.fit_transform(X, sample_weight=uniform_weights)
     self.assertTrue(np.allclose(baseline, with_uniform, atol=1e-12))

     # The weighted scaler is refitted with the non-uniform weights.
     with_random = weighted_scaler.fit_transform(X, sample_weight=random_weights)
     self.assertFalse(np.allclose(baseline, with_random, atol=1e-12))
Esempio n. 12
0
def calculate_variables(
    X,
    Y,
    indices,
    n_atoms=None,
    N=10,
    n_FPS=200,
    kernel_func=gaussian_kernel,
    i_train=None,
    i_test=None,
    n_train=None,
    K_train=None,
    K_test=None,
):
    """Prepare scaled features, targets and kernels for the tutorials.

    The function (1) optionally sub-selects ``n_FPS`` feature columns of
    ``X`` with ``FPS``, (2) obtains train/test indices (given or randomly
    split), (3) standardizes ``Y`` column-wise and ``X`` as a whole matrix
    using scalers fitted on the training rows only, and (4) builds and
    normalizes the train/test kernels unless both are supplied.

    Parameters
    ----------
    X : feature matrix, indexed as ``X[rows, columns]``.
    Y : target array, indexed by sample.
    indices : sized collection; only ``len(indices)`` is used, to decide
        whether ``X`` must be averaged per frame and how kernels are built.
    n_atoms : optional sequence of per-frame row counts. When given and
        ``len(Y) == len(indices)``, rows of ``X`` are grouped into frames of
        these sizes and each frame is replaced by its mean row.
    N : unused; kept for backward compatibility.
    n_FPS : number of feature columns to keep, or ``None`` to keep all.
    kernel_func : callable ``kernel_func(A, B)`` returning a kernel matrix.
    i_train, i_test : optional train/test sample indices. ``i_test`` must be
        supplied whenever ``i_train`` is.
    n_train : training-set size for the random split (defaults to half of
        ``len(Y)``); ignored when ``i_train`` is given.
    K_train, K_test : optional precomputed kernels; used only if both are
        given.

    Returns
    -------
    dict
        Scaled ``X``/``Y`` and their train/test parts, ``X_split``, the
        scaler means and scales, the normalized kernels, the indices, the
        set sizes and ``n_PC`` (fixed to 2).
    """

    print("Shape of Input Data is ", X.shape, ".")

    if n_FPS is not None and n_FPS < X.shape[1]:
        fps_idxs = FPS(n_to_select=n_FPS).fit(X).selected_idx_
        print("Taking a subsampling of ", n_FPS, "features")
        X = X[:, fps_idxs]

    if i_train is not None:
        print("Shape of training data is: ", i_train.shape, ".")
    else:
        print("Splitting Data Set")
        if n_train is None:
            n_train = int(len(Y) / 2)

        # train_test_split returns (train, test) in that order, so that
        # ``train_size`` actually sizes the training set.
        i_train, i_test = train_test_split(np.arange(len(Y)), train_size=n_train)

    n_train = len(i_train)
    n_test = len(i_test)

    Y_train = Y[i_train]
    Y_test = Y[i_test]

    # The target scaler is fitted on the training rows only and then
    # applied to every subset.
    y_scaler = StandardFlexibleScaler(column_wise=True).fit(Y_train)

    # Center total dataset
    Y = y_scaler.transform(Y)

    # Center training data
    Y_train = y_scaler.transform(Y_train)

    # Center testing data
    Y_test = y_scaler.transform(Y_test)

    if len(Y) == len(indices) and n_atoms is not None:
        print(
            "Computing training/testing sets from summed environment-centered soap vectors."
        )
        # Row offsets of each frame: [0, n_0, n_0 + n_1, ...].
        frame_starts = np.concatenate(([0], np.cumsum(n_atoms)))
        X_split = [
            X[frame_starts[i] : frame_starts[i + 1]] for i in range(len(indices))
        ]

        # One row per frame: the mean over that frame's rows.
        X = np.array([np.mean(xs, axis=0) for xs in X_split])
    else:
        # Keep an unscaled copy; kernels below are computed from it.
        X_split = X.copy()

    X_train = X[i_train]
    X_test = X[i_test]

    # The feature scaler is fitted on the training rows only.
    x_scaler = StandardFlexibleScaler(column_wise=False).fit(X_train)

    # Center total dataset
    X = x_scaler.transform(X)

    # Center training data
    X_train = x_scaler.transform(X_train)

    # Center testing data
    X_test = x_scaler.transform(X_test)

    if K_train is not None and K_test is not None:
        print("Shape of kernel is: ", K_train.shape, ".")
    else:
        if len(Y) == len(indices):
            print(
                "Computing kernels from summing kernels of environment-centered soap vectors."
            )

            train_frames = [X_split[i] for i in i_train]
            test_frames = [X_split[i] for i in i_test]

            K_train = kernel_func(train_frames, train_frames)
            K_test = kernel_func(test_frames, train_frames)

        else:

            K_train = kernel_func(X_split[i_train], X_split[i_train])
            K_test = kernel_func(X_split[i_test], X_split[i_train])

    # The kernel normalizer is fitted on the training kernel only.
    k_scaler = KernelNormalizer().fit(K_train)

    K_train = k_scaler.transform(K_train)
    K_test = k_scaler.transform(K_test)

    n_train = len(X_train)
    n_test = len(X_test)
    n_PC = 2

    return dict(
        X=X,
        Y=Y,
        X_split=X_split,
        X_center=x_scaler.mean_,
        Y_center=y_scaler.mean_,
        X_scale=x_scaler.scale_,
        Y_scale=y_scaler.scale_,
        X_train=X_train,
        Y_train=Y_train,
        X_test=X_test,
        Y_test=Y_test,
        K_train=K_train,
        K_test=K_test,
        i_train=i_train,
        i_test=i_test,
        n_PC=n_PC,
        n_train=n_train,
        n_test=n_test,
    )
Esempio n. 13
0
 def test_inverse_transform(self):
     """``inverse_transform`` must undo ``transform`` (compared after
     rounding to 4 decimals), in both column-wise and whole-matrix modes.
     """
     # Column-wise mode is checked first, then whole-matrix mode; each
     # round draws a fresh fitting matrix followed by a fresh test matrix.
     for column_wise in (True, False):
         fit_data = self.random_state.uniform(0, 100, size=(3, 3))
         scaler = StandardFlexibleScaler(column_wise=column_wise)
         scaler.fit(fit_data)

         original = self.random_state.uniform(0, 100, size=(3, 3))
         transformed = scaler.transform(original)

         expected = np.around(original, decimals=4)
         recovered = np.around(scaler.inverse_transform(transformed), decimals=4)
         self.assertTrue(np.allclose(expected, recovered, atol=1e-12))