Example 1
0
 def test_NotFittedError_transform(self):
     """Calling transform() before fit() must raise NotFittedError."""
     data = self.random_state.uniform(0, 100, size=(3, 3))
     scaler = StandardFlexibleScaler(column_wise=True)
     # The scaler was never fitted, so transform must refuse to run.
     with self.assertRaises(sklearn.exceptions.NotFittedError):
         scaler.transform(data)
Example 2
0
 def test_shape_inconsistent_transform(self):
     """transform() must raise ValueError when the input has a
     different number of features than the data it was fitted on."""
     fit_data = self.random_state.uniform(0, 100, size=(3, 3))
     bad_data = self.random_state.uniform(0, 100, size=(4, 4))
     scaler = StandardFlexibleScaler(column_wise=True)
     scaler.fit(fit_data)
     # Fitted on 3 features, handed 4 — shape mismatch must be rejected.
     with self.assertRaises(ValueError):
         scaler.transform(bad_data)
Example 3
0
 def test_inverse_transform(self):
     """Checks that inverse_transform undoes transform, for both
     column-wise and global scaling.

     The two scaling modes were previously tested with two verbatim
     copies of the same code; they are now driven by a single loop.
     Random draws happen in the same order as before, so the exact
     matrices exercised are unchanged. Values are rounded to 4
     decimals before comparison, matching the original tolerance.
     """
     for column_wise in (True, False):
         X = self.random_state.uniform(0, 100, size=(3, 3))
         model = StandardFlexibleScaler(column_wise=column_wise)
         model.fit(X)
         Y = self.random_state.uniform(0, 100, size=(3, 3))
         Y_tr = model.transform(Y)
         # Round both sides identically so the round-trip comparison
         # is insensitive to floating-point noise below 1e-4.
         Y = np.around(Y, decimals=4)
         Y_inv = np.around(model.inverse_transform(Y_tr), decimals=4)
         self.assertTrue(np.isclose(Y, Y_inv, atol=1e-12).all())
Example 4
0
 def test_transform(self):
     """Column-wise transform must equal manual standardization:
     subtract the per-column mean and divide by the per-column
     (population) standard deviation of the fitted data."""
     X = self.random_state.uniform(0, 100, size=(3, 3))
     scaler = StandardFlexibleScaler(column_wise=True)
     scaler.fit(X)
     Y = self.random_state.uniform(0, 100, size=(3, 3))
     transformed = scaler.transform(Y)
     # Reference computation done by hand from the fit matrix.
     col_mean = X.mean(axis=0)
     col_std = np.sqrt(((X - col_mean) ** 2).mean(axis=0))
     expected = (Y - col_mean) / col_std
     self.assertTrue(np.isclose(transformed, expected, atol=1e-12).all())
Example 5
0
def calculate_variables(
    X,
    Y,
    indices,
    n_atoms=None,
    N=10,
    n_FPS=200,
    kernel_func=gaussian_kernel,
    i_train=None,
    i_test=None,
    n_train=None,
    K_train=None,
    K_test=None,
):
    """Loads and preprocesses the data needed by the tutorials.

    Optionally subsamples features via FPS, splits samples into
    train/test sets, standardizes X and Y with scalers fitted on the
    training split, and builds (or reuses) normalized train/test
    kernels.

    Parameters
    ----------
    X, Y : array-like
        Feature matrix and targets.
    indices : sequence
        Frame indices; when ``len(Y) == len(indices)`` and ``n_atoms``
        is given, X rows are grouped per frame and averaged.
    n_atoms : sequence of int, optional
        Number of atoms (rows of X) per frame.
    N : int
        Unused here; kept for interface compatibility.
    n_FPS : int, optional
        Number of features to keep via FPS selection.
    kernel_func : callable
        Kernel to build K_train / K_test when they are not supplied.
    i_train, i_test : array-like, optional
        Precomputed train/test sample indices.
    n_train : int, optional
        Training-set size when splitting internally (default: half).
    K_train, K_test : array-like, optional
        Precomputed kernels; skip kernel computation when both given.

    Returns
    -------
    dict
        Processed arrays, scaler statistics, kernels, split indices,
        and set sizes.
    """

    print("Shape of Input Data is ", X.shape, ".")

    if n_FPS is not None and n_FPS < X.shape[1]:
        fps_idxs = FPS(n_to_select=n_FPS).fit(X).selected_idx_
        print("Taking a subsampling of ", n_FPS, "features")
        X = X[:, fps_idxs]

    if i_train is not None:
        # BUGFIX: message previously said "testing data" while showing i_train.
        print("Shape of training data is: ", i_train.shape, ".")
    else:
        print("Splitting Data Set")
        if n_train is None:
            n_train = int(len(Y) / 2)

        # BUGFIX: train_test_split returns (train, test); the original
        # unpacked them as (i_test, i_train), which handed the requested
        # n_train samples to the *test* set and inverted the split.
        i_train, i_test = train_test_split(np.arange(len(Y)), train_size=n_train)

    n_train = len(i_train)
    n_test = len(i_test)

    Y_train = Y[i_train]
    Y_test = Y[i_test]

    # Fit the target scaler on the training split only, then apply it
    # everywhere so no test-set statistics leak into the scaling.
    y_scaler = StandardFlexibleScaler(column_wise=True).fit(Y_train)

    # Center total dataset
    Y = y_scaler.transform(Y)

    # Center training data
    Y_train = y_scaler.transform(Y_train)

    # Center test data
    Y_test = y_scaler.transform(Y_test)

    if len(Y) == len(indices) and n_atoms is not None:
        print(
            "Computing training/testing sets from summed environment-centered soap vectors."
        )
        # frame_starts[i] is the row offset of frame i in the flat X.
        frame_starts = [sum(n_atoms[:i]) for i in range(len(n_atoms) + 1)]
        X_split = [
            X[frame_starts[i] : frame_starts[i + 1]] for i in range(len(indices))
        ]

        # One averaged feature vector per frame.
        X = np.array([np.mean(xs, axis=0) for xs in X_split])
        X_train = X[i_train]
        X_test = X[i_test]

    else:
        X_split = X.copy()

        X_train = X[i_train]
        X_test = X[i_test]

    # Feature scaler also fitted on the training split only.
    x_scaler = StandardFlexibleScaler(column_wise=False).fit(X_train)

    # Center total dataset
    X = x_scaler.transform(X)

    # Center training data
    X_train = x_scaler.transform(X_train)

    # Center test data
    X_test = x_scaler.transform(X_test)

    if K_train is not None and K_test is not None:
        print("Shape of kernel is: ", K_train.shape, ".")
    else:
        # NOTE(review): this condition drops the `n_atoms is not None`
        # guard used above, so when len(Y) == len(indices) but n_atoms
        # is None, X_split is a 2-D array yet is indexed per-frame here.
        # Preserved as-is — confirm against the tutorial's intent.
        if len(Y) == len(indices):
            print(
                "Computing kernels from summing kernels of environment-centered soap vectors."
            )

            K_train = kernel_func(
                [X_split[i] for i in i_train], [X_split[i] for i in i_train]
            )
            K_test = kernel_func(
                [X_split[i] for i in i_test], [X_split[i] for i in i_train]
            )

        else:

            K_train = kernel_func(X_split[i_train], X_split[i_train])
            K_test = kernel_func(X_split[i_test], X_split[i_train])

    k_scaler = KernelNormalizer().fit(K_train)

    K_train = k_scaler.transform(K_train)
    K_test = k_scaler.transform(K_test)

    n_train = len(X_train)
    n_test = len(X_test)
    # Number of principal components used by the tutorials.
    n_PC = 2

    return dict(
        X=X,
        Y=Y,
        X_split=X_split,
        X_center=x_scaler.mean_,
        Y_center=y_scaler.mean_,
        X_scale=x_scaler.scale_,
        Y_scale=y_scaler.scale_,
        X_train=X_train,
        Y_train=Y_train,
        X_test=X_test,
        Y_test=Y_test,
        K_train=K_train,
        K_test=K_test,
        i_train=i_train,
        i_test=i_test,
        n_PC=n_PC,
        n_train=n_train,
        n_test=n_test,
    )