Example #1
0
    def test_different_results(self):
        from sklearn import datasets
        from sklearn.model_selection import train_test_split

        dataset = datasets.load_iris()
        X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2)

        mean = np.mean(X_train, axis=0)
        X_train -= mean
        X_test -= mean

        clf = PCA(data_norm=12, centered=True)
        clf.fit(X_train, y_train)

        transform1 = clf.transform(X_test)

        clf = PCA(data_norm=12, centered=True)
        clf.fit(X_train, y_train)

        transform2 = clf.transform(X_test)

        clf = sk_pca.PCA(svd_solver='full')
        clf.fit(X_train, y_train)

        transform3 = clf.transform(X_test)

        self.assertFalse(np.all(transform1 == transform2))
        self.assertFalse(np.all(transform3 == transform1) and np.all(transform3 == transform2))
Example #2
0
    def test_big_epsilon(self):
        X = np.random.randn(25000, 21)
        X -= np.mean(X, axis=0)
        X /= np.linalg.norm(X, axis=1).max()

        for components in range(1, 10):
            clf = PCA(components, epsilon=5, centered=True, data_norm=1)
            clf.fit(X)

            sk_clf = sk_pca.PCA(components, svd_solver='full')
            sk_clf.fit(X)

            self.assertAlmostEqual(clf.score(X), sk_clf.score(X), places=4)
Example #3
0
    def test_inf_epsilon(self):
        X = np.random.randn(250, 21)
        X -= np.mean(X, axis=0)
        X /= np.linalg.norm(X, axis=1).max()

        for components in range(1, 10):
            clf = PCA(components, epsilon=float("inf"), centered=True, data_norm=1)
            clf.fit(X)

            sk_clf = sk_pca.PCA(components, svd_solver='full')
            sk_clf.fit(X)

            self.assertAlmostEqual(clf.score(X), sk_clf.score(X))
            self.assertTrue(np.allclose(clf.get_precision(), sk_clf.get_precision()))
            self.assertTrue(np.allclose(clf.get_covariance(), sk_clf.get_covariance()))
            self.assertTrue(np.allclose(np.abs((clf.components_ / sk_clf.components_).sum(axis=1)), clf.n_features_))