def test_make_union(self):
    """Check the names and members produced by ``make_sparkunion``.

    A union is built from a ``SparkTruncatedSVD`` and a ``TransfT``
    instance.  Its ``transformer_list`` is expected to pair the
    lowercased class names with the objects that were passed in, in the
    same order.
    """
    svd_step = SparkTruncatedSVD()
    mock_step = TransfT()
    union = make_sparkunion(svd_step, mock_step)

    # transformer_list is unpacked as (name, transformer) pairs: transpose
    # it into one tuple of names and one tuple of transformers.
    step_names, step_objects = zip(*union.transformer_list)

    assert_equal(step_names, ("sparktruncatedsvd", "transft"))
    assert_equal(step_objects, (svd_step, mock_step))
    def test_same_fit_transforms(self):
        """Compare ``fit_transform`` of the local and Spark truncated SVD.

        Both estimators are configured with the same number of
        components, solver tolerance and random state (only ``n_iter``
        differs: 5 locally, 50 distributed).  The transformed outputs
        must have the same shape, and their first columns must match up
        to sign within a loose absolute tolerance.
        """
        X, X_rdd = self.make_dense_rdd((1e3, 12))

        n_components = 4
        seed = 42
        solver_tol = 1e-7
        local_svd = TruncatedSVD(
            n_components, n_iter=5, tol=solver_tol, random_state=seed
        )
        dist_svd = SparkTruncatedSVD(
            n_components, n_iter=50, tol=solver_tol, random_state=seed
        )

        local_out = local_svd.fit_transform(X)
        dist_out = dist_svd.fit_transform(X_rdd).toarray()

        # Tolerance for comparing the results; much looser than the
        # tolerance handed to the solvers above.
        compare_atol = 1e-1
        assert_array_equal(local_out.shape, dist_out.shape)

        # Singular vectors are only determined up to sign, so the first
        # column may legitimately come back negated.
        local_first = local_out[:, 0]
        dist_first = dist_out[:, 0]
        same_sign = np.allclose(dist_first, local_first, atol=compare_atol)
        flipped_sign = np.allclose(-dist_first, local_first,
                                   atol=compare_atol)
        assert same_sign or flipped_sign
Example #3
0
    def test_same_fit_transforms(self):
        """Compare ``fit_transform`` of the local and Spark truncated SVD.

        Both estimators are configured with the same number of
        components, solver tolerance and random state (only ``n_iter``
        differs: 5 locally, 50 distributed).  The distributed result is
        additionally checked with ``check_rdd_dtype`` against
        ``np.ndarray``.  The collected output must have the same shape as
        the local one, and the first columns must match up to sign within
        a loose absolute tolerance.
        """
        X, X_rdd = self.make_dense_rdd((1e3, 12))

        n_components = 4
        seed = 42
        solver_tol = 1e-7
        local_svd = TruncatedSVD(
            n_components, n_iter=5, tol=solver_tol, random_state=seed
        )
        dist_svd = SparkTruncatedSVD(
            n_components, n_iter=50, tol=solver_tol, random_state=seed
        )

        local_out = local_svd.fit_transform(X)
        dist_out = dist_svd.fit_transform(X_rdd)
        collected_out = dist_out.toarray()
        assert_true(check_rdd_dtype(dist_out, (np.ndarray,)))

        # Tolerance for comparing the results; much looser than the
        # tolerance handed to the solvers above.
        compare_atol = 1e-1
        assert_array_equal(local_out.shape, collected_out.shape)

        # Singular vectors are only determined up to sign, so the first
        # column may legitimately come back negated.
        local_first = local_out[:, 0]
        dist_first = collected_out[:, 0]
        same_sign = np.allclose(dist_first, local_first, atol=compare_atol)
        flipped_sign = np.allclose(-dist_first, local_first,
                                   atol=compare_atol)
        assert same_sign or flipped_sign
    def test_same_components(self):
        """Compare the first fitted component of local and Spark SVD.

        Both estimators are fitted with the same number of components,
        solver tolerance and random state (only ``n_iter`` differs: 5
        locally, 50 distributed).  The first row of ``components_`` must
        match up to sign within a loose absolute tolerance.
        """
        X, X_rdd = self.make_dense_rdd((1e3, 10))

        n_components = 2
        seed = 42
        solver_tol = 1e-7
        local_svd = TruncatedSVD(
            n_components, n_iter=5, tol=solver_tol, random_state=seed
        )
        dist_svd = SparkTruncatedSVD(
            n_components, n_iter=50, tol=solver_tol, random_state=seed
        )

        local_svd.fit(X)
        dist_svd.fit(X_rdd)

        expected_first = local_svd.components_[0, :]
        actual_first = dist_svd.components_[0]

        # Tolerance for comparing the results; much looser than the
        # tolerance handed to the solvers above.
        compare_atol = 1e-1

        # Singular vectors are only determined up to sign, so the
        # component may legitimately come back negated.
        same_sign = np.allclose(actual_first, expected_first,
                                atol=compare_atol)
        flipped_sign = np.allclose(-actual_first, expected_first,
                                   atol=compare_atol)
        assert same_sign or flipped_sign
Example #5
0
    def test_same_components(self):
        """Compare the first fitted component of local and Spark SVD.

        Both estimators are fitted with the same number of components,
        solver tolerance and random state (only ``n_iter`` differs: 5
        locally, 50 distributed).  The first row of ``components_`` must
        match up to sign within a loose absolute tolerance.
        """
        X, X_rdd = self.make_dense_rdd((1e3, 10))

        n_components = 2
        seed = 42
        solver_tol = 1e-7
        local_svd = TruncatedSVD(
            n_components, n_iter=5, tol=solver_tol, random_state=seed
        )
        dist_svd = SparkTruncatedSVD(
            n_components, n_iter=50, tol=solver_tol, random_state=seed
        )

        local_svd.fit(X)
        dist_svd.fit(X_rdd)

        expected_first = local_svd.components_[0, :]
        actual_first = dist_svd.components_[0]

        # Tolerance for comparing the results; much looser than the
        # tolerance handed to the solvers above.
        compare_atol = 1e-1

        # Singular vectors are only determined up to sign, so the
        # component may legitimately come back negated.
        same_sign = np.allclose(actual_first, expected_first,
                                atol=compare_atol)
        flipped_sign = np.allclose(-actual_first, expected_first,
                                   atol=compare_atol)
        assert same_sign or flipped_sign