def test_make_union(self):
    """Check the step names and objects produced by ``make_sparkunion``.

    The union is built from a ``SparkTruncatedSVD`` and a ``TransfT``
    instance. The expected step names are the lowercased class names of
    those two inputs, and the stored transformers must be the very
    objects that were passed in, in the same order.
    """
    svd_step = SparkTruncatedSVD()
    mock_step = TransfT()
    union = make_sparkunion(svd_step, mock_step)

    # transformer_list is unpacked as (name, transformer) pairs; transposing
    # it yields one tuple of names and one tuple of transformer objects.
    step_names, step_objects = zip(*union.transformer_list)

    assert_equal(step_names, ("sparktruncatedsvd", "transft"))
    assert_equal(step_objects, (svd_step, mock_step))
def test_same_fit_transforms(self):
    """Compare local and distributed ``fit_transform`` outputs.

    Fits ``TruncatedSVD`` on the in-memory data and ``SparkTruncatedSVD``
    on the matching RDD, then checks that both projections have the same
    shape and that their first columns agree up to a sign flip within a
    loose absolute tolerance.
    """
    # NOTE(review): another method with this exact name appears later in
    # the visible source. If both live in the same class, this definition
    # is shadowed and never runs -- confirm and rename one of them.

    # Integer dimensions on purpose: NumPy rejects a float such as 1e3 as
    # an array dimension (presumably the helper forwards the shape to
    # NumPy -- confirm against make_dense_rdd).
    X, X_rdd = self.make_dense_rdd((1000, 12))

    n_components = 4
    random_state = 42
    solver_tol = 1e-7  # tolerance handed to both estimators

    local = TruncatedSVD(n_components, n_iter=5, tol=solver_tol,
                         random_state=random_state)
    dist = SparkTruncatedSVD(n_components, n_iter=50, tol=solver_tol,
                             random_state=random_state)

    Z_local = local.fit_transform(X)
    Z_dist = dist.fit_transform(X_rdd).toarray()

    assert_array_equal(Z_local.shape, Z_dist.shape)

    # The two results may differ by sign, so accept either orientation of
    # the first column.
    compare_tol = 1e-1
    same_sign = np.allclose(Z_dist[:, 0], Z_local[:, 0], atol=compare_tol)
    flipped = np.allclose(-Z_dist[:, 0], Z_local[:, 0], atol=compare_tol)
    assert same_sign or flipped, (
        "first transformed column differs between local and distributed "
        "SVD beyond atol=%g (even allowing a sign flip)" % compare_tol
    )
def test_same_fit_transforms(self):
    """Compare local and distributed ``fit_transform`` outputs.

    Fits ``TruncatedSVD`` on the in-memory data and ``SparkTruncatedSVD``
    on the matching RDD. The distributed result must pass
    ``check_rdd_dtype`` for ``np.ndarray``; once collected with
    ``toarray()`` it must have the same shape as the local projection,
    and the first columns must agree up to a sign flip within a loose
    absolute tolerance.
    """
    # Integer dimensions on purpose: NumPy rejects a float such as 1e3 as
    # an array dimension (presumably the helper forwards the shape to
    # NumPy -- confirm against make_dense_rdd).
    X, X_rdd = self.make_dense_rdd((1000, 12))

    n_components = 4
    random_state = 42
    solver_tol = 1e-7  # tolerance handed to both estimators

    local = TruncatedSVD(n_components, n_iter=5, tol=solver_tol,
                         random_state=random_state)
    dist = SparkTruncatedSVD(n_components, n_iter=50, tol=solver_tol,
                             random_state=random_state)

    Z_local = local.fit_transform(X)
    Z_dist = dist.fit_transform(X_rdd)
    Z_collected = Z_dist.toarray()

    # The distributed output itself (before collection) is type-checked.
    assert_true(check_rdd_dtype(Z_dist, (np.ndarray,)))
    assert_array_equal(Z_local.shape, Z_collected.shape)

    # The two results may differ by sign, so accept either orientation of
    # the first column.
    compare_tol = 1e-1
    same_sign = np.allclose(Z_collected[:, 0], Z_local[:, 0],
                            atol=compare_tol)
    flipped = np.allclose(-Z_collected[:, 0], Z_local[:, 0],
                          atol=compare_tol)
    assert same_sign or flipped, (
        "first transformed column differs between local and distributed "
        "SVD beyond atol=%g (even allowing a sign flip)" % compare_tol
    )
def test_same_components(self):
    """Compare the first fitted component of local and distributed SVD.

    Fits ``TruncatedSVD`` on the in-memory data and ``SparkTruncatedSVD``
    on the matching RDD, then checks that the first row of
    ``components_`` agrees between the two up to a sign flip within a
    loose absolute tolerance.
    """
    # Integer dimensions on purpose: NumPy rejects a float such as 1e3 as
    # an array dimension (presumably the helper forwards the shape to
    # NumPy -- confirm against make_dense_rdd).
    X, X_rdd = self.make_dense_rdd((1000, 10))

    n_components = 2
    random_state = 42
    solver_tol = 1e-7  # tolerance handed to both estimators

    local = TruncatedSVD(n_components, n_iter=5, tol=solver_tol,
                         random_state=random_state)
    dist = SparkTruncatedSVD(n_components, n_iter=50, tol=solver_tol,
                             random_state=random_state)

    local.fit(X)
    dist.fit(X_rdd)

    v_true = local.components_
    v = dist.components_

    # The two results may differ by sign, so accept either orientation of
    # the first component.
    compare_tol = 1e-1
    same_sign = np.allclose(v[0], v_true[0, :], atol=compare_tol)
    flipped = np.allclose(-v[0], v_true[0, :], atol=compare_tol)
    assert same_sign or flipped, (
        "first component differs between local and distributed SVD "
        "beyond atol=%g (even allowing a sign flip)" % compare_tol
    )