def test_kmeans_k2(self):
    """
    Check k=2 clustering on a generated 50-record "kmeans" data set.

    A KMeans model is trained on the generated data, and its predicted
    labels are compared with the labels predicted by a KMeansModel built
    from the parameters that DataSets.make returned. Because cluster
    indices are arbitrary, the two label vectors may agree either directly
    or with 0 and 1 swapped.
    """
    points, true_centers = DataSets.make(
        self.sc, "kmeans", k=2, nrecords=50, npartitions=5, seed=42, returnparams=True
    )
    reference = KMeansModel(true_centers)
    fitted = KMeans(k=2, maxiter=20, tol=0.001, init="sample").train(points)

    predicted = array(fitted.predict(points).values().collect())
    expected = array(reference.predict(points).values().collect())
    print(predicted)
    print(expected)

    # Accept either labelling: identical, or with the two cluster ids flipped.
    identical = array_equal(predicted, expected)
    assert identical or array_equal(predicted, 1 - expected)
def test_kmeans_k2(self):
    """
    Check k=2 clustering on a generated 50-record "kmeans" data set.

    A KMeans model is trained on the generated data, and its predicted
    labels are compared with the labels predicted by a KMeansModel built
    from the parameters that DataSets.make returned. Because cluster
    indices are arbitrary, the two label vectors may agree either directly
    or with 0 and 1 swapped.
    """
    points, true_centers = DataSets.make(
        self.sc, "kmeans", k=2, nrecords=50, npartitions=5, seed=42, returnparams=True
    )
    reference = KMeansModel(true_centers)
    fitted = KMeans(k=2, maxiter=20, tol=0.001, init="sample").train(points)

    predicted = array(fitted.predict(points).values().collect())
    expected = array(reference.predict(points).values().collect())
    print(predicted)
    print(expected)

    # Accept either labelling: identical, or with the two cluster ids flipped.
    identical = array_equal(predicted, expected)
    assert identical or array_equal(predicted, 1 - expected)
def makeExample(self, dataset, **opts):
    """
    Generate an example data set for testing analyses.

    Delegates to DataSets.make, passing this object's ``_sc`` attribute as
    the first argument; see DataSets for the available data sets.

    Parameters
    ----------
    dataset : str
        Which dataset to generate

    **opts
        Extra keyword arguments, forwarded unchanged to DataSets.make

    Returns
    -------
    data : RDD of (tuple, array) pairs
        Generated dataset (whatever DataSets.make returns for the given
        options is passed back as-is)
    """
    generated = DataSets.make(self._sc, dataset, **opts)
    return generated
def test_ica(self):
    """
    Fit a two-component ICA to a generated "ica" data set and check the result.

    Two properties are asserted: each original signal is (up to sign)
    almost perfectly correlated with one of the estimated signals, and the
    data is reproduced by multiplying the estimated signals with the
    transpose of the fitted ``a`` attribute.
    """
    random.seed(42)
    data, signals, _ = DataSets.make(self.sc, "ica", nrows=100, params=True)

    model = ICA(c=2, svdmethod="direct", seed=1)
    model.fit(data)
    estimated = array(model.sigs.values().collect())

    tol = 0.01

    def correlated(i, j):
        # True when original signal i and estimated signal j have an
        # absolute correlation coefficient within tol of 1.
        coeff = corrcoef(signals[:, i], estimated[:, j])[0, 1]
        return allclose(abs(coeff), 1, atol=tol)

    # test accurate recovery of original signals (component order is not fixed)
    assert correlated(0, 0) or correlated(0, 1)
    assert correlated(1, 0) or correlated(1, 1)

    # test accurate reconstruction from sources
    collected = array(data.values().collect())
    assert allclose(collected, dot(estimated, model.a.T))