コード例 #1
0
ファイル: test_clustering.py プロジェクト: vpomponiu/thunder
    def test_kmeans_k2(self):
        """Train KMeans with k=2 on a generated data set and check its labels.

        DataSets.make is asked for a 50-record "kmeans" data set and, via
        returnparams=True, a second value that is wrapped in a KMeansModel
        to act as the reference. Labels predicted by the trained model must
        equal the reference labels, either directly or after swapping the
        two cluster indices (0 <-> 1).
        """
        generated = DataSets.make(self.sc, "kmeans", k=2, nrecords=50,
                                  npartitions=5, seed=42, returnparams=True)
        data, trueparams = generated
        reference = KMeansModel(trueparams)

        trainer = KMeans(k=2, maxiter=20, tol=0.001, init="sample")
        fitted = trainer.train(data)

        labels = array(fitted.predict(data).values().collect())
        labelstrue = array(reference.predict(data).values().collect())
        print(labels)
        print(labelstrue)

        # Cluster numbering is arbitrary: when the direct comparison fails,
        # the flipped labelling has to match instead.
        if not array_equal(labels, labelstrue):
            assert array_equal(labels, 1 - labelstrue)
コード例 #2
0
ファイル: test_clustering.py プロジェクト: rheiland/thunder
    def test_kmeans_k2(self):
        """Verify two-cluster KMeans against labels from the returned parameters.

        The data set and a second return value (requested with
        returnparams=True) come from DataSets.make; the latter is wrapped in
        a KMeansModel and used as the reference. The test passes when the
        trained model's labels equal the reference labels or their 0/1
        complement.
        """
        settings = dict(k=2, nrecords=50, npartitions=5, seed=42, returnparams=True)
        data, params = DataSets.make(self.sc, "kmeans", **settings)
        expected_model = KMeansModel(params)

        trained_model = KMeans(k=2, maxiter=20, tol=0.001, init="sample").train(data)

        def collect_labels(m):
            # Predict on the shared data set and gather the values locally.
            return array(m.predict(data).values().collect())

        labels = collect_labels(trained_model)
        labelstrue = collect_labels(expected_model)
        print(labels)
        print(labelstrue)

        # Either labelling is acceptable because cluster indices may be swapped.
        acceptable = (labelstrue, 1 - labelstrue)
        assert any(array_equal(labels, candidate) for candidate in acceptable)
コード例 #3
0
ファイル: context.py プロジェクト: anujsrc/thunder
    def makeExample(self, dataset, **opts):
        """
        Generate an example data set for testing analyses.

        Thin wrapper that forwards to DataSets.make, supplying this
        object's ``_sc`` attribute as the first argument.

        Parameters
        ----------
        dataset : str
            Which dataset to generate

        opts : keyword arguments
            Forwarded unchanged to DataSets.make

        Returns
        -------
        data : RDD of (tuple, array) pairs
            Generated dataset, exactly as returned by DataSets.make
        """
        example = DataSets.make(self._sc, dataset, **opts)
        return example
コード例 #4
0
ファイル: context.py プロジェクト: gidonro/thunder
    def makeExample(self, dataset, **opts):
        """
        Build one of the example data sets used for testing analyses.

        The work is done by DataSets.make; this method only passes along
        ``self._sc``, the dataset name, and any extra keyword options.

        Parameters
        ----------
        dataset : str
            Name of the dataset to generate

        opts : keyword arguments
            Extra options handed to DataSets.make as-is

        Returns
        -------
        data : RDD of (tuple, array) pairs
            The value returned by DataSets.make
        """
        factory = DataSets.make
        return factory(self._sc, dataset, **opts)
コード例 #5
0
    def test_ica(self):
        """Fit a two-component ICA on generated data and check its output.

        Two properties are asserted: every original signal correlates with
        one of the estimated signals with absolute correlation within 0.01
        of 1, and the collected data values are reproduced by
        ``dot(estimated_signals, ica.a.T)``.
        """
        random.seed(42)
        data, s, _ = DataSets.make(self.sc, "ica", nrows=100, params=True)

        model = ICA(c=2, svdmethod="direct", seed=1)
        model.fit(data)

        recovered = array(model.sigs.values().collect())

        tol = 0.01

        def recovers(orig_col, est_col):
            # Absolute correlation is used, so a sign flip still counts.
            r = corrcoef(s[:, orig_col], recovered[:, est_col])[0, 1]
            return allclose(abs(r), 1, atol=tol)

        # test accurate recovery of original signals; the estimated signals
        # may come back in either order
        for orig_col in (0, 1):
            assert recovers(orig_col, 0) or recovers(orig_col, 1)

        # test accurate reconstruction from sources
        observed = array(data.values().collect())
        rebuilt = dot(recovered, model.a.T)
        assert allclose(observed, rebuilt)