Ejemplo n.º 1
0
    def test_als(self):
        """Compare the ALS NMF factors with reference values computed in MATLAB.

        A fixed 4x3 matrix is factorized with ``k=2`` starting from a fixed
        ``h0``. The fitted ``w`` and ``h`` must match the (rescaled) MATLAB
        factors to within an absolute tolerance of 1e-3.
        """
        # fixed starting point for h, so the run is reproducible
        h_init = array([[0.09082617, 0.85490047, 0.57234593],
                        [0.82766740, 0.21301186, 0.90913979]])

        # input matrix as (key, row) records; keys are the 1-based row numbers
        values = array([[1.0, 2.0, 6.0],
                        [1.0, 3.0, 0.0],
                        [1.0, 4.0, 6.0],
                        [5.0, 1.0, 4.0]])
        records = [(array([idx]), row)
                   for idx, row in enumerate(values, start=1)]
        mat = RowMatrix(self.sc.parallelize(records))

        # MATLAB factors, obtained without normalizing h between iterations
        h_matlab = array([[0., 0.6010, 0.9163],
                          [0.8970, 0.1556, 0.7423]])
        w_matlab = array([[4.5885, 1.5348],
                          [1.3651, 0.2184],
                          [5.9349, 1.0030],
                          [0., 5.5147]])

        # The implementation under test normalizes h, so rescale the reference:
        # divide each row of h by its norm and fold that factor into the
        # matching column of w (the product w * h is unchanged).
        row_scale = diag(norm(h_matlab, axis=1))
        h_expected = dot(LinAlg.inv(row_scale), h_matlab)
        w_expected = dot(w_matlab, row_scale)

        # run the Thunder ALS solver; 9 iterations matches the MATLAB run
        model = NMF(k=2, method="als", h0=h_init, maxiter=9)
        model.fit(mat)
        h_fit = model.h
        w_fit = array(model.w.values().collect())

        # small absolute error is tolerated
        abs_tol = 1e-03
        assert allclose(w_fit, w_expected, atol=abs_tol)
        assert allclose(h_fit, h_expected, atol=abs_tol)
Ejemplo n.º 2
0
    def test_init(self):
        """
        End-to-end check of NMF when no initial ``h0`` is supplied.

        The fit therefore includes the solver's own (random) initialization,
        and must still reach a reconstruction error below 2.9950 on a small
        4x3 matrix.
        """
        values = array([[1.0, 2.0, 6.0],
                        [1.0, 3.0, 0.0],
                        [1.0, 4.0, 6.0],
                        [5.0, 1.0, 4.0]])
        # key every row by its 0-based index
        records = [(array([idx]), row) for idx, row in enumerate(values)]
        mat = RowMatrix(self.sc.parallelize(records))

        model = NMF(k=2, recon_hist='final')
        model.fit(mat)

        # The bound is a close-to-optimal reconstruction error:
        #   - scikit-learn's solution achieves 2.993952
        #   - MATLAB's non-deterministic implementation usually achieves
        #     < 2.9950 (when it converges)
        err_bound = 2.9950
        assert model.recon_err < err_bound
Ejemplo n.º 3
0
    def test_init(self):
        """
        Run the complete NMF fit without a user-provided ``h0``.

        This exercises the solver's own (random) initialization; the final
        reconstruction error on a small 4x3 matrix has to fall below 2.9950.
        """
        inputRows = array([[1.0, 2.0, 6.0],
                           [1.0, 3.0, 0.0],
                           [1.0, 4.0, 6.0],
                           [5.0, 1.0, 4.0]])
        # pair each row with a one-element key holding its 0-based index
        keyedRows = [(array([rowIdx]), row) for rowIdx, row in enumerate(inputRows)]
        rdd = self.sc.parallelize(keyedRows)
        mat = RowMatrix(rdd)

        model = NMF(k=2, reconHist='final')
        model.fit(mat)

        # Threshold for a close-to-optimal reconstruction error:
        #   scikit-learn's solution achieves 2.993952;
        #   MATLAB's non-deterministic implementation usually achieves
        #   < 2.9950 (when it converges).
        maxAllowedErr = 2.9950
        assert model.reconErr < maxAllowedErr
Ejemplo n.º 4
0
    def test_als(self):
        """Check the alternating least-squares NMF solver against MATLAB output.

        Factorizes a fixed 4x3 matrix with ``k=2`` from a fixed ``h0`` and
        requires the fitted ``w`` and ``h`` to agree with the rescaled MATLAB
        factors within an absolute tolerance of 1e-3.
        """
        # deterministic initial value for h
        hInit = array([[0.09082617, 0.85490047, 0.57234593],
                       [0.82766740, 0.21301186, 0.90913979]])

        # build (key, row) records; each key is the 1-based row number
        inputRows = array([[1.0, 2.0, 6.0],
                           [1.0, 3.0, 0.0],
                           [1.0, 4.0, 6.0],
                           [5.0, 1.0, 4.0]])
        keyedRows = [(array([rowNum]), row)
                     for rowNum, row in enumerate(inputRows, start=1)]
        mat = RowMatrix(self.sc.parallelize(keyedRows))

        # reference factors from MATLAB, where h is not normalized per iteration
        hMatlab = array([[0., 0.6010, 0.9163],
                         [0.8970, 0.1556, 0.7423]])
        wMatlab = array([[4.5885, 1.5348],
                         [1.3651, 0.2184],
                         [5.9349, 1.0030],
                         [0., 5.5147]])

        # The current implementation normalizes h, so bring the reference to
        # the same convention: scale every row of h to unit norm and multiply
        # the corresponding column of w by that norm.
        rowScale = diag(norm(hMatlab, axis=1))
        hExpected = dot(LinAlg.inv(rowScale), hMatlab)
        wExpected = dot(wMatlab, rowScale)

        # fit with Thunder; maxIter=9 corresponds to the MATLAB algorithm's run
        model = NMF(k=2, method="als", h0=hInit, maxIter=9)
        model.fit(mat)
        hFit = model.h
        wFit = array(model.w.values().collect())

        # allow a small absolute error
        absTol = 1e-03
        assert allclose(wFit, wExpected, atol=absTol)
        assert allclose(hFit, hExpected, atol=absTol)