def test_als(self):
    """ Test accuracy of the alternating least-squares NMF algorithm
    against the MATLAB-computed version
    """
    # set data and initialization constants
    keys = [array([i + 1]) for i in range(4)]
    dataLocal = array([
        [1.0, 2.0, 6.0],
        [1.0, 3.0, 0.0],
        [1.0, 4.0, 6.0],
        [5.0, 1.0, 4.0]])
    data = self.sc.parallelize(zip(keys, dataLocal))
    mat = RowMatrix(data)
    h0 = array(
        [[0.09082617, 0.85490047, 0.57234593],
         [0.82766740, 0.21301186, 0.90913979]])

    # expected values if the rows of h are not normalized on each iteration:
    hTrue = array(
        [[0.,     0.6010, 0.9163],
         [0.8970, 0.1556, 0.7423]])
    wTrue = array(
        [[4.5885, 1.5348],
         [1.3651, 0.2184],
         [5.9349, 1.0030],
         [0.,     5.5147]])

    # if the rows of h are normalized on each iteration (as in the current implementation):
    scaleMat = diag(norm(hTrue, axis=1))
    hTrue = dot(LinAlg.inv(scaleMat), hTrue)
    wTrue = dot(wTrue, scaleMat)

    # calculate NMF using the Thunder implementation
    # (maxIter=9 corresponds with the MATLAB algorithm)
    nmfThunder = NMF(k=2, method="als", h0=h0, maxIter=9)
    nmfThunder.fit(mat)
    hThunder = nmfThunder.h
    wThunder = array(nmfThunder.w.values().collect())

    tol = 1e-03  # allow small error
    assert allclose(wThunder, wTrue, atol=tol)
    assert allclose(hThunder, hTrue, atol=tol)
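# ---------------------------------------------------------------------------
# Illustrative sketch only (not part of Thunder or this test suite): a minimal
# local ALS-NMF loop in plain NumPy, showing the style of update exercised by
# test_als above. The update order, clipping, and row normalization used by
# Thunder's "als" method and by the MATLAB reference may differ, so treat this
# as an assumption rather than the library's implementation. The helper name
# _alsNmfSketch is hypothetical.
def _alsNmfSketch(v, h, nIter=9):
    from numpy import dot, maximum, diag
    from numpy.linalg import pinv, norm, inv
    for _ in range(nIter):
        # least-squares update of w given h, clipping negatives to zero
        w = maximum(dot(v, pinv(h)), 0)
        # least-squares update of h given w, clipping negatives to zero
        h = maximum(dot(pinv(w), v), 0)
    # rescale so each row of h has unit norm, mirroring the scaleMat step above
    scaleMat = diag(norm(h, axis=1))
    return dot(w, scaleMat), dot(inv(scaleMat), h)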
def test_init(self):
    """ Test performance of the whole function, including random initialization
    """
    dataLocal = array([
        [1.0, 2.0, 6.0],
        [1.0, 3.0, 0.0],
        [1.0, 4.0, 6.0],
        [5.0, 1.0, 4.0]])
    data = self.sc.parallelize(zip([array([i]) for i in range(dataLocal.shape[0])], dataLocal))
    mat = RowMatrix(data)

    nmfThunder = NMF(k=2, reconHist='final')
    nmfThunder.fit(mat)

    # check that Thunder's solution achieves close-to-optimal reconstruction error
    # scikit-learn's solution achieves 2.993952
    # MATLAB's non-deterministic implementation usually achieves < 2.9950 (when it converges)
    assert nmfThunder.reconErr < 2.9950
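# ---------------------------------------------------------------------------
# Illustrative sketch only (not part of this test suite): one way to reproduce
# the scikit-learn reconstruction-error baseline quoted in the comment above.
# The exact value depends on scikit-learn's version, solver, and random state,
# so the number returned here is a rough reference, not necessarily 2.993952.
# The helper name _sklearnReconErrSketch is hypothetical; whether Thunder's
# reconErr is exactly this Frobenius-norm residual is also an assumption.
def _sklearnReconErrSketch():
    from numpy import array, dot
    from numpy.linalg import norm
    # aliased to avoid shadowing Thunder's own NMF import in this module
    from sklearn.decomposition import NMF as SkNMF
    x = array([[1.0, 2.0, 6.0],
               [1.0, 3.0, 0.0],
               [1.0, 4.0, 6.0],
               [5.0, 1.0, 4.0]])
    model = SkNMF(n_components=2, init='random', random_state=0)
    w = model.fit_transform(x)
    h = model.components_
    # Frobenius norm of the reconstruction residual
    return norm(x - dot(w, h))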