def test_initialize_variants():
    # NNDSVD variants correctness: 'nndsvda' and 'nndsvdar' are only allowed
    # to deviate from plain 'nndsvd' at entries where the plain result is zero.
    sample = np.abs(random_state.randn(10, 10))

    base_W, base_H = nmf._initialize_nmf(sample, 10, init="nndsvd")
    avg_W, avg_H = nmf._initialize_nmf(sample, 10, init="nndsvda")
    rnd_W, rnd_H = nmf._initialize_nmf(sample, 10, init="nndsvdar",
                                       random_state=0)

    comparisons = [
        (base_W, avg_W),
        (base_W, rnd_W),
        (base_H, avg_H),
        (base_H, rnd_H),
    ]
    for baseline, candidate in comparisons:
        # Only the entries that are non-zero in the baseline must agree.
        nonzero = baseline != 0
        assert_almost_equal(candidate[nonzero], baseline[nonzero])
def test_initialize_variants():
    # NNDSVD variants correctness: the 'a' and 'ar' variants may differ from
    # the basic NNDSVD output only at positions where the basic output is zero.
    sample = np.abs(random_state.randn(10, 10))

    plain_W, plain_H = nmf._initialize_nmf(sample, 10, variant=None)
    a_W, a_H = nmf._initialize_nmf(sample, 10, variant='a')
    ar_W, ar_H = nmf._initialize_nmf(sample, 10, variant='ar', random_state=0)

    pairs = ((plain_W, a_W), (plain_W, ar_W), (plain_H, a_H), (plain_H, ar_H))
    for baseline, candidate in pairs:
        keep = baseline != 0
        matches = np.allclose(candidate[keep], baseline[keep])
        assert_true(matches)
def test_initialize_nn_output():
    # No initialization scheme may hand back a factor with negative entries.
    generator = np.random.mtrand.RandomState(42)
    sample = np.abs(generator.randn(10, 10))
    schemes = ('random', 'nndsvd', 'nndsvda', 'nndsvdar')
    for scheme in schemes:
        W, H = nmf._initialize_nmf(sample, 10, init=scheme, random_state=0)
        has_negative = (W < 0).any() or (H < 0).any()
        assert not has_negative
def test_nmf_decreasing():
    # The objective must strictly decrease from one single-iteration call to
    # the next, for every supported (solver, beta_loss) combination.
    n_samples, n_features, n_components = 20, 15, 10
    alpha, l1_ratio, tol = 0.1, 0.5, 0.

    # Non-negative data and a shared random starting point.
    rng = np.random.mtrand.RandomState(42)
    X = rng.randn(n_samples, n_features)
    np.abs(X, out=X)
    W_start, H_start = nmf._initialize_nmf(X, n_components, init='random',
                                           random_state=42)

    for beta_loss in (-1.2, 0, 0.2, 1., 2., 2.5):
        for solver in ('cd', 'mu'):
            # Only 'mu' supports losses other than the Frobenius one.
            if not (solver == 'mu' or beta_loss == 2):
                continue

            W = W_start.copy()
            H = H_start.copy()
            last_loss = None
            for _ in range(30):
                # Advance exactly one iteration from the previous factors.
                W, H, _ = non_negative_factorization(
                    X, W, H, beta_loss=beta_loss, init='custom',
                    n_components=n_components, max_iter=1, alpha=alpha,
                    solver=solver, tol=tol, l1_ratio=l1_ratio, verbose=0,
                    regularization='both', random_state=0, update_H=True)

                current_loss = nmf._beta_divergence(X, W, H, beta_loss)
                if last_loss is not None:
                    assert_greater(last_loss, current_loss)
                last_loss = current_loss
def test_initialize_close():
    # NNDSVD error: the reconstruction error of the initialization must not
    # exceed the spread of the matrix entries around their mean.
    matrix = np.abs(random_state.randn(10, 10))
    W, H = nmf._initialize_nmf(matrix, 10, init='nndsvd')

    reconstruction = np.dot(W, H)
    residual_norm = linalg.norm(reconstruction - matrix)
    spread_norm = linalg.norm(matrix - matrix.mean())

    assert_true(residual_norm <= spread_norm)
def test_safe_compute_error():
    # The error computed on a sparse matrix must match the dense computation.
    dense = np.abs(random_state.randn(10, 10))
    even_columns = 2 * np.arange(5)
    dense[:, even_columns] = 0
    sparse = csc_matrix(dense)

    W, H = nmf._initialize_nmf(dense, 5, init='random', random_state=0)

    dense_error = nmf._safe_compute_error(dense, W, H)
    sparse_error = nmf._safe_compute_error(sparse, W, H)
    assert_almost_equal(dense_error, sparse_error)
def alt_nnmf(V, r, max_iter=1000, tol=1e-3, R=None):
    '''
    A, S = alt_nnmf(V, r, max_iter=1000, tol=1e-3, R=None)

    Implement Lee & Seung's multiplicative-update algorithm.

    Parameters
    ----------
    V : 2-ndarray, [n_samples, n_features]
        input matrix
    r : integer
        number of latent features
    max_iter : integer, optional
        maximum number of iterations (default: 1000)
    tol : double
        tolerance threshold for early exit (when the update factor is within
        tol of 1., the function exits)
    R : None, "svd", integer or random-state object, optional
        How to initialise the factors:

        - None: draw from NumPy's global random state;
        - "svd": delegate to ``_initialize_nmf(V, r)``;
        - an object exposing ``standard_normal``: used as the generator;
        - anything else (e.g. an integer): used as the seed of a fresh
          ``np.random.RandomState``.

    Returns
    -------
    A : 2-ndarray, [n_samples, r]
        Component part of the factorization
    S : 2-ndarray, [r, n_features]
        Data part of the factorization

    Reference
    ---------
    "Algorithms for Non-negative Matrix Factorization"
    by Daniel D Lee, Sebastian H Seung
    (available at http://citeseer.ist.psu.edu/lee01algorithms.html)
    '''
    # Nomenclature in the function follows Lee & Seung
    eps = 1e-5
    n, m = V.shape

    if isinstance(R, str) and R == "svd":
        W, H = _initialize_nmf(V, r)
    else:
        # Previously only R=None was handled here; a documented integer seed
        # left W and H unassigned and crashed on first use.
        if R is None:
            rng = np.random.mtrand._rand
        elif hasattr(R, 'standard_normal'):
            rng = R
        else:
            rng = np.random.RandomState(R)
        # Draw W before H so the R=None stream is consumed as before.
        W = np.abs(rng.standard_normal((n, r)))
        H = np.abs(rng.standard_normal((r, m)))

    for _ in range(max_iter):
        updateH = np.dot(W.T, V) / (np.dot(np.dot(W.T, W), H) + eps)
        H *= updateH
        # The W update deliberately uses the freshly updated H.
        updateW = np.dot(V, H.T) / (np.dot(W, np.dot(H, H.T)) + eps)
        W *= updateW

        # Convergence is checked on every iteration: stop once the largest
        # multiplicative factor is within tol of 1.
        max_update = max(updateW.max(), updateH.max())
        if abs(1. - max_update) < tol:
            break
    return W, H
def initialize_factor_matrices(S, Y, W, init, dtype, logger, config):
    """
    Initialize the factor matrices with either the 'random' or the 'nndsvd'
    method; the random seed can be fixed through the configuration.

    Parameters
    ----------
    S : matrix factorized by NNDSVD to obtain U and M ('nndsvd' only).
    Y, W : operands whose product ``W*Y`` is factorized by NNDSVD to obtain
        Q ('nndsvd' only).
    init : {'random', 'nndsvd'}
        Initialization method.
    dtype : dtype of the randomly generated matrices.
    logger : object with a ``debug`` method.
    config : object providing ``FIXED_SEED``, ``SEED_VALUE``, ``N``,
        ``L_COMPONENTS``, ``Q`` and ``K`` (the sizes are passed through
        ``int()``).

    Returns
    -------
    M, U, C, H, Q : the initialized factor matrices, in that order.

    Raises
    ------
    ValueError
        If ``init`` is not one of the supported methods.
    """
    if config.FIXED_SEED == 'Y':
        np.random.seed(int(config.SEED_VALUE))

    logger.debug('Initializing factor matrices')
    if init == 'random':
        n_rows = int(config.N)
        n_latent = int(config.L_COMPONENTS)
        # Draw order U, M, Q is kept so seeded runs reproduce earlier results.
        U = np.array(np.random.rand(n_rows, n_latent), dtype=dtype)
        M = np.array(np.random.rand(n_latent, n_rows), dtype=dtype)
        Q = np.array(np.random.rand(int(config.Q), n_latent), dtype=dtype)
    elif init == 'nndsvd':
        U, M = _initialize_nmf(S, int(config.L_COMPONENTS), 'nndsvd')
        Q, _ = _initialize_nmf(W*Y, int(config.L_COMPONENTS), 'nndsvd')
    else:
        # A bare string is not an exception; raising it produced an unrelated
        # TypeError instead of reporting the bad option.
        raise ValueError('Unknown init option ("%s")' % init)

    U = sparse_to_matrix(U)
    M = sparse_to_matrix(M)
    Q = sparse_to_matrix(Q)

    H = np.array(np.random.rand(int(config.K), int(config.N)), dtype=dtype)
    C = np.array(np.random.rand(int(config.K), int(config.L_COMPONENTS)),
                 dtype=dtype)
    logger.debug('Initialization completed')
    return M, U, C, H, Q
def test_loss_decreasing():
    # After every single-iteration step, the objective of at least one of the
    # two factorized matrices must have decreased.
    n_components = 10
    tol = 0.

    # Two non-negative matrices sharing the V factor.
    rng = np.random.mtrand.RandomState(42)
    X = np.abs(rng.randn(20, 15))
    Y = np.abs(rng.randn(15, 10))
    U_start, V_from_x = nmf._initialize_nmf(X, n_components, init='random',
                                            random_state=42)
    V_from_y, Z_start = nmf._initialize_nmf(Y, n_components, init='random',
                                            random_state=42)
    V_start = (V_from_x.T + V_from_y) / 2
    U = U_start.copy()
    V = V_start.copy()
    Z = Z_start.copy()

    # Only 'mu' is exercised: the Hessian is perturbed, so the guarantee
    # might not hold for the newton-raphson solver.
    for solver in ['mu']:
        last_x_loss = nmf._beta_divergence(X, U, V.T, 2)
        last_y_loss = nmf._beta_divergence(Y, V, Z.T, 2)
        for _ in range(30):
            # Advance exactly one iteration from the previous factors.
            U, V, Z, _ = collective_matrix_factorization(
                X, Y, U, V, Z, x_init='custom', y_init='custom',
                n_components=n_components, max_iter=1, solver=solver,
                tol=tol, verbose=0, random_state=0)

            x_loss = nmf._beta_divergence(X, U, V.T, 2)
            y_loss = nmf._beta_divergence(Y, V, Z.T, 2)
            best_decrease = max(last_x_loss - x_loss, last_y_loss - y_loss)
            assert_greater(best_decrease, 0)

            last_x_loss, last_y_loss = x_loss, y_loss
def alt_nnmf(V, r, max_iter=1000, tol=1e-3, init='random'):
    """
    A, S = alt_nnmf(V, r, max_iter=1000, tol=1e-3, init='random')

    Lee & Seung multiplicative-update factorization.

    Parameters
    ----------
    V : 2-ndarray, [n_samples, n_features]
        input matrix
    r : integer
        number of latent features
    max_iter : integer, optional
        maximum number of iterations (default: 1000)
    tol : double
        tolerance threshold for early exit (when the update factor is within
        tol of 1., the function exits)
    init : string
        Method used to initialize the procedure.

    Returns
    -------
    A : 2-ndarray, [n_samples, r]
        Component part of the factorization
    S : 2-ndarray, [r, n_features]
        Data part of the factorization

    Reference
    ---------
    "Algorithms for Non-negative Matrix Factorization"
    by Daniel D Lee, Sebastian H Seung
    (available at http://citeseer.ist.psu.edu/lee01algorithms.html)
    """
    # Nomenclature in the function follows Lee & Seung
    eps = 1e-5
    n, m = V.shape
    W, H = _initialize_nmf(V, r, init, random_state=0)

    for i in xrange(max_iter):
        # Multiplicative factor for H, applied in place.
        h_numerator = np.dot(W.T, V)
        h_denominator = np.dot(np.dot(W.T, W), H) + eps
        updateH = h_numerator / h_denominator
        H *= updateH

        # Multiplicative factor for W, computed from the updated H.
        w_numerator = np.dot(V, H.T)
        w_denominator = np.dot(W, np.dot(H, H.T)) + eps
        updateW = w_numerator / w_denominator
        W *= updateW

        # Convergence is only examined on every tenth iteration.
        if i % 10 != 0:
            continue
        max_update = max(updateW.max(), updateH.max())
        if abs(1. - max_update) < tol:
            break

    return W, H
def em(X, k, init, LF, prior, maxiter=100, verbose=True, eval_every=10,
       return_loss=False, tol=1e-3, random_state=123):
    """Run EM updates on the factor pair (L, F), starting from ``init``.

    When ``init`` is None the starting factors are taken from ``LF``;
    otherwise they come from ``_initialize_nmf`` (with its second factor
    transposed). The cost is evaluated every ``eval_every`` iterations and
    iteration stops once its relative decrease falls below ``tol``.

    Returns ``(L, F)``, or ``(L, F, costs)`` when ``return_loss`` is true.
    """
    tiny = sys.float_info.min

    # Starting point.
    if init is not None:
        L, F_t = _initialize_nmf(X, k, init=init, eps=1e-6,
                                 random_state=random_state)
        F = F_t.T
    else:
        L, F = LF

    # Keep both factors strictly positive.
    L = np.clip(L, a_min=tiny, a_max=None)
    F = np.clip(F, a_min=tiny, a_max=None)

    cost_history = []
    for step in range(maxiter):
        # Convergence is assessed only on evaluation steps.
        if step % eval_every == 0:
            current_cost = cost(X, L.dot(F.T), e=0)
            cost_history.append(current_cost)
            if step > 0:
                previous_cost = cost_history[-2]
                rel_cost = (previous_cost - current_cost) / abs(previous_cost)
            else:
                # No earlier evaluation: use a value that cannot pass tol.
                rel_cost = 10000
            if verbose:
                print("iter {:4d}\t{:.3f}\t{:.6f} ".format(
                    step, current_cost, rel_cost))
            if rel_cost < tol:
                print(
                    "rel_cost {} meet tolerance {} after {} iteration".format(
                        rel_cost, tol, step))
                break

        # One EM update of both factors.
        L, F = _update_em(X, L, F, prior)

    if not return_loss:
        return L, F
    return L, F, cost_history
def _fit_transform(self, X, y=None, W=None, H=None, update_H=True):
    """Validate the inputs and hyper-parameters, then factorize ``X``.

    With ``update_H`` true both factors are fitted (starting from the given
    ``W``/``H`` when ``self.init == 'custom'``); otherwise ``H`` is kept
    fixed and only ``W`` is solved for.

    Returns ``(W, H, n_iter)``.
    """
    X = check_array(X, accept_sparse=('csr', 'csc'))
    check_non_negative(X, "NMF (input X)")
    n_samples, n_features = X.shape

    n_components = (n_features if self.n_components is None
                    else self.n_components)

    # Hyper-parameter validation.
    components_ok = (isinstance(n_components, INTEGER_TYPES)
                     and not n_components <= 0)
    if not components_ok:
        raise ValueError("Number of components must be a positive integer;"
                         " got (n_components=%r)" % n_components)

    max_iter_ok = (isinstance(self.max_iter, INTEGER_TYPES)
                   and not self.max_iter < 0)
    if not max_iter_ok:
        raise ValueError("Maximum number of iterations must be a positive "
                         "integer; got (max_iter=%r)" % self.max_iter)

    tol_ok = (isinstance(self.tol, numbers.Number)
              and not self.tol < 0)
    if not tol_ok:
        raise ValueError("Tolerance for stopping criteria must be "
                         "positive; got (tol=%r)" % self.tol)

    if update_H:
        # fit_transform: check the user-supplied factors or build new ones.
        if self.init == 'custom':
            _check_init(H, (n_components, n_features), "NMF (input H)")
            _check_init(W, (n_samples, n_components), "NMF (input W)")
        else:
            W, H = _initialize_nmf(X, n_components, init=self.init,
                                   random_state=self.random_state)
        W, H, n_iter = _fit_projected_gradient(
            X, W, H, self.tol, self.max_iter, self.nls_max_iter,
            self.alpha, self.l1_ratio)
    else:
        # transform: H is fixed, W starts from zeros.
        _check_init(H, (n_components, n_features), "NMF (input H)")
        W = np.zeros((n_samples, n_components))
        W_t, _, n_iter = _nls_subproblem(X.T, H.T, W.T, self.tol,
                                         self.nls_max_iter,
                                         alpha=self.alpha,
                                         l1_ratio=self.l1_ratio)
        W = W_t.T

    hit_iteration_cap = n_iter == self.max_iter and self.tol > 0
    if hit_iteration_cap:
        warnings.warn("Maximum number of iteration %d reached. Increase it"
                      " to improve convergence." % self.max_iter,
                      ConvergenceWarning)

    return W, H, n_iter
def alt_nnmf(V, r, max_iter=1000, tol=1e-3, init='random'):
    '''
    A, S = alt_nnmf(V, r, max_iter=1000, tol=1e-3, init='random')

    Non-negative factorization with Lee & Seung's multiplicative updates.

    Parameters
    ----------
    V : 2-ndarray, [n_samples, n_features]
        input matrix
    r : integer
        number of latent features
    max_iter : integer, optional
        maximum number of iterations (default: 1000)
    tol : double
        tolerance threshold for early exit (when the update factor is within
        tol of 1., the function exits)
    init : string
        Method used to initialize the procedure.

    Returns
    -------
    A : 2-ndarray, [n_samples, r]
        Component part of the factorization
    S : 2-ndarray, [r, n_features]
        Data part of the factorization

    Reference
    ---------
    "Algorithms for Non-negative Matrix Factorization"
    by Daniel D Lee, Sebastian H Seung
    (available at http://citeseer.ist.psu.edu/lee01algorithms.html)
    '''
    # Nomenclature in the function follows Lee & Seung
    eps = 1e-5
    n, m = V.shape
    W, H = _initialize_nmf(V, r, init, random_state=0)

    for i in xrange(max_iter):
        WtW = np.dot(W.T, W)
        updateH = np.dot(W.T, V) / (np.dot(WtW, H) + eps)
        np.multiply(H, updateH, out=H)

        # H has just been refreshed; the W factor is built from the new H.
        HHt = np.dot(H, H.T)
        updateW = np.dot(V, H.T) / (np.dot(W, HHt) + eps)
        np.multiply(W, updateW, out=W)

        check_now = i % 10 == 0
        if check_now:
            largest = max(updateW.max(), updateH.max())
            if abs(1. - largest) < tol:
                break
    return W, H
def test_nmf_decreasing():
    # Each additional iteration has to lower the objective function.
    n_samples = 20
    n_features = 15
    n_components = 10

    # Non-negative input and one common random initialization.
    rng = np.random.mtrand.RandomState(42)
    X = rng.randn(n_samples, n_features)
    np.abs(X, X)
    W0, H0 = nmf._initialize_nmf(X, n_components, init='random',
                                 random_state=42)

    # Arguments shared by every single-iteration call below.
    shared_options = dict(init='custom', n_components=n_components,
                          max_iter=1, alpha=0.1, tol=0., l1_ratio=0.5,
                          verbose=0, regularization='both', random_state=0,
                          update_H=True)

    for beta_loss in (-1.2, 0, 0.2, 1., 2., 2.5):
        for solver in ('cd', 'mu'):
            if solver != 'mu' and beta_loss != 2:
                # not implemented
                continue

            W, H = W0.copy(), H0.copy()
            previous_loss = None
            for _ in range(30):
                # Resume from the factors of the previous call.
                W, H, _ = non_negative_factorization(
                    X, W, H, beta_loss=beta_loss, solver=solver,
                    **shared_options)
                loss = nmf._beta_divergence(X, W, H, beta_loss)
                if previous_loss is not None:
                    assert_greater(previous_loss, loss)
                previous_loss = loss
def test_nmf_multiplicative_update_sparse():
    # Multiplicative-update NMF must give the same factors for sparse and
    # dense input, and the factors must vary continuously with beta_loss.
    n_samples, n_features, n_components = 20, 10, 5
    alpha, l1_ratio, n_iter = 0.1, 0.5, 20

    # Non-negative data in dense and CSR form, plus a shared starting point.
    rng = np.random.mtrand.RandomState(1337)
    X = np.abs(rng.randn(n_samples, n_features))
    X_csr = sp.csr_matrix(X)
    W0, H0 = nmf._initialize_nmf(X, n_components, init='random',
                                 random_state=42)

    def factorize(data, beta):
        # Always restart from fresh copies of the shared initial factors.
        W, H, _ = non_negative_factorization(
            data, W0.copy(), H0.copy(), n_components, init='custom',
            update_H=True, solver='mu', beta_loss=beta, max_iter=n_iter,
            alpha=alpha, l1_ratio=l1_ratio, regularization='both',
            random_state=42)
        return W, H

    for beta_loss in (-1.2, 0, 0.2, 1., 2., 2.5):
        # Dense reference versus sparse input.
        W_dense, H_dense = factorize(X, beta_loss)
        W_sparse, H_sparse = factorize(X_csr, beta_loss)
        assert_array_almost_equal(W_dense, W_sparse, decimal=7)
        assert_array_almost_equal(H_dense, H_sparse, decimal=7)

        # Some beta_loss values take a dedicated code path; a value just
        # next to them must still give nearly the same result.
        W_near, H_near = factorize(X_csr, beta_loss - 1.e-5)
        assert_array_almost_equal(W_dense, W_near, decimal=4)
        assert_array_almost_equal(H_dense, H_near, decimal=4)
def ANLS(V, max_iter=30, sub_iter=30, sub_sub_iter=30, rank=40,
         callback=None, seed='nndsvd'):
    """
    Alternating Non-Negative Least Squares algorithm.

    Each iteration keeps one factor fixed and solves for the other: odd
    iterations update H from ``subproblem(V, W, H)``, even iterations update
    W from the transposed problem ``subproblem(V.T, H.T, W.T)``.

    Args:
        V: target matrix;
        max_iter: number of iterations;
        sub_iter, sub_sub_iter: parameters for subproblem() function;
        rank: rank of factorization;
        callback: function called as callback(V, W, H) after each iteration;
        seed: method of choosing starting point, 'nndsvd' or 'random'.

    Returns:
        W: basis matrix;
        H: coefficients matrix.

    Raises:
        ValueError: if ``seed`` is not 'nndsvd' or 'random'.
    """
    m = V.shape[0]
    n = V.shape[1]

    if seed == 'nndsvd':
        W, H = _initialize_nmf(V, rank)
    elif seed == 'random':
        # NOTE(review): standard-normal draws contain negative entries;
        # presumably subproblem() restores non-negativity — confirm.
        W = np.random.randn(m, rank)
        H = np.random.randn(rank, n)
    else:
        # Previously an unknown seed left W and H unbound and only failed
        # later with an unrelated name error.
        raise ValueError(
            "Unknown seed %r; expected 'nndsvd' or 'random'" % (seed,))

    for iter_num in range(1, 1 + max_iter):
        if iter_num % 2 == 1:
            H = subproblem(V, W, H, sub_iter=sub_iter,
                           sub_sub_iter=sub_sub_iter)
        else:
            W = subproblem(V.T, H.T, W.T, sub_iter=sub_iter,
                           sub_sub_iter=sub_sub_iter)
            W = W.T
        if callback:
            callback(V, W, H)

    return W, H
def compute_nmf_kl(A, rank, init='nndsvda', eps=sys.float_info.min,
                   shuffle=False, l2_reg_H=0.0, l2_reg_W=0.0, l1_reg_H=0.0,
                   l1_reg_W=0.0, tol=1e-5, maxiter=200, random_state=None):
    """Factorize non-negative ``A`` into ``W, H`` with ``maxiter`` KL updates.

    The factors are initialized by ``_initialize_nmf`` with method ``init``
    and then refined by ``update_kl`` exactly ``maxiter`` times.
    ``shuffle``, the regularization arguments and ``tol`` are accepted but
    not used by this implementation.

    Raises ValueError if ``A`` contains inf/nan or negative entries.
    """
    rng = check_random_state(random_state)

    # Convert to an array; this rejects inf and nan.
    A = np.asarray_chkfinite(A)
    n_rows, n_cols = A.shape

    if (A < 0).any():
        raise ValueError("Input matrix with nonnegative elements is required.")

    # Starting factors.
    W, H = _initialize_nmf(A, rank, init=init, eps=1e-6, random_state=rng)

    # All-ones matrix with the shape of A, handed to every update.
    ones = np.ones((n_rows, n_cols))

    # Fixed number of multiplicative updates; there is no early stopping.
    for _ in range(maxiter):
        W, H = update_kl(A, W, H, ones, eps=eps)

    return W, H
def test_beta_divergence():
    # _beta_divergence must agree with the dense reference implementation
    # for both dense and CSR input.
    n_samples, n_features, n_components = 20, 10, 5

    # Non-negative data (negative draws clipped to zero) and random factors.
    rng = np.random.mtrand.RandomState(42)
    X = rng.randn(n_samples, n_features)
    np.clip(X, 0, None, out=X)
    X_csr = sp.csr_matrix(X)
    W, H = nmf._initialize_nmf(X, n_components, init='random',
                               random_state=42)

    for beta in [0., 0.5, 1., 1.5, 2.]:
        expected = _beta_divergence_dense(X, W, H, beta)
        dense_loss = nmf._beta_divergence(X, W, H, beta)
        sparse_loss = nmf._beta_divergence(X_csr, W, H, beta)

        assert_almost_equal(expected, dense_loss, decimal=7)
        assert_almost_equal(expected, sparse_loss, decimal=7)
def initialize_factor_matrices(S, Y, n, rank, k, init, dtype, logger):
    """Create the initial factor matrices M, U, C, H and Q.

    The global NumPy random state is re-seeded with 0 on every call, so the
    result is deterministic.

    Parameters
    ----------
    S : matrix factorized by NNDSVD to obtain M and U ('nndsvd' only).
    Y : array-like; only its first dimension is used (row count of Q).
    n, rank, k : int
        Sizes of the generated matrices.
    init : {'random', 'nndsvd'}
        Initialization method for M and U.
    dtype : dtype of the randomly generated matrices.
    logger : object with a ``debug`` method.

    Returns
    -------
    M : (n, rank), U : (n, rank), C : (k, rank), H : (n, k), Q : (q, rank)
        for the 'random' method, where ``q = np.shape(Y)[0]``.

    Raises
    ------
    ValueError
        If ``init`` is not one of the supported methods.
    """
    np.random.seed(0)

    logger.debug('Initializing U')
    if init == 'random':
        U = np.array(np.random.rand(rank, n), dtype=dtype)
        M = np.array(np.random.rand(n, rank), dtype=dtype)
    elif init == 'nndsvd':
        M, U = _initialize_nmf(S, rank, 'nndsvd')
        # Densify each factor on its own: the old combined test sent a
        # single sparse factor through np.array(), which wraps it in a
        # 0-d object array.
        U = U.toarray() if issparse(U) else np.array(U)
        M = M.toarray() if issparse(M) else np.array(M)
    else:
        # A bare string is not an exception; raising it produced an
        # unrelated TypeError.
        raise ValueError('Unknown init option ("%s")' % init)

    q = np.shape(Y)[0]
    Q = np.array(np.random.rand(q, rank), dtype=dtype)
    H = np.array(np.random.rand(n, k), dtype=dtype)
    C = np.array(np.random.rand(k, rank), dtype=dtype)
    logger.debug('Initialization completed')
    # U is built as (rank, n) and handed back transposed.
    return M, U.T, C, H, Q
def run_bench(X, clfs, plot_name, n_components, tol, alpha, l1_ratio):
    """Benchmark every estimator in ``clfs`` for three initializations.

    For each estimator and each of 'nndsvd', 'nndsvdar' and 'random', the
    factors are initialized once and ``bench_one`` is run for every
    ``max_iter`` in the estimator's iteration range. The results are
    collected in a DataFrame, plotted with ``plot_results`` and returned.
    """
    started_at = time()
    rows = []

    for name, clf_type, iter_range, clf_params in clfs:
        print("Training %s:" % name)

        # The position of the init method doubles as the random seed.
        for rs, init in enumerate(('nndsvd', 'nndsvdar', 'random')):
            padding = " " * (8 - len(init))
            print(" %s %s: " % (init, padding), end="")
            W, H = _initialize_nmf(X, n_components, init, 1e-6, rs)
            init_name = "init='%s'" % init

            for max_iter in iter_range:
                # The caller's parameter dict is updated in place.
                clf_params['alpha'] = alpha
                clf_params['l1_ratio'] = l1_ratio
                clf_params['max_iter'] = max_iter
                clf_params['tol'] = tol
                clf_params['random_state'] = rs
                clf_params['init'] = 'custom'
                clf_params['n_components'] = n_components

                this_loss, duration = bench_one(name, X, W, H, X.shape,
                                                clf_type, clf_params,
                                                init, n_components, rs)
                rows.append((name, this_loss, duration, init_name))

                # One progress dot per finished run.
                print(".", end="")
                sys.stdout.flush()
            print(" ")

    # Use a pandas DataFrame to organize the results.
    results_df = pandas.DataFrame(rows,
                                  columns="method loss time init".split())
    print("Total time = %0.3f sec\n" % (time() - started_at))

    plot_results(results_df, plot_name)
    return results_df
def test_initialize_nn_output():
    # Initialization must never return a negative value, whatever the method.
    sample = np.abs(random_state.randn(10, 10))
    for method in ('random', 'nndsvd', 'nndsvda', 'nndsvdar'):
        W, H = nmf._initialize_nmf(sample, 10, init=method, random_state=0)
        any_negative = (W < 0).any() or (H < 0).any()
        assert_false(any_negative)
def cvx_optimizer(A, max_iter=10, rank=10, callback=None, seed='nndsvd',
                  norm_ord='fro', regularization=False, solver_eps=1e3):
    """
    Alternating minimization using SCS solver.

    Odd iterations solve for H with W fixed, even iterations solve for W
    with H fixed, each under a non-negativity constraint.

    Args:
        A: target matrix;
        max_iter: maximal number of iterations;
        rank: rank of factorization;
        callback: function called as callback(A, W, H) after each
            successful iteration;
        seed: method of choosing starting point, 'nndsvd' or 'random';
        norm_ord: order of norm ||A-W.dot(H)||, may be 'fro'(other
            matrix norm are not recommended);
        regularization(boolean): L1 regularization of H, to make it
            more sparse;
        solver_eps: eps for cvx optimizer.

    Returns:
        W: basis matrix;
        H: coefficients matrix (None if seed='random' and no solve for H
            ever succeeded);
        status: status returned by optimizer for the last solve, or None
            when no solve was run (max_iter=0).

    Raises:
        ValueError: if ``seed`` is not 'nndsvd' or 'random'.
    """
    m = A.shape[0]
    n = A.shape[1]

    if seed == 'nndsvd':
        W, H = _initialize_nmf(A, rank)
    elif seed == 'random':
        W = np.random.randn(m, rank)
        # H is produced by the first solve.
        H = None
    else:
        # Previously an unknown seed left W unbound and failed later with
        # an unrelated name error.
        raise ValueError(
            "Unknown seed %r; expected 'nndsvd' or 'random'" % (seed,))

    # Stays None when the loop never runs (the old code hit an unbound
    # `prob` in that case).
    status = None
    for iter_num in range(1, 1 + max_iter):
        solving_for_H = iter_num % 2 == 1
        # The unknown lives in its own name so that a failed solve cannot
        # replace the last numeric estimate with a solver Variable.
        if solving_for_H:
            unknown = Variable(rank, n)
            residual = A - W*unknown
            H_term = unknown
        else:
            unknown = Variable(m, rank)
            residual = A - unknown*H
            H_term = H
        constraints = [unknown >= 0]

        objective = norm(residual, norm_ord)
        if regularization:
            objective += cp.sum_entries(H_term)
        prob = Problem(Minimize(objective), constraints)
        prob.solve(solver=SCS, eps=solver_eps)

        status = prob.status
        if status != OPTIMAL:
            break

        if solving_for_H:
            H = unknown.value
        else:
            W = unknown.value
        if callback:
            callback(A, W, H)

    return W, H, status
def compute_rnmf(A, rank, oversample=20, n_subspace=2, init='nndsvd', shuffle=False,
                 l2_reg_H=0.0, l2_reg_W=0.0, l1_reg_H=0.0, l1_reg_W=0.0,
                 tol=1e-5, maxiter=200, random_state=None):
    """
    Randomized Nonnegative Matrix Factorization.

    Randomized hierarchical alternating least squares algorithm
    for computing the approximate low-rank nonnegative matrix factorization of
    a rectangular `(m, n)` matrix `A`. Given the target rank `rank << min{m,n}`,
    the input matrix `A` is factored as `A = W H`. The nonnegative factor
    matrices `W` and `H` are of dimension `(m, rank)` and `(rank, n)`, respectively.

    The quality of the approximation can be controlled via the oversampling
    parameter `oversample` and the parameter `n_subspace` which specifies the number of
    subspace iterations.

    Parameters
    ----------
    A : array_like, shape `(m, n)`.
        Real nonnegative input matrix.

    rank : integer, `rank << min{m,n}`.
        Target rank, i.e., number of components to extract from the data

    oversample : integer, optional (default: 20)
        Controls the oversampling of column space. Increasing this parameter
        may improve numerical accuracy.

    n_subspace : integer, default: 2.
        Parameter to control number of subspace iterations. Increasing this
        parameter may improve numerical accuracy.

    init : 'random' | 'nndsvd' | 'nndsvda' | 'nndsvdar'
        Method used to initialize the procedure.
        Default: 'nndsvd'.
        Valid options:
        - 'random': non-negative random matrices, scaled with:
            sqrt(X.mean() / n_components)
        - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)
            initialization (better for sparseness)
        - 'nndsvda': NNDSVD with zeros filled with the average of X
            (better when sparsity is not desired)
        - 'nndsvdar': NNDSVD with zeros filled with small random values
            (generally faster, less accurate alternative to NNDSVDa
            for when sparsity is not desired)

    shuffle : boolean, default: False
        If true, randomly shuffle the update order of the variables.

    l2_reg_H : float, (default ``l2_reg_H = 0.0``).
        Amount of ridge shrinkage to apply to `H` to improve conditioning.

    l2_reg_W : float, (default ``l2_reg_W = 0.0``).
        Amount of ridge shrinkage to apply to `W` to improve conditioning.

    l1_reg_H : float, (default ``l1_reg_H = 0.0``).
        Sparsity controlling parameter on `H`.
        Higher values lead to sparser components.

    l1_reg_W : float, (default ``l1_reg_W = 0.0``).
        Sparsity controlling parameter on `W`.
        Higher values lead to sparser components.

    tol : float, default: `tol=1e-5`.
        Tolerance of the stopping condition.

    maxiter : integer, default: `maxiter=200`.
        Number of iterations.

    random_state : integer, RandomState instance or None, optional (default ``None``)
        If integer, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used by np.random.

    Returns
    -------
    W: array_like, `(m, rank)`.
        Solution to the non-negative least squares problem.

    H : array_like, `(rank, n)`.
        Solution to the non-negative least squares problem.

    Raises
    ------
    ValueError
        If `A` contains inf or nan, has a dtype outside `_VALID_DTYPES`,
        or contains negative entries.

    Notes
    -----
    This HALS update algorithm written in cython is adapted from the
    scikit-learn implementation for the deterministic NMF.  We also have
    adapted the initilization scheme.

    See: https://github.com/scikit-learn/scikit-learn

    References
    ----------
    [1] Erichson, N. Benjamin, Ariana Mendible, Sophie Wihlborn, and J. Nathan Kutz.
    "Randomized Nonnegative Matrix Factorization."
    Pattern Recognition Letters (2018).

    [2] Cichocki, Andrzej, and P. H. A. N. Anh-Huy. "Fast local algorithms for
    large scale nonnegative matrix and tensor factorizations."
    IEICE transactions on fundamentals of electronics, communications and
    computer sciences 92.3: 708-721, 2009.

    [3] C. Boutsidis, E. Gallopoulos: SVD based initialization: A head start for
    nonnegative matrix factorization - Pattern Recognition, 2008
    http://tinyurl.com/nndsvd

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
    >>> import ristretto as ro
    >>> W, H = ro.rnmf(X, rank=2, oversample=0)
    """
    random_state = check_random_state(random_state)

    # converts A to array, raise ValueError if A has inf or nan
    A = np.asarray_chkfinite(A)
    m, n = A.shape

    # Work on the transpose when A has more columns than rows; the factors
    # are swapped and transposed back at the return statements below.
    flipped = False
    if n > m:
        A = A.T
        m, n = A.shape
        flipped = True

    if A.dtype not in _VALID_DTYPES:
        raise ValueError('A.dtype must be one of %s, not %s'
                         % (' '.join(_VALID_DTYPES), A.dtype))

    if np.any(A < 0):
        raise ValueError("Input matrix with nonnegative elements is required.")

    # Low-dimensional representation of A; below, Q maps between the full
    # and the reduced space and B stands in for A.
    Q, B = compute_rqb(A, rank, oversample=oversample,
                       n_subspace=n_subspace, random_state=random_state)

    # Initialization methods for factor matrices W and H
    W, H = _initialize_nmf(A, rank, init=init, eps=1e-6, random_state=random_state)
    # C-ordered copy of H transposed; this is the array handed to the
    # coordinate-descent update.
    Ht = np.array(H.T, order='C')
    W_tilde = Q.T.dot(W)
    # A is not referenced again after this point.
    del A

    # Iterate the HALS algorithm until convergence or maxiter is reached
    violation = 0.0
    for niter in range(maxiter):
        # Update factor matrix H
        WtW = W.T.dot(W)
        WtW.flat[::rank + 1] += l2_reg_H  # adds l2_reg only on the diagonal
        BtW = B.T.dot(W_tilde) - l1_reg_H

        # compute violation update
        # NOTE(review): _update_cdnmf_fast presumably updates its first
        # argument in place (only the violation is returned) - confirm.
        permutation = random_state.permutation(rank) if shuffle else np.arange(rank)
        violation = _update_cdnmf_fast(Ht, WtW, BtW, permutation)

        # Update factor matrix W
        HHt = Ht.T.dot(Ht)
        HHt.flat[::rank + 1] += l2_reg_W  # adds l2_reg only on the diagonal

        # Rotate AHt back to high-dimensional space
        BHt = Q.dot(B.dot(Ht)) - l1_reg_W

        # compute violation update
        permutation = random_state.permutation(rank) if shuffle else np.arange(rank)
        violation += _update_cdnmf_fast(W, HHt, BHt, permutation)

        # Project W to low-dimensional space
        W_tilde = Q.T.dot(W)

        # Compute stopping condition.
        if niter == 0:
            # A zero violation on the first pass stops immediately and
            # avoids dividing by zero below.
            if violation == 0:
                break
            violation_init = violation

        # Stop once the violation, relative to the first iteration,
        # drops to tol or below.
        if violation / violation_init <= tol:
            break

    # Return factor matrices
    if flipped:
        return(Ht, W.T)
    return(W, Ht.T)
def proximal_training(C, WA, WB, rank, Obs=None, theta_tv_a=100,
                      theta_tv_b=0.01, max_outer_iter=7, max_inner_iter=800,
                      A=None, B=None, data_path=None, load_from_disk=False,
                      validation_func=None, random_init=False, verbose=0,
                      method=0):
    """Alternately refine the factors A and B of C for a fixed number of steps.

    Parameters
    ----------
    C : sparse or dense matrix to factorize.
    WA, WB : adjacency matrices, converted with
        ``utils.convert_adjacency_matrix`` and turned into graph gradient
        operators.
    rank : rank of the factorization.
    Obs : observation weights; when None, 1.0 where ``C > 0`` and 0.1
        elsewhere.
    theta_tv_a, theta_tv_b, max_inner_iter, method : forwarded to
        ``update_step``.
    max_outer_iter : number of calls to ``update_step``.
    A, B : optional starting factors; both must be given to be used.
    data_path : when set, the factors are saved there after every step
        (and loaded from ``data_path + '.npz'`` if ``load_from_disk``).
    load_from_disk : load the starting factors from ``data_path``.
    validation_func : called with (A, B) after each step when
        ``verbose > 0``; its result and the result's mean are printed.
    random_init : start from ``init_factor_matrices`` instead of
        ``nmf._initialize_nmf``.
    verbose : print per-step information when > 0.

    Returns
    -------
    A, B : the factors as NumPy arrays.
    """
    start = time.time()

    GA = utils.convert_adjacency_matrix(WA)
    GB = utils.convert_adjacency_matrix(WB)

    # Starting factors: disk, random, or NNDSVD (unless supplied).
    if load_from_disk and data_path is not None:
        # The context manager closes the archive, which was left open before.
        with np.load(data_path + '.npz') as data:
            A = data['A']
            B = data['B']
    elif random_init:
        A, B = init_factor_matrices(C.shape[0], C.shape[1], rank)
    elif A is None or B is None:
        A, B = nmf._initialize_nmf(C, rank, None)

    KA = graph_gradient_operator(GA)
    KB = graph_gradient_operator(GB)

    # Largest singular value of each (sparse) operator.
    _, normKA, _ = sp.sparse.linalg.svds(KA, 1)
    _, normKB, _ = sp.sparse.linalg.svds(KB, 1)
    normKA = normKA[0]
    normKB = normKB[0]

    c_is_sparse = sp.sparse.issparse(C)

    if Obs is None:
        # No observation mask given: build one from the positive entries.
        Obs = 0.1 * np.ones(C.shape)
        mask = C > 0
        if c_is_sparse:
            mask = mask.toarray()
        Obs[mask] = 1.0
    Obs = np.array(Obs)

    # Dense copy of C, computed once. Dense input used to crash here
    # because toarray() was called unconditionally.
    OC = C.toarray() if c_is_sparse else np.asarray(C)

    # The old `stop` and `error` flags were never set, so this is a plain
    # counted loop and the error-message branch was unreachable.
    nb_iter = 0
    while nb_iter < max_outer_iter:
        tick = time.time()

        A, B = update_step(theta_tv_a, theta_tv_b, A, B, KA, normKA, KB,
                           normKB, Obs, OC, max_inner_iter, method)
        nb_iter += 1

        if data_path is not None:
            np.savez(data_path, A=A, B=B, theta_tv_a=theta_tv_a,
                     theta_tv_b=theta_tv_b)

        if verbose > 0:
            if validation_func is not None:
                t = validation_func(np.array(A), np.array(B))
                print(t)
                print(t.mean())
            sys.stdout.flush()
            print('Step:{} done in {} seconds\n'.format(
                nb_iter, time.time() - tick))

    # Single-argument print calls behave the same on Python 2 and 3.
    print('Max iterations reached {} steps, reconstruction error: {}'.format(
        nb_iter, sp.linalg.norm(C - A.dot(B))))
    print('Total elapsed time: {} seconds'.format(time.time() - start))

    return np.array(A), np.array(B)
def initialize_rnmf(data, rank, alg, beta=2, sum_to_one=0, user_prov=None):
    '''
    Produce the starting basis, coefficient and outlier matrices for rNMF.

    Supported values of 'alg':

    1. 'random': uniform random basis and coefficients.
    2. 'NMF': factors from a run of sklearn's NMF (nndsvdar start).
    3. 'bNMF': factors from a run of beta-NMF (multiplicative solver,
       nndsvdar start); the slowest option.
    4. 'nndsvdar': Boutsidis' modified algorithm (classic nndsvd would
       cause divisions by zero).
    5. 'user': values taken from 'user_prov', a dictionary with the keys
       'basis', 'coeff' and 'outlier'. They are returned unchanged.

    Input:

    1. data: data to be factorized.
    2. rank: rank of the factorization/number of components.
    3. alg: one of the algorithms listed above.
    4. beta: parameter for beta-NMF; only used by 'bNMF'.
    5. sum_to_one: when 1, the coefficients are l1-normalized along axis 0
       because a simplex constraint will be applied to them later.
    6. user_prov: dictionary of initial values, required for alg == 'user'.

    Output:

    1. basis: initial basis matrix.
    2. coeff: initial coefficient matrix.
    3. outlier: initial outlier matrix (uniform random values unless
       provided by the user).

    Except for 'user', a small epsilon is added to every returned matrix to
    protect against division by zero. If other sklearn NMF settings are
    wanted for the 'NMF'/'bNMF' runs, modify the corresponding branch below.
    '''
    # Slightly higher than the actual fp64 epsilon.
    eps = 2.3e-16

    # The outlier matrix is always drawn first, uniformly at random.
    n_rows, n_cols = data.shape[0], data.shape[1]
    outlier = np.random.rand(n_rows, n_cols)

    if alg == 'random':
        print('Initializing rNMF uniformly at random.')
        basis = np.random.rand(n_rows, rank)
        coeff = np.random.rand(rank, n_cols)

    elif alg == 'bNMF':
        # Multiplicative algorithms do not like zeros and regular NNDSVD
        # produces sparse factors, hence the nndsvdar start.
        print('Initializing rNMF with beta-NMF.')
        model = NMF(n_components=rank, init='nndsvdar', beta_loss=beta,
                    solver='mu', verbose=True)
        basis = model.fit_transform(data)
        coeff = model.components_

    elif alg == 'NMF':
        print('Initializing rNMF with NMF.')
        model = NMF(n_components=rank, init='nndsvdar', verbose=True)
        basis = model.fit_transform(data)
        coeff = model.components_

    elif alg == 'nndsvdar':
        print('Initializing rNMF with nndsvdar.')
        basis, coeff = _initialize_nmf(data, n_components=rank,
                                       init='nndsvdar')

    elif alg == 'user':
        print('Initializing rNMF with user provided values.')

        # The provided initialization must be a complete dictionary.
        if user_prov is None:
            raise ValueError('You forgot the dictionary with the data')
        if type(user_prov) is not dict:
            raise ValueError('Initializations must be in a dictionary')
        required = ('basis', 'coeff', 'outlier')
        for key in required:
            if key not in user_prov:
                raise ValueError('Wrong format for initialization dictionary')

        # User values are handed back untouched (no epsilon, no rescaling).
        return user_prov['basis'], user_prov['coeff'], user_prov['outlier']

    else:
        # Reject anything unexpected, in the way sklearn does.
        raise ValueError(
            'Invalid algorithm (typo?): got %r instead of one of %r' %
            (alg, ('random', 'NMF', 'bNMF', 'nndsvdar', 'user')))

    # Shared tail of all computed initializations: rescale the coefficients
    # if they will get a simplex constraint later, then add epsilon.
    if sum_to_one == 1:
        coeff = normalize(coeff, norm='l1', axis=0)

    return basis + eps, coeff + eps, outlier + eps
def compute_rnmf_kl(A, rank, oversample=100, init='nndsvda',
                    eps=sys.float_info.min, tol=1e-5, maxiter=200,
                    random_state=None, approx='nndsvd'):
    """Compute an NMF of `A` via multiplicative updates on a compressed problem.

    `A` is first approximated as ``Q.dot(B)`` with ``rank + oversample``
    components, and the factors are then refined by repeatedly calling
    ``update_kl`` against the smaller matrix `B`.

    Parameters
    ----------
    A : array_like, shape `(m, n)`
        Finite, nonnegative input matrix. When ``n > m`` the transpose is
        factorized internally and the factors are transposed back on return.

    rank : int
        Target rank of the factorization.

    oversample : int, default: 100
        Extra components added to `rank` for the compression step.

    init : str, default: 'nndsvda'
        Initialization method forwarded to ``_initialize_nmf`` for `W`, `H`.

    eps : float, default: ``sys.float_info.min``
        Forwarded to ``update_kl`` and used as the lower clip bound for `W`.

    tol : float, default: 1e-5
        Accepted for interface compatibility; the loop below has no stopping
        test, so this value is currently unused.

    maxiter : int, default: 200
        Number of update sweeps performed (always run in full).

    random_state : int, RandomState instance or None, default: None
        Passed through ``check_random_state`` and on to ``_initialize_nmf``.

    approx : 'nndsvd' | 'rnmf', default: 'nndsvd'
        How the compressed pair ``(Q, B)`` is obtained: ``_initialize_nmf``
        with ``init="nndsvd"`` or ``compute_rnmf``.

    Returns
    -------
    W, H : ndarray
        Factor matrices for the matrix as it was passed in (i.e. already
        transposed back when the input was wider than tall).

    Raises
    ------
    ValueError
        If `A` contains inf/nan or negative entries, or if `approx` is not
        one of the supported strategies.

    Notes
    -----
    The wall-clock time of the compression step is printed to stdout.
    """
    random_state = check_random_state(random_state)

    # converts A to array, raise ValueError if A has inf or nan
    A = np.asarray_chkfinite(A)
    m, n = A.shape

    # Work on the tall orientation; remember to undo this on return.
    flipped = n > m
    if flipped:
        A = A.T

    if np.any(A < 0):
        raise ValueError("Input matrix with nonnegative elements is required.")

    # Compute the low rank "projection" A ~= Q.dot(B).
    # NOTE(review): the original author hoped for Q orthonormal and
    # nonnegative and B nonnegative -- neither is enforced here.
    if approx == 'nndsvd':
        start = time.time()
        Q, B = _initialize_nmf(A, rank + oversample, init="nndsvd",
                               eps=1e-6, random_state=random_state)
        print("approximation takes {}".format(time.time() - start))
    elif approx == 'rnmf':
        start = time.time()
        Q, B = compute_rnmf(A, rank + oversample, init="nndsvd")
        print("approximation takes {}".format(time.time() - start))
    else:
        # Without this guard Q and B would be unbound further down.
        raise ValueError(
            'Invalid approx: got %r instead of one of %r'
            % (approx, ('nndsvd', 'rnmf')))

    # Initialization methods for factor matrices W and H
    W, H = _initialize_nmf(A, rank, init=init, eps=1e-6,
                           random_state=random_state)
    W_tilde = Q.T.dot(W)

    # The full-size matrix is no longer needed once Q, B, W and H exist.
    del A

    E = np.ones(B.shape)

    # Iterate the mu algorithm until maxiter is reached
    for _ in range(maxiter):
        W_tilde, H = update_kl(B, W_tilde, H, E, eps=eps)

        # Lift W back to full size to enforce the lower bound, then
        # re-compress it for the next sweep.
        W = Q.dot(W_tilde)
        W = W.clip(min=eps)
        W_tilde = Q.T.dot(W)

    # Return factor matrices. For a flipped input A.T ~= W.dot(H), hence
    # A ~= H.T.dot(W.T); use the *updated* H, not the initial guess.
    if flipped:
        return H.T, W.T
    return W, H
def nmf_solve(X, n_clusters, gamma=0.5):
    """Factorize `X` by coordinate descent from a random initialization.

    Described by the original author as balanced K-means based on an
    exclusive lasso regulator using NMF.

    Parameters
    ----------
    X : array-like or CSR sparse matrix
        Data to factorize.

    n_clusters : int
        Number of components requested from the initializer.

    gamma : float, default: 0.5
        Not referenced by the body.

    Returns
    -------
    W : array
        First factor.

    H : array
        Second factor (the internally maintained transpose, transposed back).

    n_iter : int
        Zero-based index of the last sweep that was executed.

    Notes
    -----
    Progress is printed on every sweep. Tolerance (1e-4), sweep limit (200),
    shuffling (off) and all penalties (zero) are fixed inside the function.
    """
    seed = None
    W, H = _initialize_nmf(X, n_components=n_clusters, init='random',
                           random_state=seed)
    Ht = check_array(H.T, order='C')
    X = check_array(X, accept_sparse='csr')

    # No L1 or L2 penalty is applied to either factor.
    l1_W = l2_W = 0
    l1_H = l2_H = 0

    shuffle = False
    verbose = True
    tol = 1e-4
    max_iter = 200

    rng = check_random_state(seed)

    for n_iter in range(max_iter):
        # One sweep: refresh W against Ht, then Ht against W.
        violation = 0.
        violation += _update_coordinate_descent(X, W, Ht, l1_W, l2_W,
                                                shuffle, rng)
        violation += _update_coordinate_descent(X.T, Ht, W, l1_H, l2_H,
                                                shuffle, rng)

        # The first sweep provides the reference for the relative criterion.
        if n_iter == 0:
            violation_init = violation

        if violation_init == 0:
            break

        relative = violation / violation_init
        if verbose:
            print("violation:", relative)

        if relative <= tol:
            if verbose:
                print("Converged at iteration", n_iter + 1)
            break

    return W, Ht.T, n_iter
def test_initialize_nn_input():
    """Exercise the NMF initializer on an all-negative 2x2 matrix."""
    # NOTE(review): no assertion is made here; presumably an
    # expected-exception decorator sits outside this view -- confirm.
    negative_input = -np.ones((2, 2))
    nmf._initialize_nmf(negative_input, 2)
def test_initialize_nn_output():
    # Every supported initialization scheme must yield factors without
    # negative entries.
    data = np.abs(random_state.randn(10, 10))
    methods = ("random", "nndsvd", "nndsvda", "nndsvdar")
    for method in methods:
        W, H = nmf._initialize_nmf(data, 10, init=method, random_state=0)
        has_negative = (W < 0).any() or (H < 0).any()
        assert_false(has_negative)
def non_negative_factorization(X, W=None, H=None, n_components=None,
                               init='random', update_H=True, solver='cd',
                               beta_loss='frobenius', tol=1e-4,
                               max_iter=200, alpha=0., l1_ratio=0.,
                               regularization=None, random_state=None,
                               verbose=0, shuffle=False,
                               distribution = 'gaussian', N=None, D=None):
    r"""Compute Non-negative Matrix Factorization (NMF)

    Find two non-negative matrices (W, H) whose product approximates the non-
    negative matrix X. This factorization can be used for example for
    dimensionality reduction, source separation or topic extraction.

    The objective function is::

        0.5 * ||X - WH||_Fro^2
        + alpha * l1_ratio * ||vec(W)||_1
        + alpha * l1_ratio * ||vec(H)||_1
        + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2
        + 0.5 * alpha * (1 - l1_ratio) * ||H||_Fro^2

    Where::

        ||A||_Fro^2 = \sum_{i,j} A_{ij}^2 (Frobenius norm)
        ||vec(A)||_1 = \sum_{i,j} abs(A_{ij}) (Elementwise L1 norm)

    For multiplicative-update ('mu') solver, the Frobenius norm
    (0.5 * ||X - WH||_Fro^2) can be changed into another beta-divergence loss,
    by changing the beta_loss parameter.

    The objective function is minimized with an alternating minimization of W
    and H. If H is given and update_H=False, it solves for W only.

    NOTE(review): the description above is inherited from scikit-learn. In
    this version the optimization itself is delegated to ``update`` (see
    Notes), so the objective actually minimized depends on that function --
    confirm against its definition.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Constant matrix.

    W : array-like, shape (n_samples, n_components)
        If init='custom', it is used as initial guess for the solution.

    H : array-like, shape (n_components, n_features)
        If init='custom', it is used as initial guess for the solution.
        If update_H=False, it is used as a constant, to solve for W only.

    n_components : integer
        Number of components, if n_components is not set all features
        are kept.

    init : None | 'random' | 'nndsvd' | 'nndsvda' | 'nndsvdar' | 'custom'
        Method used to initialize the procedure.
        Default: 'random'.
        Valid options:

        - 'random': non-negative random matrices, scaled with:
            sqrt(X.mean() / n_components)

        - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)
            initialization (better for sparseness)

        - 'nndsvda': NNDSVD with zeros filled with the average of X
            (better when sparsity is not desired)

        - 'nndsvdar': NNDSVD with zeros filled with small random values
            (generally faster, less accurate alternative to NNDSVDa
            for when sparsity is not desired)

        - 'custom': use custom matrices W and H

    update_H : boolean, default: True
        Set to True, both W and H will be estimated from initial guesses.
        Set to False, only W will be estimated.

    solver : 'cd' | 'mu'
        Numerical solver to use:
        'cd' is a Coordinate Descent solver that uses Fast Hierarchical
            Alternating Least Squares (Fast HALS).
        'mu' is a Multiplicative Update solver.

        .. versionadded:: 0.17
           Coordinate Descent solver.

        .. versionadded:: 0.19
           Multiplicative Update solver.

    beta_loss : float or string, default 'frobenius'
        String must be in {'frobenius', 'kullback-leibler', 'itakura-saito'}.
        Beta divergence to be minimized, measuring the distance between X
        and the dot product WH. Note that values different from 'frobenius'
        (or 2) and 'kullback-leibler' (or 1) lead to significantly slower
        fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input
        matrix X cannot contain zeros. Used only in 'mu' solver.

        .. versionadded:: 0.19

    tol : float, default: 1e-4
        Tolerance of the stopping condition.

    max_iter : integer, default: 200
        Maximum number of iterations before timing out.

    alpha : double, default: 0.
        Constant that multiplies the regularization terms.

    l1_ratio : double, default: 0.
        The regularization mixing parameter, with 0 <= l1_ratio <= 1.
        For l1_ratio = 0 the penalty is an elementwise L2 penalty
        (aka Frobenius Norm).
        For l1_ratio = 1 it is an elementwise L1 penalty.
        For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.

    regularization : 'both' | 'components' | 'transformation' | None
        Select whether the regularization affects the components (H), the
        transformation (W), both or none of them.

    random_state : int, RandomState instance or None, optional, default: None
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    verbose : integer, default: 0
        The verbosity level.

    shuffle : boolean, default: False
        If true, randomize the order of coordinates in the CD solver.

    distribution : string, default: 'gaussian'
        Forwarded unchanged to ``update``.
        NOTE(review): presumably selects the noise model used by ``update``
        -- confirm against its definition.

    N : optional, default: None
        Forwarded unchanged to ``update``; its meaning is not visible from
        this function (TODO confirm).

    D : optional, default: None
        Forwarded unchanged to ``update``; its meaning is not visible from
        this function (TODO confirm).

    Returns
    -------
    W : array-like, shape (n_samples, n_components)
        Solution to the non-negative least squares problem.

    H : array-like, shape (n_components, n_features)
        Solution to the non-negative least squares problem.

    n_iter : int
        Actual number of iterations.

    Notes
    -----
    After validation and initialization the factors are computed by
    ``update(X, W, H, max_iter, distribution, N, D)``. The regularization
    strengths derived from `alpha`, `l1_ratio` and `regularization` are
    computed but not passed to ``update``; likewise `tol`, `verbose` and
    `shuffle` are not forwarded. `solver` only affects parameter checking and
    the initial `W` used when ``update_H=False``; `tol` only gates the
    convergence warning.

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
    >>> from sklearn.decomposition import non_negative_factorization
    >>> W, H, n_iter = non_negative_factorization(X, n_components=2,
    ... init='random', random_state=0)

    References
    ----------
    Cichocki, Andrzej, and P. H. A. N. Anh-Huy. "Fast local algorithms for
    large scale nonnegative matrix and tensor factorizations."
    IEICE transactions on fundamentals of electronics, communications and
    computer sciences 92.3: 708-721, 2009.

    Fevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix
    factorization with the beta-divergence. Neural Computation, 23(9).
    """
    X = check_array(X, accept_sparse=('csr', 'csc'), dtype=float)
    check_non_negative(X, "NMF (input X)")
    # Validates the string options and returns the value compared against 0
    # just below.
    beta_loss = _check_string_param(solver, regularization, beta_loss, init)

    if safe_min(X) == 0 and beta_loss <= 0:
        raise ValueError("When beta_loss <= 0 and X contains zeros, "
                         "the solver may diverge. Please add small values to "
                         "X, or use a positive beta_loss.")

    n_samples, n_features = X.shape
    if n_components is None:
        n_components = n_features

    if not isinstance(n_components, INTEGER_TYPES) or n_components <= 0:
        raise ValueError("Number of components must be a positive integer;"
                         " got (n_components=%r)" % n_components)
    # max_iter == 0 is accepted; only negative or non-integer values fail.
    if not isinstance(max_iter, INTEGER_TYPES) or max_iter < 0:
        raise ValueError("Maximum number of iterations must be a positive "
                         "integer; got (max_iter=%r)" % max_iter)
    if not isinstance(tol, numbers.Number) or tol < 0:
        raise ValueError("Tolerance for stopping criteria must be "
                         "positive; got (tol=%r)" % tol)

    # check W and H, or initialize them
    if init == 'custom' and update_H:
        _check_init(H, (n_components, n_features), "NMF (input H)")
        _check_init(W, (n_samples, n_components), "NMF (input W)")
    elif not update_H:
        # H is held fixed, so any W passed by the caller is replaced by a
        # constant starting point.
        _check_init(H, (n_components, n_features), "NMF (input H)")
        # 'mu' solver should not be initialized by zeros
        if solver == 'mu':
            avg = np.sqrt(X.mean() / n_components)
            W = np.full((n_samples, n_components), avg)
        else:
            W = np.zeros((n_samples, n_components))
    else:
        W, H = _initialize_nmf(X, n_components, init=init,
                               random_state=random_state)

    # The four strengths are computed but not used afterwards in this
    # function (see Notes).
    l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = _compute_regularization(
        alpha, l1_ratio, regularization)

    # All of the actual optimization happens inside `update`.
    W, H, n_iter = update(X, W, H, max_iter, distribution, N , D)

    # Hitting the iteration cap is reported unless tol == 0.
    if n_iter == max_iter and tol > 0:
        warnings.warn("Maximum number of iteration %d reached. Increase it to"
                      " improve convergence." % max_iter, ConvergenceWarning)

    return W, H, n_iter
def klquasinewton(V, max_iter=10, rank=40):
    """
    Quasinewton method for minimising KL-divergence.
    The method is based on the article
    http://link.springer.com/chapter/10.1007/11785231_91.

    Each iteration takes one damped Newton step on H and then one on W.
    After every step, entries that are not strictly positive are reset to a
    small constant (1e-6), which is also the damping added to the Hessian
    diagonal before inversion.

    Note: the global NumPy random generator is re-seeded with 1 on every
    call, and dense matrices of size (rank * n_columns)^2 and
    (rank * n_rows)^2 are built and inverted in every iteration.

    Args:
        V: target matrix
        max_iter: maximum number of iterations
        rank: rank of factorization
    Returns:
        W: basis matrix
        H: coefficients matrix
        kl_div: list of KL divergences for each iteration
    """
    e = 0.000001
    M = V.shape[0]
    K = V.shape[1]
    R = rank

    # Kept for reproducibility of the initialization; this is a global side
    # effect that callers may rely on.
    np.random.seed(1)

    # initialization
    W, H = _initialize_nmf(V, rank)
    W[W <= 0] = e
    H[H <= 0] = e

    # Damping terms are loop-invariant, so build them once.
    damping_h = np.identity(R * K) * e
    damping_w = np.identity(R * M) * e

    kl_div = []
    for _ in range(max_iter):
        # --- Newton step on H (column-major vectorization) ---
        hess = klhessh(V, W, H).todense()
        inv_hess = scipy.linalg.inv(hess + damping_h)
        gr = np.reshape(klgradh(V, W, H), (1, K * R), order='F')[0]
        # multiply inverse hessian by gradient
        diff = inv_hess.dot(gr)
        H = H - np.reshape(diff, (R, K), order='F')
        # replace nonpositive elements with epsilon
        H[H <= 0] = e

        # --- Newton step on W (row-major vectorization) ---
        hess2 = klhessw(V, W, H).todense()
        inv_hess2 = scipy.linalg.inv(hess2 + damping_w)
        gr2 = np.reshape(klgradw(V, W, H), (1, M * R))[0]
        # multiply inverse hessian by gradient
        diff2 = inv_hess2.dot(gr2)
        W = W - np.reshape(diff2, (M, R))
        # replace nonpositive elements with epsilon
        W[W <= 0] = e

        kl_div.append(kldiv(V, W, H))

    return W, H, kl_div
def test_initialize_nn_output():
    """Test that no NNDSVD variant returns negative values"""
    data = np.abs(random_state.randn(10, 10))
    for var in (None, 'a', 'ar'):
        # Forward the variant so that each NNDSVD flavour is really
        # exercised; the seed keeps the randomized 'ar' variant reproducible.
        W, H = nmf._initialize_nmf(data, 10, variant=var, random_state=0)
        assert_false((W < 0).any() or (H < 0).any())
def nonneg_rescal(X, rank, **kwargs):
    """Non-Negative RESCAL

    Factors a _sparse_ three-way tensor X such that each frontal slice
    X_k = A * R_k * A.T. The frontal slices of a tensor are _sparse_
    N x N matrices that correspond to the adjacency matrices of the
    relational graph for a particular relation.

    For a full description of the algorithm see:

    [1] Denis Krompass, Maximilian Nickel, Xueyan Jiang, Volker Tresp,
        "Non-Negative Tensor Factorization with RESCAL",
        ECML/PKDD 2013, Prague, Czech Republic

    Parameters
    ----------
    X : list of :class:`scipy.sparse.csr_matrix`
        List of frontal slices X_k of the tensor X.
        The shape of each X_k is n x n. The list is modified in place:
        every slice is replaced by its CSR form with sorted indices.
    rank : int
        Rank of the factorization.
    lambda_A : float, optional
        Regularization parameter for factor matrix A.
    lambda_R : float, optional
        Regularization parameter for core tensor R.
    lambda_V : float, optional
        Regularization parameter for the V_l factor matrices of the
        attributes.
    attr : list of :class:`scipy.sparse.csr_matrix`, optional
        List of sparse n x v_l attribute matrices. 'v_l' may be different
        for each set of attributes. ``None`` is treated as "no attributes".
    init : string, optional
        Initialization method of the factor matrices. 'nndsvd'
        initializes A based on the NNDSVD algorithm. 'random' initializes
        the factor matrices randomly.
    maxIter : int, optional
        Maximum number of iterations of the ALS algorithm.
    conv : float, optional
        Stop when the change of the fit value between two iterations is
        less than conv. When omitted, a default that depends on `costF`
        is used.
    dtype : dtype, optional
        Data type of the randomly initialized factors and of the summed
        adjacency matrix. Defaults to ``np.float32``.
    normalize : boolean, optional
        Keep A normalized during the fit, a L1 regularization penalty is
        employed. *Not implemented for the multinomial based cost function*
        Defaults to `False`.
    costF : string, optional
        Specify the cost function for the fitting:
        'LS' for least squares
        'KL' for generalized Kullback-Leibler Divergence
        'MUL' for multinomial based Kullback-Leibler Divergence
        Defaults to 'LS'.
    verbose : bool, optional
        Show more detailed messages that show the progress of the learning.
        Defaults to `False`.

    Returns
    -------
    A : ndarray
        array of shape ('N', 'rank') corresponding to the factor matrix A
    R : list
        list of 'M' arrays of shape ('rank', 'rank') corresponding to the
        factor matrices R_k
    f : float
        fit value computed in the last iteration (0 if no iteration ran)
    iter : int
        number of iterations that were executed
    exectimes : ndarray
        execution times to compute the updates in each iteration

    Raises
    ------
    ValueError
        On unknown keyword arguments, non-sparse slices or an unknown
        `init` option.
    NotImplementedError
        If ``normalize=True`` is combined with ``costF='MUL'``.
    """
    # ------------ init options ----------------------------------------------
    ainit = kwargs.pop('init', __DEF_INIT)
    maxIter = kwargs.pop('maxIter', __DEF_MAXITER)
    conv = kwargs.pop('conv', None)
    lmbdaA = kwargs.pop('lambda_A', __DEF_LMBDA)
    lmbdaR = kwargs.pop('lambda_R', __DEF_LMBDA)
    lmbdaV = kwargs.pop('lambda_V', __DEF_LMBDA)
    D = kwargs.pop('attr', __DEF_ATTR)
    dtype = kwargs.pop('dtype', np.float32)
    normalize = kwargs.pop('normalize', False)
    verbose = kwargs.pop('verbose', False)
    costF = kwargs.pop('costF', 'LS')

    # ``len(D)`` is taken below, so normalize "no attributes" to a list.
    if D is None:
        D = []

    # The default convergence threshold depends on the cost function.
    if conv is None:
        if costF == 'LS':
            conv = __DEF_CONV_LS
        elif costF == 'KL':
            conv = __DEF_CONV_KL
        else:
            conv = __DEF_CONV_MUL

    if verbose:
        logging.basicConfig(level=logging.DEBUG)

    # ------------- check input -----------------------------------------------
    if not len(kwargs) == 0:
        raise ValueError('Unknown keywords (%s)' % (kwargs.keys()))

    for i, frontal_slice in enumerate(X):
        if not issparse(frontal_slice):
            raise ValueError('X[%d] is not a sparse matrix' % i)

    sz = X[0].shape
    n = sz[0]
    k = len(X)

    _log.debug('[Config] rank: %d | maxIter: %d | conv: %7.1e |'
               ' lmbda: %7.1e' % (rank, maxIter, conv, lmbdaA))
    _log.debug('[Config] dtype: %s / %s' % (dtype, X[0].dtype))

    # ------- convert X to CSR ------------------------------------------------
    # Done in place on the caller's list.
    for i in range(k):
        X[i] = X[i].tocsr()
        X[i].sort_indices()

    # ---------- initialize A, R and V-----------------------------------------
    _log.debug('Initializing A')
    R = []
    V = []
    if ainit == 'random':
        A = array(rand(n, rank), dtype=dtype)
        for _ in range(k):
            R.append(array(rand(rank, rank), dtype=dtype))
        for attr_matrix in D:
            V.append(array(rand(rank, attr_matrix.shape[1]), dtype=dtype))
    elif ainit == 'nndsvd':
        # Initialize from the symmetrized sum of all frontal slices.
        S = csr_matrix((n, n), dtype=dtype)
        for i in range(k):
            S = S + X[i]
            S = S + X[i].T
        A, W = _initialize_nmf(S, rank, 'nndsvd')
        if issparse(A):
            A = A.toarray()
            W = W.toarray()
        else:
            A = np.array(A)
            W = np.array(W)
        Z = np.dot(A.T, W.T)
        # NOTE: every R_k starts out as the *same* array object, as in the
        # original code.
        for _ in range(k):
            R.append(Z)

        for attr_matrix in D:
            if rank > attr_matrix.shape[1]:
                # Too few attribute columns for a rank-sized NNDSVD; fall
                # back to a random start.
                V.append(array(rand(rank, attr_matrix.shape[1]), dtype=dtype))
            else:
                _, P = _initialize_nmf(attr_matrix, rank, 'nndsvd')
                if issparse(P):
                    P = P.toarray()
                else:
                    P = np.array(P)
                V.append(P)
    else:
        # Raising a plain string is itself a TypeError in Python 3.
        raise ValueError('Unknown init option ("%s")' % ainit)

    # ------ compute factorization -------------------------------------------
    fit = fitchange = fitold = 0
    exectimes = []
    n_iter = 0
    if normalize:
        A = __normalize(A)
    for itr in range(maxIter):
        tic = time.time()
        fitold = fit
        if costF == 'LS':
            if normalize:
                R = __LS_updateR_L1(X, A, R, lmbdaR)
                A = __LS_updateA_normalized(X, A, R, D, V, lmbdaA)
                V = __LS_updateV_L1(D, A, V, lmbdaV)
            else:
                R = __LS_updateR_L2(X, A, R, lmbdaR)
                A = __LS_updateA(X, A, R, D, V, lmbdaA)
                V = __LS_updateV_L2(D, A, V, lmbdaV)
            # compute fit value
            fit = __LS_compute_fit(X, A, R)
        elif costF == 'KL':
            if normalize:
                R = __KL_updateR(X, A, R, lmbdaR)
                A = __KL_updateA_normalized(X, A, R, D, V, lmbdaA)
                V = __KL_updateV(D, A, V, lmbdaV)
            else:
                R = __KL_updateR(X, A, R, lmbdaR)
                A = __KL_updateA(X, A, R, D, V, lmbdaA)
                V = __KL_updateV(D, A, V, lmbdaV)
            # compute fit value
            fit = __KL_compute_fit(X, A, R)
        elif costF == 'MUL':
            if normalize:
                raise NotImplementedError('Normalize option not implemented '
                                          'for multinomial cost function!')
            else:
                R = __MUL_updateR(X, A, R, lmbdaR)
                A = __MUL_updateA(X, A, R, D, V, lmbdaA)
                V = __MUL_updateV(D, A, V, lmbdaV)
            fit = __MUL_compute_fit(X, A, R)

        fitchange = abs(fitold - fit)

        toc = time.time()
        exectimes.append(toc - tic)
        n_iter = itr + 1

        _log.debug('[%3d] fit: %0.5f | delta: %7.1e | secs: %.5f'
                   % (itr, fit, fitchange, exectimes[-1]))
        if itr > 0 and fitchange < conv:
            break

    # Report the objective value that was actually computed.
    return A, R, fit, n_iter, array(exectimes)
def compute_nmf(A, rank, init='nndsvd', shuffle=False,
                l2_reg_H=0.0, l2_reg_W=0.0, l1_reg_H=0.0, l1_reg_W=0.0,
                tol=1e-5, maxiter=200, random_state=None):
    """Nonnegative Matrix Factorization.

    Hierarchical alternating least squares algorithm for computing the
    approximate low-rank nonnegative matrix factorization of
    a rectangular `(m, n)` matrix `A`. Given the target rank
    `rank << min{m,n}`, the input matrix `A` is factored as `A = W H`.
    The nonnegative factor matrices `W` and `H` are of dimension
    `(m, rank)` and `(rank, n)`, respectively.

    Parameters
    ----------
    A : array_like, shape `(m, n)`.
        Real nonnegative input matrix.

    rank : integer, `rank << min{m,n}`.
        Target rank.

    init : 'random' | 'nndsvd' | 'nndsvda' | 'nndsvdar'
        Method used to initialize the procedure. Default: 'nndsvd'.
        Valid options:

        - 'random': non-negative random matrices, scaled with:
            sqrt(X.mean() / n_components)

        - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)
            initialization (better for sparseness)

        - 'nndsvda': NNDSVD with zeros filled with the average of X
            (better when sparsity is not desired)

        - 'nndsvdar': NNDSVD with zeros filled with small random values
            (generally faster, less accurate alternative to NNDSVDa
            for when sparsity is not desired)

    shuffle : boolean, default: False
        If true, randomly shuffle the update order of the variables.

    l2_reg_H : float, (default ``l2_reg_H = 0.0``).
        Amount of ridge shrinkage to apply to `H` to improve conditioning.

    l2_reg_W : float, (default ``l2_reg_W = 0.0``).
        Amount of ridge shrinkage to apply to `W` to improve conditioning.

    l1_reg_H : float, (default ``l1_reg_H = 0.0``).
        Sparsity controlling parameter on `H`.
        Higher values lead to sparser components.

    l1_reg_W : float, (default ``l1_reg_W = 0.0``).
        Sparsity controlling parameter on `W`.
        Higher values lead to sparser components.

    tol : float, default: `tol=1e-5`.
        Tolerance of the stopping condition.

    maxiter : integer, default: `maxiter=200`.
        Maximum number of iterations.

    random_state : integer, RandomState instance or None, optional (default ``None``)
        If integer, random_state is the seed used by the random number
        generator; If RandomState instance, random_state is the random number
        generator; If None, the random number generator is the RandomState
        instance used by np.random.

    Returns
    -------
    W:  array_like, `(m, rank)`.
        Solution to the non-negative least squares problem.

    H : array_like, `(rank, n)`.
        Solution to the non-negative least squares problem.

    Raises
    ------
    ValueError
        If `A` contains inf/nan or negative entries.

    Notes
    -----
    This HALS update algorithm written in cython is adapted from the
    scikit-learn implementation for the deterministic NMF. We also have
    adapted the initialization scheme.

    See: https://github.com/scikit-learn/scikit-learn

    References
    ----------
    [1] Cichocki, Andrzej, and P. H. A. N. Anh-Huy. "Fast local algorithms for
    large scale nonnegative matrix and tensor factorizations."
    IEICE transactions on fundamentals of electronics, communications and
    computer sciences 92.3: 708-721, 2009.

    [2] C. Boutsidis, E. Gallopoulos: SVD based initialization: A head start
    for nonnegative matrix factorization - Pattern Recognition, 2008
    http://tinyurl.com/nndsvd

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
    >>> import ristretto as ro
    >>> W, H = ro.nmf(X, rank=2)
    """
    rng = check_random_state(random_state)

    # Convert to ndarray; raises ValueError on inf or nan.
    A = np.asarray_chkfinite(A)
    # Unpacking the shape also rejects input that is not two-dimensional.
    n_rows, n_cols = A.shape

    if np.any(A < 0):
        raise ValueError("Input matrix with nonnegative elements is required.")

    # Starting point; H is carried as a C-contiguous transpose because the
    # coordinate-descent kernel updates its first argument in place.
    W, H = _initialize_nmf(A, rank, init=init, eps=1e-6, random_state=rng)
    Ht = np.array(H.T, order='C')

    def coordinate_order():
        # Order in which the `rank` components are visited within a sweep.
        if shuffle:
            return rng.permutation(rank)
        return np.arange(rank)

    # Flat-index stride that walks the diagonal of a (rank, rank) matrix.
    diagonal = slice(None, None, rank + 1)

    for sweep in range(maxiter):
        # -- H step: regularized Gram matrix of W and cross term with A --
        gram_W = W.T.dot(W)
        gram_W.flat[diagonal] += l2_reg_H
        cross_H = A.T.dot(W) - l1_reg_H
        violation = _update_cdnmf_fast(Ht, gram_W, cross_H,
                                       coordinate_order())

        # -- W step: same construction with the roles swapped --
        gram_H = Ht.T.dot(Ht)
        gram_H.flat[diagonal] += l2_reg_W
        cross_W = A.dot(Ht) - l1_reg_W
        violation += _update_cdnmf_fast(W, gram_H, cross_W,
                                        coordinate_order())

        # Stop once the violation has shrunk to `tol` times that of the
        # first sweep (or immediately if the first sweep shows none).
        if sweep == 0:
            if violation == 0:
                break
            baseline = violation

        if violation / baseline <= tol:
            break

    return W, Ht.T
def fit(self, X, shuffle=False, max_iter=200, tol=1e-4, verbose=False, W=None, H=None):
    """Fit X = W*H by coordinate descent, as in the scikit-learn implementation.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Data to factorize.

    shuffle : boolean, default: False
        Forwarded to ``self.nmf_iteration_update``.

    max_iter : int, default: 200
        Maximum number of sweeps over W and H.

    tol : float, default: 1e-4
        Stop when the violation of a sweep, relative to that of the first
        sweep, is at most this value.

    verbose : boolean, default: False
        If true, print the relative violation after every sweep and a
        message on convergence.

    W : array or None
        Optional starting value for W; the initializer's first factor is
        used when omitted.

    H : array or None
        Optional starting value for H. When omitted, the *transpose* of the
        initializer's second factor is used, so H is handled in transposed
        layout throughout.
        NOTE(review): a user-supplied H presumably has to follow the same
        (n_features, n_components) layout -- confirm.

    Returns
    -------
    W : array
        The fitted first factor. H is stored on ``self.components`` in the
        transposed layout described above.

    Notes
    -----
    If ``self.n_components`` is None it is set to
    ``min(n_samples, n_features)`` as a side effect.
    """
    n_samples, n_features = X.shape
    if self.n_components is None:
        self.n_components = min(n_samples, n_features)

    # Initialize using the sklearn method. This runs even when both W and H
    # are supplied, exactly as before.
    W_init, H_init = _initialize_nmf(X, self.n_components)
    if H is None:
        H = (H_init.T).copy(order='C')
    if W is None:
        W = W_init

    # Translate the estimator's settings into per-factor L1/L2 strengths.
    l1_H, l2_H, l1_W, l2_W = 0, 0, 0, 0
    if self.sparsity in ('both', 'components'):
        l1_H = self.sparsity_penalty
    if self.sparsity in ('both', 'transformation'):
        l1_W = self.sparsity_penalty
    if self.regularization in ('both', 'components'):
        l2_H = self.regularization_penalty
    if self.regularization in ('both', 'transformation'):
        l2_W = self.regularization_penalty

    for i in range(max_iter):
        violation = 0.

        # Update W
        violation += self.nmf_iteration_update(X, W, H, l1_W, l2_W, shuffle)

        # Update H
        violation += self.nmf_iteration_update(X.T, H, W, l1_H, l2_H, shuffle)

        # The first sweep provides the reference for the relative criterion.
        if i == 0:
            violation_init = violation

        if violation_init == 0:
            break

        if verbose:
            print("violation:", violation / violation_init)

        if violation / violation_init <= tol:
            # Stay silent unless the caller asked for progress output.
            if verbose:
                print("Converged at iteration", i + 1)
            break

    self.components = H
    return W