# Assumed imports for this snippet: numpy, plus the project-local
# display, nmf, and matrix_utils modules referenced below.
import numpy as np


def _cross_val(param_dict):
    # Log which parameter set is being optimized
    display.my_display(
        'Optimizing parameter set: {} \n'.format(param_dict['param_id']),
        True, param_dict['str_path'])

    # Derive dimensions from the train/test configuration matrices
    n_train_win, n_train_conn = param_dict['train_cfg_matr'].shape
    n_test_win, n_test_conn = param_dict['test_cfg_matr'].shape

    # Run NMF on the training set from a random non-negative initialization
    fac_subnet_init = np.random.uniform(
        low=0.0, high=1.0, size=(param_dict['rank'], n_train_conn))
    fac_coef_init = np.random.uniform(
        low=0.0, high=1.0, size=(param_dict['rank'], n_train_win))

    train_fac_subnet, train_fac_coef, train_err = nmf.snmf_bcd(
        param_dict['train_cfg_matr'],
        alpha=param_dict['alpha'],
        beta=param_dict['beta'],
        fac_subnet_init=fac_subnet_init,
        fac_coef_init=fac_coef_init,
        max_iter=25,
        verbose=False)

    # Solve a single least-squares estimate for the coefficients on the
    # held-out fold, keeping the trained sub-networks fixed (max_iter=1)
    fac_coef_init = np.random.uniform(
        low=0.0, high=1.0, size=(param_dict['rank'], n_test_win))

    _, test_fac_coef, _ = nmf.snmf_bcd(
        param_dict['test_cfg_matr'],
        alpha=param_dict['alpha'],
        beta=param_dict['beta'],
        fac_subnet_init=train_fac_subnet,
        fac_coef_init=fac_coef_init,
        max_iter=1,
        verbose=False)

    # Compute the relative Frobenius reconstruction error on the test fold
    norm_test_cfg_matr = matrix_utils.norm_fro(param_dict['test_cfg_matr'].T)
    err = matrix_utils.norm_fro_err(
        param_dict['test_cfg_matr'].T, train_fac_subnet.T,
        test_fac_coef.T, norm_test_cfg_matr) / norm_test_cfg_matr

    return {'param_id': param_dict['param_id'],
            'error': err}
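# A minimal usage sketch for _cross_val. The dictionary layout mirrors the
# keys read in the function body above; all concrete values here (rank=5,
# alpha=beta=0.01, the 80/20 window split, the log path) and the helper name
# _demo_cross_val are hypothetical.
def _demo_cross_val():
    cfg_matr = np.random.uniform(size=(100, 500))  # [n_win x n_conn]
    param_dict = {'param_id': 0,
                  'alpha': 0.01,
                  'beta': 0.01,
                  'rank': 5,
                  'train_cfg_matr': cfg_matr[:80, :],  # training windows
                  'test_cfg_matr': cfg_matr[80:, :],   # held-out windows
                  'str_path': 'xval_log.txt'}
    qmeas = _cross_val(param_dict)
    return qmeas['error']  # relative Frobenius error on the held-out fold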
# Assumed imports for this snippet: numpy and time, plus the project-local
# errors, matr_util, and nnls modules and the my_display helper referenced
# below. The code targets Python 2 (xrange).
import time

import numpy as np


def snmf_bcd(cfg_matr, alpha, beta,
             fac_subnet_init, fac_coef_init,
             max_iter, verbose=True):
    """
    Compute Sparse-NMF based on Kim and Park (2011).
    By default, enforces a sparsity penalty on the coefficients matrix H
    and regularizes the sub-network basis matrix W.

        A -> cfg_matr.T
        W -> fac_subnet.T
        H -> fac_coefs

    Parameters
    ----------
    cfg_matr: numpy.ndarray
        The network configuration matrix
        shape: [n_win x n_conn]

    alpha: float
        Regularization parameter on W

    beta: float
        Sparsity parameter on H

    fac_subnet_init: numpy.ndarray
        Initial sub-network basis matrix
        shape: [n_fac x n_conn]

    fac_coef_init: numpy.ndarray
        Initial coefficients matrix
        shape: [n_fac x n_win]

    max_iter: int
        Maximum number of optimization iterations to perform
        Typically around 100

    verbose: bool
        Print progress information to the screen

    Returns
    -------
    fac_subnet: numpy.ndarray
        Final sub-network basis matrix
        shape: [n_fac x n_conn]

    fac_coef: numpy.ndarray
        Final coefficients matrix
        shape: [n_fac x n_win]

    rel_err: numpy.ndarray
        Frobenius norm of the error matrix over iterations
        shape: [max_iter,]
    """

    # Standard param checks
    errors.check_type(cfg_matr, np.ndarray)
    errors.check_type(alpha, float)
    errors.check_type(beta, float)
    errors.check_type(fac_subnet_init, np.ndarray)
    errors.check_type(fac_coef_init, np.ndarray)
    errors.check_type(max_iter, int)
    errors.check_type(verbose, bool)

    # Check input dimensions
    if not len(cfg_matr.shape) == 2:
        raise ValueError('%r does not have two dimensions' % cfg_matr)
    n_win = cfg_matr.shape[0]
    n_conn = cfg_matr.shape[1]

    if not len(fac_subnet_init.shape) == 2:
        raise ValueError('%r does not have two dimensions' % fac_subnet_init)
    n_fac = fac_subnet_init.shape[0]
    if not fac_subnet_init.shape[1] == n_conn:
        raise ValueError('%r should have same number of connections as %r' %
                         (fac_subnet_init, cfg_matr))

    if not len(fac_coef_init.shape) == 2:
        raise ValueError('%r does not have two dimensions' % fac_coef_init)
    if not fac_coef_init.shape[0] == n_fac:
        raise ValueError('%r should specify same number of factors as %r' %
                         (fac_coef_init, fac_subnet_init))
    if not fac_coef_init.shape[1] == n_win:
        raise ValueError('%r should have same number of windows as %r' %
                         (fac_coef_init, cfg_matr))

    # Initialize matrices
    #   A - [n_conn x n_win]
    #   W - [n_conn x n_fac]
    #   H - [n_win x n_fac]
    A = cfg_matr.T.copy()
    W = fac_subnet_init.T.copy()
    H = fac_coef_init.T.copy()

    # Regularization matrix
    #   alpha_matr - [n_fac x n_fac]
    alpha_matr = np.sqrt(alpha) * np.eye(n_fac)
    alpha_zeros_matr = np.zeros((n_conn, n_fac))

    # Sparsity matrix
    #   beta_matr - [1 x n_fac]
    beta_matr = np.sqrt(beta) * np.ones((1, n_fac))
    beta_zeros_matr = np.zeros((1, n_win))

    # Capture error minimization
    rel_error = np.zeros(max_iter)
    norm_A = matr_util.norm_fro(A)

    my_display('\nBeginning Non-Negative Matrix Factorization\n', verbose)
    t_iter_start = time.time()

    for ii in xrange(max_iter):
        # Use the Block-Pivot solver

        # First solve for H
        W_beta = np.vstack((W, beta_matr))
        A_beta = np.vstack((A, beta_zeros_matr))
        Sol, info = nnls.nnlsm_blockpivot(W_beta, A_beta, init=H.T)
        H = Sol.T

        # Now, solve for W
        H_alpha = np.vstack((H, alpha_matr))
        A_alpha = np.hstack((A, alpha_zeros_matr))
        Sol, info = nnls.nnlsm_blockpivot(H_alpha, A_alpha.T, init=W.T)
        W = Sol.T

        t_iter_elapsed = time.time() - t_iter_start

        err = matr_util.norm_fro_err(A, W, H, norm_A) / norm_A
        rel_error[ii] = err

        str_header = 'Running -- '
        str_iter = 'Iteration %4d' % (ii + 1)
        str_err = 'Relative Error: %0.5f' % err
        str_elapsed = 'Elapsed Time: %0.3f sec' % t_iter_elapsed

        my_display('{} {} | {} | {} \r'.format(
            str_header, str_iter, str_err, str_elapsed), verbose)

    my_display('\nDone.\n', verbose)

    W, H, weights = matr_util.normalize_column_pair(W, H)

    return W.T, H.T, rel_error
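# A minimal usage sketch for snmf_bcd, assuming numpy is imported as np.
# The dimensions (60 windows x 200 connections), the rank of 4, and the
# penalty weights are hypothetical; the initial factors are drawn uniform
# at random, matching the convention used by the cross-validation code above.
def _demo_snmf_bcd():
    n_win, n_conn, n_fac = 60, 200, 4
    cfg_matr = np.random.uniform(size=(n_win, n_conn))
    fac_subnet, fac_coef, rel_err = snmf_bcd(
        cfg_matr,
        alpha=0.01, beta=0.01,
        fac_subnet_init=np.random.uniform(size=(n_fac, n_conn)),
        fac_coef_init=np.random.uniform(size=(n_fac, n_win)),
        max_iter=100, verbose=False)
    # cfg_matr.T is approximated by fac_subnet.T.dot(fac_coef)
    return fac_subnet, fac_coef, rel_err[-1]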
# Assumed imports for this snippet: time, json, numpy's random module
# (random.rand below), and the project-local matrix-utility module mu
# (norm_fro, norm_fro_err, normalize_column_pair). run() is a method of an
# NMF algorithm base class that supplies default_max_iter, default_max_time,
# initializer(), and iter_solver(). The code targets Python 2 (print
# statements).
def run(self, A, k, init=None, max_iter=None, max_time=None, verbose=0):
    """
    Run a NMF algorithm

    Parameters
    ----------
    A : numpy.array or scipy.sparse matrix, shape (m,n)
    k : int - target lower rank

    Optional Parameters
    -------------------
    init : (W_init, H_init) where W_init is numpy.array of shape (m,k) and
        H_init is numpy.array of shape (n,k). If provided, these values
        are used as initial values for NMF iterations.
    max_iter : int - maximum number of iterations.
        If not provided, default maximum for each algorithm is used.
    max_time : int - maximum amount of time in seconds.
        If not provided, default maximum for each algorithm is used.
    verbose : int
        0 (default) - No debugging information is collected, but input
            and output information is printed on screen.
        -1 - No debugging information is collected, and nothing is
            printed on screen.
        1 (debugging/experimental purpose) - History of computation is
            returned. See 'rec' variable.
        2 (debugging/experimental purpose) - History of computation is
            additionally printed on screen.

    Returns
    -------
    (W, H, rec)
    W : Obtained factor matrix, shape (m,k)
    H : Obtained coefficient matrix, shape (n,k)
    rec : dict - (debugging/experimental purpose)
        Auxiliary information about the execution
    """
    info = {'k': k,
            'alg': str(self.__class__),
            'A_dim_1': A.shape[0],
            'A_dim_2': A.shape[1],
            'A_type': str(A.__class__),
            'max_iter': max_iter if max_iter is not None
                        else self.default_max_iter,
            'verbose': verbose,
            'max_time': max_time if max_time is not None
                        else self.default_max_time}
    if init is not None:
        W = init[0].copy()
        H = init[1].copy()
        info['init'] = 'user_provided'
    else:
        W = random.rand(A.shape[0], k)
        H = random.rand(A.shape[1], k)
        info['init'] = 'uniform_random'

    if verbose >= 0:
        print '[NMF] Running: '
        print json.dumps(info, indent=4, sort_keys=True)

    norm_A = mu.norm_fro(A)
    total_time = 0

    if verbose >= 1:
        his = {'iter': [], 'elapsed': [], 'rel_error': []}

    start = time.time()
    # algorithm-specific initialization
    (W, H) = self.initializer(W, H)

    for i in range(1, info['max_iter'] + 1):
        start_iter = time.time()
        # algorithm-specific iteration solver
        (W, H) = self.iter_solver(A, W, H, k, i)
        elapsed = time.time() - start_iter

        if verbose >= 1:
            rel_error = mu.norm_fro_err(A, W, H, norm_A) / norm_A
            his['iter'].append(i)
            his['elapsed'].append(elapsed)
            his['rel_error'].append(rel_error)
            if verbose >= 2:
                print 'iter:' + str(i) + ', elapsed:' + str(elapsed) + \
                    ', rel_error:' + str(rel_error)

        total_time += elapsed
        if total_time > info['max_time']:
            break

    W, H, weights = mu.normalize_column_pair(W, H)

    final = {}
    final['norm_A'] = norm_A
    final['rel_error'] = mu.norm_fro_err(A, W, H, norm_A) / norm_A
    final['iterations'] = i
    final['elapsed'] = time.time() - start

    rec = {'info': info, 'final': final}
    if verbose >= 1:
        rec['his'] = his

    if verbose >= 0:
        print '[NMF] Completed: '
        print json.dumps(final, indent=4, sort_keys=True)
    return (W, H, rec)
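# A minimal sketch of driving run() with a concrete solver. The base-class
# name NMF_Base is an assumption (the class owning run() above is not shown),
# and iter_solver here uses standard Lee-Seung multiplicative updates for the
# A ~ W.dot(H.T) convention, not necessarily the update rule this library
# actually ships.
class NMF_MU_Sketch(NMF_Base):          # NMF_Base: assumed base-class name
    default_max_iter = 100              # used when max_iter is not given
    default_max_time = 1e6              # used when max_time is not given

    def initializer(self, W, H):
        # No algorithm-specific preprocessing needed for this sketch
        return W, H

    def iter_solver(self, A, W, H, k, it):
        eps = 1e-16                     # guard against division by zero
        # Multiplicative updates minimizing ||A - W.dot(H.T)||_F
        H = H * (A.T.dot(W)) / (H.dot(W.T.dot(W)) + eps)
        W = W * (A.dot(H)) / (W.dot(H.T.dot(H)) + eps)
        return W, H


# Example call, assuming numpy imported as np (verbose=-1 keeps run() silent):
#   A = np.random.rand(300, 200)
#   W, H, rec = NMF_MU_Sketch().run(A, 10, verbose=-1)
#   rec['final']['rel_error']  # final relative Frobenius error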
def run_xval_paramset(cfg_matr, param_dict):
    """
    Run NMF cross-validation using a single parameter dictionary
    generated by gen_random_sampling_paramset.

    Parameters
    ----------
    cfg_matr: numpy.ndarray, shape: (n_win, n_conn)
        The network configuration matrix

    param_dict: dict with keys: {param_id, alpha, beta, rank, fold_id,
                                 train_ix, test_ix, str_path}
        Single entry of the list of dicts returned by
        gen_random_sampling_paramset

    Returns
    -------
    qmeas_dict: dict with keys: {param_id, error, pct_sparse_subgraph,
                                 pct_sparse_coef}
        Quality measures associated with param_dict
    """

    # Check input dimensions of cfg_matr
    if not len(cfg_matr.shape) == 2:
        raise ValueError('%r does not have two dimensions' % cfg_matr)
    n_win = cfg_matr.shape[0]
    n_conn = cfg_matr.shape[1]

    # Split the configuration matrix into train/test folds
    train_cfg_matr = cfg_matr[param_dict['train_ix'], :]
    test_cfg_matr = cfg_matr[param_dict['test_ix'], :]
    n_train_win, n_train_conn = train_cfg_matr.shape
    n_test_win, n_test_conn = test_cfg_matr.shape

    # Log which parameter set is being optimized
    display.my_display(
        'Optimizing parameter set: {} \n'.format(param_dict['param_id']),
        True, param_dict['str_path'])

    # Run NMF on the training set from a random non-negative initialization
    fac_subnet_init = np.random.uniform(
        low=0.0, high=1.0, size=(param_dict['rank'], n_train_conn))
    fac_coef_init = np.random.uniform(
        low=0.0, high=1.0, size=(param_dict['rank'], n_train_win))

    train_fac_subnet, train_fac_coef, train_err = nmf.snmf_bcd(
        train_cfg_matr,
        alpha=param_dict['alpha'],
        beta=param_dict['beta'],
        fac_subnet_init=fac_subnet_init,
        fac_coef_init=fac_coef_init,
        max_iter=25,
        verbose=False)

    # Solve a single least-squares estimate for the coefficients on the
    # held-out fold, keeping the trained sub-networks fixed (max_iter=1)
    fac_coef_init = np.random.uniform(
        low=0.0, high=1.0, size=(param_dict['rank'], n_test_win))

    _, test_fac_coef, _ = nmf.snmf_bcd(
        test_cfg_matr,
        alpha=param_dict['alpha'],
        beta=param_dict['beta'],
        fac_subnet_init=train_fac_subnet,
        fac_coef_init=fac_coef_init,
        max_iter=1,
        verbose=False)

    # Compute the relative Frobenius reconstruction error on the test fold
    norm_test_cfg_matr = matrix_utils.norm_fro(test_cfg_matr.T)
    err = matrix_utils.norm_fro_err(
        test_cfg_matr.T, train_fac_subnet.T,
        test_fac_coef.T, norm_test_cfg_matr) / norm_test_cfg_matr

    # Compute the fraction of zero entries in the subgraphs and coefficients
    pct_sparse_subnet = (train_fac_subnet == 0).mean(axis=1).mean()
    pct_sparse_coef = (test_fac_coef == 0).mean(axis=1).mean()

    qmeas_dict = {'param_id': param_dict['param_id'],
                  'error': err,
                  'pct_sparse_subgraph': pct_sparse_subnet,
                  'pct_sparse_coef': pct_sparse_coef}

    return qmeas_dict
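# A minimal usage sketch for run_xval_paramset. A real param_dict comes from
# gen_random_sampling_paramset (not shown here); the hand-built dictionary
# below only illustrates the expected keys, and every concrete value (rank,
# penalties, the 80/20 window split, fold_id, log path) is hypothetical.
def _demo_run_xval_paramset():
    cfg_matr = np.random.uniform(size=(100, 500))  # [n_win x n_conn]
    win_ix = np.random.permutation(100)            # shuffled window indices
    param_dict = {'param_id': 0,
                  'alpha': 0.01,
                  'beta': 0.01,
                  'rank': 5,
                  'fold_id': 0,
                  'train_ix': win_ix[:80],
                  'test_ix': win_ix[80:],
                  'str_path': 'xval_log.txt'}
    qmeas_dict = run_xval_paramset(cfg_matr, param_dict)
    return qmeas_dict['error'], qmeas_dict['pct_sparse_coef']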