Exemple #1
0
def _cross_val(param_dict):
    """
    Score a single parameter set: fit sparse NMF on the training fold, then
    measure how well the learned sub-networks reconstruct the held-out fold.

    Parameters
    ----------
        param_dict: dict
            Keys read here: param_id, str_path, train_cfg_matr,
            test_cfg_matr, rank, alpha, beta.
            Both *_cfg_matr entries must expose a two-element ``shape``
            (rows are windows, columns are connections).

    Returns
    -------
        dict with keys: {param_id, error}
            ``error`` is matrix_utils.norm_fro_err evaluated on the test fold,
            divided by the Frobenius norm of the test fold.
    """
    # Announce which parameter set is being processed
    display.my_display(
        'Optimizing parameter set: {} \n'.format(param_dict['param_id']),
        True, param_dict['str_path'])

    # Pull out both folds; unpacking the shape also enforces 2-D input
    train_matr = param_dict['train_cfg_matr']
    n_train_win, n_train_conn = train_matr.shape
    test_matr = param_dict['test_cfg_matr']
    n_test_win, _ = test_matr.shape

    rank = param_dict['rank']

    # Random starting point for the training factorization
    subnet_seed = np.random.uniform(low=0.0, high=1.0,
                                    size=(rank, n_train_conn))
    train_coef_seed = np.random.uniform(low=0.0, high=1.0,
                                        size=(rank, n_train_win))

    # Only the sub-network factors are carried over to the test fold
    train_fac_subnet, _, _ = nmf.snmf_bcd(
        train_matr,
        alpha=param_dict['alpha'],
        beta=param_dict['beta'],
        fac_subnet_init=subnet_seed,
        fac_coef_init=train_coef_seed,
        max_iter=25, verbose=False)

    # One pass on the held-out fold, seeded with the trained sub-networks,
    # to obtain coefficients for the test windows
    test_coef_seed = np.random.uniform(low=0.0, high=1.0,
                                       size=(rank, n_test_win))

    _, test_fac_coef, _ = nmf.snmf_bcd(
        test_matr,
        alpha=param_dict['alpha'],
        beta=param_dict['beta'],
        fac_subnet_init=train_fac_subnet,
        fac_coef_init=test_coef_seed,
        max_iter=1, verbose=False)

    # Reconstruction error on the test fold, relative to the fold's norm
    test_matr_t = test_matr.T
    test_norm = matrix_utils.norm_fro(test_matr_t)
    residual = matrix_utils.norm_fro_err(test_matr_t,
                                         train_fac_subnet.T,
                                         test_fac_coef.T,
                                         test_norm)
    rel_err = residual / test_norm

    return {'param_id': param_dict['param_id'],
            'error': rel_err}
Exemple #2
0
def snmf_bcd(cfg_matr,
             alpha,
             beta,
             fac_subnet_init,
             fac_coef_init,
             max_iter,
             verbose=True):
    """
    Compute Sparse-NMF based on Kim and Park (2011).
    By default, enforces a sparse penalty on the coefficients matrix H
    and regularizes the sub-network, basis matrix W.

    Internally the problem is solved on the transposed matrices:

    A -> cfg_matr.T    [n_conn x n_win]
    W -> fac_subnet.T  [n_conn x n_fac]
    H -> fac_coef.T    [n_win x n_fac]

    Parameters
    ----------
        cfg_matr: numpy.ndarray
            The network configuration matrix
            shape: [n_win x n_conn]

        alpha: float
            Regularization parameter on W

        beta: float
            Sparsity parameter on H

        fac_subnet_init: numpy.ndarray
            Initial sub-network basis matrix
            shape: [n_fac x n_conn]

        fac_coef_init: numpy.ndarray
            Initial coefficients matrix
            shape: [n_fac x n_win]

        max_iter: int
            Number of alternating optimization iterations to perform
            (the loop has no early-stopping criterion)
            Typically around 100

        verbose: bool
            Print progress information to the screen

    Returns
    -------
        fac_subnet: numpy.ndarray
            Final sub-network basis matrix
            shape: [n_fac x n_conn]

        fac_coef: numpy.ndarray
            Final coefficients matrix
            shape: [n_fac x n_win]

        rel_err: numpy.ndarray
            Relative error recorded after every iteration, i.e.
            norm_fro_err(A, W, H, norm_A) / norm_A
            shape: [max_iter,]

    Raises
    ------
        ValueError
            If any input is not two-dimensional or the shapes of the inputs
            are inconsistent with each other.
    """

    # Standard param checks
    errors.check_type(cfg_matr, np.ndarray)
    errors.check_type(alpha, float)
    errors.check_type(beta, float)
    errors.check_type(fac_subnet_init, np.ndarray)
    errors.check_type(fac_coef_init, np.ndarray)
    errors.check_type(max_iter, int)
    errors.check_type(verbose, bool)

    # Check input dimensions
    if not len(cfg_matr.shape) == 2:
        raise ValueError('%r does not have two dimensions' % cfg_matr)
    n_win = cfg_matr.shape[0]
    n_conn = cfg_matr.shape[1]

    if not len(fac_subnet_init.shape) == 2:
        raise ValueError('%r does not have two dimensions' % fac_subnet_init)
    n_fac = fac_subnet_init.shape[0]
    if not fac_subnet_init.shape[1] == n_conn:
        raise ValueError('%r should have same number of connections as %r' %
                         (fac_subnet_init, cfg_matr))

    if not len(fac_coef_init.shape) == 2:
        raise ValueError('%r does not have two dimensions' % fac_coef_init)
    if not fac_coef_init.shape[0] == n_fac:
        raise ValueError('%r should specify same number of factors as %r' %
                         (fac_coef_init, fac_subnet_init))
    if not fac_coef_init.shape[1] == n_win:
        raise ValueError('%r should have same number of windows as %r' %
                         (fac_coef_init, cfg_matr))

    # Initialize matrices (copies, so the caller's arrays are never modified)
    # A - [n_conn x n_win]
    # W - [n_conn x n_fac]
    # H - [n_win x n_fac]
    A = cfg_matr.T.copy()
    W = fac_subnet_init.T.copy()
    H = fac_coef_init.T.copy()

    # Regularization matrix
    # alpha_matr - [n_fac x n_fac]
    alpha_matr = np.sqrt(alpha) * np.eye(n_fac)
    alpha_zeros_matr = np.zeros((n_conn, n_fac))

    # Sparsity matrix
    # beta_matr - [1 x n_fac]
    beta_matr = np.sqrt(beta) * np.ones((1, n_fac))
    beta_zeros_matr = np.zeros((1, n_win))

    # Capture error minimization
    rel_error = np.zeros(max_iter)
    norm_A = matr_util.norm_fro(A)

    my_display('\nBeginning Non-Negative Matrix Factorization\n', verbose)
    # Started once before the loop, so the reported time is cumulative
    t_iter_start = time.time()
    # range (not xrange) keeps the loop valid on both Python 2 and Python 3
    for ii in range(max_iter):
        # Use the Block-Pivot Solver
        # First solve for H: the extra row sqrt(beta) * 1^T stacked under W,
        # matched by a zero row under A, adds the sparsity penalty to the
        # least-squares system.
        # W_beta - [(n_conn + 1) x n_fac], A_beta - [(n_conn + 1) x n_win]
        W_beta = np.vstack((W, beta_matr))
        A_beta = np.vstack((A, beta_zeros_matr))
        Sol, _ = nnls.nnlsm_blockpivot(W_beta, A_beta, init=H.T)
        H = Sol.T

        # Now, solve for W: the block sqrt(alpha) * I stacked under H,
        # matched by zero columns appended to A, adds the regularization
        # penalty on W.
        # H_alpha - [(n_win + n_fac) x n_fac]
        # A_alpha - [n_conn x (n_win + n_fac)]
        H_alpha = np.vstack((H, alpha_matr))
        A_alpha = np.hstack((A, alpha_zeros_matr))
        Sol, _ = nnls.nnlsm_blockpivot(H_alpha, A_alpha.T, init=W.T)
        W = Sol.T

        t_iter_elapsed = time.time() - t_iter_start
        err = matr_util.norm_fro_err(A, W, H, norm_A) / norm_A
        rel_error[ii] = err

        str_header = 'Running -- '
        str_iter = 'Iteration %4d' % (ii + 1)
        str_err = 'Relative Error: %0.5f' % err
        str_elapsed = 'Elapsed Time: %0.3f sec' % t_iter_elapsed
        my_display(
            '{} {} | {} | {} \r'.format(str_header, str_iter, str_err,
                                        str_elapsed), verbose)
    my_display('\nDone.\n', verbose)

    # Rescale the factor pair; the third return value is not needed here
    W, H, _ = matr_util.normalize_column_pair(W, H)

    return W.T, H.T, rel_error
Exemple #3
0
    def run(self, A, k, init=None, max_iter=None, max_time=None, verbose=0):
        """ Run a NMF algorithm

        Parameters
        ----------
        A : numpy.array or scipy.sparse matrix, shape (m,n)
        k : int - target lower rank

        Optional Parameters
        -------------------
        init : (W_init, H_init) where
                    W_init is numpy.array of shape (m,k) and
                    H_init is numpy.array of shape (n,k).
                    If provided, copies of these values are used as initial
                    values for NMF iterations.
        max_iter : int - maximum number of iterations.
                    If not provided, self.default_max_iter is used.
        max_time : int - maximum amount of time in seconds.
                    If not provided, self.default_max_time is used.
                    The budget is checked after each iteration, so the
                    iteration that crosses it still completes.
        verbose : int - 0 (default) - No debugging information is collected, but
                                    input and output information is printed on screen.
                        -1 - No debugging information is collected, and
                                    nothing is printed on screen.
                        1 (debugging/experimental purpose) - History of computation is
                                        returned. See 'rec' variable.
                        2 (debugging/experimental purpose) - History of computation is
                                        additionally printed on screen.
        Returns
        -------
        (W, H, rec)
        W : Obtained factor matrix, shape (m,k)
        H : Obtained coefficient matrix, shape (n,k)
        rec : dict - (debugging/experimental purpose) Auxiliary information about the execution.
                    Always holds 'info' and 'final'; also holds 'his' when verbose >= 1.
                    rec['final']['iterations'] is 0 when no iteration was run.
        """
        info = {'k': k,
                'alg': str(self.__class__),
                'A_dim_1': A.shape[0],
                'A_dim_2': A.shape[1],
                'A_type': str(A.__class__),
                'max_iter': max_iter if max_iter is not None else self.default_max_iter,
                'verbose': verbose,
                'max_time': max_time if max_time is not None else self.default_max_time}
        # Identity test: comparing an array-like ``init`` against None with
        # ``!=`` may be evaluated element-wise instead of giving one bool.
        if init is not None:
            W = init[0].copy()
            H = init[1].copy()
            info['init'] = 'user_provided'
        else:
            W = random.rand(A.shape[0], k)
            H = random.rand(A.shape[1], k)
            info['init'] = 'uniform_random'

        # Single-argument print(...) is valid on both Python 2 and Python 3
        if verbose >= 0:
            print('[NMF] Running: ')
            print(json.dumps(info, indent=4, sort_keys=True))

        norm_A = mu.norm_fro(A)
        total_time = 0

        if verbose >= 1:
            his = {'iter': [], 'elapsed': [], 'rel_error': []}

        start = time.time()
        # algorithm-specific initialization
        (W, H) = self.initializer(W, H)

        # Pre-bind the counter so 'iterations' below is defined (as 0) when
        # max_iter is 0 and the loop body never executes.
        i = 0
        for i in range(1, info['max_iter'] + 1):
            start_iter = time.time()
            # algorithm-specific iteration solver
            (W, H) = self.iter_solver(A, W, H, k, i)
            elapsed = time.time() - start_iter

            # The error is only evaluated per iteration when history is kept
            if verbose >= 1:
                rel_error = mu.norm_fro_err(A, W, H, norm_A) / norm_A
                his['iter'].append(i)
                his['elapsed'].append(elapsed)
                his['rel_error'].append(rel_error)
                if verbose >= 2:
                    print('iter:' + str(i) + ', elapsed:' + str(elapsed) + ', rel_error:' + str(rel_error))

            # Only solver time counts toward the budget, not error bookkeeping
            total_time += elapsed
            if total_time > info['max_time']:
                break

        # The third return value is not needed here
        W, H, _ = mu.normalize_column_pair(W, H)

        final = {}
        final['norm_A'] = norm_A
        final['rel_error'] = mu.norm_fro_err(A, W, H, norm_A) / norm_A
        final['iterations'] = i
        final['elapsed'] = time.time() - start

        rec = {'info': info, 'final': final}
        if verbose >= 1:
            rec['his'] = his

        if verbose >= 0:
            print('[NMF] Completed: ')
            print(json.dumps(final, indent=4, sort_keys=True))
        return (W, H, rec)
Exemple #4
0
    def run(self, A, k, init=None, max_iter=None, max_time=None, verbose=0):
        """ Run a NMF algorithm

        Parameters
        ----------
        A : numpy.array or scipy.sparse matrix, shape (m,n)
        k : int - target lower rank

        Optional Parameters
        -------------------
        init : (W_init, H_init) where
                    W_init is numpy.array of shape (m,k) and
                    H_init is numpy.array of shape (n,k).
                    If provided, copies of these values are used as initial
                    values for NMF iterations.
        max_iter : int - maximum number of iterations.
                    If not provided, self.default_max_iter is used.
        max_time : int - maximum amount of time in seconds.
                    If not provided, self.default_max_time is used.
                    The budget is checked after each iteration, so the
                    iteration that crosses it still completes.
        verbose : int - 0 (default) - No debugging information is collected, but
                                    input and output information is printed on screen.
                        -1 - No debugging information is collected, and
                                    nothing is printed on screen.
                        1 (debugging/experimental purpose) - History of computation is
                                        returned. See 'rec' variable.
                        2 (debugging/experimental purpose) - History of computation is
                                        additionally printed on screen.
        Returns
        -------
        (W, H, rec)
        W : Obtained factor matrix, shape (m,k)
        H : Obtained coefficient matrix, shape (n,k)
        rec : dict - (debugging/experimental purpose) Auxiliary information about the execution.
                    Always holds 'info' and 'final'; also holds 'his' when verbose >= 1.
                    rec['final']['iterations'] is 0 when no iteration was run.
        """
        info = {
            'k': k,
            'alg': str(self.__class__),
            'A_dim_1': A.shape[0],
            'A_dim_2': A.shape[1],
            'A_type': str(A.__class__),
            'max_iter':
            max_iter if max_iter is not None else self.default_max_iter,
            'verbose': verbose,
            'max_time':
            max_time if max_time is not None else self.default_max_time
        }
        # Identity test: comparing an array-like ``init`` against None with
        # ``!=`` may be evaluated element-wise instead of giving one bool.
        if init is not None:
            W = init[0].copy()
            H = init[1].copy()
            info['init'] = 'user_provided'
        else:
            W = random.rand(A.shape[0], k)
            H = random.rand(A.shape[1], k)
            info['init'] = 'uniform_random'

        # Single-argument print(...) is valid on both Python 2 and Python 3
        if verbose >= 0:
            print('[NMF] Running: ')
            print(json.dumps(info, indent=4, sort_keys=True))

        norm_A = mu.norm_fro(A)
        total_time = 0

        if verbose >= 1:
            his = {'iter': [], 'elapsed': [], 'rel_error': []}

        start = time.time()
        # algorithm-specific initialization
        (W, H) = self.initializer(W, H)

        # Pre-bind the counter so 'iterations' below is defined (as 0) when
        # max_iter is 0 and the loop body never executes.
        i = 0
        for i in range(1, info['max_iter'] + 1):
            start_iter = time.time()
            # algorithm-specific iteration solver
            (W, H) = self.iter_solver(A, W, H, k, i)
            elapsed = time.time() - start_iter

            # The error is only evaluated per iteration when history is kept
            if verbose >= 1:
                rel_error = mu.norm_fro_err(A, W, H, norm_A) / norm_A
                his['iter'].append(i)
                his['elapsed'].append(elapsed)
                his['rel_error'].append(rel_error)
                if verbose >= 2:
                    print('iter:' + str(i) + ', elapsed:' + str(
                        elapsed) + ', rel_error:' + str(rel_error))

            # Only solver time counts toward the budget, not error bookkeeping
            total_time += elapsed
            if total_time > info['max_time']:
                break

        # The third return value is not needed here
        W, H, _ = mu.normalize_column_pair(W, H)

        final = {}
        final['norm_A'] = norm_A
        final['rel_error'] = mu.norm_fro_err(A, W, H, norm_A) / norm_A
        final['iterations'] = i
        final['elapsed'] = time.time() - start

        rec = {'info': info, 'final': final}
        if verbose >= 1:
            rec['his'] = his

        if verbose >= 0:
            print('[NMF] Completed: ')
            print(json.dumps(final, indent=4, sort_keys=True))
        return (W, H, rec)
Exemple #5
0
def run_xval_paramset(cfg_matr, param_dict):
    """
    Run NMF cross-validation using a single parameter dictionary generated from
    gen_random_sampling_paramset

    The training windows are factorized with 25 iterations of nmf.snmf_bcd;
    the resulting sub-networks then seed a single iteration on the held-out
    windows to obtain test coefficients, from which the reconstruction error
    and sparsity measures are derived.

    Parameters
    ----------
        cfg_matr: numpy.ndarray, shape:(n_win, n_conn)
            The network configuration matrix

        param_dict: dict with keys: {param_id, alpha, beta, rank, fold_id,
                                     train_ix, test_ix, str_path}
            Single entry of list of dicts returned by gen_random_sampling_paramset
            (fold_id is not read by this function)


    Return
    ------
        qmeas_dict: dict with keys: {param_id, error,
                                     pct_sparse_subgraph, pct_sparse_coef}
            Quality measures associated with param_dict.
            error is the test-fold value of matrix_utils.norm_fro_err divided
            by the Frobenius norm of the test fold.
            The two pct_sparse_* entries are fractions (0 to 1) of entries
            exactly equal to zero, averaged over factors.

    Raises
    ------
        ValueError
            If cfg_matr does not have two dimensions.
    """

    # Check input dimensions of cfg_matr
    if not len(cfg_matr.shape) == 2:
        raise ValueError('%r does not have two dimensions' % cfg_matr)

    # Derive params from dict
    train_cfg_matr = cfg_matr[param_dict['train_ix'], :]
    test_cfg_matr = cfg_matr[param_dict['test_ix'], :]
    # Unpacking also guards against index selections that drop a dimension
    n_train_win, n_train_conn = train_cfg_matr.shape
    n_test_win, _ = test_cfg_matr.shape

    # Display output
    display.my_display(
        'Optimizing parameter set: {} \n'.format(param_dict['param_id']), True,
        param_dict['str_path'])

    # Run NMF on training set
    fac_subnet_init = np.random.uniform(low=0.0,
                                        high=1.0,
                                        size=(param_dict['rank'],
                                              n_train_conn))
    fac_coef_init = np.random.uniform(low=0.0,
                                      high=1.0,
                                      size=(param_dict['rank'], n_train_win))

    # Only the trained sub-networks are needed downstream
    train_fac_subnet, _, _ = nmf.snmf_bcd(
        train_cfg_matr,
        alpha=param_dict['alpha'],
        beta=param_dict['beta'],
        fac_subnet_init=fac_subnet_init,
        fac_coef_init=fac_coef_init,
        max_iter=25,
        verbose=False)

    # Solve single least squares estimate for coefficients on held-out fold
    fac_coef_init = np.random.uniform(low=0.0,
                                      high=1.0,
                                      size=(param_dict['rank'], n_test_win))

    _, test_fac_coef, _ = nmf.snmf_bcd(test_cfg_matr,
                                       alpha=param_dict['alpha'],
                                       beta=param_dict['beta'],
                                       fac_subnet_init=train_fac_subnet,
                                       fac_coef_init=fac_coef_init,
                                       max_iter=1,
                                       verbose=False)

    # Compute error
    norm_test_cfg_matr = matrix_utils.norm_fro(test_cfg_matr.T)
    err = matrix_utils.norm_fro_err(test_cfg_matr.T, train_fac_subnet.T,
                                    test_fac_coef.T,
                                    norm_test_cfg_matr) / norm_test_cfg_matr

    # Compute sparsity of the subgraphs and coefficients:
    # per-factor fraction of exact zeros, then the mean across factors
    pct_sparse_subnet = (train_fac_subnet == 0).mean(axis=1).mean()
    pct_sparse_coef = (test_fac_coef == 0).mean(axis=1).mean()

    qmeas_dict = {
        'param_id': param_dict['param_id'],
        'error': err,
        'pct_sparse_subgraph': pct_sparse_subnet,
        'pct_sparse_coef': pct_sparse_coef
    }

    return qmeas_dict
Exemple #6
0
def snmf_bcd(cfg_matr, alpha, beta,
             fac_subnet_init, fac_coef_init,
             max_iter, verbose=True):
    """
    Compute Sparse-NMF based on Kim and Park (2011).
    By default, enforces a sparse penalty on the coefficients matrix H
    and regularizes the sub-network, basis matrix W.

    Internally the problem is solved on the transposed matrices:

    A -> cfg_matr.T    [n_conn x n_win]
    W -> fac_subnet.T  [n_conn x n_fac]
    H -> fac_coef.T    [n_win x n_fac]

    Parameters
    ----------
        cfg_matr: numpy.ndarray
            The network configuration matrix
            shape: [n_win x n_conn]

        alpha: float
            Regularization parameter on W

        beta: float
            Sparsity parameter on H

        fac_subnet_init: numpy.ndarray
            Initial sub-network basis matrix
            shape: [n_fac x n_conn]

        fac_coef_init: numpy.ndarray
            Initial coefficients matrix
            shape: [n_fac x n_win]

        max_iter: int
            Number of alternating optimization iterations to perform
            (the loop has no early-stopping criterion)
            Typically around 100

        verbose: bool
            Print progress information to the screen

    Returns
    -------
        fac_subnet: numpy.ndarray
            Final sub-network basis matrix
            shape: [n_fac x n_conn]

        fac_coef: numpy.ndarray
            Final coefficients matrix
            shape: [n_fac x n_win]

        rel_err: numpy.ndarray
            Relative error recorded after every iteration, i.e.
            norm_fro_err(A, W, H, norm_A) / norm_A
            shape: [max_iter,]

    Raises
    ------
        ValueError
            If any input is not two-dimensional or the shapes of the inputs
            are inconsistent with each other.
    """

    # Standard param checks
    errors.check_type(cfg_matr, np.ndarray)
    errors.check_type(alpha, float)
    errors.check_type(beta, float)
    errors.check_type(fac_subnet_init, np.ndarray)
    errors.check_type(fac_coef_init, np.ndarray)
    errors.check_type(max_iter, int)
    errors.check_type(verbose, bool)

    # Check input dimensions
    if not len(cfg_matr.shape) == 2:
        raise ValueError('%r does not have two dimensions' % cfg_matr)
    n_win = cfg_matr.shape[0]
    n_conn = cfg_matr.shape[1]

    if not len(fac_subnet_init.shape) == 2:
        raise ValueError('%r does not have two dimensions' % fac_subnet_init)
    n_fac = fac_subnet_init.shape[0]
    if not fac_subnet_init.shape[1] == n_conn:
        raise ValueError('%r should have same number of connections as %r' %
                         (fac_subnet_init, cfg_matr))

    if not len(fac_coef_init.shape) == 2:
        raise ValueError('%r does not have two dimensions' % fac_coef_init)
    if not fac_coef_init.shape[0] == n_fac:
        raise ValueError('%r should specify same number of factors as %r' %
                         (fac_coef_init, fac_subnet_init))
    if not fac_coef_init.shape[1] == n_win:
        raise ValueError('%r should have same number of windows as %r' %
                         (fac_coef_init, cfg_matr))

    # Initialize matrices (copies, so the caller's arrays are never modified)
    # A - [n_conn x n_win]
    # W - [n_conn x n_fac]
    # H - [n_win x n_fac]
    A = cfg_matr.T.copy()
    W = fac_subnet_init.T.copy()
    H = fac_coef_init.T.copy()

    # Regularization matrix
    # alpha_matr - [n_fac x n_fac]
    alpha_matr = np.sqrt(alpha) * np.eye(n_fac)
    alpha_zeros_matr = np.zeros((n_conn, n_fac))

    # Sparsity matrix
    # beta_matr - [1 x n_fac]
    beta_matr = np.sqrt(beta) * np.ones((1, n_fac))
    beta_zeros_matr = np.zeros((1, n_win))

    # Capture error minimization
    rel_error = np.zeros(max_iter)
    norm_A = matr_util.norm_fro(A)

    my_display('\nBeginning Non-Negative Matrix Factorization\n', verbose)
    # Started once before the loop, so the reported time is cumulative
    t_iter_start = time.time()
    # range (not xrange) keeps the loop valid on both Python 2 and Python 3
    for ii in range(max_iter):
        # Use the Block-Pivot Solver
        # First solve for H: the extra row sqrt(beta) * 1^T stacked under W,
        # matched by a zero row under A, adds the sparsity penalty to the
        # least-squares system.
        # W_beta - [(n_conn + 1) x n_fac], A_beta - [(n_conn + 1) x n_win]
        W_beta = np.vstack((W, beta_matr))
        A_beta = np.vstack((A, beta_zeros_matr))
        Sol, _ = nnls.nnlsm_blockpivot(W_beta, A_beta, init=H.T)
        H = Sol.T

        # Now, solve for W: the block sqrt(alpha) * I stacked under H,
        # matched by zero columns appended to A, adds the regularization
        # penalty on W.
        # H_alpha - [(n_win + n_fac) x n_fac]
        # A_alpha - [n_conn x (n_win + n_fac)]
        H_alpha = np.vstack((H, alpha_matr))
        A_alpha = np.hstack((A, alpha_zeros_matr))
        Sol, _ = nnls.nnlsm_blockpivot(H_alpha, A_alpha.T, init=W.T)
        W = Sol.T

        t_iter_elapsed = time.time() - t_iter_start
        err = matr_util.norm_fro_err(A, W, H, norm_A) / norm_A
        rel_error[ii] = err

        str_header = 'Running -- '
        str_iter = 'Iteration %4d' % (ii+1)
        str_err = 'Relative Error: %0.5f' % err
        str_elapsed = 'Elapsed Time: %0.3f sec' % t_iter_elapsed
        my_display('{} {} | {} | {} \r'.format(str_header, str_iter,
                                               str_err, str_elapsed),
                   verbose)
    my_display('\nDone.\n', verbose)

    # Rescale the factor pair; the third return value is not needed here
    W, H, _ = matr_util.normalize_column_pair(W, H)

    return W.T, H.T, rel_error