Example #1
0
def snmf_bcd(cfg_matr,
             alpha,
             beta,
             fac_subnet_init,
             fac_coef_init,
             max_iter,
             verbose=True):
    """
    Compute Sparse-NMF based on Kim and Park (2011).
    By default, enforces a sparse penalty on the coefficients matrix H
    and regularizes the sub-network, basis matrix W.

    Internally the inputs are transposed so that the factorization reads
    A ~ W * H.T:

    A -> cfg_matr.T          [n_conn x n_win]
    W -> fac_subnet_init.T   [n_conn x n_fac]
    H -> fac_coef_init.T     [n_win x n_fac]

    Parameters
    ----------
        cfg_matr: numpy.ndarray
            The network configuration matrix
            shape: [n_win x n_conn]

        alpha: float
            Regularization parameter on W

        beta: float
            Sparsity parameter on H

        fac_subnet_init: numpy.ndarray
            Initial sub-network basis matrix
            shape: [n_fac x n_conn]

        fac_coef_init: numpy.ndarray
            Initial coefficients matrix
            shape: [n_fac x n_win]

        max_iter: int
            Number of alternating optimization iterations to perform
            (there is no early stopping; every iteration is run)
            Typically around 100

        verbose: bool
            Print progress information to the screen

    Returns
    -------
        fac_subnet: numpy.ndarray
            Final sub-network basis matrix
            shape: [n_fac x n_conn]

        fac_coef: numpy.ndarray
            Final coefficients matrix
            shape: [n_fac x n_win]

        rel_err: numpy.ndarray
            Relative error recorded after each iteration, i.e. the value of
            matr_util.norm_fro_err(A, W, H, norm_A) divided by the
            Frobenius norm of A
            shape: [max_iter,]

    Raises
    ------
        ValueError
            If any input matrix is not two-dimensional, or if the number of
            connections, factors or windows disagrees between the inputs.
        (errors.check_type may additionally raise on a wrong argument type;
        the exact exception class is defined by that helper.)
    """

    # Standard param checks
    errors.check_type(cfg_matr, np.ndarray)
    errors.check_type(alpha, float)
    errors.check_type(beta, float)
    errors.check_type(fac_subnet_init, np.ndarray)
    errors.check_type(fac_coef_init, np.ndarray)
    errors.check_type(max_iter, int)
    errors.check_type(verbose, bool)

    # Check input dimensions
    if not len(cfg_matr.shape) == 2:
        raise ValueError('%r does not have two dimensions' % cfg_matr)
    n_win = cfg_matr.shape[0]
    n_conn = cfg_matr.shape[1]

    if not len(fac_subnet_init.shape) == 2:
        raise ValueError('%r does not have two dimensions' % fac_subnet_init)
    n_fac = fac_subnet_init.shape[0]
    if not fac_subnet_init.shape[1] == n_conn:
        raise ValueError('%r should have same number of connections as %r' %
                         (fac_subnet_init, cfg_matr))

    if not len(fac_coef_init.shape) == 2:
        raise ValueError('%r does not have two dimensions' % fac_coef_init)
    if not fac_coef_init.shape[0] == n_fac:
        raise ValueError('%r should specify same number of factors as %r' %
                         (fac_coef_init, fac_subnet_init))
    if not fac_coef_init.shape[1] == n_win:
        raise ValueError('%r should have same number of windows as %r' %
                         (fac_coef_init, cfg_matr))

    # Initialize matrices (copies, so the caller's arrays are never aliased)
    # A - [n_conn x n_win]
    # W - [n_conn x n_fac]
    # H - [n_win x n_fac]
    A = cfg_matr.T.copy()
    W = fac_subnet_init.T.copy()
    H = fac_coef_init.T.copy()

    # Regularization block appended below H when solving for W.
    # Stacking sqrt(alpha) * I adds alpha * ||w||^2 to each least-squares
    # residual.
    # alpha_matr - [n_fac x n_fac]
    alpha_matr = np.sqrt(alpha) * np.eye(n_fac)

    # Sparsity row appended below W when solving for H.
    # Stacking sqrt(beta) * ones adds beta * (sum of the column's
    # coefficients)^2 to each least-squares residual.
    # beta_matr - [1 x n_fac]
    beta_matr = np.sqrt(beta) * np.ones((1, n_fac))

    # The zero-padded right-hand sides depend only on A, so they are built
    # once here instead of being re-allocated on every iteration.
    # A_beta    - [(n_conn + 1) x n_win]
    # A_alpha_T - [(n_win + n_fac) x n_conn]
    A_beta = np.vstack((A, np.zeros((1, n_win))))
    A_alpha_T = np.hstack((A, np.zeros((n_conn, n_fac)))).T

    # Capture error minimization
    rel_error = np.zeros(max_iter)
    norm_A = matr_util.norm_fro(A)

    my_display('\nBeginning Non-Negative Matrix Factorization\n', verbose)
    # Started once before the loop: the reported time is cumulative.
    t_iter_start = time.time()
    for ii in range(max_iter):
        # Use the Block-Pivot Solver
        # First solve for H, warm-started from the current estimate
        W_beta = np.vstack((W, beta_matr))
        sol, _ = nnls.nnlsm_blockpivot(W_beta, A_beta, init=H.T)
        H = sol.T

        # Now, solve for W, warm-started from the current estimate
        H_alpha = np.vstack((H, alpha_matr))
        sol, _ = nnls.nnlsm_blockpivot(H_alpha, A_alpha_T, init=W.T)
        W = sol.T

        t_iter_elapsed = time.time() - t_iter_start
        err = matr_util.norm_fro_err(A, W, H, norm_A) / norm_A
        rel_error[ii] = err

        str_header = 'Running -- '
        str_iter = 'Iteration %4d' % (ii + 1)
        str_err = 'Relative Error: %0.5f' % err
        str_elapsed = 'Elapsed Time: %0.3f sec' % t_iter_elapsed
        # Trailing '\r' lets successive progress lines overwrite each other
        my_display(
            '{} {} | {} | {} \r'.format(str_header, str_iter, str_err,
                                        str_elapsed), verbose)
    my_display('\nDone.\n', verbose)

    # The third return value (column weights) is not needed here
    W, H, _ = matr_util.normalize_column_pair(W, H)

    # Transpose back to the caller's [n_fac x ...] orientation
    return W.T, H.T, rel_error
Example #2
0
    def run(self, A, k, init=None, max_iter=None, max_time=None, verbose=0):
        """ Run a NMF algorithm

        Parameters
        ----------
        A : numpy.array or scipy.sparse matrix, shape (m,n)
        k : int - target lower rank

        Optional Parameters
        -------------------
        init : (W_init, H_init) where
                    W_init is numpy.array of shape (m,k) and
                    H_init is numpy.array of shape (n,k).
                    If provided, copies of these values are used as initial
                    values for NMF iterations; otherwise W and H are drawn
                    from random.rand.
        max_iter : int - maximum number of iterations.
                    If not provided, self.default_max_iter is used.
        max_time : int - maximum amount of time in seconds.
                    If not provided, self.default_max_time is used.
                    Only the time spent inside self.iter_solver is counted,
                    and the limit is checked after each iteration.
        verbose : int - 0 (default) - No debugging information is collected, but
                                    input and output information is printed on screen.
                        -1 - No debugging information is collected, and
                                    nothing is printed on screen.
                        1 (debugging/experimental purpose) - History of computation is
                                        returned. See 'rec' variable.
                        2 (debugging/experimental purpose) - History of computation is
                                        additionally printed on screen.
        Returns
        -------
        (W, H, rec)
        W : Obtained factor matrix, shape (m,k)
        H : Obtained coefficient matrix, shape (n,k)
            W and H are passed through mu.normalize_column_pair before
            being returned.
        rec : dict - (debugging/experimental purpose) Auxiliary information about the execution.
            Keys: 'info' (run settings), 'final' (norm_A, rel_error,
            iterations, elapsed) and, when verbose >= 1, 'his'
            (per-iteration history).
        """
        info = {
            'k': k,
            'alg': str(self.__class__),
            'A_dim_1': A.shape[0],
            'A_dim_2': A.shape[1],
            'A_type': str(A.__class__),
            'max_iter': (max_iter if max_iter is not None
                         else self.default_max_iter),
            'verbose': verbose,
            'max_time': (max_time if max_time is not None
                         else self.default_max_time),
        }
        # Identity test: "!= None" would dispatch to the argument's own
        # comparison operator instead of simply checking for "not given".
        if init is not None:
            W = init[0].copy()
            H = init[1].copy()
            info['init'] = 'user_provided'
        else:
            W = random.rand(A.shape[0], k)
            H = random.rand(A.shape[1], k)
            info['init'] = 'uniform_random'

        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        if verbose >= 0:
            print('[NMF] Running: ')
            print(json.dumps(info, indent=4, sort_keys=True))

        norm_A = mu.norm_fro(A)
        total_time = 0

        if verbose >= 1:
            his = {'iter': [], 'elapsed': [], 'rel_error': []}

        start = time.time()
        # algorithm-specific initialization
        (W, H) = self.initializer(W, H)

        # Tracked separately from the loop variable so that the iteration
        # count is defined (as 0) even when the loop body never executes.
        n_iter = 0
        for i in range(1, info['max_iter'] + 1):
            n_iter = i
            start_iter = time.time()
            # algorithm-specific iteration solver
            (W, H) = self.iter_solver(A, W, H, k, i)
            elapsed = time.time() - start_iter

            if verbose >= 1:
                rel_error = mu.norm_fro_err(A, W, H, norm_A) / norm_A
                his['iter'].append(i)
                his['elapsed'].append(elapsed)
                his['rel_error'].append(rel_error)
                if verbose >= 2:
                    print('iter:' + str(i) + ', elapsed:' + str(elapsed) +
                          ', rel_error:' + str(rel_error))

            # Only solver time counts toward the max_time budget
            total_time += elapsed
            if total_time > info['max_time']:
                break

        # The third return value (column weights) is not used
        W, H, _ = mu.normalize_column_pair(W, H)

        final = {}
        final['norm_A'] = norm_A
        final['rel_error'] = mu.norm_fro_err(A, W, H, norm_A) / norm_A
        final['iterations'] = n_iter
        final['elapsed'] = time.time() - start

        rec = {'info': info, 'final': final}
        if verbose >= 1:
            rec['his'] = his

        if verbose >= 0:
            print('[NMF] Completed: ')
            print(json.dumps(final, indent=4, sort_keys=True))
        return (W, H, rec)
Example #3
0
 def initializer(self, W, H):
     """Return W and H after mu.normalize_column_pair has been applied.

     The helper returns three values; the third one (the weights) is
     discarded here.
     """
     W_scaled, H_scaled, _ = mu.normalize_column_pair(W, H)
     return W_scaled, H_scaled
Example #4
0
    def run(self, A, k, init=None, max_iter=None, max_time=None, verbose=0):
        """ Run a NMF algorithm

        Parameters
        ----------
        A : numpy.array or scipy.sparse matrix, shape (m,n)
        k : int - target lower rank

        Optional Parameters
        -------------------
        init : (W_init, H_init) where
                    W_init is numpy.array of shape (m,k) and
                    H_init is numpy.array of shape (n,k).
                    If provided, copies of these values are used as initial
                    values for NMF iterations; otherwise W and H are drawn
                    from random.rand.
        max_iter : int - maximum number of iterations.
                    If not provided, self.default_max_iter is used.
        max_time : int - maximum amount of time in seconds.
                    If not provided, self.default_max_time is used.
                    Only the time spent inside self.iter_solver is counted,
                    and the limit is checked after each iteration.
        verbose : int - 0 (default) - No debugging information is collected, but
                                    input and output information is printed on screen.
                        -1 - No debugging information is collected, and
                                    nothing is printed on screen.
                        1 (debugging/experimental purpose) - History of computation is
                                        returned. See 'rec' variable.
                        2 (debugging/experimental purpose) - History of computation is
                                        additionally printed on screen.
        Returns
        -------
        (W, H, rec)
        W : Obtained factor matrix, shape (m,k)
        H : Obtained coefficient matrix, shape (n,k)
            W and H are passed through mu.normalize_column_pair before
            being returned.
        rec : dict - (debugging/experimental purpose) Auxiliary information about the execution.
            Keys: 'info' (run settings), 'final' (norm_A, rel_error,
            iterations, elapsed) and, when verbose >= 1, 'his'
            (per-iteration history).
        """
        info = {
            'k': k,
            'alg': str(self.__class__),
            'A_dim_1': A.shape[0],
            'A_dim_2': A.shape[1],
            'A_type': str(A.__class__),
            'max_iter':
            max_iter if max_iter is not None else self.default_max_iter,
            'verbose': verbose,
            'max_time':
            max_time if max_time is not None else self.default_max_time
        }
        # Identity test: "!= None" would dispatch to the argument's own
        # comparison operator instead of simply checking for "not given".
        if init is not None:
            W = init[0].copy()
            H = init[1].copy()
            info['init'] = 'user_provided'
        else:
            W = random.rand(A.shape[0], k)
            H = random.rand(A.shape[1], k)
            info['init'] = 'uniform_random'

        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        if verbose >= 0:
            print('[NMF] Running: ')
            print(json.dumps(info, indent=4, sort_keys=True))

        norm_A = mu.norm_fro(A)
        total_time = 0

        if verbose >= 1:
            his = {'iter': [], 'elapsed': [], 'rel_error': []}

        start = time.time()
        # algorithm-specific initialization
        (W, H) = self.initializer(W, H)

        # Tracked separately from the loop variable so that the iteration
        # count is defined (as 0) even when the loop body never executes.
        n_iter = 0
        for i in range(1, info['max_iter'] + 1):
            n_iter = i
            start_iter = time.time()
            # algorithm-specific iteration solver
            (W, H) = self.iter_solver(A, W, H, k, i)
            elapsed = time.time() - start_iter

            if verbose >= 1:
                rel_error = mu.norm_fro_err(A, W, H, norm_A) / norm_A
                his['iter'].append(i)
                his['elapsed'].append(elapsed)
                his['rel_error'].append(rel_error)
                if verbose >= 2:
                    print('iter:' + str(i) + ', elapsed:' + str(elapsed) +
                          ', rel_error:' + str(rel_error))

            # Only solver time counts toward the max_time budget
            total_time += elapsed
            if total_time > info['max_time']:
                break

        # The third return value (column weights) is not used
        W, H, _ = mu.normalize_column_pair(W, H)

        final = {}
        final['norm_A'] = norm_A
        final['rel_error'] = mu.norm_fro_err(A, W, H, norm_A) / norm_A
        final['iterations'] = n_iter
        final['elapsed'] = time.time() - start

        rec = {'info': info, 'final': final}
        if verbose >= 1:
            rec['his'] = his

        if verbose >= 0:
            print('[NMF] Completed: ')
            print(json.dumps(final, indent=4, sort_keys=True))
        return (W, H, rec)
Example #5
0
 def initializer(self, W, H):
     """Return W and H after mu.normalize_column_pair has been applied.

     The helper returns three values; the third one (the weights) is
     discarded here.
     """
     W_scaled, H_scaled, _ = mu.normalize_column_pair(W, H)
     return W_scaled, H_scaled
Example #6
0
def snmf_bcd(cfg_matr, alpha, beta,
             fac_subnet_init, fac_coef_init,
             max_iter, verbose=True):
    """
    Compute Sparse-NMF based on Kim and Park (2011).
    By default, enforces a sparse penalty on the coefficients matrix H
    and regularizes the sub-network, basis matrix W.

    Internally the inputs are transposed so that the factorization reads
    A ~ W * H.T:

    A -> cfg_matr.T          [n_conn x n_win]
    W -> fac_subnet_init.T   [n_conn x n_fac]
    H -> fac_coef_init.T     [n_win x n_fac]

    Parameters
    ----------
        cfg_matr: numpy.ndarray
            The network configuration matrix
            shape: [n_win x n_conn]

        alpha: float
            Regularization parameter on W

        beta: float
            Sparsity parameter on H

        fac_subnet_init: numpy.ndarray
            Initial sub-network basis matrix
            shape: [n_fac x n_conn]

        fac_coef_init: numpy.ndarray
            Initial coefficients matrix
            shape: [n_fac x n_win]

        max_iter: int
            Number of alternating optimization iterations to perform
            (there is no early stopping; every iteration is run)
            Typically around 100

        verbose: bool
            Print progress information to the screen

    Returns
    -------
        fac_subnet: numpy.ndarray
            Final sub-network basis matrix
            shape: [n_fac x n_conn]

        fac_coef: numpy.ndarray
            Final coefficients matrix
            shape: [n_fac x n_win]

        rel_err: numpy.ndarray
            Relative error recorded after each iteration, i.e. the value of
            matr_util.norm_fro_err(A, W, H, norm_A) divided by the
            Frobenius norm of A
            shape: [max_iter,]

    Raises
    ------
        ValueError
            If any input matrix is not two-dimensional, or if the number of
            connections, factors or windows disagrees between the inputs.
        (errors.check_type may additionally raise on a wrong argument type;
        the exact exception class is defined by that helper.)
    """

    # Standard param checks
    errors.check_type(cfg_matr, np.ndarray)
    errors.check_type(alpha, float)
    errors.check_type(beta, float)
    errors.check_type(fac_subnet_init, np.ndarray)
    errors.check_type(fac_coef_init, np.ndarray)
    errors.check_type(max_iter, int)
    errors.check_type(verbose, bool)

    # Check input dimensions
    if not len(cfg_matr.shape) == 2:
        raise ValueError('%r does not have two dimensions' % cfg_matr)
    n_win = cfg_matr.shape[0]
    n_conn = cfg_matr.shape[1]

    if not len(fac_subnet_init.shape) == 2:
        raise ValueError('%r does not have two dimensions' % fac_subnet_init)
    n_fac = fac_subnet_init.shape[0]
    if not fac_subnet_init.shape[1] == n_conn:
        raise ValueError('%r should have same number of connections as %r' %
                         (fac_subnet_init, cfg_matr))

    if not len(fac_coef_init.shape) == 2:
        raise ValueError('%r does not have two dimensions' % fac_coef_init)
    if not fac_coef_init.shape[0] == n_fac:
        raise ValueError('%r should specify same number of factors as %r' %
                         (fac_coef_init, fac_subnet_init))
    if not fac_coef_init.shape[1] == n_win:
        raise ValueError('%r should have same number of windows as %r' %
                         (fac_coef_init, cfg_matr))

    # Initialize matrices (copies, so the caller's arrays are never aliased)
    # A - [n_conn x n_win]
    # W - [n_conn x n_fac]
    # H - [n_win x n_fac]
    A = cfg_matr.T.copy()
    W = fac_subnet_init.T.copy()
    H = fac_coef_init.T.copy()

    # Regularization block appended below H when solving for W.
    # Stacking sqrt(alpha) * I adds alpha * ||w||^2 to each least-squares
    # residual.
    # alpha_matr - [n_fac x n_fac]
    alpha_matr = np.sqrt(alpha) * np.eye(n_fac)

    # Sparsity row appended below W when solving for H.
    # Stacking sqrt(beta) * ones adds beta * (sum of the column's
    # coefficients)^2 to each least-squares residual.
    # beta_matr - [1 x n_fac]
    beta_matr = np.sqrt(beta) * np.ones((1, n_fac))

    # The zero-padded right-hand sides depend only on A, so they are built
    # once here instead of being re-allocated on every iteration.
    # A_beta    - [(n_conn + 1) x n_win]
    # A_alpha_T - [(n_win + n_fac) x n_conn]
    A_beta = np.vstack((A, np.zeros((1, n_win))))
    A_alpha_T = np.hstack((A, np.zeros((n_conn, n_fac)))).T

    # Capture error minimization
    rel_error = np.zeros(max_iter)
    norm_A = matr_util.norm_fro(A)

    my_display('\nBeginning Non-Negative Matrix Factorization\n', verbose)
    # Started once before the loop: the reported time is cumulative.
    t_iter_start = time.time()
    for ii in range(max_iter):
        # Use the Block-Pivot Solver
        # First solve for H, warm-started from the current estimate
        W_beta = np.vstack((W, beta_matr))
        sol, _ = nnls.nnlsm_blockpivot(W_beta, A_beta, init=H.T)
        H = sol.T

        # Now, solve for W, warm-started from the current estimate
        H_alpha = np.vstack((H, alpha_matr))
        sol, _ = nnls.nnlsm_blockpivot(H_alpha, A_alpha_T, init=W.T)
        W = sol.T

        t_iter_elapsed = time.time() - t_iter_start
        err = matr_util.norm_fro_err(A, W, H, norm_A) / norm_A
        rel_error[ii] = err

        str_header = 'Running -- '
        str_iter = 'Iteration %4d' % (ii+1)
        str_err = 'Relative Error: %0.5f' % err
        str_elapsed = 'Elapsed Time: %0.3f sec' % t_iter_elapsed
        # Trailing '\r' lets successive progress lines overwrite each other
        my_display('{} {} | {} | {} \r'.format(str_header, str_iter,
                                               str_err, str_elapsed),
                   verbose)
    my_display('\nDone.\n', verbose)

    # The third return value (column weights) is not needed here
    W, H, _ = matr_util.normalize_column_pair(W, H)

    # Transpose back to the caller's [n_fac x ...] orientation
    return W.T, H.T, rel_error