예제 #1
0
def test_geigh_Lsym_bp():
    """
    Test geigh_Lsym_bp against the eigenvalues of the symmetric Laplacian.

    First part: random matrices of several shapes, every rank from 1 to
    n_rows + n_cols, both ends of the spectrum, plus rank=None.

    Second part: a matrix with one zero row and one zero column; checks the
    generalized eigenvalues against true_gevals_Lsym and that the rows of the
    generalized eigenvectors belonging to zero rows/columns are zero.
    """
    tol = 1e-10
    ends = ('largest', 'smallest')

    shapes = [(4, 5), (5, 5), (5, 1)]
    Xs = [np.random.uniform(size=shape) for shape in shapes]

    for X in Xs:

        true_evals, true_evecs = eigh_wrapper(get_sym_laplacian_bp(X),
                                              rank=None)

        n_total = sum(X.shape)
        for rank in range(1, n_total + 1):

            # leading evals for 'largest', trailing evals for 'smallest'
            expected = {'largest': true_evals[:rank],
                        'smallest': true_evals[-rank:]}

            for end in ends:
                gevals, gevecs = geigh_Lsym_bp(X, rank=rank, zero_tol=tol,
                                               end=end)
                check_geigh_Lsym_internal_no_zeros(X, gevals, gevecs, rank)
                assert np.allclose(gevals[:rank], expected[end])

        # rank=None: only the internal consistency check is run
        for end in ends:
            gevals, gevecs = geigh_Lsym_bp(X, rank=None, zero_tol=tol,
                                           end=end)
            check_geigh_Lsym_internal_no_zeros(X, gevals, gevecs, None)

    # test with zero rows/cols
    X = deepcopy(Xs[0])
    X[0, :] = 0
    X[:, 0] = 0

    true_gevals, true_zero_mask = true_gevals_Lsym(X)

    # X is 4 x 5 with one zero row and one zero column, so 3 + 4 = 7
    # non-isolated vertices remain; geigh_Lsym_bp rejects larger ranks.
    for rank in range(1, 8):

        # make sure gen evals are correct
        gevals, gevecs = geigh_Lsym_bp(X, rank=rank, zero_tol=tol,
                                       end='largest')
        assert np.allclose(gevals, true_gevals[:rank])

        # check gen evecs have correct zero rows
        assert np.allclose(np.abs(gevecs[true_zero_mask]).sum(), 0)
예제 #2
0
파일: linalg.py 프로젝트: idc9/mvmm
def eigh_Lsym_bp(X, rank=None):
    """
    Eigen-decomposition of the symmetric Laplacian Lsym(A_bp(X)).

    Builds the Laplacian with get_sym_laplacian_bp and hands it to
    eigh_wrapper.

    Parameters
    ----------
    X: array-like, (n_rows, n_cols)
        The data matrix.

    rank: None, int
        The rank to compute; forwarded unchanged to eigh_wrapper.

    Output
    ------
    evals, evecs

    evals: array-like, (rank, )
        The largest eigenvalues of Lsym(A_bp(X)).

    evecs: array-like, (n_rows + n_cols, rank)
        The corresponding eigenvectors.

    """
    return eigh_wrapper(get_sym_laplacian_bp(X), rank=rank)
예제 #3
0
파일: opt_viz.py 프로젝트: idc9/mvmm_sim
def summarize_bd(D, n_blocks, zero_thresh=None, lap='sym'):
    """
    Print and plot a summary of the block structure of D.

    Prints the community summary, plots the Laplacian eigenvalues (all of
    them and the last n_blocks of them) side by side, then draws a heatmap
    of the community matrix in a second figure.

    Parameters
    ----------
    D: array-like
        The matrix to summarize.

    n_blocks: int
        How many trailing eigenvalues to show in the second panel.

    zero_thresh: None, float
        Forwarded to community_summary.

    lap: str
        Which Laplacian to use; must be one of ['sym', 'un'].
    """
    assert lap in ['sym', 'un']

    comm_summary, Pi_comm = community_summary(D, zero_thresh=zero_thresh)
    print(comm_summary)

    plt.figure(figsize=(8, 4))

    # eigenvalues of the requested Laplacian
    if lap == 'sym':
        decomposition = eigh_Lsym_bp(D)
    else:
        decomposition = eigh_wrapper(get_unnorm_laplacian_bp(D))
    evals = decomposition[0]

    # (values, title) for the left and right panels
    panels = (
        (evals, 'all evals of L_{}'.format(lap)),
        (evals[-n_blocks:], 'smallest {} evals'.format(n_blocks)),
    )
    for position, (values, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(values, marker='.')
        plt.title(title)

    print('evals', evals)

    plt.figure()
    sns.heatmap(Pi_comm, cmap='Blues', square=True, cbar=False, vmin=0)
    plt.xlabel('View 1 clusters')
    plt.ylabel('View 2 clusters')
예제 #4
0
파일: BlockDiagMVMM.py 프로젝트: idc9/mvmm
    def _e_step(self, X):
        """
        E-step: responsibilities, observed negative log-likelihood and, when
        the Laplacian penalty is active, the eigen-quantities of the current
        block diagonal weights.

        Parameters
        ----------
        X:
            The observed data.


        Output
        ------
        E_out: dict
            E_out['log_resp']: array-like
                Output of self.log_resps.

            E_out['obs_nll']: float
                Negative mean over samples of logsumexp(log_prob, axis=1).

            E_out['evals']: array-like, (n_blocks, ), or None
                Eigenvalues used by the penalty. None when the mode is
                'fine_tune_bd' or n_blocks == 1.

            E_out['eig_var']: array-like, or None
                The matching eigenvectors. None under the same conditions.

        Raises
        ------
        ValueError
            If the penalty is active and self.lap is not 'sym' or 'un'.
        """

        # standard E-step
        log_prob = self.log_probs(X)
        log_resp = self.log_resps(log_prob)

        obs_nll = - logsumexp(log_prob, axis=1).mean()

        assert self.__mode in ['lap_pen', 'fine_tune_bd']

        # no eigen-quantities unless the Laplacian penalty is active
        evals = None
        eig_var = None

        if self.__mode == 'lap_pen' and self.n_blocks != 1:

            # number of eigenvalues to extract; only needed in this branch
            if self.n_blocks is not None:
                B = self.n_blocks
            else:
                B = len(self.eval_weights)

            if self.lap == 'sym':

                evals, eig_var = geigh_Lsym_bp_smallest(X=self.bd_weights_,
                                                        rank=B,
                                                        zero_tol=1e-10,
                                                        method='tsym')

            elif self.lap == 'un':
                # keep the last B eigenpairs of the unnormalized Laplacian
                Lun = get_unnorm_laplacian_bp(self.bd_weights_)
                all_evals, all_evecs = eigh_wrapper(Lun)
                eig_var = all_evecs[:, -B:]
                evals = all_evals[-B:]

            else:
                # previously fell through and hit an UnboundLocalError
                # at the return statement
                raise ValueError("lap must be one of ['sym', 'un'], "
                                 "got {!r}".format(self.lap))

        return {'log_resp': log_resp,
                'obs_nll': obs_nll,
                'evals': evals,
                'eig_var': eig_var}
예제 #5
0
파일: test_linalg.py 프로젝트: idc9/mvmm
def check_geigh_Lsym_bp_from_Tsym(X, rank=None, method='direct'):
    """
    Checks the output of geigh_sym_laplacian_bp against a direct solve of the
    generalized eigenproblem (Lun, diag(degs)) with eigh_wrapper.

    For both end='largest' and end='smallest' this asserts that

    - the generalized eigenvalues match the reference ones,
    - each generalized eigenvector whose eigenvalue is not 1 spans the same
      line as the reference one (angle below 1e-4),
    - the eigenvectors satisfy gevecs.T @ diag(degs) @ gevecs = I.

    Parameters
    ----------
    X: array-like, (n_rows, n_cols)
        The data matrix.

    rank: None, int
        Forwarded to geigh_sym_laplacian_bp. When None, min(X.shape) is used
        to locate the smallest eigenvalues in the reference spectrum.

    method: str
        Forwarded to geigh_sym_laplacian_bp.
    """
    Lun = get_unnorm_laplacian_bp(X)
    deg_mat = np.diag(get_deg_bp(X))  # built once, reused below
    true_gevals, true_gevecs = eigh_wrapper(A=Lun, B=deg_mat)

    _rank = min(X.shape) if rank is None else rank

    # Index in the reference spectrum where each end starts. The comparison
    # below assumes eigh_wrapper orders eigenvalues from largest to smallest,
    # so the smallest _rank of them are the last _rank entries.
    start_of = (('largest', 0),
                ('smallest', sum(X.shape) - _rank))

    for end, base_idx in start_of:
        gevals, gevecs = geigh_sym_laplacian_bp(X=X, rank=rank,
                                                method=method, end=end)

        # check proper normalization (does not depend on k, so done once)
        assert np.allclose(gevecs.T @ deg_mat @ gevecs,
                           np.eye(gevecs.shape[1]))

        for k, geval in enumerate(gevals):

            # check the gevals are correct
            assert np.allclose(geval, true_gevals[base_idx + k])

            # non-unique subspace for 1 evals
            if np.allclose(geval, 1):
                continue

            # check the gen evecs span the correct subspaces
            a = angle(gevecs[:, k], true_gevecs[:, base_idx + k],
                      subspace=True)
            assert a < 1e-4
예제 #6
0
def true_gevals_Lsym(X, zero_tol=1e-10):
    """
    Reference generalized eigenvalues for a matrix that may contain zero
    rows/columns, built from the spectrum of the symmetric Laplacian.

    Parameters
    ----------
    X: array-like, (n_rows, n_cols)
        The data matrix.

    zero_tol: float
        Rows/columns whose norm is below this are treated as zero.

    Output
    ------
    true_gevals, true_zero_mask

    true_gevals: array-like
        The leading and trailing `n_keep` eigenvalues of Lsym together with
        max(X.shape) - min(X.shape) ones, sorted in decreasing order, where
        n_keep = max(X.shape) - (number of zero rows and columns).

    true_zero_mask: array-like of bool, (n_rows + n_cols, )
        True for zero rows followed by True for zero columns.
    """
    spectrum, _ = eigh_wrapper(get_sym_laplacian_bp(X), rank=None)

    # isolated vertices = zero rows and zero columns of X
    zero_row_mask = np.linalg.norm(X, axis=1) < zero_tol
    zero_col_mask = np.linalg.norm(X, axis=0) < zero_tol
    n_iso_verts = sum(zero_row_mask) + sum(zero_col_mask)

    larger_dim = max(X.shape)
    smaller_dim = min(X.shape)
    n_keep = larger_dim - n_iso_verts

    head = spectrum[0:n_keep]
    ones = [1] * (larger_dim - smaller_dim)
    tail = spectrum[-n_keep:]

    # ascending sort, then flip to get decreasing order
    true_gevals = np.sort(np.concatenate([head, ones, tail]))[::-1]

    true_zero_mask = np.concatenate([zero_row_mask, zero_col_mask])
    return true_gevals, true_zero_mask
예제 #7
0
파일: test_linalg.py 프로젝트: idc9/mvmm
def check_eigh_Lsym_bp_from_Tsym(X, rank=None):
    """
    Checks the output of eigh_Lsym_bp_from_Tsym against a direct
    eigen-decomposition of get_sym_laplacian_bp(X) with eigh_wrapper.

    For both end='largest' and end='smallest' this asserts that

    - the eigenvalues match the reference ones,
    - each eigenvector whose eigenvalue is not 1 spans the same line as the
      reference one (angle below 1e-4),
    - the eigenvectors are orthonormal, i.e. evecs.T @ evecs = I.

    Parameters
    ----------
    X: array-like, (n_rows, n_cols)
        The data matrix.

    rank: None, int
        Forwarded to eigh_Lsym_bp_from_Tsym. When None, min(X.shape) is used
        to locate the smallest eigenvalues in the reference spectrum.
    """

    Lsym = get_sym_laplacian_bp(X)
    true_evals, true_evecs = eigh_wrapper(Lsym)

    _rank = min(X.shape) if rank is None else rank

    # Index in the reference spectrum where each end starts. The comparison
    # below assumes eigh_wrapper orders eigenvalues from largest to smallest,
    # so the smallest _rank of them are the last _rank entries.
    start_of = (('largest', 0),
                ('smallest', sum(X.shape) - _rank))

    for end, base_idx in start_of:
        evals, evecs = eigh_Lsym_bp_from_Tsym(X, end=end, rank=rank)

        # check normalization (does not depend on k, so done once)
        assert np.allclose(evecs.T @ evecs, np.eye(evecs.shape[1]))

        for k, ev in enumerate(evals):

            # check the evals are correct
            assert np.allclose(ev, true_evals[base_idx + k])

            # non-unique subspace for 1 evals
            if np.allclose(ev, 1):
                continue

            # check eigenvectors point in the same direction
            a = angle(true_evecs[:, base_idx + k], evecs[:, k],
                      subspace=True)
            assert a < 1e-4
예제 #8
0
def check_vs_truth_smallest_eigh_Lsym_bp_from_Tsym_no_zeros(X, rank):
    """
    Compare smallest_eigh_Lsym_bp_from_Tsym_no_zeros with the trailing
    eigenpairs of a full decomposition of get_sym_laplacian_bp(X).

    Parameters
    ----------
    X: array-like, (n_rows, n_cols)
        The data matrix.

    rank: None, int
        Forwarded to the function under test. When None, min(X.shape)
        reference eigenpairs are compared.
    """
    evals, evecs = smallest_eigh_Lsym_bp_from_Tsym_no_zeros(X, rank=rank)

    n_keep = min(X.shape) if rank is None else rank

    # reference: last n_keep eigenpairs of the full decomposition
    all_evals, all_evecs = eigh_wrapper(A=get_sym_laplacian_bp(X))
    evals_true = all_evals[-n_keep:]
    evecs_true = all_evecs[:, -n_keep:]

    # eigenvalues must agree
    assert np.allclose(evals, evals_true)

    # eigenvectors must span the same lines
    for k in range(n_keep):
        # eigenvectors of eigenvalue 1 are not unique, so skip them
        if np.allclose(evals[k], 1):
            continue
        assert angle(evecs[:, k], evecs_true[:, k], subspace=True) < 1e-4
예제 #9
0
파일: linalg.py 프로젝트: idc9/mvmm
def geigh_Lsym_bp(X, rank=None, zero_tol=1e-10, end='smallest'):
    """
    Largest or smallest generalized eigenpairs of
    [Lun(A_bp(X)), deg(A_bp(X))], computed with eigh_wrapper on X with its
    zero rows/columns removed.

    Parameters
    ----------
    X: array-like, (n_rows, n_cols)
        The data matrix.

    rank: None, int
        The rank to compute. If None, uses the smaller dimension of X after
        its zero rows/columns have been dropped.

    zero_tol: float
        Tolerance to identify zero rows/columns by their norm.

    end: str
        Must be one of ['smallest', 'largest'].
        Compute the smallest or largest generalized eigenvectors.

    Output
    ------
    gevals, gevecs

    gevals: array-like, (rank, )
        The smallest or largest generalized eigenvalues.

    gevecs: array-like, (n_rows + n_cols, rank)
        The corresponding generalized eigenvectors, with zeros in the entries
        that belong to zero rows/columns of X.
        Normalized such that gevecs.T @ deg(A_bp(X)) gevecs = I

    Raises
    ------
    AssertionError
        If end is not a valid option or rank is outside
        [1, n_rows + n_cols].

    ValueError
        If rank exceeds the number of non-zero rows plus non-zero columns.
    """

    assert end in ['smallest', 'largest']

    # strip zero rows/columns from X
    zero_row_mask = np.linalg.norm(X, axis=1) < zero_tol
    zero_col_mask = np.linalg.norm(X, axis=0) < zero_tol
    X_woz = X[~zero_row_mask, :][:, ~zero_col_mask]

    if rank is None:
        rank = min(X_woz.shape)
    n_total = sum(X.shape)
    assert 1 <= rank <= n_total

    if rank > sum(X_woz.shape):
        raise ValueError("X has too many zero rows/columns.")

    # generalized eigenproblem on the reduced matrix
    Lun = get_unnorm_laplacian_bp(X_woz)
    deg_mat = np.diag(get_deg_bp(X_woz))

    if end == 'largest':
        gevals, gevecs_woz = eigh_wrapper(A=Lun, B=deg_mat, rank=rank)

    elif end == 'smallest':
        # Solve for -Lun, then undo the sign and reverse the order of both
        # the eigenvalues and the eigenvector columns.
        neg_gevals, flipped_gevecs = eigh_wrapper(A=-Lun, B=deg_mat,
                                                  rank=rank)
        gevals = (- neg_gevals)[::-1]
        gevecs_woz = flipped_gevecs[:, ::-1]

    # scatter the reduced eigenvectors back; zero rows/columns stay zero
    keep_mask = np.concatenate([~zero_row_mask, ~zero_col_mask])
    gevecs = np.zeros((n_total, rank))
    gevecs[keep_mask, :] = gevecs_woz

    return gevals, gevecs
예제 #10
0
파일: BlockDiagMVMM.py 프로젝트: idc9/mvmm
    def compute_tracking_data(self, X, E_out=None):
        """
        Optimization history to keep track of.

        Parameters
        ----------
        X:
            The observed data.

        E_out: None, dict
            Output of the E-step. Computed with self._e_step(X) when None.

        Output
        ------
        out: dict
            out['model']: copy of the current parameters
                (only when self.history_tracking >= 2).

            out['obs_nll']: observed negative log-likelihood.

            out['loss_val']: the overall loss. Equal to out['obs_nll'] in
                'fine_tune_bd' mode or when n_blocks == 1, in which case no
                further keys are added.

            out['raw_eval_sum']: unweighted sum of the eigenvalues.

            out['eval_sum']: eigenvalue sum, weighted by
                asc_sort(self.eval_weights) when eval_weights is set.

            out['eval_loss']: self.eval_pen_ * out['eval_sum'].

            out['evan_pen']: copy of self.eval_pen_.

        Raises
        ------
        ValueError
            If the eigenvalues must be recomputed and self.lap is not
            'sym' or 'un'.
        """

        out = {}

        if E_out is None:
            E_out = self._e_step(X)

        # maybe track model history
        if self.history_tracking >= 2:
            out['model'] = deepcopy(self._get_parameters())

        if 'obs_nll' in E_out:
            out['obs_nll'] = E_out['obs_nll']
        else:
            out['obs_nll'] = - self.score(X)

        # if we are fine tuning with a fixed zero mask the loss function
        # is just the observed negative log likelihood
        if self.__mode == 'fine_tune_bd' or self.n_blocks == 1:
            out['loss_val'] = out['obs_nll']
            return out

        if self.n_blocks is not None:
            B = self.n_blocks
        else:
            B = len(self.eval_weights)

        # evals of current step
        if 'evals' in E_out:
            evals = E_out['evals']

        elif self.lap == 'sym':
            evals, _ = geigh_Lsym_bp_smallest(X=self.bd_weights_,
                                              rank=B,
                                              zero_tol=1e-10,
                                              method='tsym')

        elif self.lap == 'un':
            # keep the last B eigenvalues of the unnormalized Laplacian
            Lun = get_unnorm_laplacian_bp(self.bd_weights_)
            all_evals, _ = eigh_wrapper(Lun)
            evals = all_evals[-B:]

        else:
            # previously fell through and hit an UnboundLocalError below
            raise ValueError("lap must be one of ['sym', 'un'], "
                             "got {!r}".format(self.lap))

        out['raw_eval_sum'] = sum(evals)

        if self.eval_weights is not None:

            eval_sum = evals.T @ asc_sort(self.eval_weights)

        else:
            # vanilla sum
            assert len(evals) == B
            eval_sum = sum(evals)

        out['eval_sum'] = eval_sum
        out['eval_loss'] = self.eval_pen_ * eval_sum
        # NOTE(review): 'evan_pen' looks like a typo for 'eval_pen'; the key
        # is kept as is because consumers of the history may rely on it.
        out['evan_pen'] = deepcopy(self.eval_pen_)

        # overall loss
        out['loss_val'] = out['obs_nll'] + out['eval_loss']

        return out