Example #1
0
def significance(
    TTM: sp.csc_matrix,
    metric: "Union[Callable, KeynessMetric]",
    normalize: bool = False,
    n_contexts=None,
    n_words=None,
) -> "tuple[np.ndarray, tuple[np.ndarray, np.ndarray]]":
    """Compute a significance weight for each non-zero co-occurrence in `TTM`.

    Args:
        TTM (sp.csc_matrix): Sparse matrix of co-occurrence counts.
        metric (Union[Callable, KeynessMetric]): Either a callable, invoked with
            the keyword arguments ``Cij``, ``Z``, ``Zr``, ``ii``, ``jj``, ``K``,
            ``N`` and ``normalize``, or a key that is looked up in
            ``METRIC_FUNCTION`` (falling back to ``_undefined``).
        normalize (bool, optional): Forwarded unchanged to the metric.
            Defaults to False.
        n_contexts (optional): Forwarded to the metric as ``K``.
        n_words (optional): Forwarded to the metric as ``N``.

    Returns:
        A ``(weights, (rows, cols))`` tuple, NOT a matrix. ``weights`` holds the
        metric values and ``rows``/``cols`` the matching positions in `TTM`.
        Entries whose weight is NaN, infinite or zero are dropped. The tuple
        has the layout accepted by the scipy sparse constructors, e.g.
        ``sp.csc_matrix(result, shape=TTM.shape)``.
    """
    metric_fn = metric if callable(metric) else METRIC_FUNCTION.get(
        metric, _undefined)

    # Total number of observations (sum of all counts).
    Z: float = float(TTM.sum())
    # Number of observations per context (document, row sum).
    Zr = np.array(TTM.sum(axis=1), dtype=np.float64).flatten()
    # Row and column indices of the non-zero elements.
    ii, jj = TTM.nonzero()
    # Counts found at those positions, flattened to one dimension.
    Cij: np.ndarray = np.array(TTM[ii, jj], dtype=np.float64).flatten()

    # Compute weights (with optional normalize).
    weights = metric_fn(Cij=Cij,
                        Z=Z,
                        Zr=Zr,
                        ii=ii,
                        jj=jj,
                        K=n_contexts,
                        N=n_words,
                        normalize=normalize)

    # Map NaN / +-inf to 0 so the non-zero filter below discards them.
    # The result is rebound (not only patched in place) so the clean-up also
    # applies when the metric returned something other than a float ndarray.
    weights = np.nan_to_num(
        weights,
        copy=False,
        posinf=0.0,
        neginf=0.0,
        nan=0.0,
    )

    # Tuple of index arrays selecting the surviving (non-zero) weights.
    keep = weights.nonzero()

    return (weights[keep], (ii[keep], jj[keep]))
def PopularItems(A: sp.csc_matrix, limit=50):
    """
    Returns the most popular items.

    Popularity of an item is the sum of its column in ``A``. Every row of the
    result carries the same scores: the selected items get evenly spaced values
    from 0.5 (least popular of the selection) up to 1.0 (most popular).

    :param A: user-item matrix
    :param limit: how many popular items should be returned. The other entries will be filled with 0s.
        When ``limit`` is at least the number of items, all items are ranked.
    :return: ``lil_matrix`` with the shape of ``A`` holding the popularity scores.
    :raises ValueError: if ``limit`` is negative.
    """
    if limit < 0:
        raise ValueError("limit must be non-negative, got {0}".format(limit))

    n_users, n_items = A.shape
    recommend = sp.lil_matrix(A.shape)

    # Never ask for more items than the matrix has.
    n_top = min(limit, n_items)
    if n_top == 0:
        return recommend

    # Counting the number of interactions
    item_count = np.asarray(A.sum(axis=0)).reshape(-1)

    if n_top < n_items:
        # Partial sort on the negated counts: the first n_top positions hold
        # the indexes of the n_top most popular items, in no particular order.
        part_sort_indexes = bn.argpartition(-item_count, kth=n_top)
        unsorted_idx_tops = part_sort_indexes[:n_top]
    else:
        # kth must be a valid position, so partitioning is impossible (and
        # pointless) when every item is selected.
        unsorted_idx_tops = np.arange(n_items)

    # Order the selection by ascending popularity so it lines up with the
    # ascending scores assigned below.
    ascending = np.argsort(item_count[unsorted_idx_tops])
    sorted_idx_tops = unsorted_idx_tops[ascending]

    # Column vector of row indexes; broadcasts against the item indexes so the
    # same scores are written into every row.
    all_rows = np.arange(n_users).reshape(n_users, 1)

    # We assign real values between 0.5 and 1 to the tops so we can employ ranking metrics.
    recommend[all_rows, sorted_idx_tops] = np.linspace(start=0.5,
                                                       stop=1.0,
                                                       num=n_top)

    return recommend
Example #3
0
    def __adjustTransitionMatrix(self, M: sparse.csc_matrix) \
            -> sparse.csc_matrix:
        """Rebalance the columns of an unadjusted Markov transition matrix.

        For every column index below ``self.N`` whose sum is less than 1.0 and
        not exactly 0, each non-zero entry of that column is overwritten with
        ``1 / count``, where ``count`` is the number of stored entries in the
        column. Columns whose sum is 0 or at least 1.0 are left untouched.

        The matrix is modified in place and the same object is returned.

        Arguments:
            M {sparse.csc_matrix} -- Unadjusted transition matrix.
                NOTE(review): assumed to have at least ``self.N`` columns --
                confirm against the caller.

        Returns:
            sparse.csc_matrix -- Adjusted Markov transition matrix.
        """

        logging.info('Building adjusted transition matrix')

        # Progress marker handed to and returned by logLoopProgress
        last_check = 0

        logging.info('Computing sum of columns of M')
        magnitudes = M.sum(axis=0)

        logging.info('Iterating through each column, rebalancing')

        # Iterate through each column
        for i in range(self.N):
            # Isolating magnitude
            magnitude = magnitudes[0, i]

            # If criteria are satisfied, redistribute probabilities
            if (magnitude < 1.0) and (magnitude != 0):
                # Slice the column once and reuse it for both queries
                column = M[:, i]
                count = column.nnz

                # Isolate nonzero indexes
                nonzero_idx = column.nonzero()[0]

                # Update all of them with the balanced probability in a single
                # assignment; only existing entries are written, so the
                # sparsity structure does not change.
                M[nonzero_idx, i] = 1 / count

            # Log progress
            last_check = logLoopProgress(i, last_check, self.N,
                                         'Stable transition matrix')

        # Lazy %-formatting; the previous literal used a backslash continuation
        # that leaked the source indentation into the logged message.
        logging.info(
            'Built adjusted Markov transition matrix with %d elements', M.nnz)

        return M
Example #4
0
def adjacency2degree(adj: csc_matrix) -> csc_matrix:
    """Build the degree matrix of the adjacency matrix `adj`.

    The degree of a node is taken to be the sum of its row in `adj`; the
    result is a CSC-format sparse matrix with those sums on the main diagonal.
    """
    # Row sums come back two-dimensional; flatten them into a plain vector.
    row_totals = np.asarray(adj.sum(axis=1))
    degrees = row_totals.reshape(-1)
    return diags(degrees, format='csc')