def significance(
    TTM: sp.csc_matrix,
    metric: Union[Callable, KeynessMetric],
    normalize: bool = False,
    n_contexts=None,
    n_words=None,
) -> "tuple[np.ndarray, tuple[np.ndarray, np.ndarray]]":
    """Compute significance weights for the non-zero co-occurrences in `TTM`.

    Args:
        TTM (sp.csc_matrix): Sparse co-occurrence count matrix.
        metric (Union[Callable, KeynessMetric]): Either a callable that
            computes the weights, or a key that is looked up in
            `METRIC_FUNCTION` (falling back to `_undefined` when the key is
            missing). The callable is invoked with the keyword arguments
            `Cij`, `Z`, `Zr`, `ii`, `jj`, `K`, `N` and `normalize`.
        normalize (bool, optional): Forwarded unchanged to `metric`.
            Defaults to False.
        n_contexts (optional): Forwarded to `metric` as `K`. Defaults to None.
        n_words (optional): Forwarded to `metric` as `N`. Defaults to None.

    Returns:
        A `(weights, (rows, cols))` tuple -- NOT a sparse matrix. `weights`
        holds the non-zero weights after NaN and +/-inf have been replaced
        by 0; `rows` and `cols` are the matching indices into `TTM`. This is
        the `(data, (row, col))` layout accepted by the scipy sparse matrix
        constructors.
    """
    metric_fn = metric if callable(metric) else METRIC_FUNCTION.get(metric, _undefined)

    # Total number of observations (sum of all counts).
    Z: float = float(TTM.sum())

    # Row sums of TTM as a flat float64 vector.
    Zr: np.ndarray = np.array(TTM.sum(axis=1), dtype=np.float64).flatten()

    # Row and column indices of the non-zero elements, and their counts.
    ii, jj = TTM.nonzero()
    Cij: np.ndarray = np.array(TTM[ii, jj], dtype=np.float64).flatten()

    # Compute weights (normalization, if any, is up to the metric).
    weights: np.ndarray = metric_fn(
        Cij=Cij,
        Z=Z,
        Zr=Zr,
        ii=ii,
        jj=jj,
        K=n_contexts,
        N=n_words,
        normalize=normalize,
    )

    # Undefined results (NaN, +/-inf) are zeroed in place so that they are
    # dropped together with the genuine zeros below.
    np.nan_to_num(weights, copy=False, posinf=0.0, neginf=0.0, nan=0.0)

    # `nonzero()` returns a tuple of index arrays; for the 1-D vectors used
    # here it selects the same positions in weights, ii and jj.
    keep = weights.nonzero()

    return (weights[keep], (ii[keep], jj[keep]))
def PopularItems(A: sp.csc_matrix, limit=50):
    """
    Returns the most popular items.

    Popularity of an item is the sum of its column in ``A``. The ``limit``
    most popular items receive evenly spaced scores from 0.5 (least popular
    of the selection) up to 1.0 (most popular), so ranking metrics can be
    applied. The same scores are written into every row of the result.

    :param A: user-item matrix
    :param limit: how many popular items should be returned. The other
        entries will be filled with 0s. When ``limit`` is greater than or
        equal to the number of items, all items are ranked.
    :return: ``sp.lil_matrix`` with the same shape as ``A``
    :raises ValueError: if ``limit`` is negative
    """
    if limit < 0:
        raise ValueError("limit must be non-negative, got {0}".format(limit))

    n_users, n_items = A.shape
    recommend = sp.lil_matrix(A.shape)

    # Never ask for more items than the matrix actually has.
    top_k = min(limit, n_items)
    if top_k == 0:
        return recommend

    # Counting the number of interactions per item
    item_count = np.asarray(A.sum(axis=0)).reshape(-1)

    if top_k < n_items:
        # Partial sort: the first top_k indexes are the most popular items,
        # in no particular order.
        candidates = bn.argpartition(-item_count, kth=top_k)[:top_k]
    else:
        # kth == n_items is out of range for argpartition, and every item is
        # selected anyway, so skip the partial sort.
        candidates = np.arange(n_items)

    # Order the selected items by ascending popularity so that the most
    # popular one is paired with the highest score.
    ranked_items = candidates[np.argsort(item_count[candidates])]

    # Column vector of row indexes: broadcasts the assignment to every user.
    all_rows = np.arange(n_users).reshape(n_users, 1)

    # We assign real values between 0.5 and 1 to the tops so we can employ
    # ranking metrics.
    recommend[all_rows, ranked_items] = np.linspace(start=0.5, stop=1.0, num=top_k)

    return recommend
def __adjustTransitionMatrix(self, M: sparse.csc_matrix) \
        -> sparse.csc_matrix:
    """Function to compute the adjusted Markov transition matrix, given
    the unadjusted matrix.

    For each of the first ``self.N`` columns whose sum is non-zero and
    strictly below 1.0, every non-zero entry of the column is overwritten
    with ``1 / count``, where ``count`` is the number of stored entries in
    that column. Columns summing to 0, or to 1.0 or more, are left untouched.

    ``M`` is modified in place and the same object is returned.

    Arguments:
        M {sparse.csc_matrix} -- Unadjusted Markov transition matrix.

    Returns:
        sparse.csc_matrix -- Adjusted Markov transition matrix.
    """
    logging.info('Building adjusted transition matrix')

    # Progress marker threaded through logLoopProgress
    last_check = 0

    logging.info('Computing sum of columns of M')
    magnitudes = M.sum(axis=0)

    logging.info('Iterating through each column, rebalancing')
    for i in range(self.N):
        magnitude = magnitudes[0, i]

        # If criteria are satisfied, redistribute probabilities
        if (magnitude < 1.0) and (magnitude != 0):
            # Slice the column once and reuse it for both queries
            column = M[:, i]
            count = column.nnz
            nonzero_idx = column.nonzero()[0]

            # Only existing entries are overwritten, so one vectorised
            # assignment replaces the per-element writes without changing
            # the sparsity structure.
            M[nonzero_idx, i] = 1 / count

        # Log progress
        last_check = logLoopProgress(i, last_check, self.N,
                                     'Stable transition matrix')

    logging.info(
        'Built adjusted Markov transition matrix with %d elements', M.nnz)

    return M
def adjacency2degree(adj: csc_matrix) -> csc_matrix:
    """Build the diagonal degree matrix of an adjacency matrix.

    The i-th diagonal entry is the sum of row i of ``adj``; all off-diagonal
    entries are zero. The result is returned in CSC format.
    """
    row_sums = adj.sum(axis=1)
    degrees = np.asarray(row_sums).ravel()
    return diags(degrees, format='csc')