Esempio n. 1
0
 def eval(self, predicted_labels, K=5):
     """
         Score a prediction matrix against the stored ground truth.

         Args:
             predicted_labels: csr_matrix: predicted labels with shape
                 (num_instances, num_labels); restricted to the rows in
                 self.valid_idx when that attribute is set
             K: int: cut-off forwarded to topk and to every metric
         Returns:
             list: [precision, nDCG]; when self.inv_propensity_scores is
                 set, the propensity-scored precision and nDCG (each
                 divided by the value obtained for the weighted ground
                 truth) are appended
     """
     row_filter = self.valid_idx
     if row_filter is not None:
         predicted_labels = predicted_labels[row_filter]
     assert predicted_labels.shape == self.true_labels.shape

     # NOTE(review): self.num_labels / -100 look like the padding index and
     # padding value expected by topk -- confirm against its signature.
     top_predictions = topk(predicted_labels, K, self.num_labels, -100)
     metrics = [
         self.precision(top_predictions, K),
         self.nDCG(top_predictions, K),
     ]

     # Without propensity scores only the vanilla metrics can be reported.
     if self.inv_propensity_scores is None:
         return metrics

     # Weight every true label by its inverse propensity (last entry of the
     # score vector is dropped) and rank the result; it serves as the
     # normaliser for the propensity-scored metrics.
     weighted_truth = self._rank_sparse(
         self.true_labels.dot(
             sp.spdiags(self.inv_propensity_scores[:-1],
                        diags=0,
                        m=self.num_labels,
                        n=self.num_labels)),
         K)

     ps_precision = self.PSprecision(top_predictions, K)
     ps_precision = ps_precision / self.PSprecision(weighted_truth, K)
     ps_ndcg = self.PSnDCG(top_predictions, K)
     ps_ndcg = ps_ndcg / self.PSnDCG(weighted_truth, K)
     return metrics + [ps_precision, ps_ndcg]
Esempio n. 2
0
def _topk_positions(scores, k):
    """Return, per row, the column positions of the k largest scores.

    Positions are ordered from the highest score to the lowest.
    """
    # argpartition isolates the k largest entries without a full sort;
    # only those k candidates are then sorted in descending order.
    candidates = np.argpartition(scores, -k)[:, -k:]
    candidate_scores = np.take_along_axis(scores, candidates, axis=-1)
    order = np.argsort(-candidate_scores, axis=-1)
    return np.take_along_axis(candidates, order, axis=-1)


def _get_topk(X, pad_indx=0, k=5):
    """
    Get top-k indices (row-wise); Support for
    * csr_matrix
    * 2 np.ndarray with indices and values
    * np.ndarray with indices or values

    Arguments:
    ---------
    X: csr_matrix, np.ndarray or dict
        csr_matrix: csr_matrix with nnz at relevant places
        np.ndarray: array with indices (dtype=int) or values (dtype=float)
        dict: 'indices' -> np.ndarray of indices and
              'scores' -> np.ndarray of scores
    pad_indx: int, optional (default=0)
        padding index (useful when values are <k); for sparse input it is
        overridden by the number of columns of X
    k: int, optional (default=5)
        fetch top-k indices

    Returns:
    -------
    np.ndarray: top-k indices for each row

    Raises:
    ------
    ValueError: dict input whose 'indices' and 'scores' differ in shape
        or that holds fewer than k columns
    NotImplementedError: unsupported container type or ndarray dtype
    """
    if sp.issparse(X):
        X = X.tocsr()
        # NOTE: sorts in place; when X is already CSR this may touch the
        # caller's matrix.
        X.sort_indices()
        pad_indx = X.shape[1]
        return topk(X, k, pad_indx, 0, return_values=False)

    if isinstance(X, np.ndarray):
        if np.issubdtype(X.dtype, np.integer):
            warnings.warn("Assuming indices are sorted in desc order.")
            return X[:, :k]
        # np.floating covers every float width; the removed np.float alias
        # raised AttributeError on NumPy >= 1.24.
        if np.issubdtype(X.dtype, np.floating):
            return _topk_positions(X, k)
        raise NotImplementedError(
            "Unknown dtype; please pass an integer or floating np.ndarray.")

    if isinstance(X, dict):
        indices = X['indices']
        scores = X['scores']
        if indices.shape != scores.shape:
            raise ValueError(
                "Dimension mis-match: expected array of shape {} found {}"
                .format(indices.shape, scores.shape))
        if scores.shape[1] < k:
            raise ValueError("Number of elements in X is < {}".format(k))
        # Rank columns by score, then map the winning positions back to
        # the label indices stored at those positions.
        return np.take_along_axis(
            indices, _topk_positions(scores, k), axis=-1)

    raise NotImplementedError(
        "Unknown type; please pass csr_matrix, np.ndarray or dict.")
 def merge(self, indices_kcentroid, indices_knn, sim_kcentroid, sim_knn):
     """
         Blend the kNN and k-centroid shortlists into a single one.

         Both (indices, similarity) pairs are converted with
         csr_from_arrays to matrices of shape
         (len(indices_kcentroid), self.num_labels + 1), mixed as
         gamma * knn + (1 - gamma) * kcentroid, and the best
         self.num_neighbours entries per row are selected with topk.

         Returns:
             (indices, sim) as produced by topk with return_values=True
     """
     num_rows = len(indices_kcentroid)
     # NOTE(review): the extra column is presumably reserved for the
     # padding index -- confirm against csr_from_arrays / topk.
     target_shape = (num_rows, self.num_labels+1)

     knn_matrix = csr_from_arrays(indices_knn, sim_knn, target_shape)
     kcentroid_matrix = csr_from_arrays(
         indices_kcentroid, sim_kcentroid, target_shape)

     blended = self.gamma*knn_matrix + (1-self.gamma)*kcentroid_matrix
     merged_indices, merged_sim = topk(
         blended,
         k=self.num_neighbours,
         pad_ind=self.pad_ind,
         pad_val=self.pad_val,
         return_values=True)
     return merged_indices, merged_sim
Esempio n. 4
0
 def from_pretrained(self, fname):
     """
         Load label shortlist and similarity for each instance

         The matrix stored at `fname` is read with load_npz, reduced to
         self.size_shortlist entries per row via sp.topk and handed to
         self.update_shortlist.
     """
     stored_shortlist = load_npz(fname)
     # NOTE(review): `sp` must be a module that exposes `topk` (scipy.sparse
     # does not) -- confirm which module this file imports as `sp`.
     # self.num_labels / -1000 are presumably the padding index and value.
     label_indices, label_sims = sp.topk(
         stored_shortlist,
         self.size_shortlist,
         self.num_labels,
         -1000,
         return_values=True)
     self.update_shortlist(label_indices, label_sims)