コード例 #1
0
ファイル: rf_linear.py プロジェクト: jicksonp/Transformer-XMC
    def predict(
        self,
        X,
        csr_codes=None,
        only_topk=None,
        cond_prob=True,
        normalize=False,
        **arg_kw,
    ):
        """Score labels for each instance from its code assignments.

        The scores are the product of ``csr_codes`` with
        ``self.code_to_label``; ``X`` is only used to validate the number of
        rows.

        Args:
            X: instance matrix; only ``X.shape[0]`` is read here.
            csr_codes: sparse instance-by-code matrix. Required even though
                the default is ``None``.
            only_topk: forwarded to ``smat_util.sorted_csr``.
            cond_prob: when true, the stored values of ``csr_codes`` weight
                the product; otherwise every stored entry counts as 1.
            normalize: when true, rows of the result are L1-normalized.
            **arg_kw: accepted for signature compatibility and ignored.

        Returns:
            The CSR matrix returned by ``smat_util.sorted_csr`` (optionally
            normalized).

        Raises:
            AssertionError: if ``csr_codes`` is missing or its shape does not
                match ``X`` / ``self.nr_codes``.
        """
        assert csr_codes is not None, "csr_codes must be provided for CountModel.predict"
        assert csr_codes.shape[0] == X.shape[0]
        assert csr_codes.shape[1] == self.nr_codes
        if cond_prob:
            pred_csr = csr_codes.dot(self.code_to_label).tocsr()
        else:
            # Temporarily replace the stored values with ones so the product
            # ignores the code weights. The caller's matrix is restored even
            # if the product raises.
            saved_data = csr_codes.data
            csr_codes.data = sp.ones_like(saved_data)
            try:
                pred_csr = csr_codes.dot(self.code_to_label).tocsr()
            finally:
                csr_codes.data = saved_data

        pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)
        if normalize:
            pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm="l1")
        return pred_csr
コード例 #2
0
 def rank_average(*args):
     """Average the relevance matrices of several CSR predictions.

     Each input is converted with ``smat_util.get_relevance_csr`` using the
     largest per-row entry count found across all inputs; the converted
     matrices are summed, passed through ``smat_util.sorted_csr`` and divided
     by the number of inputs.
     """
     CsrEnsembler.check_validlity(*args)
     # Largest number of stored entries in any single row of any input.
     widest_row = max((m.indptr[1:] - m.indptr[:-1]).max() for m in args)
     total = 0
     for m in args:
         total = total + smat_util.get_relevance_csr(m, widest_row)
     averaged = smat_util.sorted_csr(total)
     averaged.data /= len(args)
     return averaged
コード例 #3
0
ファイル: rf_linear.py プロジェクト: jicksonp/Transformer-XMC
    def predict_new(
        self,
        X,
        only_topk=None,
        csr_codes=None,
        cond_prob=None,
        normalized=False,
        threads=-1,
    ):
        """Predict label scores for ``X``, optionally restricted by codes.

        Without ``csr_codes`` the dense product ``X.dot(self.W)`` is scored
        for every label. With ``csr_codes`` the work is delegated to
        ``clib.multilabel_predict_with_codes``.

        Args:
            X: instance-by-feature matrix with ``self.nr_features`` columns.
            only_topk: forwarded to the ``smat_util`` sorting helpers.
            csr_codes: optional sparse instance-by-code matrix.
            cond_prob: optional object providing ``transform(values,
                inplace=True)`` and ``combiner(values, label_values)``.
            normalized: when true, rows of the result are L1-normalized.
            threads: forwarded to ``clib.multilabel_predict_with_codes``.

        Returns:
            The sorted CSR prediction matrix (optionally normalized).

        Raises:
            AssertionError: on shape mismatches, or if ``csr_codes`` is given
                but ``self.C`` is ``None``.
        """
        assert X.shape[1] == self.nr_features
        if csr_codes is None:
            dense = X.dot(self.W).toarray()
            if cond_prob:
                dense = cond_prob.transform(dense, inplace=True)
            coo = smat_util.dense_to_coo(dense)
            pred_csr = smat_util.sorted_csr_from_coo(coo.shape,
                                                     coo.row,
                                                     coo.col,
                                                     coo.data,
                                                     only_topk=only_topk)
        else:  # csr_codes is given
            assert self.C is not None, "This model does not have C"
            assert csr_codes.shape[0] == X.shape[0]
            assert csr_codes.shape[1] == self.nr_codes
            if not csr_codes.has_sorted_indices:
                csr_codes = csr_codes.sorted_indices()

            has_explicit_zeros = (csr_codes.data == 0).sum() != 0
            if has_explicit_zeros:
                # Trick: shift all stored values away from zero so explicitly
                # stored zeros are not dropped by the sparse product inside
                # the C library; the shift is undone on csr_labels below.
                offset = sp.absolute(csr_codes.data).max() + 1
                csr_codes = smat.csr_matrix(
                    (csr_codes.data + offset, csr_codes.indices,
                     csr_codes.indptr),
                    shape=csr_codes.shape,
                )

            # csr_codes already has sorted indices here, so no extra sorted
            # copy is needed before handing it to the C library.
            pZ = PyMatrix.init_from(csr_codes, self.dtype)
            csr_labels, pred_csr = clib.multilabel_predict_with_codes(
                X, self.pW, self.pC, pZ, threads=threads)
            if has_explicit_zeros:
                csr_labels.data -= offset

            if cond_prob:
                val = cond_prob.transform(pred_csr.data, inplace=True)
                # Write the combined scores back explicitly so the result
                # does not depend on the combiner mutating its input.
                pred_csr.data[:] = cond_prob.combiner(val, csr_labels.data)

            pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)

        if normalized:
            pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm="l1")
        return pred_csr
コード例 #4
0
 def round_robin(*args):
     """Merge several CSR predictions, giving earlier inputs larger bonuses.

     Each input is converted with ``smat_util.get_relevance_csr``; the k-th
     input (0-based) has ``(len(args) - k) / (len(args) + 1)`` added to its
     stored values. The inputs are merged with an element-wise maximum,
     passed through ``smat_util.sorted_csr`` and divided by ``len(args)``.
     """
     CsrEnsembler.check_validlity(*args)
     n_inputs = len(args)
     step = 1.0 / (n_inputs + 1.0)
     # Largest number of stored entries in any single row of any input.
     widest_row = max((m.indptr[1:] - m.indptr[:-1]).max() for m in args)
     merged = None
     for position, m in enumerate(args):
         scored = smat_util.get_relevance_csr(m, widest_row)
         scored.data[:] += (n_inputs - position) * step
         merged = scored if merged is None else merged.maximum(scored)
     merged = smat_util.sorted_csr(merged)
     merged.data /= n_inputs
     return merged
コード例 #5
0
 def load_prediction(path_to_file, only_topk=None):
     """Read a text prediction file into a sorted CSR matrix.

     The first line holds two integers, the number of instances and the
     number of labels. Each of the following ``nr_insts`` lines holds
     whitespace-separated ``label:value`` pairs for one instance.

     Args:
         path_to_file: path of the text file to read.
         only_topk: forwarded to ``smat_util.sorted_csr``.

     Returns:
         The matrix returned by ``smat_util.sorted_csr``.
     """
     with open(path_to_file, 'r') as fin:
         header = fin.readline().strip().split()
         nr_insts, nr_labels = [int(tok) for tok in header]
         coo = smat_util.coo_appender((nr_insts, nr_labels))
         for row in range(nr_insts):
             for pair in fin.readline().strip().split():
                 fields = pair.split(':')
                 coo.append(row, int(fields[0]), float(fields[1]))
     return smat_util.sorted_csr(coo.tocsr(), only_topk=only_topk)
コード例 #6
0
    def predict(self,
                X,
                only_topk=None,
                transform=None,
                csr_codes=None,
                cond_prob=None,
                normalized=False,
                threads=-1):
        """Predict label scores for ``X``, optionally restricted by codes.

        Args:
            X: instance-by-feature matrix with ``self.nr_features`` columns.
            only_topk: forwarded to the ``smat_util`` sorting helpers.
            transform: optional callable applied to the raw scores as
                ``transform(values, inplace=True)``.
            csr_codes: optional sparse instance-by-code matrix. When given,
                only the labels stored in ``csr_codes.dot(self.C.T)`` are
                scored.
            cond_prob: optional object whose ``combiner(values,
                label_values)`` merges the scores with the candidate values.
            normalized: when true, rows of the result are L1-normalized.
            threads: forwarded to ``self.predict_values``.

        Returns:
            The sorted CSR prediction matrix (optionally normalized).
        """
        assert X.shape[1] == self.nr_features
        if csr_codes is not None:
            assert self.C is not None, "This model does not have C"
            assert X.shape[1] == self.nr_features
            assert csr_codes.shape[0] == X.shape[0]
            assert csr_codes.shape[1] == self.nr_codes
            # Candidate labels per instance: stored entries of codes x C^T.
            candidates = (csr_codes.dot(self.C.T)).tocsr()
            row_nnz = candidates.indptr[1:] - candidates.indptr[:-1]
            # One (row, col) pair per stored candidate entry.
            rows = sp.repeat(sp.arange(X.shape[0], dtype=sp.uint32), row_nnz)
            cols = candidates.indices.astype(sp.uint32)
            scores = self.predict_values(X, rows, cols, threads=threads)
            if transform:
                scores = transform(scores, inplace=True)
            if cond_prob:
                scores[:] = cond_prob.combiner(scores, candidates.data)

            pred_csr = smat_util.sorted_csr(
                smat.csr_matrix((scores, cols, candidates.indptr),
                                shape=candidates.shape),
                only_topk=only_topk,
            )
        else:
            # No codes: score every label via the dense product.
            dense = X.dot(self.W).toarray()
            if transform:
                dense = transform(dense, inplace=True)
            coo = smat_util.dense_to_coo(dense)
            pred_csr = smat_util.sorted_csr_from_coo(
                coo.shape, coo.row, coo.col, coo.data, only_topk=only_topk)

        if normalized:
            pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
        return pred_csr
コード例 #7
0
 def generate(cls, tY, pY, topk=10):
     """Compute precision@k and recall@k for k = 1..topk.

     Args:
         tY: ground-truth CSR matrix; stored column indices of a row are its
             true labels.
         pY: predicted CSR matrix of the same shape; it is passed through
             ``smat_util.sorted_csr`` and the first ``topk`` stored entries
             of each row are used as the ranked predictions.
         topk: number of cut-offs to evaluate.

     Returns:
         ``cls(prec=..., recall=...)`` where both are length-``topk`` arrays
         averaged over the rows of ``tY``.

     Raises:
         AssertionError: if an input is not a CSR matrix or shapes differ.
     """
     assert isinstance(tY, smat.csr_matrix), type(tY)
     assert isinstance(pY, smat.csr_matrix), type(pY)
     assert tY.shape == pY.shape, "tY.shape = {}, pY.shape = {}".format(
         tY.shape, pY.shape)
     pY = smat_util.sorted_csr(pY)
     total_matched = sp.zeros(topk, dtype=sp.uint64)
     recall = sp.zeros(topk, dtype=sp.float64)
     for i in range(tY.shape[0]):
         truth = tY.indices[tY.indptr[i]:tY.indptr[i + 1]]
         matched = sp.isin(pY.indices[pY.indptr[i]:pY.indptr[i + 1]][:topk],
                           truth)
         cum_matched = sp.cumsum(matched, dtype=sp.uint64)
         # A row without true labels can never match; guarding the
         # denominator makes it contribute 0 recall instead of NaN (0/0).
         nr_truth = max(len(truth), 1)
         total_matched[:len(cum_matched)] += cum_matched
         recall[:len(cum_matched)] += cum_matched / nr_truth
         if len(cum_matched) != 0:
             # Fewer than topk predictions: carry the last cumulative count
             # forward to the remaining cut-offs.
             total_matched[len(cum_matched):] += cum_matched[-1]
             recall[len(cum_matched):] += cum_matched[-1] / nr_truth
     prec = total_matched / tY.shape[0] / sp.arange(1, topk + 1)
     recall = recall / tY.shape[0]
     return cls(prec=prec, recall=recall)
コード例 #8
0
    def predict_new(self,
                    X,
                    only_topk=None,
                    transform=None,
                    csr_codes=None,
                    cond_prob=None,
                    normalized=False,
                    threads=-1):
        """Predict label scores for ``X``, optionally restricted by codes.

        Args:
            X: instance-by-feature matrix with ``self.nr_features`` columns.
            only_topk: forwarded to the ``smat_util`` sorting helpers.
            transform: optional callable applied to the raw scores as
                ``transform(values, inplace=True)``.
            csr_codes: optional sparse instance-by-code matrix. When given,
                scoring is delegated to
                ``clib.multilabel_predict_with_codes``.
            cond_prob: optional object whose ``combiner(values,
                label_values)`` merges the scores with the label values
                returned by the C library.
            normalized: when true, rows of the result are L1-normalized.
            threads: forwarded to ``clib.multilabel_predict_with_codes``.

        Returns:
            The sorted CSR prediction matrix (optionally normalized).
        """
        assert X.shape[1] == self.nr_features
        if csr_codes is not None:
            assert self.C is not None, "This model does not have C"
            assert X.shape[1] == self.nr_features
            assert csr_codes.shape[0] == X.shape[0]
            assert csr_codes.shape[1] == self.nr_codes
            code_matrix = PyMatrix.init_from(csr_codes, self.dtype)
            csr_labels, pred_csr = clib.multilabel_predict_with_codes(
                X, self.pW, self.pC, code_matrix, threads=threads)
            scores = pred_csr.data
            if transform:
                scores = transform(scores, inplace=True)
            if cond_prob:
                scores[:] = cond_prob.combiner(scores, csr_labels.data)

            pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)
        else:
            # No codes: score every label via the dense product.
            dense = X.dot(self.W).toarray()
            if transform:
                dense = transform(dense, inplace=True)
            coo = smat_util.dense_to_coo(dense)
            pred_csr = smat_util.sorted_csr_from_coo(
                coo.shape, coo.row, coo.col, coo.data, only_topk=only_topk)

        if normalized:
            pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
        return pred_csr
コード例 #9
0
 def average(*args):
     """Return the element-wise mean of several CSR predictions.

     The inputs are summed, passed through ``smat_util.sorted_csr`` and the
     stored values are divided in place by the number of inputs.
     """
     CsrEnsembler.check_validlity(*args)
     n_inputs = len(args)
     mean_csr = smat_util.sorted_csr(sum(args))
     mean_csr.data /= n_inputs
     return mean_csr
コード例 #10
0
def get_optimal_codes(Y, C, only_topk=None):
    """Derive L1-normalized code assignments from the product ``Y.dot(C)``.

    Args:
        Y: matrix multiplied on the left (presumably instance-by-label —
            confirm against callers).
        C: matrix multiplied on the right (presumably label-by-code —
            confirm against callers).
        only_topk: forwarded to ``smat_util.sorted_csr``.

    Returns:
        The sorted product with each row L1-normalized.
    """
    product = Y.dot(C).tocsr()
    top_codes = smat_util.sorted_csr(product, only_topk=only_topk)
    return sk_normalize(top_codes, axis=1, copy=False, norm='l1')