Ejemplo n.º 1
0
    def predict(
        self,
        X,
        only_topk=None,
        csr_codes=None,
        cond_prob=None,
        normalized=False,
        threads=-1,
    ):
        """Score the rows of X against labels and return a sorted CSR matrix.

        Two modes, selected by ``csr_codes``:

        * ``csr_codes is None``: every label is scored through the dense
          product ``X.dot(self.W)``.
        * ``csr_codes`` given: only the (instance, label) pairs present in
          ``csr_codes.dot(self.C.T)`` are scored, via ``self.predict_values``.

        Args:
            X: feature matrix; its second dimension must equal
                ``self.nr_features``.
            only_topk: forwarded to ``smat_util.sorted_csr_from_coo``
                (presumably keeps the k best entries per row; confirm there).
            csr_codes: optional sparse matrix with one row per row of X and
                ``self.nr_codes`` columns.
            cond_prob: optional object exposing ``transform(values, inplace=True)``
                and ``combiner(values, code_values)``; applied to the scores
                when truthy.
            normalized: when true, the result is passed through
                ``sk_normalize(..., axis=1, norm="l1")``.
            threads: forwarded to ``self.predict_values``.

        Returns:
            The prediction matrix produced by ``smat_util.sorted_csr_from_coo``,
            optionally row-normalized.
        """
        assert X.shape[1] == self.nr_features
        if csr_codes is not None:
            assert self.C is not None, "This model does not have C"
            assert csr_codes.shape[0] == X.shape[0]
            assert csr_codes.shape[1] == self.nr_codes

            # Stored zeros in csr_codes would be dropped by the sparse product,
            # so shift every stored value away from zero first and undo the
            # shift on the product afterwards.
            # NOTE(review): undoing the shift by plain subtraction assumes each
            # product entry stems from a single code entry with unit weight in
            # C -- confirm against how C is built.
            shift = None
            has_stored_zero = (csr_codes.data == 0).sum() != 0
            if has_stored_zero:
                shift = sp.absolute(csr_codes.data).max() + 1
                csr_codes = smat.csr_matrix(
                    (csr_codes.data + shift, csr_codes.indices,
                     csr_codes.indptr),
                    shape=csr_codes.shape,
                )
            csr_labels = csr_codes.dot(self.C.T).tocsr()
            if shift is not None:
                csr_labels.data -= shift

            # Expand the CSR row pointer into one explicit row index per
            # stored entry so the pairs can be scored in COO form.
            row_nnz = csr_labels.indptr[1:] - csr_labels.indptr[:-1]
            all_rows = sp.arange(X.shape[0], dtype=sp.uint32)
            inst_idx = sp.repeat(all_rows, row_nnz)
            label_idx = csr_labels.indices.astype(sp.uint32)

            scores = self.predict_values(X, inst_idx, label_idx,
                                         threads=threads)
            if cond_prob:
                scores = cond_prob.transform(scores, inplace=True)
                scores = cond_prob.combiner(scores, csr_labels.data)

            pred_csr = smat_util.sorted_csr_from_coo(csr_labels.shape,
                                                     inst_idx,
                                                     label_idx,
                                                     scores,
                                                     only_topk=only_topk)
        else:
            # No candidate codes: score all labels densely.
            dense_scores = X.dot(self.W).toarray()
            if cond_prob:
                dense_scores = cond_prob.transform(dense_scores, inplace=True)
            coo = smat_util.dense_to_coo(dense_scores)
            pred_csr = smat_util.sorted_csr_from_coo(coo.shape,
                                                     coo.row,
                                                     coo.col,
                                                     coo.data,
                                                     only_topk=only_topk)

        if normalized:
            pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm="l1")
        return pred_csr
Ejemplo n.º 2
0
 def predict_with_coo_labels(self, X, inst_idx, label_idx, only_topk=None):
     """Score explicit (instance, label) pairs and return them as a sorted CSR.

     Args:
         X: feature matrix; ``X.shape[0]`` gives the number of result rows.
         inst_idx: row index of each pair to score.
         label_idx: label (column) index of each pair to score.
         only_topk: forwarded to ``smat_util.sorted_csr_from_coo``.

     Returns:
         Matrix of shape ``(X.shape[0], self.nr_labels)`` built by
         ``smat_util.sorted_csr_from_coo`` from the scored pairs.
     """
     scores = self.predict_values(X, inst_idx, label_idx)
     return smat_util.sorted_csr_from_coo(
         (X.shape[0], self.nr_labels),
         inst_idx,
         label_idx,
         scores,
         only_topk=only_topk,
     )
Ejemplo n.º 3
0
    def predict_new(
        self,
        X,
        only_topk=None,
        csr_codes=None,
        cond_prob=None,
        normalized=False,
        threads=-1,
    ):
        """Score the rows of X against labels using the native prediction routine.

        Two modes, selected by ``csr_codes``:

        * ``csr_codes is None``: every label is scored through the dense
          product ``X.dot(self.W)``.
        * ``csr_codes`` given: candidate labels and their scores are obtained
          from ``clib.multilabel_predict_with_codes``.

        Args:
            X: feature matrix; its second dimension must equal
                ``self.nr_features``.
            only_topk: forwarded to the ``smat_util`` sorting helpers
                (presumably keeps the k best entries per row; confirm there).
            csr_codes: optional sparse matrix with one row per row of X and
                ``self.nr_codes`` columns.
            cond_prob: optional object exposing ``transform(values, inplace=True)``
                and ``combiner(values, code_values)``; applied to the scores
                when truthy.
            normalized: when true, the result is passed through
                ``sk_normalize(..., axis=1, norm="l1")``.
            threads: forwarded to ``clib.multilabel_predict_with_codes``.

        Returns:
            The sorted prediction matrix, optionally row-normalized.
        """
        assert X.shape[1] == self.nr_features
        if csr_codes is None:
            dense = X.dot(self.W).toarray()
            if cond_prob:
                dense = cond_prob.transform(dense, inplace=True)
            coo = smat_util.dense_to_coo(dense)
            pred_csr = smat_util.sorted_csr_from_coo(coo.shape,
                                                     coo.row,
                                                     coo.col,
                                                     coo.data,
                                                     only_topk=only_topk)
        else:  # csr_codes is given
            assert self.C is not None, "This model does not have C"
            assert csr_codes.shape[0] == X.shape[0]
            assert csr_codes.shape[1] == self.nr_codes
            if not csr_codes.has_sorted_indices:
                csr_codes = csr_codes.sorted_indices()

            # Stored zeros in csr_codes would be dropped by the sparse product,
            # so shift every stored value away from zero first and undo the
            # shift on the returned label matrix afterwards.
            offset = None
            if (csr_codes.data == 0).any():
                offset = sp.absolute(csr_codes.data).max() + 1
                csr_codes = smat.csr_matrix(
                    (csr_codes.data + offset, csr_codes.indices,
                     csr_codes.indptr),
                    shape=csr_codes.shape,
                )

            # Indices are already sorted at this point, so csr_codes can be
            # handed over directly without another sorted_indices() copy.
            pZ = PyMatrix.init_from(csr_codes, self.dtype)
            csr_labels, pred_csr = clib.multilabel_predict_with_codes(
                X, self.pW, self.pC, pZ, threads=threads)
            if offset is not None:
                csr_labels.data -= offset

            if cond_prob:
                val = cond_prob.transform(pred_csr.data, inplace=True)
                val = cond_prob.combiner(val, csr_labels.data)
                # combiner() may return a fresh array; write it back so the
                # combined scores are what gets sorted and returned.
                pred_csr.data[:] = val

            pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)

        if normalized:
            pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm="l1")
        return pred_csr
Ejemplo n.º 4
0
    def predict(self,
                X,
                only_topk=None,
                transform=None,
                csr_codes=None,
                cond_prob=None,
                normalized=False,
                threads=-1):
        """Score the rows of X against labels and return a sorted CSR matrix.

        Two modes, selected by ``csr_codes``:

        * ``csr_codes is None``: every label is scored through the dense
          product ``X.dot(self.W)``.
        * ``csr_codes`` given: only the (instance, label) pairs present in
          ``csr_codes.dot(self.C.T)`` are scored, via ``self.predict_values``.

        Args:
            X: feature matrix; its second dimension must equal
                ``self.nr_features``.
            only_topk: forwarded to the ``smat_util`` sorting helpers
                (presumably keeps the k best entries per row; confirm there).
            transform: optional callable invoked as
                ``transform(values, inplace=True)`` on the raw scores.
            csr_codes: optional sparse matrix with one row per row of X and
                ``self.nr_codes`` columns.
            cond_prob: optional object whose ``combiner(values, code_values)``
                merges the scores with the values of the candidate-label
                matrix; only used when ``csr_codes`` is given.
            normalized: when true, the result is passed through
                ``sk_normalize(..., axis=1, norm='l1')``.
            threads: forwarded to ``self.predict_values``.

        Returns:
            The sorted prediction matrix, optionally row-normalized.
        """
        assert X.shape[1] == self.nr_features
        if csr_codes is not None:
            assert self.C is not None, "This model does not have C"
            assert csr_codes.shape[0] == X.shape[0]
            assert csr_codes.shape[1] == self.nr_codes

            csr_labels = csr_codes.dot(self.C.T).tocsr()

            # Expand the CSR row pointer into one explicit row index per
            # stored entry so the pairs can be scored individually.
            row_ptr = csr_labels.indptr
            row_nnz = row_ptr[1:] - row_ptr[:-1]
            all_rows = sp.arange(X.shape[0], dtype=sp.uint32)
            inst_idx = sp.repeat(all_rows, row_nnz)
            label_idx = csr_labels.indices.astype(sp.uint32)

            scores = self.predict_values(X, inst_idx, label_idx,
                                         threads=threads)
            if transform:
                scores = transform(scores, inplace=True)
            if cond_prob:
                scores[:] = cond_prob.combiner(scores, csr_labels.data)

            # Reuse the sparsity pattern of csr_labels for the score matrix.
            unsorted_csr = smat.csr_matrix(
                (scores, label_idx, csr_labels.indptr),
                shape=csr_labels.shape,
            )
            pred_csr = smat_util.sorted_csr(unsorted_csr, only_topk=only_topk)
        else:
            # No candidate codes: score all labels densely.
            dense_scores = X.dot(self.W).toarray()
            if transform:
                dense_scores = transform(dense_scores, inplace=True)
            coo = smat_util.dense_to_coo(dense_scores)
            pred_csr = smat_util.sorted_csr_from_coo(coo.shape,
                                                     coo.row,
                                                     coo.col,
                                                     coo.data,
                                                     only_topk=only_topk)

        if normalized:
            pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
        return pred_csr
Ejemplo n.º 5
0
    def predict_new(self,
                    X,
                    only_topk=None,
                    transform=None,
                    csr_codes=None,
                    cond_prob=None,
                    normalized=False,
                    threads=-1):
        """Score the rows of X against labels using the native prediction routine.

        Two modes, selected by ``csr_codes``:

        * ``csr_codes is None``: every label is scored through the dense
          product ``X.dot(self.W)``.
        * ``csr_codes`` given: candidate labels and their scores are obtained
          from ``clib.multilabel_predict_with_codes``.

        Args:
            X: feature matrix; its second dimension must equal
                ``self.nr_features``.
            only_topk: forwarded to the ``smat_util`` sorting helpers
                (presumably keeps the k best entries per row; confirm there).
            transform: optional callable invoked as
                ``transform(values, inplace=True)`` on the raw scores.
            csr_codes: optional sparse matrix with one row per row of X and
                ``self.nr_codes`` columns.
            cond_prob: optional object whose ``combiner(values, code_values)``
                merges the scores with the values of the candidate-label
                matrix; only used when ``csr_codes`` is given.
            normalized: when true, the result is passed through
                ``sk_normalize(..., axis=1, norm='l1')``.
            threads: forwarded to ``clib.multilabel_predict_with_codes``.

        Returns:
            The sorted prediction matrix, optionally row-normalized.
        """
        assert X.shape[1] == self.nr_features
        if csr_codes is not None:
            assert self.C is not None, "This model does not have C"
            assert csr_codes.shape[0] == X.shape[0]
            assert csr_codes.shape[1] == self.nr_codes

            code_matrix = PyMatrix.init_from(csr_codes, self.dtype)
            csr_labels, pred_csr = clib.multilabel_predict_with_codes(
                X, self.pW, self.pC, code_matrix, threads=threads)

            # scores starts out as pred_csr's own data array, so the in-place
            # slice assignment below updates pred_csr as long as transform()
            # hands back that same array.
            scores = pred_csr.data
            if transform:
                scores = transform(scores, inplace=True)
            if cond_prob:
                scores[:] = cond_prob.combiner(scores, csr_labels.data)

            pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)
        else:
            # No candidate codes: score all labels densely.
            dense_scores = X.dot(self.W).toarray()
            if transform:
                dense_scores = transform(dense_scores, inplace=True)
            coo = smat_util.dense_to_coo(dense_scores)
            pred_csr = smat_util.sorted_csr_from_coo(coo.shape,
                                                     coo.row,
                                                     coo.col,
                                                     coo.data,
                                                     only_topk=only_topk)

        if normalized:
            pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
        return pred_csr
Ejemplo n.º 6
0
def sorted_csr(csr, only_topk=None):
    """Rebuild a CSR matrix through ``smat_util.sorted_csr_from_coo``.

    The row pointer of ``csr`` is expanded into one explicit row index per
    stored entry, and the resulting COO triplets are handed to
    ``smat_util.sorted_csr_from_coo`` together with ``only_topk``.

    Args:
        csr: a ``smat.csr_matrix``; anything else fails the assertion.
        only_topk: forwarded unchanged to ``smat_util.sorted_csr_from_coo``.

    Returns:
        Whatever ``smat_util.sorted_csr_from_coo`` returns for these inputs.
    """
    assert isinstance(csr, smat.csr_matrix)
    # Number of stored entries in each row, from consecutive row pointers.
    nnz_per_row = csr.indptr[1:] - csr.indptr[:-1]
    all_rows = sp.arange(csr.shape[0], dtype=sp.uint32)
    entry_rows = sp.repeat(all_rows, nnz_per_row)
    return smat_util.sorted_csr_from_coo(
        csr.shape, entry_rows, csr.indices, csr.data, only_topk)