def __init__(self, X, Y, C=None, dtype=None):
    """Wrap the input matrices as ``PyMatrix`` handles.

    Args:
        X: matrix stored as ``self.pX``; also supplies the default dtype.
        Y: matrix stored as ``self.pY``.
        C: optional matrix stored as ``self.pC``. When it is ``None`` no
            ``Z`` product is built and ``None`` is handed to
            ``PyMatrix.init_from`` for ``self.pZ``.
        dtype: element type passed to every ``PyMatrix.init_from`` call;
            defaults to ``X.dtype``.
    """
    dtype = X.dtype if dtype is None else dtype

    self.pX = PyMatrix.init_from(X, dtype)
    self.pY = PyMatrix.init_from(Y, dtype)
    self.pC = PyMatrix.init_from(C, dtype)

    # Z = Y * C, computed through self.Y / self.C (defined outside this
    # block; presumably views over pY / pC -- confirm against the class).
    if C is not None:
        Z = smat.csr_matrix(self.Y.dot(self.C))
    else:
        Z = None
    self.pZ = PyMatrix.init_from(Z, dtype)

    self.dtype = dtype
def __init__(self, W, C=None, dtype=None):
    """Store the weight matrix ``W`` and optional matrix ``C`` as ``PyMatrix``.

    Args:
        W: weight matrix; supplies the default dtype and must have as many
            columns as ``C`` has rows.
        C: optional matrix, either a ``PyMatrix`` (its ``buf`` is inspected)
            or any object exposing ``shape``.
        dtype: element type for both wrappers; defaults to ``W.dtype``.

    Raises:
        AssertionError: if ``C`` is given and its row count differs from
            the column count of ``W``.
    """
    if C is not None:
        expected_rows = W.shape[1]
        if not isinstance(C, PyMatrix):
            assert C.shape[0] == expected_rows, 'C:{} W:{}'.format(
                C.shape, W.shape)
        else:
            # A PyMatrix exposes its underlying matrix through ``buf``.
            assert C.buf.shape[0] == expected_rows

    dtype = W.dtype if dtype is None else dtype

    self.pC = PyMatrix.init_from(C, dtype)
    self.pW = PyMatrix.init_from(W, dtype)
def __init__(self, X, Y, C=None, bias=-1.0, dtype=None):
    """Wrap the input matrices as ``PyMatrix`` handles, optionally adding a bias column.

    Args:
        X: matrix stored as ``self.pX``; supplies the default dtype. When
            ``bias > 0`` it is first extended via
            ``smat_util.append_column(X, bias)``.
        Y: matrix stored as ``self.pY``.
        C: optional matrix stored as ``self.pC``; when ``None`` no ``Z``
            product is built.
        bias: value remembered as ``self.bias``; only a strictly positive
            value triggers the extra column.
        dtype: element type passed to every ``PyMatrix.init_from`` call;
            defaults to ``X.dtype`` (read before any column is appended).
    """
    dtype = X.dtype if dtype is None else dtype

    self.bias = bias
    if self.bias > 0:
        X = smat_util.append_column(X, self.bias)

    self.pX = PyMatrix.init_from(X, dtype)
    self.pY = PyMatrix.init_from(Y, dtype)
    self.pC = PyMatrix.init_from(C, dtype)

    # Z = Y * C, computed through self.Y / self.C (defined outside this
    # block; presumably views over pY / pC -- confirm against the class).
    if C is not None:
        Z = smat.csr_matrix(self.Y.dot(self.C))
    else:
        Z = None
    self.pZ = PyMatrix.init_from(Z, dtype)

    self.dtype = dtype
def predict_new(
        self,
        X,
        only_topk=None,
        csr_codes=None,
        cond_prob=None,
        normalized=False,
        threads=-1,
):
    """Score ``X`` against this model, optionally restricted by ``csr_codes``.

    Args:
        X: input whose ``shape[1]`` must equal ``self.nr_features``.
        only_topk: forwarded to the ``smat_util`` sorting helpers.
        csr_codes: optional sparse matrix with one row per row of ``X`` and
            ``self.nr_codes`` columns. When ``None`` the dense product
            ``X.dot(self.W)`` is scored; otherwise prediction is delegated
            to ``clib.multilabel_predict_with_codes``.
        cond_prob: optional post-processor exposing ``transform`` and
            ``combiner``; skipped when falsy.
        normalized: when true, rows of the result are L1-normalized.
        threads: forwarded to ``clib.multilabel_predict_with_codes``.

    Returns:
        The sorted prediction matrix produced by the ``smat_util`` helpers
        (L1-normalized per row when ``normalized`` is true).

    Raises:
        AssertionError: on a feature/code dimension mismatch, or when
            ``csr_codes`` is given but the model has no ``C``.
    """
    assert X.shape[1] == self.nr_features

    if csr_codes is None:
        dense = X.dot(self.W).toarray()
        if cond_prob:
            dense = cond_prob.transform(dense, inplace=True)
        coo = smat_util.dense_to_coo(dense)
        pred_csr = smat_util.sorted_csr_from_coo(
            coo.shape, coo.row, coo.col, coo.data, only_topk=only_topk)
    else:
        assert self.C is not None, "This model does not have C"
        assert csr_codes.shape[0] == X.shape[0]
        assert csr_codes.shape[1] == self.nr_codes

        if not csr_codes.has_sorted_indices:
            csr_codes = csr_codes.sorted_indices()

        offset = None
        if (csr_codes.data == 0).sum() != 0:
            # Trick: shift every stored value by a constant so explicitly
            # stored zeros are not dropped by the sparse product; the shift
            # is undone on csr_labels after prediction.
            offset = sp.absolute(csr_codes.data).max() + 1
            csr_codes = smat.csr_matrix(
                (csr_codes.data + offset, csr_codes.indices, csr_codes.indptr),
                shape=csr_codes.shape,
            )
        else:
            # Kept from the original: hand a sorted copy to PyMatrix.
            csr_codes = csr_codes.sorted_indices()

        pZ = PyMatrix.init_from(csr_codes, self.dtype)
        csr_labels, pred_csr = clib.multilabel_predict_with_codes(
            X, self.pW, self.pC, pZ, threads=threads)
        if offset is not None:
            csr_labels.data -= offset

        if cond_prob:
            scores = cond_prob.transform(pred_csr.data, inplace=True)
            scores = cond_prob.combiner(scores, csr_labels.data)
            # Do not rely on the combiner mutating its first argument: if it
            # returned a fresh array, copy it back so the combined scores
            # actually end up in pred_csr.
            if scores is not None and scores is not pred_csr.data:
                pred_csr.data[:] = scores

        pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)

    if normalized:
        pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm="l1")
    return pred_csr
def predict_new(self, X, only_topk=None, csr_codes=None, beam_size=2,
                max_depth=None, cond_prob=True, normalized=False, threads=-1):
    """Predict by running ``X`` through the first ``max_depth`` models of the chain.

    Each level's output is fed to the next level as ``csr_codes``. Every
    level but the last keeps ``beam_size`` entries; the last level keeps
    ``only_topk``.

    Args:
        X: input whose ``shape[1]`` must equal ``self.nr_features`` (checked
            before any bias column is appended).
        only_topk: ``only_topk`` used at the last evaluated level.
        csr_codes: initial codes handed to the first level (may be ``None``).
        beam_size: ``only_topk`` used at every level except the last.
        max_depth: number of levels to evaluate; defaults to, and is capped
            at, ``self.depth``.
        cond_prob: ``None``/``False`` selects an identity/no-op
            ``PostProcessor``; ``True`` selects
            ``PostProcessor(Transform.get_lpsvm(3), Combiner.mul)``; a
            ``PostProcessor`` instance is used as given.
        normalized: when true, rows of the final result are L1-normalized.
        threads: forwarded to each level's ``predict_new``.

    Returns:
        The prediction matrix returned by the last evaluated level
        (L1-normalized per row when ``normalized`` is true).

    Raises:
        AssertionError: if ``cond_prob`` does not resolve to a
            ``PostProcessor`` or the feature count does not match.
    """
    if max_depth is None:
        max_depth = self.depth

    # Equality (not identity) is kept on purpose so that values such as
    # 0 / 1 or numpy booleans keep selecting the same defaults as before.
    if cond_prob is None or cond_prob == False:  # noqa: E712
        cond_prob = PostProcessor(Transform.identity, Combiner.noop)
    elif cond_prob == True:  # noqa: E712
        cond_prob = PostProcessor(Transform.get_lpsvm(3), Combiner.mul)
    # Fixed typo: the message used to call the undefined name ``tpye``,
    # turning a failed check into a NameError.
    assert isinstance(cond_prob, PostProcessor), type(cond_prob)

    assert X.shape[1] == self.nr_features
    if self.bias > 0:
        X = smat_util.append_column(X, self.bias)
    pX = PyMatrix.init_from(X, dtype=self.model_chain[0].pW.dtype)

    max_depth = min(self.depth, max_depth)
    last_level = max_depth - 1
    pred_csr = csr_codes
    for d in range(max_depth):
        cur_model = self.model_chain[d]
        local_only_topk = only_topk if d == last_level else beam_size
        pred_csr = cur_model.predict_new(pX, only_topk=local_only_topk,
                                         csr_codes=pred_csr,
                                         cond_prob=cond_prob,
                                         threads=threads)

    if normalized:
        pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
    return pred_csr
def __init__(self, X, Y, C=None, bias=-1.0, dtype=None, Z_pred=None):
    """Wrap the input matrices as ``PyMatrix`` handles, optionally merging ``Z_pred`` into ``Z``.

    Args:
        X: matrix stored as ``self.pX``; supplies the default dtype. When
            ``bias > 0`` it is first extended via
            ``smat_util.append_column(X, bias)``.
        Y: matrix stored as ``self.pY``.
        C: optional matrix stored as ``self.pC``; when ``None`` no ``Z``
            product is built.
        bias: value remembered as ``self.bias``; only a strictly positive
            value triggers the extra column.
        dtype: element type passed to every ``PyMatrix.init_from`` call;
            defaults to ``X.dtype`` (read before any column is appended).
        Z_pred: optional matrix added to ``Z = Y * C``; only used when both
            it and ``Z`` are available.
    """
    dtype = X.dtype if dtype is None else dtype

    self.bias = bias
    if self.bias > 0:
        X = smat_util.append_column(X, self.bias)

    self.pX = PyMatrix.init_from(X, dtype)
    self.pY = PyMatrix.init_from(Y, dtype)
    self.pC = PyMatrix.init_from(C, dtype)

    # Z = Y * C, computed through self.Y / self.C (defined outside this
    # block; presumably views over pY / pC -- confirm against the class).
    if C is not None:
        Z = smat.csr_matrix(self.Y.dot(self.C))
    else:
        Z = None

    if Z is not None and Z_pred is not None:
        # Shapes are printed before the sum (existing diagnostic output).
        print("Z", Z.shape)
        print("Z_pred", Z_pred.shape)
        Z = (Z + Z_pred).tocsr()

    self.pZ = PyMatrix.init_from(Z, dtype)
    self.dtype = dtype
def predict_values(self, X, inst_idx, label_idx, out=None, threads=-1):
    """Evaluate ``clib.sparse_inner_products`` for the given index arrays.

    Args:
        X: input whose ``shape[1]`` must equal ``self.nr_features``.
        inst_idx: index array, cast to ``uint32`` before the call
            (presumably row indices into ``X`` -- confirm against clib).
        label_idx: index array, cast to ``uint32`` before the call
            (presumably column indices into ``W`` -- confirm against clib).
        out: optional output buffer; when ``None`` a zero array shaped like
            ``inst_idx`` with the dtype of ``self.pW`` is allocated.
        threads: forwarded to ``clib.sparse_inner_products``.

    Returns:
        Whatever ``clib.sparse_inner_products`` returns.

    Raises:
        AssertionError: if the feature count of ``X`` does not match.
    """
    assert X.shape[1] == self.nr_features

    value_dtype = self.pW.dtype
    if out is None:
        out = sp.zeros(inst_idx.shape, dtype=value_dtype)

    pX = PyMatrix.init_from(X, dtype=value_dtype)
    inst_u32 = inst_idx.astype(sp.uint32)
    label_u32 = label_idx.astype(sp.uint32)
    return clib.sparse_inner_products(pX, self.pW, inst_u32, label_u32,
                                      out, threads=threads)
def __init__(self, X, Y, C=None, dtype=None, Z_pred=None,
             negative_sampling_scheme=None):
    """Wrap the input matrices as ``PyMatrix`` handles and pick ``Z`` by sampling scheme.

    ``Z`` starts as ``Y * C`` (or ``None`` when ``C`` is ``None``) and is
    then adjusted according to ``negative_sampling_scheme``:

    * ``None`` or ``1`` (and any unrecognized value): ``Z`` is left as is.
    * ``0``: ``Z + Z_pred`` converted to CSR, when both are available;
      otherwise ``Z`` is left as is.
    * ``2``: ``Z_pred`` replaces ``Z`` when ``Z_pred`` is given.

    Args:
        X: matrix stored as ``self.pX``; supplies the default dtype.
        Y: matrix stored as ``self.pY``.
        C: optional matrix stored as ``self.pC``.
        dtype: element type passed to every ``PyMatrix.init_from`` call;
            defaults to ``X.dtype``.
        Z_pred: optional matrix combined with / substituted for ``Z``.
        negative_sampling_scheme: selector described above.
    """
    if dtype is None:
        dtype = X.dtype

    self.pX = PyMatrix.init_from(X, dtype)
    self.pY = PyMatrix.init_from(Y, dtype)
    self.pC = PyMatrix.init_from(C, dtype)

    # Z = Y * C, computed through self.Y / self.C (defined outside this
    # block; presumably views over pY / pC -- confirm against the class).
    Z = None if C is None else smat.csr_matrix(self.Y.dot(self.C))

    if negative_sampling_scheme == 0:
        # Guard both operands: summing with None used to raise here, while
        # scheme 2 already tolerated a missing Z_pred.
        if Z is not None and Z_pred is not None:
            Z = (Z + Z_pred).tocsr()
    elif negative_sampling_scheme == 2:
        if Z_pred is not None:
            Z = Z_pred
    # Scheme None / 1 (and anything else) keeps Z unchanged.

    self.pZ = PyMatrix.init_from(Z, dtype)
    self.dtype = dtype
def predict_new(self, X, only_topk=None, csr_codes=None, beam_size=2,
                max_depth=None, cond_prob=True, normalized=False, threads=-1):
    """Predict by running ``X`` through the first ``max_depth`` models of the chain.

    Each level's output is fed to the next level as ``csr_codes``. Every
    level but the last keeps ``beam_size`` entries; the last level keeps
    ``only_topk``.

    Args:
        X: input wrapped as a ``PyMatrix`` using the dtype of the first
            model's ``pW``.
        only_topk: ``only_topk`` used at the last evaluated level.
        csr_codes: initial codes handed to the first level (may be ``None``).
        beam_size: ``only_topk`` used at every level except the last.
        max_depth: number of levels to evaluate; defaults to, and is capped
            at, ``self.depth``.
        cond_prob: ``None``/``False`` selects an identity/no-op
            ``PostProcessor``; ``True`` selects
            ``PostProcessor(Transform.get_lpsvm(3), Combiner.mul)``; a
            ``PostProcessor`` instance is used as given.
        normalized: when true, rows of the final result are L1-normalized.
        threads: forwarded to each level's ``predict_new``.

    Returns:
        The prediction matrix returned by the last evaluated level
        (L1-normalized per row when ``normalized`` is true).

    Raises:
        AssertionError: if ``cond_prob`` does not resolve to a
            ``PostProcessor``.
    """
    if max_depth is None:
        max_depth = self.depth

    # Equality (not identity) is kept on purpose so that values such as
    # 0 / 1 or numpy booleans keep selecting the same defaults as before.
    if cond_prob is None or cond_prob == False:  # noqa: E712
        cond_prob = PostProcessor(Transform.identity, Combiner.noop)
    elif cond_prob == True:  # noqa: E712
        cond_prob = PostProcessor(Transform.get_lpsvm(3), Combiner.mul)
    # Fixed typo: the message used to call the undefined name ``tpye``,
    # turning a failed check into a NameError.
    assert isinstance(cond_prob, PostProcessor), type(cond_prob)

    pX = PyMatrix.init_from(X, dtype=self.model_chain[0].pW.dtype)
    max_depth = min(self.depth, max_depth)
    transform = cond_prob.transform if cond_prob else Transform.identity

    last_level = max_depth - 1
    pred_csr = csr_codes
    for d in range(max_depth):
        cur_model = self.model_chain[d]
        local_only_topk = only_topk if d == last_level else beam_size
        pred_csr = cur_model.predict_new(pX, only_topk=local_only_topk,
                                         csr_codes=pred_csr,
                                         transform=transform,
                                         cond_prob=cond_prob,
                                         threads=threads)

    if normalized:
        pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
    return pred_csr
def predict_new(self, X, only_topk=None, transform=None, csr_codes=None,
                cond_prob=None, normalized=False, threads=-1):
    """Score ``X`` against this model, optionally restricted by ``csr_codes``.

    Args:
        X: input whose ``shape[1]`` must equal ``self.nr_features``.
        only_topk: forwarded to the ``smat_util`` sorting helpers.
        transform: optional callable applied to the raw scores as
            ``transform(values, inplace=True)``; skipped when falsy.
        csr_codes: optional sparse matrix with one row per row of ``X`` and
            ``self.nr_codes`` columns. When ``None`` the dense product
            ``X.dot(self.W)`` is scored; otherwise prediction is delegated
            to ``clib.multilabel_predict_with_codes``.
        cond_prob: optional object whose ``combiner`` merges the scores
            with the values of the code matrix; only used on the
            ``csr_codes`` path and skipped when falsy.
        normalized: when true, rows of the result are L1-normalized.
        threads: forwarded to ``clib.multilabel_predict_with_codes``.

    Returns:
        The sorted prediction matrix produced by the ``smat_util`` helpers
        (L1-normalized per row when ``normalized`` is true).

    Raises:
        AssertionError: on a feature/code dimension mismatch, or when
            ``csr_codes`` is given but the model has no ``C``.
    """
    assert X.shape[1] == self.nr_features

    if csr_codes is not None:
        assert self.C is not None, "This model does not have C"
        # Same feature-count check as above, repeated as in the original.
        assert X.shape[1] == self.nr_features
        assert csr_codes.shape[0] == X.shape[0]
        assert csr_codes.shape[1] == self.nr_codes

        pZ = PyMatrix.init_from(csr_codes, self.dtype)
        csr_labels, pred_csr = clib.multilabel_predict_with_codes(
            X, self.pW, self.pC, pZ, threads=threads)

        scores = pred_csr.data
        if transform:
            scores = transform(scores, inplace=True)
        if cond_prob:
            # Slice assignment writes the combined scores back in place.
            scores[:] = cond_prob.combiner(scores, csr_labels.data)
        pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)
    else:
        dense_scores = X.dot(self.W).toarray()
        if transform:
            dense_scores = transform(dense_scores, inplace=True)
        coo = smat_util.dense_to_coo(dense_scores)
        pred_csr = smat_util.sorted_csr_from_coo(
            coo.shape, coo.row, coo.col, coo.data, only_topk=only_topk)

    if not normalized:
        return pred_csr
    return sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
def __init__(self, feat_mat):
    """Wrap ``feat_mat`` with ``PyMatrix.init_from`` and keep it as ``self.py_feat_mat``."""
    wrapped = PyMatrix.init_from(feat_mat)
    self.py_feat_mat = wrapped