def predict_for_user(self, user, items=None, ratings=None):
    """
    Predict scores for one user from item-to-item nearest neighbours.

    For each requested item, the items the user has a nonzero score for are
    ranked by descending similarity to it, filtered by ``min_threshold``
    (when set), truncated to ``max_nn`` and passed to ``_nn_score``.

    :param user: user id, looked up with ``self.users.get_loc``
    :param items: a list or np.array of item ids; all of ``self.items``
        when omitted
    :param ratings: unused by this method
    :return: a pd.Series keyed by item id; ``np.nan`` for items not found
        in ``self.items`` and for items with fewer than ``min_nn``
        neighbours left after thresholding
    """
    if items is not None:
        items = np.array(items)
    else:
        items = self.items.values
    valid_mask = self.items.get_indexer(items) >= 0

    min_threshold = self.min_threshold
    min_nn = self.min_nn
    max_nn = self.max_nn
    item_sim = self.item_sim_matrix

    result = dict()
    if not valid_mask.all():
        self.log.debug("user %s: %s are not valid", user, items[~valid_mask])
        for e in items[~valid_mask]:
            result[e] = np.nan
    items = items[valid_mask]

    upos = self.users.get_loc(user)

    item_bias = None
    if self.bias is not None:
        item_bias = self.bias.get_item_bias()

    assert self.rmat.getformat() == 'csr'
    item_scores = _get_xs(self.rmat, upos)

    # Narrow down to the items the user rated. The index must be flat (1-D):
    # np.argwhere alone yields a (k, 1) column, which makes the argsort below
    # sort along the length-1 axis and return all zeros instead of a ranking.
    # NOTE(review): assumes item_scores is a 1-D dense vector and item_sim a
    # dense 2-D array -- confirm against _get_xs and the similarity builder.
    rated_idx = np.flatnonzero(item_scores != 0)

    for item in items:
        ipos = self.items.get_loc(item)

        # rated items ordered by descending similarity to the target item
        order = np.argsort(item_sim[ipos, rated_idx])[::-1]
        item_idx = rated_idx[order]

        # similarities need to exceed min_threshold
        if min_threshold is not None:
            item_idx = item_idx[item_sim[ipos, item_idx] > min_threshold]

        if len(item_idx) < min_nn:
            self.log.debug(
                "item %s does not have enough neighbors (%s < %s)",
                item, len(item_idx), min_nn)
            result[item] = np.nan
            continue

        item_idx = item_idx[:max_nn]
        result[item] = _nn_score(item_scores, item_sim, ipos, item_idx, item_bias)

    return pd.Series(result)
def predict_for_user(self, user, items=None, ratings=None):
    """
    Look up one user's stored scores and return them for the given items.

    The vector returned by ``_get_xs(self.rmat, <user position>)`` has its
    zero entries replaced with ``np.nan`` and is then handed to
    ``scores_to_series`` together with ``self.items`` and the requested
    item ids.

    :param user: user id, looked up with ``self.users.get_loc``
    :param items: a list or np.array of item ids; ``self.items.values``
        when omitted
    :param ratings: unused by this method
    :return: whatever ``scores_to_series`` builds from the scores
        (presumably a pd.Series keyed by item id -- verify against the
        helper)
    """
    user_pos = self.users.get_loc(user)
    user_row = _get_xs(self.rmat, user_pos)

    # Zeros are overwritten with NaN in place on the returned vector.
    zero_entries = user_row == 0
    user_row[zero_entries] = np.nan

    wanted = np.array(items) if items is not None else self.items.values
    return scores_to_series(user_row, self.items, wanted)
def select(self, user, candidates=None):
    """
    Return the ids of items whose entry in the user's score vector is
    nonzero, optionally restricted to a candidate list.

    NOTE(review): the nonzero test picks items the user HAS a score for;
    confirm this is the intended selection rule for this selector.

    :param user: user id, looked up with ``self.users.get_loc``
    :param candidates: a list or np.array of item ids; when given, only
        positions that are both nonzero for the user and present among the
        candidates are kept (``np.intersect1d``, so sorted and unique)
    :return: np.array of the selected item ids
    """
    user_row = _get_xs(self.rmat, self.users.get_loc(user))
    positions = np.argwhere(user_row != 0).flatten()

    if candidates is None:
        return np.array(self.items[positions])

    # Candidates unknown to self.items map to -1 and therefore never
    # intersect with the non-negative positions found above.
    candidate_positions = self.items.get_indexer(np.array(candidates))
    positions = np.intersect1d(positions, candidate_positions)
    return np.array(self.items[positions])
def predict_for_user(self, user, items=None, ratings=None):
    """
    Predict scores for one user from user-to-user nearest neighbours.

    All users are ranked once by descending similarity to ``user`` (and
    filtered by ``min_threshold`` when set). For each requested item, the
    ranked users with a nonzero score for that item are truncated to
    ``max_nn`` and passed to ``_nn_score``.

    :param user: user id, looked up with ``self.users.get_loc``
    :param items: a list or np.array of item ids; all of ``self.items``
        when omitted
    :param ratings: unused by this method
    :return: a pd.Series keyed by item id; ``np.nan`` for items not found
        in ``self.items`` and for items with fewer than ``min_nn``
        neighbours
    """
    min_threshold = self.min_threshold
    min_nn = self.min_nn
    max_nn = self.max_nn
    user_sim = self.user_sim_matrix

    result = dict()

    if items is not None:
        items = np.array(items)
    else:
        items = self.items.values

    valid_mask = self.items.get_indexer(items) >= 0
    if not valid_mask.all():
        for e in items[~valid_mask]:
            result[e] = np.nan
    items = items[valid_mask]

    upos = self.users.get_loc(user)

    # user positions ordered by descending similarity to the target user
    full_user_idx = np.argsort(user_sim[upos, :])[::-1]

    # similarities need to exceed min_threshold
    if min_threshold is not None:
        full_user_idx = full_user_idx[user_sim[upos, full_user_idx] > min_threshold]

    u_bias = None
    if self.bias is not None:
        u_bias = self.bias.get_user_bias()

    # The matrix format does not change per item, so check it once.
    assert self.rmat.getformat() == 'csc'

    for item in items:
        ipos = self.items.get_loc(item)
        user_scores = _get_xs(self.rmat, ipos)

        # narrow down to users who rated the item, keeping similarity order
        user_idx = full_user_idx[user_scores[full_user_idx] != 0]
        user_idx = user_idx[:max_nn]

        if len(user_idx) < min_nn:
            self.log.debug("user %s does not have enough neighbors (%s < %s)",
                           user, len(user_idx), min_nn)
            result[item] = np.nan
            # Skip scoring; without this the NaN would be overwritten below
            # and the min_nn requirement would never take effect.
            continue

        result[item] = _nn_score(user_scores, user_sim, upos, user_idx, u_bias)

    return pd.Series(result)