def estimate(self, u, i):
    """Return the stored prediction for inner user ``u`` and inner item ``i``.

    The value is read from ``self.predictedRatings[u, i]``.

    Raises:
        PredictionImpossible: if the trainset does not know the user or the
            item, or if the stored value is below 0.001 (treated here as
            "no valid prediction").
    """
    trainset = self.trainset
    if not trainset.knows_user(u) or not trainset.knows_item(i):
        raise PredictionImpossible('User and/or item is unkown.')

    predicted = self.predictedRatings[u, i]
    if predicted < 0.001:
        raise PredictionImpossible('No valid prediction exists.')
    return predicted
def estimate(self, user_index, item_index):
    """Return the dot product of one user's and one item's feature columns.

    Column ``user_index`` of ``self.user_features`` is multiplied with
    column ``item_index`` of ``self.item_features``.

    Raises:
        PredictionImpossible: if the trainset does not know the user or
            does not know the item.
    """
    both_known = (self.trainset.knows_user(user_index)
                  and self.trainset.knows_item(item_index))
    if not both_known:
        raise PredictionImpossible("User and item are unknown")

    user_column = self.user_features[:, user_index]
    item_column = self.item_features[:, item_index]
    return user_column.T.dot(item_column)
def estimate(self, u, i):
    """Score an item from its publication date; later dates score higher.

    The user ``u`` is not consulted. The item's date is looked up in
    ``self.dates`` and mapped linearly onto
    ``[self.lower, self.lower + self.range]``.

    Returns:
        ``self.lower`` when ``self.cut_after`` is set and the item was
        published after it; ``self.upper`` when the item was published
        after ``self.threshold_date``; otherwise the scaled score.

    Raises:
        PredictionImpossible: if no date is registered for the item.
    """
    # Items the trainset does not know arrive as "UNK__<raw iid>"; dropping
    # the five-character prefix recovers the raw iid.
    if self.trainset.knows_item(i):
        item_id = self.trainset.to_raw_iid(i)
    else:
        item_id = i[5:]

    try:
        published_date = self.dates[item_id]
    except KeyError:
        raise PredictionImpossible('No publication date registered')

    # Optional upper bound, used to avoid recommending "impossible" articles.
    if self.cut_after and self.cut_after < published_date:
        return self.lower

    if self.threshold_date < published_date:
        return self.upper

    # Relative position of the date; intended to fall in [0, 1].
    age_fraction = (published_date - self.oldest_date) / self.date_scale

    # The weight softens the penalty so the oldest items are not excluded
    # outright.
    blended = (1.0 - self.weight) + (age_fraction * self.weight)

    # Convert to the scale used by the other algorithms.
    return self.lower + (self.range * blended)
def estimate(self, u, i):
    """Estimate a rating from the z-scored ratings of the k nearest neighbours.

    ``self.switch(u, i)`` yields ``(x, y)``. Every ``(x2, r)`` in
    ``self.yr[y]`` is a candidate neighbour with similarity
    ``self.sim[x, x2]``; the ``self.k`` most similar are kept and only those
    with positive similarity contribute.

    Returns:
        ``(est, details)`` where ``details`` is ``{'actual_k': n}`` and ``n``
        counts the neighbours that contributed. ``est`` is ``self.means[x]``
        when fewer than ``self.min_k`` neighbours contributed or the
        similarity sum is zero.

    Raises:
        PredictionImpossible: if the trainset does not know the user or item.
    """
    if not self.trainset.knows_user(u) or not self.trainset.knows_item(i):
        raise PredictionImpossible('User and/or item is unkown.')

    x, y = self.switch(u, i)

    candidates = []
    for other, rating in self.yr[y]:
        candidates.append((other, self.sim[x, other], rating))
    nearest = heapq.nlargest(self.k, candidates, key=lambda entry: entry[1])

    est = self.means[x]

    # Similarity-weighted sum of the neighbours' z-scores.
    total_sim = 0
    total_zscore = 0
    actual_k = 0
    for other, weight, rating in nearest:
        if not weight > 0:
            continue
        total_sim += weight
        total_zscore += weight * (rating - self.means[other]) / self.sigmas[other]
        actual_k += 1

    if actual_k < self.min_k:
        total_zscore = 0

    try:
        est += total_zscore / total_sim * self.sigmas[x]
    except ZeroDivisionError:
        # No positive-similarity neighbour: fall back to the plain mean.
        pass

    return est, {'actual_k': actual_k}
def estimate(self, u, i):
    """Estimate a rating as the mean rating of the 3 nearest raters plus biases.

    Users who rated item ``i`` (``self.trainset.ir[i]``) are ranked by
    ``self.sim[u, v]``. The ratings of the three most similar ones are
    averaged, then ``self.bu[u]`` and ``self.bi[i]`` are added. The three
    neighbours are also printed to stdout.

    Raises:
        PredictionImpossible: if the trainset does not know the user or the
            item, or if nobody rated the item (previously a
            ZeroDivisionError).
    """
    if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
        raise PredictionImpossible('User and/or item is unkown .')

    neighbors = [(v, self.sim[u, v], r) for (v, r) in self.trainset.ir[i]]
    if not neighbors:
        # Averaging zero ratings is undefined; report it the way callers expect.
        raise PredictionImpossible('Not enough neighbors.')

    # list.sort is stable, so ties keep their order from trainset.ir[i].
    neighbors.sort(key=lambda t: t[1], reverse=True)
    nearest = neighbors[:3]

    print('The 3 nearest neighbors of user', str(u), 'are:')
    for v, sim_uv, _ in nearest:
        print('user{0:} with sim {1:1.2f}'.format(v, sim_uv))

    est = sum(r for _, _, r in nearest) / len(nearest)

    # The guard above already ensured both ids are known, so both biases
    # apply unconditionally.
    est += self.bu[u]
    est += self.bi[i]
    return est
def estimate(self, u, i):
    """Estimate a rating from latent factors, optionally with bias terms.

    When ``self.biased`` is true the estimate starts at the trainset's
    global mean, adds ``self.bu[u]`` / ``self.bi[i]`` for whichever of the
    user / item is known, and adds ``dot(qi[i], pu[u])`` when both are
    known. When ``self.biased`` is false the estimate is the dot product
    alone.

    Raises:
        PredictionImpossible: only in the unbiased case, when the user or
            the item is unknown.
    """
    # Should we cythonize this as well?
    known_user = self.trainset.knows_user(u)
    known_item = self.trainset.knows_item(i)
    both_known = known_user and known_item

    if not self.biased:
        if not both_known:
            raise PredictionImpossible('User and item are unkown.')
        return np.dot(self.qi[i], self.pu[u])

    est = self.trainset.global_mean
    if known_user:
        est += self.bu[u]
    if known_item:
        est += self.bi[i]
    if both_known:
        est += np.dot(self.qi[i], self.pu[u])
    return est
def estimate(self, u, i):
    """Return the stored prediction ``self.predictions[u, i]``.

    Raises:
        PredictionImpossible: if the trainset does not know the user or the
            item.
    """
    known_user = self.trainset.knows_user(u)
    known_item = self.trainset.knows_item(i)
    if not (known_user and known_item):
        raise PredictionImpossible('User and item are unkown.')
    return self.predictions[u, i]
def estimate(self, u, i):
    """Return the mean similarity between item ``i`` and the user's closest items.

    For every item in ``self.trainset.ur[u]`` the similarity to ``i`` is
    read from ``self.sim_matrix``; the ``self.k`` largest values are
    averaged. The user's ratings themselves are not used.

    Raises:
        PredictionImpossible: if the user or item is unknown, or if the
            selected similarities sum to zero.
    """
    if not self.trainset.knows_user(u) or not self.trainset.knows_item(i):
        raise PredictionImpossible('User and/or item is unknown.')

    # Similarity between the test track and each track the user listened to.
    similarities = [self.sim_matrix[track_id, i]
                    for track_id, _ in self.trainset.ur[u]]
    similarities.sort(reverse=True)
    top_similarities = similarities[:self.k]

    similarity_sum = sum(top_similarities)
    if similarity_sum == 0:
        raise PredictionImpossible(
            'There are no neighbours for this track!')

    return similarity_sum / len(top_similarities)
def estimate(self, user, item):
    """Estimate the rating a user will give to an item.

    The estimate is ``kernel_a + kernel_c * dot(qi[item], pu[user])``.

    Raises:
        PredictionImpossible: if the trainset does not know the user or the
            item.
    """
    known_user = self.trainset.knows_user(user)
    known_item = self.trainset.knows_item(item)

    # The previous version seeded the estimate with the global mean, but
    # that value was never returned: the known case overwrote it and the
    # unknown case raised.
    if not (known_user and known_item):
        raise PredictionImpossible("User and item are unknown.")

    return self.kernel_a + self.kernel_c * np.dot(
        self.qi[item], self.pu[user])
def estimate(self, u, i):
    """Estimate a rating as the similarity-weighted average over flock neighbours.

    Neighbours come from ``self.get_neighbors_flock(raw_uid, self.k)``,
    which is iterated as a mapping of raw user id -> similarity. Each
    neighbour that rated the target (``y`` from ``self.switch(u, i)``)
    contributes ``rating * similarity``.

    The previous version returned the raw weighted sum: ``sum_sim`` was
    declared but never accumulated, so the result grew with the number of
    neighbours instead of being an average. It is now divided by the sum of
    the contributing similarities.

    Returns:
        ``(est, details)`` with ``details == {'actual_k': n}``, where ``n``
        counts the contributing neighbours.

    Raises:
        PredictionImpossible: if the user or item is unknown, if fewer than
            ``self.min_k`` neighbours rated the target, or if the
            contributing similarities sum to zero.
    """
    if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
        raise PredictionImpossible('User and/or item is unkown.')

    # Only the second element is needed; neighbours' ratings are read from
    # trainset.ur, so this presumably assumes a user-based configuration
    # where y is the item id -- TODO confirm.
    _, y = self.switch(u, i)

    k_neighbors = self.get_neighbors_flock(self.trainset.to_raw_uid(u), self.k)

    # Accumulate the pieces of the weighted average.
    sum_sim = sum_ratings = actual_k = 0
    if k_neighbors:
        for neighbor, sim in k_neighbors.items():
            neighbor_ratings = self.trainset.ur[self.trainset.to_inner_uid(neighbor)]
            for item, r in neighbor_ratings:
                if item == y:
                    sum_sim += sim
                    sum_ratings += r * sim
                    actual_k += 1

    if actual_k < self.min_k:
        raise PredictionImpossible('Not enough neighbors.')
    if sum_sim == 0:
        # Nothing to normalise by (no contributors, or the weights cancel).
        raise PredictionImpossible('Not enough neighbors.')

    est = sum_ratings / sum_sim
    details = {'actual_k': actual_k}
    return est, details
def estimate(self, u, i):
    """Estimate a rating from items that share a cluster with item ``i``.

    ``self.oas`` is indexed as a 2-D array: the first column is compared
    against ``i`` and the last column is used as a cluster label. For each
    item the user rated, the similarity between every member of ``i``'s
    cluster and that item is paired with the user's rating. The ``self.k``
    most similar pairs are kept and the result is the similarity-weighted
    mean of their ratings (positive similarities only).

    Raises:
        PredictionImpossible: if the user or item is unknown, if ``i`` has
            no row in ``self.oas``, or if no kept pair has positive
            similarity.
    """
    if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
        raise PredictionImpossible('User and/or item is unkown.')

    # Cluster label of item i: last column of its first matching row.
    # `except Exception` rather than a bare except, so KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    try:
        cluster_item_i = self.oas[self.oas[:, 0] == i][0][-1]
    except Exception as err:
        raise PredictionImpossible('No neighbors') from err

    # Rows in the same cluster as item i. This does not depend on the
    # rating being processed, so it is computed once, not once per rating.
    similar_oas = self.oas[self.oas[:, -1] == cluster_item_i]

    # Similarity scores between the cluster members and everything the
    # user rated.
    neighbors = []
    for rated_item, user_rating in self.trainset.ur[u]:
        for similar_oa in similar_oas:
            try:
                similarity = self.similarities[int(similar_oa[0]), rated_item]
            except Exception:
                # Best effort: skip pairs with no similarity entry.
                continue
            neighbors.append((similarity, user_rating))

    # Extract the top-K most-similar ratings.
    k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[0])

    # Average of the K neighbours' ratings, weighted by similarity.
    sim_total = weighted_sum = 0
    for sim_score, user_rating in k_neighbors:
        if sim_score > 0:
            sim_total += sim_score
            weighted_sum += sim_score * user_rating

    if sim_total == 0:
        raise PredictionImpossible('No neighbors')

    return weighted_sum / sim_total
def estimate(self, u, i):
    """Predict a rating as a similarity-weighted mean of the user's ratings.

    Parameters
    ----------
    u: int
        User id
    i: int
        Item id

    Returns
    -------
    rating: float
        Weighted mean of the user's ratings over the ``self.k`` items most
        similar to ``i``; only positive similarities contribute.

    Raises
    ------
    PredictionImpossible
        If the user or item is unknown, or no selected neighbour has a
        positive similarity.
    """
    if not self.trainset.knows_user(u) or not self.trainset.knows_item(i):
        raise PredictionImpossible('User and/or item is unkown.')

    # Pair each of the user's ratings with that item's similarity to i.
    scored = [(self.similarities[i, entry[0]], entry[1])
              for entry in self.trainset.ur[u]]

    # Keep the K most similar, then drop non-positive similarities.
    closest = nlargest(self.k, scored, key=lambda pair: pair[0])
    contributing = [(score, value) for score, value in closest if score > 0]

    sim_total = sum(score for score, _ in contributing)
    weighted_sum = sum(score * value for score, value in contributing)

    if sim_total == 0:
        raise PredictionImpossible('No neighbors')

    return weighted_sum / sim_total
def estimate(self, u, i):
    """Return the global mean plus the biases of whichever ids are known.

    Raises:
        PredictionImpossible: if ``self.biased`` is false.
            NOTE(review): the message says the user and item are unknown,
            but the condition is the ``biased`` flag -- confirm intent.
    """
    known_user = self.trainset.knows_user(u)
    known_item = self.trainset.knows_item(i)

    if not self.biased:
        raise PredictionImpossible('User and item are unkown.')

    est = self.trainset.global_mean
    if known_user:
        est += self.bu[u]
    if known_item:
        est += self.bi[i]
    return est
def estimate(self, u, i):
    """Predict a rating as a similarity-weighted mean of the user's ratings.

    Every item the user rated is scored by ``self.similarities[i, item]``.
    The ``self.k`` highest-scoring items are kept, and the user's ratings of
    those with positive similarity are averaged using the similarities as
    weights.

    Raises:
        PredictionImpossible: if the user or item is unknown, or no kept
            neighbour has a positive similarity.
    """
    trainset = self.trainset
    if not trainset.knows_user(u) or not trainset.knows_item(i):
        raise PredictionImpossible('User and/or item is unknown.')

    candidates = [(self.similarities[i, entry[0]], entry[1])
                  for entry in trainset.ur[u]]
    closest = heapq.nlargest(self.k, candidates, key=lambda pair: pair[0])

    weight_sum = 0
    rating_sum = 0
    for weight, user_rating in closest:
        if not weight > 0:
            continue
        weight_sum += weight
        rating_sum += weight * user_rating

    if weight_sum == 0:
        raise PredictionImpossible('No neighbours')

    return rating_sum / weight_sum
def estimate(self, u, i):
    """Estimate a rating from the cosine similarity of user and movie vectors.

    The movie's embedding is looked up by raw item id in ``self.movies`` and
    compared against ``self.mean_user_vectors[u]``. The similarity ``s`` is
    mapped linearly to ``2.25 * s + 2.75``.

    Raises:
        PredictionImpossible: if the trainset does not know the user or the
            item.
    """
    if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
        raise PredictionImpossible('User and/or item is unkown.')

    # Convert the inner item id to the raw id. The previous code read a bare
    # `trainset` here, which is not a local or a parameter; the trainset
    # lives on the instance.
    movie_raw_id = self.trainset.to_raw_iid(i)
    movie_vector = self.movies[movie_raw_id]["embedding"]

    similarity = 1 - spatial.distance.cosine(self.mean_user_vectors[u],
                                             movie_vector)

    # Rescale from [-1, 1] to [0.5, 5].
    return 2.25 * similarity + 2.75
def _estimate(self, trainset, uid, iid, top_k=10): #一般不使用 """ :param trainset: :param uid: 均使用的inner_id :param iid: 使用的内部的id :param top_k: :return: """ if not (trainset.knows_user(uid=uid) and trainset.knows_item(iid=iid)): raise PredictionImpossible('User and/or item is unkown.') neighbors = [(vid, self.sim[uid, vid]) for (vid, r) in trainset.ir[iid]] # 计算u和v之间的相似性,其中v描述了所有其他用户,他们也对项目I进行了评级。 neighbors = sorted(neighbors, key=lambda x: x[1], reverse=True) # 降序 # 相似度排序操作 for v, sim_uv in neighbors[:top_k]: print('user {0:} with sim {1:1.2f}'.format(v, sim_uv))
def estimate(self, u, i):
    """Print the three users most similar to ``u`` that rated item ``i``.

    Returns:
        The similarity of the last neighbour printed, i.e. the least similar
        of the (up to) three nearest. NOTE(review): this is a similarity,
        not a rating -- confirm that callers expect this.

    Raises:
        PredictionImpossible: if the user or item is unknown, or if nobody
            rated the item (previously an UnboundLocalError, because the
            return value was the leaked loop variable).
    """
    if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
        raise PredictionImpossible('User and/or item is unkown.')

    # Similarity between u and every other user v that also rated item i.
    neighbors = [(v, self.sim[u, v]) for (v, _) in self.trainset.ir[i]]
    if not neighbors:
        raise PredictionImpossible('Not enough neighbors.')

    # Most similar first; sorted() is stable, so ties keep their order.
    nearest = sorted(neighbors, key=lambda x: x[1], reverse=True)[:3]

    print('The 3 nearest neighbors of user', str(u), 'are:')
    for v, sim_uv in nearest:
        print('user {0:} with sim {1:1.2f}'.format(v, sim_uv))

    # Same value the loop variable used to hold after the loop.
    return nearest[-1][1]
def estimate(self, u, i):
    """Average the traditional-CF estimate and the "professional" estimate.

    Both helpers are called with ``(i, u)`` and return ``(estimate,
    details)``; the two estimates are weighted 0.5 each.

    Returns:
        ``(est, details)`` where ``details`` is the one returned by
        ``compute_by_traditional_cf``; the other helper's details are
        discarded.

    Raises:
        PredictionImpossible: if the trainset does not know the user or the
            item.
    """
    trainset = self.trainset
    if not trainset.knows_user(u) or not trainset.knows_item(i):
        # Unknown user or item. Per the original note, the parent class
        # catches this exception and substitutes the mean rating.
        raise PredictionImpossible('User and/or item is unknown')

    cf_est, cf_details = self.compute_by_traditional_cf(i, u)
    professional_est, _ = self.compute_by_professional(i, u)

    return 0.5 * cf_est + 0.5 * professional_est, cf_details
def estimate(self, u, i):
    """Placeholder estimator: prints the rating levels and returns 3.

    The neighbour list is built but not used yet; the function prints every
    integer from ``rating_scale[0]`` up to, but excluding,
    ``rating_scale[1]`` and returns the constant 3.

    Compared with the previous version this no longer shadows the builtin
    ``max``, no longer rebinds the parameter ``i`` as the loop variable, and
    drops the unused ``predicted``/``current`` locals.

    Raises:
        PredictionImpossible: if the trainset does not know the user or the
            item.
    """
    if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
        raise PredictionImpossible('User and/or item is unkown.')

    x, y = self.switch(u, i)

    # (similarity, rating) for each entry of yr[y].
    # TODO: not consumed yet; kept so missing similarity entries still
    # surface as lookup errors, as they did before.
    neighbors = [(self.similarity_matrix[x, x2], r) for (x2, r) in self.yr[y]]

    lowest, highest = self.trainset.rating_scale[0], self.trainset.rating_scale[1]
    for level in range(lowest, highest):
        print(level)

    return 3
def compute_by_traditional_cf(self, i, u):
    """Estimate a rating as the similarity-weighted mean over nearest raters.

    Users who rated item ``i`` are ranked by ``self.sim[u, v]``. Among the
    ``self.k`` most similar, those with positive similarity contribute
    their rating weighted by similarity. The function name is printed to
    stdout on every call.

    Returns:
        ``(est, details)`` with ``details == {'actual_k': n}``, where ``n``
        counts the contributing neighbours.

    Raises:
        PredictionImpossible: if fewer than ``self.min_k`` neighbours
            contributed.
    """
    print('compute_by_traditional_cf')

    # Every user who rated item i, with similarity to u and their rating.
    raters = [(v, self.sim[u, v], r) for (v, r) in self.trainset.ir[i]]

    # Most similar first.
    raters.sort(key=lambda entry: entry[1], reverse=True)

    # Accumulate the weighted average.
    weight_total = 0
    weighted_ratings = 0
    used = 0
    for _, weight, rating in raters[:self.k]:
        if not weight > 0:
            continue
        weight_total += weight
        weighted_ratings += weight * rating
        used += 1

    if used < self.min_k:
        raise PredictionImpossible('Not enough neighbors.')

    return weighted_ratings / weight_total, {'actual_k': used}
def estimate(self, u, i):
    """Predict a rating with the Resnick formula.

    prediction = mean(u) + sum(sim(u, v) * (r_vi - mean(v)))
                           / (sum(|sim(u, v)|) + epsilon)

    The sums run over the ``self.n`` users most similar to ``u`` among
    those who rated item ``i``.

    The previous version recovered ``r_vi`` with
    ``filter(lambda x: v in x, ...)``, a membership test on the
    ``(user, rating)`` tuple. That also matches a tuple whose *rating*
    equals ``v`` (user id 2 matches ``(1, 2.0)``), so the wrong user's
    rating could be used. The rating is now carried with the neighbour,
    which also removes the rescan of the item's ratings per neighbour.

    Raises:
        PredictionImpossible: if the trainset does not know the user or the
            item.
    """
    if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
        raise PredictionImpossible('User and/or item is unknown.')

    # Ratings of item i by all users, as (user, rating) pairs.
    item_rating_by_user = self.trainset.ir[i]

    # Average rating given by user u.
    ratings_by_u = self.trainset.ur[u]
    avg_rating_by_user_u = sum(r for _, r in ratings_by_u) / len(ratings_by_u)

    # Similarity between u and each user v that rated item i, keeping v's
    # rating of i alongside.
    neighbors = [(v, self.sim[u, v], r) for (v, r) in item_rating_by_user]

    # The n most similar users (stable sort: ties keep their ir[i] order).
    top_k = sorted(neighbors, key=lambda x: x[1], reverse=True)[:self.n]

    # Numerator: similarity-weighted deviation of each neighbour's rating
    # from that neighbour's own mean.
    numerator = 0
    for v, sim_uv, rating_v_i in top_k:
        ratings_by_v = self.trainset.ur[v]
        avg_rating_by_user_v = sum(r for _, r in ratings_by_v) / len(ratings_by_v)
        numerator += sim_uv * (rating_v_i - avg_rating_by_user_v)

    # Denominator: sum of absolute similarities.
    denominator = sum(abs(sim_uv) for _, sim_uv, _ in top_k)

    # epsilon keeps the division defined when the similarities sum to zero.
    return avg_rating_by_user_u + (numerator / (denominator + self.epsilon))
def run_child_algos_on_jobs(self, jobs):
    """
    Run every child algorithm on every job and gather the outcomes.

    A prediction counts as failed when the algorithm raises
    PredictionImpossible or returns exactly the trainset's global mean. The
    weight of a failed algorithm is subtracted from that job's
    'total_weights', unless the weight is infinite.

    Args:
        jobs: List of JobRequest. These are the user/item pairs we want to
            collect predictions for.

    Returns:
        Dict where key is (inner user ID, inner item ID) and value is a
        dictionary consisting of results, total_weights and
        rejected_results, as expected by combine(). Accessing a key that
        was never produced raises KeyError.
    """
    def blank_entry():
        return {
            'results': [],
            'total_weights': self.sum_weights,
            'rejected_results': []
        }

    # TODO: Use a list instead with indices matching those of jobs,
    # since the same user ID and item ID pair may appear multiple times
    collected = defaultdict(blank_entry)

    # One algorithm at a time, so its name is resolved once rather than
    # once per job.
    for algorithm, weight, _ in self.all_algorithms():
        algorithm_name = self._get_algorithm_name(algorithm)

        for job in jobs:
            key = (job.iuid, job.iiid)
            try:
                outcome = algorithm.estimate(*key)

                # An algorithm returns either a bare prediction or a
                # (prediction, extras) tuple.
                if isinstance(outcome, tuple):
                    prediction, extras = outcome
                else:
                    prediction, extras = outcome, None

                if prediction == self.trainset.global_mean:
                    # The algorithm did not admit it, but a result equal to
                    # the global mean is a symptom that no real prediction
                    # was possible.
                    raise PredictionImpossible(
                        'Algorithm prediction equals global mean'
                    )

                collected[key]['results'].append(AlgorithmResult(
                    algorithm_name, weight, prediction, extras
                ))
            except PredictionImpossible as failure:
                # Record the failure and keep this algorithm's weight out
                # of the weighting for this job.
                entry = collected[key]
                entry['rejected_results'].append(AlgorithmResult(
                    algorithm_name, weight, None, failure
                ))
                if weight != float('inf'):
                    entry['total_weights'] -= weight

    # From here on, a missing key raises KeyError instead of creating a
    # blank entry.
    collected.default_factory = None
    return collected
def estimate(self, User, index):
    """Return the mean similarity of the three users nearest to ``User``.

    Before predicting, the requested ``index`` is validated against the
    tree position ``User.currentIndex``: from each node only certain next
    nodes may be predicted, and end nodes allow none. The node names
    ``a`` .. ``n`` are resolved from the enclosing scope.

    The first guard previously called ``knows_item(i)``, which tested the
    tree node ``i`` rather than the requested ``index``; it now checks
    ``index``.

    Raises:
        PredictionImpossible: if the user or item is unknown, or if
            ``index`` is not a permitted successor of ``User.currentIndex``.
    """
    # Catch impossible predictions.
    if not (self.UserData.knows_user(User) and self.UserData.knows_item(index)):
        raise PredictionImpossible('User and or Item are unknown')

    # (current node, nodes that may be predicted from it). An empty tuple
    # marks an end node. The order matches the original if/elif chain.
    allowed_next = (
        (a, (b,)),
        (b, (c,)),
        (c, (d,)),
        (d, (g,)),
        (e, (h, i)),
        (f, (j, k)),
        (g, ()),
        (h, (n,)),
        (i, (n, l)),
        (j, ()),
        (k, ()),
        (n, ()),
        (m, ()),
        (l, ()),
    )
    current = User.currentIndex
    for node, successors in allowed_next:
        if current == node and not any(index == nxt for nxt in successors):
            if successors:
                raise PredictionImpossible(
                    'User current Index and Prediction invalid due to tree')
            raise PredictionImpossible(
                'User current Index and Prediction invalid due to tree - End Index'
            )

    # Similarity between User and every user that rated `index`.
    neighbours = [(v, self.sim[User, v]) for (v, r) in self.UserData.ir[index]]
    neighbours = sorted(neighbours, key=lambda x: x[1], reverse=True)

    print('The 3 nearest neighbours of user', str(User.userID), 'are:')
    for v, sim_Userv in neighbours[:3]:
        print('user {0:} with sim {1:1.2f}'.format(v, sim_Userv))

    prediction = mean(sim_Userv for (v, sim_Userv) in neighbours[:3])
    return prediction