def count_items_in_common(self):
    """
    Builds a histogram of how many items each pair of users shares.

    Every unordered pair of users taken from ``self.user_ids`` (the first
    ``self.num_users`` positions) is visited exactly once. For each pair the
    size of the collection returned by ``extractor.get_common_items`` is
    computed and tallied.

    :return: a dictionary that maps a number of common items to the number
    of user pairs sharing exactly that many items. For example,
    {0: 4, 1: 10, 2: 6} means that 4 pairs share no items, 10 pairs share
    one item and 6 pairs share two items.
    """
    users = extractor.initialize_users(self.reviews, False)
    histogram = {}
    total_users = self.num_users

    for first in range(total_users):
        for second in range(first + 1, total_users):
            shared_items = extractor.get_common_items(
                users, self.user_ids[first], self.user_ids[second])
            size = len(shared_items)
            # A size that has not been seen yet starts from zero
            histogram[size] = histogram.get(size, 0) + 1

    return histogram
def count_items_in_common(self):
    """
    Tallies, for every pair of users, how many items the pair has in
    common.

    The pairs are formed from the positions ``0 .. self.num_users - 1`` of
    ``self.user_ids`` and each unordered pair is considered once. The
    common items of a pair are whatever ``extractor.get_common_items``
    returns for it.

    :return: a dictionary whose keys are numbers of common items and whose
    values are the number of user pairs with exactly that many common
    items, e.g. {0: 4, 1: 10, 2: 6} stands for 4 pairs without any common
    item, 10 pairs with one common item and 6 pairs with two common items.
    """
    pair_counts = {}
    user_profiles = extractor.initialize_users(self.reviews, False)

    for left in range(self.num_users):
        for right in range(left + 1, self.num_users):
            left_user = self.user_ids[left]
            right_user = self.user_ids[right]
            overlap = len(extractor.get_common_items(
                user_profiles, left_user, right_user))

            # Make sure the bucket exists before incrementing it
            pair_counts.setdefault(overlap, 0)
            pair_counts[overlap] += 1

    return pair_counts
def calculate_users_similarity(self, user_dictionary, user1, user2):
    """
    Calculates the similarity between two users as the average, over the
    items they have in common, of the similarity between their per-item
    rating vectors.

    For each common item a vector is built for each user with the overall
    rating in the first position followed by the user's multi-criteria
    ratings for that item. The two vectors are compared with
    ``similarity_calculator.calculate_similarity`` using
    ``self._similarity_metric``.

    :param user_dictionary: a mapping from user ID to an object that
    exposes ``item_ratings`` and ``item_multi_ratings``, both indexed by
    item
    :param user1: the ID of the first user
    :param user2: the ID of the second user
    :return: the average per-item similarity, or None if the users have no
    items in common or have fewer than ``self._min_common_items`` items in
    common (when that minimum is set)
    """
    common_items = extractor.get_common_items(user_dictionary, user1, user2)

    if not common_items:
        return None

    if (self._min_common_items is not None and
            len(common_items) < self._min_common_items):
        return None

    user1_overall_ratings = user_dictionary[user1].item_ratings
    user1_multi_ratings = user_dictionary[user1].item_multi_ratings
    # The overall ratings of the second user must come from user2; reading
    # them from user1 compared user1's overall rating with itself
    user2_overall_ratings = user_dictionary[user2].item_ratings
    user2_multi_ratings = user_dictionary[user2].item_multi_ratings

    similarity_sum = 0.
    for item in common_items:
        # Overall rating first, then the criteria ratings. New lists are
        # built so the ratings stored in the dictionary are not modified
        user1_item_ratings = [user1_overall_ratings[item]]
        user1_item_ratings.extend(user1_multi_ratings[item])
        user2_item_ratings = [user2_overall_ratings[item]]
        user2_item_ratings.extend(user2_multi_ratings[item])

        similarity_sum += similarity_calculator.calculate_similarity(
            user1_item_ratings, user2_item_ratings,
            self._similarity_metric)

    return similarity_sum / len(common_items)
def calculate_pearson_similarity(self, user1, user2):
    """
    Calculates the Pearson correlation between the overall ratings of two
    users, restricted to the items they have in common.

    The ratings of each user are centered on that user's
    ``average_overall_rating`` (not on the mean of the common items only).

    :param user1: the ID of the first user
    :param user2: the ID of the second user
    :return: the correlation value, None if the users have no items in
    common, or 0 if the denominator of the correlation is zero
    """
    shared_items = extractor.get_common_items(
        self.user_dictionary, user1, user2)

    if not shared_items:
        return None

    covariance = 0
    spread1 = 0
    spread2 = 0
    mean1 = self.user_dictionary[user1].average_overall_rating
    mean2 = self.user_dictionary[user2].average_overall_rating

    for item in shared_items:
        rating1 = self.user_dictionary[user1].item_ratings[item]
        rating2 = self.user_dictionary[user2].item_ratings[item]
        deviation1 = rating1 - mean1
        deviation2 = rating2 - mean2

        covariance += deviation1 * deviation2
        spread1 += deviation1 ** 2
        spread2 += deviation2 ** 2

    norm = math.sqrt(spread1 * spread2)

    # A zero norm means at least one user shows no deviation from its
    # average on the common items; 0 is reported in that case
    return 0 if norm == 0 else covariance / norm
def calculate_cosine_similarity(self, user1, user2):
    """
    Calculates the cosine similarity between the overall ratings of two
    users, restricted to the items they have in common.

    :param user1: the ID of the first user
    :param user2: the ID of the second user
    :return: the cosine similarity, None if the users have no items in
    common, or 0 if the ratings of either user over the common items are
    all zero (the cosine is undefined there; 0 is the same value that
    calculate_pearson_similarity reports for a zero denominator)
    """
    common_items = extractor.get_common_items(
        self.user_dictionary, user1, user2)

    if not common_items:
        return None

    user1_ratings = self.user_dictionary[user1].item_ratings
    user2_ratings = self.user_dictionary[user2].item_ratings

    numerator = 0
    denominator1 = 0
    denominator2 = 0

    for item in common_items:
        user1_rating = user1_ratings[item]
        user2_rating = user2_ratings[item]
        numerator += user1_rating * user2_rating
        denominator1 += user1_rating ** 2
        denominator2 += user2_rating ** 2

    denominator = math.sqrt(denominator1) * math.sqrt(denominator2)

    # Guard against a zero-length rating vector, which would otherwise
    # raise ZeroDivisionError
    if denominator == 0:
        return 0

    return numerator / denominator
def calculate_users_similarity(self, user_dictionary, user1, user2):
    """
    Calculates the similarity between two users by averaging, over their
    common items, the similarity of the rating vectors each user gave to
    the item.

    The rating vector of a user for an item is the overall rating followed
    by the multi-criteria ratings of that item. Vectors are compared with
    ``similarity_calculator.calculate_similarity`` using
    ``self._similarity_metric``.

    :param user_dictionary: a mapping from user ID to an object that
    exposes ``item_ratings`` and ``item_multi_ratings``, both indexed by
    item
    :param user1: the ID of the first user
    :param user2: the ID of the second user
    :return: the average per-item similarity, or None if the users have no
    items in common or have fewer than ``self._min_common_items`` items in
    common (when that minimum is set)
    """
    common_items = extractor.get_common_items(user_dictionary, user1, user2)

    if not common_items:
        return None

    if (self._min_common_items is not None and
            len(common_items) < self._min_common_items):
        return None

    user1_profile = user_dictionary[user1]
    # Each user's ratings are taken from that user's own profile; the
    # overall ratings of user2 used to be read from user1 by mistake
    user2_profile = user_dictionary[user2]

    similarity_sum = 0.
    for item in common_items:
        # Overall rating in position 0, criteria ratings afterwards.
        # Concatenation creates new lists, leaving the profiles untouched
        user1_item_ratings = (
            [user1_profile.item_ratings[item]] +
            list(user1_profile.item_multi_ratings[item]))
        user2_item_ratings = (
            [user2_profile.item_ratings[item]] +
            list(user2_profile.item_multi_ratings[item]))

        similarity_sum += similarity_calculator.calculate_similarity(
            user1_item_ratings, user2_item_ratings,
            self._similarity_metric)

    return similarity_sum / len(common_items)
def calculate_user_similarity(self, user1, user2, threshold):
    """
    Calculates a context-weighted, Pearson-like similarity between two
    users.

    Only the common items whose context similarity (as given by
    ``context_utils.get_context_similarity``) is strictly greater than
    ``threshold`` take part in the calculation. Rating deviations are
    measured against each user's ``average_overall_rating`` and every
    product of deviations is weighted by the context similarity of the
    item.

    :param user1: the ID of the first user
    :param user2: the ID of the second user
    :param threshold: the value the context similarity of an item has to
    exceed for the item to be taken into account
    :return: the similarity value, or None if the users have no items in
    common or the denominator is zero (which includes the case in which no
    item passes the threshold)
    """
    shared_items = extractor.get_common_items(
        self.user_dictionary, user1, user2)

    if not shared_items:
        return None

    # Context similarity of every item that passes the threshold
    weights = {}
    for item in shared_items:
        context1 = self.user_dictionary[user1].item_contexts[item]
        context2 = self.user_dictionary[user2].item_contexts[item]
        weight = context_utils.get_context_similarity(
            context1, context2, self.context_rich_topics)
        if weight > threshold:
            weights[item] = weight

    weighted_covariance = 0
    spread1 = 0
    spread2 = 0
    weight_energy = 0
    mean1 = self.user_dictionary[user1].average_overall_rating
    mean2 = self.user_dictionary[user2].average_overall_rating

    for item, weight in weights.items():
        rating1 = self.user_dictionary[user1].item_ratings[item]
        rating2 = self.user_dictionary[user2].item_ratings[item]
        deviation1 = rating1 - mean1
        deviation2 = rating2 - mean2

        weighted_covariance += deviation1 * deviation2 * weight
        spread1 += deviation1 ** 2
        spread2 += deviation2 ** 2
        weight_energy += weight ** 2

    norm = math.sqrt(spread1 * spread2 * weight_energy)

    if norm == 0:
        return None

    return weighted_covariance / norm
def calculate_user_similarity(self, user1, user2, threshold):
    """
    Calculates the similarity between two users taking into account the
    context in which they rated their common items.

    Each common item gets a context similarity from
    ``context_utils.get_context_similarity``; items whose value is not
    strictly above ``threshold`` are discarded. The remaining items feed a
    Pearson-like formula in which every product of rating deviations
    (relative to each user's ``average_overall_rating``) is multiplied by
    the context similarity of the item.

    :param user1: the ID of the first user
    :param user2: the ID of the second user
    :param threshold: the minimum (exclusive) context similarity an item
    must have in order to be used
    :return: the similarity value, or None if there are no common items or
    the denominator of the formula is zero (for instance when every item
    was discarded)
    """
    candidates = extractor.get_common_items(
        self.user_dictionary, user1, user2)

    if not candidates:
        return None

    # item -> context similarity, only for the items above the threshold
    kept = {}
    for item in candidates:
        first_context = self.user_dictionary[user1].item_contexts[item]
        second_context = self.user_dictionary[user2].item_contexts[item]
        closeness = context_utils.get_context_similarity(
            first_context, second_context, self.context_rich_topics)
        if closeness > threshold:
            kept[item] = closeness

    top = 0
    bottom_first = 0
    bottom_second = 0
    bottom_context = 0
    first_average = self.user_dictionary[user1].average_overall_rating
    second_average = self.user_dictionary[user2].average_overall_rating

    for item in kept:
        closeness = kept[item]
        first_rating = self.user_dictionary[user1].item_ratings[item]
        second_rating = self.user_dictionary[user2].item_ratings[item]
        first_offset = first_rating - first_average
        second_offset = second_rating - second_average

        top += first_offset * second_offset * closeness
        bottom_first += first_offset ** 2
        bottom_second += second_offset ** 2
        bottom_context += closeness ** 2

    bottom = math.sqrt(bottom_first * bottom_second * bottom_context)

    return None if bottom == 0 else top / bottom
def calculate_users_similarity(self, user_dictionary, user1, user2):
    """
    Calculates the similarity between two users from the ratings they gave
    to the items they have in common.

    The ratings of each user for the common items are obtained with
    ``extractor.get_user_ratings`` and compared with
    ``similarity_calculator.calculate_similarity`` using
    ``self._similarity_metric``.

    :param user_dictionary: the users structure that is handed over to the
    ``extractor`` helpers
    :param user1: the ID of the first user
    :param user2: the ID of the second user
    :return: the similarity value, or None if the users have no items in
    common or have fewer than ``self._min_common_items`` items in common
    (when that minimum is set)
    """
    shared_items = extractor.get_common_items(
        user_dictionary, user1, user2)

    if not shared_items:
        return None

    minimum = self._min_common_items
    if minimum is not None and len(shared_items) < minimum:
        return None

    first_ratings = extractor.get_user_ratings(
        user_dictionary, user1, shared_items)
    second_ratings = extractor.get_user_ratings(
        user_dictionary, user2, shared_items)

    return similarity_calculator.calculate_similarity(
        first_ratings, second_ratings, self._similarity_metric)
def calculate_users_similarity(self, user_dictionary, user1, user2):
    """
    Calculates the similarity between two users as the average of the
    similarities obtained for each rating criterion plus the similarity
    obtained for the overall rating.

    For every criterion, the column with the ratings both users gave to
    the common items on that criterion is extracted and compared with
    ``similarity_calculator.calculate_similarity`` using
    ``self._similarity_metric``. The overall ratings are compared in the
    same way and count as one more criterion.

    :param user_dictionary: the users structure that is handed over to the
    ``extractor`` helpers
    :param user1: the ID of the first user
    :param user2: the ID of the second user
    :return: the average similarity, or None if the users have no items in
    common or have fewer than ``self._min_common_items`` items in common
    (when that minimum is set)
    """
    common_items = extractor.get_common_items(user_dictionary, user1, user2)

    if not common_items:
        return None

    if (self._min_common_items is not None and
            len(common_items) < self._min_common_items):
        return None

    user1_overall_ratings = extractor.get_user_ratings(
        user_dictionary, user1, common_items)
    user1_multi_ratings = extractor.get_user_multi_ratings(
        user_dictionary, user1, common_items)
    user2_overall_ratings = extractor.get_user_ratings(
        user_dictionary, user2, common_items)
    user2_multi_ratings = extractor.get_user_multi_ratings(
        user_dictionary, user2, common_items)

    # The number of criteria is taken from the first row of ratings
    # (presumably one row per common item -- confirm against extractor)
    num_criteria = len(user1_multi_ratings[0])
    total_similarity = 0.

    # range is used instead of xrange so the method also runs on Python 3,
    # where xrange does not exist
    for i in range(num_criteria):
        user1_criterion_item_ratings = extractor.get_matrix_column(
            user1_multi_ratings, i)
        user2_criterion_item_ratings = extractor.get_matrix_column(
            user2_multi_ratings, i)

        total_similarity += similarity_calculator.calculate_similarity(
            user1_criterion_item_ratings, user2_criterion_item_ratings,
            self._similarity_metric)

    # We also add the overall similarity
    total_similarity += similarity_calculator.calculate_similarity(
        user1_overall_ratings, user2_overall_ratings,
        self._similarity_metric)

    # The overall rating counts as one extra criterion
    return total_similarity / (num_criteria + 1)
def calculate_users_similarity(self, user_dictionary, user1, user2):
    """
    Calculates the similarity between two users by averaging one
    similarity value per rating criterion together with the similarity of
    the overall ratings.

    The ratings of both users for their common items are retrieved through
    ``extractor``. Each criterion column, and then the overall ratings,
    are compared with ``similarity_calculator.calculate_similarity`` using
    ``self._similarity_metric``; the result is the sum of those values
    divided by the number of criteria plus one.

    :param user_dictionary: the users structure that is handed over to the
    ``extractor`` helpers
    :param user1: the ID of the first user
    :param user2: the ID of the second user
    :return: the average similarity, or None if the users have no items in
    common or have fewer than ``self._min_common_items`` items in common
    (when that minimum is set)
    """
    common_items = extractor.get_common_items(user_dictionary, user1, user2)

    if not common_items:
        return None

    if (self._min_common_items is not None and
            len(common_items) < self._min_common_items):
        return None

    user1_overall_ratings = extractor.get_user_ratings(
        user_dictionary, user1, common_items)
    user1_multi_ratings = extractor.get_user_multi_ratings(
        user_dictionary, user1, common_items)
    user2_overall_ratings = extractor.get_user_ratings(
        user_dictionary, user2, common_items)
    user2_multi_ratings = extractor.get_user_multi_ratings(
        user_dictionary, user2, common_items)

    # The first row of ratings determines how many criteria there are
    # (presumably one row per common item -- confirm against extractor)
    num_criteria = len(user1_multi_ratings[0])
    total_similarity = 0.

    # xrange only exists on Python 2; range behaves the same here and is
    # available on both Python 2 and Python 3
    for criterion in range(num_criteria):
        user1_criterion_item_ratings = extractor.get_matrix_column(
            user1_multi_ratings, criterion)
        user2_criterion_item_ratings = extractor.get_matrix_column(
            user2_multi_ratings, criterion)

        total_similarity += similarity_calculator.calculate_similarity(
            user1_criterion_item_ratings, user2_criterion_item_ratings,
            self._similarity_metric)

    # We also add the overall similarity
    total_similarity += similarity_calculator.calculate_similarity(
        user1_overall_ratings, user2_overall_ratings,
        self._similarity_metric)

    # One is added because the overall rating was compared as well
    average_similarity = total_similarity / (num_criteria + 1)

    return average_similarity
def calculate_user_similarity(self, user_id1, user_id2, threshold):
    """
    Calculates the cosine similarity between the overall ratings of two
    users, restricted to the items they have in common.

    :param user_id1: the ID of the first user
    :param user_id2: the ID of the second user
    :param threshold: not used by this implementation; it is kept so the
    signature stays the same for existing callers
    :return: the cosine similarity, or None when it cannot be calculated,
    i.e. when the users have no items in common or when the ratings of
    either user over the common items are all zero
    """
    common_items = extractor.get_common_items(
        self.user_dictionary, user_id1, user_id2)

    if not common_items:
        return None

    user1_ratings = self.user_dictionary[user_id1].item_ratings
    user2_ratings = self.user_dictionary[user_id2].item_ratings

    numerator = 0
    denominator1 = 0
    denominator2 = 0

    for item in common_items:
        user1_rating = user1_ratings[item]
        user2_rating = user2_ratings[item]
        numerator += user1_rating * user2_rating
        denominator1 += user1_rating ** 2
        denominator2 += user2_rating ** 2

    denominator = math.sqrt(denominator1) * math.sqrt(denominator2)

    # A zero-length rating vector has no defined cosine; return None (the
    # value callers already receive when there are no common items)
    # instead of raising ZeroDivisionError
    if denominator == 0:
        return None

    return numerator / denominator
def calculate_user_similarity(self, user_id1, user_id2, threshold):
    """
    Calculates the cosine of the angle between the overall-rating vectors
    of two users, where the vectors only contain the items both users have
    in common.

    :param user_id1: the ID of the first user
    :param user_id2: the ID of the second user
    :param threshold: ignored by this implementation; the parameter is
    kept so that existing callers do not have to change
    :return: the cosine similarity, or None when it is undefined: either
    the users have no items in common or one of the rating vectors has
    zero length (all of its ratings are zero)
    """
    common_items = extractor.get_common_items(
        self.user_dictionary, user_id1, user_id2)

    if not common_items:
        return None

    numerator = 0
    denominator1 = 0
    denominator2 = 0

    for item in common_items:
        user1_rating = self.user_dictionary[user_id1].item_ratings[item]
        user2_rating = self.user_dictionary[user_id2].item_ratings[item]
        numerator += user1_rating * user2_rating
        denominator1 += user1_rating ** 2
        denominator2 += user2_rating ** 2

    denominator = math.sqrt(denominator1) * math.sqrt(denominator2)

    # Without this guard an all-zero rating vector raises
    # ZeroDivisionError; None is the value callers already get when the
    # similarity cannot be calculated
    if denominator == 0:
        return None

    return numerator / denominator