def load(self, records):
    """Store the records and build every structure derived from them.

    Builds the ratings and reviews matrices, the user dictionary and the
    list of user ids, then obtains the context-rich topics and the topic
    model from an ``LdaBasedContext`` created from ``self.records`` and
    ``self.reviews``. Finally the context matrix and the similarity matrix
    are created, with a timestamped message printed before and after.

    :param records: the records to load; handed unchanged to the matrix
        builders and to the ``extractor`` helpers
    """
    self.records = records
    self.ratings_matrix = basic_knn.create_ratings_matrix(records)
    self.reviews_matrix = create_reviews_matrix(records)
    self.user_dictionary = extractor.initialize_users(self.records, False)
    self.user_ids = extractor.get_groupby_list(self.records, 'user_id')

    # NOTE(review): self.reviews is read here but not assigned by this
    # method, so it must already be set on the instance -- confirm caller.
    context_builder = LdaBasedContext(self.records, self.reviews)
    self.context_rich_topics = context_builder.get_context_rich_topics()
    self.lda_model = context_builder.topic_model

    clock_format = "%H:%M:%S"
    print('building similarity matrix', time.strftime(clock_format))
    self.context_matrix = self.create_context_matrix(records)
    self.similarity_matrix = self.create_similarity_matrix()
    print(
        'finished building similarity matrix',
        time.strftime(clock_format))
def count_items_in_common(self):
    """
    Tallies, over every pair of distinct users, how many items the two
    users of the pair have in common.

    :return: a dictionary that maps a number of common items to the number
    of user pairs sharing exactly that many items. For example, the
    dictionary {0:4, 1:10, 2:6, 3:3, 4:1} means that 4 pairs of users have
    0 items in common, 10 pairs have 1 item in common, 6 pairs have 2
    items in common, and so on.
    """
    histogram = {}
    users = extractor.initialize_users(self.reviews, False)
    total_users = self.num_users

    # Visit each unordered pair (first, second) exactly once.
    for first in range(total_users):
        for second in range(first + 1, total_users):
            shared_items = extractor.get_common_items(
                users, self.user_ids[first], self.user_ids[second])
            shared = len(shared_items)
            histogram[shared] = histogram.get(shared, 0) + 1

    return histogram
def count_items_in_common(self):
    """
    Builds a histogram of the number of items shared by each pair of
    distinct users.

    :return: a dictionary whose keys are numbers of common items and whose
    values are how many user pairs have exactly that many items in common.
    For example, the dictionary {0:4, 1:10, 2:6, 3:3, 4:1} means that 4
    pairs of users share no items, 10 pairs share 1 item, 6 pairs share 2
    items, and so on.
    """
    pair_counts = {}
    user_lookup = extractor.initialize_users(self.reviews, False)
    user_total = self.num_users

    for left in range(user_total):
        # Start at left + 1 so that each pair is counted only once.
        for right in range(left + 1, user_total):
            left_user = self.user_ids[left]
            right_user = self.user_ids[right]
            overlap = len(extractor.get_common_items(
                user_lookup, left_user, right_user))
            pair_counts.setdefault(overlap, 0)
            pair_counts[overlap] += 1

    return pair_counts
def load(self, records):
    """Store the records and build every structure derived from them.

    Builds the ratings and reviews matrices, the user dictionary and the
    list of user ids. An ``LdaBasedContext`` is then prepared: when
    ``self.reviews`` is truthy it is assigned to the context object
    directly, otherwise the context is created from the ``'text'`` field
    of every record. The context supplies the context-rich topics and the
    topic model, after which the context matrix and the similarity matrix
    are created (a timestamped message is printed before and after).

    :param records: the records to load; each one is indexed with
        ``'text'`` when ``self.reviews`` is falsy
    """
    self.records = records
    self.ratings_matrix = basic_knn.create_ratings_matrix(records)
    self.reviews_matrix = create_reviews_matrix(records)
    self.user_dictionary = extractor.initialize_users(self.records, False)
    self.user_ids = extractor.get_groupby_list(self.records, 'user_id')

    if self.reviews:
        # Already-available reviews are attached to an empty context.
        context_builder = LdaBasedContext()
        context_builder.reviews = self.reviews
    else:
        review_texts = [entry['text'] for entry in self.records]
        context_builder = LdaBasedContext(review_texts)
    context_builder.init_reviews()

    self.context_rich_topics = context_builder.get_context_rich_topics()
    self.lda_model = context_builder.topic_model

    clock_format = "%H:%M:%S"
    print('building similarity matrix', time.strftime(clock_format))
    self.context_matrix = self.create_context_matrix(records)
    self.similarity_matrix = self.create_similarity_matrix()
    print(
        'finished building similarity matrix',
        time.strftime(clock_format))
def test_build_similarity_matrix_pearson(self):
    """Check the 'pearson' similarity matrix built from reviews_matrix_5.

    The matrix produced by ``SingleSimilarityMatrixBuilder('pearson')``
    must be equal to the expected ``pearson_matrix``.
    """
    users = extractor.initialize_users(reviews_matrix_5, False)
    ids = extractor.get_groupby_list(reviews_matrix_5, 'user_id')

    builder = SingleSimilarityMatrixBuilder('pearson')
    actual_matrix = builder.build_similarity_matrix(users, ids)

    self.assertEqual(pearson_matrix, actual_matrix)
def load(self, reviews):
    """Store the reviews and build the per-user structures.

    The user dictionary is initialised with the similarity matrix
    builder's ``_is_multi_criteria`` flag, and the user ids are grouped
    from the ``'user_id'`` field. The user similarity matrix is built only
    when the builder has a similarity metric configured.

    :param reviews: the reviews to load
    """
    self.reviews = reviews
    builder = self._similarity_matrix_builder

    self.user_dictionary = extractor.initialize_users(
        self.reviews, builder._is_multi_criteria)
    self.user_ids = extractor.get_groupby_list(self.reviews, 'user_id')

    # Without a metric there is nothing to precompute.
    if builder._similarity_metric is None:
        return

    self.user_similarity_matrix = builder.build_similarity_matrix(
        self.user_dictionary, self.user_ids)
def load(self, records):
    """Build the user structures and load every collaborating calculator.

    The user dictionary and the user ids are derived from ``records``.
    When ``self.has_context`` is set, ``self.load_context`` is called
    first. The similarity, neighbourhood, baseline and neighbour
    contribution calculators are then loaded, in that order.

    :param records: the records to load (not stored on the instance)
    """
    self.user_dictionary = extractor.initialize_users(records, False)
    self.user_ids = extractor.get_groupby_list(records, 'user_id')

    if self.has_context:
        self.load_context(records)

    # Read the shared arguments only after the optional context load.
    ids = self.user_ids
    users = self.user_dictionary
    topics = self.context_rich_topics

    self.user_similarity_calculator.load(ids, users, topics)
    self.neighbourhood_calculator.load(
        ids, users, topics, self.num_neighbours)
    self.user_baseline_calculator.load(users, topics)
    self.neighbour_contribution_calculator.load(
        self.user_baseline_calculator)
def test_build_similarity_matrix_pearson(self):
    """Verify the "pearson" similarity matrix against ``pearson_matrix``.

    The input users and user ids are both extracted from
    ``reviews_matrix_5``.
    """
    source_reviews = reviews_matrix_5
    users_by_id = extractor.initialize_users(source_reviews, False)
    all_user_ids = extractor.get_groupby_list(source_reviews, "user_id")

    matrix_builder = SingleSimilarityMatrixBuilder("pearson")
    built_matrix = matrix_builder.build_similarity_matrix(
        users_by_id, all_user_ids
    )

    self.assertEqual(pearson_matrix, built_matrix)
def load(self, reviews):
    """Store the reviews and derive the user dictionary and user ids.

    :param reviews: the reviews to load; they are kept in ``self.reviews``
        and passed to ``extractor.initialize_users`` and
        ``extractor.get_groupby_list`` (grouping on ``"user_id"``)
    """
    self.reviews = reviews
    loaded_reviews = self.reviews

    self.user_dictionary = extractor.initialize_users(loaded_reviews, False)
    self.user_ids = extractor.get_groupby_list(loaded_reviews, "user_id")
def load(self, reviews):
    """Store the reviews and build the matrices derived from them.

    Creates the ratings matrix, the user dictionary and the list of user
    ids, then builds the similarity matrix through
    ``self.create_similarity_matrix``.

    :param reviews: the reviews to load
    """
    self.reviews = reviews
    self.ratings_matrix = create_ratings_matrix(reviews)

    loaded_reviews = self.reviews
    self.user_dictionary = extractor.initialize_users(loaded_reviews, False)
    self.user_ids = extractor.get_groupby_list(loaded_reviews, 'user_id')

    # Built last, after the attributes above have been assigned.
    self.similarity_matrix = self.create_similarity_matrix()
def load(self, reviews):
    """Keep the reviews and prepare the user dictionary and user ids.

    :param reviews: the reviews to load; stored in ``self.reviews`` and
        then handed to ``extractor.initialize_users`` and to
        ``extractor.get_groupby_list`` with the ``'user_id'`` field
    """
    self.reviews = reviews
    current_reviews = self.reviews

    self.user_dictionary = extractor.initialize_users(
        current_reviews, False)
    self.user_ids = extractor.get_groupby_list(
        current_reviews, 'user_id')