def init_reviews(self):
    """Load the pickled hotel reviews and split them into two clusters.

    The method resets ``reviews``, ``specific_reviews`` and
    ``generic_reviews``, unpickles the reviews from a hard-coded local file,
    clusters them through ``reviews_clusterer`` and stores the first two
    resulting groups. It finally stores the nouns of every review in
    ``all_nouns`` and hands both groups to ``context_utils.generate_stats``.
    """
    print('init_reviews', time.strftime("%H:%M:%S"))

    # Reset the three review lists before anything is loaded.
    self.reviews = []
    self.specific_reviews = []
    self.generic_reviews = []

    # Hard-coded dataset location. Sibling pickles in the same directory
    # (reviews_restaurant.pkl, sentences_hotel.pkl) were used as alternative
    # inputs during experimentation.
    pickle_path = '/Users/fpena/tmp/reviews_hotel.pkl'

    # NOTE(review): pickle.load executes arbitrary code from the file, so
    # this path must only ever point at a trusted, locally produced pickle.
    with open(pickle_path, 'rb') as pickle_handle:
        self.reviews = pickle.load(pickle_handle)

    labels = reviews_clusterer.cluster_reviews(self.reviews)
    clusters = reviews_clusterer.split_list_by_labels(
        self.reviews, labels)

    # Group 0 is treated as the specific reviews and group 1 as the generic
    # ones -- presumably matching the clusterer's label order; TODO confirm.
    self.specific_reviews = clusters[0]
    self.generic_reviews = clusters[1]

    self.all_nouns = context_utils.get_all_nouns(self.reviews)
    context_utils.generate_stats(self.specific_reviews, self.generic_reviews)
def test_get_all_nouns(self):
    # Verifies that context_utils.get_all_nouns, given reviews built from the
    # four fixture texts, returns exactly the set of nouns listed below.
    fixture_texts = (empty_paragraph, paragraph1, review_text1, review_text2)
    reviews = [Review(fixture_text) for fixture_text in fixture_texts]

    expected_nouns = {
        'morning', 'Dr.', 'Adams', 'patient', 'room', 'number', 'dinner',
        'night', 'food', 'restaurant', 'town', 'bar', 'music', 'beer',
    }
    found_nouns = context_utils.get_all_nouns(reviews)

    # Argument order (actual first, expected second) is kept as in the
    # original so failure messages read the same.
    self.assertEqual(found_nouns, expected_nouns)
def test_get_all_nouns(self):
    # Builds one Review per fixture text and checks that the nouns collected
    # by context_utils.get_all_nouns equal the expected set.
    reviews = []
    for source_text in (empty_paragraph, paragraph1,
                        review_text1, review_text2):
        reviews.append(Review(source_text))

    nouns_found = context_utils.get_all_nouns(reviews)
    nouns_expected = {
        'morning',
        'Dr.',
        'Adams',
        'patient',
        'room',
        'number',
        'dinner',
        'night',
        'food',
        'restaurant',
        'town',
        'bar',
        'music',
        'beer',
    }

    # Actual value first, expected value second, as in the original call.
    self.assertEqual(nouns_found, nouns_expected)
def init_reviews(self):
    """Split the already-loaded reviews into specific and generic groups.

    This method does not load any data itself: it reads ``self.reviews``
    as-is, so that attribute must be populated by the caller beforehand
    (an ``AttributeError`` is raised otherwise). It clusters the reviews
    through ``reviews_clusterer``, stores the first two resulting groups in
    ``specific_reviews`` and ``generic_reviews``, stores the nouns of every
    review in ``all_nouns`` and hands both groups to
    ``context_utils.generate_stats``.
    """
    print('init_reviews', time.strftime("%H:%M:%S"))

    # NOTE(review): an earlier revision unpickled the reviews here from
    # .../datasets/context/stuff/reviews_hotel_shuffled.pkl (with a matching
    # .json records file). That loading code was disabled, and the unused
    # path variables it left behind have been removed.
    self.specific_reviews = []
    self.generic_reviews = []

    cluster_labels = reviews_clusterer.cluster_reviews(self.reviews)
    review_clusters = reviews_clusterer.split_list_by_labels(
        self.reviews, cluster_labels)

    # Group 0 is treated as the specific reviews and group 1 as the generic
    # ones -- presumably matching the clusterer's label order; TODO confirm.
    self.specific_reviews = review_clusters[0]
    self.generic_reviews = review_clusters[1]

    self.all_nouns = context_utils.get_all_nouns(self.reviews)
    context_utils.generate_stats(self.specific_reviews, self.generic_reviews)