def init_reviews(self, reviews_file='/Users/fpena/tmp/reviews_hotel.pkl'):
        """Load pickled reviews and split them into specific and generic groups.

        Sets ``self.reviews`` from the pickle file, clusters the reviews with
        ``reviews_clusterer``, stores the first two resulting groups in
        ``self.specific_reviews`` and ``self.generic_reviews``, stores the
        result of ``context_utils.get_all_nouns`` in ``self.all_nouns`` and
        finally calls ``context_utils.generate_stats`` on the two groups.

        :param reviews_file: path of the pickle file that holds the reviews.
            Defaults to the path that was previously hard-coded in this
            method, so existing callers behave exactly as before.
        """
        print('init_reviews', time.strftime("%H:%M:%S"))

        # Reset the attributes before touching the disk, so they are empty
        # lists (not values from an earlier call) if loading raises.
        self.reviews = []
        self.specific_reviews = []
        self.generic_reviews = []

        # NOTE(review): unpickling can execute arbitrary code, so
        # ``reviews_file`` must only ever point at a trusted file.
        with open(reviews_file, 'rb') as read_file:
            self.reviews = pickle.load(read_file)

        cluster_labels = reviews_clusterer.cluster_reviews(self.reviews)
        review_clusters = reviews_clusterer.split_list_by_labels(
            self.reviews, cluster_labels)

        # Group 0 is kept as the specific reviews and group 1 as the generic
        # ones. NOTE(review): assumes the clusterer always yields at least
        # two groups in that order -- confirm against reviews_clusterer.
        self.specific_reviews = review_clusters[0]
        self.generic_reviews = review_clusters[1]

        self.all_nouns = context_utils.get_all_nouns(self.reviews)

        context_utils.generate_stats(self.specific_reviews, self.generic_reviews)
Example #2
0
 def test_get_all_nouns(self):
     # get_all_nouns must return exactly this set of nouns for the four
     # fixture texts (one of which is the empty paragraph).
     expected_value = {
         'morning',
         'Dr.',
         'Adams',
         'patient',
         'room',
         'number',
         'dinner',
         'night',
         'food',
         'restaurant',
         'town',
         'bar',
         'music',
         'beer',
     }
     texts = (empty_paragraph, paragraph1, review_text1, review_text2)
     reviews = [Review(text) for text in texts]
     actual_value = context_utils.get_all_nouns(reviews)
     self.assertEqual(actual_value, expected_value)
Example #3
0
 def test_get_all_nouns(self):
     # Wrap every fixture text in a Review, then compare the nouns reported
     # by context_utils.get_all_nouns against the expected set.
     sample_texts = [empty_paragraph, paragraph1, review_text1, review_text2]
     reviews = []
     for sample_text in sample_texts:
         reviews.append(Review(sample_text))

     expected_nouns = {
         'morning', 'Dr.', 'Adams', 'patient',
         'room', 'number', 'dinner', 'night',
         'food', 'restaurant', 'town', 'bar',
         'music', 'beer',
     }
     self.assertEqual(context_utils.get_all_nouns(reviews), expected_nouns)
Example #4
0
    def init_reviews(self):
        """Split the current reviews into specific and generic groups.

        Reads ``self.reviews`` but never assigns it, so the attribute has to
        be populated before this method is called (the code that used to
        load it from disk here had been commented out and was removed).

        Clusters the reviews with ``reviews_clusterer``, stores the first two
        resulting groups in ``self.specific_reviews`` and
        ``self.generic_reviews``, stores the result of
        ``context_utils.get_all_nouns`` in ``self.all_nouns`` and finally
        calls ``context_utils.generate_stats`` on the two groups.
        """
        print('init_reviews', time.strftime("%H:%M:%S"))

        # Start from empty lists so the attributes exist (and hold no values
        # from an earlier call) even if clustering raises.
        self.specific_reviews = []
        self.generic_reviews = []

        cluster_labels = reviews_clusterer.cluster_reviews(self.reviews)
        review_clusters = reviews_clusterer.split_list_by_labels(
            self.reviews, cluster_labels)

        # Group 0 is kept as the specific reviews and group 1 as the generic
        # ones. NOTE(review): assumes the clusterer always yields at least
        # two groups in that order -- confirm against reviews_clusterer.
        self.specific_reviews = review_clusters[0]
        self.generic_reviews = review_clusters[1]

        self.all_nouns = context_utils.get_all_nouns(self.reviews)

        context_utils.generate_stats(self.specific_reviews, self.generic_reviews)