コード例 #1
0
    def test_full_flow(self):
        """
        End-to-end smoke test of the search engine against the provided data file.

        While the reviews are fed to the engine, an independent
        word -> [(review, occurrences)] index is built alongside it and then
        used to cross-check both ``search`` and ``most_common``.
        Exceptions (assertion failures included) are collected instead of being
        raised right away; the test fails at the end if anything was collected.
        """
        collected_errors = []
        expected_index: Dict[str, List[Tuple[Review, int]]] = defaultdict(list)
        try:
            engine = ReviewSearchEngine()
            data_path = "../input_data/review_data.txt"
            with open(data_path) as data_file:
                for raw_line in data_file.readlines():
                    review = Review(raw_line)
                    engine.add(review)
                    # per-review tally of every cleaned word
                    occurrences = defaultdict(int)
                    for token in review.get_text().strip().lower().split():
                        cleaned = WordCleaner.clean_word(word=token)
                        if cleaned:
                            occurrences[cleaned] += 1
                        else:
                            print(f'word before empty clean: {token}')
                    for cleaned, amount in occurrences.items():
                        expected_index[cleaned].append((review, amount))

            for query in ["Great", "product", "love", "happy"]:
                try:
                    found = engine.search(query)
                    found_total = len(found)
                    # plain indexing: on a defaultdict this also registers an
                    # empty entry for a word that never appeared in the data
                    expected_hits = sorted(expected_index[query.strip().lower()],
                                           key=lambda hit: hit[1],
                                           reverse=True)
                    expected_texts = [hit[0].get_text() for hit in expected_hits]
                    self.assertEqual(found_total, len(expected_hits))
                    self.assertEqual(expected_texts, found)
                    print(f"{query} : {found_total} results.")
                except Exception as exc:
                    collected_errors.append(exc)

            print("Most Common Words")
            engine_top = engine.most_common(10)
            for top_word, top_total in engine_top:
                print(f"{top_word} : {top_total} results")

            # words ranked by the number of reviews containing them, largest first
            ranked = sorted(expected_index.items(),
                            key=lambda entry: len(entry[1]),
                            reverse=True)
            expected_top = [(ranked_word, len(hits)) for ranked_word, hits in ranked[:10]]
            print("Most Common Words check:")
            for top_word, top_total in expected_top:
                print(f"{top_word} : {top_total} results")
            self.assertEqual(expected_top, engine_top)
        except Exception as exc:
            collected_errors.append(exc)
        self.assertEqual(collected_errors, [])
コード例 #2
0
 def test_multiple_reviews_n_1(self):
     """
     Three reviews are passed with counts 1, 2 and 3 for the word 'product';
     with n=1 only the review passed with the highest count is expected back.
     """
     fixtures = [
         ('{"body": "great product"}', 1),
         ('{"body": "excellent product product"}', 2),
         ('{"body": "excellent product not like other product and extra product"}', 3),
     ]
     reviews = [(Review(json_string=raw_json), count) for raw_json, count in fixtures]
     top_review = reviews[-1][0]
     self._test(word='product', reviews=reviews, expected_res=[top_review], n=1)
コード例 #3
0
 def test_one_review_n_big(self):
     """
     A single review is passed while n (1000) is far larger than the number
     of reviews; that one review is still the expected result.
     """
     only_review = Review(json_string='{"body": "great product"}')
     self._test(word='great', reviews=[(only_review, 1)], expected_res=[only_review], n=1000)
コード例 #4
0
def main():
    """
    Load every line of the review data file into a search engine, then print
    the number of search results for a few sample words followed by the
    engine's 10 most common words.
    """
    engine = ReviewSearchEngine()

    # each line of the data file is handed to Review as-is
    with open("input_data/review_data.txt") as review_file:
        for raw_line in review_file.readlines():
            engine.add(Review(raw_line))

    for query in ("Great", "product", "love", "happy"):
        hits = engine.search(query)
        print(f"{query} : {len(hits)} results.")

    print("Most Common Words")
    top_words = engine.most_common(10)
    for common_word, total in top_words:
        print(f"{common_word} : {total} results")
コード例 #5
0
 def add(self, review: Review) -> None:
     """
     registers a review in the search engine DB under each distinct cleaned
     word of its text, together with how many times that word occurs
     :param review: the review to add; None is silently ignored
     :return:
     """
     if review is None:
         return
     normalized_text = review.get_text().strip().lower()
     # the text is lower-cased above, so the cleaner is told not to do it again
     cleaned_words = (WordCleaner.clean_word(raw_word, to_lower=False)
                      for raw_word in normalized_text.split())
     occurrences: Dict[str, int] = defaultdict(int)
     for cleaned in cleaned_words:
         # words that clean down to nothing are not indexed
         if cleaned:
             occurrences[cleaned] += 1
     for cleaned, times in occurrences.items():
         entry = self.words_info[cleaned]
         entry.set_word(word=cleaned)
         entry.add_review(review=review, count=times)
コード例 #6
0
 def _test(self, review_json, expected_res):
     """
     Build a Review from the given JSON string and assert that its text
     equals the expected value.
     :param review_json: JSON string passed to Review
     :param expected_res: the text the review is expected to return
     """
     actual_text = Review(json_string=review_json).get_text()
     failure_msg = f'expected:{expected_res}, got:{actual_text}'
     self.assertEqual(actual_text, expected_res, failure_msg)