def test_full_flow(self):
    # End-to-end smoke test against the provided data file: every review is
    # fed to the search engine while a reference index is built here
    # independently, then search results and the most-common ranking are
    # compared. Any exception (assertion failures included) is collected and
    # surfaced by the final assertion instead of aborting the test midway.
    errors = []
    # cleaned word -> [(review, occurrences of the word in that review), ...]
    words_to_reviews: Dict[str, List[Tuple[Review, int]]] = defaultdict(list)
    try:
        search_engine = ReviewSearchEngine()
        with open("../input_data/review_data.txt") as input_file:
            for line in input_file.readlines():
                review = Review(line)
                search_engine.add(review)

                # Count the cleaned words of this single review.
                words_count = defaultdict(int)
                for raw_word in review.get_text().strip().lower().split():
                    cleaned_word = WordCleaner.clean_word(word=raw_word)
                    if cleaned_word:
                        words_count[cleaned_word] += 1
                    else:
                        print(f'word before empty clean: {raw_word}')

                for cleaned_word, count in words_count.items():
                    words_to_reviews[cleaned_word].append((review, count))

        for word in ["Great", "product", "love", "happy"]:
            try:
                search_results = search_engine.search(word)
                num_results = len(search_results)

                # Reference ranking: highest per-review count first; the
                # stable sort keeps insertion order among equal counts.
                expected_entries = words_to_reviews[word.strip().lower()]
                expected_entries.sort(key=lambda entry: entry[1], reverse=True)
                expected_texts = [
                    expected_review.get_text()
                    for expected_review, _ in expected_entries
                ]

                self.assertEqual(num_results, len(expected_entries))
                self.assertEqual(expected_texts, search_results)
                print(f"{word} : {num_results} results.")
            except Exception as e:
                # Keep going so every search word is checked.
                errors.append(e)

        print("Most Common Words")
        most_common_list = search_engine.most_common(10)
        for word, num_results in most_common_list:
            print(f"{word} : {num_results} results")

        # Reference ranking: words ordered by how many reviews contain them.
        ranked_words = sorted(
            words_to_reviews.items(),
            key=lambda item: len(item[1]),
            reverse=True,
        )
        most_10_res = [(word, len(entries)) for word, entries in ranked_words[:10]]

        print("Most Common Words check:")
        for word, num_results in most_10_res:
            print(f"{word} : {num_results} results")
        self.assertEqual(most_10_res, most_common_list)
    except Exception as e:
        errors.append(e)
    self.assertEqual(errors, [])
def test_multiple_reviews_n_1(self):
    # Three reviews mention 'product' once, twice and three times; with n=1
    # only the review with the highest count is expected back.
    review_jsons = (
        '{"body": "great product"}',
        '{"body": "excellent product product"}',
        '{"body": "excellent product not like other product and extra product"}',
    )
    # Pair each review with its occurrence count of the searched word (1, 2, 3).
    reviews = []
    for occurrences, review_json in enumerate(review_jsons, start=1):
        reviews.append((Review(json_string=review_json), occurrences))
    top_review = reviews[-1][0]
    self._test(word='product', reviews=reviews, expected_res=[top_review], n=1)
def test_one_review_n_big(self):
    # A single matching review with n far larger than the number of
    # available results: the one review is still expected back.
    only_review = Review(json_string='{"body": "great product"}')
    self._test(
        word='great',
        reviews=[(only_review, 1)],
        expected_res=[only_review],
        n=1000,
    )
def main():
    """Index the review data file, then print search hit counts and the 10 most common words."""
    engine = ReviewSearchEngine()

    # Each line of the input file is handed to Review as-is.
    with open("input_data/review_data.txt") as review_file:
        for raw_line in review_file.readlines():
            engine.add(Review(raw_line))

    for query in ("Great", "product", "love", "happy"):
        hits = engine.search(query)
        print(f"{query} : {len(hits)} results.")

    print("Most Common Words")
    for common_word, result_count in engine.most_common(10):
        print(f"{common_word} : {result_count} results")
def add(self, review: Review) -> None:
    """
    Index a review so that its words are registered in ``self.words_info``.

    The review text is stripped, lower-cased and split on whitespace. Each
    token is cleaned with ``WordCleaner.clean_word``; tokens that clean to an
    empty value are skipped. Every remaining distinct word is recorded once
    for this review together with its number of occurrences.

    :param review: the review to add; ``None`` is ignored
    :return: None
    """
    if review is None:
        return

    tokens = review.get_text().strip().lower().split()

    occurrences: Dict[str, int] = defaultdict(int)
    for token in tokens:
        # The text was lower-cased above, so the cleaner is told not to redo it.
        cleaned = WordCleaner.clean_word(token, to_lower=False)
        if cleaned:
            occurrences[cleaned] += 1

    for cleaned, count in occurrences.items():
        entry = self.words_info[cleaned]
        entry.set_word(word=cleaned)
        entry.add_review(review=review, count=count)
def _test(self, review_json, expected_res):
    """
    Build a Review from a JSON string and check the text it exposes.

    :param review_json: JSON string passed to ``Review(json_string=...)``
    :param expected_res: value ``Review.get_text()`` is expected to return
    """
    actual_text = Review(json_string=review_json).get_text()
    failure_message = f'expected:{expected_res}, got:{actual_text}'
    self.assertEqual(actual_text, expected_res, failure_message)