def test_file_to_update_in_wrong_lang(self):
    """Update English counts from a file produced with the Russian alphabet.

    The count dict built from ``self.first_file`` is expected to equal
    ``self.count_first`` after the update call.
    """
    # Seed OUTPUT_FILE with counts gathered using the Russian alphabet.
    russian_counts = l.TextInfo(
        self.alph_rus, ENCODING, self.big_rus_file).find_info(100)
    l.write_json_in_file(
        OUTPUT_FILE, russian_counts.make_count_dict(), ENCODING)

    # Gather English counts and try to merge the previously written file.
    english_text = l.TextInfo(self.alph_en, ENCODING, self.first_file)
    english_counts = english_text.find_info(100)
    english_counts.update_count_info(
        OUTPUT_FILE, english_text.alph, 100, ENCODING)

    self.assertDictEqual(
        self.count_first, english_counts.make_count_dict())
def test__frequency_updated(self):
    """Check the frequency dict after merging counts from two files.

    Counts from ``self.first_file`` are written to ``OUTPUT_FILE``; counts
    from ``self.second_file`` are then updated from that file and the
    resulting frequency dict is compared with ``self.frequency_updated``.
    """
    # Persist the counts of the first file.
    first_counts = l.TextInfo(
        self.alph_en, ENCODING, self.first_file).find_info(100)
    l.write_json_in_file(
        OUTPUT_FILE, first_counts.make_count_dict(), ENCODING)

    # Count the second file and fold the persisted counts in.
    second_text = l.TextInfo(self.alph_en, ENCODING, self.second_file)
    second_counts = second_text.find_info(100)
    second_counts.update_count_info(
        OUTPUT_FILE, second_text.alph, 100, ENCODING)

    merged_frequency = second_counts.make_frequency_dict()
    self.assertDictEqual(merged_frequency, self.frequency_updated)
def test_change_top_words(self):
    """Check that updating with a limit of 3 leaves exactly 3 words.

    Both files are first analysed with a limit of 10; the update call is
    then made with 3 and ``len(words)`` is expected to be 3.
    """
    # Persist the counts of the first file (limit 10).
    first_counts = l.TextInfo(
        self.alph_en, ENCODING, self.first_file).find_info(10)
    l.write_json_in_file(
        OUTPUT_FILE, first_counts.make_count_dict(), ENCODING)

    # Count the second file (limit 10), then update with a limit of 3.
    second_text = l.TextInfo(self.alph_en, ENCODING, self.second_file)
    second_counts = second_text.find_info(10)
    second_counts.update_count_info(
        OUTPUT_FILE, second_text.alph, 3, ENCODING)

    self.assertEqual(3, len(second_counts.words))
def test_count_updated(self):
    """Check the count dict after merging counts from two files.

    Counts from ``self.first_file`` are written to ``OUTPUT_FILE``; counts
    from ``self.second_file`` are then updated from that file and compared
    with ``self.count_updated``.
    """
    # Persist the counts of the first file.
    first_counts = learner.TextInfo(
        self.alph_en, ENCODING, input_text=self.first_file).find_info(100)
    learner.write_json_in_file(
        OUTPUT_FILE, first_counts.make_count_dict(), ENCODING)

    # Count the second file and fold the persisted counts in.
    second_text = learner.TextInfo(
        self.alph_en, ENCODING, input_text=self.second_file)
    second_counts = second_text.find_info(100)
    second_counts.update_count_info(
        OUTPUT_FILE, second_text.alph, 100, ENCODING)

    self.assertDictEqual(
        second_counts.make_count_dict(), self.count_updated)
def test_count_frequency(self):
    """Compare the frequency dict of ``self.first_file`` with the fixture.

    The expected value is ``self.frequency_first``.
    """
    source = learner.TextInfo(
        self.alph_en, ENCODING, input_text=self.first_file)
    observed = source.find_info(100).make_frequency_dict()
    self.assertDictEqual(observed, self.frequency_first)
def test_count_first(self):
    """Compare the count dict of ``self.first_file`` with the fixture.

    The expected value is ``self.count_first``.
    """
    source = learner.TextInfo(
        self.alph_en, ENCODING, input_text=self.first_file)
    observed = source.find_info(100).make_count_dict()
    self.assertDictEqual(observed, self.count_first)
def test_top_words(self):
    """Check that ``find_info(3)`` yields exactly three words.

    The input is ``self.first_file`` analysed with the English alphabet.
    """
    text_info = learner.TextInfo(
        self.alph_en, ENCODING, input_text=self.first_file)
    count_info = text_info.find_info(3)
    # No diagnostic printing here: on failure the assertion message already
    # reports the mismatching lengths, and stdout stays clean on success.
    self.assertEqual(3, len(count_info.words))
def test_ngramms(self):
    """Compare the n-gram dict of ``self.ngr_file`` with a fixed expectation.

    The expected dict is keyed by the strings "2", "3" and "4", each
    mapping n-gram strings to integer counts.
    """
    expected = {
        "2": {"aa": 2, "ab": 2, "ba": 1},
        "3": {"aaa": 1, "aba": 1, "bab": 1},
        "4": {"abab": 1},
    }
    source = l.TextInfo(self.alph_en, ENCODING, self.ngr_file)
    observed = source.find_info(100).make_ngramms_dict()
    self.assertDictEqual(expected, observed)
def test_file_to_update_is_missing(self):
    """Update counts from a path named "this_file_does_not_exist".

    The count dict built from ``self.first_file`` is expected to equal
    ``self.count_first`` after the update call.
    """
    source = l.TextInfo(self.alph_en, ENCODING, self.first_file)
    counts = source.find_info(100)
    counts.update_count_info(
        "this_file_does_not_exist", source.alph, 100, ENCODING)
    self.assertDictEqual(self.count_first, counts.make_count_dict())
def test_default_top_words(self):
    """Check that ``find_info(None)`` yields 100 words.

    The input is ``self.big_en_file`` analysed with the English alphabet.
    """
    source = learner.TextInfo(
        self.alph_en, ENCODING, input_text=self.big_en_file)
    word_total = len(source.find_info(None).words)
    self.assertEqual(100, word_total)
def test_top_words(self):
    """Check that ``find_info(3)`` yields exactly three words.

    The input is ``self.first_file`` analysed with the English alphabet.
    """
    source = l.TextInfo(self.alph_en, ENCODING, self.first_file)
    word_total = len(source.find_info(3).words)
    self.assertEqual(3, word_total)