def test_predict_next_word_returns_only_possible_answer_for_longer_corpus(
        self):
    """Check that 'enough to' predicts 'properly' in a fifteen-word corpus.

    The bigram 'enough to' occurs exactly once in the corpus, so
    'properly' is the only word that ever follows it.
    """
    corpus = ('three whole words are not enough to properly test '
              'this method so how about fifteen')
    model = Trigram(corpus)
    model.parse()
    predicted = model.predict_next_word(bigram='enough to')
    self.assertEqual('properly', predicted)
def main(args):
    """Train unigram, bigram and trigram models and write dev-set predictions.

    Args:
        args: Argument list in ``sys.argv`` form:
            ``[program, training-file, dev-file]`` plus an optional fourth
            element naming the output directory. When the fourth element is
            absent, ``OUTPUT_DIR_DEFAULT`` is used.

    Prints a usage message and exits with status 1 when ``args`` does not
    hold three or four elements.
    """
    logging.basicConfig(level=LOGGING_LEVEL, format="DEBUG: %(message)s")

    if not 3 <= len(args) <= 4:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('usage: %s training-file dev-file [output-dir]' % args[0])
        print(' output-dir is optional, default is "%s"' % OUTPUT_DIR_DEFAULT)
        sys.exit(1)

    training_filename = args[1]
    dev_filename = args[2]
    output_dir = args[3] if len(args) == 4 else OUTPUT_DIR_DEFAULT

    logging.debug('Training models...')

    # train all the models!
    trained_models = []
    for label, model_class in (('unigram', Unigram),
                               ('bigram', Bigram),
                               ('trigram', Trigram)):
        trained_models.append((label, model_class(training_filename)))
        logging.debug('Done training %s model', label)

    # Read the dev words inside a context manager so the file handle is
    # closed deterministically instead of being leaked.
    with open(dev_filename, 'r') as dev_file:
        dev_words = [line.strip() for line in dev_file]

    # write predictions out to disk
    for label, model in trained_models:
        model.write_probability_list(
            dev_words,
            get_output_filename(output_dir, dev_filename, label))
        logging.debug('Wrote dev set predictions using %s model', label)
def test_mapbox_load_from_file_populates_input_text(self, mock_os_path):
    """Check that load_from_file stores the file's contents in input_text.

    ``mock_os_path`` is presumably a patched ``os.path`` injected by a
    decorator outside this block; here it reports that the file exists.
    """
    mock_os_path.exists.return_value = True
    trigram = Trigram()
    fake_open = mock_open(read_data='three whole words')
    # Replace the builtin open() so no real file is touched.
    with patch.object(builtins, 'open', fake_open):
        trigram.load_from_file(filename='filename.txt')
    self.assertEqual('three whole words', trigram.input_text)
class TestPickle(unittest.TestCase):
    """Save/load round-trip tests for the n-gram model classes."""

    def setUp(self):
        """Train a Trigram model on the training file before each test."""
        self.model = Trigram()
        self.model.train('./data/trainA.txt')

    #def test_pickle(self):
    #    pkl = pickle.dumps(self.model, protocol=1)
    #    pickled_model = pickle.loads(pkl)
    #    word = 'CC'
    #    history = ['NNP', 'RB', 'JJ']
    #    self.assertEquals(self.model.get_probability(word, history),
    #                      pickled_model.get_probability(word, history))

    def test_model_pickle(self):
        """Check each model returns the same probability after save/load."""
        word = 'CC'
        history = ['NNP', 'RB', 'JJ']
        for model in (Unigram, Bigram, Trigram, Fourgram, Fivegram):
            m = model()
            m.train('./data/trainA.txt')
            # Format with the class name; formatting the class object itself
            # would embed its repr (e.g. "<class '...'>") in the file name.
            fname = '/tmp/test_%s.pkl' % model.__name__
            m.save(fname)
            loaded_m = model()
            loaded_m.load(fname)
            # assertEqual replaces the deprecated assertEquals alias.
            self.assertEqual(
                m.get_probability(word, history),
                loaded_m.get_probability(word, history)
            )
def test_mapbox_load_from_file_checks_for_file_existance(
        self, mock_os_path):
    """Check that load_from_file calls ``exists`` once with the filename.

    ``mock_os_path`` is presumably a patched ``os.path`` injected by a
    decorator outside this block.
    """
    mock_os_path.exists.return_value = True
    trigram = Trigram()
    fake_open = mock_open(read_data='three whole words')
    # Replace the builtin open() so no real file is touched.
    with patch.object(builtins, 'open', fake_open):
        trigram.load_from_file(filename='filename.txt')
    mock_os_path.exists.assert_called_once_with('filename.txt')
class Author(object):
    """A name bundled with its own unigram, bigram and trigram models.

    All state lives on the instance. No model objects are created at class
    level, so nothing is built at class-definition time or shared between
    authors.
    """

    # Constructor.
    def __init__(self, name):
        """Store ``name`` and create a new model of each order."""
        self.__name = name
        self.__unigram = Unigram()
        self.__bigram = Bigram()
        self.__trigram = Trigram()

    # Getters.
    def getUnigram(self):
        """Return this author's Unigram model."""
        return self.__unigram

    def getBigram(self):
        """Return this author's Bigram model."""
        return self.__bigram

    def getTrigram(self):
        """Return this author's Trigram model."""
        return self.__trigram

    def getName(self):
        """Return the name given to the constructor."""
        return self.__name

    # Caller method, it is used for counting frequency in the unigram, bigram and trigram.
    def counterCaller(self, separated_line):
        """Pass ``separated_line`` to ``counter`` on all three models."""
        self.__unigram.counter(separated_line)
        self.__bigram.counter(separated_line)
        self.__trigram.counter(separated_line)

    # Caller method, it is used for generating new text with respect to unigram, bigram and trigram.
    def generatorCaller(self, uni_list, bi_list, tri_list):
        """Call ``generator`` on each model with its matching list."""
        self.__unigram.generator(uni_list)
        self.__bigram.generator(bi_list)
        self.__trigram.generator(tri_list)
def test_parse_raises_error_if_text_has_less_than_three_words(self):
    """Check that parse() raises ValueError for a two-word input."""
    trigram = Trigram('two words')
    with self.assertRaises(ValueError):
        trigram.parse()
def test_parse_makes_trigram_map_a_dictionary(self):
    """Check that the ``map`` attribute is a dict after parse()."""
    model = Trigram("the quick brown fox jumped over the fence")
    model.parse()
    parsed_map = model.map
    self.assertIsInstance(parsed_map, dict)
def test_generate_text_limited_by_max_words_property(self):
    """Check that generate_text returns four words when max_words=4."""
    model = Trigram("sorry sorry sorry")
    model.parse()
    generated = model.generate_text(start_text="sorry sorry", max_words=4)
    self.assertEqual("sorry sorry sorry sorry", generated)
def test_trigram_constructor_instantiates_empty_trigram_map_dict(self):
    """Check that a newly constructed Trigram has an empty ``map``."""
    initial_map = Trigram().map
    self.assertEqual(initial_map, {})
def test_generate_text_limited_by_max_words_property(self):
    """Check that generate_text returns four words when max_words=4."""
    model = Trigram('sorry sorry sorry')
    model.parse()
    generated = model.generate_text(start_text='sorry sorry', max_words=4)
    self.assertEqual('sorry sorry sorry sorry', generated)
def test_mapbox_load_from_file_errors_if_no_file(self, mock_os_path):
    """Check that load_from_file raises IOError when the file is missing.

    ``mock_os_path`` is presumably a patched ``os.path`` injected by a
    decorator outside this block; here it reports that no file exists.
    """
    mock_os_path.exists.return_value = False
    trigram = Trigram()
    with self.assertRaises(IOError):
        trigram.load_from_file('filename.txt')
def test_predict_next_word_returns_string(self):
    """Check that predict_next_word returns a ``str``."""
    model = Trigram("three whole words")
    model.parse()
    predicted = model.predict_next_word(bigram="three whole")
    self.assertIsInstance(predicted, str)
def test_generate_text_errors_if_start_text_is_less_than_two_words(self):
    """Check that generate_text raises ValueError for a one-word start."""
    model = Trigram("three whole words")
    model.parse()
    with self.assertRaises(ValueError):
        model.generate_text("three")
def test_parse_return_dict_value_counters_increment_to_2(self):
    """Check that a trigram appearing twice in the text is counted twice."""
    model = Trigram("three whole words and three whole words")
    model.parse()
    followers = model.map["three whole"]
    self.assertEqual(2, followers["words"])
def test_running_parse_twice_with_append_map_true_double_counts(self):
    """Check that a second parse(append_map=True) adds to the counts."""
    model = Trigram("three whole words")
    model.parse()
    model.parse(append_map=True)
    followers = model.map["three whole"]
    self.assertEqual(2, followers["words"])
def test_parse_return_dict_has_second_bigram_as_key(self):
    """Check that the second bigram of the text keys a Counter in ``map``."""
    model = Trigram("four whole real words")
    model.parse()
    second_entry = model.map["whole real"]
    self.assertIsInstance(second_entry, Counter)
def test_parse_return_dict_values_are_counters(self):
    """Check that the value stored under a bigram key is a Counter."""
    model = Trigram("three whole words")
    model.parse()
    entry = model.map["three whole"]
    self.assertIsInstance(entry, Counter)
def test_parse_adds_dictionary_entry_of_first_two_words(self):
    """Check that the first key in ``map`` is the text's first two words."""
    trigram = Trigram("three whole words")
    trigram.parse()
    # dict.keys() is a non-subscriptable view on Python 3, so take the first
    # key through iteration; this works on Python 2 and 3 alike.
    first_key = next(iter(trigram.map))
    self.assertEqual("three whole", first_key)
def test_predict_next_word_returns_third_word_for_trigram_input(self):
    """Check that the first two words of a three-word text predict the third."""
    model = Trigram('three whole words')
    model.parse()
    predicted = model.predict_next_word(bigram='three whole')
    self.assertEqual('words', predicted)
def test_predict_next_word_returns_third_word_for_trigram_input(self):
    """Check that the first two words of a three-word text predict the third."""
    model = Trigram("three whole words")
    model.parse()
    predicted = model.predict_next_word(bigram="three whole")
    self.assertEqual("words", predicted)
def test_generate_text_returns_only_start_text_if_no_match(self):
    """Check that an unseen start bigram is returned unchanged."""
    model = Trigram("three whole words")
    model.parse()
    generated = model.generate_text(start_text="what the")
    self.assertEqual("what the", generated)
def test_predict_next_word_returns_only_possible_answer_for_longer_corpus(self):
    """Check that "enough to" predicts "properly" in a fifteen-word corpus.

    The bigram "enough to" occurs exactly once in the corpus, so
    "properly" is the only word that ever follows it.
    """
    corpus = ("three whole words are not enough to properly test "
              "this method so how about fifteen")
    model = Trigram(corpus)
    model.parse()
    predicted = model.predict_next_word(bigram="enough to")
    self.assertEqual("properly", predicted)
def test_generate_text_returns_only_start_text_if_no_match(self):
    """Check that an unseen start bigram is returned unchanged."""
    model = Trigram('three whole words')
    model.parse()
    generated = model.generate_text(start_text='what the')
    self.assertEqual('what the', generated)
def test_predict_next_word_returns_most_likely_word(self):
    """Check that the most frequent follower of a bigram is predicted.

    In the corpus "two words" is followed by "this" twice and by "that"
    once, so "this" is expected.
    """
    corpus = ("two words this "
              "two words that "
              "two words this")
    model = Trigram(corpus)
    model.parse()
    predicted = model.predict_next_word(bigram="two words")
    self.assertEqual("this", predicted)
def test_mapbox_load_from_file_checks_for_file_existance(self, mock_os_path):
    """Check that load_from_file calls ``exists`` once with the filename.

    ``mock_os_path`` is presumably a patched ``os.path`` injected by a
    decorator outside this block.
    """
    mock_os_path.exists.return_value = True
    trigram = Trigram()
    fake_open = mock_open(read_data="three whole words")
    # Replace the builtin open() so no real file is touched.
    with patch.object(builtins, "open", fake_open):
        trigram.load_from_file(filename="filename.txt")
    mock_os_path.exists.assert_called_once_with("filename.txt")
def test_predict_next_word_throws_key_error_if_map_missing_bigram(self):
    """Check that predicting from an unseen bigram raises KeyError."""
    model = Trigram("three whole words")
    model.parse()
    with self.assertRaises(KeyError):
        model.predict_next_word("a word")
def test_trigram_constructor_saves_input_text(self):
    """Check that the constructor stores its argument as ``input_text``."""
    text = 'the quick brown fox jumped over the fence'
    # Pass the same string through the local instead of repeating the literal.
    trigram = Trigram(text)
    self.assertEqual(trigram.input_text, text)
def test_parse_return_dict_values_are_counters(self):
    """Check that the value stored under a bigram key is a Counter."""
    model = Trigram('three whole words')
    model.parse()
    entry = model.map['three whole']
    self.assertIsInstance(entry, Counter)
def test_trigram_constructor_default_is_empty_string(self):
    """Check that ``input_text`` is '' when no text is passed."""
    default_text = Trigram().input_text
    self.assertEqual(default_text, '')
def test_parse_return_dict_value_counters_increment_to_2(self):
    """Check that a trigram appearing twice in the text is counted twice."""
    model = Trigram('three whole words and three whole words')
    model.parse()
    followers = model.map['three whole']
    self.assertEqual(2, followers['words'])
def test_parse_makes_trigram_map_a_dictionary(self):
    """Check that the ``map`` attribute is a dict after parse()."""
    model = Trigram('the quick brown fox jumped over the fence')
    model.parse()
    parsed_map = model.map
    self.assertIsInstance(parsed_map, dict)
def test_predict_next_word_returns_string(self):
    """Check that predict_next_word returns a ``str``."""
    model = Trigram('three whole words')
    model.parse()
    predicted = model.predict_next_word(bigram='three whole')
    self.assertIsInstance(predicted, str)
def test_parse_adds_dictionary_entry_of_first_two_words(self):
    """Check that the first key in ``map`` is the text's first two words."""
    trigram = Trigram('three whole words')
    trigram.parse()
    # dict.keys() is a non-subscriptable view on Python 3, so take the first
    # key through iteration; this works on Python 2 and 3 alike.
    first_key = next(iter(trigram.map))
    self.assertEqual('three whole', first_key)
def test_mapbox_load_from_file_populates_input_text(self, mock_os_path):
    """Check that load_from_file stores the file's contents in input_text.

    ``mock_os_path`` is presumably a patched ``os.path`` injected by a
    decorator outside this block; here it reports that the file exists.
    """
    mock_os_path.exists.return_value = True
    trigram = Trigram()
    fake_open = mock_open(read_data="three whole words")
    # Replace the builtin open() so no real file is touched.
    with patch.object(builtins, "open", fake_open):
        trigram.load_from_file(filename="filename.txt")
    self.assertEqual("three whole words", trigram.input_text)
def test_parse_return_dict_has_second_bigram_as_key(self):
    """Check that the second bigram of the text keys a Counter in ``map``."""
    model = Trigram('four whole real words')
    model.parse()
    second_entry = model.map['whole real']
    self.assertIsInstance(second_entry, Counter)
def setUp(self):
    """Create a Trigram model, train it on the training file, and store it."""
    model = Trigram()
    model.train('./data/trainA.txt')
    self.model = model
def test_running_parse_twice_with_append_map_true_double_counts(self):
    """Check that a second parse(append_map=True) adds to the counts."""
    model = Trigram('three whole words')
    model.parse()
    model.parse(append_map=True)
    followers = model.map['three whole']
    self.assertEqual(2, followers['words'])
def test_predict_next_word_throws_key_error_if_map_missing_bigram(self):
    """Check that predicting from an unseen bigram raises KeyError."""
    model = Trigram('three whole words')
    model.parse()
    with self.assertRaises(KeyError):
        model.predict_next_word('a word')
def test_predict_next_word_errors_if_no_map(self):
    """Check that predicting before any parse() raises ValueError."""
    unparsed = Trigram()
    with self.assertRaises(ValueError):
        unparsed.predict_next_word('anything')
def test_predict_next_word_returns_most_likely_word(self):
    """Check that the most frequent follower of a bigram is predicted.

    In the corpus 'two words' is followed by 'this' twice and by 'that'
    once, so 'this' is expected.
    """
    corpus = ('two words this '
              'two words that '
              'two words this')
    model = Trigram(corpus)
    model.parse()
    predicted = model.predict_next_word(bigram='two words')
    self.assertEqual('this', predicted)
def test_generate_text_returns_string(self):
    """Check that generate_text returns a ``str``."""
    model = Trigram('three whole words')
    model.parse()
    generated = model.generate_text(start_text='three whole')
    self.assertIsInstance(generated, str)
def test_generate_text_returns_third_whole_trigram(self):
    """Check that a two-word start is completed to the full three-word text."""
    model = Trigram("three whole words")
    model.parse()
    generated = model.generate_text(start_text="three whole")
    self.assertEqual("three whole words", generated)
def __init__(self, name):
    """Store ``name`` and create new Unigram, Bigram and Trigram instances."""
    self.__name = name
    # The right-hand side is evaluated left to right, so the models are
    # still constructed in unigram, bigram, trigram order.
    self.__unigram, self.__bigram, self.__trigram = (
        Unigram(), Bigram(), Trigram())
def test_generate_text_errors_if_start_text_is_less_than_two_words(self):
    """Check that generate_text raises ValueError for a one-word start."""
    model = Trigram('three whole words')
    model.parse()
    with self.assertRaises(ValueError):
        model.generate_text('three')
def test_generate_text_returns_third_whole_trigram(self):
    """Check that a two-word start is completed to the full three-word text."""
    model = Trigram('three whole words')
    model.parse()
    generated = model.generate_text(start_text='three whole')
    self.assertEqual('three whole words', generated)
def test_generate_text_returns_string(self):
    """Check that generate_text returns a ``str``."""
    model = Trigram("three whole words")
    model.parse()
    generated = model.generate_text(start_text="three whole")
    self.assertIsInstance(generated, str)