Example #1
0
 def test_predict_next_word_returns_only_possible_answer_for_longer_corpus(
         self):
     """Check 'enough to' is followed by 'properly' in a fifteen-word corpus."""
     corpus = ('three whole words are not enough to properly test '
               'this method so how about fifteen')
     model = Trigram(corpus)
     model.parse()
     self.assertEqual('properly', model.predict_next_word(bigram='enough to'))
def main(args):
  """Train unigram, bigram and trigram models and write dev-set predictions.

  Args:
    args: Argument list in ``sys.argv`` form: program name, training file,
      dev file, and an optional fourth element naming the output directory
      (OUTPUT_DIR_DEFAULT is used when it is absent).

  Prints a usage message and exits with status 1 when the list does not
  hold 3 or 4 elements.
  """
  logging.basicConfig(level=LOGGING_LEVEL, format="DEBUG: %(message)s")

  if len(args) < 3 or len(args) > 4:
    # A single parenthesized argument prints identically on Python 2 and 3.
    print('usage: %s training-file dev-file [output-dir]' % args[0])
    print('       output-dir is optional, default is "%s"' % OUTPUT_DIR_DEFAULT)
    sys.exit(1)

  training_filename = args[1]
  dev_filename = args[2]
  output_dir = args[3] if len(args) == 4 else OUTPUT_DIR_DEFAULT

  logging.debug('Training models...')

  # train all the models!
  unigram_model = Unigram(training_filename)
  logging.debug('Done training unigram model')
  bigram_model = Bigram(training_filename)
  logging.debug('Done training bigram model')
  trigram_model = Trigram(training_filename)
  logging.debug('Done training trigram model')

  # Read the dev set inside a context manager so the file handle is closed
  # as soon as the lines have been collected.
  with open(dev_filename, 'r') as dev_file:
    dev_words = [line.strip() for line in dev_file]

  # write predictions out to disk
  unigram_model.write_probability_list(dev_words, get_output_filename(output_dir, dev_filename, 'unigram'))
  logging.debug('Wrote dev set predictions using unigram model')
  bigram_model.write_probability_list(dev_words, get_output_filename(output_dir, dev_filename, 'bigram'))
  logging.debug('Wrote dev set predictions using bigram model')
  trigram_model.write_probability_list(dev_words, get_output_filename(output_dir, dev_filename, 'trigram'))
  logging.debug('Wrote dev set predictions using trigram model')
Example #3
0
 def test_mapbox_load_from_file_populates_input_text(self, mock_os_path):
     """Check input_text equals the mocked file contents after loading."""
     mock_os_path.exists.return_value = True
     model = Trigram()
     fake_open = mock_open(read_data='three whole words')
     with patch.object(builtins, 'open', fake_open):
         model.load_from_file(filename='filename.txt')
     self.assertEqual('three whole words', model.input_text)
class TestPickle(unittest.TestCase):
  """Save/load round-trip tests for the n-gram model classes."""

  def setUp(self):
    """Train a Trigram model on the training file before each test."""
    self.model = Trigram()
    self.model.train('./data/trainA.txt')

  # NOTE: disabled test, kept for reference. It round-trips self.model through
  # pickle.dumps/pickle.loads directly rather than the save/load methods.
  #def test_pickle(self):
  #  pkl = pickle.dumps(self.model, protocol=1)
  #  pickled_model = pickle.loads(pkl)

  #  word = 'CC'
  #  history = ['NNP', 'RB', 'JJ']

  #  self.assertEquals(self.model.get_probability(word, history),
  #                    pickled_model.get_probability(word, history))


  def test_model_pickle(self):
    """Check each model class gives the same probability after save/load."""
    models = [Unigram, Bigram, Trigram, Fourgram, Fivegram]

    for model in models:
      m = model()
      m.train('./data/trainA.txt')

      # Format with the class name: interpolating the class object itself
      # embeds its repr (e.g. "<class '...'>") in the file name.
      fname = '/tmp/test_%s.pkl' % model.__name__
      m.save(fname)

      loaded_m = model()
      loaded_m.load(fname)

      word = 'CC'
      history = ['NNP', 'RB', 'JJ']

      # assertEqual replaces the deprecated assertEquals alias.
      self.assertEqual(m.get_probability(word, history),
                       loaded_m.get_probability(word, history))
Example #5
0
 def test_mapbox_load_from_file_checks_for_file_existance(
         self, mock_os_path):
     """Check load_from_file calls os.path.exists once with the filename."""
     mock_os_path.exists.return_value = True
     model = Trigram()
     fake_open = mock_open(read_data='three whole words')
     with patch.object(builtins, 'open', fake_open):
         model.load_from_file(filename='filename.txt')
     mock_os_path.exists.assert_called_once_with('filename.txt')
Example #6
0
class Author(object):
    """A named author holding one unigram, one bigram and one trigram model."""

    # Constructor.
    def __init__(self, name):
        """Store the name and create a fresh model of each order.

        Args:
            name: Value returned unchanged by getName().
        """
        # The models live on the instance only. Class-level defaults would be
        # built once at class-definition time and then shadowed here by every
        # instance, so they are not declared.
        self.__name = name
        self.__unigram = Unigram()
        self.__bigram = Bigram()
        self.__trigram = Trigram()

    # Getters.
    def getUnigram(self):
        """Return this author's unigram model."""
        return self.__unigram

    def getBigram(self):
        """Return this author's bigram model."""
        return self.__bigram

    def getTrigram(self):
        """Return this author's trigram model."""
        return self.__trigram

    def getName(self):
        """Return the name given to the constructor."""
        return self.__name

    # Caller method, it is used for counting frequency in the unigram, bigram and trigram.
    def counterCaller(self, separated_line):
        """Pass separated_line to the counter method of all three models."""
        self.__unigram.counter(separated_line)
        self.__bigram.counter(separated_line)
        self.__trigram.counter(separated_line)

    # Caller method, it is used for generating new text with respect to unigram, bigram and trigram.
    def generatorCaller(self, uni_list, bi_list, tri_list):
        """Pass each list to the generator method of the matching model.

        Args:
            uni_list: Argument forwarded to the unigram model's generator.
            bi_list: Argument forwarded to the bigram model's generator.
            tri_list: Argument forwarded to the trigram model's generator.
        """
        self.__unigram.generator(uni_list)
        self.__bigram.generator(bi_list)
        self.__trigram.generator(tri_list)
Example #7
0
 def test_parse_raises_error_if_text_has_less_than_three_words(self):
     """Check parse raises ValueError for a two-word input text."""
     model = Trigram('two words')
     with self.assertRaises(ValueError):
         model.parse()
Example #8
0
 def test_parse_makes_trigram_map_a_dictionary(self):
     """Check the map attribute is a dict once parse has run."""
     model = Trigram("the quick brown fox jumped over the fence")
     model.parse()
     parsed_map = model.map
     self.assertIsInstance(parsed_map, dict)
Example #9
0
 def test_generate_text_limited_by_max_words_property(self):
     """Check generate_text yields four words when max_words=4."""
     model = Trigram("sorry sorry sorry")
     model.parse()
     self.assertEqual(
         "sorry sorry sorry sorry",
         model.generate_text(start_text="sorry sorry", max_words=4),
     )
Example #10
0
 def test_trigram_constructor_instantiates_empty_trigram_map_dict(self):
     """Check a Trigram built without arguments starts with an empty map."""
     fresh_map = Trigram().map
     self.assertEqual(fresh_map, {})
Example #11
0
 def test_generate_text_limited_by_max_words_property(self):
     """Check generate_text yields four words when max_words=4."""
     model = Trigram('sorry sorry sorry')
     model.parse()
     self.assertEqual(
         'sorry sorry sorry sorry',
         model.generate_text(start_text='sorry sorry', max_words=4),
     )
Example #12
0
 def test_mapbox_load_from_file_errors_if_no_file(self, mock_os_path):
     """Check load_from_file raises IOError when os.path.exists is False."""
     mock_os_path.exists.return_value = False
     model = Trigram()
     with self.assertRaises(IOError):
         model.load_from_file('filename.txt')
Example #13
0
 def test_predict_next_word_returns_string(self):
     """Check predict_next_word returns a str for a known bigram."""
     model = Trigram("three whole words")
     model.parse()
     self.assertIsInstance(model.predict_next_word(bigram="three whole"), str)
Example #14
0
 def test_generate_text_errors_if_start_text_is_less_than_two_words(self):
     """Check generate_text raises ValueError for a one-word start text."""
     model = Trigram("three whole words")
     model.parse()
     with self.assertRaises(ValueError):
         model.generate_text("three")
Example #15
0
 def test_parse_return_dict_value_counters_increment_to_2(self):
     """Check a trigram that occurs twice in the text is counted as 2."""
     model = Trigram("three whole words and three whole words")
     model.parse()
     followers = model.map["three whole"]
     self.assertEqual(2, followers["words"])
Example #16
0
 def test_running_parse_twice_with_append_map_true_double_counts(self):
     """Check a second parse with append_map=True raises the count to 2."""
     model = Trigram("three whole words")
     model.parse()
     model.parse(append_map=True)
     followers = model.map["three whole"]
     self.assertEqual(2, followers["words"])
Example #17
0
 def test_parse_return_dict_has_second_bigram_as_key(self):
     """Check the second bigram of the text maps to a Counter."""
     model = Trigram("four whole real words")
     model.parse()
     second_entry = model.map["whole real"]
     self.assertIsInstance(second_entry, Counter)
Example #18
0
 def test_parse_return_dict_values_are_counters(self):
     """Check the value stored under a parsed bigram is a Counter."""
     model = Trigram("three whole words")
     model.parse()
     first_entry = model.map["three whole"]
     self.assertIsInstance(first_entry, Counter)
Example #19
0
 def test_parse_adds_dictionary_entry_of_first_two_words(self):
     """Check the first key of the parsed map is the text's first two words."""
     trigram = Trigram("three whole words")
     trigram.parse()
     # dict.keys() is a non-subscriptable view on Python 3, so take the first
     # key through the iterator; this works on Python 2 as well.
     first_key = next(iter(trigram.map))
     self.assertEqual("three whole", first_key)
Example #20
0
 def test_predict_next_word_returns_third_word_for_trigram_input(self):
     """Check the third word is predicted from the first two of a trigram."""
     model = Trigram('three whole words')
     model.parse()
     self.assertEqual('words', model.predict_next_word(bigram='three whole'))
Example #21
0
 def test_predict_next_word_returns_third_word_for_trigram_input(self):
     """Check the third word is predicted from the first two of a trigram."""
     model = Trigram("three whole words")
     model.parse()
     self.assertEqual("words", model.predict_next_word(bigram="three whole"))
Example #22
0
 def test_generate_text_returns_only_start_text_if_no_match(self):
     """Check generate_text echoes a start text that is not in the map."""
     model = Trigram("three whole words")
     model.parse()
     generated = model.generate_text(start_text="what the")
     self.assertEqual("what the", generated)
Example #23
0
 def test_predict_next_word_returns_only_possible_answer_for_longer_corpus(self):
     """Check 'enough to' is followed by 'properly' in a fifteen-word corpus."""
     corpus = "three whole words are not enough to properly test " "this method so how about fifteen"
     model = Trigram(corpus)
     model.parse()
     self.assertEqual("properly", model.predict_next_word(bigram="enough to"))
Example #24
0
 def test_generate_text_returns_only_start_text_if_no_match(self):
     """Check generate_text echoes a start text that is not in the map."""
     model = Trigram('three whole words')
     model.parse()
     generated = model.generate_text(start_text='what the')
     self.assertEqual('what the', generated)
Example #25
0
 def test_predict_next_word_returns_most_likely_word(self):
     """Check the follower seen twice ('this') beats the one seen once."""
     corpus = "two words this " "two words that " "two words this"
     model = Trigram(corpus)
     model.parse()
     self.assertEqual("this", model.predict_next_word(bigram="two words"))
Example #26
0
 def test_mapbox_load_from_file_checks_for_file_existance(self, mock_os_path):
     """Check load_from_file calls os.path.exists once with the filename."""
     mock_os_path.exists.return_value = True
     model = Trigram()
     fake_open = mock_open(read_data="three whole words")
     with patch.object(builtins, "open", fake_open):
         model.load_from_file(filename="filename.txt")
     mock_os_path.exists.assert_called_once_with("filename.txt")
Example #27
0
 def test_predict_next_word_throws_key_error_if_map_missing_bigram(self):
     """Check predict_next_word raises KeyError for an unseen bigram."""
     model = Trigram("three whole words")
     model.parse()
     with self.assertRaises(KeyError):
         model.predict_next_word("a word")
Example #28
0
 def test_trigram_constructor_saves_input_text(self):
     """Check the constructor argument is exposed as input_text."""
     expected_text = 'the quick brown fox jumped over the fence'
     model = Trigram(expected_text)
     self.assertEqual(model.input_text, expected_text)
Example #29
0
 def test_parse_return_dict_values_are_counters(self):
     """Check the value stored under a parsed bigram is a Counter."""
     model = Trigram('three whole words')
     model.parse()
     first_entry = model.map['three whole']
     self.assertIsInstance(first_entry, Counter)
Example #30
0
 def test_trigram_constructor_default_is_empty_string(self):
     """Check input_text is '' when the constructor gets no argument."""
     default_text = Trigram().input_text
     self.assertEqual(default_text, '')
Example #31
0
 def test_parse_return_dict_value_counters_increment_to_2(self):
     """Check a trigram that occurs twice in the text is counted as 2."""
     model = Trigram('three whole words and three whole words')
     model.parse()
     followers = model.map['three whole']
     self.assertEqual(2, followers['words'])
Example #32
0
 def test_parse_makes_trigram_map_a_dictionary(self):
     """Check the map attribute is a dict once parse has run."""
     model = Trigram('the quick brown fox jumped over the fence')
     model.parse()
     parsed_map = model.map
     self.assertIsInstance(parsed_map, dict)
Example #33
0
 def test_predict_next_word_returns_string(self):
     """Check predict_next_word returns a str for a known bigram."""
     model = Trigram('three whole words')
     model.parse()
     self.assertIsInstance(model.predict_next_word(bigram='three whole'), str)
Example #34
0
 def test_parse_adds_dictionary_entry_of_first_two_words(self):
     """Check the first key of the parsed map is the text's first two words."""
     trigram = Trigram('three whole words')
     trigram.parse()
     # dict.keys() is a non-subscriptable view on Python 3, so take the first
     # key through the iterator; this works on Python 2 as well.
     first_key = next(iter(trigram.map))
     self.assertEqual('three whole', first_key)
Example #35
0
 def test_mapbox_load_from_file_populates_input_text(self, mock_os_path):
     """Check input_text equals the mocked file contents after loading."""
     mock_os_path.exists.return_value = True
     model = Trigram()
     fake_open = mock_open(read_data="three whole words")
     with patch.object(builtins, "open", fake_open):
         model.load_from_file(filename="filename.txt")
     self.assertEqual("three whole words", model.input_text)
Example #36
0
 def test_parse_return_dict_has_second_bigram_as_key(self):
     """Check the second bigram of the text maps to a Counter."""
     model = Trigram('four whole real words')
     model.parse()
     second_entry = model.map['whole real']
     self.assertIsInstance(second_entry, Counter)
 def setUp(self):
   """Create self.model as a Trigram and train it on the training file."""
   self.model = Trigram()
   training_path = './data/trainA.txt'
   self.model.train(training_path)
Example #38
0
 def test_running_parse_twice_with_append_map_true_double_counts(self):
     """Check a second parse with append_map=True raises the count to 2."""
     model = Trigram('three whole words')
     model.parse()
     model.parse(append_map=True)
     followers = model.map['three whole']
     self.assertEqual(2, followers['words'])
Example #39
0
 def test_predict_next_word_throws_key_error_if_map_missing_bigram(self):
     """Check predict_next_word raises KeyError for an unseen bigram."""
     model = Trigram('three whole words')
     model.parse()
     with self.assertRaises(KeyError):
         model.predict_next_word('a word')
Example #40
0
 def test_predict_next_word_errors_if_no_map(self):
     """Check predict_next_word raises ValueError before any parse call."""
     unparsed = Trigram()
     with self.assertRaises(ValueError):
         unparsed.predict_next_word('anything')
Example #41
0
 def test_predict_next_word_returns_most_likely_word(self):
     """Check the follower seen twice ('this') beats the one seen once."""
     corpus = 'two words this ' 'two words that ' 'two words this'
     model = Trigram(corpus)
     model.parse()
     self.assertEqual('this', model.predict_next_word(bigram='two words'))
Example #42
0
 def test_generate_text_returns_string(self):
     """Check generate_text returns a str for a known start text."""
     model = Trigram('three whole words')
     model.parse()
     self.assertIsInstance(model.generate_text(start_text='three whole'), str)
Example #43
0
 def test_generate_text_returns_third_whole_trigram(self):
     """Check generate_text extends the start bigram to the full trigram."""
     model = Trigram("three whole words")
     model.parse()
     generated = model.generate_text(start_text="three whole")
     self.assertEqual("three whole words", generated)
Example #44
0
 def __init__(self, name):
     """Store the given name and create one model of each n-gram order.

     Args:
         name: Value kept on the instance in the private name attribute.
     """
     self.__name = name
     # Each instance gets its own Unigram, Bigram and Trigram objects.
     self.__unigram = Unigram()
     self.__bigram = Bigram()
     self.__trigram = Trigram()
Example #45
0
 def test_generate_text_errors_if_start_text_is_less_than_two_words(self):
     """Check generate_text raises ValueError for a one-word start text."""
     model = Trigram('three whole words')
     model.parse()
     with self.assertRaises(ValueError):
         model.generate_text('three')
Example #46
0
 def test_generate_text_returns_third_whole_trigram(self):
     """Check generate_text extends the start bigram to the full trigram."""
     model = Trigram('three whole words')
     model.parse()
     generated = model.generate_text(start_text='three whole')
     self.assertEqual('three whole words', generated)
Example #47
0
 def test_generate_text_returns_string(self):
     """Check generate_text returns a str for a known start text."""
     model = Trigram("three whole words")
     model.parse()
     self.assertIsInstance(model.generate_text(start_text="three whole"), str)