def test_generate_sent_1gram(self):
    """Every sentence sampled from a unigram model uses only known tokens."""
    model = NGram(1, self.sents)
    generator = NGramGenerator(model)
    vocabulary = {'el', 'gato', 'come', 'pescado', '.', 'la', 'gata', 'salmón'}
    # Sample repeatedly: generation is random, so check many draws.
    for _ in range(100):
        sent = generator.generate_sent()
        self.assertTrue(set(sent) <= vocabulary)
def test_generate_sent_2gram(self):
    """Every sentence sampled from a bigram model is one of the possible ones."""
    model = NGram(2, self.sents)
    generator = NGramGenerator(model)
    # All the sentences a bigram model trained on self.sents can produce.
    expected = [
        'el gato come pescado .',
        'la gata come salmón .',
        'el gato come salmón .',
        'la gata come pescado .',
    ]
    # Sample repeatedly: generation is random, so check many draws.
    for _ in range(100):
        sent = generator.generate_sent()
        joined = ' '.join(sent)
        self.assertTrue(joined in expected, sent)
generate.py -h | --help Options: -i <file> Language model file. -n <n> Number of sentences to generate. -h --help Show this screen. """ from docopt import docopt import pickle from languagemodeling.ngram_generator import NGramGenerator if __name__ == '__main__': opts = docopt(__doc__) # load the model filename = opts['-i'] f = open(filename, 'rb') model = pickle.load(f) f.close() # build generator generator = NGramGenerator(model) # generate sentences n = int(opts['-n']) for i in range(n): sent = generator.generate_sent() print(' '.join(sent))