Example no. 1
0
import argparse
import pickle

# tokenize, lower, prepend_caret and NPLM are assumed to be defined
# elsewhere in the surrounding project.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file', type=argparse.FileType())
    parser.add_argument('output_file', type=argparse.FileType('wb'))
    parser.add_argument('vocabulary_size', type=int)
    parser.add_argument('context_size', type=int)
    parser.add_argument('feature_size', type=int)
    parser.add_argument('hidden_size', type=int)
    parser.add_argument('iterations', type=int)
    args = parser.parse_args()

    # Preprocess the corpus: tokenize, lowercase, and prepend the '^'
    # start-of-sentence marker to every sentence.
    sentences = list(prepend_caret(lower(tokenize(args.input_file))))
    nplm = NPLM(args.vocabulary_size, args.feature_size, args.context_size, args.hidden_size)
    nplm.train(sentences, args.iterations)
    # Serialize the trained model to the binary output file.
    pickle.dump(nplm, args.output_file)
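Since the driver serializes the trained model with pickle.dump, it can be restored later with pickle.load. A minimal sketch, assuming the model was written to a file named nplm.pkl (the file name is illustrative, not part of the example above):

import pickle

# Restore the trained NPLM from disk; 'nplm.pkl' is an illustrative path.
with open('nplm.pkl', 'rb') as model_file:
    nplm = pickle.load(model_file)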
Example no. 2
0
import argparse
import pickle

# As in Example no. 1, tokenize, lower, prepend_caret and NPLM are assumed
# to be defined elsewhere in the surrounding project.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file', type=argparse.FileType())
    parser.add_argument('output_file', type=argparse.FileType('wb'))
    parser.add_argument('vocabulary_size', type=int)
    parser.add_argument('context_size', type=int)
    parser.add_argument('feature_size', type=int)
    parser.add_argument('hidden_size', type=int)
    parser.add_argument('iterations', type=int)
    args = parser.parse_args()

    sentences = list(prepend_caret(lower(tokenize(args.input_file))))
    nplm = NPLM(args.vocabulary_size, args.feature_size, args.context_size,
                args.hidden_size)
    nplm.train(sentences, args.iterations)
    pickle.dump(nplm, args.output_file)
Example no. 3
0
import unittest

class PrependCaretTest(unittest.TestCase):  # illustrative test-case name
    # prepend_caret is assumed to be importable from the module under test.
    def test_prepend_caret(self):
        sentences = [['i', 'am', 'beautiful']]
        actual = list(prepend_caret(sentences))
        expected = [['^', 'i', 'am', 'beautiful']]
        self.assertSequenceEqual(expected, actual)
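Based on the expected output in this test, a minimal sketch of prepend_caret, assuming it is written as a generator over tokenized sentences (the project's actual implementation may differ):

def prepend_caret(sentences):
    # Yield each tokenized sentence with the '^' start-of-sentence marker prepended.
    for sentence in sentences:
        yield ['^'] + sentence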