Example 1
def main():
    # Load the tokenized, lowercased parallel corpora (English and Spanish).
    english = tokenize("data/100ktok.low.en")
    spanish = tokenize("data/100ktok.low.es")

    # Split the parallel data into training, held-out, and test portions.
    training_set, held_out_set, test_set = get_datasets(english, spanish)
    # Load the word-level translation lexicon.
    translations = get_word_translations("100000_trans.txt")
    search = BeamSearch(training_set, held_out_set, translations)

    print(search.translate(test_set[8]))
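The helpers these examples call (tokenize, get_datasets, get_word_translations) are not shown on this page. As a rough sketch of what they presumably do: here tokenize reads one space-separated sentence per line and get_word_translations reads a tab-separated source/target lexicon; both file formats are guesses from the file names, not confirmed by the examples.

def tokenize(path):
    # Read a pre-tokenized corpus: one sentence per line, tokens separated
    # by spaces (format assumed from the '100ktok.low.*' file names).
    with open(path, encoding="utf-8") as f:
        return [line.split() for line in f]


def get_word_translations(path):
    # Load a word-level lexicon mapping each source word to its candidate
    # target words. The 'source<TAB>target' line format is an assumption;
    # adjust to match the real '*_trans.txt' layout.
    translations = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            source, target = line.rstrip("\n").split("\t")
            translations.setdefault(source, []).append(target)
    return translations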
Example 2
def main():
    # Load the tokenized, lowercased parallel corpora.
    english = tokenize("data/100ktok.low.en")
    spanish = tokenize("data/100ktok.low.es")

    # Split the data; translated_set holds the reference translations
    # aligned with test_set.
    training_set, test_set, translated_set = get_datasets(english, spanish)
    translations = get_word_translations("3000_trans.txt")

    print("Original Sentence:", ' '.join(test_set[0]))

    # Baseline: word-by-word dictionary translation.
    translator = DirectTrans(translations)
    print("Direct Translation:", ' '.join(translator.translate(test_set[0])))

    # Beam-search translation compared against the reference.
    search = BeamSearch(training_set, translations)
    print("Beam Translation:", ' '.join(search.translate(test_set[0])))
    print("True Translation:", ' '.join(translated_set[0]))
Example 3

def main():
    # Load the tokenized, lowercased parallel corpora.
    english = tokenize("data/100ktok.low.en")
    spanish = tokenize("data/100ktok.low.es")

    training_set, test_set, translated_set = get_datasets(english, spanish)
    translations = get_word_translations("3000_trans.txt")
    search = BeamSearch(training_set, translations)

    # Write the beam-search output and the reference translations to
    # parallel files, one sentence per line, for later evaluation.
    with open('trans_beam.txt', 'w') as test_output, \
         open('trans_true.txt', 'w') as true_output:
        for i, sentence in enumerate(test_set):
            print("Translating sentence", i, "...")
            test_output.write(' '.join(search.translate(sentence)) + "\n")
            true_output.write(' '.join(translated_set[i]) + "\n")
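BeamSearch is the only nontrivial component and is also not shown. The sketch below illustrates the general idea under stated assumptions: per-word translation candidates rescored by an add-one-smoothed bigram language model, with training_set taken to be a list of target-side token lists and beam_width a hypothetical parameter. The real class (which in Example 1 also takes a held-out set, presumably for tuning) almost certainly does more, such as better smoothing and word reordering.

import math
from collections import defaultdict


class BeamSearch:
    def __init__(self, training_set, translations, beam_width=5):
        self.translations = translations
        self.beam_width = beam_width
        # Count bigrams over the (assumed) target-side training sentences.
        self.bigrams = defaultdict(lambda: defaultdict(int))
        self.unigrams = defaultdict(int)
        for sentence in training_set:
            prev = "<s>"
            for word in sentence:
                self.bigrams[prev][word] += 1
                self.unigrams[prev] += 1
                prev = word

    def _score(self, prev, word):
        # Add-one smoothed log bigram probability (a deliberate simplification).
        vocab = len(self.unigrams) + 1
        count = self.bigrams.get(prev, {}).get(word, 0)
        return math.log((count + 1) / (self.unigrams.get(prev, 0) + vocab))

    def translate(self, sentence):
        # Each hypothesis is (log probability, words so far).
        beam = [(0.0, ["<s>"])]
        for token in sentence:
            candidates = self.translations.get(token, [token])
            expanded = [(lp + self._score(words[-1], cand), words + [cand])
                        for lp, words in beam
                        for cand in candidates]
            # Keep only the best few hypotheses at each step.
            expanded.sort(key=lambda h: h[0], reverse=True)
            beam = expanded[:self.beam_width]
        return beam[0][1][1:]  # drop the <s> marker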