Esempio n. 1
0
def main():
    """Train and save one transition-parser model per requested language.

    The languages are read from the command-line arguments
    (``sys.argv[1:]``); when no arguments are given, ``'swedish'``,
    ``'danish'`` and ``'english'`` are used.  For every language the
    training data returned by ``get_train_data_from_lang`` is randomly
    sub-sampled to at most 200 items, a fresh ``TransitionParser`` is
    trained on that subset, and the parser is saved via
    ``tp.save(lang + '.model')``.

    Raises whatever ``get_train_data_from_lang``, ``TransitionParser.train``
    or ``TransitionParser.save`` raise; nothing is caught here.
    """
    sample_size = 200

    # Fall back to the default language set when no arguments are supplied.
    lang_train_list = sys.argv[1:] or ['swedish', 'danish', 'english']

    # Fixed seed so the sampled subsets are the same on every run.
    random.seed(1126)

    for lang in lang_train_list:
        whole_data = get_train_data_from_lang(lang)
        # random.sample raises ValueError when asked for more items than
        # the population contains, so cap the request at the data size.
        subdata = random.sample(whole_data,
                                min(sample_size, len(whole_data)))
        tp = TransitionParser(Transition, FeatureExtractor)
        # A parenthesised single-argument print produces the same output
        # under the Python 2 print statement and the Python 3 function.
        print('\n===== Start training {} data ====='.format(lang))
        tp.train(subdata)
        tp.save(lang + '.model')

    print('===== Sucessfully generating models =====')
Esempio n. 2
0
from providedcode import dataset
from providedcode.transitionparser import TransitionParser
from providedcode.evaluate import DependencyEvaluator
from featureextractor import FeatureExtractor
from transition import Transition

if __name__ == '__main__':
    # traindata = dataset.get_swedish_train_corpus().parsed_sents()
    traindata = dataset.get_english_train_corpus().parsed_sents()

    try:

        tp = TransitionParser(Transition, FeatureExtractor)
        tp.train(traindata)

        # tp.save('swedish.model')
        # labeleddata = dataset.get_swedish_dev_corpus().parsed_sents()
        # blinddata = dataset.get_swedish_dev_blind_corpus().parsed_sents()

        tp.save('english.model')
        labeleddata = dataset.get_english_dev_corpus().parsed_sents()
        blinddata = dataset.get_english_dev_blind_corpus().parsed_sents()

        #tp = TransitionParser.load('badfeatures.model')

        # parsed = tp.parse(labeleddata)
        parsed = tp.parse(blinddata)

        with open('test.conll', 'w') as f:
            for p in parsed:
                f.write(p.to_conll(10).encode('utf-8'))
        parsed = tp.parse(testdata)

        with open('test.conll', 'w') as f:
            for p in parsed:
                f.write(p.to_conll(10).encode('utf-8'))
                f.write('\n')

        ev = DependencyEvaluator(testdata, parsed)
        print "Bad Features Results"
        print "UAS: {} \nLAS: {}".format(*ev.eval())
        t1 = time.time()
        print "Time: " + str(t1 - t0) + '\n'

        # SWEDISH FEATURE MODELS
        print 'Starting Swedish'
        tp_s = TransitionParser(Transition, FeatureExtractor)
        tp_s.train(subdata)
        tp_s.save('swedish.model')

        testdata = dataset.get_swedish_test_corpus().parsed_sents()
        tp_s = TransitionParser.load('swedish.model')

        parsed = tp_s.parse(testdata)

        with open('swedish.conll', 'w') as f:
            for p in parsed:
                f.write(p.to_conll(10).encode('utf-8'))
                f.write('\n')

        ev = DependencyEvaluator(testdata, parsed)
        print "Swedish Results"