# Script entry point (Python 2: uses the `print` statement) for training and
# evaluating a transition-based dependency parser.
#
# NOTE(review): this block has been collapsed onto ONE physical line. Taken
# literally, the first `#` comments out the remainder of the line, leaving an
# `if` with no body. The original line breaks must be restored before this
# can run; which statements were live and which were commented out is
# inferred below from the text, not guaranteed.
#
# Apparent statement order (presumably the Swedish variants were the
# commented-out alternatives -- TODO confirm against the original file):
#   1. traindata <- dataset.get_english_train_corpus().parsed_sents()
#   2. inside `try:` build TransitionParser(Transition, FeatureExtractor),
#      call tp.train(traindata), then tp.save('english.model')
#   3. labeleddata / blinddata <- the English dev and dev-blind corpora
#   4. parsed <- tp.parse(blinddata)
#   5. write each parse to 'test.conll' as p.to_conll(10) encoded as UTF-8,
#      each followed by a '\n'
#   6. ev <- DependencyEvaluator(labeleddata, parsed); print the two values
#      returned by ev.eval() as "UAS" and "LAS"
#
# NOTE(review): the `try:` has no `except`/`finally` clause in this view; the
# handler appears to have been cut off and must be recovered, not invented.
if __name__ == '__main__': # traindata = dataset.get_swedish_train_corpus().parsed_sents() traindata = dataset.get_english_train_corpus().parsed_sents() try: tp = TransitionParser(Transition, FeatureExtractor) tp.train(traindata) # tp.save('swedish.model') # labeleddata = dataset.get_swedish_dev_corpus().parsed_sents() # blinddata = dataset.get_swedish_dev_blind_corpus().parsed_sents() tp.save('english.model') labeleddata = dataset.get_english_dev_corpus().parsed_sents() blinddata = dataset.get_english_dev_blind_corpus().parsed_sents() #tp = TransitionParser.load('badfeatures.model') # parsed = tp.parse(labeleddata) parsed = tp.parse(blinddata) with open('test.conll', 'w') as f: for p in parsed: f.write(p.to_conll(10).encode('utf-8')) f.write('\n') ev = DependencyEvaluator(labeleddata, parsed) print "UAS: {} \nLAS: {}".format(*ev.eval())
# Script entry point for training a transition-based dependency parser and
# parsing a blind dev set, with Swedish / English / Danish alternatives
# written next to each other.
#
# NOTE(review): this block has been collapsed onto ONE physical line. Taken
# literally, the first `#` comments out the remainder of the line, leaving an
# `if` with no body. The live/commented split described below is inferred
# from the text and must be confirmed when the line breaks are restored.
#
# Apparent statement order (presumably English is the active language and
# the Swedish/Danish lines are commented-out alternatives -- TODO confirm):
#   1. traindata <- dataset.get_english_train_corpus().parsed_sents()
#   2. inside `try:` build TransitionParser(Transition, FeatureExtractor)
#      and call tp.train(traindata)
#   3. every tp.save(...) call is preceded by `#`/`###`, so it looks like no
#      model file is written by this variant -- verify that is intended
#   4. labeleddata / blinddata <- the English dev and dev-blind corpora
#   5. parsed <- tp.parse(blinddata)
#   6. write each parse to 'test.conll' as p.to_conll(10) encoded as UTF-8,
#      each followed by a '\n'
#   7. ev <- DependencyEvaluator(labeleddata, parsed)
#
# NOTE(review): the fragment stops right after `ev` is constructed; no call
# to ev.eval() and no `except`/`finally` for the `try:` are visible here.
if __name__ == '__main__': #traindata = dataset.get_swedish_train_corpus().parsed_sents() traindata = dataset.get_english_train_corpus().parsed_sents() #traindata = dataset.get_danish_train_corpus().parsed_sents() try: tp = TransitionParser(Transition, FeatureExtractor) tp.train(traindata) #tp.save('swedish.model') #tp.save('english.model') ### tp.save('danish.model') #labeleddata = dataset.get_swedish_dev_corpus().parsed_sents() labeleddata = dataset.get_english_dev_corpus().parsed_sents() #labeleddata = dataset.get_danish_dev_corpus().parsed_sents() #blinddata = dataset.get_swedish_dev_blind_corpus().parsed_sents() blinddata = dataset.get_english_dev_blind_corpus().parsed_sents() #blinddata = dataset.get_danish_dev_blind_corpus().parsed_sents() #tp = TransitionParser.load('badfeatures.model') parsed = tp.parse(blinddata) with open('test.conll', 'w') as f: for p in parsed: f.write(p.to_conll(10).encode('utf-8')) f.write('\n') ev = DependencyEvaluator(labeleddata, parsed)
# Mid-script fragment (Python 2: uses the `print` statement) that reports
# Swedish results, then trains and tests an English parser, then trains a
# Danish parser.
#
# NOTE(review): this fragment has been collapsed onto ONE physical line and
# starts part-way through a block: the leading f.write('\n') refers to a
# file object `f` whose opening statement is not visible here. `testdata`,
# `parsed`, `english_subdata` and `danish_subdata` are likewise defined
# outside this view. Taken literally, the `# english` marker comments out
# everything after it on the line; the order below is the apparent intent.
#
# Apparent statement order:
#   1. ev <- DependencyEvaluator(testdata, parsed); print 'Swedish results'
#      and the two values from ev.eval() as "UAS" and "LAS"
#   2. English: train a new TransitionParser(Transition, FeatureExtractor)
#      on english_subdata and save it to 'english.model'
#   3. rebind `tpe` to TransitionParser.load('english.model'), so the parser
#      used for testing is the one read back from disk
#   4. parse testdataE (the English dev corpus) and evaluate the result
#      against that same testdataE; print 'English results' and UAS/LAS
#   5. Danish: train a new parser on danish_subdata and save it to
#      'danish.model'
#
# NOTE(review): no Danish testing step is visible; the fragment appears to
# continue past the end of this view.
f.write('\n') ev = DependencyEvaluator(testdata, parsed) print 'Swedish results' print "UAS: {} \nLAS: {}".format(*ev.eval()) # english print '\n----------------------\n' print 'Training english' tpe = TransitionParser(Transition, FeatureExtractor) tpe.train(english_subdata) tpe.save('english.model') print 'testing english' testdataE = dataset.get_english_dev_corpus().parsed_sents() tpe = TransitionParser.load('english.model') parsede = tpe.parse(testdataE) eve = DependencyEvaluator(testdataE, parsede) print 'English results' print "UAS: {} \nLAS: {}".format(*eve.eval()) # danish print '\n----------------------\n' print 'Training Danish' tpD = TransitionParser(Transition, FeatureExtractor) tpD.train(danish_subdata) tpD.save('danish.model')