def get_data_from_lang(lang):
    """Return the parsed sentences of the test corpus for ``lang``.

    Args:
        lang: One of ``'english'``, ``'swedish'`` or ``'danish'``.

    Returns:
        Whatever ``parsed_sents()`` yields on the matching
        ``dataset.get_<lang>_test_corpus()`` object.

    Raises:
        ValueError: If ``lang`` is not one of the three supported names.
    """
    supported = ('english', 'swedish', 'danish')
    # Reject unsupported names up front so the selection below only has to
    # distinguish between known languages.
    if lang not in supported:
        raise ValueError(
            "Please don't use {}, only use english, swedish or danish".format(lang)
        )

    if lang == 'english':
        load_corpus = dataset.get_english_test_corpus
    elif lang == 'danish':
        load_corpus = dataset.get_danish_test_corpus
    else:
        load_corpus = dataset.get_swedish_test_corpus

    return load_corpus().parsed_sents()
# Part index -> language whose saved model is evaluated.
_PART_LANGUAGES = {1: 'english', 2: 'danish', 3: 'swedish'}

# The score stops growing once the reported LAS reaches this value.
_LAS_TARGET = 0.7


def _score_from_las(las):
    """Return ``(min(las, target) / target) ** 2`` for the LAS target."""
    return (min(las, _LAS_TARGET) / _LAS_TARGET) ** 2


def evaluate_parse(partIdx):
    """Evaluate the saved model for one part and return its score.

    The part index selects the language (1 = english, 2 = danish,
    3 = swedish). The language's test corpus is parsed with the model
    stored in ``<language>.model`` in the current directory, the two
    values returned by ``DependencyEvaluator.eval()`` are printed as UAS
    and LAS, and the second one is converted to a score.

    NOTE(review): a second definition of ``evaluate_parse`` appears later
    in this file and will shadow this one -- confirm which is intended.

    Args:
        partIdx: 1, 2 or 3.

    Returns:
        ``(min(las, 0.7) / 0.7) ** 2`` for a known part index, or ``None``
        when ``partIdx`` is not 1, 2 or 3 (nothing is evaluated then).

    Side effects:
        Prints progress and results. If the model file is missing, prints
        a hint and terminates the process via ``sys.exit(0)``.
    """
    language = _PART_LANGUAGES.get(partIdx)
    if language is None:
        # Unknown part: mirror the historical behaviour of falling through
        # every branch and returning None.
        return None

    model_file = '{}.model'.format(language)

    # Single-argument print(...) emits the same text under Python 2 and 3.
    print('Evaluating your {} model ... '.format(language))
    load_corpus = getattr(dataset, 'get_{}_test_corpus'.format(language))
    testdata = load_corpus().parsed_sents()

    if not os.path.exists('./' + model_file):
        print('No model. Please save your model as {} at current directory '
              'before submission.'.format(model_file))
        # NOTE(review): exit status 0 on a failure path is kept as-is
        # because callers may rely on it.
        sys.exit(0)

    tp = TransitionParser.load(model_file)
    parsed = tp.parse(testdata)
    uas, las = DependencyEvaluator(testdata, parsed).eval()
    print('UAS: %s' % (uas,))
    print('LAS: %s' % (las,))

    return _score_from_las(las)
# Part index -> language whose saved model is evaluated.
_PART_LANGUAGES = {1: 'english', 2: 'danish', 3: 'swedish'}

# The score stops growing once the reported LAS reaches this value.
_LAS_TARGET = 0.7


def _score_from_las(las):
    """Return ``(min(las, target) / target) ** 2`` for the LAS target."""
    return (min(las, _LAS_TARGET) / _LAS_TARGET) ** 2


def evaluate_parse(partIdx):
    """Evaluate the saved model for one part and return its score.

    The part index selects the language (1 = english, 2 = danish,
    3 = swedish). The language's test corpus is parsed with the model
    stored in ``<language>.model`` in the current directory, the two
    values returned by ``DependencyEvaluator.eval()`` are printed as UAS
    and LAS, and the second one is converted to a score.

    NOTE(review): an earlier definition of ``evaluate_parse`` exists in
    this file; this later one is the definition that takes effect.

    Args:
        partIdx: 1, 2 or 3.

    Returns:
        ``(min(las, 0.7) / 0.7) ** 2`` for a known part index, or ``None``
        when ``partIdx`` is not 1, 2 or 3 (nothing is evaluated then).

    Side effects:
        Prints progress and results. If the model file is missing, prints
        a hint and terminates the process via ``sys.exit(0)``.
    """
    language = _PART_LANGUAGES.get(partIdx)
    if language is None:
        # Unknown part: mirror the historical behaviour of falling through
        # every branch and returning None.
        return None

    model_file = '{}.model'.format(language)

    # Single-argument print(...) emits the same text under Python 2 and 3.
    print('Evaluating your {} model ... '.format(language))
    load_corpus = getattr(dataset, 'get_{}_test_corpus'.format(language))
    testdata = load_corpus().parsed_sents()

    if not os.path.exists('./' + model_file):
        print('No model. Please save your model as {} at current directory '
              'before submission.'.format(model_file))
        # NOTE(review): exit status 0 on a failure path is kept as-is
        # because callers may rely on it.
        sys.exit(0)

    tp = TransitionParser.load(model_file)
    parsed = tp.parse(testdata)
    uas, las = DependencyEvaluator(testdata, parsed).eval()
    print('UAS: %s' % (uas,))
    print('LAS: %s' % (las,))

    return _score_from_las(las)
if __name__ == '__main__': # data = dataset.get_swedish_train_corpus().parsed_sents() data = dataset.get_english_train_corpus().parsed_sents() # data = dataset.get_danish_train_corpus().parsed_sents() random.seed(1234) subdata = random.sample(data, 400) try: tp = TransitionParser(Transition, FeatureExtractor) tp.train(subdata) # tp.save('swedish.model') tp.save('english.model') # tp.save('danish.model') # testdata = dataset.get_swedish_test_corpus().parsed_sents() testdata = dataset.get_english_test_corpus().parsed_sents() # testdata = dataset.get_danish_test_corpus().parsed_sents() # tp = TransitionParser.load('swedish.model') tp = TransitionParser.load('english.model') # tp = TransitionParser.load('danish.model') parsed = tp.parse(testdata) with open('test.conll', 'w') as f: for p in parsed: f.write(p.to_conll(10).encode('utf-8')) f.write('\n') ev = DependencyEvaluator(testdata, parsed) print "LAS: {} \nUAS: {}".format(*ev.eval())
from providedcode.transitionparser import TransitionParser from providedcode.evaluate import DependencyEvaluator from featureextractor import FeatureExtractor from transition import Transition if __name__ == '__main__': data = dataset.get_english_train_corpus().parsed_sents() random.seed(1234) subdata = random.sample(data, 200) try: #tp = TransitionParser(Transition, FeatureExtractor) #tp.train(subdata) #tp.save('swedish.model') testdata = dataset.get_english_test_corpus().parsed_sents() tp = TransitionParser.load('english.model') parsed = tp.parse(testdata) with open('test.conll', 'w') as f: for p in parsed: f.write(p.to_conll(10).encode('utf-8')) f.write('\n') ev = DependencyEvaluator(testdata, parsed) print "UAS: {} \nLAS: {}".format(*ev.eval()) f.close() # parsing arbitrary sentences (english): # sentence = DependencyGraph.from_sentence('Hi, this is a test')