def get_data_from_lang(lang):
    """Return the parsed test sentences for the requested language.

    Args:
        lang: One of ``'swedish'``, ``'danish'`` or ``'english'``.

    Returns:
        Whatever ``parsed_sents()`` yields for the matching test corpus
        obtained from the ``dataset`` module.

    Raises:
        ValueError: If ``lang`` is not one of the three supported names.
    """
    # Loaders are wrapped in lambdas so only the requested corpus is touched;
    # names are compared with ``==`` in the same order as before.
    corpus_loaders = (
        ('swedish', lambda: dataset.get_swedish_test_corpus()),
        ('danish', lambda: dataset.get_danish_test_corpus()),
        ('english', lambda: dataset.get_english_test_corpus()),
    )
    for name, load_corpus in corpus_loaders:
        if lang == name:
            return load_corpus().parsed_sents()

    message = "Please don't use {}, only use english, swedish or danish"
    raise ValueError(message.format(lang))
# Part index paired with the language whose saved model that part evaluates.
_PART_LANGUAGES = ((1, 'english'), (2, 'danish'), (3, 'swedish'))

# LAS values at or above this cap all map to the maximum score of 1.0.
_LAS_CAP = 0.7


def _score_from_las(las):
    """Return the squared ratio of ``las`` (capped at ``_LAS_CAP``) to the cap."""
    return (min(las, _LAS_CAP) / _LAS_CAP) ** 2


def _load_test_sentences(language):
    """Return ``parsed_sents()`` of ``dataset.get_<language>_test_corpus()``."""
    corpus_getter = getattr(dataset, 'get_{}_test_corpus'.format(language))
    return corpus_getter().parsed_sents()


def _evaluate_language(language):
    """Evaluate ``<language>.model`` on its test corpus and return the score."""
    print('Evaluating your {} model ... '.format(language))

    model_file = '{}.model'.format(language)
    # Check for the model before loading the corpus so a missing file is
    # reported immediately.
    if not os.path.exists(model_file):
        template = ('No model. Please save your model as {} at current '
                    'directory before submission.')
        print(template.format(model_file))
        # NOTE(review): exit status 0 is kept from the original code even
        # though this is a failure path -- confirm what the caller expects.
        sys.exit(0)

    testdata = _load_test_sentences(language)
    tp = TransitionParser.load(model_file)
    parsed = tp.parse(testdata)
    uas, las = DependencyEvaluator(testdata, parsed).eval()
    # Single-argument print calls give the same output on Python 2 and 3.
    print('UAS: {}'.format(uas))
    print('LAS: {}'.format(las))
    return _score_from_las(las)


def evaluate_parse(partIdx):
    """Score the saved parser model that belongs to a submission part.

    Part 1 evaluates ``english.model``, part 2 ``danish.model`` and part 3
    ``swedish.model``, each read from the current directory and run on the
    matching test corpus from the ``dataset`` module. The two values
    returned by ``DependencyEvaluator.eval()`` are printed as UAS and LAS.

    Args:
        partIdx: Part number; 1, 2 or 3 select a language.

    Returns:
        ``(min(las, 0.7) / 0.7) ** 2`` for the selected language, or ``None``
        when ``partIdx`` is not 1, 2 or 3.

    Raises:
        SystemExit: With status 0 when the model file does not exist.
    """
    for part, language in _PART_LANGUAGES:
        if partIdx == part:
            return _evaluate_language(language)
    return None
def evaluate_parse(partIdx):
    """Evaluate the saved model for one submission part and return its score.

    ``partIdx`` 1, 2 and 3 select the english, danish and swedish model
    respectively. The model is loaded from ``<language>.model`` in the
    current directory, run on that language's test corpus from the
    ``dataset`` module, and the two values returned by
    ``DependencyEvaluator.eval()`` are printed as UAS and LAS.

    Args:
        partIdx: Part number; 1, 2 or 3 select a language.

    Returns:
        ``(min(las, 0.7) / 0.7) ** 2`` for the selected language, or ``None``
        when ``partIdx`` matches none of the known parts.

    Raises:
        SystemExit: With status 0 when the model file does not exist.
    """
    part_languages = ((1, 'english'), (2, 'danish'), (3, 'swedish'))
    language = next(
        (name for part, name in part_languages if partIdx == part), None)
    if language is None:
        return None

    print('Evaluating your {} model ... '.format(language))

    model_file = '{}.model'.format(language)
    # Look for the model first: it is a cheap check, and a missing file is
    # then reported before any corpus is loaded.
    if not os.path.exists(model_file):
        template = ('No model. Please save your model as {} at current '
                    'directory before submission.')
        print(template.format(model_file))
        # NOTE(review): status 0 on this failure path is inherited from the
        # original code -- confirm what the caller expects.
        sys.exit(0)

    corpus_getter = getattr(dataset, 'get_{}_test_corpus'.format(language))
    testdata = corpus_getter().parsed_sents()

    tp = TransitionParser.load(model_file)
    parsed = tp.parse(testdata)
    ev = DependencyEvaluator(testdata, parsed)
    uas, las = ev.eval()
    # Single-argument print calls behave identically on Python 2 and 3.
    print('UAS: {}'.format(uas))
    print('LAS: {}'.format(las))

    las_cap = 0.7  # LAS at or above the cap yields the maximum score of 1.0.
    return (min(las, las_cap) / las_cap) ** 2
# # SE # tp = TransitionParser(Transition, FeatureExtractor) # tp.train(SE_subdata) # tp.save('swedish.model') # SE_testdata = dataset.get_swedish_test_corpus().parsed_sents() # SE_tp = TransitionParser.load('swedish.model') # SE_parsed = SE_tp.parse(SE_testdata) # # DK tp = TransitionParser(Transition, FeatureExtractor) print('Training...') tp.train(DK_subdata) print('Ok. Saving the model...') tp.save('danish.model') print('Ok. Parsing the test corpus...') DK_testdata = dataset.get_danish_test_corpus().parsed_sents() #DK_tp = TransitionParser.load('danish.model') DK_parsed = tp.parse(DK_testdata) print('Ok.') # with open('english.conll', 'w') as f: # for p in EN_parsed: # f.write(p.to_conll(10).encode('utf-8')) # f.write('\n') # # ev = DependencyEvaluator(EN_testdata, EN_parsed) # print('Evaluating EN model...') # print "LAS: {} \nUAS: {}".format(*ev.eval()) with open('danish.conll', 'w') as f:
#data = dataset.get_korean_train_corpus().parsed_sents() data = dataset.get_danish_train_corpus().parsed_sents() random.seed(1234) subdata = random.sample(data, 200) try: tp = TransitionParser(Transition, FeatureExtractor) tp.train(subdata) #tp.save('swedish.model') #tp.save('korean.model') tp.save('danish.model') #testdata = dataset.get_swedish_test_corpus().parsed_sents() #testdata = dataset.get_korean_test_corpus().parsed_sents() testdata = dataset.get_danish_test_corpus().parsed_sents() #tp = TransitionParser.load('swedish.model') #tp = TransitionParser.load('korean.model') tp = TransitionParser.load('danish.model') parsed = tp.parse(testdata) with open('test.conll', 'w') as f: for p in parsed: f.write(p.to_conll(10).encode('utf-8')) f.write('\n') ev = DependencyEvaluator(testdata, parsed) print "UAS: {} \nLAS: {}".format(*ev.eval())