def verify_lang_data(model, conll_output):
    """Parse the test data matching *model* and report the parser's scores.

    The language is derived from the model name with
    ``extract_lang_from_model_name`` and the test sentences come from
    ``get_data_from_lang``; both helpers are defined outside this function.
    The parses are written to *conll_output* in 10-column CoNLL form, one
    blank line after each sentence, and the two values returned by
    ``DependencyEvaluator.eval()`` are printed as UAS and LAS.

    Args:
        model: Name/path of the saved model, passed to
            ``TransitionParser.load``.
        conll_output: Path of the file the parsed sentences are written to.
            The file is opened with mode ``'w'``, so existing content is
            replaced.

    Returns:
        The LAS value (second element of ``ev.eval()``), or ``None`` when a
        ``ValueError`` was raised anywhere in the pipeline.
    """
    try:
        lang = extract_lang_from_model_name(model)
        testdata = get_data_from_lang(lang)
        tp = TransitionParser.load(model)
        parsed = tp.parse(testdata)

        with open(conll_output, 'w') as f:
            for p in parsed:
                # NOTE(review): writing the encoded value relies on Python 2
                # str/bytes semantics for a text-mode file.
                f.write(p.to_conll(10).encode('utf-8'))
                f.write('\n')

        ev = DependencyEvaluator(testdata, parsed)
        uas, las = ev.eval()
        # A single parenthesised argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("\n=====Prediction of {}.model===== \nUAS: {} \nLAS: {}".format(lang, uas, las))
        return las
    except ValueError as e:
        # Best effort: report the problem and signal failure with None
        # instead of propagating the error to the caller.
        print(e)
        return None
# (partIdx, language, test-corpus loader) triples, listed in the order in
# which evaluate_parse compares partIdx against them. The loaders are lambdas
# so that ``dataset`` is only touched when a part is actually evaluated.
_PART_MODELS = (
    (3, 'swedish', lambda: dataset.get_swedish_test_corpus()),
    (1, 'english', lambda: dataset.get_english_test_corpus()),
    (2, 'danish', lambda: dataset.get_danish_test_corpus()),
)


def _score_saved_model(language, load_corpus):
    """Score ``<language>.model`` on the corpus returned by *load_corpus*.

    Exits the process via ``sys.exit(0)`` when the model file is missing from
    the current directory; this check runs before the corpus is loaded so a
    missing model fails fast.

    Returns:
        ``(min(las, 0.7) / 0.7) ** 2``, i.e. LAS capped at 0.7, normalised
        by 0.7 and squared.
    """
    print('Evaluating your %s model ... ' % language)

    model_file = '%s.model' % language
    if not os.path.exists('./' + model_file):
        print('No model. Please save your model as %s at current directory '
              'before submission.' % model_file)
        sys.exit(0)

    testdata = load_corpus().parsed_sents()
    tp = TransitionParser.load(model_file)
    parsed = tp.parse(testdata)

    ev = DependencyEvaluator(testdata, parsed)
    uas, las = ev.eval()
    # One pre-formatted argument keeps the output identical under the
    # Python 2 print statement and the Python 3 print function.
    print('UAS: %s' % (uas,))
    print('LAS: %s' % (las,))

    return (min(las, 0.7) / 0.7) ** 2


def evaluate_parse(partIdx):
    """Evaluate the saved model that belongs to assignment part *partIdx*.

    Args:
        partIdx: 1 selects the english model, 2 the danish model and 3 the
            swedish model.

    Returns:
        The score computed from the model's LAS (see ``_score_saved_model``),
        or ``None`` when *partIdx* matches none of the known parts.

    Side effects:
        Prints progress and scores; calls ``sys.exit(0)`` if the required
        ``<language>.model`` file does not exist in the current directory.
    """
    for part, language, load_corpus in _PART_MODELS:
        if partIdx == part:
            return _score_saved_model(language, load_corpus)
    return None
def train_model(lang, training_set='train'):
    """Train a transition parser for *lang*, save it, and evaluate it.

    At most 200 sentences are used for training (a seeded random sample when
    more are available). The trained model is saved as ``<lang>.model``. The
    scores are appended to ``results.txt`` and printed.

    Args:
        lang: Language name; forwarded to ``get_data`` (defined outside this
            function) and used to build the model file name.
        training_set: Value passed to ``get_data`` as ``dataset`` when
            loading the training sentences.

    Returns:
        The second value returned by ``DependencyEvaluator.eval()``, which
        is reported as LAS.

    Raises:
        ValueError: For ``lang == 'english'`` when no sentence is left over
            after selecting the training sample, so there is nothing to
            evaluate on.
    """
    # load and sample data
    data = get_data(lang, dataset=training_set).parsed_sents()
    if len(data) > 200:
        random.seed(1234)
        subdata = random.sample(data, 200)
    else:
        subdata = data

    # train model and save
    tp = TransitionParser(Transition, FeatureExtractor)
    tp.train(subdata)
    tp.save('{0}.model'.format(lang))

    # test performance on new data
    if lang != 'english':
        testdata = get_data(lang, dataset='test').parsed_sents()
    else:
        # english test data not available, so evaluate on a subset of the
        # training corpus that is disjoint from the sentences trained on
        not_in_training = [sent for sent in data if sent not in subdata]
        if not not_in_training:
            raise ValueError(
                'no held-out english sentences left to evaluate on')
        # Cap the sample size: asking random.sample for more items than
        # exist raises an opaque "sample larger than population" error.
        testdata = random.sample(not_in_training,
                                 min(200, len(not_in_training)))

    parsed = tp.parse(testdata)
    # Evaluate once and reuse the scores for the file, the console and the
    # return value instead of recomputing them for each use.
    uas, las = DependencyEvaluator(testdata, parsed).eval()

    # store and print results
    header = '{0} model:\n'.format(lang)
    scores = "UAS: {} \nLAS: {}\n".format(uas, las)
    with open('results.txt', 'a') as results_file:
        results_file.write(header)
        results_file.write(scores)
    # Single-argument print calls emit the same text under Python 2 and 3.
    print(header)
    print(scores)

    return las
def evaluate_parse(partIdx):
    """Score the saved parser model for one part of the assignment.

    Args:
        partIdx: Part selector: 3 -> swedish, 1 -> english, 2 -> danish.

    Returns:
        ``(min(las, 0.7) / 0.7) ** 2`` for the selected language's model,
        where ``las`` is the second value from ``DependencyEvaluator.eval()``
        on that language's test corpus; ``None`` for any other *partIdx*.

    Side effects:
        Prints progress and the UAS/LAS values. If ``<language>.model`` is
        missing from the current directory, prints a hint and terminates the
        process with ``sys.exit(0)`` before any corpus is loaded.
    """
    # Resolve the language first; the comparisons keep the original order.
    if partIdx == 3:
        language = 'swedish'
    elif partIdx == 1:
        language = 'english'
    elif partIdx == 2:
        language = 'danish'
    else:
        return None

    print('Evaluating your {0} model ... '.format(language))

    model_file = language + '.model'
    if not os.path.exists('./' + model_file):
        # Same wording for every language (the danish variant used to lack
        # the word "as").
        print('No model. Please save your model as {0} at current directory '
              'before submission.'.format(model_file))
        sys.exit(0)

    # dataset exposes one accessor per language, named
    # get_<language>_test_corpus.
    load_corpus = getattr(dataset, 'get_{0}_test_corpus'.format(language))
    testdata = load_corpus().parsed_sents()

    tp = TransitionParser.load(model_file)
    parsed = tp.parse(testdata)
    ev = DependencyEvaluator(testdata, parsed)
    uas, las = ev.eval()

    # Pre-formatted single arguments print identically under the Python 2
    # print statement and the Python 3 print function.
    print('UAS: %s' % (uas,))
    print('LAS: %s' % (las,))

    capped_las = min(las, 0.7)
    return (capped_las / 0.7) ** 2
labeleddata = dataset.get_english_dev_corpus().parsed_sents() #labeleddata = dataset.get_danish_dev_corpus().parsed_sents() #blinddata = dataset.get_swedish_dev_blind_corpus().parsed_sents() blinddata = dataset.get_english_dev_blind_corpus().parsed_sents() #blinddata = dataset.get_danish_dev_blind_corpus().parsed_sents() #tp = TransitionParser.load('badfeatures.model') parsed = tp.parse(blinddata) with open('test.conll', 'w') as f: for p in parsed: f.write(p.to_conll(10).encode('utf-8')) f.write('\n') ev = DependencyEvaluator(labeleddata, parsed) print "UAS: {} \nLAS: {}".format(*ev.eval()) # parsing arbitrary sentences (english): # sentence = DependencyGraph.from_sentence('Hi, this is a test') # tp = TransitionParser.load('english.model') # parsed = tp.parse([sentence]) # print parsed[0].to_conll(10).encode('utf-8') except NotImplementedError: print """ This file is currently broken! We removed the implementation of Transition (in transition.py), which tells the transitionparser how to go from one Configuration to another Configuration. This is an essential part of the arc-eager dependency parsing algorithm, so you should probably fix that :)
try: # tp = TransitionParser(Transition, FeatureExtractor) # tp.train(subdata) # tp.save('swedish.model') testdata = dataset.get_swedish_test_corpus().parsed_sents() tp = TransitionParser.load("badfeatures.model") parsed = tp.parse(testdata) with open("test.conll", "w") as f: for p in parsed: f.write(p.to_conll(10).encode("utf-8")) f.write("\n") ev = DependencyEvaluator(testdata, parsed) print "LAS: {} \nUAS: {}".format(*ev.eval()) # parsing arbitrary sentences (english): # sentence = DependencyGraph.from_sentence('Hi, this is a test') # tp = TransitionParser.load('english.model') # parsed = tp.parse([sentence]) # print parsed[0].to_conll(10).encode('utf-8') except NotImplementedError: print """ This file is currently broken! We removed the implementation of Transition (in transition.py), which tells the transitionparser how to go from one Configuration to another Configuration. This is an essential part of the arc-eager dependency parsing algorithm, so you should probably fix that :)
tp.save('english.model') labeleddata = dataset.get_english_dev_corpus().parsed_sents() blinddata = dataset.get_english_dev_blind_corpus().parsed_sents() #tp = TransitionParser.load('badfeatures.model') # parsed = tp.parse(labeleddata) parsed = tp.parse(blinddata) with open('test.conll', 'w') as f: for p in parsed: f.write(p.to_conll(10).encode('utf-8')) f.write('\n') ev = DependencyEvaluator(labeleddata, parsed) print "UAS: {} \nLAS: {}".format(*ev.eval()) # parsing arbitrary sentences (english): # sentence = DependencyGraph.from_sentence('Hi, this is a test') # tp = TransitionParser.load('english.model') # parsed = tp.parse([sentence]) # print parsed[0].to_conll(10).encode('utf-8') except NotImplementedError: print """ This file is currently broken! We removed the implementation of Transition (in transition.py), which tells the transitionparser how to go from one Configuration to another Configuration. This is an essential part of the arc-eager dependency parsing algorithm, so you should probably fix that :)
# testdata = dataset.get_swedish_test_corpus().parsed_sents() testdata = dataset.get_english_test_corpus().parsed_sents() # testdata = dataset.get_danish_test_corpus().parsed_sents() # tp = TransitionParser.load('swedish.model') tp = TransitionParser.load('english.model') # tp = TransitionParser.load('danish.model') parsed = tp.parse(testdata) with open('test.conll', 'w') as f: for p in parsed: f.write(p.to_conll(10).encode('utf-8')) f.write('\n') ev = DependencyEvaluator(testdata, parsed) print "LAS: {} \nUAS: {}".format(*ev.eval()) # parsing arbitrary sentences (english): # sentence = DependencyGraph.from_sentence('Hi, this is a test') # tp = TransitionParser.load('english.model') # parsed = tp.parse([sentence]) # print parsed[0].to_conll(10).encode('utf-8') except NotImplementedError: print """ This file is currently broken! We removed the implementation of Transition (in transition.py), which tells the transitionparser how to go from one Configuration to another Configuration. This is an essential part of the arc-eager dependency parsing algorithm, so you should probably fix that :)
subdata_d = random.sample(data_d, 200) try: # BAD FEATURES MODEL (SWEDISH DATA) print "Starting Bad Features" testdata = dataset.get_swedish_test_corpus().parsed_sents() tp = TransitionParser.load('badfeatures.model') parsed = tp.parse(testdata) with open('test.conll', 'w') as f: for p in parsed: f.write(p.to_conll(10).encode('utf-8')) f.write('\n') ev = DependencyEvaluator(testdata, parsed) print "Bad Features Results" print "UAS: {} \nLAS: {}".format(*ev.eval()) t1 = time.time() print "Time: " + str(t1 - t0) + '\n' # SWEDISH FEATURE MODELS print 'Starting Swedish' tp_s = TransitionParser(Transition, FeatureExtractor) tp_s.train(subdata) tp_s.save('swedish.model') testdata = dataset.get_swedish_test_corpus().parsed_sents() tp_s = TransitionParser.load('swedish.model') parsed = tp_s.parse(testdata)
#badfeatures.model...don't use for real testing #tp = TransitionParser.load('badfeatures.model') testdata = dataset.get_swedish_test_corpus().parsed_sents() parsed = tp.parse(testdata) #to write output...for badfeatures.model ''' with open('test.conll', 'w') as f: for p in parsed: f.write(p.to_conll(10).encode('utf-8')) f.write('\n') ''' ev = DependencyEvaluator(testdata, parsed) print "SWEDISH UAS: {} \nLAS: {}".format(*ev.eval()) #DANISH TESTING tp.train(danishsubdata) tp.save('danish.model') testdata = dataset.get_danish_test_corpus().parsed_sents() parsed = tp.parse(testdata) ev = DependencyEvaluator(testdata, parsed) print "DANISH UAS: {} \nLAS: {}".format(*ev.eval()) #KOREAN TESTING tp.train(koreansubdata)
try: print "Training {0} model...".format(testName) tp = TransitionParser(Transition, MyFeatureExtractor) tp.train(traindata) tp.save(testName + ".model") print "Testing {0} model...".format(testName) parsed = tp.parse(testdata) # with open('test.conll', 'w') as f: # for p in parsed: # f.write(p.to_conll(10).encode('utf-8')) # f.write('\n') ev = DependencyEvaluator(testdata, parsed) print "Test Results For: {0}".format(testName) (uas, las) = ev.eval() points = scoreWeight[testName] * (min(0.7, las)/0.7)**2 totalPoints += points print "UAS: {0} \nLAS: {1}".format(uas, las) print "Points Scored: {0}".format(points) # parsing arbitrary sentences (english): # sentence = DependencyGraph.from_sentence('Hi, this is a test') # tp = TransitionParser.load('english.model') # parsed = tp.parse([sentence]) # print parsed[0].to_conll(10).encode('utf-8') except NotImplementedError: print """