def run_experiment(treebank_name, outdir=None, dep_style="ud", pos_style='ud', metric='LAS'):
    """Run transform/parse/detransform on a treebank and score it against the baseline parse.

    Args:
        treebank_name: name handed to TreebankTransformer and echoed in the
            returned line.
        outdir: path prefix (ending in a separator) under which
            'dev_parsed_baseline.conll' is looked up. Defaults to
            config.exp + treebank_name + "/".
        dep_style: forwarded unchanged to TreebankTransformer.
        pos_style: forwarded unchanged to TreebankTransformer.
        metric: "LAS" reports the second value of malteval.accuracy and uses
            malteval.significance; any other value reports the first value
            and uses malteval.significance_uas.

    Returns:
        The string "<treebank_name>;<baseline>;<score><sig>\\n", where both
        scores are the malteval values multiplied by 100 and <sig> is
        whatever the significance call returned, appended to the
        non-baseline score.
    """
    # TODO: have options for what goes in table
    if not outdir:
        outdir = config.exp + treebank_name + "/"

    transformer = TreebankTransformer(treebank_name=treebank_name,
                                      dep_style=dep_style,
                                      pos_style=pos_style)
    # Comment this call out if you only want to re-run the evaluation.
    transformer.transform_parse_detransform()

    # FILES
    gold_file = transformer.testfile
    baseline_file = outdir + 'dev_parsed_baseline.conll'
    # For evaluation only, comment the next assignment and use instead:
    # system_file = outdir + 'dev_parsed.ud.conll'
    system_file = transformer.parsed_ud

    # RESULTS
    baseline_uas, baseline_las = malteval.accuracy(gold_file, baseline_file)
    system_uas, system_las = malteval.accuracy(gold_file, system_file)

    labelled = metric == "LAS"
    if labelled:
        system_score, baseline_score = system_las, baseline_las
    else:
        system_score, baseline_score = system_uas, baseline_uas

    system_pct = str(float(system_score) * 100)
    baseline_pct = str(float(baseline_score) * 100)

    # Significance of the chosen metric, appended to the non-baseline score.
    significance_test = malteval.significance if labelled else malteval.significance_uas
    system_pct += significance_test(gold_file, baseline_file, system_file)

    return "%s;%s;%s\n" % (treebank_name, baseline_pct, system_pct)
def evaluate_back_transformation_accuracy(treebank_name, outdir=None):
    """Score the transform -> detransform round trip on the training file.

    Runs TreebankTransformer.transform_detransform_trainfile() and compares
    the transformer's `back_transf` file with its `trainfile` using
    malteval.accuracy.

    Args:
        treebank_name: name handed to TreebankTransformer and echoed in the
            returned line.
        outdir: optional output directory prefix.

    Returns:
        The string "<treebank_name>;<accuracy>\\n", where <accuracy> is the
        first value returned by malteval.accuracy multiplied by 100.
    """
    if not outdir:
        # NOTE: the default is computed here but not read again in this function.
        outdir = config.exp + treebank_name + "/"

    transformer = TreebankTransformer(treebank_name=treebank_name)
    transformer.transform_detransform_trainfile()

    gold_file = transformer.trainfile
    round_trip_file = transformer.back_transf
    scores = malteval.accuracy(gold_file, round_trip_file)
    percentage = str(float(scores[0]) * 100)

    return "%s;%s\n" % (treebank_name, percentage)
def run_stats_cop(treebank_name, outdir=None, trainfile=None, testfile=None, dep_style="ud"):
    """Report how frequent the items counted by count_cop are in a treebank.

    Sums the two values returned by TreebankTransformer.count_cop over the
    train and test files and expresses the first as a percentage of the
    second.

    Args:
        treebank_name: name handed to TreebankTransformer and echoed in the
            returned line.
        outdir: optional output directory.
        trainfile: if given, replaces the transformer's train file.
        testfile: if given, replaces the transformer's test file.
        dep_style: forwarded unchanged to TreebankTransformer.

    Returns:
        The string "<treebank_name>;<total>;<percentage>\\n".
    """
    if not outdir:
        # NOTE: the default is computed here but not read again in this function.
        outdir = config.exp + treebank_name

    transformer = TreebankTransformer(treebank_name=treebank_name, dep_style=dep_style)

    # Explicit file arguments take precedence over the transformer's own paths.
    if trainfile:
        transformer.trainfile = trainfile
    if testfile:
        transformer.testfile = testfile

    # count_cop returns (matches, total); presumably copula counts and
    # sentence counts -- TODO confirm against count_cop.
    train_cop, train_total = transformer.count_cop(transformer.trainfile)
    test_cop, test_total = transformer.count_cop(transformer.testfile)

    cop_count = train_cop + test_cop
    total_count = train_total + test_total
    cop_percentage = (cop_count / float(total_count)) * 100

    return "%s;%s;%s\n" % (treebank_name, total_count, cop_percentage)
def run_stats(treebank_name, outdir=None, trainfile=None, testfile=None, dep_style="ud"):
    """Report how frequent the items counted by count_aux are in a treebank.

    Sums the three values returned by TreebankTransformer.count_aux over the
    train and test files and expresses the first as a percentage of the
    second.

    Args:
        treebank_name: name handed to TreebankTransformer and echoed in the
            returned line.
        outdir: optional output directory.
        trainfile: if given, replaces the transformer's train file.
        testfile: if given, replaces the transformer's test file.
        dep_style: forwarded unchanged to TreebankTransformer.

    Returns:
        The string "<treebank_name>;<third total>;<second total>;<percentage>\\n".
    """
    if not outdir:
        # NOTE: the default is computed here but not read again in this function.
        outdir = config.exp + treebank_name

    transformer = TreebankTransformer(treebank_name=treebank_name, dep_style=dep_style)

    # Explicit file arguments take precedence over the transformer's own paths.
    if trainfile:
        transformer.trainfile = trainfile
    if testfile:
        transformer.testfile = testfile

    # count_aux returns three counts; presumably (auxiliaries, tokens,
    # sentences) -- TODO confirm against count_aux.
    train_aux, train_tokens, train_sents = transformer.count_aux(transformer.trainfile)
    test_aux, test_tokens, test_sents = transformer.count_aux(transformer.testfile)

    sentence_count = train_sents + test_sents
    aux_count = train_aux + test_aux
    token_count = train_tokens + test_tokens
    aux_percentage = (aux_count / float(token_count)) * 100

    return "%s;%s;%s;%s\n" % (treebank_name, sentence_count, token_count, aux_percentage)
def evaluate_on_transformed_gold(treebank_name, outdir=None):
    """Evaluate on the transformed representation as gold standard.

    Applies the "transform" operation to the transformer's test file and to
    the baseline parse, then scores both the already existing
    'dev_parsed.ms.conll' and the transformed baseline against the
    transformed gold file.

    Args:
        treebank_name: name handed to TreebankTransformer and echoed in the
            returned line.
        outdir: path prefix (ending in a separator) for all files used here.
            Defaults to config.exp + treebank_name + "/".

    Returns:
        The string "<treebank_name>;<las>;<baseline las>\\n" with both scores
        multiplied by 100. Note the non-baseline score comes first here.
    """
    if not outdir:
        outdir = config.exp + treebank_name + "/"

    transformer = TreebankTransformer(treebank_name=treebank_name)

    gold_ms = "%sdev_gold.ms.conll" % outdir
    # Not written by this function; it is expected to be on disk already.
    system_ms = "%sdev_parsed.ms.conll" % outdir
    baseline_plain = outdir + 'dev_parsed_baseline.conll'
    baseline_ms = outdir + 'dev_parsed_baseline.ms.conll'

    # Bring the gold test file and the baseline parse into the transformed
    # representation.
    transformer.transform(transformer.testfile, gold_ms, "transform")
    transformer.transform(baseline_plain, baseline_ms, "transform")

    # Only the second value of each accuracy result is reported.
    _system_first, system_las = malteval.accuracy(gold_ms, system_ms)
    _baseline_first, baseline_las = malteval.accuracy(gold_ms, baseline_ms)

    system_pct = str(float(system_las) * 100)
    baseline_pct = str(float(baseline_las) * 100)

    return "%s;%s;%s\n" % (treebank_name, system_pct, baseline_pct)
def prepare_files(treebank_name, outdir=None, trainfile=None, testfile=None, ambig_type=None, dep_style='ud', pos_style='ud'):
    """Convert a treebank's train and test files into the transformer's working files.

    Args:
        treebank_name: name handed to UDtreebank and TreebankTransformer.
        outdir: directory to create if it does not exist yet. Defaults to
            config.exp + treebank_name.
        trainfile: input training file. When missing, the `trainfile` of
            UDtreebank(treebank_name) is used.
        testfile: input test file. When missing, the `devfile` of
            UDtreebank(treebank_name) is used.
        ambig_type: optional extra transformation name applied to both
            converted files after the 'to_conllx' conversion.
        dep_style: forwarded unchanged to TreebankTransformer.
        pos_style: forwarded unchanged to TreebankTransformer.

    Returns:
        None. The results are the files written by TreebankTransformer.transform.
    """
    if not outdir:
        outdir = config.exp + treebank_name
    if not os.path.exists(outdir):
        # makedirs also creates missing parent directories, which a plain
        # mkdir would fail on.
        os.makedirs(outdir)

    # Fill in each missing file independently. Previously the defaults were
    # only loaded when both were missing, so passing just one of them left
    # the other as None and handed None to transform().
    if not trainfile or not testfile:
        tb = UDtreebank(treebank_name)
        if not trainfile:
            trainfile = tb.trainfile
        if not testfile:
            testfile = tb.devfile

    TM = TreebankTransformer(treebank_name=treebank_name, dep_style=dep_style, pos_style=pos_style)
    TM.transform(trainfile, TM.trainfile, 'to_conllx')
    TM.transform(testfile, TM.testfile, 'to_conllx')

    # Experiments about ambiguity: applied with the same path as input and
    # output for each converted file.
    if ambig_type:
        TM.transform(TM.trainfile, TM.trainfile, ambig_type)
        TM.transform(TM.testfile, TM.testfile, ambig_type)
#!/usr/bin/env python
#==============================================================================
#author          :Miryam de Lhoneux
#email           :[email protected]
#date            :2015/12/30
#version         :1.0
#description     :Perform a transformation on a file
#usage           :python transform_file.py infile outfile [transform|detransform|to_conllx]
#Python version  :2.7.6
#==============================================================================
import sys
from src.treebank_transformer import TreebankTransformer

if __name__ == "__main__":
    # Fail with the usage line instead of a bare IndexError when arguments
    # are missing. Extra arguments are ignored.
    if len(sys.argv) < 4:
        sys.exit("usage: python transform_file.py infile outfile "
                 "[transform|detransform|to_conllx]")
    infile = sys.argv[1]
    out = sys.argv[2]
    change = sys.argv[3]

    # Alternative setting: dep_style = 'pdt'
    dep_style = 'ud'
    TM = TreebankTransformer(dep_style=dep_style)
    TM.transform(infile, out, change)
from src.treebank_transformer import TreebankTransformer
from matplotlib import pyplot as plt
import sys


def plot_f_dict(d, filename):
    """Save a bar chart of a dictionary to an image file.

    Args:
        d: mapping whose keys become the x-axis tick labels and whose values
            become the bar heights.
        filename: path handed to pyplot.savefig.
    """
    positions = range(len(d))
    labels = list(d.keys())
    # Materialise the values: under Python 3 dict.values() is a view object,
    # which matplotlib cannot convert into an array of bar heights.
    heights = list(d.values())

    plt.figure()
    plt.bar(positions, heights, align="center")
    plt.xticks(positions, labels, rotation='vertical')
    # Leave room at the bottom for the vertical tick labels.
    plt.subplots_adjust(bottom=0.15)
    plt.savefig(filename)


if __name__ == "__main__":
    # Usage: <script> treebank_name [dep_style]
    treebank_name = sys.argv[1]
    dep_style = "ud"
    if len(sys.argv) > 2:
        dep_style = sys.argv[2]
    TT = TreebankTransformer(treebank_name, dep_style=dep_style)
    # One chart per dictionary returned by collect_vg_postags.
    main_verb_pos, aux_pos = TT.collect_vg_postags(TT.trainfile)
    plot_f_dict(main_verb_pos, "main_verb_%s.png" % treebank_name)
    plot_f_dict(aux_pos, "aux_pos_%s.png" % treebank_name)