def run_baseline(treebank_name, outdir=None, trainfile=None, testfile=None):
    """Train the baseline parser on a treebank and score it on the test file.

    The training and test files are taken from a ``TreebankTransformer``
    built for ``treebank_name``. The parser output is written to
    ``<outdir>/dev_parsed_baseline.conll`` and scored against the gold test
    file with ``Malteval.accuracy``.

    Args:
        treebank_name: Name of the treebank; also the first field of the
            returned line.
        outdir: Directory prefix for the parsed output. When falsy it
            defaults to ``config.exp + treebank_name``.
        trainfile: Accepted for interface compatibility; not read here.
        testfile: Accepted for interface compatibility; not read here.

    Returns:
        A newline-terminated, semicolon-separated line holding the treebank
        name, then the labeled score, then the unlabeled score.
    """
    target_dir = outdir or (config.exp + treebank_name)
    evaluator = Malteval()
    transformer = TreebankTransformer(treebank_name=treebank_name)

    # Train and parse.
    gold_train = transformer.trainfile
    gold_test = transformer.testfile
    parser = transformer._parser
    parser.train(gold_train)
    baseline_parse = target_dir + '/dev_parsed_baseline.conll'
    parser.parse(gold_test, baseline_parse)

    # Results: accuracy() yields (uas, las); the line reports las before uas.
    unlabeled, labeled = evaluator.accuracy(gold_test, baseline_parse)
    return "%s;%s;%s\n" % (treebank_name, labeled, unlabeled)
def _deprel_cell(cell, cast):
    """Convert one deprel-matrix cell with *cast*, reading "-" as zero."""
    # Compare by value: ``is not "-"`` tests object identity, which is an
    # interpreter detail for strings and a SyntaxWarning on Python 3.8+.
    return cast(cell) if cell != "-" else cast(0)


def _deprel_f_score(precision, recall):
    """Return 2*p*r / (p + r), or 0.0 when either input is zero."""
    if 0.0 in (precision, recall):
        return 0.0
    return 2 * precision * recall / (precision + recall)


def _deprel_margin(count):
    """Return the error-bar half-width 0.98 / sqrt(count), or 0.0 for zero."""
    # NOTE(review): 0.98 looks like 1.96 * 0.5 (95% margin for a proportion
    # at worst-case variance) -- confirm with the author.
    return 0.98 / sqrt(count) if count != 0 else 0.0


def plot_dep_scores(indir, treebank_number, dep, fig=None, outfile="Figures/figure.png"):
    """Plot baseline vs. transformed scores for one dependency relation.

    Two deprel matrices are computed with ``Malteval.deprel_matrix``: gold
    vs. the baseline parse and gold vs. the transformed parse. For the rows
    whose label equals ``dep``, a pair of bars (baseline, transformed) with
    error bars is drawn at an x offset derived from ``treebank_number``, so
    repeated calls with the same ``fig`` place treebanks side by side.

    Row layout as used here: columns 0, 1 and 2 are read as two scores and a
    count (presumably precision, recall and number of instances -- verify
    against ``Malteval.deprel_matrix``); column 6 is the relation label.

    Args:
        indir: Prefix of the input files. It is concatenated directly with
            the file names, so it must end with a path separator.
        treebank_number: Zero-based position of this treebank in the figure.
            Axis labels and legend entries are only added for 0.
        dep: Dependency relation label to plot.
        fig: Existing figure to draw into; a new one is created when None.
        outfile: Unused here; kept for interface compatibility.

    Returns:
        The ``(fig, ax)`` pair that was drawn on. If no row matches ``dep``
        nothing is drawn and the (possibly new) figure and axes are returned.
    """
    gold = indir + "test_gold.conll"
    baseline = indir + "dev_parsed_baseline.conll"
    transf = indir + "dev_parsed.ud.conll"

    malteval = Malteval()
    baseline_rows = malteval.deprel_matrix(gold, baseline)
    transformed_rows = malteval.deprel_matrix(gold, transf)

    # Rows of the two matrices are paired by position; the label is taken
    # from the baseline matrix. Only rows for ``dep`` are parsed.
    labels = []
    f_base, f_transf = [], []
    err_base, err_transf = [], []
    for base_row, transf_row in zip(baseline_rows, transformed_rows):
        label = base_row[6].strip("\n")
        if label != dep:
            continue
        labels.append(label)
        f_base.append(
            _deprel_f_score(
                _deprel_cell(base_row[0], float),
                _deprel_cell(base_row[1], float),
            )
        )
        f_transf.append(
            _deprel_f_score(
                _deprel_cell(transf_row[0], float),
                _deprel_cell(transf_row[1], float),
            )
        )
        err_base.append(_deprel_margin(_deprel_cell(base_row[2], int)))
        err_transf.append(_deprel_margin(_deprel_cell(transf_row[2], int)))

    if fig is None:
        fig, ax = plt.subplots()
    elif fig.axes:
        # Reuse the axes from earlier calls. Recent Matplotlib versions make
        # add_subplot(1, 1, 1) create a new, overlapping Axes every time.
        ax = fig.axes[0]
    else:
        ax = fig.add_subplot(1, 1, 1)

    if not labels:
        return fig, ax

    bar_width = 0.45
    # Each treebank gets a slot three bar-widths wide: two bars plus a gap.
    last_index = treebank_number * 3 * bar_width
    index = last_index + np.arange(len(labels))
    error_config = {"ecolor": "0.3"}
    first_treebank = treebank_number == 0

    ax.bar(
        index,
        f_base,
        bar_width,
        color="darkblue",
        error_kw=error_config,
        yerr=err_base,
        # Label only once so the legend has a single entry per series.
        label="baseline" if first_treebank else "",
    )
    ax.bar(
        index + bar_width,
        f_transf,
        bar_width,
        color="skyblue",
        error_kw=error_config,
        yerr=err_transf,
        label="transformed" if first_treebank else "",
    )

    if first_treebank:
        # Operate on ``ax``/``fig`` directly instead of pyplot's current
        # figure, which need not be the figure passed in by the caller.
        ax.set_xlabel("%s deprel" % dep)
        ax.set_ylabel("Attachment score")
        ax.legend(bbox_to_anchor=(0.95, 0.92), bbox_transform=fig.transFigure)
    return (fig, ax)