def main():
    """Read a CoNLL-U file and print, for each sentence, its index and
    whether its dependency tree is fully projective.

    Command-line arguments:
        --infile: path to a CoNLL-U file (8-column variant).
        --lang:   language code (parsed but not used by this version).

    NOTE(review): this definition is shadowed by a later ``main`` in the
    same file; it is kept for reference but never runs as the entry point.
    """
    parser = argparse.ArgumentParser(
        description="""Convert conllu to conll format""")
    parser.add_argument('--infile', help="conllu file")
    parser.add_argument('--lang', help="")
    args = parser.parse_args()

    # Removed dead code from the original: an `if True:` wrapper left over
    # from a commented-out try/except, the unused `header` and `vals`
    # locals, and a `numwords` sum over the always-empty
    # `predicted_sentences` list (it was always 0 and never read).
    rdr = CoNLLReader()
    gold_sentences = []
    if args.infile:
        gold_sentences = rdr.read_conll_u_8cols(args.infile)

    # One line per sentence: "<index> <True|False>".
    for idx, s in enumerate(gold_sentences):
        print(idx, s.is_fully_projective())
def main():
    """Compare predicted vs. gold CoNLL-U parses and print one line of
    per-language tree-shape and accuracy statistics.

    Command-line arguments:
        --predicted: predicted parses (CoNLL-U, 8-column variant).
        --gold:      gold parses (CoNLL-U).
        --lang:      language code, echoed as the first output column.

    Output: a single space-separated line ``<lang> <v1> <v2> ...`` where
    the values, each formatted to two decimals, are: wrongPOSgoodHead
    score; std. dev. of predicted and of gold edge lengths; fraction of
    fully projective predicted and gold trees; punctuation projectivity
    violations per word (predicted, gold); leaf violations per word
    (predicted, gold); KL divergence of the POS distribution from the
    training macro distribution (predicted, gold); POS accuracy; UAS.
    """
    parser = argparse.ArgumentParser(
        description="""Convert conllu to conll format""")
    parser.add_argument('--predicted', help="conllu file")
    parser.add_argument('--gold', help="conllu file")
    parser.add_argument('--lang', help="")
    args = parser.parse_args()

    rdr = CoNLLReader()
    predicted_sentences = []
    gold_sentences = []
    if args.predicted:
        predicted_sentences = rdr.read_conll_u_8cols(args.predicted)
    if args.gold:
        gold_sentences = rdr.read_conll_u(args.gold)

    # Token count of the predicted corpus; nodes()[0] is skipped —
    # presumably the artificial root node (TODO confirm in CoNLLReader).
    # NOTE(review): as in the original, this divides by zero below when
    # --predicted / --gold are missing or empty.
    numwords = sum(len(s.nodes()[1:]) for s in predicted_sentences)

    proj_pred = sum(int(s.is_fully_projective()) for s in predicted_sentences)
    proj_gold = sum(int(s.is_fully_projective()) for s in gold_sentences)
    punct_nonproj_pred = sum(int(s.punct_proj_violations())
                             for s in predicted_sentences)
    punct_nonproj_gold = sum(int(s.punct_proj_violations())
                             for s in gold_sentences)
    leaf_viol_pred = sum(s.leaf_violations()[0] for s in predicted_sentences)
    leaf_viol_gold = sum(s.leaf_violations()[0] for s in gold_sentences)

    wrong_pos_good_head = wrongPOSgoodHead(predicted_sentences, gold_sentences)
    pos_acc = sum(POSAcc(p, g)
                  for p, g in zip(predicted_sentences,
                                  gold_sentences)) / numwords
    uas = sum(UAS(p, g)
              for p, g in zip(predicted_sentences,
                              gold_sentences)) / numwords

    # Renamed from the original `avgprelength`/`avggoldlength`: these are
    # standard deviations (np.std) of the edge lengths, not averages.
    std_pred_len = np.std(edgelengths(predicted_sentences))
    std_gold_len = np.std(edgelengths(gold_sentences))

    vals = [
        wrong_pos_good_head,
        std_pred_len,
        std_gold_len,
        # BUGFIX: the original appended proj_pred/len(predicted_sentences)
        # twice in a row (copy-paste duplication); it is emitted once here,
        # followed by the corresponding gold figure.
        proj_pred / len(predicted_sentences),
        proj_gold / len(gold_sentences),
        punct_nonproj_pred / numwords,
        punct_nonproj_gold / numwords,
        leaf_viol_pred / numwords,
        leaf_viol_gold / numwords,
        KLdivFromMACRO_POS_from_Training(predicted_sentences),
        KLdivFromMACRO_POS_from_Training(gold_sentences),
        pos_acc,
        uas,
    ]

    lineout = " ".join([args.lang] + ["{0:.2f}".format(x) for x in vals])
    print(lineout)