def eval_all(self, outstream=sys.stdout):
    """Write per-language and overall evaluation rows as comma-separated text.

    For every language in ``self.by_lang_dict`` and every method in
    ``self.methods``, each sentence stored under that method is paired with
    the entry stored under ``'gold'`` for the same sentence id, and the two
    lists are handed to ``AlignEval``.  One ``lang,method,<values>`` row is
    written per (language, method) pair, where ``<values>`` are the
    stringified items returned by the evaluation's ``all()``.  After all
    languages are processed, one ``overall,method,<values>`` row is written
    per method from the evaluations accumulated with ``+=``.

    :param outstream: object with a ``write`` method that receives the rows.
    :raises AssertionError: re-raised from ``AlignEval`` construction, after
        the offending method name has been written to ``sys.stderr``.
    """

    def write_row(label, method_name, evaluation):
        # Row text and the newline are written as two separate calls.
        fields = [label, method_name]
        fields.extend(str(value) for value in evaluation.all())
        outstream.write(','.join(fields))
        outstream.write('\n')

    # One running evaluation per method, starting from an empty AlignEval.
    totals = {method: AlignEval() for method in self.methods}

    for lang, by_method in self.by_lang_dict.items():
        for method in self.methods:
            method_snts = by_method[method]
            # Pair each test sentence with the gold sentence of the same id.
            pairs = [
                (method_snts[snt_id], by_method['gold'][snt_id])
                for snt_id in method_snts.keys()
            ]
            test_snts = [test_snt for test_snt, _ in pairs]
            gold_snts = [gold_snt for _, gold_snt in pairs]

            try:
                lang_eval = AlignEval(test_snts, gold_snts)
            except AssertionError as err:
                sys.stderr.write("ERROR IN METHOD {}\n".format(method))
                raise err

            write_row(lang, method, lang_eval)
            totals[method] += lang_eval

    for method in self.methods:
        write_row('overall', method, totals[method])
args = p.parse_args()

# Gold alignments are optional. When they are supplied, their count
# (``last_n``) is the number of trailing aligner outputs evaluated below.
last_n = None
if args.gold_aln:
    gold_alns = load_aln(args.gold_aln)
    last_n = len(gold_alns)

# Training sentences; ``args.limit`` is forwarded to ``load_snts``
# (presumably a cap on how many sentences are read -- confirm in load_snts).
e_snts = load_snts(args.e, args.limit)
f_snts = load_snts(args.f, args.limit)
assert len(e_snts) == len(f_snts)

gold_e_snts = load_snts(args.gold_e)
gold_f_snts = load_snts(args.gold_f)

# The gold sentences are appended after the training sentences, so their
# alignments are expected at the tail of the aligner output.  Build each
# combined list once and reuse it.
all_e_snts = e_snts + gold_e_snts
all_f_snts = f_snts + gold_f_snts

ga = GizaAligner()
test_alns = ga.temp_train(all_e_snts, all_f_snts)

if args.gold_aln:
    assert len(all_e_snts) >= last_n

    # -------------------------------------------
    # We will assume that the test sentences are the last
    # in the file.
    #
    # Slice from an explicit start index instead of
    # ``test_alns[-last_n:]``: when ``last_n`` is 0, ``-0 == 0`` and the
    # negative form would select every alignment rather than none.
    # -------------------------------------------
    alns_to_test = test_alns[len(test_alns) - last_n:]
    assert len(alns_to_test) == last_n

    ae = AlignEval(alns_to_test, gold_alns)
    print(",".join([str(args.limit)] + ["{:.5f}".format(f) for f in ae.all()]))