Beispiel #1
0
    def eval_all(self, outstream=sys.stdout):
        """Write per-language and overall evaluation rows for every method.

        For each language in ``self.by_lang_dict`` and each method in
        ``self.methods``, the method's sentences are paired with the
        ``'gold'`` sentences that share the same sentence id, evaluated with
        ``AlignEval``, and written to *outstream* as one comma-separated
        line: ``<lang>,<method>,<values of AlignEval.all()>``.  After all
        languages have been processed, one ``overall,<method>,...`` line per
        method is written from the evaluations accumulated with ``+=``.

        Args:
            outstream: Object with a ``write`` method that receives the
                rows.  Defaults to ``sys.stdout``.

        Raises:
            AssertionError: Re-raised from ``AlignEval`` after the name of
                the offending method has been written to ``sys.stderr``.
        """
        # One empty accumulator per method; every method gets a key up front,
        # so no default factory is needed.
        # NOTE(review): relies on AlignEval() being constructible without
        # arguments and on AlignEval supporting `+=` -- confirm in its class.
        overall = {method: AlignEval() for method in self.methods}

        for lang, by_method in self.by_lang_dict.items():
            for method in self.methods:
                test_snts = []
                gold_snts = []

                # Gold sentences are looked up by the ids present for this
                # method, which keeps the two lists index-aligned.
                for snt_id, test_snt in by_method[method].items():
                    test_snts.append(test_snt)
                    gold_snts.append(by_method['gold'][snt_id])

                try:
                    lang_eval = AlignEval(test_snts, gold_snts)
                except AssertionError:
                    # Report which method failed, then propagate the original
                    # exception unchanged.  The exception is deliberately not
                    # bound to a name, so it cannot clobber `lang_eval`.
                    sys.stderr.write("ERROR IN METHOD {}\n".format(method))
                    raise

                row = [lang, method] + [str(i) for i in lang_eval.all()]
                outstream.write(','.join(row))
                outstream.write('\n')

                overall[method] += lang_eval

        for method in self.methods:
            row = ['overall', method] + [str(i) for i in overall[method].all()]
            outstream.write(','.join(row))
            outstream.write('\n')
Beispiel #2
0
    # Parse the command line, align the limited training sentences together
    # with the gold sentences, and, when gold alignments were supplied, print
    # one comma-separated evaluation line for the gold portion.
    args = p.parse_args()

    # NOTE(review): last_n is assigned here but never read in the visible
    # code -- confirm whether it is used further down or can be dropped.
    last_n = None
    if args.gold_aln:
        gold_alns = load_aln(args.gold_aln)
        last_n = len(gold_alns)

    # Both sides are loaded with the same args.limit and must come out with
    # the same number of sentences.
    # (presumably args.limit caps how many sentences load_snts returns --
    # verify against load_snts)
    e_snts = load_snts(args.e, args.limit)
    f_snts = load_snts(args.f, args.limit)
    assert len(e_snts) == len(f_snts)

    # The gold sentences are loaded without passing a limit.
    gold_e_snts = load_snts(args.gold_e)
    gold_f_snts = load_snts(args.gold_f)

    # The gold sentences are appended AFTER the training sentences on both
    # sides; the tail slice below depends on this ordering.
    ga = GizaAligner()
    test_alns = ga.temp_train(e_snts + gold_e_snts, f_snts + gold_f_snts)

    if args.gold_aln:
        # There cannot be more gold alignments than sentences that were
        # handed to the aligner.
        assert len(e_snts + gold_e_snts) >= len(gold_alns)

        # -------------------------------------------
        # We assume the test (gold) sentences are the last ones in the
        # aligner output, because they were appended last above.
        # NOTE(review): this presumes temp_train returns one alignment per
        # input sentence pair, in input order -- confirm.
        # -------------------------------------------

        # NOTE(review): if gold_alns is empty, -len(gold_alns) is 0 and, for
        # a list, the slice is the WHOLE of test_alns rather than an empty
        # tail; the assert below would then fail whenever test_alns is
        # non-empty.
        alns_to_test = test_alns[-len(gold_alns) :]
        assert len(alns_to_test) == len(gold_alns)

        # Output: the limit followed by each value of ae.all() formatted to
        # five decimal places, joined with commas.
        ae = AlignEval(alns_to_test, gold_alns)
        print(",".join([str(args.limit)] + ["{:.5f}".format(f) for f in ae.all()]))