Exemple #1
0
parser.add_argument('--case-insensitive', '-i', action='store_true')

if __name__ == '__main__':
    args = parser.parse_args()

    # With no metric flag on the command line, behave as if --all was given.
    requested = (args.all, args.wer, args.ter, args.bleu, args.pyter)
    if not any(requested):
        args.all = True

    # --all switches on WER, TER and BLEU; pyter stays opt-in.
    if args.all:
        args.wer = True
        args.ter = True
        args.bleu = True

    with open(args.source) as src_file, open(args.target) as trg_file:
        # One sentence per line: `source` holds the hypotheses and `target`
        # the references.
        hypotheses = [line.strip() for line in src_file]
        references = [line.strip() for line in trg_file]

        if args.case_insensitive:
            hypotheses = [sentence.lower() for sentence in hypotheses]
            references = [sentence.lower() for sentence in references]

        # Insertion order fixes the order of the printed report.
        scores = OrderedDict()
        if args.bleu:
            scores['bleu'], _ = corpus_bleu(hypotheses, references)
        if args.wer:
            scores['wer'], _ = corpus_wer(hypotheses, references)
        if args.ter:
            scores['ter'], _ = corpus_tercom(hypotheses, references)
        if args.pyter:
            scores['pyter'], _ = corpus_ter(hypotheses, references)

        # Single output line of space-separated `name=value` pairs.
        report = ['{}={:.2f}'.format(name, value)
                  for name, value in scores.items()]
        print(' '.join(report))
            references = [remove_punk(line) for line in references]

        # Optionally evaluate only the first `max_size` sentences.
        if args.max_size is not None:
            hypotheses = hypotheses[:args.max_size]
            references = references[:args.max_size]

        # Keep both lists the same length: warn, then truncate to the
        # shorter one.
        if len(hypotheses) != len(references):
            sys.stderr.write(
                'warning: source and target don\'t have the same length\n')
            size = min(len(hypotheses), len(references))
            hypotheses = hypotheses[:size]
            references = references[:size]

        # Insertion order is the order in which the metrics are computed.
        scores = OrderedDict()
        if args.bleu:
            scores['bleu'], summary = corpus_bleu(hypotheses, references)
            # Pull the `name=<decimal>` fields out of the BLEU summary.
            # NOTE(review): assumes the summary carries exactly two such
            # fields, penalty then ratio -- confirm against corpus_bleu.
            # The pattern is a raw string with an escaped decimal point so
            # that integer fields (e.g. a length count) cannot match.
            try:
                scores['penalty'], scores['ratio'] = map(
                    float, re.findall(r'\w+=(\d+\.\d+)', summary))
            except ValueError:
                # Not exactly two decimal fields: report BLEU on its own.
                pass
        if args.wer:
            scores['wer'], _ = corpus_wer(hypotheses, references)
        if args.ter:
            # Best effort: TER relies on an external tool (original note:
            # "java missing"), so a failure must not abort the other
            # metrics. Only ordinary errors are caught, so Ctrl-C and
            # SystemExit still propagate.
            try:
                scores['ter'], _ = corpus_ter(hypotheses,
                                              references,
                                              tercom_path=tercom_path)
            except Exception as exc:
                sys.stderr.write(
                    'warning: TER could not be computed ({}), '
                    'reporting 0\n'.format(exc))
                scores['ter'] = 0
        if args.cer:
        # Keep both lists the same length: warn, then truncate to the
        # shorter one.
        if len(hypotheses) != len(references):
            sys.stderr.write(
                'warning: source and target don\'t have the same length\n')
            size = min(len(hypotheses), len(references))
            hypotheses = hypotheses[:size]
            references = references[:size]

        # A sample size of 0 means "as many sentences as the corpus has".
        if args.sample_size == 0:
            args.sample_size = len(hypotheses)

        # NumPy arrays let one index array select a whole resample at once.
        hypotheses = np.array(hypotheses)
        references = np.array(references)

        # Score `draws` resamples; indices are drawn independently, i.e.
        # with replacement, and shared by both sides to keep pairs aligned.
        bleu_scores = []
        for _ in range(args.draws):
            indices = np.random.randint(len(hypotheses), size=args.sample_size)
            bleu, _ = corpus_bleu(hypotheses[indices], references[indices])
            bleu_scores.append(bleu)

        bleu_scores.sort()

        # Drop the k lowest and k highest scores; the printed interval is
        # what remains.
        # NOTE(review): this treats args.p as the total fraction to discard
        # (half from each tail) -- confirm against the option's definition.
        k = int(len(bleu_scores) * args.p) // 2  # FIXME

        bleu_scores = bleu_scores[k:len(bleu_scores) - k]

        print('[{:.3f}, {:.3f}]'.format(bleu_scores[0], bleu_scores[-1]))
# Without a separate source file, sentence length is measured on the
# reference instead.
if args.src is None:
    args.src = args.ref

assert args.labels is None or len(args.labels) == len(args.mt), \
    'expected exactly one label per MT output'

# One curve (or one set of bars) per MT output file.
for k, mt in enumerate(args.mt):
    with open(args.src) as src_file, open(mt) as mt_file, open(args.ref) as ref_file:
        lines = list(zip(src_file, mt_file, ref_file))

        # Count the source tokens once per sentence rather than once per
        # bin, and strip the two sides that get scored.
        sized = [(len(src.split()), hyp.strip(), ref.strip())
                 for src, hyp, ref in lines]

        # Maps each bin's upper bound to the BLEU score of the sentences
        # whose source length lies in (i, i + step]. Empty bins are omitted.
        bins = OrderedDict()

        for i in range(args.min, args.max, args.step):
            lines_ = [(hyp, ref) for length, hyp, ref in sized
                      if i < length <= i + args.step]
            if lines_:
                score, _ = corpus_bleu(*zip(*lines_))
                bins[i + args.step] = score

        values = np.array(list(bins.values()))
        keys = np.array(list(bins.keys()))

        label = args.labels[k] if args.labels else None

        if args.bar:
            # Several systems: unit-width bars placed next to each other.
            # A single system: one bar nearly filling each bin.
            width = 1 if len(args.mt) > 1 else args.step - 1
            # Shift the k-th system's bars by k, applying the offset
            # exactly once so that neighbouring systems are adjacent.
            keys += k
            plt.bar(keys, values, width=width, label=label)
        else:
            plt.plot(keys, values, label=label)