Example #1
def sentence_bleu(
    hypothesis: str,
    references: List[str],
    smooth_method: str = "exp",
    smooth_value: Optional[float] = None,
    lowercase: bool = False,
    tokenize=sacrebleu.DEFAULT_TOKENIZER,
    use_effective_order: bool = False,
) -> BLEUScore:
    """
    Substitute for sacrebleu's own sentence_bleu, which uses settings that are inconsistent
    with the values we use for corpus_bleu and is not fully parameterized.
    """
    args = argparse.Namespace(
        smooth_method=smooth_method,
        smooth_value=smooth_value,
        force=False,
        short=False,
        lc=lowercase,
        tokenize=tokenize,
    )

    metric = BLEU(args)
    return metric.sentence_score(hypothesis,
                                 references,
                                 use_effective_order=use_effective_order)
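
A minimal usage sketch for the wrapper above, with made-up sentences; it assumes the sacrebleu 1.x API this code targets, where BLEU takes an argparse.Namespace and sentence_score accepts use_effective_order:

hyp = "the cat sat on the mat"
refs = ["the cat is on the mat", "a cat sat on a mat"]
result = sentence_bleu(hyp, refs, smooth_method="exp", use_effective_order=True)
print(result.score)  # BLEU on a 0-100 scale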
Example #2
def sacrebleu_fn(preds: Sequence[str], targets: Sequence[Sequence[str]],
                 tokenize: str, lowercase: bool) -> Tensor:
    bleu = BLEU(tokenize=tokenize, lowercase=lowercase)
    # sacrebleu expects reference-major input: one list per reference index,
    # each parallel to preds, so transpose the per-sample reference lists.
    targets = [[target[i] for target in targets]
               for i in range(len(targets[0]))]
    sacrebleu_score = bleu.corpus_score(preds, targets).score / 100
    return tensor(sacrebleu_score)
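
A hypothetical call with one prediction and two references for it; the transpose above assumes every sample has the same number of references, and the result is a 0-1 tensor because of the division by 100:

preds = ["the cat sat on the mat"]
targets = [["the cat is on the mat", "a cat sat on a mat"]]  # references for preds[0]
print(sacrebleu_fn(preds, targets, tokenize="13a", lowercase=False))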
Example #3
def _get_sent_bleu(
        hypothesis: List[str], references: List[List[str]],
        extra_args: Optional[Dict[str, str]] = None, score='score'
) -> List[float]:
    tokenizer = get_optional_dict(extra_args, 'tokenizer', 'none')
    data = [hypothesis] + references
    args = get_default_args(smooth_method='floor', tokenize=tokenizer,
                            num_refs=len(references))
    scorer = BLEU(args)
    scores = [
        scorer.corpus_score([h], [[rr] for rr in r], use_effective_order=True)
        for h, *r in zip(*data)
    ]
    proj = {'score': lambda s: s.score, 'bp': lambda s: s.bp}.get(score)
    return [proj(s) for s in scores]
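
A hypothetical call, assuming the VizSeq-style helpers get_optional_dict and get_default_args are in scope; hypotheses and each reference stream are parallel lists, and one value is returned per sentence:

hyps = ["the cat sat on the mat", "he read the book"]
refs = [["the cat is on the mat", "he has read the book"]]  # one reference stream
print(_get_sent_bleu(hyps, refs))              # per-sentence BLEU
print(_get_sent_bleu(hyps, refs, score='bp'))  # per-sentence brevity penalty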
Example #4
def test_degenerate_statistics(statistics, offset, expected_score):
    score = BLEU.compute_bleu(statistics[0].common,
                              statistics[0].total,
                              statistics[1],
                              statistics[2],
                              smooth_method='floor',
                              smooth_value=offset).score / 100
    assert score == expected_score
Example #5
def corpus_bleu(
    sys_sents: List[str],
    refs_sents: List[List[str]],
    smooth_method: str = "exp",
    smooth_value: Optional[float] = None,
    force: bool = True,
    lowercase: bool = False,
    tokenizer: str = "13a",
    effective_order: bool = False,
):
    sys_sents = [utils_prep.normalize(sent, lowercase, tokenizer) for sent in sys_sents]
    refs_sents = [[utils_prep.normalize(sent, lowercase, tokenizer) for sent in ref_sents] for ref_sents in refs_sents]

    bleu_scorer = BLEU(
        lowercase=False,
        force=force,
        tokenize="none",
        smooth_method=smooth_method,
        smooth_value=smooth_value,
        effective_order=effective_order,
    )

    return bleu_scorer.corpus_score(
        sys_sents,
        refs_sents,
    ).score
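
A hypothetical call; refs_sents is reference-major (one inner list per reference set, each parallel to sys_sents), and utils_prep.normalize is assumed to be the project's preprocessing helper:

sys_sents = ["the cat sat on the mat", "he read the book"]
refs_sents = [
    ["the cat is on the mat", "he has read the book"],  # reference set 1
    ["a cat sat on a mat", "he read that book"],        # reference set 2
]
print(corpus_bleu(sys_sents, refs_sents))  # BLEU on a 0-100 scale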
Example #6
def score_corpus_multiprocess(
        self, hypothesis: List[str], references: List[List[str]],
        score='score'
) -> float:
    tokenizer = get_optional_dict(self.extra_args, 'tokenizer', 'none')
    args = get_default_args(tokenize=tokenizer, num_refs=len(references))
    scorer = BLEU(args)
    if self.n_workers == 1:
        corpus_score = scorer.corpus_score(
            hypothesis, references, use_effective_order=False
        )
    else:
        batches = list(
            self._batch(hypothesis, references, n_batches=self.n_workers)
        )
        ref_len, sys_len = 0, 0
        correct = [0 for _ in range(BLEU.NGRAM_ORDER)]
        total = [0 for _ in range(BLEU.NGRAM_ORDER)]
        with ProcessPoolExecutor(max_workers=self.n_workers) as executor:
            futures = [
                executor.submit(
                    scorer.corpus_score, b[0], b[1],
                    use_effective_order=False
                )
                for b in batches
            ]
            progress = as_completed(futures)
            if self.verbose:
                progress = tqdm(progress)
            for future in progress:
                s = future.result()
                ref_len += s.ref_len
                sys_len += s.sys_len
                for n in range(BLEU.NGRAM_ORDER):
                    correct[n] += s.counts[n]
                    total[n] += s.totals[n]
            corpus_score = scorer.compute_bleu(
                correct, total, sys_len, ref_len, smooth_method='exp'
            )
    proj = {'score': lambda s: s.score, 'bp': lambda s: s.bp}.get(score)
    return proj(corpus_score)
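
The parallel branch works because BLEU's sufficient statistics (per-order n-gram matches and totals plus hypothesis and reference lengths) are additive across batches, so the exact corpus score can be recomputed once at the end. A minimal sketch of that merge for two precomputed scores, assuming the same sacrebleu 1.x scorer as above and hypothetical batch variables:

a = scorer.corpus_score(hyps_batch_1, refs_batch_1, use_effective_order=False)
b = scorer.corpus_score(hyps_batch_2, refs_batch_2, use_effective_order=False)
merged = scorer.compute_bleu(
    [x + y for x, y in zip(a.counts, b.counts)],
    [x + y for x, y in zip(a.totals, b.totals)],
    a.sys_len + b.sys_len,
    a.ref_len + b.ref_len,
    smooth_method='exp',
)
print(merged.score)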
Example #7
def compute_bleu(meters):
    import inspect
    from sacrebleu.metrics import BLEU

    # Older sacrebleu releases named this keyword "smooth" instead of
    # "smooth_method", so pick whichever the installed version accepts.
    fn_sig = inspect.getfullargspec(BLEU.compute_bleu)[0]
    if "smooth_method" in fn_sig:
        smooth = {"smooth_method": "exp"}
    else:
        smooth = {"smooth": "exp"}
    bleu = BLEU.compute_bleu(
        correct=meters["_bleu_counts"].sum,
        total=meters["_bleu_totals"].sum,
        sys_len=meters["_bleu_sys_len"].sum,
        ref_len=meters["_bleu_ref_len"].sum,
        **smooth)
    return round(bleu.score, 2)
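
A hypothetical stand-in for the fairseq meters dictionary, only to show the shape this function expects (objects exposing a .sum attribute, with per-order count lists and scalar lengths):

class _Sum:
    def __init__(self, value):
        self.sum = value

meters = {
    "_bleu_counts": _Sum([9, 7, 5, 3]),   # per-order n-gram matches
    "_bleu_totals": _Sum([10, 9, 8, 7]),  # per-order n-gram totals
    "_bleu_sys_len": _Sum(10),
    "_bleu_ref_len": _Sum(11),
}
print(compute_bleu(meters))  # rounded corpus BLEU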
Example #8
def metric_calculate_pipeline(file_path, submitUUID):
    # connect to the database configured via SQLALCHEMY_DATABASE_URI
    # (defaults to a local MySQL instance)
    load_dotenv()
    engine = create_engine(
        os.getenv('SQLALCHEMY_DATABASE_URI',
                  default="mysql+pymysql://root:[email protected]:3306/superb"))

    # create session and bind engine
    Session = sessionmaker(bind=engine)
    session = Session()

    file_model = session.query(FileModel).filter_by(
        submitUUID=submitUUID).first()
    score_model = file_model.scores[0]

    file_model.state = Status.COMPUTING
    session.commit()

    output_log = os.path.join(os.path.dirname(file_path), "metrics.log")
    with open(output_log, "w") as output_log_f:
        #state = os.system(f"timeout {configs['UNZIP_TIMEOUT']} unzip {file_path} -d {os.path.dirname(file_path)}")
        state = os.system(
            f"unzip -qq {file_path} -d {os.path.dirname(file_path)}")
        # timeout!
        # if (state != 0):
        #    print("Unzip timeout")
        #    print("Unzip timeout", file=output_log_f)
        #    set_error_msg(session, file_model, "Unzip timeout")
        #    return

        ground_truth_root = configs["GROUND_TRUTH_ROOT"]
        predict_root = os.path.join(os.path.dirname(file_path), "predict")

        #============================================#
        #                   PR                       #
        #============================================#
        # PR PUBLIC
        if os.path.isdir(os.path.join(predict_root, "pr_public")):
            if os.path.isfile(
                    os.path.join(predict_root, "pr_public", "predict.ark")):
                if is_plaintext(
                        os.path.join(predict_root, "pr_public",
                                     "predict.ark")):
                    print("[PR PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root,
                                                  "pr_public", "truth.ark")
                        predict_file = os.path.join(predict_root, "pr_public",
                                                    "predict.ark")

                        predict = read_file(predict_file)
                        truth = read_file(truth_file)

                        filenames = sorted(predict.keys())
                        predict_values = [
                            predict[filename] for filename in filenames
                        ]
                        truth_values = [
                            truth[filename] for filename in filenames
                        ]

                        score = wer(predict_values, truth_values)
                        print(f"PR: per {score}", file=output_log_f)
                        score_model.PR_per_public = score
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                   KS                       #
        #============================================#
        # KS PUBLIC
        if os.path.isdir(os.path.join(predict_root, "ks_public")):
            if os.path.isfile(
                    os.path.join(predict_root, "ks_public", "predict.txt")):
                if is_plaintext(
                        os.path.join(predict_root, "ks_public",
                                     "predict.txt")):
                    print("[KS PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root,
                                                  "ks_public", "truth.txt")
                        predict_file = os.path.join(predict_root, "ks_public",
                                                    "predict.txt")

                        predict = read_file(predict_file)
                        truth = read_file(truth_file)

                        filenames = sorted(predict.keys())
                        predict_values = [
                            predict[filename] for filename in filenames
                        ]
                        truth_values = [
                            truth[filename] for filename in filenames
                        ]
                        match = [
                            1 if p == t else 0
                            for p, t in zip(predict_values, truth_values)
                        ]
                        score = np.array(match).mean()
                        print(f"KS: acc {score}", file=output_log_f)
                        score_model.KS_acc_public = score
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                   IC                       #
        #============================================#
        # IC PUBLIC
        if os.path.isdir(os.path.join(predict_root, "ic_public")):
            if os.path.isfile(
                    os.path.join(predict_root, "ic_public", "predict.csv")):
                if is_csv(
                        os.path.join(predict_root, "ic_public",
                                     "predict.csv")):
                    print("[IC PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root,
                                                  "ic_public", "truth.csv")
                        predict_file = os.path.join(predict_root, "ic_public",
                                                    "predict.csv")

                        predict = read_file(predict_file,
                                            lambda x: x.split(","), ",")
                        truth = read_file(truth_file, lambda x: x.split(","),
                                          ",")

                        filenames = sorted(predict.keys())
                        predict_values = [
                            predict[filename] for filename in filenames
                        ]
                        truth_values = [
                            truth[filename] for filename in filenames
                        ]
                        match = [
                            1 if p == t else 0
                            for p, t in zip(predict_values, truth_values)
                        ]
                        score = np.array(match).mean()
                        print(f"IC: acc {score}", file=output_log_f)
                        score_model.IC_acc_public = score
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                   SID                       #
        #============================================#
        # SID PUBLIC
        if os.path.isdir(os.path.join(predict_root, "sid_public")):
            if os.path.isfile(
                    os.path.join(predict_root, "sid_public", "predict.txt")):
                if is_plaintext(
                        os.path.join(predict_root, "sid_public",
                                     "predict.txt")):
                    print("[SID PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root,
                                                  "sid_public", "truth.txt")
                        predict_file = os.path.join(predict_root, "sid_public",
                                                    "predict.txt")

                        predict = read_file(predict_file)
                        truth = read_file(truth_file)

                        filenames = sorted(predict.keys())
                        predict_values = [
                            predict[filename] for filename in filenames
                        ]
                        truth_values = [
                            truth[filename] for filename in filenames
                        ]
                        match = [
                            1 if p == t else 0
                            for p, t in zip(predict_values, truth_values)
                        ]
                        score = np.array(match).mean()
                        print(f"SID: acc {score}", file=output_log_f)
                        score_model.SID_acc_public = score
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                   ER                       #
        #============================================#
        # ER PUBLIC
        er_scores = []
        for foldid in range(1, 6):
            if os.path.isdir(
                    os.path.join(predict_root, f"er_fold{foldid}_public")):
                if os.path.isfile(
                        os.path.join(predict_root, f"er_fold{foldid}_public",
                                     "predict.txt")):
                    if is_plaintext(
                            os.path.join(predict_root,
                                         f"er_fold{foldid}_public",
                                         "predict.txt")):
                        print(f"[ER FOLD{foldid} PUBLIC]", file=output_log_f)
                        try:
                            truth_file = os.path.join(
                                ground_truth_root, f"er_fold{foldid}_public",
                                "truth.txt")
                            predict_file = os.path.join(
                                predict_root, f"er_fold{foldid}_public",
                                "predict.txt")

                            predict = read_file(predict_file)
                            truth = read_file(truth_file)

                            filenames = sorted(predict.keys())
                            predict_values = [
                                predict[filename] for filename in filenames
                            ]
                            truth_values = [
                                truth[filename] for filename in filenames
                            ]
                            match = [
                                1 if p == t else 0
                                for p, t in zip(predict_values, truth_values)
                            ]
                            score = np.array(match).mean()
                            er_scores.append(score)
                            print(f"ER FOLD{foldid}: acc {score}",
                                  file=output_log_f)
                            setattr(score_model, f"ERfold{foldid}_acc_public",
                                    score)
                            session.commit()
                        except Exception as e:
                            print(e, file=output_log_f)
        if len(er_scores) == 5:
            try:
                score = np.array(er_scores).mean()
                print(f"ER: acc {score}", file=output_log_f)
                score_model.ER_acc_public = score
                session.commit()
            except Exception as e:
                print(e, file=output_log_f)

        #============================================#
        #                   ASR                      #
        #============================================#
        # ASR PUBLIC
        if os.path.isdir(os.path.join(predict_root, "asr_public")):
            if os.path.isfile(
                    os.path.join(predict_root, "asr_public", "predict.ark")):
                if is_plaintext(
                        os.path.join(predict_root, "asr_public",
                                     "predict.ark")):
                    print("[ASR PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root,
                                                  "asr_public", "truth.ark")
                        predict_file = os.path.join(predict_root, "asr_public",
                                                    "predict.ark")

                        predict = read_file(predict_file)
                        truth = read_file(truth_file)

                        filenames = sorted(predict.keys())
                        predict_values = [
                            predict[filename] for filename in filenames
                        ]
                        truth_values = [
                            truth[filename] for filename in filenames
                        ]

                        score = wer(predict_values, truth_values)
                        print(f"ASR: wer {score}", file=output_log_f)
                        score_model.ASR_wer_public = score
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        # ASR_LM PUBLIC
        if os.path.isdir(os.path.join(predict_root, "asr_lm_public")):
            if os.path.isfile(
                    os.path.join(predict_root, "asr_lm_public",
                                 "predict.ark")):
                if is_plaintext(
                        os.path.join(predict_root, "asr_lm_public",
                                     "predict.ark")):
                    print("[ASR LM PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root,
                                                  "asr_public", "truth.ark")
                        predict_file = os.path.join(predict_root,
                                                    "asr_lm_public",
                                                    "predict.ark")

                        predict = read_file(predict_file)
                        truth = read_file(truth_file)

                        filenames = sorted(predict.keys())
                        predict_values = [
                            predict[filename] for filename in filenames
                        ]
                        truth_values = [
                            truth[filename] for filename in filenames
                        ]

                        score = wer(predict_values, truth_values)
                        print(f"ASR LM: wer {score}", file=output_log_f)
                        score_model.ASR_LM_wer_public = score
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                   QbE                      #
        #============================================#
        # QbE PUBLIC
        if os.path.isdir(os.path.join(predict_root, "qbe_public")):
            if os.path.isfile(
                    os.path.join(predict_root, "qbe_public",
                                 "benchmark.stdlist.xml")):
                print("[QbE PUBLIC]", file=output_log_f)
                try:
                    scoring_dir = os.path.abspath(
                        os.path.join(ground_truth_root, "qbe_public",
                                     "scoring"))
                    predict_dir = os.path.abspath(
                        os.path.join(predict_root, "qbe_public"))
                    current_dir = os.getcwd()
                    os.chdir(scoring_dir)
                    os.system(
                        f"./score-TWV-Cnxe.sh {predict_dir} groundtruth_quesst14_eval -10"
                    )
                    os.chdir(current_dir)

                    with open(os.path.join(predict_dir, "score.out"),
                              "r") as log:
                        logging = log.read()
                        mtwv = float(
                            re.search("maxTWV: [+-]?([0-9]*[.])?[0-9]+",
                                      logging).group().split()[1])

                    print(f"QbE: mtwv {mtwv}", file=output_log_f)
                    score_model.QbE_mtwv_public = mtwv
                    session.commit()
                except Exception as e:
                    print(e, file=output_log_f)

        #============================================#
        #                   SF                       #
        #============================================#
        # SF PUBLIC
        if os.path.isdir(os.path.join(predict_root, "sf_public")):
            if os.path.isfile(
                    os.path.join(predict_root, "sf_public", "predict.ark")):
                if is_plaintext(
                        os.path.join(predict_root, "sf_public",
                                     "predict.ark")):
                    print("[SF PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root,
                                                  "sf_public", "truth.ark")
                        predict_file = os.path.join(predict_root, "sf_public",
                                                    "predict.ark")

                        predict = read_file(predict_file)
                        truth = read_file(truth_file)

                        filenames = sorted(predict.keys())
                        predict_values = [
                            predict[filename] for filename in filenames
                        ]
                        truth_values = [
                            truth[filename] for filename in filenames
                        ]

                        score = wer(predict_values, truth_values)
                        f1 = slot_type_f1(predict_values, truth_values)
                        cer = slot_value_cer(predict_values, truth_values)
                        print(f"SF: slot_type_f1 {f1}, slot_value_cer {cer}",
                              file=output_log_f)
                        score_model.SF_f1_public = f1
                        score_model.SF_cer_public = cer
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                   SV                       #
        #============================================#
        # SV PUBLIC
        if os.path.isdir(os.path.join(predict_root, "sv_public")):
            if os.path.isfile(
                    os.path.join(predict_root, "sv_public", "predict.txt")):
                if is_plaintext(
                        os.path.join(predict_root, "sv_public",
                                     "predict.txt")):
                    print("[SV PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root,
                                                  "sv_public", "truth.txt")
                        predict_file = os.path.join(predict_root, "sv_public",
                                                    "predict.txt")

                        predict = read_file(predict_file, lambda x: float(x))
                        truth = read_file(truth_file, lambda x: float(x))

                        pairnames = sorted(predict.keys())
                        predict_scores = np.array(
                            [predict[name] for name in pairnames])
                        truth_scores = np.array(
                            [truth[name] for name in pairnames])

                        eer, *other = EER(truth_scores, predict_scores)
                        print(f"SV: eer {eer}", file=output_log_f)
                        score_model.SV_eer_public = eer
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                   SD                       #
        #============================================#
        # SD PUBLIC
        sd_dir = os.path.join(predict_root, "sd_public")
        if os.path.isdir(sd_dir):
            if len(glob.glob(os.path.join(sd_dir, "*.h5"))) > 0:
                prediction_dir = os.path.join(sd_dir, "scoring", "predictions")
                os.makedirs(prediction_dir, exist_ok=True)
                os.system(f"mv {sd_dir}/*.h5 {prediction_dir}")

            if len(
                    glob.glob(
                        os.path.join(predict_root, "sd_public", "scoring",
                                     "predictions", "*.h5"))) > 0:
                print("[SD PUBLIC]", file=output_log_f)
                try:
                    with tempfile.TemporaryDirectory() as scoring_dir:
                        sd_predict_dir = os.path.join(predict_root,
                                                      "sd_public")
                        os.system(
                            f"./{os.path.join(ground_truth_root, 'sd_public', 'score.sh')} {sd_predict_dir} {os.path.join(ground_truth_root, 'sd_public', 'test')} | tail -n 1 | awk '{{print $4}}' > {scoring_dir}/result.log"
                        )
                        with open(f"{scoring_dir}/result.log", "r") as result:
                            der = result.readline().strip()
                    print(f"SD: der {der}", file=output_log_f)
                    score_model.SD_der_public = der
                    session.commit()
                except Exception as e:
                    print(e, file=output_log_f)

        #============================================#
        #                   SE                       #
        #============================================#
        # SE PUBLIC
        if os.path.isdir(os.path.join(predict_root, "se_public")):
            if os.path.isfile(
                    os.path.join(predict_root, "se_public", "metrics.txt")):
                if is_plaintext(
                        os.path.join(predict_root, "se_public",
                                     "metrics.txt")):
                    print("[SE PUBLIC]", file=output_log_f)
                    try:
                        predict_file = os.path.join(predict_root, "se_public",
                                                    "metrics.txt")

                        with open(predict_file) as file:
                            for line in file.readlines():
                                metric, score = line.strip().split(maxsplit=1)
                                if metric == "pesq":
                                    pesq = score
                                    score_model.SE_pesq_public = float(pesq)
                                elif metric == "stoi":
                                    stoi = score
                                    score_model.SE_stoi_public = float(stoi)

                        print(f"SE: pesq {pesq}, stoi {stoi}",
                              file=output_log_f)
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                   SS                       #
        #============================================#
        # SS PUBLIC
        if os.path.isdir(os.path.join(predict_root, "ss_public")):
            if os.path.isfile(
                    os.path.join(predict_root, "ss_public", "metrics.txt")):
                if is_plaintext(
                        os.path.join(predict_root, "ss_public",
                                     "metrics.txt")):
                    print("[SS PUBLIC]", file=output_log_f)
                    try:
                        predict_file = os.path.join(predict_root, "ss_public",
                                                    "metrics.txt")

                        with open(predict_file) as file:
                            for line in file.readlines():
                                metric, score = line.strip().split(maxsplit=1)
                                if "si_sdr" in metric:
                                    si_sdri = score
                                    score_model.SS_sisdri_public = float(
                                        si_sdri)

                        print(f"SS: si_sdri {si_sdri}", file=output_log_f)
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                   ST                       #
        #============================================#
        # ST PUBLIC
        if os.path.isdir(os.path.join(predict_root, "st_public")):
            if os.path.isfile(
                    os.path.join(predict_root, "st_public", "predict.tsv")):
                if is_plaintext(
                        os.path.join(predict_root, "st_public",
                                     "predict.tsv")):
                    print("[ST PUBLIC]", file=output_log_f)
                    try:
                        predict_file = os.path.join(predict_root, "st_public",
                                                    "predict.tsv")

                        hyps, refs = [], []

                        with open(predict_file, 'r') as f:
                            reader = csv.DictReader(
                                f,
                                delimiter='\t',
                                quotechar=None,
                                doublequote=False,
                                lineterminator='\n',
                                quoting=csv.QUOTE_NONE,
                            )
                            for line in reader:
                                hyps.append(line["hyp"])
                                refs.append(line["ref"])

                        bleu = BLEU()
                        score = bleu.corpus_score(hyps, [refs]).score
                        score_model.ST_bleu_public = float(score)

                        print(f"ST: bleu {score}", file=output_log_f)
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        file_model.state = Status.DONE
        session.commit()
Example #9
def test_scoring(statistics, expected_score):
    score = BLEU.compute_bleu(statistics[0].common, statistics[0].total,
                              statistics[1], statistics[2]).score / 100
    assert abs(score - expected_score) < EPSILON
Example #10
def calculate_bleu(output_lns, reference_lns):
    bleu = BLEU()
    return bleu.corpus_score(output_lns, (reference_lns, ))
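
Note that corpus_score returns a BLEUScore object rather than a float; a hypothetical call that extracts the numeric value:

hyps = ["the cat sat on the mat"]
refs = ["the cat is on the mat"]  # one reference per hypothesis
print(calculate_bleu(hyps, refs).score)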
Example #11
    "newstest2017.online-G.0.en-de": (0.0001, 0.0001),
    "newstest2017.PROMT-Rule-based.4735.en-de": (0.0001, 0.0001),
    "newstest2017.RWTH-nmt-ensemble.4921.en-de": (0.0207, 0.07539),
    "newstest2017.SYSTRAN.4847.en-de": (0.59914, 0.0001),
    "newstest2017.TALP-UPC.4834.en-de": (0.0001, 0.0001),
    "newstest2017.uedin-nmt.4722.en-de": (0.0001, 0.0001),
    "newstest2017.xmu.4910.en-de": (0.71073, 0.0001),
}

SACREBLEU_BS_P_VALS = defaultdict(float)
SACREBLEU_AR_P_VALS = defaultdict(float)

# Load data from a pickled file to avoid having to download WMT17
named_systems = _read_pickle_file()
_, refs = named_systems.pop()
metrics = {'BLEU': BLEU(references=refs, tokenize='none')}

#########
# BS test
#########
os.environ['SACREBLEU_SEED'] = str(12345)
bs_scores = PairedTest(named_systems,
                       metrics,
                       references=None,
                       test_type='bs',
                       n_samples=2000)()[1]

for name, result in zip(bs_scores['System'], bs_scores['BLEU']):
    if result.p_value is not None:
        SACREBLEU_BS_P_VALS[name] += result.p_value
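
The approximate-randomization counterpart is not shown above; assuming PairedTest accepts test_type='ar' with the same call pattern, it would presumably mirror the bootstrap block:

ar_scores = PairedTest(named_systems,
                       metrics,
                       references=None,
                       test_type='ar',
                       n_samples=10000)()[1]

for name, result in zip(ar_scores['System'], ar_scores['BLEU']):
    if result.p_value is not None:
        SACREBLEU_AR_P_VALS[name] += result.p_value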
Example #12
from sacrebleu.metrics import BLEU

if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('--exp-dir', required=True)
    parser.add_argument('--tsv-file', default='output-st-test.tsv')
    parser.add_argument('--hyp-key', default='hyp')
    parser.add_argument('--ref-key', default='ref')
    args = parser.parse_args()

    args.exp_dir = Path(args.exp_dir)
    hyps, refs = [], []

    with open(args.exp_dir / args.tsv_file, 'r') as f:
        reader = csv.DictReader(
            f,
            delimiter='\t',
            quotechar=None,
            doublequote=False,
            lineterminator='\n',
            quoting=csv.QUOTE_NONE,
        )
        for line in reader:
            hyps.append(line[args.hyp_key])
            refs.append(line[args.ref_key])

    bleu = BLEU()
    score = bleu.corpus_score(hyps, [refs])
    print(score.score)
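
A hypothetical invocation, assuming the snippet is saved as score_st.py and the experiment directory holds the default output-st-test.tsv with hyp and ref columns: python score_st.py --exp-dir exp/st_en_de --tsv-file output-st-test.tsv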