def sentence_bleu(
    hypothesis: str,
    references: List[str],
    smooth_method: str = "exp",
    smooth_value: float = None,
    lowercase: bool = False,
    tokenize=sacrebleu.DEFAULT_TOKENIZER,
    use_effective_order: bool = False,
) -> BLEUScore:
    """
    Substitute for the sacrebleu version of sentence_bleu, which uses settings
    that aren't consistent with the values we use for corpus_bleu, and isn't
    fully parameterized.
    """
    args = argparse.Namespace(
        smooth_method=smooth_method,
        smooth_value=smooth_value,
        force=False,
        short=False,
        lc=lowercase,
        tokenize=tokenize,
    )
    metric = BLEU(args)
    return metric.sentence_score(hypothesis, references, use_effective_order=use_effective_order)
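# A minimal usage sketch for the wrapper above, assuming sacrebleu 1.x (where BLEU is
# built from an argparse.Namespace and sentence_score accepts use_effective_order).
# The hypothesis/reference strings below are invented for illustration.
example = sentence_bleu(
    "the cat sat on the mat",
    ["the cat is on the mat"],
    use_effective_order=True,
)
print(example.score)  # BLEUScore.score is on the usual 0-100 BLEU scale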
from typing import Sequence

from sacrebleu.metrics import BLEU
from torch import Tensor, tensor


def sacrebleu_fn(preds: Sequence[str], targets: Sequence[Sequence[str]], tokenize: str, lowercase: bool) -> Tensor:
    sacrebleu_metric = BLEU(tokenize=tokenize, lowercase=lowercase)
    # Sacrebleu expects a different input format: one list per reference stream,
    # not one list of references per prediction
    targets = [[target[i] for target in targets] for i in range(len(targets[0]))]
    sacrebleu_score = sacrebleu_metric.corpus_score(preds, targets).score / 100
    return tensor(sacrebleu_score)
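# Toy illustration of the transposition done by sacrebleu_fn above: torchmetrics-style
# targets are grouped per prediction, while sacrebleu's corpus_score expects one list
# per reference stream. The strings are made up; assumes the keyword-argument BLEU
# constructor of sacrebleu 2.x and an available torch installation.
preds = ["hello there", "general kenobi"]
targets = [["hello there", "hi there"], ["general kenobi", "general canopy"]]
print(sacrebleu_fn(preds, targets, tokenize="13a", lowercase=False))  # tensor in [0, 1]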
def _get_sent_bleu(
    hypothesis: List[str],
    references: List[List[str]],
    extra_args: Optional[Dict[str, str]] = None,
    score='score',
) -> List[float]:
    tokenizer = get_optional_dict(extra_args, 'tokenizer', 'none')
    data = [hypothesis] + references
    args = get_default_args(smooth_method='floor', tokenize=tokenizer, num_refs=len(references))
    scorer = BLEU(args)
    scores = [
        scorer.corpus_score([h], [[rr] for rr in r], use_effective_order=True)
        for h, *r in zip(*data)
    ]
    proj = {'score': lambda s: s.score, 'bp': lambda s: s.bp}.get(score)
    return [proj(s) for s in scores]
def test_degenerate_statistics(statistics, offset, expected_score):
    score = BLEU.compute_bleu(
        statistics[0].common,
        statistics[0].total,
        statistics[1],
        statistics[2],
        smooth_method='floor',
        smooth_value=offset,
    ).score / 100
    assert score == expected_score
def corpus_bleu(
    sys_sents: List[str],
    refs_sents: List[List[str]],
    smooth_method: str = "exp",
    smooth_value: float = None,
    force: bool = True,
    lowercase: bool = False,
    tokenizer: str = "13a",
    effective_order: bool = False,
):
    sys_sents = [utils_prep.normalize(sent, lowercase, tokenizer) for sent in sys_sents]
    refs_sents = [
        [utils_prep.normalize(sent, lowercase, tokenizer) for sent in ref_sents]
        for ref_sents in refs_sents
    ]
    bleu_scorer = BLEU(
        lowercase=False,
        force=force,
        tokenize="none",
        smooth_method=smooth_method,
        smooth_value=smooth_value,
        effective_order=effective_order,
    )
    return bleu_scorer.corpus_score(sys_sents, refs_sents).score
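# Hedged example call for corpus_bleu above; the sentences are invented and
# utils_prep.normalize is assumed to be provided by the surrounding module.
# refs_sents is a list of reference streams, each aligned with sys_sents.
example_sys = ["the cat sat on the mat"]
example_refs = [["the cat is on the mat"], ["there is a cat on the mat"]]
print(corpus_bleu(example_sys, example_refs, lowercase=True))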
def score_corpus_multiprocess(
    self, hypothesis: List[str], references: List[List[str]], score='score'
) -> float:
    tokenizer = get_optional_dict(self.extra_args, 'tokenizer', 'none')
    args = get_default_args(tokenize=tokenizer, num_refs=len(references))
    scorer = BLEU(args)
    if self.n_workers == 1:
        corpus_score = scorer.corpus_score(
            hypothesis, references, use_effective_order=False
        )
    else:
        batches = list(
            self._batch(hypothesis, references, n_batches=self.n_workers)
        )
        ref_len, sys_len = 0, 0
        correct = [0 for _ in range(BLEU.NGRAM_ORDER)]
        total = [0 for _ in range(BLEU.NGRAM_ORDER)]
        with ProcessPoolExecutor(max_workers=self.n_workers) as executor:
            futures = [
                executor.submit(
                    scorer.corpus_score, b[0], b[1], use_effective_order=False
                )
                for b in batches
            ]
            progress = as_completed(futures)
            if self.verbose:
                progress = tqdm(progress)
            for future in progress:
                s = future.result()
                ref_len += s.ref_len
                sys_len += s.sys_len
                for n in range(BLEU.NGRAM_ORDER):
                    correct[n] += s.counts[n]
                    total[n] += s.totals[n]
        corpus_score = scorer.compute_bleu(
            correct, total, sys_len, ref_len, smooth_method='exp'
        )
    proj = {'score': lambda s: s.score, 'bp': lambda s: s.bp}.get(score)
    return proj(corpus_score)
def compute_bleu(meters):
    import inspect

    # import sacrebleu
    from sacrebleu.metrics import BLEU

    fn_sig = inspect.getfullargspec(BLEU.compute_bleu)[0]
    if "smooth_method" in fn_sig:
        smooth = {"smooth_method": "exp"}
    else:
        smooth = {"smooth": "exp"}
    bleu = BLEU.compute_bleu(
        correct=meters["_bleu_counts"].sum,
        total=meters["_bleu_totals"].sum,
        sys_len=meters["_bleu_sys_len"].sum,
        ref_len=meters["_bleu_ref_len"].sum,
        **smooth,
    )
    return round(bleu.score, 2)
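# Sketch of the same call with plain integers instead of fairseq meters (all values
# made up): compute_bleu takes per-order n-gram match counts, per-order candidate
# totals, and the system/reference token lengths. Assumes a sacrebleu version whose
# BLEU.compute_bleu accepts smooth_method.
from sacrebleu.metrics import BLEU

example_bleu = BLEU.compute_bleu(
    correct=[9, 6, 4, 2],   # matched 1-gram .. 4-gram counts
    total=[10, 9, 8, 7],    # candidate 1-gram .. 4-gram counts
    sys_len=10,
    ref_len=11,
    smooth_method="exp",
)
print(round(example_bleu.score, 2))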
def metric_calculate_pipeline(file_path, submitUUID):
    # connect in memory sqlite database or you can connect your own database
    load_dotenv()
    engine = create_engine(
        os.getenv('SQLALCHEMY_DATABASE_URI',
                  default="mysql+pymysql://root:[email protected]:3306/superb"))
    # create session and bind engine
    Session = sessionmaker(bind=engine)
    session = Session()

    file_model = session.query(FileModel).filter_by(submitUUID=submitUUID).first()
    score_model = file_model.scores[0]

    file_model.state = Status.COMPUTING
    session.commit()

    output_log = os.path.join(os.path.dirname(file_path), "metrics.log")
    with open(output_log, "w") as output_log_f:
        # state = os.system(f"timeout {configs['UNZIP_TIMEOUT']} unzip {file_path} -d {os.path.dirname(file_path)}")
        state = os.system(
            f"unzip -qq {file_path} -d {os.path.dirname(file_path)}")
        # timeout!
        # if (state != 0):
        #     print("Unzip timeout")
        #     print("Unzip timeout", file=output_log_f)
        #     set_error_msg(session, file_model, "Unzip timeout")
        #     return

        ground_truth_root = configs["GROUND_TRUTH_ROOT"]
        predict_root = os.path.join(os.path.dirname(file_path), "predict")

        #============================================#
        #                     PR                     #
        #============================================#
        # PR PUBLIC
        if os.path.isdir(os.path.join(predict_root, "pr_public")):
            if os.path.isfile(os.path.join(predict_root, "pr_public", "predict.ark")):
                if is_plaintext(os.path.join(predict_root, "pr_public", "predict.ark")):
                    print("[PR PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root, "pr_public", "truth.ark")
                        predict_file = os.path.join(predict_root, "pr_public", "predict.ark")
                        predict = read_file(predict_file)
                        truth = read_file(truth_file)
                        filenames = sorted(predict.keys())
                        predict_values = [predict[filename] for filename in filenames]
                        truth_values = [truth[filename] for filename in filenames]
                        score = wer(predict_values, truth_values)
                        print(f"PR: per {score}", file=output_log_f)
                        score_model.PR_per_public = score
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                     KS                     #
        #============================================#
        # KS PUBLIC
        if os.path.isdir(os.path.join(predict_root, "ks_public")):
            if os.path.isfile(os.path.join(predict_root, "ks_public", "predict.txt")):
                if is_plaintext(os.path.join(predict_root, "ks_public", "predict.txt")):
                    print("[KS PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root, "ks_public", "truth.txt")
                        predict_file = os.path.join(predict_root, "ks_public", "predict.txt")
                        predict = read_file(predict_file)
                        truth = read_file(truth_file)
                        filenames = sorted(predict.keys())
                        predict_values = [predict[filename] for filename in filenames]
                        truth_values = [truth[filename] for filename in filenames]
                        match = [1 if p == t else 0 for p, t in zip(predict_values, truth_values)]
                        score = np.array(match).mean()
                        print(f"KS: acc {score}", file=output_log_f)
                        score_model.KS_acc_public = score
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                     IC                     #
        #============================================#
        # IC PUBLIC
        if os.path.isdir(os.path.join(predict_root, "ic_public")):
            if os.path.isfile(os.path.join(predict_root, "ic_public", "predict.csv")):
                if is_csv(os.path.join(predict_root, "ic_public", "predict.csv")):
                    print("[IC PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root, "ic_public", "truth.csv")
                        predict_file = os.path.join(predict_root, "ic_public", "predict.csv")
                        predict = read_file(predict_file, lambda x: x.split(","), ",")
                        truth = read_file(truth_file, lambda x: x.split(","), ",")
                        filenames = sorted(predict.keys())
                        predict_values = [predict[filename] for filename in filenames]
                        truth_values = [truth[filename] for filename in filenames]
                        match = [1 if p == t else 0 for p, t in zip(predict_values, truth_values)]
                        score = np.array(match).mean()
                        print(f"IC: acc {score}", file=output_log_f)
                        score_model.IC_acc_public = score
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                    SID                     #
        #============================================#
        # SID PUBLIC
        if os.path.isdir(os.path.join(predict_root, "sid_public")):
            if os.path.isfile(os.path.join(predict_root, "sid_public", "predict.txt")):
                if is_plaintext(os.path.join(predict_root, "sid_public", "predict.txt")):
                    print("[SID PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root, "sid_public", "truth.txt")
                        predict_file = os.path.join(predict_root, "sid_public", "predict.txt")
                        predict = read_file(predict_file)
                        truth = read_file(truth_file)
                        filenames = sorted(predict.keys())
                        predict_values = [predict[filename] for filename in filenames]
                        truth_values = [truth[filename] for filename in filenames]
                        match = [1 if p == t else 0 for p, t in zip(predict_values, truth_values)]
                        score = np.array(match).mean()
                        print(f"SID: acc {score}", file=output_log_f)
                        score_model.SID_acc_public = score
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                     ER                     #
        #============================================#
        # ER PUBLIC
        er_scores = []
        for foldid in range(1, 6):
            if os.path.isdir(os.path.join(predict_root, f"er_fold{foldid}_public")):
                if os.path.isfile(os.path.join(predict_root, f"er_fold{foldid}_public", "predict.txt")):
                    if is_plaintext(os.path.join(predict_root, f"er_fold{foldid}_public", "predict.txt")):
                        print(f"[ER FOLD{foldid} PUBLIC]", file=output_log_f)
                        try:
                            truth_file = os.path.join(ground_truth_root, f"er_fold{foldid}_public", "truth.txt")
                            predict_file = os.path.join(predict_root, f"er_fold{foldid}_public", "predict.txt")
                            predict = read_file(predict_file)
                            truth = read_file(truth_file)
                            filenames = sorted(predict.keys())
                            predict_values = [predict[filename] for filename in filenames]
                            truth_values = [truth[filename] for filename in filenames]
                            match = [1 if p == t else 0 for p, t in zip(predict_values, truth_values)]
                            score = np.array(match).mean()
                            er_scores.append(score)
                            print(f"ER FOLD{foldid}: acc {score}", file=output_log_f)
                            setattr(score_model, f"ERfold{foldid}_acc_public", score)
                            session.commit()
                        except Exception as e:
                            print(e, file=output_log_f)
        if len(er_scores) == 5:
            try:
                score = np.array(er_scores).mean()
                print(f"ER: acc {score}", file=output_log_f)
                score_model.ER_acc_public = score
                session.commit()
            except Exception as e:
                print(e, file=output_log_f)

        #============================================#
        #                    ASR                     #
        #============================================#
        # ASR PUBLIC
        if os.path.isdir(os.path.join(predict_root, "asr_public")):
            if os.path.isfile(os.path.join(predict_root, "asr_public", "predict.ark")):
                if is_plaintext(os.path.join(predict_root, "asr_public", "predict.ark")):
                    print("[ASR PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root, "asr_public", "truth.ark")
                        predict_file = os.path.join(predict_root, "asr_public", "predict.ark")
                        predict = read_file(predict_file)
                        truth = read_file(truth_file)
                        filenames = sorted(predict.keys())
                        predict_values = [predict[filename] for filename in filenames]
                        truth_values = [truth[filename] for filename in filenames]
                        score = wer(predict_values, truth_values)
                        print(f"ASR: wer {score}", file=output_log_f)
                        score_model.ASR_wer_public = score
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        # ASR_LM PUBLIC
        if os.path.isdir(os.path.join(predict_root, "asr_lm_public")):
            if os.path.isfile(os.path.join(predict_root, "asr_lm_public", "predict.ark")):
                if is_plaintext(os.path.join(predict_root, "asr_lm_public", "predict.ark")):
                    print("[ASR LM PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root, "asr_public", "truth.ark")
                        predict_file = os.path.join(predict_root, "asr_lm_public", "predict.ark")
                        predict = read_file(predict_file)
                        truth = read_file(truth_file)
                        filenames = sorted(predict.keys())
                        predict_values = [predict[filename] for filename in filenames]
                        truth_values = [truth[filename] for filename in filenames]
                        score = wer(predict_values, truth_values)
                        print(f"ASR LM: wer {score}", file=output_log_f)
                        score_model.ASR_LM_wer_public = score
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                    QbE                     #
        #============================================#
        # QbE PUBLIC
        if os.path.isdir(os.path.join(predict_root, "qbe_public")):
            if os.path.isfile(os.path.join(predict_root, "qbe_public", "benchmark.stdlist.xml")):
                print("[QbE PUBLIC]", file=output_log_f)
                try:
                    scoring_dir = os.path.abspath(os.path.join(ground_truth_root, "qbe_public", "scoring"))
                    predict_dir = os.path.abspath(os.path.join(predict_root, "qbe_public"))
                    current_dir = os.getcwd()
                    os.chdir(scoring_dir)
                    os.system(f"./score-TWV-Cnxe.sh {predict_dir} groundtruth_quesst14_eval -10")
                    os.chdir(current_dir)
                    with open(os.path.join(predict_dir, "score.out"), "r") as log:
                        logging = log.read()
                        mtwv = float(re.search("maxTWV: [+-]?([0-9]*[.])?[0-9]+", logging).group().split()[1])
                    print(f"QbE: mtwv {mtwv}", file=output_log_f)
                    score_model.QbE_mtwv_public = mtwv
                    session.commit()
                except Exception as e:
                    print(e, file=output_log_f)

        #============================================#
        #                     SF                     #
        #============================================#
        # SF PUBLIC
        if os.path.isdir(os.path.join(predict_root, "sf_public")):
            if os.path.isfile(os.path.join(predict_root, "sf_public", "predict.ark")):
                if is_plaintext(os.path.join(predict_root, "sf_public", "predict.ark")):
                    print("[SF PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root, "sf_public", "truth.ark")
                        predict_file = os.path.join(predict_root, "sf_public", "predict.ark")
                        predict = read_file(predict_file)
                        truth = read_file(truth_file)
                        filenames = sorted(predict.keys())
                        predict_values = [predict[filename] for filename in filenames]
                        truth_values = [truth[filename] for filename in filenames]
                        score = wer(predict_values, truth_values)
                        f1 = slot_type_f1(predict_values, truth_values)
                        cer = slot_value_cer(predict_values, truth_values)
                        print(f"SF: slot_type_f1 {f1}, slot_value_cer {cer}", file=output_log_f)
                        score_model.SF_f1_public = f1
                        score_model.SF_cer_public = cer
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                     SV                     #
        #============================================#
        # SV PUBLIC
        if os.path.isdir(os.path.join(predict_root, "sv_public")):
            if os.path.isfile(os.path.join(predict_root, "sv_public", "predict.txt")):
                if is_plaintext(os.path.join(predict_root, "sv_public", "predict.txt")):
                    print("[SV PUBLIC]", file=output_log_f)
                    try:
                        truth_file = os.path.join(ground_truth_root, "sv_public", "truth.txt")
                        predict_file = os.path.join(predict_root, "sv_public", "predict.txt")
                        predict = read_file(predict_file, lambda x: float(x))
                        truth = read_file(truth_file, lambda x: float(x))
                        pairnames = sorted(predict.keys())
                        predict_scores = np.array([predict[name] for name in pairnames])
                        truth_scores = np.array([truth[name] for name in pairnames])
                        eer, *other = EER(truth_scores, predict_scores)
                        print(f"SV: eer {eer}", file=output_log_f)
                        score_model.SV_eer_public = eer
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                     SD                     #
        #============================================#
        # SD PUBLIC
        sd_dir = os.path.join(predict_root, "sd_public")
        if os.path.isdir(sd_dir):
            if len(glob.glob(os.path.join(sd_dir, "*.h5"))) > 0:
                prediction_dir = os.path.join(sd_dir, "scoring", "predictions")
                os.makedirs(prediction_dir, exist_ok=True)
                os.system(f"mv {sd_dir}/*.h5 {prediction_dir}")
            if len(glob.glob(os.path.join(predict_root, "sd_public", "scoring", "predictions", "*.h5"))) > 0:
                print("[SD PUBLIC]", file=output_log_f)
                try:
                    with tempfile.TemporaryDirectory() as scoring_dir:
                        sd_predict_dir = os.path.join(predict_root, "sd_public")
                        os.system(
                            f"./{os.path.join(ground_truth_root, 'sd_public', 'score.sh')} {sd_predict_dir} {os.path.join(ground_truth_root, 'sd_public', 'test')} | tail -n 1 | awk '{{print $4}}' > {scoring_dir}/result.log"
                        )
                        with open(f"{scoring_dir}/result.log", "r") as result:
                            der = result.readline().strip()
                        print(f"SD: der {der}", file=output_log_f)
                        score_model.SD_der_public = der
                        session.commit()
                except Exception as e:
                    print(e, file=output_log_f)

        #============================================#
        #                     SE                     #
        #============================================#
        # SE PUBLIC
        if os.path.isdir(os.path.join(predict_root, "se_public")):
            if os.path.isfile(os.path.join(predict_root, "se_public", "metrics.txt")):
                if is_plaintext(os.path.join(predict_root, "se_public", "metrics.txt")):
                    print("[SE PUBLIC]", file=output_log_f)
                    try:
                        predict_file = os.path.join(predict_root, "se_public", "metrics.txt")
                        with open(predict_file) as file:
                            for line in file.readlines():
                                metric, score = line.strip().split(maxsplit=1)
                                if metric == "pesq":
                                    pesq = score
                                    score_model.SE_pesq_public = float(pesq)
                                elif metric == "stoi":
                                    stoi = score
                                    score_model.SE_stoi_public = float(stoi)
                        print(f"SE: pesq {pesq}, stoi {stoi}", file=output_log_f)
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                     SS                     #
        #============================================#
        # SS PUBLIC
        if os.path.isdir(os.path.join(predict_root, "ss_public")):
            if os.path.isfile(os.path.join(predict_root, "ss_public", "metrics.txt")):
                if is_plaintext(os.path.join(predict_root, "ss_public", "metrics.txt")):
                    print("[SS PUBLIC]", file=output_log_f)
                    try:
                        predict_file = os.path.join(predict_root, "ss_public", "metrics.txt")
                        with open(predict_file) as file:
                            for line in file.readlines():
                                metric, score = line.strip().split(maxsplit=1)
                                if "si_sdr" in metric:
                                    si_sdri = score
                                    score_model.SS_sisdri_public = float(si_sdri)
                        print(f"SS: si_sdri {si_sdri}", file=output_log_f)
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        #============================================#
        #                     ST                     #
        #============================================#
        # ST PUBLIC
        if os.path.isdir(os.path.join(predict_root, "st_public")):
            if os.path.isfile(os.path.join(predict_root, "st_public", "predict.tsv")):
                if is_plaintext(os.path.join(predict_root, "st_public", "predict.tsv")):
                    print("[ST PUBLIC]", file=output_log_f)
                    try:
                        predict_file = os.path.join(predict_root, "st_public", "predict.tsv")
                        hyps, refs = [], []
                        with open(predict_file, 'r') as f:
                            reader = csv.DictReader(
                                f,
                                delimiter='\t',
                                quotechar=None,
                                doublequote=False,
                                lineterminator='\n',
                                quoting=csv.QUOTE_NONE,
                            )
                            for line in reader:
                                hyps.append(line["hyp"])
                                refs.append(line["ref"])
                        bleu = BLEU()
                        score = bleu.corpus_score(hyps, [refs]).score
                        score_model.ST_bleu_public = float(score)
                        print(f"ST: bleu {score}", file=output_log_f)
                        session.commit()
                    except Exception as e:
                        print(e, file=output_log_f)

        file_model.state = Status.DONE
        session.commit()
def test_scoring(statistics, expected_score):
    score = BLEU.compute_bleu(statistics[0].common, statistics[0].total,
                              statistics[1], statistics[2]).score / 100
    assert abs(score - expected_score) < EPSILON
def calculate_bleu(output_lns, reference_lns):
    bleu = BLEU()
    return bleu.corpus_score(output_lns, (reference_lns,))
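# Note on the helper above: corpus_score returns a BLEUScore object rather than a
# float, so callers typically read .score from the result. Strings are illustrative.
print(calculate_bleu(["the cat sat on the mat"], ["the cat is on the mat"]).score)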
"newstest2017.online-G.0.en-de": (0.0001, 0.0001), "newstest2017.PROMT-Rule-based.4735.en-de": (0.0001, 0.0001), "newstest2017.RWTH-nmt-ensemble.4921.en-de": (0.0207, 0.07539), "newstest2017.SYSTRAN.4847.en-de": (0.59914, 0.0001), "newstest2017.TALP-UPC.4834.en-de": (0.0001, 0.0001), "newstest2017.uedin-nmt.4722.en-de": (0.0001, 0.0001), "newstest2017.xmu.4910.en-de": (0.71073, 0.0001), } SACREBLEU_BS_P_VALS = defaultdict(float) SACREBLEU_AR_P_VALS = defaultdict(float) # Load data from pickled file to not bother with WMT17 downloading named_systems = _read_pickle_file() _, refs = named_systems.pop() metrics = {'BLEU': BLEU(references=refs, tokenize='none')} ######### # BS test ######### os.environ['SACREBLEU_SEED'] = str(12345) bs_scores = PairedTest(named_systems, metrics, references=None, test_type='bs', n_samples=2000)()[1] for name, result in zip(bs_scores['System'], bs_scores['BLEU']): if result.p_value is not None: SACREBLEU_BS_P_VALS[name] += result.p_value
import argparse
import csv
from pathlib import Path

from sacrebleu.metrics import BLEU

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--exp-dir', required=True)
    parser.add_argument('--tsv-file', default='output-st-test.tsv')
    parser.add_argument('--hyp-key', default='hyp')
    parser.add_argument('--ref-key', default='ref')
    args = parser.parse_args()
    args.exp_dir = Path(args.exp_dir)

    hyps, refs = [], []
    with open(args.exp_dir / args.tsv_file, 'r') as f:
        reader = csv.DictReader(
            f,
            delimiter='\t',
            quotechar=None,
            doublequote=False,
            lineterminator='\n',
            quoting=csv.QUOTE_NONE,
        )
        for line in reader:
            hyps.append(line[args.hyp_key])
            refs.append(line[args.ref_key])

    bleu = BLEU()
    score = bleu.corpus_score(hyps, [refs])
    print(score.score)
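# Example invocation (script name is hypothetical; the TSV must contain "hyp" and
# "ref" columns as read above):
#   python score_st_tsv.py --exp-dir /path/to/exp --tsv-file output-st-test.tsv
# corpus_score returns a BLEUScore; its .score attribute is printed on a 0-100 scale.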