Ejemplo n.º 1
0
 def moses_detokenize(self,
                      inp: Path,
                      out: Path,
                      col=0,
                      lang='en',
                      post_op=None):
     """Detokenize the lines of ``inp`` with Moses and write them to ``out``.

     :param inp: file holding the tokenized text
     :param out: file that receives the detokenized lines
     :param col: forwarded to ``IO.get_lines`` as ``col``
     :param lang: language passed to ``MosesDetokenizer``
     :param post_op: optional callable applied to every detokenized line
         before it is written
     """
     log.info(f"detok : {inp} --> {out}")
     # every input line is split on whitespace into a token list
     token_rows = IO.get_lines(inp, col=col, line_mapper=lambda row: row.split())

     with MosesDetokenizer(lang=lang) as detok:
         # generators stay lazy; they are drained by write_lines while the
         # detokenizer is still open
         if post_op:
             restored = (post_op(detok(tokens)) for tokens in token_rows)
         else:
             restored = (detok(tokens) for tokens in token_rows)
         IO.write_lines(out, restored)
Ejemplo n.º 2
0
 def evaluate_file(self,
                   detok_hyp: Path,
                   ref: Union[Path, List[str]],
                   lowercase=True) -> float:
     """Score a detokenized hypothesis file against one reference.

     The formatted score is logged and also written next to the hypothesis
     file, in a file named ``<hyp name>.lc.sacrebleu`` (``lowercase=True``)
     or ``<hyp name>.oc.sacrebleu`` (``lowercase=False``).

     :param detok_hyp: path of the detokenized hypothesis file
     :param ref: reference, either a path to read lines from or the lines
         themselves
     :param lowercase: forwarded to ``corpus_bleu`` and used to pick the
         ``.lc`` / ``.oc`` tag of the score file
     :return: ``score`` attribute of the object returned by ``corpus_bleu``
     """
     hypotheses = list(IO.get_lines(detok_hyp))
     if isinstance(ref, Path):
         reference = IO.get_lines(ref)
     else:
         reference = ref
     # corpus_bleu accepts several reference streams; only one is given here
     bleu: BLEUScore = corpus_bleu(sys_stream=hypotheses,
                                   ref_streams=[reference],
                                   lowercase=lowercase)
     summary = bleu.format()

     case_tag = '.lc' if lowercase else '.oc'
     score_path = detok_hyp.with_name(f'{detok_hyp.name}{case_tag}.sacrebleu')
     log.info(f'BLEU {detok_hyp} : {summary}')
     IO.write_lines(score_path, summary)
     return bleu.score
Ejemplo n.º 3
0
 def evaluate_file(self,
                   detok_hyp: Path,
                   ref: Union[Path, List[str]],
                   lowercase=True) -> float:
     """Score a detokenized hypothesis file against one reference.

     The formatted score line is logged and written to a file placed next to
     the hypothesis file. The score file name is the full hypothesis file
     name with ``.lc.sacrebleu`` (``lowercase=True``) or ``.oc.sacrebleu``
     (``lowercase=False``) appended, so the hypothesis file's own suffix is
     kept and two hypothesis files differing only in their last suffix do
     not share a score file.

     :param detok_hyp: path of the detokenized hypothesis file
     :param ref: reference, either a path to read lines from or the lines
         themselves
     :param lowercase: forwarded to ``corpus_bleu`` and used to pick the
         ``.lc`` / ``.oc`` tag of the score file
     :return: ``score`` attribute of the object returned by ``corpus_bleu``
     """
     detok_lines = IO.get_lines(detok_hyp)
     # corpus_bleu takes multiple reference streams, but here we have only one
     ref_liness = [IO.get_lines(ref) if isinstance(ref, Path) else ref]
     bleu: BLEU = corpus_bleu(sys_stream=detok_lines,
                              ref_streams=ref_liness,
                              lowercase=lowercase)
     # A reference length of 0 would make the ratio raise ZeroDivisionError;
     # report 0 in that case instead of crashing after scoring is done.
     ratio = bleu.sys_len / bleu.ref_len if bleu.ref_len else 0.0
     precisions = "/".join(f"{p:.1f}" for p in bleu.precisions)
     bleu_str = (f'BLEU = {bleu.score:.2f} {precisions}'
                 f' (BP = {bleu.bp:.3f} ratio = {ratio:.3f}'
                 f' hyp_len = {bleu.sys_len:d} ref_len={bleu.ref_len:d})')
     # Append to the name rather than with_suffix(), which would replace the
     # last existing suffix of the hypothesis file.
     bleu_file = detok_hyp.with_name(detok_hyp.name +
                                     ('.lc' if lowercase else '.oc') +
                                     '.sacrebleu')
     log.info(f'BLEU {detok_hyp} : {bleu_str}')
     IO.write_lines(bleu_file, bleu_str)
     return bleu.score
Ejemplo n.º 4
0
    def tune_decoder_params(self,
                            exp: Experiment,
                            tune_src: str,
                            tune_ref: str,
                            batch_size: int,
                            trials: int = 10,
                            lowercase=True,
                            beam_size=(1, 4, 8),
                            ensemble=(1, 5, 10),
                            lp_alpha=(0.0, 0.4, 0.6),
                            suggested: List[Tuple[int, int, float]] = None,
                            **fixed_args):
        """Search for the best (beam_size, ensemble, lp_alpha) combination.

        Combinations are taken from ``suggested`` (those not scored yet) and
        from random draws out of ``beam_size`` / ``ensemble`` / ``lp_alpha``.
        Each one is scored with ``self.decode_eval_file`` on the tuning data.
        Scores are kept in ``<exp.work_dir>/tune_step<step>/scores.json`` so a
        later call can resume; the file is rewritten even when a trial fails.

        :param exp: experiment providing ``get_last_saved_model()`` and
            ``work_dir``
        :param tune_src: path of the tuning source file
        :param tune_ref: path of the tuning reference file; must have as many
            lines as ``tune_src``
        :param batch_size: divided by the beam size of a trial to get the
            batch size passed to ``decode_eval_file``
        :param trials: target number of scored combinations; random draws are
            added only for ``trials - <already stored scores>`` when positive
        :param lowercase: forwarded to ``decode_eval_file``
        :param beam_size: candidate beam sizes for random draws
        :param ensemble: candidate ensemble sizes for random draws
        :param lp_alpha: candidate length-penalty alphas for random draws
        :param suggested: combinations to try first, as
            ``(beam_size, ensemble, lp_alpha)`` tuples or their string form
        :param fixed_args: extra keyword arguments forwarded unchanged to
            ``decode_eval_file``
        :return: ``(best, tune_args)``: ``best`` maps ``beam_size``,
            ``ensemble`` and ``lp_alpha`` to the highest-scoring combination;
            ``tune_args`` holds the call arguments (plus ``fixed_args``) minus
            the excluded names
        :raises IndexError: when there is no score at all to choose from
        """
        # Snapshot the call arguments before any other local is bound. Copy
        # the mapping so the edits below never touch the frame's own locals
        # (a write-through proxy on Python 3.13+, see PEP 667).
        tune_args = dict(inspect.getargvalues(inspect.currentframe()).locals)
        tune_args.update(fixed_args)
        ex_args = ['exp', 'self', 'fixed_args', 'batch_size', 'max_len']
        if trials == 0:
            ex_args += ['beam_size', 'ensemble', 'lp_alpha']
        for x in ex_args:
            # 'max_len' exists only when the caller passed it via **fixed_args,
            # so a missing key must not be an error
            tune_args.pop(x, None)

        _, step = exp.get_last_saved_model()
        tune_dir = exp.work_dir / f'tune_step{step}'
        log.info(f"Tune dir = {tune_dir}")
        tune_dir.mkdir(parents=True, exist_ok=True)
        tune_src, tune_ref = Path(tune_src), Path(tune_ref)
        assert tune_src.exists()
        assert tune_ref.exists()
        tune_src, tune_ref = list(IO.get_lines(tune_src)), list(
            IO.get_lines(tune_ref))
        assert len(tune_src) == len(tune_ref)

        tune_log = tune_dir / 'scores.json'  # resume the tuning
        memory: Dict[Tuple, float] = {}
        if tune_log.exists():
            # close the handle deterministically instead of leaking it
            with tune_log.open() as reader:
                data = json.load(reader)
            # JSON keys cant be tuples, so they were stringified
            # NOTE(review): eval() executes whatever is stored in scores.json;
            # ast.literal_eval would be the safe choice if that file can ever
            # come from an untrusted source.
            memory = {eval(k): v for k, v in data.items()}

        beam_sizes, ensembles, lp_alphas = [], [], []
        if suggested:
            if isinstance(suggested[0], str):
                # NOTE(review): same eval() concern as above for string input
                suggested = [eval(x) for x in suggested]
            suggested = [(x[0], x[1], round(x[2], 2)) for x in suggested]
            # skip the suggestions that already have a stored score
            suggested_new = [x for x in suggested if x not in memory]
            beam_sizes += [x[0] for x in suggested_new]
            ensembles += [x[1] for x in suggested_new]
            lp_alphas += [x[2] for x in suggested_new]

        new_trials = trials - len(memory)
        if new_trials > 0:
            beam_sizes += [random.choice(beam_size) for _ in range(new_trials)]
            ensembles += [random.choice(ensemble) for _ in range(new_trials)]
            lp_alphas += [
                round(random.choice(lp_alpha), 2) for _ in range(new_trials)
            ]

        # ensembling is somewhat costlier, so try minimize the model ensembling, by grouping them together
        grouped_ens = defaultdict(list)
        for b, ens, l in zip(beam_sizes, ensembles, lp_alphas):
            grouped_ens[ens].append((b, l))
        try:
            for ens, args in grouped_ens.items():
                # one decoder per ensemble size, reused for all its trials
                decoder = Decoder.new(exp, ensemble=ens)
                for b_s, lp_a in args:
                    eff_batch_size = batch_size // b_s  # effective batch size
                    name = f'tune_step{step}_beam{b_s}_ens{ens}_lp{lp_a:.2f}'
                    log.info(name)
                    out_file = tune_dir / f'{name}.out.tsv'
                    score = self.decode_eval_file(decoder,
                                                  tune_src,
                                                  out_file,
                                                  tune_ref,
                                                  batch_size=eff_batch_size,
                                                  beam_size=b_s,
                                                  lp_alpha=lp_a,
                                                  lowercase=lowercase,
                                                  **fixed_args)
                    memory[(b_s, ens, lp_a)] = score
            if not memory:
                # nothing stored, nothing suggested and no random trial ran
                raise IndexError(
                    'no tuning scores available to pick the best parameters from')
            # highest score wins; on ties the earliest stored combination is kept
            best_params = max(memory.items(), key=lambda x: x[1])[0]
            return dict(zip(['beam_size', 'ensemble', 'lp_alpha'],
                            best_params)), tune_args
        finally:
            # Runs on failure too, so the scores gathered so far are persisted.
            # JSON keys cant be tuples, so we stringify them
            data = {str(k): v for k, v in memory.items()}
            IO.write_lines(tune_log, json.dumps(data))