def __init__(self, args):
    """Build the scorers enabled by the given options.

    The left-to-right scorer is always created. The right-to-left, reverse,
    language-model and cloze scorers are created only when their path
    option is truthy; otherwise the attribute is left as ``None``.

    Args:
        args: options object; reads ``l2r_model_path``, ``r2l_model_path``,
            ``reverse_model_path``, ``lm_model_path`` and
            ``cloze_transformer_path``.

    Raises:
        AssertionError: if ``args.l2r_model_path`` is ``None``.
    """
    self.args = args

    # Raised explicitly instead of via ``assert`` so the check is not
    # stripped under ``python -O``; the exception type stays the same.
    if args.l2r_model_path is None:
        raise AssertionError(
            "Rescoring needs --l2r-model-path which generated given hypotheses"
        )

    self.l2r_model_scorer = SimpleModelScorer(args, args.l2r_model_path)
    # Task of the left-to-right scorer; handed to the reverse and LM scorers.
    self.forward_task = self.l2r_model_scorer.task

    self.r2l_model_scorer = None
    if args.r2l_model_path:
        self.r2l_model_scorer = R2LModelScorer(args, args.r2l_model_path)

    self.reverse_model_scorer = None
    if args.reverse_model_path:
        self.reverse_model_scorer = ReverseModelScorer(
            args, args.reverse_model_path, self.forward_task
        )

    self.lm_scorer = None
    if args.lm_model_path:
        self.lm_scorer = LMScorer(args, args.lm_model_path, self.forward_task)

    self.cloze_transformer_scorer = None
    if args.cloze_transformer_path:
        self.cloze_transformer_scorer = SimpleModelScorer(
            args, args.cloze_transformer_path
        )
def __init__(self, args, forward_task=None, models=None):
    """Create one scorer per model that is configured or pre-loaded.

    A scorer is built when either its path option on ``args`` or its entry
    in ``models`` is truthy; otherwise the attribute is left as ``None``.

    Args:
        args: options object; reads ``l2r_model_path``, ``r2l_model_path``,
            ``reverse_model_path``, ``lm_model_path`` and
            ``cloze_transformer_path``.
        forward_task: passed through unchanged to every scorer constructor.
        models: optional mapping of pre-loaded models, e.g.
            ``{'l2r_model': {'model': model, 'task': task}, ...}``. Keys
            read here: ``l2r_model``, ``r2l_model``, ``reverse_model``,
            ``lm_model`` and ``cloze_model``.
    """
    self.args = args
    preloaded = {} if models is None else models

    l2r_entry = preloaded.get("l2r_model", None)
    if args.l2r_model_path or l2r_entry:
        self.l2r_model_scorer = SimpleModelScorer(
            args, args.l2r_model_path, l2r_entry, forward_task
        )
    else:
        self.l2r_model_scorer = None

    r2l_entry = preloaded.get("r2l_model", None)
    if args.r2l_model_path or r2l_entry:
        self.r2l_model_scorer = R2LModelScorer(
            args, args.r2l_model_path, r2l_entry, forward_task
        )
    else:
        self.r2l_model_scorer = None

    reverse_entry = preloaded.get("reverse_model", None)
    if args.reverse_model_path or reverse_entry:
        self.reverse_model_scorer = ReverseModelScorer(
            args, args.reverse_model_path, reverse_entry, forward_task
        )
    else:
        self.reverse_model_scorer = None

    lm_entry = preloaded.get("lm_model", None)
    if args.lm_model_path or lm_entry:
        self.lm_scorer = LMScorer(
            args, args.lm_model_path, lm_entry, forward_task
        )
    else:
        self.lm_scorer = None

    cloze_entry = preloaded.get("cloze_model", None)
    if args.cloze_transformer_path or cloze_entry:
        self.cloze_transformer_scorer = SimpleModelScorer(
            args, args.cloze_transformer_path, cloze_entry, forward_task
        )
    else:
        self.cloze_transformer_scorer = None
class Rescorer:
    """Reranks n-best hypotheses based on extra models and parameters"""

    def __init__(self, args, forward_task=None, models=None):
        """Create one scorer per model that is configured or pre-loaded.

        A scorer is built when either its path option on ``args`` or its
        entry in ``models`` is truthy; otherwise the attribute is ``None``.

        Args:
            args: options object; reads ``l2r_model_path``,
                ``r2l_model_path``, ``reverse_model_path``,
                ``lm_model_path`` and ``cloze_transformer_path``.
            forward_task: passed through unchanged to every scorer.
            models: optional mapping of pre-loaded models, e.g.
                ``{'l2r_model': {'model': model, 'task': task}, ...}``.
                Keys read here: ``l2r_model``, ``r2l_model``,
                ``reverse_model``, ``lm_model`` and ``cloze_model``.
        """
        self.args = args
        preloaded = {} if models is None else models

        l2r_entry = preloaded.get("l2r_model", None)
        if args.l2r_model_path or l2r_entry:
            self.l2r_model_scorer = SimpleModelScorer(
                args, args.l2r_model_path, l2r_entry, forward_task
            )
        else:
            self.l2r_model_scorer = None

        r2l_entry = preloaded.get("r2l_model", None)
        if args.r2l_model_path or r2l_entry:
            self.r2l_model_scorer = R2LModelScorer(
                args, args.r2l_model_path, r2l_entry, forward_task
            )
        else:
            self.r2l_model_scorer = None

        reverse_entry = preloaded.get("reverse_model", None)
        if args.reverse_model_path or reverse_entry:
            self.reverse_model_scorer = ReverseModelScorer(
                args, args.reverse_model_path, reverse_entry, forward_task
            )
        else:
            self.reverse_model_scorer = None

        lm_entry = preloaded.get("lm_model", None)
        if args.lm_model_path or lm_entry:
            self.lm_scorer = LMScorer(
                args, args.lm_model_path, lm_entry, forward_task
            )
        else:
            self.lm_scorer = None

        cloze_entry = preloaded.get("cloze_model", None)
        if args.cloze_transformer_path or cloze_entry:
            self.cloze_transformer_scorer = SimpleModelScorer(
                args, args.cloze_transformer_path, cloze_entry, forward_task
            )
        else:
            self.cloze_transformer_scorer = None

    def score(self, src_tokens, hypos):
        """Run every configured scorer and return the feature matrix.

        Returns:
            A float tensor with one row per hypothesis and one column per
            ``FeatureList`` member; columns whose scorer is not configured
            stay zero.
        """
        scores = torch.zeros((len(hypos), len(FeatureList)), dtype=torch.float)
        column_fillers = (
            self.compute_l2r_model_scores,
            self.compute_r2l_model_scores,
            self.compute_reverse_model_scores,
            self.compute_lm_scores,
            self.compute_cloze_transformer_scores,
        )
        for fill_column in column_fillers:
            fill_column(src_tokens, hypos, scores)
        return scores

    def compute_l2r_model_scores(self, src_tokens, hypos, scores):
        """Write the left-to-right scorer's output into its column, if set."""
        scorer = self.l2r_model_scorer
        if scorer:
            values = scorer.score(src_tokens, hypos)
            scores[:, FeatureList.L2R_MODEL_SCORE.value] = values[:]

    def compute_r2l_model_scores(self, src_tokens, hypos, scores):
        """Write the right-to-left scorer's output into its column, if set."""
        scorer = self.r2l_model_scorer
        if scorer:
            values = scorer.score(src_tokens, hypos)
            scores[:, FeatureList.R2L_MODEL_SCORE.value] = values[:]

    def compute_reverse_model_scores(self, src_tokens, hypos, scores):
        """computes p(x|y) for each hypothesis. """
        scorer = self.reverse_model_scorer
        if scorer:
            scores[:, FeatureList.REVERSE_MODEL_SCORE.value] = scorer.score(
                src_tokens, hypos
            )

    def compute_lm_scores(self, src_tokens, hypos, scores):
        """Write the language-model scorer's output into its column, if set."""
        scorer = self.lm_scorer
        if scorer:
            values = scorer.score(src_tokens, hypos)
            scores[:, FeatureList.LM_SCORE.value] = values[:]

    def compute_cloze_transformer_scores(self, src_tokens, hypos, scores):
        """Write the cloze scorer's output into its column, if set."""
        scorer = self.cloze_transformer_scorer
        if scorer:
            values = scorer.score(src_tokens, hypos)
            scores[:, FeatureList.CLOZE_SCORE.value] = values[:]
class Rescorer:
    """Reranks n-best hypotheses based on extra models and parameters"""

    def __init__(self, args):
        """Build the scorers enabled by the given options.

        The left-to-right scorer is always created. The right-to-left,
        reverse, language-model and cloze scorers are created only when
        their path option is truthy; otherwise the attribute is ``None``.

        Args:
            args: options object; reads ``l2r_model_path``,
                ``r2l_model_path``, ``reverse_model_path``,
                ``lm_model_path`` and ``cloze_transformer_path``.

        Raises:
            AssertionError: if ``args.l2r_model_path`` is ``None``.
        """
        self.args = args

        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type stays the same.
        if args.l2r_model_path is None:
            raise AssertionError(
                "Rescoring needs --l2r-model-path which generated given hypotheses"
            )

        self.l2r_model_scorer = SimpleModelScorer(args, args.l2r_model_path)
        # Task of the left-to-right scorer; handed to the reverse and LM
        # scorers below.
        self.forward_task = self.l2r_model_scorer.task

        self.r2l_model_scorer = None
        if args.r2l_model_path:
            self.r2l_model_scorer = R2LModelScorer(args, args.r2l_model_path)

        self.reverse_model_scorer = None
        if args.reverse_model_path:
            self.reverse_model_scorer = ReverseModelScorer(
                args, args.reverse_model_path, self.forward_task
            )

        self.lm_scorer = None
        if args.lm_model_path:
            self.lm_scorer = LMScorer(args, args.lm_model_path, self.forward_task)

        self.cloze_transformer_scorer = None
        if args.cloze_transformer_path:
            self.cloze_transformer_scorer = SimpleModelScorer(
                args, args.cloze_transformer_path
            )

    def score(self, src_tokens, hypos):
        """Run every configured scorer and return the feature matrix.

        Returns:
            A float tensor with one row per hypothesis and one column per
            ``FeatureList`` member; columns whose optional scorer is not
            configured stay zero.
        """
        scores = torch.zeros((len(hypos), len(FeatureList)), dtype=torch.float)
        self.compute_l2r_model_scores(src_tokens, hypos, scores)
        self.compute_r2l_model_scores(src_tokens, hypos, scores)
        self.compute_reverse_model_scores(src_tokens, hypos, scores)
        self.compute_lm_scores(src_tokens, hypos, scores)
        self.compute_cloze_transformer_scores(src_tokens, hypos, scores)
        return scores

    def compute_l2r_model_scores(self, src_tokens, hypos, scores):
        """Write the left-to-right scorer's output into its column."""
        l2r_scores = self.l2r_model_scorer.score(src_tokens, hypos)
        scores[:, FeatureList.L2R_MODEL_SCORE.value] = l2r_scores[:]

    def compute_r2l_model_scores(self, src_tokens, hypos, scores):
        """Write the right-to-left scorer's output into its column, if set."""
        if not self.r2l_model_scorer:
            return
        r2l_scores = self.r2l_model_scorer.score(src_tokens, hypos)
        scores[:, FeatureList.R2L_MODEL_SCORE.value] = r2l_scores[:]

    def compute_reverse_model_scores(self, src_tokens, hypos, scores):
        """computes p(x|y) for each hypothesis. """
        if not self.reverse_model_scorer:
            return
        scores[
            :, FeatureList.REVERSE_MODEL_SCORE.value
        ] = self.reverse_model_scorer.score(src_tokens, hypos)

    def compute_lm_scores(self, src_tokens, hypos, scores):
        """Write the language-model scorer's output into its column, if set."""
        if not self.lm_scorer:
            return
        lm_scores = self.lm_scorer.score(src_tokens, hypos)
        scores[:, FeatureList.LM_SCORE.value] = lm_scores[:]

    def compute_cloze_transformer_scores(self, src_tokens, hypos, scores):
        """Write the cloze scorer's output into its column, if set."""
        if not self.cloze_transformer_scorer:
            return
        cloze_scores = self.cloze_transformer_scorer.score(src_tokens, hypos)
        scores[:, FeatureList.CLOZE_SCORE.value] = cloze_scores[:]
class Rescorer:
    """Reranks n-best hypotheses based on extra models and parameters"""

    def __init__(self, args):
        """Build the scorers enabled by the given options.

        The left-to-right scorer is always created. The right-to-left,
        reverse and language-model scorers are created only when their
        path option is truthy; otherwise the attribute is ``None``.

        Args:
            args: options object; reads ``l2r_model_path``,
                ``r2l_model_path``, ``reverse_model_path`` and
                ``lm_model_path``.

        Raises:
            AssertionError: if ``args.l2r_model_path`` is ``None``.
        """
        self.args = args

        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type stays the same.
        if args.l2r_model_path is None:
            raise AssertionError(
                "Rescoring needs --l2r-model-path which generated given hypotheses"
            )

        self.l2r_model_scorer = SimpleModelScorer(args, args.l2r_model_path)
        # Task of the left-to-right scorer; handed to the reverse and LM
        # scorers below.
        self.forward_task = self.l2r_model_scorer.task

        self.r2l_model_scorer = None
        if args.r2l_model_path:
            self.r2l_model_scorer = R2LModelScorer(args, args.r2l_model_path)

        self.reverse_model_scorer = None
        if args.reverse_model_path:
            self.reverse_model_scorer = ReverseModelScorer(
                args, args.reverse_model_path, self.forward_task
            )

        self.lm_scorer = None
        if args.lm_model_path:
            self.lm_scorer = LMScorer(args, args.lm_model_path, self.forward_task)

    def combine_weighted_scores(self, scores, src_tokens, hypos):
        """combine scores from different models

        Each feature column of ``scores`` is multiplied in place by its
        weight from ``self.args``. The R2L column is also divided by each
        hypothesis' token count, and the reverse-model and LM columns by
        the source length.

        Returns:
            The index, as produced by ``Tensor.max``, of the row whose
            weighted column sum is largest.
        """
        src_len = torch.tensor(len(src_tokens), dtype=torch.float)
        tgt_len = torch.tensor(
            [len(hypo["tokens"]) for hypo in hypos], dtype=torch.float
        )

        # L2R model score should be length normalized already
        scores[:, FeatureList.L2R_MODEL_SCORE.value] *= self.args.l2r_model_weight
        scores[:, FeatureList.R2L_MODEL_SCORE.value] *= (
            self.args.r2l_model_weight / tgt_len
        )
        scores[:, FeatureList.REVERSE_MODEL_SCORE.value] *= (
            self.args.reverse_model_weight / src_len
        )
        scores[:, FeatureList.LM_SCORE.value] *= self.args.lm_model_weight / src_len

        # ``max(0)`` yields (values, indices); only the index is needed.
        return scores.sum(dim=1).max(0)[1]

    def score(self, src_tokens, hypos):
        """Score all hypotheses and return the tokens of the best one.

        Returns:
            ``hypos[best]["tokens"]`` converted with ``.int().cpu()``,
            where ``best`` is chosen by ``combine_weighted_scores``.
        """
        scores = torch.zeros((len(hypos), len(FeatureList)), dtype=torch.float)
        self.compute_l2r_model_scores(src_tokens, hypos, scores)
        self.compute_r2l_model_scores(src_tokens, hypos, scores)
        self.compute_reverse_model_scores(src_tokens, hypos, scores)
        self.compute_lm_scores(src_tokens, hypos, scores)

        max_score_index = self.combine_weighted_scores(scores, src_tokens, hypos)
        return hypos[max_score_index]["tokens"].int().cpu()

    def compute_l2r_model_scores(self, src_tokens, hypos, scores):
        """Copy each hypothesis' existing ``"score"`` into the L2R column."""
        for i, hypo in enumerate(hypos):
            scores[i, FeatureList.L2R_MODEL_SCORE.value] = hypo["score"]

    def compute_r2l_model_scores(self, src_tokens, hypos, scores):
        """Write the right-to-left scorer's output into its column, if set."""
        if not self.r2l_model_scorer:
            return
        r2l_scores = self.r2l_model_scorer.score(src_tokens, hypos)
        scores[:, FeatureList.R2L_MODEL_SCORE.value] = r2l_scores[:]

    def compute_reverse_model_scores(self, src_tokens, hypos, scores):
        """computes p(x|y) for each hypothesis. """
        if not self.reverse_model_scorer:
            return
        scores[
            :, FeatureList.REVERSE_MODEL_SCORE.value
        ] = self.reverse_model_scorer.score(src_tokens, hypos)

    def compute_lm_scores(self, src_tokens, hypos, scores):
        """Write the language-model scorer's output into its column, if set."""
        if not self.lm_scorer:
            return
        lm_scores = self.lm_scorer.score(src_tokens, hypos)
        scores[:, FeatureList.LM_SCORE.value] = lm_scores[:]