def build_generator(
    self,
    models,
    args,
    seq_gen_cls=None,
    extra_gen_cls_kwargs=None,
    prefix_allowed_tokens_fn=None,
):
    """
    Build a :class:`~fairseq.SequenceGenerator` instance for this task.

    Args:
        models (List[~fairseq.models.FairseqModel]): ensemble of models
        args (fairseq.dataclass.configs.GenerationConfig):
            configuration object (dataclass) for generation
        seq_gen_cls (type, optional): generator class to instantiate. When
            ``None``, ``SequenceGeneratorWithAlignment`` is used if
            ``args.print_alignment`` is truthy and ``SequenceGenerator``
            otherwise.
        extra_gen_cls_kwargs (Dict[str, Any]): extra options to pass
            through to SequenceGenerator. The mapping is copied before
            use, so the caller's object is never modified.
        prefix_allowed_tokens_fn (Callable[[int, torch.Tensor], List[int]]):
            If provided, this function constrains the beam search to
            allowed tokens only at each step. The provided function
            should take 2 arguments: the batch ID (`batch_id: int`)
            and a unidimensional tensor of token ids (`inputs_ids:
            torch.Tensor`). It has to return a `List[int]` with the
            allowed tokens for the next generation step conditioned
            on the previously generated tokens (`inputs_ids`) and
            the batch ID (`batch_id`). This argument is useful for
            constrained generation conditioned on the prefix, as
            described in "Autoregressive Entity Retrieval"
            (https://arxiv.org/abs/2010.00904) and
            https://github.com/facebookresearch/GENRE. When ``None``,
            ``args.prefix_allowed_tokens_fn`` is used if it exists.

    Returns:
        A ``SequenceScorer`` when ``args.score_reference`` is truthy,
        otherwise an instance of the selected generator class.

    Raises:
        ValueError: if more than one of ``sampling``,
            ``diverse_beam_groups > 0``, ``match_source_len`` and
            ``diversity_rate > 0`` is set.
        AssertionError: if ``sampling_topk`` or ``sampling_topp`` is
            non-negative while ``sampling`` is off.
    """
    if getattr(args, "score_reference", False):
        from fairseq.sequence_scorer import SequenceScorer

        return SequenceScorer(
            self.target_dictionary,
            compute_alignment=getattr(args, "print_alignment", False),
        )

    from fairseq.sequence_generator import (
        SequenceGenerator,
        SequenceGeneratorWithAlignment,
    )

    # Choose search strategy. Defaults to Beam Search.
    sampling = getattr(args, "sampling", False)
    sampling_topk = getattr(args, "sampling_topk", -1)
    sampling_topp = getattr(args, "sampling_topp", -1.0)
    diverse_beam_groups = getattr(args, "diverse_beam_groups", -1)
    diverse_beam_strength = getattr(args, "diverse_beam_strength", 0.5)
    match_source_len = getattr(args, "match_source_len", False)
    diversity_rate = getattr(args, "diversity_rate", -1)
    constrained = getattr(args, "constraints", False)
    if prefix_allowed_tokens_fn is None:
        prefix_allowed_tokens_fn = getattr(args, "prefix_allowed_tokens_fn", None)

    # Only these four options are checked for mutual exclusion; lexical
    # constraints and the prefix function are resolved purely by the
    # branch order below.
    if (
        sum(
            int(cond)
            for cond in [
                sampling,
                diverse_beam_groups > 0,
                match_source_len,
                diversity_rate > 0,
            ]
        )
        > 1
    ):
        raise ValueError("Provided Search parameters are mutually exclusive.")
    assert sampling_topk < 0 or sampling, "--sampling-topk requires --sampling"
    assert sampling_topp < 0 or sampling, "--sampling-topp requires --sampling"

    if sampling:
        search_strategy = search.Sampling(
            self.target_dictionary, sampling_topk, sampling_topp
        )
    elif diverse_beam_groups > 0:
        search_strategy = search.DiverseBeamSearch(
            self.target_dictionary, diverse_beam_groups, diverse_beam_strength
        )
    elif match_source_len:
        # this is useful for tagging applications where the output
        # length should match the input length, so we hardcode the
        # length constraints for simplicity
        search_strategy = search.LengthConstrainedBeamSearch(
            self.target_dictionary,
            min_len_a=1,
            min_len_b=0,
            max_len_a=1,
            max_len_b=0,
        )
    elif diversity_rate > -1:
        # NOTE(review): the exclusivity check tests `diversity_rate > 0`
        # while this branch tests `> -1`; left unchanged - confirm intent.
        search_strategy = search.DiverseSiblingsSearch(
            self.target_dictionary, diversity_rate
        )
    elif constrained:
        search_strategy = search.LexicallyConstrainedBeamSearch(
            self.target_dictionary, args.constraints
        )
    elif prefix_allowed_tokens_fn:
        search_strategy = search.PrefixConstrainedBeamSearch(
            self.target_dictionary, prefix_allowed_tokens_fn
        )
    else:
        search_strategy = search.BeamSearch(self.target_dictionary)

    # Work on a private copy: "print_alignment" may be inserted below and
    # must not leak into a dict the caller may reuse for another call.
    extra_gen_cls_kwargs = dict(extra_gen_cls_kwargs) if extra_gen_cls_kwargs else {}
    if seq_gen_cls is None:
        if getattr(args, "print_alignment", False):
            seq_gen_cls = SequenceGeneratorWithAlignment
            extra_gen_cls_kwargs["print_alignment"] = args.print_alignment
        else:
            seq_gen_cls = SequenceGenerator

    return seq_gen_cls(
        models,
        self.target_dictionary,
        beam_size=getattr(args, "beam", 5),
        max_len_a=getattr(args, "max_len_a", 0),
        max_len_b=getattr(args, "max_len_b", 200),
        min_len=getattr(args, "min_len", 1),
        normalize_scores=(not getattr(args, "unnormalized", False)),
        len_penalty=getattr(args, "lenpen", 1),
        unk_penalty=getattr(args, "unkpen", 0),
        temperature=getattr(args, "temperature", 1.0),
        match_source_len=getattr(args, "match_source_len", False),
        no_repeat_ngram_size=getattr(args, "no_repeat_ngram_size", 0),
        search_strategy=search_strategy,
        **extra_gen_cls_kwargs,
    )
def test_export_sampling(self):
    """Check that a top-p ``search.Sampling`` strategy can be TorchScript-compiled.

    The top-p threshold is set to half of ``self.min_top1_prob``.
    """
    topp_threshold = self.min_top1_prob / 2.0
    sampler = search.Sampling(self.tgt_dict, sampling_topp=topp_threshold)
    # The test passes as long as scripting does not raise.
    torch.jit.script(sampler)
def build_generator(self, models, args, seq_gen_cls=None, extra_gen_cls_kwargs=None):
    """Create the sequence generator (or reference scorer) for this task.

    Args:
        models: passed through as the first positional argument of the
            generator class.
        args: configuration object; every option is read with ``getattr``
            and falls back to a default when the attribute is missing.
        seq_gen_cls: generator class to instantiate. When ``None``,
            ``SequenceGeneratorWithAlignment`` is chosen if
            ``args.print_alignment`` is truthy, else ``SequenceGenerator``.
        extra_gen_cls_kwargs: additional keyword arguments forwarded to the
            generator class; ``None`` (or any falsy value) means none.

    Returns:
        A ``SequenceScorer`` when ``args.score_reference`` is truthy,
        otherwise an instance of the selected generator class.

    Raises:
        ValueError: if more than one of ``sampling``,
            ``diverse_beam_groups > 0``, ``match_source_len`` and
            ``diversity_rate > 0`` is set.
        AssertionError: if ``sampling_topk`` or ``sampling_topp`` is
            non-negative while ``sampling`` is off.
    """
    want_alignment = getattr(args, "print_alignment", False)

    if getattr(args, "score_reference", False):
        from fairseq.sequence_scorer import SequenceScorer

        return SequenceScorer(
            self.target_dictionary, compute_alignment=want_alignment
        )

    from fairseq.sequence_generator import (
        SequenceGenerator,
        SequenceGeneratorWithAlignment,
    )

    # Read every search-related option up front. Defaults to Beam Search.
    use_sampling = getattr(args, "sampling", False)
    topk = getattr(args, "sampling_topk", -1)
    topp = getattr(args, "sampling_topp", -1.0)
    n_groups = getattr(args, "diverse_beam_groups", -1)
    group_strength = getattr(args, "diverse_beam_strength", 0.5)
    same_len_as_source = getattr(args, "match_source_len", False)
    sibling_rate = getattr(args, "diversity_rate", -1)

    exclusive_flags = (
        use_sampling,
        n_groups > 0,
        same_len_as_source,
        sibling_rate > 0,
    )
    if sum(map(int, exclusive_flags)) > 1:
        raise ValueError("Provided Search parameters are mutually exclusive.")
    assert topk < 0 or use_sampling, "--sampling-topk requires --sampling"
    assert topp < 0 or use_sampling, "--sampling-topp requires --sampling"

    tgt_dict = self.target_dictionary
    if use_sampling:
        strategy = search.Sampling(tgt_dict, topk, topp)
    elif n_groups > 0:
        strategy = search.DiverseBeamSearch(tgt_dict, n_groups, group_strength)
    elif same_len_as_source:
        # Useful for tagging applications where the output length should
        # match the input length, so the length constraints are hardcoded
        # for simplicity.
        strategy = search.LengthConstrainedBeamSearch(
            tgt_dict,
            min_len_a=1,
            min_len_b=0,
            max_len_a=1,
            max_len_b=0,
        )
    elif sibling_rate > -1:
        strategy = search.DiverseSiblingsSearch(tgt_dict, sibling_rate)
    else:
        strategy = search.BeamSearch(tgt_dict)

    if seq_gen_cls is None:
        seq_gen_cls = (
            SequenceGeneratorWithAlignment if want_alignment else SequenceGenerator
        )
    passthrough = extra_gen_cls_kwargs or {}

    return seq_gen_cls(
        models,
        tgt_dict,
        beam_size=getattr(args, "beam", 5),
        max_len_a=getattr(args, "max_len_a", 0),
        max_len_b=getattr(args, "max_len_b", 200),
        min_len=getattr(args, "min_len", 1),
        normalize_scores=(not getattr(args, "unnormalized", False)),
        len_penalty=getattr(args, "lenpen", 1),
        unk_penalty=getattr(args, "unkpen", 0),
        temperature=getattr(args, "temperature", 1.0),
        match_source_len=getattr(args, "match_source_len", False),
        no_repeat_ngram_size=getattr(args, "no_repeat_ngram_size", 0),
        search_strategy=strategy,
        **passthrough,
    )
def __init__(
    self,
    tgt_dict,
    beam_size=1,
    max_len_a=0,
    max_len_b=200,
    min_len=1,
    stop_early=True,
    normalize_scores=True,
    len_penalty=1.,
    unk_penalty=0.,
    retain_dropout=False,
    sampling=False,
    sampling_topk=-1,
    temperature=1.,
    diverse_beam_groups=-1,
    diverse_beam_strength=0.5,
    match_source_len=False,
    no_repeat_ngram_size=0,
    args=None,
):
    """Generates translations of a given source sentence.

    Args:
        tgt_dict (~fairseq.data.Dictionary): target dictionary
        beam_size (int, optional): beam width (default: 1); capped at
            ``len(tgt_dict) - 1``
        max_len_a/b (int, optional): generate sequences of maximum length
            ax + b, where x is the source length
        min_len (int, optional): the minimum length of the generated output
            (not including end-of-sentence)
        stop_early (bool, optional): stop generation immediately after we
            finalize beam_size hypotheses, even though longer hypotheses
            might have better normalized scores (default: True)
        normalize_scores (bool, optional): normalize scores by the length
            of the output (default: True)
        len_penalty (float, optional): length penalty, where <1.0 favors
            shorter, >1.0 favors longer sentences (default: 1.0)
        unk_penalty (float, optional): unknown word penalty, where <0
            produces more unks, >0 produces fewer (default: 0.0)
        retain_dropout (bool, optional): use dropout when generating
            (default: False)
        sampling (bool, optional): sample outputs instead of beam search
            (default: False)
        sampling_topk (int, optional): only sample among the top-k choices
            at each step (default: -1)
        temperature (float, optional): temperature, where values
            >1.0 produce more uniform samples and values <1.0 produce
            sharper samples (default: 1.0)
        diverse_beam_groups/strength (float, optional): parameters for
            Diverse Beam Search sampling
        match_source_len (bool, optional): outputs should match the source
            length (default: False)
        no_repeat_ngram_size (int, optional): stored on the instance
            unchanged (default: 0)
        args: object whose ``bert_output_layer`` attribute is copied onto
            the instance. Despite the ``None`` default, the attribute is
            read unconditionally, so passing ``None`` raises
            ``AttributeError``.
    """
    # Special symbol indices and vocabulary size come from the dictionary.
    self.pad = tgt_dict.pad()
    self.unk = tgt_dict.unk()
    self.eos = tgt_dict.eos()
    self.vocab_size = len(tgt_dict)

    # the max beam size is the dictionary size - 1, since we never select pad
    self.beam_size = min(beam_size, self.vocab_size - 1)

    # Length limits.
    self.max_len_a = max_len_a
    self.max_len_b = max_len_b
    self.min_len = min_len

    # Scoring and decoding options.
    self.stop_early = stop_early
    self.normalize_scores = normalize_scores
    self.len_penalty = len_penalty
    self.unk_penalty = unk_penalty
    self.retain_dropout = retain_dropout
    self.temperature = temperature
    self.match_source_len = match_source_len
    self.no_repeat_ngram_size = no_repeat_ngram_size
    self.bert_output_layer = args.bert_output_layer

    assert sampling_topk < 0 or sampling, '--sampling-topk requires --sampling'
    assert temperature > 0, '--temperature must be greater than 0'

    # Pick the search strategy; the first matching option wins and plain
    # beam search is the fallback.
    if sampling:
        strategy = search.Sampling(tgt_dict, sampling_topk)
    elif diverse_beam_groups > 0:
        strategy = search.DiverseBeamSearch(
            tgt_dict, diverse_beam_groups, diverse_beam_strength
        )
    elif match_source_len:
        strategy = search.LengthConstrainedBeamSearch(
            tgt_dict,
            min_len_a=1,
            min_len_b=0,
            max_len_a=1,
            max_len_b=0,
        )
    else:
        strategy = search.BeamSearch(tgt_dict)
    self.search = strategy
def build_generator(self, args):
    """Build the sequence generator (or reference scorer) for this task.

    Args:
        args: configuration object; every option is read with ``getattr``
            and falls back to a default when the attribute is missing.

    Returns:
        A ``SequenceScorer`` when ``args.score_reference`` is truthy.
        Otherwise a ``SequenceGeneratorWithAlignment`` if
        ``args.print_alignment`` is truthy, else a ``SequenceGenerator``,
        configured with the selected search strategy.

    Raises:
        ValueError: if more than one of ``sampling``,
            ``diverse_beam_groups > 0``, ``match_source_len`` and
            ``diversity_rate > 0`` is set.
        AssertionError: if ``sampling_topk`` or ``sampling_topp`` is
            non-negative while ``sampling`` is off.
    """
    if getattr(args, 'score_reference', False):
        from fairseq.sequence_scorer import SequenceScorer

        return SequenceScorer(
            self.target_dictionary,
            compute_alignment=getattr(args, 'print_alignment', False),
        )

    from fairseq.sequence_generator import (
        SequenceGenerator,
        SequenceGeneratorWithAlignment,
    )

    # Choose search strategy. Defaults to Beam Search.
    sampling = getattr(args, 'sampling', False)
    sampling_topk = getattr(args, 'sampling_topk', -1)
    sampling_topp = getattr(args, 'sampling_topp', -1.0)
    diverse_beam_groups = getattr(args, 'diverse_beam_groups', -1)
    # Must be a plain number: a trailing comma after this call would turn
    # the value into a 1-tuple before it reaches DiverseBeamSearch.
    diverse_beam_strength = getattr(args, 'diverse_beam_strength', 0.5)
    match_source_len = getattr(args, 'match_source_len', False)
    diversity_rate = getattr(args, 'diversity_rate', -1)

    if (
        sum(
            int(cond)
            for cond in [
                sampling,
                diverse_beam_groups > 0,
                match_source_len,
                diversity_rate > 0,
            ]
        )
        > 1
    ):
        raise ValueError('Provided Search parameters are mutually exclusive.')
    assert sampling_topk < 0 or sampling, '--sampling-topk requires --sampling'
    assert sampling_topp < 0 or sampling, '--sampling-topp requires --sampling'

    if sampling:
        search_strategy = search.Sampling(
            self.target_dictionary, sampling_topk, sampling_topp
        )
    elif diverse_beam_groups > 0:
        search_strategy = search.DiverseBeamSearch(
            self.target_dictionary, diverse_beam_groups, diverse_beam_strength
        )
    elif match_source_len:
        # this is useful for tagging applications where the output
        # length should match the input length, so we hardcode the
        # length constraints for simplicity
        search_strategy = search.LengthConstrainedBeamSearch(
            self.target_dictionary,
            min_len_a=1,
            min_len_b=0,
            max_len_a=1,
            max_len_b=0,
        )
    elif diversity_rate > -1:
        # NOTE(review): the exclusivity check tests `diversity_rate > 0`
        # while this branch tests `> -1`; left unchanged - confirm intent.
        search_strategy = search.DiverseSiblingsSearch(
            self.target_dictionary, diversity_rate
        )
    else:
        search_strategy = search.BeamSearch(self.target_dictionary)

    if getattr(args, 'print_alignment', False):
        seq_gen_cls = SequenceGeneratorWithAlignment
    else:
        seq_gen_cls = SequenceGenerator

    return seq_gen_cls(
        self.target_dictionary,
        beam_size=getattr(args, 'beam', 5),
        max_len_a=getattr(args, 'max_len_a', 0),
        max_len_b=getattr(args, 'max_len_b', 200),
        min_len=getattr(args, 'min_len', 1),
        normalize_scores=(not getattr(args, 'unnormalized', False)),
        len_penalty=getattr(args, 'lenpen', 1),
        unk_penalty=getattr(args, 'unkpen', 0),
        temperature=getattr(args, 'temperature', 1.),
        match_source_len=getattr(args, 'match_source_len', False),
        no_repeat_ngram_size=getattr(args, 'no_repeat_ngram_size', 0),
        search_strategy=search_strategy,
    )
def __init__(
    self,
    models,
    tgt_dict,
    beam_size=1,
    minlen=1,
    maxlen=None,
    stop_early=True,
    normalize_scores=True,
    len_penalty=0,
    unk_reward=0,
    lexicon_reward=0,
    retain_dropout=False,
    word_reward=0,
    model_weights=None,
    use_char_source=False,
    diverse_beam_groups=-1,
    diverse_beam_strength=0.5,
    diversity_sibling_gamma=0.0,
    sampling=False,
    sampling_topk=-1,
    temperature=1,
):
    """Generates translations of a given source sentence.

    Args:
        models: List of FairseqEncoderDecoderModel objects. Each one must
            implement reorder_encoder_output() method to replicate encoder
            outputs.
        tgt_dict: target dictionary; supplies the pad/unk/eos indices, the
            vocabulary size and ``lexicon_indices_list()``.
        beam_size: beam width, stored unchanged.
        min/maxlen: The length of the generated output will be bounded by
            minlen and maxlen (not including the end-of-sentence marker).
            ``maxlen`` is additionally capped by the smallest
            ``max_decoder_positions()`` across ``models``.
        stop_early: Stop generation immediately after we finalize beam_size
            hypotheses, even though longer hypotheses might have better
            normalized scores.
        normalize_scores: Normalize scores by the length of the output.
        len_penalty, unk_reward, lexicon_reward, retain_dropout: stored on
            the instance unchanged.
        word_reward: add this value to score each token except EOS
            (an alternative method to len_penalty for encouraging longer
            output)
        model_weights: None or list of Python floats of the same length
            as `models` with ensemble interpolation weights. ``None``
            gives every model the weight ``1 / len(models)``.
        use_char_source: if True, encoder inputs consist of (src_tokens,
            src_lengths, char_inds, word_lengths)
        diverse_beam_groups: number of groups for Diverse Beam Search
            (-1 by default is vanilla beam search)
        diverse_beam_strength: strength of diversity penalty for Diverse
            Beam Search.
        diversity_sibling_gamma: The diversity rate of sibling rank
            (0.0 by default to disable sibling rank penalty)
        sampling (bool, optional): sample outputs instead of beam search
            (default: False)
        sampling_topk (int, optional): only sample among the top-k choices
            at each step (default: -1)
        temperature (float, optional): temperature, where values
            >1.0 produce more uniform samples and values <1.0 produce
            sharper samples (default: 1.0)
    """
    self.models = models

    # Special symbol indices and vocabulary size come from the dictionary.
    self.pad = tgt_dict.pad()
    self.unk = tgt_dict.unk()
    self.eos = tgt_dict.eos()
    self.vocab_size = len(tgt_dict)
    self.beam_size = beam_size
    self.minlen = minlen

    # No hypothesis may exceed what the most restrictive decoder supports.
    decoder_limit = min(model.max_decoder_positions() for model in self.models)
    if maxlen is None:
        self.maxlen = decoder_limit
    else:
        self.maxlen = min(maxlen, decoder_limit)

    self.stop_early = stop_early
    self.normalize_scores = normalize_scores
    self.len_penalty = len_penalty
    self.unk_reward = unk_reward
    self.lexicon_reward = lexicon_reward
    self.lexicon_indices = tgt_dict.lexicon_indices_list()
    self.retain_dropout = retain_dropout
    self.temperature = temperature
    self.word_reward = word_reward

    if model_weights is None:
        # Uniform interpolation across the ensemble.
        self.model_weights = [1.0 / len(models)] * len(models)
    else:
        assert len(models) == len(model_weights)
        self.model_weights = model_weights
    self.use_char_source = use_char_source

    assert sampling_topk < 0 or sampling, "--sampling-topk requires --sampling"
    assert temperature > 0, "--temperature must be greater than 0"

    # Sampling takes priority over diverse beam search; plain beam search
    # is the fallback.
    if sampling:
        strategy = search.Sampling(tgt_dict, sampling_topk)
    elif diverse_beam_groups > 0:
        strategy = search.DiverseBeamSearch(
            tgt_dict, diverse_beam_groups, diverse_beam_strength
        )
    else:
        strategy = search.BeamSearch(tgt_dict)
    self.search = strategy

    self.diversity_sibling_gamma = diversity_sibling_gamma
def __init__(
    self,
    args,
    tgt_dict,
    tokenizer,
    beam_size=1,
    max_len_a=0,
    max_len_b=200,
    min_len=1,
    normalize_scores=True,
    len_penalty=1.,
    unk_penalty=0.,
    retain_dropout=False,
    sampling=False,
    sampling_topk=-1,
    sampling_topp=-1.0,
    temperature=1.,
    diverse_beam_groups=-1,
    diverse_beam_strength=0.5,
    match_source_len=False,
    no_repeat_ngram_size=0,
):
    """Generates translations of a given source sentence.

    Args:
        args: configuration object, stored as ``self.args``
        tgt_dict (~fairseq.data.Dictionary): target dictionary
        tokenizer: stored as ``self.tokenizer``; presumably a HuggingFace
            tokenizer used to turn BPE indices back into words - confirm
            against the methods that use it
        beam_size (int, optional): beam width (default: 1); capped at
            ``len(tgt_dict) - 1``
        max_len_a/b (int, optional): generate sequences of maximum length
            ax + b, where x is the source length
        min_len (int, optional): the minimum length of the generated output
            (not including end-of-sentence)
        normalize_scores (bool, optional): normalize scores by the length
            of the output (default: True)
        len_penalty (float, optional): length penalty, where <1.0 favors
            shorter, >1.0 favors longer sentences (default: 1.0)
        unk_penalty (float, optional): unknown word penalty, where <0
            produces more unks, >0 produces fewer (default: 0.0)
        retain_dropout (bool, optional): use dropout when generating
            (default: False)
        sampling (bool, optional): sample outputs instead of beam search
            (default: False)
        sampling_topk (int, optional): only sample among the top-k choices
            at each step (default: -1)
        sampling_topp (float, optional): only sample among the smallest set
            of words whose cumulative probability mass exceeds p
            at each step (default: -1.0)
        temperature (float, optional): temperature, where values
            >1.0 produce more uniform samples and values <1.0 produce
            sharper samples (default: 1.0)
        diverse_beam_groups/strength (float, optional): parameters for
            Diverse Beam Search sampling
        match_source_len (bool, optional): outputs should match the source
            length (default: False)
        no_repeat_ngram_size (int, optional): stored on the instance
            unchanged (default: 0)
    """
    self.args = args
    self.tgt_dict = tgt_dict
    self.tokenizer = tokenizer

    # Special symbol indices and vocabulary size come from the dictionary.
    self.pad = tgt_dict.pad()
    self.unk = tgt_dict.unk()
    self.eos = tgt_dict.eos()
    self.bos = tgt_dict.bos()
    self.vocab_size = len(tgt_dict)

    # the max beam size is the dictionary size - 1, since we never select pad
    self.beam_size = min(beam_size, self.vocab_size - 1)

    # Length limits.
    self.max_len_a = max_len_a
    self.max_len_b = max_len_b
    self.min_len = min_len

    # Scoring and decoding options.
    self.normalize_scores = normalize_scores
    self.len_penalty = len_penalty
    self.unk_penalty = unk_penalty
    self.retain_dropout = retain_dropout
    self.temperature = temperature
    self.match_source_len = match_source_len
    self.no_repeat_ngram_size = no_repeat_ngram_size
    # Hard-coded limit; not configurable through the constructor.
    self.gpt2_max_length = 1024

    assert sampling_topk < 0 or sampling, '--sampling-topk requires --sampling'
    assert sampling_topp < 0 or sampling, '--sampling-topp requires --sampling'
    assert temperature > 0, '--temperature must be greater than 0'

    # Pick the search strategy; the first matching option wins and plain
    # beam search is the fallback.
    if sampling:
        strategy = search.Sampling(tgt_dict, sampling_topk, sampling_topp)
    elif diverse_beam_groups > 0:
        strategy = search.DiverseBeamSearch(
            tgt_dict, diverse_beam_groups, diverse_beam_strength
        )
    elif match_source_len:
        strategy = search.LengthConstrainedBeamSearch(
            tgt_dict,
            min_len_a=1,
            min_len_b=0,
            max_len_a=1,
            max_len_b=0,
        )
    else:
        strategy = search.BeamSearch(tgt_dict)
    self.search = strategy
def build_generator(self, models, args):
    """Build a ``SequenceGenerator`` configured from ``args``.

    Reference scoring is not supported here: a truthy
    ``args.score_reference`` is reset to ``False`` on ``args`` and a
    warning is logged.

    Args:
        models: accepted for interface compatibility; not used in this
            method.
        args: configuration object; every option is read with ``getattr``
            and falls back to a default when the attribute is missing.

    Returns:
        A ``SequenceGenerator`` using the selected search strategy.

    Raises:
        ValueError: if more than one of ``sampling``,
            ``diverse_beam_groups > 0``, ``match_source_len`` and
            ``diversity_rate > 0`` is set.
        AssertionError: if ``sampling_topk`` or ``sampling_topp`` is
            non-negative while ``sampling`` is off.
    """
    if getattr(args, "score_reference", False):
        args.score_reference = False
        logger.warning(
            "--score-reference is not applicable to speech recognition, ignoring it."
        )

    from fairseq.sequence_generator import SequenceGenerator

    # Read every search-related option up front. Defaults to Beam Search.
    use_sampling = getattr(args, "sampling", False)
    topk = getattr(args, "sampling_topk", -1)
    topp = getattr(args, "sampling_topp", -1.0)
    n_groups = getattr(args, "diverse_beam_groups", -1)
    group_strength = getattr(args, "diverse_beam_strength", 0.5)
    same_len_as_source = getattr(args, "match_source_len", False)
    sibling_rate = getattr(args, "diversity_rate", -1)

    exclusive_flags = (
        use_sampling,
        n_groups > 0,
        same_len_as_source,
        sibling_rate > 0,
    )
    if sum(map(int, exclusive_flags)) > 1:
        raise ValueError("Provided Search parameters are mutually exclusive.")
    assert topk < 0 or use_sampling, "--sampling-topk requires --sampling"
    assert topp < 0 or use_sampling, "--sampling-topp requires --sampling"

    tgt_dict = self.target_dictionary
    if use_sampling:
        strategy = search.Sampling(tgt_dict, topk, topp)
    elif n_groups > 0:
        strategy = search.DiverseBeamSearch(tgt_dict, n_groups, group_strength)
    elif same_len_as_source:
        # Useful for tagging applications where the output length should
        # match the input length, so the length constraints are hardcoded
        # for simplicity.
        strategy = search.LengthConstrainedBeamSearch(
            tgt_dict,
            min_len_a=1,
            min_len_b=0,
            max_len_a=1,
            max_len_b=0,
        )
    elif sibling_rate > -1:
        strategy = search.DiverseSiblingsSearch(tgt_dict, sibling_rate)
    else:
        strategy = search.BeamSearch(tgt_dict)

    return SequenceGenerator(
        tgt_dict,
        beam_size=getattr(args, "beam", 5),
        max_len_a=getattr(args, "max_len_a", 0),
        max_len_b=getattr(args, "max_len_b", 200),
        min_len=getattr(args, "min_len", 1),
        normalize_scores=(not getattr(args, "unnormalized", False)),
        len_penalty=getattr(args, "lenpen", 1),
        unk_penalty=getattr(args, "unkpen", 0),
        temperature=getattr(args, "temperature", 1.),
        match_source_len=getattr(args, "match_source_len", False),
        no_repeat_ngram_size=getattr(args, "no_repeat_ngram_size", 0),
        search_strategy=strategy,
        eos_factor=getattr(args, "eos_factor", None),
    )
def __init__( self, models, tgt_dict, beam_size=1, minlen=1, maxlen=None, stop_early=True, normalize_scores=True, len_penalty=0, unk_reward=0, lexicon_reward=0, retain_dropout=False, word_reward=0, model_weights=None, use_char_source=False, sampling=False, sampling_topk=-1, sampling_temperature=1, ): """Generates translations of a given source sentence. Args: models: List of FairseqModel objects. Each one must implement reorder_encoder_output() method to replicate encoder outputs. min/maxlen: The length of the generated output will be bounded by minlen and maxlen (not including the end-of-sentence marker). stop_early: Stop generation immediately after we finalize beam_size hypotheses, even though longer hypotheses might have better normalized scores. normalize_scores: Normalize scores by the length of the output. word_reward: add this value to score each token except EOS (an alternative method to len_penalty for encouraging longer output) model_weights: None or list of Python floats of the same length as `models` with ensemble interpolation weights. 
use_char_source: if True, encoder inputs consist of (src_tokens, src_lengths, char_inds, word_lengths) """ self.models = models self.pad = tgt_dict.pad() self.unk = tgt_dict.unk() self.eos = tgt_dict.eos() self.vocab_size = len(tgt_dict) self.beam_size = beam_size self.minlen = minlen max_decoder_len = min(m.max_decoder_positions() for m in self.models) self.maxlen = (max_decoder_len if maxlen is None else min( maxlen, max_decoder_len)) self.stop_early = stop_early self.normalize_scores = normalize_scores self.len_penalty = len_penalty self.unk_reward = unk_reward self.lexicon_reward = lexicon_reward self.lexicon_indices = tgt_dict.lexicon_indices_list() self.retain_dropout = retain_dropout self.word_reward = word_reward if model_weights is not None: assert len(models) == len(model_weights) self.model_weights = model_weights else: self.model_weights = [1.0 / len(models)] * len(models) self.use_char_source = use_char_source assert sampling_topk < 0 or sampling, "--sampling-topk requires --sampling" if sampling: self.search = search.Sampling(tgt_dict, sampling_topk, sampling_temperature)