def get_parser_with_args():
    """Build the argument parser for the top-K probability collection binary.

    Starts from the shared pytorch_translate generation parser and registers
    the handful of flags specific to collecting per-token top-K probabilities.

    Returns:
        The fully configured argparse parser.
    """
    parser = options.get_parser("Collect Top-K Probs", default_task="pytorch_translate")
    pytorch_translate_options.add_verbosity_args(parser)
    pytorch_translate_options.add_dataset_args(parser, gen=True)
    generation_group = options.add_generation_args(parser)

    # Flags specific to this binary, registered data-driven on the
    # generation group: (flag, kwargs) pairs in declaration order.
    extra_flags = [
        (
            "--source-binary-file",
            {
                "default": "",
                "help": "Path for the binary file containing source eval examples. "
                "(Overrides --source-text-file. Must be used in conjunction with "
                "--target-binary-file).",
            },
        ),
        (
            "--target-binary-file",
            {
                "default": "",
                "help": "Path for the binary file containing target eval examples. "
                "(Overrides --target-text-file. Must be used in conjunction with "
                "--source-binary-file).",
            },
        ),
        (
            "--k-probs-to-collect",
            {
                "type": int,
                "default": 8,
                "help": "Number of probabilities to collect for each output step.",
            },
        ),
        (
            "--top-k-probs-binary-file",
            {
                "type": str,
                "default": "",
                "help": "File into which to save top-K probabilities for each token.",
            },
        ),
    ]
    for flag, kwargs in extra_flags:
        generation_group.add_argument(flag, **kwargs)

    return parser
def main():
    """Entry point for the preprocessing binary.

    Parses the preprocessing command-line flags, validates and prints them,
    then runs corpus preprocessing.
    """
    arg_parser = argparse.ArgumentParser(description="PyTorch Translate - preprocessing")
    pytorch_translate_options.add_verbosity_args(arg_parser)
    pytorch_translate_options.add_preprocessing_args(arg_parser)

    parsed_args = arg_parser.parse_args()
    # Fail fast on inconsistent flag combinations before doing any work.
    pytorch_translate_options.validate_preprocessing_args(parsed_args)
    pytorch_translate_options.print_args(parsed_args)

    preprocess_corpora(parsed_args)
def get_parser_with_args(default_task="pytorch_translate"):
    """Build the argument parser for the trainer binary.

    Args:
        default_task: fairseq task name used when --task is not given.

    Returns:
        The fully configured argparse parser.
    """
    parser = options.get_parser("Trainer", default_task=default_task)
    pytorch_translate_options.add_verbosity_args(parser, train=True)
    pytorch_translate_options.add_dataset_args(parser, train=True, gen=True)
    options.add_distributed_training_args(parser)
    # Training args (validation and stopping criterions), expanded with
    # pytorch_translate-specific options.
    pytorch_translate_options.expand_optimization_args(
        options.add_optimization_args(parser)
    )
    # Checkpointing args, likewise expanded.
    pytorch_translate_options.expand_checkpointing_args(
        options.add_checkpoint_args(parser)
    )
    # Model-architecture related args.
    options.add_model_args(parser)
    # Args for generating intermediate BLEU eval while training.
    pytorch_translate_options.expand_generation_args(
        options.add_generation_args(parser), train=True
    )
    # Args for input data files (preprocessing, numberizing, and binarizing
    # text files; creating vocab files).
    pytorch_translate_options.add_preprocessing_args(parser)
    return parser
def get_parser_with_args():
    """Build the argument parser for the standalone generation binary.

    Starts from the shared pytorch_translate generation parser and registers
    the flags used only by standalone generation (vocab/text/binary input
    files, output files, multilingual language selection, ensembling, and the
    alternative beam search).

    Returns:
        The fully configured argparse parser.
    """
    parser = options.get_parser("Generation", default_task="pytorch_translate")
    pytorch_translate_options.add_verbosity_args(parser)
    pytorch_translate_options.add_dataset_args(parser, gen=True)
    generation_group = options.add_generation_args(parser)
    pytorch_translate_options.expand_generation_args(generation_group)

    # Flags used by the standalone generate binary, registered data-driven:
    # (flag, kwargs) pairs in declaration order.
    standalone_flags = [
        (
            "--source-vocab-file",
            {
                "default": "",
                "metavar": "FILE",
                "help": "Path to text file representing the Dictionary to use.",
            },
        ),
        (
            "--char-source-vocab-file",
            {
                "default": "",
                "metavar": "FILE",
                "help": (
                    "Same as --source-vocab-file except using characters. "
                    "(For use with char_source models only.)"
                ),
            },
        ),
        (
            "--target-vocab-file",
            {
                "default": "",
                "metavar": "FILE",
                "help": "Path to text file representing the Dictionary to use.",
            },
        ),
        (
            "--source-text-file",
            {
                "default": "",
                "nargs": "+",
                "metavar": "FILE",
                "help": "Path to raw text file containing examples in source dialect. "
                "This overrides what would be loaded from the data dir. "
                "You can specify multiple source files (eg. for use in combination "
                "with --source-ensembling). By default this will only translate the "
                "first source file",
            },
        ),
        (
            "--target-text-file",
            {
                "default": "",
                "metavar": "FILE",
                "help": "Path to raw text file containing examples in target dialect. "
                "This overrides what would be loaded from the data dir.",
            },
        ),
        (
            "--source-binary-file",
            {
                "default": "",
                "help": "Path for the binary file containing source eval examples. "
                "(Overrides --source-text-file. Must be used in conjunction with "
                "--target-binary-file).",
            },
        ),
        (
            "--target-binary-file",
            {
                "default": "",
                "help": "Path for the binary file containing target eval examples. "
                "(Overrides --target-text-file. Must be used in conjunction with "
                "--source-binary-file).",
            },
        ),
        (
            "--translation-output-file",
            {
                "default": "",
                "type": str,
                "metavar": "FILE",
                "help": "Path to text file to store the output of the model. ",
            },
        ),
        (
            "--translation-probs-file",
            {
                "default": "",
                "type": str,
                "metavar": "FILE",
                "help": "Path to text file to store the probs of translation output. ",
            },
        ),
        (
            "--multiling-source-lang",
            {
                "action": "append",
                "metavar": "SRC",
                "help": (
                    "Must be set for decoding with multilingual models. "
                    "Must match an entry from --multiling-encoder-lang from training."
                ),
            },
        ),
        (
            "--multiling-target-lang",
            {
                "action": "append",
                "metavar": "TARGET",
                "help": (
                    "Must be set for decoding with multilingual models. "
                    "Must match an entry from --multiling-decoder-lang from training."
                ),
            },
        ),
        (
            "--source-ensembling",
            {
                "action": "store_true",
                "help": "If this flag is present, the model will ensemble the predictions "
                "conditioned on multiple source sentences (one per source-text-file)",
            },
        ),
        (
            "--competing-completed-beam-search",
            {
                "action": "store_true",
                "help": "If this flag is present, use the alternative beam search "
                "implementation in research/beam_search. This beam search keeps completed "
                "hypos in the beam and let them compete against hypo expansions in the "
                "next time step.",
            },
        ),
    ]
    for flag, kwargs in standalone_flags:
        generation_group.add_argument(flag, **kwargs)

    return parser
def get_parser_with_args():
    """Build the argument parser for the multilingual (language-id) generation binary.

    Starts from the shared pytorch_translate generation parser and registers
    the flags used by standalone generation, selecting multilingual source /
    target languages by integer index rather than by language code.

    Returns:
        The fully configured argparse parser.
    """
    # Consistency fix: every other parser builder in this project passes
    # default_task="pytorch_translate"; previously this one omitted it and
    # silently fell back to fairseq's default task.
    parser = options.get_parser("Generation", default_task="pytorch_translate")
    pytorch_translate_options.add_verbosity_args(parser)
    pytorch_translate_options.add_dataset_args(parser, gen=True)
    generation_group = options.add_generation_args(parser)
    pytorch_translate_options.expand_generation_args(generation_group)
    # Adds args used by the standalone generate binary.
    generation_group.add_argument(
        "--source-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the Dictionary to use.",
    )
    generation_group.add_argument(
        "--char-source-vocab-file",
        default="",
        metavar="FILE",
        help=(
            "Same as --source-vocab-file except using characters. "
            "(For use with char_source models only.)"
        ),
    )
    generation_group.add_argument(
        "--target-vocab-file",
        default="",
        metavar="FILE",
        help="Path to text file representing the Dictionary to use.",
    )
    generation_group.add_argument(
        "--source-text-file",
        default="",
        nargs="+",
        metavar="FILE",
        help="Path to raw text file containing examples in source dialect. "
        "This overrides what would be loaded from the data dir. "
        "You can specify multiple source files (eg. for use in combination "
        "with --source-ensembling). By default this will only translate the "
        "first source file",
    )
    generation_group.add_argument(
        "--target-text-file",
        default="",
        metavar="FILE",
        help="Path to raw text file containing examples in target dialect. "
        "This overrides what would be loaded from the data dir.",
    )
    generation_group.add_argument(
        "--source-binary-file",
        default="",
        help="Path for the binary file containing source eval examples. "
        "(Overrides --source-text-file. Must be used in conjunction with "
        "--target-binary-file).",
    )
    generation_group.add_argument(
        "--target-binary-file",
        default="",
        help="Path for the binary file containing target eval examples. "
        "(Overrides --target-text-file. Must be used in conjunction with "
        "--source-binary-file).",
    )
    generation_group.add_argument(
        "--translation-output-file",
        default="",
        type=str,
        metavar="FILE",
        help="Path to text file to store the output of the model. ",
    )
    generation_group.add_argument(
        "--translation-probs-file",
        default="",
        type=str,
        metavar="FILE",
        help="Path to text file to store the probs of translation output. ",
    )
    # Multilingual language selection by 0-indexed position in the training
    # --multiling-{encoder,decoder}-lang lists (contrast with the sibling
    # builder that selects by language code via action="append").
    generation_group.add_argument(
        "--multiling-source-lang-id",
        type=int,
        default=None,
        help=(
            "Must be set for decoding with multilingual models. Set to i if "
            "the source language is the i-th language in the training parameter "
            "--multiling-encoder-lang (0-indexed)"
        ),
    )
    generation_group.add_argument(
        "--multiling-target-lang-id",
        type=int,
        default=None,
        help=(
            "Must be set for decoding with multilingual models. Set to i if "
            "the target language is the i-th language in the training parameter "
            "--multiling-decoder-lang (0-indexed)"
        ),
    )
    generation_group.add_argument(
        "--source-ensembling",
        action="store_true",
        help="If this flag is present, the model will ensemble the predictions "
        "conditioned on multiple source sentences (one per source-text-file)",
    )
    return parser
def get_parser_with_args():
    """Build the argument parser for the generation benchmarking binary.

    Starts from the shared pytorch_translate generation parser, registers the
    vocab/multilingual flags it needs, and adds a "Benchmarking" group
    controlling how many sentences of each length get evaluated.

    Returns:
        The fully configured argparse parser.
    """
    parser = options.get_parser("Generation", default_task="pytorch_translate")
    pytorch_translate_options.add_verbosity_args(parser)
    pytorch_translate_options.add_dataset_args(parser, gen=True)
    generation_group = options.add_generation_args(parser)
    pytorch_translate_options.expand_generation_args(generation_group)

    # Generation-group flags, registered data-driven in declaration order.
    gen_flags = [
        (
            "--source-vocab-file",
            {
                "default": "",
                "metavar": "FILE",
                "help": "Path to text file representing the Dictionary to use.",
            },
        ),
        (
            "--char-source-vocab-file",
            {
                "default": "",
                "metavar": "FILE",
                "help": (
                    "Same as --source-vocab-file except using characters. "
                    "(For use with char_source models only.)"
                ),
            },
        ),
        (
            "--target-vocab-file",
            {
                "default": "",
                "metavar": "FILE",
                "help": "Path to text file representing the Dictionary to use.",
            },
        ),
        (
            "--multiling-source-lang",
            {
                "action": "append",
                "metavar": "SRC",
                "help": (
                    "Must be set for decoding with multilingual models. "
                    "Must match an entry from --multiling-encoder-lang from training."
                ),
            },
        ),
        (
            "--multiling-target-lang",
            {
                "action": "append",
                "metavar": "TARGET",
                "help": (
                    "Must be set for decoding with multilingual models. "
                    "Must match an entry from --multiling-decoder-lang from training."
                ),
            },
        ),
    ]
    for flag, kwargs in gen_flags:
        generation_group.add_argument(flag, **kwargs)

    # Args related to benchmarking.
    benchmark_group = parser.add_argument_group("Benchmarking")
    benchmark_group.add_argument(
        "--runs-per-length",
        default=10,
        type=int,
        help="Number of times to run generation on each length.",
    )
    benchmark_group.add_argument(
        "--examples-per-length",
        default=1,
        type=int,
        help="Sentences of each length to include in each eval (batched if >1).",
    )
    return parser