def get_parser_with_args(): parser = options.get_parser("Generation") options.add_dataset_args(parser, gen=True) options.add_generation_args(parser) pytorch_translate_generate.add_args(parser) group = parser.add_argument_group("Generation") group.add_argument( "--source-vocab-file", default="", metavar="FILE", help="Path to text file representing the Dictionary to use.", ) group.add_argument( "--target-vocab-file", default="", metavar="FILE", help="Path to text file representing the Dictionary to use.", ) # Add args related to benchmarking. group = parser.add_argument_group("Benchmarking") group.add_argument( "--increment", default=5, type=int, help="Difference in lengths between synthesized sentences. " "Must be integer >=1.", ) group.add_argument( "--max-length", default=100, type=int, help="Maximum allowed length for synthesized sentences. " "Should be greater than --increment.", ) group.add_argument( "--samples-per-length", default=1, type=int, help="Number of sentences to be synthesized at each length. ", ) return parser
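# Hedged usage sketch (not from the repo): how the three benchmarking flags
# above might drive sentence synthesis. The helper name and loop are
# illustrative assumptions; only the flag semantics come from the help text.
def synthesized_lengths(args):
    assert args.increment >= 1, "--increment must be an integer >= 1"
    assert args.max_length > args.increment, \
        "--max-length should be greater than --increment"
    # Step through lengths increment, 2*increment, ..., up to --max-length,
    # emitting --samples-per-length sentences at each length.
    for length in range(args.increment, args.max_length + 1, args.increment):
        for _ in range(args.samples_per_length):
            yield length

# Example (using a bare namespace to avoid the full fairseq arg set):
#   args = argparse.Namespace(increment=5, max_length=20, samples_per_length=2)
#   list(synthesized_lengths(args)) == [5, 5, 10, 10, 15, 15, 20, 20]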
def get_parser_with_args(): parser = options.get_parser("Trainer") options.add_dataset_args(parser, train=True, gen=True) options.add_distributed_training_args(parser) options.add_optimization_args(parser) options.add_checkpoint_args(parser) options.add_model_args(parser) options.add_generation_args(parser) parser.add_argument( "--log-verbose", action="store_true", help="Whether to output more verbose logs for debugging/profiling.", ) # Adds args related to training (validation and stopping criterions). group = parser.add_argument_group("Optimization") group.add_argument( "--subepoch-validate-interval", default=0, type=int, metavar="N", help="Calculates loss over the validation set every N batch updates. " "Note that validation is done at the end of every epoch regardless. " "A value of <= 0 disables this.", ) group.add_argument( "--stop-time-hr", default=-1, type=int, metavar="N", help="Stops training after N hours have elapsed. " "A value of < 0 disables this.", ) group.add_argument( "--stop-no-best-validate-loss", default=-1, type=int, metavar="N", help="Stops training after N validations have been run without " "achieving a better loss than before. Note that this is affected by " "--validation-interval in how frequently we run validation in the " "first place. A value of < 0 disables this.", ) group.add_argument( "--stop-no-best-bleu-eval", default=-1, type=int, metavar="N", help="Stops training after N evals have been run without " "achieving a better BLEU score than before. Note that this is affected " "by --generate-bleu-eval-interval in how frequently we run BLEU eval " "in the first place. A value of < 0 disables this.", ) # Adds args related to input data files (preprocessing, numberizing, and # binarizing text files; creating vocab files) preprocess.add_args(parser) # Adds args related to checkpointing. group = parser.add_argument_group("Checkpointing") group.add_argument( "--no-end-of-epoch-checkpoints", action="store_true", help="Disables saving checkpoints at the end of the epoch. " "This differs from --no-save and --no-epoch-checkpoints in that it " "still allows for intra-epoch checkpoints if --save-interval is set.", ) group.add_argument( "--max-checkpoints-kept", default=-1, type=int, metavar="N", help="Keep at most the last N checkpoints file around. " "A value < -1 keeps all. " "When --generate-bleu-eval-avg-checkpoints is used and is > N, the " "number of checkpoints kept around is automatically adjusted " "to allow BLEU to work properly.", ) # Adds args for generating intermediate BLEU eval while training. # generate.add_args() adds args used by both train.py and the standalone # generate binary, while the flags defined here are used only by train.py. generate.add_args(parser) group = parser.add_argument_group("Generation") group.add_argument( "--generate-bleu-eval-per-epoch", action="store_true", help="Whether to generate BLEU score eval after each epoch.", ) group.add_argument( "--generate-bleu-eval-interval", default=0, type=int, metavar="N", help="Does BLEU eval every N batch updates. Note that " "--save-interval also affects this - we can only eval as " "frequently as a checkpoint is written. A value of <= 0 " "disables this.", ) group.add_argument( "--generate-bleu-eval-avg-checkpoints", default=1, type=int, metavar="N", help="Maximum number of last N checkpoints to average over when " "doing BLEU eval. 
Must be >= 1.", ) group.add_argument( "--continuous-averaging-after-epochs", type=int, default=-1, help=("Average parameter values after each step since previous " "checkpoint, beginning after the specified number of epochs. "), ) return parser
def get_parser_with_args():
    parser = options.get_parser('Trainer')
    options.add_dataset_args(parser, train=True, gen=True)
    options.add_distributed_training_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)
    options.add_model_args(parser)
    options.add_generation_args(parser)

    parser.add_argument(
        '--log-verbose',
        action='store_true',
        help='Whether to output more verbose logs for debugging/profiling.',
    )

    # Adds args related to training (validation and stopping criteria).
    group = parser.add_argument_group('Optimization')
    group.add_argument(
        '--subepoch-validate-interval',
        default=0,
        type=int,
        metavar='N',
        help='Calculates loss over the validation set every N batch updates. '
        'Note that validation is done at the end of every epoch regardless. '
        'A value of <= 0 disables this.',
    )
    group.add_argument(
        '--stop-time-hr',
        default=-1,
        type=int,
        metavar='N',
        help='Stops training after N hours have elapsed. '
        'A value of < 0 disables this.',
    )
    group.add_argument(
        '--stop-no-best-validate-loss',
        default=-1,
        type=int,
        metavar='N',
        help='Stops training after N validations have been run without '
        'achieving a better loss than before. Note that '
        '--validation-interval controls how frequently validation runs '
        'in the first place. A value of < 0 disables this.',
    )
    group.add_argument(
        '--stop-no-best-bleu-eval',
        default=-1,
        type=int,
        metavar='N',
        help='Stops training after N evals have been run without '
        'achieving a better BLEU score than before. Note that '
        '--generate-bleu-eval-interval controls how frequently BLEU eval '
        'runs in the first place. A value of < 0 disables this.',
    )

    # Args related to dataset.
    group = parser.add_argument_group('Dataset and data loading')
    group.add_argument(
        '--source-vocab-file',
        default='',
        metavar='FILE',
        help='Path to text file representing the fairseq Dictionary to use. '
        'If left empty, the dict is auto-generated from source training data.',
    )
    group.add_argument(
        '--source-max-vocab-size',
        default=-1,
        type=int,
        metavar='N',
        help='If a new vocab file needs to be generated, restrict it to the '
        'top N most common words. If we re-use an existing vocab file, this '
        'flag has no effect. A value of < 0 means no max size.',
    )
    group.add_argument(
        '--target-vocab-file',
        default='',
        metavar='FILE',
        help='Path to text file representing the fairseq Dictionary to use. '
        'If left empty, the dict is auto-generated from target training data.',
    )
    group.add_argument(
        '--target-max-vocab-size',
        default=-1,
        type=int,
        metavar='N',
        help='If a new vocab file needs to be generated, restrict it to the '
        'top N most common words. If we re-use an existing vocab file, this '
        'flag has no effect. A value of < 0 means no max size.',
    )
    group.add_argument(
        '--train-source-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing source training examples. '
        'This overrides what would be loaded from the data dir.',
    )
    group.add_argument(
        '--train-target-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing target training examples. '
        'This overrides what would be loaded from the data dir.',
    )
    group.add_argument(
        '--eval-source-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing source eval examples for '
        'calculating validation loss and BLEU eval scores. '
        'This overrides what would be loaded from the data dir.',
    )
    group.add_argument(
        '--eval-target-text-file',
        default='',
        metavar='FILE',
        help='Path to raw text file containing target eval examples for '
        'calculating validation loss and BLEU eval scores. '
        'This overrides what would be loaded from the data dir.',
    )

    # Adds args related to checkpointing.
    group = parser.add_argument_group('Checkpointing')
    group.add_argument(
        '--no-end-of-epoch-checkpoints',
        action='store_true',
        help='Disables saving checkpoints at the end of the epoch. '
        'This differs from --no-save and --no-epoch-checkpoints in that it '
        'still allows for intra-epoch checkpoints if --save-interval is set.',
    )

    # Adds args for generating intermediate BLEU eval while training.
    # generate.add_args() adds args used by both train.py and the standalone
    # generate binary, while the flags defined here are used only by train.py.
    generate.add_args(parser)
    group = parser.add_argument_group('Generation')
    group.add_argument(
        '--generate-bleu-eval-per-epoch',
        action='store_true',
        help='Whether to generate BLEU score eval after each epoch.',
    )
    group.add_argument(
        '--generate-bleu-eval-interval',
        default=0,
        type=int,
        metavar='N',
        help='Runs BLEU eval every N batch updates. Note that '
        '--save-interval also affects this: we can only eval as '
        'frequently as a checkpoint is written. A value of <= 0 '
        'disables this.',
    )
    group.add_argument(
        '--generate-bleu-eval-avg-checkpoints',
        default=1,
        type=int,
        metavar='N',
        help='Maximum number of last N checkpoints to average over when '
        'doing BLEU eval. Must be >= 1.',
    )
    group.add_argument(
        '--continuous-averaging-after-epochs',
        type=int,
        default=-1,
        help='Average parameter values after each step since the previous '
        'checkpoint, beginning after the specified number of epochs.',
    )

    return parser
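# Hedged sketch (helper name and logic are illustrative, not the repo's) of
# the vocab behavior described by the help strings above: an existing
# --*-vocab-file is re-used as-is and --*-max-vocab-size is ignored;
# otherwise a vocab is built from training text, truncated to the top N
# most common words when N >= 0.
from collections import Counter

def build_or_load_vocab(vocab_file, train_text_file, max_vocab_size):
    if vocab_file:
        # Re-use the existing vocab file (one token per line assumed here);
        # --*-max-vocab-size has no effect in this branch.
        with open(vocab_file, encoding='utf-8') as f:
            return [line.split()[0] for line in f if line.strip()]
    counts = Counter()
    with open(train_text_file, encoding='utf-8') as f:
        for line in f:
            counts.update(line.split())
    words = [w for w, _ in counts.most_common()]
    # A value of < 0 means no max size, per the help text.
    return words[:max_vocab_size] if max_vocab_size >= 0 else words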
def get_parser_with_args(): parser = options.get_parser("Trainer") options.add_dataset_args(parser, train=True, gen=True) options.add_distributed_training_args(parser) options.add_optimization_args(parser) options.add_checkpoint_args(parser) options.add_model_args(parser) options.add_generation_args(parser) parser.add_argument( "--log-verbose", action="store_true", help="Whether to output more verbose logs for debugging/profiling.", ) # Adds args related to training (validation and stopping criterions). group = parser.add_argument_group("Optimization") group.add_argument( "--subepoch-validate-interval", default=0, type=int, metavar="N", help="Calculates loss over the validation set every N batch updates. " "Note that validation is done at the end of every epoch regardless. " "A value of <= 0 disables this.", ) group.add_argument( "--stop-time-hr", default=-1, type=int, metavar="N", help="Stops training after N hours have elapsed. " "A value of < 0 disables this.", ) group.add_argument( "--stop-no-best-validate-loss", default=-1, type=int, metavar="N", help="Stops training after N validations have been run without " "achieving a better loss than before. Note that this is affected by " "--validation-interval in how frequently we run validation in the " "first place. A value of < 0 disables this.", ) group.add_argument( "--stop-no-best-bleu-eval", default=-1, type=int, metavar="N", help="Stops training after N evals have been run without " "achieving a better BLEU score than before. Note that this is affected " "by --generate-bleu-eval-interval in how frequently we run BLEU eval " "in the first place. A value of < 0 disables this.", ) # Args related to dataset. group = parser.add_argument_group("Dataset and data loading") group.add_argument( "--source-vocab-file", default="", metavar="FILE", help="Path to text file representing the fairseq Dictionary to use. " "If left empty, the dict is auto-generated from source training data.", ) group.add_argument( "--source-max-vocab-size", default=-1, type=int, metavar="N", help="If a new vocab file needs to be generated, restrict it to the " "top N most common words. If we re-use an existing vocab file, this " "flag will have no effect. A value of < 0 means no max size.", ) group.add_argument( "--target-vocab-file", default="", metavar="FILE", help="Path to text file representing the fairseq Dictionary to use. " "If left empty, the dict is auto-generated from target training data.", ) group.add_argument( "--target-max-vocab-size", default=-1, type=int, metavar="N", help="If a new vocab file needs to be generated, restrict it to the " "top N most common words. If we re-use an existing vocab file, this " "flag will have no effect. A value of < 0 means no max size.", ) group.add_argument( "--train-source-text-file", default="", metavar="FILE", help="Path to raw text file containing source training examples. " "This overrides what would be loaded from the data dir.", ) group.add_argument( "--train-target-text-file", default="", metavar="FILE", help="Path to raw text file containing target training examples. " "This overrides what would be loaded from the data dir.", ) group.add_argument( "--eval-source-text-file", default="", metavar="FILE", help="Path to raw text file containing source eval examples for " "calculating validation loss and BLEU eval scores. 
" "This overrides what would be loaded from the data dir.", ) group.add_argument( "--eval-target-text-file", default="", metavar="FILE", help="Path to raw text file containing target eval examples for " "calculating validation loss and BLEU eval scores. " "This overrides what would be loaded from the data dir.", ) group.add_argument( "--penalized-target-tokens-file", default="", metavar="FILE", help="Path to text file of tokens to receive a penalty in decoding." "If left empty, no penalty will be applied", ) # Adds args related to checkpointing. group = parser.add_argument_group("Checkpointing") group.add_argument( "--no-end-of-epoch-checkpoints", action="store_true", help="Disables saving checkpoints at the end of the epoch. " "This differs from --no-save and --no-epoch-checkpoints in that it " "still allows for intra-epoch checkpoints if --save-interval is set.", ) group.add_argument( "--max-checkpoints-kept", default=-1, type=int, metavar="N", help="Keep at most the last N checkpoints file around. " "A value < -1 keeps all. " "When --generate-bleu-eval-avg-checkpoints is used and is > N, the " "number of checkpoints kept around is automatically adjusted " "to allow BLEU to work properly.", ) # Adds args for generating intermediate BLEU eval while training. # generate.add_args() adds args used by both train.py and the standalone # generate binary, while the flags defined here are used only by train.py. generate.add_args(parser) group = parser.add_argument_group("Generation") group.add_argument( "--generate-bleu-eval-per-epoch", action="store_true", help="Whether to generate BLEU score eval after each epoch.", ) group.add_argument( "--generate-bleu-eval-interval", default=0, type=int, metavar="N", help="Does BLEU eval every N batch updates. Note that " "--save-interval also affects this - we can only eval as " "frequently as a checkpoint is written. A value of <= 0 " "disables this.", ) group.add_argument( "--generate-bleu-eval-avg-checkpoints", default=1, type=int, metavar="N", help="Maximum number of last N checkpoints to average over when " "doing BLEU eval. Must be >= 1.", ) group.add_argument( "--continuous-averaging-after-epochs", type=int, default=-1, help=("Average parameter values after each step since previous " "checkpoint, beginning after the specified number of epochs. "), ) return parser