def _add_dynamic_transform_opts(parser):
    """Add the CLI options declared by each available transform.

    The options themselves are specified in the definitions of each
    transform class at ``onmt/transforms/*.py``.

    Args:
        parser: the argument parser (or parser-like object) to extend.
    """
    # Only the class is needed here; the registry key was unused, so
    # iterate over values() directly instead of unpacking items().
    for transform_cls in AVAILABLE_TRANSFORMS.values():
        transform_cls.add_options(parser)
def _add_dynamic_corpus_opts(parser, build_vocab_only=False):
    """Options related to the training corpus (a list of dictionaries).

    Args:
        parser: the argument parser to extend.
        build_vocab_only: when True, tailor options for vocabulary
            building (``-save_data`` becomes required, ``-n_sample``
            defaults to 5000, and dump options are omitted).
    """
    group = parser.add_argument_group('Data')
    group.add("-data", "--data", required=True,
              help="List of datasets and their specifications. "
                   "See examples/*.yaml for further details.")
    group.add("-skip_empty_level", "--skip_empty_level", default="warning",
              choices=["silent", "warning", "error"],
              # Typos fixed in help text: "excution" -> "execution",
              # "encouter" -> "encounter", stray ")" removed.
              help="Security level when encounter empty examples."
                   "silent: silently ignore/skip empty example;"
                   "warning: warning when ignore/skip empty example;"
                   "error: raise error & stop execution when encounter empty.")
    group.add("-transforms", "--transforms", default=[], nargs="+",
              choices=AVAILABLE_TRANSFORMS.keys(),
              help="Default transform pipeline to apply to data. "
                   "Can be specified in each corpus of data to override.")
    # save_data is only mandatory when building vocab, since that is
    # when the saved artifacts (vocab files) are the whole point.
    group.add("-save_data", "--save_data", required=build_vocab_only,
              help="Output base path for objects that will "
                   "be saved (vocab, transforms, embeddings, ...).")
    group.add("-overwrite", "--overwrite", action="store_true",
              help="Overwrite existing objects if any.")
    group.add(
        '-n_sample', '--n_sample',
        type=int, default=(5000 if build_vocab_only else 0),
        help=(("Build vocab using " if build_vocab_only
               else "Stop after save ")
              + "this number of transformed samples/corpus. "
                "Can be [-1, 0, N>0]. "
                "Set to -1 to go full corpus, 0 to skip."))
    if not build_vocab_only:
        group.add('-dump_fields', '--dump_fields', action='store_true',
                  help="Dump fields `*.vocab.pt` to disk."
                       " -save_data should be set as saving prefix.")
        group.add('-dump_transforms', '--dump_transforms',
                  action='store_true',
                  help="Dump transforms `*.transforms.pt` to disk."
                       " -save_data should be set as saving prefix.")
def _validate_transforms_opts(cls, opt):
    """Check options used by transforms.

    Runs each registered transform's option validation, but only for
    the transforms actually selected in ``opt._all_transform``.
    """
    selected = opt._all_transform
    for name, transform_cls in AVAILABLE_TRANSFORMS.items():
        if name not in selected:
            continue  # transform not requested; nothing to validate
        transform_cls._validate_options(opt)
def translate_opts(parser, dynamic=False):
    """Translation / inference options.

    Args:
        parser: the argument parser to extend.
        dynamic: when True, also expose data-transform options so that
            a transform pipeline can be applied at inference time.
    """
    group = parser.add_argument_group('Model')
    group.add('--model', '-model', dest='models', metavar='MODEL',
              nargs='+', type=str, default=[], required=True,
              help="Path to model .pt file(s). "
                   "Multiple models can be specified, "
                   "for ensemble decoding.")
    group.add('--fp32', '-fp32', action='store_true',
              help="Force the model to be in FP32 "
                   "because FP16 is very slow on GTX1080(ti).")
    group.add('--int8', '-int8', action='store_true',
              help="Enable dynamic 8-bit quantization (CPU only).")
    group.add('--avg_raw_probs', '-avg_raw_probs', action='store_true',
              help="If this is set, during ensembling scores from "
                   "different models will be combined by averaging their "
                   "raw probabilities and then taking the log. Otherwise, "
                   "the log probabilities will be averaged directly. "
                   "Necessary for models whose output layers can assign "
                   "zero probability.")

    group = parser.add_argument_group('Data')
    group.add('--data_type', '-data_type', default="text",
              help="Type of the source input. Options: [text].")
    group.add('--src', '-src', required=True,
              help="Source sequence to decode (one line per "
                   "sequence)")
    group.add(
        "-src_feats", "--src_feats", required=False,
        help="Source sequence features (dict format). "
             "Ex: {'feat_0': '../data.txt.feats0', 'feat_1': '../data.txt.feats1'}")  # noqa: E501
    group.add('--tgt', '-tgt',
              help='True target sequence (optional)')
    group.add('--tgt_prefix', '-tgt_prefix', action='store_true',
              help='Generate predictions using provided `-tgt` as prefix.')
    # NOTE(review): this help string was broken across a line in the
    # mangled source; reconstructed as one continuous literal.
    group.add('--shard_size', '-shard_size', type=int, default=10000,
              help="Divide src and tgt (if applicable) into "
                   "smaller multiple src and tgt files, then "
                   "build shards, each shard will have "
                   "opt.shard_size samples except last shard. "
                   "shard_size=0 means no segmentation "
                   "shard_size>0 means segment dataset into multiple shards, "
                   "each shard has shard_size samples")
    # Fixed unbalanced parenthesis in the help text below.
    group.add('--output', '-output', default='pred.txt',
              help="Path to output the predictions (each line will "
                   "be the decoded sequence)")
    group.add('--report_align', '-report_align', action='store_true',
              help="Report alignment for each translation.")
    group.add('--report_time', '-report_time', action='store_true',
              help="Report some translation time metrics")

    # Adding options related to decoding strategy
    _add_decoding_opts(parser)

    # Adding option for logging
    _add_logging_opts(parser, is_train=False)

    group = parser.add_argument_group('Efficiency')
    group.add('--batch_size', '-batch_size', type=int, default=30,
              help='Batch size')
    group.add('--batch_type', '-batch_type', default='sents',
              choices=["sents", "tokens"],
              help="Batch grouping for batch_size. Standard "
                   "is sents. Tokens will do dynamic batching")
    group.add('--gpu', '-gpu', type=int, default=-1,
              help="Device to run on")

    if dynamic:
        group.add("-transforms", "--transforms", default=[], nargs="+",
                  choices=AVAILABLE_TRANSFORMS.keys(),
                  help="Default transform pipeline to apply to data.")

        # Adding options related to Transforms
        _add_dynamic_transform_opts(parser)