Example #1
0
def _add_dynamic_transform_opts(parser):
    """Options related to transforms.

    Options that are specified in the definitions of each transform class
    at `onmt/transforms/*.py`.
    """
    # The registry keys are unused here; each transform class registers
    # its own CLI options on the parser.
    for transform_cls in AVAILABLE_TRANSFORMS.values():
        transform_cls.add_options(parser)
Example #2
0
def _add_dynamic_corpus_opts(parser, build_vocab_only=False):
    """Options related to training corpus, type: a list of dictionary.

    Args:
        parser: option parser (configargparse-style, supporting ``.add``).
        build_vocab_only (bool): when True, ``-save_data`` becomes required,
            ``-n_sample`` defaults to 5000, and the dump options are omitted.
    """
    group = parser.add_argument_group('Data')
    group.add("-data",
              "--data",
              required=True,
              help="List of datasets and their specifications. "
              "See examples/*.yaml for further details.")
    group.add("-skip_empty_level",
              "--skip_empty_level",
              default="warning",
              choices=["silent", "warning", "error"],
              # NOTE: separating spaces and spelling fixed so the rendered
              # help text reads correctly (strings are concatenated as-is).
              help="Security level when encounter empty examples. "
              "silent: silently ignore/skip empty example; "
              "warning: warning when ignore/skip empty example; "
              "error: raise error & stop execution when encounter empty.")
    group.add("-transforms",
              "--transforms",
              default=[],
              nargs="+",
              choices=AVAILABLE_TRANSFORMS.keys(),
              help="Default transform pipeline to apply to data. "
              "Can be specified in each corpus of data to override.")

    group.add("-save_data",
              "--save_data",
              required=build_vocab_only,
              help="Output base path for objects that will "
              "be saved (vocab, transforms, embeddings, ...).")
    group.add("-overwrite",
              "--overwrite",
              action="store_true",
              help="Overwrite existing objects if any.")
    group.add(
        '-n_sample',
        '--n_sample',
        type=int,
        default=(5000 if build_vocab_only else 0),
        help=("Build vocab using " if build_vocab_only else "Stop after save ")
        + "this number of transformed samples/corpus. Can be [-1, 0, N>0]. "
        "Set to -1 to go full corpus, 0 to skip.")

    if not build_vocab_only:
        group.add('-dump_fields',
                  '--dump_fields',
                  action='store_true',
                  help="Dump fields `*.vocab.pt` to disk."
                  " -save_data should be set as saving prefix.")
        group.add('-dump_transforms',
                  '--dump_transforms',
                  action='store_true',
                  help="Dump transforms `*.transforms.pt` to disk."
                  " -save_data should be set as saving prefix.")
Example #3
0
 def _validate_transforms_opts(cls, opt):
     """Run each activated transform's own option validation against `opt`."""
     activated = opt._all_transform
     for trf_name, trf_cls in AVAILABLE_TRANSFORMS.items():
         if trf_name not in activated:
             continue
         trf_cls._validate_options(opt)
Example #4
0
def translate_opts(parser, dynamic=False):
    """ Translation / inference options

    Args:
        parser: option parser (configargparse-style, supporting ``.add``).
        dynamic (bool): when True, also register transform-related options.
    """
    group = parser.add_argument_group('Model')
    group.add('--model',
              '-model',
              dest='models',
              metavar='MODEL',
              nargs='+',
              type=str,
              default=[],
              required=True,
              help="Path to model .pt file(s). "
              "Multiple models can be specified, "
              "for ensemble decoding.")
    group.add('--fp32',
              '-fp32',
              action='store_true',
              help="Force the model to be in FP32 "
              "because FP16 is very slow on GTX1080(ti).")
    group.add('--int8',
              '-int8',
              action='store_true',
              help="Enable dynamic 8-bit quantization (CPU only).")
    group.add('--avg_raw_probs',
              '-avg_raw_probs',
              action='store_true',
              help="If this is set, during ensembling scores from "
              "different models will be combined by averaging their "
              "raw probabilities and then taking the log. Otherwise, "
              "the log probabilities will be averaged directly. "
              "Necessary for models whose output layers can assign "
              "zero probability.")

    group = parser.add_argument_group('Data')
    group.add('--data_type',
              '-data_type',
              default="text",
              help="Type of the source input. Options: [text].")

    group.add('--src',
              '-src',
              required=True,
              help="Source sequence to decode (one line per "
              "sequence)")
    group.add(
        "-src_feats",
        "--src_feats",
        required=False,
        help="Source sequence features (dict format). "
        "Ex: {'feat_0': '../data.txt.feats0', 'feat_1': '../data.txt.feats1'}"
    )  # noqa: E501
    group.add('--tgt', '-tgt', help='True target sequence (optional)')
    group.add('--tgt_prefix',
              '-tgt_prefix',
              action='store_true',
              help='Generate predictions using provided `-tgt` as prefix.')
    group.add('--shard_size',
              '-shard_size',
              type=int,
              default=10000,
              help="Divide src and tgt (if applicable) into "
              "smaller multiple src and tgt files, then "
              "build shards, each shard will have "
              "opt.shard_size samples except last shard. "
              "shard_size=0 means no segmentation "
              "shard_size>0 means segment dataset into multiple shards, "
              "each shard has shard_size samples")
    group.add('--output',
              '-output',
              default='pred.txt',
              # fixed unbalanced parenthesis in the help string below
              help="Path to output the predictions (each line will "
              "be the decoded sequence)")
    group.add('--report_align',
              '-report_align',
              action='store_true',
              help="Report alignment for each translation.")
    group.add('--report_time',
              '-report_time',
              action='store_true',
              help="Report some translation time metrics")

    # Adding options related to decoding strategy
    _add_decoding_opts(parser)

    # Adding option for logging
    _add_logging_opts(parser, is_train=False)

    group = parser.add_argument_group('Efficiency')
    group.add('--batch_size',
              '-batch_size',
              type=int,
              default=30,
              help='Batch size')
    group.add('--batch_type',
              '-batch_type',
              default='sents',
              choices=["sents", "tokens"],
              help="Batch grouping for batch_size. Standard "
              "is sents. Tokens will do dynamic batching")
    group.add('--gpu', '-gpu', type=int, default=-1, help="Device to run on")

    if dynamic:
        group.add("-transforms",
                  "--transforms",
                  default=[],
                  nargs="+",
                  choices=AVAILABLE_TRANSFORMS.keys(),
                  help="Default transform pipeline to apply to data.")

        # Adding options related to Transforms
        _add_dynamic_transform_opts(parser)