Example no. 1
0
def get_parser(desc, default_task="translation"):
    """Build the top-level argument parser.

    Registers the dataclass-derived common params, one choice flag per
    component registry, and the ``--task`` flag (``desc`` is accepted for
    interface compatibility but not used here).
    """
    # Eagerly import any user-supplied module first, so that custom tasks,
    # optimizers, architectures, etc. are registered before the real parser
    # is constructed.
    pre_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
    pre_parser.add_argument("--user-dir", default=None)
    pre_args, _ = pre_parser.parse_known_args()
    import_user_module(pre_args)

    parser = argparse.ArgumentParser(allow_abbrev=False)
    gen_parser_from_dataclass(parser, CommonParams())

    from fairseq.registry import REGISTRIES

    # One "--<registry-name>" choice flag per component registry.
    for name, registry in REGISTRIES.items():
        flag = "--" + name.replace("_", "-")
        parser.add_argument(
            flag,
            default=registry["default"],
            choices=registry["registry"].keys(),
        )

    # Task definitions can be found under fairseq/tasks/
    from fairseq.tasks import TASK_REGISTRY

    parser.add_argument(
        "--task",
        metavar="TASK",
        default=default_task,
        choices=TASK_REGISTRY.keys(),
        help="task",
    )
    return parser
Example no. 2
0
def parse_args_and_arch(parser, input_args=None, parse_known=False):
    """Parse args, late-registering model/task/registry-specific arguments
    that are only known after a first parsing pass.

    Args:
        parser: the base ArgumentParser to extend and parse with
        input_args: list of strings to parse instead of sys.argv, or None
        parse_known: if True, parse with ``parse_known_args`` and also
            return the list of unrecognized argument strings

    Returns:
        the parsed args Namespace, or ``(args, extra)`` when ``parse_known``
        is True (``extra`` is the list of leftover argument strings).
    """
    from fairseq.models import ARCH_MODEL_REGISTRY, ARCH_CONFIG_REGISTRY

    # The parser doesn't know about model/criterion/optimizer-specific args, so
    # we parse twice. First we parse the model/criterion/optimizer, then we
    # parse a second time after adding the *-specific arguments.
    # If input_args is given, we will parse those args instead of sys.argv.
    args, _ = parser.parse_known_args(input_args)

    # Add model-specific args to parser.
    if hasattr(args, 'arch'):
        model_specific_group = parser.add_argument_group(
            'Model-specific configuration',
            # Only include attributes which are explicitly given as command-line
            # arguments or which have default values.
            argument_default=argparse.SUPPRESS,
        )
        ARCH_MODEL_REGISTRY[args.arch].add_args(model_specific_group)

    # Add *-specific args to parser.
    from fairseq.registry import REGISTRIES
    for registry_name, REGISTRY in REGISTRIES.items():
        choice = getattr(args, registry_name, None)
        if choice is not None:
            cls = REGISTRY['registry'][choice]
            if hasattr(cls, 'add_args'):
                cls.add_args(parser)
    if hasattr(args, 'task'):
        from fairseq.tasks import TASK_REGISTRY
        TASK_REGISTRY[args.task].add_args(parser)
    if getattr(args, 'use_bmuf', False):
        # hack to support extra args for block distributed data parallelism
        from fairseq.optim.bmuf import FairseqBMUF
        FairseqBMUF.add_args(parser)

    # Parse a second time.
    if parse_known:
        args, extra = parser.parse_known_args(input_args)
    else:
        args = parser.parse_args(input_args)
        extra = None

    # Post-process args.
    # Validation-time limits fall back to their training-time counterparts
    # when not given explicitly.
    if hasattr(args,
               'max_sentences_valid') and args.max_sentences_valid is None:
        args.max_sentences_valid = args.max_sentences
    if hasattr(args, 'max_tokens_valid') and args.max_tokens_valid is None:
        args.max_tokens_valid = args.max_tokens
    if getattr(args, 'memory_efficient_fp16', False):
        # memory-efficient FP16 implies plain FP16 as well
        args.fp16 = True

    # Apply architecture configuration.
    if hasattr(args, 'arch'):
        ARCH_CONFIG_REGISTRY[args.arch](args)

    if parse_known:
        return args, extra
    else:
        return args
Example no. 3
0
def get_parser(desc, default_task="translation"):
    """Build a configargparse-based top-level parser with common flags, a
    ``--configs`` config-file option, one choice flag per component registry,
    and the ``--task`` option (``desc`` is accepted but not used here).
    """
    # Before creating the true parser, we need to import optional user module
    # in order to eagerly import custom tasks, optimizers, architectures, etc.
    usr_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
    usr_parser.add_argument("--user-dir", default=None)
    usr_args, _ = usr_parser.parse_known_args()
    utils.import_user_module(usr_args)

    # configargparse allows options to also come from a config file
    # (see --configs below).
    parser = configargparse.ArgumentParser(allow_abbrev=False)
    parser.add_argument("--configs", required=False, is_config_file=True)
    parser.add_argument(
        "--pdb", action="store_true", default=False, help="debug with pdb"
    )
    # fmt: off
    parser.add_argument('--no-progress-bar', action='store_true', help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--tensorboard-logdir', metavar='DIR', default='',
                        help='path to save logs for tensorboard, should match --logdir '
                             'of running tensorboard (default: no tensorboard logging)')
    parser.add_argument("--tbmf-wrapper", action="store_true",
                        help="[FB only] ")
    parser.add_argument('--seed', default=1, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--cpu', action='store_true', help='use CPU instead of CUDA')
    parser.add_argument('--fp16', action='store_true', help='use FP16')
    parser.add_argument('--memory-efficient-fp16', action='store_true',
                        help='use a memory-efficient version of FP16 training; implies --fp16')
    parser.add_argument('--fp16-init-scale', default=2 ** 7, type=int,
                        help='default FP16 loss scale')
    parser.add_argument('--fp16-scale-window', type=int,
                        help='number of updates before increasing loss scale')
    parser.add_argument('--fp16-scale-tolerance', default=0.0, type=float,
                        help='pct of updates that can overflow before decreasing the loss scale')
    parser.add_argument('--min-loss-scale', default=1e-4, type=float, metavar='D',
                        help='minimum FP16 loss scale, after which training is stopped')
    parser.add_argument('--threshold-loss-scale', type=float,
                        help='threshold FP16 loss scale from below')
    parser.add_argument('--user-dir', default=None,
                        help='path to a python module containing custom extensions (tasks and/or architectures)')

    # One "--<registry-name>" choice flag per component registry.
    from fairseq.registry import REGISTRIES
    for registry_name, REGISTRY in REGISTRIES.items():
        parser.add_argument(
            '--' + registry_name.replace('_', '-'),
            default=REGISTRY['default'],
            choices=REGISTRY['registry'].keys(),
        )

    # Task definitions can be found under fairseq/tasks/
    from fairseq.tasks import TASK_REGISTRY
    parser.add_argument('--task', metavar='TASK', default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    # fmt: on
    return parser
Example no. 4
0
def override_module_args(args: Namespace) -> Tuple[List[str], List[str]]:
    """use the field in args to overrides those in cfg"""
    overrides: List[str] = []
    deletes: List[str] = []

    # Top-level FairseqConfig fields are always candidates for override.
    fields = FairseqConfig.__dataclass_fields__
    for key in fields:
        overrides.extend(_override_attr(key, fields[key].type, args))

    if args is not None:
        if hasattr(args, "task"):
            from fairseq.tasks import TASK_DATACLASS_REGISTRY

            migrate_registry(
                "task", args.task, TASK_DATACLASS_REGISTRY, args, overrides, deletes
            )
        else:
            deletes.append("task")

        # these options will be set to "None" if they have not yet been migrated
        # so we can populate them with the entire flat args
        CORE_REGISTRIES = {"criterion", "optimizer", "lr_scheduler"}

        from fairseq.registry import REGISTRIES

        for name, registry in REGISTRIES.items():
            if not hasattr(args, name):
                deletes.append(name)
                continue
            migrate_registry(
                name,
                getattr(args, name),
                registry["dataclass_registry"],
                args,
                overrides,
                deletes,
                use_name_as_val=name not in CORE_REGISTRIES,
            )

        has_dataclass = False
        if hasattr(args, "arch"):
            from fairseq.models import ARCH_MODEL_REGISTRY, ARCH_MODEL_NAME_REGISTRY

            if args.arch in ARCH_MODEL_REGISTRY:
                model_cls = ARCH_MODEL_REGISTRY[args.arch]
                dc = getattr(model_cls, "__dataclass", None)
                if dc is not None:
                    model_name = ARCH_MODEL_NAME_REGISTRY[args.arch]
                    overrides.append("model={}".format(model_name))
                    overrides.append("model._name={}".format(args.arch))
                    # override model params with those exist in args
                    overrides.extend(_override_attr("model", dc, args))
                    has_dataclass = True
        if not has_dataclass:
            deletes.append("model")

    return overrides, deletes
Example no. 5
0
def get_args():
    """Build and parse the client command-line options.

    A preliminary known-args parse determines which registry components were
    selected, so their extra arguments can be registered before the final
    parse.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--hostname", type=str, default=DEFAULT_HOSTNAME, help="server hostname"
    )
    parser.add_argument(
        "--port", type=int, default=DEFAULT_PORT, help="server port number"
    )
    parser.add_argument(
        "--agent-type", default="simul_trans_text", help="Agent type"
    )
    parser.add_argument("--scorer-type", default="text", help="Scorer type")
    parser.add_argument(
        "--start-idx",
        type=int,
        default=0,
        help="Start index of the sentence to evaluate",
    )
    parser.add_argument(
        "--end-idx",
        type=int,
        default=float("inf"),
        help="End index of the sentence to evaluate",
    )
    parser.add_argument(
        "--scores", action="store_true", help="Request scores from server"
    )
    parser.add_argument(
        "--reset-server", action="store_true", help="Reset the server"
    )
    parser.add_argument(
        "--num-threads", type=int, default=10, help="Number of threads used by agent"
    )
    parser.add_argument(
        "--local", action="store_true", default=False, help="Local evaluation"
    )

    partial_args, _ = parser.parse_known_args()

    # Late-register arguments for whichever registry entries were chosen.
    for reg_name, registry in REGISTRIES.items():
        selected = getattr(partial_args, reg_name, None)
        if selected is None:
            continue
        component = registry["registry"][selected]
        if hasattr(component, "add_args"):
            component.add_args(parser)

    return parser.parse_args()
Example no. 6
0
def get_args():
    """Parse client options; registry-selected components may contribute
    extra arguments, discovered via a preliminary known-args parse."""
    parser = argparse.ArgumentParser()

    parser.add_argument('--hostname', type=str, default=DEFAULT_HOSTNAME,
                        help='server hostname')
    parser.add_argument('--port', type=int, default=DEFAULT_PORT,
                        help='server port number')
    parser.add_argument('--agent-type', default='simul_trans_text',
                        help='Agent type')
    parser.add_argument('--scorer-type', default='text', help='Scorer type')
    parser.add_argument('--start-idx', type=int, default=0,
                        help='Start index of the sentence to evaluate')
    parser.add_argument('--end-idx', type=int, default=float('inf'),
                        help='End index of the sentence to evaluate')
    parser.add_argument('--scores', action="store_true",
                        help='Request scores from server')
    parser.add_argument('--reset-server', action="store_true",
                        help='Reset the server')
    parser.add_argument('--num-threads', type=int, default=10,
                        help='Number of threads used by agent')
    parser.add_argument('--local', action="store_true", default=False,
                        help='Local evaluation')

    known, _ = parser.parse_known_args()

    # Register any extra args contributed by the chosen registry components,
    # then reparse so those args are recognized.
    for reg_name in REGISTRIES:
        chosen = getattr(known, reg_name, None)
        if chosen is None:
            continue
        component = REGISTRIES[reg_name]["registry"][chosen]
        if hasattr(component, "add_args"):
            component.add_args(parser)

    return parser.parse_args()
Example no. 7
0
def register_hydra_cfg(cs: ConfigStore, name: str = "default") -> None:
    """cs: config store instance, register common training configs"""
    for cfg_name, node_factory in CONFIGS.items():
        try:
            cs.store(name=cfg_name, node=node_factory())
        except BaseException:
            # Surface which config failed before re-raising.
            logger.error(f"{cfg_name} - {node_factory()}")
            raise

    register_module_dataclass(cs, TASK_DATACLASS_REGISTRY, "task")
    register_module_dataclass(cs, MODEL_DATACLASS_REGISTRY, "model")

    # Register the dataclasses of every other component registry.
    for reg_name, registry in REGISTRIES.items():
        register_module_dataclass(cs, registry["dataclass_registry"], reg_name)
Example no. 8
0
def hydra_init() -> None:
    """Register FairseqConfig field defaults plus the task/model/registry
    dataclasses with Hydra's global ConfigStore."""
    cs = ConfigStore.instance()

    for field_name, field in FairseqConfig.__dataclass_fields__.items():
        node = field.default
        try:
            cs.store(name=field_name, node=node)
        except BaseException:
            # Surface which field failed before re-raising.
            logger.error(f"{field_name} - {node}")
            raise

    register_module_dataclass(cs, TASK_DATACLASS_REGISTRY, "task")
    register_module_dataclass(cs, MODEL_DATACLASS_REGISTRY, "model")

    # Register the dataclasses of every other component registry.
    for reg_name, registry in REGISTRIES.items():
        register_module_dataclass(cs, registry["dataclass_registry"], reg_name)
Example no. 9
0
def parse_args_and_arch(parser,
                        input_args=None,
                        parse_known=False,
                        suppress_defaults=False):
    """Parse args, late-registering model/task/registry-specific arguments.

    Args:
        parser: the base ArgumentParser to extend and parse with
        input_args: list of strings to parse instead of sys.argv, or None
        parse_known: if True, parse with ``parse_known_args`` and also
            return the list of unrecognized argument strings
        suppress_defaults: if True, parse while ignoring all default values
            and return a Namespace containing only explicitly-set args

    Returns:
        the parsed args Namespace, or ``(args, extra)`` when ``parse_known``
        is True.
    """
    if suppress_defaults:
        # Parse args without any default values. This requires us to parse
        # twice, once to identify all the necessary task/model args, and a second
        # time with all defaults set to None.
        args = parse_args_and_arch(
            parser,
            input_args=input_args,
            parse_known=parse_known,
            suppress_defaults=False,
        )
        suppressed_parser = argparse.ArgumentParser(add_help=False,
                                                    parents=[parser])
        suppressed_parser.set_defaults(
            **{k: None
               for k, v in vars(args).items()})
        args = suppressed_parser.parse_args(input_args)
        # Keep only args that were explicitly provided (non-None).
        return argparse.Namespace(
            **{k: v
               for k, v in vars(args).items() if v is not None})

    from fairseq.models import ARCH_MODEL_REGISTRY, ARCH_CONFIG_REGISTRY

    # The parser doesn't know about model/criterion/optimizer-specific args, so
    # we parse twice. First we parse the model/criterion/optimizer, then we
    # parse a second time after adding the *-specific arguments.
    # If input_args is given, we will parse those args instead of sys.argv.
    args, _ = parser.parse_known_args(input_args)

    # Add model-specific args to parser.
    if hasattr(args, "arch"):
        model_specific_group = parser.add_argument_group(
            "Model-specific configuration",
            # Only include attributes which are explicitly given as command-line
            # arguments or which have default values.
            argument_default=argparse.SUPPRESS,
        )
        ARCH_MODEL_REGISTRY[args.arch].add_args(model_specific_group)

    # Add *-specific args to parser.
    from fairseq.registry import REGISTRIES

    for registry_name, REGISTRY in REGISTRIES.items():
        choice = getattr(args, registry_name, None)
        if choice is not None:
            cls = REGISTRY["registry"][choice]
            if hasattr(cls, "add_args"):
                cls.add_args(parser)
    if hasattr(args, "task"):
        from fairseq.tasks import TASK_REGISTRY

        TASK_REGISTRY[args.task].add_args(parser)
    if getattr(args, "use_bmuf", False):
        # hack to support extra args for block distributed data parallelism
        from fairseq.optim.bmuf import FairseqBMUF

        FairseqBMUF.add_args(parser)

    # Parse a second time.
    if parse_known:
        args, extra = parser.parse_known_args(input_args)
    else:
        args = parser.parse_args(input_args)
        extra = None

    # Post-process args.
    # Validation-time limits fall back to their training-time counterparts
    # when not given explicitly.
    if hasattr(args,
               "max_sentences_valid") and args.max_sentences_valid is None:
        args.max_sentences_valid = args.max_sentences
    if hasattr(args, "max_tokens_valid") and args.max_tokens_valid is None:
        args.max_tokens_valid = args.max_tokens
    if getattr(args, "memory_efficient_fp16", False):
        # memory-efficient FP16 implies plain FP16 as well
        args.fp16 = True

    # Apply architecture configuration.
    if hasattr(args, "arch"):
        ARCH_CONFIG_REGISTRY[args.arch](args)

    if parse_known:
        return args, extra
    else:
        return args
Example no. 10
0
def parse_args_and_arch(
    parser: argparse.ArgumentParser,
    input_args: List[str] = None,
    parse_known: bool = False,
    suppress_defaults: bool = False,
    modify_parser: Optional[Callable[[argparse.ArgumentParser], None]] = None,
):
    """
    Args:
        parser (ArgumentParser): the parser
        input_args (List[str]): strings to parse, defaults to sys.argv
        parse_known (bool): only parse known arguments, similar to
            `ArgumentParser.parse_known_args`
        suppress_defaults (bool): parse while ignoring all default values
        modify_parser (Optional[Callable[[ArgumentParser], None]]):
            function to modify the parser, e.g., to set default values
    """
    if suppress_defaults:
        # Parse args without any default values. This requires us to parse
        # twice, once to identify all the necessary task/model args, and a second
        # time with all defaults set to None.
        args = parse_args_and_arch(
            parser,
            input_args=input_args,
            parse_known=parse_known,
            suppress_defaults=False,
        )
        suppressed_parser = argparse.ArgumentParser(add_help=False, parents=[parser])
        suppressed_parser.set_defaults(**{k: None for k, v in vars(args).items()})
        args = suppressed_parser.parse_args(input_args)
        # Keep only args that were explicitly provided (non-None).
        return argparse.Namespace(
            **{k: v for k, v in vars(args).items() if v is not None}
        )

    from fairseq.models import ARCH_MODEL_REGISTRY, ARCH_CONFIG_REGISTRY, MODEL_REGISTRY

    # Before creating the true parser, we need to import optional user module
    # in order to eagerly import custom tasks, optimizers, architectures, etc.
    usr_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
    usr_parser.add_argument("--user-dir", default=None)
    usr_args, _ = usr_parser.parse_known_args(input_args)
    import_user_module(usr_args)

    if modify_parser is not None:
        modify_parser(parser)

    # The parser doesn't know about model/criterion/optimizer-specific args, so
    # we parse twice. First we parse the model/criterion/optimizer, then we
    # parse a second time after adding the *-specific arguments.
    # If input_args is given, we will parse those args instead of sys.argv.
    args, _ = parser.parse_known_args(input_args)

    # Add model-specific args to parser.
    if hasattr(args, "arch"):
        model_specific_group = parser.add_argument_group(
            "Model-specific configuration",
            # Only include attributes which are explicitly given as command-line
            # arguments or which have default values.
            argument_default=argparse.SUPPRESS,
        )
        # Architectures take precedence over bare model names.
        if args.arch in ARCH_MODEL_REGISTRY:
            ARCH_MODEL_REGISTRY[args.arch].add_args(model_specific_group)
        elif args.arch in MODEL_REGISTRY:
            MODEL_REGISTRY[args.arch].add_args(model_specific_group)
        else:
            raise RuntimeError()

    # Add *-specific args to parser.
    from fairseq.registry import REGISTRIES

    for registry_name, REGISTRY in REGISTRIES.items():
        choice = getattr(args, registry_name, None)
        if choice is not None:
            cls = REGISTRY["registry"][choice]
            if hasattr(cls, "add_args"):
                cls.add_args(parser)
    if hasattr(args, "task"):
        from fairseq.tasks import TASK_REGISTRY

        TASK_REGISTRY[args.task].add_args(parser)
    if getattr(args, "use_bmuf", False):
        # hack to support extra args for block distributed data parallelism
        from fairseq.optim.bmuf import FairseqBMUF

        FairseqBMUF.add_args(parser)

    # Modify the parser a second time, since defaults may have been reset
    if modify_parser is not None:
        modify_parser(parser)

    # Parse a second time.
    if parse_known:
        args, extra = parser.parse_known_args(input_args)
    else:
        args = parser.parse_args(input_args)
        extra = None
    # Post-process args.
    # batch_size_valid / max_tokens_valid fall back to their training-time
    # counterparts when absent or unset.
    if (
        hasattr(args, "batch_size_valid") and args.batch_size_valid is None
    ) or not hasattr(args, "batch_size_valid"):
        args.batch_size_valid = args.batch_size
    if hasattr(args, "max_tokens_valid") and args.max_tokens_valid is None:
        args.max_tokens_valid = args.max_tokens
    if getattr(args, "memory_efficient_fp16", False):
        # memory-efficient variants imply their plain counterparts
        args.fp16 = True
    if getattr(args, "memory_efficient_bf16", False):
        args.bf16 = True
    args.tpu = getattr(args, "tpu", False)
    args.bf16 = getattr(args, "bf16", False)
    # bf16 implies TPU, and TPU + fp16 is an unsupported combination.
    if args.bf16:
        args.tpu = True
    if args.tpu and args.fp16:
        raise ValueError("Cannot combine --fp16 and --tpu, use --bf16 on TPUs")

    # Record whether the seed was explicitly provided by the user.
    if getattr(args, "seed", None) is None:
        args.seed = 1  # default seed for training
        args.no_seed_provided = True
    else:
        args.no_seed_provided = False

    # Apply architecture configuration.
    if hasattr(args, "arch") and args.arch in ARCH_CONFIG_REGISTRY:
        ARCH_CONFIG_REGISTRY[args.arch](args)

    if parse_known:
        return args, extra
    else:
        return args
Example no. 11
0
def get_parser(desc, default_task="translation"):
    """Build the top-level argument parser with common training/runtime flags,
    one choice flag per component registry, and the ``--task`` option
    (``desc`` is accepted for interface compatibility but not used here).
    """
    # Before creating the true parser, we need to import optional user module
    # in order to eagerly import custom tasks, optimizers, architectures, etc.
    usr_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
    usr_parser.add_argument("--user-dir", default=None)
    usr_args, _ = usr_parser.parse_known_args()
    utils.import_user_module(usr_args)

    parser = argparse.ArgumentParser(allow_abbrev=False)
    # fmt: off
    parser.add_argument('--no-progress-bar',
                        action='store_true',
                        help='disable progress bar')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=100,
        metavar='N',
        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format',
                        default=None,
                        help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument(
        '--tensorboard-logdir',
        metavar='DIR',
        default='',
        help='path to save logs for tensorboard, should match --logdir '
        'of running tensorboard (default: no tensorboard logging)')
    parser.add_argument('--seed',
                        default=None,
                        type=int,
                        metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--cpu',
                        action='store_true',
                        help='use CPU instead of CUDA')
    parser.add_argument('--tpu',
                        action='store_true',
                        help='use TPU instead of CUDA')
    parser.add_argument('--bf16',
                        action='store_true',
                        help='use bfloat16; implies --tpu')
    parser.add_argument('--fp16', action='store_true', help='use FP16')
    parser.add_argument(
        '--memory-efficient-bf16',
        action='store_true',
        help='use a memory-efficient version of BF16 training; implies --bf16')
    parser.add_argument(
        '--memory-efficient-fp16',
        action='store_true',
        help='use a memory-efficient version of FP16 training; implies --fp16')
    parser.add_argument('--fp16-no-flatten-grads',
                        action='store_true',
                        help='don\'t flatten FP16 grads tensor')
    parser.add_argument('--fp16-init-scale',
                        default=2**7,
                        type=int,
                        help='default FP16 loss scale')
    parser.add_argument('--fp16-scale-window',
                        type=int,
                        help='number of updates before increasing loss scale')
    parser.add_argument(
        '--fp16-scale-tolerance',
        default=0.0,
        type=float,
        help='pct of updates that can overflow before decreasing the loss scale'
    )
    parser.add_argument(
        '--min-loss-scale',
        default=1e-4,
        type=float,
        metavar='D',
        help='minimum FP16 loss scale, after which training is stopped')
    parser.add_argument('--threshold-loss-scale',
                        type=float,
                        help='threshold FP16 loss scale from below')
    parser.add_argument(
        '--user-dir',
        default=None,
        help=
        'path to a python module containing custom extensions (tasks and/or architectures)'
    )
    parser.add_argument(
        '--empty-cache-freq',
        default=0,
        type=int,
        help='how often to clear the PyTorch CUDA cache (0 to disable)')
    parser.add_argument(
        '--all-gather-list-size',
        default=16384,
        type=int,
        help='number of bytes reserved for gathering stats from workers')
    parser.add_argument('--model-parallel-size',
                        type=int,
                        metavar='N',
                        default=1,
                        help='total number of GPUs to parallelize model over')
    parser.add_argument('--checkpoint-suffix',
                        default='',
                        help='suffix to add to the checkpoint file name')
    parser.add_argument('--quantization-config-path',
                        default=None,
                        help='path to quantization config file')
    parser.add_argument('--profile',
                        action='store_true',
                        help='enable autograd profiler emit_nvtx')

    # One "--<registry-name>" choice flag per component registry.
    from fairseq.registry import REGISTRIES
    for registry_name, REGISTRY in REGISTRIES.items():
        parser.add_argument(
            '--' + registry_name.replace('_', '-'),
            default=REGISTRY['default'],
            choices=REGISTRY['registry'].keys(),
        )

    # Task definitions can be found under fairseq/tasks/
    from fairseq.tasks import TASK_REGISTRY
    parser.add_argument('--task',
                        metavar='TASK',
                        default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    # fmt: on
    return parser
Example no. 12
0
def get_parser(desc, default_task='translation'):
    """Build the common fairseq argument parser, extended with DDS options.

    Args:
        desc: short tool description. NOTE(review): currently unused by the
            body; kept in the signature for interface compatibility.
        default_task: key into ``TASK_REGISTRY`` used when ``--task`` is not
            supplied on the command line.

    Returns:
        An ``argparse.ArgumentParser`` populated with common options, the
        DDS/data-actor options, one option per fairseq registry, and
        ``--task``.
    """
    # Before creating the true parser, we need to import the optional user
    # module eagerly so that any custom tasks, optimizers, architectures,
    # etc. it registers are visible when the registries are consulted below.
    usr_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
    usr_parser.add_argument('--user-dir', default=None)
    usr_args, _ = usr_parser.parse_known_args()
    utils.import_user_module(usr_args)

    parser = argparse.ArgumentParser(allow_abbrev=False)
    # fmt: off
    # --- logging / common runtime options ---------------------------------
    parser.add_argument('--no-progress-bar', action='store_true',
                        help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--tensorboard-logdir', metavar='DIR', default='',
                        help='path to save logs for tensorboard, should match --logdir '
                             'of running tensorboard (default: no tensorboard logging)')
    parser.add_argument("--tbmf-wrapper", action="store_true",
                        help="[FB only] ")
    parser.add_argument('--seed', default=1, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--cpu', action='store_true',
                        help='use CPU instead of CUDA')
    # --- mixed-precision (FP16) options ------------------------------------
    parser.add_argument('--fp16', action='store_true', help='use FP16')
    parser.add_argument('--memory-efficient-fp16', action='store_true',
                        help='use a memory-efficient version of FP16 training; implies --fp16')
    parser.add_argument('--fp16-init-scale', default=2 ** 7, type=int,
                        help='default FP16 loss scale')
    parser.add_argument('--fp16-scale-window', type=int,
                        help='number of updates before increasing loss scale')
    parser.add_argument('--fp16-scale-tolerance', default=0.0, type=float,
                        help='pct of updates that can overflow before decreasing the loss scale')
    parser.add_argument('--min-loss-scale', default=1e-4, type=float, metavar='D',
                        help='minimum FP16 loss scale, after which training is stopped')
    parser.add_argument('--threshold-loss-scale', type=float,
                        help='threshold FP16 loss scale from below')
    parser.add_argument('--user-dir', default=None,
                        help='path to a python module containing custom extensions (tasks and/or architectures)')
    # --- DDS / data-actor options ------------------------------------------
    parser.add_argument('--sde', action='store_true',
                        help='whether to use sde')
    parser.add_argument('--update-language-sampling', type=int, default=-1,
                        help='update language sampling every N step')
    # NOTE(review): same help text as --update-language-sampling in the
    # original; presumably the "extra" actor's schedule — confirm.
    parser.add_argument('--extra-update-language-sampling', type=int, default=-1,
                        help='update language sampling every N step')

    parser.add_argument('--scale-norm', action='store_true',
                        help='whether to use scaled norm')
    parser.add_argument('--fix-norm', type=float, default=None,
                        help='whether to use fixed norm at output embedding')

    parser.add_argument('--data-actor', type=str, default=None,
                        help='type of data actor [base|ave_emb|only_grad|interpolate_grad]')
    parser.add_argument('--data-actor-embed-dropout', type=float, default=0.,
                        help='')
    parser.add_argument('--data-actor-proj-dropout', type=float, default=0.,
                        help='')
    parser.add_argument('--data-actor-proj-linear-bias', type=float, default=None,
                        help='the bias term to data actor linear projection')
    parser.add_argument('--data-actor-proj-post-bias', type=float, default=0,
                        help='the bias term to add after data actor project activation')
    # Help text fixed: the original carried a copy-pasted description from
    # --data-actor-proj-post-bias.
    parser.add_argument('--data-actor-sigmoid-scale', type=float, default=1.,
                        help='scale factor applied to the data actor sigmoid output')
    parser.add_argument('--extra-data-actor', type=str, default=None,
                        help='type of data actor [ave_emb]')
    parser.add_argument('--combine-probs', type=str, default=None,
                        help=' [weight_by_size]')
    parser.add_argument('--data-actor-lr', type=eval_str_list, default=0.01,
                        help='lr for optimizing data actor')
    parser.add_argument('--data-actor-optim-step', type=int, default=1,
                        help='number of steps to optimize data actor')
    parser.add_argument('--data-actor-lr-scheduler', type=str, default=None)
    parser.add_argument('--data-actor-embed-dim', type=int, default=32,
                        help='dimension of word embedding for data actor')
    parser.add_argument('--lan-embed-dim', type=int, default=None,
                        help='dimension of word embedding for data actor')
    parser.add_argument('--data-actor-model-embed', type=int, default=0,
                        help='[0|1] whether to use model embedding')
    parser.add_argument('--data-actor-embed-grad', type=int, default=1,
                        help='[0|1] whether to optimize model embedding')
    parser.add_argument('--data-loss-lambda', type=float, default=0,
                        help='the percentage of using actual data loss')
    parser.add_argument('--data-loss-lambda-warmup-steps', type=int, default=-1)
    parser.add_argument('--data-loss-lambda-init', type=float, default=-1)
    parser.add_argument('--data-loss-lambda-final', type=float, default=-1)
    parser.add_argument('--out-score-type', type=str, default='sigmoid',
                        help='[sigmoid|exp]')
    parser.add_argument('--data-actor-share-model', action='store_true',
                        help='whether to allow data actor and main model to share the same parameter')
    parser.add_argument('--tanh-constant', type=float, default=10,
                        help='the constant multiplier for tanh output')
    # NOTE(review): help looks copy-pasted from --tanh-constant; presumably
    # this multiplies the exp output — confirm against usage.
    parser.add_argument('--exp-constant', type=float, default=0.1,
                        help='the constant multiplier for tanh output')
    parser.add_argument('--eval-bleu', action='store_true',
                        help='whether to valid on bleu score')
    # Help text fixed: the original carried a copy-pasted description from
    # --eval-bleu.
    parser.add_argument('--only-load-data-actor', action='store_true',
                        help='whether to only load the data actor')
    parser.add_argument('--data-actor-proj-grad-only', action='store_true')
    parser.add_argument('--load-model-as-data-actor', action='store_true',
                        help='use the model as data actor')
    parser.add_argument('--grad-sim', type=str, default='cosine',
                        help='[cosine|dot_prod]')
    parser.add_argument('--dev-grad-eta', type=float, default=0.0001)
    parser.add_argument('--proj-grad-sim', type=str, default='cosine',
                        help='[cosine|dot_prod]')
    parser.add_argument('--loss-steps', type=int, default=1,
                        help='number of steps to calculate loss for grad sim')
    parser.add_argument('--scale-reward', action='store_true',
                        help='whether to scale reward by current p')
    # NOTE(review): help copy-pasted from --scale-reward in the original;
    # presumably enables a reward baseline — confirm.
    parser.add_argument('--baseline', action='store_true',
                        help='whether to scale reward by current p')
    parser.add_argument('--relu-reward', action='store_true',
                        help='whether to relu the reward')
    parser.add_argument('--discount-reward', type=float, default=-1,
                        help='discount factor for reward')
    parser.add_argument('--reward-scale', type=float, default=0.0001,
                        help='scale factor of the reward')
    parser.add_argument('--language-weight', type=str, default=None,
                        help='dev language weights separated by comma')
    parser.add_argument('--data-actor-step-update', action='store_true',
                        help='whether to update at training step')
    parser.add_argument('--exact-update', action='store_true',
                        help='whether to do exact update in the approximate setting')
    parser.add_argument('--loss-weight', type=str, default=None, help='[low|]')
    parser.add_argument('--discount-grad', action='store_true',
                        help='whether to use the default discount grad')
    parser.add_argument('--a0', type=float, default=0.05, help='[low|]')
    parser.add_argument('--a1', type=float, default=0.95, help='[low|]')
    parser.add_argument('--switch-obj-epoch', type=int, default=1,
                        help='the epoch to update val loss to trainer')
    parser.add_argument('--embedding-file', type=str, default=None,
                        help='the file path to init data actor embedding')
    parser.add_argument('--data-actor-feature-postprocess', type=str, default='last',
                        help='[tanh|average]')

    # --- TCS options --------------------------------------------------------
    parser.add_argument('--lan-dists', default=None, type=str,
                        help='comma separated numbers that indicate language distance')
    parser.add_argument('--data-condition', default="target", type=str,
                        help='[source|target] whether to condition on source or target')

    parser.add_argument('--sample-instance', action='store_true',
                        help='whether to sample for each instance in a batch for multilingual_data')
    parser.add_argument('--sample-tag-prob', default=-1, type=float,
                        help='probability of using tags other than the language')

    parser.add_argument('--data-actor-multilin', action='store_true',
                        help='whether to use the multilingual version of the actor')
    parser.add_argument('--utility-type', type=str, default='ave',
                        help='type of utility function [ave|min-half|median]')
    parser.add_argument('--eval-lang-pairs', type=str, default=None,
                        help='dev data keys for multilin actor')

    parser.add_argument('--no-dev', action='store_true',
                        help='not use dev set gradient')
    parser.add_argument('--pretrain-data-actor', action='store_true',
                        help='pretrain the data actor')
    parser.add_argument('--pretrain-type', type=str, default='lan_dist',
                        help='[lan_dist|datasize]')
    parser.add_argument('--feature-type', type=str, default='ones',
                        help='[ones|valid_loss|train_loss]')
    parser.add_argument('--layerwise-dds', action='store_true',
                        help='use layerwise DDS')
    parser.add_argument('--tensorwise-dds', action='store_true')
    parser.add_argument('--dds-no-neg-reward', action='store_true',
                        help='set the negative reward for DDS to 0')
    # --- gradient-projection options ---------------------------------------
    parser.add_argument('--proj-grad', action='store_true')
    parser.add_argument('--train-on-proj', action='store_true')
    parser.add_argument('--train-proj-grad', action='store_true',
                        help="use the training grad to project")
    parser.add_argument('--train-proj-grad-sum', action='store_true',
                        help="use the training grad to project")
    parser.add_argument('--save-proj-train', action='store_true',
                        help="whether to use saved moving avg grad to project")
    parser.add_argument('--remove-sample-id', action='store_true',
                        help="do not project on current language being trained")
    parser.add_argument('--proj-lan-id', type=str, default=None)
    parser.add_argument('--paramwise-proj-grad', action='store_true')
    parser.add_argument('--sample-proj-count', type=int, default=1,
                        help='number of tasks to sample for projection')
    parser.add_argument('--optim-weight-softmax-tau', type=float, default=-1,
                        help='a float between (0, 1], smaller value makes weight more peaky')
    parser.add_argument('--optim-weight-above-one', action='store_true')

    parser.add_argument('--datasize-t', type=int, default=None,
                        help='temperature for controlling datasize sampling')
    parser.add_argument('--alpha-p', type=float, default=0,
                        help='[0-1] amount of interpolation for p')
    parser.add_argument('--num-dev-samples', type=int, default=8,
                        help="number of samples to select for dev batch for gradient; max token is set to 1200")

    parser.add_argument('--reward-level', type=str, default="sent")
    parser.add_argument('--reward-constant', type=float, default=0.01)

    parser.add_argument('--only-optim-model-key', type=str, default=None)
    parser.add_argument('--upsample-factor', type=int, default=0)

    # NOTE(review): 'weigted_smooth' looks misspelled, but it may be the
    # literal string matched elsewhere in the code — do not change without
    # checking the consumer of this option.
    parser.add_argument('--data-score-label-smooth', type=str, default="none",
                        help="[no_smooth|weigted_smooth]")

    # One option per fairseq registry (e.g. --optimizer, --lr-scheduler, ...),
    # restricted to the registered choices.
    from fairseq.registry import REGISTRIES
    for registry_name, REGISTRY in REGISTRIES.items():
        parser.add_argument(
            '--' + registry_name.replace('_', '-'),
            default=REGISTRY['default'],
            choices=REGISTRY['registry'].keys(),
        )

    # Task definitions can be found under fairseq/tasks/
    from fairseq.tasks import TASK_REGISTRY
    parser.add_argument('--task', metavar='TASK', default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    # fmt: on
    return parser
Esempio n. 13
0
def get_parser(desc, default_task='translation'):
    """Build the base fairseq argument parser with common options.

    Args:
        desc: short tool description. NOTE(review): currently unused by the
            body; kept in the signature for interface compatibility.
        default_task: key into ``TASK_REGISTRY`` used when ``--task`` is not
            supplied on the command line.

    Returns:
        An ``argparse.ArgumentParser`` populated with common options, one
        option per fairseq registry, ``--task``, and ``--num-ref``.
    """
    # Before creating the true parser, we need to import the optional user
    # module eagerly so that any custom tasks, optimizers, architectures,
    # etc. it registers are visible when the registries are consulted below.
    usr_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
    usr_parser.add_argument('--user-dir', default=None)
    usr_args, _ = usr_parser.parse_known_args()
    utils.import_user_module(usr_args)

    parser = argparse.ArgumentParser(allow_abbrev=False)
    # fmt: off
    parser.add_argument('--no-progress-bar', action='store_true', help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--tensorboard-logdir', metavar='DIR', default='',
                        help='path to save logs for tensorboard, should match --logdir '
                             'of running tensorboard (default: no tensorboard logging)')
    parser.add_argument('--seed', default=1, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--cpu', action='store_true', help='use CPU instead of CUDA')
    parser.add_argument('--fp16', action='store_true', help='use FP16')
    parser.add_argument('--memory-efficient-fp16', action='store_true',
                        help='use a memory-efficient version of FP16 training; implies --fp16')
    parser.add_argument('--fp16-init-scale', default=2 ** 7, type=int,
                        help='default FP16 loss scale')
    parser.add_argument('--fp16-scale-window', type=int,
                        help='number of updates before increasing loss scale')
    parser.add_argument('--fp16-scale-tolerance', default=0.0, type=float,
                        help='pct of updates that can overflow before decreasing the loss scale')
    parser.add_argument('--min-loss-scale', default=1e-4, type=float, metavar='D',
                        help='minimum FP16 loss scale, after which training is stopped')
    parser.add_argument('--threshold-loss-scale', type=float,
                        help='threshold FP16 loss scale from below')
    parser.add_argument('--user-dir', default=None,
                        help='path to a python module containing custom extensions (tasks and/or architectures)')
    parser.add_argument('--empty-cache-freq', default=0, type=int,
                        help='how often to clear the PyTorch CUDA cache (0 to disable)')
    parser.add_argument('--all-gather-list-size', default=16384, type=int,
                        help='number of bytes reserved for gathering stats from workers')
    # Run the validation step in FP32 even under --fp16 (needed by some
    # modules, e.g. DynamicConv).
    parser.add_argument('--float-valid', default=False, action='store_true',
                        help='if true, use float type for valid step (for DynamicConv)')

    # One option per fairseq registry (e.g. --optimizer, --lr-scheduler, ...),
    # restricted to the registered choices.
    from fairseq.registry import REGISTRIES
    for registry_name, REGISTRY in REGISTRIES.items():
        parser.add_argument(
            '--' + registry_name.replace('_', '-'),
            default=REGISTRY['default'],
            choices=REGISTRY['registry'].keys(),
        )

    # Task definitions can be found under fairseq/tasks/
    from fairseq.tasks import TASK_REGISTRY
    parser.add_argument('--task', metavar='TASK', default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    # fmt: on

    # Registered after '# fmt: on' in the original; order is kept so the
    # --help listing is unchanged. StoreDictKeyPair is a project-defined
    # argparse action that parses KEY=VALUE pairs into a dict.
    parser.add_argument('--num-ref', default=None, nargs='+',
                        action=StoreDictKeyPair, metavar='NUMREFSPLIT',
                        help='dict for number of references for valid and test')

    return parser