def get_parser(desc, default_task="translation"):
    """Build the top-level fairseq argument parser.

    Any ``--user-dir`` module is imported eagerly first so that custom
    tasks, optimizers, architectures, etc. are registered before the
    registries below are consulted.
    """
    # Throwaway pre-parser: extract only --user-dir so the user module can
    # be imported before the true parser reads the registries.
    pre_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
    pre_parser.add_argument("--user-dir", default=None)
    pre_args, _ = pre_parser.parse_known_args()
    import_user_module(pre_args)

    parser = argparse.ArgumentParser(allow_abbrev=False)
    gen_parser_from_dataclass(parser, CommonParams())

    from fairseq.registry import REGISTRIES

    # One selector flag per registered component group (criterion,
    # optimizer, lr_scheduler, ...), defaulting to the registry default.
    for reg_name, reg in REGISTRIES.items():
        parser.add_argument(
            "--" + reg_name.replace("_", "-"),
            default=reg["default"],
            choices=reg["registry"].keys(),
        )

    # Task definitions can be found under fairseq/tasks/
    from fairseq.tasks import TASK_REGISTRY

    parser.add_argument(
        "--task",
        metavar="TASK",
        default=default_task,
        choices=TASK_REGISTRY.keys(),
        help="task",
    )
    # fmt: on
    return parser
def parse_args_and_arch(parser, input_args=None, parse_known=False):
    """Parse args, adding model/task/registry-specific flags on the fly.

    Args:
        parser: an ArgumentParser created by ``get_parser``
        input_args: strings to parse; defaults to ``sys.argv`` when None
        parse_known: if True, behave like ``parse_known_args`` and also
            return the list of unrecognized arguments

    Returns:
        the parsed ``Namespace``; when ``parse_known`` is True, a tuple of
        ``(args, extra)``.
    """
    from fairseq.models import ARCH_MODEL_REGISTRY, ARCH_CONFIG_REGISTRY

    # The parser doesn't know about model/criterion/optimizer-specific args, so
    # we parse twice. First we parse the model/criterion/optimizer, then we
    # parse a second time after adding the *-specific arguments.
    # If input_args is given, we will parse those args instead of sys.argv.
    args, _ = parser.parse_known_args(input_args)

    # Add model-specific args to parser.
    if hasattr(args, 'arch'):
        model_specific_group = parser.add_argument_group(
            'Model-specific configuration',
            # Only include attributes which are explicitly given as command-line
            # arguments or which have default values.
            argument_default=argparse.SUPPRESS,
        )
        ARCH_MODEL_REGISTRY[args.arch].add_args(model_specific_group)

    # Add *-specific args to parser (criterion/optimizer/... chosen in the
    # first pass contribute their own flags).
    from fairseq.registry import REGISTRIES
    for registry_name, REGISTRY in REGISTRIES.items():
        choice = getattr(args, registry_name, None)
        if choice is not None:
            cls = REGISTRY['registry'][choice]
            if hasattr(cls, 'add_args'):
                cls.add_args(parser)
    if hasattr(args, 'task'):
        from fairseq.tasks import TASK_REGISTRY
        TASK_REGISTRY[args.task].add_args(parser)
    if getattr(args, 'use_bmuf', False):
        # hack to support extra args for block distributed data parallelism
        from fairseq.optim.bmuf import FairseqBMUF
        FairseqBMUF.add_args(parser)

    # Parse a second time, now that all component-specific flags exist.
    if parse_known:
        args, extra = parser.parse_known_args(input_args)
    else:
        args = parser.parse_args(input_args)
        extra = None

    # Post-process args: validation limits fall back to the training limits.
    if hasattr(args, 'max_sentences_valid') and args.max_sentences_valid is None:
        args.max_sentences_valid = args.max_sentences
    if hasattr(args, 'max_tokens_valid') and args.max_tokens_valid is None:
        args.max_tokens_valid = args.max_tokens
    if getattr(args, 'memory_efficient_fp16', False):
        # --memory-efficient-fp16 implies --fp16
        args.fp16 = True

    # Apply architecture configuration (fills in arch-specific defaults).
    if hasattr(args, 'arch'):
        ARCH_CONFIG_REGISTRY[args.arch](args)

    if parse_known:
        return args, extra
    else:
        return args
def get_parser(desc, default_task="translation"):
    """Build the common fairseq parser (configargparse variant that also
    accepts a ``--configs`` config file and a ``--pdb`` debug flag)."""
    # Before creating the true parser, we need to import optional user module
    # in order to eagerly import custom tasks, optimizers, architectures, etc.
    usr_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
    usr_parser.add_argument("--user-dir", default=None)
    usr_args, _ = usr_parser.parse_known_args()
    utils.import_user_module(usr_args)

    # configargparse lets options also come from a config file (--configs).
    parser = configargparse.ArgumentParser(allow_abbrev=False)
    parser.add_argument("--configs", required=False, is_config_file=True)
    parser.add_argument(
        "--pdb", action="store_true", default=False, help="debug with pdb"
    )
    # fmt: off
    parser.add_argument('--no-progress-bar', action='store_true', help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--tensorboard-logdir', metavar='DIR', default='',
                        help='path to save logs for tensorboard, should match --logdir '
                             'of running tensorboard (default: no tensorboard logging)')
    parser.add_argument("--tbmf-wrapper", action="store_true", help="[FB only] ")
    parser.add_argument('--seed', default=1, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--cpu', action='store_true', help='use CPU instead of CUDA')
    parser.add_argument('--fp16', action='store_true', help='use FP16')
    parser.add_argument('--memory-efficient-fp16', action='store_true',
                        help='use a memory-efficient version of FP16 training; implies --fp16')
    parser.add_argument('--fp16-init-scale', default=2 ** 7, type=int,
                        help='default FP16 loss scale')
    parser.add_argument('--fp16-scale-window', type=int,
                        help='number of updates before increasing loss scale')
    parser.add_argument('--fp16-scale-tolerance', default=0.0, type=float,
                        help='pct of updates that can overflow before decreasing the loss scale')
    parser.add_argument('--min-loss-scale', default=1e-4, type=float, metavar='D',
                        help='minimum FP16 loss scale, after which training is stopped')
    parser.add_argument('--threshold-loss-scale', type=float,
                        help='threshold FP16 loss scale from below')
    parser.add_argument('--user-dir', default=None,
                        help='path to a python module containing custom extensions (tasks and/or architectures)')

    # One selector flag per registered component group.
    from fairseq.registry import REGISTRIES
    for registry_name, REGISTRY in REGISTRIES.items():
        parser.add_argument(
            '--' + registry_name.replace('_', '-'),
            default=REGISTRY['default'],
            choices=REGISTRY['registry'].keys(),
        )

    # Task definitions can be found under fairseq/tasks/
    from fairseq.tasks import TASK_REGISTRY
    parser.add_argument('--task', metavar='TASK', default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    # fmt: on
    return parser
def override_module_args(args: Namespace) -> Tuple[List[str], List[str]]:
    """use the field in args to overrides those in cfg"""
    overrides: List[str] = []
    deletes: List[str] = []

    # Flat-args overrides for every top-level FairseqConfig field.
    fields = FairseqConfig.__dataclass_fields__
    for field_name in fields.keys():
        overrides.extend(_override_attr(field_name, fields[field_name].type, args))

    if args is not None:
        if hasattr(args, "task"):
            from fairseq.tasks import TASK_DATACLASS_REGISTRY

            migrate_registry(
                "task", args.task, TASK_DATACLASS_REGISTRY, args, overrides, deletes
            )
        else:
            deletes.append("task")

        # these options will be set to "None" if they have not yet been migrated
        # so we can populate them with the entire flat args
        CORE_REGISTRIES = {"criterion", "optimizer", "lr_scheduler"}

        from fairseq.registry import REGISTRIES

        for reg_name, reg in REGISTRIES.items():
            if not hasattr(args, reg_name):
                deletes.append(reg_name)
                continue
            migrate_registry(
                reg_name,
                getattr(args, reg_name),
                reg["dataclass_registry"],
                args,
                overrides,
                deletes,
                use_name_as_val=reg_name not in CORE_REGISTRIES,
            )

        # Model section only survives if the arch has a dataclass attached.
        no_dc = True
        if hasattr(args, "arch"):
            from fairseq.models import ARCH_MODEL_REGISTRY, ARCH_MODEL_NAME_REGISTRY

            if args.arch in ARCH_MODEL_REGISTRY:
                dc = getattr(ARCH_MODEL_REGISTRY[args.arch], "__dataclass", None)
                if dc is not None:
                    overrides.append(
                        "model={}".format(ARCH_MODEL_NAME_REGISTRY[args.arch])
                    )
                    overrides.append("model._name={}".format(args.arch))
                    # override model params with those exist in args
                    overrides.extend(_override_attr("model", dc, args))
                    no_dc = False
        if no_dc:
            deletes.append("model")

    return overrides, deletes
def get_args():
    """Parse command-line options for the simultaneous-evaluation client."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--hostname", type=str, default=DEFAULT_HOSTNAME, help="server hostname"
    )
    parser.add_argument(
        "--port", type=int, default=DEFAULT_PORT, help="server port number"
    )
    parser.add_argument("--agent-type", default="simul_trans_text", help="Agent type")
    parser.add_argument("--scorer-type", default="text", help="Scorer type")
    parser.add_argument(
        "--start-idx", type=int, default=0,
        help="Start index of the sentence to evaluate",
    )
    parser.add_argument(
        "--end-idx", type=int, default=float("inf"),
        help="End index of the sentence to evaluate",
    )
    parser.add_argument(
        "--scores", action="store_true", help="Request scores from server"
    )
    parser.add_argument("--reset-server", action="store_true", help="Reset the server")
    parser.add_argument(
        "--num-threads", type=int, default=10, help="Number of threads used by agent"
    )
    parser.add_argument(
        "--local", action="store_true", default=False, help="Local evaluation"
    )

    # First pass: find which registry components were selected so their
    # extra flags can be appended before the final, strict parse.
    partial_args, _ = parser.parse_known_args()
    for reg_name, reg in REGISTRIES.items():
        selected = getattr(partial_args, reg_name, None)
        if selected is not None:
            component = reg["registry"][selected]
            if hasattr(component, "add_args"):
                component.add_args(parser)

    args = parser.parse_args()
    return args
def get_args():
    """Build the evaluation-client parser, pull in registry-specific flags
    discovered on a first pass, then parse for real."""
    parser = argparse.ArgumentParser()

    # Connection options.
    parser.add_argument('--hostname', type=str, default=DEFAULT_HOSTNAME,
                        help='server hostname')
    parser.add_argument('--port', type=int, default=DEFAULT_PORT,
                        help='server port number')

    # Agent / scorer selection.
    parser.add_argument('--agent-type', default='simul_trans_text',
                        help='Agent type')
    parser.add_argument('--scorer-type', default='text',
                        help='Scorer type')

    # Evaluation range and behavior.
    parser.add_argument('--start-idx', type=int, default=0,
                        help='Start index of the sentence to evaluate')
    parser.add_argument('--end-idx', type=int, default=float('inf'),
                        help='End index of the sentence to evaluate')
    parser.add_argument('--scores', action="store_true",
                        help='Request scores from server')
    parser.add_argument('--reset-server', action="store_true",
                        help='Reset the server')
    parser.add_argument('--num-threads', type=int, default=10,
                        help='Number of threads used by agent')
    parser.add_argument('--local', action="store_true", default=False,
                        help='Local evaluation')

    # Pre-parse to learn which registered components are in play, then let
    # each contribute its own arguments before the final parse.
    known, _ = parser.parse_known_args()
    for reg_name, reg in REGISTRIES.items():
        picked = getattr(known, reg_name, None)
        if picked is None:
            continue
        component_cls = reg["registry"][picked]
        if hasattr(component_cls, "add_args"):
            component_cls.add_args(parser)

    args = parser.parse_args()
    return args
def register_hydra_cfg(cs: ConfigStore, name: str = "default") -> None:
    """cs: config store instance, register common training configs"""
    # Store every common config node; log which one failed before re-raising
    # so hydra's otherwise-opaque error is actionable.
    for cfg_name, cfg_cls in CONFIGS.items():
        try:
            cs.store(name=cfg_name, node=cfg_cls())
        except BaseException:
            logger.error(f"{cfg_name} - {cfg_cls()}")
            raise

    # Task and model dataclasses, then every generic registry group.
    register_module_dataclass(cs, TASK_DATACLASS_REGISTRY, "task")
    register_module_dataclass(cs, MODEL_DATACLASS_REGISTRY, "model")
    for group_name, group in REGISTRIES.items():
        register_module_dataclass(cs, group["dataclass_registry"], group_name)
def hydra_init() -> None:
    """Seed hydra's ConfigStore with FairseqConfig defaults plus the
    task/model/registry dataclasses."""
    cs = ConfigStore.instance()

    # Register each top-level FairseqConfig field default; log which one
    # failed before re-raising so the error is actionable.
    for field_name, field in FairseqConfig.__dataclass_fields__.items():
        default_node = field.default
        try:
            cs.store(name=field_name, node=default_node)
        except BaseException:
            logger.error(f"{field_name} - {default_node}")
            raise

    register_module_dataclass(cs, TASK_DATACLASS_REGISTRY, "task")
    register_module_dataclass(cs, MODEL_DATACLASS_REGISTRY, "model")
    for group_name, group in REGISTRIES.items():
        register_module_dataclass(cs, group["dataclass_registry"], group_name)
def parse_args_and_arch(parser, input_args=None, parse_known=False, suppress_defaults=False):
    """Parse args, adding model/task/registry-specific flags on the fly.

    Args:
        parser: an ArgumentParser created by ``get_parser``
        input_args: strings to parse; defaults to ``sys.argv`` when None
        parse_known: if True, behave like ``parse_known_args`` and also
            return the list of unrecognized arguments
        suppress_defaults: if True, return a Namespace containing only the
            values that were explicitly provided (all defaults stripped)
    """
    if suppress_defaults:
        # Parse args without any default values. This requires us to parse
        # twice, once to identify all the necessary task/model args, and a second
        # time with all defaults set to None.
        args = parse_args_and_arch(
            parser,
            input_args=input_args,
            parse_known=parse_known,
            suppress_defaults=False,
        )
        suppressed_parser = argparse.ArgumentParser(add_help=False, parents=[parser])
        suppressed_parser.set_defaults(**{k: None for k, v in vars(args).items()})
        args = suppressed_parser.parse_args(input_args)
        # Drop everything that stayed None, i.e. everything not explicitly set.
        return argparse.Namespace(
            **{k: v for k, v in vars(args).items() if v is not None}
        )

    from fairseq.models import ARCH_MODEL_REGISTRY, ARCH_CONFIG_REGISTRY

    # The parser doesn't know about model/criterion/optimizer-specific args, so
    # we parse twice. First we parse the model/criterion/optimizer, then we
    # parse a second time after adding the *-specific arguments.
    # If input_args is given, we will parse those args instead of sys.argv.
    args, _ = parser.parse_known_args(input_args)

    # Add model-specific args to parser.
    if hasattr(args, "arch"):
        model_specific_group = parser.add_argument_group(
            "Model-specific configuration",
            # Only include attributes which are explicitly given as command-line
            # arguments or which have default values.
            argument_default=argparse.SUPPRESS,
        )
        ARCH_MODEL_REGISTRY[args.arch].add_args(model_specific_group)

    # Add *-specific args to parser.
    from fairseq.registry import REGISTRIES
    for registry_name, REGISTRY in REGISTRIES.items():
        choice = getattr(args, registry_name, None)
        if choice is not None:
            cls = REGISTRY["registry"][choice]
            if hasattr(cls, "add_args"):
                cls.add_args(parser)
    if hasattr(args, "task"):
        from fairseq.tasks import TASK_REGISTRY
        TASK_REGISTRY[args.task].add_args(parser)
    if getattr(args, "use_bmuf", False):
        # hack to support extra args for block distributed data parallelism
        from fairseq.optim.bmuf import FairseqBMUF
        FairseqBMUF.add_args(parser)

    # Parse a second time, now that all component-specific flags exist.
    if parse_known:
        args, extra = parser.parse_known_args(input_args)
    else:
        args = parser.parse_args(input_args)
        extra = None

    # Post-process args: validation limits fall back to the training limits.
    if hasattr(args, "max_sentences_valid") and args.max_sentences_valid is None:
        args.max_sentences_valid = args.max_sentences
    if hasattr(args, "max_tokens_valid") and args.max_tokens_valid is None:
        args.max_tokens_valid = args.max_tokens
    if getattr(args, "memory_efficient_fp16", False):
        # --memory-efficient-fp16 implies --fp16
        args.fp16 = True

    # Apply architecture configuration (fills in arch-specific defaults).
    if hasattr(args, "arch"):
        ARCH_CONFIG_REGISTRY[args.arch](args)

    if parse_known:
        return args, extra
    else:
        return args
def parse_args_and_arch(
    parser: argparse.ArgumentParser,
    input_args: List[str] = None,
    parse_known: bool = False,
    suppress_defaults: bool = False,
    modify_parser: Optional[Callable[[argparse.ArgumentParser], None]] = None,
):
    """
    Args:
        parser (ArgumentParser): the parser
        input_args (List[str]): strings to parse, defaults to sys.argv
        parse_known (bool): only parse known arguments, similar to
            `ArgumentParser.parse_known_args`
        suppress_defaults (bool): parse while ignoring all default values
        modify_parser (Optional[Callable[[ArgumentParser], None]]):
            function to modify the parser, e.g., to set default values
    """
    if suppress_defaults:
        # Parse args without any default values. This requires us to parse
        # twice, once to identify all the necessary task/model args, and a second
        # time with all defaults set to None.
        args = parse_args_and_arch(
            parser,
            input_args=input_args,
            parse_known=parse_known,
            suppress_defaults=False,
        )
        suppressed_parser = argparse.ArgumentParser(add_help=False, parents=[parser])
        suppressed_parser.set_defaults(**{k: None for k, v in vars(args).items()})
        args = suppressed_parser.parse_args(input_args)
        # Keep only the values that were explicitly provided.
        return argparse.Namespace(
            **{k: v for k, v in vars(args).items() if v is not None}
        )

    from fairseq.models import ARCH_MODEL_REGISTRY, ARCH_CONFIG_REGISTRY, MODEL_REGISTRY

    # Before creating the true parser, we need to import optional user module
    # in order to eagerly import custom tasks, optimizers, architectures, etc.
    usr_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
    usr_parser.add_argument("--user-dir", default=None)
    usr_args, _ = usr_parser.parse_known_args(input_args)
    import_user_module(usr_args)

    if modify_parser is not None:
        modify_parser(parser)

    # The parser doesn't know about model/criterion/optimizer-specific args, so
    # we parse twice. First we parse the model/criterion/optimizer, then we
    # parse a second time after adding the *-specific arguments.
    # If input_args is given, we will parse those args instead of sys.argv.
    args, _ = parser.parse_known_args(input_args)

    # Add model-specific args to parser.
    if hasattr(args, "arch"):
        model_specific_group = parser.add_argument_group(
            "Model-specific configuration",
            # Only include attributes which are explicitly given as command-line
            # arguments or which have default values.
            argument_default=argparse.SUPPRESS,
        )
        if args.arch in ARCH_MODEL_REGISTRY:
            ARCH_MODEL_REGISTRY[args.arch].add_args(model_specific_group)
        elif args.arch in MODEL_REGISTRY:
            MODEL_REGISTRY[args.arch].add_args(model_specific_group)
        else:
            # Fix: the original raised a bare RuntimeError() with no message,
            # leaving the user no clue which architecture was unknown.
            raise RuntimeError(
                "Cannot find model architecture or model: {}".format(args.arch)
            )

    # Add *-specific args to parser.
    from fairseq.registry import REGISTRIES

    for registry_name, REGISTRY in REGISTRIES.items():
        choice = getattr(args, registry_name, None)
        if choice is not None:
            cls = REGISTRY["registry"][choice]
            if hasattr(cls, "add_args"):
                cls.add_args(parser)
    if hasattr(args, "task"):
        from fairseq.tasks import TASK_REGISTRY

        TASK_REGISTRY[args.task].add_args(parser)
    if getattr(args, "use_bmuf", False):
        # hack to support extra args for block distributed data parallelism
        from fairseq.optim.bmuf import FairseqBMUF

        FairseqBMUF.add_args(parser)

    # Modify the parser a second time, since defaults may have been reset
    if modify_parser is not None:
        modify_parser(parser)

    # Parse a second time, now that all component-specific flags exist.
    if parse_known:
        args, extra = parser.parse_known_args(input_args)
    else:
        args = parser.parse_args(input_args)
        extra = None

    # Post-process args: validation batch size falls back to the training one.
    if (
        hasattr(args, "batch_size_valid") and args.batch_size_valid is None
    ) or not hasattr(args, "batch_size_valid"):
        args.batch_size_valid = args.batch_size
    if hasattr(args, "max_tokens_valid") and args.max_tokens_valid is None:
        args.max_tokens_valid = args.max_tokens
    if getattr(args, "memory_efficient_fp16", False):
        # --memory-efficient-fp16 implies --fp16
        args.fp16 = True
    if getattr(args, "memory_efficient_bf16", False):
        # --memory-efficient-bf16 implies --bf16
        args.bf16 = True
    args.tpu = getattr(args, "tpu", False)
    args.bf16 = getattr(args, "bf16", False)
    if args.bf16:
        # bf16 is only supported on TPU in this code path
        args.tpu = True
    if args.tpu and args.fp16:
        raise ValueError("Cannot combine --fp16 and --tpu, use --bf16 on TPUs")

    if getattr(args, "seed", None) is None:
        args.seed = 1  # default seed for training
        args.no_seed_provided = True
    else:
        args.no_seed_provided = False

    # Apply architecture configuration (fills in arch-specific defaults).
    if hasattr(args, "arch") and args.arch in ARCH_CONFIG_REGISTRY:
        ARCH_CONFIG_REGISTRY[args.arch](args)

    if parse_known:
        return args, extra
    else:
        return args
def get_parser(desc, default_task="translation"):
    """Build the common fairseq argument parser (variant with TPU/bf16,
    model-parallel, and quantization flags)."""
    # Before creating the true parser, we need to import optional user module
    # in order to eagerly import custom tasks, optimizers, architectures, etc.
    usr_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
    usr_parser.add_argument("--user-dir", default=None)
    usr_args, _ = usr_parser.parse_known_args()
    utils.import_user_module(usr_args)

    parser = argparse.ArgumentParser(allow_abbrev=False)
    # fmt: off
    parser.add_argument('--no-progress-bar', action='store_true', help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--tensorboard-logdir', metavar='DIR', default='',
                        help='path to save logs for tensorboard, should match --logdir '
                             'of running tensorboard (default: no tensorboard logging)')
    parser.add_argument('--seed', default=None, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--cpu', action='store_true', help='use CPU instead of CUDA')
    parser.add_argument('--tpu', action='store_true', help='use TPU instead of CUDA')
    parser.add_argument('--bf16', action='store_true', help='use bfloat16; implies --tpu')
    parser.add_argument('--fp16', action='store_true', help='use FP16')
    parser.add_argument('--memory-efficient-bf16', action='store_true',
                        help='use a memory-efficient version of BF16 training; implies --bf16')
    parser.add_argument('--memory-efficient-fp16', action='store_true',
                        help='use a memory-efficient version of FP16 training; implies --fp16')
    parser.add_argument('--fp16-no-flatten-grads', action='store_true',
                        help='don\'t flatten FP16 grads tensor')
    parser.add_argument('--fp16-init-scale', default=2**7, type=int,
                        help='default FP16 loss scale')
    parser.add_argument('--fp16-scale-window', type=int,
                        help='number of updates before increasing loss scale')
    parser.add_argument('--fp16-scale-tolerance', default=0.0, type=float,
                        help='pct of updates that can overflow before decreasing the loss scale')
    parser.add_argument('--min-loss-scale', default=1e-4, type=float, metavar='D',
                        help='minimum FP16 loss scale, after which training is stopped')
    parser.add_argument('--threshold-loss-scale', type=float,
                        help='threshold FP16 loss scale from below')
    parser.add_argument('--user-dir', default=None,
                        help='path to a python module containing custom extensions (tasks and/or architectures)')
    parser.add_argument('--empty-cache-freq', default=0, type=int,
                        help='how often to clear the PyTorch CUDA cache (0 to disable)')
    parser.add_argument('--all-gather-list-size', default=16384, type=int,
                        help='number of bytes reserved for gathering stats from workers')
    parser.add_argument('--model-parallel-size', type=int, metavar='N', default=1,
                        help='total number of GPUs to parallelize model over')
    parser.add_argument('--checkpoint-suffix', default='',
                        help='suffix to add to the checkpoint file name')
    parser.add_argument('--quantization-config-path', default=None,
                        help='path to quantization config file')
    parser.add_argument('--profile', action='store_true',
                        help='enable autograd profiler emit_nvtx')

    # One selector flag per registered component group.
    from fairseq.registry import REGISTRIES
    for registry_name, REGISTRY in REGISTRIES.items():
        parser.add_argument(
            '--' + registry_name.replace('_', '-'),
            default=REGISTRY['default'],
            choices=REGISTRY['registry'].keys(),
        )

    # Task definitions can be found under fairseq/tasks/
    from fairseq.tasks import TASK_REGISTRY
    parser.add_argument('--task', metavar='TASK', default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    # fmt: on
    return parser
def get_parser(desc, default_task='translation'):
    """Build the common fairseq parser plus a large block of data-actor /
    DDS-related flags (language sampling, gradient projection, rewards)."""
    # Before creating the true parser, we need to import optional user module
    # in order to eagerly import custom tasks, optimizers, architectures, etc.
    usr_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
    usr_parser.add_argument('--user-dir', default=None)
    usr_args, _ = usr_parser.parse_known_args()
    utils.import_user_module(usr_args)

    parser = argparse.ArgumentParser(allow_abbrev=False)
    # fmt: off
    parser.add_argument('--no-progress-bar', action='store_true', help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--tensorboard-logdir', metavar='DIR', default='',
                        help='path to save logs for tensorboard, should match --logdir '
                             'of running tensorboard (default: no tensorboard logging)')
    parser.add_argument("--tbmf-wrapper", action="store_true", help="[FB only] ")
    parser.add_argument('--seed', default=1, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--cpu', action='store_true', help='use CPU instead of CUDA')
    parser.add_argument('--fp16', action='store_true', help='use FP16')
    parser.add_argument('--memory-efficient-fp16', action='store_true',
                        help='use a memory-efficient version of FP16 training; implies --fp16')
    parser.add_argument('--fp16-init-scale', default=2**7, type=int,
                        help='default FP16 loss scale')
    parser.add_argument('--fp16-scale-window', type=int,
                        help='number of updates before increasing loss scale')
    parser.add_argument('--fp16-scale-tolerance', default=0.0, type=float,
                        help='pct of updates that can overflow before decreasing the loss scale')
    parser.add_argument('--min-loss-scale', default=1e-4, type=float, metavar='D',
                        help='minimum FP16 loss scale, after which training is stopped')
    parser.add_argument('--threshold-loss-scale', type=float,
                        help='threshold FP16 loss scale from below')
    parser.add_argument('--user-dir', default=None,
                        help='path to a python module containing custom extensions (tasks and/or architectures)')

    # Data-actor / DDS options.
    parser.add_argument('--sde', action='store_true', help='whether to use sde')
    parser.add_argument('--update-language-sampling', type=int, default=-1,
                        help='update language sampling every N step')
    parser.add_argument('--extra-update-language-sampling', type=int, default=-1,
                        help='update language sampling every N step')
    parser.add_argument('--scale-norm', action='store_true',
                        help='whether to use scaled norm')
    parser.add_argument('--fix-norm', type=float, default=None,
                        help='whether to use fixed norm at output embedding')
    parser.add_argument('--data-actor', type=str, default=None,
                        help='type of data actor [base|ave_emb|only_grad|interpolate_grad]')
    parser.add_argument('--data-actor-embed-dropout', type=float, default=0., help='')
    parser.add_argument('--data-actor-proj-dropout', type=float, default=0., help='')
    parser.add_argument('--data-actor-proj-linear-bias', type=float, default=None,
                        help='the bias term to data actor linear projection')
    parser.add_argument('--data-actor-proj-post-bias', type=float, default=0,
                        help='the bias term to add after data actor project activation')
    parser.add_argument('--data-actor-sigmoid-scale', type=float, default=1.,
                        help='the bias term to add after data actor project activation')
    parser.add_argument('--extra-data-actor', type=str, default=None,
                        help='type of data actor [ave_emb]')
    parser.add_argument('--combine-probs', type=str, default=None,
                        help=' [weight_by_size]')
    parser.add_argument('--data-actor-lr', type=eval_str_list, default=0.01,
                        help='lr for optimizing data actor')
    parser.add_argument('--data-actor-optim-step', type=int, default=1,
                        help='number of steps to optimize data actor')
    parser.add_argument('--data-actor-lr-scheduler', type=str, default=None)
    parser.add_argument('--data-actor-embed-dim', type=int, default=32,
                        help='dimension of word embedding for data actor')
    parser.add_argument('--lan-embed-dim', type=int, default=None,
                        help='dimension of word embedding for data actor')
    parser.add_argument('--data-actor-model-embed', type=int, default=0,
                        help='[0|1] whether to use model embedding')
    parser.add_argument('--data-actor-embed-grad', type=int, default=1,
                        help='[0|1] whether to optimize model embedding')
    parser.add_argument('--data-loss-lambda', type=float, default=0,
                        help='the percentage of using actual data loss')
    parser.add_argument('--data-loss-lambda-warmup-steps', type=int, default=-1)
    parser.add_argument('--data-loss-lambda-init', type=float, default=-1)
    parser.add_argument('--data-loss-lambda-final', type=float, default=-1)
    parser.add_argument('--out-score-type', type=str, default='sigmoid',
                        help='[sigmoid|exp]')
    parser.add_argument('--data-actor-share-model', action='store_true',
                        help='whether to allow data actor and main model to share the same parameter')
    parser.add_argument('--tanh-constant', type=float, default=10,
                        help='the constant multiplier for tanh output')
    parser.add_argument('--exp-constant', type=float, default=0.1,
                        help='the constant multiplier for tanh output')
    parser.add_argument('--eval-bleu', action='store_true',
                        help='whether to valid on bleu score')
    parser.add_argument('--only-load-data-actor', action='store_true',
                        help='whether to valid on bleu score')
    parser.add_argument('--data-actor-proj-grad-only', action='store_true')
    parser.add_argument('--load-model-as-data-actor', action='store_true',
                        help='use the model as data actor')
    parser.add_argument('--grad-sim', type=str, default='cosine',
                        help='[cosine|dot_prod]')
    parser.add_argument('--dev-grad-eta', type=float, default=0.0001)
    parser.add_argument('--proj-grad-sim', type=str, default='cosine',
                        help='[cosine|dot_prod]')
    parser.add_argument('--loss-steps', type=int, default=1,
                        help='number of steps to calculate loss for grad sim')
    parser.add_argument('--scale-reward', action='store_true',
                        help='whether to scale reward by current p')
    parser.add_argument('--baseline', action='store_true',
                        help='whether to scale reward by current p')
    parser.add_argument('--relu-reward', action='store_true',
                        help='whether to relu the reward')
    parser.add_argument('--discount-reward', type=float, default=-1,
                        help='discount factor for reward')
    parser.add_argument('--reward-scale', type=float, default=0.0001,
                        help='scale factor of the reward')
    parser.add_argument('--language-weight', type=str, default=None,
                        help='dev language weights separated by comma')
    parser.add_argument('--data-actor-step-update', action='store_true',
                        help='whether to update at training step')
    parser.add_argument('--exact-update', action='store_true',
                        help='whether to do exact update in the approximate setting')
    parser.add_argument('--loss-weight', type=str, default=None, help='[low|]')
    parser.add_argument('--discount-grad', action='store_true',
                        help='whether to use the default discount grad')
    parser.add_argument('--a0', type=float, default=0.05, help='[low|]')
    parser.add_argument('--a1', type=float, default=0.95, help='[low|]')
    parser.add_argument('--switch-obj-epoch', type=int, default=1,
                        help='the epoch to update val loss to trainer')
    parser.add_argument('--embedding-file', type=str, default=None,
                        help='the file path to init data actor embedding')
    parser.add_argument('--data-actor-feature-postprocess', type=str, default='last',
                        help='[tanh|average]')

    # TCS options
    parser.add_argument('--lan-dists', default=None, type=str,
                        help='comman separated numbers that indicate language distance')
    parser.add_argument('--data-condition', default="target", type=str,
                        help='[source|target] whether to condition on source or target')
    parser.add_argument('--sample-instance', action='store_true',
                        help='whether to sample for each instance in a batch for mulitlingual_data')
    parser.add_argument('--sample-tag-prob', default=-1, type=float,
                        help='probability of using tags other than the language')
    parser.add_argument('--data-actor-multilin', action='store_true',
                        help='whether to multiling version of the actor')
    parser.add_argument('--utility-type', type=str, default='ave',
                        help='type of utility function [ave|min-half|median]')
    parser.add_argument('--eval-lang-pairs', type=str, default=None,
                        help='dev data keys for multilin actor')
    parser.add_argument('--no-dev', action='store_true',
                        help='not use dev set gradient')
    parser.add_argument('--pretrain-data-actor', action='store_true',
                        help='pretrain the data actor')
    parser.add_argument('--pretrain-type', type=str, default='lan_dist',
                        help='[lan_dist|datasize]')
    parser.add_argument('--feature-type', type=str, default='ones',
                        help='[ones|valid_loss|train_loss]')
    parser.add_argument('--layerwise-dds', action='store_true',
                        help='use layerwise DDS')
    parser.add_argument('--tensorwise-dds', action='store_true')
    parser.add_argument('--dds-no-neg-reward', action='store_true',
                        help='set the negative reward for DDS to 0')
    parser.add_argument('--proj-grad', action='store_true')
    parser.add_argument('--train-on-proj', action='store_true')
    parser.add_argument('--train-proj-grad', action='store_true',
                        help="use the training grad to project")
    parser.add_argument('--train-proj-grad-sum', action='store_true',
                        help="use the training grad to project")
    parser.add_argument('--save-proj-train', action='store_true',
                        help="whether to use saved moving avg grad to project")
    parser.add_argument('--remove-sample-id', action='store_true',
                        help="do not project on current language being trained")
    parser.add_argument('--proj-lan-id', type=str, default=None)
    parser.add_argument('--paramwise-proj-grad', action='store_true')
    parser.add_argument('--sample-proj-count', type=int, default=1,
                        help='number of tasks to sample for projection')
    parser.add_argument('--optim-weight-softmax-tau', type=float, default=-1,
                        help='a float between (0, 1], smaller value makes weight more peaky')
    parser.add_argument('--optim-weight-above-one', action='store_true')
    parser.add_argument('--datasize-t', type=int, default=None,
                        help='temperature for controlling datasize sampling')
    parser.add_argument('--alpha-p', type=float, default=0,
                        help='[0-1] amount of interpolation for p')
    parser.add_argument('--num-dev-samples', type=int, default=8,
                        help="number of samples to select for dev batch for gradient; max token is set to 1200")
    parser.add_argument('--reward-level', type=str, default="sent")
    parser.add_argument('--reward-constant', type=float, default=0.01)
    parser.add_argument('--only-optim-model-key', type=str, default=None)
    parser.add_argument('--upsample-factor', type=int, default=0)
    parser.add_argument('--data-score-label-smooth', type=str, default="none",
                        help="[no_smooth|weigted_smooth]")

    # One selector flag per registered component group.
    from fairseq.registry import REGISTRIES
    for registry_name, REGISTRY in REGISTRIES.items():
        parser.add_argument(
            '--' + registry_name.replace('_', '-'),
            default=REGISTRY['default'],
            choices=REGISTRY['registry'].keys(),
        )

    # Task definitions can be found under fairseq/tasks/
    from fairseq.tasks import TASK_REGISTRY
    parser.add_argument('--task', metavar='TASK', default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    # fmt: on
    return parser
def get_parser(desc, default_task='translation'):
    """Build the common fairseq parser (variant adding --float-valid and a
    dict-valued --num-ref flag parsed via StoreDictKeyPair)."""
    # Before creating the true parser, we need to import optional user module
    # in order to eagerly import custom tasks, optimizers, architectures, etc.
    usr_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
    usr_parser.add_argument('--user-dir', default=None)
    usr_args, _ = usr_parser.parse_known_args()
    utils.import_user_module(usr_args)

    parser = argparse.ArgumentParser(allow_abbrev=False)
    # fmt: off
    parser.add_argument('--no-progress-bar', action='store_true', help='disable progress bar')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='log progress every N batches (when progress bar is disabled)')
    parser.add_argument('--log-format', default=None, help='log format to use',
                        choices=['json', 'none', 'simple', 'tqdm'])
    parser.add_argument('--tensorboard-logdir', metavar='DIR', default='',
                        help='path to save logs for tensorboard, should match --logdir '
                             'of running tensorboard (default: no tensorboard logging)')
    parser.add_argument('--seed', default=1, type=int, metavar='N',
                        help='pseudo random number generator seed')
    parser.add_argument('--cpu', action='store_true', help='use CPU instead of CUDA')
    parser.add_argument('--fp16', action='store_true', help='use FP16')
    parser.add_argument('--memory-efficient-fp16', action='store_true',
                        help='use a memory-efficient version of FP16 training; implies --fp16')
    parser.add_argument('--fp16-init-scale', default=2 ** 7, type=int,
                        help='default FP16 loss scale')
    parser.add_argument('--fp16-scale-window', type=int,
                        help='number of updates before increasing loss scale')
    parser.add_argument('--fp16-scale-tolerance', default=0.0, type=float,
                        help='pct of updates that can overflow before decreasing the loss scale')
    parser.add_argument('--min-loss-scale', default=1e-4, type=float, metavar='D',
                        help='minimum FP16 loss scale, after which training is stopped')
    parser.add_argument('--threshold-loss-scale', type=float,
                        help='threshold FP16 loss scale from below')
    parser.add_argument('--user-dir', default=None,
                        help='path to a python module containing custom extensions (tasks and/or architectures)')
    parser.add_argument('--empty-cache-freq', default=0, type=int,
                        help='how often to clear the PyTorch CUDA cache (0 to disable)')
    parser.add_argument('--all-gather-list-size', default=16384, type=int,
                        help='number of bytes reserved for gathering stats from workers')
    # By xxx: float_valid
    parser.add_argument('--float-valid', default=False, action='store_true',
                        help='if true, use float type for valid step (for DynamicConv)')

    # One selector flag per registered component group.
    from fairseq.registry import REGISTRIES
    for registry_name, REGISTRY in REGISTRIES.items():
        parser.add_argument(
            '--' + registry_name.replace('_', '-'),
            default=REGISTRY['default'],
            choices=REGISTRY['registry'].keys(),
        )

    # Task definitions can be found under fairseq/tasks/
    from fairseq.tasks import TASK_REGISTRY
    parser.add_argument('--task', metavar='TASK', default=default_task,
                        choices=TASK_REGISTRY.keys(),
                        help='task')
    # fmt: on
    # --num-ref is registered after "# fmt: on", matching the original order.
    parser.add_argument('--num-ref', default=None, nargs='+',
                        action=StoreDictKeyPair, metavar='NUMREFSPLIT',
                        help='dict for number of references for valid and test')
    return parser