class Config(BaseModel.Config):
    model: str = "mmbt"
    # classification or pretraining
    training_head_type: str = "pretraining"
    bert_model_name: str = "bert-base-uncased"
    direct_features_input: bool = False
    freeze_text: bool = False
    freeze_modal: bool = False
    freeze_complete_base: bool = False
    finetune_lr_multiplier: float = 1
    # Dimension of the embedding finally returned by the modal encoder
    modal_hidden_size: int = 2048
    text_hidden_size: int = 768
    num_labels: int = 2
    # This actually is Union[ImageEncoderConfig, ImageFeatureEncoderConfig]
    modal_encoder: EncoderFactory.Config = ImageEncoderFactory.Config(
        type=ImageEncoderTypes.resnet152,
        params=ResNet152ImageEncoder.Config(),
    )
    text_encoder: EncoderFactory.Config = TextEncoderFactory.Config(
        type=TextEncoderTypes.transformer,
        params=TransformerEncoder.Config(bert_model_name=II("bert_model_name")),
    )
    use_modal_start_token: bool = True
    use_modal_end_token: bool = True
    fused_feature_only: bool = False
    output_dim: int = 768
class Interpolation:
    x: int = 100
    y: int = 200
    # The real type of y is int, cast the interpolation string
    # to help static type checkers to see this truth
    z1: int = II("x")
    z2: str = SI("${x}_${y}")
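# Usage sketch for the Interpolation config above (an illustrative example,
# not part of the original source). It assumes the class is decorated with
# @dataclass and that II/SI/OmegaConf come from the omegaconf package.
# Interpolations resolve lazily, on access, against the enclosing config.
from omegaconf import OmegaConf

conf = OmegaConf.structured(Interpolation)
assert conf.z1 == 100          # ${x}, typed as int
assert conf.z2 == "100_200"    # "${x}_${y}" renders as a string
conf.x = 20
assert conf.z1 == 20           # z1 tracks later updates to x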
class FairseqBMUFConfig(FairseqDataclass):
    block_lr: float = field(
        default=1, metadata={"help": "block learning rate for bmuf"}
    )
    block_momentum: float = field(
        default=0.875, metadata={"help": "block momentum for bmuf"}
    )
    global_sync_iter: int = field(
        default=50, metadata={"help": "Iteration for syncing global model"}
    )
    warmup_iterations: int = field(
        default=500, metadata={"help": "warmup iterations for model to broadcast"}
    )
    use_nbm: bool = field(
        default=False,
        metadata={"help": "Specify whether you want to use classical BM / Nesterov BM"},
    )
    average_sync: bool = field(
        default=False,
        metadata={
            "help": "Specify whether you want to average the local momentum after each sync"
        },
    )
    distributed_world_size: int = II("distributed_training.distributed_world_size")
class Data2VecTextConfig(FairseqDataclass):
    max_positions: int = II("task.tokens_per_sample")
    head_layers: int = 1
    transformer: TransformerConfig = TransformerConfig()
    load_checkpoint_heads: bool = field(
        default=False,
        metadata={"help": "(re-)register and load heads when loading checkpoints"},
    )
    loss_beta: float = field(
        default=0, metadata={"help": "beta for smooth l1 loss. 0 means use l2 loss"}
    )
    loss_scale: Optional[float] = field(
        default=None,
        metadata={
            "help": "scale the reconstruction loss by this constant. if None then scales by 1/sqrt(dim)"
        },
    )
    average_top_k_layers: int = field(
        default=8, metadata={"help": "how many layers to average"}
    )
    layer_norm_target_layer: bool = False
    instance_norm_target_layer: bool = False
    batch_norm_target_layer: bool = False
    instance_norm_targets: bool = False
    layer_norm_targets: bool = False
    ema_decay: float = field(
        default=0.999, metadata={"help": "initial ema decay rate"}
    )
    ema_end_decay: float = field(
        default=0.9999, metadata={"help": "final ema decay rate"}
    )
    # when to finish annealing ema decay rate
    ema_anneal_end_step: int = II("optimization.max_update")
    ema_transformer_layers_only: bool = field(
        default=True,
        metadata={"help": "whether to momentum update only the transformer layers"},
    )
class FairseqAdamConfig(FairseqDataclass):
    adam_betas: Any = field(
        default=(0.9, 0.999), metadata={"help": "betas for Adam optimizer"}
    )
    adam_eps: float = field(
        default=1e-8, metadata={"help": "epsilon for Adam optimizer"}
    )
    weight_decay: float = field(default=0.0, metadata={"help": "weight decay"})
    use_old_adam: bool = field(
        default=False, metadata={"help": "Use fairseq.optim.adam.Adam"}
    )
    use_fused_adam: bool = field(
        default=False,
        metadata={"help": "Use habana_frameworks.torch.hpex.optimizers.FusedAdamW"},
    )
    # TODO common vars below in parent
    tpu: bool = II("common.tpu")
    lr: List[float] = II("optimization.lr")
    use_habana: bool = II("common.use_habana")
class SentencePredictionConfig(FairseqDataclass):
    data: str = field(default=MISSING, metadata={"help": "path to data directory"})
    num_classes: int = field(
        default=-1,
        metadata={"help": "number of classes or regression targets"},
    )
    init_token: Optional[int] = field(
        default=None,
        metadata={"help": "add token at the beginning of each batch item"},
    )
    separator_token: Optional[int] = field(
        default=None,
        metadata={"help": "add separator token between inputs"},
    )
    no_shuffle: bool = field(default=False)
    shorten_method: SHORTEN_METHOD_CHOICES = field(
        default="none",
        metadata={
            "help": "if not none, shorten sequences that exceed tokens_per_sample"
        },
    )
    shorten_data_split_list: str = field(
        default="",
        metadata={
            "help": "comma-separated list of dataset splits to apply shortening to, "
            'e.g., "train,valid" (default: all dataset splits)'
        },
    )
    add_prev_output_tokens: bool = field(
        default=False,
        metadata={
            "help": "add prev_output_tokens to sample, used for encoder-decoder arch"
        },
    )
    max_positions: int = field(
        default=512,
        metadata={"help": "max tokens per example"},
    )
    regression_target: bool = II("criterion.regression_target")
    classification_head_name: str = II("criterion.classification_head_name")
    seed: int = II("common.seed")
class TruncatedBPTTLMConfig(FairseqDataclass):
    data: str = field(default="???", metadata={"help": "path to data directory"})
    tokens_per_sample: int = field(
        default=1024,
        metadata={"help": "max number of tokens per sequence"},
    )
    batch_size: int = II("dataset.batch_size")
    # Some models use *max_target_positions* to know how many positional
    # embeddings to learn. We use II(...) to make it default to
    # *tokens_per_sample*, but in principle there could be more positional
    # embeddings than tokens in a single batch. This may also be irrelevant for
    # custom model implementations.
    max_target_positions: int = II("task.tokens_per_sample")
    # these will be populated automatically if not provided
    data_parallel_rank: Optional[int] = None
    data_parallel_size: Optional[int] = None
class ReduceLROnPlateauV2Config(FairseqDataclass):
    lr_shrink: float = field(
        default=0.1,
        metadata={"help": "shrink factor for annealing, lr_new = (lr * lr_shrink)"},
    )
    lr_threshold: float = field(
        default=1e-4,
        metadata={
            "help": "threshold for measuring the new optimum, to only focus on significant changes"
        },
    )
    lr_patience: int = field(
        default=0,
        metadata={
            "help": "number of epochs with no improvement after which learning rate will be reduced"
        },
    )
    warmup_updates: int = field(
        default=0,
        metadata={"help": "warmup the learning rate linearly for the first N updates"},
    )
    warmup_init_lr: float = field(
        default=-1,
        metadata={
            "help": "initial learning rate during warmup phase; default is cfg.lr"
        },
    )
    final_lr_scale: float = field(
        default=0.01,
        metadata={"help": "final learning rate scale; default to 0.01"},
    )
    start_reduce_lr_epoch: int = field(
        default=0,
        metadata={"help": "start to reduce lr from the specified epoch"},
    )
    # TODO common vars at parent class
    lr: List[float] = II("optimization.lr")
    maximize_best_checkpoint_metric: bool = II(
        "checkpoint.maximize_best_checkpoint_metric"
    )
class NumLanguageModelingConfig(LanguageModelingConfig):
    numlm_data_send_log_value: bool = field(
        default=False,
        metadata={
            "help": "prepare log value of the number tokens for number rotation embedding."
        },
    )
    fp16: bool = II("common.fp16")
class TuneConfig:
    metric: str = "loss"
    mode: str = "min"
    num_samples: int = 1
    name: Optional[str] = II("trainer.name")
    checkpoint_freq: int = 100
    checkpoint_at_end: bool = True
    additional_config: Dict[str, Any] = field(default_factory=lambda: {})
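# Hedged sketch of how TuneConfig's II("trainer.name") resolves. The
# interpolation is absolute, so it only resolves once TuneConfig is merged
# under a root config that actually contains a `trainer` node; the
# `trainer.name` value below is a made-up placeholder.
from omegaconf import OmegaConf

root = OmegaConf.create(
    {
        "trainer": {"name": "example_run"},
        "tune": OmegaConf.structured(TuneConfig),
    }
)
assert root.tune.name == "example_run"  # looked up from the root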
class AudioPretrainingConfig(FairseqDataclass):
    data: str = field(default=MISSING, metadata={"help": "path to data directory"})
    labels: Optional[str] = field(
        default=None,
        metadata={"help": "extension of the label file to load, used for fine-tuning"},
    )
    binarized_dataset: bool = field(
        default=False,
        metadata={
            "help": "if true, loads binarized dataset (useful for very large datasets). "
            "See examples/wav2vec/scripts/binarize_manifest.sh"
        },
    )
    sample_rate: int = field(
        default=16_000,
        metadata={
            "help": "target sample rate. audio files will be up/down sampled to this rate"
        },
    )
    normalize: bool = field(
        default=False,
        metadata={"help": "if set, normalizes input to have 0 mean and unit variance"},
    )
    enable_padding: bool = field(
        default=False, metadata={"help": "pad shorter samples instead of cropping"}
    )
    max_sample_size: Optional[int] = field(
        default=None, metadata={"help": "max sample size to crop to for batching"}
    )
    min_sample_size: Optional[int] = field(
        default=None, metadata={"help": "min sample size to skip small examples"}
    )
    num_batch_buckets: int = field(
        default=0,
        metadata={"help": "number of buckets"},
    )
    precompute_mask_indices: bool = field(
        default=False,
        metadata={"help": "flag to compute mask indices in data preparation."},
    )
    inferred_w2v_config: Optional[InferredW2vConfig] = field(
        default=None,
        metadata={
            "help": "wav2vec 2.0 masking arguments used to pre-compute masks (required for TPU)"
        },
    )
    tpu: bool = II("common.tpu")
    text_compression_level: ChoiceEnum([x.name for x in TextCompressionLevel]) = field(
        default="none",
        metadata={
            "help": "compression level for texts (e.g. audio filenames, "
            "target texts): none/low/high (default: none)"
        },
    )
class SpeechToTextModTaskConfig(SpeechToTextTaskConfig):
    sample_ratios: str = field(
        default="1", metadata={"help": "sample ratios of the train subsets"}
    )
    da_p_augm: float = field(
        default=1.0,
        metadata={
            "help": "The probability that data augmentation is applied to an example."
        },
    )
    da_tempo: str = field(
        default="1,1",
        metadata={
            "help": "The range from which to sample the tempo factor during data augmentation"
        },
    )
    da_pitch: str = field(
        default="0,0",
        metadata={
            "help": "The range from which to sample the pitch value during data augmentation. "
            "Measured in cents (i.e. 100ths of a semitone)"
        },
    )
    da_echo_delay: str = field(
        default="0,0",
        metadata={
            "help": "The range from which to sample the echo delay value during data augmentation. "
            "Measured in milliseconds"
        },
    )
    da_echo_decay: str = field(
        default="0,0",
        metadata={
            "help": "The range from which to sample the echo decay factor during data augmentation."
        },
    )
    normalize: bool = field(
        default=True,
        metadata={"help": "Whether to normalize the audio wave to zero mean and unit variance."},
    )
    interactive_tgt_lang: Optional[str] = field(
        default=None,
        metadata={"help": "Target language to be used with Fairseq's interactive mode."},
    )
    seed: int = II("common.seed")
    max_tokens: int = II("dataset.max_tokens")
class SpeechRecognitionEspressoConfig(FairseqDataclass):
    data: Optional[str] = field(
        default=None, metadata={"help": "path to data directory"}
    )
    dict: Optional[str] = field(
        default=None, metadata={"help": "path to the dictionary"}
    )
    non_lang_syms: Optional[str] = field(
        default=None,
        metadata={
            "help": "path to a file listing non-linguistic symbols, e.g., <NOISE> "
            "etc. One entry per line. To be filtered out when calculating WER/CER"
        },
    )
    word_dict: Optional[str] = field(
        default=None,
        metadata={"help": "path to the word dictionary. Only relevant for decoding"},
    )
    wer_output_filter: Optional[str] = field(
        default=None,
        metadata={"help": "path to wer_output_filter file for WER evaluation"},
    )
    max_source_positions: Optional[int] = field(
        default=1024, metadata={"help": "max number of tokens in the source sequence"}
    )
    max_target_positions: Optional[int] = field(
        default=1024, metadata={"help": "max number of tokens in the target sequence"}
    )
    upsample_primary: int = field(
        default=1, metadata={"help": "amount to upsample primary dataset"}
    )
    num_batch_buckets: Optional[int] = field(
        default=0,
        metadata={
            "help": "if >0, then bucket source and target lengths into N "
            "buckets and pad accordingly; this is useful on TPUs "
            "to minimize the number of compilations"
        },
    )
    feat_in_channels: int = field(
        default=1, metadata={"help": "feature input channels"}
    )
    specaugment_config: Optional[str] = field(
        default=None,
        metadata={
            "help": "SpecAugment config string. If not None and not empty, "
            "then apply SpecAugment. Should be an evaluatable expression of "
            "a python dict. See speech_tools.specaug_interpolate.specaug() for "
            "all allowed arguments. Arguments not appearing in this string "
            "will take on their default values"
        },
    )
    global_cmvn_stats_path: Optional[str] = field(
        default=None,
        metadata={
            "help": "If not None, apply global cmvn using this global cmvn stats file (.npz)."
        },
    )
    # TODO common vars below add to parent
    seed: int = II("common.seed")
    data_buffer_size: int = II("dataset.data_buffer_size")
    tpu: bool = II("common.tpu")
    train_subset: str = II("dataset.train_subset")
    valid_subset: str = II("dataset.valid_subset")
    gen_subset: str = II("dataset.gen_subset")
    required_seq_len_multiple: int = II("dataset.required_seq_len_multiple")
class TransducerLossCriterionConfig(FairseqDataclass):
    sentence_avg: bool = II("optimization.sentence_avg")
    print_training_sample_interval: int = field(
        default=500,
        metadata={
            "help": "print a training sample (reference + prediction) every this number of updates"
        },
    )
class PolynomialDecayScheduleConfig(FairseqDataclass):
    warmup_updates: int = field(
        default=0,
        metadata={"help": "warmup the learning rate linearly for the first N updates"},
    )
    force_anneal: Optional[int] = field(
        default=None,
        metadata={"help": "force annealing at specified epoch"},
    )
    end_learning_rate: float = field(
        default=0.0,
        metadata={"help": "learning rate to decay to"},
    )
    power: float = field(
        default=1.0,
        metadata={"help": "decay exponent"},
    )
    total_num_update: float = II("optimization.max_update")
    lr: List[float] = II("optimization.lr")
class HubertSeq2SeqConfig(HubertAsrConfig):
    decoder_embed_dim: int = field(
        default=768, metadata={"help": "decoder embedding dimension"}
    )
    decoder_ffn_embed_dim: int = field(
        default=3072, metadata={"help": "decoder embedding dimension for FFN"}
    )
    decoder_layers: int = field(default=6, metadata={"help": "num of decoder layers"})
    decoder_layerdrop: float = field(
        default=0.0, metadata={"help": "decoder layerdrop chance"}
    )
    decoder_attention_heads: int = field(
        default=4, metadata={"help": "num decoder attention heads"}
    )
    decoder_learned_pos: bool = field(
        default=False,
        metadata={"help": "use learned positional embeddings in the decoder"},
    )
    decoder_normalize_before: bool = field(
        default=False, metadata={"help": "apply layernorm before each decoder block"}
    )
    no_token_positional_embeddings: bool = field(
        default=False,
        metadata={
            "help": "if set, disables positional embeddings (outside self attention)"
        },
    )
    decoder_dropout: float = field(
        default=0.0, metadata={"help": "dropout probability in the decoder"}
    )
    decoder_attention_dropout: float = field(
        default=0.0,
        metadata={
            "help": "dropout probability for attention weights inside the decoder"
        },
    )
    decoder_activation_dropout: float = field(
        default=0.0,
        metadata={
            "help": "dropout probability after activation in FFN inside the decoder"
        },
    )
    max_target_positions: int = field(
        default=2048, metadata={"help": "max target positions"}
    )
    share_decoder_input_output_embed: bool = field(
        default=False, metadata={"help": "share decoder input and output embeddings"}
    )
    autoregressive: bool = II("task.autoregressive")
    seq2seq_path: str = field(
        default="",
        metadata={"help": "path to a pretrained seq2seq checkpoint to load"},
    )
    reset_dict: bool = field(
        default=False,
        metadata={"help": "reset the dictionary when loading the seq2seq checkpoint"},
    )
class CtcCriterionConfig(FairseqDataclass):
    zero_infinity: bool = field(
        default=True,
        metadata={
            "help": "zero inf loss when source length <= target length. Should be "
            "set for CTC NAT since we have no idea if this condition holds."
        },
    )
    sentence_avg: bool = II("optimization.sentence_avg")
    cutoff: bool = field(
        default=False,
        metadata={"help": "Apply cutoff data augmentation."},
    )
    cutoff_regularization_loss: float = field(
        default=5.0,
        metadata={"help": "Cutoff regularization coefficient."},
    )
    post_process: str = field(
        default="letter",
        metadata={
            "help": "how to post process predictions into words. can be letter, "
            "wordpiece, BPE symbols, etc. "
            "See fairseq.data.data_utils.post_process() for full list of options"
        },
    )
    wer_kenlm_model: Optional[str] = field(
        default=None,
        metadata={
            "help": "if this is provided, use kenlm to compute wer (along with other wer_* args)"
        },
    )
    wer_lexicon: Optional[str] = field(
        default=None,
        metadata={"help": "lexicon to use with wer_kenlm_model"},
    )
    wer_lm_weight: float = field(
        default=2.0,
        metadata={"help": "lm weight to use with wer_kenlm_model"},
    )
    wer_word_score: float = field(
        default=-1.0,
        metadata={"help": "lm word score to use with wer_kenlm_model"},
    )
    wer_args: Optional[str] = field(
        default=None,
        metadata={
            "help": "DEPRECATED: tuple of (wer_kenlm_model, wer_lexicon, wer_lm_weight, wer_word_score)"
        },
    )
class VoxelCADatasetConfig(BaseDatasetConfig):
    _target_: str = "artefact_nca.dataset.voxel_dataset.VoxelDataset"
    entity_name: Optional[str] = None
    target_voxel: Optional[Any] = None
    target_color_dict: Optional[Dict[Any, Any]] = dataclasses.field(
        default_factory=lambda: None
    )
    target_unique_val_dict: Optional[Dict[Any, Any]] = dataclasses.field(
        default_factory=lambda: None
    )
    nbt_path: Optional[str] = None
    load_coord: List[int] = dataclasses.field(default_factory=lambda: [50, 10, 10])
    load_entity_config: Dict[Any, Any] = dataclasses.field(default_factory=lambda: {})
    spawn_at_bottom: bool = False
    use_random_seed_block: bool = False
    input_shape: Optional[List[int]] = None
    num_hidden_channels: Any = II("trainer.num_hidden_channels")
    half_precision: Any = II("trainer.half_precision")
    pool_size: int = 32
    padding_by_power: Optional[int] = None
class LatticeFreeMMICriterionConfig(FairseqDataclass):
    sentence_avg: bool = II("params.optimization.sentence_avg")
    ddp_backend: DDP_BACKEND_CHOICES = II("params.distributed_training.ddp_backend")
    denominator_fst_path: Optional[str] = field(
        default=None, metadata={"help": "path to the denominator fst file"}
    )
    leaky_hmm_coefficient: float = field(
        default=1.0e-05,
        metadata={"help": "leaky-hmm coefficient for the denominator"},
    )
    xent_regularization_coefficient: float = field(
        default=0.0,
        metadata={"help": "cross-entropy regularization coefficient"},
    )
    output_l2_regularization_coefficient: float = field(
        default=0.0,
        metadata={"help": "L2 regularization coefficient for the network's output"},
    )
class FairseqCPUAdamConfig(FairseqDataclass):
    adam_betas: str = field(
        default="(0.9, 0.999)", metadata={"help": "betas for Adam optimizer"}
    )
    adam_eps: float = field(
        default=1e-8, metadata={"help": "epsilon for Adam optimizer"}
    )
    weight_decay: float = field(default=0.0, metadata={"help": "weight decay"})
    fp16_adam_stats: bool = field(
        default=False, metadata={"help": "use FP16 stats (with automatic scaling)"}
    )
    # TODO common vars below in parent
    lr: List[float] = II("optimization.lr")
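# The II("common.*") / II("optimization.*") fields in the fairseq-style
# configs above all follow the same pattern: they point at sibling groups
# of the top-level config. A minimal sketch of how such a reference
# resolves once both groups live under one root; OptimizationStub is a
# made-up stand-in for fairseq's real optimization group, and it assumes
# FairseqCPUAdamConfig and its base are proper dataclasses.
from dataclasses import dataclass, field
from typing import List

from omegaconf import OmegaConf


@dataclass
class OptimizationStub:
    lr: List[float] = field(default_factory=lambda: [0.25])


root = OmegaConf.create(
    {
        "optimization": OmegaConf.structured(OptimizationStub),
        "optimizer": OmegaConf.structured(FairseqCPUAdamConfig),
    }
)
assert root.optimizer.lr == [0.25]  # II("optimization.lr"), resolved at root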
class BoolConfig:
    # with default value
    with_default: bool = True

    # default is None
    null_default: Optional[bool] = None

    # explicit no default
    mandatory_missing: bool = MISSING

    # interpolation, will inherit the type and value of `with_default`
    interpolation: bool = II("with_default")
class EnumConfig:
    # with default value
    with_default: Color = Color.BLUE

    # default is None
    null_default: Optional[Color] = None

    # explicit no default
    mandatory_missing: Color = MISSING

    # interpolation, will inherit the type and value of `with_default`
    interpolation: Color = II("with_default")
class FloatConfig:
    # with default value
    with_default: float = 0.10

    # default is None
    null_default: Optional[float] = None

    # explicit no default
    mandatory_missing: float = MISSING

    # interpolation, will inherit the type and value of `with_default`
    interpolation: float = II("with_default")
class StringConfig:
    # with default value
    with_default: str = "foo"

    # default is None
    null_default: Optional[str] = None

    # explicit no default
    mandatory_missing: str = MISSING

    # interpolation, will inherit the type and value of `with_default`
    interpolation: str = II("with_default")
class IntegersConfig:
    # with default value
    with_default: int = 10

    # default is None
    null_default: Optional[int] = None

    # explicit no default
    mandatory_missing: int = MISSING

    # interpolation, will inherit the type and value of `with_default`
    interpolation: int = II("with_default")
class BytesConfig:
    # with default value
    with_default: bytes = b"binary"

    # default is None
    null_default: Optional[bytes] = None

    # explicit no default
    mandatory_missing: bytes = MISSING

    # interpolation, will inherit the type and value of `with_default`
    interpolation: bytes = II("with_default")
class PathConfig:
    # with default value
    with_default: Path = Path("hello.txt")

    # default is None
    null_default: Optional[Path] = None

    # explicit no default
    mandatory_missing: Path = MISSING

    # interpolation, will inherit the type and value of `with_default`
    interpolation: Path = II("with_default")
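# A hedged sketch of how the MISSING fields in the typed configs above
# behave (it assumes the classes are @dataclass-decorated omegaconf
# structured configs): a mandatory missing value is accepted at creation
# time and only raises when it is actually accessed, while the
# interpolation field inherits both the type and value of with_default.
from omegaconf import OmegaConf
from omegaconf.errors import MissingMandatoryValue

conf = OmegaConf.structured(IntegersConfig)
assert conf.with_default == 10
assert conf.interpolation == 10  # inherits with_default's value and type
assert OmegaConf.is_missing(conf, "mandatory_missing")
try:
    _ = conf.mandatory_missing   # raises only on access
except MissingMandatoryValue:
    pass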
class LSTMLanguageModelEspressoConfig(FairseqDataclass):
    dropout: float = field(default=0.1, metadata={"help": "dropout probability"})
    decoder_embed_dim: int = field(
        default=48, metadata={"help": "decoder embedding dimension"}
    )
    decoder_embed_path: Optional[str] = field(
        default=None, metadata={"help": "path to pre-trained decoder embedding"}
    )
    decoder_freeze_embed: bool = field(
        default=False, metadata={"help": "freeze decoder embeddings"}
    )
    decoder_hidden_size: int = field(
        default=650, metadata={"help": "decoder hidden size"}
    )
    decoder_layers: int = field(
        default=2, metadata={"help": "number of decoder layers"}
    )
    decoder_out_embed_dim: int = field(
        default=650, metadata={"help": "decoder output embedding dimension"}
    )
    decoder_rnn_residual: bool = field(
        default=False,
        metadata={
            "help": "create residual connections for rnn decoder layers "
            "(starting from the 2nd layer), i.e., the actual output of such "
            "layer is the sum of its input and output"
        },
    )
    adaptive_softmax_cutoff: Optional[str] = field(
        default=None,
        metadata={
            "help": "comma separated list of adaptive softmax cutoff points. "
            "Must be used with adaptive_loss criterion"
        },
    )
    share_embed: bool = field(
        default=False, metadata={"help": "share input and output embeddings"}
    )
    is_wordlm: bool = field(
        default=False,
        metadata={
            "help": "whether it is word LM or subword LM. Only relevant for ASR decoding "
            "with LM, and it determines how the underlying decoder instance gets the "
            "dictionary from the task instance when calling cls.build_model()"
        },
    )
    # Granular dropout settings (if not specified these default to --dropout)
    decoder_dropout_in: Optional[float] = field(
        default=II("model.dropout"),
        metadata={"help": "dropout probability for decoder input embedding"},
    )
    decoder_dropout_out: Optional[float] = field(
        default=II("model.dropout"),
        metadata={"help": "dropout probability for decoder output"},
    )
    # options from other parts of the config
    tokens_per_sample: int = II("task.tokens_per_sample")
    max_target_positions: Optional[int] = II("task.max_target_positions")
    tpu: bool = II("common.tpu")
    criterion_name: Optional[str] = II("criterion._name")
class InverseSquareRootScheduleConfig(FairseqDataclass):
    warmup_updates: int = field(
        default=4000,
        metadata={"help": "warmup the learning rate linearly for the first N updates"},
    )
    warmup_init_lr: float = field(
        default=-1,
        metadata={
            "help": "initial learning rate during warmup phase; default is args.lr"
        },
    )
    # TODO common vars at parent class
    lr: List[float] = II("params.optimization.lr")
class Nested:
    # with default value
    with_default: int = 10

    # default is None
    null_default: Optional[int] = None

    # explicit no default
    mandatory_missing: int = MISSING

    # Note that since relative interpolations are not yet supported,
    # nested configs and interpolations do not play too well together
    interpolation: int = II("value_at_root")
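# Hedged sketch of the note above: II("value_at_root") is an absolute
# interpolation, so Nested only resolves once it is placed under a root
# config that actually defines value_at_root (the key name comes from the
# field above; the value 42 is a made-up placeholder).
from omegaconf import OmegaConf

root = OmegaConf.create(
    {
        "value_at_root": 42,
        "nested": OmegaConf.structured(Nested),
    }
)
assert root.nested.interpolation == 42  # resolved against the root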