Example No. 1
def dummy_frozen_config():
    """Creates a dummy model config that can be used by all tests."""
    config = default_config.get_config()
    config.model_arch = default_config.ModelArchitecture.FF_ONLY
    config.d_emb = 4
    config.d_model = 4
    config.d_ff = 4
    config.max_seq_length = 8
    config.num_layers = 1
    config.vocab_size = 1000
    config.train_batch_size = 2
    return ml_collections.FrozenConfigDict(config)
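Freezing the config is what makes this fixture safe to share across tests: reads behave like a normal ConfigDict, but writes are rejected. A minimal, self-contained sketch of that behaviour, using ml_collections directly rather than the default_config module above:

import ml_collections

# Mirror a couple of the fields set in the fixture above.
config = ml_collections.ConfigDict()
config.d_model = 4
config.num_layers = 1

frozen = ml_collections.FrozenConfigDict(config)
print(frozen.d_model)  # Reads work as on a regular ConfigDict.

try:
    frozen.d_model = 8  # Writes raise once the config is frozen.
except AttributeError as e:
    print("immutable:", e)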
Example No. 2
def get_config():
    """Config for pre-training."""
    config = base_config.get_config()

    # Determines which model to use.
    config.model_arch: ModelArchitecture = ModelArchitecture.F_NET

    config.mode: TrainingMode = TrainingMode.PRETRAINING

    # Total batch size for training.
    config.train_batch_size: int = 256
    # Total batch size for eval.
    config.eval_batch_size: int = 256

    # The base learning rate for Adam.
    config.learning_rate: float = 1e-4
    # If set, determines how much to clip the gradient during training.
    config.clipped_grad_norm: Optional[float] = None

    # Number of training steps.
    config.num_train_steps: int = int(1e6)
    # Number of warm-up steps. We generally find that larger models need more
    # warm-up steps.
    config.num_warmup_steps: int = int(1e4)

    # How often to save the model checkpoint.
    config.save_checkpoints_steps: int = 2000
    # Frequency of eval during training, e.g. every 2000 steps.
    config.eval_frequency: int = 2000

    # Maximum number of eval steps.
    config.max_num_eval_steps: int = 1000

    # Do not start from a pre-trained checkpoint.
    config.init_checkpoint_dir: str = ''

    # Maximum number of masked LM predictions per sequence.
    config.max_predictions_per_seq: int = 80
    # Proportion of tokens for masked LM predictions. Total number of selected
    # tokens will be at most config.max_predictions_per_seq.
    config.masking_rate: float = 0.15
    # Proportion of masked tokens to replace with the [MASK] token.
    config.mask_token_proportion: float = 0.8
    # Proportion of masked tokens to replace with a random token.
    config.random_token_proportion: float = 0.1
    # The remaining 1 - config.mask_token_proportion - config.random_token_proportion
    # fraction of selected tokens is left unchanged.

    # Dummy attribute for repeated runs.
    config.trial: int = 0

    return config
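A config module like this is normally consumed through ml_collections' config_flags machinery, so any field can be overridden from the command line without editing the file. The sketch below shows that common launcher pattern; the file path and the override example are assumptions for illustration, not the repository's actual entry point:

from absl import app
from absl import flags
from ml_collections import config_flags

FLAGS = flags.FLAGS
# Assumed location of the pre-training config shown above.
config_flags.DEFINE_config_file("config", default="configs/pretraining.py")


def main(argv):
    del argv  # Unused.
    config = FLAGS.config  # The ConfigDict built by get_config().
    print(config.model_arch, config.learning_rate, config.num_train_steps)


if __name__ == "__main__":
    app.run(main)

# Example command-line override (hypothetical script name):
#   python train.py --config=configs/pretraining.py --config.learning_rate=3e-4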
Example No. 3
def dummy_config(model_arch):
    """Creates a dummy model config that can be used by all tests."""
    config = base_config.get_config()
    config.model_arch = model_arch
    config.d_emb = 8
    config.d_model = 8
    config.d_ff = 8
    config.max_seq_length = 16
    config.num_heads = 1
    config.num_layers = 2
    config.vocab_size = 280
    config.train_batch_size = 3
    config.eval_batch_size = 2
    config.use_fft = True

    return config
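Note that, unlike Examples 1 and 5, this fixture returns a plain (mutable) ConfigDict, so individual tests can still tweak fields before freezing the config themselves. A small self-contained sketch of that pattern, with a stand-in for base_config.get_config():

import ml_collections


def dummy_config_sketch():
    # Stand-in for base_config.get_config(); only a few of the fields above.
    config = ml_collections.ConfigDict()
    config.d_model = 8
    config.num_layers = 2
    config.use_fft = True
    return config


config = dummy_config_sketch()
config.num_layers = 4  # Per-test override, still allowed on a mutable config.
frozen = ml_collections.FrozenConfigDict(config)
print(frozen.num_layers)  # 4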
Example No. 4
def get_config():
    """Config for fine-tuning (classification)."""
    config = base_config.get_config()

    # Determines which model to use.
    config.model_arch: ModelArchitecture = ModelArchitecture.F_NET

    config.mode: TrainingMode = TrainingMode.CLASSIFICATION

    # Fine-tuning dataset: "glue/DS_g", where DS_g is one of
    # [cola, sst2, mrpc, qqp, stsb, mnli, qnli, rte, wnli].
    config.dataset_name: str = "glue/rte"

    # How often to save the model checkpoint.
    config.save_checkpoints_steps: int = 200
    # Training metrics will be computed (1 / eval_proportion) times during
    # training at regularly spaced intervals, regardless of dataset size.
    config.eval_proportion: float = 0.05

    # Total batch size for training.
    config.train_batch_size: int = 64
    # Total batch size for eval (and predictions).
    config.eval_batch_size: int = 32

    # The base learning rate for Adam.
    config.learning_rate: float = 1e-5

    # Total number of training epochs to perform.
    config.num_train_epochs: float = 3
    # Proportion of training to perform linear learning rate warmup for.
    # E.g., 0.1 = 10% of training steps.
    config.warmup_proportion: float = 0.1

    # Maximum number of eval steps on the validation split. The actual number of
    # steps may be smaller for small eval datasets.
    config.max_num_eval_steps: int = int(1e5)

    # Initial checkpoint directory or filepath (usually from a pre-trained model).
    config.init_checkpoint_dir: str = ""

    # Dummy attribute for repeated runs.
    config.trial: int = 0

    return config
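Because warm-up here is given as a proportion rather than a fixed step count (compare num_warmup_steps in the pre-training config), the actual number of warm-up steps depends on the dataset and batch size. A worked sketch of that arithmetic; the RTE training-set size is an illustrative assumption, and the exact rounding used by the training loop may differ:

# Illustrative numbers for glue/rte (~2,490 training examples).
num_train_examples = 2490
train_batch_size = 64      # config.train_batch_size
num_train_epochs = 3       # config.num_train_epochs
warmup_proportion = 0.1    # config.warmup_proportion

steps_per_epoch = num_train_examples // train_batch_size       # 38
num_train_steps = steps_per_epoch * num_train_epochs           # 114
num_warmup_steps = int(warmup_proportion * num_train_steps)    # 11

print(steps_per_epoch, num_train_steps, num_warmup_steps)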
Example No. 5
def dummy_frozen_config(
    model_arch,
    max_seq_length = 16,
    use_tpu_fourier_optimizations = False,
    dataset_name = "dummy/data"):
  """Creates a dummy model config that can be used by all tests."""
  config = base_config.get_config()
  config.model_arch = model_arch
  config.use_tpu_fourier_optimizations = use_tpu_fourier_optimizations
  config.d_emb = 8
  config.d_model = 8
  config.d_ff = 8
  config.max_seq_length = max_seq_length
  config.num_heads = 1
  config.num_layers = 2
  config.vocab_size = 28000
  config.train_batch_size = 3
  config.eval_batch_size = 2
  config.dataset_name = dataset_name

  # Pre-training only.
  config.max_predictions_per_seq = 7

  return ml_collections.FrozenConfigDict(config)
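The returned FrozenConfigDict cannot be modified, so a test that needs to change a field the fixture does not expose as a parameter should thaw a copy rather than mutate the shared config. A minimal sketch of that pattern, using stand-in values and ml_collections only:

import ml_collections

# Stand-in for the frozen fixture above, reduced to two fields.
frozen = ml_collections.FrozenConfigDict(
    {"vocab_size": 28000, "dataset_name": "dummy/data"})

# Thaw a mutable copy, tweak it, and re-freeze for the test that needs it.
thawed = ml_collections.ConfigDict(frozen.to_dict())
thawed.vocab_size = 100
refrozen = ml_collections.FrozenConfigDict(thawed)

print(frozen.vocab_size, refrozen.vocab_size)  # 28000 100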