def transformer_sketch_ranged(rhp):
  """Range of hparams for vizier."""

  hparams = transformer_sketch()
  common_hparams.fill_ranged_hparams_from_hparams(hparams, rhp)

  rhp.set_categorical("ffn_layer",
                      ["conv_hidden_relu_with_sepconv", "conv_hidden_relu"])
  rhp.set_discrete("batch_size", [1024, 2048, 4096])
  rhp.set_discrete("num_hidden_layers", [2, 3, 4, 5, 6])
  rhp.set_discrete("hidden_size", [32, 64, 128, 256, 512, 1024],
                   scale=rhp.LOG_SCALE)
  rhp.set_discrete("kernel_height", [1, 3, 5, 7])
  rhp.set_discrete("kernel_width", [1, 3, 5, 7])
  rhp.set_discrete("compress_steps", [0, 1, 2])
  rhp.set_float("dropout", 0.0, 0.5)
  rhp.set_float("weight_decay", 1e-4, .03, scale=rhp.LOG_SCALE)
  rhp.set_float("label_smoothing", 0.0, 0.2)
  rhp.set_float("clip_grad_norm", 0.01, 8.0, scale=rhp.LOG_SCALE)
  rhp.set_float("learning_rate", 0.1, 1.0, scale=rhp.LOG_SCALE)
  rhp.set_categorical("initializer",
                      ["uniform", "orthogonal", "uniform_unit_scaling"])
  rhp.set_float("initializer_gain", 0.5, 3.5)
  rhp.set_categorical("learning_rate_decay_scheme",
                      ["none", "sqrt", "noam", "exp10k"])
  rhp.set_float("optimizer_adam_epsilon", 1e-7, 1e-2, scale=rhp.LOG_SCALE)
  rhp.set_float("optimizer_adam_beta1", 0.8, 0.9)
  rhp.set_float("optimizer_adam_beta2", 0.995, 0.999)
  rhp.set_categorical("optimizer", [
      "Adam", "Adagrad", "Momentum", "RMSProp", "SGD", "YellowFin"])
Example #2
def slicenet_range1(ranged_hparams):
  """Small range of hyperparameters."""
  rhp = ranged_hparams

  hparams = slicenet_params1()
  common_hparams.fill_ranged_hparams_from_hparams(hparams, rhp)

  rhp.set_float("clip_grad_norm", 1.0, 10.0, scale=rhp.LOG_SCALE)
  rhp.set_float("learning_rate", 0.02, 1.0, scale=rhp.LOG_SCALE)
  rhp.set_float("optimizer_adam_beta2", 0.995, 0.998)
  rhp.set_float("weight_decay", 1.0, 5.0)
Example #3
def transformer_tpu_range(rhp):
  """Small range of hyperparameters."""
  hparams = transformer_tpu()
  common_hparams.fill_ranged_hparams_from_hparams(hparams, rhp)
  # After starting from base, set intervals for some parameters.
  rhp.set_float("learning_rate", 0.3, 3.0, scale=rhp.LOG_SCALE)
  rhp.set_discrete("learning_rate_warmup_steps",
                   [1000, 2000, 4000, 8000, 16000])
  rhp.set_float("initializer_gain", 0.5, 2.0)
  rhp.set_float("optimizer_adam_beta1", 0.85, 0.95)
  rhp.set_float("optimizer_adam_beta2", 0.97, 0.99)
  rhp.set_float("weight_decay", 0.0, 2.0)
Example #7
def transformer_tpu_batch_range(rhp):
  """Range over per-shard batch size and maximum sequence length."""
  hparams = transformer_tpu()
  common_hparams.fill_ranged_hparams_from_hparams(hparams, rhp)
  rhp.set_discrete("tpu_batch_size_per_shard", [1] + list(range(2, 16, 2)))
  rhp.set_discrete("max_length", list(range(128, 416, 16)))
Example #8
def transformer_tpu_batch_range(rhp):
  """Small range over per-shard batch size."""
  hparams = transformer_tpu()
  common_hparams.fill_ranged_hparams_from_hparams(hparams, rhp)
  rhp.set_discrete("tpu_batch_size_per_shard", [1, 2, 3, 4])
Example #9
def transformer_tiny_tpu_range(rhp):
  """Small range of hyperparameters."""
  hparams = transformer_tiny_tpu()
  common_hparams.fill_ranged_hparams_from_hparams(hparams, rhp)
  rhp.set_float("learning_rate", 0.3, 3.0, scale=rhp.LOG_SCALE)
  rhp.set_float("weight_decay", 0.0, 2.0)