Example n. 1
0
def sample_mlp_vae_family_cfg(seed):
    """Sample a configuration for an MLP VAE task on image data.

    The returned config is a nested python structure carrying enough
    information to instantiate the corresponding problem.

    Args:
      seed: int, random seed the configuration is derived from.

    Returns:
      A nested dictionary containing a configuration.
    """
    rng = np.random.RandomState(seed)

    # Encoder: 1-4 hidden layers, each width drawn log-uniformly in [32, 128].
    num_encoder_layers = rng.choice([1, 2, 3, 4])
    encoder_units = []
    for _ in range(num_encoder_layers):
        encoder_units.append(utils.sample_log_int(rng, 32, 128))

    # Decoder: 1-3 hidden layers, sampled the same way.
    num_decoder_layers = rng.choice([1, 2, 3])
    decoder_units = []
    for _ in range(num_decoder_layers):
        decoder_units.append(utils.sample_log_int(rng, 32, 128))

    return {
        "enc_hidden_units": encoder_units,
        "dec_hidden_units": decoder_units,
        "activation": utils.sample_activation(rng),
        "w_init": utils.sample_initializer(rng),
        "dataset": utils.sample_image_dataset(rng),
    }
Example n. 2
0
def sample_rnn_text_classification_family_cfg(seed):
    """Sample a task config for an RNN text-classification task.

    These configs are nested python structures that provide enough
    information to create an instance of the problem.

    Args:
      seed: int, random seed to generate the task from.

    Returns:
      A nested dictionary containing a configuration.
    """
    rng = np.random.RandomState(seed)

    config = {}
    # Token embedding width, log-uniform in [8, 128].
    config["embed_dim"] = utils.sample_log_int(rng, 8, 128)
    config["w_init"] = utils.sample_initializer(rng)
    config["dataset"] = utils.sample_text_dataset(rng)

    # TODO(lmetz) trim this if using characters...
    config["vocab_size"] = utils.sample_log_int(rng, 100, 10000)

    config["core"] = utils.sample_rnn_core(rng)
    config["trainable_init"] = bool(rng.choice([True, False]))
    # How per-timestep outputs are reduced when computing the loss.
    config["loss_compute"] = rng.choice(["last", "avg", "max"])
    return config
Example n. 3
0
def sample_mlp_ae_family_cfg(seed):
    """Sample a task config for an MLP autoencoder on image data.

    These configs are nested python structures that provide enough
    information to create an instance of the problem.

    Args:
      seed: int, random seed to generate the task from.

    Returns:
      A nested dictionary containing a configuration.
    """
    rng = np.random.RandomState(seed)

    depth = rng.choice([1, 2, 3, 4, 5, 6])
    widths = [utils.sample_log_int(rng, 32, 128) for _ in range(depth)]

    cfg = {
        "hidden_units": widths,
        "activation": utils.sample_activation(rng),
        "w_init": utils.sample_initializer(rng),
        "dataset": utils.sample_image_dataset(rng),
    }

    # Duplicated entries give the tanh and sigmoid outputs double the
    # selection weight of the linear variants.
    cfg["output_type"] = rng.choice(
        ["tanh", "tanh", "sigmoid", "sigmoid", "linear_center", "linear"])

    # l2 is listed twice so it is drawn twice as often as l1.
    cfg["loss_type"] = rng.choice(["l2", "l2", "l1"])

    cfg["reduction_type"] = rng.choice(["reduce_mean", "reduce_sum"])

    return cfg
Example n. 4
0
def sample_mlp_family_cfg(seed):
    """Sample a task config for an MLP model on image datasets.

    These configs are nested python structures that provide enough
    information to create an instance of the problem.

    Args:
      seed: int, random seed to generate the task from.

    Returns:
      A nested dictionary containing a configuration.
    """
    # Offset the seed so this family draws different configs than other
    # task families given the same base seed.
    rng = np.random.RandomState(seed + 99123491)

    cfg = collections.OrderedDict()
    depth = rng.choice([1, 2, 3, 4, 5, 6])
    sizes = []
    for _ in range(depth):
        sizes.append(utils.sample_log_int(rng, 16, 128))
    cfg["layer_sizes"] = sizes
    cfg["activation"] = utils.sample_activation(rng)
    cfg["w_init"] = utils.sample_initializer(rng)
    cfg["dataset"] = utils.sample_image_dataset(rng)
    cfg["center_data"] = bool(rng.choice([True, False]))
    return cfg
Example n. 5
0
def sample_maf_family_cfg(seed):
    """Sample a task config for a MAF model on image datasets.

    These configs are nested python structures that provide enough
    information to create an instance of the problem.

    Args:
      seed: int, random seed to generate the task from.

    Returns:
      A nested dictionary containing a configuration.
    """
    # Seed offset keeps this family's draws distinct from other problems.
    rng = np.random.RandomState(seed + 6123348)

    cfg = {
        "activation": utils.sample_activation(rng),
        "w_init": utils.sample_initializer(rng),
        "dataset": utils.sample_image_dataset(rng),
    }

    depth = int(rng.choice([1, 2]))
    cfg["hidden_units"] = [
        utils.sample_log_int(rng, 16, 128) for _ in range(depth)
    ]
    cfg["num_bijectors"] = int(rng.choice([1, 2, 3, 4]))
    return cfg
Example n. 6
0
def sample_nvp_family_cfg(seed):
    """Sample a task config for an NVP model on image datasets.

    These configs are nested python structures that provide enough
    information to create an instance of the problem.

    Args:
      seed: int, random seed to generate the task from.

    Returns:
      A nested dictionary containing a configuration.
    """
    # Seed offset so this task family gets its own stream of randomness.
    rng = np.random.RandomState(seed + 123919936)
    cfg = {}

    # TODO(lmetz) consider also forwarding per layer activation and per layer
    # w_init.
    cfg["activation"] = utils.sample_activation(rng)
    cfg["w_init"] = utils.sample_initializer(rng)
    cfg["dataset"] = utils.sample_image_dataset(rng)

    layer_count = int(rng.choice([1, 2]))
    units = []
    for _ in range(layer_count):
        units.append(utils.sample_log_int(rng, 16, 128))
    cfg["hidden_units"] = units

    cfg["num_bijectors"] = int(rng.choice([1, 2, 3, 4]))
    return cfg
Example n. 7
0
def sample_word_language_model_family_cfg(seed):
    """Sample a configuration for a word-level language model task."""
    rng = np.random.RandomState(seed)
    # Dict-literal entries are evaluated top to bottom, so the sampling
    # order (and thus the RNG stream) matches sequential assignment.
    return {
        "embed_dim": utils.sample_log_int(rng, 8, 128),
        "w_init": utils.sample_initializer(rng),
        "vocab_size": utils.sample_log_int(rng, 1000, 30000),
        "core": utils.sample_rnn_core(rng),
        "trainable_init": bool(rng.choice([True, False])),
        "dataset": utils.sample_word_lm_dataset(rng),
    }
Example n. 8
0
def sample_conv_fc_family_cfg(seed):
    """Sample a task config for a conv net with a fully connected layer on top.

    These configs are nested python structures that provide enough
    information to create an instance of the problem.

    Args:
      seed: int, random seed to generate the task from.

    Returns:
      A nested dictionary containing a configuration.
    """
    rng = np.random.RandomState(seed)
    cfg = {}

    layer_choices = [1, 2, 3, 4, 5]
    deepest = np.max(layer_choices)
    num_conv_layers = rng.choice(layer_choices)

    # Stride pattern: either every conv layer strides by 2, or strides
    # alternate starting from 1 ("one_two") or from 2 ("two_one").
    stride_pattern = rng.choice(["all_two", "one_two", "two_one"])
    base = {"all_two": [2], "one_two": [1, 2], "two_one": [2, 1]}[stride_pattern]
    scalar_strides = (base * deepest)[:num_conv_layers]
    # Store strides as (height, width) pairs.
    cfg["strides"] = [(s, s) for s in scalar_strides]
    cfg["hidden_units"] = [
        utils.sample_log_int(rng, 8, 64) for _ in range(num_conv_layers)
    ]

    cfg["activation"] = utils.sample_activation(rng)
    cfg["w_init"] = utils.sample_initializer(rng)
    # Per-layer padding mode, stored as a plain string.
    cfg["padding"] = [
        str(rng.choice([snt.SAME, snt.VALID])) for _ in range(num_conv_layers)
    ]

    num_fc_layers = rng.choice([0, 1, 2, 3])
    cfg["fc_hidden_units"] = [
        utils.sample_log_int(rng, 32, 128) for _ in range(num_fc_layers)
    ]

    cfg["use_bias"] = bool(rng.choice([True, False]))
    cfg["dataset"] = utils.sample_image_dataset(rng)
    cfg["center_data"] = bool(rng.choice([True, False]))
    return cfg
    def test_sample_get_initializer(self):
        """Sampled initializers build tensors and roughly match their priors."""
        rng = np.random.RandomState(123)
        num = 3000
        names = []
        for _ in range(num):
            name, args = utils.sample_initializer(rng)
            names.append(name)
            # Smoke test: the sampled spec must build a real tensor.
            tensor = utils.get_initializer((name, args))((10, 10))
            self.assertIsInstance(tensor, tf.Tensor)

        unique_names, freqs = np.unique(names, return_counts=True)
        counts_map = dict(zip(map(str, unique_names), freqs))
        # 13 is the total sum of unnormalized probs
        amount_per_n = num / float(13)
        self.assertNear(counts_map["he_normal"], amount_per_n * 2, 40)
        self.assertNear(counts_map["orthogonal"], amount_per_n, 40)
        self.assertNear(counts_map["glorot_normal"], amount_per_n * 2, 40)
Example n. 10
0
def sample_char_rnn_language_model_family_cfg(seed):
    """Sample a configuration for a character-level RNN language model task."""
    rng = np.random.RandomState(seed)
    cfg = {}
    cfg["embed_dim"] = utils.sample_log_int(rng, 8, 128)
    cfg["w_init"] = utils.sample_initializer(rng)

    # With probability 0.8 keep the full 256-entry byte vocabulary; otherwise
    # restrict to a smaller, log-uniformly sampled subset.
    if utils.sample_bool(rng, 0.8):
        cfg["vocab_size"] = 256
    else:
        cfg["vocab_size"] = utils.sample_log_int(rng, 100, 256)

    cfg["core"] = utils.sample_rnn_core(rng)
    cfg["trainable_init"] = bool(rng.choice([True, False]))

    cfg["dataset"] = utils.sample_char_lm_dataset(rng)
    return cfg