Example 1
def universal_transformer_tiny():
    hparams = transformer.transformer_tiny()
    hparams = update_hparams_for_universal_transformer(hparams)
    # Wherever we get good scores for the tiny model, it's with "sepconv".
    hparams.transformer_ffn_type = "sepconv"
    hparams.num_rec_steps = 8
    return hparams
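
These *_tiny variants are normally registered with tensor2tensor and selected by name from the trainer. The snippet below is a minimal sketch of that pattern, not taken from any example above: it assumes tensor2tensor's registry.register_hparams decorator, and the variant name and the filter_size override are purely illustrative.

from tensor2tensor.models import transformer
from tensor2tensor.utils import registry


@registry.register_hparams
def my_transformer_tiny_variant():
    # Hypothetical variant: start from transformer_tiny and widen the
    # feed-forward filter, mirroring the pattern used throughout these examples.
    hparams = transformer.transformer_tiny()
    hparams.filter_size = 256
    return hparams

# The registered name can then be selected on the command line, e.g.:
#   t2t-trainer --hparams_set=my_transformer_tiny_variant ...
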
Example 2
def transformer_extra_tiny():
    hparams = transformer.transformer_tiny()
    hparams.num_hidden_layers = 1
    hparams.hidden_size = 32
    hparams.filter_size = 128
    hparams.num_heads = 2
    return hparams
Example 3
    def testSlowVsFast(self):
        model, features = get_model(transformer.transformer_tiny())

        decode_length = 30

        out_logits, _ = model(features)
        out_logits = tf.squeeze(out_logits, axis=[2, 3])
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
            labels=tf.reshape(features["targets"], [-1]))
        loss = tf.reduce_mean(loss)
        apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

        with self.test_session():
            tf.global_variables_initializer().run()
            for _ in range(10):
                apply_grad.run()

        model.set_mode(tf.estimator.ModeKeys.PREDICT)

        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            greedy_result = model._slow_greedy_infer(features,
                                                     decode_length)["outputs"]
            greedy_result = tf.squeeze(greedy_result, axis=[2, 3])

            fast_result = model._greedy_infer(features,
                                              decode_length)["outputs"]

        with self.test_session():
            greedy_res = greedy_result.eval()
            fast_res = fast_result.eval()

        self.assertEqual(fast_res.shape,
                         (BATCH_SIZE, INPUT_LENGTH + decode_length))
        self.assertAllClose(greedy_res, fast_res)
Example 4
def get_model(hparams=None, mode=tf.estimator.ModeKeys.TRAIN,
              has_input=True, model_cls=transformer.Transformer):
  if hparams is None:
    hparams = transformer.transformer_tiny()
  hparams.hidden_size = 8
  hparams.filter_size = 32
  hparams.num_heads = 1
  hparams.layer_prepostprocess_dropout = 0.0

  p_hparams = problem_hparams.test_problem_hparams(VOCAB_SIZE,
                                                   VOCAB_SIZE,
                                                   hparams)
  if not has_input:
    del p_hparams.modality["inputs"]
  hparams.problem_hparams = p_hparams

  inputs = -1 + np.random.random_integers(
      VOCAB_SIZE, size=(BATCH_SIZE, INPUT_LENGTH, 1, 1))
  targets = -1 + np.random.random_integers(
      VOCAB_SIZE, size=(BATCH_SIZE, TARGET_LENGTH, 1, 1))
  features = {
      "targets": tf.constant(targets, dtype=tf.int32, name="targets"),
      "target_space_id": tf.constant(1, dtype=tf.int32)
  }
  if has_input:
    features["inputs"] = tf.constant(inputs, dtype=tf.int32, name="inputs")

  return model_cls(hparams, mode, p_hparams), features
Example 5
    def _create_greedy_infer_model(self):
        """Creates model for greedy inference testing.

    Returns:
      model: A t2t model.
      features: An map of string to tensor.
    """
        model, features = get_model(transformer.transformer_tiny())

        out_logits, _ = model(features)
        out_logits = tf.squeeze(out_logits, axis=[2, 3])
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
            labels=tf.reshape(features["targets"], [-1]))
        loss = tf.reduce_mean(loss)
        apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

        with self.test_session():
            tf.global_variables_initializer().run()
            for _ in range(10):
                apply_grad.run()

        model.set_mode(tf.estimator.ModeKeys.PREDICT)

        return model, features
Example 6
def m2m_m_transformer_hparams():
    hparams = transformer_tiny()
    hparams.num_hidden_layers = 4
    hparams.hidden_size = 128
    hparams.filter_size = 512
    hparams.num_heads = 4
    return hparams
Example 7
  def _testTransformer(self, net):
    batch_size = 3
    input_length = 5
    target_length = 7
    vocab_size = 9
    hparams = transformer.transformer_tiny()
    p_hparams = problem_hparams.test_problem_hparams(
        hparams, vocab_size, vocab_size)
    inputs = -1 + np.random.random_integers(
        vocab_size, size=(batch_size, input_length, 1, 1))
    targets = -1 + np.random.random_integers(
        vocab_size, size=(batch_size, target_length, 1, 1))
    with self.test_session() as session:
      features = {
          "inputs": tf.constant(inputs, dtype=tf.int32),
          "targets": tf.constant(targets, dtype=tf.int32),
          "target_space_id": tf.constant(1, dtype=tf.int32),
      }
      model = net(hparams, p_hparams)
      sharded_logits, _, _ = model.model_fn(features, True)
      logits = tf.concat(sharded_logits, 0)
      session.run(tf.global_variables_initializer())
      res = session.run(logits)
    self.assertEqual(res.shape,
                     (batch_size, target_length, 1, 1, vocab_size))
Example 8
def get_model(hparams=None, mode=tf.estimator.ModeKeys.TRAIN,
              has_input=True, model_cls=transformer.Transformer):
  if hparams is None:
    hparams = transformer.transformer_tiny()
  hparams.hidden_size = 8
  hparams.filter_size = 32
  hparams.num_heads = 1
  hparams.layer_prepostprocess_dropout = 0.0

  p_hparams = problem_hparams.test_problem_hparams(VOCAB_SIZE,
                                                   VOCAB_SIZE,
                                                   hparams)
  if not has_input:
    del p_hparams.modality["inputs"]
  hparams.problem_hparams = p_hparams

  inputs = -1 + np.random.random_integers(
      VOCAB_SIZE, size=(BATCH_SIZE, INPUT_LENGTH, 1, 1))
  targets = -1 + np.random.random_integers(
      VOCAB_SIZE, size=(BATCH_SIZE, TARGET_LENGTH, 1, 1))
  features = {
      "targets": tf.constant(targets, dtype=tf.int32, name="targets"),
      "target_space_id": tf.constant(1, dtype=tf.int32)
  }
  if has_input:
    features["inputs"] = tf.constant(inputs, dtype=tf.int32, name="inputs")

  return model_cls(hparams, mode, p_hparams), features
Example 9
def transformer_extra_tiny_agg():
    hparams = transformer.transformer_tiny()
    hparams.batch_size = 8
    hparams.num_hidden_layers = 1
    hparams.hidden_size = 64
    hparams.filter_size = 256
    hparams.num_heads = 2
    return hparams
Example 10
  def testEvolvedTransformer(self):
    model, features = get_model(hparams=transformer.transformer_tiny())
    logits, _ = model(features)
    with self.test_session() as session:
      session.run(tf.global_variables_initializer())
      res = session.run(logits)
    self.assertEqual(res.shape,
                     (BATCH_SIZE, TARGET_LENGTH, 1, 1, VOCAB_SIZE))
Example 11
def dstc_transformer_hparams_v8():
    hparams = transformer_tiny()
    hparams.num_hidden_layers = 4
    hparams.dropout = 0.7
    hparams.hidden_size = 128
    hparams.filter_size = 512
    hparams.num_heads = 4
    return hparams
Example 12
def t_rel_len2048_dropout15_tiny():
  """Hparams for LM with relative attention, tiny transformer."""
  # hparams = transformer.transformer_base()
  hparams = transformer.transformer_tiny()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_dropout(hparams, 0.15)
  hparams.self_attention_type = "dot_product_relative_v2"
  # Need to specify num_hidden_layers
  hparams.attention_key_channels = 512
  hparams.num_hidden_layers = 8
  return hparams
Example 13
def get_model():
  hparams = transformer.transformer_tiny()
  hparams.layer_prepostprocess_dropout = 0.0

  p_hparams = problem_hparams.test_problem_hparams(VOCAB_SIZE, VOCAB_SIZE,
                                                   hparams)
  hparams.problem_hparams = p_hparams

  inputs = np.random.randint(VOCAB_SIZE, size=(BATCH_SIZE, INPUT_LENGTH, 1, 1))
  targets = np.random.randint(
      VOCAB_SIZE, size=(BATCH_SIZE, TARGET_LENGTH, 1, 1))
  features = {
      "targets": tf.constant(targets, dtype=tf.int32, name="targets"),
      "target_space_id": tf.constant(1, dtype=tf.int32),
      "inputs": tf.constant(inputs, dtype=tf.int32, name="inputs"),
  }

  return (evolved_transformer.EvolvedTransformer(
      hparams, tf.estimator.ModeKeys.TRAIN, p_hparams), features)
Example 14
def transformer_encoding(node_seq_input, num_nodes, params, mode):
    """Construct a node-level encoder based on the transformer module.

  Args:
    node_seq_input : tf.Tensor. A tensor with 3 dimensions.
    num_nodes: tf.Tensor. Number of nodes per instance.
    params : dict. A parameter dictionary.
    mode : tf.estimator.ModeKeys object.

  Returns:
    node_seq_output: tf.Tensor. A tensor with 3 dimensions.
  """
    node_weights = tf.sequence_mask(num_nodes)

    hparams = transformer.transformer_tiny()
    hparams.hidden_size = params["transformer_hidden_unit"]
    hparams.num_heads = params["transformer_head"]
    hparams.num_hidden_layers = params["transformer_hidden_layer"]

    if hparams.hidden_size % hparams.num_heads != 0:
        raise ValueError(
            "transformer_hidden_unit must be divisible by transformer_head.")

    transformer_encoder = transformer.TransformerEncoder(hparams, mode=mode)
    # Input shape [batch_size, sequence_length, 1, hidden_dim].
    node_seq_input = tf.layers.dense(node_seq_input, hparams.hidden_size)
    node_seq_input_reshape = tf.expand_dims(node_seq_input, 2)
    # Targets and target_space_id are required by the transformer decoder;
    # both are set to 0 since only the encoder is used here.
    node_seq_output = transformer_encoder(
        {
            "inputs": node_seq_input_reshape,
            "targets": 0,
            "target_space_id": 0,
        },
        nonpadding=node_weights)
    node_seq_output = tf.squeeze(node_seq_output[0], 2)
    # Construct a residual connection by adding the input and the output.
    node_seq_output = tf.add(node_seq_input, node_seq_output)
    return node_seq_output
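
A minimal usage sketch for the helper above. The params keys are the ones the function reads; the batch size, node count, and feature width are illustrative assumptions, and the sketch reuses the same TF1-style imports as the function itself.

import tensorflow as tf

params = {
    "transformer_hidden_unit": 64,   # must be divisible by transformer_head
    "transformer_head": 4,
    "transformer_hidden_layer": 2,
}
# 8 instances, up to 10 nodes each, 16-dimensional raw node features.
node_seq_input = tf.random_normal([8, 10, 16])
num_nodes = tf.constant([10, 10, 10, 10, 10, 10, 10, 10])
node_seq_output = transformer_encoding(
    node_seq_input, num_nodes, params, tf.estimator.ModeKeys.TRAIN)
# node_seq_output has shape [8, 10, 64]: the dense projection maps the raw
# features to hidden_size before the encoder and the residual addition.
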
Example 15
def transformer_hparams(hidden_size):
  """Creates hyperpameters for autoregressive prior.

  Args:
    hidden_size: Width of attention layers and neural network output layer.

  Returns:
    hparams: Hyperpameters with basic presets for a Transformer.
  """
  hparams = transformer.transformer_tiny()
  hparams.add_hparam("shared_rel", False)
  hparams.add_hparam("q_filter_width", 1)
  hparams.add_hparam("kv_filter_width", 1)
  hparams.hidden_size = hidden_size
  hparams.num_layers = 6
  hparams.layer_prepostprocess_dropout = 0.
  hparams.attention_dropout = 0.
  hparams.relu_dropout = 0.
  hparams.block_length = 1
  hparams.block_width = 1
  hparams.ffn_layer = "conv_hidden_relu"
  return hparams
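
For context, a short usage sketch of the helper above; the width of 512 is an arbitrary illustrative value.

# Build prior hparams whose attention layers and output layer are 512 wide.
prior_hparams = transformer_hparams(hidden_size=512)
assert prior_hparams.hidden_size == 512
assert prior_hparams.ffn_layer == "conv_hidden_relu"
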
Example 16
    def testBeamVsFast(self):
        model, features = get_model(transformer.transformer_tiny())

        decode_length = 30

        out_logits, _ = model(features)
        out_logits = tf.squeeze(out_logits, axis=[2, 3])
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
            labels=tf.reshape(features["targets"], [-1]))
        loss = tf.reduce_mean(loss)
        apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

        with self.test_session():
            tf.global_variables_initializer().run()
            for _ in range(10):
                apply_grad.run()

        model.set_mode(tf.estimator.ModeKeys.PREDICT)

        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            beam_result = model._beam_decode_slow(features,
                                                  decode_length,
                                                  beam_size=4,
                                                  top_beams=1,
                                                  alpha=1.0)["outputs"]

            fast_result = model._beam_decode(features,
                                             decode_length,
                                             beam_size=4,
                                             top_beams=1,
                                             alpha=1.0)["outputs"]

        with self.test_session():
            beam_res = beam_result.eval()
            fast_res = fast_result.eval()

        self.assertAllClose(beam_res, fast_res)
Example 17
  def _testTransformer(self, net):
    batch_size = 3
    input_length = 5
    target_length = 7
    vocab_size = 9
    hparams = transformer.transformer_tiny()
    p_hparams = problem_hparams.test_problem_hparams(
        hparams, vocab_size, vocab_size)
    inputs = -1 + np.random.random_integers(
        vocab_size, size=(batch_size, input_length, 1, 1))
    targets = -1 + np.random.random_integers(
        vocab_size, size=(batch_size, target_length, 1, 1))
    with self.test_session() as session:
      features = {
          "inputs": tf.constant(inputs, dtype=tf.int32),
          "targets": tf.constant(targets, dtype=tf.int32),
          "target_space_id": tf.constant(1, dtype=tf.int32),
      }
      model = net(hparams, p_hparams)
      sharded_logits, _, _ = model.model_fn(features, True)
      logits = tf.concat(sharded_logits, 0)
      session.run(tf.global_variables_initializer())
      res = session.run(logits)
    self.assertEqual(res.shape, (batch_size, target_length, 1, 1, vocab_size))
Example 18
def universal_transformer_tiny1():
  hparams = transformer.transformer_tiny()
  hparams = update_hparams_for_universal_transformer(hparams)
  hparams.num_rec_steps = 8
  return hparams
Example 19
def transformer_aux_tiny():
    """Set of hyperparameters."""
    hparams = transformer.transformer_tiny()
    hparams.shared_embedding_and_softmax_weights = False
    hparams.add_hparam("shift_values", "1,2")
    return hparams
Example 20
def transformer_tiny_bs3():
    hparams = transformer.transformer_tiny()
    hparams.add_hparam("block_size", 3)
    return hparams
Example 21
def r_transformer_tiny():
    hparams = transformer.transformer_tiny()
    hparams = update_hparams_for_r_transformer(hparams)
    hparams.num_rec_steps = 8
    return hparams
Example 22
def evolved_transformer_tiny():
    """Base parameters for Evolved Transformer model."""
    hparams = add_evolved_transformer_hparams(transformer.transformer_tiny())
    hparams.learning_rate_schedule = "constant*single_cycle_cos_decay"
    return hparams
Example 23
def transformer_aux_tiny():
  """Set of hyperparameters."""
  hparams = transformer.transformer_tiny()
  hparams.shared_embedding_and_softmax_weights = False
  hparams.add_hparam("shift_values", "1,2")
  return hparams
Example 24
def universal_transformer_tiny():
  hparams = transformer.transformer_tiny()
  hparams = update_hparams_for_universal_transformer(hparams)
  hparams.num_rec_steps = 8
  return hparams