Example #1
0
  def testTransformerStackFPropWithPackedInputs(self):
    """Checks that packed-input FProp agrees with the unpacked equivalent."""
    # batch = 2, time = 2, depth = 2
    with self.session(use_gpu=True) as sess:
      with tf.variable_scope('packing_test', reuse=tf.AUTO_REUSE):
        params = self._TransformerParams()
        stack = mt_layers.TransformerStack(params)
        packed_params = params.Copy()
        packed_params.packed_input = True
        packed_stack = mt_layers.TransformerStack(packed_params)

        inputs = tf.constant(
            [[[0, 1], [1, -1]], [[1, 2], [-2, -1]]], dtype=tf.float32)
        paddings = tf.constant([[0, 0], [0, 0]], dtype=tf.float32)
        # Flatten (time, batch) into one packed sequence of length 4 with
        # batch size 1; seg_id marks which original sequence each step
        # belongs to.
        inputs_packed = tf.reshape(inputs, [-1, 1, 2])
        paddings_packed = tf.reshape(paddings, [-1, 1])
        seg_id = tf.transpose(tf.constant([[0, 1, 0, 1]], dtype=tf.float32))

        output, _, _ = stack.FProp(stack.theta, inputs, paddings, seg_id)
        output_packed, _, _ = packed_stack.FProp(
            packed_stack.theta, inputs_packed, paddings_packed, seg_id)
        output_packed = tf.reshape(output_packed, tf.shape(output))

        tf.global_variables_initializer().run()
        output, output_packed = sess.run([output, output_packed])

        self.assertAllClose(output_packed, output)
Example #2
0
  def testTransparentTransformerStackEvalFProp(self):
    """Eval-mode FProp of a transparent stack; both output slices match."""
    # time = 2, batch = 1
    with self.session(use_gpu=True) as sess:
      params = self._TransformerParams(is_eval=True)
      params.is_transparent = True
      params.num_transparent_outputs = 2

      stack = mt_layers.TransformerStack(params)

      inputs = tf.constant([[[0, 1]], [[1, -1]]], dtype=tf.float32)
      paddings = tf.constant([[0], [0]], dtype=tf.float32)

      tf.global_variables_initializer().run()
      outputs, _, _ = stack.FPropDefaultTheta(inputs, paddings)
      out = sess.run(outputs)
      # In eval mode both transparent outputs should carry the same values.
      # pylint: disable=bad-whitespace
      # pyformat: disable
      expected = [[[-0.23663561,  0.99756944]],
                  [[ 0.91392964, -0.85869682]]]
      self.assertAllClose(expected, out[:, :, :, 0])
      self.assertAllClose(expected, out[:, :, :, 1])
Example #3
0
    def testTransformerStackAlternateLayers(self):
        """Builds a stack from an explicit per-layer template list and
        checks its output against golden values."""
        batch = 3
        tf.flags.FLAGS.tpu_compatible = True
        with self.session(use_gpu=False) as sess:
            model_dim = 2
            num_layers = 2
            layer_tpl = layers_with_attention.TransformerLayer.Params()
            layer_tpl.tr_atten_tpl.num_attention_heads = 1
            layer_tpl.tr_fflayer_tpl.hidden_dim = 2

            # One independent copy of the template per layer.
            params = mt_layers.TransformerStack.Params().Set(
                name='transformer',
                model_dim=model_dim,
                num_transformer_layers=num_layers,
                transformer_tpl=[layer_tpl.Copy() for _ in range(num_layers)],
                random_seed=123456)

            xformer = mt_layers.TransformerStack(params)
            fprop_dtype = py_utils.FPropDtype(params)
            inputs = tf.constant([[[0, 1]] * batch, [[1, -1]] * batch],
                                 dtype=fprop_dtype)
            paddings = tf.constant([[0] * batch, [0] * batch],
                                   dtype=fprop_dtype)
            output, _, _ = xformer.FProp(xformer.theta, inputs, paddings)

            tf.global_variables_initializer().run()
            output = sess.run(output)
            print(repr(output))
            # pylint: disable=bad-whitespace
            # pyformat: disable
            self.assertAllCloseAccordingToType(
                np.array([[[-2.17566538, -0.2821945],
                           [-2.17566514, -0.28219438],
                           [-2.17566514, -0.28219438]],
                          [[-0.71516591, -0.90594757],
                           [-0.71516603, -0.90594769],
                           [-0.71516603, -0.90594769]]]), output)
Example #4
0
  def testTransparentTransformerStackEvalFProp(self):
    """Eval-mode FProp of a transparent stack; both output slices match."""
    # time = 2, batch = 1
    with self.session(use_gpu=True) as sess, self.SetEval(True):
      params = self._TransformerParams()
      params.is_transparent = True
      params.num_transparent_outputs = 2

      stack = mt_layers.TransformerStack(params)

      inputs = tf.constant([[[0, 1]], [[1, -1]]], dtype=tf.float32)
      paddings = tf.constant([[0], [0]], dtype=tf.float32)

      self.evaluate(tf.global_variables_initializer())
      outputs, _, _ = stack.FPropDefaultTheta(inputs, paddings)
      out = sess.run(outputs)
      # In eval mode both transparent outputs should carry the same values.
      expected = [[[1.38054, -1.37836]], [[-0.811525, 1.183977]]]
      self.assertAllClose(expected, out[:, :, :, 0])
      self.assertAllClose(expected, out[:, :, :, 1])
Example #5
0
    def testTransformerStackAlternateLayers(self):
        """Builds a stack from an explicit per-layer template list and
        checks its output against golden values."""
        batch = 3
        tf.flags.FLAGS.tpu_compatible = True
        with self.session(use_gpu=False):
            model_dim = 2
            num_layers = 2
            layer_tpl = layers_with_attention.TransformerLayer.Params()
            layer_tpl.tr_atten_tpl.num_attention_heads = 1
            layer_tpl.tr_fflayer_tpl.hidden_dim = 2

            # One independent copy of the template per layer.
            params = mt_layers.TransformerStack.Params().Set(
                name='transformer',
                model_dim=model_dim,
                num_transformer_layers=num_layers,
                transformer_tpl=[layer_tpl.Copy() for _ in range(num_layers)],
                random_seed=123456)

            xformer = mt_layers.TransformerStack(params)
            fprop_dtype = py_utils.FPropDtype(params)
            inputs = tf.constant([[[0, 1]] * batch, [[1, -1]] * batch],
                                 dtype=fprop_dtype)
            paddings = tf.constant([[0] * batch, [0] * batch],
                                   dtype=fprop_dtype)
            output, _, _ = xformer.FProp(xformer.theta, inputs, paddings)

            self.evaluate(tf.global_variables_initializer())
            output = self.evaluate(output)
            print(repr(output))
            self.assertAllCloseAccordingToType(
                np.array([[[-0.940543, 1.479253]] * batch,
                          [[-0.413938, -2.550903]] * batch]), output)