Example 1
  def testTransformerLayerWithInputPackingFProp(self):
    with self.session(use_gpu=True) as sess:
      with tf.variable_scope('transformer_packed_test', reuse=tf.AUTO_REUSE):
        np.random.seed(6348575)
        depth = 4
        p = layers_with_attention.TransformerLayer.Params()
        p.name = 'transformer'
        p.source_dim = depth
        p.is_decoder = True
        p.tr_fflayer_tpl.hidden_dim = 7
        p.tr_atten_tpl.num_attention_heads = 2
        packed_params = p.Copy()
        transformer = layers_with_attention.TransformerLayer(p)
        packed_params.packed_input = True
        transformer_packed = layers_with_attention.TransformerLayer(
            packed_params)

        dtype = tf.float32
        source_vecs = tf.stack([
            tf.constant(np.random.rand(2, depth), dtype=dtype) for _ in range(5)
        ])
        source_padding = tf.transpose(
            tf.constant([[0, 0, 0, 0, 1], [0, 0, 0, 0, 0]], dtype=dtype))
        aux_vecs = tf.stack([
            tf.constant(np.random.rand(2, depth), dtype=dtype) for _ in range(7)
        ])
        aux_paddings = tf.transpose(
            tf.constant(
                [[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 1]], dtype=dtype))

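        # Pack the [time, batch, depth] inputs into a single batch-1 sequence of
        # length time * batch; the alternating 0/1 segment ids record which
        # original batch element each packed position came from.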
        source_vecs_packed = tf.reshape(source_vecs, [-1, 1, depth])
        aux_vecs_packed = tf.reshape(aux_vecs, [-1, 1, depth])
        source_padding_packed = tf.reshape(source_padding, [-1, 1])
        aux_padding_packed = tf.reshape(aux_paddings, [-1, 1])
        source_segment_id = tf.transpose(
            tf.constant([[0, 1, 0, 1, 0, 1, 0, 1, 0, 1]], dtype=tf.float32))
        aux_segment_id = tf.transpose(
            tf.constant(
                [[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]], dtype=tf.float32))

        h, _ = transformer.FPropDefaultTheta(
            source_vecs,
            source_padding,
            aux_vecs=aux_vecs,
            aux_paddings=aux_paddings,
            source_segment_id=None,
            aux_segment_id=None)

        h_packed, _ = transformer_packed.FPropDefaultTheta(
            source_vecs_packed,
            source_padding_packed,
            aux_vecs=aux_vecs_packed,
            aux_paddings=aux_padding_packed,
            source_segment_id=source_segment_id,
            aux_segment_id=aux_segment_id)
        h_packed = tf.reshape(h_packed, tf.shape(h))

        tf.global_variables_initializer().run()
        actual_layer, p_layer = sess.run([h, h_packed])
        self.assertAllClose(actual_layer, p_layer)
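
The methods in these examples are cut out of a larger test class, so the imports and class scaffolding are not shown. A minimal sketch of the boilerplate they appear to rely on is given below; the module paths, base class, and class name are assumptions inferred from the identifiers used (numpy, TF1-style TensorFlow, and lingvo's layers_with_attention, py_utils, and test_utils), not a verbatim copy of the original test file.

import numpy as np
import tensorflow.compat.v1 as tf

from lingvo.core import layers_with_attention
from lingvo.core import py_utils
from lingvo.core import test_utils


# Hypothetical class name; the original test class is not shown in this listing.
class TransformerLayerTest(test_utils.TestCase):
  """Would host the test methods shown in these examples as class members."""
  # def testTransformerLayerWithInputPackingFProp(self): ...
  # def testTransformerLayerConstruction(self): ...


if __name__ == '__main__':
  tf.test.main()
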
Example 2
  def testTransformerLayerConstruction(self):
    p = layers_with_attention.TransformerLayer.Params()
    p.name = 'transformer'
    p.source_dim = 4
    p.tr_fflayer_tpl.hidden_dim = 7
    p.tr_atten_tpl.num_attention_heads = 2
    p.is_decoder = True
    _ = layers_with_attention.TransformerLayer(p)
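
Examples 3 and 4 configure what appears to be the same decoder-style layer through the explicit has_aux_atten and mask_self_atten flags instead of the is_decoder shorthand used here. Treating the two spellings as equivalent is an assumption drawn from these examples, not something the snippets confirm; a sketch of the explicit form of the construction above:

# Same construction as above, spelled with the explicit decoder flags used in
# Examples 3 and 4 (assumed equivalent to setting is_decoder = True).
p = layers_with_attention.TransformerLayer.Params()
p.name = 'transformer'
p.source_dim = 4
p.tr_fflayer_tpl.hidden_dim = 7
p.tr_atten_tpl.num_attention_heads = 2
p.has_aux_atten = True    # attend over encoder (aux) outputs
p.mask_self_atten = True  # causal masking over the target sequence
_ = layers_with_attention.TransformerLayer(p)
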
Example 3
    def testTransformerLayerFProp(self):
        with self.session(use_gpu=True) as sess:
            np.random.seed(6348575)
            depth = 4
            p = layers_with_attention.TransformerLayer.Params()
            p.name = 'transformer'
            p.source_dim = depth
            p.has_aux_atten = True
            p.mask_self_atten = True
            p.tr_fflayer_tpl.hidden_dim = 7
            p.tr_atten_tpl.num_attention_heads = 2
            transformer = layers_with_attention.TransformerLayer(p)

            (source_vecs, source_padding, aux_vecs,
             aux_paddings) = self._testTransformerAttentionLayerInputs(
                 depth=depth)

            h, probs = transformer.FPropDefaultTheta(source_vecs,
                                                     source_padding,
                                                     aux_vecs=aux_vecs,
                                                     aux_paddings=aux_paddings)

            tf.global_variables_initializer().run()
            actual_layer_output, actual_prob_output = sess.run([h, probs])
            tf.logging.info(np.array_repr(actual_layer_output))
            tf.logging.info(np.array_repr(actual_prob_output))
            # pylint: disable=bad-whitespace
            # pyformat: disable
            expected_layer_output = [
                [[0.68134278, 0.74287307, 0.04602078, 1.99463582],
                 [0.20382279, -1.50973201, 1.33421206, 0.53317755]],
                [[2.46715426, 2.84406185, -0.60359633, 0.51742059],
                 [1.06444919, -1.45264888, -0.06196141, 0.35242724]],
                [[2.3442452, -0.56243378, -1.1149826, 0.50276589],
                 [1.04868603, -1.68515253, 0.3093726, -0.19512933]],
                [[-0.11517292, -1.21290886, 1.31996512, 1.14821553],
                 [3.14395714, -1.07060659, 0.27842081, -1.81273639]],
                [[1.39219522, -0.81882864, -0.32732445, 1.36851478],
                 [-0.79119539, -0.28148842, 0.29963702, 1.37034667]]
            ]
            expected_prob_output = [
                [[0.21795762, 0., 0.26612395, 0., 0.31251648, 0., 0.20340192],
                 [0., 0.2677784, 0., 0.32895881, 0., 0.40326279, 0.]],
                [[0.25721505, 0., 0.24116731, 0., 0.25138181, 0., 0.2502358],
                 [0., 0.25691482, 0., 0.31076014, 0., 0.43232504, 0.]],
                [[0.24550268, 0., 0.25128055, 0., 0.25109866, 0., 0.25211811],
                 [0., 0.26769161, 0., 0.32481128, 0., 0.40749705, 0.]],
                [[0.22675318, 0., 0.26633731, 0., 0.28919035, 0., 0.21771915],
                 [0., 0.35955882, 0., 0.36869824, 0., 0.271743, 0.]],
                [[0.21504655, 0., 0.26958644, 0., 0.30847484, 0., 0.20689213],
                 [0., 0.29516917, 0., 0.29359812, 0., 0.41123265, 0.]]
            ]
            # pyformat: enable
            # pylint: enable=bad-whitespace
            self.assertAllClose(expected_layer_output, actual_layer_output)
            self.assertAllClose(expected_prob_output, actual_prob_output)
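
Examples 3 and 4 call a _testTransformerAttentionLayerInputs helper that is not shown anywhere in this listing. A hypothetical, shape-compatible stand-in is sketched below, modeled on the tensors built inline in Example 1 (5 source steps and 7 aux steps over a batch of 2); because the original helper's exact values are unknown, this stand-in would not reproduce the golden numbers asserted above.

    def _testTransformerAttentionLayerInputs(self, depth=4, dtype=tf.float32):
        # Hypothetical stand-in for the unshown helper: builds inputs with the
        # same shapes as Example 1 ([5, 2, depth] source, [7, 2, depth] aux).
        source_vecs = tf.stack([
            tf.constant(np.random.rand(2, depth), dtype=dtype)
            for _ in range(5)
        ])
        source_padding = tf.transpose(
            tf.constant([[0, 0, 0, 0, 1], [0, 0, 0, 0, 0]], dtype=dtype))
        aux_vecs = tf.stack([
            tf.constant(np.random.rand(2, depth), dtype=dtype)
            for _ in range(7)
        ])
        aux_paddings = tf.transpose(
            tf.constant([[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 1]],
                        dtype=dtype))
        return source_vecs, source_padding, aux_vecs, aux_paddings
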
Example 4
    def testTransformerLayerExtendStep(self):
        with self.session(use_gpu=True) as sess:
            np.random.seed(6348575)
            depth = 4
            p = layers_with_attention.TransformerLayer.Params()
            p.name = 'transformer'
            p.source_dim = depth
            p.has_aux_atten = True
            p.mask_self_atten = True
            p.tr_fflayer_tpl.hidden_dim = 7
            p.tr_atten_tpl.num_attention_heads = 2
            transformer = layers_with_attention.TransformerLayer(p)

            (source_vecs, _, aux_vecs,
             aux_paddings) = self._testTransformerAttentionLayerInputs(
                 depth=depth)
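            # All-zero paddings: every source position is treated as real (in
            # this padding convention, 1 marks a padded position).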
            source_padding = tf.zeros([5, 2])

            h1, probs1 = transformer.FPropDefaultTheta(
                source_vecs,
                source_padding,
                aux_vecs=aux_vecs,
                aux_paddings=aux_paddings)

            h2 = []
            probs2 = []
            cached_source_vecs = tf.zeros([0, 2, 4])
            cached_source_contexts = tf.zeros([0, 2, 4])
            prefix_states = py_utils.NestedMap(key=cached_source_vecs,
                                               value=cached_source_contexts)
            for i in range(5):
                h, probs, prefix_states = transformer.ExtendStep(
                    transformer.theta, source_vecs[i, :, :], prefix_states,
                    aux_vecs, aux_paddings)
                h2.append(h)
                probs2.append(probs)

            h2 = tf.stack(h2)
            probs2 = tf.concat(probs2, 0)

            tf.global_variables_initializer().run()
            h1_v, probs1_v, h2_v, probs2_v = sess.run([h1, probs1, h2, probs2])
            self.assertAllClose(h1_v, h2_v)
            self.assertAllClose(probs1_v, probs2_v)
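
The zero-length [0, 2, 4] tensors seeded into prefix_states suggest that ExtendStep appends one time step of cached keys and values per call. A hedged variant of the decode loop above with shape checks is sketched below; growth along axis 0 is an assumption inferred from the initial shapes, not something the snippet itself confirms.

            # Hypothetical variant of the loop above with shape checks, assuming
            # the per-step key/value cache grows along the time axis (axis 0).
            for i in range(5):
                h, probs, prefix_states = transformer.ExtendStep(
                    transformer.theta, source_vecs[i, :, :], prefix_states,
                    aux_vecs, aux_paddings)
                self.assertEqual([i + 1, 2, 4],
                                 prefix_states.key.shape.as_list())
                self.assertEqual([i + 1, 2, 4],
                                 prefix_states.value.shape.as_list())
                h2.append(h)
                probs2.append(probs)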