def testForwardPassSplitBatch(self):
  with self.session(use_gpu=False) as sess:
    bs = 8
    sl = 20
    tf.set_random_seed(8372749040)
    p = self._EncoderParams()
    p.random_seed = 1234
    mt_enc = encoder.TransformerEncoder(p)

    batch = py_utils.NestedMap()
    batch.ids = tf.constant(
        np.random.randint(low=0, high=63, size=[bs, sl], dtype=np.int32))
    batch.paddings = tf.zeros([bs, sl])
    enc_out = mt_enc.FPropDefaultTheta(batch)

    inputs1, inputs2 = tf.split(batch.ids, 2, 0)
    paddings1, paddings2 = tf.split(batch.paddings, 2, 0)

    batch.ids = inputs1
    batch.paddings = paddings1
    enc_out1 = mt_enc.FPropDefaultTheta(batch)

    batch.ids = inputs2
    batch.paddings = paddings2
    enc_out2 = mt_enc.FPropDefaultTheta(batch)

    tf.global_variables_initializer().run()
    actual_enc_out, actual_enc_out1, actual_enc_out2 = sess.run(
        [enc_out[0], enc_out1[0], enc_out2[0]])

    # The encoded output is time-major [sl, bs, dim], so the two half-batches
    # are stitched back together along axis 1 (the batch dimension).
    self.assertAllClose(
        actual_enc_out,
        np.concatenate([actual_enc_out1, actual_enc_out2], 1))
def testForwardPassWithInputPacking(self):
  with self.session(use_gpu=False) as sess:
    with tf.variable_scope('transformer_test', reuse=tf.AUTO_REUSE):
      bs = 3
      sl = 3
      tf.set_random_seed(8372749040)
      p = self._EncoderParams()
      mt_enc = encoder.TransformerEncoder(p)
      packed_params = p.Copy()
      packed_params.packed_input = True
      mt_enc_packed = encoder.TransformerEncoder(packed_params)

      batch = py_utils.NestedMap()
      batch.ids = tf.constant(
          np.random.randint(low=0, high=63, size=[bs, sl], dtype=np.int32))
      batch.paddings = tf.zeros([bs, sl])

      packed_batch = py_utils.NestedMap()
      packed_batch.ids = tf.reshape(batch.ids, [1, -1])
      packed_batch.paddings = tf.reshape(batch.paddings, [1, -1])
      packed_batch.segment_ids = tf.constant(
          [[0, 0, 0, 1, 1, 1, 2, 2, 2]], dtype=tf.float32)
      packed_batch.segment_pos = tf.constant(
          [[0, 1, 2, 0, 1, 2, 0, 1, 2]], dtype=tf.int32)

      enc_out = mt_enc.FPropDefaultTheta(batch).encoded
      enc_out = tf.transpose(enc_out, [1, 0, 2])

      packed_enc_out = mt_enc_packed.FPropDefaultTheta(packed_batch)
      packed_enc_out = tf.reshape(packed_enc_out.encoded, tf.shape(enc_out))

      enc_out = tf.reduce_sum(enc_out, axis=0)
      packed_enc_out = tf.reduce_sum(packed_enc_out, axis=0)

      tf.global_variables_initializer().run()
      actual_enc_out, actual_packed_enc_out = sess.run(
          [enc_out, packed_enc_out])

      self.assertAllClose(actual_packed_enc_out, actual_enc_out)
def testForwardPassWithSourceMask(self):
  with self.session(use_gpu=False):
    bs = 2
    sl = 21
    tf.random.set_seed(8372749040)
    p = self._EncoderParams()
    p.task_emb = p.token_emb.Copy()
    p.task_emb.vocab_size = 4  # 4 tasks, 2 languages.
    p.apply_source_mask = True
    mt_enc = encoder.TransformerEncoder(p)

    batch = py_utils.NestedMap()
    batch.ids = tf.constant(
        np.random.randint(low=0, high=63, size=[bs, sl], dtype=np.int32))
    batch.task_ids = tf.constant(
        np.random.randint(low=0, high=3, size=[bs, sl], dtype=np.int32))
    batch.paddings = tf.zeros([bs, sl])

    enc_out = mt_enc.FPropDefaultTheta(batch)
    enc_out_sum = tf.reduce_sum(enc_out.encoded, 0)

    tf.global_variables_initializer().run()
    actual_enc_out = enc_out_sum.eval()
    print(actual_enc_out)

    # pyformat: disable
    # pylint: disable=bad-whitespace
    expected_enc_out = [
        [  1.2796695, -31.786999,  -0.4054371, -32.61311,   42.414032,
          11.020337,   54.11595,  -61.322884,   39.59355,   15.315693,
         -20.373957,    1.8548615, -17.743631,   3.1409538, 30.730812,
          41.4348],
        [ -1.0374013, -31.306532,  -2.6323478, -32.078648,  45.800484,
          16.40942,    55.001144, -63.10233,    40.4261,    14.19862,
         -23.027012,    1.0839913, -20.739471,   0.7242559, 32.499565,
          41.592197]]
    # pylint: enable=bad-whitespace
    # pyformat: enable
    self.assertAllClose(
        expected_enc_out, actual_enc_out, rtol=1e-05, atol=1e-05)
def testForwardPass(self):
  with self.session(use_gpu=False):
    bs = 2
    sl = 21
    tf.random.set_seed(8372749040)
    p = self._EncoderParams()
    mt_enc = encoder.TransformerEncoder(p)
    batch = py_utils.NestedMap()
    batch.ids = tf.constant(
        np.random.randint(low=0, high=63, size=[bs, sl], dtype=np.int32))
    batch.paddings = tf.zeros([bs, sl])
    out = mt_enc.FPropDefaultTheta(batch)
    enc_out_sum = tf.reduce_sum(out.encoded, 0)
    emb_out_sum = tf.reduce_sum(out.embedded_inputs, 0)
    enc_padding = out.padding

    self.evaluate(tf.global_variables_initializer())
    actual_enc_out, actual_enc_out_sum, actual_emb_out_sum, \
        actual_padding = self.evaluate(
            [out.encoded, enc_out_sum, emb_out_sum, enc_padding])

    # pyformat: disable
    # pylint: disable=bad-whitespace
    expected_enc_out = [
        [ 49.45291519, -31.5743885,   39.43684387, -47.67513275,
          35.39754105,  14.41970444,  29.58752823, -43.06747055,
          24.09403419,  -7.62717247,  18.48112106,  20.42408371,
           5.1519866,  -19.66542244,  29.81095314,  56.90407944],
        [ 55.26333618, -30.39743614,  29.68314743, -37.61392975,
          43.02292252,  13.88345146,  15.73033905, -24.68696213,
          24.70776558, -29.18026161,  15.41469955,  27.77672577,
          -5.36326742, -22.78984642,  22.15843391,  22.7237072]]
    expected_emb_out_sum = [
        [ 3.11785889,  1.33086884, -1.96904886, -4.81911993,  1.25389254,
          1.52582073,  0.79906291,  4.07078457, -1.20546532, -2.97308111,
          0.22460097,  2.99702668, -2.29453254,  6.06631422,  1.68836212,
          5.35728741],
        [ 1.41723049, -1.39409399, -1.49569404, -0.24654561,  1.09658146,
          4.51638842,  2.72023368, -0.45651400,  3.46091199, -0.43925080,
          1.02091551,  3.89704037,  1.87841535, -0.27947778, -0.91630745,
          1.34230828]]
    # pylint: enable=bad-whitespace
    # pyformat: enable
    self.assertAllEqual(actual_enc_out.shape, [sl, bs, p.model_dim])
    self.assertAllEqual(actual_padding.shape, [sl, bs])
    self.assertAllClose(
        expected_enc_out, actual_enc_out_sum, rtol=1e-05, atol=1e-05)
    self.assertAllClose(
        expected_emb_out_sum, actual_emb_out_sum, rtol=1e-05, atol=1e-05)
def testForwardPassWithTaskEmb(self):
  with self.session(use_gpu=False):
    bs = 2
    sl = 21
    tf.random.set_seed(8372749040)
    p = self._EncoderParams()
    p.task_emb = p.token_emb.Copy()
    p.task_emb.vocab_size = 4
    mt_enc = encoder.TransformerEncoder(p)

    batch = py_utils.NestedMap()
    batch.ids = tf.constant(
        np.random.randint(low=0, high=63, size=[bs, sl], dtype=np.int32))
    batch.task_ids = tf.constant(
        np.random.randint(low=0, high=3, size=[bs, sl], dtype=np.int32))
    batch.paddings = tf.zeros([bs, sl])

    enc_out = mt_enc.FPropDefaultTheta(batch)
    enc_out_sum = tf.reduce_sum(enc_out.encoded, 0)

    self.evaluate(tf.global_variables_initializer())
    actual_enc_out = enc_out_sum.eval()

    # pyformat: disable
    # pylint: disable=bad-whitespace
    expected_enc_out = [
        [  1.2796677, -31.786997,  -0.4054339, -32.61311,   42.41403,
          11.020338,   54.115948, -61.322887,   39.593548,  15.315696,
         -20.373957,    1.8548622, -17.743631,   3.140956,  30.730812,
          41.4348],
        [ -1.0373995, -31.306532,  -2.6323462, -32.078648,  45.80049,
          16.409424,   55.00114,  -63.102333,   40.4261,    14.198621,
         -23.027012,    1.0839912, -20.739473,   0.7242553, 32.49956,
          41.592197]]
    # pylint: enable=bad-whitespace
    # pyformat: enable
    self.assertAllClose(
        expected_enc_out, actual_enc_out, rtol=1e-05, atol=1e-05)
def testForwardPass(self):
  with self.session(use_gpu=False) as sess:
    bs = 2
    sl = 21
    tf.set_random_seed(8372749040)
    p = self._EncoderParams()
    mt_enc = encoder.TransformerEncoder(p)
    batch = py_utils.NestedMap()
    batch.ids = tf.constant(
        np.random.randint(low=0, high=63, size=[bs, sl], dtype=np.int32))
    batch.paddings = tf.zeros([bs, sl])
    enc_out = mt_enc.FPropDefaultTheta(batch)
    enc_out_sum = tf.reduce_sum(enc_out[0], 0)
    enc_atten_probs = enc_out[1]

    tf.global_variables_initializer().run()
    actual_enc_out, actual_enc_out_sum, actual_atten_probs = sess.run(
        [enc_out[0], enc_out_sum, enc_atten_probs])

    # pyformat: disable
    # pylint: disable=bad-whitespace
    expected_enc_out = [
        [ 49.45291519, -31.5743885,   39.43684387, -47.67513275,
          35.39754105,  14.41970444,  29.58752823, -43.06747055,
          24.09403419,  -7.62717247,  18.48112106,  20.42408371,
           5.1519866,  -19.66542244,  29.81095314,  56.90407944],
        [ 55.26333618, -30.39743614,  29.68314743, -37.61392975,
          43.02292252,  13.88345146,  15.73033905, -24.68696213,
          24.70776558, -29.18026161,  15.41469955,  27.77672577,
          -5.36326742, -22.78984642,  22.15843391,  22.7237072]]
    # pylint: enable=bad-whitespace
    # pyformat: enable
    self.assertAllEqual(actual_enc_out.shape, [sl, bs, p.model_dim])
    self.assertAllEqual(actual_atten_probs.shape, [sl, bs])
    self.assertAllClose(expected_enc_out, actual_enc_out_sum)
def testForwardPassWithIndividuallyTaggedTokens(self):
  with self.session(use_gpu=False):
    bs = 3
    sl = 3
    tf.random.set_seed(8372749040)
    p = self._EncoderParams()
    p.packed_input = False
    p.individually_tagged_input = True
    mt_enc_tagged = encoder.TransformerEncoder(p)

    batch = py_utils.NestedMap()
    batch.ids = tf.constant(
        np.random.randint(low=0, high=63, size=[bs, sl], dtype=np.int32))
    batch.paddings = tf.zeros([bs, sl])

    tagged_batch = py_utils.NestedMap()
    tagged_batch.ids = tf.reshape(batch.ids, [1, -1])
    tagged_batch.paddings = tf.reshape(batch.paddings, [1, -1])
    tagged_batch.segment_ids = tf.constant(
        [[0, 0, 0, 1, 1, 1, 2, 2, 2]], dtype=tf.int32)

    tagged_enc_out = mt_enc_tagged.FPropDefaultTheta(tagged_batch)
    tagged_enc_out_sum = tf.reduce_sum(tagged_enc_out.encoded, 0)

    self.evaluate(tf.global_variables_initializer())
    actual_tagged_enc_out = tagged_enc_out_sum.eval()
    print(actual_tagged_enc_out)

    expected_enc_out = [[
        19.668077, -11.905859, 7.9366484, -16.66984, 23.359558,
        13.41925, 13.443447, -14.168186, 9.430209, -16.471195,
        2.6439285, 11.756948, -4.6066704, -10.32788, 13.434055,
        8.899297]]
    self.assertAllClose(actual_tagged_enc_out, expected_enc_out, atol=1.0e-4)
def testEncoderVars(self):
  p = self._EncoderParams()
  mt_enc = encoder.TransformerEncoder(p)
  enc_vars = mt_enc.vars
  flatten_vars = enc_vars.Flatten()
  self.assertEqual(len(flatten_vars), 91)
def testTransparentEncoderConstruction(self):
  p = self._EncoderParams()
  p.transformer_stack.is_transparent = True
  p.transformer_stack.num_transparent_outputs = 2
  _ = encoder.TransformerEncoder(p)
def testEncoderConstruction(self):
  p = self._EncoderParams()
  _ = encoder.TransformerEncoder(p)
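
# The tests above all call a `_EncoderParams()` helper that is defined
# elsewhere in this test class and is not part of this section. The sketch
# below is an assumption, not the actual helper: it only pins down what the
# tests themselves imply (a token vocabulary of 64, since ids are drawn from
# [0, 63], and a model/embedding dimension of 16, matching the 16-wide
# expected outputs). Everything else about the real configuration (layer
# count, attention heads, initializers) is left at library defaults here and
# would differ in the real helper, so the hard-coded expected values above
# would not reproduce with this sketch.
def _EncoderParamsSketch(self):
  """Hypothetical stand-in for the _EncoderParams() helper used above."""
  p = encoder.TransformerEncoder.Params()
  p.name = 'transformer_encoder'
  p.token_emb.vocab_size = 64      # batch.ids are sampled from [0, 63].
  p.token_emb.embedding_dim = 16   # Matches the 16-dim expected outputs.
  p.model_dim = 16                 # Checked via [sl, bs, p.model_dim] above.
  p.position_emb.embedding_dim = 16
  p.random_seed = 1234             # Deterministic initialization for tests.
  return p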