# コード例 #1 (code example 1)
    def _testGPipeTransformerStackTrainEncoderTransparentFProp(
            self, splits=1, num_micro_batches=1):
        """Checks encoder and full fprop of a transparent stack against goldens.

        Builds a transparent GPipe transformer stack (2 encoder + 2 decoder
        layers, batch=4, time=2 per the golden values) and compares both the
        encoder-only output and the full FProp output to precomputed constants.
        """
        batch_size = 4  # time dimension is 2 (see golden values below)
        with self.session() as sess:
            cfg = self._TransformerParams(
                splits=splits,
                num_micro_batches=num_micro_batches,
                num_decoder_layers=2,
                num_encoder_layers=2)
            cfg.is_transparent = True
            cfg.num_transparent_outputs = 1
            cfg.transparent_merger_dropout_prob = 0.0
            stack = GPipeTransformerStack(cfg)

            (src, src_pad, tgt,
             tgt_pad) = self._random_inputs(batch=batch_size)
            py_utils.GetOrCreateGlobalStep()
            tf.set_random_seed(1234)
            tf.global_variables_initializer().run()
            # Graph construction order is kept identical to preserve
            # op-level random seeds derived from the graph seed.
            enc_tensor = stack.EncoderFPropDefaultTheta(src, src_pad)
            dec_tensor = stack.FProp(stack.theta, src, src_pad, tgt, tgt_pad)
            enc_vals = sess.run(enc_tensor)
            dec_vals = sess.run(dec_tensor)
            expected_enc = [[[-0.118476, 1.031626]] * batch_size,
                            [[0.643884, -1.02581167]] * batch_size]
            expected_dec = [[[-2.8764534, 1.00808454]] * batch_size,
                            [[1.02129495, -0.78406084]] * batch_size,
                            [[1.02129495, -0.78406084]] * batch_size]
            self.assertAllClose(enc_vals, expected_enc)
            self.assertAllClose(dec_vals, expected_dec)
# コード例 #2 (code example 2)
    def testGPipeTransformerStackTrainTransparentFPropWithEmbeddings(
            self, splits=1, num_micro_batches=1):
        """Checks transparent-stack fprop with embedding lookup against goldens.

        Builds a 1-encoder/3-decoder transparent stack whose inputs pass
        through embeddings (batch=4, time=2 per the golden values), then
        compares the encoder output and the decoder output to constants.
        """
        batch = 4
        with self.session() as sess:
            params = _TransformerParamsWithEmbeddings(
                splits=splits,
                num_micro_batches=num_micro_batches,
                num_decoder_layers=3,
                num_encoder_layers=1)
            params.is_transparent = True
            params.transparent_merger_dropout_prob = 0.0
            xformer = GPipeTransformerStack(params)

            input_ids, id_paddings, tgt_inputs, tgt_paddings, _, _ = _TransformerRandomInputsIds(
                batch=batch)
            inputs, paddings, _, _ = _TransformerRandomInputsVecs(batch=batch)
            # Fix: use the TF1-style seed API for consistency with the sibling
            # tests; `tf.random.set_seed` is the TF2 name and does not exist in
            # the TF1 runtime implied by `tf.global_variables_initializer` /
            # `sess.run` used throughout this file.
            tf.set_random_seed(1234)
            tf.global_variables_initializer().run()
            enc_outputs = xformer.EncoderFPropDefaultTheta(inputs, paddings)
            # FProp returns multiple outputs; [2] selects the decoder output
            # (same indexing as the sibling embeddings test).
            dec_output = xformer.FProp(xformer.theta, input_ids, id_paddings,
                                       tgt_inputs, tgt_paddings)[2]
            enc_out_1 = sess.run(enc_outputs)
            dec_out = sess.run(dec_output)
            self.assertAllClose(
                [[[0.017581, 0.802863, 0.975554, -1.164572]] * batch,
                 [[-0.549953, 1.196884, 4.910457, -0.102137]] * batch],
                enc_out_1)
            self.assertAllClose(
                [[[-1.122128, 1.111972, 4.642949, -2.14831]] * batch,
                 [[-1.336919, 1.182709, 4.785938, -2.039246]] * batch,
                 [[-1.335168, 1.297679, 4.720459, -2.111006]] * batch],
                dec_out)
# コード例 #3 (code example 3)
    def _testGPipeTransformerStackTrainTransparentFProp(
            self, splits=1, num_micro_batches=1):
        """Verifies the three transparent encoder outputs agree and match goldens.

        Uses a 1-encoder/3-decoder transparent stack with three transparent
        outputs (batch=4, time=2 per the golden values).
        """
        batch_size = 4  # time dimension is 2
        with self.session() as sess:
            cfg = self._TransformerParams(
                splits=splits,
                num_micro_batches=num_micro_batches,
                num_decoder_layers=3,
                num_encoder_layers=1)
            cfg.is_transparent = True
            cfg.num_transparent_outputs = 3
            cfg.transparent_merger_dropout_prob = 0.0
            stack = GPipeTransformerStack(cfg)

            (src, src_pad, tgt,
             tgt_pad) = self._random_inputs(batch=batch_size)
            py_utils.GetOrCreateGlobalStep()
            tf.set_random_seed(1234)
            tf.global_variables_initializer().run()
            enc_tensors = stack.EncoderFPropDefaultTheta(src, src_pad)
            dec_tensor = stack.FProp(stack.theta, src, src_pad, tgt, tgt_pad)
            first, second, third = sess.run(enc_tensors)
            dec_vals = sess.run(dec_tensor)
            # All three transparent outputs are expected to be equal.
            self.assertAllClose(first, second)
            self.assertAllClose(second, third)
            self.assertAllClose(first,
                                [[[-0.27896273, 1.46589136]] * batch_size,
                                 [[1.03141928, -0.847896]] * batch_size])
            expected_dec = [[[2.926736, -4.090812]] * batch_size,
                            [[-1.69508219, 1.75891459]] * batch_size,
                            [[-1.6950829, 1.75891507]] * batch_size]
            self.assertAllClose(dec_vals, expected_dec)
# コード例 #4 (code example 4)
    def _testGPipeTransformerEncoderFPropDefaultTheta(self,
                                                      splits=1,
                                                      num_micro_batches=1):
        """Runs encoder fprop in TPU-compatible mode and checks golden values."""
        batch_size = 4
        # NOTE(review): mutates a process-global flag without restoring it,
        # so later tests in the same process see tpu_compatible=True.
        tf.flags.FLAGS.tpu_compatible = True
        with self.session() as sess:
            cfg = self._TransformerParams(
                num_decoder_layers=4,
                num_encoder_layers=4,
                splits=splits,
                num_micro_batches=num_micro_batches)
            cfg.dtype = tf.float32
            cfg.fprop_dtype = tf.float32
            stack = GPipeTransformerStack(cfg)

            src, src_pad, _, _ = self._random_inputs(batch_size)

            enc_tensor = stack.EncoderFPropDefaultTheta(src, src_pad)

            tf.global_variables_initializer().run()
            enc_vals = sess.run(enc_tensor)

            golden = [[[0.21085747, 0.60925347]] * batch_size,
                      [[0.21085747, 0.60925347]] * batch_size]
            self.assertAllCloseAccordingToType(golden, enc_vals)
# コード例 #5 (code example 5)
    def testGPipeTransformerStackTrainTransparentFPropWithEmbeddings(
            self, splits=1, num_micro_batches=1):
        """Transparent stack with embeddings: compares fprop outputs to goldens.

        NOTE(review): this method has the same name as an earlier snippet in
        this file; if both live in one class the later definition shadows the
        earlier one — confirm which version is intended.
        """
        batch_size = 4  # time dimension is 2
        with self.session() as sess:
            cfg = _TransformerParamsWithEmbeddings(
                splits=splits,
                num_micro_batches=num_micro_batches,
                num_decoder_layers=3,
                num_encoder_layers=1)
            cfg.is_transparent = True
            cfg.transparent_merger_dropout_prob = 0.0
            stack = GPipeTransformerStack(cfg)

            (src_ids, id_pad, tgt, tgt_pad, _,
             _) = _TransformerRandomInputsIds(batch=batch_size)
            src_vecs, vec_pad, _, _ = _TransformerRandomInputsVecs(
                batch=batch_size)
            tf.set_random_seed(1234)
            tf.global_variables_initializer().run()
            enc_tensor = stack.EncoderFPropDefaultTheta(src_vecs, vec_pad)
            # FProp returns multiple outputs; [2] selects the decoder output.
            dec_tensor = stack.FProp(stack.theta, src_ids, id_pad, tgt,
                                     tgt_pad)[2]
            enc_vals = sess.run(enc_tensor)
            dec_vals = sess.run(dec_tensor)
            expected_enc = [
                [[0.68660116, 0.947429, 0.78953624, -1.20142817]] * batch_size,
                [[0.57919669, 1.12979364, 4.29336643, 0.45106331]] * batch_size,
            ]
            expected_dec = [
                [[-0.46651918, -1.62957835, 1.15657926, 1.08397353]] * batch_size,
                [[-0.34674695, -1.65999401, 1.08431196, 1.07384491]] * batch_size,
                [[-0.41073492, -1.60431314, 1.04607999, 1.08858371]] * batch_size,
            ]
            self.assertAllClose(expected_enc, enc_vals)
            self.assertAllClose(expected_dec, dec_vals)
# コード例 #6 (code example 6)
    def testGPipeTransformerStackTrainEncoderTransparentFPropEval(self):
        """Eval-mode encoder fprop with a single transparent output vs goldens."""
        batch_size = 4  # time dimension is 2
        with self.session() as sess:
            cfg = self._TransformerParams(num_decoder_layers=3,
                                          num_encoder_layers=3)
            cfg.is_transparent = True
            cfg.num_transparent_outputs = 1
            cfg.is_eval = True

            stack = GPipeTransformerStack(cfg)

            src, src_pad, _, _ = self._random_inputs(batch=batch_size)

            tf.global_variables_initializer().run()
            enc_tensor = stack.EncoderFPropDefaultTheta(src, src_pad)
            enc_vals = sess.run(enc_tensor)
            golden = [[[0.18823329, 0.71548849]] * batch_size,
                      [[0.76032472, -0.82791042]] * batch_size]
            self.assertAllClose(enc_vals, golden)
# コード例 #7 (code example 7)
    def testGPipeTransformerStackTrainTransparentFPropEval(self):
        """Eval-mode encoder fprop of a transparent stack vs golden values."""
        batch_size = 4  # time dimension is 2
        with self.session() as sess:
            cfg = self._TransformerParams(num_decoder_layers=3,
                                          num_encoder_layers=1)
            cfg.is_transparent = True
            cfg.is_eval = True

            stack = GPipeTransformerStack(cfg)

            src, src_pad, _, _ = self._random_inputs(batch=batch_size)

            tf.global_variables_initializer().run()
            enc_tensor = stack.EncoderFPropDefaultTheta(src, src_pad)
            enc_vals = sess.run(enc_tensor)
            golden = [[[[-0.27896273] * 3, [1.46589136] * 3]] * batch_size,
                      [[[1.03141928] * 3, [-0.847896] * 3]] * batch_size]
            self.assertAllClose(enc_vals, golden)