import time

import tensorflow as tf

# `hp` (hyperparameters) and `Transformer` are defined elsewhere in the project.


def model_fn(features, labels, mode):
    TRAIN = mode == tf.estimator.ModeKeys.TRAIN
    EVAL = mode == tf.estimator.ModeKeys.EVAL
    PREDICT = mode == tf.estimator.ModeKeys.PREDICT

    model = Transformer(hp, train_mode=TRAIN)
    model.build_model(features['x'], labels)

    predictions = {'prediction': model.preds}
    if PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loss = model.loss
    accuracy = tf.metrics.accuracy(
        labels[:, 1:], model.preds)  # accuracy[0] is not recomputed at every mini-batch.

    # The mean taken at the end is meaningless (it is just a single number);
    # it is only here pro forma, to turn this into a tf.metrics op.
    seq_accuracy = tf.metrics.mean(
        tf.reduce_prod(
            tf.cast(tf.equal(predictions['prediction'], labels[:, 1:]),
                    tf.float16),
            axis=-1))

    if EVAL:
        # eval_metric_ops is reported only once, at the end of evaluation
        eval_metric_ops = {
            'acc': accuracy,
            'seq_accuracy': seq_accuracy
        }  # only metrics built with tf.metrics belong here

        # evaluation_hooks log at the interval given by every_n_iter
        evaluation_hooks = tf.train.LoggingTensorHook(
            {
                "acc": accuracy[1],
                "seq_accuracy": seq_accuracy[1]
            },
            every_n_iter=1)  # logged at every iteration
        return tf.estimator.EstimatorSpec(
            mode,
            loss=loss,
            eval_metric_ops=eval_metric_ops,
            predictions=predictions,
            evaluation_hooks=[
                evaluation_hooks
            ])  # returns both loss (averaged over iterations) and eval_metric_ops

    if TRAIN:
        start = time.time()  # used to log cumulative elapsed time
        global_step = tf.train.get_global_step()
        train_op = model.add_optimizer(global_step)
        logging_hook = tf.train.LoggingTensorHook(
            {
                "loss----": loss,
                "char accuracy": accuracy[1],
                "seq_accuracy": seq_accuracy[1],
                "elapsed": tf.timestamp() - start
            },
            every_n_iter=1000)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          train_op=train_op,
                                          loss=loss,
                                          training_hooks=[logging_hook])
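

# Minimal usage sketch (an assumption, not part of the original example): wire
# model_fn into tf.estimator.Estimator. The toy input_fn below only illustrates
# the expected ({'x': source_ids}, target_ids) structure; real training would
# use the project's own data pipeline.
import numpy as np


def _toy_input_fn():
    x = np.zeros((8, 10), dtype=np.int32)   # fake source token ids
    y = np.zeros((8, 11), dtype=np.int32)   # fake target token ids
    return tf.data.Dataset.from_tensor_slices(({'x': x}, y)).repeat().batch(4)


if __name__ == '__main__':
    estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir='./checkpoints')
    estimator.train(input_fn=_toy_input_fn, steps=1000)
    print(estimator.evaluate(input_fn=_toy_input_fn, steps=10))
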
Example #2

import shutil

import numpy as np
import tensorflow as tf

# Transformer, DatasetManager, PAD_ID and print_trainable_variables are
# defined elsewhere in the project.


class TransformerTest(tf.test.TestCase):
    def setUp(self):
        self.t = Transformer(model_name='test',
                             num_heads=4,
                             d_model=64,
                             d_ff=128,
                             num_enc_layers=2,
                             num_dec_layers=2)
        self.batch_size = 4
        self.seq_len = 5
        self.raw_input_ph = tf.placeholder(tf.int32,
                                           shape=(self.batch_size,
                                                  self.seq_len))
        self.fake_data = np.array([
            [1, 2, 3, 4, 5],
            [1, 2, 0, 0, 0],
            [1, 2, 3, 4, 0],
            [1, 2, 3, 0, 0],
        ])

    def tearDown(self):
        shutil.rmtree(self.t.checkpoint_dir)
        shutil.rmtree(self.t.log_dir)
        shutil.rmtree(self.t.tb_dir)

    def test_build_and_load_model(self):
        dm = DatasetManager('iwslt15')
        dm.load_vocab()

        self.t.build_model('iwslt15', dm.source_id2word, dm.target_id2word,
                           PAD_ID)
        print_trainable_variables()
        self.t.init()
        value_dict = self.t.get_variable_values()

        tf.reset_default_graph()
        model = Transformer.load_model('test')
        out = model.predict(np.zeros(model.raw_input_ph.shape))
        assert out.shape == model.raw_target_ph.shape

        value_dict2 = model.get_variable_values()
        for k in value_dict2:
            print("\n*************************************")
            print(k)
            print(value_dict[k])
            print(value_dict2[k])
            assert np.allclose(value_dict[k], value_dict2[k])

    def test_construct_padding_mask(self):
        with self.test_session() as sess:
            mask_ph = self.t.construct_padding_mask(self.raw_input_ph)
            mask = sess.run(mask_ph,
                            feed_dict={self.raw_input_ph: self.fake_data})
            expected = np.array([
                [[1., 1., 1., 1., 1.]] * self.seq_len,
                [[1., 1., 0., 0., 0.]] * self.seq_len,
                [[1., 1., 1., 1., 0.]] * self.seq_len,
                [[1., 1., 1., 0., 0.]] * self.seq_len,
            ])
            np.testing.assert_array_equal(mask, expected)
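        # The expected mask above is consistent with building the padding mask
        # roughly like this (an assumption about the implementation):
        #   pad = tf.cast(tf.not_equal(inp, 0), tf.float32)           # (batch, seq_len)
        #   mask = tf.tile(tf.expand_dims(pad, 1), [1, seq_len, 1])   # (batch, seq_len, seq_len)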

    def test_construct_autoregressive_mask(self):
        with self.test_session() as sess:
            data = np.random.randint(5, size=(self.batch_size, self.seq_len))
            tri_matrix = [[1, 0, 0, 0, 0], [1, 1, 0, 0, 0], [1, 1, 1, 0, 0],
                          [1, 1, 1, 1, 0], [1, 1, 1, 1, 1]]
            expected = np.array([tri_matrix] * self.batch_size).astype(
                np.float32)
            mask_ph = self.t.construct_autoregressive_mask(self.raw_input_ph)
            mask = sess.run(mask_ph, feed_dict={self.raw_input_ph: data})
            np.testing.assert_array_equal(mask, expected)
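        # The expected lower-triangular matrices match a causal mask built with
        # something like (an assumption about the implementation):
        #   tri = tf.linalg.band_part(tf.ones([seq_len, seq_len]), -1, 0)   # keep lower triangle
        #   mask = tf.tile(tf.expand_dims(tri, 0), [batch_size, 1, 1])      # (batch, seq_len, seq_len)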

    def test_label_smoothing(self):
        with self.test_session() as sess:
            ohe = np.array([[
                [0, 1, 0, 0, 0],
                [1, 0, 0, 0, 0],
                [0, 0, 0, 0, 1],
            ]]).astype(np.float64)  # shape (1, 3, 5)
            out = self.t.label_smoothing(tf.convert_to_tensor(ohe)).eval()
            expected = np.array([[
                [0.02, 0.92, 0.02, 0.02, 0.02],
                [0.92, 0.02, 0.02, 0.02, 0.02],
                [0.02, 0.02, 0.02, 0.02, 0.92],
            ]])
            np.testing.assert_array_equal(out, expected)
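        # Worked check (assuming epsilon = 0.1 and V = 5 classes):
        # smoothed = (1 - epsilon) * one_hot + epsilon / V, so each 1 becomes
        # 0.9 + 0.02 = 0.92 and each 0 becomes 0.02, matching `expected`.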

    def test_positional_encoding_sinusoid(self):
        with self.test_session() as sess:
            self.t.d_model = 8
            pos_enc = self.t.positional_encoding_sinusoid(
                tf.convert_to_tensor(self.fake_data)).eval()
            assert pos_enc.shape == (4, 5, 8)

            one_enc = pos_enc[0]
            np.testing.assert_array_equal(one_enc, pos_enc[1])
            np.testing.assert_array_equal(one_enc, pos_enc[2])
            np.testing.assert_array_equal(one_enc, pos_enc[3])

            # embedding vector of one position: pos=0, i=0-7
            np.testing.assert_array_equal(
                one_enc[0],
                np.array([
                    np.sin(0),
                    np.cos(0),
                    np.sin(0),
                    np.cos(0),
                    np.sin(0),
                    np.cos(0),
                    np.sin(0),
                    np.cos(0)
                ]))
            # one embedding dimension of different positions: pos=0-4, i=2
            np.testing.assert_array_equal(
                one_enc[:, 2],
                np.array([
                    np.sin(0),
                    np.sin(1 / np.power(10000., 2. / 8)),
                    np.sin(2 / np.power(10000., 2. / 8)),
                    np.sin(3 / np.power(10000., 2. / 8)),
                    np.sin(4 / np.power(10000., 2. / 8)),
                ]).astype(np.float32))
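

# Reference sketch (an assumption, mirroring the values the test above expects):
# the standard sinusoidal encoding, PE[pos, 2i]   = sin(pos / 10000^(2i/d_model)),
#                                   PE[pos, 2i+1] = cos(pos / 10000^(2i/d_model)).
def sinusoid_position_encoding(seq_len, d_model):
    pos = np.arange(seq_len)[:, None]                        # (seq_len, 1)
    i = np.arange(d_model)[None, :]                          # (1, d_model)
    angle = pos / np.power(10000., (2 * (i // 2)) / float(d_model))
    table = np.zeros((seq_len, d_model), dtype=np.float32)
    table[:, 0::2] = np.sin(angle[:, 0::2])                  # even dims use sin
    table[:, 1::2] = np.cos(angle[:, 1::2])                  # odd dims use cos
    return table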