def model_fn(features, labels, mode):
    """Estimator ``model_fn``: build the Transformer graph for the given mode.

    Args:
        features: mapping whose ``'x'`` entry is handed to
            ``Transformer.build_model`` as the model input.
        labels: target tensor. Its first column is dropped
            (``labels[:, 1:]``) before it is compared with the predictions.
        mode: one of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for PREDICT, EVAL or TRAIN.
        Falls through and returns ``None`` for any other ``mode`` value.
    """
    mode_keys = tf.estimator.ModeKeys
    is_training = mode == mode_keys.TRAIN
    is_eval = mode == mode_keys.EVAL
    is_predict = mode == mode_keys.PREDICT

    model = Transformer(hp, train_mode=is_training)
    model.build_model(features['x'], labels)
    predictions = {'predition': model.preds}

    if is_predict:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loss = model.loss

    # Per-token accuracy as a (value, update_op) pair.
    # NOTE (original author): accuracy[0] is not recomputed on every
    # mini-batch.
    accuracy = tf.metrics.accuracy(labels[:, 1:], model.preds)

    # Whole-sequence accuracy: the product over the last axis is 1 only when
    # every position matches. tf.metrics.mean wraps it so that it becomes a
    # tf.metrics-style (value, update_op) pair.
    token_match = tf.equal(predictions['predition'], labels[:, 1:])
    sequence_match = tf.reduce_prod(
        tf.cast(token_match, tf.float16), axis=-1)
    seq_accuracy = tf.metrics.mean(sequence_match)

    if is_eval:
        # eval_metric_ops are reported once, at the end of evaluation; only
        # metrics built with tf.metrics may be placed here.
        metric_ops = {
            'acc': accuracy,
            'seq_accuracy': seq_accuracy,
        }
        # The evaluation hook logs at the configured interval (here: every
        # iteration), using the metric update ops.
        eval_logger = tf.train.LoggingTensorHook(
            {
                "acc": accuracy[1],
                "seq_accuacy": seq_accuracy[1],
            },
            every_n_iter=1)
        # NOTE (original author): because both loss (averaged over
        # iterations) and eval_metric_ops are supplied, both are returned by
        # the evaluation.
        return tf.estimator.EstimatorSpec(
            mode,
            loss=loss,
            eval_metric_ops=metric_ops,
            predictions=predictions,
            evaluation_hooks=[eval_logger])

    if is_training:
        # Python wall-clock time captured when this function runs; used to
        # log the cumulative elapsed time.
        graph_built_at = time.time()
        step = tf.train.get_global_step()
        train_op = model.add_optimizer(step)
        elapsed = tf.timestamp() - graph_built_at
        train_logger = tf.train.LoggingTensorHook(
            {
                "loss----": loss,
                "char accuracy": accuracy[1],
                "seq_accuracy": seq_accuracy[1],
                "elapsed": elapsed,
            },
            every_n_iter=1000)
        return tf.estimator.EstimatorSpec(
            mode=mode,
            train_op=train_op,
            loss=loss,
            training_hooks=[train_logger])
class TransformerTest(tf.test.TestCase):
    """Tests for ``Transformer`` helper ops and its save/load round-trip."""

    def setUp(self):
        """Create a small model, an int32 placeholder and a padded id batch."""
        self.t = Transformer(model_name='test', num_heads=4, d_model=64,
                             d_ff=128, num_enc_layers=2, num_dec_layers=2)
        self.batch_size = 4
        self.seq_len = 5

        self.raw_input_ph = tf.placeholder(
            tf.int32, shape=(self.batch_size, self.seq_len))
        # Zeros mark the positions the padding-mask test expects to be
        # masked out.
        self.fake_data = np.array([
            [1, 2, 3, 4, 5],
            [1, 2, 0, 0, 0],
            [1, 2, 3, 4, 0],
            [1, 2, 3, 0, 0],
        ])

    def tearDown(self):
        """Delete the checkpoint, log and TensorBoard directories."""
        shutil.rmtree(self.t.checkpoint_dir)
        shutil.rmtree(self.t.log_dir)
        shutil.rmtree(self.t.tb_dir)

    def test_build_and_load_model(self):
        """A model reloaded via ``load_model`` has the same variable values."""
        dm = DatasetManager('iwslt15')
        dm.load_vocab()

        self.t.build_model('iwslt15', dm.source_id2word, dm.target_id2word,
                           PAD_ID)
        print_trainable_variables()

        self.t.init()
        value_dict = self.t.get_variable_values()

        # Start from an empty graph so the loaded model is built from scratch.
        tf.reset_default_graph()
        model = Transformer.load_model('test')
        out = model.predict(np.zeros(model.raw_input_ph.shape))
        assert out.shape == model.raw_target_ph.shape

        value_dict2 = model.get_variable_values()
        for k in value_dict2:
            print("\n*************************************")
            print(k)
            print(value_dict[k])
            print(value_dict2[k])
            assert np.allclose(value_dict[k], value_dict2[k])

    def test_construct_padding_mask(self):
        """The padding mask is 1 for non-zero ids, repeated ``seq_len`` times."""
        with self.test_session() as sess:
            mask_ph = self.t.construct_padding_mask(self.raw_input_ph)
            mask = sess.run(mask_ph,
                            feed_dict={self.raw_input_ph: self.fake_data})
            expected = np.array([
                [[1., 1., 1., 1., 1.]] * self.seq_len,
                [[1., 1., 0., 0., 0.]] * self.seq_len,
                [[1., 1., 1., 1., 0.]] * self.seq_len,
                [[1., 1., 1., 0., 0.]] * self.seq_len,
            ])
            np.testing.assert_array_equal(mask, expected)

    def test_construct_autoregressive_mask(self):
        """The autoregressive mask is lower-triangular for every batch row."""
        with self.test_session() as sess:
            # Random ids: the expected mask does not depend on the content.
            data = np.random.randint(5, size=(self.batch_size, self.seq_len))
            tri_matrix = [[1, 0, 0, 0, 0],
                          [1, 1, 0, 0, 0],
                          [1, 1, 1, 0, 0],
                          [1, 1, 1, 1, 0],
                          [1, 1, 1, 1, 1]]
            expected = np.array([tri_matrix] * self.batch_size).astype(
                np.float32)
            mask_ph = self.t.construct_autoregressive_mask(self.raw_input_ph)
            mask = sess.run(mask_ph, feed_dict={self.raw_input_ph: data})
            np.testing.assert_array_equal(mask, expected)

    def test_label_smoothing(self):
        """One-hot rows over 5 classes are smoothed to 0.92 / 0.02."""
        with self.test_session() as sess:
            # np.float64 is used because the np.float alias no longer exists
            # in current NumPy releases.
            ohe = np.array([[
                [0, 1, 0, 0, 0],
                [1, 0, 0, 0, 0],
                [0, 0, 0, 0, 1],
            ]]).astype(np.float64)  # (1, 3, 5)
            out = self.t.label_smoothing(tf.convert_to_tensor(ohe)).eval()
            expected = np.array([[
                [0.02, 0.92, 0.02, 0.02, 0.02],
                [0.92, 0.02, 0.02, 0.02, 0.02],
                [0.02, 0.02, 0.02, 0.02, 0.92],
            ]])
            # The smoothed values come from floating-point arithmetic, so
            # compare with a tolerance rather than bit-for-bit.
            np.testing.assert_allclose(out, expected, rtol=1e-5)

    def test_positional_encoding_sinusoid(self):
        """Sinusoidal encodings are per-position and identical across a batch."""
        with self.test_session() as sess:
            self.t.d_model = 8
            pos_enc = self.t.positional_encoding_sinusoid(
                tf.convert_to_tensor(self.fake_data)).eval()
            assert pos_enc.shape == (4, 5, 8)

            # Every batch element must receive the same encoding.
            one_enc = pos_enc[0]
            np.testing.assert_array_equal(one_enc, pos_enc[1])
            np.testing.assert_array_equal(one_enc, pos_enc[2])
            np.testing.assert_array_equal(one_enc, pos_enc[3])

            # embedding vector of one position: pos=0, i=0-7
            np.testing.assert_array_equal(
                one_enc[0],
                np.array([
                    np.sin(0), np.cos(0),
                    np.sin(0), np.cos(0),
                    np.sin(0), np.cos(0),
                    np.sin(0), np.cos(0),
                ]))

            # one embedding dimension of different positions: pos=0-4, i=2
            # The expected values are cast with a NumPy dtype (astype does
            # not accept a TensorFlow DType) and compared with a tolerance,
            # because the graph result and NumPy's float64 sine need not
            # agree bit-for-bit.
            np.testing.assert_allclose(
                one_enc[:, 2],
                np.array([
                    np.sin(0),
                    np.sin(1 / np.power(10000., 2. / 8)),
                    np.sin(2 / np.power(10000., 2. / 8)),
                    np.sin(3 / np.power(10000., 2. / 8)),
                    np.sin(4 / np.power(10000., 2. / 8)),
                ]).astype(np.float32),
                rtol=1e-5)