def test_pors_model_tie_encs_decs(self):
  """Checks the loss is positive with the sentence/paragraph tie params set.

  Sets `tie_sent_para_enc` and `tie_sent_para_dec` to True, builds the model
  with BOS/BOP special ids passed positionally, and runs a single step.
  """
  overrides = dict(
      tie_sent_para_enc=True,
      tie_sent_para_dec=True,
      vocab_size=12,
  )
  self.params.update(overrides)

  # Batch of 2 paragraphs each with 3 sentences.
  sentence_rows = [
      [[3, 2, 1, 0, 0], [4, 2, 4, 1, 0], [2, 1, 0, 0, 0]],
      [[5, 2, 3, 1, 0], [4, 2, 5, 1, 0], [5, 1, 0, 0, 0]],
  ]
  paragraph_rows = [
      [3, 2, 4, 2, 4, 2, 1, 0],
      [5, 2, 3, 4, 2, 5, 5, 1],
  ]
  features = {
      'sentences': tf.constant(sentence_rows, dtype=tf.int64),
      'paragraphs': tf.constant(paragraph_rows, dtype=tf.int64),
  }
  special_ids = {pors.BOS: 10, pors.BOP: 11}
  model_outputs = pors.pors_model(features, self.params, True, special_ids)
  # The last returned element is not fetched.
  fetches = model_outputs[:-1]

  with self.session() as sess:
    sess.run(tf.initializers.global_variables())
    sess.run(tf.initializers.local_variables())
    loss, _, _, _ = sess.run(fetches)
    self.assertGreater(loss, 0)
def test_pors_model_mask(self, use_tpu):
  """Runs one pors_model step with the input-mask params and a MASK id.

  Args:
    use_tpu: value assigned to `FLAGS.use_tpu` before the model is built.
  """
  FLAGS.use_tpu = use_tpu
  overrides = dict(
      rnn_hidden_size=6,
      latent_size=4,
      embedding_size=10,
      trf_hidden_size=10,
      tie_embeddings=True,
      mask_rate_input=0.1,
      mask_prob_input=0.1,
      vocab_size=11,
      encoder_type='transformer',
  )
  self.params.update(overrides)

  # Batch of 2 paragraphs each with 3 sentences.
  sentence_rows = [
      [[3, 2, 1, 0, 0], [4, 2, 4, 1, 0], [2, 1, 0, 0, 0]],
      [[5, 2, 3, 1, 0], [4, 2, 5, 1, 0], [5, 1, 0, 0, 0]],
  ]
  paragraph_rows = [
      [3, 2, 4, 2, 4, 2, 1, 0],
      [5, 2, 3, 4, 2, 5, 5, 1],
  ]
  features = {
      'sentences': tf.constant(sentence_rows, dtype=tf.int64),
      'paragraphs': tf.constant(paragraph_rows, dtype=tf.int64),
  }
  model_outputs = pors.pors_model(
      features, self.params, True, spid_dict={pors.MASK: 10})
  # The last returned element is not fetched.
  fetches = model_outputs[:-1]

  with self.session() as sess:
    sess.run(tf.initializers.global_variables())
    sess.run(tf.initializers.local_variables())
    loss, _, _, _ = sess.run(fetches)
    self.assertGreater(loss, 0)
def test_pors_different_latent(self, tie_emb):
  """Runs one step where latent, hidden and embedding sizes all differ.

  Args:
    tie_emb: value stored under the `tie_embeddings` param.
  """
  overrides = dict(
      rnn_hidden_size=5,
      latent_size=4,
      embedding_size=3,
      trf_hidden_size=3,
      tie_embeddings=tie_emb,
      tie_sent_para_dec=False,
  )
  self.params.update(overrides)

  # Batch of 2 paragraphs each with 3 sentences.
  sentence_rows = [
      [[3, 2, 1, 0, 0], [4, 2, 4, 1, 0], [2, 1, 0, 0, 0]],
      [[5, 2, 3, 1, 0], [4, 2, 5, 1, 0], [5, 1, 0, 0, 0]],
  ]
  paragraph_rows = [
      [3, 2, 4, 2, 4, 2, 1, 0],
      [5, 2, 3, 4, 2, 5, 5, 1],
  ]
  features = {
      'sentences': tf.constant(sentence_rows, dtype=tf.int64),
      'paragraphs': tf.constant(paragraph_rows, dtype=tf.int64),
  }
  model_outputs = pors.pors_model(features, self.params, True)
  # The last returned element is not fetched.
  fetches = model_outputs[:-1]

  with self.session() as sess:
    sess.run(tf.initializers.global_variables())
    sess.run(tf.initializers.local_variables())
    loss, _, _, _ = sess.run(fetches)
    self.assertGreater(loss, 0)
def test_pors_model_encoder_decoder_type(self, encoder_type, decoder_type,
                                         use_tpu):
  """Runs one step for a given encoder/decoder type combination.

  Args:
    encoder_type: value stored under the `encoder_type` param.
    decoder_type: value stored under the `decoder_type` param.
    use_tpu: value assigned to `FLAGS.use_tpu` before the model is built.
  """
  FLAGS.use_tpu = use_tpu
  overrides = dict(
      add_critic=False,
      embedding_size=4,
      latent_size=4,
      trf_hidden_size=4,
      trf_num_heads=2,
      encoder_type=encoder_type,
      decoder_type=decoder_type,
  )
  self.params.update(overrides)
  tf.reset_default_graph()

  # Batch of 2 paragraphs each with 3 sentences.
  sentence_rows = [
      [[3, 2, 1, 0, 0], [4, 2, 4, 1, 0], [2, 1, 0, 0, 0]],
      [[5, 2, 3, 1, 0], [4, 2, 5, 1, 0], [5, 1, 0, 0, 0]],
  ]
  paragraph_rows = [
      [3, 2, 4, 2, 4, 2, 1, 0],
      [5, 2, 3, 4, 2, 5, 5, 1],
  ]
  features = {
      'sentences': tf.constant(sentence_rows, dtype=tf.int64),
      'paragraphs': tf.constant(paragraph_rows, dtype=tf.int64),
  }
  model_outputs = pors.pors_model(features, self.params, True)
  # The last returned element is not fetched.
  fetches = model_outputs[:-1]

  with self.session() as sess:
    sess.run(tf.initializers.global_variables())
    sess.run(tf.initializers.local_variables())
    loss, _, _, _ = sess.run(fetches)
    self.assertGreater(loss, 0)
def test_pors_model_noisy_paragraph(self):
  """Runs two steps with `noisy_paragraph_prob` set and noisy inputs supplied.

  The `noisy_paragraphs` feature reuses the same tensor as `paragraphs`.
  """
  self.params.update(dict(noisy_paragraph_prob=0.1))

  # Batch of 2 paragraphs each with 3 sentences.
  sentence_rows = [
      [[3, 2, 1, 0, 0], [4, 2, 4, 1, 0], [2, 1, 0, 0, 0]],
      [[5, 2, 3, 1, 0], [4, 2, 5, 1, 0], [5, 1, 0, 0, 0]],
  ]
  paragraph_rows = [
      [3, 2, 4, 2, 4, 2, 1, 0],
      [5, 2, 3, 4, 2, 5, 5, 1],
  ]

  # Only the non-TPU setting is exercised here.
  for use_tpu in [False]:
    FLAGS.use_tpu = use_tpu
    tf.reset_default_graph()

    sentence_ids = tf.constant(sentence_rows, dtype=tf.int64)
    paragraph_ids = tf.constant(paragraph_rows, dtype=tf.int64)
    features = {
        'sentences': sentence_ids,
        'paragraphs': paragraph_ids,
        'noisy_paragraphs': paragraph_ids,
    }
    model_outputs = pors.pors_model(features, self.params, True)
    # The last returned element is not fetched.
    fetches = model_outputs[:-1]

    with self.session() as sess:
      sess.run(tf.initializers.global_variables())
      sess.run(tf.initializers.local_variables())
      # 2 steps to train both discriminator/generator; the loss is checked
      # after each one.
      for _ in range(2):
        loss, _, _, _ = sess.run(fetches)
        self.assertGreater(loss, 0)
def test_pors_model(self, use_tpu, add_critic):
  """Runs two pors_model steps and checks variables, predictions and loss.

  Args:
    use_tpu: value assigned to `FLAGS.use_tpu`; the decoded-output keys are
      only checked when this is falsy.
    add_critic: value stored under the `add_critic` param.
  """
  # TODO(peterjliu): Actually test on TPU. Setting this flag is not enough.
  FLAGS.use_tpu = use_tpu
  FLAGS.decode_reconstructions = True
  self.params.update(dict(add_critic=add_critic))
  tf.reset_default_graph()

  # Batch of 2 paragraphs each with 3 sentences.
  sentence_rows = [
      [[3, 2, 1, 0, 0], [4, 2, 4, 1, 0], [2, 1, 0, 0, 0]],
      [[5, 2, 3, 1, 0], [4, 2, 5, 1, 0], [5, 1, 0, 0, 0]],
  ]
  paragraph_rows = [
      [3, 2, 4, 2, 4, 2, 1, 0],
      [5, 2, 3, 4, 2, 5, 5, 1],
  ]
  features = {
      'sentences': tf.constant(sentence_rows, dtype=tf.int64),
      'paragraphs': tf.constant(paragraph_rows, dtype=tf.int64),
  }
  model_outputs = pors.pors_model(features, self.params, True)
  # The last returned element is not fetched.
  fetches = model_outputs[:-1]

  # Every trainable variable must fall under the 'ae_' or 'disc_' scope.
  autoencoder_vars = tf.trainable_variables('ae_')
  critic_vars = tf.trainable_variables('disc_')
  expected_vars = set(autoencoder_vars + critic_vars)
  self.assertEqual(set(tf.trainable_variables()), expected_vars)

  with self.session() as sess:
    sess.run(tf.initializers.global_variables())
    sess.run(tf.initializers.local_variables())

    # 2 steps to train both discriminator/generator.
    loss, _, _, predictions = sess.run(fetches)
    if not use_tpu:
      for key in ('decoded_paragraph', 'decoded_sentences'):
        self.assertIn(key, predictions, msg=str(predictions))
    self.assertGreater(loss, 0)

    loss, _, _, _ = sess.run(fetches)
    self.assertGreater(loss, 0)
def test_pors_model_out_domain_pretrain(self, use_tpu):
  """Checks the 'disc_' variables are unchanged after two model steps.

  Both pre-training step flags are set to 0 before the model is built. The
  values of the 'disc_' trainable variables are captured before and after two
  runs of the model's returned tensors and compared with `assertAllClose`.

  Args:
    use_tpu: value assigned to `FLAGS.use_tpu` before the model is built.
  """
  FLAGS.out_domain_pretrain_steps = 0
  FLAGS.in_domain_pretrain_steps = 0
  FLAGS.use_tpu = use_tpu

  # Batch of 2 paragraphs each with 3 sentences.
  sids = tf.constant([
      [[3, 2, 1, 0, 0], [4, 2, 4, 1, 0], [2, 1, 0, 0, 0]],
      [[5, 2, 3, 1, 0], [4, 2, 5, 1, 0], [5, 1, 0, 0, 0]],
  ], dtype=tf.int64)
  pids = tf.constant([
      [3, 2, 4, 2, 4, 2, 1, 0],
      [5, 2, 3, 4, 2, 5, 5, 1],
  ], dtype=tf.int64)
  features = {'sentences': sids, 'paragraphs': pids}
  ret_tensors = pors.pors_model(features, self.params, True)

  # Verify that ae_vars and d_vars together are all the trainable vars.
  ae_vars = tf.trainable_variables('ae_')
  d_vars = tf.trainable_variables('disc_')
  self.assertEqual(set(tf.trainable_variables()), set(ae_vars + d_vars))

  with self.session() as ss:
    ss.run(tf.initializers.global_variables())
    ss.run(tf.initializers.local_variables())

    d_vars_before = ss.run(d_vars)
    # 2 steps which the test expects to leave the 'disc_' variables alone
    # (original note: only the generator is trained here). The loss is
    # checked after every step, as the sibling tests do.
    for _ in range(2):
      loss, _, _, _ = ss.run(ret_tensors[:-1])
      self.assertGreater(loss, 0)
    d_vars_after = ss.run(d_vars)

    # NOTE(review): if no 'disc_' variables exist this comparison is vacuous;
    # confirm the default params create them.
    for before, after in zip(d_vars_before, d_vars_after):
      # D-vars should not update.
      self.assertAllClose(before, after)
def test_pors_model_pretrain_methods(self, pretrain_as_autoencoder,
                                     lm_pretrain_dec, nsp_pretrain,
                                     cpp_pretrain_scheme, use_tpu,
                                     pretrain_order):
  """Runs three steps of pors_model under a given pre-training configuration.

  Args:
    pretrain_as_autoencoder: value assigned to
      `FLAGS.pretrain_as_autoencoder`.
    lm_pretrain_dec: value stored under the `lm_pretrain_dec` param.
    nsp_pretrain: value stored under the `nsp_pretrain` param.
    cpp_pretrain_scheme: value stored under the `cpp_pretrain_scheme` param.
    use_tpu: value assigned to `FLAGS.use_tpu`.
    pretrain_order: value stored under the `pretrain_order` param.
  """
  FLAGS.out_domain_pretrain_steps = 1
  FLAGS.in_domain_pretrain_steps = 1
  FLAGS.pretrain_as_autoencoder = pretrain_as_autoencoder
  overrides = dict(
      add_critic=False,
      vocab_size=11,
      pretrain_order=pretrain_order,
      first_pretrain_steps=1,
      lm_pretrain_dec=lm_pretrain_dec,
      nsp_pretrain=nsp_pretrain,
      cpp_pretrain_scheme=cpp_pretrain_scheme,
      encoder_type='transformer',
  )
  self.params.update(overrides)
  FLAGS.use_tpu = use_tpu
  tf.reset_default_graph()

  # Batch of 4 paragraphs each with 5 sentences. The two distinct examples
  # alternate through the batch.
  sentences_a = [[3, 2, 1, 0, 0], [4, 2, 4, 1, 0], [2, 1, 0, 0, 0],
                 [4, 2, 4, 1, 0], [2, 1, 0, 0, 0]]
  sentences_b = [[5, 2, 3, 1, 0], [4, 2, 5, 1, 0], [5, 1, 0, 0, 0],
                 [4, 2, 5, 1, 0], [5, 1, 0, 0, 0]]
  paragraph_a = [3, 2, 4, 2, 4, 2, 4, 2, 4, 2, 1, 0]
  paragraph_b = [5, 2, 3, 4, 2, 5, 5, 4, 2, 5, 5, 1]
  features = {
      'sentences': tf.constant(
          [sentences_a, sentences_b, sentences_a, sentences_b],
          dtype=tf.int64),
      'paragraphs': tf.constant(
          [paragraph_a, paragraph_b, paragraph_a, paragraph_b],
          dtype=tf.int64),
  }
  model_outputs = pors.pors_model(
      features, self.params, True, spid_dict={pors.MASK: 10})
  # The last returned element is not fetched.
  fetches = model_outputs[:-1]

  # Every trainable variable must fall under the 'ae_' or 'disc_' scope.
  autoencoder_vars = tf.trainable_variables('ae_')
  critic_vars = tf.trainable_variables('disc_')
  expected_vars = set(autoencoder_vars + critic_vars)
  self.assertEqual(set(tf.trainable_variables()), expected_vars)

  with self.session() as sess:
    sess.run(tf.initializers.global_variables())
    sess.run(tf.initializers.local_variables())
    sess.run(tf.tables_initializer())

    # Three steps, each required to give a positive loss:
    #   1. pre-train the first component on out-domain data (if simultaneous
    #      or autoencoder then encoder and decoder are jointly pre-trained),
    #   2. pre-train the second component on in-domain data,
    #   3. one regular training step.
    for _ in range(3):
      loss, _, _, _ = sess.run(fetches)
      self.assertGreater(loss, 0)
def test_pors_model_pretrain_noD(self):
  """Tests that the discriminator is not updated during pre-training.

  Sets `FLAGS.in_domain_pretrain_steps` to 10, runs the model's returned
  tensors once, and checks that the values of the 'disc_' trainable variables
  are the same before and after that run.
  """
  FLAGS.in_domain_pretrain_steps = 10
  tf.reset_default_graph()

  # Batch of 2 paragraphs each with 3 sentences.
  sids = tf.constant([
      [[3, 2, 1, 0, 0], [4, 2, 4, 1, 0], [2, 1, 0, 0, 0]],
      [[5, 2, 3, 1, 0], [4, 2, 5, 1, 0], [5, 1, 0, 0, 0]],
  ], dtype=tf.int64)
  pids = tf.constant([
      [3, 2, 4, 2, 4, 2, 1, 0],
      [5, 2, 3, 4, 2, 5, 5, 1],
  ], dtype=tf.int64)
  features = {'sentences': sids, 'paragraphs': pids}
  ret_tensors = pors.pors_model(features, self.params, True)

  # Only the 'disc_' variables are inspected by this test.
  d_vars = tf.trainable_variables('disc_')

  with self.session() as ss:
    ss.run(tf.initializers.global_variables())
    ss.run(tf.initializers.local_variables())

    # One step, which falls within the 10 configured pre-training steps.
    d_vars_before = ss.run(d_vars)
    _, _, _, _ = ss.run(ret_tensors[:-1])
    d_vars_after = ss.run(d_vars)

    # NOTE(review): if no 'disc_' variables exist this comparison is vacuous;
    # confirm the default params create them.
    for before, after in zip(d_vars_before, d_vars_after):
      self.assertAllClose(before, after)
def test_pors_cpu_tpu_diff(self, lm_pretrain):
  """Tests that the loss computed with and without `use_tpu` is the same.

  The model is built twice from a fresh graph with the same random seed, once
  per `FLAGS.use_tpu` value, and the first-step losses are compared.

  Args:
    lm_pretrain: value stored under the `lm_pretrain_dec` param; when truthy,
      in-domain pre-training is set to 10 steps and
      `FLAGS.pretrain_as_autoencoder` is turned off, otherwise in-domain
      pre-training is set to 0 steps.
  """
  overrides = dict(
      add_critic=False,
      lm_pretrain_dec=lm_pretrain,
      lambda_lm_pretrain_p=1.0,
      lambda_lm_pretrain_s=1.0,
      vocab_size=13,
      encoder_type='transformer',
  )
  self.params.update(overrides)

  FLAGS.in_domain_pretrain_steps = 10 if lm_pretrain else 0
  if lm_pretrain:
    FLAGS.pretrain_as_autoencoder = False

  # Batch of 4 paragraphs each with 3 sentences. The two distinct examples
  # alternate through the batch.
  sentences_a = [[3, 2, 1, 0, 0], [4, 2, 4, 1, 0], [2, 1, 0, 0, 0]]
  sentences_b = [[5, 2, 3, 1, 0], [4, 2, 5, 1, 0], [5, 1, 0, 0, 0]]
  paragraph_a = [3, 2, 4, 2, 4, 2, 1, 0]
  paragraph_b = [5, 2, 3, 4, 2, 5, 5, 1]
  sentence_rows = [sentences_a, sentences_b, sentences_a, sentences_b]
  paragraph_rows = [paragraph_a, paragraph_b, paragraph_a, paragraph_b]

  loss_by_setting = {}
  for tpu in [True, False]:
    # Fresh graph and identical seed for each setting.
    tf.reset_default_graph()
    FLAGS.use_tpu = tpu
    tf.random.set_random_seed(1234)

    features = {
        'sentences': tf.constant(sentence_rows, dtype=tf.int64),
        'paragraphs': tf.constant(paragraph_rows, dtype=tf.int64),
    }
    special_ids = {pors.MASK: 10, pors.BOS: 11, pors.BOP: 12}
    model_outputs = pors.pors_model(
        features, self.params, True, spid_dict=special_ids)
    # The last returned element is not fetched.
    fetches = model_outputs[:-1]

    with self.session() as sess:
      sess.run(tf.initializers.global_variables())
      sess.run(tf.initializers.local_variables())
      # A single step per setting; its loss is recorded for comparison.
      loss, _, _, _ = sess.run(fetches)
      loss_by_setting[tpu] = loss

  self.assertAllClose(loss_by_setting[True], loss_by_setting[False])