class TestFreeDecodingLoss(unittest.TestCase):
    """Compare the score reported by forced decoding with the training loss."""

    def setUp(self):
        """Build a translator in inference mode and read the example data."""
        dim = 512
        xnmt.events.clear()
        ParamManager.init_param_col()

        # Components are created in the same order in which they are handed
        # to the translator, so parameter allocation order is unchanged.
        src_reader = PlainTextReader()
        trg_reader = PlainTextReader()
        src_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
        encoder = BiLSTMSeqTransducer(input_dim=dim, hidden_dim=dim)
        attender = MlpAttender(input_dim=dim, state_dim=dim, hidden_dim=dim)
        trg_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
        bridge = CopyBridge(dec_dim=dim, dec_layers=1)
        decoder = MlpSoftmaxDecoder(
            input_dim=dim,
            lstm_dim=dim,
            mlp_hidden_dim=dim,
            trg_embed_dim=dim,
            vocab_size=100,
            bridge=bridge,
        )
        self.model = DefaultTranslator(
            src_reader=src_reader,
            trg_reader=trg_reader,
            src_embedder=src_embedder,
            encoder=encoder,
            attender=attender,
            trg_embedder=trg_embedder,
            decoder=decoder,
        )
        self.model.set_train(False)
        self.model.initialize_generator(beam=1)

        src_sents = self.model.src_reader.read_sents("examples/data/head.ja")
        self.src_data = list(src_sents)
        trg_sents = self.model.trg_reader.read_sents("examples/data/head.en")
        self.trg_data = list(trg_sents)

    def test_single(self):
        """The negated decoding score of the first sentence matches its loss."""
        first_src = self.src_data[0]
        first_trg = self.trg_data[0]

        dy.renew_cg()
        self.model.initialize_generator(beam=1)
        outputs = self.model.generate_output(first_src, 0,
                                             forced_trg_ids=first_trg)

        # Start from a fresh computation graph before computing the loss.
        dy.renew_cg()
        loss_expr = self.model.calc_loss(src=first_src,
                                         trg=outputs[0].actions,
                                         loss_calculator=LossCalculator())
        train_loss = loss_expr.value()
        self.assertAlmostEqual(-outputs[0].score, train_loss, places=4)
class TestFreeDecodingLoss(unittest.TestCase):
    """Compare the score reported by forced greedy search with the MLE loss."""

    def setUp(self):
        """Build a translator in inference mode and read the example data."""
        dim = 512
        xnmt.events.clear()
        ParamManager.init_param_col()

        # Components are created in the same order in which they are handed
        # to the translator, so parameter allocation order is unchanged.
        src_reader = PlainTextReader()
        trg_reader = PlainTextReader()
        src_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
        encoder = BiLSTMSeqTransducer(input_dim=dim, hidden_dim=dim)
        attender = MlpAttender(input_dim=dim, state_dim=dim, hidden_dim=dim)
        trg_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)

        decoder_rnn = UniLSTMSeqTransducer(
            input_dim=dim,
            hidden_dim=dim,
            decoder_input_dim=dim,
            yaml_path="model.decoder.rnn",
        )
        decoder_transform = NonLinear(input_dim=dim * 2, output_dim=dim)
        decoder_scorer = Softmax(input_dim=dim, vocab_size=100)
        decoder_bridge = CopyBridge(dec_dim=dim, dec_layers=1)
        decoder = AutoRegressiveDecoder(
            input_dim=dim,
            trg_embed_dim=dim,
            rnn=decoder_rnn,
            transform=decoder_transform,
            scorer=decoder_scorer,
            bridge=decoder_bridge,
        )

        self.model = DefaultTranslator(
            src_reader=src_reader,
            trg_reader=trg_reader,
            src_embedder=src_embedder,
            encoder=encoder,
            attender=attender,
            trg_embedder=trg_embedder,
            decoder=decoder,
        )
        self.model.set_train(False)

        src_sents = self.model.src_reader.read_sents("examples/data/head.ja")
        self.src_data = list(src_sents)
        trg_sents = self.model.trg_reader.read_sents("examples/data/head.en")
        self.trg_data = list(trg_sents)

    def test_single(self):
        """The negated search score of the first sentence matches its loss."""
        dy.renew_cg()
        src_batch = xnmt.batcher.mark_as_batch([self.src_data[0]])
        search_strategy = GreedySearch()
        forced_batch = xnmt.batcher.mark_as_batch([self.trg_data[0]])
        outputs = self.model.generate(src_batch, [0], search_strategy,
                                      forced_trg_ids=forced_batch)
        # Read the score before the computation graph is renewed.
        output_score = outputs[0].score

        dy.renew_cg()
        loss_expr = self.model.calc_loss(
            src=self.src_data[0],
            trg=outputs[0],
            loss_calculator=AutoRegressiveMLELoss(),
        )
        train_loss = loss_expr.value()
        self.assertAlmostEqual(-output_score, train_loss, places=5)
class TestFreeDecodingLoss(unittest.TestCase):
    """Compare the score reported by forced decoding with the training loss."""

    def setUp(self):
        """Build a translator in inference mode and parse the example corpus."""
        xnmt.events.clear()

        context = ModelContext()
        context.dynet_param_collection = PersistentParamCollection(
            "some_file", 1)
        self.model_context = context

        # Components are created in the same order in which they are handed
        # to the translator, so parameter allocation order is unchanged.
        src_embedder = SimpleWordEmbedder(context, vocab_size=100)
        encoder = BiLSTMSeqTransducer(context)
        attender = MlpAttender(context)
        trg_embedder = SimpleWordEmbedder(context, vocab_size=100)
        bridge = CopyBridge(context, dec_layers=1)
        decoder = MlpSoftmaxDecoder(context, vocab_size=100, bridge=bridge)
        self.model = DefaultTranslator(
            src_embedder=src_embedder,
            encoder=encoder,
            attender=attender,
            trg_embedder=trg_embedder,
            decoder=decoder,
        )
        self.model.initialize_training_strategy(TrainingStrategy())
        self.model.set_train(False)
        self.model.initialize_generator()

        self.training_corpus = BilingualTrainingCorpus(
            train_src="examples/data/head.ja",
            train_trg="examples/data/head.en",
            dev_src="examples/data/head.ja",
            dev_trg="examples/data/head.en",
        )
        self.corpus_parser = BilingualCorpusParser(
            src_reader=PlainTextReader(),
            trg_reader=PlainTextReader(),
            training_corpus=self.training_corpus,
        )

    def test_single(self):
        """The negated decoding score of the first sentence matches its loss."""
        corpus = self.training_corpus

        dy.renew_cg()
        self.model.initialize_generator()
        outputs = self.model.generate_output(
            corpus.train_src_data[0],
            0,
            forced_trg_ids=corpus.train_trg_data[0],
        )
        # Read the score before the computation graph is renewed.
        output_score = outputs[0].score

        dy.renew_cg()
        loss_expr = self.model.calc_loss(src=corpus.train_src_data[0],
                                         trg=outputs[0].actions)
        train_loss = loss_expr.value()
        self.assertAlmostEqual(-output_score, train_loss, places=5)
class TestFreeDecodingLoss(unittest.TestCase):
    """Compare the score reported by forced decoding with the training loss."""

    def setUp(self):
        """Build a translator in inference mode and read the example data."""
        xnmt.events.clear()

        param_collection = PersistentParamCollection("some_file", 1)
        self.exp_global = ExpGlobal(dynet_param_collection=param_collection)
        exp_global = self.exp_global

        # Components are created in the same order in which they are handed
        # to the translator, so parameter allocation order is unchanged.
        src_reader = PlainTextReader()
        trg_reader = PlainTextReader()
        src_embedder = SimpleWordEmbedder(exp_global=exp_global,
                                          vocab_size=100)
        encoder = BiLSTMSeqTransducer(exp_global=exp_global)
        attender = MlpAttender(exp_global=exp_global)
        trg_embedder = SimpleWordEmbedder(exp_global=exp_global,
                                          vocab_size=100)
        bridge = CopyBridge(exp_global=exp_global, dec_layers=1)
        decoder = MlpSoftmaxDecoder(exp_global=exp_global,
                                    vocab_size=100,
                                    bridge=bridge)
        self.model = DefaultTranslator(
            src_reader=src_reader,
            trg_reader=trg_reader,
            src_embedder=src_embedder,
            encoder=encoder,
            attender=attender,
            trg_embedder=trg_embedder,
            decoder=decoder,
        )
        self.model.set_train(False)
        self.model.initialize_generator()

        src_sents = self.model.src_reader.read_sents("examples/data/head.ja")
        self.src_data = list(src_sents)
        trg_sents = self.model.trg_reader.read_sents("examples/data/head.en")
        self.trg_data = list(trg_sents)

    def test_single(self):
        """The negated decoding score of the first sentence matches its loss."""
        first_src = self.src_data[0]
        first_trg = self.trg_data[0]

        dy.renew_cg()
        self.model.initialize_generator()
        outputs = self.model.generate_output(first_src, 0,
                                             forced_trg_ids=first_trg)
        # Read the score before the computation graph is renewed.
        output_score = outputs[0].score

        dy.renew_cg()
        loss_expr = self.model.calc_loss(src=first_src,
                                         trg=outputs[0].actions,
                                         loss_calculator=LossCalculator())
        train_loss = loss_expr.value()
        self.assertAlmostEqual(-output_score, train_loss, places=5)
class TestSegmentingEncoder(unittest.TestCase):
    """Tests for a ``DefaultTranslator`` whose encoder is a ``SegmentingSeqTransducer``."""

    def setUp(self):
        """Seed the RNGs, build the model in training mode and pack one batch."""
        # Seeding
        numpy.random.seed(2)
        random.seed(2)
        layer_dim = 64
        xnmt.events.clear()
        ParamManager.init_param_col()
        self.segment_encoder_bilstm = BiLSTMSeqTransducer(input_dim=layer_dim,
                                                          hidden_dim=layer_dim)
        self.segment_composer = SumComposer()
        self.src_reader = CharFromWordTextReader()
        self.trg_reader = PlainTextReader()
        self.loss_calculator = AutoRegressiveMLELoss()

        baseline = Linear(input_dim=layer_dim, output_dim=1)
        policy_network = Linear(input_dim=layer_dim, output_dim=2)
        self.poisson_prior = PoissonPrior(mu=3.3)
        self.eps_greedy = EpsilonGreedy(eps_prob=0.0, prior=self.poisson_prior)
        self.conf_penalty = ConfidencePenalty()
        self.policy_gradient = PolicyGradient(input_dim=layer_dim,
                                              output_dim=2,
                                              baseline=baseline,
                                              policy_network=policy_network,
                                              z_normalization=True,
                                              conf_penalty=self.conf_penalty,
                                              sample=5)
        self.length_prior = PoissonLengthPrior(lmbd=3.3, weight=1)
        self.segmenting_encoder = SegmentingSeqTransducer(
            embed_encoder=self.segment_encoder_bilstm,
            segment_composer=self.segment_composer,
            final_transducer=BiLSTMSeqTransducer(input_dim=layer_dim,
                                                 hidden_dim=layer_dim),
            policy_learning=self.policy_gradient,
            eps_greedy=self.eps_greedy,
            length_prior=self.length_prior,
        )

        self.model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=self.segmenting_encoder,
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=AutoRegressiveDecoder(
                input_dim=layer_dim,
                rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                         hidden_dim=layer_dim,
                                         decoder_input_dim=layer_dim,
                                         yaml_path="decoder"),
                transform=AuxNonLinear(input_dim=layer_dim,
                                       output_dim=layer_dim,
                                       aux_input_dim=layer_dim),
                scorer=Softmax(vocab_size=100, input_dim=layer_dim),
                trg_embed_dim=layer_dim,
                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
        )
        self.model.set_train(True)

        self.layer_dim = layer_dim
        self.src_data = list(
            self.model.src_reader.read_sents("examples/data/head.ja"))
        self.trg_data = list(
            self.model.trg_reader.read_sents("examples/data/head.en"))
        my_batcher = xnmt.batcher.TrgBatcher(batch_size=3,
                                             src_pad_token=1,
                                             trg_pad_token=2)
        self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
        dy.renew_cg(immediate_compute=True, check_validity=True)

    def test_reinforce_loss(self):
        """Check sample counts, segment ends, masked actions and loss factors."""
        self.model.global_fertility = 1.0
        loss, reinforce_loss = self.calc_loss_single_batch()
        pl = self.model.encoder.policy_learning
        # Ensure correct length
        src = self.src[0]
        mask = src.mask.np_arr
        outputs = self.segmenting_encoder.compose_output
        actions = self.segmenting_encoder.segment_actions
        # Ensure sample == outputs
        self.assertEqual(len(outputs), pl.sample)
        self.assertEqual(len(actions), pl.sample)
        for sample_action in actions:
            for i, sample_item in enumerate(sample_action):
                # The last segmentation is 1
                self.assertEqual(sample_item[-1], src[i].len_unpadded())
                # Assert that all flagged actions are </s>
                for j in range(len(mask[i])):
                    if mask[i][j] == 1:
                        self.assertEqual(pl.actions[j][0][i], 1)
        # assertIn reports the container contents when a factor is missing.
        self.assertIn("mle", loss.expr_factors)
        self.assertIn("fertility", loss.expr_factors)
        self.assertIn("rl_reinf", reinforce_loss.expr_factors)
        self.assertIn("rl_baseline", reinforce_loss.expr_factors)
        self.assertIn("rl_confpen", reinforce_loss.expr_factors)
        # Ensure we are sampling from the policy learning
        self.assertEqual(self.model.encoder.segmenting_action,
                         SegmentingSeqTransducer.SegmentingAction.POLICY)

    def calc_loss_single_batch(self):
        """Compute both losses for the first packed batch.

        Returns:
            A ``(loss, reinforce_loss)`` pair: the result of
            ``model.calc_loss`` and of ``model.calc_additional_loss``.
        """
        loss = self.model.calc_loss(self.src[0], self.trg[0],
                                    AutoRegressiveMLELoss())
        reinforce_loss = self.model.calc_additional_loss(
            self.trg[0], self.model, loss)
        return loss, reinforce_loss

    def test_gold_input(self):
        """Without policy learning and eps-greedy the action is GOLD."""
        self.model.encoder.policy_learning = None
        self.model.encoder.eps_greedy = None
        self.calc_loss_single_batch()
        self.assertEqual(self.model.encoder.segmenting_action,
                         SegmentingSeqTransducer.SegmentingAction.GOLD)

    @unittest.skipUnless(has_cython(), "requires cython to run")
    def test_sample_input(self):
        """With ``eps_prob`` set to 1.0 the action is POLICY_SAMPLE."""
        self.model.encoder.eps_greedy.eps_prob = 1.0
        self.calc_loss_single_batch()
        self.assertEqual(
            self.model.encoder.segmenting_action,
            SegmentingSeqTransducer.SegmentingAction.POLICY_SAMPLE)
        self.assertEqual(self.model.encoder.policy_learning.sampling_action,
                         PolicyGradient.SamplingAction.PREDEFINED)

    def test_global_fertility(self):
        """The loss carries a "fertility" factor when global fertility is set."""
        # Test Global fertility weight
        self.model.global_fertility = 1.0
        self.segmenting_encoder.policy_learning = None
        loss1, _ = self.calc_loss_single_batch()
        self.assertIn("fertility", loss1.expr_factors)

    def test_policy_train_test(self):
        """Sampling action is POLICY_CLP in training and POLICY_AMAX otherwise."""
        self.model.set_train(True)
        self.calc_loss_single_batch()
        self.assertEqual(self.model.encoder.policy_learning.sampling_action,
                         PolicyGradient.SamplingAction.POLICY_CLP)
        self.model.set_train(False)
        self.calc_loss_single_batch()
        self.assertEqual(self.model.encoder.policy_learning.sampling_action,
                         PolicyGradient.SamplingAction.POLICY_AMAX)

    def test_no_policy_train_test(self):
        """Without policy learning the action is PURE_SAMPLE in both modes."""
        self.model.encoder.policy_learning = None
        self.model.set_train(True)
        self.calc_loss_single_batch()
        self.assertEqual(self.model.encoder.segmenting_action,
                         SegmentingSeqTransducer.SegmentingAction.PURE_SAMPLE)
        self.model.set_train(False)
        self.calc_loss_single_batch()
        self.assertEqual(self.model.encoder.segmenting_action,
                         SegmentingSeqTransducer.SegmentingAction.PURE_SAMPLE)

    def test_sample_during_search(self):
        """With ``sample_during_search`` set, inference mode uses POLICY."""
        self.model.set_train(False)
        self.model.encoder.sample_during_search = True
        self.calc_loss_single_batch()
        self.assertEqual(self.model.encoder.segmenting_action,
                         SegmentingSeqTransducer.SegmentingAction.POLICY)

    @unittest.skipUnless(has_cython(), "requires cython to run")
    def test_policy_gold(self):
        """Loss computation runs with a ``GoldInputPrior`` as eps-greedy prior."""
        self.model.encoder.eps_greedy.prior = GoldInputPrior("segment")
        self.model.encoder.eps_greedy.eps_prob = 1.0
        self.calc_loss_single_batch()