def setUp(self):
  layer_dim = 512
  xnmt.events.clear()
  ParamManager.init_param_col()
  self.model = DefaultTranslator(
    src_reader=PlainTextReader(),
    trg_reader=PlainTextReader(),
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    decoder=AutoRegressiveDecoder(
      input_dim=layer_dim,
      trg_embed_dim=layer_dim,
      rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                               decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
      transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
      scorer=Softmax(input_dim=layer_dim, vocab_size=100),
      bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  self.model.set_train(False)
  self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
  self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))
def test_py_lstm_encoder_len(self):
  layer_dim = 512
  model = DefaultTranslator(
    src_reader=self.src_reader,
    trg_reader=self.trg_reader,
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    decoder=MlpSoftmaxDecoder(input_dim=layer_dim, lstm_dim=layer_dim, mlp_hidden_dim=layer_dim,
                              trg_embed_dim=layer_dim, vocab_size=100),
  )
  self.set_train(True)
  for sent_i in range(10):
    dy.renew_cg()
    src = self.src_data[sent_i].get_padded_sent(Vocab.ES, 4 - (len(self.src_data[sent_i]) % 4))
    self.start_sent(src)
    embeddings = model.src_embedder.embed_sent(src)
    encodings = model.encoder(embeddings)
    self.assertEqual(int(math.ceil(len(embeddings) / float(4))), len(encodings))
def setUp(self):
  xnmt.events.clear()
  self.model_context = ModelContext()
  self.model_context.dynet_param_collection = PersistentParamCollection("some_file", 1)
  self.model = DefaultTranslator(
    src_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
    encoder=BiLSTMSeqTransducer(self.model_context),
    attender=MlpAttender(self.model_context),
    trg_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
    decoder=MlpSoftmaxDecoder(self.model_context, vocab_size=100,
                              bridge=CopyBridge(self.model_context, dec_layers=1)),
  )
  self.model.initialize_training_strategy(TrainingStrategy())
  self.model.set_train(False)
  self.model.initialize_generator()
  self.training_corpus = BilingualTrainingCorpus(train_src="examples/data/head.ja",
                                                 train_trg="examples/data/head.en",
                                                 dev_src="examples/data/head.ja",
                                                 dev_trg="examples/data/head.en")
  self.corpus_parser = BilingualCorpusParser(src_reader=PlainTextReader(),
                                             trg_reader=PlainTextReader(),
                                             training_corpus=self.training_corpus)
def test_loss_model1(self):
  layer_dim = 512
  model = DefaultTranslator(
    src_reader=self.src_reader,
    trg_reader=self.trg_reader,
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                              trg_embed_dim=layer_dim,
                              rnn_layer=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                             hidden_dim=layer_dim,
                                                             decoder_input_dim=layer_dim,
                                                             yaml_path="model.decoder.rnn_layer"),
                              mlp_layer=MLP(input_dim=layer_dim,
                                            hidden_dim=layer_dim,
                                            decoder_rnn_dim=layer_dim,
                                            vocab_size=100,
                                            yaml_path="model.decoder.rnn_layer"),
                              bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  model.set_train(False)
  self.assert_single_loss_equals_batch_loss(model)
def test_py_lstm_mask(self):
  model = DefaultTranslator(
    src_reader=self.src_reader,
    trg_reader=self.trg_reader,
    src_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
    encoder=PyramidalLSTMSeqTransducer(self.exp_global, layers=1),
    attender=MlpAttender(self.exp_global),
    trg_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
    decoder=MlpSoftmaxDecoder(self.exp_global, vocab_size=100),
  )
  batcher = xnmt.batcher.TrgBatcher(batch_size=3)
  train_src, _ = batcher.pack(self.src_data, self.trg_data)
  self.set_train(True)
  for sent_i in range(3):
    dy.renew_cg()
    src = train_src[sent_i]
    self.start_sent(src)
    embeddings = model.src_embedder.embed_sent(src)
    encodings = model.encoder(embeddings)
    if train_src[sent_i].mask is None:
      assert encodings.mask is None
    else:
      np.testing.assert_array_almost_equal(train_src[sent_i].mask.np_arr, encodings.mask.np_arr)
def setUp(self):
  layer_dim = 512
  xnmt.events.clear()
  ParamManager.init_param_col()
  self.model = DefaultTranslator(
    src_reader=PlainTextReader(),
    trg_reader=PlainTextReader(),
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    decoder=MlpSoftmaxDecoder(input_dim=layer_dim, lstm_dim=layer_dim, mlp_hidden_dim=layer_dim,
                              trg_embed_dim=layer_dim, vocab_size=100,
                              bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  self.model.set_train(False)
  self.model.initialize_generator()
  self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
  self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))
def setUp(self):
  xnmt.events.clear()
  self.exp_global = ExpGlobal(dynet_param_collection=PersistentParamCollection("some_file", 1))
  self.model = DefaultTranslator(
    src_reader=PlainTextReader(),
    trg_reader=PlainTextReader(),
    src_embedder=SimpleWordEmbedder(exp_global=self.exp_global, vocab_size=100),
    encoder=BiLSTMSeqTransducer(exp_global=self.exp_global),
    attender=MlpAttender(exp_global=self.exp_global),
    trg_embedder=SimpleWordEmbedder(exp_global=self.exp_global, vocab_size=100),
    decoder=MlpSoftmaxDecoder(exp_global=self.exp_global, vocab_size=100,
                              bridge=CopyBridge(exp_global=self.exp_global, dec_layers=1)),
  )
  self.model.set_train(False)
  self.model.initialize_generator(beam=1)
  self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
  self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))
class TestForcedDecodingOutputs(unittest.TestCase):

  def assertItemsEqual(self, l1, l2):
    self.assertEqual(len(l1), len(l2))
    for i in range(len(l1)):
      self.assertEqual(l1[i], l2[i])

  def setUp(self):
    layer_dim = 512
    xnmt.events.clear()
    ParamManager.init_param_col()
    self.model = DefaultTranslator(
      src_reader=PlainTextReader(),
      trg_reader=PlainTextReader(),
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                trg_embed_dim=layer_dim,
                                rnn_layer=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                               hidden_dim=layer_dim,
                                                               decoder_input_dim=layer_dim,
                                                               yaml_path="model.decoder.rnn_layer"),
                                mlp_layer=MLP(input_dim=layer_dim,
                                              hidden_dim=layer_dim,
                                              decoder_rnn_dim=layer_dim,
                                              vocab_size=100,
                                              yaml_path="model.decoder.rnn_layer"),
                                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    self.model.set_train(False)
    self.model.initialize_generator()
    self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))
    self.search = GreedySearch()

  def assert_forced_decoding(self, sent_id):
    dy.renew_cg()
    outputs = self.model.generate_output(self.src_data[sent_id], sent_id, self.search,
                                         forced_trg_ids=self.trg_data[sent_id])
    self.assertItemsEqual(self.trg_data[sent_id], outputs[0].actions)

  def test_forced_decoding(self):
    for i in range(1):
      self.assert_forced_decoding(sent_id=i)
def test_loss_model1(self):
  model = DefaultTranslator(
    src_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
    encoder=BiLSTMSeqTransducer(self.model_context),
    attender=MlpAttender(self.model_context),
    trg_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
    decoder=MlpSoftmaxDecoder(self.model_context, vocab_size=100),
  )
  model.set_train(False)
  self.assert_single_loss_equals_batch_loss(model)
def test_loss_model2(self):
  model = DefaultTranslator(
    src_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
    encoder=PyramidalLSTMSeqTransducer(self.model_context, layers=3),
    attender=MlpAttender(self.model_context),
    trg_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
    decoder=MlpSoftmaxDecoder(self.model_context, vocab_size=100),
  )
  model.set_train(False)
  self.assert_single_loss_equals_batch_loss(model, pad_src_to_multiple=4)
class TestGreedyVsBeam(unittest.TestCase):
  """
  Test if greedy search produces same output as beam search with beam 1.
  """
  def setUp(self):
    layer_dim = 512
    xnmt.events.clear()
    ParamManager.init_param_col()
    self.model = DefaultTranslator(
      src_reader=PlainTextReader(),
      trg_reader=PlainTextReader(),
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        trg_embed_dim=layer_dim,
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    self.model.set_train(False)
    self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))

  def test_greedy_vs_beam(self):
    dy.renew_cg()
    outputs = self.model.generate(xnmt.batcher.mark_as_batch([self.src_data[0]]), [0],
                                  BeamSearch(beam_size=1),
                                  forced_trg_ids=xnmt.batcher.mark_as_batch([self.trg_data[0]]))
    output_score1 = outputs[0].score

    dy.renew_cg()
    outputs = self.model.generate(xnmt.batcher.mark_as_batch([self.src_data[0]]), [0],
                                  GreedySearch(),
                                  forced_trg_ids=xnmt.batcher.mark_as_batch([self.trg_data[0]]))
    output_score2 = outputs[0].score

    self.assertAlmostEqual(output_score1, output_score2)
class TestGreedyVsBeam(unittest.TestCase):
  """
  Test if greedy search produces same output as beam search with beam 1.
  """
  def setUp(self):
    xnmt.events.clear()
    self.model_context = ModelContext()
    self.model_context.dynet_param_collection = PersistentParamCollection("some_file", 1)
    self.model = DefaultTranslator(
      src_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
      encoder=BiLSTMSeqTransducer(self.model_context),
      attender=MlpAttender(self.model_context),
      trg_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
      decoder=MlpSoftmaxDecoder(self.model_context, vocab_size=100,
                                bridge=CopyBridge(self.model_context, dec_layers=1)),
    )
    self.model.initialize_training_strategy(TrainingStrategy())
    self.model.set_train(False)
    self.training_corpus = BilingualTrainingCorpus(train_src="examples/data/head.ja",
                                                   train_trg="examples/data/head.en",
                                                   dev_src="examples/data/head.ja",
                                                   dev_trg="examples/data/head.en")
    self.corpus_parser = BilingualCorpusParser(src_reader=PlainTextReader(),
                                               trg_reader=PlainTextReader(),
                                               training_corpus=self.training_corpus)

  def test_greedy_vs_beam(self):
    dy.renew_cg()
    self.model.initialize_generator(beam=1)
    outputs = self.model.generate_output(self.training_corpus.train_src_data[0], 0,
                                         forced_trg_ids=self.training_corpus.train_trg_data[0])
    output_score1 = outputs[0].score

    dy.renew_cg()
    self.model.initialize_generator()
    outputs = self.model.generate_output(self.training_corpus.train_src_data[0], 0,
                                         forced_trg_ids=self.training_corpus.train_trg_data[0])
    output_score2 = outputs[0].score

    self.assertAlmostEqual(output_score1, output_score2)
def setUp(self):
  # Seeding
  numpy.random.seed(2)
  random.seed(2)
  layer_dim = 64
  xnmt.events.clear()
  ParamManager.init_param_col()
  self.segment_composer = SumComposer()
  self.src_reader = CharFromWordTextReader()
  self.trg_reader = PlainTextReader()
  self.loss_calculator = AutoRegressiveMLELoss()
  self.segmenting_encoder = SegmentingSeqTransducer(
    segment_composer=self.segment_composer,
    final_transducer=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
  )
  self.model = DefaultTranslator(
    src_reader=self.src_reader,
    trg_reader=self.trg_reader,
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=self.segmenting_encoder,
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    decoder=AutoRegressiveDecoder(
      input_dim=layer_dim,
      rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                               decoder_input_dim=layer_dim, yaml_path="decoder"),
      transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim, aux_input_dim=layer_dim),
      scorer=Softmax(vocab_size=100, input_dim=layer_dim),
      trg_embed_dim=layer_dim,
      bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  self.model.set_train(True)

  self.layer_dim = layer_dim
  self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
  self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))
  my_batcher = xnmt.batcher.TrgBatcher(batch_size=3, src_pad_token=1, trg_pad_token=2)
  self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
  dy.renew_cg(immediate_compute=True, check_validity=True)
class TestForcedDecodingOutputs(unittest.TestCase):

  def assertItemsEqual(self, l1, l2):
    self.assertEqual(len(l1), len(l2))
    for i in range(len(l1)):
      self.assertEqual(l1[i], l2[i])

  def setUp(self):
    layer_dim = 512
    xnmt.events.clear()
    ParamManager.init_param_col()
    self.model = DefaultTranslator(
      src_reader=PlainTextReader(),
      trg_reader=PlainTextReader(),
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        trg_embed_dim=layer_dim,
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    self.model.set_train(False)
    self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))

  def assert_forced_decoding(self, sent_id):
    dy.renew_cg()
    outputs = self.model.generate(xnmt.batcher.mark_as_batch([self.src_data[sent_id]]), [sent_id],
                                  BeamSearch(),
                                  forced_trg_ids=xnmt.batcher.mark_as_batch([self.trg_data[sent_id]]))
    self.assertItemsEqual(self.trg_data[sent_id].words, outputs[0].actions)

  def test_forced_decoding(self):
    for i in range(1):
      self.assert_forced_decoding(sent_id=i)
class TestFreeDecodingLoss(unittest.TestCase):

  def setUp(self):
    layer_dim = 512
    xnmt.events.clear()
    ParamManager.init_param_col()
    self.model = DefaultTranslator(
      src_reader=PlainTextReader(),
      trg_reader=PlainTextReader(),
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=MlpSoftmaxDecoder(input_dim=layer_dim, lstm_dim=layer_dim, mlp_hidden_dim=layer_dim,
                                trg_embed_dim=layer_dim, vocab_size=100,
                                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    self.model.set_train(False)
    self.model.initialize_generator(beam=1)
    self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))

  def test_single(self):
    dy.renew_cg()
    self.model.initialize_generator(beam=1)
    outputs = self.model.generate_output(self.src_data[0], 0, forced_trg_ids=self.trg_data[0])

    dy.renew_cg()
    train_loss = self.model.calc_loss(src=self.src_data[0],
                                      trg=outputs[0].actions,
                                      loss_calculator=LossCalculator()).value()

    self.assertAlmostEqual(-outputs[0].score, train_loss, places=4)
class TestFreeDecodingLoss(unittest.TestCase):

  def setUp(self):
    layer_dim = 512
    xnmt.events.clear()
    ParamManager.init_param_col()
    self.model = DefaultTranslator(
      src_reader=PlainTextReader(),
      trg_reader=PlainTextReader(),
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        trg_embed_dim=layer_dim,
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    self.model.set_train(False)
    self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))

  def test_single(self):
    dy.renew_cg()
    outputs = self.model.generate(xnmt.batcher.mark_as_batch([self.src_data[0]]), [0],
                                  GreedySearch(),
                                  forced_trg_ids=xnmt.batcher.mark_as_batch([self.trg_data[0]]))
    output_score = outputs[0].score

    dy.renew_cg()
    train_loss = self.model.calc_loss(src=self.src_data[0],
                                      trg=outputs[0],
                                      loss_calculator=AutoRegressiveMLELoss()).value()

    self.assertAlmostEqual(-output_score, train_loss, places=5)
def test_py_lstm_mask(self):
  layer_dim = 512
  model = DefaultTranslator(
    src_reader=self.src_reader,
    trg_reader=self.trg_reader,
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=1),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    decoder=AutoRegressiveDecoder(
      input_dim=layer_dim,
      trg_embed_dim=layer_dim,
      rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                               decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
      transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
      scorer=Softmax(input_dim=layer_dim, vocab_size=100),
      bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  batcher = xnmt.batcher.TrgBatcher(batch_size=3)
  train_src, _ = batcher.pack(self.src_data, self.trg_data)
  self.set_train(True)
  for sent_i in range(3):
    dy.renew_cg()
    src = train_src[sent_i]
    self.start_sent(src)
    embeddings = model.src_embedder.embed_sent(src)
    encodings = model.encoder.transduce(embeddings)
    if train_src[sent_i].mask is None:
      assert encodings.mask is None
    else:
      np.testing.assert_array_almost_equal(train_src[sent_i].mask.np_arr, encodings.mask.np_arr)
def test_train_dev_loss_equal(self):
  layer_dim = 512
  batcher = SrcBatcher(batch_size=5, break_ties_randomly=False)
  train_args = {}
  train_args['src_file'] = "examples/data/head.ja"
  train_args['trg_file'] = "examples/data/head.en"
  train_args['loss_calculator'] = AutoRegressiveMLELoss()
  train_args['model'] = DefaultTranslator(
    src_reader=PlainTextReader(),
    trg_reader=PlainTextReader(),
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    decoder=AutoRegressiveDecoder(
      input_dim=layer_dim,
      trg_embed_dim=layer_dim,
      rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                               decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
      transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
      scorer=Softmax(input_dim=layer_dim, vocab_size=100),
      bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  train_args['dev_tasks'] = [LossEvalTask(model=train_args['model'],
                                          src_file="examples/data/head.ja",
                                          ref_file="examples/data/head.en",
                                          batcher=batcher)]
  train_args['trainer'] = DummyTrainer()
  train_args['batcher'] = batcher
  train_args['run_for_epochs'] = 1
  training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(**train_args)
  training_regimen.run_training(save_fct=lambda: None)
  self.assertAlmostEqual(training_regimen.train_loss_tracker.epoch_loss.sum_factors() /
                         training_regimen.train_loss_tracker.epoch_words,
                         training_regimen.dev_loss_tracker.dev_score.loss,
                         places=5)
def test_loss_model2(self):
  model = DefaultTranslator(
    src_reader=self.src_reader,
    trg_reader=self.trg_reader,
    src_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
    encoder=PyramidalLSTMSeqTransducer(self.exp_global, layers=3),
    attender=MlpAttender(self.exp_global),
    trg_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
    decoder=MlpSoftmaxDecoder(self.exp_global, vocab_size=100,
                              bridge=CopyBridge(exp_global=self.exp_global, dec_layers=1)),
  )
  model.set_train(False)
  self.assert_single_loss_equals_batch_loss(model, pad_src_to_multiple=4)
class TestFreeDecodingLoss(unittest.TestCase):

  def setUp(self):
    xnmt.events.clear()
    self.model_context = ModelContext()
    self.model_context.dynet_param_collection = PersistentParamCollection("some_file", 1)
    self.model = DefaultTranslator(
      src_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
      encoder=BiLSTMSeqTransducer(self.model_context),
      attender=MlpAttender(self.model_context),
      trg_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
      decoder=MlpSoftmaxDecoder(self.model_context, vocab_size=100,
                                bridge=CopyBridge(self.model_context, dec_layers=1)),
    )
    self.model.initialize_training_strategy(TrainingStrategy())
    self.model.set_train(False)
    self.model.initialize_generator()
    self.training_corpus = BilingualTrainingCorpus(train_src="examples/data/head.ja",
                                                   train_trg="examples/data/head.en",
                                                   dev_src="examples/data/head.ja",
                                                   dev_trg="examples/data/head.en")
    self.corpus_parser = BilingualCorpusParser(src_reader=PlainTextReader(),
                                               trg_reader=PlainTextReader(),
                                               training_corpus=self.training_corpus)

  def test_single(self):
    dy.renew_cg()
    self.model.initialize_generator()
    outputs = self.model.generate_output(self.training_corpus.train_src_data[0], 0,
                                         forced_trg_ids=self.training_corpus.train_trg_data[0])
    output_score = outputs[0].score

    dy.renew_cg()
    train_loss = self.model.calc_loss(src=self.training_corpus.train_src_data[0],
                                      trg=outputs[0].actions).value()

    self.assertAlmostEqual(-output_score, train_loss, places=5)
class TestForcedDecodingOutputs(unittest.TestCase):

  def assertItemsEqual(self, l1, l2):
    self.assertEqual(len(l1), len(l2))
    for i in range(len(l1)):
      self.assertEqual(l1[i], l2[i])

  def setUp(self):
    xnmt.events.clear()
    self.model_context = ModelContext()
    self.model_context.dynet_param_collection = PersistentParamCollection("some_file", 1)
    self.model = DefaultTranslator(
      src_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
      encoder=BiLSTMSeqTransducer(self.model_context),
      attender=MlpAttender(self.model_context),
      trg_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
      decoder=MlpSoftmaxDecoder(self.model_context, vocab_size=100),
    )
    self.model.set_train(False)
    self.model.initialize_generator()
    self.training_corpus = BilingualTrainingCorpus(train_src="examples/data/head.ja",
                                                   train_trg="examples/data/head.en",
                                                   dev_src="examples/data/head.ja",
                                                   dev_trg="examples/data/head.en")
    self.corpus_parser = BilingualCorpusParser(src_reader=PlainTextReader(),
                                               trg_reader=PlainTextReader(),
                                               training_corpus=self.training_corpus)

  def assert_forced_decoding(self, sent_id):
    dy.renew_cg()
    outputs = self.model.generate_output(self.training_corpus.train_src_data[sent_id], sent_id,
                                         forced_trg_ids=self.training_corpus.train_trg_data[sent_id])
    self.assertItemsEqual(self.training_corpus.train_trg_data[sent_id], outputs[0].actions)

  def test_forced_decoding(self):
    for i in range(1):
      self.assert_forced_decoding(sent_id=i)
class TestForcedDecodingOutputs(unittest.TestCase):

  def assertItemsEqual(self, l1, l2):
    self.assertEqual(len(l1), len(l2))
    for i in range(len(l1)):
      self.assertEqual(l1[i], l2[i])

  def setUp(self):
    xnmt.events.clear()
    self.exp_global = ExpGlobal(dynet_param_collection=PersistentParamCollection("some_file", 1))
    self.model = DefaultTranslator(
      src_reader=PlainTextReader(),
      trg_reader=PlainTextReader(),
      src_embedder=SimpleWordEmbedder(exp_global=self.exp_global, vocab_size=100),
      encoder=BiLSTMSeqTransducer(exp_global=self.exp_global),
      attender=MlpAttender(exp_global=self.exp_global),
      trg_embedder=SimpleWordEmbedder(exp_global=self.exp_global, vocab_size=100),
      decoder=MlpSoftmaxDecoder(exp_global=self.exp_global, vocab_size=100,
                                bridge=CopyBridge(exp_global=self.exp_global, dec_layers=1)),
    )
    self.model.set_train(False)
    self.model.initialize_generator(beam=1)
    self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))

  def assert_forced_decoding(self, sent_id):
    dy.renew_cg()
    outputs = self.model.generate_output(self.src_data[sent_id], sent_id,
                                         forced_trg_ids=self.trg_data[sent_id])
    self.assertItemsEqual(self.trg_data[sent_id], outputs[0].actions)

  def test_forced_decoding(self):
    for i in range(1):
      self.assert_forced_decoding(sent_id=i)
def test_res_lstm_encoder_len(self):
  model = DefaultTranslator(
    src_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
    encoder=ResidualLSTMSeqTransducer(self.model_context, layers=3),
    attender=MlpAttender(self.model_context),
    trg_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
    decoder=MlpSoftmaxDecoder(self.model_context, vocab_size=100),
  )
  self.assert_in_out_len_equal(model)
def test_uni_lstm_encoder_len(self):
  model = DefaultTranslator(
    src_reader=self.src_reader,
    trg_reader=self.trg_reader,
    src_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
    encoder=UniLSTMSeqTransducer(self.exp_global),
    attender=MlpAttender(self.exp_global),
    trg_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
    decoder=MlpSoftmaxDecoder(self.exp_global, vocab_size=100),
  )
  self.assert_in_out_len_equal(model)
def test_loss_model4(self):
  layer_dim = 512
  model = DefaultTranslator(
    src_reader=self.src_reader,
    trg_reader=self.trg_reader,
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    attender=DotAttender(),
    trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    decoder=MlpSoftmaxDecoder(input_dim=layer_dim, lstm_dim=layer_dim, mlp_hidden_dim=layer_dim,
                              trg_embed_dim=layer_dim, vocab_size=100,
                              bridge=CopyBridge(dec_layers=1, dec_dim=layer_dim)),
  )
  model.set_train(False)
  self.assert_single_loss_equals_batch_loss(model)
def test_loss_model2(self):
  layer_dim = 512
  model = DefaultTranslator(
    src_reader=self.src_reader,
    trg_reader=self.trg_reader,
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    decoder=AutoRegressiveDecoder(
      input_dim=layer_dim,
      trg_embed_dim=layer_dim,
      rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                               decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
      transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
      scorer=Softmax(input_dim=layer_dim, vocab_size=100),
      bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  model.set_train(False)
  self.assert_single_loss_equals_batch_loss(model, pad_src_to_multiple=4)
def test_overfitting(self):
  layer_dim = 16
  batcher = SrcBatcher(batch_size=10, break_ties_randomly=False)
  train_args = {}
  train_args['src_file'] = "examples/data/head.ja"
  train_args['trg_file'] = "examples/data/head.en"
  train_args['loss_calculator'] = MLELoss()
  train_args['model'] = DefaultTranslator(
    src_reader=PlainTextReader(),
    trg_reader=PlainTextReader(),
    src_embedder=SimpleWordEmbedder(vocab_size=100, emb_dim=layer_dim),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(vocab_size=100, emb_dim=layer_dim),
    decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                              trg_embed_dim=layer_dim,
                              rnn_layer=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                             hidden_dim=layer_dim,
                                                             decoder_input_dim=layer_dim,
                                                             yaml_path="model.decoder.rnn_layer"),
                              mlp_layer=MLP(input_dim=layer_dim,
                                            hidden_dim=layer_dim,
                                            decoder_rnn_dim=layer_dim,
                                            vocab_size=100,
                                            yaml_path="model.decoder.rnn_layer"),
                              bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  train_args['dev_tasks'] = [LossEvalTask(model=train_args['model'],
                                          src_file="examples/data/head.ja",
                                          ref_file="examples/data/head.en",
                                          batcher=batcher)]
  train_args['run_for_epochs'] = 1
  train_args['trainer'] = AdamTrainer(alpha=0.1)
  train_args['batcher'] = batcher
  training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(**train_args)
  for _ in range(50):
    training_regimen.run_training(save_fct=lambda: None, update_weights=True)
  self.assertAlmostEqual(0.0,
                         training_regimen.train_loss_tracker.epoch_loss.sum() /
                         training_regimen.train_loss_tracker.epoch_words,
                         places=2)
class TestGreedyVsBeam(unittest.TestCase):
  """
  Test if greedy search produces same output as beam search with beam 1.
  """
  def setUp(self):
    xnmt.events.clear()
    self.exp_global = ExpGlobal(dynet_param_collection=PersistentParamCollection("some_file", 1))
    self.model = DefaultTranslator(
      src_reader=PlainTextReader(),
      trg_reader=PlainTextReader(),
      src_embedder=SimpleWordEmbedder(exp_global=self.exp_global, vocab_size=100),
      encoder=BiLSTMSeqTransducer(exp_global=self.exp_global),
      attender=MlpAttender(exp_global=self.exp_global),
      trg_embedder=SimpleWordEmbedder(exp_global=self.exp_global, vocab_size=100),
      decoder=MlpSoftmaxDecoder(exp_global=self.exp_global, vocab_size=100,
                                bridge=CopyBridge(exp_global=self.exp_global, dec_layers=1)),
    )
    self.model.set_train(False)
    self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))

  def test_greedy_vs_beam(self):
    dy.renew_cg()
    self.model.initialize_generator(beam=1)
    outputs = self.model.generate_output(self.src_data[0], 0, forced_trg_ids=self.trg_data[0])
    output_score1 = outputs[0].score

    dy.renew_cg()
    self.model.initialize_generator()
    outputs = self.model.generate_output(self.src_data[0], 0, forced_trg_ids=self.trg_data[0])
    output_score2 = outputs[0].score

    self.assertAlmostEqual(output_score1, output_score2)
def test_uni_lstm_encoder_len(self):
  layer_dim = 512
  model = DefaultTranslator(
    src_reader=self.src_reader,
    trg_reader=self.trg_reader,
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    decoder=AutoRegressiveDecoder(
      input_dim=layer_dim,
      trg_embed_dim=layer_dim,
      rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                               decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
      transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
      scorer=Softmax(input_dim=layer_dim, vocab_size=100),
      bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  self.assert_in_out_len_equal(model)
class TestFreeDecodingLoss(unittest.TestCase):

  def setUp(self):
    xnmt.events.clear()
    self.exp_global = ExpGlobal(dynet_param_collection=PersistentParamCollection("some_file", 1))
    self.model = DefaultTranslator(
      src_reader=PlainTextReader(),
      trg_reader=PlainTextReader(),
      src_embedder=SimpleWordEmbedder(exp_global=self.exp_global, vocab_size=100),
      encoder=BiLSTMSeqTransducer(exp_global=self.exp_global),
      attender=MlpAttender(exp_global=self.exp_global),
      trg_embedder=SimpleWordEmbedder(exp_global=self.exp_global, vocab_size=100),
      decoder=MlpSoftmaxDecoder(exp_global=self.exp_global, vocab_size=100,
                                bridge=CopyBridge(exp_global=self.exp_global, dec_layers=1)),
    )
    self.model.set_train(False)
    self.model.initialize_generator()
    self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))

  def test_single(self):
    dy.renew_cg()
    self.model.initialize_generator()
    outputs = self.model.generate_output(self.src_data[0], 0, forced_trg_ids=self.trg_data[0])
    output_score = outputs[0].score

    dy.renew_cg()
    train_loss = self.model.calc_loss(src=self.src_data[0],
                                      trg=outputs[0].actions,
                                      loss_calculator=LossCalculator()).value()

    self.assertAlmostEqual(-output_score, train_loss, places=5)