class TestBatchTraining(unittest.TestCase):

  def setUp(self):
    xnmt.events.clear()
    ParamManager.init_param_col()
    self.src_reader = PlainTextReader()
    self.trg_reader = PlainTextReader()
    self.src_data = list(self.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.trg_reader.read_sents("examples/data/head.en"))

  def assert_single_loss_equals_batch_loss(self, model, pad_src_to_multiple=1):
    """
    Test whether the sum of single-sentence losses equals the batch loss.

    Here the target side is not truncated; masking is used instead.
    """
    batch_size = 5
    src_sents = self.src_data[:batch_size]
    src_min = min([x.sent_len() for x in src_sents])
    src_sents_trunc = [s.words[:src_min] for s in src_sents]
    for single_sent in src_sents_trunc:
      single_sent[src_min - 1] = Vocab.ES
      while len(single_sent) % pad_src_to_multiple != 0:
        single_sent.append(Vocab.ES)

    trg_sents = sorted(self.trg_data[:batch_size], key=lambda x: x.sent_len(), reverse=True)
    trg_max = max([x.sent_len() for x in trg_sents])
    np_arr = np.zeros([batch_size, trg_max])
    for i in range(batch_size):
      for j in range(trg_sents[i].sent_len(), trg_max):
        np_arr[i, j] = 1.0
    trg_masks = Mask(np_arr)
    trg_sents_padded = [[w for w in s] + [Vocab.ES] * (trg_max - s.sent_len()) for s in trg_sents]

    src_sents_trunc = [SimpleSentenceInput(s) for s in src_sents_trunc]
    trg_sents_padded = [SimpleSentenceInput(s) for s in trg_sents_padded]

    single_loss = 0.0
    for sent_id in range(batch_size):
      dy.renew_cg()
      train_loss = model.calc_loss(src=src_sents_trunc[sent_id],
                                   trg=trg_sents[sent_id],
                                   loss_calculator=AutoRegressiveMLELoss()).value()
      single_loss += train_loss

    dy.renew_cg()
    batched_loss = model.calc_loss(src=mark_as_batch(src_sents_trunc),
                                   trg=mark_as_batch(trg_sents_padded, trg_masks),
                                   loss_calculator=AutoRegressiveMLELoss()).value()
    self.assertAlmostEqual(single_loss, np.sum(batched_loss), places=4)

  def test_loss_model1(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=AutoRegressiveDecoder(input_dim=layer_dim,
                                    trg_embed_dim=layer_dim,
                                    rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                             hidden_dim=layer_dim,
                                                             decoder_input_dim=layer_dim,
                                                             yaml_path="model.decoder.rnn"),
                                    transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
                                    scorer=Softmax(input_dim=layer_dim, vocab_size=100),
                                    bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    model.set_train(False)
    self.assert_single_loss_equals_batch_loss(model)

  def test_loss_model2(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=PyramidalLSTMSeqTransducer(layers=3, input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=AutoRegressiveDecoder(input_dim=layer_dim,
                                    trg_embed_dim=layer_dim,
                                    rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                             hidden_dim=layer_dim,
                                                             decoder_input_dim=layer_dim,
                                                             yaml_path="model.decoder.rnn"),
                                    transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
                                    scorer=Softmax(input_dim=layer_dim, vocab_size=100),
                                    bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    model.set_train(False)
    self.assert_single_loss_equals_batch_loss(model, pad_src_to_multiple=4)

  def test_loss_model3(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(layers=3, input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=AutoRegressiveDecoder(input_dim=layer_dim,
                                    trg_embed_dim=layer_dim,
                                    rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                             hidden_dim=layer_dim,
                                                             decoder_input_dim=layer_dim,
                                                             yaml_path="model.decoder.rnn"),
                                    transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
                                    scorer=Softmax(input_dim=layer_dim, vocab_size=100),
                                    bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    model.set_train(False)
    self.assert_single_loss_equals_batch_loss(model)
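
# --- Illustration (not part of the original test suite) ----------------------
# The test above encodes target-side padding as a numpy array in which 1.0
# marks a padded position and 0.0 a real token, then wraps it in Mask so that
# per-timestep losses at padded positions can be zeroed out. A minimal,
# self-contained sketch of that convention; build_trg_mask is a hypothetical
# helper name:

import numpy as np

def build_trg_mask(trg_lens, trg_max):
  """Return a (batch, trg_max) array with 1.0 at padded positions."""
  np_arr = np.zeros([len(trg_lens), trg_max])
  for i, sent_len in enumerate(trg_lens):
    np_arr[i, sent_len:] = 1.0  # everything past the real length is padding
  return np_arr

# For two sentences of lengths 3 and 5 padded to length 5:
# build_trg_mask([3, 5], 5) ->
#   [[0., 0., 0., 1., 1.],
#    [0., 0., 0., 0., 0.]]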
class TestTruncatedBatchTraining(unittest.TestCase):

  def setUp(self):
    xnmt.events.clear()
    ParamManager.init_param_col()
    self.src_reader = PlainTextReader()
    self.trg_reader = PlainTextReader()
    self.src_data = list(self.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.trg_reader.read_sents("examples/data/head.en"))

  def assert_single_loss_equals_batch_loss(self, model, pad_src_to_multiple=1):
    """
    Test whether the sum of single-sentence losses equals the batch loss.

    Src and trg sentences are truncated to the same length, so no masking is
    necessary.
    """
    batch_size = 5
    src_sents = self.src_data[:batch_size]
    src_min = min([len(x) for x in src_sents])
    src_sents_trunc = [s[:src_min] for s in src_sents]
    for single_sent in src_sents_trunc:
      single_sent[src_min - 1] = Vocab.ES
      while len(single_sent) % pad_src_to_multiple != 0:
        single_sent.append(Vocab.ES)

    trg_sents = self.trg_data[:batch_size]
    trg_min = min([len(x) for x in trg_sents])
    trg_sents_trunc = [s[:trg_min] for s in trg_sents]
    for single_sent in trg_sents_trunc:
      single_sent[trg_min - 1] = Vocab.ES

    single_loss = 0.0
    for sent_id in range(batch_size):
      dy.renew_cg()
      train_loss = model.calc_loss(src=src_sents_trunc[sent_id],
                                   trg=trg_sents_trunc[sent_id],
                                   loss_calculator=LossCalculator()).value()
      single_loss += train_loss

    dy.renew_cg()
    batched_loss = model.calc_loss(src=mark_as_batch(src_sents_trunc),
                                   trg=mark_as_batch(trg_sents_trunc),
                                   loss_calculator=LossCalculator()).value()
    self.assertAlmostEqual(single_loss, sum(batched_loss), places=4)

  def test_loss_model1(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                trg_embed_dim=layer_dim,
                                rnn_layer=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                               hidden_dim=layer_dim,
                                                               decoder_input_dim=layer_dim,
                                                               yaml_path="model.decoder.rnn_layer"),
                                mlp_layer=MLP(input_dim=layer_dim,
                                              hidden_dim=layer_dim,
                                              decoder_rnn_dim=layer_dim,
                                              vocab_size=100,
                                              yaml_path="model.decoder.rnn_layer"),
                                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    model.set_train(False)
    self.assert_single_loss_equals_batch_loss(model)

  def test_loss_model2(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                trg_embed_dim=layer_dim,
                                rnn_layer=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                               hidden_dim=layer_dim,
                                                               decoder_input_dim=layer_dim,
                                                               yaml_path="model.decoder.rnn_layer"),
                                mlp_layer=MLP(input_dim=layer_dim,
                                              hidden_dim=layer_dim,
                                              decoder_rnn_dim=layer_dim,
                                              vocab_size=100,
                                              yaml_path="model.decoder.rnn_layer"),
                                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    model.set_train(False)
    self.assert_single_loss_equals_batch_loss(model, pad_src_to_multiple=4)

  def test_loss_model3(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                trg_embed_dim=layer_dim,
                                rnn_layer=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                               hidden_dim=layer_dim,
                                                               decoder_input_dim=layer_dim,
                                                               yaml_path="model.decoder.rnn_layer"),
                                mlp_layer=MLP(input_dim=layer_dim,
                                              hidden_dim=layer_dim,
                                              decoder_rnn_dim=layer_dim,
                                              vocab_size=100,
                                              yaml_path="model.decoder.rnn_layer"),
                                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    model.set_train(False)
    self.assert_single_loss_equals_batch_loss(model)

  def test_loss_model4(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=DotAttender(),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                trg_embed_dim=layer_dim,
                                rnn_layer=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                               hidden_dim=layer_dim,
                                                               decoder_input_dim=layer_dim,
                                                               yaml_path="model.decoder.rnn_layer"),
                                mlp_layer=MLP(input_dim=layer_dim,
                                              hidden_dim=layer_dim,
                                              decoder_rnn_dim=layer_dim,
                                              vocab_size=100,
                                              yaml_path="model.decoder.rnn_layer"),
                                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    model.set_train(False)
    self.assert_single_loss_equals_batch_loss(model)
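
# --- Illustration (not part of the original test suite) ----------------------
# A hypothetical helper (truncate_and_pad) distilling the truncation pattern
# used in both test classes above: cut every sentence to the batch minimum,
# force the final token back to the end-of-sentence id, and pad to a multiple
# where required (e.g. a pyramidal encoder needs lengths divisible by its
# downsampling factor):

def truncate_and_pad(word_ids, es_id, pad_to_multiple=1):
  """word_ids: list of token-id lists; es_id: the </s> token id."""
  min_len = min(len(s) for s in word_ids)
  out = []
  for s in word_ids:
    trunc = list(s[:min_len])
    trunc[min_len - 1] = es_id  # a truncated sentence must still end in </s>
    while len(trunc) % pad_to_multiple != 0:
      trunc.append(es_id)
    out.append(trunc)
  return out

# truncate_and_pad([[4, 7, 9, 2], [5, 8, 2]], es_id=2, pad_to_multiple=4)
# -> [[4, 7, 2, 2], [5, 8, 2, 2]]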
xnmt.tee.set_out_file(log_file)

ParamManager.init_param_col()
ParamManager.param_col.model_file = model_file

src_vocab = Vocab(vocab_file="examples/data/head.ja.vocab")
trg_vocab = Vocab(vocab_file="examples/data/head.en.vocab")

batcher = SrcBatcher(batch_size=64)

inference = SimpleInference(batcher=batcher)

layer_dim = 512

model = DefaultTranslator(
  src_reader=PlainTextReader(vocab=src_vocab),
  trg_reader=PlainTextReader(vocab=trg_vocab),
  src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=len(src_vocab)),
  encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=1),
  attender=MlpAttender(hidden_dim=layer_dim, state_dim=layer_dim, input_dim=layer_dim),
  trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=len(trg_vocab)),
  decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                            rnn_layer=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                           hidden_dim=layer_dim,
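
# --- Illustration (not part of the original script) ---------------------------
# SrcBatcher above groups the corpus into minibatches of 64 by source length,
# so sentences of similar length share a batch and padding stays small. A
# rough, simplified sketch of that idea (an assumption about the strategy, not
# xnmt's actual implementation):

def batch_by_src_len(sent_pairs, batch_size=64):
  """sent_pairs: list of (src, trg) token-id lists."""
  by_len = sorted(sent_pairs, key=lambda pair: len(pair[0]))
  return [by_len[i:i + batch_size] for i in range(0, len(by_len), batch_size)]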
class TestEncoder(unittest.TestCase):

  def setUp(self):
    xnmt.events.clear()
    ParamManager.init_param_col()
    self.src_reader = PlainTextReader()
    self.trg_reader = PlainTextReader()
    self.src_data = list(self.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.trg_reader.read_sents("examples/data/head.en"))

  @xnmt.events.register_xnmt_event
  def set_train(self, val):
    pass

  @xnmt.events.register_xnmt_event
  def start_sent(self, src):
    pass

  def assert_in_out_len_equal(self, model):
    dy.renew_cg()
    self.set_train(True)
    src = self.src_data[0]
    self.start_sent(src)
    embeddings = model.src_embedder.embed_sent(src)
    encodings = model.encoder(embeddings)
    self.assertEqual(len(embeddings), len(encodings))

  def test_bi_lstm_encoder_len(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=MlpSoftmaxDecoder(input_dim=layer_dim, lstm_dim=layer_dim, mlp_hidden_dim=layer_dim,
                                trg_embed_dim=layer_dim, vocab_size=100),
    )
    self.assert_in_out_len_equal(model)

  def test_uni_lstm_encoder_len(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=MlpSoftmaxDecoder(input_dim=layer_dim, lstm_dim=layer_dim, mlp_hidden_dim=layer_dim,
                                trg_embed_dim=layer_dim, vocab_size=100),
    )
    self.assert_in_out_len_equal(model)

  def test_res_lstm_encoder_len(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=ResidualLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=MlpSoftmaxDecoder(input_dim=layer_dim, lstm_dim=layer_dim, mlp_hidden_dim=layer_dim,
                                trg_embed_dim=layer_dim, vocab_size=100),
    )
    self.assert_in_out_len_equal(model)

  def test_py_lstm_encoder_len(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=MlpSoftmaxDecoder(input_dim=layer_dim, lstm_dim=layer_dim, mlp_hidden_dim=layer_dim,
                                trg_embed_dim=layer_dim, vocab_size=100),
    )
    self.set_train(True)
    for sent_i in range(10):
      dy.renew_cg()
      src = self.src_data[sent_i].get_padded_sent(Vocab.ES, 4 - (len(self.src_data[sent_i]) % 4))
      self.start_sent(src)
      embeddings = model.src_embedder.embed_sent(src)
      encodings = model.encoder(embeddings)
      self.assertEqual(int(math.ceil(len(embeddings) / float(4))), len(encodings))

  def test_py_lstm_mask(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=1),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=MlpSoftmaxDecoder(input_dim=layer_dim, lstm_dim=layer_dim, mlp_hidden_dim=layer_dim,
                                trg_embed_dim=layer_dim, vocab_size=100),
    )
    batcher = xnmt.batcher.TrgBatcher(batch_size=3)
    train_src, _ = batcher.pack(self.src_data, self.trg_data)
    self.set_train(True)
    for sent_i in range(3):
      dy.renew_cg()
      src = train_src[sent_i]
      self.start_sent(src)
      embeddings = model.src_embedder.embed_sent(src)
      encodings = model.encoder(embeddings)
      if train_src[sent_i].mask is None:
        assert encodings.mask is None
      else:
        np.testing.assert_array_almost_equal(train_src[sent_i].mask.np_arr,
                                             encodings.mask.np_arr)
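
# --- Illustration (not part of the original test suite) ----------------------
# The length check in test_py_lstm_encoder_len relies on the pyramidal encoder
# halving the sequence at every layer after the first, so 3 layers shrink the
# input by 2**(3-1) = 4; hence the padding to a multiple of 4 and the
# ceil(len/4) assertion above. A quick arithmetic sketch (pyramidal_out_len is
# a hypothetical name):

import math

def pyramidal_out_len(in_len, layers):
  out = in_len
  for _ in range(layers - 1):
    out = int(math.ceil(out / 2.0))  # each downsampling layer halves the length
  return out

assert pyramidal_out_len(12, 3) == 3  # 12 -> 6 -> 3
assert pyramidal_out_len(10, 3) == 3  # 10 -> 5 -> 3 (ceil at each halving)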