Example #1
class TestBatchTraining(unittest.TestCase):

  def setUp(self):
    xnmt.events.clear()
    ParamManager.init_param_col()

    self.src_reader = PlainTextReader()
    self.trg_reader = PlainTextReader()
    self.src_data = list(self.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.trg_reader.read_sents("examples/data/head.en"))

  def assert_single_loss_equals_batch_loss(self, model, pad_src_to_multiple=1):
    """
    Tests whether single loss equals batch loss.
    Here we don't truncate the target side and use masking.
    """
    batch_size = 5
    src_sents = self.src_data[:batch_size]
    src_min = min([x.sent_len() for x in src_sents])
    src_sents_trunc = [s.words[:src_min] for s in src_sents]
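    # Truncation may cut off the end-of-sentence token, so re-insert EOS at the
    # last position; pad to a multiple so encoders that downsample the sequence
    # (e.g. the pyramidal LSTM) receive a compatible input length.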
    for single_sent in src_sents_trunc:
      single_sent[src_min-1] = Vocab.ES
      while len(single_sent)%pad_src_to_multiple != 0:
        single_sent.append(Vocab.ES)
    trg_sents = sorted(self.trg_data[:batch_size], key=lambda x: x.sent_len(), reverse=True)
    trg_max = max([x.sent_len() for x in trg_sents])
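    # Build an xnmt Mask over the target batch: 1.0 marks padded positions
    # (everything past each sentence's real length), 0.0 marks real tokens.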
    np_arr = np.zeros([batch_size, trg_max])
    for i in range(batch_size):
      for j in range(trg_sents[i].sent_len(), trg_max):
        np_arr[i,j] = 1.0
    trg_masks = Mask(np_arr)
    trg_sents_padded = [[w for w in s] + [Vocab.ES]*(trg_max-s.sent_len()) for s in trg_sents]

    src_sents_trunc = [SimpleSentenceInput(s) for s in src_sents_trunc]
    trg_sents_padded = [SimpleSentenceInput(s) for s in trg_sents_padded]

    single_loss = 0.0
    for sent_id in range(batch_size):
      dy.renew_cg()
      train_loss = model.calc_loss(src=src_sents_trunc[sent_id],
                                   trg=trg_sents[sent_id],
                                   loss_calculator=AutoRegressiveMLELoss()).value()
      single_loss += train_loss

    dy.renew_cg()

    batched_loss = model.calc_loss(src=mark_as_batch(src_sents_trunc),
                                   trg=mark_as_batch(trg_sents_padded, trg_masks),
                                   loss_calculator=AutoRegressiveMLELoss()).value()
    self.assertAlmostEqual(single_loss, np.sum(batched_loss), places=4)

  def test_loss_model1(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=AutoRegressiveDecoder(input_dim=layer_dim,
                                    trg_embed_dim=layer_dim,
                                    rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                             hidden_dim=layer_dim,
                                                             decoder_input_dim=layer_dim,
                                                             yaml_path="model.decoder.rnn"),
                                    transform=NonLinear(input_dim=layer_dim*2, output_dim=layer_dim),
                                    scorer=Softmax(input_dim=layer_dim, vocab_size=100),
                                    bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    model.set_train(False)
    self.assert_single_loss_equals_batch_loss(model)

  def test_loss_model2(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=PyramidalLSTMSeqTransducer(layers=3, input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=AutoRegressiveDecoder(input_dim=layer_dim,
                                    trg_embed_dim=layer_dim,
                                    rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                             hidden_dim=layer_dim,
                                                             decoder_input_dim=layer_dim,
                                                             yaml_path="model.decoder.rnn"),
                                    transform=NonLinear(input_dim=layer_dim*2, output_dim=layer_dim),
                                    scorer=Softmax(input_dim=layer_dim, vocab_size=100),
                                    bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    model.set_train(False)
    self.assert_single_loss_equals_batch_loss(model, pad_src_to_multiple=4)

  def test_loss_model3(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(layers=3, input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=AutoRegressiveDecoder(input_dim=layer_dim,
                                    trg_embed_dim=layer_dim,
                                    rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                             hidden_dim=layer_dim,
                                                             decoder_input_dim=layer_dim,
                                                             yaml_path="model.decoder.rnn"),
                                    transform=NonLinear(input_dim=layer_dim*2, output_dim=layer_dim),
                                    scorer=Softmax(input_dim=layer_dim, vocab_size=100),
                                    bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    model.set_train(False)
    self.assert_single_loss_equals_batch_loss(model)
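
The invariant these tests check can be seen in a toy setting: when padded
positions are masked out of the loss, the batched loss sums to the same value
as the per-sentence losses. A minimal numpy sketch of that idea (made-up loss
values, not the xnmt API):

import numpy as np

# Per-token losses for a "batch" of two sentences padded to length 4.
tok_loss = np.array([[0.5, 0.2, 0.1, 0.3],
                     [0.4, 0.6, 0.0, 0.0]])
# xnmt-style mask: 1.0 marks padded positions, 0.0 marks real tokens.
mask = np.array([[0.0, 0.0, 0.0, 0.0],
                 [0.0, 0.0, 1.0, 1.0]])
batched = (tok_loss * (1.0 - mask)).sum(axis=1)  # per-sentence losses in batch
single = [tok_loss[0, :4].sum(), tok_loss[1, :2].sum()]  # true lengths 4 and 2
assert np.isclose(batched.sum(), np.sum(single))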
Example #2
class TestTruncatedBatchTraining(unittest.TestCase):
    def setUp(self):
        xnmt.events.clear()
        ParamManager.init_param_col()

        self.src_reader = PlainTextReader()
        self.trg_reader = PlainTextReader()
        self.src_data = list(
            self.src_reader.read_sents("examples/data/head.ja"))
        self.trg_data = list(
            self.trg_reader.read_sents("examples/data/head.en"))

    def assert_single_loss_equals_batch_loss(self,
                                             model,
                                             pad_src_to_multiple=1):
        """
    Tests whether single loss equals batch loss.
    Truncating src / trg sents to same length so no masking is necessary
    """
        batch_size = 5
        src_sents = self.src_data[:batch_size]
        src_min = min([len(x) for x in src_sents])
        src_sents_trunc = [s[:src_min] for s in src_sents]
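        # Re-insert EOS at the truncated end of each source sentence and pad to
        # the requested multiple (required by downsampling encoders such as the
        # pyramidal LSTM).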
        for single_sent in src_sents_trunc:
            single_sent[src_min - 1] = Vocab.ES
            while len(single_sent) % pad_src_to_multiple != 0:
                single_sent.append(Vocab.ES)
        trg_sents = self.trg_data[:batch_size]
        trg_min = min([len(x) for x in trg_sents])
        trg_sents_trunc = [s[:trg_min] for s in trg_sents]
        for single_sent in trg_sents_trunc:
            single_sent[trg_min - 1] = Vocab.ES

        single_loss = 0.0
        for sent_id in range(batch_size):
            dy.renew_cg()
            train_loss = model.calc_loss(
                src=src_sents_trunc[sent_id],
                trg=trg_sents_trunc[sent_id],
                loss_calculator=LossCalculator()).value()
            single_loss += train_loss

        dy.renew_cg()

        batched_loss = model.calc_loss(
            src=mark_as_batch(src_sents_trunc),
            trg=mark_as_batch(trg_sents_trunc),
            loss_calculator=LossCalculator()).value()
        self.assertAlmostEqual(single_loss, sum(batched_loss), places=4)

    def test_loss_model1(self):
        layer_dim = 512
        model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=BiLSTMSeqTransducer(input_dim=layer_dim,
                                        hidden_dim=layer_dim),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                      trg_embed_dim=layer_dim,
                                      rnn_layer=UniLSTMSeqTransducer(
                                          input_dim=layer_dim,
                                          hidden_dim=layer_dim,
                                          decoder_input_dim=layer_dim,
                                          yaml_path="model.decoder.rnn_layer"),
                                      mlp_layer=MLP(
                                          input_dim=layer_dim,
                                          hidden_dim=layer_dim,
                                          decoder_rnn_dim=layer_dim,
                                          vocab_size=100,
                                          yaml_path="model.decoder.rnn_layer"),
                                      bridge=CopyBridge(dec_dim=layer_dim,
                                                        dec_layers=1)),
        )
        model.set_train(False)
        self.assert_single_loss_equals_batch_loss(model)

    def test_loss_model2(self):
        layer_dim = 512
        model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim,
                                               hidden_dim=layer_dim,
                                               layers=3),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                      trg_embed_dim=layer_dim,
                                      rnn_layer=UniLSTMSeqTransducer(
                                          input_dim=layer_dim,
                                          hidden_dim=layer_dim,
                                          decoder_input_dim=layer_dim,
                                          yaml_path="model.decoder.rnn_layer"),
                                      mlp_layer=MLP(
                                          input_dim=layer_dim,
                                          hidden_dim=layer_dim,
                                          decoder_rnn_dim=layer_dim,
                                          vocab_size=100,
                                          yaml_path="model.decoder.rnn_layer"),
                                      bridge=CopyBridge(dec_dim=layer_dim,
                                                        dec_layers=1)),
        )
        model.set_train(False)
        self.assert_single_loss_equals_batch_loss(model, pad_src_to_multiple=4)

    def test_loss_model3(self):
        layer_dim = 512
        model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=BiLSTMSeqTransducer(input_dim=layer_dim,
                                        hidden_dim=layer_dim,
                                        layers=3),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                      trg_embed_dim=layer_dim,
                                      rnn_layer=UniLSTMSeqTransducer(
                                          input_dim=layer_dim,
                                          hidden_dim=layer_dim,
                                          decoder_input_dim=layer_dim,
                                          yaml_path="model.decoder.rnn_layer"),
                                      mlp_layer=MLP(
                                          input_dim=layer_dim,
                                          hidden_dim=layer_dim,
                                          decoder_rnn_dim=layer_dim,
                                          vocab_size=100,
                                          yaml_path="model.decoder.rnn_layer"),
                                      bridge=CopyBridge(dec_dim=layer_dim,
                                                        dec_layers=1)),
        )
        model.set_train(False)
        self.assert_single_loss_equals_batch_loss(model)

    def test_loss_model4(self):
        layer_dim = 512
        model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=BiLSTMSeqTransducer(input_dim=layer_dim,
                                        hidden_dim=layer_dim),
            attender=DotAttender(),
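            # DotAttender takes no dimension arguments: it scores encoder
            # states against the decoder state with a plain dot product, so
            # the two dimensions must already match (both layer_dim here).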
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                      trg_embed_dim=layer_dim,
                                      rnn_layer=UniLSTMSeqTransducer(
                                          input_dim=layer_dim,
                                          hidden_dim=layer_dim,
                                          decoder_input_dim=layer_dim,
                                          yaml_path="model.decoder.rnn_layer"),
                                      mlp_layer=MLP(
                                          input_dim=layer_dim,
                                          hidden_dim=layer_dim,
                                          decoder_rnn_dim=layer_dim,
                                          vocab_size=100,
                                          yaml_path="model.decoder.rnn_layer"),
                                      bridge=CopyBridge(dec_dim=layer_dim,
                                                        dec_layers=1)),
        )
        model.set_train(False)
        self.assert_single_loss_equals_batch_loss(model)
Example #3
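# Programmatic xnmt setup: redirect logging to a file, initialize the global
# parameter collection, and load the source/target vocabularies before
# assembling the model.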
xnmt.tee.set_out_file(log_file)

ParamManager.init_param_col()
ParamManager.param_col.model_file = model_file

src_vocab = Vocab(vocab_file="examples/data/head.ja.vocab")
trg_vocab = Vocab(vocab_file="examples/data/head.en.vocab")

batcher = SrcBatcher(batch_size=64)

inference = SimpleInference(batcher=batcher)

layer_dim = 512

model = DefaultTranslator(
    src_reader=PlainTextReader(vocab=src_vocab),
    trg_reader=PlainTextReader(vocab=trg_vocab),
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim,
                                    vocab_size=len(src_vocab)),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim,
                                hidden_dim=layer_dim,
                                layers=1),
    attender=MlpAttender(hidden_dim=layer_dim,
                         state_dim=layer_dim,
                         input_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim,
                                    vocab_size=len(trg_vocab)),
    decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                              trg_embed_dim=layer_dim,
                              rnn_layer=UniLSTMSeqTransducer(
                                  input_dim=layer_dim,
                                  hidden_dim=layer_dim,
                                  decoder_input_dim=layer_dim,
                                  yaml_path="model.decoder.rnn_layer"),
                              # The listing is cut off above; the remaining
                              # arguments are assumed to follow the pattern of
                              # Example #2's MlpSoftmaxDecoder.
                              mlp_layer=MLP(input_dim=layer_dim,
                                            hidden_dim=layer_dim,
                                            decoder_rnn_dim=layer_dim,
                                            vocab_size=len(trg_vocab),
                                            yaml_path="model.decoder.rnn_layer"),
                              bridge=CopyBridge(dec_dim=layer_dim,
                                                dec_layers=1)),
)
Example #4
class TestEncoder(unittest.TestCase):
    def setUp(self):
        xnmt.events.clear()
        ParamManager.init_param_col()

        self.src_reader = PlainTextReader()
        self.trg_reader = PlainTextReader()
        self.src_data = list(
            self.src_reader.read_sents("examples/data/head.ja"))
        self.trg_data = list(
            self.trg_reader.read_sents("examples/data/head.en"))

    # Dummy event senders: registering set_train and start_sent lets the test
    # fire the xnmt events that model components listen to.
    @xnmt.events.register_xnmt_event
    def set_train(self, val):
        pass

    @xnmt.events.register_xnmt_event
    def start_sent(self, src):
        pass

    def assert_in_out_len_equal(self, model):
        dy.renew_cg()
        self.set_train(True)
        src = self.src_data[0]
        self.start_sent(src)
        embeddings = model.src_embedder.embed_sent(src)
        encodings = model.encoder(embeddings)
        self.assertEqual(len(embeddings), len(encodings))

    def test_bi_lstm_encoder_len(self):
        layer_dim = 512
        model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=BiLSTMSeqTransducer(input_dim=layer_dim,
                                        hidden_dim=layer_dim,
                                        layers=3),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                      lstm_dim=layer_dim,
                                      mlp_hidden_dim=layer_dim,
                                      trg_embed_dim=layer_dim,
                                      vocab_size=100),
        )
        self.assert_in_out_len_equal(model)

    def test_uni_lstm_encoder_len(self):
        layer_dim = 512
        model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=UniLSTMSeqTransducer(input_dim=layer_dim,
                                         hidden_dim=layer_dim),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                      lstm_dim=layer_dim,
                                      mlp_hidden_dim=layer_dim,
                                      trg_embed_dim=layer_dim,
                                      vocab_size=100),
        )
        self.assert_in_out_len_equal(model)

    def test_res_lstm_encoder_len(self):
        layer_dim = 512
        model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=ResidualLSTMSeqTransducer(input_dim=layer_dim,
                                              hidden_dim=layer_dim,
                                              layers=3),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                      lstm_dim=layer_dim,
                                      mlp_hidden_dim=layer_dim,
                                      trg_embed_dim=layer_dim,
                                      vocab_size=100),
        )
        self.assert_in_out_len_equal(model)

    def test_py_lstm_encoder_len(self):
        layer_dim = 512
        model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim,
                                               hidden_dim=layer_dim,
                                               layers=3),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                      lstm_dim=layer_dim,
                                      mlp_hidden_dim=layer_dim,
                                      trg_embed_dim=layer_dim,
                                      vocab_size=100),
        )
        self.set_train(True)
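        # A 3-layer pyramidal LSTM halves the sequence length after each of its
        # two lower layers, so the encoding should be 1/4 the padded input
        # length; sources are padded with EOS to a multiple of 4.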
        for sent_i in range(10):
            dy.renew_cg()
            src = self.src_data[sent_i].get_padded_sent(
                Vocab.ES, (4 - len(self.src_data[sent_i]) % 4) % 4)
            self.start_sent(src)
            embeddings = model.src_embedder.embed_sent(src)
            encodings = model.encoder(embeddings)
            self.assertEqual(int(math.ceil(len(embeddings) / float(4))),
                             len(encodings))

    def test_py_lstm_mask(self):
        layer_dim = 512
        model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim,
                                               hidden_dim=layer_dim,
                                               layers=1),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                      lstm_dim=layer_dim,
                                      mlp_hidden_dim=layer_dim,
                                      trg_embed_dim=layer_dim,
                                      vocab_size=100),
        )

        batcher = xnmt.batcher.TrgBatcher(batch_size=3)
        train_src, _ = batcher.pack(self.src_data, self.trg_data)
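        # TrgBatcher groups sentences by target length, so the source sides in
        # a batch may differ in length; pack() then attaches a mask to batches
        # that needed src padding. The encoder should propagate that mask.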

        self.set_train(True)
        for sent_i in range(3):
            dy.renew_cg()
            src = train_src[sent_i]
            self.start_sent(src)
            embeddings = model.src_embedder.embed_sent(src)
            encodings = model.encoder(embeddings)
            if train_src[sent_i].mask is None:
                assert encodings.mask is None
            else:
                np.testing.assert_array_almost_equal(
                    train_src[sent_i].mask.np_arr, encodings.mask.np_arr)