Code example #1
class TestFreeDecodingLoss(unittest.TestCase):

  def setUp(self):
    layer_dim = 512
    xnmt.events.clear()
    ParamManager.init_param_col()
    self.model = DefaultTranslator(
      src_reader=PlainTextReader(),
      trg_reader=PlainTextReader(),
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      decoder=MlpSoftmaxDecoder(input_dim=layer_dim, lstm_dim=layer_dim, mlp_hidden_dim=layer_dim,
                                trg_embed_dim=layer_dim, vocab_size=100,
                                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    self.model.set_train(False)
    self.model.initialize_generator(beam=1)

    self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))

  def test_single(self):
    # First pass: decode sentence 0 with the target ids forced, and keep the
    # score the search assigns to the produced hypothesis.
    dy.renew_cg()
    self.model.initialize_generator(beam=1)
    outputs = self.model.generate_output(self.src_data[0], 0,
                                         forced_trg_ids=self.trg_data[0])
    # Second pass: on a fresh computation graph, recompute the training loss on
    # the tokens that were just generated; it should equal the negated score.
    dy.renew_cg()
    train_loss = self.model.calc_loss(src=self.src_data[0],
                                      trg=outputs[0].actions,
                                      loss_calculator=LossCalculator()).value()

    self.assertAlmostEqual(-outputs[0].score, train_loss, places=4)
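The assertion at the end of this test leans on a simple identity: the score the search reports for a hypothesis is the sum of the log-probabilities of the tokens it emitted, and the per-sentence training loss on those same tokens is the negative of that sum. A minimal, self-contained sketch of that identity follows; the toy probabilities and names are made up and are not part of the xnmt API.

import math

# Hypothetical per-step distributions over a 3-word vocabulary.
step_probs = [
    [0.7, 0.2, 0.1],
    [0.1, 0.6, 0.3],
    [0.2, 0.2, 0.6],
]
chosen = [0, 1, 2]  # token emitted at each step

# Hypothesis score: sum of log-probs of the emitted tokens.
score = sum(math.log(step_probs[t][w]) for t, w in enumerate(chosen))
# Cross-entropy loss on the same tokens: the negative of that sum.
loss = -sum(math.log(step_probs[t][w]) for t, w in enumerate(chosen))

assert abs(-score - loss) < 1e-9  # mirrors assertAlmostEqual(-outputs[0].score, train_loss, places=4)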
Code example #2
class TestGreedyVsBeam(unittest.TestCase):
    """
  Test if greedy search produces same output as beam search with beam 1.
  """
    def setUp(self):
        layer_dim = 512
        xnmt.events.clear()
        ParamManager.init_param_col()
        self.model = DefaultTranslator(
            src_reader=PlainTextReader(),
            trg_reader=PlainTextReader(),
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=BiLSTMSeqTransducer(input_dim=layer_dim,
                                        hidden_dim=layer_dim),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                      trg_embed_dim=layer_dim,
                                      rnn_layer=UniLSTMSeqTransducer(
                                          input_dim=layer_dim,
                                          hidden_dim=layer_dim,
                                          decoder_input_dim=layer_dim,
                                          yaml_path="model.decoder.rnn_layer"),
                                      mlp_layer=MLP(
                                          input_dim=layer_dim,
                                          hidden_dim=layer_dim,
                                          decoder_rnn_dim=layer_dim,
                                          vocab_size=100,
                                          yaml_path="model.decoder.rnn_layer"),
                                      bridge=CopyBridge(dec_dim=layer_dim,
                                                        dec_layers=1)),
        )
        self.model.set_train(False)

        self.src_data = list(
            self.model.src_reader.read_sents("examples/data/head.ja"))
        self.trg_data = list(
            self.model.trg_reader.read_sents("examples/data/head.en"))

    def test_greedy_vs_beam(self):
        dy.renew_cg()
        self.model.initialize_generator()
        outputs = self.model.generate_output(self.src_data[0],
                                             0,
                                             BeamSearch(beam_size=1),
                                             forced_trg_ids=self.trg_data[0])
        output_score1 = outputs[0].score

        dy.renew_cg()
        self.model.initialize_generator()
        outputs = self.model.generate_output(self.src_data[0],
                                             0,
                                             GreedySearch(),
                                             forced_trg_ids=self.trg_data[0])
        output_score2 = outputs[0].score

        self.assertAlmostEqual(output_score1, output_score2)
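The equality being tested falls out of how beam search degenerates when only one hypothesis is kept: at every step the single surviving extension is the best-scoring one, which is exactly the argmax that greedy search takes. A minimal sketch with made-up log-probabilities; none of the helpers below are xnmt code.

# Hypothetical per-step log-probabilities over a 4-word vocabulary.
step_logprobs = [
    [-0.2, -1.9, -2.3, -3.0],
    [-1.5, -0.4, -2.2, -2.8],
    [-2.0, -1.1, -0.7, -2.5],
]

def greedy(steps):
    # Pick the highest-scoring token independently at each step.
    return [max(range(len(p)), key=lambda w: p[w]) for p in steps]

def beam(steps, size=1):
    # Keep the `size` best partial hypotheses after every step.
    hyps = [([], 0.0)]  # (tokens so far, cumulative log-prob)
    for p in steps:
        expanded = [(toks + [w], s + p[w]) for toks, s in hyps for w in range(len(p))]
        hyps = sorted(expanded, key=lambda h: h[1], reverse=True)[:size]
    return hyps[0][0]

# With a beam of one, the surviving hypothesis is the greedy one.
assert greedy(step_logprobs) == beam(step_logprobs, size=1)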
Code example #3
File: test_decoding.py, Project: pmichel31415/xnmt
class TestForcedDecodingOutputs(unittest.TestCase):
    def assertItemsEqual(self, l1, l2):
        self.assertEqual(len(l1), len(l2))
        for i in range(len(l1)):
            self.assertEqual(l1[i], l2[i])

    def setUp(self):
        layer_dim = 512
        xnmt.events.clear()
        ParamManager.init_param_col()
        self.model = DefaultTranslator(
            src_reader=PlainTextReader(),
            trg_reader=PlainTextReader(),
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=BiLSTMSeqTransducer(input_dim=layer_dim,
                                        hidden_dim=layer_dim),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                      trg_embed_dim=layer_dim,
                                      rnn_layer=UniLSTMSeqTransducer(
                                          input_dim=layer_dim,
                                          hidden_dim=layer_dim,
                                          decoder_input_dim=layer_dim,
                                          yaml_path="model.decoder.rnn_layer"),
                                      mlp_layer=MLP(
                                          input_dim=layer_dim,
                                          hidden_dim=layer_dim,
                                          decoder_rnn_dim=layer_dim,
                                          vocab_size=100,
                                          yaml_path="model.decoder.rnn_layer"),
                                      bridge=CopyBridge(dec_dim=layer_dim,
                                                        dec_layers=1)),
        )
        self.model.set_train(False)
        self.model.initialize_generator()

        self.src_data = list(
            self.model.src_reader.read_sents("examples/data/head.ja"))
        self.trg_data = list(
            self.model.trg_reader.read_sents("examples/data/head.en"))

        self.search = GreedySearch()

    def assert_forced_decoding(self, sent_id):
        dy.renew_cg()
        outputs = self.model.generate_output(
            self.src_data[sent_id],
            sent_id,
            self.search,
            forced_trg_ids=self.trg_data[sent_id])
        self.assertItemsEqual(self.trg_data[sent_id], outputs[0].actions)

    def test_forced_decoding(self):
        for i in range(1):
            self.assert_forced_decoding(sent_id=i)
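Forced decoding constrains the search so that the token emitted at each step is the corresponding reference id; the model is still run and scored, but it has no say in what gets emitted, which is why the produced actions must equal the forced target sequence. A minimal sketch of the idea; the helper and numbers below are hypothetical, not the xnmt implementation.

def forced_decode(step_logprobs, forced_ids):
    # Emit the forced token at every step, while still accumulating the
    # model's score for the tokens it was forced to produce.
    actions, score = [], 0.0
    for logp, ref in zip(step_logprobs, forced_ids):
        actions.append(ref)
        score += logp[ref]
    return actions, score

toy_logprobs = [[-0.2, -1.9, -2.3, -3.0]] * 3
forced = [3, 0, 2]
actions, _ = forced_decode(toy_logprobs, forced)
assert actions == forced  # mirrors assertItemsEqual(self.trg_data[sent_id], outputs[0].actions)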
Code example #4
class TestGreedyVsBeam(unittest.TestCase):
    """
  Test if greedy search produces same output as beam search with beam 1.
  """
    def setUp(self):
        xnmt.events.clear()
        self.model_context = ModelContext()
        self.model_context.dynet_param_collection = PersistentParamCollection(
            "some_file", 1)
        self.model = DefaultTranslator(
            src_embedder=SimpleWordEmbedder(self.model_context,
                                            vocab_size=100),
            encoder=BiLSTMSeqTransducer(self.model_context),
            attender=MlpAttender(self.model_context),
            trg_embedder=SimpleWordEmbedder(self.model_context,
                                            vocab_size=100),
            decoder=MlpSoftmaxDecoder(self.model_context,
                                      vocab_size=100,
                                      bridge=CopyBridge(self.model_context,
                                                        dec_layers=1)),
        )
        self.model.initialize_training_strategy(TrainingStrategy())
        self.model.set_train(False)

        self.training_corpus = BilingualTrainingCorpus(
            train_src="examples/data/head.ja",
            train_trg="examples/data/head.en",
            dev_src="examples/data/head.ja",
            dev_trg="examples/data/head.en")
        self.corpus_parser = BilingualCorpusParser(
            src_reader=PlainTextReader(),
            trg_reader=PlainTextReader(),
            training_corpus=self.training_corpus)

    def test_greedy_vs_beam(self):
        dy.renew_cg()
        self.model.initialize_generator(beam=1)
        outputs = self.model.generate_output(
            self.training_corpus.train_src_data[0],
            0,
            forced_trg_ids=self.training_corpus.train_trg_data[0])
        output_score1 = outputs[0].score

        dy.renew_cg()
        self.model.initialize_generator()
        outputs = self.model.generate_output(
            self.training_corpus.train_src_data[0],
            0,
            forced_trg_ids=self.training_corpus.train_trg_data[0])
        output_score2 = outputs[0].score

        self.assertAlmostEqual(output_score1, output_score2)
Code example #5
class TestFreeDecodingLoss(unittest.TestCase):
    def setUp(self):
        xnmt.events.clear()
        self.model_context = ModelContext()
        self.model_context.dynet_param_collection = PersistentParamCollection(
            "some_file", 1)
        self.model = DefaultTranslator(
            src_embedder=SimpleWordEmbedder(self.model_context,
                                            vocab_size=100),
            encoder=BiLSTMSeqTransducer(self.model_context),
            attender=MlpAttender(self.model_context),
            trg_embedder=SimpleWordEmbedder(self.model_context,
                                            vocab_size=100),
            decoder=MlpSoftmaxDecoder(self.model_context,
                                      vocab_size=100,
                                      bridge=CopyBridge(self.model_context,
                                                        dec_layers=1)),
        )
        self.model.initialize_training_strategy(TrainingStrategy())
        self.model.set_train(False)
        self.model.initialize_generator()

        self.training_corpus = BilingualTrainingCorpus(
            train_src="examples/data/head.ja",
            train_trg="examples/data/head.en",
            dev_src="examples/data/head.ja",
            dev_trg="examples/data/head.en")
        self.corpus_parser = BilingualCorpusParser(
            src_reader=PlainTextReader(),
            trg_reader=PlainTextReader(),
            training_corpus=self.training_corpus)

    def test_single(self):
        dy.renew_cg()
        self.model.initialize_generator()
        outputs = self.model.generate_output(
            self.training_corpus.train_src_data[0],
            0,
            forced_trg_ids=self.training_corpus.train_trg_data[0])
        output_score = outputs[0].score

        dy.renew_cg()
        train_loss = self.model.calc_loss(
            src=self.training_corpus.train_src_data[0],
            trg=outputs[0].actions).value()

        self.assertAlmostEqual(-output_score, train_loss, places=5)
Code example #6
class TestForcedDecodingOutputs(unittest.TestCase):
    def assertItemsEqual(self, l1, l2):
        self.assertEqual(len(l1), len(l2))
        for i in range(len(l1)):
            self.assertEqual(l1[i], l2[i])

    def setUp(self):
        xnmt.events.clear()
        self.model_context = ModelContext()
        self.model_context.dynet_param_collection = PersistentParamCollection(
            "some_file", 1)
        self.model = DefaultTranslator(
            src_embedder=SimpleWordEmbedder(self.model_context,
                                            vocab_size=100),
            encoder=BiLSTMSeqTransducer(self.model_context),
            attender=MlpAttender(self.model_context),
            trg_embedder=SimpleWordEmbedder(self.model_context,
                                            vocab_size=100),
            decoder=MlpSoftmaxDecoder(self.model_context, vocab_size=100),
        )
        self.model.initialize_training_strategy(TrainingStrategy())
        self.model.set_train(False)
        self.model.initialize_generator()

        self.training_corpus = BilingualTrainingCorpus(
            train_src="examples/data/head.ja",
            train_trg="examples/data/head.en",
            dev_src="examples/data/head.ja",
            dev_trg="examples/data/head.en")
        self.corpus_parser = BilingualCorpusParser(
            src_reader=PlainTextReader(),
            trg_reader=PlainTextReader(),
            training_corpus=self.training_corpus)

    def assert_forced_decoding(self, sent_id):
        dy.renew_cg()
        outputs = self.model.generate_output(
            self.training_corpus.train_src_data[sent_id],
            sent_id,
            forced_trg_ids=self.training_corpus.train_trg_data[sent_id])
        self.assertItemsEqual(self.training_corpus.train_trg_data[sent_id],
                              outputs[0].actions)

    def test_forced_decoding(self):
        for i in range(1):
            self.assert_forced_decoding(sent_id=i)
Code example #7
class TestGreedyVsBeam(unittest.TestCase):
    """
  Test if greedy search produces same output as beam search with beam 1.
  """
    def setUp(self):
        xnmt.events.clear()
        self.exp_global = ExpGlobal(
            dynet_param_collection=PersistentParamCollection("some_file", 1))
        self.model = DefaultTranslator(
            src_reader=PlainTextReader(),
            trg_reader=PlainTextReader(),
            src_embedder=SimpleWordEmbedder(exp_global=self.exp_global,
                                            vocab_size=100),
            encoder=BiLSTMSeqTransducer(exp_global=self.exp_global),
            attender=MlpAttender(exp_global=self.exp_global),
            trg_embedder=SimpleWordEmbedder(exp_global=self.exp_global,
                                            vocab_size=100),
            decoder=MlpSoftmaxDecoder(exp_global=self.exp_global,
                                      vocab_size=100,
                                      bridge=CopyBridge(
                                          exp_global=self.exp_global,
                                          dec_layers=1)),
        )
        self.model.set_train(False)

        self.src_data = list(
            self.model.src_reader.read_sents("examples/data/head.ja"))
        self.trg_data = list(
            self.model.trg_reader.read_sents("examples/data/head.en"))

    def test_greedy_vs_beam(self):
        dy.renew_cg()
        self.model.initialize_generator(beam=1)
        outputs = self.model.generate_output(self.src_data[0],
                                             0,
                                             forced_trg_ids=self.trg_data[0])
        output_score1 = outputs[0].score

        dy.renew_cg()
        self.model.initialize_generator()
        outputs = self.model.generate_output(self.src_data[0],
                                             0,
                                             forced_trg_ids=self.trg_data[0])
        output_score2 = outputs[0].score

        self.assertAlmostEqual(output_score1, output_score2)
Code example #8
class TestForcedDecodingOutputs(unittest.TestCase):
    def assertItemsEqual(self, l1, l2):
        self.assertEqual(len(l1), len(l2))
        for i in range(len(l1)):
            self.assertEqual(l1[i], l2[i])

    def setUp(self):
        xnmt.events.clear()
        self.exp_global = ExpGlobal(
            dynet_param_collection=PersistentParamCollection("some_file", 1))
        self.model = DefaultTranslator(
            src_reader=PlainTextReader(),
            trg_reader=PlainTextReader(),
            src_embedder=SimpleWordEmbedder(exp_global=self.exp_global,
                                            vocab_size=100),
            encoder=BiLSTMSeqTransducer(exp_global=self.exp_global),
            attender=MlpAttender(exp_global=self.exp_global),
            trg_embedder=SimpleWordEmbedder(exp_global=self.exp_global,
                                            vocab_size=100),
            decoder=MlpSoftmaxDecoder(exp_global=self.exp_global,
                                      vocab_size=100,
                                      bridge=CopyBridge(
                                          exp_global=self.exp_global,
                                          dec_layers=1)),
        )
        self.model.set_train(False)
        self.model.initialize_generator(beam=1)

        self.src_data = list(
            self.model.src_reader.read_sents("examples/data/head.ja"))
        self.trg_data = list(
            self.model.trg_reader.read_sents("examples/data/head.en"))

    def assert_forced_decoding(self, sent_id):
        dy.renew_cg()
        outputs = self.model.generate_output(
            self.src_data[sent_id],
            sent_id,
            forced_trg_ids=self.trg_data[sent_id])
        self.assertItemsEqual(self.trg_data[sent_id], outputs[0].actions)

    def test_forced_decoding(self):
        for i in range(1):
            self.assert_forced_decoding(sent_id=i)
Code example #9
File: test_decoding.py, Project: xxcharles/xnmt
class TestFreeDecodingLoss(unittest.TestCase):
    def setUp(self):
        xnmt.events.clear()
        self.exp_global = ExpGlobal(
            dynet_param_collection=PersistentParamCollection("some_file", 1))
        self.model = DefaultTranslator(
            src_reader=PlainTextReader(),
            trg_reader=PlainTextReader(),
            src_embedder=SimpleWordEmbedder(exp_global=self.exp_global,
                                            vocab_size=100),
            encoder=BiLSTMSeqTransducer(exp_global=self.exp_global),
            attender=MlpAttender(exp_global=self.exp_global),
            trg_embedder=SimpleWordEmbedder(exp_global=self.exp_global,
                                            vocab_size=100),
            decoder=MlpSoftmaxDecoder(exp_global=self.exp_global,
                                      vocab_size=100,
                                      bridge=CopyBridge(
                                          exp_global=self.exp_global,
                                          dec_layers=1)),
        )
        self.model.set_train(False)
        self.model.initialize_generator()

        self.src_data = list(
            self.model.src_reader.read_sents("examples/data/head.ja"))
        self.trg_data = list(
            self.model.trg_reader.read_sents("examples/data/head.en"))

    def test_single(self):
        dy.renew_cg()
        self.model.initialize_generator()
        outputs = self.model.generate_output(self.src_data[0],
                                             0,
                                             forced_trg_ids=self.trg_data[0])
        output_score = outputs[0].score

        dy.renew_cg()
        train_loss = self.model.calc_loss(
            src=self.src_data[0],
            trg=outputs[0].actions,
            loss_calculator=LossCalculator()).value()

        self.assertAlmostEqual(-output_score, train_loss, places=5)
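All of these excerpts come from a test module and omit its header. A rough, assumed skeleton of what has to sit above them is sketched below; the xnmt import paths moved around between releases, so the placeholder comment stands in for the version-specific imports of DefaultTranslator, PlainTextReader, the embedders, encoder, attender, decoder, bridge, and search strategies used above.

import unittest

import dynet as dy

import xnmt.events
# from xnmt.<version-specific modules> import DefaultTranslator, PlainTextReader, ...

if __name__ == "__main__":
    unittest.main()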