Beispiel #1
0
    def setUp(self):
        """Build the translator under test and load the source sentences.

        Calls ``events.clear()`` and ``ParamManager.init_param_col()`` first,
        then assembles a ``DefaultTranslator`` in which every component uses
        the same layer size, sets the train flag to ``False`` and stores the
        sentences read from ``examples/data/head.ja`` in ``self.src_data``.
        """
        dim = 512
        events.clear()
        ParamManager.init_param_col()

        # Sub-components are created one by one, in the same order in which
        # a single nested constructor call would evaluate them.
        ja_vocab = Vocab(vocab_file="examples/data/head.ja.vocab")
        en_vocab = Vocab(vocab_file="examples/data/head.en.vocab")
        src_reader = PlainTextReader(vocab=ja_vocab)
        trg_reader = PlainTextReader(vocab=en_vocab)
        src_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
        encoder = BiLSTMSeqTransducer(input_dim=dim, hidden_dim=dim)
        attender = MlpAttender(input_dim=dim, state_dim=dim, hidden_dim=dim)

        trg_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
        decoder_rnn = UniLSTMSeqTransducer(input_dim=dim,
                                           hidden_dim=dim,
                                           decoder_input_dim=dim,
                                           yaml_path="model.decoder.rnn")
        # The transform takes an input twice as wide as the layer size.
        transform = NonLinear(input_dim=dim * 2, output_dim=dim)
        scorer = Softmax(input_dim=dim, vocab_size=100)
        bridge = CopyBridge(dec_dim=dim, dec_layers=1)
        decoder = AutoRegressiveDecoder(input_dim=dim,
                                        embedder=trg_embedder,
                                        rnn=decoder_rnn,
                                        transform=transform,
                                        scorer=scorer,
                                        bridge=bridge)

        self.model = DefaultTranslator(src_reader=src_reader,
                                       trg_reader=trg_reader,
                                       src_embedder=src_embedder,
                                       encoder=encoder,
                                       attender=attender,
                                       decoder=decoder)
        event_trigger.set_train(False)

        source_path = "examples/data/head.ja"
        self.src_data = list(self.model.src_reader.read_sents(source_path))
Beispiel #2
0
    def setUp(self):
        """Create the RNNG parsing fixture: readers, data, model and a batch.

        Seeds ``numpy.random`` and ``random`` with 2, resets events and the
        parameter collection, reads the plain-text source and the CoNLL
        target data, builds a ``DefaultTranslator`` with an ``RNNGDecoder``,
        sets the train flag to ``True``, packs the data into batches of size
        one and renews the DyNet computation graph.
        """
        # Seeding
        numpy.random.seed(2)
        random.seed(2)
        dim = 32
        xnmt.events.clear()
        ParamManager.init_param_col()

        edge_vocab = Vocab(vocab_file="examples/data/parse/head.en.edge_vocab")
        node_vocab = Vocab(vocab_file="examples/data/parse/head.en.node_vocab")
        value_vocab = Vocab(vocab_file="examples/data/head.en.vocab")

        self.src_reader = input_readers.PlainTextReader(vocab=value_vocab)
        self.trg_reader = input_readers.CoNLLToRNNGActionsReader(
            surface_vocab=value_vocab,
            nt_vocab=node_vocab,
            edg_vocab=edge_vocab)

        self.layer_dim = dim
        src_sentences = self.src_reader.read_sents("examples/data/head.en")
        self.src_data = list(src_sentences)
        trg_sentences = self.trg_reader.read_sents(
            "examples/data/parse/head.en.conll")
        self.trg_data = list(trg_sentences)
        self.loss_calculator = MLELoss()

        # Head composer: forward and backward LSTMs plus a transform that
        # takes an auxiliary input.
        fwd_lstm = UniLSTMSeqTransducer(input_dim=dim, hidden_dim=dim)
        bwd_lstm = UniLSTMSeqTransducer(input_dim=dim, hidden_dim=dim)
        composer_transform = AuxNonLinear(input_dim=dim,
                                          aux_input_dim=dim,
                                          output_dim=dim)
        self.head_composer = composer.DyerHeadComposer(
            fwd_combinator=fwd_lstm,
            bwd_combinator=bwd_lstm,
            transform=composer_transform)

        # Translator components, created in keyword-argument order.
        src_embedder = LookupEmbedder(emb_dim=dim,
                                      vocab_size=len(value_vocab))
        encoder = IdentitySeqTransducer()
        attender = MlpAttender(input_dim=dim, state_dim=dim, hidden_dim=dim)
        decoder_rnn = UniLSTMSeqTransducer(input_dim=dim,
                                           hidden_dim=dim,
                                           decoder_input_dim=dim)
        decoder_transform = AuxNonLinear(input_dim=dim,
                                         output_dim=dim,
                                         aux_input_dim=dim)
        bridge = NoBridge(dec_dim=dim, dec_layers=1)
        decoder = RNNGDecoder(input_dim=dim,
                              rnn=decoder_rnn,
                              transform=decoder_transform,
                              bridge=bridge,
                              graph_reader=self.trg_reader,
                              head_composer=self.head_composer)
        self.model = DefaultTranslator(src_reader=self.src_reader,
                                       trg_reader=self.trg_reader,
                                       src_embedder=src_embedder,
                                       encoder=encoder,
                                       attender=attender,
                                       decoder=decoder)
        event_trigger.set_train(True)

        single_batcher = batchers.TrgBatcher(batch_size=1)
        packed = single_batcher.pack(self.src_data, self.trg_data)
        self.src, self.trg = packed
        dy.renew_cg(immediate_compute=True, check_validity=True)
Beispiel #3
0
class TestGreedyVsBeam(unittest.TestCase):
    """Test if greedy search produces same output as beam search with beam 1."""

    def setUp(self):
        """Build a fresh ``DefaultTranslator`` and read the source sentences.

        The train flag is set to ``False`` after the model is built; the
        sentences of ``examples/data/head.ja`` end up in ``self.src_data``.
        """
        dim = 512
        events.clear()
        ParamManager.init_param_col()

        # Components are created in the order of the keyword arguments that
        # receive them.
        ja_vocab = Vocab(vocab_file="examples/data/head.ja.vocab")
        en_vocab = Vocab(vocab_file="examples/data/head.en.vocab")
        src_reader = PlainTextReader(vocab=ja_vocab)
        trg_reader = PlainTextReader(vocab=en_vocab)
        src_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
        encoder = BiLSTMSeqTransducer(input_dim=dim, hidden_dim=dim)
        attender = MlpAttender(input_dim=dim, state_dim=dim, hidden_dim=dim)

        trg_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
        decoder_rnn = UniLSTMSeqTransducer(input_dim=dim,
                                           hidden_dim=dim,
                                           decoder_input_dim=dim,
                                           yaml_path="model.decoder.rnn")
        transform = NonLinear(input_dim=dim * 2, output_dim=dim)
        scorer = Softmax(input_dim=dim, vocab_size=100)
        bridge = CopyBridge(dec_dim=dim, dec_layers=1)
        decoder = AutoRegressiveDecoder(input_dim=dim,
                                        embedder=trg_embedder,
                                        rnn=decoder_rnn,
                                        transform=transform,
                                        scorer=scorer,
                                        bridge=bridge)

        self.model = DefaultTranslator(src_reader=src_reader,
                                       trg_reader=trg_reader,
                                       src_embedder=src_embedder,
                                       encoder=encoder,
                                       attender=attender,
                                       decoder=decoder)
        event_trigger.set_train(False)

        source_path = "examples/data/head.ja"
        self.src_data = list(self.model.src_reader.read_sents(source_path))

    def test_greedy_vs_beam(self):
        """Scores of beam search (beam size 1) and greedy search must agree.

        Both searches decode the first source sentence; the computation
        graph is renewed before each of them.
        """
        dy.renew_cg()
        beam_batch = batchers.mark_as_batch([self.src_data[0]])
        beam_outputs = self.model.generate(beam_batch, BeamSearch(beam_size=1))
        beam_score = beam_outputs[0].score

        dy.renew_cg()
        greedy_batch = batchers.mark_as_batch([self.src_data[0]])
        greedy_outputs = self.model.generate(greedy_batch, GreedySearch())
        greedy_score = greedy_outputs[0].score

        self.assertAlmostEqual(beam_score, greedy_score)
Beispiel #4
0
 def test_loss_model1(self):
     """Run the single-vs-batch loss check on a BiLSTM translator.

     Builds a ``DefaultTranslator`` with lookup embedders on both sides,
     sets the train flag to ``False`` and hands the model to
     ``self.assert_single_loss_equals_batch_loss``.
     """
     dim = 512

     # Components are created in keyword-argument order.
     src_embedder = LookupEmbedder(emb_dim=dim, vocab_size=100)
     encoder = BiLSTMSeqTransducer(input_dim=dim, hidden_dim=dim)
     attender = MlpAttender(input_dim=dim, state_dim=dim, hidden_dim=dim)

     trg_embedder = LookupEmbedder(emb_dim=dim, vocab_size=100)
     decoder_rnn = UniLSTMSeqTransducer(input_dim=dim,
                                        hidden_dim=dim,
                                        decoder_input_dim=dim,
                                        yaml_path="model.decoder.rnn")
     transform = NonLinear(input_dim=dim * 2, output_dim=dim)
     scorer = Softmax(input_dim=dim, vocab_size=100)
     bridge = CopyBridge(dec_dim=dim, dec_layers=1)
     decoder = AutoRegressiveDecoder(input_dim=dim,
                                     embedder=trg_embedder,
                                     rnn=decoder_rnn,
                                     transform=transform,
                                     scorer=scorer,
                                     bridge=bridge)

     model = DefaultTranslator(src_reader=self.src_reader,
                               trg_reader=self.trg_reader,
                               src_embedder=src_embedder,
                               encoder=encoder,
                               attender=attender,
                               decoder=decoder)
     event_trigger.set_train(False)
     self.assert_single_loss_equals_batch_loss(model)
Beispiel #5
0
 def test_bi_lstm_encoder_len(self):
     """Run the input/output length check on a three-layer BiLSTM encoder.

     The assembled ``DefaultTranslator`` is passed to
     ``self.assert_in_out_len_equal``, which performs the actual check.
     """
     dim = 512

     # Components are created in keyword-argument order.
     src_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
     encoder = BiLSTMSeqTransducer(input_dim=dim, hidden_dim=dim, layers=3)
     attender = MlpAttender(input_dim=dim, state_dim=dim, hidden_dim=dim)

     trg_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
     decoder_rnn = UniLSTMSeqTransducer(input_dim=dim,
                                        hidden_dim=dim,
                                        decoder_input_dim=dim,
                                        yaml_path="model.decoder.rnn")
     transform = NonLinear(input_dim=dim * 2, output_dim=dim)
     scorer = Softmax(input_dim=dim, vocab_size=100)
     bridge = CopyBridge(dec_dim=dim, dec_layers=1)
     decoder = AutoRegressiveDecoder(input_dim=dim,
                                     embedder=trg_embedder,
                                     rnn=decoder_rnn,
                                     transform=transform,
                                     scorer=scorer,
                                     bridge=bridge)

     model = DefaultTranslator(src_reader=self.src_reader,
                               trg_reader=self.trg_reader,
                               src_embedder=src_embedder,
                               encoder=encoder,
                               attender=attender,
                               decoder=decoder)
     self.assert_in_out_len_equal(model)
Beispiel #6
0
 def test_py_lstm_encoder_len(self):
     """Check the output length of a three-layer pyramidal LSTM encoder.

     For each of the first ten source sentences the padded sentence is
     embedded and encoded, and the number of encodings must equal the
     number of embeddings divided by four, rounded up.
     """
     dim = 512

     # Components are created in keyword-argument order.
     src_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
     encoder = PyramidalLSTMSeqTransducer(input_dim=dim,
                                          hidden_dim=dim,
                                          layers=3)
     attender = MlpAttender(input_dim=dim, state_dim=dim, hidden_dim=dim)

     trg_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
     decoder_rnn = UniLSTMSeqTransducer(input_dim=dim,
                                        hidden_dim=dim,
                                        decoder_input_dim=dim,
                                        yaml_path="model.decoder.rnn")
     transform = NonLinear(input_dim=dim * 2, output_dim=dim)
     scorer = Softmax(input_dim=dim, vocab_size=100)
     bridge = CopyBridge(dec_dim=dim, dec_layers=1)
     decoder = AutoRegressiveDecoder(input_dim=dim,
                                     embedder=trg_embedder,
                                     rnn=decoder_rnn,
                                     transform=transform,
                                     scorer=scorer,
                                     bridge=bridge)

     model = DefaultTranslator(src_reader=self.src_reader,
                               trg_reader=self.trg_reader,
                               src_embedder=src_embedder,
                               encoder=encoder,
                               attender=attender,
                               decoder=decoder)
     event_trigger.set_train(True)

     for idx in range(10):
         dy.renew_cg()
         sentence = self.src_data[idx]
         # Pad amount is 4 minus the length's remainder modulo 4, i.e. a
         # full 4 when the length is already a multiple of 4.
         pad_len = 4 - (sentence.sent_len() % 4)
         src = sentence.create_padded_sent(pad_len)
         event_trigger.start_sent(src)
         embeddings = model.src_embedder.embed_sent(src)
         encodings = model.encoder.transduce(embeddings)
         expected_len = int(math.ceil(len(embeddings) / 4.0))
         self.assertEqual(expected_len, len(encodings))
Beispiel #7
0
 def test_train_dev_loss_equal(self):
   """Train and dev loss must match when both use the same data.

   Runs one epoch with a ``DummyTrainer`` on ``examples/data/head.ja`` /
   ``head.en``, evaluates a ``LossEvalTask`` on the same files with the same
   batcher, and compares the per-word training loss with the dev loss to
   five decimal places.
   """
   dim = 512
   batcher = SrcBatcher(batch_size=5, break_ties_randomly=False)
   loss_calculator = MLELoss()

   # Model components, created in keyword-argument order.
   src_reader = PlainTextReader(
     vocab=Vocab(vocab_file="examples/data/head.ja.vocab"))
   trg_reader = PlainTextReader(
     vocab=Vocab(vocab_file="examples/data/head.en.vocab"))
   src_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
   encoder = BiLSTMSeqTransducer(input_dim=dim, hidden_dim=dim)
   attender = MlpAttender(input_dim=dim, state_dim=dim, hidden_dim=dim)
   trg_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
   decoder_rnn = UniLSTMSeqTransducer(input_dim=dim,
                                      hidden_dim=dim,
                                      decoder_input_dim=dim,
                                      yaml_path="model.decoder.rnn")
   transform = NonLinear(input_dim=dim*2, output_dim=dim)
   scorer = Softmax(input_dim=dim, vocab_size=100)
   bridge = CopyBridge(dec_dim=dim, dec_layers=1)
   decoder = AutoRegressiveDecoder(input_dim=dim,
                                   embedder=trg_embedder,
                                   rnn=decoder_rnn,
                                   transform=transform,
                                   scorer=scorer,
                                   bridge=bridge)
   model = DefaultTranslator(src_reader=src_reader,
                             trg_reader=trg_reader,
                             src_embedder=src_embedder,
                             encoder=encoder,
                             attender=attender,
                             decoder=decoder)

   # The dev task evaluates the very model that is being trained.
   dev_task = LossEvalTask(model=model,
                           src_file="examples/data/head.ja",
                           ref_file="examples/data/head.en",
                           batcher=batcher)
   trainer = DummyTrainer()

   training_regimen = regimens.SimpleTrainingRegimen(
     src_file="examples/data/head.ja",
     trg_file="examples/data/head.en",
     loss_calculator=loss_calculator,
     model=model,
     dev_tasks=[dev_task],
     trainer=trainer,
     batcher=batcher,
     run_for_epochs=1)
   training_regimen.run_training(save_fct=lambda: None)

   train_tracker = training_regimen.train_loss_tracker
   train_loss = train_tracker.epoch_loss.sum_factors() / train_tracker.epoch_words
   dev_loss = training_regimen.dev_loss_tracker.dev_score.loss
   self.assertAlmostEqual(train_loss, dev_loss, places=5)
Beispiel #8
0
class TestFreeDecodingLoss(unittest.TestCase):
    """Compare the score of a greedy decode with the NLL of its own output."""

    def setUp(self):
        """Build a fresh ``DefaultTranslator`` and read source and target data.

        The train flag is set to ``False`` after the model is built; the
        sentences of ``examples/data/head.ja`` and ``examples/data/head.en``
        end up in ``self.src_data`` and ``self.trg_data``.
        """
        dim = 512
        events.clear()
        ParamManager.init_param_col()

        # Components are created in the order of the keyword arguments that
        # receive them.
        ja_vocab = Vocab(vocab_file="examples/data/head.ja.vocab")
        en_vocab = Vocab(vocab_file="examples/data/head.en.vocab")
        src_reader = PlainTextReader(vocab=ja_vocab)
        trg_reader = PlainTextReader(vocab=en_vocab)
        src_embedder = LookupEmbedder(emb_dim=dim, vocab_size=100)
        encoder = BiLSTMSeqTransducer(input_dim=dim, hidden_dim=dim)
        attender = MlpAttender(input_dim=dim, state_dim=dim, hidden_dim=dim)

        trg_embedder = LookupEmbedder(emb_dim=dim, vocab_size=100)
        decoder_rnn = UniLSTMSeqTransducer(input_dim=dim,
                                           hidden_dim=dim,
                                           decoder_input_dim=dim,
                                           yaml_path="model.decoder.rnn")
        transform = NonLinear(input_dim=dim * 2, output_dim=dim)
        scorer = Softmax(input_dim=dim, vocab_size=100)
        bridge = CopyBridge(dec_dim=dim, dec_layers=1)
        decoder = AutoRegressiveDecoder(input_dim=dim,
                                        embedder=trg_embedder,
                                        rnn=decoder_rnn,
                                        transform=transform,
                                        scorer=scorer,
                                        bridge=bridge)

        self.model = DefaultTranslator(src_reader=src_reader,
                                       trg_reader=trg_reader,
                                       src_embedder=src_embedder,
                                       encoder=encoder,
                                       attender=attender,
                                       decoder=decoder)
        event_trigger.set_train(False)

        src_sentences = self.model.src_reader.read_sents(
            "examples/data/head.ja")
        self.src_data = list(src_sentences)
        trg_sentences = self.model.trg_reader.read_sents(
            "examples/data/head.en")
        self.trg_data = list(trg_sentences)

    def test_single(self):
        """The negated greedy score must equal the NLL of the greedy output.

        The first source sentence is decoded greedily; the result is then
        scored with ``calc_nll`` on a renewed graph and compared to three
        decimal places.
        """
        first_src = self.src_data[0]

        dy.renew_cg()
        batch = batchers.mark_as_batch([first_src])
        hypotheses = self.model.generate(batch, GreedySearch())
        best = hypotheses[0]
        search_score = best.score

        dy.renew_cg()
        nll_expr = self.model.calc_nll(src=first_src, trg=best)
        nll = nll_expr.value()

        self.assertAlmostEqual(-search_score, nll, places=3)
Beispiel #9
0
  def setUp(self):
    """Create the segmenting-encoder fixture with policy-gradient learning.

    Seeds ``numpy.random`` and ``random`` with 2, resets events and the
    parameter collection, builds the segmenting encoder together with its
    policy-gradient, epsilon-greedy and length-prior parts, wraps it in a
    ``DefaultTranslator``, sets the train flag to ``True``, reads the data
    under ``test/data``, packs it into batches of three and renews the
    computation graph.
    """
    # Seeding
    numpy.random.seed(2)
    random.seed(2)
    dim = 4
    xnmt.events.clear()
    ParamManager.init_param_col()
    self.segment_encoder_bilstm = BiLSTMSeqTransducer(input_dim=dim,
                                                      hidden_dim=dim)
    self.segment_composer = SumComposer()

    char_vocab = Vocab(vocab_file="test/data/head.ja.charvocab")
    self.src_reader = CharFromWordTextReader(vocab=char_vocab)
    word_vocab = Vocab(vocab_file="test/data/head.en.vocab")
    self.trg_reader = PlainTextReader(vocab=word_vocab)
    self.loss_calculator = FeedbackLoss(child_loss=MLELoss(), repeat=5)

    # Parts of the segmentation policy.
    baseline = Linear(input_dim=dim, output_dim=1)
    policy_network = Linear(input_dim=dim, output_dim=2)
    self.poisson_prior = PoissonPrior(mu=3.3)
    self.eps_greedy = EpsilonGreedy(eps_prob=0.0, prior=self.poisson_prior)
    self.conf_penalty = ConfidencePenalty()
    self.policy_gradient = PolicyGradient(
      input_dim=dim,
      output_dim=2,
      baseline=baseline,
      policy_network=policy_network,
      z_normalization=True,
      conf_penalty=self.conf_penalty)
    self.length_prior = PoissonLengthPrior(lmbd=3.3, weight=1)

    final_transducer = BiLSTMSeqTransducer(input_dim=dim, hidden_dim=dim)
    self.segmenting_encoder = SegmentingSeqTransducer(
      embed_encoder=self.segment_encoder_bilstm,
      segment_composer=self.segment_composer,
      final_transducer=final_transducer,
      policy_learning=self.policy_gradient,
      eps_greedy=self.eps_greedy,
      length_prior=self.length_prior,
    )

    # Translator components, created in keyword-argument order.
    src_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
    attender = MlpAttender(input_dim=dim, state_dim=dim, hidden_dim=dim)
    decoder_rnn = UniLSTMSeqTransducer(input_dim=dim,
                                       hidden_dim=dim,
                                       decoder_input_dim=dim,
                                       yaml_path="decoder")
    decoder_transform = AuxNonLinear(input_dim=dim,
                                     output_dim=dim,
                                     aux_input_dim=dim)
    scorer = Softmax(vocab_size=100, input_dim=dim)
    trg_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
    bridge = CopyBridge(dec_dim=dim, dec_layers=1)
    decoder = AutoRegressiveDecoder(input_dim=dim,
                                    rnn=decoder_rnn,
                                    transform=decoder_transform,
                                    scorer=scorer,
                                    embedder=trg_embedder,
                                    bridge=bridge)
    self.model = DefaultTranslator(src_reader=self.src_reader,
                                   trg_reader=self.trg_reader,
                                   src_embedder=src_embedder,
                                   encoder=self.segmenting_encoder,
                                   attender=attender,
                                   decoder=decoder)
    event_trigger.set_train(True)

    self.layer_dim = dim
    src_sentences = self.model.src_reader.read_sents("test/data/head.ja")
    self.src_data = list(src_sentences)
    trg_sentences = self.model.trg_reader.read_sents("test/data/head.en")
    self.trg_data = list(trg_sentences)
    triple_batcher = batchers.TrgBatcher(batch_size=3)
    packed = triple_batcher.pack(self.src_data, self.trg_data)
    self.src, self.trg = packed
    dy.renew_cg(immediate_compute=True, check_validity=True)
Beispiel #10
0
    def setUp(self):
        """Create the minimal segmenting-encoder fixture.

        Seeds ``numpy.random`` and ``random`` with 2, resets events and the
        parameter collection, builds a ``SegmentingSeqTransducer`` from a sum
        composer and a BiLSTM final transducer, wraps it in a
        ``DefaultTranslator``, sets the train flag to ``True``, reads the
        data under ``examples/data``, packs it into batches of three and
        renews the computation graph.
        """
        # Seeding
        numpy.random.seed(2)
        random.seed(2)
        dim = 4
        xnmt.events.clear()
        ParamManager.init_param_col()
        self.segment_composer = SumComposer()

        char_vocab = Vocab(vocab_file="examples/data/head.ja.charvocab")
        self.src_reader = CharFromWordTextReader(vocab=char_vocab)
        word_vocab = Vocab(vocab_file="examples/data/head.en.vocab")
        self.trg_reader = PlainTextReader(vocab=word_vocab)
        self.loss_calculator = FeedbackLoss(child_loss=MLELoss(), repeat=5)

        final_transducer = BiLSTMSeqTransducer(input_dim=dim, hidden_dim=dim)
        self.segmenting_encoder = SegmentingSeqTransducer(
            segment_composer=self.segment_composer,
            final_transducer=final_transducer,
        )

        # Translator components, created in keyword-argument order.
        src_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
        attender = MlpAttender(input_dim=dim, state_dim=dim, hidden_dim=dim)
        decoder_rnn = UniLSTMSeqTransducer(input_dim=dim,
                                           hidden_dim=dim,
                                           decoder_input_dim=dim,
                                           yaml_path="decoder")
        decoder_transform = AuxNonLinear(input_dim=dim,
                                         output_dim=dim,
                                         aux_input_dim=dim)
        scorer = Softmax(vocab_size=100, input_dim=dim)
        trg_embedder = SimpleWordEmbedder(emb_dim=dim, vocab_size=100)
        bridge = CopyBridge(dec_dim=dim, dec_layers=1)
        decoder = AutoRegressiveDecoder(input_dim=dim,
                                        rnn=decoder_rnn,
                                        transform=decoder_transform,
                                        scorer=scorer,
                                        embedder=trg_embedder,
                                        bridge=bridge)
        self.model = DefaultTranslator(src_reader=self.src_reader,
                                       trg_reader=self.trg_reader,
                                       src_embedder=src_embedder,
                                       encoder=self.segmenting_encoder,
                                       attender=attender,
                                       decoder=decoder)
        event_trigger.set_train(True)

        self.layer_dim = dim
        src_sentences = self.model.src_reader.read_sents(
            "examples/data/head.ja")
        self.src_data = list(src_sentences)
        trg_sentences = self.model.trg_reader.read_sents(
            "examples/data/head.en")
        self.trg_data = list(trg_sentences)
        triple_batcher = batchers.TrgBatcher(batch_size=3)
        packed = triple_batcher.pack(self.src_data, self.trg_data)
        self.src, self.trg = packed
        dy.renew_cg(immediate_compute=True, check_validity=True)
Beispiel #11
0
    def test_py_lstm_mask(self):
        """Check that a one-layer pyramidal LSTM encoder keeps the input mask.

        The data is packed into batches of three. For each of the first
        three source batches the batch is embedded and encoded; the mask of
        the encodings must be ``None`` when the source batch has no mask and
        must otherwise match the source mask array.
        """
        layer_dim = 512
        model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim,
                                               hidden_dim=layer_dim,
                                               layers=1),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            decoder=AutoRegressiveDecoder(
                input_dim=layer_dim,
                embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
                rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                         hidden_dim=layer_dim,
                                         decoder_input_dim=layer_dim,
                                         yaml_path="model.decoder.rnn"),
                transform=NonLinear(input_dim=layer_dim * 2,
                                    output_dim=layer_dim),
                scorer=Softmax(input_dim=layer_dim, vocab_size=100),
                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
        )

        batcher = batchers.TrgBatcher(batch_size=3)
        # Only the packed source side is needed here.
        train_src, _ = batcher.pack(self.src_data, self.trg_data)

        event_trigger.set_train(True)
        for sent_i in range(3):
            dy.renew_cg()
            src = train_src[sent_i]
            event_trigger.start_sent(src)
            embeddings = model.src_embedder.embed_sent(src)
            encodings = model.encoder.transduce(embeddings)
            if src.mask is None:
                # A unittest assertion is used instead of a bare ``assert``
                # so the check still runs under ``python -O``.
                self.assertIsNone(encodings.mask)
            else:
                np.testing.assert_array_almost_equal(src.mask.np_arr,
                                                     encodings.mask.np_arr)
Beispiel #12
0
    def add_input(self, prev_word, state) -> DefaultTranslator.Output:
        """Advance ``state`` through READ actions until one WRITE is taken.

        Args:
            prev_word: previously produced word; when it is a plain ``list``
                only its first element is used.
            state: current state, exposing ``has_been_read``,
                ``has_been_written``, ``read`` and ``write``.

        Returns:
            A ``DefaultTranslator.Output`` built from the state after the
            WRITE action and that state's decoder attention.

        Raises:
            ValueError: if the chosen action is neither READ nor WRITE.
        """
        # Only the first sentence is used when the sources are batched.
        src = self.src_sents[0] if batchers.is_batched(self.src_sents) else self.src_sents

        # Which oracle switch applies depends on the train flag.
        use_oracle = self.policy_train_oracle if self.train else self.policy_test_oracle

        oracle_actions = None
        if type(src) == sent.CompoundSentence:
            # A compound sentence carries the real source first and the
            # actions to force second.
            src, oracle_actions = src.sents[0], src.sents[1].words
        if not use_oracle:
            oracle_actions = None
        if type(prev_word) == list:
            prev_word = prev_word[0]

        while True:
            forced = None

            # If we look at the oracle, fill the forced action accordingly.
            if use_oracle:
                step = state.has_been_read + state.has_been_written
                # Once the oracle actions are used up (e.g. the produced
                # sentence is longer than the reference), all required reads
                # have happened and only writes remain.
                if step < len(oracle_actions):
                    forced = oracle_actions[step]
                else:
                    forced = self.Action.WRITE.value

            # Taking the next action
            action = self._next_action(state, src.len_unpadded(), forced)
            chosen = action.content

            if chosen == self.Action.WRITE.value:
                # A write ends this step.
                state = state.write(self.src_encoding, prev_word, action)
                break
            elif chosen == self.Action.READ.value:
                next_encoding = self.src_encoding[state.has_been_read]
                state = state.read(next_encoding, action)
            else:
                raise ValueError(action.content)

        return DefaultTranslator.Output(state, state.decoder_state.attention)
Beispiel #13
0
 def test_overfitting(self):
     """Set up a training regimen for a small translator on the head data.

     Builds a ``DefaultTranslator`` with layer size 16, a ``LossEvalTask``
     on the training files, an ``AdamTrainer`` with ``alpha=0.1`` and a
     ``SimpleTrainingRegimen`` configured for one epoch per run.

     NOTE(review): as shown, the method ends after constructing the
     regimen; no training run or assertion follows. Confirm whether the
     remainder of the test is missing.
     """
     dim = 16
     batcher = SrcBatcher(batch_size=10, break_ties_randomly=False)
     loss_calculator = MLELoss()

     # Model components, created in keyword-argument order.
     src_reader = PlainTextReader(
         vocab=Vocab(vocab_file="examples/data/head.ja.vocab"))
     trg_reader = PlainTextReader(
         vocab=Vocab(vocab_file="examples/data/head.en.vocab"))
     src_embedder = LookupEmbedder(vocab_size=100, emb_dim=dim)
     encoder = BiLSTMSeqTransducer(input_dim=dim, hidden_dim=dim)
     attender = MlpAttender(input_dim=dim, state_dim=dim, hidden_dim=dim)
     trg_embedder = LookupEmbedder(emb_dim=dim, vocab_size=100)
     decoder_rnn = UniLSTMSeqTransducer(input_dim=dim,
                                        hidden_dim=dim,
                                        decoder_input_dim=dim,
                                        yaml_path="model.decoder.rnn")
     transform = NonLinear(input_dim=dim * 2, output_dim=dim)
     scorer = Softmax(input_dim=dim, vocab_size=100)
     bridge = CopyBridge(dec_dim=dim, dec_layers=1)
     decoder = AutoRegressiveDecoder(input_dim=dim,
                                     embedder=trg_embedder,
                                     rnn=decoder_rnn,
                                     transform=transform,
                                     scorer=scorer,
                                     bridge=bridge)
     model = DefaultTranslator(src_reader=src_reader,
                               trg_reader=trg_reader,
                               src_embedder=src_embedder,
                               encoder=encoder,
                               attender=attender,
                               decoder=decoder)

     # The dev task evaluates the same model on the training files.
     dev_task = LossEvalTask(model=model,
                             src_file="examples/data/head.ja",
                             ref_file="examples/data/head.en",
                             batcher=batcher)
     trainer = AdamTrainer(alpha=0.1)

     train_args = {
         'src_file': "examples/data/head.ja",
         'trg_file': "examples/data/head.en",
         'loss_calculator': loss_calculator,
         'model': model,
         'dev_tasks': [dev_task],
         'run_for_epochs': 1,
         'trainer': trainer,
         'batcher': batcher,
     }
     training_regimen = regimens.SimpleTrainingRegimen(**train_args)
Beispiel #14
0
# Single dimension shared by every component of the model below.
layer_dim = 512

# DefaultTranslator assembled from a one-layer BiLSTM encoder, an MLP attender
# and an autoregressive decoder with a unidirectional LSTM.
# NOTE(review): src_vocab, trg_vocab and inference are not defined in this
# excerpt; they are expected to exist before this point.
model = DefaultTranslator(
    src_reader=PlainTextReader(vocab=src_vocab),
    trg_reader=PlainTextReader(vocab=trg_vocab),
    # Embedding table sized to the source vocabulary.
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim,
                                    vocab_size=len(src_vocab)),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim,
                                hidden_dim=layer_dim,
                                layers=1),
    attender=MlpAttender(hidden_dim=layer_dim,
                         state_dim=layer_dim,
                         input_dim=layer_dim),
    decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        # Target-side embedder and scorer are both sized to the target
        # vocabulary.
        embedder=SimpleWordEmbedder(emb_dim=layer_dim,
                                    vocab_size=len(trg_vocab)),
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                 hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim,
                                 yaml_path="decoder"),
        transform=AuxNonLinear(input_dim=layer_dim,
                               output_dim=layer_dim,
                               aux_input_dim=layer_dim),
        scorer=Softmax(vocab_size=len(trg_vocab), input_dim=layer_dim),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    inference=inference)

train = SimpleTrainingRegimen(
    name=f"{EXP}",