コード例 #1
0
def train_model(args):
    trace('making vocaburaries ...')
    src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)

    trace('making model ...')
    model = EncoderDecoderModel.new(src_vocab, trg_vocab, args.embed, args.hidden)

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.word_list(args.source)
        gen2 = gens.word_list(args.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch), args.minibatch)
        model.init_optimizer()

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch = model.train(src_batch, trg_batch)

            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
                trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
                trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
                trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))

            trained += K

        trace('saving model ...')
        model.save(args.model + '.%03d' % (epoch + 1))

    trace('finished.')
    def train_model(self):
        trace('making vocaburaries ...')
        src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
        trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

        trace('making model ...')
        model = self.new(src_vocab, trg_vocab, self.embed, self.hidden, self.parameter_dict)

        random_number = random.randint(0, self.minibatch)
        for i_epoch in range(self.epoch):
            trace('epoch %d/%d: ' % (i_epoch + 1, self.epoch))
            trained = 0
            gen1 = gens.word_list(self.source)
            gen2 = gens.word_list(self.target)
            gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch)
            model.init_optimizer()

            for src_batch, trg_batch in gen3:
                src_batch = fill_batch(src_batch)
                trg_batch = fill_batch(trg_batch)
                K = len(src_batch)
                hyp_batch = model.train(src_batch, trg_batch)

                if trained == 0:
                    self.print_out(random_number, i_epoch, trained, src_batch, trg_batch, hyp_batch)

                trained += K

            trace('saving model ...')
            model.save("ChainerMachineTranslation" + '.%03d' % (self.epoch + 1))

        trace('finished.')
コード例 #3
0
def train_model(args):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)

    trace('making model ...')
    model = EncoderDecoderModel.new(src_vocab, trg_vocab, args.embed, args.hidden)

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.word_list(args.source)
        gen2 = gens.word_list(args.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch), args.minibatch)
        model.init_optimizer()

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch = model.train(src_batch, trg_batch)

            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
                trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
                trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
                trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))

            trained += K

        trace('saving model ...')
        model.save(args.model + '.%03d' % (epoch + 1))

    trace('finished.')
コード例 #4
0
def train(args):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)

    trace('making model ...')
    attmt = AttentionMT(args.vocab, args.embed, args.hidden)
    if args.use_gpu:
        attmt.to_gpu()

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.word_list(args.source)
        gen2 = gens.word_list(args.target)
        gen3 = gens.batch(
            gens.sorted_parallel(gen1, gen2, 100 * args.minibatch),
            args.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(attmt)
        opt.add_hook(optimizer.GradientClipping(5))

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = forward(src_batch, trg_batch, src_vocab,
                                      trg_vocab, attmt, True, 0)
            loss.backward()
            opt.update()

            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' %
                      (epoch + 1, args.epoch, trained + k + 1))
                trace(
                    '  src = ' +
                    ' '.join([x if x != '</s>' else '*'
                              for x in src_batch[k]]))
                trace(
                    '  trg = ' +
                    ' '.join([x if x != '</s>' else '*'
                              for x in trg_batch[k]]))
                trace(
                    '  hyp = ' +
                    ' '.join([x if x != '</s>' else '*'
                              for x in hyp_batch[k]]))

            trained += K

        trace('saving model ...')
        prefix = args.model + '.%03.d' % (epoch + 1)
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        attmt.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', attmt)

    trace('finished.')
    def train(self):
        """
        Train method
        If you use the word2vec model, you possible to use the copy weight
        Optimizer method use the Adagrad
        """
        trace('making vocabularies ...')
        src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
        trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

        trace('making model ...')
        self.attention_dialogue = AttentionDialogue(self.vocab, self.embed,
                                                    self.hidden, self.XP)
        if self.word2vecFlag:
            self.copy_model(self.word2vec, self.attention_dialogue.emb)
            self.copy_model(self.word2vec,
                            self.attention_dialogue.dec,
                            dec_flag=True)

        for epoch in range(self.epoch):
            trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
            trained = 0
            gen1 = gens.word_list(self.source)
            gen2 = gens.word_list(self.target)
            gen3 = gens.batch(
                gens.sorted_parallel(gen1, gen2, 100 * self.minibatch),
                self.minibatch)
            opt = optimizers.AdaGrad(lr=0.01)
            opt.setup(self.attention_dialogue)
            opt.add_hook(optimizer.GradientClipping(5))

            random_number = random.randint(0, self.minibatch - 1)
            for src_batch, trg_batch in gen3:
                src_batch = fill_batch(src_batch)
                trg_batch = fill_batch(trg_batch)
                K = len(src_batch)
                hyp_batch, loss = self.forward_implement(
                    src_batch, trg_batch, src_vocab, trg_vocab,
                    self.attention_dialogue, True, 0)
                loss.backward()
                opt.update()

                self.print_out(random_number, epoch, trained, src_batch,
                               trg_batch, hyp_batch)

                trained += K

        trace('saving model ...')
        prefix = self.model
        model_path = APP_ROOT + "/model/" + prefix
        src_vocab.save(model_path + '.srcvocab')
        trg_vocab.save(model_path + '.trgvocab')
        self.attention_dialogue.save_spec(model_path + '.spec')
        serializers.save_hdf5(model_path + '.weights', self.attention_dialogue)

        trace('finished.')
    def train(self):
        """
        Train method
        If you use the word2vec model, you possible to use the copy weight
        Optimizer method use the Adagrad
        """
        trace("making vocabularies ...")
        src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
        trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

        trace("making model ...")
        self.attention_dialogue = AttentionDialogue(self.vocab, self.embed, self.hidden, self.XP)
        if self.word2vecFlag:
            self.copy_model(self.word2vec, self.attention_dialogue.emb)
            self.copy_model(self.word2vec, self.attention_dialogue.dec, dec_flag=True)

        for epoch in range(self.epoch):
            trace("epoch %d/%d: " % (epoch + 1, self.epoch))
            trained = 0
            gen1 = gens.word_list(self.source)
            gen2 = gens.word_list(self.target)
            gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch)
            opt = optimizers.AdaGrad(lr=0.01)
            opt.setup(self.attention_dialogue)
            opt.add_hook(optimizer.GradientClipping(5))

            random_number = random.randint(0, self.minibatch - 1)
            for src_batch, trg_batch in gen3:
                src_batch = fill_batch(src_batch)
                trg_batch = fill_batch(trg_batch)
                K = len(src_batch)
                hyp_batch, loss = self.forward_implement(
                    src_batch, trg_batch, src_vocab, trg_vocab, self.attention_dialogue, True, 0
                )
                loss.backward()
                opt.update()

                self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch)

                trained += K

        trace("saving model ...")
        prefix = self.model
        model_path = APP_ROOT + "/model/" + prefix
        src_vocab.save(model_path + ".srcvocab")
        trg_vocab.save(model_path + ".trgvocab")
        self.attention_dialogue.save_spec(model_path + ".spec")
        serializers.save_hdf5(model_path + ".weights", self.attention_dialogue)

        trace("finished.")
コード例 #7
0
    def train(self):
        trace('making vocabularies ...')
        src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
        trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

        trace('making model ...')
        encdec = EncoderDecoder(self.vocab, self.embed, self.hidden)
        if self.word2vecFlag:
            self.copy_model(self.word2vec, encdec.enc)
            self.copy_model(self.word2vec, encdec.dec, dec_flag=True)

        for epoch in range(self.epoch):
            trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
            trained = 0
            gen1 = gens.word_list(self.source)
            gen2 = gens.word_list(self.target)
            gen3 = gens.batch(
                gens.sorted_parallel(gen1, gen2, 100 * self.minibatch),
                self.minibatch)
            opt = optimizers.AdaGrad(lr=0.01)
            opt.setup(encdec)
            opt.add_hook(optimizer.GradientClipping(5))

            random_number = random.randint(0, self.minibatch - 1)
            for src_batch, trg_batch in gen3:
                src_batch = fill_batch(src_batch)
                trg_batch = fill_batch(trg_batch)
                K = len(src_batch)
                # If you use the ipython note book you hace to use the forward function
                # hyp_batch, loss = self.forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
                hyp_batch, loss = self.forward_implement(
                    src_batch, trg_batch, src_vocab, trg_vocab, encdec, True,
                    0)
                loss.backward()
                opt.update()

                self.print_out(random_number, epoch, trained, src_batch,
                               trg_batch, hyp_batch)

                trained += K

        trace('saving model ...')
        prefix = self.model
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        encdec.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', encdec)

        trace('finished.')
    def test(self):
        trace('loading model ...')
        src_vocab = Vocabulary.load(self.model + '.srcvocab')
        trg_vocab = Vocabulary.load(self.model + '.trgvocab')
        encdec = EncoderDecoder.load_spec(self.model + '.spec')
        serializers.load_hdf5(self.model + '.weights', encdec)

        trace('generating translation ...')
        generated = 0

        with open(self.target, 'w') as fp:
            for src_batch in gens.batch(gens.word_list(self.source),
                                        self.minibatch):
                src_batch = fill_batch(src_batch)
                K = len(src_batch)

                trace('sample %8d - %8d ...' % (generated + 1, generated + K))
                hyp_batch = self.forward(src_batch, None, src_vocab, trg_vocab,
                                         encdec, False, self.generation_limit)

                source_cuont = 0
                for hyp in hyp_batch:
                    hyp.append('</s>')
                    hyp = hyp[:hyp.index('</s>')]
                    print("src : " +
                          "".join(src_batch[source_cuont]).replace("</s>", ""))
                    print('hyp : ' + ''.join(hyp))
                    print(' '.join(hyp), file=fp)
                    source_cuont = source_cuont + 1

                generated += K

        trace('finished.')
    def test_model(self, model_name):
        trace('loading model ...')
        model = self.load(model_name)

        trace('generating translation ...')
        generated = 0

        with open(self.test_target, 'w') as fp:
            for src_batch in gens.batch(gens.word_list(self.test_source),
                                        self.minibatch):
                src_batch = fill_batch(src_batch)
                K = len(src_batch)

                trace('sample %8d - %8d ...' % (generated + 1, generated + K))
                hyp_batch = model.predict(src_batch, self.generation_limit)

                source_cuont = 0
                for hyp in hyp_batch:
                    hyp.append('</s>')
                    hyp = hyp[:hyp.index('</s>')]
                    # BLEUの結果を計算するため.
                    print("".join(src_batch[source_cuont]).replace("</s>", ""))
                    print(' '.join(hyp))
                    print(' '.join(hyp), file=fp)
                    source_cuont = source_cuont + 1

                generated += K

        trace('finished.')
    def test_model(self, model_name):
        trace('loading model ...')
        model = self.load(model_name)
    
        trace('generating translation ...')
        generated = 0

        with open(self.test_target, 'w') as fp:
            for src_batch in gens.batch(gens.word_list(self.test_source), self.minibatch):
                src_batch = fill_batch(src_batch)
                K = len(src_batch)

                trace('sample %8d - %8d ...' % (generated + 1, generated + K))
                hyp_batch = model.predict(src_batch, self.generation_limit)

                source_cuont = 0
                for hyp in hyp_batch:
                    hyp.append('</s>')
                    hyp = hyp[:hyp.index('</s>')]
                    # BLEUの結果を計算するため.
                    print("".join(src_batch[source_cuont]).replace("</s>", ""))
                    print(' '.join(hyp))
                    print(' '.join(hyp), file=fp)
                    source_cuont = source_cuont + 1

                generated += K

        trace('finished.')
コード例 #11
0
def test(args):
  trace('loading model ...')
  src_vocab = Vocabulary.load(args.model + '.srcvocab')
  trg_vocab = Vocabulary.load(args.model + '.trgvocab')
  attmt = AttentionMT.load_spec(args.model + '.spec')
  if args.use_gpu:
    attmt.to_gpu()
  serializers.load_hdf5(args.model + '.weights', attmt)
  
  trace('generating translation ...')
  generated = 0

  with open(args.target, 'w') as fp:
    for src_batch in gens.batch(gens.word_list(args.source), args.minibatch):
      src_batch = fill_batch(src_batch)
      K = len(src_batch)

      trace('sample %8d - %8d ...' % (generated + 1, generated + K))
      hyp_batch = forward(src_batch, None, src_vocab, trg_vocab, attmt, False, args.generation_limit)

      for hyp in hyp_batch:
        hyp.append('</s>')
        hyp = hyp[:hyp.index('</s>')]
        print(' '.join(hyp), file=fp)

      generated += K

  trace('finished.')
コード例 #12
0
    def test(self):
        trace('loading model ...')
        src_vocab = Vocabulary.load(self.model + '.srcvocab')
        trg_vocab = Vocabulary.load(self.model + '.trgvocab')
        encdec = EncoderDecoder.load_spec(self.model + '.spec')
        serializers.load_hdf5(self.model + '.weights', encdec)

        trace('generating translation ...')
        generated = 0

        with open(self.target, 'w') as fp:
            for src_batch in gens.batch(gens.word_list(self.source), self.minibatch):
                src_batch = fill_batch(src_batch)
                K = len(src_batch)

                trace('sample %8d - %8d ...' % (generated + 1, generated + K))
                hyp_batch = self.forward(src_batch, None, src_vocab, trg_vocab, encdec, False, self.generation_limit)

                source_cuont = 0
                for hyp in hyp_batch:
                    hyp.append('</s>')
                    hyp = hyp[:hyp.index('</s>')]
                    print("src : " + "".join(src_batch[source_cuont]).replace("</s>", ""))
                    print('hyp : ' +''.join(hyp))
                    print(' '.join(hyp), file=fp)
                    source_cuont = source_cuont + 1

                generated += K

        trace('finished.')
コード例 #13
0
    def train(self):
        trace('making vocabularies ...')
        src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
        trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

        trace('making model ...')
        encdec = EncoderDecoder(self.vocab, self.embed, self.hidden)
        if self.word2vecFlag:
            self.copy_model(self.word2vec, encdec.enc)
            self.copy_model(self.word2vec, encdec.dec, dec_flag=True)
        else:
            encdec = self.encdec

        for epoch in range(self.epoch):
            trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
            trained = 0
            gen1 = gens.word_list(self.source)
            gen2 = gens.word_list(self.target)
            gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch)
            opt = optimizers.AdaGrad(lr = 0.01)
            opt.setup(encdec)
            opt.add_hook(optimizer.GradientClipping(5))

            random_number = random.randint(0, self.minibatch - 1)
            for src_batch, trg_batch in gen3:
                src_batch = fill_batch(src_batch)
                trg_batch = fill_batch(trg_batch)
                K = len(src_batch)
                hyp_batch, loss = self.forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
                loss.backward()
                opt.update()

                if trained == 0:
                    self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch)

                trained += K

        trace('saving model ...')
        prefix = self.model
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        encdec.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', encdec)

        trace('finished.')
コード例 #14
0
def train(args):
  trace('making vocabularies ...')
  src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
  trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)

  trace('making model ...')
  attmt = AttentionMT(args.vocab, args.embed, args.hidden)
  if args.use_gpu:
    attmt.to_gpu()

  for epoch in range(args.epoch):
    trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
    trained = 0
    gen1 = gens.word_list(args.source)
    gen2 = gens.word_list(args.target)
    gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch), args.minibatch)
    opt = optimizers.AdaGrad(lr = 0.01)
    opt.setup(attmt)
    opt.add_hook(optimizer.GradientClipping(5))

    for src_batch, trg_batch in gen3:
      src_batch = fill_batch(src_batch)
      trg_batch = fill_batch(trg_batch)
      K = len(src_batch)
      hyp_batch, loss = forward(src_batch, trg_batch, src_vocab, trg_vocab, attmt, True, 0)
      loss.backward()
      opt.update()

      for k in range(K):
        trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
        trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
        trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
        trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))

      trained += K

    trace('saving model ...')
    prefix = args.model + '.%03.d' % (epoch + 1)
    src_vocab.save(prefix + '.srcvocab')
    trg_vocab.save(prefix + '.trgvocab')
    attmt.save_spec(prefix + '.spec')
    serializers.save_hdf5(prefix + '.weights', attmt)

  trace('finished.')
コード例 #15
0
ファイル: mt_s2s_encdec.py プロジェクト: delihiros/dqname
 def dqn(self, s):
   words = s.strip().split()
   v = ([b for b in gens.batch([words], 1)])
   src_batch = fill_batch(v[0])
   hyp_batch = forward(src_batch, None, self.src_vocab, self.trg_vocab, self.encdec, False, self.args.generation_limit)
   result = ""
   for hyp in hyp_batch:
     hyp.append('</s>')
     hyp = hyp[:hyp.index('</s>')]
     print(' '.join(hyp))
     result = ' '.join(hyp)
   return result
    def train_model(self):
        trace('making vocaburaries ...')
        src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
        trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

        trace('making model ...')
        model = self.new(src_vocab, trg_vocab, self.embed, self.hidden,
                         self.parameter_dict)

        random_number = random.randint(0, self.minibatch)
        for i_epoch in range(self.epoch):
            trace('epoch %d/%d: ' % (i_epoch + 1, self.epoch))
            trained = 0
            gen1 = gens.word_list(self.source)
            gen2 = gens.word_list(self.target)
            gen3 = gens.batch(
                gens.sorted_parallel(gen1, gen2, 100 * self.minibatch),
                self.minibatch)
            model.init_optimizer()

            for src_batch, trg_batch in gen3:
                src_batch = fill_batch(src_batch)
                trg_batch = fill_batch(trg_batch)
                K = len(src_batch)
                hyp_batch = model.train(src_batch, trg_batch)

                if trained == 0:
                    self.print_out(random_number, i_epoch, trained, src_batch,
                                   trg_batch, hyp_batch)

                trained += K

            trace('saving model ...')
            model.save("ChainerMachineTranslation" + '.%03d' %
                       (self.epoch + 1))

        trace('finished.')
    def test(self):
        """
        Test method
        You have to parepare the train model
        """
        trace('loading model ...')
        prefix = self.model
        model_path = APP_ROOT + "/model/" + prefix
        src_vocab = Vocabulary.load(model_path + '.srcvocab')
        trg_vocab = Vocabulary.load(model_path + '.trgvocab')
        self.attention_dialogue = AttentionDialogue.load_spec(
            model_path + '.spec', self.XP)
        serializers.load_hdf5(model_path + '.weights', self.attention_dialogue)

        trace('generating translation ...')
        generated = 0

        with open(self.test_target, 'w') as fp:
            for src_batch in gens.batch(gens.word_list(self.source),
                                        self.minibatch):
                src_batch = fill_batch(src_batch)
                K = len(src_batch)

                trace('sample %8d - %8d ...' % (generated + 1, generated + K))
                hyp_batch = self.forward_implement(src_batch, None, src_vocab,
                                                   trg_vocab,
                                                   self.attention_dialogue,
                                                   False,
                                                   self.generation_limit)

                source_cuont = 0
                for hyp in hyp_batch:
                    hyp.append('</s>')
                    hyp = hyp[:hyp.index('</s>')]
                    print("src : " +
                          "".join(src_batch[source_cuont]).replace("</s>", ""))
                    print('hyp : ' + ''.join(hyp))
                    print(' '.join(hyp), file=fp)
                    source_cuont = source_cuont + 1

                generated += K

        trace('finished.')
コード例 #18
0
def test(args):

    trace('loading model ...')
    src_vocab = Vocabulary.load(args.model + '.srcvocab')
    trg_vocab = Vocabulary.load(args.model + '.trgvocab')
    encdec = EncoderDecoder.load_spec(args.model + '.spec')
    if args.use_gpu:
        encdec.to_gpu()
    serializers.load_hdf5(args.model + '.weights', encdec)

    trace('generating translation ...')
    generated = 0

    temp = gens.to_words(args.target)
    # temp.append("</s>")
    src_batch = []
    src_batch.append(temp)
    # src_batch = [['私は', '太郎', 'です', '(´', 'ー', '`*)', 'ウンウン', '</s>']]
    src_batch = fill_batch(src_batch)
    print("src_batch:", src_batch)
    K = len(src_batch)

    trace('sample %8d - %8d ...' % (generated + 1, generated + K))
    print("question:")
    for srp in src_batch:
        srp.append('</s>')
        srp = srp[:srp.index('</s>')]
        print(''.join(srp))

    hyp_batch = forward(src_batch, None, src_vocab, trg_vocab, encdec, False,
                        args.generation_limit)
    print("answser:")
    for hyp in hyp_batch:
        hyp.append('</s>')
        hyp = hyp[:hyp.index('</s>')]
        print(''.join(hyp))
    print("----------------")

    generated += K

    trace('finished.')
    def test(self):
        """
        Test method
        You have to parepare the train model
        """
        trace("loading model ...")
        prefix = self.model
        model_path = APP_ROOT + "/model/" + prefix
        src_vocab = Vocabulary.load(model_path + ".srcvocab")
        trg_vocab = Vocabulary.load(model_path + ".trgvocab")
        self.attention_dialogue = AttentionDialogue.load_spec(model_path + ".spec", self.XP)
        serializers.load_hdf5(model_path + ".weights", self.attention_dialogue)

        trace("generating translation ...")
        generated = 0

        with open(self.test_target, "w") as fp:
            for src_batch in gens.batch(gens.word_list(self.source), self.minibatch):
                src_batch = fill_batch(src_batch)
                K = len(src_batch)

                trace("sample %8d - %8d ..." % (generated + 1, generated + K))
                hyp_batch = self.forward_implement(
                    src_batch, None, src_vocab, trg_vocab, self.attention_dialogue, False, self.generation_limit
                )

                source_cuont = 0
                for hyp in hyp_batch:
                    hyp.append("</s>")
                    hyp = hyp[: hyp.index("</s>")]
                    print("src : " + "".join(src_batch[source_cuont]).replace("</s>", ""))
                    print("hyp : " + "".join(hyp))
                    print(" ".join(hyp), file=fp)
                    source_cuont = source_cuont + 1

                generated += K

        trace("finished.")
コード例 #20
0
def test_model(args):
    trace('loading model ...')
    model = EncoderDecoderModel.load(args.model)
    
    trace('generating translation ...')
    generated = 0

    with open(args.target, 'w') as fp:
        for src_batch in gens.batch(gens.word_list(args.source), args.minibatch):
            src_batch = fill_batch(src_batch)
            K = len(src_batch)

            trace('sample %8d - %8d ...' % (generated + 1, generated + K))
            hyp_batch = model.predict(src_batch, args.generation_limit)

            for hyp in hyp_batch:
                hyp.append('</s>')
                hyp = hyp[:hyp.index('</s>')]
                print(' '.join(hyp), file=fp)

            generated += K

    trace('finished.')
コード例 #21
0
def test_model(args):
    trace('loading model ...')
    model = EncoderDecoderModel.load(args.model)
    
    trace('generating translation ...')
    generated = 0

    with open(args.target, 'w') as fp:
        for src_batch in gens.batch(gens.word_list(args.source), args.minibatch):
            src_batch = fill_batch(src_batch)
            K = len(src_batch)

            trace('sample %8d - %8d ...' % (generated + 1, generated + K))
            hyp_batch = model.predict(src_batch, args.generation_limit)

            for hyp in hyp_batch:
                hyp.append('</s>')
                hyp = hyp[:hyp.index('</s>')]
                print(' '.join(hyp), file=fp)

            generated += K

    trace('finished.')
コード例 #22
0
def train(args):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.input_word_list(), args.vocab)
    trg_vocab = Vocabulary.new(gens.output_word_list(), args.vocab)
    trace('making model ...')
    encdec = EncoderDecoder(args.vocab, args.embed, args.hidden)

    if args.load_model != "":
        print("model load  %s ... " % (args.load_model))
        src_vocab = Vocabulary.load(args.load_model + '.srcvocab')
        trg_vocab = Vocabulary.load(args.load_model + '.trgvocab')
        encdec = EncoderDecoder.load_spec(args.load_model + '.spec')
        serializers.load_hdf5(args.load_model + '.weights', encdec)

    if args.use_gpu:
        encdec.to_gpu()

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.input_word_list()
        gen2 = gens.output_word_list()
        gen3 = gens.batch(
            gens.sorted_parallel(gen1, gen2, 100 * args.minibatch),
            args.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(encdec)
        opt.add_hook(optimizer.GradientClipping(5))

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = forward(src_batch, trg_batch, src_vocab,
                                      trg_vocab, encdec, True, 0)
            loss.backward()
            opt.update()

            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' %
                      (epoch + 1, args.epoch, trained + k + 1))
                trace(
                    '  src = ' +
                    ' '.join([x if x != '</s>' else '*'
                              for x in src_batch[k]]))
                trace(
                    '  trg = ' +
                    ' '.join([x if x != '</s>' else '*'
                              for x in trg_batch[k]]))
                trace(
                    '  hyp = ' +
                    ' '.join([x if x != '</s>' else '*'
                              for x in hyp_batch[k]]))

            trained += K

        if epoch % args.model_save_timing == 0:

            trace('saving model ...')
            prefix = args.model + '.%03.d' % (epoch + 1)
            src_vocab.save(prefix + '.srcvocab')
            trg_vocab.save(prefix + '.trgvocab')
            encdec.save_spec(prefix + '.spec')
            serializers.save_hdf5(prefix + '.weights', encdec)

    trace('finished.')