def save_model(self):
    """Persist the trained model under model/: vocabulary, spec, and weights."""
    trace('saving model ...')
    base = "model/" + self.model
    # Three artifacts share the same path prefix, differing only by suffix.
    self.trg_vocab.save(base + '.trgvocab')
    self.encdec.save_spec(base + '.spec')
    serializers.save_hdf5(base + '.weights', self.encdec)
    trace('finished.')
def train(self, epoch):
    """Run one training epoch over the target corpus.

    Builds the target vocabulary, then iterates fixed-size minibatches,
    running the image forward pass and the word decoder, backpropagating
    the loss and updating parameters.

    :param epoch: zero-based index of the current epoch (logging only).
    """
    trace('making vocabularies ...')
    # The vocabulary is rebuilt from the corpus on every call.
    self.trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)
    trace('making model ...')
    trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
    # NOTE(review): a fresh AdaGrad optimizer is created on every call, so
    # accumulated gradient statistics reset each epoch — confirm intended.
    opt = optimizers.AdaGrad(lr=0.01)
    opt.setup(self.encdec)
    opt.add_hook(optimizer.GradientClipping(5))
    gen1 = gens.word_list(self.target)
    gen = gens.batch(gen1, self.minibatch)
    for trg_batch in gen:
        self.batch_size = len(trg_batch)
        self.trg_batch = fill_batch(trg_batch)  # pad sentences to equal length
        # Drop the trailing partial batch.
        if len(trg_batch) != self.minibatch:
            break
        self.encdec.clear(self.batch_size)
        self.__forward_img()
        self.encdec.reset(self.batch_size)
        # NOTE(review): no zerograds()/cleargrads() before backward() —
        # verify the model clears gradients in clear()/reset(), otherwise
        # gradients accumulate across minibatches.
        loss, hyp_batch = self.__forward_word(self.trg_batch, self.encdec, True, 0)
        loss.backward()
        opt.update()
        K = len(self.trg_batch) - 2
        self.print_out(K, hyp_batch, epoch)
def train(self):
    """Full training loop over ``self.epoch`` epochs; saves the model at the end."""
    trace('making vocabularies ...')
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)
    trace('making model ...')
    for epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        trained = 0
        # NOTE(review): the optimizer is re-created each epoch, resetting
        # AdaGrad's accumulated statistics — confirm intended.
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(self.encdec)
        opt.add_hook(optimizer.GradientClipping(5))
        gen1 = gens.word_list(self.target)
        gen = gens.batch(gen1, self.minibatch)
        # Index of the sample to display from the first minibatch.
        random_number = random.randint(0, self.minibatch - 1)
        for trg_batch in gen:
            self.trg_batch = fill_batch(trg_batch)
            # Drop the trailing partial batch.
            if len(self.trg_batch) != self.minibatch:
                break
            hyp_batch, loss = self.forward(trg_vocab, self.use_gpu, self.gpu_id)
            loss.backward()
            opt.update()
            K = len(self.trg_batch)
            # Print a sample only for the first minibatch of the epoch.
            if trained == 0:
                self.print_out(random_number, epoch, trained, hyp_batch)
            trained += K
    trace('saving model ...')
    prefix = self.model
    trg_vocab.save(prefix + '.trgvocab')
    self.encdec.save_spec(prefix + '.spec')
    serializers.save_hdf5(prefix + '.weights', self.encdec)
    trace('finished.')
def print_out(self, K, i_epoch, trained, hyp_batch):
    """Log training progress plus the reference/hypothesis pair for sample K."""

    def _render(words):
        # End-of-sentence markers are shown as '*'.
        return ' '.join('*' if w == '</s>' else w for w in words)

    trace('epoch %3d/%3d, sample %8d' % (i_epoch + 1, self.epoch, trained + 1))
    trace(' trg = ' + _render(self.trg_batch[K]))
    trace(' hyp = ' + _render(hyp_batch[K]))
def print_out(self, K, hyp_batch, epoch):
    """Log training progress and the reference/hypothesis pair for sample K.

    K is clamped so that it remains a valid index into both the target
    batch and the hypothesis batch.

    :param K: candidate sample index within the batch.
    :param hyp_batch: batch of hypothesis token lists.
    :param epoch: zero-based index of the current epoch.
    """
    if len(self.trg_batch) - 2 < K:
        K = len(self.trg_batch) - 2
    if len(hyp_batch) - 2 < K:
        K = len(hyp_batch) - 2
    # Fix: display the epoch 1-based, consistent with every other
    # 'epoch %d/%d' trace in this file.
    trace('epoch %3d/%3d, sample %8d' % (epoch + 1, self.epoch, K + 1))
    trace(' trg = ' + ' '.join([x if x != '</s>' else '*' for x in self.trg_batch[K]]))
    trace(' hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[K]]))
def test(self):
    """Load the trained attention model and write translations to self.target.

    Fix: each hypothesis is written on its own line; the original wrote
    them back-to-back with no separator (the sibling test() variants use
    print(..., file=fp), which emits one hypothesis per line).
    """
    trace('loading model ...')
    trg_vocab = Vocabulary.load(self.model + '.trgvocab')
    self.encdec = EncoderDecoderAttention.load_spec(self.model + '.spec')
    serializers.load_hdf5(self.model + '.weights', self.encdec)
    trace('generating translation ...')
    generated = 0
    trace('sample %8d - %8d ...' % (generated + 1, generated))
    hyp_batch = self.forward(trg_vocab, False, self.generation_limit)
    source_count = 0  # renamed from 'source_cuont' (typo)
    with open(self.target, 'w') as fp:
        for hyp in hyp_batch:
            # Guarantee a terminator exists, then truncate at the first one.
            hyp.append('</s>')
            hyp = hyp[:hyp.index('</s>')]
            print('hyp : ' + ''.join(hyp))
            fp.write(' '.join(hyp) + '\n')  # one hypothesis per line
            source_count += 1
    trace('finished.')
def test(self):
    """Load the saved attention model and emit translations into self.target."""
    trace('loading model ...')
    vocab = Vocabulary.load(self.model + '.trgvocab')
    self.encdec = EncoderDecoderAttention.load_spec(self.model + '.spec')
    serializers.load_hdf5(self.model + '.weights', self.encdec)
    trace('generating translation ...')
    generated = 0
    trace('sample %8d - %8d ...' % (generated + 1, generated))
    hypotheses = self.forward(vocab, False, self.generation_limit)
    written = 0
    with open(self.target, 'w') as out:
        for words in hypotheses:
            # Append a terminator so index() below always succeeds,
            # then keep everything before the first one.
            words.append('</s>')
            cut = words.index('</s>')
            words = words[:cut]
            print('hyp : ' + ''.join(words))
            out.write(' '.join(words))
            written += 1
    trace('finished.')
def test(self):
    """Load the trained model and write generated captions to self.target."""
    trace('loading model ...')
    self.trg_vocab = Vocabulary.load("model/" + self.model + '.trgvocab')
    # Fix: the original read len(trg_batch), but 'trg_batch' is undefined in
    # this scope and raised NameError at runtime; the batch lives on
    # self.trg_batch — TODO(review): confirm self.trg_batch is populated
    # before test() is invoked.
    self.batch_size = len(self.trg_batch)
    encdec = EncoderDecoder.load_spec("model/" + self.model + '.spec')
    serializers.load_hdf5("model/" + self.model + '.weights', encdec)
    trace('generating translation ...')
    generated = 0
    with open(self.target, 'w') as fp:
        self.__forward_img()
        trace('sample %8d ...' % (generated + 1))
        hyp_batch = self.__forward_word(self.trg_batch, encdec, False, self.generation_limit)
        for hyp in hyp_batch:
            # Guarantee a terminator exists, then truncate at the first one.
            hyp.append('</s>')
            hyp = hyp[:hyp.index('</s>')]
            print('hyp : ' + ''.join(hyp))
            print(' '.join(hyp), file=fp)
    trace('finished.')
def test(self):
    """Generate captions with a trained model and write them to self.target.

    NOTE(review): ``trg_batch`` below is not defined anywhere in this
    method, so ``len(trg_batch)`` raises NameError at runtime — it was
    probably meant to be ``self.trg_batch``; confirm against the caller.
    """
    trace('loading model ...')
    self.trg_vocab = Vocabulary.load("model/" + self.model + '.trgvocab')
    self.batch_size = len(trg_batch)  # BUG: undefined name (see docstring)
    encdec = EncoderDecoder.load_spec("model/" + self.model + '.spec')
    serializers.load_hdf5("model/" + self.model + '.weights', encdec)
    trace('generating translation ...')
    generated = 0
    with open(self.target, 'w') as fp:
        self.__forward_img()
        trace('sample %8d ...' % (generated + 1))
        hyp_batch = self.__forward_word(self.trg_batch, encdec, False, self.generation_limit)
        for hyp in hyp_batch:
            # Guarantee a terminator exists, then truncate at the first one.
            hyp.append('</s>')
            hyp = hyp[:hyp.index('</s>')]
            print('hyp : ' + ''.join(hyp))
            print(' '.join(hyp), file=fp)
    trace('finished.')