def train_model(self):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

    trace('making model ...')
    model = self.new(src_vocab, trg_vocab, self.embed, self.hidden, self.parameter_dict)

    # Pick one in-batch sample index for progress printing
    # (randint is inclusive, so the upper bound is minibatch - 1).
    random_number = random.randint(0, self.minibatch - 1)
    for i_epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (i_epoch + 1, self.epoch))
        trained = 0
        gen1 = gens.word_list(self.source)
        gen2 = gens.word_list(self.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch)
        model.init_optimizer()

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch = model.train(src_batch, trg_batch)
            if trained == 0:
                self.print_out(random_number, i_epoch, trained, src_batch, trg_batch, hyp_batch)
            trained += K

        trace('saving model ...')
        # Suffix with the current epoch so each checkpoint gets its own file.
        model.save('ChainerMachineTranslation' + '.%03d' % (i_epoch + 1))

    trace('finished.')
def train(args):
    trace('loading corpus ...')
    with open(args.source) as fp:
        trees = [make_tree(l) for l in fp]

    trace('extracting leaf nodes ...')
    word_lists = [extract_words(t) for t in trees]

    trace('extracting gold operations ...')
    op_lists = [make_operations(t) for t in trees]

    trace('making vocabulary ...')
    word_vocab = Vocabulary.new(word_lists, args.vocab)
    phrase_set = set()
    semi_set = set()
    for tree in trees:
        phrase_set |= set(extract_phrase_labels(tree))
        semi_set |= set(extract_semi_labels(tree))
    phrase_vocab = Vocabulary.new([list(phrase_set)], len(phrase_set), add_special_tokens=False)
    semi_vocab = Vocabulary.new([list(semi_set)], len(semi_set), add_special_tokens=False)

    trace('converting data ...')
    word_lists = [convert_word_list(x, word_vocab) for x in word_lists]
    op_lists = [convert_op_list(x, phrase_vocab, semi_vocab) for x in op_lists]

    trace('start training ...')
    parser = Parser(
        args.vocab, args.embed, args.queue, args.stack,
        len(phrase_set), len(semi_set),
    )
    if USE_GPU:
        parser.to_gpu()
    opt = optimizers.AdaGrad(lr=0.005)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(5))

    for epoch in range(args.epoch):
        n = 0
        for samples in batch(zip(word_lists, op_lists), args.minibatch):
            parser.zerograds()
            loss = my_zeros((), np.float32)
            for word_list, op_list in zip(*samples):
                trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
                loss += parser.forward(word_list, op_list, 0)
                n += 1
            loss.backward()
            opt.update()

        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        word_vocab.save(prefix + '.words')
        phrase_vocab.save(prefix + '.phrases')
        semi_vocab.save(prefix + '.semiterminals')
        parser.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', parser)

    trace('finished.')
def load(filename):
    self = AttentionalTranslationModel()
    with ModelFile(filename) as fp:
        self.__src_vocab = Vocabulary.load(fp.get_file_pointer())
        self.__trg_vocab = Vocabulary.load(fp.get_file_pointer())
        self.__n_embed = int(fp.read())
        self.__n_hidden = int(fp.read())
        self.__make_model()
        wrapper.begin_model_access(self.__model)
        fp.read_embed(self.__model.w_xi)
        fp.read_linear(self.__model.w_ia)
        fp.read_linear(self.__model.w_aa)
        fp.read_linear(self.__model.w_ib)
        fp.read_linear(self.__model.w_bb)
        fp.read_linear(self.__model.w_aw)
        fp.read_linear(self.__model.w_bw)
        fp.read_linear(self.__model.w_pw)
        fp.read_linear(self.__model.w_we)
        fp.read_linear(self.__model.w_ap)
        fp.read_linear(self.__model.w_bp)
        fp.read_embed(self.__model.w_yp)
        fp.read_linear(self.__model.w_pp)
        fp.read_linear(self.__model.w_cp)
        fp.read_linear(self.__model.w_dp)
        fp.read_linear(self.__model.w_py)
        wrapper.end_model_access(self.__model)
    return self
def test(args):
    trace('loading model ...')
    word_vocab = Vocabulary.load(args.model + '.words')
    phrase_vocab = Vocabulary.load(args.model + '.phrases')
    semiterminal_vocab = Vocabulary.load(args.model + '.semiterminals')
    parser = Parser.load_spec(args.model + '.spec')
    if args.use_gpu:
        parser.to_gpu()
    serializers.load_hdf5(args.model + '.weights', parser)

    embed_cache = {}
    parser.reset()

    trace('generating parse trees ...')
    with open(args.source) as fp:
        for l in fp:
            word_list = to_vram_words(convert_word_list(l.split(), word_vocab))
            tree = combine_xbar(
                restore_labels(
                    parser.forward(word_list, None, args.unary_limit, embed_cache),
                    phrase_vocab,
                    semiterminal_vocab))
            print('( ' + tree_to_string(tree) + ' )')

    trace('finished.')
def train_model(args):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)

    trace('making model ...')
    model = EncoderDecoderModel.new(src_vocab, trg_vocab, args.embed, args.hidden)

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.word_list(args.source)
        gen2 = gens.word_list(args.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch), args.minibatch)
        model.init_optimizer()

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch = model.train(src_batch, trg_batch)

            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
                trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
                trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
                trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))

            trained += K

        trace('saving model ...')
        model.save(args.model + '.%03d' % (epoch + 1))

    trace('finished.')
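# The train/test drivers in this file all call a fill_batch() helper before
# feeding a minibatch to the model. A minimal sketch of such a helper,
# assuming it right-pads every sentence with '</s>' up to the longest
# sentence in the batch plus one trailing sentinel (the padding token and
# the extra sentinel are assumptions, not taken from this file):
def fill_batch(batch, token='</s>'):
    max_len = max(len(x) for x in batch)
    return [x + [token] * (max_len - len(x) + 1) for x in batch]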
def test(self):
    trace('loading model ...')
    src_vocab = Vocabulary.load(self.model + '.srcvocab')
    trg_vocab = Vocabulary.load(self.model + '.trgvocab')
    encdec = EncoderDecoder.load_spec(self.model + '.spec')
    serializers.load_hdf5(self.model + '.weights', encdec)

    trace('generating translation ...')
    generated = 0
    with open(self.target, 'w') as fp:
        for src_batch in gens.batch(gens.word_list(self.source), self.minibatch):
            src_batch = fill_batch(src_batch)
            K = len(src_batch)
            trace('sample %8d - %8d ...' % (generated + 1, generated + K))
            hyp_batch = self.forward(src_batch, None, src_vocab, trg_vocab, encdec, False, self.generation_limit)

            source_count = 0
            for hyp in hyp_batch:
                # Append a sentinel so index() always succeeds, then cut the
                # hypothesis at the first end-of-sentence token.
                hyp.append('</s>')
                hyp = hyp[:hyp.index('</s>')]
                print('src : ' + ''.join(src_batch[source_count]).replace('</s>', ''))
                print('hyp : ' + ''.join(hyp))
                print(' '.join(hyp), file=fp)
                source_count += 1

            generated += K

    trace('finished.')
def test(args):
    trace('loading model ...')
    src_vocab = Vocabulary.load(args.model + '.srcvocab')
    trg_vocab = Vocabulary.load(args.model + '.trgvocab')
    attmt = AttentionMT.load_spec(args.model + '.spec')
    if args.use_gpu:
        attmt.to_gpu()
    serializers.load_hdf5(args.model + '.weights', attmt)

    trace('generating translation ...')
    generated = 0
    with open(args.target, 'w') as fp:
        for src_batch in gens.batch(gens.word_list(args.source), args.minibatch):
            src_batch = fill_batch(src_batch)
            K = len(src_batch)
            trace('sample %8d - %8d ...' % (generated + 1, generated + K))
            hyp_batch = forward(src_batch, None, src_vocab, trg_vocab, attmt, False, args.generation_limit)

            for hyp in hyp_batch:
                hyp.append('</s>')
                hyp = hyp[:hyp.index('</s>')]
                print(' '.join(hyp), file=fp)

            generated += K

    trace('finished.')
def __predict_sentence(self, src_batch):
    dialogue = EncoderDecoderModelForwardSlack(self.parameter)
    src_vocab = Vocabulary.load(self.model_name + '.srcvocab')
    trg_vocab = Vocabulary.load(self.model_name + '.trgvocab')
    model = EncoderDecoder.load_spec(self.model_name + '.spec')
    serializers.load_hdf5(dialogue.model + '.weights', model)
    hyp_batch = dialogue.forward(src_batch, None, src_vocab, trg_vocab, model, False, self.generation_limit)
    return hyp_batch
def train(args):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)

    trace('making model ...')
    attmt = AttentionMT(args.vocab, args.embed, args.hidden)
    if args.use_gpu:
        attmt.to_gpu()

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.word_list(args.source)
        gen2 = gens.word_list(args.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch), args.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(attmt)
        opt.add_hook(optimizer.GradientClipping(5))

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = forward(src_batch, trg_batch, src_vocab, trg_vocab, attmt, True, 0)
            loss.backward()
            opt.update()

            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
                trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
                trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
                trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))

            trained += K

        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        attmt.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', attmt)

    trace('finished.')
def train(self):
    """Train method.

    If a word2vec model is available, its weights can be copied into the
    embedding and decoder layers. AdaGrad is used as the optimizer.
    """
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

    trace('making model ...')
    self.attention_dialogue = AttentionDialogue(self.vocab, self.embed, self.hidden, self.XP)
    if self.word2vecFlag:
        self.copy_model(self.word2vec, self.attention_dialogue.emb)
        self.copy_model(self.word2vec, self.attention_dialogue.dec, dec_flag=True)

    for epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        trained = 0
        gen1 = gens.word_list(self.source)
        gen2 = gens.word_list(self.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(self.attention_dialogue)
        opt.add_hook(optimizer.GradientClipping(5))

        random_number = random.randint(0, self.minibatch - 1)
        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = self.forward_implement(src_batch, trg_batch, src_vocab, trg_vocab, self.attention_dialogue, True, 0)
            loss.backward()
            opt.update()
            self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch)
            trained += K

        trace('saving model ...')
        prefix = self.model
        model_path = APP_ROOT + "/model/" + prefix
        src_vocab.save(model_path + '.srcvocab')
        trg_vocab.save(model_path + '.trgvocab')
        self.attention_dialogue.save_spec(model_path + '.spec')
        serializers.save_hdf5(model_path + '.weights', self.attention_dialogue)

    trace('finished.')
def train(args):
    if os.path.exists("./model/vocab.bin"):
        src_vocab = Vocabulary.load("./model/vocab.bin")
    else:
        src_vocab = Vocabulary.new(gens.word_list(args.source), args.n_vocab)
        src_vocab.save('./model/vocab.bin')

    if os.path.exists("./model/tag.bin"):
        trg_tag = Vocabulary.load("./model/tag.bin")
    else:
        trg_tag = Vocabulary.new(gens.word_list(args.target), args.n_tag)
        trg_tag.save('./model/tag.bin')

    print("vocab_len:{}".format(len(src_vocab)))
    print("tag_len:{}".format(len(trg_tag)))

    encdec = BiEncDecLSTM(args.n_vocab, args.layer, args.embed, args.hidden, args.n_tag)
    optimizer = optimizers.Adam()
    optimizer.setup(encdec)

    for e_i in range(args.epoch):
        tt_list = [[src_vocab.stoi(char) for char in char_arr] for char_arr in gens.word_list(args.source_tr)]
        tag_list = [trg_tag.stoi(tag[0]) for tag in gens.word_list(args.target_tr)]
        print("{}:{}".format(len(tt_list), len(tag_list)))
        assert len(tt_list) == len(tag_list)

        # Shuffle sentences and tags with a shared permutation.
        ind_arr = list(range(len(tt_list)))
        random.shuffle(ind_arr)
        tt_now = (tt_list[ri] for ri in ind_arr)
        tag_now = (tag_list[ri] for ri in ind_arr)
        tt_gen = gens.batch(tt_now, args.batchsize)
        tag_gen = gens.batch(tag_now, args.batchsize)

        for tt, tag in zip(tt_gen, tag_gen):
            y_ws = encdec(tt)
            teac_arr = [src_vocab.itos(t) for t in tt[0]]
            pred_arr = [trg_tag.itos(y_each.data.argmax(0)) for y_each in y_ws]
            print("teach:{}:{}:{}".format(teac_arr, trg_tag.itos(tag[0]), pred_arr[0]))
            tag = xp.array(tag, dtype=xp.int32)
            loss = F.softmax_cross_entropy(y_ws, tag)
            encdec.cleargrads()
            loss.backward()
            optimizer.update()

        serializers.save_npz('./model/attn_tag_model_{}.npz'.format(e_i), encdec)
def __init__(self, args):
    trace('loading model ...')
    self.args = args
    self.src_vocab = Vocabulary.load(args.model + '.srcvocab')
    self.trg_vocab = Vocabulary.load(args.model + '.trgvocab')
    self.encdec = EncoderDecoder.load_spec(args.model + '.spec')
    if args.use_gpu:
        self.encdec.to_gpu()
    serializers.load_hdf5(args.model + '.weights', self.encdec)
    trace('generating translation ...')
def __predict_sentence(self, src_batch):
    """Predict a response for the given source batch.

    :param src_batch: batch of source sentences
    :return: hypothesis batch produced by the model
    """
    dialogue = EncoderDecoderModelAttention(self.parameter)
    src_vocab = Vocabulary.load(self.model_name + '.srcvocab')
    trg_vocab = Vocabulary.load(self.model_name + '.trgvocab')
    model = AttentionDialogue.load_spec(self.model_name + '.spec', self.XP)
    serializers.load_hdf5(self.model_name + '.weights', model)
    hyp_batch = dialogue.forward_implement(src_batch, None, src_vocab, trg_vocab, model, False, self.generation_limit)
    return hyp_batch
def setCateg(self, args):
    categ_name = "./{}/categ_{}.bin".format(args.dataname, args.dataname)
    if os.path.exists(categ_name):
        categ_vocab = Vocabulary.load(categ_name)
    else:
        # Count distinct category labels to size the vocabulary
        # (+3 presumably reserves room for special tokens).
        set_cat = set()
        for word_arr in gens.word_list(args.category):
            for word in word_arr:
                set_cat.add(word)
        n_categ = len(set_cat) + 3
        print("n_categ:{}".format(n_categ))
        categ_vocab = Vocabulary.new(gens.word_list(args.category), n_categ)
        categ_vocab.save(categ_name)
    self.categ_vocab = categ_vocab
    return categ_vocab
def setVocab(self, args):
    vocab_name = "./{}/vocab_{}.bin".format(args.dataname, args.dataname)
    if os.path.exists(vocab_name):
        src_vocab = Vocabulary.load(vocab_name)
    else:
        set_vocab = set()
        for word_arr in gens.word_list(args.source):
            for word in word_arr:
                set_vocab.add(word)
        n_vocab = len(set_vocab) + 3
        print("n_vocab:{}".format(n_vocab))
        print("arg_vocab:{}".format(args.n_vocab))
        src_vocab = Vocabulary.new(gens.word_list(args.source), args.n_vocab)
        src_vocab.save(vocab_name)
    self.vocab = src_vocab
    return src_vocab
def train(self):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

    trace('making model ...')
    encdec = EncoderDecoder(self.vocab, self.embed, self.hidden)
    if self.word2vecFlag:
        self.copy_model(self.word2vec, encdec.enc)
        self.copy_model(self.word2vec, encdec.dec, dec_flag=True)

    for epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        trained = 0
        gen1 = gens.word_list(self.source)
        gen2 = gens.word_list(self.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(encdec)
        opt.add_hook(optimizer.GradientClipping(5))

        random_number = random.randint(0, self.minibatch - 1)
        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            # In the IPython notebook you have to use the forward function:
            # hyp_batch, loss = self.forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
            hyp_batch, loss = self.forward_implement(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
            loss.backward()
            opt.update()
            self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch)
            trained += K

        trace('saving model ...')
        prefix = self.model
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        encdec.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', encdec)

    trace('finished.')
def train(self):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

    trace('making model ...')
    encdec = EncoderDecoder(self.vocab, self.embed, self.hidden)
    if self.word2vecFlag:
        self.copy_model(self.word2vec, encdec.enc)
        self.copy_model(self.word2vec, encdec.dec, dec_flag=True)
    else:
        encdec = self.encdec

    for epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        trained = 0
        gen1 = gens.word_list(self.source)
        gen2 = gens.word_list(self.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(encdec)
        opt.add_hook(optimizer.GradientClipping(5))

        random_number = random.randint(0, self.minibatch - 1)
        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = self.forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
            loss.backward()
            opt.update()
            if trained == 0:
                self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch)
            trained += K

        trace('saving model ...')
        prefix = self.model
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        encdec.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', encdec)

    trace('finished.')
def load(self, filename):
    with ModelFile(filename) as fp:
        self.src_vocab = Vocabulary.load(fp.get_file_pointer())
        self.trg_vocab = Vocabulary.load(fp.get_file_pointer())
        self.n_embed = int(fp.read())
        self.n_hidden = int(fp.read())
        self.make_model()
        wrapper.begin_model_access(self.model)
        fp.read_embed(self.model.weight_xi)
        fp.read_linear(self.model.weight_ip)
        fp.read_linear(self.model.weight_pp)
        fp.read_linear(self.model.weight_pq)
        fp.read_linear(self.model.weight_qj)
        fp.read_linear(self.model.weight_jy)
        fp.read_embed(self.model.weight_yq)
        fp.read_linear(self.model.weight_qq)
        wrapper.end_model_access(self.model)
    return self
def test(args, epoch):
    model_name = "./model/attn_tag_model_{}.npz".format(epoch)
    encdec = BiEncDecLSTM(args.n_vocab, args.layer, args.embed, args.hidden, args.n_tag)
    serializers.load_npz(model_name, encdec)
    src_vocab = Vocabulary.load("./model/vocab.bin")
    trg_tag = Vocabulary.load("./model/tag.bin")

    tt_now = ([src_vocab.stoi(char) for char in char_arr] for char_arr in gens.word_list(args.source_te))
    tag_now = (trg_tag.stoi(tag[0]) for tag in gens.word_list(args.target_te))
    tt_gen = gens.batch(tt_now, args.batchsize)
    tag_gen = gens.batch(tag_now, args.batchsize)

    correct_num = 0
    wrong_num = 0
    fw = codecs.open("./output/result_attn_tw{}.csv".format(epoch), "w", encoding="utf-8")
    # CSV header: utterance, gold character, predicted character, score, most-attended word.
    fw.write("台詞,教師キャラ,予測キャラ,予測値,単語\n")
    for tt, tag in zip(tt_gen, tag_gen):
        y, att_w = encdec.callAndAtt(tt)
        max_y = [max(F.softmax(F.reshape(y_each.data, (1, len(y_each.data)))).data[0]) for y_each in y]
        y = [y_each.data.argmax(0) for y_each in y]
        for tt_e, y_e, tag_e, max_y_e, att_w_e in zip(tt, y, tag, max_y, att_w):
            txt = ",".join([src_vocab.itos(id) for id in tt_e])
            tag_e = trg_tag.itos(tag_e)
            y_e = trg_tag.itos(y_e)
            # The most strongly attended input word for this prediction.
            att_ind = att_w_e.data.argmax()
            most_word = src_vocab.itos(tt_e[att_ind])
            fw.write("{}:{}:{}:{}:{}\n".format(txt, tag_e, y_e, max_y_e, most_word))
        correct_num += len([1 for y_e, tag_e in zip(y, tag) if y_e == tag_e])
        wrong_num += len([1 for y_e, tag_e in zip(y, tag) if y_e != tag_e])

    print("epoch:{}".format(epoch))
    print(" correct:{}".format(correct_num))
    print(" wrong:{}".format(wrong_num))
    fw.write("correct:{}\n".format(correct_num))
    fw.write("wrong:{}\n".format(wrong_num))
    fw.close()
def load(filename):
    self = EncoderDecoderModel()
    with ModelFile(filename) as fp:
        self.__src_vocab = Vocabulary.load(fp.get_file_pointer())
        self.__trg_vocab = Vocabulary.load(fp.get_file_pointer())
        self.__n_embed = int(fp.read())
        self.__n_hidden = int(fp.read())
        self.__make_model()
        wrapper.begin_model_access(self.__model)
        fp.read_embed(self.__model.w_xi)
        fp.read_linear(self.__model.w_ip)
        fp.read_linear(self.__model.w_pp)
        fp.read_linear(self.__model.w_pq)
        fp.read_linear(self.__model.w_qj)
        fp.read_linear(self.__model.w_jy)
        fp.read_embed(self.__model.w_yq)
        fp.read_linear(self.__model.w_qq)
        wrapper.end_model_access(self.__model)
    return self
def test(self):
    """Test method.

    A trained model has to be prepared before calling this.
    """
    trace('loading model ...')
    prefix = self.model
    model_path = APP_ROOT + "/model/" + prefix
    src_vocab = Vocabulary.load(model_path + '.srcvocab')
    trg_vocab = Vocabulary.load(model_path + '.trgvocab')
    self.attention_dialogue = AttentionDialogue.load_spec(model_path + '.spec', self.XP)
    serializers.load_hdf5(model_path + '.weights', self.attention_dialogue)

    trace('generating translation ...')
    generated = 0
    with open(self.test_target, 'w') as fp:
        for src_batch in gens.batch(gens.word_list(self.source), self.minibatch):
            src_batch = fill_batch(src_batch)
            K = len(src_batch)
            trace('sample %8d - %8d ...' % (generated + 1, generated + K))
            hyp_batch = self.forward_implement(src_batch, None, src_vocab, trg_vocab, self.attention_dialogue, False, self.generation_limit)

            source_count = 0
            for hyp in hyp_batch:
                hyp.append('</s>')
                hyp = hyp[:hyp.index('</s>')]
                print("src : " + "".join(src_batch[source_count]).replace("</s>", ""))
                print('hyp : ' + ''.join(hyp))
                print(' '.join(hyp), file=fp)
                source_count += 1

            generated += K

    trace('finished.')
def test(args):
    trace('loading model ...')
    word_vocab = Vocabulary.load(args.model + '.words')
    phrase_vocab = Vocabulary.load(args.model + '.phrases')
    semi_vocab = Vocabulary.load(args.model + '.semiterminals')
    parser = Parser.load_spec(args.model + '.spec')
    if USE_GPU:
        parser.to_gpu()
    serializers.load_hdf5(args.model + '.weights', parser)

    trace('generating parse trees ...')
    with open(args.source) as fp:
        for l in fp:
            word_list = convert_word_list(l.split(), word_vocab)
            tree = restore_labels(
                parser.forward(word_list, None, args.unary_limit),
                phrase_vocab,
                semi_vocab)
            print('( ' + tree_to_string(tree) + ' )')

    trace('finished.')
def test(args):
    trace('loading model ...')
    src_vocab = Vocabulary.load(args.model + '.srcvocab')
    trg_vocab = Vocabulary.load(args.model + '.trgvocab')
    encdec = EncoderDecoder.load_spec(args.model + '.spec')
    if args.use_gpu:
        encdec.to_gpu()
    serializers.load_hdf5(args.model + '.weights', encdec)

    trace('generating translation ...')
    generated = 0
    temp = gens.to_words(args.target)
    # temp.append("</s>")
    src_batch = []
    src_batch.append(temp)
    # src_batch = [['私は', '太郎', 'です', '(´', 'ー', '`*)', 'ウンウン', '</s>']]
    src_batch = fill_batch(src_batch)
    print("src_batch:", src_batch)
    K = len(src_batch)
    trace('sample %8d - %8d ...' % (generated + 1, generated + K))

    print("question:")
    for srp in src_batch:
        srp.append('</s>')
        srp = srp[:srp.index('</s>')]
        print(''.join(srp))

    hyp_batch = forward(src_batch, None, src_vocab, trg_vocab, encdec, False, args.generation_limit)

    print("answer:")
    for hyp in hyp_batch:
        hyp.append('</s>')
        hyp = hyp[:hyp.index('</s>')]
        print(''.join(hyp))
    print("----------------")

    generated += K
    trace('finished.')
def load(filename):
    self = SegmentationModel()
    with ModelFile(filename) as fp:
        self.__vocab = Vocabulary.load(fp.get_file_pointer())
        self.__n_context = int(fp.read())
        self.__n_hidden = int(fp.read())
        self.__make_model()
        wrapper.begin_model_access(self.__model)
        fp.read_embed(self.__model.w_xh)
        fp.read_linear(self.__model.w_hy)
        wrapper.end_model_access(self.__model)
    return self
def load(filename):
    self = TransSegmentationModel()
    with ModelFile(filename) as fp:
        self.__vocab = Vocabulary.load(fp.get_file_pointer())
        self.__n_context = int(fp.read())
        self.__n_hidden = int(fp.read())
        self.__make_model()
        wrapper.begin_model_access(self.__model)
        fp.read_embed(self.__model.w_xh)
        fp.read_linear(self.__model.w_hy)
        wrapper.end_model_access(self.__model)
    return self
def train_model(args):
    train_begin = time.time()
    trace('making vocabularies ...')
    vocab = Vocabulary.new(gens.letter_list(args.corpus), args.vocab)

    trace('begin training ...')
    model = TransSegmentationModel.new(vocab, args.context, args.hidden, args.labels, args.eta)

    for epoch in range(args.epoch):
        epoch_beg = time.time()
        trace('START epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        total_loss = 0
        model.init_optimizer()
        with open(args.corpus) as fp:
            for text in fp:
                word_list = text.split()
                if not word_list:
                    continue
                text = ' '.join(word_list)
                letters = ''.join(word_list)
                labels, accum_loss_f = model.train(text)
                total_loss += accum_loss_f
                trained += 1
                hyp = make_hyp(letters, labels)
                # Per-sentence output (disabled):
                # trace('accum_loss : %lf' % accum_loss_f)
                # trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
                # trace('trained %d: ' % trained)
                # trace(text)
                # trace(hyp)
                # if trained % 100 == 0:
                #     trace(' %8d' % trained)

        trace('FINISHED epoch %d/%d: ' % (epoch + 1, args.epoch))
        trace('total_loss : %lf' % total_loss)
        trace('saving model ...')
        model.save(args.model + '.%03d' % (epoch + 1))
        epoch_time = time.time() - epoch_beg
        trace('elapsed_time/1epoch : %lf' % epoch_time)

    trace('finished.')
    elapsed_time = time.time() - train_begin
    trace('train_time : %lf' % elapsed_time)
    trace('')
def load(filename):
    self = RNNSegmentationModel()
    with ModelFile(filename) as fp:
        self.__vocab = Vocabulary.load(fp.get_file_pointer())
        self.__n_embed = int(fp.read())
        self.__n_hidden = int(fp.read())
        self.__make_model()
        wrapper.begin_model_access(self.__model)
        fp.read_embed(self.__model.w_xe)
        fp.read_linear(self.__model.w_ea)
        fp.read_linear(self.__model.w_aa)
        fp.read_linear(self.__model.w_eb)
        fp.read_linear(self.__model.w_bb)
        fp.read_linear(self.__model.w_ay1)
        fp.read_linear(self.__model.w_by1)
        fp.read_linear(self.__model.w_ay2)
        fp.read_linear(self.__model.w_by2)
        wrapper.end_model_access(self.__model)
    return self
def train_model(args): trace("making vocabularies ...") vocab = Vocabulary.new(gens.letter_list(args.corpus), args.vocab) trace("start training ...") model = SegmentationModel.new(vocab, args.context, args.hidden) for epoch in range(args.epoch): trace("epoch %d/%d: " % (epoch + 1, args.epoch)) trained = 0 model.init_optimizer() with open(args.corpus) as fp: for text in fp: word_list = text.split() if not word_list: continue text = " ".join(word_list) letters = "".join(word_list) scores = model.train(text) trained += 1 hyp = make_hyp(letters, scores) trace(trained) trace(text) trace(hyp) trace(" ".join("%+.1f" % x for x in scores)) if trained % 100 == 0: trace(" %8d" % trained) trace("saveing model ...") model.save(args.model + ".%03d" % (epoch + 1)) trace("finished.")
def train_model(args):
    trace('making vocabularies ...')
    vocab = Vocabulary.new(gens.letter_list(args.corpus), args.vocab)

    trace('start training ...')
    model = RNNSegmentationModel.new(vocab, args.embed, args.hidden)

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        model.init_optimizer()
        with open(args.corpus) as fp:
            for text in fp:
                word_list = text.split()
                if not word_list:
                    continue
                text = ' '.join(word_list)
                letters = ''.join(word_list)
                scores = model.train(text)
                trained += 1
                hyp = make_hyp(letters, scores)
                trace(trained)
                trace(text)
                trace(hyp)
                trace(' '.join('%+.1f' % x for x in scores))
                if trained % 100 == 0:
                    trace(' %8d' % trained)

        trace('saving model ...')
        model.save(args.model + '.%03d' % (epoch + 1))

    trace('finished.')
# Tail of a topic-display helper (its enclosing definition is not included
# in this file); prints the vocabulary items active in topic k.
assert display.shape == (1, V)
output_str = str(k) + ": "
for v in xrange(self._V):
    if display[:, v]:
        output_str += vocab[v] + "\t"
print(output_str)


"""run HDP on a synthetic corpus."""
if __name__ == '__main__':
    from util.vocabulary import Vocabulary, parse_raw_text

    bdir = "../../data/nips12/"
    # data = import_monolingual_data(bdir + "doc.dat")
    data = parse_raw_text(bdir)
    data = data[:50]

    voca = Vocabulary(exclude_stopwords=True)
    corpus = [voca.doc2bow(doc) for doc in data]
    # corpus = numpy.array([numpy.random.poisson(1, k) for k in numpy.random.poisson(50, 20)])
    corpus = dict((i, j) for i, j in enumerate(corpus))

    gs = UncollapsedGibbsSampling()
    gs._initialize(corpus)
    gs.sample(100)

    print(gs._K)
    print(gs._n_kv)
def train(args):
    trace('loading corpus ...')
    with open(args.source) as fp:
        trees = [make_tree(l) for l in fp]

    trace('extracting leaf nodes ...')
    word_lists = [extract_words(t) for t in trees]
    lower_lists = [[w.lower() for w in words] for words in word_lists]

    trace('extracting gold operations ...')
    op_lists = [make_operations(t) for t in trees]

    trace('making vocabulary ...')
    word_vocab = Vocabulary.new(lower_lists, args.vocab)
    phrase_set = set()
    semiterminal_set = set()
    for tree in trees:
        phrase_set |= set(extract_phrase_labels(tree))
        semiterminal_set |= set(extract_semiterminals(tree))
    phrase_vocab = Vocabulary.new([list(phrase_set)], len(phrase_set), add_special_tokens=False)
    semiterminal_vocab = Vocabulary.new([list(semiterminal_set)], len(semiterminal_set), add_special_tokens=False)

    trace('converting data ...')
    word_lists = [convert_word_list(x, word_vocab) for x in word_lists]
    op_lists = [convert_op_list(x, phrase_vocab, semiterminal_vocab) for x in op_lists]

    trace('start training ...')
    parser = Parser(
        args.vocab, args.embed, args.char_embed, args.queue,
        args.stack, args.srstate, len(phrase_set), len(semiterminal_set),
    )
    if args.use_gpu:
        parser.to_gpu()
    opt = optimizers.SGD(lr=0.1)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(10))
    opt.add_hook(optimizer.WeightDecay(0.0001))

    batch_set = list(zip(word_lists, op_lists))

    for epoch in range(args.epoch):
        n = 0
        random.shuffle(batch_set)

        for samples in batch(batch_set, args.minibatch):
            parser.zerograds()
            loss = XP.fzeros(())
            for word_list, op_list in zip(*samples):
                trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
                loss += parser.forward_train(word_list, op_list)
                n += 1
            loss.backward()
            opt.update()

        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        word_vocab.save(prefix + '.words')
        phrase_vocab.save(prefix + '.phrases')
        semiterminal_vocab.save(prefix + '.semiterminals')
        parser.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', parser)

        # Decay the learning rate after each epoch.
        opt.lr *= 0.92

    trace('finished.')
def train():
    parser = argparse.ArgumentParser()
    # Config file.
    parser.add_argument("--config-yml", default="exp_fvqa/exp2.yml",
                        help="Path to a config file listing reader, model and solver parameters.")
    parser.add_argument("--cpu-workers", type=int, default=8,
                        help="Number of CPU workers for dataloader.")
    parser.add_argument("--save-dirpath", default="fvqa/exp_data/checkpoints",
                        help="Path of directory to create checkpoint directory and save checkpoints.")
    parser.add_argument("--load-pthpath", default="",
                        help="To continue training, path to .pth file of saved checkpoint.")
    parser.add_argument("--gpus", default="", help="gpus")
    parser.add_argument("--overfit", action="store_true",
                        help="Whether to overfit on a small subset of the data.")
    parser.add_argument("--validate", action="store_true",
                        help="Whether to validate on val split after every epoch.")
    args = parser.parse_args()

    # Set manual seed for reproducibility.
    torch.manual_seed(10)
    torch.cuda.manual_seed(10)
    cudnn.benchmark = True
    cudnn.deterministic = True

    config = yaml.load(open(args.config_yml))
    device = torch.device("cuda:0") if args.gpus != "cpu" else torch.device("cpu")

    # Print config and args.
    print(yaml.dump(config, default_flow_style=False))
    for arg in vars(args):
        print("{:<20}: {}".format(arg, getattr(args, arg)))

    print('Loading TrainDataset...')
    train_dataset = FvqaTrainDataset(config, overfit=args.overfit)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=config['solver']['batch_size'],
                                  num_workers=args.cpu_workers,
                                  shuffle=True,
                                  collate_fn=collate_fn)

    if args.validate:
        print('Loading TestDataset...')
        val_dataset = FvqaTestDataset(config, overfit=args.overfit)
        val_dataloader = DataLoader(val_dataset,
                                    batch_size=config['solver']['batch_size'],
                                    num_workers=args.cpu_workers,
                                    shuffle=True,
                                    collate_fn=collate_fn)

    print('Loading glove...')
    que_vocab = Vocabulary(config['dataset']['word2id_path'])
    glove = np.load(config['dataset']['glove_vec_path'])
    glove = torch.Tensor(glove)

    print('Building Model...')
    model = CMGCNnet(config, que_vocabulary=que_vocab, glove=glove, device=device)
    if torch.cuda.device_count() > 1 and args.gpus != "cpu":
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    model = model.to(device)
    print(model)

    iterations = len(train_dataset) // config["solver"]["batch_size"] + 1

    def lr_lambda_fun(current_iteration: int) -> float:
        current_epoch = float(current_iteration) / iterations
        if current_epoch <= config["solver"]["warmup_epochs"]:
            # Linear warmup from warmup_factor up to 1.
            alpha = current_epoch / float(config["solver"]["warmup_epochs"])
            return config["solver"]["warmup_factor"] * (1. - alpha) + alpha
        else:
            # Multi-step decay by lr_gamma at each milestone.
            idx = bisect(config["solver"]["lr_milestones"], current_epoch)
            return pow(config["solver"]["lr_gamma"], idx)

    optimizer = optim.Adamax(model.parameters(), lr=config["solver"]["initial_lr"])
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_lambda_fun)
    T = iterations * (config["solver"]["num_epochs"] - config["solver"]["warmup_epochs"] + 1)
    scheduler2 = lr_scheduler.CosineAnnealingLR(optimizer, int(T),
                                                eta_min=config["solver"]["eta_min"],
                                                last_epoch=-1)

    summary_writer = SummaryWriter(log_dir=args.save_dirpath)
    checkpoint_manager = CheckpointManager(model, optimizer, args.save_dirpath, config=config)

    if args.load_pthpath == "":
        start_epoch = 0
    else:
        # Resume: the epoch number is encoded in the checkpoint file name.
        start_epoch = int(args.load_pthpath.split("_")[-1][:-4])
        model_state_dict, optimizer_state_dict = load_checkpoint(args.load_pthpath)
        if isinstance(model, nn.DataParallel):
            model.module.load_state_dict(model_state_dict)
        else:
            model.load_state_dict(model_state_dict)
        optimizer.load_state_dict(optimizer_state_dict)
        print("Loading resume model from {}...".format(args.load_pthpath))

    global_iteration_step = start_epoch * iterations
    for epoch in range(start_epoch, config['solver']['num_epochs']):
        print(f"\nTraining for epoch {epoch}:")
        train_answers = []
        train_preds = []

        for i, batch in enumerate(tqdm(train_dataloader)):
            optimizer.zero_grad()
            fact_batch_graph = model(batch)
            batch_loss = cal_batch_loss(fact_batch_graph, batch, device,
                                        neg_weight=0.1, pos_weight=0.9)
            batch_loss.backward()
            optimizer.step()

            fact_graphs = dgl.unbatch(fact_batch_graph)
            for i, fact_graph in enumerate(fact_graphs):
                train_pred = fact_graph.ndata['h'].squeeze()  # (num_nodes,)
                train_preds.append(train_pred)
                train_answers.append(batch['facts_answer_id_list'][i])

            summary_writer.add_scalar('train/loss', batch_loss, global_iteration_step)
            summary_writer.add_scalar("train/lr", optimizer.param_groups[0]["lr"], global_iteration_step)
            summary_writer.add_text('train/loss', str(batch_loss.item()), global_iteration_step)
            summary_writer.add_text('train/lr', str(optimizer.param_groups[0]["lr"]), global_iteration_step)

            # Warmup schedule during the first warmup_epochs, cosine annealing after.
            if global_iteration_step <= iterations * config["solver"]["warmup_epochs"]:
                scheduler.step(global_iteration_step)
            else:
                global_iteration_step_in_2 = iterations * config["solver"]["warmup_epochs"] + 1 - global_iteration_step
                scheduler2.step(int(global_iteration_step_in_2))

            global_iteration_step = global_iteration_step + 1
            torch.cuda.empty_cache()

        checkpoint_manager.step()
        train_acc_1, train_acc_3 = cal_acc(train_answers, train_preds)
        print("trainacc@1={:.2%} & trainacc@3={:.2%}".format(train_acc_1, train_acc_3))
        summary_writer.add_scalars('train/acc', {'acc@1': train_acc_1, 'acc@3': train_acc_3}, epoch)

        if args.validate:
            model.eval()
            answers = []  # [batch_answers, ...]
            preds = []    # [batch_preds, ...]
            print(f"\nValidation after epoch {epoch}:")
            for i, batch in enumerate(tqdm(val_dataloader)):
                with torch.no_grad():
                    fact_batch_graph = model(batch)
                batch_loss = cal_batch_loss(fact_batch_graph, batch, device,
                                            neg_weight=0.1, pos_weight=0.9)
                summary_writer.add_scalar('test/loss', batch_loss, epoch)

                fact_graphs = dgl.unbatch(fact_batch_graph)
                for i, fact_graph in enumerate(fact_graphs):
                    pred = fact_graph.ndata['h'].squeeze()  # (num_nodes,)
                    preds.append(pred)
                    answers.append(batch['facts_answer_id_list'][i])

            acc_1, acc_3 = cal_acc(answers, preds)
            print("acc@1={:.2%} & acc@3={:.2%}".format(acc_1, acc_3))
            summary_writer.add_scalars('test/acc', {'acc@1': acc_1, 'acc@3': acc_3}, epoch)
            model.train()
            torch.cuda.empty_cache()

    print('Train finished !!!')
    summary_writer.close()
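# A minimal sketch of what cal_acc() above is expected to compute, assuming
# each element of preds is a 1-D tensor of per-node scores and each element
# of answers is the index of the gold node. Both are assumptions; the real
# helper is defined elsewhere in the repository.
import torch

def cal_acc_sketch(answers, preds):
    top1 = top3 = 0
    for ans, pred in zip(answers, preds):
        k = min(3, pred.numel())
        topk = torch.topk(pred, k).indices.tolist()
        top1 += int(topk[0] == ans)
        top3 += int(ans in topk)
    n = len(answers)
    return top1 / n, top3 / n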
from util.vocabulary import Vocabulary
from util import generators as gens
from util.controller import Controller
from util.wrapper import wrapper
from util.const import *

if __name__ == '__main__':
    args = parse_args()

    trace('initializing ...')
    wrapper = wrapper(args.gpu_id)
    wrapper.init()

    trace('loading vocab ...')
    # src_vocab = Vocabulary.load(args.src_vocab)
    # trg_vocab = Vocabulary.load(args.trg_vocab)
    src_vocab = Vocabulary.load(VOCAB_SRC)
    trg_vocab = Vocabulary.load(VOCAB_TRG)

    controller = Controller(args.folder_name)
    if args.mode == 'train':
        controller.train_model(BasicEncoderDecoderModel, src_vocab, trg_vocab, args)
    elif args.mode == 'dev':
        controller.dev_model(BasicEncoderDecoderModel, src_vocab, trg_vocab, args)
    elif args.mode == 'test':
        controller.test_model(BasicEncoderDecoderModel, src_vocab, trg_vocab, args)
def train(args):
    trace('loading corpus ...')
    with open(args.source) as fp:
        trees = [make_tree(l) for l in fp]

    trace('extracting leaf nodes ...')
    word_lists = [extract_words(t) for t in trees]
    lower_lists = [[w.lower() for w in words] for words in word_lists]

    trace('extracting gold operations ...')
    op_lists = [make_operations(t) for t in trees]

    trace('making vocabulary ...')
    word_vocab = Vocabulary.new(lower_lists, args.vocab)
    phrase_set = set()
    semiterminal_set = set()
    for tree in trees:
        phrase_set |= set(extract_phrase_labels(tree))
        semiterminal_set |= set(extract_semiterminals(tree))
    phrase_vocab = Vocabulary.new([list(phrase_set)], len(phrase_set), add_special_tokens=False)
    semiterminal_vocab = Vocabulary.new([list(semiterminal_set)], len(semiterminal_set), add_special_tokens=False)

    trace('converting data ...')
    word_lists = [to_vram_words(convert_word_list(x, word_vocab)) for x in word_lists]
    op_lists = [to_vram_ops(convert_op_list(x, phrase_vocab, semiterminal_vocab)) for x in op_lists]

    trace('start training ...')
    parser = Parser(
        args.vocab, args.embed, args.char_embed, args.queue,
        args.stack, args.srstate, len(phrase_set), len(semiterminal_set),
    )
    if args.use_gpu:
        parser.to_gpu()
    opt = optimizers.SGD(lr=0.1)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(10))
    opt.add_hook(optimizer.WeightDecay(0.0001))

    batch_set = list(zip(word_lists, op_lists))

    for epoch in range(args.epoch):
        n = 0
        random.shuffle(batch_set)

        for samples in batch(batch_set, args.minibatch):
            parser.zerograds()
            loss = XP.fzeros(())
            embed_cache = {}
            for word_list, op_list in zip(*samples):
                trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
                loss += parser.forward(word_list, op_list, 0, embed_cache)
                n += 1
            loss.backward()
            opt.update()

        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        word_vocab.save(prefix + '.words')
        phrase_vocab.save(prefix + '.phrases')
        semiterminal_vocab.save(prefix + '.semiterminals')
        parser.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', parser)

        # Decay the learning rate after each epoch.
        opt.lr *= 0.92

    trace('finished.')
def train(args):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.input_word_list(), args.vocab)
    trg_vocab = Vocabulary.new(gens.output_word_list(), args.vocab)

    trace('making model ...')
    encdec = EncoderDecoder(args.vocab, args.embed, args.hidden)
    if args.load_model != "":
        print("model load %s ... " % args.load_model)
        src_vocab = Vocabulary.load(args.load_model + '.srcvocab')
        trg_vocab = Vocabulary.load(args.load_model + '.trgvocab')
        encdec = EncoderDecoder.load_spec(args.load_model + '.spec')
        serializers.load_hdf5(args.load_model + '.weights', encdec)
    if args.use_gpu:
        encdec.to_gpu()

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.input_word_list()
        gen2 = gens.output_word_list()
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch), args.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(encdec)
        opt.add_hook(optimizer.GradientClipping(5))

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
            loss.backward()
            opt.update()

            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
                trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
                trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
                trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))

            trained += K

        if epoch % args.model_save_timing == 0:
            trace('saving model ...')
            prefix = args.model + '.%03d' % (epoch + 1)
            src_vocab.save(prefix + '.srcvocab')
            trg_vocab.save(prefix + '.trgvocab')
            encdec.save_spec(prefix + '.spec')
            serializers.save_hdf5(prefix + '.weights', encdec)

    trace('finished.')
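# A hypothetical command-line driver for the train() function above. The
# flag names simply mirror the attributes the function reads (args.vocab,
# args.embed, ...); the defaults are illustrative assumptions, not part of
# the original script.
import argparse

def parse_args():
    p = argparse.ArgumentParser(description='train an encoder-decoder model')
    p.add_argument('--vocab', type=int, default=32768)
    p.add_argument('--embed', type=int, default=256)
    p.add_argument('--hidden', type=int, default=512)
    p.add_argument('--epoch', type=int, default=20)
    p.add_argument('--minibatch', type=int, default=64)
    p.add_argument('--model', default='encdec')
    p.add_argument('--load_model', default='')
    p.add_argument('--model_save_timing', type=int, default=1)
    p.add_argument('--use_gpu', action='store_true')
    return p.parse_args()

if __name__ == '__main__':
    train(parse_args())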