def train_loop(model, output_dir, xp, optimizer, res_q, data_q):
    graph_generated = False
    while True:
        while data_q.empty():
            time.sleep(0.1)
        inp = data_q.get()
        if inp == 'end':
            res_q.put('end')
            break
        elif inp == 'train':
            res_q.put('train')
            model.train = True
            continue
        elif inp == 'val':
            res_q.put('val')
            model.train = False
            continue
        volatile = 'off' if model.train else 'on'
        x = chainer.Variable(xp.asarray(inp[0]), volatile=volatile)
        t = chainer.Variable(xp.asarray(inp[1]), volatile=volatile)
        if model.train:
            optimizer.update(model, x, t)
            if not graph_generated:
                with open('graph.dot', 'w') as o:
                    o.write(computational_graph.build_computational_graph((model.loss,)).dump())
                print('generated graph')
                graph_generated = True
        else:
            model(x, t)
            serializers.save_hdf5(output_dir + os.sep + 'model%04d' % inp[2], model)
            #serializers.save_hdf5(output_dir + os.sep + 'optimizer%04d' % inp[2], optimizer)
        res_q.put((float(model.loss.data), float(model.accuracy.data), inp[2]))
        del x, t
def save_model(self, epoch):
    dpath = "./model"
    if not os.path.exists(dpath):
        os.makedirs(dpath)
    fpath = "./model/generator_{:05d}.h5py".format(epoch)
    serializers.save_hdf5(fpath, self.generator)
def train(args):
    trace('loading corpus ...')
    with open(args.source) as fp:
        trees = [make_tree(l) for l in fp]
    trace('extracting leaf nodes ...')
    word_lists = [extract_words(t) for t in trees]
    trace('extracting gold operations ...')
    op_lists = [make_operations(t) for t in trees]
    trace('making vocabulary ...')
    word_vocab = Vocabulary.new(word_lists, args.vocab)
    phrase_set = set()
    semi_set = set()
    for tree in trees:
        phrase_set |= set(extract_phrase_labels(tree))
        semi_set |= set(extract_semi_labels(tree))
    phrase_vocab = Vocabulary.new([list(phrase_set)], len(phrase_set), add_special_tokens=False)
    semi_vocab = Vocabulary.new([list(semi_set)], len(semi_set), add_special_tokens=False)
    trace('converting data ...')
    word_lists = [convert_word_list(x, word_vocab) for x in word_lists]
    op_lists = [convert_op_list(x, phrase_vocab, semi_vocab) for x in op_lists]
    trace('start training ...')
    parser = Parser(
        args.vocab, args.embed, args.queue, args.stack,
        len(phrase_set), len(semi_set),
    )
    if USE_GPU:
        parser.to_gpu()
    opt = optimizers.AdaGrad(lr=0.005)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(5))
    for epoch in range(args.epoch):
        n = 0
        for samples in batch(zip(word_lists, op_lists), args.minibatch):
            parser.zerograds()
            loss = my_zeros((), np.float32)
            for word_list, op_list in zip(*samples):
                trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
                loss += parser.forward(word_list, op_list, 0)
                n += 1
            loss.backward()
            opt.update()
        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        word_vocab.save(prefix + '.words')
        phrase_vocab.save(prefix + '.phrases')
        semi_vocab.save(prefix + '.semiterminals')
        parser.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', parser)
    trace('finished.')
def snapshot(self):
    S.save_hdf5(osp.join(self.save_dir, 'vgg16_{0}.chainermodel'.format(self.i_iter)), self.model)
    S.save_hdf5(osp.join(self.save_dir, 'vgg16_optimizer_{0}.h5'.format(self.i_iter)), self.optimizer)
def train(self): """Iterate with train data.""" log_templ = ('{i_iter}: type={type}, loss={loss}, acc={acc}, ' 'acc_cls={acc_cls}, iu={iu}, fwavacc={fwavacc}') for i_iter in xrange(self.max_iter): self.i_iter = i_iter if (self.test_interval is not None) and \ (i_iter % self.test_interval == 0): self.validate() type = 'train' self.model.train = True loss, acc, acc_cls, iu, fwavacc = self._iterate_once(type=type) log = dict(i_iter=self.i_iter, type=type, loss=loss, acc=acc, acc_cls=acc_cls, iu=iu, fwavacc=fwavacc) print(log_templ.format(**log)) self.logfile.write( '{i_iter},{type},{loss},{acc},{acc_cls},{iu},{fwavacc}\n' .format(**log)) if i_iter % self.snapshot == 0: print('{0}: saving snapshot...'.format(i_iter)) snapshot_model = osp.join( self.log_dir, 'fcn32s_{0}.chainermodel'.format(i_iter)) snapshot_optimizer = osp.join( self.log_dir, 'fcn8s_optimizer_{0}.h5'.format(i_iter)) S.save_hdf5(snapshot_model, self.model) S.save_hdf5(snapshot_optimizer, self.optimizer)
def test(self, x_l, y_l):
    y = F.softmax(self.mlp_enc(x_l, test=True))
    y_argmax = F.argmax(y, axis=1)
    acc = F.accuracy(y, y_l)
    y_l_cpu = cuda.to_cpu(y_l.data)
    y_argmax_cpu = cuda.to_cpu(y_argmax.data)

    # Confusion matrix
    cm = confusion_matrix(y_l_cpu, y_argmax_cpu)
    print(cm)

    # Wrong samples
    idx = np.where(y_l_cpu != y_argmax_cpu)[0]
    #print(idx.tolist())

    # Generate and save
    x_rec = self.mlp_dec(y, test=True)
    save_incorrect_info(x_rec.data[idx, ], x_l.data[idx, ],
                        y.data[idx, ], y_l.data[idx, ])

    # Save model
    serializers.save_hdf5("./model/mlp_encdec.h5py", self.model)

    loss = self.forward_for_losses(x_l, y_l, None, test=True)  # only measure x_l
    supervised_loss = loss
    return acc, supervised_loss
def save_model(self):
    trace('saving model ...')
    prefix = self.model
    self.trg_vocab.save("model/" + prefix + '.trgvocab')
    self.encdec.save_spec("model/" + prefix + '.spec')
    serializers.save_hdf5("model/" + prefix + '.weights', self.encdec)
    trace('finished.')
def save_model(filename, model):
    print('Saving trained model...')
    if os.path.exists(filename):
        print('Overwriting existing file {}'.format(filename))
    serializers.save_hdf5(filename, model)
    print('Saved trained model {}'.format(filename))
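# A minimal loading counterpart to save_model above -- a sketch, not taken from
# the original source: chainer's serializers.load_hdf5 restores weights in place
# into an already-constructed model of the same architecture.
def load_model(filename, model):
    if not os.path.exists(filename):
        raise IOError('No saved model at {}'.format(filename))
    serializers.load_hdf5(filename, model)
    return model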
def progress_func(epoch, loss, accuracy, validate_loss, validate_accuracy,
                  test_loss, test_accuracy):
    print('epoch: {} done'.format(epoch))
    print('train mean loss={}, accuracy={}'.format(loss, accuracy))
    if validate_loss is not None and validate_accuracy is not None:
        print('validate mean loss={}, accuracy={}'.format(validate_loss, validate_accuracy))
    if test_loss is not None and test_accuracy is not None:
        print('test mean loss={}, accuracy={}'.format(test_loss, test_accuracy))
    if epoch % 10 == 0:
        serializers.save_hdf5(args.output + '.model', model)
        serializers.save_hdf5(args.output + '.state', optimizer)
def save(self, filename): """ Save the model, the optimizer, vocabulary and config""" filename = os.path.abspath(filename) serializers.save_hdf5(filename + '.model', self.model) serializers.save_hdf5(filename + '.state', self.optimizer) cPickle.dump(self.vocab, open(filename + '.vocab', "w")) cPickle.dump(self.config, open(filename + '.config', "w")) with tarfile.open(filename, "w") as tar: for fn in [filename + '.model', filename + '.state', filename + '.vocab', filename + '.config']: tar.add(fn, arcname=os.path.basename(fn)) os.remove(fn)
def save(self, dir=None):
    if dir is None:
        raise Exception()
    try:
        os.mkdir(dir)
    except OSError:
        pass
    for attr in vars(self):
        prop = getattr(self, attr)
        if isinstance(prop, (chainer.Chain, chainer.optimizer.GradientMethod)):
            serializers.save_hdf5(dir + "/%s_%s.hdf5" % (self.name, attr), prop)
    print "model saved."
def train(args):
    source_vocab = Vocab(args.source, args.vocab)
    target_vocab = Vocab(args.target, args.vocab)
    att_encdec = ABED(args.vocab, args.hidden_size, args.maxout_hidden_size, args.embed_size)
    if args.use_gpu:
        att_encdec.to_gpu()
    if args.source_validation:
        if not os.path.exists(PLOT_DIR):
            os.mkdir(PLOT_DIR)
        fp_loss = open(PLOT_DIR + "loss", "w")
        fp_loss_val = open(PLOT_DIR + "loss_val", "w")
    opt = optimizers.AdaDelta(args.rho, args.eps)
    opt.setup(att_encdec)
    opt.add_hook(optimizer.WeightDecay(DECAY_COEFF))
    opt.add_hook(optimizer.GradientClipping(CLIP_THR))
    for epoch in xrange(args.epochs):
        print "--- epoch: %s/%s ---" % (epoch + 1, args.epochs)
        source_gen = word_list(args.source)
        target_gen = word_list(args.target)
        batch_gen = batch(sort(source_gen, target_gen, 100 * args.minibatch), args.minibatch)
        n = 0
        total_loss = 0.0
        for source_batch, target_batch in batch_gen:
            n += len(source_batch)
            source_batch = fill_batch_end(source_batch)
            target_batch = fill_batch_end(target_batch)
            hyp_batch, loss = forward(source_batch, target_batch, source_vocab,
                                      target_vocab, att_encdec, True, 0)
            total_loss += loss.data * len(source_batch)
            closed_test(source_batch, target_batch, hyp_batch)
            loss.backward()
            opt.update()
            print "[n=%s]" % (n)
        print "[total=%s]" % (n)
        prefix = args.model_path + '%s' % (epoch + 1)
        serializers.save_hdf5(prefix + '.attencdec', att_encdec)
        if args.source_validation:
            total_loss_val, n_val = validation_test(args, att_encdec, source_vocab, target_vocab)
            fp_loss.write("\t".join([str(epoch), str(total_loss / n) + "\n"]))
            fp_loss_val.write("\t".join([str(epoch), str(total_loss_val / n_val) + "\n"]))
            fp_loss.flush()
            fp_loss_val.flush()
        hyp_params = att_encdec.get_hyper_params()
        Backup.dump(hyp_params, args.model_path + HPARAM_NAME)
        source_vocab.save(args.model_path + SRC_VOCAB_NAME)
        target_vocab.save(args.model_path + TAR_VOCAB_NAME)
    hyp_params = att_encdec.get_hyper_params()
    Backup.dump(hyp_params, args.model_path + HPARAM_NAME)
    source_vocab.save(args.model_path + SRC_VOCAB_NAME)
    target_vocab.save(args.model_path + TAR_VOCAB_NAME)
    if args.source_validation:
        fp_loss.close()
        fp_loss_val.close()
def save(self, dir=None):
    if dir is None:
        raise Exception()
    try:
        os.mkdir(dir)
    except OSError:
        pass
    for attr in vars(self):
        prop = getattr(self, attr)
        if self.should_save(prop):
            serializers.save_hdf5(dir + "/%s_%s.hdf5" % (self.name, attr), prop)
    print "model saved."
def save(self): serializers.save_hdf5("fc_value.model", self.fc_value) serializers.save_hdf5("fc_advantage.model", self.fc_advantage) print "model saved." serializers.save_hdf5("fc_value.optimizer", self.optimizer_fc_value) serializers.save_hdf5("fc_advantage.optimizer", self.optimizer_fc_advantage) print "optimizer saved."
def train(self): """ Train method If you use the word2vec model, you possible to use the copy weight Optimizer method use the Adagrad """ trace("making vocabularies ...") src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab) trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab) trace("making model ...") self.attention_dialogue = AttentionDialogue(self.vocab, self.embed, self.hidden, self.XP) if self.word2vecFlag: self.copy_model(self.word2vec, self.attention_dialogue.emb) self.copy_model(self.word2vec, self.attention_dialogue.dec, dec_flag=True) for epoch in range(self.epoch): trace("epoch %d/%d: " % (epoch + 1, self.epoch)) trained = 0 gen1 = gens.word_list(self.source) gen2 = gens.word_list(self.target) gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch) opt = optimizers.AdaGrad(lr=0.01) opt.setup(self.attention_dialogue) opt.add_hook(optimizer.GradientClipping(5)) random_number = random.randint(0, self.minibatch - 1) for src_batch, trg_batch in gen3: src_batch = fill_batch(src_batch) trg_batch = fill_batch(trg_batch) K = len(src_batch) hyp_batch, loss = self.forward_implement( src_batch, trg_batch, src_vocab, trg_vocab, self.attention_dialogue, True, 0 ) loss.backward() opt.update() self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch) trained += K trace("saving model ...") prefix = self.model model_path = APP_ROOT + "/model/" + prefix src_vocab.save(model_path + ".srcvocab") trg_vocab.save(model_path + ".trgvocab") self.attention_dialogue.save_spec(model_path + ".spec") serializers.save_hdf5(model_path + ".weights", self.attention_dialogue) trace("finished.")
def serialize(self, epoch, filename):
    # Create the output directory, replacing any stale one from a previous run
    dpath = os.path.join(filename, "model_{:05d}".format(epoch))
    if os.path.exists(dpath):
        shutil.rmtree(dpath)
    os.makedirs(dpath)
    # Serialize
    fpath = os.path.join(dpath, "encoder.h5py")
    serializers.save_hdf5(fpath, self.encoder)
    fpath = os.path.join(dpath, "generator.h5py")
    serializers.save_hdf5(fpath, self.generator)
def saveData(self):
    try:
        # save model file
        serializers.save_hdf5(MODEL_FILE, self.brain)
        print "succeed to save model"
        # save history file
        #fp = open(HISTORY_FILE, "w")
        #self.brain.state.dump(HISTORY_FILE)
        #pickle.dump(self.brain, fp)
        #fp.close()
        #print "succeed to save history."
    except:
        print "failed to save model."
def train(gen, dis, optimizer_gen, optimizer_dis, x_train, epoch_num,
          gpu_device=None, out_image_dir=None):
    if gpu_device is None:
        gen.to_cpu()
        dis.to_cpu()
        xp = np
    else:
        gen.to_gpu(gpu_device)
        dis.to_gpu(gpu_device)
        xp = cuda.cupy
    out_image_len = 20
    z_out_image = Variable(xp.random.uniform(-1, 1, (out_image_len, LATENT_SIZE)).astype(np.float32))
    for epoch in xrange(1, epoch_num + 1):
        x_size = len(x_train)
        perm = np.random.permutation(x_size)
        sum_loss_gen = 0
        sum_loss_dis = 0
        for i in xrange(0, x_size, BATCH_SIZE):
            x_batch = x_train[perm[i:i + BATCH_SIZE]]
            loss_dis = train_dis(gen, dis, optimizer_gen, optimizer_dis, x_batch, gpu_device)
            sum_loss_dis += float(loss_dis)
            loss_gen = train_gen(gen, dis, optimizer_gen, optimizer_dis, x_batch, gpu_device)
            sum_loss_gen += float(loss_gen)
        print "epoch: {} done".format(epoch)
        print("gen loss={}".format(sum_loss_gen / x_size))
        print("dis loss={}".format(sum_loss_dis / x_size))
        serializers.save_hdf5(args.output + ".gen.model", gen)
        serializers.save_hdf5(args.output + ".gen.state", optimizer_gen)
        serializers.save_hdf5(args.output + ".dis.model", dis)
        serializers.save_hdf5(args.output + ".dis.state", optimizer_dis)
        if out_image_dir is not None:
            data_array = gen(z_out_image, train=False).data
            for i, data in enumerate(data_array):
                image = Image.fromarray((cuda.to_cpu(data) * 256).astype(np.uint8).reshape(data.shape[1:3]))
                image.save("{0}/{1:03d}_{2:03d}.png".format(out_image_dir, epoch, i))
def main():
    docs = get_docs()
    texts = make_texts(docs, single=False)
    questions = get_questions()
    texts.extend(questions)
    texts = preprocess_text(texts)
    texts = [t for t in texts if t]
    tokens, vocab = preprocess.tokenize(texts, 7500, tag=False, parse=False, entity=False)
    log.info("Got tokens and vocabulary. Vocab size: %d" % len(vocab))
    corpus, flat_corpus, doc_ids, clean_set = make_corpus(tokens=tokens, min_count=50)
    log.info("Got corpus")

    # Model parameters
    # Number of documents
    n_docs = len(texts)
    log.info("number of texts: %d" % n_docs)
    # Number of unique words in the vocabulary
    n_words = flat_corpus.max() + 1
    # Number of dimensions in a single word vector
    n_hidden = 128
    # Number of topics to fit
    n_topics = 20
    # Get the count for each key
    counts = corpus.keys_counts[:n_words]
    # Get the string representation for every compact key
    words = corpus.word_list(vocab)[:n_words]
    log.info("Words: \n %s" % words)

    # Fit the model
    log.info("fitting the model")
    model = LDA2Vec(n_words, n_hidden, counts, dropout_ratio=0.2)
    model.add_categorical_feature(n_docs, n_topics, name="document_id")
    model.finalize()
    if os.path.exists("model.hdf5"):
        serializers.load_hdf5("model.hdf5", model)
    for _ in range(200):
        log.info("attempt #%d" % _)
        model.top_words_per_topic("document_id", words)
        log.info("TOP_WORDS_PER_TOPIC!\n => ")
        log.info(model.top_words_per_topic("document_id", words))
        log.info("========")
        model.fit(flat_corpus, categorical_features=[doc_ids], fraction=1e-3, epochs=1)
        model.to_cpu()
        serializers.save_hdf5("model.hdf5", model)
    model.top_words_per_topic("document_id", words)
def save(self):
    model_dir = self._model_dir
    filenames = Trainer.get_model_filenames(self.name, self.params['current_epoch'])
    serializers.save_hdf5(os.path.join(model_dir, filenames['model_gen']), self.dcgan.gen)
    serializers.save_hdf5(os.path.join(model_dir, filenames['model_dis']), self.dcgan.dis)
    serializers.save_hdf5(os.path.join(model_dir, filenames['opt_gen']), self.opt_gen)
    serializers.save_hdf5(os.path.join(model_dir, filenames['opt_dis']), self.opt_dis)
    with open(os.path.join(Trainer.MODEL_DIR, '{}.json'.format(self.name)), 'w') as f:
        f.write(json.dumps(self.params, indent=2))
def train_dcgan_labeled(images, gen, dis):
    o_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_gen.setup(gen)
    o_dis.setup(dis)
    o_gen.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))
    zeros = Variable(xp.zeros(batchsize, dtype=np.int32))
    ones = Variable(xp.ones(batchsize, dtype=np.int32))
    for epoch in tqdm(range(n_epoch)):
        # discriminator labels
        # 0: from dataset
        # 1: from noise

        # train generator
        z = xp.random.uniform(-1, 1, (batchsize, nz), dtype=np.float32)
        z = Variable(z)
        x = gen(z)
        yl = dis(x)
        L_gen = F.softmax_cross_entropy(yl, zeros)
        L_dis = F.softmax_cross_entropy(yl, ones)
        # train discriminator
        x = generate_data(images)
        yl = dis(x)
        L_dis += F.softmax_cross_entropy(yl, zeros)
        o_gen.zero_grads()
        L_gen.backward()
        o_gen.update()
        o_dis.zero_grads()
        L_dis.backward()
        o_dis.update()
        if epoch % image_save_interval == 0 and epoch > 0:
            z = zvis
            z[50:, :] = xp.random.uniform(-1, 1, (50, nz), dtype=np.float32)
            z = Variable(z)
            x = gen(z, test=True)
            filename = '{}/vis_{}.png'.format(out_image_dir, epoch)
            generate_and_save(filename, x.data.get())
            path = join(out_model_dir, "dcgan_model_dis_{}.h5".format(epoch))
            serializers.save_hdf5(path, dis)
            path = join(out_model_dir, "dcgan_model_gen_{}.h5".format(epoch))
            serializers.save_hdf5(path, gen)
            path = join(out_model_dir, "dcgan_state_dis_{}.h5".format(epoch))
            serializers.save_hdf5(path, o_dis)
            path = join(out_model_dir, "dcgan_state_gen_{}.h5".format(epoch))
            serializers.save_hdf5(path, o_gen)
def train(self):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)
    trace('making model ...')
    encdec = EncoderDecoder(self.vocab, self.embed, self.hidden)
    if self.word2vecFlag:
        self.copy_model(self.word2vec, encdec.enc)
        self.copy_model(self.word2vec, encdec.dec, dec_flag=True)
    else:
        encdec = self.encdec
    for epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        trained = 0
        gen1 = gens.word_list(self.source)
        gen2 = gens.word_list(self.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(encdec)
        opt.add_hook(optimizer.GradientClipping(5))
        random_number = random.randint(0, self.minibatch - 1)
        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = self.forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
            loss.backward()
            opt.update()
            if trained == 0:
                self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch)
            trained += K
    trace('saving model ...')
    prefix = self.model
    src_vocab.save(prefix + '.srcvocab')
    trg_vocab.save(prefix + '.trgvocab')
    encdec.save_spec(prefix + '.spec')
    serializers.save_hdf5(prefix + '.weights', encdec)
    trace('finished.')
def train_and_test(self, n_epoch=100, batchsize=100):
    epoch = 1
    best_accuracy = 0
    while epoch <= n_epoch:
        print 'epoch', epoch
        perm = np.random.permutation(self.n_train)
        sum_train_accuracy = 0
        sum_train_loss = 0
        for i in xrange(0, self.n_train, batchsize):
            x_batch = self.x_train[perm[i:i + batchsize]]
            y_batch = self.y_train[perm[i:i + batchsize]]
            real_batchsize = len(x_batch)
            self.optimizer.zero_grads()
            loss, acc = self.model.forward(x_batch, y_batch, train=True, gpu=self.gpu)
            loss.backward()
            self.optimizer.update()
            sum_train_loss += float(cuda.to_cpu(loss.data)) * real_batchsize
            sum_train_accuracy += float(cuda.to_cpu(acc.data)) * real_batchsize
        print 'train mean loss={}, accuracy={}'.format(
            sum_train_loss / self.n_train, sum_train_accuracy / self.n_train)
        # evaluation
        sum_test_accuracy = 0
        sum_test_loss = 0
        for i in xrange(0, self.n_test, batchsize):
            x_batch = self.x_test[i:i + batchsize]
            y_batch = self.y_test[i:i + batchsize]
            real_batchsize = len(x_batch)
            loss, acc = self.model.forward(x_batch, y_batch, train=False, gpu=self.gpu)
            sum_test_loss += float(cuda.to_cpu(loss.data)) * real_batchsize
            sum_test_accuracy += float(cuda.to_cpu(acc.data)) * real_batchsize
        print 'test mean loss={}, accuracy={}'.format(
            sum_test_loss / self.n_test, sum_test_accuracy / self.n_test)
        epoch += 1
    serializers.save_hdf5('doll_model', self.model)
def train(args):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)
    trace('making model ...')
    attmt = AttentionMT(args.vocab, args.embed, args.hidden)
    if args.use_gpu:
        attmt.to_gpu()
    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.word_list(args.source)
        gen2 = gens.word_list(args.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch), args.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(attmt)
        opt.add_hook(optimizer.GradientClipping(5))
        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = forward(src_batch, trg_batch, src_vocab, trg_vocab, attmt, True, 0)
            loss.backward()
            opt.update()
            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
                trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
                trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
                trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))
            trained += K
        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        attmt.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', attmt)
    trace('finished.')
def train(self):
    cur_log_perp = self.mod.zeros(())
    accum_loss = 0
    print('[train]\ngoing to train %d epochs' % self.n_epoch)
    for epoch in range(self.n_epoch):
        if epoch <= self.n_epoch / 2:
            train_data = self.generate_data()
        else:
            train_data = self.generate_data(go_away_from_start=True)
        for i in range(self.sequence_length):
            x = self.toVariable(train_data['input'][i], dtype='float32')
            t = self.toVariable(train_data['output'][i], dtype='int32')
            h, y = self.model(x)
            loss_i, accuracy_i = self.loss(y, t)
            accum_loss += loss_i
            cur_log_perp += loss_i.data
            # truncated BPTT
            if (i + 1) % self.backprop_length == 0:
                self.model.zerograds()
                accum_loss.backward()
                accum_loss.unchain_backward()  # truncate
                accum_loss = 0
                self.optimizer.update()
        if (epoch + 1) % self.validation_timing == 0:
            now = time.time()
            throughput = self.validation_timing / float(now - prev) \
                if 'prev' in vars() else 0
            train_perp, valid_perp_mean, valid_perp_se, perp = \
                self.validate(epoch, train_data, cur_log_perp)
            print(('epoch: %d, train perp: %d, validation classified %d/100 ' +
                   '(%.2f epochs/sec)')
                  % (epoch + 1, perp, 100 * (1 - valid_perp_mean), throughput))
            S.save_hdf5('pipc_lstm_%d.pkl' % self.n_hidden, self.model)
            cur_log_perp = self.mod.zeros(())
            prev = now
            sys.stdout.flush()
def train(epoch_num):
    image_groups, sentence_groups = make_groups(train_image_ids, train_sentences)
    test_image_groups, test_sentence_groups = make_groups(test_image_ids, test_sentences, train=False)
    for epoch in range(epoch_num):
        batches = random_batches(image_groups, sentence_groups)
        sum_loss = 0
        sum_acc = 0
        sum_size = 0
        batch_num = len(batches)
        for i, (image_id_batch, sentence_batch) in enumerate(batches):
            loss, acc, size = forward(caption_net, images[image_id_batch], sentence_batch)
            optimizer.zero_grads()
            loss.backward()
            loss.unchain_backward()
            optimizer.update()
            sentence_length = sentence_batch.shape[1]
            sum_loss += float(loss.data) * size
            sum_acc += acc * size
            sum_size += size
            if (i + 1) % 500 == 0:
                print '{} / {} loss: {} accuracy: {}'.format(
                    i + 1, batch_num, sum_loss / sum_size, sum_acc / sum_size)
        print 'epoch: {} done'.format(epoch + 1)
        print 'train loss: {} accuracy: {}'.format(sum_loss / sum_size, sum_acc / sum_size)
        sum_loss = 0
        sum_acc = 0
        sum_size = 0
        for image_ids, sentences in zip(test_image_groups, test_sentence_groups):
            if len(sentences) == 0:
                continue
            size = len(sentences)
            for i in range(0, size, batch_size):
                image_id_batch = image_ids[i:i + batch_size]
                sentence_batch = sentences[i:i + batch_size]
                loss, acc, size = forward(caption_net, images[image_id_batch], sentence_batch, train=False)
                sentence_length = sentence_batch.shape[1]
                sum_loss += float(loss.data) * size
                sum_acc += acc * size
                sum_size += size
        print 'test loss: {} accuracy: {}'.format(sum_loss / sum_size, sum_acc / sum_size)
        serializers.save_hdf5(args.output + '_{0:04d}.model'.format(epoch), caption_net)
        serializers.save_hdf5(args.output + '_{0:04d}.state'.format(epoch), optimizer)
def save(self, dir=None, name="lstm"): if dir is None: raise Exception() try: os.mkdir(dir) except: pass serializers.save_hdf5(dir + "/%s_fc.model" % name, self.fc) serializers.save_hdf5(dir + "/%s_lstm.model" % name, self.lstm) print "model saved." serializers.save_hdf5(dir + "/%s_fc.optimizer" % name, self.optimizer_fc) serializers.save_hdf5(dir + "/%s_lstm.optimizer" % name, self.optimizer_lstm) print "optimizer saved."
def train_loop():
    # Trainer
    graph_generated = False
    while True:
        while data_q.empty():
            time.sleep(0.1)
        inp = data_q.get()
        if inp == 'end':  # quit
            res_q.put('end')
            break
        elif inp == 'train':  # restart training
            res_q.put('train')
            model.train = True
            continue
        elif inp == 'val':  # start validation
            res_q.put('val')
            serializers.save_hdf5(args.out, model)
            serializers.save_hdf5(args.outstate, optimizer)
            model.train = False
            continue
        volatile = 'off' if model.train else 'on'
        x = chainer.Variable(xp.asarray(inp[0]), volatile=volatile)
        t = chainer.Variable(xp.asarray(inp[1]), volatile=volatile)
        if model.train:
            optimizer.update(model, x, t)
            if not graph_generated:
                with open('graph.dot', 'w') as o:
                    o.write(computational_graph.build_computational_graph(
                        (model.loss,)).dump())
                print('generated graph', file=sys.stderr)
                graph_generated = True
        else:
            model(x, t)
        res_q.put((float(model.loss.data), float(model.accuracy.data)))
        del x, t
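# A resume path would mirror the two saves in the 'val' branch above -- a sketch,
# not from the original source, assuming the same args.out/args.outstate paths
# and that model and optimizer are already constructed before training restarts.
def resume_if_available(model, optimizer, model_path, state_path):
    if os.path.exists(model_path) and os.path.exists(state_path):
        serializers.load_hdf5(model_path, model)
        serializers.load_hdf5(state_path, optimizer)
        return True
    return False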
def test(self, x_l, y_l):
    y = F.softmax(self.mlp_ae.mlp_encoder(x_l, test=True))
    y_argmax = F.argmax(y, axis=1)
    acc = F.accuracy(y, y_l)
    y_l_cpu = cuda.to_cpu(y_l.data)
    y_argmax_cpu = cuda.to_cpu(y_argmax.data)

    # Confusion matrix
    cm = confusion_matrix(y_l_cpu, y_argmax_cpu)
    print(cm)

    # Wrong samples
    idx = np.where(y_l_cpu != y_argmax_cpu)[0]

    # Generate and save
    x_rec = self.mlp_ae.mlp_decoder(y, self.mlp_encoder.hiddens, test=True)
    save_incorrect_info(x_rec.data[idx, ], x_l.data[idx, ],
                        y.data[idx, ], y_l.data[idx, ])

    # Save model
    serializers.save_hdf5("./model/mlp_encdec.h5py", self.mlp_ae)
    return acc
def fcn8s_caffe_to_chainermodel(caffe_prototxt, caffemodel_path, chainermodel_path):
    net = caffe.Net(caffe_prototxt, caffemodel_path, caffe.TEST)
    model = FCN8s()
    for name, param in net.params.iteritems():
        layer = getattr(model, name)
        has_bias = True
        if len(param) == 1:
            has_bias = False
        print('{0}:'.format(name))
        # weight
        print(' - W:', param[0].data.shape, layer.W.data.shape)
        assert param[0].data.shape == layer.W.data.shape
        layer.W.data = param[0].data
        # bias
        if has_bias:
            print(' - b:', param[1].data.shape, layer.b.data.shape)
            assert param[1].data.shape == layer.b.data.shape
            layer.b.data = param[1].data
    S.save_hdf5(chainermodel_path, model)
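# The converted snapshot can then be consumed like any other HDF5 file -- a
# sketch, not from the original source, assuming FCN8s() rebuilds the same
# architecture the converter filled in (S is the chainer.serializers alias
# used above).
def load_converted_fcn8s(chainermodel_path):
    model = FCN8s()
    S.load_hdf5(chainermodel_path, model)
    return model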
def main():
    ###########################
    #### create dictionary ####
    ###########################
    if os.path.exists('./data/corpus/dictionary.dict'):
        if args.lang == 'ja':
            corpus = JaConvCorpus(file_path=None, batch_size=batchsize, size_filter=True)
        else:
            corpus = ConvCorpus(file_path=None, batch_size=batchsize, size_filter=True)
        corpus.load(load_dir='./data/corpus/')
    else:
        if args.lang == 'ja':
            corpus = JaConvCorpus(file_path=data_file, batch_size=batchsize, size_filter=True)
        else:
            corpus = ConvCorpus(file_path=data_file, batch_size=batchsize, size_filter=True)
        corpus.save(save_dir='./data/corpus/')
    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))

    ######################
    #### create model ####
    ######################
    model = Seq2Seq(vocab_size=len(corpus.dic.token2id), feature_num=feature_num,
                    hidden_num=hidden_num, batch_size=batchsize, gpu_flg=args.gpu)
    if args.gpu >= 0:
        model.to_gpu()
    optimizer = optimizers.Adam(alpha=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(5))

    ##########################
    #### create ID corpus ####
    ##########################
    input_mat = []
    output_mat = []
    input_mat_rev = []
    # output_wp_mat = []
    max_input_ren = max_output_ren = 0
    for input_text, output_text in zip(corpus.posts, corpus.cmnts):
        output_text.append(corpus.dic.token2id["<eos>"])
        # update max sentence length
        max_input_ren = max(max_input_ren, len(input_text))
        max_output_ren = max(max_output_ren, len(output_text))
        input_mat.append(input_text)
        output_mat.append(output_text)
        # # create word prediction matrix
        # wp = []
        # for wid in output_text:
        #     if wid not in wp:
        #         wp.append(wid)
        # output_wp_mat.append(wp)
    # make reverse corpus
    for input_text in input_mat:
        input_mat_rev.append(input_text[::-1])
    # padding
    for li in input_mat:
        insert_num = max_input_ren - len(li)
        for _ in range(insert_num):
            li.append(corpus.dic.token2id['<pad>'])
    for li in output_mat:
        insert_num = max_output_ren - len(li)
        for _ in range(insert_num):
            li.append(corpus.dic.token2id['<pad>'])
    for li in input_mat_rev:
        insert_num = max_input_ren - len(li)
        for _ in range(insert_num):
            li.insert(0, corpus.dic.token2id['<pad>'])
    # create batch matrix
    input_mat = np.array(input_mat, dtype=np.int32).T
    input_mat_rev = np.array(input_mat_rev, dtype=np.int32).T
    output_mat = np.array(output_mat, dtype=np.int32).T
    # separate corpus into Train and Test
    perm = np.random.permutation(len(corpus.posts))
    test_input_mat = input_mat[:, perm[0:0 + testsize]]
    test_output_mat = output_mat[:, perm[0:0 + testsize]]
    test_input_mat_rev = input_mat_rev[:, perm[0:0 + testsize]]
    train_input_mat = input_mat[:, perm[testsize:]]
    train_output_mat = output_mat[:, perm[testsize:]]
    train_input_mat_rev = input_mat_rev[:, perm[testsize:]]
    # train_output_wp_mat = []
    # for index in perm[testsize:]:
    #     train_output_wp_mat.append(output_wp_mat[index])

    #############################
    #### train seq2seq model ####
    #############################
    accum_loss = 0
    train_loss_data = []
    for num, epoch in enumerate(range(n_epoch)):
        total_loss = 0
        batch_num = 0
        perm = np.random.permutation(len(corpus.posts) - testsize)
        # for training
        for i in range(0, len(corpus.posts) - testsize, batchsize):
            # select batch data
            input_batch = remove_extra_padding(
                train_input_mat[:, perm[i:i + batchsize]], reverse_flg=False)
            input_batch_rev = remove_extra_padding(
                train_input_mat_rev[:, perm[i:i + batchsize]], reverse_flg=True)
            output_batch = remove_extra_padding(
                train_output_mat[:, perm[i:i + batchsize]], reverse_flg=False)
            # output_wp_batch = []
            # for index in perm[i:i + batchsize]:
            #     output_wp_batch.append(train_output_wp_mat[index])
            # output_wp_batch = create_wp_batch(vocab_size=len(corpus.dic.token2id),
            #                                   wp_lists=output_wp_batch)
            # Encode a sentence
            model.initialize(batch_size=input_batch.shape[1])
            model.encode(input_batch, input_batch_rev, train=True)
            # Decode from encoded context
            end_batch = xp.array([corpus.dic.token2id["<start>"]
                                  for _ in range(input_batch.shape[1])])
            first_words = output_batch[0]
            loss, predict_mat = model.decode(end_batch, first_words, train=True)
            next_ids = first_words
            accum_loss += loss
            for w_ids in output_batch[1:]:
                loss, predict_mat = model.decode(next_ids, w_ids, train=True)
                next_ids = w_ids
                accum_loss += loss
            # learn model
            model.cleargrads()     # initialize all grads to zero
            accum_loss.backward()  # back propagation
            optimizer.update()
            total_loss += float(accum_loss.data)
            batch_num += 1
            print('Epoch: ', num, 'Batch_num', batch_num,
                  'batch loss: {:.2f}'.format(float(accum_loss.data)))
            accum_loss = 0
        train_loss_data.append(float(total_loss / batch_num))
        # save model and optimizer
        if (epoch + 1) % 5 == 0:
            print('-----', epoch + 1, ' times -----')
            print('save the model and optimizer')
            serializers.save_hdf5('data/' + str(epoch) + '.model', model)
            serializers.save_hdf5('data/' + str(epoch) + '.state', optimizer)
    # save loss data
    with open('./data/loss_train_data.pkl', 'wb') as f:
        pickle.dump(train_loss_data, f)
v_iter = MultithreadIterator(validation, batch_size=batch, repeat=True, shuffle=True, n_threads=batch)
model = Network(channels, blocks, ksize)
if device >= 0:
    model.to_gpu()
optimizer = optimizers.Adam().setup(model)
updater = CustomUpdater({"main": t_iter, "test": v_iter}, optimizer, (patch, patch))
trainer = Trainer(updater, (epoch, "epoch"), out=out)
log = extensions.LogReport()
trainer.extend(log)
trainer.extend(extensions.PrintReport(["epoch", "iteration", "loss", "test"], log))
trainer.extend(extensions.ProgressBar(update_interval=1))
trainer.extend(lambda trainer: save_hdf5(f"{out}/m{trainer.updater.iteration}.hdf5", model),
               trigger=(5, "epoch"))
trainer.extend(lambda trainer: save_hdf5(f"{out}/o{trainer.updater.iteration}.hdf5", optimizer),
               trigger=(5, "epoch"))
trainer.run()
save_hdf5(f"{out}/model.hdf5", model)
save_hdf5(f"{out}/optimizer.hdf5", optimizer)
model, test_mean_loss, test_ac, test_IoU = test(
    model, MiniBatchLoader, test_mean_loss, test_ac, test_IoU)
if args.logflag == 'on':
    etime = time.clock()
    debugger.writelog(MiniBatchLoader.datasize_train, MiniBatchLoader.datasize_test,
                      MiniBatchLoader.batchsize, 'Human part segmentation',
                      stime, etime, train_mean_loss, train_ac, train_IoU,
                      test_mean_loss, test_ac, test_IoU, epoch,
                      LOG_FILENAME=resultdir + 'log.txt')
    debugger.plot_result(train_mean_loss, test_mean_loss,
                         savename=resultdir + 'log.png')
if args.saveflag == 'on' and epoch % 10 == 0:
    from chainer import serializers
    serializers.save_hdf5(
        resultdir + 'humanpartsnet_epoch' + str(epoch) + '.model', model)
    serializers.save_hdf5(
        resultdir + 'humanpartsnet_epoch' + str(epoch) + '.state', optimizer)
def train(params):
    en_model = EnglishLSTM(len(params['en_list']))
    en_rythm_model = EnglishRythmLSTM(len(params['en_rythm_list']))
    ja_model = JapaneseLSTM(len(params['ja_list']))
    ja_rythm_model = JapaneseRythmLSTM(len(params['ja_rythm_list']))
    data = {
        # 'english': en_model.get_train_data(params['english'], params['batch_size']),  # parallel
        # 'en_rythm': en_model.get_train_data(params['en_rythm'], params['batch_size'])
        'english': params['english'],
        'en_rythm': params['en_rythm'],
        'japanese': params['japanese'],
        'ja_rythm': params['ja_rythm'],
    }
    # use Adam as the optimization algorithm
    optimizer = [
        optimizers.Adam().setup(en_model),
        optimizers.Adam().setup(en_rythm_model),
        optimizers.Adam().setup(ja_model),
        optimizers.Adam().setup(ja_rythm_model),
    ]
    loss_list = []
    step = []
    for epoch in range(params['epoch_num']):
        print("epoch: %d" % (epoch + 1))
        loss = 0.0
        # train on the English lyrics
        en_model.reset()
        for index, (en_phrase, en_rythm_phrase, ja_phrase, ja_rythm_phrase) in enumerate(
                zip(data['english'], data['en_rythm'], data['japanese'], data['ja_rythm'])):
            # reset the state when the song changes
            if len(en_phrase) == 0:
                en_model.reset()
                en_rythm_model.reset()
                continue
            # if len(en_rythm_phrase) == 0:
            #     en_rythm_model.reset()
            #     continue
            # if len(ja_phrase) == 0:
            #     ja_model.reset()
            #     continue
            # English lyrics
            for word in en_phrase:
                y_en = en_model.forward(word, params['en_list'])
            # English rhythm
            for rythm in en_rythm_phrase:
                y_en_rythm = en_rythm_model.forward(rythm, params['en_rythm_list'])
            # sum the two outputs
            h = y_en + y_en_rythm
            # predict the first Japanese word from h
            tx = Variable(np.array([params['ja_list'][ja_phrase[0]]], dtype=np.int32))
            loss += F.softmax_cross_entropy(ja_model.predict(h), tx)
            # emit the Japanese words from the summed output
            for index, word in enumerate(ja_phrase):
                y_ja = ja_model.forward(word, params['ja_list'])
                if word != '<eos>':
                    tx = Variable(np.array([params['ja_list'][ja_phrase[index + 1]]], dtype=np.int32))
                    # print(y_ja, tx)
                    loss += F.softmax_cross_entropy(y_ja, tx)
            # predict the first Japanese rhythm from h
            tx = Variable(np.array([params['ja_rythm_list'][ja_rythm_phrase[0]]], dtype=np.int32))
            loss += F.softmax_cross_entropy(ja_rythm_model.predict(h), tx)
            # emit the Japanese rhythm from the summed output
            for index, rythm in enumerate(ja_rythm_phrase):
                y_ja_rythm = ja_rythm_model.forward(rythm, params['ja_rythm_list'])
                if rythm != '<eos>':
                    tx = Variable(np.array([params['ja_rythm_list'][ja_rythm_phrase[index + 1]]],
                                           dtype=np.int32))
                    # print(y_ja, tx)
                    loss += F.softmax_cross_entropy(y_ja_rythm, tx)
            # print(ja_model.l1.upward.W.grad)
            en_model.cleargrads()
            en_rythm_model.cleargrads()
            ja_model.cleargrads()
            ja_rythm_model.cleargrads()
            loss.backward()
            loss.unchain_backward()
            ja_model.reset()
            ja_rythm_model.reset()
            for opt in optimizer:
                opt.update()
        # track the loss
        step.append(epoch + 1)
        loss_list.append(loss.data)
        print(loss)
    # save the models
    serializers.save_hdf5('models/en_model_' + str(params['epoch_num']), en_model)
    serializers.save_hdf5('models/en_rythm_model_' + str(params['epoch_num']), en_rythm_model)
    serializers.save_hdf5('models/ja_model_' + str(params['epoch_num']), ja_model)
    serializers.save_hdf5('models/ja_rythm_model_' + str(params['epoch_num']), ja_rythm_model)
    # plot the training loss curve
    plt.plot(step, loss_list)
    plt.title("Training Data")
    plt.xlabel("step")
    plt.ylabel("loss")
    plt.grid(True)
    plt.show()
np.savez('topics.pyldavis', **data)
for d, f in utils.chunks(batchsize, doc_ids, flattened):
    t0 = time.time()
    optimizer.zero_grads()
    l = model.fit_partial(d.copy(), f.copy())
    prior = model.prior()
    loss = prior * fraction
    loss.backward()
    optimizer.update()
    msg = ("J:{j:05d} E:{epoch:05d} L:{loss:1.3e} "
           "P:{prior:1.3e} R:{rate:1.3e}")
    prior.to_cpu()
    loss.to_cpu()
    t1 = time.time()
    dt = t1 - t0
    rate = batchsize / dt
    logs = dict(loss=float(l), epoch=epoch, j=j,
                prior=float(prior.data), rate=rate)
    j += 1
    # print '\nTime:', (time.time() - start), msg.format(**logs)
if j > 0:  # and j % 500 == 0:
    coherence = topic_coherence(top_words)
    print '\nCoherence:'
    # use a separate loop variable so the batch counter j is not clobbered
    for k in range(n_topics):
        print k, coherence[(k, 'cv')]
    kw = dict(top_words=top_words, coherence=coherence, epoch=epoch)
    progress[str(epoch)] = pickle.dumps(kw)
serializers.save_hdf5("lda2vec.hdf5", model)
x_batch = np.array(x_batch_list, dtype=np.float32)
y_batch = np.array(y_batch_list, dtype=np.int32)
if gpu_id >= 0:
    x_batch = cuda.to_gpu(x_batch)
    y_batch = cuda.to_gpu(y_batch)
optimizer.zero_grads()
loss, accuracy = forward(x_batch, y_batch)
#print loss.data, accuracy.data
with open(savedir + "real_loss.txt", "a") as f:
    f.write(str(loss.data) + '\n')
with open(savedir + "real_accuracy.txt", "a") as f:
    f.write(str(accuracy.data) + '\n')
loss.backward()
optimizer.update()
sum_loss += loss.data * batchsize
sum_accuracy += accuracy.data * batchsize

serializers.save_hdf5(savedir + "/relation_model" + str(epoch) + '.chainer', model)
serializers.save_hdf5(savedir + "/optimizer" + str(epoch) + '.chainer', optimizer)
mean_loss = sum_loss / num_train_data
mean_accuracy = sum_accuracy / num_train_data
print mean_loss, mean_accuracy
with open(savedir + "mean_loss.txt", "a") as f:
    f.write(str(mean_loss) + '\n')
with open(savedir + "mean_accuracy.txt", "a") as f:
    f.write(str(mean_accuracy) + '\n')
                       feed_dict={loss_: np.mean(pre_train_loss)})
    summary_writer.add_summary(summary, test_count)
    summary = sess.run(test_loss_summary,
                       feed_dict={loss_: np.mean(test_loss)})
    summary_writer.add_summary(summary, test_count)
    samples = generator.generate(10, train=False)
    with open(os.path.join(out_dir, "generated_sample_pretrain.txt"), 'a', encoding='utf-8') as f:
        f.write('\npre-train epoch {} train_loss {} test_loss {} \n'.format(
            epoch, np.mean(pre_train_loss), np.mean(test_loss)))
        for x in samples:
            f.write(''.join([arasuji.vocab[w] for w in x]) + '\n')
    serializers.save_hdf5(os.path.join(out_dir, "models", "gen_pretrain.model"), generator)
else:
    # test
    test_loss = []
    for _ in range(test_num // batch_size):
        batch = arasuji.get_test_data(batch_size)
        g_loss = generator.pretrain_step(batch)
        test_loss.append(float(g_loss.data))
    print('\npre-trained test_loss {}'.format(np.mean(test_loss)))
    test_count = args.gen_pretrain_epoch
    summary = sess.run(test_loss_summary,
                       feed_dict={loss_: np.mean(test_loss)})
    summary_writer.add_summary(summary, test_count)

# discriminator pre-train
def train_dcgan_labeled(gen, dis, epoch0=0):
    o_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_gen.setup(gen)
    o_dis.setup(dis)
    o_gen.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))
    zvis = xp.random.uniform(-1, 1, (100, nz), dtype=np.float32)
    for epoch in xrange(epoch0, n_epoch):
        perm = np.random.permutation(n_train)
        sum_l_dis = np.float32(0)
        sum_l_gen = np.float32(0)
        for i in xrange(0, n_train, batchsize):
            # discriminator labels
            # 0: from dataset
            # 1: from noise
            #print "load image start ", i
            x2 = np.zeros((batchsize, 1, patch_h, patch_w), dtype=np.float32)
            img = load_image()
            for j in range(batchsize):
                rndx = np.random.randint(img_w - patch_w)
                rndy = np.random.randint(img_h - patch_h)
                x2[j, 0, :, :] = img[rndx:rndx + patch_w, rndy:rndy + patch_h]
            #print "load image done"
            # train generator
            z = Variable(xp.random.uniform(-1, 1, (batchsize, nz), dtype=np.float32))
            x = gen(z)
            yl = dis(x)
            L_gen = F.softmax_cross_entropy(yl, Variable(xp.zeros(batchsize, dtype=np.int32)))
            L_dis = F.softmax_cross_entropy(yl, Variable(xp.ones(batchsize, dtype=np.int32)))
            # train discriminator
            x2 = Variable(cuda.to_gpu(x2))
            yl2 = dis(x2)
            L_dis += F.softmax_cross_entropy(yl2, Variable(xp.zeros(batchsize, dtype=np.int32)))
            #print "forward done"
            o_gen.zero_grads()
            L_gen.backward()
            o_gen.update()
            o_dis.zero_grads()
            L_dis.backward()
            o_dis.update()
            sum_l_gen += L_gen.data.get()
            sum_l_dis += L_dis.data.get()
            #print "backward done"
            if i % image_save_interval == 0:
                print "visualize...", epoch, i
                pylab.rcParams['figure.figsize'] = (16.0, 16.0)
                pylab.clf()
                vissize = 100
                z = zvis
                z[50:, :] = xp.random.uniform(-1, 1, (50, nz), dtype=np.float32)
                z = Variable(z)
                x = gen(z, test=True)
                x = x.data.get()
                for i_ in range(100):
                    tmp = data_to_image(i_, x)
                    pylab.subplot(10, 10, i_ + 1)
                    pylab.imshow(tmp)
                    pylab.axis('off')
                pylab.savefig('%s/vis_%d_%06d.png' % (out_image_dir, epoch, i))
                print "visualized."
                serializers.save_hdf5("%s/dcgan_model_dis_%d.h5" % (out_model_dir, epoch), dis)
                serializers.save_hdf5("%s/dcgan_model_gen_%d.h5" % (out_model_dir, epoch), gen)
                serializers.save_hdf5("%s/dcgan_state_dis_%d.h5" % (out_model_dir, epoch), o_dis)
                serializers.save_hdf5("%s/dcgan_state_gen_%d.h5" % (out_model_dir, epoch), o_gen)
        print 'epoch end', epoch, sum_l_gen / n_train, sum_l_dis / n_train
plt.figure(figsize=(8, 6))
plt.xlim([0, n_epoch])
plt.ylim([0.975, 1.0])

def add1list(seq):
    # shift every index up by one so epochs start at 1 on the x axis
    return map(lambda item: item + 1, seq)

plt.plot(add1list(xrange(len(train_acc))), train_acc)
plt.plot(add1list(xrange(len(test_acc))), test_acc)
plt.legend(["train_acc", "test_acc"], loc=4)
plt.title("Accuracy of digit recognition.")
plt.plot()

# Save the model and the optimizer
print 'save the model'
model.to_cpu()
serializers.save_hdf5('mlp.model', model)
print 'save the optimizer'
serializers.save_hdf5('mlp.state', optimizer)

finishtime = time.time()
print 'execute time = {}'.format(finishtime - starttime)

plt.savefig("graph.png")
plt.show()
# interactive console, handy when rescaling the graph etc.
# code.InteractiveConsole(globals()).interact()
def method(self):
    x_train, x_test = np.split(self.x_data, [self.N])
    y_train, y_test = np.split(self.y_data.astype(np.int32), [self.N])
    self.N_test = y_test.size
    optimizer = optimizers.SGD()
    optimizer.setup(self.model)
    for k in self.label_counter.keys():
        self.label_counter[k] = 0
    for epoch in range(self.iteration_number):
        perm = np.random.permutation(self.N)
        sum_accuracy = 0
        sum_loss = 0
        for i in six.moves.range(0, self.N, self.batchsize):
            x = chainer.Variable(self.xp.asarray(x_train[perm[i:i + self.batchsize]]))
            t = chainer.Variable(self.xp.asarray(y_train[perm[i:i + self.batchsize]]))
            # Pass the loss function (Classifier defines it) and its arguments
            optimizer.update(self.model, x, t)
            if epoch == 1 and i == 0:
                with open('graph.dot', 'w') as o:
                    g = computational_graph.build_computational_graph(
                        (self.model.loss, ), remove_split=True)
                    o.write(g.dump())
                print('graph generated')
            # Apply the bias for the output
            self.model.y.data = self.__bias(self.model.y.data, t.data)
            sum_loss += float(self.model.loss.data) * len(t.data)
            sum_accuracy += float(self.model.accuracy.data) * len(t.data)
        print('train mean loss={}, accuracy={}'.format(
            sum_loss / self.N, sum_accuracy / self.N))
        # evaluation
        self.y_predict_data = []
        sum_loss, sum_accuracy = 0, 0
        sum_recall = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        sum_precision = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        sum_f_score = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        for i in six.moves.range(0, self.N_test, self.batchsize):
            x = chainer.Variable(self.xp.asarray(x_test[i:i + self.batchsize]), volatile='on')
            t = chainer.Variable(self.xp.asarray(y_test[i:i + self.batchsize]), volatile='on')
            loss = self.model(x, t)
            # use a distinct index so the minibatch counter i is not clobbered
            for j in range(len(self.model.y.data)):
                self.y_predict_data.append(np.argmax(self.model.y.data[j]))
            sum_loss += float(loss.data) * len(t.data)
            sum_accuracy += float(self.model.accuracy.data) * len(t.data)
            sum_precision, sum_recall, sum_f_score = self.__calculate_metrics(
                t, sum_precision, sum_recall, sum_f_score)
            self.y_predict_data = []
        print('test mean loss={}, accuracy={}'.format(
            sum_loss / self.N_test, sum_accuracy / self.N_test))
        mean_recall = [n / self.N_test for n in sum_recall]
        mean_precision = [n / self.N_test for n in sum_precision]
        mean_f_score = [n / self.N_test for n in sum_f_score]
        print("mean_recall ,", [x for x in mean_recall])
        print("mean_precision ,", [x for x in mean_precision])
        print("mean_f_score ,", [x for x in mean_f_score])
    # Save the model and the optimizer
    print('save the model')
    serializers.save_hdf5('emotion_recognition.model', self.model)
    print('save the optimizer')
    serializers.save_hdf5('emotion_recognition.state', optimizer)
def main():
    ###########################
    #### create dictionary ####
    ###########################
    if os.path.exists(CORPUS_DIR + 'dictionary.dict'):
        corpus = JaConvCorpus(create_flg=False, batch_size=batchsize, size_filter=True)
        corpus.load(load_dir=CORPUS_DIR)
    else:
        corpus = JaConvCorpus(create_flg=True, batch_size=batchsize, size_filter=True)
        corpus.save(save_dir=CORPUS_DIR)
    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))
    print('Emotion size: ', len(corpus.emotion_set))

    # search word_threshold (general or emotional)
    ma = 0
    mi = 999999
    for word in corpus.emotion_set:
        wid = corpus.dic.token2id[word]
        if wid > ma:
            ma = wid
        if wid < mi:
            mi = wid
    word_threshold = mi

    ######################
    #### create model ####
    ######################
    model = PreTrainSeq2Seq(all_vocab_size=len(corpus.dic.token2id),
                            emotion_vocab_size=len(corpus.emotion_set),
                            feature_num=feature_num, hidden_num=hidden_num,
                            batch_size=batchsize, label_num=label_num,
                            label_embed_num=label_embed, gpu_flg=args.gpu)
    if args.gpu >= 0:
        model.to_gpu()
    optimizer = optimizers.Adam(alpha=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    ##########################
    #### create ID corpus ####
    ##########################
    input_mat = []
    output_mat = []
    input_mat_rev = []
    label_mat = []
    max_input_ren = max_output_ren = 0
    print('start making corpus matrix...')
    for input_text, output_text in zip(corpus.rough_posts, corpus.rough_cmnts):
        # append an eos tag to the end of each output
        output_text.append(corpus.dic.token2id["<eos>"])
        # update max sentence length
        max_input_ren = max(max_input_ren, len(input_text))
        max_output_ren = max(max_output_ren, len(output_text))
        # make a list of lists
        input_mat.append(input_text)
        output_mat.append(output_text)
        # make label lists  TODO: only 3-class (pos, neg, neu) for now; make this configurable
        n_num = p_num = 0
        for word in output_text:
            if corpus.dic[word] in corpus.neg_words:
                n_num += 1
            if corpus.dic[word] in corpus.pos_words:
                p_num += 1
        if (n_num + p_num) == 0:
            label_mat.append([1 for _ in range(len(output_text))])
        elif n_num <= p_num:
            label_mat.append([2 for _ in range(len(output_text))])
        elif n_num > p_num:
            label_mat.append([0 for _ in range(len(output_text))])
        else:
            raise ValueError
    # make reverse corpus
    for input_text in input_mat:
        input_mat_rev.append(input_text[::-1])
    # padding (pad the head of inputs and the tail of outputs)
    print('start labeling...')
    for li in input_mat:
        insert_num = max_input_ren - len(li)
        for _ in range(insert_num):
            li.append(corpus.dic.token2id['<pad>'])
    for li in output_mat:
        insert_num = max_output_ren - len(li)
        for _ in range(insert_num):
            li.append(corpus.dic.token2id['<pad>'])
    for li in input_mat_rev:
        insert_num = max_input_ren - len(li)
        for _ in range(insert_num):
            li.insert(0, corpus.dic.token2id['<pad>'])
    for li in label_mat:
        insert_num = max_output_ren - len(li)
        for _ in range(insert_num):
            li.append(corpus.dic.token2id['<pad>'])
    if len(output_mat) != len(label_mat):
        print('Output matrix and label matrix should have the same dimension.')
        raise ValueError
    # create batch matrix
    print('transpose...')
    input_mat = np.array(input_mat, dtype=np.int32).T
    input_mat_rev = np.array(input_mat_rev, dtype=np.int32).T
    output_mat = np.array(output_mat, dtype=np.int32).T
    label_mat = np.array(label_mat, dtype=np.int32).T
    # separate corpus into Train and Test  TODO: actually split into train and test sets for experiments
    print('split train and test...')
    train_input_mat = input_mat
    train_output_mat = output_mat
    train_input_mat_rev = input_mat_rev
    train_label_mat = label_mat

    #############################
    #### train seq2seq model ####
    #############################
    accum_loss = 0
    train_loss_data = []
    print('start training...')
    for num, epoch in enumerate(range(n_epoch)):
        total_loss = 0
        batch_num = 0
        perm = np.random.permutation(len(corpus.rough_posts))
        # for training
        for i in range(0, len(corpus.rough_posts), batchsize):
            # select batch data
            input_batch = remove_extra_padding(
                train_input_mat[:, perm[i:i + batchsize]], reverse_flg=False)
            input_batch_rev = remove_extra_padding(
                train_input_mat_rev[:, perm[i:i + batchsize]], reverse_flg=True)
            output_batch = remove_extra_padding(
                train_output_mat[:, perm[i:i + batchsize]], reverse_flg=False)
            label_batch = remove_extra_padding(
                train_label_mat[:, perm[i:i + batchsize]], reverse_flg=False)
            # Encode a sentence
            model.initialize(batch_size=input_batch.shape[1])  # initialize cell
            model.encode(input_batch, input_batch_rev, train=True)  # encode (output: hidden Variable)
            # Decode from encoded context
            input_ids = xp.array([corpus.dic.token2id["<start>"]
                                  for _ in range(input_batch.shape[1])])
            for w_ids, l_ids in zip(output_batch, label_batch):
                loss, predict_mat = model.decode(input_ids, w_ids, label_id=l_ids,
                                                 word_th=word_threshold, train=True)
                input_ids = w_ids
                accum_loss += loss
            # learn model
            model.cleargrads()     # initialize all grads to zero
            accum_loss.backward()  # back propagation
            optimizer.update()
            total_loss += float(accum_loss.data)
            batch_num += 1
            print('Epoch: ', num, 'Batch_num', batch_num,
                  'batch loss: {:.2f}'.format(float(accum_loss.data)))
            accum_loss = 0
        train_loss_data.append(float(total_loss / batch_num))
        # save model and optimizer
        print('-----', epoch + 1, ' times -----')
        print('save the model and optimizer')
        serializers.save_hdf5('../data/seq2seq/' + str(epoch) + '_rough.model', model)
        serializers.save_hdf5('../data/seq2seq/' + str(epoch) + '_rough.state', optimizer)
    # save loss data
    with open('./data/loss_train_data.pkl', 'wb') as f:
        pickle.dump(train_loss_data, f)
def save(self, filename):
    # Write to a unique temporary file first, then swap it into place, so a
    # crash mid-save never leaves a truncated model file behind.
    tmp_filename = filename + "." + str(uuid.uuid4())
    serializers.save_hdf5(tmp_filename, self)
    if os.path.isfile(filename):
        os.remove(filename)
    os.rename(tmp_filename, filename)
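# The matching loader needs no temp-file step, since reading never mutates the
# file -- a sketch, not from the original source, assuming self is constructed
# with the same architecture before loading.
def load(self, filename):
    serializers.load_hdf5(filename, self)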
def save(self):
    for name, f in self.forward:
        if 'res' in name:
            serializers.save_hdf5(name + '.hdf5', getattr(self, name))
def save_params(self, epoch):
    print "==> saving state %s" % self.out_model_dir
    serializers.save_hdf5(
        "%s/net_model_classifier_%d.h5" % (self.out_model_dir, epoch),
        self.network)
def main():
    # parse command line args
    parser = argparse.ArgumentParser()
    parser.add_argument('--configfile', '-c', default="", type=str, help='')
    args = parser.parse_args()
    with open(args.configfile, "r+") as f:
        config = yaml.load(f)

    # GPU settings
    if config["GPU"] >= 0:
        cuda.check_cuda_available()
        cuda.get_device(config["GPU"]).use()
    xp = cuda.cupy if config["GPU"] >= 0 else np

    if "init_emb" in config and config["init_emb"] != "None":
        with open(config["init_emb"], "rb") as f:
            initial_embedding = pickle.load(f)
    else:
        initial_embedding = None

    ######################
    #### create model ####
    ######################
    model, corpus = load_model(config, initial_embedding)
    wordvector_model = load_wordvector(config)
    if config["GPU"] >= 0:
        model.to_gpu()
    optimizer = optimizers.Adam(alpha=config["training_rate"])
    optimizer.setup(model)
    if "fix_embedding" in config and config["fix_embedding"]:
        model.enc.word_embed.disable_update()
    optimizer.add_hook(chainer.optimizer.GradientClipping(5))

    if config["NN_model"] in ["RNN", "GRU"]:
        corpus.train_data[0] = [xp.array(x, dtype=xp.int32) for x in corpus.train_data[0]]
        corpus.train_data = list(zip(corpus.train_data[0], corpus.train_data[1],
                                     corpus.train_data[2], corpus.train_data[3]))
        if hasattr(corpus, "dev_data"):
            corpus.dev_data[0] = [xp.array(x, dtype=xp.int32) for x in corpus.dev_data[0]]
            corpus.dev_data = list(zip(corpus.dev_data[0], corpus.dev_data[1],
                                       corpus.dev_data[2], corpus.dev_data[3]))
    elif config["NN_model"] in ["CNN", "SUM", "SUMFF"]:
        corpus.train_data[0] = [
            xp.array([x[i] if i < len(x) else -1 for i in range(corpus.max_input_len)],
                     dtype=xp.int32)
            for x in corpus.train_data[0]]
        corpus.train_data = list(zip(corpus.train_data[0], corpus.train_data[1],
                                     corpus.train_data[2], corpus.train_data[3]))
        if hasattr(corpus, "dev_data"):
            corpus.dev_data[0] = [
                xp.array([x[i] if i < len(x) else -1 for i in range(corpus.max_input_len)],
                         dtype=xp.int32)
                for x in corpus.dev_data[0]]
            corpus.dev_data = list(zip(corpus.dev_data[0], corpus.dev_data[1],
                                       corpus.dev_data[2], corpus.dev_data[3]))
    else:
        print("model is not defined")
        exit()

    #############################
    ####  train mimic model  ####
    #############################
    if "overfit" in config and config["overfit"]:
        train_loss_data = []
        minimum_train_loss = 9999999
        minimum_epoch = 0
        minimum_train_loss_flag = 0
        for num, epoch in enumerate(range(999999)):
            total_loss = 0
            batch_num = 0
            random.shuffle(corpus.train_data)
            # for training
            for i in range(0, len(corpus.train_data), config["batchsize"]):
                # select batch data
                batch = corpus.train_data[i:i + config["batchsize"]]
                batch = list(zip(*batch))
                loss = calc_batch_loss(batch, config, model, wordvector_model)
                # learn model
                model.cleargrads()  # initialize all grads to zero
                loss.backward()     # back propagation
                optimizer.update()
                total_loss += float(loss.data)
                batch_num += 1
                # print('Epoch: ', num, 'Batch_num', batch_num,
                #       'batch loss: {:.2f}'.format(float(loss.data)))
            # save model and optimizer
            if total_loss / batch_num < minimum_train_loss:
                print('-----', epoch + 1, ' times -----')
                print('save the model and optimizer for train loss')
                serializers.save_hdf5('data/' + config["modelname"] + '_best_train_loss.model', model)
                serializers.save_hdf5('data/' + config["modelname"] + '_best_train_loss.state', optimizer)
                minimum_train_loss = total_loss / batch_num
                minimum_epoch = epoch
                minimum_train_loss_flag = 0
            else:
                minimum_train_loss_flag += 1
                if minimum_train_loss_flag > 4:
                    break
            if epoch == 39:
                print('save the model and optimizer')
                serializers.save_hdf5('data/' + config["modelname"] + '_best.model', model)
                serializers.save_hdf5('data/' + config["modelname"] + '_best.state', optimizer)
            # display the on-going status
            print('Epoch: ', num, 'Train sim loss: {:.2f}'.format(total_loss))
            train_loss_data.append(float(total_loss / batch_num))
        # save loss data
        with open('./data/train_loss_' + config["modelname"] + '.pkl', 'wb') as f:
            pickle.dump(train_loss_data, f)
        print(minimum_epoch)
    else:
        train_loss_data = []
        dev_loss_data = []
        minimum_loss = 9999999
        minimum_train_loss = 9999999
        for num, epoch in enumerate(range(config["epoch"])):
            total_loss = dev_loss = 0
            batch_num = 0
            random.shuffle(corpus.train_data)
            # for training
            for i in range(0, len(corpus.train_data), config["batchsize"]):
                # select batch data
                batch = corpus.train_data[i:i + config["batchsize"]]
                batch = list(zip(*batch))
                loss = calc_batch_loss(batch, config, model, wordvector_model)
                # learn model
                model.cleargrads()  # initialize all grads to zero
                loss.backward()     # back propagation
                optimizer.update()
                total_loss += float(loss.data)
                batch_num += 1
                print('Epoch: ', num, 'Batch_num', batch_num,
                      'batch loss: {:.2f}'.format(float(loss.data)))
            # for developing
            for i in range(0, config["devsize"], config["batchsize"]):
                # select dev batch data
                batch = corpus.dev_data[i:i + config["batchsize"]]
                batch = list(zip(*batch))
                loss = calc_batch_loss(batch, config, model, wordvector_model)
                dev_loss += loss
            # save model and optimizer (best dev loss)
            if dev_loss.data < minimum_loss:
                print('-----', epoch + 1, ' times -----')
                print('save the model and optimizer')
                serializers.save_hdf5('data/' + config["modelname"] + '_best.model', model)
                serializers.save_hdf5('data/' + config["modelname"] + '_best.state', optimizer)
                minimum_loss = dev_loss.data
            # save model and optimizer (best train loss)
            if total_loss / batch_num < minimum_train_loss:
                print('-----', epoch + 1, ' times -----')
                print('save the model and optimizer for train loss')
                serializers.save_hdf5('data/' + config["modelname"] + '_best_train_loss.model', model)
                serializers.save_hdf5('data/' + config["modelname"] + '_best_train_loss.state', optimizer)
                minimum_train_loss = total_loss / batch_num
            # display the on-going status
            print('Epoch: ', num, 'Train sim loss: {:.2f}'.format(total_loss),
                  'dev sim loss: {:.2f}'.format(float(dev_loss.data)))
            train_loss_data.append(float(total_loss / batch_num))
            dev_loss_data.append(float(dev_loss.data))
        # save loss data
        with open('./data/train_loss_' + config["modelname"] + '.pkl', 'wb') as f:
            pickle.dump(train_loss_data, f)
        with open('./data/dev_loss_' + config["modelname"] + '.pkl', 'wb') as f:
            pickle.dump(dev_loss_data, f)

    # evaluate against the original vectors
    from model import Interpreter
    interpreter = Interpreter(config)
    mse_total = 0
    cos_sim_total = 0
    total = 0
    for word in corpus.test_data:
        v_o = wordvector_model[word]
        v_m = interpreter(word)
        mse_total += mse(v_o, v_m)
        cos_sim_total += cos_sim(v_o, v_m)
        total += 1
    print(mse_total / total / config["feature_num"])
    print(cos_sim_total / total)
def main():
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU device ID')
    parser.add_argument('--epoch', '-e', type=int, default=50, help='# of epoch')
    parser.add_argument('--batch_size', type=int, default=128, help='size of mini-batch')
    parser.add_argument('--density', type=int, default=1, help='density of cnn kernel')
    parser.add_argument('--small', dest='small', action='store_true', default=False)
    parser.add_argument('--no_bn', dest='use_bn', action='store_false', default=True)
    parser.add_argument('--out', default='')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # model = SLPolicy(use_bn=args.use_bn)
    # model = RolloutPolicy()
    model = RLPolicy()

    # log directory
    out = datetime.datetime.now().strftime('%m%d')
    if args.out:
        out = out + '_' + args.out
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", out))
    os.makedirs(os.path.join(out_dir, 'models'), exist_ok=True)

    # gpu
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    # setting
    with open(os.path.join(out_dir, 'setting.txt'), 'w') as f:
        for k, v in args._get_kwargs():
            print('{} = {}'.format(k, v))
            f.write('{} = {}\n'.format(k, v))

    # prepare for dataset
    if args.small:
        train = PreprocessedDataset(train_small_path)
    else:
        train = PreprocessedDataset(train_path)
    test = PreprocessedDataset(test_path)
    train_iter = iterators.SerialIterator(train, args.batch_size)
    val_iter = iterators.SerialIterator(test, args.batch_size, repeat=False)

    # optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.001))

    # start training
    start = time.time()
    train_count = 0
    for epoch in range(args.epoch):
        # train
        train_loss = []
        train_accuracy = []
        for i in range(len(train) // args.batch_size):
            batch = train_iter.next()
            x = chainer.Variable(model.xp.array([b[0] for b in batch], 'float32'))
            y = chainer.Variable(model.xp.array([b[1] for b in batch], 'int32'))
            optimizer.update(model, x, y)
            train_count += 1
            progress_report(train_count, start, args.batch_size)
            train_loss.append(cuda.to_cpu(model.loss.data))
            train_accuracy.append(cuda.to_cpu(model.accuracy.data))

        # test
        test_loss = []
        test_accuracy = []
        valid_ply_rate = []
        it = copy.copy(val_iter)
        for batch in it:
            x = chainer.Variable(model.xp.array([b[0] for b in batch], 'float32'),
                                 volatile=True)
            y = chainer.Variable(model.xp.array([b[1] for b in batch], 'int32'),
                                 volatile=True)
            plies = model(x, y, train=False)
            for b, ply in zip(batch, plies):
                if b[1] >= 0:
                    valid_ply_rate.append(
                        board.is_valid(b[0][:2], b[0][4][0][0], ply // 8, ply % 8))
            test_loss.append(cuda.to_cpu(model.loss.data))
            test_accuracy.append(cuda.to_cpu(model.accuracy.data))

        print('\nepoch {} train_loss {:.5f} train_accuracy {:.3f} \n'
              ' test_loss {:.5f} test_accuracy {:.3f} valid_ply_rate {:.3f}'
              .format(epoch, np.mean(train_loss), np.mean(train_accuracy),
                      np.mean(test_loss), np.mean(test_accuracy),
                      np.mean(valid_ply_rate)))
        with open(os.path.join(out_dir, "log"), 'a+') as f:
            f.write('epoch {} train_loss {:.5f} train_accuracy {:.3f} \n'
                    ' test_loss {:.5f} test_accuracy {:.3f} valid_ply_rate {:.3f}\n'
                    .format(epoch, np.mean(train_loss), np.mean(train_accuracy),
                            np.mean(test_loss), np.mean(test_accuracy),
                            np.mean(valid_ply_rate)))

        if epoch % 3 == 0:
            serializers.save_hdf5(
                os.path.join(out_dir, "models", "sl_policy_{}.model".format(epoch)),
                model)
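# A hedged usage sketch (not in the original): restoring one of the epoch
# snapshots written above for inference. The epoch number and path are
# illustrative; the RLPolicy instance must be constructed exactly as during
# training before load_hdf5 can copy the saved weights into it.
model = RLPolicy()
serializers.load_hdf5(os.path.join('runs', '0101', 'models', 'sl_policy_9.model'),
                      model)
# model.to_gpu()  # if evaluating on GPU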
def train(args):
    trace('loading corpus ...')
    with open(args.source) as fp:
        trees = [make_tree(l) for l in fp]

    trace('extracting leaf nodes ...')
    word_lists = [extract_words(t) for t in trees]
    lower_lists = [[w.lower() for w in words] for words in word_lists]

    trace('extracting gold operations ...')
    op_lists = [make_operations(t) for t in trees]

    trace('making vocabulary ...')
    word_vocab = Vocabulary.new(lower_lists, args.vocab)
    phrase_set = set()
    semiterminal_set = set()
    for tree in trees:
        phrase_set |= set(extract_phrase_labels(tree))
        semiterminal_set |= set(extract_semiterminals(tree))
    phrase_vocab = Vocabulary.new([list(phrase_set)], len(phrase_set),
                                  add_special_tokens=False)
    semiterminal_vocab = Vocabulary.new([list(semiterminal_set)],
                                        len(semiterminal_set),
                                        add_special_tokens=False)

    trace('converting data ...')
    word_lists = [convert_word_list(x, word_vocab) for x in word_lists]
    op_lists = [convert_op_list(x, phrase_vocab, semiterminal_vocab)
                for x in op_lists]

    trace('start training ...')
    parser = Parser(
        args.vocab, args.embed, args.char_embed, args.queue,
        args.stack, args.srstate, len(phrase_set), len(semiterminal_set),
    )
    if args.use_gpu:
        parser.to_gpu()
    opt = optimizers.SGD(lr=0.1)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(10))
    opt.add_hook(optimizer.WeightDecay(0.0001))

    batch_set = list(zip(word_lists, op_lists))

    for epoch in range(args.epoch):
        n = 0
        random.shuffle(batch_set)

        for samples in batch(batch_set, args.minibatch):
            parser.zerograds()
            loss = XP.fzeros(())

            for word_list, op_list in zip(*samples):
                trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
                loss += parser.forward(word_list, op_list, 0)
                n += 1

            loss.backward()
            opt.update()

        trace('saving model ...')
        # '%03d' zero-pads the epoch; the original '%03.d' prints spaces instead
        prefix = args.model + '.%03d' % (epoch + 1)
        word_vocab.save(prefix + '.words')
        phrase_vocab.save(prefix + '.phrases')
        semiterminal_vocab.save(prefix + '.semiterminals')
        parser.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', parser)

        opt.lr *= 0.92

    trace('finished.')
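# A minimal restore sketch (an assumption, not from the original source): each
# epoch above writes five files under one prefix. Vocabulary.load and
# Parser.load_spec are hypothetical mirrors of the save/save_spec calls shown;
# only serializers.load_hdf5 is a confirmed Chainer API.
prefix = 'model.001'  # illustrative prefix
parser = Parser.load_spec(prefix + '.spec')      # hypothetical counterpart of save_spec
word_vocab = Vocabulary.load(prefix + '.words')  # hypothetical counterpart of save
serializers.load_hdf5(prefix + '.weights', parser)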
        # (tail of the inner mini-batch loop)
        optm_dnn.update()
        loss = 0.0
        loss_cnt = 0

    sys.stdout.write('\n')
    ave_loss = sum_loss / total_cnt
    print("Average Loss = " + str(ave_loss))

    print("Save DNN...")
    newdatetime = datetime.now().strftime('%m%d_%H%M%S')
    dnnName = DNNmode + '_s' + str(stateSize) + 'h' + str(hiddenSize)
    saveDNNfn = mm.DNNfn(dnn_dir, dnnName, chNum, winLen, shiftLen, fftLen,
                         winMode, lossMode, epoch, newdatetime)
    serializers.save_hdf5(saveDNNfn, dnnEst)

    process_time = time.time() - start
    print("exeTime(sec):" + str(process_time))
    start = time.time()

    # remove the checkpoint of the previous epoch, if any
    try:
        delfn = mm.DNNfn(dnn_dir, dnnName, chNum, winLen, shiftLen, fftLen,
                         winMode, lossMode, epoch - 1, olddatetime)
        os.remove(delfn)
    except OSError:  # a bare `except:` in the original; OSError covers os.remove
        print('no file to delete')
    olddatetime = newdatetime

    plt.figure()
def run(data_file, is_train=False, **args):
    for k in six.iterkeys(args):
        args[k] = deepcrf.util.str_to_unicode_python2(args[k])

    is_test = not is_train
    batchsize = args['batchsize']
    model_name = args['model_name']
    optimizer_name = args['optimizer']
    save_dir = args['save_dir']
    print(args)

    def convert_multi_files(data_file):
        if args.get('use_list_files', False):
            with open(data_file) as f:
                data_files = [filename.strip() for filename in f]
        else:
            data_files = [data_file]
        return data_files

    data_files = convert_multi_files(data_file)

    # check that save_dir exists
    if not os.path.isdir(save_dir):
        err_msg = 'There is no dir : {}\n'.format(save_dir)
        err_msg += '##############################\n'
        err_msg += '## Please run the following: \n'
        err_msg += '## $ mkdir {}\n'.format(save_dir)
        err_msg += '##############################\n'
        raise ValueError(err_msg)

    save_name = args['save_name']
    save_name = os.path.join(save_dir, save_name)

    xp = cuda.cupy if args['gpu'] >= 0 else np
    efficient_gpu = False
    if args['gpu'] >= 0:
        cuda.get_device_from_id(args['gpu']).use()
        xp.random.seed(1234)
        efficient_gpu = args.get('efficient_gpu', False)

    def to_gpu(x):
        if args['gpu'] >= 0:
            return chainer.cuda.to_gpu(x)
        return x

    # load files
    dev_file = args['dev_file']
    test_file = args['test_file']
    delimiter = args['delimiter']
    input_idx = list(map(int, args['input_idx'].split(',')))
    output_idx = list(map(int, args['output_idx'].split(',')))
    word_input_idx = input_idx[0]  # NOTE: word_idx is first column!
    additional_input_idx = input_idx[1:]

    sentences_train = []
    if is_train:
        sentences_train = deepcrf.util.read_conll_file(filenames=data_files,
                                                       delimiter=delimiter)
        if len(sentences_train) == 0:
            s = str(len(sentences_train))
            err_msg = 'Invalid training sizes: {} sentences. '.format(s)
            raise ValueError(err_msg)
    else:
        # Predict
        if len(input_idx) == 1:
            # raw text format
            sentences_train = deepcrf.util.read_raw_file(filenames=data_files,
                                                         delimiter=u' ')
        else:
            # conll format
            sentences_train = deepcrf.util.read_conll_file(filenames=data_files,
                                                           delimiter=delimiter)

    # sentences_train = sentences_train[:100]
    sentences_dev = []
    sentences_test = []
    if dev_file:
        dev_file = convert_multi_files(dev_file)
        sentences_dev = deepcrf.util.read_conll_file(dev_file, delimiter=delimiter)
    if test_file:
        test_file = convert_multi_files(test_file)
        sentences_test = deepcrf.util.read_conll_file(test_file, delimiter=delimiter)

    # Additional setup
    vocab_adds = []
    for ad_feat_id in additional_input_idx:
        sentences_additional_train = [[feat_obj[ad_feat_id] for feat_obj in sentence]
                                      for sentence in sentences_train]
        vocab_add = deepcrf.util.build_vocab(sentences_additional_train)
        vocab_adds.append(vocab_add)

    save_vocab = save_name + '.vocab'
    save_vocab_char = save_name + '.vocab_char'
    save_tags_vocab = save_name + '.vocab_tag'
    save_train_config = save_name + '.train_config'

    # TODO: check unknown pos tags
    # TODO: compute unk words
    if is_train:
        sentences_words_train = [[w_obj[word_input_idx] for w_obj in sentence]
                                 for sentence in sentences_train]
        vocab = deepcrf.util.build_vocab(sentences_words_train)
        vocab_char = deepcrf.util.build_vocab(
            deepcrf.util.flatten(sentences_words_train))
        vocab_tags = deepcrf.util.build_tag_vocab(sentences_train)
    elif is_test:
        vocab = deepcrf.util.load_vocab(save_vocab)
        vocab_char = deepcrf.util.load_vocab(save_vocab_char)
        vocab_tags = deepcrf.util.load_vocab(save_tags_vocab)
        vocab_adds = []
        for i, idx in enumerate(additional_input_idx):
            save_additional_vocab = save_name + '.vocab_additional_' + str(i)
            vocab_add = deepcrf.util.load_vocab(save_additional_vocab)
            vocab_adds.append(vocab_add)

    if args.get('word_emb_file', False):
        # set Pre-trained embeddings
        # emb_file = './emb/glove.6B.100d.txt'
        emb_file = args['word_emb_file']
        word_emb_vocab_type = args.get('word_emb_vocab_type')

        def assert_word_emb_shape(shape1, shape2):
            err_msg = '''Pre-trained embedding size is not equal to `--n_word_emb` ({} != {})'''
            if shape1 != shape2:
                err_msg = err_msg.format(str(shape1), str(shape2))
                raise ValueError(err_msg)

        def assert_no_emb(word_vecs):
            err_msg = '''There are no embeddings! Please check your file `--word_emb_file`'''
            if word_vecs.shape[0] == 0:
                raise ValueError(err_msg)

        if word_emb_vocab_type == 'replace_all':
            # replace all vocab by Pre-trained embeddings
            word_vecs, vocab_glove = \
                deepcrf.util.load_glove_embedding_include_vocab(emb_file)
            vocab = vocab_glove
        elif word_emb_vocab_type == 'replace_only':
            word_ids, word_vecs = deepcrf.util.load_glove_embedding(emb_file, vocab)
            assert_no_emb(word_vecs)
        elif word_emb_vocab_type == 'additional':
            word_vecs, vocab_glove = \
                deepcrf.util.load_glove_embedding_include_vocab(emb_file)
            additional_vecs = []
            for word, word_idx in sorted(six.iteritems(vocab_glove),
                                         key=lambda x: x[1]):
                if word not in vocab:
                    vocab[word] = len(vocab)
                    additional_vecs.append(word_vecs[word_idx])
            additional_vecs = np.array(additional_vecs, dtype=np.float32)

    if args.get('vocab_file', False):
        vocab_file = args['vocab_file']
        vocab = deepcrf.util.load_vocab(vocab_file)

    if args.get('vocab_char_file', False):
        vocab_char_file = args['vocab_char_file']
        vocab_char = deepcrf.util.load_vocab(vocab_char_file)

    vocab_tags_inv = dict((v, k) for k, v in six.iteritems(vocab_tags))
    PAD_IDX = vocab[PADDING]
    UNK_IDX = vocab[UNKWORD]
    CHAR_PAD_IDX = vocab_char[PADDING]
    CHAR_UNK_IDX = vocab_char[UNKWORD]

    tmp_xp = xp
    if efficient_gpu:
        tmp_xp = np  # use CPU (numpy)

    def parse_to_word_ids(sentences, word_input_idx, vocab):
        return deepcrf.util.parse_to_word_ids(sentences, xp=tmp_xp, vocab=vocab,
                                              UNK_IDX=UNK_IDX, idx=word_input_idx)

    def parse_to_char_ids(sentences):
        return deepcrf.util.parse_to_char_ids(sentences, xp=tmp_xp,
                                              vocab_char=vocab_char,
                                              UNK_IDX=CHAR_UNK_IDX,
                                              idx=word_input_idx)

    def parse_to_tag_ids(sentences):
        return deepcrf.util.parse_to_tag_ids(sentences, xp=tmp_xp,
                                             vocab=vocab_tags,
                                             UNK_IDX=-1, idx=-1)

    x_train = parse_to_word_ids(sentences_train, word_input_idx, vocab)
    x_char_train = parse_to_char_ids(sentences_train)
    y_train = parse_to_tag_ids(sentences_train)
    x_train_additionals = [parse_to_word_ids(sentences_train, ad_feat_id, vocab_adds[i])
                           for i, ad_feat_id in enumerate(additional_input_idx)]

    x_dev = parse_to_word_ids(sentences_dev, word_input_idx, vocab)
    x_char_dev = parse_to_char_ids(sentences_dev)
    y_dev = parse_to_tag_ids(sentences_dev)
    x_dev_additionals = [parse_to_word_ids(sentences_dev, ad_feat_id, vocab_adds[i])
                         for i, ad_feat_id in enumerate(additional_input_idx)]
    y_dev_cpu = [[w[-1] for w in sentence] for sentence in sentences_dev]

    tag_names = list(set([tag[2:] if len(tag) >= 2 else tag[0]
                          for tag in six.iterkeys(vocab_tags)]))

    x_test = parse_to_word_ids(sentences_test, word_input_idx, vocab)
    x_char_test = parse_to_char_ids(sentences_test)
    y_test = parse_to_tag_ids(sentences_test)
    x_test_additionals = [parse_to_word_ids(sentences_test, ad_feat_id, vocab_adds[i])
                          for i, ad_feat_id in enumerate(additional_input_idx)]

    cnt_train_unk = sum([tmp_xp.sum(d == UNK_IDX) for d in x_train])
    cnt_train_word = sum([d.size for d in x_train])
    unk_train_unk_rate = float(cnt_train_unk) / cnt_train_word

    cnt_dev_unk = sum([tmp_xp.sum(d == UNK_IDX) for d in x_dev])
    cnt_dev_word = sum([d.size for d in x_dev])
    unk_dev_unk_rate = float(cnt_dev_unk) / max(cnt_dev_word, 1)

    logging.info('train:' + str(len(x_train)))
    logging.info('dev  :' + str(len(x_dev)))
    logging.info('test :' + str(len(x_test)))
    logging.info('vocab     :' + str(len(vocab)))
    logging.info('vocab_tags:' + str(len(vocab_tags)))
    logging.info('unk count (train):' + str(cnt_train_unk))
    logging.info('unk rate  (train):' + str(unk_train_unk_rate))
    logging.info('cnt all words (train):' + str(cnt_train_word))
    logging.info('unk count (dev):' + str(cnt_dev_unk))
    logging.info('unk rate  (dev):' + str(unk_dev_unk_rate))
    logging.info('cnt all words (dev):' + str(cnt_dev_word))

    # show model config
    logging.info('######################')
    logging.info('## Model Config')
    logging.info('model_name:' + str(model_name))
    logging.info('batchsize:' + str(batchsize))
    logging.info('optimizer:' + str(optimizer_name))

    # Save model config
    logging.info('######################')
    logging.info('## Model Save Config')
    logging.info('save_dir :' + str(save_dir))
    # save vocab
    logging.info('save_vocab        :' + save_vocab)
    logging.info('save_vocab_char   :' + save_vocab_char)
    logging.info('save_tags_vocab   :' + save_tags_vocab)
    logging.info('save_train_config :' + save_train_config)

    init_emb = None

    if is_train:
        deepcrf.util.write_vocab(save_vocab, vocab)
        deepcrf.util.write_vocab(save_vocab_char, vocab_char)
        deepcrf.util.write_vocab(save_tags_vocab, vocab_tags)
        deepcrf.util.write_vocab(save_train_config, args)
        for i, vocab_add in enumerate(vocab_adds):
            save_additional_vocab = save_name + '.vocab_additional_' + str(i)
            deepcrf.util.write_vocab(save_additional_vocab, vocab_add)

    n_vocab_add = [len(_vadd) for _vadd in vocab_adds]

    net = BiLSTM_CNN_CRF(n_vocab=len(vocab),
                         n_char_vocab=len(vocab_char),
                         emb_dim=args['n_word_emb'],
                         hidden_dim=args['n_hidden'],
                         n_layers=args['n_layer'],
                         init_emb=init_emb,
                         char_input_dim=args['n_char_emb'],
                         char_hidden_dim=args['n_char_hidden'],
                         n_label=len(vocab_tags),
                         n_add_feature_dim=args['n_add_feature_emb'],
                         n_add_feature=len(n_vocab_add),
                         n_vocab_add=n_vocab_add,
                         use_cudnn=args['use_cudnn'])
    my_cudnn(args['use_cudnn'])

    if args.get('word_emb_file', False):
        if word_emb_vocab_type == 'replace_all':
            # replace all vocab by Pre-trained embeddings
            assert_word_emb_shape(word_vecs.shape[1], net.word_embed.W.shape[1])
            net.word_embed.W.data = word_vecs[:]
        elif word_emb_vocab_type == 'replace_only':
            assert_no_emb(word_vecs)
            assert_word_emb_shape(word_vecs.shape[1], net.word_embed.W.shape[1])
            net.word_embed.W.data[word_ids] = word_vecs[:]
        elif word_emb_vocab_type == 'additional':
            assert_word_emb_shape(word_vecs.shape[1], net.word_embed.W.shape[1])
            v_size = additional_vecs.shape[0]
            net.word_embed.W.data[-v_size:] = additional_vecs[:]

    if args.get('return_model', False):
        return net

    if args['gpu'] >= 0:
        net.to_gpu()

    init_alpha = args['init_lr']
    # consistent elif chain (the original mixed `if` and `elif`); the final
    # guard avoids an undefined `opt` for an unknown optimizer name
    if optimizer_name == 'adam':
        opt = optimizers.Adam(alpha=init_alpha, beta1=0.9, beta2=0.9)
    elif optimizer_name == 'adadelta':
        opt = optimizers.AdaDelta()
    elif optimizer_name == 'sgd_mom':
        opt = optimizers.MomentumSGD(lr=init_alpha, momentum=0.9)
    elif optimizer_name == 'sgd':
        opt = optimizers.SGD(lr=init_alpha)
    else:
        raise ValueError('unknown optimizer: {}'.format(optimizer_name))
    opt.setup(net)
    opt.add_hook(chainer.optimizer.GradientClipping(5.0))

    def eval_loop(x_data, x_char_data, y_data, x_train_additionals=[]):
        # dev or test
        net.set_train(train=False)
        iteration_list = range(0, len(x_data), batchsize)
        # perm = np.random.permutation(len(x_data))
        sum_loss = 0.0
        predict_lists = []
        for i_index, index in enumerate(iteration_list):
            x = x_data[index:index + batchsize]
            x_char = x_char_data[index:index + batchsize]
            target_y = y_data[index:index + batchsize]
            if efficient_gpu:
                x = [to_gpu(_) for _ in x]
                x_char = [[to_gpu(_) for _ in words] for words in x_char]
                target_y = [to_gpu(_) for _ in target_y]
            x_additional = []
            if len(x_train_additionals):
                x_additional = [[to_gpu(_) for _ in x_ad[index:index + batchsize]]
                                for x_ad in x_train_additionals]
            output = net(x_data=x, x_char_data=x_char, x_additional=x_additional)
            predict, loss = net.predict(output, target_y)
            sum_loss += loss.data
            predict_lists.extend(predict)

        _, predict_tags = zip(*predict_lists)
        predicted_results = []
        for predict in predict_tags:
            predicted = [vocab_tags_inv[tag_idx] for tag_idx in to_cpu(predict)]
            predicted_results.append(predicted)
        return predict_lists, sum_loss, predicted_results

    if args['model_filename']:
        model_filename = args['model_filename']
        serializers.load_hdf5(model_filename, net)

    if is_test:
        # predict
        # model_filename = args['model_filename']
        # model_filename = os.path.join(save_dir, model_filename)
        # serializers.load_hdf5(model_filename, net)
        vocab_tags_inv = dict([(v, k) for k, v in six.iteritems(vocab_tags)])
        x_predict = x_train
        x_char_predict = x_char_train
        x_additionals = x_train_additionals
        y_predict = y_train

        if dev_file:
            predict_dev, loss_dev, predict_dev_tags = eval_loop(
                x_dev, x_char_dev, y_dev, x_dev_additionals)
            gold_predict_pairs = [y_dev_cpu, predict_dev_tags]
            result, phrase_info = deepcrf.util.conll_eval(
                gold_predict_pairs, flag=False, tag_class=tag_names)
            all_result = result['All_Result']
            print('all_result: {}'.format(all_result))

        predict_pairs, _, _tmp = eval_loop(x_predict, x_char_predict,
                                           y_predict, x_additionals)
        _, predict_tags = zip(*predict_pairs)
        predicted_output = args['predicted_output']
        predicted_results = []
        for predict in predict_tags:
            predicted = [vocab_tags_inv[tag_idx] for tag_idx in to_cpu(predict)]
            predicted_results.append(predicted)

        with open(predicted_output, 'w') as f:
            for predicted in predicted_results:
                for tag in predicted:
                    f.write(tag + '\n')
                f.write('\n')
        return False

    logging.info('start training...')
    tmax = args['max_iter']
    t = 0.0
    prev_dev_accuracy = 0.0
    prev_dev_f = 0.0
    for epoch in six.moves.xrange(args['max_iter']):
        # train
        logging.info('epoch:' + str(epoch))
        logging.info(' [train]')
        net.set_train(train=True)
        iteration_list = range(0, len(x_train), batchsize)
        perm = np.random.permutation(len(x_train))
        sum_loss = 0.0
        predict_train = []
        for i_index, index in enumerate(iteration_list):
            data = [(x_train[i], x_char_train[i], y_train[i])
                    for i in perm[index:index + batchsize]]
            x, x_char, target_y = zip(*data)

            x_additional = []
            if len(x_train_additionals):
                x_additional = [[to_gpu(x_ad[add_i])
                                 for add_i in perm[index:index + batchsize]]
                                for x_ad in x_train_additionals]
            if efficient_gpu:
                x = [to_gpu(_) for _ in x]
                x_char = [[to_gpu(_) for _ in words] for words in x_char]
                target_y = [to_gpu(_) for _ in target_y]

            output = net(x_data=x, x_char_data=x_char, x_additional=x_additional)
            predict, loss = net.predict(output, target_y)

            # loss
            sum_loss += loss.data

            # update
            net.cleargrads()
            loss.backward()
            opt.update()

            predict_train.extend(predict)

        # Evaluation
        train_accuracy = deepcrf.util.eval_accuracy(predict_train)
        logging.info(' loss     :' + str(sum_loss))
        logging.info(' accuracy :' + str(train_accuracy))

        # Dev
        predict_dev, loss_dev, predict_dev_tags = eval_loop(
            x_dev, x_char_dev, y_dev, x_dev_additionals)
        gold_predict_pairs = [y_dev_cpu, predict_dev_tags]
        result, phrase_info = deepcrf.util.conll_eval(
            gold_predict_pairs, flag=False, tag_class=tag_names)
        all_result = result['All_Result']

        # Evaluation
        dev_accuracy = deepcrf.util.eval_accuracy(predict_dev)
        logging.info(' [dev]')
        logging.info(' loss      :' + str(loss_dev))
        logging.info(' accuracy  :' + str(dev_accuracy))
        logging.info(' f_measure :' + str(all_result[-1]))

        dev_f = all_result[-1]
        if prev_dev_f < dev_f:
            logging.info(' [update best model on dev set!]')
            dev_list = [prev_dev_f, dev_f]
            dev_str = '  ' + ' => '.join(map(str, dev_list))
            logging.info(dev_str)
            prev_dev_f = dev_f

            # Save model
            model_filename = save_name + '_epoch' + str(epoch)
            serializers.save_hdf5(model_filename + '.model', net)
            serializers.save_hdf5(model_filename + '.state', opt)
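# Optional helper (an assumption, not part of deepcrf itself): since a new
# '<save_name>_epoch<N>.model' pair is written above only when the dev
# F-score improves, the best checkpoint is the one with the largest epoch
# number. A small glob-based lookup:
import glob
import re

def find_best_checkpoint(save_name):
    candidates = glob.glob(save_name + '_epoch*.model')
    if not candidates:
        return None
    return max(candidates,
               key=lambda fn: int(re.search(r'_epoch(\d+)\.model$', fn).group(1)))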
def train_all(params):
    target_save_dir = osp.join(params['save_dir'], 'prepro',
                               params['dataset'] + '_' + params['splitBy'])
    graph_dir = osp.join('log_graph', params['dataset'] + '_' + params['splitBy'])
    model_dir = osp.join(params['save_dir'], 'model',
                         params['dataset'] + '_' + params['splitBy'])
    if params['old']:
        params['data_json'] = 'old' + params['data_json']
        params['data_h5'] = 'old' + params['data_h5']
        params['image_feats'] = 'old' + params['image_feats']
        params['ann_feats'] = 'old' + params['ann_feats']
        params['id'] = 'old' + params['id']
        params['word_emb_path'] = 'old' + params['word_emb_path']
    with open('setting.json', 'w') as f:
        json.dump(params, f)
    if not osp.isdir(graph_dir):
        os.mkdir(graph_dir)
    loader = DataLoader(params)

    # model setting
    batch_size = params['batch_size']
    gpu_id = params['gpu_id']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy
    featsOpt = {'ann': osp.join(target_save_dir, params['ann_feats']),
                'img': osp.join(target_save_dir, params['image_feats'])}
    loader.loadFeats(featsOpt)
    loader.shuffle('train')

    ve = VisualEncoder(res6=L.ResNet152Layers().fc6).to_gpu(gpu_id)
    if 'attention' in params['id']:
        print('attention language encoder')
        le = LanguageEncoderAttn(len(loader.ix_to_word))
        rl_crit = ListenerReward(len(loader.ix_to_word), attention=True).to_gpu(gpu_id)
    else:
        le = LanguageEncoder(len(loader.ix_to_word))
        rl_crit = ListenerReward(len(loader.ix_to_word), attention=False).to_gpu(gpu_id)
    cca = CcaEmbedding().to_gpu(gpu_id)
    lm = LanguageModel(len(loader.ix_to_word), loader.seq_length)

    if params['pretrained_w']:
        print('pretrained word embedding...')
        word_emb = load_vcab_init(
            loader.word_to_ix, osp.join(target_save_dir, params['word_emb_path']))
        le.word_emb.W.data = word_emb
        lm.word_emb = le.word_emb
    le.to_gpu(gpu_id)
    lm.to_gpu(gpu_id)
    serializers.load_hdf5(osp.join(model_dir, params['id'] + ".h5"), rl_crit)

    ve_optim = optimizers.Adam(alpha=4e-5, beta1=0.8)
    le_optim = optimizers.Adam(alpha=4e-4, beta1=0.8)
    cca_optim = optimizers.Adam(alpha=4e-4, beta1=0.8)
    lm_optim = optimizers.Adam(alpha=4e-4, beta1=0.8)
    ve_optim.setup(ve)
    le_optim.setup(le)
    cca_optim.setup(cca)
    lm_optim.setup(lm)
    ve_optim.add_hook(chainer.optimizer.GradientClipping(0.1))
    le_optim.add_hook(chainer.optimizer.GradientClipping(0.1))
    cca_optim.add_hook(chainer.optimizer.GradientClipping(0.1))
    lm_optim.add_hook(chainer.optimizer.GradientClipping(0.1))
    ve.joint_enc.W.update_rule.hyperparam.alpha = 4e-4
    ve.joint_enc.b.update_rule.hyperparam.alpha = 4e-4

    iteration = 0
    epoch = 0
    val_loss_history = []
    val_loss_lm_s_history = []
    val_loss_lm_l_history = []
    val_loss_l_history = []
    val_acc_history = []
    val_rank_acc_history = []
    min_val_loss = 100

    while True:
        chainer.config.train = True
        chainer.config.enable_backprop = True
        ve.zerograds()
        le.zerograds()
        cca.zerograds()
        lm.zerograds()
        rl_crit.zerograds()

        data = loader.getBatch('train', params)
        ref_ann_ids = data['ref_ann_ids']
        pos_feats = Variable(xp.array(data['feats'], dtype=xp.float32))
        neg_feats = Variable(xp.array(data['neg_feats'], dtype=xp.float32))
        feats = F.concat([pos_feats, neg_feats, pos_feats], axis=0)
        seqz = np.concatenate([data['seqz'], data['seqz'], data['neg_seqz']], axis=0)
        lang_last_ind = calc_max_ind(seqz)
        seqz = Variable(xp.array(seqz, dtype=xp.int32))

        vis_enc_feats = ve(feats)
        lang_enc_feats = le(seqz, lang_last_ind)
        cossim, vis_emb_feats = cca(vis_enc_feats, lang_enc_feats)
        vis_feats = vis_combine(vis_enc_feats, vis_emb_feats)
        logprobs = lm(vis_feats, seqz, lang_last_ind)

        # emb loss
        pairSim, vis_unpairSim, lang_unpairSim = F.split_axis(cossim, 3, axis=0)
        emb_flows = {'vis': [pairSim, vis_unpairSim],
                     'lang': [pairSim, lang_unpairSim]}
        emb_loss = emb_crits(emb_flows, params['emb_margin'])

        # lang loss
        pairP, vis_unpairP, lang_unpairP = F.split_axis(logprobs, 3, axis=1)
        pair_num, _, lang_unpair_num = np.split(lang_last_ind, 3)
        num_labels = {'T': pair_num, 'F': lang_unpair_num}
        lm_flows = {'T': pairP,
                    'visF': [pairP, vis_unpairP],
                    'langF': [pairP, lang_unpairP]}
        lm_loss = lm_crits(lm_flows, num_labels, params['lm_margin'],
                           vlamda=params['vis_rank_weight'],
                           llamda=params['lang_rank_weight'])

        # RL loss (computed only for the (pos, pos) pair)
        rl_vis_feats = F.split_axis(vis_feats, 3, axis=0)[0]
        sampled_seq, sample_log_probs = lm.sample(rl_vis_feats)
        sampled_lang_last_ind = calc_max_ind(sampled_seq)
        rl_loss = rl_crit(pos_feats, sampled_seq, sample_log_probs,
                          sampled_lang_last_ind)  # , lm.baseline)

        loss = emb_loss + lm_loss + rl_loss
        print(emb_loss, lm_loss, rl_loss)
        loss.backward()
        ve_optim.update()
        le_optim.update()
        cca_optim.update()
        lm_optim.update()

        if data['bounds']['wrapped']:
            print('one epoch finished!')
            loader.shuffle('train')

        if params['check_sent']:
            sampled_sents = loader.decode_sequence(cuda.to_cpu(sampled_seq),
                                                   sampled_lang_last_ind)
            for i in range(len(sampled_sents)):
                print('sampled sentence : ', ' '.join(sampled_sents[i]))
                print('reward : ', rl_crit.reward[i])

        if iteration % params['losses_log_every'] == 0:
            acc = xp.where(rl_crit.reward > 0.5, 1, 0).mean()
            print('{} iter : train loss {}, acc : {}, reward_mean : {}'.format(
                iteration, loss.data, acc, rl_crit.reward.mean()))

        if iteration % params['mine_hard_every'] == 0 and iteration > 0 \
                and params['mine_hard']:
            make_graph(ve, cca, loader, 'train', params, xp)

        if iteration % params['save_checkpoint_every'] == 0 and iteration > 0:
            chainer.config.train = False
            chainer.config.enable_backprop = False
            loader.resetImageIterator('val')
            loss_sum = 0
            loss_generation = 0
            loss_lm_margin = 0
            loss_emb_margin = 0
            loss_evals = 0
            accuracy = 0
            rank_acc = 0
            rank_num = 0

            while True:
                data = loader.getImageBatch('val', params)
                image_id = data['image_id']
                img_ann_ids = data['img_ann_ids']
                sent_ids = data['sent_ids']
                gd_ixs = data['gd_ixs']
                feats = Variable(xp.array(data['feats'], dtype=xp.float32))
                seqz = data['seqz']
                lang_last_ind = calc_max_ind(seqz)
                scores = []
                for i, sent_id in enumerate(sent_ids):
                    gd_ix = gd_ixs[i]
                    labels = xp.zeros(len(img_ann_ids), dtype=xp.int32)
                    labels[gd_ix] = 1
                    labels = Variable(labels)

                    sent_seqz = np.concatenate(
                        [[seqz[i]] for _ in range(len(img_ann_ids))], axis=0)
                    one_last_ind = np.array([lang_last_ind[i]] * len(img_ann_ids))
                    sent_seqz = Variable(xp.array(sent_seqz, dtype=xp.int32))

                    vis_enc_feats = ve(feats)
                    lang_enc_feats = le(sent_seqz, one_last_ind)
                    cossim, vis_emb_feats = cca(vis_enc_feats, lang_enc_feats)
                    vis_feats = vis_combine(vis_enc_feats, vis_emb_feats)
                    logprobs = lm(vis_feats, sent_seqz, one_last_ind).data

                    gd_ix = gd_ixs[i]
                    lm_generation_loss = lm_crits(
                        {'T': logprobs[:, gd_ix, xp.newaxis]},
                        {'T': one_last_ind[gd_ix, np.newaxis]},
                        params['lm_margin'], vlamda=0, llamda=0).data

                    lm_scores = -computeLosses(logprobs, one_last_ind)
                    lm_margin_loss, _, _ = compute_margin_loss(
                        lm_scores, gd_ix, params['lm_margin'])
                    scores.append(lm_scores[gd_ix])
                    emb_margin_loss, pos_sc, max_neg_sc = compute_margin_loss(
                        cossim.data, gd_ix, params['emb_margin'])

                    loss_generation += lm_generation_loss
                    loss_lm_margin += lm_margin_loss
                    loss_emb_margin += emb_margin_loss
                    loss_sum += lm_generation_loss + lm_margin_loss + emb_margin_loss
                    loss_evals += 1
                    if pos_sc > max_neg_sc:
                        accuracy += 1

                if params['dataset'] == 'refgta':
                    rank_a, rank_n = calc_rank_acc(scores, data['rank'])
                    rank_acc += rank_a
                    rank_num += rank_n

                print('{} iter | {}/{} validating acc : {}'.format(
                    iteration, data['bounds']['it_pos_now'],
                    data['bounds']['it_max'], accuracy / loss_evals))

                if data['bounds']['wrapped']:
                    print('validation finished!')
                    fin_val_loss = cuda.to_cpu(loss_sum / loss_evals)
                    loss_generation = cuda.to_cpu(loss_generation / loss_evals)
                    loss_lm_margin = cuda.to_cpu(loss_lm_margin / loss_evals)
                    loss_emb_margin = cuda.to_cpu(loss_emb_margin / loss_evals)
                    fin_val_acc = accuracy / loss_evals
                    break

            val_loss_history.append(fin_val_loss)
            val_loss_lm_s_history.append(loss_generation)
            val_loss_lm_l_history.append(loss_lm_margin)
            val_loss_l_history.append(loss_emb_margin)
            val_acc_history.append(fin_val_acc)

            if min_val_loss > fin_val_loss:
                print('val loss {} -> {} improved!'.format(
                    min_val_loss, val_loss_history[-1]))
                min_val_loss = fin_val_loss
                serializers.save_hdf5(
                    osp.join(model_dir, params['id'] + params['id2'] + "ve.h5"), ve)
                serializers.save_hdf5(
                    osp.join(model_dir, params['id'] + params['id2'] + "le.h5"), le)
                serializers.save_hdf5(
                    osp.join(model_dir, params['id'] + params['id2'] + "cca.h5"), cca)
                serializers.save_hdf5(
                    osp.join(model_dir, params['id'] + params['id2'] + "lm.h5"), lm)

            ## graph
            plt.title("accuracy")
            plt.plot(np.arange(len(val_acc_history)), val_acc_history,
                     label="val_accuracy")
            plt.legend()
            plt.savefig(os.path.join(
                graph_dir, params['id'] + params['id2'] + "_joint_acc.png"))
            plt.close()

            plt.title("loss")
            plt.plot(np.arange(len(val_loss_history)), val_loss_history,
                     label="all_loss")
            plt.plot(np.arange(len(val_loss_history)), val_loss_lm_s_history,
                     label="generation_loss")
            plt.legend()
            plt.savefig(os.path.join(
                graph_dir, params['id'] + params['id2'] + "_joint_loss.png"))
            plt.close()

            plt.title("loss")
            plt.plot(np.arange(len(val_loss_history)), val_loss_lm_l_history,
                     label="lm_comp_loss")
            plt.plot(np.arange(len(val_loss_history)), val_loss_l_history,
                     label="comp_loss")
            plt.legend()
            plt.savefig(os.path.join(
                graph_dir, params['id'] + params['id2'] + "_joint_comp_loss.png"))
            plt.close()

            if params['dataset'] == 'refgta':
                print(rank_num)
                val_rank_acc_history.append(rank_acc / rank_num)
                plt.title("rank loss")
                plt.plot(np.arange(len(val_rank_acc_history)),
                         val_rank_acc_history, label="rank_acc")
                plt.legend()
                plt.savefig(os.path.join(
                    graph_dir, params['id'] + params['id2'] + "_rank_acc.png"))
                plt.close()

        if iteration > params['learning_rate_decay_start'] \
                and params['learning_rate_decay_start'] >= 0:
            frac = (iteration - params['learning_rate_decay_start']) \
                / params['learning_rate_decay_every']
            decay_factor = math.pow(0.1, frac)
            ve_optim.alpha *= decay_factor
            le_optim.alpha *= decay_factor
            cca_optim.alpha *= decay_factor
            lm_optim.alpha *= decay_factor

        iteration += 1
    # (inside the per-layer conversion loop)
    # BatchNormalization.avg_var has the same size as out_ch
    txt = "yolov2.bn%d.avg_var = dat[%d:%d]" % (i + 1, offset, offset + out_ch)
    offset += out_ch
    exec(txt)

    # load convolution weights
    # (Convolution2D.W holds out_ch * in_ch * filter-size values;
    #  reshape them to (out_ch, in_ch, 3, 3))
    txt = "yolov2.conv%d.W.data = dat[%d:%d].reshape(%d, %d, %d, %d)" % (
        i + 1, offset, offset + (out_ch * in_ch * ksize * ksize),
        out_ch, in_ch, ksize, ksize)
    offset += (out_ch * in_ch * ksize * ksize)
    exec(txt)
    print(i + 1, offset)

# load the last convolution weights (only Bias and Convolution2D are loaded)
in_ch = 1024
out_ch = last_out
ksize = 1

txt = "yolov2.bias%d.b.data = dat[%d:%d]" % (i + 2, offset, offset + out_ch)
offset += out_ch
exec(txt)

txt = "yolov2.conv%d.W.data = dat[%d:%d].reshape(%d, %d, %d, %d)" % (
    i + 2, offset, offset + (out_ch * in_ch * ksize * ksize),
    out_ch, in_ch, ksize, ksize)
offset += out_ch * in_ch * ksize * ksize
exec(txt)
print(i + 2, offset)

print("save weights file to yolov2_darknet.model")
serializers.save_hdf5("yolov2_darknet.model", yolov2)
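# A hedged verification sketch (not in the original converter): reload the
# converted file into a fresh network and compare one tensor. The YOLOv2
# constructor arguments are placeholders; they must match whatever was used
# to build `yolov2` above.
yolov2_check = YOLOv2(n_classes=80, n_boxes=5)  # hypothetical arguments
serializers.load_hdf5("yolov2_darknet.model", yolov2_check)
assert (yolov2_check.conv1.W.data == yolov2.conv1.W.data).all()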
# The trainer decides how many times training is repeated;
# here it repeats for epoch_size epochs.
trainer = training.Trainer(updater, (epoch_size, 'epoch'))

# Show training progress.
trainer.extend(extensions.ProgressBar())

# Start training.
trainer.run()

# Check the trained model's accuracy.
print("Answer:", end="")
ok = 0
# Try every test sample.
for test_i in test:
    # Convert the image of test_i into a Variable (Chainer's own type).
    x = Variable(np.array([test_i[0]], dtype=np.float32))
    # The correct label of test_i.
    t = test_i[1]
    # Feed x to the model and get the forward-propagated result (list).
    out = model.fwd(x)
    # The unit with the largest output is taken as the answer.
    ans = np.argmax(out.data)
    if ans == t:
        ok += 1
print((ok * 1.0) / len(test))

# Save the model in HDF5 format.
serializers.save_hdf5(modelname, model)
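# A minimal reload sketch (an assumption, not in the original): save_hdf5
# stores only the parameters, so the same model class must be instantiated
# before loading. `MyModel` stands in for whatever class `model` has above.
model2 = MyModel()  # hypothetical constructor
serializers.load_hdf5(modelname, model2)
out = model2.fwd(Variable(np.array([test[0][0]], dtype=np.float32)))
print(np.argmax(out.data))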
    # util.evaluate_dataset(vae, X_train, batch_size, train_log_file, False, opt)
    util.evaluate_dataset(vae, X_validation, batch_size, test_log_file, False, opt)

    # extra *100 factor because we don't want a checkpoint at every log point
    if (args['-o'] is not None) and ((bi - 1) % (log_interval * 100) == 0):
        print('##################### Saving Model Checkpoint #####################')
        batch_number = str(bi).zfill(6)
        modelfile = directory + '/' + batch_number + '.h5'
        print("Writing model checkpoint to '%s' ..." % modelfile)
        serializers.save_hdf5(modelfile, vae)

    # (Optionally:) visualize computation graph
    if bi == 1 and args['--vis'] is not None:
        print("Writing computation graph to '%s/%s'." % (directory, args['--vis']))
        g = computational_graph.build_computational_graph([obj])
        util.print_compute_graph(directory + '/' + args['--vis'], g)

    # Sample a set of poses
    if (bi % sample_every_epoch == 0) and data_type == 'pose':
        counter += 1
        print(" # sampling")
        z = np.random.normal(loc=0.0, scale=1.0, size=(1024, nlatent))
        z = chainer.Variable(xp.asarray(z, dtype=np.float32), volatile='on')
def save(self, filename):
    if os.path.isfile(filename):
        os.remove(filename)
    serializers.save_hdf5(filename, self)
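# Usage sketch plus a symmetric loader (an assumption; the original class only
# defines save). Because save_hdf5(filename, self) serializes the object's own
# parameters, load can be the mirror image:
def load(self, filename):
    serializers.load_hdf5(filename, self)

# e.g. agent.save('agent.h5') ... and later: agent.load('agent.h5')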
        # (tail of the shared train/validation step)
        optimizer.update()
        # g_accum_loss = 0
        return loss.data, accuracy.data
    else:
        loss, accuracy = model(x, t, True)
        return accuracy.data
        # return loss.data, accuracy.data

# compute the standard deviation
g_stdDev = compute_stdDeviation(model, pathList)

# start training
train_loop()

# write stdDeviation array
pickle.dump(g_stdDev, open('sigma.npy', 'wb'), -1)

# write log (train)
# dicPlot = dict(zip(logArray[::2], logArray[1::2]))
# with open('plot.json', 'w') as f:
#     json.dump(dicPlot, f, sort_keys=True, indent=4)

# write log (validation)
# dicPlot2 = dict(zip(logArray2[::2], logArray2[1::2]))
# with open('plotV.json', 'w') as f:
#     json.dump(dicPlot2, f, sort_keys=True, indent=4)

# Save final model
model.to_cpu()
serializers.save_hdf5('modelhdf5', model)
# parser.add_argument('--lr_decay_ratio', type=float, default=0.1)
# parser.add_argument('--restart_from', type=str)
# parser.add_argument('--epoch_offset', type=int, default=0)
# parser.add_argument('--flip', type=int, default=0)
# parser.add_argument('--rot', type=int, default=0)
# parser.add_argument('--shift', type=int, default=0)
parser.add_argument('--transformations', type=str, default='')  # ast.literal_eval, default={}
# parser.add_argument('--size', type=int, default=28)
parser.add_argument('--val_freq', type=int, default=10)
parser.add_argument('--save_freq', type=int, default=10)
parser.add_argument('--seed', type=int, default=0)
parser.add_argument('--gpu', type=int, default=0)
# parser.add_argument('--snapshot_freq', type=int, default=10)
args = parser.parse_args()

val_error, model, resdict = train(logme=vars(args), **vars(args))

print 'Finished training'
print 'Final validation error:', val_error

print 'Saving model...'
import chainer.serializers as sl
sl.save_hdf5('./my.model', model)
def save_model(self, model_filename):
    """Save the network model and its optimizer state to files."""
    serializers.save_hdf5(model_filename, self.model)
    serializers.save_hdf5(model_filename + '.opt', self.optimizer)
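# A matching load_model counterpart (an assumption; only save_model appears in
# the source). It restores both files written above into already-built objects:
def load_model(self, model_filename):
    """Load a network model and optimizer state saved by save_model."""
    serializers.load_hdf5(model_filename, self.model)
    serializers.load_hdf5(model_filename + '.opt', self.optimizer)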
    # (closing of a json.dump({...}) call that logs per-tree loss)
    }, open('epoch%d_loss_by_tree.json' % (epoch + 1), 'w'))
    loss_curve.append(sum_loss / len(train_trees))
    print('train loss: {:.2f}'.format(sum_loss / len(train_trees)))

    print("Development data evaluation:")
    t = Thread(target=traverse_dev,
               args=(copy.deepcopy(model), dev_trees, dev_loss, args.gpus))
    t.start()

    throughput = float(len(train_trees)) / (now - cur_time)
    print('{:.2f} iter/sec, {:.2f} sec'.format(throughput, now - cur_time))
    print()

    if (epoch + 1) % args.evalinterval == 0:
        print("Model saving...")
        serializers.save_hdf5('./epoch_' + str(epoch + 1) + '.model', model)
        json.dump({"loss": dev_loss}, open('dev_loss_by_epoch.json', 'w'))
        for i, fp in enumerate(args.reorderfile):
            with codecs.open(fp + '.reordered', 'w', 'utf-8') as fre:
                for tree in rtrees[i]:
                    _, pred = traverse(model, tree, train=False, pred=True)
                    print(' '.join(pred), file=fre)

# plot the loss per epoch
t.join()
plt.clf()
plt.figure(figsize=(8, 8))
plt.plot(np.array([i + 1 for i in range(args.epoch)]),
         np.array(loss_curve),
def train(modelfn, trainfn, valfn, epochs, batchsize, opt, opt_kwargs,
          net_kwargs, transformations, val_freq, save_freq, seed, gpu,
          silent=False, logme=None):
    # Set the seed
    np.random.seed(seed)

    # Load and pre-process the data
    try:
        datadir = os.environ['DATADIR']
    except KeyError:
        raise RuntimeError(
            'Please set DATADIR environment variable (e.g. in ~/.bashrc) '
            'to a folder containing the required datasets.')
    train_set = np.load(os.path.join(datadir, trainfn))
    val_set = np.load(os.path.join(datadir, valfn))
    train_data = train_set['data']
    train_labels = train_set['labels']
    val_data = val_set['data']
    val_labels = val_set['labels']
    train_data, val_data, train_labels, val_labels = preprocess_mnist_data(
        train_data, val_data, train_labels, val_labels)

    # create result dir
    log_fn, result_dir = create_result_dir(modelfn, logme)

    # create model and optimizer
    model, optimizer = get_model_and_optimizer(
        result_dir, modelfn, opt, opt_kwargs, net_kwargs, gpu)

    # get the last commit
    subp = subprocess.Popen(['git', 'rev-parse', 'HEAD'],
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out, err = subp.communicate()
    commit = out.strip()
    if err.strip():
        logging.error('Subprocess returned %s' % err.strip())
    logging.info('Commit: ' + commit)

    # Get number of parameters
    # if not silent:
    #     print 'Parameter name, shape, size:'
    #     for p in model.params():
    #         print p.name, p.data.shape, p.data.size
    num_params = sum([p.data.size for p in model.params()])
    logging.info('Number of parameters:' + str(num_params))
    if not silent:
        print 'Number of parameters:' + str(num_params)

    n_train = train_data.shape[0]
    n_val = val_data.shape[0]

    logging.info('start training...')
    train_epochs = []
    train_errors = []
    train_losses = []
    train_times = []
    val_epochs = []
    val_errors = []
    val_losses = []
    val_times = []

    begin_time = time.time()

    sum_loss, sum_accuracy = validate(val_data, val_labels, model,
                                      batchsize, silent, gpu)
    val_times.append(time.time() - begin_time)
    val_epochs.append(0)
    val_errors.append(1. - sum_accuracy / n_val)
    val_losses.append(sum_loss / n_val)
    msg = 'epoch:{:02d}\ttest mean loss={}, error={}'.format(
        0, sum_loss / n_val, 1. - sum_accuracy / n_val)
    logging.info(msg)
    if not silent:
        print '\n%s' % msg

    # learning loop
    for epoch in range(1, epochs + 1):
        sum_loss, sum_accuracy = train_epoch(train_data, train_labels, model,
                                             optimizer, batchsize,
                                             transformations, silent, gpu)
        train_times.append(time.time() - begin_time)
        train_epochs.append(epoch)
        train_errors.append(1. - sum_accuracy / n_train)
        train_losses.append(sum_loss / n_train)
        msg = 'epoch:{:02d}\ttrain mean loss={}, error={}'.format(
            epoch, sum_loss / n_train, 1. - sum_accuracy / n_train)
        logging.info(msg)
        if not silent:
            print '\n%s' % msg

        if epoch % val_freq == 0:
            print 'FINETUNING'
            model.start_finetuning()
            sum_loss, sum_accuracy = train_epoch(train_data, train_labels,
                                                 model, optimizer, batchsize,
                                                 transformations, silent, gpu,
                                                 finetune=True)
            msg = 'epoch:{:02d}\tfinetune mean loss={}, error={}'.format(
                epoch, sum_loss / n_train, 1. - sum_accuracy / n_train)
            logging.info(msg)
            if not silent:
                print '\n%s' % msg

            sum_loss, sum_accuracy = validate(val_data, val_labels, model,
                                              batchsize, silent, gpu)
            val_times.append(time.time() - begin_time)
            val_epochs.append(epoch)
            val_errors.append(1. - sum_accuracy / n_val)
            val_losses.append(sum_loss / n_val)
            msg = 'epoch:{:02d}\ttest mean loss={}, error={}'.format(
                epoch, sum_loss / n_val, 1. - sum_accuracy / n_val)
            logging.info(msg)
            if not silent:
                print '\n%s' % msg
            mean_error = 1.0 - sum_accuracy / n_val

        if save_freq > 0 and epoch % save_freq == 0:
            print 'Saving model...'
            serializers.save_hdf5(
                os.path.join(result_dir, 'epoch.' + str(epoch) + '.model'),
                model)

    print 'Saving model...'
    serializers.save_hdf5(os.path.join(result_dir, 'final.model'), model)

    resdict = {'train_times': train_times, 'train_epochs': train_epochs,
               'train_errors': train_errors, 'train_losses': train_losses,
               'val_times': val_times, 'val_epochs': val_epochs,
               'val_errors': val_errors, 'val_losses': val_losses}
    print 'Saving results...'
    with open(os.path.join(result_dir, 'results.pickle'), 'wb') as handle:
        pickle.dump(resdict, handle)

    return mean_error, model, resdict
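# A short restore sketch (not in the original): 'final.model' holds only
# parameters, so the network must first be rebuilt via the same helper used
# during training before load_hdf5 can populate it.
model, optimizer = get_model_and_optimizer(result_dir, modelfn, opt,
                                           opt_kwargs, net_kwargs, gpu)
serializers.load_hdf5(os.path.join(result_dir, 'final.model'), model)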
def train(args):
    vocab = Vocabulary.from_conll(args.train, args.vocab)
    train_dataset = [conll_to_train(x, vocab) for x in read_conll(args.train)]
    dev_dataset = [conll_to_train(x, vocab) for x in read_conll(args.dev)]

    parser = Parser(args.vocab, args.embed, args.hidden, args.depth)
    if args.gpu >= 0:
        parser.to_gpu()

    opt = optimizers.AdaGrad(lr=0.01)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(10))
    opt.add_hook(optimizer.WeightDecay(0.0001))

    for epoch in range(args.epoch):
        random.shuffle(train_dataset)

        parser.zerograds()
        loss = XP.fzeros(())

        for i, data in enumerate(train_dataset):
            trace('epoch %3d: train sample %6d:' % (epoch + 1, i + 1))
            parent_scores, root_scores = parser.forward(data)
            if len(data) > 1:
                parent_scores = functions.split_axis(parent_scores, len(data), 0)
            else:
                parent_scores = (parent_scores,)

            root = -1
            for j, (p_scores, (wid, parent)) in enumerate(zip(parent_scores, data)):
                if parent == -1:
                    trace('  %3d: root' % j)
                    root = j
                else:
                    parent_est = p_scores.data.argmax()
                    trace('%c %3d -> %3d (%3d)' % (
                        '*' if parent == parent_est else ' ',
                        j, parent_est, parent))
                    loss += functions.softmax_cross_entropy(
                        p_scores, XP.iarray([parent]))

            root_est = root_scores.data.argmax()
            trace('ROOT: %3d (%3d)' % (root_est, root))
            loss += functions.softmax_cross_entropy(root_scores, XP.iarray([root]))

            if (i + 1) % 200 == 0:
                loss.backward()
                opt.update()
                parser.zerograds()
                loss = XP.fzeros(())

        loss.backward()
        opt.update()
        trace('epoch %3d: trained.                        ' % (epoch + 1))

        parent_num = 0
        parent_match = 0
        root_num = 0
        root_match = 0
        for i, data in enumerate(dev_dataset):
            trace('epoch %3d: dev sample %6d:' % (epoch + 1, i + 1), rollback=True)
            parent_scores, root_scores = parser.forward(data)
            if len(data) > 1:
                parent_scores = functions.split_axis(parent_scores, len(data), 0)
            else:
                parent_scores = (parent_scores,)

            root = -1
            for j, (p_scores, (wid, parent)) in enumerate(zip(parent_scores, data)):
                if parent == -1:
                    root = j
                else:
                    parent_est = p_scores.data.argmax()
                    parent_num += 1
                    parent_match += 1 if parent_est == parent else 0

            root_est = root_scores.data.argmax()
            root_num += 1
            root_match += 1 if root_est == root else 0

        result_str = (
            'epoch %3d: dev: parent-acc = %.4f (%5d/%5d), root-acc = %.4f (%4d/%4d)'
            % (epoch + 1,
               parent_match / parent_num, parent_match, parent_num,
               root_match / root_num, root_match, root_num))
        trace(result_str)
        with open(args.model + '.log', 'a') as fp:
            print(result_str, file=fp)

        trace('epoch %3d: saving models ...' % (epoch + 1))
        prefix = args.model + '.%03d' % (epoch + 1)
        vocab.save(prefix + '.vocab')
        parser.save_spec(prefix + '.parent_spec')
        serializers.save_hdf5(prefix + '.parent_weights', parser)

    trace('finished.')
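# A hedged restore sketch (not in the original): rebuild the parser with the
# same hyperparameters, then fill in the saved weights. Vocabulary.load is a
# hypothetical mirror of vocab.save above; load_hdf5 is the real Chainer API.
prefix = args.model + '.%03d' % best_epoch  # best_epoch chosen from the dev log
vocab = Vocabulary.load(prefix + '.vocab')  # hypothetical
parser = Parser(args.vocab, args.embed, args.hidden, args.depth)
serializers.load_hdf5(prefix + '.parent_weights', parser)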