def __init__(self):
    Model.__init__(self)
    self.fc = self.build_network(output_dim=len(config.actions))
    self.optimizer_fc = optimizers.Adam(alpha=config.rl_learning_rate,
                                        beta1=config.rl_gradient_momentum)
    self.optimizer_fc.setup(self.fc)
    self.optimizer_fc.add_hook(optimizer.GradientClipping(10.0))
    self.load()
    self.update_target()
def create_value_based_learner(cfg_name):
    """
    Creates a learner that can be used with value based algorithms from chainerrl.

    :param cfg_name: type str, the name of the config
    :return: chainerrl agent specified in config
    """
    vb_config = Config(cfg_name)
    network = getattr(models, vb_config.get_str('BASIC', 'network'))(
        **vb_config.get_section('NETWORK'))
    q_func = q_functions.SingleModelStateQFunctionWithDiscreteAction(model=network)

    opt = getattr(optimizers, vb_config.get_str('BASIC', 'optimizer'))(
        **vb_config.get_section('OPTIMIZER'))
    opt.setup(q_func)
    opt.add_hook(
        optimizer.GradientClipping(threshold=vb_config.get_float('BASIC', 'grad_clip')))

    rep_buf = replay_buffer.PrioritizedEpisodicReplayBuffer(
        capacity=vb_config.get_int('MEMORY_BUFFER', 'episodic_buffer_size'),
        wait_priority_after_sampling=vb_config.get_bool('MEMORY_BUFFER',
                                                        'wait_priority_after_sampling'))

    explorer = explorers.LinearDecayEpsilonGreedy(
        random_action_func=lambda: np.random.random_integers(
            0, vb_config.get_int('NETWORK', 'output_dim') - 1),
        **vb_config.get_section('EXPLORER'))

    try:
        learner = getattr(agents, vb_config.get_str('BASIC', 'learner'))(
            q_function=q_func,
            optimizer=opt,
            replay_buffer=rep_buf,
            phi=lambda x: x,
            explorer=explorer,
            **vb_config.get_section('ALGORITHM'))
        if vb_config.get_str('BASIC', 'load_path'):
            learner.load(os.path.join(get_results_path(),
                                      vb_config.get_str('BASIC', 'load_path')))
    except AttributeError as e:
        logger.log(msg='Cannot find model {} in chainerrl.agents'.format(
            vb_config.get_str('BASIC', 'learner')), level=logging.ERROR)
        raise e

    logger.log(msg='Created learner {}'.format(learner.__class__.__name__),
               level=logging.INFO)
    logger.log(msg='Model parameters {}'.format(' '.join(
        [name + ':' + str(value)
         for name, value in vb_config.get_section('EXPERIMENT').items()])),
        level=logging.INFO)
    logger.log(msg='Explorer parameters {}'.format(' '.join(
        [name + ':' + str(value)
         for name, value in vb_config.get_section('EXPLORER').items()])),
        level=logging.INFO)
    return learner
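# A hypothetical usage sketch for the factory above. The config name 'dqn_example'
# and the obs/reward variables are illustrative assumptions, not part of the code;
# the agent methods shown are chainerrl's standard Agent interface.
#
# learner = create_value_based_learner('dqn_example')
# action = learner.act_and_train(obs, reward)                # during training
# # action = learner.act(obs)                                # greedy, for evaluation
# # learner.stop_episode_and_train(obs, reward, done=True)   # at episode end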
def train(self):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

    trace('making model ...')
    encdec = EncoderDecoder(self.vocab, self.embed, self.hidden)
    if self.word2vecFlag:
        self.copy_model(self.word2vec, encdec.enc)
        self.copy_model(self.word2vec, encdec.dec, dec_flag=True)

    for epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        trained = 0
        gen1 = gens.word_list(self.source)
        gen2 = gens.word_list(self.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch),
                          self.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(encdec)
        opt.add_hook(optimizer.GradientClipping(5))
        random_number = random.randint(0, self.minibatch - 1)
        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            # If you use the IPython notebook, you have to use the forward function:
            # hyp_batch, loss = self.forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
            hyp_batch, loss = self.forward_implement(src_batch, trg_batch, src_vocab,
                                                     trg_vocab, encdec, True, 0)
            loss.backward()
            opt.update()
            self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch)
            trained += K

    trace('saving model ...')
    prefix = self.model
    src_vocab.save(prefix + '.srcvocab')
    trg_vocab.save(prefix + '.trgvocab')
    encdec.save_spec(prefix + '.spec')
    serializers.save_hdf5(prefix + '.weights', encdec)

    trace('finished.')
def create_async_learner(cfg_name):
    """
    Creates a learner that can be used with asynchronous algorithms from chainerrl.

    :param cfg_name: type str, the name of the config
    :return: chainerrl agent specified in config
    """
    config = Config(cfg_name)
    network = getattr(models, config.get_str('BASIC', 'network'))(**config.get_section('NETWORK'))

    opt = rmsprop_async.RMSpropAsync(**config.get_section('OPTIMIZER'))
    opt.setup(network)
    opt.add_hook(optimizer.GradientClipping(threshold=config.get_float('BASIC', 'grad_clip')))

    learner = getattr(agents, config.get_str('BASIC', 'learner'))(
        network, opt, **config.get_section('ALGORITHM'))
    return learner
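# Both factories rely on the same Chainer idiom used throughout these snippets: attach a
# GradientClipping hook to an optimizer so each update rescales the global gradient norm
# to a threshold. A minimal, self-contained sketch of that pattern (the toy Linear link,
# the Adam optimizer, and the threshold of 5.0 are illustrative assumptions, not values
# taken from any config here):

import numpy as np
import chainer.functions as F
import chainer.links as L
from chainer import optimizer, optimizers

toy_model = L.Linear(3, 2)                                   # toy network: 3 inputs, 2 classes
toy_opt = optimizers.Adam()
toy_opt.setup(toy_model)
toy_opt.add_hook(optimizer.GradientClipping(threshold=5.0))  # clip global grad norm to 5

x = np.random.randn(4, 3).astype(np.float32)                 # dummy minibatch
t = np.zeros(4, dtype=np.int32)                              # dummy labels
toy_model.cleargrads()
loss = F.softmax_cross_entropy(toy_model(x), t)
loss.backward()
toy_opt.update()                                             # gradients are clipped before the step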
def train(args):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)

    trace('making model ...')
    attmt = AttentionMT(args.vocab, args.embed, args.hidden)
    if args.use_gpu:
        attmt.to_gpu()

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.word_list(args.source)
        gen2 = gens.word_list(args.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch),
                          args.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(attmt)
        opt.add_hook(optimizer.GradientClipping(5))

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = forward(src_batch, trg_batch, src_vocab, trg_vocab,
                                      attmt, True, 0)
            loss.backward()
            opt.update()

            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
                trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
                trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
                trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))
            trained += K

        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        attmt.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', attmt)

    trace('finished.')
def train(self):
    trace('making vocabularies ...')
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

    trace('making model ...')

    for epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        trained = 0
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(self.encdec)
        opt.add_hook(optimizer.GradientClipping(5))
        gen1 = gens.word_list(self.target)
        gen = gens.batch(gen1, self.minibatch)
        random_number = random.randint(0, self.minibatch - 1)
        for trg_batch in gen:
            self.trg_batch = fill_batch(trg_batch)
            if len(self.trg_batch) != self.minibatch:
                break
            hyp_batch, loss = self.forward(trg_vocab, self.use_gpu, self.gpu_id)
            loss.backward()
            opt.update()
            K = len(self.trg_batch)
            if trained == 0:
                self.print_out(random_number, epoch, trained, hyp_batch)
            trained += K

    trace('saving model ...')
    prefix = self.model
    trg_vocab.save(prefix + '.trgvocab')
    self.encdec.save_spec(prefix + '.spec')
    serializers.save_hdf5(prefix + '.weights', self.encdec)

    trace('finished.')
def train():
    dictf = open(dictpath, 'rb')
    w_id_dict = pickle.load(dictf)
    vocab_size = len(w_id_dict)
    model = Seq2Seq(vocab_size=vocab_size,
                    embed_size=EMBED_SIZE,
                    hidden_size=HIDDEN_SIZE,
                    batch_size=BATCH_SIZE)
    model.reset()
    inf = open(inputpath, 'rb')
    data = pickle.load(inf)
    data = vocab_to_id(data, w_id_dict)
    for epoch in range(EPOCH_NUM):
        opt = optimizers.Adam()
        opt.setup(model)
        opt.add_hook(optimizer.GradientClipping(5))
        for num in range(len(data) // BATCH_SIZE):
            minibatch = data[num * BATCH_SIZE:(num + 1) * BATCH_SIZE]
            enc_words, dec_words = make_minibatch(minibatch)
            total_loss = model.feedforward(enc_words=enc_words, dec_words=dec_words)
            total_loss.backward()
            opt.update()
        print('Epoch %s finished' % (epoch + 1))
        outputfile = outputpath % (EMBED_SIZE, HIDDEN_SIZE, BATCH_SIZE, epoch + 1)
        serializers.save_npz(outputfile, model)
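# train() above reads every setting from module-level constants rather than arguments.
# A sketch of the globals it assumes (the names come from the function body; the paths
# and sizes below are illustrative placeholders, not values from the original project):
#
# dictpath = 'data/word_id_dict.pkl'          # pickled word-to-id dictionary
# inputpath = 'data/training_pairs.pkl'       # pickled training data
# outputpath = 'model/%d_%d_%d_epoch%d.npz'   # filled with EMBED_SIZE, HIDDEN_SIZE, BATCH_SIZE, epoch
# EMBED_SIZE, HIDDEN_SIZE, BATCH_SIZE, EPOCH_NUM = 300, 300, 40, 30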
def train(args):
    trace('loading corpus ...')
    with open(args.source) as fp:
        trees = [make_tree(l) for l in fp]

    trace('extracting leaf nodes ...')
    word_lists = [extract_words(t) for t in trees]
    lower_lists = [[w.lower() for w in words] for words in word_lists]

    trace('extracting gold operations ...')
    op_lists = [make_operations(t) for t in trees]

    trace('making vocabulary ...')
    word_vocab = Vocabulary.new(lower_lists, args.vocab)
    phrase_set = set()
    semiterminal_set = set()
    for tree in trees:
        phrase_set |= set(extract_phrase_labels(tree))
        semiterminal_set |= set(extract_semiterminals(tree))
    phrase_vocab = Vocabulary.new([list(phrase_set)], len(phrase_set),
                                  add_special_tokens=False)
    semiterminal_vocab = Vocabulary.new([list(semiterminal_set)], len(semiterminal_set),
                                        add_special_tokens=False)

    trace('converting data ...')
    word_lists = [convert_word_list(x, word_vocab) for x in word_lists]
    op_lists = [convert_op_list(x, phrase_vocab, semiterminal_vocab) for x in op_lists]

    trace('start training ...')
    parser = Parser(
        args.vocab, args.embed, args.char_embed, args.queue, args.stack,
        args.srstate, len(phrase_set), len(semiterminal_set),
    )
    if args.use_gpu:
        parser.to_gpu()
    opt = optimizers.SGD(lr=0.1)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(10))
    opt.add_hook(optimizer.WeightDecay(0.0001))

    batch_set = list(zip(word_lists, op_lists))

    for epoch in range(args.epoch):
        n = 0
        random.shuffle(batch_set)

        for samples in batch(batch_set, args.minibatch):
            parser.zerograds()
            loss = XP.fzeros(())

            for word_list, op_list in zip(*samples):
                trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
                loss += parser.forward_train(word_list, op_list)
                n += 1

            loss.backward()
            opt.update()

        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        word_vocab.save(prefix + '.words')
        phrase_vocab.save(prefix + '.phrases')
        semiterminal_vocab.save(prefix + '.semiterminals')
        parser.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', parser)

        opt.lr *= 0.92

    trace('finished.')
def fit(self, queries, responses, train_path, epoch_num=30, batch_size=40, tag=None):
    train_queries = self.xp.vstack(self.xp.array(queries))
    train_responses = self.xp.vstack(self.xp.array(responses))
    teacher_num = min(len(train_queries), len(train_responses))
    opt = optimizers.Adam()
    opt.setup(self.model)
    opt.add_hook(optimizer.GradientClipping(5))
    if self.flag_gpu:
        self.model.to_gpu(0)
    self.model.reset()
    # Start training
    st = datetime.datetime.now()
    for epoch in range(self.npz_num, epoch_num):
        # Mini-batch training
        perm = np.random.permutation(teacher_num)  # random permutation of unique indices
        total_loss = 0
        total_accuracy = 0
        for i in range(0, teacher_num, batch_size):
            # Reset the model's gradients and internal state
            self.model.reset()
            # Pick the words for this mini-batch from the index list
            enc_words = train_queries[perm[i:i + batch_size]].T
            dec_words = train_responses[perm[i:i + batch_size]].T
            # Batch size on the encoder side
            encode_batch_size = len(enc_words[0])
            # Run the encoder
            self.model.encode(enc_words, encode_batch_size)
            # Feed <eos> into the decoder
            t = self.xp.array([0] * encode_batch_size, dtype='int32')
            # Initialize the loss
            loss = self.xp.zeros((), dtype='float32')
            # Initialize the accuracy
            accuracy = self.xp.zeros((), dtype='float32')
            # Decode one word at a time
            for w in dec_words:
                y = self.model.decode(t)
                t = self.xp.array(w, dtype='int32')     # convert the gold word to an array
                loss += F.softmax_cross_entropy(y, t)   # loss between prediction and gold word
                accuracy += F.accuracy(y, t)            # accuracy
            loss.backward()
            loss.unchain_backward()
            opt.update()
            total_loss += loss.data
            total_accuracy += accuracy.data
        if (epoch + 1) % 10 == 0:
            # Save the model
            if self.flag_gpu:  # move to CPU so the model can also be used without a GPU
                self.model.to_cpu()
            serializers.save_npz(train_path + str(epoch + 1) + ".npz", self.model)
            if self.flag_gpu:
                self.model.to_gpu(0)
        ed = datetime.datetime.now()
        epoch_data = "epoch: {}\ttag: {}\n".format(epoch + 1, str(tag))
        loss_data = "\tloss: {}\n".format(round(float(total_loss), 2))
        accuracy_data = "\taccuracy: {}\n".format(round(float(total_accuracy), 2))
        time_data = "\ttime: {}".format(ed - st)
        text = epoch_data + loss_data + accuracy_data + time_data
        print(text)
        st = datetime.datetime.now()
def main():
    args = parse_args()

    trace('making vocabulary ...')
    vocab, num_lines, num_words = make_vocab(args.corpus, args.vocab)

    trace('initializing CUDA ...')
    cuda.init()

    trace('start training ...')
    if args.model == 0:
        model = BasicRnnLM(args.embed, args.hidden, args.vocab)
        model.reset()
    elif args.model == 1:
        model = LSTMRnn(args.embed, args.hidden, args.vocab)
        model.reset()
    elif args.model == 2:
        model = AttentionLM(args.embed, args.hidden, args.vocab)
        model.reset()
    model.to_gpu()

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        log_ppl = 0.0
        trained = 0
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(model)
        opt.add_hook(optimizer.GradientClipping(5))

        for batch in generate_batch(args.corpus, args.minibatch):
            K = len(batch)
            loss, perplexity = forward(batch, model)
            loss.backward()
            log_ppl += perplexity
            opt.update()
            trained += K
            model.reset()
            trace(' %d/%d' % (trained, num_lines))

        log_ppl /= float(num_words)
        trace('Train log(PPL) = %.10f' % log_ppl)
        trace('Train PPL = %.10f' % math.exp(log_ppl))

        log_ppl = 0.0
        for batch in generate_batch(args.valid, args.minibatch):
            K = len(batch)
            loss, perplexity = forward(batch, model)
            log_ppl += perplexity
            model.reset()
        trace('Valid log(PPL) = %.10f' % log_ppl)
        trace('Valid PPL = %.10f' % math.exp(log_ppl))

        trace(' writing model ...')
        trace('saving model ...')
        prefix = 'RNNLM-' + str(args.model) + '.%03d' % (epoch + 1)
        save_vocab(prefix + '.srcvocab', vocab)
        model.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', model)

    trace('training finished.')
def train(args):
    vocab = Vocabulary.from_conll(args.train, args.vocab)
    train_dataset = [conll_to_train(x, vocab) for x in read_conll(args.train)]
    dev_dataset = [conll_to_train(x, vocab) for x in read_conll(args.dev)]

    parser = Parser(args.vocab, args.embed, args.hidden)
    if args.gpu >= 0:
        parser.to_gpu()

    opt = optimizers.AdaGrad(lr=0.01)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(10))
    opt.add_hook(optimizer.WeightDecay(0.0001))

    for epoch in range(args.epoch):
        random.shuffle(train_dataset)

        parser.zerograds()
        loss = XP.fzeros(())

        for i, data in enumerate(train_dataset):
            trace('epoch %3d: train sample %6d:' % (epoch + 1, i + 1))
            parent_scores, root_scores = parser.forward(data)
            if len(data) > 1:
                parent_scores = functions.split_axis(parent_scores, len(data), 0)
            else:
                parent_scores = (parent_scores,)

            root = -1
            for j, (p_scores, (wid, parent)) in enumerate(zip(parent_scores, data)):
                if parent == -1:
                    trace(' %3d: root' % j)
                    root = j
                else:
                    parent_est = p_scores.data.argmax()
                    trace('%c %3d -> %3d (%3d)' % (
                        '*' if parent == parent_est else ' ', j, parent_est, parent))
                    loss += functions.softmax_cross_entropy(p_scores, XP.iarray([parent]))

            root_est = root_scores.data.argmax()
            trace('ROOT: %3d (%3d)' % (root_est, root))
            loss += functions.softmax_cross_entropy(root_scores, XP.iarray([root]))

            if (i + 1) % 200 == 0:
                loss.backward()
                opt.update()
                parser.zerograds()
                loss = XP.fzeros(())

        loss.backward()
        opt.update()
        trace('epoch %3d: trained. ' % (epoch + 1))

        parent_num = 0
        parent_match = 0
        root_num = 0
        root_match = 0
        for i, data in enumerate(dev_dataset):
            trace('epoch %3d: dev sample %6d:' % (epoch + 1, i + 1), rollback=True)
            parent_scores, root_scores = parser.forward(data)
            if len(data) > 1:
                parent_scores = functions.split_axis(parent_scores, len(data), 0)
            else:
                parent_scores = (parent_scores,)

            root = -1
            for j, (p_scores, (wid, parent)) in enumerate(zip(parent_scores, data)):
                if parent == -1:
                    root = j
                else:
                    parent_est = p_scores.data.argmax()
                    parent_num += 1
                    parent_match += 1 if parent_est == parent else 0

            root_est = root_scores.data.argmax()
            root_num += 1
            root_match += 1 if root_est == root else 0

        result_str = (
            'epoch %3d: dev: parent-acc = %.4f (%5d/%5d), root-acc = %.4f (%4d/%4d)' % (
                epoch + 1,
                parent_match / parent_num, parent_match, parent_num,
                root_match / root_num, root_match, root_num))
        trace(result_str)
        with open(args.model + '.log', 'a') as fp:
            print(result_str, file=fp)

        trace('epoch %3d: saving models ...' % (epoch + 1))
        prefix = args.model + '.%03d' % (epoch + 1)
        vocab.save(prefix + '.vocab')
        parser.save_spec(prefix + '.parent_spec')
        serializers.save_hdf5(prefix + '.parent_weights', parser)

    trace('finished.')
def train(args):
    trace('loading corpus ...')
    with open(args.source) as fp:
        trees = [make_tree(l) for l in fp]

    trace('extracting leaf nodes ...')
    word_lists = [extract_words(t) for t in trees]

    trace('extracting gold operations ...')
    op_lists = [make_operations(t) for t in trees]

    trace('making vocabulary ...')
    word_vocab = Vocabulary.new(word_lists, args.vocab)
    phrase_set = set()
    semi_set = set()
    for tree in trees:
        phrase_set |= set(extract_phrase_labels(tree))
        semi_set |= set(extract_semi_labels(tree))
    phrase_vocab = Vocabulary.new([list(phrase_set)], len(phrase_set),
                                  add_special_tokens=False)
    semi_vocab = Vocabulary.new([list(semi_set)], len(semi_set),
                                add_special_tokens=False)

    trace('converting data ...')
    word_lists = [convert_word_list(x, word_vocab) for x in word_lists]
    op_lists = [convert_op_list(x, phrase_vocab, semi_vocab) for x in op_lists]

    trace('start training ...')
    parser = Parser(
        args.vocab, args.embed, args.queue, args.stack,
        len(phrase_set), len(semi_set),
    )
    if USE_GPU:
        parser.to_gpu()
    opt = optimizers.AdaGrad(lr=0.005)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(5))

    for epoch in range(args.epoch):
        n = 0

        for samples in batch(zip(word_lists, op_lists), args.minibatch):
            parser.zerograds()
            loss = my_zeros((), np.float32)

            for word_list, op_list in zip(*samples):
                trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
                loss += parser.forward(word_list, op_list, 0)
                n += 1

            loss.backward()
            opt.update()

        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        word_vocab.save(prefix + '.words')
        phrase_vocab.save(prefix + '.phrases')
        semi_vocab.save(prefix + '.semiterminals')
        parser.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', parser)

    trace('finished.')
def main():
    args = parse_args()
    XP.set_library(args)

    date = time.localtime()[:6]
    D = []
    for i in date:
        D.append(str(i))
    D = "_".join(D)

    save_path = args.save_path
    if os.path.exists(save_path) == False:
        os.mkdir(save_path)

    if args.model_path != None:
        print("continue existed model!! load recipe of {}".format(args.model_path))
        with open(args.model_path + '/recipe.json', 'r') as f:
            recipe = json.load(f)
        vae_enc = recipe["network"]["IM"]["vae_enc"]
        vae_z = recipe["network"]["IM"]["vae_z"]
        vae_dec = recipe["network"]["IM"]["vae_dec"]
        times = recipe["network"]["IM"]["times"]
        alpha = recipe["network"]["IM"]["KLcoefficient"]
        batchsize = recipe["setting"]["batchsize"]
        maxepoch = args.maxepoch
        weightdecay = recipe["setting"]["weightdecay"]
        grad_clip = recipe["setting"]["grad_clip"]
        cur_epoch = recipe["setting"]["cur_epoch"] + 1
        ini_lr = recipe["setting"]["initial_learningrate"]
        cur_lr = recipe["setting"]["cur_lr"]
        with open(args.model_path + "/../trainloss.json", 'r') as f:
            trainloss_dic = json.load(f)
        with open(args.model_path + "/../valloss.json", 'r') as f:
            valloss_dic = json.load(f)
    else:
        vae_enc = args.vae_enc
        vae_z = args.vae_z
        vae_dec = args.vae_dec
        times = args.times
        alpha = args.alpha
        batchsize = args.batchsize
        maxepoch = args.maxepoch
        weightdecay = args.weightdecay
        grad_clip = 5
        cur_epoch = 0
        ini_lr = args.lr
        cur_lr = ini_lr
        trainloss_dic = {}
        valloss_dic = {}

    print('this experiment started at :{}'.format(D))
    print('***Experiment settings***')
    print('[IM]vae encoder hidden size :{}'.format(vae_enc))
    print('[IM]vae hidden layer size :{}'.format(vae_z))
    print('[IM]vae decoder hidden layer size :{}'.format(vae_dec))
    print('[IM]sequence length:{}'.format(times))
    print('max epoch :{}'.format(maxepoch))
    print('mini batch size :{}'.format(batchsize))
    print('initial learning rate :{}'.format(cur_lr))
    print('weight decay :{}'.format(weightdecay))
    print("optimization by :{}".format("Adam"))
    print("VAE KL coefficient:", alpha)
    print('*************************')

    vae = VAE_bernoulli_noattention(vae_enc, vae_z, vae_dec, 28, 28, 1)
    opt = optimizers.Adam(alpha=cur_lr)
    opt.setup(vae)
    if args.model_path != None:
        print('loading model ...')
        serializers.load_npz(args.model_path + '/VAEweights', vae)
        serializers.load_npz(args.model_path + '/optimizer', opt)
    else:
        print('making [[new]] model ...')
        for param in vae.params():
            data = param.data
            data[:] = np.random.uniform(-0.1, 0.1, data.shape)
    opt.add_hook(optimizer.GradientClipping(grad_clip))
    opt.add_hook(optimizer.WeightDecay(weightdecay))

    if args.gpu >= 0:
        vae.to_gpu()

    mnist = MNIST(binarize=True)
    train_size = mnist.train_size
    test_size = mnist.test_size
    eps = 1e-8

    for epoch in range(cur_epoch + 1, maxepoch + 1):
        print('\nepoch {}'.format(epoch))
        LX = 0.0
        LZ = 0.0
        counter = 0
        for iter, (img_array, label_array) in enumerate(mnist.gen_train(batchsize, Random=True)):
            B = img_array.shape[0]
            Lz = XP.fzeros(())
            vae.reset(img_array)
            # first to T-1 step
            for j in range(times - 1):
                y, kl = vae.free_energy_onestep()
                Lz_i = alpha * kl
                Lz += Lz_i
            # last step
            j += 1
            y, kl = vae.free_energy_onestep()
            Lz_i = alpha * kl
            Lz += Lz_i
            Lx = Bernoulli_nll_wesp(vae.x, y, eps)
            LZ += Lz.data
            LX += Lx.data
            loss = (Lx + Lz) / batchsize
            loss.backward()
            opt.update()
            counter += B
            sys.stdout.write('\rnow training ... epoch {}, {}/{} '.format(epoch, counter, mnist.train_size))
            sys.stdout.flush()
            if (iter + 1) % 100 == 0:
                print("({}-th batch mean loss) Lx:%03.3f Lz:%03.3f".format(counter) % (Lx.data / B, Lz.data / B))

        img_array = cuda.to_cpu(y.data)
        im_array = img_array.reshape(batchsize * 28, 28)
        img = im_array[:28 * 5]
        plt.clf()
        plt.imshow(img, cmap=cm.gray)
        plt.colorbar(orientation='horizontal')
        plt.savefig(save_path + "/" + "img{}.png".format(epoch))

        trace(save_path + "/trainloss.txt",
              "epoch {} Lx:{} Lz:{} Lx+Lz:{}".format(epoch, LX / train_size, LZ / train_size, (LX + LZ) / train_size))
        trainloss_dic[str(epoch).zfill(3)] = {
            "Lx": float(LX / train_size),
            "Lz": float(LZ / train_size),
            "Lx+Lz": float((LX + LZ) / train_size)}
        with open(save_path + "/trainloss.json", 'w') as f:
            json.dump(trainloss_dic, f, indent=4)

        print('save model ...')
        prefix = save_path + "/" + str(epoch).zfill(3)
        if os.path.exists(prefix) == False:
            os.mkdir(prefix)
        serializers.save_npz(prefix + '/VAEweights', vae)
        serializers.save_npz(prefix + '/optimizer', opt)

        print('save recipe...')
        recipe_dic = {
            "date": D,
            "setting": {
                "maxepoch": maxepoch,
                "batchsize": batchsize,
                "weightdecay": weightdecay,
                "grad_clip": grad_clip,
                "opt": "Adam",
                "initial_learningrate": ini_lr,
                "cur_epoch": epoch,
                "cur_lr": cur_lr},
            "network": {
                "IM": {
                    "x_size": 784,
                    "vae_enc": vae_enc,
                    "vae_z": vae_z,
                    "vae_dec": vae_dec,
                    "times": times,
                    "KLcoefficient": alpha},
            },
        }
        with open(prefix + '/recipe.json', 'w') as f:
            json.dump(recipe_dic, f, indent=4)

        if epoch % 1 == 0:
            print("\nvalidation step")
            LX = 0.0
            LZ = 0.0
            counter = 0
            for iter, (img_array, label_array) in enumerate(mnist.gen_test(batchsize)):
                B = img_array.shape[0]
                Lz = XP.fzeros(())
                vae.reset(img_array)
                # first to T-1 step
                for j in range(times - 1):
                    y, kl = vae.free_energy_onestep()
                    Lz_i = alpha * kl
                    Lz += Lz_i
                # last step
                j += 1
                y, kl = vae.free_energy_onestep()
                Lz_i = alpha * kl
                Lz += Lz_i
                Lx = Bernoulli_nll_wesp(vae.x, y, eps)
                LZ += Lz.data.reshape(())
                LX += Lx.data.reshape(())
                counter += B
                sys.stdout.write('\rnow testing ... epoch {}, {}/{} '.format(epoch, counter, test_size))
                sys.stdout.flush()
            print("")
            trace(save_path + "/valloss.txt",
                  "epoch {} Lx:{} Lz:{} Lx+Lz:{}".format(epoch, LX / test_size, LZ / test_size, (LX + LZ) / test_size))
            valloss_dic[str(epoch).zfill(3)] = {
                "Lx": float(LX / test_size),
                "Lz": float(LZ / test_size),
                "Lx+Lz": float((LX + LZ) / test_size)}
            with open(save_path + "/valloss.json", 'w') as f:
                json.dump(valloss_dic, f, indent=4)

            img_array = cuda.to_cpu(y.data)
            im_array = img_array.reshape(batchsize * 28, 28)
            img = im_array[:28 * 5]
            plt.clf()
            plt.imshow(img, cmap=cm.gray)
            plt.colorbar(orientation='horizontal')
            plt.savefig(save_path + "/" + "img_test{}.png".format(epoch))

    print('finished.')
def train(args):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.input_word_list(), args.vocab)
    trg_vocab = Vocabulary.new(gens.output_word_list(), args.vocab)

    trace('making model ...')
    encdec = EncoderDecoder(args.vocab, args.embed, args.hidden)

    if args.load_model != "":
        print("model load %s ... " % (args.load_model))
        src_vocab = Vocabulary.load(args.load_model + '.srcvocab')
        trg_vocab = Vocabulary.load(args.load_model + '.trgvocab')
        encdec = EncoderDecoder.load_spec(args.load_model + '.spec')
        serializers.load_hdf5(args.load_model + '.weights', encdec)

    if args.use_gpu:
        encdec.to_gpu()

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.input_word_list()
        gen2 = gens.output_word_list()
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch),
                          args.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(encdec)
        opt.add_hook(optimizer.GradientClipping(5))

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = forward(src_batch, trg_batch, src_vocab, trg_vocab,
                                      encdec, True, 0)
            loss.backward()
            opt.update()

            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
                trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
                trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
                trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))
            trained += K

        if epoch % args.model_save_timing == 0:
            trace('saving model ...')
            prefix = args.model + '.%03d' % (epoch + 1)
            src_vocab.save(prefix + '.srcvocab')
            trg_vocab.save(prefix + '.trgvocab')
            encdec.save_spec(prefix + '.spec')
            serializers.save_hdf5(prefix + '.weights', encdec)

    trace('finished.')
def train(args):
    if args.gpu > -1:
        cuda.get_device(args.gpu).use()
        xp = cuda.cupy
    else:
        xp = np

    if args.log:
        log_dir = args.log
    else:
        log_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                               '{}_{}'.format(DIR_NAME, datetime.now().strftime('%Y%m%d_%H:%M')))
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    # setting for logging
    logger = logging.getLogger()
    logging.basicConfig(level=logging.INFO)
    log_path = os.path.join(log_dir, 'log')
    file_handler = logging.FileHandler(log_path)
    fmt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    file_handler.setFormatter(fmt)
    logger.addHandler(file_handler)

    logger.info('Arguments...')
    for arg, val in vars(args).items():
        logger.info('{} : {}'.format(arg, val))

    logger.info('Loading Vocab...')
    vocab = Vocab()
    vocab.load(args.vocab, args.lowercase)
    vocab.add_special_token()
    sufvocab = Vocab()
    sufvocab.load(args.sufvocab, args.lowercase)
    sufvocab.add_special_token(['s>', '<UNK>'])
    pos2id = Vocab()
    pos2id.load(args.poslist)

    logger.info('preparation for training data...')
    out_path = making_data(args.train_data, args.window)

    model = WordCSnnTagger(args.wembed, args.fembed, args.hidden, len(vocab),
                           len(sufvocab), len(pos2id), args.window, args.objct, args.alpha)
    model.save_model_config(log_dir)
    if args.gpu > -1:
        model.to_gpu()

    opt = getattr(optimizers, args.opt)()
    opt.setup(model)
    opt.add_hook(optimizer.GradientClipping(args.gclip))
    opt.add_hook(optimizer.WeightDecay(args.wdecay))

    for epoch in range(args.epoch):
        logger.info('START epoch {}/{}'.format(epoch + 1, args.epoch))
        start = time.time()
        sum_loss = xp.zeros((), dtype=xp.float32)
        n_data = 0
        n_correct = 0
        for i, [tags, contexts] in enumerate(line_iter(out_path, args.minibatch)):
            batch_ts = xp.array([pos2id[tag] for tag in tags], dtype=xp.int32)
            # capitalization features are taken before lowercasing
            batch_caps = xp.array([[get_capf(word) for word in context]
                                   for context in contexts], dtype=xp.int32)
            if args.lowercase:
                contexts = [[word.lower() for word in context] for context in contexts]
            batch_xs = xp.array([[vocab[word] for word in context]
                                 for context in contexts], dtype=xp.int32)
            batch_sufs = xp.array([[sufvocab[word[-2:]] for word in context]
                                   for context in contexts], dtype=xp.int32)
            batch_features = [batch_xs, batch_sufs, batch_caps]
            cur_batch_size = batch_ts.shape[0]

            ys, loss = model(batch_features, batch_ts)
            sum_loss += loss.data * cur_batch_size
            model.zerograds()
            loss.backward()
            opt.update()

            pred_labels = ys.data.argmax(1)
            n_correct += sum(1 for j in range(cur_batch_size) if pred_labels[j] == batch_ts[j])
            n_data += cur_batch_size
            logger.info('done {} batches'.format(i + 1))

        logger.info('{} epoch train loss = {}'.format(epoch + 1, sum_loss))
        logger.info('{} epoch train accuracy = {}'.format(epoch + 1, float(n_correct / n_data)))
        logger.info('{} sec for training per epoch'.format(time.time() - start))

        if args.valid_data:
            start = time.time()
            valid_loss, valid_accuracy = evaluation(model, args.valid_data, pos2id,
                                                    vocab, sufvocab, args)
            logger.info('{} epoch valid loss = {}'.format(epoch + 1, valid_loss))
            logger.info('{} epoch valid accuracy = {}'.format(epoch + 1, valid_accuracy))
            logger.info('{} sec for validation per epoch'.format(time.time() - start))

        if args.test_data:
            start = time.time()
            test_loss, test_accuracy = evaluation(model, args.test_data, pos2id,
                                                  vocab, sufvocab, args)
            logger.info('{} epoch test loss = {}'.format(epoch + 1, test_loss))
            logger.info('{} epoch test accuracy = {}'.format(epoch + 1, test_accuracy))
            logger.info('{} sec for testing per epoch'.format(time.time() - start))

        logger.info('serializing...')
        prefix = '{}_{}ep_{}wembed_{}fembed_{}hidden_{}window_{}minibatch_{}opt'.format(
            DIR_NAME, epoch + 1, args.wembed, args.fembed, args.hidden,
            args.window, args.minibatch, args.opt)
        model_path = os.path.join(log_dir, prefix + '.model')
        model.save(model_path)

    logger.info('done training')
if __name__ == "__main__":
    set_seed()
    log_tracer = LogTracer(nn_type, sep_mode)
    log_tracer("get train data")
    train, test, n_vocab = get_train_data(pad, sep_mode)
    log_tracer.trace_label("train", train)
    log_tracer.trace_label("test", test)

    if nn_type == "lstm":
        mlp = LSTM(n_vocab, n_units, N_OUT)
    elif nn_type == "cnn":
        mlp = CNN(n_vocab, n_units, N_OUT)

    opt = optimizers.Adam()
    opt.setup(mlp)
    opt.add_hook(optimizer.WeightDecay(w_decay))
    opt.add_hook(optimizer.GradientClipping(g_clip))

    log_tracer("start train")
    for epoch in range(n_epoch):
        for x, t in generate_bath(train, n_batch):
            mlp.cleargrads()
            loss, acc = mlp(x, t, train=True)
            loss.backward()
            opt.update()
        log_tracer.trace_train(epoch, loss.data, acc.data)

        x_v, t_v = parse_batch(test)
        loss_v, acc_v = mlp(x_v, t_v)
        log_tracer.trace_test(epoch, loss_v.data, acc_v.data, True)

    mlp.save(sep_mode)
def training():
    parser = argparse.ArgumentParser()
    parser.add_argument('--hidden_size', type=int, default=200)
    parser.add_argument('--dropout', '-d', type=float, default=0.2)
    parser.add_argument('--batch_size', '-b', type=int, default=15)
    parser.add_argument('--batch_col_size', type=int, default=20)
    parser.add_argument('--epoch', '-e', type=int, default=50)
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--out', '-o', default='result', help='Directory to output the result')
    parser.add_argument('--model', '-m', default='', type=str)
    args = parser.parse_args()
    print(json.dumps(args.__dict__, indent=2))

    # GPU setup: use cuda.cupy or numpy
    if args.gpu > -1:
        xp = cuda.cupy
        cuda.get_device(args.gpu).use()
    else:
        xp = np

    # Training data
    data = load_data()
    N = len(data)  # number of training examples

    # Load the training data
    print('initialize DataConverter')
    data_converter = DataConverter(batch_col_size=args.batch_col_size)  # data converter
    data_converter.load(data)  # read the training data

    model = AttSeq2Seq(input_size=200, hidden_size=args.hidden_size,
                       batch_col_size=args.batch_col_size, dropout=args.dropout,
                       device=args.gpu)
    if args.gpu >= 0:
        model.to_gpu(0)
    if args.model != '':
        serializers.load_npz(args.model, model)

    opt = optimizers.Adam()
    opt.setup(model)
    opt.add_hook(optimizer.GradientClipping(5))
    model.reset()

    # Start training
    print("Train start")
    st = datetime.datetime.now()
    model_file_name = str(st)[:-7]
    for epoch in range(args.epoch):
        # Mini-batch training
        perm = np.random.permutation(N)  # random permutation of indices
        total_loss = 0
        for i in range(0, N, args.batch_size):
            enc_words = data_converter.train_queries[perm[i:i + args.batch_size]]
            dec_words = data_converter.train_responses[perm[i:i + args.batch_size]]
            model.reset()
            loss = model(enc_words=enc_words, dec_words=dec_words, train=True)
            loss.backward()
            loss.unchain_backward()
            total_loss += loss.data
            opt.update()
            print('{0}/{1}:'.format(i, N), end='\t', flush=True)
        # output_path = "./att_seq2seq_network/{}_{}.network".format(epoch+1, total_loss)
        # serializers.save_npz(output_path, model)
        ed = datetime.datetime.now()
        print("\nepoch:\t{0}\ttotal loss:\t{1}\ttime:\t{2}".format(epoch + 1, total_loss, ed - st))
        st = datetime.datetime.now()
        model.to_cpu()
        serializers.save_npz("model/{0}_epoch-{1}.npz".format(model_file_name, epoch + 1),
                             model)  # write out in npz format
        model.to_gpu()
data_converter = DataConverter(batch_col_size=BATCH_COL_SIZE)  # data converter
data_converter.load(data)  # read the training data
vocab_size = len(data_converter.vocab)  # vocabulary size
print("vocabulary size:", vocab_size)
PrintTime("word-to-ID conversion")
# pprint.pprint(sorted(data_converter.vocab.items(), key=lambda x: x[1]))

# Declare the model
model = AttSeq2Seq(vocab_size=vocab_size, embed_size=EMBED_SIZE,
                   hidden_size=HIDDEN_SIZE, batch_col_size=BATCH_COL_SIZE)

# Load the network file
network = ".\\mine\\data\\network\\{file_name}\\sample1.network".format(file_name=file_name)
serializers.load_npz(network, model)
opt = optimizers.Adam()
opt.setup(model)
opt.add_hook(optimizer.GradientClipping(5))
if FLAG_GPU:
    model.to_gpu(0)
model.reset()
PrintTime("loading")

# epoch = []
# StudyStart(".\\mine\\data\\network\\{file_name}\\sample1.network".format(file_name=file_name))
ConsoleInputText()    # input from the console
# SpeechAnalysis()    # cosine-similarity matching
# SpeechStart()       # inference from training-data input
# SpeechAnswer(data)  # measure training data and missing data
# print(SpeechOneText())
PrintTime("---END---")
def for_one_batch_training(self):
    loss_list = []
    text_count = 0
    model_list = glob.glob(
        "_".join(self.OUTPUT_PATH.format("model", self.FEATURE_TYPE, self.USE_DROPOUT,
                                         self.num_of_middle_layer, "*", 0).split("_")[:-1]))
    model = Att_Seq2TF(emb_size=self.EMBED_SIZE, fnn_size=self.FNN_SIZE,
                       hidden_size=self.HIDDEN_SIZE,
                       num_of_middle_layer=self.num_of_middle_layer,
                       use_dropout=self.USE_DROPOUT, flag_gpu=self.FLAG_GPU)
    if len(model_list) != 0:
        for model_cand in sorted(model_list, key=lambda x: int(x.split("_")[-2][9:])):
            loss_list.append(float(model_cand[model_cand.find("loss") + 4:model_cand.rfind(".")]))
            serializers.load_hdf5(model_cand, model)
            text_count = int(model_cand.split("_")[-2][9:])
            print(model_cand)
        print(text_count)
        print(loss_list)
    if self.FLAG_GPU:
        model.to_gpu(0)
    model.reset()
    # print("d")
    opt = optimizers.Adam()
    # optimizer.use_cleargrads()
    opt.setup(model)
    opt.add_hook(optimizer.WeightDecay(0.0005))
    opt.add_hook(optimizer.GradientClipping(5))
    opt_list = glob.glob(
        "_".join(self.OUTPUT_PATH.format("opt", self.FEATURE_TYPE, self.USE_DROPOUT,
                                         self.num_of_middle_layer, "*", 0).split("_")[:-1]))
    if len(opt_list) != 0:
        opt_list = sorted(opt_list, key=lambda x: int(x.split("_")[-2][9:]))
        serializers.load_hdf5(opt_list[-1], opt)
        print(opt_list[-1])

    # rupe_of_trainging
    # train_losses = []
    # test_losses = []
    print("start...")
    start_time = time.time()

    # Start training
    q = Queue(100)
    q_valid = Queue(500)
    q_valid1 = Queue(500)
    minibatch_maker = MinibatchMaker(self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 0)
    p = Process(target=minibatch_maker.epoch_pickle, args=(q, ))
    p.start()
    # minibatch_maker1 = MinibatchMaker(
    #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 1)
    # p1 = Process(target=minibatch_maker1.epoch_factory, args=(q, ))
    # p1.start()
    # ... (identical commented-out blocks for workers 2 through 9)

    # train_len = q.get()
    minibatch_maker_valid = MinibatchMaker(self.FEATURE_TYPE, self.FLAG_GPU, "valid", text_div=0)
    p_valid = Process(target=minibatch_maker_valid.epoch_pickle, args=(q_valid, ))
    p_valid.start()
    minibatch_maker_valid1 = MinibatchMaker(self.FEATURE_TYPE, self.FLAG_GPU, "valid", text_div=1)
    p_valid1 = Process(target=minibatch_maker_valid1.epoch_pickle, args=(q_valid1, ))
    p_valid1.start()
    valid_len = q_valid.get()
    valid_len1 = q_valid1.get()
    # valid_len1 = 0
    # print("altsvm" + str(train_len))
    print("altsvm" + str(valid_len))
    print("altsvm" + str(valid_len1))
    # p.terminate()
    # p_valid.terminate()
    # exit()

    waited_count = 0
    verb_data_count = 0
    pseudo_epoch_count = 0
    train_dict_keep = None
    while waited_count < 100 and (len(loss_list) <= 10 or min(loss_list[-10:]) != loss_list[-10]):
        if not q.empty():
            # print("something")
            text_count += self.EPOCH_TEXT
            try_count = 0
            # while try_count < 5:
            #     try:
            #         try_count += 1
            #         print(str(q.full()))
            enc_words, fnn_inputs, dec_scores = q.get()
            # if text_sentence_vec_dict != None:
            #     train_dict_keep = text_sentence_vec_dict
            #     except Exception as e:
            #         print("cant_get")
            #         print(e)
            #     if len(x_train) > 0:
            #         print("can_get")
            #         break
            # sys.exit()
            N = len(dec_scores)
            verb_data_count += N
            if N != 0:
                # training
                start_time_train = time.time()
                perm = np.random.permutation(N)
                sum_loss = 0
                # print("first_verb")
                for i in range(0, N, self.BATCH_SIZE):
                    # print(i)
                    if self.FLAG_GPU:
                        enc_words_batch = []
                        for x in perm[i:i + self.BATCH_SIZE]:
                            enc_words_batch.append(enc_words[x])
                            # enc_words_batch.append(
                            #     train_dict_keep[enc_words[x][0]][enc_words[x][1]])
                        # enc_words_batch = cuda.to_gpu(
                        #     np.array(enc_words_batch), device=0)
                        fnn_inputs_batch = cuda.to_gpu(fnn_inputs[perm[i:i + self.BATCH_SIZE]], device=0)
                        dec_scores_batch = cuda.to_gpu(dec_scores[perm[i:i + self.BATCH_SIZE]], device=0)
                    else:
                        enc_words_batch = []
                        for x in perm[i:i + self.BATCH_SIZE]:
                            enc_words_batch.append(enc_words[x])
                            # enc_words_batch.append(
                            #     train_dict_keep[enc_words[x][0]][enc_words[x][1]])
                        fnn_inputs_batch = fnn_inputs[perm[i:i + self.BATCH_SIZE]]
                        dec_scores_batch = dec_scores[perm[i:i + self.BATCH_SIZE]]
                    # Reset the model
                    model.reset()
                    # Forward pass: encoder
                    model.encode(enc_words_batch)
                    # Decoder computation
                    loss = model.decode(fnn_inputs_batch, dec_scores_batch)
                    # print(loss)
                    sum_loss += loss.data * len(dec_scores_batch)
                    loss.backward()
                    opt.update()
                # print("first_verb_finished")
                average_loss = sum_loss / N
                # train_losses.append(average_loss)
                interval = int(time.time() - start_time_train)
                # print("training time: {}sec, N: {}".format(interval, N))

                # test
                # loss = model(x_test, y_test)
                # test_losses.append(loss.data)

                # output learning process
                if text_count % 100 == 0:
                    print("text_count: {} train loss: {} verb_data_count: {} time: {}".format(
                        text_count, average_loss, verb_data_count, time.ctime()))

                if verb_data_count // self.EPOCH_LIMIT > pseudo_epoch_count:
                    pseudo_epoch_count += 1
                    # print(verb_data_count)
                    # print(pseudo_epoch_count)
                    total_loss = 0
                    total_count = 0
                    valid_dict_keep = None
                    model.mode_change("test")
                    # chainer.config.train = False
                    valid_count = 0
                    valid1_count = 0
                    while (valid_count + valid1_count) < (valid_len + valid_len1):
                        if valid_count < valid_len and not q_valid.empty():
                            enc_words, fnn_inputs, dec_scores = q_valid.get()
                            valid_count += 1
                        elif valid1_count < valid_len1 and not q_valid1.empty():
                            enc_words, fnn_inputs, dec_scores = q_valid1.get()
                            valid1_count += 1
                        else:
                            print("waiting valid " + str(valid_count) + " " + str(valid1_count))
                            time.sleep(10)
                            continue
                        # if text_sentence_vec_dict != None:
                        #     valid_dict_keep = text_sentence_vec_dict
                        if len(dec_scores) == 0:
                            continue
                        N = len(dec_scores)
                        for i in range(0, N, self.BATCH_SIZE):
                            if self.FLAG_GPU:
                                enc_words_batch = []
                                for x in enc_words[i:i + self.BATCH_SIZE]:
                                    enc_words_batch.append(x)
                                    # enc_words_batch.append(
                                    #     valid_dict_keep[x[0]][x[1]])
                                # enc_words_batch = cuda.to_gpu(
                                #     enc_words_batch, device=0)
                                fnn_inputs_batch = cuda.to_gpu(fnn_inputs[i:i + self.BATCH_SIZE], device=0)
                                dec_scores_batch = cuda.to_gpu(dec_scores[i:i + self.BATCH_SIZE], device=0)
                            else:
                                enc_words_batch = []
                                for x in enc_words[i:i + self.BATCH_SIZE]:
                                    enc_words_batch.append(x)
                                    # enc_words_batch.append(
                                    #     valid_dict_keep[x[0]][x[1]])
                                # enc_words_batch = cuda.to_gpu(
                                #     enc_words_batch, device=0)
                                fnn_inputs_batch = fnn_inputs[i:i + self.BATCH_SIZE]
                                dec_scores_batch = dec_scores[i:i + self.BATCH_SIZE]
                            # Reset the model
                            model.reset()
                            if len(enc_words_batch) == 0:
                                print(len(enc_words))
                                print(len(dec_scores_batch))
                                print(i)
                                exit()
                            with chainer.no_backprop_mode():
                                # Forward pass: encoder
                                model.encode(enc_words_batch)
                                # Decoder computation
                                loss_data = model.decode(fnn_inputs_batch, dec_scores_batch).data
                            if not self.ARR.isnan(loss_data):
                                total_loss += loss_data * len(dec_scores_batch)
                                total_count += len(dec_scores_batch)
                            else:
                                print(loss_data)
                    if total_count == 0:
                        print("skipped")
                        continue
                    valid_loss = float(total_loss / total_count)
                    model.mode_change("train")
                    # chainer.config.train = True
                    # print(valid_loss)
                    # print(total_loss)
                    # print(total_count)
                    print("valid_count: {} valid loss: {} time: {}".format(
                        verb_data_count // self.EPOCH_LIMIT, valid_loss, time.ctime()))
                    try:
                        # with open("test", mode="wb") as f:
                        #     pickle.dump("hui", f)
                        # with open(self.OUTPUT_PATH.format("opt", self.FEATURE_TYPE, str(self.USE_DROPOUT), str(self.num_of_middle_layer), str(verb_count // self.EPOCH_LIMIT), valid_loss), mode="wb") as f:
                        #     pickle.dump(opt, f)
                        # print("will_save")
                        # model_saved = model.copy()
                        # model_saved.to_cpu()
                        # fui = float(70)
                        serializers.save_hdf5(
                            # "/gs/hs0/tga-cl/yamashiro-s-aa/workspace/nn/fnn/model/model", model)
                            self.OUTPUT_PATH.format("model", self.FEATURE_TYPE, self.USE_DROPOUT,
                                                    self.num_of_middle_layer, text_count,
                                                    float(valid_loss)), model)
                        # print("model_saved")
                        serializers.save_hdf5(
                            self.OUTPUT_PATH.format("opt", self.FEATURE_TYPE, self.USE_DROPOUT,
                                                    self.num_of_middle_layer, text_count,
                                                    float(valid_loss)), opt)
                    except Exception as e:
                        raise e
                    # print("saved")
                    loss_list.append(valid_loss)
                    # q_valid.put((x_valid, y_valid))
            waited_count = 0
        else:
            print("waiting")
            time.sleep(10)
            print(str(text_count) + " " + str(q.qsize()))
            waited_count += 1

    print("end")
    p.terminate()
    # p1.terminate() ... p9.terminate()  (commented-out cleanup for workers 2 through 9)
    p_valid.terminate()
    p_valid1.terminate()
    interval = int(time.time() - start_time)
    print("execution time: {}sec, last pseudo_epoch: {}".format(
        interval, str(verb_data_count // self.EPOCH_LIMIT)))