def train(optims, max_epoch, policy, bsize, env, num_clicks, recom_number,
          max_length, origin_reward, capacity):
    outputdir = "model_output"
    policy_new = os.path.join(outputdir, 'model_free_simple.pickle')
    #weight = torch.FloatTensor(numlabel).fill_(1)
    optim_fn, optim_params = get_optimizer(optims)
    optimizer = optim_fn(filter(lambda p: p.requires_grad, policy.parameters()),
                         **optim_params)

    n_epochs = max_epoch
    max_reward = 0
    epoch = 1
    best_model = None
    rewards = [origin_reward]
    while epoch <= n_epochs:
        _ = train_gen_pg_each(policy, env, epoch, optimizer, num_clicks,
                              recom_number, max_length, bsize,
                              total_size=capacity)
        print('saving policy at epoch {0}'.format(epoch))
        if not os.path.exists(outputdir):
            os.makedirs(outputdir)
        torch.save(policy, policy_new)

        # Eval the new policy
        _, mean_reward = Eval(policy_new)
        rewards.append(mean_reward)

        # save model
        if mean_reward >= max_reward:
            best_model = policy
            max_reward = mean_reward
        epoch += 1
    return best_model, rewards, max_reward
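# The helper get_optimizer(optims) used above is defined elsewhere. A minimal
# sketch under assumptions: it is assumed to parse a spec string such as
# "adam,lr=0.0005" into a torch.optim constructor plus its keyword arguments;
# the optimizer map and parameter handling below are hypothetical, not the
# original implementation.
import torch.optim as optim

def get_optimizer(spec):
    parts = spec.split(',')
    name, params = parts[0].lower(), {}
    for part in parts[1:]:
        key, value = part.split('=')
        params[key] = float(value)
    optim_map = {'sgd': optim.SGD, 'adam': optim.Adam,
                 'adadelta': optim.Adadelta, 'rmsprop': optim.RMSprop}
    return optim_map[name], params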
def __init__(self, emb, class_num, loader, config):
    self.class_num = class_num
    self.loader = loader
    self.config = config
    self.model = DS_Model(emb, class_num, config)
    self.model = self.model.to(config.device)
    self.eval_tool = Eval(class_num, config)
    self.plot_tool = Canvas(config)
def run(self, hogwash_job):
    self.replace_actions(hogwash_job)  # bug in hogwash
    proposal = readDataFile(file(self.exp))
    goldFile = file(self.gold)
    readDataFile(goldFile)  # discard training data
    gold = readDataFile(goldFile)
    matrix = readClMat(file(self.mat))
    evaluator = Eval(gold, proposal, matrix, self.exp)
    return evaluator
def test(model, criterion, loader, config):
    print('--------------------------------------')
    print('start test ...')
    _, _, test_loader = loader
    model.load_state_dict(
        torch.load(os.path.join(config.model_dir, 'model.pkl')))
    eval_tool = Eval(config)
    f1, test_loss = eval_tool.evaluate(model, criterion, test_loader)
    print('test_loss: %.3f | micro f1 on test: %.4f' % (test_loss, f1))
def train(model, criterion, loader, config):
    train_loader, dev_loader, _ = loader
    optimizer = optim.SGD(model.parameters(), lr=config.lr)

    print(model)
    print('training model parameters:')
    for name, param in model.named_parameters():
        if param.requires_grad:
            print('%s : %s' % (name, str(param.data.shape)))
    print('--------------------------------------')
    print('start to train the model ...')

    eval_tool = Eval(config)
    max_f1 = -float('inf')
    current_lr = config.lr
    for epoch in range(1, config.epoch + 1):
        for step, (data, label) in enumerate(train_loader):
            model.train()
            data = data.to(config.device)
            label = label.to(config.device)

            optimizer.zero_grad()
            logits = model(data)
            loss = criterion(logits, label)
            loss.backward()
            optimizer.step()

        _, train_loss, _ = eval_tool.evaluate(model, criterion, train_loader)
        f1, dev_loss, _ = eval_tool.evaluate(model, criterion, dev_loader)
        print('[%03d] train_loss: %.3f | dev_loss: %.3f | micro f1 on dev: %.4f'
              % (epoch, train_loss, dev_loss, f1), end=' ')

        file_write_loss = open('./output/Attention_CNN/loss.txt', 'a')
        file_write_loss.writelines('%.6f \t %.6f \t %.6f \n'
                                   % (train_loss, f1, dev_loss))
        file_write_loss.close()

        if f1 > max_f1:
            max_f1 = f1
            torch.save(model.state_dict(),
                       os.path.join(config.model_dir, 'model.pkl'))
            print('>>> save models!')
        else:
            print()

        # lr schedule
        current_lr *= 0.95
        change_lr(optimizer, current_lr)
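# change_lr is called in the training loop above but not defined in this
# snippet. A minimal sketch, assuming it simply overwrites the learning rate of
# every parameter group (the standard PyTorch idiom); only the helper name is
# taken from the call above, the body is an assumption.
def change_lr(optimizer, new_lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr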
def evalTask(job):
    assert (job.status == "finished")
    ddir = job.args[1]
    goldFile = file(ddir / "data")
    trainPts = readDataFile(goldFile)
    truth = readDataFile(goldFile)
    matrixFile = file(ddir / "matrix")
    matrix = readClMat(matrixFile)
    propFile = file(str(job.results))
    prop = readDataFile(propFile)
    res = Eval(gold=truth, prop=prop, clMat=matrix, filename=job.args[1])
    res.stats["time"] = jobTime(job)
    return res
def train(model, criterion, loader, config):
    train_loader, dev_loader, _ = loader
    optimizer = optim.Adadelta(model.parameters(), lr=config.lr,
                               weight_decay=config.L2_decay)

    print(model)
    print('training model parameters:')
    for name, param in model.named_parameters():
        if param.requires_grad:
            print('%s : %s' % (name, str(param.data.shape)))
    print('--------------------------------------')
    print('start to train the model ...')

    eval_tool = Eval(config)
    max_f1 = -float('inf')
    for epoch in range(1, config.epoch + 1):
        for step, (data, label) in enumerate(train_loader):
            model.train()
            data = data.to(config.device)
            label = label.to(config.device)

            optimizer.zero_grad()
            logits = model(data)
            loss = criterion(logits, label)
            loss.backward()
            nn.utils.clip_grad_value_(model.parameters(), clip_value=5)
            optimizer.step()

        _, train_loss, _ = eval_tool.evaluate(model, criterion, train_loader)
        f1, dev_loss, _ = eval_tool.evaluate(model, criterion, dev_loader)
        print('[%03d] train_loss: %.3f | dev_loss: %.3f | micro f1 on dev: %.4f'
              % (epoch, train_loss, dev_loss, f1), end=' ')

        if f1 > max_f1:
            max_f1 = f1
            torch.save(model.state_dict(),
                       os.path.join(config.model_dir, 'model.pkl'))
            print('>>> save models!')
        else:
            print()
def prediction(self, path_test, path_train, path_bestModel):
    self.hyperpara = Hyperparameter()
    eval_test = Eval()
    ner = NER()
    reader = Reader()
    traininsts = reader.readfiles(path_train)
    testinsts = reader.readfiles(path_test)
    ner.create_alphabet(traininsts)
    self.hyperpara.tag_size = ner.hyperpara.tag_size
    self.hyperpara.embedding_num = ner.hyperpara.embedding_num
    self.model = BiLstm(self.hyperpara)  # BiLSTM model

    if self.hyperpara.loadModel == 1 and \
            self.hyperpara.load_pattern == 1:
        try:
            self.model.load_state_dict(torch.load(path_bestModel))
        except Exception:
            print('model parameters do not match')
        else:
            pass
    elif self.hyperpara.loadModel == 1 and \
            self.hyperpara.load_pattern == 0:
        try:
            self.model = torch.load(path_bestModel)
        except Exception:
            print('model parameters do not match')
        else:
            pass

    testExamples = ner.change(testinsts)
    for idx in range(len(testExamples)):
        test_list = []
        test_list.append(testExamples[idx])
        x, y = ner.variable(test_list)
        lstm_feats = self.model(x)
        predict = ner.getMaxIndex(lstm_feats)
        predictLabels = []
        for idy in range(len(predict)):
            predictLabels.append(ner.label_AlphaBet.list[predict[idy]])
        testinsts[idx].evalPRF(predictLabels, eval_test)
        a, e = testinsts[idx].extractA_and_E()
        self.Attr.append(a)
        self.Eval.append(e)
print("Testing") agent.load_state_dict(torch.load(pretrained_agent)) print("Agent evaluation!") _ = evaluate_agent(agent, 101, bsize, recom_length - 1, validSample, testSample, device, 'test') print("User model evaluation!") #generator.load_state_dict(torch.load(pretrained_gen)) #Evaluate without EOS generator.load_state_dict(torch.load(pretrained_gen)) eval_acc, eval_preck, eval_rewd, eval_loss = evaluate_user( generator, 101, bsize, recom_length - 1, validSample, testSample, loss_fn_target, loss_fn_reward, device, 'test') #Save the whole policy model torch.save(agent, 'model_output/agent.pickle') if interact: #Generate new samples from the environment reward_orig, reward_optim = Eval('model_output/agent.pickle') if e == 0: rewards.append(reward_orig) rewards.append(reward_optim) ''' #Load the best model generator.load_state_dict(torch.load(pretrained_gen)) discriminator.load_state_dict(torch.load(pretrained_dis)) agent.load_state_dict(torch.load(pretrained_agent)) ''' #Generate new data subprocess.call(subprocess_cmd, shell=False) save_plot(Epochs, 1, rewards, 'all_rewards.png')
len(set([pt.label for pt in test])), "clusters"
print "Objective value of truth: %.3g" % objective(test, clMat)
print

for algorithm in ["first", "best", "vote", "pivot"]:
    evals = []
    print "================", algorithm, "================="
    print
    for run in range(10):
        cmd = "%s -a %s %s" % (tester, algorithm, classFileName)
        output = StringIO()
        status = bettersystem(cmd, stdout=output, stderr=StringIO())
        assert(status == 0)
        prop = readDataFile(output.getvalue().split("\n"))
        score = Eval(test, prop, clMat, filename=("run%d" % run))
        evals.append(score)
        print "Run", run
        print score

    print algorithm
    print "Best Objective:"
    print min(evals, key=lambda x: x.stats["objective"])
    print "Average:"
    print reduce(lambda x, y: x + y, evals).normalized(len(evals))
    print
def train(self, path_train, path_dev, path_test, path_PRF, path_model,
          path_bestModel):
    # Read the training, dev and test sets and build the alphabets
    reader = Reader()
    traininsts = reader.readfiles(path_train)
    devinsts = reader.readfiles(path_dev)
    testinsts = reader.readfiles(path_test)
    print('Training Instance:', len(traininsts))
    print('Dev Instance:', len(devinsts))
    print('Test Instance:', len(testinsts))
    self.create_alphabet(traininsts)

    # Convert strings to IDs
    trainExamples = self.change(traininsts)  # e_train
    devExamples = self.change(devinsts)
    testExamples = self.change(testinsts)

    self.model = BiLstm(self.hyperpara)  # BiLSTM model

    # Load a saved model if requested
    if self.hyperpara.loadModel == 1 and \
            self.hyperpara.load_pattern == 1:
        try:
            self.model.load_state_dict(torch.load(path_bestModel))
        except Exception:
            print('model parameters do not match')
        else:
            pass
    elif self.hyperpara.loadModel == 1 and \
            self.hyperpara.load_pattern == 0:
        try:
            self.model = torch.load(path_bestModel)
        except Exception:
            print('model parameters do not match')
        else:
            pass

    optimizer = torch.optim.Adam(self.model.parameters(),
                                 lr=self.hyperpara.lr)  # optimizer

    total_num = len(trainExamples)
    for epoch in range(1, self.hyperpara.epochs):
        print("======== epoch {} of {} ========".format(epoch, self.hyperpara.epochs))
        total = 0
        random.shuffle(trainExamples)  # shuffling the training data improves accuracy
        try:
            part = total_num // self.hyperpara.batch
            if total_num % self.hyperpara.batch != 0:
                part += 1
        except ZeroDivisionError:
            print('batch size is 0: division by zero')
        else:
            # Start training
            self.model.train()
            for idx in range(part):
                begin = idx * self.hyperpara.batch
                end = (idx + 1) * self.hyperpara.batch
                if end > total_num:
                    end = total_num
                batch_list = []
                # batch_list_len = []
                for idy in range(begin, end):
                    batch_list.append(trainExamples[idy])
                    # batch_list_len.append(len(trainExamples[idy].wordIndexs))
                optimizer.zero_grad()
                x, y = self.variable(batch_list)
                lstm_feats = self.model(x)
                loss = F.cross_entropy(lstm_feats, y)
                total += 1
                loss.backward()
                optimizer.step()
                print('current:', total, ", loss:", loss.data[0])

            # Evaluate on the dev set
            eval_dev = Eval()
            eval_dev_A = Eval()
            eval_dev_E = Eval()
            for idx in range(len(devExamples)):
                dev_list = []
                dev_list.append(devExamples[idx])
                x, y = self.variable(dev_list)
                lstm_feats = self.model(x)
                predict = self.getMaxIndex(lstm_feats)
                predictLabels = []
                for idy in range(len(predict)):
                    predictLabels.append(
                        self.label_AlphaBet.list[predict[idy]])
                gold_ent, predict_ent = devinsts[idx].evalPRF(
                    predictLabels, eval_dev)
                gold_ent_A, gold_ent_E, predict_ent_A, predict_ent_E = devinsts[
                    idx].getAE(gold_ent, predict_ent)
                devinsts[idx].evalAEPRF(gold_ent_A, predict_ent_A, eval_dev_A)
                devinsts[idx].evalAEPRF(gold_ent_E, predict_ent_E, eval_dev_E)

            line = ''
            print('Dev: ', end="")
            d_precision, d_recall, d_fscore = eval_dev.getFscore()
            line = line + str(epoch) + '.dev:\nP:' + (
                '%.2f' % (d_precision * 100)) + ' R:' + (
                '%.2f' % (d_recall * 100)) + ' F:' + (
                '%.2f' % (d_fscore * 100)) + '\n'
            print("precision:", d_precision * 100, ", recall: ",
                  d_recall * 100, ", fscore:", d_fscore * 100)

            d_precision, d_recall, d_fscore = eval_dev_A.getFscore()
            line = line + 'A_P:' + ('%.2f' % (d_precision * 100)) + ' A_R:' + (
                '%.2f' % (d_recall * 100)) + ' A_F:' + (
                '%.2f' % (d_fscore * 100)) + '\n'
            print("precision:", d_precision * 100, ", recall: ",
                  d_recall * 100, ", fscore:", d_fscore * 100)

            d_precision, d_recall, d_fscore = eval_dev_E.getFscore()
            line = line + 'E_P:' + ('%.2f' % (d_precision * 100)) + ' E_R:' + (
                '%.2f' % (d_recall * 100)) + ' E_F:' + (
                '%.2f' % (d_fscore * 100)) + '\n'
            print("precision:", d_precision * 100, ", recall: ",
                  d_recall * 100, ", fscore:", d_fscore * 100)

            # Evaluate on the test set
            eval_test = Eval()
            eval_test_A = Eval()
            eval_test_E = Eval()
            for idx in range(len(testExamples)):
                test_list = []
                test_list.append(testExamples[idx])
                x, y = self.variable(test_list)
                lstm_feats = self.model(x)
                predict = self.getMaxIndex(lstm_feats)
                predictLabels = []
                for idy in range(len(predict)):
                    predictLabels.append(
                        self.label_AlphaBet.list[predict[idy]])
                gold_ent, predict_ent = testinsts[idx].evalPRF(
                    predictLabels, eval_test)
                gold_ent_A, gold_ent_E, predict_ent_A, predict_ent_E = testinsts[
                    idx].getAE(gold_ent, predict_ent)
                testinsts[idx].evalAEPRF(gold_ent_A, predict_ent_A, eval_test_A)
                testinsts[idx].evalAEPRF(gold_ent_E, predict_ent_E, eval_test_E)

            print('Test: ', end="")
            t_precision, t_recall, t_fscore = eval_test.getFscore()
            line = line + 'test:\nP:' + (
                '%.2f' % (t_precision * 100)) + ' R:' + (
                '%.2f' % (t_recall * 100)) + ' F:' + (
                '%.2f' % (t_fscore * 100)) + '\n'
            print("precision:", t_precision * 100, ", recall: ",
                  t_recall * 100, ", fscore:", t_fscore * 100)

            t_precision, t_recall, t_fscore = eval_test_A.getFscore()
            line = line + 'A_P:' + ('%.2f' % (t_precision * 100)) + ' A_R:' + (
                '%.2f' % (t_recall * 100)) + ' A_F:' + (
                '%.2f' % (t_fscore * 100)) + '\n'
            print("precision:", t_precision * 100, ", recall: ",
                  t_recall * 100, ", fscore:", t_fscore * 100)

            t_precision, t_recall, t_fscore = eval_test_E.getFscore()
            line = line + 'E_P:' + ('%.2f' % (t_precision * 100)) + ' E_R:' + (
                '%.2f' % (t_recall * 100)) + ' E_F:' + (
                '%.2f' % (t_fscore * 100)) + '\n'
            print("precision:", t_precision * 100, ", recall: ",
                  t_recall * 100, ", fscore:", t_fscore * 100)

            # Save the model
            if self.hyperpara.save_pattern == 0:
                torch.save(self.model.state_dict(),
                           path_model + str(epoch) + '.pkl')
            elif self.hyperpara.save_pattern == 1:
                torch.save(self.model, path_model + str(epoch) + '.pkl')

            # Append the P/R/F line to the results file
            try:
                file = open(path_PRF, 'a+', encoding='utf-8')
            except IOError:
                print('file I/O error')
            else:
                file.write(line)
                file.close()
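# The Eval objects above (eval_dev, eval_test, ...) act as precision/recall/F1
# accumulators that evalPRF/evalAEPRF fill and getFscore() reads out. A minimal
# sketch under assumptions -- the real class is not shown in this file, so the
# field names (gold_num, predict_num, correct_num) are hypothetical.
class Eval:
    def __init__(self):
        self.gold_num = 0      # number of gold entities seen
        self.predict_num = 0   # number of predicted entities
        self.correct_num = 0   # number of correctly predicted entities

    def getFscore(self):
        precision = self.correct_num / self.predict_num if self.predict_num else 0.0
        recall = self.correct_num / self.gold_num if self.gold_num else 0.0
        if precision + recall == 0:
            return precision, recall, 0.0
        fscore = 2 * precision * recall / (precision + recall)
        return precision, recall, fscore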
global device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

outputdir = "model_output"
policy_new = os.path.join(outputdir, 'model_free_simple.pickle')

#Define the environment
num_clicks = 100
recom_number = 20
args = get_args()
optim = args.optim
bsize = args.batch_size

#Load environment
env_path = "./model_output/environment.pickle"
env = torch.load(env_path)

#Load initial policy
policy_path = "./model_output/orig_policy.pickle"
policy = torch.load(policy_path)
torch.save(policy, policy_new)

#Training for model-free reinforcement learning
max_epoch = 200
max_length = 5
capacity = 10000
origin_reward, _ = Eval(policy_new)
_, rewards, max_reward = train(optim, max_epoch, policy, bsize, env,
                               num_clicks, recom_number, max_length,
                               origin_reward, capacity)

#Plot rewards
save_plot(max_epoch, 1, rewards, 'rewards_test.png')

#Write rewards
f = open('rewards_model_free.txt', 'ab+')
np.savetxt(f, rewards)
f.close()
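# save_plot(n_points, step, values, filename) is called above and in the
# testing fragment but defined elsewhere. A minimal sketch, assuming it draws
# the reward curve with matplotlib and writes it to disk; the axis labels and
# figure handling are assumptions, not the original implementation.
import numpy as np
import matplotlib
matplotlib.use('Agg')  # render without a display
import matplotlib.pyplot as plt

def save_plot(n_points, step, values, filename):
    # n_points is kept only for signature compatibility with the calls above;
    # the x axis is derived from the number of recorded values.
    xs = np.arange(len(values)) * step
    plt.figure()
    plt.plot(xs, values)
    plt.xlabel('epoch')
    plt.ylabel('mean reward')
    plt.savefig(filename)
    plt.close()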