def plot_training(self, type_: str, figsize=(8, 6), save_plot=True):
    plt.figure(figsize=figsize)
    if type_ == 'loss':
        x = np.arange(len(self.train_loss))
        plt.plot(x, smooth_curve(self.train_loss))
        plt.title('Plot of Training Loss of ' + self.model_)
        plt.xlabel('Iterations')
        plt.ylabel('Loss')
    elif type_ == 'accuracy':
        x = np.arange(1, self.epochs + 1)
        for k in self.accuracy_tr.keys():
            plt.plot(x, self.accuracy_tr[k], label=k + ' accuracy (train)')
            if self.evaluation:
                plt.plot(x, self.accuracy_va[k], label=k + ' accuracy (val)')
        plt.ylim(0, 1)
        plt.title('Plot of Accuracy During Training of ' + self.model_)
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
    plt.grid()
    #plt.show()
    if save_plot:
        fn = self.fn.replace('Accuracy', type_).replace('.txt', '.png')
        plt.savefig(fn)
        print('The', type_, 'plot is saved as', fn)
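# For orientation, a minimal sketch of the smooth_curve helper used throughout
# these files. The real implementation lives in utils.py, so the window width
# and Kaiser beta below are assumptions: the idea is to damp high-frequency
# noise in a loss curve by convolving it with a normalized Kaiser window.
import numpy as np

def smooth_curve_sketch(x, beta=2, window_len=11):
    """Hypothetical re-implementation: smooth a 1-D loss curve (len(x) >= window_len)."""
    s = np.r_[x[window_len - 1:0:-1], x, x[-1:-window_len:-1]]  # pad by reflection at both ends
    w = np.kaiser(window_len, beta)                             # smoothing kernel
    y = np.convolve(w / w.sum(), s, mode='valid')               # normalized convolution
    return y[5:len(y) - 5]                                      # trim the padded ends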
def train(self, train_data, batch_size=256, learning_rate=1e-3,
          test_data=None, num_epochs=100, is_evaluate=False, log_every=5,
          beta=1.0, criterion='cross_entropy'):
    self.vae.train()
    self.id2token = {v: k for k, v in train_data.dictionary.token2id.items()}
    data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                             num_workers=4, collate_fn=train_data.collate_fn)
    optimizer = torch.optim.Adam(self.vae.parameters(), lr=learning_rate)
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    trainloss_lst, valloss_lst = [], []
    recloss_lst, klloss_lst = [], []
    c_v_lst, c_w2v_lst, c_uci_lst, c_npmi_lst, mimno_tc_lst, td_lst = [], [], [], [], [], []
    for epoch in range(num_epochs):
        epochloss_lst = []
        for iter, data in enumerate(data_loader):
            optimizer.zero_grad()
            txts, bows = data
            bows = bows.to(self.device)
            '''
            n_samples = 20
            rec_loss = torch.tensor(0.0).to(self.device)
            for i in range(n_samples):
                bows_recon, mus, log_vars = self.vae(bows, lambda x: torch.softmax(x, dim=1))
                logsoftmax = torch.log_softmax(bows_recon, dim=1)
                _rec_loss = -1.0 * torch.sum(bows * logsoftmax)
                rec_loss += _rec_loss
            rec_loss = rec_loss / n_samples
            '''
            bows_recon, mus, log_vars = self.vae(bows, lambda x: torch.softmax(x, dim=1))
            if criterion == 'cross_entropy':
                logsoftmax = torch.log_softmax(bows_recon, dim=1)
                rec_loss = -1.0 * torch.sum(bows * logsoftmax)
            elif criterion == 'bce_softmax':
                rec_loss = F.binary_cross_entropy(torch.softmax(bows_recon, dim=1), bows, reduction='sum')
            elif criterion == 'bce_sigmoid':
                rec_loss = F.binary_cross_entropy(torch.sigmoid(bows_recon), bows, reduction='sum')

            kl_div = -0.5 * torch.sum(1 + log_vars - mus.pow(2) - log_vars.exp())
            loss = rec_loss + kl_div * beta

            loss.backward()
            optimizer.step()

            trainloss_lst.append(loss.item() / len(bows))
            epochloss_lst.append(loss.item() / len(bows))
            if (iter + 1) % 10 == 0:
                print(f'Epoch {(epoch+1):>3d}\tIter {(iter+1):>4d}\tLoss:{loss.item()/len(bows):<.7f}\tRec Loss:{rec_loss.item()/len(bows):<.7f}\tKL Div:{kl_div.item()/len(bows):<.7f}')
        #scheduler.step()
        if (epoch + 1) % log_every == 0:
            # The code lines between this and the next comment lines are duplicated with WLDA.py; consider simplifying them.
            print(f'Epoch {(epoch+1):>3d}\tLoss:{sum(epochloss_lst)/len(epochloss_lst):<.7f}')
            print('\n'.join([str(lst) for lst in self.show_topic_words()]))
            print('=' * 30)
            smth_pts = smooth_curve(trainloss_lst)
            plt.plot(np.array(range(len(smth_pts))) * log_every, smth_pts)
            plt.xlabel('epochs')
            plt.title('Train Loss')
            plt.savefig('gsm_trainloss.png')
            if test_data is not None:
                c_v, c_w2v, c_uci, c_npmi, mimno_tc, td = self.evaluate(test_data, calc4each=False)
                c_v_lst.append(c_v)
                c_w2v_lst.append(c_w2v)
                c_uci_lst.append(c_uci)
                c_npmi_lst.append(c_npmi)
                mimno_tc_lst.append(mimno_tc)
                td_lst.append(td)
                scrs = {'c_v': c_v_lst, 'c_w2v': c_w2v_lst, 'c_uci': c_uci_lst,
                        'c_npmi': c_npmi_lst, 'mimno_tc': mimno_tc_lst, 'td': td_lst}
                '''
                for scr_name, scr_lst in scrs.items():
                    plt.cla()
                    plt.plot(np.array(range(len(scr_lst))) * log_every, scr_lst)
                    plt.savefig(f'wlda_{scr_name}.png')
                '''
                plt.cla()
                for scr_name, scr_lst in scrs.items():
                    if scr_name in ['c_v', 'c_w2v', 'td']:
                        plt.plot(np.array(range(len(scr_lst))) * log_every, scr_lst, label=scr_name)
                plt.title('Topic Coherence')
                plt.xlabel('epochs')
                plt.legend()
                plt.savefig('gsm_tc_scores.png')
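# The kl_div term above is the closed-form KL divergence between the encoder's
# diagonal Gaussian q(z|x) = N(mu, diag(exp(log_var))) and the standard normal
# prior p(z) = N(0, I). A minimal self-contained check of that formula against
# torch.distributions (the tensor shapes here are arbitrary assumptions):
import torch
from torch.distributions import Normal, kl_divergence

mus = torch.randn(4, 20)
log_vars = torch.randn(4, 20)
closed_form = -0.5 * torch.sum(1 + log_vars - mus.pow(2) - log_vars.exp())
reference = kl_divergence(Normal(mus, (0.5 * log_vars).exp()),
                          Normal(torch.zeros_like(mus), torch.ones_like(log_vars))).sum()
assert torch.allclose(closed_form, reference, atol=1e-5)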
def train(self, train_data, batch_size=256, learning_rate=1e-4,
          test_data=None, num_epochs=100, is_evaluate=False, log_every=10,
          beta1=0.5, beta2=0.999, clip=0.01, n_critic=5):
    self.generator.train()
    self.encoder.train()
    self.discriminator.train()
    self.id2token = {v: k for k, v in train_data.dictionary.token2id.items()}
    data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                             num_workers=4, collate_fn=train_data.collate_fn)

    optim_G = torch.optim.Adam(self.generator.parameters(), lr=learning_rate, betas=(beta1, beta2))
    optim_E = torch.optim.Adam(self.encoder.parameters(), lr=learning_rate, betas=(beta1, beta2))
    optim_D = torch.optim.Adam(self.discriminator.parameters(), lr=learning_rate, betas=(beta1, beta2))

    Gloss_lst, Eloss_lst, Dloss_lst = [], [], []
    c_v_lst, c_w2v_lst, c_uci_lst, c_npmi_lst, mimno_tc_lst, td_lst = [], [], [], [], [], []
    for epoch in range(num_epochs):
        epochloss_lst = []
        for iter, data in enumerate(data_loader):
            txts, bows_real = data
            bows_real = bows_real.to(self.device)
            bows_real /= torch.sum(bows_real, dim=1, keepdim=True)

            # Train Discriminator
            optim_D.zero_grad()
            theta_fake = torch.from_numpy(
                np.random.dirichlet(alpha=1.0 * np.ones(self.n_topic) / self.n_topic,
                                    size=(len(bows_real)))).float().to(self.device)
            loss_D = -1.0 * torch.mean(self.discriminator(self.encoder(bows_real).detach())) \
                + torch.mean(self.discriminator(self.generator(theta_fake).detach()))
            loss_D.backward()
            optim_D.step()
            for param in self.discriminator.parameters():
                param.data.clamp_(-clip, clip)

            if iter % n_critic == 0:
                # Train Generator
                optim_G.zero_grad()
                loss_G = -1.0 * torch.mean(self.discriminator(self.generator(theta_fake)))
                loss_G.backward()
                optim_G.step()

                # Train Encoder
                optim_E.zero_grad()
                loss_E = torch.mean(self.discriminator(self.encoder(bows_real)))
                loss_E.backward()
                optim_E.step()

                Dloss_lst.append(loss_D.item())
                Gloss_lst.append(loss_G.item())
                Eloss_lst.append(loss_E.item())
                print(f'Epoch {(epoch+1):>3d}\tIter {(iter+1):>4d}\tLoss_D:{loss_D.item():<.7f}\tLoss_G:{loss_G.item():<.7f}\tloss_E:{loss_E.item():<.7f}')
        if (epoch + 1) % log_every == 0:
            print(f'Epoch {(epoch+1):>3d}\tLoss_D_avg:{sum(Dloss_lst)/len(Dloss_lst):<.7f}\tLoss_G_avg:{sum(Gloss_lst)/len(Gloss_lst):<.7f}\tloss_E_avg:{sum(Eloss_lst)/len(Eloss_lst):<.7f}')
            print('\n'.join([str(lst) for lst in self.show_topic_words()]))
            print('=' * 30)
            smth_pts_d = smooth_curve(Dloss_lst)
            smth_pts_g = smooth_curve(Gloss_lst)
            smth_pts_e = smooth_curve(Eloss_lst)
            plt.cla()
            plt.plot(np.array(range(len(smth_pts_g))) * log_every, smth_pts_g, label='loss_G')
            plt.plot(np.array(range(len(smth_pts_d))) * log_every, smth_pts_d, label='loss_D')
            plt.plot(np.array(range(len(smth_pts_e))) * log_every, smth_pts_e, label='loss_E')
            plt.legend()
            plt.xlabel('epochs')
            plt.title('Train Loss')
            plt.savefig('batm_trainloss.png')
            if test_data is not None:
                c_v, c_w2v, c_uci, c_npmi, mimno_tc, td = self.evaluate(test_data, calc4each=False)
                c_v_lst.append(c_v)
                c_w2v_lst.append(c_w2v)
                c_uci_lst.append(c_uci)
                c_npmi_lst.append(c_npmi)
                mimno_tc_lst.append(mimno_tc)
                td_lst.append(td)
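# theta_fake above draws fake topic proportions from a symmetric Dirichlet via
# numpy and then moves them to the device. A minimal equivalent sketch using
# torch.distributions, which avoids the numpy round-trip; n_topic, batch_size,
# and device stand in for the attributes used above:
import torch
from torch.distributions import Dirichlet

def sample_theta_fake(n_topic, batch_size, device):
    # Symmetric Dirichlet with concentration 1/n_topic, matching the numpy call above.
    alpha = torch.ones(n_topic, device=device) / n_topic
    return Dirichlet(alpha).sample((batch_size,))  # shape [batch_size, n_topic]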
def train(self, train_data, batch_size=256, learning_rate=1e-3,
          test_data=None, num_epochs=100, is_evaluate=False, log_every=5,
          beta=1.0, ckpt=None):
    self.wae.train()
    self.id2token = {v: k for k, v in train_data.dictionary.token2id.items()}
    data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                             num_workers=4, collate_fn=train_data.collate_fn)
    optimizer = torch.optim.Adam(self.wae.parameters(), lr=learning_rate)
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.5)

    if ckpt:
        self.load_model(ckpt["net"])
        optimizer.load_state_dict(ckpt["optimizer"])
        start_epoch = ckpt["epoch"] + 1
    else:
        start_epoch = 0

    trainloss_lst, valloss_lst = [], []
    c_v_lst, c_w2v_lst, c_uci_lst, c_npmi_lst, mimno_tc_lst, td_lst = [], [], [], [], [], []
    for epoch in range(start_epoch, num_epochs):
        epochloss_lst = []
        for iter, data in enumerate(data_loader):
            optimizer.zero_grad()
            txts, bows = data
            bows = bows.to(self.device)

            bows_recon, theta_q = self.wae(bows)
            theta_prior = self.wae.sample(dist=self.dist, batch_size=len(bows), ori_data=bows).to(self.device)

            logsoftmax = torch.log_softmax(bows_recon, dim=1)
            rec_loss = -1.0 * torch.sum(bows * logsoftmax)
            #rec_loss = F.binary_cross_entropy(torch.softmax(bows_recon, dim=1), bows, reduction='sum')
            #rec_loss = F.binary_cross_entropy(bows_recon, bows, reduction='sum')

            mmd = self.wae.mmd_loss(theta_q, theta_prior, device=self.device, t=0.1)
            #mmd = self.wae.mmd_loss(hid_vecs, theta_prior, device=self.device, t=0.1)
            s = torch.sum(bows) / len(bows)
            lamb = (5.0 * s * torch.log(torch.tensor(1.0 * bows.shape[-1])) / torch.log(torch.tensor(2.0)))
            mmd = mmd * lamb

            loss = rec_loss + mmd * beta
            loss.backward()
            optimizer.step()

            trainloss_lst.append(loss.item() / len(bows))
            epochloss_lst.append(loss.item() / len(bows))
            if (iter + 1) % 10 == 0:
                print(f'Epoch {(epoch+1):>3d}\tIter {(iter+1):>4d}\tLoss:{loss.item()/len(bows):<.7f}\tRec Loss:{rec_loss.item()/len(bows):<.7f}\tMMD:{mmd.item()/len(bows):<.7f}')
        #scheduler.step()
        if (epoch + 1) % log_every == 0:
            save_name = f'./ckpt/WTM_{self.taskname}_tp{self.n_topic}_{self.dist}_{time.strftime("%Y-%m-%d-%H-%M", time.localtime())}_{epoch+1}.ckpt'
            checkpoint = {
                "net": self.wae.state_dict(),
                "optimizer": optimizer.state_dict(),
                "epoch": epoch,
                "param": {
                    "bow_dim": self.bow_dim,
                    "n_topic": self.n_topic,
                    "taskname": self.taskname,
                    "dist": self.dist,
                    "dropout": self.dropout
                }
            }
            torch.save(checkpoint, save_name)
            print(f'Epoch {(epoch+1):>3d}\tLoss:{sum(epochloss_lst)/len(epochloss_lst):<.7f}')
            print('\n'.join([str(lst) for lst in self.show_topic_words()]))
            print('=' * 30)
            smth_pts = smooth_curve(trainloss_lst)
            plt.plot(np.array(range(len(smth_pts))) * log_every, smth_pts)
            plt.xlabel('epochs')
            plt.title('Train Loss')
            plt.savefig('wlda_trainloss.png')
            if test_data is not None:
                c_v, c_w2v, c_uci, c_npmi, mimno_tc, td = self.evaluate(test_data, calc4each=False)
                c_v_lst.append(c_v)
                c_w2v_lst.append(c_w2v)
                c_uci_lst.append(c_uci)
                c_npmi_lst.append(c_npmi)
                mimno_tc_lst.append(mimno_tc)
                td_lst.append(td)
                scrs = {'c_v': c_v_lst, 'c_w2v': c_w2v_lst, 'c_uci': c_uci_lst,
                        'c_npmi': c_npmi_lst, 'mimno_tc': mimno_tc_lst, 'td': td_lst}
                '''
                for scr_name, scr_lst in scrs.items():
                    plt.cla()
                    plt.plot(np.array(range(len(scr_lst))) * log_every, scr_lst)
                    plt.savefig(f'wlda_{scr_name}.png')
                '''
                plt.cla()
                for scr_name, scr_lst in scrs.items():
                    if scr_name in ['c_v', 'c_w2v', 'td']:
                        plt.plot(np.array(range(len(scr_lst))) * log_every, scr_lst, label=scr_name)
                plt.title('Topic Coherence')
                plt.xlabel('epochs')
                plt.legend()
                plt.savefig('wlda_tc_scores.png')
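# self.wae.mmd_loss is defined on the WAE module and may use a different kernel
# than shown here. For orientation, a minimal sketch of a plain RBF-kernel MMD^2
# (biased V-statistic estimate) between the encoded topic proportions theta_q
# and the prior samples theta_prior; the bandwidth t is an assumption:
import torch

def mmd_loss_sketch(x, y, t=0.1):
    """Hypothetical MMD^2 between two batches of distributions on the simplex."""
    def kernel(a, b):
        sq_dists = torch.cdist(a, b) ** 2          # pairwise squared distances
        return torch.exp(-sq_dists / (2.0 * t))    # RBF kernel matrix
    return kernel(x, x).mean() + kernel(y, y).mean() - 2.0 * kernel(x, y).mean()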
def train(self, train_data, test_data, learning_rate, batch_size, num_epochs, log_every, ckpt=None):
    self.vae.train()
    self.id2token = {v: k for k, v in train_data.dictionary.token2id.items()}
    data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                             num_workers=4, collate_fn=train_data.collate_fn)
    optimizer = torch.optim.Adam(self.vae.parameters(), lr=learning_rate)
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    if ckpt:
        self.load_model(ckpt["net"])
        optimizer.load_state_dict(ckpt["optimizer"])
        start_epoch = ckpt["epoch"] + 1
    else:
        start_epoch = 0

    trainloss_lst, valloss_lst = [], []
    recloss_lst, klloss_lst = [], []
    c_v_lst, c_w2v_lst, c_uci_lst, c_npmi_lst, mimno_tc_lst, td_lst = [], [], [], [], [], []
    for epoch in range(start_epoch, num_epochs):
        epochloss_lst = []
        loss_sum = 0.0
        ppx_sum = 0.0
        word_count = 0.0
        doc_count = 0.0
        for iter, data in enumerate(data_loader):
            optimizer.zero_grad()
            word_count_list = []
            txts, bows = data  # bows = [batch, 2000]
            # print("bows", len(bows), type(bows), bows[0])
            # print("txts", len(txts), txts)
            bows = bows.to(self.device)  # [batch_size, |V|]

            p_x, mus, log_vars = self.vae(bows)  # p_x = [batch_size, |V|]
            #logsoftmax = torch.log_softmax(p_x, dim=1)
            # logsoftmax = [batch_size, |V|]. GSM does not apply softmax before the log here,
            # because beta and theta have already been normalized.
            logsoftmax = torch.log(p_x + 1e-10)
            # bows * logsoftmax = [batch_size, |V|]; torch.sum here adds up the loss over
            # all entries, but one could also sum over a single dimension only.
            rec_loss = -1.0 * torch.sum(bows * logsoftmax)
            rec_loss_per = -1.0 * torch.sum(bows * logsoftmax, dim=1)
            rec_loss_per = rec_loss_per.cpu().detach().numpy()

            kl_div = -0.5 * torch.sum(1 + log_vars - mus.pow(2) - log_vars.exp())
            loss = rec_loss + kl_div

            # Accumulate the statistics for perplexity.
            loss_sum += loss.item()
            for txt in txts:
                word_count_list.append(len(txt))
                word_count += len(txt)
            word_count_np = np.array(word_count_list)
            doc_count += len(txts)
            ppx_sum += np.sum(np.true_divide(rec_loss_per, word_count_np))

            loss.backward()
            optimizer.step()

            trainloss_lst.append(loss.item() / len(bows))
            epochloss_lst.append(loss.item() / len(bows))
            if (iter + 1) % 10 == 0:
                print(f'Epoch {(epoch+1):>3d}\tIter {(iter+1):>4d}\tLoss:{loss.item()/len(bows):<.7f}\tRec Loss:{rec_loss.item()/len(bows):<.7f}\tKL Div:{kl_div.item()/len(bows):<.7f}')
        #scheduler.step()
        if (epoch + 1) % log_every == 0:
            print("word_count", word_count)
            ppx = np.exp(loss_sum / word_count)
            ppx_document = np.exp(ppx_sum / doc_count)
            print("ppx", ppx)
            print("ppx_document", ppx_document)

            save_name = f'./ckpt/GSM_{train_data}_tp{self.n_topic}_{time.strftime("%Y-%m-%d-%H-%M", time.localtime())}_ep{epoch+1}.ckpt'
            checkpoint = {
                "net": self.vae.state_dict(),
                "optimizer": optimizer.state_dict(),
                "epoch": epoch,
                "param": {
                    "bow_dim": self.bow_dim,
                    "n_topic": self.n_topic,
                }
            }
            torch.save(checkpoint, save_name)
            # The code lines between this and the next comment lines are duplicated with WLDA.py; consider simplifying them.
            print(f'Epoch {(epoch+1):>3d}\tLoss:{sum(epochloss_lst)/len(epochloss_lst):<.7f}')
            # print('\n'.join([str(lst) for lst in self.show_topic_words()]))
            # print('='*30)
            smth_pts = smooth_curve(trainloss_lst)
            plt.plot(np.array(range(len(smth_pts))) * log_every, smth_pts)
            plt.xlabel('epochs')
            plt.title('Train Loss')
            plt.savefig('gsm_trainloss.png')
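# Note on the two perplexities above: ppx exponentiates the total loss per word
# over the whole corpus, while ppx_document averages each document's per-word
# reconstruction loss first and then exponentiates. A tiny numeric sketch with
# hypothetical values, showing that the two aggregations genuinely differ:
import numpy as np

rec_loss_per_doc = np.array([40.0, 90.0])  # summed reconstruction loss per document
words_per_doc = np.array([10, 30])         # document lengths in words
ppx_corpus = np.exp(rec_loss_per_doc.sum() / words_per_doc.sum())  # exp(130/40) ~ 25.8
ppx_document = np.exp(np.mean(rec_loss_per_doc / words_per_doc))   # exp(mean(4.0, 3.0)) ~ 33.1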
def train(self, train_data, batch_size=256, learning_rate=2e-3,
          test_data=None, num_epochs=100, is_evaluate=False, log_every=5,
          beta=1.0, gamma=1e7, criterion='cross_entropy'):
    self.vade.train()
    self.id2token = {v: k for k, v in train_data.dictionary.token2id.items()}
    data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                             num_workers=4, collate_fn=train_data.collate_fn)
    #self.pretrain(data_loader, pre_epoch=30, retrain=True, metric='cross_entropy')
    self.pretrain(data_loader, pre_epoch=30, retrain=True, metric='bce_softmax')
    optimizer = torch.optim.Adam(self.vade.parameters(), lr=learning_rate)
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.5)

    trainloss_lst, valloss_lst = [], []
    c_v_lst, c_w2v_lst, c_uci_lst, c_npmi_lst, mimno_tc_lst, td_lst = [], [], [], [], [], []
    for epoch in range(num_epochs):
        epochloss_lst = []
        for iter, data in enumerate(data_loader):
            #optimizer.zero_grad()
            txts, bows = data
            bows = bows.to(self.device)

            bows_recon, mus, log_vars = self.vade(bows, collate_fn=lambda x: F.softmax(x, dim=1), isPretrain=False)
            #bows_recon, mus, log_vars = self.vade(bows, collate_fn=None, isPretrain=False)
            if criterion == 'cross_entropy':
                logsoftmax = torch.log_softmax(bows_recon, dim=1)
                rec_loss = -1.0 * torch.sum(bows * logsoftmax)
                rec_loss /= len(bows)
            elif criterion == 'bce_softmax':
                rec_loss = F.binary_cross_entropy(torch.softmax(bows_recon, dim=1), bows, reduction='sum')
            elif criterion == 'bce_sigmoid':
                rec_loss = F.binary_cross_entropy(torch.sigmoid(bows_recon), bows, reduction='sum')

            kl_div = self.vade.gmm_kl_div(mus, log_vars)
            center_mut_dists = self.vade.mus_mutual_distance()
            loss = rec_loss + kl_div * beta + center_mut_dists * gamma

            optimizer.zero_grad()
            loss.backward()
            #nn.utils.clip_grad_norm_(self.vade.parameters(), max_norm=20, norm_type=2)
            optimizer.step()

            trainloss_lst.append(loss.item() / len(bows))
            epochloss_lst.append(loss.item() / len(bows))
            if (iter + 1) % 10 == 0:
                print(f'Epoch {(epoch+1):>3d}\tIter {(iter+1):>4d}\tLoss:{loss.item()/len(bows):<.7f}\tRec Loss:{rec_loss.item()/len(bows):<.7f}\tGMM_KL_Div:{kl_div.item()/len(bows):<.7f}\tCenter_Mutual_Distance:{center_mut_dists/(len(bows)*(len(bows)-1))}')
        #scheduler.step()
        if (epoch + 1) % log_every == 0:
            print(f'Epoch {(epoch+1):>3d}\tLoss:{sum(epochloss_lst)/len(epochloss_lst):<.7f}')
            print('\n'.join([str(lst) for lst in self.show_topic_words()]))
            print('=' * 30)
            smth_pts = smooth_curve(trainloss_lst)
            plt.plot(np.array(range(len(smth_pts))) * log_every, smth_pts)
            plt.xlabel('epochs')
            plt.title('Train Loss')
            plt.savefig('gmntm_trainloss.png')
            if test_data is not None:
                c_v, c_w2v, c_uci, c_npmi, mimno_tc, td = self.evaluate(test_data, calc4each=False)
                c_v_lst.append(c_v)
                c_w2v_lst.append(c_w2v)
                c_uci_lst.append(c_uci)
                c_npmi_lst.append(c_npmi)
                mimno_tc_lst.append(mimno_tc)
                td_lst.append(td)
                scrs = {'c_v': c_v_lst, 'c_w2v': c_w2v_lst, 'c_uci': c_uci_lst,
                        'c_npmi': c_npmi_lst, 'mimno_tc': mimno_tc_lst, 'td': td_lst}
                '''
                for scr_name, scr_lst in scrs.items():
                    plt.cla()
                    plt.plot(np.array(range(len(scr_lst))) * log_every, scr_lst)
                    plt.savefig(f'wlda_{scr_name}.png')
                '''
                plt.cla()
                for scr_name, scr_lst in scrs.items():
                    if scr_name in ['c_v', 'c_w2v', 'td']:
                        plt.plot(np.array(range(len(scr_lst))) * log_every, scr_lst, label=scr_name)
                plt.title('Topic Coherence')
                plt.xlabel('epochs')
                plt.legend()
                plt.savefig('gmntm_tc_scores.png')
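# mus_mutual_distance is defined on the VaDE module and its exact form is not
# shown here. One plausible sketch, purely as an assumption: a penalty that is
# small when the GMM cluster centers are far apart, so that minimizing the
# gamma-weighted term above spreads the centers out in latent space:
import torch

def mus_mutual_distance_sketch(mus, eps=1e-6):
    """Hypothetical penalty over cluster centers of shape [n_cluster, z_dim]."""
    dists = torch.cdist(mus, mus)           # pairwise Euclidean distances between centers
    n = mus.shape[0]
    mean_dist = dists.sum() / (n * (n - 1)) # mean over distinct ordered pairs
    return 1.0 / (mean_dist + eps)          # shrinks as centers move apart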
import pickle

import numpy as np
import matplotlib.pyplot as plt

from utils import smooth_curve
from main import init_arguments, main  # init_arguments is assumed to live in main alongside main

if __name__ == '__main__':
    model = 'TwoLayerNet'
    args = init_arguments().parse_args()
    args.models = [model]

    d_train_loss = {}
    for optimizer in ['SGD', 'Momentum', 'AdaGrad', 'Adam']:
        args.optimizer = optimizer
        print('------------------', args.optimizer, '------------------\n')
        trainer = main(args, feature_type='SURF', return_trainer=True)[model]
        d_train_loss[optimizer] = trainer.train_loss

    fn = trainer.fn.replace('Accuracy', 'loss_optimizers').replace('.txt', '.pkl')
    with open(fn, 'wb') as f:
        pickle.dump(d_train_loss, f, pickle.HIGHEST_PROTOCOL)  # dump the loss dict, not the filename

    plt.figure(figsize=(8, 6))
    for optimizer in ['SGD', 'Momentum', 'AdaGrad', 'Adam']:
        x = np.arange(len(d_train_loss[optimizer]))
        plt.plot(x, smooth_curve(d_train_loss[optimizer]), label=optimizer)
    plt.title('Plot of Training Loss of ' + model + ' with Different Optimizers')
    plt.xlabel('Iterations')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(fn.replace('.pkl', '.png'))
    print('The plot of training loss is saved as', fn.replace('.pkl', '.png'))
CMs = list()
CMs.append(confusion_matrix(test_labels, pred_labels))
CM = np.sum(CMs, axis=0)

FN = CM[1][0]
TP = CM[1][1]
FP = CM[0][1]
print("TP = {}".format(TP))
print("FP = {}".format(FP))
print("FN = {}".format(FN))

f1 = 2. * TP / (2. * TP + FP + FN)
print("F1 = {}".format(f1))

acc = history.history["acc"]
val_acc = history.history["val_acc"]
loss = history.history["loss"]
val_loss = history.history["val_loss"]
epochs = range(1, len(acc) + 1)

plt.plot(epochs, acc, 'o', label='Training acc')
plt.plot(epochs, smooth_curve(val_acc), label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel("epoch")
plt.legend()

plt.figure()
plt.plot(epochs, loss, 'o', label='Training loss')
plt.plot(epochs, smooth_curve(val_loss), label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel("epoch")
plt.legend()

plt.show()
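# Sanity check: the hand-computed F1 above (2*TP / (2*TP + FP + FN)) should
# match sklearn's binary f1_score on the same labels, assuming binary {0, 1}
# labels as implied by the 2x2 confusion matrix:
import numpy as np
from sklearn.metrics import f1_score

assert np.isclose(f1, f1_score(test_labels, pred_labels))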
for key in optimizers.keys():
    # Compute the gradients and update the parameters.
    grads = networks[key].gradient(train_img_batch, train_label_batch)
    optimizers[key].update(networks[key].params, grads)

    # Compute the loss.
    loss = networks[key].loss(train_img_batch, train_label_batch)
    train_loss[key].append(loss)

# Print the loss once every 100 iterations.
if i % 100 == 0:
    print("=========== " + "iteration: " + str(i) + " ===========")
    for key in optimizers.keys():
        print(key + ": " + str(train_loss[key][-1]))

# Plot the loss curves.
markers = {"SGD": "o", "Momentum": "x", "AdaGrad": "s", "Adam": "D"}
x = np.arange(iters_times)
for key in optimizers.keys():
    plt.plot(x, smooth_curve(train_loss[key]), marker=markers[key], markevery=100, label=key)
plt.xlabel("iterations")
plt.ylabel("loss")
plt.ylim(0, 1)
plt.legend()
plt.show()
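# The optimizers[key].update(params, grads) calls above mutate the parameter
# dict in place. For orientation, a minimal sketch of the simplest such
# optimizer (plain SGD); the repo's own implementations live elsewhere and
# may differ in detail:
class SGDSketch:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]  # in-place gradient step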
def train(self, train_data, batch_size=256, learning_rate=1e-3,
          test_data=None, num_epochs=100, is_evaluate=False, log_every=5,
          beta=1.0, criterion='cross_entropy', ckpt=None):
    self.vae.train()
    self.id2token = {v: k for k, v in train_data.dictionary.token2id.items()}  # Build the id-to-token mapping
    data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                             num_workers=4, collate_fn=train_data.collate_fn)  # Load the data in batches
    optimizer = torch.optim.Adam(self.vae.parameters(), lr=learning_rate)
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    if ckpt:  # Whether to resume training from a checkpoint
        self.load_model(ckpt["net"])
        optimizer.load_state_dict(ckpt["optimizer"])
        start_epoch = ckpt["epoch"] + 1
    else:
        start_epoch = 0

    trainloss_lst, valloss_lst = [], []  # Record the loss at every step for plotting at the end
    recloss_lst, klloss_lst = [], []     # The two VAE losses: reconstruction and KL
    c_v_lst, c_w2v_lst, c_uci_lst, c_npmi_lst, mimno_tc_lst, td_lst = [], [], [], [], [], []  # Track each metric for plotting
    for epoch in range(start_epoch, num_epochs):
        epochloss_lst = []
        for iter, data in enumerate(data_loader):
            optimizer.zero_grad()  # Zero the gradients
            txts, bows = data      # Unpack the batch
            bows = bows.to(self.device)
            '''
            n_samples = 20
            rec_loss = torch.tensor(0.0).to(self.device)
            for i in range(n_samples):
                bows_recon, mus, log_vars = self.vae(bows, lambda x: torch.softmax(x, dim=1))
                logsoftmax = torch.log_softmax(bows_recon, dim=1)
                _rec_loss = -1.0 * torch.sum(bows * logsoftmax)
                rec_loss += _rec_loss
            rec_loss = rec_loss / n_samples
            '''
            # Feed the BOW into the VAE to get the reconstruction plus the distribution's mu and log-var
            bows_recon, mus, log_vars = self.vae(bows, lambda x: torch.softmax(x, dim=1))

            # Compute the VAE reconstruction loss
            if criterion == 'cross_entropy':
                logsoftmax = torch.log_softmax(bows_recon, dim=1)
                rec_loss = -1.0 * torch.sum(bows * logsoftmax)
            elif criterion == 'bce_softmax':
                rec_loss = F.binary_cross_entropy(torch.softmax(bows_recon, dim=1), bows, reduction='sum')
            elif criterion == 'bce_sigmoid':
                rec_loss = F.binary_cross_entropy(torch.sigmoid(bows_recon), bows, reduction='sum')

            kl_div = -0.5 * torch.sum(1 + log_vars - mus.pow(2) - log_vars.exp())  # KL divergence
            loss = rec_loss + kl_div * beta  # Total loss

            loss.backward()  # Backpropagate
            optimizer.step()

            trainloss_lst.append(loss.item() / len(bows))
            epochloss_lst.append(loss.item() / len(bows))
            if (iter + 1) % 10 == 0:  # Periodic logging
                print(f'Epoch {(epoch+1):>3d}\tIter {(iter+1):>4d}\tLoss:{loss.item()/len(bows):<.7f}\tRec Loss:{rec_loss.item()/len(bows):<.7f}\tKL Div:{kl_div.item()/len(bows):<.7f}')
        #scheduler.step()
        if (epoch + 1) % log_every == 0:
            save_name = f'./ckpt/GSM_{self.taskname}_tp{self.n_topic}_{time.strftime("%Y-%m-%d-%H-%M", time.localtime())}_ep{epoch+1}.ckpt'
            checkpoint = {
                "net": self.vae.state_dict(),
                "optimizer": optimizer.state_dict(),
                "epoch": epoch,
                "param": {
                    "bow_dim": self.bow_dim,
                    "n_topic": self.n_topic,
                    "taskname": self.taskname
                }
            }
            torch.save(checkpoint, save_name)  # Save the model parameters
            # The code lines between this and the next comment lines are duplicated with WLDA.py; consider simplifying them.
            # Below is the loss-plotting part
            print(f'Epoch {(epoch+1):>3d}\tLoss:{sum(epochloss_lst)/len(epochloss_lst):<.7f}')
            print('\n'.join([str(lst) for lst in self.show_topic_words()]))
            print('=' * 30)
            smth_pts = smooth_curve(trainloss_lst)
            plt.plot(np.array(range(len(smth_pts))) * log_every, smth_pts)
            plt.xlabel('epochs')
            plt.title('Train Loss')
            plt.savefig('gsm_trainloss.png')
            if test_data is not None:
                c_v, c_w2v, c_uci, c_npmi, mimno_tc, td = self.evaluate(test_data, calc4each=False)  # Evaluate the topic metrics
                c_v_lst.append(c_v)
                c_w2v_lst.append(c_w2v)
                c_uci_lst.append(c_uci)
                c_npmi_lst.append(c_npmi)
                mimno_tc_lst.append(mimno_tc)
                td_lst.append(td)
                scrs = {'c_v': c_v_lst, 'c_w2v': c_w2v_lst, 'c_uci': c_uci_lst,
                        'c_npmi': c_npmi_lst, 'mimno_tc': mimno_tc_lst, 'td': td_lst}
                '''
                for scr_name, scr_lst in scrs.items():
                    plt.cla()
                    plt.plot(np.array(range(len(scr_lst))) * log_every, scr_lst)
                    plt.savefig(f'wlda_{scr_name}.png')
                '''
                # Plot the metrics
                plt.cla()
                for scr_name, scr_lst in scrs.items():
                    if scr_name in ['c_v', 'c_w2v', 'td']:
                        plt.plot(np.array(range(len(scr_lst))) * log_every, scr_lst, label=scr_name)
                plt.title('Topic Coherence')
                plt.xlabel('epochs')
                plt.legend()
                plt.savefig('gsm_tc_scores.png')
                    nargs='+', type=int, default=(12, 10))
args = parser.parse_args()
args.models = [model]

d_train_loss = {}
for n_Ranges in args.n_Ranges_list:
    print('----> n_Ranges =', n_Ranges, '\n')
    args.n_Ranges = n_Ranges
    trainer = main(args, feature_type=feature_type, return_trainer=True)[model]
    d_train_loss[n_Ranges] = trainer.train_loss

fn = trainer.fn.replace('Accuracy', 'dict_loss_n_Ranges').replace('.txt', '.pkl')
with open(fn, 'wb') as f:
    pickle.dump(d_train_loss, f, pickle.HIGHEST_PROTOCOL)

plt.figure(figsize=args.plot_figsize_)
for n_Ranges in args.n_Ranges_list:
    x = np.arange(len(d_train_loss[n_Ranges]))
    plt.plot(x, smooth_curve(d_train_loss[n_Ranges]), label=n_Ranges)
plt.title('Plot of Training Loss of ' + model + ' with Different Numbers of Bin Cuts')
plt.xlabel('Iterations')
plt.ylabel('Loss')
plt.legend()
plt.savefig(fn.replace('.pkl', '.png'))
print('The plot of training loss is saved as', fn.replace('.pkl', '.png'))