def train(net, optimizer, trainloader, epoch):
    """Run one training epoch over ``trainloader`` and checkpoint the model.

    Bug fix: the original never reset ``start_time`` inside the loop, so
    ``duration`` measured time since the *epoch start* (not per iteration)
    and ``cumul_duration`` accumulated those cumulative values quadratically,
    making the printed ETA meaningless.

    Args:
        net: model to train; batches are moved to GPU via ``.cuda()``.
        optimizer: torch optimizer over ``net``'s parameters.
        trainloader: yields ``(sample, img)`` pairs; ``sample`` is a dict of
            tensors (``img`` is unused here).
        epoch: zero-based epoch index (displayed/saved as ``epoch + 1``).
    """
    train_step = len(trainloader)
    net.train()
    cumul_duration = 0
    start_time = time.time()
    for i, (sample, img) in enumerate(trainloader):
        for key in sample:
            sample[key] = sample[key].cuda()
        output = net(sample)
        loss = cal_loss(sample, output)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Per-iteration duration: restart the timer every step (bug fix).
        now = time.time()
        duration = now - start_time
        start_time = now
        cumul_duration += duration
        # ETA = mean step time so far * steps still to run (i + 1 are done).
        exp_ep_end = cumul_duration / (i + 1) * (train_step - i - 1)
        if (i + 1) % 10 == 0:
            print(
                'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Duration: {:.2f}, Epoch end: {:.0f}m {:.0f}s'
                .format(epoch + 1, args.epoch, i + 1, train_step, loss.item(),
                        duration, exp_ep_end / 60, exp_ep_end % 60))
    save_file = 'model_%02d.pkl'
    print('Saving Model : ' + save_file % (epoch + 1))
    torch.save(net.state_dict(), './models/' + save_file % (epoch + 1))
def train(net, optimizer, trainloader, epoch):
    """Train ``net`` for one epoch, reporting per-step loss and wall time.

    Progress is shown with a tqdm bar; every 10th step the loss and the
    iteration duration are written via ``tqdm.write`` so the bar is not
    disturbed. The model state dict is saved at the end of the epoch.
    """
    steps_per_epoch = len(trainloader)
    net.train()
    progress = tqdm(trainloader)
    for step, batch in enumerate(progress):
        tic = time.time()
        # Move every tensor in the batch dict onto the GPU.
        for name in batch:
            batch[name] = batch[name].cuda()
        prediction = net(batch)
        loss = cal_loss(batch, prediction)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        elapsed = time.time() - tic
        if (step + 1) % 10 == 0:
            tqdm.write(
                'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Time:{:.3f}'.
                format(epoch + 1, args.epoch, step + 1, steps_per_epoch,
                       loss.item(), elapsed))
    save_file = 'model_%02d.pkl'
    print('Saving Model : ' + save_file % (epoch + 1))
    torch.save(net.state_dict(), './models/' + save_file % (epoch + 1))
def validate(self):
    """Run one pass over the validation loader and return the average loss.

    Also logs loss/perplexity to wandb and saves a ``best_ckpt`` checkpoint
    when the average loss improves on ``self.best_val_loss``.

    Fixes:
      * bare ``except:`` narrowed to ``ZeroDivisionError`` (the only failure
        the fallback was written for: an empty ``val_loader``);
      * accumulate ``loss.item()`` instead of the tensor, so no tensors are
        retained across batches and ``avg_loss`` is a plain float.

    Returns:
        float: average validation loss over the loader.
    """
    self.model.eval()
    val_total_loss = 0.0
    mes = "Epoch {}, validation average loss:{:.4f}, Perplexity:{:.4f}"
    with torch.no_grad():
        for imgs, tgt4training, tgt4cal_loss in self.val_loader:
            imgs = imgs.to(self.device)
            tgt4training = tgt4training.to(self.device)
            tgt4cal_loss = tgt4cal_loss.to(self.device)
            # Scheduled-sampling probability for this step.
            epsilon = cal_epsilon(self.args.decay_k, self.total_step,
                                  self.args.sample_method)
            logits = self.model(imgs, tgt4training, epsilon)
            loss = cal_loss(logits, tgt4cal_loss)
            val_total_loss += loss.item()
    try:
        avg_loss = val_total_loss / len(self.val_loader)
    except ZeroDivisionError:
        # Empty validation loader: report the (zero) accumulated loss as-is.
        avg_loss = val_total_loss
    print(mes.format(self.epoch, avg_loss, 2**avg_loss))
    wandb.log({
        "epoch": self.epoch,
        "val avrg loss": avg_loss,
        "perplexit": 2**avg_loss
    })
    if avg_loss < self.best_val_loss:
        self.best_val_loss = avg_loss
        self.save_model('best_ckpt')
    return avg_loss
def eval_step(src, trg, model, device):
    """Evaluate the model on one batch.

    The target sequence is split teacher-forcing style: all tokens but the
    last feed the decoder, all tokens but the first are the labels.

    Returns:
        (loss_value, total, correct): scalar loss plus accuracy counts.
    """
    src, trg = src.to(device), trg.to(device)
    decoder_input = trg[:, :-1]
    gold = trg[:, 1:]
    logits = model.forward(src, decoder_input)
    loss = cal_loss(logits, gold)
    correct, total = cal_accruacy(logits, gold)
    return loss.item(), total, correct
def forward(self, embeddings):
    """Compute a similarity-matrix loss from speaker/utterance embeddings.

    Builds centroids, a cosine-similarity matrix scaled by the learned
    ``self.w`` and shifted by ``self.b``, and delegates to
    ``utils.cal_loss`` for the final scalar loss.
    """
    # NOTE(review): torch.clamp is NOT in-place and its return value is
    # discarded, so this line has no effect on self.w. Presumably the
    # intent was to constrain self.w (e.g. self.w.data.clamp_(hp.re_num));
    # confirm against hp.re_num's meaning before changing.
    torch.clamp(self.w, hp.re_num)
    centroids = utils.get_centroids(embeddings)
    cossim = utils.get_cossim(embeddings, centroids)
    # Learned affine rescaling of the cosine similarities.
    sim_matrix = self.w * cossim + self.b
    loss, _ = utils.cal_loss(sim_matrix)
    return loss
def main(args):
    """Train a VAE on MNIST, optionally saving sample grids and parameters.

    Improvements over the original:
      * ``args.save_fig != None`` → ``is not None`` (identity check);
      * dropped the deprecated ``Variable`` wrapper (no-op on modern torch);
      * ``loss.item()`` instead of ``loss.cpu().data`` so the epoch-loss
        list holds plain floats, not tensors;
      * ``os.makedirs(..., exist_ok=True)`` replaces the exists/mkdir pair.

    Args:
        args: namespace with data, batch_size, lr, max_epoch, save_fig,
            save_paras (and model hyper-parameters forwarded to the nets).
    """
    # NOTE(review): dataloader/input_dim are only defined for MNIST; any
    # other args.data value raises NameError below — confirm intended.
    if args.data == 'MNIST':
        data_path = '/home/szchen/Datasets/'
        input_dim = 28 * 28
        transform = transforms.Compose([transforms.ToTensor()])
        mnist = torchvision.datasets.MNIST(data_path,
                                           download=False,
                                           transform=transform,
                                           train=True)
        dataloader = torch.utils.data.DataLoader(mnist,
                                                 batch_size=args.batch_size,
                                                 shuffle=True)
    encoder = Encoder(input_dim=input_dim, args=args)
    decoder = Decoder(output_dim=input_dim, args=args)
    model = VAE(encoder=encoder, decoder=decoder, args=args).cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    total_loss = []
    for epoch in tqdm(range(args.max_epoch)):
        epoch_loss = []
        for input_data, label in dataloader:
            # Flatten images to (batch, 784); Variable wrapper not needed.
            input_data = input_data.view(-1, input_dim).cuda()
            predict_, z_mean, z_log_var = model(input_data)
            optimizer.zero_grad()
            loss = cal_loss(predict_, input_data, z_mean, z_log_var, args)
            epoch_loss.append(loss.item())
            loss.backward()
            optimizer.step()
        total_loss.append(np.mean(epoch_loss))
        if args.save_fig is not None and (epoch + 1) % args.save_fig == 0:
            # Periodically render a 4x4 grid of decoder samples.
            test_image = model.inference(16)
            test_image = test_image.view(-1, 28, 28).detach().cpu().numpy()
            utils.save_image(test_image, 'Epoch:{}.png'.format(epoch))
    if args.save_paras:
        os.makedirs('./param', exist_ok=True)
        torch.save(model.state_dict(), './param/parameters.pt')
    utils.draw_loss_curve(total_loss)
def train_step(src, trg, model, optim, device):
    """Run a single optimization step on one batch.

    The target is split teacher-forcing style (input = trg[:, :-1],
    labels = trg[:, 1:]); gradients are applied via ``optim``.

    Returns:
        (loss_value, total, correct): scalar loss plus accuracy counts.
    """
    optim.zero_grad()
    src, trg = src.to(device), trg.to(device)
    decoder_input = trg[:, :-1]
    gold = trg[:, 1:]
    logits = model.forward(src, decoder_input)
    loss = cal_loss(logits, gold)
    loss.backward()
    optim.step()
    correct, total = cal_accruacy(logits, gold)
    return loss.item(), total, correct
def forward(self, sents_tensor, lengths, target_tensor):
    """Forward pass that computes the training loss.

    Args:
        sents_tensor (torch.Tensor): input batch
            [batch_size, max_seq_len, embed_dim]
        lengths (torch.Tensor): pre-padding length of each sentence
            [batch_size]
        target_tensor (torch.Tensor): labels [batch_size, max_seq_len]

    Returns:
        The loss produced by ``cal_loss`` (padding masked via self.pad_id).
    """
    embedded = self.embedding_layer(sents_tensor)
    encoded, _ = self.bilstm(embedded, lengths)
    encoded = self.cls_dropout(encoded)
    feats = self.classifier(encoded)
    return cal_loss(feats, target_tensor, self.pad_id)
def train_step(self, imgs, tgt4training, tgt4cal_loss):
    """One gradient step: forward, loss, backward, clip, optimize.

    Increments ``self.step`` and ``self.total_step`` as a side effect.

    Returns:
        float: the scalar training loss for this batch.
    """
    self.optimizer.zero_grad()
    device = self.device
    imgs = imgs.to(device)
    tgt4training = tgt4training.to(device)
    tgt4cal_loss = tgt4cal_loss.to(device)
    # Scheduled-sampling probability for the current global step.
    epsilon = cal_epsilon(self.args.decay_k, self.total_step,
                          self.args.sample_method)
    logits = self.model(imgs, tgt4training, epsilon)
    batch_loss = cal_loss(logits, tgt4cal_loss)
    self.step += 1
    self.total_step += 1
    batch_loss.backward()
    # Gradient clipping guards against exploding gradients.
    clip_grad_norm_(self.model.parameters(), self.args.clip)
    self.optimizer.step()
    return batch_loss.item()
def train(net, optimizer, trainloader, epoch, base_epoch=0):
    """Train ``net`` for one epoch and save a checkpoint.

    Batch-dict entries whose key starts with ``'D_'`` are kept on the CPU;
    everything else is moved to the GPU. The checkpoint filename counts
    from ``base_epoch + epoch + 1``.
    """
    num_steps = len(trainloader)
    net.train()
    for step, batch in enumerate(trainloader):
        for name in batch:
            # 'D_'-prefixed entries deliberately stay on the CPU.
            if not name.startswith('D_'):
                batch[name] = batch[name].cuda()
        prediction = net(batch)
        loss = cal_loss(batch, prediction, loss_type=args.loss_type)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (step + 1) % 10 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                epoch + 1, args.epoch, step + 1, num_steps, loss.item()))
    save_file = 'model_%03d.pkl'
    print('Saving Model : ' + save_file % (base_epoch + epoch + 1))
    torch.save(net.state_dict(),
               './models/' + save_file % (base_epoch + epoch + 1))
def train(net, optimizer, trainloader, epoch):
    """Train ``net`` for one epoch (CPU fallback when CUDA is absent).

    Wraps the loader in tqdm for a progress bar, prints the loss every 10
    steps, and saves the state dict when the epoch finishes.
    """
    num_steps = len(trainloader)
    net.train()
    # Hoisted out of the loop: availability cannot change mid-run.
    use_cuda = torch.cuda.is_available()
    for step, batch in enumerate(tqdm(trainloader)):
        for name in batch:
            if use_cuda:
                batch[name] = batch[name].cuda()
        prediction = net(batch)
        loss = cal_loss(batch, prediction)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (step + 1) % 10 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                epoch + 1, args.epoch, step + 1, num_steps, loss.item()))
    save_file = 'model_%02d.pkl'
    print('Saving Model : ' + save_file % (epoch + 1))
    torch.save(net.state_dict(), './models/' + save_file % (epoch + 1))
def forward(args, model, train):
    """Run one full pass of the image-to-paragraph model over the dataset.

    When ``train`` is truthy the model is put in train mode, an optimizer is
    built (per ``args.optim``) and gradients are applied; otherwise the model
    only runs in eval mode. Note an optimizer is constructed fresh on every
    call, so optimizer state (e.g. Adam moments) does not persist across
    epochs driven by repeated calls — presumably intentional; verify.

    Returns:
        (avg_cost, perplexity, avg_sent_cost) averaged over the batches.
    """
    if train:
        model.train()
    else:
        model.eval()
    dataset = img2para_dataset(args, train)
    data_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        num_workers=args.num_workers,
        batch_size=args.batch_size,
        shuffle=train,
    )
    # rnn_params = {'p':[], 's':[], 'w':[]}
    # for name, param in model.named_parameters():
    #     if 'pRNN' in name:
    #         rnn_params['p'].append(param)
    #     elif 'sRNN' in name:
    #         rnn_params['s'].append(param)
    #     elif 'wRNN' in name:
    #         rnn_params['w'].append(param)
    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.learning_rate,
                               weight_decay=args.weight_decay)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=args.learning_rate,
                              weight_decay=args.weight_decay)
    elif args.optim == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=args.learning_rate,
                                  weight_decay=args.weight_decay)
    else:
        # Any unrecognized value falls back to Adagrad.
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=args.learning_rate,
                                  weight_decay=args.weight_decay)
    total_cost = 0
    total_word_cost = 0
    total_sent_cost = 0
    total_word_count = 0
    # total_sent_count = 0
    for batch_idx, batch_data in enumerate(data_loader):
        # Batch size read from element 0, which is then dropped below.
        real_batch_size = len(batch_data[0])
        batch_data = [_.to(args.device) for _ in batch_data[1:]]
        img_feats, densecap, para_words_labels, stop_labels, words_mask, densecap_mask, fake_words, fake_words_mask = batch_data
        predict_words, predict_stop = model(img_feats, para_words_labels,
                                            words_mask, fake_words,
                                            fake_words_mask)
        para_words_count = torch.sum(words_mask)
        word_cost, sent_cost = cal_loss(para_words_labels, predict_words,
                                        words_mask, stop_labels, predict_stop)
        # para_sents_count = torch.sum(stop_mask)
        # Combined objective: word loss plus weighted sentence-stop loss,
        # normalized by the true batch size.
        cost = (args.sent_cost_lambda * sent_cost +
                word_cost) / real_batch_size
        if train:
            optimizer.zero_grad()
            cost.backward()
            optimizer.step()
        total_cost += cost.item()
        total_word_cost += word_cost.item()
        total_sent_cost += sent_cost.item() / real_batch_size
        total_word_count += para_words_count
        # total_sent_count += para_sents_count
        if train:
            print(
                "batch: {0} loss: {1:.2f}, perp: {2:.2f}, sent loss: {3:.2f}".
                format(batch_idx, word_cost.item() / real_batch_size,
                       math.exp(word_cost.item() / para_words_count),
                       sent_cost.item() / real_batch_size))
    # NOTE(review): batch_idx is the LAST zero-based index, i.e. len - 1,
    # so these averages divide by (num_batches - 1) — off by one — and
    # raise ZeroDivisionError when the loader yields exactly one batch.
    # Presumably (batch_idx + 1) was intended; confirm before changing.
    return total_cost / batch_idx, math.exp(
        total_word_cost / total_word_count), total_sent_cost / batch_idx