def __train_epoch(self):
    """Run one training epoch over paired (Siamese-style) samples.

    Each batch provides ((samples1, samples2), targets); both sample
    tensors are forwarded together as a tuple. Returns the mean loss.
    """
    self.model.train()
    loss_sum = 0.0
    batch_count = 0
    progress = tqdm(enumerate(self.train_loader),
                    total=len(self.train_loader),
                    desc='Training',
                    file=sys.stdout)
    for batch_idx, data in progress:
        samples, targets = data
        samples1, samples2 = samples
        if self.cuda:
            samples1 = samples1.cuda()
            samples2 = samples2.cuda()
            targets = targets.cuda()
        self.optimizer.zero_grad()
        outputs = self.model((samples1, samples2))
        # Criterion expects float targets (e.g. a BCE-style loss).
        loss = self.criterion(outputs, targets.float())
        loss.backward()
        self.optimizer.step()
        # Running sum instead of np.mean over a growing list, which made
        # the per-batch progress update O(n^2) over the epoch.
        loss_sum += loss.item()
        batch_count += 1
        progress.set_description('Mean Training Loss: {:.4f}'.format(
            loss_sum / batch_count))
    # Match np.mean([]) semantics (NaN) for an empty loader.
    return loss_sum / batch_count if batch_count else float('nan')
def run_epoch(self, phase, data_loader, criterion):
    """Run one epoch of forward/loss (plus backward when phase == 'train').

    Every tensor in each batch dict is moved to self.device; the model is
    fed batch['input'] and the criterion receives the predictions together
    with the full batch dict. Returns the per-batch average loss.
    """
    is_train = (phase == 'train')
    if is_train:
        self.model.train()
    else:
        self.model.eval()
    total = 0.
    for batch in data_loader:
        # Ship every entry of the batch dict onto the target device.
        for key in batch:
            batch[key] = batch[key].to(device=self.device, non_blocking=True)
        if is_train:
            self.optimizer.zero_grad()
            with torch.enable_grad():
                preds = self.model(batch['input'])
                batch_loss = criterion(preds, batch)
                batch_loss.backward()
                self.optimizer.step()
        else:
            with torch.no_grad():
                preds = self.model(batch['input'])
                batch_loss = criterion(preds, batch)
        total += batch_loss.item()
    epoch_loss = total / len(data_loader)
    print('{} loss: {}'.format(phase, epoch_loss))
    return epoch_loss
def train(train_loader, model, criterion, optimizer, args):
    """Train `model` for one epoch, optionally freezing BatchNorm statistics.

    args.freeze_BN keeps all BatchNorm2d layers in eval mode so their running
    stats are not updated; args.gpu selects the CUDA device (None = CPU).
    """
    # switch to train mode
    model.train()
    if args.freeze_BN:
        # Keep BatchNorm running statistics frozen while the rest trains.
        for m in model.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
    run_loss = 0
    for i, (input, target) in enumerate(train_loader):
        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)
        # compute output
        output = model(input)
        loss = criterion(output, target)
        run_loss += loss.item()
        # Report every num_avg_iter batches. Using (i + 1) fixes the original
        # off-by-one where a "running avg" was printed at i == 0 after a
        # single batch (yet still divided by num_avg_iter).
        if (i + 1) % num_avg_iter == 0:
            print('Training loss running avg', run_loss / float(num_avg_iter))
            run_loss = 0
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
def train(self):
    """Train the Siamese network with a hand-rolled stepwise LR decay."""
    # Schedule hyper-parameters (counted in iterations, not epochs).
    max_iter=80000
    lr=0.00002
    decay_ratio=0.333   # multiplicative LR decay factor per schedule segment
    display=20          # print stats every `display` iterations
    snapshot=20000      # checkpoint every `snapshot` iterations
    step_index=0
    stepvalues=[40000,60000,80000]  # iteration boundaries of the decay segments
    g_steps=stepvalues[0]           # length of the current segment
    param_groups=[]
    # One param group per trainable tensor (allows per-tensor LR tuning).
    for key, value in self.siamese.named_parameters():
        if value.requires_grad:
            param_groups.append({'params': value, 'lr': lr})
    optimizer = optim.SGD(param_groups, lr=lr, momentum=0.9)
    step_index=0
    step=0  # iterations elapsed inside the current segment
    for i in range(max_iter):
        # pair_samples: presumably [N, 2, C, H, W]; y_np: pair labels
        # (1 = positive pair) — TODO confirm against pair_selector.
        pair_samples, y_np=self.pair_selector.get_data()
        pos_samples=Variable(torch.FloatTensor(pair_samples[:,0,:,:,:]).cuda())  #[N,C,H,W]
        neg_samples=Variable(torch.FloatTensor(pair_samples[:,1,:,:,:]).cuda())
        y=Variable(torch.FloatTensor(y_np).cuda())
        # y=torch.FloatTensor(y).contiguous().cuda(async=True)
        pos_feat, neg_feat=self.siamese(pos_samples, neg_samples)
        loss, dist=self.siamese_loss(pos_feat,neg_feat,y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Exponential decay within the current segment. NOTE(review): `rate`
        # is recomputed every iteration but only written to the optimizer at
        # segment boundaries below — the per-iteration update is commented out.
        rate=lr*np.power(decay_ratio,step/g_steps)
        # for param_group in optimizer.param_groups:
        #     param_group['lr']=rate
        if i%display==0:
            print('[Info][%d/%d] loss: %f, learn rate: %e'%(i,max_iter,loss, lr))
            dist=dist.data.cpu().numpy()
            pos_labels=(y_np==1)
            neg_labels=(y_np==0)
            # Mean pair distance per class, guarding against empty masks.
            pos_dist=np.mean(dist[pos_labels],axis=0) if len(np.nonzero(pos_labels)[0])>0 else 0
            neg_dist=np.mean(dist[neg_labels],axis=0) if len(np.nonzero(neg_labels)[0])>0 else 0
            print('pos pair dist: %f\nneg pair dist: %f'%(pos_dist,neg_dist))
        if i==stepvalues[step_index]:
            # Entered a new segment: commit the decayed LR and reset counters.
            for param_group in optimizer.param_groups:
                param_group['lr']=rate
            print('learn rate decay: %e'%rate)
            step=0
            lr=rate
            # The final boundary (80000) is never reached inside range(max_iter),
            # so stepvalues[step_index+1] stays in bounds.
            g_steps=stepvalues[step_index+1]-stepvalues[step_index]
            step_index+=1
        if i>0 and i%snapshot==0:
            torch.save(self.siamese.state_dict(), 'models_siamese/model_iter_%d.pkl'%i)
            print('Snapshot to models_siamese/model_iter_%d.pkl'%i)
        step+=1
    # Final checkpoint after the last iteration.
    torch.save(self.siamese.state_dict(), 'models_siamese/model_iter_%d.pkl'%max_iter)
def train(args, model, device, train_loader, optimizer, loss_func, epoch):
    """Train `model` for one epoch.

    `loss_func` must return a (location_loss, confidence_loss) pair; their
    sum is optimized. Progress is logged every args.log_interval batches.
    Returns the total (summed) loss over the epoch.
    """
    model.train()
    train_loss = 0
    running_loss = 0
    running_datasize = 0
    for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.to(device)
        targets = [target.to(device) for target in targets]
        optimizer.zero_grad()
        predictions = model(data)
        location_loss, confidence_loss = loss_func(predictions, targets)
        loss = location_loss + confidence_loss
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        running_loss += loss.item()
        running_datasize += 1
        if (batch_idx + 1) % args.log_interval == 0:
            # Fixed the mismatched brackets in the progress line
            # (was "[{}/{} ([{:.0f}%)]").
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.4e}'.format(
                epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader),
                running_loss / running_datasize))
            running_loss = 0
            running_datasize = 0
    return train_loss
def run_epoch(self, phase, data_loader, criterion):
    """Forward/loss (and backward for the train phase) over one epoch.

    The loader is consumed through an explicit iterator so tqdm can render a
    per-batch progress bar. Returns the mean per-batch loss.
    """
    training = (phase == 'train')
    if training:
        self.model.train()
    else:
        self.model.eval()
    running_loss = 0.
    # visualize the training process
    batches = iter(data_loader)
    for _ in tqdm(range(len(batches))):
        sample = next(batches)
        # Ship every tensor in the sample dict onto the compute device.
        for key in sample:
            sample[key] = sample[key].to(device=self.device, non_blocking=True)
        if training:
            self.optimizer.zero_grad()
            with torch.enable_grad():
                # The decoder call site lives elsewhere; only the model is
                # invoked here on the 'input' tensor.
                decoded = self.model(sample['input'])
                loss = criterion(decoded, sample)
                loss.backward()
                self.optimizer.step()
        else:
            with torch.no_grad():
                decoded = self.model(sample['input'])
                loss = criterion(decoded, sample)
        running_loss += loss.item()
    epoch_loss = running_loss / len(batches)
    print('{} loss: {}'.format(phase, epoch_loss))
    return epoch_loss
def train_loop(dataloader, model, loss_fn, optimizer, scheduler=None):
    """Run one epoch over `dataloader`; step `scheduler` once at the end if given.

    Each batch dict must carry 'x', 'z' and 'label' tensors; loss is printed
    every 10 batches.
    """
    size = len(dataloader.dataset)
    for batch, sample in enumerate(dataloader):
        x = sample['x'].to(device)
        z = sample['z'].to(device)
        label = sample['label'].to(device)
        pred = model(x, z)
        batch_loss = loss_fn(pred, label)
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        if batch % 10 == 0:
            loss, current = batch_loss.item(), batch * len(x)
            print("loss : {:>7f} [{:>5d}/{:>5d}]".format(loss, current, size))
    if scheduler:
        scheduler.step()
def train_batch(self, X, Y):
    """Run a single optimization step on one (X, Y) batch.

    Returns the scalar loss as a Python float.
    """
    inputs, labels = X.to(self.device), Y.to(self.device)
    predictions = self.model(inputs)
    batch_loss = self.criterion(predictions, labels)
    self.optimizer.zero_grad()
    batch_loss.backward()
    self.optimizer.step()
    return float(batch_loss)
def train(train_loader, model, criterion, optimizer, epoch):
    """One epoch of colorization training against soft-encoded ab targets.

    Logs timing/loss every args.print_freq batches, checkpoints every 5000
    batches, and writes sample images to TensorBoard every 1000 batches.
    """
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    end = time.time()
    for i, (img, target) in enumerate(train_loader):
        # measure data loading time
        if img is None:
            # The dataset may yield None for unreadable samples — skip them.
            continue
        data_time.update(time.time() - end)
        # Soft-encode the ab channels into the quantized color-bin distribution.
        encoded_target = Variable(utils.soft_encode_ab(target).float(),
                                  requires_grad=False).cuda()
        var = Variable(img.float(), requires_grad=True).cuda()
        # compute output
        output = model(var)
        # record loss
        loss = criterion(output, encoded_target)
        if torch.isnan(loss):
            # Skip the update rather than poisoning the weights with NaN grads.
            print('NaN value encountered in loss.')
            continue
        # measure accuracy and record loss
        #prec1, = accuracy(output.data, target)
        losses.update(loss.data, var.size(0))
        # compute gradient and do SGD step
        backwardTime = time.time()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                      epoch, i, len(train_loader),
                      batch_time=batch_time, data_time=data_time, loss=losses))
        if (i+1) % 5000 == 0:
            print("Saving checkpoint...")
            save_checkpoint({
                'epoch': epoch,
                'state_dict': model.state_dict(),
            }, args.reduced)
        if (i+1) % 1000 == 0:
            start = time.time()
            # Show at most batch_size//4 (min 2) colorized samples.
            batch_num = np.maximum(args.batch_size//4,2)
            idx = i + epoch*len(train_loader)
            imgs = utils.getImages(img, target, output.detach().cpu(), batch_num)
            writer.add_image('data/imgs_gen', imgs, idx)
            print("Img conversion time: ", time.time() - start)
        # Global-step-indexed training loss for TensorBoard.
        writer.add_scalar('data/loss_train', losses.avg, i + epoch*len(train_loader))
def train(trainloader, t_model, s_model, criterion, optimizer, epoch, use_cuda, args):
    """Knowledge-distillation epoch: frozen teacher `t_model` guides `s_model`.

    Only the KL distillation loss is optimized; the CE loss is tracked for
    reporting. Returns (avg loss, avg KL, avg CE, top-1, top-5).
    """
    # switch to train mode
    global kd_loss_fun, cmclloss_v1, indeploss, mclloss
    t_model.eval()
    s_model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_kl = AverageMeter()
    losses_ce = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    for batch_idx, (batch_data) in enumerate(trainloader):
        # measure data loading time
        if len(batch_data) == 2:
            inputs, targets = batch_data
        else:
            inputs, targets, indexes = batch_data
        data_time.update(time.time() - end)
        if use_cuda:
            # `async=True` was a Python-2-era kwarg and is a SyntaxError on
            # Python >= 3.7; `non_blocking=True` is the supported spelling.
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        inputs, targets = torch.autograd.Variable(
            inputs), torch.autograd.Variable(targets)
        # compute output
        t_outputs = t_model(inputs)
        s_outputs = s_model(inputs)
        t_prec1, t_prec5 = accuracy(t_outputs.data, targets.data, topk=(1, 5))
        # measure accuracy and record loss
        prec1, prec5 = accuracy(s_outputs.data, targets.data, topk=(1, 5))
        # Teacher logits are detached so no gradient flows into t_model.
        loss_kl = kd_loss_fun(s_outputs, t_outputs.detach(), targets)
        loss_ce = criterion(s_outputs, targets)
        loss = loss_kl  # CE is monitored only, not optimized
        losses.update(loss.item(), inputs.size(0))
        losses_kl.update(loss_kl.item(), inputs.size(0))
        losses_ce.update(loss_ce.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        progress_bar(
            batch_idx, len(trainloader),
            'Loss: %.2f | KLloss: %.2f | ce_loss: %.2f | Top1: %.2f | Top5: %.2f | t_top1: %.2f | t_top5: %.2f'
            % (losses.avg, loss_kl, loss_ce, top1.avg, top5.avg, t_prec1, t_prec5))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
    return (losses.avg, losses_kl.avg, losses_ce.avg, top1.avg, top5.avg)
def train(self):
    """Main training loop with periodic validation, TensorBoard logging and
    LR scheduling every cfg.verbose_step iterations."""
    self.model.train()
    # Sanity-check the restored checkpoint before (re)starting training.
    val_loss, val_psnr = self.evaluate(self.step - 1, self.start_epoch)
    print(
        "[*] Preliminary check: Epoch: {} Step: {} Validation Loss: {:.5f} PSNR: {:.3f}"
        .format(self.start_epoch, (self.step - 1), val_loss, val_psnr))
    print('-' * 40)
    # Resume training from stopped epoch
    for epoch in range(self.start_epoch, self.cfg.num_epochs):
        step_loss = 0
        start_time = time.time()
        for idx, (noisy, clean) in enumerate(self.train_loader, start=1):
            # Input/Target
            noisy = noisy.to(self.device, dtype=torch.float)
            clean = clean.to(self.device, dtype=torch.float)
            # BackProp
            self.optimizer.zero_grad()
            output = self.model(noisy)
            loss = self.criterion(output, clean)
            loss.backward()
            self.optimizer.step()
            # STATS
            step_loss += loss.item()
            if idx % self.cfg.verbose_step == 0:
                # NOTE: self.step counts verbose checkpoints, not raw batches.
                val_loss, val_psnr = self.evaluate(self.step, epoch)
                self.writer.add_scalar("Loss/Train",
                                       step_loss / self.cfg.verbose_step,
                                       self.step)
                self.writer.add_scalar("Loss/Validation", val_loss, self.step)
                self.writer.add_scalar(
                    "Stats/LR", self.optimizer.param_groups[0]['lr'],
                    self.step)
                self.writer.add_scalar("Stats/PSNR", val_psnr, self.step)
                print(
                    "[{}/{}/{}] Loss [T/V]: [{:.5f}/{:.5f}] PSNR: {:.3f} LR: {} Time: {:.1f} Output: [{}-{}]"
                    .format(epoch, self.step, idx,
                            (step_loss / self.cfg.verbose_step), val_loss,
                            val_psnr, self.optimizer.param_groups[0]['lr'],
                            (time.time() - start_time),
                            torch.min(output).item(),
                            torch.max(output).item()))
                self.step += 1
                # LR schedule: fixed step decay, or plateau keyed on val loss.
                if self.cfg.scheduler == "step":
                    self.lr_sch.step()
                elif self.cfg.scheduler == "plateau":
                    self.lr_sch.step(metrics=val_loss)
                step_loss, start_time = 0, time.time()
                # NOTE(review): presumably evaluate() switched to eval mode;
                # restore train mode here — confirm against evaluate().
                self.model.train()
def train_model(model, criterion, optimizer_ft, scheduler, epoch):
    """Train one epoch with either per-sample weights (stage 2) or a
    sigmoid ramp-up loss weight (stage 1)."""
    # Pre-1.1.0 PyTorch convention: step the LR scheduler before the epoch.
    scheduler.step()
    # Sigmoid ramp-up of the loss weight over args.LabelWt epochs.
    lambda1 = sigmoid_rampup(epoch, args.LabelWt)
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()
    model.train()
    correct = 0
    total = 0
    end = time.time()
    for batch_idx, (inputs, targets, weights) in enumerate(dataloaders_train):
        if use_gpu:
            inputs = Variable(inputs.cuda())
            targets = Variable(targets.cuda())
            weights = Variable(weights.cuda())
        data_time.update(time.time() - end)
        optimizer_ft.zero_grad()
        outputs = model(inputs)
        # Stage 2 passes per-sample weights; stage 1 passes the ramp-up weight.
        if args.stage2:
            loss = criterion(outputs, targets, weights)
        else:
            loss = criterion(outputs, targets, lambda1)
        loss.backward()
        optimizer_ft.step()
        train_loss.update(loss.item(), inputs.size(0))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # Running top-1 accuracy over the epoch.
        _, predicted = outputs.max(1)
        correct += predicted.eq(targets).sum().item()
        total += inputs.size(0)
        if batch_idx % 10 == 0:
            print('Epoch: [{}][{}/{}] '
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f}) '
                  'Accu: {:.2f}'.format(epoch, batch_idx,
                                        len(dataloaders_train),
                                        100. * correct / total,
                                        batch_time=batch_time,
                                        data_time=data_time,
                                        train_loss=train_loss))
    # Per-epoch TensorBoard scalars.
    writer.add_scalar('training acc (train)', 100. * correct / total, epoch)
    writer.add_scalar('loss', train_loss.avg, epoch)
def train_model(model, criterion, optimizer, dataload, valdataloader, num_epochs=2):
    """Train with LR halving every 5 epochs, validating after each epoch.

    Note: `criterion` and `optimizer` are kept for interface compatibility,
    but the loop optimizes `cross_entropy2d` and rebuilds an Adam optimizer
    whenever the LR is halved (which resets Adam's moment estimates).
    Returns the trained model.
    """
    setDir(args.logdir)
    writer = SummaryWriter(args.logdir)
    max_val_acc = 0
    lr = args.lr
    for epoch in range(num_epochs):
        # Halve the LR every 5 epochs. The epoch > 0 guard fixes the original
        # bug where `epoch % 5 == 0` halved the LR immediately at epoch 0,
        # so training never actually started at args.lr.
        if epoch > 0 and epoch % 5 == 0:
            lr = lr / 2
            optimizer = optim.Adam(model.parameters(), lr=lr)
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        dataset_size = len(dataload.dataset)
        epoch_loss = 0
        step = 0  # minibatch counter
        val_loss = 0
        val_epoch_acc = 0
        val_step = 0
        model.train()
        for x, y in dataload:
            optimizer.zero_grad()  # clear gradients for every minibatch
            inputs = x.to(device)
            labels = y.to(device)
            outputs = model(inputs)  # forward pass
            loss = cross_entropy2d(outputs, torch.squeeze(labels, dim=1).long())
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            step += 1
            print("EPOCH:%d,%d/%d,train_loss:%0.3f" %
                  (epoch, step, dataset_size // dataload.batch_size, loss.item()))
        # Validation: no gradients needed, and eval mode gives deterministic
        # BatchNorm/Dropout behaviour (the original accumulated autograd
        # state here and validated in train mode).
        model.eval()
        with torch.no_grad():
            for x, y in valdataloader:
                inputs = x.to(device)
                labels = y.to(device)
                outputs = model(inputs)
                val_acc = accuracy(outputs, torch.squeeze(labels, dim=1).long())
                loss = cross_entropy2d(outputs, torch.squeeze(labels, dim=1).long())
                val_epoch_acc += float(val_acc.numpy())
                val_loss += loss.item()
                val_step += 1
        val_epoch_acc = val_epoch_acc / val_step
        writer.add_scalars('train_epoch_loss', {'epoch_loss': epoch_loss}, epoch)
        writer.add_scalars('val_epoch_loss', {'val_loss': val_loss}, epoch)
        writer.add_scalars('val_epoch_acc', {'val_epoch_acc': val_epoch_acc}, epoch)
        print("epoch %d loss:%0.3f val_loss:%0.3f, val_acc:%0.3f" %
              (epoch, epoch_loss, val_loss, val_epoch_acc))
        torch.save(model.state_dict(),
                   os.path.join(args.weight, 'weights_%d.pth' % epoch))
        torch.cuda.empty_cache()
    writer.close()
    return model
def train(epoch):
    """Train fcn_model for one epoch; checkpoints every 20 epochs."""
    fcn_model.train()  # train mode
    total_loss = 0.
    for batch_idx, (imgs, labels) in enumerate(train_loader):
        N = imgs.size(0)
        if use_cuda:
            imgs = imgs.cuda()
            labels = labels.cuda()
        imgs_tensor = Variable(imgs)    # e.g. [N, 3, 320, 320]
        labels_tensor = Variable(labels)  # e.g. [N, 320, 320]
        out = fcn_model(imgs_tensor)    # e.g. [N, 21, 320, 320]
        loss = criterion(out, labels_tensor)
        loss /= N  # normalize by batch size
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()  # update all parameters
        # loss.data[0] was removed in PyTorch 0.4; item() returns the float.
        total_loss += loss.item()
        if (batch_idx) % 20 == 0:
            print('train epoch [%d/%d], iter[%d/%d], lr %.7f, aver_loss %.5f' %
                  (epoch, epoch_num, batch_idx, len(train_loader),
                   learning_rate, total_loss / (batch_idx + 1)))
    # `total_loss is not np.nan` was an identity test that is always True
    # for a freshly computed float; check finiteness for real.
    assert np.isfinite(total_loss)
    # model save
    if (epoch) % 20 == 0:
        torch.save(fcn_model.state_dict(),
                   './pretrained_models/model%d.pth' % epoch)
    total_loss /= len(train_loader)
    print('train epoch [%d/%d] average_loss %.5f' %
          (epoch, epoch_num, total_loss))
def train(epoch_idx, mAP):
    """One epoch over paired (visual, audio) bags split into `ds` slices.

    Trains model `f` with a per-slice bag-level CE loss plus an instance-level
    AH loss, summed across all slices of the batch.
    """
    # return
    f.train()
    logging.info('In epoch {}:\n'.format(epoch_idx + 1))
    for batch_idx, (posv, posa, negv, nega, pos_label, neg_label) in enumerate(train_loader):
        opt.zero_grad()
        # b,p,dim = axi.shape
        posv = posv.to(device)
        posa = posa.to(device)
        negv = negv.to(device)
        nega = nega.to(device)
        b1, ds, _, _ = posv.shape  # b1 = positive-bag count, ds = slice count
        vfeat = torch.cat((posv, negv), 0)
        afeat = torch.cat((posa, nega), 0)
        pos_label = pos_label.to(device)
        neg_label = neg_label.to(device)
        label = torch.cat((pos_label, neg_label), 0).long().squeeze(-1)
        # pdb.set_trace()
        ins_scores, bag_predicts = f(vfeat, afeat)
        # print(ins_scores)
        loss = 0
        ahs = []
        ces = []
        for i in range(ds):
            bag_predict = bag_predicts[i]
            ins_score = ins_scores[i]
            celoss = CELoss(bag_predict, label[:, i])
            # The first b1 rows are positive bags, the remainder negative.
            ahloss = AHLoss(ins_score[:b1, :], ins_score[b1:, :])
            # ahs.append(ahloss)
            # ces.append(celoss)
            # NOTE(review): once `loss` is a tensor, `loss == 0` is a tensor
            # comparison; it only acts as a "first iteration" test as long as
            # the accumulated loss is never exactly zero — verify.
            if loss == 0:
                loss = celoss + ahloss
            else:
                loss = loss + celoss + ahloss
        # ahs = torch.stack(ahs)
        # ces = torch.stack(ces)
        # loss = torch.mean(ahs)+torch.mean(celoss)
        print(
            "In epoch {}, [{}/{}]: loss: {:.6f}, max avg_mAP: {:.4f}, current test mAP: {:.4f}"
            .format(epoch_idx + 1, batch_idx, len(train_loader), loss.item(),
                    maxavgMap, avg_mAP))
        print("current mAP: {};".format(mAP))
        print("max mAP: {};".format(maxMAP))
        # recoder.update('loss', loss.data, epoch_idx*len(train_loader)+batch_idx)
        loss.backward()
        opt.step()
        # loss = attloss+att_visual_i_loss+att_audio_i_loss+att_visual_j_loss+att_audio_j_loss #0.6543
        # NOTE(review): only the last slice's celoss/ahloss are recorded here.
        recoder.update('celoss', celoss.item(), epoch_idx)
        recoder.update('ahloss', ahloss.item(), epoch_idx)
        recoder.save()
def iterate(self, epoch: int, phase: str):
    """Run one epoch for `phase` ('train' enables backward + optimizer steps).

    Returns the epoch mean of the composite HoVer loss just stored.
    """
    self.net.train(phase == "train")
    dataloader = self.dataloaders[phase]
    # self.meter.on_epoch_begin(epoch, phase)
    for itr, (images, targets) in tqdm(enumerate(dataloader), total=len(dataloader)):
        images = images.to(self.device).float()
        N = images.shape[0]
        # Three heads: nuclei presence (np), horizontal/vertical maps (hv),
        # and nuclei classification (nc).
        np_logits, hv_logits, nc_logits = self.net(images)
        np_targets = utils.get_np_targets(targets[:, 0, :, :])
        hv_targets = utils.get_hv_targets(targets[:, 0, :, :])
        nc_targets = utils.get_nc_targets(targets[:, 1, :, :])
        np_targets = np_targets.to(self.device)
        hv_targets = hv_targets.to(self.device)
        nc_targets = nc_targets.to(self.device)
        assert np_targets.shape == (N, 256, 256) and hv_targets.shape == (N, 2, 256, 256) \
            and nc_targets.shape == (N, 256, 256)
        loss, loss_np, loss_hv, loss_nc = self.hoverloss(np_logits, np_targets,
                                                         hv_logits, hv_targets,
                                                         nc_logits, nc_targets)
        if phase == "train":
            loss.backward()
            self.optimizer.step()
            self.optimizer.zero_grad()
            # NOTE(review): the scheduler is stepped once per *batch* with the
            # epoch index — confirm the scheduler expects per-batch calls.
            self.scheduler.step(epoch)
        # Update metrics for this batch
        with torch.no_grad():
            loss = loss.detach()
            loss_np = loss_np.detach()
            loss_nc = loss_nc.detach()
            loss_hv = loss_hv.detach()
            # np and hv terms are rescaled (2x / 40x) for logging only.
            self.epoch_loss['loss'].append(loss.item())
            self.epoch_loss['loss_np'].append(2*loss_np.item())
            self.epoch_loss['loss_nc'].append(loss_nc.item())
            self.epoch_loss['loss_hv'].append(40*loss_hv.item())
    # Store per-epoch means, then reset the accumulators.
    self.store[phase]['loss'].append(sum(self.epoch_loss['loss']) / len(self.epoch_loss['loss']))
    self.store[phase]['loss_np'].append(sum(self.epoch_loss['loss_np']) / len(self.epoch_loss['loss_np']))
    self.store[phase]['loss_nc'].append(sum(self.epoch_loss['loss_nc']) / len(self.epoch_loss['loss_nc']))
    self.store[phase]['loss_hv'].append(sum(self.epoch_loss['loss_hv']) / len(self.epoch_loss['loss_hv']))
    # NOTE(review): resetting to [0] (not []) seeds the next epoch's mean
    # with a zero sample — verify this bias is intentional.
    self.epoch_loss['loss'] = [0]
    self.epoch_loss['loss_np'] = [0]
    self.epoch_loss['loss_nc'] = [0]
    self.epoch_loss['loss_hv'] = [0]
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    return self.store[phase]['loss'][-1]
def train(model, criterion, optimizer, input_img_gt):
    """Run a single optimization step on one image/ground-truth pair.

    Returns (loss, dice) as numpy values.
    """
    model.train()
    prediction = model(input_img_gt['img'])
    step_loss = criterion(prediction, input_img_gt['gt'])
    # The dice score is for monitoring only — keep it out of the graph.
    with torch.no_grad():
        dice = dscloss(prediction, input_img_gt['gt'])
    optimizer.zero_grad()
    step_loss.backward()
    optimizer.step()
    return step_loss.detach().cpu().numpy(), dice.detach().cpu().numpy()
def train(model, config, epoch):
    """One epoch over the source-domain loader.

    Uses the module-level `optimizer` and `criterion`; trains both the
    feature extractor and the class classifier heads.
    """
    model.class_classifier.train()
    model.feature.train()
    for step, (features, labels) in enumerate(config['source_train_loader']):
        if torch.cuda.is_available():
            features, labels = features.cuda(), labels.cuda()
        optimizer.zero_grad()
        predictions = model.class_classify(features)
        step_loss = criterion(predictions, labels)
        step_loss.backward()
        optimizer.step()
def train(epoch):
    """One training epoch; saves ./models/temp.pth and returns the mean loss."""
    fcn_model.train()  # train mode
    total_loss = 0.
    st = time.time()
    for batch_idx, (imgs, labels, Image_Path) in enumerate(train_loader):
        if use_cuda:
            imgs = imgs.cuda()
            labels = labels.cuda()
        imgs_tensor = Variable(imgs)  # e.g. [N, 3, 320, 320]
        target = Variable(labels)     # e.g. [N, 320, 320]
        out = fcn_model(imgs_tensor)  # e.g. [N, 21, 320, 320]
        loss = criterion(out, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()  # update all parameters
        total_loss += loss.item()
        if (batch_idx) % 20 == 0:
            ed = time.time()
            print(
                'train epoch [%d/%d], iter[%d/%d], lr %.7f, aver_loss %.5f, time_use = %.1f'
                % (epoch, epochs, batch_idx, len(train_loader), learning_rate,
                   total_loss / (batch_idx + 1), ed - st))
            st = ed
    # `total_loss is not np.nan` compared object identity and always passed;
    # verify finiteness for real.
    assert np.isfinite(total_loss)
    torch.save(fcn_model.state_dict(), './models/temp.pth')
    total_loss /= len(train_loader)
    print('train epoch [%d/%d] average_loss %.5f' % (epoch, epochs, total_loss))
    return total_loss
def train(training_data_loader, optimizer, model, criterion, epoch):
    """Run one training epoch, printing loss/time every 100 iterations
    and once more at the final iteration."""
    model.train()
    currtime = time.time()
    for iteration, batch in enumerate(training_data_loader, 1):
        source, target = trainPrepare(batch)
        output = model(source)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Guard clause: skip logging except at the reporting points.
        if iteration % 100 != 0 and iteration != len(training_data_loader):
            continue
        usetime = time.time() - currtime
        currtime = time.time()
        print(
            f"===> Epoch[{epoch+1}]({iteration}/{len(training_data_loader)}): Loss: {loss.item():.6f}, Time: {usetime:.4f}"
        )
def train(epoch):
    """One training epoch with per-batch TensorBoard logging."""
    fcn_model.train()
    total_loss = 0.
    for batch_idx, (imgs, labels) in enumerate(train_loader):
        N = imgs.size(0)
        if use_cuda:
            imgs = imgs.cuda()
            labels = labels.cuda()
        imgs = Variable(imgs)
        labels = Variable(labels)
        out = fcn_model(imgs)
        loss = criterion(out, labels)
        loss /= N  # normalize by batch size
        # visualize scalars
        writer.add_scalar("loss", loss, batch_idx)
        writer.add_scalar("total_loss", total_loss, batch_idx)
        writer.add_scalars('loss/scalar_group', {
            "loss": batch_idx * loss,
            "total_loss": batch_idx * total_loss
        })
        writer.add_image('Image', imgs, batch_idx)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # loss.data[0] was removed in PyTorch 0.4; item() is the supported API.
        total_loss += loss.item()
        if (batch_idx) % 20 == 0:
            print('train epoch [%d/%d], iter[%d/%d], lr %.5f, aver_loss %.5f' %
                  (epoch, epoch_num, batch_idx, len(train_loader),
                   learning_rate, total_loss / (batch_idx + 1)))
    # model save
    if (epoch) % 5 == 0:
        torch.save(fcn_model.state_dict(), 'params.pth')
    # Identity comparison with np.nan was always True; check finiteness.
    assert np.isfinite(total_loss)
    total_loss /= len(train_loader)
    print('train epoch [%d/%d] average_loss %.5f' %
          (epoch, epoch_num, total_loss))
def train(epoch_idx, mAP):
    """One epoch of bag-level CE plus instance-level AH training for model `f`."""
    f.train()
    logging.info('In epoch {}:\n'.format(epoch_idx + 1))
    for batch_idx, (posv, posa, negv, nega, pos_label, neg_label) in enumerate(train_loader):
        opt.zero_grad()
        # b,p,dim = axi.shape
        posv = posv.to(device)
        posa = posa.to(device)
        negv = negv.to(device)
        nega = nega.to(device)
        b1, _, _ = posv.shape  # b1 = number of positive bags in the batch
        vfeat = torch.cat((posv, negv), 0)
        afeat = torch.cat((posa, nega), 0)
        pos_label = pos_label.to(device)
        neg_label = neg_label.to(device)
        label = torch.cat((pos_label, neg_label), 0).long().view(-1)
        # pdb.set_trace()
        ins_scores, bag_predict = f(vfeat, afeat)
        # print(ins_scores)
        celoss = CELoss(bag_predict, label)
        # The first b1 rows are the positive bags, the remainder negative.
        ahloss = AHLoss(ins_scores[:b1, :], ins_scores[b1:, :])
        # pdb.set_trace()
        loss = celoss + ahloss
        # loss = ahloss
        # Progress line differs per dataset (youtube reports overall test mAP).
        if args.dataset != "youtube":
            print(
                "Domain: {}; In epoch {}, [{}/{}]: loss: {:.6f}, max mAP_5: {:.4f}, current mAP_5: {:.4f}"
                .format(args.domain, epoch_idx + 1, batch_idx,
                        len(train_loader), loss.item(), max_mAP_5, mAP_5))
        else:
            print(
                "Domain: {}; In epoch {}, [{}/{}]: loss: {:.6f}, max test mAP: {:.4f}, current test mAP: {:.4f}"
                .format(args.domain, epoch_idx + 1, batch_idx,
                        len(train_loader), loss.item(), max_mAP, mAP))
        # recoder.update('loss', loss.data, epoch_idx*len(train_loader)+batch_idx)
        loss.backward()
        opt.step()
        # loss = attloss+att_visual_i_loss+att_audio_i_loss+att_visual_j_loss+att_audio_j_loss #0.6543
        recoder.update('celoss', celoss.item(), epoch_idx)
        recoder.update('ahloss', ahloss.item(), epoch_idx)
        recoder.save()
def training(self, epoch):
    """One training epoch with a colored tqdm bar, running F1 tracking and
    TensorBoard image/scalar logging."""
    pbar = tqdm(total=self.n_train,
                desc=f'Epoch {epoch + 1}/{self.num_epoch}',
                unit='img',
                bar_format='{l_bar}%s{bar:10}%s{r_bar}{bar:-10b}' % (Fore.RED, Fore.RESET))
    mean_loss, mean_score = 0, 0
    self.net.train()
    n_iter = len(self.loader_train)
    for k, btchs in enumerate(self.loader_train):
        imgs = btchs[0].to(device=self.dvc_main, dtype=self.dtype)
        labels = btchs[1].to(device=self.dvc_main, dtype=self.dtype)
        # Fractional epoch index passed to the scheduler (per-batch schedule).
        self.scheduler.step(epoch + k / n_iter)
        self.optim.zero_grad()
        preds = self.net(imgs)
        loss = self.criterion(preds, labels)
        loss.backward()
        self.optim.step()
        # Metric bookkeeping only — keep it outside the autograd graph.
        with torch.no_grad():
            img_dt = imgs.data
            label_dt = labels.data
            pred_dt = preds.data
            mean_score += F1Score(pred_dt, label_dt)
            mean_loss += loss.item()
            lrs = f"{self.scheduler.get_last_lr()[0]:.3f}"
            pbar.set_postfix(**{self.name_loss: mean_loss / (k + 1),
                                'F1Score': mean_score / (k + 1),
                                'LRs' : lrs})
            pbar.update(imgs.shape[0])
            if k == 0:
                # Log the first batch's images once per epoch.
                img_dict = {'Train/': img_dt,
                            'Train/true': label_dt,
                            'Train/pred': pred_dt}
                self.writing(epoch, self.writer_main, img_dict, opt='image')
    scalar_dict = {self.name_loss: mean_loss / (n_iter + 1),
                   'F1Score': mean_score / (n_iter + 1)}
    pbar.write(_term_move_up(), end='\r')
    self.writing(epoch, self.writer_main, scalar_dict, opt='scalar')
    pbar.close()
def __train_epoch(self):
    """Train for one epoch, tracking loss and confusion-matrix metrics.

    Returns the mean loss over the epoch.
    """
    self.model.train()
    losses = []
    accuracies = []
    TN = FN = TP = FP = 0
    progress = tqdm(enumerate(self.train_loader),
                    total=len(self.train_loader),
                    desc='Training',
                    file=sys.stdout)
    for batch_idx, data in progress:
        samples, targets = data
        if self.cuda:
            samples = samples.cuda()
            targets = targets.cuda()
        self.optimizer.zero_grad()
        outputs = self.model(samples)
        loss = self.criterion(outputs, targets)
        loss.backward()
        self.optimizer.step()
        losses.append(loss.item())
        targets = targets.data.cpu()
        # Hard predictions = argmax over the class dimension.
        _, predicted = torch.max(outputs.data, 1)
        predicted = predicted.data.cpu()
        # NOTE(review): the indexing below assumes __perf_measure returns
        # (TP, FP, TN, FN) in that order — confirm against its definition.
        perf = self.__perf_measure(targets, predicted)
        TN += perf[2]
        FN += perf[3]
        TP += perf[0]
        FP += perf[1]
        # Every ratio is guarded against a zero denominator.
        acc = (TP + TN) / (FP + FN + TP + TN) if FP + FN + TP + TN > 0 else 0
        precision = TP / (TP + FP) if TP + FP > 0 else 0
        recall = TP / (TP + FN) if TP + FN > 0 else 0
        f1 = 2 * (precision * recall) / (
            precision + recall) if precision + recall > 0 else 0
        accuracies.append(acc)
        progress.set_description(
            'Training Loss: {:.4f} | Accuracy: {:.4f} | F1: {:.4f} | Precision: {:.4f} | Recall: {:.4f} | TP: {} | TN: {} | FP: {} | FN: {}'
            .format(loss.item(), acc, f1, precision, recall, TP, TN, FP, FN))
    return np.mean(losses)
def forward_step(net, mid_net, loss_fn, loader, args):
    """Buffer embedded feature pairs until max_pair/worker is reached, then
    compute the multi-head loss, run backward, and return stats.

    Returns (loss_total, per-head correct results, total labels, epoch_end).
    NOTE(review): this targets pre-0.4 PyTorch (`volatile`, `loss.data[0]`);
    both were removed in 0.4 — modernizing needs torch.no_grad() / item().
    """
    pair_total = 0
    # One feature buffer per loss head.
    catfeat = []
    for i in range(args.loss_step):
        catfeat.append([])
    label = []
    epoch_end = False
    # Keep embedding batches until enough pairs are buffered for this worker.
    while pair_total < args.max_pair / args.worker:
        pair_total, epoch_end = embed_step(pair_total, net, mid_net, loss_fn,
                                           catfeat, label, loader, args,
                                           volatile=args.fix_net)
    losses = []
    for k in range(len(catfeat)):
        if args.fix_net:
            # Clear the volatile/requires_grad flags set while embedding with
            # a frozen net (legacy autograd semantics).
            for feat in catfeat[k]:
                if isinstance(feat, tuple):
                    feat[0].volatile = False
                    feat[0].requires_grad = False
                    feat[1].volatile = False
                    feat[1].requires_grad = False
                else:
                    feat.volatile = False
                    feat.requires_grad = False
        losses.append(loss_fn[k](catfeat[k], label))
    loss = sum(losses)
    loss_total = loss.data[0]
    correct = []
    for k in range(len(loss_fn)):
        correct.append(loss_fn[k].check_result(label))
    total = len(label)
    # Clear all gradients (nets and loss modules) before the backward pass.
    net.zero_grad()
    if mid_net != None:
        mid_net.zero_grad()
    for l in loss_fn:
        l.zero_grad()
    loss.backward()
    return loss_total, correct, total, epoch_end
def train_emb(self, epoch, batch_data, ids=None, *args):
    """One training step given images and captions.

    Embeds the batch, computes the loss, and applies a single optimizer
    step with optional gradient-norm clipping.
    """
    self.Eiters += 1
    self.logger.update('Eit', self.Eiters)
    self.logger.update('lr', self.optimizer.param_groups[0]['lr'])
    # Embed images and captions.
    img_emb, cap_emb, cap_lens, ids = self.forward_emb(epoch, batch_data)
    self.optimizer.zero_grad()
    total_loss = self.forward_loss(epoch, img_emb, cap_emb, cap_lens, ids)
    # Backprop, clip the global gradient norm if configured, then update.
    total_loss.backward()
    if self.grad_clip > 0:
        clip_grad_norm(self.params, self.grad_clip)
    self.optimizer.step()
def train(epoch, model, loss_fn, train_loader, optimizer):
    """One Horovod training epoch; shuffling is driven by the distributed sampler."""
    model.train()
    # Horovod: set epoch to sampler for shuffling.
    train_loader.sampler.set_epoch(epoch)
    for batch_idx, (data, target) in tqdm(enumerate(train_loader),
                                          total=len(train_loader), ascii=True):
        # Move every non-string field of the feature dict to the GPU.
        for key in data:
            if not isinstance(data[key][0], np.str_):
                data[key] = data[key].cuda()
        target = target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            # Horovod: use train_sampler to determine the number of examples in
            # this worker's partition. The original used len(data), which counts
            # the dict's *keys*, not the batch size — use the target batch dim.
            logging.info('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(target), len(train_loader.sampler),
                100. * batch_idx / len(train_loader), loss.item()))
def train(epoch_idx, mAP):
    """One epoch training a separate (network, optimizer) pair per domain slice."""
    # return
    logging.info('In epoch {}:\n'.format(epoch_idx + 1))
    for batch_idx, (posv, posa, negv, nega, pos_label, neg_label) in enumerate(train_loader):
        opt.zero_grad()
        # NOTE(review): gradients are zeroed on `opt` but steps are taken on
        # the per-domain `opts[dm]` below — confirm `opt` covers the same
        # parameters, otherwise per-domain gradients are never cleared.
        # b,p,dim = axi.shape
        posv = posv.to(device)
        posa = posa.to(device)
        negv = negv.to(device)
        nega = nega.to(device)
        b1, ds, _, _ = posv.shape  # b1 = positive-bag count, ds = domain count
        vfeats = torch.cat((posv, negv), 0)
        afeats = torch.cat((posa, nega), 0)
        pos_label = pos_label.to(device)
        neg_label = neg_label.to(device)
        labels = torch.cat((pos_label, neg_label), 0).long().squeeze(-1)
        # pdb.set_trace()
        for dm in range(ds):
            # Slice out this domain's features and labels.
            vfeat, afeat, label = vfeats[:, dm, :, :], afeats[:, dm, :, :], labels[:, dm]
            ins_score, bag_predict = networks[dm](vfeat, afeat)
            celoss = CELoss(bag_predict, label)
            # The first b1 rows are positive bags, the remainder negative.
            ahloss = AHLoss(ins_score[:b1, :], ins_score[b1:, :])
            loss = celoss + ahloss
            print(
                "Domain: {}; In epoch {}, [{}/{}]: loss: {:.6f}, max test mAP: {:.4f}, current test mAP: {:.4f}"
                .format(domains[dm], epoch_idx + 1, batch_idx,
                        len(train_loader), loss.item(), MaxmAP[dm], mAP[dm]))
            # recoder.update('loss', loss.data, epoch_idx*len(train_loader)+batch_idx)
            loss.backward()
            opts[dm].step()
            # loss = attloss+att_visual_i_loss+att_audio_i_loss+att_visual_j_loss+att_audio_j_loss #0.6543
            recoder.update(domains[dm] + ' celoss', celoss.item(), epoch_idx)
            recoder.update(domains[dm] + ' ahloss', ahloss.item(), epoch_idx)
        recoder.save()
def train_fn(data_loader, model, optimizer, device, scheduler):
    """One training epoch for the transformer-style regressor.

    The original bound the batch loss to the name `loss`, shadowing the
    imported `loss` module — every batch after the first raised
    AttributeError on `loss.loss_fn`. The result is kept in a distinct local.
    """
    model.train()
    for bi, d in tqdm(enumerate(data_loader), total=len(data_loader)):
        ids = d["ids"].to(device, dtype=torch.long)
        token_type_ids = d["token_type_ids"].to(device, dtype=torch.long)
        mask = d["mask"].to(device, dtype=torch.long)
        targets = d["targets"].to(device, dtype=torch.float)
        optimizer.zero_grad()
        outputs = model(ids=ids, mask=mask, token_type_ids=token_type_ids)
        batch_loss = loss.loss_fn(outputs, targets)
        batch_loss.backward()
        optimizer.step()
        scheduler.step()
def train(model, data_loader, optimizer, scheduler, i):
    """One training epoch; returns the mean loss. `i` is the epoch index."""
    model.train()
    fin_loss = 0.0
    tk = tqdm(data_loader, desc="Epoch" + " [TRAIN] " + str(i + 1))
    for t, data in enumerate(tk):
        # Move every field of the batch dict onto the GPU (in place).
        for k, v in data.items():
            data[k] = v.cuda()
        optimizer.zero_grad()
        # The model returns (predictions, loss); only the loss is needed here.
        _, loss = model(**data)
        loss.backward()
        optimizer.step()
        fin_loss += loss.item()
        tk.set_postfix({
            'loss': '%.6f' % float(fin_loss / (t + 1)),
            'LR': optimizer.param_groups[0]['lr']
        })
    scheduler.step()
    return fin_loss / len(data_loader)