def train(train_loader, model, criterion, optimizer, epoch, opt):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accmeter = AverageMeter()
    accmeter_class0 = AverageMeter()
    accmeter_class1 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if opt.gpu is not None:
            input = input.cuda(opt.gpu, non_blocking=True)
            target = target.cuda(opt.gpu, non_blocking=True)

        output = model(input, opt)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc, acc_class0, acc_class1 = binary_accuracy(output, target)
        losses.update(loss.item(), input.size(0))
        accmeter.update(acc[0], input.size(0))
        accmeter_class0.update(acc_class0[0], (target == 0).sum().cpu().item())
        accmeter_class1.update(acc_class1[0], (target == 1).sum().cpu().item())

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % opt.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc (class 0) {class0.val:.3f} ({class0.avg:.3f})\t'
                  'Acc (class 1) {class1.val:.3f} ({class1.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=accmeter,
                      class0=accmeter_class0, class1=accmeter_class1))

    # note: these slots record accuracy averages, despite the dict's name
    lossvals['train'][epoch] = accmeter.avg
    lossvals['train_class0'][epoch] = accmeter_class0.avg
    lossvals['train_class1'][epoch] = accmeter_class1.avg
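# NOTE: every loop in this file leans on an `AverageMeter` that is never
# defined here. A minimal sketch consistent with the observed interface
# (`update(val, n)`, `.val`, `.avg`, `.sum`); one loop further down also
# constructs `AverageMeter(opt.print_freq)`, a windowed variant, which this
# plain sketch does not cover, and each source repo may differ in detail:
class AverageMeter(object):
    """Tracks the most recent value and a running, count-weighted average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        # n weights this observation, e.g. by the batch size
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / max(self.count, 1)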
def validate(val_loader, model, criterion):
    losses = AverageMeter()
    ious = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        for i, (input, target, loss_weight) in enumerate(val_loader):
            # compute output
            outputs = model(input)
            loss = 0
            for output in outputs:
                loss += criterion(output, target, loss_weight)
            loss /= len(outputs)
            iou = iou_score(outputs[-1], target)

            losses.update(loss.item(), input.size(0))
            ious.update(iou, input.size(0))

    log = OrderedDict([
        ('loss', losses.avg),
        ('iou', ious.avg),
    ])
    return log
def train(train_loader, model, criterion, optimizer):
    losses = AverageMeter()
    ious = AverageMeter()

    model.train()

    pbar = tqdm(enumerate(train_loader), total=len(train_loader))
    for i, (input, target, loss_weight) in pbar:
        # compute output
        outputs = model(input)
        loss = 0
        for output in outputs:
            loss += criterion(output, target, loss_weight)
        loss /= len(outputs)
        iou = iou_score(outputs[-1], target)

        # update log and progress bar
        losses.update(loss.item(), input.size(0))
        ious.update(iou, input.size(0))
        pbar.set_postfix({'loss': loss.item(), 'iou': iou})

        # compute gradient and do optimizing step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    log = OrderedDict([
        ('loss', losses.avg),
        ('iou', ious.avg),
    ])
    return log
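# `iou_score` is external to this file. A plausible binary-segmentation
# stand-in, assuming logits in and a scalar IoU out; each repo ships its
# own version, so treat this as a sketch rather than the original:
def iou_score(output, target, smooth=1e-5):
    with torch.no_grad():
        output = torch.sigmoid(output) > 0.5
        target = target > 0.5
        intersection = (output & target).float().sum()
        union = (output | target).float().sum()
    return ((intersection + smooth) / (union + smooth)).item()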
def TestTriplets(test_loader, tnet, criterion):
    losses = AverageMeter()
    accs = AverageMeter()

    # switch to evaluation mode
    tnet.eval()
    with torch.no_grad():
        for batch_idx, (data1, data2, data3, _, _, _) in enumerate(test_loader):
            if args.cuda:
                data1, data2, data3 = data1.cuda(), data2.cuda(), data3.cuda()

            # compute output
            dista, distb, distc, embedded_x, embedded_y, embedded_z = tnet(data1, data2, data3)
            target = torch.FloatTensor(dista.size()).fill_(1)
            if args.cuda:
                target = target.cuda()

            loss_triplet = criterion(dista, distb, distc, target,
                                     args.margin, args.in_triplet_hard).item()
            loss_embedd = embedded_x.norm(2) + embedded_y.norm(2) + embedded_z.norm(2)
            test_loss = loss_triplet + args.reg * loss_embedd

            # measure accuracy and record loss
            acc = LossAccuracy(dista, distb, distc, args.margin)
            accs.update(acc, data1.size(0))
            losses.update(test_loss.item(), data1.size(0))

    print('\nTest/val triplets: Average loss: %f, Accuracy: %f \n'
          % (losses.avg, accs.avg))
    return losses.avg, accs.avg
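# `LossAccuracy` is not defined here. Given the target convention noted in
# the Train function further down ("1 means dista should be larger than
# distb"), one plausible reading is the fraction of triplets that satisfy
# the ranking margin; this is an assumption, not the repo's definition:
def LossAccuracy(dista, distb, distc, margin):
    with torch.no_grad():
        # count a triplet as correct when dista beats distb by the margin
        return ((dista - distb) > margin).float().mean().item()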
def validate(args, val_loader, model, criterion):
    losses = AverageMeter()
    ious = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        for i, (input, target) in tqdm(enumerate(val_loader), total=len(val_loader)):
            input = input.cuda()
            target = target.cuda()

            # compute output
            if args.deepsupervision:
                outputs = model(input)
                loss = 0
                for output in outputs:
                    loss += criterion(output, target)
                loss /= len(outputs)
                iou = iou_score(outputs[-1], target)
            else:
                output = model(input)
                loss = criterion(output, target)
                iou = iou_score(output, target)

            losses.update(loss.item(), input.size(0))
            ious.update(iou, input.size(0))

    log = OrderedDict([
        ('loss', losses.avg),
        ('iou', ious.avg),
    ])
    return log
def train(self):
    self.model.train()
    self.optim.zero_grad()

    iteration = 0
    for epoch in range(cfg.SOLVER.MAX_EPOCH):
        if epoch == cfg.TRAIN.REINFORCEMENT.START:
            self.rl_stage = True
        self.setup_loader(epoch)

        start = time.time()
        data_time = AverageMeter()
        batch_time = AverageMeter()
        losses = AverageMeter()
        for _, (indices, input_seq, target_seq, gv_feat, att_feats, att_mask) in enumerate(self.training_loader):
            data_time.update(time.time() - start)

            input_seq = input_seq.cuda()
            target_seq = target_seq.cuda()
            gv_feat = gv_feat.cuda()
            att_feats = att_feats.cuda()
            att_mask = att_mask.cuda()

            kwargs = self.make_kwargs(indices, input_seq, target_seq, gv_feat, att_feats, att_mask)
            loss, loss_info = self.forward(kwargs)
            loss.backward()
            # utils.clip_gradient(self.optim.optimizer, self.model,
            #                     cfg.SOLVER.GRAD_CLIP_TYPE, cfg.SOLVER.GRAD_CLIP)
            self.optim.step()
            self.optim.zero_grad()
            # self.optim.scheduler_step('Iter')

            batch_time.update(time.time() - start)
            start = time.time()
            losses.update(loss.item())
            self.display(iteration, data_time, batch_time, losses, loss_info)
            iteration += 1

        if self.distributed:
            dist.barrier()

        self.save_model(epoch)
        val = self.eval(epoch)
        # self.optim.scheduler_step('Epoch', val)
        # self.scheduled_sampling(epoch)

        if self.distributed:
            dist.barrier()
def train_epoch(current_epoch, loss_functions, model, optimizer, scheduler,
                train_data_loader, summary_writer, conf, local_rank):
    losses = AverageMeter()
    mious = AverageMeter()

    iterator = tqdm(train_data_loader)
    model.train()
    if conf["optimizer"]["schedule"]["mode"] == "epoch":
        scheduler.step(current_epoch)
    for i, sample in enumerate(iterator):
        imgs = sample["image"].cuda()
        masks = sample["mask"].cuda().float()
        masks_orig = sample["mask_orig"].cuda().float()
        out_mask = model(imgs)

        with torch.no_grad():
            pred = torch.softmax(out_mask, dim=1)
            argmax = torch.argmax(pred, dim=1)
            ious = miou_round(argmax, masks_orig).item()
            mious.update(ious, imgs.size(0))

        mask_loss = loss_functions["mask_loss"](out_mask, masks.contiguous())
        loss = mask_loss
        losses.update(loss.item(), imgs.size(0))
        iterator.set_description(
            "epoch: {}; lr {:.7f}; Loss ({loss.avg:.4f}); miou ({miou.avg:.4f}); "
            .format(current_epoch, scheduler.get_lr()[-1], loss=losses, miou=mious))

        optimizer.zero_grad()
        if conf['fp16']:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 1)
        optimizer.step()
        torch.cuda.synchronize()
        if conf["optimizer"]["schedule"]["mode"] in ("step", "poly"):
            scheduler.step(i + current_epoch * len(train_data_loader))

    if local_rank == 0:
        for idx, param_group in enumerate(optimizer.param_groups):
            lr = param_group['lr']
            summary_writer.add_scalar('group{}/lr'.format(idx), float(lr),
                                      global_step=current_epoch)
        summary_writer.add_scalar('train/loss', float(losses.avg),
                                  global_step=current_epoch)
def train(args, train_loader, model, criterion, optimizer, epoch, scheduler=None):
    losses = AverageMeter()
    ious = AverageMeter()

    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        if args.scheduler == 'CyclicLR':
            scheduler.batch_step()

        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        # dice = dice_coef(output, target)
        iou = batch_iou(output, target)
        losses.update(loss.item(), input.size(0))
        ious.update(iou, input.size(0))

        # compute gradient and do optimizing step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'IoU {iou.val:.3f} ({iou.avg:.3f})'.format(
                      epoch, i, len(train_loader), loss=losses, iou=ious))

    log = OrderedDict([
        ('loss', losses.avg),
        ('iou', ious.avg),
    ])
    return log
def validate(val_loader, model, criterion, epoch, opt):
    batch_time = AverageMeter()
    losses = AverageMeter()
    accmeter = AverageMeter()
    accmeter_class0 = AverageMeter()
    accmeter_class1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if opt.gpu is not None:
                input = input.cuda(opt.gpu, non_blocking=True)
                target = target.cuda(opt.gpu, non_blocking=True)

            # compute output
            output = model(input, opt)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc, acc_class0, acc_class1 = binary_accuracy(output, target)
            losses.update(loss.item(), input.size(0))
            accmeter.update(acc[0], input.size(0))
            accmeter_class0.update(acc_class0[0], (target == 0).sum().cpu().item())
            accmeter_class1.update(acc_class1[0], (target == 1).sum().cpu().item())

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % opt.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc (class 0) {class0.val:.3f} ({class0.avg:.3f})\t'
                      'Acc (class 1) {class1.val:.3f} ({class1.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time, loss=losses,
                          top1=accmeter, class0=accmeter_class0, class1=accmeter_class1))

        print(' * Acc {top1.avg:.3f} Acc (class 0) {class0.avg:.3f} Acc (class 1) {class1.avg:.3f}'
              .format(top1=accmeter, class0=accmeter_class0, class1=accmeter_class1))

    # as in train(): these slots record accuracy averages
    lossvals['val'][epoch] = accmeter.avg
    lossvals['val_class0'][epoch] = accmeter_class0.avg
    lossvals['val_class1'][epoch] = accmeter_class1.avg

    return accmeter.avg
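# `binary_accuracy` is external. The call sites above index each return
# value with [0] and weight the per-class meters by class counts, so here
# is a sketch that fits that shape, assuming (B, 2) logits; the original
# implementation may differ:
def binary_accuracy(output, target):
    with torch.no_grad():
        correct = output.argmax(dim=1).eq(target).float()
        acc = correct.mean() * 100.0
        zeros, ones = target == 0, target == 1
        acc0 = correct[zeros].mean() * 100.0 if zeros.any() else torch.zeros(())
        acc1 = correct[ones].mean() * 100.0 if ones.any() else torch.zeros(())
    # one-element lists so callers can write acc[0], acc_class0[0], ...
    return [acc], [acc0], [acc1]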
def train(args, use_gpu, train_loader, model, criterion, optimizer, scheduler=None):
    losses = AverageMeter()
    dice_coef = AverageMeter()
    pixel_acc = AverageMeter()
    iou = AverageMeter()

    model.train()

    for iter, batch in tqdm(enumerate(train_loader), total=len(train_loader)):
        optimizer.zero_grad()

        if use_gpu:
            inputs = Variable(batch['X'].cuda())
            targets = Variable(batch['Y'].cuda())
        else:
            inputs, targets = Variable(batch['X']), Variable(batch['Y'])

        # compute output
        output = model(inputs)
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()

        output = output.data.cpu().numpy()
        N, _, h, w = output.shape
        pred = output.transpose(0, 2, 3, 1).reshape(
            -1, args.num_class).argmax(axis=1).reshape(N, h, w)
        mask = batch['l'].cpu().numpy().reshape(N, h, w)

        ioum = ious(pred, mask, args.num_class)
        dice_coefm = dice_coefs(pred, mask, args.num_class)
        pixel_accm = pixel_accs(pred, mask)

        losses.update(loss.item(), inputs.size(0))
        iou.update(ioum, inputs.size(0))
        dice_coef.update(dice_coefm, inputs.size(0))
        pixel_acc.update(pixel_accm, inputs.size(0))

    log = OrderedDict([
        ('loss', losses.avg),
        ('iou', iou.avg),
        ('dice_coef', dice_coef.avg),
        ('pixel_acc', pixel_acc.avg),
    ])
    return log
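# `ious`, `dice_coefs` and `pixel_accs` operate on numpy (N, h, w) label
# maps in the loop above. Minimal sketches under that assumption; the
# repo's metrics may average per image or per class differently:
import numpy as np

def pixel_accs(pred, mask):
    # fraction of pixels whose predicted class matches the mask
    return (pred == mask).mean()

def dice_coefs(pred, mask, num_class, smooth=1e-5):
    # mean Dice over classes on integer label maps
    scores = []
    for c in range(num_class):
        p, m = pred == c, mask == c
        inter = np.logical_and(p, m).sum()
        scores.append((2.0 * inter + smooth) / (p.sum() + m.sum() + smooth))
    return np.mean(scores)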
def one_forward_pass(metas, model, criterion, args, train=True):
    clr = metas['clr'].to(device, non_blocking=True)

    # prepare infos and targets
    if 'hm_veil' in metas.keys():
        hm_veil = metas['hm_veil'].to(device, non_blocking=True)  # (B, 21)
        infos = {'hm_veil': hm_veil, 'batch_size': clr.shape[0]}

        hm = metas['hm'].to(device, non_blocking=True)
        delta_map = metas['delta_map'].to(device, non_blocking=True)
        location_map = metas['location_map'].to(device, non_blocking=True)
        flag_3d = metas['flag_3d'].to(device, non_blocking=True)
        joint = metas['joint'].to(device, non_blocking=True)
        targets = {
            'clr': clr,
            'hm': hm,
            'dm': delta_map,
            'lm': location_map,
            'flag_3d': flag_3d,
            'joint': joint
        }
    else:
        infos = {'batch_size': clr.shape[0]}
        tips = metas['tips'].to(device, non_blocking=True)
        targets = {'clr': clr, 'joint': tips}

    # forward pass
    results = model(clr)

    total_loss = torch.Tensor([0]).cuda()
    losses = {}
    if not train:
        return results, {**targets, **infos}, total_loss, losses

    # compute losses
    if args.det_loss:
        det_total_loss, det_losses, batch_3d_size = criterion['det'].compute_loss(
            results, targets, infos)
        total_loss += det_total_loss
        losses.update(det_losses)
        targets['batch_3d_size'] = batch_3d_size

    return results, {**targets, **infos}, total_loss, losses
def train(args, train_loader, model, criterion, optimizer, epoch, scheduler=None):
    losses = AverageMeter()
    ious = AverageMeter()

    model.train()

    for i, (input, target) in tqdm(enumerate(train_loader), total=len(train_loader)):
        # input = input.cuda()
        # target = target.cuda()
        # input shape: torch.Size([18, 4, 160, 160])

        # compute output
        if args.deepsupervision:
            outputs = model(input)
            loss = 0
            for output in outputs:
                loss += criterion(output, target)
            loss /= len(outputs)
            iou = iou_score(outputs[-1], target)
        else:
            output = model(input)
            loss = criterion(output, target)
            iou = iou_score(output, target)

        losses.update(loss.item(), input.size(0))
        ious.update(iou, input.size(0))

        # compute gradient and do optimizing step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    log = OrderedDict([
        ('loss', losses.avg),
        ('iou', ious.avg),
    ])
    return log
def validate_one_class(val_loader, backbone, centers_reg, args):
    """Run evaluation."""
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    backbone.eval()
    centers_reg.eval()
    centers = centers_reg.centers

    all_features, all_labels = [], []
    end = time.time()
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(val_loader):
            inputs = inputs.cuda()
            labels = labels.cuda()
            all_labels.append(labels.data.cpu().numpy())

            # compute output
            features = backbone(inputs)
            loss = centers_reg(features, labels)

            losses.update(loss.item(), inputs.size(0))
            all_features.append(features.data.cpu().numpy())

            prec1 = accuracy_l2(features, centers, labels)
            top1.update(prec1, inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} '
                      'Loss {loss.val:.4f} ({loss.avg:.4f}) '
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, top1=top1))

    return top1, losses
def validate(epoch, args, use_gpu, val_loader, model, criterion):
    losses = AverageMeter()
    dice_coef = AverageMeter()
    pixel_acc = AverageMeter()
    iou = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        for iter, batch in tqdm(enumerate(val_loader), total=len(val_loader)):
            if use_gpu:
                inputs = Variable(batch['X'].cuda())
                targets = Variable(batch['Y'].cuda())
            else:
                inputs, targets = Variable(batch['X']), Variable(batch['Y'])

            # compute output
            output = model(inputs)
            loss = criterion(output, targets)

            outputs = output.data.cpu().numpy()
            N, _, h, w = outputs.shape
            pred = outputs.transpose(0, 2, 3, 1).reshape(
                -1, args.num_class).argmax(axis=1).reshape(N, h, w)
            mask = batch['l'].cpu().numpy().reshape(N, h, w)

            ioum = ious(pred, mask, args.num_class)
            dice_coefm = dice_coefs(pred, mask, args.num_class)
            pixel_accm = pixel_accs(pred, mask)

            losses.update(loss.item(), inputs.size(0))
            iou.update(ioum, inputs.size(0))
            dice_coef.update(dice_coefm, inputs.size(0))
            pixel_acc.update(pixel_accm, inputs.size(0))

            # only save the 1st image for comparison
            if iter == 0:
                image = pred[0, :, :]
                save_result_comparison(epoch, batch['X'], mask, image)

    log = OrderedDict([
        ('loss', losses.avg),
        ('iou', iou.avg),
        ('dice_coef', dice_coef.avg),
        ('pixel_acc', pixel_acc.avg),
    ])
    return log
def train(args, train_loader, model, criterion, optimizer, epoch, scheduler=None):
    losses = AverageMeter()
    ious = AverageMeter()

    model.train()

    # enumerate pairs each batch with a running index
    for i, (input, target) in tqdm(enumerate(train_loader), total=len(train_loader)):
        input = input.cuda()
        target = target.cuda()

        # compute output
        if args.deepsupervision:
            outputs = model(input)
            loss = 0
            for output in outputs:
                loss += criterion(output, target)
            loss /= len(outputs)
            iou = iou_score(outputs[-1], target)
        else:
            output = model(input)
            loss = criterion(output, target)
            iou = iou_score(output, target)

        losses.update(loss.item(), input.size(0))
        ious.update(iou, input.size(0))

        # compute gradients and take an optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    log = OrderedDict([
        ('loss', losses.avg),
        ('iou', ious.avg),
    ])
    return log
def validate(args, val_loader, model, criterion, scheduler=None):
    losses = AverageMeter()
    ious = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            # dice = dice_coef(output, target)
            iou = batch_iou(output, target)
            losses.update(loss.item(), input.size(0))
            ious.update(iou, input.size(0))

            if i % args.print_freq == 0:
                print('Validation: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'IoU {iou.val:.3f} ({iou.avg:.3f})'.format(
                          i, len(val_loader), loss=losses, iou=ious))

    print(' * Loss {loss.avg:.4f} IoU {iou.avg:.3f}'.format(loss=losses, iou=ious))

    log = OrderedDict([
        ('loss', losses.avg),
        ('iou', ious.avg),
    ])
    return log
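# `batch_iou`, used by the CyclicLR train/validate pair above, is not
# defined in this file. A sketch assuming binary logits and per-image IoU
# averaged over the batch (an assumption about the original's semantics):
def batch_iou(output, target, smooth=1e-5):
    with torch.no_grad():
        pred = (torch.sigmoid(output) > 0.5).float()
        target = (target > 0.5).float()
        dims = tuple(range(1, pred.dim()))
        inter = (pred * target).sum(dim=dims)
        union = pred.sum(dim=dims) + target.sum(dim=dims) - inter
        return ((inter + smooth) / (union + smooth)).mean().item()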
def Train(train_loader_t, model, optimizer, epoch, num_species, num_per_specie):
    losses = AverageMeter()
    emb_norms = AverageMeter()

    # switch to train mode
    model.train()

    for batch_idx, (data, labels, idx) in enumerate(train_loader_t):
        if args.cuda:
            data = data.cuda()

        # compute output
        embed = model(data)
        loss_embed = embed.norm(2)
        loss_triplet = ComputeTripletLoss(embed, labels, num_species, num_per_specie)
        loss = loss_triplet + args.reg * loss_embed

        # measure loss accuracy and record loss
        losses.update(loss_triplet.item(), data.size(0))
        emb_norms.update(loss_embed.item() / 3, data.size(0))

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print('Train Epoch: {} [{}/{}]\t'
              'Loss: {:.4f} \t'
              'Total Loss: {:.2f}'.format(
                  epoch, (batch_idx + 1) * len(data), len(train_loader_t.sampler),
                  loss_triplet.item(), loss.item()))
def validate(val_loader, backbone, head, centers, criterion_model, args):
    """Run evaluation."""
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    backbone.eval()
    head.eval()

    all_features, all_labels, all_outputs = [], [], []
    end = time.time()
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(val_loader):
            inputs = inputs.cuda()
            labels = labels.cuda()
            all_labels.append(labels.data.cpu().numpy())

            # compute output
            features = backbone(inputs)
            if args.onevsrest:
                if args.centerloss:
                    outputs, loss, closs = head(features, labels)
                else:
                    outputs, loss = head(features, labels)
            else:
                if args.head in ['DC_EPCC', 'EPCC']:
                    outputs = head(features, centers)
                elif args.head in ['ArcMarginProduct', 'AddMarginProduct', 'SphereProduct']:
                    outputs = head(features, labels)
                elif args.head in ['Linear_FC']:
                    outputs = head(features)
                else:
                    raise ValueError('head is not defined')
                loss = criterion_model(outputs, labels)

            losses.update(loss.item(), inputs.size(0))
            all_features.append(features.data.cpu().numpy())
            all_outputs.append(outputs.data.cpu().numpy())

            if not args.onevsrest:
                # measure multi-class accuracy
                prec1 = accuracy(outputs.data, labels)[0].item()
                top1.update(prec1, inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} '
                      'Loss {loss.val:.4f} ({loss.avg:.4f}) '
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, top1=top1))

    val_features = np.concatenate(all_features, 0)
    val_labels = np.concatenate(all_labels, 0)
    val_outputs = np.concatenate(all_outputs, 0)

    classbased_ap = None
    if args.onevsrest:
        # measure one-vs-rest average precision
        val_labels = np.where(val_labels >= 0, 1, 0)
        prec1 = average_precision_score(val_labels, val_outputs)
        top1.avg = prec1
        classbased_ap = average_precision_score(val_labels, val_outputs, average=None)

    return top1, classbased_ap, losses, val_features, val_labels, val_outputs
def train_epoch(epoch, data_loader, model, criterion, optimizer, scheduler,
                opt, logger, epoch_logger, batch_logger, rank, world_size, writer):
    if rank == 0:
        logger.info('Training at epoch {}'.format(epoch))

    model.train()

    batch_time = AverageMeter(opt.print_freq)
    data_time = AverageMeter(opt.print_freq)
    loss_time = AverageMeter(opt.print_freq)
    losses = AverageMeter(opt.print_freq)
    global_losses = AverageMeter()

    end_time = time.time()
    for i, data in enumerate(data_loader):
        data_time.update(time.time() - end_time)

        curr_step = (epoch - 1) * len(data_loader) + i
        scheduler.step(curr_step)

        ret = model(data)
        num_rois = ret['num_rois']
        outputs = ret['outputs']
        targets = ret['targets']

        tot_rois = torch.Tensor([num_rois]).cuda()
        dist.all_reduce(tot_rois)
        tot_rois = tot_rois.item()

        if tot_rois == 0:
            end_time = time.time()
            continue

        optimizer.zero_grad()

        if num_rois > 0:
            loss = criterion(outputs, targets)
            loss = loss * num_rois / tot_rois * world_size
        else:
            # keep the graph alive with a zero loss when this rank has no RoIs
            loss = torch.zeros(1).cuda()
            for param in model.parameters():
                if param.requires_grad:
                    loss = loss + param.sum()
            loss = 0. * loss

        loss.backward()
        optimizer.step()

        reduced_loss = loss.clone()
        dist.all_reduce(reduced_loss)
        losses.update(reduced_loss.item(), tot_rois)
        global_losses.update(reduced_loss.item(), tot_rois)

        batch_time.update(time.time() - end_time)
        end_time = time.time()

        if (i + 1) % opt.print_freq == 0 and rank == 0:
            writer.add_scalar('train/loss', losses.avg, curr_step + 1)
            writer.add_scalar('train/lr', optimizer.param_groups[0]['lr'], curr_step + 1)

            batch_logger.log({
                'epoch': epoch,
                'batch': i + 1,
                'iter': curr_step + 1,
                'loss': losses.avg,
                'lr': optimizer.param_groups[0]['lr']
            })

            logger.info('Epoch [{0}]\t'
                        'Iter [{1}/{2}]\t'
                        'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                        'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                            epoch, i + 1, len(data_loader),
                            batch_time=batch_time, data_time=data_time, loss=losses))

    if rank == 0:
        writer.add_scalar('train/epoch_loss', global_losses.avg, epoch)
        writer.flush()

        epoch_logger.log({
            'epoch': epoch,
            'loss': global_losses.avg,
            'lr': optimizer.param_groups[0]['lr']
        })

        logger.info('-' * 100)
        logger.info('Epoch [{}/{}]\t'
                    'Loss {:.4f}'.format(epoch, opt.train.n_epochs, global_losses.avg))

        if epoch % opt.train.save_freq == 0:
            save_file_path = os.path.join(opt.result_path, 'ckpt_{}.pth.tar'.format(epoch))
            states = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict()
            }
            torch.save(states, save_file_path)
            logger.info('Checkpoint saved to {}'.format(save_file_path))

        logger.info('-' * 100)
def Train(train_loader_t, tnet, criterion, optimizer, epoch, sampler):
    losses = AverageMeter()
    loss_accs = AverageMeter()
    emb_norms = AverageMeter()

    # switch to train mode
    tnet.train()

    loss_triplet = 0
    loss_embedd = 0
    assert args.batch_size % triplet_batch_size == 0
    reset = args.batch_size // triplet_batch_size
    for batch_idx, (data1, data2, data3, idx1, idx2, idx3) in enumerate(train_loader_t):
        if batch_idx % reset == 0:
            loss_triplet = 0
            loss_embedd = 0

        if args.cuda:
            data1, data2, data3 = data1.cuda(), data2.cuda(), data3.cuda()

        # compute output
        dista, distb, distc, embedded_x, embedded_y, embedded_z = tnet(data1, data2, data3)
        # 1 means dista should be larger than distb
        target = torch.FloatTensor(dista.size()).fill_(1)
        if args.cuda:
            target = target.cuda()

        # forward pass: accumulate the loss over `reset` mini-batches
        loss_triplet += criterion(dista, distb, distc, target, args.margin, args.in_triplet_hard)
        if args.mining == 'Hardest' or args.mining == 'SemiHard':
            sampler.SampleNegatives(dista, distb, loss_triplet, (idx1, idx2, idx3))
        loss_embedd += embedded_x.norm(2) + embedded_y.norm(2) + embedded_z.norm(2)

        if batch_idx % reset != reset - 1:
            # don't do a backward pass yet
            continue

        loss = (loss_triplet + args.reg * loss_embedd) / reset

        # measure loss accuracy and record loss
        loss_acc = LossAccuracy(dista, distb, distc, args.margin)
        losses.update(loss_triplet.item(), data1.size(0))
        loss_accs.update(loss_acc, data1.size(0))
        emb_norms.update(loss_embedd.item() / 3, data1.size(0))

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(loss_triplet.item(), args.reg * loss_embedd.item(), args.reg, loss_embedd.item())
        print('Train Epoch: {} [{}/{}]\t'
              'Loss: {:.4f} ({:.4f}) \t'
              'Loss Acc: {:.2f}% ({:.2f}%) \t'
              'Emb_Norm: {:.2f} ({:.2f})'.format(
                  epoch, (batch_idx + 1) * len(data1), len(train_loader_t.dataset),
                  losses.val, losses.avg,
                  100. * loss_accs.val, 100. * loss_accs.avg,
                  emb_norms.val, emb_norms.avg))

    return loss_accs.avg
def train_epoch(current_epoch, loss_functions, model, optimizer, scheduler,
                train_data_loader, summary_writer, conf, local_rank):
    num_classes = conf['num_classes']
    losses = AverageMeter()
    speed_losses = AverageMeter()
    junction_losses = AverageMeter()
    dices = AverageMeter()

    iterator = tqdm(train_data_loader)
    model.train()
    if conf["optimizer"]["schedule"]["mode"] == "epoch":
        scheduler.step(current_epoch)
    for i, sample in enumerate(iterator):
        imgs = sample["image"].cuda()
        masks = sample["mask"].cuda()
        out_mask = model(imgs)

        # channel layout: 0..9 speed channels, 10 road mask, 11 junctions
        mask_band = 10
        jn_band = 11

        with torch.no_grad():
            pred = torch.sigmoid(out_mask[:, mask_band:jn_band, ...])
            d = dice_round(pred, masks[:, mask_band:jn_band, ...].contiguous(), t=0.5).item()
        dices.update(d, imgs.size(0))

        mask_loss = loss_functions["mask_loss"](out_mask[:, mask_band:jn_band, ...].contiguous(),
                                                masks[:, mask_band:jn_band, ...].contiguous())
        speed_loss = loss_functions["speed_loss"](out_mask[:, :mask_band, ...].contiguous(),
                                                  masks[:, :mask_band, ...].contiguous())
        loss = speed_loss + mask_loss
        if num_classes > 8:
            junction_loss = loss_functions["junction_loss"](
                out_mask[:, jn_band:jn_band + 1, ...].contiguous(),
                masks[:, jn_band:jn_band + 1, ...].contiguous())
            junction_losses.update(junction_loss.item(), imgs.size(0))
            loss += junction_loss

        losses.update(loss.item(), imgs.size(0))
        speed_losses.update(speed_loss.item(), imgs.size(0))
        iterator.set_description(
            "epoch: {}; lr {:.7f}; Loss ({loss.avg:.4f}); Dice ({dice.avg:.4f}); "
            "Speed ({speed.avg:.4f}); Junction ({junction.avg:.4f}); "
            .format(current_epoch, scheduler.get_lr()[-1], loss=losses,
                    dice=dices, speed=speed_losses, junction=junction_losses))

        optimizer.zero_grad()
        if conf['fp16']:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()
        if conf["optimizer"]["schedule"]["mode"] in ("step", "poly"):
            scheduler.step(i + current_epoch * len(train_data_loader))

    if local_rank == 0:
        for idx, param_group in enumerate(optimizer.param_groups):
            lr = param_group['lr']
            summary_writer.add_scalar('group{}/lr'.format(idx), float(lr),
                                      global_step=current_epoch)
        summary_writer.add_scalar('train/loss', float(losses.avg),
                                  global_step=current_epoch)
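# `dice_round`, used by the segmentation epochs in this file, thresholds
# probabilities at t and computes Dice. A sketch of the assumed semantics;
# the original helper may smooth or reduce differently:
def dice_round(preds, targets, t=0.5, smooth=1e-3):
    preds = (preds > t).float()
    inter = (preds * targets).sum()
    # returns a tensor, so callers can chain .item()
    return (2.0 * inter + smooth) / (preds.sum() + targets.sum() + smooth)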
def train(self):
    self.model.train()
    self.optim.zero_grad()

    iteration = 0
    for epoch in range(cfg.SOLVER.MAX_EPOCH):
        if epoch == cfg.TRAIN.REINFORCEMENT.START:
            self.rl_stage = True
        self.setup_loader(epoch)

        start = time.time()
        data_time = AverageMeter()
        batch_time = AverageMeter()
        losses = AverageMeter()
        if not self.distributed or self.args.local_rank == 0:
            pbar = ProgressBar(n_total=len(self.training_loader), desc='Training')
        val = self.eval(epoch)
        for step, (indices, input_seq, target_seq, gv_feat, att_feats, att_mask,
                   image_ids, dataset_name) in enumerate(self.training_loader):
            data_time.update(time.time() - start)

            input_seq = input_seq.cuda()
            target_seq = target_seq.cuda()
            gv_feat = gv_feat.cuda()
            att_feats = att_feats.cuda()
            att_mask = att_mask.cuda()

            kwargs = self.make_kwargs(indices, input_seq, target_seq, gv_feat, att_feats, att_mask)
            loss, loss_info = self.forward(kwargs)
            loss.backward()
            utils.clip_gradient(self.optim.optimizer, self.model,
                                cfg.SOLVER.GRAD_CLIP_TYPE, cfg.SOLVER.GRAD_CLIP)
            self.optim.step()
            self.optim.zero_grad()
            self.optim.scheduler_step('Iter')

            batch_time.update(time.time() - start)
            start = time.time()
            losses.update(loss.item())
            self.summary(iteration, loss, image_ids, dataset_name)
            self.display(iteration, data_time, batch_time, losses, loss_info)
            iteration += 1

            if self.distributed:
                dist.barrier()
            if not self.distributed or self.args.local_rank == 0:
                pbar(step)

        self.save_model(epoch)
        val = self.eval(epoch)
        self.optim.scheduler_step('Epoch', val)
        self.scheduled_sampling(epoch)

        if self.distributed:
            dist.barrier()
def train(epoch, model, criterion, loss_fn, optimizer, trainloader, learning_rate, use_gpu):
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    std = np.expand_dims(np.array([0.229, 0.224, 0.225]), axis=1)
    std = np.expand_dims(std, axis=2)
    mean = np.expand_dims(np.array([0.485, 0.456, 0.406]), axis=1)
    mean = np.expand_dims(mean, axis=2)

    model.train()

    end = time.time()
    for batch_idx, (images_train, images_train1, images_train2, images_train3,
                    images_train4, images_train5, images_train6, images_train7,
                    images_train8, labels_train, tpids,
                    images_test, images_test1, images_test2, images_test3,
                    images_test4, labels_test, pids) in enumerate(trainloader):
        data_time.update(time.time() - end)

        edges = []
        if only_CSEI:
            augment_k = 4
        else:
            augment_k = 8

        tpids_4 = tpids.reshape(4, -1, 1)
        tpids_4 = tpids_4.repeat(1, 1, augment_k).reshape(4, -1)
        K_shot = images_train.shape[1] / 5

        images_train1 = images_train1.reshape(4, -1, 1, 3, 84, 84)
        images_train2 = images_train2.reshape(4, -1, 1, 3, 84, 84)
        images_train3 = images_train3.reshape(4, -1, 1, 3, 84, 84)
        images_train4 = images_train4.reshape(4, -1, 1, 3, 84, 84)
        images_train5 = images_train5.reshape(4, -1, 1, 3, 84, 84)
        images_train6 = images_train6.reshape(4, -1, 1, 3, 84, 84)
        images_train7 = images_train7.reshape(4, -1, 1, 3, 84, 84)
        images_train8 = images_train8.reshape(4, -1, 1, 3, 84, 84)

        # concatenate the augmented support views; the original kept a
        # 3-view variant commented out for when CUDA memory is tight
        if only_CSEI:
            images_train_4 = torch.cat(
                (images_train1, images_train2, images_train3, images_train4), 2)
        else:
            images_train_4 = torch.cat(
                (images_train1, images_train2, images_train3, images_train4,
                 images_train5, images_train6, images_train7, images_train8), 2)

        labels_train_4 = labels_train.reshape(4, -1, 1)
        labels_train_4 = labels_train_4.repeat(1, 1, augment_k)
        labels_test_4 = labels_train_4[:, :, :augment_k]
        labels_train_4 = labels_train_4.reshape(4, -1)
        labels_test_4 = labels_test_4.reshape(4, -1)

        if use_gpu:
            images_train, labels_train, images_train_4 = (
                images_train.cuda(), labels_train.cuda(), images_train_4.cuda())
            images_test, labels_test = images_test.cuda(), labels_test.cuda()
            pids = pids.cuda()
            labels_train_4 = labels_train_4.cuda()
            labels_test_4 = labels_test_4.cuda()
            tpids_4 = tpids_4.cuda()
            tpids = tpids.cuda()

        pids_con = torch.cat((pids, tpids_4), 1)
        labels_test_4 = torch.cat((labels_test, labels_test_4), 1)

        batch_size, num_train_examples, channels, height, width = images_train.size()
        num_test_examples = images_test.size(1)

        labels_train_1hot = one_hot(labels_train).cuda()
        train_pid = torch.matmul(labels_train_1hot.transpose(1, 2),
                                 tpids.unsqueeze(2).float()).squeeze()
        train_pid = (train_pid / K_shot).long()

        labels_train_1hot_4 = one_hot(labels_train_4).cuda()
        labels_test_1hot = one_hot(labels_test).cuda()
        labels_test_1hot_4 = one_hot(labels_test_4).cuda()

        # support set (note: switch > -1 is always true, so the second
        # branch is effectively disabled)
        switch = np.random.uniform(0, 1)
        if switch > -1:
            images_train = images_train.reshape(4, -1, 3, 84, 84)
        else:
            images_train = images_train1.cuda().reshape(4, -1, 3, 84, 84)

        images_train_4 = images_train_4.reshape(4, -1, 3, 84, 84)
        images_test = torch.cat((images_test, images_train_4), 1).reshape(4, -1, 3, 84, 84)

        # ytest is the all-class classification; cls_scores the N-way classification
        ytest, cls_scores, features, params_classifier, spatial = model(
            images_train, images_test, labels_train_1hot, labels_test_1hot_4)

        loss1 = criterion(ytest, pids_con.view(-1))
        loss2 = criterion(cls_scores, labels_test_4.view(-1))
        if epoch > 900:
            loss3 = loss_fn(params_classifier, ytest, features, pids_con)
            loss = loss1 + 0.5 * loss2 + loss3
        else:
            loss = loss1 + 0.5 * loss2

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.update(loss.item(), pids.size(0))
        batch_time.update(time.time() - end)
        end = time.time()

    print('Epoch{0} '
          'lr: {1} '
          'Time:{batch_time.sum:.1f}s '
          'Data:{data_time.sum:.1f}s '
          'Loss:{loss.avg:.4f} '.format(
              epoch + 1, learning_rate, batch_time=batch_time,
              data_time=data_time, loss=losses))
inputs_var = torch.autograd.Variable(inputs).cuda()
labels_var = torch.autograd.Variable(labels).cuda()

# zero the parameter gradients
optimizer.zero_grad()

# forward + backward + optimize
embed_feat = model(inputs_var, scda=False, pool_type='max_avg', is_train=True, scale=128)

# loss = criterion(embed_feat, labels)
if args.loss == 'softmax':
    loss = criterion(embed_feat, labels_var)
    prec1, prec5 = accuracy(embed_feat.data, labels, topk=(1, 5))
    losses.update(loss.item(), inputs.size(0))
    top1.update(prec1[0], inputs.size(0))
    top5.update(prec5[0], inputs.size(0))
else:
    loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
    print('[epoch %05d]\t loss: %.7f \t prec: %.3f \t pos-dist: %.3f \tneg-dist: %.3f'
          % (epoch + 1, running_loss, inter_, dist_ap, dist_an))

loss.backward()
optimizer.step()

if i % args.print_freq == 0:
    print('Epoch: [{0}][{1}/{2}]\t'
          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
          'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
def train_epoch(current_epoch, loss_functions, model, optimizer, scheduler,
                train_data_loader, summary_writer, conf, local_rank):
    losses = AverageMeter()
    damage_f1 = AverageMeter()
    localization_f1 = AverageMeter()

    iterator = tqdm(train_data_loader)
    model.train()
    if conf["optimizer"]["schedule"]["mode"] == "epoch":
        scheduler.step(current_epoch)
    for i, sample in enumerate(iterator):
        imgs = sample["image"].cuda()
        masks = sample["mask"].cuda().float()
        out_mask = model(imgs)

        # channels 0..3 are the damage classes, channel 4+ the localization mask
        mask_band = 4
        with torch.no_grad():
            pred = torch.sigmoid(out_mask[:, :, ...])
            d = dice_round(pred[:, mask_band:, ...], masks[:, mask_band:, ...], t=0.5).item()
            # harmonic mean of the per-class damage dice scores
            # (inner loop renamed from `i` so it no longer clobbers the batch index)
            dmg_f1 = 0
            for c in range(4):
                dmg_f1 += 1 / (dice_round(pred[:, c:c + 1, ...],
                                          masks[:, c:c + 1, ...], t=0.3).item() + 1e-3)
            dmg_f1 = 4 / dmg_f1
        localization_f1.update(d, imgs.size(0))
        damage_f1.update(dmg_f1, imgs.size(0))

        mask_loss = loss_functions["mask_loss"](out_mask[:, mask_band:, ...].contiguous(),
                                                masks[:, mask_band:, ...].contiguous())
        damage_loss = loss_functions["damage_loss"](out_mask[:, :mask_band, ...].contiguous(),
                                                    masks[:, :mask_band, ...].contiguous())
        loss = 0.7 * damage_loss + 0.3 * mask_loss

        losses.update(loss.item(), imgs.size(0))
        iterator.set_description(
            "epoch: {}; lr {:.7f}; Loss ({loss.avg:.4f}); Localization F1 ({dice.avg:.4f}); "
            "Damage F1 ({damage.avg:.4f}); "
            .format(current_epoch, scheduler.get_lr()[-1], loss=losses,
                    dice=localization_f1, damage=damage_f1))

        optimizer.zero_grad()
        if conf['fp16']:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 1)
        optimizer.step()
        torch.cuda.synchronize()
        if conf["optimizer"]["schedule"]["mode"] in ("step", "poly"):
            scheduler.step(i + current_epoch * len(train_data_loader))

    if local_rank == 0:
        for idx, param_group in enumerate(optimizer.param_groups):
            lr = param_group['lr']
            summary_writer.add_scalar('group{}/lr'.format(idx), float(lr),
                                      global_step=current_epoch)
        summary_writer.add_scalar('train/loss', float(losses.avg),
                                  global_step=current_epoch)