def train(train_loader, model, criterion, optimizer, args, epoch):
    losses = AverageMeter()

    model.train()
    for step, (x, y) in tqdm(enumerate(train_loader), total=len(train_loader)):
        image = x.float().cuda()
        target = y.float().cuda()

        output = model(image)                                     # model output
        target_soft = get_soft_label(target, args.num_classes)   # get soft label
        loss = criterion(output, target_soft, args.num_classes)  # the dice loss
        losses.update(loss.data, image.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % (math.ceil(float(len(train_loader.dataset)) / args.batch_size)) == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {losses.avg:.6f}'.format(
                epoch, step * len(image), len(train_loader.dataset),
                100. * step / len(train_loader), losses=losses))

    print('The average loss: {losses.avg:.4f}'.format(losses=losses))
    return losses.avg
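# Every loop in this file tracks running statistics with an AverageMeter. A minimal
# sketch is included here for reference, assuming the helper matches the common
# PyTorch ImageNet-example implementation; the repo's own version may differ in details.
class AverageMeter(object):
    """Tracks the current value, running sum, count, and average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count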
def train_once(model, train_loader, optimizer, criterion, epoch, log):
    model.train()
    losses = AverageMeter()
    criterion1, criterion2 = criterion

    for i, (inputs, targets, valid, meta) in enumerate(train_loader):
        inputs = inputs.cuda()
        refine_target = targets[-1].cuda()

        global_pred, refine_pred = model(inputs)

        # global loss: mask out invalid heatmaps before comparing each prediction stage
        global_loss = 0
        for pred, label in zip(global_pred, targets):
            mask = (valid > 1.0).type(torch.FloatTensor).unsqueeze(2).unsqueeze(3)
            label = label * mask
            global_loss += criterion1(pred, label.cuda()) / 2

        # refine loss: per-keypoint loss, masked, then online hard keypoint mining
        refine_loss = criterion2(refine_pred, refine_target)
        refine_loss = refine_loss.mean(dim=3).mean(dim=2)
        mask = (valid > 0.0).type(torch.FloatTensor)
        refine_loss = refine_loss * mask.cuda()
        refine_loss = ohkm(refine_loss, 8)

        loss = global_loss + refine_loss
        losses.update(loss.item(), inputs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        log.add_scalar('loss_epoch_{0}'.format(epoch), loss.item(), i)
        log.flush()

        if i % cfg.print_freq == 0:
            print('epoch: ', epoch,
                  '{0}/{1} loss_avg: {2} global_loss: {3} refine_loss: {4} loss: {5}'.format(
                      i, len(train_loader), losses.avg, global_loss, refine_loss, loss))

    return losses.avg
def run(model, data_loader, mode, criterion_vox, criterion_coord, optimizer_G, optimizer_P):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses_vox = AverageMeter()
    losses_coord = AverageMeter()
    errs = AverageMeter()

    log_key = ['losses_vox', 'losses_coord', 'errs']
    log_info = dict.fromkeys(log_key)

    # normalized mean error results
    dataset_len = data_loader.dataset.__len__()
    nme_results = torch.Tensor(dataset_len, 1)

    def data2variable(inputs, target, meta):
        if mode in ['pre_train', 'train']:
            input_var = torch.autograd.Variable(inputs.cuda())
            target_var = [torch.autograd.Variable(target[i].cuda(non_blocking=True))
                          for i in range(len(target))]
            coord_var = torch.autograd.Variable(meta['tpts_inp'].cuda(non_blocking=True))
def valid_isic(valid_loader, model, criterion, optimizer, args, epoch, minloss):
    val_losses = AverageMeter()
    val_isic_dice = AverageMeter()

    model.eval()
    for step, (t, k) in tqdm(enumerate(valid_loader), total=len(valid_loader)):
        image = t.float().cuda()
        target = k.float().cuda()

        output = model(image)                                       # model output
        output_dis = torch.max(output, 1)[1].unsqueeze(dim=1)
        output_soft = get_soft_label(output_dis, args.num_classes)
        target_soft = get_soft_label(target, args.num_classes)      # get soft label

        val_loss = criterion(output, target_soft, args.num_classes)  # the dice loss
        val_losses.update(val_loss.data, image.size(0))

        isic = val_dice_isic(output_soft, target_soft, args.num_classes)  # the dice score
        val_isic_dice.update(isic.data, image.size(0))

        if step % (math.ceil(float(len(valid_loader.dataset)) / args.batch_size)) == 0:
            print('Valid Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {losses.avg:.6f}'.format(
                epoch, step * len(image), len(valid_loader.dataset),
                100. * step / len(valid_loader), losses=val_losses))

    print('The ISIC Mean Average Dice score: {isic.avg: .4f}; '
          'The Average Loss score: {loss.avg: .4f}'.format(isic=val_isic_dice, loss=val_losses))

    # save a checkpoint whenever the validation loss improves on the best so far
    if val_losses.avg < min(minloss):
        minloss.append(val_losses.avg)
        print(minloss)
        modelname = args.ckpt + '/' + 'min_loss' + '_' + args.data + '_checkpoint.pth.tar'
        print('the best model will be saved at {}'.format(modelname))
        state = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'opt_dict': optimizer.state_dict()
        }
        torch.save(state, modelname)

    return val_losses.avg, val_isic_dice.avg
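# The segmentation loops above pass their targets through get_soft_label before the
# dice criterion. The sketch below assumes that helper simply one-hot encodes an
# integer mask of shape (N, 1, H, W) into a float tensor with a trailing class
# dimension; the function name and layout here are illustrative assumptions, and the
# repo's actual implementation may order the class axis differently.
def get_soft_label_sketch(input_tensor, num_classes):
    """One-hot encode input_tensor (N, 1, H, W) into (N, H, W, num_classes)."""
    tensor_list = []
    for c in range(num_classes):
        # binary map of pixels belonging to class c
        tensor_list.append(torch.eq(input_tensor, c * torch.ones_like(input_tensor)))
    output = torch.cat(tensor_list, dim=1)        # (N, num_classes, H, W)
    return output.permute(0, 2, 3, 1).float()     # (N, H, W, num_classes)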
def train(train_loader, model, criterions, optimizer):
    # prepare for refine loss: online hard keypoint mining keeps the top_k hardest
    # keypoints per sample and averages their losses
    def ohkm(loss, top_k):
        ohkm_loss = 0.
        for i in range(loss.size()[0]):
            sub_loss = loss[i]
            topk_val, topk_idx = torch.topk(sub_loss, k=top_k, dim=0, sorted=False)
            tmp_loss = torch.gather(sub_loss, 0, topk_idx)
            ohkm_loss += torch.sum(tmp_loss) / top_k
        ohkm_loss /= loss.size()[0]
        return ohkm_loss

    criterion1, criterion2 = criterions

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # switch to train mode
    model.train()

    train_loader_desc = tqdm(train_loader)
    for i, (inputs, targets, valid, meta) in enumerate(train_loader_desc):
        input_var = torch.autograd.Variable(inputs.cuda())
        target15, target11, target9, target7 = targets
        refine_target_var = torch.autograd.Variable(target7.cuda(non_blocking=True))
        valid_var = torch.autograd.Variable(valid.cuda(non_blocking=True))

        # compute output
        global_outputs, refine_output = model(input_var)
        score_map = refine_output.data.cpu()

        loss = 0.
        global_loss_record = 0.
        refine_loss_record = 0.

        # compute global loss and refine loss
        for global_output, label in zip(global_outputs, targets):
            num_points = global_output.size()[1]
            global_label = label * (valid > 1.1).type(torch.FloatTensor).view(
                -1, num_points, 1, 1)
            global_loss = criterion1(
                global_output,
                torch.autograd.Variable(global_label.cuda(non_blocking=True))) / 2.0
            loss += global_loss
            global_loss_record += global_loss.data.item()

        refine_loss = criterion2(refine_output, refine_target_var)
        refine_loss = refine_loss.mean(dim=3).mean(dim=2)
        refine_loss *= (valid_var > 0.1).type(torch.cuda.FloatTensor)
        refine_loss = ohkm(refine_loss, 8)
        loss += refine_loss
        refine_loss_record = refine_loss.data.item()

        # record loss
        losses.update(loss.data.item(), inputs.size(0))

        # compute gradient and do optimization step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loader_desc.set_description(
            'loss: {loss:.5f}, global loss: {global_loss:.3f}, '
            'refine loss: {refine_loss:.3f}, avg loss: {avg_loss:.3f}'.format(
                loss=loss.data.item(), global_loss=global_loss_record,
                refine_loss=refine_loss_record, avg_loss=losses.avg))

        if i % 1000 == 0 and i != 0:
            print('iteration {} | loss: {}, global loss: {}, refine loss: {}, avg loss: {}'
                  .format(i, loss.data.item(), global_loss_record,
                          refine_loss_record, losses.avg))

    return losses.avg
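# A standalone illustration of the OHKM term used above: for every sample, keep only
# the top_k hardest keypoints and average their losses, then average over the batch.
# The helper below copies the logic of the nested ohkm() for demonstration, and the
# (batch=2, keypoints=4) loss tensor is made up purely for the example.
def _ohkm_demo():
    import torch

    def ohkm(loss, top_k):  # same logic as the nested helper in train()
        ohkm_loss = 0.
        for i in range(loss.size(0)):
            topk_val, _ = torch.topk(loss[i], k=top_k, dim=0, sorted=False)
            ohkm_loss += topk_val.sum() / top_k
        return ohkm_loss / loss.size(0)

    per_keypoint_loss = torch.tensor([[0.9, 0.1, 0.5, 0.7],
                                      [0.2, 0.8, 0.3, 0.4]])
    # top_k=2 keeps {0.9, 0.7} and {0.8, 0.4}; their means are 0.8 and 0.6,
    # so the batch-level OHKM loss is 0.7.
    print(ohkm(per_keypoint_loss, top_k=2))  # tensor(0.7000)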
def validate(loader, model, criterion, netType, debug, flip):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acces = AverageMeter()

    end = time.time()

    # predictions
    predictions = torch.Tensor(loader.dataset.__len__(), 68, 2)

    model.eval()
    gt_win, pred_win = None, None
    bar = Bar('Validating', max=len(loader))
    all_dists = torch.zeros((68, loader.dataset.__len__()))
    for i, (inputs, target, meta) in enumerate(loader):
        data_time.update(time.time() - end)

        input_var = torch.autograd.Variable(inputs.cuda())
        target_var = torch.autograd.Variable(target.cuda(non_blocking=True))

        output = model(input_var)
        score_map = output[-1].data.cpu()

        if flip:
            flip_input_var = torch.autograd.Variable(
                torch.from_numpy(shufflelr(inputs.clone().numpy())).float().cuda())
            flip_output_var = model(flip_input_var)
            flip_output = flip_back(flip_output_var[-1].data.cpu())
            score_map += flip_output

        # intermediate supervision
        loss = 0
        for o in output:
            loss += criterion(o, target_var)

        acc, batch_dists = accuracy(score_map, target.cpu(), idx, thr=0.07)
        all_dists[:, i * args.val_batch:(i + 1) * args.val_batch] = batch_dists

        preds = final_preds(score_map, meta['center'], meta['scale'], [64, 64])
        for n in range(score_map.size(0)):
            predictions[meta['index'][n], :, :] = preds[n, :, :]

        if debug:
            gt_batch_img = batch_with_heatmap(inputs, target)
            pred_batch_img = batch_with_heatmap(inputs, score_map)
            if not gt_win or not pred_win:
                plt.subplot(121)
                gt_win = plt.imshow(gt_batch_img)
                plt.subplot(122)
                pred_win = plt.imshow(pred_batch_img)
            else:
                gt_win.set_data(gt_batch_img)
                pred_win.set_data(pred_batch_img)
            plt.pause(.05)
            plt.draw()

        losses.update(loss.item(), inputs.size(0))
        acces.update(acc[0], inputs.size(0))

        batch_time.update(time.time() - end)
        end = time.time()
        bar.suffix = ('({batch}/{size}) Data: {data:.6f}s | Batch: {bt:.3f}s | '
                      'Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | Acc: {acc: .4f}').format(
            batch=i + 1, size=len(loader), data=data_time.val, bt=batch_time.val,
            total=bar.elapsed_td, eta=bar.eta_td, loss=losses.avg, acc=acces.avg)
        bar.next()

    bar.finish()
    mean_error = torch.mean(all_dists)
    auc = calc_metrics(all_dists)  # this is auc of predicted maps and target.
    print("=> Mean Error: {:.2f}, AUC@0.07: {} based on maps".format(mean_error * 100., auc))
    return losses.avg, acces.avg, predictions, auc
def train(loader, model, criterion, optimizer, netType, debug=False, flip=False):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acces = AverageMeter()

    model.train()
    end = time.time()

    gt_win, pred_win = None, None
    bar = Bar('Training', max=len(loader))
    for i, (inputs, target) in enumerate(loader):
        data_time.update(time.time() - end)

        input_var = torch.autograd.Variable(inputs.cuda())
        target_var = torch.autograd.Variable(target.cuda(non_blocking=True))

        if debug:
            gt_batch_img = batch_with_heatmap(inputs, target)
            if not gt_win or not pred_win:
                plt.subplot(121)
                gt_win = plt.imshow(gt_batch_img)
            else:
                gt_win.set_data(gt_batch_img)
            plt.pause(.05)
            plt.draw()

        output = model(input_var)
        score_map = output[-1].data.cpu()

        if flip:
            flip_input_var = torch.autograd.Variable(
                torch.from_numpy(shufflelr(inputs.clone().numpy())).float().cuda())
            flip_output_var = model(flip_input_var)
            flip_output = flip_back(flip_output_var[-1].data.cpu())
            score_map += flip_output

        # intermediate supervision
        loss = 0
        for o in output:
            loss += criterion(o, target_var)
        acc, _ = accuracy(score_map, target.cpu(), idx, thr=0.07)

        losses.update(loss.item(), inputs.size(0))
        acces.update(acc[0], inputs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()
        bar.suffix = ('({batch}/{size}) Data: {data:.6f}s | Batch: {bt:.3f}s | '
                      'Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | Acc: {acc: .4f}').format(
            batch=i + 1, size=len(loader), data=data_time.val, bt=batch_time.val,
            total=bar.elapsed_td, eta=bar.eta_td, loss=losses.avg, acc=acces.avg)
        bar.next()

    bar.finish()
    return losses.avg, acces.avg
def train(train_loader, model, criterion, optimizer):
    # prepare for refine loss: online hard keypoint mining
    def ohkm(loss, top_k):
        ohkm_loss = 0.
        for i in range(loss.size()[0]):
            sub_loss = loss[i]
            topk_val, topk_idx = torch.topk(sub_loss, k=top_k, dim=0, sorted=False)
            tmp_loss = torch.gather(sub_loss, 0, topk_idx)
            ohkm_loss += torch.sum(tmp_loss) / top_k
        ohkm_loss /= loss.size()[0]
        return ohkm_loss

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # switch to train mode
    model.train()

    for i, (inputs, targets, valid) in enumerate(train_loader):
        input_var = inputs.cuda(non_blocking=True)
        target15, target11, target9, target7 = targets
        valid_var = valid.cuda(non_blocking=True)

        # compute output (this variant only trains the global branch)
        global_outputs = model(input_var)
        num_points = global_outputs.size(1)

        loss = 0.
        global_loss_record = 0.
        refine_loss_record = 0.

        # compute global loss
        for global_output, label in zip(global_outputs, targets):
            global_label = label * (valid > 1.1).type(torch.FloatTensor).view(
                -1, num_points, 1, 1)
            global_loss = criterion(global_output,
                                    global_label.cuda(non_blocking=True)) / 2.0
            loss += global_loss
            global_loss_record += global_loss.item()

        # record loss
        losses.update(loss.item(), inputs.size(0))

        # compute gradient and do optimization step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 100 == 0 and i != 0:
            print('iteration {} | loss: {}, global loss: {}, refine loss: {}, avg loss: {}'
                  .format(i, loss.item(), global_loss_record,
                          refine_loss_record, losses.avg))

    return losses.avg
def validate(val_loader, model, criterion, epoch, writer=None):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            target = target.cuda(non_blocking=True)
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)

            # compute output
            output = model(input_var)
            if isinstance(output, tuple):
                output, out_aux = output
            loss = criterion(output, target_var)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.data.item(), input.size(0))
            top1.update(prec1[0], input.size(0))
            top5.update(prec5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, top1=top1, top5=top5))
            if args.debug and i >= 5:
                break

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(top1=top1, top5=top5))

    if writer is not None:
        writer.add_scalar("val/cross_entropy", losses.avg, epoch)
        writer.add_scalar("val/top1", top1.avg.item(), epoch)

    return top1.avg
def train(train_loader, model, criterion, optimizer, epoch, writer=None, mask=None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        adjust_learning_rate(optimizer, epoch,
                             train_loader_len=len(train_loader),
                             iteration=i,
                             decay_strategy=args.lr_strategy,
                             warmup=args.warmup,
                             total_epoch=args.epochs,
                             lr=args.lr,
                             decay_epoch=args.decay_epoch)

        # measure data loading time
        data_time.update(time.time() - end)

        target = target.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        if isinstance(output, tuple):
            output, out_aux = output
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()

        # Mask finetuning style: do not actually prune the network,
        # just disable the updating of the pruned weights
        if mask is not None:
            for name, p in model.named_parameters():
                if 'weight' in name:
                    p.grad.data = p.grad.data * mask[name]

        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'lr {3}\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, i, len(train_loader), optimizer.param_groups[0]['lr'],
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, top1=top1, top5=top5))
        if args.debug and i >= 5:
            break

    if writer:
        writer.add_scalar("train/cross_entropy", losses.avg, epoch)
        writer.add_scalar("train/top1", top1.avg.item(), epoch)
        writer.add_scalar("train/top5", top5.avg.item(), epoch)
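# The classification train/validate loops above call accuracy(output, target, topk=(1, 5)).
# The sketch below assumes that helper matches the usual top-k precision function from the
# PyTorch ImageNet example; it is distinct from the heatmap-based accuracy(score_map, ...)
# used in the landmark loops, and the repo's actual version may differ slightly.
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k."""
    maxk = max(topk)
    batch_size = target.size(0)

    # indices of the maxk highest-scoring classes per sample, shape (maxk, batch)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # a sample counts as correct@k if the target is among its top k predictions
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res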
def train(train_loader, model, criterions, writer, counter, optimizer, device):
    criterion_abs, criterion_bce = criterions

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # switch to train mode
    model.train()

    for i, (inputs, targets, meta) in enumerate(train_loader):
        input_var = torch.autograd.Variable(inputs.to(device))
        targets = targets.type(torch.FloatTensor)
        targets = torch.autograd.Variable(targets.to(device))

        # split the target volume into its heatmap / offset channels
        endpoints_target = targets[:, 0, :, :].to(device).unsqueeze(1)
        intersections_points_target = targets[:, 1, :, :].to(device).unsqueeze(1)
        end_points_short_offsets_target = targets[:, 2:4, :, :].to(device)
        intersection_points_short_offsets_target = targets[:, 4:6, :, :].to(device)

        ground_truth = [
            endpoints_target,
            intersections_points_target,
            end_points_short_offsets_target,
            intersection_points_short_offsets_target,
        ]

        with torch.enable_grad():
            optimizer.zero_grad()
            outputs = model(input_var)
            loss, loss_end_pt, loss_inter_pt, loss_short_end_pt, loss_short_inter_pt = get_losses(
                ground_truth, outputs)
            losses.update(loss.data.item(), inputs.size(0))
            loss = loss.to(device)
            loss.backward()
            optimizer.step()

        # log the individual loss terms
        writer.add_scalar('loss', loss.data.item(), counter)
        writer.add_scalar('loss_end_pt', loss_end_pt.data.item(), counter)
        writer.add_scalar('loss_inter_pt', loss_inter_pt.data.item(), counter)
        writer.add_scalar('loss_short_end_pt', loss_short_end_pt.data.item(), counter)
        writer.add_scalar('loss_short_inter_pt', loss_short_inter_pt.data.item(), counter)
        writer.add_scalar('losses.avg', losses.avg, counter)
        counter = counter + 1

        if i % 50 == 0 and i != 0:
            print('iteration {} | loss: {}, avg loss: {}'.format(
                i, loss.data.item(), losses.avg))

    return losses.avg, counter
def train(train_loader, model, optimizer, lr):
    # prepare for refine loss: online hard keypoint mining
    def ohkm(loss, top_k):
        ohkm_loss = 0.
        for i in range(loss.size(0)):
            sub_loss = loss[i]
            topk_val, topk_idx = torch.topk(sub_loss, k=top_k, dim=0, sorted=False)
            tmp_loss = torch.gather(sub_loss, 0, topk_idx)
            ohkm_loss += tmp_loss.mean()
        ohkm_loss /= loss.size(0)
        return ohkm_loss

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # switch to train mode
    model.train()

    bar = Bar('Train', max=len(train_loader))
    end = time.time()
    for i, (inputs, targets, valid, meta) in enumerate(train_loader):
        data_time.update(time.time() - end)

        input_var = inputs.cuda()
        target15, target11, target9, target7 = targets
        refine_target_var = target7.cuda()
        valid_var = valid.cuda()

        # compute output
        global_outputs, refine_output = model(input_var)
        num_points = target15.size(1)

        loss = None
        global_loss_record = 0.

        # compute global loss (on the validity-masked labels) and refine loss
        for global_output, label in zip(global_outputs, targets):
            global_label = label * (valid > 1.1).float().view(-1, num_points, 1, 1)
            global_loss = F.mse_loss(global_output, global_label.cuda()) / 2.
            if loss is None:
                loss = global_loss
            else:
                loss += global_loss
            global_loss_record += global_loss.item()

        refine_loss = F.mse_loss(refine_output, refine_target_var, reduction='none')
        refine_loss = refine_loss.mean(dim=3).mean(dim=2)
        refine_loss *= (valid_var > 0.1).float().view(-1, num_points)
        refine_loss = ohkm(refine_loss, 8)
        loss += refine_loss
        refine_loss_record = refine_loss.item()

        # record loss
        losses.update(loss.item(), inputs.size(0))

        # compute gradient and do optimization step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        bar_format_string = ('({batch}/{size}) Data: {data:.6f}s | Batch: {bt:.3f}s | '
                             'Total: {total:} | ETA: {eta:} | '
                             'LR: {lr:.6f} Loss: {loss1:.6f}-{loss2:.6f}-{loss3:.6f}-{loss4:.6f}')
        bar.suffix = bar_format_string.format(batch=i, size=len(train_loader),
                                              data=data_time.avg, bt=batch_time.avg,
                                              total=bar.elapsed_td, eta=bar.eta_td,
                                              lr=lr,
                                              loss1=loss.item(),
                                              loss2=global_loss_record,
                                              loss3=refine_loss_record,
                                              loss4=losses.avg)
        bar.next()

    bar.finish()
    return losses.avg