def compute_features(dataloader, model, N):
    """Extract features for all N samples yielded by the dataloader.

    Args:
        dataloader (torch.utils.data.DataLoader): yields (input, label)
            batches; the labels are discarded.
        model (nn.Module): feature extractor, switched to eval mode here.
        N (int): total number of samples (row count of the result).

    Returns:
        np.ndarray: float32 matrix of shape (N, feature_dim).
    """
    if args.verbose:
        print('Compute features')
    batch_time = AverageMeter()
    end = time.time()
    model.eval()
    # Inference only: torch.no_grad() replaces the long-removed
    # Variable(..., volatile=True) API with identical semantics.
    with torch.no_grad():
        # discard the label information in the dataloader
        for i, (input_tensor, _) in enumerate(dataloader):
            aux = model(input_tensor.cuda()).data.cpu().numpy()

            if i == 0:
                # allocate the output matrix once the feature dim is known
                features = np.zeros((N, aux.shape[1]), dtype='float32')

            if i < len(dataloader) - 1:
                features[i * args.batch: (i + 1) * args.batch] = aux.astype('float32')
            else:
                # special treatment for final batch (may be smaller)
                features[i * args.batch:] = aux.astype('float32')

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if args.verbose and (i % 200) == 0:
                print('{0} / {1}\t'
                      'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})'
                      .format(i, len(dataloader), batch_time=batch_time))
    return features
def train(loader, model, optimizer, criterion, fc6_8, losses, it=0,
          total_iterations=None, stepsize=None, verbose=True):
    """Train the MLP head for (at most) `total_iterations` iterations.

    Args:
        loader: yields (input, target) batches.
        model: network; dropout is active but the batch norms in
            `model.features` stay in eval mode (global statistics).
        optimizer: optimizer over the trainable parameters.
        criterion: per-element loss; positions whose target is 255
            (the ignore label) are zeroed before averaging.
        fc6_8: kept for interface compatibility (unused here).
        losses (AverageMeter): accumulates batch losses across calls.
        it (int): iteration counter to resume from.
        total_iterations (int | None): stop after this many iterations.
        stepsize (int | None): halve the learning rate every `stepsize`
            iterations (must be set when the loop runs).
        verbose (bool): print progress every 25 iterations.

    Returns:
        int: the updated iteration counter.
    """
    # to log
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()

    current_iteration = it
    # use dropout for the MLP
    model.train()
    # in the batch norms always use global statistics
    model.features.eval()

    for (input, target) in loader:
        # measure data loading time
        data_time.update(time.time() - end)

        # adjust learning rate
        if current_iteration != 0 and current_iteration % stepsize == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] = param_group['lr'] * 0.5
                print('iter {0} learning rate is {1}'.format(
                    current_iteration, param_group['lr']))

        # move input to gpu
        input = input.cuda(non_blocking=True)

        # forward pass
        output = model(input)
        target = target.float().cuda()
        # targets equal to 255 mark "ignore" positions; mask their loss out
        mask = (target == 255)
        loss = torch.sum(criterion(output, target).masked_fill_(mask, 0)) / target.size(0)

        # backward
        optimizer.zero_grad()
        loss.backward()
        # clip gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), 10)
        # and weights update
        optimizer.step()

        # record loss
        losses.update(loss.item(), input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if verbose is True and current_iteration % 25 == 0:
            print('Iteration[{0}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                      current_iteration, batch_time=batch_time,
                      data_time=data_time, loss=losses))
        current_iteration = current_iteration + 1
        if total_iterations is not None and current_iteration == total_iterations:
            break
    return current_iteration
def train(loader, model, crit, opt, epoch):
    """Training of the CNN.

    Args:
        loader (torch.utils.data.DataLoader): Data loader
        model (nn.Module): CNN
        crit (torch.nn): loss
        opt (torch.optim.SGD): optimizer for every parameters with True
                               requires_grad in model except top layer
        epoch (int)

    Returns:
        float: average loss over the epoch.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    data_time = AverageMeter()

    # switch to train mode
    model.train()

    # create an optimizer for the last fc layer
    optimizer_tl = torch.optim.SGD(
        model.top_layer.parameters(),
        lr=args.lr,
        weight_decay=10**args.wd,
    )

    end = time.time()
    for i, (input_tensor, target) in enumerate(loader):
        data_time.update(time.time() - end)

        # save checkpoint
        n = len(loader) * epoch + i
        if n % args.checkpoints == 0:
            path = os.path.join(
                args.exp,
                'checkpoints',
                # integer division so the checkpoint index is not a float
                'checkpoint_' + str(n // args.checkpoints) + '.pth.tar',
            )
            if args.verbose:
                print('Save checkpoint at: {0}'.format(path))
            torch.save(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': opt.state_dict()
                }, path)

        # `async` became a reserved word in Python 3.7; the kwarg was
        # renamed to `non_blocking` in PyTorch 0.4
        target = target.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(input_tensor.cuda())
        target_var = torch.autograd.Variable(target)

        output = model(input_var)
        loss = crit(output, target_var)

        # record loss as a python float (0-dim tensors cannot be indexed)
        losses.update(loss.item(), input_tensor.size(0))

        # compute gradient and do SGD step
        opt.zero_grad()
        optimizer_tl.zero_grad()
        loss.backward()
        opt.step()
        optimizer_tl.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if args.verbose and (i % 200) == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data: {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss: {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(loader), batch_time=batch_time,
                      data_time=data_time, loss=losses))

    return losses.avg
def validate(val_loader, model, criterions, epoch):
    """Validate the multi-task protest model for one epoch.

    Args:
        val_loader: yields dicts with 'image' and 'label' (a dict of targets).
        model: network under evaluation.
        criterions: losses consumed by `calculate_loss`.
        epoch (int): current epoch number (for logging only).

    Returns:
        tuple: (average combined loss, per-batch loss history list).
    """
    model.eval()
    batch_time = AverageMeter()
    loss_protest = AverageMeter()
    loss_v = AverageMeter()
    protest_acc = AverageMeter()
    violence_mse = AverageMeter()
    visattr_acc = AverageMeter()
    end = time.time()
    loss_history = []
    for i, sample in enumerate(val_loader):
        # measure data loading batch_time
        input, target = sample['image'], sample['label']

        if args.cuda:
            input = input.cuda()
            for k, v in target.items():
                target[k] = v.cuda()
        input_var = Variable(input)
        target_var = {}
        for k, v in target.items():
            target_var[k] = Variable(v)

        output = model(input_var)

        losses, scores, N_protest = calculate_loss(output, target_var,
                                                   criterions)
        loss = 0
        for l in losses:
            loss += l

        if N_protest:
            loss_protest.update(losses[0].item(), input.size(0))
            # the remaining terms only exist for protest images
            loss_v.update(loss.item() - losses[0].item(), N_protest)
        else:
            # when no protest images
            # (.item() replaces the legacy .data[0] indexing, which
            # raises IndexError on 0-dim tensors in modern PyTorch)
            loss_protest.update(losses[0].item(), input.size(0))
        loss_history.append(loss.item())
        protest_acc.update(scores['protest_acc'], input.size(0))
        violence_mse.update(scores['violence_mse'], N_protest)
        visattr_acc.update(scores['visattr_acc'], N_protest)

        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print(
                'Epoch: [{0}][{1}/{2}]\t'
                'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) '
                'Loss {loss_val:.3f} ({loss_avg:.3f}) '
                'Protest Acc {protest_acc.val:.3f} ({protest_acc.avg:.3f}) '
                'Violence MSE {violence_mse.val:.5f} ({violence_mse.avg:.5f}) '
                'Vis Attr Acc {visattr_acc.val:.3f} ({visattr_acc.avg:.3f})'.
                format(epoch, i, len(val_loader),
                       batch_time=batch_time,
                       loss_val=loss_protest.val + loss_v.val,
                       loss_avg=loss_protest.avg + loss_v.avg,
                       protest_acc=protest_acc,
                       violence_mse=violence_mse,
                       visattr_acc=visattr_acc))

    print(' * Loss {loss_avg:.3f} Protest Acc {protest_acc.avg:.3f} '
          'Violence MSE {violence_mse.avg:.5f} '
          'Vis Attr Acc {visattr_acc.avg:.3f} '.format(
              loss_avg=loss_protest.avg + loss_v.avg,
              protest_acc=protest_acc,
              violence_mse=violence_mse,
              visattr_acc=visattr_acc))
    return loss_protest.avg + loss_v.avg, loss_history
def validate(val_loader, model, epoch, output_writers):
    """Evaluate optical-flow EPE over the validation set.

    Supports two loader flavors selected by ``args.data_loader``
    ("torch" or "dali"); for any other value `input`/`target` are never
    bound and the loop raises NameError — assumed never to happen here.
    Logs ground truth / inputs / predictions to the first
    ``len(output_writers)`` batches and returns the average EPE.
    """
    global args
    batch_time = AverageMeter()
    flow2_EPEs = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for i, data in enumerate(val_loader):
        if args.data_loader == "torch":
            # torch loader already yields an (input-pair, target) tuple
            (input, target) = data
        if args.data_loader == "dali":
            # DALI packs both frames channel-wise into one 6-channel tensor;
            # split into the two 3-channel images
            input = [
                data[0]["images"][:, 0:3, :, :],
                data[0]["images"][:, 3:6, :, :],
            ]
            target = data[0]["flow"]
        if args.show_val_images:
            # debug visualization: both frames plus the GT flow as RGB
            for k in range(len(input[0].cpu())):
                f, axarr = plt.subplots(2, 2)
                axarr[0, 0].imshow(np.moveaxis(np.array(input[0].cpu()[k]), 0, 2))
                axarr[0, 1].imshow(np.moveaxis(np.array(input[1].cpu()[k]), 0, 2))
                axarr[1, 0].imshow(
                    np.moveaxis(
                        flow2rgb(args.div_flow * np.squeeze(target.cpu()[k]),
                                 max_value=10),
                        0,
                        2,
                    ))
                plt.show()
        target = target.to(device)
        # concatenate the two frames channel-wise for the network
        input = torch.cat(input, 1).to(device)

        # compute output
        output = model(input)  # [0], input[1])
        # scale back by div_flow to report EPE in true flow units
        flow2_EPE = args.div_flow * realEPE(output, target, sparse=args.sparse)
        # record EPE
        flow2_EPEs.update(flow2_EPE.item(), target.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i < len(output_writers):  # log first output of first batches
            if epoch == 0:
                # input = torch.cat(input, 1).to(device)
                # NOTE(review): these look like dataset normalization means
                # being added back for display — confirm against the transform
                mean_values = torch.tensor([0.45, 0.432, 0.411],
                                           dtype=input.dtype).view(3, 1, 1)
                output_writers[i].add_image(
                    "GroundTruth",
                    flow2rgb(args.div_flow * target[0], max_value=10), 0)
                output_writers[i].add_image(
                    "Inputs", (input[0, :3].cpu() + mean_values).clamp(0, 1), 0)
                output_writers[i].add_image(
                    "Inputs", (input[0, 3:].cpu() + mean_values).clamp(0, 1), 1)
            output_writers[i].add_image(
                "FlowNet Outputs",
                flow2rgb(args.div_flow * output[0], max_value=10),
                epoch,
            )

        if i % args.print_freq == 0:
            print("Test: [{0}/{1}]\t Time {2}\t EPE {3}".format(
                i, len(val_loader), batch_time, flow2_EPEs))

    print(" * EPE {:.3f}".format(flow2_EPEs.avg))
    return flow2_EPEs.avg
def train(epoch, train_loader, model, criterion, optimizer, opt, auxiliary=None):
    """One epoch training.

    Cross-entropy plus an optional auxiliary (orthogonal projection) loss
    when `auxiliary` is given, or an SRL loss when `opt.srl` is set.
    Returns (top1.avg, losses.avg) and additionally the per-loss averages
    when an auxiliary criterion is used.

    NOTE(review): if both `opt.srl` and `auxiliary` are set, `feat` is never
    bound (only the non-srl branch produces it) and `auxiliary(feat, target)`
    would raise NameError — presumably the two options are mutually
    exclusive; confirm at the call site.
    """
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    cel_holder = AverageMeter()   # cross-entropy component
    opl_holder = AverageMeter()   # auxiliary-loss component
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    for idx, (input, target, _) in enumerate(train_loader):
        data_time.update(time.time() - end)

        input = input.float()
        if torch.cuda.is_available():
            input = input.cuda()
            target = target.cuda()

        # ===================forward=====================
        # output = model(input)
        if opt.srl:
            output, srl_loss = model(input, labels=target)
        else:
            # is_feat=True also returns intermediate features; only the
            # last one (`feat`) feeds the auxiliary loss
            [f0, f1, f2, f3, feat], output = model(input, is_feat=True)
        cel = criterion(output, target)
        if auxiliary is not None:
            opl = auxiliary(feat, target)
            ratio = opt.opl_ratio
            loss = cel + ratio * opl
        elif opt.srl:
            loss = cel + srl_loss
        else:
            loss = cel

        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        cel_holder.update(cel.item(), input.size(0))
        if auxiliary is not None:
            opl_holder.update(opl.item(), input.size(0))
        losses.update(loss.item(), input.size(0))
        top1.update(acc1[0], input.size(0))
        top5.update(acc5[0], input.size(0))

        # ===================backward=====================
        optimizer.zero_grad()
        loss.backward()
        if opt.popl:
            # NOTE(review): assumes auxiliary is not None when opt.popl is set
            for param in auxiliary.parameters():
                # learning rate: 0.5
                param.grad.data *= (0.5 / (opt.opl_ratio * opt.learning_rate))
        optimizer.step()

        # ===================meters=====================
        batch_time.update(time.time() - end)
        end = time.time()

        # tensorboard logger
        pass

        # print info
        if idx % opt.print_freq == 0:
            print(f'Epoch: [{epoch}][{idx}/{len(train_loader)}]\t'
                  f'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  f'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  f'Loss {losses.val:.4f} ({losses.avg:.4f})\t'
                  f'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  f'Acc@5 {top5.val:.3f} ({top5.avg:.3f})')
            sys.stdout.flush()

    print(f' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}')
    sys.stdout.flush()

    if auxiliary is not None:
        return top1.avg, losses.avg, [cel_holder.avg, opl_holder.avg]
    else:
        return top1.avg, losses.avg
def train(args, model, classifier, train_loader, criterion, optimizer, epoch):
    """Train backbone + classifier for one epoch on tiled triplet inputs.

    Args:
        args: namespace with tile_h, tile_w, print_freq.
        model: feature extractor taking three tile tensors.
        classifier: head mapping features to class logits.
        train_loader: yields (input1, input2, input3, target) batches.
        criterion: classification loss.
        optimizer: optimizer over both networks' parameters.
        epoch (int): current epoch (logging only).

    Returns:
        tuple: (avg loss, avg accuracy, all features, all targets), where the
        features/targets are the detached concatenation over the epoch.
    """
    # Switch to train mode
    model.train()
    classifier.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    total_feats = []
    total_targets = []

    end = time.time()
    for batch_idx, (input1, input2, input3,
                    target) in enumerate(tqdm(train_loader, disable=False)):
        # fix: data_time was printed below but never updated
        data_time.update(time.time() - end)

        # Get inputs and target
        input1, input2, input3, target = input1.float(), input2.float(
        ), input3.float(), target.long()

        # Reshape augmented tensors (flatten the augmentation dimension)
        input1, input2, input3, target = input1.reshape(
            -1, 3, args.tile_h, args.tile_w), input2.reshape(
                -1, 3, args.tile_h, args.tile_w), input3.reshape(
                    -1, 3, args.tile_h,
                    args.tile_w), target.view(-1, 1).reshape(-1, )

        # Move the variables to Cuda
        input1, input2, input3, target = input1.cuda(), input2.cuda(
        ), input3.cuda(), target.cuda()

        # compute output ###############################
        feats = model(input1, input2, input3)
        output = classifier(feats)
        loss = criterion(output, target)

        # compute gradient and do SGD step #############
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # compute loss and accuracy ####################
        batch_size = target.size(0)
        losses.update(loss.item(), batch_size)
        pred = torch.argmax(output, dim=1)
        acc.update(torch.sum(target == pred).item() / batch_size, batch_size)

        # Save features — detach here so the autograd graph of every batch
        # is not kept alive for the whole epoch (the values are unchanged;
        # the final result was detached anyway)
        total_feats.append(feats.detach())
        total_targets.append(target)

        # measure elapsed time
        torch.cuda.synchronize()
        batch_time.update(time.time() - end)
        end = time.time()

        # print statistics and write summary every N batch
        if (batch_idx + 1) % args.print_freq == 0:
            print('Train: [{0}][{1}/{2}]\t'
                  'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'DT {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'loss {loss.val:.3f} ({loss.avg:.3f})\t'
                  'acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch, batch_idx + 1, len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      acc=acc))

    final_feats = torch.cat(total_feats).detach()
    final_targets = torch.cat(total_targets).detach()

    return losses.avg, acc.avg, final_feats, final_targets
def train(epoch):
    """One epoch of prototype-based self-labelling training.

    Uses module-level globals: model, optimizer, trainloader, args, device,
    logger, best_acc, pytorchgo_args. Scores = outputs @ prototype matrix;
    pseudo-labels q come from `optimize_L_sk` (Sinkhorn-Knopp), and the loss
    is cross-entropy between q and the softmax of the scores. The prototype
    matrix is L2-renormalized column-wise after each step unless frozen.

    Returns:
        np.ndarray: the prototype matrix copied to CPU.
    """
    logger.info('Epoch: %d' % epoch)
    adjust_learning_rate(optimizer, epoch)
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for batch_idx, (inputs, targets, indexes) in enumerate(trainloader):
        pytorchgo_args.get_args().step += 1
        if args.debug and batch_idx >= 2: break
        # dead code kept from an earlier self-labelling variant
        if False:
            if niter * trainloader.batch_size >= optimize_times[-1]:
                with torch.no_grad():
                    _ = optimize_times.pop()
                    if args.hc > 1:
                        feature_return_switch(model, True)
                    selflabels = opt_sk(model, selflabels, epoch)
                    if args.hc > 1:
                        feature_return_switch(model, False)
        data_time.update(time.time() - end)
        inputs, targets, indexes = inputs.to(device), targets.to(
            device), indexes.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        if epoch <= args.fix_epoch:  #freeze update of prototype in epoch 0
            is_freeze_protoype = True
            model.prototype_N2K.requires_grad = False
        else:
            is_freeze_protoype = False
            model.prototype_N2K.requires_grad = True
        scores = torch.mm(outputs, model.prototype_N2K)  #[BxC]x[CxK]
        # pseudo-labels are computed without gradients
        with torch.no_grad():
            err, tim_sec, q = optimize_L_sk(scores)
            #assert np.sum(q.detach().cpu().numpy()[:, 0]) < 1.1
        p = torch.softmax(scores / pytorchgo_args.get_args().contrast_temp,
                          -1)  #B x K
        # cross-entropy between Sinkhorn targets q and predictions p
        loss = -torch.mean(q * torch.log(p))
        loss.backward()
        optimizer.step()

        if not is_freeze_protoype:
            # renormalize each prototype column to unit L2 norm
            with torch.no_grad():
                _matrix = model.prototype_N2K
                normalize_dim = 0  #TODO, check norm
                qn = torch.norm(_matrix, p=2, dim=normalize_dim).detach(
                )  # https://discuss.pytorch.org/t/how-to-normalize-embedding-vectors/1209/3
                model.prototype_N2K.data = _matrix.div(
                    qn.unsqueeze(normalize_dim))
        # assert np.sum(model.prototype_N2K.detach().cpu().numpy()[:,0])<1.1
        train_loss.update(loss.item(), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % 10 == 0:
            logger.info(
                'Epoch: [{}/{}][{}/{}]'
                'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f}), best_knn_acc={best_knn_acc}, freeze={freeze}'
                .format(epoch, args.epochs, batch_idx, len(trainloader),
                        batch_time=batch_time,
                        data_time=data_time,
                        train_loss=train_loss,
                        best_knn_acc=best_acc,
                        freeze=is_freeze_protoype))
            wandb_logging(d=dict(
                loss1e4=loss.item() * 1e4,
                group0_lr=optimizer.state_dict()['param_groups'][0]['lr'],
                sk_err=err,
                sk_time_sec=tim_sec),
                          step=pytorchgo_args.get_args().step,
                          use_wandb=pytorchgo_args.get_args().wandb,
                          prefix="training epoch {}/{}: ".format(
                              epoch,
                              pytorchgo_args.get_args().epochs))
            #optimizer_summary(optimizer)

    cpu_prototype = model.prototype_N2K.detach().cpu().numpy()
    return cpu_prototype
def train(train_loader, model, criterions, optimizer, epoch):
    """Train the multi-task protest model for one epoch.

    Args:
        train_loader: yields dicts with 'image' and 'label' (dict of targets).
        model: network to train.
        criterions: losses consumed by `calculate_loss`.
        optimizer: optimizer over the model parameters.
        epoch (int): current epoch number (for logging only).

    Returns:
        list: per-batch combined loss history.
    """
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    loss_protest = AverageMeter()
    loss_v = AverageMeter()
    protest_acc = AverageMeter()
    violence_mse = AverageMeter()
    visattr_acc = AverageMeter()
    end = time.time()
    loss_history = []
    for i, sample in enumerate(train_loader):
        # measure data loading batch_time
        input, target = sample['image'], sample['label']
        data_time.update(time.time() - end)

        if args.cuda:
            input = input.cuda()
            for k, v in target.items():
                target[k] = v.cuda()
        target_var = {}
        for k, v in target.items():
            target_var[k] = Variable(v)
        input_var = Variable(input)
        output = model(input_var)

        losses, scores, N_protest = calculate_loss(output, target_var,
                                                   criterions)
        optimizer.zero_grad()
        loss = 0
        for l in losses:
            loss += l
        # back prop
        loss.backward()
        optimizer.step()

        # .item() replaces the legacy .data[0] indexing, which raises
        # IndexError on 0-dim tensors in modern PyTorch (matches the
        # companion validate() in this file)
        if N_protest:
            loss_protest.update(losses[0].item(), input.size(0))
            loss_v.update(loss.item() - losses[0].item(), N_protest)
        else:
            # when there is no protest image in the batch
            loss_protest.update(losses[0].item(), input.size(0))
        loss_history.append(loss.item())
        protest_acc.update(scores['protest_acc'], input.size(0))
        violence_mse.update(scores['violence_mse'], N_protest)
        visattr_acc.update(scores['visattr_acc'], N_protest)

        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}] '
                  'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) '
                  'Data {data_time.val:.2f} ({data_time.avg:.2f}) '
                  'Loss {loss_val:.3f} ({loss_avg:.3f}) '
                  'Protest {protest_acc.val:.3f} ({protest_acc.avg:.3f}) '
                  'Violence {violence_mse.val:.5f} ({violence_mse.avg:.5f}) '
                  'Vis Attr {visattr_acc.val:.3f} ({visattr_acc.avg:.3f})'
                  .format(
                      epoch, i, len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss_val=loss_protest.val + loss_v.val,
                      loss_avg=loss_protest.avg + loss_v.avg,
                      protest_acc=protest_acc,
                      violence_mse=violence_mse,
                      visattr_acc=visattr_acc))
    return loss_history
def validate(args):
    """Evaluate a LISTA model checkpoint on the validation set.

    Loads the checkpoint named by ``args.resume`` (stripping the
    'module.' DataParallel prefix from state-dict keys), then reports the
    average normalized MSE and an F1 score obtained by thresholding the
    magnitude of the reconstructed complex channel.
    """
    layer = -1
    avgNmse = AverageMeter()
    running_metric = runningScore(2)
    data_loader = ListaDataLoader(args, flag='val')
    val_loader = torch.utils.data.DataLoader(data_loader,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=0,
                                             drop_last=True)
    A = ArrayResposeLoad(measurements=args.measurements,
                         antenna_x=args.antenna_x,
                         antenna_y=args.antenna_y)
    if args.arch == "LISTA":
        model = models.LISTA(A=A,
                             T=args.T,
                             lam=args.lam,
                             untied=args.untied,
                             coord=args.coord)
    # inference only
    for param in model.parameters():
        param.requires_grad = False
    device = args.device
    model = model.to(device)
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                # strip the 'module.' prefix left by DataParallel
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)
            layer = checkpoint['layer']
            print("Loaded checkpoint '{}' (layer {}|epoch {})".format(
                args.resume, checkpoint['layer'], checkpoint['epoch']))
        else:
            print("No checkpoint found at '{}'".format(args.resume))
    sys.stdout.flush()
    model.eval()
    for idx, (y, x) in enumerate(val_loader):
        print('progress: %d / %d' % (idx + 1, len(val_loader)))
        x_hats = model(y.cuda().transpose(0, 1))
        nmse = nmse_metric(x_hats[layer], x.cuda().transpose(0, 1))
        avgNmse.update(nmse, x.size(0))
        antenna_size = args.antenna_x * args.antenna_y
        # first half of the vector is the real part, second half imaginary
        x = x.cpu().numpy()
        x_complex = x[:, :antenna_size] + 1j * x[:, antenna_size:]
        x_abs = abs(x_complex)
        mask = x_abs != 0
        x_hat = x_hats[layer].transpose(0, 1)
        x_hat = x_hat.cpu().numpy()
        x_complex_hat = x_hat[:, :antenna_size] + 1j * x_hat[:, antenna_size:]
        x_hat_abs = abs(x_complex_hat)
        high_th = np.median(x_hat_abs[mask])
        # fix: use the boolean complement. `1 - mask` produces an int array
        # of 0/1 and therefore does integer fancy indexing (picking rows
        # 0 and 1) instead of selecting the off-support entries.
        low_th = np.median(x_hat_abs[~mask])
        th = (high_th + low_th) / 2
        update_score(x_hat_abs, mask, running_metric, th)
    print('F1 score:', running_metric.get_scores())
    print('normalized MSE:', avgNmse.avg)
def validate_pose(validate_loader, PoseNet_model, epoch, validate_writer):
    """Evaluate the pose network bidirectionally on the validation set.

    For each image pair, builds ground-truth and initial 8-parameter
    homography poses in both directions, runs the network both ways, and
    accumulates a charbonnier end-point loss on the translation components
    (columns 5:). Warping/network timings are averaged after a warm-up of
    6 batches. Returns the average end-point loss.
    """
    global args
    batch_time = AverageMeter()
    warping_time = AverageMeter()
    net_time = AverageMeter()
    validate_endPoint_loss = AverageMeter()
    epoch_size = len(validate_loader) if args.epoch_size == 0 else min(
        len(validate_loader), args.epoch_size)

    # switch to evaluate mode
    PoseNet_model.eval()

    end = time.time()
    for batch_index, (inputImgs, targetPoses) in enumerate(validate_loader):
        #
        if batch_index >= epoch_size:
            break
        batch_size = targetPoses.size()[0]
        absolute_poseGT = targetPoses.to(
            device)  # torch.Size([8, 2, 320, 448])
        # # NOTE this is the pose at the start time point of exposure of an image
        # pose_img1 = absolute_poseGT[:, 0, :]  # torch.Size([batch_size, 6])
        # pose_img2 = absolute_poseGT[:, 2, :]
        # NOTE can be the average value of the start and end point of exposure.
        pose_img1 = (absolute_poseGT[:, 0, :] +
                     absolute_poseGT[:, 1, :]) / 2  # torch.Size([batch_size, 6])
        pose_img2 = (absolute_poseGT[:, 2, :] + absolute_poseGT[:, 3, :]) / 2
        input_img1 = inputImgs[0].to(device)
        input_img2 = inputImgs[1].to(
            device)  # torch.Size([batch_size, 3, 320, 448])

        # 1. ground truth
        # torch.Size([batch_size, 8])
        homo8_1to2_GT, homo8_1to2_initial = absolutePose2homo8Pose(
            pose_img1,
            pose_img2,
            batch_size,
            rot_random_bias=0.0,
            slope_random_bias=0.0)  # NOTE train without attitude noise
        homo8_2to1_GT, homo8_2to1_initial = absolutePose2homo8Pose(
            pose_img2,
            pose_img1,
            batch_size,
            rot_random_bias=0.0,
            slope_random_bias=0.0)

        # 2. nn forward (both directions; each call also returns timings)
        homo8_1to2_nn, warpingTimer1, netTimer1 = PoseNet_model(
            input_img1, input_img2, homo8_1to2_initial)
        homo8_2to1_nn, warpingTimer2, netTimer2 = PoseNet_model(
            input_img2, input_img1, homo8_2to1_initial)

        if batch_index > 6:  # not counting the first a few
            warping_time.update((warpingTimer1 + warpingTimer2) * 0.5,
                                n=batch_size)
            net_time.update((netTimer1 + netTimer2) * 0.5, n=batch_size)

        # 3. error
        homo8_1to2_blockError = homo8_1to2_GT - homo8_1to2_nn
        homo8_2to1_blockError = homo8_2to1_GT - homo8_2to1_nn
        # only translation components (columns 5:) contribute for now
        endPoint_loss = torch.mean(charbonnier(
            homo8_1to2_blockError[:, 5:])) + torch.mean(
                charbonnier(homo8_2to1_blockError[:, 5:]))
        # predict tilt TODO
        # endPoint_loss = torch.mean(charbonnier(homo8_1to2_blockError[:, 0:2])) + torch.mean(charbonnier(homo8_2to1_blockError[:, 0:2]))
        validate_endPoint_loss.update(endPoint_loss.item(), n=batch_size)

        batch_time.update(time.time() - end)
        end = time.time()
        if batch_index % args.print_freq == 0:
            print('Test: [{0}/{1}]\t Time {2}\t Bidirection_Loss {3}'.format(
                batch_index, epoch_size, batch_time, validate_endPoint_loss))
            if batch_index > 6:
                print('WarpingTime(ms):', warping_time.avg, 'NetworkTime(ms):',
                      net_time.avg, 'Total fps:',
                      1000.0 / (warping_time.avg + net_time.avg))

    print(' * EPE loss validate: {:.3f}, Epoch: {}'.format(
        validate_endPoint_loss.avg, epoch))
    return validate_endPoint_loss.avg
def train_pose(train_loader, PoseNet_model, optimizer, epoch, train_writer,
               n_iter):
    """Train the pose network for one epoch, bidirectionally.

    For each image pair, a weighted sum of per-block charbonnier losses is
    minimized — photometric residuals in self-supervised mode, otherwise the
    pose error on the translation components (columns 5:). Returns the
    average end-point loss and the updated global iteration counter.
    """
    global args
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_endPoint_loss = AverageMeter()
    # ssim_loss = pytorch_ssim.SSIM()  # TODO
    epoch_size = len(train_loader) if args.epoch_size == 0 else min(
        len(train_loader), args.epoch_size)

    # switch to train mode
    PoseNet_model.train()

    end = time.time()
    # go through the training set for one epoch and train
    for batch_index, (inputImgs, targetPoses) in enumerate(train_loader):
        n_iter += 1
        if batch_index >= epoch_size:
            break
        batch_size = targetPoses.size()[0]
        # measure data loading time
        data_time.update(time.time() - end)
        absolute_poseGT = targetPoses.to(device)
        # # NOTE this is the pose at the start time point of exposure of an image
        # pose_img1 = absolute_poseGT[:, 0, :]  # torch.Size([batch_size, 6])
        # pose_img2 = absolute_poseGT[:, 2, :]
        # NOTE can be the average value of the start and end point of exposure.
        pose_img1 = (absolute_poseGT[:, 0, :] +
                     absolute_poseGT[:, 1, :]) / 2  # torch.Size([batch_size, 6])
        pose_img2 = (absolute_poseGT[:, 2, :] + absolute_poseGT[:, 3, :]) / 2
        input_img1 = inputImgs[0].to(device)
        input_img2 = inputImgs[1].to(
            device)  # torch.Size([batch_size, 3, 320, 448])
        loss_block_sum = torch.zeros(1).squeeze().to(device)

        # 1. ground truth
        # torch.Size([batch_size, 8])
        homo8_1to2_GT, homo8_1to2_initial = absolutePose2homo8Pose(
            pose_img1,
            pose_img2,
            batch_size,
            rot_random_bias=0.0,
            slope_random_bias=0.0)  # NOTE train without attitude noise
        homo8_2to1_GT, homo8_2to1_initial = absolutePose2homo8Pose(
            pose_img2,
            pose_img1,
            batch_size,
            rot_random_bias=0.0,
            slope_random_bias=0.0)

        # 2. nn forward
        # torch.autograd.set_detect_anomaly(True)
        homo8_1to2_nn_blockList = PoseNet_model(input_img1, input_img2,
                                                homo8_1to2_initial)
        homo8_2to1_nn_blockList = PoseNet_model(input_img2, input_img1,
                                                homo8_2to1_initial)

        # 3. loss: weighted sum over the refinement blocks
        for block_num in range(PoseNet_model.num_blocks):
            if args.self_supervised:
                # self-supervised
                # charbonnier loss
                block_loss = torch.mean(
                    charbonnier(
                        homo8_1to2_nn_blockList[block_num])) + torch.mean(
                            charbonnier(homo8_2to1_nn_blockList[block_num]))
                # # L1 loss
                # block_loss = torch.mean(torch.nn.L1Loss()(homo8_1to2_nn_blockList[block_num], torch.zeros(homo8_1to2_nn_blockList[block_num].size()).to(device))) + \
                #              torch.mean(torch.nn.L1Loss()(homo8_2to1_nn_blockList[block_num], torch.zeros(homo8_2to1_nn_blockList[block_num].size()).to(device)))
                # TODO SSIM ssim_loss
            else:
                # supervised with ground truth
                homo8_1to2_blockError = homo8_1to2_GT - homo8_1to2_nn_blockList[
                    block_num]
                homo8_2to1_blockError = homo8_2to1_GT - homo8_2to1_nn_blockList[
                    block_num]
                block_loss = torch.mean(
                    charbonnier(homo8_1to2_blockError[:, 5:])) + torch.mean(
                        charbonnier(homo8_2to1_blockError[:, 5:])
                    )  # NOTE only trans error for now
                # predict tilt TODO
                # block_loss = torch.mean(charbonnier(homo8_1to2_blockError[:, 0:2])) + torch.mean(charbonnier(homo8_2to1_blockError[:, 0:2]))
            loss_block_sum = loss_block_sum + block_loss * args.list_blocks_weights[
                block_num]
            # this creates a new tensor in a new address with the same name.
            # print(id(loss_block_sum))
        loss = loss_block_sum

        # compute gradient and do optimization step
        # torch.autograd.set_detect_anomaly(True)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure time consuming
        batch_time.update(time.time() - end)
        end = time.time()

        # in self-supervised training, the block_loss is the photometric error,
        # while validate_endPoint_loss is the pose error, not comparable!
        # So here recalculate the pose error
        if args.self_supervised:
            # NOTE(review): index `PoseNet_model.num_blocks` is one past the
            # last index used in the loop above (which runs 0..num_blocks-1).
            # This is only valid if the model returns num_blocks+1 entries
            # (e.g. including the initial pose) — confirm, otherwise this is
            # an off-by-one IndexError.
            homo8_1to2_blockError = homo8_1to2_GT - homo8_1to2_nn_blockList[
                PoseNet_model.num_blocks]
            homo8_2to1_blockError = homo8_2to1_GT - homo8_2to1_nn_blockList[
                PoseNet_model.num_blocks]
            block_loss = torch.mean(charbonnier(
                homo8_1to2_blockError[:, 5:])) + torch.mean(
                    charbonnier(homo8_2to1_blockError[:, 5:])
                )  # NOTE only trans error for now

        train_endPoint_loss.update(
            block_loss.item(),
            n=batch_size)  # the final loss is the last block_loss

        if batch_index % args.print_freq == 0:
            print(
                'Epoch: [{0}][{1}/{2}]\t Time {3}\t Data {4}\t Bidirection_Loss {5}'
                .format(epoch, batch_index, epoch_size, batch_time, data_time,
                        train_endPoint_loss))
        if args.save_train_log:
            train_writer.add_scalar('train_loss_bidirection', loss.item(),
                                    n_iter)

    print(' EPE loss train: {:.3f}, Epoch: {}'.format(
        train_endPoint_loss.avg, epoch))
    return train_endPoint_loss.avg, n_iter
def train(loader, model, crit, opt, epoch):
    """Training of the CNN.

    Args:
        loader (torch.utils.data.DataLoader): Data loader
        model (nn.Module): CNN
        crit (torch.nn): loss
        opt (torch.optim.SGD): optimizer for every parameters with True
                               requires_grad in model except top layer
        epoch (int)

    Returns:
        float: average loss over the epoch.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    data_time = AverageMeter()

    # switch to train mode
    model.train()

    # create an optimizer for the last fc layer
    optimizer_tl = torch.optim.SGD(
        model.top_layer.parameters(),
        lr=args.lr,
        weight_decay=10**args.wd,
    )

    end = time.time()
    for i, (input_tensor, target) in enumerate(loader):
        data_time.update(time.time() - end)

        # save checkpoint
        n = len(loader) * epoch + i
        if n % args.checkpoints == 0:
            path = os.path.join(
                args.exp,
                'checkpoints',
                # integer division so the checkpoint index is not a float
                'checkpoint_' + str(n // args.checkpoints) + '.pth.tar',
            )
            if args.verbose:
                print('Save checkpoint at: {0}'.format(path))
            torch.save({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer': opt.state_dict()
            }, path)

        # `async` became a reserved word in Python 3.7; the kwarg was
        # renamed to `non_blocking` in PyTorch 0.4
        target = target.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(input_tensor.cuda())
        target_var = torch.autograd.Variable(target)

        output = model(input_var)
        loss = crit(output, target_var)

        # record loss as a python float (0-dim tensors cannot be indexed)
        losses.update(loss.item(), input_tensor.size(0))

        # compute gradient and do SGD step
        opt.zero_grad()
        optimizer_tl.zero_grad()
        loss.backward()
        opt.step()
        optimizer_tl.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if args.verbose and (i % 200) == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data: {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss: {loss.val:.4f} ({loss.avg:.4f})'
                  .format(epoch, i, len(loader), batch_time=batch_time,
                          data_time=data_time, loss=losses))

    return losses.avg
def train(train_loader, model, criterion, optimizer, epoch, summary_writer):
    """Train the gaussian-map text detector for one epoch.

    Combines weighted-regression losses from `weighted_regression` with a
    dice loss on the region map, logs images/scalars to tensorboard every
    20 batches, and returns (avg loss, mean accuracy, mean IoU) of the
    center score.
    """
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    running_metric_text = runningScore(2)
    L1_loss = torch.nn.L1Loss()

    end = time.time()
    for batch_idx, (imgs, gt_texts, training_masks, ori_imgs,
                    border_map) in enumerate(train_loader):
        data_time.update(time.time() - end)

        imgs = Variable(imgs.cuda())
        # targets are subsampled 4x in both spatial dims to match the output
        gt_texts = Variable(gt_texts[:, ::4, ::4].cuda())
        training_masks = Variable(training_masks[:, ::4, ::4].cuda())
        border_map = Variable(border_map.cuda())
        outputs = model(imgs)
        gaussian_map = outputs
        # gaussian_map, center_map, region_map = outputs
        weighted_mse_loss, mse_region_loss, loss_center = weighted_regression(
            gaussian_map, gt_texts, training_masks)
        # thresholded ground-truth maps: > 0.7 is "center", > 0.4 is "region"
        center_gt = torch.where(gt_texts > 0.7, gt_texts,
                                torch.zeros_like(gt_texts))
        # center_mask = torch.where(gt_texts > 0.7, torch.ones_like(gt_texts), torch.zeros_like(gt_texts))
        region_gt = torch.where(gt_texts > 0.4, gt_texts,
                                torch.zeros_like(gt_texts))
        # region_mask = torch.where(gt_texts > 0.4, torch.ones_like(gt_texts), torch.zeros_like(gt_texts))
        # loss for center_map
        # loss_center_dice = criterion(gaussian_map, center_gt, training_masks)
        # loss for region_map
        loss_region_dice = criterion(gaussian_map, region_gt, training_masks)
        # loss for border_map
        # border_mask = 1. - (center_other - border_map)
        # loss_border = criterion(gaussian_map, gt_texts, training_masks)
        loss = loss_center + weighted_mse_loss + mse_region_loss + loss_region_dice
        # print("loss:", loss_center, "loss_region:", loss_region, "weighted_mse_loss:", weighted_mse_loss)
        losses.update(loss.item(), imgs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        score_center = cal_text_score(gaussian_map, gt_texts, training_masks,
                                      running_metric_text, 0, 0.8)
        # score_region = cal_text_score(gaussian_map, gt_texts, training_masks * region_mask, running_metric_text, 0, 0.2)

        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % 20 == 0:
            # visualization
            global_step = epoch * len(train_loader) + batch_idx
            maps = torch.sigmoid(gaussian_map[0:1])
            center_map = torch.where(maps > 0.8, maps, torch.zeros_like(maps))
            text_map = torch.where(maps > 0.4, maps, torch.zeros_like(maps))
            summary_writer.add_images('gt/img',
                                      ori_imgs[0:1],
                                      global_step=global_step)
            summary_writer.add_images('gt/score_map',
                                      torch.unsqueeze(gt_texts[0:1], 1),
                                      global_step=global_step)
            summary_writer.add_images('gt/center_map',
                                      torch.unsqueeze(center_gt[0:1], 1),
                                      global_step=global_step)
            summary_writer.add_images('gt/region_map',
                                      torch.unsqueeze(region_gt[0:1], 1),
                                      global_step=global_step)
            # summary_writer.add_images('gt/border_map', torch.unsqueeze(border_mask[0:1], 1), global_step=global_step)
            summary_writer.add_images('predicition/score_map',
                                      torch.sigmoid(gaussian_map[0:1]),
                                      global_step=global_step)
            # NOTE(review): center_map/text_map were already built from
            # sigmoid(gaussian_map); applying sigmoid again here squashes
            # them a second time — confirm this double-sigmoid is intended
            # (visualization only, no effect on training).
            summary_writer.add_images('predicition/center_map',
                                      torch.sigmoid(center_map[0:1]),
                                      global_step=global_step)
            summary_writer.add_images('predicition/region_map',
                                      torch.sigmoid(text_map[0:1]),
                                      global_step=global_step)
            summary_writer.add_scalar('loss/reg_loss',
                                      weighted_mse_loss,
                                      global_step=global_step)
            summary_writer.add_scalar('loss/reg_center_loss',
                                      loss_center,
                                      global_step=global_step)
            # summary_writer.add_scalar('loss/center_dice_loss', loss_center_dice, global_step=global_step)
            summary_writer.add_scalar('loss/region_dice_loss',
                                      loss_region_dice,
                                      global_step=global_step)
            # summary_writer.add_scalar('loss/border_loss', loss_border, global_step=global_step)
            summary_writer.add_scalar('loss/text_region_loss',
                                      mse_region_loss,
                                      global_step=global_step)
            summary_writer.add_scalar('metric/acc_c',
                                      score_center['Mean Acc'],
                                      global_step=global_step)
            summary_writer.add_scalar('metric/iou_c',
                                      score_center['Mean IoU'],
                                      global_step=global_step)
            # summary_writer.add_scalar('metric/acc_t', score_region['Mean Acc'], global_step=global_step)
            # summary_writer.add_scalar('metric/iou_t', score_region['Mean IoU'], global_step=global_step)
            output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_c: {acc_c: .4f} | IOU_c: {iou_c: .4f} '.format(
                batch=batch_idx + 1,
                size=len(train_loader),
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                loss=losses.avg,
                acc_c=score_center['Mean Acc'],
                iou_c=score_center['Mean IoU'],
                # acc_t=score_region['Mean Acc'],
                # iou_t=score_region['Mean IoU'],
            )
            print(output_log)
            sys.stdout.flush()

    return (losses.avg, score_center['Mean Acc'], score_center['Mean IoU'])
def train_ins(epoch, train_loader, model, contrast, criterion, optimizer, opt):
    """
    One epoch of training for instance discrimination.

    Args:
        epoch: current epoch index (display only).
        train_loader: yields (inputs, _, index) triples; `index` identifies
            each sample inside the contrast memory.
        model: encoder producing features for `contrast`.
        contrast: NCE/memory-bank head; called as contrast(feat, index).
        criterion: loss over the contrast output.
        optimizer: optimizer for `model`.
        opt: namespace with gpu, amp, print_freq.

    Returns:
        (mean loss, mean positive probability) over the epoch.

    Fix vs. original: removed a leftover debug `print(out.shape)` from the
    periodic logging branch.
    """
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    loss_meter = AverageMeter()
    prob_meter = AverageMeter()

    end = time.time()
    for idx, (inputs, _, index) in enumerate(train_loader):
        data_time.update(time.time() - end)

        bsz = inputs.size(0)
        inputs = inputs.float()
        if opt.gpu is not None:
            inputs = inputs.cuda(opt.gpu, non_blocking=True)
        else:
            inputs = inputs.cuda()
        index = index.cuda(opt.gpu, non_blocking=True)

        # ===================forward=====================
        feat = model(inputs)
        out = contrast(feat, index)
        loss = criterion(out)
        # out[:, 0] is the positive logit/probability for each sample
        prob = out[:, 0].mean()

        # ===================backward=====================
        optimizer.zero_grad()
        if opt.amp:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        optimizer.step()

        # ===================meters=====================
        loss_meter.update(loss.item(), bsz)
        prob_meter.update(prob.item(), bsz)

        # synchronize so batch_time reflects actual GPU work
        torch.cuda.synchronize()
        batch_time.update(time.time() - end)
        end = time.time()

        # print info
        if (idx + 1) % opt.print_freq == 0:
            print('Train: [{0}][{1}/{2}]\t'
                  'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'DT {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'loss {loss.val:.3f} ({loss.avg:.3f})\t'
                  'prob {prob.val:.3f} ({prob.avg:.3f})'.format(
                      epoch, idx + 1, len(train_loader),
                      batch_time=batch_time, data_time=data_time,
                      loss=loss_meter, prob=prob_meter))
            sys.stdout.flush()

    return loss_meter.avg, prob_meter.avg
def train(train_loader, model, criterion, optimizer, epoch):
    """One training epoch for text detection (PSENet-style text + kernels).

    The model output holds the text map in channel 0 and the shrunk kernel
    maps in channels 1..K.  Text loss uses OHEM-selected pixels; kernel loss
    is restricted to pixels the text branch already predicts positive.

    Returns:
        (avg loss, text Mean Acc, kernel Mean Acc, text Mean IoU,
         kernel Mean IoU).

    Generalization vs. original: the kernel loop used a hard-coded
    `range(6)`; it now derives the kernel count from the tensor itself
    (identical behavior for the standard 6-kernel configuration).
    """
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    running_metric_text = runningScore(2)
    running_metric_kernel = runningScore(2)

    end = time.time()
    for batch_idx, (imgs, gt_texts, gt_kernels, training_masks) in enumerate(train_loader):
        data_time.update(time.time() - end)

        imgs = Variable(imgs.cuda())
        gt_texts = Variable(gt_texts.cuda())
        gt_kernels = Variable(gt_kernels.cuda())
        training_masks = Variable(training_masks.cuda())

        outputs = model(imgs)
        texts = outputs[:, 0, :, :]       # full text score map
        kernels = outputs[:, 1:, :, :]    # shrunk kernel maps

        # text loss on hard-example-mined pixels
        selected_masks = ohem_batch(texts, gt_texts, training_masks)
        selected_masks = Variable(selected_masks.cuda())
        loss_text = criterion(texts, gt_texts, selected_masks)

        # kernel losses only where the text branch is confident (>0.5)
        # and the pixel is not masked out
        loss_kernels = []
        mask0 = torch.sigmoid(texts).data.cpu().numpy()
        mask1 = training_masks.data.cpu().numpy()
        selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32')
        selected_masks = torch.from_numpy(selected_masks).float()
        selected_masks = Variable(selected_masks.cuda())
        for i in range(kernels.size(1)):
            kernel_i = kernels[:, i, :, :]
            gt_kernel_i = gt_kernels[:, i, :, :]
            loss_kernel_i = criterion(kernel_i, gt_kernel_i, selected_masks)
            loss_kernels.append(loss_kernel_i)
        loss_kernel = sum(loss_kernels) / len(loss_kernels)

        # fixed 0.7/0.3 weighting between text and kernel terms
        loss = 0.7 * loss_text + 0.3 * loss_kernel
        losses.update(loss.item(), imgs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        score_text = cal_text_score(texts, gt_texts, training_masks, running_metric_text)
        score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel)

        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % 20 == 0:
            output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format(
                batch=batch_idx + 1,
                size=len(train_loader),
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                loss=losses.avg,
                acc=score_text['Mean Acc'],
                iou_t=score_text['Mean IoU'],
                iou_k=score_kernel['Mean IoU'])
            print(output_log)
            sys.stdout.flush()

    # NOTE(review): score_text/score_kernel are undefined if the loader is
    # empty — callers are expected to pass a non-empty loader.
    return (losses.avg, score_text['Mean Acc'], score_kernel['Mean Acc'],
            score_text['Mean IoU'], score_kernel['Mean IoU'])
def train_moco(epoch, train_loader, model, model_ema, contrast, criterion, optimizer, opt):
    """
    One epoch of MoCo-style training for instance discrimination.

    The batch tensor holds two 3-channel views concatenated on the channel
    axis; the query view feeds `model`, the key view feeds the momentum
    encoder `model_ema` under ShuffleBN.  After each step the EMA encoder is
    updated via `moment_update`.

    Returns:
        (mean loss, mean positive probability) over the epoch.

    Fix vs. original: removed a leftover debug `print(out.shape)` from the
    periodic logging branch.
    """
    model.train()
    model_ema.eval()

    # keep the EMA encoder's BatchNorm layers in train mode so they use
    # batch statistics (standard MoCo ShuffleBN setup)
    def set_bn_train(m):
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
            m.train()
    model_ema.apply(set_bn_train)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    loss_meter = AverageMeter()
    prob_meter = AverageMeter()

    end = time.time()
    for idx, (inputs, _, index) in enumerate(train_loader):
        data_time.update(time.time() - end)

        bsz = inputs.size(0)
        inputs = inputs.float()
        if opt.gpu is not None:
            inputs = inputs.cuda(opt.gpu, non_blocking=True)
        else:
            inputs = inputs.cuda()
        index = index.cuda(opt.gpu, non_blocking=True)

        # ===================forward=====================
        x1, x2 = torch.split(inputs, [3, 3], dim=1)

        # ids for ShuffleBN: shuffle keys before the EMA forward, restore after
        shuffle_ids, reverse_ids = get_shuffle_ids(bsz)

        feat_q = model(x1)
        with torch.no_grad():
            x2 = x2[shuffle_ids]
            feat_k = model_ema(x2)
            feat_k = feat_k[reverse_ids]

        out = contrast(feat_q, feat_k)
        loss = criterion(out)
        prob = out[:, 0].mean()

        # ===================backward=====================
        optimizer.zero_grad()
        if opt.amp:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        optimizer.step()

        # ===================meters=====================
        loss_meter.update(loss.item(), bsz)
        prob_meter.update(prob.item(), bsz)

        # momentum update of the key encoder
        moment_update(model, model_ema, opt.alpha)

        torch.cuda.synchronize()
        batch_time.update(time.time() - end)
        end = time.time()

        # print info
        if (idx + 1) % opt.print_freq == 0:
            print('Train: [{0}][{1}/{2}]\t'
                  'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'DT {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'loss {loss.val:.3f} ({loss.avg:.3f})\t'
                  'prob {prob.val:.3f} ({prob.avg:.3f})'.format(
                      epoch, idx + 1, len(train_loader),
                      batch_time=batch_time, data_time=data_time,
                      loss=loss_meter, prob=prob_meter))
            sys.stdout.flush()

    return loss_meter.avg, prob_meter.avg
def semi_train(loader, semi_loader, model, fd, crit, opt_body, opt_category, epoch, device, args):
    """Two-phase epoch: pseudo-label training, then supervised fine-tuning.

    Phase 1 trains the body on `loader`'s pseudo-targets with `opt_body`.
    Phase 2 REPLACES model.category_layer with a fresh Linear(fd,
    nmb_category)+Softmax head (optionally restored from a saved
    checkpoint), then trains head+body on the labelled `semi_loader`
    with both optimizers.

    Args:
        loader: yields ((input_tensor, label), pseudo_target, imgidx).
        semi_loader: yields (input_tensor, label) with true labels.
        model: network; .cluster_layer and .category_layer are mutated here.
        fd: feature dimension feeding the new category layer.
        crit: classification loss.
        opt_body / opt_category: optimizers for body and category head.
        epoch, device, args: bookkeeping, target device, config namespace.

    Returns:
        (avg pseudo loss, avg supervised loss, supervised accuracy).
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    semi_losses = AverageMeter()

    # switch to train mode
    model.train()
    end = time.time()
    # ---- phase 1: train on pseudo-labels ----
    for i, ((input_tensor, label), pseudo_target, imgidx) in enumerate(loader):
        input_var = torch.autograd.Variable(input_tensor.to(device))
        pseudo_target_var = torch.autograd.Variable(
            pseudo_target.to(device, non_blocking=True))
        output = model(input_var)
        loss = crit(output, pseudo_target_var.long())

        # record loss
        losses.update(loss.item(), input_tensor.size(0))

        # compute gradient and do SGD step
        opt_body.zero_grad()
        loss.backward()
        opt_body.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if args.verbose and (i % 5) == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'PSEUDO_Loss: {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(loader), batch_time=batch_time,
                      loss=losses))

    '''SUPERVISION with a few labelled dataset'''
    # swap in a fresh supervised head; cluster head disabled for this phase.
    # NOTE(review): .double() implies the supervised path runs in float64 —
    # presumably the body output is float64 here; confirm against model code.
    model.cluster_layer = None
    model.category_layer = nn.Sequential(
        nn.Linear(fd, args.nmb_category),
        nn.Softmax(dim=1),
    )
    model.category_layer[0].weight.data.normal_(0, 0.01)
    model.category_layer[0].bias.data.zero_()
    model.category_layer = model.category_layer.double()
    model.category_layer.to(device)

    # restore a previously trained category head if one was saved
    category_save = os.path.join(args.exp, '../..', 'category_layer.pth.tar')
    if os.path.isfile(category_save):
        category_layer_param = torch.load(category_save)
        model.category_layer.load_state_dict(category_layer_param)

    # ---- phase 2: supervised training on the small labelled set ----
    semi_output_save = []
    semi_label_save = []
    for i, (input_tensor, label) in enumerate(semi_loader):
        input_var = torch.autograd.Variable(input_tensor.to(device))
        label_var = torch.autograd.Variable(label.to(device, non_blocking=True))
        output = model(input_var)
        semi_loss = crit(output, label_var.long())

        # compute gradient and do SGD step (head and body both updated)
        opt_category.zero_grad()
        opt_body.zero_grad()
        semi_loss.backward()
        opt_category.step()
        opt_body.step()

        # record loss
        semi_losses.update(semi_loss.item(), input_tensor.size(0))

        # Record accuracy
        output = torch.argmax(output, axis=1)
        semi_output_save.append(output.data.cpu().numpy())
        semi_label_save.append(label.data.cpu().numpy())

        # measure elapsed time
        if args.verbose and (i % 5) == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'SEMI_Loss: {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(semi_loader), loss=semi_losses))

    # element-wise accuracy over all supervised predictions
    # NOTE(review): raises ZeroDivisionError if semi_loader is empty
    semi_output_flat = flatten_list(semi_output_save)
    semi_label_flat = flatten_list(semi_label_save)
    semi_accu_list = [
        out == lab for (out, lab) in zip(semi_output_flat, semi_label_flat)
    ]
    semi_accuracy = sum(semi_accu_list) / len(semi_accu_list)
    return losses.avg, semi_losses.avg, semi_accuracy
def main(args):
    """Train/evaluate the two-stream attention model on TGIF-QA.

    Dispatches on ``args.task``:
      * 'Count'          - regression with MSE loss; best model tracked by val loss.
      * 'Action'/'Trans' - 5-way multiple choice with MultipleChoiceLoss; best by val acc.
      * 'FrameQA'        - open-ended classification with cross-entropy; best by val acc.

    With ``args.test == 1`` a pre-trained checkpoint is loaded and only the
    test pass runs; otherwise the model trains and validates/tests every 100
    iterations, saving a checkpoint after each validation.

    Fixes vs. original:
      * Python-2 ``print`` statements converted to the function form (the
        statement form is a SyntaxError under Python 3).
      * progress counts use integer division ``//`` (display only).
      * FrameQA test report prints ``accuracy.avg`` instead of the
        AverageMeter object (``%.2f`` on the object raised TypeError).
    """
    torch.manual_seed(1)

    ### add arguments ###
    args.vc_dir = './data/Vocabulary'
    args.df_dir = './data/dataset'
    args.max_sequence_length = 35
    args.model_name = args.task + args.feat_type
    args.memory_type = '_mrm2s'
    args.image_feature_net = 'concat'
    args.layer = 'fc'
    args.save_model_path = args.save_path + '%s_%s_%s%s' % (
        args.task, args.image_feature_net, args.layer, args.memory_type)

    # Create model directory
    if not os.path.exists(args.save_model_path):
        os.makedirs(args.save_model_path)

    ######################################################################################
    ## This part of dataset code is adopted from
    ## https://github.com/YunseokJANG/tgif-qa/blob/master/code/gifqa/data_util/tgif.py
    ######################################################################################
    print('Start loading TGIF dataset')
    train_dataset = DatasetTGIF(dataset_name='train',
                                image_feature_net=args.image_feature_net,
                                layer=args.layer,
                                max_length=args.max_sequence_length,
                                data_type=args.task,
                                dataframe_dir=args.df_dir,
                                vocab_dir=args.vc_dir)
    train_dataset.load_word_vocabulary()

    # carve a 10% validation split; all splits share one vocabulary so word
    # indices stay consistent
    val_dataset = train_dataset.split_dataset(ratio=0.1)
    val_dataset.share_word_vocabulary_from(train_dataset)

    test_dataset = DatasetTGIF(dataset_name='test',
                               image_feature_net=args.image_feature_net,
                               layer=args.layer,
                               max_length=args.max_sequence_length,
                               data_type=args.task,
                               dataframe_dir=args.df_dir,
                               vocab_dir=args.vc_dir)
    test_dataset.share_word_vocabulary_from(train_dataset)

    print('dataset lengths train/val/test %d/%d/%d' % (
        len(train_dataset), len(val_dataset), len(test_dataset)))

    #############################
    # get video feature dimension
    #############################
    video_feature_dimension = train_dataset.get_video_feature_dimension()
    feat_channel = video_feature_dimension[3]
    feat_dim = video_feature_dimension[2]
    text_embed_size = train_dataset.GLOVE_EMBEDDING_SIZE
    answer_vocab_size = None

    #############################
    # get word vector dimension
    #############################
    word_matrix = train_dataset.word_matrix
    voc_len = word_matrix.shape[0]
    assert text_embed_size == word_matrix.shape[1]

    #############################
    # Parameters
    #############################
    SEQUENCE_LENGTH = args.max_sequence_length
    VOCABULARY_SIZE = train_dataset.n_words
    assert VOCABULARY_SIZE == voc_len
    FEAT_DIM = train_dataset.get_video_feature_dimension()[1:]

    train_iter = train_dataset.batch_iter(args.num_epochs, args.batch_size)

    # Create model directory
    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    if args.task == 'Count':
        # add L2 loss
        criterion = nn.MSELoss(size_average=True).cuda()
    elif args.task in ['Action', 'Trans']:
        from embed_loss import MultipleChoiceLoss
        criterion = MultipleChoiceLoss(num_option=5, margin=1,
                                       size_average=True).cuda()
    elif args.task == 'FrameQA':
        # add classification loss
        answer_vocab_size = len(train_dataset.ans2idx)
        print('Vocabulary size', answer_vocab_size, VOCABULARY_SIZE)
        criterion = nn.CrossEntropyLoss(size_average=True).cuda()

    if args.memory_type == '_mrm2s':
        rnn = AttentionTwoStream(args.task, feat_channel, feat_dim,
                                 text_embed_size, args.hidden_size,
                                 voc_len, args.num_layers, word_matrix,
                                 answer_vocab_size=answer_vocab_size,
                                 max_len=args.max_sequence_length)
    else:
        assert 1 == 2

    rnn = rnn.cuda()

    # to directly test, load pre-trained model, replace with your model to test your model
    if args.test == 1:
        if args.task == 'Count':
            rnn.load_state_dict(
                torch.load(
                    './saved_models/Count_concat_fc_mrm2s/rnn-1300-l3.257-a27.942.pkl'
                ))
        elif args.task == 'Action':
            rnn.load_state_dict(
                torch.load(
                    './saved_models/Action_concat_fc_mrm2s/rnn-0800-l0.137-a84.663.pkl'
                ))
        elif args.task == 'Trans':
            rnn.load_state_dict(
                torch.load(
                    './saved_models/Trans_concat_fc_mrm2s/rnn-1500-l0.246-a78.068.pkl'
                ))
        elif args.task == 'FrameQA':
            rnn.load_state_dict(
                torch.load(
                    './saved_models/FrameQA_concat_fc_mrm2s/rnn-4200-l1.233-a69.361.pkl'
                ))
        else:
            assert 1 == 2, 'Invalid task'

    optimizer = torch.optim.Adam(rnn.parameters(), lr=args.learning_rate)

    iter = 0
    if args.task == 'Count':
        best_val_loss = 100.0
        best_val_iter = 0.0

        # this is a regression problem, predict a value from 1-10
        for batch_chunk in train_iter:

            if args.test == 0:
                video_features = torch.from_numpy(
                    batch_chunk['video_features'].astype(np.float32)).cuda()
                video_lengths = batch_chunk['video_lengths']
                question_words = torch.from_numpy(
                    batch_chunk['question_words'].astype(np.int64)).cuda()
                question_lengths = batch_chunk['question_lengths']
                answers = torch.from_numpy(
                    batch_chunk['answer'].astype(np.float32)).cuda()

                data_dict = {}
                data_dict['video_features'] = video_features
                data_dict['video_lengths'] = video_lengths
                data_dict['question_words'] = question_words
                data_dict['question_lengths'] = question_lengths
                data_dict['answers'] = answers

                outputs, targets, predictions = rnn(data_dict, 'Count')
                loss = criterion(outputs, targets)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                acc = rnn.accuracy(predictions, targets.int())
                print('Train %s iter %d, loss %.3f, acc %.2f' %
                      (args.task, iter, loss.data, acc.item()))

            if iter % 100 == 0:
                rnn.eval()
                with torch.no_grad():

                    if args.test == 0:
                        ##### Validation ######
                        n_iter = len(val_dataset) // args.batch_size
                        losses = AverageMeter()
                        accuracy = AverageMeter()
                        iter_val = 0
                        for batch_chunk in val_dataset.batch_iter(
                                1, args.batch_size, shuffle=False):
                            if iter_val % 10 == 0:
                                print('%d/%d' % (iter_val, n_iter))
                            iter_val += 1
                            video_features = torch.from_numpy(
                                batch_chunk['video_features'].astype(
                                    np.float32)).cuda()
                            video_lengths = batch_chunk['video_lengths']
                            question_words = torch.from_numpy(
                                batch_chunk['question_words'].astype(
                                    np.int64)).cuda()
                            question_lengths = batch_chunk['question_lengths']
                            answers = torch.from_numpy(
                                batch_chunk['answer'].astype(
                                    np.float32)).cuda()

                            data_dict = {}
                            data_dict['video_features'] = video_features
                            data_dict['video_lengths'] = video_lengths
                            data_dict['question_words'] = question_words
                            data_dict['question_lengths'] = question_lengths
                            data_dict['answers'] = answers

                            outputs, targets, predictions = rnn(
                                data_dict, 'Count')
                            loss = criterion(outputs, targets)
                            acc = rnn.accuracy(predictions, targets.int())
                            losses.update(loss.item(),
                                          video_features.size(0))
                            accuracy.update(acc.item(),
                                            video_features.size(0))

                        if best_val_loss > losses.avg:
                            best_val_loss = losses.avg
                            best_val_iter = iter

                        print(
                            '[Val] iter %d, loss %.3f, acc %.2f, best loss %.3f at iter %d'
                            % (iter, losses.avg, accuracy.avg, best_val_loss,
                               best_val_iter))
                        torch.save(
                            rnn.state_dict(),
                            os.path.join(
                                args.save_model_path,
                                'rnn-%04d-l%.3f-a%.3f.pkl' %
                                (iter, losses.avg, accuracy.avg)))

                    if 1 == 1:
                        ###### Test ######
                        n_iter = len(test_dataset) // args.batch_size
                        losses = AverageMeter()
                        accuracy = AverageMeter()
                        iter_test = 0
                        for batch_chunk in test_dataset.batch_iter(
                                1, args.batch_size, shuffle=False):
                            if iter_test % 10 == 0:
                                print('%d/%d' % (iter_test, n_iter))
                            iter_test += 1
                            video_features = torch.from_numpy(
                                batch_chunk['video_features'].astype(
                                    np.float32)).cuda()
                            video_lengths = batch_chunk['video_lengths']
                            question_words = torch.from_numpy(
                                batch_chunk['question_words'].astype(
                                    np.int64)).cuda()
                            question_lengths = batch_chunk['question_lengths']
                            answers = torch.from_numpy(
                                batch_chunk['answer'].astype(
                                    np.float32)).cuda()

                            data_dict = {}
                            data_dict['video_features'] = video_features
                            data_dict['video_lengths'] = video_lengths
                            data_dict['question_words'] = question_words
                            data_dict['question_lengths'] = question_lengths
                            data_dict['answers'] = answers

                            outputs, targets, predictions = rnn(
                                data_dict, 'Count')
                            loss = criterion(outputs, targets)
                            acc = rnn.accuracy(predictions, targets.int())
                            losses.update(loss.item(),
                                          video_features.size(0))
                            accuracy.update(acc.item(),
                                            video_features.size(0))

                        print('[Test] iter %d, loss %.3f, acc %.2f' %
                              (iter, losses.avg, accuracy.avg))
                        if args.test == 1:
                            exit()

                rnn.train()
            iter += 1

    elif args.task in ['Action', 'Trans']:
        best_val_acc = 0.0
        best_val_iter = 0.0

        # this is a multiple-choice problem, predict probability of each class
        for batch_chunk in train_iter:

            if args.test == 0:
                video_features = torch.from_numpy(
                    batch_chunk['video_features'].astype(np.float32)).cuda()
                video_lengths = batch_chunk['video_lengths']
                candidates = torch.from_numpy(
                    batch_chunk['candidates'].astype(np.int64)).cuda()
                candidate_lengths = batch_chunk['candidate_lengths']
                answers = torch.from_numpy(
                    batch_chunk['answer'].astype(np.int32)).cuda()
                num_mult_choices = batch_chunk['num_mult_choices']

                data_dict = {}
                data_dict['video_features'] = video_features
                data_dict['video_lengths'] = video_lengths
                data_dict['candidates'] = candidates
                data_dict['candidate_lengths'] = candidate_lengths
                data_dict['answers'] = answers
                data_dict['num_mult_choices'] = num_mult_choices

                outputs, targets, predictions = rnn(data_dict, args.task)
                loss = criterion(outputs, targets)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                acc = rnn.accuracy(predictions, targets.long())
                print('Train %s iter %d, loss %.3f, acc %.2f' %
                      (args.task, iter, loss.data, acc.item()))

            if iter % 100 == 0:
                rnn.eval()
                with torch.no_grad():

                    if args.test == 0:
                        n_iter = len(val_dataset) // args.batch_size
                        losses = AverageMeter()
                        accuracy = AverageMeter()
                        iter_val = 0
                        for batch_chunk in val_dataset.batch_iter(
                                1, args.batch_size, shuffle=False):
                            if iter_val % 10 == 0:
                                print('%d/%d' % (iter_val, n_iter))
                            iter_val += 1
                            video_features = torch.from_numpy(
                                batch_chunk['video_features'].astype(
                                    np.float32)).cuda()
                            video_lengths = batch_chunk['video_lengths']
                            candidates = torch.from_numpy(
                                batch_chunk['candidates'].astype(
                                    np.int64)).cuda()
                            candidate_lengths = batch_chunk[
                                'candidate_lengths']
                            answers = torch.from_numpy(
                                batch_chunk['answer'].astype(np.int32)).cuda()
                            num_mult_choices = batch_chunk['num_mult_choices']

                            data_dict = {}
                            data_dict['video_features'] = video_features
                            data_dict['video_lengths'] = video_lengths
                            data_dict['candidates'] = candidates
                            data_dict['candidate_lengths'] = candidate_lengths
                            data_dict['answers'] = answers
                            data_dict['num_mult_choices'] = num_mult_choices

                            outputs, targets, predictions = rnn(
                                data_dict, args.task)
                            loss = criterion(outputs, targets)
                            acc = rnn.accuracy(predictions, targets.long())
                            losses.update(loss.item(),
                                          video_features.size(0))
                            accuracy.update(acc.item(),
                                            video_features.size(0))

                        if best_val_acc < accuracy.avg:
                            best_val_acc = accuracy.avg
                            best_val_iter = iter

                        print(
                            '[Val] iter %d, loss %.3f, acc %.2f, best acc %.3f at iter %d'
                            % (iter, losses.avg, accuracy.avg, best_val_acc,
                               best_val_iter))
                        torch.save(
                            rnn.state_dict(),
                            os.path.join(
                                args.save_model_path,
                                'rnn-%04d-l%.3f-a%.3f.pkl' %
                                (iter, losses.avg, accuracy.avg)))

                    if 1 == 1:
                        n_iter = len(test_dataset) // args.batch_size
                        losses = AverageMeter()
                        accuracy = AverageMeter()
                        iter_test = 0
                        for batch_chunk in test_dataset.batch_iter(
                                1, args.batch_size, shuffle=False):
                            if iter_test % 10 == 0:
                                print('%d/%d' % (iter_test, n_iter))
                            iter_test += 1
                            video_features = torch.from_numpy(
                                batch_chunk['video_features'].astype(
                                    np.float32)).cuda()
                            video_lengths = batch_chunk['video_lengths']
                            candidates = torch.from_numpy(
                                batch_chunk['candidates'].astype(
                                    np.int64)).cuda()
                            candidate_lengths = batch_chunk[
                                'candidate_lengths']
                            answers = torch.from_numpy(
                                batch_chunk['answer'].astype(np.int32)).cuda()
                            num_mult_choices = batch_chunk['num_mult_choices']
                            #question_word_nums = batch_chunk['question_word_nums']

                            data_dict = {}
                            data_dict['video_features'] = video_features
                            data_dict['video_lengths'] = video_lengths
                            data_dict['candidates'] = candidates
                            data_dict['candidate_lengths'] = candidate_lengths
                            data_dict['answers'] = answers
                            data_dict['num_mult_choices'] = num_mult_choices

                            outputs, targets, predictions = rnn(
                                data_dict, args.task)
                            loss = criterion(outputs, targets)
                            acc = rnn.accuracy(predictions, targets.long())
                            losses.update(loss.item(),
                                          video_features.size(0))
                            accuracy.update(acc.item(),
                                            video_features.size(0))

                        print('[Test] iter %d, loss %.3f, acc %.2f' %
                              (iter, losses.avg, accuracy.avg))
                        if args.test == 1:
                            exit()

                rnn.train()
            iter += 1

    elif args.task == 'FrameQA':
        best_val_acc = 0.0
        best_val_iter = 0.0

        # this is a multiple-choice problem, predict probability of each class
        for batch_chunk in train_iter:

            if args.test == 0:
                video_features = torch.from_numpy(
                    batch_chunk['video_features'].astype(np.float32)).cuda()
                video_lengths = batch_chunk['video_lengths']
                question_words = torch.from_numpy(
                    batch_chunk['question_words'].astype(np.int64)).cuda()
                question_lengths = batch_chunk['question_lengths']
                answers = torch.from_numpy(
                    batch_chunk['answer'].astype(np.int64)).cuda()

                data_dict = {}
                data_dict['video_features'] = video_features
                data_dict['video_lengths'] = video_lengths
                data_dict['question_words'] = question_words
                data_dict['question_lengths'] = question_lengths
                data_dict['answers'] = answers

                outputs, targets, predictions = rnn(data_dict, args.task)
                loss = criterion(outputs, targets)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                acc = rnn.accuracy(predictions, targets)
                print('Train %s iter %d, loss %.3f, acc %.2f' %
                      (args.task, iter, loss.data, acc.item()))

            if iter % 100 == 0:
                rnn.eval()
                with torch.no_grad():

                    if args.test == 0:
                        losses = AverageMeter()
                        accuracy = AverageMeter()
                        n_iter = len(val_dataset) // args.batch_size
                        iter_val = 0
                        for batch_chunk in val_dataset.batch_iter(
                                1, args.batch_size, shuffle=False):
                            if iter_val % 10 == 0:
                                print('%d/%d' % (iter_val, n_iter))
                            iter_val += 1
                            video_features = torch.from_numpy(
                                batch_chunk['video_features'].astype(
                                    np.float32)).cuda()
                            video_lengths = batch_chunk['video_lengths']
                            question_words = torch.from_numpy(
                                batch_chunk['question_words'].astype(
                                    np.int64)).cuda()
                            question_lengths = batch_chunk['question_lengths']
                            answers = torch.from_numpy(
                                batch_chunk['answer'].astype(np.int64)).cuda()

                            data_dict = {}
                            data_dict['video_features'] = video_features
                            data_dict['video_lengths'] = video_lengths
                            data_dict['question_words'] = question_words
                            data_dict['question_lengths'] = question_lengths
                            data_dict['answers'] = answers

                            outputs, targets, predictions = rnn(
                                data_dict, args.task)
                            loss = criterion(outputs, targets)
                            acc = rnn.accuracy(predictions, targets)
                            losses.update(loss.item(),
                                          video_features.size(0))
                            accuracy.update(acc.item(),
                                            video_features.size(0))

                        if best_val_acc < accuracy.avg:
                            best_val_acc = accuracy.avg
                            best_val_iter = iter

                        print(
                            '[Val] iter %d, loss %.3f, acc %.2f, best acc %.3f at iter %d'
                            % (iter, losses.avg, accuracy.avg, best_val_acc,
                               best_val_iter))
                        torch.save(
                            rnn.state_dict(),
                            os.path.join(
                                args.save_model_path,
                                'rnn-%04d-l%.3f-a%.3f.pkl' %
                                (iter, losses.avg, accuracy.avg)))

                    if 1 == 1:
                        losses = AverageMeter()
                        accuracy = AverageMeter()
                        n_iter = len(test_dataset) // args.batch_size
                        iter_test = 0
                        for batch_chunk in test_dataset.batch_iter(
                                1, args.batch_size, shuffle=False):
                            if iter_test % 10 == 0:
                                print('%d/%d' % (iter_test, n_iter))
                            iter_test += 1
                            video_features = torch.from_numpy(
                                batch_chunk['video_features'].astype(
                                    np.float32)).cuda()
                            video_lengths = batch_chunk['video_lengths']
                            question_words = torch.from_numpy(
                                batch_chunk['question_words'].astype(
                                    np.int64)).cuda()
                            question_lengths = batch_chunk['question_lengths']
                            answers = torch.from_numpy(
                                batch_chunk['answer'].astype(np.int64)).cuda()

                            data_dict = {}
                            data_dict['video_features'] = video_features
                            data_dict['video_lengths'] = video_lengths
                            data_dict['question_words'] = question_words
                            data_dict['question_lengths'] = question_lengths
                            data_dict['answers'] = answers

                            outputs, targets, predictions = rnn(
                                data_dict, args.task)
                            loss = criterion(outputs, targets)
                            acc = rnn.accuracy(predictions, targets)
                            losses.update(loss.item(),
                                          video_features.size(0))
                            accuracy.update(acc.item(),
                                            video_features.size(0))

                        # fixed: original formatted the AverageMeter object
                        # itself with %.2f (TypeError); use .avg
                        print('[Test] iter %d, loss %.3f, acc %.2f' %
                              (iter, losses.avg, accuracy.avg))
                        if args.test == 1:
                            exit()

                rnn.train()
            iter += 1
def validate(args, model, classifier, val_loader, criterion, epoch):
    """Evaluate encoder + classifier on the tiled validation set.

    Each loader item carries three augmented image stacks plus targets; the
    stacks are flattened to (N*aug, 3, tile_h, tile_w) before the forward
    pass.  Runs entirely under torch.no_grad().

    Returns:
        (avg loss, avg top-1 accuracy).

    Fix vs. original: the `data_time` meter was printed but never updated,
    so the DT column always showed 0.000 — it is now updated per batch.
    """
    # switch to evaluate mode
    model.eval()
    classifier.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    with torch.no_grad():
        end = time.time()
        for batch_idx, (input1, input2, input3,
                        target) in enumerate(tqdm(val_loader, disable=False)):
            # measure data loading time (was missing — DT printed as 0.000)
            data_time.update(time.time() - end)

            # Get inputs and target
            input1, input2, input3, target = input1.float(), input2.float(
            ), input3.float(), target.long()

            # Reshape augmented tensors (collapse augmentation dim into batch)
            input1, input2, input3, target = input1.reshape(
                -1, 3, args.tile_h, args.tile_w), input2.reshape(
                    -1, 3, args.tile_h, args.tile_w), input3.reshape(
                        -1, 3, args.tile_h,
                        args.tile_w), target.view(-1, 1).reshape(-1, )

            # Move the variables to Cuda
            input1, input2, input3, target = input1.cuda(), input2.cuda(
            ), input3.cuda(), target.cuda()

            # compute output ###############################
            feats = model(input1, input2, input3)
            output = classifier(feats)
            loss = criterion(output, target)

            # compute loss and accuracy ####################
            batch_size = target.size(0)
            losses.update(loss.item(), batch_size)
            pred = torch.argmax(output, dim=1)
            acc.update(
                torch.sum(target == pred).item() / batch_size, batch_size)

            # measure elapsed time
            torch.cuda.synchronize()
            batch_time.update(time.time() - end)
            end = time.time()

            # print statistics and write summary every N batch
            if (batch_idx + 1) % args.print_freq == 0:
                print('Val: [{0}][{1}/{2}]\t'
                      'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'DT {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'loss {loss.val:.3f} ({loss.avg:.3f})\t'
                      'acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                          epoch, batch_idx + 1, len(val_loader),
                          batch_time=batch_time, data_time=data_time,
                          loss=losses, acc=acc))

    return losses.avg, acc.avg
def train(loader, model, crit, opt, epoch, device, args):
    """Training of the CNN on pseudo-labels (DeepCluster-style).

    Args:
        loader (torch.utils.data.DataLoader): yields
            ((input_tensor, label), pseudo_target, imgidx).
        model (nn.Module): CNN with a `.top_layer` classification head.
        crit (torch.nn): loss applied to pseudo-targets.
        opt (torch.optim.SGD): optimizer for every parameter with True
            requires_grad in model except top layer.
        epoch (int): current epoch (used for checkpoint numbering/logging).
        device: target device for inputs/targets.
        args: config namespace (lr, wd, checkpoints, exp, arch, verbose).

    Returns:
        (avg loss, tr_epoch_out) where tr_epoch_out collects the epoch's
        inputs, pseudo-targets, raw outputs, true labels, avg loss and
        image indices as numpy arrays.

    Fix vs. original: checkpoint names used `str(n / args.checkpoints)`,
    which yields float names like 'checkpoint_1.0.pth.tar' under Python 3;
    integer division restores 'checkpoint_1.pth.tar'.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    data_time = AverageMeter()

    # switch to train mode
    model.train()

    # create an optimizer for the last fc layer
    # (SGD alternative kept for reference)
    # optimizer_tl = torch.optim.SGD(
    #     model.top_layer.parameters(),
    #     lr=args.lr,
    #     weight_decay=10**args.wd,
    # )
    optimizer_tl = torch.optim.Adam(
        model.top_layer.parameters(),
        lr=args.lr,
        betas=(0.5, 0.99),
        weight_decay=10**args.wd,
    )

    end = time.time()
    input_tensors = []
    labels = []
    pseudo_targets = []
    outputs = []
    imgidxes = []
    for i, ((input_tensor, label), pseudo_target, imgidx) in enumerate(loader):
        data_time.update(time.time() - end)

        # save checkpoint every args.checkpoints global iterations
        n = len(loader) * epoch + i
        if n % args.checkpoints == 0:
            path = os.path.join(
                args.exp,
                '../checkpoints',
                'checkpoint_' + str(n // args.checkpoints) + '.pth.tar',
            )
            if args.verbose:
                print('Save checkpoint at: {0}'.format(path))
            torch.save(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': opt.state_dict()
                }, path)

        input_var = torch.autograd.Variable(input_tensor.to(device))
        pseudo_target_var = torch.autograd.Variable(
            pseudo_target.to(device, non_blocking=True))

        output = model(input_var)
        loss = crit(output, pseudo_target_var.long())

        # record loss
        losses.update(loss.item(), input_tensor.size(0))

        # compute gradient and do SGD step (body and top layer separately)
        opt.zero_grad()
        optimizer_tl.zero_grad()
        loss.backward()
        opt.step()
        optimizer_tl.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if args.verbose and (i % 5) == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'PSEUDO_Loss: {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(loader), batch_time=batch_time,
                      loss=losses))

        # accumulate per-batch arrays for the epoch summary
        input_tensors.append(input_tensor.data.cpu().numpy())
        pseudo_targets.append(pseudo_target.data.cpu().numpy())
        outputs.append(output.data.cpu().numpy())
        labels.append(label)
        imgidxes.append(imgidx)

    input_tensors = np.concatenate(input_tensors, axis=0)
    pseudo_targets = np.concatenate(pseudo_targets, axis=0)
    outputs = np.concatenate(outputs, axis=0)
    labels = np.concatenate(labels, axis=0)
    imgidxes = np.concatenate(imgidxes, axis=0)
    tr_epoch_out = [
        input_tensors, pseudo_targets, outputs, labels, losses.avg, imgidxes
    ]
    return losses.avg, tr_epoch_out
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=lr_decay) # train model for epoch in range(max_epochs): t0 = datetime.now() model.train() meter = AverageMeter() for i, x in enumerate(train_dl): x = x.cuda() x_corrputed, mask = noise_maker.apply(x) optimizer.zero_grad() loss = model.loss(x_corrputed, x, mask) loss.backward() optimizer.step() meter.update(loss.detach().cpu().numpy()) delta = (datetime.now() - t0).seconds scheduler.step() print('\r epoch {:5d} - loss {:.6f} - {:4.6f} sec per epoch'.format(epoch, meter.avg, delta), end='') torch.save({ "optimizer": optimizer.state_dict(), "scheduler": scheduler.state_dict(), "model": model.state_dict() }, model_checkpoint ) model_state = torch.load(model_checkpoint) model.load_state_dict(model_state['model']) # extract features
def train(epoch, train_loader, model_s, model_t, criterion_cls, criterion_div, criterion_kd, optimizer, opt, MemBank):
    """One epoch training.

    Self-supervised knowledge distillation: the student `model_s` learns
    from frozen teacher `model_t[0]` on rotation/augmentation-concatenated
    batches, with a contrastive term against a memory bank of negatives.
    Returns (top-1 avg, loss avg, updated MemBank).
    """
    model_s.train()
    # teachers stay frozen in eval mode
    for m in model_t:
        m.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # candidate pool for sampling memory-bank negatives
    train_indices = list(range(len(MemBank)))

    end = time.time()
    with tqdm(train_loader, total=len(train_loader)) as pbar:
        for idx, (input, input2, input3, input4, target, indices) in enumerate(pbar):
            data_time.update(time.time() - end)

            input = input.float()
            if torch.cuda.is_available():
                input = input.cuda()
                input2 = input2.cuda()
                input3 = input3.cuda()
                input4 = input4.cuda()
                target = target.cuda()
                indices = indices.cuda()
            batch_size = input.shape[0]

            # stack the four augmented views into one big batch; each view's
            # slice gets its own proxy label (0..opt.trans-1)
            generated_data = rotrate_concat([input, input2, input3, input4])
            train_targets = target.repeat(opt.trans)
            proxy_labels = torch.zeros(opt.trans*batch_size).cuda().long()
            for ii in range(opt.trans):
                proxy_labels[ii*batch_size:(ii+1)*batch_size] = ii

            # ===================forward=====================
            # teacher forward has no grad; student forward keeps grads
            with torch.no_grad():
                (_,_,_,_, feat_t), (train_logit_t, eq_logit_t, inv_rep_t) = model_t[0](generated_data, inductive=True)
            (_,_,_,_, feat_s), (train_logit_s, eq_logit_s, inv_rep_s) = model_s(generated_data, inductive=True)

            # ===================memory bank of negatives for current batch=====================
            # sample membank_size negatives, excluding this batch's own slots
            np.random.shuffle(train_indices)
            mn_indices_all = np.array(list(set(train_indices) - set(indices)))
            np.random.shuffle(mn_indices_all)
            mn_indices = mn_indices_all[:opt.membank_size]
            mn_arr = MemBank[mn_indices]
            mem_rep_of_batch_imgs = MemBank[indices]

            # classification, distillation and feature-matching terms
            loss_ce = criterion_cls(train_logit_s, train_targets)
            loss_eq = criterion_cls(eq_logit_s, proxy_labels)
            loss_div = criterion_div(train_logit_s, train_logit_t)
            loss_div_eq = criterion_div(eq_logit_s, eq_logit_t)
            loss_mse_inv = torch.nn.functional.mse_loss(inv_rep_s, inv_rep_t)
            loss_mse_feat = torch.nn.functional.mse_loss(feat_s, feat_t)

            # contrastive invariance loss: view 0 vs memory, then each other
            # view vs view 0, averaged over opt.trans
            inv_rep_0 = inv_rep_s[:batch_size, :]
            loss_inv = simple_contrstive_loss(mem_rep_of_batch_imgs, inv_rep_0, mn_arr, opt.contrast_temp)
            for ii in range(1, opt.trans):
                loss_inv += simple_contrstive_loss(inv_rep_0, inv_rep_s[(ii*batch_size):((ii+1)*batch_size), :], mn_arr, opt.contrast_temp)
            loss_inv = loss_inv/opt.trans

            # weighted combination of all terms
            loss = opt.w_ce * (opt.gamma * (loss_eq + loss_inv) + loss_ce) + opt.w_div*(loss_div + loss_div_eq + loss_mse_inv + loss_mse_feat)

            acc1, acc5 = accuracy(train_logit_s, train_targets, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1[0], input.size(0))
            top5.update(acc5[0], input.size(0))

            # ===================update memory bank======================
            # exponential moving average of this batch's representations;
            # done on a detached copy so backward() is unaffected
            MemBankCopy = MemBank.clone().detach()
            MemBankCopy[indices] = (opt.mvavg_rate * MemBankCopy[indices]) + ((1 - opt.mvavg_rate) * inv_rep_0)
            MemBank = MemBankCopy.clone().detach()

            # ===================backward=====================
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # ===================meters=====================
            batch_time.update(time.time() - end)
            end = time.time()

            pbar.set_postfix({"Acc@1":'{0:.2f}'.format(top1.avg.cpu().numpy()),
                              "Acc@5":'{0:.2f}'.format(top5.avg.cpu().numpy(),2),
                              "Loss" :'{0:.2f}'.format(losses.avg,2),
                              })

    print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    return top1.avg, losses.avg, MemBank
def train(train_loader, model, optimizer, epoch, train_writer):
    """One optical-flow training epoch over either a torch or DALI loader.

    Returns (losses.avg, flow2_EPEs.avg).  Relies on module-level `args`,
    `device` and the global iteration counter `n_iter`.

    NOTE(review): if args.data_loader is neither "torch" nor "dali",
    `epoch_size` (and `input`/`target`) are never bound and the loop raises
    NameError.
    """
    global n_iter, args
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    flow2_EPEs = AverageMeter()

    if args.data_loader == "torch":
        epoch_size = (len(train_loader) if args.epoch_size == 0 else min(
            len(train_loader), args.epoch_size))
    if args.data_loader == "dali":
        # DALI pipelines may not expose a meaningful len(); cap arbitrarily.
        epoch_size = (9999 if args.epoch_size == 0 else min(
            len(train_loader), args.epoch_size))

    # switch to train mode
    model.train()

    end = time.time()
    for i, data in enumerate(train_loader):
        if args.data_loader == "torch":
            (input, target) = data
        if args.data_loader == "dali":
            # DALI packs both frames into one 6-channel tensor; split them.
            input = [
                data[0]["images"][:, 0:3, :, :],
                data[0]["images"][:, 3:6, :, :],
            ]
            target = data[0]["flow"]

        # Optional debug visualization of the image pair and GT flow.
        if args.show_train_images:
            for k in range(len(input[0].cpu())):
                f, axarr = plt.subplots(2, 2)
                axarr[0, 0].imshow(np.moveaxis(np.array(input[0].cpu()[k]), 0, 2))
                axarr[0, 1].imshow(np.moveaxis(np.array(input[1].cpu()[k]), 0, 2))
                axarr[1, 0].imshow(
                    np.moveaxis(
                        flow2rgb(args.div_flow * np.squeeze(target.cpu()[k]),
                                 max_value=10),
                        0,
                        2,
                    ))
                plt.show()

        # measure data loading time
        data_time.update(time.time() - end)
        target = target.to(device)
        # Stack the two frames channel-wise as network input.
        input = torch.cat(input, 1).to(device)

        # compute output
        output = model(input)  # [0], input[1])
        if args.sparse:
            # Since Target pooling is not very precise when sparse,
            # take the highest resolution prediction and upsample it instead of downsampling target
            h, w = target.size()[-2:]
            output = [F.interpolate(output[0], (h, w)), *output[1:]]

        loss = multiscaleEPE(output,
                             target,
                             weights=args.multiscale_weights,
                             sparse=args.sparse)
        # NOTE(review): realEPE receives the full multi-scale `output` here;
        # the inline comment suggests output[0] — confirm against realEPE's
        # signature.
        flow2_EPE = args.div_flow * realEPE(
            output, target, sparse=args.sparse)  # output[0] if using multi-scale loss

        # record loss and EPE
        losses.update(loss.item(), target.size(0))
        train_writer.add_scalar("train_loss", loss.item(), n_iter)
        flow2_EPEs.update(flow2_EPE.item(), target.size(0))

        # compute gradient and do optimization step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print(
                "Epoch: [{0}][{1}/{2}]\t Time {3}\t Data {4}\t Loss {5}\t EPE {6}"
                .format(epoch, i, epoch_size, batch_time, data_time, losses,
                        flow2_EPEs))
        n_iter += 1
        if i >= epoch_size:
            break

    return losses.avg, flow2_EPEs.avg
def train(epoch, train_loader, model, classifier, criterion, optimizer, opt):
    """Run one linear-evaluation training epoch.

    The backbone `model` stays in eval mode and is never updated; only the
    `classifier` head is trained on features extracted from it.

    Returns (top1.avg, losses.avg).
    """
    model.eval()
    classifier.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    tic = time.time()
    for idx, (images, labels) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - tic)

        images = images.float()
        if opt.gpu is not None:
            images = images.cuda(opt.gpu, non_blocking=True)
            labels = labels.cuda(opt.gpu, non_blocking=True)

        # ===================forward=====================
        # Frozen feature extraction: no gradients through the backbone.
        with torch.no_grad():
            feat_l, feat_ab = model(images, opt.layer)
            feats = torch.cat((feat_l, feat_ab), dim=1).contiguous().detach()

        preds = classifier(feats)
        loss = criterion(preds, labels)

        acc1, acc5 = accuracy(preds, labels, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # ===================backward=====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # ===================meters=====================
        batch_time.update(time.time() - tic)
        tic = time.time()

        if idx % opt.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, idx, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))
            sys.stdout.flush()

    return top1.avg, losses.avg
def train(train_loader, model, criterions, optimizer, epoch):
    """Train the protest model for one epoch.

    Forwards each batch, sums the per-head losses from `calculate_loss`
    into a single scalar, steps the optimizer, and tracks running meters.

    Returns the list of per-batch total loss values.
    """
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    loss_protest = AverageMeter()
    loss_v = AverageMeter()
    protest_acc = AverageMeter()
    violence_mse = AverageMeter()
    visattr_acc = AverageMeter()

    loss_history = []
    tick = time.time()
    for i, sample in enumerate(train_loader):
        input, target = sample['image'], sample['label']
        data_time.update(time.time() - tick)

        if args.cuda:
            input = input.cuda()
            for key, val in target.items():
                target[key] = val.cuda()

        input_var = Variable(input)
        target_var = {key: Variable(val) for key, val in target.items()}

        output = model(input_var)
        losses, scores, N_protest = calculate_loss(output, target_var,
                                                   criterions)

        # Total loss is the plain sum of every head's loss.
        optimizer.zero_grad()
        loss = sum(losses)
        loss.backward()
        optimizer.step()

        # The protest-head loss is always recorded; the violence term only
        # when the batch actually contains protest images.
        loss_protest.update(losses[0].data.item(), input.size(0))
        if N_protest:
            loss_v.update(loss.data.item() - losses[0].data.item(), N_protest)
        loss_history.append(loss.data.item())
        protest_acc.update(scores['protest_acc'], input.size(0))
        violence_mse.update(scores['violence_mse'], N_protest)
        visattr_acc.update(scores['visattr_acc'], N_protest)

        batch_time.update(time.time() - tick)
        tick = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}] '
                  'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) '
                  'Data {data_time.val:.2f} ({data_time.avg:.2f}) '
                  'Loss {loss_val:.3f} ({loss_avg:.3f}) '
                  'Protest {protest_acc.val:.3f} ({protest_acc.avg:.3f}) '
                  'Violence {violence_mse.val:.5f} ({violence_mse.avg:.5f}) '
                  'Vis Attr {visattr_acc.val:.3f} ({visattr_acc.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time,
                      loss_val=loss_protest.val + loss_v.val,
                      loss_avg=loss_protest.avg + loss_v.avg,
                      protest_acc=protest_acc, violence_mse=violence_mse,
                      visattr_acc=visattr_acc))

    return loss_history
def train(train_loader, nets, optimizers, criterions, epoch):
    """Train both peer networks for one epoch of Deep Mutual Learning.

    Each network minimizes its own classification loss plus a KL
    mutual-learning term (`criterionDML`) toward the detached softened
    output of its peer, weighted by ``args.lambda_dml``.  Running meters
    are printed every ``args.print_freq`` batches.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    cls1_losses = AverageMeter()
    dml1_losses = AverageMeter()
    cls2_losses = AverageMeter()
    dml2_losses = AverageMeter()
    top11 = AverageMeter()
    top51 = AverageMeter()
    top12 = AverageMeter()
    top52 = AverageMeter()

    net1 = nets['net1']
    net2 = nets['net2']

    criterionCls = criterions['criterionCls']
    criterionDML = criterions['criterionDML']

    optimizer1 = optimizers['optimizer1']
    optimizer2 = optimizers['optimizer2']

    net1.train()
    net2.train()

    end = time.time()
    for idx, (img, target) in enumerate(train_loader, start=1):
        data_time.update(time.time() - end)

        if args.cuda:
            img = img.cuda()
            target = target.cuda()

        _, _, _, _, output1 = net1(img)
        _, _, _, _, output2 = net2(img)

        # for net1: CE loss + KL toward net2's detached distribution
        cls1_loss = criterionCls(output1, target)
        dml1_loss = criterionDML(F.log_softmax(output1, dim=1),
                                 F.softmax(output2.detach(), dim=1)) / img.size(0)
        dml1_loss = dml1_loss * args.lambda_dml
        net1_loss = cls1_loss + dml1_loss

        prec11, prec51 = accuracy(output1, target, topk=(1,5))
        cls1_losses.update(cls1_loss.item(), img.size(0))
        dml1_losses.update(dml1_loss.item(), img.size(0))
        top11.update(prec11.item(), img.size(0))
        top51.update(prec51.item(), img.size(0))

        # for net2: mirror of net1
        cls2_loss = criterionCls(output2, target)
        dml2_loss = criterionDML(F.log_softmax(output2, dim=1),
                                 F.softmax(output1.detach(), dim=1)) / img.size(0)
        dml2_loss = dml2_loss * args.lambda_dml
        net2_loss = cls2_loss + dml2_loss

        prec12, prec52 = accuracy(output2, target, topk=(1,5))
        cls2_losses.update(cls2_loss.item(), img.size(0))
        dml2_losses.update(dml2_loss.item(), img.size(0))
        top12.update(prec12.item(), img.size(0))
        top52.update(prec52.item(), img.size(0))

        # update net1 & net2
        optimizer1.zero_grad()
        net1_loss.backward()
        optimizer1.step()

        optimizer2.zero_grad()
        net2_loss.backward()
        optimizer2.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if idx % args.print_freq == 0:
            # BUGFIX: the net2 fields were mislabeled 'Cls1'/'DML1' even
            # though they print net2's meters; label them 'Cls2'/'DML2'.
            print('Epoch[{0}]:[{1:03}/{2:03}] '
                  'Time:{batch_time.val:.4f} '
                  'Data:{data_time.val:.4f} '
                  'Cls1:{cls1_losses.val:.4f}({cls1_losses.avg:.4f}) '
                  'DML1:{dml1_losses.val:.4f}({dml1_losses.avg:.4f}) '
                  'Cls2:{cls2_losses.val:.4f}({cls2_losses.avg:.4f}) '
                  'DML2:{dml2_losses.val:.4f}({dml2_losses.avg:.4f}) '
                  'prec@1_1:{top11.val:.2f}({top11.avg:.2f}) '
                  'prec@5_1:{top51.val:.2f}({top51.avg:.2f}) '
                  'prec@1_2:{top12.val:.2f}({top12.avg:.2f}) '
                  'prec@5_2:{top52.val:.2f}({top52.avg:.2f})'.format(
                      epoch, idx, len(train_loader), batch_time=batch_time,
                      data_time=data_time, cls1_losses=cls1_losses,
                      dml1_losses=dml1_losses, top11=top11, top51=top51,
                      cls2_losses=cls2_losses, dml2_losses=dml2_losses,
                      top12=top12, top52=top52))
def train(train_loader, train_loader1, train_loader2, train_loader3, args, model, criterion, center_loss, optimizer, epoch, num_epochs):
    """One training epoch over four modality loaders (image/video/audio/text).

    The four loaders are zipped batch-wise; all modalities are forwarded
    jointly and the optimized loss is the sum of the per-modality CE losses
    (loss4) plus an MMD cross-modality alignment term (loss7).  loss5
    (center loss) and loss6 (ranking loss) are computed and logged only —
    they are deliberately excluded from the total, matching the
    commented-out `loss = loss4 + loss5 + loss6` variant.
    """
    print(len(train_loader), len(train_loader1), len(train_loader2), len(train_loader3))
    count = 0
    since = time.time()
    running_loss0 = AverageMeter()
    running_loss1 = AverageMeter()
    running_loss2 = AverageMeter()
    running_loss3 = AverageMeter()
    running_loss4 = AverageMeter()
    running_loss5 = AverageMeter()
    running_loss6 = AverageMeter()
    running_loss7 = AverageMeter()
    running_loss = AverageMeter()
    log = Log()
    model.train()
    image_acc = 0
    text_acc = 0
    video_acc = 0
    audio_acc = 0
    for (i, (input, target)), (j, (input1, target1)), (k, (input2, target2)), (p, (input3, target3)) in zip(
            enumerate(train_loader), enumerate(train_loader1),
            enumerate(train_loader2), enumerate(train_loader3)):
        input_var = Variable(input.cuda())
        input_var1 = Variable(input1.cuda())
        input_var2 = Variable(input2.cuda())
        input_var3 = Variable(input3.cuda())
        targets = torch.cat((target, target1, target2, target3), 0)
        targets = Variable(targets.cuda())
        target_var = Variable(target.cuda())
        target_var1 = Variable(target1.cuda())
        target_var2 = Variable(target2.cuda())
        target_var3 = Variable(target3.cuda())
        # Per-class sample counters and per-class feature buffers, one set per
        # modality (200 classes, up to len(input) samples/class, 200-d output).
        label_num_i = Variable(torch.zeros(200).cuda())
        label_num_v = Variable(torch.zeros(200).cuda())
        label_num_a = Variable(torch.zeros(200).cuda())
        label_num_t = Variable(torch.zeros(200).cuda())
        outputi = Variable(torch.zeros(200, len(input), 200).cuda())
        outputv = Variable(torch.zeros(200, len(input), 200).cuda())
        outputa = Variable(torch.zeros(200, len(input), 200).cuda())
        outputt = Variable(torch.zeros(200, len(input), 200).cuda())

        outputs = model(input_var, input_var1, input_var2, input_var3)  # [4*B, 200]
        # Split the stacked output back into the four modality chunks.
        size = int(outputs.size(0) / 4)
        img = outputs.narrow(0, 0, size)
        vid = outputs.narrow(0, size, size)
        aud = outputs.narrow(0, 2 * size, size)
        txt = outputs.narrow(0, 3 * size, size)

        # Group each modality's outputs by class label.
        # BUGFIX: these loops previously reused `(i, j)` as loop variables,
        # clobbering the outer batch index `i` (which broke the
        # print-frequency check below), and the v/a/t buffers were indexed
        # with `label_num_i` instead of their own per-modality counters.
        for (lbl, vec) in zip(target_var, img):
            outputi[lbl][label_num_i[lbl].int()] = vec
            label_num_i[lbl] += 1
        for (lbl, vec) in zip(target_var1, vid):
            outputv[lbl][label_num_v[lbl].int()] = vec
            label_num_v[lbl] += 1
        for (lbl, vec) in zip(target_var2, aud):
            outputa[lbl][label_num_a[lbl].int()] = vec
            label_num_a[lbl] += 1
        for (lbl, vec) in zip(target_var3, txt):
            outputt[lbl][label_num_t[lbl].int()] = vec
            label_num_t[lbl] += 1

        # Per-modality top-1 accuracy, accumulated as a per-batch fraction.
        _, predict1 = torch.max(img, 1)
        _, predict2 = torch.max(vid, 1)
        _, predict3 = torch.max(aud, 1)
        _, predict4 = torch.max(txt, 1)
        image_acc += torch.sum(
            torch.squeeze(
                predict1.long() == target_var.long())).item() / float(
                    target_var.size()[0])
        video_acc += torch.sum(
            torch.squeeze(
                predict2.long() == target_var1.long())).item() / float(
                    target_var1.size()[0])
        audio_acc += torch.sum(
            torch.squeeze(
                predict3.long() == target_var2.long())).item() / float(
                    target_var2.size()[0])
        text_acc += torch.sum(
            torch.squeeze(
                predict4.long() == target_var3.long())).item() / float(
                    target_var3.size()[0])

        loss0 = criterion(img, target_var)
        loss1 = criterion(vid, target_var1)
        loss2 = criterion(aud, target_var2)
        loss3 = criterion(txt, target_var3)
        loss4 = loss0 + loss1 + loss2 + loss3
        loss5 = center_loss(outputs, targets) * 0.001
        if (args.loss_choose == 'r'):
            # BUGFIX: a duplicated second call passed the undefined name
            # `feature` (NameError at runtime); keep the single call on
            # `outputs`.
            loss6, _ = ranking_loss(targets, outputs, margin=1, margin2=0.5,
                                    squared=False)
            loss6 = loss6 * 0.1
        else:
            loss6 = 0.0
        # Pairwise MMD between the class-grouped modality features.
        loss7 = (get_MMD(outputv, outputi, label_num_v, label_num_i)
                 + get_MMD(outputa, outputi, label_num_a, label_num_i)
                 + get_MMD(outputt, outputi, label_num_t, label_num_i)
                 + get_MMD(outputa, outputv, label_num_a, label_num_v)
                 + get_MMD(outputt, outputv, label_num_t, label_num_v)
                 + get_MMD(outputa, outputt, label_num_a, label_num_t)) * 0.001
        loss = loss4 + loss7

        batchsize = input_var.size(0)
        running_loss0.update(loss0.item(), batchsize)
        running_loss1.update(loss1.item(), batchsize)
        running_loss2.update(loss2.item(), batchsize)
        running_loss3.update(loss3.item(), batchsize)
        running_loss4.update(loss4.item(), batchsize)
        running_loss5.update(loss5.item(), batchsize)
        running_loss7.update(loss7.item(), batchsize)
        if (args.loss_choose == 'r'):
            running_loss6.update(loss6.item(), batchsize)
        running_loss.update(loss.item(), batchsize)

        optimizer.zero_grad()
        loss.backward()
        # for param in center_loss.parameters():
        #     param.grad.data *= (1./0.001)
        optimizer.step()
        count += 1

        if (i % args.print_freq == 0):
            print('-' * 20)
            print('Epoch [{0}/{1}][{2}/{3}]'.format(epoch, num_epochs, i,
                                                    len(train_loader)))
            print('Image Loss: {loss.avg:.5f}'.format(loss=running_loss0))
            print('Video Loss: {loss.avg:.5f}'.format(loss=running_loss1))
            print('Audio Loss: {loss.avg:.5f}'.format(loss=running_loss2))
            print('Text Loss: {loss.avg:.5f}'.format(loss=running_loss3))
            print('AllMedia Loss: {loss.avg:.5f}'.format(loss=running_loss4))
            print('MMD Loss: {loss.avg:.5f}'.format(loss=running_loss7))
            if (args.loss_choose == 'r'):
                print(
                    'Ranking Loss: {loss.avg:.5f}'.format(loss=running_loss6))
            print('All Loss: {loss.avg:.5f}'.format(loss=running_loss))

    print("训练第%d个epoch:" % epoch)
    print("image:", image_acc / len(train_loader3))
    print("text:", text_acc / len(train_loader3))
    print("video:", video_acc / len(train_loader3))
    print("audio:", audio_acc / len(train_loader3))
    time_elapsed = time.time() - since
    print(
        'Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                      time_elapsed % 60),
        "训练了%d个batch" % count)
def test(test_loader, nets, criterions):
    """Evaluate both DML peer networks on the test set.

    Computes each network's classification loss, its mutual-learning KL
    term against the peer, and top-1/top-5 accuracy, then prints the
    averaged meters.  No gradients are computed.
    """
    cls1_losses = AverageMeter()
    dml1_losses = AverageMeter()
    cls2_losses = AverageMeter()
    dml2_losses = AverageMeter()
    top11 = AverageMeter()
    top51 = AverageMeter()
    top12 = AverageMeter()
    top52 = AverageMeter()

    net1 = nets['net1']
    net2 = nets['net2']

    criterionCls = criterions['criterionCls']
    criterionDML = criterions['criterionDML']

    net1.eval()
    net2.eval()

    for idx, (img, target) in enumerate(test_loader, start=1):
        if args.cuda:
            img = img.cuda()
            target = target.cuda()

        with torch.no_grad():
            _, _, _, _, output1 = net1(img)
            _, _, _, _, output2 = net2(img)

        # for net1
        cls1_loss = criterionCls(output1, target)
        dml1_loss = criterionDML(F.log_softmax(output1, dim=1),
                                 F.softmax(output2.detach(), dim=1)) / img.size(0)
        dml1_loss = dml1_loss * args.lambda_dml

        prec11, prec51 = accuracy(output1, target, topk=(1,5))
        cls1_losses.update(cls1_loss.item(), img.size(0))
        dml1_losses.update(dml1_loss.item(), img.size(0))
        top11.update(prec11.item(), img.size(0))
        top51.update(prec51.item(), img.size(0))

        # for net2
        cls2_loss = criterionCls(output2, target)
        dml2_loss = criterionDML(F.log_softmax(output2, dim=1),
                                 F.softmax(output1.detach(), dim=1)) / img.size(0)
        dml2_loss = dml2_loss * args.lambda_dml

        prec12, prec52 = accuracy(output2, target, topk=(1,5))
        cls2_losses.update(cls2_loss.item(), img.size(0))
        dml2_losses.update(dml2_loss.item(), img.size(0))
        top12.update(prec12.item(), img.size(0))
        top52.update(prec52.item(), img.size(0))

    f_l = [cls1_losses.avg, dml1_losses.avg, top11.avg, top51.avg]
    f_l += [cls2_losses.avg, dml2_losses.avg, top12.avg, top52.avg]
    # BUGFIX: the two halves of this message were adjacent string literals
    # with no separator, printing "...Prec@5_1: 12.34Cls2: ...".
    print('Cls1: {:.4f}, DML1: {:.4f}, Prec@1_1: {:.2f}, Prec@5_1: {:.2f}, '
          'Cls2: {:.4f}, DML2: {:.4f}, Prec@1_2: {:.2f}, Prec@5_2: {:.2f}'.format(*f_l))
def train(train_loader, nets, optimizer, criterions, epoch):
    """One FitNets hint-training epoch.

    Optimizes the student (`snet`) on its classification loss plus an
    intermediate-layer regression loss toward the frozen teacher's (`tnet`)
    third residual block, weighted by ``args.lambda_fitnet``.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    cls_losses = AverageMeter()
    fitnet_losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    snet = nets['snet']
    tnet = nets['tnet']

    criterionCls = criterions['criterionCls']
    criterionFitnet = criterions['criterionFitnet']

    snet.train()

    end = time.time()
    for idx, (img, target) in enumerate(train_loader, start=1):
        data_time.update(time.time() - end)

        if args.cuda:
            img = img.cuda()
            target = target.cuda()

        _, _, _, rb3_s, output_s = snet(img)
        # The teacher only supplies fixed regression targets; skip building
        # its autograd graph to save memory (the `.detach()` below already
        # blocked gradients, but the graph was still constructed).
        with torch.no_grad():
            _, _, _, rb3_t, output_t = tnet(img)

        cls_loss = criterionCls(output_s, target)
        fitnet_loss = criterionFitnet(rb3_s, rb3_t.detach()) * args.lambda_fitnet
        loss = cls_loss + fitnet_loss

        prec1, prec5 = accuracy(output_s, target, topk=(1,5))
        cls_losses.update(cls_loss.item(), img.size(0))
        fitnet_losses.update(fitnet_loss.item(), img.size(0))
        top1.update(prec1.item(), img.size(0))
        top5.update(prec5.item(), img.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if idx % args.print_freq == 0:
            print('Epoch[{0}]:[{1:03}/{2:03}] '
                  'Time:{batch_time.val:.4f} '
                  'Data:{data_time.val:.4f} '
                  'Cls:{cls_losses.val:.4f}({cls_losses.avg:.4f}) '
                  'Fitnet:{fitnet_losses.val:.4f}({fitnet_losses.avg:.4f}) '
                  'prec@1:{top1.val:.2f}({top1.avg:.2f}) '
                  'prec@5:{top5.val:.2f}({top5.avg:.2f})'.format(
                      epoch, idx, len(train_loader), batch_time=batch_time,
                      data_time=data_time, cls_losses=cls_losses,
                      fitnet_losses=fitnet_losses, top1=top1, top5=top5))
def train(train_loader, model, optimizer, epoch, train_writer):
    """One training epoch for event-based optical flow.

    Each batch supplies on/off event counts for two time windows plus the
    bracketing grayscale frames; the four event channels are stacked into a
    5-D representation and the model is trained with an unsupervised
    photometric + smoothness loss.  Returns losses.avg.  Relies on the
    module-level globals listed below (`image_resize`, `device`, ...).
    """
    global n_iter, args, event_interval, image_resize, sp_threshold
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    flow2_EPEs = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()

    mini_batch_size_v = args.batch_size
    batch_size_v = 4
    sp_threshold = 0.5

    for ww, data in enumerate(train_loader, 0):
        # get the inputs
        former_inputs_on, former_inputs_off, latter_inputs_on, latter_inputs_off, former_gray, latter_gray = data

        # Skip batches with no events at all in the former window.
        if torch.sum(former_inputs_on + former_inputs_off) > 0:
            # Stack the four event maps along a new channel axis:
            # [B, 4, H, W, T] with channels (former on/off, latter on/off).
            input_representation = torch.zeros(
                former_inputs_on.size(0), batch_size_v, image_resize,
                image_resize, former_inputs_on.size(3)).float()
            for b in range(batch_size_v):
                if b == 0:
                    input_representation[:, 0, :, :, :] = former_inputs_on
                elif b == 1:
                    input_representation[:, 1, :, :, :] = former_inputs_off
                elif b == 2:
                    input_representation[:, 2, :, :, :] = latter_inputs_on
                elif b == 3:
                    input_representation[:, 3, :, :, :] = latter_inputs_off

            # measure data loading time
            data_time.update(time.time() - end)

            # compute output
            input_representation = input_representation.to(device)
            output = model(input_representation.type(torch.cuda.FloatTensor),
                           image_resize, sp_threshold)

            # Photometric loss between the two grayscale frames warped by
            # the predicted flow; events summed over the time axis.
            photometric_loss = compute_photometric_loss(
                former_gray[:, 0, :, :],
                latter_gray[:, 0, :, :],
                torch.sum(input_representation, 4),
                output,
                weights=args.multiscale_weights)

            # Smoothness loss.
            smoothness_loss = smooth_loss(output)

            # total_loss
            loss = photometric_loss + 1 * smoothness_loss

            # compute gradient and do optimization step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # record loss and EPE
            train_writer.add_scalar('train_loss', loss.item(), n_iter)
            losses.update(loss.item(), input_representation.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # Print roughly every args.print_freq *samples* (indices are
            # scaled by the mini-batch size).
            if mini_batch_size_v * ww % args.print_freq < mini_batch_size_v:
                print('Epoch: [{0}][{1}/{2}]\t Time {3}\t Data {4}\t Loss {5}'.
                      format(epoch, mini_batch_size_v * ww,
                             mini_batch_size_v * len(train_loader), batch_time,
                             data_time, losses))
            n_iter += 1

    return losses.avg
def train(epoch, train_loader, model, contrast, criterion_l, criterion_ab, optimizer, opt):
    """Run one contrastive (CMC-style) training epoch.

    The encoder produces L and ab features, the `contrast` module scores
    them against its memory bank, and the two NCE losses are summed.
    Supports apex AMP mixed-precision when `opt.amp` is set.

    Returns (l_loss_meter.avg, l_prob_meter.avg, ab_loss_meter.avg,
    ab_prob_meter.avg).
    """
    model.train()
    contrast.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    l_loss_meter = AverageMeter()
    ab_loss_meter = AverageMeter()
    l_prob_meter = AverageMeter()
    ab_prob_meter = AverageMeter()

    last_tick = time.time()
    for idx, (batch_imgs, _, sample_idx) in enumerate(train_loader):
        data_time.update(time.time() - last_tick)

        n = batch_imgs.size(0)
        batch_imgs = batch_imgs.float()
        if torch.cuda.is_available():
            sample_idx = sample_idx.cuda(non_blocking=True)
            batch_imgs = batch_imgs.cuda()

        # ===================forward=====================
        feat_l, feat_ab = model(batch_imgs)
        out_l, out_ab = contrast(feat_l, feat_ab, sample_idx)

        l_loss = criterion_l(out_l)
        ab_loss = criterion_ab(out_ab)
        # Column 0 holds the positive-pair score.
        l_prob = out_l[:, 0].mean()
        ab_prob = out_ab[:, 0].mean()

        loss = l_loss + ab_loss

        # ===================backward=====================
        optimizer.zero_grad()
        if opt.amp:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        optimizer.step()

        # ===================meters=====================
        losses.update(loss.item(), n)
        l_loss_meter.update(l_loss.item(), n)
        l_prob_meter.update(l_prob.item(), n)
        ab_loss_meter.update(ab_loss.item(), n)
        ab_prob_meter.update(ab_prob.item(), n)

        torch.cuda.synchronize()
        batch_time.update(time.time() - last_tick)
        last_tick = time.time()

        # print info
        if (idx + 1) % opt.print_freq == 0:
            print('Train: [{0}][{1}/{2}]\t'
                  'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'DT {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'loss {loss.val:.3f} ({loss.avg:.3f})\t'
                  'l_p {lprobs.val:.3f} ({lprobs.avg:.3f})\t'
                  'ab_p {abprobs.val:.3f} ({abprobs.avg:.3f})'.format(
                      epoch, idx + 1, len(train_loader),
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, lprobs=l_prob_meter,
                      abprobs=ab_prob_meter))
            sys.stdout.flush()

    return l_loss_meter.avg, l_prob_meter.avg, ab_loss_meter.avg, ab_prob_meter.avg
def validate(test_loader, model, epoch, output_writers):
    """Evaluate event-flow predictions against the DVS ground-truth flow.

    Loads GT flow from the module-level h5 files (`gt_file`, `testfile`),
    runs the model on each non-empty event batch, and accumulates average
    endpoint-error statistics.  Returns the mean AEE over processed batches.

    NOTE(review): the final print uses `n_points` from the *last* loop
    iteration only, and raises NameError/ZeroDivisionError if no batch had
    events — presumably never the case for this dataset; confirm.
    """
    global args, image_resize, sp_threshold

    d_label = h5py.File(gt_file, 'r')
    gt_temp = np.float32(d_label['davis']['left']['flow_dist'])
    gt_ts_temp = np.float64(d_label['davis']['left']['flow_dist_ts'])
    d_label = None

    d_set = h5py.File(testfile, 'r')
    gray_image = d_set['davis']['left']['image_raw']

    batch_time = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()

    batch_size_v = 4
    sp_threshold = 0.5

    AEE_sum = 0.
    AEE_sum_sum = 0.
    AEE_sum_gt = 0.
    AEE_sum_sum_gt = 0.
    percent_AEE_sum = 0.
    iters = 0.
    scale = 1

    for i, data in enumerate(test_loader, 0):
        former_inputs_on, former_inputs_off, latter_inputs_on, latter_inputs_off, st_time, ed_time = data

        # Only evaluate batches that actually contain events.
        if torch.sum(former_inputs_on + former_inputs_off) > 0:
            # Stack the four event maps into [B, 4, H, W, T] (same layout as
            # in train()).
            input_representation = torch.zeros(
                former_inputs_on.size(0), batch_size_v, image_resize,
                image_resize, former_inputs_on.size(3)).float()
            for b in range(batch_size_v):
                if b == 0:
                    input_representation[:, 0, :, :, :] = former_inputs_on
                elif b == 1:
                    input_representation[:, 1, :, :, :] = former_inputs_off
                elif b == 2:
                    input_representation[:, 2, :, :, :] = latter_inputs_on
                elif b == 3:
                    input_representation[:, 3, :, :, :] = latter_inputs_off

            # compute output
            input_representation = input_representation.to(device)
            output = model(input_representation.type(torch.cuda.FloatTensor),
                           image_resize, sp_threshold)

            # Resize the predicted flow (first sample of the batch) into an
            # (H, W, 2) numpy array.
            pred_flow = np.zeros((image_resize, image_resize, 2))
            output_temp = output.cpu()
            pred_flow[:, :, 0] = cv2.resize(np.array(output_temp[0, 0, :, :]),
                                            (image_resize, image_resize),
                                            interpolation=cv2.INTER_LINEAR)
            pred_flow[:, :, 1] = cv2.resize(np.array(output_temp[0, 1, :, :]),
                                            (image_resize, image_resize),
                                            interpolation=cv2.INTER_LINEAR)

            # Interpolate the GT flow to this batch's [st_time, ed_time] window.
            U_gt_all = np.array(gt_temp[:, 0, :, :])
            V_gt_all = np.array(gt_temp[:, 1, :, :])
            U_gt, V_gt = estimate_corresponding_gt_flow(
                U_gt_all, V_gt_all, gt_ts_temp, np.array(st_time),
                np.array(ed_time))
            gt_flow = np.stack((U_gt, V_gt), axis=2)

            # ----------- Visualization (debug only; enabled by epoch < 0)
            if epoch < 0:
                mask_temp = former_inputs_on + former_inputs_off + latter_inputs_on + latter_inputs_off
                mask_temp = torch.sum(torch.sum(mask_temp, 0), 2)
                mask_temp_np = np.squeeze(np.array(mask_temp)) > 0
                spike_image = mask_temp
                spike_image[spike_image > 0] = 255
                if args.render:
                    cv2.imshow('Spike Image',
                               np.array(spike_image, dtype=np.uint8))

                gray = cv2.resize(gray_image[i],
                                  (scale * image_resize, scale * image_resize),
                                  interpolation=cv2.INTER_LINEAR)
                if args.render:
                    cv2.imshow('Gray Image',
                               cv2.cvtColor(gray, cv2.COLOR_BGR2RGB))

                out_temp = np.array(output_temp.cpu().detach())
                x_flow = cv2.resize(
                    np.array(out_temp[0, 0, :, :]),
                    (scale * image_resize, scale * image_resize),
                    interpolation=cv2.INTER_LINEAR)
                y_flow = cv2.resize(
                    np.array(out_temp[0, 1, :, :]),
                    (scale * image_resize, scale * image_resize),
                    interpolation=cv2.INTER_LINEAR)
                flow_rgb = flow_viz_np(x_flow, y_flow)
                if args.render:
                    cv2.imshow('Predicted Flow Output',
                               cv2.cvtColor(flow_rgb, cv2.COLOR_BGR2RGB))

                gt_flow_x = cv2.resize(
                    gt_flow[:, :, 0],
                    (scale * image_resize, scale * image_resize),
                    interpolation=cv2.INTER_LINEAR)
                gt_flow_y = cv2.resize(
                    gt_flow[:, :, 1],
                    (scale * image_resize, scale * image_resize),
                    interpolation=cv2.INTER_LINEAR)
                gt_flow_large = flow_viz_np(gt_flow_x, gt_flow_y)
                if args.render:
                    cv2.imshow('GT Flow',
                               cv2.cvtColor(gt_flow_large, cv2.COLOR_BGR2RGB))

                # Same flows again, but masked to pixels that saw events.
                masked_x_flow = cv2.resize(
                    np.array(out_temp[0, 0, :, :] * mask_temp_np),
                    (scale * image_resize, scale * image_resize),
                    interpolation=cv2.INTER_LINEAR)
                masked_y_flow = cv2.resize(
                    np.array(out_temp[0, 1, :, :] * mask_temp_np),
                    (scale * image_resize, scale * image_resize),
                    interpolation=cv2.INTER_LINEAR)
                flow_rgb_masked = flow_viz_np(masked_x_flow, masked_y_flow)
                if args.render:
                    cv2.imshow(
                        'Masked Predicted Flow',
                        cv2.cvtColor(flow_rgb_masked, cv2.COLOR_BGR2RGB))

                gt_flow_cropped = gt_flow[2:-2, 45:-45]
                gt_flow_masked_x = cv2.resize(
                    gt_flow_cropped[:, :, 0] * mask_temp_np,
                    (scale * image_resize, scale * image_resize),
                    interpolation=cv2.INTER_LINEAR)
                gt_flow_masked_y = cv2.resize(
                    gt_flow_cropped[:, :, 1] * mask_temp_np,
                    (scale * image_resize, scale * image_resize),
                    interpolation=cv2.INTER_LINEAR)
                gt_masked_flow = flow_viz_np(gt_flow_masked_x, gt_flow_masked_y)
                if args.render:
                    cv2.imshow('GT Masked Flow',
                               cv2.cvtColor(gt_masked_flow, cv2.COLOR_BGR2RGB))

                cv2.waitKey(1)

            # Center-crop the GT flow to the prediction's resolution before
            # computing endpoint errors.
            image_size = pred_flow.shape
            full_size = gt_flow.shape
            xsize = full_size[1]
            ysize = full_size[0]
            xcrop = image_size[1]
            ycrop = image_size[0]
            xoff = (xsize - xcrop) // 2
            yoff = (ysize - ycrop) // 2
            gt_flow = gt_flow[yoff:-yoff, xoff:-xoff, :]

            # Dense flow error, masked by total event count per pixel.
            AEE, percent_AEE, n_points, AEE_sum_temp, AEE_gt, AEE_sum_temp_gt = flow_error_dense(
                gt_flow, pred_flow,
                (torch.sum(torch.sum(torch.sum(input_representation, dim=0),
                                     dim=0),
                           dim=2)).cpu(),
                is_car=False)

            AEE_sum = AEE_sum + args.div_flow * AEE
            AEE_sum_sum = AEE_sum_sum + AEE_sum_temp

            AEE_sum_gt = AEE_sum_gt + args.div_flow * AEE_gt
            AEE_sum_sum_gt = AEE_sum_sum_gt + AEE_sum_temp_gt

            percent_AEE_sum += percent_AEE

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i < len(output_writers):  # log first output of first batches
                # if epoch == 0:
                #     mean_values = torch.tensor([0.411,0.432,0.45], dtype=input_representation.dtype).view(3,1,1)
                output_writers[i].add_image(
                    'FlowNet Outputs',
                    flow2rgb(args.div_flow * output[0], max_value=10), epoch)

            iters += 1

    print('-------------------------------------------------------')
    print(
        'Mean AEE: {:.2f}, sum AEE: {:.2f}, Mean AEE_gt: {:.2f}, sum AEE_gt: {:.2f}, mean %AEE: {:.2f}, # pts: {:.2f}'
        .format(AEE_sum / iters, AEE_sum_sum / iters, AEE_sum_gt / iters,
                AEE_sum_sum_gt / iters, percent_AEE_sum / iters, n_points))
    print('-------------------------------------------------------')
    gt_temp = None

    return AEE_sum / iters
def train(train_loader, model, optimizer, epoch, train_writer, scheduler):
    """Train the displacement-field network for one epoch.

    Builds a 6-channel (reference, deformed) input from each batch, minimizes
    the multi-scale EPE loss, steps the LR scheduler every iteration, and
    logs the loss to TensorBoard.

    Returns (losses.avg, flow2_EPEs.avg).
    """
    global n_iter, args

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    flow2_EPEs = AverageMeter()

    if args.epoch_size == 0:
        epoch_size = len(train_loader)
    else:
        epoch_size = min(len(train_loader), args.epoch_size)

    # switch to train mode
    model.train()

    tick = time.time()
    for i, batch in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - tick)

        # Ground truth: x- and y-displacement fields stacked channel-wise.
        disp_x = batch['Dispx'].to(device)
        disp_y = batch['Dispy'].to(device)
        disp = torch.cat([disp_x, disp_y], 1).to(device)

        # Replicate each single-channel image to 3 channels, then stack
        # reference and deformed images as the 6-channel network input.
        ref_img = batch['Ref'].float().to(device)
        ref_img = torch.cat([ref_img, ref_img, ref_img], 1).to(device)
        def_img = batch['Def'].float().to(device)
        def_img = torch.cat([def_img, def_img, def_img], 1).to(device)
        net_input = torch.cat([ref_img, def_img], 1).to(device)

        # compute output
        prediction = model(net_input)

        loss = multiscaleEPE(prediction, disp,
                             weights=args.multiscale_weights,
                             sparse=args.sparse)
        flow2_EPE = args.div_flow * realEPE(prediction[0], disp,
                                            sparse=args.sparse)

        # record loss and EPE
        losses.update(loss.item(), disp.size(0))
        train_writer.add_scalar('train_loss', loss.item(), n_iter)
        flow2_EPEs.update(flow2_EPE.item(), disp.size(0))

        # compute gradient and do optimization step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # measure elapsed time
        batch_time.update(time.time() - tick)
        tick = time.time()

        if i % args.print_freq == 0:
            print(
                'Epoch: [{0}][{1}/{2}]\t Time {3}\t Data {4}\t Loss {5}\t EPE {6}'
                .format(epoch, i, epoch_size, batch_time, data_time, losses,
                        flow2_EPEs))
        n_iter += 1

    return losses.avg, flow2_EPEs.avg
def validate(val_loader, model, criterions, epoch):
    """Run one validation pass for the protest model.

    Mirrors train(): forwards each batch, sums the per-head losses from
    `calculate_loss`, and accumulates meters — but performs no backward or
    optimizer step.

    Returns:
        (loss_protest.avg + loss_v.avg, loss_history): the combined average
        validation loss and the list of per-batch total losses.
    """
    model.eval()
    batch_time = AverageMeter()
    data_time = AverageMeter()  # kept for symmetry with train(); not updated here
    loss_protest = AverageMeter()
    loss_v = AverageMeter()
    protest_acc = AverageMeter()
    violence_mse = AverageMeter()
    visattr_acc = AverageMeter()

    end = time.time()
    loss_history = []
    for i, sample in enumerate(val_loader):
        input, target = sample['image'], sample['label']

        if args.cuda:
            input = input.cuda()
            for k, v in target.items():
                target[k] = v.cuda()

        input_var = Variable(input)
        target_var = {}
        for k, v in target.items():
            target_var[k] = Variable(v)

        output = model(input_var)
        losses, scores, N_protest = calculate_loss(output, target_var,
                                                   criterions)

        loss = 0
        for l in losses:
            loss += l

        # BUGFIX: replaced legacy `.data[0]` (raises IndexError on 0-dim
        # tensors in modern PyTorch) with `.item()`, consistent with train().
        if N_protest:
            loss_protest.update(losses[0].item(), input.size(0))
            loss_v.update(loss.item() - losses[0].item(), N_protest)
        else:
            # when no protest images
            loss_protest.update(losses[0].item(), input.size(0))
        loss_history.append(loss.item())
        protest_acc.update(scores['protest_acc'], input.size(0))
        violence_mse.update(scores['violence_mse'], N_protest)
        visattr_acc.update(scores['visattr_acc'], N_protest)

        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) '
                  'Loss {loss_val:.3f} ({loss_avg:.3f}) '
                  'Protest Acc {protest_acc.val:.3f} ({protest_acc.avg:.3f}) '
                  'Violence MSE {violence_mse.val:.5f} ({violence_mse.avg:.5f}) '
                  'Vis Attr Acc {visattr_acc.val:.3f} ({visattr_acc.avg:.3f})'
                  .format(
                      epoch, i, len(val_loader), batch_time=batch_time,
                      loss_val=loss_protest.val + loss_v.val,
                      loss_avg=loss_protest.avg + loss_v.avg,
                      protest_acc=protest_acc,
                      violence_mse=violence_mse,
                      visattr_acc=visattr_acc))

    print(' * Loss {loss_avg:.3f} Protest Acc {protest_acc.avg:.3f} '
          'Violence MSE {violence_mse.avg:.5f} '
          'Vis Attr Acc {visattr_acc.avg:.3f} '
          .format(loss_avg=loss_protest.avg + loss_v.avg,
                  protest_acc=protest_acc,
                  violence_mse=violence_mse,
                  visattr_acc=visattr_acc))
    return loss_protest.avg + loss_v.avg, loss_history