def eval_robust_heatmap(detector, xloader, print_freq, logger):
  batch_time, NUM_PTS = AverageMeter(), xloader.dataset.NUM_PTS
  Preds, GT_locs, Distances = [], [], []
  eval_meta, end = Eval_Meta(), time.time()
  with torch.no_grad():
    detector.eval()
    for i, (inputs, heatmaps, masks, norm_points, thetas, data_index, nopoints, xshapes) in enumerate(xloader):
      data_index = data_index.squeeze(1).tolist()
      batch_size, iters, C, H, W = inputs.size()
      for ibatch in range(batch_size):
        xinputs, xpoints, xthetas = inputs[ibatch], norm_points[ibatch].permute(0, 2, 1).contiguous(), thetas[ibatch]
        batch_features, batch_heatmaps, batch_locs, batch_scos = detector(xinputs.cuda(non_blocking=True))
        batch_locs = batch_locs.cpu()[:, :-1]
        all_locs = []
        for _iter in range(iters):
          # map the prediction of each augmented copy back to the original image coordinates
          _locs = normalize_points((H, W), batch_locs[_iter].permute(1, 0))
          xlocs = torch.cat((_locs, torch.ones(1, NUM_PTS)), dim=0)
          nlocs = torch.mm(xthetas[_iter, :2], xlocs)
          rlocs = denormalize_points(xshapes[ibatch].tolist(), nlocs)
          rlocs = torch.cat((rlocs.permute(1, 0), xpoints[_iter, :, 2:]), dim=1)
          all_locs.append(rlocs.clone())
        GT_loc = xloader.dataset.labels[data_index[ibatch]].get_points()
        norm_distance = xloader.dataset.get_normalization_distance(data_index[ibatch])
        # save the results, using the mean prediction over all augmented copies
        eval_meta.append((sum(all_locs) / len(all_locs)).numpy().T, GT_loc.numpy(), xloader.dataset.datas[data_index[ibatch]], norm_distance)
        Distances.append(norm_distance)
        Preds.append(all_locs)
        GT_locs.append(GT_loc.permute(1, 0))
      # compute time
      batch_time.update(time.time() - end)
      end = time.time()
      if i % print_freq == 0 or i + 1 == len(xloader):
        last_time = convert_secs2time(batch_time.avg * (len(xloader) - i - 1), True)
        logger.log(' -->>[Robust HEATMAP-based Evaluation] [{:03d}/{:03d}] Time : {:}'.format(i, len(xloader), last_time))
  # evaluate the results
  errors, valids = calculate_robust(Preds, GT_locs, Distances, NUM_PTS)
  return errors, valids, eval_meta
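# The coordinate recovery inside eval_robust_heatmap is a three-step chain:
# normalize the network-space prediction, apply the per-augmentation affine
# theta, then denormalize into original-image pixels. A minimal, self-contained
# sketch of that chain, assuming the common [-1, 1] normalization convention;
# the *_sketch helpers below are illustrative stand-ins, not the repo's own
# normalize_points / denormalize_points.
import torch

def normalize_points_sketch(shape, points):
  # points : [2, N] pixel coordinates; shape : (H, W) -> map into [-1, 1]
  h, w = shape
  normed = points.clone().float()
  normed[0] = normed[0] / (w - 1) * 2 - 1
  normed[1] = normed[1] / (h - 1) * 2 - 1
  return normed

def denormalize_points_sketch(shape, points):
  # inverse of normalize_points_sketch : [-1, 1] -> pixel coordinates
  h, w = shape
  denormed = points.clone().float()
  denormed[0] = (denormed[0] + 1) / 2 * (w - 1)
  denormed[1] = (denormed[1] + 1) / 2 * (h - 1)
  return denormed

def recover_locations_sketch(theta, locs, net_hw, image_hw):
  # locs  : [N, 2] predictions in network-input coordinates
  # theta : [2, 3] affine matrix mapping the augmented view back to the original
  normed = normalize_points_sketch(net_hw, locs.permute(1, 0))       # [2, N]
  homo = torch.cat((normed, torch.ones(1, normed.size(1))), dim=0)   # [3, N] homogeneous
  original = torch.mm(theta, homo)                                   # [2, N] in normalized original space
  return denormalize_points_sketch(image_hw, original).permute(1, 0) # [N, 2] pixels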
def visualize(args):
  print('The result file is {:}'.format(args.meta))
  print('The save path is {:}'.format(args.save))
  meta = Path(args.meta)
  save = Path(args.save)
  assert meta.exists(), 'The meta file {:} does not exist'.format(meta)
  xmeta = Eval_Meta()
  xmeta.load(meta)
  print('this meta file has {:} predictions'.format(len(xmeta)))
  if not save.exists(): os.makedirs(args.save)
  for i in range(len(xmeta)):
    image, prediction = xmeta.image_lists[i], xmeta.predictions[i]
    name = osp.basename(image)
    image = draw_image_by_points(image, prediction, 2, (255, 0, 0), False, False)
    path = save / name
    image.save(path)
    print('{:03d}-th image is saved into {:}'.format(i, path))
def visualize(args):
  print('The result file is {:}'.format(args.meta))
  print('The save path is {:}'.format(args.save))
  meta = Path(args.meta)
  save = Path(args.save)
  assert meta.exists(), 'The meta file {:} does not exist'.format(meta)
  xmeta = Eval_Meta()
  xmeta.load(meta)
  print('this meta file has {:} predictions'.format(len(xmeta)))
  if not save.exists(): os.makedirs(args.save)
  for i in range(len(xmeta)):
    image, prediction = xmeta.image_lists[i], xmeta.predictions[i]
    name = osp.basename(image)
    image = draw_image_by_points(image, prediction, 6, (255, 0, 0), False, False)
    path = save / name
    image.save(path)
    print('{:03d}-th image is saved into {:}'.format(i, path))
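# The two visualize() variants above come from different evaluation scripts
# and differ only in the drawn point radius (2 vs. 6 pixels). A minimal
# argparse driver matching the attributes they read (args.meta, args.save);
# the helper name below is hypothetical.
import argparse

def parse_visualize_args_sketch():
  parser = argparse.ArgumentParser(description='Visualize predictions saved in an Eval_Meta file.')
  parser.add_argument('--meta', type=str, required=True, help='path to the saved Eval_Meta result file')
  parser.add_argument('--save', type=str, required=True, help='directory to write the rendered images')
  return parser.parse_args()

# usage: visualize(parse_visualize_args_sketch())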
def stm_main_heatmap(args, loader, net, criterion, optimizer, epoch_str, logger, opt_config, stm_config, use_stm, mode):
  assert mode == 'train' or mode == 'test', 'invalid mode : {:}'.format(mode)
  args = copy.deepcopy(args)
  batch_time, data_time, forward_time, eval_time = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
  visible_points, DetLosses, TemporalLosses, MultiviewLosses, TotalLosses = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
  alk_points, a3d_points = AverageMeter(), AverageMeter()
  annotate_index = loader.dataset.video_L
  eval_meta = Eval_Meta()
  cpu = torch.device('cpu')
  if args.debug:
    save_dir = Path(args.save_path) / 'DEBUG' / ('{:}-'.format(mode) + epoch_str)
  else:
    save_dir = None

  # switch to train mode
  if mode == 'train':
    logger.log('STM-Main-REG : training : {:} .. STM = {:}'.format(stm_config, use_stm))
    print_freq = args.print_freq
    net.train()
    criterion.train()
  else:
    logger.log('STM-Main-REG : evaluation mode.')
    print_freq = args.print_freq_eval
    net.eval()
    criterion.eval()

  i_batch_size, v_batch_size, m_batch_size = args.i_batch_size, args.v_batch_size, args.m_batch_size
  iv_size = i_batch_size + v_batch_size
  end = time.time()
  for i, (frames, Fflows, Bflows, targets, masks, normpoints, transthetas, MV_Tensors, MV_Thetas, MV_Shapes, MV_KRT,
          torch_is_3D, torch_is_images, image_index, nopoints, shapes, MultiViewPaths) in enumerate(loader):
    # frames : IBatch+VBatch+MBatch, Frame, Channel, Height, Width
    # Fflows : IBatch+VBatch+MBatch, Frame-1, Height, Width, 2
    # Bflows : IBatch+VBatch+MBatch, Frame-1, Height, Width, 2
    # information : split the batch into the image/video part and the multiview part
    MV_Mask = masks[iv_size:]
    frames, Fflows, Bflows, targets, masks, normpoints, transthetas = \
        frames[:iv_size], Fflows[:iv_size], Bflows[:iv_size], targets[:iv_size], masks[:iv_size], normpoints[:iv_size], transthetas[:iv_size]
    nopoints, shapes, torch_is_images = nopoints[:iv_size], shapes[:iv_size], torch_is_images[:iv_size]
    MV_Tensors, MV_Thetas, MV_Shapes, MV_KRT, torch_is_3D = \
        MV_Tensors[iv_size:], MV_Thetas[iv_size:], MV_Shapes[iv_size:], MV_KRT[iv_size:], torch_is_3D[iv_size:]
    assert torch.sum(torch_is_images[:i_batch_size]).item() == i_batch_size, 'Image Check Fail : {:} vs. {:}'.format(torch_is_images[:i_batch_size], i_batch_size)
    assert v_batch_size == 0 or torch.sum(torch_is_images[i_batch_size:]).item() == 0, 'Video Check Fail : {:} vs. {:}'.format(torch_is_images[i_batch_size:], v_batch_size)
    assert torch_is_3D.sum().item() == m_batch_size, 'Multiview Check Fail : {:} vs. {:}'.format(torch_is_3D, m_batch_size)
    image_index = image_index.squeeze(1).tolist()
    (batch_size, frame_length, C, H, W), num_pts, num_views = frames.size(), args.num_pts, stm_config.max_views
    visible_point_num = float(np.sum(masks.numpy()[:, :-1, :, :])) / batch_size
    visible_points.update(visible_point_num, batch_size)
    normpoints = normpoints.permute(0, 2, 1)
    target_heats = targets.cuda(non_blocking=True)
    target_points = normpoints[:, :, :2].contiguous().cuda(non_blocking=True)
    target_scores = normpoints[:, :, 2:].contiguous().cuda(non_blocking=True)
    det_masks = (1 - nopoints).view(batch_size, 1, 1, 1) * masks
    have_det_loss = det_masks.sum().item() > 0
    det_masks = det_masks.cuda(non_blocking=True)
    nopoints = nopoints.squeeze(1).tolist()
    # measure data loading time
    data_time.update(time.time() - end)

    # batch_heatmaps is a list of stage-predictions, each element should be [Batch, Sequence, PTS, H/Down, W/Down]
    batch_heatmaps, batch_locs, batch_scos, batch_past2now, batch_future2now, batch_FBcheck, multiview_heatmaps, multiview_locs = \
        net(frames, Fflows, Bflows, MV_Tensors, torch_is_images)
    annot_heatmaps = [x[:, annotate_index] for x in batch_heatmaps]
    forward_time.update(time.time() - end)

    # detection loss
    if have_det_loss:
      det_loss, each_stage_loss_value = compute_stage_loss(criterion, target_heats, annot_heatmaps, det_masks)
      DetLosses.update(det_loss.item(), batch_size)
      each_stage_loss_value = show_stage_loss(each_stage_loss_value)
    else:
      det_loss, each_stage_loss_value = 0, 'no-det-loss'

    # temporal loss
    if use_stm[0]:
      video_batch_locs = batch_locs[i_batch_size:, :, :num_pts]
      video_past2now, video_future2now = batch_past2now[i_batch_size:, :, :num_pts], batch_future2now[i_batch_size:, :, :num_pts]
      video_FBcheck = batch_FBcheck[i_batch_size:, :num_pts]
      video_mask = masks[i_batch_size:, :num_pts].contiguous().cuda(non_blocking=True)
      video_heatmaps = [x[i_batch_size:, :, :num_pts] for x in batch_heatmaps]
      sbr_loss, available_nums, sbr_loss_string = calculate_temporal_loss(criterion, video_heatmaps, video_batch_locs, video_past2now, video_future2now, video_FBcheck, video_mask, stm_config)
      alk_points.update(float(available_nums) / v_batch_size, v_batch_size)
      if available_nums > stm_config.available_sbr_thresh:
        TemporalLosses.update(sbr_loss.item(), v_batch_size)
      else:
        sbr_loss, sbr_loss_string = 0, 'non-sbr-loss'
    else:
      sbr_loss, sbr_loss_string = 0, 'non-sbr-loss'

    # multiview loss
    if use_stm[1]:
      MV_Mask_G = MV_Mask[:, :-1].view(m_batch_size, 1, -1, 1).contiguous().cuda(non_blocking=True)
      MV_Thetas_G = MV_Thetas.to(multiview_locs.device)
      MV_Shapes_G = MV_Shapes.to(multiview_locs.device).view(m_batch_size, num_views, 1, 2)
      MV_KRT_G = MV_KRT.to(multiview_locs.device)
      mv_norm_locs_trs = torch.cat((multiview_locs[:, :, :num_pts].permute(0, 1, 3, 2),
                                    torch.ones(m_batch_size, num_views, 1, num_pts, device=multiview_locs.device)), dim=2)
      mv_norm_locs_ori = torch.matmul(MV_Thetas_G[:, :, :2], mv_norm_locs_trs)
      mv_norm_locs_ori = mv_norm_locs_ori.permute(0, 1, 3, 2)
      mv_real_locs_ori = denormalize_L(mv_norm_locs_ori, MV_Shapes_G)
      mv_3D_locs_ori = TriangulateDLT_BatchCam(MV_KRT_G, mv_real_locs_ori)
      mv_proj_locs_ori = ProjectKRT_Batch(MV_KRT_G, mv_3D_locs_ori.view(m_batch_size, 1, num_pts, 3))
      mv_pnorm_locs_ori = normalize_L(mv_proj_locs_ori, MV_Shapes_G)
      mv_pnorm_locs_trs = convert_theta(mv_pnorm_locs_ori, MV_Thetas_G)
      MV_locs = multiview_locs[:, :, :num_pts].contiguous()
      MV_heatmaps = [x[:, :, :num_pts] for x in multiview_heatmaps]

      if args.debug:
        with torch.no_grad():
          for ims in range(m_batch_size):
            x_index = image_index[iv_size + ims]
            x_paths = [xlist[iv_size + ims] for xlist in MultiViewPaths]
            x_mv_locs, p_mv_locs = mv_real_locs_ori[ims], mv_proj_locs_ori[ims]
            multiview_debug_save(save_dir, '{:}'.format(x_index), x_paths, x_mv_locs.cpu().numpy(), p_mv_locs.cpu().numpy())
            y_mv_locs = denormalize_points_batch((H, W), MV_locs[ims])
            q_mv_locs = denormalize_points_batch((H, W), mv_pnorm_locs_trs[ims])
            temp_tensors = MV_Tensors[ims]
            temp_images = [args.tensor2imageF(x) for x in temp_tensors]
            temp_names = [Path(x).name for x in x_paths]
            multiview_debug_save_v2(save_dir, '{:}'.format(x_index), temp_names, temp_images, y_mv_locs.cpu().numpy(), q_mv_locs.cpu().numpy())

      stm_loss, available_nums = calculate_multiview_loss(criterion, MV_heatmaps, MV_locs, mv_pnorm_locs_trs, MV_Mask_G, stm_config)
      a3d_points.update(float(available_nums) / m_batch_size, m_batch_size)
      if available_nums > stm_config.available_stm_thresh:
        MultiviewLosses.update(stm_loss.item(), m_batch_size)
      else:
        stm_loss = 0
    else:
      stm_loss = 0

    # measure accuracy and record loss
    if use_stm[0]:
      total_loss = det_loss + sbr_loss * stm_config.sbr_weights + stm_loss * stm_config.stm_weights
    else:
      total_loss = det_loss + stm_loss * stm_config.stm_weights
    if isinstance(total_loss, numbers.Number):
      warnings.warn('The {:}-th iteration has no detection loss and no lk loss'.format(i))
    else:
      TotalLosses.update(total_loss.item(), batch_size)
      # compute gradient and do SGD step
      if mode == 'train':  # training mode
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

    eval_time.update(time.time() - end)

    with torch.no_grad():
      batch_locs = batch_locs.detach().to(cpu)[:, annotate_index, :num_pts]
      batch_scos = batch_scos.detach().to(cpu)[:, annotate_index, :num_pts]
      # evaluate the training data
      for ibatch in range(iv_size):
        imgidx, nopoint = image_index[ibatch], nopoints[ibatch]
        if nopoint == 1: continue
        norm_locs = torch.cat((batch_locs[ibatch].permute(1, 0), torch.ones(1, num_pts)), dim=0)
        transtheta = transthetas[ibatch][:2, :]
        norm_locs = torch.mm(transtheta, norm_locs)
        real_locs = denormalize_points(shapes[ibatch].tolist(), norm_locs)
        real_locs = torch.cat((real_locs, batch_scos[ibatch].view(1, num_pts)), dim=0)
        image_path = loader.dataset.datas[imgidx][annotate_index]
        normDistce = loader.dataset.NormDistances[imgidx]
        xpoints = loader.dataset.labels[imgidx].get_points()
        eval_meta.append(real_locs.numpy(), xpoints.numpy(), image_path, normDistce)
        if save_dir:
          # NOTE: meanthetas is not unpacked from this loader, so this debug call would raise a NameError when triggered.
          pro_debug_save(save_dir, Path(image_path).name, frames[ibatch, annotate_index], targets[ibatch], normpoints[ibatch], meanthetas[ibatch], batch_heatmaps[-1][ibatch, annotate_index], args.tensor2imageF)

    # measure elapsed time
    batch_time.update(time.time() - end)
    last_time = convert_secs2time(batch_time.avg * (len(loader) - i - 1), True)
    end = time.time()

    if i % print_freq == 0 or i + 1 == len(loader):
      logger.log(' -->>[{:}]: [{:}][{:03d}/{:03d}] '
                 'Time {batch_time.val:4.2f} ({batch_time.avg:4.2f}) '
                 'Data {data_time.val:4.2f} ({data_time.avg:4.2f}) '
                 'F-time {forward_time.val:4.2f} ({forward_time.avg:4.2f}) '
                 'Det {dloss.val:7.4f} ({dloss.avg:7.4f}) '
                 'SBR {sloss.val:7.6f} ({sloss.avg:7.6f}) '
                 'STM {mloss.val:7.6f} ({mloss.avg:7.6f}) '
                 'Loss {loss.val:7.4f} ({loss.avg:7.4f}) '.format(
                     mode, epoch_str, i, len(loader), batch_time=batch_time, data_time=data_time, forward_time=forward_time,
                     dloss=DetLosses, sloss=TemporalLosses, mloss=MultiviewLosses, loss=TotalLosses)
                 + last_time + each_stage_loss_value
                 + ' I={:}'.format(list(frames.size()))
                 + ' Vis-PTS : {:2d} ({:.1f})'.format(int(visible_points.val), visible_points.avg)
                 + ' Ava-PTS : {:.1f} ({:.1f})'.format(alk_points.val, alk_points.avg)
                 + ' A3D-PTS : {:.1f} ({:.1f})'.format(a3d_points.val, a3d_points.avg))
      if args.debug:
        logger.log(' -->>Indexes : {:}'.format(image_index))
  nme, _, _ = eval_meta.compute_mse(loader.dataset.dataset_name, logger)
  return TotalLosses.avg, nme
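# The multiview branch above triangulates per-view 2D predictions into one 3D
# point (TriangulateDLT_BatchCam) and reprojects it into each view
# (ProjectKRT_Batch) so that the reprojection can supervise the 2D detector.
# A minimal numpy sketch of single-point DLT triangulation and reprojection,
# assuming 3x4 KRT camera matrices; these helpers are illustrative, not the
# repo's batched implementations.
import numpy as np

def triangulate_dlt_sketch(projections, points2d):
  # projections : list of V camera matrices, each 3x4
  # points2d    : V x 2 pixel observations of the same landmark
  # DLT system: for each view, x * P[2] - P[0] = 0 and y * P[2] - P[1] = 0
  A = []
  for P, (x, y) in zip(projections, points2d):
    A.append(x * P[2] - P[0])
    A.append(y * P[2] - P[1])
  A = np.stack(A)                 # [2V, 4]
  _, _, Vt = np.linalg.svd(A)
  X = Vt[-1]                      # homogeneous solution: smallest singular vector
  return X[:3] / X[3]

def project_krt_sketch(P, X):
  # reproject a 3D point with a 3x4 camera matrix into pixel coordinates
  x = P @ np.append(X, 1.0)
  return x[:2] / x[2]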
def x_sbr_main_regression(args, loader, teacher, net, criterion, optimizer, epoch_str, logger, opt_config, sbr_config, use_sbr, mode):
  assert mode == 'train' or mode == 'test', 'invalid mode : {:}'.format(mode)
  args = copy.deepcopy(args)
  batch_time, data_time, forward_time, eval_time = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
  visible_points, DetLosses, TotalLosses, TemporalLosses = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
  alk_points = AverageMeter()
  annotate_index = loader.dataset.video_L
  eval_meta = Eval_Meta()
  cpu = torch.device('cpu')
  if args.debug:
    save_dir = Path(args.save_path) / 'DEBUG' / ('{:}-'.format(mode) + epoch_str)
  else:
    save_dir = None

  # switch to train mode
  if mode == 'train':
    logger.log('Temporal-Main-Regression : training : {:} .. SBR={:}'.format(sbr_config, use_sbr))
    print_freq = args.print_freq
    net.train() ; criterion.train()
  else:
    logger.log('Temporal-Main-Regression : evaluation mode.')
    print_freq = args.print_freq_eval
    net.eval() ; criterion.eval()
  teacher.eval()

  i_batch_size, v_batch_size = args.i_batch_size, args.v_batch_size
  end = time.time()
  for i, (frames, Fflows, Bflows, targets, masks, normpoints, transthetas, meanthetas, image_index, nopoints, shapes, is_images) in enumerate(loader):
    # frames : IBatch+VBatch, Frame, Channel, Height, Width
    # Fflows : IBatch+VBatch, Frame-1, Height, Width, 2
    # Bflows : IBatch+VBatch, Frame-1, Height, Width, 2
    # information
    image_index = image_index.squeeze(1).tolist()
    (batch_size, frame_length, C, H, W), num_pts = frames.size(), args.num_pts
    visible_point_num = float(np.sum(masks.numpy()[:, :-1, :, :])) / batch_size
    visible_points.update(visible_point_num, batch_size)
    assert is_images[:i_batch_size].sum().item() == i_batch_size, '{:} vs. {:}'.format(is_images, i_batch_size)
    assert is_images[i_batch_size:].sum().item() == 0, '{:} vs. {:}'.format(is_images, v_batch_size)
    normpoints = normpoints.permute(0, 2, 1)
    target_points = normpoints[:, :, :2].contiguous().cuda(non_blocking=True)
    target_scores = normpoints[:, :, 2:].contiguous().cuda(non_blocking=True)
    det_masks = (1 - nopoints).view(batch_size, 1, 1) * masks[:, :num_pts].contiguous().view(batch_size, num_pts, 1)
    have_det_loss = det_masks.sum().item() > 0
    det_masks = det_masks.cuda(non_blocking=True)
    nopoints = nopoints.squeeze(1).tolist()
    # measure data loading time
    data_time.update(time.time() - end)

    # batch_locs are the per-frame coordinate predictions of the regression network
    batch_locs, batch_past2now, batch_future2now, batch_FBcheck = net(frames, Fflows, Bflows, is_images)
    forward_time.update(time.time() - end)

    # detection loss, distilled from the frozen teacher's predictions
    if have_det_loss:
      with torch.no_grad():
        soft_targets = teacher(frames)
      det_loss = criterion(batch_locs, soft_targets, None)
      DetLosses.update(det_loss.item(), batch_size)
    else:
      det_loss = 0

    # temporal loss
    if use_sbr:
      video_batch_locs = batch_locs[i_batch_size:, :]
      video_past2now, video_future2now, video_FBcheck = batch_past2now[i_batch_size:], batch_future2now[i_batch_size:], batch_FBcheck[i_batch_size:]
      video_mask = masks[i_batch_size:, :-1].contiguous().cuda(non_blocking=True)
      sbr_loss, available_nums = calculate_temporal_loss(criterion, video_batch_locs, video_past2now, video_future2now, video_FBcheck, video_mask, sbr_config)
      alk_points.update(float(available_nums) / v_batch_size, v_batch_size)
      if available_nums > sbr_config.available_thresh:
        TemporalLosses.update(sbr_loss.item(), v_batch_size)
      else:
        sbr_loss = 0
    else:
      sbr_loss = 0

    # measure accuracy and record loss
    #if sbr_config.weight != 0: total_loss = det_loss + sbr_loss * sbr_config.weight
    #else                     : total_loss = det_loss
    if use_sbr:
      total_loss = det_loss + sbr_loss * sbr_config.weight
    else:
      total_loss = det_loss
    if isinstance(total_loss, numbers.Number):
      warnings.warn('The {:}-th iteration has no detection loss and no lk loss'.format(i))
    else:
      TotalLosses.update(total_loss.item(), batch_size)
      # compute gradient and do SGD step
      if mode == 'train':  # training mode
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

    eval_time.update(time.time() - end)

    with torch.no_grad():
      batch_locs = batch_locs.detach().to(cpu)[:, annotate_index]
      # evaluate the training data
      for ibatch, (imgidx, nopoint) in enumerate(zip(image_index, nopoints)):
        if nopoint == 1: continue
        norm_locs = torch.cat((batch_locs[ibatch].permute(1, 0), torch.ones(1, num_pts)), dim=0)
        transtheta = transthetas[ibatch][:2, :]
        norm_locs = torch.mm(transtheta, norm_locs)
        real_locs = denormalize_points(shapes[ibatch].tolist(), norm_locs)
        real_locs = torch.cat((real_locs, torch.ones(1, num_pts)), dim=0)
        image_path = loader.dataset.datas[imgidx][annotate_index]
        normDistce = loader.dataset.NormDistances[imgidx]
        xpoints = loader.dataset.labels[imgidx].get_points()
        eval_meta.append(real_locs.numpy(), xpoints.numpy(), image_path, normDistce)
        if save_dir:
          # NOTE: batch_heatmaps is undefined in this regression pipeline; this debug call is residue from the heatmap variant and would raise a NameError.
          pro_debug_save(save_dir, Path(image_path).name, frames[ibatch, annotate_index], targets[ibatch], normpoints[ibatch], meanthetas[ibatch], batch_heatmaps[-1][ibatch, annotate_index], args.tensor2imageF)

    # measure elapsed time
    batch_time.update(time.time() - end)
    last_time = convert_secs2time(batch_time.avg * (len(loader) - i - 1), True)
    end = time.time()

    if i % print_freq == 0 or i + 1 == len(loader):
      logger.log(' -->>[{:}]: [{:}][{:03d}/{:03d}] '
                 'Time {batch_time.val:4.2f} ({batch_time.avg:4.2f}) '
                 'Data {data_time.val:4.2f} ({data_time.avg:4.2f}) '
                 'F-time {forward_time.val:4.2f} ({forward_time.avg:4.2f}) '
                 'Det {dloss.val:7.4f} ({dloss.avg:7.4f}) '
                 'SBR {sloss.val:7.4f} ({sloss.avg:7.4f}) '
                 'Loss {loss.val:7.4f} ({loss.avg:7.4f}) '.format(
                     mode, epoch_str, i, len(loader), batch_time=batch_time, data_time=data_time, forward_time=forward_time,
                     dloss=DetLosses, sloss=TemporalLosses, loss=TotalLosses)
                 + last_time
                 + ' I={:}'.format(list(frames.size()))
                 + ' Vis-PTS : {:2d} ({:.1f})'.format(int(visible_points.val), visible_points.avg)
                 + ' Ava-PTS : {:.1f} ({:.1f})'.format(alk_points.val, alk_points.avg))
      if args.debug:
        logger.log(' -->>Indexes : {:}'.format(image_index))
  nme, _, _ = eval_meta.compute_mse(loader.dataset.dataset_name, logger)
  return TotalLosses.avg, nme
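# The temporal (SBR) branch relies on a forward-backward tracking check:
# a landmark tracked from the annotated frame to a neighbor and back should
# land where it started, and landmarks whose round trip drifts too far are
# excluded from the temporal loss (the role of batch_FBcheck above).
# A minimal numpy sketch of that criterion; the threshold name is illustrative.
import numpy as np

def forward_backward_check_sketch(start_locs, roundtrip_locs, thresh=1.0):
  # start_locs, roundtrip_locs : [N, 2] landmark coordinates in pixels
  # returns a boolean mask of landmarks whose round-trip drift is below thresh
  drift = np.linalg.norm(roundtrip_locs - start_locs, axis=1)
  return drift < thresh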
def basic_train(args, loader, net, criterion, optimizer, epoch_str, logger, opt_config):
  args = deepcopy(args)
  batch_time, data_time, forward_time, eval_time = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
  visible_points, losses = AverageMeter(), AverageMeter()
  eval_meta = Eval_Meta()
  cpu = torch.device('cpu')
  # switch to train mode
  net.train()
  criterion.train()

  end = time.time()
  for i, (inputs, target, mask, points, image_index, nopoints, cropped_size) in enumerate(loader):
    # inputs : Batch, Channel, Height, Width
    target = target.cuda(non_blocking=True)
    image_index = image_index.numpy().squeeze(1).tolist()
    batch_size, num_pts = inputs.size(0), args.num_pts
    visible_point_num = float(np.sum(mask.numpy()[:, :-1, :, :])) / batch_size
    visible_points.update(visible_point_num, batch_size)
    nopoints = nopoints.numpy().squeeze(1).tolist()
    annotated_num = batch_size - sum(nopoints)
    # measure data loading time
    mask = mask.cuda(non_blocking=True)
    data_time.update(time.time() - end)

    # batch_heatmaps is a list of stage-predictions, each element should be [Batch, C, H, W]
    batch_heatmaps, batch_locs, batch_scos = net(inputs)
    forward_time.update(time.time() - end)

    loss, each_stage_loss_value = compute_stage_loss(criterion, target, batch_heatmaps, mask)
    if opt_config.lossnorm:
      loss, each_stage_loss_value = loss / annotated_num / 2, [x / annotated_num / 2 for x in each_stage_loss_value]

    # measure accuracy and record loss
    losses.update(loss.item(), batch_size)
    # compute gradient and do SGD step
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    eval_time.update(time.time() - end)

    np_batch_locs, np_batch_scos = batch_locs.detach().to(cpu).numpy(), batch_scos.detach().to(cpu).numpy()
    cropped_size = cropped_size.numpy()
    # evaluate the training data
    for ibatch, (imgidx, nopoint) in enumerate(zip(image_index, nopoints)):
      if nopoint == 1: continue
      locations, scores = np_batch_locs[ibatch, :-1, :], np.expand_dims(np_batch_scos[ibatch, :-1], -1)
      xpoints = loader.dataset.labels[imgidx].get_points()
      assert cropped_size[ibatch, 0] > 0 and cropped_size[ibatch, 1] > 0, 'The ibatch={:}, imgidx={:} is not right : {:}'.format(ibatch, imgidx, cropped_size[ibatch])
      scale_h, scale_w = cropped_size[ibatch, 0] * 1. / inputs.size(-2), cropped_size[ibatch, 1] * 1. / inputs.size(-1)
      # recover the original resolution
      locations[:, 0], locations[:, 1] = locations[:, 0] * scale_w + cropped_size[ibatch, 2], locations[:, 1] * scale_h + cropped_size[ibatch, 3]
      assert xpoints.shape[1] == num_pts and locations.shape[0] == num_pts and scores.shape[0] == num_pts, 'The number of points is {} vs {} vs {} vs {}'.format(num_pts, xpoints.shape, locations.shape, scores.shape)
      prediction = np.concatenate((locations, scores), axis=1).transpose(1, 0)
      image_path = loader.dataset.datas[imgidx]
      face_size = loader.dataset.face_sizes[imgidx]
      eval_meta.append(prediction, xpoints, image_path, face_size)

    # measure elapsed time
    batch_time.update(time.time() - end)
    last_time = convert_secs2time(batch_time.avg * (len(loader) - i - 1), True)
    end = time.time()

    if i % args.print_freq == 0 or i + 1 == len(loader):
      logger.log(' -->>[Train]: [{:}][{:03d}/{:03d}] '
                 'Time {batch_time.val:4.2f} ({batch_time.avg:4.2f}) '
                 'Data {data_time.val:4.2f} ({data_time.avg:4.2f}) '
                 'Forward {forward_time.val:4.2f} ({forward_time.avg:4.2f}) '
                 'Loss {loss.val:7.4f} ({loss.avg:7.4f}) '.format(
                     epoch_str, i, len(loader), batch_time=batch_time, data_time=data_time, forward_time=forward_time, loss=losses)
                 + last_time + show_stage_loss(each_stage_loss_value)
                 + ' In={:} Tar={:}'.format(list(inputs.size()), list(target.size()))
                 + ' Vis-PTS : {:2d} ({:.1f})'.format(int(visible_points.val), visible_points.avg))
  nme, _, _ = eval_meta.compute_mse(logger)
  return losses.avg, nme
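# AverageMeter is assumed by every function in this file; a minimal sketch of
# the conventional implementation, consistent with the .val / .avg / .update
# usage above (named *Sketch to avoid shadowing the imported class):
class AverageMeterSketch(object):
  """Tracks the most recent value and the running average."""
  def __init__(self):
    self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0
  def update(self, val, n=1):
    # val is the per-batch statistic, n the number of samples it covers
    self.val = val
    self.sum += val * n
    self.count += n
    self.avg = self.sum / self.count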
def basic_main_heatmap(args, loader, net, criterion, optimizer, epoch_str, logger, opt_config, mode):
  assert mode == 'train' or mode == 'test', 'invalid mode : {:}'.format(mode)
  args = copy.deepcopy(args)
  batch_time, data_time, forward_time, eval_time = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
  visible_points, losses = AverageMeter(), AverageMeter()
  eval_meta = Eval_Meta()
  cpu = torch.device('cpu')
  if args.debug:
    save_dir = Path(args.save_path) / 'DEBUG' / ('{:}-'.format(mode) + epoch_str)
  else:
    save_dir = None

  # switch to train mode
  if mode == 'train':
    logger.log('basic-main-V2 : training mode.')
    print_freq = args.print_freq
    net.train()
    criterion.train()
  else:
    logger.log('basic-main-V2 : evaluation mode.')
    print_freq = args.print_freq_eval
    net.eval()
    criterion.eval()

  end = time.time()
  for i, (inputs, targets, masks, normpoints, transthetas, meanthetas, image_index, nopoints, shapes) in enumerate(loader):
    # inputs : Batch, Channel, Height, Width
    # information
    image_index = image_index.squeeze(1).tolist()
    (batch_size, C, H, W), num_pts = inputs.size(), args.num_pts
    visible_point_num = float(np.sum(masks.numpy()[:, :-1, :, :])) / batch_size
    visible_points.update(visible_point_num, batch_size)
    annotated_num = batch_size - sum(nopoints)
    det_masks = (1 - nopoints).view(batch_size, 1, 1, 1) * masks
    det_masks = det_masks.cuda(non_blocking=True)
    nopoints = nopoints.squeeze(1).tolist()
    targets = targets.cuda(non_blocking=True)
    # measure data loading time
    data_time.update(time.time() - end)

    # batch_heatmaps is a list of stage-predictions, each element should be [Batch, C, H, W]
    batch_features, batch_heatmaps, batch_locs, batch_scos = net(inputs)
    forward_time.update(time.time() - end)

    loss, each_stage_loss_value = compute_stage_loss(criterion, targets, batch_heatmaps, det_masks)

    # measure accuracy and record loss
    losses.update(loss.item(), batch_size)

    # compute gradient and do SGD step
    if mode == 'train':  # training mode
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

    eval_time.update(time.time() - end)

    with torch.no_grad():
      batch_locs, batch_scos = batch_locs.detach().to(cpu), batch_scos.detach().to(cpu)
      # evaluate the training data
      for ibatch, (imgidx, nopoint) in enumerate(zip(image_index, nopoints)):
        locations = batch_locs[ibatch, :-1, :]
        norm_locs = normalize_points((H, W), locations.transpose(1, 0))
        norm_locs = torch.cat((norm_locs, torch.ones(1, num_pts)), dim=0)
        transtheta = transthetas[ibatch][:2, :]
        norm_locs = torch.mm(transtheta, norm_locs)
        real_locs = denormalize_points(shapes[ibatch].tolist(), norm_locs)
        real_locs = torch.cat((real_locs, batch_scos[ibatch, :-1].view(1, -1)), dim=0)
        #real_locs = torch.cat((real_locs, torch.ones(1, num_pts)), dim=0)
        image_path = loader.dataset.datas[imgidx]
        normDistce = loader.dataset.NormDistances[imgidx]
        if nopoint == 1:
          xpoints = None
        else:
          xpoints = loader.dataset.labels[imgidx].get_points().numpy()
        eval_meta.append(real_locs.numpy(), xpoints, image_path, normDistce)
        if save_dir:
          pro_debug_save(save_dir, Path(image_path).name, inputs[ibatch], targets[ibatch], normpoints[ibatch], meanthetas[ibatch], batch_heatmaps[-1][ibatch], args.tensor2imageF)

    # measure elapsed time
    batch_time.update(time.time() - end)
    last_time = convert_secs2time(batch_time.avg * (len(loader) - i - 1), True)
    end = time.time()

    if i % print_freq == 0 or i + 1 == len(loader):
      logger.log(' -->>[{:}]: [{:}][{:03d}/{:03d}] '
                 'Time {batch_time.val:4.2f} ({batch_time.avg:4.2f}) '
                 'Data {data_time.val:4.2f} ({data_time.avg:4.2f}) '
                 'Forward {forward_time.val:4.2f} ({forward_time.avg:4.2f}) '
                 'Loss {loss.val:7.4f} ({loss.avg:7.4f}) '.format(
                     mode, epoch_str, i, len(loader), batch_time=batch_time, data_time=data_time, forward_time=forward_time, loss=losses)
                 + last_time + show_stage_loss(each_stage_loss_value)
                 + ' In={:} Tar={:}'.format(list(inputs.size()), list(targets.size()))
                 + ' Vis-PTS : {:2d} ({:.1f})'.format(int(visible_points.val), visible_points.avg))
  nme, _, _ = eval_meta.compute_mse(loader.dataset.dataset_name, logger)
  return losses.avg, eval_meta, nme
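# compute_stage_loss is used by every heatmap trainer above; a minimal sketch
# of the usual stage-wise masked criterion. The masking-by-multiplication and
# the (total, per-stage) return convention match how the callers use it, but
# the exact reduction is an assumption (named *_sketch accordingly).
def compute_stage_loss_sketch(criterion, target, stage_heatmaps, masks):
  # target         : [B, PTS, H, W] ground-truth heatmaps
  # stage_heatmaps : list of per-stage predictions, each [B, PTS, H, W]
  # masks          : broadcastable visibility mask applied to both sides
  total_loss, each_stage_loss = 0, []
  for heatmaps in stage_heatmaps:
    stage_loss = criterion(heatmaps * masks, target * masks)
    total_loss = total_loss + stage_loss
    each_stage_loss.append(stage_loss.item())
  return total_loss, each_stage_loss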
def basic_eval(args, loader, net, criterion, epoch_str, logger, opt_config):
  batch_time, data_time, forward_time, eval_time = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
  visible_points, losses = AverageMeter(), AverageMeter()
  eval_meta = Eval_Meta()
  cpu = torch.device('cpu')
  # switch to evaluation mode
  net.eval()
  criterion.eval()

  end = time.time()
  for i, (inputs, mask, points, image_index, nopoints, cropped_size) in enumerate(loader):
    # inputs : Batch, Channel, Height, Width
    image_index = image_index.numpy().squeeze(1).tolist()
    batch_size, num_pts = inputs.size(0), args.num_pts
    visible_point_num = float(np.sum(mask.numpy()[:, :-1])) / batch_size
    visible_points.update(visible_point_num, batch_size)
    nopoints = nopoints.numpy().squeeze(1).tolist()
    annotated_num = batch_size - sum(nopoints)
    points = points[:, :, :2].contiguous()
    # measure data loading time
    points = points.cuda(non_blocking=True)
    mask = mask.cuda(non_blocking=True)
    data_time.update(time.time() - end)

    # batch_locs : [Batch, PTS, 2] regressed coordinates
    batch_locs = net(inputs)
    forward_time.update(time.time() - end)

    if annotated_num > 0:
      loss = compute_regression_loss(criterion, points, batch_locs, mask)
      # measure accuracy and record loss
      losses.update(loss.item(), batch_size)
    else:
      loss = 0
    eval_time.update(time.time() - end)

    np_batch_locs = batch_locs.to(cpu).numpy()
    cropped_size = cropped_size.numpy()
    # evaluate the training data
    for ibatch, (imgidx, nopoint) in enumerate(zip(image_index, nopoints)):
      #if nopoint == 1: continue
      locations = np_batch_locs[ibatch, :, :]
      scores = np.ones((locations.shape[0], 1), dtype=locations.dtype)
      xpoints = loader.dataset.labels[imgidx].get_points()
      assert cropped_size[ibatch, 0] > 0 and cropped_size[ibatch, 1] > 0, 'The ibatch={:}, imgidx={:} is not right : {:}'.format(ibatch, imgidx, cropped_size[ibatch])
      scale_h, scale_w = cropped_size[ibatch, 0] * 1. / inputs.size(-2), cropped_size[ibatch, 1] * 1. / inputs.size(-1)
      # recover the original resolution
      locations[:, 0], locations[:, 1] = locations[:, 0] * scale_w + cropped_size[ibatch, 2], locations[:, 1] * scale_h + cropped_size[ibatch, 3]
      assert xpoints.shape[1] == num_pts and locations.shape[0] == num_pts and scores.shape[0] == num_pts, 'The number of points is {} vs {} vs {} vs {}'.format(num_pts, xpoints.shape, locations.shape, scores.shape)
      prediction = np.concatenate((locations, scores), axis=1).transpose(1, 0)
      image_path = loader.dataset.datas[imgidx]
      face_size = loader.dataset.face_sizes[imgidx]
      if nopoint == 1:
        eval_meta.append(prediction, None, image_path, face_size)
      else:
        eval_meta.append(prediction, xpoints, image_path, face_size)

    # measure elapsed time
    batch_time.update(time.time() - end)
    last_time = convert_secs2time(batch_time.avg * (len(loader) - i - 1), True)
    end = time.time()

    if i % args.print_freq == 0 or i + 1 == len(loader):
      logger.log(' -->>[Eval]: [{:}][{:03d}/{:03d}] '
                 'Time {batch_time.val:4.2f} ({batch_time.avg:4.2f}) '
                 'Data {data_time.val:4.2f} ({data_time.avg:4.2f}) '
                 'Forward {forward_time.val:4.2f} ({forward_time.avg:4.2f}) '
                 'Loss {loss.val:7.4f} ({loss.avg:7.4f}) '.format(
                     epoch_str, i, len(loader), batch_time=batch_time, data_time=data_time, forward_time=forward_time, loss=losses)
                 + last_time
                 + ' In={:} Out={:}'.format(list(inputs.size()), list(batch_locs.size()))
                 + ' Vis-PTS : {:2d} ({:.1f})'.format(int(visible_points.val), visible_points.avg))
  return losses.avg, eval_meta
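# compute_regression_loss is assumed above; a minimal sketch consistent with
# the (criterion, points, batch_locs, mask) call. The mask shape here is an
# assumption: anything broadcastable over [B, PTS, 2] that zeroes out
# unannotated or invisible landmarks would fit the same pattern.
def compute_regression_loss_sketch(criterion, target_points, batch_locs, masks):
  # target_points, batch_locs : [B, PTS, 2] coordinates
  # masks                     : broadcastable visibility weights
  return criterion(batch_locs * masks, target_points * masks)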
def basic_main_regression(args, loader, net, criterion, optimizer, epoch_str, logger, opt_config, mode):
  assert mode == 'train' or mode == 'test', 'invalid mode : {:}'.format(mode)
  args = copy.deepcopy(args)
  batch_time, pre_data_time, data_time, forward_time, eval_time = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
  visible_points, LOSSES, LOSSES_LOCS, LOSSES_SCOS = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
  eval_meta = Eval_Meta()
  cpu = torch.device('cpu')
  if args.debug:
    save_dir = Path(args.save_path) / 'DEBUG' / ('{:}-'.format(mode) + epoch_str)
  else:
    save_dir = None

  # switch to train mode
  if mode == 'train':
    logger.log('basic-main-Regression : training mode :: {:}'.format(criterion))
    print_freq = args.print_freq
    net.train()
    criterion.train()
  else:
    logger.log('basic-main-Regression : evaluation mode :: {:}'.format(criterion))
    print_freq = args.print_freq_eval
    net.eval()
    criterion.eval()

  end = time.time()
  for i, (inputs, targets, masks, normpoints, transthetas, meanthetas, image_index, nopoints, shapes) in enumerate(loader):
    # inputs : Batch, Channel, Height, Width
    pre_data_time.update(time.time() - end)
    # information
    image_index = image_index.squeeze(1).tolist()
    (batch_size, C, H, W), num_pts = inputs.size(), args.num_pts
    visible_points.update(float(masks.numpy()[:, :-1].sum()) / batch_size, batch_size)
    normpoints = normpoints.permute(0, 2, 1)
    target_points = normpoints[:, :, :2].contiguous().cuda(non_blocking=True)
    target_scores = normpoints[:, :, 2:].contiguous().cuda(non_blocking=True)
    det_masks = (1 - nopoints).view(batch_size, 1, 1) * masks[:, :num_pts].contiguous().view(batch_size, num_pts, 1)
    det_masks = det_masks.cuda(non_blocking=True)
    nopoints = nopoints.squeeze(1).tolist()
    # measure data loading time
    data_time.update(time.time() - end)

    batch_locs = net(inputs)
    forward_time.update(time.time() - end)

    loss = criterion(batch_locs, target_points, det_masks)
    #loss_scos = criterion(batch_scos, target_scores, None)
    #loss = loss_locs + loss_scos * opt_config.scos_weight

    # measure accuracy and record loss
    LOSSES.update(loss.item(), batch_size)

    # compute gradient and do SGD step
    if mode == 'train':  # training mode
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

    eval_time.update(time.time() - end)

    with torch.no_grad():
      #batch_locs, batch_scos = batch_locs.detach().to(cpu), batch_scos.detach().to(cpu)
      batch_locs = batch_locs.detach().to(cpu)
      # evaluate the training data
      for ibatch, (imgidx, nopoint) in enumerate(zip(image_index, nopoints)):
        norm_locs = torch.cat((batch_locs[ibatch].permute(1, 0), torch.ones(1, num_pts)), dim=0)
        transtheta = transthetas[ibatch][:2, :]
        norm_locs = torch.mm(transtheta, norm_locs)
        real_locs = denormalize_points(shapes[ibatch].tolist(), norm_locs)
        #real_locs = torch.cat((real_locs, batch_scos[ibatch].permute(1, 0)), dim=0)
        real_locs = torch.cat((real_locs, torch.ones(1, num_pts)), dim=0)
        image_path = loader.dataset.datas[imgidx]
        normDistce = loader.dataset.NormDistances[imgidx]
        if nopoint == 1:
          xpoints = None
        else:
          xpoints = loader.dataset.labels[imgidx].get_points().numpy()
        eval_meta.append(real_locs.numpy(), xpoints, image_path, normDistce)
        #if save_dir:
        #  pro_debug_save(save_dir, Path(image_path).name, inputs[ibatch], targets[ibatch], normpoints[ibatch], meanthetas[ibatch], batch_heatmaps[-1][ibatch], args.tensor2imageF)

    # measure elapsed time
    batch_time.update(time.time() - end)
    last_time = convert_secs2time(batch_time.avg * (len(loader) - i - 1), True)
    end = time.time()

    if i % print_freq == 0 or i + 1 == len(loader):
      logger.log(' -->>[{:}]: [{:}][{:03d}/{:03d}] '
                 'Time {batch_time.val:4.2f} ({batch_time.avg:4.2f}) '
                 'PRE {pre_data_time.val:4.2f} ({pre_data_time.avg:4.2f}) '
                 'Data {data_time.val:4.2f} ({data_time.avg:4.2f}) '
                 'Forward {forward_time.val:4.2f} ({forward_time.avg:4.2f}) '
                 'Loss {loss.val:7.4f} ({loss.avg:7.4f}) [locs={locs.avg:7.4f} scos={scos.avg:7.4f}]'.format(
                     mode, epoch_str, i, len(loader), batch_time=batch_time, pre_data_time=pre_data_time,
                     data_time=data_time, forward_time=forward_time, loss=LOSSES, locs=LOSSES_LOCS, scos=LOSSES_SCOS)
                 + last_time
                 + ' I={:}'.format(list(inputs.size()))
                 + ' Vis={:2d} ({:.1f})'.format(int(visible_points.val), visible_points.avg))
  nme, _, _ = eval_meta.compute_mse(loader.dataset.dataset_name, logger)
  return LOSSES.avg, eval_meta, nme
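# Eval_Meta.compute_mse above reports the normalized mean error (NME); a
# minimal sketch of that metric, assuming per-image normalization distances
# (e.g. inter-ocular or face-box size, as stored in NormDistances):
import numpy as np

def compute_nme_sketch(predictions, groundtruths, norm_distances):
  # predictions, groundtruths : lists of [N, 2] arrays; groundtruth may be None
  # norm_distances            : list of per-image normalization scalars
  per_image = []
  for pred, gt, dist in zip(predictions, groundtruths, norm_distances):
    if gt is None: continue  # skip unannotated images
    per_image.append(np.linalg.norm(pred - gt, axis=1).mean() / dist)
  return float(np.mean(per_image))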