def __process_affine(self, image, target, theta, nopoints, aux_info=None):
    """Warp one sample with ``theta`` and build its heatmap supervision.

    The inputs are copied first (``clone``/``copy``), so the caller's
    ``image``, ``target`` and ``theta`` objects are not modified here.

    Args:
        image: tensor whose ``size()`` unpacks into three values (read below
            as channel, height, width).
        target: annotation object exposing ``copy()`` and ``get_points()``.
        theta: affine parameters handed to ``apply_affine2point`` and
            ``affine2image``.
        nopoints: truthy when the sample carries no landmark annotation.
        aux_info: only interpolated into the warning message.

    Returns:
        ``(affineImage, heatmaps, mask, norm_trans_points, theta,
        transpose_theta)``.
    """
    image = image.clone()
    target = target.copy()
    theta = theta.clone()
    _, in_h, in_w = image.size()
    out_h, out_w = self.shape

    if nopoints:
        # Unlabelled sample: zero points/heatmaps, all-ones mask, identity theta.
        norm_trans_points = torch.zeros((3, self.NUM_PTS))
        heatmaps = torch.zeros(
            (self.NUM_PTS + 1, out_h // self.downsample, out_w // self.downsample))
        mask = torch.ones((self.NUM_PTS + 1, 1, 1), dtype=torch.uint8)
        transpose_theta = identity2affine(False)
    else:
        moved = apply_affine2point(target.get_points(), theta, (in_h, in_w))
        norm_trans_points = apply_boundary(moved)

        # Rows 0-1 are converted to output-shape coordinates; row 2 is left
        # untouched (it is compared against 1 further down).
        real_trans_points = norm_trans_points.clone()
        real_trans_points[:2, :] = denormalize_points(
            self.shape, real_trans_points[:2, :])

        heatmaps, mask = generate_label_map(
            real_trans_points.numpy(),
            out_h // self.downsample,
            out_w // self.downsample,
            self.sigma,
            self.downsample,
            nopoints,
            self.heatmap_type,
        )  # H*W*C
        # Move the last axis to the front before converting to tensors.
        heatmaps = torch.from_numpy(
            heatmaps.transpose((2, 0, 1))).type(torch.FloatTensor)
        mask = torch.from_numpy(
            mask.transpose((2, 0, 1))).type(torch.ByteTensor)

        if self.mean_face is None:
            transpose_theta = identity2affine(False)
        elif torch.sum(norm_trans_points[2, :] == 1) < 3:
            # Fewer than three points flagged with 1 in row 2: fall back to identity.
            warnings.warn(
                'In LandmarkDataset after transformation, no visiable point, using identity instead. Aux: {:}'.format(
                    aux_info))
            transpose_theta = identity2affine(False)
        else:
            transpose_theta = solve2theta(
                norm_trans_points, self.mean_face.clone())

    affineImage = affine2image(image, theta, self.shape)
    if self.cutout is not None:
        affineImage = self.cutout(affineImage)
    return (affineImage, heatmaps, mask, norm_trans_points, theta,
            transpose_theta)
def __process_affine(self, image, target, theta, nopoints, aux_info=None):
    """Warp one sample with ``theta`` and build its heatmap supervision.

    The inputs are copied first (``clone``/``copy``), so the caller's
    ``image``, ``target`` and ``theta`` objects are not modified here.

    Args:
        image: tensor whose ``size()`` unpacks into three values (read below
            as channel, height, width).
        target: annotation object exposing ``copy()`` and ``get_points()``.
        theta: affine parameters handed to ``apply_affine2point`` and
            ``affine2image``.
        nopoints: truthy when the sample carries no landmark annotation.
        aux_info: accepted for interface compatibility; not used in the body.

    Returns:
        ``(affineImage, heatmaps, mask, norm_trans_points, theta)``.
    """
    image = image.clone()
    target = target.copy()
    theta = theta.clone()
    _, in_h, in_w = image.size()
    out_h, out_w = self.shape

    if nopoints:
        # Unlabelled sample: zero points/heatmaps and an all-ones mask.
        norm_trans_points = torch.zeros((3, self.NUM_PTS))
        heatmaps = torch.zeros(
            (self.NUM_PTS + 1, out_h // self.downsample, out_w // self.downsample))
        mask = torch.ones((self.NUM_PTS + 1, 1, 1), dtype=torch.uint8)
    else:
        moved = apply_affine2point(target.get_points(), theta, (in_h, in_w))
        norm_trans_points = apply_boundary(moved)

        # Only rows 0-1 are converted to output-shape coordinates.
        real_trans_points = norm_trans_points.clone()
        real_trans_points[:2, :] = denormalize_points(
            self.shape, real_trans_points[:2, :])

        heatmaps, mask = generate_label_map(
            real_trans_points.numpy(),
            out_h // self.downsample,
            out_w // self.downsample,
            self.sigma,
            self.downsample,
            nopoints,
            self.heatmap_type,
        )  # H*W*C
        # Move the last axis to the front before converting to tensors.
        heatmaps = torch.from_numpy(
            heatmaps.transpose((2, 0, 1))).type(torch.FloatTensor)
        mask = torch.from_numpy(
            mask.transpose((2, 0, 1))).type(torch.ByteTensor)

    affineImage = affine2image(image, theta, self.shape)
    return affineImage, heatmaps, mask, norm_trans_points, theta
def eval_robust_heatmap(detector, xloader, print_freq, logger):
    """Evaluate ``detector`` on a loader that yields several views per sample.

    For every sample the detector is run on all of its ``iters`` views; each
    view's predicted locations are mapped through that view's theta and
    de-normalised with the sample's shape.  The per-view predictions are
    averaged for ``Eval_Meta`` and kept individually for ``calculate_robust``.

    Args:
        detector: callable returning four values; the third is the predicted
            locations.
        xloader: loader whose ``dataset`` exposes ``NUM_PTS``, ``labels``,
            ``datas`` and ``get_normalization_distance``.
        print_freq: a progress line is logged every ``print_freq`` batches
            and on the last batch.
        logger: object with a ``log`` method.

    Returns:
        ``(errors, valids, eval_meta)``.
    """
    batch_time = AverageMeter()
    NUM_PTS = xloader.dataset.NUM_PTS
    Preds, GT_locs, Distances = [], [], []
    eval_meta = Eval_Meta()
    end = time.time()

    with torch.no_grad():
        detector.eval()
        for i, batch in enumerate(xloader):
            (inputs, heatmaps, masks, norm_points, thetas, data_index,
             nopoints, xshapes) = batch
            data_index = data_index.squeeze(1).tolist()
            batch_size, iters, _, H, W = inputs.size()

            for ibatch in range(batch_size):
                xinputs = inputs[ibatch]
                xpoints = norm_points[ibatch].permute(0, 2, 1).contiguous()
                xthetas = thetas[ibatch]
                _, _, batch_locs, _ = detector(
                    xinputs.cuda(non_blocking=True))
                # Drop the last entry along dim 1 of the predictions.
                batch_locs = batch_locs.cpu()[:, :-1]

                all_locs = []
                for _iter in range(iters):
                    view_locs = normalize_points(
                        (H, W), batch_locs[_iter].permute(1, 0))
                    # Append a row of ones so the 2-row theta can be applied.
                    homogeneous = torch.cat(
                        (view_locs, torch.ones(1, NUM_PTS)), dim=0)
                    mapped = torch.mm(xthetas[_iter, :2], homogeneous)
                    rlocs = denormalize_points(
                        xshapes[ibatch].tolist(), mapped)
                    rlocs = torch.cat(
                        (rlocs.permute(1, 0), xpoints[_iter, :, 2:]), dim=1)
                    all_locs.append(rlocs.clone())

                sample_id = data_index[ibatch]
                GT_loc = xloader.dataset.labels[sample_id].get_points()
                norm_distance = xloader.dataset.get_normalization_distance(
                    sample_id)
                # save the results
                mean_locs = sum(all_locs) / len(all_locs)
                eval_meta.append(mean_locs.numpy().T, GT_loc.numpy(),
                                 xloader.dataset.datas[sample_id],
                                 norm_distance)
                Distances.append(norm_distance)
                Preds.append(all_locs)
                GT_locs.append(GT_loc.permute(1, 0))

            # compute time
            batch_time.update(time.time() - end)
            end = time.time()
            is_last = i + 1 == len(xloader)
            if i % print_freq == 0 or is_last:
                last_time = convert_secs2time(
                    batch_time.avg * (len(xloader) - i - 1), True)
                logger.log(
                    ' -->>[Robust HEATMAP-based Evaluation] [{:03d}/{:03d}] Time : {:}'
                    .format(i, len(xloader), last_time))

    # evaluate the results
    errors, valids = calculate_robust(Preds, GT_locs, Distances, NUM_PTS)
    return errors, valids, eval_meta
def stm_main_heatmap(args, loader, net, criterion, optimizer, epoch_str,
                     logger, opt_config, stm_config, use_stm, mode):
    """Run one epoch of detection + temporal + multi-view training or testing.

    Each loader batch is split at ``i_batch_size + v_batch_size``: the first
    part holds image/video samples, the rest the multi-view samples.  The
    detection loss, the temporal loss (when ``use_stm[0]``) and the
    multi-view loss (when ``use_stm[1]``) are summed; in ``'train'`` mode the
    optimizer is stepped on that sum.  Predictions on annotated samples are
    collected in an ``Eval_Meta`` for the final error computation.

    Args:
        args: run options (copied with ``copy.deepcopy`` before use).
        loader: batch iterator; its ``dataset`` supplies ``video_L``,
            ``datas``, ``labels``, ``NormDistances`` and ``dataset_name``.
        net, criterion, optimizer: model, loss module and optimizer.
        epoch_str: label used in log lines and the debug directory name.
        logger: object with a ``log`` method.
        opt_config: accepted for interface compatibility; not used here.
        stm_config: supplies ``max_views``, thresholds and loss weights.
        use_stm: pair of flags ``(use temporal loss, use multi-view loss)``.
        mode: ``'train'`` or ``'test'``.

    Returns:
        ``(TotalLosses.avg, nme)``.
    """
    assert mode in ('train', 'test'), 'invalid mode : {:}'.format(mode)
    args = copy.deepcopy(args)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    forward_time = AverageMeter()
    eval_time = AverageMeter()
    visible_points = AverageMeter()
    DetLosses = AverageMeter()
    TemporalLosses = AverageMeter()
    MultiviewLosses = AverageMeter()
    TotalLosses = AverageMeter()
    alk_points = AverageMeter()
    a3d_points = AverageMeter()
    annotate_index = loader.dataset.video_L
    eval_meta = Eval_Meta()
    cpu = torch.device('cpu')

    save_dir = None
    if args.debug:
        save_dir = Path(
            args.save_path) / 'DEBUG' / ('{:}-'.format(mode) + epoch_str)

    # switch to train mode
    if mode == 'train':
        logger.log('STM-Main-REG : training : {:} .. STM = {:}'.format(
            stm_config, use_stm))
        print_freq = args.print_freq
        net.train()
        criterion.train()
    else:
        logger.log('STM-Main-REG : evaluation mode.')
        print_freq = args.print_freq_eval
        net.eval()
        criterion.eval()

    i_batch_size = args.i_batch_size
    v_batch_size = args.v_batch_size
    m_batch_size = args.m_batch_size
    iv_size = i_batch_size + v_batch_size
    end = time.time()

    for i, batch in enumerate(loader):
        (frames, Fflows, Bflows, targets, masks, normpoints, transthetas,
         MV_Tensors, MV_Thetas, MV_Shapes, MV_KRT, torch_is_3D,
         torch_is_images, image_index, nopoints, shapes,
         MultiViewPaths) = batch
        # frames : IBatch+VBatch+MBatch, Frame, Channel, Height, Width
        # Fflows : IBatch+VBatch+MBatch, Frame-1, Height, Width, 2
        # Bflows : IBatch+VBatch+MBatch, Frame-1, Height, Width, 2

        # Multi-view entries sit after the first iv_size rows.
        MV_Mask = masks[iv_size:]
        MV_Tensors = MV_Tensors[iv_size:]
        MV_Thetas = MV_Thetas[iv_size:]
        MV_Shapes = MV_Shapes[iv_size:]
        MV_KRT = MV_KRT[iv_size:]
        torch_is_3D = torch_is_3D[iv_size:]

        # Image/video entries are the first iv_size rows.
        frames = frames[:iv_size]
        Fflows = Fflows[:iv_size]
        Bflows = Bflows[:iv_size]
        targets = targets[:iv_size]
        masks = masks[:iv_size]
        normpoints = normpoints[:iv_size]
        transthetas = transthetas[:iv_size]
        nopoints = nopoints[:iv_size]
        shapes = shapes[:iv_size]
        torch_is_images = torch_is_images[:iv_size]

        assert torch.sum(torch_is_images[:i_batch_size]).item() == i_batch_size, \
            'Image Check Fail : {:} vs. {:}'.format(
                torch_is_images[:i_batch_size], i_batch_size)
        assert v_batch_size == 0 or torch.sum(
            torch_is_images[i_batch_size:]).item() == 0, \
            'Video Check Fail : {:} vs. {:}'.format(
                torch_is_images[i_batch_size:], v_batch_size)
        assert torch_is_3D.sum().item() == m_batch_size, \
            'Multiview Check Fail : {:} vs. {:}'.format(
                torch_is_3D, m_batch_size)

        image_index = image_index.squeeze(1).tolist()
        batch_size, frame_length, C, H, W = frames.size()
        num_pts = args.num_pts
        num_views = stm_config.max_views
        visible_point_num = float(
            np.sum(masks.numpy()[:, :-1, :, :])) / batch_size
        visible_points.update(visible_point_num, batch_size)

        normpoints = normpoints.permute(0, 2, 1)

        target_heats = targets.cuda(non_blocking=True)
        target_points = normpoints[:, :, :2].contiguous().cuda(
            non_blocking=True)
        target_scores = normpoints[:, :, 2:].contiguous().cuda(
            non_blocking=True)
        # Samples flagged in nopoints contribute an all-zero mask.
        det_masks = (1 - nopoints).view(batch_size, 1, 1, 1) * masks
        have_det_loss = det_masks.sum().item() > 0
        det_masks = det_masks.cuda(non_blocking=True)
        nopoints = nopoints.squeeze(1).tolist()

        # measure data loading time
        data_time.update(time.time() - end)

        # batch_heatmaps is a list for stage-predictions, each element should
        # be [Batch, Sequence, PTS, H/Down, W/Down]
        (batch_heatmaps, batch_locs, batch_scos, batch_past2now,
         batch_future2now, batch_FBcheck, multiview_heatmaps,
         multiview_locs) = net(frames, Fflows, Bflows, MV_Tensors,
                               torch_is_images)
        annot_heatmaps = [stage[:, annotate_index] for stage in batch_heatmaps]
        forward_time.update(time.time() - end)

        # detection loss
        if have_det_loss:
            det_loss, each_stage_loss_value = compute_stage_loss(
                criterion, target_heats, annot_heatmaps, det_masks)
            DetLosses.update(det_loss.item(), batch_size)
            each_stage_loss_value = show_stage_loss(each_stage_loss_value)
        else:
            det_loss = 0
            each_stage_loss_value = 'no-det-loss'

        # temporal loss
        if use_stm[0]:
            video_batch_locs = batch_locs[i_batch_size:, :, :num_pts]
            video_past2now = batch_past2now[i_batch_size:, :, :num_pts]
            video_future2now = batch_future2now[i_batch_size:, :, :num_pts]
            video_FBcheck = batch_FBcheck[i_batch_size:, :num_pts]
            video_mask = masks[i_batch_size:, :num_pts].contiguous().cuda(
                non_blocking=True)
            video_heatmaps = [
                stage[i_batch_size:, :, :num_pts] for stage in batch_heatmaps
            ]
            sbr_loss, available_nums, loss_string = calculate_temporal_loss(
                criterion, video_heatmaps, video_batch_locs, video_past2now,
                video_future2now, video_FBcheck, video_mask, stm_config)
            alk_points.update(
                float(available_nums) / v_batch_size, v_batch_size)
            if available_nums > stm_config.available_sbr_thresh:
                TemporalLosses.update(sbr_loss.item(), v_batch_size)
            else:
                sbr_loss, sbr_loss_string = 0, 'non-sbr-loss'
        else:
            sbr_loss, sbr_loss_string = 0, 'non-sbr-loss'

        # multiview loss
        if use_stm[1]:
            mv_device = multiview_locs.device
            MV_Mask_G = MV_Mask[:, :-1].view(
                m_batch_size, 1, -1, 1).contiguous().cuda(non_blocking=True)
            MV_Thetas_G = MV_Thetas.to(mv_device)
            MV_Shapes_G = MV_Shapes.to(mv_device).view(
                m_batch_size, num_views, 1, 2)
            MV_KRT_G = MV_KRT.to(mv_device)

            # Append a row of ones so the 2-row thetas can be applied.
            ones_row = torch.ones(m_batch_size, num_views, 1, num_pts,
                                  device=mv_device)
            mv_norm_locs_trs = torch.cat(
                (multiview_locs[:, :, :num_pts].permute(0, 1, 3, 2), ones_row),
                dim=2)
            mv_norm_locs_ori = torch.matmul(MV_Thetas_G[:, :, :2],
                                            mv_norm_locs_trs)
            mv_norm_locs_ori = mv_norm_locs_ori.permute(0, 1, 3, 2)
            mv_real_locs_ori = denormalize_L(mv_norm_locs_ori, MV_Shapes_G)
            mv_3D_locs_ori = TriangulateDLT_BatchCam(MV_KRT_G,
                                                     mv_real_locs_ori)
            mv_proj_locs_ori = ProjectKRT_Batch(
                MV_KRT_G, mv_3D_locs_ori.view(m_batch_size, 1, num_pts, 3))
            mv_pnorm_locs_ori = normalize_L(mv_proj_locs_ori, MV_Shapes_G)
            mv_pnorm_locs_trs = convert_theta(mv_pnorm_locs_ori, MV_Thetas_G)
            MV_locs = multiview_locs[:, :, :num_pts].contiguous()
            MV_heatmaps = [
                stage[:, :, :num_pts] for stage in multiview_heatmaps
            ]

            if args.debug:
                with torch.no_grad():
                    for ims in range(m_batch_size):
                        x_index = image_index[iv_size + ims]
                        x_paths = [
                            xlist[iv_size + ims] for xlist in MultiViewPaths
                        ]
                        x_mv_locs = mv_real_locs_ori[ims]
                        p_mv_locs = mv_proj_locs_ori[ims]
                        multiview_debug_save(save_dir, '{:}'.format(x_index),
                                             x_paths,
                                             x_mv_locs.cpu().numpy(),
                                             p_mv_locs.cpu().numpy())
                        y_mv_locs = denormalize_points_batch(
                            (H, W), MV_locs[ims])
                        q_mv_locs = denormalize_points_batch(
                            (H, W), mv_pnorm_locs_trs[ims])
                        temp_tensors = MV_Tensors[ims]
                        temp_images = [
                            args.tensor2imageF(x) for x in temp_tensors
                        ]
                        temp_names = [Path(x).name for x in x_paths]
                        multiview_debug_save_v2(save_dir,
                                                '{:}'.format(x_index),
                                                temp_names, temp_images,
                                                y_mv_locs.cpu().numpy(),
                                                q_mv_locs.cpu().numpy())

            stm_loss, available_nums = calculate_multiview_loss(
                criterion, MV_heatmaps, MV_locs, mv_pnorm_locs_trs, MV_Mask_G,
                stm_config)
            a3d_points.update(
                float(available_nums) / m_batch_size, m_batch_size)
            if available_nums > stm_config.available_stm_thresh:
                MultiviewLosses.update(stm_loss.item(), m_batch_size)
            else:
                stm_loss = 0
        else:
            stm_loss = 0

        # measure accuracy and record loss
        if use_stm[0]:
            total_loss = (det_loss + sbr_loss * stm_config.sbr_weights
                          + stm_loss * stm_config.stm_weights)
        else:
            total_loss = det_loss + stm_loss * stm_config.stm_weights

        if isinstance(total_loss, numbers.Number):
            # Every component fell back to the plain number 0: nothing to optimise.
            warnings.warn(
                'The {:}-th iteration has no detection loss and no lk loss'.
                format(i))
        else:
            TotalLosses.update(total_loss.item(), batch_size)
            # compute gradient and do SGD step
            if mode == 'train':  # training mode
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

        eval_time.update(time.time() - end)

        with torch.no_grad():
            batch_locs = batch_locs.detach().to(cpu)[:, annotate_index,
                                                     :num_pts]
            batch_scos = batch_scos.detach().to(cpu)[:, annotate_index,
                                                     :num_pts]
            # evaluate the training data
            for ibatch in range(iv_size):
                imgidx = image_index[ibatch]
                nopoint = nopoints[ibatch]
                if nopoint == 1:
                    continue
                norm_locs = torch.cat(
                    (batch_locs[ibatch].permute(1, 0), torch.ones(1, num_pts)),
                    dim=0)
                transtheta = transthetas[ibatch][:2, :]
                norm_locs = torch.mm(transtheta, norm_locs)
                real_locs = denormalize_points(shapes[ibatch].tolist(),
                                               norm_locs)
                real_locs = torch.cat(
                    (real_locs, batch_scos[ibatch].view(1, num_pts)), dim=0)

                image_path = loader.dataset.datas[imgidx][annotate_index]
                normDistce = loader.dataset.NormDistances[imgidx]
                xpoints = loader.dataset.labels[imgidx].get_points()
                eval_meta.append(real_locs.numpy(), xpoints.numpy(),
                                 image_path, normDistce)
                if save_dir:
                    # NOTE(review): `meanthetas` is never assigned in this
                    # function (the loader tuple does not provide it), so
                    # unless it exists as a module-level name this call raises
                    # NameError whenever args.debug is set.  Left unchanged
                    # because the intended argument cannot be determined here.
                    pro_debug_save(save_dir,
                                   Path(image_path).name,
                                   frames[ibatch, annotate_index],
                                   targets[ibatch], normpoints[ibatch],
                                   meanthetas[ibatch],
                                   batch_heatmaps[-1][ibatch, annotate_index],
                                   args.tensor2imageF)

        # measure elapsed time
        batch_time.update(time.time() - end)
        last_time = convert_secs2time(
            batch_time.avg * (len(loader) - i - 1), True)
        end = time.time()

        if i % print_freq == 0 or i + 1 == len(loader):
            summary = (
                ' -->>[{:}]: [{:}][{:03d}/{:03d}] '
                'Time {batch_time.val:4.2f} ({batch_time.avg:4.2f}) '
                'Data {data_time.val:4.2f} ({data_time.avg:4.2f}) '
                'F-time {forward_time.val:4.2f} ({forward_time.avg:4.2f}) '
                'Det {dloss.val:7.4f} ({dloss.avg:7.4f}) '
                'SBR {sloss.val:7.6f} ({sloss.avg:7.6f}) '
                'STM {mloss.val:7.6f} ({mloss.avg:7.6f}) '
                'Loss {loss.val:7.4f} ({loss.avg:7.4f}) '
            ).format(mode, epoch_str, i, len(loader),
                     batch_time=batch_time, data_time=data_time,
                     forward_time=forward_time, dloss=DetLosses,
                     sloss=TemporalLosses, mloss=MultiviewLosses,
                     loss=TotalLosses)
            size_text = ' I={:}'.format(list(frames.size()))
            vis_text = ' Vis-PTS : {:2d} ({:.1f})'.format(
                int(visible_points.val), visible_points.avg)
            ava_text = ' Ava-PTS : {:.1f} ({:.1f})'.format(
                alk_points.val, alk_points.avg)
            a3d_text = ' A3D-PTS : {:.1f} ({:.1f})'.format(
                a3d_points.val, a3d_points.avg)
            logger.log(summary + last_time + each_stage_loss_value
                       + size_text + vis_text + ava_text + a3d_text)
            if args.debug:
                logger.log(' -->>Indexes : {:}'.format(image_index))

    nme, _, _ = eval_meta.compute_mse(loader.dataset.dataset_name, logger)
    return TotalLosses.avg, nme
def x_sbr_main_regression(args, loader, teacher, net, criterion, optimizer,
                          epoch_str, logger, opt_config, sbr_config, use_sbr,
                          mode):
    """Run one epoch of regression training/testing with a teacher network.

    The detection loss compares ``net``'s locations with the output of
    ``teacher`` (evaluated under ``torch.no_grad``).  When ``use_sbr`` is
    truthy a temporal loss on the video part of the batch is added, weighted
    by ``sbr_config.weight``.  In ``'train'`` mode the optimizer is stepped
    on the sum.

    Args:
        args: run options (copied with ``copy.deepcopy`` before use).
        loader: batch iterator; its ``dataset`` supplies ``video_L``,
            ``datas``, ``labels``, ``NormDistances`` and ``dataset_name``.
        teacher: network producing the regression targets; always put in
            eval mode.
        net, criterion, optimizer: student model, loss module and optimizer.
        epoch_str: label used in log lines and the debug directory name.
        logger: object with a ``log`` method.
        opt_config: accepted for interface compatibility; not used here.
        sbr_config: supplies ``available_thresh`` and ``weight``.
        use_sbr: whether the temporal loss is computed.
        mode: ``'train'`` or ``'test'``.

    Returns:
        ``(TotalLosses.avg, nme)``.
    """
    assert mode in ('train', 'test'), 'invalid mode : {:}'.format(mode)
    args = copy.deepcopy(args)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    forward_time = AverageMeter()
    eval_time = AverageMeter()
    visible_points = AverageMeter()
    DetLosses = AverageMeter()
    TotalLosses = AverageMeter()
    TemporalLosses = AverageMeter()
    alk_points = AverageMeter()
    annotate_index = loader.dataset.video_L
    eval_meta = Eval_Meta()
    cpu = torch.device('cpu')

    save_dir = None
    if args.debug:
        save_dir = Path(
            args.save_path) / 'DEBUG' / ('{:}-'.format(mode) + epoch_str)

    # switch to train mode
    if mode == 'train':
        logger.log('Temporal-Main-Regression: training : {:} .. SBR={:}'.format(
            sbr_config, use_sbr))
        print_freq = args.print_freq
        net.train()
        criterion.train()
    else:
        logger.log('Temporal-Main-Regression : evaluation mode.')
        print_freq = args.print_freq_eval
        net.eval()
        criterion.eval()
    teacher.eval()

    i_batch_size = args.i_batch_size
    v_batch_size = args.v_batch_size
    end = time.time()

    for i, batch in enumerate(loader):
        (frames, Fflows, Bflows, targets, masks, normpoints, transthetas,
         meanthetas, image_index, nopoints, shapes, is_images) = batch
        # frames : IBatch+VBatch, Frame, Channel, Height, Width
        # Fflows : IBatch+VBatch, Frame-1, Height, Width, 2
        # Bflows : IBatch+VBatch, Frame-1, Height, Width, 2

        image_index = image_index.squeeze(1).tolist()
        batch_size, frame_length, C, H, W = frames.size()
        num_pts = args.num_pts
        visible_point_num = float(
            np.sum(masks.numpy()[:, :-1, :, :])) / batch_size
        visible_points.update(visible_point_num, batch_size)
        assert is_images[:i_batch_size].sum().item() == i_batch_size, \
            '{:} vs. {:}'.format(is_images, i_batch_size)
        assert is_images[i_batch_size:].sum().item() == 0, \
            '{:} vs. {:}'.format(is_images, v_batch_size)

        normpoints = normpoints.permute(0, 2, 1)
        target_points = normpoints[:, :, :2].contiguous().cuda(
            non_blocking=True)
        target_scores = normpoints[:, :, 2:].contiguous().cuda(
            non_blocking=True)
        # Samples flagged in nopoints contribute an all-zero mask.
        point_mask = masks[:, :num_pts].contiguous().view(
            batch_size, num_pts, 1)
        det_masks = (1 - nopoints).view(batch_size, 1, 1) * point_mask
        have_det_loss = det_masks.sum().item() > 0
        det_masks = det_masks.cuda(non_blocking=True)
        nopoints = nopoints.squeeze(1).tolist()

        # measure data loading time
        data_time.update(time.time() - end)

        batch_locs, batch_past2now, batch_future2now, batch_FBcheck = net(
            frames, Fflows, Bflows, is_images)
        forward_time.update(time.time() - end)

        # detection loss
        if have_det_loss:
            with torch.no_grad():
                sotf_targets = teacher(frames)
            det_loss = criterion(batch_locs, sotf_targets, None)
            DetLosses.update(det_loss.item(), batch_size)
        else:
            det_loss = 0

        # temporal loss
        if use_sbr:
            video_batch_locs = batch_locs[i_batch_size:, :]
            video_past2now = batch_past2now[i_batch_size:]
            video_future2now = batch_future2now[i_batch_size:]
            video_FBcheck = batch_FBcheck[i_batch_size:]
            video_mask = masks[i_batch_size:, :-1].contiguous().cuda(
                non_blocking=True)
            sbr_loss, available_nums = calculate_temporal_loss(
                criterion, video_batch_locs, video_past2now,
                video_future2now, video_FBcheck, video_mask, sbr_config)
            alk_points.update(
                float(available_nums) / v_batch_size, v_batch_size)
            if available_nums > sbr_config.available_thresh:
                TemporalLosses.update(sbr_loss.item(), v_batch_size)
            else:
                sbr_loss = 0
        else:
            sbr_loss = 0

        # measure accuracy and record loss
        if use_sbr:
            total_loss = det_loss + sbr_loss * sbr_config.weight
        else:
            total_loss = det_loss

        if isinstance(total_loss, numbers.Number):
            # Every component fell back to the plain number 0: nothing to optimise.
            warnings.warn(
                'The {:}-th iteration has no detection loss and no lk loss'.format(i))
        else:
            TotalLosses.update(total_loss.item(), batch_size)
            # compute gradient and do SGD step
            if mode == 'train':  # training mode
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

        eval_time.update(time.time() - end)

        with torch.no_grad():
            batch_locs = batch_locs.detach().to(cpu)[:, annotate_index]
            # evaluate the training data
            for ibatch, (imgidx, nopoint) in enumerate(
                    zip(image_index, nopoints)):
                if nopoint == 1:
                    continue
                norm_locs = torch.cat(
                    (batch_locs[ibatch].permute(1, 0), torch.ones(1, num_pts)),
                    dim=0)
                transtheta = transthetas[ibatch][:2, :]
                norm_locs = torch.mm(transtheta, norm_locs)
                real_locs = denormalize_points(shapes[ibatch].tolist(),
                                               norm_locs)
                # The third row is filled with ones here.
                real_locs = torch.cat((real_locs, torch.ones(1, num_pts)),
                                      dim=0)

                image_path = loader.dataset.datas[imgidx][annotate_index]
                normDistce = loader.dataset.NormDistances[imgidx]
                xpoints = loader.dataset.labels[imgidx].get_points()
                eval_meta.append(real_locs.numpy(), xpoints.numpy(),
                                 image_path, normDistce)
                if save_dir:
                    # NOTE(review): `batch_heatmaps` is never assigned in this
                    # function (the network call above returns no heatmaps),
                    # so unless it exists as a module-level name this call
                    # raises NameError whenever args.debug is set.  Left
                    # unchanged because the intended argument cannot be
                    # determined here.
                    pro_debug_save(save_dir,
                                   Path(image_path).name,
                                   frames[ibatch, annotate_index],
                                   targets[ibatch], normpoints[ibatch],
                                   meanthetas[ibatch],
                                   batch_heatmaps[-1][ibatch, annotate_index],
                                   args.tensor2imageF)

        # measure elapsed time
        batch_time.update(time.time() - end)
        last_time = convert_secs2time(
            batch_time.avg * (len(loader) - i - 1), True)
        end = time.time()

        if i % print_freq == 0 or i + 1 == len(loader):
            summary = (
                ' -->>[{:}]: [{:}][{:03d}/{:03d}] '
                'Time {batch_time.val:4.2f} ({batch_time.avg:4.2f}) '
                'Data {data_time.val:4.2f} ({data_time.avg:4.2f}) '
                'F-time {forward_time.val:4.2f} ({forward_time.avg:4.2f}) '
                'Det {dloss.val:7.4f} ({dloss.avg:7.4f}) '
                'SBR {sloss.val:7.4f} ({sloss.avg:7.4f}) '
                'Loss {loss.val:7.4f} ({loss.avg:7.4f}) '
            ).format(mode, epoch_str, i, len(loader),
                     batch_time=batch_time, data_time=data_time,
                     forward_time=forward_time, dloss=DetLosses,
                     sloss=TemporalLosses, loss=TotalLosses)
            size_text = ' I={:}'.format(list(frames.size()))
            vis_text = ' Vis-PTS : {:2d} ({:.1f})'.format(
                int(visible_points.val), visible_points.avg)
            ava_text = ' Ava-PTS : {:.1f} ({:.1f})'.format(
                alk_points.val, alk_points.avg)
            logger.log(summary + last_time + size_text + vis_text + ava_text)
            if args.debug:
                logger.log(' -->>Indexes : {:}'.format(image_index))

    nme, _, _ = eval_meta.compute_mse(loader.dataset.dataset_name, logger)
    return TotalLosses.avg, nme
def evaluate(args):
    """Detect facial landmarks on a single image and print/draw the result.

    Loads the checkpoint referenced by ``args.model``, rebuilds the dataset
    pre-processing and the network from the arguments stored in that
    checkpoint, runs one forward pass on the face crop given by
    ``args.face`` and prints one line per landmark.  When ``args.save`` is
    set, a visualisation is written to that path.

    Args:
        args: options object providing ``cuda``, ``image``, ``model``,
            ``face`` (four values) and ``save``.

    Raises:
        AssertionError: if CUDA is requested but unavailable, a model path
            does not exist, or ``args.face`` does not have four entries.
    """
    if args.cuda:
        assert torch.cuda.is_available(), 'CUDA is not available.'
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True
    else:
        print('Use the CPU mode')

    print('The image is {:}'.format(args.image))
    print('The model is {:}'.format(args.model))
    last_info = Path(args.model)
    assert last_info.exists(), 'The model path {:} does not exist'.format(
        last_info)
    # NOTE(security): torch.load unpickles the file, which can execute
    # arbitrary code -- only point this at checkpoints from a trusted source.
    last_info = torch.load(last_info, map_location=torch.device('cpu'))
    snapshot = last_info['last_checkpoint']
    assert snapshot.exists(), 'The model path {:} does not exist'.format(
        snapshot)
    print('The face bounding box is {:}'.format(args.face))
    assert len(args.face) == 4, 'Invalid face input : {:}'.format(args.face)
    snapshot = torch.load(snapshot, map_location=torch.device('cpu'))
    param = snapshot['args']

    # General Data Argumentation
    # The explicit comparison is kept so that non-bool values of use_gray
    # select the same branch as before.
    if param.use_gray == False:  # noqa: E712
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
    else:
        normalize = transforms.Normalize(mean=[0.5], std=[0.5])
    eval_transform = transforms.Compose2V([
        transforms.ToTensor(),
        normalize,
        transforms.PreCrop(param.pre_crop_expand),
        transforms.CenterCrop(param.crop_max),
    ])

    model_config = load_configure(param.model_config, None)
    # NOTE(review): the shape is hard-coded to (120, 96) instead of being read
    # from the checkpoint (param.height, param.width) -- confirm this matches
    # the model being evaluated.
    dataset = Dataset(eval_transform, param.sigma, model_config.downsample,
                      param.heatmap_type, (120, 96), param.use_gray, None,
                      param.data_indicator)
    dataset.reset(param.num_pts)

    net = obtain_pro_model(model_config, param.num_pts + 1, param.sigma,
                           param.use_gray)
    net.load_state_dict(remove_module_dict(snapshot['state_dict']))
    if args.cuda:
        net = net.cuda()
    # Inference must run in eval mode so that layers such as BatchNorm and
    # Dropout use their inference behaviour (the training procedures do the
    # same in their test branch).
    net.eval()

    print('Processing the input face image.')
    face_meta = PointMeta(dataset.NUM_PTS, None, args.face, args.image,
                          'BASE-EVAL')
    face_img = pil_loader(args.image, dataset.use_gray)
    affineImage, _, _, _, transthetas, _, _, _, shape = dataset._process_(
        face_img, face_meta, -1)

    # network forward
    with torch.no_grad():
        inputs = affineImage.unsqueeze(0)
        if args.cuda:
            inputs = inputs.cuda()
        _, _, batch_locs, batch_scos = net(inputs)
        batch_locs, batch_scos = batch_locs.cpu(), batch_scos.cpu()
        _, _, H, W = inputs.size()
        num_pts = param.num_pts
        # Drop the last entry of both predictions (the network was built
        # with num_pts + 1 outputs).
        locations = batch_locs[0, :-1, :]
        scores = batch_scos[:, :-1]
        norm_locs = normalize_points((H, W), locations.transpose(1, 0))
        # Append a row of ones so the 2-row theta can be applied.
        norm_locs = torch.cat((norm_locs, torch.ones(1, num_pts)), dim=0)
        transtheta = transthetas[:2, :]
        norm_locs = torch.mm(transtheta, norm_locs)
        real_locs = denormalize_points(shape.tolist(), norm_locs)
        real_locs = torch.cat((real_locs, scores), dim=0)

    print('the coordinates for {:} facial landmarks:'.format(param.num_pts))
    for i in range(param.num_pts):
        point = real_locs[:, i]
        print(
            'the {:02d}/{:02d}-th landmark : ({:.1f}, {:.1f}), score = {:.2f}'.
            format(i, param.num_pts, float(point[0]), float(point[1]),
                   float(point[2])))

    if args.save:
        resize = 512
        image = draw_image_by_points(args.image, real_locs, 2, (255, 0, 0),
                                     args.face, resize)
        image.save(args.save)
        print('save the visualization results into {:}'.format(args.save))
    else:
        print('ignore the visualization procedure')
def basic_main_heatmap(args, loader, net, criterion, optimizer, epoch_str,
                       logger, opt_config, mode):
    """Run one epoch of heatmap-based detector training or testing.

    For every batch the stage-wise heatmap loss is computed; in ``'train'``
    mode the optimizer is stepped on it.  Predicted locations are mapped
    through each sample's theta, de-normalised with its shape and collected
    in an ``Eval_Meta``.

    Args:
        args: run options (copied with ``copy.deepcopy`` before use).
        loader: batch iterator; its ``dataset`` supplies ``datas``,
            ``labels``, ``NormDistances`` and ``dataset_name``.
        net, criterion, optimizer: model, loss module and optimizer.
        epoch_str: label used in log lines and the debug directory name.
        logger: object with a ``log`` method.
        opt_config: accepted for interface compatibility; not used here.
        mode: ``'train'`` or ``'test'``.

    Returns:
        ``(losses.avg, eval_meta, nme)``.
    """
    assert mode in ('train', 'test'), 'invalid mode : {:}'.format(mode)
    args = copy.deepcopy(args)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    forward_time = AverageMeter()
    eval_time = AverageMeter()
    visible_points = AverageMeter()
    losses = AverageMeter()
    eval_meta = Eval_Meta()
    cpu = torch.device('cpu')

    save_dir = None
    if args.debug:
        save_dir = Path(
            args.save_path) / 'DEBUG' / ('{:}-'.format(mode) + epoch_str)

    # switch to train mode
    if mode == 'train':
        logger.log('basic-main-V2 : training mode.')
        print_freq = args.print_freq
        net.train()
        criterion.train()
    else:
        logger.log('basic-main-V2 : evaluation mode.')
        print_freq = args.print_freq_eval
        net.eval()
        criterion.eval()

    end = time.time()
    for i, batch in enumerate(loader):
        (inputs, targets, masks, normpoints, transthetas, meanthetas,
         image_index, nopoints, shapes) = batch
        # inputs : Batch, Channel, Height, Width

        image_index = image_index.squeeze(1).tolist()
        batch_size, C, H, W = inputs.size()
        num_pts = args.num_pts
        visible_point_num = float(
            np.sum(masks.numpy()[:, :-1, :, :])) / batch_size
        visible_points.update(visible_point_num, batch_size)
        annotated_num = batch_size - sum(nopoints)

        # Samples flagged in nopoints contribute an all-zero mask.
        det_masks = (1 - nopoints).view(batch_size, 1, 1, 1) * masks
        det_masks = det_masks.cuda(non_blocking=True)
        nopoints = nopoints.squeeze(1).tolist()
        targets = targets.cuda(non_blocking=True)
        # measure data loading time
        data_time.update(time.time() - end)

        # batch_heatmaps is a list for stage-predictions, each element should
        # be [Batch, C, H, W]
        batch_features, batch_heatmaps, batch_locs, batch_scos = net(inputs)
        forward_time.update(time.time() - end)

        loss, each_stage_loss_value = compute_stage_loss(
            criterion, targets, batch_heatmaps, det_masks)

        # measure accuracy and record loss
        losses.update(loss.item(), batch_size)

        # compute gradient and do SGD step
        if mode == 'train':  # training mode
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        eval_time.update(time.time() - end)

        with torch.no_grad():
            batch_locs = batch_locs.detach().to(cpu)
            batch_scos = batch_scos.detach().to(cpu)
            # evaluate the training data
            for ibatch, (imgidx, nopoint) in enumerate(
                    zip(image_index, nopoints)):
                # Drop the last entry of the predictions for this sample.
                locations = batch_locs[ibatch, :-1, :]
                norm_locs = normalize_points((H, W),
                                             locations.transpose(1, 0))
                # Append a row of ones so the 2-row theta can be applied.
                norm_locs = torch.cat((norm_locs, torch.ones(1, num_pts)),
                                      dim=0)
                transtheta = transthetas[ibatch][:2, :]
                norm_locs = torch.mm(transtheta, norm_locs)
                real_locs = denormalize_points(shapes[ibatch].tolist(),
                                               norm_locs)
                score_row = batch_scos[ibatch, :-1].view(1, -1)
                real_locs = torch.cat((real_locs, score_row), dim=0)

                image_path = loader.dataset.datas[imgidx]
                normDistce = loader.dataset.NormDistances[imgidx]
                # Unannotated samples are recorded without ground truth.
                xpoints = None
                if nopoint != 1:
                    xpoints = loader.dataset.labels[imgidx].get_points().numpy()
                eval_meta.append(real_locs.numpy(), xpoints, image_path,
                                 normDistce)
                if save_dir:
                    pro_debug_save(save_dir,
                                   Path(image_path).name, inputs[ibatch],
                                   targets[ibatch], normpoints[ibatch],
                                   meanthetas[ibatch],
                                   batch_heatmaps[-1][ibatch],
                                   args.tensor2imageF)

        # measure elapsed time
        batch_time.update(time.time() - end)
        last_time = convert_secs2time(
            batch_time.avg * (len(loader) - i - 1), True)
        end = time.time()

        if i % print_freq == 0 or i + 1 == len(loader):
            summary = (
                ' -->>[{:}]: [{:}][{:03d}/{:03d}] '
                'Time {batch_time.val:4.2f} ({batch_time.avg:4.2f}) '
                'Data {data_time.val:4.2f} ({data_time.avg:4.2f}) '
                'Forward {forward_time.val:4.2f} ({forward_time.avg:4.2f}) '
                'Loss {loss.val:7.4f} ({loss.avg:7.4f}) '
            ).format(mode, epoch_str, i, len(loader),
                     batch_time=batch_time, data_time=data_time,
                     forward_time=forward_time, loss=losses)
            size_text = ' In={:} Tar={:}'.format(
                list(inputs.size()), list(targets.size()))
            vis_text = ' Vis-PTS : {:2d} ({:.1f})'.format(
                int(visible_points.val), visible_points.avg)
            logger.log(summary + last_time
                       + show_stage_loss(each_stage_loss_value)
                       + size_text + vis_text)

    nme, _, _ = eval_meta.compute_mse(loader.dataset.dataset_name, logger)
    return losses.avg, eval_meta, nme
def main(args):
    """Count forward-backward-check error cases over the training videos.

    Builds the video dataset and the temporal (SBR) network from ``args``,
    initialises the weights from ``args.init_model`` and then visits the
    shuffled training samples one at a time.  For each sample where more
    than two landmarks pass the forward-backward check, detections and
    tracked locations are compared with the ground truth through
    ``check_is_1st_error`` and ``check_is_2nd_error``; every type-2 case is
    rendered to PNG files under ``logger.path('meta')``.  Samples with two
    or fewer passing landmarks are counted as misses.  A running summary is
    logged after every sample.

    Args:
        args: configuration namespace.  ``args.tensor2imageF`` is set here,
            and ``args.num_pts`` is overwritten with 49 when
            ``args.x68to49`` is truthy.
    """
    assert torch.cuda.is_available(), 'CUDA is not available.'
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    torch.set_num_threads(args.workers)
    print('Training Base Detector : prepare_seed : {:}'.format(args.rand_seed))
    prepare_seed(args.rand_seed)
    # The two procedures are not used below; the lookups are kept so that any
    # lookup failure for args.procedure still surfaces here, as before.
    temporal_main = procedures['{:}-train'.format(args.procedure)]
    eval_all = procedures['{:}-test'.format(args.procedure)]

    logger = prepare_logger(args)

    # General data augmentation
    normalize, train_transform, eval_transform, robust_transform = \
        prepare_data_augmentation(transforms, args)
    args.tensor2imageF = transforms.ToPILImage(normalize)
    assert (args.scale_min + args.scale_max) / 2 == 1, \
        'The scale is not ok : {:} ~ {:}'.format(args.scale_min, args.scale_max)

    # Model configuration
    model_config = load_configure(args.model_config, logger)
    sbr_config = load_configure(args.sbr_config, logger)
    shape = (args.height, args.width)
    logger.log('--> {:}\n--> Sigma : {:}, Shape : {:}'.format(
        model_config, args.sigma, shape))
    logger.log('--> SBR Configuration : {:}\n'.format(sbr_config))

    # Training dataset (built with the evaluation transform)
    train_data = VDataset(eval_transform, args.sigma, model_config.downsample,
                          args.heatmap_type, shape, args.use_gray,
                          args.mean_point, args.data_indicator, sbr_config,
                          transforms.ToPILImage(normalize, 'cv2gray'))
    train_data.load_list(args.train_lists, args.num_pts, args.boxindicator,
                         args.normalizeL, True)
    if args.x68to49:
        assert args.num_pts == 68, 'args.num_pts is not 68 vs. {:}'.format(
            args.num_pts)
        train_data = convert68to49(train_data)
        args.num_pts = 49

    # define the temporal model (accelerated SBR)
    net = obtain_pro_temporal(model_config, sbr_config, args.num_pts,
                              args.sigma, args.use_gray)
    assert model_config.downsample == net.downsample, \
        'downsample is not correct : {:} vs {:}'.format(
            model_config.downsample, net.downsample)
    logger.log("=> network :\n {}".format(net))
    logger.log('Training-data : {:}'.format(train_data))
    logger.log('arguments : {:}'.format(args))
    opt_config = load_configure(args.opt_config, logger)
    # Only the criterion is used below; optimizer and scheduler are ignored.
    _, _, criterion = obtain_optimizer(net.parameters(), opt_config, logger)
    logger.log('criterion : {:}'.format(criterion))
    net, criterion = net.cuda(), criterion.cuda()
    net = torch.nn.DataParallel(net)
    logger.last_info()  # return value is not used here

    # First try the checkpoint as detector-only weights; if anything in that
    # attempt fails, reload it and treat it as weights for the whole network.
    # `except Exception` (instead of a bare except) lets KeyboardInterrupt and
    # SystemExit propagate.
    try:
        last_checkpoint = load_checkpoint(args.init_model)
        checkpoint = remove_module_dict(last_checkpoint['state_dict'], False)
        net.module.detector.load_state_dict(checkpoint)
    except Exception:
        last_checkpoint = load_checkpoint(args.init_model)
        net.load_state_dict(last_checkpoint['state_dict'])
    detector = torch.nn.DataParallel(net.module.detector)
    logger.log("=> initialize the detector : {:}".format(args.init_model))
    net.eval()
    detector.eval()

    logger.log('SBR Config : {:}'.format(sbr_config))
    save_xdir = logger.path('meta')

    random.seed(111)
    index_list = list(range(len(train_data)))
    random.shuffle(index_list)
    # Sample 47875 is handled first.  The membership guard keeps datasets
    # that do not contain this index from failing with ValueError.
    priority_index = 47875
    if priority_index in index_list:
        index_list.remove(priority_index)
        selected_list = [priority_index] + index_list
    else:
        selected_list = index_list

    RED, GREEN, BLUE = (255, 0, 0), (0, 255, 0), (0, 0, 255)

    def to_real(norm_locs, theta, image_shape, num_pts):
        # Append a row of ones, apply the first two rows of `theta`, then map
        # the result with denormalize_points(image_shape, ...).
        homogeneous = torch.cat(
            (norm_locs[:num_pts].permute(1, 0), torch.ones(1, num_pts)), dim=0)
        return denormalize_points(image_shape.tolist(),
                                  torch.mm(theta[:2, :], homogeneous))

    def draw(image, pred_locs, gt_points, colors, crop_box):
        # Draw the predictions followed by the first two rows of the ground truth.
        return draw_image_by_points(
            image, torch.cat((pred_locs, gt_points[:2]), dim=1), 3, colors,
            crop_box, (400, 500))

    type_error_1, type_error_2, type_error, misses = 0, 0, 0, 0
    type_error_pts, total_pts = 0, 0
    for iidx, i in enumerate(selected_list):
        (frames, Fflows, Bflows, targets, masks, normpoints, transthetas,
         meanthetas, image_index, nopoints, shapes, is_images) = train_data[i]
        frames, Fflows = frames.unsqueeze(0), Fflows.unsqueeze(0)
        Bflows, is_images = Bflows.unsqueeze(0), is_images.unsqueeze(0)
        # batch_heatmaps is a list for stage-predictions, each element should
        # be [Batch, Sequence, PTS, H/Down, W/Down]
        with torch.no_grad():
            if args.procedure == 'heatmap':
                (_, batch_locs, _, batch_past2now, batch_future2now,
                 batch_FBcheck) = net(frames, Fflows, Bflows, is_images)
            else:
                batch_locs, batch_past2now, batch_future2now, batch_FBcheck = net(
                    frames, Fflows, Bflows, is_images)

        num_pts, annotate_index = args.num_pts, train_data.video_L
        batch_locs = batch_locs.cpu()[:, :, :num_pts]
        video_mask = masks.unsqueeze(0)[:, :num_pts]
        batch_past2now = batch_past2now.cpu()[:, :, :num_pts]
        batch_future2now = batch_future2now.cpu()[:, :, :num_pts]
        batch_FBcheck = batch_FBcheck[:, :num_pts].cpu()
        FB_check_oks = FB_communication(criterion, batch_locs, batch_past2now,
                                        batch_future2now, batch_FBcheck,
                                        video_mask, sbr_config)

        # Detections at the previous / annotated / next frame.
        real_past_det_locs = to_real(batch_locs[0, annotate_index - 1],
                                     transthetas, shapes, num_pts)
        real_noww_det_locs = to_real(batch_locs[0, annotate_index],
                                     transthetas, shapes, num_pts)
        real_next_det_locs = to_real(batch_locs[0, annotate_index + 1],
                                     transthetas, shapes, num_pts)
        # Tracked locations: "next" is read from batch_past2now at the
        # annotated index, "past" from batch_future2now one index earlier.
        real_next_locs = to_real(batch_past2now[0, annotate_index],
                                 transthetas, shapes, num_pts)
        real_past_locs = to_real(batch_future2now[0, annotate_index - 1],
                                 transthetas, shapes, num_pts)

        video_frames = train_data.datas[image_index.item()]
        gt_noww_points = train_data.labels[image_index.item()].get_points()
        gt_past_points = train_data.find_index(video_frames[annotate_index - 1])
        gt_next_points = train_data.find_index(video_frames[annotate_index + 1])

        FB_check_oks = FB_check_oks[:num_pts].squeeze()
        if FB_check_oks.sum().item() > 2:
            # type 1 error : detection at both (t) and (t-1) is wrong, while
            # pass the check
            is_type_1, (T_wrong, T_total) = check_is_1st_error(
                [real_past_det_locs, real_noww_det_locs, real_next_det_locs],
                [gt_past_points, gt_noww_points, gt_next_points],
                FB_check_oks, shapes)
            # type 2 error : detection at frame t is ok, while tracking are
            # wrong and frame at (t-1) is wrong
            spec_index, is_type_2 = check_is_2nd_error(
                real_noww_det_locs, gt_noww_points,
                [real_past_locs, real_next_locs],
                [gt_past_points, gt_next_points], FB_check_oks, shapes)
            type_error_1 += is_type_1
            type_error_2 += is_type_2
            type_error += is_type_1 or is_type_2
            type_error_pts, total_pts = type_error_pts + T_wrong, total_pts + T_total
            if is_type_2:
                [image_past, image_noww, image_next] = video_frames
                crop_box = train_data.labels[image_index.item()].get_box().tolist()
                passed = set(FB_check_oks.nonzero().squeeze().tolist())
                # Predictions: green if the landmark passed the check, red
                # otherwise; ground-truth points are drawn in blue.
                colors = [GREEN if _i in passed else RED for _i in range(num_pts)] \
                    + [BLUE for _i in range(num_pts)]
                I_past_det = draw(image_past, real_past_det_locs, gt_past_points, colors, crop_box)
                I_noww_det = draw(image_noww, real_noww_det_locs, gt_noww_points, colors, crop_box)
                I_next_det = draw(image_next, real_next_det_locs, gt_next_points, colors, crop_box)
                I_past = draw(image_past, real_past_locs, gt_past_points, colors, crop_box)
                I_next = draw(image_next, real_next_locs, gt_next_points, colors, crop_box)
                # The current-frame detection image is written under two names,
                # exactly as before.
                outputs = (
                    (I_past, '{:05d}-v1-a-pastt.png'),
                    (I_noww_det, '{:05d}-v1-b-curre.png'),
                    (I_next, '{:05d}-v1-c-nextt.png'),
                    (I_past_det, '{:05d}-v1-det-a-past.png'),
                    (I_noww_det, '{:05d}-v1-det-b-curr.png'),
                    (I_next_det, '{:05d}-v1-det-c-next.png'),
                )
                for picture, template in outputs:
                    picture.save(str(save_xdir / template.format(i)))
                logger.log('TYPE-ERROR : {:}, landmark-index : {:}'.format(
                    i, spec_index))
        else:
            misses += 1

        handled = iidx + 1
        # total_pts stays 0 until the first non-miss sample; report 0% rather
        # than dividing by zero.
        final_ratio = type_error_pts * 100.0 / total_pts if total_pts else 0.0
        string = 'Handle {:05d}/{:05d} :: {:05d}'.format(
            iidx, len(selected_list), i)
        string += ', error-1 : {:} ({:.2f}%), error-2 : {:} ({:.2f}%)'.format(
            type_error_1, type_error_1 * 100.0 / handled,
            type_error_2, type_error_2 * 100.0 / handled)
        string += ', error : {:} ({:.2f}%), miss : {:}'.format(
            type_error, type_error * 100.0 / handled, misses)
        string += ', final-error : {:05d} / {:05d} = {:.2f}%'.format(
            type_error_pts, total_pts, final_ratio)
        logger.log(string)
def __process_affine(self, frames, target, theta, nopoints, skipopt, aux_info=None):
    """Warp a frame sequence with one affine transform and build its labels.

    Args:
        frames: non-empty sequence of image tensors; each one is cloned and
            the first is unpacked as ``(C, H, W)``.
        target: annotation object; copied, and queried via ``get_points()``
            when ``nopoints`` is falsy.
        theta: affine parameters; cloned and returned unchanged.
        nopoints: truthy when the sample has no landmark annotation, in which
            case zero points/heatmaps and an all-ones mask are produced.
        skipopt: truthy to skip optical-flow computation and return zero flows.
        aux_info: unused in this variant.

    Returns:
        Tuple ``(frames, forward_flow, backward_flow, heatmaps, mask,
        norm_trans_points, theta)`` where
        frames is #frames x #channel x #height x #width and both flows are
        (#frames-1) x #height x #width x 2.
    """
    frames = [frame.clone() for frame in frames]
    target, theta = target.copy(), theta.clone()
    (_, H, W), (height, width) = frames[0].size(), self.shape
    label_h, label_w = height // self.downsample, width // self.downsample

    if nopoints:  # do not have label
        norm_trans_points = torch.zeros((3, self.NUM_PTS))
        heatmaps = torch.zeros((self.NUM_PTS + 1, label_h, label_w))
        mask = torch.ones((self.NUM_PTS + 1, 1, 1), dtype=torch.uint8)
    else:
        norm_trans_points = apply_affine2point(target.get_points(), theta, (H, W))
        norm_trans_points = apply_boundary(norm_trans_points)
        # Only the first two rows are denormalized; the third row is kept.
        real_trans_points = norm_trans_points.clone()
        real_trans_points[:2, :] = denormalize_points(
            self.shape, real_trans_points[:2, :])
        heatmaps, mask = generate_label_map(
            real_trans_points.numpy(), label_h, label_w, self.sigma,
            self.downsample, nopoints, self.heatmap_type)  # H*W*C
        # H*W*C -> C*H*W
        heatmaps = torch.from_numpy(heatmaps.transpose((2, 0, 1))).type(torch.FloatTensor)
        mask = torch.from_numpy(mask.transpose((2, 0, 1))).type(torch.ByteTensor)

    affineFrames = [affine2image(frame, theta, self.shape) for frame in frames]
    num_pairs = len(affineFrames) - 1
    if skipopt or num_pairs < 1:
        # A single frame has no neighbouring pair: return empty flows (as the
        # skip branch always did) instead of failing in torch.stack([]).
        forward_flow = torch.zeros((num_pairs, height, width, 2))
        backward_flow = torch.zeros((num_pairs, height, width, 2))
    else:
        Gframes = [self.tensor2img(frame) for frame in affineFrames]
        forward_list, backward_list = [], []
        for earlier, later in zip(Gframes[:-1], Gframes[1:]):
            # Same call order as the original index loop: for every adjacent
            # pair the backward flow (later -> earlier) is computed first,
            # then the forward flow (earlier -> later).
            backward_list.append(self.optflow.calc(later, earlier, None))
            forward_list.append(self.optflow.calc(earlier, later, None))
        forward_flow = torch.stack([torch.from_numpy(x) for x in forward_list])
        backward_flow = torch.stack([torch.from_numpy(x) for x in backward_list])

    return (torch.stack(affineFrames), forward_flow, backward_flow, heatmaps,
            mask, norm_trans_points, theta)
def __process_affine(self, frames, target, theta, nopoints, skip_opt, aux_info=None):
    """Warp a frame sequence with one affine transform and build its labels.

    Args:
        frames: non-empty sequence of image tensors; each one is cloned and
            the first is unpacked as ``(C, H, W)``.
        target: annotation object; copied, and queried via ``get_points()``
            when ``nopoints`` is falsy.
        theta: affine parameters; cloned and returned unchanged.
        nopoints: truthy when the sample has no landmark annotation, in which
            case zero points/heatmaps, an all-ones mask and the identity
            ``transpose_theta`` are produced.
        skip_opt: truthy to skip optical-flow computation and return zero flows.
        aux_info: only interpolated into the warning message.

    Returns:
        Tuple ``(frames, forward_flow, backward_flow, heatmaps, mask,
        norm_trans_points, theta, transpose_theta)`` where
        frames is #frames x #channel x #height x #width and both flows are
        (#frames-1) x #height x #width x 2.
    """
    frames = [frame.clone() for frame in frames]
    target, theta = target.copy(), theta.clone()
    (_, H, W), (height, width) = frames[0].size(), self.shape
    label_h, label_w = height // self.downsample, width // self.downsample

    if nopoints:  # do not have label
        norm_trans_points = torch.zeros((3, self.NUM_PTS))
        heatmaps = torch.zeros((self.NUM_PTS + 1, label_h, label_w))
        mask = torch.ones((self.NUM_PTS + 1, 1, 1), dtype=torch.uint8)
        transpose_theta = identity2affine(False)
    else:
        norm_trans_points = apply_affine2point(target.get_points(), theta, (H, W))
        norm_trans_points = apply_boundary(norm_trans_points)
        # Only the first two rows are denormalized; the third row is kept.
        real_trans_points = norm_trans_points.clone()
        real_trans_points[:2, :] = denormalize_points(
            self.shape, real_trans_points[:2, :])
        heatmaps, mask = generate_label_map(
            real_trans_points.numpy(), label_h, label_w, self.sigma,
            self.downsample, nopoints, self.heatmap_type)  # H*W*C
        # H*W*C -> C*H*W
        heatmaps = torch.from_numpy(heatmaps.transpose((2, 0, 1))).type(torch.FloatTensor)
        mask = torch.from_numpy(mask.transpose((2, 0, 1))).type(torch.ByteTensor)
        if self.mean_face is None:
            # Without a mean face there is nothing to solve against.  This case
            # used to emit the "no visiable point" warning for every sample,
            # which misreported the cause; it is now silent.
            transpose_theta = identity2affine(False)
        elif torch.sum(norm_trans_points[2, :] == 1) < 3:
            # Fewer than three entries of the third row equal 1.
            warnings.warn(
                'In GeneralDatasetV2 after transformation, no visiable point, using identity instead. Aux: {:}'
                .format(aux_info))
            transpose_theta = identity2affine(False)
        else:
            transpose_theta = solve2theta(norm_trans_points, self.mean_face.clone())

    affineFrames = [affine2image(frame, theta, self.shape) for frame in frames]
    num_pairs = len(affineFrames) - 1
    if skip_opt or num_pairs < 1:
        # A single frame has no neighbouring pair: return empty flows (as the
        # skip branch always did) instead of failing in torch.stack([]).
        forward_flow = torch.zeros((num_pairs, height, width, 2))
        backward_flow = torch.zeros((num_pairs, height, width, 2))
    else:
        Gframes = [self.tensor2img(frame) for frame in affineFrames]
        forward_list, backward_list = [], []
        for earlier, later in zip(Gframes[:-1], Gframes[1:]):
            # Same call order as the original index loop: for every adjacent
            # pair the backward flow (later -> earlier) is computed first,
            # then the forward flow (earlier -> later).
            backward_list.append(self.optflow.calc(later, earlier, None))
            forward_list.append(self.optflow.calc(earlier, later, None))
        forward_flow = torch.stack([torch.from_numpy(x) for x in forward_list])
        backward_flow = torch.stack([torch.from_numpy(x) for x in backward_list])

    return (torch.stack(affineFrames), forward_flow, backward_flow, heatmaps,
            mask, norm_trans_points, theta, transpose_theta)
def main(args):
    """Render detections and tracked landmarks for a fixed set of samples.

    Builds the video training set, one image evaluation set and the temporal
    (SBR) network from ``args``, initialises the weights from
    ``args.init_model`` and writes two groups of PNG files:

    * for a hard-coded list of training-video indices, the detections and
      tracked locations of the previous / annotated / next frame, saved under
      ``logger.path('meta')`` whenever more than two landmarks pass the
      forward-backward check;
    * for a hard-coded list of evaluation-image indices, the detector output
      (cropped) and the output together with the ground truth (full image),
      saved under the sibling directory ``image-data``.

    Args:
        args: configuration namespace.  ``args.tensor2imageF`` is set here,
            and ``args.num_pts`` is overwritten with 49 when
            ``args.x68to49`` is truthy.
    """
    assert torch.cuda.is_available(), 'CUDA is not available.'
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    torch.set_num_threads(args.workers)
    print('Training Base Detector : prepare_seed : {:}'.format(args.rand_seed))
    prepare_seed(args.rand_seed)
    # The two procedures are not used below; the lookups are kept so that any
    # lookup failure for args.procedure still surfaces here, as before.
    temporal_main = procedures['{:}-train'.format(args.procedure)]
    eval_all = procedures['{:}-test'.format(args.procedure)]

    logger = prepare_logger(args)

    # General data augmentation
    normalize, train_transform, eval_transform, robust_transform = \
        prepare_data_augmentation(transforms, args)
    args.tensor2imageF = transforms.ToPILImage(normalize)
    assert (args.scale_min + args.scale_max) / 2 == 1, \
        'The scale is not ok : {:} ~ {:}'.format(args.scale_min, args.scale_max)

    # Model configuration
    model_config = load_configure(args.model_config, logger)
    sbr_config = load_configure(args.sbr_config, logger)
    shape = (args.height, args.width)
    logger.log('--> {:}\n--> Sigma : {:}, Shape : {:}'.format(
        model_config, args.sigma, shape))
    logger.log('--> SBR Configuration : {:}\n'.format(sbr_config))

    # Training dataset (built with the evaluation transform)
    train_data = VDataset(eval_transform, args.sigma, model_config.downsample,
                          args.heatmap_type, shape, args.use_gray,
                          args.mean_point, args.data_indicator, sbr_config,
                          transforms.ToPILImage(normalize, 'cv2gray'))
    train_data.load_list(args.train_lists, args.num_pts, args.boxindicator,
                         args.normalizeL, True)

    # Evaluation dataset.  The message previously referenced the undefined
    # name `eval_ilists`, turning a failed assertion into a NameError.
    assert len(args.eval_ilists) == 1, \
        'invalid length of eval_ilists : {:}'.format(len(args.eval_ilists))
    eval_data = IDataset(eval_transform, args.sigma, model_config.downsample,
                         args.heatmap_type, shape, args.use_gray,
                         args.mean_point, args.data_indicator)
    eval_data.load_list(args.eval_ilists[0], args.num_pts, args.boxindicator,
                        args.normalizeL, True)

    if args.x68to49:
        assert args.num_pts == 68, 'args.num_pts is not 68 vs. {:}'.format(
            args.num_pts)
        train_data = convert68to49(train_data)
        eval_data = convert68to49(eval_data)
        args.num_pts = 49

    # define the temporal model (accelerated SBR)
    net = obtain_pro_temporal(model_config, sbr_config, args.num_pts,
                              args.sigma, args.use_gray)
    assert model_config.downsample == net.downsample, \
        'downsample is not correct : {:} vs {:}'.format(
            model_config.downsample, net.downsample)
    logger.log("=> network :\n {}".format(net))
    logger.log('Training-data : {:}'.format(train_data))
    logger.log('Evaluate-data : {:}'.format(eval_data))
    logger.log('arguments : {:}'.format(args))
    opt_config = load_configure(args.opt_config, logger)
    # Only the criterion is used below; optimizer and scheduler are ignored.
    _, _, criterion = obtain_optimizer(net.parameters(), opt_config, logger)
    logger.log('criterion : {:}'.format(criterion))
    net, criterion = net.cuda(), criterion.cuda()
    net = torch.nn.DataParallel(net)
    logger.last_info()  # return value is not used here

    # First try the checkpoint as detector-only weights; if anything in that
    # attempt fails, reload it and treat it as weights for the whole network.
    # `except Exception` (instead of a bare except) lets KeyboardInterrupt and
    # SystemExit propagate.
    try:
        last_checkpoint = load_checkpoint(args.init_model)
        checkpoint = remove_module_dict(last_checkpoint['state_dict'], False)
        net.module.detector.load_state_dict(checkpoint)
    except Exception:
        last_checkpoint = load_checkpoint(args.init_model)
        net.load_state_dict(last_checkpoint['state_dict'])
    detector = torch.nn.DataParallel(net.module.detector)
    logger.log("=> initialize the detector : {:}".format(args.init_model))
    net.eval()
    detector.eval()

    logger.log('SBR Config : {:}'.format(sbr_config))
    save_xdir = logger.path('meta')
    type_error = 0  # never incremented; still reported as "wrong data" below
    # The shuffled index list is not used for selection any more; seeding and
    # shuffling are kept so the `random` state after this point is unchanged.
    random.seed(111)
    index_list = list(range(len(train_data)))
    random.shuffle(index_list)

    RED, GREEN, BLUE = (255, 0, 0), (0, 255, 0), (0, 0, 255)

    def to_real(norm_locs, theta, image_shape, num_pts):
        # Append a row of ones, apply the first two rows of `theta`, then map
        # the result with denormalize_points(image_shape, ...).
        homogeneous = torch.cat(
            (norm_locs[:num_pts].permute(1, 0), torch.ones(1, num_pts)), dim=0)
        return denormalize_points(image_shape.tolist(),
                                  torch.mm(theta[:2, :], homogeneous))

    selected_list = [
        7260, 11506, 39952, 75196, 51614, 41061, 37747, 41355, 47875
    ]
    for iidx, i in enumerate(selected_list):
        (frames, Fflows, Bflows, targets, masks, normpoints, transthetas,
         meanthetas, image_index, nopoints, shapes, is_images) = train_data[i]
        frames, Fflows = frames.unsqueeze(0), Fflows.unsqueeze(0)
        Bflows, is_images = Bflows.unsqueeze(0), is_images.unsqueeze(0)
        # batch_heatmaps is a list for stage-predictions, each element should
        # be [Batch, Sequence, PTS, H/Down, W/Down]
        # Inference only: run without autograd bookkeeping.
        with torch.no_grad():
            if args.procedure == 'heatmap':
                (_, batch_locs, _, batch_past2now, batch_future2now,
                 batch_FBcheck) = net(frames, Fflows, Bflows, is_images)
            else:
                batch_locs, batch_past2now, batch_future2now, batch_FBcheck = net(
                    frames, Fflows, Bflows, is_images)

        num_pts, annotate_index = args.num_pts, train_data.video_L
        batch_locs = batch_locs.cpu()[:, :, :num_pts]
        video_mask = masks.unsqueeze(0)[:, :num_pts]
        batch_past2now = batch_past2now.cpu()[:, :, :num_pts]
        batch_future2now = batch_future2now.cpu()[:, :, :num_pts]
        batch_FBcheck = batch_FBcheck[:, :num_pts].cpu()
        FB_check_oks = FB_communication(criterion, batch_locs, batch_past2now,
                                        batch_future2now, batch_FBcheck,
                                        video_mask, sbr_config)

        # Detections at the previous / annotated / next frame.
        real_past_det_locs = to_real(batch_locs[0, annotate_index - 1],
                                     transthetas, shapes, num_pts)
        real_noww_det_locs = to_real(batch_locs[0, annotate_index],
                                     transthetas, shapes, num_pts)
        real_next_det_locs = to_real(batch_locs[0, annotate_index + 1],
                                     transthetas, shapes, num_pts)
        # Tracked locations: "next" is read from batch_past2now at the
        # annotated index, "past" from batch_future2now one index earlier.
        real_next_locs = to_real(batch_past2now[0, annotate_index],
                                 transthetas, shapes, num_pts)
        real_past_locs = to_real(batch_future2now[0, annotate_index - 1],
                                 transthetas, shapes, num_pts)

        FB_check_oks = FB_check_oks[:num_pts].squeeze()
        if FB_check_oks.sum().item() > 2:
            passed = set(FB_check_oks.nonzero().squeeze().tolist())
            [image_past, image_noww, image_next] = train_data.datas[image_index.item()]
            # Best effort: fall back to "no crop" when the box is unavailable.
            try:
                crop_box = train_data.labels[image_index.item()].get_box().tolist()
            except Exception:
                crop_box = False
            # Green if the landmark passed the check, red otherwise.
            colors = [GREEN if _i in passed else RED for _i in range(num_pts)]

            def draw(image, locs):
                return draw_image_by_points(image, locs[:], 3, colors,
                                            crop_box, (400, 500))

            I_past_det = draw(image_past, real_past_det_locs)
            I_noww_det = draw(image_noww, real_noww_det_locs)
            I_next_det = draw(image_next, real_next_det_locs)
            I_next = draw(image_next, real_next_locs)
            I_past = draw(image_past, real_past_locs)
            # The current-frame detection image is written under two names,
            # exactly as before.
            outputs = (
                (I_past, '{:05d}-v1-a-pastt.png'),
                (I_noww_det, '{:05d}-v1-b-curre.png'),
                (I_next, '{:05d}-v1-c-nextt.png'),
                (I_past_det, '{:05d}-v1-det-a-past.png'),
                (I_noww_det, '{:05d}-v1-det-b-curr.png'),
                (I_next_det, '{:05d}-v1-det-c-next.png'),
            )
            for picture, template in outputs:
                picture.save(str(save_xdir / template.format(i)))
        # NOTE(review): the collapsed source does not show whether this log
        # line sat inside the branch above; it is emitted for every sample
        # here - confirm against the original layout.
        logger.log(
            'Handle {:05d}/{:05d} :: {:05d}, ok-points={:.3f}, wrong data={:}'.
            format(iidx, len(selected_list), i,
                   FB_check_oks.float().mean().item(), type_error))

    save_xx_dir = save_xdir.parent / 'image-data'
    save_xx_dir.mkdir(parents=True, exist_ok=True)
    # Index 200 appears twice (explicitly and in the range); its files are
    # simply written twice.
    selected_list = [100, 115, 200, 300, 400] + list(range(200, 220))
    for i in selected_list:
        (inputs, targets, masks, normpoints, transthetas, meanthetas,
         image_index, nopoints, shapes) = eval_data[i]
        inputs = inputs.unsqueeze(0)
        (_, _, H, W), num_pts = inputs.size(), args.num_pts
        with torch.no_grad():
            _, _, batch_locs, batch_scos = detector(inputs)
        batch_locs = batch_locs.cpu()
        norm_locs = normalize_points((H, W),
                                     batch_locs[0, :num_pts].transpose(1, 0))
        norm_det_locs = torch.cat((norm_locs, torch.ones(1, num_pts)), dim=0)
        norm_det_locs = torch.mm(transthetas[:2, :], norm_det_locs)
        real_det_locs = denormalize_points(shapes.tolist(), norm_det_locs)
        gt_now_points = eval_data.labels[image_index.item()].get_points()
        image_now = eval_data.datas[image_index.item()]
        crop_box = eval_data.labels[image_index.item()].get_box().tolist()

        Gcolors = [GREEN for _ in range(num_pts)]
        points = torch.cat((real_det_locs, gt_now_points[:2]), dim=1)
        colors = Gcolors + [BLUE for _ in range(num_pts)]
        # Cropped view: detections only.
        image = draw_image_by_points(image_now, real_det_locs, 3, Gcolors,
                                     crop_box, (400, 500))
        image.save(str(save_xx_dir / '{:05d}-crop.png'.format(i)))
        # Full view: detections (green) plus ground truth (blue).
        image = draw_image_by_points(image_now, points, 3, colors, False, False)
        image.save(str(save_xx_dir / '{:05d}-orig.png'.format(i)))
    logger.log('Finish drawing : {:}'.format(save_xdir))
    logger.log('Finish drawing : {:}'.format(save_xx_dir))
    logger.close()
def main(args):
    """Evaluate a saved detector on image/video lists and store the results.

    Loads the checkpoint at ``args.init_model``, rebuilds the detector from
    the arguments stored inside it (``checkpoint['args']``), runs it over
    every list in ``args.eval_ilists`` and ``args.eval_vlists`` and, per
    list, saves a dict with keys ``'paths'``, ``'ground-truths'`` and
    ``'predictions'`` to ``<args.save_path>/<model_name>-result-<idx>.pth``.

    The video-rendering code that was permanently disabled (an ``if False:``
    branch and a block kept inside a string literal) has been removed.

    Args:
        args: configuration namespace for this run; dataset and model
            settings are read from the checkpoint's own ``args`` instead.
    """
    assert torch.cuda.is_available(), 'CUDA is not available.'
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    torch.set_num_threads(args.workers)
    print('Training Base Detector : prepare_seed : {:}'.format(args.rand_seed))
    prepare_seed(args.rand_seed)

    logger = prepare_logger(args)

    checkpoint = load_checkpoint(args.init_model)
    xargs = checkpoint['args']
    logger.log('Previous args : {:}'.format(xargs))

    # General data augmentation.
    # The equality test (rather than `not`) is kept on purpose: a use_gray of
    # None still selects the single-channel statistics, as before.
    if xargs.use_gray == False:  # noqa: E712
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
    else:
        normalize = transforms.Normalize(mean=[0.5], std=[0.5])
    eval_transform = transforms.Compose2V([
        transforms.ToTensor(), normalize,
        transforms.PreCrop(xargs.pre_crop_expand),
        transforms.CenterCrop(xargs.crop_max)
    ])

    # Model configuration
    model_config = load_configure(xargs.model_config, logger)
    shape = (xargs.height, xargs.width)
    logger.log('--> {:}\n--> Sigma : {:}, Shape : {:}'.format(
        model_config, xargs.sigma, shape))

    def build_loader(list_path):
        # One dataset + sequential loader per list; identical for image and
        # video lists.  Note that num_pts is taken from `args`, not `xargs`.
        dataset = EvalDataset(eval_transform, xargs.sigma,
                              model_config.downsample, xargs.heatmap_type,
                              shape, xargs.use_gray, xargs.data_indicator)
        dataset.load_list(list_path, args.num_pts, xargs.boxindicator,
                          xargs.normalizeL, True)
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=args.batch_size,
                                           shuffle=False,
                                           num_workers=args.workers,
                                           pin_memory=True)

    # Evaluation dataloaders as (loader, is_video) pairs
    eval_loaders = []
    if args.eval_ilists is not None:
        for eval_ilist in args.eval_ilists:
            eval_loaders.append((build_loader(eval_ilist), False))
    if args.eval_vlists is not None:
        for eval_vlist in args.eval_vlists:
            eval_loaders.append((build_loader(eval_vlist), True))

    # define the detector
    detector = obtain_pro_model(model_config, xargs.num_pts, xargs.sigma,
                                xargs.use_gray)
    assert model_config.downsample == detector.downsample, \
        'downsample is not correct : {:} vs {:}'.format(
            model_config.downsample, detector.downsample)
    logger.log("=> detector :\n {:}".format(detector))
    logger.log("=> Net-Parameters : {:} MB".format(
        count_parameters_in_MB(detector)))
    logger.log('=> Eval-Transform : {:}'.format(eval_transform))
    detector = detector.cuda()
    net = torch.nn.DataParallel(detector)
    net.eval()
    net.load_state_dict(checkpoint['detector'])
    cpu = torch.device('cpu')

    assert len(args.use_stable) == 2

    for iLOADER, (loader, is_video) in enumerate(eval_loaders):
        logger.log(
            '{:} The [{:2d}/{:2d}]-th test set [{:}] = {:} with {:} batches.'.
            format(time_string(), iLOADER, len(eval_loaders),
                   'video' if is_video else 'image', loader.dataset,
                   len(loader)))
        all_points, all_results, all_image_ps = [], [], []
        with torch.no_grad():
            for (inputs, targets, masks, normpoints, transthetas, image_index,
                 nopoints, shapes) in loader:
                image_index = image_index.squeeze(1).tolist()
                (_, _, H, W), num_pts = inputs.size(), xargs.num_pts
                is_heatmap = xargs.procedure == 'heatmap'
                # batch_heatmaps is a list for stage-predictions, each element
                # should be [Batch, C, H, W]
                if is_heatmap:
                    _, _, batch_locs, _ = net(inputs)
                    batch_locs = batch_locs[:, :-1, :]  # drop the last entry
                else:
                    batch_locs = net(inputs)
                batch_locs = batch_locs.detach().to(cpu)
                for ibatch, (imgidx, _) in enumerate(zip(image_index, nopoints)):
                    if is_heatmap:
                        # Heatmap outputs go through normalize_points first.
                        norm_locs = normalize_points(
                            (H, W), batch_locs[ibatch].transpose(1, 0))
                    else:
                        norm_locs = batch_locs[ibatch].permute(1, 0)
                    norm_locs = torch.cat((norm_locs, torch.ones(1, num_pts)),
                                          dim=0)
                    norm_locs = torch.mm(transthetas[ibatch][:2, :], norm_locs)
                    real_locs = denormalize_points(shapes[ibatch].tolist(),
                                                   norm_locs)
                    # A constant row of ones is appended in place of scores.
                    real_locs = torch.cat((real_locs, torch.ones(1, num_pts)),
                                          dim=0)
                    xpoints = loader.dataset.labels[imgidx].get_points().numpy()
                    # put into the list
                    all_points.append(torch.from_numpy(xpoints))
                    all_results.append(real_locs)
                    all_image_ps.append(loader.dataset.datas[imgidx])
        total = len(all_points)
        logger.log(
            '{:} The [{:2d}/{:2d}]-th test set finishes evaluation : {:} frames/images'
            .format(time_string(), iLOADER, len(eval_loaders), total))
        if total == 0:
            # torch.stack rejects an empty list; nothing to save for this set.
            logger.log('{:} The [{:2d}/{:2d}]-th test set is empty, skip saving.'
                       .format(time_string(), iLOADER, len(eval_loaders)))
            continue

        Xgts = torch.stack(all_points)
        save_path = Path(
            osp.join(args.save_path,
                     '{:}-result-{:03d}.pth'.format(args.model_name, iLOADER)))
        # Ground truths are saved stacked; predictions stay a list of tensors.
        torch.save(
            {
                'paths': all_image_ps,
                'ground-truths': Xgts,
                'predictions': all_results
            }, save_path)
        logger.log('{:} save into {:}'.format(time_string(), save_path))
    logger.close()
def basic_main_regression(args, loader, net, criterion, optimizer, epoch_str,
                          logger, opt_config, mode):
    """Run one pass over ``loader`` for the coordinate-regression model.

    In ``'train'`` mode the network and criterion are switched to training
    mode and one optimizer step is taken per batch.  In ``'test'`` mode they
    are switched to eval mode and the forward pass runs with autograd
    disabled.  In both modes every prediction is mapped back through the
    per-sample ``transthetas`` matrix, denormalised with the per-sample
    ``shapes`` entry, and collected into an ``Eval_Meta``.

    Args:
        args: run configuration; ``debug``, ``save_path``, ``print_freq``,
            ``print_freq_eval`` and ``num_pts`` are read.  A deep copy is
            made, so the caller's object is not modified.
        loader: data loader yielding ``(inputs, targets, masks, normpoints,
            transthetas, meanthetas, image_index, nopoints, shapes)``; its
            ``dataset`` must expose ``datas``, ``NormDistances``, ``labels``
            and ``dataset_name``.
        net: model called as ``net(inputs)``.
        criterion: loss called as ``criterion(batch_locs, target_points,
            det_masks)``.
        optimizer: only used when ``mode == 'train'``.
        epoch_str: label used in log lines (and in the debug directory name).
        logger: object with a ``log(str)`` method.
        opt_config: not used by the current body (kept for interface
            compatibility).
        mode: ``'train'`` or ``'test'``.

    Returns:
        Tuple ``(LOSSES.avg, eval_meta, nme)`` where ``nme`` is the first
        value returned by ``eval_meta.compute_mse``.

    Raises:
        AssertionError: if ``mode`` is neither ``'train'`` nor ``'test'``.
    """
    assert mode == 'train' or mode == 'test', 'invalid mode : {:}'.format(mode)
    is_train = mode == 'train'
    args = copy.deepcopy(args)
    batch_time, pre_data_time, data_time, forward_time, eval_time = (
        AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(),
        AverageMeter())
    # NOTE: LOSSES_LOCS / LOSSES_SCOS are never updated in this function (the
    # separate score loss is disabled); they are kept because the log format
    # below still prints them.
    visible_points, LOSSES, LOSSES_LOCS, LOSSES_SCOS = (
        AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter())
    eval_meta = Eval_Meta()
    cpu = torch.device('cpu')

    # NOTE: save_dir is only meaningful for a per-sample debug dump, which is
    # currently disabled; it is still computed so that `args.debug` is read
    # exactly as before.
    if args.debug:
        save_dir = Path(
            args.save_path) / 'DEBUG' / ('{:}-'.format(mode) + epoch_str)
    else:
        save_dir = None

    # switch the network / criterion to the requested mode
    if is_train:
        logger.log(
            'basic-main-Regression : training mode :: {:}'.format(criterion))
        print_freq = args.print_freq
        net.train()
        criterion.train()
    else:
        logger.log(
            'basic-main-Regression : evaluation mode :: {:}'.format(criterion))
        print_freq = args.print_freq_eval
        net.eval()
        criterion.eval()

    end = time.time()
    for i, (inputs, targets, masks, normpoints, transthetas, meanthetas,
            image_index, nopoints, shapes) in enumerate(loader):
        # inputs : Batch, Channel, Height, Width
        pre_data_time.update(time.time() - end)

        # information
        image_index = image_index.squeeze(1).tolist()
        (batch_size, C, H, W), num_pts = inputs.size(), args.num_pts
        # The last mask channel is excluded from the visible-point count
        # (presumably a background channel -- TODO confirm).
        visible_points.update(
            float(masks.numpy()[:, :-1].sum()) / batch_size, batch_size)

        normpoints = normpoints.permute(0, 2, 1)
        target_points = normpoints[:, :, :2].contiguous().cuda(
            non_blocking=True)
        # A landmark contributes to the loss only if the sample has labels
        # (nopoints == 0) and its own mask entry is set.
        det_masks = (1 - nopoints).view(
            batch_size, 1, 1) * masks[:, :num_pts].contiguous().view(
                batch_size, num_pts, 1)
        det_masks = det_masks.cuda(non_blocking=True)
        nopoints = nopoints.squeeze(1).tolist()

        # measure data loading time
        data_time.update(time.time() - end)

        # Only track gradients when they are needed: in 'test' mode no
        # backward pass happens, so building the autograd graph would just
        # hold on to activations.
        with torch.set_grad_enabled(is_train):
            batch_locs = net(inputs)
            forward_time.update(time.time() - end)
            loss = criterion(batch_locs, target_points, det_masks)

        # record loss
        LOSSES.update(loss.item(), batch_size)

        # compute gradient and do SGD step
        if is_train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        eval_time.update(time.time() - end)

        with torch.no_grad():
            batch_locs = batch_locs.detach().to(cpu)
            # evaluate every sample of the batch
            for ibatch, (imgidx, nopoint) in enumerate(
                    zip(image_index, nopoints)):
                # Append a row of ones (homogeneous coordinates) so the
                # first two rows of the stored matrix can be applied with a
                # single matrix product.
                norm_locs = torch.cat(
                    (batch_locs[ibatch].permute(1, 0),
                     torch.ones(1, num_pts)), dim=0)
                transtheta = transthetas[ibatch][:2, :]
                norm_locs = torch.mm(transtheta, norm_locs)
                real_locs = denormalize_points(shapes[ibatch].tolist(),
                                               norm_locs)
                # The third row is a constant 1 standing in for a per-point
                # score, since no score is predicted here.
                real_locs = torch.cat(
                    (real_locs, torch.ones(1, num_pts)), dim=0)

                image_path = loader.dataset.datas[imgidx]
                normDistce = loader.dataset.NormDistances[imgidx]

                if nopoint == 1:
                    xpoints = None
                else:
                    xpoints = loader.dataset.labels[imgidx].get_points(
                    ).numpy()
                eval_meta.append(real_locs.numpy(), xpoints, image_path,
                                 normDistce)

        # measure elapsed time
        batch_time.update(time.time() - end)
        last_time = convert_secs2time(
            batch_time.avg * (len(loader) - i - 1), True)
        end = time.time()

        if i % print_freq == 0 or i + 1 == len(loader):
            logger.log(' -->>[{:}]: [{:}][{:03d}/{:03d}] '
                       'Time {batch_time.val:4.2f} ({batch_time.avg:4.2f}) '
                       'PRE {pre_data_time.val:4.2f} ({pre_data_time.avg:4.2f}) '
                       'Data {data_time.val:4.2f} ({data_time.avg:4.2f}) '
                       'Forward {forward_time.val:4.2f} ({forward_time.avg:4.2f}) '
                       'Loss {loss.val:7.4f} ({loss.avg:7.4f}) [locs={locs.avg:7.4f} scos={scos.avg:7.4f}]'.format(
                           mode, epoch_str, i, len(loader),
                           batch_time=batch_time,
                           pre_data_time=pre_data_time,
                           data_time=data_time,
                           forward_time=forward_time,
                           loss=LOSSES,
                           locs=LOSSES_LOCS,
                           scos=LOSSES_SCOS) + last_time
                       + ' I={:}'.format(list(inputs.size()))
                       + ' Vis={:2d} ({:.1f})'.format(
                           int(visible_points.val), visible_points.avg))

    nme, _, _ = eval_meta.compute_mse(loader.dataset.dataset_name, logger)
    return LOSSES.avg, eval_meta, nme