def main():
    args = parse_args()
    update_config(cfg, args)

    cfg.defrost()
    cfg.TEST.MODEL_FILE = HRNET_PATH + '/models/pytorch/pose_coco/pose_hrnet_w32_256x192.pth'
    cfg.TEST.USE_GT_BBOX = False
    cfg.GPUS = (0, )
    cfg.freeze()

    logger, final_output_dir, tb_log_dir = create_logger(cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related settings
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)
    model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    normalize = transforms.Compose([transforms.ToTensor(), normalize])

    predict_imgs(model, args.imgs, args.bbox, args.out, normalize, 0.85)
def main():
    args = parse_args()
    update_config(cfg, args)

    cfg.defrost()
    cfg.RANK = args.rank
    cfg.freeze()

    logger, final_output_dir, tb_log_dir = create_logger(cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    # logger.info(cfg)

    if cfg.WITHOUT_EVAL:
        input("[WARNING] According to the configuration, there will be no evaluation. "
              "If evaluation is necessary, please terminate this process. "
              "[press Enter to continue]")
        logger.info("=> Training without evaluation")

    ngpus_per_node = len(cfg.GPUS)
    if ngpus_per_node == 1:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    # Simply call the main_worker function
    main_worker(','.join([str(i) for i in cfg.GPUS]), ngpus_per_node, args,
                final_output_dir, tb_log_dir)
def __prepare_fine_tune(self):
    # Overwrite the training schedule with the fine-tuning settings.
    cfg.defrost()
    cfg.TRAIN.ANNO_FILE = cfg.FINE_TUNE.ANNO_FILE
    cfg.TRAIN.YOLO_EPOCHS = cfg.FINE_TUNE.YOLO_EPOCHS
    cfg.TRAIN.LR_INIT = cfg.FINE_TUNE.LR_INIT
    cfg.TRAIN.LR_END = cfg.FINE_TUNE.LR_END
    cfg.TRAIN.WARMUP_EPOCHS = cfg.FINE_TUNE.WARMUP_EPOCHS
    cfg.freeze()
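# All of the snippets in this file share the same yacs CfgNode idiom: a frozen
# config must be defrost()-ed before mutation and freeze()-d again afterwards.
# A minimal, self-contained sketch of that pattern (the node and key names
# here are illustrative, not taken from any project above):
from yacs.config import CfgNode as CN

demo_cfg = CN()
demo_cfg.TRAIN = CN()
demo_cfg.TRAIN.LR_INIT = 1e-3
demo_cfg.freeze()

# demo_cfg.TRAIN.LR_INIT = 1e-4  # would raise AttributeError: node is immutable
demo_cfg.defrost()
demo_cfg.TRAIN.LR_INIT = 1e-4    # allowed while unfrozen
demo_cfg.freeze()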
def main():
    args = parse_args()
    update_config(cfg, args)

    cfg.defrost()
    cfg.freeze()

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)
    model.eval()

    dump_input = torch.rand((1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    summary(model, dump_input)
def main():
    args = parse_args()
    update_config(cfg, args)

    # After all config updates, pass the node rank into the config.
    cfg.defrost()
    cfg.RANK = args.rank
    cfg.freeze()

    logger, final_output_dir, tb_log_dir = create_logger(cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    # Determine the total number of nodes.
    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or cfg.MULTIPROCESSING_DISTRIBUTED

    # Count the GPUs on this machine and launch one process per GPU.
    # To train on specific GPUs only, mask the others with CUDA_VISIBLE_DEVICES.
    ngpus_per_node = torch.cuda.device_count()
    if cfg.MULTIPROCESSING_DISTRIBUTED:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker,
                 nprocs=ngpus_per_node,
                 args=(ngpus_per_node, args, final_output_dir, tb_log_dir))
    else:
        # Simply call the main_worker function
        main_worker(','.join([str(i) for i in cfg.GPUS]), ngpus_per_node, args,
                    final_output_dir, tb_log_dir)
def update_my_config():
    cfg.defrost()
    cfg.merge_from_file('experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml')
    opts = [
        "TEST.MODEL_FILE",
        "/mnt/models/HRNet/pose_mpii/pose_hrnet_w32_256x256.pth"
    ]
    cfg.merge_from_list(opts)
    cfg.OUTPUT_DIR = "output_test"
    cfg.LOG_DIR = "log_test"
    cfg.freeze()
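# A plausible way to drive update_my_config() (hypothetical caller, not part
# of the snippet above). merge_from_list consumes a flat
# [key1, value1, key2, value2, ...] list, so after the call the overrides are
# readable directly from the frozen cfg:
update_my_config()
print(cfg.TEST.MODEL_FILE)  # -> /mnt/models/HRNet/pose_mpii/pose_hrnet_w32_256x256.pth
print(cfg.OUTPUT_DIR)       # -> output_test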
def predict(cfg_path, img_dir, bbox_dir, out_file, param_overrides=[]):
    # update_config needs some hardcoded params; fake them here
    class args:
        cfg = cfg_path
        opts = param_overrides
        modelDir = ''
        logDir = ''
        dataDir = ''

    update_config(cfg, args)

    cfg.defrost()
    cfg.TEST.MODEL_FILE = '../hrnet/pose_hrnet_w32_256x192.pth'
    cfg.TEST.USE_GT_BBOX = False
    cfg.TEST.BATCH_SIZE_PER_GPU = 64
    cfg.GPUS = (0, )
    cfg.freeze()

    logger, final_output_dir, tb_log_dir = create_logger(cfg, cfg_path, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related settings
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)
    model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    normalize = transforms.Compose([transforms.ToTensor(), normalize])

    detection_thresh = 0.8
    img_dir = os.path.join(img_dir, '*')  # the dataset expects a glob pattern
    predict_imgs(model, img_dir, bbox_dir, out_file, normalize, detection_thresh)
def main():
    args = parse_args()
    set_seed(int(args.seed))
    update_config(cfg, args)

    cfg.defrost()
    cfg.RANK = args.rank
    cfg.freeze()

    logger, final_output_dir, tb_log_dir = create_logger(cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related settings
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    ngpus_per_node = torch.cuda.device_count()
    args.world_size = ngpus_per_node * args.world_size
    mp.spawn(main_worker,
             nprocs=ngpus_per_node,
             args=(ngpus_per_node, args, final_output_dir, tb_log_dir))
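# torch.multiprocessing.spawn invokes its target as fn(i, *args), where i is
# the process index in [0, nprocs). A minimal sketch of the worker signature
# the launchers above rely on (demo_worker is illustrative; each project
# defines its own main_worker elsewhere):
import torch.multiprocessing as mp

def demo_worker(local_rank, ngpus_per_node, message):
    # local_rank is injected by mp.spawn; the remaining arguments come from
    # the args tuple passed at launch time.
    print('worker {}/{}: {}'.format(local_rank, ngpus_per_node, message))

if __name__ == '__main__':
    mp.spawn(demo_worker, nprocs=2, args=(2, 'hello'))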
def main():
    # Launch multi-process distributed training, or a single worker process.
    args = parse_args()
    update_config(cfg, args)

    cfg.defrost()
    cfg.RANK = args.rank
    cfg.freeze()

    logger, final_output_dir, tb_log_dir = create_logger(cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or cfg.MULTIPROCESSING_DISTRIBUTED

    ngpus_per_node = torch.cuda.device_count()
    if cfg.MULTIPROCESSING_DISTRIBUTED:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes:
        # one main_worker process per GPU
        mp.spawn(main_worker,
                 nprocs=ngpus_per_node,
                 args=(ngpus_per_node, args, final_output_dir, tb_log_dir))
    else:
        # Simply call the main_worker function
        main_worker(','.join([str(i) for i in cfg.GPUS]), ngpus_per_node, args,
                    final_output_dir, tb_log_dir)
def main():
    args = parse_args()
    update_config(cfg, args)

    cfg.defrost()
    cfg.freeze()

    record_prefix = './eval2D_results_'
    if args.is_vis:
        result_dir = record_prefix + cfg.EXP_NAME
        mse2d_lst = np.loadtxt(os.path.join(result_dir, 'mse2d_each_joint.txt'))
        PCK2d_lst = np.loadtxt(os.path.join(result_dir, 'PCK2d.txt'))
        plot_performance(PCK2d_lst[1, :], PCK2d_lst[0, :], mse2d_lst)
        exit()

    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model_path = args.model_path
    is_vis = args.is_vis

    # FP16 SETTING
    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print("Warning: if --fp16 is not used, static_loss_scale will be ignored.")

    model = eval(cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)

    # # calculate GFLOPS
    # dump_input = torch.rand(
    #     (5, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0]))
    # print(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))
    # ops, params = get_model_complexity_info(
    #     model, (3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0]),
    #     as_strings=True, print_per_layer_stat=True, verbose=True)
    # input()

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.MODEL.SYNC_BN and not args.distributed:
        print('Warning: Sync BatchNorm is only supported in distributed training.')

    if args.gpu != -1:
        device = torch.device('cuda:' + str(args.gpu))
        torch.cuda.set_device(args.gpu)
    else:
        device = torch.device('cpu')

    # load model state
    if model_path:
        print("Loading model:", model_path)
        ckpt = torch.load(model_path)  # , map_location='cpu')
        if 'state_dict' not in ckpt.keys():
            state_dict = ckpt
        else:
            state_dict = ckpt['state_dict']
            print('Model epoch {}'.format(ckpt['epoch']))
        for key in list(state_dict.keys()):
            new_key = key.replace("module.", "")
            state_dict[new_key] = state_dict.pop(key)
        model.load_state_dict(state_dict, strict=True)

    model.to(device)

    # calculate GFLOPS
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0])).to(device)
    print(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    model.eval()

    # inference_dataset = eval('dataset.{}'.format(cfg.DATASET.TEST_DATASET[0].replace('_kpt', '')))(
    #     cfg.DATA_DIR,
    #     cfg.DATASET.TEST_SET,
    #     transform=transform
    # )
    inference_dataset = eval('dataset.{}'.format(
        cfg.DATASET.TEST_DATASET[0].replace('_kpt', '')))(
            cfg.DATA_DIR,
            cfg.DATASET.TEST_SET,
            transforms=build_transforms(cfg, is_train=False))

    batch_size = args.batch_size
    data_loader = torch.utils.data.DataLoader(
        inference_dataset,
        batch_size=batch_size,  # 48
        shuffle=False,
        num_workers=min(8, batch_size),  # 8
        pin_memory=False)

    print('\nEvaluation loader information:\n' + str(data_loader.dataset))

    n_joints = cfg.DATASET.NUM_JOINTS
    th2d_lst = np.array([i for i in range(1, 50)])
    PCK2d_lst = np.zeros((len(th2d_lst), ))
    mse2d_lst = np.zeros((n_joints, ))
    visibility_lst = np.zeros((n_joints, ))

    print('Start evaluating... [Batch size: {}]\n'.format(data_loader.batch_size))

    with torch.no_grad():
        pose2d_mse_loss = JointsMSELoss().to(device)
        infer_time = [0, 0]
        start_time = time.time()
        for i, ret in enumerate(data_loader):
            # pose2d_gt: b x 21 x 2 is [u, v], 0 <= u < 64, 0 <= v < 64 (heatmap size)
            # visibility: b x 21, vis = 0/1
            imgs = ret['imgs']
            pose2d_gt = ret['pose2d']       # b [x v] x 21 x 2
            visibility = ret['visibility']  # b [x v] x 21 x 1

            s1 = time.time()
            if 'CPM' == cfg.MODEL.NAME:
                pose2d_gt = pose2d_gt.view(-1, *pose2d_gt.shape[-2:])
                # 6 groups of heatmaps, each of which has size (1, 22, 32, 32)
                heatmap_lst = model(imgs.to(device), ret['centermaps'].to(device))
                heatmaps = heatmap_lst[-1][:, 1:]
                pose2d_pred = data_loader.dataset.get_kpts(heatmaps)
                hm_size = heatmap_lst[-1].shape[-1]  # 32
            else:
                if cfg.MODEL.NAME == 'pose_hrnet_transformer':
                    # imgs: b(1) x (4*seq_len) x 3 x 256 x 256
                    n_batches, seq_len = imgs.shape[0], imgs.shape[1] // 4
                    idx_lst = torch.tensor([4 * i for i in range(seq_len)])
                    imgs = torch.stack([
                        imgs[b, idx_lst + cam_idx] for b in range(n_batches)
                        for cam_idx in range(4)
                    ])  # (b*4) x seq_len x 3 x 256 x 256
                    pose2d_pred, heatmaps_pred, _ = model(imgs.cuda(device))  # (b*4) x 21 x 2
                    pose2d_gt = pose2d_gt[:, 4 * (seq_len // 2):4 * (seq_len // 2 + 1)] \
                        .contiguous().view(-1, *pose2d_pred.shape[-2:])  # (b*4) x 21 x 2
                    visibility = visibility[:, 4 * (seq_len // 2):4 * (seq_len // 2 + 1)] \
                        .contiguous().view(-1, *visibility.shape[-2:])  # (b*4) x 21
                else:
                    if 'Aggr' in cfg.MODEL.NAME:
                        # imgs: b x (4*5) x 3 x 256 x 256
                        n_batches, seq_len = imgs.shape[0], len(cfg.DATASET.SEQ_IDX)
                        true_batch_size = imgs.shape[1] // seq_len
                        pose2d_gt = torch.cat([
                            pose2d_gt[b, true_batch_size * (seq_len // 2):
                                      true_batch_size * (seq_len // 2 + 1)]
                            for b in range(n_batches)
                        ], dim=0)
                        visibility = torch.cat([
                            visibility[b, true_batch_size * (seq_len // 2):
                                       true_batch_size * (seq_len // 2 + 1)]
                            for b in range(n_batches)
                        ], dim=0)
                        imgs = torch.cat([
                            imgs[b, true_batch_size * j:true_batch_size * (j + 1)]
                            for j in range(seq_len) for b in range(n_batches)
                        ], dim=0)  # (b*4*5) x 3 x 256 x 256
                        heatmaps_pred, _ = model(imgs.to(device))
                    else:
                        pose2d_gt = pose2d_gt.view(-1, *pose2d_gt.shape[-2:])
                        heatmaps_pred, _ = model(imgs.to(device))  # b x 21 x 64 x 64

                    pose2d_pred = get_final_preds(heatmaps_pred, cfg.MODEL.HEATMAP_SOFTMAX)  # b x 21 x 2
                    hm_size = heatmaps_pred.shape[-1]  # 64

            if i > 20:
                infer_time[0] += 1
                infer_time[1] += time.time() - s1

            # rescale to the original image before DLT
            if 'RHD' in cfg.DATASET.TEST_DATASET[0]:
                crop_size, corner = ret['crop_size'], ret['corner']
                crop_size, corner = crop_size.view(-1, 1, 1), corner.unsqueeze(1)  # b x 1 x 1; b x 2 x 1
                pose2d_pred = pose2d_pred.cpu() * crop_size / hm_size + corner
                pose2d_gt = pose2d_gt * crop_size / hm_size + corner
            else:
                orig_width, orig_height = data_loader.dataset.orig_img_size
                pose2d_pred[:, :, 0] *= orig_width / hm_size
                pose2d_pred[:, :, 1] *= orig_height / hm_size
                pose2d_gt[:, :, 0] *= orig_width / hm_size
                pose2d_gt[:, :, 1] *= orig_height / hm_size

            # for k in range(21):
            #     print(pose2d_gt[0, k].tolist(), pose2d_pred[0, k].tolist())
            # input()

            # 2D errors
            pose2d_pred, pose2d_gt, visibility = pose2d_pred.cpu().numpy(), \
                pose2d_gt.numpy(), visibility.squeeze(2).numpy()

            # import matplotlib.pyplot as plt
            # imgs = cv2.resize(imgs[0].permute(1, 2, 0).cpu().numpy(),
            #                   tuple(data_loader.dataset.orig_img_size))
            # for k in range(21):
            #     print(pose2d_gt[0, k], pose2d_pred[0, k], visibility[0, k])
            # for k in range(0, 21, 5):
            #     fig = plt.figure()
            #     ax1 = fig.add_subplot(131)
            #     ax2 = fig.add_subplot(132)
            #     ax3 = fig.add_subplot(133)
            #     ax1.imshow(cv2.cvtColor(imgs / imgs.max(), cv2.COLOR_BGR2RGB))
            #     plot_hand(ax1, pose2d_gt[0, :, 0:2], order='uv')
            #     ax2.imshow(cv2.cvtColor(imgs / imgs.max(), cv2.COLOR_BGR2RGB))
            #     plot_hand(ax2, pose2d_pred[0, :, 0:2], order='uv')
            #     ax3.imshow(heatmaps_pred[0, k].cpu().numpy())
            #     plt.show()

            mse_each_joint = np.linalg.norm(pose2d_pred - pose2d_gt, axis=2) * visibility  # b x 21
            mse2d_lst += mse_each_joint.sum(axis=0)
            visibility_lst += visibility.sum(axis=0)

            for th_idx in range(len(th2d_lst)):
                PCK2d_lst[th_idx] += np.sum((mse_each_joint < th2d_lst[th_idx]) * visibility)

            period = 10
            if i % (len(data_loader) // period) == 0:
                print("[Evaluation]{}% finished.".format(
                    period * i // (len(data_loader) // period)))
            # if i == 10: break

        print('Evaluation spent {:.2f} s\tfps: {:.1f} {:.4f}'.format(
            time.time() - start_time, infer_time[0] / infer_time[1],
            infer_time[1] / infer_time[0]))

        mse2d_lst /= visibility_lst
        PCK2d_lst /= visibility_lst.sum()

        result_dir = record_prefix + cfg.EXP_NAME
        if not os.path.exists(result_dir):
            os.mkdir(result_dir)
        mse_file = os.path.join(result_dir, 'mse2d_each_joint.txt')
        pck_file = os.path.join(result_dir, 'PCK2d.txt')
        print('Saving results to ' + mse_file)
        print('Saving results to ' + pck_file)
        np.savetxt(mse_file, mse2d_lst, fmt='%.4f')
        np.savetxt(pck_file, np.stack((th2d_lst, PCK2d_lst)))

        plot_performance(PCK2d_lst, th2d_lst, mse2d_lst)
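# The PCK bookkeeping above reduces to a small standalone computation: for
# each threshold t, PCK(t) is the fraction of *visible* joints whose 2D error
# is below t pixels. A minimal NumPy restatement (function and argument names
# are illustrative, not from the project above):
import numpy as np

def pck_curve(pred, gt, visibility, thresholds):
    # pred, gt: N x J x 2 keypoint arrays; visibility: N x J of 0/1
    err = np.linalg.norm(pred - gt, axis=2) * visibility  # N x J
    hits = [np.sum((err < t) * visibility) for t in thresholds]
    return np.array(hits) / visibility.sum()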
    if os.path.isdir(cfg.TEST.DEMO_FILE):
        image_names = []
        ls = os.listdir(cfg.TEST.DEMO_FILE)
        for file_name in sorted(ls):
            ext = file_name[file_name.rfind('.') + 1:].lower()
            if ext in image_ext:
                image_names.append(os.path.join(cfg.TEST.DEMO_FILE, file_name))
    else:
        image_names = [cfg.TEST.DEMO_FILE]

    for image_name in image_names:
        print(image_name)
        ret = detector.run(image_name)
        time_str = ''
        for stat in time_stats:
            time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
        print(time_str)


if __name__ == '__main__':
    args = parse_args()
    update_config(cfg, args.cfg)

    cfg.defrost()
    cfg.TEST.MODEL_PATH = args.TESTMODEL
    cfg.TEST.DEMO_FILE = args.DEMOFILE
    cfg.TEST.NMS = args.NMS
    cfg.DEBUG = args.DEBUG
    cfg.freeze()

    demo(cfg)
def get_network(name, batch_size):
    """Get the symbol definition and random weight of a network"""
    # changed for CIFAR input
    input_shape = (batch_size, 3, 32, 32)
    output_shape = (batch_size, 10)
    print("Use : {}".format(name))

    if name == "cifar_resnet20_v1":
        input_shape = (batch_size, 3, 32, 32)
        output_shape = (batch_size, 10)
        block = get_model('cifar_resnet20_v1', pretrained=True)
        mod, params = relay.frontend.from_mxnet(block,
                                                shape={'data': input_shape},
                                                dtype=dtype)
        net = mod["main"]
        net = relay.Function(net.params, relay.nn.softmax(net.body), None,
                             net.type_params, net.attrs)
        mod = tvm.IRModule.from_expr(net)
    elif name == 'ssd_512_resnet50_v1_voc':
        input_shape = (batch_size, 3, 512, 512)
        output_shape = (batch_size, 20)
        block = get_model('ssd_512_resnet50_v1_voc', pretrained=True)
        mod, params = relay.frontend.from_mxnet(block,
                                                shape={'data': input_shape},
                                                dtype=dtype)
        net = mod["main"]
        net = relay.Function(net.params, net.body, None, net.type_params, net.attrs)
        mod = tvm.IRModule.from_expr(net)
    elif name == 'hrnet_bottom_up':
        import sys
        sys.path.append("/datadrive/workspace/github/HRNet-Bottom-Up-Pose-Estimation")
        sys.path.append("/datadrive/workspace/github/HRNet-Bottom-Up-Pose-Estimation/lib")

        import cv2
        import torch
        import argparse
        from config import cfg, update_config
        import models

        parser = argparse.ArgumentParser(description='Train keypoints network')
        # general
        parser.add_argument('--cfg', type=str,
                            default="/datadrive/workspace/github/HRNet-Bottom-Up-Pose-Estimation/experiments/inference_demo.yaml")
        parser.add_argument('--videoFile', type=str, required=False)
        parser.add_argument('--outputDir', type=str, default='/output/')
        parser.add_argument('--inferenceFps', type=int, default=10)
        parser.add_argument('--visthre', type=float, default=0)
        parser.add_argument('opts',
                            help='Modify config options using the command-line',
                            default=None,
                            nargs=argparse.REMAINDER)
        args = parser.parse_args()
        update_config(cfg, args)

        input_shape = (1, 3, 512, 512)
        output_shape = None

        cfg.defrost()
        cfg.TEST.MODEL_FILE = "/datadrive/workspace/github/HRNet-Bottom-Up-Pose-Estimation/model/pose_coco/pose_hrnet_w32_reg_delaysep_bg01_stn_512_adam_lr1e-3_coco_x140.pth"
        print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        cfg.freeze()

        pose_model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)
        pose_model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)

        input_data = torch.randn(input_shape)
        scripted_model = torch.jit.trace(pose_model, input_data).eval()
        mod, params = relay.frontend.from_pytorch(scripted_model,
                                                  input_shapes=[('data', input_shape)],
                                                  default_dtype=dtype)
        net = mod["main"]
        net = relay.Function(net.params, net.body, None, net.type_params, net.attrs)
        mod = tvm.IRModule.from_expr(net)
    else:
        raise ValueError("Unsupported network: " + name)

    return mod, params, input_shape, output_shape
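# Once get_network returns (mod, params, ...), a typical TVM flow compiles the
# module and runs it through the graph executor, roughly as below. This is a
# generic usage sketch, not code from the project above; the "llvm" target and
# the 'data' input name are assumptions carried over from get_network:
import numpy as np
import tvm
from tvm import relay
from tvm.contrib import graph_executor

def run_network(mod, params, input_shape, dtype="float32"):
    target = "llvm"  # assumed CPU target; replace with e.g. "cuda" if desired
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target=target, params=params)
    dev = tvm.device(target, 0)
    module = graph_executor.GraphModule(lib["default"](dev))
    module.set_input('data', np.random.uniform(size=input_shape).astype(dtype))
    module.run()
    return module.get_output(0)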
def main():
    args = parse_args()
    update_config(cfg, args)

    cfg.defrost()
    cfg.freeze()

    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model_path = args.model_path
    is_vis = args.is_vis

    gpus = ','.join([str(i) for i in cfg.GPUS])
    gpu_ids = eval('[' + gpus + ']')

    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print("Warning: if --fp16 is not used, static_loss_scale will be ignored.")

    # model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=True)
    if 'pose_hrnet' in cfg.MODEL.NAME:
        model = {
            "pose_hrnet": pose_hrnet.get_pose_net,
            "pose_hrnet_softmax": pose_hrnet_softmax.get_pose_net
        }[cfg.MODEL.NAME](cfg, is_train=True)
    else:
        model = {
            "ransac": RANSACTriangulationNet,
            "alg": AlgebraicTriangulationNet,
            "vol": VolumetricTriangulationNet,
            "vol_CPM": VolumetricTriangulationNet_CPM,
            "FTL": FTLMultiviewNet
        }[cfg.MODEL.NAME](cfg, is_train=False)

    # load model state
    if model_path:
        print("Loading model:", model_path)
        ckpt = torch.load(model_path,
                          map_location='cpu' if args.gpu == -1 else 'cuda:0')
        if 'state_dict' not in ckpt.keys():
            state_dict = ckpt
        else:
            state_dict = ckpt['state_dict']
            print('Model epoch {}'.format(ckpt['epoch']))
        for key in list(state_dict.keys()):
            new_key = key.replace("module.", "")
            state_dict[new_key] = state_dict.pop(key)
        model.load_state_dict(state_dict, strict=True)

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.MODEL.SYNC_BN and not args.distributed:
        print('Warning: Sync BatchNorm is only supported in distributed training.')

    device = torch.device('cuda:' + str(args.gpu) if args.gpu != -1 else 'cpu')
    model.to(device)
    model.eval()

    # image transformer
    transform = build_transforms(cfg, is_train=False)

    inference_dataset = eval('dataset.' + cfg.DATASET.TEST_DATASET[0])(
        cfg, cfg.DATASET.TEST_SET, transform=transform)

    data_loader = torch.utils.data.DataLoader(inference_dataset,
                                              batch_size=1,
                                              shuffle=True,
                                              num_workers=0,
                                              pin_memory=False)

    print('\nValidation loader information:\n' + str(data_loader.dataset))

    with torch.no_grad():
        pose2d_mse_loss = JointsMSELoss().to(device) if args.gpu != -1 else JointsMSELoss()
        pose3d_mse_loss = Joints3DMSELoss().to(device) if args.gpu != -1 else Joints3DMSELoss()

        orig_width, orig_height = inference_dataset.orig_img_size
        heatmap_size = cfg.MODEL.HEATMAP_SIZE
        count = 4

        for i, ret in enumerate(data_loader):
            # orig_imgs: 1 x 4 x 480 x 640 x 3
            # imgs: 1 x 4 x 3 x H x W
            # pose2d_gt (bounded in 64 x 64): 1 x 4 x 21 x 2
            # pose3d_gt: 1 x 21 x 3
            # visibility: 1 x 4 x 21
            # extrinsic matrix: 1 x 4 x 3 x 4
            # intrinsic matrix: 1 x 3 x 3
            if not (i % 67 == 0):
                continue

            imgs = ret['imgs'].to(device)
            orig_imgs = ret['orig_imgs']
            pose2d_gt, pose3d_gt, visibility = ret['pose2d'], ret['pose3d'], ret['visibility']
            extrinsic_matrices, intrinsic_matrices = ret['extrinsic_matrices'], ret['intrinsic_matrix']
            # sometimes intrinsic_matrices has a shape of 3 x 3 or b x 3 x 3
            intrinsic_matrix = intrinsic_matrices[0] if len(intrinsic_matrices.shape) == 3 else intrinsic_matrices

            start_time = time.time()

            if 'pose_hrnet' in cfg.MODEL.NAME:
                pose3d_gt = pose3d_gt.to(device)
                heatmaps, _ = model(imgs[0])  # N_views x 21 x 64 x 64
                pose2d_pred = get_final_preds(heatmaps, cfg)  # N_views x 21 x 2
                proj_matrices = (intrinsic_matrix @ extrinsic_matrices).to(device)  # b x v x 3 x 4

                # rescale to the original image before DLT
                pose2d_pred[:, :, 0:1] *= orig_width / heatmap_size[0]
                pose2d_pred[:, :, 1:2] *= orig_height / heatmap_size[1]

                # 3D world coordinates: 1 x 21 x 3
                pose3d_pred = DLT_pytorch(pose2d_pred, proj_matrices.squeeze()).unsqueeze(0)
            elif 'alg' == cfg.MODEL.NAME or 'ransac' == cfg.MODEL.NAME:
                # the predicted 2D poses have been rescaled inside the triangulation model
                # pose2d_pred: 1 x N_views x 21 x 2
                # pose3d_pred: 1 x 21 x 3
                proj_matrices = intrinsic_matrix @ extrinsic_matrices  # b x v x 3 x 4
                pose3d_pred, \
                pose2d_pred, \
                heatmaps, \
                confidences_pred = model(imgs, proj_matrices.to(device))
            elif "vol" in cfg.MODEL.NAME:
                intrinsic_matrix = update_after_resize(intrinsic_matrix,
                                                       (orig_height, orig_width),
                                                       tuple(heatmap_size))
                proj_matrices = (intrinsic_matrix @ extrinsic_matrices).to(device)  # b x v x 3 x 4
                # pose3d_pred (torch.tensor)        b x 21 x 3
                # pose2d_pred (torch.tensor)        b x v x 21 x 2
                #   NOTE: the estimated 2D poses are located in the heatmap size 64(W) x 64(H)
                # heatmaps_pred (torch.tensor)      b x v x 21 x 64 x 64
                # volumes_pred (torch.tensor)
                # confidences_pred (torch.tensor)
                # cuboids_pred (list)
                # coord_volumes_pred (torch.tensor)
                # base_points_pred (torch.tensor)   b x v x 1 x 2
                if cfg.MODEL.BACKBONE_NAME == 'CPM_volumetric':
                    centermaps = ret['centermaps'].to(device)
                    pose3d_pred, \
                    pose2d_pred, \
                    heatmaps_pred, \
                    volumes_pred, \
                    confidences_pred, \
                    coord_volumes_pred, \
                    base_points_pred \
                        = model(imgs, centermaps, proj_matrices)
                else:
                    pose3d_pred, \
                    pose2d_pred, \
                    heatmaps, \
                    volumes_pred, \
                    confidences_pred, \
                    coord_volumes_pred, \
                    base_points_pred \
                        = model(imgs, proj_matrices)

                pose2d_pred[:, :, :, 0:1] *= orig_width / heatmap_size[0]
                pose2d_pred[:, :, :, 1:2] *= orig_height / heatmap_size[1]
            elif 'FTL' == cfg.MODEL.NAME:
                # pose2d_pred: 1 x 4 x 21 x 2
                # pose3d_pred: 1 x 21 x 3
                heatmaps, pose2d_pred, pose3d_pred = model(imgs.to(device),
                                                           extrinsic_matrices.to(device),
                                                           intrinsic_matrix.to(device))
                print(pose2d_pred)
                pose2d_pred = torch.cat(
                    (pose2d_pred[:, :, :, 0:1] * 640 / 64,
                     pose2d_pred[:, :, :, 1:2] * 480 / 64),
                    dim=-1)  # N_views x 21 x 2

            end_time = time.time()
            print('3D pose inference time {:.1f} ms'.format(1000 * (end_time - start_time)))

            pose3d_EPE = pose3d_mse_loss(pose3d_pred[:, 1:], pose3d_gt[:, 1:].to(device)).item()
            print('Pose3d MSE: {:.4f}\n'.format(pose3d_EPE))
            # if pose3d_EPE > 35:
            #     input()
            #     continue

            # 2D errors
            pose2d_gt[:, :, :, 0] *= orig_width / heatmap_size[0]
            pose2d_gt[:, :, :, 1] *= orig_height / heatmap_size[1]
            # for k in range(21):
            #     print(pose2d_gt[0, k].tolist(), pose2d_pred[0, k].tolist())
            # input()

            visualize(args=args,
                      imgs=np.squeeze(orig_imgs[0].numpy()),
                      pose2d_gt=np.squeeze(pose2d_gt.cpu().numpy()),
                      pose2d_pred=np.squeeze(pose2d_pred.cpu().numpy()),
                      pose3d_gt=np.squeeze(pose3d_gt.cpu().numpy()),
                      pose3d_pred=np.squeeze(pose3d_pred.cpu().numpy()))
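# DLT_pytorch and DLT_sii_pytorch above are project-specific, but linear
# triangulation itself is standard: each view contributes two rows to a
# homogeneous system A X = 0, and the 3D point is the right singular vector
# of A with the smallest singular value. A minimal sketch for a single joint
# (dlt_triangulate is illustrative, not the project's implementation):
import torch

def dlt_triangulate(points2d, proj_matrices):
    # points2d: V x 2 pixel coordinates; proj_matrices: V x 3 x 4
    rows = []
    for (u, v), P in zip(points2d, proj_matrices):
        rows.append(u * P[2] - P[0])
        rows.append(v * P[2] - P[1])
    A = torch.stack(rows)           # 2V x 4
    _, _, Vh = torch.linalg.svd(A)
    X = Vh[-1]                      # homogeneous solution, defined up to scale
    return X[:3] / X[3]             # 3D point in world coordinates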
def main():
    args = parse_args()
    update_config(cfg, args)

    cfg.defrost()
    cfg.freeze()

    if args.is_vis:
        result_dir = prefix + cfg.EXP_NAME
        mse2d_lst = np.loadtxt(os.path.join(result_dir, 'mse2d_each_joint.txt'))
        mse3d_lst = np.loadtxt(os.path.join(result_dir, 'mse3d_each_joint.txt'))
        PCK2d_lst = np.loadtxt(os.path.join(result_dir, 'PCK2d.txt'))
        PCK3d_lst = np.loadtxt(os.path.join(result_dir, 'PCK3d.txt'))
        plot_performance(PCK2d_lst[1, :], PCK2d_lst[0, :], PCK3d_lst[1, :],
                         PCK3d_lst[0, :], mse2d_lst, mse3d_lst)
        exit()

    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model_path = args.model_path
    is_vis = args.is_vis

    gpus = ','.join([str(i) for i in cfg.GPUS])
    gpu_ids = eval('[' + gpus + ']')

    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print("Warning: if --fp16 is not used, static_loss_scale will be ignored.")

    if 'pose_hrnet' in cfg.MODEL.NAME:
        model = {
            "pose_hrnet": pose_hrnet.get_pose_net,
            "pose_hrnet_softmax": pose_hrnet_softmax.get_pose_net
        }[cfg.MODEL.NAME](cfg, is_train=True)
    else:
        model = {
            "ransac": RANSACTriangulationNet,
            "alg": AlgebraicTriangulationNet,
            "vol": VolumetricTriangulationNet,
            "vol_CPM": VolumetricTriangulationNet_CPM,
            "FTL": FTLMultiviewNet
        }[cfg.MODEL.NAME](cfg, is_train=False)

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.MODEL.SYNC_BN and not args.distributed:
        print('Warning: Sync BatchNorm is only supported in distributed training.')

    # load model state
    if model_path:
        print("Loading model:", model_path)
        ckpt = torch.load(model_path,
                          map_location='cpu' if args.gpu == -1 else 'cuda:0')
        if 'state_dict' not in ckpt.keys():
            state_dict = ckpt
        else:
            state_dict = ckpt['state_dict']
            print('Model epoch {}'.format(ckpt['epoch']))
        for key in list(state_dict.keys()):
            new_key = key.replace("module.", "")
            state_dict[new_key] = state_dict.pop(key)
        model.load_state_dict(state_dict, strict=False)

    device = torch.device('cuda:' + str(args.gpu) if args.gpu != -1 else 'cpu')
    model.to(device)
    model.eval()

    # image transformer
    transform = build_transforms(cfg, is_train=False)

    inference_dataset = eval('dataset.' + cfg.DATASET.DATASET[0])(
        cfg, cfg.DATASET.TEST_SET, transform=transform)
    inference_dataset.n_views = eval(args.views)

    batch_size = args.batch_size
    if platform.system() == 'Linux':  # for Linux
        data_loader = torch.utils.data.DataLoader(inference_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=8,
                                                  pin_memory=False)
    else:  # for Windows
        batch_size = 1
        data_loader = torch.utils.data.DataLoader(inference_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=0,
                                                  pin_memory=False)

    print('\nEvaluation loader information:\n' + str(data_loader.dataset))
    print('Evaluation batch size: {}\n'.format(batch_size))

    th2d_lst = np.array([i for i in range(1, 50)])
    PCK2d_lst = np.zeros((len(th2d_lst), ))
    mse2d_lst = np.zeros((21, ))
    th3d_lst = np.array([i for i in range(1, 51)])
    PCK3d_lst = np.zeros((len(th3d_lst), ))
    mse3d_lst = np.zeros((21, ))
    visibility_lst = np.zeros((21, ))

    with torch.no_grad():
        pose2d_mse_loss = JointsMSELoss().cuda(args.gpu) if args.gpu != -1 else JointsMSELoss()
        pose3d_mse_loss = Joints3DMSELoss().cuda(args.gpu) if args.gpu != -1 else Joints3DMSELoss()

        infer_time = [0, 0]
        start_time = time.time()
        n_valid = 0

        model.orig_img_size = inference_dataset.orig_img_size
        orig_width, orig_height = model.orig_img_size
        heatmap_size = cfg.MODEL.HEATMAP_SIZE

        for i, ret in enumerate(data_loader):
            # orig_imgs: b x 4 x 480 x 640 x 3
            # imgs: b x 4 x 3 x H x W
            # pose2d_gt: b x 4 x 21 x 2 (have not been transformed)
            # pose3d_gt: b x 21 x 3
            # visibility: b x 4 x 21
            # extrinsic matrix: b x 4 x 3 x 4
            # intrinsic matrix: b x 3 x 3
            # if i < count: continue
            imgs = ret['imgs'].to(device)
            orig_imgs = ret['orig_imgs']
            pose2d_gt, pose3d_gt, visibility = ret['pose2d'], ret['pose3d'], ret['visibility']
            extrinsic_matrices, intrinsic_matrices = ret['extrinsic_matrices'], ret['intrinsic_matrix']
            # sometimes intrinsic_matrices has a shape of 3 x 3 or b x 3 x 3
            intrinsic_matrix = intrinsic_matrices[0] if len(intrinsic_matrices.shape) == 3 else intrinsic_matrices

            batch_size = orig_imgs.shape[0]
            n_joints = pose2d_gt.shape[2]
            pose2d_gt = pose2d_gt.view(-1, *pose2d_gt.shape[2:]).numpy()  # b*v x 21 x 2
            pose3d_gt = pose3d_gt.numpy()  # b x 21 x 3
            visibility = visibility.view(-1, visibility.shape[2]).numpy()  # b*v x 21

            if 'pose_hrnet' in cfg.MODEL.NAME:
                s1 = time.time()
                heatmaps, _ = model(imgs.view(-1, *imgs.shape[2:]))  # b*v x 21 x 64 x 64
                # NOTE: the estimated 2D poses are located in the heatmap size 64(W) x 64(H)
                pose2d_pred = get_final_preds(heatmaps, cfg).view(batch_size, -1, n_joints, 2)  # b x v x 21 x 2
                proj_matrices = (intrinsic_matrix @ extrinsic_matrices).to(device)  # b x v x 3 x 4

                # rescale to the original image before DLT
                pose2d_pred[:, :, :, 0:1] *= orig_width / heatmap_size[0]
                pose2d_pred[:, :, :, 1:2] *= orig_height / heatmap_size[1]

                # 3D world coordinates: b x 21 x 3
                pose3d_pred = torch.cat([
                    DLT_sii_pytorch(pose2d_pred[:, :, k], proj_matrices).unsqueeze(1)
                    for k in range(n_joints)
                ], dim=1)

                if i > 20:
                    infer_time[0] += 1
                    infer_time[1] += time.time() - s1
                    # print('FPS {:.1f}'.format(infer_time[0] / infer_time[1]))
            elif 'alg' == cfg.MODEL.NAME or 'ransac' == cfg.MODEL.NAME:
                s1 = time.time()
                # pose2d_pred: b x N_views x 21 x 2
                # NOTE: the estimated 2D poses are located in the original image of size 640(W) x 480(H)
                # pose3d_pred: b x 21 x 3 [world coordinates]
                proj_matrices = (intrinsic_matrix @ extrinsic_matrices).to(device)  # b x v x 3 x 4
                pose3d_pred, \
                pose2d_pred, \
                heatmaps, \
                confidences_pred = model(imgs.to(device), proj_matrices.to(device))
                if i > 20:
                    infer_time[0] += 1
                    infer_time[1] += time.time() - s1
            elif "vol" in cfg.MODEL.NAME:
                intrinsic_matrix = update_after_resize(intrinsic_matrix,
                                                       (orig_height, orig_width),
                                                       tuple(heatmap_size))
                proj_matrices = (intrinsic_matrix @ extrinsic_matrices).to(device)  # b x v x 3 x 4
                s1 = time.time()
                # pose3d_pred (torch.tensor)        b x 21 x 3
                # pose2d_pred (torch.tensor)        b x v x 21 x 2
                #   NOTE: the estimated 2D poses are located in the heatmap size 64(W) x 64(H)
                # heatmaps_pred (torch.tensor)      b x v x 21 x 64 x 64
                # volumes_pred (torch.tensor)
                # confidences_pred (torch.tensor)
                # cuboids_pred (list)
                # coord_volumes_pred (torch.tensor)
                # base_points_pred (torch.tensor)   b x v x 1 x 2
                if cfg.MODEL.BACKBONE_NAME == 'CPM_volumetric':
                    centermaps = ret['centermaps'].to(device)
                    heatmaps_gt = ret['heatmaps']
                    pose3d_pred, \
                    pose2d_pred, \
                    heatmaps_pred, \
                    volumes_pred, \
                    confidences_pred, \
                    coord_volumes_pred, \
                    base_points_pred \
                        = model(imgs, centermaps, proj_matrices)
                else:
                    pose3d_pred, \
                    pose2d_pred, \
                    heatmaps, \
                    volumes_pred, \
                    confidences_pred, \
                    coord_volumes_pred, \
                    base_points_pred \
                        = model(imgs, proj_matrices)
                if i > 20:
                    infer_time[0] += 1
                    infer_time[1] += time.time() - s1
                pose2d_pred[:, :, :, 0:1] *= orig_width / heatmap_size[0]
                pose2d_pred[:, :, :, 1:2] *= orig_height / heatmap_size[1]

            # 2D errors
            pose2d_gt[:, :, 0] *= orig_width / heatmap_size[0]
            pose2d_gt[:, :, 1] *= orig_height / heatmap_size[1]
            pose2d_pred = pose2d_pred.view(-1, n_joints, 2).cpu().numpy()  # b*v x 21 x 2
            # debug probe (blocks evaluation if enabled):
            # for k in range(21):
            #     print(pose2d_gt[0, k].tolist(), pose2d_pred[0, k].tolist())
            # input()

            mse_each_joint = np.linalg.norm(pose2d_pred - pose2d_gt, axis=2) * visibility  # b*v x 21
            mse2d_lst += mse_each_joint.sum(axis=0)
            visibility_lst += visibility.sum(axis=0)
            for th_idx in range(len(th2d_lst)):
                PCK2d_lst[th_idx] += np.sum((mse_each_joint < th2d_lst[th_idx]) * visibility)

            # 3D errors
            # for k in range(21):
            #     print(pose3d_gt[0, k].tolist(), pose3d_pred[0, k].tolist())
            # input()
            visibility = visibility.reshape((batch_size, -1, n_joints))  # b x v x 21
            for b in range(batch_size):
                # only count samples where at least 65% of the joints are visible
                if np.sum(visibility[b]) >= visibility[b].size * 0.65:
                    n_valid += 1
                    mse_each_joint = np.linalg.norm(
                        pose3d_pred[b].cpu().numpy() - pose3d_gt[b], axis=1)  # 21
                    mse3d_lst += mse_each_joint
                    for th_idx in range(len(th3d_lst)):
                        PCK3d_lst[th_idx] += np.sum(mse_each_joint < th3d_lst[th_idx])

            if i % (len(data_loader) // 5) == 0:
                print("[Evaluation]{}% finished.".format(20 * i // (len(data_loader) // 5)))
            # if i == 10: break

        print('Evaluation spent {:.2f} s\tFPS: {:.1f}'.format(
            time.time() - start_time, infer_time[0] / infer_time[1]))

        mse2d_lst /= visibility_lst
        PCK2d_lst /= visibility_lst.sum()
        mse3d_lst /= n_valid
        PCK3d_lst /= (n_valid * 21)
        plot_performance(PCK2d_lst, th2d_lst, PCK3d_lst, th3d_lst, mse2d_lst, mse3d_lst)

        # `result` (the parent results directory) and `prefix` are module-level
        # names defined elsewhere in the original script.
        if not os.path.exists(result):
            os.mkdir(result)
        result_dir = prefix + cfg.EXP_NAME
        if not os.path.exists(result_dir):
            os.mkdir(result_dir)
        np.savetxt(os.path.join(result_dir, 'mse2d_each_joint.txt'), mse2d_lst, fmt='%.4f')
        np.savetxt(os.path.join(result_dir, 'mse3d_each_joint.txt'), mse3d_lst, fmt='%.4f')
        np.savetxt(os.path.join(result_dir, 'PCK2d.txt'), np.stack((th2d_lst, PCK2d_lst)))
        np.savetxt(os.path.join(result_dir, 'PCK3d.txt'), np.stack((th3d_lst, PCK3d_lst)))
def get_net(file_config, weights):
    cfg.defrost()
    cfg.merge_from_file(file_config)
    model = HpeHRNet(cfg, weights)
    return model
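# Hypothetical usage of get_net (both file names are placeholders, not taken
# from the snippet above):
# model = get_net('experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml',
#                 'weights/pose_hrnet_w32_256x192.pth')
# model.eval()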