Example #1
def main():
    args = parse_args()
    update_config(cfg, args)
    cfg.defrost()
    cfg.TEST.MODEL_FILE = HRNET_PATH + '/models/pytorch/pose_coco/pose_hrnet_w32_256x192.pth'
    cfg.TEST.USE_GT_BBOX = False
    cfg.GPUS = (0, )
    cfg.freeze()

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')
    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)
    model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([transforms.ToTensor(), normalize])

    predict_imgs(model, args.imgs, args.bbox, args.out, transform, 0.85)
Example #2
def main():
    args = parse_args()
    update_config(cfg, args)
    cfg.defrost()
    cfg.RANK = args.rank
    cfg.freeze()

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    #logger.info(cfg)

    if cfg.WITHOUT_EVAL:
        input(
            "[WARNING] According to the configuration, there will be no evaluation. If evaluation is necessary, please terminate this process. [press Enter to continue]"
        )
        logger.info("=> Training without evaluation")

    ngpus_per_node = len(cfg.GPUS)
    if ngpus_per_node == 1:
        warnings.warn(
            'You have chosen a specific GPU. This will completely disable data parallelism.'
        )

    # Simply call main_worker function
    main_worker(','.join([str(i) for i in cfg.GPUS]), ngpus_per_node, args,
                final_output_dir, tb_log_dir)
Example #3
def __prepare_fine_tune(self):
    cfg.defrost()
    cfg.TRAIN.ANNO_FILE = cfg.FINE_TUNE.ANNO_FILE
    cfg.TRAIN.YOLO_EPOCHS = cfg.FINE_TUNE.YOLO_EPOCHS
    cfg.TRAIN.LR_INIT = cfg.FINE_TUNE.LR_INIT
    cfg.TRAIN.LR_END = cfg.FINE_TUNE.LR_END
    cfg.TRAIN.WARMUP_EPOCHS = cfg.FINE_TUNE.WARMUP_EPOCHS
    cfg.freeze()
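
The pattern above — defrost, mutate, freeze — is the standard way to edit a yacs CfgNode after it has been locked: a frozen node raises on any attribute assignment. A minimal self-contained sketch of that contract (the node and key names here are illustrative, not taken from the repos above):

from yacs.config import CfgNode as CN

# Build and lock a tiny config; all names are placeholders.
cfg = CN()
cfg.TRAIN = CN()
cfg.TRAIN.LR_INIT = 1e-3
cfg.freeze()

# cfg.TRAIN.LR_INIT = 1e-4   # would raise: the node is immutable

cfg.defrost()                 # unlock for mutation
cfg.TRAIN.LR_INIT = 1e-4
cfg.freeze()                  # lock again so later code cannot drift
print(cfg.TRAIN.LR_INIT)      # 0.0001
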
Example #4
def main():
    args = parse_args()
    update_config(cfg, args)

    cfg.defrost()
    cfg.freeze()

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)

    model.eval()
    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    summary(model, dump_input)
Example #5
def main():
    args = parse_args()
    update_config(cfg, args)

    # After all config updates are complete, record the node rank in the config
    cfg.defrost()
    cfg.RANK = args.rank
    cfg.freeze()

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train'
    )

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    # Get the total number of nodes
    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or cfg.MULTIPROCESSING_DISTRIBUTED

    # Detect how many GPUs this machine has and spawn a matching number of processes.
    # To train on specific GPUs, mask the others manually with CUDA_VISIBLE_DEVICES.
    ngpus_per_node = torch.cuda.device_count()
    if cfg.MULTIPROCESSING_DISTRIBUTED:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(
            main_worker,
            nprocs=ngpus_per_node,
            args=(ngpus_per_node, args, final_output_dir, tb_log_dir)
        )
    else:
        # Simply call main_worker function
        main_worker(
            ','.join([str(i) for i in cfg.GPUS]),
            ngpus_per_node,
            args,
            final_output_dir,
            tb_log_dir
        )
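
The env:// branch above leaves rank and world-size discovery to environment variables that the launcher normally sets. A hedged, single-process sanity check of that contract (gloo backend; every variable is faked locally):

import os
import torch.distributed as dist

# Fake the launcher-provided variables for a 1-process group.
os.environ.setdefault('MASTER_ADDR', '127.0.0.1')
os.environ.setdefault('MASTER_PORT', '29500')
os.environ.setdefault('WORLD_SIZE', '1')
os.environ.setdefault('RANK', '0')

dist.init_process_group(backend='gloo', init_method='env://')
print('world size:', dist.get_world_size())
dist.destroy_process_group()
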
Example #6
def update_my_config():
    cfg.defrost()

    cfg.merge_from_file('experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml')

    opts = [
        "TEST.MODEL_FILE",
        "/mnt/models/HRNet/pose_mpii/pose_hrnet_w32_256x256.pth"
    ]
    cfg.merge_from_list(opts)

    cfg.OUTPUT_DIR = "output_test"

    cfg.LOG_DIR = "log_test"

    cfg.freeze()
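
merge_from_file pulls in a YAML experiment file, while merge_from_list consumes a flat [key1, value1, key2, value2, ...] list — which is why opts above pairs "TEST.MODEL_FILE" with its path. A minimal sketch of the list semantics (key names are illustrative):

from yacs.config import CfgNode as CN

cfg = CN()
cfg.TEST = CN()
cfg.TEST.MODEL_FILE = ''

# Keys and values alternate in a single flat list.
cfg.merge_from_list(['TEST.MODEL_FILE', '/tmp/model.pth'])
print(cfg.TEST.MODEL_FILE)  # /tmp/model.pth
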
Example #7
def predict(cfg_path, img_dir, bbox_dir, out_file, param_overrides=()):
    # update_config needs some hardcoded params, fake them here
    class args:
        cfg = cfg_path
        opts = param_overrides
        modelDir = ''
        logDir = ''
        dataDir = ''

    update_config(cfg, args)
    cfg.defrost()
    cfg.TEST.MODEL_FILE = '../hrnet/pose_hrnet_w32_256x192.pth'
    cfg.TEST.USE_GT_BBOX = False
    cfg.TEST.BATCH_SIZE_PER_GPU = 64
    cfg.GPUS = (0, )
    cfg.freeze()

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, cfg_path, 'valid')
    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)
    model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([transforms.ToTensor(), normalize])

    detection_thresh = 0.8

    img_dir = os.path.join(img_dir, '*')  # Dataset requires a glob format
    predict_imgs(model, img_dir, bbox_dir, out_file, transform,
                 detection_thresh)
Example #8
def main():
    args = parse_args()
    set_seed(int(args.seed))
    update_config(cfg, args)

    cfg.defrost()
    cfg.RANK = args.rank
    cfg.freeze()

    logger, final_output_dir, tb_log_dir = create_logger(cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    ngpus_per_node = torch.cuda.device_count()

    args.world_size = ngpus_per_node * args.world_size
    mp.spawn(main_worker,
             nprocs=ngpus_per_node,
             args=(ngpus_per_node, args, final_output_dir, tb_log_dir))
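
torch.multiprocessing.spawn starts nprocs copies of the target function and injects each process's index as the first positional argument, ahead of the args tuple — which is why main_worker above receives ngpus_per_node, args, final_output_dir and tb_log_dir after its rank. A minimal sanity check of that calling convention (the worker is a stand-in, not the repo's main_worker):

import torch.multiprocessing as mp

def demo_worker(rank, nprocs, msg):
    # rank (0..nprocs-1) is supplied by mp.spawn, not by the caller
    print('worker {}/{}: {}'.format(rank, nprocs, msg))

if __name__ == '__main__':
    mp.spawn(demo_worker, nprocs=2, args=(2, 'hello'))
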
Example #9
def main():  # distributed multiprocessing or a single worker process
    args = parse_args()
    update_config(cfg, args)

    cfg.defrost()
    cfg.RANK = args.rank
    cfg.freeze()

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or cfg.MULTIPROCESSING_DISTRIBUTED

    ngpus_per_node = torch.cuda.device_count()
    if cfg.MULTIPROCESSING_DISTRIBUTED:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes:
        # one main_worker process per GPU
        mp.spawn(main_worker,
                 nprocs=ngpus_per_node,
                 args=(ngpus_per_node, args, final_output_dir, tb_log_dir))
    else:
        # Simply call main_worker function
        main_worker(','.join([str(i) for i in cfg.GPUS]), ngpus_per_node, args,
                    final_output_dir, tb_log_dir)
Example #10
def main():
    args = parse_args()

    update_config(cfg, args)
    cfg.defrost()
    cfg.freeze()

    record_prefix = './eval2D_results_'
    if args.is_vis:
        result_dir = record_prefix + cfg.EXP_NAME
        mse2d_lst = np.loadtxt(os.path.join(result_dir,
                                            'mse2d_each_joint.txt'))
        PCK2d_lst = np.loadtxt(os.path.join(result_dir, 'PCK2d.txt'))

        plot_performance(PCK2d_lst[1, :], PCK2d_lst[0, :], mse2d_lst)
        exit()

    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model_path = args.model_path
    is_vis = args.is_vis

    # FP16 SETTING
    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print(
                "Warning:  if --fp16 is not used, static_loss_scale will be ignored."
            )

    model = eval(cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)

    # # calculate GFLOPS
    # dump_input = torch.rand(
    #     (5, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0])
    # )

    # print(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    # ops, params = get_model_complexity_info(
    #    model, (3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0]),
    #    as_strings=True, print_per_layer_stat=True, verbose=True)
    # input()

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.MODEL.SYNC_BN and not args.distributed:
        print(
            'Warning: Sync BatchNorm is only supported in distributed training.'
        )

    if args.gpu != -1:
        device = torch.device('cuda:' + str(args.gpu))
        torch.cuda.set_device(args.gpu)
    else:
        device = torch.device('cpu')
    # load model state
    if model_path:
        print("Loading model:", model_path)
        ckpt = torch.load(model_path)  #, map_location='cpu')
        if 'state_dict' not in ckpt.keys():
            state_dict = ckpt
        else:
            state_dict = ckpt['state_dict']
            print('Model epoch {}'.format(ckpt['epoch']))

        for key in list(state_dict.keys()):
            new_key = key.replace("module.", "")
            state_dict[new_key] = state_dict.pop(key)

        model.load_state_dict(state_dict, strict=True)

    model.to(device)

    # calculate GFLOPS
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0])).to(device)

    print(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    model.eval()

    # inference_dataset = eval('dataset.{}'.format(cfg.DATASET.TEST_DATASET[0].replace('_kpt','')))(
    #     cfg.DATA_DIR,
    #     cfg.DATASET.TEST_SET,
    #     transform=transform
    # )
    inference_dataset = eval('dataset.{}'.format(
        cfg.DATASET.TEST_DATASET[0].replace('_kpt', '')))(
            cfg.DATA_DIR,
            cfg.DATASET.TEST_SET,
            transforms=build_transforms(cfg, is_train=False))

    batch_size = args.batch_size
    data_loader = torch.utils.data.DataLoader(
        inference_dataset,
        batch_size=batch_size,  #48
        shuffle=False,
        num_workers=min(8, batch_size),  #8
        pin_memory=False)

    print('\nEvaluation loader information:\n' + str(data_loader.dataset))
    n_joints = cfg.DATASET.NUM_JOINTS
    th2d_lst = np.array([i for i in range(1, 50)])
    PCK2d_lst = np.zeros((len(th2d_lst), ))
    mse2d_lst = np.zeros((n_joints, ))
    visibility_lst = np.zeros((n_joints, ))

    print('Start evaluating... [Batch size: {}]\n'.format(
        data_loader.batch_size))
    with torch.no_grad():
        pose2d_mse_loss = JointsMSELoss().to(device)
        infer_time = [0, 0]
        start_time = time.time()
        for i, ret in enumerate(data_loader):
            # pose2d_gt: b x 21 x 2 is [u,v] 0<=u<64, 0<=v<64 (heatmap size)
            # visibility: b x 21 vis=0/1
            imgs = ret['imgs']
            pose2d_gt = ret['pose2d']  # b [x v] x 21 x 2
            visibility = ret['visibility']  # b [x v] x 21 x 1

            s1 = time.time()
            if 'CPM' == cfg.MODEL.NAME:
                pose2d_gt = pose2d_gt.view(-1, *pose2d_gt.shape[-2:])
                heatmap_lst = model(
                    imgs.to(device), ret['centermaps'].to(device)
                )  # 6 groups of heatmaps, each of which has size (1,22,32,32)
                heatmaps = heatmap_lst[-1][:, 1:]
                pose2d_pred = data_loader.dataset.get_kpts(heatmaps)
                hm_size = heatmap_lst[-1].shape[-1]  # 32
            else:
                if cfg.MODEL.NAME == 'pose_hrnet_transformer':
                    # imgs: b(1) x (4*seq_len) x 3 x 256 x 256
                    n_batches, seq_len = imgs.shape[0], imgs.shape[1] // 4
                    idx_lst = torch.tensor([4 * i for i in range(seq_len)])
                    imgs = torch.stack([
                        imgs[b, idx_lst + cam_idx] for b in range(n_batches)
                        for cam_idx in range(4)
                    ])  # (b*4) x seq_len x 3 x 256 x 256

                    pose2d_pred, heatmaps_pred, _ = model(
                        imgs.cuda(device))  # (b*4) x 21 x 2
                    pose2d_gt = pose2d_gt[:, 4 * (seq_len // 2):4 * (
                        seq_len // 2 + 1)].contiguous().view(
                            -1, *pose2d_pred.shape[-2:])  # (b*4) x 21 x 2
                    visibility = visibility[:, 4 * (seq_len // 2):4 * (
                        seq_len // 2 + 1)].contiguous().view(
                            -1, *visibility.shape[-2:])  # (b*4) x 21

                else:
                    if 'Aggr' in cfg.MODEL.NAME:
                        # imgs: b x (4*5) x 3 x 256 x 256
                        n_batches, seq_len = imgs.shape[0], len(
                            cfg.DATASET.SEQ_IDX)
                        true_batch_size = imgs.shape[1] // seq_len
                        pose2d_gt = torch.cat([
                            pose2d_gt[b, true_batch_size *
                                      (seq_len // 2):true_batch_size *
                                      (seq_len // 2 + 1)]
                            for b in range(n_batches)
                        ],
                                              dim=0)

                        visibility = torch.cat([
                            visibility[b, true_batch_size *
                                       (seq_len // 2):true_batch_size *
                                       (seq_len // 2 + 1)]
                            for b in range(n_batches)
                        ],
                                               dim=0)

                        imgs = torch.cat([
                            imgs[b, true_batch_size * j:true_batch_size *
                                 (j + 1)] for j in range(seq_len)
                            for b in range(n_batches)
                        ],
                                         dim=0)  # (b*4*5) x 3 x 256 x 256

                        heatmaps_pred, _ = model(imgs.to(device))
                    else:
                        pose2d_gt = pose2d_gt.view(-1, *pose2d_gt.shape[-2:])
                        heatmaps_pred, _ = model(
                            imgs.to(device))  # b x 21 x 64 x 64

                    pose2d_pred = get_final_preds(
                        heatmaps_pred, cfg.MODEL.HEATMAP_SOFTMAX)  # b x 21 x 2

                hm_size = heatmaps_pred.shape[-1]  # 64

            if i > 20:
                infer_time[0] += 1
                infer_time[1] += time.time() - s1

            # rescale heatmap coordinates to the original image size

            if 'RHD' in cfg.DATASET.TEST_DATASET[0]:
                crop_size, corner = ret['crop_size'], ret['corner']
                crop_size, corner = crop_size.view(-1, 1, 1), corner.unsqueeze(
                    1)  # b x 1 x 1; b x 2 x 1
                pose2d_pred = pose2d_pred.cpu() * crop_size / hm_size + corner
                pose2d_gt = pose2d_gt * crop_size / hm_size + corner
            else:
                orig_width, orig_height = data_loader.dataset.orig_img_size
                pose2d_pred[:, :, 0] *= orig_width / hm_size
                pose2d_pred[:, :, 1] *= orig_height / hm_size
                pose2d_gt[:, :, 0] *= orig_width / hm_size
                pose2d_gt[:, :, 1] *= orig_height / hm_size

                # for k in range(21):
                #     print(pose2d_gt[0,k].tolist(), pose2d_pred[0,k].tolist())
                # input()
            # 2D errors
            pose2d_pred = pose2d_pred.cpu().numpy()
            pose2d_gt = pose2d_gt.numpy()
            visibility = visibility.squeeze(2).numpy()

            # import matplotlib.pyplot as plt
            # imgs = cv2.resize(imgs[0].permute(1,2,0).cpu().numpy(), tuple(data_loader.dataset.orig_img_size))
            # for k in range(21):
            #     print(pose2d_gt[0,k],pose2d_pred[0,k],visibility[0,k])
            # for k in range(0,21,5):
            #     fig = plt.figure()
            #     ax1 = fig.add_subplot(131)
            #     ax2 = fig.add_subplot(132)
            #     ax3 = fig.add_subplot(133)
            #     ax1.imshow(cv2.cvtColor(imgs / imgs.max(), cv2.COLOR_BGR2RGB))
            #     plot_hand(ax1, pose2d_gt[0,:,0:2], order='uv')
            #     ax2.imshow(cv2.cvtColor(imgs / imgs.max(), cv2.COLOR_BGR2RGB))
            #     plot_hand(ax2, pose2d_pred[0,:,0:2], order='uv')
            #     ax3.imshow(heatmaps_pred[0,k].cpu().numpy())
            #     plt.show()
            mse_each_joint = np.linalg.norm(pose2d_pred - pose2d_gt,
                                            axis=2) * visibility  # b x 21

            mse2d_lst += mse_each_joint.sum(axis=0)
            visibility_lst += visibility.sum(axis=0)

            for th_idx in range(len(th2d_lst)):
                PCK2d_lst[th_idx] += np.sum(
                    (mse_each_joint < th2d_lst[th_idx]) * visibility)

            period = 10
            if i % (len(data_loader) // period) == 0:
                print("[Evaluation]{}% finished.".format(
                    period * i // (len(data_loader) // period)))
            #if i == 10:break
        print('Evaluation spent {:.2f} s\tfps: {:.1f}\tsec/batch: {:.4f}'.format(
            time.time() - start_time, infer_time[0] / infer_time[1],
            infer_time[1] / infer_time[0]))

        mse2d_lst /= visibility_lst
        PCK2d_lst /= visibility_lst.sum()

        result_dir = record_prefix + cfg.EXP_NAME
        if not os.path.exists(result_dir):
            os.mkdir(result_dir)

        mse_file, pck_file = os.path.join(
            result_dir,
            'mse2d_each_joint.txt'), os.path.join(result_dir, 'PCK2d.txt')
        print('Saving results to ' + mse_file)
        print('Saving results to ' + pck_file)
        np.savetxt(mse_file, mse2d_lst, fmt='%.4f')
        np.savetxt(pck_file, np.stack((th2d_lst, PCK2d_lst)))

        plot_performance(PCK2d_lst, th2d_lst, mse2d_lst)
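
The PCK bookkeeping above amounts to: for every pixel threshold, count the visible joints whose prediction error falls below it, then normalize by the total number of visible joints. A self-contained sketch with toy numbers (shapes and values are illustrative):

import numpy as np

# 2 samples x 3 joints: per-joint pixel errors and visibility flags
errors = np.array([[2.0, 10.0, 30.0],
                   [5.0, 55.0, 1.0]])
visibility = np.array([[1, 1, 0],
                       [1, 1, 1]])

th2d_lst = np.arange(1, 50)
pck = np.array([((errors < th) * visibility).sum() for th in th2d_lst],
               dtype=float) / visibility.sum()
print(pck[19])  # fraction of visible joints with error < 20 px -> 0.8
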
Example #11
        if os.path.isdir(cfg.TEST.DEMO_FILE):
            image_names = []
            ls = os.listdir(cfg.TEST.DEMO_FILE)
            for file_name in sorted(ls):
                ext = file_name[file_name.rfind('.') + 1:].lower()
                if ext in image_ext:
                    image_names.append(
                        os.path.join(cfg.TEST.DEMO_FILE, file_name))
        else:
            image_names = [cfg.TEST.DEMO_FILE]

        for image_name in image_names:
            print(image_name)
            ret = detector.run(image_name)
            time_str = ''
            for stat in time_stats:
                time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
            print(time_str)


if __name__ == '__main__':
    args = parse_args()
    update_config(cfg, args.cfg)
    cfg.defrost()
    cfg.TEST.MODEL_PATH = args.TESTMODEL
    cfg.TEST.DEMO_FILE = args.DEMOFILE
    cfg.TEST.NMS = args.NMS
    cfg.DEBUG = args.DEBUG
    cfg.freeze()
    demo(cfg)
Example #12
def get_network(name, batch_size):
    """Get the symbol definition and random weight of a network"""

    # change for cifar
    input_shape = (batch_size, 3, 32, 32)
    output_shape = (batch_size, 10)

    print("Use : {}".format(name))

    if name == "cifar_resnet20_v1":
        input_shape = (batch_size, 3, 32, 32)
        output_shape = (batch_size, 10)

        block = get_model('cifar_resnet20_v1', pretrained=True)
        mod, params = relay.frontend.from_mxnet(block, shape={'data': input_shape}, dtype=dtype)
        net = mod["main"]
        net = relay.Function(net.params, relay.nn.softmax(net.body), None, net.type_params, net.attrs)
        mod = tvm.IRModule.from_expr(net)

    elif name == 'ssd_512_resnet50_v1_voc':
        input_shape = (batch_size, 3, 512, 512)
        output_shape = (batch_size, 20)
        block = get_model('ssd_512_resnet50_v1_voc', pretrained=True)
        mod, params = relay.frontend.from_mxnet(block, shape={'data': input_shape}, dtype=dtype)
        net = mod["main"]
        net = relay.Function(net.params, net.body, None, net.type_params, net.attrs)
        mod = tvm.IRModule.from_expr(net)

    elif name == 'hrnet_bottom_up':
        import sys
        sys.path.append("/datadrive/workspace/github/HRNet-Bottom-Up-Pose-Estimation")
        sys.path.append("/datadrive/workspace/github/HRNet-Bottom-Up-Pose-Estimation/lib")

        import cv2
        import torch
        from config import cfg, update_config
        import argparse
        import models

        parser = argparse.ArgumentParser(description='Train keypoints network')
        # general
        parser.add_argument('--cfg', type=str, default="/datadrive/workspace/github/HRNet-Bottom-Up-Pose-Estimation/experiments/inference_demo.yaml")
        parser.add_argument('--videoFile', type=str, required=False)
        parser.add_argument('--outputDir', type=str, default='/output/')
        parser.add_argument('--inferenceFps', type=int, default=10)
        parser.add_argument('--visthre', type=float, default=0)
        parser.add_argument('opts',
                            help='Modify config options using the command-line',
                            default=None,
                            nargs=argparse.REMAINDER)
        args = parser.parse_args()

        update_config(cfg, args)

        input_shape = (1, 3, 512, 512)
        output_shape = None

        cfg.defrost()
        cfg.TEST.MODEL_FILE = "/datadrive/workspace/github/HRNet-Bottom-Up-Pose-Estimation/model/pose_coco/pose_hrnet_w32_reg_delaysep_bg01_stn_512_adam_lr1e-3_coco_x140.pth"
        print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        cfg.freeze()

        pose_model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
            cfg, is_train=False)
        pose_model.load_state_dict(torch.load(
            cfg.TEST.MODEL_FILE), strict=False)

        input_data = torch.randn(input_shape)
        scripted_model = torch.jit.trace(pose_model, input_data).eval()

        mod, params = relay.frontend.from_pytorch(scripted_model, input_shapes=[('data', input_shape)], default_dtype=dtype)
        net = mod["main"]
        net = relay.Function(net.params, net.body, None, net.type_params, net.attrs)
        mod = tvm.IRModule.from_expr(net)

    else:
        raise ValueError("Unsupported network: " + name)

    return mod, params, input_shape, output_shape
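
The hrnet_bottom_up branch follows the generic trace-then-import recipe: torch.jit.trace produces a TorchScript module, and relay.frontend.from_pytorch converts it into a Relay IRModule plus parameters. A minimal version of that recipe with a toy network (the model, shape, and input name below are illustrative, not the HRNet pipeline above):

import torch
import tvm
from tvm import relay

# Toy module standing in for the pose network.
model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU()).eval()
input_shape = (1, 3, 32, 32)
scripted = torch.jit.trace(model, torch.randn(input_shape))

# The second argument maps graph input names to shapes.
mod, params = relay.frontend.from_pytorch(scripted, [('data', input_shape)])
print(mod['main'])
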
Example #13
def main():
    args = parse_args()
    update_config(cfg, args)
    cfg.defrost()
    cfg.freeze()

    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model_path = args.model_path
    is_vis = args.is_vis

    gpus = ','.join([str(i) for i in cfg.GPUS])
    gpu_ids = list(cfg.GPUS)  # avoids the fragile eval('[' + gpus + ']')

    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print(
                "Warning:  if --fp16 is not used, static_loss_scale will be ignored."
            )

    # model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
    #     cfg, is_train=True
    # )

    if 'pose_hrnet' in cfg.MODEL.NAME:
        model = {
            "pose_hrnet": pose_hrnet.get_pose_net,
            "pose_hrnet_softmax": pose_hrnet_softmax.get_pose_net
        }[cfg.MODEL.NAME](cfg, is_train=True)
    else:
        model = {
            "ransac": RANSACTriangulationNet,
            "alg": AlgebraicTriangulationNet,
            "vol": VolumetricTriangulationNet,
            "vol_CPM": VolumetricTriangulationNet_CPM,
            "FTL": FTLMultiviewNet
        }[cfg.MODEL.NAME](cfg, is_train=False)

    # load model state
    if model_path:
        print("Loading model:", model_path)
        ckpt = torch.load(model_path,
                          map_location='cpu' if args.gpu == -1 else 'cuda:0')
        if 'state_dict' not in ckpt.keys():
            state_dict = ckpt
        else:
            state_dict = ckpt['state_dict']
            print('Model epoch {}'.format(ckpt['epoch']))

        for key in list(state_dict.keys()):
            new_key = key.replace("module.", "")
            state_dict[new_key] = state_dict.pop(key)

        model.load_state_dict(state_dict, strict=True)

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.MODEL.SYNC_BN and not args.distributed:
        print(
            'Warning: Sync BatchNorm is only supported in distributed training.'
        )

    device = torch.device('cuda:' + str(args.gpu) if args.gpu != -1 else 'cpu')

    model.to(device)

    model.eval()

    # image transformer
    transform = build_transforms(cfg, is_train=False)

    inference_dataset = eval('dataset.' + cfg.DATASET.TEST_DATASET[0])(
        cfg, cfg.DATASET.TEST_SET, transform=transform)

    data_loader = torch.utils.data.DataLoader(inference_dataset,
                                              batch_size=1,
                                              shuffle=True,
                                              num_workers=0,
                                              pin_memory=False)

    print('\nValidation loader information:\n' + str(data_loader.dataset))

    with torch.no_grad():
        pose2d_mse_loss = JointsMSELoss().to(
            device) if args.gpu != -1 else JointsMSELoss()
        pose3d_mse_loss = Joints3DMSELoss().to(
            device) if args.gpu != -1 else Joints3DMSELoss()
        orig_width, orig_height = inference_dataset.orig_img_size
        heatmap_size = cfg.MODEL.HEATMAP_SIZE
        count = 4
        for i, ret in enumerate(data_loader):
            # orig_imgs: 1 x 4 x 480 x 640 x 3
            # imgs: 1 x 4 x 3 x H x W
            # pose2d_gt (bounded in 64 x 64): 1 x 4 x 21 x 2
            # pose3d_gt: 1 x 21 x 3
            # visibility: 1 x 4 x 21
            # extrinsic matrix: 1 x 4 x 3 x 4
            # intrinsic matrix: 1 x 3 x 3
            if i % 67 != 0: continue

            imgs = ret['imgs'].to(device)
            orig_imgs = ret['orig_imgs']
            pose2d_gt, pose3d_gt, visibility = ret['pose2d'], ret[
                'pose3d'], ret['visibility']
            extrinsic_matrices, intrinsic_matrices = ret[
                'extrinsic_matrices'], ret['intrinsic_matrix']
            # sometimes the intrinsic matrix has shape 3x3, other times b x 3x3
            intrinsic_matrix = intrinsic_matrices[0] if len(
                intrinsic_matrices.shape) == 3 else intrinsic_matrices

            start_time = time.time()
            if 'pose_hrnet' in cfg.MODEL.NAME:
                pose3d_gt = pose3d_gt.to(device)

                heatmaps, _ = model(imgs[0])  # N_views x 21 x 64 x 64
                pose2d_pred = get_final_preds(heatmaps,
                                              cfg)  # N_views x 21 x 2
                proj_matrices = (intrinsic_matrix @ extrinsic_matrices).to(
                    device)  # b x v x 3 x 4

                # rescale to the original image before DLT
                pose2d_pred[:, :, 0:1] *= orig_width / heatmap_size[0]
                pose2d_pred[:, :, 1:2] *= orig_height / heatmap_size[1]
                # 3D world coordinate 1 x 21 x 3
                pose3d_pred = DLT_pytorch(pose2d_pred,
                                          proj_matrices.squeeze()).unsqueeze(0)

            elif 'alg' == cfg.MODEL.NAME or 'ransac' == cfg.MODEL.NAME:
                # the predicted 2D poses have been rescaled inside the triangulation model
                # pose2d_pred: 1 x N_views x 21 x 2
                # pose3d_pred: 1 x 21 x 3
                proj_matrices = (intrinsic_matrix @ extrinsic_matrices
                                 )  # b x v x 3 x 4

                pose3d_pred,\
                pose2d_pred,\
                heatmaps,\
                confidences_pred = model(imgs, proj_matrices.to(device))

            elif "vol" in cfg.MODEL.NAME:
                intrinsic_matrix = update_after_resize(
                    intrinsic_matrix, (orig_height, orig_width),
                    tuple(heatmap_size))
                proj_matrices = (intrinsic_matrix @ extrinsic_matrices).to(
                    device)  # b x v x 3 x 4

                # pose3d_pred (torch.tensor) b x 21 x 3
                # pose2d_pred (torch.tensor) b x v x 21 x 2 NOTE: the estimated 2D poses are located in the heatmap size 64(W) x 64(H)
                # heatmaps_pred (torch.tensor) b x v x 21 x 64 x 64
                # volumes_pred (torch.tensor)
                # confidences_pred (torch.tensor)
                # cuboids_pred (list)
                # coord_volumes_pred (torch.tensor)
                # base_points_pred (torch.tensor) b x v x 1 x 2
                if cfg.MODEL.BACKBONE_NAME == 'CPM_volumetric':
                    centermaps = ret['centermaps'].to(device)

                    pose3d_pred,\
                    pose2d_pred,\
                    heatmaps_pred,\
                    volumes_pred,\
                    confidences_pred,\
                    coord_volumes_pred,\
                    base_points_pred\
                        = model(imgs, centermaps, proj_matrices)
                else:
                    pose3d_pred,\
                    pose2d_pred,\
                    heatmaps,\
                    volumes_pred,\
                    confidences_pred,\
                    coord_volumes_pred,\
                    base_points_pred\
                        = model(imgs, proj_matrices)

                pose2d_pred[:, :, :, 0:1] *= orig_width / heatmap_size[0]
                pose2d_pred[:, :, :, 1:2] *= orig_height / heatmap_size[1]

            elif 'FTL' == cfg.MODEL.NAME:
                # pose2d_pred: 1 x 4 x 21 x 2
                # pose3d_pred: 1 x 21 x 3
                heatmaps, pose2d_pred, pose3d_pred = model(
                    imgs.to(device), extrinsic_matrices.to(device),
                    intrinsic_matrix.to(device))

                print(pose2d_pred)
                pose2d_pred = torch.cat((pose2d_pred[:, :, :, 0:1] * 640 / 64,
                                         pose2d_pred[:, :, :, 1:2] * 480 / 64),
                                        dim=-1)

            # N_views x 21 x 2
            end_time = time.time()
            print('3D pose inference time {:.1f} ms'.format(
                1000 * (end_time - start_time)))
            pose3d_EPE = pose3d_mse_loss(pose3d_pred[:, 1:],
                                         pose3d_gt[:, 1:].to(device)).item()
            print('Pose3d MSE: {:.4f}\n'.format(pose3d_EPE))

            # if pose3d_EPE > 35:
            #     input()
            #     continue
            # 2D errors
            pose2d_gt[:, :, :, 0] *= orig_width / heatmap_size[0]
            pose2d_gt[:, :, :, 1] *= orig_height / heatmap_size[1]

            # for k in range(21):
            #     print(pose2d_gt[0,k].tolist(), pose2d_pred[0,k].tolist())
            # input()

            visualize(args=args,
                      imgs=np.squeeze(orig_imgs[0].numpy()),
                      pose2d_gt=np.squeeze(pose2d_gt.cpu().numpy()),
                      pose2d_pred=np.squeeze(pose2d_pred.cpu().numpy()),
                      pose3d_gt=np.squeeze(pose3d_gt.cpu().numpy()),
                      pose3d_pred=np.squeeze(pose3d_pred.cpu().numpy()))
Example #14
def main():
    args = parse_args()

    update_config(cfg, args)
    cfg.defrost()
    cfg.freeze()

    if args.is_vis:
        result_dir = prefix + cfg.EXP_NAME
        mse2d_lst = np.loadtxt(os.path.join(result_dir,
                                            'mse2d_each_joint.txt'))
        mse3d_lst = np.loadtxt(os.path.join(result_dir,
                                            'mse3d_each_joint.txt'))
        PCK2d_lst = np.loadtxt(os.path.join(result_dir, 'PCK2d.txt'))
        PCK3d_lst = np.loadtxt(os.path.join(result_dir, 'PCK3d.txt'))

        plot_performance(PCK2d_lst[1, :], PCK2d_lst[0, :], PCK3d_lst[1, :],
                         PCK3d_lst[0, :], mse2d_lst, mse3d_lst)
        exit()

    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model_path = args.model_path
    is_vis = args.is_vis

    gpus = ','.join([str(i) for i in cfg.GPUS])
    gpu_ids = list(cfg.GPUS)  # avoids the fragile eval('[' + gpus + ']')

    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print(
                "Warning:  if --fp16 is not used, static_loss_scale will be ignored."
            )

    if 'pose_hrnet' in cfg.MODEL.NAME:
        model = {
            "pose_hrnet": pose_hrnet.get_pose_net,
            "pose_hrnet_softmax": pose_hrnet_softmax.get_pose_net
        }[cfg.MODEL.NAME](cfg, is_train=True)
    else:
        model = {
            "ransac": RANSACTriangulationNet,
            "alg": AlgebraicTriangulationNet,
            "vol": VolumetricTriangulationNet,
            "vol_CPM": VolumetricTriangulationNet_CPM,
            "FTL": FTLMultiviewNet
        }[cfg.MODEL.NAME](cfg, is_train=False)

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.MODEL.SYNC_BN and not args.distributed:
        print(
            'Warning: Sync BatchNorm is only supported in distributed training.'
        )

    # load model state
    if model_path:
        print("Loading model:", model_path)
        ckpt = torch.load(model_path,
                          map_location='cpu' if args.gpu == -1 else 'cuda:0')
        if 'state_dict' not in ckpt.keys():
            state_dict = ckpt
        else:
            state_dict = ckpt['state_dict']
            print('Model epoch {}'.format(ckpt['epoch']))

        for key in list(state_dict.keys()):
            new_key = key.replace("module.", "")
            state_dict[new_key] = state_dict.pop(key)

        model.load_state_dict(state_dict, strict=False)

    device = torch.device('cuda:' + str(args.gpu) if args.gpu != -1 else 'cpu')

    model.to(device)

    model.eval()

    # image transformer
    transform = build_transforms(cfg, is_train=False)

    inference_dataset = eval('dataset.' + cfg.DATASET.DATASET[0])(
        cfg, cfg.DATASET.TEST_SET, transform=transform)
    inference_dataset.n_views = eval(args.views)
    batch_size = args.batch_size
    if platform.system() == 'Linux':  # for linux
        data_loader = torch.utils.data.DataLoader(inference_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=8,
                                                  pin_memory=False)
    else:  # for windows
        batch_size = 1
        data_loader = torch.utils.data.DataLoader(inference_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=0,
                                                  pin_memory=False)

    print('\nEvaluation loader information:\n' + str(data_loader.dataset))
    print('Evaluation batch size: {}\n'.format(batch_size))

    th2d_lst = np.array([i for i in range(1, 50)])
    PCK2d_lst = np.zeros((len(th2d_lst), ))
    mse2d_lst = np.zeros((21, ))
    th3d_lst = np.array([i for i in range(1, 51)])
    PCK3d_lst = np.zeros((len(th3d_lst), ))
    mse3d_lst = np.zeros((21, ))
    visibility_lst = np.zeros((21, ))
    with torch.no_grad():
        start_time = time.time()
        pose2d_mse_loss = JointsMSELoss().cuda(
            args.gpu) if args.gpu != -1 else JointsMSELoss()
        pose3d_mse_loss = Joints3DMSELoss().cuda(
            args.gpu) if args.gpu != -1 else Joints3DMSELoss()

        infer_time = [0, 0]
        start_time = time.time()
        n_valid = 0
        model.orig_img_size = inference_dataset.orig_img_size
        orig_width, orig_height = model.orig_img_size
        heatmap_size = cfg.MODEL.HEATMAP_SIZE

        for i, ret in enumerate(data_loader):
            # ori_imgs: b x 4 x 480 x 640 x 3
            # imgs: b x 4 x 3 x H x W
            # pose2d_gt: b x 4 x 21 x 2 (have not been transformed)
            # pose3d_gt: b x 21 x 3
            # visibility: b x 4 x 21
            # extrinsic matrix: b x 4 x 3 x 4
            # intrinsic matrix: b x 3 x 3
            # if i < count: continue
            imgs = ret['imgs'].to(device)
            orig_imgs = ret['orig_imgs']
            pose2d_gt, pose3d_gt, visibility = ret['pose2d'], ret[
                'pose3d'], ret['visibility']
            extrinsic_matrices, intrinsic_matrices = ret[
                'extrinsic_matrices'], ret['intrinsic_matrix']
            # sometimes the intrinsic matrix has shape 3x3, other times b x 3x3
            intrinsic_matrix = intrinsic_matrices[0] if len(
                intrinsic_matrices.shape) == 3 else intrinsic_matrices

            batch_size = orig_imgs.shape[0]
            n_joints = pose2d_gt.shape[2]
            pose2d_gt = pose2d_gt.view(
                -1, *pose2d_gt.shape[2:]).numpy()  # b*v x 21 x 2
            pose3d_gt = pose3d_gt.numpy()  # b x 21 x 3
            visibility = visibility.view(
                -1, visibility.shape[2]).numpy()  # b*v x 21

            if 'pose_hrnet' in cfg.MODEL.NAME:
                s1 = time.time()
                heatmaps, _ = model(imgs.view(
                    -1, *imgs.shape[2:]))  # b*v x 21 x 64 x 64
                pose2d_pred = get_final_preds(heatmaps, cfg).view(
                    batch_size, -1, n_joints, 2
                )  # b x v x 21 x 2 NOTE: the estimated 2D poses are located in the heatmap size 64(W) x 64(H)
                proj_matrices = (intrinsic_matrix @ extrinsic_matrices).to(
                    device)  # b x v x 3 x 4
                # rescale to the original image before DLT
                pose2d_pred[:, :, :, 0:1] *= orig_width / heatmap_size[0]
                pose2d_pred[:, :, :, 1:2] *= orig_height / heatmap_size[1]

                # 3D world coordinate 1 x 21 x 3
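                # DLT: for each joint, every camera view contributes two rows
                # (u * p3^T - p1^T and v * p3^T - p2^T) to a homogeneous
                # system A X = 0, which DLT_sii_pytorch solves per joint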
                pose3d_pred = torch.cat([
                    DLT_sii_pytorch(pose2d_pred[:, :, k],
                                    proj_matrices).unsqueeze(1)
                    for k in range(n_joints)
                ],
                                        dim=1)  # b x 21 x 3

                if i > 20:
                    infer_time[0] += 1
                    infer_time[1] += time.time() - s1
                    #print('FPS {:.1f}'.format(infer_time[0]/infer_time[1]))

            elif 'alg' == cfg.MODEL.NAME or 'ransac' == cfg.MODEL.NAME:
                s1 = time.time()
                # pose2d_pred: b x N_views x 21 x 2
                # NOTE: the estimated 2D poses are located in the original image of size 640(W) x 480(H)
                # pose3d_pred: b x 21 x 3 [world coord]
                proj_matrices = (intrinsic_matrix @ extrinsic_matrices).to(
                    device)  # b x v x 3 x 4
                pose3d_pred,\
                pose2d_pred,\
                heatmaps,\
                confidences_pred = model(imgs.to(device), proj_matrices.to(device))
                if i > 20:
                    infer_time[0] += 1
                    infer_time[1] += time.time() - s1

            elif "vol" in cfg.MODEL.NAME:
                intrinsic_matrix = update_after_resize(
                    intrinsic_matrix, (orig_height, orig_width),
                    tuple(heatmap_size))
                proj_matrices = (intrinsic_matrix @ extrinsic_matrices).to(
                    device)  # b x v x 3 x 4
                s1 = time.time()

                # pose3d_pred (torch.tensor) b x 21 x 3
                # pose2d_pred (torch.tensor) b x v x 21 x 2 NOTE: the estimated 2D poses are located in the heatmap size 64(W) x 64(H)
                # heatmaps_pred (torch.tensor) b x v x 21 x 64 x 64
                # volumes_pred (torch.tensor)
                # confidences_pred (torch.tensor)
                # cuboids_pred (list)
                # coord_volumes_pred (torch.tensor)
                # base_points_pred (torch.tensor) b x v x 1 x 2
                if cfg.MODEL.BACKBONE_NAME == 'CPM_volumetric':
                    centermaps = ret['centermaps'].to(device)
                    heatmaps_gt = ret['heatmaps']

                    pose3d_pred,\
                    pose2d_pred,\
                    heatmaps_pred,\
                    volumes_pred,\
                    confidences_pred,\
                    coord_volumes_pred,\
                    base_points_pred\
                        = model(imgs, centermaps, proj_matrices)
                else:
                    pose3d_pred,\
                    pose2d_pred,\
                    heatmaps,\
                    volumes_pred,\
                    confidences_pred,\
                    coord_volumes_pred,\
                    base_points_pred\
                        = model(imgs, proj_matrices)

                if i > 20:
                    infer_time[0] += 1
                    infer_time[1] += time.time() - s1

                pose2d_pred[:, :, :, 0:1] *= orig_width / heatmap_size[0]
                pose2d_pred[:, :, :, 1:2] *= orig_height / heatmap_size[1]

            # 2D errors
            pose2d_gt[:, :, 0] *= orig_width / heatmap_size[0]
            pose2d_gt[:, :, 1] *= orig_height / heatmap_size[1]

            pose2d_pred = pose2d_pred.view(-1, n_joints,
                                           2).cpu().numpy()  # b*v x 21 x 2
            # for k in range(21):
            #     print(pose2d_gt[0, k].tolist(), pose2d_pred[0, k].tolist())
            # input()
            mse_each_joint = np.linalg.norm(pose2d_pred - pose2d_gt,
                                            axis=2) * visibility  # b*v x 21
            mse2d_lst += mse_each_joint.sum(axis=0)
            visibility_lst += visibility.sum(axis=0)

            for th_idx in range(len(th2d_lst)):
                PCK2d_lst[th_idx] += np.sum(
                    (mse_each_joint < th2d_lst[th_idx]) * visibility)

            # 3D errors
            # for k in range(21):
            #     print(pose3d_gt[0, k].tolist(), pose3d_pred[0, k].tolist())
            # input()
            visibility = visibility.reshape(
                (batch_size, -1, n_joints))  # b x v x 21
            for b in range(batch_size):
                # print(np.sum(visibility[b]), visibility[b].size)
                if np.sum(visibility[b]) >= visibility[b].size * 0.65:
                    n_valid += 1
                    mse_each_joint = np.linalg.norm(
                        pose3d_pred[b].cpu().numpy() - pose3d_gt[b],
                        axis=1)  # 21
                    mse3d_lst += mse_each_joint

                    for th_idx in range(len(th3d_lst)):
                        PCK3d_lst[th_idx] += np.sum(
                            mse_each_joint < th3d_lst[th_idx])

            if i % (len(data_loader) // 5) == 0:
                print("[Evaluation]{}% finished.".format(
                    20 * i // (len(data_loader) // 5)))
            #if i == 10:break
        print('Evaluation spent {:.2f} s\tFPS: {:.1f}'.format(
            time.time() - start_time, infer_time[0] / infer_time[1]))

        mse2d_lst /= visibility_lst
        PCK2d_lst /= visibility_lst.sum()
        mse3d_lst /= n_valid
        PCK3d_lst /= (n_valid * 21)
        plot_performance(PCK2d_lst, th2d_lst, PCK3d_lst, th3d_lst, mse2d_lst,
                         mse3d_lst)

        result_dir = prefix + cfg.EXP_NAME
        if not os.path.exists(result_dir):
            os.mkdir(result_dir)

        np.savetxt(os.path.join(result_dir, 'mse2d_each_joint.txt'),
                   mse2d_lst,
                   fmt='%.4f')
        np.savetxt(os.path.join(result_dir, 'mse3d_each_joint.txt'),
                   mse3d_lst,
                   fmt='%.4f')
        np.savetxt(os.path.join(result_dir, 'PCK2d.txt'),
                   np.stack((th2d_lst, PCK2d_lst)))
        np.savetxt(os.path.join(result_dir, 'PCK3d.txt'),
                   np.stack((th3d_lst, PCK3d_lst)))
Example #15
def get_net(file_config, weights):
    cfg.defrost()
    cfg.merge_from_file(file_config)

    model = HpeHRNet(cfg, weights)
    return model
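
A minimal usage sketch for the helper above, assuming a matching experiment YAML and checkpoint (both paths below are placeholders, not files shipped with any of these repos):

model = get_net('experiments/w32_256x192_adam_lr1e-3.yaml',
                'weights/pose_hrnet_w32_256x192.pth')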