Beispiel #1
0
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    data_loader, test_dataset = make_test_dataloader(cfg)

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])

    parser = HeatmapParser(cfg)
    all_preds = []
    all_scores = []

    # pbar = tqdm(total=len(test_dataset)) if cfg.TEST.LOG_PROGRESS else None
    pbar = tqdm(total=len(test_dataset))
    for i, (images, annos) in enumerate(data_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'

        image = images[0].cpu().numpy()
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3),
                 final_heatmaps.size(2)])
            if cfg.RESCORE.USE:
                try:
                    scores = rescore_valid(cfg, final_results, scores)
                except:
                    print("got one.")
        # if cfg.TEST.LOG_PROGRESS:
        #     pbar.update()
        pbar.update()

        if i % cfg.PRINT_FREQ == 0:
            prefix = '{}_{}'.format(
                os.path.join(final_output_dir, 'result_valid'), i)
            # logger.info('=> write {}'.format(prefix))
            save_valid_image(image,
                             final_results,
                             '{}.jpg'.format(prefix),
                             dataset=test_dataset.name)
            # for scale_idx in range(len(outputs)):
            #     prefix_scale = prefix + '_output_{}'.format(
            #         # cfg.DATASET.OUTPUT_SIZE[scale_idx]
            #         scale_idx
            #     )
            #     save_debug_images(
            #         cfg, images, None, None,
            #         outputs[scale_idx], prefix_scale
            #     )
        all_preds.append(final_results)
        all_scores.append(scores)

    if cfg.TEST.LOG_PROGRESS:
        pbar.close()

    name_values, _ = test_dataset.evaluate(cfg, all_preds, all_scores,
                                           final_output_dir)

    if isinstance(name_values, list):
        for name_value in name_values:
            _print_name_value(logger, name_value, cfg.MODEL.NAME)
    else:
        _print_name_value(logger, name_values, cfg.MODEL.NAME)
Beispiel #2
0
def main():
    args = get_args()
    # get student config
    student_cfg = get_student_cfg(cfg, args.student_file)
    student_cfg.LOG_DIR = args.log
    student_cfg.PRINT_FREQ = int(args.print_freq)
    if args.mode == 'test':
        student_cfg.DATASET.TEST = 'test2017'
    logger, final_output_dir, tb_log_dir = create_logger(
        student_cfg, args.student_file, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(student_cfg)

    # cudnn related setting
    cudnn.benchmark = student_cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = student_cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = student_cfg.CUDNN.ENABLED

    dev = 'cuda' if torch.cuda.is_available() else 'cpu'

    model = PoseHigherResolutionNet(student_cfg)
    model.load_state_dict(torch.load(args.model_file))

    dump_input = torch.rand(
        (1, 3, student_cfg.DATASET.INPUT_SIZE, student_cfg.DATASET.INPUT_SIZE))
    logger.info(
        get_model_summary(model, dump_input, verbose=student_cfg.VERBOSE))

    model = torch.nn.DataParallel(model, device_ids=student_cfg.GPUS).cuda()
    model.eval()

    data_loader, test_dataset = make_test_dataloader(student_cfg)

    transforms = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
    ])

    parser = HeatmapParser(student_cfg)
    all_preds = []
    all_scores = []

    pbar = tqdm(
        total=len(test_dataset)) if student_cfg.TEST.LOG_PROGRESS else None
    for i, (images, annos) in enumerate(data_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'

        image = images[0].cpu().numpy()
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, student_cfg.DATASET.INPUT_SIZE, 1.0,
            min(student_cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(
                    sorted(student_cfg.TEST.SCALE_FACTOR, reverse=True)):
                input_size = student_cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(student_cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    student_cfg, model, image_resized,
                    student_cfg.TEST.FLIP_TEST, student_cfg.TEST.PROJECT2IMAGE,
                    base_size)

                final_heatmaps, tags_list = aggregate_results(
                    student_cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(
                len(student_cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           student_cfg.TEST.ADJUST,
                                           student_cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3),
                 final_heatmaps.size(2)])

        if student_cfg.TEST.LOG_PROGRESS:
            pbar.update()

        if i % student_cfg.PRINT_FREQ == 0:
            prefix = '{}_{}'.format(
                os.path.join(final_output_dir, 'result_valid'), i)
            # logger.info('=> write {}'.format(prefix))
            save_valid_image(image,
                             final_results,
                             '{}.jpg'.format(prefix),
                             dataset=test_dataset.name)
            # save_debug_images(cfg, image_resized, None, None, outputs, prefix)

        all_preds.append(final_results)
        all_scores.append(scores)

    if student_cfg.TEST.LOG_PROGRESS:
        pbar.close()

    name_values, _ = test_dataset.evaluate(cfg, all_preds, all_scores,
                                           final_output_dir)

    if isinstance(name_values, list):
        for name_value in name_values:
            _print_name_value(logger, name_value, cfg.MODEL.NAME)
    else:
        _print_name_value(logger, name_values, cfg.MODEL.NAME)
Beispiel #3
0
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    test_dataset = HIEDataset(DATA_PATH)
    data_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=0,
                                              pin_memory=False)

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(
                # mean=[0.485, 0.456, 0.406],
                # std=[0.229, 0.224, 0.225]
                mean=[0.5, 0.5, 0.5],
                std=[0.5, 0.5, 0.5])
        ])

    parser = HeatmapParser(cfg)
    all_preds = []
    all_scores = []

    pbar = tqdm(total=len(test_dataset)) if cfg.TEST.LOG_PROGRESS else None
    for i, images in enumerate(data_loader):
        # for i, (images, annos) in enumerate(data_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'

        image = images[0].cpu().numpy()
        # size at scale 1.0
        if (i % 100 == 0):
            print("Start process images %d" % i)
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))
        # print("Multi-scale end")

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3),
                 final_heatmaps.size(2)])

        if cfg.TEST.LOG_PROGRESS:
            pbar.update()

        if i % cfg.PRINT_FREQ == 0:
            prefix = '{}_{}'.format(
                os.path.join(final_output_dir, 'result_valid'), i)
            # logger.info('=> write {}'.format(prefix))
            # save_valid_image(image, final_results, '{}.jpg'.format(prefix),         dataset=test_dataset.name)
            # save_valid_image(image, final_results, '{}.jpg'.format(prefix),dataset='HIE20')
            # save_debug_images(cfg, image_resized, None, None, outputs, prefix)

        all_preds.append(final_results)
        all_scores.append(scores)

    if cfg.TEST.LOG_PROGRESS:
        pbar.close()

    # save preds and scores as json
    test_dataset.save_json(all_preds, all_scores)
    print('Save finished!')
Beispiel #4
0
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'test'
    )

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose(
            [
                torchvision.transforms.ToTensor(),
            ]
        )
    else:
        transforms = torchvision.transforms.Compose(
            [
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]
                )
            ]
        )

    HMparser = HeatmapParser(cfg)  # ans, scores

    res_folder = os.path.join(args.outdir, 'results')
    if not os.path.exists(res_folder):
        os.makedirs(res_folder)
    video_name = args.video_path.split('/')[-1].split('.')[0]
    res_file = os.path.join(res_folder, '{}.json'.format(video_name))

    # read frames in video
    stream = cv2.VideoCapture(args.video_path)
    assert stream.isOpened(), 'Cannot capture source'


    # fourcc = int(stream.get(cv2.CAP_PROP_FOURCC))
    fps = stream.get(cv2.CAP_PROP_FPS)
    frameSize = (int(stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    video_dir = os.path.join(args.outdir, 'video', args.data)
    if not os.path.exists(video_dir):
        os.makedirs(video_dir)

    image_dir=os.path.join(args.outdir, 'images', args.data)
    if not os.path.exists(image_dir):
        os.makedirs(image_dir)

    if args.video_format == 'mp4':
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        video_path = os.path.join(video_dir, '{}.mp4'.format(video_name))
    else:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        video_path = os.path.join(video_dir, '{}.avi'.format(video_name))

    if args.save_video:
        out = cv2.VideoWriter(video_path, fourcc, fps, frameSize)

    num = 0
    annolist = []
    while (True):
        ret, image = stream.read()
        print("num:", num)

        if ret is False:
            break

        all_preds = []
        all_scores = []

        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR)
        )

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR, reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR)
                )
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size
                )

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags
                )

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = HMparser.parse(
                final_heatmaps, tags, cfg.TEST.ADJUST, cfg.TEST.REFINE
            )

            final_results = get_final_preds(  # joints for all persons in a image
                grouped, center, scale,
                [final_heatmaps.size(3), final_heatmaps.size(2)]
            )

        image=draw_image(image, final_results, dataset=args.data)
        all_preds.append(final_results)
        all_scores.append(scores)

        img_id = num
        num += 1
        file_name = '{}.jpg'.format(str(img_id).zfill(6))
        annorect = person_result(all_preds, scores, img_id)

        annolist.append({
            'annorect': annorect,
            'ignore_regions': [],
            'image': [{'name': file_name}]
        })
        # print(annorect)

        if args.save_video:
            out.write(image)

        if args.save_img:
            img_path = os.path.join(image_dir, file_name)
            cv2.imwrite(img_path, image)

    final_results = {'annolist': annolist}
    with open(res_file, 'w') as f:
        json.dump(final_results, f)
    print('=> create test json finished!')

    # print('=> finished! you can check the output video on {}'.format(save_path))
    stream.release()
    # out.release()
    cv2.destroyAllWindows()
Beispiel #5
0
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()
    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])
    transforms_pre = torchvision.transforms.Compose([
        ToNumpy(),
    ])
    # iterate over all datasets
    datasets_root_path = "/media/jld/DATOS_JLD/datasets"
    datasets = ["cityscapes", "kitti", "tsinghua"]
    # testing sets from cityscapes and kitti does not have groundtruth --> processing not required
    datasplits = [["train", "val"], ["train"], ["train", "val", "test"]]
    keypoints_output_root_path = "/media/jld/DATOS_JLD/git-repos/paper-revista-keypoints/results"
    model_name = osp.basename(
        cfg.TEST.MODEL_FILE).split('.')[0]  # Model name + configuration
    for dsid, dataset in enumerate(datasets):
        dataset_root_path = osp.join(datasets_root_path, dataset)
        output_root_path = osp.join(keypoints_output_root_path, dataset)
        for datasplit in datasplits[dsid]:
            loggur.info(f"Processing split {datasplit} of {dataset}")
            input_img_dir = osp.join(dataset_root_path, datasplit)
            output_kps_json_dir = osp.join(output_root_path, datasplit,
                                           model_name)
            loggur.info(f"Input image dir: {input_img_dir}")
            loggur.info(f"Output pose JSON dir: {output_kps_json_dir}")
            # test_dataset = torchvision.datasets.ImageFolder("/media/jld/DATOS_JLD/git-repos/paper-revista-keypoints/test_images/", transform=transforms_pre)
            test_dataset = dsjld.BaseDataset(input_img_dir,
                                             output_kps_json_dir,
                                             transform=transforms_pre)
            test_dataset.generate_io_samples_pairs()
            # Stablish weight of keypoints scores (like openpifpaf in https://github.com/vita-epfl/openpifpaf/blob/master/openpifpaf/decoder/annotation.py#L44)
            n_keypoints = 17
            kps_score_weights = numpy.ones((17, ))
            kps_score_weights[:3] = 3.0
            # Normalize weights to sum 1
            kps_score_weights /= numpy.sum(kps_score_weights)
            data_loader = torch.utils.data.DataLoader(test_dataset,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      num_workers=0,
                                                      pin_memory=False)
            parser = HeatmapParser(cfg)
            all_preds = []
            all_scores = []

            pbar = tqdm(
                total=len(test_dataset))  # if cfg.TEST.LOG_PROGRESS else None
            for i, (img, imgidx) in enumerate(data_loader):
                assert 1 == img.size(0), 'Test batch size should be 1'

                img = img[0].cpu().numpy()
                # size at scale 1.0
                base_size, center, scale = get_multi_scale_size(
                    img, cfg.DATASET.INPUT_SIZE, 1.0,
                    min(cfg.TEST.SCALE_FACTOR))

                with torch.no_grad():
                    final_heatmaps = None
                    tags_list = []
                    for idx, s in enumerate(
                            sorted(cfg.TEST.SCALE_FACTOR, reverse=True)):
                        input_size = cfg.DATASET.INPUT_SIZE
                        image_resized, center, scale = resize_align_multi_scale(
                            img, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                        image_resized = transforms(image_resized)
                        image_resized = image_resized.unsqueeze(0).cuda()

                        outputs, heatmaps, tags = get_multi_stage_outputs(
                            cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                            cfg.TEST.PROJECT2IMAGE, base_size)

                        final_heatmaps, tags_list = aggregate_results(
                            cfg, s, final_heatmaps, tags_list, heatmaps, tags)

                    final_heatmaps = final_heatmaps / float(
                        len(cfg.TEST.SCALE_FACTOR))
                    tags = torch.cat(tags_list, dim=4)
                    grouped, scores = parser.parse(final_heatmaps, tags,
                                                   cfg.TEST.ADJUST,
                                                   cfg.TEST.REFINE)

                    final_results = get_final_preds(
                        grouped, center, scale,
                        [final_heatmaps.size(3),
                         final_heatmaps.size(2)])

                # if cfg.TEST.LOG_PROGRESS:
                pbar.update()
                # Save all keypoints in a JSON dict
                final_json_results = []
                for kps in final_results:
                    kpsdict = {}
                    x = kps[:, 0]
                    y = kps[:, 1]
                    kps_scores = kps[:, 2]
                    kpsdict['keypoints'] = kps[:, 0:3].tolist()
                    # bounding box by means of minmax approach (without zero elements)
                    xmin = numpy.float64(numpy.min(x[numpy.nonzero(x)]))
                    xmax = numpy.float64(numpy.max(x))
                    width = numpy.float64(xmax - xmin)
                    ymin = numpy.float64(numpy.min(y[numpy.nonzero(y)]))
                    ymax = numpy.float64(numpy.max(y))
                    height = numpy.float64(ymax - ymin)
                    kpsdict['bbox'] = [xmin, ymin, width, height]
                    # Calculate pose score as a weighted mean of keypoints scores
                    kpsdict['score'] = numpy.float64(
                        numpy.sum(kps_score_weights *
                                  numpy.sort(kps_scores)[::-1]))
                    final_json_results.append(kpsdict)

                with open(test_dataset.output_json_files_list[imgidx],
                          "w") as f:
                    json.dump(final_json_results, f)

                all_preds.append(final_results)
                all_scores.append(scores)

            if cfg.TEST.LOG_PROGRESS:
                pbar.close()
Beispiel #6
0
def validate(config,
             val_loader,
             val_dataset,
             model,
             output_dir,
             tb_log_dir,
             writer_dict=None):
    model.eval()
    if config.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])
    # rmse = AverageMeter()

    parser = HeatmapParser(config)
    all_preds = []
    all_scores = []

    pbar = tqdm(total=len(val_dataset)) if config.TEST.LOG_PROGRESS else None
    for i, (images, annos) in enumerate(val_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'

        image = images[0].cpu().numpy()
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, config.DATASET.INPUT_SIZE, 1.0,
            min(config.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(
                    sorted(config.TEST.SCALE_FACTOR, reverse=True)):
                input_size = config.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(config.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    config, model, image_resized, config.TEST.FLIP_TEST,
                    config.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    config, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(
                len(config.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           config.TEST.ADJUST,
                                           config.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3),
                 final_heatmaps.size(2)])

        if config.TEST.LOG_PROGRESS:
            pbar.update()

        # if i % config.PRINT_FREQ == 0:
        #     prefix = '{}_{}'.format(os.path.join(output_dir, 'result_valid'), i)
        #     # logger.info('=> write {}'.format(prefix))
        #     save_valid_image(image, final_results, '{}.jpg'.format(prefix), dataset=val_dataset.name)
        #     # save_debug_images(config, image_resized, None, None, outputs, prefix)

        all_preds.append(final_results)
        all_scores.append(scores)

    if config.TEST.LOG_PROGRESS:
        pbar.close()

    results, res_file = val_dataset.evaluate(config, all_preds, all_scores,
                                             output_dir)
    ##################################
    gt_file = val_dataset._get_anno_file_name()
    coco = COCO(gt_file)
    coco_dt = coco.loadRes(res_file)
    coco_eval = COCOeval(coco, coco_dt, 'keypoints')
    coco_eval._prepare()
    gts_ = coco_eval._gts
    dts_ = coco_eval._dts

    p = coco_eval.params
    p.imgIds = list(np.unique(p.imgIds))
    if p.useCats:
        p.catIds = list(np.unique(p.catIds))
    p.maxDets = sorted(p.maxDets)

    # loop through images, area range, max detection number
    catIds = p.catIds if p.useCats else [-1]
    pcutoff01 = 0.1
    pcutoff06 = 0.6
    mean_rmse_list = []
    mean_rmse_pcutoff01_list = []
    mean_rmse_pcutoff06_list = []
    for catId in catIds:
        for imgId in p.imgIds:
            # dimension here should be Nxm
            gts = gts_[imgId, catId]
            dts = dts_[imgId, catId]
            if len(gts) != 0 and len(dts) != 0:
                npgt = np.array(gts[0]["keypoints"])
                npdt = np.array(dts[0]["keypoints"])
                mask01 = npdt[2::3] >= pcutoff01
                mask06 = npdt[2::3] >= pcutoff06
                RMSE = np.sqrt((npgt[0::3] - npdt[0::3])**2 +
                               (npgt[1::3] - npdt[1::3])**2)
                RMSE_pcutoff01 = RMSE[mask01]
                RMSE_pcutoff06 = RMSE[mask06]
                mean_rmse = np.round(np.nanmean(RMSE.flatten()), 2)
                mean_rmse_pcutoff01 = np.nanmean(RMSE_pcutoff01.flatten())
                mean_rmse_pcutoff06 = np.nanmean(RMSE_pcutoff06.flatten())
                mean_rmse_list.append(mean_rmse)
                mean_rmse_pcutoff01_list.append(mean_rmse_pcutoff01)
                mean_rmse_pcutoff06_list.append(mean_rmse_pcutoff06)
    print(f"Mean RMSE: {np.mean(mean_rmse_list)}")
    print(
        f"Mean RMSE p-cutoff 0.1: {np.round(np.mean(mean_rmse_pcutoff01_list),2)}"
    )
    print(
        f"Mean RMSE p-cutoff 0.6: {np.round(np.mean(mean_rmse_pcutoff06_list),2)}"
    )
    global_steps = writer_dict['valid_global_steps']
    writer_dict["writer"].add_scalar("val_rmse", np.mean(mean_rmse_list),
                                     global_steps)
    writer_dict["writer"].add_scalar("val_rmse_pcutoff_0.1",
                                     np.mean(mean_rmse_pcutoff01_list),
                                     global_steps)
    writer_dict["writer"].add_scalar("val_rmse_pcutoff_0.6",
                                     np.mean(mean_rmse_pcutoff06_list),
                                     global_steps)
    writer_dict['valid_global_steps'] = global_steps + 1
    return np.mean(mean_rmse_list)
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid'
    )

    logger.info(pprint.pformat(args))

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
        cfg, is_train=False
    )

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(
            final_output_dir, 'model_best.pth.tar'
        )
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    data_loader, test_dataset = make_test_dataloader(cfg)

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose(
            [
                torchvision.transforms.ToTensor(),
            ]
        )
    else:
        transforms = torchvision.transforms.Compose(
            [
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]
                )
            ]
        )

    parser = HeatmapParser(cfg)

    vid_file = 0 # Or video file path
    print("Opening Camera " + str(vid_file))
    cap = cv2.VideoCapture(vid_file)

    while True:
        ret, image = cap.read()

        a = datetime.datetime.now()

        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR)
        )

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR, reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR)
                )
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size
                )

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags
                )

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(
                final_heatmaps, tags, cfg.TEST.ADJUST, cfg.TEST.REFINE
            )

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3), final_heatmaps.size(2)]
            )

        b = datetime.datetime.now()
        inf_time = (b - a).total_seconds()*1000
        print("Inf time {} ms".format(inf_time))

        # Display the resulting frame
        for person in final_results:
            color = np.random.randint(0, 255, size=3)
            color = [int(i) for i in color]
            add_joints(image, person, color, test_dataset.name, cfg.TEST.DETECTION_THRESHOLD)

        image = cv2.putText(image, "{:.2f} ms / frame".format(inf_time), (40, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.imshow('frame', image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
Beispiel #8
0
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
        cfg, is_train=False
    )

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    print("Initilaized.")

    if cfg.TEST.MODEL_FILE:
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        raise Exception("No weight file. Would you like to test with your hammer?")

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    # data_loader, test_dataset = make_test_dataloader(cfg)

    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose(
            [
                torchvision.transforms.ToTensor(),
            ]
        )
    else:
        # 默认是用这种
        transforms = torchvision.transforms.Compose(
            [
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]
                )
            ]
        )

    parser = HeatmapParser(cfg)

    print("Load model successfully.")

    ENABLE_CAMERA = 1
    ENABLE_VIDEO = 1

    VIDEO_ROTATE = 0
    
    if ENABLE_CAMERA:
        # 读取视频流
        cap = cv2.VideoCapture(-1)
        ret, image = cap.read()
        x, y = image.shape[0:2]
        print((x, y))
        # 创建视频文件
        fourcc = cv2.VideoWriter_fourcc(*'I420')
        # fourcc = cv2.VideoWriter_fourcc(*'X264')
        out = cv2.VideoWriter('./result.avi', fourcc, 24, (y, x), True)
        while ret:
            ret, image = cap.read()
            if not ret:
                break
            # 实时视频自动禁用scale search
            base_size, center, scale = get_multi_scale_size(
                image, cfg.DATASET.INPUT_SIZE, 1.0, 1.0
            )
            with torch.no_grad():
                final_heatmaps = None
                tags_list = []
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, 1.0, 1.0
                )
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size
                )

                final_heatmaps, tags_list = aggregate_results(
                    cfg, 1.0, final_heatmaps, tags_list, heatmaps, tags
                )

                final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
                tags = torch.cat(tags_list, dim=4)
                grouped, scores = parser.parse(
                    final_heatmaps, tags, cfg.TEST.ADJUST, cfg.TEST.REFINE
                )

                final_results = get_final_preds(
                    grouped, center, scale,
                    [final_heatmaps.size(3), final_heatmaps.size(2)]
                )

            detection = save_demo_image(image, final_results, mode=1)

            detection = cv2.cvtColor(detection, cv2.COLOR_BGR2RGB)
            cv2.imshow("Pose Estimation", detection)
            out.write(detection)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        cap.release()
        out.release()
        os.system("ffmpeg -i result.avi -c:v libx265 camera.mp4")
        cv2.destroyAllWindows()
    elif ENABLE_VIDEO:
        # 读取视频流
        video_name = "./videos/test04.mp4"
        cap = cv2.VideoCapture(video_name)
        # 创建视频文件 
        fourcc = cv2.VideoWriter_fourcc(*'I420')
        out = cv2.VideoWriter('./result.avi', fourcc, 24, (704, 576), True)
        while cap.isOpened():
            ret, image = cap.read()
            if not ret:
                break
            if VIDEO_ROTATE: # 仅适用于扔实心球
                image = cv2.resize(image, (960, 540)).transpose((1, 0, 2))
            # 实时视频自动禁用scale search
            base_size, center, scale = get_multi_scale_size(
                image, cfg.DATASET.INPUT_SIZE, 1.0, 1.0
            )
            with torch.no_grad():
                final_heatmaps = None
                tags_list = []
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, 1.0, 1.0
                )
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size
                )

                final_heatmaps, tags_list = aggregate_results(
                    cfg, 1.0, final_heatmaps, tags_list, heatmaps, tags
                )

                final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
                tags = torch.cat(tags_list, dim=4)
                grouped, scores = parser.parse(
                    final_heatmaps, tags, cfg.TEST.ADJUST, cfg.TEST.REFINE
                )

                final_results = get_final_preds(
                    grouped, center, scale,
                    [final_heatmaps.size(3), final_heatmaps.size(2)]
                )

            detection = save_demo_image(image, final_results, mode=1)

            detection = cv2.cvtColor(detection, cv2.COLOR_BGR2RGB)
            cv2.imshow("Pose Estimation", detection)
            out.write(detection)
            # print("frame")
            cv2.waitKey(1)

        cap.release()
        out.release()
        os.system("ffmpeg -i result.avi -c:v libx265 det04.mp4")
        cv2.destroyAllWindows()
    else:
        img_name = "./test.jpg"
        images = cv2.imread(img_name)
        image = images
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR)
        )

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            print(cfg.TEST.SCALE_FACTOR)
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR, reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR)
                )
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size
                )

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags
                )

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(
                final_heatmaps, tags, cfg.TEST.ADJUST, cfg.TEST.REFINE
            )

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3), final_heatmaps.size(2)]
            )

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        save_demo_image(image, final_results, file_name="./result.jpg")
Beispiel #9
0
def main():
    args = parse_args()
    update_config(cfg, args)
    check_config(cfg)
    pose_dir = prepare_output_dirs(args.outputDir)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        # model.load_state_dict(torch.load(model_state_file))
        pretrian_model_state = torch.load(model_state_file)

        for name, param in model.state_dict().items():

            model.state_dict()[name].copy_(pretrian_model_state['1.' + name])

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    # data_loader, test_dataset = make_test_dataloader(cfg)
    if cfg.MODEL.NAME == 'pose_hourglass':
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])

    parser = HeatmapParser(cfg)
    # Loading an video
    vidcap = cv2.VideoCapture(args.videoFile)
    fps = vidcap.get(cv2.CAP_PROP_FPS)
    if fps < args.inferenceFps:
        print('desired inference fps is ' + str(args.inferenceFps) +
              ' but video fps is ' + str(fps))
        exit()
    skip_frame_cnt = round(fps / args.inferenceFps)
    frame_width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    outcap = cv2.VideoWriter(
        '{}/{}_pose.avi'.format(
            args.outputDir,
            os.path.splitext(os.path.basename(args.videoFile))[0]),
        cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), int(skip_frame_cnt),
        (frame_width, frame_height))

    count = 0
    while vidcap.isOpened():
        total_now = time.time()
        ret, image_bgr = vidcap.read()
        count += 1

        if not ret:
            continue

        if count % skip_frame_cnt != 0:
            continue

        image_debug = image_bgr.copy()
        now = time.time()
        image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        # image = image_rgb.cpu().numpy()
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))
        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)
            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3),
                 final_heatmaps.size(2)])
        for person_joints in final_results:
            for joint in person_joints:
                x, y = int(joint[0]), int(joint[1])
                cv2.circle(image_debug, (x, y), 4, (255, 0, 0), 2)
        then = time.time()
        print("Find person pose in: {} sec".format(then - now))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        img_file = os.path.join(pose_dir, 'pose_{:08d}.jpg'.format(count))
        cv2.imwrite(img_file, image_debug)
        outcap.write(image_debug)

    vidcap.release()
    outcap.release()
def worker(gpu_id, dataset, indices, cfg, logger, final_output_dir,
           pred_queue):
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)

    model = eval("models." + cfg.MODEL.NAME + ".get_pose_net")(cfg,
                                                               is_train=False)

    dump_input = torch.rand(
        (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info("=> loading model from {}".format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, "model_best.pth.tar")
        logger.info("=> loading model from {}".format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    sub_dataset = torch.utils.data.Subset(dataset, indices)
    data_loader = torch.utils.data.DataLoader(sub_dataset,
                                              sampler=None,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=0,
                                              pin_memory=False)

    if cfg.MODEL.NAME == "pose_hourglass":
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])
    else:
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225]),
        ])

    parser = HeatmapParser(cfg)
    all_preds = []
    all_scores = []

    pbar = tqdm(total=len(sub_dataset)) if cfg.TEST.LOG_PROGRESS else None
    for i, (images, annos) in enumerate(data_loader):
        assert 1 == images.size(0), 'Test batch size should be 1'

        image = images[0].cpu().numpy()
        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR))

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR,
                                           reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size)

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags)

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)

            visual = False
            if visual:
                visual_heatmap = torch.max(final_skelemaps[0],
                                           dim=0,
                                           keepdim=True)[0]
                visual_heatmap = (visual_heatmap.cpu().numpy().repeat(
                    3, 0).transpose(1, 2, 0))

                mean = [0.485, 0.456, 0.406]
                std = [0.229, 0.224, 0.225]
                visual_img = (image_resized[0].cpu().numpy().transpose(
                    1, 2, 0).astype(np.float32))
                visual_img = visual_img[:, :, ::-1] * np.array(std).reshape(
                    1, 1, 3) + np.array(mean).reshape(1, 1, 3)
                visual_img = visual_img * 255
                test_data = cv2.addWeighted(
                    visual_img.astype(np.float32),
                    0.0,
                    visual_heatmap.astype(np.float32) * 255,
                    1.0,
                    0,
                )
                cv2.imwrite("test_data/{}.jpg".format(i), test_data)

            grouped, scores = parser.parse(final_heatmaps, tags,
                                           cfg.TEST.ADJUST, cfg.TEST.REFINE)

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3),
                 final_heatmaps.size(2)])

        if cfg.TEST.LOG_PROGRESS:
            pbar.update()

        data_idx = indices[i]
        img_id = dataset.ids[data_idx]
        file_name = dataset.coco.loadImgs(img_id)[0]["file_name"]

        for idx in range(len(final_results)):
            all_preds.append({
                "keypoints":
                final_results[idx][:, :3].reshape(-1, ).astype(
                    np.float).tolist(),
                "image_id":
                int(file_name[-16:-4]),
                "score":
                float(scores[idx]),
                "category_id":
                1
            })

    if cfg.TEST.LOG_PROGRESS:
        pbar.close()
    pred_queue.put_nowait(all_preds)
def worker(gpu_id, img_list, cfg, logger, final_output_dir, save_dir, pred_queue):
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)

    model = eval("models." + cfg.MODEL.NAME + ".get_pose_net")(cfg, is_train=False)

    dump_input = torch.rand((1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.TEST.MODEL_FILE:
        logger.info("=> loading model from {}".format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
    else:
        model_state_file = os.path.join(final_output_dir, "model_best.pth.tar")
        logger.info("=> loading model from {}".format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))
        
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()

    if cfg.MODEL.NAME == "pose_hourglass":
        transforms = torchvision.transforms.Compose(
            [torchvision.transforms.ToTensor(),]
        )
    else:
        transforms = torchvision.transforms.Compose(
            [
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

    parser = HeatmapParser(cfg)
    all_preds = []
    all_scores = []

    pbar = tqdm(total=len(img_list)) if cfg.TEST.LOG_PROGRESS else None
    for i, img_path in enumerate(img_list):

        image_name = img_path.split('/')[-1].split('.')[0]
        image = cv2.imread(
            img_path, 
            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        )

        # size at scale 1.0
        base_size, center, scale = get_multi_scale_size(
            image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR)
        )

        with torch.no_grad():
            final_heatmaps = None
            tags_list = []
            for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR, reverse=True)):
                input_size = cfg.DATASET.INPUT_SIZE
                image_resized, center, scale = resize_align_multi_scale(
                    image, input_size, s, min(cfg.TEST.SCALE_FACTOR)
                )
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()

                outputs, heatmaps, tags = get_multi_stage_outputs(
                    cfg, model, image_resized, cfg.TEST.FLIP_TEST,
                    cfg.TEST.PROJECT2IMAGE, base_size
                )

                final_heatmaps, tags_list = aggregate_results(
                    cfg, s, final_heatmaps, tags_list, heatmaps, tags
                )

            final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
            tags = torch.cat(tags_list, dim=4)

            grouped, scores = parser.parse(
                final_heatmaps, tags, cfg.TEST.ADJUST, cfg.TEST.REFINE
            )

            final_results = get_final_preds(
                grouped, center, scale,
                [final_heatmaps.size(3), final_heatmaps.size(2)]
            )

        if cfg.TEST.LOG_PROGRESS:
            pbar.update()

        for idx in range(len(final_results)):
            all_preds.append({
                "keypoints": final_results[idx][:,:3].reshape(-1,).astype(np.float).tolist(),
                "image_name": image_name,
                "score": float(scores[idx]),
                "category_id": 1
            })
        
        skeleton_map = draw_skeleton(image, np.array(final_results))
        cv2.imwrite(
            os.path.join(save_dir, "{}.jpg".format(image_name)),
            skeleton_map
            )

    if cfg.TEST.LOG_PROGRESS:
        pbar.close()
    pred_queue.put_nowait(all_preds)