Example #1
def main():
    args = parse_args()
    update_config(cfg, args)

    #### load the pose-hrnet model
    pose_model = model_load(cfg)
    #  pose_model = torch.nn.DataParallel(pose_model, device_ids=[0,1]).cuda()
    pose_model.cuda()

    from pycocotools.coco import COCO
    annFile = '/ssd/xyliu/data/coco/annotations/instances_val2017.json'
    im_root = '/ssd/xyliu/data/coco/images/val2017/'
    coco = COCO(annFile)
    catIds = coco.getCatIds(catNms=['person'])
    # ids of all images that contain persons
    imgIds = coco.getImgIds(catIds=catIds)
    kpts_result = []
    detected_image_num = 0
    box_num = 0
    for imgId in tqdm(imgIds):
        img = coco.loadImgs(imgId)[0]
        im_name = img['file_name']
        img_path = im_root + im_name
        img_input = plt.imread(img_path)

        try:
            bboxs, scores = mm_det(human_model, img_input, 0.3)
            inputs, origin_img, center, scale = PreProcess(
                img_input, bboxs, scores, cfg)

        except Exception as e:
            print(e)
            continue

        detected_image_num += 1
        with torch.no_grad():
            output = pose_model(inputs.cuda())
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

            #  vis = np.ones(shape=maxvals.shape,)
            # use the heatmap maxima as per-joint visibility/confidence scores
            vis = maxvals
            preds = preds.astype(np.float16)
            keypoints = np.concatenate((preds, vis), -1)
            for k, s in zip(keypoints, scores.tolist()):
                box_num += 1
                k = k.flatten().tolist()
                item = {
                    "image_id": imgId,
                    "category_id": 1,
                    "keypoints": k,
                    "score": s
                }
                kpts_result.append(item)

    num_joints = 17
    in_vis_thre = 0.2
    oks_thre = 0.5  # defined but unused: no OKS-NMS is applied below
    oks_nmsed_kpts = []  # defined but unused: no OKS-NMS is applied below
    for i in range(len(kpts_result)):
        img_kpts = kpts_result[i]['keypoints']
        kpt = np.array(img_kpts).reshape(17, 3)
        box_score = kpts_result[i]['score']
        kpt_score = 0
        valid_num = 0
        # average the confidence of sufficiently visible joints in this bbox
        for n_jt in range(0, num_joints):
            t_s = kpt[n_jt][2]
            if t_s > in_vis_thre:
                kpt_score = kpt_score + t_s
                valid_num = valid_num + 1
        if valid_num != 0:
            kpt_score = kpt_score / valid_num

        # rescoring: multiply the averaged keypoint confidence by the box confidence
        kpts_result[i]['score'] = kpt_score * box_score

    import json
    data = json.dumps(kpts_result)
    print('image num is {}\tdetected image num is {}\tperson num is {}'.format(
        len(imgIds), detected_image_num, box_num))
    #  data = json.dumps(str(kpts_result))
    with open('person_keypoints.json', 'wt') as f:
        #  pass
        f.write(data)
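
The JSON written above follows the COCO keypoint result format, so it can be scored with pycocotools' COCOeval. A minimal sketch of that step, assuming the matching ground-truth file person_keypoints_val2017.json is available (both paths here are illustrative):

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# ground truth: keypoint annotations for val2017 (path is an assumption)
coco_gt = COCO('/ssd/xyliu/data/coco/annotations/person_keypoints_val2017.json')
# detections: the file written by main() above
coco_dt = coco_gt.loadRes('person_keypoints.json')
coco_eval = COCOeval(coco_gt, coco_dt, 'keypoints')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()  # prints keypoint AP / AR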
Example #2
def main():
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    assert ret_val, 'cannot read a frame from the video source'
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load the pose-hrnet model
    pose_model = model_load(cfg)
    #  pose_model = torch.nn.DataParallel(pose_model, device_ids=[0,1]).cuda()
    pose_model.cuda()

    item = 0
    for i in tqdm(range(video_length - 1)):

        x0 = ckpt_time()
        ret_val, input_image = cam.read()

        #  if args.camera:
        #      # for real-time speed, predict only on every other frame
        #      if item == 0:
        #          item = 1
        #          continue

        item = 0
        try:
            bboxs, scores = mm_det(human_model, input_image)
            # bboxs holds the detected human bounding-box coordinates
            inputs, origin_img, center, scale = PreProcess(
                input_image, bboxs, scores, cfg)
        except Exception:
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # swap the BGR frame from OpenCV into RGB channel order
            inputs = inputs[:, [2, 1, 0]]
            # compute the output heatmaps
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

        image = plot_keypoint(origin_img, preds, maxvals, 0.1)
        out.write(image)
        if args.display:
            ######### full-screen display (alternative)
            #  out_win = "output_style_full_screen"
            #  cv2.namedWindow(out_win, cv2.WINDOW_NORMAL)
            #  cv2.setWindowProperty(out_win, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
            #  cv2.imshow(out_win, image)

            ########### display in a fixed-size window
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
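
Note that the loop above never releases the capture device or the VideoWriter, so the tail of the output file can be lost. A minimal cleanup sketch that could follow the loop, using the names from the function above:

    # flush and close the output video, release the camera/file handle,
    # and tear down any OpenCV display windows
    out.release()
    cam.release()
    cv2.destroyAllWindows()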