Example #1
def detect_2d_joints(frame, short=360):
    """
    Args:
        short: 较短边resize大小
        frame: 任意尺寸的RGB图像

    Returns: 处理过的图像(ndarray),关节点坐标(NDArray)以及置信度等显示2d姿势相关的要素
    """
    # 缩放图像和生成目标检测器输入张量
    frame = nd.array(frame)
    x, img = data.transforms.presets.yolo.transform_test(frame, short=short)
    # print(x.shape, img.shape)
    # 检测人体
    class_ids, scores, bounding_boxes = detector(x)
    # 生成posenet的输入张量
    pose_input, upscale_bbox = detector_to_alpha_pose(img, class_ids, scores, bounding_boxes)
    # 预测关节点
    predict_heatmap = pose_net(pose_input)
    predict_coords, confidence = heatmap_to_coord_alpha_pose(predict_heatmap, upscale_bbox)

    # 显示2d姿态
    # ax = utils.viz.plot_keypoints(img, predict_coords, confidence, class_ids, bounding_boxes, scores)

    return {
        'img': img,
        'coords': predict_coords,
        'confidence': confidence,
        'class_ids': class_ids,
        'bboxes': bounding_boxes,
        'scores': scores
    }
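
A minimal usage sketch for detect_2d_joints above. The function relies on module-level detector and pose_net objects; the setup below builds them from the GluonCV model zoo (the model names are assumptions, and frame_rgb stands for any HxWx3 uint8 RGB image):

import mxnet as mx
from mxnet import nd
from gluoncv import model_zoo, data
from gluoncv.data.transforms.pose import detector_to_alpha_pose, heatmap_to_coord_alpha_pose

# Build the person detector and the AlphaPose estimator (assumed model names).
detector = model_zoo.get_model('yolo3_mobilenet1.0_coco', pretrained=True)
detector.reset_class(['person'], reuse_weights=['person'])
pose_net = model_zoo.get_model('alpha_pose_resnet101_v1b_coco', pretrained=True)

result = detect_2d_joints(frame_rgb, short=360)  # frame_rgb: assumed input image
print(result['coords'].shape, result['confidence'].shape)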
Example #2
def validate(val_data, val_dataset, net, ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]

    val_metric.reset()

    from tqdm import tqdm
    for batch in tqdm(val_data):
        # data, scale, center, score, imgid = val_batch_fn(batch, ctx)
        data, scale_box, score, imgid = val_batch_fn(batch, ctx)

        outputs = [net(X) for X in data]
        if opt.flip_test:
            data_flip = [nd.flip(X, axis=3) for X in data]
            outputs_flip = [net(X) for X in data_flip]
            outputs_flipback = [flip_heatmap(o, val_dataset.joint_pairs, shift=True) for o in outputs_flip]
            outputs = [(o + o_flip)/2 for o, o_flip in zip(outputs, outputs_flipback)]

        if len(outputs) > 1:
            outputs_stack = nd.concat(*[o.as_in_context(mx.cpu()) for o in outputs], dim=0)
        else:
            outputs_stack = outputs[0].as_in_context(mx.cpu())

        # preds, maxvals = get_final_preds(outputs_stack, center.asnumpy(), scale.asnumpy())
        preds, maxvals = heatmap_to_coord_alpha_pose(outputs_stack, scale_box)
        val_metric.update(preds, maxvals, score, imgid)

    res = val_metric.get()
    return res
Example #3
    def inference(self, model_input):
        x, img = model_input
        class_ids, scores, bounding_boxes = self.detector(x)
        pose_input, upscale_bbox = detector_to_alpha_pose(
            img, class_ids, scores, bounding_boxes)
        predicted_heatmap = self.pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord_alpha_pose(
            predicted_heatmap, upscale_bbox)

        return pred_coords, confidence
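
The inference method above expects model_input to be the (x, img) pair produced by a detector preset transform. A hedged sketch of preparing that input; rgb_frame and model are illustrative names, not part of the original:

import mxnet as mx
from gluoncv import data

# rgb_frame: an HxWx3 uint8 RGB array (assumed); model: an instance of the
# class that owns inference (assumed).
frame = mx.nd.array(rgb_frame).astype('uint8')
x, img = data.transforms.presets.yolo.transform_test(frame, short=512)
pred_coords, confidence = model.inference((x, img))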
Example #4
def keypoint_detection(img_path, detector, pose_net):
    x, img = data.transforms.presets.yolo.load_test(img_path, short=512)
    class_IDs, scores, bounding_boxs = detector(x)

    pose_input, upscale_bbox = detector_to_alpha_pose(img, class_IDs, scores, bounding_boxs)
    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord_alpha_pose(predicted_heatmap, upscale_bbox)

    ax = plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                        box_thresh=0.5, keypoint_thresh=0.2)
    plt.show()
Example #5
def get_pose_estimation(img_object,
                        detector_model="yolo3_mobilenet1.0_coco",
                        pose_model="simple_pose_resnet18_v1b",
                        box_thresh=0.5,
                        keypoint_thresh=0.2):
    '''
    Detect people in an image (local path or URL) with the chosen detector,
    estimate their poses with the chosen pose network, and return a
    matplotlib axes with the keypoints plotted.
    '''
    detector = model_zoo.get_model(detector_model, pretrained=True)

    pose_net = model_zoo.get_model(pose_model, pretrained=True)

    # Loading weights for only person class
    detector.reset_class(["person"], reuse_weights=['person'])

    try:
        img_object = utils.download(img_object)
    except ValueError:
        pass

    if "yolo" in detector_model:
        x, img = data.transforms.presets.yolo.load_test(img_object, short=512)
    elif "ssd" in detector_model:
        x, img = data.transforms.presets.ssd.load_test(img_object, short=512)

    class_IDs, scores, bounding_boxs = detector(x)

    if "simple_pose" in pose_model:
        pose_input, upscale_bbox = detector_to_simple_pose(
            img, class_IDs, scores, bounding_boxs)
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap,
                                                   upscale_bbox)
    elif "alpha_pose" in pose_model:
        pose_input, upscale_bbox = detector_to_alpha_pose(
            img, class_IDs, scores, bounding_boxs)
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord_alpha_pose(
            predicted_heatmap, upscale_bbox)

    ax = utils.viz.plot_keypoints(img,
                                  pred_coords,
                                  confidence,
                                  class_IDs,
                                  bounding_boxs,
                                  scores,
                                  box_thresh=box_thresh,
                                  keypoint_thresh=keypoint_thresh)

    return ax
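
A quick usage sketch for get_pose_estimation; 'people.jpg' is a placeholder path (a downloadable URL also works, since utils.download is tried first and a local path falls through on ValueError), and the model names come from the GluonCV model zoo:

import matplotlib.pyplot as plt

ax = get_pose_estimation('people.jpg',
                         detector_model='yolo3_mobilenet1.0_coco',
                         pose_model='alpha_pose_resnet101_v1b_coco')
plt.show()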
Example #6
    def get_poses(self, image):
        x, image = self.transformer(mx.nd.array(image).astype('uint8'),
                                    short=512)
        x = x.as_in_context(self.ctx)

        class_IDs, scores, bounding_boxs = self.detector(x)
        pose_input, upscale_bbox = detector_to_alpha_pose(image,
                                                          class_IDs,
                                                          scores,
                                                          bounding_boxs,
                                                          ctx=self.ctx)

        if upscale_bbox is not None and len(upscale_bbox) > 0:
            pose_input = pose_input.as_in_context(self.ctx)
            predicted_heatmap = self.pose_net(pose_input).as_in_context(
                mx.cpu())
            return heatmap_to_coord_alpha_pose(predicted_heatmap, upscale_bbox)
        else:
            return mx.nd.array([]), mx.nd.array([])
Example #7
def validate(val_data, val_dataset, net, ctx, opt):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]

    val_metric = COCOKeyPointsMetric(val_dataset,
                                     'coco_keypoints',
                                     in_vis_thresh=0)

    for batch in tqdm(val_data, dynamic_ncols=True):
        # data, scale, center, score, imgid = val_batch_fn(batch, ctx)
        data, scale_box, score, imgid = val_batch_fn(batch, ctx)

        outputs = [net(X) for X in data]
        if opt.flip_test:
            data_flip = [nd.flip(X, axis=3) for X in data]
            outputs_flip = [net(X) for X in data_flip]
            outputs_flipback = [
                flip_heatmap(o, val_dataset.joint_pairs, shift=True)
                for o in outputs_flip
            ]
            outputs = [(o + o_flip) / 2
                       for o, o_flip in zip(outputs, outputs_flipback)]

        if len(outputs) > 1:
            outputs_stack = nd.concat(
                *[o.as_in_context(mx.cpu()) for o in outputs], dim=0)
        else:
            outputs_stack = outputs[0].as_in_context(mx.cpu())

        # preds, maxvals = get_final_preds(outputs_stack, center.asnumpy(), scale.asnumpy())
        preds, maxvals = heatmap_to_coord_alpha_pose(outputs_stack, scale_box)
        val_metric.update(preds, maxvals, score, imgid)

    nullwriter = NullWriter()
    oldstdout = sys.stdout
    sys.stdout = nullwriter
    try:
        res = val_metric.get()
    finally:
        sys.stdout = oldstdout
    return res
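
Example #7 silences stdout with a NullWriter while val_metric.get() runs, because the COCO evaluation prints verbose summaries. The helper is not shown in the snippet; a minimal sketch of what it needs to provide:

class NullWriter:
    """Stand-in stdout that discards everything written to it."""
    def write(self, text):
        pass

    def flush(self):
        pass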
Example #8
def keypoint_detection(img, detector, pose_net, ctx=mx.cpu(), axes=None):
    x, img = gcv.data.transforms.presets.yolo.transform_test(img, short=512, max_size=350)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)

    plt.cla()
    pose_input, upscale_bbox = detector_to_alpha_pose(img, class_IDs, scores, bounding_boxs,
                                                      output_shape=(128, 96), ctx=ctx)
    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord_alpha_pose(predicted_heatmap, upscale_bbox)

        axes = plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                              box_thresh=0.5, keypoint_thresh=0.2, ax=axes)
        plt.draw()
        plt.pause(0.001)
    else:
        axes = plot_image(img, ax=axes)  # no detections: show the plain image
        plt.draw()
        plt.pause(0.001)

    return axes
Example #9
def predict(img_path):
    # 1. Preprocess the input image and detect people.
    x, img = data.transforms.presets.yolo.load_test(img_path, short=256)

    start = time.time()

    # detect persons and bbox
    class_ids, scores, bounding_boxes = detector(x)
    # 2. Convert the detector outputs into alpha_pose inputs.
    pose_input, upscale_bbox = detector_to_alpha_pose(img, class_ids, scores,
                                                      bounding_boxes)
    global detector_time
    detector_time += (time.time() - start)
    print("detector cost time: {:.3f} seconds".format(time.time() - start))

    # 3. Predict the joint keypoints.
    start_time = time.time()
    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord_alpha_pose(
        predicted_heatmap, upscale_bbox)
    global predictor_2d_time
    predictor_2d_time += (time.time() - start_time)
    print("2d pose predictor cost time: {:.3f} seconds".format(time.time() -
                                                               start_time))

    # 4. Optionally display the 2D pose:
    # ax = utils.viz.plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxes, scores, box_thresh=0.5,
    #                               keypoint_thresh=0.2)

    # 5. Normalize the screen coordinates.
    start_time = time.time()
    kps = normalize_screen_coordinates(pred_coords.asnumpy(),
                                       w=img.shape[1],
                                       h=img.shape[0])

    receptive_field = pose3d_predictor.receptive_field()
    pad = (receptive_field - 1) // 2  # Padding on each side
    causal_shift = 0

    # 6. Build a generator to feed the 3D predictor.
    generator = UnchunkedGenerator(None,
                                   None, [kps],
                                   pad=pad,
                                   causal_shift=causal_shift,
                                   augment=False)

    # 7. Estimate the 3D pose.
    prediction = predict_3d_pos(generator, pose3d_predictor)
    global predictor_3d_time, full_time
    predictor_3d_time += time.time() - start_time
    full_time += time.time() - start
    print("3d pose predictor cost time: {:.3f} seconds".format(time.time() -
                                                               start_time))

    rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804],
                   dtype=np.float32)
    prediction = camera_to_world(prediction, R=rot, t=0)
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])
    elapsed = time.time() - start
    print("Total elapsed time of predicting image {}: {:.3f} seconds".format(
        img_path, elapsed))
    return prediction, img
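
Step 5 above relies on normalize_screen_coordinates, a helper from the VideoPose3D codebase that is not shown here. A sketch of its standard form, mapping pixel coordinates into roughly [-1, 1] while preserving the aspect ratio:

import numpy as np

def normalize_screen_coordinates(X, w, h):
    # X: (..., 2) array of pixel coordinates; w, h: image width and height.
    assert X.shape[-1] == 2
    return X / w * 2 - [1, h / w]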
Example #10
    pose_input, upscale_bbox = detector_to_alpha_pose(img, class_IDs, scores,
                                                      bounding_boxs)

    ######################################################################
    # Predict with an Alpha Pose network
    # --------------------
    #
    # Now we can make predictions.
    #
    # An Alpha Pose network predicts a heatmap for each joint (i.e. keypoint).
    # After inference we search for the highest value in each heatmap and map it
    # to the coordinates on the original image.

    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord_alpha_pose(
        predicted_heatmap, upscale_bbox)

    ######################################################################
    # Display the pose estimation results
    # ---------------------
    #
    # We can use :py:func:`gluoncv.utils.viz.plot_keypoints` to visualize the
    # results.

    ax = utils.viz.plot_keypoints(img,
                                  pred_coords,
                                  confidence,
                                  class_IDs,
                                  bounding_boxs,
                                  scores,
                                  box_thresh=0.5,
                                  keypoint_thresh=0.2)
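
The comment block above describes the decoding step: take the argmax of each joint's heatmap and map it back to image coordinates. A simplified NumPy illustration of that idea (the real heatmap_to_coord_alpha_pose additionally rescales by the upscaled boxes):

import numpy as np

def heatmap_argmax(heatmaps):
    # heatmaps: (num_joints, H, W) array of per-joint score maps (assumed shape).
    num_joints, h, w = heatmaps.shape
    flat = heatmaps.reshape(num_joints, -1)
    idx = flat.argmax(axis=1)
    coords = np.stack([idx % w, idx // w], axis=1)  # (x, y) per joint
    maxvals = flat.max(axis=1)                      # peak value as confidence
    return coords, maxvals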
Example #11
def video_to_listPose(vid):
    cap = cv2.VideoCapture(vid)  # load video
    if not cap.isOpened():  # check that the video opened successfully
        print("Error opening video stream or file")
        return

    frame_count = 0
    pose_data_vid = []
    dimensions = (0, 0)
    frame_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    pbar = tqdm(total=frame_length, ncols=100, desc='.')

    # Iterate through every frame in video
    while cap.isOpened():
        ret, frame = cap.read()  # read the current frame
        if frame is None:
            break  # no frames left: finished iterating through the video
        frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype(
            'uint8')  # mxnet readable

        # Person detection
        x, frame = gcv.data.transforms.presets.yolo.transform_test(
            frame)  # short=406, max_size=1024
        class_IDs, scores, bounding_boxs = detector(x.as_in_context(ctx))

        # Pose estimation
        pose_input, upscale_bbox = detector_to_alpha_pose(frame,
                                                          class_IDs,
                                                          scores,
                                                          bounding_boxs,
                                                          output_shape=(320, 256))
        # Gets current pose keypoints
        if upscale_bbox is None:  # no person detected in this frame
            pbar.set_description_str('Skipping  ')
            pose_data_curr = [[-1, -1] for _ in range(17)]
        else:  # person detected: run the pose estimator
            pbar.set_description_str('Processing')
            predicted_heatmap = estimator(pose_input)
            pred_coords, confidence = heatmap_to_coord_alpha_pose(
                predicted_heatmap, upscale_bbox)

            scores = scores.asnumpy()
            confidence = confidence.asnumpy()
            pred_coords = pred_coords.asnumpy()

            # Preparing for json
            pose_data_curr = curr_pose(frame,
                                       pred_coords,
                                       confidence,
                                       scores,
                                       keypoint_thresh=0.2)
        pose_data_vid.append(pose_data_curr)

        if frame_count == 0:
            dimensions = [frame.shape[1], frame.shape[0]]
        frame_count += 1
        pbar.update(1)
    cap.release()
    pbar.close()

    return dimensions, pose_data_vid
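
A hypothetical end-to-end use of video_to_listPose: run it over a clip and dump the per-frame keypoints (the "Preparing for json" step above) to disk. 'input.mp4' and 'poses.json' are placeholder paths, and curr_pose is assumed to return JSON-serializable lists:

import json

dims, poses = video_to_listPose('input.mp4')
with open('poses.json', 'w') as f:
    json.dump({'dimensions': dims, 'pose_data': poses}, f)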