def detect_2d_joints(frame, short=360):
    """Detect 2D joints in a single frame.

    Args:
        frame: an RGB image of arbitrary size.
        short: target length of the shorter edge when resizing.

    Returns:
        A dict holding the processed image (ndarray), the joint
        coordinates (NDArray), their confidences, and the detector
        outputs needed to render the 2D pose.
    """
    # Resize the image and build the detector input tensor
    frame = nd.array(frame)
    x, img = data.transforms.presets.yolo.transform_test(frame, short=short)
    # Detect persons
    class_ids, scores, bounding_boxes = detector(x)
    # Convert the detector output into the pose network's input tensor
    pose_input, upscale_bbox = detector_to_alpha_pose(img, class_ids, scores, bounding_boxes)
    # Predict the joints
    predict_heatmap = pose_net(pose_input)
    predict_coords, confidence = heatmap_to_coord_alpha_pose(predict_heatmap, upscale_bbox)
    # Optionally display the 2D pose:
    # ax = utils.viz.plot_keypoints(img, predict_coords, confidence, class_ids, bounding_boxes, scores)
    return {
        'img': img,
        'coords': predict_coords,
        'confidence': confidence,
        'class_ids': class_ids,
        'bboxes': bounding_boxes,
        'scores': scores
    }
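# `detect_2d_joints` relies on module-level `detector` and `pose_net` objects that the
# snippet does not define. A minimal setup sketch using GluonCV's model zoo (these
# particular model names are assumptions, not something the snippet confirms):
import mxnet as mx
from mxnet import nd
from gluoncv import data, model_zoo
from gluoncv.data.transforms.pose import detector_to_alpha_pose, heatmap_to_coord_alpha_pose

detector = model_zoo.get_model('yolo3_mobilenet1.0_coco', pretrained=True)
detector.reset_class(['person'], reuse_weights=['person'])  # keep only the person class
pose_net = model_zoo.get_model('alpha_pose_resnet101_v1b_coco', pretrained=True)
# result = detect_2d_joints(rgb_frame)  # rgb_frame: HxWx3 uint8 RGB image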
def validate(val_data, val_dataset, net, ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    val_metric.reset()
    from tqdm import tqdm
    for batch in tqdm(val_data):
        data, scale_box, score, imgid = val_batch_fn(batch, ctx)
        outputs = [net(X) for X in data]
        if opt.flip_test:
            # Average the heatmaps of the original and the horizontally flipped input
            data_flip = [nd.flip(X, axis=3) for X in data]
            outputs_flip = [net(X) for X in data_flip]
            outputs_flipback = [flip_heatmap(o, val_dataset.joint_pairs, shift=True)
                                for o in outputs_flip]
            outputs = [(o + o_flip) / 2
                       for o, o_flip in zip(outputs, outputs_flipback)]
        if len(outputs) > 1:
            outputs_stack = nd.concat(*[o.as_in_context(mx.cpu()) for o in outputs], dim=0)
        else:
            outputs_stack = outputs[0].as_in_context(mx.cpu())
        preds, maxvals = heatmap_to_coord_alpha_pose(outputs_stack, scale_box)
        val_metric.update(preds, maxvals, score, imgid)
    res = val_metric.get()
    return res
def inference(self, model_input):
    x, img = model_input
    class_ids, scores, bounding_boxes = self.detector(x)
    pose_input, upscale_bbox = detector_to_alpha_pose(
        img, class_ids, scores, bounding_boxes)
    predicted_heatmap = self.pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord_alpha_pose(
        predicted_heatmap, upscale_bbox)
    return pred_coords, confidence
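# `inference` expects `model_input` to be the (x, img) pair produced by a GluonCV
# detector preset. A hedged call-site sketch ('person.jpg' and `wrapper` are
# placeholders for a real path and an instance of the class above):
from gluoncv import data

x, img = data.transforms.presets.yolo.load_test('person.jpg', short=512)
pred_coords, confidence = wrapper.inference((x, img))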
def keypoint_detection(img_path, detector, pose_net):
    x, img = data.transforms.presets.yolo.load_test(img_path, short=512)
    class_IDs, scores, bounding_boxs = detector(x)
    pose_input, upscale_bbox = detector_to_alpha_pose(img, class_IDs, scores, bounding_boxs)
    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord_alpha_pose(predicted_heatmap, upscale_bbox)
    ax = plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                        box_thresh=0.5, keypoint_thresh=0.2)
    plt.show()
def get_pose_estimation(img_object,
                        detector_model="yolo3_mobilenet1.0_coco",
                        pose_model="simple_pose_resnet18_v1b",
                        box_thresh=0.5, keypoint_thresh=0.2):
    '''Run person detection and pose estimation on an image and
    return a matplotlib axes with the keypoints plotted.'''
    detector = model_zoo.get_model(detector_model, pretrained=True)
    pose_net = model_zoo.get_model(pose_model, pretrained=True)
    # Load weights for the person class only
    detector.reset_class(["person"], reuse_weights=['person'])
    # Download the image if img_object is a URL; otherwise treat it as a local path
    try:
        img_object = utils.download(img_object)
    except ValueError:
        pass
    if "yolo" in detector_model:
        x, img = data.transforms.presets.yolo.load_test(img_object, short=512)
    elif "ssd" in detector_model:
        x, img = data.transforms.presets.ssd.load_test(img_object, short=512)
    class_IDs, scores, bounding_boxs = detector(x)
    if "simple_pose" in pose_model:
        pose_input, upscale_bbox = detector_to_simple_pose(
            img, class_IDs, scores, bounding_boxs)
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
    elif "alpha_pose" in pose_model:
        pose_input, upscale_bbox = detector_to_alpha_pose(
            img, class_IDs, scores, bounding_boxs)
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord_alpha_pose(
            predicted_heatmap, upscale_bbox)
    ax = utils.viz.plot_keypoints(img, pred_coords, confidence,
                                  class_IDs, bounding_boxs, scores,
                                  box_thresh=box_thresh,
                                  keypoint_thresh=keypoint_thresh)
    return ax
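# A possible invocation, assuming the AlphaPose variant ('soccer.png' is a
# placeholder; any local path or URL should work thanks to the download fallback):
import matplotlib.pyplot as plt

ax = get_pose_estimation('soccer.png',
                         detector_model='yolo3_mobilenet1.0_coco',
                         pose_model='alpha_pose_resnet101_v1b_coco')
plt.show()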
def get_poses(self, image):
    x, image = self.transformer(mx.nd.array(image).astype('uint8'), short=512)
    x = x.as_in_context(self.ctx)
    class_IDs, scores, bounding_boxs = self.detector(x)
    pose_input, upscale_bbox = detector_to_alpha_pose(image, class_IDs, scores,
                                                      bounding_boxs, ctx=self.ctx)
    if upscale_bbox is not None and len(upscale_bbox) > 0:
        pose_input = pose_input.as_in_context(self.ctx)
        predicted_heatmap = self.pose_net(pose_input).as_in_context(mx.cpu())
        return heatmap_to_coord_alpha_pose(predicted_heatmap, upscale_bbox)
    else:
        return mx.nd.array([]), mx.nd.array([])
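# Because `get_poses` returns two empty NDArrays when nothing is detected, callers
# can branch on the result size. A sketch (`pose_estimator` and `frame_rgb` are
# hypothetical; the former is an instance of the class above):
coords, confidence = pose_estimator.get_poses(frame_rgb)
if coords.size == 0:
    print('no person detected in this frame')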
def validate(val_data, val_dataset, net, ctx, opt):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    val_metric = COCOKeyPointsMetric(val_dataset, 'coco_keypoints', in_vis_thresh=0)
    for batch in tqdm(val_data, dynamic_ncols=True):
        data, scale_box, score, imgid = val_batch_fn(batch, ctx)
        outputs = [net(X) for X in data]
        if opt.flip_test:
            # Average the heatmaps of the original and the horizontally flipped input
            data_flip = [nd.flip(X, axis=3) for X in data]
            outputs_flip = [net(X) for X in data_flip]
            outputs_flipback = [
                flip_heatmap(o, val_dataset.joint_pairs, shift=True)
                for o in outputs_flip
            ]
            outputs = [(o + o_flip) / 2
                       for o, o_flip in zip(outputs, outputs_flipback)]
        if len(outputs) > 1:
            outputs_stack = nd.concat(
                *[o.as_in_context(mx.cpu()) for o in outputs], dim=0)
        else:
            outputs_stack = outputs[0].as_in_context(mx.cpu())
        preds, maxvals = heatmap_to_coord_alpha_pose(outputs_stack, scale_box)
        val_metric.update(preds, maxvals, score, imgid)
    # Temporarily redirect stdout so the COCO evaluation does not spam the console
    nullwriter = NullWriter()
    oldstdout = sys.stdout
    sys.stdout = nullwriter
    try:
        res = val_metric.get()
    finally:
        sys.stdout = oldstdout
    return res
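# The snippet above does not define `NullWriter`; a minimal sketch of the
# file-like sink it implies:
class NullWriter(object):
    """Discards everything written to it (used to silence COCO eval output)."""
    def write(self, arg):
        pass

    def flush(self):
        pass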
def keypoint_detection(img, detector, pose_net, ctx=mx.cpu(), axes=None):
    x, img = gcv.data.transforms.presets.yolo.transform_test(img, short=512, max_size=350)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)
    plt.cla()
    pose_input, upscale_bbox = detector_to_alpha_pose(img, class_IDs, scores, bounding_boxs,
                                                      output_shape=(128, 96), ctx=ctx)
    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord_alpha_pose(predicted_heatmap, upscale_bbox)
        axes = plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                              box_thresh=0.5, keypoint_thresh=0.2, ax=axes)
    else:
        # No detection: just redraw the plain frame
        axes = plot_image(img, ax=axes)
    plt.draw()
    plt.pause(0.001)
    return axes
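# `keypoint_detection` is written for streaming use (plt.pause keeps the figure
# responsive), so a typical driver reads frames from OpenCV. A sketch under that
# assumption (device index 0 is a placeholder):
import cv2
import mxnet as mx

cap = cv2.VideoCapture(0)
axes = None
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # OpenCV delivers BGR; convert to RGB and wrap as an MXNet NDArray
    frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
    axes = keypoint_detection(frame, detector, pose_net, axes=axes)
cap.release()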
def predict(img_path):
    # 1. Preprocess the input image and detect persons
    x, img = data.transforms.presets.yolo.load_test(img_path, short=256)
    start = time.time()
    # Detect persons and their bounding boxes
    class_ids, scores, bounding_boxes = detector(x)
    # 2. Convert the detector output into AlphaPose input tensors
    pose_input, upscale_bbox = detector_to_alpha_pose(img, class_ids, scores, bounding_boxes)
    global detector_time
    detector_time += (time.time() - start)
    print("detector cost time: {:.3f} seconds".format(time.time() - start))
    # 3. Predict the joints
    start_time = time.time()
    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord_alpha_pose(
        predicted_heatmap, upscale_bbox)
    global predictor_2d_time
    predictor_2d_time += (time.time() - start_time)
    print("2d pose predictor cost time: {:.3f} seconds".format(time.time() - start_time))
    # 4. Optionally display the 2D pose:
    # ax = utils.viz.plot_keypoints(img, pred_coords, confidence, class_ids, bounding_boxes,
    #                               scores, box_thresh=0.5, keypoint_thresh=0.2)
    # 5. Normalize the screen coordinates
    start_time = time.time()
    kps = normalize_screen_coordinates(pred_coords.asnumpy(), w=img.shape[1], h=img.shape[0])
    receptive_field = pose3d_predictor.receptive_field()
    pad = (receptive_field - 1) // 2  # padding on each side
    causal_shift = 0
    # 6. Create a generator as the input of the 3D predictor
    generator = UnchunkedGenerator(None, None, [kps], pad=pad,
                                   causal_shift=causal_shift, augment=False)
    # 7. Estimate the 3D pose
    prediction = predict_3d_pos(generator, pose3d_predictor)
    global predictor_3d_time, full_time
    predictor_3d_time += time.time() - start_time
    full_time += time.time() - start
    print("3d pose predictor cost time: {:.3f} seconds".format(time.time() - start_time))
    # Rotate the prediction from camera space into world space and ground it
    rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804], dtype=np.float32)
    prediction = camera_to_world(prediction, R=rot, t=0)
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])
    elapsed = time.time() - start
    print("Total elapsed time of predicting image {}: {:.3f} seconds".format(
        img_path, elapsed))
    return prediction, img
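# `normalize_screen_coordinates`, `UnchunkedGenerator`, and `camera_to_world` follow
# the conventions of the VideoPose3D codebase (an assumption based on the names).
# The normalization is small enough to sketch: x is mapped from [0, w] to [-1, 1]
# and y is scaled by the same factor so the aspect ratio is preserved.
import numpy as np

def normalize_screen_coordinates(X, w, h):
    """Map pixel coordinates so [0, w] becomes [-1, 1], preserving aspect ratio."""
    assert X.shape[-1] == 2
    return X / w * 2 - [1, h / w]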
pose_input, upscale_bbox = detector_to_alpha_pose(img, class_IDs, scores, bounding_boxs)

######################################################################
# Predict with an Alpha Pose network
# ----------------------------------
#
# Now we can make the prediction.
#
# An Alpha Pose network predicts a heatmap for each joint (i.e. keypoint).
# After inference, we search for the highest value in each heatmap and map it
# to the coordinates on the original image.

predicted_heatmap = pose_net(pose_input)
pred_coords, confidence = heatmap_to_coord_alpha_pose(
    predicted_heatmap, upscale_bbox)

######################################################################
# Display the pose estimation results
# -----------------------------------
#
# We can use :py:func:`gluoncv.utils.viz.plot_keypoints` to visualize the
# results.

ax = utils.viz.plot_keypoints(img, pred_coords, confidence,
                              class_IDs, bounding_boxs, scores,
                              box_thresh=0.5, keypoint_thresh=0.2)
plt.show()
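######################################################################
# The heatmap-to-coordinate step described above boils down to an argmax per
# joint followed by rescaling into the original image. A toy NumPy illustration
# of the idea (not the library's exact implementation, which also refines the
# peak location):
import numpy as np

def toy_heatmap_to_coord(heatmaps, bbox):
    """heatmaps: (num_joints, H, W); bbox: (x0, y0, x1, y1) in image pixels."""
    x0, y0, x1, y1 = bbox
    coords, confs = [], []
    for hm in heatmaps:
        py, px = np.unravel_index(np.argmax(hm), hm.shape)  # heatmap peak
        # Map the peak back into the person box, then into image coordinates
        coords.append([x0 + (px + 0.5) / hm.shape[1] * (x1 - x0),
                       y0 + (py + 0.5) / hm.shape[0] * (y1 - y0)])
        confs.append(hm[py, px])  # peak value doubles as the confidence
    return np.array(coords), np.array(confs)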
def video_to_listPose(vid):
    cap = cv2.VideoCapture(vid)  # load video
    if not cap.isOpened():  # check whether the video opened successfully
        print("Error opening video stream or file")
        return

    frame_count = 0
    pose_data_vid = []
    dimensions = (0, 0)

    frame_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    pbar = tqdm(total=frame_length, ncols=1, desc='.')
    pbar.ncols = 100

    # Iterate through every frame in the video
    while cap.isOpened():
        ret, frame = cap.read()  # read current frame
        if frame is None:
            break  # no current frame: finished iterating through the frames
        frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')  # MXNet readable

        # Person detection
        x, frame = gcv.data.transforms.presets.yolo.transform_test(frame)  # short=406, max_size=1024
        class_IDs, scores, bounding_boxs = detector(x.as_in_context(ctx))

        # Pose estimation
        pose_input, upscale_bbox = detector_to_alpha_pose(frame, class_IDs, scores, bounding_boxs,
                                                          output_shape=(320, 256))

        # Get the current pose keypoints
        if upscale_bbox is None:  # no person detected
            pbar.set_description_str('Skipping ')
            pose_data_curr = [[-1, -1] for j in range(0, 17)]
        else:  # person detected
            pbar.set_description_str('Processing')
            predicted_heatmap = estimator(pose_input)
            pred_coords, confidence = heatmap_to_coord_alpha_pose(predicted_heatmap, upscale_bbox)

            scores = scores.asnumpy()
            confidence = confidence.asnumpy()
            pred_coords = pred_coords.asnumpy()

            # Prepare for JSON output
            pose_data_curr = curr_pose(frame, pred_coords, confidence, scores, keypoint_thresh=0.2)
        pose_data_vid.append(pose_data_curr)

        if frame_count == 0:
            dimensions = [frame.shape[1], frame.shape[0]]
        frame_count += 1
        pbar.update(1)

    cap.release()
    pbar.close()
    return dimensions, pose_data_vid
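# A hypothetical driver that runs the extractor on a clip and saves the result
# ('dance.mp4' and 'poses.json' are placeholder names; this assumes `curr_pose`
# returns plain nested lists, which is what the JSON step implies):
import json

dimensions, pose_data_vid = video_to_listPose('dance.mp4')
with open('poses.json', 'w') as f:
    json.dump({'dimensions': dimensions, 'frames': pose_data_vid}, f)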