def estimate(frame):
    """Estimate the pose of the single person visible in a BGR frame.

    The frame is converted to RGB, run through the module-level
    ``detector`` and, when exactly one person box comes back from
    ``detector_to_simple_pose``, through the module-level ``estimator``.

    Args:
        frame: Image in BGR channel order (it is handed to ``cv.cvtColor``
            with ``cv.COLOR_BGR2RGB``).

    Returns:
        The flattened estimator output (``.asnumpy().flatten()``) when
        exactly one person box is found, otherwise ``False``.
    """
    rgb = mx.nd.array(cv.cvtColor(frame, cv.COLOR_BGR2RGB)).astype('uint8')
    x, img = gcv.data.transforms.presets.ssd.transform_test(rgb, short=512)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)
    pose_input, upscale_bbox = detector_to_simple_pose(
        img, class_IDs, scores, bounding_boxs,
        output_shape=(128, 96), ctx=ctx)

    # Only the single-person case is handled.
    if len(upscale_bbox) != 1:
        return False

    # The crops computed above are reused; the previous version recomputed
    # them here with exactly the same arguments.
    return estimator(pose_input).asnumpy().flatten()
def detect_2d_joints(frame, short=360):
    """Detect people in a frame and predict their 2D joints.

    Args:
        frame: RGB image of arbitrary size.
        short: Size the shorter image side is resized to.

    Returns:
        A dict with the resized image (``img``), the joint coordinates
        (``coords``), their ``confidence`` and the detector outputs
        (``class_ids``, ``bboxes``, ``scores``) needed to draw the 2D pose.
    """
    # Resize the image and build the input tensor for the object detector.
    tensor, resized = data.transforms.presets.yolo.transform_test(
        nd.array(frame), short=short)

    # Detect people.
    ids, det_scores, boxes = detector(tensor)

    # Build the input tensor for the pose network.
    crops, crop_boxes = detector_to_simple_pose(
        resized, ids, det_scores, boxes)

    # Predict the joints.
    heatmap = pose_net(crops)
    joints, joint_conf = heatmap_to_coord(heatmap, crop_boxes)

    return dict(
        img=resized,
        coords=joints,
        confidence=joint_conf,
        class_ids=ids,
        bboxes=boxes,
        scores=det_scores,
    )
def keypoint_detection(img, detector, pose_net, ctx=mx.cpu()):
    """Draw detected keypoints onto ``img`` and return the result.

    Args:
        img: Input image; ``.shape`` and ``.asnumpy()`` are used on it.
        detector: Callable returning ``(class_IDs, scores, bounding_boxs)``.
        pose_net: Callable mapping the pose crops to heatmaps.
        ctx: Context the detector input and pose crops are moved to.

    Returns:
        The output of ``cv_plot_keypoints`` when at least one person box is
        found; otherwise ``img`` unchanged.
    """
    transform = gcv.data.transforms.presets.yolo.transform_test
    tensor, resized = transform(img, short=480, max_size=1024)
    ids, det_scores, boxes = detector(tensor.as_in_context(ctx))
    crops, crop_boxes = detector_to_simple_pose(
        resized, ids, det_scores, boxes, output_shape=(128, 96), ctx=ctx)

    if len(crop_boxes) == 0:
        return img

    heatmap = pose_net(crops)
    joints, joint_conf = heatmap_to_coord(heatmap, crop_boxes)

    # Ratio between the original and the resized image height, so the
    # keypoints are drawn in the original image's coordinates.
    ratio = 1.0 * img.shape[0] / resized.shape[0]
    return cv_plot_keypoints(
        img.asnumpy(), joints, joint_conf, ids, boxes, det_scores,
        box_thresh=1, keypoint_thresh=0.3, scale=ratio)
def predict_fn(input_object, model):
    """Run pose estimation on detector output and flatten the results.

    The target device is chosen from the ``USE_EIA`` / ``USE_GPU``
    environment variables (value ``"1"`` enables them, EIA first); when
    neither is set, or moving the data fails, the CPU is used.

    Args:
        input_object: 4-tuple ``(img, cid, scores, bbox)``.
        model: Pose network called on the cropped pose input.

    Returns:
        A 1-D NumPy array: class ids, scores, bounding boxes, keypoints and
        keypoint confidences, each flattened and concatenated in that order.
    """
    try:
        # .get() instead of [...]: with indexing, an unset USE_EIA raised
        # KeyError, which skipped the USE_GPU check entirely and logged a
        # spurious error before falling back to the CPU.
        if os.environ.get('USE_EIA') == "1":
            device = mx.eia()
            img, cid, scores, bbox = copy_to_device(input_object, device)
        elif os.environ.get('USE_GPU') == "1":
            device = mx.gpu()
            img, cid, scores, bbox = copy_to_device(input_object, device)
        else:
            device = mx.cpu()
            img, cid, scores, bbox = input_object
    except Exception:
        # Best-effort fallback; a bare ``except`` would also have swallowed
        # KeyboardInterrupt / SystemExit.
        device = mx.cpu()
        img, cid, scores, bbox = input_object
        logger.error("Failed to load data into desired context")

    pose_input, upscale_bbox = detector_to_simple_pose(img, cid, scores, bbox)

    # Single forward pass on the selected device. The previous version ran
    # the model a second time on the un-moved input and used that result.
    predicted_heatmap = model(pose_input.as_in_context(device))
    keypoints, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

    c = cid[0].asnumpy().reshape(-1)
    s = scores[0].asnumpy().reshape(-1)
    bb = bbox[0].asnumpy().reshape(-1)
    kp = keypoints.asnumpy().reshape(-1)
    cfd = confidence.asnumpy().reshape(-1)
    return np.concatenate((c, s, bb, kp, cfd))
def keypoint_detection(img, detector, pose_net, ctx=mx.cpu(), axes=None):
    """Detect people in ``img``, estimate keypoints and redraw the plot.

    Args:
        img: Input image passed to the YOLO ``transform_test`` preset.
        detector: Callable returning ``(class_IDs, scores, bounding_boxs)``.
        pose_net: Callable mapping the pose crops to heatmaps.
        ctx: Context the detector input and pose crops are moved to.
        axes: Existing matplotlib axes to draw on, or ``None``.

    Returns:
        The axes returned by ``plot_keypoints`` (people found) or
        ``plot_image`` (nobody found).
    """
    x, img = gcv.data.transforms.presets.yolo.transform_test(
        img, short=512, max_size=350)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)

    plt.cla()
    pose_input, upscale_bbox = detector_to_simple_pose(
        img, class_IDs, scores, bounding_boxs, ctx=ctx)

    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap,
                                                   upscale_bbox)
        axes = plot_keypoints(img, pred_coords, confidence, class_IDs,
                              bounding_boxs, scores,
                              box_thresh=0.5, keypoint_thresh=0.2, ax=axes)
    else:
        # The previous version plotted ``frame``, a name that is not defined
        # in this function; show the (resized) input image instead.
        axes = plot_image(img, ax=axes)

    plt.draw()
    plt.pause(0.001)
    return axes
def process_pose_frame(np_frame, resolution):
    """Annotate one frame with detected pose keypoints.

    Args:
        np_frame: Frame to process, or ``None`` when no frame is available.
        resolution: ``(width, height)`` pair, used only to size the blank
            frame returned when ``np_frame`` is ``None``.

    Returns:
        * a zero-filled ``(height, width, 3)`` NDArray on the module-level
          ``gpu`` context when ``np_frame`` is ``None``;
        * ``None`` when ``detector_to_simple_pose`` yields no pose input;
        * otherwise the image returned by ``cv_plot_keypoints``.
    """
    width, height = resolution
    if np_frame is None:
        return mxnet.nd.zeros((height, width, 3), ctx=gpu)

    frame = mxnet.nd.array(np_frame, ctx=gpu)
    x, img = data.transforms.presets.yolo.transform_test(frame, short=512)
    class_IDs, scores, bounding_boxs = detector(x)
    pose_input, upscale_bbox = detector_to_simple_pose(
        img, class_IDs, scores, bounding_boxs)
    if pose_input is None:
        return None

    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

    # The previous call forwarded ``**kwargs``, but this function defines no
    # ``kwargs``; the stray argument has been dropped.
    img = cv_plot_keypoints(img, pred_coords, confidence, class_IDs,
                            bounding_boxs, scores,
                            box_thresh=0.5, keypoint_thresh=0.2, scale=1.0)
    print(img.size)
    return img
def main():
    """Run pose estimation with an OpenCV-DNN pose network and plot it.

    People are detected with a GluonCV YOLO model restricted to the
    ``person`` class; each person crop is then fed through the network
    loaded by ``cv2.dnn`` (ONNX or OpenVINO, per ``args.type``) and the
    resulting heatmaps are decoded and plotted.

    Raises:
        ValueError: If ``args.type`` is neither ``'ONNX'`` nor ``'OpenVINO'``.
    """
    args = parse_args()
    scale = 1.0

    detector = get_model('yolo_darknet53_coco', pretrained=True)
    detector.reset_class(['person'], reuse_weights=['person'])

    if args.type == 'ONNX':
        network = cv2.dnn.readNetFromONNX(args.model)
    elif args.type == 'OpenVINO':
        network = cv2.dnn.readNetFromModelOptimizer(args.xml, args.model)
    else:
        # Previously ``network`` stayed None here and the first
        # ``network.set...`` call failed with an opaque AttributeError.
        raise ValueError(
            "Unsupported model type {!r}; expected 'ONNX' or 'OpenVINO'"
            .format(args.type))

    if not args.backend:
        # Default backend if none was specified.
        network.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
    elif args.backend == 'CUDA':
        # Requires an OpenCV build with CUDA support.
        network.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        network.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
    elif args.backend == 'INFERENCE':
        # Requires the inference engine to be installed and an OpenCV build
        # with IE support.
        network.setPreferableBackend(cv2.dnn.DNN_BACKEND_INFERENCE_ENGINE)
        network.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

    x, img = data.transforms.presets.yolo.load_test(args.img, short=512)
    class_IDs, scores, bounding_boxes = detector(x)
    pose_input, upscaled_bbox = detector_to_simple_pose(
        img, class_IDs, scores, bounding_boxes)
    pose_input = pose_input.asnumpy()

    # Run the crops through the network one at a time and stack the results.
    heatmaps = []
    for sample in pose_input:
        blob = cv2.dnn.blobFromImage(
            np.transpose(np.squeeze(sample), (1, 2, 0)),
            scale, (args.width, args.height), (0, 0, 0), False)
        network.setInput(blob)
        heatmaps.append(network.forward())

    output = mx.nd.array(np.concatenate(heatmaps, axis=0))
    pred_coords, confidence = heatmap_to_coord(output, upscaled_bbox)

    plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxes,
                   scores, box_thresh=0.5, keypoint_thresh=0.2)
    plt.show()
def get_full_frame_info(a_frame):
    """Run detection and pose estimation on a frame and return everything.

    Args:
        a_frame: Frame passed to ``transform_test``.

    Returns:
        ``(class_IDs, scores, upscale_bbox, pred_coords, confidence,
        bounding_boxs)``. ``pred_coords`` is a NumPy array when at least one
        person box is found; when none is found both ``pred_coords`` and
        ``confidence`` are ``None``.
    """
    x, frame = transform_test(a_frame, short=512)
    class_IDs, scores, bounding_boxs = detector(x)
    pose_input, upscale_bbox = detector_to_simple_pose(
        frame, class_IDs, scores, bounding_boxs)

    # Defaults for the no-detection case; without them the return statement
    # referenced names that were never assigned.
    pred_coords = None
    confidence = None
    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap,
                                                   upscale_bbox)
        pred_coords = pred_coords.asnumpy()

    return (class_IDs, scores, upscale_bbox, pred_coords, confidence,
            bounding_boxs)
def predict(img_path):
    """Predict a 3D pose for the image at ``img_path``.

    Runs person detection, 2D pose estimation and the 3D predictor in turn,
    printing the time spent in each stage and adding it to the module-level
    timing counters.

    Args:
        img_path: Path of the image to load.

    Returns:
        ``(prediction, img)``, or ``(None, None)`` when
        ``detector_to_simple_pose`` yields no pose input.
    """
    global detector_time, predictor_2d_time, full_time, predictor_3d_time

    # 1. Pre-process the input image and detect people.
    tensor, img = data.transforms.presets.yolo.load_test(img_path, short=256)
    t_start = time.time()
    ids, det_scores, boxes = detector(tensor)

    # 2. Turn the detector output into the pose network's input.
    crops, crop_boxes = detector_to_simple_pose(img, ids, det_scores, boxes)
    detector_time += (time.time() - t_start)
    print("detector cost time: {:.3f} seconds".format(time.time() - t_start))
    t_stage = time.time()

    # 3. Predict the 2D joints.
    if crops is None:
        return None, None
    heatmap = pose_net(crops)
    joints, _joint_conf = heatmap_to_coord(heatmap, crop_boxes)
    predictor_2d_time += (time.time() - t_stage)
    print("2d pose predictor cost time: {:.3f} seconds".format(
        time.time() - t_stage))

    # 4. Normalise the coordinates.
    t_stage = time.time()
    keypoints = normalize_screen_coordinates(
        joints.asnumpy(), w=img.shape[1], h=img.shape[0])
    # Padding on each side.
    padding = (pose3d_predictor.receptive_field() - 1) // 2

    # 5. Build the generator that feeds the 3D predictor.
    batches = UnchunkedGenerator(
        None, None, [keypoints], pad=padding, causal_shift=0, augment=False)

    # 6. 3D pose estimation.
    pose_3d = predict_3d_pos(batches, pose3d_predictor)
    predictor_3d_time += time.time() - t_stage
    full_time += time.time() - t_start
    print("3d predictor time: {:.3f} seconds".format(time.time() - t_stage))

    rotation = np.array(
        [0.14070565, -0.15007018, -0.7552408, 0.62232804], dtype=np.float32)
    pose_3d = camera_to_world(pose_3d, R=rotation, t=0)
    # Shift the last-axis component at index 2 so that its minimum is zero.
    pose_3d[:, :, 2] -= np.min(pose_3d[:, :, 2])
    return pose_3d, img
def keypoint_detection(img_path, detector, pose_net):
    """Detect people in an image file, estimate keypoints and show the plot.

    Args:
        img_path: Path of the image to load.
        detector: Callable returning ``(class_IDs, scores, bounding_boxs)``.
        pose_net: Callable mapping the pose crops to heatmaps.
    """
    tensor, picture = data.transforms.presets.yolo.load_test(
        img_path, short=512)
    ids, det_scores, boxes = detector(tensor)
    crops, crop_boxes = detector_to_simple_pose(
        picture, ids, det_scores, boxes)
    joints, joint_conf = heatmap_to_coord(pose_net(crops), crop_boxes)

    plot_keypoints(picture, joints, joint_conf, ids, boxes, det_scores,
                   box_thresh=0.5, keypoint_thresh=0.2)
    plt.show()
def keypoint_detection(img_path, detector, pose_net):
    """Load an image, run detection and pose estimation, and display it.

    Args:
        img_path: Path of the image to load.
        detector: Callable returning ``(class_IDs, scores, bounding_boxs)``.
        pose_net: Callable mapping the pose crops to heatmaps.
    """
    load_image = data.transforms.presets.yolo.load_test
    net_input, image = load_image(img_path, short=512)

    detections = detector(net_input)
    person_ids, person_scores, person_boxes = detections

    pose_batch, scaled_boxes = detector_to_simple_pose(
        image, person_ids, person_scores, person_boxes)
    heatmaps = pose_net(pose_batch)
    keypoints, keypoint_conf = heatmap_to_coord(heatmaps, scaled_boxes)

    thresholds = {'box_thresh': 0.5, 'keypoint_thresh': 0.2}
    plot_keypoints(image, keypoints, keypoint_conf, person_ids,
                   person_boxes, person_scores, **thresholds)
    plt.show()
def getPose(im_fname):
    """Run detection and pose estimation on an image file.

    Args:
        im_fname: Path of the image to load.

    Returns:
        ``(img, pred_coords, confidence, class_IDs, bounding_boxs, scores,
        upscale_bbox)``.
    """
    tensor, picture = data.transforms.presets.ssd.load_test(
        im_fname, short=512)
    ids, det_scores, boxes = detector(tensor)
    crops, crop_boxes = detector_to_simple_pose(
        picture, ids, det_scores, boxes)
    joints, joint_conf = heatmap_to_coord(pose_net(crops), crop_boxes)
    return picture, joints, joint_conf, ids, boxes, det_scores, crop_boxes
def keypoint_detection(i, frame, imagepath, detector, pose_net, ctx=mx.cpu(), axes=None):
    """Detect keypoints in ``frame``, run ``hackathon_action`` and redraw.

    Args:
        i: Value forwarded to ``hackathon_action``.
        frame: Input frame; also plotted as-is when nobody is found.
        imagepath: Value forwarded to ``hackathon_action``.
        detector: Callable returning ``(class_IDs, scores, bounding_boxs)``.
        pose_net: Callable mapping the pose crops to heatmaps.
        ctx: Context the detector input and pose crops are moved to.
        axes: Existing matplotlib axes to draw on, or ``None``.

    Returns:
        The axes returned by ``plot_keypoints`` or ``plot_image``.
    """
    global pause_time

    transform = gcv.data.transforms.presets.yolo.transform_test
    tensor, resized = transform(frame, short=512, max_size=1024)
    ids, det_scores, boxes = detector(tensor.as_in_context(ctx))

    plt.cla()
    crops, crop_boxes = detector_to_simple_pose(
        resized, ids, det_scores, boxes, output_shape=(1024, 768), ctx=ctx)

    if len(crop_boxes) > 0:
        joints, joint_conf = heatmap_to_coord(pose_net(crops), crop_boxes)
        hackathon_action(i, frame, imagepath, joints, joint_conf,
                         ids, boxes, det_scores)
        axes = plot_keypoints(resized, joints, joint_conf, ids, boxes,
                              det_scores, box_thresh=0.5,
                              keypoint_thresh=0.2, ax=axes)
    else:
        axes = plot_image(frame, ax=axes)

    # Both branches refresh the figure the same way.
    plt.draw()
    plt.pause(pause_time)
    return axes
def detection(net, image, use_gpu):
    '''
    Run person detection and pose prediction on one image.

    input:
        net(dict): models, looked up under 'detector' and 'pose_net'
        image: image (numpy)
        use_gpu(bool): whether to run on the GPU
    return:
        pred(dict): dictionary with the prediction results
                    (None when no person is detected)
        img(numpy): image
    '''
    ctx = mx.gpu() if use_gpu else mx.cpu()

    x, img = transform_test(
        nd.array(image), short=512, max_size=1024,
        mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    if use_gpu:
        # Move to the GPU.
        x = x.as_in_context(ctx)

    class_IDs, scores, bounding_boxs = net['detector'](x)
    pose_input, upscale_bbox = detector_to_simple_pose(
        img, class_IDs, scores, bounding_boxs)
    if len(upscale_bbox) == 0:
        # No person detected in the image.
        return None, img

    if use_gpu:
        # Move to the GPU.
        pose_input = pose_input.as_in_context(ctx)
    heatmap = net['pose_net'](pose_input)
    pred_coords, confidence = heatmap_to_coord(heatmap, upscale_bbox)
    if use_gpu:
        # Move to the GPU.
        pred_coords = pred_coords.as_in_context(ctx)

    pred = dict(
        class_IDs=class_IDs,
        scores=scores,
        bounding_boxs=bounding_boxs,
        pred_coords=pred_coords,
        confidence=confidence,
    )
    return pred, img
def get_pose_estimation(img_object, detector_model="yolo3_mobilenet1.0_coco", pose_model="simple_pose_resnet18_v1b", box_thresh=0.5, keypoint_thresh=0.2):
    """Detect people in an image, estimate their poses and plot the result.

    Args:
        img_object: Image location. It is first offered to
            ``utils.download``; if that raises ``ValueError`` the value is
            used unchanged (presumably a local path -- confirm with callers).
        detector_model: Model-zoo name of the detector; must contain
            ``"yolo"`` or ``"ssd"``.
        pose_model: Model-zoo name of the pose network; must contain
            ``"simple_pose"`` or ``"alpha_pose"``.
        box_thresh: Forwarded to ``plot_keypoints``.
        keypoint_thresh: Forwarded to ``plot_keypoints``.

    Returns:
        The axes returned by ``utils.viz.plot_keypoints``.

    Raises:
        ValueError: If ``detector_model`` or ``pose_model`` belongs to no
            supported family.
    """
    # Resolve the model families first so an unsupported name fails fast
    # with a clear message. Previously it fell through the if/elif chains
    # and surfaced later as an UnboundLocalError.
    if "yolo" in detector_model:
        load_test = data.transforms.presets.yolo.load_test
    elif "ssd" in detector_model:
        load_test = data.transforms.presets.ssd.load_test
    else:
        raise ValueError(
            "Unsupported detector model {!r}: expected a 'yolo' or 'ssd' "
            "model".format(detector_model))

    if "simple_pose" in pose_model:
        to_pose_input = detector_to_simple_pose
        to_coords = heatmap_to_coord
    elif "alpha_pose" in pose_model:
        to_pose_input = detector_to_alpha_pose
        to_coords = heatmap_to_coord_alpha_pose
    else:
        raise ValueError(
            "Unsupported pose model {!r}: expected a 'simple_pose' or "
            "'alpha_pose' model".format(pose_model))

    detector = model_zoo.get_model(detector_model, pretrained=True)
    pose_net = model_zoo.get_model(pose_model, pretrained=True)

    # Keep only the weights for the person class.
    detector.reset_class(["person"], reuse_weights=['person'])

    try:
        img_object = utils.download(img_object)
    except ValueError:
        # Not downloadable; fall through and use the value as given.
        pass

    x, img = load_test(img_object, short=512)
    class_IDs, scores, bounding_boxs = detector(x)

    pose_input, upscale_bbox = to_pose_input(
        img, class_IDs, scores, bounding_boxs)
    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = to_coords(predicted_heatmap, upscale_bbox)

    return utils.viz.plot_keypoints(img, pred_coords, confidence, class_IDs,
                                    bounding_boxs, scores,
                                    box_thresh=box_thresh,
                                    keypoint_thresh=keypoint_thresh)
def Infer(self, img_path, output_path="result.jpg", bbox_thresh=0.5, kp_thresh=0.2):
    """Run person detection and pose estimation on an image and save it.

    Args:
        img_path: Path of the image to load.
        output_path: File the annotated image is written to.
        bbox_thresh: Box threshold forwarded to ``cv_plot_keypoints``.
        kp_thresh: Keypoint threshold forwarded to ``cv_plot_keypoints``.

    Returns:
        dict with keys ``pred_coords``, ``confidence``, ``class_IDs``,
        ``bounding_boxs`` and ``scores``.
    """
    local = self.system_dict["local"]
    device = local["ctx"][0]

    x, img = data.transforms.presets.ssd.load_test(img_path, short=512)
    x = x.copyto(device)
    print('Shape of pre-processed image:', x.shape)

    print('Running Person Detector')
    class_IDs, scores, bounding_boxs = local["detector"](x)

    print('Running Pose Estimator')
    pose_input, upscale_bbox = detector_to_simple_pose(
        img, class_IDs, scores, bounding_boxs)
    pose_input = pose_input.copyto(device)
    predicted_heatmap = local["posenet"](pose_input)
    pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

    print('Saving Result')
    # Honour the caller's thresholds; they were previously ignored in favour
    # of hard-coded 0.5 / 0.2 (the defaults, so default calls are unchanged).
    img = utils.viz.cv_plot_keypoints(img, pred_coords, confidence,
                                      class_IDs, bounding_boxs, scores,
                                      box_thresh=bbox_thresh,
                                      keypoint_thresh=kp_thresh)
    # Swap the channel order before handing the image to cv2.imwrite.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    cv2.imwrite(output_path, img)
    print('Done')

    return {
        "pred_coords": pred_coords,
        "confidence": confidence,
        "class_IDs": class_IDs,
        "bounding_boxs": bounding_boxs,
        "scores": scores,
    }
def detector_kp(image, bbox):
    """Estimate keypoints for one known bounding box.

    A single detection (class id 0, score 1.0, the given box) is built on
    the GPU and passed through the pose pipeline.

    Args:
        image: Image passed to ``detector_to_simple_pose``.
        bbox: Bounding box of the person.

    Returns:
        ``(True, kps)`` where ``kps`` is the coordinates and confidences
        concatenated along axis 2, with axis 0 squeezed out.
    """
    device = mx.gpu()
    ids = mx.nd.array([[[0.]]], device)
    det_scores = mx.nd.array([[[1.0]]], device)
    boxes = mx.nd.array([[bbox]], device)

    crops, crop_boxes = detector_to_simple_pose(image, ids, det_scores, boxes)
    joints, joint_conf = heatmap_to_coord(pose_net(crops), crop_boxes)

    joints_np = joints.asnumpy()
    print('coord', joints_np.shape)
    conf_np = joint_conf.asnumpy()
    print('confidence', conf_np.shape)

    merged = np.concatenate([joints_np, conf_np], axis=2)
    return True, np.squeeze(merged, axis=0)
def detect_2d_joints(frame, cur_frame, short=360):
    """Predict the 2D joints of the first detected person in a frame.

    The detector output is cached in the module-level ``csb`` and only
    refreshed when there is no cache yet or when ``cur_frame`` is exactly
    ``gap`` frames after the last detection (``pre_frame``).

    Args:
        frame: RGB image of arbitrary size.
        cur_frame: Index of the current frame.
        short: Size the shorter image side is resized to.

    Returns:
        A dict with the resized image (``img``), the joint coordinates
        (``coords``), their ``confidence`` and the detector outputs
        (``class_ids``, ``bboxes``, ``scores``) needed to draw the 2D pose.
    """
    global pre_frame, gap, csb

    # Resize the image and build the input tensor for the object detector.
    tensor, resized = data.transforms.presets.yolo.transform_test(
        nd.array(frame), short=short)

    # Detect people, or reuse the cached detection.
    use_cache = csb is not None and cur_frame - pre_frame != gap
    if use_cache:
        ids, det_scores, boxes = csb
    else:
        ids, det_scores, boxes = detector(tensor)
        pre_frame = cur_frame
        csb = (ids, det_scores, boxes)

    # Build the pose network input and keep only the first person.
    crops, crop_boxes = detector_to_simple_pose(
        resized, ids, det_scores, boxes)
    crops = crops[:1]
    crop_boxes = crop_boxes[:1]

    # Predict the joints.
    joints, joint_conf = heatmap_to_coord(pose_net(crops), crop_boxes)

    return dict(
        img=resized,
        coords=joints,
        confidence=joint_conf,
        class_ids=ids,
        bboxes=boxes,
        scores=det_scores,
    )
def detect_main_point(self, capture):
    """Track three body points over every frame read from ``capture``.

    For each frame in which at least one person is found, the second
    coordinate of keypoints 11, 14 and 5 of the first person is appended to
    ``self.trajectory`` under ``'palvic'``, ``'knee'`` and ``'shoulders'``.

    Args:
        capture: Object whose ``read()`` returns ``(ret, frame)``; reading
            stops as soon as ``ret`` is falsy.
    """
    while True:
        ret, frame = capture.read()
        # ``ret`` is a success flag, so test its truthiness. The previous
        # ``ret is None`` check never matched a boolean flag, so a failed
        # read went on to ``cv2.cvtColor`` with no usable frame.
        if not ret:
            break

        frame = mx.nd.array(
            cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
        x, frame = gcv.data.transforms.presets.ssd.transform_test(
            frame, short=512, max_size=350)
        x = x.as_in_context(self.ctx)
        class_IDs, scores, bounding_boxs = self.detector(x)

        # Use the instance context, consistent with the detector input
        # above (the previous version referenced a bare ``ctx``).
        pose_input, upscale_bbox = detector_to_simple_pose(
            frame, class_IDs, scores, bounding_boxs,
            output_shape=(128, 96), ctx=self.ctx)

        if len(upscale_bbox) > 0:
            predicted_heatmap = self.estimator(pose_input)
            pred_coords, confidence = heatmap_to_coord(
                predicted_heatmap, upscale_bbox)

            # Convert once instead of once per keypoint.
            coords = pred_coords.asnumpy()
            _, plv = coords[:, 11][0]
            _, shl = coords[:, 5][0]
            _, kn = coords[:, 14][0]

            self.trajectory['palvic'].append(plv)
            self.trajectory['knee'].append(kn)
            self.trajectory['shoulders'].append(shl)
def get_skeleton_from_frame(a_frame):
    """Extract one skeleton from a frame.

    Args:
        a_frame: Frame passed to ``transform_test``.

    Returns:
        ``(ok_flag, extra_person_flag, b_coords)``:

        * ``ok_flag`` is True when at least one person box was found;
        * ``extra_person_flag`` is True when more than two skeletons were
          predicted;
        * ``b_coords`` holds the chosen skeleton's coordinates, or ``0``
          when nobody was found.
    """
    tensor, resized = transform_test(a_frame, short=512)
    ids, det_scores, boxes = detector(tensor)
    crops, crop_boxes = detector_to_simple_pose(
        resized, ids, det_scores, boxes)

    if len(crop_boxes) <= 0:
        return False, False, 0

    joints, _joint_conf = heatmap_to_coord(pose_net(crops), crop_boxes)
    joints = joints.asnumpy()
    people = joints.shape[0]

    chosen = joints[0]
    # Best guess when there are exactly two boxes (subject and background
    # look alike): if the first box's index-3 value is 512, take the second
    # skeleton.
    if people == 2 and crop_boxes[0][3] == 512:
        chosen = joints[1]

    return True, people > 2, chosen
def get_poses(self, image):
    """Detect people in ``image`` and decode their pose heatmaps.

    Args:
        image: Input image; converted with ``mx.nd.array(...)`` and cast to
            ``uint8`` before being passed to ``self.transformer``.

    Returns:
        The result of ``heatmap_to_coord_alpha_pose`` when at least one
        person box is found, otherwise a pair of empty NDArrays.
    """
    x, image = self.transformer(mx.nd.array(image).astype('uint8'),
                                short=512)
    x = x.as_in_context(self.ctx)
    class_IDs, scores, bounding_boxs = self.detector(x)
    pose_input, upscale_bbox = detector_to_simple_pose(
        image, class_IDs, scores, bounding_boxs,
        output_shape=(128, 96), ctx=self.ctx)

    # Guard clause: nothing detected.
    if upscale_bbox is None or len(upscale_bbox) == 0:
        return mx.nd.array([]), mx.nd.array([])

    # isinstance() rather than an exact type comparison, so list subclasses
    # are converted as well.
    if isinstance(upscale_bbox, list):
        upscale_bbox = np.array(upscale_bbox)

    # The heatmap is moved to the CPU before decoding.
    predicted_heatmap = self.pose_net(pose_input).as_in_context(mx.cpu())
    return heatmap_to_coord_alpha_pose(predicted_heatmap, upscale_bbox)
def keypoint_detection(img, detector, pose_net, ctx=mx.cpu(), axes=None):
    """Detect people in ``img``, estimate keypoints and redraw the plot.

    Pose crops are produced with ``output_shape=(128, 96)``.

    Args:
        img: Input image passed to the YOLO ``transform_test`` preset.
        detector: Callable returning ``(class_IDs, scores, bounding_boxs)``.
        pose_net: Callable mapping the pose crops to heatmaps.
        ctx: Context the detector input and pose crops are moved to.
        axes: Existing matplotlib axes to draw on, or ``None``.

    Returns:
        The axes returned by ``plot_keypoints`` (people found) or
        ``plot_image`` (nobody found).
    """
    x, img = gcv.data.transforms.presets.yolo.transform_test(
        img, short=512, max_size=350)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)

    plt.cla()
    pose_input, upscale_bbox = detector_to_simple_pose(
        img, class_IDs, scores, bounding_boxs,
        output_shape=(128, 96), ctx=ctx)

    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap,
                                                   upscale_bbox)
        axes = plot_keypoints(img, pred_coords, confidence, class_IDs,
                              bounding_boxs, scores,
                              box_thresh=0.5, keypoint_thresh=0.2, ax=axes)
    else:
        # The previous version plotted ``frame``, a name that is not defined
        # in this function; show the (resized) input image instead.
        axes = plot_image(img, ax=axes)

    plt.draw()
    plt.pause(0.001)
    return axes
def extract_pose_from_image(self, img, values):
    """Estimate poses for ``img``, plot them and return the first one.

    Keypoint indices, as noted by the original author:
        left hand  5 7 9     right hand 6 8 10
        left leg  11 13 15   right leg 12 14 16

    Args:
        img: Image used for cropping and plotting.
        values: Detector input passed to ``self.detector``.

    Returns:
        ``pred_coords[0]``, the coordinates of the first person.
    """
    ids, det_scores, boxes = self.detector(values)
    crops, crop_boxes = detector_to_simple_pose(img, ids, det_scores, boxes)
    heatmap = self.pose_net(crops)
    joints, joint_conf = heatmap_to_coord(heatmap, crop_boxes)

    utils.viz.plot_keypoints(img, joints, joint_conf, ids, boxes, det_scores,
                             box_thresh=0.5, keypoint_thresh=0.2)
    return joints[0]
def keypoint_detection(self, frame):
    """Detect keypoints in a BGR frame and draw them onto it.

    Args:
        frame: Image in BGR channel order (it is handed to ``cv2.cvtColor``
            with ``cv2.COLOR_BGR2RGB``).

    Returns:
        ``(pred_coords, img)``. With at least one person box,
        ``pred_coords`` are the predicted coordinates multiplied by the
        original/resized height ratio and ``img`` is the output of
        ``cv_plot_keypoints``; otherwise ``pred_coords`` is ``np.zeros(1)``
        and ``img`` is ``frame``.
    """
    annotated = mx.nd.array(
        cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
    transform = gcv.data.transforms.presets.yolo.transform_test
    tensor, resized = transform(annotated, short=480, max_size=1024)
    ids, det_scores, boxes = self.person_detector(
        tensor.as_in_context(self.ctx))

    joints = np.zeros(1)
    crops, crop_boxes = detector_to_simple_pose(
        resized, ids, det_scores, boxes,
        output_shape=(128, 96), ctx=self.ctx)

    if len(crop_boxes) > 0:
        heatmap = self.pose_estimator(crops)
        joints, joint_conf = heatmap_to_coord(heatmap, crop_boxes)
        # Ratio between the original and the resized image height.
        ratio = 1.0 * annotated.shape[0] / resized.shape[0]
        annotated = cv_plot_keypoints(
            frame, joints, joint_conf, ids, boxes, det_scores,
            box_thresh=1, keypoint_thresh=0.3, scale=ratio)
        joints *= ratio

    # Never hand NDArrays back to the caller.
    out_img = frame if isinstance(annotated, mx.nd.NDArray) else annotated
    out_coords = (joints.asnumpy()
                  if isinstance(joints, mx.nd.NDArray) else joints)
    return out_coords, out_img
class_IDs, scores, bounding_boxs = detector(x) ###################################################################### # Process tensor from detector to keypoint network # -------------------- # # Next we process the output from the detector. # # For a Simple Pose network, it expects the input has the size 256x192, # and the human is centered. We crop the bounding boxed area # for each human, and resize it to 256x192, then finally normalize it. # # In order to make sure the bounding box has included the entire person, # we usually slightly upscale the box size. pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs) ###################################################################### # Predict with a Simple Pose network # -------------------- # # Now we can make prediction. # # A Simple Pose network predicts the heatmap for each joint (i.e. keypoint). # After the inference we search for the highest value in the heatmap and map it to the # coordinates on the original image. predicted_heatmap = pose_net(pose_input) pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
class_IDs, scores, bounding_boxs = detector(x) ###################################################################### # Process tensor from detector to keypoiny network # -------------------- # # Next we process the output from the detector. # # For a Simple Pose network, it expects the input has the size 256x192, # and the human is centered. We crop the bounding boxed area # for each human, and resize it to 256x192, then finally normalize it. # # In order to make sure the bounding box has included the entire person, # we usually slightly upscale the box size. pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs) ###################################################################### # Predict with a Simple Pose network # -------------------- # # Now we can make prediction. # # A Simple Pose network predicts the heatmap for each joint (i.e. keypoint). # After the inference we search for the highest value in the heatmap and map it to the # coordinates on the original image. predicted_heatmap = pose_net(pose_input) pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox) ######################################################################
def main():
    """Run the live pose-detection loop and report the achieved frame rate.

    Frames are read from the module-level ``vs`` stream (a webcam, or a
    video file when ``using_vid_file`` is set), mirrored and resized; every
    other frame is run through the detector and pose estimator, annotated
    and displayed. When reading from a video file the annotated frames are
    also written to ``vid_writer``. The loop ends when the input runs out
    or ``q`` is pressed.
    """
    start = time.time()
    count = 0
    pose = None
    skip_frame = False  # To increase the frame rate, only every other frame is processed.

    while True:  # While there are video frames to process...
        frame = vs.read()
        if using_vid_file:
            # With a video file the read result is a tuple: a boolean
            # success flag followed by the pixel array.
            exists, frame = frame
            if not exists:
                break  # Exit when there are no more incoming frames.
        try:
            frame = np.fliplr(frame)  # Display the mirror image of the input.
        except ValueError:
            print(
                '[ERROR] video file not found, make sure to include path and extension i.e. \'./vid.mp4\''
            )
            break

        count += 1
        frame = imutils.resize(frame, width=280)
        frame = mx.nd.array(cv2.cvtColor(frame,
                                         cv2.COLOR_BGR2RGB)).astype('uint8')

        if not skip_frame:
            x, frame = gcv.data.transforms.presets.ssd.transform_test(
                frame, short=512, max_size=280)
            x = x.as_in_context(ctx)
            class_IDs, scores, bounding_boxs = detector(x)
            pose_input, upscale_bbox = detector_to_simple_pose(
                frame, class_IDs, scores, bounding_boxs,
                output_shape=(128, 96), ctx=ctx)

            if len(upscale_bbox) > 0:
                predicted_heatmap = estimator(pose_input)
                pred_coords, confidence = heatmap_to_coord(
                    predicted_heatmap, upscale_bbox)
                img, pose = cv_plot_keypoints(frame, pred_coords, confidence,
                                              class_IDs, None, scores,
                                              box_thresh=0.5,
                                              keypoint_thresh=0.15)
            else:
                # Nobody detected: show the plain frame and clear the label.
                # Previously ``img`` was left unbound (first frame) or stale
                # here, and the last pose label kept being displayed.
                # NOTE(review): assumes the image returned by transform_test
                # is accepted by cv2.cvtColor -- confirm.
                img, pose = frame, None

            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            skip_frame = True
        else:
            # Skipped frame: the previously annotated image is shown again.
            skip_frame = False

        img = imutils.resize(img, height=280, width=500)  # Blow up the image for display.

        if pose:
            cv2.putText(img, '{}'.format(pose), (20, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        else:
            cv2.putText(img, 'No Pose Detected', (20, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)

        if using_vid_file:
            vid_writer.write(img)

        cv2.imshow('Webcam', img)
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            break

    cv2.destroyAllWindows()
    if using_vid_file:
        vid_writer.release()
    else:
        vs.stop()

    stop = time.time()
    print("fps: {}".format(count / (stop - start)))
- estimate the pose for each person - plot the result .. code-block:: python'''

# Process a fixed number of frames from the capture device.
axes = None
num_frames = 100
for i in range(num_frames):
    # Load the next frame and convert it from BGR to an RGB uint8 NDArray.
    ret, frame = cap.read()
    frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')

    # Build the detector input and move it to the target context.
    x, frame = gcv.data.transforms.presets.ssd.transform_test(frame, short=512, max_size=350)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)

    # Crop the detections into pose-estimator input.
    pose_input, upscale_bbox = detector_to_simple_pose(frame, class_IDs, scores, bounding_boxs,
                                                       output_shape=(128, 96), ctx=ctx)
    # Only run the estimator when at least one person box was produced.
    if len(upscale_bbox) > 0:
        predicted_heatmap = estimator(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

        img = cv_plot_keypoints(frame, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                                box_thresh=0.5, keypoint_thresh=0.2)
    # Display the annotated image.
    cv_plot_image(img)
    cv2.waitKey(1)

'''We release the webcam before exiting: .. code-block:: python'''