def NextFrame(self, event):
    """Read one frame from ``self.capture``, analyse it and repaint.

    The counter label is refreshed from ``self.count`` *before* the new frame
    is analysed. If the capture returns no frame, ``self.timer`` is stopped
    and nothing else happens. ``event`` is not used.
    """
    grabbed, image = self.capture.read()
    self.counter_label.SetLabel("Times: " + str(self.count))

    if not grabbed:
        # Nothing was read: stop the timer and leave.
        self.timer.Stop()
        return

    # Resize so that the image height becomes self.base_height.
    input_scale = self.base_height / image.shape[0]
    resized = cv2.resize(image, dsize=None, fx=input_scale, fy=input_scale)
    raw_result = self.inference_engine.infer(resized)
    poses_2d = parse_poses(raw_result, input_scale, 8, -1, True)
    draw_poses(image, poses_2d)

    # Pick the repetition counter matching the selected exercise, if any.
    counters = {
        "jumpingjack": count_jumpup,
        "situp": count_situp,
        "squat": count_squat,
    }
    counter = counters.get(self.motion)
    if counter is not None:
        self.up, self.count = counter(poses_2d, self.up, self.count)

    # The bitmap buffer is filled with the RGB-ordered image.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    self.bmp.CopyFromBuffer(image)
    self.Refresh()
def eval(self, frame):
    """Run pose inference on ``frame`` and keep only the "biggest" person.

    Side effects: stores the frame on ``self.frame``, draws the detected 2D
    poses onto it when ``self.draw_render`` is truthy, and updates
    ``self.nb_persons`` and ``self.body_kps``.

    Returns:
        ``(self.nb_persons, self.body_kps)`` -- ``(1, keypoints)`` for the
        person with the largest size estimate, or ``(0, [])`` when the parsed
        2D poses are empty.
    """
    self.frame = frame

    # The network input is the frame rescaled to a fixed height of 256.
    base_height = 256
    scale = base_height / self.frame.shape[0]
    resized = cv2.resize(self.frame, dsize=None, fx=scale, fy=scale)
    _, poses_2d = parse_poses(self.net.infer(resized), scale, 8, 1)

    if self.draw_render:
        draw_poses(self.frame, poses_2d)

    # An empty first axis means that no person was detected.
    if poses_2d.shape[0] == 0:
        self.nb_persons = 0
        self.body_kps = []
        return self.nb_persons, self.body_kps

    # Drop the last value of every pose and view the rest as rows of three.
    self.body_kps = np.array(
        [np.array(pose[0:-1]).reshape((-1, 3)) for pose in poses_2d]
    )

    # Rank persons by an arbitrary "size" score (per the original author:
    # distance(Nose, Neck) + 0.33 * distance(Neck, MidHip)); it has little to
    # do with the real size of a person.
    sizes = np.array([
        self.length(pairs_spine, person_idx=person, coefs=[1, 0.33])
        for person in range(self.body_kps.shape[0])
    ])

    # Biggest first, then keep only that one.
    order = np.argsort(-sizes)
    self.body_kps = self.body_kps[order]
    self.body_kps = self.body_kps[0]
    self.nb_persons = 1
    return self.nb_persons, self.body_kps
def process_frame(self, frame, inference_result, merged=False):
    """Render the poses from ``inference_result`` onto the 2D frame and 3D canvas.

    Args:
        frame: image the 2D poses are drawn onto (modified in place by
            ``draw_poses``).
        inference_result: mapping that may contain ``"pose_3d"``,
            ``"pose_2d"`` and ``"edges"`` entries, each a mapping with a
            ``"value"`` key. Missing entries are treated as empty lists.
        merged: when true, return one side-by-side image instead of a pair.

    Returns:
        ``(frame, canvas_copy)`` when ``merged`` is false; otherwise a single
        array with the frame on the left and the 3D canvas on the right, both
        resized to the same width.
    """
    poses_3d = inference_result.get("pose_3d", {}).get("value", [])
    poses_2d = inference_result.get("pose_2d", {}).get("value", [])
    edges = inference_result.get("edges", {}).get("value", [])

    self.plotter.plot(self.canvas_3d, poses_3d, edges)
    draw_poses(frame, poses_2d)

    if not merged:
        return frame, np.copy(self.canvas_3d)

    frame_side = np.copy(self.canvas_3d)

    # Bring both panels to the narrower width, keeping each aspect ratio.
    new_w = min(frame.shape[1], frame_side.shape[1])
    rel_h_f = int(new_w * frame.shape[0] * 1.0 / frame.shape[1])
    rel_h_s = int(new_w * frame_side.shape[0] * 1.0 / frame_side.shape[1])
    frame = cv2.resize(frame, (new_w, rel_h_f))
    frame_side = cv2.resize(frame_side, (new_w, rel_h_s))

    # np.hstack joins along axis 1 and therefore needs equal heights. Equal
    # widths only give equal heights when both aspect ratios agree, so the
    # shorter panel is padded at the bottom with zero rows. When the heights
    # already match nothing is padded and the result is unchanged.
    target_h = max(frame.shape[0], frame_side.shape[0])
    panels = []
    for panel in (frame, frame_side):
        missing = target_h - panel.shape[0]
        if missing:
            pad_width = [(0, missing)] + [(0, 0)] * (panel.ndim - 1)
            panel = np.pad(panel, pad_width, mode="constant")
        panels.append(panel)
    return np.hstack(panels)
if len(poses_3d) > 0: poses_3d = rotate_poses(poses_3d, R, t) poses_3d_copy = poses_3d.copy() x = poses_3d_copy[:, 0::4] y = poses_3d_copy[:, 1::4] z = poses_3d_copy[:, 2::4] poses_3d[:, 0::4], poses_3d[:, 1::4], poses_3d[:, 2::4] = -z, x, -y poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3] edges = (Plotter3d.SKELETON_EDGES + 19 * np.arange(poses_3d.shape[0]).reshape( (-1, 1, 1))).reshape((-1, 2)) plotter.plot(canvas_3d, poses_3d, edges) presenter.drawGraphs(frame) draw_poses(frame, poses_2d) metrics.update(start_time, frame) frames_processed += 1 if video_writer.isOpened() and (args.output_limit <= 0 or frames_processed <= args.output_limit): video_writer.write(frame) if not args.no_show: cv2.imshow(canvas_3d_window_name, canvas_3d) cv2.imshow('3D Human Pose Estimation', frame) key = cv2.waitKey(delay) if key == esc_code: break if key == p_code:
def main():
    """Run the 3D pose estimation demo on a single image or on a video.

    Steps: check/download the model files, load the extrinsics (``R``, ``t``)
    from ``FILE_PATH``, create the ailia network, then for every frame run
    inference, parse the poses, render the 3D canvas and annotate the frame.
    In video mode the results are shown in windows; in image mode they are
    written to ``Canvas3D_<frame_id>.png`` and ``args.savepath``.

    Keys: code 27 quits, code 112 toggles pause; while paused with
    ``args.rotate3d`` set, the 3D canvas keeps being redrawn until one of the
    codes 112, 27 or 32 is pressed.
    """
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)
    check_file_existance(FILE_PATH)

    # prepare input data
    canvas_3d = np.zeros((720, 1280, 3), dtype=np.uint8)
    plotter = Plotter3d(canvas_3d.shape[:2])
    canvas_3d_window_name = 'Canvas3D'
    cv2.namedWindow(canvas_3d_window_name)
    cv2.setMouseCallback(canvas_3d_window_name, Plotter3d.mouse_callback)

    with open(FILE_PATH, 'r') as extrinsics_file:
        extrinsics = json.load(extrinsics_file)
    R = np.array(extrinsics['R'], dtype=np.float32)
    t = np.array(extrinsics['t'], dtype=np.float32)

    is_video = args.video is not None
    if is_video:
        frame_provider = VideoReader(args.video)
    else:
        frame_provider = ImageReader([args.input])

    fx = -1
    delay = 1
    esc_code, p_code, space_code = 27, 112, 32
    resume_keys = (p_code, esc_code, space_code)
    mean_time = 0
    img_mean = np.array([128, 128, 128], dtype=np.float32)
    input_shape_set = False

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # inference
    for frame_id, frame in enumerate(frame_provider):
        tick_start = cv2.getTickCount()
        if frame is None:
            break

        if not input_shape_set:
            # Done once, from the first frame: width for IMAGE_HEIGHT at the
            # frame's aspect ratio, truncated to a multiple of STRIDE.
            net_width = frame.shape[1] * (IMAGE_HEIGHT / frame.shape[0])
            net_width = int(net_width / STRIDE) * STRIDE
            net.set_input_shape((1, 3, IMAGE_HEIGHT, net_width))
            input_shape_set = True

        input_scale = IMAGE_HEIGHT / frame.shape[0]
        scaled_img = cv2.resize(
            frame, dsize=None, fx=input_scale, fy=input_scale
        )
        # better to pad, but cut out for demo
        usable_width = scaled_img.shape[1] - (scaled_img.shape[1] % STRIDE)
        scaled_img = scaled_img[:, 0:usable_width]

        if fx < 0:  # Focal length is unknown
            fx = np.float32(0.8 * frame.shape[1])

        # Normalise, then reorder HWC -> CHW and add a leading batch axis.
        normalized_img = (scaled_img.astype(np.float32) - img_mean) / 255.0
        normalized_img = np.expand_dims(
            normalized_img.transpose(2, 0, 1), axis=0
        )

        # execution
        if is_video:
            input_blobs = net.get_input_blob_list()
            net.set_input_blob_data(normalized_img, input_blobs[0])
            net.update()
            features, heatmaps, pafs = net.get_results()
        else:
            print('Start inference...')
            if args.benchmark:
                print('BENCHMARK mode')
                for _ in range(5):
                    start = int(round(time.time() * 1000))
                    features, heatmaps, pafs = net.predict([normalized_img])
                    end = int(round(time.time() * 1000))
                    print(f'\tailia processing time {end - start} ms')
            else:
                features, heatmaps, pafs = net.predict([normalized_img])

        inference_result = (
            features[-1].squeeze(),
            heatmaps[-1].squeeze(),
            pafs[-1].squeeze(),
        )
        poses_3d, poses_2d = parse_poses(
            inference_result, input_scale, STRIDE, fx, is_video
        )

        edges = []
        if len(poses_3d):
            poses_3d = rotate_poses(poses_3d, R, t)
            snapshot = poses_3d.copy()
            # Axis remap (x, y, z) -> (-z, x, -y); values are stored in groups
            # of four and only the first three of each group are kept below.
            poses_3d[:, 0::4] = -snapshot[:, 2::4]
            poses_3d[:, 1::4] = snapshot[:, 0::4]
            poses_3d[:, 2::4] = -snapshot[:, 1::4]
            poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
            # Offset the skeleton edge indices by 19 for every further pose.
            pose_offsets = 19 * np.arange(poses_3d.shape[0]).reshape(
                (-1, 1, 1)
            )
            edges = (Plotter3d.SKELETON_EDGES + pose_offsets).reshape((-1, 2))

        plotter.plot(canvas_3d, poses_3d, edges)
        if is_video:
            cv2.imshow(canvas_3d_window_name, canvas_3d)
        else:
            cv2.imwrite(f'Canvas3D_{frame_id}.png', canvas_3d)

        draw_poses(frame, poses_2d)

        # Smoothed per-frame processing time, displayed as FPS.
        elapsed = (cv2.getTickCount() - tick_start) / cv2.getTickFrequency()
        if mean_time == 0:
            mean_time = elapsed
        else:
            mean_time = mean_time * 0.95 + elapsed * 0.05
        cv2.putText(
            frame,
            'FPS: {}'.format(int(1 / mean_time * 10) / 10),
            (40, 80),
            cv2.FONT_HERSHEY_COMPLEX,
            1,
            (0, 0, 255),
        )
        if is_video:
            cv2.imshow('ICV 3D Human Pose Estimation', frame)
        else:
            cv2.imwrite(args.savepath, frame)

        key = cv2.waitKey(delay)
        if key == esc_code:
            break
        if key == p_code:
            # Toggle between running (delay 1) and paused (delay 0).
            delay = 0 if delay == 1 else 1
        if delay == 0 and args.rotate3d:
            # Keep redrawing the 3D canvas while paused.
            key = 0
            while key not in resume_keys:
                plotter.plot(canvas_3d, poses_3d, edges)
                cv2.imshow(canvas_3d_window_name, canvas_3d)
                key = cv2.waitKey(33)
            if key == esc_code:
                break
            delay = 1

    print('Script finished successfully.')
def updatefig(i, noPlot=False):
    """Process video frame ``i + startFrom``: estimate its pose and pair it with CSI data.

    The function reads many module-level names (``vid``, ``net``, ``tsList``,
    ``csiList``, ``ax0``/``ax1``/``ax2``, ...) and appends to the module-level
    lists ``pose3D_value`` and ``csi_value``.

    Args:
        i: frame offset; ``startFrom`` is added to obtain the video index.
        noPlot: when ``False`` the 3D canvas, the annotated frame and the CSI
            curves are drawn on the axes; when ``True`` nothing is drawn and
            the pose / CSI rows are appended to the output lists instead.

    Returns:
        Always ``False``.
    """
    print("updatefig", i, label)
    imageIdx = i + startFrom
    print("imageIdx", imageIdx)

    # `==` is kept on purpose so a non-bool `noPlot` behaves as before.
    plotting = noPlot == False  # noqa: E712
    collecting = noPlot == True  # noqa: E712

    if plotting and imageIdx == vidLength:
        print(f'imageIdx {imageIdx} == vidLength {vidLength}; closing!')
        plt.close(fig)

    # `frame` stays None when the frame itself cannot be loaded, so the
    # plotting code below can skip it instead of hitting an unbound local.
    frame = None
    try:
        frame = vid.get_data(imageIdx)
        input_scale = base_height / frame.shape[0]
        fx = np.float32(0.8 * frame.shape[1])
        scaled_img = cv2.resize(
            frame, dsize=None, fx=input_scale, fy=input_scale
        )
        # better to pad, but cut out for demo
        usable_width = scaled_img.shape[1] - (scaled_img.shape[1] % stride)
        scaled_img = scaled_img[:, 0:usable_width]
        inference_result = net.infer(scaled_img)
        poses_3dFromImage, poses_2d = parse_poses(
            inference_result, input_scale, stride, fx, is_video
        )
    except Exception as err:
        # Best effort: a failing frame is treated as "no pose", but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        print("pose estimation failed for imageIdx", imageIdx, ":", err)
        poses_3dFromImage = []
        poses_2d = []

    # NOTE(review): `a.all() == b.all()` compares two booleans ("is every
    # element truthy?"), not the arrays element-wise -- confirm whether
    # np.array_equal was intended. Behaviour is kept as it was.
    if (len(poses_3dFromImage) == 0 or len(poses_2d) == 0
            or poses_3dFromImage.all() == nullPose3D.all()):
        print("No pose detected ")
        # Fall back to a single all-zero pose so the code below still works.
        poses_3dFromImage = np.array([np.zeros((19, 3))])
    else:
        poses_3dFromImage = rotate_poses(poses_3dFromImage, R, t)
        poses_3dFromImage = reshape_poses(poses_3dFromImage)

    if plotting:
        pose_offsets = 19 * np.arange(poses_3dFromImage.shape[0]).reshape(
            (-1, 1, 1)
        )
        edgesFromImage = (Plotter3d.SKELETON_EDGES + pose_offsets).reshape(
            (-1, 2)
        )
        canvas_3d = np.zeros((450, 450, 3), dtype=np.uint8)
        plotter = Plotter3d(canvas_3d.shape[:2])
        plotter.plot(canvas_3d, poses_3dFromImage, edgesFromImage)
        ax0.imshow(canvas_3d)
        if frame is not None:
            draw_poses(frame, poses_2d)
            ax1.imshow(frame)

    csiIndices, parsedTimeInVid = imageIdx2csiIndicesPrecise(
        duration_in_sec, imageIdx, tsList, vidLength, lastsec
    )

    if collecting:
        print("parsedTimeInVid", parsedTimeInVid)
        # One row per frame: [time, 19 * 3 flattened coordinates of pose 0].
        flat_pose = np.array(poses_3dFromImage[0]).reshape(3 * 19)
        pose3D_value.append(
            np.concatenate((np.array([parsedTimeInVid]), flat_pose))
        )

        if len(csiIndices) > 0:
            startCSIIdx = csiIndices[0]
            endCSIIdx = csiIndices[len(csiIndices) - 1]
            print(startCSIIdx, '-', endCSIIdx)
            print(endCSIIdx - startCSIIdx + 1)

        for k in csiIndices:
            curParseCSI = parseCSI(csiList[k])
            print("adding ", curParseCSI)
            if curParseCSI != False:  # noqa: E712 - original comparison kept
                print("len check")
                print(k, len(curParseCSI), tsList[k])
                if len(curParseCSI) != 384:
                    print("len not 384")
                    continue
                print("isFloat check")
                non_ints = [v for v in curParseCSI if not isinstance(v, int)]
                if non_ints:
                    print(non_ints[0], " is not int")
                    continue
                # Reuse the record parsed above instead of parsing it again
                # (assumes parseCSI has no side effects).
                csi_value.append([tsList[k]] + curParseCSI)
                print("added ", k)
            else:
                # Unparseable record: keep the timestamp, fill with zeros.
                csi_value.append([tsList[k]] + [0] * 384)
                print("added ", k, 'as 0s')
    else:
        # NOTE(review): `x` is not defined in this function; it is read from
        # an outer scope -- confirm it is the intended CSI input.
        # The amplitude table does not depend on the loop variables, so it is
        # computed once, and only when there is something to index.
        amplitudes = (
            rawCSItoAmp(parseCSI(x), 128) if len(csiIndices) > 0 else None
        )
        timestamps = [tsList[k] / (10**6) for k in csiIndices]
        for j in range(0, 64):
            if 6 <= j < 32 or 33 <= j < 59:
                series = [amplitudes[k][j] for k in csiIndices]
                ax2.plot(
                    timestamps,
                    gaussian_filter(series, sigma=1),
                    label='CSI subcarrier',
                )

    print("added")
    return False
async def estimate(websocket, path):
    """Stream the 3D poses of every frame to ``websocket`` as JSON.

    Waits for one incoming message first, then walks ``frame_provider``: each
    frame is run through the network, the poses are drawn in the two OpenCV
    windows and the 3D poses are sent as a JSON string. ``path`` is not used.

    Keys: code 27 stops, code 112 toggles pause; while paused (or when the
    input is not a video) the 3D canvas keeps being redrawn until one of the
    codes 112, 27 or 32 is pressed.
    """
    global fx

    name = await websocket.recv()
    print(f"< {name}")

    delay = 1
    esc_code, p_code, space_code = 27, 112, 32
    resume_keys = (p_code, esc_code, space_code)
    mean_time = 0

    for frame in frame_provider:
        tick_start = cv2.getTickCount()
        if frame is None:
            break

        input_scale = base_height / frame.shape[0]
        scaled_img = cv2.resize(
            frame, dsize=None, fx=input_scale, fy=input_scale
        )
        # better to pad, but cut out for demo
        usable_width = scaled_img.shape[1] - (scaled_img.shape[1] % stride)
        scaled_img = scaled_img[:, 0:usable_width]

        if fx < 0:  # Focal length is unknown
            fx = np.float32(0.8 * frame.shape[1])

        inference_result = net.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(
            inference_result, input_scale, stride, fx, is_video
        )

        edges = []
        if len(poses_3d):
            poses_3d = rotate_poses(poses_3d, R, t)
            snapshot = poses_3d.copy()
            # Axis remap (x, y, z) -> (-z, x, -y); values are stored in groups
            # of four and only the first three of each group are kept below.
            poses_3d[:, 0::4] = -snapshot[:, 2::4]
            poses_3d[:, 1::4] = snapshot[:, 0::4]
            poses_3d[:, 2::4] = -snapshot[:, 1::4]
            poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
            # Offset the skeleton edge indices by 19 for every further pose.
            pose_offsets = 19 * np.arange(poses_3d.shape[0]).reshape(
                (-1, 1, 1)
            )
            edges = (Plotter3d.SKELETON_EDGES + pose_offsets).reshape((-1, 2))

        plotter.plot(canvas_3d, poses_3d, edges)
        cv2.imshow(canvas_3d_window_name, canvas_3d)
        draw_poses(frame, poses_2d)

        # Smoothed per-frame processing time, displayed as FPS.
        elapsed = (cv2.getTickCount() - tick_start) / cv2.getTickFrequency()
        if mean_time == 0:
            mean_time = elapsed
        else:
            mean_time = mean_time * 0.95 + elapsed * 0.05
        cv2.putText(
            frame,
            'FPS: {}'.format(int(1 / mean_time * 10) / 10),
            (40, 80),
            cv2.FONT_HERSHEY_COMPLEX,
            1,
            (0, 0, 255),
        )
        cv2.imshow('ICV 3D Human Pose Estimation', frame)

        greeting = json.dumps(poses_3d, cls=NumpyEncoder)
        await websocket.send(greeting)
        print(f"> {greeting}")

        key = cv2.waitKey(delay)
        if key == esc_code:
            break
        if key == p_code:
            # Toggle between running (delay 1) and paused (delay 0).
            delay = 0 if delay == 1 else 1
        if delay == 0 or not is_video:
            # allow to rotate 3D canvas while on pause
            key = 0
            while key not in resume_keys:
                plotter.plot(canvas_3d, poses_3d, edges)
                cv2.imshow(canvas_3d_window_name, canvas_3d)
                key = cv2.waitKey(33)
            if key == esc_code:
                break
            delay = 1
def run_inference(args):
    """Estimate 3D poses frame by frame and send joint angles over a socket.

    Creates a ``SocketServer`` on ``args.port``, loads the PyTorch model from
    ``models/human-pose-estimation-3d.pth`` and the extrinsics from
    ``data/extrinsics.json``, then processes ``args.video`` (when it is not
    the empty string) or ``args.images``. For every frame the poses are
    drawn, and non-empty joint angles computed from the 3D poses are sent
    through the socket server.

    Keys: code 27 stops, code 112 toggles pause; while paused (or when the
    input is not a video) the 3D canvas keeps being redrawn until one of the
    codes 112, 27 or 32 is pressed.
    """
    from modules.inference_engine_pytorch import InferenceEnginePyTorch

    socket_server = SocketServer(args.port)
    joint_angle_calculator = JointAngleCalculator()

    stride = 8
    model_path = os.path.join('models', 'human-pose-estimation-3d.pth')
    net = InferenceEnginePyTorch(model_path, "GPU")

    canvas_3d = np.zeros((720, 1280, 3), dtype=np.uint8)
    plotter = Plotter3d(canvas_3d.shape[:2])
    canvas_3d_window_name = 'Canvas 3D'
    cv2.namedWindow(canvas_3d_window_name)
    cv2.setMouseCallback(canvas_3d_window_name, Plotter3d.mouse_callback)

    extrinsics_path = os.path.join('data', 'extrinsics.json')
    with open(extrinsics_path, 'r') as extrinsics_file:
        extrinsics = json.load(extrinsics_file)
    R = np.array(extrinsics['R'], dtype=np.float32)
    t = np.array(extrinsics['t'], dtype=np.float32)

    # The image reader is always created; a video, when given, replaces it.
    frame_provider = ImageReader(args.images)
    is_video = args.video != ''
    if is_video:
        frame_provider = VideoReader(args.video)

    base_height = args.height_size
    fx = 1  # focal length
    delay = 1
    esc_code, p_code, space_code = 27, 112, 32
    resume_keys = (p_code, esc_code, space_code)
    mean_time = 0

    for frame in frame_provider:
        tick_start = cv2.getTickCount()
        if frame is None:
            break

        input_scale = base_height / frame.shape[0]
        scaled_img = cv2.resize(
            frame, dsize=None, fx=input_scale, fy=input_scale
        )
        # better to pad, but cut out for demo
        usable_width = scaled_img.shape[1] - (scaled_img.shape[1] % stride)
        scaled_img = scaled_img[:, 0:usable_width]

        # With fx starting at 1 this "unknown focal length" fallback does not
        # trigger; it is kept as in the original.
        if fx < 0:
            fx = np.float32(0.8 * frame.shape[1])

        inference_result = net.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(
            inference_result, input_scale, stride, fx, is_video
        )

        edges = []
        if len(poses_3d):
            poses_3d = rotate_poses(poses_3d, R, t)
            snapshot = poses_3d.copy()
            # Axis remap (x, y, z) -> (-z, x, -y); values are stored in groups
            # of four and only the first three of each group are kept below.
            poses_3d[:, 0::4] = -snapshot[:, 2::4]
            poses_3d[:, 1::4] = snapshot[:, 0::4]
            poses_3d[:, 2::4] = -snapshot[:, 1::4]
            poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
            # Offset the skeleton edge indices by 19 for every further pose.
            pose_offsets = 19 * np.arange(poses_3d.shape[0]).reshape(
                (-1, 1, 1)
            )
            edges = (Plotter3d.SKELETON_EDGES + pose_offsets).reshape((-1, 2))

        plotter.plot(canvas_3d, poses_3d, edges)
        cv2.imshow(canvas_3d_window_name, canvas_3d)
        draw_poses(frame, poses_2d)

        # Smoothed per-frame processing time, displayed as FPS.
        elapsed = (cv2.getTickCount() - tick_start) / cv2.getTickFrequency()
        if mean_time == 0:
            mean_time = elapsed
        else:
            mean_time = mean_time * 0.95 + elapsed * 0.05
        cv2.putText(
            frame,
            'FPS: {}'.format(int(1 / mean_time * 10) / 10),
            (40, 80),
            cv2.FONT_HERSHEY_COMPLEX,
            1,
            (0, 0, 255),
        )
        cv2.imshow('ICV 3D Human Pose Estimation', frame)

        key = cv2.waitKey(delay)
        if key == esc_code:
            break
        if key == p_code:
            # Toggle between running (delay 1) and paused (delay 0).
            delay = 0 if delay == 1 else 1
        if delay == 0 or not is_video:
            # allow to rotate 3D canvas while on pause
            key = 0
            while key not in resume_keys:
                plotter.plot(canvas_3d, poses_3d, edges)
                cv2.imshow(canvas_3d_window_name, canvas_3d)
                key = cv2.waitKey(33)
            if key == esc_code:
                break
            delay = 1

        # Send this frame's joint angles, if any were computed.
        joint_angles = joint_angle_calculator.calculate_angles(poses_3d)
        if joint_angles:
            socket_server.send_data(joint_angles)
def draw(self, frame, poses_2d, draw_fps=False):
    """Draw ``poses_2d`` onto ``frame`` with this object's colour palette.

    ``draw_fps`` is accepted but not used by the code visible here.
    """
    palette = self.color_palette
    draw_poses(frame, poses_2d, color_palette=palette)