def process(self):
    # Build the MobilenetV1 backbone that produces the PoseNet output tensors.
    self.image = tf.placeholder(tf.float32,
                                shape=[1, self.width, self.height, 3],
                                name='image')
    x = self.image
    rate = [1, 1]
    buff = []
    with tf.variable_scope(None, 'MobilenetV1'):
        for m in self.layers:
            stride = [1, m['stride'], m['stride'], 1]
            rate = [m['rate'], m['rate']]
            if m['convType'] == "conv2d":
                x = self.conv(x, stride, m['blockId'])
                buff.append(x)
            elif m['convType'] == "separableConv":
                x = self.separableConv(x, stride, m['blockId'], rate)
                buff.append(x)

    # Four output heads: keypoint heatmaps, offsets, and the forward/backward
    # displacement fields used by the multi-pose decoder.
    self.heatmaps = self.convToOutput(x, 'heatmap_2')
    self.offsets = self.convToOutput(x, 'offset_2')
    self.displacementFwd = self.convToOutput(x, 'displacement_fwd_2')
    self.displacementBwd = self.convToOutput(x, 'displacement_bwd_2')
    self.heatmaps = tf.sigmoid(self.heatmaps, 'heatmap')

    cap = cv2.VideoCapture(0)  # open the webcam
    cap_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    # Scale factors to map keypoints from network input size back to capture size.
    width_factor = cap_width / self.width
    height_factor = cap_height / self.height

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        # Persist the freshly built graph and weights as a checkpoint.
        saver = tf.train.Saver()
        save_dir = './checkpoints'
        save_path = os.path.join(save_dir, 'model.ckpt')
        saver.save(sess, save_path)

        flag, frame = cap.read()
        while flag:
            startime = time.time()
            orig_image = frame
            # Resize, convert BGR -> RGB, and normalize pixels to [-1, 1].
            # Note: the reshape below assumes a square input (self.width == self.height).
            frame = cv2.resize(frame, (self.width, self.height))
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = frame.astype(np.float32)
            frame = frame * (2.0 / 255.0) - 1.0
            frame = frame.reshape(1, self.width, self.height, 3)

            heatmaps_result, offsets_result, displacementFwd_result, displacementBwd_result = \
                sess.run([self.heatmaps,
                          self.offsets,
                          self.displacementFwd,
                          self.displacementBwd],
                         feed_dict={self.image: frame})

            # Single-pose decoding is also available:
            # poses = decode_single_pose(heatmaps_result, offsets_result, 16,
            #                            width_factor, height_factor)
            poses = decodeMultiplePoses(heatmaps_result, offsets_result,
                                        displacementFwd_result,
                                        displacementBwd_result,
                                        width_factor, height_factor)

            for idx in range(len(poses)):
                if poses[idx]['score'] > 0.2:
                    color = color_table[idx]
                    drawKeypoints(poses[idx], orig_image, color)
                    drawSkeleton(poses[idx], orig_image)

            endtime = time.time()
            print('Time cost per frame : %f' % (endtime - startime))
            cv2.imshow("1", orig_image)
            cv2.waitKey(1)
            flag, frame = cap.read()
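# The resize / BGR->RGB / normalize sequence above recurs in every capture loop
# in this file. A small helper would keep those loops readable. This is a sketch,
# not part of the original code; the name preprocess_frame is ours. Unlike the
# inline code above it reshapes as (1, height, width, 3), which matches OpenCV's
# array layout; the two agree because the network input here is square.
import cv2
import numpy as np

def preprocess_frame(frame, net_width, net_height):
    """Resize a BGR frame and normalize it to the [-1, 1] range PoseNet expects.

    Returns a float32 tensor of shape (1, net_height, net_width, 3).
    """
    frame = cv2.resize(frame, (net_width, net_height))       # cv2 takes (width, height)
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)            # OpenCV frames are BGR
    frame = frame.astype(np.float32) * (2.0 / 255.0) - 1.0    # map [0, 255] -> [-1, 1]
    return frame.reshape(1, net_height, net_width, 3)         # add the batch dimension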
def process(self):
    # Build the MobilenetV1 backbone that produces the PoseNet output tensors.
    self.image = tf.placeholder(tf.float32,
                                shape=[1, self.width, self.height, 3],
                                name='image')
    x = self.image
    rate = [1, 1]
    buff = []
    with tf.variable_scope(None, 'MobilenetV1'):
        for m in self.layers:
            stride = [1, m['stride'], m['stride'], 1]
            rate = [m['rate'], m['rate']]
            if m['convType'] == "conv2d":
                x = self.conv(x, stride, m['blockId'])
                buff.append(x)
            elif m['convType'] == "separableConv":
                x = self.separableConv(x, stride, m['blockId'], rate)
                buff.append(x)

    self.heatmaps = self.convToOutput(x, 'heatmap_2')
    self.offsets = self.convToOutput(x, 'offset_2')
    self.displacementFwd = self.convToOutput(x, 'displacement_fwd_2')
    self.displacementBwd = self.convToOutput(x, 'displacement_bwd_2')
    self.heatmaps = tf.sigmoid(self.heatmaps, 'heatmap')

    # Raspberry Pi camera via a threaded video stream; give it time to warm up.
    cap = PiVideoStream().start()
    time.sleep(2.0)
    cap_width = 320
    cap_height = 240
    width_factor = cap_width / self.width
    height_factor = cap_height / self.height

    with tf.Session() as sess:
        ######################
        #    Setup GCloud    #
        ######################
        project_id = "ai-dj-36"
        topic_name = "pose"
        publisher = pubsub_v1.PublisherClient()
        topic_path = publisher.topic_path(project_id, topic_name)

        # Create the topic if it does not exist yet.
        project_path = publisher.project_path(project_id)
        topics = publisher.list_topics(project_path)
        topic_names = [topic.name for topic in topics]
        if topic_path not in topic_names:
            topic = publisher.create_topic(topic_path)
            print('Topic created: {}'.format(topic))

        # Continue with the model.
        init = tf.global_variables_initializer()
        sess.run(init)
        saver = tf.train.Saver()
        save_dir = './checkpoints'
        save_path = os.path.join(save_dir, 'model_ckpt')
        saver.save(sess, save_path)

        while True:
            total_point_var = 0.0
            ave_counter = 0

            # Wait for a confident initial pose before measuring movement.
            frame = cap.read()
            init_pose = self.process_frame(sess, cap, frame,
                                           width_factor, height_factor)
            while len(init_pose) <= 0 or init_pose[0]['score'] <= 0.2:
                frame = cap.read()
                init_pose = self.process_frame(sess, cap, frame,
                                               width_factor, height_factor)
                cv2.imshow("1", frame)
                cv2.waitKey(1)

            while True:
                frame = cap.read()
                orig_image = frame
                startime = time.time()
                curr_pose = self.process_frame(sess, cap, frame,
                                               width_factor, height_factor)
                if len(curr_pose) == 0:
                    continue

                if ave_counter < NUM_FRAMES_TO_AVERAGE:
                    # Accumulate keypoint variance against the reference pose.
                    total_point_var += measure_keypoint_var(init_pose[0], curr_pose[0])
                    ave_counter += 1
                else:
                    print(total_point_var / NUM_FRAMES_TO_AVERAGE)
                    # Send the averaged movement score to App Engine via Pub/Sub.
                    curtime = time.time()
                    payload_contents = {"device_id": "jeffsrpi",
                                        "published_at": curtime,
                                        "pose": total_point_var / NUM_FRAMES_TO_AVERAGE}
                    payload = json.dumps(payload_contents).encode("utf-8")
                    print(payload_contents)
                    future = publisher.publish(topic_path, data=payload)
                    ave_counter = 0
                    total_point_var = 0
                    init_pose = curr_pose

                for idx in range(len(curr_pose)):
                    if curr_pose[idx]['score'] > 0.2:
                        color = color_table[idx]
                        drawKeypoints(curr_pose[idx], orig_image, color)
                        drawSkeleton(curr_pose[idx], orig_image)

                endtime = time.time()
                print('Time cost per frame : %f' % (endtime - startime))
                cv2.imshow("1", orig_image)
                cv2.waitKey(1)
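# measure_keypoint_var is called above but not defined in this section. A
# plausible sketch, assuming the pose dict layout the decoders here produce
# (pose['keypoints'][i]['position'] with 'x' and 'y' entries), is the mean
# squared displacement between corresponding keypoints. Hypothetical helper,
# not the original implementation.
import numpy as np

def measure_keypoint_var(ref_pose, cur_pose):
    """Mean squared displacement between corresponding keypoints of two poses."""
    diffs = []
    for ref_kp, cur_kp in zip(ref_pose['keypoints'], cur_pose['keypoints']):
        dx = cur_kp['position']['x'] - ref_kp['position']['x']
        dy = cur_kp['position']['y'] - ref_kp['position']['y']
        diffs.append(dx * dx + dy * dy)
    return float(np.mean(diffs))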
def switch_active_monitor(self, show_cam=False, show_spf=False):
    # Webcam setup
    cap = cv2.VideoCapture(0)
    cap_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    width_factor = cap_width / self.width
    height_factor = cap_height / self.height

    # Get the monitors' IDs (primary and secondary).
    p_mon_id = int(win32api.EnumDisplayMonitors()[0][0])
    s_mon_id = int(win32api.EnumDisplayMonitors()[1][0])

    # Get this thread's ID.
    cur_thread_id = win32api.GetCurrentThreadId()

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        saver = tf.train.Saver()
        save_dir = "checkpoints"
        save_path = os.path.join(save_dir, "model.ckpt")
        saver.save(sess, save_path)

        flag, frame = cap.read()
        while flag:
            if show_spf:
                startime = time.time()
            orig_image = frame
            frame = cv2.resize(frame, (self.width, self.height))
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = frame.astype(np.float32)
            frame = frame * (2.0 / 255.0) - 1.0
            frame = frame.reshape(1, self.width, self.height, 3)

            (
                heatmaps_result,
                offsets_result,
                displacementFwd_result,
                displacementBwd_result,
            ) = sess.run(
                [
                    self.heatmaps,
                    self.offsets,
                    self.displacementFwd,
                    self.displacementBwd,
                ],
                feed_dict={self.image: frame},
            )

            poses = decode_single_pose(heatmaps_result, offsets_result, 16,
                                       width_factor, height_factor)

            if show_spf:
                endtime = time.time()
                print("Time cost per frame : %f" % (endtime - startime))

            # Calculate the distance between the nose and each shoulder; the
            # shorter distance indicates which way the head is turned.
            nose_pos = poses[0]["keypoints"][0]["position"]
            nose_pos_array = np.array((nose_pos["x"], nose_pos["y"]))
            lshoulder_pos = poses[0]["keypoints"][5]["position"]
            lshoulder_pos_array = np.array((lshoulder_pos["x"], lshoulder_pos["y"]))
            rshoulder_pos = poses[0]["keypoints"][6]["position"]
            rshoulder_pos_array = np.array((rshoulder_pos["x"], rshoulder_pos["y"]))
            ldiff = nose_pos_array - lshoulder_pos_array
            ldist = np.linalg.norm(ldiff)
            rdiff = nose_pos_array - rshoulder_pos_array
            rdist = np.linalg.norm(rdiff)

            # Get the active monitor's ID from the foreground window.
            win_id = win32gui.GetForegroundWindow()
            act_mon_id = int(win32api.MonitorFromWindow(win_id, 2))

            if act_mon_id == p_mon_id:
                if win_id != 0:
                    p_mon_win = win_id
                if rdist < ldist:
                    # Head is turned toward the secondary monitor; move focus there.
                    try:
                        win_thread_id, _ = win32process.GetWindowThreadProcessId(s_mon_win)
                        win32process.AttachThreadInput(cur_thread_id, win_thread_id, True)
                        win32gui.SetFocus(s_mon_win)
                        win32gui.SetForegroundWindow(s_mon_win)
                    except Exception:
                        pass
            elif act_mon_id == s_mon_id:
                if win_id != 0:
                    s_mon_win = win_id
                if rdist >= ldist:
                    # Head is turned toward the primary monitor; move focus back.
                    try:
                        win_thread_id, _ = win32process.GetWindowThreadProcessId(p_mon_win)
                        win32process.AttachThreadInput(cur_thread_id, win_thread_id, True)
                        win32gui.SetFocus(p_mon_win)
                        win32gui.SetForegroundWindow(p_mon_win)
                    except Exception:
                        pass
            else:
                raise Exception("Foreground window is on an unknown monitor.")

            if show_cam:
                for i, _ in enumerate(poses):
                    if poses[i]["score"] > 0.2:
                        color = self.color_table[i]
                        drawKeypoints(poses[i], orig_image, color)
                        drawSkeleton(poses[i], orig_image)
                cv2.imshow("1", orig_image)
                cv2.waitKey(1)

            flag, frame = cap.read()
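# The nose-to-shoulder comparison above is the core heuristic. Pulled out on
# its own it reads as below. This is a sketch with a hypothetical name, not
# part of the original file; it assumes PoseNet keypoint ordering, where
# index 0 is the nose, 5 the left shoulder, and 6 the right shoulder.
import numpy as np

def head_direction(pose):
    """Return 'right' if the nose is closer to the right shoulder, else 'left'."""
    def xy(idx):
        p = pose["keypoints"][idx]["position"]
        return np.array((p["x"], p["y"]))

    nose, lshoulder, rshoulder = xy(0), xy(5), xy(6)
    ldist = np.linalg.norm(nose - lshoulder)
    rdist = np.linalg.norm(nose - rshoulder)
    return "right" if rdist < ldist else "left"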
def web_cam_test(self, pose_mode="single"):
    cap = cv2.VideoCapture(0)
    cap_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    width_factor = cap_width / self.width
    height_factor = cap_height / self.height

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        saver = tf.train.Saver()
        save_dir = "checkpoints"
        save_path = os.path.join(save_dir, "model.ckpt")
        saver.save(sess, save_path)

        flag, frame = cap.read()
        while flag:
            startime = time.time()
            orig_image = frame
            frame = cv2.resize(frame, (self.width, self.height))
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = frame.astype(np.float32)
            frame = frame * (2.0 / 255.0) - 1.0
            frame = frame.reshape(1, self.width, self.height, 3)

            (
                heatmaps_result,
                offsets_result,
                displacementFwd_result,
                displacementBwd_result,
            ) = sess.run(
                [
                    self.heatmaps,
                    self.offsets,
                    self.displacementFwd,
                    self.displacementBwd,
                ],
                feed_dict={self.image: frame},
            )

            if pose_mode == "single":
                poses = decode_single_pose(heatmaps_result, offsets_result, 16,
                                           width_factor, height_factor)
            elif pose_mode == "multi":
                poses = decodeMultiplePoses(
                    heatmaps_result,
                    offsets_result,
                    displacementFwd_result,
                    displacementBwd_result,
                    width_factor,
                    height_factor,
                )
            else:
                raise ValueError("Unknown pose mode.")

            for i, _ in enumerate(poses):
                if poses[i]["score"] > 0.2:
                    color = self.color_table[i]
                    drawKeypoints(poses[i], orig_image, color)
                    drawSkeleton(poses[i], orig_image)

            endtime = time.time()
            print("Time cost per frame : %f" % (endtime - startime))
            cv2.imshow("1", orig_image)
            cv2.waitKey(1)
            flag, frame = cap.read()
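# The raw per-frame timing prints above are noisy. An exponential moving
# average gives a steadier frames-per-second estimate; a sketch, not part of
# the original code. Calling timer.start() before sess.run and
# print(1.0 / timer.stop()) after drawing would replace the raw prints.
import time

class FrameTimer:
    """Smooths per-frame timing with an exponential moving average."""

    def __init__(self, alpha=0.1):
        self.alpha = alpha   # weight given to the newest measurement
        self.avg = None      # running average of seconds per frame
        self._t0 = None

    def start(self):
        self._t0 = time.time()

    def stop(self):
        dt = time.time() - self._t0
        self.avg = dt if self.avg is None else (1 - self.alpha) * self.avg + self.alpha * dt
        return self.avg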
# Preprocessed frame -> TFLite input tensor.
frame = np.array(frame, dtype=np.float32)
frame = frame.reshape(1, imageSize, imageSize, 3)
input_data = frame

interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()

# Fetch the four PoseNet output tensors.
heatmaps_result = interpreter.get_tensor(output_details[0]['index'])
offsets_result = interpreter.get_tensor(output_details[1]['index'])
displacementFwd_result = interpreter.get_tensor(output_details[2]['index'])
displacementBwd_result = interpreter.get_tensor(output_details[3]['index'])

poses = decodeMultiplePoses(heatmaps_result, offsets_result,
                            displacementFwd_result,
                            displacementBwd_result,
                            width_factor, height_factor)

for idx in range(len(poses)):
    if poses[idx]['score'] > 0.2:
        color = color_table[idx]
        drawKeypoints(poses[idx], orig_image, color)
        drawSkeleton(poses[idx], orig_image)

endtime = time.time()
print('Time cost per frame : %f' % (endtime - startime))
cv2.imshow("1", orig_image)
cv2.waitKey(1)
ret, frame = cap.read()
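# The fragment above assumes interpreter, input_details, and output_details
# already exist. A minimal setup sketch that would run before the loop; the
# model path is a placeholder, and the output indices 0-3 mapping to
# heatmaps/offsets/displacements matches the fragment above but depends on
# how the model was converted.
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="posenet.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# The expected square input edge can be read from the input tensor shape.
imageSize = input_details[0]['shape'][1]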
def main():
    print('Entering main application...', flush=True)

    try:
        print('Initializing stream manager client...', flush=True)
        stream_mgr_client = init_gg_stream_manager()
        print('Completed stream manager initialization', flush=True)
    except Exception:
        print('Error initializing stream manager client...', sys.exc_info()[0], flush=True)
        sys.exit(1)

    # To flip the image, modify the flip_method parameter (0 and 2 are the most common).
    gst_pipeline = gstreamer_pipeline(framerate=10, flip_method=0)
    print(gst_pipeline, flush=True)
    cap = cv2.VideoCapture(gst_pipeline, cv2.CAP_GSTREAMER)

    src_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    src_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    width_factor = src_width / pose_input_tensor_shape[1]
    height_factor = src_height / pose_input_tensor_shape[2]

    if cap.isOpened():
        while cap.isOpened():
            ret_val, img = cap.read()
            if ret_val:
                producer_timestamp = int(datetime.now().timestamp())
                start_time = default_timer()
                src_img = img

                # With CUDA:
                # gpu_frame = cv2.cuda_GpuMat()
                # gpu_frame.upload(img)
                # rgb_frame = cv2.cuda_cvtColor(img, cv2.COLOR_BGR2GRAY)

                # Without CUDA:
                pose_input_tensor = preprocess_image(img)
                print("Transformed after " + str(default_timer() - start_time), flush=True)

                infer_start_time = default_timer()
                heatmaps, offsets, fwd_displacement, bwd_displacement = model.run(
                    {'sub_2': pose_input_tensor})
                print("Inference finished after " + str(default_timer() - infer_start_time), flush=True)

                postprocess_start_time = default_timer()
                poses = decodeMultiplePoses(heatmaps, offsets,
                                            fwd_displacement, bwd_displacement,
                                            width_factor, height_factor)

                pose_cnt = 0
                for idx in range(len(poses)):
                    if poses[idx]['score'] > 0.2:
                        color = color_table[idx]
                        drawKeypoints(poses[idx], src_img, color)
                        drawSkeleton(poses[idx], src_img)
                        print('Pose drawn', flush=True)
                        pose_cnt += 1
                print("Postprocessing finished after " + str(default_timer() - postprocess_start_time), flush=True)

                # if pose_cnt > 0:
                img_filename = 'nano-pose-output-' + str(producer_timestamp) + '.jpg'
                img_folder = '/tmp/pose-output'
                img_path = img_folder + '/' + img_filename
                cv2.imwrite(img_path, src_img)
                # send_to_gg_stream_manager(stream_mgr_client, img_path, img_filename, s3_prefix)
    else:
        print("Unable to open camera", flush=True)
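# preprocess_image is called above but not defined in this section. Given the
# scale-factor computation above (which implies pose_input_tensor_shape is laid
# out as (1, width, height, 3), agreeing with the TF placeholders in this file
# for a square input) and the [-1, 1] normalization used by the TF pipelines,
# a plausible sketch is below. Hypothetical, not the original helper.
import cv2
import numpy as np

def preprocess_image(img):
    """Resize a BGR frame to the model's input size and normalize to [-1, 1]."""
    _, in_w, in_h, _ = pose_input_tensor_shape
    img = cv2.resize(img, (in_w, in_h))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32) * (2.0 / 255.0) - 1.0
    return np.expand_dims(img, axis=0)  # add the batch dimension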