def test_model_accuracy(self):
    image = Image.open(test_image).convert('RGB')
    for model_path, model_name in test_utils.generate_models():
        print('Testing Accuracy for: ', model_path)
        engine = PoseEngine(model_path)
        model_pose_result, _ = engine.DetectPosesInImage(image)
        reference_pose_scores, reference_keypoints = test_utils.parse_reference_results(
            model_name)
        score_delta = 0.1  # Allows score to change within 1 decimal place.
        pixel_delta = 4.0  # Allows placement changes of 4 pixels.
        pose_idx = 0
        keypoint_idx = 0
        for model_pose in model_pose_result:
            self.assertAlmostEqual(model_pose.score,
                                   reference_pose_scores[pose_idx],
                                   delta=score_delta)
            for _, model_keypoint in model_pose.keypoints.items():
                reference_keypoint = reference_keypoints[keypoint_idx]
                self.assertAlmostEqual(model_keypoint.score,
                                       reference_keypoint.score,
                                       delta=score_delta)
                self.assertAlmostEqual(model_keypoint.point[0],
                                       reference_keypoint.point[0],
                                       delta=pixel_delta)
                self.assertAlmostEqual(model_keypoint.point[1],
                                       reference_keypoint.point[1],
                                       delta=pixel_delta)
                keypoint_idx += 1
            pose_idx += 1

def __init__(self):
    self.engine = PoseEngine(
        'models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite')
    self.confidence = 3
    self.margin = 50
    self.image_shape = (640, 480)
    self.input_shape = (641, 481)

def __init__(self, config):
    self.config = config
    # Get the model name from the config.
    self.model_name = self.config.DETECTOR_NAME
    # Frames Per Second
    self.fps = None
    self.engine = PoseEngine(
        "/project-posenet/models/mobilenet/posenet_mobilenet_v1_075_721_1281_quant_decoder_edgetpu.tflite"
    )
    self.w, self.h = self.config.DETECTOR_INPUT_SIZE[0], self.config.DETECTOR_INPUT_SIZE[1]
    self.keypoints = (
        'nose', 'left eye', 'right eye', 'left ear', 'right ear',
        'left shoulder', 'right shoulder', 'left elbow', 'right elbow',
        'left wrist', 'right wrist', 'left hip', 'right hip',
        'left knee', 'right knee', 'left ankle', 'right ankle'
    )

def main():
    parser = argparse.ArgumentParser(description='PoseNet')
    parser.add_argument('--model', type=str, default='mobilenet')
    args = parser.parse_args()

    if args.model == 'mobilenet':
        model = 'models/mobilenet/posenet_mobilenet_v1_075_353_481_quant_decoder_edgetpu.tflite'
    else:
        model = 'models/resnet/posenet_resnet_50_416_288_16_quant_edgetpu_decoder.tflite'

    engine = PoseEngine(model)
    input_shape = engine.get_input_tensor_shape()
    inference_size = (input_shape[2], input_shape[1])
    print(inference_size)

    cap = cv2.VideoCapture(0)
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    out_video = cv2.VideoWriter('./output.mp4', fourcc,
                                cap.get(cv2.CAP_PROP_FPS), inference_size)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
    if not cap.isOpened():
        print("Camera Open Error")
        sys.exit(0)

    count = 0
    total_fps = 0.0
    fps_cnt = 0
    while cap.isOpened() and count < 60:
        ret_val, img = cap.read()
        if not ret_val:
            print("Camera read Error")
            break
        print('frame read')
        img = cv2.resize(img, inference_size)
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        s_time = time.time()
        poses, inference_time = engine.DetectPosesInImage(rgb)
        fps = 1.0 / (time.time() - s_time)
        total_fps += fps
        fps_cnt += 1
        for pose in poses:
            print('\nPose Score: %f FPS:%f' % (pose.score, fps))
            if pose.score < 0.3:
                continue
            for label, keypoint in pose.keypoints.items():
                print(' %-20s x=%-4d y=%-4d score=%.1f' %
                      (label, keypoint.yx[1], keypoint.yx[0], keypoint.score))
                cv2.circle(img, (keypoint.yx[1], keypoint.yx[0]), 2, (0, 255, 0), -1)
        out_video.write(img)
        count += 1

    if fps_cnt > 0:
        print('Model[%s] Avg FPS: %f' % (args.model, total_fps / fps_cnt))
    cv2.destroyAllWindows()
    cap.release()
    out_video.release()

def posecamera(cap):
    # Fill in the input-size placeholders; 481x641 matches a 640x480 source.
    model = 'models/posenet_mobilenet_v1_075_%d_%d_quant_decoder_edgetpu.tflite' % (481, 641)
    engine = PoseEngine(model)
    width, height = src_size
    isVideoFile = False
    frameCount = 0
    maxFrames = 0

    # VideoCapture init
    print("Open Video Source")
    cap.set(cv2.CAP_PROP_FPS, 60)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

    try:
        pose_ary = {}
        flag = False
        while True:
            ret, frame = cap.read()
            if not ret:
                print("can't read video source")
                break
            rgb = frame[:, :, ::-1]
            outputs, inference_time = engine.DetectPosesInImage(rgb)

            # Crop the frame to the display region.
            imgDisp = frame[0:appsink_size[1], 0:appsink_size[0]].copy()
            if args.mirror:
                imgDisp = cv2.flip(imgDisp, 1)
            for pose in outputs:
                draw_pose(imgDisp, pose)
            cv2.imshow('PoseNet - OpenCV', imgDisp)
            # Pump the GUI event loop so the window refreshes; quit on 'q'.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    except Exception as ex:
        raise ex
    finally:
        cv2.destroyAllWindows()
        cap.release()

def run(inf_callback, render_callback):
    global t
    if t >= 3:
        return
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mirror', help='flip video horizontally',
                        action='store_true')
    parser.add_argument('--model', help='.tflite model path.', required=False)
    parser.add_argument('--res', help='Resolution', default='640x480',
                        choices=['480x360', '640x480', '1280x720'])
    parser.add_argument('--videosrc', help='Which video source to use',
                        default='/dev/video0')
    parser.add_argument('--h264', help='Use video/x-h264 input',
                        action='store_true')
    parser.add_argument('--jpeg', help='Use image/jpeg input',
                        action='store_true')
    args = parser.parse_args()

    default_model = 'models/mobilenet/posenet_mobilenet_v1_075_%d_%d_quant_decoder_edgetpu.tflite'
    if args.res == '480x360':
        src_size = (480, 360)
        appsink_size = (480, 360)
        model = args.model or default_model % (353, 481)
    elif args.res == '640x480':
        src_size = (640, 480)
        appsink_size = (640, 480)
        model = args.model or default_model % (481, 641)
    elif args.res == '1280x720':
        src_size = (1280, 720)
        appsink_size = (1280, 720)
        model = args.model or default_model % (721, 1281)

    print('Loading model: ', model)
    engine = PoseEngine(model)
    input_shape = engine.get_input_tensor_shape()
    inference_size = (input_shape[2], input_shape[1])

    gstreamer.run_pipeline(
        partial(inf_callback, engine),
        partial(render_callback, engine),
        src_size,
        inference_size,
        mirror=True,  # mirror=args.mirror
        videosrc=args.videosrc,
        h264=args.h264,
        jpeg=args.jpeg)

def __init__(self, model, anonymize=True, bodyparts=True, drawposes=True):
    self.anonymize = anonymize
    self.bodyparts = bodyparts
    self.drawposes = drawposes
    self.background_image = None
    self.last_time = time.monotonic()
    self.frames = 0
    self.sum_fps = 0
    self.sum_process_time = 0
    self.sum_inference_time = 0
    self.engine = PoseEngine(model)
    inference_size = (self.engine.image_width, self.engine.image_height)
    print('Inference size: {}'.format(inference_size))

def main():
    synth = fluidsynth.Synth()
    synth.start('alsa')
    soundfont_id = synth.sfload('/usr/share/sounds/sf2/FluidR3_GM.sf2')
    for channel, instrument in enumerate(CHANNELS):
        synth.program_select(channel, soundfont_id, 0, instrument)

    # Open a test image to run through PoseNet and return Pose objects.
    pil_image = Image.open('PATH/TO/TEST/IMAGE/GOES/HERE.jpg')
    pil_image = pil_image.resize((641, 481), Image.NEAREST)
    engine = PoseEngine(
        'models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite')
    poses, inference_time = engine.DetectPosesInImage(np.uint8(pil_image))
    print('Inference time: %.fms' % inference_time)

    velocities = {}
    for pose in poses:
        left = pose.keypoints.get('left wrist')
        right = pose.keypoints.get('right wrist')
        if not (left and right):
            continue
        if pose.score < 0.4:
            continue
        print('\nPose Score: ', pose.score)
        for label, keypoint in pose.keypoints.items():
            print(' %-20s x=%-4d y=%-4d score=%.1f' %
                  (label, keypoint.yx[1], keypoint.yx[0], keypoint.score))
        identity = IDENTITIES[2 % len(IDENTITIES)]  # Set identity to VOICE_OOHS.
        left = 1 - left.yx[0] / engine.image_height
        right = 1 - right.yx[0] / engine.image_height
        velocity = int(left * 165)
        i = int(right * identity.extent)
        note = (identity.base_note + OCTAVE * (i // len(SCALE)) +
                SCALE[i % len(SCALE)])
        velocities[(identity.channel, note)] = velocity

    # prev_notes would hold the notes played on the previous frame when using
    # video or a live feed:
    # for note in prev_notes:
    #     if note not in velocities:
    #         synth.noteoff(*note)

    # Hold each note, since only a single frame is processed here.
    for note, velocity in velocities.items():
        synth.noteon(*note, velocity)
    time.sleep(1)

def run(callback, use_appsrc=False):
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mirror', help='flip video horizontally',
                        action='store_true')
    parser.add_argument('--model', help='.tflite model path.', required=False)
    parser.add_argument('--res', help='Resolution', default='640x480',
                        choices=['480x360', '640x480', '1280x720'])
    parser.add_argument('--videosrc', help='Which video source to use',
                        default='/dev/video1')
    parser.add_argument('--h264', help='Use video/x-h264 input',
                        action='store_true')
    args = parser.parse_args()

    default_model = 'models/posenet_mobilenet_v1_075_%d_%d_quant_decoder_edgetpu.tflite'
    if args.res == '480x360':
        src_size = (640, 480)
        appsink_size = (480, 360)
        model = args.model or default_model % (353, 481)
    elif args.res == '640x480':
        src_size = (640, 480)
        appsink_size = (640, 480)
        model = args.model or default_model % (481, 641)
    elif args.res == '1280x720':
        src_size = (1280, 720)
        appsink_size = (1280, 720)
        model = args.model or default_model % (721, 1281)

    print('Loading model: ', model)
    engine = PoseEngine(model, mirror=args.mirror)
    gstreamer.run_pipeline(partial(callback, engine),
                           src_size,
                           appsink_size,
                           use_appsrc=use_appsrc,
                           mirror=args.mirror,
                           videosrc=args.videosrc,
                           h264input=args.h264)

class PoseNet:
    def __init__(
            self,
            model='../models/posenet_mobilenet_v1_075_353_481_quant_decoder_edgetpu.tflite'):
        self.engine = PoseEngine(model, mirror=False)

    def eval(self, frame):
        nposes, pose_scores, kps_coords, kps_scores = self.engine.DetectPosesInImage(
            frame)
        kps_coords = kps_coords.astype(np.int32)
        pose_list = []
        if nposes:
            for i in range(nposes):
                if pose_scores[i] >= C_PSCORE_THRESHOLD:
                    kps_coord = kps_coords[i]
                    kps_score = kps_scores[i]
                    keypoints = {}
                    for j in range(C_NKP):
                        if kps_score[j] >= C_KP_THRESHOLD:
                            keypoints[C_KP_NAMES[j]] = Keypoint(
                                C_KP_NAMES[j], kps_coord[j], kps_score[j])
                    pose_list.append(Pose(keypoints, pose_scores[i]))
        return pose_list

def forehead_coords(engine, img, thermal_shape, h, v, im_x=641, im_y=481):
    # Run PoseNet to get keypoints.
    pil_image = img
    im_x, im_y = img.size
    engine = PoseEngine(
        './forehead_detection/models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite')
    poses, keypoints, img, inference_time = engine.DetectPosesInImage(np.uint8(pil_image), 0)

    # Get forehead coordinates for up to 10 people in the frame
    # (NOTE: PoseNet currently processes a maximum of 10 people per frame).
    keypoints_eyes = (keypoints[:, 1, :], keypoints[:, 2, :])
    lips_keypoints = []
    forehead_keypoints = []
    transformed_keypoints = []
    x_ratio = (im_x * h) / thermal_shape[0]
    y_ratio = (im_y * v) / thermal_shape[1]
    x_bias = (im_x * h - im_x) / 2
    y_bias = (im_y * v - im_y) / 2
    for i in range(10):
        dist = abs(keypoints_eyes[0][i][1] - keypoints_eyes[1][i][1])
        # Format: ((left_eye_x, left_eye_y), (right_eye_x, right_eye_y + offset)),
        # where offset is the desired distance above the eyes.
        forehead_keypoints.append([
            (int(keypoints_eyes[0][i][1] + dist * 0.5),
             int(keypoints_eyes[0][i][0] + dist * 0.2)),
            (int(keypoints_eyes[1][i][1] - dist * 0.5),
             int(keypoints_eyes[1][i][0] - dist * 1.5))])
        # Transform coordinates.
        if keypoints_eyes[0][i][1] == 0:
            xL, yL, xR, yR = 0, 0, 0, 0
        else:
            # Includes some hard-coded empirical corrections.
            e = np.e
            xL = (forehead_keypoints[i][0][0] + x_bias) / x_ratio
            yL = (forehead_keypoints[i][0][1] + y_bias) / y_ratio
            xR = (forehead_keypoints[i][1][0] + x_bias) / x_ratio
            yR = (forehead_keypoints[i][1][1] + y_bias) / y_ratio
            size_factor = abs(xL - xR) / thermal_shape[0]
            size_factor = 0.08 * e**size_factor
            v_dis = abs((xR + xL) / 2 - thermal_shape[0])
            ori_factor = abs(xR + xL + 20) / thermal_shape[0] * 1.5
            xL, xR = xL - v_dis * 0.7 / e**ori_factor, xR - v_dis * 0.7 / e**ori_factor
            yL, yR = yL + size_factor * 100, yR + size_factor * 100
        transformed_keypoints.append(((int(xL), int(yL)), (int(xR), int(yR))))
        # Find lip keypoints.
        lip_dist = abs(keypoints_eyes[0][i][1] - keypoints_eyes[1][i][1]) * 1.5
        lips_keypoints.append([
            (int(keypoints_eyes[0][i][1]),
             int(keypoints_eyes[0][i][0] + 0.5 * lip_dist)),
            (int(keypoints_eyes[1][i][1]),
             int(keypoints_eyes[1][i][0] + lip_dist))])
    return forehead_keypoints, transformed_keypoints, lips_keypoints

def run(self):
    print('Loading model: ', self.model)
    engine = PoseEngine(self.model, mirror=self.args.mirror)
    cap = cv2.VideoCapture(0)

    last_time = time.monotonic()
    n = 0
    sum_fps = 0
    sum_process_time = 0
    sum_inference_time = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        start_time = time.monotonic()
        outputs, inference_time = engine.DetectPosesInImage(cv2_im)
        end_time = time.monotonic()

        n += 1
        sum_fps += 1.0 / (end_time - last_time)
        sum_process_time += 1000 * (end_time - start_time) - inference_time
        sum_inference_time += inference_time
        last_time = end_time
        text_line = 'PoseNet: %.1fms Frame IO: %.2fms TrueFPS: %.2f Nposes %d' % (
            sum_inference_time / n, sum_process_time / n, sum_fps / n, len(outputs))
        print(text_line)

        for pose in outputs:
            self.draw_pose(cv2_im, pose)
        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

def pose_inferencer(results, frameBuffer, model, device):
    pose_engine = PoseEngine(model, device)
    print("Loaded Graphs!!! (Posenet)")

    while True:
        if frameBuffer.empty():
            continue

        # Run inference.
        color_image = frameBuffer.get()
        prepimg_pose = color_image[:, :, ::-1].copy()
        tinf = time.perf_counter()
        result_pose, inference_time = pose_engine.DetectPosesInImage(prepimg_pose)
        print(time.perf_counter() - tinf, "sec (Posenet)")
        results.put(result_pose)

def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mirror', help='flip video horizontally',
                        action='store_true')
    parser.add_argument('--model', help='.tflite model path.', required=False)
    parser.add_argument('--width', help='Source width', default='1920')
    parser.add_argument('--height', help='Source height', default='1080')
    parser.add_argument('--videosrc', help='Which video source to use',
                        default='/dev/video0')
    parser.add_argument('--h264', help='Use video/x-h264 input',
                        action='store_true')
    parser.add_argument('--jpg', help='Use image/jpeg input',
                        action='store_true')
    parser.add_argument('--print-stats', help='print timing stats',
                        action='store_true')
    args = parser.parse_args()

    if args.h264 and args.jpg:
        print('Error: both mutually exclusive options h264 and jpg set')
        sys.exit(1)

    default_model = 'models/bodypix_mobilenet_v1_075_1280_720_16_edgetpu.tflite'
    model = args.model if args.model else default_model
    print('Model: {}'.format(model))

    engine = PoseEngine(model)
    inference_size = (engine.image_width, engine.image_height)
    print('Inference size: {}'.format(inference_size))

    src_size = (int(args.width), int(args.height))
    if args.videosrc.startswith('/dev/video'):
        print('Source size: {}'.format(src_size))

    print('Toggle mode keys:')
    print('  TOGGLE_SKELETONS = s')
    print('  TOGGLE_BBOXES = b')
    print('  TOGGLE_ANON = a')
    print('  TOGGLE_HM = h')
    print('  TOGGLE_AHM = g')

    run_pipeline(Callback(engine, save_frames=False, print_stats=args.print_stats),
                 src_size,
                 inference_size,
                 video_src=args.videosrc,
                 h264=args.h264,
                 jpg=args.jpg,
                 mirror=args.mirror)

def __init__(self, config, model_name, variables):
    self.config = config
    self.model_name = model_name
    self.model_variables = variables
    # Frames Per Second
    self.fps = None
    self.w, self.h, _ = [
        int(i) for i in self.model_variables['ImageSize'].split(',')
    ]
    self.engine = PoseEngine(
        f"/project-posenet/models/mobilenet/posenet_mobilenet_v1_075_{self.h}_{self.w}_quant_decoder_edgetpu.tflite"
    )
    # Get class id from config.
    self.class_id = int(self.model_variables['ClassID'])
    self.score_threshold = float(self.model_variables['MinScore'])
    self.keypoints = ('nose', 'left eye', 'right eye', 'left ear', 'right ear',
                      'left shoulder', 'right shoulder', 'left elbow',
                      'right elbow', 'left wrist', 'right wrist', 'left hip',
                      'right hip', 'left knee', 'right knee', 'left ankle',
                      'right ankle')

def generate_results(write_csv=False,
                     visualize_model_results=False,
                     visualize_reference_results=False):
    """Generates results from a model (either reference results or new inference).

    Args:
      write_csv: Whether to write new inference results to a csv file.
      visualize_model_results: Whether to visualize the new results.
      visualize_reference_results: Whether to visualize the reference results.
    """
    image = Image.open(TEST_IMAGE).convert('RGB')
    for model_path, model_name in generate_models():
        engine = PoseEngine(model_path)
        poses, _ = engine.DetectPosesInImage(image)
        if write_csv:
            write_to_csv(model_name, poses)
        input_shape = engine.get_input_tensor_shape()[1:3]
        if visualize_model_results:
            visualize_results_from_model(model_name, image, input_shape, poses)
        if visualize_reference_results:
            visualize_results_from_reference_file(model_name, image, input_shape)

def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mirror', help='flip video horizontally',
                        action='store_true')
    parser.add_argument('--model', help='.tflite model path.', required=False)
    parser.add_argument('--width', help='Source width', default='1920')
    parser.add_argument('--height', help='Source height', default='1080')
    parser.add_argument('--videosrc', help='Which video source to use',
                        default='/dev/video0')
    parser.add_argument('--h264', help='Use video/x-h264 input',
                        action='store_true')
    parser.add_argument('--jpeg', help='Use video/jpeg input',
                        action='store_true')
    args = parser.parse_args()

    if args.h264 and args.jpeg:
        print('Error: both mutually exclusive options h264 and jpeg set')
        sys.exit(1)

    default_model = 'models/bodypix_mobilenet_v1_075_1024_768_16_quant_edgetpu_decoder.tflite'
    model = args.model if args.model else default_model
    print('Model: {}'.format(model))

    engine = PoseEngine(model)
    inference_size = (engine.image_width, engine.image_height)
    print('Inference size: {}'.format(inference_size))

    src_size = (int(args.width), int(args.height))
    if args.videosrc.startswith('/dev/video'):
        print('Source size: {}'.format(src_size))

    print('Toggle mode keys:')
    print('  Toggle skeletons: ', TOGGLE_SKELETONS)
    print('  Toggle bounding boxes: ', TOGGLE_BBOXES)
    print('  Toggle anonymizer mode: ', TOGGLE_ANON)
    print('  Toggle heatmaps: ', TOGGLE_HEATMAP)
    print('  Toggle bodyparts: ', TOGGLE_BODYPARTS)

    run_pipeline(Callback(engine, src_size, save_every_n_frames=-1, print_stats=True),
                 src_size,
                 inference_size,
                 video_src=args.videosrc,
                 h264=args.h264,
                 jpeg=args.jpeg,
                 mirror=args.mirror)

def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mirror', help='flip video horizontally',
                        action='store_true')
    parser.add_argument('--model', help='.tflite model path.', required=False)
    parser.add_argument('--res', help='Resolution', default='640x480',
                        choices=['480x360', '640x480', '1280x720'])
    parser.add_argument('--videosrc', help='Which video source to use',
                        default='/dev/video0')
    parser.add_argument('--videotgt', help='Where to write the video to',
                        default='autovideosink')
    parser.add_argument('--anonymize', help='Use anonymizer mode',
                        action='store_true')
    parser.add_argument('--jpg', help='Use image/jpeg input',
                        action='store_true')
    args = parser.parse_args()

    default_model = 'models/posenet_mobilenet_v1_075_%d_%d_quant_decoder_edgetpu.tflite'
    if args.res == '480x360':
        src_size = (640, 480)
        appsink_size = (480, 360)
        model = args.model or default_model % (353, 481)
    elif args.res == '640x480':
        src_size = (640, 480)
        appsink_size = (640, 480)
        model = args.model or default_model % (481, 641)
    elif args.res == '1280x720':
        src_size = (1280, 720)
        appsink_size = (1280, 720)
        model = args.model or default_model % (721, 1281)

    print('Loading model: ', model)
    engine = PoseEngine(model, mirror=args.mirror)
    gstreamer.run_pipeline(Callback(engine, anonymize=args.anonymize),
                           src_size,
                           appsink_size,
                           use_appsrc=True,
                           mirror=args.mirror,
                           videosrc=args.videosrc,
                           jpginput=args.jpg,
                           videotgt=args.videotgt)

def inferencer(results, frameBuffer, model, camera_width, camera_height):
    engine = None

    # Get the list of Edge TPUs that have not yet been assigned a model.
    devices = edgetpu_utils.ListEdgeTpuPaths(
        edgetpu_utils.EDGE_TPU_STATE_UNASSIGNED)
    devopen = False
    for device in devices:
        try:
            engine = PoseEngine(model, device)
            devopen = True
            break
        except:
            continue

    if not devopen:
        print("TPU Devices open Error!!!")
        sys.exit(1)

    print("Loaded Graphs!!! ")

    while True:
        if frameBuffer.empty():
            continue

        # Run inference.
        color_image = frameBuffer.get()
        prepimg = color_image[:, :, ::-1].copy()
        tinf = time.perf_counter()
        result, inference_time = engine.DetectPosesInImage(prepimg)
        print(time.perf_counter() - tinf, "sec")
        results.put(result)

def run(callback, use_appsrc=False):
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--mirror", help="flip video horizontally",
                        action="store_true")
    parser.add_argument("--model", help=".tflite model path.", required=False)
    parser.add_argument("--res", help="Resolution", default="1280x720",
                        choices=["480x360", "640x480", "1280x720"])
    parser.add_argument("--videosrc", help="Which video source to use",
                        default="/dev/video0")
    parser.add_argument("--h264", help="Use video/x-h264 input",
                        action="store_true")
    args = parser.parse_args()

    default_model = "models/posenet_mobilenet_v1_075_%d_%d_quant_decoder_edgetpu.tflite"
    if args.res == "480x360":
        src_size = (640, 480)
        appsink_size = (480, 360)
        model = args.model or default_model % (353, 481)
    elif args.res == "640x480":
        src_size = (640, 480)
        appsink_size = (640, 480)
        model = args.model or default_model % (481, 641)
    elif args.res == "1280x720":
        src_size = (1280, 720)
        appsink_size = (1280, 720)
        model = args.model or default_model % (721, 1281)

    print("Loading model: ", model)
    engine = PoseEngine(model, mirror=args.mirror)
    gstreamer.run_pipeline(
        partial(callback, engine),
        src_size,
        appsink_size,
        use_appsrc=use_appsrc,
        mirror=args.mirror,
        videosrc=args.videosrc,
        h264input=args.h264,
    )

def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mirror', help='flip video horizontally',
                        action='store_true')
    parser.add_argument('--model', help='.tflite model path.', required=False)
    parser.add_argument('--width', help='Source width', default='640')
    parser.add_argument('--height', help='Source height', default='480')
    parser.add_argument('--videosrc', help='Which video source to use',
                        default='/dev/video0')
    parser.add_argument('--anonymize', dest='anonymize', action='store_true',
                        help='Use anonymizer mode [--noanonymize]')
    parser.add_argument('--noanonymize', dest='anonymize', action='store_false',
                        help=argparse.SUPPRESS)
    parser.set_defaults(anonymize=False)
    parser.add_argument('--bodyparts', dest='bodyparts', action='store_true',
                        help='Color by bodyparts [--nobodyparts]')
    parser.add_argument('--nobodyparts', dest='bodyparts', action='store_false',
                        help=argparse.SUPPRESS)
    parser.set_defaults(bodyparts=True)
    parser.add_argument('--h264', help='Use video/x-h264 input',
                        action='store_true')
    parser.add_argument('--jpeg', help='Use video/jpeg input',
                        action='store_true')
    args = parser.parse_args()

    if args.h264 and args.jpeg:
        print('Error: both mutually exclusive options h264 and jpeg set')
        sys.exit(1)

    default_model = 'models/bodypix_mobilenet_v1_075_640_480_16_quant_edgetpu_decoder.tflite'
    model = args.model if args.model else default_model
    print('Model: {}'.format(model))

    engine = PoseEngine(model)
    inference_size = (engine.image_width, engine.image_height)
    print('Inference size: {}'.format(inference_size))

    src_size = (int(args.width), int(args.height))
    if args.videosrc.startswith('/dev/video'):
        print('Source size: {}'.format(src_size))

    gstreamer.run_pipeline(Callback(engine,
                                    anonymize=args.anonymize,
                                    bodyparts=args.bodyparts),
                           src_size,
                           inference_size,
                           mirror=args.mirror,
                           videosrc=args.videosrc,
                           h264=args.h264,
                           jpeg=args.jpeg)

def run(callback):
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--model', help='.tflite model path.', required=False)
    parser.add_argument('--res', help='Resolution', default='640x480',
                        choices=['480x360', '640x480', '1280x720'])
    parser.add_argument('--camera', help='Camera', default='ip',
                        choices=['ip', 'built-in', 'realsense'])
    args = parser.parse_args()

    default_model = 'models/posenet_mobilenet_v1_075_%d_%d_quant_decoder_edgetpu.tflite'
    if args.res == '480x360':
        appsink_size = (480, 360)
        model = args.model or default_model % (353, 481)
    elif args.res == '640x480':
        appsink_size = (640, 480)
        model = args.model or default_model % (481, 641)
    elif args.res == '1280x720':
        appsink_size = (1280, 720)
        model = args.model or default_model % (721, 1281)

    camera = args.camera
    if camera == 'ip':
        src_size = (640, 480)
    elif camera == 'built-in':
        src_size = (1280, 720)
    elif camera == 'realsense':
        src_size = (1920, 1080)

    print('Loading model: ', model)
    engine = PoseEngine(model, mirror=False)

    x_scalar = src_size[0] / appsink_size[0]
    y_scalar = src_size[1] / appsink_size[1]
    point_scalar = src_size[0] / 480
    gstreamer2.run_pipeline(
        partial(callback, engine, x_scalar, y_scalar, point_scalar),
        src_size, appsink_size, camera)

    # ('left elbow', 'left wrist', 0.26),    # avg male 0.26; avg female 0.235 (estimate)
    # ('right elbow', 'right wrist', 0.26),  # avg male 0.26; avg female 0.235 (estimate)
    ('left ankle', 'left knee', 0.45),       # avg male 0.45; avg female 0.415 (valid)
    ('right ankle', 'right knee', 0.45),     # avg male 0.45; avg female 0.415 (valid)
    ('left shoulder', 'left elbow', 0.35),   # avg male 0.35; avg female 0.325 (valid)
    ('right shoulder', 'right elbow', 0.35), # avg male 0.35; avg female 0.325 (valid)
    ('left shoulder', 'left hip', 0.56),     # avg male 0.5; avg female 0.485 (estimate)
    ('right shoulder', 'right hip', 0.56),   # avg male 0.5; avg female 0.485 (estimate)
]

MAX = 2
THRESHOLD = 0.5

# Depends on the camera that is used:
# FOCAL_LENGTH = distance_to_obj * pixel_size_of_obj / real_obj_size
# FOCAL_LENGTH = 6.6761817924503907e+02  # 616.728321429
FOCAL_LENGTH = 7.395847924773853e+02 / 2.3  # 4.533079645790183e+02

ENGINE = PoseEngine(
    os.path.join(
        path,
        'src/project-posenet/models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite'
    ))

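# The bone-length table and the FOCAL_LENGTH comment above encode the pinhole
# relation used to estimate distance from a single camera: a limb of known
# real-world length spanning a given number of pixels lies at roughly
# distance = real_obj_size * FOCAL_LENGTH / pixel_size_of_obj.
# A minimal sketch of that relation follows; the helper name and the sample
# keypoint coordinates are illustrative assumptions, not part of the original code.
import math


def estimate_distance(p1, p2, real_length_m, focal_length=FOCAL_LENGTH):
    """Pinhole-camera distance estimate from a limb of known real length.

    p1, p2: (x, y) pixel coordinates of the two joints.
    real_length_m: real-world length of the limb in meters.
    """
    pixel_length = math.hypot(p1[0] - p2[0], p1[1] - p2[1])
    if pixel_length == 0:
        return None
    return real_length_m * focal_length / pixel_length


# Hypothetical example: a left-knee-to-left-ankle segment (0.45 m on average)
# spanning 90 pixels would be estimated at roughly 1.6 m from the camera.
print(estimate_distance((320, 200), (320, 290), 0.45))
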
import os
import cv2
import time
import numpy as np
from picamera import PiCamera
import matplotlib.pyplot as plt
from picamera.array import PiRGBArray
from pose_engine import PoseEngine

IM_WIDTH = 640
IM_HEIGHT = 480

# Load the PoseNet model.
engine = PoseEngine(
    'models/mobilenet/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite'
)

# Initialize frame rate calculation.
frame_rate_calc = 1
freq = cv2.getTickFrequency()
font = cv2.FONT_HERSHEY_SIMPLEX

camera = PiCamera()
camera.resolution = (IM_WIDTH, IM_HEIGHT)
camera.framerate = 10
rawCapture = PiRGBArray(camera, size=(IM_WIDTH, IM_HEIGHT))
rawCapture.truncate(0)

# Squat counter
counter = 0

# bucket_name = "your-bucket-name"
# source_file_name = "local/path/to/file"
# destination_blob_name = "storage-object-name"
storage_client = storage.Client()
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(source_file_name)
print("File uploaded to {}.".format(destination_blob_name))

model = 'models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite'
print('Loading model: ', model)
engine = PoseEngine(model)
input_shape = engine.get_input_tensor_shape()
inference_size = (input_shape[2], input_shape[1])
src_size = (512, 512)
bucket_name = 'YOUR-BUCKET-HERE'
cap = cv2.VideoCapture(0)


def main():
    while True:
        ret, frame = cap.read()
        pil_image = Image.fromarray(frame)
        pil_image = pil_image.resize((641, 481), Image.NEAREST)
        poses, inference_time = engine.DetectPosesInImage(np.uint8(pil_image))
        print('Inference Time: ', inference_time)

# Set measurement time in seconds.
parser = argparse.ArgumentParser(description='Set measurement time')
parser.add_argument('--t', type=int, default=20,
                    help='set the length of time for one measurement')
parser.add_argument('--total', type=int, default=60,
                    help='set the total length of time for monitoring')
args = parser.parse_args()
measurement_time = args.t
measurement_time_agg = args.total

engine = PoseEngine(
    '../forehead_detection/models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite'
)

# #################### Viola-Jones Face Detection ##################
# face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
# eye_cascade = cv2.CascadeClassifier('haarcascade_eye_tree_eyeglasses.xml')
# mouth_cascade = cv2.CascadeClassifier('haarcascade_smile.xml')


def viola_jones(img):
    img1 = deepcopy(img)
    gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    if len(faces) == 0:
        raise ValueError('No Face detected')
    for (x, y, w, h) in faces:

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--camera_idx', type=str,
                        help='Index of which video source to use.', default=0)
    parser.add_argument(
        '--model', type=str, help='Pose model to use.',
        default='models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite')
    parser.add_argument('--pose3d', type=str, help='3D Pose model to use.',
                        default='models/3dpose_gan_edgetpu.tflite')
    parser.add_argument('--dataset', type=str, help='Type of dataset.',
                        default='CORAL')
    parser.add_argument('--rot', type=int,
                        help='Number of degrees to rotate in 3D pose.', default=90)
    args = parser.parse_args()

    engine = PoseEngine(args.model)
    _, image_height, image_width, _ = engine.get_input_tensor_shape()

    interpreter_3dpose = None
    if len(args.pose3d) > 0:
        interpreter_3dpose = common.make_interpreter(args.pose3d)
        interpreter_3dpose.allocate_tensors()
    print("Load all models done!")

    cap = cv2.VideoCapture(args.camera_idx)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(cv2_im_rgb)
        pil_image = pil_image.resize((image_width, image_height), Image.NEAREST)

        start_time = time.monotonic()
        poses, inference_time = engine.DetectPosesInImage(np.uint8(pil_image))
        print('2D Pose Inference time: %.fms (%.2f fps)' %
              (inference_time, 1000 / inference_time))
        cv2_im = draw_skel_and_kp(cv2_im, poses, args.rot, interpreter_3dpose)
        end_time = time.monotonic()
        process_time = 1000 * (end_time - start_time)
        print('3D Pose End-to-End Inference time: %.fms (%.2f fps)' %
              (process_time, 1000 / process_time))

        cv2.namedWindow("frame", cv2.WND_PROP_FULLSCREEN)
        cv2.setWindowProperty("frame", cv2.WND_PROP_FULLSCREEN,
                              cv2.WINDOW_FULLSCREEN)
        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

from io import BytesIO
import time
import re
import picamera
import io
import numpy as np
from PIL import Image
from PIL import ImageDraw
from pose_engine import PoseEngine

GHOST = False

picam = picamera.PiCamera()
engine = PoseEngine('models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite')
# engine = PoseEngine('models/posenet_mobilenet_v1_075_721_1281_quant_decoder_edgetpu.tflite')

EDGES = (
    ('nose', 'left eye'),
    ('nose', 'right eye'),
    ('nose', 'left ear'),
    ('nose', 'right ear'),
    ('left ear', 'left eye'),
    ('right ear', 'right eye'),
    ('left eye', 'right eye'),
    ('left shoulder', 'right shoulder'),
    ('left shoulder', 'left elbow'),
    ('left shoulder', 'left hip'),
    ('right shoulder', 'right elbow'),
    ('right shoulder', 'right hip'),
    ('left elbow', 'left wrist'),

def main():
    default_model_dir = '../all_models'
    default_model = 'posenet/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite'
    default_labels = 'hand_label.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=1,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=str,
                        help='Index of which video source to use.', default=0)
    parser.add_argument('--threshold', type=float, default=0.5,
                        help='classifier score threshold')
    args = parser.parse_args()

    W = 640
    H = 480
    src_size = (W, H)

    print('Loading Pose model {}'.format(args.model))
    engine = PoseEngine(args.model)

    cap = cv2.VideoCapture(args.camera_idx)

    # ===================== social distancing =====================
    dt_vector = {}
    first_frame = True
    # Lower is better, from 1 to 16; creates the ellipse image sub-size mask.
    overlap_precision = 2
    # =============================================================

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if first_frame:
            mask = np.zeros(
                (int(frame.shape[0] / overlap_precision),
                 int(frame.shape[1] / overlap_precision), frame.shape[2]),
                dtype=np.uint8)
            first_frame = False
        background = frame
        ellipse_boxes = []
        draw_ellipse_requirements = []
        ellipse_pool = []

        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        im_size = (frame.shape[1], frame.shape[0])
        horizontal_ratio = 0.7
        vertical_ratio = 0.7
        homography_matrix = compute_homography(horizontal_ratio, vertical_ratio, im_size)

        poses, inference_time = engine.DetectPosesInImage(
            np.uint8(pil_im.resize((641, 481), Image.NEAREST)))
        skeletal_coordinates = poses

        dt_vector['ts'] = int(round(time.time() * 1000))
        dt_vector['bodys'] = []

        if type(skeletal_coordinates) is list:
            # Remove probability from joints and get a joint position list.
            skeletal_coordinates = create_joint_array(skeletal_coordinates)
            print(skeletal_coordinates[0])
            is_skeletal_overlapped = np.zeros(np.shape(skeletal_coordinates[0])[0])

            # Evaluate ellipses for each detected body.
            draw_ellipse_requirements = evaluate_ellipses(
                skeletal_coordinates[0], draw_ellipse_requirements,
                ellipse_boxes, ellipse_pool, mask, homography_matrix)

            # Evaluate overlapping.
            is_skeletal_overlapped = evaluate_overlapping(
                ellipse_boxes, is_skeletal_overlapped, ellipse_pool)

            # Trace results over the output image.
            bodys = trace(cv2_im, skeletal_coordinates,
                          draw_ellipse_requirements, is_skeletal_overlapped)
            dt_vector["bodys"] = bodys

        cv2_sodidi = cv2_im
        cv2.imshow('frame', cv2_im)
        cv2.imshow('1', cv2_sodidi)

        # =========== print mouse position ===========
        cv2.setMouseCallback('frame', onMouse)
        posNp = np.array(posList)
        print(posNp)

        # =========== streaming to server ===========
        img = np.hstack((cv2_im, cv2_sodidi))  # Use img in place of frame for streaming.
        encoded, buffer = cv2.imencode('.jpg', img)
        jpg_as_text = base64.b64encode(buffer)
        footage_socket.send(jpg_as_text)
        # ============================================

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

import argparse
import os

import numpy as np
from PIL import Image, ImageDraw

from pose_engine import PoseEngine

parser = argparse.ArgumentParser(description='PoseNet')
parser.add_argument('--model', type=str, default='mobilenet')
args = parser.parse_args()

os.system('wget https://upload.wikimedia.org/wikipedia/commons/thumb/3/38/'
          'Hindu_marriage_ceremony_offering.jpg/'
          '640px-Hindu_marriage_ceremony_offering.jpg -O couple.jpg')
pil_image = Image.open('couple.jpg')

if args.model == 'mobilenet':
    pil_image = pil_image.resize((641, 481), Image.NEAREST)
    engine = PoseEngine(
        'models/mobilenet/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite'
    )
else:
    pil_image = pil_image.resize((640, 480), Image.NEAREST)
    engine = PoseEngine(
        'models/resnet/posenet_resnet_50_640_480_16_quant_edgetpu_decoder.tflite'
    )

poses, inference_time = engine.DetectPosesInImage(np.uint8(pil_image))
print('Inference time: %.fms' % inference_time)

output = pil_image.copy()
draw = ImageDraw.Draw(output)
for pose in poses:
    if pose.score < 0.4:
        continue
    print('\nPose Score: ', pose.score)