def run(self, num_frames, preview_alpha, image_format, image_folder,
        enable_streaming):
    logger.info('Starting...')
    leds = Leds()

    with contextlib.ExitStack() as stack:
        player = stack.enter_context(Player(gpio=BUZZER_GPIO, bpm=10))
        photographer = stack.enter_context(
            Photographer(image_format, image_folder))
        animator = stack.enter_context(Animator(leds))
        # Forced sensor mode, 1640x1232, full FoV. See:
        # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
        # This is the resolution inference run on.
        # Use half of that for video streaming (820x616).
        camera = stack.enter_context(
            PiCamera(sensor_mode=4, resolution=(820, 616)))
        stack.enter_context(PrivacyLed(leds))

        server = None
        if enable_streaming:
            server = stack.enter_context(StreamingServer(camera))
            server.run()

        def take_photo():
            logger.info('Button pressed.')
            player.play(BEEP_SOUND)
            photographer.shoot(camera)

        if preview_alpha > 0:
            camera.start_preview(alpha=preview_alpha)

        button = Button(BUTTON_GPIO)
        button.when_pressed = take_photo

        joy_score_moving_average = MovingAverage(10)
        prev_joy_score = 0.0
        with CameraInference(face_detection.model()) as inference:
            logger.info('Model loaded.')
            player.play(MODEL_LOAD_SOUND)
            for i, result in enumerate(inference.run()):
                faces = face_detection.get_faces(result)
                photographer.update_faces(faces)

                joy_score = joy_score_moving_average.next(
                    average_joy_score(faces))
                animator.update_joy_score(joy_score)

                if server:
                    data = server_inference_data(result.width, result.height,
                                                 faces, joy_score)
                    server.send_inference_data(data)

                if joy_score > JOY_SCORE_PEAK > prev_joy_score:
                    player.play(JOY_SOUND)
                elif joy_score < JOY_SCORE_MIN < prev_joy_score:
                    player.play(SAD_SOUND)

                prev_joy_score = joy_score

                if self._done.is_set() or i == num_frames:
                    break
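
# The MovingAverage helper used by the run()/_run() variants above is defined
# elsewhere in the demo. A minimal sketch consistent with the
# `MovingAverage(10).next(value)` usage; treat it as an illustrative assumption,
# not the project's original implementation.

import collections


class MovingAverage:
    """Keeps the last `size` values and reports their mean."""

    def __init__(self, size):
        self._window = collections.deque(maxlen=size)

    def next(self, value):
        # Append the new value (evicting the oldest) and return the mean.
        self._window.append(value)
        return sum(self._window) / len(self._window)
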

def _run(self):
    logger.info('Starting...')
    leds = Leds()

    with contextlib.ExitStack() as stack:
        player = stack.enter_context(Player(gpio=BUZZER_GPIO, bpm=10))
        photographer = stack.enter_context(
            Photographer(self.args.image_format, self.args.image_folder))
        animator = stack.enter_context(Animator(leds))
        stack.enter_context(PrivacyLed(leds))

        server = None
        if self.args.enable_streaming:
            server = stack.enter_context(StreamingServer(self.camera))
            server.run()

        def take_photo():
            logger.info('Button pressed.')
            player.play(BEEP_SOUND)
            photographer.shoot(self.camera)

        button = Button(BUTTON_GPIO)
        button.when_pressed = take_photo

        joy_score_moving_average = MovingAverage(10)
        prev_joy_score = 0.0
        with CameraInference(face_detection.model()) as inference:
            logger.info('Model loaded.')
            player.play(MODEL_LOAD_SOUND)
            for i, result in enumerate(inference.run()):
                faces = face_detection.get_faces(result)
                photographer.update_faces(faces)

                avg_joy_score = average_joy_score(faces)
                joy_score = joy_score_moving_average.next(avg_joy_score)
                animator.update_joy_score(joy_score)

                if server:
                    data = server_inference_data(result.width, result.height,
                                                 faces, joy_score)
                    server.send_inference_data(data)

                if avg_joy_score > JOY_SCORE_MIN:
                    photographer.shoot(self.camera)

                # if joy_score > JOY_SCORE_PEAK > prev_joy_score:
                #     player.play(JOY_SOUND)
                # elif joy_score < JOY_SCORE_MIN < prev_joy_score:
                #     player.play(SAD_SOUND)

                prev_joy_score = joy_score

                if self._done.is_set() or i == self.args.num_frames:
                    break

def gather_data(interval, image_format, image_folder, enable_streaming,
                streaming_bitrate, mdns_name):
    done = threading.Event()

    def stop():
        logger.info('Stopping...')
        done.set()

    signal.signal(signal.SIGINT, lambda signum, frame: stop())
    signal.signal(signal.SIGTERM, lambda signum, frame: stop())

    logger.info('Starting...')
    with contextlib.ExitStack() as stack:
        photographer = stack.enter_context(
            Photographer(image_format, image_folder))
        # Forced sensor mode, 1640x1232, full FoV. See:
        # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
        # This is the resolution inference run on.
        # Use half of that for video streaming (820x616).
        camera = stack.enter_context(
            PiCamera(sensor_mode=4, resolution=(820, 616)))

        server = None
        if enable_streaming:
            server = stack.enter_context(
                StreamingServer(camera, bitrate=streaming_bitrate,
                                mdns_name=mdns_name))

        def take_photo():
            logger.info('Taking picture.')
            photographer.shoot(camera)

        while True:
            take_photo()
            time.sleep(interval)
            if done.is_set():
                break

def joy_detector(num_frames, preview_alpha, image_format, image_folder,
                 enable_streaming, streaming_bitrate, mdns_name):
    done = threading.Event()

    def stop():
        logger.info('Stopping...')
        done.set()

    signal.signal(signal.SIGINT, lambda signum, frame: stop())
    signal.signal(signal.SIGTERM, lambda signum, frame: stop())

    logger.info('Starting...')
    with contextlib.ExitStack() as stack:
        leds = stack.enter_context(Leds())
        board = stack.enter_context(Board())
        player = stack.enter_context(Player(gpio=BUZZER_GPIO, bpm=10))
        photographer = stack.enter_context(
            Photographer(image_format, image_folder))
        animator = stack.enter_context(Animator(leds))
        # Forced sensor mode, 1640x1232, full FoV. See:
        # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
        # This is the resolution inference run on.
        # Use half of that for video streaming (820x616).
        camera = stack.enter_context(
            PiCamera(sensor_mode=4, resolution=(820, 616)))
        stack.enter_context(PrivacyLed(leds))

        server = None
        if enable_streaming:
            server = stack.enter_context(
                StreamingServer(camera, bitrate=streaming_bitrate,
                                mdns_name=mdns_name))

        def model_loaded():
            logger.info('Model loaded.')
            player.play(MODEL_LOAD_SOUND)

        def take_photo():
            logger.info('Button pressed.')
            player.play(BEEP_SOUND)
            photographer.shoot(camera)

        if preview_alpha > 0:
            camera.start_preview(alpha=preview_alpha)

        board.button.when_pressed = take_photo

        joy_moving_average = moving_average(10)
        joy_moving_average.send(None)  # Initialize.
        joy_threshold_detector = threshold_detector(JOY_SCORE_LOW,
                                                    JOY_SCORE_HIGH)
        joy_threshold_detector.send(None)  # Initialize.
        for faces, frame_size in run_inference(num_frames, model_loaded):
            photographer.update_faces((faces, frame_size))
            joy_score = joy_moving_average.send(average_joy_score(faces))
            animator.update_joy_score(joy_score)
            event = joy_threshold_detector.send(joy_score)
            if event == 'high':
                logger.info('High joy detected.')
                player.play(JOY_SOUND)
            elif event == 'low':
                logger.info('Low joy detected.')
                player.play(SAD_SOUND)
            if server:
                server.send_overlay(svg_overlay(faces, frame_size, joy_score))

            if done.is_set():
                break
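
# joy_detector() drives two generator helpers with .send(): moving_average()
# and threshold_detector(). They are defined elsewhere in the demo; below is a
# minimal sketch inferred from how they are used here (prime with send(None),
# then send(score) once per frame). Treat the exact implementations as
# assumptions rather than the project's canonical code.

import collections


def moving_average(size):
    """Coroutine: yields the mean of the last `size` scores sent in."""
    window = collections.deque(maxlen=size)
    window.append((yield))  # Primed with send(None).
    while True:
        window.append((yield sum(window) / len(window)))


def threshold_detector(low_threshold, high_threshold):
    """Coroutine: yields 'high' or 'low' when the score crosses a threshold."""
    assert low_threshold < high_threshold

    event = None
    prev_score = 0.0
    while True:
        score = (yield event)  # Primed with send(None).
        if score > high_threshold > prev_score:
            event = 'high'
        elif score < low_threshold < prev_score:
            event = 'low'
        else:
            event = None
        prev_score = score
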

def monitor_run(num_frames, preview_alpha, image_format, image_folder,
                enable_streaming, streaming_bitrate, mdns_name, width, height,
                fps, region, enter_side, use_annotator, url, uname, pw,
                image_dir, dev):
    # Sign the device in and get an access and a refresh token, if a username
    # and password are provided.
    access_token = None
    refresh_token = None
    tokens = None
    start_token_timer = timer()
    if uname is not None and pw is not None:
        try:
            tokens = connect_to_server(url, uname, pw)
            access_token = tokens['access']
            refresh_token = tokens['refresh']
            print(access_token)
            print(refresh_token)
        except Exception:
            print("Could not get tokens from the server.")

    # Location where we want to send the faces + status for classification on
    # the web server.
    classification_path = url + "/" + image_dir

    done = threading.Event()

    def stop():
        logger.info('Stopping...')
        done.set()

    # Get the region center point and two corners.
    r_center = (region[0] + region[2] / 2, region[1] + region[3] / 2)
    r_corners = (region[0], region[0] + region[2],
                 region[1], region[1] + region[3])

    signal.signal(signal.SIGINT, lambda signum, frame: stop())
    signal.signal(signal.SIGTERM, lambda signum, frame: stop())

    logger.info('Starting...')
    with contextlib.ExitStack() as stack:
        leds = stack.enter_context(Leds())
        board = stack.enter_context(Board())
        player = stack.enter_context(Player(gpio=BUZZER_GPIO, bpm=10))
        photographer = stack.enter_context(
            Photographer(image_format, image_folder))
        animator = stack.enter_context(Animator(leds))
        # Forced sensor mode, 1640x1232, full FoV. See:
        # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
        # This is the resolution inference run on.
        # Use half of that for video streaming (820x616).
        camera = stack.enter_context(
            PiCamera(sensor_mode=4, framerate=fps, resolution=(width, height)))
        stack.enter_context(PrivacyLed(leds))

        # The annotator renders in software, so use a smaller size and scale
        # results for increased performance.
        annotator = None
        if use_annotator:
            annotator = Annotator(camera, dimensions=(320, 240))
            scale_x = 320 / width
            scale_y = 240 / height

        server = None
        if enable_streaming:
            server = stack.enter_context(
                StreamingServer(camera, bitrate=streaming_bitrate,
                                mdns_name=mdns_name))

        def model_loaded():
            logger.info('Model loaded.')
            player.play(MODEL_LOAD_SOUND)

        def take_photo():
            logger.info('Button pressed.')
            player.play(BEEP_SOUND)
            photographer.shoot(camera)

        if preview_alpha > 0:
            camera.start_preview(alpha=preview_alpha)

        board.button.when_pressed = take_photo

        joy_moving_average = moving_average(10)
        joy_moving_average.send(None)  # Initialize.
        joy_threshold_detector = threshold_detector(JOY_SCORE_LOW,
                                                    JOY_SCORE_HIGH)
        joy_threshold_detector.send(None)  # Initialize.

        previous_faces3 = []
        previous_faces2 = []
        previous_faces = []
        num_faces = 0
        for faces, frame_size in run_inference(num_frames, model_loaded):
            # If 4 minutes have passed since the access token was obtained,
            # refresh the token.
            end_token_timer = timer()  # Time in seconds.
            if refresh_token is not None and \
               end_token_timer - start_token_timer >= 240:
                tokens = refresh_access_token(url, refresh_token)
                access_token = tokens["access"]

            photographer.update_faces((faces, frame_size))
            joy_score = joy_moving_average.send(average_joy_score(faces))
            animator.update_joy_score(joy_score)
            event = joy_threshold_detector.send(joy_score)
            if event == 'high':
                logger.info('High joy detected.')
                player.play(JOY_SOUND)
            elif event == 'low':
                logger.info('Low joy detected.')
                player.play(SAD_SOUND)

            num_previous_faces = num_faces

            if use_annotator:
                annotator.clear()
                annotator.bounding_box(transform(region, scale_x, scale_y),
                                       fill=0)

            num_faces = 0
            tmp_arr = []
            faces_in_region = []
            photo_taken = False
            image = None
            for face in faces:
                face_center = (face.bounding_box[0] + face.bounding_box[2] / 2,
                               face.bounding_box[1] + face.bounding_box[3] / 2)
                # Check if the center of the face is in our region of interest.
                if r_corners[0] <= face_center[0] <= r_corners[1] and \
                   r_corners[2] <= face_center[1] <= r_corners[3]:
                    if not photo_taken:
                        stream = io.BytesIO()
                        with stopwatch('Taking photo'):
                            camera.capture(stream, format=image_format,
                                           use_video_port=True)
                        stream.seek(0)
                        image = Image.open(stream)
                        photo_taken = True
                    num_faces = num_faces + 1
                    faces_in_region.append(face)
                    # Creates a tuple (image of the face, entering/exiting status).
                    tmp_arr.append([
                        crop_face(image, image_format, image_folder,
                                  face.bounding_box),
                        get_status(face.bounding_box, r_center, enter_side)
                    ])
                    if use_annotator:
                        # Draw a box around the face.
                        annotator.bounding_box(
                            transform(face.bounding_box, scale_x, scale_y),
                            fill=0)

            if server:
                server.send_overlay(
                    svg_overlay(faces_in_region, frame_size, region, joy_score))

            if use_annotator:
                annotator.update()

            if num_faces < num_previous_faces:
                # Loop through previous faces: send face data, image and status.
                print(" A face left the region: send previous face data")
                # if not use_annotator:
                #     take_photo()
                faces_to_use = previous_faces
                if previous_faces2:
                    faces_to_use = previous_faces2
                if previous_faces3:
                    faces_to_use = previous_faces3
                for face in faces_to_use:
                    print(classification_path, face, access_token)
                    if access_token is not None:
                        print("sent face with access token")
                        send_face(classification_path, face, access_token, dev)

            previous_faces3 = previous_faces2
            previous_faces2 = previous_faces
            previous_faces = tmp_arr

            if done.is_set():
                break
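
# monitor_run() relies on connect_to_server(), refresh_access_token() and
# send_face() helpers that are defined elsewhere. A rough sketch of the shape
# they appear to have, assuming a JWT-style token endpoint served over HTTP and
# the `requests` library; the endpoint paths, payload fields, and the
# assumption that crop_face() returns a saved file path are guesses, not the
# project's actual API.

import requests


def connect_to_server(url, uname, pw):
    """Return a dict with 'access' and 'refresh' tokens for the credentials."""
    resp = requests.post(url + '/api/token/',
                         data={'username': uname, 'password': pw})
    resp.raise_for_status()
    return resp.json()


def refresh_access_token(url, refresh_token):
    """Exchange a refresh token for a new access token."""
    resp = requests.post(url + '/api/token/refresh/',
                         data={'refresh': refresh_token})
    resp.raise_for_status()
    return resp.json()


def send_face(classification_path, face, access_token, dev):
    """POST one (cropped face image, entering/exiting status) pair."""
    image_path, status = face
    with open(image_path, 'rb') as f:
        resp = requests.post(
            classification_path,
            headers={'Authorization': 'Bearer ' + access_token},
            files={'image': f},
            data={'status': status, 'dev': dev})
    resp.raise_for_status()
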

def run(self, num_frames, preview_alpha, image_format, image_folder,
        enable_streaming):
    logger.info('Starting...')
    leds = Leds()

    with contextlib.ExitStack() as stack:
        player = stack.enter_context(Player(gpio=BUZZER_GPIO, bpm=10))
        photographer = stack.enter_context(
            Photographer(image_format, image_folder))
        animator = stack.enter_context(Animator(leds))
        # Forced sensor mode, 1640x1232, full FoV. See:
        # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
        # This is the resolution inference run on.
        # Use half of that for video streaming (820x616).
        camera = stack.enter_context(
            PiCamera(sensor_mode=4, resolution=(820, 616)))
        stack.enter_context(PrivacyLed(leds))

        server = None
        if enable_streaming:
            server = stack.enter_context(StreamingServer(camera))
            server.run()

        def take_photo():
            logger.info('Button pressed.')
            player.play(BEEP_SOUND)
            photographer.shoot(camera)

        if preview_alpha > 0:
            camera.start_preview(alpha=preview_alpha)

        button = Button(BUTTON_GPIO)
        button.when_pressed = take_photo

        joy_score_moving_average = MovingAverage(5)  # Changed it from 10.
        prev_joy_score = 0.0
        with CameraInference(face_detection.model()) as inference:
            logger.info('Model loaded.')
            player.play(MODEL_LOAD_SOUND)
            for i, result in enumerate(inference.run()):
                faces = face_detection.get_faces(result)
                photographer.update_faces(faces)

                joy_score = joy_score_moving_average.next(
                    average_joy_score(faces))
                animator.update_joy_score(joy_score)

                if server:
                    data = server_inference_data(result.width, result.height,
                                                 faces, joy_score)
                    server.send_inference_data(data)

                if joy_score > JOY_SCORE_PEAK > prev_joy_score:
                    player.play(JOY_SOUND)
                    ## picoSpeakNow(list_happy[np.random.randint(0, N_HAPPY)])
                    ## os.system('pico2wave -w test.wav "keep smiling. I feed off of smile energy... do not let the smile die down." && aplay test.wav')
                    ## time.sleep(3)
                    espeak_happy = ('espeak -s160 -g6 -ven+f3 ' + '"' +
                                    if_happy_list[np.random.randint(0, N_HAPPY)] +
                                    '"')
                    os.system(espeak_happy)
                elif joy_score < 0.35 < prev_joy_score:
                    player.play(SAD_SOUND)
                    espeak_sad = 'espeak -s160 -g6 -ven+f3 ' + '"' + if_sad[0] + '"'
                    os.system(espeak_sad)
                    ## picoSpeakNow(list_sad[np.random.randint(0, N_SAD)])
                    ## time.sleep(3)
                    ## os.system('espeak "Keep smiling. I feed off of smile energy... do not let the smile die down"')
                    ## os.system('pico2wave -w test.wav "start smiling. I feed off of smile energy... do not let the smile die down." && aplay test.wav')

                prev_joy_score = joy_score

                if self._done.is_set() or i == num_frames:
                    break

def joy_detector(num_frames, preview_alpha, image_format, image_folder,
                 enable_streaming, streaming_bitrate, mdns_name):
    readings = []
    averages = []
    num_reading = 0
    num_average = 0
    done = threading.Event()

    def stop():
        logger.info('Stopping...')
        done.set()

    signal.signal(signal.SIGINT, lambda signum, frame: stop())
    signal.signal(signal.SIGTERM, lambda signum, frame: stop())

    logger.info('Starting...')
    with contextlib.ExitStack() as stack:
        leds = stack.enter_context(Leds())
        board = stack.enter_context(Board())
        player = stack.enter_context(Player(gpio=BUZZER_GPIO, bpm=10))
        photographer = stack.enter_context(
            Photographer(image_format, image_folder))
        animator = stack.enter_context(Animator(leds))
        # Forced sensor mode, 1640x1232, full FoV. See:
        # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
        # This is the resolution inference run on.
        # Use half of that for video streaming (820x616).
        camera = stack.enter_context(
            PiCamera(sensor_mode=4, resolution=(820, 616)))
        stack.enter_context(PrivacyLed(leds))

        server = None
        if enable_streaming:
            server = stack.enter_context(
                StreamingServer(camera, bitrate=streaming_bitrate,
                                mdns_name=mdns_name))

        def model_loaded():
            logger.info('Model loaded.')
            player.play(MODEL_LOAD_SOUND)

        def stop_playing():
            client.loop_start()
            client.subscribe("music")
            client.publish("music", "stop")
            client.loop_stop()
            print("Sent stopping signal.")

        if preview_alpha > 0:
            camera.start_preview(alpha=preview_alpha)

        board.button.when_pressed = stop_playing

        joy_moving_average = moving_average(10)
        joy_moving_average.send(None)  # Initialize.
        joy_threshold_detector = threshold_detector(JOY_SCORE_LOW,
                                                    JOY_SCORE_HIGH)
        joy_threshold_detector.send(None)  # Initialize.
        for faces, frame_size in run_inference(num_frames, model_loaded):
            photographer.update_faces((faces, frame_size))
            joy_score = joy_moving_average.send(average_joy_score(faces))

            # ----------------------------------
            # Keep a rolling window of the last 10 joy scores and publish
            # their average over MQTT on every 10th reading.
            if len(readings) < 10:
                readings.append(joy_score)
            else:
                readings.pop(0)
                readings.append(joy_score)
            num_reading += 1
            time.sleep(0.2)
            if num_reading % 10 == 0:
                average = sum(readings) / 10
                message = str(average)
                client.publish(topic, message)
                print("published")

            animator.update_joy_score(joy_score)
            event = joy_threshold_detector.send(joy_score)
            if event == 'high':
                logger.info('High joy detected.')
                player.play(JOY_SOUND)
            elif event == 'low':
                logger.info('Low joy detected.')
                player.play(SAD_SOUND)
            if server:
                server.send_overlay(svg_overlay(faces, frame_size, joy_score))

            if done.is_set():
                break
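
# The MQTT variant above references a module-level `client` and `topic` that
# are not shown. A minimal paho-mqtt setup it appears to assume; the broker
# host and topic name below are placeholders, not values from the original
# project.

import paho.mqtt.client as mqtt

MQTT_BROKER = 'localhost'  # Placeholder; point this at the real broker.
MQTT_PORT = 1883
topic = 'joy/average'      # Placeholder topic for the published averages.

client = mqtt.Client()
client.connect(MQTT_BROKER, MQTT_PORT, keepalive=60)
client.loop_start()  # Service network traffic in a background thread.
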

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dog_park_model_path',
                        help='Path to the model file for the dog park.')
    parser.add_argument('--vb1_model_path',
                        help='Path to the model file for volleyball court 1.')
    parser.add_argument('--vb2_model_path',
                        help='Path to the model file for volleyball court 2.')
    parser.add_argument('--label_path', required=True,
                        help='Path to label file that corresponds to the model.')
    parser.add_argument('--input_mean', type=float, default=128.0,
                        help='Input mean.')
    parser.add_argument('--input_std', type=float, default=128.0,
                        help='Input std.')
    parser.add_argument('--input_depth', type=int, default=3,
                        help='Input depth.')
    parser.add_argument('--enable_streaming', default=False, action='store_true',
                        help='Enable streaming server')
    parser.add_argument('--streaming_bitrate', type=int, default=1000000,
                        help='Streaming server video bitrate (kbps)')
    parser.add_argument('--mdns_name', default='',
                        help='Streaming server mDNS name')
    parser.add_argument('--preview', action='store_true', default=False,
                        help='Enables camera preview in addition to printing '
                             'result to terminal.')
    parser.add_argument('--time_interval', type=int, default=10,
                        help='Time interval at which to store data in seconds.')
    parser.add_argument('--gather_data', action='store_true', default=False,
                        help='Also save images according to the assigned category.')
    parser.add_argument('--timelapse', action='store_true', default=False,
                        help='Also save some timelapses of the entire scene, '
                             'every 120 seconds.')
    parser.add_argument('--image_folder', default='/home/pi/Pictures/Data',
                        help='Folder to save captured images')
    args = parser.parse_args()

    labels = read_labels(args.label_path)

    # At least one model needs to be passed in.
    assert args.dog_park_model_path or args.vb1_model_path or args.vb2_model_path

    # Check that the image folder has the expected per-location subfolders.
    if args.gather_data:
        expected_subfolders = ['dog_park', 'court_one', 'court_two']
        subfolders = os.listdir(args.image_folder)
        for folder in expected_subfolders:
            assert folder in subfolders

    with ExitStack() as stack:
        dog_park = {
            'location_name': 'dog_park',
            'path': args.dog_park_model_path,
        } if args.dog_park_model_path else None
        vb1 = {
            'location_name': 'court_one',
            'path': args.vb1_model_path,
        } if args.vb1_model_path else None
        vb2 = {
            'location_name': 'court_two',
            'path': args.vb2_model_path,
        } if args.vb2_model_path else None

        # Get the list of models, filtered to only the ones that were passed in.
        models = [dog_park, vb1, vb2]
        models = list(filter(lambda model: model, models))

        # Initialize models and add them to the context.
        for model in models:
            print('Initializing {model_name}...'.format(
                model_name=model['location_name']))
            descriptor = inference.ModelDescriptor(
                name='mobilenet_based_classifier',
                input_shape=(1, 160, 160, args.input_depth),
                input_normalizer=(args.input_mean, args.input_std),
                compute_graph=utils.load_compute_graph(model['path']))
            model['descriptor'] = descriptor

        if dog_park:
            dog_park['image_inference'] = stack.enter_context(
                inference.ImageInference(dog_park['descriptor']))
        if vb1:
            vb1['image_inference'] = stack.enter_context(
                inference.ImageInference(vb1['descriptor']))
        if vb2:
            vb2['image_inference'] = stack.enter_context(
                inference.ImageInference(vb2['descriptor']))

        camera = stack.enter_context(
            PiCamera(sensor_mode=4, resolution=(820, 616), framerate=30))

        server = None
        if args.enable_streaming:
            server = stack.enter_context(
                StreamingServer(camera, bitrate=args.streaming_bitrate,
                                mdns_name=args.mdns_name))

        if args.preview:
            # Draw bounding boxes around locations.
            # Load the arbitrarily sized image.
            img = Image.new('RGB', (820, 616))
            draw = ImageDraw.Draw(img)
            for location in LOCATIONS.values():
                x1, y1, x2, y2 = location
                draw_rectangle(draw, x1, y1, x2, y2, 3, outline='white')

            # Create an image padded to the required size with mode 'RGB'.
            pad = Image.new('RGB', (
                ((img.size[0] + 31) // 32) * 32,
                ((img.size[1] + 15) // 16) * 16,
            ))
            # Paste the original image into the padded one.
            pad.paste(img, (0, 0))

            # Add the overlay with the padded image as the source, but the
            # original image's dimensions.
            camera.add_overlay(pad.tobytes(), alpha=64, layer=3, size=img.size)
            camera.start_preview()

        data_filename = _make_filename(args.image_folder, 'data', None, 'json')
        data_generator = commit_data_to_long_term(args.time_interval,
                                                  data_filename)
        data_generator.send(None)

        # Capture one picture of the entire scene each time it's started again.
        time.sleep(2)
        date = time.strftime('%Y-%m-%d')
        scene_filename = _make_filename(args.image_folder, date, None)
        camera.capture(scene_filename)

        # Draw bounding boxes on the image showing the crop locations.
        with Image.open(scene_filename) as scene:
            draw = ImageDraw.Draw(scene)
            for location in LOCATIONS.values():
                x1, y1, x2, y2 = location
                draw_rectangle(draw, x1, y1, x2, y2, 3, outline='white')
            scene.save(scene_filename)

        # Constantly get cropped images.
        for cropped_images in get_cropped_images(camera, args.timelapse):
            svg_doc = None
            if args.enable_streaming:
                width = 820 * SVG_SCALE_FACTOR
                height = 616 * SVG_SCALE_FACTOR
                svg_doc = svg.Svg(width=width, height=height)
                for location in LOCATIONS.values():
                    x, y, x2, y2 = location
                    w = (x2 - x) * SVG_SCALE_FACTOR
                    h = (y2 - y) * SVG_SCALE_FACTOR
                    x = x * SVG_SCALE_FACTOR
                    y = y * SVG_SCALE_FACTOR
                    svg_doc.add(svg.Rect(
                        x=int(x), y=int(y), width=int(w), height=int(h),
                        rx=10, ry=10, fill_opacity=0.3,
                        style='fill:none;stroke:white;stroke-width:4px'))

            # For each inference model, crop and process a different thing.
            for model in models:
                location_name = model['location_name']
                image_inference = model['image_inference']
                cropped_image = cropped_images[location_name]
                # TODO: (Image Comparison) If False, return no activity.
                if cropped_image:
                    # Then run image_inference on them.
                    result = image_inference.run(cropped_image)
                    processed_result = process(result, labels, 'final_result')
                    data_generator.send(
                        (location_name, processed_result, svg_doc))
                    message = get_message(processed_result)

                    # Print the message
                    # print('\n')
                    # print('{location_name}:'.format(location_name=location_name))
                    # print(message)
                else:
                    # Fake processed_result.
                    processed_result = [('inactive', 1.00), ('active', 0.00)]
                    data_generator.send(
                        (location_name, processed_result, svg_doc))

                label = processed_result[0][0]
                timestamp = time.strftime('%Y-%m-%d_%H.%M.%S')
                # print(timestamp)
                # print('\n')

                if args.gather_data and cropped_image:
                    # Gather 1% data on 'no activity' since it's biased against
                    # that. Gather 0.1% of all images.
                    if (
                            # (label == 'no activity' and random.random() > 0.99) or
                            # (random.random() > 0.999)
                            # (location_name != 'dog_park' and random.random() > 0.99) or
                            (random.random() > 0.9)):
                        subdir = '{location_name}/{label}'.format(
                            location_name=location_name, label=label)
                        filename = _make_filename(args.image_folder, timestamp,
                                                  subdir)
                        cropped_image.save(filename)

                # if svg_doc:
                #     ## Plot points out
                #     ## 160 x 80 grid
                #     ## 16px width
                #     ## 20, 40, 60 for 0, 1, 2
                #     lines = message.split('\n')
                #     y_correction = len(lines) * 20
                #     for line in lines:
                #         svg_doc.add(svg.Text(line,
                #             x=(LOCATIONS[location_name][0]) * SVG_SCALE_FACTOR,
                #             y=(LOCATIONS[location_name][1] - y_correction) * SVG_SCALE_FACTOR,
                #             fill='white', font_size=20))
                #         y_correction = y_correction - 20

                # TODO: Figure out how to annotate at specific locations.
                # if args.preview:
                #     camera.annotate_foreground = Color('black')
                #     camera.annotate_background = Color('white')
                #     # PiCamera text annotation only supports ascii.
                #     camera.annotate_text = '\n %s' % message.encode(
                #         'ascii', 'backslashreplace').decode('ascii')

            if server:
                server.send_overlay(str(svg_doc))

        if args.preview:
            camera.stop_preview()
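
# process() and get_message() used by main() are helpers from the classifier
# script and are not shown here. A hedged sketch of process(), matching how its
# output is consumed above (entries are (label, probability) pairs sorted by
# confidence, so processed_result[0][0] is the top label); the project's real
# implementation and the inference result layout may differ.


def process(result, labels, out_tensor_name):
    """Return (label, probability) pairs sorted by descending probability."""
    # Assumes a single output tensor keyed by name, as in the AIY Vision
    # image-classification examples.
    assert len(result.tensors) == 1
    tensor = result.tensors[out_tensor_name]
    probs = tuple(tensor.data)
    assert len(probs) == len(labels)
    return sorted(zip(labels, probs), key=lambda pair: pair[1], reverse=True)
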