def main():
    """Run a compiled model on one image file or on the live camera stream.

    Command-line flags:
        --model_name: identifier given to the model descriptor.
        --model_path: path of the compute graph to load (required).
        --test_file: optional image path; when given, a single image
            inference is run and the function returns without opening the
            camera.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--model_name', default='test_model',
                            help='Model identifier.')
    arg_parser.add_argument('--model_path', required=True,
                            help='Path to model file.')
    arg_parser.add_argument('--test_file', default=None,
                            help='Path to test file.')
    options = arg_parser.parse_args()

    graph = utils.load_compute_graph(options.model_path)
    descriptor = ModelDescriptor(name=options.model_name,
                                 input_shape=(1, 192, 192, 3),
                                 input_normalizer=(0, 1),
                                 compute_graph=graph)

    if not options.test_file:
        # No test image supplied: stream from the camera until interrupted.
        with PiCamera(sensor_mode=4, framerate=30):
            with CameraInference(descriptor) as engine:
                for frame_result in engine.run():
                    summary = tensors_info(frame_result.tensors)
                    print('#%05d (%5.2f fps): %s' %
                          (engine.count, engine.rate, summary))
        return

    # Single-shot mode: run the model once on the given image.
    with ImageInference(descriptor) as engine:
        picture = Image.open(options.test_file)
        outcome = engine.run(picture)
        print(tensors_info(outcome.tensors))
def model():
    """Build the ``FaceRecognition`` model descriptor.

    Prints whether ``_COMPUTE_GRAPH_NAME`` exists as a filesystem path, then
    loads that compute graph and wraps it in a ``ModelDescriptor``.
    """
    graph_found = os.path.exists(_COMPUTE_GRAPH_NAME)
    print(graph_found)
    graph = utils.load_compute_graph(_COMPUTE_GRAPH_NAME)
    return ModelDescriptor(name='FaceRecognition',
                           input_shape=(1, 160, 160, 3),
                           input_normalizer=(127.5, 128),
                           compute_graph=graph)
def model_descriptor(graph_name):
    """Build a descriptor named ``'MODEL' + graph_name`` for the given graph.

    Args:
        graph_name: name passed to ``utils.load_compute_graph`` and appended
            to the ``'MODEL'`` prefix to form the descriptor name.
    """
    # Face detection model has special implementation in VisionBonnet firmware.
    # input_shape, input_normalizer, and compute_graph params have no effect.
    descriptor_name = 'MODEL' + graph_name
    graph = utils.load_compute_graph(graph_name)
    return ModelDescriptor(name=descriptor_name,
                           input_shape=(1, 0, 0, 3),
                           input_normalizer=(0, 0),
                           compute_graph=graph)
def start_self_driving():
    """Run camera inference and print a prediction for each frame while enabled.

    Starts two daemon threads (``capture`` with the camera, and ``on_off``),
    then iterates over camera inference results. A result is processed and
    printed only while the module-level flag ``on`` is truthy.
    """
    global on
    descriptor = inference.ModelDescriptor(
        name='mobilenet_160',
        input_shape=(1, 160, 160, 3),
        input_normalizer=(128.0, 128.0),
        compute_graph=utils.load_compute_graph(MODEL_NAME))

    with PiCamera(sensor_mode=4, resolution=(160, 160),
                  framerate=30) as camera:
        capture_worker = threading.Thread(target=capture, args=(camera, ))
        capture_worker.daemon = True
        capture_worker.start()

        with inference.CameraInference(descriptor) as inf:
            print('Model is ready. Type on/off to start/stop self-driving')
            sys.stdout.flush()

            # presumably on_off flips the `on` flag from user input -- confirm
            toggle_worker = threading.Thread(target=on_off, args=())
            toggle_worker.daemon = True
            toggle_worker.start()

            for frame_result in inf.run():
                if not on:
                    continue
                heading, confidence = process(frame_result)
                print('prediction: {:.2f} {} {:.2f}'.format(
                    time.time(), heading, confidence))
                sys.stdout.flush()
def model():
    """Build the ``FaceDetection`` model descriptor from ``_COMPUTE_GRAPH_NAME``."""
    # Face detection model has special implementation in VisionBonnet firmware.
    # input_shape, input_normalizer, and compute_graph params have no effect.
    graph = utils.load_compute_graph(_COMPUTE_GRAPH_NAME)
    return ModelDescriptor(name='FaceDetection',
                           input_shape=(1, 0, 0, 3),
                           input_normalizer=(0, 0),
                           compute_graph=graph)
def model():
    """Build the ``FaceDetection`` model descriptor from ``_COMPUTE_GRAPH_NAME``."""
    # Face detection model has special implementation in VisionBonnet firmware.
    # input_shape, input_normalizer, and compute_graph params have no effect.
    graph = utils.load_compute_graph(_COMPUTE_GRAPH_NAME)
    return ModelDescriptor(name='FaceDetection',
                           input_shape=(1, 0, 0, 3),
                           input_normalizer=(0, 0),
                           compute_graph=graph)
def main():
    """Parse command-line options and run the ``FawDetector``.

    The model and label file are fixed (the insect MobileNet graph and its
    label list); the flags tune layers, thresholds, the set of labels to keep
    and the SMS trigger threshold. All parsed values are forwarded
    positionally to ``FawDetector.run``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_layer',
                        default='map/TensorArrayStack/TensorArrayGatherV3',
                        help='Name of input layer.')
    parser.add_argument('--output_layer',
                        default="prediction",
                        help='Name of output layer.')
    parser.add_argument(
        '--num_frames',
        type=int,
        default=-1,
        help='Sets the number of frames to run for, otherwise runs forever.')
    parser.add_argument('--input_mean',
                        type=float,
                        default=128.0,
                        help='Input mean.')
    parser.add_argument('--input_std',
                        type=float,
                        default=128.0,
                        help='Input std.')
    parser.add_argument(
        '--threshold',
        type=float,
        default=0.1,
        help='Threshold for classification score (from output tensor).')
    parser.add_argument('--top_k',
                        type=int,
                        default=3,
                        help='Keep at most top_k labels.')
    # `type=list` would split a command-line value into single characters
    # ("abc" -> ['a', 'b', 'c']). `nargs='+'` collects one or more whole
    # label strings into a list, matching the shape of the default.
    parser.add_argument('--detecting_list',
                        nargs='+',
                        default=[
                            'Biston betularia (Peppered Moth)',
                            'Spodoptera litura (Oriental Leafworm Moth)'
                        ],
                        help='Input a list of bugs that you want to keep.')
    parser.add_argument('--message_threshold',
                        type=int,
                        default=1,
                        help='Input detection threshold for sending sms')
    args = parser.parse_args()

    model = inference.ModelDescriptor(
        name='mobilenet_based_classifier',
        input_shape=(1, 192, 192, 3),
        input_normalizer=(128.0, 128.0),
        compute_graph=utils.load_compute_graph(
            'mobilenet_v2_192res_1.0_inat_insect.binaryproto'))
    labels = read_labels(
        "/home/pi/models/mobilenet_v2_192res_1.0_inat_insect_labels.txt")

    detector = FawDetector()
    detector.run(args.input_layer, args.output_layer, args.num_frames,
                 args.input_mean, args.input_std, args.threshold, args.top_k,
                 args.detecting_list, args.message_threshold, model, labels)
def main():
    """Classify camera frames and drive the servos from the top result.

    Parses the model/label paths, input geometry and display options, then
    runs camera inference. For every frame the processed result is turned
    into a message that is printed and, with ``--preview``, drawn on the
    camera preview. The first processed entry is sent to the servos.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model_path', required=True,
        help='Path to converted model file that can run on VisionKit.')
    parser.add_argument(
        '--label_path', required=True,
        help='Path to label file that corresponds to the model.')
    parser.add_argument('--input_height', type=int, required=True,
                        help='Input height.')
    parser.add_argument('--input_width', type=int, required=True,
                        help='Input width.')
    parser.add_argument('--input_layer', required=True,
                        help='Name of input layer.')
    parser.add_argument('--output_layer', required=True,
                        help='Name of output layer.')
    parser.add_argument(
        '--num_frames', type=int, default=None,
        help='Sets the number of frames to run for, otherwise runs forever.')
    parser.add_argument('--input_mean', type=float, default=128.0,
                        help='Input mean.')
    parser.add_argument('--input_std', type=float, default=128.0,
                        help='Input std.')
    parser.add_argument('--input_depth', type=int, default=3,
                        help='Input depth.')
    parser.add_argument(
        '--threshold', type=float, default=0.1,
        help='Threshold for classification score (from output tensor).')
    parser.add_argument('--top_k', type=int, default=3,
                        help='Keep at most top_k labels.')
    parser.add_argument(
        '--preview', action='store_true', default=False,
        help='Enables camera preview in addition to printing result to terminal.')
    parser.add_argument('--show_fps', action='store_true', default=False,
                        help='Shows end to end FPS.')
    args = parser.parse_args()

    model = inference.ModelDescriptor(
        name='mobilenet_based_classifier',
        input_shape=(1, args.input_height, args.input_width, args.input_depth),
        input_normalizer=(args.input_mean, args.input_std),
        compute_graph=utils.load_compute_graph(args.model_path))
    labels = read_labels(args.label_path)

    with PiCamera(sensor_mode=4, resolution=(1640, 1232),
                  framerate=30) as camera:
        if args.preview:
            camera.start_preview()

        with inference.CameraInference(model) as camera_inference:
            for result in camera_inference.run(args.num_frames):
                processed_result = process(result, labels, args.output_layer,
                                           args.threshold, args.top_k)
                # Guard the index: presumably process() can return an empty
                # sequence when no label clears the threshold, in which case
                # there is nothing to signal and indexing [0] would raise.
                if processed_result:
                    send_signal_to_servos(processed_result[0])
                message = get_message(processed_result, args.threshold,
                                      args.top_k)
                if args.show_fps:
                    message += '\nWith %.1f FPS.' % camera_inference.rate
                print(message)

                if args.preview:
                    camera.annotate_foreground = Color('black')
                    camera.annotate_background = Color('white')
                    # PiCamera text annotation only supports ascii.
                    camera.annotate_text = '\n %s' % message.encode(
                        'ascii', 'backslashreplace').decode('ascii')

        if args.preview:
            camera.stop_preview()
def main():
    """Classify camera frames and print (optionally overlay) the results.

    Command-line flags select the model and label files, the input tensor
    geometry and normalisation, the score threshold / top-k cut-off, and
    whether to show a camera preview and the end-to-end FPS.
    """
    # (flag, add_argument keyword options), registered in this order.
    argument_specs = (
        ('--model_path', {
            'required': True,
            'help': 'Path to converted model file that can run on VisionKit.'}),
        ('--label_path', {
            'required': True,
            'help': 'Path to label file that corresponds to the model.'}),
        ('--input_height', {
            'type': int, 'required': True, 'help': 'Input height.'}),
        ('--input_width', {
            'type': int, 'required': True, 'help': 'Input width.'}),
        ('--input_layer', {
            'required': True, 'help': 'Name of input layer.'}),
        ('--output_layer', {
            'required': True, 'help': 'Name of output layer.'}),
        ('--num_frames', {
            'type': int, 'default': None,
            'help': 'Sets the number of frames to run for, otherwise runs forever.'}),
        ('--input_mean', {
            'type': float, 'default': 128.0, 'help': 'Input mean.'}),
        ('--input_std', {
            'type': float, 'default': 128.0, 'help': 'Input std.'}),
        ('--input_depth', {
            'type': int, 'default': 3, 'help': 'Input depth.'}),
        ('--threshold', {
            'type': float, 'default': 0.1,
            'help': 'Threshold for classification score (from output tensor).'}),
        ('--top_k', {
            'type': int, 'default': 3, 'help': 'Keep at most top_k labels.'}),
        ('--preview', {
            'action': 'store_true', 'default': False,
            'help': 'Enables camera preview in addition to printing result to terminal.'}),
        ('--show_fps', {
            'action': 'store_true', 'default': False,
            'help': 'Shows end to end FPS.'}),
    )
    cli = argparse.ArgumentParser()
    for flag, spec in argument_specs:
        cli.add_argument(flag, **spec)
    opts = cli.parse_args()

    tensor_shape = (1, opts.input_height, opts.input_width, opts.input_depth)
    normalizer = (opts.input_mean, opts.input_std)
    descriptor = inference.ModelDescriptor(
        name='mobilenet_based_classifier',
        input_shape=tensor_shape,
        input_normalizer=normalizer,
        compute_graph=utils.load_compute_graph(opts.model_path))
    label_names = read_labels(opts.label_path)

    with PiCamera(sensor_mode=4, resolution=(1640, 1232),
                  framerate=30) as camera:
        if opts.preview:
            camera.start_preview()

        with inference.CameraInference(descriptor) as camera_inference:
            for frame_result in camera_inference.run(opts.num_frames):
                classified = process(frame_result, label_names,
                                     opts.output_layer, opts.threshold,
                                     opts.top_k)
                text = get_message(classified, opts.threshold, opts.top_k)
                if opts.show_fps:
                    text = text + '\nWith %.1f FPS.' % camera_inference.rate
                print(text)

                if not opts.preview:
                    continue
                camera.annotate_foreground = Color('black')
                camera.annotate_background = Color('white')
                # PiCamera text annotation only supports ascii.
                ascii_text = text.encode('ascii', 'backslashreplace')
                camera.annotate_text = '\n %s' % ascii_text.decode('ascii')

        if opts.preview:
            camera.stop_preview()
def main():
    """Run a model on the camera stream and print per-frame tensor sizes and FPS.

    Command-line flags give the model path, the input height/width/depth and
    the input normalisation (mean, std). For each inference result the name
    and element count of every output tensor is printed together with the
    instantaneous frame rate.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model_path',
        required=True,
        help='Path to converted model file that can run on VisionKit.')
    parser.add_argument('--input_height', type=int, required=True,
                        help='Input height.')
    parser.add_argument('--input_width', type=int, required=True,
                        help='Input width.')
    parser.add_argument('--input_mean', type=float, default=128.0,
                        help='Input mean.')
    parser.add_argument('--input_std', type=float, default=128.0,
                        help='Input std.')
    parser.add_argument('--input_depth', type=int, default=3,
                        help='Input depth.')
    args = parser.parse_args()

    model = ModelDescriptor(
        name='test_run_model',
        input_shape=(1, args.input_height, args.input_width, args.input_depth),
        input_normalizer=(args.input_mean, args.input_std),
        compute_graph=utils.load_compute_graph(args.model_path))

    with PiCamera(sensor_mode=4, framerate=30):
        with CameraInference(model) as camera_inference:
            # Use the monotonic clock for interval measurement: the wall
            # clock can be stepped (e.g. by a time sync), which would yield
            # negative or zero intervals.
            last_time = time.monotonic()
            for i, result in enumerate(camera_inference.run()):
                output_tensor_str = [
                    '%s [%d elements]' % (k, len(v.data))
                    for k, v in result.tensors.items()
                ]
                cur_time = time.monotonic()
                elapsed = cur_time - last_time
                # Avoid ZeroDivisionError if two results share a timestamp.
                fps = 1.0 / elapsed if elapsed > 0 else float('inf')
                last_time = cur_time
                print('%d-th inference, fps: %.1f FPS, %s' %
                      (i, fps, ','.join(output_tensor_str)))
def start_self_driving():
    """Drive from camera inference results.

    Loads the ``mobilenet_160`` descriptor, opens the camera, and for every
    inference result passes the processed direction to ``RCool_drive.drive``
    and prints the timestamp, direction and probability.
    """
    graph = utils.load_compute_graph(MODEL_NAME)
    descriptor = inference.ModelDescriptor(name='mobilenet_160',
                                           input_shape=(1, 160, 160, 3),
                                           input_normalizer=(128.0, 128.0),
                                           compute_graph=graph)
    print('Model loaded')

    camera_settings = dict(sensor_mode=4, resolution=(160, 160), framerate=30)
    with PiCamera(**camera_settings):
        print('Connected to the Pi Camera')
        with inference.CameraInference(descriptor) as inf:
            for frame_result in inf.run():
                heading, confidence = process(frame_result)
                RCool_drive.drive(heading)
                report = '{:.2f} {} {:.2f}'.format(time.time(), heading,
                                                   confidence)
                print(report)
def main():
    """Run one image inference on ``current.jpg`` for every line read from stdin.

    Each line arriving on standard input acts as a trigger: the file
    ``current.jpg`` in the current working directory is opened, run through
    the model, and the processed label and probability are printed with the
    trigger time.
    """
    model = inference.ModelDescriptor(
        name='mobilenet_160',
        input_shape=(1, 160, 160, 3),
        input_normalizer=(128.0, 128.0),
        compute_graph=utils.load_compute_graph('dumb.binaryproto'))

    with inference.ImageInference(model) as inf:
        print('Waiting for input...')
        sys.stdout.flush()
        for _ in sys.stdin:
            now = time.time()
            image_path = os.path.join(os.getcwd(), 'current.jpg')
            # Close the image file deterministically on every iteration so
            # this unbounded loop does not accumulate open file handles.
            with Image.open(image_path) as img:
                result = inf.run(img)
            label, probability = process(result)
            print('prediction: {} {} {}'.format(now, label, probability))
            sys.stdout.flush()
def main():
    """Drive two motors from the model's (left, right) speed output.

    Command-line flags:
        --model_name: identifier given to the model descriptor.
        --model_path: path of the compute graph to load (required).
        --speed: factor applied to both speeds before they reach the motors.

    For every inference result the first output tensor is unpacked as
    ``(lspeed, rspeed)``. Each value is clamped to [-1, 1]; negative values
    reverse the motor and non-negative values drive it forward. Both motors
    are stopped whenever the loop ends for any reason.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_name', default='test_model',
                        help='Model identifier.')
    parser.add_argument('--model_path', required=True,
                        help='Path to model file.')
    parser.add_argument('--speed', default=0.5, type=float,
                        help='Reduction factor on speed')
    args = parser.parse_args()

    model = ModelDescriptor(name=args.model_name,
                            input_shape=(1, 192, 192, 3),
                            input_normalizer=(0, 1),
                            compute_graph=utils.load_compute_graph(
                                args.model_path))
    left = Motor(PIN_A, PIN_B)
    right = Motor(PIN_C, PIN_D)
    print('spinning')

    try:
        try:
            with PiCamera(sensor_mode=4, framerate=30):
                with CameraInference(model) as inference:
                    for result in inference.run():
                        data = [tensor.data
                                for tensor in result.tensors.values()]
                        lspeed, rspeed = data[0]
                        print('#%05d (%5.2f fps): %1.2f/%1.2f' %
                              (inference.count, inference.rate, lspeed,
                               rspeed))
                        if lspeed < 0:
                            left.reverse(-max(-1, lspeed) * args.speed)
                        else:
                            left.forward(min(1, lspeed) * args.speed)
                        if rspeed < 0:
                            right.reverse(-max(-1, rspeed) * args.speed)
                        else:
                            right.forward(min(1, rspeed) * args.speed)
        finally:
            # Always stop the motors, including on Ctrl-C: KeyboardInterrupt
            # is not an Exception subclass, so an `except Exception` handler
            # alone would leave them running.
            left.stop()
            right.stop()
    except Exception as e:
        # Errors are reported rather than re-raised; the motors have already
        # been stopped by the inner `finally`.
        print(e)
def initialize(self):
    """Lazily set up the shared car, model descriptor and inference engine.

    Each of ``Model.car``, ``Model.model`` and ``Model.inference_engine`` is
    only created when it is still ``None``, so repeated calls reuse what is
    already in place. Progress and failures are reported through
    ``self.log``.

    Returns:
        True when all three steps are in place, False as soon as one fails.
    """
    if Model.car is None:
        car = Car()
        if Car.connected:
            Model.car = car
            self.log('INFO', 'Car for self-driving is connected')
        else:
            self.log('ERROR', 'Car is not connected for self-driving')
            return False

    if Model.model is None:
        try:
            # Imported lazily so that a missing/broken vision stack is
            # reported through the log instead of failing at import time.
            from aiy.vision import inference
            from aiy.vision.models import utils
            Model.model = inference.ModelDescriptor(
                name='mobilenet_160',
                input_shape=(1, 160, 160, 3),
                input_normalizer=(128.0, 128.0),
                compute_graph=utils.load_compute_graph(MODEL_NAME))
            self.log('INFO', 'Self-driving model is loaded')
        except Exception as e:
            self.log(
                'ERROR',
                'Self-driving model cannot be loaded: {}'.format(str(e)))
            return False

    if Model.inference_engine is None:
        try:
            from aiy.vision import inference
            Model.inference_engine = inference.InferenceEngine()
            try:
                Model.inference_engine.unload_model('mobilenet_160')
            except Exception:
                # Best effort: a failed unload is ignored. Narrowed from a
                # bare `except:` so KeyboardInterrupt/SystemExit still
                # propagate.
                pass
            Model.model_name = Model.inference_engine.load_model(
                Model.model)
            Model.good = True
            self.log('INFO', 'Image inference has started')
        except Exception as e:
            self.log(
                'ERROR',
                'Image inference cannot be started: {}'.format(str(e)))
            return False

    return True
def main():
    """Run a model on the camera stream and print a summary of every result.

    Command-line flags give the model name and path, the input
    height/width/depth and the input normalisation (mean, std).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--model_name', default='test_model',
                     help='Model identifier.')
    cli.add_argument('--model_path', required=True,
                     help='Path to model file.')
    cli.add_argument('--input_height', type=int, required=True,
                     help='Input height.')
    cli.add_argument('--input_width', type=int, required=True,
                     help='Input width.')
    cli.add_argument('--input_depth', type=int, default=3,
                     help='Input depth.')
    cli.add_argument('--input_mean', type=float, default=128.0,
                     help='Input mean.')
    cli.add_argument('--input_std', type=float, default=128.0,
                     help='Input std.')
    opts = cli.parse_args()

    tensor_shape = (1, opts.input_height, opts.input_width, opts.input_depth)
    normalizer = (opts.input_mean, opts.input_std)
    descriptor = ModelDescriptor(
        name=opts.model_name,
        input_shape=tensor_shape,
        input_normalizer=normalizer,
        compute_graph=utils.load_compute_graph(opts.model_path))

    with PiCamera(sensor_mode=4, framerate=30):
        with CameraInference(descriptor) as engine:
            for frame_result in engine.run():
                summary = tensors_info(frame_result.tensors)
                print('#%05d (%5.2f fps): %s' %
                      (engine.count, engine.rate, summary))
def main():
    """Classify camera frames as crack / clear and mirror the result on two LEDs.

    The red LED is lit (green off) when the top label of a frame is
    ``'positive'``; otherwise green is lit (red off). The camera inference
    rate is printed after every frame.
    """
    # Loading the model and label
    graph = utils.load_compute_graph('CrackClassification_graph.binaryproto')
    descriptor = inference.ModelDescriptor(
        name='mobilenet_based_classifier',
        input_shape=(1, 160, 160, 3),
        input_normalizer=(128.0, 128.0),
        compute_graph=graph)
    print("Model loaded.")
    label_names = read_labels(label_path + 'crack_label.txt')
    print("Labels loaded")

    # Classifier parameters
    top_k = 3
    threshold = 0.4
    num_frame = None
    show_fps = False  # not read anywhere in this function

    # LED setup: start in the "clear" state (green on, red off).
    red_led = LED(PIN_B)
    green_led = LED(PIN_A)
    red_led.off()
    green_led.on()

    with PiCamera(sensor_mode=4, resolution=(1640, 1232), framerate=30):
        with inference.CameraInference(descriptor) as camera_inference:
            for frame_result in camera_inference.run(num_frame):
                ranked = process(frame_result, label_names, 'final_result',
                                 threshold, top_k)
                crack_found = ranked[0][0] == 'positive'
                if not crack_found:
                    print("CLEAR")
                    red_led.off()
                    green_led.on()
                else:
                    print("CRACK")
                    green_led.off()
                    red_led.on()
                print("Camera inference rate: " + str(camera_inference.rate))
def model():
    """Build the ``object_detection`` model descriptor from ``_COMPUTE_GRAPH_NAME``."""
    graph = utils.load_compute_graph(_COMPUTE_GRAPH_NAME)
    return ModelDescriptor(name='object_detection',
                           input_shape=(1, 256, 256, 3),
                           input_normalizer=(128.0, 128.0),
                           compute_graph=graph)
def compute_graph(self):
    """Load and return the compute graph named by ``self.compute_graph_file``."""
    graph_file = self.compute_graph_file
    return utils.load_compute_graph(graph_file)
def model():
    """Build the ``DishDetection`` model descriptor from ``_COMPUTE_GRAPH_NAME``."""
    graph = utils.load_compute_graph(_COMPUTE_GRAPH_NAME)
    return ModelDescriptor(name='DishDetection',
                           input_shape=(1, 0, 0, 3),
                           input_normalizer=(0, 0),
                           compute_graph=graph)
frameHeight = 256
frameRate = 20
contrast = 40
rotation = 180

# Set the picamera parameters.
camera = picamera.PiCamera()
camera.resolution = (frameWidth, frameHeight)
camera.framerate = frameRate
camera.contrast = contrast

# Descriptor for the model file located in the current working directory.
model = ModelDescriptor(
    name="DarthVaderDetector",
    input_shape=(1, 256, 256, 3),
    input_normalizer=(128.0, 128.0),
    compute_graph=utils.load_compute_graph(
        os.path.join(os.getcwd(), "darthvader.binaryproto")))

# Start the video process: frames are recorded into `img` until Ctrl-C.
with ImgCap(model, frameWidth, frameHeight, DEBUG) as img:
    camera.start_recording(img, format='rgb', splitter_port=1)
    try:
        while True:
            # using timeout=0, default, it'll return immediately
            camera.wait_recording(timeout=0)
            # if img.output is not None:
            #     print(img.output[0,0,0])
    except KeyboardInterrupt:
        pass
def compute_graph(self):
    """Load and return the compute graph named by ``self.compute_graph_file``."""
    graph_file = self.compute_graph_file
    return utils.load_compute_graph(graph_file)
def model(model_type=MOBILENET):
    """Build a model descriptor for the given model type.

    Args:
        model_type: key into ``_COMPUTE_GRAPH_NAME_MAP``; also used as the
            descriptor name. Defaults to ``MOBILENET``.

    Raises:
        KeyError: if ``model_type`` is not a key of the graph-name map.
    """
    descriptor_settings = {
        'name': model_type,
        'input_shape': (1, 160, 160, 3),
        'input_normalizer': (128.0, 128.0),
    }
    graph_name = _COMPUTE_GRAPH_NAME_MAP[model_type]
    return ModelDescriptor(compute_graph=utils.load_compute_graph(graph_name),
                           **descriptor_settings)
def model():
    """Build the ``dish_classification`` model descriptor from ``_COMPUTE_GRAPH_NAME``."""
    graph = utils.load_compute_graph(_COMPUTE_GRAPH_NAME)
    return ModelDescriptor(name='dish_classification',
                           input_shape=(1, 192, 192, 3),
                           input_normalizer=(128.0, 128.0),
                           compute_graph=graph)
def main() -> None:
    """Run the face-triggered hand-gesture classifier loop.

    Parses the model/label paths, input geometry, threshold and GPIO logic
    from the command line, builds the classifier descriptor, then loops
    forever in two phases:

    1. Face phase: reset the buffers and pins, show green, and repeat
       ``detect_face`` until ``image_boundary_check`` accepts the derived
       hand-box parameters.
    2. Gesture phase: classify hand images with ``ImageInference`` and feed
       the outputs into a short and a long buffer. The long buffer toggles
       the active state; the short buffer issues commands while active.
       The phase ends on deactivation or after ``max_no_activity_period``
       seconds without activation.

    Relies on module-level names not visible here (``leds``, ``pin_A`` ..
    ``pin_C``, buffer lengths, activation/deactivation indices, colours).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model_path',
        required=True,
        help='Path to converted model file that can run on VisionKit.')
    parser.add_argument(
        '--label_path',
        required=True,
        help='Path to label file that corresponds to the model.')
    parser.add_argument(
        '--input_height', type=int, required=True, help='Input height.')
    parser.add_argument(
        '--input_width', type=int, required=True, help='Input width.')
    parser.add_argument(
        '--input_layer', required=True, help='Name of input layer.')
    parser.add_argument(
        '--output_layer', required=True, help='Name of output layer.')
    parser.add_argument(
        '--num_frames',
        type=int,
        default=-1,
        help='Sets the number of frames to run for, otherwise runs forever.')
    parser.add_argument(
        '--input_mean', type=float, default=128.0, help='Input mean.')
    parser.add_argument(
        '--input_std', type=float, default=128.0, help='Input std.')
    parser.add_argument(
        '--input_depth', type=int, default=3, help='Input depth.')
    parser.add_argument(
        '--threshold',
        type=float,
        default=0.6,
        help='Threshold for classification score (from output tensor).')
    parser.add_argument(
        '--preview',
        action='store_true',
        default=False,
        help='Enables camera preview in addition to printing result to terminal.')
    parser.add_argument(
        '--gpio_logic',
        default='NORMAL',
        help='Indicates if NORMAL or INVERSE logic is used in GPIO pins.')
    parser.add_argument(
        '--show_fps',
        action='store_true',
        default=False,
        help='Shows end to end FPS.')
    args = parser.parse_args()

    # Model & labels
    model = ModelDescriptor(
        name='mobilenet_based_classifier',
        input_shape=(1, args.input_height, args.input_width, args.input_depth),
        input_normalizer=(args.input_mean, args.input_std),
        compute_graph=utils.load_compute_graph(args.model_path))
    labels = read_labels(args.label_path)

    with PiCamera() as camera:
        # Forced sensor mode, 1640x1232, full FoV. See:
        # https://picamera.readthedocs.io/en/release-1.13/fov.html#sensor-modes
        # This is the resolution inference run on.
        camera.sensor_mode = 4

        # Scaled and cropped resolution. If different from sensor mode implied
        # resolution, inference results must be adjusted accordingly. This is
        # true in particular when camera.start_recording is used to record an
        # encoded h264 video stream as the Pi encoder can't encode all native
        # sensor resolutions, or a standard one like 1080p may be desired.
        camera.resolution = (1640, 1232)

        # Start the camera stream.
        # NOTE(review): the preview is started regardless of --preview, and
        # --num_frames, --input_layer and --show_fps are parsed but not read
        # in this function.
        camera.framerate = 30
        camera.start_preview()

        while True:
            # Phase 1: wait for a face whose derived hand box passes the
            # boundary check. Buffers and pins are reset on every attempt.
            while True:
                long_buffer = []
                short_buffer = []
                pinStatus(pin_A,'LOW',args.gpio_logic)
                pinStatus(pin_B,'LOW',args.gpio_logic)
                pinStatus(pin_C,'LOW',args.gpio_logic)
                leds.update(Leds.rgb_on(GREEN))
                face_box = detect_face()
                print("Entered the loop of face classifier")
                hand_box_params = determine_hand_box_params(face_box)
                if image_boundary_check(hand_box_params):
                    print("Hand gesture identified")
                    break

            # Start hand classifier
            is_active = False
            leds.update(Leds.rgb_on(PURPLE))
            start_timer = time.time()

            # Phase 2: classify gestures until deactivated or timed out.
            with ImageInference(model) as img_inference:
                while True:
                    print("Entered the loop of gesture classifier")
                    #check_termination_trigger()
                    if is_active:
                        leds.update(Leds.rgb_on(RED))

                    hands_image = capture_hands_image(camera,hand_box_params)
                    output = classify_hand_gestures(img_inference,hands_image,model=model,labels=labels,output_layer=args.output_layer,threshold = args.threshold)

                    # Each output is pushed into both buffers; each call
                    # returns a (guess, count) pair for its buffer.
                    short_guess, num_short_guess = buffer_update(output,short_buffer,short_buffer_length)
                    long_guess, num_long_guess = buffer_update(output,long_buffer,long_buffer_length)

                    # Activation of classifier
                    if (long_guess == activation_index or long_guess == deactivation_index) and not is_active and num_long_guess >= (long_buffer_length - 3):
                        is_active = True
                        leds.update(Leds.rgb_on(RED))
                        send_signal_to_pins(activation_index,args.gpio_logic)
                        # Clear the long buffer and count so the same gesture
                        # does not immediately trigger the deactivation check
                        # below.
                        long_buffer = []
                        num_long_guess = 0
                        time.sleep(1)

                    # Deactivation of classifier (go back to stable face detection)
                    if (long_guess == activation_index or long_guess == deactivation_index) and \
                            is_active and num_long_guess >= (long_buffer_length - 3):
                        is_active = False
                        leds.update(Leds.rgb_off())
                        long_buffer = []
                        num_long_guess = 0
                        send_signal_to_pins(deactivation_index,args.gpio_logic)
                        time.sleep(1)
                        break

                    # If not activated within max_no_activity_period seconds, go back to stable face detection
                    if not is_active:
                        timer = time.time()-start_timer
                        if timer >= max_no_activity_period:
                            leds.update(Leds.rgb_off())
                            send_signal_to_pins(deactivation_index,args.gpio_logic)
                            time.sleep(1)
                            break
                    else:
                        # While active, keep restarting the inactivity timer.
                        start_timer = time.time()

                    # Displaying classified hand gesture commands
                    if num_short_guess >= (short_buffer_length-1) and is_active:
                        print_hand_command(short_guess)
                        send_signal_to_pins(short_guess,args.gpio_logic)

        # NOTE(review): the outer `while True` has no break, so this line is
        # only reached if that changes.
        camera.stop_preview()
def main() -> None:
    """Monitor up to three park locations with per-location image classifiers.

    Parses the model paths (at least one is required), the label path and
    the streaming / preview / data-gathering options. It opens one
    ``ImageInference`` per supplied model plus the camera (and optionally a
    streaming server) inside a single ``ExitStack``, captures one annotated
    picture of the whole scene, and then classifies the cropped image of
    each location for every batch yielded by ``get_cropped_images``.

    Every per-location result is sent to the ``commit_data_to_long_term``
    generator. With ``--gather_data`` a random subset of the cropped images
    is also saved under ``<image_folder>/<location>/<label>``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dog_park_model_path',
                        help='Path to the model file for the dog park.')
    parser.add_argument('--vb1_model_path',
                        help='Path to the model file for volley ball court 1.')
    parser.add_argument('--vb2_model_path',
                        help='Path to the model file for volley ball court 1.')
    parser.add_argument(
        '--label_path',
        required=True,
        help='Path to label file that corresponds to the model.')
    parser.add_argument('--input_mean',
                        type=float,
                        default=128.0,
                        help='Input mean.')
    parser.add_argument('--input_std',
                        type=float,
                        default=128.0,
                        help='Input std.')
    parser.add_argument('--input_depth',
                        type=int,
                        default=3,
                        help='Input depth.')
    parser.add_argument('--enable_streaming',
                        default=False,
                        action='store_true',
                        help='Enable streaming server')
    parser.add_argument('--streaming_bitrate',
                        type=int,
                        default=1000000,
                        help='Streaming server video bitrate (kbps)')
    parser.add_argument('--mdns_name',
                        default='',
                        help='Streaming server mDNS name')
    parser.add_argument(
        '--preview',
        action='store_true',
        default=False,
        help=
        'Enables camera preview in addition to printing result to terminal.')
    parser.add_argument(
        '--time_interval',
        type=int,
        default=10,
        help='Time interval at which to store data in seconds.')
    parser.add_argument(
        '--gather_data',
        action='store_true',
        default=False,
        help='Also save images according to the assigned category.')
    parser.add_argument(
        '--timelapse',
        action='store_true',
        default=False,
        help='Also save some timelapses of the entire scene, every 120 seconds.'
    )
    parser.add_argument('--image_folder',
                        default='/home/pi/Pictures/Data',
                        help='Folder to save captured images')
    args = parser.parse_args()

    labels = read_labels(args.label_path)

    # At least one model needs to be passed in.
    # NOTE(review): assert statements are removed under `python -O`, so this
    # check and the folder check below are skipped in optimized mode.
    assert args.dog_park_model_path or args.vb1_model_path or args.vb2_model_path

    # Check that the folder exists
    if args.gather_data:
        expected_subfolders = ['dog_park', 'court_one', 'court_two']
        subfolders = os.listdir(args.image_folder)
        for folder in expected_subfolders:
            assert folder in subfolders

    with ExitStack() as stack:
        # One dict per location whose model path was given, otherwise None.
        dog_park = {
            'location_name': 'dog_park',
            'path': args.dog_park_model_path,
        } if args.dog_park_model_path else None
        vb1 = {
            'location_name': 'court_one',
            'path': args.vb1_model_path,
        } if args.vb1_model_path else None
        vb2 = {
            'location_name': 'court_two',
            'path': args.vb2_model_path,
        } if args.vb2_model_path else None

        # Get the list of models, filter to only the ones that were passed in.
        models = [dog_park, vb1, vb2]
        models = list(filter(lambda model: model, models))

        # Initialize models and add them to the context
        for model in models:
            print('Initializing {model_name}...'.format(
                model_name=model["location_name"]))
            descriptor = inference.ModelDescriptor(
                name='mobilenet_based_classifier',
                input_shape=(1, 160, 160, args.input_depth),
                input_normalizer=(args.input_mean, args.input_std),
                compute_graph=utils.load_compute_graph(model['path']))
            model['descriptor'] = descriptor

        # The inference contexts are entered on the shared stack so they are
        # all closed together when the `with` block exits.
        if dog_park:
            dog_park['image_inference'] = stack.enter_context(
                inference.ImageInference(dog_park['descriptor']))
        if vb1:
            vb1['image_inference'] = stack.enter_context(
                inference.ImageInference(vb1['descriptor']))
        if vb2:
            vb2['image_inference'] = stack.enter_context(
                inference.ImageInference(vb2['descriptor']))

        camera = stack.enter_context(
            PiCamera(sensor_mode=4, resolution=(820, 616), framerate=30))

        server = None
        if args.enable_streaming:
            server = stack.enter_context(
                StreamingServer(camera,
                                bitrate=args.streaming_bitrate,
                                mdns_name=args.mdns_name))

        if args.preview:
            # Draw bounding boxes around locations
            # Load the arbitrarily sized image
            img = Image.new('RGB', (820, 616))
            draw = ImageDraw.Draw(img)

            for location in LOCATIONS.values():
                x1, y1, x2, y2 = location
                draw_rectangle(draw, x1, y1, x2, y2, 3, outline='white')

            # Create an image padded to the required size with
            # mode 'RGB'
            # (width rounded up to a multiple of 32, height to a multiple
            # of 16)
            pad = Image.new('RGB', (
                ((img.size[0] + 31) // 32) * 32,
                ((img.size[1] + 15) // 16) * 16,
            ))
            # Paste the original image into the padded one
            pad.paste(img, (0, 0))

            # Add the overlay with the padded image as the source,
            # but the original image's dimensions
            camera.add_overlay(pad.tobytes(),
                               alpha=64,
                               layer=3,
                               size=img.size)
            camera.start_preview()

        data_filename = _make_filename(args.image_folder, 'data', None, 'json')
        data_generator = commit_data_to_long_term(args.time_interval,
                                                  data_filename)
        # Prime the generator so it is ready to receive values via send().
        data_generator.send(None)

        # Capture one picture of entire scene each time it's started again.
        time.sleep(2)
        date = time.strftime('%Y-%m-%d')
        scene_filename = _make_filename(args.image_folder, date, None)
        camera.capture(scene_filename)

        # Draw bounding box on image showing the crop locations
        with Image.open(scene_filename) as scene:
            draw = ImageDraw.Draw(scene)

            for location in LOCATIONS.values():
                x1, y1, x2, y2 = location
                draw_rectangle(draw, x1, y1, x2, y2, 3, outline='white')

            scene.save(scene_filename)

        # Constantly get cropped images
        for cropped_images in get_cropped_images(camera, args.timelapse):

            svg_doc = None
            if args.enable_streaming:
                # Overlay with one rectangle per location, scaled by
                # SVG_SCALE_FACTOR.
                width = 820 * SVG_SCALE_FACTOR
                height = 616 * SVG_SCALE_FACTOR
                svg_doc = svg.Svg(width=width, height=height)

                for location in LOCATIONS.values():
                    x, y, x2, y2 = location
                    w = (x2 - x) * SVG_SCALE_FACTOR
                    h = (y2 - y) * SVG_SCALE_FACTOR
                    x = x * SVG_SCALE_FACTOR
                    y = y * SVG_SCALE_FACTOR
                    svg_doc.add(
                        svg.Rect(
                            x=int(x),
                            y=int(y),
                            width=int(w),
                            height=int(h),
                            rx=10,
                            ry=10,
                            fill_opacity=0.3,
                            style='fill:none;stroke:white;stroke-width:4px'))

            # For each inference model, crop and process a different thing.
            for model in models:
                location_name = model['location_name']
                image_inference = model['image_inference']
                cropped_image = cropped_images[location_name]

                # TODO: (Image Comparison) If False,return no activity.
                if cropped_image:
                    # then run image_inference on them.
                    result = image_inference.run(cropped_image)
                    processed_result = process(result, labels, 'final_result')
                    data_generator.send(
                        (location_name, processed_result, svg_doc))
                    # `message` is only consumed by the disabled output code
                    # further down.
                    message = get_message(processed_result)

                    # Print the message
                    # print('\n')
                    # print('{location_name}:'.format(location_name=location_name))
                    # print(message)
                else:
                    # Fake processed_result
                    processed_result = [('inactive', 1.00), ('active', 0.00)]
                    data_generator.send(
                        (location_name, processed_result, svg_doc))

                label = processed_result[0][0]
                timestamp = time.strftime('%Y-%m-%d_%H.%M.%S')
                # print(timestamp)
                # print('\n')

                if args.gather_data and cropped_image:
                    # Gather 1% data on 'no activity' since it's biased against that.
                    # Gather 0.1% of all images.
                    # NOTE(review): the percentages above describe the
                    # disabled conditions. The only active condition is
                    # random.random() > 0.9, so roughly 10% of cropped images
                    # are saved.
                    if (
                            # (label == 'no activity' and random.random() > 0.99) or
                            # (random.random() > 0.999)
                            # (location_name != 'dog_park' and random.random() > 0.99) or
                        (random.random() > 0.9)):
                        subdir = '{location_name}/{label}'.format(
                            location_name=location_name, label=label)
                        filename = _make_filename(args.image_folder, timestamp,
                                                  subdir)
                        cropped_image.save(filename)

                # if svg_doc:
                #     ## Plot points out
                #     ## 160 x 80 grid
                #     ## 16px width
                #     ## 20, 40, 60 for 0, 1, 2
                #     lines = message.split('\n')
                #     y_correction = len(lines) * 20
                #     for line in lines:
                #         svg_doc.add(svg.Text(line,
                #         x=(LOCATIONS[location_name][0]) * SVG_SCALE_FACTOR,
                #         y=(LOCATIONS[location_name][1] - y_correction) * SVG_SCALE_FACTOR,
                #         fill='white', font_size=20))
                #         y_correction = y_correction - 20

            # TODO: Figure out how to annotate at specific locations.
            # if args.preview:
            #     camera.annotate_foreground = Color('black')
            #     camera.annotate_background = Color('white')
            #     # PiCamera text annotation only supports ascii.
            #     camera.annotate_text = '\n %s' % message.encode(
            #         'ascii', 'backslashreplace').decode('ascii')

            if server:
                server.send_overlay(str(svg_doc))

        if args.preview:
            camera.stop_preview()
def model():
    """Build the ``dish_classifier`` model descriptor from ``_COMPUTE_GRAPH_NAME``."""
    graph = utils.load_compute_graph(_COMPUTE_GRAPH_NAME)
    return ModelDescriptor(name='dish_classifier',
                           input_shape=(1, 192, 192, 3),
                           input_normalizer=(128.0, 128.0),
                           compute_graph=graph)
# Read the shutter counter from the first line of `filename`. The file is
# opened in a `with` block so the handle is closed once the lines are read.
with open(filename) as fp:
    tmp_shutter_numb = fp.readlines()
tmp_shutter_numb = tmp_shutter_numb[0].rstrip()
shutter_numb = int(tmp_shutter_numb)


def read_labels(label_path):
    """Return the lines of ``label_path`` with surrounding whitespace stripped."""
    with open(label_path) as label_file:
        return [label.strip() for label in label_file]


model = inference.ModelDescriptor(
    name='mobilenet_based_classifier',
    input_shape=(1, args.input_height, args.input_width, args.input_depth),
    input_normalizer=(args.input_mean, args.input_std),
    compute_graph=utils.load_compute_graph(args.model_path))
labels = read_labels(args.label_path)


def get_message(result, threshold, top_k):
    """Format a classification result for display.

    Args:
        result: sequence of strings, joined with newlines when non-empty.
        threshold: score threshold, reported when ``result`` is empty.
        top_k: label limit, reported when ``result`` is empty.

    Returns:
        The joined result lines, or a "Nothing detected" message that
        mentions ``threshold`` and ``top_k``.
    """
    if result:
        return '%s' % '\n'.join(result)
    else:
        return 'Nothing detected when threshold=%.2f, top_k=%d' % (threshold,
                                                                   top_k)


def process(result, labels, tensor_name, threshold, top_k):
    """Processes inference result and returns labels sorted by confidence."""
    # NOTE(review): the body visible here ends after the sanity check below
    # and returns nothing; the rest of the function appears to be missing.
    # MobileNet based classification model returns one result vector.
    assert len(result.tensors) == 1
def main(): model_path = '/opt/aiy/models/retrained_graph.binaryproto' #model_path = '/opt/aiy/models/mobilenet_v1_160res_0.5_imagenet.binaryproto' label_path = '/opt/aiy/models/retrained_labels_new.txt' #label_path = '/opt/aiy/models/mobilenet_v1_160res_0.5_imagenet_labels.txt' model_path = '/opt/aiy/models/rg_v3_new.binaryproto' label_path = '/opt/aiy/models/retrained_labels_new.txt' input_height = 160 input_width = 160 input_layer = 'input' output_layer = 'final_result' threshold = 0.8 # Model & labels model = ModelDescriptor( name='mobilenet_based_classifier', input_shape=(1, input_height, input_width, 3), input_normalizer=(128.0, 128.0), compute_graph=utils.load_compute_graph(model_path)) labels = read_labels(label_path) new_labels = [] for eachLabel in labels: if len(eachLabel)>1: new_labels.append(eachLabel) labels = new_labels #print(labels) s = xmlrpc.client.ServerProxy("http://aiy.mdzz.info:8000/") player = TonePlayer(BUZZER_GPIO, 10) player.play(*MODEL_LOAD_SOUND) while True: while True: if s.camera() == 1: print('vision kit is woken up') with Leds() as leds: leds.pattern = Pattern.blink(100) leds.update(Leds.rgb_pattern(Color.RED)) time.sleep(2.0) start_time = round(time.time()) break time.sleep(0.2) print('no signal, sleeping...') with PiCamera() as camera: # Configure camera camera.sensor_mode = 4 camera.resolution = (1664, 1232) # Full Frame, 16:9 (Camera v2) camera.framerate = 30 camera.start_preview() while True: # Do inference on VisionBonnet #print('Start capturing') with CameraInference(face_detection.model()) as inference: for result in inference.run(): #print(type(result)) faces = face_detection.get_faces(result) if len(faces) >= 1: #print('camera captures...') extension = '.jpg' filename = time.strftime('%Y-%m-%d %H:%M:%S') + extension camera.capture(filename) image_npp = np.empty((1664 * 1232 * 3,), dtype=np.uint8) camera.capture(image_npp, 'rgb') image_npp = image_npp.reshape((1232, 1664, 3)) image_npp = image_npp[:1232, :1640, :] # image = 
Image.open('jj.jpg') # draw = ImageDraw.Draw(image) faces_data = [] faces_cropped = [] for i, face in enumerate(faces): # print('Face #%d: %s' % (i, face)) x, y, w, h = face.bounding_box #print(x,y,w,h) w_rm = int(0.3 * w / 2) face_cropped = crop_np((x, y, w, h), w_rm, image_npp) if face_cropped is None: continue #print('face_cropped None'); continue # faces_data.append(image[y: y + h, x + w_rm: x + w - w_rm]) # image[y: y + h, x + w_rm: x + w - w_rm].save('1.jpg') face_cropped.save('face_cropped_'+str(i)+'.jpg') faces_cropped.append(face_cropped) #break break # else: # tt = round(time.time()) - start_time # if tt > 10: # break #print('face cutting finishes') #print(type(faces_cropped), len(faces_cropped)) player.play(*BEEP_SOUND) flag = 0 for eachFace in faces_cropped: #print(type(eachFace)) if eachFace is None: flag = 1 if (len(faces_cropped)) <= 0: flag = 1 if flag == 1: continue with ImageInference(model) as img_inference: #with CameraInference(model) as img_inference: print('Entering classify_hand_gestures()') output = classify_hand_gestures(img_inference, faces_cropped, model=model, labels=labels, output_layer=output_layer, threshold=threshold) #print(output) if (output == 3): player.play(*JOY_SOUND) print('Yani face detected') print(s.result("Owner", filename)) else: player.play(*SAD_SOUND) print('Suspicious face detected') print(s.result("Unknown Face", filename)) upload(filename) # Stop preview # #break while (s.camera()==0): print('sleeping') time.sleep(.2) print('Waken up')
def model_roll():
    """Build the model descriptor for the 'roll_inference' model.

    Returns:
        A ModelDescriptor with input shape (1, 64, 64, 3), input normalizer
        (128, 128), and the compute graph loaded from _ROLL_GRAPH_NAME.
    """
    descriptor_kwargs = {
        'name': 'roll_inference',
        'input_shape': (1, 64, 64, 3),
        'input_normalizer': (128, 128),
        'compute_graph': utils.load_compute_graph(_ROLL_GRAPH_NAME),
    }
    return ModelDescriptor(**descriptor_kwargs)
def main():
    """Take one photo, classify it, and return the result message.

    Parses the command-line flags, builds the classifier descriptor from
    them, captures a 640x480 photo to photo_filename after a 3 second
    preview, runs image inference on that photo, and formats the processed
    result with get_message.

    Returns:
        The string produced by get_message for the processed result.
    """
    # (flag, add_argument keyword options), in the order they are registered.
    argument_specs = (
        ('--model_path', dict(
            required=True,
            help='Path to converted model file that can run on VisionKit.')),
        ('--label_path', dict(
            required=True,
            help='Path to label file that corresponds to the model.')),
        ('--input_height', dict(type=int, required=True,
                                help='Input height.')),
        ('--input_width', dict(type=int, required=True,
                               help='Input width.')),
        ('--input_layer', dict(required=True, help='Name of input layer.')),
        ('--output_layer', dict(required=True, help='Name of output layer.')),
        ('--input_mean', dict(type=float, default=128.0, help='Input mean.')),
        ('--input_std', dict(type=float, default=128.0, help='Input std.')),
        ('--input_depth', dict(type=int, default=3, help='Input depth.')),
        ('--threshold', dict(
            type=float, default=0.1,
            help='Threshold for classification score (from output tensor).')),
        ('--top_k', dict(type=int, default=1,
                         help='Keep at most top_k labels.')),
    )
    parser = argparse.ArgumentParser()
    for flag, options in argument_specs:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    input_shape = (1, args.input_height, args.input_width, args.input_depth)
    input_normalizer = (args.input_mean, args.input_std)
    model = inference.ModelDescriptor(
        name='mobilenet_based_classifier',
        input_shape=input_shape,
        input_normalizer=input_normalizer,
        compute_graph=utils.load_compute_graph(args.model_path))
    labels = read_labels(args.label_path)

    print("Taking photo")
    with PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.start_preview()
        # Pause with the preview running before the still capture.
        sleep(3.000)
        camera.capture(photo_filename)

    with inference.ImageInference(model) as image_inference:
        result = image_inference.run(Image.open(photo_filename))
        processed_result = process(result, labels, args.output_layer,
                                   args.threshold, args.top_k)
        message = get_message(processed_result, args.threshold, args.top_k)
    return message
def model():
    """Build the model descriptor for image classification.

    Returns:
        A ModelDescriptor named 'image_classification' with input shape
        (1, 160, 160, 3), input normalizer (128.0, 128.0), and the compute
        graph loaded from _COMPUTE_GRAPH_NAME.
    """
    input_shape = (1, 160, 160, 3)
    input_normalizer = (128.0, 128.0)
    compute_graph = utils.load_compute_graph(_COMPUTE_GRAPH_NAME)
    return ModelDescriptor(name='image_classification',
                           input_shape=input_shape,
                           input_normalizer=input_normalizer,
                           compute_graph=compute_graph)
def model():
    """Build the model descriptor for the CIFAR-10 classifier.

    Returns:
        A ModelDescriptor named 'cifar10_classification' with input shape
        (1, 32, 32, 3), input normalizer (127.5, 127.5), and the compute
        graph loaded from _COMPUTE_GRAPH_NAME.
    """
    descriptor = ModelDescriptor(
        name='cifar10_classification',
        input_shape=(1, 32, 32, 3),
        input_normalizer=(127.5, 127.5),
        compute_graph=utils.load_compute_graph(_COMPUTE_GRAPH_NAME),
    )
    return descriptor
def main():
    """Parse the command-line flags and start the moth detector.

    Builds the insect classifier descriptor, reads its labels, and hands
    everything to FawDetector.run together with the parsed options.

    Usage note: pass several species to --detecting_list as separate quoted
    arguments, e.g. --detecting_list "Species A" "Species B".
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input_layer',
        default='map/TensorArrayStack/TensorArrayGatherV3',
        help='Name of input layer.')
    parser.add_argument('--output_layer', default="prediction",
                        help='Name of output layer.')
    parser.add_argument(
        '--num_frames', type=int, default=-1,
        help='Sets the number of frames to run for, otherwise runs forever.')
    parser.add_argument('--input_mean', type=float, default=128.0,
                        help='Input mean.')
    parser.add_argument('--input_std', type=float, default=128.0,
                        help='Input std.')
    parser.add_argument(
        '--threshold', type=float, default=0.6,
        help='Threshold for classification score (from output tensor).')
    parser.add_argument('--top_k', type=int, default=3,
                        help='Keep at most top_k labels.')
    # nargs='+' makes each command-line argument one list entry. type=list
    # would instead call list() on the argument string and split it into
    # single characters.
    # NOTE(review): one default entry ends in "(0.14)" and 'Spodoptera litura'
    # appears twice; both are kept unchanged -- confirm against the label file.
    parser.add_argument(
        '--detecting_list', nargs='+', type=str,
        default=[
            'Biston betularia (Peppered Moth)',
            'Spodoptera litura (Oriental Leafworm Moth)',
            'Utetheisa ornatrix (Ornate Bella Moth)',
            'Polygrammate hebraeicum (Hebrew Moth)',
            'Palpita magniferalis (Splendid Palpita Moth) (0.14)',
            'Hyles lineata (White-lined Sphinx Moth)',
            'Hemileuca eglanterina (Western Sheep Moth)',
            'Ceratomia undulosa (Waved Sphinx Moth)',
            'Nadata gibbosa (White-dotted Prominent Moth)',
            'Lophocampa caryae (Hickory Tussock Moth)',
            'Spodoptera ornithogalli (Yellow-striped Armyworm Moth)',
            'Spodoptera litura (Oriental Leafworm Moth)',
            'Charadra deridens (Laugher Moth)',
        ],
        help='Input a list of bugs that you want to keep.')
    parser.add_argument('--message_threshold', type=int, default=4,
                        help='Input detection threshold for sending sms')
    args = parser.parse_args()

    model = inference.ModelDescriptor(
        name='mobilenet_based_classifier',
        input_shape=(1, 192, 192, 3),
        input_normalizer=(128.0, 128.0),
        compute_graph=utils.load_compute_graph(
            'mobilenet_v2_192res_1.0_inat_insect.binaryproto'))
    labels = read_labels(
        "/home/pi/models/mobilenet_v2_192res_1.0_inat_insect_labels.txt")

    detector = FawDetector()
    detector.run(args.input_layer, args.output_layer, args.num_frames,
                 args.input_mean, args.input_std, args.threshold, args.top_k,
                 args.detecting_list, args.message_threshold, model, labels)