class PreppedQueueProcessor(threading.Thread):
    def __init__(self, cameras, prepped_frame_queue):
        threading.Thread.__init__(self)
        self.cameras = cameras
        self.prepped_frame_queue = prepped_frame_queue

        # Load the edgetpu engine and labels
        self.engine = DetectionEngine(PATH_TO_CKPT)
        self.labels = ReadLabelFile(PATH_TO_LABELS)

    def run(self):
        # Process the queue forever.
        while True:
            frame = self.prepped_frame_queue.get()

            # Actual detection.
            objects = self.engine.DetectWithInputTensor(
                frame['frame'], threshold=frame['region_threshold'], top_k=3)
            # print(self.engine.get_inference_time())

            # Parse and pass detected objects back to the camera.
            parsed_objects = []
            for obj in objects:
                box = obj.bounding_box.flatten().tolist()
                parsed_objects.append({
                    'frame_time': frame['frame_time'],
                    'name': str(self.labels[obj.label_id]),
                    'score': float(obj.score),
                    'xmin': int((box[0] * frame['region_size']) + frame['region_x_offset']),
                    'ymin': int((box[1] * frame['region_size']) + frame['region_y_offset']),
                    'xmax': int((box[2] * frame['region_size']) + frame['region_x_offset']),
                    'ymax': int((box[3] * frame['region_size']) + frame['region_y_offset'])
                })
            self.cameras[frame['camera_name']].add_objects(parsed_objects)
def detection_job(detection_model, image_name, num_inferences):
    """Runs detection job."""
    engine = DetectionEngine(detection_model)
    with open_image(image_name) as img:
        # Resize to the model's input shape, which is [1, height, width, 3].
        _, height, width, _ = engine.get_input_tensor_shape()
        tensor = np.asarray(img.resize((width, height), Image.NEAREST)).flatten()

    # Using `DetectWithInputTensor` to exclude image down-scale cost.
    for _ in range(num_inferences):
        engine.DetectWithInputTensor(tensor, top_k=1)
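# `open_image` is not defined in this snippet. A minimal sketch is given
# below, assuming it simply wraps PIL and yields an RGB image; treat it as
# an illustration, not the original helper.
from contextlib import contextmanager

from PIL import Image

@contextmanager
def open_image(path):
    # Hypothetical helper: yields the file at `path` as an RGB PIL image.
    with Image.open(path) as img:
        yield img.convert('RGB')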
def main(argv):
    argparser = build_argparser()
    args = argparser.parse_args(argv)
    labels = load_labels(args.label)
    engine = DetectionEngine(args.model)

    camera = picamera.PiCamera()
    camera.resolution = (640, 480)
    camera.framerate = 30
    # Input tensor shape is [1, height, width, channels].
    _, height, width, channels = engine.get_input_tensor_shape()

    overlay_img = Image.new('RGBA', (width, height), (0, 0, 0, 0))
    overlay = camera.add_overlay(overlay_img.tobytes(), size=overlay_img.size)
    overlay.layer = 3

    try:
        start_time = time.time()
        camera.start_preview(fullscreen=True)
        buff = io.BytesIO()
        for _ in camera.capture_continuous(buff, format='rgb',
                                           use_video_port=True,
                                           resize=(width, height)):
            buff.truncate()
            buff.seek(0)
            array = np.frombuffer(buff.getvalue(), dtype=np.uint8)

            # Do inference.
            start_ms = time.time()
            detected = engine.DetectWithInputTensor(array, top_k=10)
            elapsed_ms = time.time() - start_ms
            if detected:
                camera.annotate_text = ('%d objects detected.\n%.2fms'
                                        % (len(detected), elapsed_ms * 1000.0))

            overlay_img = Image.new('RGBA', (width, height), (0, 0, 0, 0))
            draw = ImageDraw.Draw(overlay_img)
            for obj in detected:
                # Relative coordinates to absolute coordinates.
                box = obj.bounding_box * [[width, height]]
                draw.rectangle(box.flatten().tolist(),
                               COLORS[obj.label_id % len(COLORS)])
            overlay.update(overlay_img.tobytes())

            if time.time() - start_time >= args.time:
                break
    finally:
        camera.stop_preview()
        camera.close()
    return 0
class ObjectDetector:
    def __init__(
            self,
            model_file="mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite",
            label_file="coco_labels.txt"):
        self.model_file = model_file
        self.label_file = label_file
        self.labels = ReadLabelFile(self.label_file)
        self.engine = DetectionEngine(self.model_file)

    def detect(self, input_frame):
        if not self.labels or not self.engine:
            print("Detector is not initialized!")
            return []

        objects = self.engine.DetectWithInputTensor(
            input_frame.flatten(), threshold=0.5, top_k=10)
        # Input tensor shape is [1, height, width, channels].
        _, height, width, channels = self.engine.get_input_tensor_shape()
        detected_objects = []
        if objects:
            for obj in objects:
                box = obj.bounding_box.flatten().tolist()
                box_left = int(box[0] * width)
                box_top = int(box[1] * height)
                box_right = int(box[2] * width)
                box_bottom = int(box[3] * height)
                percentage = int(obj.score * 100)
                label = self.labels[obj.label_id]
                object_info = {
                    'box_left': box_left,
                    'box_right': box_right,
                    'box_top': box_top,
                    'box_bottom': box_bottom,
                    'percentage': percentage,
                    'label': label
                }
                detected_objects.append(object_info)
        return detected_objects
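# A minimal usage sketch for ObjectDetector (assumes a webcam at index 0 and
# the default model/label files in the working directory). Note that detect()
# only flattens its input, so the frame must already match the model's input
# size.
import cv2

detector = ObjectDetector()
_, height, width, _ = detector.engine.get_input_tensor_shape()
ret, frame = cv2.VideoCapture(0).read()
if ret:
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    for obj in detector.detect(cv2.resize(rgb, (width, height))):
        print(obj['label'], obj['percentage'])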
class PreppedQueueProcessor(threading.Thread):
    def __init__(self, cameras, prepped_frame_queue):
        threading.Thread.__init__(self)
        self.cameras = cameras
        self.prepped_frame_queue = prepped_frame_queue

        # Load the edgetpu engine and labels
        self.engine = DetectionEngine(PATH_TO_CKPT)
        self.labels = LABELS

    def run(self):
        # Process the queue forever.
        while True:
            frame = self.prepped_frame_queue.get()

            # Actual detection.
            objects = self.engine.DetectWithInputTensor(
                frame['frame'], threshold=0.5, top_k=5)
            # print(self.engine.get_inference_time())

            # Parse and pass detected objects back to the camera.
            parsed_objects = []
            for obj in objects:
                parsed_objects.append({
                    'region_id': frame['region_id'],
                    'frame_time': frame['frame_time'],
                    'name': str(self.labels[obj.label_id]),
                    'score': float(obj.score),
                    'box': obj.bounding_box.flatten().tolist()
                })
            self.cameras[frame['camera_name']].add_objects(parsed_objects)
def run_two_models_one_tpu(classification_model, detection_model, image_name,
                           num_inferences, batch_size):
    """Runs two models alternately on a single Edge TPU.

    It runs the classification model `batch_size` times, then switches to the
    detection model for `batch_size` runs, repeating until each model has run
    `num_inferences` times.

    Args:
      classification_model: string, path to classification model.
      detection_model: string, path to detection model.
      image_name: string, path to input image.
      num_inferences: int, number of inferences to run for each model.
      batch_size: int, how many inferences to run on one model before
        switching to the other one.

    Returns:
      double, wall time it takes to finish the job.
    """
    start_time = time.perf_counter()
    engine_a = ClassificationEngine(classification_model)
    # `engine_b` shares the same Edge TPU as `engine_a`.
    engine_b = DetectionEngine(detection_model, engine_a.device_path())
    with open_image(image_name) as image:
        # Resized image for `engine_a`, `engine_b`.
        tensor_a = get_input_tensor(engine_a, image)
        tensor_b = get_input_tensor(engine_b, image)

    num_iterations = (num_inferences + batch_size - 1) // batch_size
    for _ in range(num_iterations):
        # Using `ClassifyWithInputTensor` and `DetectWithInputTensor` on
        # purpose to exclude image down-scale cost.
        for _ in range(batch_size):
            engine_a.ClassifyWithInputTensor(tensor_a, top_k=1)
        for _ in range(batch_size):
            engine_b.DetectWithInputTensor(tensor_b, top_k=1)
    return time.perf_counter() - start_time
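# `get_input_tensor` is also not shown. Below is a sketch under the same
# assumptions as the `open_image` sketch above (flattened uint8 RGB input),
# followed by a hypothetical comparison run: alternating models on one Edge
# TPU forces the runtime to swap model weights in and out of the chip's
# cache, so larger batch sizes should finish sooner.
import numpy as np
from PIL import Image

def get_input_tensor(engine, image):
    # Hypothetical helper: resize to the engine's input size, then flatten.
    _, height, width, _ = engine.get_input_tensor_shape()
    return np.asarray(image.resize((width, height), Image.NEAREST),
                      dtype=np.uint8).flatten()

for batch in (1, 10, 100):
    seconds = run_two_models_one_tpu(
        'mobilenet_v1_1.0_224_quant_edgetpu.tflite',   # assumed file names
        'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite',
        'cat.jpg', num_inferences=100, batch_size=batch)
    print('batch_size=%3d: %.2f s' % (batch, seconds))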
def mqtt_device_demo(args):
    """Connects a device, sends data, and receives data."""
    # [START iot_mqtt_run]
    global minimum_backoff_time
    global MAXIMUM_BACKOFF_TIME

    # Publish to the events or state topic based on the flag.
    sub_topic = 'events' if args.message_type == 'event' else 'state'
    mqtt_topic = '/devices/{}/{}'.format(args.device_id, sub_topic)

    jwt_iat = datetime.datetime.utcnow()
    jwt_exp_mins = args.jwt_expires_minutes
    client = get_client(
        args.project_id, args.cloud_region, args.registry_id,
        args.device_id, args.private_key_file, args.algorithm,
        args.ca_certs, args.mqtt_bridge_hostname, args.mqtt_bridge_port)

    # Initialize engine.
    engine = DetectionEngine(
        './edgetpu/test_data/mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite')
    # labels = ReadLabelFile(args_tpu.label) if args_tpu.label else None

    # Initialize the camera.
    with picamera.PiCamera() as camera:
        camera.resolution = (1024, 768)
        camera.framerate = 30
        # Input tensor shape is [1, height, width, channels].
        _, height, width, channels = engine.get_input_tensor_shape()
        camera.start_preview()
        try:
            stream = io.BytesIO()
            for foo in camera.capture_continuous(stream, format='rgb',
                                                 use_video_port=True,
                                                 resize=(width, height)):
                client.loop()
                if should_backoff:
                    # If backoff time is too large, give up.
                    if minimum_backoff_time > MAXIMUM_BACKOFF_TIME:
                        print('Exceeded maximum backoff time. Giving up.')
                        break
                    delay = minimum_backoff_time + random.randint(0, 1000) / 1000.0
                    time.sleep(delay)
                    minimum_backoff_time *= 2
                    client.connect(args.mqtt_bridge_hostname, args.mqtt_bridge_port)

                now = datetime.datetime.now()
                stream.truncate()
                stream.seek(0)
                input = np.frombuffer(stream.getvalue(), dtype=np.uint8)

                start_ms = time.time()
                ans = engine.DetectWithInputTensor(input, threshold=0.5, top_k=10)
                elapsed_ms = time.time() - start_ms

                # Display result. Every detection from the face model counts
                # as one person.
                print('-----------------------------------------')
                nPerson = 0
                bbox = list()
                scores = list()
                if ans:
                    for obj in ans:
                        nPerson = nPerson + 1
                        # if labels:
                        #     print(labels[obj.label_id])
                        score = [obj.score]
                        # print('score = ', obj.score)
                        box = obj.bounding_box.flatten().tolist()
                        # print('box = ', box)
                        bbox.append(box)
                        scores.append(score)
                msg = {"nPersons": int(nPerson),
                       "bounding_box": str(bbox),
                       "scores": str(scores)}

                bounding_box = [{'box_0': bb[0], 'box_1': bb[1],
                                 'box_2': bb[2], 'box_3': bb[3]}
                                for bb in eval(msg["bounding_box"])]
                scores_msg = [{'score': s[0]} for s in eval(msg["scores"])]
                info = {'nPersons': '{}'.format(nPerson),
                        'bounding_box': bounding_box,
                        'scores': scores_msg,
                        'TimeStamp': str(int(time.time()))}

                ###################################
                # The window lists don't exist yet on the first pass; the
                # NameError from the first call initializes them.
                try:
                    list_short, info_short = change_info_list(
                        window=30, list=list_short, nPerson=nPerson, length='short')
                except NameError:
                    list_short = []
                    list_short, info_short = change_info_list(
                        window=30, list=list_short, nPerson=nPerson, length='short')
                try:
                    list_long, info_long = change_info_list(
                        window=300, list=list_long, nPerson=nPerson, length='long')
                except NameError:
                    list_long = []
                    list_long, info_long = change_info_list(
                        window=300, list=list_long, nPerson=nPerson, length='long')
                info.update(info_short)
                info.update(info_long)
                ###################################

                payload = json.dumps(info)
                print('Publishing message {}'.format(payload))

                # [START iot_mqtt_jwt_refresh]
                seconds_since_issue = (datetime.datetime.utcnow() - jwt_iat).seconds
                if seconds_since_issue > 60 * jwt_exp_mins:
                    # print('Refreshing token')
                    jwt_iat = datetime.datetime.utcnow()
                    client = get_client(
                        args.project_id, args.cloud_region, args.registry_id,
                        args.device_id, args.private_key_file, args.algorithm,
                        args.ca_certs, args.mqtt_bridge_hostname,
                        args.mqtt_bridge_port)
                # [END iot_mqtt_jwt_refresh]

                # Publish "payload" to the MQTT topic. qos=1 means at least once
                # delivery. Cloud IoT Core also supports qos=0 for at most once
                # delivery.
                client.publish(mqtt_topic, payload, qos=1)

                # Send events every second. State should not be updated as often.
                time.sleep(1 if args.message_type == 'event' else 5)
        finally:
            camera.stop_preview()
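# `change_info_list` is not defined in this snippet. A hypothetical sketch
# consistent with how it is called above: keep a sliding window of person
# counts and report a windowed average alongside the instantaneous count.
# (The parameter name `list` shadows the builtin only to match the keyword
# used at the call site; the averaged key names are assumptions.)
def change_info_list(window, list, nPerson, length):
    list.append(nPerson)
    if len(list) > window:
        list.pop(0)
    info = {'nPersons_avg_{}'.format(length): sum(list) / float(len(list))}
    return list, info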
def main():
    cam_w, cam_h = 640, 480
    default_model_dir = "./all_models"
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=5,
                        help='number of classes with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.5,
                        help='class score threshold')
    args = parser.parse_args()

    with open(args.labels, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    print("Loading %s with %s labels." % (args.model, args.labels))
    engine = DetectionEngine(args.model)

    pygame.init()
    pygame.font.init()
    font = pygame.font.SysFont("Arial", 20)
    pygame.camera.init()
    camlist = pygame.camera.list_cameras()

    # Input tensor shape is [1, height, width, channels].
    _, h, w, _ = engine.get_input_tensor_shape()

    camera = pygame.camera.Camera(camlist[0], (cam_w, cam_h))
    display = pygame.display.set_mode((cam_w, cam_h), 0)
    red = pygame.Color(255, 0, 0)

    camera.start()
    try:
        last_time = time.monotonic()
        while True:
            mysurface = camera.get_image()
            imagen = pygame.transform.scale(mysurface, (w, h))
            input = np.frombuffer(imagen.get_buffer(), dtype=np.uint8)
            start_time = time.monotonic()
            results = engine.DetectWithInputTensor(input,
                                                   threshold=args.threshold,
                                                   top_k=args.top_k)
            stop_time = time.monotonic()
            inference_ms = (stop_time - start_time) * 1000.0
            fps = 1.0 / (stop_time - last_time)
            last_time = stop_time
            annotate_text = "Inference: %5.2fms FPS: %3.1f" % (inference_ms, fps)

            for result in results:
                x0, y0, x1, y1 = result.bounding_box.flatten().tolist()
                rect = pygame.Rect(x0 * cam_w, y0 * cam_h,
                                   (x1 - x0) * cam_w, (y1 - y0) * cam_h)
                pygame.draw.rect(mysurface, red, rect, 1)
                label = "%.0f%% %s" % (100 * result.score, labels[result.label_id])
                text = font.render(label, True, red)
                mysurface.blit(text, (x0 * cam_w, y0 * cam_h))

            text = font.render(annotate_text, True, red)
            mysurface.blit(text, (0, 0))
            display.blit(mysurface, (0, 0))
            pygame.display.flip()
    finally:
        camera.stop()
# Input tensor shape is [1, height, width, channels].
_, height, width, channels = engine.get_input_tensor_shape()
camera.start_preview()
try:
    stream = io.BytesIO()
    for foo in camera.capture_continuous(stream, format='rgb',
                                         use_video_port=True,
                                         resize=(width, height)):
        stream.truncate()
        stream.seek(0)
        input = np.frombuffer(stream.getvalue(), dtype=np.uint8)
        # cv2.imwrite('current_frame.jpg', input)
        # img = Image.open('current_frame.jpg')
        # draw = ImageDraw.Draw(img)
        start_ms = time.time()
        ans = engine.DetectWithInputTensor(input, threshold=0.5, top_k=10)
        elapsed_ms = time.time() - start_ms

        # Display result.
        print('-----------------------------------------')
        nPerson = 0
        bbox = list()
        scores = list()
        if ans:
            print(ans)
            for obj in ans:
                nPerson += 1
                if labels:
                    print(labels[obj.label_id])
                score = obj.score
                print('score = ', obj.score)
                box = obj.bounding_box.flatten().tolist()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='Path of the detection model.',
                        required=True)
    parser.add_argument('--draw', help='Whether to draw the results.',
                        default=True)
    parser.add_argument('--label', help='Path of the labels file.')
    args = parser.parse_args()

    renderer = None
    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = read_label_file(args.label) if args.label else None

    shown = False
    frames = 0
    start_seconds = time.time()

    print('opening socket.')
    receiveSocket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind((TCP_IP, TCP_PORT))
    s.listen(1)
    receiveSocket.bind((UDP_IP, UDP_RECEIVE_PORT))
    # senderSocket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    # senderSocket.bind((UDP_IP, UDP_SEND_PORT))
    print('listening...')

    # Input tensor shape is [1, height, width, channels].
    _, height, width, channels = engine.get_input_tensor_shape()
    imageSize = width * height * 3

    print('waiting for client')
    conn, addr = s.accept()
    print('Connection address:', addr)

    # Receive JPEG frames over UDP, run detection, push results over TCP.
    while True:
        print('waiting for packet')
        data, addr = receiveSocket.recvfrom(66507)
        # print('got packet of length', len(data))
        if len(data) > 0:
            start_s = time.time()
            # print('processing image')
            try:
                image = Image.open(io.BytesIO(data)).convert('RGB')
            except OSError:
                print('Could not read image')
                continue
            input = np.frombuffer(image.tobytes(), dtype=np.uint8)
            results = engine.DetectWithInputTensor(input, threshold=0.25,
                                                   top_k=10)
            print('time to process image', (time.time() - start_s) * 1000)
            output = to_output(results, image.size, labels)
            message = json.dumps({'results': output}) + '|'
            # print('sending', message)
            try:
                conn.send(message.encode('utf-8'))
            except ConnectionResetError:
                print('Socket disconnected...waiting for client')
                conn, addr = s.accept()
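# `to_output` is not shown in this snippet. A hypothetical sketch matching
# the call `to_output(results, image.size, labels)`: convert each detection
# result into a plain dict with absolute pixel coordinates.
def to_output(results, size, labels):
    width, height = size
    output = []
    for obj in results:
        x0, y0, x1, y1 = obj.bounding_box.flatten().tolist()
        output.append({
            'label': labels[obj.label_id] if labels else str(obj.label_id),
            'score': float(obj.score),
            'box': [int(x0 * width), int(y0 * height),
                    int(x1 * width), int(y1 * height)],
        })
    return output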
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='Path of the detection model.',
                        required=True)
    parser.add_argument('--draw', help='Whether to draw the results.',
                        default=True)
    parser.add_argument('--label', help='Path of the labels file.')
    args = parser.parse_args()

    renderer = None
    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = read_label_file(args.label) if args.label else None

    shown = False
    frames = 0
    start_seconds = time.time()

    FULL_SIZE_W = 640
    FULL_SIZE_H = 480
    img = Image.new('RGBA', (FULL_SIZE_W, FULL_SIZE_H))
    draw = ImageDraw.Draw(img)

    with picamera.PiCamera() as camera:
        camera.resolution = (FULL_SIZE_W, FULL_SIZE_H)
        camera.framerate = 30
        # Input tensor shape is [1, height, width, channels].
        _, height, width, channels = engine.get_input_tensor_shape()
        print('input dims', width, height)
        camera.start_preview(fullscreen=False,
                             window=(700, 200, FULL_SIZE_W, FULL_SIZE_H))

        # The Raspberry Pi requires capture resizes to be multiples of 32x16,
        # so capture slightly smaller and pad back up to the model input size.
        camera_multiple = (16, 32)
        valid_resize_w = width - width % camera_multiple[1]
        valid_resize_h = height - height % camera_multiple[0]
        padding_w = (width - valid_resize_w) // 2
        padding_h = (height - valid_resize_h) // 2
        scale_w = FULL_SIZE_W / width
        scale_h = FULL_SIZE_H / height

        try:
            stream = io.BytesIO()
            for foo in camera.capture_continuous(
                    stream,
                    format='rgb',
                    use_video_port=True,
                    resize=(valid_resize_w, valid_resize_h)):
                stream.truncate()
                stream.seek(0)
                start_frame = time.time()
                input = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                if padding_w > 0 or padding_h > 0:
                    flattened = pad_and_flatten(
                        input, (valid_resize_h, valid_resize_w),
                        padding_h, padding_w)
                else:
                    flattened = input
                reshape_time = time.time() - start_frame

                # Run inference.
                start_s = time.time()
                results = engine.DetectWithInputTensor(flattened,
                                                       threshold=0.2,
                                                       top_k=10)
                elapsed_s = time.time() - start_frame

                if padding_w > 0 or padding_h > 0:
                    boxes = translate_and_scale_boxes(
                        results,
                        (valid_resize_w, valid_resize_h),
                        (padding_w, padding_h),
                        (FULL_SIZE_W, FULL_SIZE_H))
                else:
                    boxes = scale_boxes(results, (FULL_SIZE_W, FULL_SIZE_H))

                if args.draw:
                    img.putalpha(0)
                    draw_boxes(draw, boxes)
                    if labels:
                        draw_labels(draw, results, boxes, labels)
                    imbytes = img.tobytes()
                    if renderer is None:
                        renderer = camera.add_overlay(
                            imbytes, size=img.size, layer=4, format='rgba',
                            fullscreen=False,
                            window=(700, 200, 640, FULL_SIZE_H))
                    else:
                        renderer.update(imbytes)

                frame_seconds = time.time()
                fps = frames * 1.0 / (frame_seconds - start_seconds)
                frames = frames + 1
                camera.annotate_text = "%.2fms, %d fps" % (elapsed_s * 1000.0, fps)
        finally:
            camera.stop_preview()
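# `pad_and_flatten` is one of several helpers not shown here
# (translate_and_scale_boxes, scale_boxes, draw_boxes, and draw_labels are
# also assumed). A hypothetical sketch consistent with the call above:
# reshape the raw RGB buffer, zero-pad it back to the model's input size,
# and flatten to 1-D. If the padding does not split evenly, the original
# code's intent is unclear; this assumes symmetric padding.
import numpy as np

def pad_and_flatten(flat_rgb, resize_hw, padding_h, padding_w):
    h, w = resize_hw
    frame = flat_rgb.reshape((h, w, 3))
    padded = np.pad(frame,
                    ((padding_h, padding_h), (padding_w, padding_w), (0, 0)),
                    mode='constant')
    return padded.flatten()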
class App:
    def __init__(self):
        self.frame = None
        self.thread = None
        self.stopEvent = None

        self.camera = cv2.VideoCapture(0)
        self.camera.set(3, WIDTH)
        self.camera.set(4, HEIGHT)

        self.engine = DetectionEngine(
            './mobilenet_ssd_v1_coco_quant_postprocess_edgetpu.tflite')
        self.labels = ReadLabelFile('./coco_labels.txt')

        self.root = tki.Tk()
        self.root.bind('<Escape>', lambda e: self.onClose())
        self.root.wm_protocol("WM_DELETE_WINDOW", self.onClose)
        self.panel = None

        self.stopEvent = threading.Event()
        self.thread = threading.Thread(target=self.videoLoop, args=())
        self.thread.daemon = True
        self.thread.start()

    def findObjects(self, image):
        # Input tensor shape is [1, height, width, channels].
        _, height, width, channels = self.engine.get_input_tensor_shape()
        input = cv2.resize(image, (width, height))
        input = input.reshape((width * height * channels))
        results = self.engine.DetectWithInputTensor(input, top_k=5)
        return results

    def videoLoop(self):
        try:
            while not self.stopEvent.is_set():
                if not self.camera.isOpened():
                    continue
                ret, self.frame = self.camera.read()
                if not ret:
                    continue

                font = cv2.FONT_HERSHEY_SIMPLEX
                self.frame = cv2.cvtColor(self.frame, cv2.COLOR_BGR2RGB)
                results = self.findObjects(self.frame)
                if results:
                    for obj in results:
                        if obj.score > 0.5:
                            # bounding_box is [[x0, y0], [x1, y1]] in
                            # relative coordinates.
                            top_left = calculatePosition(obj.bounding_box[0])
                            bottom_right = calculatePosition(obj.bounding_box[1])
                            center_point = (
                                int(top_left[0] + ((bottom_right[0] - top_left[0]) / 2)),
                                int(top_left[1] + ((bottom_right[1] - top_left[1]) / 2)))
                            # cv2.rectangle(self.frame, top_left, bottom_right,
                            #               (0, 255, 0), 1)
                            label = self.labels[obj.label_id]
                            # getTextSize takes a thickness, not a line type.
                            label_size = cv2.getTextSize(label, font, 0.5, 1)
                            label_width = label_size[0][0]
                            label_height = label_size[0][1]

                            # Pointer.
                            cv2.circle(self.frame, center_point, 5, (0, 255, 0), -1)
                            cv2.line(self.frame,
                                     (int(top_left[0] + label_width / 2), top_left[1]),
                                     center_point, (0, 255, 0), 2)

                            # Label.
                            label_x = top_left[0] - 1
                            label_y = top_left[1] - label_height
                            if label_y < 0:
                                label_y = 0
                            cv2.rectangle(self.frame, (label_x, label_y),
                                          (label_x + label_width,
                                           label_y + label_height),
                                          (0, 255, 0), -1)
                            cv2.putText(self.frame, label,
                                        (label_x + 5, label_y + label_height - 5),
                                        font, 0.5, (255, 255, 255))

                image = Image.fromarray(self.frame)
                image = ImageTk.PhotoImage(image)
                if self.panel is None:
                    self.panel = tki.Label(image=image)
                    self.panel.image = image
                    self.panel.pack(side="left", padx=0, pady=0)
                else:
                    self.panel.configure(image=image)
                    self.panel.image = image
        except Exception as e:
            print("[INFO] caught a RuntimeError")
            print(e)
        finally:
            # Cleanup happens once here, not duplicated inside the loop body.
            print("[INFO] closing...")
            self.camera.release()
            self.root.destroy()

    def onClose(self):
        self.stopEvent.set()
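# `calculatePosition` is not defined in this snippet. A hypothetical sketch
# consistent with its use above: map one relative (x, y) pair from
# obj.bounding_box (values in [0, 1]) to pixel coordinates in the full
# frame, using the WIDTH/HEIGHT constants the camera was configured with.
def calculatePosition(point):
    return (int(point[0] * WIDTH), int(point[1] * HEIGHT))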
class VideoCamera(object):
    def __init__(self):
        print('starting camera')
        with open(Config.LABEL_PATH, 'r', encoding="utf-8") as f:
            pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
            self.labels = dict((int(k), v) for k, v in pairs)
        self.engine = DetectionEngine(Config.MODEL_PATH)

        # Using OpenCV to capture from device 0. If you have trouble capturing
        # from a webcam, comment the line below out and use a video file
        # instead.
        self.video = cv2.VideoCapture(0)
        if self.video:
            self.video.set(3, 640)
            self.video.set(4, 480)
        # If you decide to use video.mp4, you must have this file in the same
        # folder as main.py.
        # self.video = cv2.VideoCapture('video.mp4')

    def __del__(self):
        print('closing camera')
        self.video.release()

    def get_frame(self):
        start_time = time.time()
        font = cv2.FONT_HERSHEY_SIMPLEX
        topLeftCornerOfText = (10, 20)
        bottomLeftCornerOfText = (10, 470)
        fontScale = 0.6
        fontColor = (random.randint(0, 255), random.randint(0, 255),
                     random.randint(0, 255))
        lineType = 1
        annotate_text = ""

        # Input tensor shape is [1, height, width, channels].
        _, height, width, channels = self.engine.get_input_tensor_shape()
        if not self.video.isOpened():
            print('Camera is not opened')
        ret, img = self.video.read()
        if not ret:
            print('Camera is not read')

        input = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input = cv2.resize(input, (width, height))
        input = input.reshape((width * height * channels))
        rows = img.shape[0]
        cols = img.shape[1]

        record_time = time.time()
        elapsed_record_ms = record_time - start_time

        # Run inference.
        ans = self.engine.DetectWithInputTensor(
            input, threshold=Config.DETECT_THRESHOLD, top_k=Config.TOP_K)
        detection_time = time.time()
        elapsed_detection_ms = detection_time - record_time

        # Display result.
        if ans:
            for obj in ans:
                box = obj.bounding_box.flatten().tolist()
                # print('id=', obj.label_id, 'score = ', obj.score, 'box = ', box)
                # Draw a rectangle and label in absolute pixel coordinates.
                x = box[0] * cols
                y = box[1] * rows
                right = box[2] * cols
                bottom = box[3] * rows
                if obj.score > Config.DETECT_THRESHOLD:
                    cv2.rectangle(img, (int(x), int(y)),
                                  (int(right), int(bottom)),
                                  fontColor, thickness=1)
                    annotate_text = "%s %.2f" % (self.labels[obj.label_id],
                                                 obj.score)
                    cv2.putText(img, annotate_text, (int(x), int(bottom)),
                                font, fontScale, fontColor, lineType)

        elapsed_frame_ms = (time.time() - start_time) * 1000.0
        frame_rate_text = "FPS: %.2f record: %.2fms detection: %.2fms" % (
            1000.0 / elapsed_frame_ms,
            elapsed_record_ms * 1000.0,
            elapsed_detection_ms * 1000.0)
        cv2.putText(img, frame_rate_text, topLeftCornerOfText, font,
                    fontScale, fontColor, lineType)

        ret, jpeg = cv2.imencode('.jpg', img)
        return jpeg.tobytes()
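# A common way a class like VideoCamera is consumed (assumed here, not shown
# in the snippet): an MJPEG endpoint in Flask that streams the JPEG bytes
# returned by get_frame().
from flask import Flask, Response

app = Flask(__name__)

def gen(camera):
    # Yield a multipart stream of JPEG frames.
    while True:
        frame = camera.get_frame()
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')

@app.route('/video_feed')
def video_feed():
    return Response(gen(VideoCamera()),
                    mimetype='multipart/x-mixed-replace; boundary=frame')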
import statistics

import numpy as np
from edgetpu.detection.engine import DetectionEngine

# Path to the detection model. DetectionEngine expects an Edge TPU-compiled
# .tflite file, not a TensorFlow frozen graph (.pb).
PATH_TO_CKPT = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'

# Load the edgetpu engine.
engine = DetectionEngine(PATH_TO_CKPT)

# Benchmark on a blank 300x300 RGB frame.
frame = np.zeros((300, 300, 3), np.uint8)
flattened_frame = np.expand_dims(frame, axis=0).flatten()

detection_times = []
for x in range(0, 1000):
    objects = engine.DetectWithInputTensor(flattened_frame, threshold=0.1,
                                           top_k=3)
    # get_inference_time() reports the latency of the last inference in ms.
    detection_times.append(engine.get_inference_time())

print("Average inference time: " + str(statistics.mean(detection_times)))
class RoboPiCamContr(object):
    # Step 2: Constructor which defines default values for settings
    def __init__(self, appDuration=50, cameraResolution=(304, 304),
                 useVideoPort=True, btDisconMode=False,
                 serialPort='/dev/rfcomm0', mailboxName='abc',
                 minObjectScore=0.35):
        self.cameraResolution = cameraResolution
        self.useVideoPort = useVideoPort
        self.btDisconMode = btDisconMode
        self.serialPort = serialPort
        self.mailboxName = mailboxName
        self.appDuration = appDuration  # seconds to run
        self.minObjectScore = minObjectScore
        modelFile = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
        objectLabelsFile = 'coco_labels.txt'
        print("Reading Model: ", modelFile)
        self.engine = DetectionEngine(modelFile)
        print("Reading object labels: ", objectLabelsFile)
        self.labels = self.readLabelFile(objectLabelsFile)
        print("Minimal object score: ", self.minObjectScore)

    # Step 4: Configure PiCam
    # Return parameter: created PiCam
    def configurePiCam(self):
        print("\nConfigure and warming up PiCamera")
        self.cam = PiCamera()
        self.cam.resolution = self.cameraResolution
        print("Camera resolution: " + repr(self.cam.resolution))
        self.cam.start_preview()
        sleep(2)
        self.cam.stop_preview()
        return self.cam

    # Step 7: Test if the bluetooth connection is established and a
    # program on EV3 is running and answering.
    # Returns whether the bluetooth connection is OK. If bluetooth is
    # disabled on application level, True is returned.
    def bluetoothStartTest(self):
        if self.btDisconMode:
            print("Bluetooth disconnected mode is enabled")
            return True
        try:
            print('Performing bluetooth start tests')
            self.ev3 = tmtcCom.TMTCpi2EV3(self.serialPort, self.mailboxName)
            print('Bluetooth device is present: ' + self.serialPort)
            ack, result = self.ev3.sendTC('Heartbeat', False)
            print(ack, result)
            if not ack:
                print('Heartbeat was not acknowledged. Start program on EV3!')
                return False
            print("Heartbeat acknowledged")
            return True
        except Exception:
            print('\nConnection error during EV3 communication')
            print('No device: ', self.serialPort)
            return False

    # Step 9: Take a photo returned as numpy array
    def takePhoto(self):
        picData = np.empty(
            (self.cameraResolution[1], self.cameraResolution[0], 3),
            dtype=np.uint8)
        self.cam.capture(picData, format='rgb',
                         use_video_port=self.useVideoPort)  # 24-bit RGB format
        # The COCO model requires 300 x 300 resolution:
        # remove the last 4 rows and last 4 columns in all 3 dimensions.
        picData = picData[:-4, :-4]
        return picData

    # Function to read labels from text files.
    def readLabelFile(self, file_path):
        with open(file_path, 'r') as f:
            lines = f.readlines()
        ret = {}
        for line in lines:
            pair = line.strip().split(maxsplit=1)
            ret[int(pair[0])] = pair[1].strip()
        return ret

    # Step 12: Predict the picture by running it on the TPU
    def predict(self, picData):
        print("\nPredicting image on TPU")
        print('Shape of data: ', picData.shape)
        flatArray = picData.flatten()  # 3D to 1D conversion
        print('Input array size: ', flatArray.shape)
        # Call the TPU to detect objects on the image with a neural network.
        result = self.engine.DetectWithInputTensor(
            flatArray, threshold=self.minObjectScore, top_k=10)
        return result

    # Step 14: Analyse the result of inferencing on the TPU.
    # The result is analysed and all objects will be marked as detected
    # if they belong to the object IDs of interest.
    def analyseResult(self, predResult, objectIdsOfInterest):
        print("Analysing results...")
        detectedObjList = []
        lbl = ''
        if predResult:
            for obj in predResult:
                if obj.label_id in objectIdsOfInterest:
                    if self.labels:
                        lbl = self.labels[obj.label_id]
                        print(lbl, obj.label_id)
                    print('score = ', obj.score)
                    box = obj.bounding_box.flatten()
                    box *= self.cameraResolution[1]  # scale up to resolution
                    print('box = ', box.tolist())
                    detectedObjList.append((lbl, box))
        if len(detectedObjList) == 0:
            print('No object detected!')
        return detectedObjList

    # Step 17: Depending on the detected object, take the desired action.
    # Returns True if the telecommand has been processed properly.
    def processResult(self, detectedObjects):
        num = len(detectedObjects)
        print('Number of detected objects: ', num)
        ack = True
        if not self.btDisconMode:
            if num > 0:
                obj = detectedObjects[0][0]
                print('Processing', obj)
                if obj == 'bottle':
                    print('\nCommanding EV3: MoveBottle')
                    ack, reply = self.ev3.sendTC('MoveBottle', True, 19)
                    print(ack, reply, "\n")
                elif obj == 'apple':
                    print('\nCommanding EV3: MoveApple')
                    ack, reply = self.ev3.sendTC('MoveApple', True, 19)
                    print(ack, reply, "\n")
                if not ack:
                    print('Telecommand failed!')
                    print('Check TC, bluetooth connection and timeout for telecommand.')
                    print('Check also if the program on EV3 is running.')
        return ack
class SmartPiCamContr(object):
    # Step 2: Constructor which defines default values for settings
    def __init__(self, appDuration=30, cameraResolution=(304, 304),
                 useVideoPort=True, minObjectScore=0.35):
        self.cameraResolution = cameraResolution
        self.useVideoPort = useVideoPort
        self.appDuration = appDuration  # seconds to run
        self.minObjectScore = minObjectScore
        modelFile = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
        objectLabelsFile = 'coco_labels.txt'
        print("Reading Model: ", modelFile)
        self.engine = DetectionEngine(modelFile)
        print("Reading object labels: ", objectLabelsFile)
        self.labels = self.readLabelFile(objectLabelsFile)
        print("Minimal object score: ", self.minObjectScore)

    # Step 4: Configure PiCam
    # Return parameter: created PiCam
    def configurePiCam(self):
        print("\nConfigure and warming up PiCamera")
        self.cam = PiCamera()
        self.cam.resolution = self.cameraResolution
        print("Camera resolution: " + repr(self.cam.resolution))
        self.cam.start_preview()
        sleep(2)
        self.cam.stop_preview()
        return self.cam

    # Step 7: Take a photo returned as numpy array
    def takePhoto(self):
        picData = np.empty(
            (self.cameraResolution[1], self.cameraResolution[0], 3),
            dtype=np.uint8)
        self.cam.capture(picData, format='rgb',
                         use_video_port=self.useVideoPort)  # 24-bit RGB format
        # The COCO model requires 300 x 300 resolution:
        # remove the last 4 rows and last 4 columns in all 3 dimensions.
        picData = picData[:-4, :-4]
        return picData

    # Function to read labels from text files.
    def readLabelFile(self, file_path):
        with open(file_path, 'r') as f:
            lines = f.readlines()
        ret = {}
        for line in lines:
            pair = line.strip().split(maxsplit=1)
            ret[int(pair[0])] = pair[1].strip()
        return ret

    # Step 10: Predict the picture by running it on the TPU
    def predict(self, picData):
        print("\nPredicting image on TPU")
        print('Shape of data: ', picData.shape)
        flatArray = picData.flatten()  # 3D to 1D conversion
        print('Input array size: ', flatArray.shape)
        # Call the TPU to detect objects on the image with a neural network.
        result = self.engine.DetectWithInputTensor(
            flatArray, threshold=self.minObjectScore, top_k=10)
        return result

    # Step 12: Analyse the result of inferencing on the TPU.
    # The result is analysed and all objects will be marked as detected
    # if they belong to the object IDs of interest.
    def analyseResult(self, predResult, objectIdsOfInterest):
        print("Analysing results...")
        detectedObjList = []
        lbl = ''
        if predResult:
            for obj in predResult:
                if obj.label_id in objectIdsOfInterest:
                    if self.labels:
                        lbl = self.labels[obj.label_id]
                        print(lbl, obj.label_id)
                    print('score = ', obj.score)
                    box = obj.bounding_box.flatten()
                    box *= self.cameraResolution[1]  # scale up to resolution
                    print('box = ', box.tolist())
                    detectedObjList.append((lbl, box))
        if len(detectedObjList) == 0:
            print('No object detected!')
        return detectedObjList

    # Step 15: Depending on the detected objects and location,
    # take the desired action.
    def processResult(self, detectedObjects):
        print('Number of detected objects: ', len(detectedObjects))
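# A hedged driver sketch wiring the numbered steps together (the actual main
# loop is not part of this snippet). The object IDs passed to analyseResult
# are assumptions based on the 0-indexed coco_labels.txt, where 43 is
# 'bottle' and 52 is 'apple'.
from time import time

if __name__ == '__main__':
    contr = SmartPiCamContr()
    contr.configurePiCam()
    end = time() + contr.appDuration
    while time() < end:
        pic = contr.takePhoto()
        result = contr.predict(pic)
        detected = contr.analyseResult(result, [43, 52])
        contr.processResult(detected)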