def classify_frame(img, inputQueue, outputQueue):
    engine = edgetpu.detection.engine.DetectionEngine(
        'unquant_tpu_tflite/landing_tflite_graph.tflite')
    # keep looping
    while True:
        # check to see if there is a frame in our input queue
        if not inputQueue.empty():
            # grab the frame from the input queue
            img = inputQueue.get()
            results = engine.DetectWithImage(img, threshold=0.4,
                                             keep_aspect_ratio=True,
                                             relative_coord=False, top_k=10)
            data_out = []
            if results:
                for obj in results:
                    inference = []
                    box = obj.bounding_box.flatten().tolist()
                    xmin = int(box[0])
                    ymin = int(box[1])
                    xmax = int(box[2])
                    ymax = int(box[3])
                    inference.extend(
                        (obj.label_id, obj.score, xmin, ymin, xmax, ymax))
                    data_out.append(inference)
            outputQueue.put(data_out)
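
# --- Usage sketch (not part of the original snippet) ---
# A minimal example of how classify_frame could be driven from a capture loop,
# assuming the queues are multiprocessing.Queue objects and that frames are
# handed over as PIL images (DetectWithImage expects a PIL.Image). The queue
# sizes, camera index and window name below are illustrative assumptions.
import multiprocessing

import cv2
from PIL import Image


def run_detection_pipeline():
    inputQueue = multiprocessing.Queue(maxsize=1)
    outputQueue = multiprocessing.Queue(maxsize=1)
    worker = multiprocessing.Process(target=classify_frame,
                                     args=(None, inputQueue, outputQueue),
                                     daemon=True)
    worker.start()

    camera = cv2.VideoCapture(0)
    while True:
        ret, frame = camera.read()
        if not ret:
            break
        # only feed a new frame when the worker is ready for one
        if inputQueue.empty():
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            inputQueue.put(Image.fromarray(rgb))
        # consume any finished detections:
        # each entry is [label_id, score, xmin, ymin, xmax, ymax]
        if not outputQueue.empty():
            for label_id, score, xmin, ymin, xmax, ymax in outputQueue.get():
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 0, 255), 1)
        cv2.imshow('detections', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    camera.release()
    cv2.destroyAllWindows()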
def predict(image, class_to_detect, confidence):
    image_height, image_width = image.shape[:2]
    image = Image.fromarray(image)
    results = engine.DetectWithImage(image, threshold=0.4,
                                     keep_aspect_ratio=True,
                                     relative_coord=False, top_k=10)
    predictions = []
    if results:
        for obj in results:
            box = obj.bounding_box.flatten().tolist()
            xmin = int(box[0])
            ymin = int(box[1])
            xmax = int(box[2])
            ymax = int(box[3])
            if obj.score > confidence and obj.label_id == class_to_detect.value:
                predictions.append(
                    Prediction(class_id=obj.label_id,
                               img_w=image_width,
                               img_h=image_height,
                               bounding_box=Box(xmin, ymin,
                                                xmax - xmin, ymax - ymin)))
    return predictions
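
# --- Supporting types and usage sketch (assumptions, not from the original) ---
# predict() relies on a module-level `engine`, a `Prediction` container, a `Box`
# container and an enum-like `class_to_detect` (it reads `.value`). The
# definitions below are plausible stand-ins so the function can be exercised;
# the real project may define them differently, and the RoadSign label ids
# here are purely hypothetical.
from collections import namedtuple
from enum import Enum

Box = namedtuple('Box', ['x', 'y', 'w', 'h'])
Prediction = namedtuple('Prediction',
                        ['class_id', 'img_w', 'img_h', 'bounding_box'])


class RoadSign(Enum):  # hypothetical label ids
    STOP = 1
    SPEED_LIMIT = 2

# Example call on a frame read with OpenCV. The frame is converted to RGB
# first, since the detection engine is typically fed RGB images:
#   rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#   stops = predict(rgb, RoadSign.STOP, confidence=0.5)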
def main():
    os.chdir('/home/pi/DeepPiCar/models/object_detection')

    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.',
                        required=False)
    parser.add_argument('--label', help='File path of label file.',
                        required=False)
    args = parser.parse_args()

    # args.model = 'test_data/mobilenet_ssd_v2_coco_quant_postprocess.tflite'
    args.model = 'data/model_result/road_signs_quantized.tflite'
    args.label = 'data/model_result/road_sign_labels.txt'

    with open(args.label, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    # initialize OpenCV capture
    IM_WIDTH = 640
    IM_HEIGHT = 480
    camera = cv2.VideoCapture(0)
    ret = camera.set(3, IM_WIDTH)   # CAP_PROP_FRAME_WIDTH
    ret = camera.set(4, IM_HEIGHT)  # CAP_PROP_FRAME_HEIGHT

    font = cv2.FONT_HERSHEY_SIMPLEX
    bottomLeftCornerOfText = (10, IM_HEIGHT - 10)
    fontScale = 1
    fontColor = (255, 255, 255)  # white
    boxColor = (0, 0, 255)       # red (BGR)
    boxLineWidth = 1
    lineType = 2
    annotate_text = ""
    annotate_text_time = time.time()
    time_to_show_prediction = 1.0  # seconds
    min_confidence = 0.20

    # initialize the detection engine
    engine = edgetpu.detection.engine.DetectionEngine(args.model)
    elapsed_ms = 0

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('output.avi', fourcc, 20.0, (IM_WIDTH, IM_HEIGHT))

    try:
        while camera.isOpened():
            try:
                start_ms = time.time()
                ret, frame = camera.read()  # grab a frame from camera
                if not ret:
                    print('can NOT read from camera')
                    break
                img = frame  # annotate the same frame we run inference on

                input = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # convert to RGB color space
                img_pil = Image.fromarray(input)
                # input = cv2.resize(input, (width, height))

                start_tf_ms = time.time()
                results = engine.DetectWithImage(img_pil,
                                                 threshold=min_confidence,
                                                 keep_aspect_ratio=True,
                                                 relative_coord=False,
                                                 top_k=5)
                end_tf_ms = time.time()
                elapsed_tf_ms = end_tf_ms - start_tf_ms

                if results:
                    for obj in results:
                        print("%s, %.0f%% %s %.2fms" %
                              (labels[obj.label_id], obj.score * 100,
                               obj.bounding_box, elapsed_tf_ms * 1000))
                        box = obj.bounding_box
                        coord_top_left = (int(box[0][0]), int(box[0][1]))
                        coord_bottom_right = (int(box[1][0]), int(box[1][1]))
                        cv2.rectangle(img, coord_top_left, coord_bottom_right,
                                      boxColor, boxLineWidth)
                        annotate_text = "%s, %.0f%%" % (labels[obj.label_id],
                                                        obj.score * 100)
                        coord_top_left = (coord_top_left[0],
                                          coord_top_left[1] + 15)
                        cv2.putText(img, annotate_text, coord_top_left, font,
                                    fontScale, boxColor, lineType)
                        print('------')
                else:
                    print('No object detected')

                # print frame rate info
                elapsed_ms = time.time() - start_ms
                annotate_text = "%.2f FPS, %.2fms total, %.2fms in tf " % (
                    1.0 / elapsed_ms, elapsed_ms * 1000, elapsed_tf_ms * 1000)
                print('%s: %s' % (datetime.datetime.now(), annotate_text))
                cv2.putText(img, annotate_text, bottomLeftCornerOfText, font,
                            fontScale, fontColor, lineType)

                out.write(img)
                cv2.imshow('Detected Objects', img)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            except Exception:
                # catch it and don't exit the while loop
                print('In except')
                traceback.print_exc()
    finally:
        print('In Finally')
        camera.release()
        out.release()
        cv2.destroyAllWindows()
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v1_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'

    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    args = parser.parse_args()

    with open(args.labels, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    engine = edgetpu.detection.engine.DetectionEngine(args.model)

    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        camera.annotate_text_size = 20
        # input tensor shape is [batch, height, width, channels]
        _, height, width, channels = engine.get_input_tensor_shape()
        camera.start_preview()
        try:
            stream = io.BytesIO()
            fps = deque(maxlen=20)
            fps.append(time.time())
            for foo in camera.capture_continuous(
                    stream,
                    # format='rgb',
                    format='jpeg',
                    use_video_port=True,
                    resize=(width, height)):
                # rewind so PIL reads the JPEG that was just captured
                stream.seek(0)
                # input = np.fromstring(stream.getvalue(), dtype=np.uint8)
                img = Image.open(stream)
                draw = ImageDraw.Draw(img)
                output_name = 'images/test_{}.jpeg'.format(time.time())

                # Run inference.
                ans = engine.DetectWithImage(img, threshold=0.05,
                                             keep_aspect_ratio=True,
                                             relative_coord=False, top_k=3)
                # Display result.
                if ans:
                    for obj in ans:
                        print('-----------------------------------------')
                        if labels:
                            print(labels[obj.label_id])
                        print('score = ', obj.score)
                        box = obj.bounding_box.flatten().tolist()
                        print('box = ', box)
                        # Draw a rectangle.
                        draw.rectangle(box, outline='blue')
                        draw.text((box[0], box[1]),
                                  '{} {}'.format(labels[obj.label_id],
                                                 str(obj.score)), 'red')
                        if labels[obj.label_id] == 'person' and obj.score > 0.6:
                            img.save(output_name)
                            url = upload_image_datalake(output_name)
                            short_url = get_bitly_url(url)
                            push_line_message(short_url)
                            exit()

                    img.save(output_name)
                    if platform.machine() == 'x86_64':
                        # For gLinux, simply show the image.
                        img.show()
                    elif platform.machine() == 'armv7l':
                        # For Raspberry Pi, install 'feh' to display the image.
                        subprocess.Popen(['feh', output_name])
                    else:
                        print('Please check ', output_name)
                else:
                    print('No object detected!')

                # reset the stream before the next capture
                stream.seek(0)
                stream.truncate()
        finally:
            camera.stop_preview()
import time
import re
import numpy as np
import os
import datetime

import edgetpu.detection.engine
from PIL import Image

model = "road_signs_quantized_v2_edgetpu.tflite"
label = 'road_sign_labels.txt'
image_path = "data/test/propios/IMG_20190715_212222.jpg"

with open(label, 'r') as f:
    pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
    labels = dict((int(k), v) for k, v in pairs)

engine = edgetpu.detection.engine.DetectionEngine(model)

start = time.time()
image = Image.open(image_path)
results = engine.DetectWithImage(image, threshold=0.20, top_k=6)
end = time.time()

for result in results:
    print(labels[result.label_id], result.score * 100)

# end = time.time()
print(end - start)
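
# --- Usage sketch (not from the original): batch timing over a folder ---
# A small extension of the benchmark above that averages DetectWithImage
# latency over every image in a directory. The directory path is a
# hypothetical example; apart from glob, only calls already used above appear.
import glob


def benchmark_folder(folder='data/test/propios'):
    times = []
    for path in glob.glob(os.path.join(folder, '*.jpg')):
        img = Image.open(path)
        t0 = time.time()
        detections = engine.DetectWithImage(img, threshold=0.20, top_k=6)
        times.append(time.time() - t0)
        print(path, len(detections), 'objects in %.1f ms' % (times[-1] * 1000))
    if times:
        print('average: %.1f ms over %d images' %
              (sum(times) / len(times) * 1000, len(times)))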
def main() -> None:
    # Store labels for matching with inference results
    labels = read_label_file(ARGS.labels) if ARGS.labels else None

    # Specify font for labels
    # font = PIL.ImageFont.truetype("/usr/share/fonts/truetype/piboto/Piboto-Regular.ttf", 20)
    font = PIL.ImageFont.truetype(
        "/usr/share/fonts/truetype/roboto/hinted/Roboto-Regular.ttf", 30)
    # font = None

    engine = edgetpu.detection.engine.DetectionEngine(ARGS.model)

    width = 640
    height = 480
    out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc(*'MJPG'), 30,
                          (width, height))

    # Use imutils to count Frames Per Second (FPS)
    fps = FPS().start()

    live_view = LiveView(ARGS.ip, ARGS.port)

    while True:
        try:
            try:
                image = PIL.Image.open(io.BytesIO(live_view.image()))
            except OSError:
                print('could not identify image file')
                continue

            # Perform inference and note time taken
            startMs = time.time()
            try:
                inferenceResults = engine.DetectWithImage(
                    image,
                    threshold=ARGS.confidence,
                    keep_aspect_ratio=True,
                    relative_coord=False,
                    top_k=ARGS.maxobjects)
                elapsedMs = time.time() - startMs

                annotate(image, inferenceResults, elapsedMs, labels, font)
            except OSError as e:
                print(e)

            cv2_image = cv2.cvtColor(numpy.asarray(image), cv2.COLOR_RGB2BGR)
            out.write(cv2_image)

            if 'DISPLAY' in os.environ:
                cv2.imshow('NCS Improved live inference', cv2_image)

                # Display the frame for 5 ms so the next frame can be shown.
                # Close the window if 'q' is pressed.
                if cv2.waitKey(5) & 0xFF == ord('q'):
                    fps.stop()
                    break

            fps.update()

        # Allow graceful exit using Ctrl-C (handy for headless mode).
        except KeyboardInterrupt:
            fps.stop()
            break

    print("Elapsed time: " + str(fps.elapsed()))
    print("Approx FPS: " + str(fps.fps()))

    cv2.destroyAllWindows()
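
# --- Hypothetical annotate() helper (not part of the original excerpt) ---
# The loop above calls annotate(image, inferenceResults, elapsedMs, labels, font),
# which is defined elsewhere in the project. The sketch below is one plausible
# implementation: it draws each detection's box and label onto the PIL image
# in place. Colours and text placement are illustrative choices.
import PIL.ImageDraw


def annotate(image, results, elapsed_ms, labels, font):
    draw = PIL.ImageDraw.Draw(image)
    for obj in results:
        # with relative_coord=False the box is already in pixel coordinates:
        # [[xmin, ymin], [xmax, ymax]]
        (xmin, ymin), (xmax, ymax) = obj.bounding_box
        draw.rectangle([xmin, ymin, xmax, ymax], outline='red')
        label = labels[obj.label_id] if labels else str(obj.label_id)
        draw.text((xmin, ymin),
                  '%s %.0f%%' % (label, obj.score * 100),
                  fill='red', font=font)
    # show the per-frame inference time in the top-left corner
    draw.text((10, 10), '%.1f ms' % (elapsed_ms * 1000), fill='white', font=font)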