import os
from operator import itemgetter
from typing import List

import cv2
from edgetpu.detection.engine import DetectionEngine

# `Detection` is a project-local result container exposing `label`,
# `confidence` and a `bounding_box` with x/y min/max fields.


class CoralObjectDetector:
    """Performs inference on the Edge TPU."""

    def __init__(self, model_path, device_path):
        self.__engine = DetectionEngine(
            model_path=os.path.join(model_path, 'edgetpu.tflite'),
            device_path=device_path)
        # The input tensor shape is (1, height, width, channels);
        # cv2.resize expects dsize as (width, height).
        self.__model_shape = itemgetter(2, 1)(
            self.__engine.get_input_tensor_shape())

    @property
    def device_name(self):
        return "Coral"

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass

    def detect(self, image_shape, image_np, detections: List["Detection"]):
        image_np = cv2.resize(image_np, dsize=self.__model_shape,
                              interpolation=cv2.INTER_LINEAR)
        objs = self.__engine.detect_with_input_tensor(
            input_tensor=image_np.flatten(), top_k=len(detections))
        d = 0
        max_width = image_shape[1] - 1
        max_height = image_shape[0] - 1
        # Copy results into the caller-provided, preallocated list, scaling
        # the relative box coordinates back to image pixels.
        while d < len(objs) and d < len(detections):
            detection = detections[d]
            obj = objs[d]
            detection.label = obj.label_id + 1
            detection.confidence = obj.score
            detection.bounding_box.y_min = int(obj.bounding_box[0][1] * max_height)
            detection.bounding_box.x_min = int(obj.bounding_box[0][0] * max_width)
            detection.bounding_box.y_max = int(obj.bounding_box[1][1] * max_height)
            detection.bounding_box.x_max = int(obj.bounding_box[1][0] * max_width)
            d += 1
        return self.__engine.get_inference_time()
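A minimal usage sketch (an assumption, not from the original source): `Detection` is a project-local type, so a hypothetical stand-in with the attributes the class above uses is defined here for illustration.

from dataclasses import dataclass, field

@dataclass
class BoundingBox:
    x_min: int = 0
    y_min: int = 0
    x_max: int = 0
    y_max: int = 0

@dataclass
class Detection:
    label: int = 0
    confidence: float = 0.0
    bounding_box: BoundingBox = field(default_factory=BoundingBox)

# Hypothetical usage: the detector fills a preallocated list in place and
# returns the inference time in milliseconds.
frame = cv2.imread('test.jpg')  # placeholder input image
detections = [Detection() for _ in range(10)]
with CoralObjectDetector('/path/to/model_dir', device_path=None) as detector:
    # device_path=None assumes the engine picks a default Edge TPU.
    inference_ms = detector.detect(frame.shape, frame, detections)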
import argparse
import io
import time
from collections import deque

import cv2
import numpy as np
from PIL import Image
from picamera import PiCamera
from edgetpu.classification.engine import ClassificationEngine
from edgetpu.detection.engine import DetectionEngine
from edgetpu.utils.dataset_utils import read_label_file

# draw_rectangles() and draw_text() are project-local drawing helpers
# (a sketch follows this listing).


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='Path of the detection model.',
                        required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument(
        '--mode',
        help='Detection mode: OBJECT_DETECTION or IMAGE_CLASSIFICATION',
        required=True)
    parser.add_argument('--camera',
                        help='Camera source (if multiple available)',
                        type=int,
                        required=False)
    args = parser.parse_args()

    # Initialize the engine.
    if args.mode == "OBJECT_DETECTION":
        engine = DetectionEngine(args.model)
    elif args.mode == "IMAGE_CLASSIFICATION":
        engine = ClassificationEngine(args.model)
    else:
        print("Please choose a mode: OBJECT_DETECTION or IMAGE_CLASSIFICATION")
        exit()

    labels = read_label_file(args.label) if args.label else None
    label = None
    camera = args.camera if args.camera else 0

    # Initialize the camera
    #cam = cv2.VideoCapture(camera)
    camera = PiCamera()
    time.sleep(2)
    camera.resolution = (640, 480)

    # Create the in-memory stream
    stream = io.BytesIO()

    # Initialize the timer for fps
    start_time = time.time()
    frame_times = deque(maxlen=40)

    while True:
        #ret, cv2_im = cam.read()
        stream = io.BytesIO()  # wipe the contents
        camera.capture(stream, format='jpeg', use_video_port=True)
        stream.seek(0)
        pil_im = Image.open(stream)
        cv2_im = np.array(pil_im)
        # PIL decodes to RGB; convert to BGR for OpenCV drawing/display.
        cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_RGB2BGR)

        if args.mode == "OBJECT_DETECTION":
            ans = engine.DetectWithImage(pil_im,
                                         threshold=0.05,
                                         keep_aspect_ratio=True,
                                         relative_coord=False,
                                         top_k=10)
            if ans:
                for obj in ans:
                    if obj.score > 0.4:
                        if labels:
                            label = labels[obj.label_id] + " - {0:.2f}".format(
                                obj.score)
                        draw_rectangles(obj.bounding_box, cv2_im, label=label)
            else:
                draw_text(cv2_im, 'No object detected!')
        else:
            i = 0
            for result in engine.ClassifyWithImage(pil_im, top_k=5):
                if result:
                    label = labels[result[0]]
                    score = result[1]
                    draw_text(cv2_im, label, i)
                    i += 1
                else:
                    draw_text(cv2_im, 'No classification detected!')

        lastInferenceTime = engine.get_inference_time()
        frame_times.append(time.time())
        fps = len(frame_times) / float(frame_times[-1] - frame_times[0] + 0.001)
        draw_text(cv2_im, "{:.1f} / {:.2f}ms".format(fps, lastInferenceTime))
        #print("FPS / Inference time: " + "{:.1f} / {:.2f}ms".format(fps, lastInferenceTime))

        # Flip the image (cv2.flip returns the result; it does not flip in place).
        cv2_im = cv2.flip(cv2_im, 1)
        #cv2_im = cv2.resize(cv2_im, (800, 600))
        cv2.imshow('object detection', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break
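The two drawing helpers are not shown in the source; this is a hypothetical minimal sketch consistent with how they are called above and in the next listing (boxes arrive in absolute pixels, since relative_coord=False).

def draw_rectangles(bounding_box, image, label=None):
    # bounding_box arrives as [[x_min, y_min], [x_max, y_max]] in pixels.
    x0, y0, x1, y1 = [int(v) for v in np.asarray(bounding_box).flatten()]
    cv2.rectangle(image, (x0, y0), (x1, y1), (0, 255, 0), 2)
    if label:
        cv2.putText(image, label, (x0, max(y0 - 5, 0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)


def draw_text(image, text, row=0):
    # Stack successive lines of text down the left edge of the frame.
    cv2.putText(image, text, (10, 30 + row * 25),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)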
import argparse
import time
from collections import deque

import cv2
import numpy as np
from PIL import Image
from edgetpu.classification.engine import ClassificationEngine
from edgetpu.detection.engine import DetectionEngine
from edgetpu.utils.dataset_utils import read_label_file

# Assumed module-level flag (not shown in the source): toggles appending the
# confidence score to the drawn label.
SHOW_CONFIDENCE_IN_LABEL = True


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='Path of the detection model.',
                        required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument(
        '--mode',
        help='Detection mode: OBJECT_DETECTION or IMAGE_CLASSIFICATION',
        required=True)
    parser.add_argument('--camera',
                        help='Camera source (if multiple available)',
                        type=int,
                        required=False)
    args = parser.parse_args()

    # Initialize the engine.
    if args.mode == "OBJECT_DETECTION":
        engine = DetectionEngine(args.model)
    elif args.mode == "IMAGE_CLASSIFICATION":
        engine = ClassificationEngine(args.model)
    else:
        print("Please choose a mode: OBJECT_DETECTION or IMAGE_CLASSIFICATION")
        exit()

    labels = read_label_file(args.label) if args.label else None
    label = None
    camera = args.camera if args.camera else 0

    # Initialize the camera
    cam = cv2.VideoCapture(camera)

    # Initialize the timer for fps
    start_time = time.time()
    frame_times = deque(maxlen=40)

    while True:
        ret, cv2_im = cam.read()

        # We convert the numpy image to a PIL image; the TPU library/utils
        # perform the inverse conversion internally.
        # The cv2 way:
        #pil_im = Image.fromarray(cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB))
        # The tf-utils way; it needs numpy and is slightly slower:
        pil_im = Image.fromarray(np.uint8(cv2_im)).convert('RGB')

        if args.mode == "OBJECT_DETECTION":
            ans = engine.DetectWithImage(pil_im,
                                         threshold=0.05,
                                         keep_aspect_ratio=True,
                                         relative_coord=False,
                                         top_k=10)
            if ans:
                print("{} object(s) detected".format(len(ans)))
                for obj in ans:
                    if obj.score > 0.4:
                        if labels:
                            label = labels[obj.label_id]
                            if SHOW_CONFIDENCE_IN_LABEL:
                                label = label + "({0:.2f})".format(obj.score)
                        draw_rectangles(obj.bounding_box, cv2_im, label=label)
            else:
                draw_text(cv2_im, 'No object detected!')
        else:
            i = 0
            for result in engine.ClassifyWithImage(pil_im, top_k=5):
                if result:
                    label = labels[result[0]]
                    score = result[1]
                    draw_text(cv2_im, label, i)
                    i += 1
                else:
                    draw_text(cv2_im, 'No classification detected!')

        lastInferenceTime = engine.get_inference_time()
        frame_times.append(time.time())
        fps = len(frame_times) / float(frame_times[-1] - frame_times[0] + 0.001)
        draw_text(cv2_im, "{:.1f} / {:.2f}ms".format(fps, lastInferenceTime))

        # Flipping the image:
        #cv2_im = cv2.flip(cv2_im, 1)
        # Resizing the image:
        #cv2_im = cv2.resize(cv2_im, (800, 600))
        cv2.imshow('object detection', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break
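The listing claims the numpy-based conversion path is slightly slower than the cv2 one; a micro-benchmark sketch (an assumption, not from the original) to check that claim on your own hardware:

import timeit

cv2_im = np.zeros((480, 640, 3), np.uint8)  # synthetic BGR frame

t_cv2 = timeit.timeit(
    lambda: Image.fromarray(cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)),
    number=1000)
t_np = timeit.timeit(
    lambda: Image.fromarray(np.uint8(cv2_im)).convert('RGB'),
    number=1000)
# timeit returns total seconds for 1000 runs, so t * 1e3 is microseconds/frame.
print("cv2 path: {:.1f} us/frame, numpy path: {:.1f} us/frame".format(
    t_cv2 * 1e3, t_np * 1e3))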
import argparse
import time

import cv2
import numpy as np
import PIL.Image
from edgetpu.detection.engine import DetectionEngine

# visual (drawing helpers), ReadLabelFile and WINDOW_NAME come from the
# surrounding project; WINDOW_NAME is assumed to be a window title string.
# A sketch of ReadLabelFile follows this listing.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model",
                        help="File path of TFLite model.",
                        required=True)
    parser.add_argument("--label",
                        help="File path of label file.",
                        required=True)
    parser.add_argument("--top_k",
                        help="Keep the top k candidates.",
                        default=3,
                        type=int)
    parser.add_argument("--threshold",
                        help="Threshold to filter results.",
                        default=0.5,
                        type=float)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument("--videopath", help="File path of video file.", default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[-1]
    colors = visual.random_colors(last_key)

    # Video capture.
    if args.videopath == "":
        print('open camera.')
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print(args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    elapsed_list = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_buf = PIL.Image.fromarray(im)

        # Run inference.
        start_ms = time.time()
        ans = engine.detect_with_image(
            input_buf,
            threshold=args.threshold,
            keep_aspect_ratio=False,
            relative_coord=False,
            top_k=args.top_k,
        )
        elapsed_ms = engine.get_inference_time()

        # Display result.
        if ans:
            for obj in ans:
                label_name = "Unknown"
                if labels:
                    label_name = labels[obj.label_id]
                caption = "{0}({1:.2f})".format(label_name, obj.score)

                # Draw a rectangle and caption.
                box = obj.bounding_box.flatten().tolist()
                visual.draw_rectangle(frame, box, colors[obj.label_id])
                visual.draw_caption(frame, box, caption)

        # Calc fps.
        elapsed_list.append(elapsed_ms)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

        # Display fps
        fps_text = "{0:.2f}ms".format(elapsed_ms)
        visual.draw_caption(frame, (10, 30), fps_text + avg_text)

        # Display
        cv2.imshow(WINDOW_NAME, frame)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything is done, release the capture and the window.
    cap.release()
    cv2.destroyAllWindows()
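ReadLabelFile() is not shown in this listing; a minimal hypothetical sketch, mirroring the label parser the Neural class uses later in this section, that returns a {label_id: name} dict:

def ReadLabelFile(file_path):
    # Each line is "<id> <name>"; build a {label_id: name} mapping.
    ret = {}
    with open(file_path, "r") as f:
        for line in f:
            pair = line.strip().split(maxsplit=1)
            ret[int(pair[0])] = pair[1].strip()
    return ret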
import argparse
import time

import cv2
import numpy as np
import PIL.Image
import picamera
from picamera.array import PiRGBArray
from edgetpu.detection.engine import DetectionEngine

# visual (drawing helpers), ReadLabelFile and WINDOW_NAME come from the
# surrounding project, as in the previous listing.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model",
                        help="File path of TFLite model.",
                        required=True)
    parser.add_argument("--label",
                        help="File path of label file.",
                        required=True)
    parser.add_argument("--top_k",
                        help="Keep the top k candidates.",
                        default=3,
                        type=int)
    parser.add_argument("--threshold",
                        help="Threshold to filter results.",
                        default=0.5,
                        type=float)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[-1]
    colors = visual.random_colors(last_key)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height

    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        _, width, height, channels = engine.get_input_tensor_shape()
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture,
                                                   format="rgb",
                                                   use_video_port=True):
                # Clear the stream so it is ready for the next capture;
                # frame.array keeps the data of the capture just completed.
                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                input_buf = PIL.Image.fromarray(image)

                # Run inference.
                start_ms = time.time()
                ans = engine.detect_with_image(
                    input_buf,
                    threshold=args.threshold,
                    keep_aspect_ratio=False,
                    relative_coord=False,
                    top_k=args.top_k,
                )
                elapsed_ms = engine.get_inference_time()

                # Display result.
                if ans:
                    for obj in ans:
                        label_name = "Unknown"
                        if labels:
                            label_name = labels[obj.label_id]
                        caption = "{0}({1:.2f})".format(label_name, obj.score)

                        # Draw a rectangle and caption.
                        box = obj.bounding_box.flatten().tolist()
                        visual.draw_rectangle(im, box, colors[obj.label_id])
                        visual.draw_caption(im, box, caption)

                # Calc fps.
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

                # Display fps
                fps_text = "{0:.2f}ms".format(elapsed_ms)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # Display
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break
        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
import statistics

import numpy as np
from edgetpu.detection.engine import DetectionEngine

# Path to the detection model. For the Edge TPU this must be a compiled
# .tflite file, not a TensorFlow frozen .pb graph.
PATH_TO_CKPT = '/frozen_inference_graph.pb'

# Load the Edge TPU engine.
engine = DetectionEngine(PATH_TO_CKPT)

# Benchmark on a synthetic all-black 300x300 RGB frame.
frame = np.zeros((300, 300, 3), np.uint8)
flattened_frame = np.expand_dims(frame, axis=0).flatten()

detection_times = []
for x in range(0, 1000):
    objects = engine.DetectWithInputTensor(flattened_frame,
                                           threshold=0.1,
                                           top_k=3)
    detection_times.append(engine.get_inference_time())

print("Average inference time: " + str(statistics.mean(detection_times)))
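get_inference_time() reports only the time spent inside the engine; a variant (an assumption, not in the original) that also measures end-to-end wall-clock time per call, including Python overhead, could look like this:

import time

n_runs = 1000
start = time.perf_counter()
for _ in range(n_runs):
    engine.DetectWithInputTensor(flattened_frame, threshold=0.1, top_k=3)
elapsed_s = time.perf_counter() - start
print("Average wall-clock time: {:.2f} ms".format(elapsed_s * 1000.0 / n_runs))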
import colorsys
import random

import cv2
import numpy as np
from PIL import Image
from edgetpu.detection.engine import DetectionEngine


class Neural:

    def __init__(self, parameters):
        self.engine = DetectionEngine(parameters.model_path + '/' +
                                      parameters.model_file)
        self.labels = Neural.read_label_file(parameters.model_path + '/' +
                                             parameters.label_file)
        last_key = sorted(self.labels.keys())[-1]
        self.colors = Neural.random_colors(last_key)
        self.parameters = parameters
        self.scene = []

    def process(self, frame):
        self.scene = []
        #frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_buf = Image.fromarray(frame)
        im = frame
        ans = self.engine.detect_with_image(
            input_buf,
            threshold=self.parameters.detection_threshold,
            keep_aspect_ratio=False,
            relative_coord=False,
            top_k=10)
        self.elapsed_ms = self.engine.get_inference_time()

        if ans:
            for obj in ans:
                self.scene.append({
                    'label': self.labels[obj.label_id],
                    'bbox': obj.bounding_box.flatten().tolist(),
                    'score': obj.score
                })
                label_name = "Unknown"
                if self.labels:
                    label_name = self.labels[obj.label_id]
                caption = "{0}({1:.2f})".format(label_name, obj.score)

                # Draw a rectangle and caption.
                box = obj.bounding_box.flatten().tolist()
                Neural.draw_rectangle(im, box, self.colors[obj.label_id])
                Neural.draw_caption(im, box, caption)
        return im

    @staticmethod
    def draw_rectangle(image, box, color, thickness=3):
        """Draws a rectangle.

        Args:
            image: The image to draw on.
            box: A list of 4 elements (x1, y1, x2, y2).
            color: Rectangle color.
            thickness: Thickness of lines.
        """
        b = np.array(box).astype(int)
        cv2.rectangle(image, (b[0], b[1]), (b[2], b[3]), color, thickness)

    @staticmethod
    def draw_caption(image, box, caption):
        """Draws a caption above the box in an image.

        Args:
            image: The image to draw on.
            box: A list of 4 elements (x1, y1, x2, y2).
            caption: String containing the text to draw.
        """
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1]), cv2.FONT_HERSHEY_SIMPLEX,
                    0.8, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1]), cv2.FONT_HERSHEY_SIMPLEX,
                    0.8, (255, 255, 255), 1)

    @staticmethod
    def read_label_file(file_path):
        """Reads labels from a text file.

        Args:
            file_path: File path to labels.
        """
        with open(file_path, "r") as f:
            lines = f.readlines()
        ret = {}
        for line in lines:
            pair = line.strip().split(maxsplit=1)
            ret[int(pair[0])] = pair[1].strip()
        return ret

    @staticmethod
    def random_colors(N):
        """Random color generator."""
        N = N + 1
        hsv = [(i / N, 1.0, 1.0) for i in range(N)]
        colors = list(
            map(lambda c: tuple(int(i * 255) for i in colorsys.hsv_to_rgb(*c)),
                hsv))
        random.shuffle(colors)
        return colors
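A usage sketch (hypothetical; the real `parameters` object comes from the surrounding project, and the file names below are placeholders):

from types import SimpleNamespace

params = SimpleNamespace(
    model_path='models',
    model_file='ssd_mobilenet_edgetpu.tflite',   # placeholder model name
    label_file='coco_labels.txt',                # placeholder label file
    detection_threshold=0.5)

net = Neural(params)
frame = cv2.imread('test.jpg')
annotated = net.process(frame)   # draws boxes in place and returns the frame
print(net.scene)                 # list of {'label', 'bbox', 'score'} dicts
print(net.elapsed_ms)            # inference time of the last frame, in ms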
writer = None
i = 0
vs = cv2.VideoCapture(args.video)

while True:
    (grabbed, frame) = vs.read()
    if not grabbed:
        break
    i += 1
    print(f"Processing on {i}th frame...", end='\r')

    image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    ans = engine.DetectWithImage(image,
                                 threshold=args.detection_threshold,
                                 keep_aspect_ratio=False,
                                 resample=Image.NEAREST,
                                 relative_coord=False,
                                 top_k=10)
    logger.info("%.3f %d" % (engine.get_inference_time(),
                             count_target_bbox(ans, labels, args.target_labels)))

    # Feed the detections for the target classes into the SORT tracker.
    bboxs = generate_sort_tracker_bbox(ans, labels, args.target_labels)
    trackers = mot_tracker.update(numpy.asarray(bboxs))

    # Push the JPEG-encoded frame and its tracks to the video client.
    img_str = cv2.imencode('.jpg', frame)[1].tobytes()
    vclient.push_frame(args.name, i, img_str, trackers.tolist())

vs.release()
# writer.release()
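count_target_bbox() and generate_sort_tracker_bbox() are not shown in the source; these are hypothetical minimal versions consistent with the calls above, assuming `labels` maps label_id to a class name and `target_labels` is a collection of names to track:

def count_target_bbox(ans, labels, target_labels):
    # Number of detections whose class is in the tracked set.
    return sum(1 for obj in ans if labels[obj.label_id] in target_labels)


def generate_sort_tracker_bbox(ans, labels, target_labels):
    # SORT expects one row per detection: [x1, y1, x2, y2, score].
    return [list(obj.bounding_box.flatten()) + [obj.score]
            for obj in ans
            if labels[obj.label_id] in target_labels]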