import argparse
from time import gmtime, strftime

from edgetpu.detection.engine import DetectionEngine
from PIL import Image, ImageDraw


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image', help='File path of the image.', required=True)
    args = parser.parse_args()

    output_name = '/opt/demo/images/objectdetection_{0}.jpg'.format(
        strftime("%Y%m%d%H%M%S", gmtime()))

    # Initialize engine.
    engine = DetectionEngine(
        '/opt/demo/mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite')

    # Open image.
    img = Image.open(args.image)
    draw = ImageDraw.Draw(img)

    # Run inference.
    ans = engine.DetectWithImage(img, threshold=0.05, keep_aspect_ratio=True,
                                 relative_coord=False, top_k=10)

    # Display result.
    if ans:
        for obj in ans:
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)
            draw.rectangle(box, outline='red')
        img.save(output_name)
    else:
        print('No object detected!')
class Model():
    def __init__(self):
        default_model_dir = './app/all_models'
        default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'
        default_labels = 'coco_labels.txt'
        parser = argparse.ArgumentParser()
        parser.add_argument('--model', help='.tflite model path',
                            default=os.path.join(default_model_dir, default_model))
        parser.add_argument('--labels', help='label file path',
                            default=os.path.join(default_model_dir, default_labels))
        parser.add_argument('--top_k', type=int, default=3,
                            help='number of classes with highest score to display')
        parser.add_argument('--threshold', type=float, default=0.1,
                            help='class score threshold')
        self.args = parser.parse_args()

        print("Loading %s with %s labels." % (self.args.model, self.args.labels))
        self.engine = DetectionEngine(self.args.model)
        self.labels = load_labels(self.args.labels)
        self.last_time = time.monotonic()

    def user_callback(self, image):
        # added
        global flaskImage
        global flaskStatus
        flaskImage = image

        start_time = time.monotonic()
        objs = self.engine.DetectWithImage(image, threshold=self.args.threshold,
                                           keep_aspect_ratio=True,
                                           relative_coord=True,
                                           top_k=self.args.top_k)
        end_time = time.monotonic()

        text_lines = [
            'Inference: %.2f ms' % ((end_time - start_time) * 1000),
            'FPS: %.2f fps' % (1.0 / (end_time - self.last_time)),
        ]

        objBoxes = []
        for obj in objs:
            x0, y0, x1, y1 = obj.bounding_box.flatten().tolist()
            percent = int(100 * obj.score)
            # print(percent)
            objBoxes.append([percent, x0, y0, x1, y1])

        self.last_time = end_time
        flaskStatus = objBoxes
        return flaskStatus
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='Path of the detection model.', required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument('--input', help='File path of the input image.', required=True)
    parser.add_argument('--output', help='File path of the output image.')
    parser.add_argument('--threshold', type=float, default=0.45,
                        help='Threshold for DetectionEngine')
    args = parser.parse_args()

    if not args.output:
        output_name = 'object_detection_result.jpg'
    else:
        output_name = args.output

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Open image.
    img = Image.open(args.input)
    draw = ImageDraw.Draw(img)

    # Run inference.
    ans = engine.DetectWithImage(img, threshold=args.threshold,
                                 keep_aspect_ratio=True,
                                 relative_coord=False, top_k=10)

    # Display result.
    if ans:
        for obj in ans:
            print('-----------------------------------------')
            if labels:
                print(labels[obj.label_id])
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)
            # Draw a rectangle.
            draw.rectangle(box, outline='red')
        img.save(output_name)
        if platform.machine() == 'x86_64':
            # For gLinux, simply show the image.
            img.show()
        elif platform.machine() == 'armv7l':
            # For Raspberry Pi, you need to install 'feh' to display image.
            subprocess.Popen(['feh', output_name])
        else:
            print('Please check ', output_name)
    else:
        print('No object detected!')
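# Several snippets in this collection call a ReadLabelFile (or load_labels)
# helper without defining it. A minimal sketch, assuming the usual Edge TPU
# label-file layout of "<numeric id> <name>" per line; it is the same parsing
# the picamera snippet further down does inline, and the dict return type
# matches how callers index it (labels[obj.label_id]).
def ReadLabelFile(file_path):
    """Parse a label file into a {label_id: name} dict."""
    with open(file_path, 'r', encoding='utf-8') as f:
        pairs = (line.strip().split(maxsplit=1) for line in f if line.strip())
        return {int(k): v for k, v in pairs}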
def inf_proc(qa, qb, args, status):
    from edgetpu.detection.engine import DetectionEngine
    engine = DetectionEngine(args.model)
    while True:
        img = qa.get()
        if status.value != 0:
            break
        ans = engine.DetectWithImage(img, threshold=args.threshold,
                                     keep_aspect_ratio=True,
                                     relative_coord=False, top_k=10)
        if qb.full():
            qb.get()
        qb.put(ans)
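# A minimal sketch of how inf_proc above might be wired up; the queue sizes,
# the Namespace stand-in for parsed args, and the producer loop are
# assumptions for illustration, not part of the original snippet. Setting
# status.value non-zero asks the worker to stop, matching the check above.
import multiprocessing as mp
from argparse import Namespace

from PIL import Image


def run_inference_worker(model_path, image_paths):
    qa = mp.Queue(maxsize=2)   # frames in
    qb = mp.Queue(maxsize=1)   # latest detections out
    status = mp.Value('i', 0)  # 0 = keep running
    args = Namespace(model=model_path, threshold=0.5)

    worker = mp.Process(target=inf_proc, args=(qa, qb, args, status))
    worker.start()
    for path in image_paths:
        qa.put(Image.open(path))
    status.value = 1  # ask the worker to stop...
    qa.put(None)      # ...and unblock its qa.get()
    worker.join()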
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.', required=True)
    parser.add_argument('--label', help='File path of label file.', required=True)
    parser.add_argument('--maxobjects', type=int, default=3, help='Maximum objects')
    parser.add_argument('--threshold', type=float, default=0.3, help='Minimum threshold')
    parser.add_argument('--picamera', action='store_true', default=False,
                        help='Use PiCamera for image capture')
    args = parser.parse_args()

    # Prepare labels.
    labels = ReadLabelFile(args.label) if args.label else None
    # Initialize engine.
    engine = DetectionEngine(args.model)

    # Initialize video stream.
    vs = VideoStream(usePiCamera=args.picamera, resolution=(640, 480)).start()
    time.sleep(1)

    fps = FPS().start()
    while True:
        try:
            # Read frame from video.
            screenshot = vs.read()
            image = Image.fromarray(screenshot)

            # Perform inference.
            results = engine.DetectWithImage(image, threshold=args.threshold,
                                             keep_aspect_ratio=True,
                                             relative_coord=False,
                                             top_k=args.maxobjects)

            # Draw image.
            draw_image(image, results, labels)

            # Closing condition.
            if cv2.waitKey(5) & 0xFF == ord('q'):
                fps.stop()
                break

            fps.update()
        except KeyboardInterrupt:
            fps.stop()
            break

    print("Elapsed time: " + str(fps.elapsed()))
    print("Approx FPS: " + str(fps.fps()))

    cv2.destroyAllWindows()
    vs.stop()
    time.sleep(2)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='Path of the detection model.', required=True)
    args = parser.parse_args()

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = {0: "hatch", 1: "cargo"}

    cs = cscore.CameraServer.getInstance()
    camera = cs.startAutomaticCapture()
    camera.setResolution(WIDTH, HEIGHT)

    cvSink = cs.getVideo()
    img = np.zeros(shape=(HEIGHT, WIDTH, 3), dtype=np.uint8)
    output = cs.putVideo("MLOut", WIDTH, HEIGHT)

    start = time()
    # Open image.
    while True:
        t, img = cvSink.grabFrame(img)
        frame = Image.fromarray(img)
        draw = ImageDraw.Draw(frame)

        # Run inference.
        ans = engine.DetectWithImage(frame, threshold=0.05, keep_aspect_ratio=True,
                                     relative_coord=False, top_k=10)

        # Display result.
        if ans:
            for obj in ans:
                print('-----------------------------------------')
                if labels:
                    print(labels[obj.label_id])
                print('score = ', obj.score)
                box = obj.bounding_box.flatten().tolist()
                x1, y1, x2, y2 = box
                print(abs(x1 - x2))
                print('box = ', box)
                # Draw a rectangle.
                draw.rectangle(box, outline='green')
            output.putFrame(np.array(frame))
        else:
            print('No object detected!')
            output.putFrame(img)

        print("FPS:", 1 / (time() - start))
        start = time()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='Path of the detection model.', required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument('--input', help='File path of the input image.', required=True)
    parser.add_argument('--output', help='File path of the output image.')
    args = parser.parse_args()

    if not args.output:
        output_name = 'object_detection_result.jpg'
    else:
        output_name = args.output

    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    img = Image.open(args.input)
    draw = ImageDraw.Draw(img)
    imcv = cv2.cvtColor(numpy.array(img), cv2.COLOR_RGB2BGR)

    ans = engine.DetectWithImage(img, threshold=0.40, keep_aspect_ratio=True,
                                 relative_coord=False, top_k=10)
    if ans:
        for obj in ans:
            print('-----------------------------------------')
            box = obj.bounding_box.flatten().tolist()
            if labels[obj.label_id] == 'white' or labels[obj.label_id] == 'red':
                if get_pink_bounding_box(imcv, box):
                    print(labels[5])
                else:
                    print(labels[obj.label_id])
            else:
                print(labels[obj.label_id])
            print('score = ', obj.score)
            print('box = ', box)
            draw.rectangle(box, outline='red')
        img.save(output_name)
        if platform.machine() == 'x86_64':
            img.show()
        elif platform.machine() == 'armv7l':
            subprocess.Popen(['feh', output_name])
        else:
            print('Please check ', output_name)
    else:
        print('No object detected!')
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of classes with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='class score threshold')
    args = parser.parse_args()

    print("Loading %s with %s labels." % (args.model, args.labels))
    engine = DetectionEngine(args.model)
    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im)

        objs = engine.DetectWithImage(pil_im, threshold=args.threshold,
                                      keep_aspect_ratio=True,
                                      relative_coord=True, top_k=args.top_k)

        cv2_im = append_objs_to_img(cv2_im, objs, labels)
        cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
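# append_objs_to_img is not defined in the snippet above. A minimal sketch of
# what such a helper plausibly does: scale the relative coordinates returned
# with relative_coord=True by the frame size, then draw each box with a
# "percent% name" caption using OpenCV. The styling details are assumptions.
import cv2


def append_objs_to_img(cv2_im, objs, labels):
    height, width, _ = cv2_im.shape
    for obj in objs:
        x0, y0, x1, y1 = obj.bounding_box.flatten().tolist()
        x0, y0 = int(x0 * width), int(y0 * height)
        x1, y1 = int(x1 * width), int(y1 * height)
        percent = int(100 * obj.score)
        label = '%d%% %s' % (percent, labels.get(obj.label_id, obj.label_id))
        cv2_im = cv2.rectangle(cv2_im, (x0, y0), (x1, y1), (0, 255, 0), 2)
        cv2_im = cv2.putText(cv2_im, label, (x0, y0 + 30),
                             cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 2)
    return cv2_im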
def func_detection(buffer_cap2det, lock_cap2det, result_det2disp, lock_det2disp):
    global g_is_exit
    engine = DetectionEngine(MODEL_NAME)
    buff = None
    while True:
        time_start = time.time()
        # print("func_detection")

        ### Receive captured image from CAPTURE
        lock_cap2det.acquire()
        if len(buffer_cap2det) > 0:
            buff = buffer_cap2det.pop()
        lock_cap2det.release()

        if buff is not None:
            ### Detect objects
            pil_img = cv2pil(buff)
            ans = engine.DetectWithImage(pil_img, threshold=0.5,
                                         keep_aspect_ratio=False,
                                         relative_coord=True, top_k=10)
            results = []
            if ans:
                for obj in ans:
                    # print('-----------------------------------------')
                    # print('label = ', label2string[obj.label_id])
                    # print('score = ', obj.score)
                    box = obj.bounding_box.flatten().tolist()
                    # print('box = ', box)
                    results.append([box[0], box[1], box[2], box[3],
                                    label2string[obj.label_id]])

            ### Send detection results to DISPLAY
            lock_det2disp.acquire()
            for i in range(len(result_det2disp)):
                # delete all of the old buffers (no need to release memory
                # explicitly, GC will be done automatically)
                _ = result_det2disp.pop()
            result_det2disp.append(results)
            lock_det2disp.release()
        else:
            time.sleep(0.001)

        if g_is_exit:
            break
        time_end = time.time()
        # print("Detection:{0}".format((time_end - time_start) * 1000) + "[msec]")
    print("func_detection: exit")
def recognition(frameBuffer, objsBuffer, stop_prog):
    engine = DetectionEngine('mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite')
    with open("trained_knn_model.clf", 'rb') as f:
        knn_clf = pickle.load(f)

    while True:
        if stop_prog.is_set():
            break
        if frameBuffer.empty():
            continue

        t0 = time.monotonic()
        bgr_img = frameBuffer.get()
        rgb_img = bgr_img[:, :, ::-1].copy()
        arr_img = Image.fromarray(rgb_img)
        t1 = time.monotonic()

        objs = engine.DetectWithImage(arr_img, threshold=0.1,
                                      keep_aspect_ratio=True,
                                      relative_coord=False, top_k=100)
        t2 = time.monotonic()

        coral_boxes = []
        for obj in objs:
            x0, y0, x1, y1 = obj.bounding_box.flatten().tolist()
            w = x1 - x0
            h = y1 - y0
            # Shrink the box and reorder it to the (top, right, bottom, left)
            # layout face_recognition expects.
            x0 = int(x0 + w / 10)
            y0 = int(y0 + h / 4)
            x1 = int(x1 - w / 10)
            y1 = int(y1)
            coral_boxes.append((y0, x1, y1, x0))
        t3 = time.monotonic()

        kk = 1
        if coral_boxes:
            enc = face_recognition.face_encodings(rgb_img, coral_boxes)
            closest_distances = knn_clf.kneighbors(enc, n_neighbors=1)
            are_matches = [closest_distances[0][i][0] <= 0.55
                           for i in range(len(coral_boxes))]
            predR = []
            locR = []
            for pred, loc, rec in zip(knn_clf.predict(enc), coral_boxes, are_matches):
                if rec:
                    predR.append(pred)
                else:
                    predR.append("unknown_{n}".format(n=kk))
                    kk += 1
                locR.append(loc)
            if objsBuffer.empty():
                objsBuffer.put({"boxes": locR, "names": predR})
        else:
            if objsBuffer.empty():
                objsBuffer.put(None)
        t4 = time.monotonic()

        print('Prep time = {dt1:.1f}ms, Infer time = {dt2:.1f}ms, '
              'Face enc time = {dt3:.1f}ms, Overall time = {dt4:.1f}ms'.format(
                  dt1=(t1 - t0) * 1000, dt2=(t2 - t1) * 1000,
                  dt3=(t4 - t3) * 1000, dt4=(t4 - t0) * 1000))
def main():
    model = "path to model"
    threshold = 0.3
    max_objects = 3

    # Prepare labels.
    labels = None
    # Initialize engine.
    engine = DetectionEngine(model)

    # Initialize video stream.
    vs = VideoStream(usePiCamera=True, resolution=(640, 480)).start()
    time.sleep(1)

    fps = FPS().start()
    while True:
        try:
            # Read frame from video.
            screenshot = vs.read()
            image = Image.fromarray(screenshot)

            # Perform inference.
            results = engine.DetectWithImage(image, threshold=threshold,
                                             keep_aspect_ratio=True,
                                             relative_coord=False,
                                             top_k=max_objects)

            # Draw image.
            draw_image(image, results, labels)

            # Closing condition.
            if cv2.waitKey(5) & 0xFF == ord('q'):
                fps.stop()
                break

            fps.update()
        except KeyboardInterrupt:
            fps.stop()
            break

    print("Elapsed time: " + str(fps.elapsed()))
    print("Approx FPS: " + str(fps.fps()))

    cv2.destroyAllWindows()
    vs.stop()
    time.sleep(2)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.', required=True)
    parser.add_argument('--label', help='File path of label file.', required=True)
    args = parser.parse_args()

    with open(args.label, 'r', encoding="utf-8") as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    engine = DetectionEngine(args.model)

    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        _, width, height, channels = engine.get_input_tensor_shape()
        try:
            stream = io.BytesIO()
            for foo in camera.capture_continuous(stream, format='rgb',
                                                 use_video_port=True,
                                                 resize=(width, height)):
                stream.truncate()
                stream.seek(0)
                frame = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                start_ms = time.time()
                # The frame is a flat RGB tensor, so it goes through the
                # raw-tensor entry point rather than DetectWithImage (which
                # expects a PIL image).
                results = engine.DetectWithInputTensor(frame, threshold=0.05,
                                                       top_k=10)
                elapsed_ms = time.time() - start_ms
                if results:
                    logging.info("frame has {} objects".format(len(results)))
                    for detected_object in results:
                        logging.info("label: {}, score: {}, bounds: {}".format(
                            labels[detected_object.label_id],
                            detected_object.score,
                            detected_object.bounding_box.flatten().tolist()))
        finally:
            logging.info("done capturing")
def inferencer(results, frameBuffer, model, camera_width, camera_height):
    engine = DetectionEngine(model)
    while True:
        if frameBuffer.empty():
            continue

        # Run inference.
        color_image = frameBuffer.get()
        prepimg = color_image[:, :, ::-1].copy()
        prepimg = Image.fromarray(prepimg)
        tinf = time.perf_counter()
        ans = engine.DetectWithImage(prepimg, threshold=0.3,
                                     keep_aspect_ratio=True,
                                     relative_coord=False, top_k=10)
        # print(time.perf_counter() - tinf, "sec")
        results.put(ans)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='Path of the detection model.', required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument('--image', help='File path of the input image.', required=True)
    parser.add_argument('--output', help='File path of the output image.')
    args = parser.parse_args()

    if not args.output:
        output_name = 'object_detection_result.jpg'
    else:
        output_name = args.output

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Open image.
    img = Image.open(args.image)
    draw = ImageDraw.Draw(img)

    # Run inference.
    ans = engine.DetectWithImage(img, threshold=0.05, keep_aspect_ratio=True,
                                 relative_coord=False, top_k=10)

    # Display result.
    if ans:
        for obj in ans:
            print('-----------------------------------------')
            if labels:
                print(labels[obj.label_id])
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)
            # Draw a rectangle.
            draw.rectangle(box, outline='red')
        img.save(output_name)
        print('Please check ', output_name)
    else:
        print('No object detected!')
def inferencer(results, frameBuffer, model, camera_width, camera_height):
    engine = None

    # Acquisition of TPU list without model assignment
    devices = edgetpu_utils.ListEdgeTpuPaths(
        edgetpu_utils.EDGE_TPU_STATE_UNASSIGNED)
    devopen = False
    for device in devices:
        try:
            engine = DetectionEngine(model, device)
            devopen = True
            break
        except:
            continue

    if not devopen:
        print("TPU Devices open Error!!!")
        sys.exit(1)

    print("Loaded Graphs!!! ")

    while True:
        if frameBuffer.empty():
            continue

        # Run inference.
        color_image = frameBuffer.get()
        prepimg = color_image[:, :, ::-1].copy()
        prepimg = Image.fromarray(prepimg)
        tinf = time.perf_counter()
        ans = engine.DetectWithImage(prepimg, threshold=0.5,
                                     keep_aspect_ratio=True,
                                     relative_coord=False, top_k=10)
        print(time.perf_counter() - tinf, "sec")
        results.put(ans)
def recognition(frameBuffer, objsBuffer, stop_prog, path):
    engine = DetectionEngine(
        'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite')
    face_cnt = 0
    while True:
        if stop_prog.is_set():
            break
        if frameBuffer.empty():
            continue

        t0 = time.monotonic()
        bgr_img = frameBuffer.get()
        rgb_img = bgr_img[:, :, ::-1].copy()
        arr_img = Image.fromarray(rgb_img)
        t1 = time.monotonic()

        objs = engine.DetectWithImage(arr_img, threshold=0.1,
                                      keep_aspect_ratio=True,
                                      relative_coord=False, top_k=5)
        t2 = time.monotonic()

        coral_boxes = []
        m = 0
        for obj in objs:
            x0, y0, x1, y1 = obj.bounding_box.flatten().tolist()
            w = x1 - x0
            h = y1 - y0
            x0 = int(x0 + w / 10)
            y0 = int(y0 + h / 4)
            x1 = int(x1 - w / 10)
            y1 = int(y1)
            coral_boxes.append((y0, x1, y1, x0))
            face_mini = bgr_img[y0:y1, x0:x1]
            cv2.imwrite(path + "face_" + str(face_cnt) + "_" + str(m) + ".jpg",
                        face_mini)
            print(path + "face_" + str(face_cnt) + "_" + str(m) + ".jpg")
            m += 1
        face_cnt += 1
        sleep(1)
        enc = []
        objsBuffer.put({"boxes": coral_boxes, "encodings": enc})
class SynchronizedDetectionEngine(Engine):
    @inject
    def __init__(self, config: ConfigProvider):
        from edgetpu.detection.engine import DetectionEngine
        self._engine = DetectionEngine(config.detector_model_file)
        self._engine_lock = Lock()
        self._pending_processing_start = 0

    def detect(self, img, threshold):
        with self._engine_lock:
            self._pending_processing_start = time.monotonic()
            result = self._engine.DetectWithImage(img, threshold=threshold,
                                                  keep_aspect_ratio=True,
                                                  relative_coord=False,
                                                  top_k=1000)
            self._pending_processing_start = 0
            return result

    def get_pending_processing_seconds(self) -> float:
        start = self._pending_processing_start
        return (time.monotonic() - start) if start != 0 else 0
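# A minimal sketch of driving SynchronizedDetectionEngine from several
# threads; the SimpleNamespace config stand-in and the image list are
# assumptions for illustration (the original wires its config in through
# dependency injection). The lock inside detect() serializes access to the
# single Edge TPU, so the threads below can share one engine safely.
import threading
from types import SimpleNamespace

from PIL import Image


def detect_all(image_paths, model_file):
    config = SimpleNamespace(detector_model_file=model_file)
    engine = SynchronizedDetectionEngine(config)

    def worker(path):
        result = engine.detect(Image.open(path), threshold=0.5)
        print(path, len(result), 'objects')

    threads = [threading.Thread(target=worker, args=(p,)) for p in image_paths]
    for t in threads:
        t.start()
    for t in threads:
        t.join()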
def ssd_inferencer(results, frameBuffer, model, device):
    ssd_engine = DetectionEngine(model, device)
    print("Loaded Graphs!!! (SSD)")

    while True:
        if frameBuffer.empty():
            continue

        # Run inference.
        color_image = frameBuffer.get()
        prepimg_ssd = color_image[:, :, ::-1].copy()
        prepimg_ssd = Image.fromarray(prepimg_ssd)
        tinf = time.perf_counter()
        result_ssd = ssd_engine.DetectWithImage(prepimg_ssd, threshold=0.5,
                                                keep_aspect_ratio=True,
                                                relative_coord=False, top_k=10)
        print(time.perf_counter() - tinf, "sec (SSD)")
        results.put(result_ssd)
def main():
    '''Main function for running head tracking.

    It will initialize picam and CoralTPU with a given model (e.g. face
    recognition). Then, it will capture an image, feed it into CoralTPU, get
    coordinates and move servos (pan, tilt) towards the center of the box.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='Path of the detection model.', required=True)
    args = parser.parse_args()

    image_number = 0

    # Let's create the images directory if not exists
    if not exists(IMAGES_DIR):
        makedirs(IMAGES_DIR)

    # Initialize the pan-tilt thing
    reset_pan_tilt()

    # Initialize engine.
    engine = DetectionEngine(args.model)

    # Initialize the camera
    # cam = cv2.VideoCapture(camera)
    camera = PiCamera()
    time.sleep(2)
    camera.resolution = IMAGE_SIZE
    camera.vflip = True

    # Create the in-memory stream
    stream = io.BytesIO()

    debug = DEBUG
    image_center = IMAGE_CENTER
    object_on_sight = False
    timer = None
    print("Capture started")
    while True:
        try:
            # ret, cv2_im = cam.read()
            stream = io.BytesIO()  # wipe the contents
            camera.capture(stream, format='jpeg', use_video_port=True)
            stream.seek(0)

            # we need to flip it here!
            pil_im = Image.open(stream)
            # cv2_im = np.array(pil_im)
            # cv2_im = cv2.flip(cv2_im, 1)  # Flip horizontally
            # cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)

            # Remember. Doc at https://coral.ai/docs/reference/edgetpu.detection.engine/
            ans = engine.DetectWithImage(pil_im, threshold=0.3,
                                         keep_aspect_ratio=True,
                                         relative_coord=False, top_k=1)
            if ans:
                # for obj in ans:  # For multiple objects on screen
                obj = ans[0]  # Follow only one object, the first detected object
                object_on_sight = True
                timer = None
                result = show_box_center_and_size(obj.bounding_box)
                moved = center_camera(result, image_center, debug)
                if moved == 0:
                    # it didn't move, and there is an object on sight
                    image_number = save_picture(pil_im, image_number)
            else:
                # print("No objects detected with the given threshold")
                object_on_sight = False

            if not object_on_sight and not timer:
                # if there was an object before, and is no longer on screen
                timer = time.time()  # We start a timer for resetting the angles later
            elif not object_on_sight and timer:
                elapsed_time = time.time() - timer
                # If 5 seconds have passed without activity. More than 8, do nothing
                if elapsed_time > 5 and elapsed_time < 8:
                    reset_pan_tilt()
                    timer = None  # We stop the timer
        except KeyboardInterrupt:
            print("Closing program")
            reset_pan_tilt()
            sys.exit()
        except:
            reset_pan_tilt()
            raise
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='Path of the detection model.', required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument('--mode',
                        help='Mode for the detection: OBJECT_DETECTION or IMAGE_CLASSIFICATION',
                        required=True)
    parser.add_argument('--camera', help='Camera source (if multiple available)',
                        type=int, required=False)
    args = parser.parse_args()

    # Initialize engine.
    if args.mode == "OBJECT_DETECTION":
        engine = DetectionEngine(args.model)
    elif args.mode == "IMAGE_CLASSIFICATION":
        engine = ClassificationEngine(args.model)
    else:
        print("Please insert the mode from OBJECT_DETECTION or IMAGE_CLASSIFICATION")
        exit()

    labels = read_label_file(args.label) if args.label else None
    label = None
    camera = args.camera if args.camera else 0

    # Initialize the camera
    # cam = cv2.VideoCapture(camera)
    camera = PiCamera()
    time.sleep(2)
    camera.resolution = (640, 480)

    # Create the in-memory stream
    stream = io.BytesIO()

    # Initialize the timer for fps
    start_time = time.time()
    frame_times = deque(maxlen=40)

    while True:
        # ret, cv2_im = cam.read()
        stream = io.BytesIO()  # wipe the contents
        camera.capture(stream, format='jpeg', use_video_port=True)
        stream.seek(0)
        pil_im = Image.open(stream)
        cv2_im = np.array(pil_im)
        cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)

        if args.mode == "OBJECT_DETECTION":
            ans = engine.DetectWithImage(pil_im, threshold=0.05,
                                         keep_aspect_ratio=True,
                                         relative_coord=False, top_k=10)
            if ans:
                for obj in ans:
                    if obj.score > 0.4:
                        if labels:
                            label = labels[obj.label_id] + " - {0:.2f}".format(obj.score)
                        draw_rectangles(obj.bounding_box, cv2_im, label=label)
            else:
                draw_text(cv2_im, 'No object detected!')
        else:
            i = 0
            for result in engine.ClassifyWithImage(pil_im, top_k=5):
                if result:
                    label = labels[result[0]]
                    score = result[1]
                    draw_text(cv2_im, label, i)
                    i += 1
                else:
                    draw_text(cv2_im, 'No classification detected!')

        lastInferenceTime = engine.get_inference_time()
        frame_times.append(time.time())
        fps = len(frame_times) / float(frame_times[-1] - frame_times[0] + 0.001)
        draw_text(cv2_im, "{:.1f} / {:.2f}ms".format(fps, lastInferenceTime))
        # print("FPS / Inference time: " + "{:.1f} / {:.2f}ms".format(fps, lastInferenceTime))

        # flipping the image:
        cv2.flip(cv2_im, 1)
        # cv2_im = cv2.resize(cv2_im, (800, 600))
        cv2.imshow('object detection', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            exit()
            break
import cv2
from edgetpu.detection.engine import DetectionEngine
from PIL import Image

cap = cv2.VideoCapture(0)
model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'
engine = DetectionEngine(model)

while True:
    ret, frame = cap.read()
    if ret:
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        h, w = frame.shape[:2]
        im_pil = Image.fromarray(img)
        results = engine.DetectWithImage(im_pil)
        for result in results:
            b = result.bounding_box
            x0, y0 = b[0]
            x1, y1 = b[1]
            x0 = int(x0 * w)
            y0 = int(y0 * h)
            x1 = int(x1 * w)
            y1 = int(y1 * h)
            cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 255, 0), 2)
        cv2.imshow('', frame)
    k = cv2.waitKey(1)
    if k == 27:
        break
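# The loop above relies on DetectWithImage's default relative coordinates and
# rescales by hand. As other snippets in this collection do, the engine can
# also return pixel coordinates directly with relative_coord=False; a small
# helper sketch reusing the imports above (the threshold and top_k values are
# illustrative assumptions):
def detect_absolute(engine, im_pil, frame):
    results = engine.DetectWithImage(im_pil, threshold=0.5,
                                     keep_aspect_ratio=True,
                                     relative_coord=False, top_k=10)
    for result in results:
        x0, y0, x1, y1 = [int(v) for v in result.bounding_box.flatten().tolist()]
        cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 255, 0), 2)
    return frame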
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.', required=True)
    parser.add_argument('--label', help='File path of label file.', required=True)
    parser.add_argument('--top_k', help='keep top k candidates.', default=3)
    parser.add_argument('--threshold', help='threshold to filter results.',
                        default=0.5, type=float)
    parser.add_argument('--width', help='Resolution width.', default=640, type=int)
    parser.add_argument('--height', help='Resolution height.', default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(WINDOW_NAME,
                    cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        _, width, height, channels = engine.get_input_tensor_shape()
        rawCapture = PiRGBArray(camera)

        # allow the camera to warmup
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture, format='rgb',
                                                   use_video_port=True):
                rawCapture.truncate(0)

                # input_buf = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                input_buf = PIL.Image.fromarray(image)

                # Run inference.
                start_ms = time.time()
                ans = engine.DetectWithImage(input_buf, threshold=args.threshold,
                                             keep_aspect_ratio=False,
                                             relative_coord=False,
                                             top_k=args.top_k)
                # ans = engine.DetectWithInputTensor(input_buf, threshold=0.05,
                #     keep_aspect_ratio=False, relative_coord=False, top_k=10)
                elapsed_ms = time.time() - start_ms

                # Display result.
                if ans:
                    for obj in ans:
                        label_name = 'Unknown'
                        if labels:
                            label_name = labels[obj.label_id]
                        caption = '{0}({1:.2f})'.format(label_name, obj.score)

                        # Draw a rectangle and caption.
                        box = obj.bounding_box.flatten().tolist()
                        visual.draw_rectangle(im, box, colors[obj.label_id])
                        visual.draw_caption(im, box, caption)

                # Calc fps.
                fps = 1 / elapsed_ms
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_fps = 1 / avg_elapsed_ms
                    avg_text = ' AVG: {0:.2f}ms, {1:.2f}fps'.format(
                        (avg_elapsed_ms * 1000.0), avg_fps)

                # Display fps.
                fps_text = '{0:.2f}ms, {1:.2f}fps'.format((elapsed_ms * 1000.0), fps)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # display
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
        finally:
            camera.stop_preview()

    # When everything done, release the window
    cv2.destroyAllWindows()
def inference_thread(running, state, result_buffer, frame_buffer, args,
                     identity_dict, current_identity):
    global IDLE, TRACK, RESET, FACE_RECOG_THRESHOLD, FACE_RECOG_THRESHOLD_A
    global od_engine, face_detector, facenet_engine, svm_clf

    # Initialize object detection engine.
    od_engine = DetectionEngine(args.od_model)
    print("device_path: ", od_engine.device_path())
    _, od_width, od_height, _ = od_engine.get_input_tensor_shape()
    print("od input dim: ", od_width, od_height)

    # initial face detector using the opencv haarcascade model
    face_detector = FaceDetector(args.hc_model)

    # Initialize facenet engine.
    facenet_engine = BasicEngine(args.fn_model)

    # load the sklearn support vector machine model from disk
    svm_clf = pickle.load(open(args.svm_model, 'rb'))

    while running.value:
        # check if the frame buffer has a frame, else busy waiting
        if frame_buffer.empty():
            continue
        frame = frame_buffer.get()
        tinf = time.perf_counter()

        if state.value == IDLE:
            fd_results = None
            # reorder image frame from BGR to RGB
            img = frame[:, :, ::-1]
            # face detection with image preprocessing and downsampling
            faces_coord = face_detector.detect(img, True)
            print("faces_coord: ", faces_coord)
            if not isinstance(faces_coord, type(None)):
                # normalize face image
                face_image = np.array(normalize_faces(img, faces_coord))
                # facenet to generate face embedding
                facenet_engine.RunInference(face_image.flatten())
                face_emb = facenet_engine.get_raw_output().reshape(1, -1)
                # use SVM to classify identity with face embedding
                pred_prob = svm_clf.predict_proba(face_emb)
                best_class_index = np.argmax(pred_prob, axis=1)[0]
                best_class_prob = pred_prob[0, best_class_index]
                print("best_class_index: ", best_class_index)
                print("best_class_prob: ", best_class_prob)
                print("label", svm_clf.classes_[best_class_index])
                # Check threshold and verify the identity is in the identity dictionary
                if best_class_prob > FACE_RECOG_THRESHOLD:
                    face_label = svm_clf.classes_[best_class_index]
                    if face_label in identity_dict:
                        print("\n=================================")
                        print("Identity found: ", face_label, " ",
                              identity_dict[face_label],
                              " with Prob = ", best_class_prob)
                        print("=================================\n")
                        current_identity.value = identity_dict[face_label][0]  # ID
                        result_buffer.put(faces_coord)
        elif state.value == TRACK:
            od_results = None
            # convert numpy array representation to PIL image with rgb format
            img = Image.fromarray(frame[:, :, ::-1], 'RGB')
            # Run inference.
            od_results = od_engine.DetectWithImage(img, threshold=0.30,
                                                   keep_aspect_ratio=True,
                                                   relative_coord=False, top_k=10)
            # push result to buffer queue
            result_buffer.put(od_results)

        print(time.perf_counter() - tinf, "sec")
    print("[Finish] inference_thread")
if args.model_name == Models.tf_lite:
    model_type = 'tf_lite'
    image = load_image_into_numpy_array(image_filepath)  # image to numpy array
    resized_image = cv2.resize(image, model_image_dim,
                               interpolation=cv2.INTER_AREA)  # resized to 300x300xRGB
    reshaped_image = np.reshape(resized_image, model_input_dim)  # reshape for model (1,300,300,3)
    bbox_array, class_id_array, prob_array = send_image_to_model(
        logger, image_filepath, reshaped_image, interpreter)
elif args.model_name == Models.edge_tpu:
    model_type = 'edge_tpu'  # we'll need this to process the output differently
    # cv2_img = cv2.cvtColor(reshaped_image, cv2.COLOR_BGR2RGB)
    # pil_image = Image.fromarray(cv2_img)  # expecting a PIL image
    # cv2_image = load_image_into_numpy_array('/home/jay/Downloads/parrot.jpg')
    # print("cv2 image shape:", cv2_image.shape)
    pil_image = Image.open(image_filepath)

    # returns class: DetectionCandidate
    start_time = time.time()
    ans = engine.DetectWithImage(pil_image, threshold=0.05,
                                 keep_aspect_ratio=True,
                                 relative_coord=False, top_k=10)
    finish_time = time.time()
    logger.info("time spent: {:.4f}".format(finish_time - start_time))

    bbox_list = []
    class_id_list = []
    prob_list = []
    for i, obj in enumerate(ans):
        box = obj.bounding_box.flatten().tolist()
        bbox_list.append(box)
        class_id_list.append(obj.label_id)
        prob_list.append(obj.score)
    bbox_array = array(bbox_list)
    class_id_array = array(class_id_list)
    prob_array = array(prob_list)

inference_image, orig_image_dim, detected_objects = inference_to_image(model_type, logger,
def main():
    load_time = time.time()
    # Initialize engine.
    engine = DetectionEngine(Model_weight)
    labels = None
    # Face recognize engine
    face_engine = ClassificationEngine(FaceNet_weight)
    # read embedding
    class_arr, emb_arr = read_embedding(Embedding_book)
    l = time.time() - load_time

    with tf.Graph().as_default():
        with tf.compat.v1.Session() as sess:
            cap = cv2.VideoCapture(0)
            while True:
                t1 = cv2.getTickCount()
                print('Load_model: {:.2f} sec'.format(l))
                ret, frame = cap.read()
                img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(img)

                # Run inference.
                ans = engine.DetectWithImage(img, threshold=0.05,
                                             keep_aspect_ratio=False,
                                             relative_coord=False, top_k=10)
                img = numpy.asarray(img)

                # Display result.
                if ans:
                    crop_img = crop_image(ans, frame)
                    if cv2.waitKey(1) == ord('a'):
                        for k in range(0, len(crop_img)):
                            new_class_name = input('Please input your name of class:')
                            new_save = cv2.cvtColor(crop_img[k], cv2.COLOR_BGR2RGB)
                            cv2.imwrite('pictures/' + str(new_class_name) + '.jpg',
                                        new_save)
                        Create_embeddings(face_engine)
                        class_arr, emb_arr = read_embedding(
                            'embedding_book/embeddings.h5')

                    embs = Tpu_FaceRecognize(face_engine, crop_img)

                    face_num = len(ans)
                    face_class = ['Others'] * face_num
                    for i in range(face_num):
                        diff = np.mean(np.square(embs[i] - emb_arr), axis=1)
                        min_diff = min(diff)
                        if min_diff < THRED:
                            index = np.argmin(diff)
                            face_class[i] = class_arr[index]

                    print('Face_class:', face_class)
                    print('Classes:', class_arr)

                    for count, obj in enumerate(ans):
                        print('-----------------------------------------')
                        if labels:
                            print(labels[obj.label_id])
                        print('Score = ', obj.score)
                        box = obj.bounding_box.flatten().tolist()
                        # Draw a rectangle and label
                        cv2.rectangle(img, (int(box[0]), int(box[1])),
                                      (int(box[2]), int(box[3])), (255, 255, 0), 2)
                        cv2.putText(img, '{}'.format(face_class[count]),
                                    (int(box[0]), int(box[1]) - 5),
                                    cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 1,
                                    cv2.LINE_AA)

                t2 = cv2.getTickCount()
                t = (t2 - t1) / cv2.getTickFrequency()
                fps = 1.0 / t
                cv2.putText(img, 'fps: {:.2f}'.format(fps), (5, 20),
                            cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 1, cv2.LINE_AA)
                cv2.putText(img, 'A: Add new class', (5, 450),
                            cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 1, cv2.LINE_AA)
                cv2.putText(img, 'Q: Quit', (5, 470),
                            cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 1, cv2.LINE_AA)

                img_ = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                cv2.imshow('frame', img_)

                if cv2.waitKey(1) == ord('q'):
                    break

            cap.release()
            cv2.destroyAllWindows()
class EdgeTPUFaceDetector(ConnectionBasedTransport):

    def __init__(self):
        super(EdgeTPUFaceDetector, self).__init__()
        rospack = rospkg.RosPack()
        pkg_path = rospack.get_path('coral_usb')
        self.bridge = CvBridge()
        self.classifier_name = rospy.get_param('~classifier_name', rospy.get_name())
        model_file = os.path.join(
            pkg_path,
            './models/mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite')
        model_file = rospy.get_param('~model_file', model_file)

        self.engine = DetectionEngine(model_file)

        # only for human face
        self.label_ids = [0]
        self.label_names = ['face']

        # dynamic reconfigure
        self.srv = Server(EdgeTPUFaceDetectorConfig, self.config_callback)

        self.pub_rects = self.advertise('~output/rects', RectArray, queue_size=1)
        self.pub_class = self.advertise(
            '~output/class', ClassificationResult, queue_size=1)
        self.pub_image = self.advertise('~output/image', Image, queue_size=1)

    def subscribe(self):
        self.sub_image = rospy.Subscriber(
            '~input', Image, self.image_cb, queue_size=1, buff_size=2**26)

    def unsubscribe(self):
        self.sub_image.unregister()

    @property
    def visualize(self):
        return self.pub_image.get_num_connections() > 0

    def config_callback(self, config, level):
        self.score_thresh = config.score_thresh
        self.top_k = config.top_k
        return config

    def image_cb(self, msg):
        img = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
        H, W = img.shape[:2]
        objs = self.engine.DetectWithImage(
            PIL.Image.fromarray(img), threshold=self.score_thresh,
            keep_aspect_ratio=True, relative_coord=True, top_k=self.top_k)

        bboxes = []
        scores = []
        labels = []
        rect_msg = RectArray(header=msg.header)
        for obj in objs:
            x_min, y_min, x_max, y_max = obj.bounding_box.flatten().tolist()
            x_min = int(np.round(x_min * W))
            y_min = int(np.round(y_min * H))
            x_max = int(np.round(x_max * W))
            y_max = int(np.round(y_max * H))
            bboxes.append([y_min, x_min, y_max, x_max])
            scores.append(obj.score)
            labels.append(self.label_ids.index(int(obj.label_id)))

            rect = Rect(x=x_min, y=y_min, width=x_max - x_min, height=y_max - y_min)
            rect_msg.rects.append(rect)

        bboxes = np.array(bboxes)
        scores = np.array(scores)
        labels = np.array(labels)

        cls_msg = ClassificationResult(
            header=msg.header,
            classifier=self.classifier_name,
            target_names=self.label_names,
            labels=labels,
            label_names=[self.label_names[l] for l in labels],
            label_proba=scores)

        self.pub_rects.publish(rect_msg)
        self.pub_class.publish(cls_msg)

        if self.visualize:
            fig = plt.figure(tight_layout={'pad': 0})
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.axis('off')
            fig.add_axes(ax)
            vis_bbox(
                img[:, :, ::-1].transpose((2, 0, 1)),
                bboxes, labels, scores,
                label_names=self.label_names, ax=ax)
            fig.canvas.draw()
            w, h = fig.canvas.get_width_height()
            vis_img = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8)
            vis_img.shape = (h, w, 3)
            fig.clf()
            plt.close()
            vis_msg = self.bridge.cv2_to_imgmsg(vis_img, 'rgb8')
            # BUG: https://answers.ros.org/question/316362/sensor_msgsimage-generates-float-instead-of-int-with-python3/  # NOQA
            vis_msg.step = int(vis_msg.step)
            vis_msg.header = msg.header
            self.pub_image.publish(vis_msg)
class FaceDetectorEdgeTPU:
    def __init__(self):
        # Read input parameters
        self.input_image_compressed = rospy.get_param(
            '~input_image_compressed', "usb_cam/image_raw/compressed")
        self.output_image_compressed = rospy.get_param(
            '~output_image', "face_image/compressed")
        self.model_path = rospy.get_param('~model_path', "model.tflite")
        self.threshold = rospy.get_param('~threshold', 0.8)
        self.rotation_angle = rospy.get_param('~rotation_angle', 0.0)
        self.labels_file = rospy.get_param('~labels_file', "")
        self.tracked_object = rospy.get_param('~tracked_object', "person")

        self.enable_labeling = False
        self.labels = {}

        # fix path if required
        if len(self.model_path) > 0:
            if "pkg://" in self.model_path:
                rp = rospkg.RosPack()
                path = rp.get_path('braccio_camai')
                self.model_path = self.model_path.replace("pkg://braccio_camai", path)
        else:
            rospy.loginfo("Invalid model path")
            return

        if len(self.labels_file) > 0:
            if "pkg://" in self.labels_file:
                rp = rospkg.RosPack()
                path = rp.get_path('braccio_camai')
                self.labels_file = self.labels_file.replace("pkg://braccio_camai", path)

            # loop over the class labels file
            for row in open(self.labels_file):
                # unpack the row and update the labels dictionary
                (classID, label) = row.strip().split(maxsplit=1)
                self.labels[int(classID)] = label.strip()
            self.enable_labeling = True

        # print input parameters
        rospy.loginfo("input_image_compressed: " + self.input_image_compressed)
        rospy.loginfo("output_image_compressed: " + self.output_image_compressed)
        rospy.loginfo("model_path: " + self.model_path)
        rospy.loginfo("threshold: " + str(self.threshold))
        rospy.loginfo("rotation_angle: " + str(self.rotation_angle))
        rospy.loginfo("labels_file: " + str(self.labels_file))
        rospy.loginfo("tracked_object: " + str(self.tracked_object))

        self.current_image = CompressedImage()

        rospy.loginfo("Loading Tensorflow model")
        self.model = DetectionEngine(self.model_path)

        self.pub_image = rospy.Publisher(self.output_image_compressed,
                                         CompressedImage, queue_size=1)
        self.pub_box = rospy.Publisher("bounding_box", Int16MultiArray, queue_size=1)
        self.subscriber = rospy.Subscriber(self.input_image_compressed,
                                           CompressedImage, self.callback,
                                           queue_size=1)

        rospy.loginfo("detection started")
        while not rospy.is_shutdown():
            self.process_current_image()
        # rospy.spin()

    def process_current_image(self):
        # No image data received
        if len(self.current_image.data) == 0:
            return

        # skip if no subscribers request for detections
        if (self.pub_box.get_num_connections() == 0 and
                self.pub_image.get_num_connections() == 0):
            return

        np_arr = np.fromstring(self.current_image.data, np.uint8)
        frame_ori = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
        frame = self.rotate_image(frame_ori, self.rotation_angle)

        orig = frame.copy()
        frame = Image.fromarray(frame)

        # make predictions on the input frame
        results = self.model.DetectWithImage(frame, threshold=self.threshold,
                                             keep_aspect_ratio=True,
                                             relative_coord=False)

        # loop over the results
        for r in results:
            # extract the bounding box
            box = r.bounding_box.flatten().astype("int")
            (startX, startY, endX, endY) = box

            if self.enable_labeling:
                label = self.labels[r.label_id]
                if label != self.tracked_object:
                    continue  # skip current object
                y = startY - 15 if startY - 15 > 15 else startY + 15
                text = "{}: {:.2f}%".format(label, r.score * 100)
                cv2.putText(orig, text, (startX, y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # draw the bounding box and label on the image
            cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)

            box_msg = Int16MultiArray()
            box_msg.data = [r.label_id, startX, startY, endX, endY]
            self.pub_box.publish(box_msg)
            break

        # skip if no subscribers are registered
        if self.pub_image.get_num_connections() == 0:
            return

        #### Create CompressedImage ####
        msg = CompressedImage()
        msg.header.stamp = rospy.Time.now()
        msg.format = "jpeg"
        msg.data = np.array(cv2.imencode('.jpg', orig)[1]).tostring()

        # Publish image with face detections
        self.pub_image.publish(msg)

    def callback(self, ros_data):
        self.current_image = ros_data

    def rotate_image(self, image, angle):
        image_center = tuple(np.array(image.shape[1::-1]) / 2)
        rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
        result = cv2.warpAffine(image, rot_mat, image.shape[1::-1],
                                flags=cv2.INTER_LINEAR)
        return result
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        default="/home/libre/models/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite",
        help="Path of the detection model.")
    parser.add_argument("--label", default="/home/libre/detection/coco_labels.txt",
                        help="Path of the labels file.")
    parser.add_argument("--usbcamno", type=int, default=0, help="USB Camera number.")
    parser.add_argument("--cam_w", type=int, default=320, help="Camera width")
    parser.add_argument("--cam_h", type=int, default=320, help="Camera height")
    args = parser.parse_args()

    fps = ""
    detectfps = ""
    framecount = 0
    detectframecount = 0
    time1 = 0
    time2 = 0

    box_color = (255, 128, 0)
    box_thickness = 1
    label_background_color = (125, 175, 75)
    label_text_color = (255, 255, 255)
    percentage = 0.0

    camera_width = args.cam_w
    camera_height = args.cam_h

    cap = cv2.VideoCapture(args.usbcamno)
    cap.set(cv2.CAP_PROP_FPS, 150)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, camera_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, camera_height)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    while True:
        t1 = time.perf_counter()
        ret, color_image = cap.read()
        if not ret:
            break

        # Run inference.
        prepimg = color_image[:, :, ::-1].copy()
        prepimg = Image.fromarray(prepimg)
        tinf = time.perf_counter()
        ans = engine.DetectWithImage(prepimg, threshold=0.5,
                                     keep_aspect_ratio=True,
                                     relative_coord=False, top_k=10)
        print(time.perf_counter() - tinf, "sec")

        # Display result.
        if ans:
            detectframecount += 1
            for obj in ans:
                box = obj.bounding_box.flatten().tolist()
                box_left = int(box[0])
                box_top = int(box[1])
                box_right = int(box[2])
                box_bottom = int(box[3])
                cv2.rectangle(color_image, (box_left, box_top),
                              (box_right, box_bottom), box_color, box_thickness)

                percentage = int(obj.score * 100)
                label_text = labels[obj.label_id] + " (" + str(percentage) + "%)"

                label_size = cv2.getTextSize(label_text,
                                             cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
                label_left = box_left
                label_top = box_top - label_size[1]
                if (label_top < 1):
                    label_top = 1
                label_right = label_left + label_size[0]
                label_bottom = label_top + label_size[1]
                cv2.rectangle(color_image, (label_left - 1, label_top - 1),
                              (label_right + 1, label_bottom + 1),
                              label_background_color, -1)
                cv2.putText(color_image, label_text, (label_left, label_bottom),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, label_text_color, 1)

        cv2.putText(color_image, fps, (camera_width - 170, 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)
        cv2.putText(color_image, detectfps, (camera_width - 170, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)

        cv2.namedWindow('USB Camera', cv2.WINDOW_AUTOSIZE)
        cv2.imshow('USB Camera', color_image)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # FPS calculation
        framecount += 1
        if framecount >= 15:
            fps = "(Playback) {:.1f} FPS".format(time1 / 15)
            detectfps = "(Detection) {:.1f} FPS".format(detectframecount / time2)
            framecount = 0
            detectframecount = 0
            time1 = 0
            time2 = 0
        t2 = time.perf_counter()
        elapsedTime = t2 - t1
        time1 += 1 / elapsedTime
        time2 += elapsedTime
        continue

    ## create numpy array of depth and color frames
    depth_image = np.asanyarray(depth_frame.get_data())
    color_image = np.asanyarray(color_frame.get_data())
    color_image_copy = np.asanyarray(color_frame.get_data())

    ## resize image based upon argument and create a copy to annotate and display
    color_image = imutils.resize(color_image, width=args["width"])
    orig = color_image
    # color_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
    color_image = Image.fromarray(color_image)

    ## start a timer for inferencing time and feed the frame into the model
    start_inference = time.time()
    results = model.DetectWithImage(color_image, threshold=args["confidence"],
                                    keep_aspect_ratio=True, relative_coord=False)
    end_inference = time.time()

    depth_colormap = cv2.applyColorMap(
        cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)
    depth_colormap = imutils.resize(depth_colormap, width=args["width"])

    ## put a bounding box on the result in the copy image
    for r in results:
        bounding_box = r.bounding_box.flatten().astype("int")  # try changing r
        (startX, startY, endX, endY) = bounding_box  # try changing startx etc
        label = labels[r.label_id]
        cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)
        cv2.rectangle(depth_colormap, (startX, startY), (endX, endY),
                      (255, 255, 255), 2)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model',
        help='Path of the detection model, it must be a SSD model with postprocessing operator.',
        required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument('--input', help='File path of the input image.', required=True)
    parser.add_argument('--output', help='File path of the output image.')
    parser.add_argument(
        '--keep_aspect_ratio',
        dest='keep_aspect_ratio',
        action='store_true',
        help=('keep the image aspect ratio when down-sampling the image by adding '
              'black pixel padding (zeros) on bottom or right. '
              'By default the image is resized and reshaped without cropping. This '
              'option should be the same as what is applied on input images during '
              'model training. Otherwise the accuracy may be affected and the '
              'bounding box of detection result may be stretched.'))
    parser.set_defaults(keep_aspect_ratio=False)
    args = parser.parse_args()

    if not args.output:
        output_name = 'object_detection_result.jpg'
    else:
        output_name = args.output

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = dataset_utils.ReadLabelFile(args.label) if args.label else None

    # Open image.
    img = Image.open(args.input)
    draw = ImageDraw.Draw(img)

    # Run inference.
    ans = engine.DetectWithImage(img, threshold=0.05,
                                 keep_aspect_ratio=args.keep_aspect_ratio,
                                 relative_coord=False, top_k=10)

    # Display result.
    if ans:
        for obj in ans:
            print('-----------------------------------------')
            if labels:
                print(labels[obj.label_id])
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)
            # Draw a rectangle.
            draw.rectangle(box, outline='red')
        img.save(output_name)
        if platform.machine() == 'x86_64':
            # For gLinux, simply show the image.
            img.show()
        elif platform.machine() == 'armv7l':
            # For Raspberry Pi, you need to install 'feh' to display image.
            subprocess.Popen(['feh', output_name])
        else:
            print('Please check ', output_name)
    else:
        print('No object detected!')
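# A usage sketch for the script above; the script and file names are
# placeholder assumptions, not paths from the original.
#
#   python3 object_detection.py \
#       --model mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite \
#       --label coco_labels.txt \
#       --input image.jpg \
#       --output result.jpg \
#       --keep_aspect_ratio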