def thread_job(model_name, input_filename, num_inferences, task_type, device):
    """Runs classification or detection job on one Python thread."""
    tid = threading.get_ident()
    logging.info('Thread: %d, # inferences: %d, model: %s', tid,
                 num_inferences, model_name)

    interpreter = make_interpreter(test_utils.test_data_path(model_name), device)
    interpreter.allocate_tensors()
    with test_utils.test_image(input_filename) as img:
        if task_type == 'classification':
            resize_image = img.resize(common.input_size(interpreter),
                                      Image.NEAREST)
            common.set_input(interpreter, resize_image)
        elif task_type == 'detection':
            common.set_resized_input(
                interpreter, img.size,
                lambda size: img.resize(size, Image.NEAREST))
        else:
            raise ValueError(
                'task_type should be classification or detection, but is given %s'
                % task_type)
        for _ in range(num_inferences):
            interpreter.invoke()
            if task_type == 'classification':
                classify.get_classes(interpreter)
            else:
                detect.get_objects(interpreter)
    logging.info('Thread: %d, model: %s done', tid, model_name)
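# A minimal usage sketch for thread_job above (not from the original source):
# one Python thread per Edge TPU device. The model and image file names are
# assumptions based on the pycoral test data.
def run_parallel_jobs():
    workers = [
        threading.Thread(
            target=thread_job,
            args=('mobilenet_v1_1.0_224_quant_edgetpu.tflite', 'cat.bmp',
                  100, 'classification', ':0')),
        threading.Thread(
            target=thread_job,
            args=('ssd_mobilenet_v1_coco_quant_postprocess_edgetpu.tflite',
                  'cat.bmp', 100, 'detection', ':1')),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()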
def detect(self, image=None):
    Height, Width = image.shape[:2]
    img = image.copy()
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img)

    if self.options.get('auto_lock', True):
        self.acquire_lock()
    try:
        if not self.model:
            self.load_model()
        g.logger.Debug(
            1, '|---------- TPU (input image: {}w*{}h) ----------|'.format(
                Width, Height))
        t = Timer()
        _, scale = common.set_resized_input(
            self.model, img.size,
            lambda size: img.resize(size, Image.ANTIALIAS))
        self.model.invoke()
        objs = detect.get_objects(
            self.model, float(self.options.get('object_min_confidence')),
            scale)
        # Legacy Edge TPU API call, kept for reference:
        # outs = self.model.detect_with_image(
        #     img, threshold=int(self.options.get('object_min_confidence')),
        #     keep_aspect_ratio=True, relative_coord=False)
        diff_time = t.stop_and_get_ms()
        if self.options.get('auto_lock', True):
            self.release_lock()
    except Exception:
        if self.options.get('auto_lock', True):
            self.release_lock()
        raise

    g.logger.Debug(
        1, 'perf: processor:{} Coral TPU detection took: {}'.format(
            self.processor, diff_time))

    bbox = []
    labels = []
    conf = []
    for obj in objs:
        # box = obj.bbox.flatten().astype("int")
        bbox.append([
            int(round(obj.bbox.xmin)),
            int(round(obj.bbox.ymin)),
            int(round(obj.bbox.xmax)),
            int(round(obj.bbox.ymax))
        ])
        labels.append(self.classes.get(obj.id))
        conf.append(float(obj.score))

    g.logger.Debug(
        3, 'Coral object returning: {},{},{}'.format(bbox, labels, conf))
    return bbox, labels, conf, ['coral'] * len(labels)
def detect_person(image_input):
    from pycoral.adapters import common
    from pycoral.adapters import detect
    from pycoral.utils.dataset import read_label_file
    from pycoral.utils.edgetpu import make_interpreter

    label_path = os.path.join(BASE_DIR, 'coral_files', 'coco_labels.txt')
    model_path = os.path.join(
        BASE_DIR, 'coral_files',
        'ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite')
    print(model_path)

    image = Image.fromarray(image_input)
    print(image)
    labels = read_label_file(label_path)
    print("labels", labels)

    interpreter = make_interpreter(model_path)
    print("Interpreter made")
    interpreter.allocate_tensors()
    print("Tensors allocated")

    _, scale = common.set_resized_input(
        interpreter, image.size,
        lambda size: image.resize(size, Image.ANTIALIAS))
    print("Before invoke")
    interpreter.invoke()

    objs = detect.get_objects(interpreter, 0.4, scale)
    print(objs)
    for obj in objs:
        print(labels.get(obj.id, obj.id))
        print('  id:    ', obj.id)
        print('  score: ', obj.score)
        print('  bbox:  ', obj.bbox)
    # The original always returned False; report whether a person was found
    # instead (class id 0 is "person" in the COCO label file that pycoral ships).
    return any(obj.id == 0 for obj in objs)
def _ProcessImageInternal(self):
    img = self._image.copy()
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img)

    # Prepare image data
    _, scale = common.set_resized_input(
        self.__net, img.size,
        lambda size: img.resize(size, Image.ANTIALIAS))

    # Invoke the model
    self.__net.invoke()

    # Run the tensorflow model
    detectionData = detect.get_objects(self.__net, self._minConfidence, scale)

    for obj in detectionData:
        if (not self._targetID or
                (isinstance(self._targetID, list) and obj.id in self._targetID)):
            self._LogObjectFound(obj.id, obj.score)

            # Get the bounding box of the object
            box = obj.bbox
            self._HandleObjectDetectionResult(box.xmin, box.xmax, box.ymin,
                                              box.ymax)

            # If we found at least one object, we can exit out.
            break

    self._DrawBoundingBox()
def detection_task(num_inferences):
    """Runs the detection task.

    Assumed to be nested inside a unittest.TestCase method, which provides
    `self` for the assertions below.
    """
    tid = threading.get_ident()
    print('Thread: %d, %d inferences for detection task' % (tid, num_inferences))
    model_name = 'ssd_mobilenet_v1_coco_quant_postprocess_edgetpu.tflite'
    interpreter = make_interpreter(
        test_utils.test_data_path(model_name), device=':1')
    interpreter.allocate_tensors()
    print('Thread: %d, using device 1' % tid)

    with test_utils.test_image('cat.bmp') as img:
        for _ in range(num_inferences):
            _, scale = common.set_resized_input(
                interpreter, img.size,
                lambda size, image=img: image.resize(size, Image.ANTIALIAS))
            interpreter.invoke()
            ret = detect.get_objects(
                interpreter, score_threshold=0.7, image_scale=scale)
            self.assertEqual(len(ret), 1)
            self.assertEqual(ret[0].id, 16)  # cat
            expected_bbox = detect.BBox(
                xmin=int(0.1 * img.size[0]),
                ymin=int(0.1 * img.size[1]),
                xmax=int(0.7 * img.size[0]),
                ymax=int(1.0 * img.size[1]))
            self.assertGreaterEqual(
                detect.BBox.iou(expected_bbox, ret[0].bbox), 0.85)
    print('Thread: %d, done detection task' % tid)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.',
                        required=True)
    parser.add_argument('--labels', help='File path of label file.',
                        required=True)
    parser.add_argument('--picamera', action='store_true', default=False,
                        help='Use PiCamera for image capture')
    parser.add_argument('-t', '--threshold', type=float, default=0.5,
                        help='Classification score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    labels = read_label_file(args.labels) if args.labels else {}
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    # Initialize video stream
    vs = VideoStream(usePiCamera=args.picamera, resolution=(640, 480)).start()
    time.sleep(1)

    fps = FPS().start()

    while True:
        try:
            # Read frame from video
            screenshot = vs.read()
            image = Image.fromarray(screenshot)

            _, scale = common.set_resized_input(
                interpreter, image.size,
                lambda size: image.resize(size, Image.ANTIALIAS))
            interpreter.invoke()
            objs = detect.get_objects(interpreter, args.threshold, scale)
            draw_objects(image, objs, labels)

            if cv2.waitKey(5) & 0xFF == ord('q'):
                fps.stop()
                break
            fps.update()
        except KeyboardInterrupt:
            fps.stop()
            break

    print('Elapsed time: ' + str(fps.elapsed()))
    print('Approx FPS: ' + str(fps.fps()))

    cv2.destroyAllWindows()
    vs.stop()
    time.sleep(2)
def detect_func():
    # The command-line argument parsing from the original example has been
    # replaced with hard-coded paths below.
    labels = read_label_file('test_data/coco_labels.txt')
    interpreter = make_interpreter(
        'test_data/ssd_mobilenet_v2_coco_quant_postprocess.tflite')
    interpreter.allocate_tensors()

    image = Image.open('pic.jpg')
    _, scale = common.set_resized_input(
        interpreter, image.size,
        lambda size: image.resize(size, Image.ANTIALIAS))

    print('----INFERENCE TIME----')
    print('Note: The first inference is slow because it includes',
          'loading the model into Edge TPU memory.')
    for _ in range(5):
        start = time.perf_counter()
        interpreter.invoke()
        inference_time = time.perf_counter() - start
        objs = detect.get_objects(interpreter, 0.4, scale)
        print('%.2f ms' % (inference_time * 1000))

    print('-------RESULTS--------')
    if not objs:
        print('No objects detected')

    people_flag = 0
    for obj in objs:
        if obj.id == 0:
            print('people detected!')
            people_flag = 1
        print(labels.get(obj.id, obj.id))
        print('  id:    ', obj.id)
        print('  score: ', obj.score)
        print('  bbox:  ', obj.bbox)

    # if args.output:
    #     image = image.convert('RGB')
    #     draw_objects(ImageDraw.Draw(image), objs, labels)
    #     image.save(args.output)
    #     image.show()

    return people_flag
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', required=True,
                        help='Path of the segmentation model.')
    parser.add_argument('--input', required=True,
                        help='File path of the input image.')
    parser.add_argument('--output', default='semantic_segmentation_result.jpg',
                        help='File path of the output image.')
    parser.add_argument(
        '--keep_aspect_ratio', action='store_true', default=False,
        help=('keep the image aspect ratio when down-sampling the image by '
              'adding black pixel padding (zeros) on bottom or right. '
              'By default the image is resized and reshaped without cropping. '
              'This option should be the same as what is applied on input '
              'images during model training. Otherwise the accuracy may be '
              'affected and the bounding box of detection result may be '
              'stretched.'))
    args = parser.parse_args()

    interpreter = make_interpreter(args.model, device=':0')
    interpreter.allocate_tensors()
    width, height = common.input_size(interpreter)

    img = Image.open(args.input)
    if args.keep_aspect_ratio:
        resized_img, _ = common.set_resized_input(
            interpreter, img.size,
            lambda size: img.resize(size, Image.ANTIALIAS))
    else:
        resized_img = img.resize((width, height), Image.ANTIALIAS)
        common.set_input(interpreter, resized_img)

    interpreter.invoke()

    result = segment.get_output(interpreter)
    if len(result.shape) == 3:
        result = np.argmax(result, axis=-1)

    # If keep_aspect_ratio, we need to remove the padding area.
    new_width, new_height = resized_img.size
    result = result[:new_height, :new_width]
    mask_img = Image.fromarray(label_to_color_image(result).astype(np.uint8))

    # Concat resized input image and processed segmentation results.
    output_img = Image.new('RGB', (2 * new_width, new_height))
    output_img.paste(resized_img, (0, 0))
    # Paste at new_width (not the model input width) so the mask lines up
    # with the resized image when --keep_aspect_ratio is set.
    output_img.paste(mask_img, (new_width, 0))
    output_img.save(args.output)
    print('Done. Results saved at', args.output)
def _calculate_overlay(frame):
    global _overlayObjs
    global _overlay

    # Updates overlay_pane by inferencing the latest frame.
    # Runs on a mutex, so it will only run once at a time.
    # It runs in a thread so it is protected.

    # Prepare the frame for classification by converting (1) it from
    # BGR to RGB channel ordering and then (2) from a NumPy array to
    # PIL image format
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = Image.fromarray(frame)

    start = time.perf_counter()
    if initialized:
        print("Initialized")
        if interpreter is None:
            print("Interpreter is none and this is initialized ERROR ERROR ERROR")
        else:
            print("Interpreter is not none")
            print(interpreter)
        _, scale = common.set_resized_input(
            interpreter, frame.size,
            lambda size: frame.resize(size, Image.ANTIALIAS))
        interpreter.invoke()
        inference_time = time.perf_counter() - start
        _overlayObjs = detect.get_objects(interpreter, confidence, scale)
        print(_overlayObjs)
        # print('%.2f ms' % (inference_time * 1000))

        def overlay_function(frame):
            # Ensure at least one result was found
            for obj in _overlayObjs:
                bbox = obj.bbox
                frame = cv2.rectangle(frame, (bbox.xmin, bbox.ymin),
                                      (bbox.xmax, bbox.ymax), (0, 0, 255), 2)
                frame = cv2.putText(
                    frame, '%s %.2f' % (labels.get(obj.id, obj.id), obj.score),
                    (bbox.xmin + 20, bbox.ymin + 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
                # draw.text((bbox.xmin + 10, bbox.ymin + 10),
                #           '%s\n%.2f' % (labels.get(obj.id, obj.id), obj.score),
                #           fill='red')
            return frame

        _overlay = overlay_function
    else:
        print("Uninitialized")
def detection_job(detection_model, image_name, num_inferences):
    """Runs detection job."""
    interpreter = make_interpreter(detection_model, device=':1')
    interpreter.allocate_tensors()
    with open_image(image_name) as image:
        _, scale = common.set_resized_input(
            interpreter, image.size,
            lambda size: image.resize(size, Image.NEAREST))

    for _ in range(num_inferences):
        interpreter.invoke()
        detect.get_objects(interpreter, score_threshold=0., image_scale=scale)
def detect(self, image, offset):
    image = Image.fromarray(image)
    _, scale = common.set_resized_input(
        self.interpreter, image.size,
        lambda size: image.resize(size, Image.ANTIALIAS))
    self.interpreter.invoke()
    objs = detect.get_objects(self.interpreter, 0.5, scale)

    observations = []
    for o in objs:
        observations.append(
            (self.labels.get(o.id, o.id), o.score,
             (max(int(o.bbox.xmin * scale[1] + offset[0]), 0),
              max(int(o.bbox.ymin * scale[0] + offset[1]), 0),
              int(o.bbox.xmax * scale[1] + offset[0]),
              int(o.bbox.ymax * scale[0] + offset[1]))))
    return observations
def run_two_models_one_tpu(classification_model, detection_model, image_name,
                           num_inferences, batch_size):
    """Runs two models alternately using one Edge TPU.

    It runs the classification model `batch_size` times, then switches to the
    detection model for `batch_size` runs, until each model has run
    `num_inferences` times.

    Args:
      classification_model: string, path to classification model.
      detection_model: string, path to detection model.
      image_name: string, path to input image.
      num_inferences: int, number of inferences to run for each model.
      batch_size: int, how many inferences to run on one model before
        switching to the other one.

    Returns:
      double, wall time it takes to finish the job.
    """
    start_time = time.perf_counter()
    interpreter_a = make_interpreter(classification_model, device=':0')
    interpreter_a.allocate_tensors()
    interpreter_b = make_interpreter(detection_model, device=':0')
    interpreter_b.allocate_tensors()

    with open_image(image_name) as image:
        size_a = common.input_size(interpreter_a)
        common.set_input(interpreter_a, image.resize(size_a, Image.NEAREST))
        _, scale_b = common.set_resized_input(
            interpreter_b, image.size,
            lambda size: image.resize(size, Image.NEAREST))

    num_iterations = (num_inferences + batch_size - 1) // batch_size
    for _ in range(num_iterations):
        for _ in range(batch_size):
            interpreter_a.invoke()
            classify.get_classes(interpreter_a, top_k=1)
        for _ in range(batch_size):
            interpreter_b.invoke()
            detect.get_objects(interpreter_b, score_threshold=0.,
                               image_scale=scale_b)
    return time.perf_counter() - start_time
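# A minimal usage sketch for run_two_models_one_tpu above (file names are
# assumptions): both interpreters share Edge TPU device ':0', alternating in
# batches of 10 until each model has run 200 inferences.
if __name__ == '__main__':
    elapsed = run_two_models_one_tpu(
        'mobilenet_v2_1.0_224_quant_edgetpu.tflite',
        'ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite',
        'cat.bmp', num_inferences=200, batch_size=10)
    print('Wall time: %.2f s' % elapsed)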
def get_objects(self, frame, threshold=0.01):
    """Gets a list of objects detected in the given image frame.

    Args:
      frame: The bitmap image to pass through the model.
      threshold: The minimum confidence score for returned results.

    Returns:
      A list of `Object` objects, each of which contains a detected object's
      id, score, and bounding box as `BBox`. See
      https://coral.ai/docs/reference/py/pycoral.adapters/#pycoral.adapters.detect.Object
    """
    height, width, _ = frame.shape
    _, scale = common.set_resized_input(
        self.interpreter, (width, height),
        lambda size: cv2.resize(frame, size, fx=0, fy=0,
                                interpolation=cv2.INTER_CUBIC))
    self.interpreter.invoke()
    return detect.get_objects(self.interpreter, threshold, scale)
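# A minimal usage sketch for the get_objects method above. The enclosing
# class is not shown in this snippet, so `Detector` here is a hypothetical
# wrapper; the method only needs an allocated `self.interpreter` and a
# (height, width, 3) ndarray frame such as one returned by cv2.imread.
detector = Detector()  # hypothetical wrapper owning self.interpreter
frame = cv2.imread('test.jpg')
for obj in detector.get_objects(frame, threshold=0.5):
    print(obj.id, obj.score, obj.bbox)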
def run_two_models_one_tpu(classification_model, detection_model, image_name,
                           num_inferences, batch_size):
    start_time = time.perf_counter()
    interpreter_a = make_interpreter(classification_model, device=':0')
    interpreter_a.allocate_tensors()
    interpreter_b = make_interpreter(detection_model, device=':0')
    interpreter_b.allocate_tensors()

    identification = []
    classification = []

    with open_image(image_name) as image:
        size_a = common.input_size(interpreter_a)
        common.set_input(interpreter_a, image.resize(size_a, Image.NEAREST))
        _, scale_b = common.set_resized_input(
            interpreter_b, image.size,
            lambda size: image.resize(size, Image.NEAREST))

    num_iterations = (num_inferences + batch_size - 1) // batch_size
    for _ in tqdm(range(num_iterations)):
        for _ in range(batch_size):
            identification_start_time = time.perf_counter()
            interpreter_b.invoke()
            detect.get_objects(interpreter_b, score_threshold=0.,
                               image_scale=scale_b)
            identification.append(
                time.perf_counter() - identification_start_time)
        for _ in range(batch_size):
            classification_start_time = time.perf_counter()
            # Three back-to-back classification invocations, timed together.
            interpreter_a.invoke()
            result1 = classify.get_classes(interpreter_a, top_k=4)
            interpreter_a.invoke()
            result2 = classify.get_classes(interpreter_a, top_k=4)
            interpreter_a.invoke()
            result3 = classify.get_classes(interpreter_a, top_k=4)
            classification.append(
                time.perf_counter() - classification_start_time)
    total_time = time.perf_counter() - start_time
    return total_time, identification, classification
def predict():
    data = {"success": False}

    if flask.request.method == "POST":
        if flask.request.files.get("image"):
            image_file = flask.request.files["image"]
            image_bytes = image_file.read()
            image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

            # Run an inference. set_resized_input resizes the image to the
            # model's input size and returns the scale needed to map the
            # detected boxes back to the original image, so the earlier
            # manual resize/set_input pair was redundant and has been removed.
            _, scale = common.set_resized_input(
                interpreter, image.size,
                lambda size: image.resize(size, Image.ANTIALIAS))
            interpreter.invoke()

            threshold = 0.4
            objs = detect.get_objects(interpreter, threshold, scale)

            if objs:
                data["success"] = True
                preds = []
                for obj in objs:
                    preds.append({
                        "confidence": float(obj.score),
                        "label": labels[obj.id],
                        "y_min": int(obj.bbox[1]),
                        "x_min": int(obj.bbox[0]),
                        "y_max": int(obj.bbox[3]),
                        "x_max": int(obj.bbox[2]),
                    })
                data["predictions"] = preds

    # Return the data dictionary as a JSON response
    return flask.jsonify(data)
def main():
    labels = read_label_file("models/coco_labels.txt")
    interpreter = make_interpreter(
        "models/ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite")
    interpreter.allocate_tensors()
    threshold = 0.4

    printInfo("ready")
    while True:
        line = sys.stdin.readline().rstrip("\n")
        try:
            # Load image from the Shinobi stream
            rawImage = BytesIO(base64.b64decode(line))
            image = Image.open(rawImage)

            # Resize the image for object detection using built-in pycoral
            # code; it sets the input (300x300 for this model) and provides
            # a scale for object detection later.
            _, scale = common.set_resized_input(
                interpreter, image.size,
                lambda size: image.resize(size, Image.ANTIALIAS))

            start = time.perf_counter()
            interpreter.invoke()
            inference_time = time.perf_counter() - start

            # Passing the scale from above, this function creates the
            # bounding boxes; it takes the 300x300 image and divides out the
            # scale ratio to recover coordinates in the original image.
            objs = detect.get_objects(interpreter, threshold, scale)

            output = []
            for obj in objs:
                label = labels.get(obj.id, obj.id)
                labelID = obj.id
                score = obj.score
                bbox = obj.bbox
                output.append({"bbox": bbox, "class": label, "score": score})

            # Output data is based on the original input image size
            printData(output, (inference_time * 1000))
        except Exception as e:
            printError(str(e))
def callback(self, data):
    cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
    img = Image.fromarray(cv_image)

    if self.keep_aspect_ratio:
        resized_img, _ = common.set_resized_input(
            self.interpreter, img.size,
            lambda size: img.resize(size, Image.ANTIALIAS))
    else:
        resized_img = img.resize(
            (self.model_input_width, self.model_input_height), Image.ANTIALIAS)
        # The original referenced a bare `interpreter` here (a NameError);
        # it must be the instance's interpreter.
        common.set_input(self.interpreter, resized_img)

    self.interpreter.invoke()

    result = segment.get_output(self.interpreter)
    if len(result.shape) == 3:
        result = np.argmax(result, axis=-1)

    # If keep_aspect_ratio, we need to remove the padding area.
    new_width, new_height = resized_img.size
    result = result[:new_height, :new_width]
    mask_img = Image.fromarray(
        self.label_to_color_image(result).astype(np.uint8))

    # Concat resized input image and processed segmentation results.
    output_img = Image.new('RGB', (2 * new_width, new_height))
    output_img.paste(resized_img, (0, 0))
    output_img.paste(mask_img, (self.model_input_width, 0))

    original_width, original_height = img.size
    recovered_cvimg = np.array(
        output_img.resize((2 * original_width, original_height),
                          Image.ANTIALIAS))
    cv2.imshow("resizedimg", recovered_cvimg)
    cv2.waitKey(3)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--width", help="Resolution width.", default=640)
    parser.add_argument("--height", help="Resolution height.", default=480)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize colormap
    colormap = label_util.create_pascal_label_colormap()

    # Initialize engine.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    width, height = common.input_size(interpreter)

    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture, format="rgb",
                                                   use_video_port=True):
                rawCapture.truncate(0)
                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                start = time.perf_counter()

                # Create input tensor:
                # camera resolution (640, 480) => input tensor size (513, 513)
                _, scale = common.set_resized_input(
                    interpreter,
                    (resolution_width, resolution_height),
                    lambda size: cv2.resize(image, size),
                )

                # Run inference.
                interpreter.invoke()

                elapsed_ms = (time.perf_counter() - start) * 1000

                # Create segmentation map
                result = segment.get_output(interpreter)
                seg_map = result[:height, :width]
                seg_image = label_util.label_to_color_image(colormap, seg_map)

                # Resize segmentation map (513, 513) => camera resolution (640, 480)
                seg_image = cv2.resize(seg_image,
                                       (resolution_width, resolution_height))
                out_image = image // 2 + seg_image // 2
                im = cv2.cvtColor(out_image, cv2.COLOR_RGB2BGR)

                # Calc fps.
                fps = 1000.0 / elapsed_ms
                fps_text = "{0:.2f}ms, {1:.2f}fps".format(elapsed_ms, fps)
                visual.draw_caption(im, (10, 30), fps_text)

                # Display image
                cv2.imshow(WINDOW_NAME, im)
                key = cv2.waitKey(10) & 0xFF
                if key == ord("q"):
                    break
        finally:
            camera.stop_preview()

    # When everything is done, release the window
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', required=True, help='Model directory')
    args = parser.parse_args()

    # Load tflite model
    detector = edgetpu.make_interpreter(
        os.path.join(args.model, "detector.tflite"))
    detector.allocate_tensors()
    labels = dataset.read_label_file(os.path.join(args.model, 'labels.txt'))

    # Load webcam
    prevTime = 0
    cap = cv.VideoCapture(0)
    rows, cols = 320, 320
    cap.set(cv.CAP_PROP_FRAME_WIDTH, 320)
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, 320)

    # Run model
    while True:
        _, image = cap.read()

        # Rotate the image 180 degrees about its center
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        M = cv.getRotationMatrix2D((cols / 2, rows / 2), 180, 1)
        image = cv.warpAffine(image, M, (cols, rows))
        image = Image.fromarray(image, "RGB")

        _, scale = common.set_resized_input(
            detector, image.size,
            lambda size: image.resize(size, Image.ANTIALIAS))

        # Insert FPS
        curTime = time.time()

        detector.invoke()
        objs = detect.get_objects(detector, 0.6, scale)

        draw_image = image.copy()
        if not objs:
            draw_no_detect(draw_image)
        else:
            draw_objects(draw_image, objs, labels)

        sec = curTime - prevTime
        prevTime = curTime
        fps = 1 / sec
        fps_text = "FPS : %0.1f" % fps
        draw_text(draw_image, fps_text, (0, 0))

        draw_image = np.array(draw_image)
        draw_image = cv.cvtColor(draw_image, cv.COLOR_RGB2BGR)

        # Display frame
        cv.imshow("Frame", draw_image)
        key = cv.waitKey(1) & 0xff
        if key == 27:  # Stop using ESC
            break
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--width", help="Resolution width.", default=640,
                        type=int)
    parser.add_argument("--height", help="Resolution height.", default=480,
                        type=int)
    parser.add_argument("--nano", help="Works with Jetson Nano and Pi Camera.",
                        action="store_true")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize colormap
    colormap = label_util.create_pascal_label_colormap()

    # Initialize engine.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    width, height = common.input_size(interpreter)

    if args.nano == True:
        GST_STR = "nvarguscamerasrc \
            ! video/x-raw(memory:NVMM), width={0:d}, height={1:d}, format=(string)NV12, framerate=(fraction)30/1 \
            ! nvvidconv flip-method=2 ! video/x-raw, width=(int){2:d}, height=(int){3:d}, format=(string)BGRx \
            ! videoconvert \
            ! appsink".format(args.width, args.height, args.width, args.height)
        cap = cv2.VideoCapture(GST_STR, cv2.CAP_GSTREAMER)
    else:
        cap = cv2.VideoCapture(0)
        cap.set(3, args.width)
        cap.set(4, args.height)

    cap_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    cap_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    while cap.isOpened():
        _, frame = cap.read()

        start = time.perf_counter()

        # Create input tensor:
        # camera resolution => input tensor size (513, 513)
        input_buf = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        _, scale = common.set_resized_input(
            interpreter,
            (cap_width, cap_height),
            lambda size: cv2.resize(input_buf, size),
        )

        # Run inference
        interpreter.invoke()

        elapsed_ms = (time.perf_counter() - start) * 1000

        # Create segmentation map
        result = segment.get_output(interpreter)
        seg_map = result[:height, :width]
        seg_image = label_util.label_to_color_image(colormap, seg_map)

        # Resize segmentation map (513, 513) => camera resolution
        seg_image = cv2.resize(seg_image, (args.width, args.height))
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) // 2 + seg_image // 2
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)

        # Calc fps.
        fps = 1000.0 / elapsed_ms
        fps_text = "{0:.2f}ms, {1:.2f}fps".format(elapsed_ms, fps)
        visual.draw_caption(im, (10, 30), fps_text)

        # Display image
        cv2.imshow(WINDOW_NAME, im)
        key = cv2.waitKey(10) & 0xFF
        if key == ord("q"):
            break

        if args.nano != True:
            # Read and discard buffered frames (USB camera path).
            for i in range(10):
                ret, frame = cap.read()

    # When everything is done, release the capture and window
    cap.release()
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label", help="File path of label file.",
                        required=True)
    parser.add_argument("--threshold", help="threshold to filter results.",
                        default=0.5, type=float)
    parser.add_argument("--width", help="Resolution width.", default=640,
                        type=int)
    parser.add_argument("--height", help="Resolution height.", default=480,
                        type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture, format="rgb",
                                                   use_video_port=True):
                rawCapture.truncate(0)
                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Run inference. The old edgetpu BasicEngine exposed
                # get_inference_time(); with the tflite interpreter we time
                # the call ourselves.
                start = time.perf_counter()
                _, scale = common.set_resized_input(
                    interpreter,
                    (resolution_width, resolution_height),
                    lambda size: cv2.resize(image, size))
                interpreter.invoke()
                elapsed_ms = (time.perf_counter() - start) * 1000

                # Display result.
                objects = detect.get_objects(interpreter, args.threshold,
                                             scale)
                if objects:
                    for obj in objects:
                        label_name = (labels.get(obj.id, "Unknown")
                                      if labels else "Unknown")
                        caption = "{0}({1:.2f})".format(label_name, obj.score)

                        # Draw a rectangle and caption.
                        box = (obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax,
                               obj.bbox.ymax)
                        visual.draw_rectangle(im, box, colors[obj.id])
                        visual.draw_caption(im, box, caption)

                # Calc fps.
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

                # Display fps
                fps_text = "{0:.2f}ms".format(elapsed_ms)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # Display
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break
        finally:
            camera.stop_preview()

    # When everything is done, release the window
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', required=True,
                        help='File path of .tflite file.')
    parser.add_argument('-r', '--roi', required=True,
                        help='ROI [Face, Top, Whole]')
    args = parser.parse_args()

    if args.roi.lower() == 'top':
        _NUM_KEYPOINTS = 11
    elif args.roi.lower() == 'face':
        _NUM_KEYPOINTS = 5
    else:
        _NUM_KEYPOINTS = 17

    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    # Load webcam
    prevTime = 0
    cap = cv.VideoCapture(0)
    rows, cols = 320, 320
    cap.set(cv.CAP_PROP_FRAME_WIDTH, 320)
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, 320)

    # Run model
    while True:
        _, image = cap.read()

        # Rotate the image 180 degrees about its center
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        M = cv.getRotationMatrix2D((cols / 2, rows / 2), 180, 1)
        image = cv.warpAffine(image, M, (cols, rows))
        image = Image.fromarray(image, "RGB")

        # resized_img = image.resize(common.input_size(interpreter),
        #                            Image.ANTIALIAS)
        # common.set_input(interpreter, resized_img)
        common.set_resized_input(
            interpreter, image.size,
            lambda size: image.resize(size, Image.ANTIALIAS))

        # Insert FPS
        curTime = time.time()

        interpreter.invoke()

        pose = common.output_tensor(interpreter, 0).copy().reshape(17, 3)
        draw = ImageDraw.Draw(image)
        width, height = image.size
        for i in range(0, _NUM_KEYPOINTS):
            draw.ellipse(
                xy=[
                    pose[i][1] * width - 2, pose[i][0] * height - 2,
                    pose[i][1] * width + 2, pose[i][0] * height + 2
                ],
                fill=(255, 0, 0))

        sec = curTime - prevTime
        prevTime = curTime
        fps = 1 / sec
        fps_text = "FPS : %0.1f" % fps
        draw_text(image, fps_text, (0, 0))

        image = np.array(image)
        image = cv.cvtColor(image, cv.COLOR_RGB2BGR)

        # Display frame
        cv.imshow("Frame", image)
        key = cv.waitKey(1) & 0xff
        if key == 27:  # Stop using ESC
            break
def detect(self, pilImage):
    _, scale = common.set_resized_input(
        self.interpreter, pilImage.size,
        lambda size: pilImage.resize(size))
    self.interpreter.invoke()
    return detect.get_objects(self.interpreter, score_threshold=0.6,
                              image_scale=scale)
def main():
    global message
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', required=True,
                        help='File path of .tflite file')
    parser.add_argument('-l', '--labels', help='File path of labels file')
    parser.add_argument('-t', '--threshold', type=float, default=0.4,
                        help='Score threshold for detected objects')
    parser.add_argument('-o', '--output',
                        help='File path for the result image with annotations')
    parser.add_argument('-c', '--count', type=int, default=5,
                        help='Number of times to run inference')
    args = parser.parse_args()

    labels = read_label_file(args.labels) if args.labels else {}
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    # cap = cv2.VideoCapture(0)

    # HM-10 Module MAC Address and UUID
    # address = ("DC5D07D7-38D1-4B52-94DA-4BDC300F5506")  # uncomment for macOS
    # write_characteristic = "0000FFE1-0000-1000-8000-00805f9b34fb"

    # Connecting to Bluetooth Module
    # address = "64:69:4E:89:2B:C5"
    # client = BleakClient(address)
    _thread.start_new_thread(asyncio.run, (connectionHandler(), ))
    # if not client.is_connected:
    #     asyncio.run(connect(client))

    # Initialize eye detector
    eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')

    stream = io.BytesIO()
    with picamera.PiCamera() as camera:
        camera.start_preview()
        # Counts the number of consecutive frames during which the driver
        # is distracted
        distraction_event_duration = 0
        already_distracted = False
        while True:
            camera.capture(stream, format='jpeg')
            image = Image.open(stream)
            # ret, frame = cap.read()
            # image = Image.fromarray(frame)
            _, scale = common.set_resized_input(
                interpreter, image.size,
                lambda size: image.resize(size, Image.ANTIALIAS))
            start = time.perf_counter()
            interpreter.invoke()
            objs = detect.get_objects(interpreter, args.threshold, scale)

            if not objs:
                a = ''
            else:
                # If more than one face is detected, just use whatever is
                # at index 0.
                face = objs[0]

                # Extract bounding box coordinates
                left = face.bbox.xmin
                right = face.bbox.xmax
                bottom = face.bbox.ymax
                top = face.bbox.ymin
                w = right - left
                h = bottom - top

                # Convert video frame to a numpy array
                # TODO: WE WILL NEED TO CHANGE THIS WHEN THE PI CAMERA COMES IN
                numpy_frame = numpy.asarray(image)

                # Crop out the driver's face using bbox coordinates
                # (rows are indexed top:bottom; the original's bottom:top
                # produced an empty slice)
                cropped_numpy_frame = numpy_frame[top:bottom, left:right]

                # Run eye detector
                roi_color = numpy_frame[top:bottom, left:right]
                # cv2.imshow('frame', roi_color)
                eyes = eye_cascade.detectMultiScale(
                    roi_color,
                    minSize=(int(w / 20), int(h / 20)),
                    maxSize=(int(w / 6), int(h / 6)),
                    minNeighbors=5)
                num_eyes_detected = len(eyes)

                if num_eyes_detected < 2:
                    distraction_event_duration += 1
                else:
                    distraction_event_duration = 0

                # If the driver is distracted for 4 consecutive frames,
                # play an audible alert
                if distraction_event_duration >= 4:
                    # Send an alert to the Arduino
                    if not already_distracted:
                        # asyncio.run(speakerCommand(client, write_characteristic, 'p'))
                        message = 'p'
                        already_distracted = True
                else:
                    if already_distracted:
                        # asyncio.run(speakerCommand(client, write_characteristic, 's'))
                        message = 's'
                        already_distracted = False

            # stream.seek(0)
            # stream.truncate()

            if args.output:
                image = image.convert('RGB')
                draw_objects(ImageDraw.Draw(image), objs, labels)
                image.save(args.output)
                image.show()

            inference_time = time.perf_counter() - start
            # print('%.2f ms' % (inference_time * 1000))

            message = 'd'
            time.sleep(3)
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '-m', '--model',
        default="ssd_mobilenet_v2_face_quant_postprocess_edgetpu.tflite",
        help='File path of .tflite file')
    parser.add_argument('-i', '--input', required=True,
                        help='File path of image to process')
    parser.add_argument('-t', '--threshold', type=float, default=0.4,
                        help='Score threshold for detected objects')
    parser.add_argument('-o', '--output', default="out.jpg",
                        help='File path for the result image with annotations')
    args = parser.parse_args()

    # ========== ========== ===========
    # Load the network
    # ========== ========== ===========
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    # ========== ========== ===========
    # Compute bounding boxes
    # ========== ========== ===========
    image = Image.open(args.input)
    _, scale = common.set_resized_input(
        interpreter, image.size,
        lambda size: image.resize(size, Image.ANTIALIAS))

    start = time.perf_counter()
    interpreter.invoke()
    inference_time = time.perf_counter() - start
    objs = detect.get_objects(interpreter, args.threshold, scale)
    print('%.2f ms' % (inference_time * 1000))

    # ========== ========== ===========
    # Crop the image
    # ========== ========== ===========
    # Ensure that there is only one face in the image
    assert len(objs) == 1
    bbox = objs[0].bbox
    sx = int((bbox[0] + bbox[2]) / 2)
    sy = int((bbox[1] + bbox[3]) / 2)
    ss = int(max((bbox[3] - bbox[1]), (bbox[2] - bbox[0])) / 2.5)
    print((sx - ss, sy - ss, sx + ss, sy + ss))
    cropped_image = image.crop((sx - ss, sy - ss, sx + ss, sy + ss))
    cropped_image.resize((240, 240)).save(args.output)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label", help="File path of label file.",
                        required=True)
    parser.add_argument("--threshold", help="threshold to filter results.",
                        type=float, default=0.5)
    parser.add_argument("--width", help="Resolution width.", default=640)
    parser.add_argument("--height", help="Resolution height.", default=480)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    is_inpaint_mode = False
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture, format="rgb",
                                                   use_video_port=True):
                start_ms = time.time()
                rawCapture.truncate(0)
                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Run inference.
                _, scale = common.set_resized_input(
                    interpreter,
                    (resolution_width, resolution_height),
                    lambda size: cv2.resize(image, size),
                )
                interpreter.invoke()

                # Display result.
                objects = detect.get_objects(interpreter, args.threshold,
                                             scale)
                if is_inpaint_mode == True:
                    mask = np.full((args.height, args.width), 0,
                                   dtype=np.uint8)
                    for obj in objects:
                        if labels and obj.id in labels:
                            # Draw a mask rectangle.
                            box = (obj.bbox.xmin, obj.bbox.ymin,
                                   obj.bbox.xmax, obj.bbox.ymax)
                            visual.draw_rectangle(mask, box, (255, 255, 255),
                                                  thickness=-1)

                    # Image inpainting
                    dst = cv2.inpaint(im, mask, 3, cv2.INPAINT_TELEA)
                    # dst = cv2.inpaint(im, mask, 3, cv2.INPAINT_NS)
                else:
                    for obj in objects:
                        if labels and obj.id in labels:
                            label_name = labels[obj.id]
                            caption = "{0}({1:.2f})".format(label_name,
                                                            obj.score)

                            # Draw a rectangle and caption.
                            box = (obj.bbox.xmin, obj.bbox.ymin,
                                   obj.bbox.xmax, obj.bbox.ymax)
                            visual.draw_rectangle(im, box, colors[obj.id])
                            visual.draw_caption(im, box, caption)
                    dst = im

                # Calc fps.
                elapsed_ms = time.time() - start_ms
                fps = 1 / elapsed_ms

                # Display fps
                fps_text = "{0:.2f}ms, {1:.2f}fps".format(
                    (elapsed_ms * 1000.0), fps)
                visual.draw_caption(dst, (10, 30), fps_text)

                # Display image
                cv2.imshow(WINDOW_NAME, dst)
                key = cv2.waitKey(10) & 0xFF
                if key == ord("q"):
                    break
                elif key == ord(" "):
                    is_inpaint_mode = not is_inpaint_mode
                    print("inpaint mode changed:", is_inpaint_mode)
        finally:
            camera.stop_preview()

    # When everything is done, release the window
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', required=True,
                        help='File path of .tflite file')
    parser.add_argument('-i', '--input', required=True,
                        help='File path of image to process')
    parser.add_argument('-l', '--labels', help='File path of labels file')
    parser.add_argument('-t', '--threshold', type=float, default=0.4,
                        help='Score threshold for detected objects')
    parser.add_argument('-o', '--output',
                        help='File path for the result image with annotations')
    parser.add_argument('-c', '--count', type=int, default=5,
                        help='Number of times to run inference')
    args = parser.parse_args()

    labels = read_label_file(args.labels) if args.labels else {}
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    image = Image.open(args.input)
    _, scale = common.set_resized_input(
        interpreter, image.size,
        lambda size: image.resize(size, Image.ANTIALIAS))

    print('----INFERENCE TIME----')
    print('Note: The first inference is slow because it includes',
          'loading the model into Edge TPU memory.')
    for _ in range(args.count):
        start = time.perf_counter()
        interpreter.invoke()
        inference_time = time.perf_counter() - start
        objs = detect.get_objects(interpreter, args.threshold, scale)
        print('%.2f ms' % (inference_time * 1000))

    print('-------RESULTS--------')
    if not objs:
        print('No objects detected')
    for obj in objs:
        print(labels.get(obj.id, obj.id))
        print('  id:    ', obj.id)
        print('  score: ', obj.score)
        print('  bbox:  ', obj.bbox)

    if args.output:
        image = image.convert('RGB')
        draw_objects(ImageDraw.Draw(image), objs, labels)
        image.save(args.output)
        image.show()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label", help="File path of label file.",
                        required=True)
    parser.add_argument("--top_k", help="keep top k candidates.", default=3,
                        type=int)
    parser.add_argument("--threshold", help="Score threshold.", default=0.0,
                        type=float)
    parser.add_argument("--width", help="Resolution width.", default=640,
                        type=int)
    parser.add_argument("--height", help="Resolution height.", default=480,
                        type=int)
    args = parser.parse_args()

    with open(args.label, "r") as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    # Initialize window.
    cv2.namedWindow(WINDOW_NAME)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    width, height = common.input_size(interpreter)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture, format="rgb",
                                                   use_video_port=True):
                rawCapture.truncate(0)
                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Run inference.
                start = time.perf_counter()
                _, scale = common.set_resized_input(
                    interpreter,
                    (resolution_width, resolution_height),
                    lambda size: cv2.resize(image, size),
                )
                interpreter.invoke()
                results = classify.get_classes(interpreter, args.top_k,
                                               args.threshold)
                elapsed_ms = (time.perf_counter() - start) * 1000

                # Check result.
                if results:
                    for i in range(len(results)):
                        label = "{0} ({1:.2f})".format(
                            labels[results[i][0]], results[i][1])
                        pos = 60 + (i * 30)
                        visual.draw_caption(im, (10, pos), label)

                # Calc fps. elapsed_ms is already in milliseconds, so don't
                # multiply by 1000 again when formatting (the original did,
                # which inflated the displayed values).
                fps = 1000.0 / elapsed_ms
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_fps = 1000.0 / avg_elapsed_ms
                    avg_text = " AVG: {0:.2f}ms, {1:.2f}fps".format(
                        avg_elapsed_ms, avg_fps)

                # Display fps
                fps_text = "{0:.2f}ms, {1:.2f}fps".format(elapsed_ms, fps)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # Display
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break
        finally:
            camera.stop_preview()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label", help="File path of label file.",
                        required=True)
    parser.add_argument("--threshold", help="threshold to filter results.",
                        default=0.5, type=float)
    parser.add_argument("--width", help="Resolution width.", default=640,
                        type=int)
    parser.add_argument("--height", help="Resolution height.", default=480,
                        type=int)
    parser.add_argument("--videopath", help="File path of Videofile.",
                        default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    # Video capture.
    if args.videopath == "":
        print("Open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print("Open video file: ", args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    cap_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    cap_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    elapsed_list = []

    while cap.isOpened():
        _, frame = cap.read()
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Run inference.
        start = time.perf_counter()
        _, scale = common.set_resized_input(
            interpreter, (cap_width, cap_height),
            lambda size: cv2.resize(im, size))
        interpreter.invoke()
        elapsed_ms = (time.perf_counter() - start) * 1000

        # Display result.
        objects = detect.get_objects(interpreter, args.threshold, scale)
        if objects:
            for obj in objects:
                label_name = (labels.get(obj.id, "Unknown")
                              if labels else "Unknown")
                caption = "{0}({1:.2f})".format(label_name, obj.score)

                # Draw a rectangle and caption.
                box = (obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax,
                       obj.bbox.ymax)
                visual.draw_rectangle(frame, box, colors[obj.id])
                visual.draw_caption(frame, box, caption)

        # Calc fps.
        elapsed_list.append(elapsed_ms)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

        # Display fps
        fps_text = "{0:.2f}ms".format(elapsed_ms)
        visual.draw_caption(frame, (10, 30), fps_text + avg_text)

        # Display
        cv2.imshow(WINDOW_NAME, frame)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything is done, release the window
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', required=True,
        help='Detection SSD model path (must have post-processing operator).')
    parser.add_argument('--label', help='Labels file path.')
    parser.add_argument(
        '--score_threshold',
        help='Threshold for returning the candidates.',
        type=float, default=0.1)
    parser.add_argument(
        '--tile_sizes', required=True,
        help=('Sizes of the tiles to split; can be more than one layer, as a '
              'comma-delimited list in widthxheight form. Example: '
              '"300x300,250x250,..."'))
    parser.add_argument(
        '--tile_overlap', type=int, default=15,
        help=('Number of pixels to overlap the tiles. tile_overlap should be '
              '>= half of the min desired object size, otherwise small '
              'objects could be missed on the tile boundary.'))
    parser.add_argument(
        '--iou_threshold', type=float, default=.1,
        help='Threshold to merge bounding boxes during NMS.')
    parser.add_argument('--input', help='Input image path.', required=True)
    parser.add_argument('--output', help='Output image path.')
    args = parser.parse_args()

    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else {}

    # Open image.
    img = Image.open(args.input).convert('RGB')
    draw = ImageDraw.Draw(img)

    objects_by_label = dict()
    img_size = img.size
    tile_sizes = [
        map(int, tile_size.split('x'))
        for tile_size in args.tile_sizes.split(',')
    ]
    for tile_size in tile_sizes:
        for tile_location in tiles_location_gen(img_size, tile_size,
                                                args.tile_overlap):
            tile = img.crop(tile_location)
            _, scale = common.set_resized_input(
                interpreter, tile.size,
                lambda size, img=tile: img.resize(size, Image.NEAREST))
            interpreter.invoke()
            objs = detect.get_objects(interpreter, args.score_threshold,
                                      scale)

            for obj in objs:
                bbox = [obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax,
                        obj.bbox.ymax]
                bbox = reposition_bounding_box(bbox, tile_location)

                label = labels.get(obj.id, '')
                objects_by_label.setdefault(label, []).append(
                    Object(label, obj.score, bbox))

    for label, objects in objects_by_label.items():
        idxs = non_max_suppression(objects, args.iou_threshold)
        for idx in idxs:
            draw_object(draw, objects[idx])

    img.show()
    if args.output:
        img.save(args.output)
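# A sample invocation of the tiled small-object detection script above
# (script and file names are assumptions): two tile layers, with enough
# overlap that objects straddling a tile boundary still appear whole in
# at least one tile before per-label NMS merges the duplicates.
#   python3 tiled_detection.py \
#     --model ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite \
#     --label coco_labels.txt \
#     --tile_sizes 500x500,300x300 --tile_overlap 30 \
#     --input image.jpg --output result.jpg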