def __init__(self, config, bus, verbose=False):
    super().__init__(config, bus)
    bus.register('localized_artf', 'debug_rgbd', 'dropped')
    self.verbose = verbose
    model_path = config.get(
        'model_path', 'subt/models/system/edgetpu.0/model_edgetpu.tflite')
    self.interpreter = edgetpu.make_interpreter(
        model_path, device=config.get('device'))
    self.interpreter.allocate_tensors()
    self.thresholds = config.get('thresholds', {
        'backpack': 0.84,
        'survivor': 0.95,
        'phone': 1000,  # Disabled.
        'rope': 0.85,
        'helmet': 0.95,
        'fire_extinguisher': 0.85,
        'drill': 0.9,
        'vent': 0.95,
        'cube': 1000  # Disabled.
    })
    self.categories = dict(enumerate(self.thresholds.keys()))
    self.min_threshold = min(self.thresholds.values())
    self.min_depth = config.get('min_depth', 0.1)
    self.max_depth = config.get('max_depth', 10.0)
    self.min_valid_depth_pixels = config.get('min_valid_depth_pixels', 4)
    self.camera_params = config['camera']
    self.batch_size = config.get('batch_size', 1)  # How many images to process in one step.
    self.input_size = coral_common.input_size(self.interpreter)
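For reference, a minimal sketch of a config dict this constructor could accept. Every key except 'camera' falls back to the defaults above; the 'camera' contents shown here are assumptions for illustration, not part of the original code.

# Hypothetical configuration; only 'camera' is required by the code above.
config = {
    'model_path': 'subt/models/system/edgetpu.0/model_edgetpu.tflite',
    'device': ':0',            # Which Edge TPU to use (None picks the default).
    'min_depth': 0.1,          # Meters.
    'max_depth': 10.0,         # Meters.
    'min_valid_depth_pixels': 4,
    'batch_size': 1,
    'camera': {'fx': 554.25, 'cx': 320, 'cy': 240},  # Assumed intrinsics layout.
}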
def get_objects(interpreter, score_threshold=-float('inf'), image_scale=(1.0, 1.0)):
    """Gets results from a detection model as a list of detected objects.

    Args:
      interpreter: The ``tf.lite.Interpreter`` to query for results.
      score_threshold (float): The score threshold for results. All returned
        results have a score greater-than-or-equal-to this value.
      image_scale (float, float): Scaling factor to apply to the bounding boxes
        as (x-scale-factor, y-scale-factor), where each factor is from 0 to 1.0.

    Returns:
      A list of :obj:`Object` objects, which each contains the detected object's
      id, score, and bounding box as :obj:`BBox`.
    """
    boxes = common.output_tensor(interpreter, 0)[0]
    class_ids = common.output_tensor(interpreter, 1)[0]
    scores = common.output_tensor(interpreter, 2)[0]
    count = int(common.output_tensor(interpreter, 3)[0])

    width, height = common.input_size(interpreter)
    image_scale_x, image_scale_y = image_scale
    sx, sy = width / image_scale_x, height / image_scale_y

    def make(i):
        ymin, xmin, ymax, xmax = boxes[i]
        return Object(
            id=int(class_ids[i]),
            # Scores are floats in [0, 1]; int() would truncate them all to 0.
            score=float(scores[i]),
            bbox=BBox(xmin=xmin, ymin=ymin,
                      xmax=xmax, ymax=ymax).scale(sx, sy).map(int))

    return [make(i) for i in range(count) if scores[i] >= score_threshold]
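A minimal usage sketch for this adapter, assuming a detection model compiled for the Edge TPU; the model and image paths are placeholders.

# Hypothetical usage; paths are placeholders.
from PIL import Image
from pycoral.adapters import common, detect
from pycoral.utils.edgetpu import make_interpreter

interpreter = make_interpreter('model_edgetpu.tflite')
interpreter.allocate_tensors()
with Image.open('input.jpg') as img:
    # Resize into the input tensor and keep the scale for mapping boxes back.
    _, scale = common.set_resized_input(
        interpreter, img.size, lambda size: img.resize(size, Image.NEAREST))
interpreter.invoke()
for obj in detect.get_objects(interpreter, score_threshold=0.5, image_scale=scale):
    print(obj.id, obj.score, obj.bbox)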
def classify_image(model_file, image_file, image_quantization=None):
    """Runs image classification and returns result with the highest score.

    Args:
      model_file: string, model file name.
      image_file: string, image file name.
      image_quantization: (scale: float, zero_point: float), assumed image
        quantization parameters.

    Returns:
      Classification result with the highest score as (index, score) tuple.
    """
    interpreter = make_interpreter(test_data_path(model_file))
    interpreter.allocate_tensors()
    image = test_image(image_file, common.input_size(interpreter))

    input_type = common.input_details(interpreter, 'dtype')
    if np.issubdtype(input_type, np.floating):
        # This preprocessing is specific to MobileNet V1 with floating point input.
        image = (input_type(image) - 127.5) / 127.5

    if np.issubdtype(input_type, np.integer) and image_quantization:
        image = rescale_image(
            image, image_quantization,
            common.input_details(interpreter, 'quantization'), input_type)

    common.set_input(interpreter, image)
    interpreter.invoke()
    return classify.get_classes(interpreter)[0]
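A hypothetical call, assuming the test_data_path()/test_image() helpers above resolve the file names; the names below are placeholders.

# Hypothetical call; file names are placeholders resolved by test_data_path().
top = classify_image('mobilenet_v1_1.0_224_quant_edgetpu.tflite', 'cat.bmp')
print('class id: %d, score: %.5f' % (top.id, top.score))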
def __init__(self, model_path, device):
    self.__interpreter = edgetpu.make_interpreter(
        os.path.join(model_path, 'edgetpu.tflite'), device=device)
    self.__interpreter.allocate_tensors()
    self.__model_shape = common.input_size(self.__interpreter)
def thread_job(model_name, input_filename, num_inferences, task_type, device):
    """Runs classification or detection job on one Python thread."""
    tid = threading.get_ident()
    logging.info('Thread: %d, # inferences: %d, model: %s',
                 tid, num_inferences, model_name)

    interpreter = make_interpreter(test_utils.test_data_path(model_name), device)
    interpreter.allocate_tensors()
    with test_utils.test_image(input_filename) as img:
        if task_type == 'classification':
            resize_image = img.resize(common.input_size(interpreter), Image.NEAREST)
            common.set_input(interpreter, resize_image)
        elif task_type == 'detection':
            common.set_resized_input(
                interpreter, img.size, lambda size: img.resize(size, Image.NEAREST))
        else:
            raise ValueError(
                'task_type should be classification or detection, but is given %s'
                % task_type)
        for _ in range(num_inferences):
            interpreter.invoke()
            if task_type == 'classification':
                classify.get_classes(interpreter)
            else:
                detect.get_objects(interpreter)
    logging.info('Thread: %d, model: %s done', tid, model_name)
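A possible driver for thread_job, assuming two Edge TPU-compatible test models; the model names, image name, and device strings are placeholders.

# Hypothetical launcher; model/image names and device strings are assumptions.
import threading

jobs = [
    threading.Thread(target=thread_job,
                     args=('mobilenet_v1_1.0_224_quant_edgetpu.tflite',
                           'cat.bmp', 100, 'classification', ':0')),
    threading.Thread(target=thread_job,
                     args=('ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite',
                           'cat.bmp', 100, 'detection', ':0')),
]
for t in jobs:
    t.start()
for t in jobs:
    t.join()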
def __init__(
        self,
        sample_im,
        model_dir='/mounted_folder/models',
        model_name='ssdlite_mobiledet_coco_qat_postprocess_edgetpu.tflite',
        img_size=416,
        conf_thres=0.5,
        classes_ids=[80],
        max_instances_per_class=5):
    # Alternatives: ssdlite_mobiledet_coco_qat_postprocess_edgetpu.tflite |
    # ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite
    self.img_size = img_size
    self.conf_thres = conf_thres
    self.classes_ids = classes_ids
    # if isinstance(max_instances_per_class, int):
    #     self.max_instances_per_class = [max_instances_per_class] * len(classes_ids)
    # elif len(max_instances_per_class) == len(classes_ids):
    #     self.max_instances_per_class = max_instances_per_class
    # else:
    #     raise ValueError('Inconsistent max instances per class and classes ids')

    # Initialize the TF interpreter.
    model_file_path_and_name = os.path.join(model_dir, model_name)
    self.interpreter = edgetpu.make_interpreter(model_file_path_and_name)
    self.interpreter.allocate_tensors()
    self.size = common.input_size(self.interpreter)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.', required=True)
    parser.add_argument('--labels', help='File path of label file.', required=True)
    parser.add_argument('--picamera', action='store_true', default=False,
                        help='Use PiCamera for image capture')
    parser.add_argument('-t', '--threshold', type=float, default=0.5,
                        help='Classification score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    size = common.input_size(interpreter)

    # Initialize video stream.
    vs = VideoStream(usePiCamera=args.picamera, resolution=(640, 480)).start()
    time.sleep(1)

    fps = FPS().start()
    while True:
        try:
            # Read frame from video stream.
            screenshot = vs.read()
            image = Image.fromarray(screenshot)
            image_pred = image.resize(size, Image.ANTIALIAS)
            common.set_input(interpreter, image_pred)
            interpreter.invoke()
            classes = classify.get_classes(interpreter, 1, args.threshold)
            draw_image(image, classes, labels)

            if cv2.waitKey(5) & 0xFF == ord('q'):
                fps.stop()
                break
            fps.update()
        except KeyboardInterrupt:
            fps.stop()
            break

    print('Elapsed time: ' + str(fps.elapsed()))
    print('Approx FPS: ' + str(fps.fps()))
    cv2.destroyAllWindows()
    vs.stop()
    time.sleep(2)
def __init__(self, model, device=':0', keep_aspect_ratio=True):
    self.img_sub = rospy.Subscriber("~input", ImageMsg, self.callback)
    self.interpreter = make_interpreter(model, device=device)
    self.interpreter.allocate_tensors()
    self.model_input_width, self.model_input_height = common.input_size(
        self.interpreter)
    self.keep_aspect_ratio = keep_aspect_ratio
    self.bridge = CvBridge()
def __init__(self):
    self.face_model = os.path.join(
        os.path.dirname(__file__),
        'models/mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite')
    self.max_faces = 10
    self.threshold = FACE_DETECTOR_THRESHOLD
    self.interpreter = make_interpreter(self.face_model)
    self.interpreter.allocate_tensors()
    self.inference_size = input_size(self.interpreter)
def __init__(self):
    super().__init__("CoralWorker")
    self.interpreter = make_interpreter(
        os.path.join(ExopticonWorker.get_data_dir(),
                     "ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite"))
    self.interpreter.allocate_tensors()
    # Store on the instance; the original assigned to a local that was discarded.
    self.input_size = common.input_size(self.interpreter)
    self.labels = read_label_file(
        os.path.join(ExopticonWorker.get_data_dir(), "coco_labels.txt"))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', required=True,
                        help='Path of the segmentation model.')
    parser.add_argument('--input', required=True,
                        help='File path of the input image.')
    parser.add_argument('--output', default='semantic_segmentation_result.jpg',
                        help='File path of the output image.')
    parser.add_argument(
        '--keep_aspect_ratio', action='store_true', default=False,
        help=('keep the image aspect ratio when down-sampling the image by adding '
              'black pixel padding (zeros) on bottom or right. '
              'By default the image is resized and reshaped without cropping. This '
              'option should be the same as what is applied on input images during '
              'model training. Otherwise the accuracy may be affected and the '
              'bounding box of detection result may be stretched.'))
    args = parser.parse_args()

    interpreter = make_interpreter(args.model, device=':0')
    interpreter.allocate_tensors()
    width, height = common.input_size(interpreter)

    img = Image.open(args.input)
    if args.keep_aspect_ratio:
        resized_img, _ = common.set_resized_input(
            interpreter, img.size, lambda size: img.resize(size, Image.ANTIALIAS))
    else:
        resized_img = img.resize((width, height), Image.ANTIALIAS)
        common.set_input(interpreter, resized_img)

    interpreter.invoke()

    result = segment.get_output(interpreter)
    if len(result.shape) == 3:
        result = np.argmax(result, axis=-1)

    # If keep_aspect_ratio, we need to remove the padding area.
    new_width, new_height = resized_img.size
    result = result[:new_height, :new_width]
    mask_img = Image.fromarray(label_to_color_image(result).astype(np.uint8))

    # Concat resized input image and processed segmentation results.
    output_img = Image.new('RGB', (2 * new_width, new_height))
    output_img.paste(resized_img, (0, 0))
    output_img.paste(mask_img, (width, 0))
    output_img.save(args.output)
    print('Done. Results saved at', args.output)
def classification_job(classification_model, image_name, num_inferences):
    """Runs classification job."""
    interpreter = make_interpreter(classification_model, device=':0')
    interpreter.allocate_tensors()
    size = common.input_size(interpreter)
    with open_image(image_name) as image:
        common.set_input(interpreter, image.resize(size, Image.NEAREST))
    for _ in range(num_inferences):
        interpreter.invoke()
        classify.get_classes(interpreter, top_k=1)
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int, default=0,
                        help='Index of which video source to use.')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    inference_size = input_size(interpreter)

    cap = cv2.VideoCapture(args.camera_idx)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
        run_inference(interpreter, cv2_im_rgb.tobytes())
        objs = get_objects(interpreter, args.threshold)[:args.top_k]
        cv2_im = append_objs_to_img(cv2_im, inference_size, objs, labels)

        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', required=True,
                        help='File path of .tflite file.')
    parser.add_argument('-i', '--input', required=True,
                        help='Image to be classified.')
    parser.add_argument('-l', '--labels', help='File path of labels file.')
    parser.add_argument('-k', '--top_k', type=int, default=1,
                        help='Max number of classification results')
    parser.add_argument('-t', '--threshold', type=float, default=0.0,
                        help='Classification score threshold')
    parser.add_argument('-c', '--count', type=int, default=5,
                        help='Number of times to run inference')
    args = parser.parse_args()

    labels = read_label_file(args.labels) if args.labels else {}

    interpreter = make_interpreter(*args.model.split('@'))
    interpreter.allocate_tensors()

    size = common.input_size(interpreter)
    image = Image.open(args.input).convert('RGB').resize(size, Image.ANTIALIAS)
    common.set_input(interpreter, image)

    print('----INFERENCE TIME----')
    print('Note: The first inference on Edge TPU is slow because it includes',
          'loading the model into Edge TPU memory.')
    for _ in range(args.count):
        start = time.perf_counter()
        interpreter.invoke()
        inference_time = time.perf_counter() - start
        classes = classify.get_classes(interpreter, args.top_k, args.threshold)
        print('%.1fms' % (inference_time * 1000))

    print('-------RESULTS--------')
    for c in classes:
        print('%s: %.5f' % (labels.get(c.id, c.id), c.score))
def detect_and_classify_faces(detector, classifier, image, threshold, padding=10):
    predictions = []
    boxes = []
    faces = []
    height, width, _ = image.shape
    detector_target_size = common.input_size(detector)
    classifier_target_size = common.input_size(classifier)

    scale_x = width / detector_target_size[0]
    scale_y = height / detector_target_size[1]

    resized_image = cv2.resize(image, detector_target_size)
    run_inference(detector, resized_image.tobytes())
    objects = detect.get_objects(detector, threshold)

    for obj in objects:
        bbox = obj.bbox.scale(scale_x, scale_y)
        startX, startY = int(bbox.xmin - padding), int(bbox.ymin - padding)
        endX, endY = int(bbox.xmax + padding), int(bbox.ymax + padding)
        # Ensure the bounding boxes fall within the dimensions of the image.
        (startX, startY) = (max(1, startX), max(1, startY))
        (endX, endY) = (min(width - 1, endX), min(height - 1, endY))
        boxes.append((startX, startY, endX, endY))

        face = image[startY:endY, startX:endX]
        face = cv2.resize(face, classifier_target_size)
        faces.append(face)

    for face in faces:
        run_inference(classifier, face.tobytes())
        prediction = classify.get_scores(classifier)
        predictions.append(prediction)

    return (boxes, predictions)
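A hedged usage sketch for this two-stage pipeline; the detector/classifier model paths and the input image are placeholders, and the frame is assumed to be a BGR numpy array as produced by cv2.imread.

# Hypothetical usage; model paths and image are placeholders.
import cv2
from pycoral.utils.edgetpu import make_interpreter

detector = make_interpreter('face_detector_edgetpu.tflite')
detector.allocate_tensors()
classifier = make_interpreter('face_classifier_edgetpu.tflite')
classifier.allocate_tensors()

frame = cv2.imread('people.jpg')
boxes, predictions = detect_and_classify_faces(
    detector, classifier, frame, threshold=0.5)
for box, scores in zip(boxes, predictions):
    print(box, max(scores))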
def configure(self, modelDir):
    # Workaround if no EdgeTPU is available.
    # Note: this assumes model.tflite is CPU-compatible; a model compiled for
    # the Edge TPU will not run on the plain TFLite interpreter.
    if not edgetpu.list_edge_tpus():
        print("No EdgeTPUs found. Using the CPU only...")
        from tflite_runtime.interpreter import Interpreter
        self.interpreter = Interpreter(modelDir + "/model.tflite")
    else:
        print("EdgeTPU found. Connecting to it via PyCoral...")
        from pycoral.utils.edgetpu import make_interpreter
        self.interpreter = make_interpreter(modelDir + "/model.tflite")
    self.interpreter.allocate_tensors()
    self.modelDir = modelDir
    self._inputSize = common.input_size(self.interpreter)
    self._labels = dataset.read_label_file(modelDir + "/labels.txt")
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_v2_1.0_224_quant_edgetpu.tflite'
    default_labels = 'imagenet_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    args = parser.parse_args()

    with open(args.labels, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    pygame.init()
    pygame.camera.init()
    camlist = pygame.camera.list_cameras()

    print('By default using camera: ', camlist[-1])
    camera = pygame.camera.Camera(camlist[-1], (640, 480))
    inference_size = input_size(interpreter)
    camera.start()
    try:
        last_time = time.monotonic()
        while True:
            imagen = camera.get_image()
            imagen = pygame.transform.scale(imagen, inference_size)
            start_ms = time.time()
            run_inference(interpreter, imagen.get_buffer().raw)
            results = get_classes(interpreter, top_k=3, score_threshold=0)
            stop_time = time.monotonic()
            inference_ms = (time.time() - start_ms) * 1000.0
            fps = 1.0 / (stop_time - last_time)  # Frames per second, not ms.
            last_time = stop_time
            annotate_text = 'Inference: {:5.2f}ms FPS: {:3.1f}'.format(
                inference_ms, fps)
            for result in results:
                annotate_text += '\n{:.0f}% {}'.format(100 * result[1],
                                                       labels[result[0]])
            print(annotate_text)
    finally:
        camera.stop()
def segment_image(model_file, image_file, mask_file):
    interpreter = make_interpreter(test_data_path(model_file))
    interpreter.allocate_tensors()

    image = Image.open(test_data_path(image_file)).resize(
        common.input_size(interpreter), Image.ANTIALIAS)
    common.set_input(interpreter, image)
    interpreter.invoke()

    result = segment.get_output(interpreter)
    if len(result.shape) > 2:
        result = np.argmax(result, axis=2)
    reference = np.asarray(Image.open(test_data_path(mask_file)))
    return array_iou(result, reference)
def run_benchmark(model):
    """Measures inference time for a model retrained via imprinting, with random data.

    Args:
      model: string, file name of the input model.

    Returns:
      float, inference time in ms.
    """
    engine = ImprintingEngine(test_utils.test_data_path(model), keep_classes=False)

    extractor = make_interpreter(engine.serialize_extractor_model(), device=':0')
    extractor.allocate_tensors()
    width, height = common.input_size(extractor)

    np.random.seed(12345)

    # 10 categories, each has 20 images.
    data_by_category = collections.defaultdict(list)
    for i in range(10):
        for _ in range(20):
            data_by_category[i].append(
                np.random.randint(0, 256, (height, width, 3), dtype=np.uint8))

    delegate = load_edgetpu_delegate({'device': ':0'})

    inference_time = 0.
    for class_id, tensors in enumerate(data_by_category.values()):
        for tensor in tensors:
            common.set_input(extractor, tensor)
            extractor.invoke()
            engine.train(classify.get_scores(extractor), class_id=class_id)

        start = time.perf_counter()
        interpreter = tflite.Interpreter(
            model_content=engine.serialize_model(),
            experimental_delegates=[delegate])
        interpreter.allocate_tensors()
        common.set_input(interpreter, tensors[0])
        interpreter.invoke()
        classify.get_classes(interpreter, top_k=3)
        inference_time += (time.perf_counter() - start) * 1000

    print('Model: %s' % model)
    print('Inference time: %.2fms' % inference_time)
    return inference_time
def main():
    # Set up input arguments. ->
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model',
                        default='mobilenet_v2_1.0_224_inat_bird_quant_edgetpu.tflite',
                        help='File path of .tflite file.')
    parser.add_argument('-i', '--input', default='parrot.jpg',
                        help='Image to be classified.')
    parser.add_argument('-l', '--labels', default='inat_bird_labels.txt',
                        help='File path of labels file.')
    parser.add_argument('-k', '--top_k', type=int, default=1,
                        help='Max number of classification results')
    parser.add_argument('-t', '--threshold', type=float, default=0.0,
                        help='Classification score threshold')
    parser.add_argument('-c', '--count', type=int, default=5,
                        help='Number of times to run inference')
    args = parser.parse_args()
    # Set up input arguments. <-

    # Load the labels.
    labels = read_label_file(args.labels) if args.labels else {}

    # Load the model; it differs depending on whether a Coral device is used.
    interpreter = make_interpreter(*args.model.split('@'))
    # Allocate memory for inference; must run right after loading the model.
    interpreter.allocate_tensors()

    size = common.input_size(interpreter)
    # Convert the input file to RGB and resize it to the interpreter's input size.
    image = Image.open(args.input).convert('RGB').resize(size, Image.ANTIALIAS)
    # Set the input image on the interpreter.
    common.set_input(interpreter, image)

    print('----INFERENCE TIME----')
    print('Note: The first inference on Edge TPU is slow because it includes',
          'loading the model into Edge TPU memory.')
    # Repeat inference the number of times given by the input arguments.
    for _ in range(args.count):
        start = time.perf_counter()  # Start timing the inference.
        interpreter.invoke()  # Run inference.
        inference_time = time.perf_counter() - start  # Stop timing the inference.
        # Get the top args.top_k labels whose scores are at or above args.threshold.
        classes = classify.get_classes(interpreter, args.top_k, args.threshold)
        print('%.1fms' % (inference_time * 1000))  # Print the inference time.

    print('-------RESULTS--------')
    for c in classes:
        print('%s: %.5f' % (labels.get(c.id, c.id), c.score))
def get_classes(self, frame, top_k=1, threshold=0.0):
    """Gets classification results as a list of ordered classes.

    Args:
      frame: The bitmap image to pass through the model.
      top_k: The number of top results to return.
      threshold: The minimum confidence score for returned results.

    Returns:
      A list of `Class` objects representing the classification results,
      ordered by scores. See
      https://coral.ai/docs/reference/py/pycoral.adapters/#pycoral.adapters.classify.Class
    """
    size = common.input_size(self.interpreter)
    common.set_input(
        self.interpreter,
        cv2.resize(frame, size, fx=0, fy=0, interpolation=cv2.INTER_CUBIC))
    self.interpreter.invoke()
    return classify.get_classes(self.interpreter, top_k, threshold)
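A hypothetical call, assuming `model` is an instance of the wrapping class above and the frame layout (BGR vs. RGB) matches what the underlying model expects.

# Hypothetical usage; 'model' and the image path are placeholders.
import cv2

frame = cv2.imread('bird.jpg')
for klass in model.get_classes(frame, top_k=3):
    print(klass.id, klass.score)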
def image():
    global running
    global labels
    global interpreter

    if running:
        return Response(response="{}", status=429, mimetype="application/json")
    running = True

    # Run an inference.
    interpreter.allocate_tensors()
    size = common.input_size(interpreter)
    # request.data holds raw bytes; PIL needs a file-like object (requires `io`).
    image = Image.open(io.BytesIO(request.data)).convert('RGB').resize(
        size, Image.ANTIALIAS)
    common.set_input(interpreter, image)
    interpreter.invoke()
    classes = classify.get_classes(interpreter, top_k=3)

    nomouseValue = 0
    mouseValue = 0

    # Collect the result.
    for c in classes:
        label = labels.get(c.id, c.id)
        score = c.score
        if label == "nomouse":
            nomouseValue = score
        if label == "mouse":
            mouseValue = score

    running = False

    # Build a response dict to send back to the client.
    response = {
        'tags': {
            'mouse': float(mouseValue),
            'nomouse': float(nomouseValue)
        }
    }
    # Encode response using jsonpickle.
    response_pickled = jsonpickle.encode(response)

    return Response(response=response_pickled, status=200,
                    mimetype="application/json")
def capture_v(args):
    global outputFrame, lock

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    inference_size = input_size(interpreter)

    cap = cv2.VideoCapture(args.camera_idx)
    # cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
    # Sony PS3 EYE cam settings:
    # 320x240 @ 125 FPS, 640x480 @ 60 FPS, 320x240 @ 187 FPS --> use exact FPS setting
    cap.set(cv2.CAP_PROP_FPS, 187)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print("image size=", size)

    fps = 0
    start_time = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        # cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        # cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
        # run_inference(interpreter, cv2_im_rgb.tobytes())
        # objs = get_objects(interpreter, args.threshold)[:args.top_k]
        # cv2_im = append_objs_to_img(cv2_im, inference_size, objs, labels)

        with lock:
            outputFrame = cv2_im

        fps += 1
        if fps == 200:
            end_time = time.time()
            print("cam FPS:", fps / (end_time - start_time))
            start_time = time.time()
            fps = 0

    cap.release()
def classification_task(num_inferences):
    # Note: uses self.assertEqual, so this function is meant to be nested
    # inside a unittest.TestCase method that provides `self` via closure.
    tid = threading.get_ident()
    print('Thread: %d, %d inferences for classification task' %
          (tid, num_inferences))
    labels = read_label_file(test_utils.test_data_path('imagenet_labels.txt'))
    model_name = 'mobilenet_v1_1.0_224_quant_edgetpu.tflite'
    interpreter = make_interpreter(
        test_utils.test_data_path(model_name), device=':0')
    interpreter.allocate_tensors()
    size = common.input_size(interpreter)
    print('Thread: %d, using device 0' % tid)
    with test_utils.test_image('cat.bmp') as img:
        for _ in range(num_inferences):
            common.set_input(interpreter, img.resize(size, Image.NEAREST))
            interpreter.invoke()
            ret = classify.get_classes(interpreter, top_k=1)
            self.assertEqual(len(ret), 1)
            self.assertEqual(labels[ret[0].id], 'Egyptian cat')
    print('Thread: %d, done classification task' % tid)
def run_two_models_one_tpu(classification_model, detection_model, image_name,
                           num_inferences, batch_size):
    """Runs two models ALTERNATIVELY using one Edge TPU.

    It runs the classification model `batch_size` times, then switches to
    running the detection model `batch_size` times, until each model has run
    `num_inferences` times in total.

    Args:
      classification_model: string, path to classification model.
      detection_model: string, path to detection model.
      image_name: string, path to input image.
      num_inferences: int, number of inferences to run for each model.
      batch_size: int, indicates how many inferences to run one model before
        switching to the other one.

    Returns:
      double, wall time it takes to finish the job.
    """
    start_time = time.perf_counter()
    interpreter_a = make_interpreter(classification_model, device=':0')
    interpreter_a.allocate_tensors()
    interpreter_b = make_interpreter(detection_model, device=':0')
    interpreter_b.allocate_tensors()

    with open_image(image_name) as image:
        size_a = common.input_size(interpreter_a)
        common.set_input(interpreter_a, image.resize(size_a, Image.NEAREST))
        _, scale_b = common.set_resized_input(
            interpreter_b, image.size,
            lambda size: image.resize(size, Image.NEAREST))

    num_iterations = (num_inferences + batch_size - 1) // batch_size
    for _ in range(num_iterations):
        for _ in range(batch_size):
            interpreter_a.invoke()
            classify.get_classes(interpreter_a, top_k=1)
        for _ in range(batch_size):
            interpreter_b.invoke()
            detect.get_objects(interpreter_b, score_threshold=0.,
                               image_scale=scale_b)
    return time.perf_counter() - start_time
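A sketch of how this benchmark might be invoked; the model and image names are placeholders.

# Hypothetical driver; model and image paths are placeholders.
elapsed = run_two_models_one_tpu(
    'mobilenet_v2_1.0_224_quant_edgetpu.tflite',
    'ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite',
    'cat.bmp', num_inferences=100, batch_size=10)
print('Wall time: %.2fs' % elapsed)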
def extract_embeddings(image_paths, interpreter):
    """Uses model to process images as embeddings.

    Reads image, resizes and feeds to model to get feature embeddings. Original
    image is discarded to keep maximum memory consumption low.

    Args:
      image_paths: ndarray, represents a list of image paths.
      interpreter: TFLite interpreter, wraps embedding extractor model.

    Returns:
      ndarray of length image_paths.shape[0] of embeddings.
    """
    input_size = common.input_size(interpreter)
    feature_dim = classify.num_classes(interpreter)
    embeddings = np.empty((len(image_paths), feature_dim), dtype=np.float32)
    for idx, path in enumerate(image_paths):
        with test_image(path) as img:
            common.set_input(interpreter, img.resize(input_size, Image.NEAREST))
            interpreter.invoke()
            embeddings[idx, :] = classify.get_scores(interpreter)
    return embeddings
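A hedged usage sketch; the embedding-extractor model name and image paths are placeholders, and the test_image() helper used inside is assumed to resolve them.

# Hypothetical usage; the model name and image paths are placeholders.
import numpy as np
from pycoral.utils.edgetpu import make_interpreter

interpreter = make_interpreter('embedding_extractor_edgetpu.tflite')
interpreter.allocate_tensors()
embeddings = extract_embeddings(np.array(['a.jpg', 'b.jpg']), interpreter)
print(embeddings.shape)  # (2, feature_dim)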
def run_benchmark(model):
    """Measures training time for given model with random data.

    Args:
      model: string, file name of the input model.

    Returns:
      float, training time in ms.
    """
    engine = ImprintingEngine(
        test_utils.test_data_path(model), keep_classes=False)

    extractor = make_interpreter(engine.serialize_extractor_model())
    extractor.allocate_tensors()
    width, height = common.input_size(extractor)

    np.random.seed(12345)

    # 10 categories, each has 20 images.
    data_by_category = collections.defaultdict(list)
    for i in range(10):
        for _ in range(20):
            data_by_category[i].append(
                np.random.randint(0, 256, (height, width, 3), dtype=np.uint8))

    start = time.perf_counter()
    for class_id, tensors in enumerate(data_by_category.values()):
        for tensor in tensors:
            common.set_input(extractor, tensor)
            extractor.invoke()
            engine.train(classify.get_scores(extractor), class_id=class_id)
    engine.serialize_model()
    training_time = (time.perf_counter() - start) * 1000

    print('Model: %s' % model)
    print('Training time: %.2fms' % training_time)
    return training_time
def run_two_models_one_tpu(classification_model, detection_model, image_name,
                           num_inferences, batch_size):
    start_time = time.perf_counter()
    interpreter_a = make_interpreter(classification_model, device=':0')
    interpreter_a.allocate_tensors()
    interpreter_b = make_interpreter(detection_model, device=':0')
    interpreter_b.allocate_tensors()

    identification = []
    classification = []

    with open_image(image_name) as image:
        size_a = common.input_size(interpreter_a)
        common.set_input(interpreter_a, image.resize(size_a, Image.NEAREST))
        _, scale_b = common.set_resized_input(
            interpreter_b, image.size,
            lambda size: image.resize(size, Image.NEAREST))

    num_iterations = (num_inferences + batch_size - 1) // batch_size
    for _ in tqdm(range(num_iterations)):
        for _ in range(batch_size):
            identification_start_time = time.perf_counter()
            interpreter_b.invoke()
            detect.get_objects(interpreter_b, score_threshold=0.,
                               image_scale=scale_b)
            identification.append(time.perf_counter() - identification_start_time)
        for _ in range(batch_size):
            classification_start_time = time.perf_counter()
            # Three invocations are timed together per sample.
            interpreter_a.invoke()
            result1 = classify.get_classes(interpreter_a, top_k=4)
            interpreter_a.invoke()
            result2 = classify.get_classes(interpreter_a, top_k=4)
            interpreter_a.invoke()
            result3 = classify.get_classes(interpreter_a, top_k=4)
            classification.append(time.perf_counter() - classification_start_time)
    total_time = time.perf_counter() - start_time
    return total_time, identification, classification
def predict():
    data = {"success": False}

    if flask.request.method == "POST":
        if flask.request.files.get("image"):
            image_file = flask.request.files["image"]
            image_bytes = image_file.read()
            image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

            # Run an inference: resize the image into the input tensor before
            # invoking, and keep the scale so the detection boxes can be mapped
            # back to the original image. (The original called set_resized_input
            # after invoke, which left the boxes in input-tensor coordinates.)
            _, scale = common.set_resized_input(
                interpreter, image.size,
                lambda size: image.resize(size, Image.ANTIALIAS))
            interpreter.invoke()

            threshold = 0.4
            objs = detect.get_objects(interpreter, threshold, scale)

            if objs:
                data["success"] = True
                preds = []
                for obj in objs:
                    preds.append({
                        "confidence": float(obj.score),
                        "label": labels[obj.id],
                        "y_min": int(obj.bbox[1]),
                        "x_min": int(obj.bbox[0]),
                        "y_max": int(obj.bbox[3]),
                        "x_max": int(obj.bbox[2]),
                    })
                data["predictions"] = preds

    # Return the data dictionary as a JSON response.
    return flask.jsonify(data)
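A hypothetical client for this endpoint, assuming the route is mounted at /predict on port 5000; the image path is a placeholder.

# Hypothetical client call; URL and image path are assumptions.
import requests

with open('cat.jpg', 'rb') as f:
    r = requests.post('http://localhost:5000/predict', files={'image': f})
print(r.json())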
def init(args):
    global HOLDER

    HOLDER['model'] = args.model

    labels_file = args.models_directory + args.labels
    labels = read_label_file(labels_file) if args.labels else {}

    model_file = args.models_directory + args.model
    interpreter = make_interpreter(model_file)
    interpreter.allocate_tensors()
    print("\n Loaded engine with model : {}".format(model_file))

    # Model must be uint8 quantized.
    if common.input_details(interpreter, 'dtype') != np.uint8:
        raise ValueError('Only support uint8 input type.')

    size = common.input_size(interpreter)

    HOLDER['labels'] = labels
    HOLDER['interpreter'] = interpreter
    HOLDER['size'] = size
    HOLDER['top_k'] = args.top_k