def thread_job(model_name, input_filename, num_inferences, task_type, device):
    """Runs classification or detection job on one Python thread."""
    tid = threading.get_ident()
    logging.info('Thread: %d, # inferences: %d, model: %s',
                 tid, num_inferences, model_name)

    interpreter = make_interpreter(test_utils.test_data_path(model_name), device)
    interpreter.allocate_tensors()
    with test_utils.test_image(input_filename) as img:
        if task_type == 'classification':
            resize_image = img.resize(common.input_size(interpreter), Image.NEAREST)
            common.set_input(interpreter, resize_image)
        elif task_type == 'detection':
            common.set_resized_input(
                interpreter, img.size,
                lambda size: img.resize(size, Image.NEAREST))
        else:
            raise ValueError(
                'task_type should be classification or detection, but is given %s'
                % task_type)

    for _ in range(num_inferences):
        interpreter.invoke()
        if task_type == 'classification':
            classify.get_classes(interpreter)
        else:
            detect.get_objects(interpreter)
    logging.info('Thread: %d, model: %s done', tid, model_name)
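# A minimal usage sketch for thread_job above. The model and image file names
# are hypothetical placeholders, and the ':0'/':1' device strings assume two
# attached Edge TPUs: each thread drives one model on its own device.
import threading

workers = [
    threading.Thread(
        target=thread_job,
        args=('mobilenet_v1_1.0_224_quant_edgetpu.tflite', 'cat.bmp',
              100, 'classification', ':0')),
    threading.Thread(
        target=thread_job,
        args=('ssd_mobilenet_v1_coco_quant_postprocess_edgetpu.tflite',
              'cat.bmp', 100, 'detection', ':1')),
]
for worker in workers:
    worker.start()
for worker in workers:
    worker.join()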
def classification_job(classification_model, image_name, num_inferences):
    """Runs classification job."""
    interpreter = make_interpreter(classification_model, device=':0')
    interpreter.allocate_tensors()
    size = common.input_size(interpreter)
    with open_image(image_name) as image:
        common.set_input(interpreter, image.resize(size, Image.NEAREST))
    for _ in range(num_inferences):
        interpreter.invoke()
        classify.get_classes(interpreter, top_k=1)
def run_benchmark(model):
    """Trains an imprinting model with random data and benchmarks inference.

    Args:
      model: string, file name of the input model.

    Returns:
      float, accumulated inference time of the retrained model in ms.
    """
    engine = ImprintingEngine(test_utils.test_data_path(model), keep_classes=False)

    extractor = make_interpreter(engine.serialize_extractor_model(), device=':0')
    extractor.allocate_tensors()
    width, height = common.input_size(extractor)

    np.random.seed(12345)
    # 10 categories, each with 20 images.
    data_by_category = collections.defaultdict(list)
    for i in range(10):
        for _ in range(20):
            data_by_category[i].append(
                np.random.randint(0, 256, (height, width, 3), dtype=np.uint8))

    delegate = load_edgetpu_delegate({'device': ':0'})

    inference_time = 0.
    for class_id, tensors in enumerate(data_by_category.values()):
        # Train on all tensors of this category.
        for tensor in tensors:
            common.set_input(extractor, tensor)
            extractor.invoke()
            engine.train(classify.get_scores(extractor), class_id=class_id)

        # Time loading the retrained model and running one inference on it.
        start = time.perf_counter()
        interpreter = tflite.Interpreter(
            model_content=engine.serialize_model(),
            experimental_delegates=[delegate])
        interpreter.allocate_tensors()
        common.set_input(interpreter, tensors[0])
        interpreter.invoke()
        classify.get_classes(interpreter, top_k=3)
        inference_time += (time.perf_counter() - start) * 1000

    print('Model: %s' % model)
    print('Inference time: %.2fms' % inference_time)
    return inference_time
def classify_image(model_file, image_file, image_quantization=None):
    """Runs image classification and returns result with the highest score.

    Args:
      model_file: string, model file name.
      image_file: string, image file name.
      image_quantization: (scale: float, zero_point: float), assumed image
        quantization parameters.

    Returns:
      Classification result with the highest score as (index, score) tuple.
    """
    interpreter = make_interpreter(test_data_path(model_file))
    interpreter.allocate_tensors()
    image = test_image(image_file, common.input_size(interpreter))

    input_type = common.input_details(interpreter, 'dtype')
    if np.issubdtype(input_type, np.floating):
        # This preprocessing is specific to MobileNet V1 with floating point input.
        image = (input_type(image) - 127.5) / 127.5

    if np.issubdtype(input_type, np.integer) and image_quantization:
        image = rescale_image(
            image, image_quantization,
            common.input_details(interpreter, 'quantization'), input_type)

    common.set_input(interpreter, image)
    interpreter.invoke()
    return classify.get_classes(interpreter)[0]
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.',
                        required=True)
    parser.add_argument('--labels', help='File path of label file.',
                        required=True)
    parser.add_argument('--picamera', action='store_true',
                        help='Use PiCamera for image capture', default=False)
    parser.add_argument('-t', '--threshold', type=float, default=0.5,
                        help='Classification score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    size = common.input_size(interpreter)

    # Initialize video stream.
    vs = VideoStream(usePiCamera=args.picamera, resolution=(640, 480)).start()
    time.sleep(1)

    fps = FPS().start()

    while True:
        try:
            # Read frame from video stream.
            screenshot = vs.read()
            image = Image.fromarray(screenshot)

            image_pred = image.resize(size, Image.ANTIALIAS)
            common.set_input(interpreter, image_pred)
            interpreter.invoke()
            classes = classify.get_classes(interpreter, 1, args.threshold)

            draw_image(image, classes, labels)

            if cv2.waitKey(5) & 0xFF == ord('q'):
                fps.stop()
                break

            fps.update()
        except KeyboardInterrupt:
            fps.stop()
            break

    print('Elapsed time: ' + str(fps.elapsed()))
    print('Approx FPS: ' + str(fps.fps()))

    cv2.destroyAllWindows()
    vs.stop()
    time.sleep(2)
def run_two_models_one_tpu(classification_model, detection_model, image_name,
                           num_inferences, batch_size):
    """Runs two models alternately using one Edge TPU.

    It runs the classification model `batch_size` times, then switches to the
    detection model for `batch_size` runs, repeating until each model has run
    `num_inferences` times.

    Args:
      classification_model: string, path to classification model.
      detection_model: string, path to detection model.
      image_name: string, path to input image.
      num_inferences: int, number of inferences to run for each model.
      batch_size: int, how many inferences to run on one model before
        switching to the other.

    Returns:
      float, wall time it takes to finish the job.
    """
    start_time = time.perf_counter()
    interpreter_a = make_interpreter(classification_model, device=':0')
    interpreter_a.allocate_tensors()
    interpreter_b = make_interpreter(detection_model, device=':0')
    interpreter_b.allocate_tensors()

    with open_image(image_name) as image:
        size_a = common.input_size(interpreter_a)
        common.set_input(interpreter_a, image.resize(size_a, Image.NEAREST))
        _, scale_b = common.set_resized_input(
            interpreter_b, image.size,
            lambda size: image.resize(size, Image.NEAREST))

    num_iterations = (num_inferences + batch_size - 1) // batch_size
    for _ in range(num_iterations):
        for _ in range(batch_size):
            interpreter_a.invoke()
            classify.get_classes(interpreter_a, top_k=1)
        for _ in range(batch_size):
            interpreter_b.invoke()
            detect.get_objects(interpreter_b, score_threshold=0.,
                               image_scale=scale_b)
    return time.perf_counter() - start_time
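# A minimal usage sketch for run_two_models_one_tpu above (file names are
# hypothetical placeholders): interleave 100 inferences of each model in
# batches of 10 on a single Edge TPU.
cost = run_two_models_one_tpu(
    'mobilenet_v1_1.0_224_quant_edgetpu.tflite',
    'ssd_mobilenet_v1_coco_quant_postprocess_edgetpu.tflite',
    'cat.bmp', num_inferences=100, batch_size=10)
print('Wall time: %.2fs' % cost)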
def faceinference(interpreter, face, labels, frame, person, i):
    """Classifies a face crop and saves the frame if `person` is recognized."""
    common.set_input(interpreter, face)
    interpreter.invoke()
    classes = classify.get_classes(interpreter, 1, 0.0)

    pred = []
    for class1 in classes:
        pred.append(str(labels.get(class1.id, class1.id)))
    print(pred)
    if person in pred:
        cv2.imwrite('pics/' + str(i) + '.jpg', frame)
def run_two_models_one_tpu(classification_model, detection_model, image_name,
                           num_inferences, batch_size):
    start_time = time.perf_counter()
    interpreter_a = make_interpreter(classification_model, device=':0')
    interpreter_a.allocate_tensors()
    interpreter_b = make_interpreter(detection_model, device=':0')
    interpreter_b.allocate_tensors()

    identification = []
    classification = []

    with open_image(image_name) as image:
        size_a = common.input_size(interpreter_a)
        common.set_input(interpreter_a, image.resize(size_a, Image.NEAREST))
        _, scale_b = common.set_resized_input(
            interpreter_b, image.size,
            lambda size: image.resize(size, Image.NEAREST))

    num_iterations = (num_inferences + batch_size - 1) // batch_size
    for _ in tqdm(range(num_iterations)):
        for _ in range(batch_size):
            identification_start_time = time.perf_counter()
            interpreter_b.invoke()
            detect.get_objects(interpreter_b, score_threshold=0.,
                               image_scale=scale_b)
            identification.append(
                time.perf_counter() - identification_start_time)
        for _ in range(batch_size):
            classification_start_time = time.perf_counter()
            # Three consecutive invocations are timed as one classification
            # step; the individual results are not used further.
            interpreter_a.invoke()
            result1 = classify.get_classes(interpreter_a, top_k=4)
            interpreter_a.invoke()
            result2 = classify.get_classes(interpreter_a, top_k=4)
            interpreter_a.invoke()
            result3 = classify.get_classes(interpreter_a, top_k=4)
            classification.append(
                time.perf_counter() - classification_start_time)
    total_time = time.perf_counter() - start_time
    return total_time, identification, classification
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', required=True,
                        help='File path of .tflite file.')
    parser.add_argument('-i', '--input',
                        help='Image to be classified.')
    parser.add_argument('-l', '--labels',
                        help='File path of labels file.')
    parser.add_argument('-k', '--top_k', type=int, default=1,
                        help='Max number of classification results')
    parser.add_argument('-t', '--threshold', type=float, default=0.0,
                        help='Classification score threshold')
    parser.add_argument('-c', '--count', type=int, default=5,
                        help='Number of times to run inference')
    args = parser.parse_args()

    labels = read_label_file(args.labels) if args.labels else {}

    interpreter = make_interpreter(*args.model.split('@'))
    interpreter.allocate_tensors()
    _, height, width = interpreter.get_input_details()[0]['shape']
    size = [height, width]

    trigger = GPIO("/dev/gpiochip2", 13, "out")  # pin 37

    print('----INFERENCE TIME----')
    print('Note: The first inference on Edge TPU is slow because it includes',
          'loading the model into Edge TPU memory.')
    # for i in range(1, 351):
    while True:
        # input_image_name = "./testSample/img_" + str(i) + ".jpg"
        # input_image_name = "./testSample/img_1.jpg"
        # image = Image.open(input_image_name).resize(size, Image.ANTIALIAS)
        # Feed a random 28x28 grayscale image instead of a file.
        arr = numpy.random.randint(0, 255, (28, 28), dtype='uint8')
        image = Image.fromarray(arr, 'L').resize(size, Image.ANTIALIAS)
        common.set_input(interpreter, image)

        start = time.perf_counter()
        trigger.write(True)
        interpreter.invoke()
        trigger.write(False)
        inference_time = time.perf_counter() - start
        print('%.6fms' % (inference_time * 1000))

        classes = classify.get_classes(interpreter, args.top_k, args.threshold)

        print('RESULTS for image ', 1)
        for c in classes:
            print('%s: %.6f' % (labels.get(c.id, c.id), c.score))
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', required=True,
                        help='File path of .tflite file.')
    parser.add_argument('-i', '--input', required=True,
                        help='Image to be classified.')
    parser.add_argument('-l', '--labels',
                        help='File path of labels file.')
    parser.add_argument('-k', '--top_k', type=int, default=1,
                        help='Max number of classification results')
    parser.add_argument('-t', '--threshold', type=float, default=0.0,
                        help='Classification score threshold')
    parser.add_argument('-c', '--count', type=int, default=5,
                        help='Number of times to run inference')
    args = parser.parse_args()

    labels = read_label_file(args.labels) if args.labels else {}

    interpreter = make_interpreter(*args.model.split('@'))
    interpreter.allocate_tensors()

    size = common.input_size(interpreter)
    image = Image.open(args.input).convert('RGB').resize(size, Image.ANTIALIAS)
    common.set_input(interpreter, image)

    print('----INFERENCE TIME----')
    print('Note: The first inference on Edge TPU is slow because it includes',
          'loading the model into Edge TPU memory.')
    for _ in range(args.count):
        start = time.perf_counter()
        interpreter.invoke()
        inference_time = time.perf_counter() - start
        classes = classify.get_classes(interpreter, args.top_k, args.threshold)
        print('%.1fms' % (inference_time * 1000))

    print('-------RESULTS--------')
    for c in classes:
        print('%s: %.5f' % (labels.get(c.id, c.id), c.score))
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_v2_1.0_224_quant_edgetpu.tflite'
    default_labels = 'imagenet_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    args = parser.parse_args()

    with open(args.labels, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    pygame.init()
    pygame.camera.init()
    camlist = pygame.camera.list_cameras()

    print('By default using camera: ', camlist[-1])
    camera = pygame.camera.Camera(camlist[-1], (640, 480))
    inference_size = input_size(interpreter)
    camera.start()
    try:
        last_time = time.monotonic()
        while True:
            imagen = camera.get_image()
            imagen = pygame.transform.scale(imagen, inference_size)
            start_ms = time.time()
            run_inference(interpreter, imagen.get_buffer().raw)
            results = get_classes(interpreter, top_k=3, score_threshold=0)
            stop_time = time.monotonic()
            inference_ms = (time.time() - start_ms) * 1000.0
            fps = 1.0 / (stop_time - last_time)
            last_time = stop_time
            annotate_text = 'Inference: {:5.2f}ms FPS: {:3.1f}'.format(
                inference_ms, fps)
            for result in results:
                annotate_text += '\n{:.0f}% {}'.format(
                    100 * result[1], labels[result[0]])
            print(annotate_text)
    finally:
        camera.stop()
def getimagedata(message):
    """Decodes a base64 JPEG, classifies the detected face, and emits the result.

    Relies on module-level `interpreter` and `labels`.
    """
    message = bytes(message, encoding='utf-8')
    message = message[message.find(b'/9'):]
    pimage = Image.open(io.BytesIO(base64.b64decode(message)))
    pimage = cv2.cvtColor(np.array(pimage), cv2.COLOR_RGB2BGR)
    pimage = detectface(pimage)
    pimage = cv2.resize(pimage, (224, 224))
    pimage = cv2.flip(pimage, 1)

    common.set_input(interpreter, pimage)
    interpreter.invoke()
    classes = classify.get_classes(interpreter, 1, 0.0)
    for class1 in classes:
        pred = str(labels.get(class1.id, class1.id)) + " " + str(class1.score)
        print(pred)
        emit('predresult', pred)
def main():
    # Set up input arguments.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model',
                        default='mobilenet_v2_1.0_224_inat_bird_quant_edgetpu.tflite',
                        help='File path of .tflite file.')
    parser.add_argument('-i', '--input', default='parrot.jpg',
                        help='Image to be classified.')
    parser.add_argument('-l', '--labels', default='inat_bird_labels.txt',
                        help='File path of labels file.')
    parser.add_argument('-k', '--top_k', type=int, default=1,
                        help='Max number of classification results')
    parser.add_argument('-t', '--threshold', type=float, default=0.0,
                        help='Classification score threshold')
    parser.add_argument('-c', '--count', type=int, default=5,
                        help='Number of times to run inference')
    args = parser.parse_args()

    # Load labels.
    labels = read_label_file(args.labels) if args.labels else {}

    # Load the model; it differs depending on whether Coral is used or not.
    interpreter = make_interpreter(*args.model.split('@'))
    # Allocate inference memory; must run right after loading the model.
    interpreter.allocate_tensors()

    size = common.input_size(interpreter)
    # Convert the input file to RGB and resize it to the interpreter's size.
    image = Image.open(args.input).convert('RGB').resize(size, Image.ANTIALIAS)
    # Set the input image on the interpreter.
    common.set_input(interpreter, image)

    print('----INFERENCE TIME----')
    print('Note: The first inference on Edge TPU is slow because it includes',
          'loading the model into Edge TPU memory.')
    # Repeat inference as many times as specified by the input arguments.
    for _ in range(args.count):
        start = time.perf_counter()  # Start timing the inference.
        interpreter.invoke()  # Run inference.
        inference_time = time.perf_counter() - start  # Stop timing.
        # Get the top args.top_k labels whose scores are at or above
        # args.threshold.
        classes = classify.get_classes(interpreter, args.top_k, args.threshold)
        print('%.1fms' % (inference_time * 1000))  # Print inference time.

    print('-------RESULTS--------')
    for c in classes:
        print('%s: %.5f' % (labels.get(c.id, c.id), c.score))
def get_classes(self, frame, top_k=1, threshold=0.0):
    """Gets classification results as a list of ordered classes.

    Args:
      frame: The bitmap image to pass through the model.
      top_k: The number of top results to return.
      threshold: The minimum confidence score for returned results.

    Returns:
      A list of `Class` objects representing the classification results,
      ordered by scores. See
      https://coral.ai/docs/reference/py/pycoral.adapters/#pycoral.adapters.classify.Class
    """
    size = common.input_size(self.interpreter)
    common.set_input(
        self.interpreter,
        cv2.resize(frame, size, fx=0, fy=0, interpolation=cv2.INTER_CUBIC))
    self.interpreter.invoke()
    return classify.get_classes(self.interpreter, top_k, threshold)
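# A minimal usage sketch for the get_classes method above. The `engine`
# instance is a hypothetical placeholder for whatever wrapper object holds
# `self.interpreter`; the image file name is also illustrative.
frame = cv2.imread('parrot.jpg')
for c in engine.get_classes(frame, top_k=3, threshold=0.1):
    print('class id=%d score=%.5f' % (c.id, c.score))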
def user_callback(input_tensor, src_size, inference_box):
    nonlocal fps_counter
    start_time = time.monotonic()
    run_inference(interpreter, input_tensor)
    results = get_classes(interpreter, args.top_k, args.threshold)
    end_time = time.monotonic()
    text_lines = [
        ' ',
        'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
        'FPS: {} fps'.format(round(next(fps_counter))),
    ]
    for result in results:
        text_lines.append('score={:.2f}: {}'.format(
            result.score, labels.get(result.id, result.id)))
    print(' '.join(text_lines))
    return generate_svg(src_size, text_lines)
def image():
    global running
    global labels
    global interpreter

    if running:
        return Response(response="{}", status=429, mimetype="application/json")

    running = True

    # Run an inference.
    interpreter.allocate_tensors()
    size = common.input_size(interpreter)
    # request.data holds raw image bytes, so wrap it in a file-like object
    # (requires `import io`).
    image = Image.open(io.BytesIO(request.data)).convert('RGB').resize(
        size, Image.ANTIALIAS)
    common.set_input(interpreter, image)
    interpreter.invoke()
    classes = classify.get_classes(interpreter, top_k=3)

    nomouseValue = 0
    mouseValue = 0

    # Collect the scores for the two labels of interest.
    for c in classes:
        label = labels.get(c.id, c.id)
        score = c.score
        if label == "nomouse":
            nomouseValue = score
        if label == "mouse":
            mouseValue = score

    running = False

    # Build a response dict to send back to the client.
    response = {
        'tags': {
            'mouse': float(mouseValue),
            'nomouse': float(nomouseValue)
        }
    }
    # Encode response using jsonpickle.
    response_pickled = jsonpickle.encode(response)
    return Response(response=response_pickled, status=200,
                    mimetype="application/json")
def classification_task(num_inferences):
    # Nested inside a unittest method; `self` refers to the enclosing test case.
    tid = threading.get_ident()
    print('Thread: %d, %d inferences for classification task' %
          (tid, num_inferences))
    labels = read_label_file(test_utils.test_data_path('imagenet_labels.txt'))
    model_name = 'mobilenet_v1_1.0_224_quant_edgetpu.tflite'
    interpreter = make_interpreter(
        test_utils.test_data_path(model_name), device=':0')
    interpreter.allocate_tensors()
    size = common.input_size(interpreter)
    print('Thread: %d, using device 0' % tid)
    with test_utils.test_image('cat.bmp') as img:
        for _ in range(num_inferences):
            common.set_input(interpreter, img.resize(size, Image.NEAREST))
            interpreter.invoke()
            ret = classify.get_classes(interpreter, top_k=1)
            self.assertEqual(len(ret), 1)
            self.assertEqual(labels[ret[0].id], 'Egyptian cat')
    print('Thread: %d, done classification task' % tid)
def predict():
    data = {"success": False}

    if flask.request.method == "POST":
        if flask.request.files.get("image"):
            image_file = flask.request.files["image"]
            image = Image.open(image_file).convert('RGB').resize(
                HOLDER['size'], Image.ANTIALIAS)

            params = common.input_details(HOLDER['interpreter'],
                                          'quantization_parameters')
            scale = params['scales']
            zero_point = params['zero_points']
            mean = 128.0
            std = 128.0
            if abs(scale * std - 1) < 1e-5 and abs(mean - zero_point) < 1e-5:
                # Input data does not require preprocessing.
                common.set_input(HOLDER['interpreter'], image)
            else:
                # Input data requires preprocessing.
                normalized_input = (
                    np.asarray(image) - mean) / (std * scale) + zero_point
                np.clip(normalized_input, 0, 255, out=normalized_input)
                common.set_input(HOLDER['interpreter'],
                                 normalized_input.astype(np.uint8))

            start = time.perf_counter()
            HOLDER['interpreter'].invoke()
            inference_time = time.perf_counter() - start
            classes = classify.get_classes(HOLDER['interpreter'],
                                           HOLDER['top_k'], 0.0)
            if classes:
                data["success"] = True
                data["inference-time"] = '%.2f ms' % (inference_time * 1000)
                preds = []
                for c in classes:
                    preds.append({
                        "score": float(c.score),
                        "label": HOLDER['labels'].get(c.id, c.id)
                    })
                data["predictions"] = preds

    return flask.jsonify(data)
def _train_and_test(self, model_path, train_points, test_points, keep_classes):
    # Train.
    engine = ImprintingEngine(model_path, keep_classes)

    extractor = make_interpreter(engine.serialize_extractor_model(),
                                 device=':0')
    extractor.allocate_tensors()

    for point in train_points:
        for image in point.images:
            with test_utils.test_image('imprinting', image) as img:
                set_input(extractor, img)
                extractor.invoke()
                embedding = classify.get_scores(extractor)
                self.assertEqual(len(embedding), engine.embedding_dim)
                engine.train(embedding, point.class_id)

    # Test.
    trained_model = engine.serialize_model()
    classifier = make_interpreter(trained_model, device=':0')
    classifier.allocate_tensors()
    self.assertEqual(len(classifier.get_output_details()), 1)

    if not keep_classes:
        self.assertEqual(len(train_points), classify.num_classes(classifier))

    for point in test_points:
        with test_utils.test_image('imprinting', point.image) as img:
            set_input(classifier, img)
            classifier.invoke()
            top = classify.get_classes(classifier, top_k=1)[0]
            self.assertEqual(top.id, point.class_id)
            self.assertGreater(top.score, point.score)

    return trained_model
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', required=True,
                        help='File path of .tflite file.')
    parser.add_argument('-i', '--input', required=True,
                        help='Image to be classified.')
    parser.add_argument('-l', '--labels',
                        help='File path of labels file.')
    parser.add_argument('-k', '--top_k', type=int, default=1,
                        help='Max number of classification results')
    parser.add_argument('-t', '--threshold', type=float, default=0.0,
                        help='Classification score threshold')
    parser.add_argument('-c', '--count', type=int, default=5,
                        help='Number of times to run inference')
    parser.add_argument('-a', '--input_mean', type=float, default=128.0,
                        help='Mean value for input normalization')
    parser.add_argument('-s', '--input_std', type=float, default=128.0,
                        help='STD value for input normalization')
    args = parser.parse_args()

    labels = read_label_file(args.labels) if args.labels else {}

    interpreter = make_interpreter(*args.model.split('@'))
    interpreter.allocate_tensors()

    # Model must be uint8 quantized.
    if common.input_details(interpreter, 'dtype') != np.uint8:
        raise ValueError('Only support uint8 input type.')

    size = common.input_size(interpreter)
    image = Image.open(args.input).convert('RGB').resize(size, Image.ANTIALIAS)

    # Image data must go through two transforms before running inference:
    # 1. normalization: f = (input - mean) / std
    # 2. quantization: q = f / scale + zero_point
    # The following code combines the two steps as such:
    # q = (input - mean) / (std * scale) + zero_point
    # However, if std * scale equals 1, and mean - zero_point equals 0, the
    # input does not need any preprocessing (but in practice, even if the
    # results are very close to 1 and 0, it is probably okay to skip
    # preprocessing for better efficiency; we use 1e-5 below instead of
    # absolute zero).
    params = common.input_details(interpreter, 'quantization_parameters')
    scale = params['scales']
    zero_point = params['zero_points']
    mean = args.input_mean
    std = args.input_std
    if abs(scale * std - 1) < 1e-5 and abs(mean - zero_point) < 1e-5:
        # Input data does not require preprocessing.
        common.set_input(interpreter, image)
    else:
        # Input data requires preprocessing.
        normalized_input = (np.asarray(image) - mean) / (std * scale) + zero_point
        np.clip(normalized_input, 0, 255, out=normalized_input)
        common.set_input(interpreter, normalized_input.astype(np.uint8))

    # Run inference.
    print('----INFERENCE TIME----')
    print('Note: The first inference on Edge TPU is slow because it includes',
          'loading the model into Edge TPU memory.')
    for _ in range(args.count):
        start = time.perf_counter()
        interpreter.invoke()
        inference_time = time.perf_counter() - start
        classes = classify.get_classes(interpreter, args.top_k, args.threshold)
        print('%.1fms' % (inference_time * 1000))

    print('-------RESULTS--------')
    for c in classes:
        print('%s: %.5f' % (labels.get(c.id, c.id), c.score))
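# A worked instance of the combined transform above (the numbers are
# illustrative assumptions, not taken from a specific model): suppose the
# model was quantized with scale = 1/128 and zero_point = 128, and we
# normalize with mean = std = 127.5. Then
#     q = (input - 127.5) / (127.5 * (1/128)) + 128
# so input 0 maps to q = -128 + 128 = 0, input 255 maps to q = 128 + 128 =
# 256, clipped to 255, and the mean value 127.5 maps exactly to the
# zero_point 128. Here scale * std = 127.5/128 ~= 0.996, which is outside the
# 1e-5 tolerance, so the preprocessing branch runs even though skipping it
# would change each pixel by at most about one quantization step.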
def classify_and_stream(self, detected, prediction, queue: Queue,
                        buffer_count=5):
    FIFO = '/home/pi/final_project/fifo'
    context = zmq.Context()
    footage_socket = context.socket(zmq.PUB)
    footage_socket.connect('tcp://localhost:5555')

    # Segmentation model.
    interpreter_seg = make_interpreter(
        '/home/pi/rpi-face/deeplabv3_mnv2_pascal_quant_edgetpu.tflite',
        device=':0')
    interpreter_seg.allocate_tensors()
    width, height = common.input_size(interpreter_seg)

    # Classification model and labels, loaded once outside the frame loop.
    labels = read_label_file('/home/pi/rpi-face/label_map.txt')
    interpreter = make_interpreter(
        '/home/pi/rpi-face/retrained_model_edgetpu.tflite')
    interpreter.allocate_tensors()
    size = common.input_size(interpreter)

    if checkPIcam('video0'):
        cap = cv2.VideoCapture(1)
    else:
        cap = cv2.VideoCapture(0)

    # Counter for classification results.
    result_counter = [0, 0, 0]

    while True:
        # Capture frame-by-frame.
        ret, frame = cap.read()
        # Convert OpenCV image to PIL image.
        img = Image.fromarray(frame)

        # Classification.
        image = img.resize(size, Image.ANTIALIAS)
        common.set_input(interpreter, image)
        start = time.perf_counter()
        interpreter.invoke()
        inference_time = time.perf_counter() - start
        classes = classify.get_classes(interpreter, 1, 0.0)
        for c in classes:
            print('%s: %.5f' % (labels.get(c.id, c.id), c.score))
            # Determine if the door should be opened.
            result_counter[c.id] += 1
            if result_counter[c.id] >= buffer_count:
                result_counter[0] = 0
                result_counter[1] = 0
                result_counter[2] = 0
                prediction = labels.get(c.id, c.id)
                detected = prediction != 'negative'
                while queue.qsize() > 2:
                    queue.get(True)
                queue.put(detected)
                print(detected)
                print(prediction)
                # fifo = open(FIFO, 'w')
                # fifo.write(str(detected))
                # fifo.close()

        # Segmentation.
        resized_img, _ = common.set_resized_input(
            interpreter_seg, img.size,
            lambda size: img.resize(size, Image.ANTIALIAS))
        start = time.perf_counter()
        interpreter_seg.invoke()
        result = segment.get_output(interpreter_seg)
        end = time.perf_counter()
        if len(result.shape) == 3:
            result = np.argmax(result, axis=-1)

        # If keep_aspect_ratio, we need to remove the padding area.
        new_width, new_height = resized_img.size
        result = result[:new_height, :new_width]
        mask_img = Image.fromarray(
            self.label_to_color_image(result).astype(np.uint8))

        # Concat resized input image and processed segmentation results.
        output_img = Image.new('RGB', (2 * new_width, new_height))
        output_img.paste(resized_img, (0, 0))
        output_img.paste(mask_img, (width, 0))

        seg_time = end - start
        # print('segmentation time: %0.1fms' % (seg_time * 1000))
        # print('classification time: %.1fms' % (inference_time * 1000))

        # Convert PIL image to OpenCV form and stream it over ZeroMQ.
        open_cv_image = np.array(output_img)
        encoded, buffer = cv2.imencode('.jpg', open_cv_image)
        jpg_as_text = base64.b64encode(buffer)
        footage_socket.send(jpg_as_text)

        # cv2.imshow('frame', open_cv_image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # When everything is done, release the capture.
    cap.release()
    cv2.destroyAllWindows()
def _transfer_learn_and_evaluate(self, model_path, keep_classes, dataset_path,
                                 test_ratio, top_k_range):
    """Transfer-learns with given params and returns the evaluation result.

    Args:
      model_path: string, path of the base model.
      keep_classes: bool, whether to keep base model classes.
      dataset_path: string, path to the directory of dataset. The images
        should be put under sub-directory named by category.
      test_ratio: float, the ratio of images used for test.
      top_k_range: int, top_k range to be evaluated. The function will return
        accuracy from top 1 to top k.

    Returns:
      list of float numbers.
    """
    engine = ImprintingEngine(model_path, keep_classes)

    extractor = make_interpreter(engine.serialize_extractor_model())
    extractor.allocate_tensors()
    num_classes = engine.num_classes

    print('--------------- Parsing dataset ----------------')
    print('Dataset path:', dataset_path)

    # Train in fixed order to ensure the same evaluation result.
    train_set, test_set = test_utils.prepare_data_set_from_directory(
        dataset_path, test_ratio, True)

    print('Image list successfully parsed! Number of Categories = ',
          len(train_set))
    print('--------------- Processing training data ----------------')
    print('This process may take more than 30 seconds.')
    train_input = []
    labels_map = {}
    for class_id, (category, image_list) in enumerate(train_set.items()):
        print('Processing {} ({} images)'.format(category, len(image_list)))
        train_input.append(
            [os.path.join(dataset_path, category, image)
             for image in image_list])
        labels_map[num_classes + class_id] = category

    # Train.
    print('---------------- Start training -----------------')
    size = common.input_size(extractor)
    for class_id, images in enumerate(train_input):
        for image in images:
            with test_image(image) as img:
                common.set_input(extractor, img.resize(size, Image.NEAREST))
                extractor.invoke()
                engine.train(classify.get_scores(extractor),
                             class_id=num_classes + class_id)
    print('---------------- Training finished -----------------')

    with test_utils.temporary_file(suffix='.tflite') as output_model_path:
        output_model_path.write(engine.serialize_model())

        # Evaluate.
        print('---------------- Start evaluating -----------------')
        classifier = make_interpreter(output_model_path.name)
        classifier.allocate_tensors()

        # top_k_correct_count[i] represents the number of top (i+1) correct
        # inferences.
        top_k_correct_count = [0] * top_k_range
        image_num = 0
        for category, image_list in test_set.items():
            n = len(image_list)
            print('Evaluating {} ({} images)'.format(category, n))
            for image_name in image_list:
                with test_image(os.path.join(dataset_path, category,
                                             image_name)) as img:
                    # Set threshold as a negative number to ensure we get
                    # top k candidates even if its score is 0.
                    size = common.input_size(classifier)
                    common.set_input(classifier,
                                     img.resize(size, Image.NEAREST))
                    classifier.invoke()
                    candidates = classify.get_classes(classifier,
                                                      top_k=top_k_range)
                    for i in range(len(candidates)):
                        candidate = candidates[i]
                        if (candidate.id in labels_map and
                                labels_map[candidate.id] == category):
                            top_k_correct_count[i] += 1
                            break
            image_num += n

    for i in range(1, top_k_range):
        top_k_correct_count[i] += top_k_correct_count[i - 1]

    return [top_k_correct_count[i] / image_num for i in range(top_k_range)]
def main():
    args = _parse_args()

    engine = ImprintingEngine(args.model_path, keep_classes=args.keep_classes)

    extractor = make_interpreter(engine.serialize_extractor_model(),
                                 device=':0')
    extractor.allocate_tensors()
    shape = common.input_size(extractor)

    print('--------------- Parsing data set -----------------')
    print('Dataset path:', args.data)

    train_set, test_set = _read_data(args.data, args.test_ratio)
    print('Image list successfully parsed! Category Num = ', len(train_set))

    print('---------------- Processing training data ----------------')
    print('This process may take more than 30 seconds.')
    train_input = []
    labels_map = {}
    for class_id, (category, image_list) in enumerate(train_set.items()):
        print('Processing category:', category)
        train_input.append(
            _prepare_images(image_list, os.path.join(args.data, category),
                            shape))
        labels_map[class_id] = category
    print('---------------- Start training -----------------')

    num_classes = engine.num_classes
    for class_id, tensors in enumerate(train_input):
        for tensor in tensors:
            common.set_input(extractor, tensor)
            extractor.invoke()
            embedding = classify.get_scores(extractor)
            engine.train(embedding, class_id=num_classes + class_id)

    print('---------------- Training finished! -----------------')
    with open(args.output, 'wb') as f:
        f.write(engine.serialize_model())
    print('Model saved as : ', args.output)
    _save_labels(labels_map, args.output)

    print('------------------ Start evaluating ------------------')
    interpreter = make_interpreter(args.output)
    interpreter.allocate_tensors()
    size = common.input_size(interpreter)

    top_k = 5
    correct = [0] * top_k
    wrong = [0] * top_k
    for category, image_list in test_set.items():
        print('Evaluating category [', category, ']')
        for img_name in image_list:
            img = Image.open(os.path.join(args.data, category,
                                          img_name)).resize(size, Image.NEAREST)
            common.set_input(interpreter, img)
            interpreter.invoke()
            candidates = classify.get_classes(interpreter, top_k,
                                              score_threshold=0.1)
            recognized = False
            for i in range(top_k):
                if i < len(candidates) and labels_map[
                        candidates[i].id] == category:
                    recognized = True
                if recognized:
                    correct[i] = correct[i] + 1
                else:
                    wrong[i] = wrong[i] + 1

    print('---------------- Evaluation result -----------------')
    for i in range(top_k):
        print('Top {} : {:.0%}'.format(
            i + 1, correct[i] / (correct[i] + wrong[i])))
import os

from PIL import Image

from pycoral.adapters import classify
from pycoral.adapters import common
from pycoral.utils import dataset
from pycoral.utils import edgetpu

# Note: the file below names an SSD *detection* model; classify.get_classes
# will run, but a classification model is what this adapter expects, so the
# printed scores will not be meaningful class probabilities.
model_file = os.path.join(
    "models", "ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite")
print(os.path.isfile(model_file))
label_file = os.path.join("models", "coco_labels.txt")
print(os.path.isfile(label_file))
image_file = os.path.join("images", "parrot.jpg")
print(os.path.isfile(image_file))

# Initialize the TF interpreter.
interpreter = edgetpu.make_interpreter(model_file)
interpreter.allocate_tensors()

# Resize the image.
size = common.input_size(interpreter)
image = Image.open(image_file).convert("RGB").resize(size, Image.ANTIALIAS)

# Run an inference.
common.set_input(interpreter, image)
interpreter.invoke()
classes = classify.get_classes(interpreter, top_k=1)

# Print the result.
labels = dataset.read_label_file(label_file)
for c in classes:
    print("%s: %.5f" % (labels.get(c.id, c.id), c.score))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label", help="File path of label file.",
                        required=True)
    parser.add_argument("--top_k", help="keep top k candidates.",
                        default=3, type=int)
    parser.add_argument("--threshold", help="Score threshold.",
                        default=0.0, type=float)
    parser.add_argument("--width", help="Resolution width.",
                        default=640, type=int)
    parser.add_argument("--height", help="Resolution height.",
                        default=480, type=int)
    args = parser.parse_args()

    with open(args.label, "r") as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    # Initialize window.
    cv2.namedWindow(WINDOW_NAME)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    width, height = common.input_size(interpreter)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture, format="rgb",
                                                   use_video_port=True):
                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Run inference.
                start = time.perf_counter()

                _, scale = common.set_resized_input(
                    interpreter,
                    (resolution_width, resolution_height),
                    lambda size: cv2.resize(image, size),
                )
                interpreter.invoke()

                results = classify.get_classes(interpreter, args.top_k,
                                               args.threshold)
                elapsed_ms = (time.perf_counter() - start) * 1000

                # Check result.
                if results:
                    for i in range(len(results)):
                        label = "{0} ({1:.2f})".format(
                            labels[results[i][0]], results[i][1])
                        pos = 60 + (i * 30)
                        visual.draw_caption(im, (10, pos), label)

                # Calc fps (elapsed_ms is already in milliseconds).
                fps = 1000.0 / elapsed_ms
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_fps = 1000.0 / avg_elapsed_ms
                    avg_text = " AVG: {0:.2f}ms, {1:.2f}fps".format(
                        avg_elapsed_ms, avg_fps)

                # Display fps.
                fps_text = "{0:.2f}ms, {1:.2f}fps".format(elapsed_ms, fps)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # Display.
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break

        finally:
            camera.stop_preview()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label", help="File path of label file.",
                        required=True)
    parser.add_argument("--top_k", help="keep top k candidates.",
                        default=3, type=int)
    parser.add_argument("--threshold", help="Score threshold.",
                        default=0.0, type=float)
    parser.add_argument("--width", help="Resolution width.",
                        default=640, type=int)
    parser.add_argument("--height", help="Resolution height.",
                        default=480, type=int)
    parser.add_argument("--videopath", help="File path of Videofile.",
                        default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(WINDOW_NAME)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Video capture.
    if args.videopath == "":
        print("open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print(args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    cap_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    cap_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    elapsed_list = []

    while cap.isOpened():
        _, frame = cap.read()
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Run inference.
        start = time.perf_counter()

        _, scale = common.set_resized_input(
            interpreter, (cap_width, cap_height),
            lambda size: cv2.resize(im, size))
        interpreter.invoke()

        # Check result.
        results = classify.get_classes(interpreter, args.top_k, args.threshold)
        elapsed_ms = (time.perf_counter() - start) * 1000
        if results:
            for i in range(len(results)):
                label = "{0} ({1:.2f})".format(
                    labels[results[i][0]], results[i][1])
                pos = 60 + (i * 30)
                visual.draw_caption(im, (10, pos), label)

        # Calc fps (elapsed_ms is in milliseconds).
        fps = 1000.0 / elapsed_ms
        elapsed_list.append(elapsed_ms)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_fps = 1000.0 / avg_elapsed_ms
            avg_text = " AVG: {0:.2f}ms, {1:.2f}fps".format(
                avg_elapsed_ms, avg_fps)

        # Display fps.
        fps_text = "{0:.2f}ms, {1:.2f}fps".format(elapsed_ms, fps)
        visual.draw_caption(im, (10, 30), fps_text + avg_text)

        # Display.
        cv2.imshow(WINDOW_NAME, im)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    cap.release()
    cv2.destroyAllWindows()