def thread_job(model_name, input_filename, num_inferences, task_type, device):
    """Runs classification or detection job on one Python thread."""
    tid = threading.get_ident()
    logging.info('Thread: %d, # inferences: %d, model: %s', tid,
                 num_inferences, model_name)

    interpreter = make_interpreter(test_utils.test_data_path(model_name), device)
    interpreter.allocate_tensors()
    with test_utils.test_image(input_filename) as img:
        if task_type == 'classification':
            resize_image = img.resize(common.input_size(interpreter), Image.NEAREST)
            common.set_input(interpreter, resize_image)
        elif task_type == 'detection':
            common.set_resized_input(
                interpreter, img.size,
                lambda size: img.resize(size, Image.NEAREST))
        else:
            raise ValueError(
                'task_type should be classification or detection, but is given %s'
                % task_type)
        for _ in range(num_inferences):
            interpreter.invoke()
            if task_type == 'classification':
                classify.get_classes(interpreter)
            else:
                detect.get_objects(interpreter)
    logging.info('Thread: %d, model: %s done', tid, model_name)
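# Usage sketch (assumption): the model names, image name, and device strings
# below are hypothetical placeholders for files in the test_data directory;
# two devices (':0', ':1') are assumed to be attached. thread_job itself is
# the function defined above.
import threading

workers = [
    threading.Thread(
        target=thread_job,
        args=('mobilenet_v2_1.0_224_quant_edgetpu.tflite', 'cat.bmp',
              100, 'classification', ':0')),
    threading.Thread(
        target=thread_job,
        args=('ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite',
              'cat.bmp', 100, 'detection', ':1')),
]
for w in workers:
    w.start()
for w in workers:
    w.join()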
def detection_job(detection_model, image_name, num_inferences):
    """Runs detection job."""
    interpreter = make_interpreter(detection_model, device=':1')
    interpreter.allocate_tensors()
    with open_image(image_name) as image:
        _, scale = common.set_resized_input(
            interpreter, image.size,
            lambda size: image.resize(size, Image.NEAREST))

    for _ in range(num_inferences):
        interpreter.invoke()
        detect.get_objects(interpreter, score_threshold=0., image_scale=scale)
def _ProcessImageInternal(self):
    img = self._image.copy()
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img)

    # Prepare image data.
    _, scale = common.set_resized_input(
        self.__net, img.size,
        lambda size: img.resize(size, Image.ANTIALIAS))

    # Invoke the model.
    self.__net.invoke()
    detectionData = detect.get_objects(self.__net, self._minConfidence, scale)

    for obj in detectionData:
        if (not self._targetID
                or (isinstance(self._targetID, list) and obj.id in self._targetID)):
            self._LogObjectFound(obj.id, obj.score)
            # Get the bounding box of the object.
            box = obj.bbox
            self._HandleObjectDetectionResult(box.xmin, box.xmax,
                                              box.ymin, box.ymax)
            # If we found at least one object, we can exit.
            break

    self._DrawBoundingBox()
def detect(self, image_shape, image_np, detections: List[Detection]):
    image_np = cv2.resize(image_np, dsize=self.__model_shape,
                          interpolation=cv2.INTER_LINEAR)
    limits = np.subtract(itemgetter(1, 0)(image_shape), (1, 1))
    image_scale = np.divide(self.__model_shape, limits)

    inference_start_time = time()
    common.set_input(self.__interpreter, image_np)
    self.__interpreter.invoke()
    objs = detect.get_objects(self.__interpreter, image_scale=image_scale)
    inference_time = (time() - inference_start_time) * 1000

    d = 0
    while d < len(objs) and d < len(detections):
        detection = detections[d]
        obj = objs[d]
        detection.label = obj.id + 1
        detection.confidence = obj.score
        detection.bounding_box.y_min = min(obj.bbox.ymin, limits[1])
        detection.bounding_box.x_min = min(obj.bbox.xmin, limits[0])
        detection.bounding_box.y_max = min(obj.bbox.ymax, limits[1])
        detection.bounding_box.x_max = min(obj.bbox.xmax, limits[0])
        d += 1
    return inference_time
# Nested inside a unittest method in the original source: `self` is captured
# from the enclosing test case.
def detection_task(num_inferences):
    tid = threading.get_ident()
    print('Thread: %d, %d inferences for detection task' % (tid, num_inferences))
    model_name = 'ssd_mobilenet_v1_coco_quant_postprocess_edgetpu.tflite'
    interpreter = make_interpreter(
        test_utils.test_data_path(model_name), device=':1')
    interpreter.allocate_tensors()
    print('Thread: %d, using device 1' % tid)

    with test_utils.test_image('cat.bmp') as img:
        for _ in range(num_inferences):
            _, scale = common.set_resized_input(
                interpreter, img.size,
                lambda size, image=img: image.resize(size, Image.ANTIALIAS))
            interpreter.invoke()
            ret = detect.get_objects(
                interpreter, score_threshold=0.7, image_scale=scale)
            self.assertEqual(len(ret), 1)
            self.assertEqual(ret[0].id, 16)  # cat
            expected_bbox = detect.BBox(
                xmin=int(0.1 * img.size[0]),
                ymin=int(0.1 * img.size[1]),
                xmax=int(0.7 * img.size[0]),
                ymax=int(1.0 * img.size[1]))
            self.assertGreaterEqual(
                detect.BBox.iou(expected_bbox, ret[0].bbox), 0.85)
    print('Thread: %d, done detection task' % tid)
def detect_person(image_input):
    from pycoral.adapters import common
    from pycoral.adapters import detect
    from pycoral.utils.dataset import read_label_file
    from pycoral.utils.edgetpu import make_interpreter

    label_path = os.path.join(BASE_DIR, 'coral_files', 'coco_labels.txt')
    model_path = os.path.join(
        BASE_DIR, 'coral_files',
        'ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite')
    print(model_path)

    image = Image.fromarray(image_input)
    print(image)
    labels = read_label_file(label_path)
    print("labels", labels)

    interpreter = make_interpreter(model_path)
    print("Interpreter made")
    interpreter.allocate_tensors()
    print("Tensors allocated")

    _, scale = common.set_resized_input(
        interpreter, image.size,
        lambda size: image.resize(size, Image.ANTIALIAS))
    print("Before invoke")
    interpreter.invoke()

    objs = detect.get_objects(interpreter, 0.4, scale)
    print(objs)
    for obj in objs:
        print(labels.get(obj.id, obj.id))
        print('  id:    ', obj.id)
        print('  score: ', obj.score)
        print('  bbox:  ', obj.bbox)
    return False
def detect(self, image=None):
    Height, Width = image.shape[:2]
    img = image.copy()
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img)

    if self.options.get('auto_lock', True):
        self.acquire_lock()
    try:
        if not self.model:
            self.load_model()
        g.logger.Debug(
            1, '|---------- TPU (input image: {}w*{}h) ----------|'.format(
                Width, Height))
        t = Timer()
        _, scale = common.set_resized_input(
            self.model, img.size,
            lambda size: img.resize(size, Image.ANTIALIAS))
        self.model.invoke()
        objs = detect.get_objects(
            self.model, float(self.options.get('object_min_confidence')),
            scale)
        diff_time = t.stop_and_get_ms()
        if self.options.get('auto_lock', True):
            self.release_lock()
    except:
        if self.options.get('auto_lock', True):
            self.release_lock()
        raise

    g.logger.Debug(
        1, 'perf: processor:{} Coral TPU detection took: {}'.format(
            self.processor, diff_time))

    bbox = []
    labels = []
    conf = []
    for obj in objs:
        bbox.append([
            int(round(obj.bbox.xmin)),
            int(round(obj.bbox.ymin)),
            int(round(obj.bbox.xmax)),
            int(round(obj.bbox.ymax))
        ])
        labels.append(self.classes.get(obj.id))
        conf.append(float(obj.score))

    g.logger.Debug(
        3, 'Coral object returning: {},{},{}'.format(bbox, labels, conf))
    return bbox, labels, conf, ['coral'] * len(labels)
def predict(self, picData):
    print("\nPredicting image on TPU")
    print('Shape of data: ', picData.shape)
    # Ask the TPU to detect objects in the image with the neural network.
    common.set_input(self.interpreter, picData)
    self.interpreter.invoke()
    result = detect.get_objects(self.interpreter, self.minObjectScore)
    return result
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.',
                        required=True)
    parser.add_argument('--labels', help='File path of label file.',
                        required=True)
    parser.add_argument('--picamera', action='store_true', default=False,
                        help='Use PiCamera for image capture')
    parser.add_argument('-t', '--threshold', type=float, default=0.5,
                        help='Detection score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    labels = read_label_file(args.labels) if args.labels else {}
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    # Initialize video stream.
    vs = VideoStream(usePiCamera=args.picamera, resolution=(640, 480)).start()
    time.sleep(1)

    fps = FPS().start()
    while True:
        try:
            # Read frame from video stream.
            screenshot = vs.read()
            image = Image.fromarray(screenshot)
            _, scale = common.set_resized_input(
                interpreter, image.size,
                lambda size: image.resize(size, Image.ANTIALIAS))
            interpreter.invoke()
            objs = detect.get_objects(interpreter, args.threshold, scale)
            draw_objects(image, objs, labels)
            if cv2.waitKey(5) & 0xFF == ord('q'):
                fps.stop()
                break
            fps.update()
        except KeyboardInterrupt:
            fps.stop()
            break

    print("Elapsed time: " + str(fps.elapsed()))
    print("Approx FPS: " + str(fps.fps()))
    cv2.destroyAllWindows()
    vs.stop()
    time.sleep(2)
def authorized_get(self):
    if self.path == '/':
        self.send_response(301)
        self.send_header('Location', '/index.html')
        self.end_headers()
    elif self.path == '/index.html':
        content = PAGE.encode('utf-8')
        self.send_response(200)
        self.send_header('Content-Type', 'text/html')
        self.send_header('Content-Length', len(content))
        self.end_headers()
        self.wfile.write(content)
    elif self.path == '/stream.mjpg':
        self.send_response(200)
        self.send_header('Age', 0)
        self.send_header('Cache-Control', 'no-cache, private')
        self.send_header('Pragma', 'no-cache')
        self.send_header('Content-Type',
                         'multipart/x-mixed-replace; boundary=FRAME')
        self.end_headers()
        try:
            fps.start()
            while True:
                # Grab the latest camera frame (BGR, OpenCV convention).
                frame = camera.read()
                cv2_im = frame
                cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
                cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
                if VFLIP:
                    cv2_im_rgb = cv2.flip(cv2_im_rgb, 0)
                if HFLIP:
                    cv2_im_rgb = cv2.flip(cv2_im_rgb, 1)

                # Run object detection.
                run_inference(interpreter, cv2_im_rgb.tobytes())
                objs = get_objects(interpreter, args.threshold)[:args.top_k]
                cv2_im = self.append_objs_to_img(cv2_im, inference_size,
                                                 objs, labels)

                # Stream the annotated frame as MJPEG.
                r, buf = cv2.imencode(".jpg", cv2_im)
                self.wfile.write(b'--FRAME\r\n')
                self.send_header('Content-Type', 'image/jpeg')
                self.send_header('Content-Length', str(len(buf)))
                self.end_headers()
                self.wfile.write(bytearray(buf))
                self.wfile.write(b'\r\n')
                fps.update()
        except Exception as e:
            logging.warning('Removed streaming client %s: %s',
                            self.client_address, str(e))
    else:
        self.send_error(404)
        self.end_headers()
def detect_func():
    labels = read_label_file('test_data/coco_labels.txt')
    interpreter = make_interpreter(
        'test_data/ssd_mobilenet_v2_coco_quant_postprocess.tflite')
    interpreter.allocate_tensors()

    image = Image.open('pic.jpg')
    _, scale = common.set_resized_input(
        interpreter, image.size,
        lambda size: image.resize(size, Image.ANTIALIAS))

    print('----INFERENCE TIME----')
    print('Note: The first inference is slow because it includes',
          'loading the model into Edge TPU memory.')
    for _ in range(5):
        start = time.perf_counter()
        interpreter.invoke()
        inference_time = time.perf_counter() - start
        objs = detect.get_objects(interpreter, 0.4, scale)
        print('%.2f ms' % (inference_time * 1000))

    print('-------RESULTS--------')
    if not objs:
        print('No objects detected')

    people_flag = 0
    for obj in objs:
        if obj.id == 0:
            print('people detected!')
            people_flag = 1
        print(labels.get(obj.id, obj.id))
        print('  id:    ', obj.id)
        print('  score: ', obj.score)
        print('  bbox:  ', obj.bbox)
    return people_flag
def object_frame(inputQueue, outputQueue):
    # Pick the interpreter: CPU tflite model, the stock Edge TPU face model,
    # or a custom Edge TPU model.
    if not tpu:
        interpreter = tflite.Interpreter(model_path=TFLITE_PATH + '/model.tflite')
    else:
        if not cust:
            interpreter = make_interpreter(
                TFLITE_PATH + '/mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite')
        else:
            interpreter = make_interpreter(TFLITE_PATH + '/detect_edgetpu.tflite')

    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Keep looping.
    while True:
        data_out = []
        # Check to see if there is a frame in our input queue.
        if not inputQueue.empty():
            # Grab the frame from the input queue.
            img = inputQueue.get()
            if not tpu:
                input_data = np.expand_dims(img, axis=0)
                input_data = input_data / 127.5 - 1
                input_data = np.asarray(input_data, dtype=np.float32)
                interpreter.set_tensor(input_details[0]['index'], input_data)
                interpreter.invoke()
            else:
                common.set_input(interpreter, img)
                interpreter.invoke()
                scale = (1, 1)
                objects = detect.get_objects(interpreter, confThreshold, scale)

            if not tpu:
                boxes = interpreter.get_tensor(output_details[0]['index'])[0]
                classe = interpreter.get_tensor(output_details[1]['index'])[0]
                score = interpreter.get_tensor(output_details[2]['index'])[0]
                data_out = [boxes, classe, score]
            else:
                if objects:
                    for obj in objects:
                        box = obj.bbox
                        xmin = int(box[0])
                        ymin = int(box[1])
                        xmax = int(box[2])
                        ymax = int(box[3])
                        data_out = [[[ymin, xmin, ymax, xmax]], obj.id, obj.score]
            outputQueue.put(data_out)
def run_two_models_one_tpu(classification_model, detection_model, image_name,
                           num_inferences, batch_size):
    """Runs two models ALTERNATELY using one Edge TPU.

    It runs the classification model `batch_size` times and then switches to
    run the detection model `batch_size` times, until each model has run
    `num_inferences` times in total.

    Args:
      classification_model: string, path to classification model.
      detection_model: string, path to detection model.
      image_name: string, path to input image.
      num_inferences: int, number of inferences to run for each model.
      batch_size: int, how many inferences to run on one model before
        switching to the other one.

    Returns:
      double, wall time it takes to finish the job.
    """
    start_time = time.perf_counter()
    interpreter_a = make_interpreter(classification_model, device=':0')
    interpreter_a.allocate_tensors()
    interpreter_b = make_interpreter(detection_model, device=':0')
    interpreter_b.allocate_tensors()

    with open_image(image_name) as image:
        size_a = common.input_size(interpreter_a)
        common.set_input(interpreter_a, image.resize(size_a, Image.NEAREST))
        _, scale_b = common.set_resized_input(
            interpreter_b, image.size,
            lambda size: image.resize(size, Image.NEAREST))

    num_iterations = (num_inferences + batch_size - 1) // batch_size
    for _ in range(num_iterations):
        for _ in range(batch_size):
            interpreter_a.invoke()
            classify.get_classes(interpreter_a, top_k=1)
        for _ in range(batch_size):
            interpreter_b.invoke()
            detect.get_objects(interpreter_b, score_threshold=0.,
                               image_scale=scale_b)
    return time.perf_counter() - start_time
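# Usage sketch (assumption): the model and image paths below are hypothetical
# placeholders for files you already have on disk; the function is the one
# defined above.
elapsed = run_two_models_one_tpu(
    'mobilenet_v2_1.0_224_quant_edgetpu.tflite',
    'ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite',
    'cat.bmp',
    num_inferences=200,
    batch_size=10)
print('Alternating run took %.2f s' % elapsed)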
def _calculate_overlay(frame):
    global _overlayObjs
    global _overlay
    # Updates the overlay pane by running inference on the latest frame.
    # Guarded by a mutex, so only one inference runs at a time, and it runs
    # in a thread so the main loop is protected from blocking.

    # Prepare the frame for detection by converting (1) it from BGR to RGB
    # channel ordering and then (2) from a NumPy array to PIL image format.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = Image.fromarray(frame)

    start = time.perf_counter()
    if initialized:
        print("Initialized")
        if interpreter is None:
            print("ERROR: interpreter is None although initialized")
        else:
            print("Interpreter is not None")
            print(interpreter)
        _, scale = common.set_resized_input(
            interpreter, frame.size,
            lambda size: frame.resize(size, Image.ANTIALIAS))
        interpreter.invoke()
        inference_time = time.perf_counter() - start
        _overlayObjs = detect.get_objects(interpreter, confidence, scale)
        print(_overlayObjs)
        # print('%.2f ms' % (inference_time * 1000))

        def overlay_function(frame):
            # Draw a box and label for each detected object.
            for obj in _overlayObjs:
                bbox = obj.bbox
                frame = cv2.rectangle(frame, (bbox.xmin, bbox.ymin),
                                      (bbox.xmax, bbox.ymax), (0, 0, 255), 2)
                frame = cv2.putText(
                    frame, '%s %.2f' % (labels.get(obj.id, obj.id), obj.score),
                    (bbox.xmin + 20, bbox.ymin + 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
            return frame

        _overlay = overlay_function
    else:
        print("Uninitialized")
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int, default=0,
                        help='Index of which video source to use.')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='detector score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    inference_size = input_size(interpreter)

    cap = cv2.VideoCapture(args.camera_idx)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
        run_inference(interpreter, cv2_im_rgb.tobytes())
        objs = get_objects(interpreter, args.threshold)[:args.top_k]
        cv2_im = append_objs_to_img(cv2_im, inference_size, objs, labels)

        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
def run_two_models_one_tpu(classification_model, detection_model, image_name,
                           num_inferences, batch_size):
    start_time = time.perf_counter()
    interpreter_a = make_interpreter(classification_model, device=':0')
    interpreter_a.allocate_tensors()
    interpreter_b = make_interpreter(detection_model, device=':0')
    interpreter_b.allocate_tensors()

    identification = []
    classification = []

    with open_image(image_name) as image:
        size_a = common.input_size(interpreter_a)
        common.set_input(interpreter_a, image.resize(size_a, Image.NEAREST))
        _, scale_b = common.set_resized_input(
            interpreter_b, image.size,
            lambda size: image.resize(size, Image.NEAREST))

    num_iterations = (num_inferences + batch_size - 1) // batch_size
    for _ in tqdm(range(num_iterations)):
        for _ in range(batch_size):
            identification_start_time = time.perf_counter()
            interpreter_b.invoke()
            detect.get_objects(interpreter_b, score_threshold=0.,
                               image_scale=scale_b)
            identification.append(
                time.perf_counter() - identification_start_time)
        for _ in range(batch_size):
            classification_start_time = time.perf_counter()
            interpreter_a.invoke()
            result1 = classify.get_classes(interpreter_a, top_k=4)
            interpreter_a.invoke()
            result2 = classify.get_classes(interpreter_a, top_k=4)
            interpreter_a.invoke()
            result3 = classify.get_classes(interpreter_a, top_k=4)
            classification.append(
                time.perf_counter() - classification_start_time)

    total_time = time.perf_counter() - start_time
    return total_time, identification, classification
def process(self, channel, rgbd):
    robot_pose, camera_pose, img_data, compressed_depth = rgbd
    img = Image.open(BytesIO(img_data)).convert('RGB')
    input_img = img.resize(self.input_size, Image.ANTIALIAS)
    scale = (input_img.width / float(img.width),
             input_img.height / float(img.height))
    coral_common.set_input(self.interpreter, input_img)
    self.interpreter.invoke()
    detections = coral_detection.get_objects(
        self.interpreter, self.min_threshold, image_scale=scale)
    if not detections:
        return

    depth = decompress(compressed_depth)
    camera_params = self.camera_params[channel]
    for detection in detections:
        category = self.categories.get(detection.id)
        if category is None:
            # One of the unsupported categories, such as 'robot' or 'nothing'.
            continue
        threshold = self.thresholds[category]
        if detection.score >= threshold:
            xmin = np.clip(detection.bbox.xmin, 0, img.width)
            xmax = np.clip(detection.bbox.xmax, 0, img.width)
            ymin = np.clip(detection.bbox.ymin, 0, img.height)
            ymax = np.clip(detection.bbox.ymax, 0, img.height)
            patch = [v for v in depth[ymin:ymax, xmin:xmax].reshape((-1))
                     if v >= self.min_depth and v < self.max_depth]
            if len(patch) < self.min_valid_depth_pixels:
                continue
            d = np.median(patch)
            u = (xmin + xmax) / 2
            v = (ymin + ymax) / 2
            # Location of the artifact relative to the camera.
            x = d
            y = d * (camera_params['cy'] - u) / camera_params['fx']
            z = d * (camera_params['cx'] - v) / camera_params['fy']
            # Coordinates of the artifact relative to the robot.
            robot_rel = transform([x, y, z], camera_pose)
            # Global coordinates of the artifact.
            world_xyz = transform(robot_rel, robot_pose)
            ign_name = NAME2IGN[category]
            if self.verbose:
                print(ign_name, world_xyz, detection.score)
            self.publish('localized_artf', [ign_name, world_xyz])
    # Make sure the output depth is compressed. Input depth may or may not be.
    self.publish('debug_rgbd',
                 [robot_pose, camera_pose, img_data, compress(depth)])
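# Worked example (assumption): hypothetical camera intrinsics, illustrating
# the pinhole back-projection used above. With fx = fy = 400, cx = cy = 200,
# a detection centered at pixel (u, v) = (240, 180), and median depth
# d = 2.0 m, the camera-relative coordinates come out as:
#   x = d                     -> 2.0 m forward
#   y = d * (cy - u) / fx     -> 2.0 * (200 - 240) / 400 = -0.2 m (lateral)
#   z = d * (cx - v) / fy     -> 2.0 * (200 - 180) / 400 =  0.1 m (vertical)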
def detect(self, image, offset):
    image = Image.fromarray(image)
    _, scale = common.set_resized_input(
        self.interpreter, image.size,
        lambda size: image.resize(size))
    self.interpreter.invoke()
    objs = detect.get_objects(self.interpreter, 0.5, scale)
    observations = []
    for o in objs:
        observations.append(
            (self.labels.get(o.id, o.id),
             o.score,
             (max(int(o.bbox.xmin * scale[1] + offset[0]), 0),
              max(int(o.bbox.ymin * scale[0] + offset[1]), 0),
              int(o.bbox.xmax * scale[1] + offset[0]),
              int(o.bbox.ymax * scale[0] + offset[1]))))
    return observations
def capture_v(args):
    global outputFrame, lock
    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    inference_size = input_size(interpreter)

    cap = cv2.VideoCapture(args.camera_idx)
    # cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
    # Sony PS3 EYE cam settings:
    # 320x240 @ 125 FPS, 640x480 @ 60 FPS, 320x240 @ 187 FPS --> use exact FPS setting
    cap.set(cv2.CAP_PROP_FPS, 60)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print("image size=", size)

    fps = 0
    start_time = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
        run_inference(interpreter, cv2_im_rgb.tobytes())
        objs = get_objects(interpreter, args.threshold)[:args.top_k]
        cv2_im = append_objs_to_img(cv2_im, inference_size, objs, labels)
        with lock:
            outputFrame = cv2_im
        fps += 1
        if fps == 200:
            end_time = time.time()
            print("cam FPS:", fps / (end_time - start_time))
            start_time = time.time()
            fps = 0
    cap.release()
def get_objects(self, frame, threshold=0.01):
    """Gets a list of objects detected in the given image frame.

    Args:
      frame: The bitmap image to pass through the model.
      threshold: The minimum confidence score for returned results.

    Returns:
      A list of `Object` objects, each of which contains a detected object's
      id, score, and bounding box as `BBox`. See
      https://coral.ai/docs/reference/py/pycoral.adapters/#pycoral.adapters.detect.Object
    """
    height, width, _ = frame.shape
    _, scale = common.set_resized_input(
        self.interpreter, (width, height),
        lambda size: cv2.resize(frame, size, fx=0, fy=0,
                                interpolation=cv2.INTER_CUBIC))
    self.interpreter.invoke()
    return detect.get_objects(self.interpreter, threshold, scale)
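# Usage sketch (assumption): `detector` is a hypothetical instance of the
# wrapper class that owns the get_objects method above, and 'frame.jpg' is a
# hypothetical test image read with OpenCV (BGR ndarray, as the method expects).
import cv2

frame = cv2.imread('frame.jpg')
for obj in detector.get_objects(frame, threshold=0.5):
    print(obj.id, obj.score, obj.bbox)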
def predict():
    data = {"success": False}
    if flask.request.method == "POST":
        if flask.request.files.get("image"):
            image_file = flask.request.files["image"]
            image_bytes = image_file.read()
            image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

            # Run an inference. set_resized_input resizes the image to the
            # model's input size and returns the scale needed to map the
            # detections back to the original image.
            _, scale = common.set_resized_input(
                interpreter, image.size,
                lambda size: image.resize(size, Image.ANTIALIAS))
            interpreter.invoke()

            threshold = 0.4
            objs = detect.get_objects(interpreter, threshold, scale)

            if objs:
                data["success"] = True
                preds = []
                for obj in objs:
                    preds.append({
                        "confidence": float(obj.score),
                        "label": labels[obj.id],
                        "y_min": int(obj.bbox[1]),
                        "x_min": int(obj.bbox[0]),
                        "y_max": int(obj.bbox[3]),
                        "x_max": int(obj.bbox[2]),
                    })
                data["predictions"] = preds

    # Return the data dictionary as a JSON response.
    return flask.jsonify(data)
def run(self):
    print('Loading {} with {} labels.'.format(self.model, self.labels))
    interpreter = make_interpreter(self.model)
    interpreter.allocate_tensors()
    readLabels = read_label_file(self.labels)
    inference_size = input_size(interpreter)

    while True:
        # Capture frame-by-frame.
        frameWebcam = self.webcam.read()
        frameWebcam = imutils.resize(frameWebcam, width=800)
        framePicam = self.picam.read()
        framePicam = imutils.resize(framePicam, width=600)

        # If Coral should not be used, uncomment the next line and comment
        # out the following block instead.
        # grayWebcam = cv2.cvtColor(frameWebcam, cv2.COLOR_BGR2GRAY)

        # Grab the image and process it with the Coral interpreter.
        cv2_im_rgb = cv2.cvtColor(frameWebcam, cv2.COLOR_BGR2RGB)
        cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
        run_inference(interpreter, cv2_im_rgb.tobytes())
        objs = get_objects(interpreter, self.threshold)[:self.top_k]
        cv2_im = self.append_objs_to_img(frameWebcam, inference_size, objs,
                                         readLabels)

        # Write the video to file.
        self.out.write(cv2_im)

        # Process the Picam feed.
        grayPicam = cv2.cvtColor(framePicam, cv2.COLOR_BGR2GRAY)
        grayPicam = cv2.rotate(grayPicam, cv2.ROTATE_180)
        # grayPicam = cv2.GaussianBlur(grayPicam, (21, 21), 0)

        # Display the resulting frames.
        cv2.imshow("RobotBack", grayPicam)
        cv2.imshow("RobotFront", cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # When everything is done, release the capture.
    self.out.release()
    cv2.destroyAllWindows()
    self.webcam.stop()
    self.picam.stop()
def main():
    labels = read_label_file("models/coco_labels.txt")
    interpreter = make_interpreter(
        "models/ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite")
    interpreter.allocate_tensors()
    threshold = 0.4
    printInfo("ready")

    while True:
        line = sys.stdin.readline().rstrip("\n")
        try:
            # Load image from the Shinobi stream.
            rawImage = BytesIO(base64.b64decode(line))
            image = Image.open(rawImage)

            # Resize the image for object detection using built-in pycoral
            # code; it resizes to the model input size (e.g. 300x300) and
            # provides a scale for mapping detections back later.
            _, scale = common.set_resized_input(
                interpreter, image.size,
                lambda size: image.resize(size, Image.ANTIALIAS))

            start = time.perf_counter()
            interpreter.invoke()
            inference_time = time.perf_counter() - start

            # Passing the scale from above, this function creates the
            # bounding boxes: it takes the model-sized image and divides by
            # the scale ratio to recover the original coordinates.
            objs = detect.get_objects(interpreter, threshold, scale)
            output = []
            for obj in objs:
                label = labels.get(obj.id, obj.id)
                labelID = obj.id
                score = obj.score
                bbox = obj.bbox
                output.append({"bbox": bbox, "class": label, "score": score})

            # Output coordinates are relative to the original image size.
            printData(output, (inference_time * 1000))
        except Exception as e:
            printError(str(e))
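# Usage sketch (assumption): the loop above consumes one base64-encoded image
# per stdin line. 'detector.py' and 'frame.jpg' are hypothetical placeholders
# for the script containing main() and a test image; a producer could feed
# a frame like this:
import base64
import subprocess

proc = subprocess.Popen(['python3', 'detector.py'], stdin=subprocess.PIPE)
with open('frame.jpg', 'rb') as f:
    proc.stdin.write(base64.b64encode(f.read()) + b'\n')
proc.stdin.flush()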
def detect_and_classify_faces(detector, classifier, image, threshold,
                              padding=10):
    predictions = []
    boxes = []
    faces = []
    height, width, _ = image.shape

    detector_target_size = common.input_size(detector)
    classifier_target_size = common.input_size(classifier)
    scale_x = width / detector_target_size[0]
    scale_y = height / detector_target_size[1]

    resized_image = cv2.resize(image, detector_target_size)
    run_inference(detector, resized_image.tobytes())
    objects = detect.get_objects(detector, threshold)

    for obj in objects:
        bbox = obj.bbox.scale(scale_x, scale_y)
        startX, startY = int(bbox.xmin - padding), int(bbox.ymin - padding)
        endX, endY = int(bbox.xmax + padding), int(bbox.ymax + padding)
        # Ensure the bounding boxes fall within the dimensions of the image.
        (startX, startY) = (max(1, startX), max(1, startY))
        (endX, endY) = (min(width - 1, endX), min(height - 1, endY))
        boxes.append((startX, startY, endX, endY))

        face = image[startY:endY, startX:endX]
        face = cv2.resize(face, classifier_target_size)
        faces.append(face)

    for face in faces:
        run_inference(classifier, face.tobytes())
        prediction = classify.get_scores(classifier)
        predictions.append(prediction)

    return (boxes, predictions)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label", help="File path of label file.",
                        required=True)
    parser.add_argument("--threshold", help="threshold to filter results.",
                        default=0.5, type=float)
    parser.add_argument("--width", help="Resolution width.",
                        default=640, type=int)
    parser.add_argument("--height", help="Resolution height.",
                        default=480, type=int)
    parser.add_argument("--videopath", help="File path of Videofile.",
                        default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    # Video capture.
    if args.videopath == "":
        print("Open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print("Open video file: ", args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    cap_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    cap_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    elapsed_list = []
    while cap.isOpened():
        _, frame = cap.read()
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Run inference.
        start = time.perf_counter()
        _, scale = common.set_resized_input(
            interpreter, (cap_width, cap_height),
            lambda size: cv2.resize(im, size))
        interpreter.invoke()
        elapsed_ms = (time.perf_counter() - start) * 1000

        # Display result.
        objects = detect.get_objects(interpreter, args.threshold, scale)
        for obj in objects:
            label_name = labels.get(obj.id, "Unknown") if labels else "Unknown"
            caption = "{0}({1:.2f})".format(label_name, obj.score)

            # Draw a rectangle and caption.
            box = (obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax, obj.bbox.ymax)
            visual.draw_rectangle(frame, box, colors[obj.id])
            visual.draw_caption(frame, box, caption)

        # Calc fps.
        elapsed_list.append(elapsed_ms)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

        # Display fps.
        fps_text = "{0:.2f}ms".format(elapsed_ms)
        visual.draw_caption(frame, (10, 30), fps_text + avg_text)

        # Display the frame.
        cv2.imshow(WINDOW_NAME, frame)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything is done, release the window.
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', required=True,
                        help='File path of .tflite file')
    parser.add_argument('-i', '--input', required=True,
                        help='File path of image to process')
    parser.add_argument('-l', '--labels', help='File path of labels file')
    parser.add_argument('-t', '--threshold', type=float, default=0.4,
                        help='Score threshold for detected objects')
    parser.add_argument('-o', '--output',
                        help='File path for the result image with annotations')
    parser.add_argument('-c', '--count', type=int, default=5,
                        help='Number of times to run inference')
    args = parser.parse_args()

    labels = read_label_file(args.labels) if args.labels else {}
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    image = Image.open(args.input)
    _, scale = common.set_resized_input(
        interpreter, image.size,
        lambda size: image.resize(size, Image.ANTIALIAS))

    print('----INFERENCE TIME----')
    print('Note: The first inference is slow because it includes',
          'loading the model into Edge TPU memory.')
    for _ in range(args.count):
        start = time.perf_counter()
        interpreter.invoke()
        inference_time = time.perf_counter() - start
        objs = detect.get_objects(interpreter, args.threshold, scale)
        print('%.2f ms' % (inference_time * 1000))

    print('-------RESULTS--------')
    if not objs:
        print('No objects detected')
    for obj in objs:
        print(labels.get(obj.id, obj.id))
        print('  id:    ', obj.id)
        print('  score: ', obj.score)
        print('  bbox:  ', obj.bbox)

    if args.output:
        image = image.convert('RGB')
        draw_objects(ImageDraw.Draw(image), objs, labels)
        image.save(args.output)
        image.show()
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', required=True, help='Model directory')
    args = parser.parse_args()

    # Load tflite model.
    detector = edgetpu.make_interpreter(
        os.path.join(args.model, "detector.tflite"))
    detector.allocate_tensors()
    labels = dataset.read_label_file(os.path.join(args.model, 'labels.txt'))

    # Load webcam.
    prevTime = 0
    cap = cv.VideoCapture(0)
    rows, cols = 320, 320
    cap.set(cv.CAP_PROP_FRAME_WIDTH, 320)
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, 320)

    # Run model.
    while True:
        _, image = cap.read()

        # Rotate the frame 180 degrees about the image center.
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        M = cv.getRotationMatrix2D((cols / 2, rows / 2), 180, 1)
        image = cv.warpAffine(image, M, (cols, rows))
        image = Image.fromarray(image, "RGB")

        _, scale = common.set_resized_input(
            detector, image.size,
            lambda size: image.resize(size, Image.ANTIALIAS))

        # Measure FPS around the inference.
        curTime = time.time()
        detector.invoke()
        objs = detect.get_objects(detector, 0.6, scale)

        draw_image = image.copy()
        if not objs:
            draw_no_detect(draw_image)
        else:
            draw_objects(draw_image, objs, labels)

        sec = curTime - prevTime
        prevTime = curTime
        fps = 1 / sec
        fps_text = "FPS : %0.1f" % fps
        draw_text(draw_image, fps_text, (0, 0))

        draw_image = np.array(draw_image)
        draw_image = cv.cvtColor(draw_image, cv.COLOR_RGB2BGR)

        # Display frame.
        cv.imshow("Frame", draw_image)
        key = cv.waitKey(1) & 0xff
        if key == 27:  # Stop with ESC.
            break
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label", help="File path of label file.",
                        required=True)
    parser.add_argument("--threshold", help="threshold to filter results.",
                        default=0.5, type=float)
    parser.add_argument("--width", help="Resolution width.",
                        default=640, type=int)
    parser.add_argument("--height", help="Resolution height.",
                        default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                    rawCapture, format="rgb", use_video_port=True):
                rawCapture.truncate(0)
                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Run inference.
                start = time.perf_counter()
                _, scale = common.set_resized_input(
                    interpreter, (resolution_width, resolution_height),
                    lambda size: cv2.resize(image, size))
                interpreter.invoke()
                elapsed_ms = (time.perf_counter() - start) * 1000

                # Display result.
                objects = detect.get_objects(interpreter, args.threshold,
                                             scale)
                for obj in objects:
                    label_name = (labels.get(obj.id, "Unknown")
                                  if labels else "Unknown")
                    caption = "{0}({1:.2f})".format(label_name, obj.score)

                    # Draw a rectangle and caption.
                    box = (obj.bbox.xmin, obj.bbox.ymin,
                           obj.bbox.xmax, obj.bbox.ymax)
                    visual.draw_rectangle(im, box, colors[obj.id])
                    visual.draw_caption(im, box, caption)

                # Calc fps.
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

                # Display fps.
                fps_text = "{0:.2f}ms".format(elapsed_ms)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # Display the frame.
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break
        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
def main():
    global mot_tracker
    default_model_dir = '../models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int, default=0,
                        help='Index of which video source to use.')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    parser.add_argument('--tracker', help='Name of the Object Tracker To be used.',
                        default=None, choices=[None, 'sort'])
    parser.add_argument('--videosrc',
                        help='Directly connected (dev) or Networked (net) video source.',
                        choices=['dev', 'net', 'file'], default='dev')
    parser.add_argument('--display', help='Is a display attached',
                        default='False', choices=['True', 'False'])
    parser.add_argument('--netsrc',
                        help='Networked video source, example format: '
                             'rtsp://192.168.1.43/mpeg4/media.amp')
    parser.add_argument('--filesrc',
                        help='Video file source. The videos subdirectory gets '
                             'mapped into the Docker container, so place your '
                             'files there.')
    parser.add_argument('--modelInt8',
                        help='Model expects input tensors to be Int8, not UInt8',
                        default='False', choices=['True', 'False'])
    args = parser.parse_args()

    trackerName = args.tracker
    # Check for the object tracker.
    if trackerName is not None:
        if trackerName == 'mediapipe':
            if detectCoralDevBoard():
                objectOfTracker = ObjectTracker('mediapipe')
            else:
                print("Tracker MediaPipe is only available on the Dev Board. "
                      "Keeping the tracker as None")
                trackerName = None
        else:
            objectOfTracker = ObjectTracker(trackerName)

    if trackerName is not None and objectOfTracker:
        mot_tracker = objectOfTracker.trackerObject.mot_tracker
    else:
        mot_tracker = None

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    inference_size = input_size(interpreter)

    model_int8 = args.modelInt8 == 'True'

    if args.videosrc == 'dev':
        cap = cv2.VideoCapture(args.camera_idx)
    elif args.videosrc == 'file':
        cap = cv2.VideoCapture(args.filesrc)
    else:
        if args.netsrc is None:
            print("--videosrc was set to net but --netsrc was not specified")
            sys.exit()
        cap = cv2.VideoCapture(args.netsrc)
    cap.set(cv2.CAP_PROP_BUFFERSIZE, 0)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            if args.videosrc == 'file':
                # Loop the video file.
                cap = cv2.VideoCapture(args.filesrc)
                continue
            else:
                break
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)

        if model_int8:
            input_type = common.input_details(interpreter, 'dtype')
            img = (input_type(cv2_im_rgb) - 127.5) / 128.0
            run_inference(interpreter, img.flatten())
        else:
            run_inference(interpreter, cv2_im_rgb.tobytes())

        objs = get_objects(interpreter, args.threshold)[:args.top_k]
        height, width, channels = cv2_im.shape
        scale_x, scale_y = width / inference_size[0], height / inference_size[1]

        # Collect detections as [xmin, ymin, xmax, ymax, score, id] rows.
        detections = []
        for obj in objs:
            bbox = obj.bbox.scale(scale_x, scale_y)
            detections.append([bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax,
                               obj.score, obj.id])
        detections = np.array(detections)

        trdata = []
        trackerFlag = False
        if detections.any():
            if mot_tracker is not None:
                trdata = mot_tracker.update(detections)
                trackerFlag = True
        cv2_im = append_objs_to_img(cv2_im, detections, labels, trdata,
                                    trackerFlag)

        if args.display == 'True':
            cv2.imshow('frame', cv2_im)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    cap.release()
    cv2.destroyAllWindows()
def detect(self, pilImage):
    _, scale = common.set_resized_input(
        self.interpreter, pilImage.size,
        lambda size: pilImage.resize(size))
    self.interpreter.invoke()
    return detect.get_objects(self.interpreter, score_threshold=0.6,
                              image_scale=scale)