def _test_cat(self, model_name):
    engine = DetectionEngine(test_utils.test_data_path(model_name))
    with test_utils.test_image('cat.bmp') as img:
        ret = engine.detect_with_image(img, top_k=1)
        self.assertEqual(len(ret), 1)
        self.assertEqual(ret[0].label_id, 16)  # cat
        self.assertGreater(ret[0].score, 0.7)
        self.assertGreater(
            test_utils.iou(
                np.array([[0.1, 0.1], [0.7, 1.0]]), ret[0].bounding_box), 0.86)
        # Check coordinates in pixels.
        ret = engine.detect_with_image(img, top_k=1, relative_coord=False)
        self.assertGreater(
            test_utils.iou(
                np.array([[60, 40], [420, 400]]), ret[0].bounding_box), 0.86)
def faceTracking(sender):
    engine = DetectionEngine(
        "ssd_mobilenet_v2_face_quant_postprocess_edgetpu.tflite")
    cap = cv2.VideoCapture(-1)
    currentID = 1
    faceTrackers = {}
    term = False
    peopleCount = 0
    while not term:
        _, frame = cap.read()
        frame = cv2.rotate(frame, cv2.ROTATE_180)
        frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        framePIL = Image.fromarray(frameRGB)
        faces = engine.detect_with_image(framePIL,
                                         threshold=0.05,
                                         keep_aspect_ratio=True,
                                         relative_coord=False,
                                         top_k=10,
                                         resample=4)
        for face in faces:
            (x, y, x2, y2) = (int(i) for i in face.bounding_box.flatten().tolist())
            w = x2 - x
            h = y2 - y
            center = (int(x + w * 0.5), int(y + h * 0.5))
            fidMatch = False
            for fid in faceTrackers.keys():
                (tx, ty, tw, th, n, u, c) = faceTrackers.get(fid)
                if tx <= center[0] <= tx + tw and ty <= center[1] <= ty + th:
                    # Count a tracked face as a person once it has been seen
                    # in enough frames.
                    if n < 50:
                        n += 1
                    if n >= 35 and not c:
                        c = True
                        peopleCount += 1
                    faceTrackers.update({fid: (x, y, w, h, n, True, c)})
                    fidMatch = True
                    break
            if not fidMatch:
                faceTrackers.update({currentID: (x, y, w, h, 1, True, False)})
                currentID += 1
        fidsToDelete = []
        for fid in faceTrackers.keys():
            (tx, ty, tw, th, n, u, c) = faceTrackers.get(fid)
            if not u:
                # If center is close to frame edge then decay faster:
                # if res[0] - tw - 20 < tx < 20:
                #     n -= 5
                n -= 1
                if n < 1:
                    fidsToDelete.append(fid)
                else:
                    faceTrackers.update({fid: (tx, ty, tw, th, n, False, c)})
        for fid in fidsToDelete:
            faceTrackers.pop(fid, None)
        sender.send((faceTrackers, peopleCount, frameRGB))
        if sender.poll():
            term = sender.recv()
        pygame.time.Clock().tick(100)
    cap.release()
class face_detection:
    MODEL = 'models/ssd_mobilenet_v2_face_quant_postprocess_edgetpu.tflite'

    def __init__(self, threshold=0.5, num_results=10):
        self.engine = DetectionEngine(face_detection.MODEL)
        self.objs = None
        self.boxes = None
        self.scores = None
        self.threshold = threshold
        self.num_results = num_results

    def set_threshold(self, num):
        self.threshold = num

    def set_max_results(self, num):
        self.num_results = num

    def detect(self, img):
        img = Image.fromarray(img)
        self.objs = self.engine.detect_with_image(img,
                                                  threshold=self.threshold,
                                                  keep_aspect_ratio=True,
                                                  relative_coord=False,
                                                  top_k=self.num_results)
        self.boxes = [obj.bounding_box.flatten().tolist() for obj in self.objs]
        self.scores = [obj.score for obj in self.objs]
        return self.objs

    def get_bounding_boxes(self):
        return self.boxes

    def get_scores(self):
        return self.scores
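# Usage sketch (not from the original sources): driving the face_detection
# wrapper above with a single OpenCV frame. The image path 'sample.jpg' is a
# hypothetical placeholder.
def face_detection_demo():
    import cv2
    detector = face_detection(threshold=0.6, num_results=5)
    # detect() expects an RGB numpy array; OpenCV loads BGR, so convert first.
    frame = cv2.cvtColor(cv2.imread('sample.jpg'), cv2.COLOR_BGR2RGB)
    detector.detect(frame)
    for box, score in zip(detector.get_bounding_boxes(), detector.get_scores()):
        print('face at %s, score %.2f' % (box, score))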
class Snapshot():
    def __init__(self):
        self.engine = DetectionEngine(
            "ssd_mobilenet_v2_face_quant_postprocess_edgetpu.tflite")
        #self.cap = cv2.VideoCapture(-1)

    def take(self):
        cap = cv2.VideoCapture(-1)
        _, frame = cap.read()
        cap.release()  # release the camera as soon as the frame is captured
        frame = cv2.rotate(frame, cv2.ROTATE_180)
        frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        framePIL = Image.fromarray(frameRGB)
        faces = self.engine.detect_with_image(framePIL,
                                              threshold=0.05,
                                              keep_aspect_ratio=True,
                                              relative_coord=False,
                                              top_k=10,
                                              resample=4)
        # Keep the widest detected face.
        widest = 0
        (a, b, c, d) = (0, 0, 0, 0)
        for face in faces:
            (x, y, x2, y2) = (int(i) for i in face.bounding_box.flatten().tolist())
            h = y2 - y
            w = x2 - x
            if w > widest:
                widest = w  # track the widest face seen so far
                (a, b, c, d) = (x, y, h, w)
        print(a, b, c, d)
        return frameRGB, a, b, c, d
def _test_pet(self, model_name):
    engine = DetectionEngine(test_utils.test_data_path(model_name))
    with test_utils.test_image('cat.bmp') as img:
        ret = engine.detect_with_image(img, top_k=1)
        self.assertEqual(len(ret), 1)
        self.assertEqual(ret[0].label_id, 0)  # Abyssinian
        self.assertGreater(ret[0].score, 0.9)
        self.assertGreater(
            test_utils.iou(
                np.array([[0.35, 0.11], [0.7, 0.66]]), ret[0].bounding_box), 0.84)
        ret = engine.detect_with_image(img, top_k=1, relative_coord=False)
        # Check coordinates in pixels.
        self.assertGreater(
            test_utils.iou(
                np.array([[211, 47], [415, 264]]), ret[0].bounding_box), 0.84)
def _test_face(self, model_name):
    engine = DetectionEngine(test_utils.test_data_path(model_name))
    with test_utils.test_image('face.jpg') as img:
        ret = engine.detect_with_image(img, top_k=1)
        self.assertEqual(len(ret), 1)
        self.assertEqual(ret[0].label_id, 0)
        self.assertGreater(ret[0].score, 0.95)
        self.assertGreater(
            test_utils.iou(
                np.array([[0.41, 0.07], [0.78, 0.49]]), ret[0].bounding_box), 0.9)
        ret = engine.detect_with_image(img, top_k=1, relative_coord=False)
        # Check coordinates in pixels.
        self.assertGreater(
            test_utils.iou(
                np.array([[427, 53], [801, 354]]), ret[0].bounding_box), 0.9)
class Detector(DetectorBase):
    def __init__(self):
        DetectorBase.__init__(self, "face_coral")

    def init(self):
        # Initialize engine.
        self.model_file = "/usr/share/edgetpu/examples/models/mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite"
        self.label_file = None
        try:
            self.engine = DetectionEngine(self.model_file)
        except Exception as error:
            logger.error('Initialization error: {}'.format(error))
            return

    # Runs SSD face detection on the Edge TPU.
    def detect(self, image_cv) -> DetectorResponse:
        # Convert OpenCV frame (a numpy array) into a PIL Image.
        pil_image = Image.fromarray(image_cv)
        # Run inference.
        ans = self.engine.detect_with_image(pil_image,
                                            threshold=0.05,
                                            keep_aspect_ratio=True,
                                            relative_coord=False,
                                            top_k=10)
        bbox = []
        conf = []
        label = []
        # Collect results.
        if ans:
            for obj in ans:
                # Sample output:
                #   score = 0.97265625
                #   box = [417.078184068203, 436.7141185646848, 2443.3632068037987, 1612.3385782686541]
                b = [int(i) for i in obj.bounding_box.flatten().tolist()]
                c = float(obj.score)
                l = 'person'  # single-class face model
                bbox.append(b)
                conf.append(c)
                label.append(l)
        else:
            logger.debug('No face detected!')

        model_response = DetectorResponse(self.get_model_name())
        for l, c, b in zip(label, conf, bbox):
            model_response.add(b, l, c, self.get_model_name())
        return model_response
def _test_gray_face(self, model_name):
    engine = DetectionEngine(test_utils.test_data_path(model_name))
    with test_utils.test_image('grace_hopper.bmp') as img:
        # Convert image to grayscale.
        img = img.convert('L')
        ret = engine.detect_with_image(img, top_k=1)
        self.assertEqual(len(ret), 1)
        self.assertEqual(ret[0].label_id, 0)
        self.assertGreater(ret[0].score, 0.95)
        self.assertGreater(
            test_utils.iou(
                np.array([[0.28, 0.07], [0.74, 0.60]]), ret[0].bounding_box), 0.9)
        ret = engine.detect_with_image(img, top_k=1, relative_coord=False)
        # Check coordinates in pixels.
        self.assertGreater(
            test_utils.iou(
                np.array([[144, 41], [382, 365]]), ret[0].bounding_box), 0.9)
def main():
    cv_publisher = Publisher(105)
    MODELS_DIR = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/models/'
    MODEL_PATH = MODELS_DIR + 'ssd_mobilenet_v2_pupper_quant_edgetpu.tflite'
    LABEL_PATH = MODELS_DIR + 'pupper_labels.txt'
    LOG_FILE = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/vision_log.txt'
    labels = dataset_utils.read_label_file(LABEL_PATH)
    engine = DetectionEngine(MODEL_PATH)

    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        _, height, width, _ = engine.get_input_tensor_shape()
        try:
            stream = io.BytesIO()
            #count = 0
            for _ in camera.capture_continuous(stream, format='rgb',
                                               use_video_port=True,
                                               resize=(width, height)):
                stream.truncate()
                stream.seek(0)
                input_tensor = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                #image = Image.frombuffer('RGB', (width, height), stream.getvalue())
                # (320, 304) accounts for automatic upscaling by picamera when format='rgb'.
                image = Image.frombuffer('RGB', (320, 304), stream.getvalue())
                #draw = ImageDraw.Draw(image)
                start_ms = time.time()
                results = engine.detect_with_image(image,
                                                   threshold=0.2,
                                                   keep_aspect_ratio=True,
                                                   relative_coord=False,
                                                   top_k=10)
                elapsed_ms = time.time() - start_ms
                detectedObjs = []
                for obj in results:
                    if obj.label_id in range(3):
                        box = obj.bounding_box.flatten().tolist()
                        #draw.rectangle(box, outline='red')
                        #draw.text((box[0], box[1]), labels[obj.label_id] + " " + str(obj.score))
                        # box is [x1, y1, x2, y2].
                        w = box[2] - box[0]
                        h = box[3] - box[1]
                        objInfo = {'bbox_x': float(box[0]),
                                   'bbox_y': float(box[1]),
                                   'bbox_h': float(h),
                                   'bbox_w': float(w),
                                   'bbox_label': labels[obj.label_id],
                                   'bbox_confidence': float(obj.score)}
                        detectedObjs.append(objInfo)
                try:
                    cv_publisher.send(detectedObjs)
                except BaseException:
                    print('Failed to send bounding boxes. '
                          'CV UDP subscriber likely not initialized')
                #print(detectedObjs)
                #with open('/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/test_images_120120/' + str(count) + '.png', 'wb') as f:
                #    image.save(f)
                #count += 1
        except BaseException:
            with open(LOG_FILE, 'w') as f:
                f.write("Failed to run detection loop:\n {0}\n".format(
                    traceback.format_exc()))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.', required=True)
    parser.add_argument('--label', help='File path of label file.', required=True)
    args = parser.parse_args()

    labels = dataset_utils.read_label_file(args.label)
    engine = ClassificationEngine(args.model)
    detectionEngine = DetectionEngine(
        '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/models/ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite')
    detectionLabels = dataset_utils.read_label_file(
        '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/models/coco_labels.txt')

    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        _, height, width, _ = engine.get_input_tensor_shape()
        camera.start_preview()
        try:
            stream = io.BytesIO()
            count = 0
            for _ in camera.capture_continuous(stream, format='rgb',
                                               use_video_port=True,
                                               resize=(width, height)):
                stream.truncate()
                stream.seek(0)
                input_tensor = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                print(type(stream.getvalue()))
                image = Image.frombuffer('RGB', (width, height), stream.getvalue())
                draw = ImageDraw.Draw(image)
                with open('/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/test_images/' + str(count) + '.png', 'wb') as f:
                    image.save(f)
                start_ms = time.time()
                results = engine.classify_with_input_tensor(input_tensor, top_k=1)
                objects = detectionEngine.detect_with_image(image,
                                                            threshold=0.1,
                                                            keep_aspect_ratio=True,
                                                            relative_coord=False,
                                                            top_k=3)
                elapsed_ms = time.time() - start_ms
                print('--------------------------')
                for obj in objects:
                    if detectionLabels:
                        print(detectionLabels[obj.label_id] + ' score = ' + str(obj.score))
                    box = obj.bounding_box.flatten().tolist()
                    print('box = ', box)
                    draw.rectangle(box, outline='red')
                    draw.text((box[0], box[1]),
                              detectionLabels[obj.label_id] + " " + str(obj.score))
                if not objects:
                    print('No objects detected')
                else:
                    with open('/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/test_images/' + str(count) + '_boxes.png', 'wb') as f:
                        image.save(f)
                count += 1
                #if results:
                #    camera.annotate_text = '%s %.2f\n%.2fms' % (
                #        labels[results[0][0]], results[0][1], elapsed_ms * 1000.0)
        finally:
            camera.stop_preview()
def detection():
    global obj_ids
    global confs
    # Initialize Coral Engine.
    modelfile = 'models/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    labelfile = 'models/coco_labels.txt'
    thresh = 0.4
    topk = 40
    print("[INFO] loading labels & engine...")
    labels = load_labels(labelfile)
    obj_id = len(labels)
    engine = DetectionEngine(modelfile)
    cap = cv2.VideoCapture(0)
    time.sleep(2.0)
    print("[INFO] looping over frames...")
    while cap.isOpened():
        ret, frame = cap.read()
        img = cv2.resize(frame, None, fx=0.5, fy=0.5)
        cv2_im = cv2.flip(img, 0)
        pil_im = Image.fromarray(cv2_im)
        objs = engine.detect_with_image(pil_im,
                                        threshold=thresh,
                                        keep_aspect_ratio=True,
                                        relative_coord=True,
                                        top_k=topk)
        # Non-maximum suppression from OpenCV's DNN module.
        # NMSBoxes expects boxes as [x, y, w, h].
        bboxes = []
        scores = []
        score_thresh = 0.4
        nms_threshold = 0.5
        for obj in objs:
            x0, y0, x1, y1 = obj.bounding_box.flatten().tolist()
            bboxes.append([x0, y0, x1 - x0, y1 - y0])
            scores.append(float(obj.score))
        indices = cv2.dnn.NMSBoxes(bboxes, scores, score_thresh, nms_threshold)
        print("num objects: " + str(len(objs)) + " and num left: " + str(len(indices)))
        leng = min(len(indices), 5)
        for i in range(leng):
            idx = indices[i][0]
            label = labels[objs[idx].label_id]
            obj_ids[i] = objs[idx].label_id
            confs[i] = int(100 * objs[idx].score)
            print(label + ": " + str(confs[i]))
        # Drawing for debugging.
        cv2_im = append_objs_to_img(cv2_im, objs, labels)
        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.', required=True)
    parser.add_argument('--label', help='File path of label file.', required=True)
    parser.add_argument('--maxobjects', type=int, default=3, help='Maximum objects')
    parser.add_argument('--threshold', type=float, default=0.3, help='Minimum threshold')
    parser.add_argument('--picamera', action='store_true', default=False,
                        help='Use PiCamera for image capture')
    args = parser.parse_args()

    # Prepare labels.
    labels = ReadLabelFile(args.label) if args.label else None

    # Initialize engine.
    engine = DetectionEngine(args.model)

    # Initialize video stream.
    vs = VideoStream(usePiCamera=args.picamera, resolution=(640, 480)).start()
    time.sleep(1)
    fps = FPS().start()

    while True:
        try:
            # Read frame from video.
            screenshot = vs.read()
            image = Image.fromarray(screenshot)

            # Perform inference.
            results = engine.detect_with_image(image,
                                               threshold=args.threshold,
                                               keep_aspect_ratio=True,
                                               relative_coord=False,
                                               top_k=args.maxobjects)
            draw_image(image, results, labels)

            if cv2.waitKey(5) & 0xFF == ord('q'):
                fps.stop()
                break
            fps.update()
        except KeyboardInterrupt:
            fps.stop()
            break

    print("Elapsed time: " + str(fps.elapsed()))
    print("Approx FPS: " + str(fps.fps()))
    cv2.destroyAllWindows()
    vs.stop()
    time.sleep(2)
class FaceDetector():
    def __init__(self, modelPath):
        self.detector = DetectionEngine(modelPath)

    def detect(self, image, confidence):
        try:
            # Resize to the model's 320x320 input before converting to PIL.
            img = cv2.resize(image, (320, 320))
            img = Image.fromarray(img)
            faces = self.detector.detect_with_image(img, threshold=confidence)
            return faces
        except Exception as e:
            raise e
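# Usage sketch (not part of the original snippet): running the FaceDetector
# above on one OpenCV frame; the model path and image file are hypothetical
# placeholders.
def face_detector_demo():
    import cv2
    detector = FaceDetector('ssd_mobilenet_v2_face_quant_postprocess_edgetpu.tflite')
    frame = cv2.cvtColor(cv2.imread('sample.jpg'), cv2.COLOR_BGR2RGB)
    # Boxes come back in relative [0, 1] coordinates because relative_coord
    # defaults to True in detect_with_image.
    for face in detector.detect(frame, confidence=0.5):
        print(face.bounding_box.flatten().tolist(), face.score)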
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', required=True,
        help='Detection SSD model path (must have post-processing operator).')
    parser.add_argument('--label', help='Labels file path.')
    parser.add_argument('--input', help='Input image path.', required=True)
    parser.add_argument('--output', help='Output image path.')
    parser.add_argument(
        '--keep_aspect_ratio', action='store_true',
        help=('keep the image aspect ratio when down-sampling the image by adding '
              'black pixel padding (zeros) on bottom or right. '
              'By default the image is resized and reshaped without cropping. This '
              'option should be the same as what is applied on input images during '
              'model training. Otherwise the accuracy may be affected and the '
              'bounding box of detection result may be stretched.'))
    args = parser.parse_args()

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = dataset_utils.read_label_file(args.label) if args.label else None

    # Open image.
    img = Image.open(args.input).convert('RGB')
    draw = ImageDraw.Draw(img)

    # Run inference.
    objs = engine.detect_with_image(img,
                                    threshold=0.05,
                                    keep_aspect_ratio=args.keep_aspect_ratio,
                                    relative_coord=False,
                                    top_k=10)

    # Print and draw detected objects.
    for obj in objs:
        print('-----------------------------------------')
        if labels:
            print(labels[obj.label_id])
        print('score =', obj.score)
        box = obj.bounding_box.flatten().tolist()
        print('box =', box)
        draw.rectangle(box, outline='red')

    if not objs:
        print('No objects detected.')

    # Save image with bounding boxes.
    if args.output:
        img.save(args.output)
class PedestrianDetector():
    def __init__(self, modelPath, device_path):
        self.detector = DetectionEngine(modelPath, device_path)

    def detect(self, image, confidence):
        try:
            # Resize to the model's 300x300 input before converting to PIL.
            img = cv2.resize(image, (300, 300))
            img = Image.fromarray(img)
            results = self.detector.detect_with_image(img, threshold=confidence)
            return results
        except Exception as e:
            raise e
def main():
    cv_publisher = Publisher(105)
    MODELS_DIR = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/models/'
    MODEL_PATH = MODELS_DIR + 'ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite'
    LABEL_PATH = MODELS_DIR + 'coco_labels.txt'
    LOG_FILE = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/vision_log.txt'
    labels = dataset_utils.read_label_file(LABEL_PATH)
    engine = DetectionEngine(MODEL_PATH)

    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        _, height, width, _ = engine.get_input_tensor_shape()
        stream = io.BytesIO()
        count = 0
        for _ in camera.capture_continuous(stream, format='rgb',
                                           use_video_port=True,
                                           resize=(width, height)):
            stream.truncate()
            stream.seek(0)
            input_tensor = np.frombuffer(stream.getvalue(), dtype=np.uint8)
            #image = Image.frombuffer('RGB', (width, height), stream.getvalue())
            # (320, 304) accounts for automatic upscaling by picamera when format='rgb'.
            image = Image.frombuffer('RGB', (320, 304), stream.getvalue())
            draw = ImageDraw.Draw(image)
            start_ms = time.time()
            results = engine.detect_with_image(image,
                                               threshold=0.1,
                                               keep_aspect_ratio=True,
                                               relative_coord=False,
                                               top_k=51)
            elapsed_ms = time.time() - start_ms
            detectedObjs = []
            for obj in results:
                if obj.label_id == 0 or obj.label_id == 36:
                    if obj.label_id == 36:
                        print('Tennis ball detected')
                    box = obj.bounding_box.flatten().tolist()
                    draw.rectangle(box, outline='red')
                    draw.text((box[0], box[1]), labels[obj.label_id] + " " + str(obj.score))
                    # box is [x1, y1, x2, y2].
                    w = box[2] - box[0]
                    h = box[3] - box[1]
                    objInfo = {'bbox_x': float(box[0]),
                               'bbox_y': float(box[1]),
                               'bbox_h': float(h),
                               'bbox_w': float(w),
                               'bbox_label': labels[obj.label_id],
                               'bbox_confidence': float(obj.score)}
                    detectedObjs.append(objInfo)
            cv_publisher.send(detectedObjs)
            #print(detectedObjs)
            with open('/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/test_images/' + str(count) + '.png', 'wb') as f:
                image.save(f)
            count += 1
def detection():
    global obj_id
    global conf
    # Initialize Coral Engine.
    modelfile = 'models/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    labelfile = 'models/coco_labels.txt'
    thresh = 0.4
    topk = 3
    print("[INFO] loading labels & engine...")
    labels = load_labels(labelfile)
    obj_id = len(labels)
    engine = DetectionEngine(modelfile)
    vs = VideoStream().start()
    time.sleep(2.0)
    print("[INFO] looping over frames...")
    while vs.more():
        frame = vs.read()
        #vs.clearQ()
        cv2_im = cv2.resize(frame, None, fx=0.5, fy=0.5)
        pil_im = Image.fromarray(cv2_im)
        objs = engine.detect_with_image(pil_im,
                                        threshold=thresh,
                                        keep_aspect_ratio=True,
                                        relative_coord=True,
                                        top_k=topk)
        print("num objects: ", len(objs))
        for obj in objs:
            # Drawing functions for debugging:
            #box = obj.bounding_box.flatten().astype("int")
            #(startX, startY, endX, endY) = box
            #cv2.rectangle(cv2_im, (startX, startY), (endX, endY), (0, 255, 0), 2)
            #y = startY - 15 if startY - 15 > 15 else startY + 15
            #text = "{}: {:.2f}%".format(label, obj.score * 100)
            #cv2.putText(cv2_im, text, (startX, y),
            #            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            label = labels[obj.label_id]
            if label == "person":
                print("PERSON FOUND")
                obj_id = obj.label_id
                conf = int(100 * obj.score)
        if len(objs) > 0:
            print(labels[obj_id])
        else:
            obj_id = 0
def main():
    default_model_dir = 'models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of classes with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='class score threshold')
    args = parser.parse_args()

    print("Loading %s with %s labels." % (args.model, args.labels))
    engine = DetectionEngine(args.model)
    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        pil_im = Image.fromarray(cv2_im)
        objs = engine.detect_with_image(pil_im,
                                        threshold=args.threshold,
                                        keep_aspect_ratio=True,
                                        relative_coord=True,
                                        top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)
        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='Path of the detection model.', required=True)
    args = parser.parse_args()

    engine = DetectionEngine(args.model)
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    print("Camera started. Streaming results")
    while cap.isOpened():
        print("Running")
        _, frame = cap.read()
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = PIL.Image.fromarray(im)
        ans = engine.detect_with_image(img,
                                       threshold=0.35,
                                       keep_aspect_ratio=True,
                                       relative_coord=False,
                                       top_k=1)
        pred = "nopeople"
        pos = 1.0
        if ans:
            for obj in ans:
                if obj.label_id == 0:
                    box = obj.bounding_box.flatten().tolist()
                    # Horizontal position of the box center, mapped to [-1, 1].
                    pos = ((box[0] + box[2]) / width) - 1
                    pred = '{:.3f}'.format(pos)
                    print(pos)
        print(pos)
        print(pred)
        time_queue.append(time.time())
        if len(time_queue) > 5:
            time_queue.popleft()
        if len(time_queue) > 1:
            print("FPS: ", len(time_queue) / (time_queue[-1] - time_queue[0]))
# Defined inside a test case; `self` is captured from the enclosing TestCase.
def detection_task(num_inferences):
    tid = threading.get_ident()
    print('Thread: %d, %d inferences for detection task' % (tid, num_inferences))
    model_name = 'mobilenet_ssd_v1_coco_quant_postprocess_edgetpu.tflite'
    engine = DetectionEngine(test_utils.test_data_path(model_name))
    print('Thread: %d, using device %s' % (tid, engine.device_path()))
    with test_utils.test_image('cat.bmp') as img:
        for _ in range(num_inferences):
            ret = engine.detect_with_image(img, top_k=1)
            self.assertEqual(len(ret), 1)
            self.assertEqual(ret[0].label_id, 16)  # cat
            self.assertGreater(ret[0].score, 0.7)
            self.assertGreater(
                test_utils.iou(
                    np.array([[0.1, 0.1], [0.7, 1.0]]), ret[0].bounding_box), 0.88)
    print('Thread: %d, done detection task' % tid)
def _test_model(self, model_name, expected_ap=None, expected_ar=None,
                resample=Image.NEAREST):
    engine = DetectionEngine(test_utils.test_data_path(model_name))
    ground_truth_file = 'coco/annotations/instances_val2017.json'
    coco_gt = coco.COCO(test_utils.test_data_path(ground_truth_file))
    detection_results = []
    print('Running inference for model %s...' % model_name)
    for _, img in coco_gt.imgs.items():
        with test_utils.test_image('coco', 'val2017', img['file_name']) as image:
            ret = engine.detect_with_image(image.convert('RGB'),
                                           threshold=0,
                                           top_k=100,
                                           relative_coord=False,
                                           resample=resample)
            for detection in ret:
                detection_results.append({
                    'image_id': img['id'],
                    # Model label id and ground truth label id are 1 off.
                    'category_id': detection.label_id + 1,
                    'bbox': self.absolute_to_relative_bbox(
                        detection.bounding_box.flatten().tolist()),
                    'score': detection.score.item()
                })
    detection_file = '/tmp/%s.json' % model_name
    with open(detection_file, 'w') as f:
        json.dump(detection_results, f, separators=(',', ':'))

    coco_dt = coco_gt.loadRes(detection_file)
    coco_eval = cocoeval.COCOeval(coco_gt, coco_dt, 'bbox')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    if expected_ap is not None:
        self.assertGreaterEqual(coco_eval.stats[0], expected_ap)
    if expected_ar is not None:
        self.assertGreaterEqual(coco_eval.stats[6], expected_ar)
class object_detection:
    MODEL_V1 = 'models/ssd_mobilenet_v1_coco_quant_postprocess_edgetpu.tflite'
    MODEL_V2 = 'models/ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite'
    LABELS = 'models/coco_labels.txt'

    def __init__(self, threshold=0.5, num_results=10, model=MODEL_V2, labels=LABELS):
        self.engine = DetectionEngine(model)
        self.model_labels = read_label_file(labels)
        self.objs = None
        self.boxes = None
        self.scores = None
        self.labels = None
        self.threshold = threshold
        self.num_results = num_results

    def set_threshold(self, num):
        self.threshold = num

    def set_max_results(self, num):
        self.num_results = num

    def detect(self, img):
        img = Image.fromarray(img)
        self.objs = self.engine.detect_with_image(img,
                                                  threshold=self.threshold,
                                                  keep_aspect_ratio=True,
                                                  relative_coord=False,
                                                  top_k=self.num_results)
        self.boxes = [obj.bounding_box.flatten().tolist() for obj in self.objs]
        self.scores = [obj.score for obj in self.objs]
        self.labels = [self.model_labels[obj.label_id] for obj in self.objs]
        return self.objs

    def get_bounding_boxes(self):
        return self.boxes

    def get_scores(self):
        return self.scores

    def get_labels(self):
        return self.labels
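# Usage sketch (assumption: the 'models/...' files named in the class
# constants are present; 'sample.jpg' is a placeholder). Shows the label
# lookup that object_detection adds over the face-only wrapper earlier.
def object_detection_demo():
    import cv2
    detector = object_detection(threshold=0.4, num_results=5)
    frame = cv2.cvtColor(cv2.imread('sample.jpg'), cv2.COLOR_BGR2RGB)
    detector.detect(frame)
    for label, box, score in zip(detector.get_labels(),
                                 detector.get_bounding_boxes(),
                                 detector.get_scores()):
        print('%s at %s (%.2f)' % (label, box, score))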
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--output', help='Output image path.')
    parser.add_argument(
        '--keep_aspect_ratio', action='store_true',
        help=('keep the image aspect ratio when down-sampling the image by adding '
              'black pixel padding (zeros) on bottom or right. '
              'By default the image is resized and reshaped without cropping. This '
              'option should be the same as what is applied on input images during '
              'model training. Otherwise the accuracy may be affected and the '
              'bounding box of detection result may be stretched.'))
    args = parser.parse_args()

    cam = cv2.VideoCapture(2)

    # Initialize engine.
    # engine = DetectionEngine(args.model)
    engine = DetectionEngine('../models/ssd_mobilenet_v2_face_quant_postprocess_edgetpu.tflite')
    # engine = DetectionEngine('models/face_detection_front.tflite')
    # engine = DetectionEngine('models/mobilenet_ssd_v2_coco_quant_postprocess.tflite')
    # engine = DetectionEngine('models/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite')

    while True:
        ret, frame = cam.read()
        frame = cv2.resize(frame, (257, 257))
        key = cv2.waitKey(10) & 0xFF
        if key == ord('q'):
            cam.release()
            break
        img = Image.fromarray(frame)
        objs = engine.detect_with_image(img,
                                        threshold=0.05,
                                        keep_aspect_ratio=args.keep_aspect_ratio,
                                        relative_coord=False,
                                        top_k=10)
        for obj in objs:
            box = obj.bounding_box.flatten().tolist()
            print(obj.score)
            cv2.rectangle(frame, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])), (255, 0, 0), 2)
        cv2.imshow("capture stream", frame)
def _run_benchmark_for_model(model_name, image):
    """Benchmarks model with given image.

    Args:
      model_name: string, file name of the model.
      image: string, name of the image used for test.

    Returns:
      float, average inference time in milliseconds.
    """
    print('Benchmark for [', model_name, '] on ', image)
    engine = DetectionEngine(test_utils.test_data_path(model_name))
    iterations = 200 if 'edgetpu' in model_name else 10
    with Image.open(test_utils.test_data_path(image)) as img_obj:
        benchmark_time = timeit.timeit(
            lambda: engine.detect_with_image(img_obj, threshold=0.4, top_k=10),
            number=iterations)
    time_per_inference = (benchmark_time / iterations) * 1000
    return time_per_inference
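# Usage sketch (assumption: run inside the edgetpu test tree so test_utils can
# resolve the names; the model and image files are ones used elsewhere in this
# section).
if __name__ == '__main__':
    ms = _run_benchmark_for_model(
        'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite', 'cat.bmp')
    print('%.2f ms per inference' % ms)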
def process_user_pictures(props, source, destination):
    click.echo('Fetching files...')
    image_list = [
        f for f in os.listdir(source)
        if os.path.isfile(os.path.join(source, f))
    ]
    detection = DetectionEngine(props['detection'])
    click.echo('Looking for faces..')
    found_faces = 0
    for filename in image_list:
        cv2_im = cv2.imread(os.path.join(source, filename))
        pil_im = Image.fromarray(cv2_im)
        # Search for faces in the picture.
        faces = detection.detect_with_image(pil_im,
                                            threshold=0.1,
                                            top_k=3,
                                            keep_aspect_ratio=True,
                                            relative_coord=True)
        click.echo('Found {} faces in {}'.format(len(faces), filename))
        # Check each face and append results to frame.
        height, width, _ = cv2_im.shape
        cnt = 0
        for face in faces:
            x0, y0, x1, y1 = face.bounding_box.flatten().tolist()
            # Scale relative coordinates back to pixels.
            x0, y0, x1, y1 = (int(x0 * width), int(y0 * height),
                              int(x1 * width), int(y1 * height))
            # Crop the face out of the camera picture.
            face_im = cv2_im[y0:y1, x0:x1]
            # Save the crop, e.g. photo.jpg -> photo_0.jpg.
            new_filename = (filename[:filename.rfind('.')] + '_' + str(cnt) +
                            filename[filename.rfind('.'):])
            click.echo('Saving {}'.format(new_filename))
            cv2.imwrite(os.path.join(destination, new_filename), face_im)
            cnt += 1
        found_faces += cnt
    click.echo(
        click.style("Done! From {} images we could find {} faces".format(
            len(image_list), found_faces), fg='green'))
def ssd_inferencer(results, frameBuffer, model, device):
    ssd_engine = DetectionEngine(model, device)
    print("Loaded Graphs!!! (SSD)")

    while True:
        if frameBuffer.empty():
            continue

        # Run inference.
        color_image = frameBuffer.get()
        prepimg_ssd = color_image[:, :, ::-1].copy()  # BGR -> RGB
        prepimg_ssd = Image.fromarray(prepimg_ssd)

        tinf = time.perf_counter()
        result_ssd = ssd_engine.detect_with_image(prepimg_ssd,
                                                  threshold=0.5,
                                                  keep_aspect_ratio=True,
                                                  relative_coord=False,
                                                  top_k=10)
        print(time.perf_counter() - tinf, "sec (SSD)")
        results.put(result_ssd)
class FaceDetector:
    def __init__(self, model_path: str = './model'):
        self.engine = DetectionEngine(model_path)

    def predict(self, on_img: Image) -> List[List[int]]:
        ans = self.engine.detect_with_image(on_img,
                                            threshold=0.05,
                                            keep_aspect_ratio=False,
                                            relative_coord=False,
                                            top_k=10)
        faces = []
        if ans:
            for obj in ans:
                # if labels:
                #     print(labels[obj.label_id])
                # print('score = ', obj.score)
                faces.append(obj.bounding_box.flatten().tolist())
        return faces

    def check_image_contains_face(self, img: Image):
        predictions = self.predict(img)
        return len(predictions) > 0
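# Usage sketch (hedged: the model path and image file are placeholders, not
# from the original snippet). check_image_contains_face() on the class above
# gives a quick boolean gate before any heavier processing.
def face_gate_demo():
    from PIL import Image as PILImage
    detector = FaceDetector('ssd_mobilenet_v2_face_quant_postprocess_edgetpu.tflite')
    img = PILImage.open('sample.jpg').convert('RGB')
    if detector.check_image_contains_face(img):
        print('faces:', detector.predict(img))
    else:
        print('no face found')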
def main(_):
    if FLAGS.detect:
        # Initialize ambient sensors.
        ambient = bme680.BME680(i2c_addr=bme680.I2C_ADDR_PRIMARY,
                                i2c_device=SMBus(1))
        # TODO: Tune settings.
        ambient.set_humidity_oversample(bme680.OS_2X)
        ambient.set_pressure_oversample(bme680.OS_4X)
        ambient.set_temperature_oversample(bme680.OS_8X)
        ambient.set_filter(bme680.FILTER_SIZE_3)
        ambient.set_gas_status(bme680.DISABLE_GAS_MEAS)

        # Load the face detection model.
        face_detector = DetectionEngine(FLAGS.face_model)

    # Start the frame processing loop.
    with PureThermal() as camera:
        # Initialize thermal image buffers.
        input_shape = (camera.height(), camera.width())
        raw_buffer = np.zeros(input_shape, dtype=np.int16)
        scaled_buffer = np.zeros(input_shape, dtype=np.uint8)
        if FLAGS.detect:
            rgb_buffer = np.zeros((*input_shape, 3), dtype=np.uint8)
        if FLAGS.visualize:
            window_buffer = np.zeros((WINDOW_HEIGHT, WINDOW_WIDTH, 3),
                                     dtype=np.uint8)
        raw_scale_factor = (FLAGS.max_temperature - FLAGS.min_temperature) // 255
        window_scale_factor_x = WINDOW_WIDTH / camera.width()
        window_scale_factor_y = WINDOW_HEIGHT / camera.height()

        if FLAGS.visualize:
            # Initialize the window.
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.setWindowProperty(WINDOW_NAME, cv2.WND_PROP_FULLSCREEN,
                                  cv2.WINDOW_FULLSCREEN)

        while (not FLAGS.visualize
               or cv2.getWindowProperty(WINDOW_NAME, 0) != -1):
            try:
                start_time = time()

                if FLAGS.detect:
                    # Acquire ambient sensor readings.
                    if not ambient.get_sensor_data():
                        logging.warning('Ambient sensor data not ready')
                    ambient_data = ambient.data
                    logging.debug('Ambient temperature: %.f °C' %
                                  ambient_data.temperature)
                    logging.debug('Ambient pressure: %.f hPa' %
                                  ambient_data.pressure)
                    logging.debug('Ambient humidity: %.f %%' %
                                  ambient_data.humidity)

                # Get the latest frame from the thermal camera and copy it.
                with camera.frame_lock():
                    np.copyto(dst=raw_buffer, src=camera.frame())

                # Map the raw temperature data to a normal range before
                # reducing the bit depth and min/max normalizing for better
                # contrast.
                np.clip((raw_buffer - FLAGS.min_temperature) // raw_scale_factor,
                        0, 255, out=scaled_buffer)
                cv2.normalize(src=scaled_buffer, dst=scaled_buffer, alpha=0,
                              beta=255, norm_type=cv2.NORM_MINMAX)

                if FLAGS.detect:
                    # Convert to the expected RGB format.
                    cv2.cvtColor(src=scaled_buffer, dst=rgb_buffer,
                                 code=cv2.COLOR_GRAY2RGB)

                    # Detect any faces in the frame.
                    faces = face_detector.detect_with_image(
                        Image.fromarray(rgb_buffer),
                        threshold=FLAGS.face_confidence,
                        top_k=FLAGS.max_num_faces,
                        keep_aspect_ratio=True,  # Better quality.
                        relative_coord=False,  # Expect pixel coordinates.
                        resample=Image.BILINEAR)  # Good enough and fast.

                    # TODO: Estimate distance based on face size.
                    # TODO: Model thermal attenuation based on distance and
                    # ambient temperature, pressure, and humidity.

                    # Find the (highest) temperature of each face.
                    if len(faces) == 1:
                        logging.info('1 person')
                    else:
                        logging.info('%d people' % len(faces))
                    for face in faces:
                        temperature = get_temperature(raw_buffer,
                                                      face.bounding_box)
                        if not temperature:
                            logging.warning('Empty crop')
                            continue
                        logging.info(format_temperature(temperature))

                if FLAGS.visualize:
                    # Apply the colormap.
                    turbo_buffer = TURBO_COLORMAP[scaled_buffer]

                    # Resize for the window.
                    cv2.cvtColor(src=turbo_buffer, dst=turbo_buffer,
                                 code=cv2.COLOR_RGB2BGR)
                    cv2.resize(src=turbo_buffer, dst=window_buffer,
                               dsize=(WINDOW_WIDTH, WINDOW_HEIGHT),
                               interpolation=cv2.INTER_CUBIC)

                    if FLAGS.detect:
                        # Draw the face bounding boxes and temperatures.
                        for face in faces:
                            bbox = face.bounding_box
                            top_left = (int(window_scale_factor_x * bbox[0, 0]),
                                        int(window_scale_factor_y * bbox[0, 1]))
                            bottom_right = (int(window_scale_factor_x * bbox[1, 0]),
                                            int(window_scale_factor_y * bbox[1, 1]))
                            cv2.rectangle(window_buffer, top_left, bottom_right,
                                          LINE_COLOR, LINE_THICKNESS)
                            temperature = get_temperature(raw_buffer,
                                                          face.bounding_box)
                            if not temperature:
                                continue
                            label = format_temperature(temperature,
                                                       add_unit=False)
                            label_size, _ = cv2.getTextSize(
                                label, LABEL_FONT, LABEL_SCALE, LABEL_THICKNESS)
                            label_position = (
                                (top_left[0] + bottom_right[0]) // 2
                                - label_size[0] // 2,
                                (top_left[1] + bottom_right[1]) // 2
                                + label_size[1] // 2)
                            cv2.putText(window_buffer, label, label_position,
                                        LABEL_FONT, LABEL_SCALE, LABEL_COLOR,
                                        LABEL_THICKNESS, cv2.LINE_AA)

                    # Draw the frame.
                    cv2.imshow(WINDOW_NAME, window_buffer)
                    cv2.waitKey(1)

                # Calculate timing stats.
                duration = time() - start_time
                logging.debug('Frame took %.f ms (%.2f Hz)' %
                              (duration * 1000, 1 / duration))

            # Stop on SIGINT.
            except KeyboardInterrupt:
                break

    if FLAGS.visualize:
        cv2.destroyAllWindows()
class Vision:
    coral_model_file = "/home/pi/catkin_ws/src/robot5/models/mobilenet_ssd_v2/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite"
    coral_labels_file = "/home/pi/catkin_ws/src/robot5/models/mobilenet_ssd_v2/coco_labels.txt"
    coral_confidence = 0.3
    caffe_model_file = "/home/pi/catkin_ws/src/robot5/models/res10_300x300_ssd_iter_140000.caffemodel"
    caffe_confidence = 0.5
    caffe_prototxt = "/home/pi/catkin_ws/src/robot5/models/deploy.prototxt.txt"
    face_cascade = cv2.CascadeClassifier('/home/pi/opencv/data/haarcascades/haarcascade_frontalface_default.xml')
    eye_cascade = cv2.CascadeClassifier('/home/pi/opencv/data/haarcascades/haarcascade_eye.xml')
    APRILWIDTH = 172
    FOCALLENGTH = 0.304

    def __init__(self):
        self.coral_model = {}
        self.coral_labels = {}
        self.caffe_model = {}
        self.at_detector = {}
        self.videoStream = {}
        self.status = None
        self.captureFrame = None
        self.visionFrame = None
        self.thread = Thread(target=self.frameUpdate, args=())
        self.thread.daemon = True
        self.flaskThread = Thread(target=self.runFlask)
        self.flaskThread.daemon = True
        self.frameLock = Lock()
        print("[INFO] Initialising ROS...")
        #self.pub = rospy.Publisher(name='vision_detect', subscriber_listener=vision_detect, queue_size=5, data_class=vision_detect)
        self.pub = rospy.Publisher('/vision_detect', vision_detect)
        rospy.init_node('robot5_vision', anonymous=False)
        for row in open(self.coral_labels_file):
            (classID, label) = row.strip().split(maxsplit=1)
            self.coral_labels[int(classID)] = label.strip()
        print("[INFO] loading Coral model...")
        self.coral_model = DetectionEngine(self.coral_model_file)
        #print("[INFO] loading Caffe model...")
        #self.caffe_model = cv2.dnn.readNetFromCaffe(self.caffe_prototxt, self.caffe_model_file)
        self.at_detector = Detector(families='tag36h11',
                                    nthreads=1,
                                    quad_decimate=1.0,
                                    quad_sigma=0.0,
                                    refine_edges=1,
                                    decode_sharpening=0.25,
                                    debug=0)
        print("[INFO] Running Flask...")
        self.app = Flask(__name__)
        self.add_routes()
        self.flaskThread.start()
        print("[INFO] starting video stream...")
        self.videoStream = VideoStream(src=0, usePiCamera=True).start()
        time.sleep(2.0)  # warmup
        self.captureFrame = self.videoStream.read()
        self.visionFrame = self.captureFrame
        self.thread.start()
        time.sleep(0.5)  # get first few frames
        srun = True
        print("[INFO] running frames...")
        while srun:
            srun = self.doFrame()
        cv2.destroyAllWindows()
        self.videoStream.stop()

    def frameUpdate(self):
        while True:
            self.captureFrame = self.videoStream.read()
            time.sleep(0.03)

    def gen(self):
        while True:
            if self.visionFrame is not None:
                bout = b"".join([b'--frame\r\nContent-Type: image/jpeg\r\n\r\n',
                                 self.visionFrame, b'\r\n'])
                yield bout
            else:
                return ""

    def add_routes(self):
        @self.app.route("/word/<word>")
        def some_route(word):
            self.testout()
            return "At some route:" + word

        @self.app.route('/video_feed')
        def video_feed():
            return Response(self.gen(),
                            mimetype='multipart/x-mixed-replace; boundary=frame')

    def testout(self):
        print("tested")

    def runFlask(self):
        self.app.run(debug=False, use_reloader=False, host='0.0.0.0', port=8000)

    def coralDetect(self, frame, orig):
        start1 = time.time()
        results = self.coral_model.detect_with_image(
            frame, threshold=self.coral_confidence,
            keep_aspect_ratio=True, relative_coord=False)
        fcount = 0
        points = []
        for r in results:
            box = r.bounding_box.flatten().astype("int")
            (startX, startY, endX, endY) = box
            label = self.coral_labels[r.label_id]
            cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)
            y = startY - 15 if startY - 15 > 15 else startY + 15
            # Box center.
            cx = startX + (endX - startX) / 2
            cy = startY + (endY - startY) / 2
            #points.append({"type": "coral", "num": fcount, "x": cx, "y": cy, "label": label, "score": r.score, "time": time.time()})
            points.append(["coral", fcount, int(cx), int(cy), label,
                           int(r.score * 100), rospy.Time.now()])
            fcount += 1
            text = "{}: {:.2f}%".format(label, r.score * 100)
            cv2.putText(orig, text, (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        end1 = time.time()
        #print("#1:", end1 - start1)
        return orig, points

    def caffeDetect(self, frame, orig):
        start2 = time.time()
        (h, w) = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0,
                                     (300, 300), (104.0, 177.0, 123.0))
        self.caffe_model.setInput(blob)
        detections = self.caffe_model.forward()
        fcount = 0
        points = []
        for i in range(0, detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence < self.caffe_confidence:
                continue
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            text = "{:.2f}%".format(confidence * 100)
            y = startY - 10 if startY - 10 > 10 else startY + 10
            # Box center.
            cx = startX + (endX - startX) / 2
            cy = startY + (endY - startY) / 2
            #points.append({"type": "caffe", "num": fcount, "x": cx, "y": cy, "score": confidence, "time": time.time()})
            points.append(["caffe", fcount, int(cx), int(cy), "",
                           int(confidence * 10), rospy.Time.now()])
            fcount += 1
            cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 0, 255), 2)
            cv2.putText(orig, text, (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)
        end2 = time.time()
        #print("#2:", end2 - start2)
        return orig, points

    def aprilDetect(self, grey, orig):
        start3 = time.time()
        Xarray = [338.563277422543, 0.0, 336.45495347495824,
                  0.0, 338.939280638548, 230.486982216255,
                  0.0, 0.0, 1.0]
        camMatrix = np.array(Xarray).reshape((3, 3))
        params = (camMatrix[0, 0], camMatrix[1, 1], camMatrix[0, 2], camMatrix[1, 2])
        tags = self.at_detector.detect(grey, True, params, 0.065)
        fcount = 0
        points = []
        for tag in tags:
            pwb = tag.corners[2][1] - tag.corners[0][1]
            pwt = tag.corners[3][1] - tag.corners[1][1]
            pwy = (pwb + pwt) / 2
            pwl = tag.corners[3][1] - tag.corners[0][1]
            pwr = tag.corners[2][1] - tag.corners[1][1]
            pwx = (pwl + pwr) / 2
            dist = self.distanceToCamera(self.APRILWIDTH, pwx)
            #print(dist)
            #points.append({"type": "april", "num": fcount, "x": pwx, "y": pwy, "label": tag.id, "score": dist, "time": time.time()})
            points.append(["april", fcount, int(pwl + (pwx / 2)), int(pwb + (pwy / 2)),
                           str(tag.tag_id) + "|" + str(dist), 0, rospy.Time.now()])
            fcount += 1
            cv2.putText(orig, str(dist), (int(pwx), int(pwy)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)
            for idx in range(len(tag.corners)):
                cv2.line(orig,
                         tuple(tag.corners[idx - 1, :].astype(int)),
                         tuple(tag.corners[idx, :].astype(int)),
                         (0, 255, 0))
        end3 = time.time()
        #print("#3:", end3 - start3)
        return orig, points

    def haarDetect(self, grey, orig):
        start4 = time.time()
        faces = self.face_cascade.detectMultiScale(grey, 1.3, 5)
        fcount = 0
        points = []
        for (x, y, w, h) in faces:
            #points.append({"type": "haar", "num": fcount, "x": x + (w / 2), "y": y + (h / 2), "time": time.time()})
            points.append(["haar", fcount, int(x + (w / 2)), int(y + (h / 2)),
                           "", 0, rospy.Time.now()])
            orig = cv2.rectangle(orig, (x, y), (x + w, y + h), (255, 255, 0), 2)
            roi_gray = grey[y:y + h, x:x + w]
            roi_color = orig[y:y + h, x:x + w]
            fcount += 1
            eyes = self.eye_cascade.detectMultiScale(roi_gray)
            for (ex, ey, ew, eh) in eyes:
                cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2)
        end4 = time.time()
        #print("#4:", end4 - start4)
        return orig, points

    def doFrame(self):
        frame = self.captureFrame
        if frame is None:
            return False
        frame = imutils.resize(frame, width=500)
        orig = frame.copy()
        grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = Image.fromarray(frame)
        outframe, cpoints = self.coralDetect(frame, orig)
        outframe, apoints = self.aprilDetect(grey, outframe)
        outframe, hpoints = self.haarDetect(grey, outframe)
        #outframe, fpoints = self.caffeDetect(orig, outframe)
        points = list(itertools.chain(cpoints, apoints, hpoints))
        for p in points:
            self.pub.publish(p[0], p[1], p[2], p[3], p[4], p[5], p[6])
        ret, self.visionFrame = cv2.imencode('.jpg', outframe)
        #self.visionFrame = outframe
        #cv2.imshow("Frame", outframe)
        #key = cv2.waitKey(1) & 0xFF
        #if key == ord("q"):
        #    return False
        return True

    def distanceToCamera(self, actWidth, perWidth):
        return ((actWidth * self.FOCALLENGTH) / perWidth) * 2
frame = vs.read()
# cv2.resize takes dsize as (width, height).
frame = cv2.resize(frame, (frame_width, frame_height))
frame = cv2.flip(frame, 0)
orig = frame.copy()

# Prepare the frame for object detection by converting (1) it
# from BGR to RGB channel ordering and then (2) from a NumPy
# array to PIL image format.
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = Image.fromarray(frame)

# Make predictions on the input frame.
start = time.time()
results = model.detect_with_image(frame,
                                  threshold=args["confidence"],
                                  keep_aspect_ratio=True,
                                  relative_coord=False)
end = time.time()

# Loop over the results.
for r in results:
    # Extract the bounding box and predicted class label.
    box = r.bounding_box.flatten().astype("int")
    (startX, startY, endX, endY) = box
    label = labels[r.label_id]

    # Draw the bounding box and label on the image.
    cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)
    y = startY - 15 if startY - 15 > 15 else startY + 15
    text = "{}: {:.2f}%".format(label, r.score * 100)
    cv2.putText(orig, text, (startX, y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)