def detection_job(detection_model, image_name, num_inferences):
    """Runs detection job."""
    engine = DetectionEngine(detection_model)
    with open_image(image_name) as img:
      # Resized image.
      _, height, width, _ = engine.get_input_tensor_shape()
      tensor = np.asarray(img.resize((width, height), Image.NEAREST)).flatten()

    # Using `detect_with_input_tensor` to exclude image down-scale cost.
    for _ in range(num_inferences):
      engine.detect_with_input_tensor(tensor, top_k=1)
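The `open_image` helper used above is not part of this snippet. A minimal sketch of such a helper, assuming it simply wraps PIL and yields an RGB image (the implementation is an assumption, not the original):

import contextlib
from PIL import Image

@contextlib.contextmanager
def open_image(image_name):
    # Hypothetical helper: open the file with PIL and hand back an RGB image,
    # closing it when the caller's `with` block exits.
    with Image.open(image_name) as img:
        yield img.convert('RGB')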
Example #2
def run_two_models_one_tpu(classification_model, detection_model, image_name,
                           num_inferences, batch_size):
    """Runs two models ALTERNATIVELY using one Edge TPU.

  It runs classification model `batch_size` times and then switch to run
  detection model `batch_size` time until each model is run `num_inferences`
  times.

  Args:
    classification_model: string, path to classification model
    detection_model: string, path to detection model.
    image_name: string, path to input image.
    num_inferences: int, number of inferences to run for each model.
    batch_size: int, number of inferences to run on one model before switching
      to the other one.

  Returns:
    double, wall time it takes to finish the job.
  """
    start_time = time.perf_counter()
    engine_a = ClassificationEngine(classification_model)
    # `engine_b` shares the same Edge TPU as `engine_a`
    engine_b = DetectionEngine(detection_model, engine_a.device_path())
    with open_image(image_name) as image:
        # Resized image for `engine_a`, `engine_b`.
        tensor_a = get_input_tensor(engine_a, image)
        tensor_b = get_input_tensor(engine_b, image)

    num_iterations = (num_inferences + batch_size - 1) // batch_size
    for _ in range(num_iterations):
        # Using `classify_with_input_tensor` and `detect_with_input_tensor` on purpose to
        # exclude image down-scale cost.
        for _ in range(batch_size):
            engine_a.classify_with_input_tensor(tensor_a, top_k=1)
        for _ in range(batch_size):
            engine_b.detect_with_input_tensor(tensor_b, top_k=1)
    return time.perf_counter() - start_time
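`get_input_tensor` is likewise defined elsewhere; a plausible sketch, assuming it mirrors the resize-and-flatten step from the first example (the helper body and resampling filter are assumptions):

import numpy as np
from PIL import Image

def get_input_tensor(engine, image):
    # Resize the PIL image to the engine's expected input size and return it
    # as a flat uint8 array, as the `*_with_input_tensor` methods require.
    _, height, width, _ = engine.get_input_tensor_shape()
    resized = image.resize((width, height), Image.NEAREST)
    return np.asarray(resized, dtype=np.uint8).flatten()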
Example #3
class CoralObjectDetector:
    """Performs inference on Edge TPU.
    """
    def __init__(self, model_path, device_path):
        self.__engine = DetectionEngine(model_path=os.path.join(
            model_path, 'edgetpu.tflite'),
                                        device_path=device_path)

        # `get_input_tensor_shape()` returns [1, height, width, channels],
        # while cv2.resize expects dsize as (width, height).
        self.__model_shape = itemgetter(2, 1)(
            self.__engine.get_input_tensor_shape())

    @property
    def device_name(self):
        return "Coral"

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass

    def detect(self, image_shape, image_np, detections: List[Detection]):
        image_np = cv2.resize(image_np,
                              dsize=self.__model_shape,
                              interpolation=cv2.INTER_LINEAR)

        objs = self.__engine.detect_with_input_tensor(
            input_tensor=image_np.flatten(), top_k=len(detections))

        d = 0
        max_width = image_shape[1] - 1
        max_height = image_shape[0] - 1
        while d < len(objs) and d < len(detections):
            detection = detections[d]
            obj = objs[d]
            detection.label = obj.label_id + 1
            detection.confidence = obj.score
            detection.bounding_box.y_min = int(obj.bounding_box[0][1] *
                                               max_height)
            detection.bounding_box.x_min = int(obj.bounding_box[0][0] *
                                               max_width)
            detection.bounding_box.y_max = int(obj.bounding_box[1][1] *
                                               max_height)
            detection.bounding_box.x_max = int(obj.bounding_box[1][0] *
                                               max_width)
            d += 1

        return self.__engine.get_inference_time()
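The `Detection` objects passed into `detect()` are defined elsewhere in the project. A minimal sketch of the structure the method appears to expect, built only from the fields it assigns (the dataclass layout is an assumption):

from dataclasses import dataclass, field

@dataclass
class BoundingBox:
    x_min: int = 0
    y_min: int = 0
    x_max: int = 0
    y_max: int = 0

@dataclass
class Detection:
    label: int = 0
    confidence: float = 0.0
    bounding_box: BoundingBox = field(default_factory=BoundingBox)
Example #4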
class EdgeTPUInferencer:

    def __init__(self, model):
        self.engine = DetectionEngine(model)

        self.watch = Stopwatch()

    def inference(self, img):

        self.watch.start()
        initial_h, initial_w, _ = img.shape
        if (initial_h, initial_w) != (300, 300):
            frame = cv2.resize(img, (300, 300))
        else:
            frame = img
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        self.watch.stop(Stopwatch.MODE_PREPROCESS)

        self.watch.start()
        ans = self.engine.detect_with_input_tensor(frame.flatten(), threshold=0.5, top_k=10)
        self.watch.stop(Stopwatch.MODE_INFER)

        # Post-process: convert detections to ([x, y, w, h], class_id, score)
        # tuples scaled back to the original frame size.
        self.watch.start()
        results = []
        for obj in ans:
            box = obj.bounding_box.flatten().tolist()
            bbox = [0] * 4
            bbox[0] = box[0] * initial_w
            bbox[1] = box[1] * initial_h
            bbox[2] = (box[2] - box[0]) * initial_w
            bbox[3] = (box[3] - box[1]) * initial_h

            result = (bbox, obj.label_id + 1, obj.score)
            results.append(result)
        self.watch.stop(Stopwatch.MODE_POSTPROCESS)

        return results
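The `Stopwatch` timing helper used here (and again in Example #7) is not shown. A rough re-creation of the interface the examples rely on (start(), stop(mode), a numread frame counter, and a periodic report()); every detail below is an assumption:

import time
from collections import defaultdict

class Stopwatch:
    MODE_PREPROCESS = 'preprocess'
    MODE_INFER = 'infer'
    MODE_POSTPROCESS = 'postprocess'

    def __init__(self, report_interval=10):
        self.totals = defaultdict(float)   # accumulated seconds per stage
        self.numread = 0                   # frames processed so far
        self.report_interval = report_interval
        self._t0 = None
        self._last_report = time.monotonic()

    def start(self):
        self._t0 = time.monotonic()

    def stop(self, mode):
        self.totals[mode] += time.monotonic() - self._t0
        if mode == Stopwatch.MODE_PREPROCESS:
            self.numread += 1

    def report(self):
        # Returns True roughly once per reporting interval so callers can
        # print periodic statistics.
        now = time.monotonic()
        if now - self._last_report >= self.report_interval:
            self._last_report = now
            return True
        return False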
Example #5
class SmartPiCamContr(object):

    # Step 2: Constructor which defines default values for settings
    def __init__(self,
                 appDuration=30,
                 cameraResolution=(304, 304),
                 useVideoPort=True,
                 minObjectScore=0.35):
        self.cameraResolution = cameraResolution
        self.useVideoPort = useVideoPort
        self.appDuration = appDuration  #seconds to run
        self.minObjectScore = minObjectScore

        modelFile = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
        objectLabelsFile = 'coco_labels.txt'
        print("Reading Model: ", modelFile)
        self.engine = DetectionEngine(modelFile)
        print("Reading object labels: ", objectLabelsFile)
        self.labels = self.readLabelFile(objectLabelsFile)
        print("Minimal object score: ", self.minObjectScore)

    # Step 4: Configure PiCam
    # Return parameter: created PiCam
    def configurePiCam(self):
        print("\nConfigure and warming up PiCamera")
        self.cam = PiCamera()
        self.cam.resolution = self.cameraResolution
        print("Camera resolution: " + repr(self.cam.resolution))
        self.cam.start_preview()
        sleep(2)
        self.cam.stop_preview()
        return self.cam

    #Step 7: Take a photo returned as numpy array
    def takePhoto(self):
        picData = np.empty(
            (self.cameraResolution[1], self.cameraResolution[0], 3),
            dtype=np.uint8)
        self.cam.capture(picData,
                         format='rgb',
                         use_video_port=self.useVideoPort)  #24bit rgb format
        # The COCO model requires 300 x 300 resolution.
        # Drop the last 4 rows and last 4 columns (all 3 color channels kept).
        picData = picData[:-4, :-4]
        return picData

    # Function to read labels from text files.
    def readLabelFile(self, file_path):
        with open(file_path, 'r') as f:
            lines = f.readlines()
        ret = {}
        for line in lines:
            pair = line.strip().split(maxsplit=1)
            ret[int(pair[0])] = pair[1].strip()
        return ret

    #Step 10: Predict the picture by running it on the TPU
    def predict(self, picData):
        print("\nPredicting imgage on TPU")
        print('Shape of data: ', picData.shape)
        flatArray = picData.flatten()  #3D to 1D conversion
        print('Input array size: ', flatArray.shape)
        #Call the TPU to detect objects on the image with a neural network
        result = self.engine.detect_with_input_tensor(
            flatArray, threshold=self.minObjectScore, top_k=10)
        return result

    #Step 12: Analyse the result of inferencing on the TPU.
    #The result is analysed and all objects will be set as detected
    #if they belong to the objects IDs of interest
    def analyseResult(self, predResult, objectIdsOfInterest):
        print("Analysing results...")
        detectedObjList = []
        lbl = ''
        if predResult:
            for obj in predResult:
                if obj.label_id in objectIdsOfInterest:
                    if self.labels:
                        lbl = self.labels[obj.label_id]
                        print(lbl, obj.label_id)
                    print('score = ', obj.score)
                    box = obj.bounding_box.flatten()
                    box *= self.cameraResolution[1]  #scale up to resolution
                    print('box = ', box.tolist())
                    detectedObjList.append((lbl, box))
        if len(detectedObjList) == 0:
            print('No object detected!')
        return detectedObjList

    #Step 15: Depending on the detected objects and location
    #take desired action
    def processResult(self, detectedObjects):
        print('Number of detected objects: ', len(detectedObjects))
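A possible driver loop tying the numbered steps together (the loop itself and the object IDs of interest are assumptions; id 0 is 'person' in the usual coco_labels.txt):

import time

def run_smart_pi_cam(objectIdsOfInterest=(0,)):
    controller = SmartPiCamContr()
    controller.configurePiCam()
    end_time = time.monotonic() + controller.appDuration
    while time.monotonic() < end_time:
        picData = controller.takePhoto()
        predResult = controller.predict(picData)
        detected = controller.analyseResult(predResult, objectIdsOfInterest)
        controller.processResult(detected)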
Example #6
def main():
    cam_w, cam_h = 640, 480
    default_model_dir = "../all_models"
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument('--top_k',
                        type=int,
                        default=5,
                        help='number of classes with highest score to display')
    parser.add_argument('--threshold',
                        type=float,
                        default=0.5,
                        help='class score threshold')
    args = parser.parse_args()

    with open(args.labels, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    print("Loading %s with %s labels." % (args.model, args.labels))
    engine = DetectionEngine(args.model)

    pygame.init()
    pygame.font.init()
    font = pygame.font.SysFont("Arial", 20)

    pygame.camera.init()
    camlist = pygame.camera.list_cameras()

    _, w, h, _ = engine.get_input_tensor_shape()

    print("By default using camera: ", camlist[-1])
    camera = pygame.camera.Camera(camlist[-1], (cam_w, cam_h))
    try:
        display = pygame.display.set_mode((cam_w, cam_h), 0)
    except pygame.error as e:
        sys.stderr.write(
            "\nERROR: Unable to open a display window. Make sure a monitor is attached and that "
            "the DISPLAY environment variable is set. Example: \n"
            ">export DISPLAY=\":0\" \n")
        raise e
    red = pygame.Color(255, 0, 0)

    camera.start()
    try:
        last_time = time.monotonic()
        while True:
            mysurface = camera.get_image()
            imagen = pygame.transform.scale(mysurface, (w, h))
            input_tensor = np.frombuffer(imagen.get_buffer(), dtype=np.uint8)
            start_time = time.monotonic()
            results = engine.detect_with_input_tensor(
                input_tensor, threshold=args.threshold, top_k=args.top_k)
            stop_time = time.monotonic()
            inference_ms = (stop_time - start_time) * 1000.0
            fps = 1.0 / (stop_time - last_time)
            last_time = stop_time
            annotate_text = "Inference: %5.2fms FPS: %3.1f" % (inference_ms, fps)
            for result in results:
                x0, y0, x1, y1 = result.bounding_box.flatten().tolist()
                rect = pygame.Rect(x0 * cam_w, y0 * cam_h, (x1 - x0) * cam_w,
                                   (y1 - y0) * cam_h)
                pygame.draw.rect(mysurface, red, rect, 1)
                label = "%.0f%% %s" % (100 * result.score,
                                       labels[result.label_id])
                text = font.render(label, True, red)
                print(label, ' ', end='')
                mysurface.blit(text, (x0 * cam_w, y0 * cam_h))
            text = font.render(annotate_text, True, red)
            print(annotate_text)
            mysurface.blit(text, (0, 0))
            display.blit(mysurface, (0, 0))
            pygame.display.flip()
    finally:
        camera.stop()
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='Path of the detection model.',
                        required=True,
                        type=str)
    parser.add_argument('--ip',
                        "-i",
                        help='IP address of the image source.',
                        required=True,
                        type=str)
    parser.add_argument('--report_interval',
                        '-r',
                        help="Duration of reporting interval, in seconds",
                        default=10,
                        type=int)
    parser.add_argument('-v',
                        "--verbose",
                        help="Print information about detected objects",
                        action='store_true')
    args = parser.parse_args()

    relay = Relay(args.ip, args.verbose)

    engine = DetectionEngine(args.model)

    watch = Stopwatch()

    while True:

        if args.verbose:
            print("ready for next inference")
        img = relay.get_image()

        if img is None:
            break

        if args.verbose:
            print("Received image ", watch.numread)

        watch.start()
        initial_h, initial_w, _ = img.shape
        if (initial_h, initial_w) != (300, 300):
            frame = cv2.resize(img, (300, 300))
        else:
            frame = img
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        watch.stop(Stopwatch.MODE_PREPROCESS)

        watch.start()
        ans = engine.detect_with_input_tensor(frame.flatten(),
                                              threshold=0.5,
                                              top_k=10)
        watch.stop(Stopwatch.MODE_INFER)

        if args.verbose:
            print("Got inference results for frame ", watch.numread, ": ", ans)

        watch.start()
        # Post-process: convert detections to ([xmin, ymin, xmax, ymax],
        # class_id, score) tuples scaled to the original frame size.
        results = []
        for obj in ans:
            box = obj.bounding_box.flatten().tolist()
            bbox = [0] * 4
            bbox[0] = int(box[0] * initial_w)
            bbox[1] = int(box[1] * initial_h)
            bbox[2] = int(box[2] * initial_w)
            bbox[3] = int(box[3] * initial_h)

            result = (bbox, obj.label_id + 1, obj.score)
            results.append(result)

        relay.send_results(results)

        watch.stop(Stopwatch.MODE_POSTPROCESS)

        if watch.report():
            print("TCP Latency to source: ",
                  round(measure_latency(host=args.ip, port=relay.port)[0], 3),
                  "ms")

    relay.close()
Example #8
class FaceDetectRPC(object):
    def __init__(self):
        # Init TPU engine.
        self.face_engine = DetectionEngine(FACE_DET_MODEL)

        # Load face recognition model and the label encoder.
        with open(FACE_CLASS_MODEL, 'rb') as fp:
            self.recognizer = pickle.load(fp)
        with open(FACE_LABEL_MAP, 'rb') as fp:
            self.le = pickle.load(fp)

    def detect_faces(self, test_image_paths):
        # List that will hold all images with any face detection information.
        objects_detected_faces = []

        # Loop over the images paths provided.
        for obj in test_image_paths:
            logging.debug('**********Find Face(s) for {}'.format(obj['image']))
            for label in obj['labels']:
                # If the object detected is a person then try to identify face.
                if label['name'] == 'person':
                    # Read image from disk.
                    img = cv2.imread(MOUNT_POINT + obj['image'])
                    if img is None:
                        # Bad image was read.
                        logging.error('Bad image was read.')
                        label['face'] = None
                        continue

                    # First bound the roi using the coord info passed in.
                    # The roi is the area around the person(s) detected in the image.
                    # (x1, y1) is the top-left roi corner; (x2, y2) is the bottom-right.
                    y1 = int(label['box']['ymin'])
                    x1 = int(label['box']['xmin'])
                    y2 = int(label['box']['ymax'])
                    x2 = int(label['box']['xmax'])
                    roi = img[y1:y2, x1:x2, :]
                    #cv2.imwrite('./roi.jpg', roi)
                    if roi.size == 0:
                        # Bad object roi...move on to next image.
                        logging.error('Bad object roi.')
                        label['face'] = None
                        continue

                    # Need roi shape for later conversion of face coords.
                    (h, w) = roi.shape[:2]
                    # Resize roi for face detection.
                    # The tpu face det model used requires (320, 320).
                    res = resize_to_square(img=roi,
                                           size=320,
                                           keep_aspect_ratio=True,
                                           interpolation=cv2.INTER_AREA)
                    #cv2.imwrite('./res.jpg', res)

                    # Detect the (x, y)-coordinates of the bounding boxes corresponding
                    # to a face in the input image using the TPU engine.
                    # It's assumed that only one face is in the image.
                    # NB: reshape(-1) flattens the image array to 1-D.
                    detection = self.face_engine.detect_with_input_tensor(
                        res.reshape(-1), threshold=0.05, top_k=1)
                    if not detection:
                        # No face detected...move on to next image.
                        logging.debug('No face detected.')
                        label['face'] = None
                        continue

                    # Convert coords and carve out face roi.
                    box = (detection[0].bounding_box.flatten().tolist()
                           ) * np.array([w, h, w, h])
                    (face_left, face_top, face_right,
                     face_bottom) = box.astype('int')
                    face_roi = roi[face_top:face_bottom,
                                   face_left:face_right, :]
                    #cv2.imwrite('./face_roi.jpg', face_roi)
                    (f_h, f_w) = face_roi.shape[:2]
                    # If face width or height are not sufficiently large then skip.
                    if f_h < FACE_MIN or f_w < FACE_MIN:
                        logging.debug('Face too small to recognize.')
                        label['face'] = None
                        continue

                    # Compute the focus measure of the face
                    # using the Variance of Laplacian method.
                    # See https://www.pyimagesearch.com/2015/09/07/blur-detection-with-opencv/
                    gray = cv2.cvtColor(face_roi, cv2.COLOR_BGR2GRAY)
                    fm = cv2.Laplacian(gray, cv2.CV_64F).var()
                    # If fm below a threshold then face probably isn't clear enough
                    # for face recognition to work, so skip it.
                    if fm < FACE_FOCUS_MEASURE_THRESHOLD:
                        logging.debug('Face too blurry to recognize.')
                        label['face'] = None
                        continue

                    # Find the 128-dimension face encoding for face in image.
                    # Convert image roi from BGR (OpenCV ordering) to dlib ordering (RGB).
                    rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
                    # Convert face bbox into dlib format.
                    boxes = [(face_top, face_right, face_bottom, face_left)]
                    # Generate encodings. Only one face is assumed so take the 1st element.
                    encoding = face_recognition.face_encodings(
                        face_image=rgb,
                        known_face_locations=boxes,
                        num_jitters=FACE_NUM_JITTERS)[0]
                    logging.debug('face encoding {}'.format(encoding))
                    # Perform svm classification on the encodings to recognize the face.
                    (name, proba) = face_classifier(recognizer=self.recognizer,
                                                    le=self.le,
                                                    encoding=encoding,
                                                    min_proba=FACE_MIN_PROBA)

                    # Add face name to label metadata.
                    label['face'] = name
                    # Add face confidence to label metadata.
                    # (First convert NumPy value to native Python type for json serialization.)
                    label['faceProba'] = proba.item()
            # Add processed image to output list.
            objects_detected_faces.append(obj)
        # Convert json to string and return data.
        return (json.dumps(objects_detected_faces))
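`resize_to_square` (also used in Example #9) is not included here. A sketch of a typical letterbox-style implementation that matches how it is called; the padding strategy and color are assumptions:

import cv2
import numpy as np

def resize_to_square(img, size, keep_aspect_ratio=True,
                     interpolation=cv2.INTER_AREA):
    # Produce a size x size image; optionally preserve the aspect ratio by
    # scaling the longer side to `size` and padding the rest with black.
    if not keep_aspect_ratio:
        return cv2.resize(img, (size, size), interpolation=interpolation)
    h, w = img.shape[:2]
    if h >= w:
        new_h, new_w = size, max(1, int(round(w * size / h)))
    else:
        new_h, new_w = max(1, int(round(h * size / w))), size
    resized = cv2.resize(img, (new_w, new_h), interpolation=interpolation)
    out = np.zeros((size, size, img.shape[2]), dtype=img.dtype)
    out[:new_h, :new_w] = resized
    return out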
Example #9
class ObjDetectRPC(object):
    def __init__(self):
        self.obj_engine = DetectionEngine(OBJ_MODEL)
        self.labels_map = ReadLabelFile(OBJ_LABEL_MAP)

    def detect_objects(self, test_image_paths):
        objects_in_image = []  # holds all objects found in image
        labels = []  # labels of detected objects
        frame_num = 0  # ZoneMinder current alarm frame number
        monitor = ''  # ZoneMinder current monitor name

        for image_path in test_image_paths:
            logging.debug('**********Find object(s) for {}'.format(image_path))

            # If frames are consecutive, repeat the last label and skip inference.
            # This behavior is controlled by CON_IMG_SKIP.
            skip, frame_num, monitor = skip_inference(frame_num, monitor,
                                                      labels, image_path,
                                                      objects_in_image)
            if skip:
                continue

            # Read image from disk.
            img = cv2.imread(MOUNT_POINT + image_path)
            #cv2.imwrite('./obj_img.jpg', img)
            if img is None:
                # Bad image was read.
                logging.error('Bad image was read.')
                objects_in_image.append({'image': image_path, 'labels': []})
                continue

            # Resize. The tpu obj det requires (300, 300).
            res = resize_to_square(img=img,
                                   size=300,
                                   keep_aspect_ratio=True,
                                   interpolation=cv2.INTER_AREA)
            #cv2.imwrite('./obj_res.jpg', res)

            # Run object inference.
            detection = self.obj_engine.detect_with_input_tensor(
                res.reshape(-1), threshold=0.05, top_k=3)

            # Get labels and scores of detected objects.
            labels = []  # new detection, clear labels list.
            (h, w) = img.shape[:2]  # use original image size for box coords
            for obj in detection:
                logging.debug('id: {} name: {} score: {}'.format(
                    obj.label_id, self.labels_map[obj.label_id], obj.score))
                if obj.score > OBJ_MIN_SCORE_THRESH:
                    object_dict = {}
                    object_dict['id'] = obj.label_id
                    object_dict['name'] = self.labels_map[obj.label_id]
                    object_dict['score'] = float(obj.score)
                    (xmin, ymin, xmax,
                     ymax) = (obj.bounding_box.flatten().tolist()) * np.array(
                         [w, h, w, h])
                    object_dict['box'] = {
                        'ymin': ymin,
                        'xmin': xmin,
                        'ymax': ymax,
                        'xmax': xmax
                    }
                    labels.append(object_dict)

            objects_in_image.append({'image': image_path, 'labels': labels})
        return json.dumps(objects_in_image)
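`ReadLabelFile` is not shown; it presumably mirrors the label readers in the earlier examples, where each line is "<id> <name>":

def ReadLabelFile(file_path):
    # Parse "<id> <name>" pairs into a dict keyed by integer id.
    ret = {}
    with open(file_path, 'r') as f:
        for line in f:
            pair = line.strip().split(maxsplit=1)
            ret[int(pair[0])] = pair[1].strip()
    return ret
Example #10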
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--model', help='File path of Tflite model.', required=True)
  parser.add_argument('--label', help='File path of label file.')
  args = parser.parse_args()

  labels = dataset_utils.read_label_file(args.label) if args.label else None
  engine = DetectionEngine(args.model)

  with picamera.PiCamera() as camera:
    preview_size = (640, 480)
    camera.resolution = preview_size
    camera.framerate = 30
    # camera.hflip = True
    # camera.vflip = True
    # camera.rotation = 90
    _, input_height, input_width, _ = engine.get_input_tensor_shape()

    input_size = (input_width, input_height)

    # Width is rounded up to the nearest multiple of 32,
    # height to the nearest multiple of 16.
    capture_size = (math.ceil(input_width / 32) * 32,
                    math.ceil(input_height / 16) * 16)
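    # For example, with a 300 x 300 model input this gives
    # capture_size = (ceil(300 / 32) * 32, ceil(300 / 16) * 16) = (320, 304),
    # so the captured frame is slightly larger than the input tensor and is
    # cropped below before inference.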

    # Actual detection area on preview.
    detect_size = (preview_size[0] * input_size[0] / capture_size[0],
                   preview_size[1] * input_size[1] / capture_size[1])

    # Make annotator smaller for efficiency.
    annotator_factor = 0.5
    annotator_size = (int(preview_size[0] * annotator_factor),
                      int(preview_size[1] * annotator_factor))

    # Font for drawing detection candidates
    font = ImageFont.truetype(
                '/usr/share/fonts/truetype/freefont/FreeMonoBold.ttf',
                size=12)

    camera.start_preview()
    annotator = Annotator(camera,
                          dimensions=annotator_size,
                          default_color=(255, 255, 255, 64))

    def annotate(candidates):
      annotator.clear()

      # Get actual coordinates to draw
      def translate(relative_coord):
        return (detect_size[0] * relative_coord[0] * annotator_factor,
                detect_size[1] * relative_coord[1] * annotator_factor)

      for c in candidates:
        top_left = translate(c.bounding_box[0])
        bottom_right = translate(c.bounding_box[1])

        annotator.bounding_box(top_left + bottom_right)

        text = '{} {:.2f}'.format(labels[c.label_id], c.score) \
                if labels else '{:.2f}'.format(c.score)

        annotator.text(top_left, text, font=font)

      annotator.update()

    try:
      stream = io.BytesIO()
      for _ in camera.capture_continuous(
          stream, format='rgb', use_video_port=True, resize=capture_size):
        stream.truncate()
        stream.seek(0)

        input_tensor = np.frombuffer(stream.getvalue(), dtype=np.uint8)
        if input_size != capture_size:
          # Crop to input size. Note dimension order (height, width, channels)
          input_tensor = input_tensor.reshape(
              (capture_size[1], capture_size[0], 3))[
                  0:input_height, 0:input_width, :].ravel()

        start_ms = time.time()
        results = engine.detect_with_input_tensor(input_tensor, top_k=3)
        elapsed_ms = time.time() - start_ms

        annotate(results)

        camera.annotate_text = '{:.2f}ms'.format(elapsed_ms * 1000.0)

    finally:
      # Maybe should make this an annotator method
      camera.remove_overlay(annotator._overlay)
      camera.stop_preview()
Example #11
    num_of_frames = 0
    while True:
        num_of_frames += 1
        success, img = vidcap.read()
        if not success:
            break

        # Process frame
        image_cv2 = resize_image(img)
        image_pil = Image.fromarray(image_cv2)
        draw = ImageDraw.Draw(image_pil)

        # Run inference and update track
        detections = engine.detect_with_input_tensor(image_cv2.flatten(),
                                                     threshold=0.3,
                                                     top_k=10)
        iou_tracker.update_tracks(detections)

        # Draw detection and save output frame
        draw_boxes(detections, draw)
        output_frames.append(numpy.array(image_pil))

    # Print results
    print('Number of processed frames: {0}'.format(num_of_frames))
    print('Time {0}'.format(time.time() - start))
    print('Cars detected {0}'.format(iou_tracker.get_tracked_number()))

    # Generate video
    out = cv2.VideoWriter(args.output, cv2.VideoWriter_fourcc(*'DIVX'), 30,
                          (args.size, args.size))
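This fragment relies on helpers defined elsewhere (`resize_image`, `draw_boxes`, `iou_tracker`, `engine`). As one example, `resize_image` plausibly converts the BGR frame to RGB and scales it to the square model input; the default size and the color conversion are assumptions:

import cv2

def resize_image(img, size=300):
    # OpenCV frames are BGR; the SSD model expects RGB at a fixed square size.
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return cv2.resize(rgb, (size, size))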
Example #12
class ObjectDetector(object):
    def __init__(self, model_path, label_path, use_coral_flag, use_tpu_flag,
                 res_x, res_y, min_conf_threshold):

        self.res_y = res_y
        self.res_x = res_x
        self.use_coral_flag = use_coral_flag
        if use_coral_flag:
            from edgetpu.detection.engine import DetectionEngine
            from edgetpu.utils import dataset_utils
        self.min_conf_threshold = min_conf_threshold

        # Load the label map
        with open(label_path, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        if self.labels[0] == '???':
            del self.labels[0]

        if use_tpu_flag:
            self.interpreter = Interpreter(
                model_path=model_path,
                experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        else:
            self.interpreter = Interpreter(model_path=model_path)

        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]

        self.is_floating_model = (self.input_details[0]['dtype'] == np.float32)

        self.input_mean = 127.5
        self.input_std = 127.5

        #Coral
        if use_coral_flag:
            self.engine = DetectionEngine(model_path)
            self.labels = dataset_utils.read_label_file(label_path)
            _, height, width, _ = self.engine.get_input_tensor_shape()

    def apply_coral_model(self, input_data):
        # The Edge TPU engine expects a flat 1-D uint8 tensor, so flatten the
        # batched array produced by process_frame().
        ans = self.engine.detect_with_input_tensor(input_data.flatten(),
                                                   threshold=0.05,
                                                   top_k=10)
        for obj in ans:
            if self.labels:
                print(self.labels[obj.label_id])
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)

    def apply_tflite_model(self, input_data):
        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[
            0]  # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[
            0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[
            0]  # Confidence of detected objects

        return (boxes, classes, scores)

    def process_frame(self, frame):
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if self.is_floating_model:
            input_data = (np.float32(input_data) -
                          self.input_mean) / self.input_std

        if self.use_coral_flag:
            self.apply_coral_model(input_data)
            # apply_coral_model() only prints its detections, so return empty
            # results to keep the return signature consistent.
            boxes, classes, scores = [], [], []
        else:
            (boxes, classes, scores) = self.apply_tflite_model(input_data)

        return (frame, boxes, classes, scores)

    def is_interesting_object(self, scores, classes):
        is_interesting_object = False
        interesting_classes = []
        for i in range(len(scores)):
            if ((scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0)):
                is_interesting_object = True
                interesting_classes.append(self.labels[int(classes[i])])
        return is_interesting_object, interesting_classes

    def draw_frame(self, frame, boxes, classes, scores):
        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if ((scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0)):

                # Get bounding box coordinates and draw box
                # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                ymin = int(max(1, (boxes[i][0] * self.res_y)))
                xmin = int(max(1, (boxes[i][1] * self.res_x)))
                ymax = int(min(self.res_y, (boxes[i][2] * self.res_y)))
                xmax = int(min(self.res_x, (boxes[i][3] * self.res_x)))

                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0),
                              4)

                # Draw label
                object_name = self.labels[int(
                    classes[i]
                )]  # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                      )  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label,
                                                      cv2.FONT_HERSHEY_SIMPLEX,
                                                      0.7, 2)  # Get font size
                label_ymin = max(
                    ymin, labelSize[1] + 10
                )  # Make sure not to draw label too close to top of window
                cv2.rectangle(
                    frame, (xmin, label_ymin - labelSize[1] - 10),
                    (xmin + labelSize[0], label_ymin + baseLine - 10),
                    (255, 255, 255),
                    cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(frame, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                            2)  # Draw label text
        (flag, encodedImage) = cv2.imencode(".jpg", frame)
        return encodedImage
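A sketch of how this class might be driven from a capture loop; the video source, file paths, and thresholds below are placeholders, not part of the original:

import cv2

def run_detector():
    detector = ObjectDetector(model_path='detect.tflite',
                              label_path='labelmap.txt',
                              use_coral_flag=False,
                              use_tpu_flag=False,
                              res_x=640, res_y=480,
                              min_conf_threshold=0.5)
    cap = cv2.VideoCapture(0)
    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            frame, boxes, classes, scores = detector.process_frame(frame)
            interesting, names = detector.is_interesting_object(scores, classes)
            if interesting:
                print('Detected:', names)
            encoded = detector.draw_frame(frame, boxes, classes, scores)
            # `encoded` is a JPEG byte buffer suitable for streaming or saving.
    finally:
        cap.release()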