Example #1
    def thread_job(model_name, input_filename, num_inferences, task_type,
                   device):
        """Runs classification or detection job on one Python thread."""
        tid = threading.get_ident()
        logging.info('Thread: %d, # inferences: %d, model: %s', tid,
                     num_inferences, model_name)

        interpreter = make_interpreter(test_utils.test_data_path(model_name),
                                       device)
        interpreter.allocate_tensors()
        with test_utils.test_image(input_filename) as img:
            if task_type == 'classification':
                resize_image = img.resize(common.input_size(interpreter),
                                          Image.NEAREST)
                common.set_input(interpreter, resize_image)
            elif task_type == 'detection':
                common.set_resized_input(
                    interpreter, img.size,
                    lambda size: img.resize(size, Image.NEAREST))
            else:
                raise ValueError(
                    'task_type should be classification or detection, but is given %s'
                    % task_type)
            for _ in range(num_inferences):
                interpreter.invoke()
                if task_type == 'classification':
                    classify.get_classes(interpreter)
                else:
                    detect.get_objects(interpreter)
        logging.info('Thread: %d, model: %s done', tid, model_name)
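A minimal launcher sketch for the helper above; the model and image names are placeholders, and it assumes two Edge TPUs enumerated as ':0' and ':1':

    import threading

    # Hypothetical model/image names; substitute whatever test data is available.
    jobs = [
        ('mobilenet_v2_1.0_224_quant_edgetpu.tflite', 'cat.bmp', 100,
         'classification', ':0'),
        ('ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite', 'cat.bmp', 100,
         'detection', ':1'),
    ]
    threads = [threading.Thread(target=thread_job, args=job) for job in jobs]
    for t in threads:
        t.start()
    for t in threads:
        t.join()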
Example #2
    def detect(self, image=None):
        Height, Width = image.shape[:2]
        img = image.copy()
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)

        if self.options.get('auto_lock', True):
            self.acquire_lock()

        try:
            if not self.model:
                self.load_model()

            g.logger.Debug(
                1, '|---------- TPU (input image: {}w*{}h) ----------|'.format(
                    Width, Height))
            t = Timer()
            _, scale = common.set_resized_input(
                self.model, img.size,
                lambda size: img.resize(size, Image.ANTIALIAS))
            self.model.invoke()
            objs = detect.get_objects(
                self.model, float(self.options.get('object_min_confidence')),
                scale)

            #outs = self.model.detect_with_image(img, threshold=int(self.options.get('object_min_confidence')),
            #        keep_aspect_ratio=True, relative_coord=False)
            diff_time = t.stop_and_get_ms()

            if self.options.get('auto_lock', True):
                self.release_lock()
        except:
            if self.options.get('auto_lock', True):
                self.release_lock()
            raise

        g.logger.Debug(
            1, 'perf: processor:{} Coral TPU detection took: {}'.format(
                self.processor, diff_time))

        bbox = []
        labels = []
        conf = []

        for obj in objs:
            # box = obj.bbox.flatten().astype("int")
            bbox.append([
                int(round(obj.bbox.xmin)),
                int(round(obj.bbox.ymin)),
                int(round(obj.bbox.xmax)),
                int(round(obj.bbox.ymax))
            ])

            labels.append(self.classes.get(obj.id))
            conf.append(float(obj.score))

        g.logger.Debug(
            3, 'Coral object returning: {},{},{}'.format(bbox, labels, conf))
        return bbox, labels, conf, ['coral'] * len(labels)
Example #3
def detect_person(image_input):

    from pycoral.adapters import common
    from pycoral.adapters import detect
    from pycoral.utils.dataset import read_label_file
    from pycoral.utils.edgetpu import make_interpreter
    label_path = os.path.join(BASE_DIR, 'coral_files', 'coco_labels.txt')
    model_path = os.path.join(
        BASE_DIR, 'coral_files',
        'ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite')
    print(model_path)
    image = Image.fromarray(image_input)
    print(image)

    labels = read_label_file(label_path)
    print("labels", labels)
    interpreter = make_interpreter(model_path)
    print("INterpreter made")
    interpreter.allocate_tensors()
    print("Tensor allocated")
    _, scale = common.set_resized_input(
        interpreter, image.size,
        lambda size: image.resize(size, Image.ANTIALIAS))
    print("Before invoke")
    interpreter.invoke()
    objs = detect.get_objects(interpreter, 0.4, scale)
    print(objs)
    for obj in objs:
        print(labels.get(obj.id, obj.id))
        print('  id:    ', obj.id)
        print('  score: ', obj.score)
        print('  bbox:  ', obj.bbox)

    # Report whether any detected object is labeled as a person.
    return any(labels.get(obj.id) == 'person' for obj in objs)
Example #4
  def _ProcessImageInternal(self):
    img = self._image.copy()
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img)

    # Prepare image data
    _, scale = common.set_resized_input(self.__net, img.size, lambda size : img.resize(size, Image.ANTIALIAS))

    # Invoke the model
    self.__net.invoke()

    # Run the tensorflow model
    detectionData = detect.get_objects(self.__net, self._minConfidence, scale)

    for obj in detectionData:
      if (not self._targetID or (isinstance(self._targetID, list) and obj.id in self._targetID)):
        self._LogObjectFound(obj.id, obj.score)

        # Get the bounding box of the object
        box = obj.bbox

        self._HandleObjectDetectionResult(box.xmin, box.xmax, box.ymin, box.ymax)

        # If we found atleast one object, then we can exit out.
        break

    self._DrawBoundingBox()
Example #5
 def detection_task(num_inferences):
   tid = threading.get_ident()
   print('Thread: %d, %d inferences for detection task' %
         (tid, num_inferences))
   model_name = 'ssd_mobilenet_v1_coco_quant_postprocess_edgetpu.tflite'
   interpreter = make_interpreter(
       test_utils.test_data_path(model_name), device=':1')
   interpreter.allocate_tensors()
   print('Thread: %d, using device 1' % tid)
   with test_utils.test_image('cat.bmp') as img:
     for _ in range(num_inferences):
       _, scale = common.set_resized_input(
           interpreter,
           img.size,
           lambda size, image=img: image.resize(size, Image.ANTIALIAS))
       interpreter.invoke()
       ret = detect.get_objects(
           interpreter, score_threshold=0.7, image_scale=scale)
       self.assertEqual(len(ret), 1)
       self.assertEqual(ret[0].id, 16)  # cat
       expected_bbox = detect.BBox(
           xmin=int(0.1 * img.size[0]),
           ymin=int(0.1 * img.size[1]),
           xmax=int(0.7 * img.size[0]),
           ymax=int(1.0 * img.size[1]))
       self.assertGreaterEqual(
           detect.BBox.iou(expected_bbox, ret[0].bbox), 0.85)
   print('Thread: %d, done detection task' % tid)
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='File path of Tflite model.',
                        required=True)
    parser.add_argument('--labels',
                        help='File path of label file.',
                        required=True)
    parser.add_argument('--picamera',
                        action='store_true',
                        help="Use PiCamera for image capture",
                        default=False)
    parser.add_argument('-t',
                        '--threshold',
                        type=float,
                        default=0.5,
                        help='Classification score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    labels = read_label_file(args.labels) if args.labels else {}
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    # Initialize video stream
    vs = VideoStream(usePiCamera=args.picamera, resolution=(640, 480)).start()
    time.sleep(1)

    fps = FPS().start()

    while True:
        try:
            # Read frame from video
            screenshot = vs.read()
            image = Image.fromarray(screenshot)
            _, scale = common.set_resized_input(
                interpreter, image.size,
                lambda size: image.resize(size, Image.ANTIALIAS))
            interpreter.invoke()
            objs = detect.get_objects(interpreter, args.threshold, scale)

            draw_objects(image, objs, labels)

            if (cv2.waitKey(5) & 0xFF == ord('q')):
                fps.stop()
                break

            fps.update()
        except KeyboardInterrupt:
            fps.stop()
            break

    print("Elapsed time: " + str(fps.elapsed()))
    print("Approx FPS: :" + str(fps.fps()))

    cv2.destroyAllWindows()
    vs.stop()
    time.sleep(2)
Example #7
def detect_func():

    ## parser = argparse.ArgumentParser(
    ## formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    ## parser.add_argument('-m', '--model', required=True,
    ##                     help='File path of .tflite file')
    # parser.add_argument('-i', '--input', required=True,
    #                   help='File path of image to process')
    # parser.add_argument('-l', '--labels', help='File path of labels file')
    # parser.add_argument('-t', '--threshold', type=float, default=0.4,
    #                    help='Score threshold for detected objects')
    # parser.add_argument('-o', '--output',
    #                    help='File path for the result image with annotations')
    # parser.add_argument('-c', '--count', type=int, default=5,
    #                     help='Number of times to run inference')
    #  args = parser.parse_args()

    labels = read_label_file('test_data/coco_labels.txt')
    interpreter = make_interpreter(
        'test_data/ssd_mobilenet_v2_coco_quant_postprocess.tflite')
    interpreter.allocate_tensors()

    image = Image.open('pic.jpg')
    _, scale = common.set_resized_input(
        interpreter, image.size,
        lambda size: image.resize(size, Image.ANTIALIAS))

    print('----INFERENCE TIME----')
    print('Note: The first inference is slow because it includes',
          'loading the model into Edge TPU memory.')
    for _ in range(5):
        start = time.perf_counter()
        interpreter.invoke()
        inference_time = time.perf_counter() - start
        objs = detect.get_objects(interpreter, 0.4, scale)
        print('%.2f ms' % (inference_time * 1000))

    print('-------RESULTS--------')
    if not objs:
        print('No objects detected')

    people_flag = 0
    for obj in objs:
        if obj.id == 0:
            print('people detected!')
            people_flag = 1
        print(labels.get(obj.id, obj.id))
        print('  id:    ', obj.id)
        print('  score: ', obj.score)
        print('  bbox:  ', obj.bbox)

        #  if args.output:
        #    image = image.convert('RGB')
        #    draw_objects(ImageDraw.Draw(image), objs, labels)
        #   image.save(args.output)
        #    image.show()

    return people_flag
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        required=True,
                        help='Path of the segmentation model.')
    parser.add_argument('--input',
                        required=True,
                        help='File path of the input image.')
    parser.add_argument('--output',
                        default='semantic_segmentation_result.jpg',
                        help='File path of the output image.')
    parser.add_argument(
        '--keep_aspect_ratio',
        action='store_true',
        default=False,
        help=
        ('keep the image aspect ratio when down-sampling the image by adding '
         'black pixel padding (zeros) on bottom or right. '
         'By default the image is resized and reshaped without cropping. This '
         'option should be the same as what is applied on input images during '
         'model training. Otherwise the accuracy may be affected and the '
         'bounding box of detection result may be stretched.'))
    args = parser.parse_args()

    interpreter = make_interpreter(args.model, device=':0')
    interpreter.allocate_tensors()
    width, height = common.input_size(interpreter)

    img = Image.open(args.input)
    if args.keep_aspect_ratio:
        resized_img, _ = common.set_resized_input(
            interpreter, img.size,
            lambda size: img.resize(size, Image.ANTIALIAS))
    else:
        resized_img = img.resize((width, height), Image.ANTIALIAS)
        common.set_input(interpreter, resized_img)

    interpreter.invoke()

    result = segment.get_output(interpreter)
    if len(result.shape) == 3:
        result = np.argmax(result, axis=-1)

    # If keep_aspect_ratio, we need to remove the padding area.
    new_width, new_height = resized_img.size
    result = result[:new_height, :new_width]
    mask_img = Image.fromarray(label_to_color_image(result).astype(np.uint8))

    # Concat resized input image and processed segmentation results.
    output_img = Image.new('RGB', (2 * new_width, new_height))
    output_img.paste(resized_img, (0, 0))
    output_img.paste(mask_img, (width, 0))
    output_img.save(args.output)
    print('Done. Results saved at', args.output)
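The padding that --keep_aspect_ratio introduces can be reasoned about with a short sketch. It assumes set_resized_input scales the image by the smaller of the two width/height ratios and zero-pads the remainder, which is why cropping the result back to the resized size removes the padded area:

    def resized_size(original_size, input_size):
        """Size the image occupies inside the padded input tensor (assumed behavior)."""
        w, h = original_size
        in_w, in_h = input_size
        scale = min(in_w / w, in_h / h)
        return int(w * scale), int(h * scale)

    # Example: a 640x480 image fed to a 513x513 model occupies about 513x384 pixels;
    # everything below/right of that is padding and is dropped by result[:h, :w].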
Example #9
def _calculate_overlay(frame):
    global _overlayObjs
    global _overlay
    # Updates overlay_pane by running inference on the latest frame.
    # Runs on a mutex, so it will only run once at a time.
    # It runs in a thread, so it is protected.

    # prepare the frame for classification by converting (1) it from
    # BGR to RGB channel ordering and then (2) from a NumPy array to
    # PIL image format
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = Image.fromarray(frame)

    start = time.perf_counter()
    if initialized:
        print("Initialized")
        if interpreter is None:
            print(
                "Interpreter is none and this is initialized ERROR ERROR ERROR"
            )
        else:
            print("Interpreter is not none")
            print(interpreter)
        _, scale = common.set_resized_input(
            interpreter, frame.size,
            lambda size: frame.resize(size, Image.ANTIALIAS))

        interpreter.invoke()
        inference_time = time.perf_counter() - start
        _overlayObjs = detect.get_objects(interpreter, confidence, scale)
        print(_overlayObjs)

        #print('%.2f ms' % (inference_time * 1000))

        def overlay_function(frame):
            # ensure at least one result was found
            for obj in _overlayObjs:
                bbox = obj.bbox
                frame = cv2.rectangle(frame, (bbox.xmin, bbox.ymin),
                                      (bbox.xmax, bbox.ymax), (0, 0, 255), 2)
                frame = cv2.putText(
                    frame, '%s %.2f' % (labels.get(obj.id, obj.id), obj.score),
                    (bbox.xmin + 20, bbox.ymin + 20), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (0, 0, 255))
                #draw.text((bbox.xmin + 10, bbox.ymin + 10),
                #          '%s\n%.2f' % (labels.get(obj.id, obj.id), obj.score),
                #          fill='red')

            return frame

        _overlay = overlay_function
    else:
        print("Uninitialized")
Example #10
    def detection_job(detection_model, image_name, num_inferences):
        """Runs detection job."""
        interpreter = make_interpreter(detection_model, device=':1')
        interpreter.allocate_tensors()
        with open_image(image_name) as image:
            _, scale = common.set_resized_input(
                interpreter, image.size,
                lambda size: image.resize(size, Image.NEAREST))

        for _ in range(num_inferences):
            interpreter.invoke()
            detect.get_objects(interpreter,
                               score_threshold=0.,
                               image_scale=scale)
Example #11
    def detect(self, image, offset):
        image = Image.fromarray(image)
        _, scale = common.set_resized_input(
            self.interpreter, image.size,
            lambda size: image.resize(size, Image.ANTIALIAS))
        self.interpreter.invoke()
        objs = detect.get_objects(self.interpreter, 0.5, scale)

        observations = []
        for o in objs:
            observations.append(
                (self.labels.get(o.id, o.id), o.score,
                 (max(int(o.bbox.xmin * scale[1] + offset[0]),
                      0), max(int(o.bbox.ymin * scale[0] + offset[1]),
                              0), int(o.bbox.xmax * scale[1] + offset[0]),
                  int(o.bbox.ymax * scale[0] + offset[1]))))
        return observations
Example #12
def run_two_models_one_tpu(classification_model, detection_model, image_name,
                           num_inferences, batch_size):
    """Runs two models ALTERNATIVELY using one Edge TPU.

  It runs classification model `batch_size` times and then switch to run
  detection model `batch_size` time until each model is run `num_inferences`
  times.

  Args:
    classification_model: string, path to classification model
    detection_model: string, path to detection model.
    image_name: string, path to input image.
    num_inferences: int, number of inferences to run for each model.
    batch_size: int, indicates how many inferences to run one model before
      switching to the other one.

  Returns:
    double, wall time it takes to finish the job.
  """
    start_time = time.perf_counter()
    interpreter_a = make_interpreter(classification_model, device=':0')
    interpreter_a.allocate_tensors()
    interpreter_b = make_interpreter(detection_model, device=':0')
    interpreter_b.allocate_tensors()

    with open_image(image_name) as image:
        size_a = common.input_size(interpreter_a)
        common.set_input(interpreter_a, image.resize(size_a, Image.NEAREST))
        _, scale_b = common.set_resized_input(
            interpreter_b, image.size,
            lambda size: image.resize(size, Image.NEAREST))

    num_iterations = (num_inferences + batch_size - 1) // batch_size
    for _ in range(num_iterations):
        for _ in range(batch_size):
            interpreter_a.invoke()
            classify.get_classes(interpreter_a, top_k=1)
        for _ in range(batch_size):
            interpreter_b.invoke()
            detect.get_objects(interpreter_b,
                               score_threshold=0.,
                               image_scale=scale_b)
    return time.perf_counter() - start_time
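A possible call site for this benchmark; the file names are placeholders for whatever compiled Edge TPU models and test image are on hand:

    elapsed = run_two_models_one_tpu(
        'mobilenet_v2_1.0_224_quant_edgetpu.tflite',                # placeholder classification model
        'ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite',   # placeholder detection model
        'cat.bmp',                                                  # placeholder input image
        num_inferences=200,
        batch_size=10)
    print('Wall time: %.2f s' % elapsed)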
Example #13
  def get_objects(self, frame, threshold=0.01):
    """
    Gets a list of objects detected in the given image frame.

    Args:
      frame: The bitmap image to pass through the model.
      threshold: The minimum confidence score for returned results.

    Returns:
      A list of `Object` objects, each of which contains a detected object's
      id, score, and bounding box as `BBox`.
      See https://coral.ai/docs/reference/py/pycoral.adapters/#pycoral.adapters.detect.Object
    """
    height, width, _ = frame.shape
    _, scale = common.set_resized_input(self.interpreter, (width, height),
                                        lambda size: cv2.resize(frame, size, fx=0, fy=0,
                                                                interpolation=cv2.INTER_CUBIC))
    self.interpreter.invoke()
    return detect.get_objects(self.interpreter, threshold, scale)
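A usage sketch for this wrapper, assuming it lives on a detector class (called Detector here purely for illustration) that already holds a ready self.interpreter:

    import cv2

    detector = Detector()             # hypothetical wrapper class holding the interpreter
    frame = cv2.imread('frame.jpg')   # placeholder image path
    for obj in detector.get_objects(frame, threshold=0.5):
        # bbox should already be scaled back to the original frame's coordinates.
        print(obj.id, obj.score, obj.bbox)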
Example #14
def run_two_models_one_tpu(classification_model, detection_model, image_name,
                           num_inferences, batch_size):
    start_time = time.perf_counter()
    interpreter_a = make_interpreter(classification_model, device=':0')
    interpreter_a.allocate_tensors()
    interpreter_b = make_interpreter(detection_model, device=':0')
    interpreter_b.allocate_tensors()

    identification = []
    classification = []

    with open_image(image_name) as image:
        size_a = common.input_size(interpreter_a)
        common.set_input(interpreter_a, image.resize(size_a, Image.NEAREST))
        _, scale_b = common.set_resized_input(
            interpreter_b, image.size,
            lambda size: image.resize(size, Image.NEAREST))

    num_iterations = (num_inferences + batch_size - 1) // batch_size
    for _ in tqdm(range(num_iterations)):
        for _ in range(batch_size):
            identification_start_time = time.perf_counter()
            interpreter_b.invoke()
            detect.get_objects(interpreter_b, score_threshold=0.,
                               image_scale=scale_b)
            identification.append(time.perf_counter() -
                                  identification_start_time)
        for _ in range(batch_size):
            classification_start_time = time.perf_counter()
            interpreter_a.invoke()
            result1 = classify.get_classes(interpreter_a, top_k=4)
            interpreter_a.invoke()
            result2 = classify.get_classes(interpreter_a, top_k=4)
            interpreter_a.invoke()
            result3 = classify.get_classes(interpreter_a, top_k=4)

            classification.append(time.perf_counter() -
                                  classification_start_time)
    total_time = time.perf_counter() - start_time
    return total_time, identification, classification
Example #15
def predict():
    data = {"success": False}

    if flask.request.method == "POST":
        if flask.request.files.get("image"):
            image_file = flask.request.files["image"]
            image_bytes = image_file.read()
            image = Image.open(io.BytesIO(image_bytes))

            size = common.input_size(interpreter)
            image = image.convert("RGB").resize(size, Image.ANTIALIAS)

            # Run an inference
            common.set_input(interpreter, image)
            interpreter.invoke()
            _, scale = common.set_resized_input(
                interpreter, image.size,
                lambda size: image.resize(size, Image.ANTIALIAS))

            threshold = 0.4
            objs = detect.get_objects(interpreter, threshold, scale)

            if objs:
                data["success"] = True
                preds = []

                for obj in objs:
                    preds.append({
                        "confidence": float(obj.score),
                        "label": labels[obj.id],
                        "y_min": int(obj.bbox[1]),
                        "x_min": int(obj.bbox[0]),
                        "y_max": int(obj.bbox[3]),
                        "x_max": int(obj.bbox[2]),
                    })
                data["predictions"] = preds

    # return the data dictionary as a JSON response
    return flask.jsonify(data)
Example #16
def main():
    labels = read_label_file("models/coco_labels.txt")

    interpreter = make_interpreter(
        "models/ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite")
    interpreter.allocate_tensors()
    threshold = 0.4
    printInfo("ready")
    while True:
        line = sys.stdin.readline().rstrip("\n")
        try:
            #load image from shinobi stream
            rawImage = BytesIO(base64.b64decode(line))
            image = Image.open(rawImage)
            #resize the image for object detection using built in coral code
            #it will set it to 300x300 and provide a scale for object detection later
            _, scale = common.set_resized_input(
                interpreter, image.size,
                lambda size: image.resize(size, Image.ANTIALIAS))

            start = time.perf_counter()
            interpreter.invoke()

            inference_time = time.perf_counter() - start
            #passing the scale from above, this function creates the bounding boxes
            #it takes the 300x300 image and divides the scale ratio for original coordinates
            objs = detect.get_objects(interpreter, threshold, scale)
            output = []
            for obj in objs:
                label = labels.get(obj.id, obj.id)
                labelID = obj.id
                score = obj.score
                bbox = obj.bbox
                output.append({"bbox": bbox, "class": label, "score": score})
            #outputted data is based on original feed in image size
            printData(output, (inference_time * 1000))
        except Exception as e:
            printError(str(e))
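The comments in this example describe the core contract used throughout this page: set_resized_input returns a scale that get_objects uses to map boxes from the model's input resolution back to the original frame. A rough illustration of that mapping (the exact arithmetic lives inside pycoral, so treat the numbers as approximate):

    # Suppose a 1280x720 frame is letterboxed into a 300x300 input tensor.
    orig_w, orig_h = 1280, 720
    in_w, in_h = 300, 300
    scale = min(in_w / orig_w, in_h / orig_h)   # ~0.234 on both axes

    # A box edge at x = 150 in tensor coordinates corresponds to roughly
    # 150 / scale = 640 in the original frame, which is the coordinate space
    # get_objects reports when it is given this scale.
    print(150 / scale)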
Example #17
    def callback(self, data):

        cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")

        img = Image.fromarray(cv_image)
        if self.keep_aspect_ratio:
            resized_img, _ = common.set_resized_input(
                self.interpreter, img.size,
                lambda size: img.resize(size, Image.ANTIALIAS))
        else:
            resized_img = img.resize(
                (self.model_input_width, self.model_input_height),
                Image.ANTIALIAS)
            common.set_input(self.interpreter, resized_img)

        self.interpreter.invoke()

        result = segment.get_output(self.interpreter)
        if len(result.shape) == 3:
            result = np.argmax(result, axis=-1)

        # If keep_aspect_ratio, we need to remove the padding area.
        new_width, new_height = resized_img.size
        result = result[:new_height, :new_width]
        mask_img = Image.fromarray(
            self.label_to_color_image(result).astype(np.uint8))

        # Concat resized input image and processed segmentation results.
        output_img = Image.new('RGB', (2 * new_width, new_height))
        output_img.paste(resized_img, (0, 0))
        output_img.paste(mask_img, (self.model_input_width, 0))
        original_width, original_height = img.size
        recovered_cvimg = np.array(
            output_img.resize((2 * original_width, original_height),
                              Image.ANTIALIAS))
        cv2.imshow("resizedimg", recovered_cvimg)
        cv2.waitKey(3)
Example #18
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model",
                        help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--width", help="Resolution width.", default=640)
    parser.add_argument("--height", help="Resolution height.", default=480)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize colormap
    colormap = label_util.create_pascal_label_colormap()

    # Initialize engine.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    width, height = common.input_size(interpreter)

    resolution_width = args.width
    rezolution_height = args.height
    with picamera.PiCamera() as camera:

        camera.resolution = (resolution_width, rezolution_height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # allow the camera to warmup
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture,
                                                   format="rgb",
                                                   use_video_port=True):

                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                start = time.perf_counter()

                # Create input tensor
                # camera resolution (640, 480) => input tensor size (513, 513)
                _, scale = common.set_resized_input(
                    interpreter,
                    (resolution_width, rezolution_height),
                    lambda size: cv2.resize(image, size),
                )
                # Run inference.
                interpreter.invoke()

                elapsed_ms = (time.perf_counter() - start) * 1000

                # Create segmentation map
                result = segment.get_output(interpreter)
                seg_map = result[:height, :width]
                seg_image = label_util.label_to_color_image(colormap, seg_map)

                # segmentation map resize 513, 513 => camera resolution(640, 480)
                seg_image = cv2.resize(seg_image,
                                       (resolution_width, rezolution_height))
                out_image = image // 2 + seg_image // 2
                im = cv2.cvtColor(out_image,
                                  cv2.COLOR_RGB2BGR)  # display image

                # Calc fps.
                fps = 1000.0 / elapsed_ms
                fps_text = "{0:.2f}ms, {1:.2f}fps".format(elapsed_ms, fps)
                visual.draw_caption(im, (10, 30), fps_text)

                # Display image
                cv2.imshow(WINDOW_NAME, im)
                key = cv2.waitKey(10) & 0xFF
                if key == ord("q"):
                    break

        finally:
            camera.stop_preview()

    # When everything done, release the window
    cv2.destroyAllWindows()
Example #19
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', required=True,
                        help='Model directory')
    args = parser.parse_args()


    # Load tflite model
    detector = edgetpu.make_interpreter(os.path.join(args.model, "detector.tflite"))
    detector.allocate_tensors()

    labels = dataset.read_label_file(os.path.join(args.model, 'labels.txt'))

    # Load webcam
    prevTime = 0
    cap = cv.VideoCapture(0)
    rows, cols = 320, 320
    cap.set(cv.CAP_PROP_FRAME_WIDTH, 320)
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, 320)

    # Run model
    while(True):
        _, image = cap.read()
        
        # Rotate the image 180 degrees about its center (no scaling)
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        M= cv.getRotationMatrix2D((cols/2, rows/2),180, 1)
        image = cv.warpAffine(image, M, (cols, rows))
        image = Image.fromarray(image, "RGB")

        _, scale = common.set_resized_input(detector, image.size, lambda size: image.resize(size, Image.ANTIALIAS))
        
        # Insert FPS
        curTime = time.time()

        detector.invoke()

        objs = detect.get_objects(detector, 0.6, scale)

        draw_image = image.copy()

        if not objs:
            draw_no_detect(draw_image)
        else:
            draw_objects(draw_image, objs, labels)
        
        sec = curTime - prevTime
        prevTime = curTime
        fps = 1/(sec)
        str = "FPS : %0.1f" % fps
        draw_text(draw_image, str, (0, 0))

        draw_image = np.array(draw_image)
        draw_image = cv.cvtColor(draw_image, cv.COLOR_RGB2BGR)
        
        # Display frame
        cv.imshow("Frame", draw_image)
        
        key = cv.waitKey(1) & 0xff
        if key==27:
            # Stop using ESC
            break
Example #20
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model",
                        help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--width",
                        help="Resolution width.",
                        default=640,
                        type=int)
    parser.add_argument("--height",
                        help="Resolution height.",
                        default=480,
                        type=int)
    parser.add_argument("--nano",
                        help="Works with JETSON Nao and Pi Camera.",
                        action="store_true")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize colormap
    colormap = label_util.create_pascal_label_colormap()

    # Initialize engine.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    width, height = common.input_size(interpreter)

    if args.nano == True:
        GST_STR = "nvarguscamerasrc \
            ! video/x-raw(memory:NVMM), width={0:d}, height={1:d}, format=(string)NV12, framerate=(fraction)30/1 \
            ! nvvidconv flip-method=2 !  video/x-raw, width=(int){2:d}, height=(int){3:d}, format=(string)BGRx \
            ! videoconvert \
            ! appsink".format(args.width, args.height, args.width, args.height)
        cap = cv2.VideoCapture(GST_STR, cv2.CAP_GSTREAMER)

    else:
        cap = cv2.VideoCapture(0)
        cap.set(3, args.width)
        cap.set(4, args.height)

    cap_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    cap_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    while cap.isOpened():
        _, frame = cap.read()

        start = time.perf_counter()

        # Create input tensor
        # camera resolution  => input tensor size (513, 513)
        input_buf = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        _, scale = common.set_resized_input(
            interpreter,
            (cap_width, cap_height),
            lambda size: cv2.resize(input_buf, size),
        )

        # Run inference
        interpreter.invoke()

        elapsed_ms = (time.perf_counter() - start) * 1000

        # Create segmentation map
        result = segment.get_output(interpreter)
        seg_map = result[:height, :width]
        seg_image = label_util.label_to_color_image(colormap, seg_map)

        # segmentation map resize 513, 513 => camera resolution
        seg_image = cv2.resize(seg_image, (args.width, args.height))
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) // 2 + seg_image // 2
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)

        # Calc fps.
        fps = 1000.0 / elapsed_ms
        fps_text = "{0:.2f}ms, {1:.2f}fps".format(elapsed_ms, fps)
        visual.draw_caption(im, (10, 30), fps_text)

        # Display image
        cv2.imshow(WINDOW_NAME, im)
        key = cv2.waitKey(10) & 0xFF
        if key == ord("q"):
            break

        if args.nano != True:
            for i in range(10):
                ret, frame = cap.read()

    # When everything done, release the window
    cap.release()
    cv2.destroyAllWindows()
Example #21
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument(
        "--threshold", help="threshold to filter results.", default=0.5, type=float
    )
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    elapsed_list = []
    resolution_width = args.width
    rezolution_height = args.height
    with picamera.PiCamera() as camera:

        camera.resolution = (resolution_width, rezolution_height)
        camera.framerate = 30
        width, height = common.input_size(interpreter)
        rawCapture = PiRGBArray(camera)

        # allow the camera to warmup
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                rawCapture, format="rgb", use_video_port=True
            ):
                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Run inference.
                start = time.perf_counter()

                _, scale = common.set_resized_input(
                    interpreter, (resolution_width, rezolution_height), lambda size: cv2.resize(image, size)
                )
                interpreter.invoke()

                elapsed_ms = (time.perf_counter() - start) * 1000

                # Display result.
                objects = detect.get_objects(interpreter, args.threshold, scale)
                if objects:
                    for obj in objects:
                        label_name = "Unknown"
                        if labels:
                            label_name = labels.get(obj.id, "Unknown")
                        caption = "{0}({1:.2f})".format(label_name, obj.score)

                        # Draw a rectangle and caption.
                        box = (obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax, obj.bbox.ymax)
                        visual.draw_rectangle(im, box, colors[obj.id])
                        visual.draw_caption(im, box, caption)

                # Calc fps.
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_text = " AGV: {0:.2f}ms".format(avg_elapsed_ms)

                # Display fps
                fps_text = "{0:.2f}ms".format(elapsed_ms)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # display
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break

        finally:
            camera.stop_preview()

    # When everything done, release the window
    cv2.destroyAllWindows()
Example #22
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '-m', '--model', required=True, help='File path of .tflite file.')
    parser.add_argument(
        '-r', '--roi', required=True, help='ROI [Face, Top, Whole]')
    args = parser.parse_args()

    if args.roi.lower() == 'top':
        _NUM_KEYPOINTS = 11
    elif args.roi.lower() == 'face':
        _NUM_KEYPOINTS = 5
    else: 
        _NUM_KEYPOINTS = 17

    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    # Load webcam
    prevTime = 0
    cap = cv.VideoCapture(0)
    rows, cols = 320, 320
    cap.set(cv.CAP_PROP_FRAME_WIDTH, 320)
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, 320)

    # Run model
    while(True):
        _, image = cap.read()
        
        # Rotate the image 180 degrees about its center (no scaling)
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        M= cv.getRotationMatrix2D((cols/2, rows/2),180, 1)
        image = cv.warpAffine(image, M, (cols, rows))
        image = Image.fromarray(image, "RGB")

        # resized_img = image.resize(common.input_size(interpreter), Image.ANTIALIAS)
        # common.set_input(interpreter, resized_img)
        common.set_resized_input(interpreter, image.size, lambda size: image.resize(size, Image.ANTIALIAS))
        # Insert FPS
        curTime = time.time()
        
        interpreter.invoke()
        
        pose = common.output_tensor(interpreter, 0).copy().reshape(17, 3)
        
        draw = ImageDraw.Draw(image)
        width, height = image.size
        for i in range(0, _NUM_KEYPOINTS):
            draw.ellipse(
                xy=[
                    pose[i][1] * width - 2, pose[i][0] * height - 2,
                    pose[i][1] * width + 2, pose[i][0] * height + 2
                ],
                fill=(255, 0, 0))

        sec = curTime - prevTime
        prevTime = curTime
        fps = 1/(sec)
        str = "FPS : %0.1f" % fps
        draw_text(image, str, (0, 0))

        image = np.array(image)
        image = cv.cvtColor(image, cv.COLOR_RGB2BGR)
        
        # Display frame
        cv.imshow("Frame", image)
        
        key = cv.waitKey(1) & 0xff
        if key==27:
            # Stop using ESC
            break
Example #23
 def detect(self, pilImage):
     _, scale = common.set_resized_input(
         self.interpreter, pilImage.size, lambda size: pilImage.resize(size))
     self.interpreter.invoke()
     return detect.get_objects(self.interpreter, score_threshold=0.6, image_scale=scale)
Example #24
def main():
    global message
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m',
                        '--model',
                        required=True,
                        help='File path of .tflite file')
    parser.add_argument('-l', '--labels', help='File path of labels file')
    parser.add_argument('-t',
                        '--threshold',
                        type=float,
                        default=0.4,
                        help='Score threshold for detected objects')
    parser.add_argument('-o',
                        '--output',
                        help='File path for the result image with annotations')
    parser.add_argument('-c',
                        '--count',
                        type=int,
                        default=5,
                        help='Number of times to run inference')
    args = parser.parse_args()

    labels = read_label_file(args.labels) if args.labels else {}
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    #cap = cv2.VideoCapture(0)
    # HM-10 Module MAC Address and UUID
    #address = ("DC5D07D7-38D1-4B52-94DA-4BDC300F5506") #uncomment for macos
    #write_characteristic = "0000FFE1-0000-1000-8000-00805f9b34fb"

    # Connecting to Bluetooth Module
    #address = "64:69:4E:89:2B:C5"
    #client = BleakClient(address)

    _thread.start_new_thread(asyncio.run, (connectionHandler(), ))
    #if not client.is_connected:
    #asyncio.run(connect(client))

    #initialize eye detector
    eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')

    #print('----INFERENCE TIME----')
    #print('Note: The first inference is slow because it includes',
    #      'loading the model into Edge TPU memory.')

    stream = io.BytesIO()
    with picamera.PiCamera() as camera:
        camera.start_preview()
        #counts the number of consecutive frames during which the driver is distracted
        distraction_event_duration = 0
        already_distracted = False
        while True:
            camera.capture(stream, format='jpeg')
            image = Image.open(stream)
            #ret, frame = cap.read()
            #image = Image.fromarray(frame)
            _, scale = common.set_resized_input(
                interpreter, image.size,
                lambda size: image.resize(size, Image.ANTIALIAS))
            start = time.perf_counter()
            interpreter.invoke()
            objs = detect.get_objects(interpreter, args.threshold, scale)

            #print('-------RESULTS--------')
            if not objs:
                #print('No objects detected')
                pass

            else:
                #If more than one face is detected, just use whatever is at index 0.
                face = objs[0]
                #extract bounding box coordinates
                left = face.bbox.xmin
                right = face.bbox.xmax
                bottom = face.bbox.ymax
                top = face.bbox.ymin
                w = right - left
                h = bottom - top
                #print(f'left: {left}, right: {right}, bottom: {bottom}, top: {top}')
                #convert video frame to a numpy array
                #TODO: WE WILL NEED TO CHANGE THIS WHEN THE PI CAMERA COMES IN
                #numpy_frame = frame
                numpy_frame = numpy.asarray(image)
                #crop out the drivers face using bbox coordinates
                cropped_numpy_frame = numpy_frame[top:bottom, left:right]
                #run eye detector
                roi_color = numpy_frame[top:bottom, left:right]
                #cv2.imshow('frame', roi_color)
                eyes = eye_cascade.detectMultiScale(roi_color,
                                                    minSize=(int(w / 20),
                                                             int(h / 20)),
                                                    maxSize=(int(w / 6),
                                                             int(h / 6)),
                                                    minNeighbors=5)
                num_eyes_detected = len(eyes)
                #print(num_eyes_detected, "Eyes Detected")
                if (num_eyes_detected < 2):
                    distraction_event_duration += 1
                else:
                    distraction_event_duration = 0
                #if the driver is distracted for 4 consecutive frames, play an audible alert
                if distraction_event_duration >= 4:
                    #send a 5 second long alert to the Arduino
                    if not already_distracted:
                        #print("Playing p")
                        #asyncio.run(speakerCommand(client, write_characteristic, 'p'))
                        message = 'p'
                    already_distracted = True
                    #time.sleep(5)
                    #speakerCommand(client, write_characteristic, 's')
                else:
                    if already_distracted:
                        #print("Playing s")
                        #asyncio.run(speakerCommand(client, write_characteristic, 's'))
                        message = 's'
                    already_distracted = False

            #dont need this, but might be good to reference
            '''for obj in objs:
        print(labels.get(obj.id, obj.id))
        print('  id:    ', obj.id)
        print('  score: ', obj.score)
        print('  bbox:  ', obj.bbox)'''

            #stream.seek(0)
            #stream.truncate()
            if args.output:
                image = image.convert('RGB')
                draw_objects(ImageDraw.Draw(image), objs, labels)
                image.save(args.output)
                image.show()

            inference_time = time.perf_counter() - start
            #print('%.2f ms' % (inference_time * 1000))
            #cv2.imshow('frame', frame)

            #if cv2.waitKey(1) & 0xFF == ord('q'):
            #    break

    #cap.release()
    #cv2.destroyAllWindows()
    message = 'd'
    time.sleep(3)
Example #25
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '-m',
        '--model',
        default="ssd_mobilenet_v2_face_quant_postprocess_edgetpu.tflite",
        help='File path of .tflite file')
    parser.add_argument('-i',
                        '--input',
                        required=True,
                        help='File path of image to process')
    parser.add_argument('-t',
                        '--threshold',
                        type=float,
                        default=0.4,
                        help='Score threshold for detected objects')
    parser.add_argument('-o',
                        '--output',
                        default="out.jpg",
                        help='File path for the result image with annotations')
    args = parser.parse_args()

    ## ========== ========== ===========
    ## Load the network
    ## ========== ========== ===========

    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    ## ========== ========== ===========
    ## Compute bounding boxes
    ## ========== ========== ===========

    image = Image.open(args.input)
    _, scale = common.set_resized_input(
        interpreter, image.size,
        lambda size: image.resize(size, Image.ANTIALIAS))

    start = time.perf_counter()
    interpreter.invoke()
    inference_time = time.perf_counter() - start
    objs = detect.get_objects(interpreter, args.threshold, scale)
    print('%.2f ms' % (inference_time * 1000))

    ## ========== ========== ===========
    ## Crop the image
    ## ========== ========== ===========

    ## Ensure that there is only one face in the image
    assert len(objs) == 1

    bbox = objs[0].bbox

    sx = int((bbox[0] + bbox[2]) / 2)
    sy = int((bbox[1] + bbox[3]) / 2)
    ss = int(max((bbox[3] - bbox[1]), (bbox[2] - bbox[0])) / 2.5)

    print((sx - ss, sy - ss, sx + ss, sy + ss))

    cropped_image = image.crop((sx - ss, sy - ss, sx + ss, sy + ss))
    cropped_image.resize((240, 240)).save(args.output)
Example #26
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model",
                        help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label",
                        help="File path of label file.",
                        required=True)
    parser.add_argument("--threshold",
                        help="threshold to filter results.",
                        type=float,
                        default=0.5)
    parser.add_argument("--width", help="Resolution width.", default=640)
    parser.add_argument("--height", help="Resolution height.", default=480)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    is_inpaint_mode = False
    resolution_width = args.width
    rezolution_height = args.height
    with picamera.PiCamera() as camera:

        camera.resolution = (resolution_width, rezolution_height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # allow the camera to warmup
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture,
                                                   format="rgb",
                                                   use_video_port=True):
                start_ms = time.time()

                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Run inference.
                start = time.perf_counter()

                _, scale = common.set_resized_input(
                    interpreter,
                    (resolution_width, rezolution_height),
                    lambda size: cv2.resize(image, size),
                )
                interpreter.invoke()

                # Display result.
                objects = detect.get_objects(interpreter, args.threshold,
                                             scale)

                if is_inpaint_mode == True:
                    mask = np.full((args.height, args.width),
                                   0,
                                   dtype=np.uint8)
                    for obj in objects:
                        if labels and obj.id in labels:
                            # Draw a mask rectangle.
                            box = (
                                obj.bbox.xmin,
                                obj.bbox.ymin,
                                obj.bbox.xmax,
                                obj.bbox.ymax,
                            )
                            visual.draw_rectangle(mask,
                                                  box, (255, 255, 255),
                                                  thickness=-1)

                    # Image Inpainting
                    dst = cv2.inpaint(im, mask, 3, cv2.INPAINT_TELEA)
                    # dst = cv2.inpaint(im, mask,3,cv2.INPAINT_NS)

                else:
                    for obj in objects:
                        if labels and obj.id in labels:
                            label_name = labels[obj.id]
                            caption = "{0}({1:.2f})".format(
                                label_name, obj.score)

                            # Draw a rectangle and caption.
                            box = (
                                obj.bbox.xmin,
                                obj.bbox.ymin,
                                obj.bbox.xmax,
                                obj.bbox.ymax,
                            )
                            visual.draw_rectangle(im, box, colors[obj.id])
                            visual.draw_caption(im, box, caption)
                    dst = im

                # Calc fps.
                elapsed_ms = time.time() - start_ms
                fps = 1 / elapsed_ms

                # Display fps
                fps_text = "{0:.2f}ms, {1:.2f}fps".format(
                    (elapsed_ms * 1000.0), fps)
                visual.draw_caption(dst, (10, 30), fps_text)

                # Display image
                cv2.imshow(WINDOW_NAME, dst)
                key = cv2.waitKey(10) & 0xFF
                if key == ord("q"):
                    break
                elif key == ord(" "):
                    is_inpaint_mode = not is_inpaint_mode
                    print("inpant mode change :", is_inpaint_mode)

        finally:
            camera.stop_preview()

    # When everything done, release the window
    cv2.destroyAllWindows()
Example #27
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m',
                        '--model',
                        required=True,
                        help='File path of .tflite file')
    parser.add_argument('-i',
                        '--input',
                        required=True,
                        help='File path of image to process')
    parser.add_argument('-l', '--labels', help='File path of labels file')
    parser.add_argument('-t',
                        '--threshold',
                        type=float,
                        default=0.4,
                        help='Score threshold for detected objects')
    parser.add_argument('-o',
                        '--output',
                        help='File path for the result image with annotations')
    parser.add_argument('-c',
                        '--count',
                        type=int,
                        default=5,
                        help='Number of times to run inference')
    args = parser.parse_args()

    labels = read_label_file(args.labels) if args.labels else {}
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    image = Image.open(args.input)
    _, scale = common.set_resized_input(
        interpreter, image.size,
        lambda size: image.resize(size, Image.ANTIALIAS))

    print('----INFERENCE TIME----')
    print('Note: The first inference is slow because it includes',
          'loading the model into Edge TPU memory.')
    for _ in range(args.count):
        start = time.perf_counter()
        interpreter.invoke()
        inference_time = time.perf_counter() - start
        objs = detect.get_objects(interpreter, args.threshold, scale)
        print('%.2f ms' % (inference_time * 1000))

    print('-------RESULTS--------')
    if not objs:
        print('No objects detected')

    for obj in objs:
        print(labels.get(obj.id, obj.id))
        print('  id:    ', obj.id)
        print('  score: ', obj.score)
        print('  bbox:  ', obj.bbox)

    if args.output:
        image = image.convert('RGB')
        draw_objects(ImageDraw.Draw(image), objs, labels)
        image.save(args.output)
        image.show()
Example #28
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model",
                        help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label",
                        help="File path of label file.",
                        required=True)
    parser.add_argument("--top_k",
                        help="keep top k candidates.",
                        default=3,
                        type=int)
    parser.add_argument("--threshold",
                        help="Score threshold.",
                        default=0.0,
                        type=float)
    parser.add_argument("--width",
                        help="Resolution width.",
                        default=640,
                        type=int)
    parser.add_argument("--height",
                        help="Resolution height.",
                        default=480,
                        type=int)
    args = parser.parse_args()

    with open(args.label, "r") as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    # Initialize window.
    cv2.namedWindow(WINDOW_NAME)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    width, height = common.input_size(interpreter)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height

    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30

        rawCapture = PiRGBArray(camera)

        # allow the camera to warmup
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture,
                                                   format="rgb",
                                                   use_video_port=True):
                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Run inference.
                start = time.perf_counter()

                _, scale = common.set_resized_input(
                    interpreter,
                    (resolution_width, resolution_height),
                    lambda size: cv2.resize(image, size),
                )
                interpreter.invoke()

                results = classify.get_classes(interpreter, args.top_k,
                                               args.threshold)
                elapsed_ms = (time.perf_counter() - start) * 1000

                # Check result.
                if results:
                    for i in range(len(results)):
                        label = "{0} ({1:.2f})".format(labels[results[i][0]],
                                                       results[i][1])
                        pos = 60 + (i * 30)
                        visual.draw_caption(im, (10, pos), label)

                # Calc fps (elapsed_ms is already in milliseconds).
                fps = 1000.0 / elapsed_ms
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_fps = 1000.0 / avg_elapsed_ms
                    avg_text = " AVG: {0:.2f}ms, {1:.2f}fps".format(
                        avg_elapsed_ms, avg_fps)

                # Display fps.
                fps_text = "{0:.2f}ms, {1:.2f}fps".format(elapsed_ms, fps)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # display
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break

        finally:
            camera.stop_preview()
            cv2.destroyAllWindows()
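Example #28 (and #29 below) depends on a small visual helper module that is not shown. A minimal sketch of a draw_caption compatible with both call sites is given here as an assumption: it relies only on cv2.putText and accepts either an (x, y) point or a full (xmin, ymin, xmax, ymax) box.

import cv2


def draw_caption(image, point, caption):
    """Draws caption text with a dark outline at the given location (illustrative)."""
    # Only the first two coordinates are used, so both (x, y) points and
    # (xmin, ymin, xmax, ymax) boxes can be passed in.
    x, y = int(point[0]), int(point[1])
    cv2.putText(image, caption, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                (0, 0, 0), 3)
    cv2.putText(image, caption, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                (255, 255, 255), 2)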
Example #29
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model",
                        help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label",
                        help="File path of label file.",
                        required=True)
    parser.add_argument("--threshold",
                        help="threshold to filter results.",
                        default=0.5,
                        type=float)
    parser.add_argument("--width",
                        help="Resolution width.",
                        default=640,
                        type=int)
    parser.add_argument("--height",
                        help="Resolution height.",
                        default=480,
                        type=int)
    parser.add_argument("--videopath",
                        help="File path of Videofile.",
                        default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors.
    last_key = max(labels.keys())
    colors = visual.random_colors(last_key)

    # Video capture.
    if args.videopath == "":
        print("Open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print("Open video file: ", args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    cap_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    cap_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    elapsed_list = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Run inference.
        start = time.perf_counter()

        _, scale = common.set_resized_input(interpreter,
                                            (cap_width, cap_height),
                                            lambda size: cv2.resize(im, size))
        interpreter.invoke()

        elapsed_ms = (time.perf_counter() - start) * 1000

        # Display result.
        objects = detect.get_objects(interpreter, args.threshold, scale)
        if objects:
            for obj in objects:
                label_name = "Unknown"
                if labels:
                    labels.get(obj.id, "Unknown")
                    label_name = labels[obj.id]
                caption = "{0}({1:.2f})".format(label_name, obj.score)

                # Draw a rectangle and caption.
                box = (obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax,
                       obj.bbox.ymax)
                visual.draw_rectangle(frame, box, colors[obj.id])
                visual.draw_caption(frame, box, caption)

        # Calc fps.
        elapsed_list.append(elapsed_ms)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_text = " AGV: {0:.2f}ms".format(avg_elapsed_ms)

        # Display fps
        fps_text = "{0:.2f}ms".format(elapsed_ms)
        visual.draw_caption(frame, (10, 30), fps_text + avg_text)

        # display
        cv2.imshow(WINDOW_NAME, frame)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything is done, release the capture and close the window.
    cap.release()
    cv2.destroyAllWindows()
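Example #29 additionally calls visual.random_colors and visual.draw_rectangle. The sketch below is an assumed implementation, not the original module: random_colors returns a fixed-seed list of BGR colors so each class keeps the same color across frames, and draw_rectangle draws an (xmin, ymin, xmax, ymax) box with cv2.rectangle.

import random

import cv2


def random_colors(n):
    """Returns n random but reproducible BGR colors (illustrative)."""
    rng = random.Random(42)
    return [tuple(rng.randint(0, 255) for _ in range(3)) for _ in range(n)]


def draw_rectangle(image, box, color, thickness=2):
    """Draws a bounding box given as (xmin, ymin, xmax, ymax) (illustrative)."""
    xmin, ymin, xmax, ymax = [int(v) for v in box]
    cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, thickness)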
Example #30
0
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--model',
      required=True,
      help='Detection SSD model path (must have post-processing operator).')
  parser.add_argument('--label', help='Labels file path.')
  parser.add_argument(
      '--score_threshold',
      help='Threshold for returning the candidates.',
      type=float,
      default=0.1)
  parser.add_argument(
      '--tile_sizes',
      help=('Sizes of the tiles to split the image into; more than one layer '
            'can be given as a comma-delimited list in widthxheight form. '
            'Example: "300x300,250x250,.."'),
      required=True)
  parser.add_argument(
      '--tile_overlap',
      help=('Number of pixels to overlap the tiles. tile_overlap should be at '
            'least half of the minimum desired object size, otherwise small '
            'objects could be missed on the tile boundary.'),
      type=int,
      default=15)
  parser.add_argument(
      '--iou_threshold',
      help=('IoU threshold used to merge bounding boxes during NMS.'),
      type=float,
      default=.1)
  parser.add_argument('--input', help='Input image path.', required=True)
  parser.add_argument('--output', help='Output image path.')
  args = parser.parse_args()

  interpreter = make_interpreter(args.model)
  interpreter.allocate_tensors()
  labels = read_label_file(args.label) if args.label else {}

  # Open image.
  img = Image.open(args.input).convert('RGB')
  draw = ImageDraw.Draw(img)

  objects_by_label = dict()
  img_size = img.size
  tile_sizes = [
      list(map(int, tile_size.split('x')))
      for tile_size in args.tile_sizes.split(',')
  ]
  for tile_size in tile_sizes:
    for tile_location in tiles_location_gen(img_size, tile_size,
                                            args.tile_overlap):
      tile = img.crop(tile_location)
      _, scale = common.set_resized_input(
          interpreter, tile.size,
          lambda size, img=tile: img.resize(size, Image.NEAREST))
      interpreter.invoke()
      objs = detect.get_objects(interpreter, args.score_threshold, scale)

      for obj in objs:
        bbox = [obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax, obj.bbox.ymax]
        bbox = reposition_bounding_box(bbox, tile_location)

        label = labels.get(obj.id, '')
        objects_by_label.setdefault(label,
                                    []).append(Object(label, obj.score, bbox))

  for label, objects in objects_by_label.items():
    idxs = non_max_suppression(objects, args.iou_threshold)
    for idx in idxs:
      draw_object(draw, objects[idx])

  img.show()
  if args.output:
    img.save(args.output)
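Example #30 tiles a large image, runs detection on each tile, maps the per-tile boxes back to full-image coordinates, and merges duplicates with non-max suppression; the helpers it uses (Object, tiles_location_gen, reposition_bounding_box, non_max_suppression, draw_object) are defined elsewhere in the script. A minimal sketch of the tiling-related pieces, written here as an assumption about how they behave:

import collections

# Container matching how the example builds Object(label, score, bbox).
Object = collections.namedtuple('Object', ['label', 'score', 'bbox'])


def tiles_location_gen(img_size, tile_size, overlap):
  """Yields (xmin, ymin, xmax, ymax) crop boxes covering the image with overlap."""
  img_width, img_height = img_size
  tile_width, tile_height = tile_size
  w_stride = tile_width - overlap
  h_stride = tile_height - overlap
  for h in range(0, img_height, h_stride):
    for w in range(0, img_width, w_stride):
      yield (w, h, min(img_width, w + tile_width),
             min(img_height, h + tile_height))


def reposition_bounding_box(bbox, tile_location):
  """Shifts a tile-relative [xmin, ymin, xmax, ymax] box into full-image coordinates."""
  bbox[0] += tile_location[0]
  bbox[1] += tile_location[1]
  bbox[2] += tile_location[0]
  bbox[3] += tile_location[1]
  return bbox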