Example #1
def main():
    cv_publisher = Publisher(105)
    MODELS_DIR = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/models/'
    MODEL_PATH = MODELS_DIR + 'ssd_mobilenet_v2_pupper_quant_edgetpu.tflite'
    LABEL_PATH = MODELS_DIR + 'pupper_labels.txt'
    LOG_FILE = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/vision_log.txt'
    labels = dataset_utils.read_label_file(LABEL_PATH)
    engine = DetectionEngine(MODEL_PATH)

    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        _, height, width, _ = engine.get_input_tensor_shape()
        try:
            stream = io.BytesIO()
            #count = 0
            for _ in camera.capture_continuous(stream, format='rgb', use_video_port=True, resize=(width, height)):
                stream.truncate()
                stream.seek(0)
                input_tensor = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                #image = Image.frombuffer('RGB',(width,height), stream.getvalue())
                image = Image.frombuffer('RGB',(320,304), stream.getvalue()) # to account for automatic upscaling by picamera when format='rgb'
                #draw = ImageDraw.Draw(image)
                start_ms = time.time()
                results = engine.detect_with_image(image,threshold=0.2,keep_aspect_ratio=True,relative_coord=False,top_k=10)
                elapsed_ms = time.time() - start_ms
                
                detectedObjs = []
                for obj in results:
                    if (obj.label_id in range(3)):
                        box = obj.bounding_box.flatten().tolist()
                        #draw.rectangle(box, outline='red')
                        #draw.text((box[0],box[1]), labels[obj.label_id] + " " + str(obj.score))
                        w = box[2] - box[0]
                        h = box[3] - box[1]
                        objInfo = {'bbox_x':float(box[0]),
                                   'bbox_y':float(box[1]),
                                   'bbox_h':float(h),
                                   'bbox_w':float(w),
                                   'bbox_label':labels[obj.label_id],
                                   'bbox_confidence': float(obj.score)
                                   }
                        detectedObjs.append(objInfo)
                try:
                    cv_publisher.send(detectedObjs)
                except BaseException as e:
                    print('Failed to send bounding boxes. CV UDP subscriber likely not initialized')
                    pass
                #print(detectedObjs)

                #with open('/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/test_images_120120/' + str(count) + '.png','wb') as f:
                #    image.save(f)
                #count+=1
        except BaseException as e:
            with open(LOG_FILE,'w') as f:
                f.write("Failed to run detection loop:\n {0}\n".format(traceback.format_exc()))
Example #2
def detection_job(detection_model, image_name, num_inferences):
    """Runs detection job."""
    engine = DetectionEngine(detection_model)
    with open_image(image_name) as img:
      # Resized image.
      _, height, width, _ = engine.get_input_tensor_shape()
      tensor = np.asarray(img.resize((width, height), Image.NEAREST)).flatten()

    # Using `detect_with_input_tensor` to exclude image down-scale cost.
    for _ in range(num_inferences):
      engine.detect_with_input_tensor(tensor, top_k=1)
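
For context, a rough sketch of how detection_job could be driven, e.g. one worker thread per Edge TPU; the model and image names below are placeholders (not paths from the original project), and the sketch assumes detection_job's own imports and the open_image helper are in scope:

import threading

MODEL = 'ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite'  # placeholder model path
IMAGE = 'test_image.jpg'                                          # placeholder image path

# Each worker creates its own DetectionEngine and runs 100 inferences on the same image.
workers = [threading.Thread(target=detection_job, args=(MODEL, IMAGE, 100))
           for _ in range(2)]
for w in workers:
    w.start()
for w in workers:
    w.join()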
Example #3
def main(argv):
    argparser = build_argparser()
    args = argparser.parse_args(argv)

    labels = load_labels(args.label)
    engine = DetectionEngine(args.model)

    camera = picamera.PiCamera()
    camera.resolution = (640, 480)
    camera.framerate = 30
    _, width, height, channels = engine.get_input_tensor_shape()

    overlay_img = Image.new('RGBA', (width, height), (0, 0, 0, 0))
    overlay = camera.add_overlay(overlay_img.tobytes(), size=overlay_img.size)
    overlay.layer = 3

    try:
        start_time = time.time()
        camera.start_preview(fullscreen=True)
        buff = io.BytesIO()
        for _ in camera.capture_continuous(buff,
                                           format='rgb',
                                           use_video_port=True,
                                           resize=(width, height)):
            buff.truncate()
            buff.seek(0)

            array = np.frombuffer(buff.getvalue(), dtype=np.uint8)

            # Do inference
            start_ms = time.time()
            detected = engine.DetectWithInputTensor(array, top_k=10)
            elapsed_ms = time.time() - start_ms

            if detected:
                camera.annotate_text = ('%d objects detected.\n%.2fms' %
                                        (len(detected), elapsed_ms * 1000.0))
                overlay_img = Image.new('RGBA', (width, height), (0, 0, 0, 0))
                draw = ImageDraw.Draw(overlay_img)
                for obj in detected:
                    # relative coord to abs coord.
                    box = obj.bounding_box * [[width, height]]
                    draw.rectangle(box.flatten().tolist(),
                                   COLORS[obj.label_id % len(COLORS)])
                overlay.update(overlay_img.tobytes())
            if time.time() - start_time >= args.time:
                break
    finally:
        camera.stop_preview()
        camera.close()

    return 0
Example #4
def main():
    cv_publisher = Publisher(105)
    MODELS_DIR = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/models/'
    MODEL_PATH = MODELS_DIR + 'ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite'
    LABEL_PATH = MODELS_DIR + 'coco_labels.txt'
    LOG_FILE = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/vision_log.txt'
    labels = dataset_utils.read_label_file(LABEL_PATH)
    engine = DetectionEngine(MODEL_PATH)

    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        _, height, width, _ = engine.get_input_tensor_shape()
        
        stream = io.BytesIO()
        count = 0
        for _ in camera.capture_continuous(stream, format='rgb', use_video_port=True, resize=(width, height)):
            stream.truncate()
            stream.seek(0)
            input_tensor = np.frombuffer(stream.getvalue(), dtype=np.uint8)
            #image = Image.frombuffer('RGB',(width,height), stream.getvalue())
            image = Image.frombuffer('RGB',(320,304), stream.getvalue()) # to account for automatic upscaling by picamera when format='rgb'
            draw = ImageDraw.Draw(image)
            start_ms = time.time()
            results = engine.detect_with_image(image,threshold=0.1,keep_aspect_ratio=True,relative_coord=False,top_k=51)
            elapsed_ms = time.time() - start_ms
                
            detectedObjs = []
            for obj in results:
                if (obj.label_id == 0 or obj.label_id == 36):
                    if (obj.label_id == 36):
                        print('Tennis ball detected')
                    box = obj.bounding_box.flatten().tolist()
                    draw.rectangle(box, outline='red')
                    draw.text((box[0],box[1]), labels[obj.label_id] + " " + str(obj.score))
                    w = box[2] - box[0]
                    h = box[3] - box[1]
                    objInfo = {'bbox_x':float(box[0]),
                               'bbox_y':float(box[1]),
                               'bbox_h':float(h),
                               'bbox_w':float(w),
                               'bbox_label':labels[obj.label_id],
                               'bbox_confidence': float(obj.score)
                               }
                    detectedObjs.append(objInfo)
            cv_publisher.send(detectedObjs)
            #print(detectedObjs)

            with open('/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/test_images/' + str(count) + '.png','wb') as f:
                image.save(f)
            count+=1
Example #5
class CoralObjectDetector:
    """Performs inference on Edge TPU.
    """
    def __init__(self, model_path, device_path):
        self.__engine = DetectionEngine(model_path=os.path.join(
            model_path, 'edgetpu.tflite'),
                                        device_path=device_path)

        self.__model_shape = itemgetter(1, 2)(
            self.__engine.get_input_tensor_shape())

    @property
    def device_name(self):
        return "Coral"

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass

    def detect(self, image_shape, image_np, detections: List[Detection]):
        image_np = cv2.resize(image_np,
                              dsize=self.__model_shape,
                              interpolation=cv2.INTER_LINEAR)

        objs = self.__engine.detect_with_input_tensor(
            input_tensor=image_np.flatten(), top_k=len(detections))

        d = 0
        max_width = image_shape[1] - 1
        max_height = image_shape[0] - 1
        while d < len(objs) and d < len(detections):
            detection = detections[d]
            obj = objs[d]
            detection.label = obj.label_id + 1
            detection.confidence = obj.score
            detection.bounding_box.y_min = int(obj.bounding_box[0][1] *
                                               max_height)
            detection.bounding_box.x_min = int(obj.bounding_box[0][0] *
                                               max_width)
            detection.bounding_box.y_max = int(obj.bounding_box[1][1] *
                                               max_height)
            detection.bounding_box.x_max = int(obj.bounding_box[1][0] *
                                               max_width)
            d += 1

        return self.__engine.get_inference_time()
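
A hedged usage sketch for CoralObjectDetector: the Box/Detection dataclasses below are minimal stand-ins for the containers the surrounding project passes in (the real types are not shown here), the model directory is a placeholder, and device_path=None is assumed to select the default Edge TPU:

import cv2
from dataclasses import dataclass, field

@dataclass
class Box:  # hypothetical stand-in for the project's bounding-box type
    x_min: int = 0
    y_min: int = 0
    x_max: int = 0
    y_max: int = 0

@dataclass
class Detection:  # hypothetical stand-in for the project's Detection type
    label: int = 0
    confidence: float = 0.0
    bounding_box: Box = field(default_factory=Box)

frame = cv2.cvtColor(cv2.imread('test.jpg'), cv2.COLOR_BGR2RGB)
detections = [Detection() for _ in range(10)]  # detect() fills these pre-allocated slots in place
with CoralObjectDetector('/path/to/model_dir', None) as detector:
    inference_ms = detector.detect(frame.shape, frame, detections)
    print(detector.device_name, inference_ms, detections[0])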
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='File path of Tflite model.',
                        required=True)
    parser.add_argument('--label',
                        help='File path of label file.',
                        required=True)
    args = parser.parse_args()

    with open(args.label, 'r', encoding="utf-8") as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    engine = DetectionEngine(args.model)

    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        _, width, height, channels = engine.get_input_tensor_shape()
        try:
            stream = io.BytesIO()
            for foo in camera.capture_continuous(stream,
                                                 format='rgb',
                                                 use_video_port=True,
                                                 resize=(width, height)):
                stream.truncate()
                stream.seek(0)
                frame = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                start_ms = time.time()
                results = engine.DetectWithImage(frame,
                                                 threshold=0.05,
                                                 keep_aspect_ratio=True,
                                                 relative_coord=False,
                                                 top_k=10)

                elapsed_ms = time.time() - start_ms
                if results:
                    logging.info("frame has {} objects".format(len(results)))
                    for detected_object in results:
                        logging.info("label: {}, score: {}, bounds: {}".format(
                            labels[detected_object.label_id],
                            detected_object.score,
                            detected_object.bounding_box.flatten().tolist()))
        finally:
            logging.info("done capturing")
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--object_type', help='Type of object to capture images of', required=True)
    parser.add_argument('--n', help='Number of frames to acquire', required=True)
    args = parser.parse_args()
    print('Setting up to capture ' + args.n + ' images of type ' + args.object_type)
    LOG_FILE = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/training_image_acquistion_log.txt'
    SAVE_PATH = '/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/training_images/' + args.object_type + '/'
    if not os.path.isdir(SAVE_PATH):
        os.mkdir(SAVE_PATH)
        count = 0
    else:
        with open(SAVE_PATH + 'image_count.pkl','rb') as f:
            count = pickle.load(f)

    engine = DetectionEngine('/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/models/ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite')

    input('Press ENTER to begin capturing frames...')
    n = 0
    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        _, height, width, _ = engine.get_input_tensor_shape()
        try:
            stream = io.BytesIO()
            for _ in camera.capture_continuous(SAVE_PATH + args.object_type + '_{timestamp:%f}.png', format='png', use_video_port=True, resize=(width, height)):
                #stream.truncate()
                #stream.seek(0)
                #input_tensor = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                #image = Image.frombuffer('RGB',(320,304), stream.getvalue())
                #with open(SAVE_PATH + args.object_type + '_' + str(count) + '.jpg','wb') as f:
                #    image.save(f)
                count += 1
                n += 1
                if (n >= int(args.n)):
                    break
                else:
                    print('Captured image #' + str(n))
        except BaseException as e:
            with open(LOG_FILE,'w') as f:
                f.write("Failed to run image acquisition loop: {0}\n".format(str(e)))
    
    with open(SAVE_PATH + 'image_count.pkl','wb') as f:
        pickle.dump(count,f)
Example #8
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir,default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of classes with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='class score threshold')
    args = parser.parse_args()

    print("Loading %s with %s labels."%(args.model, args.labels))
    engine = DetectionEngine(args.model)
    labels = load_labels(args.labels)

    input_shape = engine.get_input_tensor_shape()
    inference_size = (input_shape[1], input_shape[2])

    # Average fps over last 30 frames.
    fps_counter  = common.avg_fps_counter(30)

    def user_callback(input_tensor, src_size, inference_box):
      nonlocal fps_counter
      start_time = time.monotonic()
      objs = engine.detect_with_input_tensor(input_tensor,
                                    threshold=args.threshold,
                                    top_k=args.top_k)
      end_time = time.monotonic()

      text_lines = [
          'Inference: %.2f ms' %((end_time - start_time) * 1000),
          'FPS: %d fps' % (round(next(fps_counter))),
      ]
      print(' '.join(text_lines))
      return generate_svg(src_size, inference_size, inference_box, objs, labels, text_lines)

    result = gstreamer.run_pipeline(user_callback, appsink_size=inference_size)
Example #9
class ObjectDetector:
    def init(
            self,
            model_file="mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite",
            label_file="coco_labels.txt"):
        self.model_file = model_file
        self.label_file = label_file
        self.labels = ReadLabelFile(self.label_file)
        self.engine = DetectionEngine(self.model_file)

    def detect(self, input_frame):
        if (self.labels == '' or self.engine == ''):
            print("Detector is not initialized!")
            return []
        objects = self.engine.DetectWithInputTensor(input_frame.flatten(),
                                                    threshold=0.5,
                                                    top_k=10)
        _, width, height, channels = self.engine.get_input_tensor_shape()
        detected_objects = []
        if objects:
            for obj in objects:
                box = obj.bounding_box.flatten().tolist()
                box_left = int(box[0] * width)
                box_top = int(box[1] * height)
                box_right = int(box[2] * width)
                box_bottom = int(box[3] * height)
                percentage = int(obj.score * 100)
                label = self.labels[obj.label_id]
                object_info = {
                    'box_left': box_left,
                    'box_right': box_right,
                    'box_top': box_top,
                    'box_bottom': box_bottom,
                    'percentage': percentage,
                    'label': label
                }
                detected_objects.append(object_info)

        return detected_objects
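
A hedged usage sketch for this ObjectDetector: it assumes the default model and label files are present, that ReadLabelFile from the surrounding code is importable, and that 'test.jpg' is a placeholder image:

import cv2

detector = ObjectDetector()
detector.init()  # loads the default Edge TPU model and COCO labels
_, width, height, channels = detector.engine.get_input_tensor_shape()

frame = cv2.cvtColor(cv2.imread('test.jpg'), cv2.COLOR_BGR2RGB)
resized = cv2.resize(frame, (width, height))  # detect() flattens this internally
for obj in detector.detect(resized):
    print(obj['label'], obj['percentage'], obj['box_left'], obj['box_top'])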
Example #10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model",
                        help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label",
                        help="File path of label file.",
                        required=True)
    parser.add_argument("--top_k", help="keep top k candidates.", default=3)
    parser.add_argument("--threshold",
                        help="threshold to filter results.",
                        type=float,
                        default=0.5)
    parser.add_argument("--width", help="Resolution width.", default=640)
    parser.add_argument("--height", help="Resolution height.", default=480)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    is_inpaint_mode = False
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:

        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        _, width, height, channels = engine.get_input_tensor_shape()
        rawCapture = PiRGBArray(camera)

        # allow the camera to warmup
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture,
                                                   format="rgb",
                                                   use_video_port=True):
                start_ms = time.time()

                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                input_buf = PIL.Image.fromarray(image)

                # Run inference.
                ans = engine.DetectWithImage(
                    input_buf,
                    threshold=args.threshold,
                    keep_aspect_ratio=False,
                    relative_coord=False,
                    top_k=args.top_k,
                )

                # Display result.
                if is_inpaint_mode == True:
                    mask = np.full((args.height, args.width),
                                   0,
                                   dtype=np.uint8)
                    if ans:
                        for obj in ans:
                            if labels and obj.label_id in labels:
                                # Draw a mask rectangle.
                                box = obj.bounding_box.flatten().tolist()
                                visual.draw_rectangle(mask,
                                                      box, (255, 255, 255),
                                                      thickness=-1)

                    # Image Inpainting
                    dst = cv2.inpaint(im, mask, 3, cv2.INPAINT_TELEA)
                    # dst = cv2.inpaint(im, mask,3,cv2.INPAINT_NS)

                else:
                    for obj in ans:
                        if labels and obj.label_id in labels:
                            label_name = labels[obj.label_id]
                            caption = "{0}({1:.2f})".format(
                                label_name, obj.score)

                            # Draw a rectangle and caption.
                            box = obj.bounding_box.flatten().tolist()
                            visual.draw_rectangle(im, box,
                                                  colors[obj.label_id])
                            visual.draw_caption(im, box, caption)
                    dst = im

                # Calc fps.
                elapsed_ms = time.time() - start_ms
                fps = 1 / elapsed_ms

                # Display fps
                fps_text = "{0:.2f}ms, {1:.2f}fps".format(
                    (elapsed_ms * 1000.0), fps)
                visual.draw_caption(dst, (10, 30), fps_text)

                # Display image
                cv2.imshow(WINDOW_NAME, dst)
                key = cv2.waitKey(10) & 0xFF
                if key == ord("q"):
                    break
                elif key == ord(" "):
                    is_inpaint_mode = not is_inpaint_mode
                    print("inpant mode change :", is_inpaint_mode)

        finally:
            camera.stop_preview()

    # When everything done, release the window
    cv2.destroyAllWindows()
Example #11
def mqtt_device_demo(args):
    """Connects a device, sends data, and receives data."""
    # [START iot_mqtt_run]
    global minimum_backoff_time
    global MAXIMUM_BACKOFF_TIME

    # Publish to the events or state topic based on the flag.
    sub_topic = 'events' if args.message_type == 'event' else 'state'

    mqtt_topic = '/devices/{}/{}'.format(args.device_id, sub_topic)

    jwt_iat = datetime.datetime.utcnow()
    jwt_exp_mins = args.jwt_expires_minutes
    client = get_client(
        args.project_id, args.cloud_region, args.registry_id,
        args.device_id, args.private_key_file, args.algorithm,
        args.ca_certs, args.mqtt_bridge_hostname, args.mqtt_bridge_port)

    # Initialize engine.
    engine = DetectionEngine('./edgetpu/test_data/mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite')
    # labels = ReadLabelFile(args_tpu.label) if args_tpu.label else None

    # initializing the camera
    with picamera.PiCamera() as camera:
        camera.resolution = (1024, 768)
        camera.framerate = 30
        _, width, height, channels = engine.get_input_tensor_shape()
        camera.start_preview()
        try:
            stream = io.BytesIO()
            for foo in camera.capture_continuous(stream,
                                                 format='rgb',
                                                 use_video_port=True,
                                                 resize=(width, height)):
                client.loop()

                if should_backoff:
                    # If backoff time is too large, give up.
                    if minimum_backoff_time > MAXIMUM_BACKOFF_TIME:
                        print('Exceeded maximum backoff time. Giving up.')
                        break

                    delay = minimum_backoff_time + random.randint(0, 1000) / 1000.0
                    time.sleep(delay)
                    minimum_backoff_time *= 2
                    client.connect(args.mqtt_bridge_hostname, args.mqtt_bridge_port)

                now = datetime.datetime.now()

                stream.truncate()
                stream.seek(0)
                input = np.frombuffer(stream.getvalue(), dtype=np.uint8)

                start_ms = time.time()
                ans = engine.DetectWithInputTensor(input, threshold=0.5, top_k=10)
                elapsed_ms = time.time() - start_ms
                # Display result.
                print ('-----------------------------------------')
                nPerson = 0
                bbox = list()
                scores = list()
                if ans:
                    for obj in ans:
                        nPerson = nPerson + 1
                        # if labels:
                        #     print(labels[obj.label_id])
                        score = [obj.score]
                        # print ('score = ', obj.score)
                        box = obj.bounding_box.flatten().tolist()
                        # print ('box = ', box)
                        bbox.append(box)
                        scores.append(score)
                    msg = {"nPersons": int(nPerson), "bounding_box": str(bbox), "scores": str(scores)}
                else:
                    msg = {"nPersons": int(nPerson), "bounding_box": str(bbox), "scores": str(scores)}

                bounding_box = [{'box_0': bb[0],
                                 'box_1': bb[1],
                                 'box_2': bb[2],
                                 'box_3': bb[3]} for bb in eval(msg["bounding_box"])]

                scores_msg = [{'score': s[0]} for s in eval(msg["scores"])]

                info = {'nPersons': '{}'.format(nPerson), 'bounding_box': bounding_box,
                        'scores': scores_msg, 'TimeStamp': str(int(time.time()))}

                ###################################

                try:
                    list_short, info_short = change_info_list(window=30, list=list_short, nPerson=nPerson,
                                                              length='short')
                except:
                    list_short = []
                    list_short, info_short = change_info_list(window=30, list=list_short, nPerson=nPerson,
                                                              length='short')

                try:
                    list_long, info_long = change_info_list(window=300, list=list_long, nPerson=nPerson,
                                                            length='long')
                except:
                    list_long = []
                    list_long, info_long = change_info_list(window=300, list=list_long, nPerson=nPerson,
                                                            length='long')

                info.update(info_short)
                info.update(info_long)

                ###################################

                info = json.dumps(info)

                payload = info
                print('Publishing message {}'.format(payload))
                # [START iot_mqtt_jwt_refresh]
                seconds_since_issue = (datetime.datetime.utcnow() - jwt_iat).seconds
                if seconds_since_issue > 60 * jwt_exp_mins:
                    # print('Refreshing token')
                    jwt_iat = datetime.datetime.utcnow()
                    client = get_client(
                        args.project_id, args.cloud_region,
                        args.registry_id, args.device_id, args.private_key_file,
                        args.algorithm, args.ca_certs, args.mqtt_bridge_hostname,
                        args.mqtt_bridge_port)

                # [END iot_mqtt_jwt_refresh]
                # Publish "payload" to the MQTT topic. qos=1 means at least once
                # delivery. Cloud IoT Core also supports qos=0 for at most once
                # delivery.
                client.publish(mqtt_topic, payload, qos=1)

                # Send events every second. State should not be updated as often
                time.sleep(1 if args.message_type == 'event' else 5)

        finally:
            camera.stop_preview()
Example #12
def inference_thread(running, state, result_buffer, frame_buffer, args, identity_dict, current_identity):
    global IDLE, TRACK, RESET, FACE_RECOG_THRESHOLD, FACE_RECOG_THRESHOLD_A
    global od_engine, face_detector, facenet_engine, svm_clf
    # Initialize object detection engine.
    od_engine = DetectionEngine(args.od_model)
    print("device_path: ", od_engine.device_path())
    _, od_width, od_height, _ = od_engine.get_input_tensor_shape()
    print("od input dim: ", od_width, od_height)
    # initial face detector using the opencv haarcascade model
    face_detector = FaceDetector(args.hc_model)
    # Initialize facenet engine.
    facenet_engine = BasicEngine(args.fn_model)
    # load the sklearn support vector machine model from disk
    svm_clf = pickle.load(open(args.svm_model, 'rb'))

    while running.value:
        # check if the frame buffer has a frame, else busy waiting
        if frame_buffer.empty():
            continue
        frame = frame_buffer.get()
        tinf = time.perf_counter()

        if state.value == IDLE:
            fd_results = None
            # reorder image frame from BGR to RGB
            img = frame[:,:,::-1]
            # face detection
            faces_coord = face_detector.detect(img, True)
            # image preprocessing, downsampling
            print("faces_coord: ",faces_coord)
            if not isinstance(faces_coord, type(None)):
                # normalize face image
                face_image = np.array(normalize_faces(img ,faces_coord))
                # facenet to generate face embedding
                facenet_engine.RunInference(face_image.flatten())
                face_emb = facenet_engine.get_raw_output().reshape(1,-1)
                # use SVM to classfy identity with face embedding
                pred_prob = svm_clf.predict_proba(face_emb)
                best_class_index = np.argmax(pred_prob, axis=1)[0]
                best_class_prob = pred_prob[0, best_class_index]
                print("best_class_index: ",best_class_index)
                print("best_class_prob: ",best_class_prob)
                print("label", svm_clf.classes_[best_class_index])
                # Check threshold and verify identify is in the identifiy dictionary
                if best_class_prob > FACE_RECOG_THRESHOLD:
                    face_label = svm_clf.classes_[best_class_index]
                    if face_label in identity_dict:
                        print("\n=================================")
                        print("Identity found: ", face_label, " ",identity_dict[face_label],
                            " with Prob = ", best_class_prob)
                        print("=================================\n")
                        current_identity.value = identity_dict[face_label][0] # ID
                result_buffer.put(faces_coord)
        elif state.value == TRACK:
            od_results = None
            # convert numpy array representation to PIL image with rgb format
            img = Image.fromarray(frame[:,:,::-1], 'RGB')
            # Run inference.
            od_results = od_engine.DetectWithImage(img, threshold=0.30, keep_aspect_ratio=True, relative_coord=False, top_k=10)
            # push result to buffer queue
            result_buffer.put(od_results)
        print(time.perf_counter() - tinf, "sec")
    print("[Finish] inference_thread")
Example #13
def main():
    mot = MotorController()

    model_filename = "mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite"
    label_filename = "coco_labels.txt"
    engine = DetectionEngine(model_filename)
    labels = _read_label_file(label_filename)
    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480

    fnt = ImageFont.load_default()

    # To view preview on VNC,
    # https://raspberrypi.stackexchange.com/a/74390
    with picamera.PiCamera() as camera:
        _monkey_patch_picamera()
        camera.resolution = (CAMERA_WIDTH, CAMERA_HEIGHT)
        camera.framerate = 15
        camera.rotation = 180
        _, width, height, channels = engine.get_input_tensor_shape()
        print("{}, {}".format(width, height))
        overlay_renderer = None
        camera.start_preview()
        try:
            stream = io.BytesIO()
            for foo in camera.capture_continuous(stream,
                                                 format='rgb',
                                                 use_video_port=True):
                # Make Image object from camera stream
                stream.truncate()
                stream.seek(0)
                input = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                input = input.reshape((CAMERA_HEIGHT, CAMERA_WIDTH, 3))
                image = Image.fromarray(input)
                # image.save("out.jpg")

                # Make overlay image plane
                img = Image.new('RGBA',
                                (CAMERA_WIDTH, CAMERA_HEIGHT),
                                (255, 0, 0, 0))
                draw = ImageDraw.Draw(img)
                draw.line((CAMERA_WIDTH//2, 0, CAMERA_WIDTH//2, CAMERA_HEIGHT), width=1)
                draw.line((CAMERA_WIDTH//4, 0, CAMERA_WIDTH//4, CAMERA_HEIGHT), width=1)
                draw.line((3*CAMERA_WIDTH//4, 0, 3*CAMERA_WIDTH//4, CAMERA_HEIGHT), width=1)

                # Run detection
                start_ms = time.time()
                results = engine.DetectWithImage(image,
                                                 threshold=0.2, top_k=5)
                elapsed_ms = (time.time() - start_ms)*1000.0
                obj = None
                if results:
                    obj = next((x for x in results if labels[x.label_id] == "banana"), None)

                if obj:
                    box = obj.bounding_box.flatten().tolist()
                    box[0] *= CAMERA_WIDTH
                    box[1] *= CAMERA_HEIGHT
                    box[2] *= CAMERA_WIDTH
                    box[3] *= CAMERA_HEIGHT
                    draw.rectangle(box, outline='red')
                    draw.text((box[0], box[1]-10), labels[obj.label_id],
                              font=fnt, fill="red")
                    obj_width = box[2] - box[0]
                    obj_center = box[0] + obj_width // 2
                    draw.point((obj_center, box[1] + (box[3] - box[1])//2))
                    print(obj_center - CAMERA_WIDTH // 2)
                    if (obj_center - CAMERA_WIDTH // 2) > CAMERA_WIDTH // 4:
                        print("TURN R")
                        mot.turn_r(radius=30)
                    elif (obj_center - CAMERA_WIDTH // 2) < -CAMERA_WIDTH // 4:
                        print("TURN L")
                        mot.turn_l(radius=30)
                    elif obj_width < CAMERA_WIDTH / 4:
                        print("FORWARD")
                        mot.forward()
                    else:
                        mot.stop()
                    camera.annotate_text = "{0:.2f}ms".format(elapsed_ms)
                else:
                    mot.stop()
                if not overlay_renderer:
                    overlay_renderer = camera.add_overlay(
                        img.tobytes(),
                        size=(CAMERA_WIDTH, CAMERA_HEIGHT), layer=4, alpha=255)
                else:
                    overlay_renderer.update(img.tobytes())
        finally:
            mot.stop()
            if overlay_renderer:
                camera.remove_overlay(overlay_renderer)
            camera.stop_preview()
Example #14
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='File path of Tflite model.',
                        required=True)
    parser.add_argument('--label',
                        help='File path of label file.',
                        required=True)
    parser.add_argument('--synchronous',
                        help='Use to do analysis synchronously.',
                        required=False,
                        action="store_true",
                        default=False)
    parser.add_argument('--local',
                        help='send output to local window, instead of twitch',
                        required=False,
                        action="store_true",
                        default=False)
    args = parser.parse_args()

    with open(args.label, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    engine = DetectionEngine(args.model)

    try:
        cap = cv2.VideoCapture(2)
        _, width, height, channels = engine.get_input_tensor_shape()
        font = cv2.FONT_HERSHEY_SIMPLEX
        is_processing = False  # no analysis running yet
        child = None
        child_result = None
        while True:
            ret, frame = cap.read()

            # the resized version of the frame will be used for analysis
            resized = cv2.resize(frame, (width, height))

            if args.synchronous:
                child_result = []
                analyze_frame(resized, child_result, engine, labels,
                              args.local)
            else:
                if not is_processing:
                    # kick off analysis in subprocess, if not currently analyzing
                    is_processing = True
                    next_child_result = []
                    child = multiprocessing.Process(target=analyze_frame,
                                                    args=(resized,
                                                          next_child_result,
                                                          engine, labels,
                                                          args.local))
                    child.start()
                elif child is not None and not child.is_alive():
                    child.join()
                    child_result = next_child_result
                    is_processing = False

            frame = draw_boxes_on_frame(child_result, frame)

            if args.local:
                # weird black magic needed to draw to the screen
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                cv2.imshow('frame', frame)
            else:
                sys.stdout.buffer.write(frame.tobytes())

    except KeyboardInterrupt:
        print('Shutting down...')
    finally:
        cap.release()
        cv2.destroyAllWindows()
Example #15
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='Path of the detection model.',
                        required=True)
    parser.add_argument('--draw', help='Whether to draw the results.', default=True)
    parser.add_argument('--label', help='Path of the labels file.')
    args = parser.parse_args()

    renderer = None

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = read_label_file(args.label) if args.label else None

    shown = False

    frames = 0
    start_seconds = time.time()

    print('opening socket.')

    #  s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    receiveSocket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    #  senderSocket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind((TCP_IP, TCP_PORT))
    s.listen(1)
    #  senderSocket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

    receiveSocket.bind((UDP_IP, UDP_RECEIVE_PORT))
    #  senderSocket.bind((UDP_IP, UDP_SEND_PORT))

    print('listening...')

    _, width, height, channels = engine.get_input_tensor_shape()

    imageSize = width * height * 3

    print('waiting for client')

    conn, addr = s.accept()

    print('Connection address:', addr)
    # Open image.
    while 1:
        print('waiting for packet')
        data, addr = receiveSocket.recvfrom(66507)

        #  print('got packet of length', len(data))

        if (len(data) > 0):
            start_s = time.time()

            #  print('processing image')
            try:
                image = Image.open(io.BytesIO(data)).convert('RGB')
            except OSError:
                print('Could not read image')
                continue

            input = np.frombuffer(image.tobytes(), dtype=np.uint8)

            results = engine.DetectWithInputTensor(input,
                                                   threshold=0.25,
                                                   top_k=10)

            print('time to process image', (time.time() - start_s) * 1000)

            output = to_output(results, image.size, labels)

            message = json.dumps({'results': output}) + '|'

            #  print('sending', message)
            try:
                conn.send(message.encode('utf-8'))
            except ConnectionResetError:
                print('Socket disconnected...waiting for client')
                conn, addr = s.accept()
Example #16
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='Path of the detection model.',
                        required=True)
    parser.add_argument('--draw', help='Whether to draw the results.', default=True)
    parser.add_argument('--label', help='Path of the labels file.')
    args = parser.parse_args()

    renderer = None

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = read_label_file(args.label) if args.label else None

    shown = False

    frames = 0
    start_seconds = time.time()

    FULL_SIZE_W = 640
    FULL_SIZE_H = 480

    img = Image.new('RGBA', (FULL_SIZE_W, FULL_SIZE_H))
    draw = ImageDraw.Draw(img)

    # Open image.
    with picamera.PiCamera() as camera:
        camera.resolution = (FULL_SIZE_W, FULL_SIZE_H)
        camera.framerate = 30
        _, width, height, channels = engine.get_input_tensor_shape()

        print('input dims', width, height)
        camera.start_preview(fullscreen=False,
                             window=(700, 200, FULL_SIZE_W, FULL_SIZE_H))
        #  camera.start_preview()

        # Raspberry Pi requires images to be resized to multiples of 32x16
        camera_multiple = (16, 32)

        valid_resize_w = width - width % camera_multiple[1]
        valid_resize_h = height - height % camera_multiple[0]

        padding_w = (width - valid_resize_w) // 2
        padding_h = (height - valid_resize_h) // 2

        scale_w = FULL_SIZE_W / width
        scale_h = FULL_SIZE_H / height

        try:
            stream = io.BytesIO()
            for foo in camera.capture_continuous(
                    stream,
                    format='rgb',
                    #  format='jpeg',
                    use_video_port=True,
                    resize=(valid_resize_w, valid_resize_h)):
                stream.truncate()
                stream.seek(0)
                start_frame = time.time()

                input = np.frombuffer(stream.getvalue(), dtype=np.uint8)

                if padding_w > 0 or padding_h > 0:
                    flattened = pad_and_flatten(
                        input, (valid_resize_h, valid_resize_w), padding_h,
                        padding_w)
                else:
                    flattened = input

                # flatten padded element
                reshape_time = time.time() - start_frame

                start_s = time.time()

                # Run inference.
                results = engine.DetectWithInputTensor(flattened,
                                                       threshold=0.2,
                                                       top_k=10)
                elapsed_s = time.time() - start_frame

                if padding_w > 0 or padding_h > 0:
                    boxes = translate_and_scale_boxes(\
                            results, \
                            (valid_resize_w, valid_resize_h),\
                            (padding_w, padding_h), \
                            (FULL_SIZE_W, FULL_SIZE_H))
                else:
                    boxes = scale_boxes(results, (FULL_SIZE_W, FULL_SIZE_H))

                if args.draw:
                    img.putalpha(0)
                    draw_boxes(draw, boxes)
                    if labels:
                        draw_labels(draw, results, boxes, labels)
                    #  display_results(ans, labels, img)
                    imbytes = img.tobytes()
                    if renderer == None:
                        renderer = camera.add_overlay(imbytes,
                                                      size=img.size,
                                                      layer=4,
                                                      format='rgba',
                                                      fullscreen=False,
                                                      window=(700, 200, 640,
                                                              FULL_SIZE_H))
                    else:
                        #  print('updating')
                        renderer.update(imbytes)

                frame_seconds = time.time()
                #  print(frame_seconds - start_seconds, frames)
                fps = frames * 1.0 / (frame_seconds - start_seconds)
                frames = frames + 1

                #  time.sleep(1)
                camera.annotate_text = "%.2fms, %d fps" % (elapsed_s * 1000.0,
                                                           fps)

        finally:
            camera.stop_preview()
Example #17
class App:
    def __init__(self):

        self.frame = None
        self.thread = None
        self.stopEvent = None
        
        self.camera = cv2.VideoCapture(0)
        self.camera.set(3, WIDTH)
        self.camera.set(4, HEIGHT)
        
        self.engine = DetectionEngine('./mobilenet_ssd_v1_coco_quant_postprocess_edgetpu.tflite')
        self.labels = ReadLabelFile('./coco_labels.txt')
        
        self.root = tki.Tk()
        self.root.bind('<Escape>', lambda e: self.onClose())
        self.root.wm_protocol("WM_DELETE_WINDOW", self.onClose)
        self.panel = None

        self.stopEvent = threading.Event()
        self.thread = threading.Thread(target=self.videoLoop, args=())
        self.thread.daemon = True
        self.thread.start()
        
    def findObjects(self, image):
        _, width, height, channels = self.engine.get_input_tensor_shape()
        input = cv2.resize(image, (width, height))
        input = input.reshape((width * height * channels))
        results = self.engine.DetectWithInputTensor(input, top_k=5)
        return results
    
    def videoLoop(self):
        try: 
            while not self.stopEvent.is_set():
                if not self.camera.isOpened():
                    continue
                ret, self.frame = self.camera.read()
                if not ret:
                    continue

                font = cv2.FONT_HERSHEY_SIMPLEX
                self.frame = cv2.cvtColor(self.frame, cv2.COLOR_BGR2RGB)
                results = self.findObjects(self.frame)
                if results:
                    for obj in results:
                        if(obj.score > 0.5):

                            top_left = calculatePosition(obj.bounding_box[0])
                            bottom_right = calculatePosition(obj.bounding_box[1])
                            center_point = (int(top_left[0] + ((bottom_right[0] - top_left[0]) / 2)),
                                            int(top_left[1] + ((bottom_right[1] - top_left[1]) / 2)))
#                            cv2.rectangle(self.frame, top_left, bottom_right, (0, 255, 0), 1)
                
                            label = self.labels[obj.label_id]
                            label_size = cv2.getTextSize(label, font, 0.5,cv2.LINE_AA)
                            label_width = label_size[0][0]
                            label_height = label_size[0][1]
                            
#                            pointer
                            cv2.circle(self.frame, center_point, 5, (0,255,0),-1)
                            cv2.line(self.frame, (int(top_left[0] + label_width/2),top_left[1]), center_point, (0,255,0),2)

#                            label
                            label_x = top_left[0] - 1
                            label_y = top_left[1]-label_height
                            if label_y < 0: label_y = 0
                            cv2.rectangle(self.frame, (label_x, label_y), (label_x+label_width, label_y + label_height), (0,255,0),-1)
                            cv2.putText(self.frame, label, (label_x+5, label_y + label_height-5), font, 0.5, (255,255,255))
           
                image = Image.fromarray(self.frame)     
                image = ImageTk.PhotoImage(image)
        
                if self.panel is None:
                    self.panel = tki.Label(image=image)
                    self.panel.image = image
                    self.panel.pack(side="left", padx=0, pady=0)

                else:
                    self.panel.configure(image=image)
                    self.panel.image = image
                    
            print("[INFO] closing...")
            self.camera.release()
            self.root.destroy()
            return -1
        
        except Exception as e:
            print("[INFO] caught a RuntimeError")
            print(e)
        finally:
            print("[INFO] closing...")
            self.camera.release()
            self.root.destroy()
            return -1


    def onClose(self):
        self.stopEvent.set()
Example #18
class VideoCamera(object):
    def __init__(self):
        print('starting camera')
        with open(Config.LABEL_PATH, 'r', encoding="utf-8") as f:
            pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
            self.labels = dict((int(k), v) for k, v in pairs)
        self.engine = DetectionEngine(Config.MODEL_PATH)

        # Using OpenCV to capture from device 0. If you have trouble capturing
        # from a webcam, comment the line below out and use a video file
        # instead.
        self.video = cv2.VideoCapture(0)
        if self.video:
            self.video.set(3, 640)
            self.video.set(4, 480)
        # If you decide to use video.mp4, you must have this file in the folder
        # as the main.py.
        # self.video = cv2.VideoCapture('video.mp4')

    def __del__(self):
        print('closing camera')
        self.video.release()

    def get_frame(self):
        start_time = time.time()
        font = cv2.FONT_HERSHEY_SIMPLEX
        topLeftCornerOfText = (10, 20)
        bottomLeftCornerOfText = (10, 470)
        fontScale = 0.6
        fontColor = (random.randint(0, 255), random.randint(0, 255),
                     random.randint(0, 255))
        lineType = 1

        annotate_text = ""

        _, width, height, channels = self.engine.get_input_tensor_shape()
        if not self.video.isOpened():
            print('Camera is not opened')
        ret, img = self.video.read()
        if not ret:
            print('Camera is not read')
        input = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input = cv2.resize(input, (width, height))
        input = input.reshape((width * height * channels))
        rows = img.shape[0]
        cols = img.shape[1]
        record_time = time.time()
        elapsed_record_ms = record_time - start_time
        #############
        # Run inference.
        ans = self.engine.DetectWithInputTensor(
            input, threshold=Config.DETECT_THRESHOLD, top_k=Config.TOP_K)
        detection_time = time.time()
        elapsed_detection_ms = detection_time - record_time
        # Display result.
        if ans:
            for obj in ans:
                box = obj.bounding_box.flatten().tolist()
                #print ('id=', obj.label_id, 'score = ', obj.score, 'box = ', box)
                # Draw a rectangle.
                x = box[0] * cols
                y = box[1] * rows
                right = box[2] * cols
                bottom = box[3] * rows
                if obj.score > Config.DETECT_THRESHOLD:
                    cv2.rectangle(img, (int(x), int(y)),
                                  (int(right), int(bottom)),
                                  fontColor,
                                  thickness=1)
                    annotate_text = "%s %.2f" % (self.labels[obj.label_id],
                                                 obj.score)
                    annotate_text_time = time.time()
                    cv2.putText(img, annotate_text, (int(x), int(bottom)),
                                font, fontScale, fontColor, lineType)
        elapsed_frame_ms = (time.time() - start_time) * 1000.0
        frame_rate_text = "FPS: %.2f record: %.2fms detection: %.2fms" % (
            1000.0 / elapsed_frame_ms, elapsed_record_ms * 1000.0,
            elapsed_detection_ms * 1000.0)
        cv2.putText(img, frame_rate_text, topLeftCornerOfText, font, fontScale,
                    fontColor, lineType)
        ret, jpeg = cv2.imencode('.jpg', img)
        return jpeg.tobytes()
Example #19
class ObjectDetector(object):
    def __init__(self, model_path, label_path, use_coral_flag, use_tpu_flag,
                 res_x, res_y, min_conf_threshold):

        self.res_y = res_y
        self.res_x = res_x
        self.use_coral_flag = use_coral_flag
        if use_coral_flag:
            from edgetpu.detection.engine import DetectionEngine
            from edgetpu.utils import dataset_utils
        self.min_conf_threshold = min_conf_threshold

        # Load the label map
        with open(label_path, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        if self.labels[0] == '???':
            del (self.labels[0])

        if use_tpu_flag:
            self.interpreter = Interpreter(
                model_path=model_path,
                experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        else:
            self.interpreter = Interpreter(model_path=model_path)

        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]

        self.is_floating_model = (self.input_details[0]['dtype'] == np.float32)

        self.input_mean = 127.5
        self.input_std = 127.5

        #Coral
        if use_coral_flag:
            self.engine = DetectionEngine(model_path)
            self.labels = dataset_utils.read_label_file(label_path)
            _, height, width, _ = self.engine.get_input_tensor_shape()

    def apply_coral_model(self, input_data):
        print("here")
        ans = self.engine.detect_with_input_tensor(input_data,
                                                   threshold=0.05,
                                                   top_k=10)
        print("here2")
        for obj in ans:
            if self.labels:
                print(self.labels[obj.label_id])
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)

    def apply_tflite_model(self, input_data):
        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[
            0]  # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[
            0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[
            0]  # Confidence of detected objects

        return (boxes, classes, scores)

    def process_frame(self, frame):
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if self.is_floating_model:
            input_data = (np.float32(input_data) -
                          self.input_mean) / self.input_std

        if self.use_coral_flag:
            self.apply_coral_model(input_data)
            boxes, classes, scores = [], [], []  # apply_coral_model only prints, so return empty results
        else:
            (boxes, classes, scores) = self.apply_tflite_model(input_data)

        return (frame, boxes, classes, scores)

    def is_interesting_object(self, scores, classes):
        is_interesting_object = False
        interesting_classes = []
        for i in range(len(scores)):
            if ((scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0)):
                is_interesting_object = True
                interesting_classes.append(self.labels[int(classes[i])])
        return is_interesting_object, interesting_classes

    def draw_frame(self, frame, boxes, classes, scores):
        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if ((scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0)):

                # Get bounding box coordinates and draw box
                # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                ymin = int(max(1, (boxes[i][0] * self.res_y)))
                xmin = int(max(1, (boxes[i][1] * self.res_x)))
                ymax = int(min(self.res_y, (boxes[i][2] * self.res_y)))
                xmax = int(min(self.res_x, (boxes[i][3] * self.res_x)))

                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0),
                              4)

                # Draw label
                object_name = self.labels[int(
                    classes[i]
                )]  # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                      )  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label,
                                                      cv2.FONT_HERSHEY_SIMPLEX,
                                                      0.7, 2)  # Get font size
                label_ymin = max(
                    ymin, labelSize[1] + 10
                )  # Make sure not to draw label too close to top of window
                cv2.rectangle(
                    frame, (xmin, label_ymin - labelSize[1] - 10),
                    (xmin + labelSize[0], label_ymin + baseLine - 10),
                    (255, 255, 255),
                    cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(frame, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                            2)  # Draw label text
        (flag, encodedImage) = cv2.imencode(".jpg", frame)
        return encodedImage
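A minimal usage sketch for the ObjectDetector class above, assuming a local webcam read through cv2.VideoCapture; the model and label file names below are placeholders, not paths taken from the snippet.

import cv2

detector = ObjectDetector(model_path='detect.tflite',   # placeholder path
                          label_path='labelmap.txt',    # placeholder path
                          use_coral_flag=False,
                          use_tpu_flag=False,
                          res_x=640, res_y=480,
                          min_conf_threshold=0.5)

cap = cv2.VideoCapture(0)  # assumed default camera index
while True:
    ok, frame = cap.read()
    if not ok:
        break
    frame, boxes, classes, scores = detector.process_frame(frame)
    interesting, names = detector.is_interesting_object(scores, classes)
    if interesting:
        print('Detected:', names)
    encoded = detector.draw_frame(frame, boxes, classes, scores)  # JPEG bytes, e.g. for an MJPEG stream
cap.release()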
    if args.device_path:
        engine = DetectionEngine(args.model_file, device_path=args.device_path)
    else:
        engine = DetectionEngine(args.model_file)
    print("device path:", engine.device_path())

    output_sizes = engine.get_all_output_tensors_sizes()
    # print("output sizes:", output_sizes)

    count = 0
    indices = []
    for i in output_sizes:
        count = count + i
        indices.append(count)
    # print("indices:", indices)

    input_tensor_shape = engine.get_input_tensor_shape()
    if (input_tensor_shape.size != 4 or input_tensor_shape[3] != 3
            or input_tensor_shape[0] != 1):
        raise RuntimeError(
            'Invalid input tensor shape! Expected: [1, height, width, 3]')
    _, height, width, _ = input_tensor_shape
    print("height, width:", height, width)

    img = Image.open(args.image)
    img = img.resize((width, height))

    input_tensor = np.asarray(img).flatten()

    loop_counts = int(args.loop_counts)
    if (loop_counts > 1):
        for a in range(5):
Example #21
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
            '--model', help='File path of Tflite model.', required=True)
    parser.add_argument(
            '--label', help='File path of label file.', required=True)
    parser.add_argument(
            '--top_k', help="keep top k candidates.", default=3, type=int)
    parser.add_argument(
            '--threshold', help="threshold to filter results.", default=0.5, type=float)
    parser.add_argument(
            '--width', help="Resolution width.", default=640, type=int)
    parser.add_argument(
            '--height', help="Resolution height.", default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:

        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        # Input tensor shape is [1, height, width, channels].
        _, height, width, channels = engine.get_input_tensor_shape()
        rawCapture = PiRGBArray(camera)

        # allow the camera to warmup
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture,
                                                 format='rgb',
                                                 use_video_port=True):
                rawCapture.truncate(0)

                # input_buf = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                input_buf = PIL.Image.fromarray(image)

                # Run inference.
                start_ms = time.time()
                ans = engine.DetectWithImage(input_buf, threshold=args.threshold,
                       keep_aspect_ratio=False, relative_coord=False, top_k=args.top_k)
                # ans = engine.DetectWithInputTensor(input_buf, threshold=0.05,
                #         keep_aspect_ratio=False, relative_coord=False, top_k=10)
                elapsed_ms = time.time() - start_ms

                # Display result.
                if ans:
                    for obj in ans:
                        label_name = 'Unknown'
                        if labels:
                            label_name = labels[obj.label_id]
                        caption = '{0}({1:.2f})'.format(label_name, obj.score)

                        # Draw a rectangle and caption.
                        box = obj.bounding_box.flatten().tolist()
                        visual.draw_rectangle(im, box, colors[obj.label_id])
                        visual.draw_caption(im, box, caption)

                # Calc fps.
                fps = 1 / elapsed_ms
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_fps = 1 / avg_elapsed_ms
                    avg_text = ' AVG: {0:.2f}ms, {1:.2f}fps'.format(
                        (avg_elapsed_ms * 1000.0), avg_fps)

                # Display fps
                fps_text = '{0:.2f}ms, {1:.2f}fps'.format(
                        (elapsed_ms * 1000.0), fps)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # display
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break

        finally:
            camera.stop_preview()

    # When everything done, release the window
    cv2.destroyAllWindows()
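One detail worth flagging in the example above: get_input_tensor_shape() reports the input as [1, height, width, channels], so height comes before width when unpacking. A small sanity-check sketch, assuming a typical 300x300 SSD detection model (the concrete numbers are only illustrative):

input_tensor_shape = engine.get_input_tensor_shape()   # e.g. [1, 300, 300, 3]
_, height, width, channels = input_tensor_shape
assert channels == 3, 'detection models here expect RGB input'
print('model expects %dx%d RGB frames' % (width, height))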
Example #22
0
def main():
    cam_w, cam_h = 640, 480
    default_model_dir = "./all_models"
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument('--top_k',
                        type=int,
                        default=5,
                        help='number of classes with highest score to display')
    parser.add_argument('--threshold',
                        type=float,
                        default=0.5,
                        help='class score threshold')
    args = parser.parse_args()

    with open(args.labels, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    print("Loading %s with %s labels." % (args.model, args.labels))
    engine = DetectionEngine(args.model)

    pygame.init()
    pygame.font.init()
    font = pygame.font.SysFont("Arial", 20)

    pygame.camera.init()
    camlist = pygame.camera.list_cameras()

    # Input tensor shape is [1, height, width, channels].
    _, h, w, _ = engine.get_input_tensor_shape()
    camera = pygame.camera.Camera(camlist[0], (cam_w, cam_h))
    display = pygame.display.set_mode((cam_w, cam_h), 0)

    red = pygame.Color(255, 0, 0)

    camera.start()
    try:
        last_time = time.monotonic()
        while True:
            mysurface = camera.get_image()
            imagen = pygame.transform.scale(mysurface, (w, h))
            input = np.frombuffer(imagen.get_buffer(), dtype=np.uint8)
            start_time = time.monotonic()
            results = engine.DetectWithInputTensor(input,
                                                   threshold=args.threshold,
                                                   top_k=args.top_k)
            stop_time = time.monotonic()
            inference_ms = (stop_time - start_time) * 1000.0
            fps_ms = 1.0 / (stop_time - last_time)
            last_time = stop_time
            annotate_text = "Inference: %5.2fms FPS: %3.1f" % (inference_ms,
                                                               fps_ms)
            for result in results:
                x0, y0, x1, y1 = result.bounding_box.flatten().tolist()
                rect = pygame.Rect(x0 * cam_w, y0 * cam_h, (x1 - x0) * cam_w,
                                   (y1 - y0) * cam_h)
                pygame.draw.rect(mysurface, red, rect, 1)
                label = "%.0f%% %s" % (100 * result.score,
                                       labels[result.label_id])
                text = font.render(label, True, red)
                mysurface.blit(text, (x0 * cam_w, y0 * cam_h))
            text = font.render(annotate_text, True, red)
            mysurface.blit(text, (0, 0))
            display.blit(mysurface, (0, 0))
            pygame.display.flip()
    finally:
        camera.stop()
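As the scaling above implies, the bounding boxes come back as relative coordinates in [0, 1], which is why they are multiplied by the display resolution rather than the model's input size. A small helper sketch for that conversion (the function name is mine, not from the snippet):

def to_pixel_rect(bounding_box, display_w, display_h):
    # bounding_box is a 2x2 array of relative corners: [[x0, y0], [x1, y1]]
    x0, y0, x1, y1 = bounding_box.flatten().tolist()
    return (int(x0 * display_w), int(y0 * display_h),
            int((x1 - x0) * display_w), int((y1 - y0) * display_h))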
Example #23
0
      '--output', help='File path of the output image.')
  args = parser.parse_args()

  if not args.output:
    output_name = 'object_detection_result.jpg'
  else:
    output_name = args.output

  # Initialize engine.
  engine = DetectionEngine(args.model)
  labels = ReadLabelFile(args.label) if args.label else None

  with picamera.PiCamera() as camera:
      camera.resolution = (1028, 712)
      camera.framerate = 30
      # Input tensor shape is [1, height, width, channels].
      _, height, width, channels = engine.get_input_tensor_shape()
      camera.start_preview()
      try:
          stream = io.BytesIO()
          for foo in camera.capture_continuous(stream,
                                               format='rgb',
                                               use_video_port=True,
                                               resize=(width, height)):
              stream.truncate()
              stream.seek(0)
              input = np.frombuffer(stream.getvalue(), dtype=np.uint8)
              # cv2.imwrite('current_frame.jpg', input)
              # img = Image.open('current_frame.jpg')
              # draw = ImageDraw.Draw(img)
              start_ms = time.time()
              ans = engine.DetectWithInputTensor(input, threshold=0.5, top_k=10)
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--model', help='File path of Tflite model.', required=True)
  parser.add_argument('--label', help='File path of label file.')
  args = parser.parse_args()

  labels = dataset_utils.read_label_file(args.label) if args.label else None
  engine = DetectionEngine(args.model)

  with picamera.PiCamera() as camera:
    preview_size = (640, 480)
    camera.resolution = preview_size
    camera.framerate = 30
    # camera.hflip = True
    # camera.vflip = True
    # camera.rotation = 90
    _, input_height, input_width, _ = engine.get_input_tensor_shape()

    input_size = (input_width, input_height)

    # Width is rounded up to the nearest multiple of 32,
    # height to the nearest multiple of 16.
    capture_size = (math.ceil(input_width / 32) * 32,
                    math.ceil(input_height / 16) * 16)

    # Actual detection area on preview.
    detect_size = (preview_size[0] * input_size[0] / capture_size[0],
                   preview_size[1] * input_size[1] / capture_size[1])

    # Make annotator smaller for efficiency.
    annotator_factor = 0.5
    annotator_size = (int(preview_size[0] * annotator_factor),
                      int(preview_size[1] * annotator_factor))

    # Font for drawing detection candidates
    font = ImageFont.truetype(
                '/usr/share/fonts/truetype/freefont/FreeMonoBold.ttf',
                size=12)

    camera.start_preview()
    annotator = Annotator(camera,
                          dimensions=annotator_size,
                          default_color=(255, 255, 255, 64))

    def annotate(candidates):
      annotator.clear()

      # Get actual coordinates to draw
      def translate(relative_coord):
        return (detect_size[0] * relative_coord[0] * annotator_factor,
                detect_size[1] * relative_coord[1] * annotator_factor)

      for c in candidates:
        top_left = translate(c.bounding_box[0])
        bottom_right = translate(c.bounding_box[1])

        annotator.bounding_box(top_left + bottom_right)

        text = '{} {:.2f}'.format(labels[c.label_id], c.score) \
                if labels else '{:.2f}'.format(c.score)

        annotator.text(top_left, text, font=font)

      annotator.update()

    try:
      stream = io.BytesIO()
      for _ in camera.capture_continuous(
          stream, format='rgb', use_video_port=True, resize=capture_size):
        stream.truncate()
        stream.seek(0)

        input_tensor = np.frombuffer(stream.getvalue(), dtype=np.uint8)
        if input_size != capture_size:
          # Crop to input size. Note dimension order (height, width, channels)
          input_tensor = input_tensor.reshape(
              (capture_size[1], capture_size[0], 3))[
                  0:input_height, 0:input_width, :].ravel()

        start_ms = time.time()
        results = engine.detect_with_input_tensor(input_tensor, top_k=3)
        elapsed_ms = time.time() - start_ms

        annotate(results)

        camera.annotate_text = '{:.2f}ms'.format(elapsed_ms * 1000.0)

    finally:
      # Maybe should make this an annotator method
      camera.remove_overlay(annotator._overlay)
      camera.stop_preview()
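For reference, the padded capture size used above follows from the comment in the code: the video-port resize rounds the width up to a multiple of 32 and the height to a multiple of 16, so a 300x300 model input becomes a (320, 304) capture. A standalone sketch of the crop-and-flatten step under that assumption:

import math
import numpy as np

input_width, input_height = 300, 300                # assumed model input size
capture_size = (math.ceil(input_width / 32) * 32,
                math.ceil(input_height / 16) * 16)  # -> (320, 304)

frame = np.zeros((capture_size[1], capture_size[0], 3), dtype=np.uint8)  # stand-in RGB frame
input_tensor = frame[0:input_height, 0:input_width, :].ravel()           # crop, then flatten
assert input_tensor.size == input_height * input_width * 3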