Example #1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image',
                        help='File path of the image.',
                        required=True)
    args = parser.parse_args()

    output_name = '/opt/demo/images/objectdetection_{0}.jpg'.format(
        strftime("%Y%m%d%H%M%S", gmtime()))

    # Initialize engine.
    engine = DetectionEngine(
        '/opt/demo/mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite')

    # Open image.
    img = Image.open(args.image)
    draw = ImageDraw.Draw(img)

    # Run inference.
    ans = engine.DetectWithImage(img,
                                 threshold=0.05,
                                 keep_aspect_ratio=True,
                                 relative_coord=False,
                                 top_k=10)

    # Display result.
    if ans:
        for obj in ans:
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)
            draw.rectangle(box, outline='red')
        img.save(output_name)
    else:
        print('No object detected!')
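
Example #1 assumes a few imports and an entry point that are not shown in the listing; a minimal sketch of the surrounding module (the paths under /opt/demo/ come from the snippet itself) could look like this:

# Sketch of the imports and entry point assumed by Example #1 (not part of the original listing).
import argparse
from time import gmtime, strftime

from PIL import Image, ImageDraw
from edgetpu.detection.engine import DetectionEngine

if __name__ == '__main__':
    main()
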
class Model():
    def __init__(self):
        default_model_dir = './app/all_models'
        default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'
        default_labels = 'coco_labels.txt'
        parser = argparse.ArgumentParser()
        parser.add_argument('--model',
                            help='.tflite model path',
                            default=os.path.join(default_model_dir,
                                                 default_model))
        parser.add_argument('--labels',
                            help='label file path',
                            default=os.path.join(default_model_dir,
                                                 default_labels))
        parser.add_argument(
            '--top_k',
            type=int,
            default=3,
            help='number of classes with highest score to display')
        parser.add_argument('--threshold',
                            type=float,
                            default=0.1,
                            help='class score threshold')
        self.args = parser.parse_args()

        print("Loading %s with %s labels." %
              (self.args.model, self.args.labels))
        self.engine = DetectionEngine(self.args.model)
        self.labels = load_labels(self.args.labels)

        self.last_time = time.monotonic()

    def user_callback(self, image):

        #added
        global flaskImage
        global flaskStatus
        flaskImage = image
        start_time = time.monotonic()
        objs = self.engine.DetectWithImage(image,
                                           threshold=self.args.threshold,
                                           keep_aspect_ratio=True,
                                           relative_coord=True,
                                           top_k=self.args.top_k)
        end_time = time.monotonic()
        text_lines = [
            'Inference: %.2f ms' % ((end_time - start_time) * 1000),
            'FPS: %.2f fps' % (1.0 / (end_time - self.last_time)),
        ]
        objBoxes = []
        for obj in objs:
            x0, y0, x1, y1 = obj.bounding_box.flatten().tolist()
            percent = int(100 * obj.score)
            #print(percent)
            objBoxes.append([percent, x0, y0, x1, y1])

        self.last_time = end_time

        flaskStatus = objBoxes
        return (flaskStatus)
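
The load_labels helper used by the Model class above is not defined in the listing; a common minimal implementation, assuming a coco_labels.txt-style file with one "<id> <name>" pair per line, would be:

def load_labels(path):
    # Sketch of the assumed helper: parse lines of the form "<id> <name>" into a dict.
    with open(path, 'r', encoding='utf-8') as f:
        pairs = (line.strip().split(maxsplit=1) for line in f if line.strip())
        return {int(k): v for k, v in pairs}
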
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='Path of the detection model.',
                        required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument('--input',
                        help='File path of the input image.',
                        required=True)
    parser.add_argument('--output', help='File path of the output image.')
    parser.add_argument('--threshold',
                        type=float,
                        default=0.45,
                        help='Threshold for DetectionEngine')
    args = parser.parse_args()

    if not args.output:
        output_name = 'object_detection_result.jpg'
    else:
        output_name = args.output

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Open image.
    img = Image.open(args.input)
    draw = ImageDraw.Draw(img)

    # Run inference.
    ans = engine.DetectWithImage(img,
                                 threshold=args.threshold,
                                 keep_aspect_ratio=True,
                                 relative_coord=False,
                                 top_k=10)

    # Display result.
    if ans:
        for obj in ans:
            print('-----------------------------------------')
            if labels:
                print(labels[obj.label_id])
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)
            # Draw a rectangle.
            draw.rectangle(box, outline='red')
        img.save(output_name)
        if platform.machine() == 'x86_64':
            # For gLinux, simply show the image.
            img.show()
        elif platform.machine() == 'armv7l':
            # For Raspberry Pi, you need to install 'feh' to display image.
            subprocess.Popen(['feh', output_name])
        else:
            print('Please check ', output_name)
    else:
        print('No object detected!')
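
ReadLabelFile, referenced here and in several later examples, is also not part of the listing; a minimal sketch using the same "<id> <name>" label format assumed above:

def ReadLabelFile(file_path):
    # Sketch of the assumed helper: read "<id> <name>" lines into a dict.
    ret = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            pair = line.strip().split(maxsplit=1)
            ret[int(pair[0])] = pair[1].strip()
    return ret
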
Example #4
def inf_proc(qa, qb, args, status):
    from edgetpu.detection.engine import DetectionEngine
    engine = DetectionEngine(args.model)
    while True:
        img = qa.get()
        if status.value != 0: break
        ans = engine.DetectWithImage(img, threshold=args.threshold, keep_aspect_ratio=True,
                                     relative_coord=False, top_k=10)
        if qb.full(): qb.get()
        qb.put(ans)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', help='File path of Tflite model.', required=True)
    parser.add_argument(
        '--label', help='File path of label file.', required=True)
    parser.add_argument(
        '--maxobjects', type=int, default=3, help='Maximum objects')
    parser.add_argument(
        '--threshold', type=float, default=0.3, help="Minimum threshold")
    parser.add_argument(
        '--picamera', action='store_true',
        help="Use PiCamera for image capture", default=False)
    args = parser.parse_args()

    # Prepare labels.
    labels = ReadLabelFile(args.label) if args.label else None
    # Initialize engine.
    engine = DetectionEngine(args.model)

    # Initialize video stream
    vs = VideoStream(usePiCamera=args.picamera, resolution=(640, 480)).start()
    time.sleep(1)

    fps = FPS().start()

    while True:
        try:
            # Read frame from video
            screenshot = vs.read()
            image = Image.fromarray(screenshot)

            # Perform inference
            results = engine.DetectWithImage(image, threshold=args.threshold, keep_aspect_ratio=True, relative_coord=False, top_k=args.maxobjects)

            # draw image
            draw_image(image, results, labels)

            # closing condition
            if cv2.waitKey(5) & 0xFF == ord('q'):
                fps.stop()
                break

            fps.update()
        except KeyboardInterrupt:
            fps.stop()
            break

    print("Elapsed time: " + str(fps.elapsed()))
    print("Approx FPS: :" + str(fps.fps()))

    cv2.destroyAllWindows()
    vs.stop()
    time.sleep(2)
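
The draw_image helper called in the loop above is not shown; a plausible OpenCV-based sketch (the helper name comes from the example, the drawing details are assumptions) that renders absolute-pixel boxes, since the inference uses relative_coord=False:

import cv2
import numpy as np

def draw_image(image, results, labels):
    # Sketch of the assumed helper: draw detection boxes on a PIL frame and show it with OpenCV.
    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    for obj in results:
        x0, y0, x1, y1 = [int(v) for v in obj.bounding_box.flatten().tolist()]
        cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 255, 0), 2)
        if labels:
            caption = '{} {:.2f}'.format(labels.get(obj.label_id, ''), obj.score)
            cv2.putText(frame, caption, (x0, max(y0 - 5, 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    cv2.imshow('Detection', frame)
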
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='Path of the detection model.',
                        required=True)
    args = parser.parse_args()

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = {0: "hatch", 1: "cargo"}

    cs = cscore.CameraServer.getInstance()
    camera = cs.startAutomaticCapture()
    camera.setResolution(WIDTH, HEIGHT)
    cvSink = cs.getVideo()
    img = np.zeros(shape=(HEIGHT, WIDTH, 3), dtype=np.uint8)

    output = cs.putVideo("MLOut", WIDTH, HEIGHT)

    start = time()
    # Open image.
    while True:
        t, img = cvSink.grabFrame(img)
        frame = Image.fromarray(img)
        draw = ImageDraw.Draw(frame)

        # Run inference.
        ans = engine.DetectWithImage(frame,
                                     threshold=0.05,
                                     keep_aspect_ratio=True,
                                     relative_coord=False,
                                     top_k=10)

        # Display result.
        if ans:
            for obj in ans:
                print('-----------------------------------------')
                if labels:
                    print(labels[obj.label_id])
                print('score = ', obj.score)
                box = obj.bounding_box.flatten().tolist()
                x1, y1, x2, y2 = box
                print(abs(x1 - x2))
                print('box = ', box)
                # Draw a rectangle.
                draw.rectangle(box, outline='green')
                output.putFrame(np.array(frame))
        else:
            print('No object detected!')
            output.putFrame(img)
        print("FPS:", 1 / (time() - start))

        start = time()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='Path of the detection model.',
                        required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument('--input',
                        help='File path of the input image.',
                        required=True)
    parser.add_argument('--output', help='File path of the output image.')
    args = parser.parse_args()

    if not args.output:
        output_name = 'object_detection_result.jpg'
    else:
        output_name = args.output

    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    img = Image.open(args.input)
    draw = ImageDraw.Draw(img)
    imcv = cv2.cvtColor(numpy.array(img), cv2.COLOR_RGB2BGR)

    ans = engine.DetectWithImage(img,
                                 threshold=0.40,
                                 keep_aspect_ratio=True,
                                 relative_coord=False,
                                 top_k=10)
    if ans:
        for obj in ans:
            print('-----------------------------------------')
            box = obj.bounding_box.flatten().tolist()
            if labels[obj.label_id] in ('white', 'red'):
                if get_pink_bounding_box(imcv, box):
                    print(labels[5])
                else:
                    print(labels[obj.label_id])
            else:
                print(labels[obj.label_id])
            print('score = ', obj.score)
            print('box = ', box)
            draw.rectangle(box, outline='red')
        img.save(output_name)
        if platform.machine() == 'x86_64':
            img.show()
        elif platform.machine() == 'armv7l':
            subprocess.Popen(['feh', output_name])
        else:
            print('Please check ', output_name)
    else:
        print('No object detected!')
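
The preceding example calls get_pink_bounding_box, which is not included in the listing; one way it could work, purely as an assumption, is to crop the box from the BGR frame, threshold it in HSV, and report whether enough pink pixels are present:

def get_pink_bounding_box(imcv, box):
    # Hypothetical sketch of the assumed helper: True if the box region is predominantly pink.
    x0, y0, x1, y1 = [int(v) for v in box]
    crop = imcv[max(y0, 0):y1, max(x0, 0):x1]
    if crop.size == 0:
        return False
    hsv = cv2.cvtColor(crop, cv2.COLOR_BGR2HSV)
    # Rough pink hue range; the exact thresholds are assumptions.
    mask = cv2.inRange(hsv, (140, 50, 100), (175, 255, 255))
    return cv2.countNonZero(mask) > 0.3 * mask.size
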
Example #8
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument('--top_k',
                        type=int,
                        default=3,
                        help='number of classes with highest score to display')
    parser.add_argument('--threshold',
                        type=float,
                        default=0.1,
                        help='class score threshold')
    args = parser.parse_args()

    print("Loading %s with %s labels." % (args.model, args.labels))
    engine = DetectionEngine(args.model)
    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(0)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)

        pil_im = Image.fromarray(cv2_im)

        objs = engine.DetectWithImage(pil_im,
                                      threshold=args.threshold,
                                      keep_aspect_ratio=True,
                                      relative_coord=True,
                                      top_k=args.top_k)

        cv2_im = append_objs_to_img(cv2_im, objs, labels)
        cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)

        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
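
Example #8 calls append_objs_to_img, which is not defined in the listing; because the inference above uses relative_coord=True, the helper has to scale the normalized boxes back to the frame size. A minimal sketch (drawing style is an assumption):

def append_objs_to_img(cv2_im, objs, labels):
    # Sketch of the assumed helper: scale relative boxes to pixels and draw them.
    height, width, _ = cv2_im.shape
    for obj in objs:
        x0, y0, x1, y1 = obj.bounding_box.flatten().tolist()
        x0, y0 = int(x0 * width), int(y0 * height)
        x1, y1 = int(x1 * width), int(y1 * height)
        label = '{}% {}'.format(int(100 * obj.score), labels.get(obj.label_id, ''))
        cv2_im = cv2.rectangle(cv2_im, (x0, y0), (x1, y1), (0, 255, 0), 2)
        cv2_im = cv2.putText(cv2_im, label, (x0, y0 + 20),
                             cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
    return cv2_im
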
Example #9
def func_detection(buffer_cap2det, lock_cap2det, result_det2disp,
                   lock_det2disp):
    global g_is_exit
    engine = DetectionEngine(MODEL_NAME)
    buff = None
    while True:
        time_start = time.time()
        # print("func_detection")

        ### Receive captured image from CAPTURE
        lock_cap2det.acquire()
        if len(buffer_cap2det) > 0:
            buff = buffer_cap2det.pop()
        lock_cap2det.release()

        if buff is not None:
            ### Detect objects
            pil_img = cv2pil(buff)
            ans = engine.DetectWithImage(pil_img,
                                         threshold=0.5,
                                         keep_aspect_ratio=False,
                                         relative_coord=True,
                                         top_k=10)
            results = []
            if ans:
                for obj in ans:
                    # print ('-----------------------------------------')
                    # print('label = ', label2string[obj.label_id])
                    # print('score = ', obj.score)
                    box = obj.bounding_box.flatten().tolist()
                    # print ('box = ', box)
                    results.append([
                        box[0], box[1], box[2], box[3],
                        label2string[obj.label_id]
                    ])

            ### Send detection results to DISPLAY
            lock_det2disp.acquire()
            for i in range(len(result_det2disp)):
                # delete all of the old buffers (no need to release memory explicitly (GC will be done automatically))
                _ = result_det2disp.pop()
            result_det2disp.append(results)
            lock_det2disp.release()
        else:
            time.sleep(0.001)
        if g_is_exit == True:
            break

        time_end = time.time()
        # print ("Detection:{0}".format((time_end - time_start) * 1000) + "[msec]")
    print("func_detection: exit")
def recognition(frameBuffer, objsBuffer, stop_prog):
    engine = DetectionEngine('mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite')
    with open("trained_knn_model.clf", 'rb') as f:
        knn_clf = pickle.load(f)
    while True:
        if stop_prog.is_set():
            break
        if frameBuffer.empty():
            continue
        t0 = time.monotonic()
        bgr_img = frameBuffer.get()
        rgb_img = bgr_img[:, :, ::-1].copy()
        arr_img = Image.fromarray(rgb_img)
        t1 = time.monotonic()
        objs = engine.DetectWithImage(arr_img, threshold = 0.1, keep_aspect_ratio = True, relative_coord = False, top_k = 100)
        t2 = time.monotonic()
        coral_boxes = []
        for obj in objs:
            x0, y0, x1, y1 = obj.bounding_box.flatten().tolist()
            w = x1-x0
            h = y1-y0
            x0 = int(x0+w/10)
            y0 = int(y0+h/4)
            x1 = int(x1-w/10)
            y1 = int(y1)
            coral_boxes.append((y0, x1, y1, x0))
        t3 = time.monotonic()
        kk = 1
        if coral_boxes:
            enc = face_recognition.face_encodings(rgb_img, coral_boxes)
            closest_distances = knn_clf.kneighbors(enc, n_neighbors=1)
            are_matches = [closest_distances[0][i][0] <= 0.55 for i in range(len(coral_boxes))]
            predR = []
            locR = []
            for pred, loc, rec in zip(knn_clf.predict(enc), coral_boxes, are_matches):
                if rec:
                    predR.append(pred)
                else:
                    predR.append("unknown_{n}".format(n=kk))
                    kk += 1
                locR.append(loc)
            if objsBuffer.empty():
                objsBuffer.put({"boxes": locR, "names": predR})
        else:
            if objsBuffer.empty():
                objsBuffer.put(None)
        t4 = time.monotonic()
        print('Prep time = {dt1:.1f}ms, Infer time = {dt2:.1f}ms, Face enc time = {dt3:.1f}ms, Overall time = {dt4:.1f}ms'.format(
            dt1=(t1-t0)*1000, dt2=(t2-t1)*1000, dt3=(t4-t3)*1000, dt4 = (t4-t0)*1000))
Example #11
def main():
    model = "path to model"
    threshold = 0.3
    max_objects = 3

    # Prepare labels.
    labels = None
    # Initialize engine.
    engine = DetectionEngine(model)

    # Initialize video stream
    vs = VideoStream(usePiCamera=True, resolution=(640, 480)).start()
    time.sleep(1)

    fps = FPS().start()

    while True:
        try:
            # Read frame from video
            screenshot = vs.read()
            image = Image.fromarray(screenshot)

            # Perform inference
            results = engine.DetectWithImage(image,
                                             threshold=threshold,
                                             keep_aspect_ratio=True,
                                             relative_coord=False,
                                             top_k=max_objects)

            # draw image
            draw_image(image, results, labels)

            # closing condition
            if cv2.waitKey(5) & 0xFF == ord('q'):
                fps.stop()
                break

            fps.update()
        except KeyboardInterrupt:
            fps.stop()
            break

    print("Elapsed time: " + str(fps.elapsed()))
    print("Approx FPS: :" + str(fps.fps()))

    cv2.destroyAllWindows()
    vs.stop()
    time.sleep(2)
Example #12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='File path of Tflite model.',
                        required=True)
    parser.add_argument('--label',
                        help='File path of label file.',
                        required=True)
    args = parser.parse_args()

    with open(args.label, 'r', encoding="utf-8") as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    engine = DetectionEngine(args.model)

    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        _, width, height, channels = engine.get_input_tensor_shape()
        try:
            stream = io.BytesIO()
            for foo in camera.capture_continuous(stream,
                                                 format='rgb',
                                                 use_video_port=True,
                                                 resize=(width, height)):
                stream.truncate()
                stream.seek(0)
                # DetectWithImage expects a PIL image; the raw RGB buffer from the
                # camera is passed to DetectWithInputTensor instead.
                input_tensor = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                start_ms = time.time()
                results = engine.DetectWithInputTensor(input_tensor,
                                                       threshold=0.05,
                                                       top_k=10)

                elapsed_ms = time.time() - start_ms
                if results:
                    logging.info("frame has {} objects".format(len(results)))
                    for detected_object in results:
                        logging.info("label: {}, score: {}, bounds: {}".format(
                            labels[detected_object.label_id],
                            detected_object.score,
                            detected_object.bounding_box.flatten().tolist()))
        finally:
            logging.info("done capturing")
def inferencer(results, frameBuffer, model, camera_width, camera_height):
    engine = DetectionEngine(model)
    while True:
        if frameBuffer.empty():
            continue
        # Run inference.
        color_image = frameBuffer.get()
        prepimg = color_image[:, :, ::-1].copy()
        prepimg = Image.fromarray(prepimg)
        tinf = time.perf_counter()
        ans = engine.DetectWithImage(prepimg,
                                     threshold=0.3,
                                     keep_aspect_ratio=True,
                                     relative_coord=False,
                                     top_k=10)
        #print(time.perf_counter() - tinf, "sec")
        results.put(ans)
Example #14
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--model', help='Path of the detection model.', required=True)
  parser.add_argument(
      '--label', help='Path of the labels file.')
  parser.add_argument(
      '--image', help='File path of the input image.', required=True)
  parser.add_argument(
      '--output', help='File path of the output image.')
  args = parser.parse_args()

  if not args.output:
    output_name = 'object_detection_result.jpg'
  else:
    output_name = args.output

  # Initialize engine.
  engine = DetectionEngine(args.model)
  labels = ReadLabelFile(args.label) if args.label else None

  # Open image.
  img = Image.open(args.image)
  draw = ImageDraw.Draw(img)

  # Run inference.
  ans = engine.DetectWithImage(img, threshold=0.05, keep_aspect_ratio=True,
                               relative_coord=False, top_k=10)

  # Display result.
  if ans:
    for obj in ans:
      print ('-----------------------------------------')
      if labels:
        print(labels[obj.label_id])
      print ('score = ', obj.score)
      box = obj.bounding_box.flatten().tolist()
      print ('box = ', box)
      # Draw a rectangle.
      draw.rectangle(box, outline='red')
    img.save(output_name)

    print ('Please check ', output_name)
  else:
    print ('No object detected!')
def inferencer(results, frameBuffer, model, camera_width, camera_height):

    engine = None

    # Acquisition of TPU list without model assignment
    devices = edgetpu_utils.ListEdgeTpuPaths(
        edgetpu_utils.EDGE_TPU_STATE_UNASSIGNED)

    devopen = False
    for device in devices:
        try:
            engine = DetectionEngine(model, device)
            devopen = True
            break
        except:
            continue

    if devopen == False:
        print("TPU Devices open Error!!!")
        sys.exit(1)

    print("Loaded Graphs!!! ")

    while True:

        if frameBuffer.empty():
            continue

        # Run inference.
        color_image = frameBuffer.get()
        prepimg = color_image[:, :, ::-1].copy()
        prepimg = Image.fromarray(prepimg)

        tinf = time.perf_counter()
        ans = engine.DetectWithImage(prepimg,
                                     threshold=0.5,
                                     keep_aspect_ratio=True,
                                     relative_coord=False,
                                     top_k=10)
        print(time.perf_counter() - tinf, "sec")
        results.put(ans)
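
This worker is written to run in its own process and communicate through queues; a minimal launch sketch (queue sizes, model path, and resolution are assumptions, not part of the original listing) might look like:

import multiprocessing as mp

if __name__ == '__main__':
    results = mp.Queue(maxsize=2)
    frame_buffer = mp.Queue(maxsize=2)
    worker = mp.Process(
        target=inferencer,
        args=(results, frame_buffer,
              'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite', 640, 480),
        daemon=True)
    worker.start()
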
Example #16
def recognition(frameBuffer, objsBuffer, stop_prog, path):
    engine = DetectionEngine(
        'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite')
    face_cnt = 0
    while True:
        if stop_prog.is_set():
            break
        if frameBuffer.empty():
            continue
        t0 = time.monotonic()
        bgr_img = frameBuffer.get()
        rgb_img = bgr_img[:, :, ::-1].copy()
        arr_img = Image.fromarray(rgb_img)
        t1 = time.monotonic()
        objs = engine.DetectWithImage(arr_img,
                                      threshold=0.1,
                                      keep_aspect_ratio=True,
                                      relative_coord=False,
                                      top_k=5)
        t2 = time.monotonic()
        coral_boxes = []
        m = 0
        for obj in objs:
            x0, y0, x1, y1 = obj.bounding_box.flatten().tolist()
            w = x1 - x0
            h = y1 - y0
            x0 = int(x0 + w / 10)
            y0 = int(y0 + h / 4)
            x1 = int(x1 - w / 10)
            y1 = int(y1)
            coral_boxes.append((y0, x1, y1, x0))
            face_mini = bgr_img[y0:y1, x0:x1]
            cv2.imwrite(path + "face_" + str(face_cnt) + "_" + str(m) + ".jpg",
                        face_mini)
            print(path + "face_" + str(face_cnt) + "_" + str(m) + ".jpg")
            m += 1
            face_cnt += 1
        sleep(1)
        enc = []
        objsBuffer.put({"boxes": coral_boxes, "encodings": enc})
Example #17
class SynchronizedDetectionEngine(Engine):
    @inject
    def __init__(self, config: ConfigProvider):
        from edgetpu.detection.engine import DetectionEngine
        self._engine = DetectionEngine(config.detector_model_file)
        self._engine_lock = Lock()
        self._pending_processing_start = 0

    def detect(self, img, threshold):
        with self._engine_lock:
            self._pending_processing_start = time.monotonic()
            result = self._engine.DetectWithImage(img,
                                                  threshold=threshold,
                                                  keep_aspect_ratio=True,
                                                  relative_coord=False,
                                                  top_k=1000)
            self._pending_processing_start = 0
            return result

    def get_pending_processing_seconds(self) -> float:
        start = self._pending_processing_start
        return (time.monotonic() - start) if start != 0 else 0
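
get_pending_processing_seconds suggests the engine is meant to be monitored from another thread; a hedged sketch of such a watchdog (the threshold and polling interval are assumptions):

import threading
import time

def watchdog(engine, limit_s=5.0):
    # Sketch: warn if a single inference has been pending longer than limit_s.
    while True:
        pending = engine.get_pending_processing_seconds()
        if pending > limit_s:
            print('Inference pending for {:.1f} s'.format(pending))
        time.sleep(1.0)

# threading.Thread(target=watchdog, args=(engine,), daemon=True).start()
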
def ssd_inferencer(results, frameBuffer, model, device):

    ssd_engine = None
    ssd_engine = DetectionEngine(model, device)
    print("Loaded Graphs!!! (SSD)")

    while True:

        if frameBuffer.empty():
            continue

        # Run inference.
        color_image = frameBuffer.get()
        prepimg_ssd = color_image[:, :, ::-1].copy()
        prepimg_ssd = Image.fromarray(prepimg_ssd)
        tinf = time.perf_counter()
        result_ssd = ssd_engine.DetectWithImage(prepimg_ssd,
                                                threshold=0.5,
                                                keep_aspect_ratio=True,
                                                relative_coord=False,
                                                top_k=10)
        print(time.perf_counter() - tinf, "sec (SSD)")
        results.put(result_ssd)
Example #19
def main():
    '''Main function for running head tracking.
    It will initialize the picam and Coral TPU with a given model (e.g. face recognition).
    Then, it will capture an image, feed it into the Coral TPU, get coordinates and move the servos
    (pan, tilt) towards the center of the box.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', help='Path of the detection model.', required=True)

    args = parser.parse_args()

    image_number = 0

    # Let's create the images directory
    if not exists(IMAGES_DIR):
        makedirs(IMAGES_DIR)

    # Initialize the pan-tilt thing
    reset_pan_tilt()

    # Initialize engine.

    engine = DetectionEngine(args.model)

    # Initialize the camera
    #cam = cv2.VideoCapture(camera)
    camera = PiCamera()
    time.sleep(2)
    camera.resolution = IMAGE_SIZE
    camera.vflip = True
    # Create the in-memory stream
    stream = io.BytesIO()

    debug = DEBUG
    image_center = IMAGE_CENTER

    object_on_sight = False
    timer = None

    print("Capture started")
    while True:
        try:
            #ret, cv2_im = cam.read()
            stream = io.BytesIO() #wipe the contents
            camera.capture(stream, format='jpeg', use_video_port=True)
            stream.seek(0)
            # we need to flip it here!
            pil_im = Image.open(stream)
            #cv2_im = np.array(pil_im)
            #cv2_im = cv2.flip(cv2_im, 1) #Flip horizontally
            #cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)

            # Remember. Doc at https://coral.ai/docs/reference/edgetpu.detection.engine/
            ans = engine.DetectWithImage(pil_im, threshold=0.3, keep_aspect_ratio=True,
                                        relative_coord=False, top_k=1)
            if ans:
                #for obj in ans:  # For multiple objects on screen
                obj = ans[0]  # Follow only one object, the first detected object

                object_on_sight = True
                timer = None
                result = show_box_center_and_size(obj.bounding_box)
                moved = center_camera(result, image_center, debug)

                if moved == 0:  # it didn't move, and there is an object on sight
                    image_number = save_picture(pil_im, image_number)

            else:
                #print("No objects detected with the given threshold")
                object_on_sight = False

            if not object_on_sight and not timer:  # if there was an object before, and is no longer on screen
                timer = time.time()  # We start a timer for resetting the angles later

            elif not object_on_sight and timer:
                elapsed_time = time.time() - timer
                # If 5 seconds have passed without activity. More than 8, do nothing
                if elapsed_time > 5 and elapsed_time < 8:
                    reset_pan_tilt()
                    timer = None  # We stop the timer

        except KeyboardInterrupt:
            print("Closing program")
            reset_pan_tilt()
            sys.exit()

        except:
            reset_pan_tilt()
            raise
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='Path of the detection model.',
                        required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument(
        '--mode',
        help='Mode for the detection: OBJECT_DETECTION or IMAGE_CLASSIFICATION',
        required=True)
    parser.add_argument('--camera',
                        help='Camera source (if multiple available)',
                        type=int,
                        required=False)

    args = parser.parse_args()

    # Initialize engine.

    if args.mode == "OBJECT_DETECTION":
        engine = DetectionEngine(args.model)
    elif args.mode == "IMAGE_CLASSIFICATION":
        engine = ClassificationEngine(args.model)
    else:
        print(
            "Please choose a mode: OBJECT_DETECTION or IMAGE_CLASSIFICATION"
        )
        exit()

    labels = read_label_file(args.label) if args.label else None
    label = None
    camera = args.camera if args.camera else 0

    # Initialize the camera
    #cam = cv2.VideoCapture(camera)
    camera = PiCamera()
    time.sleep(2)
    camera.resolution = (640, 480)
    # Create the in-memory stream
    stream = io.BytesIO()

    # Initialize the timer for fps
    start_time = time.time()
    frame_times = deque(maxlen=40)

    while True:
        #ret, cv2_im = cam.read()
        stream = io.BytesIO()  #wipe the contents
        camera.capture(stream, format='jpeg', use_video_port=True)
        stream.seek(0)
        pil_im = Image.open(stream)
        cv2_im = np.array(pil_im)
        cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)

        if args.mode == "OBJECT_DETECTION":
            ans = engine.DetectWithImage(pil_im,
                                         threshold=0.05,
                                         keep_aspect_ratio=True,
                                         relative_coord=False,
                                         top_k=10)
            if ans:
                for obj in ans:
                    if obj.score > 0.4:
                        if labels:
                            label = labels[obj.label_id] + " - {0:.2f}".format(
                                obj.score)
                        draw_rectangles(obj.bounding_box, cv2_im, label=label)
            else:
                draw_text(cv2_im, 'No object detected!')

        else:
            i = 0
            for result in engine.ClassifyWithImage(pil_im, top_k=5):
                if result:
                    label = labels[result[0]]
                    score = result[1]

                    draw_text(cv2_im, label, i)
                    i += 1
                else:
                    draw_text(cv2_im, 'No classification detected!')
        lastInferenceTime = engine.get_inference_time()
        frame_times.append(time.time())
        fps = len(frame_times) / float(frame_times[-1] - frame_times[0] +
                                       0.001)
        draw_text(cv2_im, "{:.1f} / {:.2f}ms".format(fps, lastInferenceTime))
        #print("FPS / Inference time: " + "{:.1f} / {:.2f}ms".format(fps, lastInferenceTime))

        #flipping the image: cv2.flip(cv2_im, 1)

        #cv2_im = cv2.resize(cv2_im, (800, 600))
        cv2.imshow('object detection', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            exit()
            break
Example #21
import cv2
from edgetpu.detection.engine import DetectionEngine
from PIL import Image

cap = cv2.VideoCapture(0)
model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'
engine = DetectionEngine(model)

while True:
    ret, frame = cap.read()
    if ret:
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        h, w = frame.shape[:2]
        im_pil = Image.fromarray(img)
        results = engine.DetectWithImage(im_pil)
        for result in results:
            b = result.bounding_box
            x0, y0 = b[0]
            x1, y1 = b[1]
            x0 = int(x0 * w)
            y0 = int(y0 * h)
            x1 = int(x1 * w)
            y1 = int(y1 * h)
            cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 255, 0), 2)
        cv2.imshow('', frame)

    k = cv2.waitKey(1)
    if k == 27:
        break
Example #22
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
            '--model', help='File path of Tflite model.', required=True)
    parser.add_argument(
            '--label', help='File path of label file.', required=True)
    parser.add_argument(
            '--top_k', help="keep top k candidates.", default=3, type=int)
    parser.add_argument(
            '--threshold', help="threshold to filter results.", default=0.5, type=float)
    parser.add_argument(
            '--width', help="Resolution width.", default=640, type=int)
    parser.add_argument(
            '--height', help="Resolution height.", default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:

        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        _, width, height, channels = engine.get_input_tensor_shape()
        rawCapture = PiRGBArray(camera)

        # allow the camera to warmup
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture,
                                                 format='rgb',
                                                 use_video_port=True):
                rawCapture.truncate(0)

                # input_buf = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                input_buf = PIL.Image.fromarray(image)

                # Run inference.
                start_ms = time.time()
                ans = engine.DetectWithImage(input_buf, threshold=args.threshold,
                       keep_aspect_ratio=False, relative_coord=False, top_k=args.top_k)
                # ans = engine.DetectWithInputTensor(input_buf, threshold=0.05,
                #         keep_aspect_ratio=False, relative_coord=False, top_k=10)
                elapsed_ms = time.time() - start_ms

                # Display result.
                if ans:
                    for obj in ans:
                        label_name = 'Unknown'
                        if labels:
                            label_name = labels[obj.label_id]
                        caption = '{0}({1:.2f})'.format(label_name, obj.score)

                        # Draw a rectangle and caption.
                        box = obj.bounding_box.flatten().tolist()
                        visual.draw_rectangle(im, box, colors[obj.label_id])
                        visual.draw_caption(im, box, caption)

                # Calc fps.
                fps = 1 / elapsed_ms
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_fps = 1 / avg_elapsed_ms
                    avg_text = ' AVG: {0:.2f}ms, {1:.2f}fps'.format(
                        (avg_elapsed_ms * 1000.0), avg_fps)

                # Display fps
                fps_text = '{0:.2f}ms, {1:.2f}fps'.format(
                        (elapsed_ms * 1000.0), fps)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # display
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break

        finally:
            camera.stop_preview()

    # When everything done, release the window
    cv2.destroyAllWindows()
Example #23
def inference_thread(running, state, result_buffer, frame_buffer, args, identity_dict, current_identity):
    global IDLE, TRACK, RESET, FACE_RECOG_THRESHOLD, FACE_RECOG_THRESHOLD_A
    global od_engine, face_detector, facenet_engine, svm_clf
    # Initialize object detection engine.
    od_engine = DetectionEngine(args.od_model)
    print("device_path: ", od_engine.device_path())
    _, od_width, od_height, _ = od_engine.get_input_tensor_shape()
    print("od input dim: ", od_width, od_height)
    # initial face detector using the opencv haarcascade model
    face_detector = FaceDetector(args.hc_model)
    # Initialize facenet engine.
    facenet_engine = BasicEngine(args.fn_model)
    # load the sklearn support vector machine model from disk
    svm_clf = pickle.load(open(args.svm_model, 'rb'))

    while running.value:
        # check if the frame buffer has a frame, else busy waiting
        if frame_buffer.empty():
            continue
        frame = frame_buffer.get()
        tinf = time.perf_counter()

        if state.value == IDLE:
            fd_results = None
            # reorder image frame from BGR to RGB
            img = frame[:,:,::-1]
            # face detection
            faces_coord = face_detector.detect(img, True)
            # image preprocessing, downsampling
            print("faces_coord: ",faces_coord)
            if not isinstance(faces_coord, type(None)):
                # normalize face image
                face_image = np.array(normalize_faces(img ,faces_coord))
                # facenet to generate face embedding
                facenet_engine.RunInference(face_image.flatten())
                face_emb = facenet_engine.get_raw_output().reshape(1,-1)
                # use SVM to classfy identity with face embedding
                pred_prob = svm_clf.predict_proba(face_emb)
                best_class_index = np.argmax(pred_prob, axis=1)[0]
                best_class_prob = pred_prob[0, best_class_index]
                print("best_class_index: ",best_class_index)
                print("best_class_prob: ",best_class_prob)
                print("label", svm_clf.classes_[best_class_index])
                # Check threshold and verify identify is in the identifiy dictionary
                if best_class_prob > FACE_RECOG_THRESHOLD:
                    face_label = svm_clf.classes_[best_class_index]
                    if face_label in identity_dict:
                        print("\n=================================")
                        print("Identity found: ", face_label, " ",identity_dict[face_label],
                            " with Prob = ", best_class_prob)
                        print("=================================\n")
                        current_identity.value = identity_dict[face_label][0] # ID
                result_buffer.put(faces_coord)
        elif state.value == TRACK:
            od_results = None
            # convert numpy array representation to PIL image with rgb format
            img = Image.fromarray(frame[:,:,::-1], 'RGB')
            # Run inference.
            od_results = od_engine.DetectWithImage(img, threshold=0.30, keep_aspect_ratio=True, relative_coord=False, top_k=10)
            # push result to buffer queue
            result_buffer.put(od_results)
        print(time.perf_counter() - tinf, "sec")
    print("[Finish] inference_thread")
Example #24
                if args.model_name == Models.tf_lite:
                    model_type = 'tf_lite'
                    image = load_image_into_numpy_array(image_filepath)                                 # image to numpy array
                    resized_image = cv2.resize(image, model_image_dim, interpolation = cv2.INTER_AREA)  # resized to 300x300xRGB
                    reshaped_image = np.reshape(resized_image, model_input_dim)                         # reshape for model (1,300,300,3)
                    bbox_array, class_id_array, prob_array = send_image_to_model(logger, image_filepath, reshaped_image, interpreter)
                elif args.model_name == Models.edge_tpu:
                    model_type = 'edge_tpu'    # we'll need this to process the output differently
                    # cv2_img = cv2.cvtColor(reshaped_image, cv2.COLOR_BGR2RGB)
                    # pil_image = Image.fromarray(cv2_img)         # expecting a PIL image
                    # #cv2_image = load_image_into_numpy_array('/home/jay/Downloads/parrot.jpg')
                    # print ("cv2 image shape:", cv2_image.shape)
                    pil_image = Image.open(image_filepath)
                    # returns class:  DetectionCandidate
                    start_time = time.time()
                    ans = engine.DetectWithImage(pil_image, threshold=0.05, keep_aspect_ratio=True, relative_coord=False, top_k=10)
                    finish_time = time.time()
                    logger.info("time spent: {:.4f}".format(finish_time - start_time))
                    bbox_list = []
                    class_id_list = []
                    prob_list = []
                    for i,obj in enumerate(ans):
                        box = obj.bounding_box.flatten().tolist()
                        bbox_list.append(box)
                        class_id_list.append(obj.label_id)
                        prob_list.append(obj.score)
                    bbox_array = array(bbox_list)
                    class_id_array = array(class_id_list)
                    prob_array = array(prob_list)

                inference_image, orig_image_dim, detected_objects = inference_to_image(model_type, logger, 
Example #25
def main():

    load_time = time.time()

    # Initialize engine.
    engine = DetectionEngine(Model_weight)
    labels = None

    # Face recognize engine
    face_engine = ClassificationEngine(FaceNet_weight)
    # read embedding
    class_arr, emb_arr = read_embedding(Embedding_book)

    l = time.time() - load_time

    with tf.Graph().as_default():
        with tf.compat.v1.Session() as sess:

            cap = cv2.VideoCapture(0)

            while (True):
                t1 = cv2.getTickCount()
                print('Load_model: {:.2f} sec'.format(l))

                ret, frame = cap.read()

                img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(img)

                # Run inference.
                ans = engine.DetectWithImage(img,
                                             threshold=0.05,
                                             keep_aspect_ratio=False,
                                             relative_coord=False,
                                             top_k=10)

                img = numpy.asarray(img)
                # Display result.
                if ans:
                    crop_img = crop_image(ans, frame)

                    if cv2.waitKey(1) == ord('a'):

                        for k in range(0, len(crop_img)):
                            new_class_name = input(
                                'Please input your name of class:')
                            new_save = cv2.cvtColor(crop_img[k],
                                                    cv2.COLOR_BGR2RGB)
                            cv2.imwrite(
                                'pictures/' + str(new_class_name) + '.jpg',
                                new_save)

                        Create_embeddings(face_engine)
                        class_arr, emb_arr = read_embedding(
                            'embedding_book/embeddings.h5')

                    embs = Tpu_FaceRecognize(face_engine, crop_img)

                    face_num = len(ans)
                    face_class = ['Others'] * face_num

                    for i in range(face_num):
                        diff = np.mean(np.square(embs[i] - emb_arr), axis=1)
                        min_diff = min(diff)

                        if min_diff < THRED:

                            index = np.argmin(diff)
                            face_class[i] = class_arr[index]

                    print('Face_class:', face_class)
                    print('Classes:', class_arr)

                    for count, obj in enumerate(ans):
                        print('-----------------------------------------')
                        if labels:
                            print(labels[obj.label_id])
                        print('Score = ', obj.score)
                        box = obj.bounding_box.flatten().tolist()

                        # Draw a rectangle and label
                        cv2.rectangle(img, (int(box[0]), int(box[1])),
                                      (int(box[2]), int(box[3])),
                                      (255, 255, 0), 2)
                        cv2.putText(img, '{}'.format(face_class[count]),
                                    (int(box[0]), int(box[1]) - 5),
                                    cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 1,
                                    cv2.LINE_AA)

                t2 = cv2.getTickCount()
                t = (t2 - t1) / cv2.getTickFrequency()
                fps = 1.0 / t
                cv2.putText(img, 'fps: {:.2f}'.format(fps), (5, 20),
                            cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 1,
                            cv2.LINE_AA)

                cv2.putText(img, 'A: Add new class', (5, 450),
                            cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 1,
                            cv2.LINE_AA)
                cv2.putText(img, 'Q: Quit', (5, 470), cv2.FONT_HERSHEY_PLAIN,
                            1, (255, 0, 0), 1, cv2.LINE_AA)
                img_ = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

                cv2.imshow('frame', img_)

                if cv2.waitKey(1) == ord('q'):
                    break

            cap.release()
            cv2.destroyAllWindows()
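
Example #25 relies on several helpers (crop_image, Tpu_FaceRecognize, read_embedding, Create_embeddings) that are not in the listing; as an illustration only, crop_image could be sketched like this, cropping each detected face out of the BGR frame:

def crop_image(ans, frame):
    # Sketch of the assumed helper: crop each detection's bounding box out of the frame.
    crops = []
    for obj in ans:
        x0, y0, x1, y1 = [int(v) for v in obj.bounding_box.flatten().tolist()]
        crops.append(frame[max(y0, 0):y1, max(x0, 0):x1])
    return crops
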
class EdgeTPUFaceDetector(ConnectionBasedTransport):

    def __init__(self):
        super(EdgeTPUFaceDetector, self).__init__()
        rospack = rospkg.RosPack()
        pkg_path = rospack.get_path('coral_usb')
        self.bridge = CvBridge()
        self.classifier_name = rospy.get_param(
            '~classifier_name', rospy.get_name())
        model_file = os.path.join(
            pkg_path,
            './models/mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite')
        model_file = rospy.get_param('~model_file', model_file)

        self.engine = DetectionEngine(model_file)
        # only for human face
        self.label_ids = [0]
        self.label_names = ['face']

        # dynamic reconfigure
        self.srv = Server(EdgeTPUFaceDetectorConfig, self.config_callback)

        self.pub_rects = self.advertise(
            '~output/rects', RectArray, queue_size=1)
        self.pub_class = self.advertise(
            '~output/class', ClassificationResult, queue_size=1)
        self.pub_image = self.advertise(
            '~output/image', Image, queue_size=1)

    def subscribe(self):
        self.sub_image = rospy.Subscriber(
            '~input', Image, self.image_cb, queue_size=1, buff_size=2**26)

    def unsubscribe(self):
        self.sub_image.unregister()

    @property
    def visualize(self):
        return self.pub_image.get_num_connections() > 0

    def config_callback(self, config, level):
        self.score_thresh = config.score_thresh
        self.top_k = config.top_k
        return config

    def image_cb(self, msg):
        img = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
        H, W = img.shape[:2]
        objs = self.engine.DetectWithImage(
            PIL.Image.fromarray(img), threshold=self.score_thresh,
            keep_aspect_ratio=True, relative_coord=True,
            top_k=self.top_k)

        bboxes = []
        scores = []
        labels = []
        rect_msg = RectArray(header=msg.header)
        for obj in objs:
            x_min, y_min, x_max, y_max = obj.bounding_box.flatten().tolist()
            x_min = int(np.round(x_min * W))
            y_min = int(np.round(y_min * H))
            x_max = int(np.round(x_max * W))
            y_max = int(np.round(y_max * H))
            bboxes.append([y_min, x_min, y_max, x_max])
            scores.append(obj.score)
            labels.append(self.label_ids.index(int(obj.label_id)))
            rect = Rect(
                x=x_min, y=y_min,
                width=x_max-x_min, height=y_max-y_min)
            rect_msg.rects.append(rect)
        bboxes = np.array(bboxes)
        scores = np.array(scores)
        labels = np.array(labels)

        cls_msg = ClassificationResult(
            header=msg.header,
            classifier=self.classifier_name,
            target_names=self.label_names,
            labels=labels,
            label_names=[self.label_names[l] for l in labels],
            label_proba=scores)

        self.pub_rects.publish(rect_msg)
        self.pub_class.publish(cls_msg)

        if self.visualize:
            fig = plt.figure(
                tight_layout={'pad': 0})
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.axis('off')
            fig.add_axes(ax)
            vis_bbox(
                img[:, :, ::-1].transpose((2, 0, 1)),
                bboxes, labels, scores,
                label_names=self.label_names, ax=ax)
            fig.canvas.draw()
            w, h = fig.canvas.get_width_height()
            vis_img = np.fromstring(
                fig.canvas.tostring_rgb(), dtype=np.uint8)
            vis_img.shape = (h, w, 3)
            fig.clf()
            plt.close()
            vis_msg = self.bridge.cv2_to_imgmsg(vis_img, 'rgb8')
            # BUG: https://answers.ros.org/question/316362/sensor_msgsimage-generates-float-instead-of-int-with-python3/  # NOQA
            vis_msg.step = int(vis_msg.step)
            vis_msg.header = msg.header
            self.pub_image.publish(vis_msg)
class FaceDetectorEdgeTPU:

    def __init__(self):
        # Read input parameters
        self.input_image_compressed = rospy.get_param('~input_image_compressed', "usb_cam/image_raw/compressed")
        self.output_image_compressed = rospy.get_param('~output_image', "face_image/compressed")
        self.model_path = rospy.get_param('~model_path', "model.tflite")
        self.threshold = rospy.get_param('~threshold', 0.8)
        self.rotation_angle = rospy.get_param('~rotation_angle', 0.0)
        self.labels_file = rospy.get_param('~labels_file', "")
        self.tracked_object = rospy.get_param('~tracked_object', "person")
        self.enable_labeling = False

        self.labels = {}

        # fix path if required
        if len(self.model_path) > 0:
            if "pkg://" in self.model_path:
                rp = rospkg.RosPack()
                path = rp.get_path('braccio_camai')
                self.model_path = self.model_path.replace("pkg://braccio_camai", path)
        else:
            rospy.loginfo("Invalid model path")
            return

        if len(self.labels_file) > 0:
            if "pkg://" in self.labels_file:
                rp = rospkg.RosPack()
                path = rp.get_path('braccio_camai')
                self.labels_file = self.labels_file.replace("pkg://braccio_camai", path)

            # loop over the class labels file
            for row in open(self.labels_file):
                # unpack the row and update the labels dictionary
                (classID, label) = row.strip().split(maxsplit=1)
                self.labels[int(classID)] = label.strip()
                
            self.enable_labeling = True

        # print input parameters
        rospy.loginfo("input_image_compressed: " + self.input_image_compressed)
        rospy.loginfo("output_image_compressed: " + self.output_image_compressed)
        rospy.loginfo("model_path: " + self.model_path)
        rospy.loginfo("threshold: " + str(self.threshold))
        rospy.loginfo("rotation_angle: " + str(self.rotation_angle))
        rospy.loginfo("labels_file: " + str(self.labels_file))
        rospy.loginfo("tracked_object: " + str(self.tracked_object))

        self.current_image = CompressedImage()
    
        rospy.loginfo("Loading Tensorflow model")
        self.model = DetectionEngine(self.model_path)

        self.pub_image = rospy.Publisher(self.output_image_compressed, CompressedImage, queue_size=1)
        self.pub_box = rospy.Publisher("bounding_box", Int16MultiArray, queue_size=1)

        self.subscriber = rospy.Subscriber(self.input_image_compressed,  CompressedImage, self.callback, queue_size=1)

        rospy.loginfo("detection started")

        while not rospy.is_shutdown():
            self.process_current_image()

        #rospy.spin()

    def process_current_image(self):
        # No image data received
        if len(self.current_image.data) == 0:
            return

        # skip is no subscribers request for detections
        if self.pub_box.get_num_connections() == 0 and self.pub_image.get_num_connections() == 0:
            return

        np_arr = np.fromstring(self.current_image.data, np.uint8)
        frame_ori = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)

        frame = self.rotate_image(frame_ori, self.rotation_angle)

        orig = frame.copy()
        frame = Image.fromarray(frame)

        # make predictions on the input frame
        results = self.model.DetectWithImage(frame, threshold=self.threshold, keep_aspect_ratio=True, relative_coord=False)

        # loop over the results
        for r in results:
            # extract the bounding box
            box = r.bounding_box.flatten().astype("int")
            (startX, startY, endX, endY) = box
            
            if self.enable_labeling:
                label = self.labels[r.label_id]
                if label != self.tracked_object:
                    continue # skip current object
                y = startY - 15 if startY - 15 > 15 else startY + 15
                text = "{}: {:.2f}%".format(label, r.score * 100)
                cv2.putText(orig, text, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # draw the bounding box and label on the image
            cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)

            box_msg = Int16MultiArray()
            box_msg.data = [r.label_id, startX, startY, endX, endY]
            self.pub_box.publish(box_msg)
            break  # publish only the first matching detection per frame

        # skip if no subscribers are registered
        if self.pub_image.get_num_connections() == 0:
            return

        #### Create CompressedImage ####
        msg = CompressedImage()
        msg.header.stamp = rospy.Time.now()
        msg.format = "jpeg"
        msg.data = np.array(cv2.imencode('.jpg', orig)[1]).tobytes()

        # Publish the annotated image with detections
        self.pub_image.publish(msg)

    def callback(self, ros_data):
        self.current_image = ros_data

    def rotate_image(self, image, angle):
        image_center = tuple(np.array(image.shape[1::-1]) / 2)
        rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
        result = cv2.warpAffine(image, rot_mat, image.shape[1::-1], flags=cv2.INTER_LINEAR)
        return result
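
A minimal companion sketch, not part of the original example: a separate node that consumes the bounding_box topic published above. It assumes the Int16MultiArray layout [label_id, startX, startY, endX, endY] used in process_current_image; the node and callback names are illustrative.

import rospy
from std_msgs.msg import Int16MultiArray

def box_callback(msg):
    # Unpack the layout used by the detector node above.
    label_id, start_x, start_y, end_x, end_y = msg.data
    rospy.loginfo("label %d, box (%d, %d) -> (%d, %d)",
                  label_id, start_x, start_y, end_x, end_y)

if __name__ == '__main__':
    rospy.init_node('bounding_box_listener')
    rospy.Subscriber('bounding_box', Int16MultiArray, box_callback, queue_size=1)
    rospy.spin()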
Example #28
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        default=
        "/home/libre/models/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite",
        help="Path of the detection model.")
    parser.add_argument("--label",
                        default="/home/libre/detection/coco_labels.txt",
                        help="Path of the labels file.")
    parser.add_argument("--usbcamno",
                        type=int,
                        default=0,
                        help="USB Camera number.")
    parser.add_argument("--cam_w", type=int, default=320, help="Camera width")
    parser.add_argument("--cam_h", type=int, default=320, help="Camera width")
    args = parser.parse_args()

    fps = ""
    detectfps = ""
    framecount = 0
    detectframecount = 0
    time1 = 0
    time2 = 0
    box_color = (255, 128, 0)
    box_thickness = 1
    label_background_color = (125, 175, 75)
    label_text_color = (255, 255, 255)
    percentage = 0.0

    camera_width = args.cam_w
    camera_height = args.cam_h

    cap = cv2.VideoCapture(args.usbcamno)
    cap.set(cv2.CAP_PROP_FPS, 150)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, camera_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, camera_height)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    while True:
        t1 = time.perf_counter()

        ret, color_image = cap.read()
        if not ret:
            break

        # Run inference.
        prepimg = color_image[:, :, ::-1].copy()
        prepimg = Image.fromarray(prepimg)

        tinf = time.perf_counter()
        ans = engine.DetectWithImage(prepimg,
                                     threshold=0.5,
                                     keep_aspect_ratio=True,
                                     relative_coord=False,
                                     top_k=10)
        print(time.perf_counter() - tinf, "sec")

        # Display result.
        if ans:
            detectframecount += 1
            for obj in ans:
                box = obj.bounding_box.flatten().tolist()
                box_left = int(box[0])
                box_top = int(box[1])
                box_right = int(box[2])
                box_bottom = int(box[3])
                cv2.rectangle(color_image, (box_left, box_top),
                              (box_right, box_bottom), box_color,
                              box_thickness)

                percentage = int(obj.score * 100)
                label_text = labels[obj.label_id] + " (" + str(
                    percentage) + "%)"

                label_size = cv2.getTextSize(label_text,
                                             cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                             1)[0]
                label_left = box_left
                label_top = box_top - label_size[1]
                if (label_top < 1):
                    label_top = 1
                label_right = label_left + label_size[0]
                label_bottom = label_top + label_size[1]
                cv2.rectangle(color_image, (label_left - 1, label_top - 1),
                              (label_right + 1, label_bottom + 1),
                              label_background_color, -1)
                cv2.putText(color_image, label_text,
                            (label_left, label_bottom),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, label_text_color, 1)

        cv2.putText(color_image, fps, (camera_width - 170, 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1,
                    cv2.LINE_AA)
        cv2.putText(color_image, detectfps, (camera_width - 170, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1,
                    cv2.LINE_AA)

        cv2.namedWindow('USB Camera', cv2.WINDOW_AUTOSIZE)
        cv2.imshow('USB Camera', color_image)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # FPS calculation
        framecount += 1
        if framecount >= 15:
            fps = "(Playback) {:.1f} FPS".format(time1 / 15)
            detectfps = "(Detection) {:.1f} FPS".format(detectframecount /
                                                        time2)
            framecount = 0
            detectframecount = 0
            time1 = 0
            time2 = 0
        t2 = time.perf_counter()
        elapsedTime = t2 - t1
        time1 += 1 / elapsedTime
        time2 += elapsedTime
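
ReadLabelFile is called above but not shown in this snippet. A minimal sketch of what it typically looks like, assuming the usual coco_labels.txt layout with one "id label" pair per line; the helper name matches the call above, the implementation itself is illustrative.

def ReadLabelFile(file_path):
    # Build a {class_id: label} dictionary from a label file.
    labels = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            pair = line.strip().split(maxsplit=1)
            if len(pair) == 2:
                labels[int(pair[0])] = pair[1].strip()
    return labels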
Example #29
0
        continue

    ##create numpy array of depth and color frames
    depth_image = np.asanyarray(depth_frame.get_data())
    color_image = np.asanyarray(color_frame.get_data())
    color_image_copy = np.asanyarray(color_frame.get_data())
    ##resize image based upon argument and create a copy to annotate and display
    color_image = imutils.resize(color_image, width=args["width"])
    orig = color_image
    #color_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
    color_image = Image.fromarray(color_image)

    ##start a timer for inferencing time and feed the frame into the model
    start_inference = time.time()
    results = model.DetectWithImage(color_image,
                                    threshold=args["confidence"],
                                    keep_aspect_ratio=True,
                                    relative_coord=False)
    end_inference = time.time()

    depth_colormap = cv2.applyColorMap(
        cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)
    depth_colormap = imutils.resize(depth_colormap, width=args["width"])

    ##put a bounding box on the result in the copy image
    for r in results:
        # extract the pixel coordinates of the detection box
        bounding_box = r.bounding_box.flatten().astype("int")
        (startX, startY, endX, endY) = bounding_box
        label = labels[r.label_id]
        cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)
        cv2.rectangle(depth_colormap, (startX, startY), (endX, endY),
                      (255, 255, 255), 2)
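
The fragment above draws boxes on both the color image and the depth colormap but never reads a distance. A possible follow-up, not part of the original: a small helper that samples the depth at the center of a detection box via pyrealsense2's depth_frame.get_distance. The box is assumed to be expressed in the depth frame's pixel coordinates (rescale it first if the color image was resized before detection, as done with imutils.resize above).

def box_center_distance(depth_frame, box):
    # box = (startX, startY, endX, endY) in depth-frame pixels.
    (startX, startY, endX, endY) = box
    cx = (startX + endX) // 2
    cy = (startY + endY) // 2
    return depth_frame.get_distance(int(cx), int(cy))  # distance in meters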
Example #30
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model',
        help=
        'Path of the detection model, it must be a SSD model with postprocessing operator.',
        required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument('--input',
                        help='File path of the input image.',
                        required=True)
    parser.add_argument('--output', help='File path of the output image.')
    parser.add_argument(
        '--keep_aspect_ratio',
        dest='keep_aspect_ratio',
        action='store_true',
        help=
        ('keep the image aspect ratio when down-sampling the image by adding '
         'black pixel padding (zeros) on bottom or right. '
         'By default the image is resized and reshaped without cropping. This '
         'option should be the same as what is applied on input images during '
         'model training. Otherwise the accuracy may be affected and the '
         'bounding box of detection result may be stretched.'))
    parser.set_defaults(keep_aspect_ratio=False)
    args = parser.parse_args()

    if not args.output:
        output_name = 'object_detection_result.jpg'
    else:
        output_name = args.output

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = dataset_utils.ReadLabelFile(args.label) if args.label else None

    # Open image.
    img = Image.open(args.input)
    draw = ImageDraw.Draw(img)

    # Run inference.
    ans = engine.DetectWithImage(img,
                                 threshold=0.05,
                                 keep_aspect_ratio=args.keep_aspect_ratio,
                                 relative_coord=False,
                                 top_k=10)

    # Display result.
    if ans:
        for obj in ans:
            print('-----------------------------------------')
            if labels:
                print(labels[obj.label_id])
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)
            # Draw a rectangle.
            draw.rectangle(box, outline='red')
        img.save(output_name)
        if platform.machine() == 'x86_64':
            # For gLinux, simply show the image.
            img.show()
        elif platform.machine() == 'armv7l':
            # For Raspberry Pi, you need to install 'feh' to display image.
            subprocess.Popen(['feh', output_name])
        else:
            print('Please check ', output_name)
    else:
        print('No object detected!')
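
An optional follow-up, not part of the original example, that could sit at the end of main(): since relative_coord=False returns pixel coordinates, each detection can also be saved as a crop of the input image. The output filename pattern is illustrative.

    # Re-open the input so the crops do not include the red outlines drawn above.
    source = Image.open(args.input)
    for i, obj in enumerate(ans):
        x0, y0, x1, y1 = [int(v) for v in obj.bounding_box.flatten().tolist()]
        source.crop((x0, y0, x1, y1)).save('detection_{:02d}.jpg'.format(i))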