Example #1
import os
from operator import itemgetter
from typing import List

import cv2

from edgetpu.detection.engine import DetectionEngine

# `Detection` is an application-specific result container defined elsewhere in
# this project; instances are filled in place by `detect()` below.


class CoralObjectDetector:
    """Performs inference on Edge TPU.
    """
    def __init__(self, model_path, device_path):
        self.__engine = DetectionEngine(model_path=os.path.join(
            model_path, 'edgetpu.tflite'),
                                        device_path=device_path)

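        # Cache the model's expected input height and width; for these SSD
        # models get_input_tensor_shape() returns [1, height, width, channels].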
        self.__model_shape = itemgetter(1, 2)(
            self.__engine.get_input_tensor_shape())

    @property
    def device_name(self):
        return "Coral"

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass

    def detect(self, image_shape, image_np, detections: List[Detection]):
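        # Resize the frame to the model's input size. cv2.resize expects dsize
        # as (width, height); this works here assuming a square model input
        # (e.g. 300x300).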
        image_np = cv2.resize(image_np,
                              dsize=self.__model_shape,
                              interpolation=cv2.INTER_LINEAR)

        objs = self.__engine.detect_with_input_tensor(
            input_tensor=image_np.flatten(), top_k=len(detections))

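        # Copy the top results into the caller-provided Detection list, scaling
        # the relative [[x_min, y_min], [x_max, y_max]] coordinates back to
        # pixel coordinates of the original frame.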
        d = 0
        max_width = image_shape[1] - 1
        max_height = image_shape[0] - 1
        while d < len(objs) and d < len(detections):
            detection = detections[d]
            obj = objs[d]
            detection.label = obj.label_id + 1
            detection.confidence = obj.score
            detection.bounding_box.y_min = int(obj.bounding_box[0][1] *
                                               max_height)
            detection.bounding_box.x_min = int(obj.bounding_box[0][0] *
                                               max_width)
            detection.bounding_box.y_max = int(obj.bounding_box[1][1] *
                                               max_height)
            detection.bounding_box.x_max = int(obj.bounding_box[1][0] *
                                               max_width)
            d += 1

        return self.__engine.get_inference_time()
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='Path of the detection model.',
                        required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument(
        '--mode',
        help='Mode for the detection: OBJECT_DETECTION or IMAGE_CLASSIFICATION',
        required=True)
    parser.add_argument('--camera',
                        help='Camera source (if multiple available)',
                        type=int,
                        required=False)

    args = parser.parse_args()

    # Initialize engine.

    if args.mode == "OBJECT_DETECTION":
        engine = DetectionEngine(args.model)
    elif args.mode == "IMAGE_CLASSIFICATION":
        engine = ClassificationEngine(args.model)
    else:
        print("Please choose a mode: OBJECT_DETECTION or IMAGE_CLASSIFICATION")
        exit()

    labels = read_label_file(args.label) if args.label else None
    label = None
    camera_index = args.camera if args.camera else 0

    # Initialize the camera. The OpenCV alternative would be:
    # cam = cv2.VideoCapture(camera_index)
    camera = PiCamera()
    time.sleep(2)
    camera.resolution = (640, 480)
    # Create the in-memory stream
    stream = io.BytesIO()

    # Initialize the timer for fps
    start_time = time.time()
    frame_times = deque(maxlen=40)

    while True:
        # OpenCV alternative: ret, cv2_im = cam.read()
        stream = io.BytesIO()  # wipe the previous contents
        camera.capture(stream, format='jpeg', use_video_port=True)
        stream.seek(0)
        pil_im = Image.open(stream)
        cv2_im = np.array(pil_im)
        # PIL delivers RGB; swap channels to BGR for OpenCV drawing and display.
        cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_RGB2BGR)

        if args.mode == "OBJECT_DETECTION":
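            # Run detection directly on the PIL image; with relative_coord=False
            # the returned bounding boxes are already in pixel coordinates.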
            ans = engine.DetectWithImage(pil_im,
                                         threshold=0.05,
                                         keep_aspect_ratio=True,
                                         relative_coord=False,
                                         top_k=10)
            if ans:
                for obj in ans:
                    if obj.score > 0.4:
                        if labels:
                            label = labels[obj.label_id] + " - {0:.2f}".format(
                                obj.score)
                        draw_rectangles(obj.bounding_box, cv2_im, label=label)
            else:
                draw_text(cv2_im, 'No object detected!')

        else:
            i = 0
            for result in engine.ClassifyWithImage(pil_im, top_k=5):
                if result:
                    label = labels[result[0]]
                    score = result[1]

                    draw_text(cv2_im, label, i)
                    i += 1
                else:
                    draw_text(cv2_im, 'No classification detected!')
        lastInferenceTime = engine.get_inference_time()
        frame_times.append(time.time())
        fps = len(frame_times) / float(frame_times[-1] - frame_times[0] +
                                       0.001)
        draw_text(cv2_im, "{:.1f} / {:.2f}ms".format(fps, lastInferenceTime))
        #print("FPS / Inference time: " + "{:.1f} / {:.2f}ms".format(fps, lastInferenceTime))

        #flipping the image: cv2.flip(cv2_im, 1)

        #cv2_im = cv2.resize(cv2_im, (800, 600))
        cv2.imshow('object detection', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            exit()
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='Path of the detection model.',
                        required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument(
        '--mode',
        help='Mode for the detection: OBJECT_DETECTION or IMAGE_CLASSIFICATION',
        required=True)
    parser.add_argument('--camera',
                        help='Camera source (if multiple available)',
                        type=int,
                        required=False)

    args = parser.parse_args()

    # Initialize engine.

    if args.mode == "OBJECT_DETECTION":
        engine = DetectionEngine(args.model)
    elif args.mode == "IMAGE_CLASSIFICATION":
        engine = ClassificationEngine(args.model)
    else:
        print("Please choose a mode: OBJECT_DETECTION or IMAGE_CLASSIFICATION")
        exit()

    labels = read_label_file(args.label) if args.label else None
    label = None
    camera = args.camera if args.camera else 0

    # Initialize the camera
    cam = cv2.VideoCapture(camera)

    # Initialize the timer for fps
    start_time = time.time()
    frame_times = deque(maxlen=40)

    while True:
        ret, cv2_im = cam.read()

        # Convert the numpy frame to a PIL image; the TPU library/utils perform
        # the inverse conversion internally.
        # The OpenCV way (explicit BGR -> RGB conversion):
        # pil_im = Image.fromarray(cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB))
        # The TF-utils way used below needs numpy and is slightly slower.
        pil_im = Image.fromarray(np.uint8(cv2_im)).convert('RGB')

        if args.mode == "OBJECT_DETECTION":
            ans = engine.DetectWithImage(pil_im,
                                         threshold=0.05,
                                         keep_aspect_ratio=True,
                                         relative_coord=False,
                                         top_k=10)
            if ans:
                print("{} object(s) detected".format(len(ans)))
                for obj in ans:
                    if obj.score > 0.4:
                        if labels:
                            label = labels[obj.label_id]
                            if SHOW_CONFIDENCE_IN_LABEL:
                                label = label + "({0:.2f})".format(obj.score)

                        draw_rectangles(obj.bounding_box, cv2_im, label=label)
            else:
                draw_text(cv2_im, 'No object detected!')

        else:
            i = 0
            for result in engine.ClassifyWithImage(pil_im, top_k=5):
                if result:
                    label = labels[result[0]]
                    score = result[1]

                    draw_text(cv2_im, label, i)
                    i += 1
                else:
                    draw_text(cv2_im, 'No classification detected!')
        lastInferenceTime = engine.get_inference_time()
        frame_times.append(time.time())
        fps = len(frame_times) / float(frame_times[-1] - frame_times[0] +
                                       0.001)
        draw_text(cv2_im, "{:.1f} / {:.2f}ms".format(fps, lastInferenceTime))

        # flipping the image:
        #cv2.flip(cv2_im, 1)
        #resizing the image
        #cv2_im = cv2.resize(cv2_im, (800, 600))
        cv2.imshow('object detection', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break

    #end
    exit()
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model",
                        help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label",
                        help="File path of label file.",
                        required=True)
    parser.add_argument("--top_k",
                        help="keep top k candidates.",
                        default=3,
                        type=int)
    parser.add_argument("--threshold",
                        help="threshold to filter results.",
                        default=0.5,
                        type=float)
    parser.add_argument("--width",
                        help="Resolution width.",
                        default=640,
                        type=int)
    parser.add_argument("--height",
                        help="Resolution height.",
                        default=480,
                        type=int)
    parser.add_argument("--videopath",
                        help="File path of Videofile.",
                        default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    # Video capture.
    if args.videopath == "":
        print('open camera.')
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print(args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    elapsed_list = []

    while (cap.isOpened()):
        ret, frame = cap.read()
        if not ret:
            break
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_buf = PIL.Image.fromarray(im)

        # Run inference.
        start_ms = time.time()
        ans = engine.detect_with_image(
            input_buf,
            threshold=args.threshold,
            keep_aspect_ratio=False,
            relative_coord=False,
            top_k=args.top_k,
        )
        elapsed_ms = engine.get_inference_time()

        # Display result.
        if ans:
            for obj in ans:
                label_name = "Unknown"
                if labels:
                    label_name = labels[obj.label_id]
                caption = "{0}({1:.2f})".format(label_name, obj.score)

                # Draw a rectangle and caption.
                box = obj.bounding_box.flatten().tolist()
                visual.draw_rectangle(frame, box, colors[obj.label_id])
                visual.draw_caption(frame, box, caption)

        # Track the inference time over the last 100 frames.
        elapsed_list.append(elapsed_ms)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

        # Display the current (and average) inference time.
        fps_text = "{0:.2f}ms".format(elapsed_ms)
        visual.draw_caption(frame, (10, 30), fps_text + avg_text)

        # display
        cv2.imshow(WINDOW_NAME, frame)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything done, release the window
    cv2.destroyAllWindows()
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model",
                        help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label",
                        help="File path of label file.",
                        required=True)
    parser.add_argument("--top_k",
                        help="keep top k candidates.",
                        default=3,
                        type=int)
    parser.add_argument("--threshold",
                        help="threshold to filter results.",
                        default=0.5,
                        type=float)
    parser.add_argument("--width",
                        help="Resolution width.",
                        default=640,
                        type=int)
    parser.add_argument("--height",
                        help="Resolution height.",
                        default=480,
                        type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:

        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        # Input tensor shape is [1, height, width, channels].
        _, height, width, channels = engine.get_input_tensor_shape()
        rawCapture = PiRGBArray(camera)

        # allow the camera to warmup
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture,
                                                   format="rgb",
                                                   use_video_port=True):
                rawCapture.truncate(0)

                # input_buf = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                input_buf = PIL.Image.fromarray(image)

                # Run inference.
                start_ms = time.time()
                ans = engine.detect_with_image(
                    input_buf,
                    threshold=args.threshold,
                    keep_aspect_ratio=False,
                    relative_coord=False,
                    top_k=args.top_k,
                )
                elapsed_ms = engine.get_inference_time()

                # Display result.
                if ans:
                    for obj in ans:
                        label_name = "Unknown"
                        if labels:
                            label_name = labels[obj.label_id]
                        caption = "{0}({1:.2f})".format(label_name, obj.score)

                        # Draw a rectangle and caption.
                        box = obj.bounding_box.flatten().tolist()
                        visual.draw_rectangle(im, box, colors[obj.label_id])
                        visual.draw_caption(im, box, caption)

                # Track the inference time over the last 100 frames.
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

                # Display the current (and average) inference time.
                fps_text = "{0:.2f}ms".format(elapsed_ms)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # display
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break

        finally:
            camera.stop_preview()

    # When everything done, release the window
    cv2.destroyAllWindows()
Example #6
import statistics
import numpy as np
from edgetpu.detection.engine import DetectionEngine

# Path to the detection model. DetectionEngine expects an Edge TPU-compiled
# TensorFlow Lite model (.tflite), not a TensorFlow frozen graph (.pb);
# point this at your own compiled model.
PATH_TO_CKPT = '/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'

# Load the edgetpu engine and labels
engine = DetectionEngine(PATH_TO_CKPT)

frame = np.zeros((300, 300, 3), np.uint8)
flattened_frame = np.expand_dims(frame, axis=0).flatten()

detection_times = []

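# Run 1000 inferences on the blank frame and record the on-device inference
# time the engine reports for each run.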
for x in range(0, 1000):
    objects = engine.DetectWithInputTensor(flattened_frame,
                                           threshold=0.1,
                                           top_k=3)
    detection_times.append(engine.get_inference_time())

print("Average inference time: " + str(statistics.mean(detection_times)))
Example #7
import colorsys
import random

import cv2
import numpy as np
from PIL import Image

from edgetpu.detection.engine import DetectionEngine


class Neural:
    def __init__(self, parameters):
        self.engine = DetectionEngine(parameters.model_path + '/' +
                                      parameters.model_file)
        self.labels = Neural.read_label_file(parameters.model_path + '/' +
                                             parameters.label_file)
        last_key = sorted(self.labels.keys())[len(self.labels.keys()) - 1]
        self.colors = Neural.random_colors(last_key)
        self.parameters = parameters
        self.scene = []

    def process(self, frame):
        self.scene = []
        #frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_buf = Image.fromarray(frame)
        im = frame

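        # Run detection on the PIL image; relative_coord=False makes the engine
        # return bounding boxes in pixel coordinates of the input frame.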
        ans = self.engine.detect_with_image(
            input_buf,
            threshold=self.parameters.detection_threshold,
            keep_aspect_ratio=False,
            relative_coord=False,
            top_k=10)

        self.elapsed_ms = self.engine.get_inference_time()

        if ans:
            for obj in ans:
                self.scene.append({
                    'label': self.labels[obj.label_id],
                    'bbox': obj.bounding_box.flatten().tolist(),
                    'score': obj.score
                })

                label_name = "Unknown"
                if self.labels:
                    label_name = self.labels[obj.label_id]

                caption = "{0}({1:.2f})".format(label_name, obj.score)
                # Draw a rectangle and caption.
                box = obj.bounding_box.flatten().tolist()
                Neural.draw_rectangle(im, box, self.colors[obj.label_id])
                Neural.draw_caption(im, box, caption)

        return im

    @staticmethod
    def draw_rectangle(image, box, color, thickness=3):
        """ Draws a rectangle.

        Args:
            image: The image to draw on.
            box: A list of 4 elements (x1, y1, x2, y2).
            color: Rectangle color.
            thickness: Thickness of lines.
        """
        b = np.array(box).astype(int)
        cv2.rectangle(image, (b[0], b[1]), (b[2], b[3]), color, thickness)

    @staticmethod
    def draw_caption(image, box, caption):
        """ Draws a caption above the box in an image.

        Args:
            image: The image to draw on.
            box: A list of 4 elements (x1, y1, x2, y2).
            caption: String containing the text to draw.
        """
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1]), cv2.FONT_HERSHEY_SIMPLEX,
                    0.8, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1]), cv2.FONT_HERSHEY_SIMPLEX,
                    0.8, (255, 255, 255), 1)

    @staticmethod
    def read_label_file(file_path):
        """ Function to read labels from text files.

        Args:
            file_path: File path to labels.
        """
        with open(file_path, "r") as f:
            lines = f.readlines()

        ret = {}
        for line in lines:
            pair = line.strip().split(maxsplit=1)
            ret[int(pair[0])] = pair[1].strip()
        return ret

    @staticmethod
    def random_colors(N):
        """ Random color generator.
        """
        N = N + 1
        hsv = [(i / N, 1.0, 1.0) for i in range(N)]
        colors = list(
            map(lambda c: tuple(int(i * 255) for i in colorsys.hsv_to_rgb(*c)),
                hsv))
        random.shuffle(colors)
        return colors
Example #8
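    # Excerpt from a frame-processing loop: read frames from the input video,
    # run Edge TPU detection on each one, and feed the results to a SORT
    # tracker and a video client.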
    writer = None
    i = 0

    vs = cv2.VideoCapture(args.video)
    while True:
        (grabbed, frame) = vs.read()
        if not grabbed:
            break
        i += 1
        print(f"Processing frame {i}...", end='\r')

        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        ans = engine.DetectWithImage(image,
                                     threshold=args.detection_threshold,
                                     keep_aspect_ratio=False,
                                     resample=Image.NEAREST,
                                     relative_coord=False,
                                     top_k=10)

        logger.info("%.3f %d" %
                    (engine.get_inference_time(),
                     count_target_bbox(ans, labels, args.target_labels)))

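        # Convert the detections for the requested target labels into SORT
        # format and update the multi-object tracker.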
        bboxs = generate_sort_tracker_bbox(ans, labels, args.target_labels)
        trackers = mot_tracker.update(numpy.asarray(bboxs))
        img_str = cv2.imencode('.jpg', frame)[1].tobytes()
        vclient.push_frame(args.name, i, img_str, trackers.tolist())

    vs.release()
    # writer.release()