Beispiel #1
0
def main():
    """Run object detection on a PiCamera stream and display the result.

    Parses the command line, creates a ``DetectionEngine`` for the given
    model and reads the label file.  Every captured RGB frame is passed to
    ``DetectWithImage``; the detections, the per-frame inference time and a
    rolling average are drawn on the frame, which is shown in an OpenCV
    window until ``q`` is pressed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
            '--model', help='File path of Tflite model.', required=True)
    parser.add_argument(
            '--label', help='File path of label file.', required=True)
    # type=int: without it a value given on the command line stays a str.
    parser.add_argument(
            '--top_k', help="keep top k candidates.", default=3, type=int)
    parser.add_argument(
            '--threshold', help="threshold to filter results.", default=0.5, type=float)
    parser.add_argument(
            '--width', help="Resolution width.", default=640, type=int)
    parser.add_argument(
            '--height', help="Resolution height.", default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors, sized by the largest label id.
    last_key = max(labels)
    colors = visual.random_colors(last_key)

    elapsed_list = []
    with picamera.PiCamera() as camera:

        camera.resolution = (args.width, args.height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # allow the camera to warmup
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture,
                                                   format='rgb',
                                                   use_video_port=True):
                rawCapture.truncate(0)

                image = frame.array
                # OpenCV draws/displays in BGR; the engine gets the RGB data.
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                input_buf = PIL.Image.fromarray(image)

                # Run inference (time.time() deltas are in seconds).
                start = time.time()
                ans = engine.DetectWithImage(input_buf, threshold=args.threshold,
                       keep_aspect_ratio=False, relative_coord=False, top_k=args.top_k)
                elapsed_sec = time.time() - start

                # Display result.
                if ans:
                    for obj in ans:
                        # Fall back to 'Unknown' for ids missing from the
                        # label file instead of raising KeyError.
                        label_name = 'Unknown'
                        if labels:
                            label_name = labels.get(obj.label_id, 'Unknown')
                        caption = '{0}({1:.2f})'.format(label_name, obj.score)

                        # Draw a rectangle and caption.
                        box = obj.bounding_box.flatten().tolist()
                        visual.draw_rectangle(im, box, colors[obj.label_id])
                        visual.draw_caption(im, box, caption)

                # Calc fps.
                fps = 1 / elapsed_sec
                elapsed_list.append(elapsed_sec)
                avg_text = ""
                # The average is shown once more than 100 samples exist and
                # is always computed over the latest 100.
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_sec = np.mean(elapsed_list)
                    avg_fps = 1 / avg_elapsed_sec
                    avg_text = ' AGV: {0:.2f}ms, {1:.2f}fps'.format(
                        (avg_elapsed_sec * 1000.0), avg_fps)

                # Display fps
                fps_text = '{0:.2f}ms, {1:.2f}fps'.format(
                        (elapsed_sec * 1000.0), fps)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # display
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break

        finally:
            camera.stop_preview()

    # When everything done, release the window
    cv2.destroyAllWindows()
Beispiel #2
0
def main():
    """Run TF-Lite object detection on a camera or video file.

    Parses the command line, builds the interpreter, and reads frames from
    either camera 0 (when ``--videopath`` is empty) or the given video
    file.  Each frame is resized to the interpreter's input size, run
    through the model, annotated with boxes/captions and the inference
    time, optionally written to ``--output``, and displayed until ``q`` is
    pressed or the capture stops delivering frames.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model",
                        help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label",
                        help="File path of label file.",
                        required=True)
    parser.add_argument("--threshold",
                        help="threshold to filter results.",
                        default=0.5,
                        type=float)
    parser.add_argument("--width",
                        help="Resolution width.",
                        default=640,
                        type=int)
    parser.add_argument("--height",
                        help="Resolution height.",
                        default=480,
                        type=int)
    parser.add_argument("--thread", help="Num threads.", default=2, type=int)
    parser.add_argument("--videopath",
                        help="File path of Videofile.",
                        default="")
    parser.add_argument("--output", help="File path of result.", default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize TF-Lite interpreter.
    interpreter = make_interpreter(args.model, args.thread)
    interpreter.allocate_tensors()
    _, height, width, channel = interpreter.get_input_details()[0]["shape"]
    print("Interpreter(height, width, channel): ", height, width, channel)

    # Read label and generate random colors.
    labels = read_label_file(args.label) if args.label else None
    last_key = max(labels)
    # Fixed seed so the same colours are picked on every run.
    random.seed(42)
    colors = visual.random_colors(last_key)

    # Video capture.
    if args.videopath == "":
        print("open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print("open video file", args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    video_writer = None
    # try/finally so the capture, writer and window are released even when
    # the processing loop raises.
    try:
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        print("Input(height, width, fps): ", h, w, fps)

        model_name = os.path.splitext(os.path.basename(args.model))[0]

        # Output Video file
        # Define the codec and create VideoWriter object
        if args.output != "":
            fourcc = cv2.VideoWriter_fourcc(*"MP4V")
            # Some capture backends report 0 for FPS; a writer created with
            # a non-positive rate produces an unusable file, so fall back
            # to a nominal 30 fps in that case.
            writer_fps = fps if fps > 0 else 30.0
            video_writer = cv2.VideoWriter(args.output, fourcc, writer_fps,
                                           (w, h))

        elapsed_list = []

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("VideoCapture read return false.")
                break

            # The model is fed RGB; `frame` stays BGR for drawing/display.
            im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            resize_im = cv2.resize(im, (width, height))

            # Run inference.
            start = time.perf_counter()

            set_input_tensor(interpreter, resize_im)
            interpreter.invoke()
            objs = get_output(interpreter, args.threshold)

            inference_time = (time.perf_counter() - start) * 1000

            # Display result.
            for obj in objs:
                class_id = int(obj["class_id"])
                # Unknown ids get a placeholder instead of a KeyError.
                label_name = labels.get(class_id, "Unknown")
                caption = "{0}({1:.2f})".format(label_name, obj["score"])

                # Convert the bounding box figures from relative coordinates
                # to absolute coordinates based on the original resolution
                ymin, xmin, ymax, xmax = obj["bounding_box"]
                box = (int(xmin * w), int(ymin * h),
                       int(xmax * w), int(ymax * h))

                # Draw a rectangle and caption.
                visual.draw_rectangle(frame, box, colors[class_id])
                visual.draw_caption(frame, box, caption)

            # Calc fps.
            elapsed_list.append(inference_time)
            avg_text = ""
            # Average over the latest 100 samples, shown once more than 100
            # have been collected.
            if len(elapsed_list) > 100:
                elapsed_list.pop(0)
                avg_elapsed_ms = np.mean(elapsed_list)
                avg_text = " AGV: {0:.2f}ms".format(avg_elapsed_ms)

            # Display fps
            fps_text = "Inference: {0:.2f}ms".format(inference_time)
            display_text = model_name + " " + fps_text + avg_text
            visual.draw_caption(frame, (10, 30), display_text)

            # Output video file
            if video_writer is not None:
                video_writer.write(frame)

            # Display
            cv2.imshow(WINDOW_NAME, frame)
            if cv2.waitKey(10) & 0xFF == ord("q"):
                break
    finally:
        # When everything done, release the capture, writer and window
        cap.release()
        if video_writer is not None:
            video_writer.release()
        cv2.destroyAllWindows()
Beispiel #3
0
def main():
    """Run object detection on a camera or video file and display it.

    Parses the command line, creates the interpreter and loads the labels,
    then reads frames from camera 0 (when ``--videopath`` is empty) or from
    the given file.  Each frame is fed through ``common.set_resized_input``
    and ``interpreter.invoke``; the objects returned by
    ``detect.get_objects`` are drawn on the frame together with the
    inference time.  The loop ends when ``q`` is pressed or no further
    frame can be read.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model",
                        help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label",
                        help="File path of label file.",
                        required=True)
    parser.add_argument("--threshold",
                        help="threshold to filter results.",
                        default=0.5,
                        type=float)
    parser.add_argument("--width",
                        help="Resolution width.",
                        default=640,
                        type=int)
    parser.add_argument("--height",
                        help="Resolution height.",
                        default=480,
                        type=int)
    parser.add_argument("--videopath",
                        help="File path of Videofile.",
                        default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors, sized by the largest label id.
    last_key = max(labels)
    colors = visual.random_colors(last_key)

    # Video capture.
    if args.videopath == "":
        print("Open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print("Open video file: ", args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    # try/finally so the capture and window are released on any exit path.
    try:
        cap_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        cap_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        elapsed_list = []

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of file or a failed grab: `frame` is unusable, and
                # passing it to cvtColor would raise.
                print("VideoCapture read return false.")
                break
            im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Run inference.
            start = time.perf_counter()

            _, scale = common.set_resized_input(
                interpreter,
                (cap_width, cap_height),
                lambda size: cv2.resize(im, size))
            interpreter.invoke()

            elapsed_ms = (time.perf_counter() - start) * 1000

            # Display result.
            objects = detect.get_objects(interpreter, args.threshold, scale)
            if objects:
                for obj in objects:
                    # Use the lookup result; the original discarded it and
                    # then indexed the dict, raising KeyError on unknown ids.
                    label_name = "Unknown"
                    if labels:
                        label_name = labels.get(obj.id, "Unknown")
                    caption = "{0}({1:.2f})".format(label_name, obj.score)

                    # Draw a rectangle and caption.
                    box = (obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax,
                           obj.bbox.ymax)
                    visual.draw_rectangle(frame, box, colors[obj.id])
                    visual.draw_caption(frame, box, caption)

            # Calc fps.
            elapsed_list.append(elapsed_ms)
            avg_text = ""
            # Average over the latest 100 samples, shown once more than 100
            # have been collected.
            if len(elapsed_list) > 100:
                elapsed_list.pop(0)
                avg_elapsed_ms = np.mean(elapsed_list)
                avg_text = " AGV: {0:.2f}ms".format(avg_elapsed_ms)

            # Display fps
            fps_text = "{0:.2f}ms".format(elapsed_ms)
            visual.draw_caption(frame, (10, 30), fps_text + avg_text)

            # display
            cv2.imshow(WINDOW_NAME, frame)
            if cv2.waitKey(10) & 0xFF == ord("q"):
                break
    finally:
        # When everything done, release the capture and the window
        cap.release()
        cv2.destroyAllWindows()
Beispiel #4
0
def main():
    """Run object detection on a PiCamera stream and display the result.

    Parses the command line, creates the interpreter and loads the labels.
    Every captured RGB frame is fed through ``common.set_resized_input``
    and ``interpreter.invoke``; the objects returned by
    ``detect.get_objects`` are drawn on a BGR copy of the frame together
    with the measured inference time, and the frame is shown until ``q``
    is pressed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument(
        "--threshold", help="threshold to filter results.", default=0.5, type=float
    )
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors, sized by the largest label id.
    last_key = max(labels)
    colors = visual.random_colors(last_key)

    elapsed_list = []
    resolution = (args.width, args.height)
    with picamera.PiCamera() as camera:

        camera.resolution = resolution
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # allow the camera to warmup
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                rawCapture, format="rgb", use_video_port=True
            ):
                rawCapture.truncate(0)

                image = frame.array
                # OpenCV draws/displays in BGR; inference uses the RGB data.
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Run inference.
                start = time.perf_counter()

                _, scale = common.set_resized_input(
                    interpreter, resolution, lambda size: cv2.resize(image, size)
                )
                interpreter.invoke()

                # No `engine` object exists in this function, so the
                # inference time is measured directly around invoke().
                elapsed_ms = (time.perf_counter() - start) * 1000

                # Display result.
                objects = detect.get_objects(interpreter, args.threshold, scale)
                if objects:
                    for obj in objects:
                        # Use the lookup result so unknown ids get a
                        # placeholder instead of raising KeyError.
                        label_name = "Unknown"
                        if labels:
                            label_name = labels.get(obj.id, "Unknown")
                        caption = "{0}({1:.2f})".format(label_name, obj.score)

                        # Draw a rectangle and caption.
                        box = (obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax, obj.bbox.ymax)
                        visual.draw_rectangle(im, box, colors[obj.id])
                        visual.draw_caption(im, box, caption)

                # Calc fps.
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                # Average over the latest 100 samples, shown once more than
                # 100 have been collected.
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_text = " AGV: {0:.2f}ms".format(avg_elapsed_ms)

                # Display fps
                fps_text = "{0:.2f}ms".format(elapsed_ms)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # display
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break

        finally:
            camera.stop_preview()

    # When everything done, release the window
    cv2.destroyAllWindows()
from tqdm import tqdm
import json
import cv2
import random

from settings import COSSY_DIR
from utils.visualization import random_colors, _draw_xywha

if __name__ == "__main__":
    # Load the annotation file; the context manager closes the handle as
    # soon as the JSON has been parsed instead of leaking it.
    with open(COSSY_DIR + '/annotations/MW-R_mot.json') as ann_file:
        ann_data = json.load(ann_file)

    nColors = 12
    COLORS = random_colors(num=nColors, dtype='uint8').tolist()

    fw, fh = 768, 768
    # fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # out_path = f'./videos_with_ann/Edge_test_mot.avi'
    # vout = cv2.VideoWriter(out_path, fourcc, 10, (fw,fh))

    # Visit the videos in random order.
    random.shuffle(ann_data['videos'])
    for i, vidinfo in enumerate(tqdm(ann_data['videos'])):
        id2color = dict()
        vname = vidinfo['id']
        # 'file_names' and 'annotations' are walked in lockstep: one list of
        # annotations per frame file name.
        for imname, img_anns in zip(vidinfo['file_names'],
                                    vidinfo['annotations']):
            # impath = os.path.join(f'./frames/{vname}/{imname}')
            impath = f'{COSSY_DIR}/frames/{imname}'
            im = cv2.imread(impath)
            object_ids = [ann.get('person_id', None) for ann in img_anns]
            for ann in img_anns:
                # Sanity check: every annotation must have category id 1.
                # NOTE(review): the loop body appears to end here in this
                # copy of the script; several locals above (COLORS,
                # id2color, im, object_ids, ...) are never used - confirm
                # against the full original.
                assert ann['category_id'] == 1
def main():
    """Detect objects on a PiCamera stream, with an optional inpaint mode.

    Parses the command line, creates the interpreter and loads the labels.
    For every captured frame the detected objects are either annotated
    with boxes and captions (default) or, in inpaint mode, masked out and
    filled in with ``cv2.inpaint``.  The space bar toggles the mode and
    ``q`` quits.  The time for the whole per-frame processing is drawn on
    the displayed image.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model",
                        help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label",
                        help="File path of label file.",
                        required=True)
    parser.add_argument("--threshold",
                        help="threshold to filter results.",
                        type=float,
                        default=0.5)
    # type=int: without it, values given on the command line stay strings
    # and break both the camera resolution and the mask shape below.
    parser.add_argument("--width",
                        help="Resolution width.",
                        default=640,
                        type=int)
    parser.add_argument("--height",
                        help="Resolution height.",
                        default=480,
                        type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors, sized by the largest label id.
    last_key = max(labels)
    colors = visual.random_colors(last_key)

    is_inpaint_mode = False
    resolution = (args.width, args.height)
    with picamera.PiCamera() as camera:

        camera.resolution = resolution
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # allow the camera to warmup
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture,
                                                   format="rgb",
                                                   use_video_port=True):
                # Timing covers everything done for this frame, not only
                # the interpreter call (time.time() deltas are in seconds).
                start = time.time()

                rawCapture.truncate(0)

                image = frame.array
                # OpenCV draws/displays in BGR; inference uses the RGB data.
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Run inference.
                _, scale = common.set_resized_input(
                    interpreter,
                    resolution,
                    lambda size: cv2.resize(image, size),
                )
                interpreter.invoke()

                # Display result.
                objects = detect.get_objects(interpreter, args.threshold,
                                             scale)

                if is_inpaint_mode:
                    # Single-channel mask: 255 marks the pixels to inpaint.
                    mask = np.full((args.height, args.width),
                                   0,
                                   dtype=np.uint8)
                    for obj in objects:
                        if labels and obj.id in labels:
                            # Draw a filled mask rectangle.
                            box = (
                                obj.bbox.xmin,
                                obj.bbox.ymin,
                                obj.bbox.xmax,
                                obj.bbox.ymax,
                            )
                            visual.draw_rectangle(mask,
                                                  box, (255, 255, 255),
                                                  thickness=-1)

                    # Image Inpainting
                    dst = cv2.inpaint(im, mask, 3, cv2.INPAINT_TELEA)

                else:
                    for obj in objects:
                        if labels and obj.id in labels:
                            label_name = labels[obj.id]
                            caption = "{0}({1:.2f})".format(
                                label_name, obj.score)

                            # Draw a rectangle and caption.
                            box = (
                                obj.bbox.xmin,
                                obj.bbox.ymin,
                                obj.bbox.xmax,
                                obj.bbox.ymax,
                            )
                            visual.draw_rectangle(im, box, colors[obj.id])
                            visual.draw_caption(im, box, caption)
                    dst = im

                # Calc fps.
                elapsed_sec = time.time() - start
                fps = 1 / elapsed_sec

                # Display fps
                fps_text = "{0:.2f}ms, {1:.2f}fps".format(
                    (elapsed_sec * 1000.0), fps)
                visual.draw_caption(dst, (10, 30), fps_text)

                # Display image
                cv2.imshow(WINDOW_NAME, dst)
                key = cv2.waitKey(10) & 0xFF
                if key == ord("q"):
                    break
                elif key == ord(" "):
                    is_inpaint_mode = not is_inpaint_mode
                    print("inpant mode change :", is_inpaint_mode)

        finally:
            camera.stop_preview()

    # When everything done, release the window
    cv2.destroyAllWindows()
def main():
    """Run ``DetectionEngine`` on a camera or video file and display it.

    Parses the command line, creates the engine and reads the label file,
    then reads frames from camera 0 (when ``--videopath`` is empty) or from
    the given file.  Each frame is passed to ``detect_with_image``; the
    detections and the inference time reported by the engine are drawn on
    the frame.  The loop ends when ``q`` is pressed or no further frame
    can be read.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model",
                        help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label",
                        help="File path of label file.",
                        required=True)
    # type=int: without it a value given on the command line stays a str.
    parser.add_argument("--top_k",
                        help="keep top k candidates.",
                        default=3,
                        type=int)
    parser.add_argument("--threshold",
                        help="threshold to filter results.",
                        default=0.5,
                        type=float)
    parser.add_argument("--width",
                        help="Resolution width.",
                        default=640,
                        type=int)
    parser.add_argument("--height",
                        help="Resolution height.",
                        default=480,
                        type=int)
    parser.add_argument("--videopath",
                        help="File path of Videofile.",
                        default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors, sized by the largest label id.
    last_key = max(labels)
    colors = visual.random_colors(last_key)

    # Video capture.
    if args.videopath == "":
        print('open camera.')
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print(args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    elapsed_list = []

    # try/finally so the capture and window are released on any exit path.
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of file or a failed grab: `frame` is unusable, and
                # passing it to cvtColor would raise.
                print("VideoCapture read return false.")
                break
            im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            input_buf = PIL.Image.fromarray(im)

            # Run inference.
            ans = engine.detect_with_image(
                input_buf,
                threshold=args.threshold,
                keep_aspect_ratio=False,
                relative_coord=False,
                top_k=args.top_k,
            )
            elapsed_ms = engine.get_inference_time()

            # Display result.
            if ans:
                for obj in ans:
                    # Unknown ids get a placeholder instead of a KeyError.
                    label_name = "Unknown"
                    if labels:
                        label_name = labels.get(obj.label_id, "Unknown")
                    caption = "{0}({1:.2f})".format(label_name, obj.score)

                    # Draw a rectangle and caption.
                    box = obj.bounding_box.flatten().tolist()
                    visual.draw_rectangle(frame, box, colors[obj.label_id])
                    visual.draw_caption(frame, box, caption)

            # Calc fps.
            elapsed_list.append(elapsed_ms)
            avg_text = ""
            # Average over the latest 100 samples, shown once more than 100
            # have been collected.
            if len(elapsed_list) > 100:
                elapsed_list.pop(0)
                avg_elapsed_ms = np.mean(elapsed_list)
                avg_text = " AGV: {0:.2f}ms".format(avg_elapsed_ms)

            # Display fps
            fps_text = "{0:.2f}ms".format(elapsed_ms)
            visual.draw_caption(frame, (10, 30), fps_text + avg_text)

            # display
            cv2.imshow(WINDOW_NAME, frame)
            if cv2.waitKey(10) & 0xFF == ord("q"):
                break
    finally:
        # When everything done, release the capture and the window
        cap.release()
        cv2.destroyAllWindows()
Beispiel #8
0
def main():
    """Detect objects on a PiCamera stream, with an optional inpaint mode.

    Parses the command line, creates a ``DetectionEngine`` and reads the
    label file.  For every captured frame the detections are either
    annotated with boxes and captions (default) or, in inpaint mode,
    masked out and filled in with ``cv2.inpaint``.  The space bar toggles
    the mode and ``q`` quits.  The time for the whole per-frame processing
    is drawn on the displayed image.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model",
                        help="File path of Tflite model.",
                        required=True)
    parser.add_argument("--label",
                        help="File path of label file.",
                        required=True)
    # type=int on the numeric options: without it, values given on the
    # command line stay strings and break the engine call, the camera
    # resolution and the mask shape below.
    parser.add_argument("--top_k",
                        help="keep top k candidates.",
                        default=3,
                        type=int)
    parser.add_argument("--threshold",
                        help="threshold to filter results.",
                        type=float,
                        default=0.5)
    parser.add_argument("--width",
                        help="Resolution width.",
                        default=640,
                        type=int)
    parser.add_argument("--height",
                        help="Resolution height.",
                        default=480,
                        type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors, sized by the largest label id.
    last_key = max(labels)
    colors = visual.random_colors(last_key)

    is_inpaint_mode = False
    with picamera.PiCamera() as camera:

        camera.resolution = (args.width, args.height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # allow the camera to warmup
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(rawCapture,
                                                   format="rgb",
                                                   use_video_port=True):
                # Timing covers everything done for this frame, not only
                # the engine call (time.time() deltas are in seconds).
                start = time.time()

                rawCapture.truncate(0)

                image = frame.array
                # OpenCV draws/displays in BGR; the engine gets the RGB data.
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                input_buf = PIL.Image.fromarray(image)

                # Run inference.
                ans = engine.DetectWithImage(
                    input_buf,
                    threshold=args.threshold,
                    keep_aspect_ratio=False,
                    relative_coord=False,
                    top_k=args.top_k,
                )
                # Both display modes tolerate an empty/None result; the
                # original guarded only the inpaint branch.
                detections = ans or ()

                # Display result.
                if is_inpaint_mode:
                    # Single-channel mask: 255 marks the pixels to inpaint.
                    mask = np.full((args.height, args.width),
                                   0,
                                   dtype=np.uint8)
                    for obj in detections:
                        if labels and obj.label_id in labels:
                            # Draw a filled mask rectangle.
                            box = obj.bounding_box.flatten().tolist()
                            visual.draw_rectangle(mask,
                                                  box, (255, 255, 255),
                                                  thickness=-1)

                    # Image Inpainting
                    dst = cv2.inpaint(im, mask, 3, cv2.INPAINT_TELEA)

                else:
                    for obj in detections:
                        if labels and obj.label_id in labels:
                            label_name = labels[obj.label_id]
                            caption = "{0}({1:.2f})".format(
                                label_name, obj.score)

                            # Draw a rectangle and caption.
                            box = obj.bounding_box.flatten().tolist()
                            visual.draw_rectangle(im, box,
                                                  colors[obj.label_id])
                            visual.draw_caption(im, box, caption)
                    dst = im

                # Calc fps.
                elapsed_sec = time.time() - start
                fps = 1 / elapsed_sec

                # Display fps
                fps_text = "{0:.2f}ms, {1:.2f}fps".format(
                    (elapsed_sec * 1000.0), fps)
                visual.draw_caption(dst, (10, 30), fps_text)

                # Display image
                cv2.imshow(WINDOW_NAME, dst)
                key = cv2.waitKey(10) & 0xFF
                if key == ord("q"):
                    break
                elif key == ord(" "):
                    is_inpaint_mode = not is_inpaint_mode
                    print("inpant mode change :", is_inpaint_mode)

        finally:
            camera.stop_preview()

    # When everything done, release the window
    cv2.destroyAllWindows()