Example #1
0
def Image_camera(num=0):
    """Capture frames from a camera and generate a pose-based code string.

    Opens camera ``num`` (0 = built-in camera, 1 = USB camera) and shows a
    live preview. Keys:
        f -- run the ``basis`` calibration on the current frame in a
             background thread
        q -- emergency exit (returns the empty code)
        s -- run pose estimation on the current frame, build the code, exit

    Returns:
        str: the generated code ("" if nothing was produced).
    """
    cap = cv2.VideoCapture(num)
    code = ""
    zdeg = {}

    while True:
        # Capture frame-by-frame.
        ret, frame = cap.read()
        if not ret:
            # Camera delivered no frame (disconnected/busy) -- stop cleanly
            # instead of passing None to cv2.imshow.
            break
        cv2.imshow('frame', frame)
        key = cv2.waitKey(1) & 0xFF

        if key == ord('f'):
            # Run calibration without blocking the capture loop.
            th = threading.Thread(target=basis,
                                  name='th',
                                  args=(np.array(frame, dtype=np.float32), ))
            th.daemon = True  # Thread.setDaemon() is deprecated since 3.10
            th.start()

        # Emergency exit.
        if key == ord('q'):
            break

        if key == ord('s'):
            img = np.array(frame, dtype=np.float32)
            pose_arr = model(img)
            pose_arr = pose_arr[0]  # unwrap model output -> array of poses
            result_img = draw_person_pose(img, pose_arr)
            pose_arr = pose_arr[0]  # first detected person
            if isKeypoint(pose_arr):
                if isSide(pose_arr, armave, legave):
                    zdeg = zaxis(armave, legave, pose_arr)
                    code = CreateSide(zdeg)
                else:
                    code = paint(pose_arr)
            break

    cap.release()
    cv2.waitKey(1)
    cv2.destroyAllWindows()
    return code
Example #2
0
def Image_path(path):
    """Run pose estimation on the image at ``path`` and return a code string.

    Args:
        path: filesystem path of the image to analyze.

    Returns:
        str: the generated code ("" when no usable keypoints are found).

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    code = ""
    zdeg = {}
    frame = cv2.imread(path)
    if frame is None:
        # cv2.imread returns None for missing/unreadable files; fail loudly
        # instead of crashing later in np.array with an obscure TypeError.
        raise FileNotFoundError(f"could not read image: {path}")

    img = np.array(frame, dtype=np.float32)
    pose_arr = model(img)
    pose_arr = pose_arr[0]  # unwrap model output -> array of poses
    result_img = draw_person_pose(img, pose_arr)
    pose_arr = pose_arr[0]  # first detected person
    if isKeypoint(pose_arr):
        if isSide(pose_arr, armave, legave):
            zdeg = zaxis(armave, legave, pose_arr)
            code = CreateSide(zdeg)
        else:
            code = paint(pose_arr)
    return code
Example #3
0
def _strip_scores(keypoints):
    """Drop the confidence column from each detected keypoint, cast to int.

    ``keypoints`` entries are either None (joint not detected) or
    (x, y, score) triples; None entries are kept as-is.
    """
    cleaned = list(keypoints)
    for i, kp in enumerate(cleaned):
        if kp is not None:
            cleaned[i] = [int(v) for v in np.delete(kp, 2)]
    return cleaned


def _missing_hand():
    """Sentinel keypoints for an undetected hand: 21 points far off-frame."""
    return [[1000, 1000] for _ in range(21)]


def _hand_features(hands, side, hand_detector, res_img):
    """Estimate one hand's keypoints and draw them on ``res_img``.

    Returns (keypoints, annotated_image); a missing hand yields the sentinel
    points and leaves the image untouched.
    """
    if hands[side] is None:
        return _missing_hand(), res_img
    hand_img = hands[side]["img"]
    bbox = hands[side]["bbox"]
    hand_keypoints = _strip_scores(hand_detector(hand_img, hand_type=side))
    res_img = draw_hand_keypoints(res_img, hand_keypoints, (bbox[0], bbox[1]))
    cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                  (255, 255, 255), 1)
    return hand_keypoints, res_img


def _pca_row(points, prefix):
    """Collapse (x, y) keypoints to one PCA component per point.

    Returns a single-row DataFrame with columns ``<prefix>_1 .. <prefix>_N``;
    None entries are replaced by the off-frame sentinel first.
    """
    filled = [[1000, 1000] if p is None else p for p in points]
    reduced = sklearnPCA(n_components=1).fit_transform(filled)
    row = pd.DataFrame(data=reduced).T
    row.columns = [f"{prefix}_{i + 1}" for i in range(row.shape[1])]
    return row


def convertData(gesture):
    """Extract pose/hand keypoint features for every video of *gesture*.

    For each video, roughly 12 evenly spaced frames are sampled; for each
    sampled frame the first detected person's body pose (leg joints removed)
    and both hands are each reduced to one PCA component per point and
    appended as one feature row. Rows are accumulated into the CSV loaded
    from ``sample.csv`` and written to ``dataset720new/<gesture>.csv``.

    Bug fixes vs. the original:
    - ``newdf`` was referenced without ever being defined (NameError).
    - ``firstPerson`` was reset to True for every person, so the
      "first person only" filter never took effect.
    - ``counter`` only advanced on skipped frames, so after the first
      sampled frame every subsequent frame was processed.
    - ``DataFrame.append`` (removed in pandas 2.0) replaced by ``pd.concat``.
    - Guards against clips shorter than 12 frames (modulo by zero) and
      frames with no detected person.
    """
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load models
    pose_detector = PoseDetector("posenet",
                                 "models/coco_posenet.npz",
                                 device=args.gpu)
    hand_detector = HandDetector("handnet",
                                 "models/handnet.npz",
                                 device=args.gpu)
    dataset = buildGestureDict("dataset/")
    gesturedf = pd.read_csv("sample.csv")
    newdf = pd.DataFrame()
    startvideo = time.time()  # defined up front so the final print is safe
    for video in dataset[gesture]["videos"]:
        print("Currently processing the video for " + video["filename"])
        startvideo = time.time()
        cap = cv2.VideoCapture(video["filepath"])
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        amount_of_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        print("Amount of Frames:", amount_of_frames)
        cap.set(cv2.CAP_PROP_FPS, 5)
        ret, img = cap.read()
        counter = 1
        # Sample ~12 frames; max() avoids modulo-by-zero on short clips.
        frame_tracker = max(1, int(amount_of_frames / 12))
        framecounter = 0
        while ret:
            ret, img = cap.read()
            if not ret:
                print("Failed to capture image")
                break
            person_pose_array, _ = pose_detector(img)
            res_img = cv2.addWeighted(img, 0.6,
                                      draw_person_pose(img, person_pose_array),
                                      0.4, 0)
            if counter % frame_tracker == 0 and len(person_pose_array) > 0:
                # Only the first detected person is converted to a row.
                person_pose = person_pose_array[0]
                unit_length = pose_detector.get_unit_length(person_pose)
                hands = pose_detector.crop_hands(img, person_pose,
                                                 unit_length)
                left, res_img = _hand_features(hands, "left", hand_detector,
                                               res_img)
                right, res_img = _hand_features(hands, "right", hand_detector,
                                                res_img)
                print("Body Pose")
                # Remove the leg joints (original indices 9, 10, 12, 13);
                # indices shift after each delete, hence 9, 9, 10, 10.
                person_pose = np.delete(person_pose, 9, 0)
                person_pose = np.delete(person_pose, 9, 0)
                person_pose = np.delete(person_pose, 10, 0)
                person_pose = np.delete(person_pose, 10, 0)
                person_pose = _strip_scores(person_pose.tolist())
                print(person_pose)
                print("Left")
                print(left)
                print("Right")
                print(right)
                cv2.imshow("result", res_img)
                # One feature row: body ("head_*"), left and right hand.
                df2 = pd.concat([
                    _pca_row(person_pose, "head"),
                    _pca_row(left, "left"),
                    _pca_row(right, "right"),
                ], axis=1)
                df2["frame"] = framecounter
                df2["gesture"] = video["gesture"]
                df2["speaker"] = video["actor"]
                framecounter = framecounter + 1
                df2["frame"] = df2["frame"].astype(int)
                newdf = pd.concat([newdf, df2], sort=False)
                gesturedf = pd.concat([gesturedf, df2], sort=False)
            else:
                cv2.imshow("result", img)
            # Advance on EVERY frame so sampling stays evenly spaced.
            counter = counter + 1
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        cap.release()
        cv2.destroyAllWindows()
    gesturedf.to_csv("dataset720new/" + gesture + ".csv", index=False)
    print("Done Recording for: " + gesture)
    print("Took " + str(time.time() - startvideo) + "seconds")
        th = threading.Thread(target=basis,
                              name='th',
                              args=(np.array(frame, dtype=np.float32), ))
        th.setDaemon(True)
        th.start()

    #緊急用
    if (key == ord('q')):
        break

    if (key == ord('s')):
        print("start")
        img = np.array(frame, dtype=np.float32)
        pose_arr = model(img)
        pose_arr = pose_arr[0]
        result_img = draw_person_pose(img, pose_arr)
        plt.figure(figsize=(6, 6))
        plt.imshow(255 - result_img[:, :, ::-1])
        pose_arr = pose_arr[0]
        if (isKeypoint(pose_arr)):
            while (basisT == False):
                counting = counting + 1
            if (isSide(pose_arr, armave, legave)):
                zdeg = zaxis(armave, legave, pose_arr)
                code = CreateSide(zdeg)
            else:
                code = paint(pose_arr)
        else:
            code = "SAY 'キーポイントが足りません' 5"
        break
                                 device=args.gpu)
    hand_detector = HandDetector("handnet",
                                 "models/handnet.npz",
                                 device=args.gpu)
    face_detector = FaceDetector("facenet",
                                 "models/facenet.npz",
                                 device=args.gpu)

    # read image
    img = cv2.imread(args.img)

    # inference
    print("Estimating pose...")
    person_pose_array, _ = pose_detector(img)
    res_img = cv2.addWeighted(img, 0.6,
                              draw_person_pose(img, person_pose_array), 0.4, 0)

    # each person detected
    for person_pose in person_pose_array:
        unit_length = pose_detector.get_unit_length(person_pose)

        # face estimation
        print("Estimating face keypoints...")
        cropped_face_img, bbox = pose_detector.crop_face(
            img, person_pose, unit_length)
        if cropped_face_img is not None:
            face_keypoints = face_detector(cropped_face_img)
            res_img = draw_face_keypoints(res_img, face_keypoints,
                                          (bbox[0], bbox[1]))
            cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                          (255, 255, 255), 1)
Example #6
0
    # NOTE(review): fragment -- `cap`, `pose_detector` and `logger` are
    # defined outside the visible region; presumably the body of a
    # two-camera capture routine. Confirm against the enclosing function.
    cap2 = cv2.VideoCapture(1)  #VideoReader(args.video)
    idx = 0
    # Output files opened in append mode; never closed in this fragment.
    f = open("C:/Users/Boombada/Desktop/myPy/inputData.txt", 'a')
    f1 = open("C:/Users/Boombada/Desktop/myPy/Label.txt", 'a')

    index = 0
    #  for img in video_provider:
    # NOTE(review): `&` is bitwise; it works here only because isOpened()
    # returns a bool -- `and` would be the conventional operator.
    while (cap.isOpened() & cap2.isOpened()):
        # Grab one frame from each camera.
        ret1, img = cap.read()
        ret2, img2 = cap2.read()

        # Run pose estimation on both views.
        poses, _ = pose_detector(img)
        poses2, _ = pose_detector(img2)

        # Overlay detected skeletons on the source frames (40% opacity).
        res_img = cv2.addWeighted(img, 0.6, draw_person_pose(img, poses), 0.4,
                                  0)
        res_img2 = cv2.addWeighted(img2, 0.6, draw_person_pose(img2, poses2),
                                   0.4, 0)

        #logger.debug("type: {}".format(type(poses)))
        #logger.debug("shape: {}".format(poses.shape))
        logger.debug("A")
        logger.debug(poses)

        logger.debug(poses2)

        # cv2.imshow(file_body_name + '_result', res_img)
        # Show both annotated frames only when both grabs succeeded.
        if (ret1 & ret2):
            cv2.imshow('video', res_img)
            cv2.imshow('video2', res_img2)
# load model
# NOTE(review): this region appears to splice two different example scripts
# together -- the nested `while True:` below re-reads from the same `cap`
# and re-creates a VideoWriter every frame; treat with suspicion.
pose_detector = PoseDetector("posenet", "models/coco_posenet.npz", device=args.gpu)
hand_detector = HandDetector("handnet", "models/handnet.npz", device=args.gpu)
cap = cv2.VideoCapture('sign language/sample.mp4')
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
cap.set(cv2.CAP_PROP_FPS, 4)
counter=0
while(cap.isOpened()):
    ret, img = cap.read()
    if not ret:
        print("Failed to capture image")
        break

    # Body pose estimation, skeleton overlaid at 40% opacity.
    person_pose_array, _ = pose_detector(img)
    res_img = cv2.addWeighted(img, 0.6, draw_person_pose(img, person_pose_array), 0.4, 0)
    # each person detected
    for person_pose in person_pose_array:
        unit_length = pose_detector.get_unit_length(person_pose)
        #print(person_pose)
        # hands estimation
        hands = pose_detector.crop_hands(img, person_pose, unit_length)
        if hands["left"] is not None:
            hand_img = hands["left"]["img"]
            bbox = hands["left"]["bbox"]
            hand_keypoints = hand_detector(hand_img, hand_type="left")
            res_img = draw_hand_keypoints(res_img, hand_keypoints, (bbox[0], bbox[1]))
            #print("Left")
            #print(hand_keypoints)
            cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 1)
    # NOTE(review): from here down the code looks pasted from a different
    # example (shrinks the capture mid-video and starts a nested read loop).
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    while True:
        # get video frame

        ret, img = cap.read()
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        if not ret:
            print("Failed to capture image")
            break

        person_pose_array, _ = pose_detector(img)
        data = draw_person_pose(img, person_pose_array)
        res_img = cv2.addWeighted(img, 0.6, data, 0.4, 0)
        frame_width = int(cap.get(3))
        frame_height = int(cap.get(4))

        #cv2.imshow("result", res_img)
        # NOTE(review): creating a new VideoWriter per frame re-truncates the
        # output file each iteration and leaks writers -- confirm intent.
        out = cv2.VideoWriter('outpy1.avi',
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                              (frame_width, frame_height))

while (True):
    ret, frame = cap.read()

    if ret == True:

        # Write the frame into the file 'output.avi'
def main(cap, im_scale=2, view_results=False):
    """Pre-label index-finger point pairs for every frame of a video.

    Runs body-pose and hand detectors over each frame of ``cap`` (a project
    capture wrapper exposing ``read``, ``eof``, ``frame_idx``,
    ``video_fname``; ``read()`` returns the frame directly, no success
    flag), records one [base, tip] point pair per detected hand (keypoints
    5 and 8) in full-resolution coordinates, and finally saves them via
    ``VideoLabelFile``.

    Args:
        cap: project video source (NOT a cv2.VideoCapture).
        im_scale: downscale factor applied before the detectors; stored
            coordinates are scaled back up by the same factor.
        view_results: show an annotated preview window (Esc stops the loop,
            'c' drops into ipdb).
    """
    debug_i = 0
    fps_timer_arr = [0] * 16  # ring buffer of frame durations for rolling FPS
    fps = 0

    # load model
    pose_device = 0
    pose_model_dir = '../../Chainer_Realtime_Multi-Person_Pose_Estimation/models'
    pose_detector = PoseDetector("posenet",
                                 f"{pose_model_dir}/coco_posenet.npz",
                                 device=pose_device)
    hand_detector = HandDetector("handnet",
                                 f"{pose_model_dir}/handnet.npz",
                                 device=pose_device)

    # cv2.namedWindow('display', flags=(cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE))
    if view_results: cv2.namedWindow('display')

    video_label_file = VideoLabelFile(cap.video_fname,
                                      fname_add='pre_points_pose')
    # frame_id -> list of [base, tip] point pairs found in that frame.
    labels_current = defaultdict(lambda: [])
    # NOTE(review): loaded but never used below -- presumably kept for a
    # merge step that was dropped; confirm before deleting.
    labels_all_previous = video_label_file.load_previous()

    # First read only establishes the input frame size.
    im_input = cap.read()
    im_input_shape = im_input.shape[0:2]

    first_run = True

    while (not cap.eof):
        fps_time_begin = time.perf_counter()
        debug_i += 1

        im_input = cap.read()
        current_frame_id = cap.frame_idx()
        # print(cap.info())

        # Downscale for the detectors; cv2.resize takes (width, height).
        im_pose = cv2.resize(im_input, (round(im_input_shape[1] / im_scale),
                                        round(im_input_shape[0] / im_scale)))
        if first_run:
            print(
                f"Video size {im_input.shape} -> Model input size {im_pose.shape}"
            )
            first_run = False

        ##########################################
        person_pose_array, _ = pose_detector(im_pose)
        im_display = cv2.addWeighted(
            im_pose, 0.6, draw_person_pose(im_pose, person_pose_array), 0.4, 0)

        for person_pose in person_pose_array:
            unit_length = pose_detector.get_unit_length(person_pose)

            # arr = np.array([a for a in person_pose if a is not None])
            # if arr.any():
            #     arr[:, 0:2] *= im_scale
            #     labels_current[current_frame_id].append(['pre_person_pose', arr.tolist()])

            # hands estimation
            hands = pose_detector.crop_hands(im_pose, person_pose, unit_length)
            if hands["left"] is not None:
                hand_img = hands["left"]["img"]
                bbox = hands["left"]["bbox"]
                hand_keypoints = hand_detector(hand_img, hand_type="left")
                im_display = draw_hand_keypoints(im_display, hand_keypoints,
                                                 (bbox[0], bbox[1]))
                cv2.rectangle(im_display, (bbox[0], bbox[1]),
                              (bbox[2], bbox[3]), (255, 255, 255), 1)

                # Keypoints 5 and 8 look like index-finger base and tip
                # (OpenPose hand layout) -- TODO confirm. The pair is shifted
                # by the crop origin and scaled back to full resolution.
                if hand_keypoints[5] and hand_keypoints[8]:
                    f_points = np.array(
                        [hand_keypoints[5][:2], hand_keypoints[8][:2]])
                    f_points = (f_points +
                                np.array([bbox[0], bbox[1]])) * im_scale
                    #f_points = tuple(map(tuple, f_points.astype(int)))
                    f_points = f_points.astype(int).tolist()
                    labels_current[current_frame_id].append(f_points)

            if hands["right"] is not None:
                # Mirror of the left-hand branch above.
                hand_img = hands["right"]["img"]
                bbox = hands["right"]["bbox"]
                hand_keypoints = hand_detector(hand_img, hand_type="right")
                im_display = draw_hand_keypoints(im_display, hand_keypoints,
                                                 (bbox[0], bbox[1]))
                cv2.rectangle(im_display, (bbox[0], bbox[1]),
                              (bbox[2], bbox[3]), (255, 255, 255), 1)

                if hand_keypoints[5] and hand_keypoints[8]:
                    f_points = np.array(
                        [hand_keypoints[5][:2], hand_keypoints[8][:2]])
                    f_points = (f_points +
                                np.array([bbox[0], bbox[1]])) * im_scale
                    #f_points = tuple(map(tuple, f_points.astype(int)))
                    f_points = f_points.astype(int).tolist()
                    labels_current[current_frame_id].append(f_points)

        #############################################
        # Draw the collected pairs on the (downscaled) display image:
        # blue circle = first point, green circle = second point.
        for l in labels_current[current_frame_id]:
            cv2.circle(im_display,
                       (round(l[0][0] / im_scale), round(l[0][1] / im_scale)),
                       10, (255, 0, 0), 2)
            cv2.circle(im_display,
                       (round(l[1][0] / im_scale), round(l[1][1] / im_scale)),
                       10, (0, 255, 0), 2)

        cv2.putText(im_display,
                    f"frame {int(current_frame_id)}, fps: {int(fps)}.",
                    (10, im_display.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    0.8, (255, 255, 255), 2)

        if view_results:
            # NOTE(review): shows the raw resized frame, not the annotated
            # im_display prepared above -- looks like a leftover debug
            # toggle; confirm which image should be shown.
            #cv2.imshow('display', im_display)
            cv2.imshow('display', im_pose)
        else:
            # Headless progress indicator.
            print(".", end="")
            sys.stdout.flush()

        # labels_current[current_frame_id].append

        #############################################
        ## KEYBOARD

        k = cv2.waitKey(5)
        if k == 27:  # esc
            break
        elif k == ord('c'):
            # Interactive debugging hook.
            import ipdb
            ipdb.set_trace()
            # ipdb.set_trace()
            # pdb.set_trace()

        fps_timer_arr[debug_i % 16] = time.perf_counter() - fps_time_begin
        fps = int(len(fps_timer_arr) * 1 / sum(fps_timer_arr))

    print(". ")
    # cap.release()
    # Persist this run's labels (append_previous=False: previous labels are
    # not merged in).
    video_label_file.save_current_labels(labels_current,
                                         append_previous=False,
                                         custom_lists=True)

    if view_results: cv2.destroyAllWindows()
import argparse
import chainer
from pose_detector import PoseDetector, draw_person_pose

chainer.using_config('enable_backprop', False)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load model
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz", device=args.gpu)

    # NOTE(review): cv2 is used here but no `import cv2` appears in this
    # file's visible import block -- confirm it is imported elsewhere.
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    try:
        while True:
            # get video frame
            ret, img = cap.read()

            if not ret:
                print("Failed to capture image")
                break

            # Overlay the detected skeleton on the frame (40% opacity).
            person_pose_array, _ = pose_detector(img)
            res_img = cv2.addWeighted(img, 0.6, draw_person_pose(img, person_pose_array), 0.4, 0)
            cv2.imshow("result", res_img)
            # Press 'q' to quit; previously the loop could only end on a
            # failed capture and the camera was never released.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release the camera and close windows, even on Ctrl-C.
        cap.release()
        cv2.destroyAllWindows()
Example #11
0
def estimate_pose(img_path, gpu=-1):
    """Run body + hand pose estimation on one image and save an overlay.

    Only the first detected person's hands are estimated. The annotated
    image is written to ``result.png`` in the working directory.

    Args:
        img_path: path of the input image.
        gpu: GPU device id; a negative value runs on CPU.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    # load models
    print("Loading pose detection model...")
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz", device=gpu)
    print("Loading hand detection model...")
    hand_detector = HandDetector("handnet", "models/handnet.npz", device=gpu)

    # read image
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising on a bad path; without
        # this check the code crashed later inside cv2.addWeighted.
        raise FileNotFoundError(f"could not read image: {img_path}")

    # inference
    print("Estimating pose...")
    person_pose_array, _ = pose_detector(img)

    res_img = cv2.addWeighted(img, 0.6, draw_person_pose(img, person_pose_array), 0.4, 0)

    # Process only the first detected person; `break` at the end of the body
    # replaces the original has_detected/continue flag.
    for person_pose in person_pose_array:
        print("Body:", person_pose)
        unit_length = pose_detector.get_unit_length(person_pose)

        # hands estimation
        print("Estimating hands keypoints...")
        hands = pose_detector.crop_hands(img, person_pose, unit_length)
        if hands["left"] is not None:
            hand_img = hands["left"]["img"]
            bbox = hands["left"]["bbox"]
            hand_keypoints = hand_detector(hand_img, hand_type="left")
            print("Left hand: ", print_arr(hand_keypoints))
            res_img = draw_hand_keypoints(res_img, hand_keypoints, (bbox[0], bbox[1]))
            cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 1)

        if hands["right"] is not None:
            hand_img = hands["right"]["img"]
            bbox = hands["right"]["bbox"]
            hand_keypoints = hand_detector(hand_img, hand_type="right")
            print("Right hand: ", print_arr(hand_keypoints))
            res_img = draw_hand_keypoints(res_img, hand_keypoints, (bbox[0], bbox[1]))
            cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 1)

        break  # first person only

    print('Saving result into result.png...')
    cv2.imwrite('result.png', res_img)