Example #1
def run_model(img_path, model):
    img_raw = cv2.imread(img_path)
    img_height_raw, img_width_raw, _ = img_raw.shape
    img = np.float32(img_raw.copy())

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

    outputs = model(img[np.newaxis, ...]).numpy()

    outputs = recover_pad_output(outputs, pad_params)

    name = img_path.split('/')[-1].split('.')[0]

    if not os.path.exists('outputs'):
        os.makedirs('outputs')

    saved_img_path = OUTPUT + name + '_OUTPUT.png'

    for prior_index in range(len(outputs)):
        draw_bbox_landm(img_raw, outputs[prior_index], img_height_raw,
                        img_width_raw)
    cv2.imwrite(saved_img_path, img_raw)

    return saved_img_path
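
Every example on this page calls pad_input_image and recover_pad_output from the project's utilities without showing them. For reference, here is a minimal sketch of what these helpers typically look like in the RetinaFace-TF2 codebase; the 16-column output layout (4 box coordinates, 10 landmark coordinates, a landmark-valid flag, a confidence) is inferred from how the examples index outputs.

import cv2
import numpy as np

def pad_input_image(img, max_steps):
    """Pad bottom/right so both image sides are multiples of max_steps."""
    img_h, img_w, _ = img.shape
    img_pad_h = 0 if img_h % max_steps == 0 else max_steps - img_h % max_steps
    img_pad_w = 0 if img_w % max_steps == 0 else max_steps - img_w % max_steps
    pad_val = np.mean(img[-1, -1]).astype(np.uint8)  # pad with the corner color
    img = cv2.copyMakeBorder(img, 0, img_pad_h, 0, img_pad_w,
                             cv2.BORDER_CONSTANT, value=[pad_val] * 3)
    return img, (img_h, img_w, img_pad_h, img_pad_w)

def recover_pad_output(outputs, pad_params):
    """Rescale normalized coords from the padded image back to the original."""
    img_h, img_w, img_pad_h, img_pad_w = pad_params
    # first 14 columns = 7 (x, y) pairs: 2 box corners + 5 landmarks
    recover_xy = np.reshape(outputs[:, :14], [-1, 7, 2]) * \
        [(img_pad_w + img_w) / img_w, (img_pad_h + img_h) / img_h]
    outputs[:, :14] = np.reshape(recover_xy, [-1, 14])
    return outputs
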
Example #2
def main(
    image_path,
    config_path,
    export_path,
    ckpt_path,
    score_thres,
    iou_thres,
    detection_width,
    result_save_path,
):
    config = load_yaml(config_path)

    if not Path(export_path).joinpath("saved_model.pb").exists():
        if ckpt_path is None:
            raise ValueError("Must provide a checkpoint to export model.")
        export_to_saved_model(ckpt_path, export_path, config)

    loaded_model = tf.saved_model.load(export_path)
    print("model_loaded")

    img_raw = cv2.imread(image_path)
    img_rgb = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
    img_height_raw, img_width_raw, _ = img_rgb.shape
    outputs = _run_detection(loaded_model, img_rgb, score_thres, iou_thres, detection_width)

    # draw and save results
    result_save_path = Path(result_save_path)
    result_save_path.mkdir(exist_ok=True, parents=True)
    save_img_path = result_save_path.joinpath("result_" + Path(image_path).name)
    for prior_index in range(len(outputs)):
        draw_bbox_landm(
            img_raw, outputs[prior_index], img_height_raw, img_width_raw, draw_score=True, draw_lm=True
        )
    cv2.imwrite(str(save_img_path), img_raw)
    print(f"Results saved at {save_img_path}")
Example #3
def main(_argv):
    cfg = load_yaml(FLAGS.cfg_path)
    input_size = FLAGS.size
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    if FLAGS.framework == 'tf':
        infer = tf.keras.models.load_model(FLAGS.weights)

    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        signature_keys = list(saved_model_loaded.signatures.keys())
        print(signature_keys)
        infer = saved_model_loaded.signatures['serving_default']
    logging.info('weights loaded')

    img_raw = cv2.imread(FLAGS.image)
    img_height_raw, img_width_raw, _ = img_raw.shape
    img = np.float32(img_raw.copy())

    if FLAGS.down_scale_factor < 1.0:
        img = cv2.resize(img, (0, 0),
                         fx=FLAGS.down_scale_factor,
                         fy=FLAGS.down_scale_factor,
                         interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # pad input image to avoid unmatched shape problem
    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
    batched_input = img[np.newaxis, ...]
    if FLAGS.framework == 'tf':
        outputs = infer(batched_input).numpy()
    elif FLAGS.framework == 'trt':
        # the serving signature returns a dict of output tensors;
        # take the first (and typically only) entry
        pred_bbox = infer(batched_input)
        outputs = list(pred_bbox.values())[0].numpy()

    # recover padding effect
    outputs = recover_pad_output(outputs, pad_params)

    # draw and save results
    save_img_path = 'out.jpg'
    for prior_index in range(len(outputs)):
        draw_bbox_landm(img_raw, outputs[prior_index], img_height_raw,
                        img_width_raw)
    cv2.imwrite(save_img_path, img_raw)
    print(f"[*] save result at {save_img_path}")
Example #4
def process_single_image(img_path, img_outputpath, model, cfg, data):

    if not os.path.exists(img_path):
        print(f"cannot find image path from {img_path}")
        exit()

    img_raw = cv2.imread(img_path)
    img_height_raw, img_width_raw, _ = img_raw.shape
    img = np.float32(img_raw.copy())

    if FLAGS.down_scale_factor < 1.0:
        img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                         fy=FLAGS.down_scale_factor,
                         interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Pad input image to avoid unmatched shape problem
    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

    # Run model
    outputs = model(img[np.newaxis, ...]).numpy()
    # Recover padding effect
    outputs = recover_pad_output(outputs, pad_params)

    landmarks = []
    for prior_index in range(len(outputs)):
        x = draw_bbox_landm(img_raw, outputs[prior_index], img_height_raw,
                            img_width_raw)
        landmarks.append(x)
    cv2.imwrite(img_outputpath, img_raw)

    return get_json_landmark_data(data, img_outputpath, landmarks)
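
get_json_landmark_data is not defined in this example either; a hypothetical sketch of a helper that packages the detections as a JSON-serializable record (field names are illustrative):

import numpy as np

def get_json_landmark_data(data, img_outputpath, landmarks):
    # hypothetical helper: attach the per-face landmarks to the source record
    return {
        'source': data,
        'output_image': img_outputpath,
        'num_faces': len(landmarks),
        'landmarks': [np.asarray(lm).tolist() for lm in landmarks],
    }
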
Example #5
def main(_argv):
    # init
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = "./checkpoints/" + cfg["sub_name"]
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    # evaluation on testing dataset
    testset_folder = cfg["testing_dataset_path"]
    testset_list = os.path.join(testset_folder, "label.txt")

    img_paths, _ = load_info(testset_list)
    for img_index, img_path in enumerate(img_paths):
        print(" [{} / {}] det {}".format(img_index + 1, len(img_paths),
                                         img_path))
        img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        # testing scale
        target_size = 1600
        max_size = 2150
        img_shape = img.shape
        img_size_min = np.min(img_shape[0:2])
        img_size_max = np.max(img_shape[0:2])
        resize = float(target_size) / float(img_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * img_size_max) > max_size:
            resize = float(max_size) / float(img_size_max)
        if FLAGS.origin_size:
            if os.path.basename(img_path) == "6_Funeral_Funeral_6_618.jpg":
                resize = 0.5  # this image is too large; downscale to avoid OOM
            else:
                resize = 1

        img = cv2.resize(img,
                         None,
                         None,
                         fx=resize,
                         fy=resize,
                         interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg["steps"]))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # write results
        img_name = os.path.basename(img_path)
        sub_dir = os.path.basename(os.path.dirname(img_path))
        save_name = os.path.join(FLAGS.save_folder, sub_dir,
                                 img_name.replace(".jpg", ".txt"))

        pathlib.Path(os.path.join(FLAGS.save_folder,
                                  sub_dir)).mkdir(parents=True, exist_ok=True)

        with open(save_name, "w") as file:
            bboxs = outputs[:, :4]
            confs = outputs[:, -1]

            file_name = img_name + "\n"
            bboxs_num = str(len(bboxs)) + "\n"
            file.write(file_name)
            file.write(bboxs_num)
            for box, conf in zip(bboxs, confs):
                x = int(box[0] * img_width_raw)
                y = int(box[1] * img_height_raw)
                w = int(box[2] * img_width_raw) - int(box[0] * img_width_raw)
                h = int(box[3] * img_height_raw) - int(box[1] * img_height_raw)
                confidence = str(conf)
                line = str(x) + " " + str(y) + " " + str(w) + " " + str(
                    h) + " " + confidence + " \n"
                file.write(line)

        # save images
        pathlib.Path(os.path.join("./results", cfg["sub_name"],
                                  sub_dir)).mkdir(parents=True, exist_ok=True)
        if FLAGS.save_image:
            for prior_index in range(len(outputs)):
                if outputs[prior_index][15] >= FLAGS.vis_th:
                    draw_bbox_landm(img_raw, outputs[prior_index],
                                    img_height_raw, img_width_raw)
            cv2.imwrite(
                os.path.join("./results", cfg["sub_name"], sub_dir, img_name),
                img_raw)
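
The per-image text files written above follow the WIDER FACE evaluation format: the image name, the number of detections, then one "x y w h confidence" line per box in original-image pixels. An illustrative output file (values made up):

6_Funeral_Funeral_6_123.jpg
2
345 211 58 74 0.99873 
102 198 61 79 0.97215 
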
Example #6
def capture_face_img():
    gauid = cv2.imread('g.png')

    capture = cv2.VideoCapture(0)  # open the camera
    capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
    start_time = time.time()
    timer = time.time()
    timer_flag = 0
    name_count = 0
    while True:
        ret, frame = capture.read()  # read a frame from the camera
        frame = cv2.flip(frame, 1)  # mirror the frame horizontally
        # cv2.imshow("Frame", frame)  # show it on screen

        key = cv2.waitKey(33)

        outputs = get_fv.get_face_value(frame)
        print(outputs)

        frame_height, frame_width, _ = frame.shape
        gauid = cv2.resize(gauid,
                           dsize=(frame_width, frame_height),
                           interpolation=cv2.INTER_AREA)

        # landmark save
        Nfaces = len(outputs)
        lms = np.zeros((Nfaces, 10))
        lms[:, 0:5] = outputs[:, [4, 6, 8, 10, 12]] * frame_width
        lms[:, 5:10] = outputs[:, [5, 7, 9, 11, 13]] * frame_height
        #print(lms)

        if len(outputs) == 0:
            cv2.putText(frame, "No Person in Frame", (200, 25),
                        cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 0, 255), 1)
        # one or more faces detected
        elif len(outputs) >= 1:
            box_arr = []
            for prior_index in range(len(outputs)):
                x1, y1, x2, y2 = int(outputs[prior_index][0] * frame_width), int(outputs[prior_index][1] * frame_height), \
                int(outputs[prior_index][2] * frame_width), int(outputs[prior_index][3] * frame_height)
                box_arr.append((x2 - x1) * (y2 - y1))
            max_size = max(box_arr)
            max_idx = box_arr.index(max_size)
            max_x1, max_y1, max_x2, max_y2 = int(outputs[max_idx][0] * frame_width), int(outputs[max_idx][1] * frame_height), \
            int(outputs[max_idx][2] * frame_width), int(outputs[max_idx][3] * frame_height)

            # center coordinates of the face box
            center_x = int(outputs[max_idx][0] * frame_width) + (
                (int(outputs[max_idx][2] * frame_width) -
                 int(outputs[max_idx][0] * frame_width)) / 2)
            center_y = int(outputs[max_idx][1] * frame_height) + (
                (int(outputs[max_idx][3] * frame_height) -
                 int(outputs[max_idx][1] * frame_height)) / 2)
            is_center_x = center_x < (int(frame_width / 2) +
                                      40) and center_x > (
                                          int(frame_width / 2) - 40)
            is_center_y = center_y < (int(frame_height / 2) +
                                      20) and center_y > (
                                          int(frame_height / 2) - 60)
            cv2.circle(frame, (int(frame_width / 2), int(frame_height / 2)), 1,
                       (255, 255, 0), 1)
            # show the coordinates
            cv2.putText(
                frame, "bbox_center" + "(" + str(center_x) + "," +
                str(center_y) + ")", (400, 40), cv2.FONT_HERSHEY_DUPLEX, 0.5,
                (255, 255, 0), 1)
            cv2.putText(
                frame, "bbox_center" + "(" + str(is_center_x) + "," +
                str(is_center_y) + ")", (400, 60), cv2.FONT_HERSHEY_DUPLEX,
                0.5, (255, 255, 0), 1)
            # check the face size and that the face sits in the guide area (frame center)
            if max_size >= 150 * 150 and is_center_x and is_center_y:
                draw_bbox_landm(frame, outputs[box_arr.index(max_size)],
                                frame_height, frame_width)
                cv2.putText(frame,
                            "(" + str(x2 - x1) + "," + str(y2 - y1) + ")",
                            (25, 140), cv2.FONT_HERSHEY_DUPLEX, 0.5,
                            (255, 255, 0), 1)

                # three-axis (roll/yaw/pitch) angle filter
                if (find_roll(lms[max_idx]) >= -10 and \
                    find_roll(lms[max_idx]) <= 10) and \
                (find_yaw(lms[max_idx]) >= -15 and \
                    find_yaw(lms[max_idx]) <= 15) and \
                (find_pitch(lms[max_idx]) >= -2 and \
                    find_pitch(lms[max_idx]) <= 2):
                    cv2.putText(frame, "Angle Filter Pass", (200, 25),
                                cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 1)
                    # run the countdown timer
                    if time.time() - timer < 0:
                        pass
                    elif time.time() - timer < 3:
                        cv2.putText(frame,
                                    str(3 - int(time.time() - timer)) + 'sec ',
                                    (500, 30), cv2.FONT_HERSHEY_DUPLEX, 0.5,
                                    (255, 0, 0), 1)
                    if (time.time() - timer >= 3):  # hold the detected face for 3 seconds
                        cv2.imwrite('{}.jpg'.format(name_count), frame)  # capture
                        name_count += 1
                        timer = time.time()
                        timer_flag = 1
                else:
                    cv2.putText(frame, "Angle Filter NO Pass", (200, 25),
                                cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 0, 255), 1)
                    timer = time.time()
            elif max_size < 150 * 150 and is_center_x and is_center_y:
                cv2.putText(frame, "Please come closer", (200, 25),
                            cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 0, 255), 1)
                timer = time.time()
            else:
                cv2.putText(frame, "Please come to the center", (200, 25),
                            cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 0, 255), 1)
                timer = time.time()

        # calculate fps
        fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
        start_time = time.time()
        cv2.putText(frame, fps_str, (25, 25), cv2.FONT_HERSHEY_DUPLEX, 0.75,
                    (0, 255, 0), 1)
        cv2.imshow('frame', frame & gauid)

        # if key == 99:  # ASCII code for 'c' is 99
        #     img_path = 'captured/{}.png'.format(uuid.uuid4())
        #     start = time.time()
        #     f_value = get_fv.get_face_value(frame)
        #     print('face image extraction took', time.time()-start)
        #     print(f_value)
        # if(f_value['success']):
        #     print(f_value['face_value'])
        #     # x1, y1, x2, y2 = f_value['face_value'][:4]
        #     # start = time.time()
        #     # response = requests.post(myurl, data=pickle.dumps({'label': myname, 'img': frame[y1:y2, x1:x2]}), headers=headers)
        #     # print('face img send time', time.time()-start)
        #     # print(response)
        # # img_rgb[top:bottom, left: right]
        # # alignment = f_cap.align_face(frame)
        # # print(alignment['success'])
        # # if (alignment['success']):
        # #     # cv2.imwrite(img_path, alignment['faceImg'])
        # #     _, img_encoded = cv2.imencode('.jpg', alignment['faceImg'])
        # #     print('face img send started', datetime.datetime.now())

        # #     # files = {'file': open(img_path, 'rb')}
        # #     response = requests.post(myurl, data=pickle.dumps({'label': myname, 'img': img_encoded.tostring()}), headers=headers)
        # #     print(response)
        # else:
        #     continue
        if key == 27:
            capture.release()
            cv2.destroyAllWindows()
            break
    return frame
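
find_roll, find_yaw and find_pitch are not shown; they operate on the 10-element landmark vector built above, laid out as [x_le, x_re, x_nose, x_ml, x_mr, y_le, y_re, y_nose, y_ml, y_mr]. A rough sketch of such heuristics (an assumption, not necessarily the example's actual helpers):

def find_roll(pts):
    # eye-height difference: ~0 when the head is level
    return pts[6] - pts[5]

def find_yaw(pts):
    # nose x-offset between the eyes: ~0 when facing the camera
    le2n = pts[2] - pts[0]
    re2n = pts[1] - pts[2]
    return le2n - re2n

def find_pitch(pts):
    # eye-to-nose vs nose-to-mouth vertical ratio: ~1 for a frontal face
    eye_y = (pts[5] + pts[6]) / 2
    mou_y = (pts[8] + pts[9]) / 2
    e2n = eye_y - pts[7]
    n2m = pts[7] - mou_y
    return e2n / n2m if n2m != 0 else 0
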
Example #7
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load model from weights.h5
    # model.load_weights('./model/mbv2_weights.h5', by_name=True, skip_mismatch=True)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        file_path = '/Users/lichaochao/Downloads/images_UMU/'
        for file_name in os.listdir(file_path + 'source_images/'):
            image_path = file_path + 'source_images/' + file_name
            if not os.path.exists(image_path):
                print(f"cannot find image path from {image_path}")
                continue

            img_raw = cv2.imread(image_path)
            img = np.float32(img_raw.copy())

            # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
            img_height, img_width, _ch = img.shape

            # run model
            outputs = model(img[np.newaxis, ...])

            preds = tf.concat([
                outputs[0][0], outputs[1][0, :, 1][..., tf.newaxis],
                outputs[2][0, :, 1][..., tf.newaxis]
            ], -1)

            priors = prior_box_tf((img_height, img_width), cfg['min_sizes'],
                                  cfg['steps'], cfg['clip'])
            decode_preds = decode_tf(preds, priors, cfg['variances'])

            selected_indices = tf.image.non_max_suppression(
                boxes=decode_preds[:, :4],
                scores=decode_preds[:, -1],
                max_output_size=tf.shape(decode_preds)[0],
                iou_threshold=FLAGS.iou_th,
                score_threshold=FLAGS.score_th)

            outputs = tf.gather(decode_preds, selected_indices).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)
            has_face = False
            is_smile = False
            for prior_index in range(len(outputs)):
                ann = outputs[prior_index]
                if ann[-1] >= 0.5:
                    has_face = True
                    x1, y1 = int(ann[0] * img_width), int(ann[1] * img_height)
                    x2, y2 = int(ann[2] * img_width), int(ann[3] * img_height)

                    text = "face: {:.2f}".format(ann[-1] * 100)
                    cv2.putText(img, text, (x1 + 5, y1 - 10),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                    if ann[-2] >= 0.5:
                        is_smile = True
                        smile_text = "smile: {:.2f}".format(ann[-2] * 100)
                        cv2.putText(img, smile_text, (x1 + 5, y1 + 30),
                                    cv2.FONT_HERSHEY_DUPLEX, 0.5,
                                    (255, 255, 255))
                        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
                    else:
                        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            if is_smile:
                dst_file_path = os.path.join(file_path, 'smile_face', file_name)
            elif has_face:
                dst_file_path = os.path.join(file_path, 'face', file_name)
            else:
                dst_file_path = os.path.join(file_path, 'no_face', file_name)
            cv2.imwrite(dst_file_path, img)
            print(dst_file_path)

    else:
        cam = cv2.VideoCapture('./data/linda_umu.mp4')
        # cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        # cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
        resize = FLAGS.down_scale_factor
        frame_height = cam.get(cv2.CAP_PROP_FRAME_HEIGHT) * resize
        frame_width = cam.get(cv2.CAP_PROP_FRAME_WIDTH) * resize

        max_steps = max(cfg['steps'])
        img_pad_h = max_steps - frame_height % max_steps if frame_height % max_steps > 0 else 0
        img_pad_w = max_steps - frame_width % max_steps if frame_width % max_steps > 0 else 0
        priors = prior_box_tf(
            (frame_height + img_pad_h, frame_width + img_pad_w),
            cfg['min_sizes'], cfg['steps'], cfg['clip'])

        frame_index = 0
        outputs = []
        start_time = time.time()
        while cam.isOpened():
            _, frame = cam.read()
            if frame is None:
                print('no cam')
                break
            if frame_index < 5:
                frame_index += 1
                # continue
            else:
                frame_index = 0

                img = np.float32(frame.copy())
                if resize < 1:
                    img = cv2.resize(img, (0, 0),
                                     fx=resize,
                                     fy=resize,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model
                outputs = model(img[np.newaxis, ...])

                preds = tf.concat([
                    outputs[0][0], outputs[1][0, :, 1][..., tf.newaxis],
                    outputs[2][0, :, 1][..., tf.newaxis]
                ], -1)

                decode_preds = decode_tf(preds, priors, cfg['variances'])

                selected_indices = tf.image.non_max_suppression(
                    boxes=decode_preds[:, :4],
                    scores=decode_preds[:, -1],
                    max_output_size=tf.shape(decode_preds)[0],
                    iou_threshold=FLAGS.iou_th,
                    score_threshold=FLAGS.score_th)

                outputs = tf.gather(decode_preds, selected_indices).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs,
                                             pad_params,
                                             resize=resize)

                # calculate fps
                fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
                start_time = time.time()
                cv2.putText(frame, fps_str, (25, 50), cv2.FONT_HERSHEY_DUPLEX,
                            0.75, (0, 0, 255), 2)

            # draw results
            for prior_index in range(len(outputs)):
                draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                frame_width)

            # calculate fps
            # fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            # start_time = time.time()
            # cv2.putText(frame, fps_str, (25, 25),
            #             cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) == ord('q'):
                exit()
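
prior_box_tf and decode_tf implement standard SSD-style anchor decoding with variances. A simplified, bbox-only sketch of the decode step (the project's decode_tf also decodes the ten landmark offsets the same way; decode_bbox_tf is a name made up for this sketch):

import tensorflow as tf

def decode_bbox_tf(preds, priors, variances=(0.1, 0.2)):
    # priors are (cx, cy, w, h); preds[:, :4] are the regression deltas
    centers = priors[:, :2] + preds[:, :2] * variances[0] * priors[:, 2:]
    sides = priors[:, 2:] * tf.exp(preds[:, 2:4] * variances[1])
    boxes = tf.concat([centers - sides / 2, centers + sides / 2], axis=1)
    # keep the remaining columns (landmarks / scores) untouched in this sketch
    return tf.concat([boxes, preds[:, 4:]], axis=1)
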
Example #8
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        if FLAGS.down_scale_factor < 1.0:
            img = cv2.resize(img, (0, 0),
                             fx=FLAGS.down_scale_factor,
                             fy=FLAGS.down_scale_factor,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = 'out_' + os.path.basename(FLAGS.img_path)
        for prior_index in range(len(outputs)):
            draw_bbox_landm(img_raw, outputs[prior_index], img_height_raw,
                            img_width_raw)
        cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")

    else:
        cam = cv2.VideoCapture("./videos/Bentall_Centra.MP4")

        start_time = time.time()
        while True:
            _, frame = cam.read()
            if frame is None:
                print("no cam input")
                break

            frame_height, frame_width, _ = frame.shape
            img = np.float32(frame.copy())
            if FLAGS.down_scale_factor < 1.0:
                img = cv2.resize(img, (0, 0),
                                 fx=FLAGS.down_scale_factor,
                                 fy=FLAGS.down_scale_factor,
                                 interpolation=cv2.INTER_LINEAR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

            # run model
            outputs = model(img[np.newaxis, ...]).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            for prior_index in range(len(outputs)):
                draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                frame_width)

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            start_time = time.time()
            cv2.putText(frame, fps_str, (25, 25), cv2.FONT_HERSHEY_DUPLEX,
                        0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) == ord('q'):
                exit()
Example #9
train_dataset = load_tfrecord_dataset(
    tfrecord_name, batch_size, img_dim=img_dim, using_bin=using_bin,
    using_flip=True, using_distort=False, using_encoding=using_encoding,
    priors=priors, match_thresh=match_thresh, ignore_thresh=ignore_thresh,
    variances=variances, shuffle=False)

start_time = time.time()
for idx, (inputs, labels) in enumerate(train_dataset.take(num_samples)):
    print("{} inputs:".format(idx), inputs.shape, "labels:", labels.shape)

    if not visualization:
        continue

    img = np.clip(inputs.numpy()[0], 0, 255).astype(np.uint8)
    if not using_encoding:
        # labels includes loc, landm, landm_valid.
        targets = labels.numpy()[0]
        for target in targets:
            draw_bbox_landm(img, target, img_dim, img_dim)
    else:
        # labels includes loc, landm, landm_valid, conf.
        targets = decode_tf(labels[0], priors, variances=variances).numpy()
        for prior_index in range(len(targets)):
            if targets[prior_index][-1] == 1:
                draw_bbox_landm(img, targets[prior_index], img_dim, img_dim)
                draw_anchor(img, priors[prior_index], img_dim, img_dim)

    cv2.imshow('img', cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
    if cv2.waitKey(0) == ord('q'):
        exit()

print("data fps: {:.2f}".format(num_samples / (time.time() - start_time)))
Example #10
def main(_):

    min_sizes = [[16, 32], [64, 128], [256, 512]]
    steps = [8, 16, 32]
    clip = False

    img_dim = 640
    priors = prior_box((img_dim, img_dim), min_sizes, steps, clip)

    variances = [0.1, 0.2]
    match_thresh = 0.45
    ignore_thresh = 0.3
    batch_size = 1
    shuffle = True
    using_flip = True
    using_distort = True
    using_bin = True
    buffer_size = 4000
    number_cycles = 2
    threads = 2

    check_dataset = load_tfrecord_dataset(dataset_root=FLAGS.dataset_path,
                                          split=FLAGS.split,
                                          threads=threads,
                                          number_cycles=number_cycles,
                                          batch_size=batch_size,
                                          hvd=[],
                                          img_dim=img_dim,
                                          using_bin=using_bin,
                                          using_flip=using_flip,
                                          using_distort=using_distort,
                                          using_encoding=FLAGS.using_encoding,
                                          priors=priors,
                                          match_thresh=match_thresh,
                                          ignore_thresh=ignore_thresh,
                                          variances=variances,
                                          shuffle=shuffle,
                                          buffer_size=buffer_size)

    for idx, (inputs, labels, _) in enumerate(check_dataset):
        print("{} inputs:".format(idx), inputs.shape, "labels:", labels.shape)

        if not FLAGS.visualization:
            continue

        img = np.clip(inputs.numpy()[0], 0, 255).astype(np.uint8)
        if not FLAGS.using_encoding:
            # labels includes loc, landm, landm_valid.
            targets = labels.numpy()[0]
            for target in targets:
                draw_bbox_landm(img, target, img_dim, img_dim)
        else:
            # labels includes loc, landm, landm_valid, conf.
            targets = decode_tf(labels[0], priors, variances=variances).numpy()
            for prior_index in range(len(targets)):
                if targets[prior_index][-1] != 1:
                    continue

                draw_bbox_landm(img, targets[prior_index], img_dim, img_dim)
                draw_anchor(img, priors[prior_index], img_dim, img_dim)

        cv2.imwrite('{}/{}.png'.format(FLAGS.output_path, str(idx)),
                    img[:, :, ::-1])
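
Both dataset checkers rely on prior_box to generate the anchor grid. A sketch of the usual SSD-style implementation for the (min_sizes, steps, clip) parameters used above:

import itertools
import numpy as np

def prior_box(image_sizes, min_sizes, steps, clip=False):
    """One (cx, cy, w, h) anchor per min_size per feature-map cell, normalized."""
    img_h, img_w = image_sizes
    feature_maps = [[int(np.ceil(img_h / step)), int(np.ceil(img_w / step))]
                    for step in steps]
    anchors = []
    for k, (fh, fw) in enumerate(feature_maps):
        for i, j in itertools.product(range(fh), range(fw)):
            for min_size in min_sizes[k]:
                cx = (j + 0.5) * steps[k] / img_w
                cy = (i + 0.5) * steps[k] / img_h
                anchors.append([cx, cy, min_size / img_w, min_size / img_h])
    output = np.asarray(anchors, dtype=np.float32)
    return np.clip(output, 0.0, 1.0) if clip else output
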
Example #11
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load model from weights.h5
    # model.load_weights('./model/mbv2_weights.h5', by_name=True, skip_mismatch=True)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        img = np.float32(img_raw.copy())

        # testing scale
        target_size = 320
        img_size_max = np.max(img.shape[0:2])
        resize = float(target_size) / float(img_size_max)
        img = cv2.resize(img,
                         None,
                         None,
                         fx=resize,
                         fy=resize,
                         interpolation=cv2.INTER_LINEAR)

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        # draw and save results
        save_img_path = 'out_' + os.path.basename(FLAGS.img_path)
        for prior_index in range(len(outputs)):
            draw_bbox_landm(img, outputs[prior_index], target_size,
                            target_size)
        cv2.imwrite(save_img_path, img)
        print(f"[*] save result at {save_img_path}")

    else:
        cam = cv2.VideoCapture('./data/lichaochao.mp4')
        # cam = cv2.VideoCapture(0)
        frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        fps = cam.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter('chaochao1.mp4',
                              fourcc,
                              fps=fps,
                              frameSize=(frame_width, frame_height))  # frameSize is (width, height)

        resize = FLAGS.down_scale_factor
        frame_height *= resize
        frame_width *= resize

        max_steps = max(cfg['steps'])
        img_pad_h = max_steps - frame_height % max_steps if frame_height % max_steps > 0 else 0
        img_pad_w = max_steps - frame_width % max_steps if frame_width % max_steps > 0 else 0
        priors = prior_box_tf(
            (frame_height + img_pad_h, frame_width + img_pad_w),
            cfg['min_sizes'], cfg['steps'], cfg['clip'])

        frame_index = 0
        outputs = []
        start_time = time.time()
        while cam.isOpened():
            _, frame = cam.read()
            if frame is None:
                print('no cam')
                break
            if frame_index < 5:
                frame_index += 1
                # continue
            else:
                frame_index = 0

                img = np.float32(frame.copy())
                if resize < 1:
                    img = cv2.resize(img, (0, 0),
                                     fx=resize,
                                     fy=resize,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model
                outputs = model(img[np.newaxis, ...])

                preds = tf.concat([
                    outputs[0][0], outputs[1][0, :, 1][..., tf.newaxis],
                    outputs[2][0, :, 1][..., tf.newaxis]
                ], -1)

                decode_preds = decode_tf(preds, priors, cfg['variances'])

                selected_indices = tf.image.non_max_suppression(
                    boxes=decode_preds[:, :4],
                    scores=decode_preds[:, -1],
                    max_output_size=tf.shape(decode_preds)[0],
                    iou_threshold=FLAGS.iou_th,
                    score_threshold=FLAGS.score_th)

                outputs = tf.gather(decode_preds, selected_indices).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs,
                                             pad_params,
                                             resize=resize)

                # calculate fps
                # fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
                # start_time = time.time()
                # cv2.putText(frame, fps_str, (25, 50),
                #             cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 0, 255), 2)

            # draw results
            for prior_index in range(len(outputs)):
                draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                frame_width)

            # calculate fps
            # fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            # start_time = time.time()
            # cv2.putText(frame, fps_str, (25, 25),
            #             cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # show frame
            out.write(frame)
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) == ord('q'):
                exit()
Example #12
def main(_):
    min_sizes = [[16, 32], [64, 128], [256, 512]]
    steps = [8, 16, 32]
    clip = False

    img_dim = 640
    priors = prior_box((img_dim, img_dim), min_sizes, steps, clip)

    variances = [0.1, 0.2]
    match_thresh = 0.45
    ignore_thresh = 0.3
    num_samples = 100

    if FLAGS.using_encoding:
        assert FLAGS.batch_size == 1

    if FLAGS.using_bin:
        tfrecord_name = './data/widerface_train_bin.tfrecord'
    else:
        tfrecord_name = './data/widerface_train.tfrecord'

    train_dataset = load_tfrecord_dataset(tfrecord_name,
                                          FLAGS.batch_size,
                                          img_dim=640,
                                          using_bin=FLAGS.using_bin,
                                          using_flip=True,
                                          using_distort=False,
                                          using_encoding=FLAGS.using_encoding,
                                          priors=priors,
                                          match_thresh=match_thresh,
                                          ignore_thresh=ignore_thresh,
                                          variances=variances,
                                          shuffle=False)

    start_time = time.time()
    for idx, (inputs, labels) in enumerate(train_dataset.take(num_samples)):
        print("{} inputs:".format(idx), inputs.shape, "labels:", labels.shape)

        if not FLAGS.visualization:
            continue

        img = np.clip(inputs.numpy()[0], 0, 255).astype(np.uint8)
        if not FLAGS.using_encoding:
            # labels includes loc, landm, landm_valid.
            targets = labels.numpy()[0]
            for target in targets:
                draw_bbox_landm(img, target, img_dim, img_dim)
        else:
            # labels includes loc, landm, landm_valid, conf.
            targets = decode_tf(labels[0], priors, variances=variances).numpy()
            for prior_index in range(len(targets)):
                if targets[prior_index][-1] != 1:
                    continue

                draw_bbox_landm(img, targets[prior_index], img_dim, img_dim)
                draw_anchor(img, priors[prior_index], img_dim, img_dim)

        cv2.imshow('img', cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
        if cv2.waitKey(0) == ord('q'):
            exit()

    print("data fps: {:.2f}".format(num_samples / (time.time() - start_time)))
Example #13
def main(_argv):
    # init

    face_aligner = FaceAligner()

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        save_count = 0
        for path, subdirs, files in os.walk(FLAGS.img_path):

            for name in files:
                if name.endswith('.jpg'):
                    img_path = os.path.join(path, name)

                    if not os.path.exists(img_path):
                        print(f"cannot find image path from {img_path}")
                        exit()

                    if save_count < FLAGS.img_num:
                        print("[*] Processing on single image {}".format(
                            img_path))

                        img_raw = cv2.imread(img_path)
                        img_height_raw, img_width_raw, _ = img_raw.shape
                        img = np.float32(img_raw.copy())

                        if FLAGS.down_scale_factor < 1.0:
                            img = cv2.resize(img, (0, 0),
                                             fx=FLAGS.down_scale_factor,
                                             fy=FLAGS.down_scale_factor,
                                             interpolation=cv2.INTER_LINEAR)
                        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                        # pad input image to avoid unmatched shape problem
                        img, pad_params = pad_input_image(img,
                                                          max_steps=max(
                                                              cfg['steps']))

                        # run model
                        outputs = model(img[np.newaxis, ...]).numpy()

                        # recover padding effect
                        outputs = recover_pad_output(outputs, pad_params)

                        # draw and save results
                        save_img_path = 'out_' + os.path.basename(img_path)

                        for prior_index in range(len(outputs)):
                            draw_bbox_landm(img_raw, outputs[prior_index],
                                            img_height_raw, img_width_raw)
                        cv2.imwrite(save_img_path, img_raw)
                        print(f"[*] save result at {save_img_path}")
                        save_count += 1

    else:
        cam = cv2.VideoCapture(0)

        start_time = time.time()

        while True:
            _, frame = cam.read()
            if frame is None:
                print("no cam input")
                break

            frame_height, frame_width, _ = frame.shape

            orig_frame = frame.copy()

            face = None

            img = np.float32(frame.copy())
            if FLAGS.down_scale_factor < 1.0:
                img = cv2.resize(img, (0, 0),
                                 fx=FLAGS.down_scale_factor,
                                 fy=FLAGS.down_scale_factor,
                                 interpolation=cv2.INTER_LINEAR)

            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

            # run model
            start_time = time.time()
            outputs = model(img[np.newaxis, ...]).numpy()
            inference_time = f"Inf: {time.time() - start_time}"

            cv2.putText(frame, inference_time, (25, 50),
                        cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            for prior_index in range(len(outputs)):
                preds = decode_predictions((frame_width, frame_height),
                                           outputs)

                for key, value in preds.items():
                    bbox = value[0]['bbox']
                    left_eye = value[0]['left_eye']
                    right_eye = value[0]['right_eye']

                    # Our face ROI
                    face = orig_frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]

                    # Eyes
                    x1_le = left_eye[0] - 25
                    y1_le = left_eye[1] - 25
                    x2_le = left_eye[0] + 25
                    y2_le = left_eye[1] + 25

                    x1_re = right_eye[0] - 25
                    y1_re = right_eye[1] - 25
                    x2_re = right_eye[0] + 25
                    y2_re = right_eye[1] + 25

                    if left_eye[1] > right_eye[1]:
                        A = (right_eye[0], left_eye[1])
                    else:
                        A = (left_eye[0], right_eye[1])

                    # Calc our rotating degree
                    delta_x = right_eye[0] - left_eye[0]
                    delta_y = right_eye[1] - left_eye[1]
                    angle = np.arctan(
                        delta_y / (delta_x + 1e-17))  # avoid division by zero
                    angle = (angle * 180) / np.pi

                    # compute the desired right eye x-coordinate based on the
                    # desired x-coordinate of the left eye
                    desiredRightEyeX = 1.0 - 0.35

                    # determine the scale of the new resulting image by taking
                    # the ratio of the distance between eyes in the *current*
                    # image to the ratio of distance between eyes in the
                    # *desired* image
                    dist = np.sqrt((delta_x**2) + (delta_y**2))
                    desiredDist = (desiredRightEyeX - 0.35)
                    desiredDist *= 256
                    scale = desiredDist / dist

                    eyesCenter = ((left_eye[0] + right_eye[0]) // 2,
                                  (left_eye[1] + right_eye[1]) // 2)

                    cv2.circle(frame, A, 5, (255, 0, 0), -1)

                    cv2.putText(frame, str(int(angle)), (x1_le - 15, y1_le),
                                cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

                    cv2.line(frame, right_eye, left_eye, (0, 200, 200), 3)
                    cv2.line(frame, left_eye, A, (0, 200, 200), 3)
                    cv2.line(frame, right_eye, A, (0, 200, 200), 3)

                    cv2.line(frame, (left_eye[0], left_eye[1]),
                             (right_eye[0], right_eye[1]), (0, 200, 200), 3)

                    rotated = face_aligner.align(orig_frame, left_eye,
                                                 right_eye)

                draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                frame_width)

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            start_time = time.time()
            cv2.putText(frame, fps_str, (25, 25), cv2.FONT_HERSHEY_DUPLEX,
                        0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow('frame', frame)
            if face is not None:
                cv2.imshow('face aligned', rotated)

            if cv2.waitKey(1) == ord('q'):
                exit()
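
FaceAligner.align is not shown; a hypothetical sketch that rotates the frame around the eye midpoint so the eyes become level, consistent with the angle computed in the loop above:

import cv2
import numpy as np

class FaceAligner:
    def align(self, image, left_eye, right_eye):
        # angle of the eye line relative to horizontal
        dx = right_eye[0] - left_eye[0]
        dy = right_eye[1] - left_eye[1]
        angle = np.degrees(np.arctan2(dy, dx))
        eyes_center = ((left_eye[0] + right_eye[0]) / 2.0,
                       (left_eye[1] + right_eye[1]) / 2.0)
        # rotate around the eye midpoint; keep the original frame size
        M = cv2.getRotationMatrix2D(eyes_center, angle, 1.0)
        h, w = image.shape[:2]
        return cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC)
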
Example #14
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    # evaluation on testing dataset
    testing_dataset_path = cfg['testing_dataset_path']
    img_paths, _ = load_info(testing_dataset_path, './data/CelebA/train_labels.txt')
    for img_index, img_path in enumerate(img_paths):
        print(" [{} / {}] det {}".format(img_index + 1, len(img_paths),
                                         img_path))
        img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        # testing scale
        if not FLAGS.origin_size:
            target_size = 320
            img_size_max = np.max(img.shape[0:2])
            resize = float(target_size) / float(img_size_max)
            img = cv2.resize(img, None, None, fx=resize, fy=resize,
                             interpolation=cv2.INTER_LINEAR)

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # write results
        img_name = os.path.basename(img_path)
        sub_dir = os.path.basename(os.path.dirname(img_path))
        save_name = os.path.join(
            FLAGS.save_folder, sub_dir, img_name.replace('.jpg', '.txt'))

        pathlib.Path(os.path.join(FLAGS.save_folder, sub_dir)).mkdir(
            parents=True, exist_ok=True)

        with open(save_name, "w") as file:
            bboxs = outputs[:, :4]
            smile_confs = outputs[:, 4]
            face_confs = outputs[:, -1]

            file_name = img_name + "\n"
            bboxs_num = str(len(bboxs)) + "\n"
            file.write(file_name)
            file.write(bboxs_num)
            for box, smile_conf, face_conf in zip(bboxs, smile_confs, face_confs):
                x = int(box[0] * img_width_raw)
                y = int(box[1] * img_height_raw)
                w = int(box[2] * img_width_raw) - int(box[0] * img_width_raw)
                h = int(box[3] * img_height_raw) - int(box[1] * img_height_raw)
                line = str(x) + " " + str(y) + " " + str(w) + " " + str(h) + " " + str(smile_conf) + " " + str(
                    face_conf) + " \n"
                file.write(line)

        # save images
        pathlib.Path(os.path.join(
            './results', cfg['sub_name'], sub_dir)).mkdir(
            parents=True, exist_ok=True)
        if FLAGS.save_image:
            for prior_index in range(len(outputs)):
                if outputs[prior_index][-1] >= FLAGS.vis_th:
                    draw_bbox_landm(img_raw, outputs[prior_index],
                                    img_height_raw, img_width_raw)
            cv2.imwrite(os.path.join('./results', cfg['sub_name'], sub_dir,
                                     img_name), img_raw)
Example #15
def main(_argv):
    # init

    CONFIG_PATH = './configs/retinaface_mbv2.yaml'
    GPU = '0'
    IOU_TH = 0.4
    SCORE_TH = 0.5
    WEBCAM = False
    DOWN_FACTOR = 1.0

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = GPU

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(CONFIG_PATH)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=IOU_TH,
                            score_th=SCORE_TH)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    return model

    # NOTE: the early return above makes the rest of this function unreachable;
    # the demo code below is leftover.
    if not WEBCAM:
        #         if not os.path.exists(IMG_PATH):
        #             print(f"cannot find image path from {IMG_PATH}")
        #             exit()

        #         print("[*] Processing on single image {}".format(IMG_PATH))

        #         img_raw = cv2.imread(IMG_PATH)
        img_raw = input_image
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        if DOWN_FACTOR < 1.0:
            img = cv2.resize(img, (0, 0),
                             fx=DOWN_FACTOR,
                             fy=DOWN_FACTOR,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = "result.jpg"
        for prior_index in range(len(outputs)):
            draw_bbox_landm(img_raw, outputs[prior_index], img_height_raw,
                            img_width_raw)
        cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")

    else:
        cam = cv2.VideoCapture(0)

        start_time = time.time()
        while True:
            _, frame = cam.read()
            if frame is None:
                print("no cam input")
                break

            frame_height, frame_width, _ = frame.shape
            img = np.float32(frame.copy())
            if DOWN_FACTOR < 1.0:
                img = cv2.resize(img, (0, 0),
                                 fx=DOWN_FACTOR,
                                 fy=DOWN_FACTOR,
                                 interpolation=cv2.INTER_LINEAR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

            # run model
            outputs = model(img[np.newaxis, ...]).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            for prior_index in range(len(outputs)):
                draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                frame_width)

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            start_time = time.time()
            cv2.putText(frame, fps_str, (25, 25), cv2.FONT_HERSHEY_DUPLEX,
                        0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) == ord('q'):
                exit()
    return outputs
Example #16
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        if FLAGS.down_scale_factor < 1.0:
            img = cv2.resize(img, (0, 0),
                             fx=FLAGS.down_scale_factor,
                             fy=FLAGS.down_scale_factor,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = 'out_' + os.path.basename(FLAGS.img_path)
        for prior_index in range(len(outputs)):
            draw_bbox_landm(img_raw, outputs[prior_index], img_height_raw,
                            img_width_raw)
        cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")

    else:

        cam = cv2.VideoCapture(FLAGS.vid_path)
        fps = int(cam.get(cv2.CAP_PROP_FPS))

        ### Saving Video to file
        frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Define the codec and create a VideoWriter; output is stored in ./output/output.avi
        if not os.path.exists('./output'):
            print('Creating folder: output/ for saving video.')
            os.makedirs('./output')
        out = cv2.VideoWriter('./output/output.avi',
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                              (frame_width, frame_height))

        start_time = time.time()
        counter = 0
        frameCount = 0
        while True:
            _, frame = cam.read()
            if frame is None:
                print("no cam input")
                break
            frameCount = frameCount + 1

            frame_height, frame_width, _ = frame.shape
            img = np.float32(frame.copy())
            if FLAGS.down_scale_factor < 1.0:
                img = cv2.resize(img, (0, 0),
                                 fx=FLAGS.down_scale_factor,
                                 fy=FLAGS.down_scale_factor,
                                 interpolation=cv2.INTER_LINEAR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

            # run model
            outputs = model(img[np.newaxis, ...]).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            for prior_index in range(len(outputs)):
                croppedFace = draw_bbox_landm(frame, outputs[prior_index],
                                              frame_height, frame_width)
                if frameCount >= fps * FLAGS.dfps:
                    fileName = "%d.png" % counter
                    cv2.imwrite(FLAGS.dst_path + fileName, croppedFace)
                    print('Saved:', fileName)
                    counter = counter + 1
                    frameCount = 0

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            start_time = time.time()
            cv2.putText(frame, fps_str, (25, 25), cv2.FONT_HERSHEY_DUPLEX,
                        0.75, (0, 255, 0), 2)

            # show frame
            if FLAGS.preview:
                cv2.imshow('frame', frame)
            out.write(frame)
            if cv2.waitKey(1) == ord('q'):
                exit()
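
Finally, draw_bbox_landm itself, which every example on this page demonstrates, typically looks like the following in the RetinaFace-TF2 utilities (a sketch; the annotation layout matches how the examples index outputs):

import cv2

def draw_bbox_landm(img, ann, img_height, img_width):
    """ann = [x1, y1, x2, y2, 5 x (lm_x, lm_y), landm_valid, conf], normalized."""
    # bounding box
    x1, y1 = int(ann[0] * img_width), int(ann[1] * img_height)
    x2, y2 = int(ann[2] * img_width), int(ann[3] * img_height)
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

    # confidence
    if len(ann) > 15:
        cv2.putText(img, "{:.4f}".format(ann[15]), (x1 + 4, y1 + 14),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

    # five landmarks, drawn only when the landmark-valid flag is set
    if ann[14] > 0:
        colors = [(255, 255, 0), (0, 255, 255), (255, 0, 255),
                  (0, 255, 0), (255, 0, 0)]
        for i in range(5):
            cv2.circle(img, (int(ann[4 + 2 * i] * img_width),
                             int(ann[5 + 2 * i] * img_height)), 1, colors[i], 2)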