Python pad_input_imageの例、modules.utils.pad_input_image Pythonの例

コード例 #1

0

ファイルを表示

def run_model(img_path, model):
    """Detect faces in one image file and save an annotated copy.

    The image is read with OpenCV, converted to RGB float32, padded with
    ``pad_input_image`` and passed through ``model``. Every detection is
    drawn onto the original image, which is then written to
    ``OUTPUT + <name> + '_OUTPUT.png'``.

    Args:
        img_path: Path of the image to process.
        model: Callable detector; its result must expose ``.numpy()``.

    Returns:
        The path of the annotated image that was written.

    Raises:
        ValueError: If OpenCV cannot read ``img_path``.
    """
    img_raw = cv2.imread(img_path)
    if img_raw is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"cannot read image from {img_path}")
    img_height_raw, img_width_raw, _ = img_raw.shape

    img = np.float32(img_raw.copy())
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
    outputs = model(img[np.newaxis, ...]).numpy()
    outputs = recover_pad_output(outputs, pad_params)

    # Name is everything before the first dot of the last path component.
    name = img_path.split('/')[-1].split('.')[0]

    # NOTE(review): the directory created here is the literal 'outputs',
    # while the file is saved under OUTPUT -- presumably the same place.
    os.makedirs('outputs', exist_ok=True)
    saved_img_path = OUTPUT + name + '_OUTPUT.png'

    for detection in outputs:
        draw_bbox_landm(img_raw, detection, img_height_raw, img_width_raw)

    # Write once, after all detections are drawn, so the returned path
    # exists even when no face was detected.
    cv2.imwrite(saved_img_path, img_raw)

    return saved_img_path

コード例 #2

0

ファイルを表示

ファイル: train.py プロジェクト: miliadis/retinaface-tf2

    def test_step(inputs, img_name):
        """Detect faces in a single-image batch and return pixel-space boxes.

        Args:
            inputs: Batch whose shape unpacks as (batch, height, width,
                channels); only element 0 is used.
            img_name: Not used by the current implementation.

        Returns:
            A float array with one ``[x, y, w, h, conf]`` row per detection,
            scaled to the height and width taken from ``inputs.shape``.
        """
        _, raw_h, raw_w, _ = inputs.shape

        # Pad the image to avoid the unmatched shape problem.
        padded, pad_params = pad_input_image(inputs[0].numpy(),
                                             max_steps=max(cfg['steps']))
        batch = padded[np.newaxis, ...]
        raw_predictions = model(batch, training=False)
        detections = pred_to_outputs(cfg, raw_predictions,
                                     batch.shape).numpy()
        # Undo the effect of the padding on the detections.
        detections = recover_pad_output(detections, pad_params)

        rows = []
        for box, conf in zip(detections[:, :4], detections[:, -1]):
            left = int(box[0] * raw_w)
            top = int(box[1] * raw_h)
            right = int(box[2] * raw_w)
            bottom = int(box[3] * raw_h)
            rows.append([left, top, right - left, bottom - top, conf])

        return np.array(rows).astype('float')

コード例 #3

0

ファイルを表示

def get_face_value(img_raw, down_scale_factor=0.3):
    """Detect faces in ``img_raw`` and wrap each detection in a FaceValue.

    Args:
        img_raw: Image array whose shape unpacks as (height, width, channels).
        down_scale_factor: Resize factor applied before detection when it is
            below 1.0.

    Returns:
        A list with one ``FV.FaceValue`` per detection, built from the
        detection row and the original image width and height.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    tf_logger = tf.get_logger()
    tf_logger.disabled = True
    tf_logger.setLevel(logging.FATAL)

    raw_h, raw_w, _ = img_raw.shape
    work = np.float32(img_raw.copy())

    # Shrink the image so faces are found faster.
    if down_scale_factor < 1.0:
        work = cv2.resize(work, (0, 0),
                          fx=down_scale_factor,
                          fy=down_scale_factor,
                          interpolation=cv2.INTER_LINEAR)

    work = cv2.cvtColor(work, cv2.COLOR_BGR2RGB)
    work, pad_params = pad_input_image(work, max_steps=max(cfg['steps']))

    detections = model(work[np.newaxis, ...]).numpy()
    # Undo the effect of the padding on the detections.
    detections = recover_pad_output(detections, pad_params)

    # One FaceValue per detected face.
    result = [FV.FaceValue(det, raw_w, raw_h) for det in detections]
    print('get face value', result)
    return result

コード例 #4

0

ファイルを表示

def process_single_image(img_path, img_outputpath, model, cfg, data):
    """Detect faces in one image, save the annotated image, return JSON data.

    Args:
        img_path: Path of the input image; the process exits if it is missing.
        img_outputpath: Where the annotated image is written.
        model: Callable detector; its result must expose ``.numpy()``.
        cfg: Configuration mapping; ``cfg['steps']`` drives the padding.
        data: Passed through unchanged to ``get_json_landmark_data``.

    Returns:
        Whatever ``get_json_landmark_data(data, img_outputpath, landmarks)``
        returns, where ``landmarks`` collects the ``draw_bbox_landm`` results.
    """
    if not os.path.exists(img_path):
        print(f"cannot find image path from {img_path}")
        exit()

    img_raw = cv2.imread(img_path)
    raw_h, raw_w, _ = img_raw.shape
    work = np.float32(img_raw.copy())

    scale = FLAGS.down_scale_factor
    if scale < 1.0:
        work = cv2.resize(work, (0, 0), fx=scale, fy=scale,
                          interpolation=cv2.INTER_LINEAR)
    work = cv2.cvtColor(work, cv2.COLOR_BGR2RGB)

    # Pad the image to avoid the unmatched shape problem.
    work, pad_params = pad_input_image(work, max_steps=max(cfg['steps']))

    # Run the detector, then undo the effect of the padding.
    detections = model(work[np.newaxis, ...]).numpy()
    detections = recover_pad_output(detections, pad_params)

    # draw_bbox_landm annotates img_raw in place; keep what it returns.
    landmarks = [
        draw_bbox_landm(img_raw, det, raw_h, raw_w) for det in detections
    ]
    cv2.imwrite(img_outputpath, img_raw)

    return get_json_landmark_data(data, img_outputpath, landmarks)

コード例 #5

0

ファイルを表示

ファイル: retinaface.py プロジェクト: ThanHuuTuan/attendance_system

    def __detect_faces(self, frame):
        """Run ``self.model`` on ``frame`` and return the detections.

        The frame is copied as float32, padded with ``pad_input_image``
        (``max_steps=32``), passed through the model as a batch of one, and
        the detections are corrected with ``recover_pad_output``.
        """
        padded, pad_params = pad_input_image(np.float32(frame.copy()),
                                             max_steps=32)

        batch = padded[np.newaxis, ...]
        detections = self.model(batch).numpy()

        # Undo the effect of the padding on the detections.
        return recover_pad_output(detections, pad_params)

コード例 #6

0

ファイルを表示

ファイル: detect.py プロジェクト: hunglc007/retinaface-tf2

def main(_argv):
    """Detect faces in ``FLAGS.image`` and save the annotated image.

    The model is loaded from ``FLAGS.weights`` either as a Keras model
    (``FLAGS.framework == 'tf'``) or as a SavedModel serving signature
    (``FLAGS.framework == 'trt'``). The annotated image is written to
    ``out.jpg``.

    Raises:
        ValueError: If the framework is unknown, the image cannot be read,
            or the serving signature does not return exactly one output.
    """
    cfg = load_yaml(FLAGS.cfg_path)
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.framework == 'tf':
        infer = tf.keras.models.load_model(FLAGS.weights)
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        signature_keys = list(saved_model_loaded.signatures.keys())
        print(signature_keys)
        infer = saved_model_loaded.signatures['serving_default']
    else:
        # Fail early instead of hitting an undefined `infer` further down.
        raise ValueError(f"unsupported framework: {FLAGS.framework}")
    logging.info('weights loaded')

    img_raw = cv2.imread(FLAGS.image)
    if img_raw is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"cannot read image from {FLAGS.image}")
    img_height_raw, img_width_raw, _ = img_raw.shape
    img = np.float32(img_raw.copy())

    if FLAGS.down_scale_factor < 1.0:
        img = cv2.resize(img, (0, 0),
                         fx=FLAGS.down_scale_factor,
                         fy=FLAGS.down_scale_factor,
                         interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # pad input image to avoid unmatched shape problem
    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
    batched_input = img[np.newaxis, ...]

    if FLAGS.framework == 'tf':
        outputs = infer(batched_input).numpy()
    else:
        # A serving signature returns a dict of named output tensors.
        # NOTE(review): assumes the exported model has a single output that
        # holds the detections -- confirm against the export script.
        predictions = infer(tf.constant(batched_input))
        if len(predictions) != 1:
            raise ValueError(
                "expected exactly one output from the serving signature, "
                f"got {sorted(predictions.keys())}")
        outputs = next(iter(predictions.values())).numpy()

    # recover padding effect
    outputs = recover_pad_output(outputs, pad_params)

    # draw and save results
    save_img_path = 'out.jpg'
    for detection in outputs:
        draw_bbox_landm(img_raw, detection, img_height_raw, img_width_raw)
    # Write once so the file exists even when nothing was detected.
    cv2.imwrite(save_img_path, img_raw)
    print(f"[*] save result at {save_img_path}")

コード例 #7

0

ファイルを表示

ファイル: get_face_value.py プロジェクト: bishil06/faceid_mirror

def get_face_value(img_raw, down_scale_factor=0.3):
    """Detect faces in ``img_raw`` and describe each one as a dict.

    Args:
        img_raw: Image array whose shape unpacks as (height, width, channels).
        down_scale_factor: Resize factor applied before detection when it is
            below 1.0.

    Returns:
        A list of dicts, one per detection, with keys ``'bbox'``
        (``[left, top, right, bottom]``), ``'bbox_size'`` (box area) and
        ``'landm'`` (five named ``(x, y)`` points), all scaled by the width
        and height of ``img_raw``.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    tf_logger = tf.get_logger()
    tf_logger.disabled = True
    tf_logger.setLevel(logging.FATAL)

    raw_h, raw_w, _ = img_raw.shape
    work = np.float32(img_raw.copy())

    if down_scale_factor < 1.0:
        work = cv2.resize(work, (0, 0),
                          fx=down_scale_factor,
                          fy=down_scale_factor,
                          interpolation=cv2.INTER_LINEAR)
    work = cv2.cvtColor(work, cv2.COLOR_BGR2RGB)

    work, pad_params = pad_input_image(work, max_steps=max(cfg['steps']))

    detections = model(work[np.newaxis, ...]).numpy()

    # Undo the effect of the padding on the detections.
    detections = recover_pad_output(detections, pad_params)

    def output_to_fvalue(out, w, h):
        """Scale one detection row by ``w``/``h`` and pack it into a dict."""
        left, top = int(out[0] * w), int(out[1] * h)
        right, bottom = int(out[2] * w), int(out[3] * h)

        # Landmark i occupies columns 4 + 2*i (x) and 5 + 2*i (y).
        landm_names = ('left_eye', 'right_eye', 'nose', 'mouse_left',
                       'mouse_right')
        landm = {
            name: (int(out[4 + 2 * i] * w), int(out[5 + 2 * i] * h))
            for i, name in enumerate(landm_names)
        }

        return {
            'bbox': [left, top, right, bottom],
            'bbox_size': (bottom - top) * (right - left),
            'landm': landm,
        }

    return [output_to_fvalue(det, raw_w, raw_h) for det in detections]

コード例 #8

0

ファイルを表示

ファイル: retinaface.py プロジェクト: haoluong/attendance_system

    def __detect_faces(self, frame):
        """Run ``self.model`` on ``frame`` and return the detections.

        The frame is converted to RGB float32 and, unless it is already 640
        pixels wide, resized to a width of 640 with its aspect ratio
        preserved. It is then padded (``max_steps=32``), passed through the
        model as a batch of one, and the detections are corrected with
        ``recover_pad_output``.
        """
        img = np.float32(frame.copy())
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_height, img_width, _ = img.shape
        if img_width != 640:
            # cv2.resize expects dsize as (width, height): width becomes 640
            # and the height is scaled by the same ratio (at least 1 pixel).
            new_height = max(1, int(img_height * 640 / img_width))
            img = cv2.resize(img, (640, new_height),
                             interpolation=cv2.INTER_CUBIC)
        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=32)

        # run model
        outputs = self.model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        return outputs

コード例 #9

0

ファイルを表示

def main(_argv):
    """Run RetinaFace on a single image or on a video stream.

    With ``FLAGS.webcam`` unset, ``FLAGS.img_path`` is processed and the
    annotated image is saved as ``out_<basename>``. Otherwise frames are
    read from a video capture, annotated with detections and an FPS
    counter, and shown in a window until the stream ends or 'q' is pressed.
    The process exits if no checkpoint or no input image is found.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        if FLAGS.down_scale_factor < 1.0:
            img = cv2.resize(img, (0, 0),
                             fx=FLAGS.down_scale_factor,
                             fy=FLAGS.down_scale_factor,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = os.path.join('out_' + os.path.basename(FLAGS.img_path))
        for detection in outputs:
            draw_bbox_landm(img_raw, detection, img_height_raw,
                            img_width_raw)
        # Write once so the file exists even when nothing was detected.
        cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")

    else:
        cam = cv2.VideoCapture("./videos/Bentall_Centra.MP4")

        start_time = time.time()
        try:
            while True:
                _, frame = cam.read()
                if frame is None:
                    # End of stream or read failure: stop instead of
                    # crashing on frame.shape below.
                    print("no cam input")
                    break

                frame_height, frame_width, _ = frame.shape
                img = np.float32(frame.copy())
                if FLAGS.down_scale_factor < 1.0:
                    img = cv2.resize(img, (0, 0),
                                     fx=FLAGS.down_scale_factor,
                                     fy=FLAGS.down_scale_factor,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(
                    img, max_steps=max(cfg['steps']))

                # run model
                outputs = model(img[np.newaxis, ...]).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params)

                # draw results
                for detection in outputs:
                    draw_bbox_landm(frame, detection, frame_height,
                                    frame_width)

                # calculate fps (guard against a zero-length interval)
                elapsed = max(time.time() - start_time, 1e-6)
                fps_str = "FPS: %.2f" % (1 / elapsed)
                start_time = time.time()
                cv2.putText(frame, fps_str, (25, 25), cv2.FONT_HERSHEY_DUPLEX,
                            0.75, (0, 255, 0), 2)

                # show frame
                cv2.imshow('frame', frame)
                if cv2.waitKey(1) == ord('q'):
                    exit()
        finally:
            # Runs on normal end of stream and on exit() alike.
            cam.release()
            cv2.destroyAllWindows()

コード例 #10

0

ファイルを表示

def main(_argv):
    """Detect, align and save the largest face of every image in a dataset.

    ``FLAGS.folder_path`` is expected to contain one sub-directory per
    class. For each image the detection with the largest box area is
    passed to ``FaceAligner.align`` and the result is written under
    ``FLAGS.destination_dir/<class>/<file>``. Images that cannot be read,
    have no detection, or whose box touches or crosses the image border
    are skipped. The process exits if no checkpoint is found.
    """
    mkdir(FLAGS.destination_dir)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)
    aligner = FaceAligner(desiredFaceSize=128)
    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    total = 0
    processed_total = 0
    class_names = sorted(os.listdir(FLAGS.folder_path))
    already_done = sorted(os.listdir(FLAGS.destination_dir))
    for class_name in class_names:
        processed_image = 0
        src_dir = os.path.join(FLAGS.folder_path, class_name)
        dst_dir = os.path.join(FLAGS.destination_dir, class_name)

        # Only directories are treated as classes.
        if os.path.isfile(src_dir):
            continue
        # Skip classes that already have an output directory, except the
        # last one in sorted order (presumably it may be incomplete after
        # an interrupted run -- TODO confirm).
        if class_name in already_done and class_name != already_done[-1]:
            continue

        items = os.listdir(src_dir)
        mkdir(dst_dir)
        for file_name in items:
            frame = cv2.imread(os.path.join(src_dir, file_name))
            if frame is None:
                continue
            frame_height, frame_width, _ = frame.shape
            img = np.float32(frame.copy())
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

            # run model
            outputs = model(img[np.newaxis, ...]).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)
            if len(outputs) < 1:
                continue

            # Keep the detection with the largest (normalised) box area.
            ann = max(outputs, key=lambda x: (x[2] - x[0]) * (x[3] - x[1]))
            b_box = (int(ann[0] * frame_width), int(ann[1] * frame_height),
                     int(ann[2] * frame_width), int(ann[3] * frame_height))
            if (b_box[0] < 0 or b_box[1] < 0 or b_box[2] >= frame_width
                    or b_box[3] >= frame_height):
                continue

            # Every landmark is scaled to pixels, the nose included; the
            # original left it in normalised units, unlike the others.
            keypoints = {
                'left_eye': (ann[4] * frame_width, ann[5] * frame_height),
                'right_eye': (ann[6] * frame_width, ann[7] * frame_height),
                'nose': (ann[8] * frame_width, ann[9] * frame_height),
                'left_mouth': (ann[10] * frame_width, ann[11] * frame_height),
                'right_mouth': (ann[12] * frame_width, ann[13] * frame_height),
            }
            out_frame = aligner.align(frame, keypoints, b_box)

            # cv2.imwrite reports failure through its boolean return value,
            # so count only the images that were actually written.
            if cv2.imwrite(os.path.join(dst_dir, file_name), out_frame):
                processed_image += 1

        print(class_name + " Done")
        total += len(items)
        processed_total += processed_image

コード例 #11

0

ファイルを表示

ファイル: test.py プロジェクト: brahimbellahcen/SmileFace

def main(_argv):
    """Run the detector on a single image or on a video file.

    With ``FLAGS.webcam`` unset, ``FLAGS.img_path`` is resized so its
    longer side is 320 pixels, processed, and the annotated (resized and
    padded) image is saved as ``out_<basename>``. Otherwise frames are
    read from a video file, the detector runs on every sixth frame (the
    last detections are redrawn on the frames in between), and the
    annotated frames are displayed and written to ``chaochao1.mp4``.
    The process exits if no checkpoint or no input image is found.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        img = np.float32(img_raw.copy())

        # testing scale: make the longer side `target_size` pixels
        target_size = 320
        img_size_max = np.max(img.shape[0:2])
        resize = float(target_size) / float(img_size_max)
        img = cv2.resize(img,
                         None,
                         None,
                         fx=resize,
                         fy=resize,
                         interpolation=cv2.INTER_LINEAR)

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        # draw and save results
        save_img_path = os.path.join('out_' + os.path.basename(FLAGS.img_path))
        for detection in outputs:
            draw_bbox_landm(img, detection, target_size, target_size)
        cv2.imwrite(save_img_path, img)
        print(f"[*] save result at {save_img_path}")

    else:
        cam = cv2.VideoCapture('./data/lichaochao.mp4')
        frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        fps = cam.get(cv2.CAP_PROP_FPS)
        # VideoWriter expects frameSize as (width, height).
        out = cv2.VideoWriter('chaochao1.mp4',
                              fourcc,
                              fps=fps,
                              frameSize=(frame_width, frame_height))

        resize = FLAGS.down_scale_factor
        frame_height *= resize
        frame_width *= resize

        # Priors are computed once for the padded, down-scaled frame size.
        max_steps = max(cfg['steps'])
        img_pad_h = (max_steps - frame_height % max_steps
                     if frame_height % max_steps > 0 else 0)
        img_pad_w = (max_steps - frame_width % max_steps
                     if frame_width % max_steps > 0 else 0)
        priors = prior_box_tf(
            (frame_height + img_pad_h, frame_width + img_pad_w),
            cfg['min_sizes'], cfg['steps'], cfg['clip'])

        frame_index = 0
        outputs = []
        try:
            while cam.isOpened():
                _, frame = cam.read()
                if frame is None:
                    print('no cam')
                    break
                if frame_index < 5:
                    # Reuse the previous detections on this frame.
                    frame_index += 1
                else:
                    frame_index = 0

                    img = np.float32(frame.copy())
                    if resize < 1:
                        img = cv2.resize(img, (0, 0),
                                         fx=resize,
                                         fy=resize,
                                         interpolation=cv2.INTER_LINEAR)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                    # pad input image to avoid unmatched shape problem
                    img, pad_params = pad_input_image(img,
                                                      max_steps=max_steps)

                    # run model
                    outputs = model(img[np.newaxis, ...])

                    preds = tf.concat([
                        outputs[0][0], outputs[1][0, :, 1][..., tf.newaxis],
                        outputs[2][0, :, 1][..., tf.newaxis]
                    ], -1)

                    decode_preds = decode_tf(preds, priors, cfg['variances'])

                    selected_indices = tf.image.non_max_suppression(
                        boxes=decode_preds[:, :4],
                        scores=decode_preds[:, -1],
                        max_output_size=tf.shape(decode_preds)[0],
                        iou_threshold=FLAGS.iou_th,
                        score_threshold=FLAGS.score_th)

                    outputs = tf.gather(decode_preds,
                                        selected_indices).numpy()

                    # recover padding effect
                    outputs = recover_pad_output(outputs,
                                                 pad_params,
                                                 resize=resize)

                # draw results
                # NOTE(review): frame_height/frame_width are the down-scaled
                # sizes here; presumably recover_pad_output(resize=...)
                # compensates for that -- confirm.
                for detection in outputs:
                    draw_bbox_landm(frame, detection, frame_height,
                                    frame_width)

                # save and show frame
                out.write(frame)
                cv2.imshow('frame', frame)
                if cv2.waitKey(1) == ord('q'):
                    exit()
        finally:
            # Release on end of stream and on exit() alike so the output
            # video is closed properly.
            out.release()
            cam.release()
            cv2.destroyAllWindows()

コード例 #12

0

ファイルを表示

ファイル: test_widerface.py プロジェクト: miliadis/retinaface-tf2

def main(_argv):
    """Run the detector over the test list and write results per image.

    For every image path loaded from ``label.txt`` in
    ``cfg['testing_dataset_path']`` a text file is written under
    ``FLAGS.save_folder/<sub_dir>/``. It contains the image name, the number
    of boxes, and one ``x y w h confidence`` line per box in original-image
    pixels. When ``FLAGS.save_image`` is set, detections with column 15 at
    or above ``FLAGS.vis_th`` are drawn and the image is saved under
    ``./results/<sub_name>/<sub_dir>/``. The process exits if no checkpoint
    is found.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    tf_logger = tf.get_logger()
    tf_logger.disabled = True
    tf_logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    ckpt_dir = './checkpoints/' + cfg['sub_name']
    ckpt = tf.train.Checkpoint(model=model)
    latest = tf.train.latest_checkpoint(ckpt_dir)
    if not latest:
        print("[*] Cannot find ckpt from {}.".format(ckpt_dir))
        exit()
    ckpt.restore(latest)
    print("[*] load ckpt from {}.".format(latest))

    # evaluation on testing dataset
    testset_list = os.path.join(cfg['testing_dataset_path'], 'label.txt')
    img_paths, _ = load_info(testset_list)
    total = len(img_paths)

    for position, img_path in enumerate(img_paths, start=1):
        print(" [{} / {}] det {}".format(position, total, img_path))
        img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
        raw_h, raw_w, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        # testing scale: shorter side to target_size, longer side capped
        target_size = 1600
        max_size = 2150
        shorter_side = np.min(img.shape[0:2])
        longer_side = np.max(img.shape[0:2])
        resize = float(target_size) / float(shorter_side)
        if np.round(resize * longer_side) > max_size:
            resize = float(max_size) / float(longer_side)
        if FLAGS.origin_size:
            # One image is too big at full size (OOM), so it is halved.
            too_big = (os.path.basename(img_path) ==
                       '6_Funeral_Funeral_6_618.jpg')
            resize = 0.5 if too_big else 1

        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad, run the model, then undo the effect of the padding
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
        outputs = model(img[np.newaxis, ...]).numpy()
        outputs = recover_pad_output(outputs, pad_params)

        # write results
        img_name = os.path.basename(img_path)
        sub_dir = os.path.basename(os.path.dirname(img_path))
        txt_dir = os.path.join(FLAGS.save_folder, sub_dir)
        save_name = os.path.join(txt_dir, img_name.replace('.jpg', '.txt'))
        pathlib.Path(txt_dir).mkdir(parents=True, exist_ok=True)

        with open(save_name, "w") as file:
            bboxs = outputs[:, :4]
            confs = outputs[:, -1]

            file.write(img_name + "\n")
            file.write(str(len(bboxs)) + "\n")
            for box, conf in zip(bboxs, confs):
                left = int(box[0] * raw_w)
                top = int(box[1] * raw_h)
                right = int(box[2] * raw_w)
                bottom = int(box[3] * raw_h)
                fields = (left, top, right - left, bottom - top, conf)
                # str() is used on purpose: it keeps the same text for the
                # confidence value as plain string concatenation.
                file.write(" ".join(str(value) for value in fields) + " \n")

        # save images
        vis_dir = os.path.join('./results', cfg['sub_name'], sub_dir)
        pathlib.Path(vis_dir).mkdir(parents=True, exist_ok=True)
        if FLAGS.save_image:
            for detection in outputs:
                if detection[15] >= FLAGS.vis_th:
                    draw_bbox_landm(img_raw, detection, raw_h, raw_w)
            cv2.imwrite(os.path.join(vis_dir, img_name), img_raw)

コード例 #13

0

ファイルを表示

ファイル: retinaface-cropper.py プロジェクト: LSEDev/DeepfakeDetection

def main(_argv):
    """Crop the dominant face out of every PNG found below ``FLAGS.path``.

    Restores a RetinaFace checkpoint, runs detection on each ``*.png`` in
    every sub-directory of ``FLAGS.path`` and writes a square crop around the
    selected face into a parallel directory tree whose paths have
    ``"images"`` replaced by ``"cropped_images"``.  Images whose last eight
    path characters match an already written crop are skipped.

    Args:
        _argv: Unused.
    """

    # FUNCTIONS FOR CROPPING
    #####################################################################################################
    def bounding_box(img, ann, img_height, img_width):
        """Scale the first four values of ``ann`` to integer pixel corners.

        ``img`` is accepted for call-site compatibility but is not used.
        """
        x1, y1 = int(ann[0] * img_width), int(ann[1] * img_height)
        x2, y2 = int(ann[2] * img_width), int(ann[3] * img_height)
        return x1, y1, x2, y2

    def calc_points(x, y, side):
        """Return ``(x1, x2, y1, y2)`` of a square of ``side`` centred on (x, y)."""
        half = side / 2
        return int(x - half), int(x + half), int(y - half), int(y + half)

    def adjust_points(x_center, y_center, original_longest, scaling_factor,
                      min_scaling_factor):
        """Find a square crop that lies completely inside ``img_raw``.

        Starts with a square of ``original_longest * scaling_factor`` pixels
        and, if it cannot be shifted into the image, retries with the factor
        reduced in steps of 0.05 down to ``min_scaling_factor``.

        Returns:
            ``(x1, x2, y1, y2, ok)`` where ``ok`` is False when no factor
            produced a square that fits.
        """
        img_h, img_w = img_raw.shape[:2]
        factors = np.arange(scaling_factor, min_scaling_factor - 0.04, -0.05)
        # Bound up front so the failure return works even if `factors` is empty.
        x1 = x2 = y1 = y2 = 0
        for factor in factors:
            # calculate new points
            x1, x2, y1, y2 = calc_points(x_center, y_center,
                                         int(original_longest * factor))

            for _ in range(FLAGS.max_iter):
                # Shift the square back inside the image without resizing it.
                if x1 < 0:
                    x2 -= x1
                    x1 = 0
                if y1 < 0:
                    y2 -= y1
                    y1 = 0
                if x2 > img_w:
                    # Move left by the overflow only (not by x2 itself).
                    x1 -= x2 - img_w
                    x2 = img_w
                if y2 > img_h:
                    # Move up by the overflow only (not by y2 itself).
                    y1 -= y2 - img_h
                    y2 = img_h

                if x1 >= 0 and y1 >= 0 and x2 <= img_w and y2 <= img_h:
                    return x1, x2, y1, y2, True

        print("Not cropping", img_path,
              "due to a problem with a cropping square box")
        return x1, x2, y1, y2, False

    def get_dim(lst):
        """Return the box area of ``[x1, y1, x2, y2]`` as a one-element list."""
        return [(lst[3] - lst[1]) * (lst[2] - lst[0])]

    def get_max(outputs, lst):
        """Pick one detection given ``[area, probability]`` pairs in ``lst``.

        Prefers a detection that has both the largest area and the highest
        probability; otherwise falls back to the largest area.  The result is
        a list holding at most one element of ``outputs``.
        """
        areas = [item[0] for item in lst]
        probs = [item[1] for item in lst]
        top_area = max(areas, default=None)
        top_prob = max(probs, default=None)
        max_area_index = {i for i, a in enumerate(areas) if a == top_area}
        max_prob_index = {i for i, p in enumerate(probs) if p == top_prob}
        indices = list(max_area_index.intersection(max_prob_index))
        if indices:
            return [outputs[indices[0]]]
        # if there is a mismatch, return the largest element
        if max_area_index:
            return [outputs[list(max_area_index)[0]]]
        # precautionary because there should always be at least one face
        print("Not cropping", img_path,
              "due to a problem with returning the largest element")
        return []

    #####################################################################################################

    # MODEL
    #####################################################################################################
    # initialisation
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()
    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()
    #####################################################################################################

    # CROPPING
    #####################################################################################################
    # check if the path exists
    if not os.path.exists(FLAGS.path):
        print(f"cannot find the specified path from {FLAGS.path}")
        exit()

    # make a corresponding directory
    try:
        os.mkdir(FLAGS.path.replace("images", "cropped_images"))
    except FileExistsError:
        print(FLAGS.path.replace("images", "cropped_images"), "already exists")

    # get subdirectories within the specified folder
    subdirectories = [FLAGS.path + '/' + i for i in os.listdir(FLAGS.path)
                      if os.path.isdir(FLAGS.path + '/' + i)]

    # loop through each folder
    for subdir in sorted(subdirectories):

        # number of trailing characters of the folder path used as a progress label
        if 'original' in subdir:
            x = 3
        else:
            x = 7
        label = subdir[len(subdir) - x:len(subdir)]
        cropped_dir = subdir.replace("images", "cropped_images")
        images_lst = glob.glob(subdir + "/*.png")

        # create the corresponding folder for cropped data
        try:
            os.mkdir(cropped_dir)
        except FileExistsError:
            # count number of existing images in this subdirectory, if same as original, skip
            cropped_images_lst = glob.glob(cropped_dir + "/*.png")
            cropped_images_lst = [
                e[len(e) - 8:len(e)] for e in cropped_images_lst
            ]
            if len(images_lst) == len(cropped_images_lst):
                print(label, "has already been generated")
                continue
            print(label)
        else:
            cropped_images_lst = []
            print(label)

        # loop through each image in a given folder
        for img_path in sorted(images_lst):

            # already cropped in an earlier run
            if img_path[len(img_path) - 8:len(img_path)] in cropped_images_lst:
                continue

            img_raw = cv2.imread(img_path)
            if img_raw is None:
                # cv2.imread signals an unreadable/corrupt file by returning None
                print("cannot read image", img_path)
                continue
            img_height_raw, img_width_raw, _ = img_raw.shape
            img = np.float32(img_raw.copy())
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image (unmatched shape problem), run model, recover padding effect
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
            outputs = model(img[np.newaxis, ...]).numpy()
            outputs = recover_pad_output(outputs, pad_params)

            # get rid of detections whose value at index 15 is below the probability threshold
            outputs = [i for i in outputs if i[15] >= FLAGS.threshold_prob]

            # flag any images which have no recognised faces in them
            if len(outputs) == 0:
                print("no faces detected for", img_path)

            # if more than one face detected, select the largest and most definite
            elif len(outputs) > 1:
                boxes = [
                    list(bounding_box(img_raw, i[0:4], img_height_raw,
                                      img_width_raw)) + [i[15]]
                    for i in outputs
                ]
                area_prob = [get_dim(i[0:4]) + [i[4]] for i in boxes]
                outputs = get_max(outputs, area_prob)

            # keeping as a loop in case we decide to use multiple faces per frame in the future
            # get cropping coordinates and save results
            for prior_index in range(len(outputs)):
                # get the bounding box coordinates
                bb_x1, bb_y1, bb_x2, bb_y2 = bounding_box(
                    img_raw, outputs[prior_index], img_height_raw,
                    img_width_raw)
                # longest side of the detected box; scaled inside adjust_points
                original_longest = int(max(bb_x2 - bb_x1, bb_y2 - bb_y1))
                x_center = int((bb_x1 + bb_x2) / 2)
                y_center = int((bb_y1 + bb_y2) / 2)

                x1, x2, y1, y2, save_image = adjust_points(
                    x_center, y_center, original_longest, FLAGS.scaling_factor,
                    FLAGS.min_scaling_factor)

                if save_image:
                    try:
                        save_img_path = os.path.join(
                            cropped_dir + "/" +
                            img_path.replace(subdir + '/', ''))
                        cv2.imwrite(save_img_path, img_raw[y1:y2, x1:x2])

                    except Exception as exc:
                        # Best effort: report and continue, but let
                        # KeyboardInterrupt/SystemExit propagate.
                        print(img_path, "is not cropped for unknown reasons",
                              repr(exc))

コード例 #14

0

ファイルを表示

def main(_argv):
    """Detect faces (with a smile score) in a folder of images or in a video.

    With ``FLAGS.webcam`` unset, every file in the hard-coded
    ``source_images/`` folder is run through the detector, annotated and
    written to ``smile_face/``, ``face/`` or ``no_face/`` depending on the
    detections.  Otherwise the hard-coded video file is read, detection is
    refreshed on every sixth frame and the annotated frames are displayed
    until the stream ends or ``q`` is pressed.

    Args:
        _argv: Unused.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        file_path = '/Users/lichaochao/Downloads/images_UMU/'
        for file_name in os.listdir(file_path + 'source_images/'):
            image_path = file_path + 'source_images/' + file_name
            if not os.path.exists(image_path):
                print(f"cannot find image path from {image_path}")
                continue

            img_raw = cv2.imread(image_path)
            if img_raw is None:
                # The listing may contain non-image files; cv2.imread
                # returns None for anything it cannot decode.
                print(f"cannot read image from {image_path}")
                continue
            img = np.float32(img_raw.copy())

            # NOTE: BGR->RGB conversion is intentionally left disabled here,
            # as in the original script.
            # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
            img_height, img_width, _ch = img.shape

            # run model
            outputs = model(img[np.newaxis, ...])

            # join the first head with column 1 of the second and third heads
            preds = tf.concat([
                outputs[0][0], outputs[1][0, :, 1][..., tf.newaxis],
                outputs[2][0, :, 1][..., tf.newaxis]
            ], -1)

            priors = prior_box_tf((img_height, img_width), cfg['min_sizes'],
                                  cfg['steps'], cfg['clip'])
            decode_preds = decode_tf(preds, priors, cfg['variances'])

            selected_indices = tf.image.non_max_suppression(
                boxes=decode_preds[:, :4],
                scores=decode_preds[:, -1],
                max_output_size=tf.shape(decode_preds)[0],
                iou_threshold=FLAGS.iou_th,
                score_threshold=FLAGS.score_th)

            outputs = tf.gather(decode_preds, selected_indices).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)
            has_face = False
            is_smile = False
            for prior_index in range(len(outputs)):
                ann = outputs[prior_index]
                # last column is used as the face score, second to last as
                # the smile score
                if ann[-1] >= 0.5:
                    has_face = True
                    x1, y1 = int(ann[0] * img_width), int(ann[1] * img_height)
                    x2, y2 = int(ann[2] * img_width), int(ann[3] * img_height)

                    text = "face: {:.2f}".format(ann[-1] * 100)
                    cv2.putText(img, text, (x1 + 5, y1 - 10),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                    if ann[-2] >= 0.5:
                        is_smile = True
                        smile_text = "smile: {:.2f}".format(ann[-2] * 100)
                        cv2.putText(img, smile_text, (x1 + 5, y1 + 30),
                                    cv2.FONT_HERSHEY_DUPLEX, 0.5,
                                    (255, 255, 255))
                        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
                    else:
                        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            if is_smile:
                dst_file_path = file_path + '/smile_face/' + file_name
            elif has_face:
                dst_file_path = file_path + '/face/' + file_name
            else:
                dst_file_path = file_path + '/no_face/' + file_name
            cv2.imwrite(dst_file_path, img)
            print(dst_file_path)

    else:
        cam = cv2.VideoCapture('./data/linda_umu.mp4')
        resize = FLAGS.down_scale_factor
        frame_height = cam.get(cv2.CAP_PROP_FRAME_HEIGHT) * resize
        frame_width = cam.get(cv2.CAP_PROP_FRAME_WIDTH) * resize

        # priors depend only on the padded frame size, so build them once
        max_steps = max(cfg['steps'])
        img_pad_h = max_steps - frame_height % max_steps if frame_height % max_steps > 0 else 0
        img_pad_w = max_steps - frame_width % max_steps if frame_width % max_steps > 0 else 0
        priors = prior_box_tf(
            (frame_height + img_pad_h, frame_width + img_pad_w),
            cfg['min_sizes'], cfg['steps'], cfg['clip'])

        frame_index = 0
        outputs = []
        start_time = time.time()
        try:
            while cam.isOpened():
                _, frame = cam.read()
                if frame is None:
                    print('no cam')
                    break
                if frame_index < 5:
                    # reuse the previous detections for the in-between frames
                    frame_index += 1
                else:
                    frame_index = 0

                    img = np.float32(frame.copy())
                    if resize < 1:
                        img = cv2.resize(img, (0, 0),
                                         fx=resize,
                                         fy=resize,
                                         interpolation=cv2.INTER_LINEAR)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                    # pad input image to avoid unmatched shape problem
                    img, pad_params = pad_input_image(img,
                                                      max_steps=max_steps)

                    # run model
                    outputs = model(img[np.newaxis, ...])

                    preds = tf.concat([
                        outputs[0][0], outputs[1][0, :, 1][..., tf.newaxis],
                        outputs[2][0, :, 1][..., tf.newaxis]
                    ], -1)

                    decode_preds = decode_tf(preds, priors, cfg['variances'])

                    selected_indices = tf.image.non_max_suppression(
                        boxes=decode_preds[:, :4],
                        scores=decode_preds[:, -1],
                        max_output_size=tf.shape(decode_preds)[0],
                        iou_threshold=FLAGS.iou_th,
                        score_threshold=FLAGS.score_th)

                    outputs = tf.gather(decode_preds,
                                        selected_indices).numpy()

                    # recover padding effect
                    outputs = recover_pad_output(outputs,
                                                 pad_params,
                                                 resize=resize)

                    # calculate fps (measured between detection passes)
                    fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
                    start_time = time.time()
                    cv2.putText(frame, fps_str, (25, 50),
                                cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 0, 255), 2)

                # draw results
                for prior_index in range(len(outputs)):
                    draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                    frame_width)

                # show frame
                cv2.imshow('frame', frame)
                if cv2.waitKey(1) == ord('q'):
                    exit()
        finally:
            # Runs on normal end of stream and on exit() alike, so the
            # capture handle and the preview window are always freed.
            cam.release()
            cv2.destroyAllWindows()

コード例 #15

0

ファイルを表示

def main(_argv):
    """Align the largest detected face of each image and save per-class archives.

    For every class folder in ``FLAGS.folder_path`` (starting at index 1978 of
    the sorted listing) each image is run through RetinaFace, the detection
    with the largest box is aligned with ``FaceAligner`` and the aligned
    faces plus one-hot class labels are stored as compressed ``.npz`` files
    in ``FLAGS.destination_dir``.  Progress is appended to ``log.txt`` there.

    Args:
        _argv: Unused.
    """
    mkdir(FLAGS.destination_dir)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)
    aligner = FaceAligner()
    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()
    with open(FLAGS.destination_dir+'log.txt', 'a+') as log_txt:
        total = 0
        processed_total = 0
        CLASS_NAMES = np.array(os.listdir(FLAGS.folder_path))
        CLASS_NAMES.sort()
        # NOTE(review): the hard-coded offset 1978 looks like a manual resume
        # point from an earlier run -- confirm before reusing this script.
        for f in CLASS_NAMES[1978:]:
            processed_image = 0
            images = []
            labels = []
            ######################################
            # Need modified for using
            ######################################
            items = os.listdir(FLAGS.folder_path+f)
            for path in items:
                frame = cv2.imread(FLAGS.folder_path + f + '/' + path)
                if frame is None:
                    continue
                frame_height, frame_width, _ = frame.shape
                img = np.float32(frame.copy())
                if FLAGS.down_scale_factor < 1.0:
                    img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                                     fy=FLAGS.down_scale_factor,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

                # run model
                outputs = model(img[np.newaxis, ...]).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params)
                if len(outputs) < 1:
                    continue
                # keep only the detection with the largest box area
                ann = max(outputs, key=lambda x: (x[2]-x[0])*(x[3]-x[1]))
                b_box = int(ann[0] * frame_width), int(ann[1] * frame_height), \
                        int(ann[2] * frame_width), int(ann[3] * frame_height)
                # skip faces whose box touches or leaves the frame
                if (b_box[0] < 0) or (b_box[1] < 0) or \
                        (b_box[2] >= frame_width) or (b_box[3] >= frame_height):
                    continue
                keypoints = {
                    'left_eye': (int(ann[4] * frame_width), int(ann[5] * frame_height)),
                    'right_eye': (int(ann[6] * frame_width), int(ann[7] * frame_height)),
                }
                out_frame = aligner.align(frame, keypoints, b_box)
                # NOTE(review): nothing in this try body appears able to raise
                # FileExistsError; the handler is kept unchanged pending
                # confirmation of the original intent.
                try:
                    images.append(out_frame.reshape(1, 112, 112, 3))
                    labels.append((CLASS_NAMES == f).reshape(1, -1))
                    log_txt.write(FLAGS.destination_dir + f + '/' + path + "\n")
                    processed_image += 1
                except FileExistsError:
                    pass
            if images:
                images_np = np.concatenate(tuple(images), axis=0)
                labels_np = np.concatenate(tuple(labels), axis=0)
                np.savez_compressed(FLAGS.destination_dir+"casia_image_{}.npz".format(f), images_np)
                np.savez_compressed(FLAGS.destination_dir+"casia_label_{}.npz".format(f), labels_np)
                print(f + " Done")
            else:
                # np.concatenate raises ValueError on an empty sequence, which
                # previously aborted the whole run for a class without faces.
                print(f + " has no usable faces, nothing saved")
            log_txt.write(f + " Processed: " + str(processed_image) + ' / ' + str(len(items)) + "\n")
            total += len(items)
            processed_total += processed_image
        log_txt.write("Processed total: " + str(processed_total) + ' / ' + str(total))
コード例 #16

0

ファイルを表示

def main(_argv):
    """Save a 3x3 mosaic of the first nine face crops found in one image.

    Loads the RetinaFace checkpoint, runs detection on ``FLAGS.img_path``
    and, for up to nine detections, takes the image returned by
    ``get_bbox_imgs`` resized to 64x64.  Unused mosaic slots are filled with
    blank ``Image.new('RGB', ...)`` tiles.  The mosaic is written to
    ``data/out_<basename of the input>``.

    Args:
        _argv: Unused.
    """
    # environment / logging setup
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    tf_logger = tf.get_logger()
    tf_logger.disabled = True
    tf_logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # build the detector
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # restore the most recent checkpoint, or give up
    ckpt_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(ckpt_dir)
    if not latest_ckpt:
        print("[*] Cannot find ckpt from {}.".format(ckpt_dir))
        exit()
    else:
        checkpoint.restore(latest_ckpt)

    if not os.path.exists(FLAGS.img_path):
        print(f"cannot find image path from {FLAGS.img_path}")
        exit()

    print("[*] Processing on single image {}".format(FLAGS.img_path))

    img_raw = cv2.imread(FLAGS.img_path)
    img_height_raw, img_width_raw, _ = img_raw.shape
    net_input = np.float32(img_raw.copy())

    if FLAGS.down_scale_factor < 1.0:
        net_input = cv2.resize(net_input, (0, 0),
                               fx=FLAGS.down_scale_factor,
                               fy=FLAGS.down_scale_factor,
                               interpolation=cv2.INTER_LINEAR)
    net_input = cv2.cvtColor(net_input, cv2.COLOR_BGR2RGB)

    # pad so the input size is a multiple of the largest stride
    net_input, pad_params = pad_input_image(net_input,
                                            max_steps=max(cfg['steps']))

    # forward pass, then undo the padding on the predictions
    outputs = model(net_input[np.newaxis, ...]).numpy()
    outputs = recover_pad_output(outputs, pad_params)

    # collect exactly nine tiles: face crops first, blanks for the rest
    tile_size = 64
    tiles = []
    for slot in range(9):
        if slot < len(outputs):
            face = get_bbox_imgs(img_raw, outputs[slot], img_height_raw,
                                 img_width_raw)
            tiles.append(cv2.resize(face, (tile_size, tile_size)))
        else:
            tiles.append(Image.new('RGB', (tile_size, tile_size)))

    # three tiles per row, three rows stacked vertically
    rows = [
        np.concatenate(tiles[start:start + 3], axis=1)
        for start in (0, 3, 6)
    ]
    mosaic = np.concatenate(rows, axis=0)

    save_img_path = os.path.join('data/out_' +
                                 os.path.basename(FLAGS.img_path))
    cv2.imwrite(save_img_path, mosaic)

    print(f"[*] save result at {save_img_path}")

コード例 #17

0

ファイルを表示

def main(_argv):
    """Build the RetinaFace model, restore its latest checkpoint and return it.

    Configuration is hard-coded in the local constants below instead of being
    read from command-line flags.  The process exits if no checkpoint is
    found under ``./checkpoints/<cfg['sub_name']>``.

    The original body continued with single-image and webcam inference after
    an unconditional ``return model``; that code could never run (and
    referenced the undefined name ``input_image``), so it has been removed.

    Args:
        _argv: Unused.

    Returns:
        The ``RetinaFaceModel`` instance with checkpoint weights restored.
    """
    # init
    CONFIG_PATH = './configs/retinaface_mbv2.yaml'
    GPU = '0'
    IOU_TH = 0.4
    SCORE_TH = 0.5

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = GPU

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(CONFIG_PATH)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=IOU_TH,
                            score_th=SCORE_TH)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    return model

コード例 #18

0

ファイルを表示

ファイル: test.py プロジェクト: rahul-islam/retinaface

def main(_argv):
    """Run RetinaFace on a single image or on a video and save the results.

    With ``FLAGS.webcam`` unset, ``FLAGS.img_path`` is annotated and written
    to ``out_<basename>``.  Otherwise ``FLAGS.vid_path`` is read frame by
    frame, annotated frames are appended to ``./output/output.avi`` and,
    once ``fps * FLAGS.dfps`` frames have passed, the value returned by
    ``draw_bbox_landm`` for a detection is saved under ``FLAGS.dst_path``.
    Processing stops at the end of the stream or when ``q`` is pressed.

    Args:
        _argv: Unused.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        if img_raw is None:
            # The path exists but cv2.imread could not decode it.
            print(f"cannot read image from {FLAGS.img_path}")
            exit()
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        if FLAGS.down_scale_factor < 1.0:
            img = cv2.resize(img, (0, 0),
                             fx=FLAGS.down_scale_factor,
                             fy=FLAGS.down_scale_factor,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = os.path.join('out_' + os.path.basename(FLAGS.img_path))
        for prior_index in range(len(outputs)):
            draw_bbox_landm(img_raw, outputs[prior_index], img_height_raw,
                            img_width_raw)
        # Written once after drawing, so the file exists even when nothing
        # was detected and the message below stays truthful.
        cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")

    else:

        cam = cv2.VideoCapture(FLAGS.vid_path)
        fps = int(cam.get(cv2.CAP_PROP_FPS))

        ### Saving Video to file
        # property ids 3 and 4 are the frame width and height
        frame_width = int(cam.get(3))
        frame_height = int(cam.get(4))

        # Define the codec and create the VideoWriter object; the output is
        # stored in './output/output.avi'.
        if not os.path.exists('./output'):
            print('Creating folder: output/ for saving video.')
            os.makedirs('./output')
        out = cv2.VideoWriter('./output/output.avi',
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                              (frame_width, frame_height))

        start_time = time.time()
        counter = 0
        frameCount = 0
        try:
            while True:
                _, frame = cam.read()
                if frame is None:
                    # End of stream / read failure: stop instead of
                    # dereferencing None below.
                    print("no cam input")
                    break
                frameCount = frameCount + 1

                frame_height, frame_width, _ = frame.shape
                img = np.float32(frame.copy())
                if FLAGS.down_scale_factor < 1.0:
                    img = cv2.resize(img, (0, 0),
                                     fx=FLAGS.down_scale_factor,
                                     fy=FLAGS.down_scale_factor,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img,
                                                  max_steps=max(cfg['steps']))

                # run model
                outputs = model(img[np.newaxis, ...]).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params)

                # draw results
                for prior_index in range(len(outputs)):
                    croppedFace = draw_bbox_landm(frame, outputs[prior_index],
                                                  frame_height, frame_width)
                    # save one crop each time enough frames have elapsed
                    if frameCount >= fps * FLAGS.dfps:
                        fileName = "%d.png" % counter
                        cv2.imwrite(FLAGS.dst_path + fileName, croppedFace)
                        print('Saved:', fileName)
                        counter = counter + 1
                        frameCount = 0

                # calculate fps
                fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
                start_time = time.time()
                cv2.putText(frame, fps_str, (25, 25), cv2.FONT_HERSHEY_DUPLEX,
                            0.75, (0, 255, 0), 2)

                # show frame
                if FLAGS.preview:
                    cv2.imshow('frame', frame)
                out.write(frame)
                if cv2.waitKey(1) == ord('q'):
                    exit()
        finally:
            # Runs on end of stream and on exit() alike, so the capture and
            # the output video file are always closed.
            cam.release()
            out.release()

コード例 #19

0

ファイルを表示

ファイル: track_native.py プロジェクト: haoluong/attendance_system

def main(_argv):
    """Detect, align and classify faces on a video stream and display them.

    Builds a RetinaFace detector from the YAML config at ``FLAGS.cfg_path``,
    restores the latest checkpoint found under
    ``./checkpoints/<cfg['sub_name']>`` and loads the embedding model from
    ``settings.CHECKPOINT_PATH``.  Every frame read from the input stream is
    run through the detector; each detection that lies fully inside the frame
    is aligned, embedded and classified against the anchor/label datasets,
    then drawn on the frame together with its label and probability.

    Args:
        _argv: Unused positional command-line arguments.

    The process exits via ``exit()`` when no checkpoint is found or when
    ``q`` is pressed in the preview window; the loop stops when the stream
    yields no more frames.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)
    aligner = FaceAligner()
    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint (resolve the latest checkpoint path only once)
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print("[*] load ckpt from {}.".format(latest_ckpt))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    # '0' selects the default webcam, 'rtsp' the configured RTSP address,
    # anything else is passed to OpenCV as-is (e.g. a video file path).
    if FLAGS.input_stream == '0':
        input_stream = 0
    elif FLAGS.input_stream == 'rtsp':
        input_stream = settings.RTSP_ADDR
    else:
        input_stream = FLAGS.input_stream
    cam = cv2.VideoCapture(input_stream)

    try:
        mbv2 = tf.keras.models.load_model(settings.CHECKPOINT_PATH)
        anchor_dataset = np.load(settings.ANCHOR_PATH)['arr_0']
        label_dataset = np.load(settings.LABEL_PATH)['arr_0']
        start_time = time.time()

        while cam.isOpened():
            grabbed, frame = cam.read()
            if not grabbed or frame is None:
                # Without a frame nothing below can run (frame.shape would
                # raise), so stop instead of only logging.
                print("no cam input")
                break

            frame_height, frame_width, _ = frame.shape
            img = np.float32(frame.copy())
            if FLAGS.down_scale_factor < 1.0:
                img = cv2.resize(img, (0, 0),
                                 fx=FLAGS.down_scale_factor,
                                 fy=FLAGS.down_scale_factor,
                                 interpolation=cv2.INTER_LINEAR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img,
                                              max_steps=max(cfg['steps']))

            # run model
            outputs = model(img[np.newaxis, ...]).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            for ann in outputs:
                # Values in ``ann`` are multiplied by the frame size below,
                # i.e. they are treated as normalized coordinates.
                b_box = (int(ann[0] * frame_width),
                         int(ann[1] * frame_height),
                         int(ann[2] * frame_width),
                         int(ann[3] * frame_height))
                # skip detections that are not fully inside the frame
                if (b_box[0] < 0) or (b_box[1] < 0) or (
                        b_box[2] >= frame_width) or (
                            b_box[3] >= frame_height):
                    continue
                # NOTE(review): the nose used to be passed unscaled while
                # every other landmark was scaled to pixels; it is scaled
                # here for consistency -- confirm against FaceAligner.
                keypoints = {
                    'left_eye': (ann[4] * frame_width,
                                 ann[5] * frame_height),
                    'right_eye': (ann[6] * frame_width,
                                  ann[7] * frame_height),
                    'nose': (ann[8] * frame_width, ann[9] * frame_height),
                    'left_mouth': (ann[10] * frame_width,
                                   ann[11] * frame_height),
                    'right_mouth': (ann[12] * frame_width,
                                    ann[13] * frame_height),
                }
                out_frame = aligner.align(frame, keypoints, b_box)
                # The aligned crop is reshaped directly, so it presumably
                # already has IMAGE_SIZE x IMAGE_SIZE pixels -- TODO confirm.
                scaled_reshape = out_frame.reshape(-1, settings.IMAGE_SIZE,
                                                   settings.IMAGE_SIZE, 3)
                embed_vector = mbv2(scaled_reshape / 255.0)
                label, prob = classify(embed_vector, anchor_dataset,
                                       label_dataset)
                if prob < 0.5:
                    label = "Unknown"
                cv2.rectangle(frame, (b_box[0], b_box[1]),
                              (b_box[2], b_box[3]), (0, 255, 0), 2)
                cv2.putText(frame, label, (b_box[0], b_box[1]),
                            cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                text = "{:.4f}".format(prob)
                cv2.putText(frame, text, (b_box[0], b_box[1] + 15),
                            cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

            # calculate fps (guard against a zero-length interval on
            # coarse clocks)
            now = time.time()
            elapsed = now - start_time
            start_time = now
            fps_str = "FPS: %.2f" % (1 / elapsed if elapsed > 0 else 0.0)
            cv2.putText(frame, fps_str, (25, 25), cv2.FONT_HERSHEY_DUPLEX,
                        0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow("frame", frame)
            if cv2.waitKey(1) == ord('q'):
                exit()
    finally:
        # Always hand the capture device back, including on exit() above.
        cam.release()

コード例 #20

0

ファイルを表示

def _prepare_input(image_bgr, cfg):
    """Turn a BGR image into the padded float32 RGB array fed to the model.

    Args:
        image_bgr: Image as returned by ``cv2.imread`` / ``VideoCapture.read``.
        cfg: Loaded config; ``cfg['steps']`` determines the padding step.

    Returns:
        The ``(img, pad_params)`` pair returned by ``pad_input_image``.
    """
    img = np.float32(image_bgr.copy())
    if FLAGS.down_scale_factor < 1.0:
        img = cv2.resize(img, (0, 0),
                         fx=FLAGS.down_scale_factor,
                         fy=FLAGS.down_scale_factor,
                         interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # pad input image to avoid unmatched shape problem
    return pad_input_image(img, max_steps=max(cfg['steps']))


def _process_images(model, cfg):
    """Run detection on up to ``FLAGS.img_num`` ``.jpg`` files and save them.

    Walks ``FLAGS.img_path`` recursively; each annotated image is written to
    ``out_<basename>`` in the current working directory (files sharing a
    basename in different sub-directories therefore overwrite each other).
    """
    save_count = 0
    for path, _subdirs, files in os.walk(FLAGS.img_path):
        for name in files:
            if save_count >= FLAGS.img_num:
                # Limit reached: no reason to keep walking the tree.
                return
            if not name.endswith('.jpg'):
                continue

            img_path = os.path.join(path, name)
            if not os.path.exists(img_path):
                print(f"cannot find image path from {img_path}")
                exit()

            print("[*] Processing on single image {}".format(img_path))

            img_raw = cv2.imread(img_path)
            if img_raw is None:
                # cv2.imread returns None for unreadable/corrupt files.
                print(f"cannot read image from {img_path}")
                continue
            img_height_raw, img_width_raw, _ = img_raw.shape

            img, pad_params = _prepare_input(img_raw, cfg)

            # run model
            outputs = model(img[np.newaxis, ...]).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw and save results
            save_img_path = 'out_' + os.path.basename(img_path)
            for detection in outputs:
                draw_bbox_landm(img_raw, detection, img_height_raw,
                                img_width_raw)
            # Write once after all detections are drawn, so the message
            # below is also true for images without any detection.
            cv2.imwrite(save_img_path, img_raw)
            print(f"[*] save result at {save_img_path}")
            save_count += 1


def _run_webcam(model, cfg, face_aligner):
    """Detect faces on webcam 0, overlay eye geometry and show the result.

    Shows the annotated frame in window ``'frame'`` and, when a face was
    decoded in the current frame, the aligned face in ``'face aligned'``.
    Pressing ``q`` exits the process.
    """
    cam = cv2.VideoCapture(0)
    start_time = time.time()

    try:
        while True:
            grabbed, frame = cam.read()
            if not grabbed or frame is None:
                # frame.shape below would raise on None, so stop here.
                print("no cam input")
                break

            frame_height, frame_width, _ = frame.shape
            orig_frame = frame.copy()  # untouched copy for alignment
            rotated = None

            img, pad_params = _prepare_input(frame, cfg)

            # run model; time it separately so the FPS timer is not reset
            inference_start = time.time()
            outputs = model(img[np.newaxis, ...]).numpy()
            inference_time = f"Inf: {time.time() - inference_start}"

            cv2.putText(frame, inference_time, (25, 50),
                        cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            if len(outputs) > 0:
                # Decode once per frame instead of once per detection.
                # presumably maps a key to a list whose first entry holds
                # the landmark tuples indexed below -- verify against
                # decode_predictions.
                preds = decode_predictions((frame_width, frame_height),
                                           outputs)

                for value in preds.values():
                    left_eye = value[0]['left_eye']
                    right_eye = value[0]['right_eye']

                    # Third vertex of the right triangle spanned by the eyes
                    if left_eye[1] > right_eye[1]:
                        A = (right_eye[0], left_eye[1])
                    else:
                        A = (left_eye[0], right_eye[1])

                    # Calc our rotating degree
                    delta_x = right_eye[0] - left_eye[0]
                    delta_y = right_eye[1] - left_eye[1]
                    angle = np.arctan(
                        delta_y / (delta_x + 1e-17))  # avoid division by zero
                    angle = (angle * 180) / np.pi

                    cv2.circle(frame, A, 5, (255, 0, 0), -1)

                    # angle label, up and to the left of the left eye
                    cv2.putText(frame, str(int(angle)),
                                (left_eye[0] - 40, left_eye[1] - 25),
                                cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

                    cv2.line(frame, right_eye, left_eye, (0, 200, 200), 3)
                    cv2.line(frame, left_eye, A, (0, 200, 200), 3)
                    cv2.line(frame, right_eye, A, (0, 200, 200), 3)

                    rotated = face_aligner.align(orig_frame, left_eye,
                                                 right_eye)

            for detection in outputs:
                draw_bbox_landm(frame, detection, frame_height, frame_width)

            # calculate fps over the whole loop iteration
            now = time.time()
            elapsed = now - start_time
            start_time = now
            fps_str = "FPS: %.2f" % (1 / elapsed if elapsed > 0 else 0.0)
            cv2.putText(frame, fps_str, (25, 25), cv2.FONT_HERSHEY_DUPLEX,
                        0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow('frame', frame)
            if rotated is not None:
                cv2.imshow('face aligned', rotated)

            if cv2.waitKey(1) == ord('q'):
                exit()
    finally:
        # Always hand the capture device back, including on exit() above.
        cam.release()


def main(_argv):
    """Run RetinaFace detection on a folder of images or on the webcam.

    Builds the model from the YAML config at ``FLAGS.cfg_path`` and restores
    the latest checkpoint under ``./checkpoints/<cfg['sub_name']>`` (exiting
    if none exists).  With ``FLAGS.webcam`` unset, ``.jpg`` files under
    ``FLAGS.img_path`` are annotated and saved; otherwise frames from
    webcam 0 are annotated and displayed live.

    Args:
        _argv: Unused positional command-line arguments.
    """
    # init
    face_aligner = FaceAligner()

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint (resolve the latest checkpoint path only once)
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print("[*] load ckpt from {}.".format(latest_ckpt))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        _process_images(model, cfg)
    else:
        _run_webcam(model, cfg, face_aligner)