Example #1
import logging
import os

import tensorflow as tf

from modules.models import RetinaFaceModel
from modules.utils import load_yaml, set_memory_growth


def get_model(cfg_path):
    # init
    IOU_TH = 0.5
    SCORE_TH = 0.4
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=IOU_TH,
                            score_th=SCORE_TH)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        #print("[*] load ckpt from {}.".format(tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    return model
Example #2
def initialize():
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    # Setup logger
    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # Define network
    model = RetinaFaceModel(cfg, training=False,
                            iou_th=FLAGS.iou_th, score_th=FLAGS.score_th)

    # Load checkpoints
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)

    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print(
            "[*] load ckpt from {}.".format(tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    return model, cfg
Example #3
def main(_argv):
    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = FLAGS.weights
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    model.summary()
    for i in model.layers:
        print(i.output)
    model.save(FLAGS.output)
Example #4
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load model from weights.h5
    # model.load_weights('./model/mbv2_weights.h5', by_name=True, skip_mismatch=True)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        file_path = '/Users/lichaochao/Downloads/images_UMU/'
        for file_name in os.listdir(file_path + 'source_images/'):
            image_path = file_path + 'source_images/' + file_name
            if not os.path.exists(image_path):
                print(f"cannot find image path from {image_path}")
                continue

            img_raw = cv2.imread(image_path)
            img = np.float32(img_raw.copy())

            # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
            img_height, img_width, _ch = img.shape

            # run model
            outputs = model(img[np.newaxis, ...])

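            # per-anchor predictions: box/landmark regressions from the first
            # output, plus column 1 (positive-class score) of the two
            # classification outputs; the NMS below reads its score from the
            # last column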
            preds = tf.concat([
                outputs[0][0], outputs[1][0, :, 1][..., tf.newaxis],
                outputs[2][0, :, 1][..., tf.newaxis]
            ], -1)

            priors = prior_box_tf((img_height, img_width), cfg['min_sizes'],
                                  cfg['steps'], cfg['clip'])
            decode_preds = decode_tf(preds, priors, cfg['variances'])

            selected_indices = tf.image.non_max_suppression(
                boxes=decode_preds[:, :4],
                scores=decode_preds[:, -1],
                max_output_size=tf.shape(decode_preds)[0],
                iou_threshold=FLAGS.iou_th,
                score_threshold=FLAGS.score_th)

            outputs = tf.gather(decode_preds, selected_indices).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)
            has_face = False
            is_smile = False
            for prior_index in range(len(outputs)):
                ann = outputs[prior_index]
                if ann[-1] >= 0.5:
                    has_face = True
                    x1, y1 = int(ann[0] * img_width), int(ann[1] * img_height)
                    x2, y2 = int(ann[2] * img_width), int(ann[3] * img_height)

                    text = "face: {:.2f}".format(ann[-1] * 100)
                    cv2.putText(img, text, (x1 + 5, y1 - 10),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                    if ann[-2] >= 0.5:
                        is_smile = True
                        smile_text = "smile: {:.2f}".format(ann[-2] * 100)
                        cv2.putText(img, smile_text, (x1 + 5, y1 + 30),
                                    cv2.FONT_HERSHEY_DUPLEX, 0.5,
                                    (255, 255, 255))
                        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
                    else:
                        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            if is_smile:
                dst_file_path = file_path + 'smile_face/' + file_name
            elif has_face:
                dst_file_path = file_path + 'face/' + file_name
            else:
                dst_file_path = file_path + 'no_face/' + file_name
            cv2.imwrite(dst_file_path, img)
            print(dst_file_path)

    else:
        cam = cv2.VideoCapture('./data/linda_umu.mp4')
        # cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        # cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
        resize = FLAGS.down_scale_factor
        frame_height = cam.get(cv2.CAP_PROP_FRAME_HEIGHT) * resize
        frame_width = cam.get(cv2.CAP_PROP_FRAME_WIDTH) * resize

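        # the frame size is fixed, so the padded size and the anchor priors
        # can be computed once up front instead of once per frame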
        max_steps = max(cfg['steps'])
        img_pad_h = max_steps - frame_height % max_steps if frame_height % max_steps > 0 else 0
        img_pad_w = max_steps - frame_width % max_steps if frame_width % max_steps > 0 else 0
        priors = prior_box_tf(
            (frame_height + img_pad_h, frame_width + img_pad_w),
            cfg['min_sizes'], cfg['steps'], cfg['clip'])

        frame_index = 0
        outputs = []
        start_time = time.time()
        while cam.isOpened():
            _, frame = cam.read()
            if frame is None:
                print('no cam')
                break
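            # run the detector only on every sixth frame; the frames in
            # between are drawn with the most recent detections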
            if frame_index < 5:
                frame_index += 1
                # continue
            else:
                frame_index = 0

                img = np.float32(frame.copy())
                if resize < 1:
                    img = cv2.resize(img, (0, 0),
                                     fx=resize,
                                     fy=resize,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model
                outputs = model(img[np.newaxis, ...])

                preds = tf.concat([
                    outputs[0][0], outputs[1][0, :, 1][..., tf.newaxis],
                    outputs[2][0, :, 1][..., tf.newaxis]
                ], -1)

                decode_preds = decode_tf(preds, priors, cfg['variances'])

                selected_indices = tf.image.non_max_suppression(
                    boxes=decode_preds[:, :4],
                    scores=decode_preds[:, -1],
                    max_output_size=tf.shape(decode_preds)[0],
                    iou_threshold=FLAGS.iou_th,
                    score_threshold=FLAGS.score_th)

                outputs = tf.gather(decode_preds, selected_indices).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs,
                                             pad_params,
                                             resize=resize)

                # calculate fps
                fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
                start_time = time.time()
                cv2.putText(frame, fps_str, (25, 50), cv2.FONT_HERSHEY_DUPLEX,
                            0.75, (0, 0, 255), 2)

            # draw results
            for prior_index in range(len(outputs)):
                draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                frame_width)

            # calculate fps
            # fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            # start_time = time.time()
            # cv2.putText(frame, fps_str, (25, 25),
            #             cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) == ord('q'):
                exit()
Example #5
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        if FLAGS.down_scale_factor < 1.0:
            img = cv2.resize(img, (0, 0),
                             fx=FLAGS.down_scale_factor,
                             fy=FLAGS.down_scale_factor,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = 'out_' + os.path.basename(FLAGS.img_path)
        for prior_index in range(len(outputs)):
            draw_bbox_landm(img_raw, outputs[prior_index], img_height_raw,
                            img_width_raw)
        cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")

    else:
        cam = cv2.VideoCapture("./videos/Bentall_Centra.MP4")

        start_time = time.time()
        while True:
            _, frame = cam.read()
            if frame is None:
                print("no cam input")
                break

            frame_height, frame_width, _ = frame.shape
            img = np.float32(frame.copy())
            if FLAGS.down_scale_factor < 1.0:
                img = cv2.resize(img, (0, 0),
                                 fx=FLAGS.down_scale_factor,
                                 fy=FLAGS.down_scale_factor,
                                 interpolation=cv2.INTER_LINEAR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

            # run model
            outputs = model(img[np.newaxis, ...]).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            for prior_index in range(len(outputs)):
                draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                frame_width)

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            start_time = time.time()
            cv2.putText(frame, fps_str, (25, 25), cv2.FONT_HERSHEY_DUPLEX,
                        0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) == ord('q'):
                exit()
Example #6
import os

import tensorflow as tf

from modules.models import RetinaFaceModel
from modules.utils import (set_memory_growth, load_yaml, draw_bbox_landm,
                           pad_input_image, recover_pad_output)

set_memory_growth()
# tf.debugging.set_log_device_placement(True)

cfg_path = './configs/retinaface_mbv2.yaml'
gpu = '0'
iou_th = 0.4
score_th = 0.5

cfg = load_yaml(cfg_path)

model = RetinaFaceModel(cfg, training=False, iou_th=iou_th, score_th=score_th)

checkpoint_dir = './checkpoints/' + cfg['sub_name']
checkpoint = tf.train.Checkpoint(model=model)
if tf.train.latest_checkpoint(checkpoint_dir):
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
    print("[*] load ckpt from {}.".format(
        tf.train.latest_checkpoint(checkpoint_dir)))
else:
    print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
    exit()


def get_face_value(img_raw, down_scale_factor=0.3):
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
Example #7
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load model from weights.h5
    # model.load_weights('./model/mbv2_weights.h5', by_name=True, skip_mismatch=True)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        img = np.float32(img_raw.copy())

        # testing scale: rescale so the longest side equals target_size
        target_size = 320
        img_size_max = np.max(img.shape[0:2])
        resize = float(target_size) / float(img_size_max)
        img = cv2.resize(img,
                         None,
                         None,
                         fx=resize,
                         fy=resize,
                         interpolation=cv2.INTER_LINEAR)

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        # draw and save results
        save_img_path = 'out_' + os.path.basename(FLAGS.img_path)
        for prior_index in range(len(outputs)):
            draw_bbox_landm(img, outputs[prior_index], target_size,
                            target_size)
        cv2.imwrite(save_img_path, img)
        print(f"[*] save result at {save_img_path}")

    else:
        cam = cv2.VideoCapture('./data/lichaochao.mp4')
        # cam = cv2.VideoCapture(0)
        frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        fps = cam.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter('chaochao1.mp4',
                              fourcc,
                              fps=fps,
                              frameSize=(frame_width, frame_height))

        resize = FLAGS.down_scale_factor
        frame_height *= resize
        frame_width *= resize

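        # frame size is fixed, so compute the padded size and anchor priors
        # once before the capture loop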
        max_steps = max(cfg['steps'])
        img_pad_h = max_steps - frame_height % max_steps if frame_height % max_steps > 0 else 0
        img_pad_w = max_steps - frame_width % max_steps if frame_width % max_steps > 0 else 0
        priors = prior_box_tf(
            (frame_height + img_pad_h, frame_width + img_pad_w),
            cfg['min_sizes'], cfg['steps'], cfg['clip'])

        frame_index = 0
        outputs = []
        start_time = time.time()
        while cam.isOpened():
            _, frame = cam.read()
            if frame is None:
                print('no cam')
                break
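            # detect only on every sixth frame and redraw the last detections
            # on the frames in between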
            if frame_index < 5:
                frame_index += 1
                # continue
            else:
                frame_index = 0

                img = np.float32(frame.copy())
                if resize < 1:
                    img = cv2.resize(img, (0, 0),
                                     fx=resize,
                                     fy=resize,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model
                outputs = model(img[np.newaxis, ...])

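                # merge regressions and positive-class scores, then decode
                # against the precomputed priors and apply NMS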
                preds = tf.concat([
                    outputs[0][0], outputs[1][0, :, 1][..., tf.newaxis],
                    outputs[2][0, :, 1][..., tf.newaxis]
                ], -1)

                decode_preds = decode_tf(preds, priors, cfg['variances'])

                selected_indices = tf.image.non_max_suppression(
                    boxes=decode_preds[:, :4],
                    scores=decode_preds[:, -1],
                    max_output_size=tf.shape(decode_preds)[0],
                    iou_threshold=FLAGS.iou_th,
                    score_threshold=FLAGS.score_th)

                outputs = tf.gather(decode_preds, selected_indices).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs,
                                             pad_params,
                                             resize=resize)

                # calculate fps
                # fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
                # start_time = time.time()
                # cv2.putText(frame, fps_str, (25, 50),
                #             cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 0, 255), 2)

            # draw results
            for prior_index in range(len(outputs)):
                draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                frame_width)

            # calculate fps
            # fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            # start_time = time.time()
            # cv2.putText(frame, fps_str, (25, 25),
            #             cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # show frame
            out.write(frame)
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) == ord('q'):
                exit()
Example #8
import logging
import os

import tensorflow as tf
from flask import Flask
from flask_ngrok import run_with_ngrok

from modules.models import RetinaFaceModel
from modules.utils import (set_memory_growth, load_yaml, draw_bbox_landm,
                           pad_input_image, recover_pad_output)

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

logger = tf.get_logger()
logger.disabled = True
logger.setLevel(logging.FATAL)
set_memory_growth()

cfg_path = './retinaface_mbv2.yaml'
cfg = load_yaml(cfg_path)

model = RetinaFaceModel(cfg, training=False, iou_th=0.4, score_th=0.5)

checkpoint_dir = './checkpoints/retinaface_mbv2'
checkpoint = tf.train.Checkpoint(model=model)

if tf.train.latest_checkpoint(checkpoint_dir):
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
    print("[*] load ckpt from {}.".format(
        tf.train.latest_checkpoint(checkpoint_dir)))
else:
    print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
    exit()

app = Flask(__name__)
run_with_ngrok(app)
Example #9
def main(_argv):
    # init

    face_aligner = FaceAligner()

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        save_count = 0
        for path, subdirs, files in os.walk(FLAGS.img_path):

            for name in files:
                if name.endswith('.jpg'):
                    img_path = os.path.join(path, name)

                    if not os.path.exists(img_path):
                        print(f"cannot find image path from {img_path}")
                        exit()

                    if save_count < FLAGS.img_num:
                        print("[*] Processing on single image {}".format(
                            img_path))

                        img_raw = cv2.imread(img_path)
                        img_height_raw, img_width_raw, _ = img_raw.shape
                        img = np.float32(img_raw.copy())

                        if FLAGS.down_scale_factor < 1.0:
                            img = cv2.resize(img, (0, 0),
                                             fx=FLAGS.down_scale_factor,
                                             fy=FLAGS.down_scale_factor,
                                             interpolation=cv2.INTER_LINEAR)
                        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                        # pad input image to avoid unmatched shape problem
                        img, pad_params = pad_input_image(img,
                                                          max_steps=max(
                                                              cfg['steps']))

                        # run model
                        outputs = model(img[np.newaxis, ...]).numpy()

                        # recover padding effect
                        outputs = recover_pad_output(outputs, pad_params)

                        # draw and save results
                        save_img_path = 'out_' + os.path.basename(img_path)

                        for prior_index in range(len(outputs)):
                            draw_bbox_landm(img_raw, outputs[prior_index],
                                            img_height_raw, img_width_raw)
                        cv2.imwrite(save_img_path, img_raw)
                        print(f"[*] save result at {save_img_path}")
                        save_count += 1

    else:
        cam = cv2.VideoCapture(0)

        start_time = time.time()

        while True:
            _, frame = cam.read()
            if frame is None:
                print("no cam input")
                break

            frame_height, frame_width, _ = frame.shape

            orig_frame = frame.copy()

            face = None

            img = np.float32(frame.copy())
            if FLAGS.down_scale_factor < 1.0:
                img = cv2.resize(img, (0, 0),
                                 fx=FLAGS.down_scale_factor,
                                 fy=FLAGS.down_scale_factor,
                                 interpolation=cv2.INTER_LINEAR)

            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

            # run model
            start_time = time.time()
            outputs = model(img[np.newaxis, ...]).numpy()
            inference_time = f"Inf: {time.time() - start_time}"

            cv2.putText(frame, inference_time, (25, 50),
                        cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            for prior_index in range(len(outputs)):
                preds = decode_predictions((frame_width, frame_height),
                                           outputs)

                for key, value in preds.items():
                    bbox = value[0]['bbox']
                    left_eye = value[0]['left_eye']
                    right_eye = value[0]['right_eye']

                    # Our face ROI
                    face = orig_frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]

                    # Eyes
                    x1_le = left_eye[0] - 25
                    y1_le = left_eye[1] - 25
                    x2_le = left_eye[0] + 25
                    y2_le = left_eye[1] + 25

                    x1_re = right_eye[0] - 25
                    y1_re = right_eye[1] - 25
                    x2_re = right_eye[0] + 25
                    y2_re = right_eye[1] + 25

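                    # third vertex of the right triangle drawn between the
                    # eyes to visualise the roll angle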
                    if left_eye[1] > right_eye[1]:
                        A = (right_eye[0], left_eye[1])
                    else:
                        A = (left_eye[0], right_eye[1])

                    # Calc our rotating degree
                    delta_x = right_eye[0] - left_eye[0]
                    delta_y = right_eye[1] - left_eye[1]
                    angle = np.arctan(
                        delta_y / (delta_x + 1e-17))  # avoid division by zero
                    angle = (angle * 180) / np.pi

                    # compute the desired right eye x-coordinate based on the
                    # desired x-coordinate of the left eye
                    desiredRightEyeX = 1.0 - 0.35

                    # determine the scale of the new resulting image by taking
                    # the ratio of the distance between eyes in the *current*
                    # image to the ratio of distance between eyes in the
                    # *desired* image
                    dist = np.sqrt((delta_x**2) + (delta_y**2))
                    desiredDist = (desiredRightEyeX - 0.35)
                    desiredDist *= 256
                    scale = desiredDist / dist

                    eyesCenter = ((left_eye[0] + right_eye[0]) // 2,
                                  (left_eye[1] + right_eye[1]) // 2)

                    cv2.circle(frame, A, 5, (255, 0, 0), -1)

                    cv2.putText(frame, str(int(angle)), (x1_le - 15, y1_le),
                                cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

                    cv2.line(frame, right_eye, left_eye, (0, 200, 200), 3)
                    cv2.line(frame, left_eye, A, (0, 200, 200), 3)
                    cv2.line(frame, right_eye, A, (0, 200, 200), 3)

                    cv2.line(frame, (left_eye[0], left_eye[1]),
                             (right_eye[0], right_eye[1]), (0, 200, 200), 3)

                    rotated = face_aligner.align(orig_frame, left_eye,
                                                 right_eye)

                draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                frame_width)

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            start_time = time.time()
            cv2.putText(frame, fps_str, (25, 25), cv2.FONT_HERSHEY_DUPLEX,
                        0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow('frame', frame)
            if face is not None:
                cv2.imshow('face aligned', rotated)

            if cv2.waitKey(1) == ord('q'):
                exit()
Example #10
def train_retinaface(cfg):

    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    if cfg['distributed']:
        import horovod.tensorflow as hvd
        # Initialize Horovod
        hvd.init()
    else:
        hvd = []
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    reset_random_seeds()

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth(hvd)

    # define network
    model = RetinaFaceModel(cfg, training=True)
    model.summary(line_length=80)

    # define prior box
    priors = prior_box((cfg['input_size'], cfg['input_size']),
                       cfg['min_sizes'], cfg['steps'], cfg['clip'])

    # load dataset
    train_dataset = load_dataset(cfg, priors, 'train', hvd)
    if cfg['evaluation_during_training']:
        val_dataset = load_dataset(cfg, priors, 'val', [])

    # define optimizer
    if cfg['distributed']:
        init_lr = cfg['init_lr'] * hvd.size()
        min_lr = cfg['min_lr'] * hvd.size()
        steps_per_epoch = cfg['dataset_len'] // (cfg['batch_size'] * hvd.size())
    else:
        init_lr = cfg['init_lr']
        min_lr = cfg['min_lr']
        steps_per_epoch = cfg['dataset_len'] // cfg['batch_size']

    learning_rate = MultiStepWarmUpLR(
        initial_learning_rate=init_lr,
        lr_steps=[e * steps_per_epoch for e in cfg['lr_decay_epoch']],
        lr_rate=cfg['lr_rate'],
        warmup_steps=cfg['warmup_epoch'] * steps_per_epoch,
        min_lr=min_lr)

    optimizer = tf.keras.optimizers.SGD(
        learning_rate=learning_rate, momentum=0.9, nesterov=True)

    # define losses function
    multi_box_loss = MultiBoxLoss(num_class=cfg['num_class'])

    # load checkpoint
    checkpoint_dir = os.path.join(cfg['output_path'], 'checkpoints', cfg['sub_name'])
    checkpoint = tf.train.Checkpoint(epoch=tf.Variable(0, name='epoch'),
                                     optimizer=optimizer,
                                     model=model)
    manager = tf.train.CheckpointManager(checkpoint=checkpoint,
                                         directory=checkpoint_dir,
                                         max_to_keep=3)

    os.makedirs(checkpoint_dir, exist_ok=True)
    with open(os.path.join(checkpoint_dir, 'cfg.pickle'), 'wb') as handle:
        pickle.dump(cfg, handle, protocol=pickle.HIGHEST_PROTOCOL)

    if manager.latest_checkpoint:
        checkpoint.restore(manager.latest_checkpoint)
        print('[*] load ckpt from {}'.format(manager.latest_checkpoint))
    else:
        print("[*] training from scratch.")

    # define training step function
    @tf.function
    def train_step(inputs, labels, first_batch, epoch):
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)

            losses = {}
            losses['reg'] = tf.reduce_sum(model.losses)
            losses['loc'], losses['landm'], losses['class'] = \
                multi_box_loss(labels, predictions)
            total_loss = tf.add_n([l for l in losses.values()])

        if cfg['distributed']:
            # Horovod: add Horovod Distributed GradientTape.
            tape = hvd.DistributedGradientTape(tape)

        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

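        # Horovod: broadcast initial variable states from rank 0 to all other
        # processes on the first batch of the first epoch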
        if cfg['distributed'] and first_batch and epoch:
            hvd.broadcast_variables(model.variables, root_rank=0)
            hvd.broadcast_variables(optimizer.variables(), root_rank=0)

        return total_loss, losses

    def test_step(inputs, img_name):
        _, img_height_raw, img_width_raw, _ = inputs.shape
        # pad input image to avoid unmatched shape problem
        img = inputs[0].numpy()
        # if img_name == '6_Funeral_Funeral_6_618':
        #     resize = 0.5 # this image is too big to avoid OOM problem
        #     img = cv2.resize(img, None, None, fx=resize, fy=resize,
        #                      interpolation=cv2.INTER_LINEAR)
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
        input_img = img[np.newaxis, ...]
        predictions = model(input_img, training=False)
        outputs = pred_to_outputs(cfg, predictions, input_img.shape).numpy()
        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

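        # convert normalized [x1, y1, x2, y2] boxes into pixel [x, y, w, h]
        # plus confidence for the WiderFace evaluator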
        bboxs = outputs[:, :4]
        confs = outputs[:, -1]
        pred_boxes = []
        for box, conf in zip(bboxs, confs):
            x = int(box[0] * img_width_raw)
            y = int(box[1] * img_height_raw)
            w = int(box[2] * img_width_raw) - int(box[0] * img_width_raw)
            h = int(box[3] * img_height_raw) - int(box[1] * img_height_raw)
            pred_boxes.append([x, y, w, h, conf])

        pred_boxes = np.array(pred_boxes).astype('float')

        return pred_boxes

    # training loop
    summary_writer = tf.summary.create_file_writer(os.path.join(cfg['output_path'], 'logs', cfg['sub_name']))
    prog_bar = ProgressBar(steps_per_epoch, 0)

    if cfg['evaluation_during_training']:
        widerface_eval_hard = WiderFaceEval(split='hard')

    for epoch in range(cfg['epoch']):
        try:
            actual_epoch = epoch + 1

            if cfg['distributed']:
                if hvd.rank() == 0:
                    print("\nStart of epoch %d" % (actual_epoch,))
            else:
                print("\nStart of epoch %d" % (actual_epoch,))

            checkpoint.epoch.assign_add(1)
            start_time = time.time()

            # Iterate over the batches of the dataset.
            for batch, (x_batch_train, y_batch_train, img_name) in enumerate(train_dataset):
                total_loss, losses = train_step(x_batch_train, y_batch_train, batch == 0, epoch == 0)

                if cfg['distributed']:
                    if hvd.rank() == 0:
                        # prog_bar.update("epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
                        #     checkpoint.epoch.numpy(), cfg['epoch'], total_loss.numpy(), optimizer._decayed_lr(tf.float32)))
                        if batch % 100 == 0:
                            print("batch={}/{},  epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
                                batch, steps_per_epoch, checkpoint.epoch.numpy(), cfg['epoch'], total_loss.numpy(), optimizer._decayed_lr(tf.float32)))
                else:
                    prog_bar.update("epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
                        checkpoint.epoch.numpy(), cfg['epoch'], total_loss.numpy(), optimizer._decayed_lr(tf.float32)))

            # Display metrics at the end of each epoch.
            # train_acc = train_acc_metric.result()
            # print("\nTraining loss over epoch: %.4f" % (float(total_loss.numpy()),))

            if cfg['distributed']:
                if hvd.rank() == 0:
                    print("Time taken: %.2fs" % (time.time() - start_time))
                    manager.save()
                    print("\n[*] save ckpt file at {}".format(manager.latest_checkpoint))
            else:
                print("Time taken: %.2fs" % (time.time() - start_time))
                manager.save()
                print("\n[*] save ckpt file at {}".format(manager.latest_checkpoint))

            if cfg['evaluation_during_training']:
                # Run a validation loop at the end of each epoch.
                for batch, (x_batch_val, y_batch_val, img_name) in enumerate(val_dataset.take(500)):
                    if '/' in img_name.numpy()[0].decode():
                        img_name = img_name.numpy()[0].decode().split('/')[1].split('.')[0]
                    else:
                        img_name = []
                    pred_boxes = test_step(x_batch_val, img_name)
                    gt_boxes = labels_to_boxes(y_batch_val)
                    widerface_eval_hard.update(pred_boxes, gt_boxes, img_name)

                ap_hard = widerface_eval_hard.calculate_ap()
                widerface_eval_hard.reset()

                if cfg['distributed']:
                    if hvd.rank() == 0:
                        print("Validation acc: %.4f" % (float(ap_hard),))
                else:
                    print("Validation acc: %.4f" % (float(ap_hard),))

            def tensorboard_writer():
                with summary_writer.as_default():
                    tf.summary.scalar('loss/total_loss', total_loss, step=actual_epoch)
                    for k, l in losses.items():
                        tf.summary.scalar('loss/{}'.format(k), l, step=actual_epoch)
                    tf.summary.scalar('learning_rate', optimizer._decayed_lr(tf.float32), step=actual_epoch)
                    if cfg['evaluation_during_training']:
                        tf.summary.scalar('Val AP', ap_hard, step=actual_epoch)

            if cfg['distributed']:
                if hvd.rank() == 0:
                    tensorboard_writer()
            else:
                tensorboard_writer()

        except Exception as E:
            print(E)
            continue

    if cfg['distributed']:
        if hvd.rank() == 0:
            manager.save()
            print("\n[*] training done! save ckpt file at {}".format(
                manager.latest_checkpoint))
    else:
        manager.save()
        print("\n[*] training done! save ckpt file at {}".format(
            manager.latest_checkpoint))
Example #11
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        #print("[*] load ckpt from {}.".format(tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not os.path.exists(FLAGS.img_path):
        print(f"cannot find image path from {FLAGS.img_path}")
        exit()

    print("[*] Processing on single image {}".format(FLAGS.img_path))

    img_raw = cv2.imread(FLAGS.img_path)
    img_height_raw, img_width_raw, _ = img_raw.shape
    img = np.float32(img_raw.copy())

    if FLAGS.down_scale_factor < 1.0:
        img = cv2.resize(img, (0, 0),
                         fx=FLAGS.down_scale_factor,
                         fy=FLAGS.down_scale_factor,
                         interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # pad input image to avoid unmatched shape problem
    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

    # run model
    outputs = model(img[np.newaxis, ...]).numpy()

    # recover padding effect
    outputs = recover_pad_output(outputs, pad_params)

    # draw and save results
    imgs = []
    DIM = 64
    save_img_path = 'data/out_' + os.path.basename(FLAGS.img_path)
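    # crop up to nine detected faces, resize each to DIM x DIM, and tile them
    # into a 3 x 3 montage (blank tiles fill the remaining slots)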
    for prior_index in range(9):
        if (prior_index < len(outputs)):
            img = get_bbox_imgs(img_raw, outputs[prior_index], img_height_raw,
                                img_width_raw)
            img = cv2.resize(img, (DIM, DIM))
            imgs.append(img)
        else:
            imgs.append(np.zeros((DIM, DIM, 3), dtype=np.uint8))  # blank tile
    # stack the nine tiles into three rows, then the rows into the final grid
    rows = [np.concatenate(imgs[i:i + 3], axis=1) for i in (0, 3, 6)]
    imgf = np.concatenate(rows, axis=0)
    cv2.imwrite(save_img_path, imgf)

    print(f"[*] save result at {save_img_path}")
Example #12
def main(_argv):
    # init

    CONFIG_PATH = './configs/retinaface_mbv2.yaml'
    GPU = '0'
    IOU_TH = 0.4
    SCORE_TH = 0.5
    WEBCAM = False
    DOWN_FACTOR = 1.0

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = GPU

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(CONFIG_PATH)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=IOU_TH,
                            score_th=SCORE_TH)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not WEBCAM:
        IMG_PATH = './data/input.jpg'  # hypothetical placeholder input path
        if not os.path.exists(IMG_PATH):
            print(f"cannot find image path from {IMG_PATH}")
            exit()

        print("[*] Processing on single image {}".format(IMG_PATH))

        img_raw = cv2.imread(IMG_PATH)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        if DOWN_FACTOR < 1.0:
            img = cv2.resize(img, (0, 0),
                             fx=DOWN_FACTOR,
                             fy=DOWN_FACTOR,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = "result.jpg"
        for prior_index in range(len(outputs)):
            draw_bbox_landm(img_raw, outputs[prior_index], img_height_raw,
                            img_width_raw)
        cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")

    else:
        cam = cv2.VideoCapture(0)

        start_time = time.time()
        while True:
            _, frame = cam.read()
            if frame is None:
                print("no cam input")
                break

            frame_height, frame_width, _ = frame.shape
            img = np.float32(frame.copy())
            if DOWN_FACTOR < 1.0:
                img = cv2.resize(img, (0, 0),
                                 fx=DOWN_FACTOR,
                                 fy=DOWN_FACTOR,
                                 interpolation=cv2.INTER_LINEAR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

            # run model
            outputs = model(img[np.newaxis, ...]).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            for prior_index in range(len(outputs)):
                draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                frame_width)

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            start_time = time.time()
            cv2.putText(frame, fps_str, (25, 25), cv2.FONT_HERSHEY_DUPLEX,
                        0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) == ord('q'):
                exit()
    return outputs
Example #13
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        if FLAGS.down_scale_factor < 1.0:
            img = cv2.resize(img, (0, 0),
                             fx=FLAGS.down_scale_factor,
                             fy=FLAGS.down_scale_factor,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = 'out_' + os.path.basename(FLAGS.img_path)
        for prior_index in range(len(outputs)):
            draw_bbox_landm(img_raw, outputs[prior_index], img_height_raw,
                            img_width_raw)
        cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")

    else:

        cam = cv2.VideoCapture(FLAGS.vid_path)
        fps = int(cam.get(cv2.CAP_PROP_FPS))

        ### Saving Video to file
        frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Define the codec and create the VideoWriter object; the output is
        # stored in 'output/output.avi'.
        if not os.path.exists('./output'):
            print('Creating folder: output/ for saving video.')
            os.makedirs('./output')
        out = cv2.VideoWriter('./output/output.avi',
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                              (frame_width, frame_height))

        start_time = time.time()
        counter = 0
        frameCount = 0
        while True:
            _, frame = cam.read()
            if frame is None:
                print("no cam input")
                break
            frameCount = frameCount + 1

            frame_height, frame_width, _ = frame.shape
            img = np.float32(frame.copy())
            if FLAGS.down_scale_factor < 1.0:
                img = cv2.resize(img, (0, 0),
                                 fx=FLAGS.down_scale_factor,
                                 fy=FLAGS.down_scale_factor,
                                 interpolation=cv2.INTER_LINEAR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

            # run model
            outputs = model(img[np.newaxis, ...]).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            for prior_index in range(len(outputs)):
                croppedFace = draw_bbox_landm(frame, outputs[prior_index],
                                              frame_height, frame_width)
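                # save one cropped face every FLAGS.dfps seconds of video,
                # then reset the frame counter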
                if frameCount >= fps * FLAGS.dfps:
                    fileName = "%d.png" % counter
                    cv2.imwrite(FLAGS.dst_path + fileName, croppedFace)
                    print('Saved:', fileName)
                    counter = counter + 1
                    frameCount = 0

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            start_time = time.time()
            cv2.putText(frame, fps_str, (25, 25), cv2.FONT_HERSHEY_DUPLEX,
                        0.75, (0, 255, 0), 2)

            # show frame
            if FLAGS.preview:
                cv2.imshow('frame', frame)
            out.write(frame)
            if cv2.waitKey(1) == ord('q'):
                exit()
Example #14
def main(_argv):

    # FUNCTIONS FOR CROPPING
    #####################################################################################################
    def bounding_box(img, ann, img_height, img_width):
        x1, y1, x2, y2 = int(ann[0] * img_width), int(ann[1] * img_height), \
                         int(ann[2] * img_width), int(ann[3] * img_height)
        return x1, y1, x2, y2

    def calc_points(x, y, side):
        return (int(x - side / 2), int(x + side / 2),
                int(y - side / 2), int(y + side / 2))

    def adjust_points(x_center, y_center, original_longest, scaling_factor,
                      min_scaling_factor):
        factors = np.arange(scaling_factor, min_scaling_factor - 0.04, -0.05)
        for factor in factors:
            # calculate new corner points for this scaling factor
            x1, x2, y1, y2 = calc_points(x_center, y_center,
                                         int(original_longest * factor))

            for i in range(FLAGS.max_iter):
                if x1 < 0:
                    x2 -= x1
                    x1 = 0
                if y1 < 0:
                    y2 -= y1
                    y1 = 0
                if x2 > img_raw.shape[1]:
                    x1 -= x2 - img_raw.shape[1]
                    x2 = img_raw.shape[1]
                if y2 > img_raw.shape[0]:
                    y1 -= y2 - img_raw.shape[0]
                    y2 = img_raw.shape[0]

                if (x1 >= 0 and y1 >= 0 and x2 <= img_raw.shape[1]
                        and y2 <= img_raw.shape[0]):
                    return x1, x2, y1, y2, True

        print("Not cropping", img_path,
              "due to a problem with a cropping square box")
        return x1, x2, y1, y2, False

    def get_dim(lst):
        return [(lst[3] - lst[1]) * (lst[2] - lst[0])]

    def get_max(outputs, lst):
        area = [i[0] for i in lst]
        prob = [i[1] for i in lst]
        max_area_index = set([i for i, j in enumerate(area) if j == max(area)])
        max_prob_index = set([i for i, j in enumerate(prob) if j == max(prob)])
        indices = list(max_area_index.intersection(max_prob_index))
        if len(indices) >= 1:
            return [outputs[indices[0]]]
        else:  # if there is a mismatch, return the largest element
            if max_area_index:
                return [outputs[list(max_area_index)[0]]]
            else:  # precautionary because there should always be at least one face
                print("Not cropping", img_path,
                      "due to a problem with returning the largest element")
                return []

    #####################################################################################################

    # MODEL
    #####################################################################################################
    # initialisation
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()
    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()
    #####################################################################################################

    # CROPPING
    #####################################################################################################
    # check if the path exits
    if not os.path.exists(FLAGS.path):
        print(f"cannot find the specified path from {FLAGS.path}")
        exit()

    # make a corresponding directory
    try:
        os.mkdir(FLAGS.path.replace("images", "cropped_images"))
    except FileExistsError:
        print(FLAGS.path.replace("images", "cropped_images"), "already exists")

    # get subdirectories within the specified folder
    subdirectories = [FLAGS.path+'/'+i for i in os.listdir(FLAGS.path) \
                      if os.path.isdir(FLAGS.path+'/'+i)]

    # loop through each folder
    for subdir in sorted(subdirectories):

        # create corresponding folders for cropped data and get all images in a given folder
        if 'original' in subdir: x = 3
        else: x = 7

        try:
            os.mkdir(subdir.replace("images", "cropped_images"))
            images_lst = glob.glob(subdir + "/*.png")
            cropped_images_lst = []
            print(subdir[len(subdir) - x:len(subdir)])

        except FileExistsError:
            # count number of existing images in this subdirectory, if same as original, skip
            images_lst = glob.glob(subdir + "/*.png")
            cropped_images_lst = glob.glob(
                subdir.replace("images", "cropped_images") + "/*.png")
            cropped_images_lst = [
                e[len(e) - 8:len(e)] for e in cropped_images_lst
            ]

            if len(images_lst) == len(cropped_images_lst):
                print(subdir[len(subdir) - x:len(subdir)],
                      "has already been generated")
                continue
            else:
                print(subdir[len(subdir) - x:len(subdir)])

        # loop through each image in a given folder
        for img_path in sorted(images_lst):

            if img_path[len(img_path) - 8:len(img_path)] in cropped_images_lst:
                continue

            img_raw = cv2.imread(img_path)
            img_height_raw, img_width_raw, _ = img_raw.shape
            img = np.float32(img_raw.copy())
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image (unmatched shape problem), run model, recover padding effect
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
            outputs = model(img[np.newaxis, ...]).numpy()
            outputs = recover_pad_output(outputs, pad_params)

            # drop detections whose face score (index 15) is below the threshold probability
            outputs = [i for i in outputs if i[15] >= FLAGS.threshold_prob]

            # flag any images which have no recognised faces in them
            if len(outputs) == 0:
                print("no faces detected for", img_path)

            # if more than one face is detected, select the largest and most confident
            elif len(outputs) > 1:
                f = [list(bounding_box(img_raw, i[0:4], img_height_raw,
                                       img_width_raw)) + [i[15]]
                     for i in outputs]
                f = [get_dim(i[0:4]) + [i[4]] for i in f]  # -> [[area, score], ...]
                outputs = get_max(outputs, f)

            # keeping as a loop in case we decide to use multiple faces per frame in the future
            # get cropping coordinates and save results
            for prior_index in range(len(outputs)):
                # get the bounding box coordinates
                bb_x1, bb_y1, bb_x2, bb_y2 = bounding_box(
                    img_raw, outputs[prior_index], img_height_raw,
                    img_width_raw)
                # scale up the longest side of the bounding box
                original_longest = int(max(bb_x2 - bb_x1, bb_y2 - bb_y1))
                longest = int(original_longest * FLAGS.scaling_factor)
                x_center = int((bb_x1 + bb_x2) / 2)
                y_center = int((bb_y1 + bb_y2) / 2)

                x1, x2, y1, y2, save_image = adjust_points(
                    x_center, y_center, original_longest, FLAGS.scaling_factor,
                    FLAGS.min_scaling_factor)
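                # e.g. (hypothetical numbers) a 100 px box with
                # scaling_factor=1.5 requests a 150 px square centred on the
                # box; adjust_points returns the final corner points plus a
                # flag saying whether the crop should be saved.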

                if save_image:
                    try:
                        # mirror the source file name into the cropped_images tree
                        save_img_path = os.path.join(
                            subdir.replace("images", "cropped_images"),
                            os.path.basename(img_path))
                        cv2.imwrite(save_img_path, img_raw[y1:y2, x1:x2])

                    except Exception:
                        print(img_path, "was not cropped for an unknown reason")
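A hedged invocation sketch for the cropping entry point above; the script name, config path, and flag values are illustrative assumptions rather than values taken from the listing:

# python crop_faces.py \
#     --cfg_path ./configs/retinaface_mbv2.yaml \
#     --gpu 0 --iou_th 0.4 --score_th 0.02 \
#     --path ./data/images \
#     --threshold_prob 0.5 --scaling_factor 1.3 --min_scaling_factor 1.0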
Beispiel #15
0
def main(_):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=True)
    model.summary(line_length=80)

    # define prior box
    priors = prior_box((cfg['input_size'], cfg['input_size']),
                       cfg['min_sizes'], cfg['steps'], cfg['clip'])

    # load dataset
    train_dataset = load_dataset(cfg, priors, shuffle=True)

    # define optimizer
    steps_per_epoch = cfg['dataset_len'] // cfg['batch_size']
    learning_rate = MultiStepWarmUpLR(
        initial_learning_rate=cfg['init_lr'],
        lr_steps=[e * steps_per_epoch for e in cfg['lr_decay_epoch']],
        lr_rate=cfg['lr_rate'],
        warmup_steps=cfg['warmup_epoch'] * steps_per_epoch,
        min_lr=cfg['min_lr'])
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate,
                                        momentum=0.9,
                                        nesterov=True)

    # define loss function
    multi_box_loss = MultiBoxLoss()

    # load checkpoint
    checkpoint_dir = '/content/drive/My Drive/Colab/checkpoints/' + cfg[
        'sub_name']
    checkpoint = tf.train.Checkpoint(step=tf.Variable(0, name='step'),
                                     optimizer=optimizer,
                                     model=model)
    manager = tf.train.CheckpointManager(checkpoint=checkpoint,
                                         directory=checkpoint_dir,
                                         max_to_keep=3)
    if manager.latest_checkpoint:
        checkpoint.restore(manager.latest_checkpoint)
        print('[*] load ckpt from {} at step {}.'.format(
            manager.latest_checkpoint, checkpoint.step.numpy()))
    else:
        print("[*] training from scratch.")

    # define training step function
    @tf.function
    def train_step(inputs, labels):
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)

            losses = {}
            losses['reg'] = tf.reduce_sum(model.losses)
            losses['loc'], losses['landm'], losses['class'] = \
                multi_box_loss(labels, predictions)
            total_loss = tf.add_n(list(losses.values()))

        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        return total_loss, losses

    # training loop
    summary_writer = tf.summary.create_file_writer('./logs/' + cfg['sub_name'])
    remain_steps = max(
        steps_per_epoch * cfg['epoch'] - checkpoint.step.numpy(), 0)
    prog_bar = ProgressBar(steps_per_epoch,
                           checkpoint.step.numpy() % steps_per_epoch)

    for inputs, labels in train_dataset.take(remain_steps):
        checkpoint.step.assign_add(1)
        steps = checkpoint.step.numpy()

        total_loss, losses = train_step(inputs, labels)

        prog_bar.update("epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
            ((steps - 1) // steps_per_epoch) + 1, cfg['epoch'],
            total_loss.numpy(),
            optimizer.lr(steps).numpy()))

        if steps % 10 == 0:
            with summary_writer.as_default():
                tf.summary.scalar('loss/total_loss', total_loss, step=steps)
                for k, l in losses.items():
                    tf.summary.scalar('loss/{}'.format(k), l, step=steps)
                tf.summary.scalar('learning_rate',
                                  optimizer.lr(steps),
                                  step=steps)

        if steps % cfg['save_steps'] == 0:
            manager.save()
            print("\n[*] save ckpt file at {}".format(
                manager.latest_checkpoint))

    manager.save()
    print("\n[*] training done! save ckpt file at {}".format(
        manager.latest_checkpoint))
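As a mental model for the MultiStepWarmUpLR schedule configured above, the sketch below reproduces the presumable behaviour — linear warm-up from min_lr to init_lr over warmup_steps, then multiplication by lr_rate at each decay boundary. This is an assumption-based approximation, not the project's actual class:

def multistep_warmup_lr(step, init_lr=1e-2, lr_steps=(40000, 60000),
                        lr_rate=0.1, warmup_steps=2500, min_lr=1e-4):
    # assumed behaviour: linear warm-up, then step decay at each boundary
    if step < warmup_steps:
        return min_lr + (init_lr - min_lr) * step / warmup_steps
    lr = init_lr
    for boundary in lr_steps:
        if step >= boundary:
            lr *= lr_rate  # one multiplicative decay per crossed boundary
    return max(lr, min_lr)  # never fall below the configured floor

Under these assumptions, multistep_warmup_lr(0) returns min_lr and the rate first reaches init_lr at step == warmup_steps.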
Beispiel #16
0
def main(_argv):
    # init
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = "./checkpoints/" + cfg["sub_name"]
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    # evaluation on testing dataset
    testset_folder = cfg["testing_dataset_path"]
    testset_list = os.path.join(testset_folder, "label.txt")

    img_paths, _ = load_info(testset_list)
    for img_index, img_path in enumerate(img_paths):
        print(" [{} / {}] det {}".format(img_index + 1, len(img_paths),
                                         img_path))
        img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        # testing scale: resize so the shorter side reaches target_size,
        # capping the longer side at max_size
        target_size = 1600
        max_size = 2150
        img_shape = img.shape
        img_size_min = np.min(img_shape[0:2])
        img_size_max = np.max(img_shape[0:2])
        resize = float(target_size) / float(img_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * img_size_max) > max_size:
            resize = float(max_size) / float(img_size_max)
        if FLAGS.origin_size:
            if os.path.basename(img_path) == "6_Funeral_Funeral_6_618.jpg":
                resize = 0.5  # downscale this oversized image to avoid OOM
            else:
                resize = 1
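        # e.g. without --origin_size, a 1920x1080 frame gives
        # resize = 1600 / 1080 ≈ 1.48, but 1.48 * 1920 ≈ 2844 > 2150,
        # so resize is capped at 2150 / 1920 ≈ 1.12.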

        img = cv2.resize(img,
                         None,
                         None,
                         fx=resize,
                         fy=resize,
                         interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg["steps"]))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # write results
        img_name = os.path.basename(img_path)
        sub_dir = os.path.basename(os.path.dirname(img_path))
        save_name = os.path.join(FLAGS.save_folder, sub_dir,
                                 img_name.replace(".jpg", ".txt"))

        pathlib.Path(os.path.join(FLAGS.save_folder,
                                  sub_dir)).mkdir(parents=True, exist_ok=True)

        with open(save_name, "w") as file:
            bboxs = outputs[:, :4]
            confs = outputs[:, -1]

            file_name = img_name + "\n"
            bboxs_num = str(len(bboxs)) + "\n"
            file.write(file_name)
            file.write(bboxs_num)
            for box, conf in zip(bboxs, confs):
                x = int(box[0] * img_width_raw)
                y = int(box[1] * img_height_raw)
                w = int(box[2] * img_width_raw) - int(box[0] * img_width_raw)
                h = int(box[3] * img_height_raw) - int(box[1] * img_height_raw)
                confidence = str(conf)
                line = f"{x} {y} {w} {h} {confidence} \n"
                file.write(line)

        # save images
        pathlib.Path(os.path.join("./results", cfg["sub_name"],
                                  sub_dir)).mkdir(parents=True, exist_ok=True)
        if FLAGS.save_image:
            for prior_index in range(len(outputs)):
                if outputs[prior_index][15] >= FLAGS.vis_th:
                    draw_bbox_landm(img_raw, outputs[prior_index],
                                    img_height_raw, img_width_raw)
            cv2.imwrite(
                os.path.join("./results", cfg["sub_name"], sub_dir, img_name),
                img_raw)
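Each per-image file written above follows the WIDER FACE-style detection layout: the image name, the number of boxes, then one "x y w h confidence" line per detection (with a trailing space, as the code writes it). A hypothetical example file:

0_Parade_marchingband_1_465.jpg
2
345 211 87 112 0.99873 
512 198 64 80 0.91241 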
Beispiel #17
0
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    # evaluation on testing dataset
    testing_dataset_path = cfg['testing_dataset_path']
    img_paths, _ = load_info(testing_dataset_path, './data/CelebA/train_labels.txt')
    for img_index, img_path in enumerate(img_paths):
        print(" [{} / {}] det {}".format(img_index + 1, len(img_paths),
                                         img_path))
        img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        # testing scale
        if not FLAGS.origin_size:
            target_size = 320
            img_size_max = np.max(img.shape[0:2])
            resize = float(target_size) / float(img_size_max)
            img = cv2.resize(img, None, None, fx=resize, fy=resize,
                             interpolation=cv2.INTER_LINEAR)

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # write results
        img_name = os.path.basename(img_path)
        sub_dir = os.path.basename(os.path.dirname(img_path))
        save_name = os.path.join(
            FLAGS.save_folder, sub_dir, img_name.replace('.jpg', '.txt'))

        pathlib.Path(os.path.join(FLAGS.save_folder, sub_dir)).mkdir(
            parents=True, exist_ok=True)

        with open(save_name, "w") as file:
            bboxs = outputs[:, :4]
            smile_confs = outputs[:, 4]
            face_confs = outputs[:, -1]

            file_name = img_name + "\n"
            bboxs_num = str(len(bboxs)) + "\n"
            file.write(file_name)
            file.write(bboxs_num)
            for box, smile_conf, face_conf in zip(bboxs, smile_confs, face_confs):
                x = int(box[0] * img_width_raw)
                y = int(box[1] * img_height_raw)
                w = int(box[2] * img_width_raw) - int(box[0] * img_width_raw)
                h = int(box[3] * img_height_raw) - int(box[1] * img_height_raw)
                line = f"{x} {y} {w} {h} {smile_conf} {face_conf} \n"
                file.write(line)

        # save images
        pathlib.Path(os.path.join(
            './results', cfg['sub_name'], sub_dir)).mkdir(
            parents=True, exist_ok=True)
        if FLAGS.save_image:
            for prior_index in range(len(outputs)):
                if outputs[prior_index][-1] >= FLAGS.vis_th:
                    draw_bbox_landm(img_raw, outputs[prior_index],
                                    img_height_raw, img_width_raw)
            cv2.imwrite(os.path.join('./results', cfg['sub_name'], sub_dir,
                                     img_name), img_raw)
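The CelebA variant above writes one extra column per detection, "x y w h smile_conf face_conf". A hypothetical line for a single confident, smiling face:

102 87 96 96 0.8312 0.9971 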