Example no. 1
    def encode_filter(filter_files):
        images = []
        faces = []

        FACE_ALIGNMENT = FaceAlignment(LandmarksType._2D,
                                       enable_cuda=True,
                                       flip_input=False)
        for i, filter_file in enumerate(filter_files):
            images.append(skimage.io.imread(str(filter_file)))
            faces.append(FACE_ALIGNMENT.get_landmarks(images[i]))
        FACE_ALIGNMENT = None

        face_recognition_model = face_recognition_models.face_recognition_model_location(
        )
        face_encoder = dlib.face_recognition_model_v1(face_recognition_model)
        for i, face in enumerate(faces):
            if face is None:
                print('Warning: {} has no face.'.format(filter_files[i]))
                continue
            if len(face) > 1:
                print('Warning: {} has more than one face.'.format(
                    filter_files[i]))

            # Build a dlib full_object_detection from the landmark points; the
            # bounding rectangle is derived from the landmark extremes.
            xs = [int(p[0]) for p in face[0]]
            ys = [int(p[1]) for p in face[0]]
            rect = dlib.rectangle(min(xs), min(ys), max(xs), max(ys))
            parts = [dlib.point(int(p[0]), int(p[1])) for p in face[0]]
            raw_landmark_set = dlib.full_object_detection(rect, parts)
            yield numpy.array(
                face_encoder.compute_face_descriptor(images[i],
                                                     raw_landmark_set, 1))
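A possible way to consume the generator above (a sketch, not from the original source; it assumes encode_filter is callable as a plain function and the filter directory is a placeholder): each yielded value is a 128-dimensional dlib face descriptor, and such descriptors are conventionally matched by Euclidean distance with a threshold of roughly 0.6.

# Sketch: collect the reference encodings into a single array for later matching.
from pathlib import Path
import numpy

filter_files = sorted(Path("filters").glob("*.jpg"))       # placeholder directory
known_encodings = numpy.array(list(encode_filter(filter_files)))
# A probe face would then be matched with e.g.
# numpy.linalg.norm(known_encodings - probe_encoding, axis=1) < 0.6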
Example no. 2
def landmark_preprocess(noisy_path, dataset_path):
    noisy_train_path = os.path.join(noisy_path, "train")
    noisy_test_path = os.path.join(noisy_path, "test")
    noisy_val_path = os.path.join(noisy_path, "validation")

    train_path = os.path.join(dataset_path, "train")
    test_path = os.path.join(dataset_path, "test")
    val_path = os.path.join(dataset_path, "validation")

    checkpath(dataset_path)
    checkpath(train_path)
    checkpath(test_path)
    checkpath(val_path)

    fa = FaceAlignment(LandmarksType._2D, device="cuda:1")

    train_files = file_list(noisy_train_path, ".png")
    for i in train_files:
        landmarks = fa.get_landmarks_from_image(i)[0]
        img = plot_landmarks((300, 300, 3), landmarks)
        img.save(os.path.join(train_path, "lm" + i.split("y")[-1]))

    test_files = file_list(noisy_test_path, ".png")
    for i in test_files:
        landmarks = fa.get_landmarks_from_image(i)[0]
        img = plot_landmarks((300, 300, 3), landmarks)
        img.save(os.path.join(test_path, "lm" + i.split("y")[-1]))

    val_files = file_list(noisy_val_path, ".png")
    print(noisy_val_path)
    for i in val_files:
        landmarks = fa.get_landmarks_from_image(i)[0]
        img = plot_landmarks((300, 300, 3), landmarks)
        img.save(os.path.join(val_path, "lm" + i.split("y")[-1]))
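The three split loops above differ only in their paths; an equivalent, more compact form of the same body (a sketch using the same helpers, behavior otherwise unchanged) would be:

    # Sketch: iterate over the split names instead of repeating the loop.
    for split in ("train", "test", "validation"):
        out_dir = os.path.join(dataset_path, split)
        for f in file_list(os.path.join(noisy_path, split), ".png"):
            landmarks = fa.get_landmarks_from_image(f)[0]
            img = plot_landmarks((300, 300, 3), landmarks)
            img.save(os.path.join(out_dir, "lm" + f.split("y")[-1]))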
Example no. 3
    def __init__(self, style_img, input_img, style_mask, input_mask, save=False):
        style_name = os.path.basename(style_img).split('.')[0]
        input_name = os.path.basename(input_img).split('.')[0]

        self.style_img = np.float32(imread(style_img))
        self.input_img = np.float32(imread(input_img))

        self.style_mask = np.float32(imread(style_mask))
        self.input_mask = np.float32(imread(input_mask))

        # Fetch Facial Landmarks
        if os.path.exists('input/%s_%s_lm.pkl' % (style_name, input_name)):
            with open('input/%s_%s_lm.pkl' % (style_name, input_name), 'rb') as f:
                pkl = pickle.load(f)
                self.style_lm = pkl['style']
                self.input_lm = pkl['input']
        else:
            fa = FaceAlignment(LandmarksType._2D, device='cpu', flip_input=False)
            self.style_lm = fa.get_landmarks(self.style_img)[0]
            self.input_lm = fa.get_landmarks(self.input_img)[0]
            with open('input/%s_%s_lm.pkl' % (style_name, input_name),
                      'wb') as f:
                pickle.dump({
                    'style': self.style_lm,
                    'input': self.input_lm
                }, f, protocol=2)

        self.output_filename = '_'.join((input_name, style_name))
        self.save = save
Example no. 4
def compare(root, f1, f2):
    global face_det
    global face_recon
    global face_align
    if not face_det:
        face_det = FaceDetection(gpu_id)
    if not face_recon:
        face_recon = FaceRecogniton(gpu_id)
    if not face_align:
        face_align = FaceAlignment(gpu_id)
    time_start = time.time()
    img_a = cv2.imread(root + '/' + f1)
    img_b = cv2.imread(root + '/' + f2)

    bbox_list1, a_point = face_det.get_max_bounding_box_by_image(img_a)
    bbox_list2, b_point = face_det.get_max_bounding_box_by_image(img_b)
    similarity = 0
    time_use = 0
    if bbox_list1 and bbox_list2:
        a_aligned_faces = face_align.affine_face(img_a, a_point)
        b_aligned_faces = face_align.affine_face(img_b, b_point)
        similarity = face_recon.face_compare(a_aligned_faces, b_aligned_faces)
        #print similarity
        time_end = time.time()
        time_use = int(1000 * (time_end - time_start))
        #print 'time_used:' + str(time_use)
    return similarity, time_use
Example no. 5
    def __getitem__(self, idx):
        real_idx = self.indexes[idx]
        path = self.files[real_idx]
        print("image file path=", path)
        # Note: constructing FaceAlignment here reloads the model on every __getitem__ call.
        fa = FaceAlignment(LandmarksType._2D, device=self.device)
        imgUMat = cv2.imread(path)
        x_temp = cv2.cvtColor(imgUMat, cv2.COLOR_BGR2RGB)
        y_temp = fa.get_landmarks(x_temp)[0]
        out = []
        x = PIL.Image.fromarray(x_temp, 'RGB')
        y = plot_landmarks(x_temp, y_temp)
        if self.transform:
            x = self.transform(x)
            y = self.transform(y)
        out.append({'frame': x, 'landmarks': y})

        return real_idx, out
Example no. 6
def evaluate(respth='./results/data_src', dspth='../data'):
    respth = osp.join(os.path.abspath(os.path.dirname(__file__)), respth)
    if not os.path.exists(respth):
        os.makedirs(respth)

    face_model = FaceAlignment(LandmarksType._2D, device="cuda")
    data_path = osp.join(os.path.abspath(os.path.dirname(__file__)), dspth)
    for image_path in os.listdir(data_path):
        image = cv2.imread(osp.join(data_path, image_path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        landmark = face_model.get_landmarks(image)[-1]
        # print(landmark)
        mask = get_image_hull_mask(np.shape(image), landmark).astype(np.uint8)
        # cv2.imshow("mask", (mask*255).astype(np.uint8))

        image_bgra = merge(image, mask)
        # cv2.imshow("image_bgra", image_bgra)
        # cv2.waitKey(1)
        save_path = osp.join(respth, image_path)
        cv2.imwrite(save_path[:-4] + '.png', image_bgra)
Example no. 7
def eval(input_path, output_path, checkpoint_path, model, gpu):
    input = Image.open(input_path)
    input = input.convert("RGB")

    w, h = input.size
    w_, h_ = 128 * (w // 128), 128 * (h // 128)

    fa = FaceAlignment(LandmarksType._2D, device="cuda:" + str(gpu))
    landmarks = fa.get_landmarks_from_image(input_path)[0]
    landmark_img = plot_landmarks(np.array(input), landmarks)

    # torchvision's Resize and CenterCrop take sizes as (height, width)
    transform_forward = transforms.Compose([
        transforms.Resize((h_, w_)),
        transforms.CenterCrop((h_, w_)),
        transforms.ToTensor()
    ])
    transform_backward = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((h, w)),
        transforms.CenterCrop((h, w)),
    ])

    input = transform_forward(input)
    landmark_img = transform_forward(landmark_img)

    if model == "Pix2Pix":
        NFNet = Pix2Pix()
    else:
        NFNet = ResResNet()

    checkpoint = torch.load(checkpoint_path)
    NFNet.load_state_dict(checkpoint['my_classifier'])
    NFNet.to(gpu)

    x = torch.cat((input, landmark_img), 0)
    x = x.unsqueeze(0)
    x = x.to(gpu)
    output = NFNet(x)
    output = output.to("cpu")
    output = transform_backward(output[0])
    output.save(output_path)
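A hypothetical invocation of eval (all paths are placeholders; the arguments follow the signature above):

# Sketch: denoise one image with a Pix2Pix checkpoint on GPU 0.
eval("input/face.png", "output/face_clean.png", "checkpoints/best.pth", "Pix2Pix", 0)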
Example no. 8
def preprocess_dataset(source,
                       output,
                       device='cpu',
                       size=0,
                       overwrite=False,
                       frame_rate=1):
    """
    Starts the pre-processing of the VoxCeleb dataset used for the Talking Heads models. This process has the following
    steps:

    * Extract all frames of each video in the dataset. Frames of videos that are split in several files are joined
    together.
    * Select K+1 frames of each video that will be kept. K frames will be used to train the embedder network, while
    the other one will be used to train the generator network. The value of K can be configured in the config.py file.
    * Landmarks will be extracted for the face in each of the frames that are being kept.
    * The frames and the corresponding landmarks for each video will be saved in files (one for each video) in the
    output directory.

    We originally tried to process several videos simultaneously using multiprocessing, but this seems to actually
    slow down the process instead of speeding it up.


    :param source: Path to the raw VoxCeleb dataset.
    :param output: Path where the pre-processed videos will be stored.
    :param device: Device used to run the landmark extraction model.
    :param size: Size of the dataset to generate. If 0, the entire raw dataset is processed; otherwise, only as
    many videos as specified by this parameter are processed.
    :param overwrite: If True, files that have already been processed will be overwritten; otherwise they are
    ignored and different files are loaded instead.
    :param frame_rate: Frame rate passed to process_video_folder when extracting frames from each video.
    """

    logging.info('===== DATASET PRE-PROCESSING =====')
    logging.info(f'Running on {device.upper()}.')
    logging.info(f'Saving K+1 random frames from each video (K = {config.K}).')
    fa = FaceAlignment(LandmarksType._2D, device=device)

    video_list = get_video_list(source, size, output, overwrite=overwrite)

    logging.info(f'Processing {len(video_list)} videos...')
    # pool = Pool(processes=4, initializer=init_pool, initargs=(fa, output))
    # pool.map(process_video_folder, video_list)

    init_pool(fa, output)
    counter = 1
    for v in video_list:
        start_time = datetime.now()
        process_video_folder(v, frame_rate)
        logging.info(
            f'{counter}/{len(video_list)}\t{datetime.now()-start_time}')
        counter += 1

    logging.info(f'All {len(video_list)} videos processed.')
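process_video_folder itself is not part of this snippet; following the steps listed in the docstring, a minimal per-video sketch might look like the code below (the helper name, the (folder, files) tuple layout, and the .npz output format are assumptions, not the project's actual implementation):

# Sketch only: extract all frames of one video folder, keep K+1 random frames,
# compute their landmarks with the shared FaceAlignment instance, and save them.
import os
import random
import cv2
import numpy as np

def process_video_folder_sketch(video, fa, output, k=8):
    folder, files = video                               # as returned by get_video_list
    frames = []
    for name in sorted(files):                          # parts of a split video, joined in order
        cap = cv2.VideoCapture(os.path.join(folder, name))
        ok, frame = cap.read()
        while ok:
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            ok, frame = cap.read()
        cap.release()
    keep = random.sample(frames, k + 1)                 # K frames for the embedder, 1 for the generator
    landmarks = [fa.get_landmarks_from_image(f)[0] for f in keep]
    np.savez_compressed(os.path.join(output, os.path.basename(folder) + ".npz"),
                        frames=np.array(keep), landmarks=np.array(landmarks))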
Example no. 9
    def __init__(self, args):
        self.args = args
        model = edict()

        self.threshold = args.threshold
        self.det_minsize = 50
        self.det_threshold = [0.4, 0.6, 0.6]
        self.det_factor = 0.9
        _vec = args.image_size.split(',')
        assert len(_vec) == 2
        image_size = (int(_vec[0]), int(_vec[1]))
        self.image_size = image_size
        _vec = args.model.split(',')
        assert len(_vec) == 2
        prefix = _vec[0]
        epoch = int(_vec[1])
        print('loading', prefix, epoch)
        ctx = mx.gpu(args.gpu)
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        all_layers = sym.get_internals()
        sym = all_layers['fc1_output']
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        #model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
        model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                          image_size[1]))])
        model.set_params(arg_params, aux_params)
        self.model = model
        # mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
        mtcnn_path = os.path.join('deploy', 'mtcnn-model')
        detector = MtcnnDetector(model_folder=mtcnn_path,
                                 ctx=ctx,
                                 num_worker=1,
                                 accurate_landmark=True,
                                 threshold=[0.0, 0.0, 0.2])
        self.detector = detector
        self.FACE_ALIGNMENT = FaceAlignment(LandmarksType._3D,
                                            device='cuda',
                                            flip_input=False)
Example no. 10
def preprocess_dataset(source, output, device='cpu', size=0, overwrite=False):
    logging.info('===== DATASET PRE-PROCESSING =====')
    logging.info(f'Running on {device.upper()}.')
    logging.info(f'Saving K+1 random frames from each video (K = {K}).')
    fa = FaceAlignment(LandmarksType._2D, device=device)

    video_list = get_video_list(source, size, output, overwrite=overwrite)

    logging.info(f'Processing {len(video_list)} videos...')

    init_pool(fa, output)
    counter = 1
    for v in video_list:
        process_video_folder(v)
        logging.info(f'{counter}/{len(video_list)}')
        counter += 1

    logging.info(f'All {len(video_list)} videos processed.')
Example no. 11
def getTransform(videoName, modelIdx):
    modelList = [
        'sw', 'han', 'tsai', 'father', 'cloud', 'aerith', 'tifa', 'davinci'
    ]
    G = network.Generator()
    G = load_model(G,
                   "app/modules/talkingHeads/resource/" + modelList[modelIdx],
                   modelList[modelIdx])
    G = G.to("cuda:0")
    fa = FaceAlignment(LandmarksType._2D, device='cuda:0')
    e_vector = get_e_vector("app/modules/talkingHeads/resource/" +
                            modelList[modelIdx] + "/" + modelList[modelIdx] +
                            ".npy")
    timestamp = str(int(time.time()))
    print(timestamp)
    #     generate_moving_video(G, "app/static/"+videoName, "app/modules/talkingHeads/resource/"+modelList[modelIdx]+"/"+modelList[modelIdx]+".npy", "app/static/result-"+timestamp+".mp4", "cuda:0")
    generate_moving_video(G, "app/static/" + videoName, e_vector,
                          "app/static/result-" + timestamp + ".mp4", "cuda:0",
                          fa)
    return jsonify({"code": 200, "message": "轉換成功", 'token': timestamp})  # "轉換成功" = "conversion succeeded"
Example no. 12
def end2end_mask_encoding(
        image: np.ndarray,
        face_aligment_class: face_alignment.FaceAlignment) -> dict:
    landmarks = face_aligment_class.get_landmarks_from_image(image)
    if landmarks is None or len(landmarks) == 0:
        raise RuntimeError('Can\'t find facial landmarks')
    landmarks = np.floor(landmarks[0]).astype(np.int32)

    target_points, s1, s2 = extract_target_points_and_characteristic(landmarks)
    mask_rgba_crop, target_points = extract_polygon(image, target_points)

    mask_rgba_crop, target_points = rotate_image_and_points(
        mask_rgba_crop, s1, target_points)

    res = {
        's1': s1,
        's2': s2,
        'points': target_points.tolist(),
        'base64_img': image_to_string(mask_rgba_crop)
    }

    return res
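A minimal usage sketch for the encoder above (the image path is a placeholder; the snippet relies only on names visible in this example):

# Sketch: encode one photo of a masked face and inspect the returned dictionary.
import cv2
import face_alignment

fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device="cpu")
image = cv2.cvtColor(cv2.imread("masked_face.jpg"), cv2.COLOR_BGR2RGB)
entry = end2end_mask_encoding(image, fa)
print(sorted(entry.keys()))   # ['base64_img', 'points', 's1', 's2']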
Example no. 13
def end2end_mask_generation(image: np.ndarray,
                            masks_database: dict,
                            face_aligment_class: face_alignment.FaceAlignment,
                            input_face_landmarks: np.ndarray = None):

    if input_face_landmarks is None:
        face_landmarks = face_aligment_class.get_landmarks_from_image(image)

        if face_landmarks is None or len(face_landmarks) == 0:
            raise RuntimeError('Can\'t find facial landmarks')

        face_landmarks = np.floor(face_landmarks[0]).astype(np.int32)
    else:
        face_landmarks = input_face_landmarks

    _, _, s2 = extract_target_points_and_characteristic(face_landmarks)

    sampling_mask_data = extract_mask_from_base(masks_database, s2)

    face_with_mask = apply_mask_to_image_with_face(image, sampling_mask_data,
                                                   face_landmarks)

    return face_with_mask
Example no. 14
def monkey_patch_face_detector(_):
    detector = dlib.get_frontal_face_detector()

    class Rect(object):
        def __init__(self, rect):
            self.rect = rect

    def detect(*args):
        return [Rect(x) for x in detector(*args)]

    return detect


# Replace dlib's CNN face detector with its faster HOG-based frontal detector
# before face_alignment builds its models (this snippet targets the older
# face_alignment API that still accepted enable_cuda).
dlib.cnn_face_detection_model_v1 = monkey_patch_face_detector
FACE_ALIGNMENT = FaceAlignment(LandmarksType._2D,
                               enable_cuda=True,
                               flip_input=False)

mean_face_x = numpy.array([
    0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483,
    0.799124, 0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127,
    0.36688, 0.426036, 0.490127, 0.554217, 0.613373, 0.121737, 0.187122,
    0.265825, 0.334606, 0.260918, 0.182743, 0.645647, 0.714428, 0.793132,
    0.858516, 0.79751, 0.719335, 0.254149, 0.340985, 0.428858, 0.490127,
    0.551395, 0.639268, 0.726104, 0.642159, 0.556721, 0.490127, 0.423532,
    0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874, 0.553364,
    0.490127, 0.42689
])

mean_face_y = numpy.array([
    0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891,
Example no. 15
    e_vector = get_e_vector("app/modules/talkingHeads/resource/" +
                            modelList[modelIdx] + "/" + modelList[modelIdx] +
                            ".npy")
    timestamp = str(int(time.time()))
    print(timestamp)
    #     generate_moving_video(G, "app/static/"+videoName, "app/modules/talkingHeads/resource/"+modelList[modelIdx]+"/"+modelList[modelIdx]+".npy", "app/static/result-"+timestamp+".mp4", "cuda:0")
    generate_moving_video(G, "app/static/" + videoName, e_vector,
                          "app/static/result-" + timestamp + ".mp4", "cuda:0",
                          fa)
    return jsonify({"code": 200, "message": "轉換成功", 'token': timestamp})  # "轉換成功" = "conversion succeeded"


G = network.Generator()
G = load_model(G, "app/modules/talkingHeads/resource/han", "han")
G = G.to("cuda:0")
fa = FaceAlignment(LandmarksType._2D, device='cuda:0')
e_vector = get_e_vector("app/modules/talkingHeads/resource/han/han.npy")


def imgTransform(srcImage, modelIdx):
    image = base64_cv2(srcImage)
    #     image = cv2.imread("app/modules/2.png")
    image = cv2.resize(image, (256, 256))
    #     modelList=['sw','han','tsai']
    #     G = network.Generator()
    #     G = load_model(G, "app/modules/talkingHeads/resource/"+modelList[modelIdx], modelList[modelIdx])
    #     G = G.to("cuda:0")
    #     fa = FaceAlignment(LandmarksType._2D, device='cuda:0')
    #     e_vector = get_e_vector("app/modules/talkingHeads/resource/"+modelList[modelIdx]+"/"+modelList[modelIdx]+".npy")
    result = generate_moving_image(G, image, e_vector, "cuda:0", fa)
    print('don')
Example no. 16
from PIL import Image
import regex
import torch
import subprocess
import hashlib
import sys
from demo import load_checkpoints
from animate import normalize_kp

app = Flask(__name__)

generator, kp_detector = load_checkpoints(
    config_path="first-order-model/config/vox-adv-256.yaml",
    checkpoint_path="vox-adv-cpk.pth.tar",
)
fa = FaceAlignment(LandmarksType._2D)


@app.route("/")
def index():
    return render_template("upload.html")


def data(obj) -> str:
    return f"data: {json.dumps(obj)}\n\n"


@app.route("/upload", methods=["POST"])
def upload():
    for key, file in request.files.items():
        pathlib.Path("static", key).mkdir(exist_ok=True)
Example no. 17
import cv2
import random
import numpy as np
import pickle as pkl
import tensorflow as tf
from tqdm import tqdm
import pandas as pd
from functools import partial
import multiprocessing as mp
from imutils import face_utils
from hyperparams import Hyperparams as hp
from face_alignment import FaceAlignment, LandmarksType
from utils import detector, predictor, preprocess_input

global face_alignment
face_alignment = FaceAlignment(LandmarksType._2D, device='cuda')

def get_video_list(source = hp.dataset):
    """
    Extracts a list of paths to videos to pre-process during the current run.

    :param source: Path to the root directory of the dataset.
    :return: List of paths to videos.
    """
    video_list = []
    
    for root, dirs, files in tqdm(os.walk(source)):
        if len(files) > 0:
            assert contains_only_videos(files) and len(dirs) == 0
            video_list.append((root, files))
Example no. 18
    def __init__(self, dimensions='2d'):
        landmarkType = LandmarksType._2D if dimensions == '2d' else LandmarksType._3D
        self.faceAlignment = FaceAlignment(landmarkType,
                                           flip_input=False,
                                           device='cpu',
                                           verbose=False)
Example no. 19
class FaceTools:
    """ Face Toolkit
    """
    def __init__(self, dimensions='2d'):
        landmarkType = LandmarksType._2D if dimensions == '2d' else LandmarksType._3D
        self.faceAlignment = FaceAlignment(landmarkType,
                                           flip_input=False,
                                           device='cpu',
                                           verbose=False)

    @staticmethod
    def _polyArea(points):
        """ calculate the area of a polygon (Gauss equation)
        """
        x = [point[0] for point in points]
        y = [point[1] for point in points]
        return 0.5 * np.abs(
            np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))

    @staticmethod
    def _slope(point1, point2):
        """ compute the slope of the line passing through two points
        """
        return (point2[1] - point1[1]) / (point2[0] - point1[0])

    def landmarks(self, img):
        """ get landmarks
        """
        return self.faceAlignment.get_landmarks(img)[0]

    def geometricFeatures1(self, img, landmarks=None):
        """Improved Performance in Facial Expression Recognition Using 32 Geometric Features
            Linear features(15):
                – 3 for left eyebrow
                – 2 for left eye
                – 1 for cheeks
                – 1 for nose
                – 8 for mouth
            Polygonal features(3):
                – 1 for the left eye
                – 1 between corners of left eye and left corner of mouth
                – 1 for mouth
            Elliptical features(7):
                – 1 for left eyebrow
                – 3 for the left eye: eye, upper and lower eyelids
                – 3 for mouth: upper and lower lips
            Slope features(7):
                – 1 for left eyebrow
                – 6 for mouth corners
        """
        pts = self.faceAlignment.get_landmarks(
            img)[0] if landmarks is None else landmarks
        result = []
        # d(p, q) below is assumed to be a Euclidean-distance helper defined elsewhere in this module.
        # Linear features
        # eyebrow
        result.append(d(pts[21], pts[22]))
        result.append(d(pts[22], pts[42]))
        result.append(d(pts[26], pts[45]))
        #left eye
        result.append(d(pts[42], pts[45]))
        #d2=43-47 || 44-46
        result.append(d(pts[43], pts[47]))

        #cheeks
        result.append(d(pts[1], pts[15]))

        #nose
        result.append(d(pts[31], pts[35]))

        #mouth
        result.append(d(pts[48], pts[51]))
        result.append(d(pts[51], pts[54]))
        result.append(d(pts[62], pts[66]))
        result.append(d(pts[51], pts[57]))
        result.append(d(pts[50], pts[33]))
        result.append(d(pts[52], pts[33]))
        result.append(d(pts[48], pts[31]))
        result.append(d(pts[54], pts[35]))

        #Polygonal features:
        result.append(FaceTools._polyArea([pts[48], pts[54], pts[57]]))
        result.append(FaceTools._polyArea([pts[54], pts[42], pts[45]]))
        result.append(FaceTools._polyArea([pts[42], pts[22], pts[26],
                                           pts[45]]))

        #Slope features:
        result.append(FaceTools._slope(pts[22], pts[26]))
        result.append(FaceTools._slope(pts[48], pts[31]))
        result.append(FaceTools._slope(pts[54], pts[35]))
        result.append(FaceTools._slope(pts[48], pts[51]))
        result.append(FaceTools._slope(pts[51], pts[54]))
        result.append(FaceTools._slope(pts[54], pts[57]))
        result.append(FaceTools._slope(pts[48], pts[57]))

        return result

    def faceDistances(self, img, landmarks=None):
        """ calculate distances as described in
            'Automatic Facial Expression Recognition Using Combined Geometric Features':
            D1  Left eyebrow length
            D2  Right eyebrow length
            D3  Distance between left and right eyebrow
            D4  Left eye height
            D5  Left eye width
            D6  Right eye height
            D7  Right eye width
            D8  Distance between left eyebrow and left eye
            D9  Distance between right eyebrow and right eye
            D10 Distance between nose tip and upper lip
            D11 Lip width
            D12 Lip height
            D13 Inner lip distance
            D14 Distance between left eye corner and lip left corner
            D15 Distance between right eye corner and lip right corner
        """
        pts = self.faceAlignment.get_landmarks(
            img)[0] if landmarks is None else landmarks
        distances = [
            d(pts[17], pts[18]) + d(pts[18], pts[19]) + d(pts[19], pts[20]) + \
            d(pts[20], pts[21]),
            d(pts[22], pts[23]) + d(pts[23], pts[24]) + d(pts[24], pts[25]) + \
            d(pts[25], pts[26]),
            d(pts[21], pts[22]),
            d(pts[40], pts[38]),
            d(pts[36], pts[39]),
            d(pts[43], pts[47]),
            d(pts[42], pts[45]),
            d(pts[19], pts[37]),
            d(pts[24], pts[44]),
            d(pts[33], pts[51]),
            d(pts[48], pts[54]),
            d(pts[51], pts[57]),
            d(pts[62], pts[66]),
            d(pts[36], pts[48]),
            d(pts[45], pts[54])
        ]

        return distances
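A short usage sketch for the class above (the image path is a placeholder):

# Sketch: compute landmarks once and reuse them for the 15 distances.
import cv2

tools = FaceTools(dimensions="2d")
img = cv2.cvtColor(cv2.imread("face.jpg"), cv2.COLOR_BGR2RGB)
pts = tools.landmarks(img)
distances = tools.faceDistances(img, landmarks=pts)
print(len(distances))   # 15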
Example no. 20
class FaceModel:
    def __init__(self, args):
        self.args = args
        model = edict()

        self.threshold = args.threshold
        self.det_minsize = 50
        self.det_threshold = [0.4, 0.6, 0.6]
        self.det_factor = 0.9
        _vec = args.image_size.split(',')
        assert len(_vec) == 2
        image_size = (int(_vec[0]), int(_vec[1]))
        self.image_size = image_size
        _vec = args.model.split(',')
        assert len(_vec) == 2
        prefix = _vec[0]
        epoch = int(_vec[1])
        print('loading', prefix, epoch)
        ctx = mx.gpu(args.gpu)
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        all_layers = sym.get_internals()
        sym = all_layers['fc1_output']
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        #model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
        model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                          image_size[1]))])
        model.set_params(arg_params, aux_params)
        self.model = model
        # mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
        mtcnn_path = os.path.join('deploy', 'mtcnn-model')
        detector = MtcnnDetector(model_folder=mtcnn_path,
                                 ctx=ctx,
                                 num_worker=1,
                                 accurate_landmark=True,
                                 threshold=[0.0, 0.0, 0.2])
        self.detector = detector
        self.FACE_ALIGNMENT = FaceAlignment(LandmarksType._3D,
                                            device='cuda',
                                            flip_input=False)

    def get_feature(self, face_img):
        detected = True

        #face_img is bgr image
        def mtcnn_align(img):
            ret = self.detector.detect_face_limited(face_img,
                                                    det_type=self.args.det)
            if ret is None:
                # detected = False
                bbox, points = None, None
            else:
                bbox, points = ret
                if bbox.shape[0] == 0:
                    # detected = False
                    bbox, points = None, None
                else:
                    bbox = bbox[0, 0:4]
                    points = points[0, :].reshape((2, 5)).T
            # print(bbox)
            # print(points)
            nimg = face_preprocess.preprocess(face_img,
                                              bbox,
                                              points,
                                              image_size='112,112')
            return nimg

        # skimage.io.imread( str(fn) )
        faces = self.FACE_ALIGNMENT.get_landmarks(face_img)
        if faces is not None:
            if len(faces) > 1:
                faces = faces[0:1]
                # pdb.set_trace()
            points = faces[0]
            alignment = umeyama(points[17:], landmarks_2D, True)[0:2]
            nimg = _aligned_image = transform(face_img, alignment, 112, 0)
        else:
            nimg = mtcnn_align(face_img)
            # nimg = face_preprocess.preprocess(face_img, image_size='112,112')
        # pdb.set_trace()
        nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB)
        aligned = np.transpose(nimg, (2, 0, 1))
        #print(nimg.shape)
        embedding = None
        for flipid in [0, 1]:
            if flipid == 1:
                if self.args.flip == 0:
                    break
                do_flip(aligned)
            input_blob = np.expand_dims(aligned, axis=0)
            data = mx.nd.array(input_blob)
            db = mx.io.DataBatch(data=(data, ))
            self.model.forward(db, is_train=False)
            _embedding = self.model.get_outputs()[0].asnumpy()
            #print(_embedding.shape)
            if embedding is None:
                embedding = _embedding
            else:
                embedding += _embedding
        embedding = sklearn.preprocessing.normalize(embedding).flatten()
        return detected, embedding
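A possible driver for FaceModel (a sketch; the argument values and file names are placeholders, chosen only to satisfy the fields read in __init__ and get_feature). Because the returned embeddings are L2-normalised, their dot product equals the cosine similarity:

# Sketch: embed two face crops and compare them.
import argparse
import cv2
import numpy as np

args = argparse.Namespace(threshold=1.24, image_size="112,112",
                          model="models/model-r100/model,0",   # placeholder "prefix,epoch"
                          gpu=0, det=0, flip=0)
face_model = FaceModel(args)
_, emb_a = face_model.get_feature(cv2.imread("a.jpg"))
_, emb_b = face_model.get_feature(cv2.imread("b.jpg"))
print(float(np.dot(emb_a, emb_b)))   # cosine similarity of the normalised embeddings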