def encode_filter(filter_files):
    images = []
    faces = []
    FACE_ALIGNMENT = FaceAlignment(LandmarksType._2D, enable_cuda=True, flip_input=False)
    for i, filter_file in enumerate(filter_files):
        images.append(skimage.io.imread(str(filter_file)))
        faces.append(FACE_ALIGNMENT.get_landmarks(images[i]))
    FACE_ALIGNMENT = None
    face_recognition_model = face_recognition_models.face_recognition_model_location()
    face_encoder = dlib.face_recognition_model_v1(face_recognition_model)
    for i, face in enumerate(faces):
        if face is None:
            print('Warning: {} has no face.'.format(filter_files[i]))
            continue
        if len(face) > 1:
            print('Warning: {} has more than one face.'.format(filter_files[i]))
        # The original snippet referenced an undefined `rect`; as an assumption,
        # derive a bounding rectangle from the detected landmarks instead.
        xs = [int(p[0]) for p in face[0]]
        ys = [int(p[1]) for p in face[0]]
        rect = dlib.rectangle(min(xs), min(ys), max(xs), max(ys))
        parts = []
        for p in face[0]:
            # dlib.point expects integer coordinates
            parts.append(dlib.point(int(p[0]), int(p[1])))
        raw_landmark_set = dlib.full_object_detection(rect, parts)
        yield numpy.array(
            face_encoder.compute_face_descriptor(images[i], raw_landmark_set, 1))
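
# Usage sketch (hypothetical file names; assumes the surrounding module imports
# skimage.io, dlib, numpy, face_recognition_models and face_alignment):
# filter_encodings = list(encode_filter(['filter_a.jpg', 'filter_b.jpg']))
# for encoding in filter_encodings:
#     print(encoding.shape)  # dlib's face_recognition_model_v1 yields 128-D descriptors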
def landmark_preprocess(noisy_path, dataset_path):
    noisy_train_path = os.path.join(noisy_path, "train")
    noisy_test_path = os.path.join(noisy_path, "test")
    noisy_val_path = os.path.join(noisy_path, "validation")
    train_path = os.path.join(dataset_path, "train")
    test_path = os.path.join(dataset_path, "test")
    val_path = os.path.join(dataset_path, "validation")
    checkpath(dataset_path)
    checkpath(train_path)
    checkpath(test_path)
    checkpath(val_path)
    fa = FaceAlignment(LandmarksType._2D, device="cuda:1")

    train_files = file_list(noisy_train_path, ".png")
    for i in train_files:
        landmarks = fa.get_landmarks_from_image(i)[0]
        img = plot_landmarks((300, 300, 3), landmarks)
        img.save(os.path.join(train_path, "lm" + i.split("y")[-1]))

    test_files = file_list(noisy_test_path, ".png")
    for i in test_files:
        landmarks = fa.get_landmarks_from_image(i)[0]
        img = plot_landmarks((300, 300, 3), landmarks)
        img.save(os.path.join(test_path, "lm" + i.split("y")[-1]))

    val_files = file_list(noisy_val_path, ".png")
    print(noisy_val_path)
    for i in val_files:
        landmarks = fa.get_landmarks_from_image(i)[0]
        img = plot_landmarks((300, 300, 3), landmarks)
        img.save(os.path.join(val_path, "lm" + i.split("y")[-1]))
def __init__(self, style_img, input_img, style_mask, input_mask, save=False):
    style_name = os.path.basename(style_img).split('.')[0]
    input_name = os.path.basename(input_img).split('.')[0]
    self.style_img = np.float32(imread(style_img))
    self.input_img = np.float32(imread(input_img))
    self.style_mask = np.float32(imread(style_mask))
    self.input_mask = np.float32(imread(input_mask))

    # Fetch facial landmarks (cached to a pickle after the first run)
    if os.path.exists('input/%s_%s_lm.pkl' % (style_name, input_name)):
        with open('input/%s_%s_lm.pkl' % (style_name, input_name), 'rb') as f:
            pkl = pickle.load(f)
        self.style_lm = pkl['style']
        self.input_lm = pkl['input']
    else:
        fa = FaceAlignment(LandmarksType._2D, device='cpu', flip_input=False)
        self.style_lm = fa.get_landmarks(self.style_img)[0]
        self.input_lm = fa.get_landmarks(self.input_img)[0]
        with open('input/%s_%s_lm.pkl' % (style_name, input_name), 'wb') as f:
            pickle.dump({'style': self.style_lm, 'input': self.input_lm},
                        f, protocol=2)

    # Use a tuple rather than a set so the output name has a deterministic order.
    self.output_filename = '_'.join((input_name, style_name))
    self.save = save
def compare(root, f1, f2):
    global face_det
    global face_recon
    global face_align
    if not face_det:
        face_det = FaceDetection(gpu_id)
    if not face_recon:
        face_recon = FaceRecogniton(gpu_id)
    if not face_align:
        face_align = FaceAlignment(gpu_id)
    time_start = time.time()
    img_a = cv2.imread(root + '/' + f1)
    img_b = cv2.imread(root + '/' + f2)
    bbox_list1, a_point = face_det.get_max_bounding_box_by_image(img_a)
    bbox_list2, b_point = face_det.get_max_bounding_box_by_image(img_b)
    similarity = 0
    if bbox_list1 and bbox_list2:
        a_aligned_faces = face_align.affine_face(img_a, a_point)
        b_aligned_faces = face_align.affine_face(img_b, b_point)
        similarity = face_recon.face_compare(a_aligned_faces, b_aligned_faces)
        # print similarity
    time_end = time.time()
    time_use = int(1000 * (time_end - time_start))
    # print 'time_used:' + str(time_use)
    return similarity, time_use
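
# Usage sketch (assumes the module-level face_det / face_recon / face_align globals
# start out as None and gpu_id is defined; the directory and file names are hypothetical):
# similarity, time_ms = compare('/path/to/images', 'a.jpg', 'b.jpg')
# print('similarity:', similarity, 'elapsed ms:', time_ms)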
def __getitem__(self, idx):
    real_idx = self.indexes[idx]
    path = self.files[real_idx]
    print("image file path=", path)
    fa = FaceAlignment(LandmarksType._2D, device=self.device)
    imgUMat = cv2.imread(path)
    x_temp = cv2.cvtColor(imgUMat, cv2.COLOR_BGR2RGB)
    y_temp = fa.get_landmarks(x_temp)[0]
    out = []
    x = PIL.Image.fromarray(x_temp, 'RGB')
    y = plot_landmarks(x_temp, y_temp)
    if self.transform:
        x = self.transform(x)
        y = self.transform(y)
    out.append({'frame': x, 'landmarks': y})
    return real_idx, out
def evaluate(respth='./results/data_src', dspth='../data'):
    respth = osp.join(os.path.abspath(os.path.dirname(__file__)), respth)
    if not os.path.exists(respth):
        os.makedirs(respth)
    face_model = FaceAlignment(LandmarksType._2D, device="cuda")
    data_path = osp.join(os.path.abspath(os.path.dirname(__file__)), dspth)
    for image_path in os.listdir(data_path):
        image = cv2.imread(osp.join(data_path, image_path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        landmark = face_model.get_landmarks(image)[-1]
        # print(landmark)
        mask = get_image_hull_mask(np.shape(image), landmark).astype(np.uint8)
        # cv2.imshow("mask", (mask*255).astype(np.uint8))
        image_bgra = merge(image, mask)
        # cv2.imshow("image_bgra", image_bgra)
        # cv2.waitKey(1)
        save_path = osp.join(respth, image_path)
        cv2.imwrite(save_path[:-4] + '.png', image_bgra)
def eval(input_path, output_path, checkpoint_path, model, gpu):
    input = Image.open(input_path)
    input = input.convert("RGB")
    w, h = input.size
    w_, h_ = 128 * (w // 128), 128 * (h // 128)
    fa = FaceAlignment(LandmarksType._2D, device="cuda:" + str(gpu))
    landmarks = fa.get_landmarks_from_image(input_path)[0]
    landmark_img = plot_landmarks(np.array(input), landmarks)
    transform_forward = transforms.Compose([
        transforms.Resize((w_, h_)),
        transforms.CenterCrop((w_, h_)),
        transforms.ToTensor()
    ])
    transform_backward = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((w, h)),
        transforms.CenterCrop((w, h)),
    ])
    input = transform_forward(input)
    landmark_img = transform_forward(landmark_img)
    if model == "Pix2Pix":
        NFNet = Pix2Pix()
    else:
        NFNet = ResResNet()
    checkpoint = torch.load(checkpoint_path)
    NFNet.load_state_dict(checkpoint['my_classifier'])
    NFNet.to(gpu)
    x = torch.cat((input, landmark_img), 0)
    x = x.unsqueeze(0)
    x = x.to(gpu)
    # Run inference without tracking gradients so the output tensor can be
    # converted back to a PIL image by transform_backward.
    with torch.no_grad():
        output = NFNet(x)
    output = output.to("cpu")
    output = transform_backward(output[0])
    output.save(output_path)
def preprocess_dataset(source, output, device='cpu', size=0, overwrite=False, frame_rate=1):
    """
    Starts the pre-processing of the VoxCeleb dataset used for the Talking Heads models.
    This process has the following steps:

    * Extract all frames of each video in the dataset. Frames of videos that are split
      in several files are joined together.
    * Select K+1 frames of each video that will be kept. K frames will be used to train
      the embedder network, while the other one will be used to train the generator
      network. The value of K can be configured in the config.py file.
    * Landmarks will be extracted for the face in each of the frames that are being kept.
    * The frames and the corresponding landmarks for each video will be saved in files
      (one for each video) in the output directory.

    We originally tried to process several videos simultaneously using multiprocessing,
    but this seems to actually slow down the process instead of speeding it up.

    :param source: Path to the raw VoxCeleb dataset.
    :param output: Path where the pre-processed videos will be stored.
    :param device: Device used to run the landmark extraction model.
    :param size: Size of the dataset to generate. If 0, the entire raw dataset will be
        processed, otherwise, as many videos will be processed as specified by this
        parameter.
    :param overwrite: If True, files that have already been processed will be
        overwritten, otherwise, they will be ignored and instead, different files will
        be loaded.
    :param frame_rate: Rate at which frames are sampled from each video.
    """
    logging.info('===== DATASET PRE-PROCESSING =====')
    logging.info(f'Running on {device.upper()}.')
    logging.info(f'Saving K+1 random frames from each video (K = {config.K}).')
    fa = FaceAlignment(LandmarksType._2D, device=device)
    video_list = get_video_list(source, size, output, overwrite=overwrite)

    logging.info(f'Processing {len(video_list)} videos...')
    # pool = Pool(processes=4, initializer=init_pool, initargs=(fa, output))
    # pool.map(process_video_folder, video_list)
    init_pool(fa, output)
    counter = 1
    for v in video_list:
        start_time = datetime.now()
        process_video_folder(v, frame_rate)
        logging.info(f'{counter}/{len(video_list)}\t{datetime.now()-start_time}')
        counter += 1

    logging.info(f'All {len(video_list)} videos processed.')
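
# Usage sketch (hypothetical paths; config.K, get_video_list, init_pool and
# process_video_folder come from the surrounding project):
# preprocess_dataset(source='/data/voxceleb/raw',
#                    output='/data/voxceleb/preprocessed',
#                    device='cuda', size=0, overwrite=False, frame_rate=1)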
def preprocess_dataset(source, output, device='cpu', size=0, overwrite=False):
    logging.info('===== DATASET PRE-PROCESSING =====')
    logging.info(f'Running on {device.upper()}.')
    logging.info(f'Saving K+1 random frames from each video (K = {K}).')
    fa = FaceAlignment(LandmarksType._2D, device=device)
    video_list = get_video_list(source, size, output, overwrite=overwrite)

    logging.info(f'Processing {len(video_list)} videos...')
    init_pool(fa, output)
    counter = 1
    for v in video_list:
        process_video_folder(v)
        logging.info(f'{counter}/{len(video_list)}')
        counter += 1

    logging.info(f'All {len(video_list)} videos processed.')
def getTransform(videoName, modelIdx):
    modelList = ['sw', 'han', 'tsai', 'father', 'cloud', 'aerith', 'tifa', 'davinci']
    G = network.Generator()
    G = load_model(G, "app/modules/talkingHeads/resource/" + modelList[modelIdx],
                   modelList[modelIdx])
    G = G.to("cuda:0")
    fa = FaceAlignment(LandmarksType._2D, device='cuda:0')
    e_vector = get_e_vector("app/modules/talkingHeads/resource/" + modelList[modelIdx] +
                            "/" + modelList[modelIdx] + ".npy")
    timestamp = str(int(time.time()))
    print(timestamp)
    # generate_moving_video(G, "app/static/"+videoName, "app/modules/talkingHeads/resource/"+modelList[modelIdx]+"/"+modelList[modelIdx]+".npy", "app/static/result-"+timestamp+".mp4", "cuda:0")
    generate_moving_video(G, "app/static/" + videoName, e_vector,
                          "app/static/result-" + timestamp + ".mp4", "cuda:0", fa)
    return jsonify({"code": 200, "message": "Conversion successful", "token": timestamp})
def end2end_mask_encoding(
        image: np.ndarray,
        face_aligment_class: face_alignment.FaceAlignment) -> dict:
    landmarks = face_aligment_class.get_landmarks_from_image(image)
    landmarks = np.floor(landmarks[0]).astype(np.int32)
    target_points, s1, s2 = extract_target_points_and_characteristic(landmarks)
    mask_rgba_crop, target_points = extract_polygon(image, target_points)
    mask_rgba_crop, target_points = rotate_image_and_points(
        mask_rgba_crop, s1, target_points)
    res = {
        's1': s1,
        's2': s2,
        'points': target_points.tolist(),
        'base64_img': image_to_string(mask_rgba_crop)
    }
    return res
def end2end_mask_generation(image: np.ndarray,
                            masks_database: dict,
                            face_aligment_class: face_alignment.FaceAlignment,
                            input_face_landmarks: np.ndarray = None):
    if input_face_landmarks is None:
        face_landmarks = face_aligment_class.get_landmarks_from_image(image)
        # get_landmarks_from_image returns None when no face is found, so guard
        # against that as well as an empty result.
        if face_landmarks is None or len(face_landmarks) == 0:
            raise RuntimeError('Can\'t find facial landmarks')
        face_landmarks = np.floor(face_landmarks[0]).astype(np.int32)
    else:
        face_landmarks = input_face_landmarks
    _, _, s2 = extract_target_points_and_characteristic(face_landmarks)
    sampling_mask_data = extract_mask_from_base(masks_database, s2)
    face_with_mask = apply_mask_to_image_with_face(image, sampling_mask_data,
                                                   face_landmarks)
    return face_with_mask
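
# Usage sketch tying the two helpers together (assumptions: `masks_database` was
# built elsewhere in the format extract_mask_from_base expects, and the hypothetical
# input images are RGB numpy arrays):
# fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device='cpu')
# encoded_mask = end2end_mask_encoding(mask_face_rgb, fa)
# face_with_mask = end2end_mask_generation(face_rgb, masks_database, fa)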
def monkey_patch_face_detector(_):
    detector = dlib.get_frontal_face_detector()

    class Rect(object):
        def __init__(self, rect):
            self.rect = rect

    def detect(*args):
        return [Rect(x) for x in detector(*args)]

    return detect


dlib.cnn_face_detection_model_v1 = monkey_patch_face_detector

FACE_ALIGNMENT = FaceAlignment(LandmarksType._2D, enable_cuda=True, flip_input=False)

mean_face_x = numpy.array([
    0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483,
    0.799124, 0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127,
    0.36688, 0.426036, 0.490127, 0.554217, 0.613373, 0.121737, 0.187122,
    0.265825, 0.334606, 0.260918, 0.182743, 0.645647, 0.714428, 0.793132,
    0.858516, 0.79751, 0.719335, 0.254149, 0.340985, 0.428858, 0.490127,
    0.551395, 0.639268, 0.726104, 0.642159, 0.556721, 0.490127, 0.423532,
    0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874, 0.553364,
    0.490127, 0.42689
])

mean_face_y = numpy.array([
    0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891,
G = network.Generator()
G = load_model(G, "app/modules/talkingHeads/resource/han", "han")
G = G.to("cuda:0")
fa = FaceAlignment(LandmarksType._2D, device='cuda:0')
e_vector = get_e_vector("app/modules/talkingHeads/resource/han/han.npy")


def imgTransform(srcImage, modelIdx):
    image = base64_cv2(srcImage)
    # image = cv2.imread("app/modules/2.png")
    image = cv2.resize(image, (256, 256))
    # modelList = ['sw', 'han', 'tsai']
    # G = network.Generator()
    # G = load_model(G, "app/modules/talkingHeads/resource/"+modelList[modelIdx], modelList[modelIdx])
    # G = G.to("cuda:0")
    # fa = FaceAlignment(LandmarksType._2D, device='cuda:0')
    # e_vector = get_e_vector("app/modules/talkingHeads/resource/"+modelList[modelIdx]+"/"+modelList[modelIdx]+".npy")
    result = generate_moving_image(G, image, e_vector, "cuda:0", fa)
    print('don')
from PIL import Image
import regex
import torch
import subprocess
import hashlib
import sys

from demo import load_checkpoints
from animate import normalize_kp

app = Flask(__name__)

generator, kp_detector = load_checkpoints(
    config_path="first-order-model/config/vox-adv-256.yaml",
    checkpoint_path="vox-adv-cpk.pth.tar",
)
fa = FaceAlignment(LandmarksType._2D)


@app.route("/")
def index():
    return render_template("upload.html")


def data(obj) -> str:
    return f"data: {json.dumps(obj)}\n\n"


@app.route("/upload", methods=["POST"])
def upload():
    for key, file in request.files.items():
        pathlib.Path("static", key).mkdir(exist_ok=True)
import cv2
import random
import numpy as np
import pickle as pkl
import tensorflow as tf
from tqdm import tqdm
import pandas as pd
from functools import partial
import multiprocessing as mp
from imutils import face_utils

from hyperparams import Hyperparams as hp
from face_alignment import FaceAlignment, LandmarksType
from utils import detector, predictor, preprocess_input

global face_alignment
face_alignment = FaceAlignment(LandmarksType._2D, device='cuda')


def get_video_list(source=hp.dataset):
    """
    Extracts a list of paths to videos to pre-process during the current run.

    :param source: Path to the root directory of the dataset.
    :return: List of paths to videos.
    """
    video_list = []
    for root, dirs, files in tqdm(os.walk(source)):
        if len(files) > 0:
            assert contains_only_videos(files) and len(dirs) == 0
            video_list.append((root, files))
class FaceTools:
    """ Face Toolkit """

    def __init__(self, dimensions='2d'):
        landmarkType = LandmarksType._2D if dimensions == '2d' else LandmarksType._3D
        self.faceAlignment = FaceAlignment(landmarkType,
                                           flip_input=False,
                                           device='cpu',
                                           verbose=False)

    @staticmethod
    def _polyArea(points):
        """ calculate the area of a polygon (Gauss equation) """
        x = [point[0] for point in points]
        y = [point[1] for point in points]
        return 0.5 * np.abs(
            np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))

    @staticmethod
    def _slope(point1, point2):
        """ compute the slope of the line passing through two points """
        return (point2[1] - point1[1]) / (point2[0] - point1[0])

    def landmarks(self, img):
        """ get landmarks """
        return self.faceAlignment.get_landmarks(img)[0]

    def geometricFeatures1(self, img, landmarks=None):
        """Improved Performance in Facial Expression Recognition Using 32 Geometric Features

        Linear features (15):
            - 3 for left eyebrow
            - 2 for left eye
            - 1 for cheeks
            - 1 for nose
            - 8 for mouth
        Polygonal features (3):
            - 1 for the left eye
            - 1 between corners of left eye and left corner of mouth
            - 1 for mouth
        Elliptical features (7):
            - 1 for left eyebrow
            - 3 for the left eye: eye, upper and lower eyelids
            - 3 for mouth: upper and lower lips
        Slope features (7):
            - 1 for left eyebrow
            - 6 for mouth corners
        """
        pts = self.faceAlignment.get_landmarks(
            img)[0] if landmarks is None else landmarks
        result = []

        # Linear features
        # eyebrow
        result.append(d(pts[21], pts[22]))
        result.append(d(pts[22], pts[42]))
        result.append(d(pts[26], pts[45]))
        # left eye
        result.append(d(pts[42], pts[45]))
        # d2 = 43-47 || 44-46
        result.append(d(pts[43], pts[47]))
        # cheeks
        result.append(d(pts[1], pts[15]))
        # nose
        result.append(d(pts[31], pts[35]))
        # mouth
        result.append(d(pts[48], pts[51]))
        result.append(d(pts[51], pts[54]))
        result.append(d(pts[62], pts[66]))
        result.append(d(pts[51], pts[57]))
        result.append(d(pts[50], pts[33]))
        result.append(d(pts[52], pts[33]))
        result.append(d(pts[48], pts[31]))
        result.append(d(pts[54], pts[35]))

        # Polygonal features
        result.append(FaceTools._polyArea([pts[48], pts[54], pts[57]]))
        result.append(FaceTools._polyArea([pts[54], pts[42], pts[45]]))
        result.append(FaceTools._polyArea([pts[42], pts[22], pts[26], pts[45]]))

        # Slope features
        result.append(FaceTools._slope(pts[22], pts[26]))
        result.append(FaceTools._slope(pts[48], pts[31]))
        result.append(FaceTools._slope(pts[54], pts[35]))
        result.append(FaceTools._slope(pts[48], pts[51]))
        result.append(FaceTools._slope(pts[51], pts[54]))
        result.append(FaceTools._slope(pts[54], pts[57]))
        result.append(FaceTools._slope(pts[48], pts[57]))

        return result

    def faceDistances(self, img, landmarks=None):
        """ calculate distances as described in 'Automatic Facial Expression
        Recognition Using Combined Geometric Features':

        D1  Left eyebrow length
        D2  Right eyebrow length
        D3  Distance between left and right eyebrow
        D4  Left eye height
        D5  Left eye width
        D6  Right eye height
        D7  Right eye width
        D8  Distance between left eyebrow and left eye
        D9  Distance between right eyebrow and right eye
        D10 Distance between nose tip and upper lip
        D11 Lip width
        D12 Lip height
        D13 Inner lip distance
        D14 Distance between left eye corner and lip left corner
        D15 Distance between right eye corner and lip right corner
        """
        pts = self.faceAlignment.get_landmarks(
            img)[0] if landmarks is None else landmarks
        distances = [
            d(pts[17], pts[18]) + d(pts[18], pts[19]) + d(pts[19], pts[20]) +
            d(pts[20], pts[21]),
            d(pts[22], pts[23]) + d(pts[23], pts[24]) + d(pts[24], pts[25]) +
            d(pts[25], pts[26]),
            d(pts[21], pts[22]),
            d(pts[40], pts[38]),
            d(pts[36], pts[39]),
            d(pts[43], pts[47]),
            d(pts[42], pts[45]),
            d(pts[19], pts[37]),
            d(pts[24], pts[44]),
            d(pts[33], pts[51]),
            d(pts[48], pts[54]),
            d(pts[51], pts[57]),
            d(pts[62], pts[66]),
            d(pts[36], pts[48]),
            d(pts[45], pts[54])
        ]
        return distances
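
# Usage sketch (assumption: 'face.jpg' is a hypothetical single-face image and cv2,
# numpy and the `d` distance helper used above are already imported):
# tools = FaceTools(dimensions='2d')
# img = cv2.cvtColor(cv2.imread('face.jpg'), cv2.COLOR_BGR2RGB)
# lm = tools.landmarks(img)                              # (68, 2) landmark array
# feats = tools.geometricFeatures1(img, landmarks=lm)    # 25 linear/polygonal/slope features
# dists = tools.faceDistances(img, landmarks=lm)         # distances D1..D15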
class FaceModel:
    def __init__(self, args):
        self.args = args
        model = edict()
        self.threshold = args.threshold
        self.det_minsize = 50
        self.det_threshold = [0.4, 0.6, 0.6]
        self.det_factor = 0.9
        _vec = args.image_size.split(',')
        assert len(_vec) == 2
        image_size = (int(_vec[0]), int(_vec[1]))
        self.image_size = image_size
        _vec = args.model.split(',')
        assert len(_vec) == 2
        prefix = _vec[0]
        epoch = int(_vec[1])
        print('loading', prefix, epoch)
        ctx = mx.gpu(args.gpu)
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        all_layers = sym.get_internals()
        sym = all_layers['fc1_output']
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        # model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))],
        #            label_shapes=[('softmax_label', (args.batch_size,))])
        model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
        model.set_params(arg_params, aux_params)
        self.model = model
        # mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
        mtcnn_path = os.path.join('deploy', 'mtcnn-model')
        detector = MtcnnDetector(model_folder=mtcnn_path,
                                 ctx=ctx,
                                 num_worker=1,
                                 accurate_landmark=True,
                                 threshold=[0.0, 0.0, 0.2])
        self.detector = detector
        self.FACE_ALIGNMENT = FaceAlignment(LandmarksType._3D,
                                            device='cuda',
                                            flip_input=False)

    def get_feature(self, face_img):
        detected = True

        # face_img is a BGR image
        def mtcnn_align(img):
            ret = self.detector.detect_face_limited(face_img, det_type=self.args.det)
            if ret is None:
                # detected = False
                bbox, points = None, None
            else:
                bbox, points = ret
                if bbox.shape[0] == 0:
                    # detected = False
                    bbox, points = None, None
                else:
                    bbox = bbox[0, 0:4]
                    points = points[0, :].reshape((2, 5)).T
            # print(bbox)
            # print(points)
            nimg = face_preprocess.preprocess(face_img, bbox, points, image_size='112,112')
            return nimg

        # skimage.io.imread( str(fn) )
        faces = self.FACE_ALIGNMENT.get_landmarks(face_img)
        if faces is not None:
            if len(faces) > 1:
                faces = faces[0:1]
            # pdb.set_trace()
            points = faces[0]
            alignment = umeyama(points[17:], landmarks_2D, True)[0:2]
            nimg = _aligned_image = transform(face_img, alignment, 112, 0)
        else:
            nimg = mtcnn_align(face_img)
            # nimg = face_preprocess.preprocess(face_img, image_size='112,112')
        # pdb.set_trace()
        nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB)
        aligned = np.transpose(nimg, (2, 0, 1))
        # print(nimg.shape)
        embedding = None
        for flipid in [0, 1]:
            if flipid == 1:
                if self.args.flip == 0:
                    break
                do_flip(aligned)
            input_blob = np.expand_dims(aligned, axis=0)
            data = mx.nd.array(input_blob)
            db = mx.io.DataBatch(data=(data, ))
            self.model.forward(db, is_train=False)
            _embedding = self.model.get_outputs()[0].asnumpy()
            # print(_embedding.shape)
            if embedding is None:
                embedding = _embedding
            else:
                embedding += _embedding
        embedding = sklearn.preprocessing.normalize(embedding).flatten()
        return detected, embedding
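
# Usage sketch (assumption: an argparse-style `args` object with the fields read in
# __init__ above; the model prefix, threshold value and image path are hypothetical):
# args = argparse.Namespace(image_size='112,112', model='model/model,0',
#                           gpu=0, det=0, flip=0, threshold=1.24)
# face_model = FaceModel(args)
# detected, embedding = face_model.get_feature(cv2.imread('face.jpg'))  # BGR input
# # `embedding` is the L2-normalised output of the network's fc1 layer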