def get_face(self, filename, landmarks=None, size=(cfg.CROP_SIZE, cfg.CROP_SIZE),
             use_cache=True, from_sequence=False):
    crop_filepath = os.path.join(self.cropped_img_dir, filename + '.jpg')

    if use_cache and os.path.isfile(crop_filepath):
        try:
            crop = io.imread(crop_filepath)
        except OSError:
            # Corrupt cache file: delete it and recreate the crop.
            os.remove(crop_filepath)
            return self.get_face(filename, landmarks, size, use_cache, from_sequence)
        if crop.shape[:2] != size:
            crop = cv2.resize(crop, size, interpolation=cv2.INTER_CUBIC)
        if landmarks is None:
            of_conf, landmarks, _ = ds_utils.read_openface_detection(
                os.path.join(self.feature_dir, filename),
                numpy_lmFilepath=os.path.join(self.npfeature_dir, filename))
        landmarks = face_processing.scale_landmarks_to_crop(landmarks, output_size=size)
    else:
        # Load image from dataset
        img_path = os.path.join(self.fullsize_img_dir, filename + '.jpg')
        img = io.imread(img_path)
        if img is None:
            raise IOError("\tError: Could not load image {}!".format(img_path))

        # Load landmarks extracted with OpenFace2.
        if landmarks is None:
            of_conf, landmarks, _ = ds_utils.read_openface_detection(
                os.path.join(self.feature_dir, filename),
                numpy_lmFilepath=os.path.join(self.npfeature_dir, filename),
                from_sequence=from_sequence)
            if of_conf <= 0.0:
                log.warning("No landmarks for image {}".format(filename))

        # crop, landmarks = face_processing.crop_bump(img, landmarks, output_size=size)
        crop, landmarks = face_processing.crop_celebHQ(img, landmarks, output_size=size)

        if use_cache:
            utils.io.makedirs(crop_filepath)
            io.imsave(crop_filepath, crop)

    return crop, landmarks
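# The caching pattern in get_face above, in isolation (a minimal runnable
# sketch, not part of the dataset code): try the cached file first; if it is
# corrupt, delete it and fall through to recomputation, then re-cache.
import os

def load_cached(path, recompute):
    """Return cached bytes if readable, else rebuild and re-cache them."""
    if os.path.isfile(path):
        try:
            with open(path, 'rb') as f:
                return f.read()
        except OSError:
            os.remove(path)  # corrupt cache entry: drop it and recompute
    data = recompute()
    with open(path, 'wb') as f:
        f.write(data)
    return data

print(load_cached('/tmp/demo_cache.bin', lambda: b'expensive crop result'))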
def __getitem__(self, idx):
    sample = self.annotations.iloc[idx]
    filename = sample.fname
    sample_id = sample.id
    of_conf, landmarks, pose = ds_utils.read_openface_detection(
        os.path.join(self.feature_dir, filename))
    try:
        crop, landmarks, pose, cropper = self.face_extractor.get_face(
            filename + '.jpg', self.fullsize_img_dir, self.cropped_img_dir,
            landmarks=landmarks, pose=pose, use_cache=self.use_cache,
            detect_face=False, crop_type='tight', aligned=True)
    except Exception:
        # Extraction failed: report the file and fall back to the next sample.
        print(filename)
        return self.__getitem__((idx + 1) % len(self))

    transformed_crop = self.transform(crop)

    # The transform center-crops the image, so shift the landmarks by half the
    # size difference. crop is (H, W, C); transformed_crop is (C, H, W):
    # x offsets come from the widths, y offsets from the heights.
    landmarks[..., 0] -= int((crop.shape[1] - transformed_crop.shape[2]) / 2)
    landmarks[..., 1] -= int((crop.shape[0] - transformed_crop.shape[1]) / 2)

    item = {
        'image': transformed_crop,
        'id': sample_id,
        'fnames': filename,
        'pose': pose,
        'landmarks': landmarks,
    }
    return item
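# The center-crop landmark shift above, as a standalone worked example: for a
# 256x256 crop center-cropped to 224x224, every landmark moves by
# (256 - 224) / 2 = 16 px on each axis. Shapes are illustrative values.
import numpy as np

crop_hw = (256, 256)              # numpy image: (H, W)
tensor_chw = (3, 224, 224)        # tensor after center crop: (C, H, W)
lms = np.array([[130.0, 120.0]])  # one (x, y) landmark in crop coordinates
lms[..., 0] -= (crop_hw[1] - tensor_chw[2]) // 2  # x: width difference
lms[..., 1] -= (crop_hw[0] - tensor_chw[1]) // 2  # y: height difference
print(lms)  # [[114. 104.]]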
def __getitem__(self, idx):
    pair = self.pairs[idx]  # (name1, img1, name2, img2)
    filepattern = '{}/{}_{:04d}'
    fname1 = filepattern.format(pair[0], pair[0], pair[1])
    fname2 = filepattern.format(pair[2], pair[2], pair[3])
    bb = self.default_bbox

    of_conf1, landmarks, pose = ds_utils.read_openface_detection(
        os.path.join(self.feature_dir, fname1), expected_face_center=[125, 125])
    crop, landmarks, pose, cropper = self.face_extractor.get_face(
        fname1 + '.jpg', self.fullsize_img_dir, self.cropped_img_dir,
        landmarks=landmarks, pose=pose, use_cache=self.use_cache, bb=bb,
        detect_face=False, crop_type='tight', aligned=True)
    transformed_crop1 = self.transform(crop)

    of_conf2, landmarks, pose = ds_utils.read_openface_detection(
        os.path.join(self.feature_dir, fname2), expected_face_center=[125, 125])
    crop, landmarks, pose, cropper = self.face_extractor.get_face(
        fname2 + '.jpg', self.fullsize_img_dir, self.cropped_img_dir,
        landmarks=landmarks, pose=pose, use_cache=self.use_cache, bb=bb,
        detect_face=False, crop_type='tight', aligned=True)
    transformed_crop2 = self.transform(crop)

    return (transformed_crop1, transformed_crop2, pair[0], pair[2],
            pair[0] == pair[2], float(of_conf1), float(of_conf2))
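# Consuming the tuple returned above (a sketch; `embed` and the random crops
# are hypothetical stand-ins for an embedding network and the two transformed
# face crops): a verification score is the similarity between embeddings.
import torch
import torch.nn.functional as F

embed = torch.nn.Flatten()       # stand-in embedding network
crop1 = torch.rand(3, 128, 128)  # stand-in for transformed_crop1
crop2 = torch.rand(3, 128, 128)  # stand-in for transformed_crop2
e1 = embed(crop1.unsqueeze(0))
e2 = embed(crop2.unsqueeze(0))
print(F.cosine_similarity(e1, e2).item())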
def __getitem__(self, idx):
    fname = self.annotations.iloc[idx].fname
    of_conf, landmarks, pose = ds_utils.read_openface_detection(
        os.path.join(self.feature_dir, fname),
        numpy_lmFilepath=os.path.join(self.npfeature_dir, fname),
        from_sequence=False)
    return {
        'fn': fname,
        'cnf': of_conf,
        'lmx': landmarks[:, 0],
        'lmy': landmarks[:, 1],
        'h': int(landmarks[:, 1].max() - landmarks[:, 1].min()),
        'w': int(landmarks[:, 0].max() - landmarks[:, 0].min()),
        'p': pose[0],
        'y': pose[1],
        'r': pose[2],
    }
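# Aggregating the per-image stat dicts returned above (runnable sketch with
# dummy records standing in for real dataset output):
import pandas as pd

records = [
    {'fn': 'a.jpg', 'cnf': 0.97, 'h': 120, 'w': 95, 'p': 0.02, 'y': -0.10, 'r': 0.01},
    {'fn': 'b.jpg', 'cnf': 0.15, 'h': 60,  'w': 48, 'p': 0.40, 'y':  0.55, 'r': 0.12},
]
stats = pd.DataFrame.from_records(records)
# e.g. keep only confident detections of reasonably large faces
print(stats[(stats.cnf > 0.5) & (stats.h > 80)])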
def get_landmarks_for_crop():
    # Nested helper: closes over self, filename, and landmarks_gt from the
    # enclosing method.
    pose = np.zeros(3, dtype=np.float32)
    if self.crop_source == 'lm_openface':
        openface_filepath = os.path.join(self.feature_dir_of,
                                         os.path.splitext(filename)[0])
        est_face_center = landmarks_gt.mean(axis=0)
        of_conf, landmarks_of, pose = ds_utils.read_openface_detection(
            openface_filepath, expected_face_center=est_face_center)
        if of_conf < 0.01:
            # Detection failed: fall back to ground-truth landmarks.
            landmarks_of = landmarks_gt
        else:
            # landmarks_of, pose = self.cropper.apply_crop_to_landmarks(landmarks_of, pose)
            landmarks_of[:, 0] -= cfg.CROP_BORDER
            landmarks_of[:, 1] -= cfg.CROP_BORDER
        landmarks = landmarks_of
    elif self.crop_source == 'lm_cnn':
        try:
            landmarks = np.load(os.path.join(
                self.feature_dir_cnn, os.path.splitext(filename)[0] + '.npy'))
        except FileNotFoundError:
            landmarks = None
    elif self.crop_source == 'lm_ground_truth':
        landmarks = landmarks_gt
    else:
        # No landmarks -> crop using bounding boxes.
        landmarks = None
    return landmarks, pose
def __init__(self, root_dir=cfg.VGGFACE2_ROOT, train=True, color=True,
             start=None, max_samples=None, deterministic=None,
             min_conf=cfg.MIN_OPENFACE_CONFIDENCE, use_cache=True,
             crop_source='bb_ground_truth', detect_face=False,
             align_face_orientation=True, return_landmark_heatmaps=False,
             return_modified_images=False, daug=0, landmark_sigma=None,
             landmark_ids=None, **kwargs):
    assert crop_source in ['bb_ground_truth', 'lm_ground_truth',
                           'lm_cnn', 'lm_openface']
    self.mode = TRAIN if train else VAL
    self.face_extractor = FaceExtractor()
    self.use_cache = use_cache
    self.detect_face = detect_face
    self.align_face_orientation = align_face_orientation
    self.color = color
    self.crop_source = crop_source
    self.return_landmark_heatmaps = return_landmark_heatmaps
    self.return_modified_images = return_modified_images
    self.landmark_sigma = landmark_sigma
    self.landmark_ids = landmark_ids
    self.root_dir = root_dir
    root_dir_local = cfg.VGGFACE2_ROOT_LOCAL
    split_subfolder = 'train' if train else 'test'
    crop_folder = 'crops'
    if cfg.INPUT_SIZE == 128:
        crop_folder += '_128'
    self.cropped_img_dir = os.path.join(root_dir_local, split_subfolder,
                                        crop_folder, crop_source)
    self.fullsize_img_dir = os.path.join(root_dir, split_subfolder, 'imgs')
    self.feature_dir = os.path.join(root_dir_local, split_subfolder, 'features')

    annotation_filename = 'loose_bb_{}.csv'.format(split_subfolder)
    # annotation_filename = 'loose_landmark_{}.csv'.format(split_subfolder)
    self.path_annotations_mod = os.path.join(root_dir_local,
                                             annotation_filename + '.mod_full.pkl')
    if os.path.isfile(self.path_annotations_mod):
        print('Reading pickle file...')
        self.annotations = pd.read_pickle(self.path_annotations_mod)
        print('done.')
    else:
        print('Reading CSV file...')
        self.annotations = pd.read_csv(os.path.join(
            self.root_dir, 'bb_landmark', annotation_filename))
        print('done.')

        of_confs, poses, landmarks = [], [], []
        self.annotations = self.annotations[0:4000000]
        self.annotations = self.annotations[self.annotations.H > 80]
        print("Number of images: {}".format(len(self)))

        def get_face_height(lms):
            # Face height: chin (8) to nose bridge (27). Currently unused.
            return lms[8, 1] - lms[27, 1]

        read_openface_landmarks = True
        if read_openface_landmarks:
            for cnt, filename in enumerate(self.annotations.NAME_ID):
                filename_noext = os.path.splitext(filename)[0]
                bb = self.annotations.iloc[cnt][1:5].values
                expected_face_center = [bb[0] + bb[2] / 2.0,
                                        bb[1] + bb[3] / 2.0]
                conf, lms, pose, num_faces = ds_utils.read_openface_detection(
                    os.path.join(self.feature_dir, filename_noext),
                    expected_face_center=expected_face_center,
                    use_cache=True, return_num_faces=True)
                if num_faces > 1:
                    # Ambiguous detection: invalidate the cached crop.
                    print("Deleting extracted crop for {}...".format(filename))
                    cache_filepath = os.path.join(self.cropped_img_dir,
                                                  'tight', filename + '.jpg')
                    if os.path.isfile(cache_filepath):
                        os.remove(cache_filepath)
                of_confs.append(conf)
                landmarks.append(lms)
                poses.append(pose)
                if (cnt + 1) % 10000 == 0:
                    log.info(cnt + 1)
            self.annotations['pose'] = poses
            self.annotations['of_conf'] = of_confs
            self.annotations['landmarks_of'] = landmarks

        # Assign new continuous ids to persons (0, range(n)).
        print("Creating id labels...")
        _ids = self.annotations.NAME_ID
        _ids = _ids.map(lambda x: int(x.split('/')[0][1:]))
        self.annotations['ID'] = _ids

        self.annotations.to_pickle(self.path_annotations_mod)

    min_face_height = 100
    print('Removing faces with height <={:.2f}px...'.format(min_face_height))
    self.annotations = self.annotations[self.annotations.H > min_face_height]
    print("Number of images: {}".format(len(self)))

    # limit number of samples
    st, nd = 0, None
    if start is not None:
        st = start
    if max_samples is not None:
        nd = st + max_samples
    self.annotations = self.annotations[st:nd]

    if deterministic is None:
        deterministic = self.mode != TRAIN
    self.transform = ds_utils.build_transform(deterministic, self.color, daug)

    print("Number of images: {}".format(len(self)))
    print("Number of identities: {}".format(self.annotations.ID.nunique()))
def __init__(self, root_dir=cfg.AFFECTNET_ROOT, train=True, transform=None,
             crop_type='tight', color=True, start=None, max_samples=None,
             outlier_threshold=None, deterministic=None, use_cache=True,
             detect_face=False, align_face_orientation=False,
             min_conf=cfg.MIN_OPENFACE_CONFIDENCE, daug=0,
             return_landmark_heatmaps=False, landmark_sigma=9,
             landmark_ids=range(68), return_modified_images=False,
             crop_source='lm_openface', **kwargs):
    assert crop_type in ['fullsize', 'tight', 'loose']
    assert crop_source in ['bb_ground_truth', 'lm_ground_truth',
                           'lm_cnn', 'lm_openface']

    self.face_extractor = FaceExtractor()
    self.mode = TRAIN if train else VAL
    self.crop_source = crop_source
    self.use_cache = use_cache
    self.detect_face = detect_face
    self.align_face_orientation = align_face_orientation
    self.return_landmark_heatmaps = return_landmark_heatmaps
    self.return_modified_images = return_modified_images
    self.landmark_sigma = landmark_sigma
    self.landmark_ids = landmark_ids
    self.start = start
    self.max_samples = max_samples
    self.root_dir = root_dir
    self.crop_type = crop_type
    self.color = color
    self.outlier_threshold = outlier_threshold
    self.transform = transform
    self.fullsize_img_dir = os.path.join(self.root_dir, 'cropped_Annotated')
    self.cropped_img_dir = os.path.join(self.root_dir, 'crops', crop_source)
    self.feature_dir = os.path.join(self.root_dir, 'features')

    annotation_filename = 'training' if train else 'validation'
    path_annotations_mod = os.path.join(root_dir, annotation_filename + '.mod.pkl')
    if os.path.isfile(path_annotations_mod):
        print('Reading pickle file...')
        self._annotations = pd.read_pickle(path_annotations_mod)
    else:
        print('Reading CSV file...')
        self._annotations = pd.read_csv(
            os.path.join(root_dir, annotation_filename + '.csv'))
        print('done.')

        # Drop non-faces (expression labels >= 8).
        self._annotations = self._annotations[self._annotations.expression < 8]

        # Samples in the annotation file are somewhat clustered by expression.
        # Shuffle to create a more even distribution.
        # NOTE: deterministic shuffle, always creates the same order.
        if train:
            from sklearn.utils import shuffle
            self._annotations = shuffle(self._annotations, random_state=2)

        # Remove samples with inconsistent expression <-> valence/arousal values.
        self._remove_outliers()

        poses = []
        confs = []
        landmarks = []
        for cnt, filename in enumerate(self._annotations.subDirectory_filePath):
            if cnt % 1000 == 0:
                print(cnt)
            filename_noext = os.path.splitext(filename)[0]
            conf, lms, pose = ds_utils.read_openface_detection(
                os.path.join(self.feature_dir, filename_noext))
            poses.append(pose)
            confs.append(conf)
            landmarks.append(lms)
        self._annotations['pose'] = poses
        self._annotations['conf'] = confs
        self._annotations['landmarks_of'] = landmarks
        self._annotations.to_pickle(path_annotations_mod)

    poses = np.abs(np.stack(self._annotations.pose.values))

    only_good_image_for_training = True
    if train and only_good_image_for_training:
        print(len(self._annotations))
        min_rot_deg = 30
        max_rot_deg = 90
        # Pose-, confidence-, and arousal-based filters are currently disabled, e.g.:
        # self._annotations = self._annotations[np.deg2rad(min_rot_deg) < poses[:, 1]]
        # self._annotations = self._annotations[self._annotations.conf > cfg.MIN_OPENFACE_CONFIDENCE]

        # There is (at least) one missing image in the dataset.
        # Remove it by checking the face width:
        self._annotations = self._annotations[self._annotations.face_width > 0]

        self.rebalance_classes()

    if deterministic is None:
        deterministic = self.mode != TRAIN
    self.transform = ds_utils.build_transform(deterministic, self.color, daug)

    transforms = [fp.CenterCrop(cfg.INPUT_SIZE)]
    transforms += [fp.ToTensor()]
    transforms += [fp.Normalize([0.518, 0.418, 0.361], [1, 1, 1])]  # VGGFace(2) channel means
    self.crop_to_tensor = tf.Compose(transforms)
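# The deterministic shuffle used above, in isolation (runnable sketch):
# sklearn's shuffle with a fixed random_state yields the same order on every
# run, which keeps the train split reproducible across processes.
import pandas as pd
from sklearn.utils import shuffle

df = pd.DataFrame({'expression': [0, 1, 2, 3, 4]})
print(shuffle(df, random_state=2).index.tolist())  # identical on every run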
def __init__(self, root_dir=cfg.CELEBA_ROOT, train=True, color=True,
             start=None, max_samples=None, deterministic=None,
             crop_type='tight', **kwargs):
    from utils.face_extractor import FaceExtractor
    self.face_extractor = FaceExtractor()
    self.mode = TRAIN if train else TEST
    self.crop_type = crop_type
    self.root_dir = root_dir
    root_dir_local = cfg.CELEBA_ROOT_LOCAL
    assert crop_type in ['tight', 'loose', 'fullsize']
    self.cropped_img_dir = os.path.join(root_dir_local, 'crops')
    self.fullsize_img_dir = os.path.join(root_dir, 'img_align_celeba')
    self.feature_dir = os.path.join(root_dir_local, 'features')
    self.color = color

    annotation_filename = 'list_landmarks_align_celeba.txt'
    path_annotations_mod = os.path.join(root_dir_local,
                                        annotation_filename + '.mod.pkl')
    if os.path.isfile(path_annotations_mod):
        self.annotations = pd.read_pickle(path_annotations_mod)
    else:
        print('Reading original TXT file...')
        self.annotations = pd.read_csv(
            os.path.join(self.root_dir, 'Anno', annotation_filename),
            delim_whitespace=True)
        print('done.')

        # Store OpenFace features in the annotation dataframe.
        poses = []
        confs = []
        landmarks = []
        for cnt, filename in enumerate(self.annotations.fname):
            if cnt % 1000 == 0:
                print(cnt)
            filename_noext = os.path.splitext(filename)[0]
            conf, lms, pose = ds_utils.read_openface_detection(
                os.path.join(self.feature_dir, filename_noext))
            poses.append(pose)
            confs.append(conf)
            landmarks.append(lms)
        self.annotations['pose'] = poses
        self.annotations['conf'] = confs
        self.annotations['landmarks_of'] = landmarks

        # Add identities to annotations.
        self.identities = pd.read_csv(
            os.path.join(self.root_dir, 'Anno', 'identity_CelebA.txt'),
            delim_whitespace=True, header=None, names=['fname', 'id'])
        self.annotations = pd.merge(self.annotations, self.identities,
                                    on='fname', copy=False)

        # Save annotations as pickle file.
        self.annotations.to_pickle(path_annotations_mod)

    # Select training or test set (the validation set is currently unused).
    SPLIT = {
        TRAIN: (0, 162772),
        VAL: (162772, 182639),
        TEST: (182639, 202601),
    }
    self.annotations = self.annotations[
        (self.annotations.index >= SPLIT[self.mode][0]) &
        (self.annotations.index < SPLIT[self.mode][1])]

    self.annotations = self.annotations.sort_values(by='id')
    print("Num. faces: {}".format(len(self.annotations)))

    if 'crops_celeba' in self.cropped_img_dir:
        min_of_conf = 0.0
    else:
        min_of_conf = 0.5
    print("Removing faces with conf < {}".format(min_of_conf))
    self.annotations = self.annotations[self.annotations.conf >= min_of_conf]
    print("Remaining num. faces: {}".format(len(self.annotations)))

    # limit number of samples
    st, nd = 0, None
    if start is not None:
        st = start
    if max_samples is not None:
        nd = st + max_samples
    self.annotations = self.annotations[st:nd]
    # Keep a pristine copy of the (already limited) annotations;
    # slicing again here would apply the start/max_samples window twice.
    self._annotations = self.annotations.copy()

    if deterministic is None:
        deterministic = self.mode != TRAIN
    self.transform = ds_utils.build_transform(deterministic, self.color)
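# The positional index split used above, in isolation (runnable sketch;
# TRAIN/VAL/TEST stand in for the constants defined elsewhere in this module):
import pandas as pd

TRAIN, VAL, TEST = 0, 1, 2
SPLIT = {TRAIN: (0, 162772), VAL: (162772, 182639), TEST: (182639, 202601)}
df = pd.DataFrame(index=range(202601))  # stand-in index range
mode = TEST
part = df[(df.index >= SPLIT[mode][0]) & (df.index < SPLIT[mode][1])]
print(len(part))  # 19962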
def __init__(self, root_dir=cfg.VGGFACE2_ROOT, train=True, color=True,
             start=None, max_samples=None, deterministic=None,
             min_conf=cfg.MIN_OPENFACE_CONFIDENCE, use_cache=True,
             crop_source='bb_ground_truth', detect_face=False,
             align_face_orientation=True, return_landmark_heatmaps=False,
             return_modified_images=False, daug=0, landmark_sigma=None,
             landmark_ids=None, **kwargs):
    assert crop_source in ['bb_ground_truth', 'lm_ground_truth',
                           'lm_cnn', 'lm_openface']
    self.mode = TRAIN if train else VAL
    self.face_extractor = FaceExtractor()
    self.use_cache = use_cache
    self.detect_face = detect_face
    self.align_face_orientation = align_face_orientation
    self.color = color
    self.crop_source = crop_source
    self.return_landmark_heatmaps = return_landmark_heatmaps
    self.return_modified_images = return_modified_images
    self.landmark_sigma = landmark_sigma
    self.landmark_ids = landmark_ids
    self.root_dir = root_dir
    root_dir_local = cfg.VGGFACE2_ROOT_LOCAL
    split_subfolder = 'train' if train else 'test'
    self.cropped_img_dir = os.path.join(root_dir_local, split_subfolder,
                                        'crops', crop_source)
    self.fullsize_img_dir = os.path.join(root_dir, split_subfolder, 'imgs')
    self.feature_dir = os.path.join(root_dir_local, split_subfolder, 'features')

    annotation_filename = 'loose_bb_{}.csv'.format(split_subfolder)
    # annotation_filename = 'loose_landmark_{}.csv'.format(split_subfolder)
    self.path_annotations_mod = os.path.join(
        root_dir_local, annotation_filename + '.mod_full.pkl')
    if os.path.isfile(self.path_annotations_mod):
        print('Reading pickle file...')
        self.annotations = pd.read_pickle(self.path_annotations_mod)
        print('done.')
    else:
        print('Reading CSV file...')
        self.annotations = pd.read_csv(os.path.join(
            self.root_dir, 'bb_landmark', annotation_filename))
        print('done.')

        of_confs, poses, landmarks = [], [], []
        self.annotations = self.annotations[0:4000000]
        self.annotations = self.annotations[self.annotations.H > 80]
        print("Number of images: {}".format(len(self)))

        def get_face_height(lms):
            # Face height: chin (8) to nose bridge (27). Currently unused.
            return lms[8, 1] - lms[27, 1]

        read_openface_landmarks = True
        if read_openface_landmarks:
            for cnt, filename in enumerate(self.annotations.NAME_ID):
                filename_noext = os.path.splitext(filename)[0]
                bb = self.annotations.iloc[cnt][1:5].values
                expected_face_center = [bb[0] + bb[2] / 2.0,
                                        bb[1] + bb[3] / 2.0]
                conf, lms, pose, num_faces = ds_utils.read_openface_detection(
                    os.path.join(self.feature_dir, filename_noext),
                    expected_face_center=expected_face_center,
                    use_cache=True, return_num_faces=True)
                if num_faces > 1:
                    # Ambiguous detection: invalidate the cached crop.
                    print("Deleting extracted crop for {}...".format(filename))
                    cache_filepath = os.path.join(self.cropped_img_dir,
                                                  'tight', filename + '.jpg')
                    if os.path.isfile(cache_filepath):
                        os.remove(cache_filepath)
                of_confs.append(conf)
                landmarks.append(lms)
                poses.append(pose)
                if (cnt + 1) % 10000 == 0:
                    log.info(cnt + 1)
            self.annotations['pose'] = poses
            self.annotations['of_conf'] = of_confs
            self.annotations['landmarks_of'] = landmarks
            # self.annotations['face_height'] = self.annotations.landmarks_of.map(get_face_height)

        # Assign new continuous ids to persons (0, range(n)).
        print("Creating id labels...")
        _ids = self.annotations.NAME_ID
        _ids = _ids.map(lambda x: int(x.split('/')[0][1:]))
        self.annotations['ID'] = _ids

        self.annotations.to_pickle(self.path_annotations_mod)

    select_subset = False
    if select_subset:
        print("Number of images: {}".format(len(self)))
        self.annotations = self.annotations[self.annotations.of_conf > min_conf]
        print("Number of images: {}".format(len(self)))

        min_rot_deg = 0
        max_rot_deg = 90
        print('Limiting rotation to +-[{}-{}] degrees...'.format(
            min_rot_deg, max_rot_deg))
        poses = np.abs(np.stack(self.annotations.pose.values))
        self.annotations = self.annotations[
            (poses[:, 0] < np.deg2rad(max_rot_deg)) &
            (poses[:, 1] < np.deg2rad(max_rot_deg)) &
            (poses[:, 2] < np.deg2rad(max_rot_deg))]

        min_face_height = 100
        print('Removing faces with height <={:.2f}px...'.format(min_face_height))
        self.annotations = self.annotations[self.annotations.H > min_face_height]
        print("Number of images: {}".format(len(self)))

    # limit number of samples
    st, nd = 0, None
    if start is not None:
        st = start
    if max_samples is not None:
        nd = st + max_samples
    self.annotations = self.annotations[st:nd]

    if deterministic is None:
        deterministic = self.mode != TRAIN
    self.transform = ds_utils.build_transform(deterministic, self.color, daug)

    print("Number of images: {}".format(len(self)))
    print("Number of identities: {}".format(self.annotations.ID.nunique()))
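# The rotation filter inside select_subset above, in isolation (runnable
# sketch; 60 degrees is an illustrative threshold, the code above uses 0-90):
import numpy as np

poses = np.abs(np.array([[0.10, 0.20, 0.05],
                         [1.20, 0.30, 0.10]]))  # |pitch, yaw, roll| in radians
max_rot_deg = 60
keep = ((poses[:, 0] < np.deg2rad(max_rot_deg)) &
        (poses[:, 1] < np.deg2rad(max_rot_deg)) &
        (poses[:, 2] < np.deg2rad(max_rot_deg)))
print(keep)  # [ True False]: the second pose exceeds 60 degrees of pitch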