Ejemplo n.º 1
0
    def get_face(self,
                 filename,
                 landmarks=None,
                 size=(cfg.CROP_SIZE, cfg.CROP_SIZE),
                 use_cache=True,
                 from_sequence=False):
        """Return the face crop and matching landmarks for one image.

        When ``use_cache`` is set and a crop already exists under
        ``self.cropped_img_dir`` it is loaded (and resized to ``size`` if its
        shape differs). Otherwise the crop is computed from the full-size
        image and, if ``use_cache`` is set, written to the cache.

        Args:
            filename: Image name without the ``.jpg`` extension; joined onto
                the crop, full-size image and feature directories.
            landmarks: Landmarks to use. When ``None`` they are read with
                ``ds_utils.read_openface_detection``.
            size: Target crop size; compared against ``crop.shape[:2]`` and
                forwarded to the resize/crop helpers.
            use_cache: Read from and write to the crop cache.
            from_sequence: Forwarded to ``ds_utils.read_openface_detection``
                whenever landmarks have to be read.

        Returns:
            Tuple ``(crop, landmarks)``.

        Raises:
            IOError: If loading the full-size image yields ``None``.
        """
        crop_filepath = os.path.join(self.cropped_img_dir, filename + '.jpg')

        if use_cache and os.path.isfile(crop_filepath):
            try:
                crop = io.imread(crop_filepath)
            except OSError:
                # Unreadable cache entry: delete it and retry. The file no
                # longer exists, so the retry takes the non-cached branch.
                os.remove(crop_filepath)
                return self.get_face(filename, landmarks, size, use_cache,
                                     from_sequence)
            # NOTE(review): shape[:2] is (rows, cols) while cv2.resize takes
            # (width, height); equivalent for square sizes only - confirm if
            # non-square crops are ever requested.
            if crop.shape[:2] != size:
                crop = cv2.resize(crop, size, interpolation=cv2.INTER_CUBIC)
            if landmarks is None:
                # Forward from_sequence here as well, so the landmarks read
                # for a cached crop match those read when the crop was built.
                _, landmarks, _ = ds_utils.read_openface_detection(
                    os.path.join(self.feature_dir, filename),
                    numpy_lmFilepath=os.path.join(self.npfeature_dir,
                                                  filename),
                    from_sequence=from_sequence)
            landmarks = face_processing.scale_landmarks_to_crop(
                landmarks, output_size=size)
        else:
            # Load image from dataset
            img_path = os.path.join(self.fullsize_img_dir, filename + '.jpg')
            img = io.imread(img_path)
            if img is None:
                raise IOError(
                    "\tError: Could not load image {}!".format(img_path))

            # load landmarks extracted with OpenFace2
            if landmarks is None:
                of_conf, landmarks, _ = ds_utils.read_openface_detection(
                    os.path.join(self.feature_dir, filename),
                    numpy_lmFilepath=os.path.join(self.npfeature_dir,
                                                  filename),
                    from_sequence=from_sequence)
                if of_conf <= 0.0:
                    log.warning("No landmarks for image {}".format(filename))

            crop, landmarks = face_processing.crop_celebHQ(img,
                                                           landmarks,
                                                           output_size=size)

            if use_cache:
                utils.io.makedirs(crop_filepath)
                io.imsave(crop_filepath, crop)

        return crop, landmarks
Ejemplo n.º 2
0
Archivo: lfw.py Proyecto: browatbn2/MAD
    def __getitem__(self, idx):
        """Return the sample at position ``idx`` as a dict.

        Reads the OpenFace detection for the sample, extracts a tight,
        aligned face crop through ``self.face_extractor`` and applies
        ``self.transform``. If face extraction fails, the filename is printed
        and the next sample (wrapping around) is returned instead.

        Args:
            idx: Positional index into ``self.annotations``.

        Returns:
            Dict with keys ``'image'``, ``'id'``, ``'fnames'``, ``'pose'``
            and ``'landmarks'``.
        """
        sample = self.annotations.iloc[idx]
        filename = sample.fname
        identity = sample.id
        _, landmarks, pose = ds_utils.read_openface_detection(os.path.join(self.feature_dir, filename))

        try:
            crop, landmarks, pose, _ = self.face_extractor.get_face(filename + '.jpg', self.fullsize_img_dir,
                                                                    self.cropped_img_dir, landmarks=landmarks,
                                                                    pose=pose, use_cache=self.use_cache,
                                                                    detect_face=False, crop_type='tight',
                                                                    aligned=True)
        except Exception:
            # Best-effort skip of samples that cannot be cropped. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            print(filename)
            return self.__getitem__((idx+1) % len(self))

        transformed_crop = self.transform(crop)

        # Shift landmarks by half the size difference between the crop and
        # the transformed image.
        # NOTE(review): presumably compensates a centre crop inside
        # self.transform and assumes a (C, H, W) result - confirm. Column 0
        # is paired with crop.shape[0], which is only equivalent to the width
        # for square crops.
        landmarks[..., 0] -= int((crop.shape[0] - transformed_crop.shape[1]) / 2)
        landmarks[..., 1] -= int((crop.shape[1] - transformed_crop.shape[2]) / 2)

        item = {
            'image': transformed_crop,
            'id': identity,
            'fnames': filename,
            'pose': pose,
            'landmarks': landmarks,
        }
        return item
Ejemplo n.º 3
0
Archivo: lfw.py Proyecto: browatbn2/MAD
    def __getitem__(self, idx):
        """Load both images of the pair at position ``idx``.

        Each entry of ``self.pairs`` is read as ``(name1, img1, name2, img2)``.
        For both images the OpenFace detection is read, a tight aligned crop
        is extracted with ``self.default_bbox`` and ``self.transform`` is
        applied.

        Returns:
            Tuple ``(image1, image2, name1, name2, same_name, conf1, conf2)``
            where ``same_name`` is ``name1 == name2`` and the confidences are
            converted to ``float``.
        """
        pair = self.pairs[idx]  # name1, img1, name2, img2

        filepattern = '{}/{}_{:04d}'
        fnames = [
            filepattern.format(pair[0], pair[0], pair[1]),
            filepattern.format(pair[2], pair[2], pair[3]),
        ]

        images = []
        confidences = []
        for fname in fnames:
            conf, lms, head_pose = ds_utils.read_openface_detection(
                os.path.join(self.feature_dir, fname),
                expected_face_center=[125, 125])

            extracted = self.face_extractor.get_face(
                fname + '.jpg', self.fullsize_img_dir, self.cropped_img_dir,
                landmarks=lms, pose=head_pose, use_cache=self.use_cache,
                bb=self.default_bbox,
                detect_face=False, crop_type='tight', aligned=True)
            # get_face returns (crop, landmarks, pose, cropper); only the
            # crop is needed here.
            face_crop, _, _, _ = extracted

            images.append(self.transform(face_crop))
            confidences.append(conf)

        same_name = pair[0] == pair[2]
        return (images[0], images[1], pair[0], pair[2], same_name,
                float(confidences[0]), float(confidences[1]))
Ejemplo n.º 4
0
 def __getitem__(self, idx):
     """Return landmark statistics for the annotation at position ``idx``.

     Reads the OpenFace detection of the sample and reports the filename,
     the confidence, the two landmark coordinate columns, the integer
     extent of the landmarks along each column and the three pose values.
     """
     fname = self.annotations.iloc[idx].fname
     feature_path = os.path.join(self.feature_dir, fname)
     numpy_path = os.path.join(self.npfeature_dir, fname)
     of_conf, landmarks, pose = ds_utils.read_openface_detection(
         feature_path, numpy_lmFilepath=numpy_path, from_sequence=False)

     xs = landmarks[:, 0]
     ys = landmarks[:, 1]

     stats = {'fn': fname, 'cnf': of_conf, 'lmx': xs, 'lmy': ys}
     stats['h'] = int(ys.max() - ys.min())
     stats['w'] = int(xs.max() - xs.min())
     stats['p'] = pose[0]
     stats['y'] = pose[1]
     stats['r'] = pose[2]
     return stats
Ejemplo n.º 5
0
 def get_landmarks_for_crop():
     """Pick the landmarks used for cropping, based on ``self.crop_source``.

     Relies on ``self``, ``filename`` and ``landmarks_gt`` from the
     enclosing scope.

     Returns:
         Tuple ``(landmarks, pose)``. ``landmarks`` is ``None`` when no
         landmark source applies (or the CNN landmark file is missing);
         ``pose`` is a zero vector unless it was read from OpenFace.
     """
     source = self.crop_source
     stem = os.path.splitext(filename)[0]
     zero_pose = np.zeros(3, dtype=np.float32)

     if source == 'lm_openface':
         of_conf, landmarks_of, of_pose = ds_utils.read_openface_detection(
             os.path.join(self.feature_dir_of, stem),
             expected_face_center=landmarks_gt.mean(axis=0))
         if of_conf < 0.01:
             # detection too unreliable -> fall back to ground truth
             return landmarks_gt, of_pose
         landmarks_of[:,0] -= cfg.CROP_BORDER
         landmarks_of[:,1] -= cfg.CROP_BORDER
         return landmarks_of, of_pose

     if source == 'lm_cnn':
         try:
             cnn_landmarks = np.load(os.path.join(self.feature_dir_cnn, stem+'.npy'))
         except FileNotFoundError:
             cnn_landmarks = None
         return cnn_landmarks, zero_pose

     if source == 'lm_ground_truth':
         return landmarks_gt, zero_pose

     # no landmarks -> crop using bounding boxes
     return None, zero_pose
Ejemplo n.º 6
0
    def __init__(self, root_dir=cfg.VGGFACE2_ROOT, train=True, color=True, start=None,
                 max_samples=None, deterministic=None, min_conf=cfg.MIN_OPENFACE_CONFIDENCE, use_cache=True,
                 crop_source='bb_ground_truth', detect_face=False, align_face_orientation=True,
                 return_landmark_heatmaps=False, return_modified_images=False,
                 daug=0, landmark_sigma=None, landmark_ids=None, **kwargs):
        """Set up directories, annotations and the image transform.

        Annotations are loaded from a pickle cache when it exists. Otherwise
        the bounding-box CSV is read, OpenFace detections are attached to
        every row, an ``ID`` column is derived from ``NAME_ID`` and the
        result is pickled for later runs.

        Args:
            root_dir: Dataset root holding the images and ``bb_landmark``.
            train: Selects the ``train`` or ``test`` split and the mode.
            color: Forwarded to ``ds_utils.build_transform``.
            start: Positional offset of the first sample to keep.
            max_samples: Maximum number of samples to keep after ``start``.
            deterministic: Transform determinism; defaults to ``True`` for
                every mode except ``TRAIN``.
            min_conf: Accepted but not used in this constructor.
            use_cache, crop_source, detect_face, align_face_orientation,
            return_landmark_heatmaps, return_modified_images,
            landmark_sigma, landmark_ids: Stored on the instance.
            daug: Forwarded to ``ds_utils.build_transform``.
            **kwargs: Ignored.
        """

        assert(crop_source in ['bb_ground_truth', 'lm_ground_truth', 'lm_cnn', 'lm_openface'])

        self.mode = TRAIN if train else VAL

        self.face_extractor = FaceExtractor()
        self.use_cache = use_cache
        self.detect_face = detect_face
        self.align_face_orientation = align_face_orientation
        self.color = color
        self.crop_source = crop_source
        self.return_landmark_heatmaps = return_landmark_heatmaps
        self.return_modified_images = return_modified_images
        self.landmark_sigma = landmark_sigma
        self.landmark_ids = landmark_ids

        self.root_dir = root_dir
        root_dir_local = cfg.VGGFACE2_ROOT_LOCAL
        split_subfolder = 'train' if train else 'test'
        crop_folder = 'crops'
        if cfg.INPUT_SIZE == 128:
            crop_folder += '_128'
        self.cropped_img_dir = os.path.join(root_dir_local, split_subfolder, crop_folder, crop_source)
        self.fullsize_img_dir = os.path.join(root_dir, split_subfolder, 'imgs')
        self.feature_dir = os.path.join(root_dir_local, split_subfolder, 'features')
        annotation_filename = 'loose_bb_{}.csv'.format(split_subfolder)

        self.path_annotations_mod = os.path.join(root_dir_local, annotation_filename + '.mod_full.pkl')
        if os.path.isfile(self.path_annotations_mod):
            print('Reading pickle file...')
            self.annotations = pd.read_pickle(self.path_annotations_mod)
            print('done.')
        else:
            print('Reading CSV file...')
            self.annotations = pd.read_csv(os.path.join(self.root_dir, 'bb_landmark', annotation_filename))
            print('done.')

            self.annotations = self.annotations[0:4000000]
            self.annotations = self.annotations[self.annotations.H > 80]
            print("Number of images: {}".format(len(self)))

            of_confs, poses, landmarks = [], [], []
            # Pull the bounding-box columns (positions 1..4) out once instead
            # of materialising a row Series with iloc on every iteration.
            bboxes = self.annotations.iloc[:, 1:5].values
            for cnt, (filename, bb) in enumerate(zip(self.annotations.NAME_ID, bboxes)):
                filename_noext = os.path.splitext(filename)[0]

                # centre of the box: first two values plus half of the last two
                expected_face_center = [bb[0] + bb[2] / 2.0, bb[1] + bb[3] / 2.0]

                conf, lms, pose, num_faces = ds_utils.read_openface_detection(
                    os.path.join(self.feature_dir, filename_noext),
                    expected_face_center=expected_face_center,
                    use_cache=True, return_num_faces=True)

                if num_faces > 1:
                    # drop a previously cached crop for images in which more
                    # than one face was reported
                    print("Deleting extracted crop for {}...".format(filename))
                    cache_filepath = os.path.join(self.cropped_img_dir, 'tight', filename + '.jpg')
                    if os.path.isfile(cache_filepath):
                        os.remove(cache_filepath)

                of_confs.append(conf)
                landmarks.append(lms)
                poses.append(pose)
                if (cnt+1) % 10000 == 0:
                    log.info(cnt+1)
            self.annotations['pose'] = poses
            self.annotations['of_conf'] = of_confs
            self.annotations['landmarks_of'] = landmarks

            # Derive the numeric person id from the directory part of NAME_ID
            # (leading character stripped, remainder parsed as int).
            print("Creating id labels...")
            _ids = self.annotations.NAME_ID
            _ids = _ids.map(lambda x: int(x.split('/')[0][1:]))
            self.annotations['ID'] = _ids

            self.annotations.to_pickle(self.path_annotations_mod)

        min_face_height = 100
        print('Removing faces with height <={:.2f}px...'.format(min_face_height))
        self.annotations = self.annotations[self.annotations.H > min_face_height]
        print("Number of images: {}".format(len(self)))

        # limit number of samples
        st,nd = 0, None
        if start is not None:
            st = start
        if max_samples is not None:
            nd = st+max_samples
        self.annotations = self.annotations[st:nd]

        if deterministic is None:
            deterministic = self.mode != TRAIN
        self.transform = ds_utils.build_transform(deterministic, self.color, daug)

        print("Number of images: {}".format(len(self)))
        print("Number of identities: {}".format(self.annotations.ID.nunique()))
Ejemplo n.º 7
0
    def __init__(self,
                 root_dir=cfg.AFFECTNET_ROOT,
                 train=True,
                 transform=None,
                 crop_type='tight',
                 color=True,
                 start=None,
                 max_samples=None,
                 outlier_threshold=None,
                 deterministic=None,
                 use_cache=True,
                 detect_face=False,
                 align_face_orientation=False,
                 min_conf=cfg.MIN_OPENFACE_CONFIDENCE,
                 daug=0,
                 return_landmark_heatmaps=False,
                 landmark_sigma=9,
                 landmark_ids=range(68),
                 return_modified_images=False,
                 crop_source='lm_openface',
                 **kwargs):
        """Store the configuration, load annotations and build transforms.

        Annotations come from ``<split>.mod.pkl`` when that file exists.
        Otherwise ``<split>.csv`` is read, rows with ``expression >= 8`` are
        dropped, training rows are shuffled with a fixed seed and cleaned by
        ``self._remove_outliers()``, OpenFace detections are attached and the
        frame is pickled.

        Note that ``transform`` is stored first but then replaced by the
        result of ``ds_utils.build_transform``; ``min_conf`` and ``kwargs``
        are not used here.
        """
        assert crop_type in ['fullsize', 'tight', 'loose']
        assert crop_source in [
            'bb_ground_truth', 'lm_ground_truth', 'lm_cnn', 'lm_openface'
        ]

        self.face_extractor = FaceExtractor()
        self.mode = TRAIN if train else VAL

        # cropping / loading options
        self.crop_source = crop_source
        self.crop_type = crop_type
        self.use_cache = use_cache
        self.detect_face = detect_face
        self.align_face_orientation = align_face_orientation
        self.color = color

        # output options
        self.return_landmark_heatmaps = return_landmark_heatmaps
        self.return_modified_images = return_modified_images
        self.landmark_sigma = landmark_sigma
        self.landmark_ids = landmark_ids

        # sample selection
        self.start = start
        self.max_samples = max_samples
        self.outlier_threshold = outlier_threshold

        self.transform = transform

        # dataset locations
        self.root_dir = root_dir
        self.fullsize_img_dir = os.path.join(self.root_dir,
                                             'cropped_Annotated')
        self.cropped_img_dir = os.path.join(self.root_dir, 'crops',
                                            crop_source)
        self.feature_dir = os.path.join(self.root_dir, 'features')

        split_name = 'training' if train else 'validation'
        pickle_path = os.path.join(root_dir, split_name + '.mod.pkl')

        if not os.path.isfile(pickle_path):
            print('Reading CSV file...')
            self._annotations = pd.read_csv(
                os.path.join(root_dir, split_name + '.csv'))
            print('done.')

            # keep only rows labelled as faces
            is_face = self._annotations.expression < 8
            self._annotations = self._annotations[is_face]

            if train:
                # The annotation file is roughly grouped by expression, so
                # shuffle it; the fixed seed keeps the order reproducible.
                from sklearn.utils import shuffle
                self._annotations = shuffle(self._annotations, random_state=2)

                # drop samples whose expression label disagrees with their
                # valence/arousal values
                self._remove_outliers()

            # read the OpenFace detection of every remaining image
            detections = []
            paths = self._annotations.subDirectory_filePath
            for cnt, filename in enumerate(paths):
                if cnt % 1000 == 0:
                    print(cnt)
                stem = os.path.splitext(filename)[0]
                conf, lms, pose = ds_utils.read_openface_detection(
                    os.path.join(self.feature_dir, stem))
                detections.append((conf, lms, pose))

            self._annotations['pose'] = [d[2] for d in detections]
            self._annotations['conf'] = [d[0] for d in detections]
            self._annotations['landmarks_of'] = [d[1] for d in detections]
            self._annotations.to_pickle(pickle_path)
        else:
            print('Reading pickle file...')
            self._annotations = pd.read_pickle(pickle_path)

        # Absolute pose values; computed here although every filter that
        # would consume them is currently switched off.
        poses = np.abs(np.stack(self._annotations.pose.values))

        if train:
            # The rotation / confidence / arousal filters that used to sit
            # between these two prints are disabled, so the same count is
            # printed twice.
            print(len(self._annotations))
            print(len(self._annotations))

        # At least one image is missing from the dataset; such rows are
        # identified by a non-positive face width.
        has_face = self._annotations.face_width > 0
        self._annotations = self._annotations[has_face]

        self.rebalance_classes()

        if deterministic is None:
            deterministic = self.mode != TRAIN
        self.transform = ds_utils.build_transform(deterministic, self.color,
                                                  daug)

        self.crop_to_tensor = tf.Compose([
            fp.CenterCrop(cfg.INPUT_SIZE),
            fp.ToTensor(),
            fp.Normalize([0.518, 0.418, 0.361], [1, 1, 1]),  # VGGFace(2)
        ])
Ejemplo n.º 8
0
    def __init__(self,
                 root_dir=cfg.CELEBA_ROOT,
                 train=True,
                 color=True,
                 start=None,
                 max_samples=None,
                 deterministic=None,
                 crop_type='tight',
                 **kwargs):
        """Set up directories, annotations and the image transform.

        Annotations are loaded from a pickle cache when present. Otherwise
        the landmark list is read, OpenFace detections are attached to every
        row, identities are merged in and the result is pickled. The frame is
        then restricted to the split for the current mode, sorted by ``id``,
        filtered by OpenFace confidence and limited to
        ``[start, start + max_samples)``.

        Args:
            root_dir: Dataset root holding ``img_align_celeba`` and ``Anno``.
            train: ``True`` selects the ``TRAIN`` mode, else ``TEST``.
            color: Forwarded to ``ds_utils.build_transform``.
            start: Positional offset of the first sample to keep.
            max_samples: Maximum number of samples to keep after ``start``.
            deterministic: Transform determinism; defaults to ``True`` for
                every mode except ``TRAIN``.
            crop_type: One of ``'tight'``, ``'loose'``, ``'fullsize'``.
            **kwargs: Ignored.
        """

        from utils.face_extractor import FaceExtractor
        self.face_extractor = FaceExtractor()

        self.mode = TRAIN if train else TEST

        self.crop_type = crop_type
        self.root_dir = root_dir
        root_dir_local = cfg.CELEBA_ROOT_LOCAL
        assert (crop_type in ['tight', 'loose', 'fullsize'])
        self.cropped_img_dir = os.path.join(root_dir_local, 'crops')
        self.fullsize_img_dir = os.path.join(root_dir, 'img_align_celeba')
        self.feature_dir = os.path.join(root_dir_local, 'features')
        self.color = color
        annotation_filename = 'list_landmarks_align_celeba.txt'

        path_annotations_mod = os.path.join(root_dir_local,
                                            annotation_filename + '.mod.pkl')
        if os.path.isfile(path_annotations_mod):
            self.annotations = pd.read_pickle(path_annotations_mod)
        else:
            print('Reading original TXT file...')
            # sep=r'\s+' is the documented equivalent of the deprecated
            # delim_whitespace=True.
            self.annotations = pd.read_csv(
                os.path.join(self.root_dir, 'Anno', annotation_filename),
                sep=r'\s+')
            print('done.')

            # store OpenFace features in annotation dataframe
            poses = []
            confs = []
            landmarks = []
            for cnt, filename in enumerate(self.annotations.fname):
                if cnt % 1000 == 0:
                    print(cnt)
                filename_noext = os.path.splitext(filename)[0]
                conf, lms, pose = ds_utils.read_openface_detection(
                    os.path.join(self.feature_dir, filename_noext))
                poses.append(pose)
                confs.append(conf)
                landmarks.append(lms)
            self.annotations['pose'] = poses
            self.annotations['conf'] = confs
            self.annotations['landmarks_of'] = landmarks

            # add identities to annotations
            self.identities = pd.read_csv(
                os.path.join(self.root_dir, 'Anno', 'identity_CelebA.txt'),
                sep=r'\s+',
                header=None,
                names=['fname', 'id'])
            self.annotations = pd.merge(self.annotations,
                                        self.identities,
                                        on='fname',
                                        copy=False)

            # save annotations as pickle file
            self.annotations.to_pickle(path_annotations_mod)

        # select the split for the current mode by row index
        # (this constructor only ever selects TRAIN or TEST)
        SPLIT = {
            TRAIN: (0, 162772),
            VAL: (162772, 182639),
            TEST: (182639, 202601)
        }
        self.annotations = self.annotations[
            (self.annotations.index >= SPLIT[self.mode][0])
            & (self.annotations.index < SPLIT[self.mode][1])]

        self.annotations = self.annotations.sort_values(by='id')

        print("Num. faces: {}".format(len(self.annotations)))
        if 'crops_celeba' in self.cropped_img_dir:
            min_of_conf = 0.0
        else:
            min_of_conf = 0.5
        print("Removing faces with conf < {}".format(min_of_conf))
        self.annotations = self.annotations[
            self.annotations.conf >= min_of_conf]
        print("Remaining num. faces: {}".format(len(self.annotations)))

        # limit number of samples
        st, nd = 0, None
        if start is not None:
            st = start
        if max_samples is not None:
            nd = st + max_samples
        self.annotations = self.annotations[st:nd]
        # Copy the frame that was just sliced. Slicing it with [st:nd] a
        # second time would apply a non-zero start offset twice.
        self._annotations = self.annotations.copy()

        if deterministic is None:
            deterministic = self.mode != TRAIN
        self.transform = ds_utils.build_transform(deterministic, self.color)
Ejemplo n.º 9
0
    def __init__(self,
                 root_dir=cfg.VGGFACE2_ROOT,
                 train=True,
                 color=True,
                 start=None,
                 max_samples=None,
                 deterministic=None,
                 min_conf=cfg.MIN_OPENFACE_CONFIDENCE,
                 use_cache=True,
                 crop_source='bb_ground_truth',
                 detect_face=False,
                 align_face_orientation=True,
                 return_landmark_heatmaps=False,
                 return_modified_images=False,
                 daug=0,
                 landmark_sigma=None,
                 landmark_ids=None,
                 **kwargs):
        """Set up directories, annotations and the image transform.

        Annotations are loaded from a pickle cache when it exists. Otherwise
        the bounding-box CSV is read, OpenFace detections are attached to
        every row, an ``ID`` column is derived from ``NAME_ID`` and the
        result is pickled for later runs.

        Args:
            root_dir: Dataset root holding the images and ``bb_landmark``.
            train: Selects the ``train`` or ``test`` split and the mode.
            color: Forwarded to ``ds_utils.build_transform``.
            start: Positional offset of the first sample to keep.
            max_samples: Maximum number of samples to keep after ``start``.
            deterministic: Transform determinism; defaults to ``True`` for
                every mode except ``TRAIN``.
            min_conf: Confidence threshold; only used by the optional
                ``select_subset`` filter, which is switched off.
            use_cache, crop_source, detect_face, align_face_orientation,
            return_landmark_heatmaps, return_modified_images,
            landmark_sigma, landmark_ids: Stored on the instance.
            daug: Forwarded to ``ds_utils.build_transform``.
            **kwargs: Ignored.
        """

        assert (crop_source in [
            'bb_ground_truth', 'lm_ground_truth', 'lm_cnn', 'lm_openface'
        ])

        self.mode = TRAIN if train else VAL

        self.face_extractor = FaceExtractor()
        self.use_cache = use_cache
        self.detect_face = detect_face
        self.align_face_orientation = align_face_orientation
        self.color = color
        self.crop_source = crop_source
        self.return_landmark_heatmaps = return_landmark_heatmaps
        self.return_modified_images = return_modified_images
        self.landmark_sigma = landmark_sigma
        self.landmark_ids = landmark_ids

        self.root_dir = root_dir
        root_dir_local = cfg.VGGFACE2_ROOT_LOCAL
        split_subfolder = 'train' if train else 'test'
        self.cropped_img_dir = os.path.join(root_dir_local, split_subfolder,
                                            'crops', crop_source)
        self.fullsize_img_dir = os.path.join(root_dir, split_subfolder, 'imgs')
        self.feature_dir = os.path.join(root_dir_local, split_subfolder,
                                        'features')
        annotation_filename = 'loose_bb_{}.csv'.format(split_subfolder)

        self.path_annotations_mod = os.path.join(
            root_dir_local, annotation_filename + '.mod_full.pkl')
        if os.path.isfile(self.path_annotations_mod):
            print('Reading pickle file...')
            self.annotations = pd.read_pickle(self.path_annotations_mod)
            print('done.')
        else:
            print('Reading CSV file...')
            self.annotations = pd.read_csv(
                os.path.join(self.root_dir, 'bb_landmark',
                             annotation_filename))
            print('done.')

            self.annotations = self.annotations[0:4000000]
            self.annotations = self.annotations[self.annotations.H > 80]
            print("Number of images: {}".format(len(self)))

            of_confs, poses, landmarks = [], [], []
            # Pull the bounding-box columns (positions 1..4) out once instead
            # of materialising a row Series with iloc on every iteration.
            bboxes = self.annotations.iloc[:, 1:5].values
            for cnt, (filename, bb) in enumerate(
                    zip(self.annotations.NAME_ID, bboxes)):
                filename_noext = os.path.splitext(filename)[0]

                # centre of the box: first two values plus half of the last two
                expected_face_center = [
                    bb[0] + bb[2] / 2.0, bb[1] + bb[3] / 2.0
                ]

                conf, lms, pose, num_faces = ds_utils.read_openface_detection(
                    os.path.join(self.feature_dir, filename_noext),
                    expected_face_center=expected_face_center,
                    use_cache=True,
                    return_num_faces=True)

                if num_faces > 1:
                    # drop a previously cached crop for images in which more
                    # than one face was reported
                    print("Deleting extracted crop for {}...".format(
                        filename))
                    cache_filepath = os.path.join(self.cropped_img_dir,
                                                  'tight',
                                                  filename + '.jpg')
                    if os.path.isfile(cache_filepath):
                        os.remove(cache_filepath)

                of_confs.append(conf)
                landmarks.append(lms)
                poses.append(pose)
                if (cnt + 1) % 10000 == 0:
                    log.info(cnt + 1)
            self.annotations['pose'] = poses
            self.annotations['of_conf'] = of_confs
            self.annotations['landmarks_of'] = landmarks

            # Derive the numeric person id from the directory part of NAME_ID
            # (leading character stripped, remainder parsed as int).
            print("Creating id labels...")
            _ids = self.annotations.NAME_ID
            _ids = _ids.map(lambda x: int(x.split('/')[0][1:]))
            self.annotations['ID'] = _ids

            self.annotations.to_pickle(self.path_annotations_mod)

        # Optional confidence / rotation filter, switched off by default.
        select_subset = False
        if select_subset:
            print("Number of images: {}".format(len(self)))
            self.annotations = self.annotations[
                self.annotations.of_conf > min_conf]
            print("Number of images: {}".format(len(self)))
            min_rot_deg = 0
            max_rot_deg = 90
            print('Limiting rotation to +-[{}-{}] degrees...'.format(
                min_rot_deg, max_rot_deg))
            poses = np.abs(np.stack(self.annotations.pose.values))
            self.annotations = self.annotations[
                (poses[:, 0] < np.deg2rad(max_rot_deg))
                & (poses[:, 1] < np.deg2rad(max_rot_deg)) &
                (poses[:, 2] < np.deg2rad(max_rot_deg))]

        min_face_height = 100
        print(
            'Removing faces with height <={:.2f}px...'.format(min_face_height))
        self.annotations = self.annotations[
            self.annotations.H > min_face_height]
        print("Number of images: {}".format(len(self)))

        # FIXME: shuffle for find_similar_images

        # limit number of samples
        st, nd = 0, None
        if start is not None:
            st = start
        if max_samples is not None:
            nd = st + max_samples
        self.annotations = self.annotations[st:nd]

        if deterministic is None:
            deterministic = self.mode != TRAIN
        self.transform = ds_utils.build_transform(deterministic, self.color,
                                                  daug)

        print("Number of images: {}".format(len(self)))
        print("Number of identities: {}".format(self.annotations.ID.nunique()))