def test_perturb_shape():
    fitter = LucasKanadeAAMFitter(aam)
    s = fitter.perturb_shape(training_images[0].landmarks[None].lms,
                             noise_std=0.08, rotation=False)
    assert (s.n_dims == 2)
    assert (s.n_landmark_groups == 0)
    assert (s.n_points == 68)
def test_obtain_shape_from_bb():
    fitter = LucasKanadeAAMFitter(aam)
    s = fitter.obtain_shape_from_bb(np.array([[53.916, 1.853],
                                              [408.469, 339.471]]))
    assert ((np.around(s.points) == np.around(initial_shape[0].points)).all())
    assert (s.n_dims == 2)
    assert (s.n_landmark_groups == 0)
    assert (s.n_points == 68)
def aam_helper(aam, algorithm, im_number, max_iters, initial_error,
               final_error, error_type):
    fitter = LucasKanadeAAMFitter(aam, algorithm=algorithm)
    fitting_result = fitter.fit(
        training_images[im_number], initial_shape[im_number],
        gt_shape=training_images[im_number].landmarks[None].lms,
        max_iters=max_iters)
    assert_allclose(
        np.around(fitting_result.initial_error(error_type=error_type), 5),
        initial_error)
    assert_allclose(
        np.around(fitting_result.final_error(error_type=error_type), 5),
        final_error)
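# A minimal sketch of how the expected values for aam_helper can be produced:
# run one fit and print the rounded errors, then freeze them into a concrete
# test call. Assumes the same module-level `aam`, `training_images` and
# `initial_shape` used by the tests above; the algorithm choice and the
# iteration count are placeholders, not prescribed values.
fitter = LucasKanadeAAMFitter(aam, algorithm=AlternatingInverseCompositional)
fr = fitter.fit(training_images[0], initial_shape[0],
                gt_shape=training_images[0].landmarks[None].lms, max_iters=6)
print(np.around(fr.initial_error(error_type='me_norm'), 5),
      np.around(fr.final_error(error_type='me_norm'), 5))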
def aam_fit_benchmark(fitting_images, aam, fitting_options=None,
                      perturb_options=None, verbose=False):
    r"""
    Fits a trained AAM model to a database.

    Parameters
    ----------
    fitting_images : list of :class:`MaskedImage` objects
        A list of the fitting images.
    aam : :class:`menpo.fitmultilevel.aam.AAM` object
        The trained AAM object. It can be generated from the
        aam_build_benchmark() method.
    fitting_options : dictionary, optional
        A dictionary with the parameters that will be passed in the
        LucasKanadeAAMFitter (:class:`menpo.fitmultilevel.aam.base`).
        If None, the default options will be used.
        This is an example of the dictionary with the default options:

            fitting_options = {'algorithm': AlternatingInverseCompositional,
                               'md_transform': OrthoMDTransform,
                               'global_transform': AlignmentSimilarity,
                               'n_shape': None,
                               'n_appearance': None,
                               'max_iters': 50,
                               'error_type': 'me_norm'
                               }

        For an explanation of the options, please refer to the
        LucasKanadeAAMFitter documentation.

        Default: None
    perturb_options : dictionary, optional
        A dictionary with parameters that control the perturbation of the
        ground truth shape with noise of a specified std. If an image carries
        a 'bbox' landmark group (interpreted as [[min_x, min_y],
        [max_x, max_y]]), the fit is initialised from that bounding box and
        perturb_options is ignored for that image; otherwise the fit is
        initialised from a perturbation of the ground truth shape.
        If None, the default options will be used.
        This is an example of the dictionary with the default options:

            perturb_options = {'noise_std': 0.04,
                               'rotation': False
                               }

        For an explanation of the options, please refer to the
        perturb_shape() method documentation of :map:`MultilevelFitter`.
    verbose : bool, optional
        If True, it prints information regarding the AAM fitting, including a
        progress bar, the current image error and the percentage of images
        with errors less than or equal to a value.

        Default: False

    Returns
    -------
    fitting_results : list
        A list with the :map:`FittingResult` object per image.
    """
    if verbose:
        print('AAM Fitting:')
    perc1 = 0.
    perc2 = 0.

    # parse options
    if fitting_options is None:
        fitting_options = {}
    if perturb_options is None:
        perturb_options = {}

    # extract some options
    group = fitting_options.pop('gt_group', 'PTS')
    max_iters = fitting_options.pop('max_iters', 50)
    error_type = fitting_options.pop('error_type', 'me_norm')

    # create fitter
    fitter = LucasKanadeAAMFitter(aam, **fitting_options)

    # fit images
    n_images = len(fitting_images)
    fitting_results = []
    for j, i in enumerate(fitting_images):
        # perturb shape
        gt_s = i.landmarks[group].lms
        if 'bbox' in i.landmarks:
            # shape from bounding box
            s = fitter.obtain_shape_from_bb(i.landmarks['bbox'].lms.points)
        else:
            # shape from perturbation
            s = fitter.perturb_shape(gt_s, **perturb_options)

        # fit
        fr = fitter.fit(i, s, gt_shape=gt_s, max_iters=max_iters)
        fitting_results.append(fr)

        # print
        final_error = fr.final_error(error_type=error_type)
        initial_error = fr.initial_error(error_type=error_type)
        if verbose:
            if error_type == 'me_norm':
                if final_error <= 0.03:
                    perc1 += 1.
                if final_error <= 0.04:
                    perc2 += 1.
            elif error_type == 'rmse':
                if final_error <= 0.05:
                    perc1 += 1.
                if final_error <= 0.06:
                    perc2 += 1.
            print_dynamic('- {0} - [<=0.03: {1:.1f}%, <=0.04: {2:.1f}%] - '
                          'Image {3}/{4} (error: {5:.3f} --> {6:.3f})'.format(
                              progress_bar_str(float(j + 1) / n_images,
                                               show_bar=False),
                              perc1 * 100. / n_images,
                              perc2 * 100. / n_images,
                              j + 1, n_images, initial_error, final_error))
    if verbose:
        print_dynamic('- Fitting completed: [<=0.03: {0:.1f}%, <=0.04: '
                      '{1:.1f}%]\n'.format(perc1 * 100. / n_images,
                                           perc2 * 100. / n_images))

    return fitting_results
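# A minimal sketch (hedged) of running the benchmark above; `images` is
# assumed to be a list of landmarked images and `aam` the output of
# aam_build_benchmark(), as the docstring suggests. The option values are
# illustrative placeholders.
results = aam_fit_benchmark(images, aam,
                            fitting_options={'n_shape': [3, 6, 12],
                                             'max_iters': 50,
                                             'error_type': 'me_norm'},
                            perturb_options={'noise_std': 0.04,
                                             'rotation': False},
                            verbose=True)
final_errors = [fr.final_error(error_type='me_norm') for fr in results]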
# A per-level max_iters list longer than the number of pyramid levels is
# expected to raise ValueError.
@raises(ValueError)
def test_max_iters_exception():
    fitter = LucasKanadeAAMFitter(aam, algorithm=AIC)
    fitter.fit(training_images[0], initial_shape[0],
               max_iters=[10, 20, 30, 40])
class AAM:
    '''
    Initialise members and train AAM
    \param[in] i_dataset Full filepath to training and test dataset
    \param[in] i_debug True to display debug info
    '''
    def __init__(self, i_dataset, i_debug=False):
        self.debug = i_debug
        self.dataset = i_dataset
        if not os.path.exists(self.dataset):
            raise RuntimeError('Database dir does not exist in ' +
                               self.dataset)
        self.Train()
        self.viola_face_detector = FaceDetectViola(False)
        self.menpo_face_detector = FaceDetectMenpo()
        if self.debug:
            self.PrintDebug()

    '''
    Load training images and annotated landmarks from a training set in the
    file system
    '''
    def LoadDataset(self):
        trainset = os.path.join(self.dataset, 'trainset', '*')
        training_images = [self.LoadImage(img)
                           for img in menpoio.import_images(trainset,
                                                            verbose=True)]
        return training_images

    '''
    Crop the image around its landmarks (0.1 referenced from AAMs Basics)
    and convert to greyscale
    \param[in] i_img Menpo image to process
    \return processed menpo image
    '''
    def LoadImage(self, i_img, i_landmark_crop=0.5):
        img = i_img.crop_to_landmarks_proportion(i_landmark_crop)
        img = GreyscaleConversionMenpo(img)
        return img

    '''
    Train an Active Appearance Model and construct the Lucas-Kanade fitter
    \param[in] i_diag Search gradient along model landmark
    \param[in] i_scale Scale applied to search direction (search) || (initial, search)
    \param[in] i_max_greyscale_dims Dimensionality limit for PCA appearance model
    \param[in] i_max_shape_dims Dimensionality limit for PCA keypoint components
    '''
    def Train(self, i_diag=150, i_scale=[0.5, 1.0], i_max_greyscale_dims=200,
              i_max_shape_dims=20):
        # laterals tuned for performance gain - sacrifice mouth modes
        self.model = HolisticAAM(
            self.LoadDataset(),
            group='PTS',
            verbose=True,
            holistic_features=float32_fast_dsift,
            diagonal=i_diag,
            scales=i_scale,
            max_appearance_components=i_max_greyscale_dims,
            max_shape_components=i_max_shape_dims)
        self.fitter = LucasKanadeAAMFitter(self.model,
                                           n_shape=[5, 15],
                                           n_appearance=[50, 150])

    '''
    Fit an appearance model to an image with annotated landmarks
    \return Converged candidate fit
    '''
    def FitAnnotatedImage(self, i_img):
        gt = i_img.landmarks['PTS'].lms
        initial_shape = self.fitter.perturb_from_bb(gt, gt.bounding_box())
        return self.fitter.fit_from_shape(i_img, initial_shape, gt_shape=gt)

    '''
    Fit an appearance model to an image without annotations using Menpo Face
    Detection
    \return Converged landmarks
    '''
    def FitWildImageMenpo(self, i_img, i_initial_guess=None, i_max_iters=10):
        # Convert the menpo image to greyscale
        i_img = GreyscaleConversionMenpo(i_img)
        ret = None
        if i_initial_guess is not None:
            pts = menpo.shape.PointCloud(i_initial_guess, False)
            ret = self.fitter.fit_from_shape(
                i_img, pts, i_max_iters).final_shape.points
        else:
            bb = self.menpo_face_detector.Detect(i_img)
            if bb is not None:
                ret = self.fitter.fit_from_bb(
                    i_img, bb, i_max_iters).final_shape.points
        return ret

    '''
    Fit an appearance model to an image without annotations using Viola Face
    Detection
    \return Converged landmarks
    '''
    def FitWildImageViola(self, i_img, i_initial_guess=None, i_max_iters=10):
        # Convert the menpo image to the expected openCV format
        i_img = GreyscaleConversionMenpo(i_img)
        img = i_img.pixels[0] * 255
        img = numpy.array(img, dtype=numpy.uint8)
        # Detect face with experimental tuning according to the lfpw testset
        ret = None
        if i_initial_guess is None:
            faces = self.viola_face_detector.Detect(img, 3, 1.1, 0.125, 1.0)
            # Fit candidate model
            if len(faces) > 1:
                faces = [GetLargestROI(faces)]
            faces = ConvertRectToMenpoBoundingBox(faces)
            fit = self.fitter.fit_from_bb(i_img, faces[0], i_max_iters)
            ret = fit.final_shape.points
        else:
            pts = menpo.shape.PointCloud(i_initial_guess, False)
            ret = self.fitter.fit_from_shape(
                i_img, pts, i_max_iters).final_shape.points
        return ret

    '''
    Print debug information for the AAM class
    '''
    def PrintDebug(self):
        print('Dataset', self.dataset)
        print(self.model)
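# A minimal sketch of driving the AAM class above; the dataset path is a
# hypothetical placeholder and must contain the trainset/ layout that
# LoadDataset() expects. The test image is a menpo built-in asset.
aam = AAM('/path/to/dataset', i_debug=True)
wild = menpoio.import_builtin_asset.breakingbad_jpg()
landmarks = aam.FitWildImageMenpo(wild, i_max_iters=20)
if landmarks is not None:
    print(landmarks.shape)  # (68, 2) for a 68-point PTS-trained model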
if d > 400:
    img = img.rescale(400.0 / d)

# append to list
training_images.append(img)

patch_aam = PatchAAM(training_images, group='PTS',
                     patch_shape=[(16, 19), (19, 16)],
                     diagonal=200, scales=(0.5, 1.0),
                     holistic_features=fast_dsift,
                     max_shape_components=60,
                     max_appearance_components=200,
                     verbose=True)
fitter = LucasKanadeAAMFitter(patch_aam,
                              lk_algorithm_cls=WibergInverseCompositional,
                              n_shape=[10, 30], n_appearance=[40, 160])

adjacency_matrix = np.array([[0, 1, 0, 0],
                             [0, 0, 1, 0],
                             [0, 0, 0, 1],
                             [1, 0, 0, 0]])


def chen_get_bbx(txt_path):
    coordinate = np.loadtxt(txt_path, comments='\n', delimiter=',')
    y, x = coordinate.T
    max_x = max(x)
    min_x = min(x)
    max_y = max(y)
    min_y = min(y) - 18
    points = np.array([[min_x, min_y],
                       [min_x, max_y],
                       [max_x, max_y],
                       [max_x, min_y]])
    graph = PointDirectedGraph(points, adjacency_matrix)
    return graph.bounding_box()
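# A minimal sketch (hedged): initialise the patch-AAM fitter defined above
# from the bounding box that chen_get_bbx() reads out of a landmark text
# file. Both paths are hypothetical placeholders, and `mio` refers to
# menpo.io as in the other snippets.
image = mio.import_image('/path/to/test_image.png')
initial_bbox = chen_get_bbx('/path/to/landmarks.txt')
result = fitter.fit_from_bb(image, initial_bbox, max_iters=[15, 5])
print(result.final_shape.points)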
def auto_construct(pdm, images, trilist=None,
                   fit_group='init', train_group='final',
                   models=[], errors=[], costs=[], isplot=False,
                   feature=[igo] * 10, diagonal=200, scales=(0.5, 1.0),
                   n_shape=[2, 4], n_appearance=[20, 30], max_iters=10,
                   generative_iter=30, discriminative_iter=10,
                   n_processes=24, inc_appearance=0,
                   model_class=HolisticAAM, increament=False,
                   update_shape=False, shape_forgetting_factor=1.0,
                   appearance_forgetting_factor=1.0, export_path=None):
    # initialisation
    DB_size = len(images) / 2
    DB1 = images[:DB_size]
    DB2 = images[DB_size:]
    init_shape = pdm.shape_models[-1].model.mean()
    n_iteration = 0

    if trilist is None:
        trilist = TriMesh(init_shape.points).trilist

    for j in xrange(discriminative_iter):
        i_appearance = np.array(n_appearance) + np.array(inc_appearance)
        if (i_appearance > 1).any():
            i_appearance = i_appearance.astype(int).tolist()
        else:
            i_appearance = i_appearance.tolist()

        # ------------ generative iterations -------------
        for i in xrange(generative_iter):
            print 'Discriminative Iter: {}, Generative Iter: {}'.format(j, i)
            aam_fitter = LucasKanadeAAMFitter(pdm, n_shape=n_shape,
                                              n_appearance=i_appearance)
            pdm, error = generative_construct(
                DB1, aam_fitter, trilist,
                fit_group=fit_group,
                train_group=train_group,
                label='iteration_{:03d}'.format(n_iteration),
                feature=feature[j],
                diagonal=diagonal,
                scales=scales,
                original_shape_model=None if update_shape else pdm.shape_models,
                n_processes=n_processes,
                model_class=model_class,
                increament_model=pdm if increament else None,
                shape_forgetting_factor=shape_forgetting_factor,
                appearance_forgetting_factor=appearance_forgetting_factor,
                max_iters=max_iters)
            n_iteration += 1
            models.append(pdm)
            errors.append(error)

            if export_path:
                mio.export_pickle([images, models, errors], export_path,
                                  overwrite=True)
            if isplot:
                plot(errors)

        # ----------- discriminative iterations ------------
        aam_fitter = LucasKanadeAAMFitter(pdm, n_shape=n_shape,
                                          n_appearance=i_appearance)
        frs = mp_fit(DB2, aam_fitter, group=fit_group,
                     n_processes=n_processes, max_iters=max_iters)
        for img, fr in zip(DB2, frs):
            img.landmarks[train_group] = fr.final_shape

        sdm = RegularizedSDM(
            DB2,
            diagonal=diagonal,
            alpha=100,
            group=train_group,
            n_iterations=4,
            scales=(0.5, 0.5, 1.0, 1.0),
            patch_features=patch_features,
            n_perturbations=30,
            patch_shape=[(25, 25), (15, 15), (15, 15), (9, 9)],
            verbose=True)

        pdm, error = generative_construct(
            DB1, sdm, trilist,
            fit_group=fit_group,
            train_group=train_group,
            label='discriminative_{:02d}'.format(j),
            original_shape_model=None if update_shape else pdm.shape_models,
            feature=feature[j],
            diagonal=diagonal,
            scales=scales,
            n_processes=n_processes,
            model_class=model_class,
            increament_model=pdm if increament else None,
            shape_forgetting_factor=shape_forgetting_factor,
            appearance_forgetting_factor=appearance_forgetting_factor,
            max_iters=max_iters)
        models.append(pdm)
        errors.append(error)

        if export_path:
            mio.export_pickle([images, models, errors], export_path,
                              overwrite=True)
        if isplot:
            plot(errors)

    return models[-2]
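# A minimal sketch (hedged) of a call to auto_construct; `initial_aam` stands
# in for a pre-trained model whose shape_models seed the procedure and
# `images` for a landmarked image list. Both names are assumptions, and the
# reduced iteration counts and export path are illustrative placeholders.
refined_model = auto_construct(initial_aam, images,
                               n_shape=[2, 4], n_appearance=[20, 30],
                               generative_iter=5, discriminative_iter=2,
                               export_path='auto_construct_checkpoint.pkl')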
# As above, a per-level max_iters list longer than the number of pyramid
# levels is expected to raise ValueError.
@raises(ValueError)
def test_max_iters_exception():
    fitter = LucasKanadeAAMFitter(aam,
                                  algorithm=AlternatingInverseCompositional)
    fitter.fit(training_images[0], initial_shape[0],
               max_iters=[10, 20, 30, 40])
def fit(path_to_images, path_to_test, c, r, w):
    training_images = []
    for img in print_progress(mio.import_images(path_to_images,
                                                verbose=True)):
        # convert to greyscale
        if img.n_channels == 3:
            img = img.as_greyscale()
        # crop to landmarks bounding box with an extra 20% padding
        img = img.crop_to_landmarks_proportion(0.2)
        # rescale image if its diagonal is bigger than 1000 pixels
        d = img.diagonal()
        if d > 1000:
            img = img.rescale(1000.0 / d)
        # define a TriMesh which will be useful for the Piecewise Affine Warp
        # of a HolisticAAM
        # labeller(img, 'PTS', face_ibug_68_to_face_ibug_68_trimesh)
        # append to list
        training_images.append(img)

    # ## Training ribcage - Patch
    # from menpofit.aam import PatchAAM
    # from menpo.feature import fast_dsift
    #
    # patch_aam = PatchAAM(training_images, group='PTS',
    #                      patch_shape=[(15, 15), (23, 23)],
    #                      diagonal=500, scales=(0.5, 1.0),
    #                      holistic_features=fast_dsift,
    #                      max_shape_components=20,
    #                      max_appearance_components=150,
    #                      verbose=True)

    ## Training ribcage - Holistic
    patch_aam = HolisticAAM(training_images, group='PTS', diagonal=500,
                            scales=(0.5, 1.0), holistic_features=fast_dsift,
                            verbose=True, max_shape_components=20,
                            max_appearance_components=150)

    ## Prediction
    fitter = LucasKanadeAAMFitter(patch_aam,
                                  lk_algorithm_cls=WibergInverseCompositional,
                                  n_shape=[5, 20], n_appearance=[30, 150])

    image = mio.import_image(path_to_test)

    # initialize box
    adjacency_matrix = np.array([
        [0, 1, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
        [1, 0, 0, 0],
    ])
    # points = np.array([[0, 0], [0, 2020], [2020, 2020], [2020, 0]])
    points = np.array([[r - w / 2, c - w / 2],
                       [r - w / 2, c + w / 2],
                       [r + w / 2, c + w / 2],
                       [r + w / 2, c - w / 2]])
    graph = PointDirectedGraph(points, adjacency_matrix)
    box = graph.bounding_box()

    # initial bbox
    initial_bbox = box

    # fit image
    result = fitter.fit_from_bb(image, initial_bbox, max_iters=[15, 5])
    pts = result.final_shape.points
    return pts
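# A minimal sketch (hedged) of calling fit(); the image paths, the box
# centre coordinates (c, r) and the box width w are hypothetical
# placeholders for a ribcage dataset laid out as the function expects.
pts = fit('/data/ribcage/train/', '/data/ribcage/test/img_001.png',
          c=1010, r=1010, w=1800)
print(pts.shape)  # (n_landmarks, 2)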
# A per-level n_appearance list that does not match the number of pyramid
# levels is expected to raise ValueError.
@raises(ValueError)
def test_n_appearance_exception():
    fitter = LucasKanadeAAMFitter(aam, n_appearance=[10, 20])
# A non-numeric entry in n_shape is expected to raise ValueError.
@raises(ValueError)
def test_n_shape_exception():
    fitter = LucasKanadeAAMFitter(aam, n_shape=[3, 6, 'a'])
class AAMFeature(Feature):
    r"""
    Active Appearance Model (AAM) feature extraction
    """

    def __init__(self, files, extract_opts=None, process_opts=None,
                 output_dir=None):
        r"""
        Parameters
        ----------
        files : `dict` holding (video_file, landmark directory) pairs
        extract_opts : `dict` holding the configuration for feature extraction
            For a complete description of some parameters, please refer
            upstream to their documentation in the menpofit project.
            Must specify the following options:

            ``warp`` : `holistic` or `patch`
                Chooses between menpofit.aam.HolisticAAM and
                menpofit.aam.PatchAAM
            ``resolution_scales`` : `tuple` of `floats` between 0.0 and 1.0
                A pyramid of AAMs will be created, one for each element in the
                tuple. A value of 1.0 corresponds to the full resolution
                images, 0.5 to a half, and so on
            ``patch_shape`` : `tuple` of `tuple` of two `ints`
                Parameter required when ``warp`` is `patch`, one tuple per
                resolution scale. The patch shape is specified as a window of
                MxN pixels around each landmark
            ``max_shape_components`` : `int` or `list` of `ints`
                Maximum number of eigenvectors (per resolution scale) kept
                from shape PCA. The true value can be less than the maximum,
                depending on the variance in the training images
            ``max_appearance_components`` : `int` or `list` of `ints`
                Maximum number of eigenvectors (per resolution scale) kept
                from texture PCA. The true value can be less than the maximum,
                depending on the variance in the training images
            ``diagonal`` : `int`
                Serves as the diagonal size of the rescaled training images
            ``features`` : `no_op`, `hog`, `dsift`, `fast_dsift`
                `no_op` uses the image pixels for the texture model.
                `hog`, `dsift`, `fast_dsift` extract popular image descriptors
                instead
            ``landmark_group`` : `pts_face`, `pts_chin`, `pts_lips`
                `pts_face` constructs a full facial model using all the 68
                landmark points.
                `pts_chin` uses landmarks [2:15) plus [48:68) to model the
                chin and lips region.
                `pts_lips` uses only [48:68) to model the lip region
            ``confidence_thresh`` : `float` in range [0:1]
                Makes use of the OpenFace average confidence score, keeping
                only the frames above this threshold
            ``kept_frames`` : `float` in range [0:1]
                Samples the remaining video frames (above the confidence
                threshold) to keep only a small proportion. This avoids
                training the AAM with a large number of consecutive video
                frames. Before sampling, the frames from each video are sorted
                by the amount of lip opening; sampling is then done at evenly
                spaced intervals
            ``greyscale`` : `boolean`
                If ``True``, converts the frames to a single channel of grey /
                luminance levels. If ``False``, the model is built on the
                original RGB channels
            ``model_name`` : `str`
                Name of the AAM pickle object to be stored offline
        process_opts : `dict` holding the configuration for feature processing
            Must specify the following options:

            ``face_detector`` : `dlib` or `opencv` or `dpm`
                Selects the implementation that detects a face in an image.
                `dlib` is the fastest, `dpm` may be more accurate (check
                G. Chrysos, Feb 2017)
            ``landmark_fitter`` : `aam` or `ert`
                Selects the algorithm that fits the landmarks on a detected
                face. `ert` uses a model pre-trained on challenging datasets,
                `aam` may use your own model
            ``aam_fitter`` : `str`
                Full file name storing an AAM pickle to be used for landmark
                fitting. Mandatory if ``landmark_fitter`` is `aam`
            ``parameters_from`` : `lk_fitting` or `aam_projection`
                If `lk_fitting`, the shape and appearance parameters optimized
                by the Lucas-Kanade fitting algorithm are returned. In this
                case, ``landmark_fitter`` must be `aam`.
                If `aam_projection`, only the final shape of the fitting
                process will be used, initializing another fitter based on a
                new AAM specified below
            ``projection_aam`` : `str`
                Full file name storing an AAM pickle to be used in the process
                described above
            ``shape`` : `face`, `chin` or `lips`
                Chooses an AAM that may describe an entire face, or sub-parts
                of it. If `chin` or `lips`, the associated landmarks will be
                selected from the face fitting process, then a few more
                iterations of a fitting algorithm will be run using the part
                AAM specified below
            ``part_aam`` : `None` or a `str`
                The file storing a part AAM pickle (chin or lips). Must be
                different from `None` if ``shape`` is `chin` or `lips`. Such a
                part AAM can be obtained by choosing the ``landmark_group``
                parameter accordingly in the extraction process
            ``confidence_thresh`` : `float`, DEPRECATED
                It was used to filter out the frames having a confidence
                threshold for the landmarks lower than this value; their
                corresponding features were simply arrays of zeros. Now we
                consider every frame where a face is detected
            ``shape_components`` : `int` or `list` of `ints` (one per resolution scale)
                Selects the number of the kept shape eigenvectors for the
                projection and fitter AAMs. The shape feature size will be up
                to this value
            ``appearance_components`` : `int` or `list` of `ints` (one per resolution scale)
                Selects the number of the kept texture eigenvectors for the
                projection and fitter AAMs. The appearance feature size will
                be up to this value
            ``max_iters`` : `int` or `list` of `ints` (one per resolution scale)
                Selects the number of iterations (per resolution scale) of the
                optimisation algorithm. Only used for the fitter AAM, since 0
                iterations are used with the projection AAM
            ``log_errors`` : `boolean`
                If ``True``, generates a log file per video, stating the
                models used and the prediction error for each frame
            ``log_dir`` : `str`
                Directory to store the error logs above
        output_dir : `str`
            Absolute path where the features are to be stored
        """
        self._files = files
        self._outDir = output_dir
        if extract_opts is not None:
            self._extractOpts = extract_opts

            self._warpType = extract_opts['warp']
            self._landmarkGroup = extract_opts['landmark_group']
            self._max_shape_components = extract_opts['max_shape_components']
            self._max_appearance_components = extract_opts[
                'max_appearance_components']
            self._diagonal = extract_opts['diagonal']
            self._scales = extract_opts['resolution_scales']
            self._confidence_thresh = extract_opts['confidence_thresh']
            self._kept_frames = extract_opts['kept_frames']
            if extract_opts['features'] == 'fast_dsift':
                self._features = fast_dsift
            elif extract_opts['features'] == 'dsift':
                self._features = dsift
            elif extract_opts['features'] == 'hog':
                self._features = hog
            elif extract_opts['features'] == 'no_op':
                self._features = no_op
            else:
                raise Exception(
                    'Unknown feature type to extract, did you mean '
                    'fast_dsift ?')

            if 'greyscale' in extract_opts.keys():
                self._greyscale = extract_opts['greyscale']
            else:
                self._greyscale = False

            self._outModelName = extract_opts['model_name']

        if process_opts is not None:
            # Face detection
            self._face_detect_method = process_opts['face_detector']
            if self._face_detect_method == 'dlib':
                from menpodetect import load_dlib_frontal_face_detector
                detector = load_dlib_frontal_face_detector()
            elif self._face_detect_method == 'opencv':
                from menpodetect import load_opencv_frontal_face_detector
                detector = load_opencv_frontal_face_detector()
            elif self._face_detect_method == 'dpm':
                from menpodetect.ffld2 import load_ffld2_frontal_face_detector
                detector = load_ffld2_frontal_face_detector()
            else:
                raise Exception(
                    'unknown detector, did you mean dlib/opencv/dpm?')
            self._face_detect = detector

            self._shape_components = process_opts['shape_components']
            self._appearance_components = process_opts[
                'appearance_components']
            self._max_iters = process_opts['max_iters']

            self._fitter_type = process_opts['landmark_fitter']
            # Landmark fitter (pretrained ERT or AAM), actually loaded later
            # to avoid pickling with Pool
            if self._fitter_type == 'aam':
                self._aam_fitter_file = process_opts['aam_fitter']

            # Parameters source
            self._parameters = process_opts['parameters_from']

            if self._parameters == 'aam_projection':
                self._projection_aam_file = process_opts['projection_aam']
                self._projection_aam = mio.import_pickle(
                    self._projection_aam_file)
                self._projection_fitter = LucasKanadeAAMFitter(
                    aam=self._projection_aam,
                    lk_algorithm_cls=WibergInverseCompositional,
                    n_shape=self._shape_components,
                    n_appearance=self._appearance_components)
            else:
                pass

            self._confidence_thresh = process_opts['confidence_thresh']
            self._shape = process_opts['shape']
            self._part_aam = process_opts['part_aam']

            self._log_errors = process_opts['log_errors']
            if self._log_errors is False:
                self._myresolver = None

            self._log_dir = process_opts['log_dir']

    def extract_save_features(self, files):
        r"""
        Uses the input files to train AAMs and stores the resulting pickle on
        the disk

        Parameters
        ----------
        files

        Returns
        -------
        """
        # 1. fetch all video frames, attach landmarks
        files_list = list(files.keys())
        frames = mio.import_video(files_list[0],
                                  landmark_resolver=self._myresolver,
                                  normalize=True, exact_frame_count=True)
        # frames = frames.map(AAMFeature._preprocess)

        idx_above_thresh, idx_lip_opening = landmark_filter(
            files_list[0], file_dict=self._files,
            threshold=self._confidence_thresh, keep=self._kept_frames)

        frames = frames[idx_above_thresh]
        frames = frames[idx_lip_opening]
        frames = frames.map(attach_semantic_landmarks)

        if self._greyscale is True:
            frames = frames.map(convert_to_grayscale)

        # initial AAM training
        if self._warpType == 'holistic':
            aam = HolisticAAM(
                frames,
                group=self._landmarkGroup,
                holistic_features=self._features,
                reference_shape=None,
                diagonal=self._diagonal,
                scales=self._scales,
                max_shape_components=self._max_shape_components,
                max_appearance_components=self._max_appearance_components,
                verbose=False)
        elif self._warpType == 'patch':
            aam = PatchAAM(
                frames,
                group=self._landmarkGroup,
                holistic_features=self._features,
                diagonal=self._diagonal,
                scales=self._scales,
                max_shape_components=self._max_shape_components,
                max_appearance_components=self._max_appearance_components,
                patch_shape=self._extractOpts['patch_shape'],
                verbose=False)
        else:
            raise Exception('Unknown warp type. Did you mean holistic/patch ?')

        frame_buffer = LazyList.init_from_iterable([])
        buffer_len = 256
        for idx, file in enumerate(files_list[1:]):
            # useful to check progress
            with open('./run/log_' + self._outModelName + '.txt', 'w') as log:
                log.write(str(idx) + ' ' + file + '\n')

            frames = mio.import_video(file,
                                      landmark_resolver=self._myresolver,
                                      normalize=True, exact_frame_count=True)
            idx_above_thresh, idx_lip_opening = landmark_filter(
                file, file_dict=self._files,
                threshold=self._confidence_thresh, keep=self._kept_frames)

            frames = frames[idx_above_thresh]
            frames = frames[idx_lip_opening]
            frames = frames.map(attach_semantic_landmarks)
            if self._greyscale is True:
                frames = frames.map(convert_to_grayscale)

            frame_buffer += frames
            if len(frame_buffer) > buffer_len:
                # 2. retrain AAM
                aam.increment(frame_buffer,
                              group=self._landmarkGroup,
                              shape_forgetting_factor=1.0,
                              appearance_forgetting_factor=1.0,
                              verbose=False,
                              batch_size=None)
                del frame_buffer
                frame_buffer = LazyList.init_from_iterable([])
            else:
                pass

        if len(frame_buffer) != 0:
            # deplete remaining frames
            aam.increment(frame_buffer,
                          group=self._landmarkGroup,
                          shape_forgetting_factor=1.0,
                          appearance_forgetting_factor=1.0,
                          verbose=False,
                          batch_size=None)
            del frame_buffer

        makedirs(self._outDir)
        mio.export_pickle(obj=aam, fp=self._outDir + self._outModelName,
                          overwrite=True, protocol=4)

    def get_feature(self, file, process_opts=None):
        r"""
        Computes the AAM features, according to the `process_opts`

        Parameters
        ----------
        file
        process_opts

        Returns
        -------
        A dictionary of five elements, each representing a variation of the
        computed features (shape and appearance alone or concatenated, with
        or without derivatives)
        """
        self._maybe_start_logging(file)
        self._load_landmark_fitter()

        frames = mio.import_video(file,
                                  landmark_resolver=self._myresolver,
                                  normalize=True, exact_frame_count=True)

        feat_shape = []
        feat_app = []
        feat_shape_app = []

        for frameIdx, frame in enumerate(frames):
            bounding_boxes = self._face_detect(frame)
            if len(bounding_boxes) > 0:
                initial_bbox = bounding_boxes[0]
                if self._log_errors is True:
                    gt_shape = frame.landmarks['pts_face']
                else:
                    gt_shape = None

                if isinstance(self._landmark_fitter, LucasKanadeAAMFitter):
                    result = self._landmark_fitter.fit_from_bb(
                        frame, initial_bbox, max_iters=self._max_iters,
                        gt_shape=gt_shape)
                elif isinstance(self._landmark_fitter, DlibWrapper):
                    # DLIB fitter, doesn't have max_iters
                    result = self._landmark_fitter.fit_from_bb(
                        frame, initial_bbox, gt_shape=gt_shape)
                else:
                    raise Exception('incompatible landmark fitter')

                self._maybe_append_to_log(file, frameIdx, result)

                if self._shape == 'face':
                    if self._parameters == 'lk_fitting':
                        # skip the first 4 similarity params, probably not
                        # useful for classification
                        shape_param_frame = result.shape_parameters[-1][4:]
                        app_param_frame = result.appearance_parameters[-1]
                    elif self._parameters == 'aam_projection':
                        result_aam = self._projection_fitter.fit_from_shape(
                            frame, result.final_shape, max_iters=[0, 0, 0])
                        # TODO: analyse the case when aam true components are
                        # less than max components
                        shape_param_frame = result_aam.shape_parameters[-1][4:]
                        app_param_frame = result_aam.appearance_parameters[-1]
                    else:
                        raise Exception(
                            'parameters from: lk_fitting or aam_projection')

                    feat_shape.append(shape_param_frame)
                    feat_app.append(app_param_frame)
                    feat_shape_app.append(
                        np.concatenate((shape_param_frame, app_param_frame)))

                elif self._shape == 'lips':
                    # extract lips landmarks from the final face fitting to
                    # initialize the part model fitting
                    aam_lips = mio.import_pickle(self._part_aam)
                    fitter_lips = LucasKanadeAAMFitter(
                        aam_lips,
                        lk_algorithm_cls=WibergInverseCompositional,
                        n_shape=[10, 20], n_appearance=[20, 150])
                    result_lips = fitter_lips.fit_from_shape(
                        image=frame,
                        initial_shape=_pointcloud_subset(
                            result.final_shape, 'lips'),
                        max_iters=[5, 5])

                    shape_param_frame_lips = \
                        result_lips.shape_parameters[-1][4:]
                    app_param_frame_lips = \
                        result_lips.appearance_parameters[-1]

                    feat_shape.append(shape_param_frame_lips)
                    feat_app.append(app_param_frame_lips)
                    feat_shape_app.append(
                        np.concatenate((shape_param_frame_lips,
                                        app_param_frame_lips)))

                elif self._shape == 'chin':
                    # extract chin and lips landmarks from the final face
                    # fitting to initialize the part model fitting
                    aam_chin = mio.import_pickle(self._part_aam)
                    fitter_chin = LucasKanadeAAMFitter(
                        aam_chin,
                        lk_algorithm_cls=WibergInverseCompositional,
                        n_shape=[10, 20, 25], n_appearance=[20, 50, 150])
                    result_chin = fitter_chin.fit_from_shape(
                        image=frame,
                        initial_shape=_pointcloud_subset(
                            result.final_shape, 'chin'),
                        max_iters=[10, 10, 5])

                    shape_param_frame_mchin = \
                        result_chin.shape_parameters[-1][4:]
                    app_param_frame_mchin = \
                        result_chin.appearance_parameters[-1]

                    feat_shape.append(shape_param_frame_mchin)
                    feat_app.append(app_param_frame_mchin)
                    feat_shape_app.append(
                        np.concatenate((shape_param_frame_mchin,
                                        app_param_frame_mchin)))

                else:
                    raise Exception(
                        'Unknown shape model, currently supported are: '
                        'face, lips, chin')

            else:  # we did not detect any face
                zero_feat_shape = np.zeros(
                    process_opts['shape_components'][-1])
                zero_feat_app = np.zeros(
                    process_opts['appearance_components'][-1])
                zero_feat_shape_app = np.zeros(
                    process_opts['shape_components'][-1] +
                    process_opts['appearance_components'][-1])

                feat_shape.append(zero_feat_shape)
                feat_app.append(zero_feat_app)
                feat_shape_app.append(zero_feat_shape_app)

        npfeat_shape = np.array(feat_shape)
        npfeat_app = np.array(feat_app)
        npfeat_shape_app = np.array(feat_shape_app)

        npfeat_app_delta = vsrmath.accurate_derivative(npfeat_app, 'delta')
        npfeat_shape_app_delta = vsrmath.accurate_derivative(
            npfeat_shape_app, 'delta')

        return {'shape': npfeat_shape,
                'app': npfeat_app,
                'shape_app': npfeat_shape_app,
                'app_delta': npfeat_app_delta,
                'shape_app_delta': npfeat_shape_app_delta}

    def _myresolver(self, file, frame):
        return {'pts_face': self._files[str(file)] + '/frame_' +
                            str(frame + 1) + '.pts'}

    def _maybe_start_logging(self, file):
        if self._log_errors is True:
            from os import makedirs
            makedirs('./run/logs/' + self._log_dir, exist_ok=True)
            cf = file_to_feature(file, extension='')
            with open('./run/logs/' + self._log_dir + '/log_' + cf + '.txt',
                      'w') as log:
                log.write('{} \n'.format(file))
                log.write('Face detector: {}\n'.format(
                    self._face_detect_method))
                if self._fitter_type == 'aam':
                    log.write('AAM Landmark fitter: {}\n'.format(
                        self._aam_fitter_file))
                elif self._fitter_type == 'ert':
                    log.write('Pretrained ERT Landmark fitter\n')
                if self._parameters == 'projection':
                    log.write('AAM Projector: {}\n'.format(
                        self._projection_aam_file))

    def _maybe_append_to_log(self, file, frame_idx, result):
        if self._log_errors is True:
            cf = file_to_feature(file, extension='')
            error = result.final_error()
            with open('./run/logs/' + self._log_dir + '/log_' + cf + '.txt',
                      'a') as log:
                log.write('frame {}. error: {} \n'.format(str(frame_idx),
                                                          str(error)))

    def _load_landmark_fitter(self):
        if self._fitter_type == 'aam':
            self._aam_fitter = mio.import_pickle(self._aam_fitter_file)
            fitter = LucasKanadeAAMFitter(
                self._aam_fitter,
                lk_algorithm_cls=WibergInverseCompositional,
                n_shape=self._shape_components,
                n_appearance=self._appearance_components)
        elif self._fitter_type == 'ert':
            _, _, fitter68 = maybe_download_models()
            fitter = DlibWrapper(fitter68)
        else:
            raise Exception('unknown fitter, did you mean aam/ert?')
        self._landmark_fitter = fitter
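# A minimal sketch (hedged) of wiring the class above together; every path
# and option value below is a hypothetical placeholder, chosen only to match
# the option names documented in __init__. `files_dict` is assumed to map
# video files to their landmark directories.
extract_opts = {'warp': 'holistic', 'resolution_scales': (0.5, 1.0),
                'max_shape_components': 20, 'max_appearance_components': 150,
                'diagonal': 150, 'features': 'fast_dsift',
                'landmark_group': 'pts_face', 'confidence_thresh': 0.9,
                'kept_frames': 0.05, 'greyscale': False,
                'model_name': 'face_aam.pkl'}
process_opts = {'face_detector': 'dlib', 'landmark_fitter': 'aam',
                'aam_fitter': './run/face_aam.pkl',
                'parameters_from': 'lk_fitting', 'shape': 'face',
                'part_aam': None, 'confidence_thresh': 0.9,
                'shape_components': [10, 20],
                'appearance_components': [30, 150],
                'max_iters': [10, 5], 'log_errors': False, 'log_dir': 'aam'}
feature = AAMFeature(files=files_dict, extract_opts=extract_opts,
                     process_opts=process_opts, output_dir='./run/features/')
feature.extract_save_features(files_dict)
feats = feature.get_feature(next(iter(files_dict)), process_opts=process_opts)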