Example 1
    def enroll(self, data):
        """Enrolls a GMM using MAP adaptation given a reference's feature vectors

        Returns a GMMMachine tuned from the UBM with MAP adaptation on the biometric reference's data.
        """

        # If the input is a list (or SampleBatch) of 2D arrays, stack it into one array
        data = check_data_dim(data, expected_ndim=2)

        # Use the array to train a GMM and return it
        logger.info("Enrolling with %d feature vectors", data.shape[0])

        gmm = GMMMachine(
            n_gaussians=self.n_gaussians,
            trainer="map",
            ubm=copy.deepcopy(self),
            convergence_threshold=self.convergence_threshold,
            max_fitting_steps=self.enroll_iterations,
            random_state=self.random_state,
            update_means=self.enroll_update_means,
            update_variances=self.enroll_update_variances,
            update_weights=self.enroll_update_weights,
            mean_var_update_threshold=self.mean_var_update_threshold,
            map_relevance_factor=self.enroll_relevance_factor,
            map_alpha=self.enroll_alpha,
        )
        gmm.fit(data)
        return gmm
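
For context, a minimal standalone sketch of the same MAP-enrollment flow, assuming bob.learn.em exposes GMMMachine as in the examples below, and using synthetic stand-in data (all names, shapes, and parameter values here are illustrative, not from the original source):

import copy
import numpy as np
from bob.learn.em import GMMMachine

rng = np.random.RandomState(0)
data = rng.rand(100, 3)                                   # stand-in reference features
ubm = GMMMachine(n_gaussians=2, trainer="ml").fit(data)   # stand-in UBM

# MAP-adapt a new machine from the UBM, as enroll() does above
model = GMMMachine(
    n_gaussians=2,
    trainer="map",
    ubm=copy.deepcopy(ubm),
    max_fitting_steps=1,
    update_means=True,
    update_variances=False,
    update_weights=False,
).fit(data)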
Example 2
    def enroll(self, data):
        """Enrolls a GMM using MAP adaptation given a reference's feature vectors

        Returns a GMMMachine tuned from the UBM with MAP adaptation on the biometric reference's data.
        """

        for feature in data:
            self._check_feature(feature)

        # If the input is a list (or SampleBatch) of 2D arrays, stack it into one array
        if data[0].ndim == 2:
            data = np.vstack(data)

        # Use the array to train a GMM and return it
        logger.info("Enrolling with %d feature vectors", data.shape[0])

        gmm = GMMMachine(
            n_gaussians=self.number_of_gaussians,
            trainer="map",
            ubm=copy.deepcopy(self.ubm),
            convergence_threshold=self.training_threshold,
            max_fitting_steps=self.gmm_enroll_iterations,
            random_state=self.rng,
            update_means=self.enroll_update_means,
            update_variances=self.enroll_update_variances,
            update_weights=self.enroll_update_weights,
            mean_var_update_threshold=self.variance_threshold,
            map_relevance_factor=self.enroll_relevance_factor,
            map_alpha=self.enroll_alpha,
        )
        gmm.fit(data)
        return gmm
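
The stacking step above merges a list of per-sample 2D arrays into a single (n_frames, n_features) array; a quick NumPy illustration:

import numpy as np

# Two enrollment samples, each a (n_frames, n_features) array
data = [np.ones((4, 3)), np.zeros((2, 3))]
if data[0].ndim == 2:
    data = np.vstack(data)
print(data.shape)  # (6, 3): frames from all samples stacked along axis 0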
Example 3
    def _voice_activity_detection(self,
                                  energy_array: np.ndarray) -> np.ndarray:
        """Fits a 2 Gaussian GMM on the energy that splits between voice and silence."""
        n_samples = len(energy_array)
        # If the energy barely varies, the signal is probably not audio; label everything as silence.
        if np.std(energy_array) < 10e-5:
            return np.zeros(shape=n_samples)

        # Add an epsilon small Gaussian noise to avoid numerical issues (mainly due to artificial silence).
        energy_array = (1e-6 * np.random.randn(n_samples)) + energy_array

        # Normalize the energy array, make it an array of 1D samples
        normalized_energy = utils.normalize_std_array(energy_array).reshape(
            (-1, 1))

        # Note: self.max_iterations and self.convergence_threshold are used for both
        # k-means and GMM training.
        kmeans_trainer = KMeansMachine(
            n_clusters=2,
            convergence_threshold=self.convergence_threshold,
            max_iter=self.max_iterations,
            init_max_iter=self.max_iterations,
        )
        ubm_gmm = GMMMachine(
            n_gaussians=2,
            trainer="ml",
            update_means=True,
            update_variances=True,
            update_weights=True,
            convergence_threshold=self.convergence_threshold,
            max_fitting_steps=self.max_iterations,
            k_means_trainer=kmeans_trainer,
        )
        ubm_gmm.variance_thresholds = self.variance_threshold

        ubm_gmm.fit(normalized_energy)

        if np.isnan(ubm_gmm.means).any():
            logger.warn("Annotation aborted: File contains NaN's")
            return np.zeros(shape=n_samples, dtype=int)

        # Classify

        # The labeling depends on which Gaussian mean represents high energy (the larger mean)
        labels = ubm_gmm.log_weighted_likelihood(normalized_energy)
        if ubm_gmm.means.argmax() == 0:  # High energy in means[0]
            labels = labels.argmin(axis=0)
        else:  # High energy in means[1]
            labels = labels.argmax(axis=0)

        return labels
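
The same voice/silence split can be sketched with scikit-learn's GaussianMixture instead of the bob machines (a swapped-in technique purely for illustration; the energies and thresholds are synthetic assumptions):

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.RandomState(0)
# Stand-in energies: a low-energy (silence) and a high-energy (voice) population
energy = np.concatenate([rng.normal(0, 1, 50), rng.normal(5, 1, 50)])
x = ((energy - energy.mean()) / energy.std()).reshape(-1, 1)

gm = GaussianMixture(n_components=2, random_state=0).fit(x)
voice_component = int(np.argmax(gm.means_))       # the higher-mean Gaussian is "voice"
labels = (gm.predict(x) == voice_component).astype(int)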
Example 4
def test_gmm_MAP_3():
    # Trains a GMMMachine with MAP_GMMTrainer; compares against an old reference
    ar = load_array(resource_filename("bob.learn.em", "data/dataforMAP.hdf5"))

    # Initialize GMMMachine
    n_gaussians = 5
    prior_gmm = GMMMachine(n_gaussians)
    prior_gmm.means = load_array(
        resource_filename("bob.learn.em", "data/meansAfterML.hdf5"))
    prior_gmm.variances = load_array(
        resource_filename("bob.learn.em", "data/variancesAfterML.hdf5"))
    prior_gmm.weights = load_array(
        resource_filename("bob.learn.em", "data/weightsAfterML.hdf5"))

    threshold = 0.001
    prior_gmm.variance_thresholds = threshold

    # Initialize MAP Trainer
    prior = 0.001
    accuracy = 0.00001
    gmm = GMMMachine(
        n_gaussians,
        trainer="map",
        ubm=prior_gmm,
        convergence_threshold=prior,
        max_fitting_steps=1,
        update_means=True,
        update_variances=False,
        update_weights=False,
        mean_var_update_threshold=accuracy,
        map_relevance_factor=None,
    )
    gmm.variance_thresholds = threshold

    # Test results
    # Load torch3vision reference
    meansMAP_ref = load_array(
        resource_filename("bob.learn.em", "data/meansAfterMAP.hdf5"))
    variancesMAP_ref = load_array(
        resource_filename("bob.learn.em", "data/variancesAfterMAP.hdf5"))
    weightsMAP_ref = load_array(
        resource_filename("bob.learn.em", "data/weightsAfterMAP.hdf5"))

    for transform in (to_numpy, to_dask_array):
        ar = transform(ar)
        # Train
        gmm = gmm.fit(ar)

        # Compare to current results
        # Gaps are quite large. This might be explained by the fact that there is no
        # adaptation of a given Gaussian in torch3 when the corresponding responsibilities
        # are below the responsibilities threshold
        np.testing.assert_allclose(gmm.means, meansMAP_ref, atol=2e-1)
        np.testing.assert_allclose(gmm.variances, variancesMAP_ref, atol=1e-4)
        np.testing.assert_allclose(gmm.weights, weightsMAP_ref, atol=1e-4)
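
For reference, the MAP mean update exercised here follows Reynolds' adaptation: with relevance factor r, each Gaussian's coefficient is alpha_i = n_i / (n_i + r), and the new mean is alpha_i * E_i[x] + (1 - alpha_i) * m_i. (When map_relevance_factor is None, as in this test, the fixed map_alpha is used instead, per the class docstring in Example 11.) A small numeric sketch with made-up values:

import numpy as np

r = 4.0                            # relevance factor (the library default per Example 11)
n_i = 10.0                         # accumulated responsibilities for Gaussian i
prior_mean = np.array([2.0, 2.0])  # m_i, the UBM mean
data_mean = np.array([3.0, 1.0])   # E_i[x], the responsibility-weighted sample mean

alpha = n_i / (n_i + r)
new_mean = alpha * data_mean + (1 - alpha) * prior_mean
print(new_mean)  # [2.7142857 1.2857143]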
Example 5
def test_gmm_ML_2():
    # Trains a GMMMachine with ML_GMMTrainer; compares to a reference
    ar = load_array(
        resource_filename("bob.learn.em", "data/dataNormalized.hdf5"))

    # Test results
    # Load torch3vision reference
    meansML_ref = load_array(
        resource_filename("bob.learn.em", "data/meansAfterML.hdf5"))
    variancesML_ref = load_array(
        resource_filename("bob.learn.em", "data/variancesAfterML.hdf5"))
    weightsML_ref = load_array(
        resource_filename("bob.learn.em", "data/weightsAfterML.hdf5"))

    for transform in (to_numpy, to_dask_array):
        ar = transform(ar)
        # Initialize GMMMachine
        gmm = GMMMachine(n_gaussians=5)
        gmm.means = load_array(
            resource_filename("bob.learn.em",
                              "data/meansAfterKMeans.hdf5")).astype("float64")
        gmm.variances = load_array(
            resource_filename(
                "bob.learn.em",
                "data/variancesAfterKMeans.hdf5")).astype("float64")
        gmm.weights = np.exp(
            load_array(
                resource_filename(
                    "bob.learn.em",
                    "data/weightsAfterKMeans.hdf5")).astype("float64"))

        threshold = 0.001
        gmm.variance_thresholds = threshold

        # Initialize ML Trainer
        gmm.mean_var_update_threshold = 0.001
        gmm.max_fitting_steps = 25
        gmm.convergence_threshold = 0.000001
        gmm.update_means = True
        gmm.update_variances = True
        gmm.update_weights = True

        # Run ML
        gmm = gmm.fit(ar)

        # Compare to current results
        np.testing.assert_allclose(gmm.means, meansML_ref, atol=3e-3)
        np.testing.assert_allclose(gmm.variances, variancesML_ref, atol=3e-3)
        np.testing.assert_allclose(gmm.weights, weightsML_ref, atol=1e-4)
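
A condensed sketch of the same ML pipeline without the precomputed k-means files, reusing only constructor arguments that appear in these examples (the data is synthetic):

import numpy as np
from bob.learn.em import GMMMachine, KMeansMachine

rng = np.random.RandomState(0)
data = np.vstack([rng.normal(0, 1, (100, 2)), rng.normal(5, 1, (100, 2))])

gmm = GMMMachine(
    n_gaussians=2,
    trainer="ml",
    max_fitting_steps=25,
    convergence_threshold=1e-6,
    update_means=True,
    update_variances=True,
    update_weights=True,
    k_means_trainer=KMeansMachine(n_clusters=2, max_iter=25),
).fit(data)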
Example 6
def test_map_transformer():
    post_data = np.array([[1, 2, 2], [2, 1, 2], [7, 8, 9], [7, 7, 8],
                          [7, 9, 7]])
    test_data = np.array([[1, 1, 1], [1, 1, 2], [8, 9, 9], [8, 8, 8]])
    n_gaussians = 2
    n_features = 3
    prior_machine = GMMMachine(n_gaussians)
    prior_machine.means = np.array([[2, 2, 2], [8, 8, 8]])
    prior_machine.variances = np.ones_like(prior_machine.means)
    prior_machine.weights = np.array([0.5, 0.5])

    machine = GMMMachine(
        n_gaussians,
        trainer="map",
        ubm=prior_machine,
        update_means=True,
        update_variances=True,
        update_weights=True,
    )

    for transform in (to_numpy, to_dask_array):
        post_data = transform(post_data)
        machine = machine.fit(post_data)

        expected_means = np.array([[1.83333333, 1.83333333, 2.0],
                                   [7.57142857, 8, 8]])
        np.testing.assert_almost_equal(machine.means, expected_means)
        eps = np.finfo(float).eps
        expected_vars = np.array([[eps, eps, eps], [eps, eps, eps]])
        np.testing.assert_almost_equal(machine.variances, expected_vars)
        expected_weights = np.array([0.46226415, 0.53773585])
        np.testing.assert_almost_equal(machine.weights, expected_weights)

        stats = machine.acc_stats(test_data)

        expected_stats = GMMStats(n_gaussians, n_features)
        expected_stats.init_fields(
            log_likelihood=-1.3837590691807108e16,
            t=test_data.shape[0],
            n=np.array([2, 2], dtype=float),
            sum_px=np.array([[2, 2, 3], [16, 17, 17]], dtype=float),
            sum_pxx=np.array([[2, 2, 5], [128, 145, 145]], dtype=float),
        )
        assert stats.is_similar_to(expected_stats)
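
Note that the expected variances here are all np.finfo(float).eps, i.e. the raw MAP updates fell below the variance floor and were clipped up to it. Conceptually the floor is an elementwise maximum (a sketch of the flooring semantics, not the library's internal code):

import numpy as np

eps = np.finfo(float).eps
raw_variances = np.array([[1e-20, 0.0, 1e-18], [0.0, 1e-30, 1e-22]])
floored = np.maximum(raw_variances, eps)  # every entry below eps is raised to eps
print(floored)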
Example 7
def test_gmm_kmeans_plusplus_init():
    n_gaussians = 3
    machine = GMMMachine(
        n_gaussians,
        k_means_trainer=KMeansMachine(n_clusters=n_gaussians,
                                      init_method="k-means++"),
    )
    data = np.array([[1.5, 1], [1, 1.5], [-1, 0.5], [-1.5, 0], [2, 2],
                     [2.5, 2.5]])
    for transform in (to_numpy, to_dask_array):
        data = transform(data)
        machine = machine.fit(data)
        expected_means = np.array([[2.25, 2.25], [-1.25, 0.25], [1.25, 1.25]])
        expected_variances = np.array([[1 / 16, 1 / 16], [1 / 16, 1 / 16],
                                       [1 / 16, 1 / 16]])
        np.testing.assert_almost_equal(machine.means,
                                       expected_means,
                                       decimal=3)
        np.testing.assert_almost_equal(machine.variances, expected_variances)
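
The expected values can be verified by hand: each of the three clusters contains exactly two points, so the means are pairwise averages, and two points 0.5 apart per dimension give a per-dimension variance of 0.25**2 = 1/16:

import numpy as np

clusters = [
    np.array([[2.0, 2.0], [2.5, 2.5]]),
    np.array([[-1.0, 0.5], [-1.5, 0.0]]),
    np.array([[1.5, 1.0], [1.0, 1.5]]),
]
means = np.array([c.mean(axis=0) for c in clusters])
variances = np.array([c.var(axis=0) for c in clusters])
print(means)      # [[ 2.25  2.25] [-1.25  0.25] [ 1.25  1.25]]
print(variances)  # every entry is 1/16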
Example 8
def test_gmm_MAP_2():
    # Trains a GMMMachine with MAP_GMMTrainer and compares with a Matlab reference

    data = load_array(resource_filename("bob.learn.em", "data/data.hdf5"))
    data = data.reshape((1, -1))  # make a 2D array out of it
    means = load_array(resource_filename("bob.learn.em", "data/means.hdf5"))
    variances = load_array(
        resource_filename("bob.learn.em", "data/variances.hdf5"))
    weights = load_array(resource_filename("bob.learn.em",
                                           "data/weights.hdf5"))

    gmm = GMMMachine(n_gaussians=2)
    gmm.means = means
    gmm.variances = variances
    gmm.weights = weights

    gmm_adapted = GMMMachine(
        n_gaussians=2,
        trainer="map",
        ubm=gmm,
        max_fitting_steps=1,
        update_means=True,
        update_variances=False,
        update_weights=False,
        mean_var_update_threshold=0.0,
    )
    gmm_adapted.means = means
    gmm_adapted.variances = variances
    gmm_adapted.weights = weights

    new_means = load_array(
        resource_filename("bob.learn.em", "data/new_adapted_mean.hdf5"))

    for transform in (to_numpy, to_dask_array):
        data = transform(data)
        gmm_adapted = gmm_adapted.fit(data)

        # Compare to matlab reference
        np.testing.assert_allclose(new_means.T, gmm_adapted.means, rtol=1e-4)
Example 9
def test_ml_transformer():
    data = np.array([[1, 2, 2], [2, 1, 2], [7, 8, 9], [7, 7, 8], [7, 9, 7]])
    test_data = np.array([[1, 1, 1], [1, 1, 2], [8, 9, 9], [8, 8, 8]])
    n_gaussians = 2
    n_features = 3

    machine = GMMMachine(
        n_gaussians,
        update_means=True,
        update_variances=True,
        update_weights=True,
    )
    machine.means = np.array([[2, 2, 2], [8, 8, 8]])
    machine.variances = np.ones_like(machine.means)

    for transform in (to_numpy, to_dask_array):
        data = transform(data)
        machine = machine.fit(data)

        expected_means = np.array([[1.5, 1.5, 2.0], [7.0, 8.0, 8.0]])
        np.testing.assert_almost_equal(machine.means, expected_means)
        expected_weights = np.array([2 / 5, 3 / 5])
        np.testing.assert_almost_equal(machine.weights, expected_weights)
        eps = np.finfo(float).eps
        expected_variances = np.array([[1 / 4, 1 / 4, eps],
                                       [eps, 2 / 3, 2 / 3]])
        np.testing.assert_almost_equal(machine.variances, expected_variances)

        stats = machine.acc_stats(test_data)

        expected_stats = GMMStats(n_gaussians, n_features)
        expected_stats.init_fields(
            log_likelihood=-6755399441055685.0,
            t=test_data.shape[0],
            n=np.array([2, 2], dtype=float),
            sum_px=np.array([[2, 2, 3], [16, 17, 17]], dtype=float),
            sum_pxx=np.array([[2, 2, 5], [128, 145, 145]], dtype=float),
        )
        assert stats.is_similar_to(expected_stats)
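
Because each test point lies far from the "wrong" Gaussian, the responsibilities are effectively one-hot, so the accumulated statistics reduce to per-cluster counts and sums; the expected n, sum_px and sum_pxx can be reproduced by hand:

import numpy as np

test_data = np.array([[1, 1, 1], [1, 1, 2], [8, 9, 9], [8, 8, 8]], dtype=float)
assign = np.array([0, 0, 1, 1])  # nearest Gaussian per point (hard assignment)

n = np.array([(assign == k).sum() for k in (0, 1)], dtype=float)
sum_px = np.array([test_data[assign == k].sum(axis=0) for k in (0, 1)])
sum_pxx = np.array([(test_data[assign == k] ** 2).sum(axis=0) for k in (0, 1)])
print(n)        # [2. 2.]
print(sum_px)   # [[ 2.  2.  3.] [16. 17. 17.]]
print(sum_pxx)  # [[  2.   2.   5.] [128. 145. 145.]]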
Example 10
# Assumed setup for this snippet: Iris data via scikit-learn, matplotlib for plotting.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

from bob.learn.em import GMMMachine

iris_data = load_iris()

data = np.column_stack((iris_data.data[:, 0], iris_data.data[:, 3]))
setosa = data[iris_data.target == 0]
versicolor = data[iris_data.target == 1]
virginica = data[iris_data.target == 2]

# A GMM with 3 Gaussians over the 2D (sepal length, petal width) features
machine = GMMMachine(
    3,
    convergence_threshold=1e-5,
    update_means=True,
    update_variances=True,
    update_weights=True,
)

# Train the GMM on the data (k-means initialization runs automatically)
machine = machine.fit(data)

# Plotting
figure, ax = plt.subplots()
ax.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
ax.scatter(versicolor[:, 0],
           versicolor[:, 1],
           c="goldenrod",
           label="versicolor")
ax.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
ax.scatter(
    machine.means[:, 0],
    machine.means[:, 1],
    c="blue",
    marker="x",
    label="centroids",
Example 11
class GMM(BioAlgorithm, BaseEstimator):
    """Algorithm for computing UBM and Gaussian Mixture Models of the features.

    Features must be normalized to zero mean and unit standard deviation.

    Models are MAP GMM machines trained from a UBM on the enrollment feature set.

    The UBM is a ML GMM machine trained on the training feature set.

    Probes are GMM statistics of features projected on the UBM.
    """
    def __init__(
        self,
        # parameters for the GMM
        number_of_gaussians: int,
        # parameters of UBM training
        kmeans_training_iterations: int = 25,  # Maximum number of iterations for K-Means
        kmeans_init_iterations: Union[int, None] = None,  # Maximum number of iterations for K-Means init
        kmeans_oversampling_factor: int = 64,
        ubm_training_iterations: int = 25,  # Maximum number of iterations for GMM training
        training_threshold: float = 5e-4,  # Threshold to end the ML training
        variance_threshold: float = 5e-4,  # Minimum value that a variance can reach
        update_means: bool = True,
        update_variances: bool = True,
        update_weights: bool = True,
        # parameters of the GMM enrollment (MAP)
        gmm_enroll_iterations: int = 1,
        enroll_update_means: bool = True,
        enroll_update_variances: bool = False,
        enroll_update_weights: bool = False,
        enroll_relevance_factor: Union[float, None] = 4,
        enroll_alpha: float = 0.5,
        # scoring
        scoring_function: Callable = linear_scoring,
        # RNG
        init_seed: int = 5489,
        **kwargs,
    ):
        """Initializes the local UBM-GMM tool chain.

        Parameters
        ----------
        number_of_gaussians
            The number of Gaussians used in the UBM and the models.
        kmeans_training_iterations
            Number of e-m iterations to train k-means initializing the UBM.
        kmeans_init_iterations
            Number of iterations used for setting the k-means initial centroids.
            If None, uses the same value as kmeans_training_iterations.
        kmeans_oversampling_factor
            Oversampling factor used by k-means initializer.
        ubm_training_iterations
            Number of e-m iterations for training the UBM.
        training_threshold
            Convergence threshold to halt the GMM training early.
        variance_threshold
            Minimum value a variance of the Gaussians can reach.
        update_weights
            Decides whether the weights of the Gaussians are updated while training.
        update_means
            Decides whether the means of the Gaussians are updated while training.
        update_variances
            Decides whether the variances of the Gaussians are updated while training.
        gmm_enroll_iterations
            Number of iterations for the MAP GMM used for enrollment.
        enroll_update_weights
            Decides whether the weights of the Gaussians are updated while enrolling.
        enroll_update_means
            Decides whether the means of the Gaussians are updated while enrolling.
        enroll_update_variances
            Decides whether the variances of the Gaussians are updated while enrolling.
        enroll_relevance_factor
            For enrollment: MAP relevance factor as described in Reynolds' paper.
            If None, Reynolds adaptation is not applied.
        enroll_alpha
            For enrollment: MAP adaptation coefficient.
        init_seed
            Seed for the random number generation.
        scoring_function
            Function returning a score from a model, a UBM, and a probe.
        """
        super().__init__(**kwargs)

        # Copy parameters
        self.number_of_gaussians = number_of_gaussians
        self.kmeans_training_iterations = kmeans_training_iterations
        self.kmeans_init_iterations = (kmeans_training_iterations
                                       if kmeans_init_iterations is None else
                                       kmeans_init_iterations)
        self.kmeans_oversampling_factor = kmeans_oversampling_factor
        self.ubm_training_iterations = ubm_training_iterations
        self.training_threshold = training_threshold
        self.variance_threshold = variance_threshold
        self.update_weights = update_weights
        self.update_means = update_means
        self.update_variances = update_variances
        self.enroll_relevance_factor = enroll_relevance_factor
        self.enroll_alpha = enroll_alpha
        self.gmm_enroll_iterations = gmm_enroll_iterations
        self.enroll_update_means = enroll_update_means
        self.enroll_update_weights = enroll_update_weights
        self.enroll_update_variances = enroll_update_variances
        self.init_seed = init_seed
        self.rng = self.init_seed

        self.scoring_function = scoring_function

        self.ubm = None

    def _check_feature(self, feature):
        """Checks that the features are appropriate"""
        if (not isinstance(feature, np.ndarray) or feature.ndim != 2
                or feature.dtype != np.float64):
            raise ValueError(
                f"The given feature is not appropriate: \n{feature}")
        if self.ubm is not None and feature.shape[1] != self.ubm.shape[1]:
            raise ValueError(
                "The given feature is expected to have %d elements, but it has %d"
                % (self.ubm.shape[1], feature.shape[1]))

    def save_model(self, ubm_file):
        """Saves the projector (UBM) to file."""
        # Saves the UBM to file
        logger.debug("Saving model to file '%s'", ubm_file)

        hdf5 = (ubm_file if isinstance(ubm_file, HDF5File) else HDF5File(
            ubm_file, "w"))
        self.ubm.save(hdf5)

    def load_model(self, ubm_file):
        """Loads the projector (UBM) from a file."""
        hdf5file = HDF5File(ubm_file, "r")
        logger.debug("Loading model from file '%s'", ubm_file)
        # Read the UBM
        self.ubm = GMMMachine.from_hdf5(hdf5file)
        self.ubm.variance_thresholds = self.variance_threshold

    def project(self, array):
        """Computes GMM statistics against a UBM, given a 2D array of feature vectors

        This is applied to the probes before scoring.
        """
        self._check_feature(array)
        logger.debug("Projecting %d feature vectors", array.shape[0])
        # Accumulates statistics
        gmm_stats = self.ubm.transform(array)
        gmm_stats.compute()

        # Return the resulting statistics
        return gmm_stats

    def enroll(self, data):
        """Enrolls a GMM using MAP adaptation given a reference's feature vectors

        Returns a GMMMachine tuned from the UBM with MAP adaptation on the biometric reference's data.
        """

        for feature in data:
            self._check_feature(feature)

        # If the input is a list (or SampleBatch) of 2D arrays, stack it into one array
        if data[0].ndim == 2:
            data = np.vstack(data)

        # Use the array to train a GMM and return it
        logger.info("Enrolling with %d feature vectors", data.shape[0])

        gmm = GMMMachine(
            n_gaussians=self.number_of_gaussians,
            trainer="map",
            ubm=copy.deepcopy(self.ubm),
            convergence_threshold=self.training_threshold,
            max_fitting_steps=self.gmm_enroll_iterations,
            random_state=self.rng,
            update_means=self.enroll_update_means,
            update_variances=self.enroll_update_variances,
            update_weights=self.enroll_update_weights,
            mean_var_update_threshold=self.variance_threshold,
            map_relevance_factor=self.enroll_relevance_factor,
            map_alpha=self.enroll_alpha,
        )
        gmm.fit(data)
        return gmm

    def read_biometric_reference(self, model_file):
        """Reads an enrolled reference model, which is a MAP GMMMachine."""
        if self.ubm is None:
            raise ValueError(
                "You must load a UBM before reading a biometric reference.")
        return GMMMachine.from_hdf5(HDF5File(model_file, "r"), ubm=self.ubm)

    def write_biometric_reference(self, model: GMMMachine, model_file):
        """Write the enrolled reference (MAP GMMMachine) into a file."""
        return model.save(model_file)

    def score(self, biometric_reference: GMMMachine, probe):
        """Computes the score for the given model and the given probe.

        Uses the scoring function passed during initialization.

        Parameters
        ----------
        biometric_reference:
            The model to score against.
        probe:
            The probe data to compare to the model.
        """

        if not isinstance(probe, GMMStats):
            # Projection is done here rather than in transform, because transform would also be applied to the enrollment data.
            probe = self.project(probe)
        return self.scoring_function(
            models_means=[biometric_reference],
            ubm=self.ubm,
            test_stats=probe,
            frame_length_normalization=True,
        )[0]

    def score_multiple_biometric_references(
            self, biometric_references: "list[GMMMachine]", probe: GMMStats):
        """Computes the score between multiple models and one probe.

        Uses the scoring function passed during initialization.

        Parameters
        ----------
        biometric_references:
            The models to score against.
        probe:
            The probe data to compare to the models.
        """

        stats = (self.project(probe)
                 if not isinstance(probe, GMMStats) else probe)
        return self.scoring_function(
            models_means=biometric_references,
            ubm=self.ubm,
            test_stats=stats,
            frame_length_normalization=True,
        )

    def fit(self, array, y=None, **kwargs):
        """Trains the UBM."""
        # Stack all the samples in a 2D array of features
        if isinstance(array, da.Array):
            array = array.persist()

        # if input is a list (or SampleBatch) of 2 dimensional arrays, stack them
        if array[0].ndim == 2:
            array = np.vstack(array)

        logger.debug(
            f"Creating UBM machine with {self.number_of_gaussians} gaussians and {len(array)} samples"
        )

        self.ubm = GMMMachine(
            n_gaussians=self.number_of_gaussians,
            trainer="ml",
            max_fitting_steps=self.ubm_training_iterations,
            convergence_threshold=self.training_threshold,
            update_means=self.update_means,
            update_variances=self.update_variances,
            update_weights=self.update_weights,
            mean_var_update_threshold=self.variance_threshold,
            k_means_trainer=KMeansMachine(
                self.number_of_gaussians,
                convergence_threshold=self.training_threshold,
                max_iter=self.kmeans_training_iterations,
                init_method="k-means||",
                init_max_iter=self.kmeans_init_iterations,
                random_state=self.init_seed,
                oversampling_factor=self.kmeans_oversampling_factor,
            ),
        )

        # Train the GMM
        logger.info("Training UBM GMM")

        self.ubm.fit(array)

        return self

    def transform(self, X, **kwargs):
        """Passthrough. Enroll applies a different transform as score."""
        # The idea would be to apply the projection in Transform (going from extracted
        # to GMMStats), but we must not apply this during the training or enrollment
        # (those require extracted data directly, not projected).
        # `project` is applied in the score function directly.
        return X

    @classmethod
    def custom_enrolled_save_fn(cls, data, path):
        data.save(path)

    def custom_enrolled_load_fn(self, path):
        return GMMMachine.from_hdf5(path, ubm=self.ubm)

    def _more_tags(self):
        return {
            "bob_fit_supports_dask_array": True,
            "bob_enrolled_save_fn": self.custom_enrolled_save_fn,
            "bob_enrolled_load_fn": self.custom_enrolled_load_fn,
        }