import warnings

import numpy as np

from joblib import Memory
from nibabel import Nifti1Image
from sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted

# NOTE: recursive_neighbor_agglomeration is defined earlier in this module
# (not shown in this excerpt).

class ReNA(BaseEstimator, ClusterMixin, TransformerMixin):
    """Recursive Neighbor Agglomeration (ReNA):
    Recursively merges pairs of clusters according to a 1-nearest-neighbor
    criterion [1]_.

    Parameters
    ----------
    mask_img: Niimg-like object
        Mask image used for masking the data.

    n_clusters: int, optional (default 2)
        The number of clusters to find.

    scaling: bool, optional (default False)
        If scaling is True, each cluster is scaled by the square root of its
        size, preserving the l2-norm of the image.

    n_iter: int, optional (default 10)
        Number of iterations of the recursive neighbor agglomeration.

    threshold: float in the open interval (0., 1.), optional (default 1e-7)
        Threshold used to handle eccentricities.

    memory: instance of joblib.Memory or string
        Used to cache the masking process.
        By default, no caching is done. If a string is given, it is the
        path to the caching directory.

    memory_level: integer, optional (default 1)
        Rough estimator of the amount of memory used by caching. Higher value
        means more memory for caching.

    verbose: int, optional (default 0)
        Verbosity level.

    Attributes
    ----------
    `labels_`: ndarray, shape = [n_features]
        Cluster labels of each feature.

    `n_clusters_`: int
        Number of clusters.

    `sizes_`: ndarray, shape = [n_clusters_]
        Size of each cluster.

    References
    ----------
    .. [1] A. Hoyos-Idrobo, G. Varoquaux, J. Kahn and B. Thirion, "Recursive
           Nearest Agglomeration (ReNA): Fast Clustering for Approximation of
           Structured Signals," in IEEE Transactions on Pattern Analysis and
           Machine Intelligence, vol. 41, no. 3, pp. 669-681, 1 March 2019.
           https://hal.archives-ouvertes.fr/hal-01366651/
    """
    def __init__(self,
                 mask_img,
                 n_clusters=2,
                 scaling=False,
                 n_iter=10,
                 threshold=1e-7,
                 memory=None,
                 memory_level=1,
                 verbose=0):
        self.mask_img = mask_img
        self.n_clusters = n_clusters
        self.scaling = scaling
        self.n_iter = n_iter
        self.threshold = threshold
        self.memory = memory
        self.memory_level = memory_level
        self.verbose = verbose

    def fit(self, X, y=None):
        """Compute clustering of the data.

        Parameters
        ----------
        X: ndarray, shape = [n_samples, n_features]
            Training data.
        y: Ignored

        Returns
        -------
        self: `ReNA` object
        """

        X = check_array(X,
                        ensure_min_features=2,
                        ensure_min_samples=2,
                        estimator=self)
        n_features = X.shape[1]

        if not isinstance(self.mask_img, (str, Nifti1Image)):
            raise ValueError("The mask image should be a Niimg-like"
                             "object. Instead a %s object was provided." %
                             type(self.mask_img))

        if self.memory is None or isinstance(self.memory, str):
            self.memory_ = Memory(location=self.memory,
                                  verbose=max(0, self.verbose - 1))
        else:
            self.memory_ = self.memory

        if self.n_clusters <= 0:
            raise ValueError("n_clusters should be an integer greater than 0."
                             " %s was provided." % str(self.n_clusters))

        if self.n_iter <= 0:
            raise ValueError("n_iter should be an integer greater than 0."
                             " %s was provided." % str(self.n_iter))

        if self.n_clusters > n_features:
            self.n_clusters = n_features
            warnings.warn("n_clusters should be at most the number of "
                          "features. Taking n_clusters = %s instead." %
                          str(n_features))

        n_components, labels = self.memory_.cache(
            recursive_neighbor_agglomeration)(X,
                                              self.mask_img,
                                              self.n_clusters,
                                              n_iter=self.n_iter,
                                              threshold=self.threshold,
                                              verbose=self.verbose)

        sizes = np.bincount(labels)
        sizes = sizes[sizes > 0]

        self.labels_ = labels
        self.n_clusters_ = np.unique(self.labels_).shape[0]
        self.sizes_ = sizes

        return self

    def transform(self, X, y=None):
        """Apply clustering, reduce the dimensionality of the data.

        Parameters
        ----------
        X: ndarray, shape = [n_samples, n_features]
            Data to transform with the fitted clustering.

        Returns
        -------
        X_red: ndarray, shape = [n_samples, n_clusters]
            Data reduced with agglomerated signal for each cluster.
        """

        check_is_fitted(self, "labels_")

        unique_labels = np.unique(self.labels_)

        mean_cluster = []
        for label in unique_labels:
            mean_cluster.append(np.mean(X[:, self.labels_ == label], axis=1))

        X_red = np.array(mean_cluster).T

        if self.scaling:
            X_red = X_red * np.sqrt(self.sizes_)

        return X_red

    def inverse_transform(self, X_red):
        """Send the reduced 2D data matrix back to the original feature
        space (voxels).

        Parameters
        ----------
        X_red: ndarray, shape = [n_samples, n_clusters]
            Data reduced with agglomerated signal for each cluster.

        Returns
        -------
        X_inv: ndarray, shape = [n_samples, n_features]
            Data expanded back to the original feature space.
        """

        check_is_fitted(self, "labels_")

        _, inverse = np.unique(self.labels_, return_inverse=True)

        if self.scaling:
            X_red = X_red / np.sqrt(self.sizes_)
        X_inv = X_red[..., inverse]

        return X_inv
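
##############################################################################
# Example usage of ReNA
# ---------------------
# A minimal sketch, not part of the original source: it assumes a toy
# 3x3x3 mask (27 voxels) and a random data matrix, just to illustrate the
# fit/transform/inverse_transform round trip.
import nibabel as nib

toy_data = np.random.RandomState(0).randn(5, 27)  # 5 samples, 27 voxels
toy_mask = nib.Nifti1Image(np.ones((3, 3, 3), dtype=np.int8), np.eye(4))

rena = ReNA(mask_img=toy_mask, n_clusters=4)
X_red = rena.fit_transform(toy_data)      # reduced: shape (5, 4)
X_inv = rena.inverse_transform(X_red)     # back to voxel space: shape (5, 27)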

##############################################################################
# Extracting region signals
# -------------------------
# NOTE: this snippet originally began mid-statement. The imports, dataset
# fetching, and the opening of the masker construction below are a
# reconstruction (assuming the MSDL atlas, the ADHD resting-state dataset,
# and a NiftiMapsMasker, as in the nilearn connectome examples); only the
# keyword arguments from t_r=2 onward come from the original text.
from nilearn import datasets, image
from nilearn.input_data import NiftiMapsMasker

mem = Memory('nilearn_cache')

msdl_atlas_dataset = datasets.fetch_atlas_msdl()
rest_dataset = datasets.fetch_adhd(n_subjects=10)

masker = NiftiMapsMasker(msdl_atlas_dataset.maps,
                         resampling_target="maps",
                         detrend=True,
                         low_pass=None,
                         high_pass=0.01,
                         t_r=2,
                         standardize=True,
                         memory='nilearn_cache',
                         memory_level=1,
                         verbose=2)
masker.fit()

subject_time_series = []
func_filenames = rest_dataset.func
confound_filenames = rest_dataset.confounds
for func_filename, confound_filename in zip(func_filenames,
                                            confound_filenames):
    print("Processing file %s" % func_filename)

    # Computing some confounds
    hv_confounds = mem.cache(image.high_variance_confounds)(func_filename)

    region_ts = masker.transform(func_filename,
                                 confounds=[hv_confounds, confound_filename])
    subject_time_series.append(region_ts)
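
# Each entry of subject_time_series is an (n_timepoints, n_regions) array.
# A quick sanity check (illustrative, not part of the original example):
print("Extracted %d subjects; first has shape %s"
      % (len(subject_time_series), subject_time_series[0].shape))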

##############################################################################
# Computing group-sparse precision matrices
# ------------------------------------------
from nilearn.connectome import GroupSparseCovarianceCV
gsc = GroupSparseCovarianceCV(verbose=2)
gsc.fit(subject_time_series)
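
# The group-sparse estimator yields one covariance/precision matrix per
# subject, stacked along the last axis. A quick inspection (illustrative,
# not part of the original example):
print("Selected alpha: %.3e" % gsc.alpha_)
print("Precision matrices shape: %s" % str(gsc.precisions_.shape))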

try:
    from sklearn.covariance import GraphicalLassoCV
except ImportError:
    # GraphicalLassoCV was named GraphLassoCV before scikit-learn 0.20
    from sklearn.covariance import GraphLassoCV as GraphicalLassoCV
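
# A plausible continuation (sketch, not original text): fit a single
# graphical lasso on the concatenated time series, for comparison with
# the group-sparse estimate above.
gl = GraphicalLassoCV(verbose=2)
gl.fit(np.concatenate(subject_time_series))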