def transform(self, imgs, confounds=None):
    """Extract signals from parcellations learned on fmri images.

    Builds a :class:`NiftiLabelsMasker` from the fitted ``labels_img_``
    and the fitting-time signal-cleaning parameters, then extracts one
    signal matrix per input image (in parallel).

    Parameters
    ----------
    imgs: List of Nifti-like images
        See http://nilearn.github.io/manipulating_images/input_output.html.
        Images to process.

    confounds: List of CSV files or arrays-like, optional
        Each file or numpy array in a list should have shape
        (number of scans, number of confounds). This parameter is passed
        to signal.clean. Please see the related documentation for
        details. Must be of same length of imgs.

    Returns
    -------
    region_signals: List of or 2D numpy.ndarray
        Signals extracted for each label for each image.
        Example, for single image shape will be
        (number of scans, number of labels)
    """
    self._check_fitted()
    # Normalizes imgs/confounds to lists and records whether the caller
    # passed a single subject (so we can unwrap the result below).
    imgs, confounds, single_subject = _check_parameters_transform(
        imgs, confounds)
    # Requires for special cases like extracting signals on list of
    # 3D images
    imgs_list = _iter_check_niimg(imgs, atleast_4d=True)

    # The masker reuses the mask and all signal-cleaning parameters
    # chosen at fit() time; resampling_target='data' resamples the
    # parcellation onto each image's grid rather than the reverse.
    masker = NiftiLabelsMasker(self.labels_img_,
                               mask_img=self.masker_.mask_img_,
                               smoothing_fwhm=self.smoothing_fwhm,
                               standardize=self.standardize,
                               detrend=self.detrend,
                               low_pass=self.low_pass,
                               high_pass=self.high_pass,
                               t_r=self.t_r,
                               resampling_target='data',
                               memory=self.memory,
                               memory_level=self.memory_level,
                               verbose=self.verbose)

    # One extraction job per (image, confound) pair; _cache memoizes
    # the helper at memory level 2.
    region_signals = Parallel(n_jobs=self.n_jobs)(
        delayed(self._cache(_labels_masker_extraction,
                            func_memory_level=2))
        (img, masker, confound)
        for img, confound in zip(imgs_list, confounds))

    if single_subject:
        return region_signals[0]
    else:
        return region_signals
def _unmix_components(self, components):
    """Core function of CanICA that rotates ``components_`` to maximize
    independence.

    Runs FastICA ``n_init`` times with different seeds, keeps the run
    whose maps are the sparsest, optionally thresholds the maps, and
    stores the result (sign-flipped so each component's peak is
    positive) in ``self.components_`` / ``self.components_img_``.
    """
    random_state = check_random_state(self.random_state)

    # One independent seed per FastICA restart, drawn from a single
    # generator so the whole procedure is reproducible.
    seeds = random_state.randint(np.iinfo(np.int32).max, size=self.n_init)
    # Note: fastICA is very unstable, hence we use 64bit on it
    results = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
        delayed(self._cache(fastica, func_memory_level=2))
        (components.astype(np.float64), whiten=True, fun='cube',
         random_state=seed)
        for seed in seeds)

    # fastica returns (K, W, sources); we only need the sources,
    # transposed so rows are components.
    ica_maps_gen_ = (result[2].T for result in results)
    # Sparsity proxy: the largest L1 norm over components; the run
    # minimizing it is kept (smaller == sparser maps).
    ica_maps_and_sparsities = ((ica_map,
                                np.sum(np.abs(ica_map), axis=1).max())
                               for ica_map in ica_maps_gen_)
    ica_maps, _ = min(ica_maps_and_sparsities, key=itemgetter(-1))

    # Thresholding
    ratio = None
    if isinstance(self.threshold, float):
        ratio = self.threshold
    elif self.threshold == 'auto':
        ratio = 1.
    elif self.threshold is not None:
        raise ValueError("Threshold must be None, "
                         "'auto' or float. You provided %s." %
                         str(self.threshold))
    if ratio is not None:
        abs_ica_maps = np.abs(ica_maps)
        # Keep roughly `ratio` voxels per component: the percentile is
        # scaled by the number of components.
        threshold = scoreatpercentile(
            abs_ica_maps,
            100. - (100. / len(ica_maps)) * ratio)
        ica_maps[abs_ica_maps < threshold] = 0.
    # We make sure that we keep the dtype of components
    self.components_ = ica_maps.astype(self.components_.dtype)

    # flip signs in each component so that peak is +ve
    for component in self.components_:
        if component.max() < -component.min():
            component *= -1

    # Only build the image representation when a masker is available.
    if hasattr(self, "masker_"):
        self.components_img_ = self.masker_.inverse_transform(
            self.components_)
def inverse_transform(self, signals):
    """Transform signals extracted from parcellations back to brain
    images.

    Uses `labels_img_` (parcellations) built at fit() level.

    Parameters
    ----------
    signals: List of 2D numpy.ndarray
        Each 2D array with shape (number of scans, number of regions)

    Returns
    -------
    imgs: List of or Nifti-like image
        Brain image(s)
    """
    from .signal_extraction import signals_to_img_labels

    self._check_fitted()

    # A bare ndarray, or anything that is not a list/tuple, counts as a
    # single subject; wrap it so the parallel loop below is uniform.
    if isinstance(signals, np.ndarray) or \
            not isinstance(signals, (list, tuple)):
        signals = [signals]
        single_subject = True
    else:
        # A length-one list/tuple is also treated as a single subject.
        single_subject = len(signals) == 1

    imgs = Parallel(n_jobs=self.n_jobs)(
        delayed(self._cache(signals_to_img_labels, func_memory_level=2))
        (each_signal, self.labels_img_, self.mask_img_)
        for each_signal in signals)

    return imgs[0] if single_subject else imgs
def fit(self, X, y, groups=None):
    """Fit the decoder (learner).

    Parameters
    ----------
    X: list of Niimg-like objects
        See http://nilearn.github.io/manipulating_images/input_output.html
        Data on which model is to be fitted. If this is a list,
        the affine is considered the same for all.

    y: numpy.ndarray of shape=(n_samples) or list of length n_samples
        The dependent variable (age, sex, IQ, yes/no, etc.).
        Target variable to predict. Must have exactly as many elements as
        3D images in niimg.

    groups: None
        Group labels for the samples used while splitting the dataset into
        train/test set. Default None.

        Note that this parameter must be specified in some scikit-learn
        cross-validation generators to calculate the number of splits, e.g.
        sklearn.model_selection.LeaveOneGroupOut or
        sklearn.model_selection.LeavePGroupsOut.

        For more details see
        https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators-for-grouped-data

    Attributes
    ----------
    `masker_`: instance of NiftiMasker or MultiNiftiMasker
        The NiftiMasker used to mask the data.

    `mask_img_`: Nifti1Image
        Mask computed by the masker object.

    `classes_`: numpy.ndarray
        Classes to predict. For classification only.

    `screening_percentile_`: float
        Screening percentile corrected according to volume of mask,
        relative to the volume of standard brain.

    `coef_`: numpy.ndarray, shape=(n_classes, n_features)
        Contains the mean of the models weight vector across
        fold for each class.

    `coef_img_`: dict of Nifti1Image
        Dictionary containing `coef_` with class names as keys,
        and `coef_` transformed in Nifti1Images as values. In the case of
        a regression, it contains a single Nifti1Image at the key 'beta'.

    `intercept_`: narray, shape (nclasses,)
        Intercept (a.k.a. bias) added to the decision function.

    `cv_`: list of pairs of lists
        List of the (n_folds,) folds. For the corresponding fold,
        each pair is composed of two lists of indices,
        one for the train samples and one for the test samples.

    `std_coef_`: numpy.ndarray, shape=(n_classes, n_features)
        Contains the standard deviation of the models weight vector across
        fold for each class. Note that folds are not independent, see
        https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators-for-grouped-data

    `std_coef_img_`: dict of Nifti1Image
        Dictionary containing `std_coef_` with class names as keys,
        and `coef_` transformed in Nifti1Image as values. In the case of
        a regression, it contains a single Nifti1Image at the key 'beta'.

    `cv_params_`: dict of lists
        Best point in the parameter grid for each tested fold
        in the inner cross validation loop.

    `cv_scores_`: dict, (classes, n_folds)
        Scores (misclassification) for each parameter, and on each fold
    """
    self.estimator = _check_estimator(self.estimator)
    self.memory_ = _check_memory(self.memory, self.verbose)

    X = self._apply_mask(X)
    # np.float was a deprecated alias of the builtin float and was
    # removed in NumPy 1.24; np.float64 is the dtype it resolved to.
    X, y = check_X_y(X, y, dtype=np.float64, multi_output=True)

    # Setup scorer
    scorer = check_scoring(self.estimator, self.scoring)

    # Setup cross-validation object. Default is StratifiedKFold when groups
    # is None. If groups is specified but self.cv is not set to custom CV
    # splitter, default is LeaveOneGroupOut. If self.cv is manually set to
    # a CV splitter object do check_cv regardless of groups parameter.
    cv = self.cv
    if (isinstance(cv, int) or cv is None) and groups is not None:
        warnings.warn('groups parameter is specified but '
                      'cv parameter is not set to custom CV splitter. '
                      'Using default object LeaveOneGroupOut().')
        cv_object = LeaveOneGroupOut()
    else:
        cv_object = check_cv(cv, y=y, classifier=self.is_classification)

    self.cv_ = list(cv_object.split(X, y, groups=groups))

    # Define the number of problems to solve. In case of classification
    # this number corresponds to the number of binary problems to solve
    if self.is_classification:
        y = self._binarize_y(y)
    else:
        # Regression: single problem, keep y 2D for uniform indexing.
        y = y[:, np.newaxis]

    if self.is_classification and self.n_classes_ > 2:
        n_problems = self.n_classes_
    else:
        n_problems = 1

    # Return a suitable screening percentile according to the mask image
    self.screening_percentile_ = _adjust_screening_percentile(
        self.screening_percentile, self.mask_img_, verbose=self.verbose)

    # One fit per (binary problem, CV fold) pair, run in parallel.
    parallel = Parallel(n_jobs=self.n_jobs, verbose=2 * self.verbose)
    parallel_fit_outputs = parallel(
        delayed(self._cache(_parallel_fit))
        (self.estimator, X, y[:, c], train, test, self.param_grid,
         self.is_classification, scorer, self.mask_img_, c,
         self.screening_percentile_)
        for c, (train, test) in itertools.product(range(n_problems),
                                                  self.cv_))

    coefs, intercepts = self._fetch_parallel_fit_outputs(
        parallel_fit_outputs, y, n_problems)

    # Build the final model (the aggregated one): mean/std of the
    # per-fold weights, mean of the per-fold intercepts.
    self.coef_ = np.vstack([
        np.mean(coefs[class_index], axis=0)
        for class_index in self.classes_
    ])
    self.std_coef_ = np.vstack([
        np.std(coefs[class_index], axis=0)
        for class_index in self.classes_
    ])
    self.intercept_ = np.hstack([
        np.mean(intercepts[class_index], axis=0)
        for class_index in self.classes_
    ])
    self.coef_img_, self.std_coef_img_ = self._output_image(
        self.classes_, self.coef_, self.std_coef_)

    # Binary classification: keep a single weight vector/intercept, as
    # scikit-learn does.
    if self.is_classification and (self.n_classes_ == 2):
        self.coef_ = self.coef_[0, :][np.newaxis, :]
        self.intercept_ = self.intercept_[0]
def compute_multi_background_mask(data_imgs, border_size=2,
                                  upper_cutoff=0.85,
                                  connected=True, opening=2,
                                  threshold=0.5,
                                  target_affine=None, target_shape=None,
                                  exclude_zeros=False,
                                  n_jobs=1, memory=None, verbose=0):
    """ Compute a common mask for several sessions or subjects of data.

    Uses the mask-finding algorithms to extract masks for each session
    or subject, and then keep only the main connected component of a
    given fraction of the intersection of all the masks.

    Parameters
    ----------
    data_imgs: list of Niimg-like objects
        See http://nilearn.github.io/manipulating_images/input_output.html
        A list of arrays, each item being a subject or a session.
        3D and 4D images are accepted.
        If 3D images is given, we suggest to use the mean image of each
        session

    threshold: float, optional
        the inter-session threshold: the fraction of the
        total number of session in for which a voxel must be in the
        mask to be kept in the common mask.
        threshold=1 corresponds to keeping the intersection of all
        masks, whereas threshold=0 is the union of all masks.

    border_size: integer, optional
        The size, in voxel of the border used on the side of the image
        to determine the value of the background.

    upper_cutoff: float, optional
        NOTE(review): accepted but not forwarded to the per-image mask
        computation below; presumably kept for signature symmetry with
        the multi-EPI variant — confirm before relying on it.

    connected: bool, optional
        if connected is True, only the largest connected component is kept.

    opening: integer, optional
        Passed to the per-image background mask computation
        (morphological opening size).

    target_affine: 3x3 or 4x4 matrix, optional
        This parameter is passed to image.resample_img. Please see the
        related documentation for details.

    target_shape: 3-tuple of integers, optional
        This parameter is passed to image.resample_img. Please see the
        related documentation for details.

    exclude_zeros: bool, optional
        NOTE(review): accepted but currently unused by this function —
        confirm intent before relying on it.

    memory: instance of joblib.Memory or string
        Used to cache the function call.

    n_jobs: integer, optional
        The number of CPUs to use to do the computation. -1 means
        'all CPUs'.

    verbose: int, optional
        Verbosity of the parallel mask computations.

    Returns
    -------
    mask : 3D nibabel.Nifti1Image
        The brain mask.
    """
    if len(data_imgs) == 0:
        raise TypeError('An empty object - %r - was passed instead of an '
                        'image or a list of images' % data_imgs)
    # One background mask per session/subject, computed in parallel.
    masks = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(compute_background_mask)(img,
                                         border_size=border_size,
                                         connected=connected,
                                         opening=opening,
                                         target_affine=target_affine,
                                         target_shape=target_shape,
                                         memory=memory)
        for img in data_imgs)

    # Combine the per-session masks with the voxel-wise fraction rule.
    mask = intersect_masks(masks, connected=connected, threshold=threshold)
    return mask
def transform_imgs(self, imgs_list, confounds=None, copy=True, n_jobs=1):
    """Prepare multi subject data in parallel

    Parameters
    ----------
    imgs_list: list of Niimg-like objects
        See http://nilearn.github.io/manipulating_images/input_output.html
        List of imgs file to prepare. One item per subject.

    confounds: list of confounds, optional
        List of confounds (2D arrays or filenames pointing to CSV
        files). Must be of same length than imgs_list.

    copy: boolean, optional
        If True, guarantees that output array has no memory in common
        with input array.

    n_jobs: integer, optional
        The number of cpus to use to do the computation. -1 means
        'all cpus'.

    Returns
    -------
    region_signals: list of 2D numpy.ndarray
        List of signal for each element per subject.
        shape: list of (number of scans, number of elements)

    Raises
    ------
    ValueError
        If fit() has not been called on this instance.
    """
    if not hasattr(self, 'mask_img_'):
        raise ValueError(
            'It seems that %s has not been fitted. '
            'You must call fit() before calling transform().'
            % self.__class__.__name__)
    target_fov = None
    if self.target_affine is None:
        # Force resampling on first image
        target_fov = 'first'

    # Lazily checks/loads each image, resampling to the first image's
    # field of view when no target_affine was given.
    niimg_iter = _iter_check_niimg(imgs_list, ensure_ndim=None,
                                   atleast_4d=False,
                                   target_fov=target_fov,
                                   memory=self.memory,
                                   memory_level=self.memory_level,
                                   verbose=self.verbose)

    if confounds is None:
        confounds = itertools.repeat(None, len(imgs_list))

    # Ignore the mask-computing params: they are not useful and will
    # just invalidate the cache for no good reason
    # target_shape and target_affine are conveyed implicitly in mask_img
    params = get_params(self.__class__, self,
                        ignore=['mask_img', 'mask_args', 'mask_strategy',
                                'copy'])

    # Cached (and optionally shelved) masking+cleaning function; the
    # ignored kwargs do not take part in the cache key.
    func = self._cache(filter_and_mask,
                       ignore=['verbose', 'memory', 'memory_level',
                               'copy'],
                       shelve=self._shelving)
    data = Parallel(n_jobs=n_jobs)(
        delayed(func)(imgs, self.mask_img_, params,
                      memory_level=self.memory_level,
                      memory=self.memory,
                      verbose=self.verbose,
                      confounds=cfs,
                      copy=copy,
                      dtype=self.dtype)
        for imgs, cfs in izip(niimg_iter, confounds))
    return data
def mean_img(imgs, target_affine=None, target_shape=None,
             verbose=0, n_jobs=1):
    """ Compute the mean of the images (in the time dimension of 4th
    dimension)

    Note that if list of 4D images are given, the mean of each 4D image is
    computed separately, and the resulting mean is computed after.

    Parameters
    ----------
    imgs: Niimg-like object or iterable of Niimg-like objects
        See http://nilearn.github.io/manipulating_images/input_output.html
        Images to mean.

    target_affine: numpy.ndarray, optional
        If specified, the image is resampled corresponding to this new
        affine. target_affine can be a 3x3 or a 4x4 matrix

    target_shape: tuple or list, optional
        If specified, the image will be resized to match this new shape.
        len(target_shape) must be equal to 3. A target_affine has to be
        specified jointly with target_shape.

    verbose: int, optional
        Controls the amount of verbosity: higher numbers give
        more messages (0 means no messages).

    n_jobs: integer, optional
        The number of CPUs to use to do the computation. -1 means
        'all CPUs'.

    Returns
    -------
    mean: nibabel.Nifti1Image
        mean image

    See Also
    --------
    nilearn.image.math_img : For more general operations on images
    """
    # collections.Iterable was deprecated in Python 3.3 and removed in
    # 3.10; collections.abc is the supported home of the ABC.
    from collections.abc import Iterable

    if (isinstance(imgs, _basestring) or
            not isinstance(imgs, Iterable)):
        imgs = [imgs, ]

    imgs_iter = iter(imgs)
    first_img = check_niimg(next(imgs_iter))

    # Compute the first mean to retrieve the reference
    # target_affine and target_shape if_needed
    n_imgs = 1
    running_mean, first_affine = _compute_mean(
        first_img,
        target_affine=target_affine,
        target_shape=target_shape)

    if target_affine is None or target_shape is None:
        target_affine = first_affine
        target_shape = running_mean.shape[:3]

    # Mean of the remaining images, computed in parallel and accumulated
    # into the running sum.
    for this_mean in Parallel(n_jobs=n_jobs, verbose=verbose)(
            delayed(_compute_mean)(n, target_affine=target_affine,
                                   target_shape=target_shape)
            for n in imgs_iter):
        n_imgs += 1
        # _compute_mean returns (mean_img, affine)
        this_mean = this_mean[0]
        running_mean += this_mean

    running_mean = running_mean / float(n_imgs)
    return new_img_like(first_img, running_mean, target_affine)
def search_light(X, y, estimator, A, groups=None, scoring=None, cv=None,
                 n_jobs=-1, verbose=0):
    """Function for computing a search_light

    Parameters
    ----------
    X : array-like of shape at least 2D
        data to fit.

    y : array-like
        target variable to predict.

    estimator : estimator object implementing 'fit'
        object to use to fit the data

    A : scipy sparse matrix.
        adjacency matrix. Defines for each feature the neighboring
        features following a given structure of the data.

    groups : array-like, optional
        group label for each sample for cross validation. default None
        NOTE: will have no effect for scikit learn < 0.18

    scoring : string or callable, optional
        The scoring strategy to use. See the scikit-learn documentation
        for possible values.
        If callable, it takes as arguments the fitted estimator, the
        test data (X_test) and the test target (y_test) if y is
        not None.

    cv : cross-validation generator, optional
        A cross-validation generator. If None, a 3-fold cross
        validation is used or 3-fold stratified cross-validation
        when y is supplied.

    n_jobs : int, optional
        The number of CPUs to use to do the computation. -1 means
        'all CPUs'.

    verbose : int, optional
        The verbosity level. Default is 0

    Returns
    -------
    scores : array-like of shape (number of rows in A)
        search_light scores
    """
    # Split the rows of the adjacency matrix into one chunk per worker.
    chunks = GroupIterator(A.shape[0], n_jobs)

    # Fitting per-sphere estimators may emit convergence warnings;
    # silence them for the whole parallel run.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', ConvergenceWarning)
        chunk_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
            delayed(_group_iter_search_light)(
                A.rows[chunk_rows], estimator, X, y, groups, scoring,
                cv, chunk_id + 1, A.shape[0], verbose)
            for chunk_id, chunk_rows in enumerate(chunks))

    return np.concatenate(chunk_scores)
def mask_and_reduce(masker, imgs, confounds=None,
                    reduction_ratio='auto',
                    n_components=None, random_state=None,
                    memory_level=0,
                    memory=Memory(cachedir=None),
                    n_jobs=1):
    """Mask and reduce provided 4D images with given masker.

    Uses a PCA (randomized for small reduction ratio) or a range finding
    matrix on time series to reduce data size in time direction. For
    multiple images, the concatenation of data is returned, either as an
    ndarray or a memorymap (useful for big datasets that do not fit in
    memory).

    Parameters
    ----------
    masker: NiftiMasker or MultiNiftiMasker
        Instance used to mask provided data.

    imgs: list of 4D Niimg-like objects
        See http://nilearn.github.io/manipulating_images/input_output.html
        List of subject data to mask, reduce and stack.

    confounds: CSV file path or 2D matrix, optional
        This parameter is passed to signal.clean. Please see the
        corresponding documentation for details.

    reduction_ratio: 'auto' or float between 0. and 1.
        - Between 0. or 1. : controls data reduction in the temporal
          domain, 1. means no reduction, < 1. calls for an SVD based
          reduction.
        - if set to 'auto', estimator will set the number of components
          per reduced session to be n_components.

    n_components: integer, optional
        Number of components per subject to be extracted by dimension
        reduction

    random_state: int or RandomState
        Pseudo number generator state used for random sampling.

    memory_level: integer, optional
        Integer indicating the level of memorization. The higher, the
        more function calls are cached.

    memory: joblib.Memory
        Used to cache the function calls.

    Returns
    -------
    data: ndarray or memorymap
        Concatenation of reduced data.
    """
    if not hasattr(imgs, '__iter__'):
        imgs = [imgs]

    # Normalize reduction_ratio: after this block it is either the
    # string 'auto' (n_components drives the reduction) or a float in
    # [0, 1].
    if reduction_ratio == 'auto':
        if n_components is None:
            # Reduction ratio is 1 if
            # neither n_components nor ratio is provided
            reduction_ratio = 1
    else:
        if reduction_ratio is None:
            reduction_ratio = 1
        else:
            reduction_ratio = float(reduction_ratio)
        if not 0 <= reduction_ratio <= 1:
            raise ValueError('Reduction ratio should be between 0. and 1.,'
                             'got %.2f' % reduction_ratio)

    if confounds is None:
        confounds = itertools.repeat(confounds)

    if reduction_ratio == 'auto':
        # 'auto': request exactly n_components samples per subject.
        n_samples = n_components
        reduction_ratio = None
    else:
        # We'll let _mask_and_reduce_single decide on the number of
        # samples based on the reduction_ratio
        n_samples = None

    # One mask+reduce job per (image, confound) pair.
    data_list = Parallel(n_jobs=n_jobs)(
        delayed(_mask_and_reduce_single)(
            masker, img, confound,
            reduction_ratio=reduction_ratio,
            n_samples=n_samples,
            memory=memory,
            memory_level=memory_level,
            random_state=random_state)
        for img, confound in zip(imgs, confounds))

    subject_n_samples = [subject_data.shape[0]
                         for subject_data in data_list]

    n_samples = np.sum(subject_n_samples)
    n_voxels = int(np.sum(_safe_get_data(masker.mask_img_)))
    # Preserve float64 only if the reduced data already is float64;
    # everything else is stored as float32 to halve memory use.
    dtype = (np.float64 if data_list[0].dtype.type is np.float64
             else np.float32)
    data = np.empty((n_samples, n_voxels), order='F',
                    dtype=dtype)

    # Stack the per-subject blocks vertically into the output array.
    current_position = 0
    for i, next_position in enumerate(np.cumsum(subject_n_samples)):
        data[current_position:next_position] = data_list[i]
        current_position = next_position
        # Clear memory as fast as possible: remove the reference on
        # the corresponding block of data
        data_list[i] = None
    return data
def fit(self, subjects, y=None):
    """Compute cross-validated group-sparse precisions.

    Parameters
    ----------
    subjects : list of numpy.ndarray with shapes (n_samples, n_features)
        input subjects. Each subject is a 2D array, whose columns contain
        signals. Sample number can vary from subject to subject, but all
        subjects must have the same number of features (i.e. of
        columns.)

    Returns
    -------
    self: GroupSparseCovarianceCV
        the object instance itself.
    """
    # collections.Sequence was removed in Python 3.10; collections.abc
    # has been its supported home since Python 3.3.
    from collections.abc import Sequence

    # Empirical covariances
    emp_covs, n_samples = \
        empirical_covariances(subjects, assume_centered=False)
    n_subjects = emp_covs.shape[2]

    # One cv generator per subject must be created, because each subject
    # can have a different number of samples from the others.
    cv = []
    for k in range(n_subjects):
        cv.append(check_cv(
            self.cv, np.ones(subjects[k].shape[0]),
            classifier=False
        ).split(subjects[k])
        )
    path = list()  # List of (alpha, scores, covs)
    n_alphas = self.alphas

    if isinstance(n_alphas, Sequence):
        # Explicit grid of alphas: no refinement.
        alphas = list(self.alphas)
        n_alphas = len(alphas)
        n_refinements = 1
    else:
        # Build a log-spaced grid from the maximal useful alpha down.
        n_refinements = self.n_refinements
        alpha_1, _ = compute_alpha_max(emp_covs, n_samples)
        alpha_0 = 1e-2 * alpha_1
        alphas = np.logspace(np.log10(alpha_0), np.log10(alpha_1),
                             n_alphas)[::-1]

    covs_init = itertools.repeat(None)

    # Copying the cv generators to use them n_refinements times.
    cv_ = izip(*cv)

    for i, (this_cv) in enumerate(itertools.tee(cv_, n_refinements)):
        # Compute the cross-validated loss on the current grid
        train_test_subjs = []
        for train_test in this_cv:
            assert(len(train_test) == n_subjects)
            train_test_subjs.append(
                list(zip(*[(subject[train, :], subject[test, :])
                           for subject, (train, test)
                           in zip(subjects, train_test)])))
        if self.early_stopping:
            probes = [EarlyStopProbe(test_subjs,
                                     verbose=max(0, self.verbose - 1))
                      for _, test_subjs in train_test_subjs]
        else:
            probes = itertools.repeat(None)

        this_path = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
            delayed(group_sparse_covariance_path)(
                train_subjs, alphas, test_subjs=test_subjs,
                max_iter=self.max_iter_cv, tol=self.tol_cv,
                verbose=max(0, self.verbose - 1), debug=self.debug,
                # Warm restart is useless with early stopping.
                precisions_init=(None if self.early_stopping
                                 else prec_init),
                probe_function=probe)
            for (train_subjs, test_subjs), prec_init, probe
            in zip(train_test_subjs, covs_init, probes))

        # this_path[i] is a tuple (precisions_list, scores)
        # - scores: scores obtained with the i-th folding, for each value
        #   of alpha.
        # - precisions_list: corresponding precisions matrices, for each
        #   value of alpha.
        precisions_list, scores = list(zip(*this_path))
        # now scores[i][j] is the score for the i-th folding, j-th value
        # of alpha (analogous for precisions_list)
        precisions_list = list(zip(*precisions_list))
        scores = [np.mean(sc) for sc in zip(*scores)]
        # scores[i] is the mean score obtained for the i-th value of
        # alpha.

        path.extend(list(zip(alphas, scores, precisions_list)))
        path = sorted(path, key=operator.itemgetter(0), reverse=True)

        # Find the maximum score (avoid using the built-in 'max' function
        # to have a fully-reproducible selection of the smallest alpha in
        # case of equality)
        best_score = -np.inf
        last_finite_idx = 0
        for index, (alpha, this_score, _) in enumerate(path):
            # np.float was a removed NumPy alias of the builtin float;
            # np.float64 has the same eps.
            if this_score >= .1 / np.finfo(np.float64).eps:
                this_score = np.nan
            if np.isfinite(this_score):
                last_finite_idx = index
            if this_score >= best_score:
                best_score = this_score
                best_index = index

        # Refine the grid
        if best_index == 0:
            # We do not need to go back: we have chosen
            # the highest value of alpha for which there are
            # non-zero coefficients
            alpha_1 = path[0][0]
            alpha_0 = path[1][0]
            covs_init = path[0][2]
        elif (best_index == last_finite_idx
                and not best_index == len(path) - 1):
            # We have non-converged models on the upper bound of the
            # grid, we need to refine the grid there
            alpha_1 = path[best_index][0]
            alpha_0 = path[best_index + 1][0]
            covs_init = path[best_index][2]
        elif best_index == len(path) - 1:
            alpha_1 = path[best_index][0]
            alpha_0 = 0.01 * path[best_index][0]
            covs_init = path[best_index][2]
        else:
            alpha_1 = path[best_index - 1][0]
            alpha_0 = path[best_index + 1][0]
            covs_init = path[best_index - 1][2]
        alphas = np.logspace(np.log10(alpha_1), np.log10(alpha_0),
                             len(alphas) + 2)
        alphas = alphas[1:-1]
        if n_refinements > 1:
            logger.log("[GroupSparseCovarianceCV] Done refinement "
                       "% 2i out of %i" % (i + 1, n_refinements),
                       verbose=self.verbose)

    path = list(zip(*path))
    cv_scores_ = list(path[1])
    alphas = list(path[0])

    self.cv_scores_ = np.array(cv_scores_)
    self.alpha_ = alphas[best_index]
    self.cv_alphas_ = alphas

    # Finally, fit the model with the selected alpha
    logger.log("Final optimization", verbose=self.verbose)
    self.covariances_ = emp_covs
    self.precisions_ = _group_sparse_covariance(
        emp_covs, n_samples, self.alpha_, tol=self.tol,
        max_iter=self.max_iter,
        verbose=max(0, self.verbose - 1), debug=self.debug)
    return self