def _do_subject_smooth(subject_data, fwhm, prefix=None,
                       write_output_images=2, func_basenames=None,
                       concat=False, caching=True):
    if prefix is None:
        prefix = PREPROC_OUTPUT_IMAGE_PREFICES['smoothing']
    if func_basenames is None:
        func_basenames = [get_basenames(func) for func in subject_data.func]
    if caching:
        mem = Memory(cachedir=os.path.join(subject_data.output_dir,
                                           'cache_dir'), verbose=100)
    else:
        # fall back to a no-op Memory so the mem.cache(...) calls below
        # still work when caching is disabled
        mem = Memory(None)
    sfunc = []
    for sess in range(subject_data.n_sessions):
        sess_func = subject_data.func[sess]
        _tmp = mem.cache(smooth_image)(sess_func, fwhm)
        if write_output_images == 2:
            _tmp = mem.cache(save_vols)(
                _tmp, subject_data.output_dir, basenames=func_basenames[sess],
                prefix=prefix, concat=concat)
        sfunc.append(_tmp)
    subject_data.func = sfunc
    return subject_data
def _delete_orientation(self):
    """Delete orientation metadata.

    Garbage orientation metadata can lead to severe mis-registration trouble.
    """
    # prepare for smart caching
    if self.scratch is None:
        self.scratch = self.output_dir
    cache_dir = os.path.join(self.scratch, 'cache_dir')
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    mem = Memory(cachedir=cache_dir, verbose=5)

    # deleteorient for func
    for attr in ['n_sessions', 'session_output_dirs']:
        if getattr(self, attr) is None:
            warnings.warn("'%s' attribute is None! Skipping" % attr)
            break
    else:
        self.func = [mem.cache(delete_orientation)(
            self.func[sess], self.session_output_dirs[sess])
            for sess in range(self.n_sessions)]

    # deleteorient for anat
    if self.anat is not None:
        self.anat = mem.cache(delete_orientation)(
            self.anat, self.anat_output_dir)
def _delete_orientation(self):
    """Delete orientation metadata.

    Garbage orientation metadata can lead to severe mis-registration trouble.
    """
    # prepare for smart caching
    if self.scratch is None:
        self.scratch = self.output_dir
    if self.caching:
        cache_dir = os.path.join(self.scratch, 'cache_dir')
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        mem = Memory(cachedir=cache_dir, verbose=5)
    else:
        mem = Memory(None, verbose=0)

    # deleteorient for func
    for attr in ['n_sessions', 'session_output_dirs']:
        if getattr(self, attr) is None:
            warnings.warn("'%s' attribute is None! Skipping" % attr)
            break
    else:
        self.func = [mem.cache(delete_orientation)(
            self.func[sess], self.session_output_dirs[sess])
            for sess in range(self.n_sessions)]

    # deleteorient for anat
    if self.anat is not None:
        self.anat = mem.cache(delete_orientation)(
            self.anat, self.anat_output_dir)
def _do_subject_slice_timing(subject_data, ref_slice=0,
                             slice_order="ascending", interleaved=False,
                             caching=True, write_output_images=2,
                             func_prefix=None, func_basenames=None,
                             ext=None, verbose=True):
    if func_prefix is None:
        func_prefix = PREPROC_OUTPUT_IMAGE_PREFICES['STC']
    if func_basenames is None:
        func_basenames = [get_basenames(func) for func in subject_data.func]

    # prepare for smart caching
    if caching:
        mem = Memory(cachedir=os.path.join(
            subject_data.output_dir, 'cache_dir'),
            verbose=100 if verbose is True else verbose)
    else:
        mem = Memory(None)

    stc_output = []
    original_bold = subject_data.func
    for sidx, sess_func in enumerate(subject_data.func):
        fmristc = fMRISTC(slice_order=slice_order, ref_slice=ref_slice,
                          interleaved=interleaved, verbose=verbose)
        mem.cache(fmristc.fit)(raw_data=sess_func)
        stc_output.append(mem.cache(fmristc.transform)(
            sess_func,
            output_dir=subject_data.session_output_dirs[sidx] if (
                write_output_images > 0) else None,
            basenames=func_basenames[sidx], prefix=func_prefix, ext=ext))
    subject_data.func = stc_output
    del original_bold, fmristc
    if write_output_images > 1:
        subject_data.hardlink_output_files(verbose=verbose)
    return subject_data
def fit(self, X, y=None):
    """Compute agglomerative clustering.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)

    Returns
    -------
    self
    """
    memory = self.memory
    if isinstance(memory, six.string_types):
        memory = Memory(cachedir=memory, verbose=0)

    if self.n_landmarks is None:
        distances = memory.cache(pdist)(X, self.metric)
    else:
        if self.landmark_strategy == 'random':
            land_indices = check_random_state(self.random_state).randint(
                len(X), size=self.n_landmarks)
        else:
            land_indices = np.arange(len(X))[::(len(X) // self.n_landmarks)][:self.n_landmarks]
        distances = memory.cache(pdist)(X[land_indices], self.metric)

    tree = memory.cache(linkage)(distances, method=self.linkage)
    self.landmark_labels_ = fcluster(tree, criterion='maxclust',
                                     t=self.n_clusters) - 1

    if self.n_landmarks is None:
        self.landmarks_ = X
    else:
        self.landmarks_ = X[land_indices]

    return self
def transform(self, niimgs):
    memory = self.transform_memory
    if isinstance(memory, basestring):
        memory = Memory(cachedir=memory)

    # Load data (if filenames are given, load them)
    if self.verbose > 0:
        print "[%s.transform] Loading data" % self.__class__.__name__
    niimgs = utils.check_niimgs(niimgs)

    # Resampling: allows the user to change the affine, the shape or both
    if self.verbose > 0:
        print "[%s.transform] Resampling" % self.__class__.__name__
    niimgs = memory.cache(resampling.resample_img)(
        niimgs, target_affine=self.target_affine,
        target_shape=self.target_shape)

    # Get series from data with optional smoothing
    if self.verbose > 0:
        print "[%s.transform] Masking and smoothing" \
            % self.__class__.__name__
    data = masking.apply_mask(niimgs, self.mask_, smooth=self.smooth)

    # Temporal
    # ========
    # Detrending (optional)
    # Filtering (grab TR from header)
    # Confounds (from csv file or numpy array)
    # Normalizing
    if self.verbose > 0:
        print "[%s.transform] Cleaning signal" % self.__class__.__name__
    if self.sessions_ is None:
        data = memory.cache(signals.clean)(
            data, confounds=self.confounds, low_pass=self.low_pass,
            high_pass=self.high_pass, t_r=self.t_r, detrend=self.detrend,
            normalize=False)
    else:
        for s in np.unique(self.sessions_):
            if self.confounds is not None:
                session_confounds = self.confounds[self.sessions_ == s]
            data[self.sessions_ == s] = \
                memory.cache(signals.clean)(
                    data=data[self.sessions_ == s],
                    confounds=session_confounds,
                    low_pass=self.low_pass, high_pass=self.high_pass,
                    t_r=self.t_r, detrend=self.detrend, normalize=False)

    # For _later_: missing value removal or imputing of missing data
    # (i.e. we want to get rid of NaNs, if smoothing must be done
    # earlier)
    # Optionally: 'doctor_nan', remove voxels with NaNs, other option
    # for later: some form of imputation

    # data is in format voxel x time_series. We inverse it
    data = np.rollaxis(data, -1)

    self.affine_ = niimgs.get_affine()
    return data
def _do_subject_coregister(
        subject_data, coreg_func_to_anat=True, caching=True, ext=None,
        write_output_images=2, func_basenames=None, func_prefix="",
        anat_basename=None, anat_prefix="", report=True, verbose=True):
    ref_brain = 'func'
    src_brain = 'anat'
    ref = subject_data.func[0]
    src = subject_data.anat
    if coreg_func_to_anat:
        ref_brain, src_brain = src_brain, ref_brain
        ref, src = src, ref

    # prepare for smart caching
    if caching:
        mem = Memory(cachedir=os.path.join(
            subject_data.output_dir, 'cache_dir'),
            verbose=100 if verbose is True else verbose)
    else:
        mem = Memory()

    # estimate realignment (affine) params for coreg
    coreg = Coregister(verbose=verbose)
    coreg = mem.cache(coreg.fit)(ref, src)

    # apply coreg
    if coreg_func_to_anat:
        if func_basenames is None:
            func_basenames = [get_basenames(func)
                              for func in subject_data.func]
        coreg_func = []
        for sidx, sess_func in enumerate(subject_data.func):
            output_dir = subject_data.session_scratch_dirs[sidx]
            coreg_func.append(mem.cache(coreg.transform)(
                sess_func,
                output_dir=output_dir if (
                    write_output_images == 2) else None,
                basenames=func_basenames[sidx] if coreg_func_to_anat
                else anat_basename,
                prefix=func_prefix))
        subject_data.func = coreg_func
        src = load_vols(subject_data.func[0])[0]
    else:
        if anat_basename is None:
            anat_basename = get_basenames(subject_data.anat)
        subject_data.anat = mem.cache(coreg.transform)(
            subject_data.anat, basename=anat_basename,
            output_dir=subject_data.anat_scratch_output_dir if (
                write_output_images == 2) else None,
            prefix=anat_prefix, ext=ext)
        src = subject_data.anat

    # generate coregistration QA thumbs
    if report:
        subject_data.generate_coregistration_thumbnails(
            coreg_func_to_anat=coreg_func_to_anat, nipype=False)

    del coreg
    if write_output_images > 1:
        subject_data.hardlink_output_files(verbose=verbose)
    return subject_data
def _cache(self, func, memory_level=1, **kwargs):
    """Return a joblib.Memory object if necessary.

    The memory_level determines the level above which the wrapped
    function output is cached. By specifying a numeric value for this
    level, the user can control the amount of cache memory used. This
    function will cache the function call or not depending on the cache
    level.

    Parameters
    ----------
    func: python function
        The function which output is to be cached.

    memory_level: integer
        The memory_level from which caching must be enabled for the
        wrapped function.

    Returns
    -------
    Either the original function, if there is no need to cache it (because
    the requested level is lower than the value given to _cache()) or a
    joblib.Memory object that wraps the function func.
    """
    # Creates attributes if they don't exist
    # This is to make creating them in __init__() optional.
    if not hasattr(self, "memory_level"):
        self.memory_level = 0
    if not hasattr(self, "memory"):
        self.memory = Memory(cachedir=None)

    # If cache level is 0 but a memory object has been provided, set
    # memory_level to 1 with a warning.
    if self.memory_level == 0:
        if (isinstance(self.memory, basestring)
                or self.memory.cachedir is not None):
            warnings.warn("memory_level is currently set to 0 but "
                          "a Memory object has been provided. "
                          "Setting memory_level to 1.")
            self.memory_level = 1

    if self.memory_level < memory_level:
        mem = Memory(cachedir=None)
        return mem.cache(func, **kwargs)
    else:
        memory = self.memory
        if isinstance(memory, basestring):
            memory = Memory(cachedir=memory)
        if not isinstance(memory, Memory):
            raise TypeError("'memory' argument must be a string or a "
                            "joblib.Memory object.")
        if memory.cachedir is None:
            warnings.warn(
                "Caching has been enabled (memory_level = %d) but no"
                " Memory object or path has been provided (parameter"
                " memory). Caching deactivated for function %s." %
                (self.memory_level, func.func_name))
        return memory.cache(func, **kwargs)
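# A minimal, self-contained sketch (not part of the class that owns _cache
# above) of the behaviour it toggles, written against the same older
# joblib Memory(cachedir=...) signature used throughout this code. The
# directory path and the costly_mean helper are illustrative only.
from joblib import Memory
import numpy as np

def costly_mean(x):
    # stand-in for an expensive computation worth memoising
    return np.mean(x)

# cachedir=None gives a transparent no-op wrapper, which is what _cache
# falls back to when the requested memory_level is not reached ...
no_cache = Memory(cachedir=None).cache(costly_mean)
# ... while a real directory makes repeated identical calls load from disk.
on_disk = Memory(cachedir='/tmp/joblib_demo_cache', verbose=0).cache(costly_mean)

data = np.arange(1000000)
m1 = no_cache(data)   # always recomputed
m2 = on_disk(data)    # computed once, then read back from the cache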
def affine_registration_pypreprocess(in_path, ref_path, out_path,
                                     in_ref_mat='', ref_in_mat='', T=None,
                                     force_resample=False, extra_params={}):
    """Affine registration and resampling.

    Uses Coregister from pypreprocess. Coregister is designed for
    transformation between func and anat, so applying this function to MNI
    standard space may not produce the best result.

    inputs:
        in_path: path to the source (input) image.
        ref_path: path to the target (reference) image.
        out_path: path to use to save the registered image.
        in_ref_mat: if bool(in_ref_mat) is True, save the 4x4 transformation
            matrix to a text file <in_ref_mat>.
        ref_in_mat: if bool(ref_in_mat) is True, save the reverse of the 4x4
            transformation matrix to a text file <ref_in_mat>.
        T: specific transformation to use. If None, T will be estimated
            using Coregister().fit; else numpy.array(T) will be used. T is
            an array of 6 elements; the first three represent translation,
            and the last three represent rotations.
        force_resample: bool. Whether or not to resample in an extra step.
            By default pypreprocess does not resample data, which means we
            have to use nilearn's module to do that. Also, scaling is not
            one of the provided DoF/estimation parameters of pypreprocess,
            neither did I implement it myself. Maybe check
            scipy.misc.imresize if scaling needs to be implemented in the
            future.
        extra_params: for Coregister()
    """
    source = nib.load(in_path)
    target = nib.load(ref_path)

    # coreg = Coregister()
    coreg = AllFeatures(Coregister, extra_params).run()
    # create the cache up-front so it is also available to the optional
    # resampling step below, even when a predefined T is given
    mem = Memory("affine_registration_pypreprocess_cache")
    if T is None:
        coreg = mem.cache(coreg.fit)(target, source)  # fit(target, source)
    else:
        T_ = np.array(T)
        if T_.size != 6 or T_.dtype != float:
            raise ValueError('T should either be None or ndarray with '
                             'size 6 and dtype float')
        print('using predefined T = %s' % T)
        coreg.params_ = T_
    img = coreg.transform(source)[0]
    if force_resample:
        # no rescaling here
        img = mem.cache(resample_img)(img, target.affine, target.shape)
    nib.save(img, out_path)
    if in_ref_mat:
        np.savetxt(in_ref_mat, coreg.params_)
    if ref_in_mat:
        np.savetxt(ref_in_mat, -coreg.params_)
    return coreg.params_
def fit(self, niimgs, y=None):
    """Compute the mask corresponding to the data

    Parameters
    ----------
    niimgs: list of filenames or NiImages
        Data on which the mask must be calculated. If this is a list,
        the affine is considered the same for all.
    """
    memory = self.memory
    if isinstance(memory, basestring):
        memory = Memory(cachedir=memory)

    # Load data (if filenames are given, load them)
    if self.verbose > 0:
        print "[%s.fit] Loading data from %s" % (
            self.__class__.__name__,
            utils._repr_niimgs(niimgs)[:200])
    data = []
    for niimg in niimgs:
        # Note that data is not loaded into memory at this stage
        # if niimg is a string
        data.append(utils.check_niimgs(niimg, accept_3d=True))

    # Compute the mask if not given by the user
    if self.mask is None:
        if self.verbose > 0:
            print "[%s.fit] Computing the mask" % self.__class__.__name__
        mask = memory.cache(masking.compute_multi_epi_mask,
                            ignore=['verbose'])(
            niimgs, connected=self.mask_connected,
            opening=self.mask_opening,
            lower_cutoff=self.mask_lower_cutoff,
            upper_cutoff=self.mask_upper_cutoff,
            n_jobs=self.n_jobs, verbose=(self.verbose - 1))
        self.mask_img_ = Nifti1Image(mask.astype(np.int),
                                     data[0].get_affine())
    else:
        self.mask_img_ = utils.check_niimg(self.mask)

    # If resampling is requested, resample also the mask
    # Resampling: allows the user to change the affine, the shape or both
    if self.verbose > 0:
        print "[%s.transform] Resampling mask" % self.__class__.__name__
    self.mask_img_ = memory.cache(resampling.resample_img)(
        self.mask_img_,
        target_affine=self.target_affine,
        target_shape=self.target_shape,
        copy=(self.target_affine is not None and
              self.target_shape is not None))
    return self
def _fit(self, X, y=None, **fit_params):
    self._validate_steps()
    # Setup the memory
    memory = self.memory
    if memory is None:
        memory = Memory(cachedir=None, verbose=0)
    elif isinstance(memory, six.string_types):
        memory = Memory(cachedir=memory, verbose=0)
    elif not isinstance(memory, Memory):
        raise ValueError("'memory' should either be a string or"
                         " a joblib.Memory instance, got"
                         " 'memory={!r}' instead.".format(memory))

    fit_transform_one_cached = memory.cache(_fit_transform_one)
    fit_sample_one_cached = memory.cache(_fit_sample_one)

    fit_params_steps = dict((name, {}) for name, step in self.steps
                            if step is not None)
    for pname, pval in six.iteritems(fit_params):
        step, param = pname.split('__', 1)
        fit_params_steps[step][param] = pval
    Xt = X
    yt = y
    for step_idx, (name, transformer) in enumerate(self.steps[:-1]):
        if transformer is None:
            pass
        else:
            if memory.cachedir is None:
                # we do not clone when caching is disabled to preserve
                # backward compatibility
                cloned_transformer = transformer
            else:
                cloned_transformer = clone(transformer)
            # Fit or load from cache the current transformer
            if (hasattr(cloned_transformer, "transform") or
                    hasattr(cloned_transformer, "fit_transform")):
                Xt, fitted_transformer = fit_transform_one_cached(
                    cloned_transformer, None, Xt, yt,
                    **fit_params_steps[name])
            elif hasattr(cloned_transformer, "sample"):
                Xt, yt, fitted_transformer = fit_sample_one_cached(
                    cloned_transformer, Xt, yt, **fit_params_steps[name])
            # Replace the transformer of the step with the fitted
            # transformer. This is necessary when loading the transformer
            # from the cache.
            self.steps[step_idx] = (name, fitted_transformer)

    if self._final_estimator is None:
        return Xt, yt, {}
    return Xt, yt, fit_params_steps[self.steps[-1][0]]
def _fit(self, X, y=None, **fit_params):
    self._validate_steps()
    # Setup the memory
    memory = self.memory
    if memory is None:
        memory = Memory(cachedir=None, verbose=0)
    elif isinstance(memory, six.string_types):
        memory = Memory(cachedir=memory, verbose=0)
    elif not isinstance(memory, Memory):
        raise ValueError("'memory' should either be a string or"
                         " a joblib.Memory instance, got"
                         " 'memory={!r}' instead.".format(memory))

    fit_transform_one_cached = memory.cache(_fit_transform_one)
    fit_resample_one_cached = memory.cache(_fit_resample_one)

    fit_params_steps = dict((name, {}) for name, step in self.steps
                            if step is not None)
    for pname, pval in six.iteritems(fit_params):
        step, param = pname.split('__', 1)
        fit_params_steps[step][param] = pval
    Xt = X
    yt = y
    for step_idx, (name, transformer) in enumerate(self.steps[:-1]):
        if transformer is None:
            pass
        else:
            if memory.cachedir is None:
                # we do not clone when caching is disabled to preserve
                # backward compatibility
                cloned_transformer = transformer
            else:
                cloned_transformer = clone(transformer)
            # Fit or load from cache the current transformer
            if (hasattr(cloned_transformer, "transform") or
                    hasattr(cloned_transformer, "fit_transform")):
                Xt, fitted_transformer = fit_transform_one_cached(
                    cloned_transformer, None, Xt, yt,
                    **fit_params_steps[name])
            elif hasattr(cloned_transformer, "fit_resample"):
                Xt, yt, fitted_transformer = fit_resample_one_cached(
                    cloned_transformer, Xt, yt, **fit_params_steps[name])
            # Replace the transformer of the step with the fitted
            # transformer. This is necessary when loading the transformer
            # from the cache.
            self.steps[step_idx] = (name, fitted_transformer)

    if self._final_estimator is None:
        return Xt, yt, {}
    return Xt, yt, fit_params_steps[self.steps[-1][0]]
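# Rough usage sketch for the memory handling exercised by the two _fit
# variants above: the user-facing pipeline accepts either a cache-directory
# string or a joblib.Memory object. A plain scikit-learn Pipeline is shown
# as a stand-in for the subclass that owns _fit; the temporary directory and
# estimator names are illustrative, and the older Memory(cachedir=...)
# keyword used throughout this code is assumed.
import tempfile
from joblib import Memory
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X_demo, y_demo = make_classification(n_samples=200, n_features=20,
                                     random_state=0)
pipe = Pipeline([('scale', StandardScaler()), ('clf', LogisticRegression())],
                memory=Memory(cachedir=tempfile.mkdtemp(), verbose=0))
pipe.fit(X_demo, y_demo)  # the fitted StandardScaler is cached; refits reuse it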
def motion_correction_nipy(in_file, out_path, mc_alg, extra_params={}):
    """An attempt at motion correction using the NiPy package.

    inputs:
        in_file: Full path to the resting-state scan.
        out_path: Full path to the (to be) output file.
        mc_alg: can be either 'nipy_spacerealign' or 'nipy_spacetimerealign'
        extra_params: extra parameters to SpaceRealign, SpaceTimeRealign,
            estimate

    return:
        the motion corrected image
    """
    alg_dict = {
        'nipy_spacerealign': (SpaceRealign, {}),
        'nipy_spacetimerealign': (SpaceTimeRealign, {
            'tr': 2, 'slice_times': 'asc_alt_2', 'slice_info': 2})
    }  # format: {'function_name': (function, kwargs), ...}

    # processing starts here
    if type(in_file) in nib.all_image_classes:
        I = nifti2nipy(in_file)  # assume Nifti1Image
    else:
        I = load_image(in_file)
    print 'source image loaded. '

    # initialize the registration algorithm
    reg = AllFeatures(alg_dict[mc_alg][0], extra_params).run(
        I, **alg_dict[mc_alg][1])
    # reg = alg_dict[mc_alg][0](I, **alg_dict[mc_alg][1])
    # SpaceTimeRealign(I, tr=2, ...)
    print 'motion correction algorithm established. '
    print 'estimating...'

    if USE_CACHE:
        mem = Memory("func_preproc_cache_2")
        mem.cache(AllFeatures(reg.estimate, extra_params).run)(refscan=None)
        # mem.cache(reg.estimate)(refscan=None)
    else:
        AllFeatures(reg.estimate, extra_params).run(refscan=None)
        # reg.estimate(refscan=None)
    print 'estimation complete. Writing to file...'

    result = reg.resample(0)
    if out_path:
        save_image(result, out_path)
    return nipy2nifti(result)
def _niigz2nii(self):
    """Convert .nii.gz to .nii (crucial for SPM)."""
    cache_dir = os.path.join(self.scratch, 'cache_dir')
    mem = Memory(cache_dir, verbose=100)

    self._sanitize_session_output_dirs()
    if None not in [self.func, self.n_sessions, self.session_output_dirs]:
        self.func = [mem.cache(do_niigz2nii)(
            self.func[sess], output_dir=self.session_output_dirs[sess])
            for sess in range(self.n_sessions)]
    if self.anat is not None:
        self.anat = mem.cache(do_niigz2nii)(
            self.anat, output_dir=self.anat_output_dir)
def test_multilabel(self):
    cache = Memory(cachedir=tempfile.gettempdir())
    cached_func = cache.cache(
        sklearn.datasets.make_multilabel_classification)
    X, Y = cached_func(n_samples=150, n_features=20, n_classes=5,
                       n_labels=2, length=50, allow_unlabeled=True,
                       sparse=False, return_indicator=True,
                       return_distributions=False, random_state=1)
    X_train = X[:100, :]
    Y_train = Y[:100, :]
    X_test = X[101:, :]
    Y_test = Y[101:, ]
    data = {'X_train': X_train, 'Y_train': Y_train,
            'X_test': X_test, 'Y_test': Y_test}
    dataset_properties = {'multilabel': True}
    cs = SimpleClassificationPipeline(
        dataset_properties=dataset_properties).\
        get_hyperparameter_search_space()
    self._test_configurations(configurations_space=cs, data=data)
def comput_coefs(self, X, y, size):
    cv = KFold(2)  # cross-validation generator for model selection
    ridge = BayesianRidge()
    cachedir = tempfile.mkdtemp()
    mem = Memory(cachedir=cachedir, verbose=1)

    # Ward agglomeration followed by BayesianRidge
    connectivity = grid_to_graph(n_x=size, n_y=size)
    ward = FeatureAgglomeration(n_clusters=10, connectivity=connectivity,
                                memory=mem)
    clf = Pipeline([('ward', ward), ('ridge', ridge)])
    # Select the optimal number of parcels with grid search
    clf = GridSearchCV(clf, {'ward__n_clusters': [10, 20, 30]},
                       n_jobs=1, cv=cv)
    clf.fit(X, y)  # set the best parameters
    coef_ = clf.best_estimator_.steps[-1][1].coef_
    coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)
    coef_agglomeration_ = coef_.reshape(size, size)

    # Anova univariate feature selection followed by BayesianRidge
    f_regression = mem.cache(feature_selection.f_regression)  # caching function
    anova = feature_selection.SelectPercentile(f_regression)
    clf = Pipeline([('anova', anova), ('ridge', ridge)])
    # Select the optimal percentage of features with grid search
    clf = GridSearchCV(clf, {'anova__percentile': [5, 10, 20]}, cv=cv)
    clf.fit(X, y)  # set the best parameters
    coef_ = clf.best_estimator_.steps[-1][1].coef_
    coef_ = clf.best_estimator_.steps[0][1].inverse_transform(
        coef_.reshape(1, -1))
    coef_selection_ = coef_.reshape(size, size)

    return dict(coef_selection_=coef_selection_,
                coef_agglomeration_=coef_agglomeration_,
                cachedir=cachedir)
def _do_subject_slice_timing(subject_data, ref_slice=0,
                             slice_order="ascending", interleaved=False,
                             caching=True, write_output_images=2,
                             func_prefix=None, func_basenames=None,
                             ext=None):
    if func_prefix is None:
        func_prefix = PREPROC_OUTPUT_IMAGE_PREFICES['STC']
    if func_basenames is None:
        func_basenames = [get_basenames(func) for func in subject_data.func]

    # prepare for smart caching
    if caching:
        mem = Memory(cachedir=os.path.join(
            subject_data.output_dir, 'cache_dir'), verbose=100)
    runner = lambda handle: mem.cache(handle) if caching else handle

    stc_output = []
    original_bold = subject_data.func
    for sess_func, sess_id in zip(subject_data.func,
                                  range(subject_data.n_sessions)):
        fmristc = runner(fMRISTC(slice_order=slice_order,
                                 ref_slice=ref_slice,
                                 interleaved=interleaved,
                                 verbose=True).fit)(raw_data=sess_func)
        stc_output.append(runner(fmristc.transform)(
            sess_func,
            output_dir=subject_data.tmp_output_dir if (
                write_output_images > 0) else None,
            basenames=func_basenames[sess_id],
            prefix=func_prefix, ext=ext))
    subject_data.func = stc_output
    del original_bold, fmristc
    if write_output_images > 1:
        subject_data.hardlink_output_files()
    return subject_data
def fit(self, niimgs, y=None):
    """Compute the mask corresponding to the data

    Parameters
    ----------
    niimgs: list of filenames or NiImages
        Data on which the mask must be calculated. If this is a list,
        the affine is considered the same for all.
    """
    memory = self.memory
    if isinstance(memory, basestring):
        memory = Memory(cachedir=memory)

    # Load data (if filenames are given, load them)
    if self.verbose > 0:
        print "[%s.fit] Loading data" % self.__class__.__name__
    niimgs = utils.check_niimgs(niimgs, accept_3d=True)

    # Compute the mask if not given by the user
    if self.mask is None:
        if self.verbose > 0:
            print "[%s.fit] Computing the mask" % self.__class__.__name__
        mask = memory.cache(masking.compute_epi_mask)(
            niimgs.get_data(),
            connected=self.mask_connected,
            opening=self.mask_opening,
            lower_cutoff=self.mask_lower_cutoff,
            upper_cutoff=self.mask_upper_cutoff,
            verbose=(self.verbose - 1))
        self.mask_ = Nifti1Image(mask.astype(np.int), niimgs.get_affine())
    else:
        self.mask_ = utils.check_niimg(self.mask)

    # If resampling is requested, resample also the mask
    # Resampling: allows the user to change the affine, the shape or both
    if self.verbose > 0:
        print "[%s.transform] Resampling mask" % self.__class__.__name__
    self.mask_ = memory.cache(resampling.resample_img)(
        self.mask_,
        target_affine=self.target_affine,
        target_shape=self.target_shape,
        copy=(self.target_affine is not None and
              self.target_shape is not None))
    return self
def fetch_asirra(image_count=1000):
    partial_path = check_fetch_asirra()
    m = Memory(cachedir=partial_path, compress=6, verbose=0)
    load_func = m.cache(_fetch_asirra)
    images, target = load_func(partial_path, image_count=image_count)
    return Bunch(data=images.reshape(len(images), -1), images=images,
                 target=target, DESCR="Asirra cats and dogs dataset")
def get_multilabel(self):
    cache = Memory(cachedir=tempfile.gettempdir())
    cached_func = cache.cache(make_multilabel_classification)
    return cached_func(n_samples=100, n_features=10, n_classes=5,
                       n_labels=5, return_indicator=True, random_state=1)
def get_all_metadata(config=None, args=None):
    if config is None and args is None:
        raise Exception('Either config or args need to be not None')

    if config is None:
        config = get_config(args)

    class_meta = read_class_meta(config.dataset.class_meta_file)
    attrib_meta_with_name = read_attribute_meta(
        config.dataset.attrib_meta_file)
    attrib_meta = attrib_meta_with_name.drop('class_name', axis=1)
    train_annos = read_image_annotations(config.dataset.train_annos_file)
    test_annos = read_image_annotations(config.dataset.test_annos_file,
                                        has_class_id=False)
    domain_meta = read_domain_meta(config.dataset.domain_meta_file)
    train_annos['class_name'] = np.array(
        [class_meta.class_name[class_index]
         for class_index in train_annos.class_index])
    # test_annos['class_name'] = np.array(
    #     [class_meta.class_name[class_index]
    #      for class_index in test_annos.class_index])

    # Prepend path to the dataset to each img_path
    train_annos.img_path = train_annos.img_path.apply(
        lambda x: config.dataset.main_path.joinpath(x).abspath())
    test_annos.img_path = test_annos.img_path.apply(
        lambda x: config.dataset.main_path.joinpath(x).abspath())

    # Filter the class meta and train/test annotations to just use the
    # domains defined in config
    class_meta = class_meta[
        class_meta.domain_index.isin(config.dataset.domains)]
    train_annos = train_annos[
        train_annos.domain_index.isin(config.dataset.domains)]
    test_annos = test_annos[
        test_annos.domain_index.isin(config.dataset.domains)]

    # Create dev set
    dev_annos_train, dev_annos_test = create_dev_set(train_annos, config)

    # Should we use the dev set as the test set
    if config.dataset.dev_set.use:
        train_used, test_used = dev_annos_train, dev_annos_test
    else:
        train_used, test_used = train_annos, test_annos

    if config.flip_images:
        memory = Memory(cachedir=config.cache_dir,
                        verbose=config.logging.verbose)
        flip_func = memory.cache(create_flipped_images)
        train_used = flip_func(train_used, config)

    return ({'real_train_annos': train_annos,
             'real_test_annos': test_annos,
             'train_annos': train_used,
             'test_annos': test_used,
             'validation_annos': dev_annos_test,
             'class_meta': class_meta,
             'domain_meta': domain_meta,
             'attrib_meta': attrib_meta,
             'attrib_meta_with_name': attrib_meta_with_name},
            config)
def cache(func, memory, ref_memory_level=2, memory_level=1, **kwargs):
    """Return a joblib.Memory object.

    The memory_level determines the level above which the wrapped
    function output is cached. By specifying a numeric value for this
    level, the user can control the amount of cache memory used. This
    function will cache the function call or not depending on the cache
    level.

    Parameters
    ----------
    func: function
        The function which output is to be cached.

    memory: instance of joblib.Memory or string
        Used to cache the function call.

    ref_memory_level: int
        The reference memory_level used to determine if the function call
        must be cached or not (the function is cached when ref_memory_level
        is larger than memory_level).

    memory_level: int
        The memory_level from which caching must be enabled for the wrapped
        function.

    kwargs: keyword arguments
        The keyword arguments passed to memory.cache

    Returns
    -------
    mem: joblib.MemorizedFunc
        Object that wraps the function func. This object may be a no-op, if
        the requested level is lower than the value given to _cache(). For
        consistency, a joblib.Memory object is always returned.
    """
    if ref_memory_level <= memory_level or memory is None:
        memory = Memory(cachedir=None)
    else:
        memory = memory
        if isinstance(memory, basestring):
            memory = Memory(cachedir=memory)
        if not isinstance(memory, memory_classes):
            raise TypeError("'memory' argument must be a string or a "
                            "joblib.Memory object. "
                            "%s %s was given." % (memory, type(memory)))

        if memory.cachedir is None:
            warnings.warn("Caching has been enabled (memory_level = %d) "
                          "but no Memory object or path has been provided"
                          " (parameter memory). Caching deactivated for "
                          "function %s." %
                          (ref_memory_level, func.func_name),
                          stacklevel=2)

    return memory.cache(func, **kwargs)
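# Hypothetical call pattern for the cache() helper above: the wrapped call is
# actually memoised only because ref_memory_level (here 2) exceeds
# memory_level (1); with ref_memory_level <= memory_level a no-op Memory is
# used instead. The cache directory and the use of np.mean are illustrative,
# and the older Memory(cachedir=...) keyword this code targets is assumed.
import numpy as np
from joblib import Memory

mem = Memory(cachedir='/tmp/nilearn_demo_cache')
cached_mean = cache(np.mean, mem, ref_memory_level=2, memory_level=1)
result = cached_mean(np.arange(10))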
def _do_subject_coregister(
        subject_data, coreg_func_to_anat=True, caching=True, ext=None,
        write_output_images=2, func_basenames=None, func_prefix="",
        anat_basename=None, anat_prefix="", report=True, verbose=True):
    ref_brain = 'func'
    src_brain = 'anat'
    ref = subject_data.func[0]
    src = subject_data.anat
    if coreg_func_to_anat:
        ref_brain, src_brain = src_brain, ref_brain
        ref, src = src, ref

    # prepare for smart caching
    if caching:
        mem = Memory(cachedir=os.path.join(
            subject_data.output_dir, 'cache_dir'), verbose=100)
    runner = lambda handle: mem.cache(handle) if caching else handle

    # estimate realignment (affine) params for coreg
    coreg = runner(Coregister(verbose=verbose).fit)(ref, src)

    # apply coreg
    if coreg_func_to_anat:
        if func_basenames is None:
            func_basenames = [get_basenames(func)
                              for func in subject_data.func]
        coreg_func = []
        for sess_func, sess_id in zip(subject_data.func,
                                      range(subject_data.n_sessions)):
            coreg_func.append(runner(coreg.transform)(
                sess_func,
                output_dir=subject_data.tmp_output_dir if (
                    write_output_images == 2) else None,
                basenames=func_basenames[sess_id] if coreg_func_to_anat
                else anat_basename,
                prefix=func_prefix))
        subject_data.func = coreg_func
        src = load_vols(subject_data.func[0])[0]
    else:
        if anat_basename is None:
            anat_basename = get_basenames(subject_data.anat)
        subject_data.anat = runner(coreg.transform)(
            subject_data.anat, basename=anat_basename,
            output_dir=subject_data.tmp_output_dir if (
                write_output_images == 2) else None,
            prefix=anat_prefix, ext=ext)
        src = subject_data.anat

    # generate coregistration QA thumbs
    if report:
        subject_data.generate_coregistration_thumbnails(
            coreg_func_to_anat=coreg_func_to_anat, nipype=False)

    del coreg
    if write_output_images > 1:
        subject_data.hardlink_output_files()
    return subject_data
def _do_subject_realign(subject_data, reslice=True, register_to_mean=False,
                        caching=True, hardlink_output=True, ext=None,
                        func_basenames=None, write_output_images=2,
                        report=True, func_prefix=None, verbose=True):
    if register_to_mean:
        raise NotImplementedError("Feature pending...")
    if func_prefix is None:
        func_prefix = PREPROC_OUTPUT_IMAGE_PREFICES['MC']
    if func_basenames is None:
        func_basenames = [get_basenames(func) for func in subject_data.func]

    # prepare for smart caching
    if caching:
        mem = Memory(cachedir=os.path.join(
            subject_data.output_dir, 'cache_dir'),
            verbose=100 if verbose is True else verbose)
    else:
        mem = Memory(None)

    mrimc = MRIMotionCorrection(
        n_sessions=subject_data.n_sessions, verbose=verbose)
    mrimc = mem.cache(mrimc.fit)(subject_data.func)
    mrimc_output = mem.cache(mrimc.transform)(
        reslice=reslice,
        output_dir=subject_data.scratch if (
            write_output_images == 2) else None,
        ext=ext, prefix=func_prefix, basenames=func_basenames)
    subject_data.func = mrimc_output['realigned_images']
    subject_data.realignment_parameters = mrimc_output[
        'realignment_parameters']

    # generate realignment thumbs
    if report:
        subject_data.generate_realignment_thumbnails(nipype=False)

    # garbage collection
    del mrimc

    if write_output_images > 1:
        subject_data.hardlink_output_files(verbose=verbose)
    return subject_data
def get_lookalike_people():
    m = Memory(cachedir='./cache_data', compress=6, verbose=0)
    load_func = m.cache(_get_lookalike_people)
    # faces, targets, target_ids = _get_lookalike_people()
    faces, targets, target_ids = load_func()
    return Bunch(data=faces.reshape(len(faces), -1), images=faces,
                 target=target_ids, target_names=targets,
                 DESCR="Look Alike People Dataset")
def cache(self, func, func_memory_level, **kwargs):
    """Return a joblib.Memory object if necessary (depends on memory_level)

    The memory_level is a rough estimator of the amount of memory necessary
    to cache a function call. By specifying a numeric value for this level,
    the user will be able to control more or less the memory used on his
    computer. This function will cache the function call or not depending
    on the memory level. This is a helper to avoid code pasting.

    Parameters
    ----------
    self: python object
        The object containing information about caching. It must have a
        memory attribute (used if caching is necessary) and an integer
        memory_level attribute to determine if the function must be cached
        or not.

    func: python function
        The function that may be cached

    func_memory_level: integer
        The memory_level from which caching must be enabled.

    Returns
    -------
    Either the original function (if there is no need to cache it) or a
    joblib.Memory object that will be used to cache the function call.
    """
    # if memory level is 0 but a memory object is provided, set
    # memory_level to 1 with a warning
    if self.memory_level == 0:
        if hasattr(self, 'memory') and self.memory is not None \
                and (isinstance(self.memory, basestring)
                     or self.memory.cachedir is not None):
            warnings.warn("memory_level is set to 0 but a Memory object has"
                          " been provided. Setting memory_level to 1.")
            self.memory_level = 1

    if self.memory_level < func_memory_level:
        return func
    else:
        memory = self.memory
        if isinstance(memory, basestring):
            memory = Memory(cachedir=memory)
        if memory.cachedir is None:
            warnings.warn("Caching has been enabled (memory_level = %d) but"
                          " no Memory object or path has been provided"
                          " (parameter memory). Caching canceled for"
                          " function %s." %
                          (self.memory_level, func.func_name))
        return memory.cache(func, **kwargs)
def fit(self, data, Y=None):
    if hasattr(data, 'copy'):
        # It's an array
        data = data.copy()
    else:
        # Probably a list
        data = copy.deepcopy(data)

    memory = self.memory
    if isinstance(memory, basestring):
        memory = Memory(cachedir=memory)

    pcas = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
        delayed(subject_pca)(subject_data,
                             n_components=self.n_components, mem=memory)
        for subject_data in data)
    pcas = np.concatenate(pcas, axis=1)

    if self.kurtosis_thr is None:
        group_maps = memory.cache(randomized_svd)(
            pcas, self.n_components)[0]
        group_maps = group_maps[:, :self.n_components]

        ica_maps = memory.cache(fastica)(
            group_maps, whiten=False, fun='cube',
            random_state=self.random_state)[2]
        ica_maps = ica_maps.T
    else:
        ica_maps = self._find_high_kurtosis(pcas, memory)

    del pcas
    self.maps_ = ica_maps
    if not self.maps_only:
        # Relearn the time series
        self.learn_from_maps(data)

    return self
def motion_correction_pypreprocess(in_file, out_path, force_mean_reference,
                                   extra_params={}):
    """An attempt at motion correction using the pypreprocess package.

    inputs:
        in_file: path to the input file or input file loaded as an nibabel
            image.
        out_path: path to the future output file
        force_mean_reference: if evaluated True, adjust motion according to
            the mean image; otherwise adjust to the first volume.
        extra_params: extra parameters to MRIMotionCorrection

    return:
        the motion corrected image
    """
    if force_mean_reference:  # calculate the mean and insert to the front
        print('motion correction referenced to mean!')
        in_file = math_img('np.insert(img, 0, np.mean(img, axis=-1), axis=3)',
                           img=in_file)
    else:
        print('motion correction referenced to the first slice.')

    # instantiate realigner
    if 'MRIMotionCorrection' in extra_params:
        print 'extra parameters are used for MRIMotionCorrection: %s' \
            % extra_params['MRIMotionCorrection']
        mrimc = MRIMotionCorrection(**extra_params['MRIMotionCorrection'])
    else:
        mrimc = MRIMotionCorrection()

    # fit realigner
    if USE_CACHE:
        mem = Memory("func_preproc_cache")
        mrimc = mem.cache(mrimc.fit)(in_file)
    else:
        mrimc = mrimc.fit(in_file)

    # write realigned files to disk
    result = mrimc.transform(concat=True)['realigned_images'][0]
    if force_mean_reference:
        # remove the first frame, which was the mean
        result = math_img('img[...,1:]', img=result)
    if out_path:
        nib.save(result, out_path)
    return result
def fit(self, X, y=None, get_rhos=False):
    '''
    Sets up for divergence estimation "from" new data "to" X.
    Builds FLANN indices for each bag, and maybe gets within-bag distances.

    Parameters
    ----------
    X : list of arrays or :class:`skl_groups.features.Features`
        The bags to search "to".

    get_rhos : boolean, optional, default False
        Compute within-bag distances :attr:`rhos_`. These are only needed
        for some divergence functions or if do_sym is passed, and they'll
        be computed (and saved) during :meth:`transform` if they're not
        computed here.

        If you're using Jensen-Shannon divergence, a higher max_K may
        be needed once it sees the number of points in the transformed
        bags, so the computation here might be wasted.
    '''
    self.features_ = X = as_features(X, stack=True, bare=True)

    # if we're using a function that needs to pick its K vals itself,
    # then we need to set max_K here. when we transform(), might have to
    # re-do this :|
    Ks = self._get_Ks()
    _, _, _, max_K, save_all_Ks, _ = _choose_funcs(
        self.div_funcs, Ks, X.dim, X.n_pts, None, self.version)

    if max_K >= X.n_pts.min():
        msg = "asked for K = {}, but there's a bag with only {} points"
        raise ValueError(msg.format(max_K, X.n_pts.min()))

    memory = self.memory
    if isinstance(memory, string_types):
        memory = Memory(cachedir=memory, verbose=0)

    self.indices_ = id = memory.cache(_build_indices)(X, self._flann_args())
    if get_rhos:
        self.rhos_ = _get_rhos(X, id, Ks, max_K, save_all_Ks, self.min_dist)
    elif hasattr(self, 'rhos_'):
        del self.rhos_

    return self
def transform(self, X):
    r'''
    Computes the divergences from X to :attr:`features_`.

    Parameters
    ----------
    X : list of bag feature arrays or :class:`skl_groups.features.Features`
        The bags to search "from".

    Returns
    -------
    divs : array of shape ``[len(div_funcs), len(Ks), len(X), len(features_)] + ([2] if do_sym else [])``
        The divergences from X to :attr:`features_`.
        ``divs[d, k, i, j]`` is the ``div_funcs[d]`` divergence
        from ``X[i]`` to ``features_[j]`` using a K of ``Ks[k]``.
        If ``do_sym``, ``divs[d, k, i, j, 0]`` is
        :math:`D_{d,k}( X_i \| \texttt{features_}_j)` and
        ``divs[d, k, i, j, 1]`` is
        :math:`D_{d,k}(\texttt{features_}_j \| X_i)`.
    '''
    X = as_features(X, stack=True, bare=True)
    Y = self.features_
    Ks = np.asarray(self.Ks)

    if X.dim != Y.dim:
        msg = "incompatible dimensions: fit with {}, transform with {}"
        raise ValueError(msg.format(Y.dim, X.dim))

    memory = self.memory
    if isinstance(memory, string_types):
        memory = Memory(cachedir=memory, verbose=0)

    # ignore Y_indices to avoid slow pickling of them
    # NOTE: if the indices are approximate, then might not get the same
    #       results!
    est = memory.cache(_est_divs, ignore=['n_jobs', 'Y_indices', 'Y_rhos'])
    output, self.rhos_ = est(
        X, Y, self.indices_, getattr(self, 'rhos_', None),
        self.div_funcs, Ks, self.do_sym, self.clamp, self.version,
        self.min_dist, self._flann_args(), self._n_jobs)
    return output
def _do_subject_realign(subject_data, reslice=True, register_to_mean=False,
                        caching=True, hardlink_output=True, ext=None,
                        func_basenames=None, write_output_images=2,
                        report=True, func_prefix=None):
    if register_to_mean:
        raise NotImplementedError("Feature pending...")
    if func_prefix is None:
        func_prefix = PREPROC_OUTPUT_IMAGE_PREFICES['MC']
    if func_basenames is None:
        func_basenames = [get_basenames(func) for func in subject_data.func]

    # prepare for smart caching
    if caching:
        mem = Memory(cachedir=os.path.join(
            subject_data.output_dir, 'cache_dir'), verbose=100)
    runner = lambda handle: mem.cache(handle) if caching else handle

    mrimc = runner(MRIMotionCorrection(
        n_sessions=subject_data.n_sessions, verbose=True).fit)(
        [sess_func for sess_func in subject_data.func])
    mrimc_output = runner(mrimc.transform)(
        reslice=reslice,
        output_dir=subject_data.tmp_output_dir if (
            write_output_images == 2) else None,
        ext=ext, prefix=func_prefix, basenames=func_basenames)
    subject_data.func = mrimc_output['realigned_images']
    subject_data.realignment_parameters = mrimc_output[
        'realignment_parameters']

    # generate realignment thumbs
    if report:
        subject_data.generate_realignment_thumbnails(nipype=False)

    # garbage collection
    del mrimc

    if write_output_images > 1:
        subject_data.hardlink_output_files()
    return subject_data
def fetch_asirra(image_count=1000):
    """
    Parameters
    ----------
    image_count : positive integer

    Returns
    -------
    data : Bunch
        Dictionary-like object with the following attributes :
        'images', the sample images, 'data', the flattened images,
        'target', the label for the image (0 for cat, 1 for dog),
        and 'DESCR' the full description of the dataset.
    """
    partial_path = check_fetch_asirra()
    m = Memory(cachedir=partial_path, compress=6, verbose=0)
    load_func = m.cache(_fetch_asirra)
    images, target = load_func(partial_path, image_count=image_count)
    return Bunch(data=images.reshape(len(images), -1), images=images,
                 target=target, DESCR="Asirra cats and dogs dataset")
def hdbscan(X, min_cluster_size=5, min_samples=None, alpha=1.0, metric='minkowski', p=2, leaf_size=40, algorithm='best', memory=Memory(cachedir=None, verbose=0), approx_min_span_tree=True, gen_min_span_tree=False, core_dist_n_jobs=4, allow_single_cluster=False, **kwargs): """Perform HDBSCAN clustering from a vector array or distance matrix. Parameters ---------- X : array or sparse (CSR) matrix of shape (n_samples, n_features), or \ array of shape (n_samples, n_samples) A feature array, or array of distances between samples if ``metric='precomputed'``. min_cluster_size : int optional The minimum number of samples in a group for that group to be considered a cluster; groupings smaller than this size will be left as noise. min_samples : int, optional The number of samples in a neighborhood for a point to be considered as a core point. This includes the point itself. defaults to the min_cluster_size. alpha : float, optional A distance scaling parameter as used in robust single linkage. See (K. Chaudhuri and S. Dasgupta "Rates of convergence for the cluster tree."). (default 1.0) metric : string, or callable, optional The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by metrics.pairwise.pairwise_distances for its metric parameter. If metric is "precomputed", X is assumed to be a distance matrix and must be square. (default minkowski) p : int, optional p value to use if using the minkowski metric. (default 2) leaf_size : int, optional Leaf size for trees responsible for fast nearest neighbour queries. (default 40) algorithm : string, optional Exactly which algorithm to use; hdbscan has variants specialised for different characteristics of the data. By default this is set to ``best`` which chooses the "best" algorithm given the nature of the data. You can force other options if you believe you know better. Options are: * ``best`` * ``generic`` * ``prims_kdtree`` * ``prims_balltree`` * ``boruvka_kdtree`` * ``boruvka_balltree`` memory : Instance of joblib.Memory or string (optional) Used to cache the output of the computation of the tree. By default, no caching is done. If a string is given, it is the path to the caching directory. approx_min_span_tree : Bool, optional Whether to accept an only approximate minimum spanning tree. For some algorithms this can provide a significant speedup, but the resulting clustering may be of marginally lower quality. If you are willing to sacrifice speed for correctness you may want to explore this; in general this should be left at the default True. (default True) gen_min_span_tree : bool, optional Whether to generate the minimum spanning tree for later analysis. (default False) core_dist_n_jobs : int, optional Number of parallel jobs to run in core distance computations (if supported by the specific algorithm). (default 4) allow_single_cluster : boolean By default HDBSCAN* will not produce a single cluster, setting this to t=True will override this and allow single cluster results in the case that you feel this is a valid result for your dataset. (default False) **kwargs : optional Arguments passed to the distance metric Returns ------- labels : array [n_samples] Cluster labels for each point. Noisy samples are given the label -1. probabilities : array [n_samples] Cluster membership strengths for each point. Noisy samples are assigned 0. cluster_persistence : array, shape = [n_clusters] A score of how persistent each cluster is. 
A score of 1.0 represents a perfectly stable cluster that persists over all distance scales, while a score of 0.0 represents a perfectly ephemeral cluster. These scores can be guage the relative coherence of the clusters output by the algorithm. condensed_tree : record array The condensed cluster hierarchy used to generate clusters. single_linkage_tree : array [n_samples - 1, 4] The single linkage tree produced during clustering in scipy hierarchical clustering format (see http://docs.scipy.org/doc/scipy/reference/cluster.hierarchy.html). min_spanning_tree : array [n_samples - 1, 3] The minimum spanning as an edgelist. If gen_min_span_tree was False this will be None. References ---------- R. Campello, D. Moulavi, and J. Sander, "Density-Based Clustering Based on Hierarchical Density Estimates" In: Advances in Knowledge Discovery and Data Mining, Springer, pp 160-172. 2013 """ if min_samples is None: min_samples = min_cluster_size if type(min_samples) is not int or type(min_cluster_size) is not int: raise ValueError('Min samples and min cluster size must be integers!') if min_samples <= 0 or min_cluster_size <= 0: raise ValueError('Min samples and Min cluster size must be positive integers') if alpha <= 0.0 or type(alpha) is int: raise ValueError('Alpha must be a positive value greater than 0!') if leaf_size < 1: raise ValueError('Leaf size must be greater than 0!') # Checks input and converts to an nd-array where possible X = check_array(X, accept_sparse='csr') # Python 2 and 3 compliant string_type checking if isinstance(memory, six.string_types): memory = Memory(cachedir=memory, verbose=0) if algorithm != 'best': if algorithm == 'generic': (single_linkage_tree, result_min_span_tree) = \ memory.cache(_hdbscan_generic)(X, min_samples, alpha, metric, p, leaf_size, gen_min_span_tree, **kwargs) elif algorithm == 'prims_kdtree': if metric not in KDTree.valid_metrics: raise ValueError("Cannot use Prim's with KDTree for this metric!") (single_linkage_tree, result_min_span_tree) = \ memory.cache(_hdbscan_prims_kdtree)(X, min_samples, alpha, metric, p, leaf_size, gen_min_span_tree, **kwargs) elif algorithm == 'prims_balltree': if metric not in BallTree.valid_metrics: raise ValueError("Cannot use Prim's with BallTree for this metric!") (single_linkage_tree, result_min_span_tree) = \ memory.cache(_hdbscan_prims_balltree)(X, min_samples, alpha, metric, p, leaf_size, gen_min_span_tree, **kwargs) elif algorithm == 'boruvka_kdtree': if metric not in BallTree.valid_metrics: raise ValueError("Cannot use Boruvka with KDTree for this metric!") (single_linkage_tree, result_min_span_tree) = \ memory.cache(_hdbscan_boruvka_kdtree)(X, min_samples, alpha, metric, p, leaf_size, approx_min_span_tree, gen_min_span_tree, core_dist_n_jobs, **kwargs) elif algorithm == 'boruvka_balltree': if metric not in BallTree.valid_metrics: raise ValueError("Cannot use Boruvka with BallTree for this metric!") (single_linkage_tree, result_min_span_tree) = \ memory.cache(_hdbscan_boruvka_balltree)(X, min_samples, alpha, metric, p, leaf_size, approx_min_span_tree, gen_min_span_tree, core_dist_n_jobs, **kwargs) else: raise TypeError('Unknown algorithm type %s specified' % algorithm) else: if issparse(X) or metric not in FAST_METRICS: # We can't do much with sparse matrices ... 
            (single_linkage_tree, result_min_span_tree) = \
                memory.cache(_hdbscan_generic)(X, min_samples, alpha, metric,
                                               p, leaf_size, gen_min_span_tree,
                                               **kwargs)
        elif metric in KDTree.valid_metrics:
            # TODO: Need heuristic to decide when to go to boruvka;
            # still debugging for now
            if X.shape[1] > 60:
                (single_linkage_tree, result_min_span_tree) = \
                    memory.cache(_hdbscan_prims_kdtree)(X, min_samples, alpha,
                                                        metric, p, leaf_size,
                                                        gen_min_span_tree,
                                                        **kwargs)
            else:
                (single_linkage_tree, result_min_span_tree) = \
                    memory.cache(_hdbscan_boruvka_kdtree)(X, min_samples,
                                                          alpha, metric, p,
                                                          leaf_size,
                                                          approx_min_span_tree,
                                                          gen_min_span_tree,
                                                          core_dist_n_jobs,
                                                          **kwargs)
        else:  # Metric is a valid BallTree metric
            # TODO: Need heuristic to decide when to go to boruvka;
            # still debugging for now
            if X.shape[1] > 60:
                # Use the BallTree variant of Prim's here: the KDTree variant
                # would reject metrics that are only valid for BallTree.
                (single_linkage_tree, result_min_span_tree) = \
                    memory.cache(_hdbscan_prims_balltree)(X, min_samples,
                                                          alpha, metric, p,
                                                          leaf_size,
                                                          gen_min_span_tree,
                                                          **kwargs)
            else:
                (single_linkage_tree, result_min_span_tree) = \
                    memory.cache(_hdbscan_boruvka_balltree)(X, min_samples,
                                                            alpha, metric, p,
                                                            leaf_size,
                                                            approx_min_span_tree,
                                                            gen_min_span_tree,
                                                            core_dist_n_jobs,
                                                            **kwargs)

    return _tree_to_labels(X, single_linkage_tree, min_cluster_size,
                           allow_single_cluster) + (result_min_span_tree,)
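# --- Usage sketch (not part of the library) --------------------------------
# A minimal, hypothetical example of calling the hdbscan() function defined
# above.  make_blobs is only used to fabricate demo data, the cache path is
# arbitrary, and the unpacking order follows the Returns section of the
# docstring.
import numpy as np
from sklearn.datasets import make_blobs

demo_data, _ = make_blobs(n_samples=500, centers=4, random_state=0)

(labels, probabilities, persistence,
 condensed_tree, single_linkage_tree, min_spanning_tree) = hdbscan(
     demo_data,
     min_cluster_size=15,
     metric='euclidean',
     memory='/tmp/hdbscan_cache',   # a string path enables joblib caching
     gen_min_span_tree=True)

print("Estimated number of clusters: %d" % len(set(labels) - {-1}))
print("Mean membership strength: %.3f" % np.mean(probabilities))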
t_r=2.5, standardize=True, memory='nilearn_cache', memory_level=1, verbose=2) masker.fit() subject_time_series = [] func_filenames = adhd_dataset.func confound_filenames = adhd_dataset.confounds for func_filename, confound_filename in zip(func_filenames, confound_filenames): print("Processing file %s" % func_filename) # Computing some confounds hv_confounds = mem.cache(image.high_variance_confounds)(func_filename) region_ts = masker.transform(func_filename, confounds=[hv_confounds, confound_filename]) subject_time_series.append(region_ts) ############################################################################## # Computing group-sparse precision matrices from nilearn.connectome import GroupSparseCovarianceCV gsc = GroupSparseCovarianceCV(verbose=2) gsc.fit(subject_time_series) from sklearn import covariance gl = covariance.GraphLassoCV(verbose=2) gl.fit(np.concatenate(subject_time_series))
def robust_single_linkage(X, cut, k=5, alpha=1.4142135623730951, gamma=5, metric='minkowski', p=2, algorithm='best', memory=Memory(cachedir=None, verbose=0)): """Perform robust single linkage clustering from a vector array or distance matrix. Parameters ---------- X : array or sparse (CSR) matrix of shape (n_samples, n_features), or \ array of shape (n_samples, n_samples) A feature array, or array of distances between samples if ``metric='precomputed'``. cut : float The reachability distance value to cut the cluster heirarchy at to derive a flat cluster labelling. k : int, optional Reachability distances will be computed with regard to the `k` nearest neighbors. (default 5) alpha : float, optional Distance scaling for reachability distance computation. Reachability distance is computed as $max \{ core_k(a), core_k(b), 1/\alpha d(a,b) \}$. (default sqrt(2)) gamma : int, optional Ignore any clusters in the flat clustering with size less than gamma, and declare points in such clusters as noise points. (default 5) metric : string, or callable, optional The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by metrics.pairwise.pairwise_distances for its metric parameter. If metric is "precomputed", X is assumed to be a distance matrix and must be square. algorithm : string, optional Exactly which algorithm to use; hdbscan has variants specialised for different characteristics of the data. By default this is set to ``best`` which chooses the "best" algorithm given the nature of the data. You can force other options if you believe you know better. Options are: * ``generic`` * ``best`` * ``prims_kdtree`` * ``prims_balltree`` * ``boruvka_kdtree`` * ``boruvka_balltree`` memory : Instance of joblib.Memory or string (optional) Used to cache the output of the computation of the tree. By default, no caching is done. If a string is given, it is the path to the caching directory. Returns ------- labels : array [n_samples] Cluster labels for each point. Noisy samples are given the label -1. single_linkage_tree : array [n_samples - 1, 4] The single linkage tree produced during clustering in scipy hierarchical clustering format (see http://docs.scipy.org/doc/scipy/reference/cluster.hierarchy.html). References ---------- K. Chaudhuri and S. Dasgupta. "Rates of convergence for the cluster tree." In Advances in Neural Information Processing Systems, 2010. 
""" if type(k) is not int or k < 1: raise ValueError('k must be an integer greater than zero!') if type(alpha) is not float or alpha < 1.0: raise ValueError('alpha must be a float greater than or equal to 1.0!') if type(gamma) is not int or gamma < 1: raise ValueError('gamma must be an integer greater than zero!') X = check_array(X, accept_sparse='csr') if isinstance(memory, six.string_types): memory = Memory(cachedir=memory, verbose=0) if algorithm != 'best': if algorithm == 'generic': single_linkage_tree = \ memory.cache(_rsl_generic)(X, k, alpha, metric, p) elif algorithm == 'prims_kdtree': single_linkage_tree = \ memory.cache(_rsl_prims_kdtree)(X, k, alpha, metric, p) elif algorithm == 'prims_balltree': single_linkage_tree = \ memory.cache(_rsl_prims_balltree)(X, k, alpha, metric, p) elif algorithm == 'boruvka_kdtree': single_linkage_tree = \ memory.cache(_rsl_boruvka_kdtree)(X, k, alpha, metric, p) elif algorithm == 'boruvka_balltree': single_linkage_tree = \ memory.cache(_rsl_boruvka_balltree)(X, k, alpha, metric, p) else: raise TypeError('Unknown algorithm type %s specified' % algorithm) else: if issparse(X) or metric not in FAST_METRICS: # We can't do much with sparse matrices ... single_linkage_tree = \ memory.cache(_rsl_generic)(X, k, alpha, metric, p) elif metric in KDTree.valid_metrics: # Need heuristic to decide when to go to boruvka; still debugging for now if X.shape[1] > 128: single_linkage_tree = \ memory.cache(_rsl_prims_kdtree)(X, k, alpha, metric, p) else: single_linkage_tree = \ memory.cache(_rsl_boruvka_kdtree)(X, k, alpha, metric, p) else: # Metric is a valid BallTree metric # Need heuristic to decide when to go to boruvka; still debugging for now if X.shape[1] > 128: single_linkage_tree = \ memory.cache(_rsl_prims_kdtree)(X, k, alpha, metric, p) else: single_linkage_tree = \ memory.cache(_rsl_boruvka_balltree)(X, k, alpha, metric, p) labels = single_linkage_tree.get_clusters(cut, gamma) return labels, single_linkage_tree
import nilearn.input_data from sklearn.externals.joblib import Memory mem = Memory('nilearn_cache') masker = nilearn.input_data.NiftiMapsMasker( msdl_atlas_dataset.maps, resampling_target="maps", detrend=True, low_pass=None, high_pass=0.01, t_r=2.5, standardize=True, memory=mem, memory_level=1, verbose=2) masker.fit() fmri_filename = adhd_dataset.func[0] confound_filename = adhd_dataset.confounds[0] # Computing some confounds hv_confounds = mem.cache(nilearn.image.high_variance_confounds)( fmri_filename) time_series = masker.transform(fmri_filename, confounds=[hv_confounds, confound_filename]) print("-- Computing graph-lasso inverse matrix ...") from sklearn import covariance gl = covariance.GraphLassoCV(verbose=2) gl.fit(time_series) # Displaying results ########################################################## atlas_imgs = image.iter_img(msdl_atlas_dataset.maps) atlas_region_coords = [plotting.find_xyz_cut_coords(img) for img in atlas_imgs] title = "GraphLasso"
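# A sketch (not part of the original example) of how the estimated sparse
# inverse covariance could be displayed with the atlas coordinates computed
# above; the sign convention and the edge threshold are assumptions.
plotting.plot_connectome(-gl.precision_, atlas_region_coords,
                         edge_threshold='90%',
                         title="%s: sparse inverse covariance" % title)
plotting.show()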
def _do_fmri_distortion_correction( subject_data, # i'm unsure of the readout time, # but this is constant across both PE # directions and so can be scaled to 1 # (or any other nonzero float) protocol="MOTOR", readout_time=.01392, realign=True, coregister=True, coreg_func_to_anat=True, dc=True, segment=False, normalize=False, func_write_voxel_sizes=None, anat_write_voxel_sizes=None, report=False, **kwargs): """ Function to undistort task fMRI data for a given HCP subject. """ directions = ['LR', 'RL'] subject_data.sanitize() if dc: acq_params = [[1, 0, 0, readout_time], [-1, 0, 0, readout_time]] acq_params_file = os.path.join(subject_data.output_dir, "b0_acquisition_params.txt") np.savetxt(acq_params_file, acq_params, fmt='%f') fieldmap_files = [ os.path.join( os.path.dirname(subject_data.func[sess]), "%s_3T_SpinEchoFieldMap_%s.nii.gz" % (subject_data.subject_id, directions[sess])) for sess in xrange(subject_data.n_sessions) ] sbref_files = [ sess_func.replace(".nii", "_SBRef.nii") for sess_func in subject_data.func ] # prepare for smart caching mem = Memory(os.path.join(subject_data.output_dir, "cache_dir")) for x in [fieldmap_files, sbref_files, subject_data.func]: assert len(x) == 2 for y in x: assert os.path.isfile(y), y # fslroi zeroth_fieldmap_files = [] for fieldmap_file in fieldmap_files: if not os.path.isfile(fieldmap_file): print "Can't find fieldmap file %s; skipping subject %s" % ( fieldmap_file, subject_data.subject_id) return # peel 0th volume of each fieldmap zeroth_fieldmap_file = os.path.join( subject_data.output_dir, "0th_%s" % os.path.basename(fieldmap_file)) fslroi_cmd = "fsl5.0-fslroi %s %s 0 1" % (fieldmap_file, zeroth_fieldmap_file) print "\r\nExecuting '%s' ..." % fslroi_cmd print mem.cache(commands.getoutput)(fslroi_cmd) zeroth_fieldmap_files.append(zeroth_fieldmap_file) # merge the 0th volume of both fieldmaps merged_zeroth_fieldmap_file = os.path.join( subject_data.output_dir, "merged_with_other_direction_%s" % (os.path.basename(zeroth_fieldmap_files[0]))) fslmerge_cmd = "fsl5.0-fslmerge -t %s %s %s" % ( merged_zeroth_fieldmap_file, zeroth_fieldmap_files[0], zeroth_fieldmap_files[1]) print "\r\nExecuting '%s' ..." % fslmerge_cmd print mem.cache(commands.getoutput)(fslmerge_cmd) # do topup (learn distortion model) topup_results_basename = os.path.join(subject_data.output_dir, "topup_results") topup_cmd = ("fsl5.0-topup --imain=%s --datain=%s --config=b02b0.cnf " "--out=%s" % (merged_zeroth_fieldmap_file, acq_params_file, topup_results_basename)) print "\r\nExecuting '%s' ..." % topup_cmd print mem.cache(commands.getoutput)(topup_cmd) # apply learn deformations to absorb distortion dc_fmri_files = [] for sess in xrange(2): # merge SBRef + task BOLD for current PE direction assert len(subject_data.func) == 2, subject_data fourD_plus_sbref = os.path.join( subject_data.output_dir, "sbref_plus_" + os.path.basename(subject_data.func[sess])) fslmerge_cmd = "fsl5.0-fslmerge -t %s %s %s" % ( fourD_plus_sbref, sbref_files[sess], subject_data.func[sess]) print "\r\nExecuting '%s' ..." 
% fslmerge_cmd print mem.cache(commands.getoutput)(fslmerge_cmd) # realign task BOLD to SBRef sess_output_dir = subject_data.session_output_dirs[sess] rfourD_plus_sbref = _do_subject_realign(SubjectData( func=[fourD_plus_sbref], output_dir=subject_data.output_dir, n_sessions=1, session_output_dirs=[sess_output_dir]), report=False).func[0] # apply topup to realigned images dc_rfourD_plus_sbref = os.path.join( subject_data.output_dir, "dc" + os.path.basename(rfourD_plus_sbref)) applytopup_cmd = ( "fsl5.0-applytopup --imain=%s --verbose --inindex=%i " "--topup=%s --out=%s --datain=%s --method=jac" % (rfourD_plus_sbref, sess + 1, topup_results_basename, dc_rfourD_plus_sbref, acq_params_file)) print "\r\nExecuting '%s' ..." % applytopup_cmd print mem.cache(commands.getoutput)(applytopup_cmd) # recover undistorted task BOLD dc_rfmri_file = dc_rfourD_plus_sbref.replace("sbref_plus_", "") fslroi_cmd = "fsl5.0-fslroi %s %s 1 -1" % (dc_rfourD_plus_sbref, dc_rfmri_file) print "\r\nExecuting '%s' ..." % fslroi_cmd print mem.cache(commands.getoutput)(fslroi_cmd) # sanity tricks if dc_rfmri_file.endswith(".nii"): dc_rfmri_file = dc_rfmri_file + ".gz" dc_fmri_files.append(dc_rfmri_file) subject_data.func = dc_fmri_files if isinstance(subject_data.func, basestring): subject_data.func = [subject_data.func] # continue preprocessing subject_data = do_subject_preproc( subject_data, realign=realign, coregister=coregister, coreg_anat_to_func=not coreg_func_to_anat, segment=True, normalize=False, report=report) # ok for GLM now return subject_data
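# Hypothetical invocation for a single HCP subject (illustrative only): the
# paths and subject id are made up, and the function expects exactly two
# phase-encoding sessions (LR, RL) with the SpinEchoFieldMap and *_SBRef.nii
# images sitting next to each task run.  SubjectData is the same class used
# above for the per-session realignment.
subject_data = SubjectData(
    func=["/data/HCP/100307/tfMRI_MOTOR_LR/tfMRI_MOTOR_LR.nii.gz",
          "/data/HCP/100307/tfMRI_MOTOR_RL/tfMRI_MOTOR_RL.nii.gz"],
    output_dir="/tmp/100307",
    n_sessions=2,
    session_output_dirs=["/tmp/100307/LR", "/tmp/100307/RL"])
subject_data.subject_id = "100307"
subject_data.anat = "/data/HCP/100307/T1w/T1w_acpc_dc_restore_brain.nii.gz"

subject_data = _do_fmri_distortion_correction(
    subject_data, protocol="MOTOR", readout_time=.01392, dc=True)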
def run_suject_level1_glm( subject_data, readout_time=.01392, # seconds tr=.72, dc=True, hrf_model="Canonical with Derivative", drift_model="Cosine", hfcut=100, regress_motion=True, slicer='ortho', cut_coords=None, threshold=3., cluster_th=15, normalize=True, fwhm=0., protocol="MOTOR", func_write_voxel_sizes=None, anat_write_voxel_sizes=None, **other_preproc_kwargs): """ Function to do preproc + analysis for a single HCP subject (task fMRI) """ add_regs_files = None n_motion_regressions = 6 subject_data.n_sessions = 2 subject_data.tmp_output_dir = os.path.join(subject_data.output_dir, "tmp") if not os.path.exists(subject_data.tmp_output_dir): os.makedirs(subject_data.tmp_output_dir) if not os.path.exists(subject_data.output_dir): os.makedirs(subject_data.output_dir) mem = Memory(os.path.join(subject_data.output_dir, "cache_dir"), verbose=100) # glob design files (.fsf) subject_data.design_files = [ os.path.join(subject_data.data_dir, ("MNINonLinear/Results/tfMRI_%s_%s/" "tfMRI_%s_%s_hp200_s4_level1.fsf") % (protocol, direction, protocol, direction)) for direction in ['LR', 'RL'] ] assert len(subject_data.design_files) == 2 for df in subject_data.design_files: if not os.path.isfile(df): return if 0x0: subject_data = _do_fmri_distortion_correction( subject_data, dc=dc, fwhm=fwhm, readout_time=readout_time, **other_preproc_kwargs) # chronometry stats_start_time = pretty_time() # merged lists paradigms = [] frametimes_list = [] design_matrices = [] # fmri_files = [] n_scans = [] # for direction, direction_index in zip(['LR', 'RL'], xrange(2)): for sess in xrange(subject_data.n_sessions): direction = ['LR', 'RL'][sess] # glob the design file # design_file = os.path.join(# _subject_data_dir, "tfMRI_%s_%s" % ( # protocol, direction), design_file = subject_data.design_files[sess] # "tfMRI_%s_%s_hp200_s4_level1.fsf" % ( # protocol, direction)) if not os.path.isfile(design_file): print "Can't find design file %s; skipping subject %s" % ( design_file, subject_data.subject_id) return # read the experimental setup print "Reading experimental setup from %s ..." % design_file fsl_condition_ids, timing_files, fsl_contrast_ids, contrast_values = \ read_fsl_design_file(design_file) print "... done.\r\n" # fix timing filenames timing_files = [ tf.replace("EVs", "tfMRI_%s_%s/EVs" % (protocol, direction)) for tf in timing_files ] # make design matrix print "Constructing design matrix for direction %s ..." % direction _n_scans = nibabel.load(subject_data.func[sess]).shape[-1] n_scans.append(_n_scans) add_regs_file = add_regs_files[ sess] if not add_regs_files is None else None design_matrix, paradigm, frametimes = make_dmtx_from_timing_files( timing_files, fsl_condition_ids, n_scans=_n_scans, tr=tr, hrf_model=hrf_model, drift_model=drift_model, hfcut=hfcut, add_regs_file=add_regs_file, add_reg_names=[ 'Translation along x axis', 'Translation along yaxis', 'Translation along z axis', 'Rotation along x axis', 'Rotation along y axis', 'Rotation along z axis', 'Differential Translation along x axis', 'Differential Translation along yaxis', 'Differential Translation along z axis', 'Differential Rotation along x axis', 'Differential Rotation along y axis', 'Differential Rotation along z axis' ][:n_motion_regressions] if not add_regs_files is None else None, ) print "... done." 
paradigms.append(paradigm) frametimes_list.append(frametimes) design_matrices.append(design_matrix) # convert contrasts to dict contrasts = dict(( contrast_id, # append zeros to end of contrast to match design np.hstack(( contrast_value, np.zeros(len(design_matrix.names) - len(contrast_value))))) for contrast_id, contrast_value in zip( fsl_contrast_ids, contrast_values)) # more interesting contrasts if protocol == 'MOTOR': contrasts['RH-LH'] = contrasts['RH'] - contrasts['LH'] contrasts['LH-RH'] = -contrasts['RH-LH'] contrasts['RF-LF'] = contrasts['RF'] - contrasts['LF'] contrasts['LF-RF'] = -contrasts['RF-LF'] contrasts['H'] = contrasts['RH'] + contrasts['LH'] contrasts['F'] = contrasts['RF'] + contrasts['LF'] contrasts['H-F'] = contrasts['RH'] + contrasts['LH'] - ( contrasts['RF'] - contrasts['LF']) contrasts['F-H'] = -contrasts['H-F'] contrasts = dict((k, v) for k, v in contrasts.iteritems() if "-" in k) # replicate contrasts across sessions contrasts = dict((cid, [cval] * 2) for cid, cval in contrasts.iteritems()) cache_dir = cache_dir = os.path.join(subject_data.output_dir, 'cache_dir') if not os.path.exists(cache_dir): os.makedirs(cache_dir) nipype_mem = NipypeMemory(base_dir=cache_dir) if 0x0: if np.sum(fwhm) > 0.: subject_data.func = nipype_mem.cache(spm.Smooth)( in_files=subject_data.func, fwhm=fwhm, ignore_exception=False, ).outputs.smoothed_files # fit GLM def tortoise(*args): print args print( 'Fitting a "Fixed Effect" GLM for merging LR and RL ' 'phase-encoding directions for subject %s ...' % (subject_data.subject_id)) fmri_glm = FMRILinearModel( subject_data.func, [design_matrix.matrix for design_matrix in design_matrices], mask='compute') fmri_glm.fit(do_scaling=True, model='ar1') print "... done.\r\n" # save computed mask mask_path = os.path.join(subject_data.output_dir, "mask.nii") print "Saving mask image to %s ..." % mask_path nibabel.save(fmri_glm.mask, mask_path) print "... done.\r\n" z_maps = {} effects_maps = {} map_dirs = {} try: for contrast_id, contrast_val in contrasts.iteritems(): print "\tcontrast id: %s" % contrast_id z_map, eff_map = fmri_glm.contrast(contrast_val, con_id=contrast_id, output_z=True, output_effects=True) # store stat maps to disk for map_type, out_map in zip(['z', 'effects'], [z_map, eff_map]): map_dir = os.path.join(subject_data.output_dir, '%s_maps' % map_type) map_dirs[map_type] = map_dir if not os.path.exists(map_dir): os.makedirs(map_dir) map_path = os.path.join( map_dir, '%s_%s.nii' % (map_type, contrast_id)) print "\t\tWriting %s ..." 
% map_path nibabel.save(out_map, map_path) # collect zmaps for contrasts we're interested in if map_type == 'z': z_maps[contrast_id] = map_path if map_type == 'effects': effects_maps[contrast_id] = map_path return effects_maps, z_maps, mask_path, map_dirs except: return None # compute native-space maps and mask stuff = mem.cache(tortoise)(subject_data.func, subject_data.anat) if stuff is None: return None effects_maps, z_maps, mask_path, map_dirs = stuff # remove repeated contrasts contrasts = dict((cid, cval[0]) for cid, cval in contrasts.iteritems()) import json json.dump( dict((k, list(v)) for k, v in contrasts.iteritems()), open(os.path.join(subject_data.tmp_output_dir, "contrasts.json"), "w")) subject_data.contrasts = contrasts if normalize: assert hasattr(subject_data, "parameter_file") subject_data.native_effects_maps = effects_maps subject_data.native_z_maps = z_maps subject_data.native_mask_path = mask_path # warp effects maps and mask from native to standard space (MNI) apply_to_files = [ v for _, v in subject_data.native_effects_maps.iteritems() ] + [subject_data.native_mask_path] tmp = nipype_mem.cache(spm.Normalize)( parameter_file=getattr(subject_data, "parameter_file"), apply_to_files=apply_to_files, write_bounding_box=[[-78, -112, -50], [78, 76, 85]], write_voxel_sizes=func_write_voxel_sizes, write_wrap=[0, 0, 0], write_interp=1, jobtype='write', ignore_exception=False, ).outputs.normalized_files subject_data.mask = hard_link(tmp[-1], subject_data.output_dir) subject_data.effects_maps = dict( zip(effects_maps.keys(), hard_link(tmp[:-1], map_dirs["effects"]))) # warp anat image subject_data.anat = hard_link( nipype_mem.cache(spm.Normalize)( parameter_file=getattr(subject_data, "parameter_file"), apply_to_files=subject_data.anat, write_bounding_box=[[-78, -112, -50], [78, 76, 85]], write_voxel_sizes=anat_write_voxel_sizes, write_wrap=[0, 0, 0], write_interp=1, jobtype='write', ignore_exception=False, ).outputs.normalized_files, subject_data.anat_output_dir) else: subject_data.mask = mask_path subject_data.effects_maps = effects_maps subject_data.z_maps = z_maps return subject_data
masker = input_data.NiftiMapsMasker( msdl_atlas_dataset.maps, resampling_target="maps", detrend=True, low_pass=None, high_pass=0.01, t_r=2.5, standardize=True, memory='nilearn_cache', memory_level=1, verbose=2) masker.fit() subject_time_series = [] func_filenames = adhd_dataset.func confound_filenames = adhd_dataset.confounds for func_filename, confound_filename in zip(func_filenames, confound_filenames): print("Processing file %s" % func_filename) # Computing some confounds hv_confounds = mem.cache(image.high_variance_confounds)( func_filename) region_ts = masker.transform(func_filename, confounds=[hv_confounds, confound_filename]) subject_time_series.append(region_ts) ############################################################################## # Computing group-sparse precision matrices # ------------------------------------------ from nilearn.connectome import GroupSparseCovarianceCV gsc = GroupSparseCovarianceCV(verbose=2) gsc.fit(subject_time_series) from sklearn import covariance gl = covariance.GraphLassoCV(verbose=2)
def robust_single_linkage(X, cut, k=5, alpha=1.4142135623730951, gamma=5, metric='euclidean', algorithm='best', memory=Memory(cachedir=None, verbose=0), leaf_size=40, core_dist_n_jobs=4, **kwargs): """Perform robust single linkage clustering from a vector array or distance matrix. Parameters ---------- X : array or sparse (CSR) matrix of shape (n_samples, n_features), or \ array of shape (n_samples, n_samples) A feature array, or array of distances between samples if ``metric='precomputed'``. cut : float The reachability distance value to cut the cluster heirarchy at to derive a flat cluster labelling. k : int, optional (default=5) Reachability distances will be computed with regard to the `k` nearest neighbors. alpha : float, optional (default=np.sqrt(2)) Distance scaling for reachability distance computation. Reachability distance is computed as $max \{ core_k(a), core_k(b), 1/\alpha d(a,b) \}$. gamma : int, optional (default=5) Ignore any clusters in the flat clustering with size less than gamma, and declare points in such clusters as noise points. metric : string, or callable, optional (default='euclidean') The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by metrics.pairwise.pairwise_distances for its metric parameter. If metric is "precomputed", X is assumed to be a distance matrix and must be square. algorithm : string, optional (default='best') Exactly which algorithm to use; hdbscan has variants specialised for different characteristics of the data. By default this is set to ``best`` which chooses the "best" algorithm given the nature of the data. You can force other options if you believe you know better. Options are: * ``generic`` * ``best`` * ``prims_kdtree`` * ``prims_balltree`` * ``boruvka_kdtree`` * ``boruvka_balltree`` memory : Instance of joblib.Memory or string (optional) Used to cache the output of the computation of the tree. By default, no caching is done. If a string is given, it is the path to the caching directory. leaf_size : int, optional (default=40) Leaf size for trees responsible for fast nearest neighbour queries. core_dist_n_jobs : int, optional Number of parallel jobs to run in core distance computations (if supported by the specific algorithm). For ``core_dist_n_jobs`` below -1, (n_cpus + 1 + core_dist_n_jobs) are used. (default 4) Returns ------- labels : ndarray, shape (n_samples, ) Cluster labels for each point. Noisy samples are given the label -1. single_linkage_tree : ndarray, shape (n_samples - 1, 4) The single linkage tree produced during clustering in scipy hierarchical clustering format (see http://docs.scipy.org/doc/scipy/reference/cluster.hierarchy.html). References ---------- .. [1] Chaudhuri, K., & Dasgupta, S. (2010). Rates of convergence for the cluster tree. In Advances in Neural Information Processing Systems (pp. 343-351). 
""" if not isinstance(k, int) or k < 1: raise ValueError('k must be an integer greater than zero!') if not isinstance(alpha, float) or alpha < 1.0: raise ValueError('alpha must be a float greater than or equal to 1.0!') if not isinstance(gamma, int) or gamma < 1: raise ValueError('gamma must be an integer greater than zero!') if not isinstance(leaf_size, int) or leaf_size < 1: raise ValueError('Leaf size must be at least one!') if metric == 'minkowski': if 'p' not in kwargs or kwargs['p'] is None: raise TypeError('Minkowski metric given but no p value supplied!') if kwargs['p'] < 0: raise ValueError('Minkowski metric with negative p value is not' ' defined!') X = check_array(X, accept_sparse='csr') if isinstance(memory, six.string_types): memory = Memory(cachedir=memory, verbose=0) if algorithm != 'best': if algorithm == 'generic': single_linkage_tree = memory.cache(_rsl_generic)(X, k, alpha, metric, **kwargs) elif algorithm == 'prims_kdtree': single_linkage_tree = memory.cache(_rsl_prims_kdtree)(X, k, alpha, metric, **kwargs) elif algorithm == 'prims_balltree': single_linkage_tree = memory.cache(_rsl_prims_balltree)(X, k, alpha, metric, **kwargs) elif algorithm == 'boruvka_kdtree': single_linkage_tree = \ memory.cache(_rsl_boruvka_kdtree)(X, k, alpha, metric, leaf_size, core_dist_n_jobs, **kwargs) elif algorithm == 'boruvka_balltree': single_linkage_tree = \ memory.cache(_rsl_boruvka_balltree)(X, k, alpha, metric, leaf_size, core_dist_n_jobs, **kwargs) else: raise TypeError('Unknown algorithm type %s specified' % algorithm) else: if issparse(X) or metric not in FAST_METRICS: # We can't do much with sparse matrices ... single_linkage_tree = memory.cache(_rsl_generic)(X, k, alpha, metric, **kwargs) elif metric in KDTree.valid_metrics: # Need heuristic to decide when to go to boruvka; # still debugging for now if X.shape[1] > 128: single_linkage_tree = memory.cache(_rsl_prims_kdtree)(X, k, alpha, metric, **kwargs) else: single_linkage_tree = \ memory.cache(_rsl_boruvka_kdtree)(X, k, alpha, metric, leaf_size, core_dist_n_jobs, **kwargs) else: # Metric is a valid BallTree metric # Need heuristic to decide when to go to boruvka; # still debugging for now if X.shape[1] > 128: single_linkage_tree = memory.cache(_rsl_prims_kdtree)(X, k, alpha, metric, **kwargs) else: single_linkage_tree = \ memory.cache(_rsl_boruvka_balltree)(X, k, alpha, metric, leaf_size, core_dist_n_jobs, **kwargs) labels = single_linkage_tree.get_clusters(cut, gamma) return labels, single_linkage_tree.to_numpy()
# set func subject_data.func = [x for x in session_func if subject_id in x] assert len(subject_data.func) == 1 subject_data.func = subject_data.func[0] # set anat subject_data.anat = [x for x in session_anat if subject_id in x] assert len(subject_data.anat) == 1 subject_data.anat = subject_data.anat[0] # set subject output directory subject_data.output_dir = "/tmp/%s" % subject_id subject_data.sanitize(deleteorient=True, niigz2nii=False) yield (subject_data.subject_id, subject_data.func[0], subject_data.anat) # spm auditory demo mem.cache(_run_demo)(*_spm_auditory_factory()) # NYU rest demo for subject_id, func, anat in _nyu_rest_factory(): print "%s +++NYU rest %s+++\r\n" % ("\t" * 5, subject_id) mem.cache(_run_demo)(func, anat) # ABIDE demo for subject_id, func, anat in _abide_factory(): print "%s +++ABIDE %s+++\r\n" % ("\t" * 5, subject_id) mem.cache(_run_demo)(func, anat)
def do_subject_glm(subject_data): """FE analysis for a single subject.""" subject_id = subject_data['subject_id'] output_dir = subject_data["output_dir"] func_files = subject_data['func'] anat = subject_data['anat'] onset_files = subject_data['onset'] # subject_id = os.path.basename(subject_dir) # subject_output_dir = os.path.join(output_dir, subject_id) mem = Memory(os.path.join(output_dir, "cache")) if not os.path.exists(output_dir): os.makedirs(output_dir) # glob files: anat, session func files, session onset files # anat = glob.glob(os.path.join(subject_dir, anat_wildcard)) # assert len(anat) == 1 # anat = anat[0] # onset_files = sorted([glob.glob(os.path.join(subject_dir, session))[0] # for session in session_onset_wildcards]) # func_files = sorted([sorted(glob.glob(os.path.join(subject_dir, session))) # for session in session_func_wildcards]) ### Preprocess data ####################################################### if 0: subject_data = mem.cache(do_subject_preproc)( dict(func=func_files, anat=anat, output_dir=output_dir)) func_files = subject_data['func'] anat = subject_data['anat'] # reslice func images func_files = [mem.cache(reslice_vols)( sess_func, target_affine=nibabel.load(sess_func[0]).get_affine()) for sess_func in func_files] ### GLM: loop on (session_bold, onse_file) pairs over the various sessions design_matrices = [] for session, (func_file, onset_file) in enumerate(zip(func_files, onset_files)): if isinstance(func_file, str): bold = nibabel.load(func_file) else: if len(func_file) == 1: func_file = func_file[0] bold = nibabel.load(func_file) assert len(bold.shape) == 4 n_scans = bold.shape[-1] del bold else: n_scans = len(func_file) frametimes = np.linspace(0, (n_scans - 1) * tr, n_scans) conditions, onsets, durations, amplitudes = parse_onset_file( onset_file) onsets *= tr durations *= tr paradigm = BlockParadigm(con_id=conditions, onset=onsets, duration=durations, amplitude=amplitudes) design_matrices.append(make_dmtx(frametimes, paradigm, hrf_model=hrf_model, drift_model=drift_model, hfcut=hfcut)) # specify contrasts n_columns = len(design_matrices[0].names) contrasts = {} for i in xrange(paradigm.n_conditions): contrasts['%s' % design_matrices[0].names[2 * i] ] = np.eye(n_columns)[2 * i] # more interesting contrasts contrasts['faces-scrambled'] = contrasts['faces' ] - contrasts['scrambled'] contrasts['scrambled-faces'] = -contrasts['faces-scrambled'] contrasts['effects_of_interest'] = contrasts['faces' ] + contrasts['scrambled'] # effects of interest F-test diff_contrasts = [] for i in xrange(paradigm.n_conditions - 1): a = contrasts[design_matrices[0].names[2 * i]] b = contrasts[design_matrices[0].names[2 * (i + 1)]] diff_contrasts.append(a - b) contrasts["diff"] = diff_contrasts # fit GLM print 'Fitting a GLM (this takes time)...' 
fmri_glm = FMRILinearModel([nibabel.concat_images(sess_func, check_affines=False) for sess_func in func_files], [design_matrix.matrix for design_matrix in design_matrices], mask='compute' ) fmri_glm.fit(do_scaling=True, model='ar1') # save computed mask mask_path = os.path.join(output_dir, "mask.nii.gz") print "Saving mask image %s" % mask_path nibabel.save(fmri_glm.mask, mask_path) # compute contrasts z_maps = {} effects_maps = {} for contrast_id, contrast_val in contrasts.iteritems(): print "\tcontrast id: %s" % contrast_id if np.ndim(contrast_val) > 1: contrast_type = "t" else: contrast_type = "F" z_map, t_map, effects_map, var_map = fmri_glm.contrast( [contrast_val] * 2, con_id=contrast_id, contrast_type=contrast_type, output_z=True, output_stat=True, output_effects=True, output_variance=True ) # store stat maps to disk for map_type, out_map in zip(['z', 't', 'effects', 'variance'], [z_map, t_map, effects_map, var_map]): map_dir = os.path.join( output_dir, '%s_maps' % map_type) if not os.path.exists(map_dir): os.makedirs(map_dir) map_path = os.path.join( map_dir, '%s.nii.gz' % contrast_id) print "\t\tWriting %s ..." % map_path nibabel.save(out_map, map_path) # collect zmaps for contrasts we're interested in if map_type == 'z': z_maps[contrast_id] = map_path if map_type == 'effects': effects_maps[contrast_id] = map_path return subject_id, anat, effects_maps, z_maps, contrasts, fmri_glm.mask
if __name__ == "__main__":
    mem = Memory(os.path.join(output_dir, "cache"))
    first_level_glms = map(mem.cache(do_subject_glm), subject_dirs)

    # plot stats (per subject)
    import matplotlib.pyplot as plt
    from nilearn.plotting import plot_stat_map
    all_masks = []
    all_effects_maps = []
    for (subject_id, anat, effects_maps, z_maps, contrasts,
         mask) in first_level_glms:
        all_masks.append(mask)
        anat_img = nibabel.load(anat)
        z_map = nibabel.load(z_maps.values()[0])
        all_effects_maps.append(effects_maps)
        for contrast_id, z_map in z_maps.iteritems():
            plot_stat_map(z_map, black_bg=True, threshold=2.3,
                          title="%s: %s" % (subject_id, contrast_id))
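    # A possible continuation (hypothetical): the per-subject masks and
    # effects maps collected above are not used further in this script, but
    # they are what a simple group-level summary would need.  The contrast
    # name matches one defined in do_subject_glm.
    from nilearn.masking import intersect_masks
    from nilearn.image import mean_img

    group_mask = intersect_masks(all_masks, threshold=1.)  # strict intersection
    group_effects = mean_img([maps["faces-scrambled"]
                              for maps in all_effects_maps])
    plot_stat_map(group_effects, title="mean effects: faces-scrambled")
    plt.show()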
class FirstLevelModel(BaseEstimator, TransformerMixin, CacheMixin): """ Implementation of the General Linear Model for single session fMRI data Parameters ---------- t_r: float This parameter indicates repetition times of the experimental runs. In seconds. It is necessary to correctly consider times in the design matrix. This parameter is also passed to nilearn.signal.clean. Please see the related documentation for details. slice_time_ref: float, optional (default 0.) This parameter indicates the time of the reference slice used in the slice timing preprocessing step of the experimental runs. It is expressed as a percentage of the t_r (time repetition), so it can have values between 0. and 1. hrf_model : string, optional This parameter specifies the hemodynamic response function (HRF) for the design matrices. It can be 'canonical', 'canonical with derivative' or 'fir'. drift_model : string, optional This parameter specifies the desired drift model for the design matrices. It can be 'polynomial', 'cosine' or 'blank'. period_cut : float, optional This parameter specifies the cut period of the low-pass filter in seconds for the design matrices. drift_order : int, optional This parameter specifices the order of the drift model (in case it is polynomial) for the design matrices. fir_delays : array of shape(n_onsets) or list, optional In case of FIR design, yields the array of delays used in the FIR model, in seconds. min_onset : float, optional This parameter specifies the minimal onset relative to the design (in seconds). Events that start before (slice_time_ref * t_r + min_onset) are not considered. mask: Niimg-like, NiftiMasker or MultiNiftiMasker object, optional, Mask to be used on data. If an instance of masker is passed, then its mask will be used. If no mask is given, it will be computed automatically by a MultiNiftiMasker with default parameters. target_affine: 3x3 or 4x4 matrix, optional This parameter is passed to nilearn.image.resample_img. Please see the related documentation for details. target_shape: 3-tuple of integers, optional This parameter is passed to nilearn.image.resample_img. Please see the related documentation for details. smoothing_fwhm: float, optional If smoothing_fwhm is not None, it gives the size in millimeters of the spatial smoothing to apply to the signal. memory: string, optional Path to the directory used to cache the masking process and the glm fit. By default, no caching is done. Creates instance of joblib.Memory. memory_level: integer, optional Rough estimator of the amount of memory used by caching. Higher value means more memory for caching. standardize : boolean, optional If standardize is True, the time-series are centered and normed: their variance is put to 1 in the time dimension. signal_scaling: False, int or (int, int), optional, If not False, fMRI signals are scaled to the mean value of scaling_axis given, which can be 0, 1 or (0, 1). 0 refers to mean scaling each voxel with respect to time, 1 refers to mean scaling each time point with respect to all voxels and (0, 1) refers to scaling with respect to voxels and time, which is known as grand mean scaling. Incompatible with standardize (standardize=False is enforced when signal_scaling is not False). noise_model : {'ar1', 'ols'}, optional The temporal variance model. Defaults to 'ar1' verbose : integer, optional Indicate the level of verbosity. By default, nothing is printed. n_jobs : integer, optional The number of CPUs to use to do the computation. 
-1 means 'all CPUs', -2 'all CPUs but one', and so on. minimize_memory : boolean, optional Gets rid of some variables on the model fit results that are not necessary for contrast computation and would only be useful for further inspection of model details. This has an important impact on memory consumption. True by default. Attributes ---------- labels : array of shape (n_voxels,), a map of values on voxels used to identify the corresponding model results : dict, with keys corresponding to the different labels values values are RegressionResults instances corresponding to the voxels """ def __init__(self, t_r=None, slice_time_ref=0., hrf_model='glover', drift_model='cosine', period_cut=128, drift_order=1, fir_delays=[0], min_onset=-24, mask=None, target_affine=None, target_shape=None, smoothing_fwhm=None, memory=Memory(None), memory_level=1, standardize=False, signal_scaling=0, noise_model='ar1', verbose=1, n_jobs=1, minimize_memory=True): # design matrix parameters self.t_r = t_r self.slice_time_ref = slice_time_ref self.hrf_model = hrf_model self.drift_model = drift_model self.period_cut = period_cut self.drift_order = drift_order self.fir_delays = fir_delays self.min_onset = min_onset # glm parameters self.mask = mask self.target_affine = target_affine self.target_shape = target_shape self.smoothing_fwhm = smoothing_fwhm if isinstance(memory, _basestring): self.memory = Memory(memory) else: self.memory = memory self.memory_level = memory_level self.standardize = standardize if signal_scaling in [0, 1, (0, 1)]: self.scaling_axis = signal_scaling self.signal_scaling = True self.standardize = False elif signal_scaling is False: self.signal_scaling = signal_scaling else: raise ValueError('signal_scaling must be "False", "0", "1"' ' or "(0, 1)"') self.noise_model = noise_model self.verbose = verbose self.n_jobs = n_jobs self.minimize_memory = minimize_memory # attributes self.labels_ = None self.results_ = None def fit(self, run_imgs, paradigms=None, confounds=None, design_matrices=None): """ Fit the GLM For each run: 1. create design matrix X 2. do a masker job: fMRI_data -> Y 3. fit regression to (Y, X) Parameters ---------- run_imgs: Niimg-like object or list of Niimg-like objects, See http://nilearn.github.io/building_blocks/manipulating_mr_images.html#niimg. Data on which the GLM will be fitted. If this is a list, the affine is considered the same for all. paradigms: pandas Dataframe or string or list of pandas DataFrames or strings, fMRI paradigms used to build design matrices. One paradigm expected per run_img. Ignored in case designs is not None. confounds: pandas Dataframe or string or list of pandas DataFrames or strings, Each column in a DataFrame corresponds to a confound variable to be included in the regression model of the respective run_img. The number of rows must match the number of volumes in the respective run_img. Ignored in case designs is not None. design_matrices: pandas DataFrame or list of pandas DataFrames, Design matrices that will be used to fit the GLM. 
""" # Check arguments # Check imgs type if not isinstance(run_imgs, (list, tuple)): run_imgs = [run_imgs] for rimg in run_imgs: if not isinstance(rimg, (_basestring, Nifti1Image)): raise ValueError('run_imgs must be Niimg-like object or list' ' of Niimg-like objects') # check all information necessary to build design matrices is available if design_matrices is None: if paradigms is None: raise ValueError('paradigms or design matrices must be provided') if self.t_r is None: raise ValueError('t_r not given to FirstLevelModel object' ' to compute design from paradigm') else: design_matrices = _check_run_tables(run_imgs, design_matrices, 'design_matrices') # check the number of paradigm and confound files match number of runs # Also check paradigm and confound files can be loaded as DataFrame if paradigms is not None: paradigms = _check_run_tables(run_imgs, paradigms, 'paradigms') if confounds is not None: confounds = _check_run_tables(run_imgs, confounds, 'confounds') # Learn the mask if not isinstance(self.mask, NiftiMasker): self.masker_ = NiftiMasker( mask_img=self.mask, smoothing_fwhm=self.smoothing_fwhm, target_affine=self.target_affine, standardize=self.standardize, mask_strategy='epi', t_r=self.t_r, memory=self.memory, verbose=max(0, self.verbose - 1), target_shape=self.target_shape, memory_level=self.memory_level) else: self.masker_ = clone(self.mask) for param_name in ['target_affine', 'target_shape', 'smoothing_fwhm', 'low_pass', 'high_pass', 't_r', 'memory', 'memory_level']: our_param = getattr(self, param_name) if our_param is None: continue if getattr(self.masker_, param_name) is not None: warn('Parameter %s of the masker overriden' % param_name) setattr(self.masker_, param_name, our_param) self.masker_.fit(run_imgs[0]) # For each run fit the model and keep only the regression results. self.labels_, self.results_, self.design_matrices_ = [], [], [] n_runs = len(run_imgs) t0 = time.time() for run_idx, run_img in enumerate(run_imgs): # Report progress if self.verbose > 0: percent = float(run_idx) / n_runs percent = round(percent * 100, 2) dt = time.time() - t0 # We use a max to avoid a division by zero if run_idx == 0: remaining = 'go take a coffee, a big one' else: remaining = (100. 
- percent) / max(0.01, percent) * dt remaining = '%i seconds remaining' % remaining sys.stderr.write(" " * 100 + "\r") sys.stderr.write( "Computing run %d out of %d runs (%s)\r" % (run_idx, n_runs, remaining)) # Build the experimental design for the glm run_img = check_niimg(run_img, ensure_ndim=4) if design_matrices is None: n_scans = run_img.get_data().shape[3] if confounds is not None: confounds_matrix = confounds[run_idx].values if confounds_matrix.shape[0] != n_scans: raise ValueError('Rows in confounds does not match' 'n_scans in run_img at index %d' % (run_idx,)) confounds_names = confounds[run_idx].columns else: confounds_matrix = None confounds_names = None start_time = self.slice_time_ref * self.t_r end_time = (n_scans - 1 + self.slice_time_ref) * self.t_r frame_times = np.linspace(start_time, end_time, n_scans) design = make_design_matrix(frame_times, paradigms[run_idx], self.hrf_model, self.drift_model, self.period_cut, self.drift_order, self.fir_delays, confounds_matrix, confounds_names, self.min_onset) else: design = design_matrices[run_idx] self.design_matrices_.append(design) # Compute GLM Y = self.masker_.transform(run_img) if self.signal_scaling: Y, _ = mean_scaling(Y, self.scaling_axis) if self.memory is not None: mem_glm = self.memory.cache(run_glm) else: mem_glm = run_glm labels, results = mem_glm(Y, design, noise_model=self.noise_model, bins=100, n_jobs=self.n_jobs) self.labels_.append(labels) # We save memory if inspecting model details is not necessary if self.minimize_memory: for key in results: results[key] = SimpleRegressionResults(results[key]) self.results_.append(results) del Y # Report progress if self.verbose > 0: sys.stderr.write("\nComputation of %d runs done in %i seconds\n" % (n_runs, time.time() - t0)) return self def compute_contrast(self, contrast_def, contrast_name=None, stat_type=None, output_type='z_score'): """Generate different outputs corresponding to the contrasts provided e.g. z_map, t_map, effects and variance. In multi-session case, outputs the fixed effects map. Parameters ---------- contrast_def : array or list of arrays of shape (n_col) or (n_run, n_col) where ``n_col`` is the number of columns of the design matrix, (one array per run). If only one array is provided when there are several runs, it will be assumed that the same contrast is desired for all runs contrast_name : str, optional name of the contrast stat_type : {'t', 'F'}, optional type of the contrast output_type : str, optional Type of the output map. 
Can be 'z_score', 'stat', 'p_value', 'effect_size' or 'effect_variance' Returns ------- output_image : Nifti1Image The desired output image """ if self.labels_ is None or self.results_ is None: raise ValueError('The model has not been fit yet') if isinstance(contrast_def, np.ndarray): con_vals = [contrast_def] elif isinstance(contrast_def, (list, tuple)): con_vals = contrast_def for cidx, con in enumerate(contrast_def): if not isinstance(con, np.ndarray): raise ValueError('contrast_def at index %i is not an' ' array' % cidx) else: raise ValueError('contrast_def must be an array or list of arrays') n_runs = len(self.labels_) if len(con_vals) != n_runs: warn('One contrast given, assuming it for all %d runs' % n_runs) con_vals = con_vals * n_runs if isinstance(output_type, _basestring): if output_type not in ['z_score', 'stat', 'p_value', 'effect_size', 'effect_variance']: raise ValueError('output_type must be one of "z_score", "stat",' ' "p_value","effect_size" or "effect_variance"') else: raise ValueError('output_type must be one of "z_score", "stat",' ' "p_value","effect_size" or "effect_variance"') if self.memory is not None: arg_ignore = ['labels', 'results'] mem_contrast = self.memory.cache(_fixed_effect_contrast, ignore=arg_ignore) else: mem_contrast = _fixed_effect_contrast contrast = mem_contrast(self.labels_, self.results_, con_vals, stat_type) estimate_ = getattr(contrast, output_type)() # Prepare the returned images output = self.masker_.inverse_transform(estimate_) if contrast_name is None: contrast_name = str(con_vals) output.get_header()['descrip'] = ( '%s of contrast %s' % (output_type, contrast_name)) return output
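# A hedged end-to-end sketch of the estimator defined above; the file name,
# the paradigm columns and the contrast construction are illustrative, not
# taken from any particular dataset, and the design columns are assumed to
# be named after the trial types.
import numpy as np
import pandas as pd

paradigm = pd.DataFrame({'onset': [0., 30., 60.],
                         'duration': [15., 15., 15.],
                         'trial_type': ['faces', 'houses', 'faces']})

model = FirstLevelModel(t_r=2.5, hrf_model='glover', drift_model='cosine',
                        smoothing_fwhm=5., memory='/tmp/glm_cache',
                        noise_model='ar1')
model = model.fit('run1_bold.nii.gz', paradigms=paradigm)

design_columns = list(model.design_matrices_[0].columns)
contrast = np.zeros(len(design_columns))
contrast[design_columns.index('faces')] = 1.
contrast[design_columns.index('houses')] = -1.
z_map = model.compute_contrast(contrast, contrast_name='faces-houses',
                               output_type='z_score')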
if output_dir is not None: with open(join(debug_folder, 'score'), 'w+') as f: f.write('score : %.4f' % score) return score output_dir = expanduser(join('~/output/dl_recommender/', datetime.datetime.now().strftime('%Y-%m-%d_%H' '-%M-%S'))) os.makedirs(output_dir) random_state = check_random_state(0) mem = Memory(cachedir=expanduser("~/cache"), verbose=10) X_csr = mem.cache(fetch_ml_10m)(expanduser('~/data/own/ml-10M100K'), remove_empty=True) permutation = random_state.permutation(X_csr.shape[0]) X_csr = X_csr[permutation] X, y = array_to_fm_format(X_csr) uniform_split = ShuffleSplit(n_iter=4, test_size=.25, random_state=random_state) fm_decoder = FMDecoder(n_samples=X_csr.shape[0], n_features=X_csr.shape[1]) base_estimator = BaseRecommender(fm_decoder) convex_fm = ConvexFM(fit_linear=True, alpha=0, max_rank=20,
n_sources, n_times = mean_stc.data.shape X = np.empty((len(stcs), n_sources, n_times)) for i, stc in enumerate(stcs): if len(times) == len(stc.times): X[i] = stc.data mean_stc._data = np.mean(X, axis=0) return mean_stc, X print "Jane here" #X1, X2 are the full time,vertices,subject matrices; mean_stc1 and mean_stc2 are the grand-avgs mean_stc1, X1 = average_stcs(stcs1) mean_stc2, X2 = average_stcs(stcs2) return mean_stc1, X1, mean_stc2, X2 mean_stc1, X1, mean_stc2, X2 = mem.cache(load_data)(stcs1_fname, stcs2_fname, dec) template_stc = copy.deepcopy(mean_stc1) stc_diff = copy.deepcopy(template_stc) stc_diff._data = mean_stc2.data - mean_stc1.data ##Stc cond 2- Stc cond 1 stc_diff.save( '/cluster/kuperberg/SemPrMM/MEG/results/source_space/cluster_stats/' + prefix + 'diff_of_means') if time_interval is not None: # squash time interval tmin, tmax = time_interval times = mean_stc1.times mask = (times >= tmin) & (times <= tmax) X1 = np.mean(X1[:, :, mask], axis=2)[:, :, None] X2 = np.mean(X2[:, :, mask], axis=2)[:, :, None] template_stc = copy.deepcopy(template_stc)
def fit(self, X, y=None): """Fit the hierarchical clustering on the data Parameters ---------- X : array-like, shape = [n_samples, n_features] The samples a.k.a. observations. Returns ------- self """ X = check_array(X, ensure_min_samples=2, estimator=self) memory = self.memory if isinstance(memory, six.string_types): memory = Memory(cachedir=memory, verbose=0) if self.n_clusters <= 0: raise ValueError("n_clusters should be an integer greater than 0." " %s was provided." % str(self.n_clusters)) if self.linkage == "ward" and self.affinity != "euclidean": raise ValueError("%s was provided as affinity. Ward can only " "work with euclidean distances." % (self.affinity, )) if self.linkage not in _TREE_BUILDERS: raise ValueError("Unknown linkage type %s." "Valid options are %s" % (self.linkage, _TREE_BUILDERS.keys())) tree_builder = _TREE_BUILDERS[self.linkage] connectivity = self.connectivity if self.connectivity is not None: if callable(self.connectivity): connectivity = self.connectivity(X) connectivity = check_array(connectivity, accept_sparse=['csr', 'coo', 'lil']) n_samples = len(X) compute_full_tree = self.compute_full_tree if self.connectivity is None: compute_full_tree = True if compute_full_tree == 'auto': # Early stopping is likely to give a speed up only for # a large number of clusters. The actual threshold # implemented here is heuristic compute_full_tree = self.n_clusters < max(100, .02 * n_samples) n_clusters = self.n_clusters if compute_full_tree: n_clusters = None # Construct the tree kwargs = {} if self.linkage != 'ward': kwargs['linkage'] = self.linkage kwargs['affinity'] = self.affinity if self.return_distance: self.children_, self.n_components_, self.n_leaves_, parents, \ self.distances = \ memory.cache(tree_builder)(X, connectivity, n_components=self.n_components, n_clusters=n_clusters, return_distance=True, **kwargs) else: self.children_, self.n_components_, self.n_leaves_, parents = \ memory.cache(tree_builder)(X, connectivity, n_components=self.n_components, n_clusters=n_clusters, **kwargs) # Cut the tree if compute_full_tree: self.labels_ = _hc_cut(self.n_clusters, self.children_, self.n_leaves_) else: labels = _hierarchical.hc_get_heads(parents, copy=False) # copy to avoid holding a reference on the original array labels = np.copy(labels[:n_samples]) # Reasign cluster numbers self.labels_ = np.searchsorted(np.unique(labels), labels) return self
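# The class this fit() method belongs to is outside this excerpt;
# scikit-learn's own AgglomerativeClustering exposes the same memory /
# linkage / connectivity parameters the method reads, so it serves as a
# stand-in to illustrate the cached tree construction.
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.neighbors import kneighbors_graph

X_demo = np.random.RandomState(0).rand(200, 3)
connectivity = kneighbors_graph(X_demo, n_neighbors=10, include_self=False)

model = AgglomerativeClustering(n_clusters=5, linkage='ward',
                                connectivity=connectivity,
                                memory='/tmp/agglo_cache',  # cache the tree
                                compute_full_tree='auto')
labels = model.fit(X_demo).labels_
print("Cluster sizes: %s" % np.bincount(labels))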