def _fetch_precomputed_neurosynth(data_dir: Path) -> Generator[Path, None, None]:
    """Download precomputed Neurosynth features if needed and return their paths.

    The number of feature files already present in ``data_dir`` is compared
    with the ``n_files`` entry of the ``neurosynth_precomputed`` section of the
    data fetcher JSON. When the counts differ, the archive at the configured
    URL is downloaded and extracted into ``data_dir``.

    Parameters
    ----------
    data_dir : pathlib.Path
        Directory in which the feature files are looked up and extracted.

    Returns
    -------
    generator of pathlib.Path
        Lazy glob over the Neurosynth feature files in ``data_dir``.
    """
    pattern = "Neurosynth_TFIDF__*z_desc-consistency.nii.gz"
    fetcher_info = read_data_fetcher_json()["neurosynth_precomputed"]
    url = fetcher_info["url"]

    n_existing = sum(1 for _ in data_dir.glob(pattern))
    if n_existing != fetcher_info["n_files"]:
        logger.info("Downloading Neurosynth data files.")

        # Close the HTTP response deterministically instead of leaving it to
        # the garbage collector.
        with urllib.request.urlopen(url) as response:
            payload = response.read()

        # Open, close, and reopen file to deal with Windows permission issues.
        # NOTE(review): `prefix=str(data_dir)` places the archive next to
        # `data_dir` rather than inside it; `dir=data_dir` may have been
        # intended. Left unchanged to keep the directory requirements the same.
        with NamedTemporaryFile(
            prefix=str(data_dir), suffix=".zip", delete=False
        ) as f:
            name = f.name
        try:
            with open(name, "wb") as fw:
                fw.write(payload)
            with zipfile.ZipFile(name, "r") as fr:
                fr.extractall(data_dir)
        finally:
            # The archive is only a transport container; always remove it.
            Path(name).unlink()

    return data_dir.glob(pattern)
def read_histology_profile(
    data_dir: Optional[Union[str, Path]] = None,
    template: str = "fsaverage",
    overwrite: bool = False,
) -> np.ndarray:
    """Read BigBrain histology profiles.

    Parameters
    ----------
    data_dir : str, pathlib.Path, None, optional
        Path to the data directory. If the profile file is not found here it
        is downloaded. If None (or otherwise falsy), the ``BIGBRAIN_DATA_DIR``
        entry of ``data_directories`` is used, by default None.
    template : str, optional
        Surface template, by default 'fsaverage'. Templates whose name starts
        with 'civet' are read from the 'fsaverage' profiles and then mapped to
        the requested CIVET surface with ``_surf2surf``.
    overwrite : bool, optional
        If true, existing data will be overwritten, by default False.

    Returns
    -------
    numpy.ndarray
        Depth-by-vertex array of BigBrain intensities.
    """
    if data_dir:
        data_dir = Path(data_dir)
    else:
        data_dir = data_directories["BIGBRAIN_DATA_DIR"]

    # CIVET profiles are derived from the fsaverage ones; remember the
    # requested CIVET template and read the fsaverage data instead.
    civet_template = ""
    if template.startswith("civet"):
        logger.info(
            "CIVET histology profiles were not included with BigBrainWarp. "
            "Interpolating from fsaverage using nearest neighbor interpolation."
        )
        civet_template, template = template, "fsaverage"

    histology_file = data_dir / f"histology_{template}.h5"
    if not histology_file.exists() or overwrite:
        logger.info(
            "Could not find a histological profile or an overwrite was "
            "requested. Downloading..."
        )
        download_histology_profiles(
            data_dir=data_dir, template=template, overwrite=overwrite
        )

    with h5py.File(histology_file, "r") as h5_file:
        profiles = h5_file.get(template)[...]

    if not civet_template:
        return profiles

    source_surface = fetch_template_surface("fsaverage")
    target_surface = fetch_template_surface(civet_template)
    # _surf2surf receives the transposed profiles; transpose the result back
    # so the returned layout matches the non-CIVET branch.
    return _surf2surf(source_surface, target_surface, profiles.T).T
def surface_genetic_expression(
    labels: Union[Sequence[str], np.ndarray],
    surfaces: Optional[Union[str, Sequence[str]]] = None,
    space: Optional[str] = None,
    *,
    atlas_info: Optional[str] = None,
    ibf_threshold: float = 0.5,
    probe_selection: str = "diff_stability",
    donor_probes: str = "aggregate",
    lr_mirror: Optional[bool] = None,
    missing: Optional[str] = None,
    tolerance: float = 2,
    sample_norm: str = "srs",
    gene_norm: str = "srs",
    norm_matched: bool = True,
    norm_structures: bool = False,
    region_agg: str = "donors",
    agg_metric: str = "mean",
    corrected_mni: bool = True,
    reannotated: bool = True,
    return_counts: bool = False,
    return_donors: bool = False,
    return_report: bool = False,
    donors: str = "all",
    data_dir: Optional[str] = None,
    verbose: float = 0,
    n_proc: int = 1,
) -> pd.DataFrame:
    """Computes genetic expression of surface parcels.

    Parameters
    ----------
    labels : list-of-str or numpy.ndarray
        List of paths to label files for the parcellation, or numpy array
        containing the pre-loaded labels
    surfaces : list-of-image, optional
        List of paths to surface files or preloaded surfaces. A single path
        (str or pathlib.Path) is also accepted. If not specified assumes that
        `labels` are on the `fsaverage5` surface. Default: None
    space : {'fsaverage', 'fslr'}
        What template space `surfaces` are aligned to. If not specified assumes
        that `labels` are on the `fsaverage5` surface. Default: None

    For details of the remaining parameters please consult the
    abagen.get_expression_data() documentation. All its parameters bar "atlas"
    are valid input parameters.

    Returns
    -------
    pandas.DataFrame
        Dataframe containing the expression of each gene within each region.

    Examples
    --------
    >>> from brainstat.context.genetics import surface_genetic_expression
    >>> from nilearn import datasets
    >>> import numpy as np

    >>> destrieux = datasets.fetch_atlas_surf_destrieux()
    >>> labels = np.hstack((destrieux['map_left'], destrieux['map_right']))
    >>> fsaverage = datasets.fetch_surf_fsaverage()
    >>> surfaces = (fsaverage['pial_left'], fsaverage['pial_right'])
    >>> expression = surface_genetic_expression(labels, surfaces,
    ...                                         space='fsaverage')
    """
    # Deal with the input parameters. A lone path (str or Path) is wrapped so
    # the loop below always iterates over individual surfaces.
    if isinstance(surfaces, (str, Path)):
        surfaces = [surfaces]
    elif surfaces is None:
        surfaces = []

    surfaces_gii = []
    for surface in surfaces:
        if isinstance(surface, (str, Path)):
            surfaces_gii.append(nib.load(surface))
            continue

        # Preloaded surfaces are written to a temporary GIFTI file and loaded
        # back. The file is created and closed *before* the try block so that
        # `name` is always bound when the cleanup runs; the manual unlink is a
        # rather roundabout deletion kept for Windows compatibility.
        with tempfile.NamedTemporaryFile(suffix=".gii", delete=False) as f:
            name = f.name
        try:
            write_surface(surface, name, otype="gii")
            surfaces_gii.append(nib.load(name))
        finally:
            Path(name).unlink()

    # Use abagen to grab expression data.
    logger.info(
        "If you use BrainStat's genetics functionality, please cite abagen (https://abagen.readthedocs.io/en/stable/citing.html)."
    )
    atlas = check_atlas(labels, geometry=surfaces_gii, space=space)
    expression = get_expression_data(
        atlas,
        atlas_info=atlas_info,
        ibf_threshold=ibf_threshold,
        probe_selection=probe_selection,
        donor_probes=donor_probes,
        lr_mirror=lr_mirror,
        missing=missing,
        tolerance=tolerance,
        sample_norm=sample_norm,
        gene_norm=gene_norm,
        norm_matched=norm_matched,
        norm_structures=norm_structures,
        region_agg=region_agg,
        agg_metric=agg_metric,
        corrected_mni=corrected_mni,
        reannotated=reannotated,
        return_counts=return_counts,
        return_donors=return_donors,
        return_report=return_report,
        donors=donors,
        data_dir=data_dir,
        verbose=verbose,
        n_proc=n_proc,
    )

    return expression
def surface_decoder(
    pial: Union[str, BSPolyData, Sequence[Union[str, BSPolyData]]],
    white: Union[str, BSPolyData, Sequence[Union[str, BSPolyData]]],
    stat_labels: Union[str, np.ndarray, Sequence[Union[str, np.ndarray]]],
    *,
    interpolation: str = "linear",
    data_dir: Optional[Union[str, Path]] = None,
    database: str = "neurosynth",
) -> pd.DataFrame:
    """Meta-analytic decoding of surface maps using NeuroSynth or NeuroQuery.

    The surface data are projected to an MNI152 volume, and the voxels that
    are non-zero in both the projected volume and the MNI152 brain mask are
    correlated with every precomputed feature map.

    Parameters
    ----------
    pial : str, BSPolyData, sequence of str or BSPolyData
        Path of a pial surface file, BSPolyData of a pial surface or a list
        containing multiple of the aforementioned.
    white : str, BSPolyData, sequence of str or BSPolyData
        Path of a white matter surface file, BSPolyData of a white matter
        surface or a list containing multiple of the aforementioned.
    stat_labels : str, numpy.ndarray, sequence of str or numpy.ndarray
        Path to a label file for the surfaces, numpy array containing the
        labels, or a list containing multiple of the aforementioned.
    interpolation : str, optional
        Either 'nearest' for nearest neighbor interpolation, or 'linear'
        for trilinear interpolation, by default 'linear'.
    data_dir : str, pathlib.Path, optional
        The directory of the dataset. It is created if it does not exist. If
        None, the ``NEUROSYNTH_DATA_DIR`` entry of ``data_directories`` is
        used, by default None.
    database : str, optional
        Name of the feature database, forwarded to ``_fetch_precomputed``,
        by default 'neurosynth'.

    Returns
    -------
    pandas.DataFrame
        Table with correlation values for each feature, sorted in descending
        order of Pearson's r.
    """
    from nilearn.datasets import load_mni152_brain_mask

    if data_dir:
        data_dir = Path(data_dir)
    else:
        data_dir = data_directories["NEUROSYNTH_DATA_DIR"]
    data_dir.mkdir(exist_ok=True, parents=True)

    logger.info(
        "Fetching Neurosynth feature files. This may take several minutes "
        "if you haven't downloaded them yet."
    )
    feature_files = tuple(_fetch_precomputed(data_dir, database=database))

    mni152 = load_mni152_brain_mask()

    # Reserve a temporary file name, close the handle, and let the projection
    # write to that path; the file is removed once the data are in memory.
    with NamedTemporaryFile(suffix=".nii.gz", delete=False) as tmp:
        volume_path = tmp.name
    try:
        multi_surface_to_volume(
            pial=pial,
            white=white,
            volume_template=mni152,
            output_file=volume_path,
            labels=stat_labels,
            interpolation=interpolation,
        )
        stat_volume = nib.load(volume_path)
        stat_data = stat_volume.get_fdata()
        mask = (stat_data != 0) & (mni152.get_fdata() != 0)
        stat_vector = stat_data[mask]
    finally:
        Path(volume_path).unlink()

    # The validity of the statistic vector is the same for every feature.
    stat_finite = np.isfinite(stat_vector)

    feature_names = []
    correlations = np.zeros(len(feature_files))

    logger.info("Running correlations with all Neurosynth features.")
    for idx, feature_file in enumerate(feature_files):
        # The feature name is the alphanumeric run after the double underscore.
        name_match = re.search("__[A-Za-z0-9 ]+", feature_file.stem)
        feature_names.append(name_match[0][2:])  # type: ignore

        feature_data = nib.load(feature_file).get_fdata()[mask]
        # Only correlate voxels where both maps are neither NaN nor infinite.
        keep = np.isfinite(feature_data) & stat_finite
        correlations[idx], _ = pearsonr(stat_vector[keep], feature_data[keep])

    df = pd.DataFrame(correlations, index=feature_names, columns=["Pearson's r"])
    return df.sort_values(by="Pearson's r", ascending=False)
def meta_analytic_decoder(
    template: str,
    stat_labels: np.ndarray,
    data_dir: Optional[Union[str, Path]] = None,
):
    """Meta-analytic decoding of surface maps using the Neurosynth features.

    The statistic is converted to a volume with ``_surf2vol``; voxels that are
    non-zero in both that volume and the MNI152Lin brain mask are correlated
    with every precomputed Neurosynth feature map.

    Parameters
    ----------
    template : str
        Template forwarded to ``_surf2vol``; presumably the path of a template
        volume file (confirm against ``_surf2vol``).
    stat_labels : numpy.ndarray
        Array with the statistic to decode. It is flattened before being
        passed to ``_surf2vol``.
    data_dir : str, pathlib.Path, optional
        The directory of the dataset. It is created if it does not exist. If
        None, the ``NEUROSYNTH_DATA_DIR`` entry of ``data_directories`` is
        used, by default None.

    Returns
    -------
    pandas.DataFrame
        Table with correlation values for each feature, sorted in descending
        order of Pearson's r.
    """
    if data_dir:
        data_dir = Path(data_dir)
    else:
        data_dir = data_directories["NEUROSYNTH_DATA_DIR"]
    data_dir.mkdir(exist_ok=True, parents=True)

    logger.info(
        "Fetching Neurosynth feature files. This may take several minutes "
        "if you haven't downloaded them yet."
    )
    feature_files = tuple(_fetch_precomputed(data_dir, database="neurosynth"))

    mask_path = tflow.get("MNI152Lin", resolution=2, desc="brain", suffix="mask")
    mni152 = nib.load(mask_path)

    stat_nii = _surf2vol(template, stat_labels.flatten())
    stat_data = stat_nii.get_fdata()
    mask = (stat_data != 0) & (mni152.get_fdata() != 0)
    stat_vector = stat_data[mask]

    # The validity of the statistic vector is the same for every feature.
    stat_finite = np.isfinite(stat_vector)

    feature_names = []
    correlations = np.zeros(len(feature_files))

    logger.info("Running correlations with all Neurosynth features.")
    for idx, feature_file in enumerate(feature_files):
        # The feature name is the alphanumeric run after the double underscore.
        name_match = re.search("__[A-Za-z0-9 ]+", feature_file.stem)
        feature_names.append(name_match[0][2:])  # type: ignore

        feature_data = nib.load(feature_file).get_fdata()[mask]
        # Only correlate voxels where both maps are neither NaN nor infinite.
        keep = np.isfinite(feature_data) & stat_finite
        correlations[idx], _ = pearsonr(stat_vector[keep], feature_data[keep])

    df = pd.DataFrame(correlations, index=feature_names, columns=["Pearson's r"])
    return df.sort_values(by="Pearson's r", ascending=False)
def fetch_parcellation(
    template: str,
    atlas: str,
    n_regions: int,
    join: bool = True,
    seven_networks: bool = True,
    data_dir: Optional[Union[str, Path]] = None,
) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Loads the surface parcellation of a given atlas.

    Parameters
    ----------
    template : str
        The surface template. Valid values are "fsaverage", "fsaverage5",
        "fsaverage6", "fslr32k", "civet41k", "civet164k". For the two CIVET
        templates the "fsaverage" parcellation is fetched and interpolated to
        the CIVET surface with nearest neighbor interpolation.
    atlas : str
        Name of the atlas. Valid names are "cammoun", "glasser", "schaefer",
        "yeo".
    n_regions : int
        Number of regions of the requested atlas. Valid values for the cammoun
        atlas are 33, 60, 125, 250, 500. Valid values for the glasser atlas are
        360. Valid values for the "schaefer" atlas are 100, 200, 300, 400, 500,
        600, 800, 1000. Valid values for "yeo" are 7 and 17.
    join : bool, optional
        If true, returns parcellation as a single array, if false, returns an
        array per hemisphere, by default True.
    seven_networks : bool, optional
        If true, uses the 7 networks parcellation. Only used for the Schaefer
        atlas, by default True.
    data_dir : str, pathlib.Path, optional
        Directory to save the data. It is created if it does not exist. If
        None, the ``PARCELLATION_DATA_DIR`` entry of ``data_directories`` is
        used, by default None.

    Returns
    -------
    np.ndarray or tuple of np.ndarray
        Surface parcellation. If a tuple, then the first element is the left
        hemisphere.

    Raises
    ------
    ValueError
        If ``atlas`` is not one of the supported atlas names.
    """
    if data_dir:
        data_dir = Path(data_dir)
    else:
        data_dir = data_directories["PARCELLATION_DATA_DIR"]
    data_dir.mkdir(parents=True, exist_ok=True)

    # CIVET parcellations are derived from fsaverage; remember the requested
    # CIVET template and fetch the fsaverage parcellation instead.
    civet_template = ""
    if template in ("civet41k", "civet164k"):
        logger.info(
            "CIVET parcellations were not included with the toolbox. "
            "Interpolating parcellation from the fsaverage surface with a "
            "nearest neighbor interpolation."
        )
        civet_template, template = template, "fsaverage"

    if atlas == "schaefer":
        parcellations = _fetch_schaefer_parcellation(
            template, n_regions, seven_networks, data_dir
        )
    elif atlas == "cammoun":
        parcellations = _fetch_cammoun_parcellation(template, n_regions, data_dir)
    elif atlas == "glasser":
        parcellations = _fetch_glasser_parcellation(template, data_dir)
    elif atlas == "yeo":
        parcellations = _fetch_yeo_parcellation(template, n_regions, data_dir)
    else:
        raise ValueError(f"Invalid atlas: {atlas}")

    if civet_template:
        fsaverage_left, fsaverage_right = fetch_template_surface(
            "fsaverage", layer="white", join=False
        )
        civet_left, civet_right = fetch_template_surface(
            civet_template, layer="white", join=False
        )
        # NOTE(review): item assignment assumes the fetch helpers return a
        # mutable sequence (e.g. a list) — confirm against the helpers.
        parcellations[0] = _surf2surf(
            fsaverage_left, civet_left, parcellations[0], interpolation="nearest"
        )
        parcellations[1] = _surf2surf(
            fsaverage_right, civet_right, parcellations[1], interpolation="nearest"
        )

    if not join:
        return parcellations[0], parcellations[1]
    return np.concatenate((parcellations[0], parcellations[1]), axis=0)
def fetch_gradients(
    template: str = "fsaverage5",
    name: str = "margulies2016",
    data_dir: Optional[Union[str, Path]] = None,
    overwrite: bool = False,
) -> np.ndarray:
    """Fetch example gradients.

    Parameters
    ----------
    template : str, optional
        Name of the template surface. Valid values are "fsaverage5",
        "fsaverage", "fslr32k", "civet41k", "civet164k", defaults to
        "fsaverage5". For the two CIVET templates the "fsaverage" gradients
        are interpolated per hemisphere to the CIVET surface with nearest
        neighbor interpolation.
    name : str
        Name of the gradients. Valid values are "margulies2016", defaults to
        "margulies2016".
    data_dir : str, Path, optional
        Path to the directory to store the gradient data files. It is created
        if it does not exist. If None, the ``GRADIENT_DATA_DIR`` entry of
        ``data_directories`` is used, by default None.
    overwrite : bool, optional
        If true, overwrites existing files, by default False.

    Returns
    -------
    numpy.ndarray
        Vertex-by-gradient matrix.
    """
    data_dir = Path(data_dir) if data_dir else data_directories["GRADIENT_DATA_DIR"]
    data_dir.mkdir(parents=True, exist_ok=True)

    gradients_file = data_dir / f"gradients_{name}.h5"
    if not gradients_file.exists() or overwrite:
        url = read_data_fetcher_json()["gradients"][name]["url"]
        _download_file(url, gradients_file, overwrite=overwrite)

    is_civet = template in ("civet41k", "civet164k")
    dataset_name = "fsaverage" if is_civet else template

    # Copy the dataset into memory inside the context manager so the HDF5
    # file handle is always closed, including when the lookup fails.
    with h5py.File(gradients_file, "r") as hf:
        gradients = np.array(hf[dataset_name]).T

    if not is_civet:
        return gradients

    logger.info(
        "CIVET gradients were not included with the toolbox. Interpolating gradients from the fsaverage surface with a nearest interpolation."
    )
    fsaverage_left, fsaverage_right = fetch_template_surface(
        "fsaverage", layer="white", join=False
    )
    civet_left, civet_right = fetch_template_surface(
        template, layer="white", join=False
    )

    # The first half of the rows is treated as the left hemisphere and the
    # second half as the right hemisphere.
    n_half = gradients.shape[0] // 2
    gradients_left = _surf2surf(
        fsaverage_left,
        civet_left,
        gradients[:n_half, :],
        interpolation="nearest",
    )
    gradients_right = _surf2surf(
        fsaverage_right,
        civet_right,
        gradients[n_half:, :],
        interpolation="nearest",
    )
    return np.concatenate((gradients_left, gradients_right), axis=0)