def _fetch_civet_spheres(template: str, data_dir: Path) -> Tuple[str, str]:
    """Fetches CIVET spheres.

    Parameters
    ----------
    template : str
        Template name.
    data_dir : Path
        Directory to save the data.

    Returns
    -------
    tuple
        Paths to sphere files.
    """
    civet_v2_dir = data_dir / "tpl-civet" / "v2" / template
    civet_v2_dir.mkdir(parents=True, exist_ok=True)

    # Uses the same sphere for L/R hemisphere.
    filename = civet_v2_dir / "tpl-civet_space-ICBM152_sphere.obj"
    if not filename.exists():
        url = read_data_fetcher_json()["spheres"][template]["url"]
        _download_file(url, filename)

    # Return two filenames to conform to other left/right hemisphere functions.
    return (str(filename), str(filename))
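
# Illustrative usage sketch for the helper above, not part of the original
# module. The "civet41k" template name and the data directory are assumptions
# for demonstration; the first call requires network access.
def _example_fetch_civet_spheres() -> None:
    sphere_left, sphere_right = _fetch_civet_spheres(
        "civet41k", Path.home() / "brainstat_data" / "surface_data"
    )
    # Both entries point to the same .obj file by design.
    assert sphere_left == sphere_right
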
def _fetch_precomputed_neurosynth(data_dir: Path) -> Generator[Path, None, None]:
    """Downloads precomputed Neurosynth features and returns the filepaths."""
    json = read_data_fetcher_json()["neurosynth_precomputed"]
    url = json["url"]

    existing_files = data_dir.glob("Neurosynth_TFIDF__*z_desc-consistency.nii.gz")
    if len(list(existing_files)) != json["n_files"]:
        logger.info("Downloading Neurosynth data files.")
        response = urllib.request.urlopen(url)

        # Open, close, and reopen file to deal with Windows permission issues.
        with NamedTemporaryFile(prefix=str(data_dir), suffix=".zip", delete=False) as f:
            name = f.name

        try:
            with open(name, "wb") as fw:
                fw.write(response.read())

            with zipfile.ZipFile(name, "r") as fr:
                fr.extractall(data_dir)
        finally:
            Path(name).unlink()

    return data_dir.glob("Neurosynth_TFIDF__*z_desc-consistency.nii.gz")
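
# Illustrative usage sketch, not part of the original module: iterate over the
# precomputed Neurosynth NIfTI files. The directory is an assumption; the
# first call downloads and unzips the full archive.
def _example_fetch_neurosynth() -> None:
    data_dir = Path.home() / "brainstat_data" / "neurosynth_data"
    data_dir.mkdir(parents=True, exist_ok=True)
    for path in _fetch_precomputed_neurosynth(data_dir):
        print(path.name)
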
def fetch_mics_data(
    data_dir: Optional[Union[str, Path]] = None,
    overwrite: bool = False,
) -> Tuple[np.ndarray, pd.DataFrame]:
    """Fetches MICS cortical thickness data.

    Parameters
    ----------
    data_dir : str, pathlib.Path, optional
        Path to store the MICS data, by default
        $HOME_DIR/brainstat_data/mics_data.
    overwrite : bool, optional
        If true, overwrites existing data, by default False.

    Returns
    -------
    np.ndarray
        Subject-by-vertex cortical thickness data on fsaverage5.
    pd.DataFrame
        Subject demographics.
    """
    data_dir = Path(data_dir) if data_dir else data_directories["MICS_DATA_DIR"]
    data_dir.mkdir(exist_ok=True, parents=True)

    demographics_file = data_dir / "mics_demographics.csv"
    demographics_url = read_data_fetcher_json()["mics_tutorial"]["demographics"]
    _download_file(demographics_url["url"], demographics_file, overwrite)
    df = pd.read_csv(demographics_file)

    thickness_file = data_dir / "mics_thickness.h5"
    thickness_url = read_data_fetcher_json()["mics_tutorial"]["thickness"]
    _download_file(thickness_url["url"], thickness_file, overwrite)
    with h5py.File(thickness_file, "r") as f:
        thickness = f["thickness"][:]

    return thickness, df
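
# Illustrative usage sketch, not part of the original module: fetch the MICS
# tutorial data into the default directory. The commented shape follows the
# docstring (subjects by fsaverage5 vertices).
def _example_fetch_mics_data() -> None:
    thickness, demographics = fetch_mics_data()
    print(thickness.shape)  # (n_subjects, 20484) on fsaverage5
    print(demographics.head())
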
def _fetch_glasser_parcellation(template: str, data_dir: Path) -> List[np.ndarray]:
    """Fetches Glasser parcellation."""
    urls = read_data_fetcher_json()["parcellations"]["glasser"][template]["url"]
    filepaths = []
    for i, hemi in enumerate(("lh", "rh")):
        filename = "_".join(("glasser", "360", template, hemi)) + ".label.gii"
        filepaths.append(data_dir / filename)
        _download_file(urls[i], filepaths[i])
    gifti = [nib_load(file) for file in filepaths]
    parcellations = [x.darrays[0].data for x in gifti]
    # Offset right-hemisphere labels by 180 so the hemispheres do not overlap;
    # background vertices (label 0) stay 0.
    parcellations[1] = (parcellations[1] + 180) * (parcellations[1] > 0)
    return parcellations
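
# Illustrative usage sketch, not part of the original module, showing the
# hemispheric label offset applied above. The "fslr32k" template and the
# directory are assumptions for demonstration.
def _example_fetch_glasser() -> None:
    data_dir = Path.home() / "brainstat_data" / "parcellation_data"
    data_dir.mkdir(parents=True, exist_ok=True)
    left, right = _fetch_glasser_parcellation("fslr32k", data_dir)
    # Left labels span 1-180; nonzero right labels are shifted to 181-360.
    print(left.max(), right[right > 0].min())
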
def _fetch_yeo_parcellation(
    template: str, n_regions: int, data_dir: Path
) -> List[np.ndarray]:
    """Fetches Yeo parcellation."""
    filenames = [
        data_dir / f"{template}_{hemi}_yeo{n_regions}.label.gii"
        for hemi in ("lh", "rh")
    ]

    if not all(x.exists() for x in filenames):
        url = read_data_fetcher_json()["parcellations"]["yeo"]["url"]
        # Create and immediately close a temporary file: this reserves a
        # filename without keeping a handle open (avoids Windows locks).
        with tempfile.NamedTemporaryFile(suffix=".zip") as f:
            downloaded_file = Path(f.name)

        try:
            _download_file(url, downloaded_file)
            with zipfile.ZipFile(downloaded_file, "r") as zip_ref:
                zip_ref.extractall(data_dir)
        finally:
            downloaded_file.unlink()

    return [nib_load(file).darrays[0].data for file in filenames]
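
# Illustrative usage sketch, not part of the original module. A 7-network Yeo
# atlas on fsaverage5 is assumed to exist in the data fetcher JSON.
def _example_fetch_yeo() -> None:
    data_dir = Path.home() / "brainstat_data" / "parcellation_data"
    data_dir.mkdir(parents=True, exist_ok=True)
    left, right = _fetch_yeo_parcellation("fsaverage5", 7, data_dir)
    print(np.unique(left))  # label values, typically 0 (midline) plus 1-7
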
def fetch_mask(
    template: str,
    join: bool = True,
    data_dir: Optional[Union[str, Path]] = None,
    overwrite: bool = False,
) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Fetches midline masks.

    Parameters
    ----------
    template : str
        Name of the surface template. Valid templates are: "fsaverage5",
        "fsaverage", "fslr32k", "civet41k", and "civet164k".
    join : bool, optional
        If true, returns a numpy array containing the mask. If false, returns
        a tuple containing the left and right hemispheric masks, respectively,
        by default True.
    data_dir : str, pathlib.Path, optional
        Directory to save the data, by default
        $HOME_DIR/brainstat_data/surface_data.
    overwrite : bool, optional
        If true, overwrites existing data, by default False.

    Returns
    -------
    numpy.ndarray or tuple of numpy.ndarray
        Midline mask, either as a single array or a tuple of a left and right
        hemispheric array.
    """
    data_dir = Path(data_dir) if data_dir else data_directories["SURFACE_DATA_DIR"]
    data_dir.mkdir(parents=True, exist_ok=True)
    mask_file = data_dir / f"{template}_mask.csv"

    url = read_data_fetcher_json()["masks"][template]["url"]
    _download_file(url, mask_file, overwrite=overwrite)

    mask = np.loadtxt(mask_file, delimiter=",") == 1
    if join:
        return mask
    else:
        n = len(mask)
        return mask[: n // 2], mask[n // 2 :]
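
# Illustrative usage sketch, not part of the original module: the joined mask
# is the two hemispheric masks concatenated, so both call styles agree.
def _example_fetch_mask() -> None:
    mask = fetch_mask("fsaverage5")
    mask_left, mask_right = fetch_mask("fsaverage5", join=False)
    assert np.array_equal(mask, np.concatenate((mask_left, mask_right)))
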
def download_histology_profiles(
    data_dir: Optional[Union[str, Path]] = None,
    template: str = "fsaverage",
    overwrite: bool = False,
) -> None:
    """Downloads BigBrain histology profiles.

    Parameters
    ----------
    data_dir : str, pathlib.Path, None, optional
        Path to the directory to store the data. If None, defaults to the
        home directory, by default None.
    template : str, optional
        Surface template. Currently allowed options are 'fsaverage',
        'fsaverage5', and 'fslr32k', by default 'fsaverage'.
    overwrite : bool, optional
        If true, existing data will be overwritten, by default False.

    Raises
    ------
    KeyError
        Thrown if an invalid template is requested.
    """
    data_dir = Path(data_dir) if data_dir else data_directories["BIGBRAIN_DATA_DIR"]
    data_dir.mkdir(parents=True, exist_ok=True)
    output_file = data_dir / ("histology_" + template + ".h5")

    # The URL lookup must happen inside the try block: an invalid template
    # raises the KeyError there, not in _download_file.
    try:
        url = read_data_fetcher_json()["bigbrain_profiles"][template]["url"]
        _download_file(url, output_file, overwrite)
    except KeyError:
        raise KeyError(
            "Could not find the requested template. Valid templates are: "
            "'fslr32k', 'fsaverage', 'fsaverage5'."
        )
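
# Illustrative usage sketch, not part of the original module: download the
# fsaverage BigBrain profiles and list the resulting file. Reading the .h5
# contents is left to the histology module.
def _example_download_histology() -> None:
    target = Path.home() / "brainstat_data" / "bigbrain_data"
    download_histology_profiles(data_dir=target, template="fsaverage")
    print(sorted(target.glob("histology_*.h5")))
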
"""Unit tests for the histology module.""" import pytest import requests from brainstat._utils import read_data_fetcher_json parametrize = pytest.mark.parametrize json = read_data_fetcher_json() @parametrize("template", list(json["bigbrain_profiles"].keys())) def test_urls(template): """Tests whether the histology files can be downloaded. Parameters ---------- template : list Template names. """ r = requests.head(json["bigbrain_profiles"][template]["url"]) assert r.status_code == 200
def fetch_abide_data(
    data_dir: Optional[Union[str, Path]] = None,
    sites: Optional[Sequence[str]] = None,
    keep_control: bool = True,
    keep_patient: bool = True,
    overwrite: bool = False,
    min_rater_ok: int = 3,
) -> Tuple[np.ndarray, pd.DataFrame]:
    """Fetches ABIDE cortical thickness data.

    Parameters
    ----------
    data_dir : str, pathlib.Path, optional
        Path to store the ABIDE data, by default
        $HOME_DIR/brainstat_data/abide_data.
    sites : list, tuple, optional
        List of sites to include. If None, uses all sites, by default None.
    keep_control : bool, optional
        If true, keeps control subjects, by default True.
    keep_patient : bool, optional
        If true, keeps patient subjects, by default True.
    overwrite : bool, optional
        If true, overwrites existing data, by default False.
    min_rater_ok : int, optional
        Minimum number of raters who approved the data, by default 3.

    Returns
    -------
    np.ndarray
        Subject-by-vertex cortical thickness data on the CIVET 41k surfaces.
    pd.DataFrame
        Subject demographics.
    """
    data_dir = Path(data_dir) if data_dir else data_directories["ABIDE_DATA_DIR"]
    data_dir.mkdir(exist_ok=True, parents=True)
    summary_spreadsheet = data_dir / "summary_spreadsheet.csv"

    summary_url = read_data_fetcher_json()["abide_tutorial"]["summary_spreadsheet"]
    _download_file(summary_url["url"], summary_spreadsheet, overwrite)
    df = pd.read_csv(summary_spreadsheet)
    _select_subjects(df, sites, keep_patient, keep_control, min_rater_ok)

    # Download subject thickness data.
    def _thickness_url(derivative, identifier):
        return f"https://s3.amazonaws.com/fcp-indi/data/Projects/ABIDE_Initiative/Outputs/civet/thickness_{derivative}/{identifier}_{derivative}.txt"

    thickness_data = np.zeros((df.shape[0], 81924))
    remove_rows = []
    progress_bar = tqdm(df.itertuples())
    for i, row in enumerate(progress_bar):
        progress_bar.set_description(
            f"Fetching thickness data for subject {i+1} out of {df.shape[0]}"
        )
        for j, hemi in enumerate(["left", "right"]):
            filename = data_dir / f"{row.SUB_ID}_{hemi}_thickness.txt"
            if not filename.is_file() or overwrite:
                thickness_url = _thickness_url(
                    f"native_rms_rsl_tlink_30mm_{hemi}", row.FILE_ID
                )
                try:
                    _download_file(thickness_url, filename, overwrite, verbose=False)
                except HTTPError:
                    logger.warning(f"Could not download file for {row.SUB_ID}.")
                    remove_rows.append(i)
                    continue
            thickness_data[i, j * 40962 : (j + 1) * 40962] = np.loadtxt(filename)

    if remove_rows:
        thickness_data = np.delete(thickness_data, remove_rows, axis=0)
        df.drop(np.unique(remove_rows), inplace=True)
        df.reset_index(inplace=True)

    return thickness_data, df
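
# Illustrative usage sketch, not part of the original module. "PITT" is one
# ABIDE site name used purely for demonstration; restricting sites keeps the
# download small. The SUB_ID column name is taken from the function body.
def _example_fetch_abide() -> None:
    thickness, demographics = fetch_abide_data(sites=["PITT"], keep_patient=False)
    print(thickness.shape)  # (n_subjects, 81924) on the CIVET 41k surfaces
    print(demographics["SUB_ID"].head())
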
def fetch_gradients(
    template: str = "fsaverage5",
    name: str = "margulies2016",
    data_dir: Optional[Union[str, Path]] = None,
    overwrite: bool = False,
) -> np.ndarray:
    """Fetch example gradients.

    Parameters
    ----------
    template : str, optional
        Name of the template surface. Valid values are "fsaverage5",
        "fsaverage", "fslr32k", defaults to "fsaverage5".
    name : str, optional
        Name of the gradients. Valid values are "margulies2016", defaults to
        "margulies2016".
    data_dir : str, Path, optional
        Path to the directory to store the gradient data files, by default
        $HOME_DIR/brainstat_data/gradient_data.
    overwrite : bool, optional
        If true, overwrites existing files, by default False.

    Returns
    -------
    numpy.ndarray
        Vertex-by-gradient matrix.
    """
    data_dir = Path(data_dir) if data_dir else data_directories["GRADIENT_DATA_DIR"]
    data_dir.mkdir(parents=True, exist_ok=True)

    gradients_file = data_dir / f"gradients_{name}.h5"
    if not gradients_file.exists() or overwrite:
        url = read_data_fetcher_json()["gradients"][name]["url"]
        _download_file(url, gradients_file, overwrite=overwrite)

    # Use a context manager so the HDF5 file handle is closed on all paths.
    with h5py.File(gradients_file, "r") as hf:
        if template in ("civet41k", "civet164k"):
            logger.info(
                "CIVET gradients were not included with the toolbox. Interpolating "
                "gradients from the fsaverage surface with a nearest interpolation."
            )
            fsaverage_left, fsaverage_right = fetch_template_surface(
                "fsaverage", layer="white", join=False
            )
            civet_left, civet_right = fetch_template_surface(
                template, layer="white", join=False
            )

            gradients_fsaverage = np.array(hf["fsaverage"]).T
            gradients_left = _surf2surf(
                fsaverage_left,
                civet_left,
                gradients_fsaverage[: gradients_fsaverage.shape[0] // 2, :],
                interpolation="nearest",
            )
            gradients_right = _surf2surf(
                fsaverage_right,
                civet_right,
                gradients_fsaverage[gradients_fsaverage.shape[0] // 2 :, :],
                interpolation="nearest",
            )
            return np.concatenate((gradients_left, gradients_right), axis=0)

        return np.array(hf[template]).T
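
# Illustrative usage sketch, not part of the original module: fetch the
# Margulies et al. (2016) gradients on fsaverage5. The vertex count in the
# comment follows from fsaverage5 (2 x 10242 vertices).
def _example_fetch_gradients() -> None:
    gradients = fetch_gradients("fsaverage5", "margulies2016")
    print(gradients.shape)  # (20484, n_gradients)
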