Example #1
def _fetch_precomputed_neurosynth(
        data_dir: Path) -> Generator[Path, None, None]:
    """Downloads precomputed Neurosynth features and returns the filepaths."""

    fetcher_info = read_data_fetcher_json()["neurosynth_precomputed"]
    url = fetcher_info["url"]

    existing_files = data_dir.glob(
        "Neurosynth_TFIDF__*z_desc-consistency.nii.gz")

    if len(list(existing_files)) != fetcher_info["n_files"]:
        logger.info("Downloading Neurosynth data files.")
        response = urllib.request.urlopen(url)

        # Open, close, and reopen file to deal with Windows permission issues.
        with NamedTemporaryFile(prefix=str(data_dir),
                                suffix=".zip",
                                delete=False) as f:
            name = f.name
        try:
            with open(name, "wb") as fw:
                fw.write(response.read())

            with zipfile.ZipFile(name, "r") as fr:
                fr.extractall(data_dir)
        finally:
            Path(name).unlink()

    return data_dir.glob("Neurosynth_TFIDF__*z_desc-consistency.nii.gz")
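A minimal usage sketch for the helper above; the directory path is only an illustration, and the call assumes access to the private function from its enclosing module:

from pathlib import Path

data_dir = Path.home() / "brainstat_data" / "neurosynth_data"  # hypothetical path
data_dir.mkdir(parents=True, exist_ok=True)

# Downloads only if the expected number of files is not already present.
feature_files = sorted(_fetch_precomputed_neurosynth(data_dir))
print(f"Found {len(feature_files)} Neurosynth feature maps.")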
Example #2
def read_histology_profile(
    data_dir: Optional[Union[str, Path]] = None,
    template: str = "fsaverage",
    overwrite: bool = False,
) -> np.ndarray:
    """Reads BigBrain histology profiles.

    Parameters
    ----------
    data_dir : str, pathlib.Path, None, optional
        Path to the data directory. If the data is not found here, it will be
        downloaded. If None, data_dir is set to the home directory, by default
        None.
    template : str, optional
        Surface template. Currently allowed options are 'fsaverage' and 'fslr32k', by
        default 'fsaverage'.
    overwrite : bool, optional
        If true, existing data will be overwritten, by default False.

    Returns
    -------
    numpy.ndarray
        Depth-by-vertex array of BigBrain intensities.
    """

    data_dir = Path(
        data_dir) if data_dir else data_directories["BIGBRAIN_DATA_DIR"]

    if template.startswith("civet"):
        logger.info(
            "CIVET histology profiles were not included with BigBrainWarp. Interpolating from fsaverage using nearest neighbor interpolation."
        )
        civet_template = template
        template = "fsaverage"
    else:
        civet_template = ""

    histology_file = data_dir / f"histology_{template}.h5"

    if not histology_file.exists() or overwrite:
        logger.info(
            "Could not find a histological profile or an overwrite was requested. Downloading..."
        )
        download_histology_profiles(data_dir=data_dir,
                                    template=template,
                                    overwrite=overwrite)

    with h5py.File(histology_file, "r") as h5_file:
        profiles = h5_file.get(template)[...]
        if civet_template:
            fsaverage_surface = fetch_template_surface("fsaverage")
            civet_surface = fetch_template_surface(civet_template)
            return _surf2surf(fsaverage_surface, civet_surface, profiles.T).T
        else:
            return profiles
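A hedged usage sketch; the import path is an assumption based on BrainStat's layout and is not confirmed by the snippet above:

from brainstat.context.histology import read_histology_profile  # assumed path

profiles = read_histology_profile(template="fslr32k")
# Depth-by-vertex: average over the depth axis for one value per vertex.
mean_intensity = profiles.mean(axis=0)
print(profiles.shape, mean_intensity.shape)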
Example #3
def surface_genetic_expression(labels: Union[Sequence[str], np.ndarray],
                               surfaces: Optional[Union[str,
                                                        Sequence[str]]] = None,
                               space: Optional[str] = None,
                               *,
                               atlas_info: Optional[str] = None,
                               ibf_threshold: float = 0.5,
                               probe_selection: str = "diff_stability",
                               donor_probes: str = "aggregate",
                               lr_mirror: Optional[bool] = None,
                               missing: Optional[str] = None,
                               tolerance: float = 2,
                               sample_norm: str = "srs",
                               gene_norm: str = "srs",
                               norm_matched: bool = True,
                               norm_structures: bool = False,
                               region_agg: str = "donors",
                               agg_metric: str = "mean",
                               corrected_mni: bool = True,
                               reannotated: bool = True,
                               return_counts: bool = False,
                               return_donors: bool = False,
                               return_report: bool = False,
                               donors: str = "all",
                               data_dir: Optional[str] = None,
                               verbose: float = 0,
                               n_proc: int = 1) -> pd.DataFrame:
    """Computes genetic expression of surface parcels.

    Parameters
    ----------
    labels : list-of-str or numpy.ndarray
        List of paths to label files for the parcellation, or numpy array
        containing the pre-loaded labels
    surfaces : list-of-image, optional
        List of paths to surface files or preloaded surfaces. If not specified
        assumes that `labels` are on the `fsaverage5` surface. Default: None
    space : {'fsaverage', 'fslr'}
        What template space `surfaces` are aligned to. If not specified assumes
        that `labels` are on the `fsaverage5` surface. Default: None

    For details on the remaining parameters, please consult the
    abagen.get_expression_data() documentation. All of its parameters except
    "atlas" are valid inputs.

    Returns
    -------
    pandas.DataFrame
        Dataframe containing the expression of each gene within each region.

    Examples
    --------
    >>> from brainstat.context.genetics import surface_genetic_expression
    >>> from nilearn import datasets
    >>> import numpy as np

    >>> destrieux = datasets.fetch_atlas_surf_destrieux()
    >>> labels = np.hstack((destrieux['map_left'], destrieux['map_right']))
    >>> fsaverage = datasets.fetch_surf_fsaverage()
    >>> surfaces = (fsaverage['pial_left'], fsaverage['pial_right'])
    >>> expression = surface_genetic_expression(labels, surfaces,
    ...                                         space='fsaverage')
    """

    # Deal with the input parameters.
    if isinstance(surfaces, str):
        surfaces = [surfaces]
    elif surfaces is None:
        surfaces = []

    surfaces_gii = []
    for surface in surfaces:
        if not isinstance(surface, (str, Path)):
            # Rather roundabout deletion of the temporary file for Windows
            # compatibility: close the temporary file before writing to its
            # path, as Windows does not allow a second open handle.
            with tempfile.NamedTemporaryFile(suffix=".gii",
                                             delete=False) as f:
                name = f.name
            try:
                write_surface(surface, name, otype="gii")
                surfaces_gii.append(nib.load(name))
            finally:
                Path(name).unlink()
        else:
            surfaces_gii.append(nib.load(surface))

    # Use abagen to grab expression data.
    logger.info(
        "If you use BrainStat's genetics functionality, please cite abagen (https://abagen.readthedocs.io/en/stable/citing.html)."
    )
    atlas = check_atlas(labels, geometry=surfaces_gii, space=space)
    expression = get_expression_data(
        atlas,
        atlas_info=atlas_info,
        ibf_threshold=ibf_threshold,
        probe_selection=probe_selection,
        donor_probes=donor_probes,
        lr_mirror=lr_mirror,
        missing=missing,
        tolerance=tolerance,
        sample_norm=sample_norm,
        gene_norm=gene_norm,
        norm_matched=norm_matched,
        norm_structures=norm_structures,
        region_agg=region_agg,
        agg_metric=agg_metric,
        corrected_mni=corrected_mni,
        reannotated=reannotated,
        return_counts=return_counts,
        return_donors=return_donors,
        return_report=return_report,
        donors=donors,
        data_dir=data_dir,
        verbose=verbose,
        n_proc=n_proc,
    )

    return expression
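Continuing the docstring example above, a sketch of inspecting the returned DataFrame; the gene symbol below is purely illustrative:

# Rows are regions, columns are gene symbols.
print(expression.shape)
if "BDNF" in expression.columns:  # hypothetical gene of interest
    print(expression["BDNF"].describe())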
Example #4
def surface_decoder(
    pial: Union[str, BSPolyData, Sequence[Union[str, BSPolyData]]],
    white: Union[str, BSPolyData, Sequence[Union[str, BSPolyData]]],
    stat_labels: Union[str, np.ndarray, Sequence[Union[str, np.ndarray]]],
    *,
    interpolation: str = "linear",
    data_dir: Optional[Union[str, Path]] = None,
    database: str = "neurosynth",
) -> pd.DataFrame:
    """Meta-analytic decoding of surface maps using NeuroSynth or NeuroQuery.

    Parameters
    ----------
    pial : str, BSPolyData, sequence of str or BSPolyData
        Path of a pial surface file, BSPolyData of a pial surface or a list
        containing multiple of the aforementioned.
    white : str, BSPolyData, sequence of str or BSPolyData
        Path of a white matter surface file, BSPolyData of a white matter
        surface or a list containing multiple of the aforementioned.
    stat_labels : str, numpy.ndarray, sequence of str or numpy.ndarray
        Path to a label file for the surfaces, numpy array containing the
        labels, or a list containing multiple of the aforementioned.
    interpolation : str, optional
        Either 'nearest' for nearest neighbor interpolation, or 'linear'
        for trilinear interpolation, by default 'linear'.
    data_dir : str, optional
        The directory of the dataset. If none exists, a new dataset will
        be downloaded and saved to this path. If None, the directory defaults to
        your home directory, by default None.
    database : str, optional
        The database to decode against, either "neurosynth" or "neuroquery",
        by default "neurosynth".


    Returns
    -------
    pandas.DataFrame
        Table with correlation values for each feature.
    """
    from nilearn.datasets import load_mni152_brain_mask

    data_dir = Path(
        data_dir) if data_dir else data_directories["NEUROSYNTH_DATA_DIR"]
    data_dir.mkdir(exist_ok=True, parents=True)

    logger.info(
        "Fetching Neurosynth feature files. This may take several minutes if you haven't downloaded them yet."
    )
    feature_files = tuple(_fetch_precomputed(data_dir, database=database))

    mni152 = load_mni152_brain_mask()

    with NamedTemporaryFile(suffix=".nii.gz", delete=False) as f:
        name = f.name
    try:
        multi_surface_to_volume(
            pial=pial,
            white=white,
            volume_template=mni152,
            output_file=name,
            labels=stat_labels,
            interpolation=interpolation,
        )

        stat_volume = nib.load(name)

        mask = (stat_volume.get_fdata() != 0) & (mni152.get_fdata() != 0)
        stat_vector = stat_volume.get_fdata()[mask]
    finally:
        Path(name).unlink()

    feature_names = []
    correlations = np.zeros(len(feature_files))

    logger.info("Running correlations with all Neurosynth features.")
    for i, feature_file in enumerate(feature_files):
        feature_names.append(
            re.search("__[A-Za-z0-9 ]+",
                      feature_file.stem)[0][2:])  # type: ignore
        feature_data = nib.load(feature_file).get_fdata()[mask]
        keep = np.logical_not(
            np.isnan(feature_data)
            | np.isinf(feature_data)
            | np.isnan(stat_vector)
            | np.isinf(stat_vector))
        correlations[i], _ = pearsonr(stat_vector[keep], feature_data[keep])

    df = pd.DataFrame(correlations,
                      index=feature_names,
                      columns=["Pearson's r"])
    return df.sort_values(by="Pearson's r", ascending=False)
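A hedged sketch of calling the decoder with template surfaces. The import paths and the layer="pial" value are assumptions (layer="white" and join=False appear elsewhere in these snippets), and t_statistics stands in for a user-supplied vertex-wise map:

from brainstat.datasets import fetch_template_surface  # assumed import path

pial_left, pial_right = fetch_template_surface("fsaverage5",
                                               layer="pial",
                                               join=False)
white_left, white_right = fetch_template_surface("fsaverage5",
                                                 layer="white",
                                                 join=False)

# t_statistics: hypothetical vertex-wise statistic map on fsaverage5.
decoding = surface_decoder([pial_left, pial_right],
                           [white_left, white_right], t_statistics)
print(decoding.head())  # strongest positive associations first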
Example #5
def meta_analytic_decoder(
    template: str,
    stat_labels: np.ndarray,
    data_dir: Optional[Union[str, Path]] = None,
) -> pd.DataFrame:
    """Meta-analytic decoding of surface maps using NeuroSynth or NeuroQuery.

    Parameters
    ----------
    template : str
        Name of the surface template on which `stat_labels` are defined, e.g.
        "fsaverage5".
    stat_labels : numpy.ndarray
        Numpy array containing the statistics labels defined on the template
        surface.
    data_dir : str, optional
        The directory of the dataset. If none exists, a new dataset will
        be downloaded and saved to this path. If None, the directory defaults to
        your home directory, by default None.


    Returns
    -------
    pandas.DataFrame
        Table with correlation values for each feature.
    """
    data_dir = Path(
        data_dir) if data_dir else data_directories["NEUROSYNTH_DATA_DIR"]
    data_dir.mkdir(exist_ok=True, parents=True)

    logger.info(
        "Fetching Neurosynth feature files. This may take several minutes if you haven't downloaded them yet."
    )
    feature_files = tuple(_fetch_precomputed(data_dir, database="neurosynth"))

    mni152 = nib.load(
        tflow.get("MNI152Lin", resolution=2, desc="brain", suffix="mask"))

    stat_nii = _surf2vol(template, stat_labels.flatten())
    mask = (stat_nii.get_fdata() != 0) & (mni152.get_fdata() != 0)
    stat_vector = stat_nii.get_fdata()[mask]

    feature_names = []
    correlations = np.zeros(len(feature_files))

    logger.info("Running correlations with all Neurosynth features.")
    for i, feature_file in enumerate(feature_files):
        feature_names.append(
            re.search("__[A-Za-z0-9 ]+",
                      feature_file.stem)[0][2:])  # type: ignore
        feature_data = nib.load(feature_file).get_fdata()[mask]
        keep = np.logical_not(
            np.isnan(feature_data)
            | np.isinf(feature_data)
            | np.isnan(stat_vector)
            | np.isinf(stat_vector))
        correlations[i], _ = pearsonr(stat_vector[keep], feature_data[keep])

    df = pd.DataFrame(correlations,
                      index=feature_names,
                      columns=["Pearson's r"])
    return df.sort_values(by="Pearson's r", ascending=False)
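A minimal sketch, assuming t_statistics is a vertex-wise numpy array defined on the named template and that the function is importable from its BrainStat module (path assumed):

from brainstat.context.meta_analysis import meta_analytic_decoder  # assumed path

decoding = meta_analytic_decoder("fsaverage5", t_statistics)
print(decoding.head())  # features sorted by Pearson's r, descending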
Example #6
def fetch_parcellation(
    template: str,
    atlas: str,
    n_regions: int,
    join: bool = True,
    seven_networks: bool = True,
    data_dir: Optional[Union[str, Path]] = None,
) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Loads the surface parcellation of a given atlas.

    Parameters
    ----------
    template : str
        The surface template. Valid values are "fsaverage", "fsaverage5",
        "fsaverage6", "fslr32k", "civet41k", and "civet164k".
    atlas : str
        Name of the atlas. Valid names are "cammoun", "glasser", "schaefer", "yeo".
    n_regions : int
        Number of regions of the requested atlas. Valid values for the
        "cammoun" atlas are 33, 60, 125, 250, and 500. The only valid value for
        the "glasser" atlas is 360. Valid values for the "schaefer" atlas are
        100, 200, 300, 400, 500, 600, 800, and 1000. Valid values for "yeo" are
        7 and 17.
    join : bool, optional
        If true, returns parcellation as a single array, if false, returns an
        array per hemisphere, by default True.
    seven_networks : bool, optional
        If true, uses the 7 networks parcellation. Only used for the Schaefer
        atlas, by default True.
    data_dir : str, pathlib.Path, optional
        Directory to save the data, defaults to $HOME_DIR/brainstat_data/parcellation_data.

    Returns
    -------
    np.ndarray or tuple of np.ndarray
        Surface parcellation. If a tuple, then the first element is the left hemisphere.
    """

    data_dir = Path(
        data_dir) if data_dir else data_directories["PARCELLATION_DATA_DIR"]
    data_dir.mkdir(parents=True, exist_ok=True)

    if template == "civet41k" or template == "civet164k":
        logger.info(
            "CIVET parcellations were not included with the toolbox. Interpolating parcellation from the fsaverage surface with a nearest neighbor interpolation."
        )
        civet_template = template
        template = "fsaverage"
    else:
        civet_template = ""

    if atlas == "schaefer":
        parcellations = _fetch_schaefer_parcellation(template, n_regions,
                                                     seven_networks, data_dir)
    elif atlas == "cammoun":
        parcellations = _fetch_cammoun_parcellation(template, n_regions,
                                                    data_dir)
    elif atlas == "glasser":
        parcellations = _fetch_glasser_parcellation(template, data_dir)
    elif atlas == "yeo":
        parcellations = _fetch_yeo_parcellation(template, n_regions, data_dir)
    else:
        raise ValueError(f"Invalid atlas: {atlas}")

    if civet_template:
        fsaverage_left, fsaverage_right = fetch_template_surface("fsaverage",
                                                                 layer="white",
                                                                 join=False)
        civet_left, civet_right = fetch_template_surface(civet_template,
                                                         layer="white",
                                                         join=False)

        parcellations[0] = _surf2surf(fsaverage_left,
                                      civet_left,
                                      parcellations[0],
                                      interpolation="nearest")
        parcellations[1] = _surf2surf(fsaverage_right,
                                      civet_right,
                                      parcellations[1],
                                      interpolation="nearest")

    if join:
        return np.concatenate((parcellations[0], parcellations[1]), axis=0)
    else:
        return parcellations[0], parcellations[1]
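Usage sketches for both return modes; the import path is an assumption based on BrainStat's layout:

from brainstat.datasets import fetch_parcellation  # assumed import path

# Joined: a single array covering both hemispheres.
schaefer_400 = fetch_parcellation("fsaverage5", "schaefer", 400)

# Split: one array per hemisphere, left first.
left, right = fetch_parcellation("fsaverage5", "schaefer", 400, join=False)
print(schaefer_400.shape, left.shape, right.shape)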
Example #7
def fetch_gradients(
    template: str = "fsaverage5",
    name: str = "margulies2016",
    data_dir: Optional[Union[str, Path]] = None,
    overwrite: bool = False,
) -> np.ndarray:
    """Fetch example gradients.

    Parameters
    ----------
    template : str, optional
        Name of the template surface. Valid values are "fsaverage5",
        "fsaverage", "fslr32k", "civet41k", and "civet164k", defaults to
        "fsaverage5".
    name : str
        Name of the gradients. The only valid value is "margulies2016",
        defaults to "margulies2016".
    data_dir : str, Path, optional
        Path to the directory to store the gradient data files, by
        default $HOME_DIR/brainstat_data/gradient_data.
    overwrite : bool, optional
        If true, overwrites existing files, by default False.

    Returns
    -------
    numpy.ndarray
        Vertex-by-gradient matrix.
    """
    data_dir = Path(
        data_dir) if data_dir else data_directories["GRADIENT_DATA_DIR"]
    data_dir.mkdir(parents=True, exist_ok=True)

    gradients_file = data_dir / f"gradients_{name}.h5"
    if not gradients_file.exists() or overwrite:
        url = read_data_fetcher_json()["gradients"][name]["url"]
        _download_file(url, gradients_file, overwrite=overwrite)

    # Use a context manager so the HDF5 file handle is closed on all paths.
    with h5py.File(gradients_file, "r") as hf:
        if template in ("civet41k", "civet164k"):
            logger.info(
                "CIVET gradients were not included with the toolbox. Interpolating gradients from the fsaverage surface with a nearest neighbor interpolation."
            )
            fsaverage_left, fsaverage_right = fetch_template_surface(
                "fsaverage", layer="white", join=False)
            civet_left, civet_right = fetch_template_surface(template,
                                                             layer="white",
                                                             join=False)

            gradients_fsaverage = np.array(hf["fsaverage"]).T
            gradients_left = _surf2surf(
                fsaverage_left,
                civet_left,
                gradients_fsaverage[:gradients_fsaverage.shape[0] // 2, :],
                interpolation="nearest",
            )
            gradients_right = _surf2surf(
                fsaverage_right,
                civet_right,
                gradients_fsaverage[gradients_fsaverage.shape[0] // 2:, :],
                interpolation="nearest",
            )
            return np.concatenate((gradients_left, gradients_right), axis=0)
        else:
            return np.array(hf[template]).T
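A short sketch of fetching the gradients and slicing out the first one; the import path is an assumption based on BrainStat's layout:

from brainstat.datasets import fetch_gradients  # assumed import path

gradients = fetch_gradients("fsaverage5", "margulies2016")
first_gradient = gradients[:, 0]  # one value per vertex
print(gradients.shape)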