Ejemplo n.º 1
0
Archivo: macm.py Proyecto: NBCLab/cALE
def macm_workflow(ns_data_dir, output_dir, prefix, mask_fn):

    # download neurosynth dataset if necessary
    dataset_file = op.join(ns_data_dir, 'neurosynth_dataset.pkl.gz')

    if not op.isfile(dataset_file):
        if not op.isdir(ns_data_dir):
            os.mkdir(ns_data_dir)
        download(ns_data_dir, unpack=True)
        ###############################################################################
        # Convert Neurosynth database to NiMARE dataset file
        # --------------------------------------------------
        dset = convert_neurosynth_to_dataset(
            op.join(ns_data_dir, 'database.txt'),
            op.join(ns_data_dir, 'features.txt'))
        dset.save(dataset_file)

    dset = Dataset.load(dataset_file)
    mask_ids = dset.get_studies_by_mask(mask_fn)
    maskdset = dset.slice(mask_ids)
    nonmask_ids = sorted(list(set(dset.ids) - set(mask_ids)))
    nonmaskdset = dset.slice(nonmask_ids)

    ale = ALE(kernel__fwhm=15)
    ale.fit(maskdset)

    corr = FWECorrector(method='permutation',
                        n_iters=10,
                        n_cores=-1,
                        voxel_thresh=0.001)
    cres = corr.transform(ale.results)
    cres.save_maps(output_dir=output_dir, prefix=prefix)
Ejemplo n.º 2
0
def macm(prefix=None, mask=None, output_dir=None, ns_data_dir=None):

    if mask is None or not op.isfile(mask):
        raise Exception('A valid mask is required for input!')

    if ns_data_dir is None:
        raise Exception(
            'A valid directory is required for downloading Neurosynth data!')

    if prefix is None:
        prefix = op.basename(mask).strip('.nii.gz')

    if output_dir is None:
        output_dir = op.dirname(op.abspath(mask))

    dataset_file = op.join(ns_data_dir, 'neurosynth_dataset.pkl.gz')

    # download neurosynth dataset if necessary
    if not op.isfile(dataset_file):
        from datasets.neurosynth import neurosynth_download
        neurosynth_download(ns_data_dir)

    dset = Dataset.load(dataset_file)
    mask_ids = dset.get_studies_by_mask(mask)
    maskdset = dset.slice(mask_ids)
    nonmask_ids = sorted(list(set(dset.ids) - set(mask_ids)))
    nonmaskdset = dset.slice(nonmask_ids)

    ale = ALE(kernel__fwhm=15)
    ale.fit(maskdset)

    corr = FWECorrector(method='montecarlo',
                        n_iters=5000,
                        n_cores=-1,
                        voxel_thresh=0.001)
    cres = corr.transform(ale.results)
    cres.save_maps(output_dir=output_dir, prefix=prefix)
Ejemplo n.º 3
0
import nibabel as nib
import numpy as np
from nilearn import datasets, image, plotting

from nimare.correct import FWECorrector
from nimare.dataset import Dataset
from nimare.meta.cbma.ale import SCALE
from nimare.meta.cbma.mkda import MKDAChi2

###############################################################################
# Load Dataset
# -----------------------------------------------------------------------------
# We will assume that the Neurosynth database has already been downloaded and
# converted to a NiMARE dataset.
dset_file = "neurosynth_nimare_with_abstracts.pkl.gz"
dset = Dataset.load(dset_file)

###############################################################################
# Define a region of interest
# -----------------------------------------------------------------------------
# We'll use the right amygdala from the Harvard-Oxford atlas
atlas = datasets.fetch_atlas_harvard_oxford("sub-maxprob-thr50-2mm")
img = nib.load(atlas["maps"])

roi_idx = atlas["labels"].index("Right Amygdala")
img_vals = np.unique(img.get_fdata())
roi_val = img_vals[roi_idx]
roi_img = image.math_img(f"img1 == {roi_val}", img1=img)

###############################################################################
# Select studies with a reported coordinate in the ROI
Ejemplo n.º 4
0
"""
Extract Cognitive Atlas terms from abstracts for Neurosynth database and
add to Dataset.
Plus with hierarchical expansion.
"""
from nimare import annotate
from nimare.dataset import Dataset
import os.path as op
import numpy as np
from nimare.extract import download_cognitive_atlas

# Load dataset with abstracts
dataset = Dataset.load('resources/neurosynth_with_abstracts.pkl.gz')

# Extract Cognitive Atlas term counts and add to Dataset annotations
counts_df, rep_text_df = annotate.cogat.extract_cogat(dataset.texts,
                                                      text_column='abstract')
dataset.annotations = pd.merge(dataset.annotations,
                               counts_df,
                               left_on='id',
                               right_index=True,
                               how='left')

# Perform simple hierarchical expansion on CogAt term counts and add to Dataset annotations
weights = {'isKindOf': 1, 'isPartOf': 1, 'inCategory': 1}
columns = counts_df.columns
columns = {c: c.replace('CogAt_count__', '') for c in columns}  # drop prefix
expanded_counts_df = counts_df.rename(columns=columns)

expanded_counts_df = annotate.cogat.expand_counts(expanded_counts_df,
                                                  weights=weights)
Ejemplo n.º 5
0
def scale_workflow(
    dataset_file,
    baseline=None,
    output_dir=None,
    prefix=None,
    n_iters=2500,
    v_thr=0.001,
    n_cores=1,
):
    """Perform SCALE meta-analysis from Sleuth text file or NiMARE json file.

    Warnings
    --------
    This method is not yet implemented.
    """
    if dataset_file.endswith(".json"):
        dset = Dataset(dataset_file, target="mni152_2mm")
    elif dataset_file.endswith(".txt"):
        dset = convert_sleuth_to_dataset(dataset_file, target="mni152_2mm")
    else:
        dset = Dataset.load(dataset_file)

    boilerplate = """
A specific coactivation likelihood estimation (SCALE; Langner et al., 2014)
meta-analysis was performed using NiMARE. The input dataset included {n}
studies/experiments.

Voxel-specific null distributions were generated using base rates from {bl}
with {n_iters} iterations. Results were thresholded at p < {thr}.

References
----------
- Langner, R., Rottschy, C., Laird, A. R., Fox, P. T., & Eickhoff, S. B. (2014).
Meta-analytic connectivity modeling revisited: controlling for activation base
rates. NeuroImage, 99, 559-570.
    """
    boilerplate = boilerplate.format(
        n=len(dset.ids),
        thr=v_thr,
        bl=baseline if baseline else "a gray matter template",
        n_iters=n_iters,
    )

    # At the moment, the baseline file should be an n_coords X 3 list of matrix
    # indices matching the dataset template, where the base rate for a given
    # voxel is reflected by the number of times that voxel appears in the array
    if not baseline:
        xyz = vox2mm(
            np.vstack(np.where(dset.masker.mask_img.get_fdata())).T,
            dset.masker.mask_img.affine,
        )
    else:
        xyz = np.loadtxt(baseline)

    estimator = SCALE(xyz=xyz, n_iters=n_iters, n_cores=n_cores)
    results = estimator.fit(dset)

    if output_dir is None:
        output_dir = os.path.dirname(dataset_file)
    else:
        pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

    if prefix is None:
        base = os.path.basename(dataset_file)
        prefix, _ = os.path.splitext(base)
        prefix += "_"
    elif not prefix.endswith("_"):
        prefix = prefix + "_"

    results.save_maps(output_dir=output_dir, prefix=prefix)
    copyfile(dataset_file,
             os.path.join(output_dir, prefix + "input_coordinates.txt"))

    LGR.info("Workflow completed.")
    LGR.info(boilerplate)
Ejemplo n.º 6
0
def main(
    workdir,
    outdir,
    atlas,
    kernel,
    sparsity,
    affinity,
    approach,
    gradients,
    subcort,
    neurosynth,
    neurosynth_file,
    sleuth_file,
    nimare_dataset,
    roi_mask,
    term,
    topic,
):
    workdir = op.join(workdir, "tmp")
    if op.isdir(workdir):
        shutil.rmtree(workdir)
    os.makedirs(workdir)

    atlas_name = "atlas-{0}".format(atlas)
    kernel_name = "kernel-{0}".format(kernel)
    sparsity_name = "sparsity-{0}".format(sparsity)
    affinity_name = "affinity-{0}".format(affinity)
    approach_name = "approach-{0}".format(approach)
    gradients_name = "gradients-{0}".format(gradients)
    dset = None

    # handle neurosynth dataset, if called
    if neurosynth:
        if neurosynth_file is None:

            ns_data_dir = op.join(workdir, "neurosynth")
            dataset_file = op.join(ns_data_dir, "neurosynth_dataset.pkl.gz")
            # download neurosynth dataset if necessary
            if not op.isfile(dataset_file):
                neurosynth_download(ns_data_dir)

        else:
            dataset_file = neurosynth_file

        dset = Dataset.load(dataset_file)
        dataset_name = "dataset-neurosynth"

    # handle sleuth text file, if called
    if sleuth_file is not None:
        dset = convert_sleuth_to_dataset(sleuth_file, target="mni152_2mm")
        dataset_name = "dataset-{0}".format(op.basename(sleuth_file).split(".")[0])

    if nimare_dataset is not None:
        dset = Dataset.load(nimare_dataset)
        dataset_name = "dataset-{0}".format(op.basename(nimare_dataset).split(".")[0])

    if dset:
        # slice studies, if needed
        if roi_mask is not None:
            roi_ids = dset.get_studies_by_mask(roi_mask)
            print(
                "{}/{} studies report at least one coordinate in the "
                "ROI".format(len(roi_ids), len(dset.ids))
            )
            dset_sel = dset.slice(roi_ids)
            dset = dset_sel
            dataset_name = "dataset-neurosynth_mask-{0}".format(
                op.basename(roi_mask).split(".")[0]
            )

        if term is not None:
            labels = ["Neurosynth_TFIDF__{label}".format(label=label) for label in [term]]
            term_ids = dset.get_studies_by_label(labels=labels, label_threshold=0.1)
            print(
                "{}/{} studies report association "
                "with the term {}".format(len(term_ids), len(dset.ids), term)
            )
            dset_sel = dset.slice(term_ids)
            dset = dset_sel
            # img_inds = np.nonzero(dset.masker.mask_img.get_fdata())  # unused
            # vox_locs = np.unravel_index(img_inds, dset.masker.mask_img.shape)  # unused
            dataset_name = "dataset-neurosynth_term-{0}".format(term)

        if topic is not None:
            topics = [
                "Neurosynth_{version}__{topic}".format(version=topic[0], topic=topic)
                for topic in topic[1:]
            ]
            topics_ids = []
            for topic in topics:
                topic_ids = dset.annotations.id[np.where(dset.annotations[topic])[0]].tolist()
                topics_ids.extend(topic_ids)
                print(
                    "{}/{} studies report association "
                    "with the term {}".format(len(topic_ids), len(dset.ids), topic)
                )
            topics_ids_unique = np.unique(topics_ids)
            print("{} unique ids".format(len(topics_ids_unique)))
            dset_sel = dset.slice(topics_ids_unique)
            dset = dset_sel
            # img_inds = np.nonzero(dset.masker.mask_img.get_fdata())  # unused
            # vox_locs = np.unravel_index(img_inds, dset.masker.mask_img.shape)  # unused
            dataset_name = "dataset-neurosynth_topic-{0}".format("_".join(topic[1:]))

        if (
            neurosynth
            or (sleuth_file is not None)
            or (nimare_dataset is not None)
        ):
            # set kernel for MA smoothing
            if kernel == "peaks2maps":
                print("Running peak2maps")
                k = Peaks2MapsKernel(resample_to_mask=True)
            elif kernel == "alekernel":
                print("Running alekernel")
                k = ALEKernel(fwhm=15)

            if atlas is not None:
                if atlas == "harvard-oxford":
                    print("Parcellating using the Harvard Oxford Atlas")
                    # atlas_labels = atlas.labels[1:]  # unused
                    atlas_shape = atlas.maps.shape
                    atlas_affine = atlas.maps.affine
                    atlas_data = atlas.maps.get_fdata()
                elif atlas == "aal":
                    print("Parcellating using the AAL Atlas")
                    atlas = datasets.fetch_atlas_aal()
                    # atlas_labels = atlas.labels  # unused
                    atlas_shape = nib.load(atlas.maps).shape
                    atlas_affine = nib.load(atlas.maps).affine
                    atlas_data = nib.load(atlas.maps).get_fdata()
                elif atlas == "craddock-2012":
                    print("Parcellating using the Craddock-2012 Atlas")
                    atlas = datasets.fetch_atlas_craddock_2012()
                elif atlas == "destrieux-2009":
                    print("Parcellating using the Destrieux-2009 Atlas")
                    atlas = datasets.fetch_atlas_destrieux_2009(lateralized=True)
                    # atlas_labels = atlas.labels[3:]  # unused
                    atlas_shape = nib.load(atlas.maps).shape
                    atlas_affine = nib.load(atlas.maps).affine
                    atlas_data = nib.load(atlas.maps).get_fdata()
                elif atlas == "msdl":
                    print("Parcellating using the MSDL Atlas")
                    atlas = datasets.fetch_atlas_msdl()
                elif atlas == "surface":
                    print("Generating surface vertices")

                if atlas != "fsaverage5" and atlas != "hcp":
                    imgs = k.transform(dset, return_type="image")

                    masker = NiftiLabelsMasker(
                        labels_img=atlas.maps, standardize=True, memory="nilearn_cache"
                    )
                    time_series = masker.fit_transform(imgs)

                else:
                    # change to array for other approach
                    imgs = k.transform(dset, return_type="image")
                    print(np.shape(imgs))

                    if atlas == "fsaverage5":
                        fsaverage = fetch_surf_fsaverage(mesh="fsaverage5")
                        pial_left = fsaverage.pial_left
                        pial_right = fsaverage.pial_right
                        medial_wall_inds_left = surface.load_surf_data(
                            "./templates/lh.Medial_wall.label"
                        )
                        print(np.shape(medial_wall_inds_left))
                        medial_wall_inds_right = surface.load_surf_data(
                            "./templates/rh.Medial_wall.label"
                        )
                        print(np.shape(medial_wall_inds_right))
                        sulc_left = fsaverage.sulc_left
                        sulc_right = fsaverage.sulc_right

                    elif atlas == "hcp":
                        pial_left = "./templates/S1200.L.pial_MSMAll.32k_fs_LR.surf.gii"
                        pial_right = "./templates/S1200.R.pial_MSMAll.32k_fs_LR.surf.gii"
                        medial_wall_inds_left = np.where(
                            nib.load("./templates/hcp.tmp.lh.dscalar.nii").get_fdata()[0] == 0
                        )[0]
                        medial_wall_inds_right = np.where(
                            nib.load("./templates/hcp.tmp.rh.dscalar.nii").get_fdata()[0] == 0
                        )[0]
                        left_verts = 32492 - len(medial_wall_inds_left)
                        sulc_left = nib.load(
                            "./templates/S1200.sulc_MSMAll.32k_fs_LR.dscalar.nii"
                        ).get_fdata()[0][0:left_verts]
                        sulc_left = np.insert(
                            sulc_left,
                            np.subtract(
                                medial_wall_inds_left, np.arange(len(medial_wall_inds_left))
                            ),
                            0,
                        )
                        sulc_right = nib.load(
                            "./templates/S1200.sulc_MSMAll.32k_fs_LR.dscalar.nii"
                        ).get_fdata()[0][left_verts:]
                        sulc_right = np.insert(
                            sulc_right,
                            np.subtract(
                                medial_wall_inds_right, np.arange(len(medial_wall_inds_right))
                            ),
                            0,
                        )

                    surf_lh = surface.vol_to_surf(
                        imgs,
                        pial_left,
                        radius=6.0,
                        interpolation="nearest",
                        kind="ball",
                        n_samples=None,
                        mask_img=dset.masker.mask_img,
                    )
                    surf_rh = surface.vol_to_surf(
                        imgs,
                        pial_right,
                        radius=6.0,
                        interpolation="nearest",
                        kind="ball",
                        n_samples=None,
                        mask_img=dset.masker.mask_img,
                    )
                    surfs = np.transpose(np.vstack((surf_lh, surf_rh)))
                    del surf_lh, surf_rh

                    # handle cortex first
                    coords_left = surface.load_surf_data(pial_left)[0]
                    coords_left = np.delete(coords_left, medial_wall_inds_left, axis=0)
                    coords_right = surface.load_surf_data(pial_right)[0]
                    coords_right = np.delete(coords_right, medial_wall_inds_right, axis=0)

                    print("Left Hemipshere Vertices")
                    surface_macms_lh, inds_discard_lh = build_macms(dset, surfs, coords_left)
                    print(np.shape(surface_macms_lh))
                    print(inds_discard_lh)

                    print("Right Hemipshere Vertices")
                    surface_macms_rh, inds_discard_rh = build_macms(dset, surfs, coords_right)
                    print(np.shape(surface_macms_rh))
                    print(len(inds_discard_rh))

                    lh_vertices_total = np.shape(surface_macms_lh)[0]
                    rh_vertices_total = np.shape(surface_macms_rh)[0]
                    time_series = np.transpose(np.vstack((surface_macms_lh, surface_macms_rh)))
                    print(np.shape(time_series))
                    del surface_macms_lh, surface_macms_rh

                    if subcort:
                        subcort_img = nib.load("templates/rois-subcortical_mni152_mask.nii.gz")
                        subcort_vox = np.asarray(np.where(subcort_img.get_fdata()))
                        subcort_mm = vox2mm(subcort_vox.T, subcort_img.affine)

                        print("Subcortical Voxels")
                        subcort_macm, inds_discard_subcort = build_macms(dset, surfs, subcort_mm)

                        num_subcort_vox = np.shape(subcort_macm)[0]
                        print(inds_discard_subcort)

                        time_series = np.hstack((time_series, np.asarray(subcort_macm).T))
                        print(np.shape(time_series))

                time_series = time_series.astype("float32")

                print("calculating correlation matrix")
                correlation = ConnectivityMeasure(kind="correlation")
                time_series = correlation.fit_transform([time_series])[0]
                print(np.shape(time_series))

                if affinity == "cosine":
                    time_series = calculate_affinity(time_series, 10 * sparsity)

            else:
                time_series = np.transpose(k.transform(dset, return_type="array"))

    print("Performing gradient analysis")

    gradients, statistics = embed.compute_diffusion_map(
        time_series, alpha=0.5, return_result=True, overwrite=True
    )
    pickle.dump(statistics, open(op.join(workdir, "statistics.p"), "wb"))

    # if subcortical included in gradient decomposition, remove gradient scores
    if subcort:
        subcort_grads = gradients[np.shape(gradients)[0] - num_subcort_vox :, :]
        subcort_grads = insert(subcort_grads, inds_discard_subcort)
        gradients = gradients[0 : np.shape(gradients)[0] - num_subcort_vox, :]

    # get left hemisphere gradient scores, and insert 0's where medial wall is
    gradients_lh = gradients[0:lh_vertices_total, :]
    if len(inds_discard_lh) > 0:
        gradients_lh = insert(gradients_lh, inds_discard_lh)
    gradients_lh = insert(gradients_lh, medial_wall_inds_left)

    # get right hemisphere gradient scores and insert 0's where medial wall is
    gradients_rh = gradients[-rh_vertices_total:, :]
    if len(inds_discard_rh) > 0:
        gradients_rh = insert(gradients_rh, inds_discard_rh)
    gradients_rh = insert(gradients_rh, medial_wall_inds_right)

    grad_dict = {
        "grads_lh": gradients_lh,
        "grads_rh": gradients_rh,
        "pial_left": pial_left,
        "sulc_left": sulc_left,
        "pial_right": pial_right,
        "sulc_right": sulc_right,
    }
    if subcort:
        grad_dict["subcort_grads"] = subcort_grads
    pickle.dump(grad_dict, open(op.join(workdir, "gradients.p"), "wb"))

    # map the gradient to the parcels
    for i in range(np.shape(gradients)[1]):
        if atlas is not None:
            if atlas == "fsaverage5" or atlas == "hcp":

                plot_surfaces(grad_dict, i, workdir)

                if subcort:
                    tmpimg = masking.unmask(subcort_grads[:, i], subcort_img)
                    nib.save(tmpimg, op.join(workdir, "gradient-{0}.nii.gz".format(i)))
            else:
                tmpimg = np.zeros(atlas_shape)
                for j, n in enumerate(np.unique(atlas_data)[1:]):
                    inds = atlas_data == n
                    tmpimg[inds] = gradients[j, i]
                    nib.save(
                        nib.Nifti1Image(tmpimg, atlas_affine),
                        op.join(workdir, "gradient-{0}.nii.gz".format(i)),
                    )
        else:
            tmpimg = np.zeros(np.prod(dset.masker.mask_img.shape))
            inds = np.ravel_multi_index(
                np.nonzero(dset.masker.mask_img.get_fdata()), dset.masker.mask_img.shape
            )
            tmpimg[inds] = gradients[:, i]
            nib.save(
                nib.Nifti1Image(
                    np.reshape(tmpimg, dset.masker.mask_img.shape), dset.masker.mask_img.affine
                ),
                op.join(workdir, "gradient-{0}.nii.gz".format(i)),
            )

            os.system(
                "python3 /Users/miriedel/Desktop/GitHub/surflay/make_figures.py "
                "-f {grad_image} --colormap jet".format(
                    grad_image=op.join(workdir, "gradient-{0}.nii.gz".format(i))
                )
            )

    output_dir = op.join(
        outdir,
        (
            f"{dataset_name}_{atlas_name}_{kernel_name}_{sparsity_name}_{gradients_name}_"
            f"{affinity_name}_{approach_name}"
        )
    )

    shutil.copytree(workdir, output_dir)

    shutil.rmtree(workdir)
Ejemplo n.º 7
0
# -----------------------------------------------------------------------------
# Json files are used to create Datasets, while generated Datasets are saved
# to, and loaded from, pkl[.gz] files.
# We use jsons because they are easy to edit, and thus build by hand, if
# necessary.
# We then store the generated Datasets as pkl.gz files because an initialized
# Dataset is no longer a dictionary.

# Let's start by downloading a dataset
dset_dir = download_nidm_pain()

# Now we can load and save the Dataset object
dset_file = os.path.join(get_resource_path(), "nidm_pain_dset.json")
dset = Dataset(dset_file, target="mni152_2mm", mask=None)
dset.save("pain_dset.pkl")
dset = Dataset.load("pain_dset.pkl")
os.remove("pain_dset.pkl")  # cleanup

###############################################################################
# Much of the data in Datasets is stored as DataFrames
# -----------------------------------------------------------------------------
# The five DataFrames in Dataset are "coordinates" (reported peaks),
# "images" (statistical maps), "metadata", "texts", and "annotations" (labels).

###############################################################################
# ``Dataset.annotations`` contains labels describing studies
# `````````````````````````````````````````````````````````````````````````````
# Columns include the standard identifiers and any labels.
# The labels may be grouped together based on label source, in which case they
# should be prefixed with some string followed by two underscores.
dset.annotations.head()