Ejemplo n.º 1
0
def test_NeuroVault_metadata():
    """Exercise the metadata queries of ``pyneurovault.api``.

    Every query result is handed to ``check_df`` together with the minimum
    size and the column names that result is expected to satisfy.
    """
    from pyneurovault import api

    image_columns = ["url", "name", "map_type"]
    collection_columns = ["used_smoothing", "url", "collection_id"]

    # All images. The single-argument print(...) form runs unchanged on
    # both Python 2 and Python 3.
    print("Checking metadata extraction for images...")
    images = api.get_images()
    check_df(df=images, size_min=7000, columns=list(image_columns))

    # Subset of images, selected by primary key
    images = api.get_images(pks=images.image_id[0:10].tolist())
    check_df(df=images, size_min=10, columns=list(image_columns))

    # All collections
    print("Checking metadata extraction for collections...")
    collections = api.get_collections()
    check_df(df=collections, size_min=300, columns=list(collection_columns))

    # Collections looked up through the first 15 non-null DOIs
    dois = collections.DOI[collections.DOI.notnull()].tolist()[0:15]
    results = api.collections_from_dois(dois)
    check_df(df=results, size_min=len(dois), columns=list(collection_columns))

    # get_images_with_collections: images joined with their collections
    combined_df = api.get_images_with_collections(collection_pks=[877, 437])
    check_df(df=combined_df,
             size_min=50,
             columns=["url_image", "collection_id", "name_image",
                      "map_type", "image_id"])

    # Subset of collections, selected by primary key
    collections = api.get_collections(pks=[877, 437])
    check_df(df=collections, size_min=1, columns=list(collection_columns))

    # Images belonging to specific collections
    images = api.get_images(collection_pks=[877, 437])
    check_df(df=images, size_min=50, columns=list(image_columns))
Ejemplo n.º 2
0
Archivo: db.py Proyecto: vsoch/brainart
def get_data(download_folder):
    """Download fMRI-BOLD maps into ``download_folder`` and list the files.

    Collections are restricted to those with a non-null DOI whose
    ``group_comparison`` value is not ``True``. Their images are passed
    through ``api.filter`` for ``is_thresholded`` (False), ``not_mni``
    (False) and ``modality`` ("fMRI-BOLD"), then downloaded without
    resampling.

    :param download_folder: download destination folder for nifti gz files
    :returns brainmaps: list of nii gz images matching
        ``<download_folder>/original/*.nii.gz``
    """
    collections = api.get_collections()
    collections = collections[collections.DOI.notnull()]
    collections = collections[collections.group_comparison.ne(True)]

    images = api.get_images(collection_pks=list(collections.collection_id))

    # (column, required value) pairs, applied in this order
    criteria = (
        ("is_thresholded", False),
        ("not_mni", False),
        ("modality", "fMRI-BOLD"),
    )
    for column, wanted in criteria:
        images = api.filter(df=images, column_name=column, field_value=wanted)

    api.download_images(dest_dir=download_folder, images_df=images, resample=False)
    pattern = "%s/original/*.nii.gz" % (download_folder)
    return glob(pattern)
Ejemplo n.º 3
0
def get_data(download_folder):
    '''Download the selected images and return the downloaded file paths.

    Selection: collections with a DOI whose ``group_comparison`` is not
    ``True``; images then go through ``api.filter`` on ``is_thresholded``
    (False), ``not_mni`` (False) and ``modality`` ("fMRI-BOLD").

    :param download_folder: download destination folder for nifti gz files
    :returns brainmaps: list of nii gz images
    '''
    everything = api.get_collections()
    has_doi = everything.DOI.notnull()
    published = everything[has_doi]
    not_group_comparison = published.group_comparison != True
    selected = published[not_group_comparison]

    collection_ids = list(selected.collection_id)
    images = api.get_images(collection_pks=collection_ids)

    # Apply the three image-level filters one after another
    for name, value in [("is_thresholded", False),
                        ("not_mni", False),
                        ("modality", "fMRI-BOLD")]:
        images = api.filter(df=images, column_name=name, field_value=value)

    # Images are fetched as-is (no resampling)
    api.download_images(dest_dir=download_folder,
                        images_df=images,
                        resample=False)
    return glob("%s/original/*.nii.gz" % (download_folder))
# Make sure both output folders exist before anything is written
for folder in (results, data):
    if not os.path.exists(folder):
        os.mkdir(folder)

# Fetch every collection and keep only those that have a DOI
collections = api.get_collections()
collections = collections[collections.DOI.notnull()]
# This file is used to build the custom web interface
collections_tsv = "%s/collections_with_dois.tsv" % results
collections.to_csv(collections_tsv, encoding="utf-8", sep="\t")

# Image meta data for those collections (N=1023)
images = api.get_images(collection_pks=collections.collection_id.tolist())

# Keep images that have a contrast defined (N=98)
images = images[images.cognitive_contrast_cogatlas_id.notnull()]
# Saved for documentation only, not read by later analyses
contrast_tsv = "%s/contrast_defined_images.tsv" % results
images.to_csv(contrast_tsv, encoding="utf-8", sep="\t")

# Drop anything that is not in MNI, is single-subject, or is thresholded
keep = ((images.not_mni == False)
        & (images.analysis_level != "single-subject")
        & (images.is_thresholded == False))
images = images[keep]

# We can't use Rest or other/none
Ejemplo n.º 5
0
from pyneurovault import api
import numpy as np

# Collections that have a DOI
all_collections = api.get_collections()
collections_with_dois = all_collections[all_collections.DOI.notnull()]

images = api.get_images(collection_pks=list(collections_with_dois.collection_id))

# Filter, in order: not thresholded, not flagged as non-MNI, fMRI-BOLD only
for column_name, field_value in (("is_thresholded", False),
                                 ("not_mni", False),
                                 ("modality", "fMRI-BOLD")):
    images = api.filter(df=images, column_name=column_name, field_value=field_value)

# Destination folder for the downloaded images
outfolder = "/home/vanessa/Documents/Work/NEUROVAULT/mosaic"

# Download without resampling
api.download_images(dest_dir=outfolder, images_df=images, resample=False)
Ejemplo n.º 6
0
from pyneurovault import api
import pandas as pd
from nilearn.image import resample_img, smooth_img
import nibabel as nb
import numpy as np
from scipy.ndimage import binary_fill_holes
from scipy import stats
from scipy.ndimage.measurements import labeled_comprehension
import sys

import os

# Command line: <script> <data_location> <atlas_id>
atlas_id = sys.argv[2]
data_location = sys.argv[1]

# Build every path with os.path.join so the script behaves the same whether
# or not data_location ends with a path separator.
atlas_dir = os.path.join(data_location, "atlases")
resampled_dir = os.path.join(atlas_dir, "resampled")

atlases = api.get_images(pks=[atlas_id])
api.download_images(atlas_dir, atlases, resample=False)

standard = "/usr/share/fsl/data/standard/MNI152_T1_2mm_brain.nii.gz"

# Create the output folder directly instead of shelling out through IPython
if not os.path.isdir(resampled_dir):
    os.makedirs(resampled_dir)

fname = atlas_id + ".nii.gz"
print("resampling " + fname)
nii = nb.load(os.path.join(atlas_dir, "original", fname))

# Resample the atlas onto the grid of the standard template; nearest
# neighbour interpolation is requested for the atlas image.
# NOTE(review): get_affine() is the older nibabel accessor; newer releases
# expose the same matrix as `.affine` -- confirm against the installed version.
target_nii = nb.load(standard)
resampled_nii = resample_img(nii,
                             target_affine=target_nii.get_affine(),
                             target_shape=target_nii.shape,
                             interpolation='nearest')
resampled_nii.to_filename(os.path.join(resampled_dir, fname))
Ejemplo n.º 7
0
# Make sure the data and results folders are in place
folders = [data_directory, results_directory]
make_dirs(folders)

# Fetch every collection, then keep only the ones that carry a DOI
collections = api.get_collections()
has_doi = collections.DOI.notnull()
collections = collections[has_doi]

# Not needed downstream; saved for reference
collections_tsv = "%s/collections_with_dois.tsv" % (results_directory)
collections.to_csv(collections_tsv, encoding="utf-8", sep="\t")

# Image meta data for the remaining collections
images = api.get_images(collection_pks=collections.collection_id.tolist())

# Restrict to the curated list of image IDs (curated by Poldracklab);
# the CSV has no header row and a single column of IDs.
curated_csv = '%s/included_images.csv' % (data_directory)
curated_images = pandas.read_csv(curated_csv, header=None, names=['image_id'])
is_curated = images['image_id'].isin(curated_images['image_id'])
images = images.loc[is_curated]

## the following won't really have any effect, since the filtering
## is done by the explicit list above, but I'm leaving them
## to make it clear how the filtering was initially done
## however, if any of these criteria change in the neurovault
## database (modality, MNI, thresholded description, cognitive atlas task)
## this list could be filtered further

# Get rid of any not in MNI
Ejemplo n.º 8
0
def download_combined_database(out_dir, overwrite=False):
    """
    Download coordinates/annotations from brainspell and images/annotations
    from Neurovault.

    Currently, the largest barrier is the lack of links between experiments
    (tables) in brainspell/NeuroSynth and those in NeuroVault. The closest we
    have is overall papers, via DOIs.

    Additional problems:
    -   Does NeuroVault have standard error maps?
        -   If so, I doubt there's any way to associate a given SE map and beta
            map within a collection.
    -   How should space be handled?
        -   Should everything be resliced and transformed to the same space at
            this stage or later on?
        -   How can we link a target template (for images) to a target space
            (for coordinates)?
        -   Should we even allow custom targets? Maybe we just limit it to 2mm
            and 1mm MNI templates.

    Three CSV caches are kept in `out_dir` (collections, collections with a
    DOI, and image metadata for those collections). Each one is re-created
    when `overwrite` is True or the file is missing, and read back otherwise.
    Images are only downloaded when `out_dir` did not exist before the call.

    Parameters
    ----------
    out_dir : :obj:`str`
        Folder in which to write out Dataset object and subfolders containing
        images.
    overwrite: :obj:`bool`, optional
        Whether to overwrite existing database, if one exists in `out_dir`.
        Defaults to False.

    Raises
    ------
    Exception
        If `overwrite` is True and `out_dir` already existed: the metadata
        CSVs are refreshed, but re-downloading images is not supported yet.
    """
    # The CSV caches below are written into out_dir, so the folder has to
    # exist before the first to_csv call. Remember whether it was created
    # here: that decides whether images get downloaded at the end.
    new_database = not op.isdir(out_dir)
    if new_database:
        mkdir(out_dir)

    # Download collections metadata from Neurovault
    collections_file = op.join(out_dir, 'neurovault_collections.csv')
    if overwrite or not op.isfile(collections_file):
        colls_df = api.get_collections()
        colls_df.to_csv(collections_file, index=False, encoding='utf-8')
    else:
        colls_df = pd.read_csv(collections_file, encoding='utf-8')

    # Only include collections from published papers (or preprints)
    papers_file = op.join(out_dir, 'neurovault_papers.csv')
    if overwrite or not op.isfile(papers_file):
        paper_df = colls_df.dropna(subset=['DOI'])
        paper_df.to_csv(papers_file, index=False, encoding='utf-8')
    else:
        paper_df = pd.read_csv(papers_file, encoding='utf-8')

    # Get metadata for individual images from valid collections
    papers_metadata_file = op.join(out_dir, 'neurovault_papers_metadata.csv')
    if overwrite or not op.isfile(papers_metadata_file):
        valid_collections = sorted(paper_df['collection_id'].tolist())

        # Sleep between get_images calls to avoid spamming Neurovault
        image_dfs = []
        for chunk in to_chunks(valid_collections, 500):
            image_dfs.append(api.get_images(collection_pks=chunk))
            time.sleep(10)

        image_df = pd.concat(image_dfs)
        image_df.to_csv(papers_metadata_file, index=False, encoding='utf-8')
    else:
        image_df = pd.read_csv(papers_metadata_file, encoding='utf-8')

    # Reduce images database according to additional criteria
    # Only keep unthresholded, MNI, group level fMRI maps
    red_df = image_df.loc[image_df['modality'] == 'fMRI-BOLD']
    red_df = red_df.loc[red_df['image_type'] == 'statistic_map']
    red_df = red_df.loc[red_df['analysis_level'] == 'group']
    # Element-wise comparison is required here: `series is False` is an
    # identity test on the Series object itself and never yields a mask.
    red_df = red_df.loc[red_df['is_thresholded'] == False]  # noqa: E712
    red_df = red_df.loc[red_df['not_mni'] == False]  # noqa: E712

    # Look for relevant metadata
    red_df = red_df.dropna(subset=['cognitive_paradigm_cogatlas'])

    ## MFX/FFX GLMs need contrast (beta) + standard error
    # (selection only; not consumed further down yet)
    mffx_df = red_df.loc[red_df['map_type'] == 'univariate-beta map']

    ## RFX GLMs need contrast (beta)
    # (selection only; not consumed further down yet)
    rfx_df = red_df.loc[red_df['map_type'] == 'univariate-beta map']

    ## Stouffer's, Stouffer's RFX, and Fisher's IBMAs can use Z maps.
    # T and F maps can be transformed into Z maps, but T maps need sample size.
    # Only keep test statistic maps
    acc_map_types = ['Z map', 'T map', 'F map']
    st_df = red_df.loc[red_df['map_type'].isin(acc_map_types)]
    keep_idx = st_df['map_type'].isin(['Z map', 'F map'])
    keep_idx2 = (st_df['map_type'] == 'T map') & ~pd.isnull(st_df['number_of_subjects'])
    keep_idx = keep_idx | keep_idx2
    # Copy so that the column assignment below targets an independent frame
    # rather than a slice of red_df.
    st_df = st_df.loc[keep_idx].copy()

    ## Weighted Stouffer's IBMAs need Z + sample size.
    st_df['id_str'] = st_df['image_id'].astype(str).str.zfill(6)

    if new_database:
        api.download_images(out_dir, red_df, target=None, resample=False)
    elif overwrite:
        # Clearing out and re-filling an existing out_dir is not implemented
        raise Exception('Currently not prepared to overwrite database.')
Ejemplo n.º 9
0
def _write_json(path, payload):
    """Write payload to path as key-sorted JSON indented by four spaces."""
    with open(path, 'wb') as handle:
        handle.write(
            json.dumps(payload,
                       sort_keys=True,
                       indent=4,
                       separators=(',', ': ')))


def _truncate(description, limit=600):
    """Return description cut to limit characters plus "..." when longer."""
    if len(description) > limit:
        return "%s..." % description[0:limit]
    return description


def _image_description(raw):
    """Turn a raw image description cell into a truncated byte string.

    Falsy values, values that cannot be encoded, and the literal "nan"
    all become the empty string.
    """
    if not raw:
        return ""
    try:
        description = str(raw).encode("utf-8")
    except UnicodeError:
        description = ""
    if description == "nan":
        description = ""
    return _truncate(description)


def _image_fields(neurovault_row, concepts):
    """Collect the image fields shared by the tree and the per-image export."""

    def first(column):
        return neurovault_row[column].tolist()[0]

    return {
        "url": first("url"),
        "thumbnail": first("thumbnail"),
        "images": neurovault_row["thumbnail"].tolist(),
        "task": first("cognitive_paradigm_cogatlas"),
        "contrast": first("cognitive_contrast_cogatlas"),
        "download": first("file"),
        "concept": concepts,
    }


def _descendant_names(relationship_table, node):
    """Return the names found below node once every non-int name is expanded.

    Starts from the names of the rows whose parent is node. Any name that is
    not an int is replaced by the names of the rows whose parent is that
    name's id, repeatedly, until only int names remain.
    """
    # Materialise both columns once instead of once per row
    names = relationship_table.name.tolist()
    parents = relationship_table.parent.tolist()

    def names_below(parent_id):
        return [name for name, parent in zip(names, parents)
                if parent == parent_id]

    children_nodes = names_below(node)
    while any(not isinstance(x, int) for x in children_nodes):
        new_parent_nodes = [
            x for x in children_nodes if not isinstance(x, int)
        ]
        children_nodes = [
            x for x in children_nodes if x not in new_parent_nodes
        ]
        for new_parent in new_parent_nodes:
            node_name = relationship_table.id[
                relationship_table.name == new_parent].tolist()[0]
            children_nodes = children_nodes + names_below(node_name)
    return children_nodes


def _concept_fields(relationship_table, images, node, concept, children_nodes):
    """Build the meta data dictionary describing one concept node."""
    definition = concept[0]["definition_text"]
    description = definition.encode("utf-8") if definition else ""
    return {
        # Only children that are images contribute a thumbnail
        "images": images["thumbnail"][
            images.image_id.isin(children_nodes)].tolist(),
        "url": "http://www.cognitiveatlas.org/term/id/%s" % node,
        "type": "concept",
        "thumbnail": "http://www.cognitiveatlas.org/images/logo-front.png",
        "concept": [
            relationship_table.name[relationship_table.id == node].tolist()[0]
        ],
        "task": "",
        "contrast": [],
        "download": "http://www.cognitiveatlas.org/rdf/id/%s" % node,
        "description": _truncate(description),
    }


def main():
    """Build the concept/image tree and export one JSON file per node.

    Steps:
    1. Select images (collection has a DOI, contrast id defined, not flagged
       ``not_mni``, not thresholded), build the relationship table with
       ``concept_node_triples`` and gather meta data for every node.
    2. Render the tree with pybraincompare and dump it to
       ``data//reverseinference.json``.
    3. Write ``ri_<id>.json`` under ``data/`` for every image and for every
       concept node.

    NOTE: the byte handling (``str(...).encode("utf-8")`` values fed to
    ``json.dumps``, files opened in ``'wb'`` mode) follows the Python 2
    conventions this script was written with.
    """
    base = "data/"

    # Make a folder for mean images
    if not os.path.exists("mr"):
        os.mkdir("mr")

    # Get Neurovault Images with defined cognitive atlas contrast
    collections = get_collections()

    # Filter collections to those that have a DOI
    collections = collections[collections.DOI.notnull()]

    # Get image meta data for collections (N=1023)
    images = get_images(collection_pks=collections.collection_id.tolist())

    # Filter images to those with contrasts defined (N=98)
    images = images[images.cognitive_contrast_cogatlas_id.notnull()]

    # Get rid of any not in MNI
    images = images[images.not_mni == False]

    # Get rid of thresholded images
    images = images[images.is_thresholded == False]

    ### Step 1: Load meta data sources
    unique_contrasts = images.cognitive_contrast_cogatlas_id.unique().tolist()

    # Images that do not match the correct identifier will not be used (eg, "Other")
    # NOTE: as a regex this is "cnt" followed by any number of underscores,
    # so every id starting with "cnt" passes.
    expression = re.compile("cnt_*")
    unique_contrasts = [u for u in unique_contrasts if expression.match(u)]

    # Make sure exists in cognitive atlas
    existing_contrasts = []
    for u in unique_contrasts:
        try:
            get_concept(contrast_id=u, silent=True)
        except Exception:
            # Best effort: any lookup failure means the contrast is skipped,
            # but interpreter-level exits (Ctrl-C, SystemExit) still propagate.
            print("%s is defined in NeuroVault, does not exist in Cognitive Atlas" % u)
        else:
            existing_contrasts.append(u)

    image_lookup = dict()
    for u in existing_contrasts:
        image_lookup[u] = images.image_id[
            images.cognitive_contrast_cogatlas_id == u].tolist()

    # Create a data structure of tasks and contrasts for our analysis
    relationship_table = concept_node_triples(image_dict=image_lookup,
                                              save_to_file=False)

    unique_nodes = relationship_table.id.unique().tolist()

    # We will store a dictionary of meta data
    # Lookup for meta_data is the id of the node!
    meta_data = {}

    for node in unique_nodes:
        if re.search("node_", node):
            # This is an image node
            print("Found image node!")
            image_id = relationship_table.name[
                relationship_table.id == node].tolist()[0]
            parent_ids = relationship_table.parent[relationship_table.name ==
                                                   image_id]
            concepts = [
                relationship_table.name[relationship_table.id == c].tolist()[0]
                for c in parent_ids
            ]
            neurovault_row = images[images.image_id == int(image_id)]
            meta_single = _image_fields(neurovault_row, concepts)
            meta_single["category"] = ""
            meta_single["type"] = "nii"
            meta_single["description"] = _image_description(
                neurovault_row["description"].tolist()[0])
        elif node != "1":
            # A concept node
            concept = get_concept(id=node, silent=True).json
            children_nodes = _descendant_names(relationship_table, node)
            meta_single = _concept_fields(relationship_table, images, node,
                                          concept, children_nodes)
        else:
            # Node "1" carries no meta data
            meta_single = {}
        meta_data[node] = meta_single

    ## STEP 2: VISUALIZATION WITH PYBRAINCOMPARE
    from pybraincompare.ontology.tree import named_ontology_tree_from_tsv, make_ontology_tree_d3

    # First let's look at the tree structure
    tree = named_ontology_tree_from_tsv(relationship_table,
                                        output_json=None,
                                        meta_data=meta_data)
    html_snippet = make_ontology_tree_d3(tree)
    web_folder = base
    make_analysis_web_folder(html_snippet, web_folder)

    # To get a dump of just the tree (for use in more advanced custom web interface)
    _write_json('%s/reverseinference.json' % base, tree)

    ## STEP 3: Export individual nodes

    ### Images
    unique_images = images.image_id.unique().tolist()

    for s, image_id in enumerate(unique_images):
        print("Parsing data for images %s of %s" % (s, len(unique_images)))
        # Ids of the concepts this image hangs under. These are kept as the
        # exported concept ids; the names are looked up from them.
        concepts_ids = relationship_table.parent[relationship_table.name == str(
            image_id)].tolist()
        concepts = [
            relationship_table.name[relationship_table.id == c].tolist()[0]
            for c in concepts_ids
        ]
        neurovault_row = images[images.image_id == int(image_id)]
        collection_row = collections[collections.collection_id ==
                                     neurovault_row.collection_id.tolist()[0]]
        collection_meta = {
            "DOI":
            collection_row["DOI"].tolist()[0],
            "authors":
            collection_row["authors"].tolist()[0],
            "journal":
            collection_row["journal_name"].tolist()[0],
            "url":
            collection_row["url"].tolist()[0],
            "subjects":
            collection_row["number_of_subjects"].tolist()[0],
            "smoothing_fwhm":
            str(collection_row["smoothing_fwhm"].tolist()[0]).encode("utf-8"),
            "title":
            collection_row["name"].tolist()[0]
        }
        image_meta = _image_fields(neurovault_row, concepts)
        image_meta["image_id"] = image_id
        image_meta["collection"] = collection_meta
        image_meta["concept_id"] = concepts_ids
        image_meta["description"] = _image_description(
            neurovault_row["description"].tolist()[0])
        _write_json("%s/ri_%s.json" % (base, image_id), image_meta)

    ### Concepts
    for node in unique_nodes:
        # Skip image nodes and node "1"
        if re.search("node_", node) or node == "1":
            continue
        concept = get_concept(id=node).json
        children_nodes = _descendant_names(relationship_table, node)
        meta_single = _concept_fields(relationship_table, images, node,
                                      concept, children_nodes)
        meta_single["image_list"] = children_nodes
        _write_json("%s/ri_%s.json" % (base, node), meta_single)
Ejemplo n.º 10
0
from pyneurovault import api
import pandas as pd
from nilearn.image import resample_img, smooth_img
import nibabel as nb
import numpy as np
from scipy.ndimage import binary_fill_holes
from scipy import stats
from scipy.ndimage.measurements import labeled_comprehension
import sys

import os

# Command line: <script> <data_location> <atlas_id>
atlas_id = sys.argv[2]
data_location = sys.argv[1]

# Build every path with os.path.join so the script behaves the same whether
# or not data_location ends with a path separator.
atlas_dir = os.path.join(data_location, "atlases")
resampled_dir = os.path.join(atlas_dir, "resampled")

atlases = api.get_images(pks=[atlas_id])
api.download_images(atlas_dir, atlases, resample=False)

standard = "/usr/share/fsl/data/standard/MNI152_T1_2mm_brain.nii.gz"

# Create the output folder directly instead of shelling out through IPython
if not os.path.isdir(resampled_dir):
    os.makedirs(resampled_dir)

fname = atlas_id + ".nii.gz"
print("resampling " + fname)
nii = nb.load(os.path.join(atlas_dir, "original", fname))

# Resample the atlas onto the grid of the standard template; nearest
# neighbour interpolation is requested for the atlas image.
# NOTE(review): get_affine() is the older nibabel accessor; newer releases
# expose the same matrix as `.affine` -- confirm against the installed version.
target_nii = nb.load(standard)
resampled_nii = resample_img(nii, target_affine=target_nii.get_affine(),
                             target_shape=target_nii.shape, interpolation='nearest')
resampled_nii.to_filename(os.path.join(resampled_dir, fname))

def score_map(map_filename, atlas_data, labels):
    map_data = nb.load(map_filename).get_data()
Ejemplo n.º 11
0
def _truncate_description(text, limit=600):
    """Return `text`, cut to `limit` characters plus "..." when it is longer."""
    if len(text) > limit:
        return "%s..." % text[0:limit]
    return text


def _image_description(neurovault_row):
    """Return the (possibly truncated) description of a NeuroVault image row.

    An empty value, a value that renders as "nan", or text that cannot be
    encoded all yield "".
    """
    raw = neurovault_row["description"].tolist()[0]
    if not raw:
        return ""
    try:
        description = str(raw).encode("utf-8")
    except UnicodeError:
        description = ""
    if description == "nan":
        description = ""
    return _truncate_description(description)


def _collection_meta(collections, neurovault_row):
    """Return the collection-level metadata dict for the image in `neurovault_row`."""
    collection_id = neurovault_row.collection_id.tolist()[0]
    collection_row = collections[collections.collection_id == collection_id]
    return {"DOI": collection_row["DOI"].tolist()[0],
            "authors": collection_row["authors"].tolist()[0],
            "journal": collection_row["journal_name"].tolist()[0],
            "url": collection_row["url"].tolist()[0],
            "subjects": collection_row["number_of_subjects"].tolist()[0],
            "smoothing_fwhm": str(collection_row["smoothing_fwhm"].tolist()[0]).encode("utf-8"),
            "title": collection_row["name"].tolist()[0]}


def _image_descendants(relationship_table, node):
    """Return the names of all integer-named nodes found below `node`.

    Children whose name is not an int are treated as intermediate nodes and
    are replaced by their own children until only int names remain.
    """
    # Convert the columns once instead of once per row.
    names = relationship_table.name.tolist()
    parents = relationship_table.parent.tolist()

    def children_of(parent_id):
        return [name for name, parent in zip(names, parents) if parent == parent_id]

    children_nodes = children_of(node)
    while len([x for x in children_nodes if not isinstance(x, int)]) > 0:
        new_parent_nodes = [x for x in children_nodes if not isinstance(x, int)]
        children_nodes = [x for x in children_nodes if x not in new_parent_nodes]
        for new_parent in new_parent_nodes:
            parent_id = relationship_table.id[relationship_table.name == new_parent].tolist()[0]
            children_nodes = children_nodes + children_of(parent_id)
    return children_nodes


def _concept_meta(node, concept, relationship_table, images):
    """Return (metadata dict, descendant image names) for concept node `node`.

    `concept` is the JSON payload already fetched for the node.
    """
    children_nodes = _image_descendants(relationship_table, node)
    meta = {}
    # Only children that are images contribute thumbnails
    meta["images"] = images["thumbnail"][images.image_id.isin(children_nodes)].tolist()
    # Cognitive Atlas meta data
    meta["url"] = "http://www.cognitiveatlas.org/term/id/%s" % node
    meta["type"] = "concept"
    meta["thumbnail"] = "http://www.cognitiveatlas.org/images/logo-front.png"
    meta["concept"] = [relationship_table.name[relationship_table.id == node].tolist()[0]]
    meta["task"] = ""
    meta["contrast"] = []
    meta["download"] = "http://www.cognitiveatlas.org/rdf/id/%s" % node
    if concept[0]["definition_text"]:
        description = concept[0]["definition_text"].encode("utf-8")
    else:
        description = ""
    meta["description"] = _truncate_description(description)
    return meta, children_nodes


def _write_json(path, payload):
    """Serialize `payload` as indented, key-sorted JSON and write it to `path`."""
    # Serialize before opening so a failure does not leave an empty file behind.
    text = json.dumps(payload, sort_keys=True, indent=4, separators=(',', ': '))
    with open(path, 'wb') as filey:
        filey.write(text)


def main():
    """Build the concept/image tree and export its metadata as JSON files.

    Step 1 collects NeuroVault images (collections with a DOI, contrast
    defined, in MNI, not thresholded) and assembles per-node metadata.
    Step 2 renders the tree with pybraincompare into the "data/" folder and
    dumps it to reverseinference.json.  Step 3 writes one ri_<id>.json file
    per image and per concept node.

    The string handling (str(...).encode, binary-mode JSON writes) follows
    Python 2 semantics, like the rest of this script.
    """
    base = "data/"

    # Make a folder for mean images
    if not os.path.exists("mr"):
        os.mkdir("mr")

    # Get NeuroVault collections and keep those that have a DOI
    collections = get_collections()
    collections = collections[collections.DOI.isnull() == False]

    # Get image meta data for those collections
    images = get_images(collection_pks=collections.collection_id.tolist())

    # Keep images with a contrast defined, in MNI, and not thresholded
    images = images[images.cognitive_contrast_cogatlas_id.isnull() == False]
    images = images[images.not_mni == False]
    images = images[images.is_thresholded == False]

    ### Step 1: Load meta data sources
    unique_contrasts = images.cognitive_contrast_cogatlas_id.unique().tolist()

    # Images that do not match the correct identifier will not be used (eg, "Other")
    # NOTE(review): as a regex "cnt_*" accepts any string starting with "cnt";
    # left unchanged to keep the selected set of contrasts identical.
    expression = re.compile("cnt_*")
    unique_contrasts = [u for u in unique_contrasts if expression.match(u)]

    # Make sure each contrast exists in the Cognitive Atlas
    existing_contrasts = []
    for u in unique_contrasts:
        try:
            get_concept(contrast_id=u, silent=True)
        except Exception:
            print("%s is defined in NeuroVault, does not exist in Cognitive Atlas" % u)
        else:
            existing_contrasts.append(u)

    image_lookup = dict()
    for u in existing_contrasts:
        image_lookup[u] = images.image_id[images.cognitive_contrast_cogatlas_id == u].tolist()

    # Create a data structure of tasks and contrasts for our analysis
    relationship_table = concept_node_triples(image_dict=image_lookup, save_to_file=False)

    unique_nodes = relationship_table.id.unique().tolist()

    # Meta data for the tree, keyed by node id
    meta_data = {}

    for node in unique_nodes:
        meta_single = {}
        if re.search("node_", node):
            # This is an image node
            print("Found image node!")
            relationship_table_row = relationship_table[relationship_table.id == node]
            image_id = relationship_table_row.name.tolist()[0]
            meta_single["category"] = ""
            meta_single["type"] = "nii"
            # NeuroVault metadata
            concepts = relationship_table.parent[relationship_table.name == image_id]
            concepts = [relationship_table.name[relationship_table.id == c].tolist()[0] for c in concepts]
            neurovault_row = images[images.image_id == int(image_id)]
            meta_single["url"] = neurovault_row["url"].tolist()[0]
            meta_single["thumbnail"] = neurovault_row["thumbnail"].tolist()[0]
            meta_single["images"] = neurovault_row["thumbnail"].tolist()
            meta_single["task"] = neurovault_row["cognitive_paradigm_cogatlas"].tolist()[0]
            meta_single["contrast"] = neurovault_row["cognitive_contrast_cogatlas"].tolist()[0]
            meta_single["download"] = neurovault_row["file"].tolist()[0]
            meta_single["concept"] = concepts
            # Same cleaning as the per-image export in step 3, so a missing
            # description is "" rather than the literal text "nan".
            meta_single["description"] = _image_description(neurovault_row)
        elif node != "1":
            # A concept node (node "1" keeps an empty metadata dict)
            concept = get_concept(id=node, silent=True).json
            meta_single, _ = _concept_meta(node, concept, relationship_table, images)
        meta_data[node] = meta_single

    ## STEP 2: VISUALIZATION WITH PYBRAINCOMPARE
    from pybraincompare.ontology.tree import named_ontology_tree_from_tsv, make_ontology_tree_d3

    # First let's look at the tree structure
    tree = named_ontology_tree_from_tsv(relationship_table, output_json=None, meta_data=meta_data)
    html_snippet = make_ontology_tree_d3(tree)
    web_folder = base
    make_analysis_web_folder(html_snippet, web_folder)

    # To get a dump of just the tree (for use in more advanced custom web interface)
    _write_json('%s/reverseinference.json' % base, tree)

    ## STEP 3: Export individual nodes

    ### Images
    unique_images = images.image_id.unique().tolist()

    for s, image_id in enumerate(unique_images):
        image_meta = {"image_id": image_id}
        print("Parsing data for images %s of %s" % (s, len(unique_images)))
        # Ids of the parent concepts; taken before they are mapped to names so
        # "concept_id" really holds ids.
        # NOTE(review): step 1 matches `name` against the raw value while this
        # matches its string form - confirm the dtype of the `name` column.
        concept_ids = relationship_table.parent[relationship_table.name == str(image_id)].tolist()
        concepts = [relationship_table.name[relationship_table.id == c].tolist()[0] for c in concept_ids]
        neurovault_row = images[images.image_id == int(image_id)]
        image_meta["collection"] = _collection_meta(collections, neurovault_row)
        image_meta["url"] = neurovault_row["url"].tolist()[0]
        image_meta["thumbnail"] = neurovault_row["thumbnail"].tolist()[0]
        image_meta["images"] = neurovault_row["thumbnail"].tolist()
        image_meta["task"] = neurovault_row["cognitive_paradigm_cogatlas"].tolist()[0]
        image_meta["contrast"] = neurovault_row["cognitive_contrast_cogatlas"].tolist()[0]
        image_meta["download"] = neurovault_row["file"].tolist()[0]
        image_meta["concept"] = concepts
        image_meta["concept_id"] = concept_ids
        image_meta["description"] = _image_description(neurovault_row)
        _write_json("%s/ri_%s.json" % (base, image_meta["image_id"]), image_meta)

    ### Concepts
    for node in unique_nodes:
        # Skip image nodes and node "1"
        if re.search("node_", node) or node == "1":
            continue
        concept = get_concept(id=node).json
        meta_single, children_nodes = _concept_meta(node, concept, relationship_table, images)
        meta_single["image_list"] = children_nodes
        _write_json("%s/ri_%s.json" % (base, node), meta_single)
# Optional extra: run the Neurosynth decoder over each of our original images
unique_images = images.image_id.unique().tolist()
decode = pandas.DataFrame(index=unique_images, columns=terms)

for unique_image in unique_images:
    print("Decoding original image %s" % unique_image)
    decode_url = "http://neurosynth.org/decode/data/?neurovault=%s" % unique_image
    ns = json.loads(get_url(decode_url))
    # One column per decoded analysis term, holding its "r" value
    for term in ns["data"]:
        decode.loc[unique_image, term["analysis"]] = term["r"]

decode_result_file = "%s/original_images_decoding.tsv" % results
decode.to_csv(decode_result_file, sep="\t")

# Next, decode the images of our own NeuroVault collection
rp_images = get_images(collection_pks=[neurovault_collection])
unique_rp_images = rp_images.image_id.unique().tolist()
rp_decode = pandas.DataFrame(index=unique_rp_images, columns=terms)

for unique_image in unique_rp_images:
    print("Decoding regression parameter image %s" % unique_image)
    decode_url = "http://neurosynth.org/decode/data/?neurovault=%s" % unique_image
    ns = json.loads(get_url(decode_url))
    for term in ns["data"]:
        rp_decode.loc[unique_image, term["analysis"]] = term["r"]

# Attach the file URLs and the concept id parsed from each file name, so they
# do not have to be looked up again later
image_urls = rp_images.file[rp_images.image_id.isin(unique_rp_images)]
rp_decode["0image_urls"] = image_urls.tolist()
concept_ids = []
for x in image_urls:
    file_name = x.split("/")[-1]
    file_name = file_name.replace(".nii.gz", "")
    concept_ids.append(str(file_name.replace("_regparam_z", "")))
rp_decode["0cognitive_atlas_concept_id"] = concept_ids
Ejemplo n.º 13
0
#!/usr/bin/env python2

# This script will use the pyneurovault module to download meta information about images and collections from NeuroVault.

from pyneurovault import api

# Fetch a single collection by primary key
collection = api.get_collections(pks=457)
# (collection.collection_id is 457)

# Fetch the meta data of all images
images = api.get_images()

# Fetch the meta data of the images that belong to one collection
images = api.get_images(collection_pks=457)

# Apply the filters one after another: drop thresholded images, drop images
# flagged as not in MNI, and keep just fMRI BOLD
for column_name, field_value in (("is_thresholded", False),
                                 ("not_mni", False),
                                 ("modality", "fMRI-BOLD")):
    images = api.filter(df=images, column_name=column_name, field_value=field_value)

# Export the image table and the collection table as TSV files
api.export_images_tsv("/home/vanessa/Desktop/images.tsv", images)
api.export_collections_tsv("/home/vanessa/Desktop/collections.tsv", collection)

# Folder used when downloading all images to file, resampled to a target
outfolder = "/home/vanessa/Desktop"