def test_NeuroVault_metadata():
    """Exercise the NeuroVault metadata queries of ``pyneurovault.api``.

    Every query result is handed to ``check_df`` together with a
    ``size_min`` value and the column names used for that kind of result.
    The queries cover: all images, the first ten image ids, all
    collections, collections looked up from up to fifteen DOIs, the
    combined image/collection table, and the collections and images of
    collections 877 and 437.

    Progress messages use the call form of ``print`` so that the module
    parses on Python 3 as well as Python 2 (the text printed is unchanged).
    """
    from pyneurovault import api

    image_columns = ["url", "name", "map_type"]
    collection_columns = ["used_smoothing", "url", "collection_id"]
    collection_pks = [877, 437]

    # Test for all images
    print("Checking metadata extraction for images...")
    images = api.get_images()
    check_df(df=images, size_min=7000, columns=list(image_columns))

    # Test for subset of images
    images = api.get_images(pks=images.image_id[0:10].tolist())
    check_df(df=images, size_min=10, columns=list(image_columns))

    # Test for collections
    print("Checking metadata extraction for collections...")
    collections = api.get_collections()
    check_df(df=collections, size_min=300,
             columns=list(collection_columns))

    # Test metadata from specific DOIs
    dois = collections.DOI[collections.DOI.notnull()].tolist()[0:15]
    results = api.collections_from_dois(dois)
    check_df(df=results, size_min=len(dois),
             columns=list(collection_columns))

    # Test get_images_and_collections
    combined_df = api.get_images_with_collections(
        collection_pks=list(collection_pks))
    check_df(df=combined_df, size_min=50,
             columns=["url_image", "collection_id", "name_image",
                      "map_type", "image_id"])

    # Test metadata for subset of collections
    collections = api.get_collections(pks=list(collection_pks))
    check_df(df=collections, size_min=1,
             columns=list(collection_columns))

    # Test metadata of images from specific collections
    images = api.get_images(collection_pks=list(collection_pks))
    check_df(df=images, size_min=50, columns=list(image_columns))
def get_data(download_folder):
    """Download NeuroVault images and list the downloaded files.

    Collections are restricted to those with a DOI whose
    ``group_comparison`` value is not ``True``.  Their images are passed
    through ``api.filter`` for ``is_thresholded=False``, ``not_mni=False``
    and ``modality="fMRI-BOLD"`` and then downloaded without resampling.

    :param download_folder: download destination folder for nifti gz files
    :returns brainmaps: list of nii gz images matching
        ``<download_folder>/original/*.nii.gz``
    """
    collections = api.get_collections()

    # Collections that carry a DOI.
    has_doi = np.logical_not(collections.DOI.isnull())
    with_doi = collections[has_doi]

    # Drop collections flagged as group comparisons (missing values stay).
    not_group_comparison = with_doi.group_comparison != True  # noqa: E712
    selected = with_doi[not_group_comparison]

    images = api.get_images(collection_pks=list(selected.collection_id))

    # Apply the three image-level criteria in their original order.
    image_criteria = (
        ("is_thresholded", False),
        ("not_mni", False),
        ("modality", "fMRI-BOLD"),
    )
    for column_name, field_value in image_criteria:
        images = api.filter(df=images,
                            column_name=column_name,
                            field_value=field_value)

    api.download_images(dest_dir=download_folder,
                        images_df=images,
                        resample=False)

    pattern = "%s/original/*.nii.gz" % download_folder
    return glob(pattern)
def get_data(download_folder):
    """Fetch NeuroVault maps into ``download_folder`` and return their paths.

    The collection table is narrowed to rows with a DOI and with
    ``group_comparison`` different from ``True``; the images of those
    collections are then filtered via ``api.filter`` (``is_thresholded``
    False, ``not_mni`` False, ``modality`` "fMRI-BOLD") and downloaded with
    ``resample=False``.

    :param download_folder: download destination folder for nifti gz files
    :returns brainmaps: list of nii gz images found under
        ``<download_folder>/original``
    """
    every_collection = api.get_collections()
    doi_missing = every_collection.DOI.isnull()
    published = every_collection[np.logical_not(doi_missing)]
    keep_rows = published.group_comparison != True  # noqa: E712
    collection_ids = list(published[keep_rows].collection_id)

    candidates = api.get_images(collection_pks=collection_ids)
    unthresholded = api.filter(
        df=candidates, column_name="is_thresholded", field_value=False)
    in_mni = api.filter(
        df=unthresholded, column_name="not_mni", field_value=False)
    bold_only = api.filter(
        df=in_mni, column_name="modality", field_value="fMRI-BOLD")

    api.download_images(
        dest_dir=download_folder, images_df=bold_only, resample=False)

    return glob("%s/original/*.nii.gz" % download_folder)
# Create the results and data folders when they are missing.
for folder in (results, data):
    if not os.path.exists(folder):
        os.mkdir(folder)

# Get all collections
collections = api.get_collections()

# Keep only the collections that have a DOI
collections = collections[collections.DOI.notnull()]

# We use this file to create custom web interface
collections_tsv = "%s/collections_with_dois.tsv" % results
collections.to_csv(collections_tsv, encoding="utf-8", sep="\t")

# Get image meta data for collections (N=1023)
collection_ids = collections.collection_id.tolist()
images = api.get_images(collection_pks=collection_ids)

# Filter images to those with contrasts defined (N=98)
images = images[images.cognitive_contrast_cogatlas_id.notnull()]

# Not needed for future analyses, for documentation only
contrast_tsv = "%s/contrast_defined_images.tsv" % results
images.to_csv(contrast_tsv, encoding="utf-8", sep="\t")

# Keep images that are in MNI space, are not single-subject maps and are
# not thresholded.  The three masks are built on the same rows, so one
# combined selection keeps exactly the rows three successive filters would.
in_mni = images.not_mni == False  # noqa: E712
not_single_subject = images.analysis_level != "single-subject"
unthresholded = images.is_thresholded == False  # noqa: E712
images = images[in_mni & not_single_subject & unthresholded]

# We can't use Rest or other/none
from pyneurovault import api
import numpy as np

# Collections that carry a DOI, and the images that belong to them.
all_collections = api.get_collections()
collections_with_dois = all_collections[
    np.logical_not(all_collections.DOI.isnull())
]
images = api.get_images(
    collection_pks=list(collections_with_dois.collection_id)
)

# Image criteria, applied one after the other:
#   - remove images that are thresholded
#   - remove images that are not in MNI
#   - keep just fMRI bold
for column_name, field_value in (
    ("is_thresholded", False),
    ("not_mni", False),
    ("modality", "fMRI-BOLD"),
):
    images = api.filter(df=images,
                        column_name=column_name,
                        field_value=field_value)

# Download all images to file, resample to target
outfolder = "/home/vanessa/Documents/Work/NEUROVAULT/mosaic"

# If you don't want to resample
api.download_images(dest_dir=outfolder, images_df=images, resample=False)
from pyneurovault import api
import pandas as pd
from nilearn.image import resample_img, smooth_img
import nibabel as nb
import numpy as np
from scipy.ndimage import binary_fill_holes
from scipy import stats
from scipy.ndimage.measurements import labeled_comprehension
import os
import sys

# Command line: script.py DATA_LOCATION ATLAS_ID
atlas_id = sys.argv[2]
data_location = sys.argv[1]

# Every path below is derived from the same atlas folder with os.path.join,
# so the script works whether or not DATA_LOCATION ends with a slash.
# (String concatenation previously used "/atlases" for the download but
# "atlases/..." for the load and save.)
atlas_dir = os.path.join(data_location, "atlases")

atlases = api.get_images(pks=[atlas_id])
api.download_images(atlas_dir, atlases, resample=False)

standard = "/usr/share/fsl/data/standard/MNI152_T1_2mm_brain.nii.gz"

# Create the output folder without going through an IPython shell escape.
resampled_dir = os.path.join(atlas_dir, "resampled")
if not os.path.isdir(resampled_dir):
    os.makedirs(resampled_dir)

fname = atlas_id + ".nii.gz"
print("resampling " + fname)

# The downloaded atlas is read from the "original" subfolder of atlas_dir.
nii = nb.load(os.path.join(atlas_dir, "original", fname))
target_nii = nb.load(standard)

# Nearest-neighbour interpolation is requested for the atlas.
# NOTE(review): get_affine() is deprecated in newer nibabel releases in
# favour of the .affine attribute - confirm against the installed version.
resampled_nii = resample_img(nii,
                             target_affine=target_nii.get_affine(),
                             target_shape=target_nii.shape,
                             interpolation='nearest')
resampled_nii.to_filename(os.path.join(resampled_dir, fname))
# Hand both working folders to make_dirs before anything is written.
folders = [data_directory, results_directory]
make_dirs(folders)

# Get all collections
collections = api.get_collections()

# Keep only the collections that have a DOI
has_doi = collections.DOI.notnull()
collections = collections[has_doi]

# Useless, but might as well save it
collections_tsv = "%s/collections_with_dois.tsv" % (results_directory)
collections.to_csv(collections_tsv, encoding="utf-8", sep="\t")

# Get image meta data for collections
collection_ids = collections.collection_id.tolist()
images = api.get_images(collection_pks=collection_ids)

# load list of included image IDs (curated by Poldracklab) and exclude others
curated_csv = '%s/included_images.csv' % (data_directory)
curated_images = pandas.read_csv(curated_csv,
                                 header=None,
                                 names=['image_id'])
is_curated = images['image_id'].isin(curated_images['image_id'])
images = images.loc[is_curated]

## the following won't really have any effect, since the filtering
## is done by the explicit list above, but I'm leaving them
## to make it clear how the filtering was initially done
## however, if any of these criteria change in the neurovault
## database (modality, MNI, thresholded description, cognitive atlas task)
## this list could be filtered further

# Get rid of any not in MNI
def download_combined_database(out_dir, overwrite=False):
    """
    Download coordinates/annotations from brainspell and images/annotations
    from Neurovault.

    Currently, the largest barrier is the lack of links between experiments
    (tables) in brainspell/NeuroSynth and those in NeuroVault. The closest we
    have is overall papers, via DOIs.

    Additional problems:
        - Does NeuroVault have standard error maps?
            - If so, I doubt there's any way to associate a given SE map and
              beta map within a collection.
        - How should space be handled?
            - Should everything be resliced and transformed to the same space
              at this stage or later on?
            - How can we link a target template (for images) to a target
              space (for coordinates)?
            - Should we even allow custom targets? Maybe we just limit it to
              2mm and 1mm MNI templates.

    Parameters
    ----------
    out_dir : :obj:`str`
        Folder in which to write out Dataset object and subfolders containing
        images.
    overwrite: :obj:`bool`, optional
        Whether to overwrite existing database, if one exists in `out_dir`.
        Defaults to False.

    Raises
    ------
    Exception
        If `out_dir` already existed and `overwrite` is True; clearing an
        existing database is not implemented. The metadata CSV files have
        already been refreshed when this is raised.

    Notes
    -----
    Only the NeuroVault steps are implemented in this function body.

    Three CSV caches are kept in `out_dir` (collections, collections with a
    DOI, and image metadata for those collections). Each one is re-created
    when it is missing or when `overwrite` is True, and read back otherwise.

    Images are downloaded only when `out_dir` did not exist before the call.
    """
    # The CSV caches below are written into out_dir, so the folder has to
    # exist before the first to_csv call.  Remember whether it was created
    # here: that is what decides whether images are downloaded at the end.
    is_new_database = not op.isdir(out_dir)
    if is_new_database:
        mkdir(out_dir)

    # Download collections metadata from Neurovault
    collections_file = op.join(out_dir, 'neurovault_collections.csv')
    if overwrite or not op.isfile(collections_file):
        colls_df = api.get_collections()
        colls_df.to_csv(collections_file, index=False, encoding='utf-8')
    else:
        colls_df = pd.read_csv(collections_file, encoding='utf-8')

    # Only include collections from published papers (or preprints)
    papers_file = op.join(out_dir, 'neurovault_papers.csv')
    if overwrite or not op.isfile(papers_file):
        paper_df = colls_df.dropna(subset=['DOI'])
        paper_df.to_csv(papers_file, index=False, encoding='utf-8')
    else:
        paper_df = pd.read_csv(papers_file, encoding='utf-8')

    # Get metadata for individual images from valid collections
    papers_metadata_file = op.join(out_dir, 'neurovault_papers_metadata.csv')
    if overwrite or not op.isfile(papers_metadata_file):
        valid_collections = sorted(paper_df['collection_id'].tolist())

        # Sleep between get_images calls to avoid spamming Neurovault
        image_dfs = []
        for chunk in to_chunks(valid_collections, 500):
            image_dfs.append(api.get_images(collection_pks=chunk))
            time.sleep(10)

        image_df = pd.concat(image_dfs)
        image_df.to_csv(papers_metadata_file, index=False, encoding='utf-8')
    else:
        image_df = pd.read_csv(papers_metadata_file, encoding='utf-8')

    # Reduce images database according to additional criteria
    # Only keep unthresholded, MNI, group level fMRI maps
    red_df = image_df.loc[image_df['modality'] == 'fMRI-BOLD']
    red_df = red_df.loc[red_df['image_type'] == 'statistic_map']
    red_df = red_df.loc[red_df['analysis_level'] == 'group']
    # Element-wise comparison: "series is False" is an identity test that
    # always yields the scalar False and therefore never built a row mask.
    red_df = red_df.loc[red_df['is_thresholded'].eq(False)]
    red_df = red_df.loc[red_df['not_mni'].eq(False)]

    # Look for relevant metadata
    red_df = red_df.dropna(subset=['cognitive_paradigm_cogatlas'])

    # NOTE: the subsets below are computed but not used further down; only
    # red_df is passed to download_images.

    ## MFX/FFX GLMs need contrast (beta) + standard error
    mffx_df = red_df.loc[red_df['map_type'] == 'univariate-beta map']

    ## RFX GLMs need contrast (beta)
    rfx_df = red_df.loc[red_df['map_type'] == 'univariate-beta map']

    ## Stouffer's, Stouffer's RFX, and Fisher's IBMAs can use Z maps.
    # T and F maps can be transformed into Z maps, but T maps need sample
    # size.
    # Only keep test statistic maps
    acc_map_types = ['Z map', 'T map', 'F map']
    st_df = red_df.loc[red_df['map_type'].isin(acc_map_types)]
    is_z_or_f = st_df['map_type'].isin(['Z map', 'F map'])
    is_t_with_n = ((st_df['map_type'] == 'T map') &
                   st_df['number_of_subjects'].notnull())
    # Copy so that adding a column below does not write into a slice.
    st_df = st_df.loc[is_z_or_f | is_t_with_n].copy()

    ## Weighted Stouffer's IBMAs need Z + sample size.
    st_df['id_str'] = st_df['image_id'].astype(str).str.zfill(6)

    if is_new_database:
        api.download_images(out_dir, red_df, target=None, resample=False)
    elif overwrite:
        # Clearing out an existing out_dir is not implemented yet.
        raise Exception('Currently not prepared to overwrite database.')
def _truncate_description(text, limit=600):
    """Return ``text`` cut to ``limit`` characters plus "..." when longer."""
    if len(text) > limit:
        return "%s..." % text[0:limit]
    return text


def _clean_description(raw):
    """Turn a NeuroVault description cell into a short utf-8 string.

    Falsy values, values that fail to convert/encode, and the literal "nan"
    that ``str`` produces for a missing cell all become "".
    """
    if not raw:
        return ""
    try:
        description = str(raw).encode("utf-8")
    except UnicodeError:
        description = ""
    if description == "nan":
        description = ""
    return _truncate_description(description)


def _write_json(payload, output_file):
    """Dump ``payload`` as sorted, indented JSON into ``output_file``."""
    # 'wb' is the mode the script has always used for these files.
    with open(output_file, 'wb') as filey:
        filey.write(json.dumps(payload, sort_keys=True, indent=4,
                               separators=(',', ': ')))


def _descendant_image_names(relationship_table, node):
    """Expand the children of ``node`` until only ``int`` names remain.

    Starting from the names whose parent is ``node``, every name that is
    not an ``int`` is replaced by the names of its own children; the loop
    stops when no such name is left.
    """
    def child_names(parent_id):
        # One boolean mask instead of rebuilding both columns as lists for
        # every row of the table.
        is_child = relationship_table.parent == parent_id
        return relationship_table.name[is_child].tolist()

    children_nodes = child_names(node)
    while any(not isinstance(x, int) for x in children_nodes):
        new_parent_nodes = [x for x in children_nodes
                            if not isinstance(x, int)]
        children_nodes = [x for x in children_nodes
                          if x not in new_parent_nodes]
        for new_parent in new_parent_nodes:
            node_name = relationship_table.id[
                relationship_table.name == new_parent].tolist()[0]
            children_nodes = children_nodes + child_names(node_name)
    return children_nodes


def _concept_node_meta(node, concept, relationship_table, images):
    """Build the metadata dict of a concept node.

    ``concept`` is the ``.json`` payload returned by ``get_concept``.
    Returns ``(meta, image_names)`` where ``image_names`` is the list
    produced by ``_descendant_image_names``.
    """
    image_names = _descendant_image_names(relationship_table, node)
    definition = concept[0]["definition_text"]
    description = definition.encode("utf-8") if definition else ""
    meta = {
        # Now only keep children that are images
        "images": images["thumbnail"][
            images.image_id.isin(image_names)].tolist(),
        # Cognitive Atlas meta data
        "url": "http://www.cognitiveatlas.org/term/id/%s" % node,
        "type": "concept",
        "thumbnail": "http://www.cognitiveatlas.org/images/logo-front.png",
        "concept": [
            relationship_table.name[
                relationship_table.id == node].tolist()[0]
        ],
        "task": "",
        "contrast": [],
        "download": "http://www.cognitiveatlas.org/rdf/id/%s" % node,
        "description": _truncate_description(description),
    }
    return meta, image_names


def _image_meta(neurovault_row, concepts):
    """Build the metadata fields shared by every image export."""
    def first(column):
        return neurovault_row[column].tolist()[0]

    return {
        "url": first("url"),
        "thumbnail": first("thumbnail"),
        "images": neurovault_row["thumbnail"].tolist(),
        "task": first("cognitive_paradigm_cogatlas"),
        "contrast": first("cognitive_contrast_cogatlas"),
        "download": first("file"),
        "concept": concepts,
        "description": _clean_description(first("description")),
    }


def _collection_meta(collection_row):
    """Build the "collection" sub-dict written into each image export."""
    def first(column):
        return collection_row[column].tolist()[0]

    return {
        "DOI": first("DOI"),
        "authors": first("authors"),
        "journal": first("journal_name"),
        "url": first("url"),
        "subjects": first("number_of_subjects"),
        "smoothing_fwhm": str(first("smoothing_fwhm")).encode("utf-8"),
        "title": first("name"),
    }


def main():
    """Export NeuroVault / Cognitive Atlas metadata for the web interface.

    Steps:

    1. Select NeuroVault images that belong to collections with a DOI, have
       a Cognitive Atlas contrast id starting with "cnt_", are in MNI space
       and are not thresholded; keep the contrasts that ``get_concept`` can
       resolve; build the relationship table with ``concept_node_triples``
       and one metadata dict per node id.
    2. Build the ontology tree with pybraincompare, create the web folder
       and write the tree to ``data//reverseinference.json``.
    3. Write one ``ri_<image_id>.json`` per image and one ``ri_<node>.json``
       per concept node (the root node "1" is skipped) under ``data/``.

    Messages are printed with the call form of ``print``; the text is
    unchanged.
    """
    base = "data/"

    # Make a folder for mean images
    if not os.path.exists("mr"):
        os.mkdir("mr")

    # Get Neurovault Images with defined cognitive atlas contrast
    collections = get_collections()

    # Filter collections to those that have a DOI
    collections = collections[collections.DOI.isnull() == False]  # noqa

    # Get image meta data for collections (N=1023)
    images = get_images(collection_pks=collections.collection_id.tolist())

    # Filter images to those with contrasts defined (N=98)
    images = images[
        images.cognitive_contrast_cogatlas_id.isnull() == False]  # noqa

    # Get rid of any not in MNI
    images = images[images.not_mni == False]  # noqa: E712

    # Get rid of thresholded images
    images = images[images.is_thresholded == False]  # noqa: E712

    ### Step 1: Load meta data sources
    unique_contrasts = images.cognitive_contrast_cogatlas_id.unique().tolist()

    # Images that do not match the correct identifier will not be used
    # (eg, "Other").  The pattern requires the underscore: "cnt_*" also
    # accepted any id that merely started with "cnt".
    expression = re.compile("cnt_")
    unique_contrasts = [u for u in unique_contrasts if expression.match(u)]

    # Make sure exists in cognitive atlas
    existing_contrasts = []
    for u in unique_contrasts:
        try:
            get_concept(contrast_id=u, silent=True)
        except Exception:
            # Best effort: report the contrast and carry on with the rest.
            print("%s is defined in NeuroVault, does not exist in "
                  "Cognitive Atlas" % u)
        else:
            existing_contrasts.append(u)

    image_lookup = dict()
    for u in existing_contrasts:
        image_lookup[u] = images.image_id[
            images.cognitive_contrast_cogatlas_id == u].tolist()

    # Create a data structure of tasks and contrasts for our analysis
    relationship_table = concept_node_triples(image_dict=image_lookup,
                                              save_to_file=False)

    unique_nodes = relationship_table.id.unique().tolist()

    # We will store a data frame of meta data
    # Lookup for meta_data is the id of the node!
    meta_data = {}

    for node in unique_nodes:
        meta_single = {}
        if re.search("node_", node):
            # This is an image node
            print("Found image node!")
            image_id = relationship_table.name[
                relationship_table.id == node].tolist()[0]

            # NeuroVault metadata
            parent_ids = relationship_table.parent[
                relationship_table.name == image_id]
            concepts = [
                relationship_table.name[
                    relationship_table.id == c].tolist()[0]
                for c in parent_ids
            ]
            neurovault_row = images[images.image_id == int(image_id)]
            meta_single = _image_meta(neurovault_row, concepts)
            meta_single["category"] = ""
            meta_single["type"] = "nii"
        elif node != "1":
            # A concept node (the root node "1" keeps an empty dict)
            concept = get_concept(id=node, silent=True).json
            meta_single, _ = _concept_node_meta(node, concept,
                                                relationship_table, images)
        meta_data[node] = meta_single

    ## STEP 2: VISUALIZATION WITH PYBRAINCOMPARE
    from pybraincompare.ontology.tree import named_ontology_tree_from_tsv, make_ontology_tree_d3

    # First let's look at the tree structure
    # output_json = "%s/task_contrast_tree.json" % outfolder
    tree = named_ontology_tree_from_tsv(relationship_table,
                                        output_json=None,
                                        meta_data=meta_data)
    html_snippet = make_ontology_tree_d3(tree)
    web_folder = base
    make_analysis_web_folder(html_snippet, web_folder)

    # To get a dump of just the tree (for use in more advanced custom web
    # interface)
    _write_json(tree, '%s/reverseinference.json' % base)

    ## STEP 3: Export individual nodes

    ### Images
    unique_images = images.image_id.unique().tolist()

    for s, image_id in enumerate(unique_images):
        print("Parsing data for images %s of %s" % (s, len(unique_images)))

        # The parent ids ARE the concept ids; take them before they are
        # translated into concept names.
        concepts_ids = relationship_table.parent[
            relationship_table.name == str(image_id)].tolist()
        concepts = [
            relationship_table.name[relationship_table.id == c].tolist()[0]
            for c in concepts_ids
        ]

        neurovault_row = images[images.image_id == int(image_id)]
        collection_row = collections[
            collections.collection_id ==
            neurovault_row.collection_id.tolist()[0]]

        image_meta = _image_meta(neurovault_row, concepts)
        image_meta["image_id"] = image_id
        image_meta["collection"] = _collection_meta(collection_row)
        image_meta["concept_id"] = concepts_ids

        _write_json(image_meta, "%s/ri_%s.json" % (base, image_id))

    ### Concepts
    for node in unique_nodes:
        # Only concept nodes, and not the root
        if re.search("node_", node) or node == "1":
            continue
        concept = get_concept(id=node).json
        meta_single, image_names = _concept_node_meta(
            node, concept, relationship_table, images)
        meta_single["image_list"] = image_names
        _write_json(meta_single, "%s/ri_%s.json" % (base, node))
from pyneurovault import api
import pandas as pd
from nilearn.image import resample_img, smooth_img
import nibabel as nb
import numpy as np
from scipy.ndimage import binary_fill_holes
from scipy import stats
from scipy.ndimage.measurements import labeled_comprehension
import os
import sys

# Command line: script.py DATA_LOCATION ATLAS_ID
atlas_id = sys.argv[2]
data_location = sys.argv[1]

# All paths are built from one atlas folder with os.path.join so that the
# download, load and save locations agree even when DATA_LOCATION has no
# trailing slash (concatenation mixed "/atlases" and "atlases/...").
atlas_dir = os.path.join(data_location, "atlases")

atlases = api.get_images(pks=[atlas_id])
api.download_images(atlas_dir, atlases, resample=False)

standard = "/usr/share/fsl/data/standard/MNI152_T1_2mm_brain.nii.gz"

# Plain-Python folder creation; no IPython shell escape is required.
resampled_dir = os.path.join(atlas_dir, "resampled")
if not os.path.isdir(resampled_dir):
    os.makedirs(resampled_dir)

fname = atlas_id + ".nii.gz"
print("resampling " + fname)

# The downloaded atlas is read from the "original" subfolder of atlas_dir.
nii = nb.load(os.path.join(atlas_dir, "original", fname))
target_nii = nb.load(standard)

# NOTE(review): get_affine() is deprecated in newer nibabel releases in
# favour of the .affine attribute - confirm against the installed version.
resampled_nii = resample_img(nii,
                             target_affine=target_nii.get_affine(),
                             target_shape=target_nii.shape,
                             interpolation='nearest')
resampled_nii.to_filename(os.path.join(resampled_dir, fname))


def score_map(map_filename, atlas_data, labels):
    map_data = nb.load(map_filename).get_data()
def _truncate_description(text, limit=600):
    """Return ``text`` cut to ``limit`` characters plus "..." when longer."""
    if len(text) > limit:
        return "%s..." % text[0:limit]
    return text


def _clean_description(raw):
    """Turn a NeuroVault description cell into a short utf-8 string.

    Falsy values, values that fail to convert/encode, and the literal "nan"
    that ``str`` produces for a missing cell all become "".
    """
    if not raw:
        return ""
    try:
        description = str(raw).encode("utf-8")
    except UnicodeError:
        description = ""
    if description == "nan":
        description = ""
    return _truncate_description(description)


def _write_json(payload, output_file):
    """Dump ``payload`` as sorted, indented JSON into ``output_file``."""
    # 'wb' is the mode the script has always used for these files.
    with open(output_file, 'wb') as filey:
        filey.write(json.dumps(payload, sort_keys=True, indent=4,
                               separators=(',', ': ')))


def _descendant_image_names(relationship_table, node):
    """Expand the children of ``node`` until only ``int`` names remain.

    Starting from the names whose parent is ``node``, every name that is
    not an ``int`` is replaced by the names of its own children; the loop
    stops when no such name is left.
    """
    def child_names(parent_id):
        # One boolean mask instead of rebuilding both columns as lists for
        # every row of the table.
        is_child = relationship_table.parent == parent_id
        return relationship_table.name[is_child].tolist()

    children_nodes = child_names(node)
    while any(not isinstance(x, int) for x in children_nodes):
        new_parent_nodes = [x for x in children_nodes
                            if not isinstance(x, int)]
        children_nodes = [x for x in children_nodes
                          if x not in new_parent_nodes]
        for new_parent in new_parent_nodes:
            node_name = relationship_table.id[
                relationship_table.name == new_parent].tolist()[0]
            children_nodes = children_nodes + child_names(node_name)
    return children_nodes


def _concept_node_meta(node, concept, relationship_table, images):
    """Build the metadata dict of a concept node.

    ``concept`` is the ``.json`` payload returned by ``get_concept``.
    Returns ``(meta, image_names)`` where ``image_names`` is the list
    produced by ``_descendant_image_names``.
    """
    image_names = _descendant_image_names(relationship_table, node)
    definition = concept[0]["definition_text"]
    description = definition.encode("utf-8") if definition else ""
    meta = {
        # Now only keep children that are images
        "images": images["thumbnail"][
            images.image_id.isin(image_names)].tolist(),
        # Cognitive Atlas meta data
        "url": "http://www.cognitiveatlas.org/term/id/%s" % node,
        "type": "concept",
        "thumbnail": "http://www.cognitiveatlas.org/images/logo-front.png",
        "concept": [
            relationship_table.name[
                relationship_table.id == node].tolist()[0]
        ],
        "task": "",
        "contrast": [],
        "download": "http://www.cognitiveatlas.org/rdf/id/%s" % node,
        "description": _truncate_description(description),
    }
    return meta, image_names


def _image_meta(neurovault_row, concepts):
    """Build the metadata fields shared by every image export."""
    def first(column):
        return neurovault_row[column].tolist()[0]

    return {
        "url": first("url"),
        "thumbnail": first("thumbnail"),
        "images": neurovault_row["thumbnail"].tolist(),
        "task": first("cognitive_paradigm_cogatlas"),
        "contrast": first("cognitive_contrast_cogatlas"),
        "download": first("file"),
        "concept": concepts,
        "description": _clean_description(first("description")),
    }


def _collection_meta(collection_row):
    """Build the "collection" sub-dict written into each image export."""
    def first(column):
        return collection_row[column].tolist()[0]

    return {
        "DOI": first("DOI"),
        "authors": first("authors"),
        "journal": first("journal_name"),
        "url": first("url"),
        "subjects": first("number_of_subjects"),
        "smoothing_fwhm": str(first("smoothing_fwhm")).encode("utf-8"),
        "title": first("name"),
    }


def main():
    """Export NeuroVault / Cognitive Atlas metadata for the web interface.

    Steps:

    1. Select NeuroVault images that belong to collections with a DOI, have
       a Cognitive Atlas contrast id starting with "cnt_", are in MNI space
       and are not thresholded; keep the contrasts that ``get_concept`` can
       resolve; build the relationship table with ``concept_node_triples``
       and one metadata dict per node id.
    2. Build the ontology tree with pybraincompare, create the web folder
       and write the tree to ``data//reverseinference.json``.
    3. Write one ``ri_<image_id>.json`` per image and one ``ri_<node>.json``
       per concept node (the root node "1" is skipped) under ``data/``.

    Messages are printed with the call form of ``print``; the text is
    unchanged.
    """
    base = "data/"

    # Make a folder for mean images
    if not os.path.exists("mr"):
        os.mkdir("mr")

    # Get Neurovault Images with defined cognitive atlas contrast
    collections = get_collections()

    # Filter collections to those that have a DOI
    collections = collections[collections.DOI.isnull() == False]  # noqa

    # Get image meta data for collections (N=1023)
    images = get_images(collection_pks=collections.collection_id.tolist())

    # Filter images to those with contrasts defined (N=98)
    images = images[
        images.cognitive_contrast_cogatlas_id.isnull() == False]  # noqa

    # Get rid of any not in MNI
    images = images[images.not_mni == False]  # noqa: E712

    # Get rid of thresholded images
    images = images[images.is_thresholded == False]  # noqa: E712

    ### Step 1: Load meta data sources
    unique_contrasts = images.cognitive_contrast_cogatlas_id.unique().tolist()

    # Images that do not match the correct identifier will not be used
    # (eg, "Other").  The pattern requires the underscore: "cnt_*" also
    # accepted any id that merely started with "cnt".
    expression = re.compile("cnt_")
    unique_contrasts = [u for u in unique_contrasts if expression.match(u)]

    # Make sure exists in cognitive atlas
    existing_contrasts = []
    for u in unique_contrasts:
        try:
            get_concept(contrast_id=u, silent=True)
        except Exception:
            # Best effort: report the contrast and carry on with the rest.
            print("%s is defined in NeuroVault, does not exist in "
                  "Cognitive Atlas" % u)
        else:
            existing_contrasts.append(u)

    image_lookup = dict()
    for u in existing_contrasts:
        image_lookup[u] = images.image_id[
            images.cognitive_contrast_cogatlas_id == u].tolist()

    # Create a data structure of tasks and contrasts for our analysis
    relationship_table = concept_node_triples(image_dict=image_lookup,
                                              save_to_file=False)

    unique_nodes = relationship_table.id.unique().tolist()

    # We will store a data frame of meta data
    # Lookup for meta_data is the id of the node!
    meta_data = {}

    for node in unique_nodes:
        meta_single = {}
        if re.search("node_", node):
            # This is an image node
            print("Found image node!")
            image_id = relationship_table.name[
                relationship_table.id == node].tolist()[0]

            # NeuroVault metadata
            parent_ids = relationship_table.parent[
                relationship_table.name == image_id]
            concepts = [
                relationship_table.name[
                    relationship_table.id == c].tolist()[0]
                for c in parent_ids
            ]
            neurovault_row = images[images.image_id == int(image_id)]
            meta_single = _image_meta(neurovault_row, concepts)
            meta_single["category"] = ""
            meta_single["type"] = "nii"
        elif node != "1":
            # A concept node (the root node "1" keeps an empty dict)
            concept = get_concept(id=node, silent=True).json
            meta_single, _ = _concept_node_meta(node, concept,
                                                relationship_table, images)
        meta_data[node] = meta_single

    ## STEP 2: VISUALIZATION WITH PYBRAINCOMPARE
    from pybraincompare.ontology.tree import named_ontology_tree_from_tsv, make_ontology_tree_d3

    # First let's look at the tree structure
    # output_json = "%s/task_contrast_tree.json" % outfolder
    tree = named_ontology_tree_from_tsv(relationship_table,
                                        output_json=None,
                                        meta_data=meta_data)
    html_snippet = make_ontology_tree_d3(tree)
    web_folder = base
    make_analysis_web_folder(html_snippet, web_folder)

    # To get a dump of just the tree (for use in more advanced custom web
    # interface)
    _write_json(tree, '%s/reverseinference.json' % base)

    ## STEP 3: Export individual nodes

    ### Images
    unique_images = images.image_id.unique().tolist()

    for s, image_id in enumerate(unique_images):
        print("Parsing data for images %s of %s" % (s, len(unique_images)))

        # The parent ids ARE the concept ids; take them before they are
        # translated into concept names.
        concepts_ids = relationship_table.parent[
            relationship_table.name == str(image_id)].tolist()
        concepts = [
            relationship_table.name[relationship_table.id == c].tolist()[0]
            for c in concepts_ids
        ]

        neurovault_row = images[images.image_id == int(image_id)]
        collection_row = collections[
            collections.collection_id ==
            neurovault_row.collection_id.tolist()[0]]

        image_meta = _image_meta(neurovault_row, concepts)
        image_meta["image_id"] = image_id
        image_meta["collection"] = _collection_meta(collection_row)
        image_meta["concept_id"] = concepts_ids

        _write_json(image_meta, "%s/ri_%s.json" % (base, image_id))

    ### Concepts
    for node in unique_nodes:
        # Only concept nodes, and not the root
        if re.search("node_", node) or node == "1":
            continue
        concept = get_concept(id=node).json
        meta_single, image_names = _concept_node_meta(
            node, concept, relationship_table, images)
        meta_single["image_list"] = image_names
        _write_json(meta_single, "%s/ri_%s.json" % (base, node))
# Decode endpoint queried once per NeuroVault image id.
decode_url = "http://neurosynth.org/decode/data/?neurovault=%s"

# In case we want it, decode each of our original images
unique_images = images.image_id.unique().tolist()
decode = pandas.DataFrame(index=unique_images, columns=terms)

for unique_image in unique_images:
    # print is written as a call so the script also parses on Python 3;
    # the text printed is unchanged.
    print("Decoding original image %s" % (unique_image))
    ns = json.loads(get_url(decode_url % unique_image))
    # One row per image, one column per returned analysis, holding "r".
    for term in ns["data"]:
        decode.loc[unique_image, term["analysis"]] = term["r"]

decode_result_file = "%s/original_images_decoding.tsv" % results
decode.to_csv(decode_result_file, sep="\t")

# Now we will decode our collection of images!
rp_images = get_images(collection_pks=[neurovault_collection])
unique_rp_images = rp_images.image_id.unique().tolist()
rp_decode = pandas.DataFrame(index=unique_rp_images, columns=terms)

for unique_image in unique_rp_images:
    print("Decoding regression parameter image %s" % (unique_image))
    ns = json.loads(get_url(decode_url % unique_image))
    for term in ns["data"]:
        rp_decode.loc[unique_image, term["analysis"]] = term["r"]

# Now let's add some cognitive atlas meta data, so we don't have to look up
# later
image_urls = rp_images.file[rp_images.image_id.isin(unique_rp_images)]
rp_decode["0image_urls"] = image_urls.tolist()

# The concept id is the file name with its ".nii.gz" and "_regparam_z"
# parts removed.
concept_ids = [
    str(x.split("/")[-1].replace(".nii.gz", "").replace("_regparam_z", ""))
    for x in image_urls
]
rp_decode["0cognitive_atlas_concept_id"] = concept_ids
#!/usr/bin/env python2

# This script will use the pyneurovault module to download meta information
# about images and collections from NeuroVault.

from pyneurovault import api

# Get a collection
collection = api.get_collections(pks=457)
# collection.collection_id is 457

# Get all images
images = api.get_images()

# Get all images meta data for a collection
images = api.get_images(collection_pks=457)

# Image criteria, applied in this order:
#   - remove images that are thresholded
#   - remove images that are not in MNI
#   - keep just fMRI bold
for column_name, field_value in (
    ("is_thresholded", False),
    ("not_mni", False),
    ("modality", "fMRI-BOLD"),
):
    images = api.filter(df=images,
                        column_name=column_name,
                        field_value=field_value)

# Download images, collections, or both
api.export_images_tsv("/home/vanessa/Desktop/images.tsv", images)
api.export_collections_tsv("/home/vanessa/Desktop/collections.tsv",
                           collection)

# Download all images to file, resample to target
outfolder = "/home/vanessa/Desktop"