def prepare_collect(global_config=None, verbose=False):
    # --------------
    # --- CONFIG ---
    # --------------
    nv_path = Path(global_config["nv_path"])
    nv_path.mkdir(exist_ok=True)
    meta_path = Path(global_config["meta_path"])
    meta_path.mkdir(exist_ok=True)
    cache_path = Path(global_config["cache_path"])
    cache_path.mkdir(exist_ok=True)
    colls_file = str(meta_path / "colls.csv")
    fmris_file = str(meta_path / "fmris.csv")
    hcp_file = global_config["collect"]["hcp_tags"]
    download_mode = global_config["collect"]["download_mode"]

    if verbose:
        source = "disk" if download_mode == "offline" else "NeuroVault"
        print(f" > Fetch fMRIs from {source}")
    neurovault = fetch_neurovault(
        max_images=None,
        collection_terms={},
        image_terms={},
        data_dir=str(nv_path),
        mode=download_mode,
        verbose=2,
    )

    if verbose:
        print(" > Load NeuroVault metadata and images")
    neurovault_collections = load_colls(neurovault, verbose)
    neurovault_fmris = load_fmris(neurovault)

    # Remove NeuroVault's first and latest IBC versions
    # (to match Romuald's original data):
    IBC_DUPLICATES_TO_REMOVE = [2138, 4438]  # 6618
    fmris = neurovault_fmris.loc[
        lambda df: ~df.collection_id.isin(IBC_DUPLICATES_TO_REMOVE)]
    colls = neurovault_collections.drop(IBC_DUPLICATES_TO_REMOVE, axis=0)

    if verbose:
        print(" > Adding tags from HCP")
    fmris = add_hcp_tags(fmris, hcp_file)

    if verbose:
        print(">>> Data collection OK, {} fMRIs from NeuroVault, {} collections"
              .format(len(fmris), len(colls)))

    # TODO: Add DiFuMo atlas fetching when Nilearn 0.7.1 is published

    # Final dumps
    colls.to_csv(colls_file, header=True)
    fmris.to_csv(fmris_file, header=True)
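
# Usage sketch for prepare_collect. The concrete paths and the "offline"
# mode below are illustrative assumptions; only the keys are taken from
# the function above.
example_config = {
    "nv_path": "data/neurovault",         # raw NeuroVault downloads
    "meta_path": "data/meta",             # where colls.csv / fmris.csv land
    "cache_path": "data/cache",           # intermediate cache directory
    "collect": {
        "hcp_tags": "data/hcp_tags.csv",  # tag file consumed by add_hcp_tags
        "download_mode": "offline",       # or "download_new" to hit the API
    },
}
# prepare_collect(example_config, verbose=True)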
def load_data(verbose=0, image_terms=None):
    """Fetch Neurovault data, filtering out non-MNI images."""
    if image_terms is None:
        image_terms = {"not_mni": False}
    neurovault_data = datasets.fetch_neurovault(max_images=None,
                                                mode="offline",
                                                verbose=verbose,
                                                image_terms=image_terms)
    return neurovault_data
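
# Usage sketch for load_data: because it runs fetch_neurovault in
# "offline" mode, it only returns images already present in the nilearn
# data directory (a prior download is assumed).
# data = load_data(verbose=1)
# print(len(data.images), "MNI-space images found on disk")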
def fetch_nv_collection(contrast):
    nv_data = fetch_neurovault(
        max_images=788,
        collection_id=4337,
        mode='overwrite',
        data_dir='/storage/store/data/HCP900/hcplang',
        cognitive_paradigm_cogatlas=neurovault.Contains(
            'language processing fMRI task paradigm'),
        contrast_definition=neurovault.Contains(contrast),
        map_type='Z map',
        task='LANGUAGE')
    print([meta['id'] for meta in nv_data['images_meta']])
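
# The extra keyword arguments above (collection_id, map_type, task, ...)
# act as image metadata filters in nilearn's fetch_neurovault, and
# neurovault.Contains keeps images whose field contains the given
# substring. A hypothetical call for another contrast of the same task:
# fetch_nv_collection('MATH')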
def fetch_nv(repo, nv_file, download=False, verbose=False):
    """Load Neurovault into memory, either downloading it from the web API
    or loading it from disk.

    :param repo: str
        Path where the data is downloaded.
    :param nv_file: str
        Pickle file where the full data is saved
        (for faster loading than fetch_neurovault).
    :param download: bool, default=False
        If True, the data is downloaded from the web API.
    :param verbose: bool, default=False
        Activate verbose mode.
    :return: Bunch
        A dict-like object containing the fMRI data fetched from Neurovault.
    """
    # Download and save to disk, or load from disk
    if download:
        if verbose:
            print("...Download from Neurovault API...")
        neurovault = fetch_neurovault(max_images=None,
                                      collection_terms={},
                                      image_terms={},
                                      data_dir=repo,
                                      mode="download_new",
                                      verbose=2)
        with open(nv_file, 'wb') as f:
            pickle.dump(neurovault, f)
    else:
        if verbose:
            print("...Load pre-fetched data from Neurovault...")
        with open(nv_file, 'rb') as f:
            neurovault = pickle.load(f)

    n_fmri_dl = len(neurovault.images)
    if verbose:
        print(" > Number of (down)loaded fmri =", n_fmri_dl)

    return neurovault
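
# Usage sketch for fetch_nv (paths are illustrative assumptions): the
# first run downloads everything and pickles it; later runs reload fast.
# nv = fetch_nv(repo="data/neurovault", nv_file="data/neurovault.pkl",
#               download=True, verbose=True)   # initial download
# nv = fetch_nv(repo="data/neurovault", nv_file="data/neurovault.pkl")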
import numpy as np

from nilearn.datasets import fetch_neurovault, load_mni152_brain_mask
from nilearn.input_data import NiftiMasker
from nilearn import plotting

######################################################################
# Get image and term data
# -----------------------

# Download images
# Here by default we only download 80 images to save time,
# but for better results I recommend using at least 200.
print("Fetching Neurovault images; "
      "if you haven't downloaded any Neurovault data before "
      "this will take several minutes.")
nv_data = fetch_neurovault(max_images=80, fetch_neurosynth_words=True)

images = nv_data['images']
term_weights = nv_data['word_frequencies']
vocabulary = nv_data['vocabulary']

# Clean and report term scores
term_weights[term_weights < 0] = 0
total_scores = np.mean(term_weights, axis=0)

print("\nTop 10 neurosynth terms from downloaded images:\n")
for term_idx in np.argsort(total_scores)[-10:][::-1]:
    print(vocabulary[term_idx])

######################################################################
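
# A minimal continuation, assuming the goal is voxel data for the fetched
# images: mask each map with the MNI152 brain mask. This step is a sketch,
# not part of the original example; heterogeneous images may fail to
# transform, hence the try/except.
mask_img = load_mni152_brain_mask()
masker = NiftiMasker(mask_img=mask_img, memory='nilearn_cache').fit()
X = []
for image_path in images:
    try:
        X.append(masker.transform(image_path))
    except Exception as error:
        print("Skipping %s: %s" % (image_path, error))
X = np.vstack(X)  # shape: (n_usable_images, n_voxels)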
import os
import os.path as op

from nilearn.image import resample_to_img
from nilearn import datasets
import nibabel as nib

data_dir = '/home/mainak/Desktop/neurovault/'
base_dir = 'neurovault_resampled'

nv_data = datasets.fetch_neurovault(max_images=None, mode='offline',
                                    data_dir=data_dir)
images = nv_data['images']

if not op.exists(base_dir):
    os.mkdir(base_dir)

# Resample every image onto the grid of the first one
target_img = nib.load(images[0])
for ii, image in enumerate(images):
    collection, name = image.split('/')[-2:]
    fname = op.join(base_dir, collection, name)
    print('Resampling image %d' % ii)
    if op.exists(fname):
        continue
    if not op.exists(op.join(base_dir, collection)):
        os.mkdir(op.join(base_dir, collection))
    img = nib.load(image)
    img = resample_to_img(img, target_img)
    img.to_filename(fname)  # save result, matching the existence check above
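
# Note: resample_to_img interpolates continuously by default, which suits
# statistical maps; for label or mask images, nearest-neighbour would be
# the safer choice:
# img = resample_to_img(img, target_img, interpolation='nearest')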
# DiFuMo atlases: https://parietal-inria.github.io/DiFuMo/
# Script to download DiFuMo atlases:
# https://github.com/Parietal-INRIA/DiFuMo/blob/master/notebook/fetcher.py
import runpy

import numpy as np

from nilearn.datasets import fetch_neurovault
from nilearn.image import load_img

# Load a file not on the path
fetcher = runpy.run_path('../../fetcher.py')
fetch_difumo = fetcher['fetch_difumo']

####################################################################
# Fetch statistical maps from Neurovault repository
# -------------------------------------------------
collection_terms = {'id': 504}
image_terms = {'not_mni': False}
pain_data = fetch_neurovault(max_images=None,
                             image_terms=image_terms,
                             collection_terms=collection_terms)
n_images = len(pain_data.images)
ref_img = load_img(pain_data.images[0])

input_images = []
y = []
groups = []
for index in range(n_images):
    input_images.append(pain_data.images[index])
    target = pain_data.images_meta[index]['PainLevel']
    subject_id = pain_data.images_meta[index]['SubjectID']
    y.append(target)
    groups.append(subject_id)

y = np.ravel(y)
groups = np.ravel(groups)
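
# A sketch of how y and groups are typically consumed downstream
# (assumption: a group-aware cross-validation such as sklearn's GroupKFold,
# so that maps from one subject never span the train/test split):
from sklearn.model_selection import GroupKFold

cv = GroupKFold(n_splits=5)
for train_idx, test_idx in cv.split(input_images, y, groups=groups):
    pass  # fit a decoder on train_idx, evaluate on test_idx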
from nilearn import datasets

# The code below is to include thresholded images.
# With non-thresholded images it is simpler.
from nilearn.datasets import neurovault

img_terms = neurovault.basic_image_terms()
del img_terms['is_thresholded']

d = datasets.fetch_neurovault(max_images=None, image_terms=img_terms)
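
# For reference, basic_image_terms() returns nilearn's default image
# filters as a plain dict; deleting the 'is_thresholded' entry above stops
# thresholded maps from being excluded. To inspect the remaining defaults:
# print(neurovault.basic_image_terms())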
def fetch_neurovault(max_images=np.inf, query_server=True, fetch_terms=True,
                     map_types=['F map', 'T map', 'Z map'],
                     collection_ids=tuple(), image_filters=tuple(),
                     sort_images=True):
    """Give meaningful defaults, extra computations."""
    # Set image filters: filt_dict contains a metadata field for the key
    # and the desired entry for each field as the value.
    # Since neurovault metadata are not always filled, it also includes any
    # images with missing values for any given field.
    filt_dict = {'modality': 'fMRI-BOLD',
                 'analysis_level': 'group',
                 'is_thresholded': False,
                 'not_mni': False}

    def make_fun(key, val):
        return lambda img: (img.get(key) or '') in ('', val)

    image_filters = list(image_filters) + [
        lambda img: (img.get('map_type') or '') in map_types]
    image_filters = (image_filters +
                     [make_fun(key, val) for key, val in filt_dict.items()])

    # Also remove bad collections
    bad_collects = [367,   # Single image w/ large uniform area value > 0
                    1003,  # next three collections contain stat maps on
                    1011,  # parcellated brains; likely causes odd-looking
                    1013,  # ICA components
                    1071,  # Added Oct 2016: strange-looking images
                    1889]  # Added Oct 2016: extreme vals on edge
    collection_ids = list(collection_ids) + bad_collects

    # Download matching images
    def image_filter(img_metadata):
        if img_metadata.get('collection_id') in collection_ids:
            return False
        for filt in image_filters:
            if not filt(img_metadata):
                return False
        return True

    ss_all = datasets.fetch_neurovault(
        mode='download_new' if query_server else 'offline',
        max_images=max_images, image_filter=image_filter,
        fetch_neurosynth_words=fetch_terms)
    images = ss_all['images_meta']

    # Post-fetcher filtering: remove duplicates and bad images from raw data.
    images = _neurovault_dedupe(images)
    images = _neurovault_remove_bad_images(images)

    # Stamp some collection properties onto images.
    colls = dict([(c['id'], c) for c in ss_all['collections_meta']])
    for image in images:
        image['DOI'] = colls.get(image['collection_id'], {}).get('DOI')

    if not fetch_terms:
        term_scores = None
    else:
        term_scores = ss_all['terms']

        # Clean & report term scores
        terms = np.array(list(term_scores.keys()))
        term_matrix = np.asarray(list(term_scores.values()))
        term_matrix[term_matrix < 0] = 0
        total_scores = np.mean(term_matrix, axis=1)

        print("Top 10 neurosynth terms from downloaded images:")
        for term_idx in np.argsort(total_scores)[-10:][::-1]:
            print('\t%-25s: %.2f'
                  % (terms[term_idx], total_scores[term_idx]))

    if sort_images:
        # Sort images by their Neurovault id (Python 3: use key=, not cmp)
        idx = sorted(range(len(images)), key=lambda k: images[k]['id'])
        images = [images[ii] for ii in idx]
        if term_scores:
            term_scores = [term_scores[ti] for ti in idx]

    return images, term_scores
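
# Usage sketch for this wrapper (offline, skipping the neurosynth terms;
# the helpers _neurovault_dedupe and _neurovault_remove_bad_images are
# defined elsewhere in the module):
# images, _ = fetch_neurovault(max_images=500, query_server=False,
#                              fetch_terms=False)
# print(len(images), "image metadata dicts left after filtering")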
# Build per-image summary statistics as cheap "dummy" features
dummy_features = np.concatenate([
    means[:, None], maxs[:, None], mins[:, None],
    stddevs[:, None], kurts[:, None]], axis=1)

# first step: exclude data with NaN features
exclude = np.isnan(dummy_features).any(axis=1)
dummy_features_filtered = dummy_features[~exclude]
inds_filtered = np.arange(n_samples)[~exclude]
names_filtered = neurovault_data.images[~exclude]

clf = OneClassSVM(nu=0.3, kernel="linear")
clf.fit(dummy_features_filtered)

# sanity check: we know that the data from this collection is clean
# because it was uploaded by Bertrand
clean = datasets.fetch_neurovault(max_images=None, mode='offline',
                                  image_terms={'collection_id': 656})
X_bertrand = get_data_by_names(masker, clean.images)

# the following fails if nu is too high
np.testing.assert_equal(clf.predict(get_dummy_features(X_bertrand)), 1.)

pred = clf.predict(dummy_features_filtered)
# getting back the indices of the outliers in the non-filtered data:
outliers = inds_filtered[pred != 1.]
print("# of outliers: %d" % np.sum(pred != 1.))

# X_outliers = get_data_by_indices(neurovault_data, outliers)
# cut_coords = (-34, -16)
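
# A plausible sketch of the get_dummy_features helper referenced above
# (assumption: it recomputes the same five per-image summary statistics
# used to build dummy_features; the real helper is not shown here):
from scipy.stats import kurtosis

def get_dummy_features_sketch(X):
    return np.concatenate([
        X.mean(axis=1)[:, None], X.max(axis=1)[:, None],
        X.min(axis=1)[:, None], X.std(axis=1)[:, None],
        kurtosis(X, axis=1)[:, None]], axis=1)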