def save_dataset_instance(db_filename, kw_filename, instance_filename):
    # Create a new Dataset instance
    dataset = Dataset('./raw_data/' + db_filename + '.txt')

    # Add some features
    dataset.add_features('./raw_data/' + kw_filename + '.txt')

    # Save new file
    dataset.save('./raw_data/' + instance_filename + '.pkl')
    return dataset
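A brief usage sketch of the helper above; the base filenames are hypothetical, and on later runs the pickle can be reloaded with Dataset.load() instead of re-parsing the raw text files.

# Hypothetical filenames; only save_dataset_instance() comes from the example above.
dataset = save_dataset_instance('database', 'features', 'dataset_instance')

# On a later run, reload the pickled instance instead of rebuilding it:
from neurosynth.base.dataset import Dataset
dataset = Dataset.load('./raw_data/dataset_instance.pkl')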
def neurosynthInit(dbsize):
    print "Initializing Neurosynth database..."
    dataset = Dataset('data/' + dbsize + 'terms/database.txt')
    dataset.add_features('data/' + dbsize + 'terms/features.txt')

    #print "Loading standard space brain..."
    #img = nb.load("data/MNI152_T1_2mm_brain.nii.gz")
    #standard = img.get_data()
    return dataset
def neurosynthInit(dbsize):
    """Initialize Neurosynth Database, return database object"""
    print "Initializing Neurosynth database..."
    db = Dataset('data/' + str(dbsize) + 'terms/database.txt')
    db.add_features('data/' + str(dbsize) + 'terms/features.txt')

    #print "Loading standard space brain..."
    #img = nb.load("data/MNI152_T1_2mm_brain.nii.gz")
    #standard = img.get_data()
    return db
def __init__(self, db, dataset=None, studies=None, features=None,
             reset_db=False, reset_dataset=False, download_data=False):
    """ Initialize instance from a pickled Neurosynth Dataset instance or a
    pair of study and analysis .txt files.

    Args:
        db: the SQLAlchemy database connection to use.
        dataset: an optional filename of a pickled neurosynth Dataset
            instance.
        studies: name of file containing activation data. If passed, a new
            Dataset instance will be constructed.
        features: name of file containing feature data.
        reset_db: if True, will drop and re-create all database tables before
            adding new content. If False (default), will add content
            incrementally.
        reset_dataset: if True, will regenerate the pickled Neurosynth
            dataset.
        download_data: if True, ignores any existing files and downloads the
            latest Neurosynth data files from GitHub.
    """
    if (studies is not None and not os.path.exists(studies)) \
            or settings.RESET_ASSETS:
        print("WARNING: RESETTING ALL NEUROSYNTH ASSETS!")
        self.reset_assets(download_data)

    # Load or create Neurosynth Dataset instance
    if dataset is None or reset_dataset or \
            (isinstance(dataset, str) and not os.path.exists(dataset)):
        print("\tInitializing a new Dataset...")
        if (studies is None) or (features is None):
            raise ValueError(
                "To generate a new Dataset instance, both studies and "
                "analyses must be provided.")
        dataset = Dataset(studies)
        dataset.add_features(features)
        dataset.save(settings.PICKLE_DATABASE)
    else:
        print("Loading existing Dataset...")
        dataset = Dataset.load(dataset)
        if features is not None:
            dataset.add_features(features)

    self.dataset = dataset
    self.db = db

    if reset_db:
        print("WARNING: RESETTING DATABASE!!!")
        self.reset_database()
def __init__(self, db, dataset=None, studies=None, features=None,
             reset_db=False, reset_dataset=False, download_data=True):
    """ Initialize instance from a pickled Neurosynth Dataset instance or a
    pair of study and analysis .txt files.

    Args:
        db: the SQLAlchemy database connection to use.
        dataset: an optional filename of a pickled neurosynth Dataset
            instance. Note that the Dataset must contain the list of
            Mappables (i.e., save() must have been called with
            keep_mappables set to True).
        studies: name of file containing activation data. If passed, a new
            Dataset instance will be constructed.
        features: name of file containing feature data.
        reset_db: if True, will drop and re-create all database tables before
            adding new content. If False (default), will add content
            incrementally.
        reset_dataset: if True, will regenerate the pickled Neurosynth
            dataset.
        download_data: if True, ignores any existing files and downloads the
            latest Neurosynth data files from GitHub.
    """
    if (studies is not None and not os.path.exists(studies)) \
            or settings.RESET_ASSETS:
        print "WARNING: RESETTING ALL NEUROSYNTH ASSETS!"
        self.reset_assets(download_data)

    # Load or create Neurosynth Dataset instance
    if dataset is None or reset_dataset or \
            (isinstance(dataset, basestring) and not os.path.exists(dataset)):
        print "\tInitializing a new Dataset..."
        if (studies is None) or (features is None):
            raise ValueError(
                "To generate a new Dataset instance, both studies and "
                "analyses must be provided.")
        dataset = Dataset(studies)
        dataset.add_features(features)
        dataset.save(settings.PICKLE_DATABASE, keep_mappables=True)
    else:
        print "\tLoading existing Dataset..."
        dataset = Dataset.load(dataset)
        if features is not None:
            dataset.add_features(features)

    self.dataset = dataset
    self.db = db

    if reset_db:
        print "WARNING: RESETTING DATABASE!!!"
        self.reset_database()
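A hedged usage sketch for the initializer above. The enclosing class is not shown in this snippet, so DatabaseBuilder is only a placeholder name; db and the file paths are likewise illustrative.

# Placeholder class name and paths; only the keyword arguments mirror the
# documented signature above.
builder = DatabaseBuilder(db,
                          studies='data/database.txt',
                          features='data/features.txt',
                          reset_db=True,
                          reset_dataset=True)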
def _getdata():
    """Downloads data from neurosynth and returns it as a Dataset.

    Also pickles the dataset for future use."""
    LOG.warning("Downloading and processing Neurosynth database")

    os.makedirs("data", exist_ok=True)
    from neurosynth.base.dataset import download

    download(path="data", unpack=True)
    data = Dataset("data/database.txt")
    data.add_features("data/features.txt")
    data.save("data/dataset.pkl")
    return data
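On later runs, the pickle written by _getdata() can be reused instead of re-downloading the database; a minimal sketch under that assumption, with a hypothetical helper name:

def _load_or_getdata():
    # Hypothetical convenience wrapper: reuse the cached pickle when present.
    if os.path.exists("data/dataset.pkl"):
        return Dataset.load("data/dataset.pkl")
    return _getdata()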
def generate_maps(terms, output_dir):

    f, d = download_data()
    features = pandas.read_csv(f, sep="\t")
    database = pandas.read_csv(d, sep="\t")

    output_dir = "%s/maps" % (output_dir)

    print "Deriving pickled maps to extract relationships from..."
    dataset = Dataset(d)
    dataset.add_features(f)

    for t in range(len(terms)):
        term = terms[t]
        print "Generating P(term|activation) for term %s, %s of %s" % (term, t, len(terms))
        ids = dataset.get_ids_by_features(term)
        maps = meta.MetaAnalysis(dataset, ids)
        term_name = term.replace(" ", "_")
        pickle.dump(maps.images["pFgA_z"],
                    open("%s/%s_pFgA_z.pkl" % (output_dir, term_name), "wb"))
def extract_relations(terms, maps_dir, output_dir):

    if isinstance(terms, str):
        terms = [terms]

    f, d = download_data()
    features = pandas.read_csv(f, sep="\t")
    database = pandas.read_csv(d, sep="\t")
    allterms = features.columns.tolist()
    allterms.pop(0)  # pmid
    dataset = Dataset(d)
    dataset.add_features(f)
    image_matrix = pandas.DataFrame(columns=range(228453))

    for t in range(len(allterms)):
        term = allterms[t]
        term_name = term.replace(" ", "_")
        pickled_map = "%s/%s_pFgA_z.pkl" % (maps_dir, term_name)
        if not os.path.exists(pickled_map):
            print "Generating P(term|activation) for term %s" % (term)
            ids = dataset.get_ids_by_features(term)
            maps = meta.MetaAnalysis(dataset, ids)
            pickle.dump(maps.images["pFgA_z"], open(pickled_map, "wb"))
        map_data = pickle.load(open(pickled_map, "rb"))
        image_matrix.loc[term] = map_data

    sims = pandas.DataFrame(columns=image_matrix.index)
    tuples = []

    for t1 in range(len(terms)):
        term1 = terms[t1]
        print "Extracting NeuroSynth relationships for term %s..." % (term1)
        for t2 in range(len(terms)):
            term2 = terms[t2]
            if t1 < t2:
                score = pearsonr(image_matrix.loc[term1],
                                 image_matrix.loc[term2])[0]
                tuples.append((term1, term2, score))

    save_relations(output_dir=output_dir, relations=tuples)
class TestAnalysis(unittest.TestCase):

    def setUp(self):
        """ Create a new Dataset and add features. """
        self.dataset = Dataset('data/test_dataset.txt')
        self.dataset.add_features('data/test_features.txt')

    def test_meta_analysis(self):
        """ Test full meta-analysis stream. """
        pass

    def test_decoder(self):
        pass

    def test_coactivation(self):
        """ Test seed-based coactivation. """
        pass

    def test_roi_averaging(self):
        pass

    def test_get_random_voxels(self):
        pass
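The test bodies above are placeholders. Below is a minimal sketch of what test_meta_analysis might assert against the small test dataset, written as a drop-in replacement for the stub; it assumes the meta.MetaAnalysis API used in the other examples here, and the available image keys vary across neurosynth versions.

def test_meta_analysis(self):
    """ Sketch: run a meta-analysis on the studies tagged with one feature. """
    # Assumes `from neurosynth.analysis import meta` at module level.
    ids = self.dataset.get_ids_by_features(['f*'], threshold=0.001)
    ma = meta.MetaAnalysis(self.dataset, ids)
    # Each result image should contain one value per in-mask voxel.
    n_voxels = self.dataset.image_table.data.shape[0]
    for img in ma.images.values():
        self.assertEqual(len(img), n_voxels)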
def create_dataset(database_location, feature_location):
    dataset = Dataset(database_location)
    dataset.add_features(feature_location)
    dataset.save('neurosynth-dataset.pkl')
    return dataset
resource_dir = path.join(path.pardir, 'resources')

# make sure we have the data
dataset_dir = path.join(path.expanduser('~'), 'Documents', 'neurosynth-data')
database_path = path.join(dataset_dir, 'database_bregma.txt')
neurosynth_data_url = 'https://github.com/wmpauli/neurosynth-data'
if not path.exists(database_path):
    print("Please download dataset from %s and store it in %s" % (neurosynth_data_url, dataset_dir))

# load dataset, both image table and feature table
r = 1.0  # 1 mm smoothing kernel
transform = {'BREGMA': transformations.bregma_to_whs()}
target = 'WHS'
masker_filename = path.join(resource_dir, 'WHS_SD_rat_brainmask_sm_v2.nii.gz')
dataset = Dataset(path.join(dataset_dir, 'database_bregma.txt'),
                  masker=masker_filename, r=r, transform=transform,
                  target=target)
dataset.feature_table = FeatureTable(dataset)
dataset.add_features(path.join(dataset_dir, "features_bregma.txt"))  # add features
fn = dataset.get_feature_names()

# get the ids of studies where this feature occurs
ids = dataset.get_ids_by_features(('%s*' % feature), threshold=0.1)
ma = meta.MetaAnalysis(dataset, ids)
results_path = path.join('results', 'meta', feature)
if not path.exists(results_path):
    makedirs(results_path)
print("saving results to: %s" % results_path)
ma.save_results(results_path)

# note: figure 2 of the manuscript was produced by plotting the z-score
# statistical maps for forward inference (pAgF_z.nii.gz) and reverse
# inference (pFgA_z.nii.gz)
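A hedged sketch of how those two maps could be rendered with nilearn (not necessarily how the manuscript figure was made). It assumes nilearn is installed, that ma.save_results(results_path) wrote pAgF_z.nii.gz and pFgA_z.nii.gz into results_path, and it uses the rat brain mask as a background image since the default MNI template does not apply to this data.

from nilearn import plotting

for img_name in ('pAgF_z', 'pFgA_z'):
    plotting.plot_stat_map(
        path.join(results_path, img_name + '.nii.gz'),
        bg_img=masker_filename,  # rat-space background; the MNI default would not match
        title='%s (%s)' % (feature, img_name),
        output_file=path.join(results_path, img_name + '.png'))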
class NeuroSynth:
    """Initialize Neurosynth Database"""

    def __init__(self, dbsize):
        print "Initializing Neurosynth database..."
        self.db = Dataset('data/' + str(dbsize) + 'terms/database.txt')
        self.db.add_features('data/' + str(dbsize) + 'terms/features.txt')
        self.ids = self.getIDs()
        self.decoder = None
        #self.masker = mask.Mask("data/X.nii.gz")

    """Do contrast analysis between two sets of papers"""
    def neurosynthContrast(self, papers1, papers2, fdr, outdir=None, outprefix=None, image_list=None):
        # Do a meta analysis to contrast the two
        ma = meta.MetaAnalysis(self.db, papers1, papers2, q=float(fdr))
        if outdir:
            print "Saving results to %s" % (outdir)
            ma.save_results(outdir, prefix=outprefix, prefix_sep='_', image_list=image_list)
        return ma.images

    """Conduct meta analysis with particular set of ids"""
    def neurosynthMeta(self, papers, fdr, outdir=None, outprefix=None, image_list=None):
        # Get valid ids from user list
        valid_ids = self.get_valid_ids(papers)
        if (len(valid_ids) > 0):
            # Do meta analysis
            ma = meta.MetaAnalysis(self.db, valid_ids, q=float(fdr))
            if outdir:
                print "Saving results to output directory %s" % (outdir)
                ma.save_results(outdir, prefix=outprefix, prefix_sep='_', image_list=image_list)
            return ma.images
        else:
            print "No studies found in database for ids in question!"

    """Return list of valid ids from user input"""
    def get_valid_ids(self, papers):
        # Input is DOI with list of papers
        valid_ids = [x for x in papers if int(x.strip(" ")) in self.ids]
        print "Found %s valid ids." % (str(len(valid_ids)))
        return valid_ids

    """Decode an image, return 100 results"""
    def decode(self, images, outfile, mrs=None, round=4):
        if not self.decoder:
            self.decoder = decode.Decoder(self.db)

        # If mrs is not specified, do decoding against neurosynth database
        if not mrs:
            result = self.decoder.decode(images, save=outfile)

        # If mrs is specified, do decoding against custom set of images
        else:
            # This is akin to traditional neurosynth method - pearson's r correlation
            imgs_to_compare = imageutils.load_imgs(mrs, self.masker)
            imgs_to_decode = imageutils.load_imgs(images, self.masker)
            x, y = imgs_to_compare.astype(float), imgs_to_decode.astype(float)
            x, y = x - x.mean(0), y - y.mean(0)
            x, y = x / np.sqrt((x ** 2).sum(0)), y / np.sqrt((y ** 2).sum(0))
            result = np.around(x.T.dot(y).T, round)
            features = [os.path.basename(m) for m in mrs]
            rownames = [os.path.basename(m) for m in images]
            df = pd.DataFrame(result, columns=features)
            df.index = rownames
            df.to_csv(outfile, sep="\t")
        return result

    """Return features in neurosynth database"""
    def getFeatures(self, dataset):
        return dataset.get_feature_names()

    """Extract pubmed IDs or dois from Neurosynth Database"""
    def getIDs(self):
        # Get all IDs in neuroSynth
        return self.db.image_table.ids

    """Extract author names for a given pmid or doi"""
    def getAuthor(self, db, id):
        article = self.db.get_mappables(id)
        meta = article[0].__dict__
        tmp = meta['data']['authors']
        tmp = tmp.split(",")
        authors = [x.strip("^ ") for x in tmp]
        return authors

    """Extract all author names in database"""
    def getAuthors(self, db):
        articles = db.mappables
        uniqueAuthors = []
        for a in articles:
            meta = a.__dict__
            tmp = meta['data']['authors']
            tmp = tmp.split(",")
            authors = [x.strip("^ ") for x in tmp]
            for a in authors:
                uniqueAuthors.append(a)
        uniqueAuthors = list(np.unique(uniqueAuthors))
        return uniqueAuthors

    """Extract activation points and all meta information for a particular pmid"""
    def getPaperMeta(self, db, pmid):
        articles = db.mappables
        m = []
        for a in articles:
            tmp = a.__dict__
            if tmp['data']['id'] == str(pmid):
                journal = tmp['data']['journal']
                title = tmp['data']['title']
                year = tmp['data']['year']
                doi = tmp['data']['doi']
                auth = tmp['data']['authors']
                peaks = tmp['data']['peaks']
                pmid = tmp['data']['id']
                tmp = (journal, title, year, doi, pmid, auth, peaks)
                m.append(tmp)
        return m
class NeuroSynth:
    """Initialize Neurosynth Database"""

    def __init__(self, dbsize):
        print "Initializing Neurosynth database..."
        self.db = Dataset("data/" + str(dbsize) + "terms/database.txt")
        self.db.add_features("data/" + str(dbsize) + "terms/features.txt")
        self.ids = self.getIDs()
        self.decoder = None
        # self.masker = mask.Mask("data/X.nii.gz")

    """Do contrast analysis between two sets of papers"""
    def neurosynthContrast(self, papers1, papers2, fdr, outdir=None, outprefix=None, image_list=None):
        # Do a meta analysis to contrast the two
        ma = meta.MetaAnalysis(self.db, papers1, papers2, q=float(fdr))
        if outdir:
            print "Saving results to %s" % (outdir)
            ma.save_results(outdir, prefix=outprefix, prefix_sep="_", image_list=image_list)
        return ma.images

    """Conduct meta analysis with particular set of ids"""
    def neurosynthMeta(self, papers, fdr, outdir=None, outprefix=None, image_list=None):
        # Get valid ids from user list
        valid_ids = self.get_valid_ids(papers)
        if len(valid_ids) > 0:
            # Do meta analysis
            ma = meta.MetaAnalysis(self.db, valid_ids, q=float(fdr))
            if outdir:
                print "Saving results to output directory %s" % (outdir)
                ma.save_results(outdir, prefix=outprefix, prefix_sep="_", image_list=image_list)
            return ma.images
        else:
            print "No studies found in database for ids in question!"

    """Return list of valid ids from user input"""
    def get_valid_ids(self, papers):
        # Input is DOI with list of papers
        valid_ids = [x for x in papers if int(x.strip(" ")) in self.ids]
        print "Found %s valid ids." % (str(len(valid_ids)))
        return valid_ids

    """Decode an image, return 100 results"""
    def decode(self, images, outfile, mrs=None, round=4):
        if not self.decoder:
            self.decoder = decode.Decoder(self.db)

        # If mrs is not specified, do decoding against neurosynth database
        if not mrs:
            result = self.decoder.decode(images, save=outfile)

        # If mrs is specified, do decoding against custom set of images
        else:
            # This is akin to traditional neurosynth method - pearson's r correlation
            imgs_to_compare = imageutils.load_imgs(mrs, self.masker)
            imgs_to_decode = imageutils.load_imgs(images, self.masker)
            x, y = imgs_to_compare.astype(float), imgs_to_decode.astype(float)
            x, y = x - x.mean(0), y - y.mean(0)
            x, y = x / np.sqrt((x ** 2).sum(0)), y / np.sqrt((y ** 2).sum(0))
            result = np.around(x.T.dot(y).T, round)
            features = [os.path.basename(m) for m in mrs]
            rownames = [os.path.basename(m) for m in images]
            df = pd.DataFrame(result, columns=features)
            df.index = rownames
            df.to_csv(outfile, sep="\t")
        return result

    """Return features in neurosynth database"""
    def getFeatures(self, dataset):
        return dataset.get_feature_names()

    """Extract pubmed IDs or dois from Neurosynth Database"""
    def getIDs(self):
        # Get all IDs in neuroSynth
        return self.db.image_table.ids

    """Extract author names for a given pmid or doi"""
    def getAuthor(self, db, id):
        article = self.db.get_mappables(id)
        meta = article[0].__dict__
        tmp = meta["data"]["authors"]
        tmp = tmp.split(",")
        authors = [x.strip("^ ") for x in tmp]
        return authors

    """Extract all author names in database"""
    def getAuthors(self, db):
        articles = db.mappables
        uniqueAuthors = []
        for a in articles:
            meta = a.__dict__
            tmp = meta["data"]["authors"]
            tmp = tmp.split(",")
            authors = [x.strip("^ ") for x in tmp]
            for a in authors:
                uniqueAuthors.append(a)
        uniqueAuthors = list(np.unique(uniqueAuthors))
        return uniqueAuthors

    """Extract activation points and all meta information for a particular pmid"""
    def getPaperMeta(self, db, pmid):
        articles = db.mappables
        m = []
        for a in articles:
            tmp = a.__dict__
            if tmp["data"]["id"] == str(pmid):
                journal = tmp["data"]["journal"]
                title = tmp["data"]["title"]
                year = tmp["data"]["year"]
                doi = tmp["data"]["doi"]
                auth = tmp["data"]["authors"]
                peaks = tmp["data"]["peaks"]
                pmid = tmp["data"]["id"]
                tmp = (journal, title, year, doi, pmid, auth, peaks)
                m.append(tmp)
        return m
base_path = '/home/pauli/Development/neurobabel/'
test_data_path = base_path + 'ACE/'
masker_filename = base_path + 'atlases/whs_sd/WHS_SD_rat_one_sm_v2.nii.gz'
atlas_filename = base_path + 'atlases/whs_sd/WHS_SD_rat_atlas_brain_sm_v2.nii.gz'
mask = nb.load(masker_filename)
masker = Masker(mask)
r = 1.0
# transform = {'BREGMA': transformations.bregma_to_whs()}
#transform = {'BREGMA': transformations.identity()}
transform = {'BREGMA': transformations.bregma_to_whs()}
target = 'WHS'

# load data set
dataset = Dataset(os.path.join(test_data_path, 'db_bregma_export.txt'),
                  masker=masker_filename, r=r, transform=transform,
                  target=target)
dataset.feature_table = FeatureTable(dataset)
dataset.add_features(os.path.join(test_data_path, "db_bregma_features.txt"))  # add features
fn = dataset.get_feature_names()


def get_whs_labels(filename=os.path.join(base_path, "atlases/whs_sd/WHS_SD_rat_atlas_v2.label")):
    '''Load the names of all labelled areas in the atlas (e.g. brainstem) and
    return them as a dict mapping each label name to its index.'''
    in_file = open(filename, 'r')
    lines = in_file.readlines()
    labels = {}
    for line in lines:
        start = line.find("\"") + 1
        if start > 0:
            stop = line.find("\"", start)
            label = line[start:stop]
            idx = line.split()[0]
            labels[label] = int(idx)
    in_file.close()
    return labels
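A hedged usage sketch for get_whs_labels(): turn one labelled structure from the WHS atlas into a binary mask, using the label-to-index mapping returned above. The structure name comes from the docstring's example; which names exist depends on the .label file.

labels = get_whs_labels()
idx = labels['brainstem']  # illustrative structure name
atlas_img = nb.load(atlas_filename)
structure_mask = (atlas_img.get_data() == idx).astype(int)
nb.save(nb.Nifti1Image(structure_mask, atlas_img.affine),
        'brainstem_mask.nii.gz')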
class Neurosynth:

    def __init__(self, datadir='../data/neurosynth', verbose=True,
                 ma_count_thresh=16, meta_image='consistency_z',
                 resolution=3):
        self.dataset = None
        self.concepts = None
        self.concepts_df = None
        self.concept_pmids = {}
        self.datadir = datadir
        self.datafile = os.path.join(datadir, 'database.txt')
        self.verbose = verbose
        self.ma_count_thresh = ma_count_thresh
        self.meta_image = meta_image
        self.resolution = resolution
        self.imagedir_resampled = None
        self.image_concepts = None
        self.desmtx = None

        if not os.path.exists(os.path.join(self.datadir, 'database.txt')):
            print('downloading neurosynth data')
            ns.dataset.download(path='/tmp', unpack=True)
            print('extracting data')
            tfile = tarfile.open("/tmp/current_data.tar.gz", 'r:gz')
            if not os.path.exists(self.datadir):
                os.mkdir(self.datadir)
            tfile.extractall(self.datadir)
            os.remove("/tmp/current_data.tar.gz")
            print('done creating dataset in', self.datadir)

        self.imagedir = os.path.join(self.datadir, 'ma_images')
        if not os.path.exists(self.imagedir):
            os.mkdir(self.imagedir)

    def get_dataset(self, force_load=False):
        if os.path.exists(os.path.join(self.datadir, 'dataset.pkl')) and not force_load:
            print('loading database from', os.path.join(self.datadir, 'dataset.pkl'))
            self.dataset = Dataset.load(
                os.path.join(self.datadir, 'dataset.pkl'))
        else:
            print('loading database - this takes a few minutes')
            self.dataset = Dataset(os.path.join(self.datadir, 'database.txt'))
            self.dataset.add_features(
                os.path.join(self.datadir, 'features.txt'))
            self.dataset.save(os.path.join(self.datadir, 'dataset.pkl'))

    def get_concepts(self, force_load=False):
        if os.path.exists(os.path.join(self.datadir, 'concepts_df.csv')) and not force_load:
            print('using cached cognitive atlas concepts')
            self.concepts_df = pandas.read_csv(
                os.path.join(self.datadir, 'concepts_df.csv'))
        else:
            self.concepts_df = get_concept().pandas
            self.concepts_df.to_csv(
                os.path.join(self.datadir, 'concepts_df.csv'))
        self.concepts = self.concepts_df.name.tolist()

    def get_concept_pmids(self, retmax=2000000, force_load=False):
        # get the pmids for each concept that are in neurosynth
        # for single-word concepts we use the neurosynth search tool
        # for phrases we use pubmed
        if os.path.exists(os.path.join(
                self.datadir, 'concept_pmids.pkl')) and not force_load:
            print('using cached concept_pmids')
            self.concept_pmids = pickle.load(
                open(os.path.join(self.datadir, 'concept_pmids.pkl'), 'rb'))
            return
        print('loading all neurosynth pmids')
        all_neurosynth_ids = self.dataset.image_table.ids.tolist()

        for id in self.concepts:
            time.sleep(0.5)
            handle = Entrez.esearch(db="pubmed", retmax=retmax,
                                    term='"%s"' % id)
            record = Entrez.read(handle)
            handle.close()
            # make sure we got all the records - rerun if we didn't
            if int(record['Count']) > retmax:
                handle = Entrez.esearch(db="pubmed",
                                        retmax=int(record['Count']),
                                        term='"%s"' % id)
                record = Entrez.read(handle)
                handle.close()
            records_int = [int(i) for i in record['IdList']]
            ns_pmids = intersect(all_neurosynth_ids, records_int)
            print('pubmed found', len(ns_pmids), 'matching pmids for', id)
            self.concept_pmids[id] = ns_pmids
        pickle.dump(
            self.concept_pmids,
            open(os.path.join(self.datadir, 'concept_pmids.pkl'), 'wb'))

    def get_concept_images(self, force_load=False):
        for c in self.concept_pmids.keys():
            if not force_load and os.path.exists(
                    os.path.join(
                        self.imagedir,
                        '%s_specificity_z.nii.gz' % c.replace(' ', '-'))):
                continue
            if len(self.concept_pmids[c]) < self.ma_count_thresh:
                #print('skipping',c,len(self.concept_pmids[c]),'pmids')
                continue
            print('running meta-analysis for', c)
            ma = meta.MetaAnalysis(self.dataset, self.concept_pmids[c])
            ma.save_results(self.imagedir, c.replace(' ', '-'))

        if force_load or not os.path.exists(
                os.path.join(self.imagedir, 'mask_image.nii.gz')):
            # make mask of voxels with zero standard deviation
            concept_images = glob.glob(
                os.path.join(self.imagedir, '*_%s.nii.gz' % self.meta_image))
            imgdata = numpy.zeros((91, 109, 91, len(concept_images)))
            print('loading concept images to compute std')
            for i, c in enumerate(concept_images):
                tmp = nibabel.load(c).get_data()
                imgdata[:, :, :, i] = tmp
            imgstd = numpy.std(imgdata, axis=3)
            maskdata = (imgstd > 0).astype('int')
            maskimg = nibabel.Nifti1Image(maskdata,
                                          affine=nibabel.load(c).affine)
            maskimg.to_filename(
                os.path.join(self.imagedir, 'mask_image.nii.gz'))

    def get_resampled_images(self, shape=None, affine=None, force_load=False):
        # use 3 mm as default
        if not shape:
            shape = [60, 72, 60]
            affine = numpy.array([[-3, 0, 0, 90], [0, 3, 0, -126],
                                  [0, 0, 3, -72], [0, 0, 0, 1]])
        self.resolution = affine[1, 1].astype('int')
        print('resampling data to %d mm' % self.resolution)
        self.imagedir_resampled = os.path.join(
            self.datadir, 'ma_images_%dmm' % self.resolution)
        if not os.path.exists(self.imagedir_resampled):
            os.mkdir(self.imagedir_resampled)
        concept_images = glob.glob(
            os.path.join(self.imagedir, '*_%s.nii.gz' % self.meta_image))
        for c in concept_images:
            if force_load or not os.path.exists(
                    os.path.join(self.imagedir_resampled,
                                 os.path.basename(c))):
                img = nilearn.image.resample_img(c, target_affine=affine,
                                                 target_shape=shape)
                img.to_filename(
                    os.path.join(self.imagedir_resampled,
                                 os.path.basename(c)))
        if not os.path.exists(
                os.path.join(self.datadir,
                             'mask_%dmm.nii.gz' % self.resolution)):
            # make MNI mask at chosen resolution
            mask = os.path.join(
                os.environ['FSLDIR'],
                'data/standard/MNI152_T1_2mm_brain_mask.nii.gz')
            maskimg = nilearn.image.resample_img(mask, target_affine=affine,
                                                 target_shape=shape)
            maskimg.to_filename(
                os.path.join(self.datadir,
                             'mask_%dmm.nii.gz' % self.resolution))

    def load_concept_images(self, force_load=True):
        concept_images = glob.glob(
            os.path.join(self.imagedir_resampled,
                         '*_%s.nii.gz' % self.meta_image))
        concept_images.sort()
        self.image_concepts = [
            os.path.basename(i).split('_')[0] for i in concept_images
        ]
        if os.path.exists(
                os.path.join(self.datadir,
                             'imgdata_%dmm.npy' % self.resolution)):
            self.imgdata = numpy.load(
                os.path.join(self.datadir,
                             'imgdata_%dmm.npy' % self.resolution))
            # make sure it's the right size
            if self.imgdata.shape[1] == len(concept_images):
                print('using cached concept image data')
                return
        masker = nilearn.input_data.NiftiMasker(
            mask_img=os.path.join(self.datadir,
                                  'mask_%dmm.nii.gz' % self.resolution),
            target_shape=[60, 72, 60],
            target_affine=numpy.array([[-3, 0, 0, 90], [0, 3, 0, -126],
                                       [0, 0, 3, -72], [0, 0, 0, 1]]))
        print('loading concept image data')
        self.imgdata = masker.fit_transform(concept_images)
        numpy.save(
            os.path.join(self.datadir,
                         'imgdata_%dmm.npy' % self.resolution),
            self.imgdata)

    def save(self):
        with open('%s/neurovault_%dmm.pkl' % (self.datadir, self.resolution),
                  'wb') as f:
            pickle.dump(self, f)

    def build_design_matrix(self, force_load=False):
        if not force_load and os.path.exists(
                os.path.join(self.datadir, 'desmtx.csv')):
            self.desmtx = pandas.DataFrame.from_csv(
                os.path.join(self.datadir, 'desmtx.csv'))
            print('using cached design matrix')
            return
        print('building design matrix')
        all_concept_pmids = []
        for k in self.concept_pmids.keys():
            all_concept_pmids = all_concept_pmids + self.concept_pmids[k]
        all_concept_pmids = list(set(all_concept_pmids))
        all_concept_pmids.sort()
        all_concepts = list(self.concept_pmids.keys())
        self.desmtx = pandas.DataFrame(data=0, index=all_concept_pmids,
                                       columns=all_concepts)
        for k in self.concept_pmids.keys():
            pmids = self.concept_pmids[k]
            self.desmtx[k][pmids] = 1
        # drop columns with too few matches
        self.desmtx = self.desmtx.ix[:, self.desmtx.sum() > self.ma_count_thresh]
        self.desmtx.to_csv(os.path.join(self.datadir, 'desmtx.csv'))
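A hedged end-to-end driver for the class above, showing the intended call order; the data directory and resolution are illustrative.

ns = Neurosynth(datadir='../data/neurosynth', resolution=3)
ns.get_dataset()           # load or build the pickled Dataset
ns.get_concepts()          # fetch Cognitive Atlas concepts
ns.get_concept_pmids()     # map each concept to Neurosynth PMIDs via PubMed
ns.get_concept_images()    # run a meta-analysis per concept
ns.get_resampled_images()  # resample the meta-analytic maps to 3 mm
ns.load_concept_images()   # load the resampled maps into a voxel matrix
ns.build_design_matrix()   # one row per PMID, one column per concept
ns.save()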
def get_test_dataset():
    test_data_path = get_test_data_path()
    dataset = Dataset(test_data_path + 'test_dataset.txt')
    dataset.add_features(test_data_path + 'test_features.txt')
    return dataset
""" Create a new Dataset instance from a database file and load features. This is basically the example from the quickstart in the README. Assumes you have database.txt and features.txt files in the current dir. """ """ Load a Dataset and generate a full set of meta-analysis images--i.e., run a meta-analysis on every single feature. """ neurosynth_data_dir = "/home/data/nbc/misc-projects/niconn-macm/code/neurosynth/" if not op.isfile(op.join(neurosynth_data_dir, "dataset.pkl")): # Create Dataset instance from a database file. dataset = Dataset(op.join(neurosynth_data_dir, "database.txt")) # Load features from file dataset.add_features(op.join(neurosynth_data_dir, "features.txt")) # Pickle the Dataset to file so we can use Dataset.load() next time # instead of having to sit through the generation process again. dataset.save(op.join(neurosynth_data_dir, "dataset.pkl")) # Load pickled Dataset--assumes you've previously saved it. If not, # follow the create_a_new_dataset_and_load_features example. dataset = Dataset.load(op.join(neurosynth_data_dir, "dataset.pkl")) # Get the full list of feature names feature_list = dataset.get_feature_names() # Run a meta-analysis on each feature, and save all the results to # a directory called results. Note that the directory will not be # created for you, so make sure it exists.
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import meta
import os

dataset = Dataset('database.txt')
dataset.add_features('features.txt')
print dataset.get_feature_names()

ids = dataset.get_ids_by_features('emo*', threshold=0.001)
print len(ids)

ma = meta.MetaAnalysis(dataset, ids)
ma.save_results('emotion')
class TestBase(unittest.TestCase):

    def setUp(self):
        """ Create a new Dataset and add features. """
        self.dataset = Dataset('data/test_dataset.txt')
        self.dataset.add_features('data/test_features.txt')

    def test_dataset_initializes(self):
        """ Test whether dataset initializes properly. """
        self.assertIsNotNone(self.dataset.volume)
        self.assertIsNotNone(self.dataset.image_table)
        self.assertEqual(len(self.dataset.mappables), 5)
        self.assertIsNotNone(self.dataset.volume)
        self.assertIsNotNone(self.dataset.r)

    def test_image_table_loads(self):
        """ Test ImageTable initialization. """
        self.assertIsNotNone(self.dataset.image_table)
        it = self.dataset.image_table
        self.assertEqual(len(it.ids), 5)
        self.assertIsNotNone(it.volume)
        self.assertIsNotNone(it.r)
        self.assertEqual(it.data.shape, (228453, 5))
        # Add tests for values in table

    def test_feature_table_loads(self):
        """ Test FeatureTable initialization. """
        tt = self.dataset.feature_table
        self.assertIsNotNone(tt)
        self.assertEqual(len(self.dataset.list_features()), 5)
        self.assertEqual(tt.data.shape, (5, 5))
        self.assertEqual(tt.feature_names[3], 'f4')
        self.assertEqual(tt.data[0, 0], 0.0003)

    def test_feature_search(self):
        """ Test feature-based Mappable search. Tests both the FeatureTable
        method and the Dataset wrapper. """
        tt = self.dataset.feature_table
        features = tt.search_features(['f*'])
        self.assertEqual(len(features), 4)
        d = self.dataset
        ids = d.get_ids_by_features(['f*'], threshold=0.001)
        self.assertEqual(len(ids), 4)
        img_data = d.get_ids_by_features(
            ['f1', 'f3', 'g1'], 0.001, func='max', get_image_data=True)
        self.assertEqual(img_data.shape, (228453, 5))

    def test_selection_by_mask(self):
        """ Test mask-based Mappable selection. Only one peak in the test
        dataset (in study5) should be within the sgACC. """
        ids = self.dataset.get_ids_by_mask('data/sgacc_mask.nii.gz')
        self.assertEquals(len(ids), 1)
        self.assertEquals('study5', ids[0])

    def test_selection_by_peaks(self):
        """ Test peak-based Mappable selection. """
        ids = self.dataset.get_ids_by_peaks(np.array([[3, 30, -9]]))
        self.assertEquals(len(ids), 1)
        self.assertEquals('study5', ids[0])

    # def test_invalid_coordinates_ignored(self):
    #     """ Test dataset contains 3 valid coordinates and one outside mask.
    #     But this won't work
def create_dataset(database_location, feature_location):
    dataset = Dataset(database_location)
    dataset.add_features(feature_location)
    dataset.save('dataset-old.pkl')
    print 'created dataset'
    return dataset
xfm2vol.run()


# make masks to input into neurosynth
def cluster2masks(clusterfile):
    clustermap = nb.load(clusterfile).get_data()
    for x in range(1, clustermap.max() + 1):
        clustermask = (clustermap == x).astype(int)
        nImg = nb.Nifti1Image(clustermask, None)
        nb.save(
            nImg,
            os.path.abspath(clusterfile + '_clustermask' + str(x) + '.nii'))


cluster2masks(volume_file)

dataset_file = '/home/raid3/watanabe/neurosynth/data/dataset.pkl'

if not os.path.exists(dataset_file):
    dataset = Dataset('/home/raid3/watanabe/neurosynth/data/database.txt')
    dataset.add_features('/home/raid3/watanabe/neurosynth/data/features.txt')
    dataset.save(dataset_file)
else:
    dataset = cPickle.load(open(dataset_file, 'rb'))

clustermask = volume_file + '_clustermask' + str(3) + '.nii'
ids = dataset.get_ids_by_mask(clustermask)
features = dataset.feature_table.get_features_by_ids(ids)

#mri_surf2vol --identity fsaverage4 --surfval /scr/ilz1/Data/attemptsurface.nii --hemi 'lh' --o /scr/ilz1/Data/results/surf2volume.nii --template /scr/ilz1/Data/freesurfer/fsaverage4/mri/orig.mgz
def get_test_dataset(prefix='test'):
    test_data_path = get_test_data_path()
    dataset = Dataset(test_data_path + '%s_dataset.txt' % prefix)
    dataset.add_features(test_data_path + '%s_features.txt' % prefix)
    return dataset
        mni152_2mm, interpolation='nearest', copy=True)
    resampled_file = join(map_dir, '{0}_mni2mm.nii.gz'.format(file))
    resampled_roi.to_filename(resampled_file)
    roi_files[i] = resampled_file
    plot_glass_brain(resampled_file,
                     output_file=join(map_dir, '{0}_mni2mm.png'.format(basename(file))))

# In[16]:

print('loading dataset...')
tds = datetime.now()
dataset = Dataset('/Users/Katie/Dropbox/Data/neurosynth-v0.7/database.txt')
dataset.add_features('/Users/Katie/Dropbox/Data/neurosynth-v0.7/features.txt')
tdf = datetime.now()
print('dataset loaded! only took {0}'.format((tdf - tds)))

for i in np.arange(0, len(mask_names)):
    print('{0}\nmeta-analyzing {1}...'.format(datetime.now(), mask_names[i]))
    tmas = datetime.now()
    ids = dataset.get_studies(mask=roi_files[i])
    ma = meta.MetaAnalysis(dataset, ids)
    ma.save_results(
        output_dir=sink_dir,
        prefix=mask_names[i],
        image_list=['association-test_z', 'association-test_z_FDR_0.01'])
    tmaf = datetime.now()
    print('meta-analysis took {0}\ndecoding {1}...'.format((tmaf - tmas),
class NeurosynthMerge:

    def __init__(self, thesaurus, npath, outdir, test_mode=False):
        """
        Generates a new set of images using the neurosynth repository,
        combining across terms in a thesaurus.

        Args:
            - thesaurus: a list of tuples of the form
              ('term that will be the name of the file', 'the other term',
               'expression combining the terms'); the last element is an
              alphanumeric expression using the operators & (and),
              &~ (and not), and | (or).
            - npath: directory where the neurosynth git repository is checked
              out locally on your machine
              (https://github.com/neurosynth/neurosynth)
            - outdir: directory where the generated images will be saved
            - test_mode: when True, the code runs an abridged version for
              test purposes (as implemented by test.Neurosynth.py)
        """
        self.thesaurus = thesaurus
        self.npath = npath
        self.outdir = outdir

        self.import_neurosynth_git()
        from neurosynth.analysis import meta

        # Take the first two terms out of the feature_list and insert the
        # third term from the tuple.
        for triplet in thesaurus:
            self.feature_list = [feature for feature in self.feature_list
                                 if feature not in triplet]
            self.feature_list.append(triplet[-1])

        # This makes an abridged version of feature_list for testing purposes.
        if test_mode:
            self.feature_list = [triplet[-1] for triplet in thesaurus]

        # Run meta-analyses on the new feature set and save the results to
        # the outdir.
        for feature in self.feature_list:
            self.ids = self.dataset.get_ids_by_expression(feature,
                                                          threshold=0.001)
            ma = meta.MetaAnalysis(self.dataset, self.ids)

            # Parse the feature name (to avoid conflicts with illegal
            # characters in file names)
            regex = re.compile('\W+')
            split = re.split(regex, feature)
            feat_fname = split[0]

            # Save the results (many different types of files)
            ma.save_results(self.outdir + os.sep + feat_fname)

    def import_neurosynth_git(self):
        # Add the appropriate neurosynth git folder to the python path.
        sys.path.append(self.npath)
        from neurosynth.base.dataset import Dataset
        from neurosynth.analysis import meta

        # Try to load a pickle if it exists. Create a new dataset instance
        # if it doesn't.
        try:
            self.dataset = cPickle.load(
                open(self.npath + os.sep + 'data/dataset.pkl', 'rb'))
        except IOError:
            # Create Dataset instance from a database file.
            self.dataset = Dataset(self.npath + os.sep + 'data/database.txt')

            # Load features from file
            self.dataset.add_features(self.npath + os.sep + 'data/features.txt')

        # Get names of features.
        self.feature_list = self.dataset.get_feature_names()
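A hedged usage sketch for the class above. The thesaurus entries follow the tuple format described in the docstring; the terms, expressions, and paths are illustrative.

thesaurus = [
    ('emotion', 'emotions', 'emotion | emotions'),
    ('memory', 'recall', 'memory &~ recall'),
]
nsm = NeurosynthMerge(thesaurus,
                      npath='/path/to/neurosynth',
                      outdir='/path/to/output',
                      test_mode=True)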
xfm2vol.inputs.identity = 'fsaverage4'
xfm2vol.inputs.hemi = 'lh'
xfm2vol.inputs.transformed_file = volume_file
xfm2vol.inputs.template_file = template
xfm2vol.run()


# make masks to input into neurosynth
def cluster2masks(clusterfile):
    clustermap = nb.load(clusterfile).get_data()
    for x in range(1, clustermap.max() + 1):
        clustermask = (clustermap == x).astype(int)
        nImg = nb.Nifti1Image(clustermask, None)
        nb.save(nImg, os.path.abspath(clusterfile + '_clustermask' + str(x) + '.nii'))


cluster2masks(volume_file)

dataset_file = '/home/raid3/watanabe/neurosynth/data/dataset.pkl'

if not os.path.exists(dataset_file):
    dataset = Dataset('/home/raid3/watanabe/neurosynth/data/database.txt')
    dataset.add_features('/home/raid3/watanabe/neurosynth/data/features.txt')
    dataset.save(dataset_file)
else:
    dataset = cPickle.load(open(dataset_file, 'rb'))

clustermask = volume_file + '_clustermask' + str(3) + '.nii'
ids = dataset.get_ids_by_mask(clustermask)
features = dataset.feature_table.get_features_by_ids(ids)

#mri_surf2vol --identity fsaverage4 --surfval /scr/ilz1/Data/attemptsurface.nii --hemi 'lh' --o /scr/ilz1/Data/results/surf2volume.nii --template /scr/ilz1/Data/freesurfer/fsaverage4/mri/orig.mgz