def coactivation(dataset, seed, threshold=0.0, output_dir='.', prefix='', r=6):
    """ Run a seed-based coactivation meta-analysis and save the results.

    The studies selected by the seed are handed to meta.MetaAnalysis and the
    resulting images are written with MetaAnalysis.save_results().

    Args:
        dataset: a Dataset instance containing study and activation data.
        seed: either a string identifying a Nifti or Analyze image that
            defines the seed (passed to dataset.get_studies() as `mask`), or
            a list of x/y/z triples defining the seed(s) (passed as `peaks`).
            Voxels do not need to be contiguous to define a seed--all
            supra-threshold voxels are lumped together.
        threshold: optional float, forwarded as `activation_threshold`
            (default = 0).
        output_dir: directory to write to. Defaults to the current directory.
        prefix: optional string to prepend to all coactivation images.
        r: optional integer indicating radius (in mm) of spheres to grow
            (only used if seed is a list of coordinates).

    Output:
        A set of meta-analysis images identical to that generated by
        meta.MetaAnalysis.
    """
    # A string seed is treated as a mask image; anything else as peaks.
    if isinstance(seed, string_types):
        selection = {'mask': seed}
    else:
        selection = {'peaks': seed, 'r': r}

    study_ids = dataset.get_studies(activation_threshold=threshold,
                                    **selection)
    analysis = meta.MetaAnalysis(dataset, study_ids)
    analysis.save_results(output_dir, prefix)
def neurosynthContrast(self, papers1, papers2, fdr, outdir=None,
                       outprefix=None, image_list=None):
    """ Contrast two sets of studies with a meta-analysis.

    Args:
        papers1: ids of the first set of studies.
        papers2: ids of the second set of studies.
        fdr: value converted to float and passed to MetaAnalysis as `q`.
        outdir: optional output directory; results are only written to disk
            when this is truthy.
        outprefix: optional prefix for the saved images (joined with '_').
        image_list: optional list of images to save, forwarded to
            save_results().

    Returns:
        The `images` attribute of the MetaAnalysis instance.
    """
    analysis = meta.MetaAnalysis(self.db, papers1, papers2, q=float(fdr))

    # Nothing to write: hand back the in-memory images only.
    if not outdir:
        return analysis.images

    print("Saving results to %s" % (outdir))
    analysis.save_results(outdir, prefix=outprefix, prefix_sep='_',
                          image_list=image_list)
    return analysis.images
def test_meta_analysis(self):
    """ Test full meta-analysis stream.

    Runs a MetaAnalysis on two studies, saves the results with the prefix
    'test' into a temporary directory and checks that nine
    ``test_*.nii.gz`` images were written.
    """
    from glob import glob

    # run a meta-analysis
    ids = ['study1', 'study3']
    ma = meta.MetaAnalysis(self.dataset, ids)

    # save the results; the try/finally guarantees the temporary directory
    # is removed even when saving or the assertion fails
    tempdir = tempfile.mkdtemp()
    try:
        ma.save_results(tempdir + os.path.sep, prefix='test')
        files = glob(tempdir + os.path.sep + "test_*.nii.gz")
        self.assertEqual(len(files), 9)
    finally:
        shutil.rmtree(tempdir)
def neurosynthMeta(self, papers, fdr, outdir=None, outprefix=None,
                   image_list=None):
    """ Run a meta-analysis on the user's papers that exist in the database.

    Args:
        papers: ids supplied by the user; filtered via self.get_valid_ids().
        fdr: value converted to float and passed to MetaAnalysis as `q`.
        outdir: optional output directory; results are only written to disk
            when this is truthy.
        outprefix: optional prefix for the saved images (joined with '_').
        image_list: optional list of images to save, forwarded to
            save_results().

    Returns:
        The `images` attribute of the MetaAnalysis instance, or None (after
        printing a message) when none of the ids are valid.
    """
    # Get valid ids from user list
    valid_ids = self.get_valid_ids(papers)
    if len(valid_ids) == 0:
        print("No studies found in database for ids in question!")
        return None

    # Do meta analysis
    analysis = meta.MetaAnalysis(self.db, valid_ids, q=float(fdr))
    if outdir:
        print("Saving results to output directory %s" % (outdir))
        analysis.save_results(outdir, prefix=outprefix, prefix_sep='_',
                              image_list=image_list)
    return analysis.images
def make_coactivation_map(x, y, z, r=6, min_studies=0.01):
    """ Generate a coactivation map on-the-fly for the given seed voxel.

    Args:
        x, y, z: coordinates of the seed voxel.
        r: radius forwarded to dataset.get_studies().
        min_studies: forwarded to meta.MetaAnalysis.

    Returns:
        True if the map was saved; False if fewer than 50 studies were found
        for the seed or if any exception was raised (the traceback is
        printed).
    """
    try:
        # The dataset is expected to be attached to this function object.
        dataset = make_coactivation_map.dataset
        seed = [[x, y, z]]
        study_ids = dataset.get_studies(peaks=seed, r=r)
        if len(study_ids) < 50:
            return False

        analysis = meta.MetaAnalysis(dataset, study_ids,
                                     min_studies=min_studies)
        target_dir = join(settings.IMAGE_DIR, 'coactivation')
        coords = (str(x), str(y), str(z))
        image_prefix = 'metaanalytic_coactivation_%s_%s_%s' % coords
        analysis.save_results(target_dir, image_prefix,
                              image_list=['specificity_z_FDR_0.01'])
    except Exception:
        print(traceback.format_exc())
        return False
    return True
def test_meta_analysis(self):
    """ Test full meta-analysis stream.

    First runs a MetaAnalysis on two studies and checks that nine
    ``test_*.nii.gz`` images are saved; then runs the analyze_features()
    wrapper and checks that 9 * 5 ``meep*.nii.gz`` images are saved.
    """
    # run a meta-analysis
    ids = ['study1', 'study3']
    ma = meta.MetaAnalysis(self.dataset, ids)

    # save the results; try/finally guarantees the temporary directory is
    # removed even when saving or the assertion fails
    tempdir = tempfile.mkdtemp()
    try:
        ma.save_results(tempdir + os.path.sep, prefix='test')
        files = glob(tempdir + os.path.sep + "test_*.nii.gz")
        self.assertEqual(len(files), 9)
    finally:
        shutil.rmtree(tempdir)

    # test the analyze_features() wrapper
    tempdir = tempfile.mkdtemp()
    try:
        meta.analyze_features(self.dataset, output_dir=tempdir,
                              prefix="meep")
        files = glob(tempdir + os.path.sep + "meep*.nii.gz")
        self.assertEqual(len(files), 9 * 5)
    finally:
        shutil.rmtree(tempdir)
def run_metaanalysis(ids, name):
    """ Run a user-defined meta-analysis.

    Args:
        ids (list): list of PMIDs identifying studies to include in the
            meta-analysis
        name (string): name of the analysis; used in filename of output
            images

    Returns:
        True if the images were saved; False if any exception was raised
        (the traceback is printed).
    """
    try:
        # The dataset is expected to be attached to this function object.
        analysis = meta.MetaAnalysis(run_metaanalysis.dataset, ids)
        wanted_images = ['specificity_z_FDR_0.01', 'consistency_z_FDR_0.01']
        analysis.save_results(join(settings.IMAGE_DIR, 'custom'), name,
                              image_list=wanted_images)
    except Exception:
        print(traceback.format_exc())
        return False
    return True
def _create_cluster_images(self, labels, coactivation_maps):
    ''' Write the cluster label image and, optionally, coactivation maps.

    Args:
        labels: A vector of cluster labels
        coactivation_maps: If truthy, a meta-analysis is also run and saved
            for each cluster.

    Outputs:
        cluster_labels.nii.gz: a nifti image with cluster labels, written to
            <output_dir>/<prefix_><algorithm>_k<n_clusters>/
        coactivation/cluster_<c>*: meta-analysis images per cluster (only if
            coactivation_maps is truthy)
    '''
    # Reconstruct grid into original space
    # TODO: replace with masker.unmask()
    if hasattr(self, 'grid'):
        regions = self.masker.mask(self.grid)
        voxel_labels = np.zeros(regions.size)
        # The i-th unique region value receives the i-th label, shifted by 1.
        for idx, region in enumerate(np.unique(regions)):
            voxel_labels[regions == region] = labels[idx] + 1
        labels = voxel_labels

    clusters = np.unique(labels)

    if self.prefix is None:
        name_prefix = ''
    else:
        name_prefix = self.prefix + '_'
    dir_name = name_prefix + self.algorithm + '_k' + str(len(clusters))
    output_dir = join(self.output_dir, dir_name)
    if not isdir(output_dir):
        os.makedirs(output_dir)

    label_file = join(output_dir, 'cluster_labels.nii.gz')
    imageutils.save_img(labels, label_file, self.masker)

    if not coactivation_maps:
        return

    # Generate a coactivation map for each cluster
    coact_dir = join(output_dir, 'coactivation')
    if not isdir(coact_dir):
        os.makedirs(coact_dir)

    for cluster in clusters:
        # Binary image marking the voxels that belong to this cluster.
        cluster_img = np.zeros_like(labels)
        cluster_img[labels == cluster] = 1
        cluster_img = self.masker.unmask(cluster_img)
        # NOTE(review): 0.25 is passed as the threshold for
        # get_ids_by_mask() -- confirm its intended meaning.
        study_ids = self.dataset.get_ids_by_mask(cluster_img, 0.25)
        analysis = meta.MetaAnalysis(self.dataset, study_ids)
        analysis.save_results(coact_dir, 'cluster_%d' % cluster)
def create_nii_files_test(dataset, target_folder, year=None, after=True):
    """ Run and save one meta-analysis per keyword, optionally split by year.

    Keywords are read from the 'keyword' column of ./keyword_list.csv. When
    `year` is given, study ids are restricted to those listed in
    ./raw_data/database.txt whose 'year' is greater than `year`
    (after=True) or less than or equal to `year` (after=False).

    Args:
        dataset: object providing get_studies(); passed to MetaAnalysis.
        target_folder: folder name, appended to './' to form the output
            location.
        year: optional cut-off year; None disables the filtering.
        after: selects which side of the cut-off is kept (see above).
    """
    keyword_table = pd.read_csv('./keyword_list.csv')

    filter_by_year = year is not None
    if filter_by_year:
        database = pd.read_csv("./raw_data/database.txt", sep='\t',
                               low_memory=False)
        if after:
            in_range = database['year'] > year
        else:
            in_range = database['year'] <= year
        allowed_ids = set(database.loc[in_range]['id'].tolist())

    for word in keyword_table['keyword'].tolist():
        study_ids = dataset.get_studies(features=word,
                                        frequency_threshold=0.05)
        if filter_by_year:
            study_ids = list(set(study_ids) & allowed_ids)
        analysis = meta.MetaAnalysis(dataset, study_ids)
        analysis.save_results('./' + target_folder, word)
def test_feature_search(self):
    """ Test feature-based Mappable search.

    Tests both the FeatureTable method and the Dataset wrapper, then
    smoke-tests running and saving a meta-analysis on the matched ids.
    """
    tt = self.dataset.feature_table
    features = tt.search_features(['f*'])
    self.assertEqual(len(features), 4)

    d = self.dataset
    ids = d.get_ids_by_features(['f*'], threshold=0.001)
    self.assertEqual(len(ids), 4)

    img_data = d.get_ids_by_features(['f1', 'f3', 'g1'], 0.001, func='max',
                                     get_image_data=True)
    self.assertEqual(img_data.shape, (228453, 5))

    # And some smoke-tests:
    # run a meta-analysis
    ma = meta.MetaAnalysis(d, ids)

    # save the results; try/finally guarantees the temporary directory is
    # removed even when save_results() raises
    tempdir = tempfile.mkdtemp()
    try:
        ma.save_results(tempdir + os.path.sep)
    finally:
        shutil.rmtree(tempdir)
def coactivation(dataset, seed, threshold=0.0, outroot=None):
    """ Compute and save coactivation map given input image as seed.

    This is essentially just a wrapper for a meta-analysis defined
    by the contrast between those studies that activate within the seed
    and those that don't.

    Args:
        dataset: a Dataset instance containing study and activation data.
        seed: a Nifti or Analyze image defining the boundaries of the seed
            region. Note that voxels do not need to be contiguous. When
            outroot is None this must be a filename string, because the
            default output root is derived from it.
        threshold: optional float indicating the threshold above which
            voxels are considered to be part of the seed ROI (default = 0)
        outroot: optional string to prepend to all coactivation images.
            If None, defaults to the seed path with everything from the
            first '.' of the file name removed, plus "_coact"
            (e.g. './masks/seed.nii.gz' -> './masks/seed_coact').
    """
    import os

    studies = dataset.get_ids_by_mask(seed, threshold=threshold)
    ma = meta.MetaAnalysis(dataset, studies)
    if outroot is None:
        # Strip the extension(s) from the file name only. Splitting the
        # whole path on '.' would cut it at the first dot of any directory
        # component (e.g. './seed.nii.gz' would yield just '_coact').
        directory, filename = os.path.split(seed)
        outroot = os.path.join(directory, filename.split('.')[0]) + "_coact"
    ma.save_results(outroot)
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import meta
import os

# Build the dataset from the activation database and attach the features.
dataset = Dataset('database.txt')
dataset.add_features('features.txt')

# Show which feature names are available.
print(dataset.get_feature_names())

# Select the studies matching the 'emo*' wildcard and report how many matched.
ids = dataset.get_ids_by_features('emo*', threshold=0.001)
print(len(ids))

# Run the meta-analysis on the selected studies and save it as 'emotion'.
ma = meta.MetaAnalysis(dataset, ids)
ma.save_results('emotion')
def create_nii_files(dataset, target_folder):
    """ Run and save one meta-analysis per keyword.

    Keywords are read from the 'keyword' column of ./keyword_list.csv. For
    each keyword the matching studies are selected with a frequency
    threshold of 0.05 and the results are saved under './' + target_folder,
    using the keyword as the second argument to save_results().

    Args:
        dataset: object providing get_studies(); passed to MetaAnalysis.
        target_folder: folder name, appended to './' to form the output
            location.
    """
    keywords = pd.read_csv('./keyword_list.csv')['keyword'].tolist()
    for word in keywords:
        study_ids = dataset.get_studies(features=word,
                                        frequency_threshold=0.05)
        analysis = meta.MetaAnalysis(dataset, study_ids)
        analysis.save_results('./' + target_folder, word)
def magic(dataset, method='coactivation', roi_mask=None,
          coactivation_mask=None, features=None, feature_threshold=0.05,
          min_voxels_per_study=None, min_studies_per_voxel=None,
          reduce_reference='pca', n_components=100,
          distance_metric='correlation', clustering_algorithm='kmeans',
          n_clusters=5, clustering_kwargs={}, output_dir=None, filename=None,
          coactivation_images=False, coactivation_threshold=0.1):
    ''' Execute a full clustering analysis pipeline.

    Args:
        dataset: a Dataset instance to extract all data from.
        method (str): the overall clustering approach to use. Valid options:
            'coactivation' (default): Clusters voxel within the ROI mask
                based on shared pattern of coactivation with the rest of
                the brain.
            'features': Each voxel is described by the product of its
                study data with the Dataset's feature table, divided by
                the number of studies in that table.
            'studies': Treat each study as a feature in an n-dimensional
                space. I.e., voxels will be assigned to the same cluster
                if they tend to be co-reported in similar studies.
        roi_mask: A string, nibabel image, or numpy array providing an
            inclusion mask of voxels to cluster. If None, the default mask
            in the Dataset instance is used (typically, all in-brain
            voxels).
        coactivation_mask: If method='coactivation', this mask defines the
            voxels to use when generating the pairwise distance matrix. For
            example, if a PFC mask is passed, all voxels in the roi_mask
            will be clustered based on how similar their patterns of
            coactivation with PFC voxels are. Can be a str, nibabel image,
            or numpy array.
        features (str or list): Optional string or list of strings
            specifying any feature names to use for study selection. E.g.,
            passing ['emotion', 'reward'] would retain for analysis only
            those studies associated with the features emotion or reward at
            a frequency greater than feature_threshold.
        feature_threshold (float): The threshold to use when selecting
            studies on the basis of features.
        min_voxels_per_study (int): Minimum number of active voxels a study
            must report in order to be retained in the dataset. By default,
            all studies are used.
        min_studies_per_voxel (int): Minimum number of studies a voxel must
            be active in in order to be retained in analysis. By default,
            all voxels are used.
        reduce_reference (str, scikit-learn object or None): The
            dimensionality reduction algorithm to apply to the feature
            space prior to the computation of pairwise distances. If a
            string is passed (either 'pca' or 'ica'), n_components must be
            specified. If None, no dimensionality reduction will be
            applied. Otherwise, must be a scikit-learn-style object that
            exposes a transform() method.
        n_components (int): Number of components to extract during the
            dimensionality reduction step. Only used if reduce_reference is
            a string.
        distance_metric (str): The distance metric to use when computing
            pairwise distances on the to-be-clustered voxels. Can be any of
            the metrics supported by sklearn.metrics.pairwise_distances.
        clustering_algorithm (str or scikit-learn object): the clustering
            algorithm to use. If a string, must be one of 'kmeans' or
            'minik'. Otherwise, any sklearn class that exposes a
            fit_predict() method.
        n_clusters (int): If clustering_algorithm is a string, the number
            of clusters to extract.
        clustering_kwargs (dict): Additional keywords to pass to the
            clustering object.
        output_dir (str): The directory to write results to. If None
            (default), returns the cluster label image rather than saving
            to disk.
        filename (str): Name of cluster label image file. Defaults to
            cluster_labels_k{k}.nii.gz, where k is the number of clusters.
        coactivation_images (bool): If True, saves a meta-analytic
            coactivation map for every ROI in the resulting cluster map.
            Only applies when output_dir is given.
        coactivation_threshold (float or int): If coactivation_images is
            True, this is the threshold used to define whether or not a
            study is considered to activate within a cluster ROI. Integer
            values are interpreted as minimum number of voxels within the
            ROI; floats are interpreted as the proportion of voxels.
            Defaults to 0.1 (i.e., 10% of all voxels within ROI must be
            active).

    Returns:
        The cluster label image (a Nifti1Image) when output_dir is None;
        otherwise None, with the results written to output_dir.

    Raises:
        ValueError: if method is not one of 'coactivation', 'features' or
            'studies'.
    '''
    # NOTE: the mutable default for clustering_kwargs is kept for interface
    # compatibility; it is only unpacked below, never modified.
    roi = Clusterable(dataset, roi_mask, min_voxels=min_voxels_per_study,
                      min_studies=min_studies_per_voxel, features=features,
                      feature_threshold=feature_threshold)

    if method == 'coactivation':
        reference = Clusterable(dataset, coactivation_mask,
                                min_voxels=min_voxels_per_study,
                                min_studies=min_studies_per_voxel,
                                features=features,
                                feature_threshold=feature_threshold)
    elif method == 'features':
        reference = deepcopy(roi)
        feature_data = dataset.feature_table.data
        n_studies = len(feature_data)
        reference.data = reference.data.dot(feature_data.values) / n_studies
    elif method == 'studies':
        reference = roi
    else:
        # Fail here with a clear message instead of hitting an unbound
        # `reference` further down.
        raise ValueError(
            "Unknown clustering method %r; expected 'coactivation', "
            "'features' or 'studies'." % (method,))

    if reduce_reference is not None:
        if isinstance(reduce_reference, string_types):
            reduce_reference = {
                'pca': sk_decomp.RandomizedPCA,
                'ica': sk_decomp.FastICA
            }[reduce_reference](n_components)

        transpose = (method == 'coactivation')
        reference = reference.transform(reduce_reference, transpose=transpose)

    if method == 'coactivation':
        distances = pairwise_distances(roi.data, reference.data,
                                       metric=distance_metric)
    else:
        distances = reference.data

    # TODO: add additional clustering methods
    if isinstance(clustering_algorithm, string_types):
        clustering_algorithm = {
            'kmeans': sk_cluster.KMeans,
            'minik': sk_cluster.MiniBatchKMeans
        }[clustering_algorithm](n_clusters, **clustering_kwargs)

    # Shift labels by one (as floats) so that the lowest label is 1.
    labels = clustering_algorithm.fit_predict(distances) + 1.

    header = roi.masker.get_header()
    header['cal_max'] = labels.max()
    header['cal_min'] = labels.min()
    voxel_labels = roi.masker.unmask(labels)
    img = nifti1.Nifti1Image(voxel_labels, None, header)

    if output_dir is not None:
        if not exists(output_dir):
            makedirs(output_dir)
        if filename is None:
            filename = 'cluster_labels_k%d.nii.gz' % n_clusters
        outfile = join(output_dir, filename)
        img.to_filename(outfile)

        # Write coactivation images
        if coactivation_images:
            # NOTE(review): np.unique(voxel_labels) presumably also yields
            # the background value outside the ROI -- confirm whether a
            # map for that value is intended.
            for l in np.unique(voxel_labels):
                roi_mask = np.copy(voxel_labels)
                roi_mask[roi_mask != l] = 0
                ids = dataset.get_studies(
                    mask=roi_mask,
                    activation_threshold=coactivation_threshold)
                ma = meta.MetaAnalysis(dataset, ids)
                ma.save_results(output_dir=join(output_dir, 'coactivation'),
                                prefix='cluster_%d_coactivation' % l)
    else:
        return img
# <codecell>

len(ids)

# <markdowncell>

# The resulting set includes 639 studies.
#
# Once we've got a set of studies we're happy with, we can run a simple meta-analysis, prefixing all output files with the string 'emotion' to distinguish them from other analyses we might run:

# <codecell>

# Run a meta-analysis on emotion
ids = dataset.get_ids_by_features('emo*', threshold=0.001)
ma = meta.MetaAnalysis(dataset, ids)
ma.save_results('emotion')

# <markdowncell>

# You should now have a set of Nifti-format brain images on your drive that display various meta-analytic results. The image names are somewhat cryptic; see documentation elsewhere for details. It's important to note that the meta-analysis routines currently implemented in Neurosynth aren't very sophisticated; they're designed primarily for efficiency (most analyses should take just a few seconds), and take multiple shortcuts as compared to other packages like ALE or MKDA. But with that caveat in mind (and one that will hopefully be remedied in the near future), Neurosynth gives you a streamlined and quick way of running large-scale meta-analyses of fMRI data. Of course, all of the images you could generate using individual features are already available on the Neurosynth website, so there's probably not much point in doing this kind of thing yourself unless you've defined entirely new features.
#
# ### More complex feature-based meta-analyses
#
# Fortunately, we're not constrained to using single features in our meta-analyses. Neurosynth implements a parsing expression grammar, which is a fancy way of saying you can combine terms according to syntactic rules--in this case, basic logical operations.
#
# For example, suppose we want to restrict our analysis to studies of emotion that do NOT use the terms 'reward' or 'pain', which we might construe as somewhat non-prototypical affective states. Then we could do the following:

# <codecell>

# Select studies matching 'emo*' while excluding those matching 'reward*' or 'pain*'
ids = dataset.get_ids_by_expression('emo* &~ (reward* | pain*)', threshold=0.001)
def neurosynthMatch(db, papers, author, outdir=None, outprefix=None):
    """ Match the given paper ids against the database and meta-analyze them.

    Both the paper ids and the database ids must be of the same kind: an id
    containing "/" is treated as a DOI, anything else as a PMID. Only the
    first id of each list is inspected. On a mismatch an error is printed
    and the process exits via sys.exit().

    Args:
        db: database handle; passed to getIDs() and meta.MetaAnalysis, and
            its `volume` attribute is used when saving images.
        papers: list of paper ids (DOIs or PMIDs).
        author: author name; used in messages and, with spaces removed, as
            the default output prefix.
        outdir: optional output directory; images are only written when
            this is truthy.
        outprefix: optional file name prefix for the saved images.

    Returns:
        The `images` attribute of the MetaAnalysis instance, or None (after
        printing a message) when no paper id is found in the database.
    """
    # Get all IDs in neuroSynth
    neurosynth_ids = getIDs(db)

    # Classify both id lists by looking at their first entry only.
    papers_are_doi = bool(re.search("[/]", papers[0]))
    database_is_doi = bool(re.search("[/]", neurosynth_ids[0]))

    if papers_are_doi != database_is_doi:
        if papers_are_doi:
            # Input is DOI but Neurosynth is PMID
            print("ERROR: Please provide doi to use the 525 database!")
        else:
            # Input is PMID but Neurosynth is DOI
            print("ERROR: Please provide pmid to use the 3000 database!")
        sys.exit()

    print("Search for " + str(len(papers)) + " ids in NeuroSynth database...")
    # Find intersection
    valid_ids = [x for x in papers if x in neurosynth_ids]

    if not valid_ids:
        print("No overlapping studies found in database for author " +
              author + ".")
        return None

    # Do meta analysis
    ma = meta.MetaAnalysis(db, valid_ids)

    # NOTE(review): images are selected by their position in
    # ma.images.keys(), which depends on the dict's key ordering --
    # confirm the positions below against the keys actually produced.
    image_names = ma.images.keys()
    # 1) the z score map corresponding to the probability that a study in
    #    the database is tagged with a particular feature given that
    #    activation is present at a particular voxel, FDR corrected .05
    dataFDR = ma.images[image_names[1]]
    # 2) the probability of feature given activation with uniform prior
    #    imposed
    dataPRIOR = ma.images[image_names[6]]
    # 3) the probability of feature given activation
    data = ma.images[image_names[7]]
    # 4) the probability of feature given activation, Z score
    dataZ = ma.images[image_names[8]]
    # 5) z score map corresponding to the probability of activation given
    #    that a study is tagged with the feature (author)
    datapAgF = ma.images[image_names[4]]

    # If user specifies an output directory
    if outdir:
        print("Saving results to output directory " + outdir + "...")
        if not outprefix:
            outprefix = author.replace(" ", "")

        # (image, file name pattern) pairs, saved in this order.
        outputs = [
            (datapAgF, '%s/%s_pAgF_z_FDR_0.05.nii.gz'),
            (dataFDR, '%s/%s_pFgA_z_FDR_0.05.nii.gz'),
            (dataPRIOR, '%s/%s_pFgA_given_pF=0.50.nii.gz'),
            (data, '%s/%s_pFgA.nii.gz'),
            (dataZ, '%s/%s_pFgA_z.nii.gz'),
        ]
        for image, pattern in outputs:
            imageutils.save_img(image, pattern % (outdir, outprefix),
                                db.volume)

    return ma.images