def test_dataset_save_and_load(self):
    # Smoke test of saving and loading
    t = tempfile.mktemp()
    self.dataset.save(t, keep_mappables=True)
    self.assertTrue(os.path.exists(t))
    dataset = Dataset.load(t)
    self.assertIsNotNone(dataset)
    self.assertIsNotNone(dataset.mappables)
    self.assertEqual(len(dataset.mappables), 5)
    # Now with the mappables deleted
    dataset.save(t)
    self.assertTrue(os.path.exists(t))
    dataset = Dataset.load(t)
    self.assertEqual(len(dataset.mappables), 0)
    os.unlink(t)
def create_voxel_x_feature_matrix(path_to_dataset, path_to_image_files):
    dataset = Dataset.load(path_to_dataset)
    feature_list = dataset.get_feature_names()
    vox_feat_matrix = zeros((dataset.volume.num_vox_in_mask, len(feature_list)),
                            dtype=int16)
    for (i, feature) in enumerate(feature_list):
        image_path = path_to_image_files + feature + '_pFgA_z.nii.gz'
        vox_feat_matrix[:, i] = dataset.volume.mask(image_path)
    return vox_feat_matrix
def test_dataset_save_and_load(self):
    # Smoke test of saving and loading
    t = tempfile.mktemp()
    self.dataset.save(t)
    self.assertTrue(os.path.exists(t))
    dataset = Dataset.load(t)
    self.assertIsNotNone(dataset)
    self.assertEqual(len(dataset.image_table.ids), 5)
    os.unlink(t)
def __init__(self, db, dataset=None, studies=None, features=None,
             reset_db=False, reset_dataset=False, download_data=False):
    """ Initialize instance from a pickled Neurosynth Dataset instance or a
    pair of study and analysis .txt files.

    Args:
        db: the SQLAlchemy database connection to use.
        dataset: an optional filename of a pickled neurosynth Dataset instance.
        studies: name of file containing activation data. If passed, a new
            Dataset instance will be constructed.
        features: name of file containing feature data.
        reset_db: if True, will drop and re-create all database tables before
            adding new content. If False (default), will add content
            incrementally.
        reset_dataset: if True, will regenerate the pickled Neurosynth dataset.
        download_data: if True, ignores any existing files and downloads the
            latest Neurosynth data files from GitHub.
    """
    if (studies is not None and not os.path.exists(studies)) \
            or settings.RESET_ASSETS:
        print("WARNING: RESETTING ALL NEUROSYNTH ASSETS!")
        self.reset_assets(download_data)

    # Load or create Neurosynth Dataset instance
    if dataset is None or reset_dataset or \
            (isinstance(dataset, str) and not os.path.exists(dataset)):
        print("\tInitializing a new Dataset...")
        if (studies is None) or (features is None):
            raise ValueError(
                "To generate a new Dataset instance, both studies and "
                "analyses must be provided.")
        dataset = Dataset(studies)
        dataset.add_features(features)
        dataset.save(settings.PICKLE_DATABASE)
    else:
        print("Loading existing Dataset...")
        dataset = Dataset.load(dataset)
        if features is not None:
            dataset.add_features(features)

    self.dataset = dataset
    self.db = db

    if reset_db:
        print("WARNING: RESETTING DATABASE!!!")
        self.reset_database()
def __init__(self, db, dataset=None, studies=None, features=None,
             reset_db=False, reset_dataset=False, download_data=True):
    """ Initialize instance from a pickled Neurosynth Dataset instance or a
    pair of study and analysis .txt files.

    Args:
        db: the SQLAlchemy database connection to use.
        dataset: an optional filename of a pickled neurosynth Dataset
            instance. Note that the Dataset must contain the list of
            Mappables (i.e., save() must have been called with
            keep_mappables set to True).
        studies: name of file containing activation data. If passed, a new
            Dataset instance will be constructed.
        features: name of file containing feature data.
        reset_db: if True, will drop and re-create all database tables before
            adding new content. If False (default), will add content
            incrementally.
        reset_dataset: if True, will regenerate the pickled Neurosynth dataset.
        download_data: if True, ignores any existing files and downloads the
            latest Neurosynth data files from GitHub.
    """
    if (studies is not None and not os.path.exists(studies)) \
            or settings.RESET_ASSETS:
        print "WARNING: RESETTING ALL NEUROSYNTH ASSETS!"
        self.reset_assets(download_data)

    # Load or create Neurosynth Dataset instance
    if dataset is None or reset_dataset or \
            (isinstance(dataset, basestring) and not os.path.exists(dataset)):
        print "\tInitializing a new Dataset..."
        if (studies is None) or (features is None):
            raise ValueError(
                "To generate a new Dataset instance, both studies and "
                "analyses must be provided.")
        dataset = Dataset(studies)
        dataset.add_features(features)
        dataset.save(settings.PICKLE_DATABASE, keep_mappables=True)
    else:
        print "\tLoading existing Dataset..."
        dataset = Dataset.load(dataset)
        if features is not None:
            dataset.add_features(features)

    self.dataset = dataset
    self.db = db

    if reset_db:
        print "WARNING: RESETTING DATABASE!!!"
        self.reset_database()
def get_dataset(self, force_load=False):
    if os.path.exists(os.path.join(self.datadir, 'dataset.pkl')) and not force_load:
        print('loading database from', os.path.join(self.datadir, 'dataset.pkl'))
        self.dataset = Dataset.load(
            os.path.join(self.datadir, 'dataset.pkl'))
    else:
        print('loading database - this takes a few minutes')
        self.dataset = Dataset(os.path.join(self.datadir, 'database.txt'))
        self.dataset.add_features(
            os.path.join(self.datadir, 'features.txt'))
        self.dataset.save(os.path.join(self.datadir, 'dataset.pkl'))
def functional_preference_profile_prep():
    """ Function for extracting functional preference profile data """
    from neurosynth.base.dataset import Dataset
    dataset = Dataset.load("data/neurosynth_60_0.4.pkl")

    nicknames = pd.read_csv('data/v4-topics-60.txt', delimiter='\t')
    nicknames['topic_name'] = nicknames.apply(
        lambda row: '_'.join([str(row.topic_number)] + row.top_words.split(' ')[0:3]),
        axis=1)
    nicknames = nicknames.sort_values('topic_name')

    word_keys = pd.read_csv("data/topic_keys60-july_cognitive.csv")
    word_keys['top_2'] = word_keys['Top words'].apply(
        lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1])
    word_keys['topic_name'] = "topic" + word_keys['topic'].astype('str')

    return dataset, nicknames, word_keys
def __init__( self, metric="emd", image_type="pAgF", name=None, multi=True, image_transform="block_reduce", downsample=8, auto_save=True, ): self.image_type = image_type self.multi = multi self.downsample = downsample self.auto_save = auto_save if callable(metric): self.metric = metric elif metric == "emd": self.metric = euclidean_emd else: raise ValueError("{metric} is not a valid metric".format(**locals())) if callable(image_transform): self.image_transform = image_transform elif image_transform == "block_reduce": from functools import partial self.image_transform = partial(block_reduce, factor=downsample) # def block_reduce_transform(image): # """The default transformation.""" # return block_reduce(image, downsample, blur) # self.image_transform = block_reduce_transform else: raise ValueError(("{image_transform} is not a valid" "transform function").format(**locals())) self.name = name if name else time.strftime("analysis_from_%m-%d_%H-%M-%S") try: self.data = Dataset.load("data/dataset.pkl") except FileNotFoundError: self.data = _getdata()
from neurosynth.analysis.cluster import magic
from neurosynth.base.dataset import Dataset

# mydir = "/home/delavega/projects/permutation_clustering/"
mydir = "../data/"

dataset = Dataset.load(mydir + '/datasets/abs_60topics_filt_jul.pkl')

roi_mask = mydir + '../masks/new_medial_fc_30.nii.gz'
global_mask = mydir + "../masks/MNI152_T1_2mm_brain.nii.gz"

magic(dataset, 10, method='coactivation',
      features=['topic57', 'topic32', 'topic39', 'topic44'],
      output_dir='../results/ef_cluster/all_cluster/',
      min_studies_per_voxel=100, filename='okay')
t = dlmread('/Users/lukechang/Dropbox/Github/toolbox/Python/ImageProcessing/tal2icbm_fsl.mat');
dat.volInfo.mat = inv(t)*dat.volInfo.mat;
dat.fullpath = '/Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend.nii';
write(dat)

# 2) Reorient using FSL - Unix
fslreorient2std Friend Friend_Or

# 3) Coregister to 2mm MNI space - Unix
/usr/local/fsl/bin/flirt -in /Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend_Or.nii.gz -ref /usr/local/fsl/data/standard/MNI152_T1_2mm_brain -out /Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend_Or_Mni.nii.gz -omat /Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend_Or_Mni.mat -bins 256 -cost corratio -searchrx -90 90 -searchry -90 90 -searchrz -90 90 -dof 12 -interp trilinear

# 4) Decode - Python
DATASET_FILE = '/Users/lukechang/Dropbox/Github/neurosynth/topics.pkl'
PREFIX = '/Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/'
INFILE = 'Friend_Or_Mni.nii.gz'

dataset = Dataset.load(DATASET_FILE)
decoder = decode.Decoder(dataset)  # takes awhile to load, should only do this once
img = imageutils.load_imgs(PREFIX + INFILE, decoder.mask)
result = decoder.decode(img)
np.savetxt(PREFIX + 'Friend_Decoded.txt', result)

# 5) Threshold at .001 - Unix
fslmaths Friend_Or_Mni -thr 3 Friend_Or_Mni_001

# 6) Decode thresholded map - Python
DATASET_FILE = '/Users/lukechang/Dropbox/Github/neurosynth/topics.pkl'
PREFIX = '/Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/'
INFILE = 'Friend_Or_Mni_001.nii.gz'

dataset = Dataset.load(DATASET_FILE)
decoder = decode.Decoder(dataset)  # takes awhile to load, should only do this once
img = imageutils.load_imgs(PREFIX + INFILE, decoder.mask)
# -*- coding: utf-8 -*-
# Here I use Yeo to test Neurosynth's classify functions

from neurosynth.base.dataset import Dataset
from neurosynth.analysis import classify
import os
import itertools
import re
import numpy as np
import pdb
import sys
from nipype.interfaces import fsl
from sklearn.ensemble import GradientBoostingClassifier

dataset = Dataset.load('../data/pickled.pkl')

masklist = ['7Networks_Liberal_1.nii.gz', '7Networks_Liberal_2.nii.gz',
            '7Networks_Liberal_3.nii.gz', '7Networks_Liberal_4.nii.gz',
            '7Networks_Liberal_5.nii.gz', '7Networks_Liberal_6.nii.gz',
            '7Networks_Liberal_7.nii.gz']

rootdir = '../masks/Yeo_JNeurophysiol11_MNI152/standardized/'


class maskClassifier:
    def __init__(self, classifier=GradientBoostingClassifier(),
                 param_grid={'max_features': np.arange(2, 140, 44),
                             'n_estimators': np.arange(5, 141, 50),
                             'learning_rate': np.arange(0.05, 1, 0.1)},
                 thresh=0.08):
        diffs = {}
from base.classifiers import OnevsallContinuous
from neurosynth.analysis import cluster
from neurosynth.base.dataset import Dataset
from sklearn.metrics import r2_score
from sklearn.linear_model import Ridge
import cPickle
from sklearn.decomposition import RandomizedPCA

dataset = Dataset.load('../data/datasets/abs_60topics_filt_jul.pkl')

roi_mask = '../masks/mpfc_nfp.nii.gz'
global_mask = "../masks/MNI152_T1_2mm_brain.nii.gz"

n_regions = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# print resolution

clf_file = "../data/clfs/all_vox_Ridge_mpfc.pkl"

print "trying to load"
try:
    clf = OnevsallContinuous.load(clf_file)
except:
    print "Loading failed"
    clf = OnevsallContinuous(dataset, None, classifier=Ridge())
    clf.classify(scoring=r2_score, processes=8)
    try:
        clf.save(clf_file)
    except:
        pass

reduc = RandomizedPCA(n_components=100)
from neurosynth.base.dataset import Dataset
import joblib
from sklearn.metrics import pairwise_distances
from sklearn.preprocessing import scale
from neurosynth.analysis.cluster import Clusterable
from fastcluster import ward

dataset = Dataset.load('/projects/delavega/dbs/db_v6_topics-100.pkl')

roi = Clusterable(
    dataset, '/home/delavega/projects/classification/masks/l_70_mask.nii.gz')

saved_pca = '/projects/delavega/clustering/dv_v6_reference_pca.pkl'
reference = joblib.load(saved_pca)

distances = pairwise_distances(roi.data, reference.data, metric='correlation')
distances = scale(distances, axis=1)

joblib.dump(
    distances,
    '/home/delavega/projects/clustering/results/hierarchical/v6_distances_l_70_scaled.pkl')

Z = ward(distances)

joblib.dump(
    Z,
    '/home/delavega/projects/clustering/results/hierarchical/v6_ward_l70_scaled.pkl')
import numpy as np
import nibabel as nib
from neurosynth.base.dataset import Dataset
from neurosynth.analysis.cluster import Clusterable
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances
from nibabel import nifti1
from copy import deepcopy
from os.path import exists, join, split
from os import makedirs
import pickle


def binarize_nib(img):
    img = deepcopy(img)
    img.get_data()[img.get_data() != 0] = 1
    return img


dataset = Dataset.load('../data/0.6/datasets/db_v6_topics-100.pkl')

infile = '../masks/mars/NeubertCingulateOrbitoFrontalParcellation/Neubert_2mm_medial_bilateral.nii.gz'
outdir = '../results/clustering/matched/mars/'
outfile = join(outdir, split(infile)[-1])

saved_pca = '../results/clustering/dv_v6_reference_pca.pkl'
reference = pickle.load(open(saved_pca, 'r'))

match_roi = nib.load(infile)
roi = Clusterable(dataset, binarize_nib(match_roi))

print "Computing roi ref distances"
distances = pairwise_distances(roi.data, reference.data, metric='correlation')

n_clusters = np.unique(match_roi.get_data()).nonzero()[0].shape[0]
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import decode, meta
from os.path import join

parent_dir = '/Users/Katie/Dropbox/Data/neurosynth_current_data/'
roi_dir = '/Users/Katie/Dropbox/Data/NSvBM-decoding/ROIs/'
sink_dir = '/Users/Katie/Dropbox/Data/NSvBM-decoding/'

paracentral = join(roi_dir, 'l-paracentral.nii.gz')
ofc = join(roi_dir, 'r-ofc.nii.gz')
uncus = join(roi_dir, 'r-ofc.nii.gz')
visual = join(roi_dir, 'smith-rsn70-1.nii.gz')

mask_list = [paracentral, ofc, uncus, visual]
mask_names = ['paracentral', 'orbitofrontal', 'uncus', 'visual']

dataset = Dataset.load(join(parent_dir, 'dataset.pkl'))

# Run a meta-analysis of the studies activating each ROI
for i, mask in enumerate(mask_list):
    ids = dataset.get_studies(mask=mask)
    ma = meta.MetaAnalysis(dataset, ids)
    ma.save_results(output_dir='.', prefix=mask_names[i])

# Decode each ROI's meta-analysis map
decoder = decode.Decoder(dataset)
result = decoder.decode(
    ['{0}_pFgA_z.nii.gz'.format(name) for name in mask_names],
    save=join(sink_dir, 'decoded_{0}.txt'.format('_'.join(mask_names))))
from neurosynth.base.dataset import Dataset
import neurosynth.base.imageutils as it

dataset = Dataset.load("../data/datasets/abs_topics_filt.pkl")

print "Filtering voxels..."
data = dataset.image_table.data.toarray()
voxel_mask = data.mean(axis=1) > 0.005

img = it.load_imgs('../masks/ward/30.nii.gz', dataset.masker)
good_voxels = img[voxel_mask]

it.save_img(good_voxels, "../masks/ward/30_masked.nii.gz", dataset.masker)
# <markdowncell>

# Because this takes a while, we'll save our Dataset object to disk. That way, the next time we want to use it, we won't have to sit through the whole creation operation again:

# <codecell>

dataset.save('dataset.pkl')

# <markdowncell>

# Now, in the future, instead of waiting, we can just load the dataset from file:

# <codecell>

dataset = Dataset.load('dataset.pkl')  # Note the capital D in the second Dataset--load() is a class method

# <markdowncell>

# ## Doing stuff with Neurosynth
#
# Now that our Dataset has both activation data and some features, we're ready to start doing some analyses! By design, Neurosynth focuses on facilitating simple, fast, and modestly useful analyses. This means you probably won't break any new ground using Neurosynth, but you should be able to supplement results you've generated using other approaches with a bunch of nifty analyses that take just 2 - 3 lines of code.
#
# ### Simple feature-based meta-analyses
#
# The most straightforward thing you can do with Neurosynth is use the features we just loaded above to perform automated large-scale meta-analyses of the literature. Let's see what features we have:

# <codecell>

dataset.get_feature_names()

# <markdowncell>
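# As a rough sketch of where this is heading: running a meta-analysis for a single feature might look something like the cell below. The 'emotion' feature and the output location are placeholder choices for illustration, not fixed names.

# <codecell>

from neurosynth.analysis import meta

# Select studies that load on the feature, then run and save a meta-analysis
ids = dataset.get_studies(features='emotion', frequency_threshold=0.001)
ma = meta.MetaAnalysis(dataset, ids)
ma.save_results(output_dir='.', prefix='emotion')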
from sklearn.metrics import roc_auc_score
import sys
import csv
from base.mv import bootstrap_mv_full
from neurosynth.base.dataset import Dataset
from sklearn.naive_bayes import GaussianNB

dataset = Dataset.load("../permutation_clustering/abs_60topics_filt_jul.pkl")

print sys.argv
try:
    cmd, iterations, job_id = sys.argv
except:
    raise Exception("Incorrect number of arguments")

cognitive_topics = [
    'topic' + topic[0]
    for topic in csv.reader(open('topic_keys60-july_cognitive.csv', 'rU'))
    if topic[1] == "T"
]

results = bootstrap_mv_full(
    dataset, GaussianNB(), roc_auc_score,
    '../permutation_clustering/results/medial_fc_30_kmeans/kmeans_k9/cluster_labels.nii.gz',
    features=cognitive_topics, processes=None, boot_n=int(iterations),
    outfile='results/bootstrap_full_mv_' + str(iterations) + '_mFC_60_ ' +
            str(job_id) + '.csv')
###
# This script shuffles the classification labels and reruns classification many
# times, to get data for calculating a confidence interval around the null
# hypothesis.

from sklearn.linear_model import RidgeClassifier
from base.classifiers import OnevsallClassifier
from neurosynth.base.dataset import Dataset
from sklearn.metrics import roc_auc_score
import pickle
from random import shuffle


def shuffle_data(classifier):
    for region in classifier.c_data:
        shuffle(region[1])


d_abs_topics_filt = Dataset.load('../data/datasets/abs_topics_filt_july.pkl')

results = []
clf = OnevsallClassifier(d_abs_topics_filt, '../masks/Ward/50.nii.gz',
                         cv='4-Fold', thresh=10, thresh_low=0, memsave=True,
                         classifier=RidgeClassifier())
clf.load_data(None, None)
clf.initalize_containers(None, None, None)

for i in range(0, 500):
    shuffle_data(clf)
    clf.classify(scoring=roc_auc_score, processes=8, class_weight=None)
    results = list(clf.class_score) + results
    print(i),
def dataset(self):
    return Dataset.load(settings.PICKLE_DATABASE)
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import decode

# Load a saved Dataset file. This example will work with the
# file saved in the create_a_new_dataset_and_load_features example.
dataset = Dataset.load('dataset.pkl')

# Initialize a new Decoder instance with a few features. Note that
# if you don't specify a subset of features, ALL features in the
# Dataset will be loaded, which will take a long time because
# meta-analysis images for each feature need to be generated.
decoder = decode.Decoder(
    dataset, features=['emotion', 'pain', 'somatosensory', 'wm', 'inhibition'])

# Decode three images. The sample images here are coactivation
# maps for ventral, dorsal, and posterior insula clusters,
# respectively. Maps are drawn from data reported in
# Chang, Yarkoni, Khaw, & Sanfey (2012); see paper for details.
# We save the output--an image x features matrix--to a file.
# By default, the decoder will use Pearson correlation, i.e.,
# each value in our results table indicates the correlation
# between the input image and each feature's meta-analysis image.
result = decoder.decode(['vIns.nii.gz', 'dIns.nii.gz', 'pIns.nii.gz'],
                        save='decoding_results.txt')
#     pass

#     def fit(self, X):
#         self.cf = MFastHCluster(method='ward')
#         self.cf.linkage(X)

#     def predict(self, n):
#         for i in range(1, self.cf.cut(0).shape[0]):
#             labels = self.cf.cut(i)
#             if np.bincount(labels).shape[0] == n:
#                 break
#         return labels

mydir = "/projects/delavega/clustering/"

dataset = Dataset.load(mydir + 'abs_60topics_filt_jul.pkl')
roi_mask = mydir + 'masks/new_medial_fc_30.nii.gz'

ns = [3, 9]
save_images = True
output_dir = join(mydir, 'results/MFC/')
out_model = None

roi = Clusterable(dataset, roi_mask, min_studies=80)
reference = Clusterable(dataset, min_studies=80)

reduce_reference = sk_decomp.RandomizedPCA(100)
reference = reference.transform(reduce_reference, transpose=True)

# distances = pairwise_distances(roi.data, reference.data,
#                                metric='correlation')
def load_dataset_instance(instance_filename):
    dataset = Dataset.load('./raw_data/' + instance_filename + '.pkl')
    return dataset
from neurosynth.base.dataset import Dataset
from neurosynth.analysis.cluster import Clusterable
from sklearn import decomposition as sk_decomp
import pickle

dataset = Dataset.load(
    '/home/delavega/projects/classification/data/datasets/abs_60topics_filt_jul.pkl')

out = '/projects/delavega/clustering/dv_v5_reference_min_80_pca.pkl'

reference = Clusterable(dataset, min_studies=80)

print "Running PCA"
reduce_reference = sk_decomp.RandomizedPCA(100)
reference = reference.transform(reduce_reference, transpose=True)

pickle.dump(reference, open(out, 'w'))
neurosynth_data_dir = "/home/data/nbc/misc-projects/niconn-macm/code/neurosynth/" if not op.isfile(op.join(neurosynth_data_dir, "dataset.pkl")): # Create Dataset instance from a database file. dataset = Dataset(op.join(neurosynth_data_dir, "database.txt")) # Load features from file dataset.add_features(op.join(neurosynth_data_dir, "features.txt")) # Pickle the Dataset to file so we can use Dataset.load() next time # instead of having to sit through the generation process again. dataset.save(op.join(neurosynth_data_dir, "dataset.pkl")) # Load pickled Dataset--assumes you've previously saved it. If not, # follow the create_a_new_dataset_and_load_features example. dataset = Dataset.load(op.join(neurosynth_data_dir, "dataset.pkl")) # Get the full list of feature names feature_list = dataset.get_feature_names() # Run a meta-analysis on each feature, and save all the results to # a directory called results. Note that the directory will not be # created for you, so make sure it exists. # Here we use the default frequency threshold of 0.001 (i.e., a # study is said to have a feature if more than 1 in every 1,000 # words is the target word), and an FDR correction level of 0.05. out_dir = "/home/data/nbc/misc-projects/meta-gradients/code/feature_maps" for tmp_feature in feature_list: print(tmp_feature) meta.analyze_features(
import datetime

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.linear_model import LassoCV, LassoLarsCV, LassoLarsIC, RidgeClassifier, RidgeClassifierCV
from sklearn.ensemble import GradientBoostingClassifier

from base.tools import Logger
from base.pipelines import pipeline
from base.classifiers import PairwiseClassifier, OnevsallClassifier

from neurosynth.base.dataset import Dataset
from sklearn.metrics import roc_auc_score

now = datetime.datetime.now()

n_topics = 60
dataset = Dataset.load('../data/0.6/datasets/db_v6_topics-%d.pkl' % n_topics)

# cognitive_topics = ['topic' + topic[0] for topic in csv.reader(
#     open('../data/unprocessed/abstract_topics_filtered/topic_sets/topic_keys' + str(topics) + '-july_cognitive.csv', 'rU')) if topic[1] == "T"]
# junk_topics = ['topic' + topic[0] for topic in csv.reader(
#     open('../data/unprocessed/abstract_topics_filtered/topic_sets/topic_keys' + str(topics) + '-july_cognitive.csv', 'rU')) if topic[1] == "F"]

# Analyses


def complete_analysis(dataset, dataset_name, name, masklist, processes=1,
                      features=None):