Example #1
def neurosynthInit(dbsize):
    print "Initializing Neurosynth database..."
    dataset = Dataset('data/' + dbsize + 'terms/database.txt')
    dataset.add_features('data/' + dbsize + 'terms/features.txt')    

    #print "Loading standard space brain..."
    #img = nb.load("data/MNI152_T1_2mm_brain.nii.gz")
    #standard = img.get_data()
    return dataset
Example #2
def neurosynthInit(dbsize):
    """Initialize Neurosynth Database, return database object"""
    print "Initializing Neurosynth database..."
    db = Dataset('data/' + str(dbsize) + 'terms/database.txt')
    db.add_features('data/' + str(dbsize) + 'terms/features.txt')

    #print "Loading standard space brain..."
    #img = nb.load("data/MNI152_T1_2mm_brain.nii.gz")
    #standard = img.get_data()
    return db
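A hedged call sketch for the helper above; the dbsize value is a placeholder for whatever data/<dbsize>terms/ folder layout actually exists on disk:

db = neurosynthInit(3000)  # assumes data/3000terms/database.txt and features.txt exist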
Example #3
    def get_dataset(self, force_load=False):
        if os.path.exists(os.path.join(self.datadir,
                                       'dataset.pkl')) and not force_load:
            print('loading database from',
                  os.path.join(self.datadir, 'dataset.pkl'))
            self.dataset = Dataset.load(
                os.path.join(self.datadir, 'dataset.pkl'))
        else:
            print('loading database - this takes a few minutes')
            self.dataset = Dataset(os.path.join(self.datadir, 'database.txt'))
            self.dataset.add_features(
                os.path.join(self.datadir, 'features.txt'))

            self.dataset.save(os.path.join(self.datadir, 'dataset.pkl'))
    def __init__(self,
                 db,
                 dataset=None,
                 studies=None,
                 features=None,
                 reset_db=False,
                 reset_dataset=False,
                 download_data=False):
        """
        Initialize instance from a pickled Neurosynth Dataset instance or a
        pair of study and analysis .txt files.
        Args:
            db: the SQLAlchemy database connection to use.
            dataset: an optional filename of a pickled neurosynth Dataset
                instance.
            studies: name of file containing activation data. If passed, a new
                Dataset instance will be constructed.
            features: name of file containing feature data.
            reset_db: if True, will drop and re-create all database tables
                before adding new content. If False (default), will add content
                incrementally.
            reset_dataset: if True, will regenerate the pickled Neurosynth
                dataset.
            download_data: if True, ignores any existing files and downloads
                the latest Neurosynth data files from GitHub.
        """

        if (studies is not None and not os.path.exists(studies)) \
                or settings.RESET_ASSETS:
            print("WARNING: RESETTING ALL NEUROSYNTH ASSETS!")
            self.reset_assets(download_data)

        # Load or create Neurosynth Dataset instance
        if dataset is None or reset_dataset or (isinstance(dataset, str) and
                                                not os.path.exists(dataset)):
            print("\tInitializing a new Dataset...")
            if (studies is None) or (features is None):
                raise ValueError(
                    "To generate a new Dataset instance, both studies and "
                    "analyses must be provided.")
            dataset = Dataset(studies)
            dataset.add_features(features)
            dataset.save(settings.PICKLE_DATABASE)
        else:
            print("Loading existing Dataset...")
            dataset = Dataset.load(dataset)
            if features is not None:
                dataset.add_features(features)

        self.dataset = dataset
        self.db = db

        if reset_db:
            print("WARNING: RESETTING DATABASE!!!")
            self.reset_database()
def _getdata():
    """Downloads data from neurosynth and returns it as a Dataset.

    Also pickles the dataset for future use."""
    LOG.warning("Downloading and processing Neurosynth database")

    os.makedirs("data", exist_ok=True)
    from neurosynth.base.dataset import download

    download(path="data", unpack=True)

    data = Dataset("data/database.txt")
    data.add_features("data/features.txt")
    data.save("data/dataset.pkl")
    return data
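Because _getdata() pickles the Dataset it builds, later runs can try the pickle first and only fall back to downloading; a minimal reuse sketch (the same pattern appears in a later example):

from neurosynth.base.dataset import Dataset

try:
    data = Dataset.load("data/dataset.pkl")  # reuse the pickle written by _getdata()
except FileNotFoundError:
    data = _getdata()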
Example #6
 def test_dataset_save_and_load(self):
     # smoke test of saving and loading
     t = tempfile.mktemp()
     self.dataset.save(t, keep_mappables=True)
     self.assertTrue(os.path.exists(t))
     dataset = Dataset.load(t)
     self.assertIsNotNone(dataset)
     self.assertIsNotNone(dataset.mappables)
     self.assertEqual(len(dataset.mappables), 5)
     # Now with the mappables deleted
     dataset.save(t)
     self.assertTrue(os.path.exists(t))
     dataset = Dataset.load(t)
     self.assertEqual(len(dataset.mappables), 0)
     os.unlink(t)
def create_voxel_x_feature_matrix(path_to_dataset, path_to_image_files):
    # Assumes zeros/int16 are imported from numpy and Dataset from neurosynth.base.dataset.
    dataset = Dataset.load(path_to_dataset)
    feature_list = dataset.get_feature_names()
    vox_feat_matrix = zeros((dataset.volume.num_vox_in_mask, len(feature_list)), dtype=int16)
    for (i, feature) in enumerate(feature_list):
        image_path = path_to_image_files + feature + '_pFgA_z.nii.gz'
        vox_feat_matrix[:, i] = dataset.volume.mask(image_path)
    return vox_feat_matrix
Example #9
def generate_maps(terms,output_dir):

    f,d = download_data()
    features = pandas.read_csv(f,sep="\t")  
    database = pandas.read_csv(d,sep="\t")  

    output_dir = "%s/maps" %(output_dir)

    print "Deriving pickled maps to extract relationships from..."
    dataset = Dataset(d)
    dataset.add_features(f)
    for t in range(len(terms)):
        term = terms[t]
        print "Generating P(term|activation) for term %s, %s of %s" %(term,t,len(terms))
        ids = dataset.get_ids_by_features(term)
        maps = meta.MetaAnalysis(dataset,ids)
        term_name = term.replace(" ","_")
        pickle.dump(maps.images["pFgA_z"],open("%s/%s_pFgA_z.pkl" %(output_dir,term_name),"wb"))
Example #10
 def test_dataset_initializes(self):
     """ Test whether dataset initializes properly. """
     Dataset(get_test_data_path() + 'test_dataset.txt', get_test_data_path() + 'test_features.txt')
     self.assertIsNotNone(self.dataset.masker)
     self.assertIsNotNone(self.dataset.image_table)
     self.assertEqual(len(self.dataset.mappables), 5)
     self.assertIsNotNone(self.dataset.masker)
     self.assertIsNotNone(self.dataset.r)
     self.assertIsNotNone(self.dataset.mappables[0].data['extra_field'].iloc[2], 'field')
 def test_dataset_save_and_load(self):
     # smoke test of saving and loading
     t = tempfile.mktemp()
     self.dataset.save(t)
     self.assertTrue(os.path.exists(t))
     dataset = Dataset.load(t)
     self.assertIsNotNone(dataset)
     self.assertEqual(len(dataset.image_table.ids), 5)
     os.unlink(t)
Example #12
    def __init__(self, db, dataset=None, studies=None, features=None,
                 reset_db=False, reset_dataset=False, download_data=True):
        """
        Initialize instance from a pickled Neurosynth Dataset instance or a
        pair of study and analysis .txt files.

        Args:
            db: the SQLAlchemy database connection to use.
            dataset: an optional filename of a pickled neurosynth Dataset
                instance.
                Note that the Dataset must contain the list of Mappables (i.e.,
                    save() must have been called with keep_mappables set to
                    True).
            studies: name of file containing activation data. If passed, a new
                Dataset instance will be constructed.
            features: name of file containing feature data.
            reset_db: if True, will drop and re-create all database tables
                before adding new content. If False (default), will add content
                incrementally.
            reset_dataset: if True, will regenerate the pickled Neurosynth
                dataset.
            download_data: if True, ignores any existing files and downloads
                the latest Neurosynth data files from GitHub.
        """

        if (studies is not None and not os.path.exists(studies)) \
                or settings.RESET_ASSETS:
            print "WARNING: RESETTING ALL NEUROSYNTH ASSETS!"
            self.reset_assets(download_data)

        # Load or create Neurosynth Dataset instance
        if dataset is None or reset_dataset or (isinstance(dataset, basestring) and not os.path.exists(dataset)):

            print "\tInitializing a new Dataset..."
            if (studies is None) or (features is None):
                raise ValueError(
                    "To generate a new Dataset instance, both studies and "
                    "analyses must be provided.")
            dataset = Dataset(studies)
            dataset.add_features(features)
            dataset.save(settings.PICKLE_DATABASE, keep_mappables=True)
        else:
            print "\tLoading existing Dataset..."
            dataset = Dataset.load(dataset)
            if features is not None:
                dataset.add_features(features)

        self.dataset = dataset
        self.db = db

        if reset_db:
            print "WARNING: RESETTING DATABASE!!!"
            self.reset_database()
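The __init__ above belongs to a larger builder class whose name is not shown; a hedged construction sketch, treating the class name (DatabaseBuilder here) and the SQLAlchemy session as placeholders:

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# Placeholder database connection; the real project supplies its own session.
engine = create_engine('sqlite:///neurosynth.db')
db = sessionmaker(bind=engine)()

builder = DatabaseBuilder(db,
                          studies='data/database.txt',
                          features='data/features.txt',
                          reset_db=True)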
Example #13
def fetch_neurosynth_dataset(data_dir, return_pkl=True):
    """Downloads the Neurosynth dataset

    Parameters
    ----------
    data_dir : str
        Directory in which to download the dataset.
    return_pkl : bool
        If true, creates and returns the .pkl file. Otherwise returns
        the dataset and features files.

    Returns
    -------
    tuple, str
        If return_pkl is False, returns a tuple containing the paths to the
        database.txt and features.txt files. Otherwise returns the path
        to the .pkl file.

    """
    if not os.path.isdir(data_dir):
        os.mkdir(data_dir)

    dataset_file = os.path.join(data_dir, "database.txt")
    if not os.path.isfile(dataset_file):
        logging.info("Downloading the Neurosynth dataset.")
        download(data_dir, unpack=True)
    feature_file = os.path.join(data_dir, "features.txt")

    if return_pkl:
        pkl_file = os.path.join(data_dir, "dataset.pkl")
        if not os.path.isfile(pkl_file):
            logging.info(
                "Converting Neurosynth data to a .pkl file. This may take a while."
            )
            dataset = Dataset(dataset_file, feature_file)
            dataset.save(pkl_file)
        return pkl_file

    return (dataset_file, feature_file)
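A minimal usage sketch for the function above; the data directory name is arbitrary, and Dataset.load is the standard neurosynth loader used elsewhere in these examples:

from neurosynth.base.dataset import Dataset

pkl_file = fetch_neurosynth_dataset("neurosynth_data", return_pkl=True)
dataset = Dataset.load(pkl_file)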
Example #14
def save_dataset_instance(db_filename, kw_filename, instance_filename):
    # Create a new Dataset instance
    dataset = Dataset('./raw_data/' + db_filename + '.txt')
    # Add some features
    dataset.add_features('./raw_data/' + kw_filename + '.txt')
    # Save new file
    dataset.save('./raw_data/' + instance_filename + '.pkl')
    return dataset
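This helper pairs with load_dataset_instance shown in a later example; a hedged round-trip sketch with placeholder file stems (they expand to ./raw_data/<name>.txt and .pkl as above):

dataset = save_dataset_instance('database', 'features', 'dataset_instance')
dataset = load_dataset_instance('dataset_instance')  # reload the pickle on a later run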
Example #15
def extract_relations(terms,maps_dir,output_dir):

    if isinstance(terms,str):
        terms = [terms]

    f,d = download_data()
    features = pandas.read_csv(f,sep="\t")  
    database = pandas.read_csv(d,sep="\t")  
    allterms = features.columns.tolist()
    allterms.pop(0)  #pmid

    dataset = Dataset(d)
    dataset.add_features(f)
    image_matrix = pandas.DataFrame(columns=range(228453))
    for t in range(len(allterms)):
        term = allterms[t]
        term_name = term.replace(" ","_")
        pickled_map = "%s/%s_pFgA_z.pkl" %(maps_dir,term_name)
        if not os.path.exists(pickled_map):
            print "Generating P(term|activation) for term %s" %(term)
            ids = dataset.get_ids_by_features(term)
            maps = meta.MetaAnalysis(dataset,ids)
            pickle.dump(maps.images["pFgA_z"],open(pickled_map,"wb"))
        map_data = pickle.load(open(pickled_map,"rb"))
        image_matrix.loc[term] = map_data

    sims = pandas.DataFrame(columns=image_matrix.index)
    tuples = []
    for t1 in range(len(terms)):
        term1 = terms[t1]
        print "Extracting NeuroSynth relationships for term %s..." %(term1)
        for t2 in range(len(terms)):
            term2 = terms[t2]
            if t1<t2:
                score = pearsonr(image_matrix.loc[term1],image_matrix.loc[term2])[0]
                tuples.append((term1,term2,score))

    save_relations(output_dir=output_dir,relations=tuples)
Example #16
class TestAnalysis(unittest.TestCase):

  def setUp(self):
    """ Create a new Dataset and add features. """
    self.dataset = Dataset('data/test_dataset.txt')
    self.dataset.add_features('data/test_features.txt')
  
  def test_meta_analysis(self):
    """ Test full meta-analysis stream. """
    pass

  def test_decoder(self):
    pass

  def test_coactivation(self):
    """ Test seed-based coactivation. """ 
    pass

  def test_roi_averaging(self):
    pass

  def test_get_random_voxels(self):
    pass
Example #17
def functional_preference_profile_prep():
	"""
	Function for extracting functional preference profile data
	"""
	from neurosynth.base.dataset import Dataset
	dataset = Dataset.load("data/neurosynth_60_0.4.pkl")

	nicknames = pd.read_csv('data/v4-topics-60.txt', delimiter='\t')
	nicknames['topic_name'] = nicknames.apply(lambda row: '_'.join([str(row.topic_number)] + row.top_words.split(' ')[0:3]), axis=1)
	nicknames = nicknames.sort_values('topic_name')

	word_keys = pd.read_csv("data/topic_keys60-july_cognitive.csv")
	word_keys['top_2'] = word_keys['Top words'].apply(lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1])
	word_keys['topic_name'] = "topic" + word_keys['topic'].astype('str')
	
	return dataset,nicknames,word_keys
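A one-line usage sketch showing how the three returned objects are unpacked:

dataset, nicknames, word_keys = functional_preference_profile_prep()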
Example #18
    def import_neurosynth_git(self):
        # Add the appropriate neurosynth git folder to the python path. 
        sys.path.append(self.npath)
        from neurosynth.base.dataset import Dataset
        from neurosynth.analysis import meta

        # Try to load a pickle if it exists. Create a new dataset instance 
        # if it doesn't.
        try:
            self.dataset = cPickle.load(
                open(self.npath+os.sep+'data/dataset.pkl', 'rb'))
        except IOError:
        # Create Dataset instance from a database file.
            self.dataset = Dataset(self.npath+os.sep+'data/database.txt')

        # Load features from file
        self.dataset.add_features(self.npath+os.sep+'data/features.txt')

        # Get names of features. 
        self.feature_list = self.dataset.get_feature_names()
Example #19
    def __init__(
        self,
        metric="emd",
        image_type="pAgF",
        name=None,
        multi=True,
        image_transform="block_reduce",
        downsample=8,
        auto_save=True,
    ):
        self.image_type = image_type
        self.multi = multi
        self.downsample = downsample
        self.auto_save = auto_save

        if callable(metric):
            self.metric = metric
        elif metric == "emd":
            self.metric = euclidean_emd
        else:
            raise ValueError("{metric} is not a valid metric".format(**locals()))

        if callable(image_transform):
            self.image_transform = image_transform
        elif image_transform == "block_reduce":
            from functools import partial

            self.image_transform = partial(block_reduce, factor=downsample)
            # def block_reduce_transform(image):
            # """The default transformation."""
            # return block_reduce(image, downsample, blur)
            # self.image_transform = block_reduce_transform
        else:
            raise ValueError(("{image_transform} is not a valid" "transform function").format(**locals()))
        self.name = name if name else time.strftime("analysis_from_%m-%d_%H-%M-%S")

        try:
            self.data = Dataset.load("data/dataset.pkl")
        except FileNotFoundError:
            self.data = _getdata()
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import meta

""" Create a new Dataset instance from a database file and load features. 
This is basically the example from the quickstart in the README. 
Assumes you have database.txt and features.txt files in the current dir.
"""

# Create Dataset instance from a database file.
dataset = Dataset('database.txt')

# Load features from file
dataset.add_features('features.txt')

# Pickle the Dataset to file so we can use Dataset.load() next time 
# instead of having to sit through the generation process again.
dataset.save('dataset.pkl')

# Get Mappable IDs for all features that start with 'emo'
ids = dataset.get_ids_by_features('emo*', threshold=0.001)

# Run a meta-analysis and save results
ma = meta.MetaAnalysis(dataset, ids)
ma.save_results('emotion')
Example #21
def get_test_dataset():
    test_data_path = get_test_data_path()
    dataset = Dataset(test_data_path + 'test_dataset.txt')
    dataset.add_features(test_data_path + 'test_features.txt')
    return dataset
Example #22
class TestBase(unittest.TestCase):

  def setUp(self):
    """ Create a new Dataset and add features. """
    self.dataset = Dataset('data/test_dataset.txt')
    self.dataset.add_features('data/test_features.txt')
  
  def test_dataset_initializes(self):
    """ Test whether dataset initializes properly. """
    self.assertIsNotNone(self.dataset.volume)
    self.assertIsNotNone(self.dataset.image_table)
    self.assertEqual(len(self.dataset.mappables), 5)
    self.assertIsNotNone(self.dataset.volume)
    self.assertIsNotNone(self.dataset.r)

  def test_image_table_loads(self):
    """ Test ImageTable initialization. """
    self.assertIsNotNone(self.dataset.image_table)
    it = self.dataset.image_table
    self.assertEqual(len(it.ids), 5)
    self.assertIsNotNone(it.volume)
    self.assertIsNotNone(it.r)
    self.assertEqual(it.data.shape, (228453, 5))
    # Add tests for values in table

  def test_feature_table_loads(self):
    """ Test FeatureTable initialization. """
    tt = self.dataset.feature_table
    self.assertIsNotNone(tt)
    self.assertEqual(len(self.dataset.list_features()), 5)
    self.assertEqual(tt.data.shape, (5,5))
    self.assertEqual(tt.feature_names[3], 'f4')
    self.assertEqual(tt.data[0,0], 0.0003)

  def test_feature_search(self):
    """ Test feature-based Mappable search. Tests both the FeatureTable method 
    and the Dataset wrapper. """
    tt = self.dataset.feature_table
    features = tt.search_features(['f*'])
    self.assertEqual(len(features), 4)
    d = self.dataset
    ids = d.get_ids_by_features(['f*'], threshold=0.001)
    self.assertEqual(len(ids), 4)
    img_data = d.get_ids_by_features(['f1', 'f3', 'g1'], 0.001, func='max', get_image_data=True)
    self.assertEqual(img_data.shape, (228453, 5))

  def test_selection_by_mask(self):
    """ Test mask-based Mappable selection.
    Only one peak in the test dataset (in study5) should be within the sgACC. """
    ids = self.dataset.get_ids_by_mask('data/sgacc_mask.nii.gz')
    self.assertEquals(len(ids), 1)
    self.assertEquals('study5', ids[0])

  def test_selection_by_peaks(self):
    """ Test peak-based Mappable selection. """
    ids = self.dataset.get_ids_by_peaks(np.array([[3, 30, -9]]))
    self.assertEquals(len(ids), 1)
    self.assertEquals('study5', ids[0])
  
  # def test_invalid_coordinates_ignored(self):
    """ Test dataset contains 3 valid coordinates and one outside mask. But this won't work 
Example #23
from neurosynth.analysis.cluster import magic
from neurosynth.base.dataset import Dataset

# mydir = "/home/delavega/projects/permutation_clustering/"

mydir = "../data/"

dataset = Dataset.load(mydir + '/datasets/abs_60topics_filt_jul.pkl')

roi_mask = mydir + '../masks/new_medial_fc_30.nii.gz'
global_mask = mydir +  "../masks/MNI152_T1_2mm_brain.nii.gz"

magic(dataset, 10, method='coactivation', features=['topic57', 'topic32', 'topic39', 'topic44'], output_dir='../results/ef_cluster/all_cluster/', min_studies_per_voxel=100, filename='okay')
Example #24
# -*- coding: utf-8 -*-

# Here I use Yeo to test Neurosynth's classify functions
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import classify
import os
import itertools
import re
import numpy as np
import pdb
import sys
from nipype.interfaces import fsl
from sklearn.ensemble import GradientBoostingClassifier


dataset = Dataset.load('../data/pickled.pkl')

masklist = ['7Networks_Liberal_1.nii.gz', '7Networks_Liberal_2.nii.gz',
            '7Networks_Liberal_3.nii.gz', '7Networks_Liberal_4.nii.gz',
            '7Networks_Liberal_5.nii.gz', '7Networks_Liberal_6.nii.gz',
            '7Networks_Liberal_7.nii.gz']

rootdir = '../masks/Yeo_JNeurophysiol11_MNI152/standardized/'


class maskClassifier:
    def __init__(self, classifier=GradientBoostingClassifier(),
                 param_grid={'max_features': np.arange(2, 140, 44),
                             'n_estimators': np.arange(5, 141, 50),
                             'learning_rate': np.arange(0.05, 1, 0.1)},
                 thresh=0.08):
        # Store the classifier, its hyperparameter grid, and the threshold.
        self.classifier = classifier
        self.param_grid = param_grid
        self.thresh = thresh


diffs = {}
Example #25
import neurosynth.base.dataset
from neurosynth.base.dataset import Dataset
print neurosynth.base.dataset.__file__
dataset = Dataset('../data/unprocessed/abstract/full_database_revised.txt')
dataset.add_features('../data/unprocessed/abstract/abstract_features.txt')
dataset.save('../data/datasets/dataset_abs_words_pandas.pkl')

dataset = Dataset('../data/unprocessed/abstract/full_database_revised.txt')
dataset.add_features('../data/unprocessed/abstract_topics/doc_features100.txt')
dataset.save('../data/datasets/dataset_abs_topics_pandas.pkl')
Example #26
class NeurosynthMerge:
    def __init__(self, thesaurus, npath, outdir, test_mode=False):
        """
        Generates a new set of images using the neurosynth repository,
        combining across terms in a thesaurus.

        Args:
            - thesaurus: A list of tuples of the form ('term that will be the
                name of the file', 'the other term', 'expression combining
                the terms').
                    - The last element is an expression combining the terms,
                    using & (and), &~ (and not), | (or).
            - npath: directory where the neurosynth git repository is locally 
            on your machine (https://github.com/neurosynth/neurosynth)
            - outdir: directory where the generated images will be saved
            - test_mode: when true, the code will run an abridged version for 
            test purposes (as implemented by test.Neurosynth.py)
        """
        self.thesaurus = thesaurus
        self.npath = npath
        self.outdir = outdir

        self.import_neurosynth_git()
        from neurosynth.analysis import meta

        # Take out first two terms from the feature_list and insert the third 
        # term from the tuple.
        for triplet in thesaurus:
            self.feature_list = [feature for feature in self.feature_list \
            if feature not in triplet]
            self.feature_list.append(triplet[-1])

        # This makes an abridged version of feature_list for testing purposes. 
        if test_mode:
            self.feature_list = [triplet[-1] for triplet in thesaurus]

        # Run metanalyses on the new features set and save the results to the 
            #outdir.
        for feature in self.feature_list:
            self.ids = self.dataset.get_ids_by_expression(feature, 
                threshold=0.001)
            ma = meta.MetaAnalysis(self.dataset, self.ids)

            # Parse the feature name (to avoid conflicts with illegal 
                #characters as file names)
            regex = re.compile('\W+')
            split = re.split(regex, feature)
            feat_fname = split[0] 

            # Save the results (many different types of files)
            ma.save_results(self.outdir+os.sep+feat_fname)

    def import_neurosynth_git(self):
        # Add the appropriate neurosynth git folder to the python path. 
        sys.path.append(self.npath)
        from neurosynth.base.dataset import Dataset
        from neurosynth.analysis import meta

        # Try to load a pickle if it exists. Create a new dataset instance 
        # if it doesn't.
        try:
            self.dataset = cPickle.load(
                open(self.npath+os.sep+'data/dataset.pkl', 'rb'))
        except IOError:
        # Create Dataset instance from a database file.
            self.dataset = Dataset(self.npath+os.sep+'data/database.txt')

        # Load features from file
        self.dataset.add_features(self.npath+os.sep+'data/features.txt')

        # Get names of features. 
        self.feature_list = self.dataset.get_feature_names()
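A hedged instantiation sketch for the class above; the thesaurus tuples, repository path, and output directory are placeholders rather than values from the original project:

# Illustrative only: terms and paths are placeholders.
thesaurus = [('emotion', 'affect', 'emotion|affect')]
nsm = NeurosynthMerge(thesaurus=thesaurus,
                      npath='/path/to/neurosynth',  # local clone of the neurosynth repo
                      outdir='merged_maps',
                      test_mode=True)               # abridged run, as the docstring describes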
Example #27
 def __init__(self, dbsize):
     print "Initializing Neurosynth database..."
     self.db = Dataset("data/" + str(dbsize) + "terms/database.txt")
     self.db.add_features("data/" + str(dbsize) + "terms/features.txt")
     self.ids = self.getIDs()
     self.decoder = None
Example #28
class NeuroSynth:

    """Initialize Neurosynth Database"""

    def __init__(self, dbsize):
        print "Initializing Neurosynth database..."
        self.db = Dataset("data/" + str(dbsize) + "terms/database.txt")
        self.db.add_features("data/" + str(dbsize) + "terms/features.txt")
        self.ids = self.getIDs()
        self.decoder = None
        # self.masker = mask.Mask("data/X.nii.gz")

    """Do contrast analysis between two sets of """

    def neurosynthContrast(self, papers1, papers2, fdr, outdir=None, outprefix=None, image_list=None):

        # Do a meta analysis to contrast the two
        ma = meta.MetaAnalysis(self.db, papers1, papers2, q=float(fdr))
        if outdir:
            print "Saving results to %s" % (outdir)
            ma.save_results(outdir, prefix=outprefix, prefix_sep="_", image_list=image_list)
        return ma.images

    """Conduct meta analysis with particular set of ids"""

    def neurosynthMeta(self, papers, fdr, outdir=None, outprefix=None, image_list=None):
        # Get valid ids from user list
        valid_ids = self.get_valid_ids(papers)

        if len(valid_ids) > 0:
            # Do meta analysis
            ma = meta.MetaAnalysis(self.db, valid_ids, q=float(fdr))
            if outdir:
                print "Saving results to output directory %s" % (outdir)
                ma.save_results(outdir, prefix=outprefix, prefix_sep="_", image_list=image_list)
            return ma.images
        else:
            print "No studies found in database for ids in question!"

    """Return list of valid ids from user input"""

    def get_valid_ids(self, papers):
        # Input is DOI with list of papers
        valid_ids = [x for x in papers if int(x.strip(" ")) in self.ids]
        print "Found %s valid ids." % (str(len(valid_ids)))
        return valid_ids

    """Decode an image, return 100 results"""

    def decode(self, images, outfile, mrs=None, round=4):
        if not self.decoder:
            self.decoder = decode.Decoder(self.db)

        # If mrs is not specified, do decoding against neurosynth database
        if not mrs:
            result = self.decoder.decode(images, save=outfile)

        # If mrs is specified, do decoding against custom set of images
        else:
            # This is akin to traditional neurosynth method - pearson's r correlation
            imgs_to_compare = imageutils.load_imgs(mrs, self.masker)
            imgs_to_decode = imageutils.load_imgs(images, self.masker)
            x, y = imgs_to_compare.astype(float), imgs_to_decode.astype(float)
            x, y = x - x.mean(0), y - y.mean(0)
            x, y = x / np.sqrt((x ** 2).sum(0)), y / np.sqrt((y ** 2).sum(0))
            result = np.around(x.T.dot(y).T, round)
            features = [os.path.basename(m) for m in mrs]
            rownames = [os.path.basename(m) for m in images]
            df = pd.DataFrame(result, columns=features)
            df.index = rownames
            df.to_csv(outfile, sep="\t")
        return result

    """Return features in neurosynth database"""

    def getFeatures(self, dataset):
        return dataset.get_feature_names()

    """Extract pubmed IDs or dois from Neurosynth Database"""

    def getIDs(self):
        # Get all IDs in neuroSynth
        return self.db.image_table.ids

    """Extract author names for a given pmid or doi"""

    def getAuthor(self, db, id):
        article = self.db.get_mappables(id)
        meta = article[0].__dict__
        tmp = meta["data"]["authors"]
        tmp = tmp.split(",")
        authors = [x.strip("^ ") for x in tmp]
        return authors

    """Extract all author names in database"""

    def getAuthors(self, db):
        articles = db.mappables
        uniqueAuthors = []
        for a in articles:
            meta = a.__dict__
            tmp = meta["data"]["authors"]
            tmp = tmp.split(",")
            authors = [x.strip("^ ") for x in tmp]
            for a in authors:
                uniqueAuthors.append(a)
        uniqueAuthors = list(np.unique(uniqueAuthors))
        return uniqueAuthors

    """Extract activation points and all meta information for a particular pmid"""

    def getPaperMeta(self, db, pmid):
        articles = db.mappables
        m = []
        for a in articles:
            tmp = a.__dict__
            if tmp["data"]["id"] == str(pmid):
                journal = tmp["data"]["journal"]
                title = tmp["data"]["title"]
                year = tmp["data"]["year"]
                doi = tmp["data"]["doi"]
                auth = tmp["data"]["authors"]
                peaks = tmp["data"]["peaks"]
                pmid = tmp["data"]["id"]
                tmp = (journal, title, year, doi, pmid, auth, peaks)
                m.append(tmp)
        return m
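A short usage sketch for the wrapper class above, assuming the data/<dbsize>terms/ layout it expects; the dbsize and PubMed IDs are placeholders:

ns = NeuroSynth(3000)
images = ns.neurosynthMeta(papers=["23400116", "21908871"], fdr=0.05, outdir="meta_out")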
Example #30
from sklearn.cluster import KMeans, DBSCAN, MiniBatchKMeans
from sklearn import metrics
from scipy import stats

base_path = '/home/pauli/Development/neurobabel/'
test_data_path = base_path + 'ACE/'
masker_filename = base_path + 'atlases/whs_sd/WHS_SD_rat_one_sm_v2.nii.gz'
atlas_filename = base_path + 'atlases/whs_sd/WHS_SD_rat_atlas_brain_sm_v2.nii.gz'
mask = nib.load(masker_filename)
masker = Masker(mask)
r = 1.0
transform = {'BREGMA': transformations.bregma_to_whs()}
target = 'WHS'

# load data set
dataset = Dataset(os.path.join(test_data_path, 'db_bregma_cog_atlas_export.txt'), masker=masker_filename, r=r, transform=transform, target=target)
dataset.feature_table = FeatureTable(dataset)
dataset.add_features(os.path.join(test_data_path, "db_bregma_cog_atlas_features.txt")) # add features
fn = dataset.get_feature_names()
features = dataset.get_feature_data()

n_xyz, n_articles = dataset.image_table.data.shape
# do topic modeling (LSA)
n_components = 20
svd = TruncatedSVD(n_components=n_components)
X = svd.fit_transform(features)
X_orig = X.copy()

X = StandardScaler().fit_transform(X_orig)

# db = DBSCAN(eps=10.0, min_samples=10).fit(X)
Example #31
from neurosynth.base.dataset import Dataset
from sklearn.cluster import KMeans
from sklearn.cluster import Ward

import numpy as np
from neurosynth.base.imageutils import save_img
from scipy import sparse

dataset = Dataset.load("../data/datasets/abs_topics_filt.pkl")

print "Filtering voxels..."

data = dataset.image_table.data.toarray()

voxel_mask = data.mean(axis=1) > 0.0135

good_voxels = data[voxel_mask]

good_voxels = sparse.csr_matrix(good_voxels)

for i in [20, 30, 40, 50]:
	print "Clustering..."

	print i

	k_means = KMeans(init='k-means++', n_clusters=i, n_jobs=16)
	k_means.fit(good_voxels)

	# ward = Ward(n_clusters=30)
	# ward.fit(good_voxels)
Example #32
 def dataset(self):
     return Dataset.load(settings.PICKLE_DATABASE)
Example #33
def get_test_dataset(prefix='test'):
    test_data_path = get_test_data_path()
    dataset = Dataset(test_data_path + '%s_dataset.txt' % prefix)
    dataset.add_features(test_data_path + '%s_features.txt' % prefix)
    return dataset
Example #34
def load_dataset_instance(instance_filename):
    dataset = Dataset.load('./raw_data/' + instance_filename + '.pkl')
    return dataset
from neurosynth.base.dataset import Dataset
import joblib
from sklearn.metrics import pairwise_distances
from sklearn.preprocessing import scale
from neurosynth.analysis.cluster import Clusterable
dataset = Dataset.load('/projects/delavega/dbs/db_v6_topics-100.pkl')
from fastcluster import ward

roi = Clusterable(
    dataset, '/home/delavega/projects/classification/masks/l_70_mask.nii.gz')

saved_pca = '/projects/delavega/clustering/dv_v6_reference_pca.pkl'
reference = joblib.load(saved_pca)

distances = pairwise_distances(roi.data, reference.data, metric='correlation')
distances = scale(distances, axis=1)

joblib.dump(
    distances,
    '/home/delavega/projects/clustering/results/hierarchical/v6_distances_l_70_scaled.pkl'
)

Z = ward(distances)

joblib.dump(
    Z,
    '/home/delavega/projects/clustering/results/hierarchical/v6_ward_l70_scaled.pkl'
)
from neurosynth.analysis import meta

base_path = '/home/pauli/Development/neurobabel/'
test_data_path = base_path + 'ACE/'
masker_filename = base_path + 'atlases/whs_sd/WHS_SD_rat_one_sm_v2.nii.gz'
atlas_filename = base_path + 'atlases/whs_sd/WHS_SD_rat_atlas_brain_sm_v2.nii.gz'
mask = nb.load(masker_filename)
masker = Masker(mask)
r = 1.0
# transform = {'BREGMA': transformations.bregma_to_whs()}
#transform = {'BREGMA': transformations.identity()}
transform = {'BREGMA': transformations.bregma_to_whs()}
target = 'WHS'

# load data set
dataset = Dataset(os.path.join(test_data_path, 'db_bregma_export.txt'), masker=masker_filename, r=r, transform=transform, target=target)
dataset.feature_table = FeatureTable(dataset)
dataset.add_features(os.path.join(test_data_path, "db_bregma_features.txt")) # add features
fn = dataset.get_feature_names()

def get_whs_labels(filename=os.path.join(base_path, "atlases/whs_sd/WHS_SD_rat_atlas_v2.label")):
    ''' load the names of all labelled areas in the atlas (e.g. brainstem), return list of them '''
    in_file = open(filename, 'r')
    lines = in_file.readlines()
    labels = {}
    for line in lines:
        start = line.find("\"") + 1
        if start > 0:
            stop = line.find("\"", start)
            label = line[start:stop]
            idx = line.split()[0]
from neurosynth.base.dataset import Dataset
import neurosynth.base.imageutils as it

dataset = Dataset.load("../data/datasets/abs_topics_filt.pkl")

print "Filtering voxels..."

data = dataset.image_table.data.toarray()

voxel_mask = data.mean(axis=1) > 0.005

img = it.load_imgs('../masks/ward/30.nii.gz', dataset.masker)

good_voxels = img[voxel_mask]

it.save_img(good_voxels, "../masks/ward/30_masked.nii.gz", dataset.masker)
Example #39
from sklearn.metrics import roc_auc_score
import sys
from base.mv import bootstrap_mv_full
from neurosynth.base.dataset import Dataset
dataset = Dataset.load("../permutation_clustering/abs_60topics_filt_jul.pkl")

from sklearn.linear_model import LassoLarsIC

print sys.argv
try:
	cmd, iterations, job_id = sys.argv
except ValueError:
	raise Exception("Incorrect number of arguments")

import csv
cognitive_topics = ['topic' + topic[0] for topic in csv.reader(open('topic_keys60-july_cognitive.csv', 'rU')) if topic[1] == "T"]

results = bootstrap_mv_full(dataset, LassoLarsIC(), roc_auc_score, 
	'../permutation_clustering/results/medial_fc_30_kmeans/kmeans_k9/cluster_labels.nii.gz', features=cognitive_topics, processes=None, 
	boot_n=int(iterations), outfile='results/bootstrap_full_mv_' + str(iterations) + '_mFC__LASSO_LARS_60_ ' + str(job_id) + '.csv')

Example #40
#         pass

#     def fit(self, X):
#         self.cf = MFastHCluster(method='ward')
#         self.cf.linkage(X)

#     def predict(self, n):
#         for i in range(1, self.cf.cut(0).shape[0]):
#             labels = self.cf.cut(i)
#             if np.bincount(labels).shape[0] == n:
#                 break

#         return labels

mydir = "/projects/delavega/clustering/"
dataset = Dataset.load(mydir + 'abs_60topics_filt_jul.pkl')

roi_mask = mydir + 'masks/new_medial_fc_30.nii.gz'
ns = [3, 9]
save_images = True
output_dir = join(mydir, 'results/MFC/')
out_model = None

roi = Clusterable(dataset, roi_mask, min_studies=80)

reference = Clusterable(dataset, min_studies=80)
reduce_reference = sk_decomp.RandomizedPCA(100)
reference = reference.transform(reduce_reference, transpose=True)

# distances = pairwise_distances(roi.data, reference.data,
#                                metric='correlation')
Example #41
t = dlmread('/Users/lukechang/Dropbox/Github/toolbox/Python/ImageProcessing/tal2icbm_fsl.mat');
dat.volInfo.mat = inv(t)*dat.volInfo.mat;
dat.fullpath = '/Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend.nii';
write(dat)

# 2) Reorient using FSL - Unix
fslreorient2std Friend Friend_Or

# 3) Coregister to 2mm MNI space - Unix
/usr/local/fsl/bin/flirt -in /Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend_Or.nii.gz -ref /usr/local/fsl/data/standard/MNI152_T1_2mm_brain -out /Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend_Or_Mni.nii.gz -omat /Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend_Or_Mni.mat -bins 256 -cost corratio -searchrx -90 90 -searchry -90 90 -searchrz -90 90 -dof 12  -interp trilinear

# 4) Decode - Python
DATASET_FILE = '/Users/lukechang/Dropbox/Github/neurosynth/topics.pkl'
PREFIX = '/Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/'
INFILE = 'Friend_Or_Mni.nii.gz'
dataset = Dataset.load(DATASET_FILE)
decoder = decode.Decoder(dataset) #takes awhile to load, should only do this once.
img = imageutils.load_imgs(PREFIX + INFILE, decoder.mask)
result = decoder.decode(img)
np.savetxt(PREFIX + 'Friend_Decoded.txt', result)

# 5) Threshold at .001 - unix
fslmaths Friend_Or_Mni -thr 3 Friend_Or_Mni_001

# 6) Decode thresholded map - python
DATASET_FILE = '/Users/lukechang/Dropbox/Github/neurosynth/topics.pkl'
PREFIX = '/Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/'
INFILE = 'Friend_Or_Mni_001.nii.gz'
dataset = Dataset.load(DATASET_FILE)
decoder = decode.Decoder(dataset) #takes awhile to load, should only do this once.
img = imageutils.load_imgs(PREFIX + INFILE, decoder.mask)
Example #42
from neurosynth.base.dataset import Dataset
from neurosynth.analysis.cluster import Clusterable
from sklearn import decomposition as sk_decomp
import pickle

dataset = Dataset.load(
    '/home/delavega/projects/classification/data/datasets/abs_60topics_filt_jul.pkl'
)

out = '/projects/delavega/clustering/dv_v5_reference_min_80_pca.pkl'

reference = Clusterable(dataset, min_studies=80)
print "Running PCA"
reduce_reference = sk_decomp.RandomizedPCA(100)
reference = reference.transform(reduce_reference, transpose=True)

pickle.dump(reference, open(out, 'w'))
Example #43
###
# This script shuffles the classification labels and reruns the classification many times to generate data for a confidence interval around the null hypothesis.

from sklearn.linear_model import RidgeClassifier
from base.classifiers import OnevsallClassifier
from neurosynth.base.dataset import Dataset
from sklearn.metrics import roc_auc_score
import pickle
from random import shuffle

def shuffle_data(classifier):
	for region in classifier.c_data:
		shuffle(region[1])


d_abs_topics_filt = Dataset.load('../data/datasets/abs_topics_filt_july.pkl')

results = []

clf = OnevsallClassifier(d_abs_topics_filt, '../masks/Ward/50.nii.gz', cv='4-Fold',
	 thresh=10, thresh_low=0, memsave=True, classifier=RidgeClassifier())
clf.load_data(None, None)
clf.initalize_containers(None, None, None)


for i in range(0, 500):
	shuffle_data(clf)
	clf.classify(scoring=roc_auc_score, processes=8, class_weight=None)
	results = list(clf.class_score) + results
	print(i),
neurosynth.set_logging_level('info')

# <markdowncell>

# ## Creating a new dataset
# 
# Next, we create a Dataset, which is the core object most Neurosynth tools operate on. We initialize a Dataset by passing in a database file, which is essentially just a giant list of activation coordinates and associated study IDs. This file can be downloaded from the Neurosynth website or installed from the data submodule (see the Readme for instructions).
# 
# Creating the object will take a few minutes on most machines, as we need to process about 200,000 activations drawn from nearly 6,000 studies. Once that's done, we also need to add some features to the Dataset. Features are just variables associated with the studies in our dataset; literally any dimension a study could be coded on can constitute a feature that Neurosynth can use. In practice, the default set of features included in the data download includes 500 psychological terms (e.g., 'language', 'emotion', 'memory', etc.) that occur with some frequency in the dataset. So when we're talking about the "emotion" feature, we're really talking about how frequently each study in the Dataset uses the word 'emotion' in the full-text of the corresponding article.
# 
# Let's go ahead and create a dataset and add some features:

# <codecell>

# Create a new Dataset instance
dataset = Dataset('data/database.txt')

# Add some features
dataset.add_features('data/features.txt')

# <markdowncell>

# Because this takes a while, we'll save our Dataset object to disk. That way, the next time we want to use it, we won't have to sit through the whole creation operation again:

# <codecell>

dataset.save('dataset.pkl')

# <markdowncell>

# Now, in the future, instead of waiting, we can just load the dataset from file:
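
# <codecell>

# A minimal sketch of that load step, reusing the 'dataset.pkl' saved above.
dataset = Dataset.load('dataset.pkl')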
Example #45
 def setUp(self):
   """ Create a new Dataset and add features. """
   self.dataset = Dataset('data/test_dataset.txt')
   self.dataset.add_features('data/test_features.txt')
Example #46
from sklearn.metrics import roc_auc_score
import sys
from base.mv import bootstrap_mv_full
from neurosynth.base.dataset import Dataset
dataset = Dataset.load("../permutation_clustering/abs_60topics_filt_jul.pkl")

from sklearn.naive_bayes import GaussianNB

print sys.argv
try:
    cmd, iterations, job_id = sys.argv
except ValueError:
    raise Exception("Incorrect number of arguments")

import csv
cognitive_topics = [
    'topic' + topic[0]
    for topic in csv.reader(open('topic_keys60-july_cognitive.csv', 'rU'))
    if topic[1] == "T"
]

results = bootstrap_mv_full(
    dataset,
    GaussianNB(),
    roc_auc_score,
    '../permutation_clustering/results/medial_fc_30_kmeans/kmeans_k9/cluster_labels.nii.gz',
    features=cognitive_topics,
    processes=None,
    boot_n=int(iterations),
    outfile='results/bootstrap_full_mv_' + str(iterations) + '_mFC_60_ ' +
    str(job_id) + '.csv')
Example #47
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.linear_model import LassoCV, LassoLarsCV, LassoLarsIC, RidgeClassifier, RidgeClassifierCV
from sklearn.ensemble import GradientBoostingClassifier

from base.tools import Logger
from base.pipelines import pipeline
from base.classifiers import PairwiseClassifier, OnevsallClassifier

from neurosynth.base.dataset import Dataset
from sklearn.metrics import roc_auc_score

now = datetime.datetime.now()

n_topics = 60
dataset = Dataset.load('../data/0.6/datasets/db_v6_topics-%d.pkl' % n_topics)

# cognitive_topics = ['topic' + topic[0] for topic in csv.reader(
# 	open('../data/unprocessed/abstract_topics_filtered/topic_sets/topic_keys'  + str(topics) + '-july_cognitive.csv', 'rU')) if topic[1] == "T"]

# junk_topics = ['topic' + topic[0] for topic in csv.reader(
# 	open('../data/unprocessed/abstract_topics_filtered/topic_sets/topic_keys' + str(topics) + '-july_cognitive.csv', 'rU')) if topic[1] == "F"]


# Analyses
def complete_analysis(dataset,
                      dataset_name,
                      name,
                      masklist,
                      processes=1,
                      features=None):
Example #48
xfm2vol.run()


#make masks to input into neurosynth
def cluster2masks(clusterfile):
    clustermap = nb.load(clusterfile).get_data()
    for x in range(1, clustermap.max() + 1):
        clustermask = (clustermap == x).astype(int)
        nImg = nb.Nifti1Image(clustermask, None)
        nb.save(
            nImg,
            os.path.abspath(clusterfile + '_clustermask' + str(x) + '.nii'))


cluster2masks(volume_file)

dataset_file = '/home/raid3/watanabe/neurosynth/data/dataset.pkl'
if not os.path.exists(dataset_file):
    dataset = Dataset('/home/raid3/watanabe/neurosynth/data/database.txt')
    dataset.add_features('/home/raid3/watanabe/neurosynth/data/features.txt')
    dataset.save(dataset_file)
else:
    dataset = cPickle.load(open(dataset_file, 'rb'))

clustermask = volume_file + '_clustermask' + str(3) + '.nii'

ids = dataset.get_ids_by_mask(clustermask)
features = dataset.feature_table.get_features_by_ids(ids)

#mri_surf2vol --identity fsaverage4 --surfval /scr/ilz1/Data/attemptsurface.nii --hemi 'lh' --o /scr/ilz1/Data/results/surf2volume.nii --template /scr/ilz1/Data/freesurfer/fsaverage4/mri/orig.mgz
    if mask_img_data.shape != (91, 109, 91):
        resampled_roi = resample_to_img(
            roi, mni152_2mm, interpolation="nearest", copy=True
        )
        resampled_file = join(map_dir, "{0}_mni2mm.nii.gz".format(file))
        resampled_roi.to_filename(resampled_file)
        roi_files[i] = resampled_file
        plot_glass_brain(
            resampled_file,
            output_file=join(map_dir, "{0}_mni2mm.png".format(basename(file))),
        )


print("loading dataset...")
tds = datetime.now()
dataset = Dataset("/Users/kbottenh/Dropbox/Data/neurosynth-v0.7/database.txt")
tdf = datetime.now()


print("dataset loaded! only took {0}".format((tdf - tds)))

for i in np.arange(0, len(mask_names)):
    print("{0}\nmeta-analyzing {1}...".format(datetime.now(), mask_names[i]))
    tmas = datetime.now()
    ids = dataset.get_studies(
        mask=roi_files[i],
    )
    ma = meta.MetaAnalysis(dataset, ids)
    ma.save_results(
        output_dir=sink_dir,
        prefix=mask_names[i],
Example #50
from base.classifiers import OnevsallContinuous
from neurosynth.analysis import cluster
from neurosynth.base.dataset import Dataset
from sklearn.metrics import r2_score
from sklearn.linear_model import Ridge
import cPickle
from sklearn.decomposition import RandomizedPCA

dataset = Dataset.load('../data/datasets/abs_60topics_filt_jul.pkl')

roi_mask = '../masks/mpfc_nfp.nii.gz'
global_mask = "../masks/MNI152_T1_2mm_brain.nii.gz"

n_regions = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# print resolution
clf_file = "../data/clfs/all_vox_Ridge_mpfc.pkl"

print "trying to load"
try:
    clf = OnevsallContinuous.load(clf_file)
except:
    print "Loading failed"
    clf = OnevsallContinuous(dataset, None, classifier=Ridge())
    clf.classify(scoring=r2_score, processes=8)
    try:
        clf.save(clf_file)
    except:
        pass

reduc = RandomizedPCA(n_components=100)
Example #51
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import meta
import os
dataset = Dataset('database.txt')
dataset.add_features('features.txt')
print dataset.get_feature_names()
ids = dataset.get_ids_by_features('emo*', threshold=0.001)
print len(ids)
ma = meta.MetaAnalysis(dataset, ids)
ma.save_results('emotion')
Example #52
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import decode

# Load a saved Dataset file. This example will work with the 
# file saved in the create_a_new_dataset_and_load_features example.
dataset = Dataset.load('dataset.pkl')

# Initialize a new Decoder instance with a few features. Note that 
# if you don't specify a subset of features, ALL features in the 
# Dataset will be loaded, which will take a long time because 
# meta-analysis images for each feature need to be generated.
decoder = decode.Decoder(dataset, features=['emotion', 'pain', 'somatosensory', 'wm', 'inhibition'])

# Decode three images. The sample images here are coactivation 
# maps for ventral, dorsal, and posterior insula clusters, 
# respectively. Maps are drawn from data reported in 
# Chang, Yarkoni, Khaw, & Sanfey (2012); see paper for details.
# We save the output--an image x features matrix--to a file.
# By default, the decoder will use Pearson correlation, i.e., 
# each value in our results table indicates the correlation 
# between the input image and each feature's meta-analysis image.
result = decoder.decode(['vIns.nii.gz', 'dIns.nii.gz', 'pIns.nii.gz'], save='decoding_results.txt')
Example #54
class Neurosynth:
    def __init__(self,
                 datadir='../data/neurosynth',
                 verbose=True,
                 ma_count_thresh=16,
                 meta_image='consistency_z',
                 resolution=3):
        self.dataset = None
        self.concepts = None
        self.concepts_df = None
        self.concept_pmids = {}
        self.datadir = datadir
        self.datafile = os.path.join(datadir, 'database.txt')
        self.verbose = verbose
        self.ma_count_thresh = ma_count_thresh
        self.meta_image = meta_image
        self.resolution = resolution
        self.imagedir_resampled = None
        self.image_concepts = None
        self.desmtx = None

        if not os.path.exists(os.path.join(self.datadir, 'database.txt')):
            print('downloading neurosynth data')
            ns.dataset.download(path='/tmp', unpack=True)
            print('extracting data')
            tfile = tarfile.open("/tmp/current_data.tar.gz", 'r:gz')
            if not os.path.exists(self.datadir):
                os.mkdir(self.datadir)
            tfile.extractall(self.datadir)
            os.remove("/tmp/current_data.tar.gz")
            print('done creating dataset in', self.datadir)

        self.imagedir = os.path.join(self.datadir, 'ma_images')
        if not os.path.exists(self.imagedir):
            os.mkdir(self.imagedir)

    def get_dataset(self, force_load=False):
        if os.path.exists(os.path.join(self.datadir,
                                       'dataset.pkl')) and not force_load:
            print('loading database from',
                  os.path.join(self.datadir, 'dataset.pkl'))
            self.dataset = Dataset.load(
                os.path.join(self.datadir, 'dataset.pkl'))
        else:
            print('loading database - this takes a few minutes')
            self.dataset = Dataset(os.path.join(self.datadir, 'database.txt'))
            self.dataset.add_features(
                os.path.join(self.datadir, 'features.txt'))

            self.dataset.save(os.path.join(self.datadir, 'dataset.pkl'))

    def get_concepts(self, force_load=False):
        if os.path.exists(os.path.join(self.datadir,
                                       'concepts_df.csv')) and not force_load:
            print('using cached cognitive atlas concepts')
            self.concepts_df = pandas.read_csv(
                os.path.join(self.datadir, 'concepts_df.csv'))
        else:
            self.concepts_df = get_concept().pandas
            self.concepts_df.to_csv(
                os.path.join(self.datadir, 'concepts_df.csv'))
        self.concepts = self.concepts_df.name.tolist()

    def get_concept_pmids(self, retmax=2000000, force_load=False):
        # get the pmids for each concept that are in neurosynth
        # for single-word concepts we use the neurosynth search tool
        # for phrases we use pubmed
        if os.path.exists(os.path.join(
                self.datadir, 'concept_pmids.pkl')) and not force_load:
            print('using cached concept_pmids')
            self.concept_pmids = pickle.load(
                open(os.path.join(self.datadir, 'concept_pmids.pkl'), 'rb'))
            return

        print('loading all neurosynth pmids')
        all_neurosynth_ids = self.dataset.image_table.ids.tolist()
        for id in self.concepts:
            time.sleep(0.5)
            handle = Entrez.esearch(db="pubmed",
                                    retmax=retmax,
                                    term='"%s"' % id)
            record = Entrez.read(handle)
            handle.close()
            # make sure we got all the records - rerun if we didn't
            if int(record['Count']) > retmax:
                handle = Entrez.esearch(db="pubmed",
                                        retmax=int(record['Count']),
                                        term='"%s"' % id)
                record = Entrez.read(handle)
                handle.close()
            records_int = [int(i) for i in record['IdList']]
            ns_pmids = intersect(all_neurosynth_ids, records_int)
            print('pubmed found', len(ns_pmids), 'matching pmids for', id)
            self.concept_pmids[id] = ns_pmids
        pickle.dump(
            self.concept_pmids,
            open(os.path.join(self.datadir, 'concept_pmids.pkl'), 'wb'))

    def get_concept_images(self, force_load=False):

        for c in self.concept_pmids.keys():
            if not force_load and os.path.exists(
                    os.path.join(
                        self.imagedir,
                        '%s_specificity_z.nii.gz' % c.replace(' ', '-'))):
                continue
            if len(self.concept_pmids[c]) < self.ma_count_thresh:
                #print('skipping',c,len(self.concept_pmids[c]),'pmids')
                continue
            print('running meta-analysis for', c)
            ma = meta.MetaAnalysis(self.dataset, self.concept_pmids[c])
            ma.save_results(self.imagedir, c.replace(' ', '-'))

        if force_load or not os.path.exists(
                os.path.join(self.imagedir, 'mask_image.nii.gz')):
            # make mask of voxels with zero standard deviation
            concept_images = glob.glob(
                os.path.join(self.imagedir, '*_%s.nii.gz' % self.meta_image))

            imgdata = numpy.zeros((91, 109, 91, len(concept_images)))
            print('loading concept images to compute std')
            for i, c in enumerate(concept_images):
                tmp = nibabel.load(c).get_data()
                imgdata[:, :, :, i] = tmp

            imgstd = numpy.std(imgdata, axis=3)
            maskdata = (imgstd > 0).astype('int')
            maskimg = nibabel.Nifti1Image(maskdata,
                                          affine=nibabel.load(c).affine)
            maskimg.to_filename(
                os.path.join(self.imagedir, 'mask_image.nii.gz'))

    def get_resampled_images(self, shape=None, affine=None, force_load=False):
        # use 3 mm as default
        if not shape:
            shape = [60, 72, 60]
            affine = numpy.array([[-3, 0, 0, 90], [0, 3, 0, -126],
                                  [0, 0, 3, -72], [0, 0, 0, 1]])
            self.resolution = affine[1, 1].astype('int')
        print('resampling data to %d mm' % self.resolution)
        self.imagedir_resampled = os.path.join(
            self.datadir, 'ma_images_%dmm' % self.resolution)
        if not os.path.exists(self.imagedir_resampled):
            os.mkdir(self.imagedir_resampled)
        concept_images = glob.glob(
            os.path.join(self.imagedir, '*_%s.nii.gz' % self.meta_image))
        for c in concept_images:
            if force_load or not os.path.exists(
                    os.path.join(self.imagedir_resampled,
                                 os.path.basename(c))):
                img = nilearn.image.resample_img(c,
                                                 target_affine=affine,
                                                 target_shape=shape)
                img.to_filename(
                    os.path.join(self.imagedir_resampled, os.path.basename(c)))

        if not os.path.exists(
                os.path.join(self.datadir,
                             'mask_%dmm.nii.gz' % self.resolution)):
            # make MNI mask at chosen resolution
            mask = os.path.join(
                os.environ['FSLDIR'],
                'data/standard/MNI152_T1_2mm_brain_mask.nii.gz')
            maskimg = nilearn.image.resample_img(mask,
                                                 target_affine=affine,
                                                 target_shape=shape)
            maskimg.to_filename(
                os.path.join(self.datadir,
                             'mask_%dmm.nii.gz' % self.resolution))

    def load_concept_images(self, force_load=True):
        concept_images = glob.glob(
            os.path.join(self.imagedir_resampled,
                         '*_%s.nii.gz' % self.meta_image))
        concept_images.sort()
        self.image_concepts = [
            os.path.basename(i).split('_')[0] for i in concept_images
        ]
        if os.path.exists(
                os.path.join(self.datadir,
                             'imgdata_%dmm.npy' % self.resolution)):
            self.imgdata = numpy.load(
                os.path.join(self.datadir,
                             'imgdata_%dmm.npy' % self.resolution))
            # make sure it's the right size
            if self.imgdata.shape[1] == len(concept_images):
                print('using cached concept image data')
                return

        masker = nilearn.input_data.NiftiMasker(
            mask_img=os.path.join(self.datadir,
                                  'mask_%dmm.nii.gz' % self.resolution),
            target_shape=[60, 72, 60],
            target_affine=numpy.array([[-3, 0, 0, 90], [0, 3, 0, -126],
                                       [0, 0, 3, -72], [0, 0, 0, 1]]))
        print('loading concept image data')
        self.imgdata = masker.fit_transform(concept_images)
        numpy.save(
            os.path.join(self.datadir, 'imgdata_%dmm.npy' % self.resolution),
            self.imgdata)

    def save(self):
        with open('%s/neurovault_%dmm.pkl' % (self.datadir, self.resolution),
                  'wb') as f:
            pickle.dump(self, f)

    def build_design_matrix(self, force_load=False):
        if not force_load and os.path.exists(
                os.path.join(self.datadir, 'desmtx.csv')):
            self.desmtx = pandas.DataFrame.from_csv(
                os.path.join(self.datadir, 'desmtx.csv'))
            print('using cached design matrix')
            return
        print('building design matrix')
        all_concept_pmids = []
        for k in self.concept_pmids.keys():
            all_concept_pmids = all_concept_pmids + self.concept_pmids[k]
        all_concept_pmids = list(set(all_concept_pmids))
        all_concept_pmids.sort()
        all_concepts = list(self.concept_pmids.keys())
        self.desmtx = pandas.DataFrame(data=0,
                                       index=all_concept_pmids,
                                       columns=all_concepts)

        for k in self.concept_pmids.keys():
            pmids = self.concept_pmids[k]
            self.desmtx[k][pmids] = 1
        # drop columns with too few matches
        self.desmtx = self.desmtx.ix[:,
                                     self.desmtx.sum() > self.ma_count_thresh]
        self.desmtx.to_csv(os.path.join(self.datadir, 'desmtx.csv'))
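A hedged end-to-end sketch of how the class above might be driven; the data directory is a placeholder and the call order simply follows the method definitions:

ns = Neurosynth(datadir='../data/neurosynth')
ns.get_dataset()           # load or build the pickled Dataset
ns.get_concepts()          # fetch Cognitive Atlas concepts
ns.get_concept_pmids()     # map concepts to Neurosynth PMIDs
ns.get_concept_images()    # run per-concept meta-analyses
ns.get_resampled_images()  # resample meta-analytic maps to 3 mm
ns.load_concept_images()
ns.build_design_matrix()
ns.save()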
Example #55
# path of WHS atlas files
resource_dir = path.join(path.pardir, 'resources')

# make sure we have the data
dataset_dir = path.join(path.expanduser('~'), 'Documents', 'neurosynth-data')
database_path = path.join(dataset_dir, 'database_bregma.txt')
neurosynth_data_url = 'https://github.com/wmpauli/neurosynth-data'
if not path.exists(database_path):
    print("Please download dataset from %s and store it in %s" % (neurosynth_data_url, dataset_dir))

# load dataset, both image table and feature table
r = 1.0 # 1mm smoothing kernel
transform = {'BREGMA': transformations.bregma_to_whs()}
target = 'WHS'
masker_filename = path.join(resource_dir, 'WHS_SD_rat_brainmask_sm_v2.nii.gz')
dataset = Dataset(path.join(dataset_dir, 'database_bregma.txt'), masker=masker_filename, r=r, transform=transform, target=target)
dataset.feature_table = FeatureTable(dataset)
dataset.add_features(path.join(dataset_dir, "features_bregma.txt")) # add features
fn = dataset.get_feature_names()

# get the ids of studies where this feature occurs
ids = dataset.get_ids_by_features(('%s*' % feature), threshold=0.1)
ma = meta.MetaAnalysis(dataset, ids)
results_path = path.join('results', 'meta', feature)
if not path.exists(results_path):
    makedirs(results_path)

    print("saving results to: %s" % results_path)
ma.save_results(results_path)

# note, figure 2 of manuscript was used by plotting the z-score statistical maps for forward inference (pAgF_z.nii.gz) and reverse inference (pFgA_z.nii.gz)
Example #56
        resampled_roi = resample_to_img(roi,
                                        mni152_2mm,
                                        interpolation='nearest',
                                        copy=True)
        resampled_file = join(map_dir, '{0}_mni2mm.nii.gz'.format(file))
        resampled_roi.to_filename(resampled_file)
        roi_files[i] = resampled_file
        plot_glass_brain(resampled_file,
                         output_file=join(
                             map_dir, '{0}_mni2mm.png'.format(basename(file))))

# In[16]:

print('loading dataset...')
tds = datetime.now()
dataset = Dataset('/Users/Katie/Dropbox/Data/neurosynth-v0.7/database.txt')
dataset.add_features('/Users/Katie/Dropbox/Data/neurosynth-v0.7/features.txt')
tdf = datetime.now()

print('dataset loaded! only took {0}'.format((tdf - tds)))

for i in np.arange(0, len(mask_names)):
    print('{0}\nmeta-analyzing {1}...'.format(datetime.now(), mask_names[i]))
    tmas = datetime.now()
    ids = dataset.get_studies(mask=roi_files[i], )
    ma = meta.MetaAnalysis(dataset, ids)
    ma.save_results(
        output_dir=sink_dir,
        prefix=mask_names[i],
        image_list=['association-test_z', 'association-test_z_FDR_0.01'])
    tmaf = datetime.now()