Example #1
def save_dataset_instance(db_filename, kw_filename, instance_filename):
    # Create a new Dataset instance
    dataset = Dataset('./raw_data/' + db_filename + '.txt')
    # Add some features
    dataset.add_features('./raw_data/' + kw_filename + '.txt')
    # Save new file
    dataset.save('./raw_data/' + instance_filename + '.pkl')
    return dataset
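# Usage sketch (added; the file names below are hypothetical): assumes
# ./raw_data/database.txt and ./raw_data/features.txt already exist, and
# pickles the resulting Dataset to ./raw_data/my_dataset.pkl.
dataset = save_dataset_instance('database', 'features', 'my_dataset')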
Example #2
    def __init__(self,
                 db,
                 dataset=None,
                 studies=None,
                 features=None,
                 reset_db=False,
                 reset_dataset=False,
                 download_data=False):
        """
        Initialize instance from a pickled Neurosynth Dataset instance or a
        pair of study and analysis .txt files.

        Args:
            db: the SQLAlchemy database connection to use.
            dataset: an optional filename of a pickled neurosynth Dataset
                instance.
            studies: name of file containing activation data. If passed, a new
                Dataset instance will be constructed.
            features: name of file containing feature data.
            reset_db: if True, will drop and re-create all database tables
                before adding new content. If False (default), will add content
                incrementally.
            reset_dataset: if True, will regenerate the pickled Neurosynth
                dataset.
            download_data: if True, ignores any existing files and downloads
                the latest Neurosynth data files from GitHub.
        """

        if (studies is not None and not os.path.exists(studies)) \
                or settings.RESET_ASSETS:
            print("WARNING: RESETTING ALL NEUROSYNTH ASSETS!")
            self.reset_assets(download_data)

        # Load or create Neurosynth Dataset instance
        if dataset is None or reset_dataset or (isinstance(dataset, str) and
                                                not os.path.exists(dataset)):
            print("\tInitializing a new Dataset...")
            if (studies is None) or (features is None):
                raise ValueError(
                    "To generate a new Dataset instance, both studies and "
                    "analyses must be provided.")
            dataset = Dataset(studies)
            dataset.add_features(features)
            dataset.save(settings.PICKLE_DATABASE)
        else:
            print("Loading existing Dataset...")
            dataset = Dataset.load(dataset)
            if features is not None:
                dataset.add_features(features)

        self.dataset = dataset
        self.db = db

        if reset_db:
            print("WARNING: RESETTING DATABASE!!!")
            self.reset_database()
Example #3
    def __init__(self, db, dataset=None, studies=None, features=None,
                 reset_db=False, reset_dataset=False, download_data=True):
        """
        Initialize instance from a pickled Neurosynth Dataset instance or a
        pair of study and analysis .txt files.

        Args:
            db: the SQLAlchemy database connection to use.
            dataset: an optional filename of a pickled neurosynth Dataset
                instance.
                Note that the Dataset must contain the list of Mappables (i.e.,
                    save() must have been called with keep_mappables set to
                    True).
            studies: name of file containing activation data. If passed, a new
                Dataset instance will be constructed.
            features: name of file containing feature data.
            reset_db: if True, will drop and re-create all database tables
                before adding new content. If False (default), will add content
                incrementally.
            reset_dataset: if True, will regenerate the pickled Neurosynth
                dataset.
            download_data: if True, ignores any existing files and downloads
                the latest Neurosynth data files from GitHub.
        """

        if (studies is not None and not os.path.exists(studies)) \
                or settings.RESET_ASSETS:
            print "WARNING: RESETTING ALL NEUROSYNTH ASSETS!"
            self.reset_assets(download_data)

        # Load or create Neurosynth Dataset instance
        if dataset is None or reset_dataset or (isinstance(dataset, str) and not os.path.exists(dataset)):

            print("\tInitializing a new Dataset...")
            if (studies is None) or (features is None):
                raise ValueError(
                    "To generate a new Dataset instance, both studies and "
                    "analyses must be provided.")
            dataset = Dataset(studies)
            dataset.add_features(features)
            dataset.save(settings.PICKLE_DATABASE, keep_mappables=True)
        else:
            print "\tLoading existing Dataset..."
            dataset = Dataset.load(dataset)
            if features is not None:
                dataset.add_features(features)

        self.dataset = dataset
        self.db = db

        if reset_db:
            print "WARNING: RESETTING DATABASE!!!"
            self.reset_database()
Example #4
def _getdata():
    """Downloads data from neurosynth and returns it as a Dataset.

    Also pickles the dataset for future use."""
    LOG.warning("Downloading and processing Neurosynth database")

    os.makedirs("data", exist_ok=True)
    from neurosynth.base.dataset import download

    download(path="data", unpack=True)

    data = Dataset("data/database.txt")
    data.add_features("data/features.txt")
    data.save("data/dataset.pkl")
    return data
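# Usage sketch (added for illustration): the first call downloads and pickles
# the data; later runs can skip _getdata() and reload the pickle directly
# via Dataset.load("data/dataset.pkl").
data = _getdata()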
Example #5
def fetch_neurosynth_dataset(data_dir, return_pkl=True):
    """Downloads the Neurosynth dataset

    Parameters
    ----------
    data_dir : str
        Directory in which to download the dataset.
    return_pkl : bool
        If True, creates (if needed) and returns the path to the .pkl file.
        Otherwise returns the paths to the database and features text files.

    Returns
    -------
    str or tuple of str
        If return_pkl is True, returns the path to the .pkl file. Otherwise
        returns a tuple containing the paths to the database.txt and
        features.txt files.

    """
    if not os.path.isdir(data_dir):
        os.mkdir(data_dir)

    dataset_file = os.path.join(data_dir, "database.txt")
    if not os.path.isfile(dataset_file):
        logging.info("Downloading the Neurosynth dataset.")
        download(data_dir, unpack=True)
    feature_file = os.path.join(data_dir, "features.txt")

    if return_pkl:
        pkl_file = os.path.join(data_dir, "dataset.pkl")
        if not os.path.isfile(pkl_file):
            logging.info(
                "Converting Neurosynth data to a .pkl file. This may take a while."
            )
            dataset = Dataset(dataset_file, feature_file)
            dataset.save(pkl_file)
        return pkl_file

    return (dataset_file, feature_file)
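# Usage sketch (added; the directory name is hypothetical): grab the path to
# the pickled Dataset, or the raw text files when return_pkl is False. The
# first call downloads the full Neurosynth database.
pkl_file = fetch_neurosynth_dataset("neurosynth_data", return_pkl=True)
database_file, features_file = fetch_neurosynth_dataset(
    "neurosynth_data", return_pkl=False)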
# <codecell>

# Create a new Dataset instance
dataset = Dataset('data/database.txt')

# Add some features
dataset.add_features('data/features.txt')

# <markdowncell>

# Because this takes a while, we'll save our Dataset object to disk. That way, the next time we want to use it, we won't have to sit through the whole creation operation again:

# <codecell>

dataset.save('dataset.pkl')

# <markdowncell>

# From now on, instead of waiting, we can just load the dataset from file:

# <codecell>

dataset = Dataset.load('dataset.pkl')   # Note the capital D in the second Dataset--load() is a class method

# <markdowncell>

# ## Doing stuff with Neurosynth
# Now that our Dataset has both activation data and some features, we're ready to start doing some analyses! By design, Neurosynth focuses on facilitating simple, fast, and modestly useful analyses. This means you probably won't break any new ground using Neurosynth, but you should be able to supplement results you've generated using other approaches with a bunch of nifty analyses that take just 2-3 lines of code.
# 
# ### Simple feature-based meta-analyses
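# <markdowncell>

# As a quick illustration (this cell is an added sketch, not part of the
# original notebook): a feature-based meta-analysis boils down to selecting
# the IDs of studies tagged with a feature and handing them to a MetaAnalysis
# object. The feature name and output prefix below are arbitrary, and the
# ID-selection call assumes the older get_ids_by_features() API (newer
# releases expose an equivalent get_studies(features=...) method).

# <codecell>

from neurosynth.analysis import meta

# Select studies where 'emotion' occurs above the default frequency threshold
ids = dataset.get_ids_by_features('emotion', threshold=0.001)

# Run the meta-analysis and save the resulting images to the current directory
ma = meta.MetaAnalysis(dataset, ids)
ma.save_results('.', 'emotion')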
def create_dataset(database_location, feature_location):
	dataset = Dataset(database_location)
	dataset.add_features(feature_location)
	dataset.save('neurosynth-dataset.pkl')
	return dataset
Example #8
xfm2vol.run()


#make masks to input into neurosynth
def cluster2masks(clusterfile):
    clustermap = nb.load(clusterfile).get_data()
    for x in range(1, clustermap.max() + 1):
        clustermask = (clustermap == x).astype(int)
        nImg = nb.Nifti1Image(clustermask, None)
        nb.save(
            nImg,
            os.path.abspath(clusterfile + '_clustermask' + str(x) + '.nii'))


cluster2masks(volume_file)

dataset_file = '/home/raid3/watanabe/neurosynth/data/dataset.pkl'
if not os.path.exists(dataset_file):
    dataset = Dataset('/home/raid3/watanabe/neurosynth/data/database.txt')
    dataset.add_features('/home/raid3/watanabe/neurosynth/data/features.txt')
    dataset.save(dataset_file)
else:
    dataset = pickle.load(open(dataset_file, 'rb'))

clustermask = volume_file + '_clustermask' + str(3) + '.nii'

ids = dataset.get_ids_by_mask(clustermask)
features = dataset.feature_table.get_features_by_ids(ids)

#mri_surf2vol --identity fsaverage4 --surfval /scr/ilz1/Data/attemptsurface.nii --hemi 'lh' --o /scr/ilz1/Data/results/surf2volume.nii --template /scr/ilz1/Data/freesurfer/fsaverage4/mri/orig.mgz
def create_dataset(database_location, feature_location):
    dataset = Dataset(database_location)
    dataset.add_features(feature_location)
    dataset.save('dataset-old.pkl')
    print('created dataset')
    return dataset
Example #10
class Neurosynth:
    def __init__(self,
                 datadir='../data/neurosynth',
                 verbose=True,
                 ma_count_thresh=16,
                 meta_image='consistency_z',
                 resolution=3):
        self.dataset = None
        self.concepts = None
        self.concepts_df = None
        self.concept_pmids = {}
        self.datadir = datadir
        self.datafile = os.path.join(datadir, 'database.txt')
        self.verbose = verbose
        self.ma_count_thresh = ma_count_thresh
        self.meta_image = meta_image
        self.resolution = resolution
        self.imagedir_resampled = None
        self.image_concepts = None
        self.desmtx = None

        if not os.path.exists(os.path.join(self.datadir, 'database.txt')):
            print('downloading neurosynth data')
            ns.dataset.download(path='/tmp', unpack=True)
            print('extracting data')
            tfile = tarfile.open("/tmp/current_data.tar.gz", 'r:gz')
            if not os.path.exists(self.datadir):
                os.mkdir(self.datadir)
            tfile.extractall(self.datadir)
            os.remove("/tmp/current_data.tar.gz")
            print('done creating dataset in', self.datadir)

        self.imagedir = os.path.join(self.datadir, 'ma_images')
        if not os.path.exists(self.imagedir):
            os.mkdir(self.imagedir)

    def get_dataset(self, force_load=False):
        if os.path.exists(os.path.join(self.datadir,
                                       'dataset.pkl')) and not force_load:
            print('loading database from',
                  os.path.join(self.datadir, 'dataset.pkl'))
            self.dataset = Dataset.load(
                os.path.join(self.datadir, 'dataset.pkl'))
        else:
            print('loading database - this takes a few minutes')
            self.dataset = Dataset(os.path.join(self.datadir, 'database.txt'))
            self.dataset.add_features(
                os.path.join(self.datadir, 'features.txt'))

            self.dataset.save(os.path.join(self.datadir, 'dataset.pkl'))

    def get_concepts(self, force_load=False):
        if os.path.exists(os.path.join(self.datadir,
                                       'concepts_df.csv')) and not force_load:
            print('using cached cognitive atlas concepts')
            self.concepts_df = pandas.read_csv(
                os.path.join(self.datadir, 'concepts_df.csv'))
        else:
            self.concepts_df = get_concept().pandas
            self.concepts_df.to_csv(
                os.path.join(self.datadir, 'concepts_df.csv'))
        self.concepts = self.concepts_df.name.tolist()

    def get_concept_pmids(self, retmax=2000000, force_load=False):
        # Get the PMIDs for each concept that are present in Neurosynth:
        # search PubMed (Entrez) for the concept name and intersect the
        # resulting PMIDs with the full set of Neurosynth study IDs.
        if os.path.exists(os.path.join(
                self.datadir, 'concept_pmids.pkl')) and not force_load:
            print('using cached concept_pmids')
            self.concept_pmids = pickle.load(
                open(os.path.join(self.datadir, 'concept_pmids.pkl'), 'rb'))
            return

        print('loading all neurosynth pmids')
        all_neurosynth_ids = self.dataset.image_table.ids.tolist()
        for id in self.concepts:
            time.sleep(0.5)
            handle = Entrez.esearch(db="pubmed",
                                    retmax=retmax,
                                    term='"%s"' % id)
            record = Entrez.read(handle)
            handle.close()
            # make sure we got all the records - rerun if we didn't
            if int(record['Count']) > retmax:
                handle = Entrez.esearch(db="pubmed",
                                        retmax=int(record['Count']),
                                        term='"%s"' % id)
                record = Entrez.read(handle)
                handle.close()
            records_int = [int(i) for i in record['IdList']]
            ns_pmids = intersect(all_neurosynth_ids, records_int)
            print('pubmed found', len(ns_pmids), 'matching pmids for', id)
            self.concept_pmids[id] = ns_pmids
        pickle.dump(
            self.concept_pmids,
            open(os.path.join(self.datadir, 'concept_pmids.pkl'), 'wb'))

    def get_concept_images(self, force_load=False):

        for c in self.concept_pmids.keys():
            if not force_load and os.path.exists(
                    os.path.join(
                        self.imagedir,
                        '%s_specificity_z.nii.gz' % c.replace(' ', '-'))):
                continue
            if len(self.concept_pmids[c]) < self.ma_count_thresh:
                #print('skipping',c,len(self.concept_pmids[c]),'pmids')
                continue
            print('running meta-analysis for', c)
            ma = meta.MetaAnalysis(self.dataset, self.concept_pmids[c])
            ma.save_results(self.imagedir, c.replace(' ', '-'))

        if force_load or not os.path.exists(
                os.path.join(self.imagedir, 'mask_image.nii.gz')):
            # make mask of voxels with non-zero standard deviation
            concept_images = glob.glob(
                os.path.join(self.imagedir, '*_%s.nii.gz' % self.meta_image))

            imgdata = numpy.zeros((91, 109, 91, len(concept_images)))
            print('loading concept images to compute std')
            for i, c in enumerate(concept_images):
                tmp = nibabel.load(c).get_data()
                imgdata[:, :, :, i] = tmp

            imgstd = numpy.std(imgdata, axis=3)
            maskdata = (imgstd > 0).astype('int')
            maskimg = nibabel.Nifti1Image(maskdata,
                                          affine=nibabel.load(c).affine)
            maskimg.to_filename(
                os.path.join(self.imagedir, 'mask_image.nii.gz'))

    def get_resampled_images(self, shape=None, affine=None, force_load=False):
        # use 3 mm as default
        if not shape:
            shape = [60, 72, 60]
            affine = numpy.array([[-3, 0, 0, 90], [0, 3, 0, -126],
                                  [0, 0, 3, -72], [0, 0, 0, 1]])
            self.resolution = affine[1, 1].astype('int')
        print('resampling data to %d mm' % self.resolution)
        self.imagedir_resampled = os.path.join(
            self.datadir, 'ma_images_%dmm' % self.resolution)
        if not os.path.exists(self.imagedir_resampled):
            os.mkdir(self.imagedir_resampled)
        concept_images = glob.glob(
            os.path.join(self.imagedir, '*_%s.nii.gz' % self.meta_image))
        for c in concept_images:
            if force_load or not os.path.exists(
                    os.path.join(self.imagedir_resampled,
                                 os.path.basename(c))):
                img = nilearn.image.resample_img(c,
                                                 target_affine=affine,
                                                 target_shape=shape)
                img.to_filename(
                    os.path.join(self.imagedir_resampled, os.path.basename(c)))

        if not os.path.exists(
                os.path.join(self.datadir,
                             'mask_%dmm.nii.gz' % self.resolution)):
            # make MNI mask at chosen resolution
            mask = os.path.join(
                os.environ['FSLDIR'],
                'data/standard/MNI152_T1_2mm_brain_mask.nii.gz')
            maskimg = nilearn.image.resample_img(mask,
                                                 target_affine=affine,
                                                 target_shape=shape)
            maskimg.to_filename(
                os.path.join(self.datadir,
                             'mask_%dmm.nii.gz' % self.resolution))

    def load_concept_images(self, force_load=True):
        concept_images = glob.glob(
            os.path.join(self.imagedir_resampled,
                         '*_%s.nii.gz' % self.meta_image))
        concept_images.sort()
        self.image_concepts = [
            os.path.basename(i).split('_')[0] for i in concept_images
        ]
        if os.path.exists(
                os.path.join(self.datadir,
                             'imgdata_%dmm.npy' % self.resolution)):
            self.imgdata = numpy.load(
                os.path.join(self.datadir,
                             'imgdata_%dmm.npy' % self.resolution))
            # make sure it's the right size
            if self.imgdata.shape[1] == len(concept_images):
                print('using cached concept image data')
                return

        masker = nilearn.input_data.NiftiMasker(
            mask_img=os.path.join(self.datadir,
                                  'mask_%dmm.nii.gz' % self.resolution),
            target_shape=[60, 72, 60],
            target_affine=numpy.array([[-3, 0, 0, 90], [0, 3, 0, -126],
                                       [0, 0, 3, -72], [0, 0, 0, 1]]))
        print('loading concept image data')
        self.imgdata = masker.fit_transform(concept_images)
        numpy.save(
            os.path.join(self.datadir, 'imgdata_%dmm.npy' % self.resolution),
            self.imgdata)

    def save(self):
        with open('%s/neurovault_%dmm.pkl' % (self.datadir, self.resolution),
                  'wb') as f:
            pickle.dump(self, f)

    def build_design_matrix(self, force_load=False):
        if not force_load and os.path.exists(
                os.path.join(self.datadir, 'desmtx.csv')):
            self.desmtx = pandas.read_csv(
                os.path.join(self.datadir, 'desmtx.csv'), index_col=0)
            print('using cached design matrix')
            return
        print('building design matrix')
        all_concept_pmids = []
        for k in self.concept_pmids.keys():
            all_concept_pmids = all_concept_pmids + self.concept_pmids[k]
        all_concept_pmids = list(set(all_concept_pmids))
        all_concept_pmids.sort()
        all_concepts = list(self.concept_pmids.keys())
        self.desmtx = pandas.DataFrame(data=0,
                                       index=all_concept_pmids,
                                       columns=all_concepts)

        for k in self.concept_pmids.keys():
            pmids = self.concept_pmids[k]
            self.desmtx.loc[pmids, k] = 1
        # drop columns with too few matches
        self.desmtx = self.desmtx.loc[
            :, self.desmtx.sum() > self.ma_count_thresh]
        self.desmtx.to_csv(os.path.join(self.datadir, 'desmtx.csv'))
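# Usage sketch (added for illustration): a plausible end-to-end driver for the
# class above, ordered so that each step has what it needs (the Dataset and
# concept PMIDs must exist before the meta-analysis images can be built).
ns = Neurosynth(datadir='../data/neurosynth')
ns.get_dataset()
ns.get_concepts()
ns.get_concept_pmids()
ns.get_concept_images()
ns.get_resampled_images()
ns.load_concept_images()
ns.build_design_matrix()
ns.save()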
Example #11
""" Load a Dataset and generate a full set of meta-analysis
images--i.e., run a meta-analysis on every single feature.
"""

neurosynth_data_dir = "/home/data/nbc/misc-projects/niconn-macm/code/neurosynth/"

if not op.isfile(op.join(neurosynth_data_dir, "dataset.pkl")):
    # Create Dataset instance from a database file.
    dataset = Dataset(op.join(neurosynth_data_dir, "database.txt"))

    # Load features from file
    dataset.add_features(op.join(neurosynth_data_dir, "features.txt"))

    # Pickle the Dataset to file so we can use Dataset.load() next time
    # instead of having to sit through the generation process again.
    dataset.save(op.join(neurosynth_data_dir, "dataset.pkl"))

# Load pickled Dataset--assumes you've previously saved it. If not,
# follow the create_a_new_dataset_and_load_features example.
dataset = Dataset.load(op.join(neurosynth_data_dir, "dataset.pkl"))

# Get the full list of feature names
feature_list = dataset.get_feature_names()

# Run a meta-analysis on each feature, and save all the results to
# a directory called results. Note that the directory will not be
# created for you, so make sure it exists.
# Here we use the default frequency threshold of 0.001 (i.e., a
# study is said to have a feature if more than 1 in every 1,000
# words is the target word), and an FDR correction level of 0.05.
out_dir = "/home/data/nbc/misc-projects/meta-gradients/code/feature_maps"
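# Sketch of the final step described above (added for completeness), assuming
# the meta.analyze_features() helper shipped with neurosynth, which runs one
# meta-analysis per feature and writes the maps to output_dir.
from neurosynth.analysis import meta

meta.analyze_features(dataset, feature_list, threshold=0.001, q=0.05,
                      output_dir=out_dir)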
Example #12
xfm2vol.inputs.identity = 'fsaverage4'
xfm2vol.inputs.hemi = 'lh'
xfm2vol.inputs.transformed_file = volume_file
xfm2vol.inputs.template_file = template
xfm2vol.run()

#make masks to input into neurosynth
def cluster2masks(clusterfile):
    clustermap = nb.load(clusterfile).get_data()
    for x in range(1,clustermap.max()+1):
        clustermask = (clustermap==x).astype(int)
        nImg = nb.Nifti1Image(clustermask, None)
        nb.save(nImg, os.path.abspath(clusterfile+'_clustermask'+str(x)+'.nii'))

cluster2masks(volume_file)

dataset_file = '/home/raid3/watanabe/neurosynth/data/dataset.pkl'
if not os.path.exists(dataset_file):
    dataset = Dataset('/home/raid3/watanabe/neurosynth/data/database.txt')
    dataset.add_features('/home/raid3/watanabe/neurosynth/data/features.txt')
    dataset.save(dataset_file)
else:
    dataset = pickle.load(open(dataset_file, 'rb'))

clustermask = volume_file+'_clustermask'+str(3)+'.nii'

ids = dataset.get_ids_by_mask(clustermask)
features = dataset.feature_table.get_features_by_ids(ids)

#mri_surf2vol --identity fsaverage4 --surfval /scr/ilz1/Data/attemptsurface.nii --hemi 'lh' --o /scr/ilz1/Data/results/surf2volume.nii --template /scr/ilz1/Data/freesurfer/fsaverage4/mri/orig.mgz