Example #1
def save_dataset_instance(db_filename, kw_filename, instance_filename):
    # Create a new Dataset instance
    dataset = Dataset('./raw_data/' + db_filename + '.txt')
    # Add some features
    dataset.add_features('./raw_data/' + kw_filename + '.txt')
    # Save new file
    dataset.save('./raw_data/' + instance_filename + '.pkl')
    return dataset
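A hypothetical call of the helper above; the file stems are placeholders for whatever database/feature export lives under ./raw_data/:

# Builds ./raw_data/neurosynth_dataset.pkl from the two text files (names assumed).
dataset = save_dataset_instance('database', 'features', 'neurosynth_dataset')
# Later runs can skip the rebuild and reload the pickle directly.
dataset = Dataset.load('./raw_data/neurosynth_dataset.pkl')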
Example #2
def neurosynthInit(dbsize):
    print "Initializing Neurosynth database..."
    dataset = Dataset('data/' + dbsize + 'terms/database.txt')
    dataset.add_features('data/' + dbsize + 'terms/features.txt')    

    #print "Loading standard space brain..."
    #img = nb.load("data/MNI152_T1_2mm_brain.nii.gz")
    #standard = img.get_data()
    return dataset
Example #3
def neurosynthInit(dbsize):
    """Initialize Neurosynth Database, return database object"""
    print "Initializing Neurosynth database..."
    db = Dataset('data/' + str(dbsize) + 'terms/database.txt')
    db.add_features('data/' + str(dbsize) + 'terms/features.txt')

    #print "Loading standard space brain..."
    #img = nb.load("data/MNI152_T1_2mm_brain.nii.gz")
    #standard = img.get_data()
    return db
Example #4
    def __init__(self,
                 db,
                 dataset=None,
                 studies=None,
                 features=None,
                 reset_db=False,
                 reset_dataset=False,
                 download_data=False):
        """
        Initialize instance from a pickled Neurosynth Dataset instance or a
        pair of study and analysis .txt files.
        Args:
            db: the SQLAlchemy database connection to use.
            dataset: an optional filename of a pickled neurosynth Dataset
                instance.
            studies: name of file containing activation data. If passed, a new
                Dataset instance will be constructed.
            features: name of file containing feature data.
            reset_db: if True, will drop and re-create all database tables
                before adding new content. If False (default), will add content
                incrementally.
            reset_dataset: if True, will regenerate the pickled Neurosynth
                dataset.
            download_data: if True, ignores any existing files and downloads
                the latest Neurosynth data files from GitHub.
        """

        if (studies is not None and not os.path.exists(studies)) \
                or settings.RESET_ASSETS:
            print("WARNING: RESETTING ALL NEUROSYNTH ASSETS!")
            self.reset_assets(download_data)

        # Load or create Neurosynth Dataset instance
        if dataset is None or reset_dataset or (isinstance(dataset, str) and
                                                not os.path.exists(dataset)):
            print("\tInitializing a new Dataset...")
            if (studies is None) or (features is None):
                raise ValueError(
                    "To generate a new Dataset instance, both studies and "
                    "analyses must be provided.")
            dataset = Dataset(studies)
            dataset.add_features(features)
            dataset.save(settings.PICKLE_DATABASE)
        else:
            print("Loading existing Dataset...")
            dataset = Dataset.load(dataset)
            if features is not None:
                dataset.add_features(features)

        self.dataset = dataset
        self.db = db

        if reset_db:
            print("WARNING: RESETTING DATABASE!!!")
            self.reset_database()
Example #5
    def __init__(self, db, dataset=None, studies=None, features=None,
                 reset_db=False, reset_dataset=False, download_data=True):
        """
        Initialize instance from a pickled Neurosynth Dataset instance or a
        pair of study and analysis .txt files.

        Args:
            db: the SQLAlchemy database connection to use.
            dataset: an optional filename of a pickled neurosynth Dataset
                instance.
                Note that the Dataset must contain the list of Mappables (i.e.,
                    save() must have been called with keep_mappables set to
                    True).
            studies: name of file containing activation data. If passed, a new
                Dataset instance will be constructed.
            features: name of file containing feature data.
            reset_db: if True, will drop and re-create all database tables
                before adding new content. If False (default), will add content
                incrementally.
            reset_dataset: if True, will regenerate the pickled Neurosynth
                dataset.
            download_data: if True, ignores any existing files and downloads
                the latest Neurosynth data files from GitHub.
        """

        if (studies is not None and not os.path.exists(studies)) \
                or settings.RESET_ASSETS:
            print "WARNING: RESETTING ALL NEUROSYNTH ASSETS!"
            self.reset_assets(download_data)

        # Load or create Neurosynth Dataset instance
        if dataset is None or reset_dataset or (isinstance(dataset, str) and
                                                not os.path.exists(dataset)):
            print("\tInitializing a new Dataset...")
            if (studies is None) or (features is None):
                raise ValueError(
                    "To generate a new Dataset instance, both studies and "
                    "analyses must be provided.")
            dataset = Dataset(studies)
            dataset.add_features(features)
            dataset.save(settings.PICKLE_DATABASE, keep_mappables=True)
        else:
            print "\tLoading existing Dataset..."
            dataset = Dataset.load(dataset)
            if features is not None:
                dataset.add_features(features)

        self.dataset = dataset
        self.db = db

        if reset_db:
            print "WARNING: RESETTING DATABASE!!!"
            self.reset_database()
Example #6
def _getdata():
    """Downloads data from neurosynth and returns it as a Dataset.

    Also pickles the dataset for future use."""
    LOG.warning("Downloading and processing Neurosynth database")

    os.makedirs("data", exist_ok=True)
    from neurosynth.base.dataset import download

    download(path="data", unpack=True)

    data = Dataset("data/database.txt")
    data.add_features("data/features.txt")
    data.save("data/dataset.pkl")
    return data
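A small companion loader could avoid re-downloading on later runs; this is a sketch under the assumption that the data/ layout above is unchanged (Dataset.load is used the same way in other examples on this page):

def _loaddata():
    """Return the cached Dataset pickled by _getdata(), rebuilding it if missing."""
    if os.path.exists("data/dataset.pkl"):
        return Dataset.load("data/dataset.pkl")
    return _getdata()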
Example #7
def generate_maps(terms,output_dir):

    f,d = download_data()
    features = pandas.read_csv(f,sep="\t")  
    database = pandas.read_csv(d,sep="\t")  

    output_dir = "%s/maps" %(output_dir)

    print "Deriving pickled maps to extract relationships from..."
    dataset = Dataset(d)
    dataset.add_features(f)
    for t in range(len(terms)):
        term = terms[t]
        print "Generating P(term|activation) for term %s, %s of %s" %(term,t,len(terms))
        ids = dataset.get_ids_by_features(term)
        maps = meta.MetaAnalysis(dataset,ids)
        term_name = term.replace(" ","_")
        pickle.dump(maps.images["pFgA_z"],open("%s/%s_pFgA_z.pkl" %(output_dir,term_name),"wb"))
Example #8
def extract_relations(terms,maps_dir,output_dir):

    if isinstance(terms,str):
        terms = [terms]

    f,d = download_data()
    features = pandas.read_csv(f,sep="\t")  
    database = pandas.read_csv(d,sep="\t")  
    allterms = features.columns.tolist()
    allterms.pop(0)  #pmid

    dataset = Dataset(d)
    dataset.add_features(f)
    image_matrix = pandas.DataFrame(columns=range(228453))
    for t in range(len(allterms)):
        term = allterms[t]
        term_name = term.replace(" ","_")
        pickled_map = "%s/%s_pFgA_z.pkl" %(maps_dir,term_name)
        if not os.path.exists(pickled_map):
            print "Generating P(term|activation) for term %s" %(term)
            ids = dataset.get_ids_by_features(term)
            maps = meta.MetaAnalysis(dataset,ids)
            pickle.dump(maps.images["pFgA_z"],open(pickled_map,"wb"))
        map_data = pickle.load(open(pickled_map,"rb"))
        image_matrix.loc[term] = map_data

    sims = pandas.DataFrame(columns=image_matrix.index)
    tuples = []
    for t1 in range(len(terms)):
        term1 = terms[t1]
        print "Extracting NeuroSynth relationships for term %s..." %(term1)
        for t2 in range(len(terms)):
            term2 = terms[t2]
            if t1<t2:
                score = pearsonr(image_matrix.loc[term1],image_matrix.loc[term2])[0]
                tuples.append((term1,term2,score))

    save_relations(output_dir=output_dir,relations=tuples)
Example #9
class TestAnalysis(unittest.TestCase):

  def setUp(self):
    """ Create a new Dataset and add features. """
    self.dataset = Dataset('data/test_dataset.txt')
    self.dataset.add_features('data/test_features.txt')
  
  def test_meta_analysis(self):
    """ Test full meta-analysis stream. """
    pass

  def test_decoder(self):
    pass

  def test_coactivation(self):
    """ Test seed-based coactivation. """ 
    pass

  def test_roi_averaging(self):
    pass

  def test_get_random_voxels(self):
    pass
Example #10
def create_dataset(database_location, feature_location):
	dataset = Dataset(database_location)
	dataset.add_features(feature_location)
	dataset.save('neurosynth-dataset.pkl')
	return dataset
Example #11
resource_dir = path.join(path.pardir, 'resources')

# make sure we have the data
dataset_dir = path.join(path.expanduser('~'), 'Documents', 'neurosynth-data')
database_path = path.join(dataset_dir, 'database_bregma.txt')
neurosynth_data_url = 'https://github.com/wmpauli/neurosynth-data'
if not path.exists(database_path):
    print("Please download dataset from %s and store it in %s" % (neurosynth_data_url, dataset_dir))

# load dataset, both image table and feature table
r = 1.0 # 1mm smoothing kernel
transform = {'BREGMA': transformations.bregma_to_whs()}
target = 'WHS'
masker_filename = path.join(resource_dir, 'WHS_SD_rat_brainmask_sm_v2.nii.gz')
dataset = Dataset(path.join(dataset_dir, 'database_bregma.txt'), masker=masker_filename, r=r, transform=transform, target=target)
dataset.feature_table = FeatureTable(dataset)
dataset.add_features(path.join(dataset_dir, "features_bregma.txt")) # add features
fn = dataset.get_feature_names()

# get the ids of studies where this feature occurs
ids = dataset.get_ids_by_features(('%s*' % feature), threshold=0.1)
ma = meta.MetaAnalysis(dataset, ids)
results_path = path.join('results', 'meta', feature)
if not path.exists(results_path):
    makedirs(results_path)

    print("saving results to: %s" % results_path)
ma.save_results(results_path)

# note: figure 2 of the manuscript was generated by plotting the z-score statistical maps for forward inference (pAgF_z.nii.gz) and reverse inference (pFgA_z.nii.gz)
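The note above refers to the z-score maps written by ma.save_results(); a minimal, hedged sketch of how such maps could be rendered (nilearn is assumed to be installed, and the threshold and background image are illustrative choices, not taken from the manuscript):

from nilearn.plotting import plot_stat_map

for img_name in ('pAgF_z.nii.gz', 'pFgA_z.nii.gz'):
    # Overlay each inference map on the WHS rat brain mask used above;
    # a proper anatomical template would make a nicer underlay.
    plot_stat_map(path.join(results_path, img_name),
                  bg_img=masker_filename,
                  threshold=2.3,
                  output_file=path.join(results_path, img_name.replace('.nii.gz', '.png')))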
Example #12
class NeuroSynth:

  """Initialize Neurosynth Database"""
  def __init__(self,dbsize):
    print "Initializing Neurosynth database..."
    self.db = Dataset('data/' + str(dbsize) + 'terms/database.txt')
    self.db.add_features('data/' + str(dbsize) + 'terms/features.txt')
    self.ids = self.getIDs()
    self.decoder = None
    #self.masker = mask.Mask("data/X.nii.gz")

  """Do contrast analysis between two sets of """
  def neurosynthContrast(self,papers1,papers2,fdr,outdir=None,outprefix=None,image_list=None):
    
    # Do a meta analysis to contrast the two
    ma = meta.MetaAnalysis(self.db,papers1,papers2,q=float(fdr))
    if outdir:
      print "Saving results to %s" % (outdir)
      ma.save_results(outdir, prefix=outprefix, prefix_sep='_', image_list=image_list)
    return ma.images
    
  """Conduct meta analysis with particular set of ids"""
  def neurosynthMeta(self,papers,fdr,outdir=None,outprefix=None, image_list=None):
    # Get valid ids from user list
    valid_ids = self.get_valid_ids(papers)

    if (len(valid_ids) > 0):
      # Do meta analysis
      ma = meta.MetaAnalysis(self.db,valid_ids,q=float(fdr))
      if outdir:
        print "Saving results to output directory %s" % (outdir)
        ma.save_results(outdir, prefix=outprefix, prefix_sep='_', image_list=image_list)
      return ma.images
    else:
      print "No studies found in database for ids in question!"

  """Return list of valid ids from user input"""
  def get_valid_ids(self,papers):
    # Input is a list of paper ids (pmids/dois)
    valid_ids = [x for x in papers if int(x.strip(" ")) in self.ids]
    print("Found %s valid ids." % str(len(valid_ids)))
    return valid_ids

  """Decode an image, return 100 results"""
  def decode(self,images,outfile,mrs=None,round=4):
    if not self.decoder:
      self.decoder = decode.Decoder(self.db)

    # If mrs is not specified, do decoding against neurosynth database
    if not mrs:
      result = self.decoder.decode(images, save=outfile)
  
    # If mrs is specified, do decoding against custom set of images
    else:
      # This is akin to traditional neurosynth method - pearson's r correlation
      imgs_to_compare = imageutils.load_imgs(mrs,self.masker)
      imgs_to_decode = imageutils.load_imgs(images,self.masker)
      x, y = imgs_to_compare.astype(float),imgs_to_decode.astype(float)
      x, y = x - x.mean(0), y - y.mean(0)
      x, y = x / np.sqrt((x ** 2).sum(0)), y / np.sqrt((y ** 2).sum(0))
      result = np.around(x.T.dot(y).T,round)
      features = [os.path.basename(m) for m in mrs]
      rownames = [os.path.basename(m) for m in images]
      df = pd.DataFrame(result,columns=features)
      df.index = rownames
      df.to_csv(outfile,sep="\t")
    return result

  """Return features in neurosynth database"""
  def getFeatures(self,dataset):
    return dataset.get_feature_names()

  """Extract pubmed IDs or dois from Neurosynth Database"""
  def getIDs(self):
    # Get all IDs in neuroSynth
    return self.db.image_table.ids


  """Extract author names for a given pmid or doi"""
  def getAuthor(self,db,id):   
   article = self.db.get_mappables(id)
   meta = article[0].__dict__
   tmp = meta['data']['authors']
   tmp = tmp.split(",")
   authors = [ x.strip("^ ") for x in tmp]
   return authors

  """Extract all author names in database"""
  def getAuthors(self,db):
    articles = db.mappables
    uniqueAuthors = []
    for a in articles:
      meta = a.__dict__
      tmp = meta['data']['authors']
      tmp = tmp.split(",")
      authors = [ x.strip("^ ") for x in tmp]
      for a in authors:
        uniqueAuthors.append(a)
    uniqueAuthors = list(np.unique(uniqueAuthors))
    return uniqueAuthors

  """Extract activation points and all meta information for a particular pmid"""
  def getPaperMeta(self,db,pmid):
    articles = db.mappables
    m = []
    for a in articles:
        tmp = a.__dict__
        if tmp['data']['id'] == str(pmid):
          journal = tmp['data']['journal']
          title = tmp['data']['title']
          year = tmp['data']['year']
          doi = tmp['data']['doi']
          auth = tmp['data']['authors']
          peaks = tmp['data']['peaks']
          pmid = tmp['data']['id']
          tmp = (journal,title,year,doi,pmid,auth,peaks)
          m.append(tmp)
    return m
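A hypothetical driver for the class above; dbsize and the pmid strings are placeholders, and the data/<dbsize>terms/ folders are assumed to exist as required by the constructor:

ns = NeuroSynth(525)                              # expects data/525terms/{database,features}.txt
pmids = ["12345678", "23456789"]                  # placeholder study ids, not real pmids
images = ns.neurosynthMeta(papers=pmids, fdr=0.05, outdir="results", outprefix="demo")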
Example #13
class NeuroSynth:

    """Initialize Neurosynth Database"""

    def __init__(self, dbsize):
        print "Initializing Neurosynth database..."
        self.db = Dataset("data/" + str(dbsize) + "terms/database.txt")
        self.db.add_features("data/" + str(dbsize) + "terms/features.txt")
        self.ids = self.getIDs()
        self.decoder = None
        # self.masker = mask.Mask("data/X.nii.gz")

    """Do contrast analysis between two sets of """

    def neurosynthContrast(self, papers1, papers2, fdr, outdir=None, outprefix=None, image_list=None):

        # Do a meta analysis to contrast the two
        ma = meta.MetaAnalysis(self.db, papers1, papers2, q=float(fdr))
        if outdir:
            print "Saving results to %s" % (outdir)
            ma.save_results(outdir, prefix=outprefix, prefix_sep="_", image_list=image_list)
        return ma.images

    """Conduct meta analysis with particular set of ids"""

    def neurosynthMeta(self, papers, fdr, outdir=None, outprefix=None, image_list=None):
        # Get valid ids from user list
        valid_ids = self.get_valid_ids(papers)

        if len(valid_ids) > 0:
            # Do meta analysis
            ma = meta.MetaAnalysis(self.db, valid_ids, q=float(fdr))
            if outdir:
                print "Saving results to output directory %s" % (outdir)
                ma.save_results(outdir, prefix=outprefix, prefix_sep="_", image_list=image_list)
            return ma.images
        else:
            print "No studies found in database for ids in question!"

    """Return list of valid ids from user input"""

    def get_valid_ids(self, papers):
        # Input is a list of paper ids (pmids/dois)
        valid_ids = [x for x in papers if int(x.strip(" ")) in self.ids]
        print("Found %s valid ids." % str(len(valid_ids)))
        return valid_ids

    """Decode an image, return 100 results"""

    def decode(self, images, outfile, mrs=None, round=4):
        if not self.decoder:
            self.decoder = decode.Decoder(self.db)

        # If mrs is not specified, do decoding against neurosynth database
        if not mrs:
            result = self.decoder.decode(images, save=outfile)

        # If mrs is specified, do decoding against custom set of images
        else:
            # This is akin to traditional neurosynth method - pearson's r correlation
            imgs_to_compare = imageutils.load_imgs(mrs, self.masker)
            imgs_to_decode = imageutils.load_imgs(images, self.masker)
            x, y = imgs_to_compare.astype(float), imgs_to_decode.astype(float)
            x, y = x - x.mean(0), y - y.mean(0)
            x, y = x / np.sqrt((x ** 2).sum(0)), y / np.sqrt((y ** 2).sum(0))
            result = np.around(x.T.dot(y).T, round)
            features = [os.path.basename(m) for m in mrs]
            rownames = [os.path.basename(m) for m in images]
            df = pd.DataFrame(result, columns=features)
            df.index = rownames
            df.to_csv(outfile, sep="\t")
        return result

    """Return features in neurosynth database"""

    def getFeatures(self, dataset):
        return dataset.get_feature_names()

    """Extract pubmed IDs or dois from Neurosynth Database"""

    def getIDs(self):
        # Get all IDs in neuroSynth
        return self.db.image_table.ids

    """Extract author names for a given pmid or doi"""

    def getAuthor(self, db, id):
        article = self.db.get_mappables(id)
        meta = article[0].__dict__
        tmp = meta["data"]["authors"]
        tmp = tmp.split(",")
        authors = [x.strip("^ ") for x in tmp]
        return authors

    """Extract all author names in database"""

    def getAuthors(self, db):
        articles = db.mappables
        uniqueAuthors = []
        for a in articles:
            meta = a.__dict__
            tmp = meta["data"]["authors"]
            tmp = tmp.split(",")
            authors = [x.strip("^ ") for x in tmp]
            for a in authors:
                uniqueAuthors.append(a)
        uniqueAuthors = list(np.unique(uniqueAuthors))
        return uniqueAuthors

    """Extract activation points and all meta information for a particular pmid"""

    def getPaperMeta(self, db, pmid):
        articles = db.mappables
        m = []
        for a in articles:
            tmp = a.__dict__
            if tmp["data"]["id"] == str(pmid):
                journal = tmp["data"]["journal"]
                title = tmp["data"]["title"]
                year = tmp["data"]["year"]
                doi = tmp["data"]["doi"]
                auth = tmp["data"]["authors"]
                peaks = tmp["data"]["peaks"]
                pmid = tmp["data"]["id"]
                tmp = (journal, title, year, doi, pmid, auth, peaks)
                m.append(tmp)
        return m
Example #14
base_path = '/home/pauli/Development/neurobabel/'
test_data_path = base_path + 'ACE/'
masker_filename = base_path + 'atlases/whs_sd/WHS_SD_rat_one_sm_v2.nii.gz'
atlas_filename = base_path + 'atlases/whs_sd/WHS_SD_rat_atlas_brain_sm_v2.nii.gz'
mask = nb.load(masker_filename)
masker = Masker(mask)
r = 1.0
# transform = {'BREGMA': transformations.bregma_to_whs()}
#transform = {'BREGMA': transformations.identity()}
transform = {'BREGMA': transformations.bregma_to_whs()}
target = 'WHS'

# load data set
dataset = Dataset(os.path.join(test_data_path, 'db_bregma_export.txt'), masker=masker_filename, r=r, transform=transform, target=target)
dataset.feature_table = FeatureTable(dataset)
dataset.add_features(os.path.join(test_data_path, "db_bregma_features.txt")) # add features
fn = dataset.get_feature_names()

def get_whs_labels(filename=os.path.join(base_path, "atlases/whs_sd/WHS_SD_rat_atlas_v2.label")):
    ''' load the names of all labelled areas in the atlas (e.g. brainstem), return list of them '''
    in_file = open(filename, 'r')
    lines = in_file.readlines()
    labels = {}
    for line in lines:
        start = line.find("\"") + 1
        if start > 0:
            stop = line.find("\"", start)
            label = line[start:stop]
            idx = line.split()[0]
            labels[label] = int(idx)
    in_file.close()
    return labels
Example #15
class Neurosynth:
    def __init__(self,
                 datadir='../data/neurosynth',
                 verbose=True,
                 ma_count_thresh=16,
                 meta_image='consistency_z',
                 resolution=3):
        self.dataset = None
        self.concepts = None
        self.concepts_df = None
        self.concept_pmids = {}
        self.datadir = datadir
        self.datafile = os.path.join(datadir, 'database.txt')
        self.verbose = verbose
        self.ma_count_thresh = ma_count_thresh
        self.meta_image = meta_image
        self.resolution = resolution
        self.imagedir_resampled = None
        self.image_concepts = None
        self.desmtx = None

        if not os.path.exists(os.path.join(self.datadir, 'database.txt')):
            print('downloading neurosynth data')
            ns.dataset.download(path='/tmp', unpack=True)
            print('extracting data')
            tfile = tarfile.open("/tmp/current_data.tar.gz", 'r:gz')
            if not os.path.exists(self.datadir):
                os.mkdir(self.datadir)
            tfile.extractall(self.datadir)
            os.remove("/tmp/current_data.tar.gz")
            print('done creating dataset in', self.datadir)

        self.imagedir = os.path.join(self.datadir, 'ma_images')
        if not os.path.exists(self.imagedir):
            os.mkdir(self.imagedir)

    def get_dataset(self, force_load=False):
        if os.path.exists(os.path.join(self.datadir,
                                       'dataset.pkl')) and not force_load:
            print('loading database from',
                  os.path.join(self.datadir, 'dataset.pkl'))
            self.dataset = Dataset.load(
                os.path.join(self.datadir, 'dataset.pkl'))
        else:
            print('loading database - this takes a few minutes')
            self.dataset = Dataset(os.path.join(self.datadir, 'database.txt'))
            self.dataset.add_features(
                os.path.join(self.datadir, 'features.txt'))

            self.dataset.save(os.path.join(self.datadir, 'dataset.pkl'))

    def get_concepts(self, force_load=False):
        if os.path.exists(os.path.join(self.datadir,
                                       'concepts_df.csv')) and not force_load:
            print('using cached cognitive atlas concepts')
            self.concepts_df = pandas.read_csv(
                os.path.join(self.datadir, 'concepts_df.csv'))
        else:
            self.concepts_df = get_concept().pandas
            self.concepts_df.to_csv(
                os.path.join(self.datadir, 'concepts_df.csv'))
        self.concepts = self.concepts_df.name.tolist()

    def get_concept_pmids(self, retmax=2000000, force_load=False):
        # get the pmids for each concept that are in neurosynth
        # for single-word concepts we use the neurosynth search tool
        # for phrases we use pubmed
        if os.path.exists(os.path.join(
                self.datadir, 'concept_pmids.pkl')) and not force_load:
            print('using cached concept_pmids')
            self.concept_pmids = pickle.load(
                open(os.path.join(self.datadir, 'concept_pmids.pkl'), 'rb'))
            return

        print('loading all neurosynth pmids')
        all_neurosynth_ids = self.dataset.image_table.ids.tolist()
        for id in self.concepts:
            time.sleep(0.5)
            handle = Entrez.esearch(db="pubmed",
                                    retmax=retmax,
                                    term='"%s"' % id)
            record = Entrez.read(handle)
            handle.close()
            # make sure we got all the records - rerun if we didn't
            if int(record['Count']) > retmax:
                handle = Entrez.esearch(db="pubmed",
                                        retmax=int(record['Count']),
                                        term='"%s"' % id)
                record = Entrez.read(handle)
                handle.close()
            records_int = [int(i) for i in record['IdList']]
            ns_pmids = intersect(all_neurosynth_ids, records_int)
            print('pubmed found', len(ns_pmids), 'matching pmids for', id)
            self.concept_pmids[id] = ns_pmids
        pickle.dump(
            self.concept_pmids,
            open(os.path.join(self.datadir, 'concept_pmids.pkl'), 'wb'))

    def get_concept_images(self, force_load=False):

        for c in self.concept_pmids.keys():
            if not force_load and os.path.exists(
                    os.path.join(
                        self.imagedir,
                        '%s_specificity_z.nii.gz' % c.replace(' ', '-'))):
                continue
            if len(self.concept_pmids[c]) < self.ma_count_thresh:
                #print('skipping',c,len(self.concept_pmids[c]),'pmids')
                continue
            print('running meta-analysis for', c)
            ma = meta.MetaAnalysis(self.dataset, self.concept_pmids[c])
            ma.save_results(self.imagedir, c.replace(' ', '-'))

        if force_load or not os.path.exists(
                os.path.join(self.imagedir, 'mask_image.nii.gz')):
            # make mask of voxels with zero standard deviation
            concept_images = glob.glob(
                os.path.join(self.imagedir, '*_%s.nii.gz' % self.meta_image))

            imgdata = numpy.zeros((91, 109, 91, len(concept_images)))
            print('loading concept images to compute std')
            for i, c in enumerate(concept_images):
                tmp = nibabel.load(c).get_data()
                imgdata[:, :, :, i] = tmp

            imgstd = numpy.std(imgdata, axis=3)
            maskdata = (imgstd > 0).astype('int')
            maskimg = nibabel.Nifti1Image(maskdata,
                                          affine=nibabel.load(c).affine)
            maskimg.to_filename(
                os.path.join(self.imagedir, 'mask_image.nii.gz'))

    def get_resampled_images(self, shape=None, affine=None, force_load=False):
        # use 3 mm as default
        if not shape:
            shape = [60, 72, 60]
            affine = numpy.array([[-3, 0, 0, 90], [0, 3, 0, -126],
                                  [0, 0, 3, -72], [0, 0, 0, 1]])
            self.resolution = affine[1, 1].astype('int')
        print('resampling data to %d mm' % self.resolution)
        self.imagedir_resampled = os.path.join(
            self.datadir, 'ma_images_%dmm' % self.resolution)
        if not os.path.exists(self.imagedir_resampled):
            os.mkdir(self.imagedir_resampled)
        concept_images = glob.glob(
            os.path.join(self.imagedir, '*_%s.nii.gz' % self.meta_image))
        for c in concept_images:
            if force_load or not os.path.exists(
                    os.path.join(self.imagedir_resampled,
                                 os.path.basename(c))):
                img = nilearn.image.resample_img(c,
                                                 target_affine=affine,
                                                 target_shape=shape)
                img.to_filename(
                    os.path.join(self.imagedir_resampled, os.path.basename(c)))

        if not os.path.exists(
                os.path.join(self.datadir,
                             'mask_%dmm.nii.gz' % self.resolution)):
            # make MNI mask at chosen resolution
            mask = os.path.join(
                os.environ['FSLDIR'],
                'data/standard/MNI152_T1_2mm_brain_mask.nii.gz')
            maskimg = nilearn.image.resample_img(mask,
                                                 target_affine=affine,
                                                 target_shape=shape)
            maskimg.to_filename(
                os.path.join(self.datadir,
                             'mask_%dmm.nii.gz' % self.resolution))

    def load_concept_images(self, force_load=True):
        concept_images = glob.glob(
            os.path.join(self.imagedir_resampled,
                         '*_%s.nii.gz' % self.meta_image))
        concept_images.sort()
        self.image_concepts = [
            os.path.basename(i).split('_')[0] for i in concept_images
        ]
        if os.path.exists(
                os.path.join(self.datadir,
                             'imgdata_%dmm.npy' % self.resolution)):
            self.imgdata = numpy.load(
                os.path.join(self.datadir,
                             'imgdata_%dmm.npy' % self.resolution))
            # make sure it's the right size
            if self.imgdata.shape[1] == len(concept_images):
                print('using cached concept image data')
                return

        masker = nilearn.input_data.NiftiMasker(
            mask_img=os.path.join(self.datadir,
                                  'mask_%dmm.nii.gz' % self.resolution),
            target_shape=[60, 72, 60],
            target_affine=numpy.array([[-3, 0, 0, 90], [0, 3, 0, -126],
                                       [0, 0, 3, -72], [0, 0, 0, 1]]))
        print('loading concept image data')
        self.imgdata = masker.fit_transform(concept_images)
        numpy.save(
            os.path.join(self.datadir, 'imgdata_%dmm.npy' % self.resolution),
            self.imgdata)

    def save(self):
        with open('%s/neurovault_%dmm.pkl' % (self.datadir, self.resolution),
                  'wb') as f:
            pickle.dump(self, f)

    def build_design_matrix(self, force_load=False):
        if not force_load and os.path.exists(
                os.path.join(self.datadir, 'desmtx.csv')):
            self.desmtx = pandas.DataFrame.from_csv(
                os.path.join(self.datadir, 'desmtx.csv'))
            print('using cached design matrix')
            return
        print('building design matrix')
        all_concept_pmids = []
        for k in self.concept_pmids.keys():
            all_concept_pmids = all_concept_pmids + self.concept_pmids[k]
        all_concept_pmids = list(set(all_concept_pmids))
        all_concept_pmids.sort()
        all_concepts = list(self.concept_pmids.keys())
        self.desmtx = pandas.DataFrame(data=0,
                                       index=all_concept_pmids,
                                       columns=all_concepts)

        for k in self.concept_pmids.keys():
            pmids = self.concept_pmids[k]
            self.desmtx[k][pmids] = 1
        # drop columns with too few matches
        self.desmtx = self.desmtx.ix[:,
                                     self.desmtx.sum() > self.ma_count_thresh]
        self.desmtx.to_csv(os.path.join(self.datadir, 'desmtx.csv'))
Example #16
def get_test_dataset():
    test_data_path = get_test_data_path()
    dataset = Dataset(test_data_path + 'test_dataset.txt')
    dataset.add_features(test_data_path + 'test_features.txt')
    return dataset
Example #17
""" Create a new Dataset instance from a database file and load features.
This is basically the example from the quickstart in the README.
Assumes you have database.txt and features.txt files in the current dir.
"""
""" Load a Dataset and generate a full set of meta-analysis
images--i.e., run a meta-analysis on every single feature.
"""

neurosynth_data_dir = "/home/data/nbc/misc-projects/niconn-macm/code/neurosynth/"

if not op.isfile(op.join(neurosynth_data_dir, "dataset.pkl")):
    # Create Dataset instance from a database file.
    dataset = Dataset(op.join(neurosynth_data_dir, "database.txt"))

    # Load features from file
    dataset.add_features(op.join(neurosynth_data_dir, "features.txt"))

    # Pickle the Dataset to file so we can use Dataset.load() next time
    # instead of having to sit through the generation process again.
    dataset.save(op.join(neurosynth_data_dir, "dataset.pkl"))

# Load pickled Dataset--assumes you've previously saved it. If not,
# follow the create_a_new_dataset_and_load_features example.
dataset = Dataset.load(op.join(neurosynth_data_dir, "dataset.pkl"))

# Get the full list of feature names
feature_list = dataset.get_feature_names()

# Run a meta-analysis on each feature, and save all the results to
# a directory called results. Note that the directory will not be
# created for you, so make sure it exists.
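The quickstart above stops at that comment; a minimal sketch of the per-feature loop it describes, reusing only calls shown elsewhere on this page (get_ids_by_features, MetaAnalysis, save_results) and assuming meta has been imported as in Example #18:

for feature in feature_list:
    ids = dataset.get_ids_by_features(feature, threshold=0.001)
    ma = meta.MetaAnalysis(dataset, ids)
    # Writes the images as results/<feature>_*.nii.gz; the results directory
    # must already exist, as the note above points out.
    ma.save_results('results', prefix=feature.replace(' ', '_'))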
Example #18
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import meta
import os
dataset = Dataset('database.txt')
dataset.add_features('features.txt')
print(dataset.get_feature_names())
ids = dataset.get_ids_by_features('emo*', threshold=0.001)
print(len(ids))
ma = meta.MetaAnalysis(dataset, ids)
ma.save_results('emotion')
Example #19
class TestBase(unittest.TestCase):

  def setUp(self):
    """ Create a new Dataset and add features. """
    self.dataset = Dataset('data/test_dataset.txt')
    self.dataset.add_features('data/test_features.txt')
  
  def test_dataset_initializes(self):
    """ Test whether dataset initializes properly. """
    self.assertIsNotNone(self.dataset.volume)
    self.assertIsNotNone(self.dataset.image_table)
    self.assertEqual(len(self.dataset.mappables), 5)
    self.assertIsNotNone(self.dataset.volume)
    self.assertIsNotNone(self.dataset.r)

  def test_image_table_loads(self):
    """ Test ImageTable initialization. """
    self.assertIsNotNone(self.dataset.image_table)
    it = self.dataset.image_table
    self.assertEqual(len(it.ids), 5)
    self.assertIsNotNone(it.volume)
    self.assertIsNotNone(it.r)
    self.assertEqual(it.data.shape, (228453, 5))
    # Add tests for values in table

  def test_feature_table_loads(self):
    """ Test FeatureTable initialization. """
    tt = self.dataset.feature_table
    self.assertIsNotNone(tt)
    self.assertEqual(len(self.dataset.list_features()), 5)
    self.assertEqual(tt.data.shape, (5,5))
    self.assertEqual(tt.feature_names[3], 'f4')
    self.assertEqual(tt.data[0,0], 0.0003)

  def test_feature_search(self):
    """ Test feature-based Mappable search. Tests both the FeatureTable method 
    and the Dataset wrapper. """
    tt = self.dataset.feature_table
    features = tt.search_features(['f*'])
    self.assertEqual(len(features), 4)
    d = self.dataset
    ids = d.get_ids_by_features(['f*'], threshold=0.001)
    self.assertEqual(len(ids), 4)
    img_data = d.get_ids_by_features(['f1', 'f3', 'g1'], 0.001, func='max', get_image_data=True)
    self.assertEqual(img_data.shape, (228453, 5))

  def test_selection_by_mask(self):
    """ Test mask-based Mappable selection.
    Only one peak in the test dataset (in study5) should be within the sgACC. """
    ids = self.dataset.get_ids_by_mask('data/sgacc_mask.nii.gz')
    self.assertEqual(len(ids), 1)
    self.assertEqual('study5', ids[0])

  def test_selection_by_peaks(self):
    """ Test peak-based Mappable selection. """
    ids = self.dataset.get_ids_by_peaks(np.array([[3, 30, -9]]))
    self.assertEqual(len(ids), 1)
    self.assertEqual('study5', ids[0])
  
  # def test_invalid_coordinates_ignored(self):
  #   """ Test dataset contains 3 valid coordinates and one outside mask. But this won't work
Example #20
def create_dataset(database_location, feature_location):
    dataset = Dataset(database_location)
    dataset.add_features(feature_location)
    dataset.save('dataset-old.pkl')
    print('created dataset')
    return dataset
Example #21
xfm2vol.run()


#make masks to input into neurosynth
def cluster2masks(clusterfile):
    clustermap = nb.load(clusterfile).get_data()
    for x in range(1, clustermap.max() + 1):
        clustermask = (clustermap == x).astype(int)
        nImg = nb.Nifti1Image(clustermask, None)
        nb.save(
            nImg,
            os.path.abspath(clusterfile + '_clustermask' + str(x) + '.nii'))


cluster2masks(volume_file)

dataset_file = '/home/raid3/watanabe/neurosynth/data/dataset.pkl'
if not os.path.exists(dataset_file):
    dataset = Dataset('/home/raid3/watanabe/neurosynth/data/database.txt')
    dataset.add_features('/home/raid3/watanabe/neurosynth/data/features.txt')
    dataset.save(dataset_file)
else:
    dataset = cPickle.load(open(dataset_file, 'rb'))

clustermask = volume_file + '_clustermask' + str(3) + '.nii'

ids = dataset.get_ids_by_mask(clustermask)
features = dataset.feature_table.get_features_by_ids(ids)

#mri_surf2vol --identity fsaverage4 --surfval /scr/ilz1/Data/attemptsurface.nii --hemi 'lh' --o /scr/ilz1/Data/results/surf2volume.nii --template /scr/ilz1/Data/freesurfer/fsaverage4/mri/orig.mgz
Example #22
def get_test_dataset(prefix='test'):
    test_data_path = get_test_data_path()
    dataset = Dataset(test_data_path + '%s_dataset.txt' % prefix)
    dataset.add_features(test_data_path + '%s_features.txt' % prefix)
    return dataset
Example #23
def get_test_dataset(prefix='test'):
    test_data_path = get_test_data_path()
    dataset = Dataset(test_data_path + '%s_dataset.txt' % prefix)
    dataset.add_features(test_data_path + '%s_features.txt' % prefix)
    return dataset
Example #24
File: utils.py, project: jdnc/ml-project
def get_test_dataset():
    test_data_path = get_test_data_path()
    dataset = Dataset(test_data_path + 'test_dataset.txt')
    dataset.add_features(test_data_path + 'test_features.txt')
    return dataset
Example #25
                                        mni152_2mm,
                                        interpolation='nearest',
                                        copy=True)
        resampled_file = join(map_dir, '{0}_mni2mm.nii.gz'.format(file))
        resampled_roi.to_filename(resampled_file)
        roi_files[i] = resampled_file
        plot_glass_brain(resampled_file,
                         output_file=join(
                             map_dir, '{0}_mni2mm.png'.format(basename(file))))

# In[16]:

print('loading dataset...')
tds = datetime.now()
dataset = Dataset('/Users/Katie/Dropbox/Data/neurosynth-v0.7/database.txt')
dataset.add_features('/Users/Katie/Dropbox/Data/neurosynth-v0.7/features.txt')
tdf = datetime.now()

print('dataset loaded! only took {0}'.format((tdf - tds)))

for i in np.arange(0, len(mask_names)):
    print('{0}\nmeta-analyzing {1}...'.format(datetime.now(), mask_names[i]))
    tmas = datetime.now()
    ids = dataset.get_studies(mask=roi_files[i], )
    ma = meta.MetaAnalysis(dataset, ids)
    ma.save_results(
        output_dir=sink_dir,
        prefix=mask_names[i],
        image_list=['association-test_z', 'association-test_z_FDR_0.01'])
    tmaf = datetime.now()
    print('meta-analysis took {0}\ndecoding {1}...'.format((tmaf - tmas),
Example #26
class NeurosynthMerge:
    def __init__(self, thesaurus, npath, outdir, test_mode=False):
        """
        Generates a new set of images using the neurosynth repository,
        combining terms according to a thesaurus.

        Args:
            - thesaurus: a list of tuples of the form ('term that will become
                the name of the output file', 'the other term', 'expression
                combining the terms'). The expression is alphanumeric, with
                terms joined by & (and), &~ (and not), or | (or).
            - npath: path to the local clone of the neurosynth git repository
                (https://github.com/neurosynth/neurosynth)
            - outdir: directory where the generated images will be saved
            - test_mode: when True, run an abridged version for test purposes
                (as implemented by test.Neurosynth.py)
        """
        self.thesaurus = thesaurus
        self.npath = npath
        self.outdir = outdir

        self.import_neurosynth_git()
        from neurosynth.analysis import meta

        # Take out first two terms from the feature_list and insert the third 
        # term from the tuple.
        for triplet in thesaurus:
            self.feature_list = [feature for feature in self.feature_list
                                 if feature not in triplet]
            self.feature_list.append(triplet[-1])

        # This makes an abridged version of feature_list for testing purposes. 
        if test_mode:
            self.feature_list = [triplet[-1] for triplet in thesaurus]

        # Run meta-analyses on the new feature set and save the results
        # to the outdir.
        for feature in self.feature_list:
            self.ids = self.dataset.get_ids_by_expression(feature, 
                threshold=0.001)
            ma = meta.MetaAnalysis(self.dataset, self.ids)

            # Parse the feature name (to avoid conflicts with illegal
            # characters in file names)
            regex = re.compile(r'\W+')
            split = re.split(regex, feature)
            feat_fname = split[0] 

            # Save the results (many different types of files)
            ma.save_results(self.outdir+os.sep+feat_fname)

    def import_neurosynth_git(self):
        # Add the appropriate neurosynth git folder to the python path. 
        sys.path.append(self.npath)
        from neurosynth.base.dataset import Dataset
        from neurosynth.analysis import meta

        # Try to load a pickle if it exists. Create a new dataset instance 
        # if it doesn't.
        try:
            self.dataset = cPickle.load(
                open(self.npath+os.sep+'data/dataset.pkl', 'rb'))
        except IOError:
        # Create Dataset instance from a database file.
            self.dataset = Dataset(self.npath+os.sep+'data/database.txt')

        # Load features from file
        self.dataset.add_features(self.npath+os.sep+'data/features.txt')

        # Get names of features. 
        self.feature_list = self.dataset.get_feature_names()
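A hypothetical invocation of the class above, following the thesaurus format described in its docstring (all terms and paths are placeholders):

thesaurus = [
    ('emotion', 'affect', 'emotion | affect'),                   # merge two related terms
    ('memory', 'working memory', 'memory &~ working memory'),    # memory excluding working memory
]
nsm = NeurosynthMerge(thesaurus,
                      npath='/path/to/neurosynth',   # local clone of the neurosynth repo
                      outdir='/path/to/output',
                      test_mode=True)                # abridged run, as in test.Neurosynth.py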
Example #27
xfm2vol.inputs.identity = 'fsaverage4'
xfm2vol.inputs.hemi = 'lh'
xfm2vol.inputs.transformed_file = volume_file
xfm2vol.inputs.template_file = template
xfm2vol.run()

#make masks to input into neurosynth
def cluster2masks(clusterfile):
    clustermap = nb.load(clusterfile).get_data()
    for x in range(1,clustermap.max()+1):
        clustermask = (clustermap==x).astype(int)
        nImg = nb.Nifti1Image(clustermask, None)
        nb.save(nImg, os.path.abspath(clusterfile+'_clustermask'+str(x)+'.nii'))

cluster2masks(volume_file)

dataset_file = '/home/raid3/watanabe/neurosynth/data/dataset.pkl'
if not os.path.exists(dataset_file):
    dataset = Dataset('/home/raid3/watanabe/neurosynth/data/database.txt')
    dataset.add_features('/home/raid3/watanabe/neurosynth/data/features.txt')
    dataset.save(dataset_file)
else:
    dataset = cPickle.load(open(dataset_file,'rb'))

clustermask = volume_file+'_clustermask'+str(3)+'.nii'

ids = dataset.get_ids_by_mask(clustermask)
features = dataset.feature_table.get_features_by_ids(ids)

#mri_surf2vol --identity fsaverage4 --surfval /scr/ilz1/Data/attemptsurface.nii --hemi 'lh' --o /scr/ilz1/Data/results/surf2volume.nii --template /scr/ilz1/Data/freesurfer/fsaverage4/mri/orig.mgz