Example #1
    def test_dataset_save_and_load(self):
        # smoke test of saving and loading
        t = tempfile.mktemp()
        self.dataset.save(t, keep_mappables=True)
        self.assertTrue(os.path.exists(t))
        dataset = Dataset.load(t)
        self.assertIsNotNone(dataset)
        self.assertIsNotNone(dataset.mappables)
        self.assertEqual(len(dataset.mappables), 5)
        # Now save without keeping the mappables and confirm they're gone
        dataset.save(t)
        self.assertTrue(os.path.exists(t))
        dataset = Dataset.load(t)
        self.assertEqual(len(dataset.mappables), 0)
        os.unlink(t)
from numpy import zeros, int16
from neurosynth.base.dataset import Dataset

def create_voxel_x_feature_matrix(path_to_dataset, path_to_image_files):
    dataset = Dataset.load(path_to_dataset)
    feature_list = dataset.get_feature_names()
    vox_feat_matrix = zeros((dataset.volume.num_vox_in_mask, len(feature_list)), dtype=int16)
    for (i, feature) in enumerate(feature_list):
        image_path = path_to_image_files + feature + '_pFgA_z.nii.gz'
        vox_feat_matrix[:, i] = dataset.volume.mask(image_path)
    return vox_feat_matrix
    def test_dataset_save_and_load(self):
        # smoke test of saving and loading
        t = tempfile.mktemp()
        self.dataset.save(t)
        self.assertTrue(os.path.exists(t))
        dataset = Dataset.load(t)
        self.assertIsNotNone(dataset)
        self.assertEqual(len(dataset.image_table.ids), 5)
        os.unlink(t)
    def __init__(self,
                 db,
                 dataset=None,
                 studies=None,
                 features=None,
                 reset_db=False,
                 reset_dataset=False,
                 download_data=False):
        """
        Initialize instance from a pickled Neurosynth Dataset instance or a
        pair of study and analysis .txt files.
        Args:
            db: the SQLAlchemy database connection to use.
            dataset: an optional filename of a pickled neurosynth Dataset
                instance.
            studies: name of file containing activation data. If passed, a new
                Dataset instance will be constructed.
            features: name of file containing feature data.
            reset_db: if True, will drop and re-create all database tables
                before adding new content. If False (default), will add content
                incrementally.
            reset_dataset: if True, will regenerate the pickled Neurosynth
                dataset.
            download_data: if True, ignores any existing files and downloads
                the latest Neurosynth data files from GitHub.
        """

        if (studies is not None and not os.path.exists(studies)) \
                or settings.RESET_ASSETS:
            print("WARNING: RESETTING ALL NEUROSYNTH ASSETS!")
            self.reset_assets(download_data)

        # Load or create Neurosynth Dataset instance
        if dataset is None or reset_dataset or (isinstance(dataset, str) and
                                                not os.path.exists(dataset)):
            print("\tInitializing a new Dataset...")
            if (studies is None) or (features is None):
                raise ValueError(
                    "To generate a new Dataset instance, both studies and "
                    "analyses must be provided.")
            dataset = Dataset(studies)
            dataset.add_features(features)
            dataset.save(settings.PICKLE_DATABASE)
        else:
            print("Loading existing Dataset...")
            dataset = Dataset.load(dataset)
            if features is not None:
                dataset.add_features(features)

        self.dataset = dataset
        self.db = db

        if reset_db:
            print("WARNING: RESETTING DATABASE!!!")
            self.reset_database()
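A minimal usage sketch for an initializer with this signature; the enclosing class name (NeurosynthBuilder), the db handle, and the file paths are hypothetical placeholders, not taken from the original snippet.

# Hypothetical usage: 'NeurosynthBuilder' stands in for whatever class defines
# the __init__ above, and 'db' is an already-configured SQLAlchemy connection.
builder = NeurosynthBuilder(
    db=db,
    studies='data/database.txt',    # activation data -> a new Dataset is built
    features='data/features.txt',   # feature data added to that Dataset
    reset_db=True)                  # drop and re-create all tables first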
    def __init__(self, db, dataset=None, studies=None, features=None,
                 reset_db=False, reset_dataset=False, download_data=True):
        """
        Initialize instance from a pickled Neurosynth Dataset instance or a
        pair of study and analysis .txt files.

        Args:
            db: the SQLAlchemy database connection to use.
            dataset: an optional filename of a pickled neurosynth Dataset
                instance.
                Note that the Dataset must contain the list of Mappables (i.e.,
                    save() must have been called with keep_mappables set to
                    True).
            studies: name of file containing activation data. If passed, a new
                Dataset instance will be constructed.
            features: name of file containing feature data.
            reset_db: if True, will drop and re-create all database tables
                before adding new content. If False (default), will add content
                incrementally.
            reset_dataset: if True, will regenerate the pickled Neurosynth
                dataset.
            download_data: if True, ignores any existing files and downloads
                the latest Neurosynth data files from GitHub.
        """

        if (studies is not None and not os.path.exists(studies)) \
                or settings.RESET_ASSETS:
            print "WARNING: RESETTING ALL NEUROSYNTH ASSETS!"
            self.reset_assets(download_data)

        # Load or create Neurosynth Dataset instance
        if dataset is None or reset_dataset or (isinstance(dataset, str) and
                                                not os.path.exists(dataset)):
            print("\tInitializing a new Dataset...")
            if (studies is None) or (features is None):
                raise ValueError(
                    "To generate a new Dataset instance, both studies and "
                    "analyses must be provided.")
            dataset = Dataset(studies)
            dataset.add_features(features)
            dataset.save(settings.PICKLE_DATABASE, keep_mappables=True)
        else:
            print "\tLoading existing Dataset..."
            dataset = Dataset.load(dataset)
            if features is not None:
                dataset.add_features(features)

        self.dataset = dataset
        self.db = db

        if reset_db:
            print "WARNING: RESETTING DATABASE!!!"
            self.reset_database()
Example #7
    def get_dataset(self, force_load=False):
        if os.path.exists(os.path.join(self.datadir,
                                       'dataset.pkl')) and not force_load:
            print('loading database from',
                  os.path.join(self.datadir, 'dataset.pkl'))
            self.dataset = Dataset.load(
                os.path.join(self.datadir, 'dataset.pkl'))
        else:
            print('loading database - this takes a few minutes')
            self.dataset = Dataset(os.path.join(self.datadir, 'database.txt'))
            self.dataset.add_features(
                os.path.join(self.datadir, 'features.txt'))

            self.dataset.save(os.path.join(self.datadir, 'dataset.pkl'))
Example #8
def functional_preference_profile_prep():
    """
    Function for extracting functional preference profile data
    """
    import pandas as pd
    from neurosynth.base.dataset import Dataset

    dataset = Dataset.load("data/neurosynth_60_0.4.pkl")

    nicknames = pd.read_csv('data/v4-topics-60.txt', delimiter='\t')
    nicknames['topic_name'] = nicknames.apply(
        lambda row: '_'.join([str(row.topic_number)] + row.top_words.split(' ')[0:3]), axis=1)
    nicknames = nicknames.sort_values('topic_name')

    word_keys = pd.read_csv("data/topic_keys60-july_cognitive.csv")
    word_keys['top_2'] = word_keys['Top words'].apply(lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1])
    word_keys['topic_name'] = "topic" + word_keys['topic'].astype('str')

    return dataset, nicknames, word_keys
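A short usage sketch for the helper above; it does nothing beyond unpacking the returned tuple.

# Unpack the three objects returned by functional_preference_profile_prep().
dataset, nicknames, word_keys = functional_preference_profile_prep()
print(nicknames['topic_name'].head())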
    def __init__(
        self,
        metric="emd",
        image_type="pAgF",
        name=None,
        multi=True,
        image_transform="block_reduce",
        downsample=8,
        auto_save=True,
    ):
        self.image_type = image_type
        self.multi = multi
        self.downsample = downsample
        self.auto_save = auto_save

        if callable(metric):
            self.metric = metric
        elif metric == "emd":
            self.metric = euclidean_emd
        else:
            raise ValueError("{metric} is not a valid metric".format(**locals()))

        if callable(image_transform):
            self.image_transform = image_transform
        elif image_transform == "block_reduce":
            from functools import partial

            self.image_transform = partial(block_reduce, factor=downsample)
            # def block_reduce_transform(image):
            # """The default transformation."""
            # return block_reduce(image, downsample, blur)
            # self.image_transform = block_reduce_transform
        else:
            raise ValueError(("{image_transform} is not a valid" "transform function").format(**locals()))
        self.name = name if name else time.strftime("analysis_from_%m-%d_%H-%M-%S")

        try:
            self.data = Dataset.load("data/dataset.pkl")
        except FileNotFoundError:
            self.data = _getdata()
Example #10
from neurosynth.analysis.cluster import magic
from neurosynth.base.dataset import Dataset

# mydir = "/home/delavega/projects/permutation_clustering/"

mydir = "../data/"

dataset = Dataset.load(mydir + '/datasets/abs_60topics_filt_jul.pkl')

roi_mask = mydir + '../masks/new_medial_fc_30.nii.gz'
global_mask = mydir + "../masks/MNI152_T1_2mm_brain.nii.gz"

magic(dataset, 10, method='coactivation',
      features=['topic57', 'topic32', 'topic39', 'topic44'],
      output_dir='../results/ef_cluster/all_cluster/',
      min_studies_per_voxel=100, filename='okay')
Example #11
t = dlmread('/Users/lukechang/Dropbox/Github/toolbox/Python/ImageProcessing/tal2icbm_fsl.mat');
dat.volInfo.mat = inv(t)*dat.volInfo.mat;
dat.fullpath = '/Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend.nii';
write(dat)

# 2) Reorient using FSL - Unix
fslreorient2std Friend Friend_Or

# 3) Coregister to 2mm MNI space - Unix
/usr/local/fsl/bin/flirt -in /Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend_Or.nii.gz -ref /usr/local/fsl/data/standard/MNI152_T1_2mm_brain -out /Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend_Or_Mni.nii.gz -omat /Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend_Or_Mni.mat -bins 256 -cost corratio -searchrx -90 90 -searchry -90 90 -searchrz -90 90 -dof 12  -interp trilinear

# 4) Decode - Python
DATASET_FILE = '/Users/lukechang/Dropbox/Github/neurosynth/topics.pkl'
PREFIX = '/Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/'
INFILE = 'Friend_Or_Mni.nii.gz'
dataset = Dataset.load(DATASET_FILE)
decoder = decode.Decoder(dataset)  # takes a while to load; only do this once.
img = imageutils.load_imgs(PREFIX + INFILE, decoder.mask)
result = decoder.decode(img)
np.savetxt(PREFIX + 'Friend_Decoded.txt', result)

# 5) Threshold at .001 - unix
fslmaths Friend_Or_Mni -thr 3 Friend_Or_Mni_001

# 6) Decode thresholded map - python
DATASET_FILE = '/Users/lukechang/Dropbox/Github/neurosynth/topics.pkl'
PREFIX = '/Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/'
INFILE = 'Friend_Or_Mni_001.nii.gz'
dataset = Dataset.load(DATASET_FILE)
decoder = decode.Decoder(dataset)  # takes a while to load; only do this once.
img = imageutils.load_imgs(PREFIX + INFILE, decoder.mask)
Example #12
# -*- coding: utf-8 -*-

# Here I use Yeo to test Neurosynth's classify functions
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import classify
import os
import itertools
import re
import numpy as np
import pdb
import sys
from nipype.interfaces import fsl
from sklearn.ensemble import GradientBoostingClassifier


dataset = Dataset.load('../data/pickled.pkl')

masklist = ['7Networks_Liberal_1.nii.gz', '7Networks_Liberal_2.nii.gz',
            '7Networks_Liberal_3.nii.gz', '7Networks_Liberal_4.nii.gz',
            '7Networks_Liberal_5.nii.gz', '7Networks_Liberal_6.nii.gz',
            '7Networks_Liberal_7.nii.gz']

rootdir = '../masks/Yeo_JNeurophysiol11_MNI152/standardized/'


class maskClassifier:
    def __init__(self, classifier=GradientBoostingClassifier(),
                 param_grid={'max_features': np.arange(2, 140, 44),
                             'n_estimators': np.arange(5, 141, 50),
                             'learning_rate': np.arange(0.05, 1, 0.1)},
                 thresh=0.08):
        # Store the classifier configuration; fitting logic is not shown here.
        self.classifier = classifier
        self.param_grid = param_grid
        self.thresh = thresh


diffs = {}
from base.classifiers import OnevsallContinuous
from neurosynth.analysis import cluster
from neurosynth.base.dataset import Dataset
from sklearn.metrics import r2_score
from sklearn.linear_model import Ridge
import cPickle
from sklearn.decomposition import RandomizedPCA

dataset = Dataset.load('../data/datasets/abs_60topics_filt_jul.pkl')

roi_mask = '../masks/mpfc_nfp.nii.gz'
global_mask = "../masks/MNI152_T1_2mm_brain.nii.gz"

n_regions = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# print resolution
clf_file = "../data/clfs/all_vox_Ridge_mpfc.pkl"

print "trying to load"
try:
    clf = OnevsallContinuous.load(clf_file)
except:
    print "Loading failed"
    clf = OnevsallContinuous(dataset, None, classifier=Ridge())
    clf.classify(scoring=r2_score, processes=8)
    try:
        clf.save(clf_file)
    except:
        pass

reduc = RandomizedPCA(n_components=100)
from neurosynth.base.dataset import Dataset
from neurosynth.analysis.cluster import Clusterable
import joblib
from sklearn.metrics import pairwise_distances
from sklearn.preprocessing import scale
from fastcluster import ward

dataset = Dataset.load('/projects/delavega/dbs/db_v6_topics-100.pkl')

roi = Clusterable(
    dataset, '/home/delavega/projects/classification/masks/l_70_mask.nii.gz')

saved_pca = '/projects/delavega/clustering/dv_v6_reference_pca.pkl'
reference = joblib.load(saved_pca)

distances = pairwise_distances(roi.data, reference.data, metric='correlation')
distances = scale(distances, axis=1)

joblib.dump(
    distances,
    '/home/delavega/projects/clustering/results/hierarchical/v6_distances_l_70_scaled.pkl'
)

Z = ward(distances)

joblib.dump(
    Z,
    '/home/delavega/projects/clustering/results/hierarchical/v6_ward_l70_scaled.pkl'
)
Example #15
import numpy as np
import nibabel as nib
from nibabel import nifti1
from neurosynth.base.dataset import Dataset
from neurosynth.analysis.cluster import Clusterable
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances
from copy import deepcopy
from os.path import exists, join, split
from os import makedirs
import pickle

def binarize_nib(img):
    img = deepcopy(img)
    img.get_data()[img.get_data() != 0] = 1
    return img

dataset = Dataset.load('../data/0.6/datasets/db_v6_topics-100.pkl')

infile = '../masks/mars/NeubertCingulateOrbitoFrontalParcellation/Neubert_2mm_medial_bilateral.nii.gz'
outdir = '../results/clustering/matched/mars/'
outfile = join(outdir, split(infile)[-1])
saved_pca = '../results/clustering/dv_v6_reference_pca.pkl'

reference = pickle.load(open(saved_pca, 'rb'))

match_roi = nib.load(infile)
roi = Clusterable(dataset, binarize_nib(match_roi))

print "Computing roi ref distances"
distances = pairwise_distances(roi.data, reference.data, metric='correlation')

n_clusters = np.unique(match_roi.get_data()).nonzero()[0].shape[0]
Example #16
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import decode, meta
from os.path import join

parent_dir = '/Users/Katie/Dropbox/Data/neurosynth_current_data/'
roi_dir = '/Users/Katie/Dropbox/Data/NSvBM-decoding/ROIs/'
sink_dir = '/Users/Katie/Dropbox/Data/NSvBM-decoding/'
paracentral = join(roi_dir, 'l-paracentral.nii.gz')
ofc = join(roi_dir, 'r-ofc.nii.gz')
uncus = join(roi_dir, 'r-ofc.nii.gz')
visual = join(roi_dir, 'smith-rsn70-1.nii.gz')
mask_list = [paracentral, ofc, uncus, visual]
mask_names = ['paracentral', 'orbitofrontal', 'uncus', 'visual']
dataset = Dataset.load(join(parent_dir, 'dataset.pkl'))
for i, mask in enumerate(mask_list):
    ids = dataset.get_studies(mask=mask)
    ma = meta.MetaAnalysis(dataset, ids)
    ma.save_results(output_dir='.', prefix=mask_names[i])
    decoder = decode.Decoder(dataset)
    result = decoder.decode(['{0}_pFgA_z.nii.gz'.format(mask_names[i])],
                            save=join(sink_dir,
                                      'decoded_{0}.txt'.format(mask_names[i])))
from neurosynth.base.dataset import Dataset
import neurosynth.base.imageutils as it

dataset = Dataset.load("../data/datasets/abs_topics_filt.pkl")

print "Filtering voxels..."

data = dataset.image_table.data.toarray()

voxel_mask = data.mean(axis=1) > 0.005

img = it.load_imgs('../masks/ward/30.nii.gz', dataset.masker)

good_voxels = img[voxel_mask]

it.save_img(good_voxels, "../masks/ward/30_masked.nii.gz", dataset.masker)
# <markdowncell>

# Because this takes a while, we'll save our Dataset object to disk. That way, the next time we want to use it, we won't have to sit through the whole creation operation again:

# <codecell>

dataset.save('dataset.pkl')

# <markdowncell>

# Now in future, instead of waiting, we could just load the dataset from file:

# <codecell>

dataset = Dataset.load('dataset.pkl')   # Note the capital D in the second Dataset--load() is a class method

# <markdowncell>

# ## Doing stuff with Neurosynth
# Now that our Dataset has both activation data and some features, we're ready to start doing some analyses! By design, Neurosynth focuses on facilitating simple, fast, and modestly useful analyses. This means you probably won't break any new ground using Neurosynth, but you should be able to supplement results you've generated using other approaches with a bunch of nifty analyses that take just 2 - 3 lines of code.
# 
# ### Simple feature-based meta-analyses
# The most straightforward thing you can do with Neurosynth is use the features we just loaded above to perform automated large-scale meta-analyses of the literature. Let's see what features we have:

# <codecell>

dataset.get_feature_names()

# <markdowncell>
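# For example, a quick single-feature meta-analysis. This is a hedged sketch: it assumes get_studies() accepts features and frequency_threshold arguments, and reuses the MetaAnalysis/save_results calls shown in an earlier example.

# <codecell>

from neurosynth.analysis import meta

# Grab the studies tagged with the 'emotion' feature and run a meta-analysis,
# saving the resulting images to the current directory.
ids = dataset.get_studies(features='emotion', frequency_threshold=0.001)
ma = meta.MetaAnalysis(dataset, ids)
ma.save_results(output_dir='.', prefix='emotion')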
from sklearn.metrics import roc_auc_score
import sys
from base.mv import bootstrap_mv_full
from neurosynth.base.dataset import Dataset
dataset = Dataset.load("../permutation_clustering/abs_60topics_filt_jul.pkl")

from sklearn.naive_bayes import GaussianNB

print(sys.argv)
try:
    cmd, iterations, job_id = sys.argv
except ValueError:
    raise Exception("Incorrect number of arguments")

import csv
cognitive_topics = [
    'topic' + topic[0]
    for topic in csv.reader(open('topic_keys60-july_cognitive.csv', 'rU'))
    if topic[1] == "T"
]

results = bootstrap_mv_full(
    dataset,
    GaussianNB(),
    roc_auc_score,
    '../permutation_clustering/results/medial_fc_30_kmeans/kmeans_k9/cluster_labels.nii.gz',
    features=cognitive_topics,
    processes=None,
    boot_n=int(iterations),
    outfile='results/bootstrap_full_mv_' + str(iterations) + '_mFC_60_ ' +
    str(job_id) + '.csv')
Example #20
###
# This script shuffles the classification labels and re-runs classification many times to generate data for a confidence interval around the null hypothesis.

from sklearn.linear_model import RidgeClassifier
from base.classifiers import OnevsallClassifier
from neurosynth.base.dataset import Dataset
from sklearn.metrics import roc_auc_score
import pickle
from random import shuffle

def shuffle_data(classifier):
    for region in classifier.c_data:
        shuffle(region[1])


d_abs_topics_filt = Dataset.load('../data/datasets/abs_topics_filt_july.pkl')

results = []

clf = OnevsallClassifier(d_abs_topics_filt, '../masks/Ward/50.nii.gz', cv='4-Fold',
	 thresh=10, thresh_low=0, memsave=True, classifier=RidgeClassifier())
clf.load_data(None, None)
clf.initalize_containers(None, None, None)


for i in range(0, 500):
    shuffle_data(clf)
    clf.classify(scoring=roc_auc_score, processes=8, class_weight=None)
    results = list(clf.class_score) + results
    print(i, end=' ')
Example #21
    def dataset(self):
        return Dataset.load(settings.PICKLE_DATABASE)
Example #22
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import decode

# Load a saved Dataset file. This example will work with the
# file saved in the create_a_new_dataset_and_load_features example.
dataset = Dataset.load('dataset.pkl')

# Initialize a new Decoder instance with a few features. Note that
# if you don't specify a subset of features, ALL features in the
# Dataset will be loaded, which will take a long time because
# meta-analysis images for each feature need to be generated.
decoder = decode.Decoder(
    dataset, features=['emotion', 'pain', 'somatosensory', 'wm', 'inhibition'])

# Decode three images. The sample images here are coactivation
# maps for ventral, dorsal, and posterior insula clusters,
# respectively. Maps are drawn from data reported in
# Chang, Yarkoni, Khaw, & Sanfey (2012); see paper for details.
# We save the output--an image x features matrix--to a file.
# By default, the decoder will use Pearson correlation, i.e.,
# each value in our results table indicates the correlation
# between the input image and each feature's meta-analysis image.
result = decoder.decode(['vIns.nii.gz', 'dIns.nii.gz', 'pIns.nii.gz'],
                        save='decoding_results.txt')
#         pass

#     def fit(self, X):
#         self.cf = MFastHCluster(method='ward')
#         self.cf.linkage(X)

#     def predict(self, n):
#         for i in range(1, self.cf.cut(0).shape[0]):
#             labels = self.cf.cut(i)
#             if np.bincount(labels).shape[0] == n:
#                 break

#         return labels

mydir = "/projects/delavega/clustering/"
dataset = Dataset.load(mydir + 'abs_60topics_filt_jul.pkl')

roi_mask = mydir + 'masks/new_medial_fc_30.nii.gz'
ns = [3, 9]
save_images = True
output_dir = join(mydir, 'results/MFC/')
out_model = None

roi = Clusterable(dataset, roi_mask, min_studies=80)

reference = Clusterable(dataset, min_studies=80)
reduce_reference = sk_decomp.RandomizedPCA(100)
reference = reference.transform(reduce_reference, transpose=True)

# distances = pairwise_distances(roi.data, reference.data,
#                                metric='correlation')
def load_dataset_instance(instance_filename):
    dataset = Dataset.load('./raw_data/' + instance_filename + '.pkl')
    return dataset
Example #25
from neurosynth.base.dataset import Dataset
from neurosynth.analysis.cluster import Clusterable
from sklearn import decomposition as sk_decomp
import pickle

dataset = Dataset.load(
    '/home/delavega/projects/classification/data/datasets/abs_60topics_filt_jul.pkl'
)

out = '/projects/delavega/clustering/dv_v5_reference_min_80_pca.pkl'

reference = Clusterable(dataset, min_studies=80)
print "Running PCA"
reduce_reference = sk_decomp.RandomizedPCA(100)
reference = reference.transform(reduce_reference, transpose=True)

pickle.dump(reference, open(out, 'wb'))
Example #26
import os.path as op

from neurosynth.base.dataset import Dataset
from neurosynth.analysis import meta

neurosynth_data_dir = "/home/data/nbc/misc-projects/niconn-macm/code/neurosynth/"

if not op.isfile(op.join(neurosynth_data_dir, "dataset.pkl")):
    # Create Dataset instance from a database file.
    dataset = Dataset(op.join(neurosynth_data_dir, "database.txt"))

    # Load features from file
    dataset.add_features(op.join(neurosynth_data_dir, "features.txt"))

    # Pickle the Dataset to file so we can use Dataset.load() next time
    # instead of having to sit through the generation process again.
    dataset.save(op.join(neurosynth_data_dir, "dataset.pkl"))

# Load pickled Dataset--assumes you've previously saved it. If not,
# follow the create_a_new_dataset_and_load_features example.
dataset = Dataset.load(op.join(neurosynth_data_dir, "dataset.pkl"))

# Get the full list of feature names
feature_list = dataset.get_feature_names()

# Run a meta-analysis on each feature, and save all the results to
# a directory called results. Note that the directory will not be
# created for you, so make sure it exists.
# Here we use the default frequency threshold of 0.001 (i.e., a
# study is said to have a feature if more than 1 in every 1,000
# words is the target word), and an FDR correction level of 0.05.
out_dir = "/home/data/nbc/misc-projects/meta-gradients/code/feature_maps"

for tmp_feature in feature_list:
    print(tmp_feature)
    meta.analyze_features(
Example #27
import datetime

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.linear_model import LassoCV, LassoLarsCV, LassoLarsIC, RidgeClassifier, RidgeClassifierCV
from sklearn.ensemble import GradientBoostingClassifier

from base.tools import Logger
from base.pipelines import pipeline
from base.classifiers import PairwiseClassifier, OnevsallClassifier

from neurosynth.base.dataset import Dataset
from sklearn.metrics import roc_auc_score

now = datetime.datetime.now()

n_topics = 60
dataset = Dataset.load('../data/0.6/datasets/db_v6_topics-%d.pkl' % n_topics)

# cognitive_topics = ['topic' + topic[0] for topic in csv.reader(
# 	open('../data/unprocessed/abstract_topics_filtered/topic_sets/topic_keys'  + str(topics) + '-july_cognitive.csv', 'rU')) if topic[1] == "T"]

# junk_topics = ['topic' + topic[0] for topic in csv.reader(
# 	open('../data/unprocessed/abstract_topics_filtered/topic_sets/topic_keys' + str(topics) + '-july_cognitive.csv', 'rU')) if topic[1] == "F"]


# Analyses
def complete_analysis(dataset,
                      dataset_name,
                      name,
                      masklist,
                      processes=1,
                      features=None):