Ejemplo n.º 1
0
def prepare_data(data_dir, output_dir, pipeline="cpac", quality_checked=True):
    """Build PCA-reduced connectivity features and diagnosis labels for ABIDE.

    The ABIDE dataset is fetched with ``datasets.fetch_abide_pcp``.
    Connectivity features are read from
    ``<output_dir>/ABIDE_BASC064_features.npz`` (array stored under key
    ``'a'``) when that file can be loaded. Otherwise they are extracted from
    every functional image using the BASC multiscale atlas at scale 64,
    written to that same file, and then used.

    Parameters
    ----------
    data_dir : str
        Directory passed to the nilearn fetcher as ``data_dir``.
    output_dir : str
        Directory that holds (or will receive) the cached feature file.
    pipeline : str, optional
        Forwarded to ``fetch_abide_pcp`` as ``pipeline``.
    quality_checked : bool, optional
        Forwarded to ``fetch_abide_pcp`` as ``quality_checked``.

    Returns
    -------
    tuple
        ``(X_features_pca, y_target)``: the features projected with
        ``PCA(0.99)`` and the ``'DX_GROUP'`` column of the phenotypic table.
    """
    # get dataset
    print("Loading dataset...")
    abide = datasets.fetch_abide_pcp(data_dir=data_dir,
                                     pipeline=pipeline,
                                     quality_checked=quality_checked)
    # make list of filenames
    fmri_filenames = abide.func_preproc

    # load atlas
    multiscale = datasets.fetch_atlas_basc_multiscale_2015()
    atlas_filename = multiscale.scale064

    # initialize masker object
    masker = NiftiLabelsMasker(labels_img=atlas_filename,
                               standardize=True,
                               memory='nilearn_cache',
                               verbose=0)

    # initialize correlation measure
    correlation_measure = ConnectivityMeasure(kind='correlation',
                                              vectorize=True,
                                              discard_diagonal=True)

    # Single source of truth for the cache location (used for load AND save).
    feat_file = os.path.join(output_dir, 'ABIDE_BASC064_features.npz')

    try:
        X_features = np.load(feat_file)['a']
    except (OSError, KeyError, ValueError):
        # Missing/unreadable file, or an archive without the 'a' entry:
        # fall back to extraction. Anything else (including Ctrl-C) is
        # deliberately allowed to propagate instead of being swallowed.
        print("No feature file found. Extracting features...")
        n_total = len(fmri_filenames)
        extracted = []  # one vectorized correlation matrix per subject

        for count, sub in enumerate(fmri_filenames, start=1):
            # extract the timeseries from the ROIs in the atlas
            time_series = masker.fit_transform(sub)
            # vectorized region x region correlation matrix (diagonal dropped)
            extracted.append(
                correlation_measure.fit_transform([time_series])[0])
            # keep track of status
            print('finished extracting %s of %s' % (count, n_total))

        # Same array type as the cached branch produces.
        X_features = np.asarray(extracted)
        # Save features
        np.savez_compressed(feat_file, a=X_features)
    else:
        print("Feature file found.")

    # Dimensionality reduction of features with PCA
    print("Running PCA...")
    pca = PCA(0.99).fit(X_features)  # keeping 99% of variance
    X_features_pca = pca.transform(X_features)

    # Transform phenotypic data into dataframe
    abide_pheno = pd.DataFrame(abide.phenotypic)

    # Get the target vector
    y_target = abide_pheno['DX_GROUP']

    return X_features_pca, y_target
Ejemplo n.º 2
0
def test_fetch_abide_pcp():
    """Fetch ABIDE from a mocked phenotypic CSV and check the file count.

    The mocked CSV has 800 rows whose FILE_ID alternates between
    'filename' and 'no_filename'; the fetch is expected to report 400
    ``func_preproc`` entries.
    """
    n_rows = 800
    local_url = "file://" + datadir

    subject_ids = [('50%03d' % idx).encode() for idx in range(n_rows)]
    # Even positions carry a usable file id, odd positions do not.
    file_ids = ['filename', 'no_filename'] * (n_rows // 2)

    record_dtype = [('subject_id', int), ('FILE_ID', 'U11')]
    pheno = np.asarray(list(zip(subject_ids, file_ids)), dtype=record_dtype)
    # pheno = pheno.T.view()
    file_mock.add_csv('Phenotypic_V1_0b_preprocessed1.csv', pheno)

    # All subjects
    dataset = datasets.fetch_abide_pcp(
        data_dir=tmpdir,
        url=local_url,
        quality_checked=False,
        verbose=0,
    )
    assert_equal(len(dataset.func_preproc), 400)
Ejemplo n.º 3
0
def test_fetch_abide_pcp():
    """Check the number of functional files returned for a mocked ABIDE CSV.

    Half of the 800 mocked phenotypic rows use FILE_ID 'no_filename'; the
    assertion expects the remaining 400 to appear in ``func_preproc``.
    """
    local_url = "file://" + datadir

    # One (subject_id, FILE_ID) record per row; even rows get 'filename'.
    rows = [
        (('50%03d' % i).encode(), 'no_filename' if i % 2 else 'filename')
        for i in range(800)
    ]
    pheno = np.asarray(rows,
                       dtype=[('subject_id', int), ('FILE_ID', 'U11')])
    # pheno = pheno.T.view()
    file_mock.add_csv('Phenotypic_V1_0b_preprocessed1.csv', pheno)

    # All subjects
    fetch_options = dict(data_dir=tmpdir, url=local_url,
                         quality_checked=False, verbose=0)
    dataset = datasets.fetch_abide_pcp(**fetch_options)
    assert_equal(len(dataset.func_preproc), 400)
Ejemplo n.º 4
0
def get_dataset(dataset, max_images=np.inf, **kwargs):
    """Fetch one of the supported datasets and return it in a common shape.

    Parameters
    ----------
    dataset : str
        One of ``'neurovault'``, ``'abide'`` or ``'nyu'``.
    max_images : number, optional
        Upper bound on the number of images / subjects requested. For
        ``'abide'`` and ``'nyu'`` it is additionally capped at 94 and 25.
    **kwargs
        Forwarded unchanged to the underlying fetcher.

    Returns
    -------
    tuple
        ``(images, term_scores)``. For ``'abide'`` and ``'nyu'``, ``images``
        is a list of ``{'absolute_path': ...}`` dicts and ``term_scores`` is
        ``None``; for ``'neurovault'`` both come from ``fetch_neurovault``.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported names.
    """
    if dataset == 'neurovault':
        images, term_scores = fetch_neurovault(max_images=max_images, **kwargs)
        return images, term_scores

    if dataset == 'abide':
        bunch = datasets.fetch_abide_pcp(
            n_subjects=min(94, max_images), **kwargs)
        return [{'absolute_path': p} for p in bunch['func_preproc']], None

    if dataset == 'nyu':
        bunch = datasets.fetch_nyu_rest(
            n_subjects=min(25, max_images), **kwargs)
        return [{'absolute_path': p} for p in bunch['func']], None

    raise ValueError("Unknown dataset: %s" % dataset)
Ejemplo n.º 5
0
def _load_generated_features(csv_path):
    """Read one synthetic-feature CSV: skip its first row, drop its first column."""
    frame = pd.read_csv(csv_path, sep=',', header=None, skiprows=1)
    return frame.drop(frame.columns[0], axis=1).values


def fetch_generated_data(data_dir, output_dir):
    """Stack real and synthetic ABIDE features, reduce with PCA, build labels.

    Reads the cached real features from
    ``<output_dir>/ABIDE_BASC064_features.npz`` (key ``'a'``) and the
    synthetic features from ``generated_data_asd_300.csv`` and
    ``generated_data_tc_300.csv`` in the same directory, stacks them in the
    order real / ASD / control, and projects the result with ``PCA(0.99)``.

    Parameters
    ----------
    data_dir : str
        Directory passed to ``datasets.fetch_abide_pcp`` as ``data_dir``.
    output_dir : str
        Directory containing the cached feature file and the two CSV files.

    Returns
    -------
    tuple
        ``(X_features_pca, y_target)`` where ``y_target`` is a
        ``pandas.Series`` holding the real ``'DX_GROUP'`` values followed by
        one label per synthetic ASD row (1) and per synthetic control row (0).
    """
    print("Loading dataset...")
    abide = datasets.fetch_abide_pcp(data_dir=data_dir,
                                     pipeline='cpac',
                                     quality_checked=True)

    feat_file = os.path.join(output_dir, 'ABIDE_BASC064_features.npz')
    X_features = np.load(feat_file)['a']

    synthetic_asd = _load_generated_features(
        os.path.join(output_dir, 'generated_data_asd_300.csv'))
    synthetic_tc = _load_generated_features(
        os.path.join(output_dir, 'generated_data_tc_300.csv'))

    X_features_all = np.vstack((X_features, synthetic_asd, synthetic_tc))
    print('Stacked synthetic and original features ', X_features_all.shape)

    print("Running PCA...")
    pca = PCA(0.99).fit(X_features_all)  # keeping 99% of variance
    X_features_pca = pca.transform(X_features_all)

    # Transform phenotypic data into dataframe
    abide_pheno = pd.DataFrame(abide.phenotypic)

    # Get the target vector
    y_target = abide_pheno['DX_GROUP']

    # Label the synthetic rows. The counts come from the rows actually
    # loaded, so the label vector always lines up with X_features_pca (the
    # previous hard-coded 100 only matched files with exactly 100 rows).
    # NOTE(review): synthetic controls are labelled 0; ABIDE's DX_GROUP is
    # believed to code controls as 2 -- confirm downstream code expects 0.
    y_target_arr = np.concatenate((
        y_target.values,
        np.ones(len(synthetic_asd)),   # asd group
        np.zeros(len(synthetic_tc)),   # tc group
    ))

    return X_features_pca, pd.Series(y_target_arr)
Ejemplo n.º 6
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec  9 20:03:28 2018
@author: xinyang
This program finds the top five correlated and anti-correlated ROIs for the ASD and TD groups.
"""

from nilearn.datasets import fetch_abide_pcp
import pandas as pd
import numpy as np

# ROI label table and atlas image for the CC400 parcellation (local files).
labels = pd.read_csv('CC400_ROI_labels.csv')
atlas_filename = 'cc400_roi_atlas.nii.gz'

# Fetch the CC400 ROI derivative for the cpac pipeline, without QC filtering.
abide = fetch_abide_pcp(
    derivatives=['rois_cc400'],
    pipeline='cpac',
    quality_checked=False,
)
y = abide.phenotypic['DX_GROUP']
names = ['DX_GROUP', 'rois_cc400']

# Two-column table: diagnosis group next to the matching ROI entry.
data = pd.DataFrame([y, abide.rois_cc400]).T
data.columns = names

# Split the ROI column by diagnosis code (1 -> ASD, 2 -> TD).
is_asd = data['DX_GROUP'] == 1
is_td = data['DX_GROUP'] == 2
ASD = data.loc[is_asd, 'rois_cc400']  # original author's note: 505
TD = data.loc[is_td, 'rois_cc400']  # original author's note: 530

##############################################################################
# Compute and display a correlation matrix
# -----------------------------------------
# Set up the correlation estimator used for the matrices
from nilearn import plotting
from nilearn.connectome import ConnectivityMeasure
correlation_measure = ConnectivityMeasure(kind='correlation')
Ejemplo n.º 7
0
import gzip
import csv
import numpy as np
from nilearn.input_data import NiftiLabelsMasker
from nilearn import datasets

# Script section: download the 'rois_tt' ABIDE derivative and set up the
# paths/labels used by the rest of the script.
__author__ = '2d Lt Kyle Palko'
__version__ = 'v0.1'

pipe = 'cpac'  # preprocessing pipeline requested from the fetcher
path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test'  # download target directory
# path = '/home/kap/Thesis/Data/DL'  (alternative download location)
derivative = 'rois_tt'  # which derivative to pull

# Download with band-pass filtering and global signal regression enabled.
# The return value is discarded; only the files on disk are used afterwards.
datasets.fetch_abide_pcp(data_dir=path, pipeline=pipe, band_pass_filtering=True, global_signal_regression=True,
                         derivatives=[derivative])

# local variables and paths #
# NOTE: `path` is rebound here -- from this point on it is the working
# directory, no longer the download target used above.
path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/'  # working directory
filt = 'filt_global'
stud = 'Test/ABIDE_pcp/{0}/{1}/'.format(pipe, filt)  # location the download was written to, relative to `path`
# Alternative values kept by the original author:
# stud = 'C200/ABIDE_pcp/{0}/{1}/'.format(pipe, filt)
# stud = '/ABIDE_pcp/{0}/{1}/'.format(pipe, filt)
# stud = 'Data/'
# lab = path + 'Label/'  # location of CSV files for labeling
lab = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Data/'
# lab = '/home/kap/Thesis/Data/Label/'
mask_name = 'tt97_prep'

# build two lists of strings from CSV files to use to match the subjects and their diagnosis
idlab = []  # subject IDs (filled in by code beyond this section)
# Make the sibling `inverse_covariance` package importable from this script.
# NOTE(review): `sys` and `plt` are not imported in the visible part of this
# file -- confirm they are imported before this point.
sys.path.append("../inverse_covariance")
from inverse_covariance import (
    QuicGraphicalLasso,
    QuicGraphicalLassoCV,
    QuicGraphicalLassoEBIC,
    AdaptiveGraphicalLasso,
)

# Switch matplotlib to interactive mode.
plt.ion()

# Fetch the coordinates of the Power atlas and stack the x/y/z columns
# into a single array with one row per ROI.
power = datasets.fetch_coords_power_2011()
coords = np.vstack((power.rois["x"], power.rois["y"], power.rois["z"])).T

# Load the functional data for a single ABIDE subject.
abide = datasets.fetch_abide_pcp(n_subjects=1)
# Alias the preprocessed functional images under the shorter name `func`.
abide.func = abide.func_preproc

# print basic information on the dataset
# 4D data
print("First subject functional nifti images (4D) are at: %s" % abide.func[0])

###############################################################################
# Masking: taking the signal in a sphere of radius 5mm around Power coords

masker = input_data.NiftiSpheresMasker(
    seeds=coords,
    smoothing_fwhm=4,
    radius=5.,
    standardize=True,
    detrend=True,
Ejemplo n.º 9
0
from nilearn.datasets import fetch_abide_pcp
import pandas
import os
# Read the phenotypic table and list the distinct acquisition sites.
# NOTE(review): the file has a .csv extension but is parsed with a tab
# separator -- confirm the delimiter matches the file on disk.
df = pandas.read_csv('Phenotypic_V1_0b_preprocessed1.csv', sep='\t')
site_id = np.unique(df['SITE_ID'])
print(site_id)

for S in site_id:

    data_dir = 'test'

    # Options shared by both diagnosis groups for the current site.
    site_fetch_options = dict(
        data_dir=data_dir,
        n_subjects=None,
        pipeline='cpac',
        band_pass_filtering=True,
        global_signal_regression=True,
        derivatives=['func_preproc'],
        quality_checked=True,
        SITE_ID=S,
    )

    # DX_GROUP: 1 is autism, 2 is control
    abidedata_func_normal = fetch_abide_pcp(DX_GROUP=2, **site_fetch_options)
    abidedata_func_autism = fetch_abide_pcp(DX_GROUP=1, **site_fetch_options)

    # Only the control-group count is reported.
    print(len(abidedata_func_normal['func_preproc']))
Ejemplo n.º 10
0
import numpy as np

from nilearn.input_data import NiftiMapsMasker

from nilearn.regions import RegionExtractor

from nilearn.connectome import ConnectivityMeasure
import matplotlib.pyplot as plt


# In[ ]:


# Fetch the preprocessed functional images, restricted to the NYU site.
# (Original note: for fetching dataset for first instance only.)
abide = datasets.fetch_abide_pcp(
    data_dir=data_dir2,
    derivatives=['func_preproc'],
    SITE_ID=['NYU'],
    # n_subjects=3,
)
print(abide.keys())


# Keep a short handle on the functional image entries.
func = abide.func_preproc


# In[ ]:


type(func)


# In[ ]:
Ejemplo n.º 11
0
import gzip
import numpy as np
import csv

# Download the func_preproc image of each requested subject and unpack it
# from .nii.gz to "<subject id>.nii" inside the download directory.
path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test'  # set where the data should be saved
# sub_id = [50060]  # sets the subject ids that should be pulled simple 3
# sub_id = range(50003, 50061)  # calls all of the PITT study subjects
# NOTE(review): both `sub_id` assignments above are commented out; enable
# one (or define `sub_id` elsewhere) before the loop below can run.
pipeline = 'cpac'  # define the pipeline used to preprocess the data
derivative = 'func_preproc'  # define what data should be pulled

# set the directory where the data is stored so the script can find and rename the data
os.chdir('/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test/ABIDE_pcp/cpac/filt_noglobal/')

for n in sub_id:
    # download the fMRI image for this subject ID
    datasets.fetch_abide_pcp(data_dir=path, n_subjects=1, pipeline=pipeline, band_pass_filtering=True,
                             derivatives=[derivative], SUB_ID=n)
    # extract and rename the image file
    for name in glob.glob('*' + str(n) + '*.gz'):  # find the freshly downloaded archive(s)
        # Context managers close both handles even if the copy fails, and
        # the chunked copy avoids holding a whole 4D image in memory.
        with gzip.open(name, 'rb') as inF, open('{0}.nii'.format(n), 'wb') as outF:
            for chunk in iter(lambda: inF.read(1 << 20), b''):
                outF.write(chunk)
        os.remove(name)  # delete the .nii.gz only after a successful extract

    # download the image's corresponding mask
#    datasets.fetch_abide_pcp(data_dir=path, n_subjects=1, pipeline=pipeline, band_pass_filtering=True,
#                             derivatives=['func_mask'], SUB_ID=n)
#    # extract and rename the mask
#    for name in glob.glob('*' + str(n) + '*.gz'):  # use glob to find the recently download filename
#        inF = gzip.open(name, 'rb')  # opens .gz file
    # By default it used two classifiers, LinearSVC ('l1', 'l2') and Ridge
    # Not so good documentation and implementation here according to me
    learn_brain_regions.classify(labels, cv=[(train_index, test_index)],
                                 scoring='roc_auc')
    print(learn_brain_regions.scores_)

    return learn_brain_regions

###########################################################################
# Data
# ----
# Load the datasets from Nilearn

from nilearn import datasets

# Fetch ABIDE (cpac pipeline) and keep handles on the functional images and
# on the phenotypic table.
abide_data = datasets.fetch_abide_pcp(pipeline='cpac')
func_imgs = abide_data.func_preproc
phenotypic = abide_data.phenotypic

# Name of the phenotypic column used as the per-subject class label.
class_type = 'DX_GROUP'
# Directory handed to joblib's Memory for on-disk caching.
cache_path = 'data_processing_abide'

# NOTE(review): `sklearn.externals.joblib` has been removed from recent
# scikit-learn releases and joblib renamed Memory's `cachedir` argument to
# `location` -- confirm the pinned versions before running this.
from sklearn.externals.joblib import Memory, Parallel, delayed
mem = Memory(cachedir=cache_path)

# No confounds are configured for the connectome step at this point.
connectome_regress_confounds = None

# Take the target shape/affine from the first functional image; the third
# value returned by data_info is ignored.
from nilearn_utils import data_info
target_shape, target_affine, _ = data_info(func_imgs[0])
Ejemplo n.º 13
0
def main():
    """Optionally download ABIDE ROI time series, then compute connectivity.

    Command-line options:

    * ``--pipeline``: preprocessing pipeline passed to the fetcher
      (default ``cpac``).
    * ``--atlas``: parcellation atlas; the derivative fetched is
      ``'rois_' + atlas`` (default ``cc200``).
    * ``--download``: whether to download before processing, parsed with
      ``str2bool`` (default ``True``).

    After the optional download, each subject's file is moved into its own
    folder under the module-level ``data_folder``, and correlation and
    partial-correlation matrices are computed through ``Reader``.
    """
    parser = argparse.ArgumentParser(
        description=
        'Download ABIDE data and compute functional connectivity matrices')
    parser.add_argument(
        '--pipeline',
        default='cpac',
        type=str,
        help=
        'Pipeline to preprocess ABIDE data. Available options are ccs, cpac, dparsf and niak.'
        ' default: cpac.')
    parser.add_argument(
        '--atlas',
        default='cc200',
        help=
        'Brain parcellation atlas. Options: ho, cc200 and cc400, default: cc200.'
    )
    parser.add_argument(
        '--download',
        default=True,
        type=str2bool,
        help=
        'Dowload data or just compute functional connectivity. default: True')
    args = parser.parse_args()
    print(args)

    pipeline = args.pipeline
    atlas = args.atlas

    # Files to fetch
    files = ['rois_' + atlas]

    # Derivative name -> filename suffix used on disk.
    filemapping = {
        'func_preproc': 'func_preproc.nii.gz',
        files[0]: files[0] + '.1D'
    }

    # Download database files (the returned bunch is not needed: the files
    # are located again on disk through Reader below).
    if args.download:
        datasets.fetch_abide_pcp(data_dir=root_folder,
                                 pipeline=pipeline,
                                 band_pass_filtering=True,
                                 global_signal_regression=False,
                                 derivatives=files,
                                 quality_checked=False)

    subject_IDs = Reader.get_ids()  # changed path to data path
    subject_IDs = subject_IDs.tolist()

    # Create a folder for each subject
    for s, fname in zip(subject_IDs,
                        Reader.fetch_filenames(subject_IDs, files[0], atlas)):
        subject_folder = os.path.join(data_folder, s)
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(subject_folder, exist_ok=True)

        # Get the base filename for each subject
        base = fname.split(files[0])[0]

        # Move each subject file to the subject folder
        for fl in files:
            source = base + filemapping[fl]
            # shutil.move() into a directory writes to <dir>/<basename>, so
            # that is the path whose existence must be tested.
            target = os.path.join(subject_folder, os.path.basename(source))
            if not os.path.exists(target):
                shutil.move(source, subject_folder)

    time_series = Reader.get_timeseries(subject_IDs, atlas)

    # Compute and save connectivity matrices
    Reader.subject_connectivity(time_series, subject_IDs, atlas, 'correlation')
    Reader.subject_connectivity(time_series, subject_IDs, atlas,
                                'partial correlation')
Ejemplo n.º 14
0
__author__ = '2d Lt Kyle Palko'

from nilearn import datasets
import time

# Timestamp taken before the download starts.
start = time.time()

# Download settings.
path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test'  # download target directory
pipeline = 'cpac'  # preprocessing pipeline to request
derivative = 'func_preproc'  # derivative to pull
stud = '/ABIDE_pcp/cpac/filt_noglobal/e806a9ef657f316b760441d3649f7cb6'  # not used in the visible part of the script

# Fetch the band-pass-filtered derivative; the return value is discarded.
datasets.fetch_abide_pcp(
    data_dir=path,
    pipeline=pipeline,
    band_pass_filtering=True,
    derivatives=[derivative],
)
Ejemplo n.º 15
0
# Number of subjects to request, and where the data lives on disk.
num_subjects = 871
root_folder = '/path/to/data/'
data_folder = os.path.join(root_folder, 'ABIDE_pcp/cpac/filt_noglobal')

# Derivatives to fetch and the filename suffix each one uses.
files = ['rois_ho']
filemapping = {
    'func_preproc': 'func_preproc.nii.gz',
    'rois_ho': 'rois_ho.1D',
}

# Put a copy of the subject ID list next to the downloaded data.
subject_list_copy = os.path.join(data_folder, 'subject_IDs.txt')
shutil.copyfile('./subject_IDs.txt', subject_list_copy)

# Download database files
abide = datasets.fetch_abide_pcp(
    data_dir=root_folder,
    n_subjects=num_subjects,
    pipeline=pipeline,
    band_pass_filtering=True,
    global_signal_regression=False,
    derivatives=files,
)

subject_IDs = Reader.get_ids(num_subjects).tolist()

# Create a folder for each subject
subject_filenames = Reader.fetch_filenames(subject_IDs, files[0])
for s, fname in zip(subject_IDs, subject_filenames):
    subject_folder = os.path.join(data_folder, s)
    if not os.path.exists(subject_folder):
        os.mkdir(subject_folder)

    # Everything in the filename that precedes the derivative name.
    base = fname.split(files[0])[0]
Ejemplo n.º 16
0
# sub_id = [50060]  # sets the subject ids that should be pulled simple 3
# sub_id = range(50003, 50061)  # calls all of the PITT study subjects
# Download the func_preproc image of each requested subject and unpack it
# from .nii.gz to "<subject id>.nii" inside the download directory.
# NOTE(review): `sub_id` and `path` are not assigned in the visible part of
# this script (the `sub_id` examples above are commented out) -- make sure
# both are defined before the loop below runs.
pipeline = 'cpac'  # define the pipeline used to preprocess the data
derivative = 'func_preproc'  # define what data should be pulled

# set the directory where the data is stored so the script can find and rename the data
os.chdir(
    '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test/ABIDE_pcp/cpac/filt_noglobal/'
)

for n in sub_id:
    # download the fMRI image for this subject ID
    datasets.fetch_abide_pcp(
        data_dir=path,
        n_subjects=1,
        pipeline=pipeline,
        band_pass_filtering=True,
        derivatives=[derivative],
        SUB_ID=n)
    # extract and rename the image file
    for name in glob.glob('*' + str(n) + '*.gz'):  # find the freshly downloaded archive(s)
        # Context managers close both handles even if the copy fails, and
        # the chunked copy avoids holding a whole 4D image in memory.
        with gzip.open(name, 'rb') as inF, \
                open('{0}.nii'.format(n), 'wb') as outF:
            for chunk in iter(lambda: inF.read(1 << 20), b''):
                outF.write(chunk)
        os.remove(name)  # delete the .nii.gz only after a successful extract
Ejemplo n.º 17
0
import numpy as np
from nilearn.input_data import NiftiLabelsMasker
from nilearn import datasets

# Script section: download the 'rois_tt' ABIDE derivative and set up the
# paths/labels used by the rest of the script.
__author__ = '2d Lt Kyle Palko'
__version__ = 'v0.1'

pipe = 'cpac'  # preprocessing pipeline requested from the fetcher
path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test'  # download target directory
# path = '/home/kap/Thesis/Data/DL'  (alternative download location)
derivative = 'rois_tt'  # which derivative to pull

# Download with band-pass filtering and global signal regression enabled.
# The return value is discarded; only the files on disk are used afterwards.
datasets.fetch_abide_pcp(data_dir=path,
                         pipeline=pipe,
                         band_pass_filtering=True,
                         global_signal_regression=True,
                         derivatives=[derivative])

# local variables and paths #
# NOTE: `path` is rebound here -- from this point on it is the working
# directory, no longer the download target used above.
path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/'  # working directory
filt = 'filt_global'
stud = 'Test/ABIDE_pcp/{0}/{1}/'.format(
    pipe, filt)  # location the download was written to, relative to `path`
# Alternative values kept by the original author:
# stud = 'C200/ABIDE_pcp/{0}/{1}/'.format(pipe, filt)
# stud = '/ABIDE_pcp/{0}/{1}/'.format(pipe, filt)
# stud = 'Data/'
# lab = path + 'Label/'  # location of CSV files for labeling
lab = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Data/'
# lab = '/home/kap/Thesis/Data/Label/'
mask_name = 'tt97_prep'
Ejemplo n.º 18
0
from matplotlib import pyplot as plt

# Plot the atlases. Different atlases provide different regions of interest
# (ROIs), and some ROIs are larger than others. The original author notes
# that the TT and AAL atlases are much smaller than the CC400. The atlas
# files must be downloaded beforehand from
# https://preprocessed-connectomes-project.github.io/abide/Pipelines.html#regions_of_interest
for atlas_img, plot_name in (('tt_mask_pad.nii', 'tt_roi_plot'),
                             ('aal_mask_pad.nii', 'aal_roi_plot'),
                             ('CC400.nii', 'cc400_roi_plot')):
    pltt.plot_roi(atlas_img, output_file=plot_name)
print('Completed brain ROI Images')

# download the data (only two images in this case)
path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test'  # set where the data should be saved

# Options common to every single-subject fetch below.
single_subject_options = dict(
    data_dir=path,
    n_subjects=1,
    pipeline='cpac',
    band_pass_filtering=True,
)

# Functional images for subjects 50003 and 50004.
ab_img_one = datasets.fetch_abide_pcp(
    derivatives=['func_preproc'], SUB_ID=[50003], **single_subject_options)
ab_img_two = datasets.fetch_abide_pcp(
    derivatives=['func_preproc'], SUB_ID=[50004], **single_subject_options)

# Functional mask for subject 50003.
ab_mask_one = datasets.fetch_abide_pcp(
    derivatives=['func_mask'], SUB_ID=[50003], **single_subject_options)
Ejemplo n.º 19
0
from nilearn import datasets
from nilearn import plotting as pltt
from matplotlib import pyplot as plt

# Plot the atlases. Different atlases provide different regions of interest
# (ROIs), and some ROIs are larger than others. The original author notes
# that the TT and AAL atlases are much smaller than the CC400. The atlas
# files must be downloaded beforehand from
# https://preprocessed-connectomes-project.github.io/abide/Pipelines.html#regions_of_interest
pltt.plot_roi('tt_mask_pad.nii', output_file='tt_roi_plot')
pltt.plot_roi('aal_mask_pad.nii', output_file='aal_roi_plot')
pltt.plot_roi('CC400.nii', output_file='cc400_roi_plot')
print('Completed brain ROI Images')

# download the data (only two images in this case)
path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test'  # set where the data should be saved

# Functional images for subjects 50003 and 50004.
ab_img_one = datasets.fetch_abide_pcp(data_dir=path, n_subjects=1, pipeline='cpac', band_pass_filtering=True,
                                      derivatives=['func_preproc'], SUB_ID=[50003])
ab_img_two = datasets.fetch_abide_pcp(data_dir=path, n_subjects=1, pipeline='cpac', band_pass_filtering=True,
                                      derivatives=['func_preproc'], SUB_ID=[50004])

# Matching functional masks. The second mask now requests subject 50004 so
# it pairs with ab_img_two (it previously repeated 50003, a copy-paste slip
# that made ab_mask_two a duplicate of ab_mask_one).
ab_mask_one = datasets.fetch_abide_pcp(data_dir=path, n_subjects=1, pipeline='cpac', band_pass_filtering=True,
                                       derivatives=['func_mask'], SUB_ID=[50003])
ab_mask_two = datasets.fetch_abide_pcp(data_dir=path, n_subjects=1, pipeline='cpac', band_pass_filtering=True,
                                       derivatives=['func_mask'], SUB_ID=[50004])

# The downloaded files are expected to have been renamed to these names by
# hand; nothing in this script performs the rename.
my_data = ['pitt3.nii', 'pitt4.nii']

# apply mask to the fMRI images. The mask is the regions of the image that will be extracted for use.
from nilearn.masking import apply_mask
masked_data = apply_mask(my_data[0], 'pitt3mask.nii')  # just mask the first image (fMRI, mask)
print('Completed Masking')