def prepare_data(data_dir, output_dir, pipeline="cpac", quality_checked=True):
    """Build PCA-reduced connectivity features and diagnosis labels for ABIDE.

    Fetches the ABIDE PCP dataset, loads cached BASC (scale 064) correlation
    features from ``output_dir`` when available, otherwise extracts them from
    every preprocessed functional image and caches them, then reduces the
    features with PCA.

    Parameters
    ----------
    data_dir : directory handed to ``datasets.fetch_abide_pcp``.
    output_dir : directory holding (or receiving) ``ABIDE_BASC064_features.npz``.
    pipeline : preprocessing pipeline name forwarded to the fetcher.
    quality_checked : forwarded to the fetcher.

    Returns
    -------
    tuple
        ``(X_features_pca, y_target)`` where ``y_target`` is the ``DX_GROUP``
        column of the phenotypic table.
    """
    import zipfile  # local import: only needed to recognise a corrupt cache

    print("Loading dataset...")
    abide = datasets.fetch_abide_pcp(data_dir=data_dir, pipeline=pipeline,
                                     quality_checked=quality_checked)
    fmri_filenames = abide.func_preproc

    # Atlas used to define the regions of interest.
    multiscale = datasets.fetch_atlas_basc_multiscale_2015()
    atlas_filename = multiscale.scale064

    masker = NiftiLabelsMasker(labels_img=atlas_filename, standardize=True,
                               memory='nilearn_cache', verbose=0)
    correlation_measure = ConnectivityMeasure(kind='correlation',
                                              vectorize=True,
                                              discard_diagonal=True)

    feat_file = os.path.join(output_dir, 'ABIDE_BASC064_features.npz')
    try:
        X_features = np.load(feat_file)['a']
        print("Feature file found.")
    except (OSError, KeyError, ValueError, zipfile.BadZipFile):
        # Only a missing/unreadable cache triggers re-extraction; anything
        # else (e.g. KeyboardInterrupt) now propagates instead of being hidden.
        X_features = []  # one vectorized correlation matrix per subject
        print("No feature file found. Extracting features...")
        for i, sub in enumerate(fmri_filenames):
            # Time series of every atlas region for this subject.
            time_series = masker.fit_transform(sub)
            # Region-by-region correlation matrix, vectorized without diagonal.
            correlation_matrix = correlation_measure.fit_transform([time_series])[0]
            X_features.append(correlation_matrix)
            print('finished extracting %s of %s' % (i + 1, len(fmri_filenames)))
        # Cache the features so the next call can skip extraction.
        np.savez_compressed(feat_file, a=X_features)

    print("Running PCA...")
    pca = PCA(0.99).fit(X_features)  # keep 99% of the variance
    X_features_pca = pca.transform(X_features)

    abide_pheno = pd.DataFrame(abide.phenotypic)
    y_target = abide_pheno['DX_GROUP']
    return (X_features_pca, y_target)
def test_fetch_abide_pcp():
    """Check that fetch_abide_pcp keeps only subjects that have a file.

    A mocked phenotypic CSV lists 800 subjects; every other one carries a
    real ``FILE_ID``, so 400 functional images are expected.
    """
    n_subjects = 800
    local_url = "file://" + datadir
    ids = [('50%03d' % idx).encode() for idx in range(n_subjects)]
    # Even positions get a usable file id, odd positions the placeholder.
    filenames = ['filename' if idx % 2 == 0 else 'no_filename'
                 for idx in range(n_subjects)]
    record_dtype = [('subject_id', int), ('FILE_ID', 'U11')]
    pheno = np.asarray(list(zip(ids, filenames)), dtype=record_dtype)
    file_mock.add_csv('Phenotypic_V1_0b_preprocessed1.csv', pheno)

    # All subjects
    dataset = datasets.fetch_abide_pcp(data_dir=tmpdir, url=local_url,
                                       quality_checked=False, verbose=0)
    assert_equal(len(dataset.func_preproc), 400)
def test_fetch_abide_pcp():
    """Check that fetch_abide_pcp keeps only subjects that have a file.

    A mocked phenotypic CSV lists 800 subjects; every other one carries a
    real ``FILE_ID``, so 400 functional images are expected.
    """
    n_subjects = 800
    local_url = "file://" + datadir
    ids = [('50%03d' % idx).encode() for idx in range(n_subjects)]
    # Even positions get a usable file id, odd positions the placeholder.
    filenames = ['filename' if idx % 2 == 0 else 'no_filename'
                 for idx in range(n_subjects)]
    record_dtype = [('subject_id', int), ('FILE_ID', 'U11')]
    pheno = np.asarray(list(zip(ids, filenames)), dtype=record_dtype)
    file_mock.add_csv('Phenotypic_V1_0b_preprocessed1.csv', pheno)

    # All subjects
    dataset = datasets.fetch_abide_pcp(data_dir=tmpdir, url=local_url,
                                       quality_checked=False, verbose=0)
    assert_equal(len(dataset.func_preproc), 400)
def get_dataset(dataset, max_images=np.inf, **kwargs):
    """Fetch one of the supported datasets and return it in a common shape.

    Parameters
    ----------
    dataset : one of ``'neurovault'``, ``'abide'`` or ``'nyu'``.
    max_images : upper bound on the number of images/subjects requested.
    **kwargs : forwarded unchanged to the underlying fetcher.

    Returns
    -------
    tuple
        ``(images, term_scores)``; ``term_scores`` is ``None`` for the
        ABIDE and NYU datasets.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported names.
    """
    if dataset == 'neurovault':
        images, term_scores = fetch_neurovault(max_images=max_images, **kwargs)
        return images, term_scores

    if dataset == 'abide':
        # Request at most 94 subjects.
        fetched = datasets.fetch_abide_pcp(
            n_subjects=min(94, max_images), **kwargs)
        paths = fetched['func_preproc']
        return [{'absolute_path': p} for p in paths], None

    if dataset == 'nyu':
        # Request at most 25 subjects.
        fetched = datasets.fetch_nyu_rest(
            n_subjects=min(25, max_images), **kwargs)
        paths = fetched['func']
        return [{'absolute_path': p} for p in paths], None

    raise ValueError("Unknown dataset: %s" % dataset)
def _load_synthetic_features(csv_path):
    """Read a generated-feature CSV, skipping its first row and first column."""
    frame = pd.read_csv(csv_path, sep=',', header=None, skiprows=1)
    return frame.drop(frame.columns[0], axis=1)


def fetch_generated_data(data_dir, output_dir):
    """Stack cached ABIDE features with synthetic samples and run PCA.

    Loads ``ABIDE_BASC064_features.npz`` plus the synthetic ASD and TC CSV
    files from ``output_dir``, stacks them (original, ASD, TC), reduces the
    result with PCA and extends the ``DX_GROUP`` labels to match.

    Parameters
    ----------
    data_dir : directory handed to ``datasets.fetch_abide_pcp``.
    output_dir : directory containing the cached and generated feature files.

    Returns
    -------
    tuple
        ``(X_features_pca, y_target)`` with ``y_target`` a ``pandas.Series``
        holding one label per row of ``X_features_pca``.
    """
    print("Loading dataset...")
    abide = datasets.fetch_abide_pcp(data_dir=data_dir, pipeline='cpac',
                                     quality_checked=True)

    feat_file = os.path.join(output_dir, 'ABIDE_BASC064_features.npz')
    X_features = np.load(feat_file)['a']

    X_features_autism = _load_synthetic_features(
        os.path.join(output_dir, 'generated_data_asd_300.csv'))
    X_features_control = _load_synthetic_features(
        os.path.join(output_dir, 'generated_data_tc_300.csv'))

    X_features_all = np.vstack((X_features, X_features_autism.values,
                                X_features_control.values))
    print('Stacked synthetic and original features ', X_features_all.shape)

    print("Running PCA...")
    pca = PCA(0.99).fit(X_features_all)  # keep 99% of the variance
    X_features_pca = pca.transform(X_features_all)

    abide_pheno = pd.DataFrame(abide.phenotypic)
    y_target = abide_pheno['DX_GROUP']

    # One label per synthetic row. The counts were previously hard-coded to
    # 100 each, which leaves X and y with different lengths whenever the CSV
    # files hold a different number of samples.
    # NOTE(review): synthetic controls are labelled 0, while ABIDE's DX_GROUP
    # is understood to code controls as 2 - confirm downstream handling.
    y_target_arr = np.concatenate((
        y_target.values,
        np.ones(len(X_features_autism)),    # synthetic ASD group
        np.zeros(len(X_features_control)),  # synthetic TC group
    ))
    y_target = pd.Series(y_target_arr)
    return (X_features_pca, y_target)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 9 20:03:28 2018

@author: xinyang

This program is to find out the top five correlated and anti-correlated
ROIs of ASD and TD
"""
from nilearn.datasets import fetch_abide_pcp
import pandas as pd
import numpy as np

labels = pd.read_csv('CC400_ROI_labels.csv')
atlas_filename = 'cc400_roi_atlas.nii.gz'

# CC400 ROI time series for every subject, without the quality filter.
abide = fetch_abide_pcp(derivatives=['rois_cc400'],
                        pipeline='cpac',
                        quality_checked=False)
y = abide.phenotypic['DX_GROUP']

# One row per subject: diagnosis label next to that subject's ROI data.
names = ['DX_GROUP', 'rois_cc400']
data = pd.DataFrame([y, abide.rois_cc400], index=names).transpose()

# Split the ROI data by diagnosis group.
ASD = data.loc[data['DX_GROUP'] == 1, 'rois_cc400']  # 505
TD = data.loc[data['DX_GROUP'] == 2, 'rois_cc400']  # 530

##############################################################################
# Compute and display a correlation matrix
# -----------------------------------------
# Plot the correlation matrix
from nilearn import plotting
from nilearn.connectome import ConnectivityMeasure

correlation_measure = ConnectivityMeasure(kind='correlation')
import gzip
import csv
import numpy as np
from nilearn.input_data import NiftiLabelsMasker
from nilearn import datasets

__author__ = '2d Lt Kyle Palko'
__version__ = 'v0.1'

# Preprocessing pipeline, download location and derivative to pull.
pipe = 'cpac'
path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test'
# path = '/home/kap/Thesis/Data/DL'
derivative = 'rois_tt'

# Download the requested derivative (band-pass filtered, global signal
# regressed) into `path`.
_fetch_options = {
    'data_dir': path,
    'pipeline': pipe,
    'band_pass_filtering': True,
    'global_signal_regression': True,
    'derivatives': [derivative],
}
datasets.fetch_abide_pcp(**_fetch_options)

# Local variables and paths.
# path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/'  # working directory
filt = 'filt_global'
# Location that the download happened to.
stud = 'Test/ABIDE_pcp/{0}/{1}/'.format(pipe, filt)
# stud = 'C200/ABIDE_pcp/{0}/{1}/'.format(pipe, filt)
# stud = '/ABIDE_pcp/{0}/{1}/'.format(pipe, filt)
# stud = 'Data/'

# Location of the CSV files used for labeling.
# lab = path + 'Label/'
lab = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Data/'
# lab = '/home/kap/Thesis/Data/Label/'
mask_name = 'tt97_prep'

# Build two lists of strings from CSV files to match the subjects with their
# diagnosis.
idlab = []  # subject IDs
sys.path.append("../inverse_covariance") from inverse_covariance import ( QuicGraphicalLasso, QuicGraphicalLassoCV, QuicGraphicalLassoEBIC, AdaptiveGraphicalLasso, ) plt.ion() # Fetch the coordinates of power atlas power = datasets.fetch_coords_power_2011() coords = np.vstack((power.rois["x"], power.rois["y"], power.rois["z"])).T # Loading the functional datasets abide = datasets.fetch_abide_pcp(n_subjects=1) abide.func = abide.func_preproc # print basic information on the dataset # 4D data print("First subject functional nifti images (4D) are at: %s" % abide.func[0]) ############################################################################### # Masking: taking the signal in a sphere of radius 5mm around Power coords masker = input_data.NiftiSpheresMasker( seeds=coords, smoothing_fwhm=4, radius=5., standardize=True, detrend=True,
# Fetch ABIDE func_preproc images site by site, separately for the control
# and the autism group, and print the number of control images per site.
from nilearn.datasets import fetch_abide_pcp
import numpy as np  # previously missing although np.unique is used below
import pandas
import os

# NOTE(review): the file has a .csv extension but is parsed as tab-separated;
# confirm the delimiter, otherwise the 'SITE_ID' column will not be found.
df = pandas.read_csv('Phenotypic_V1_0b_preprocessed1.csv', sep='\t')
site_id = np.unique(df['SITE_ID'])
print(site_id)

# The download directory does not depend on the site, so set it once.
data_dir = 'test'

for S in site_id:
    # DX_GROUP: 1 is autism, 2 is control
    abidedata_func_normal = fetch_abide_pcp(
        data_dir=data_dir, n_subjects=None, pipeline='cpac',
        band_pass_filtering=True, global_signal_regression=True,
        derivatives=['func_preproc'], quality_checked=True,
        DX_GROUP=2, SITE_ID=S)
    abidedata_func_autism = fetch_abide_pcp(
        data_dir=data_dir, n_subjects=None, pipeline='cpac',
        band_pass_filtering=True, global_signal_regression=True,
        derivatives=['func_preproc'], quality_checked=True,
        DX_GROUP=1, SITE_ID=S)
    print(len(abidedata_func_normal['func_preproc']))
import numpy as np
from nilearn.input_data import NiftiMapsMasker
from nilearn.regions import RegionExtractor
from nilearn.connectome import ConnectivityMeasure
import matplotlib.pyplot as plt

# In[ ]:

# Fetch the dataset; only needed the first time the notebook is run.
# Restricted to the NYU site (subject-count limit left disabled).
_abide_request = dict(
    data_dir=data_dir2,
    derivatives=['func_preproc'],
    SITE_ID=['NYU'],
    # n_subjects=3
)
abide = datasets.fetch_abide_pcp(**_abide_request)
print(abide.keys())
func = abide.func_preproc

# In[ ]:

type(func)

# In[ ]:
import gzip
import numpy as np
import csv

# Where the data should be saved.
path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test'
# NOTE(review): `sub_id` must be defined before the loop below; both
# candidate definitions are currently commented out.
# sub_id = [50060]  # sets the subject ids that should be pulled simple 3
# sub_id = range(50003, 50061)  # calls all of the PITT study subjects
pipeline = 'cpac'  # pipeline used to preprocess the data
derivative = 'func_preproc'  # which data should be pulled

# Work inside the download directory so the relative glob pattern below
# finds the freshly downloaded archives.
os.chdir('/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test/ABIDE_pcp/cpac/filt_noglobal/')

for n in sub_id:
    # Download the fMRI image of this subject.
    datasets.fetch_abide_pcp(data_dir=path, n_subjects=1, pipeline=pipeline,
                             band_pass_filtering=True,
                             derivatives=[derivative], SUB_ID=n)

    # Extract every matching archive to '<subject id>.nii' and delete it.
    for name in glob.glob('*' + str(n) + '*.gz'):
        # Context managers close both handles even if the copy fails.
        with gzip.open(name, 'rb') as inF, \
                open('{0}.nii'.format(n), 'wb') as outF:
            outF.write(inF.read())
        os.remove(name)  # delete the .nii.gz file

    # The download/extraction of the image's corresponding mask
    # (derivatives=['func_mask']) is currently disabled.
# By default it used two classifiers, LinearSVC ('l1', 'l2') and Ridge # Not so good documentation and implementation here according to me learn_brain_regions.classify(labels, cv=[(train_index, test_index)], scoring='roc_auc') print(learn_brain_regions.scores_) return learn_brain_regions ########################################################################### # Data # ---- # Load the datasets from Nilearn from nilearn import datasets abide_data = datasets.fetch_abide_pcp(pipeline='cpac') func_imgs = abide_data.func_preproc phenotypic = abide_data.phenotypic # class type for each subject is different class_type = 'DX_GROUP' cache_path = 'data_processing_abide' from sklearn.externals.joblib import Memory, Parallel, delayed mem = Memory(cachedir=cache_path) connectome_regress_confounds = None from nilearn_utils import data_info target_shape, target_affine, _ = data_info(func_imgs[0])
def main():
    """Download ABIDE ROI time series and compute connectivity matrices.

    Parses ``--pipeline``, ``--atlas`` and ``--download`` from the command
    line, optionally downloads the ``rois_<atlas>`` derivative, moves every
    subject's file into its own folder below ``data_folder`` and finally
    computes correlation and partial-correlation connectivity through
    ``Reader``.
    """
    parser = argparse.ArgumentParser(
        description='Download ABIDE data and compute functional connectivity matrices')
    parser.add_argument(
        '--pipeline', default='cpac', type=str,
        help='Pipeline to preprocess ABIDE data. Available options are ccs, cpac, dparsf and niak.'
             ' default: cpac.')
    parser.add_argument(
        '--atlas', default='cc200',
        help='Brain parcellation atlas. Options: ho, cc200 and cc400, default: cc200.')
    parser.add_argument(
        '--download', default=True, type=str2bool,
        help='Dowload data or just compute functional connectivity. default: True')
    args = parser.parse_args()
    print(args)

    pipeline = args.pipeline
    atlas = args.atlas
    download = args.download

    # Files to fetch, and the file-name suffix each derivative is stored under.
    files = ['rois_' + atlas]
    filemapping = {
        'func_preproc': 'func_preproc.nii.gz',
        files[0]: files[0] + '.1D',
    }

    # Download database files (the fetcher's return value is not needed).
    if download:
        datasets.fetch_abide_pcp(data_dir=root_folder, pipeline=pipeline,
                                 band_pass_filtering=True,
                                 global_signal_regression=False,
                                 derivatives=files, quality_checked=False)

    subject_IDs = Reader.get_ids()
    subject_IDs = subject_IDs.tolist()

    # Create a folder for each subject and move its files there.
    subject_files = Reader.fetch_filenames(subject_IDs, files[0], atlas)
    for s, fname in zip(subject_IDs, subject_files):
        subject_folder = os.path.join(data_folder, s)
        if not os.path.exists(subject_folder):
            os.mkdir(subject_folder)

        # Everything in the file name before the derivative name.
        base = fname.split(files[0])[0]

        for fl in files:
            # Skip files that already sit in the subject folder.
            target = os.path.join(subject_folder, base + filemapping[fl])
            if not os.path.exists(target):
                shutil.move(base + filemapping[fl], subject_folder)

    time_series = Reader.get_timeseries(subject_IDs, atlas)

    # Compute and save connectivity matrices.
    Reader.subject_connectivity(time_series, subject_IDs, atlas, 'correlation')
    Reader.subject_connectivity(time_series, subject_IDs, atlas,
                                'partial correlation')
__author__ = '2d Lt Kyle Palko'

from nilearn import datasets
import time

# Timestamp taken before the download starts.
start = time.time()

# Where the data should be saved.
path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test'
pipeline = 'cpac'  # pipeline used to preprocess the data
derivative = 'func_preproc'  # which data should be pulled
stud = '/ABIDE_pcp/cpac/filt_noglobal/e806a9ef657f316b760441d3649f7cb6'

# Download the band-pass filtered derivative into `path`.
_fetch_options = {
    'data_dir': path,
    'pipeline': pipeline,
    'band_pass_filtering': True,
    'derivatives': [derivative],
}
datasets.fetch_abide_pcp(**_fetch_options)
num_subjects = 871 # Number of subjects root_folder = '/path/to/data/' data_folder = os.path.join(root_folder, 'ABIDE_pcp/cpac/filt_noglobal') # Files to fetch files = ['rois_ho'] filemapping = {'func_preproc': 'func_preproc.nii.gz', 'rois_ho': 'rois_ho.1D'} shutil.copyfile('./subject_IDs.txt', os.path.join(data_folder, 'subject_IDs.txt')) # Download database files abide = datasets.fetch_abide_pcp(data_dir=root_folder, n_subjects=num_subjects, pipeline=pipeline, band_pass_filtering=True, global_signal_regression=False, derivatives=files) subject_IDs = Reader.get_ids(num_subjects) subject_IDs = subject_IDs.tolist() # Create a folder for each subject for s, fname in zip(subject_IDs, Reader.fetch_filenames(subject_IDs, files[0])): subject_folder = os.path.join(data_folder, s) if not os.path.exists(subject_folder): os.mkdir(subject_folder) # Get the base filename for each subject base = fname.split(files[0])[0]
# NOTE(review): `sub_id` must be defined before the loop below; both
# candidate definitions are currently commented out.
# sub_id = [50060]  # sets the subject ids that should be pulled simple 3
# sub_id = range(50003, 50061)  # calls all of the PITT study subjects
pipeline = 'cpac'  # pipeline used to preprocess the data
derivative = 'func_preproc'  # which data should be pulled

# Work inside the download directory so the relative glob pattern below
# finds the freshly downloaded archives.
os.chdir(
    '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test/ABIDE_pcp/cpac/filt_noglobal/'
)

for n in sub_id:
    # Download the fMRI image of this subject.
    datasets.fetch_abide_pcp(
        data_dir=path,
        n_subjects=1,
        pipeline=pipeline,
        band_pass_filtering=True,
        derivatives=[derivative],
        SUB_ID=n)

    # Extract every matching archive to '<subject id>.nii' and delete it.
    for name in glob.glob('*' + str(n) + '*.gz'):
        # Context managers close both handles even if the copy fails.
        with gzip.open(name, 'rb') as inF, \
                open('{0}.nii'.format(n), 'wb') as outF:
            outF.write(inF.read())
        os.remove(name)  # delete the .nii.gz file
import numpy as np
from nilearn.input_data import NiftiLabelsMasker
from nilearn import datasets

__author__ = '2d Lt Kyle Palko'
__version__ = 'v0.1'

# Preprocessing pipeline, download location and derivative to pull.
pipe = 'cpac'
path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test'
# path = '/home/kap/Thesis/Data/DL'
derivative = 'rois_tt'

# Download the requested derivative (band-pass filtered, global signal
# regressed) into `path`.
_fetch_options = {
    'data_dir': path,
    'pipeline': pipe,
    'band_pass_filtering': True,
    'global_signal_regression': True,
    'derivatives': [derivative],
}
datasets.fetch_abide_pcp(**_fetch_options)

# Local variables and paths.
# path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/'  # working directory
filt = 'filt_global'
# Location that the download happened to.
stud = 'Test/ABIDE_pcp/{0}/{1}/'.format(pipe, filt)
# stud = 'C200/ABIDE_pcp/{0}/{1}/'.format(pipe, filt)
# stud = '/ABIDE_pcp/{0}/{1}/'.format(pipe, filt)
# stud = 'Data/'

# Location of the CSV files used for labeling.
# lab = path + 'Label/'
lab = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Data/'
# lab = '/home/kap/Thesis/Data/Label/'
mask_name = 'tt97_prep'
from matplotlib import pyplot as plt

# Plot the atlases. Each atlas defines its own regions of interest (ROIs),
# and some ROIs are larger than others: the TT and AAL atlases are much
# smaller than the CC400. The atlas files must be downloaded beforehand from
# https://preprocessed-connectomes-project.github.io/abide/Pipelines.html#regions_of_interest
for _atlas_file, _plot_name in (
        ('tt_mask_pad.nii', 'tt_roi_plot'),
        ('aal_mask_pad.nii', 'aal_roi_plot'),
        ('CC400.nii', 'cc400_roi_plot'),
):
    pltt.plot_roi(_atlas_file, output_file=_plot_name)
print('Completed brain ROI Images')

# Download the data (only two images in this case).
# Where the data should be saved.
path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test'

# Arguments shared by every single-subject fetch below.
_fetch_common = dict(data_dir=path, n_subjects=1, pipeline='cpac',
                     band_pass_filtering=True)
ab_img_one = datasets.fetch_abide_pcp(derivatives=['func_preproc'],
                                      SUB_ID=[50003], **_fetch_common)
ab_img_two = datasets.fetch_abide_pcp(derivatives=['func_preproc'],
                                      SUB_ID=[50004], **_fetch_common)
ab_mask_one = datasets.fetch_abide_pcp(derivatives=['func_mask'],
                                       SUB_ID=[50003], **_fetch_common)
from nilearn import datasets
from nilearn import plotting as pltt
from nilearn.masking import apply_mask
from matplotlib import pyplot as plt

# Plot the atlases. Each atlas defines its own regions of interest (ROIs),
# and some ROIs are larger than others: the TT and AAL atlases are much
# smaller than the CC400. The atlas files must be downloaded beforehand from
# https://preprocessed-connectomes-project.github.io/abide/Pipelines.html#regions_of_interest
pltt.plot_roi('tt_mask_pad.nii', output_file='tt_roi_plot')
pltt.plot_roi('aal_mask_pad.nii', output_file='aal_roi_plot')
pltt.plot_roi('CC400.nii', output_file='cc400_roi_plot')
print('Completed brain ROI Images')

# Download the data (only two images in this case).
# Where the data should be saved.
path = '/media/kap/8e22f6f8-c4df-4d97-a388-0adcae3ec1fb/Python/Thesis/Test'

ab_img_one = datasets.fetch_abide_pcp(data_dir=path, n_subjects=1,
                                      pipeline='cpac',
                                      band_pass_filtering=True,
                                      derivatives=['func_preproc'],
                                      SUB_ID=[50003])
ab_img_two = datasets.fetch_abide_pcp(data_dir=path, n_subjects=1,
                                      pipeline='cpac',
                                      band_pass_filtering=True,
                                      derivatives=['func_preproc'],
                                      SUB_ID=[50004])
ab_mask_one = datasets.fetch_abide_pcp(data_dir=path, n_subjects=1,
                                       pipeline='cpac',
                                       band_pass_filtering=True,
                                       derivatives=['func_mask'],
                                       SUB_ID=[50003])
# The second mask belongs to the second subject; it previously re-fetched
# subject 50003 because of a copy-paste slip.
ab_mask_two = datasets.fetch_abide_pcp(data_dir=path, n_subjects=1,
                                       pipeline='cpac',
                                       band_pass_filtering=True,
                                       derivatives=['func_mask'],
                                       SUB_ID=[50004])

# The two downloaded files have to be renamed to these names by hand.
my_data = ['pitt3.nii', 'pitt4.nii']

# Apply the mask to the fMRI image. The mask marks the regions of the image
# that will be extracted for use; only the first image is masked here.
masked_data = apply_mask(my_data[0], 'pitt3mask.nii')  # (fMRI, mask)
print('Completed Masking')