def get_ws_data(test_p, fold_shifted, included, hemi):
    """Load within-subject train/test responses for one participant.

    Every run listed in ``included`` is loaded for ``test_p`` as a training
    run; the run ``fold_shifted`` is loaded as the test run.  Each run is
    trimmed (first 4 volumes dropped; last 5 dropped for run 4, last 4
    otherwise), restricted to vertices where ``cortical_vertices[hemi] == 1``
    and z-scored with ``mv.zscore(..., chunks_attr=None)``.

    Relies on the module-level names ``mv``, ``sam_data_dir``, ``tr_fmri``
    and ``cortical_vertices``.

    Returns
    -------
    (train_resp, test_resp) : (list of arrays, array)
        One array per entry of ``included`` and the array for the test run.
    """

    def _trimmed_cortex(run_id):
        # File name encodes participant, volume count, run number, hemisphere.
        fname = '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.format(
            test_p, tr_fmri[run_id], run_id, hemi)
        stop = -5 if run_id == 4 else -4
        samples = mv.gifti_dataset(
            os.path.join(sam_data_dir, fname)).samples[4:stop, :]
        samples = samples[:, cortical_vertices[hemi] == 1]
        mv.zscore(samples, chunks_attr=None)
        return samples

    print('\nLoading fMRI GIFTI data for HA in test subj space and using {0} as test participant...'.format(test_p))

    train_resp = []
    for run in included:
        run_resp = _trimmed_cortex(run)
        print('train', run, run_resp.shape)
        train_resp.append(run_resp)

    test_resp = _trimmed_cortex(fold_shifted)
    print('test', fold_shifted, test_resp.shape)
    return train_resp, test_resp
def get_ha_testsubj_data(test_p, mappers, fold_shifted, included, hemi):
    """Load training data projected into the test participant's space.

    For every run in ``included``, each participant other than ``test_p`` is
    loaded, z-scored, pushed through its own mapper (``forward``), z-scored,
    brought back through the test participant's mapper (``reverse``),
    restricted to ``cortical_vertices[hemi] == 1`` and z-scored again.  The
    per-participant arrays are averaged and the average is z-scored.

    The test response is the ``fold_shifted`` run of ``test_p`` itself,
    trimmed, restricted to cortical vertices and z-scored.

    Runs are trimmed to ``[4:-5]`` for run 4 and ``[4:-4]`` otherwise.
    Relies on the module-level names ``participants``, ``mv``,
    ``sam_data_dir``, ``tr_fmri``, ``cortical_vertices`` and ``np``.

    Returns
    -------
    (train_resp, test_resp) : (list of arrays, array)
    """

    def _run_samples(subject, run_id):
        fname = '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.format(
            subject, tr_fmri[run_id], run_id, hemi)
        return mv.gifti_dataset(os.path.join(sam_data_dir, fname)).samples

    def _stop(run_id):
        # Run 4 loses one more trailing volume than the other runs.
        return -5 if run_id == 4 else -4

    train_p = [x for x in participants if x != test_p]
    print('\nLoading fMRI GIFTI data for HA in test subj space and using {0} as test participant...'.format(test_p))

    train_resp = []
    for run in included:
        projected = []
        for participant in train_p:
            resp = _run_samples(participant, run)[4:_stop(run), :]
            mv.zscore(resp, chunks_attr=None)
            resp = mappers[participant].forward(resp)
            mv.zscore(resp, chunks_attr=None)
            resp = mappers[test_p].reverse(resp)
            resp = resp[:, cortical_vertices[hemi] == 1]
            mv.zscore(resp, chunks_attr=None)
            projected.append(resp)
        avg = np.mean(projected, axis=0)
        mv.zscore(avg, chunks_attr=None)
        print('train', run, avg.shape)
        train_resp.append(avg)

    test_resp = _run_samples(test_p, fold_shifted)[
        4:_stop(fold_shifted), cortical_vertices[hemi] == 1]
    mv.zscore(test_resp, chunks_attr=None)
    print('test', fold_shifted, test_resp.shape)
    return train_resp, test_resp
def get_voxel_coords(ds, append=True, zscore=True):
    """
    Return a deep copy of ``ds`` carrying polynomial voxel-coordinate terms.

    The coordinate block is built from ``ds.sa.voxel_indices``: the raw
    indices, their squares, their cubes, the three pairwise products and the
    triple product.  With ``append=True`` the block is stacked to the right
    of the copied samples; with ``append=False`` the copy holds only the
    coordinate block and no fMRI data, which is useful as a sanity check of
    the classification.

    If ``zscore`` is true the coordinate block is z-scored with
    ``chunks_attr='participant'`` before use.  Feature attributes of the
    copy are cleared in both cases.
    """
    ds_coords = ds.copy('deep')

    vox = ds.sa.voxel_indices
    first, second, third = vox[:, 0], vox[:, 1], vox[:, 2]

    # Interaction terms between the three coordinate axes
    products = np.column_stack((first * second,
                                first * third,
                                second * third,
                                first * second * third))
    # Raw coordinates, squares, cubes, then the interaction terms
    coords = np.hstack((vox, vox ** 2, vox ** 3, products))
    coords = mv.Dataset(coords, sa=ds_coords.sa)
    if zscore:
        mv.zscore(coords, chunks_attr='participant')

    ds_coords.fa.clear()
    if append:
        ds_coords.samples = np.hstack((ds_coords.samples, coords.samples))
    else:
        ds_coords.samples = coords.samples
    return ds_coords
def load_data(filename):
    """Load a GIFTI file as a dataset, annotate it and z-score it.

    The ``intents`` sample attribute is dropped, every sample is labelled
    with the module-level ``participant`` under ``sa['subjects']``, features
    are numbered ``range(n_vertices)`` (``n_vertices`` is module-level) under
    ``fa['node_indices']``, and the dataset is z-scored with
    ``chunks_attr=None``.
    """
    dataset = mv.gifti_dataset(filename)
    dataset.sa.pop('intents')

    n_samples = dataset.shape[0]
    dataset.sa['subjects'] = [participant] * n_samples
    dataset.fa['node_indices'] = range(n_vertices)

    # z-score features across samples
    mv.zscore(dataset, chunks_attr=None)
    return dataset
def preprocess_and_tmp_save_fmri(data_path, task, subj, model, tmp_path,
                                 group_mask=None):
    '''
    Generator over the preprocessed fMRI runs of one Forrest Gump subject.

    For every BOLD run of ``task`` the run is loaded with the subject brain
    mask, written to a temporary NIfTI file, warped to the group template
    with FSL ApplyWarp (nearest-neighbour interpolation), reloaded with the
    group mask, linearly detrended and z-scored.  Both temporary files are
    removed before the run is yielded.

    IN:
        data_path   -   string, path pointing to the Forrest Gump directory
        task        -   string, which part of the Forrest Gump dataset to load
        subj        -   int, subject to pre-process
        model       -   not used inside this function
        tmp_path    -   string, path to save the dataset temporarily to
        group_mask  -   optional mask file applied after warping; defaults to
                        the subject's in_grpbold7Tp1/brain_mask.nii.gz
    OUT:
        preprocessed fMRI samples per run (float32)'''
    from nipype.interfaces import fsl

    dhandle = mvpa.OpenFMRIDataset(data_path)
    flavor = 'dico_bold7Tp1_to_subjbold7Tp1'

    # Paths that do not depend on the run
    subj_tmpl_dir = os.path.join(data_path, 'sub{0:03d}'.format(subj),
                                 'templates', 'bold7Tp1')
    if group_mask is None:
        group_mask = os.path.join(subj_tmpl_dir, 'in_grpbold7Tp1',
                                  'brain_mask.nii.gz')
    mask_fname = os.path.join(subj_tmpl_dir, 'brain_mask.nii.gz')
    ref_file = os.path.join(data_path, 'templates', 'grpbold7Tp1',
                            'brain.nii.gz')
    field_file = os.path.join(subj_tmpl_dir, 'in_grpbold7Tp1',
                              'subj2tmpl_warp.nii.gz')

    for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
        chunk = run_id - 1
        native_ds = dhandle.get_bold_run_dataset(subj, task, run_id,
                                                 chunks=chunk,
                                                 mask=mask_fname,
                                                 flavor=flavor)
        filename = 'brain_subj_{}_run_{}.nii.gz'.format(subj, run_id)
        tmp_file = os.path.join(tmp_path, filename)
        warped_file = os.path.join(tmp_path, 'group_' + filename)
        save(unmask(native_ds.samples.astype('float32'), mask_fname),
             tmp_file)

        # Warp the subject-space volume into group template space
        warp = fsl.ApplyWarp()
        warp.inputs.in_file = tmp_file
        warp.inputs.out_file = warped_file
        warp.inputs.ref_file = ref_file
        warp.inputs.field_file = field_file
        warp.inputs.interp = 'nn'
        warp.run()
        os.remove(tmp_file)

        run_ds = mvpa.fmri_dataset(warped_file, mask=group_mask,
                                   chunks=chunk)
        mvpa.poly_detrend(run_ds, polyord=1)
        mvpa.zscore(run_ds)
        os.remove(warped_file)
        yield run_ds.samples.astype('float32')
def tmp_save_fmri(datapath, task, subj, model):
    """Warp every run of one subject to group space and pickle the samples.

    For each BOLD run of ``task`` the run is loaded with the subject brain
    mask, written to the scratch directory, warped to the group template with
    FSL ``applywarp`` (nearest-neighbour interpolation), reloaded with the
    group brain mask, linearly detrended, z-scored and dumped with joblib as
    float32 to ``whole_brain_subj_<subj>_run_<run>.pkl`` in the scratch
    directory.

    ``applywarp`` is invoked through ``subprocess.check_call`` with an
    argument list, so a non-zero exit status raises
    ``subprocess.CalledProcessError`` instead of being ignored, and the
    intermediate NIfTI files are removed even when a step fails.

    Parameters
    ----------
    datapath : str
        Root of the OpenFMRI-style dataset.
    task : task identifier passed to the dataset handle
    subj : int
        Subject number.
    model : not used inside this function

    Returns
    -------
    int
        Number of features (``samples.shape[1]``) of the last processed run.
    """
    import subprocess

    dhandle = mvpa.OpenFMRIDataset(datapath)
    #mask_fname = os.path.join('/home','mboos','SpeechEncoding','temporal_lobe_mask_brain_subj' + str(subj) + 'bold.nii.gz')

    flavor = 'dico_bold7Tp1_to_subjbold7Tp1'
    group_brain_mask = '/home/mboos/SpeechEncoding/brainmask_group_template.nii.gz'
    mask_fname = os.path.join(datapath, 'sub{0:03d}'.format(subj), 'templates', 'bold7Tp1', 'brain_mask.nii.gz')
    #mask_fname = '/home/mboos/SpeechEncoding/masks/epi_subj_{}.nii.gz'.format(subj)
    scratch_path = '/home/data/scratch/mboos/prepro/tmp/'

    # NOTE: reference and warp-field locations are hard-coded and do not
    # follow ``datapath``.
    ref_file = '/home/data/psyinf/forrest_gump/anondata/templates/grpbold7Tp1/brain.nii.gz'
    warp_field = '/home/data/psyinf/forrest_gump/anondata/sub{0:03}/templates/bold7Tp1/in_grpbold7Tp1/subj2tmpl_warp.nii.gz'.format(subj)

    for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
        run_ds = dhandle.get_bold_run_dataset(subj, task, run_id,
                                              chunks=run_id - 1,
                                              mask=mask_fname, flavor=flavor)
        filename = 'whole_brain_subj_{}_run_{}.nii.gz'.format(subj, run_id)
        tmp_path = scratch_path + filename
        warped_path = scratch_path + 'group_' + filename

        save(unmask(run_ds.samples.astype('float32'), mask_fname), tmp_path)
        try:
            subprocess.check_call(['applywarp',
                                   '-i', tmp_path,
                                   '-o', warped_path,
                                   '-r', ref_file,
                                   '-w', warp_field,
                                   '--interp=nn'])
        finally:
            # The subject-space copy is no longer needed, success or not.
            os.remove(tmp_path)

        try:
            run_ds = mvpa.fmri_dataset(warped_path, mask=group_brain_mask,
                                       chunks=run_id - 1)
            mvpa.poly_detrend(run_ds, polyord=1)
            mvpa.zscore(run_ds)
            joblib.dump(run_ds.samples.astype('float32'),
                        '/home/data/scratch/mboos/prepro/tmp/whole_brain_subj_{}_run_{}.pkl'.format(subj, run_id))
        finally:
            if os.path.exists(warped_path):
                os.remove(warped_path)
    return run_ds.samples.shape[1]
def fx(dataset, behav_file, motion_file, polynomial_order, run_number):
    """Fit an HRF model to one run and return z-scored estimates.

    Events are read from the CSV ``behav_file`` (columns ``run_volume``,
    ``genre``, ``run`` and ``stim`` are used); onsets are ``run_volume * 2.0``
    and every event lasts 6.0.  The columns of ``motion_file`` enter the
    design as the nuisance regressors tx, ty, tz, rx, ry, rz, together with a
    polynomial drift model of order ``polynomial_order``.  The resulting
    estimates are tagged with ``run_number`` under ``sa['run']`` and z-scored.
    """
    print("events -> %s" % behav_file)
    print("nuisance -> %s" % motion_file)

    behav_txt = np.recfromcsv(behav_file, delimiter=',')
    events = []
    for event in behav_txt:
        events.append(dict(onset=float(event['run_volume']) * 2.0,
                           duration=6.0,
                           targets=event['genre'],
                           chunks=int(event['run']),
                           stim=event['stim']))

    motion = np.loadtxt(motion_file)
    add_reg_names = ['tx', 'ty', 'tz', 'rx', 'ry', 'rz']
    design_kwargs = dict(drift_model='polynomial',
                         drift_order=polynomial_order,
                         hrf_model='canonical with derivative',
                         add_regs=motion,
                         add_reg_names=add_reg_names)

    hrf_estimates = eventrelated_dataset(dataset,
                                         events,
                                         model='hrf',
                                         time_attr='time_coords',
                                         condition_attr=('targets', 'chunks'),
                                         design_kwargs=design_kwargs,
                                         glmfit_kwargs=dict(model='ar1'))

    #hrf_estimates.sa['subj'] = [subject] * len(hrf_estimates)
    hrf_estimates.sa['run'] = [run_number] * len(hrf_estimates)

    # zscore voxelwise
    # XXX `hrf_estimates` has no chunks! hence zscoring is not performed run-wise!
    zscore(hrf_estimates)
    return hrf_estimates
def get_ha_common_data(test_p, mappers, fold_shifted, included, hemi):
    """Load train/test responses projected into the common space.

    Training data: for every run in ``included``, each participant other than
    ``test_p`` is loaded, z-scored, projected with its own mapper
    (``forward``), restricted to ``selected_node``, z-scored, and the
    participants are averaged; the average is z-scored.

    Test data: the ``fold_shifted`` run of ``test_p`` is z-scored, projected
    with the *test participant's* mapper, restricted to ``selected_node`` and
    z-scored.  (Earlier revisions indexed ``mappers`` with the leftover loop
    variable ``participant``, i.e. the last training participant, which
    projected the test data with the wrong transformation and failed with a
    NameError when no training run was loaded.)

    Runs are trimmed to ``[4:-5]`` for run 4 and ``[4:-4]`` otherwise.
    Relies on the module-level names ``participants``, ``mv``,
    ``sam_data_dir``, ``tr_fmri``, ``selected_node`` and ``np``.

    Returns
    -------
    (train_resp, test_resp) : (list of arrays, array)
    """

    def _load_trimmed(subject, run_id):
        fname = '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.format(
            subject, tr_fmri[run_id], run_id, hemi)
        stop = -5 if run_id == 4 else -4
        return mv.gifti_dataset(
            os.path.join(sam_data_dir, fname)).samples[4:stop, :]

    train_p = [x for x in participants if x != test_p]
    print("\n4. hyperalignment common data")
    print(
        'Loading fMRI GIFTI data for HA in test subj space and using {0} as test participant...'
        .format(test_p))

    train_resp = []
    for run in included:
        avg = []
        for participant in train_p:
            resp = _load_trimmed(participant, run)
            mv.zscore(resp, chunks_attr=None)
            resp = mappers[participant].forward(resp)
            # NOTE: temporary node selection; the intended final mask is
            # ``cortical_vertices[hemi] == 1``.
            resp = resp[:, selected_node]
            mv.zscore(resp, chunks_attr=None)
            avg.append(resp)
        avg = np.mean(avg, axis=0)
        mv.zscore(avg, chunks_attr=None)
        print('train', run, avg.shape)
        train_resp.append(avg)

    test_resp = _load_trimmed(test_p, fold_shifted)
    mv.zscore(test_resp, chunks_attr=None)
    # Project the held-out participant with its own mapper.
    test_resp = mappers[test_p].forward(test_resp)
    # NOTE: temporary node selection, see above.
    test_resp = test_resp[:, selected_node]
    mv.zscore(test_resp, chunks_attr=None)
    print('test', fold_shifted, test_resp.shape)
    return train_resp, test_resp
resp = mappers[participant].forward( load_data( os.path.join( sam_data_dir, '{0}_task-life_acq-346vol_run-02.{1}.tproject.gii' .format(participant, hemi))).samples[4:-12, :]) else: resp = mappers[participant].forward( load_data( os.path.join( sam_data_dir, '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii' .format(participant, tr[run], run, hemi))).samples[4:-7, :]) mv.zscore(resp, chunks_attr=None) avg.append(resp) avg = np.mean(avg, axis=0) print(run, avg.shape) train_resp.append(avg) train_resp = np.concatenate(train_resp, axis=0) test_resp = mappers[test_p].forward( load_data( os.path.join( sam_data_dir, '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'. format(test_p, tr[3], 3, hemi))).samples[4:-7, :])
def createdataset(analysis, datadir, rootdir, anatdir, eventdir, zscore, rois):
    """
    Build an hdf5 dataset.

    For every ``sub-*`` directory under ``rootdir`` the runs matching
    ``rootdir + participant + datadir`` are loaded with the participant's
    ``brain_mask_tmpl.nii.gz`` and stacked, using the run index as chunk.
    The data are optionally z-scored, ROI membership is attached as feature
    attributes, and the per-participant datasets are collected in a list.

    The list is saved to ``outdir + '<analysis>_groupdataset.hdf5'``; the
    datasets are then stacked feature-wise, transposed (time points become
    features) and saved to
    ``outdir + '<analysis>_groupdataset_transposed.hdf5'``.
    ``outdir`` is not a parameter; it is read from the enclosing scope.

    Parameters
    ----------
    analysis : str
        'localizer' (4 runs expected) or 'avmovie' (8 runs expected); other
        values skip the run-count check.
    datadir, anatdir : str
        Path fragments appended to ``rootdir + participant``.
    rootdir : str
        Directory prefix containing the ``sub-*`` directories.
    eventdir : str
        Passed to ``get_group_events`` for baseline z-scoring.
    zscore : str
        'baseline-zscore' (localizer only), 'zscore', or anything else for
        no z-scoring.
    rois : iterable of str
        ROI names; 'VIS' is treated as non-lateralized.

    Returns
    -------
    The transposed group dataset.
    """
    # initialize a list to load all datasets into:
    data_dss = []

    # get list of participants from root dir
    participants = sorted(
        [path.split('/')[-1] for path in glob(rootdir + 'sub-*')])
    assert len(participants) != 0
    print('The following participants were found: {}'.format(participants))

    for participant in participants:
        # collect this participant's run files and brain mask
        data_fns = sorted(glob(rootdir + participant + datadir))
        print(rootdir + participant + datadir)
        mask_fn = rootdir + participant + anatdir + 'brain_mask_tmpl.nii.gz'
        if analysis == 'localizer':
            assert len(data_fns) == 4
        if analysis == 'avmovie':
            assert len(data_fns) == 8
        # one chunk per run file, numbered in sorted filename order
        data_ds = mv.vstack([
            mv.fmri_dataset(data_fn, mask=mask_fn, chunks=run)
            for run, data_fn in enumerate(data_fns)
        ])
        data_ds.fa['participant'] = [participant] * data_ds.shape[1]
        print('loaded data for participant {}.'.format(participant))

        # z scoring
        if analysis == 'localizer' and zscore == 'baseline-zscore':
            events = get_group_events(eventdir)
            means, stds = extract_baseline(events, data_ds)
            mv.zscore(data_ds, params=(means, stds), chunks_attr='chunks')
            print('finished baseline zscoring for participant {}.'.format(
                participant))
        elif zscore == 'zscore':
            mv.zscore(data_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        # roi masks
        # one label per voxel, initialised to 'brain'; fixed-width 10-byte
        # strings, so longer labels would be truncated
        all_rois_mask = np.array([['brain'] * data_ds.shape[1]]).astype('S10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                      '{0}_*_mask_tmpl.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                           'l{0}*mask_tmpl.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                            'r{0}*mask_tmpl.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns

            if len(roi_fns) == 0:
                print(
                    "ROI {0} does not exist for participant {1}; appending all zeros"
                    .format(roi, participant))
                roi_mask = np.zeros((1, data_ds.shape[1]))
            elif len(roi_fns) == 1:
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)

            # Set any voxels that might exceed 1 to 1
            roi_mask = np.where(roi_mask > 0, 1, 0)

            # Ensure that number of voxels in ROI mask matches dataset dimension
            assert roi_mask.shape[1] == data_ds.shape[1]

            # Flatten mask into list
            roi_flat = list(roi_mask.ravel())

            # Assign ROI mask to data feature attributes
            data_ds.fa[roi] = roi_flat

            # Get lateralized masks as well
            # (0 = outside the ROI, 1 = left, 2 = right; right is written
            # last, so a voxel in both hemispheric masks ends up as 2)
            if roi != 'VIS':
                lat_roi_mask = np.zeros((1, data_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0],
                                        mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                            for left_roi_fn in left_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) == 0:
                    left_roi_mask = np.zeros((1, data_ds.shape[1]))

                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0],
                                        mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                            for right_roi_fn in right_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    right_roi_mask = np.zeros((1, data_ds.shape[1]))

                # Ensure that number of voxels in ROI mask matches dataset dimension
                assert lat_roi_mask.shape[1] == data_ds.shape[1]

                # Flatten mask into list
                lat_roi_flat = list(lat_roi_mask.ravel())

                # Assign ROI mask to data feature attributes
                data_ds.fa['lat_' + roi] = lat_roi_flat

                # Check existing feature attribute for all ROIS for overlaps
                # (a voxel of this ROI whose label is no longer 'brain' was
                # already claimed by an earlier ROI)
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')

                all_rois_mask[(left_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'left {0}'.format(roi)

                all_rois_mask[(right_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'right {0}'.format(roi)

            elif roi == 'VIS':
                # VIS masks are globbed again here; note the leading '/' in
                # this pattern, which the first VIS glob above does not have
                roi_fns = sorted(
                    glob(rootdir + participant + anatdir +
                         '/{0}_*_mask_tmpl.nii.gz'.format(roi)))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = roi

        # Flatten mask into list
        all_rois_flat = list(all_rois_mask.ravel())

        # Assign roi mask to dataset feature attributes
        data_ds.fa['all_ROIs'] = all_rois_flat

        # join all datasets
        data_dss.append(data_ds)

    # save full dataset
    mv.h5save(outdir + '{}_groupdataset.hdf5'.format(analysis), data_dss)
    print('saved the collection of all subjects datasets.')
    # squish everything together
    ds_wide = mv.hstack(data_dss)

    # transpose the dataset, time points are now features
    ds = mv.Dataset(ds_wide.samples.T,
                    sa=ds_wide.fa.copy(),
                    fa=ds_wide.sa.copy())
    mv.h5save(outdir + '{}_groupdataset_transposed.hdf5'.format(analysis), ds)
    print('Transposed the group-dataset and saved it.')
    return ds
print sub behav_file = 'all_attr.txt' print roi bold_fname = os.path.join(cwd1, sub, 'betas_sub' + sub + '.nii.gz') #full functional timeseries (beta series) mask_fname = os.path.join(cwd1, sub, 'native_masks', roi) #chooses the mask for a given ROI attr_fname = os.path.join(cwd1, sub, behav_file) #codes stimuli number and run number attr = mvpa2.SampleAttributes(attr_fname) #loads attributes into pymvpa ds = mvpa2.fmri_dataset( bold_fname, targets=attr.targets, chunks=attr.chunks, mask=mask_fname ) #loads dataset with appropriate mask and attribute information mvpa2.zscore(ds, chunks_attr='chunks') #z-scores dataset per run ds = mvpa2.remove_nonfinite_features(ds) ds = mvpa2.remove_invariant_features(ds) stimuli = [] for i in range(0, 16): stimuli.append(ds.uniquetargets[i]) #create all possible pairs for confusion matrix pair_list = list(itertools.combinations(range(len(stimuli)), 2)) pair_list2 = [] for x in range(0, len(pair_list)): pair_list2.append([stimuli[pair_list[x][0]], stimuli[pair_list[x][1]]]) test_accs, val_accs, nfs_per_chunk, val_chunks = clf_wrapper(ds, pair_list2) corrs1, pvals1, dist_list1, nf_list_dists1 = dist_wrapper( ds, pair_list2, test_accs, val_accs, val_chunks)
# 0. parameters ____________________________________________________________ # main_dir = '/Users/h/Documents/projects_local/cluster_projects' main_dir = '/dartfs-hpc/scratch/psyc164/groupXHD' sub_name = sys.argv[1] hemisphere = sys.argv[2] task_list = ['beh', 'tax'] radii = 10.0 # 1. create pymvpa dataset ____________________________________________________________ ds_q2 = generate_dataset.create_dataset(sub_name, main_dir, task_list, hemisphere) ds_q2.sa['chunks'] = ds_q2.sa['beh'] ds_q2.sa['targets'] = ds_q2.sa['tax'] #del ds_q2.sa['intents'] del ds_q2.sa['stats'] mv.zscore(ds_q2, chunks_attr='chunks') n_medial = {'lh': 3486, 'rh': 3491} medial_wall = np.where(np.sum(ds_q2.samples == 0, axis=0) == 200)[0].tolist() cortical_vertices = np.where( np.sum(ds_q2.samples == 0, axis=0) < 200)[0].tolist() assert len(medial_wall) == n_medial[hemisphere] n_vertices = ds_q2.fa.node_indices.shape[0] assert len(medial_wall) + len(cortical_vertices) == n_vertices # 2. cross validation __________________________________________________________________ # setting up classifier clf = mv.LinearCSVMC(space='targets') cv = mv.CrossValidation(clf, mv.NFoldPartitioner(attr='chunks')) cv_within = cv(ds_q2) cv_within
def normalize_dataset(ds, **kwargs):
    """Optionally re-chunk, average and normalize a dataset.

    Parameters
    ----------
    ds : Dataset
        Dataset to process; ``ds.chunks`` and ``ds.samples`` may be modified
        in place.
    kwargs : dict
        mean_samples : if ``str(value) == 'True'`` samples are averaged per
            ``event_num``.  Default False.
        img_dim : int, optional.  When 4, features are additionally z-scored
            per ``file`` before the global z-scoring.  Default None.
        normalization : 'feature', 'sample' or 'both'.  Default 'feature';
            any other value disables normalization.
        chunk_number : None (keep existing chunks), 'adaptive' or an int
            number of chunks.  Default None.

    Returns
    -------
    Dataset
        The processed dataset with ``ds.a.events`` set from ``find_events``.

    Raises
    ------
    ValueError
        If ``chunk_number`` is neither None, 'adaptive' nor an int.
    """
    import collections
    try:
        from math import gcd  # Python >= 3.5
    except ImportError:  # Python 2
        from fractions import gcd

    mean = kwargs.get('mean_samples', False)
    # Default to None so that an omitted img_dim no longer raises NameError.
    img_dim = kwargs.get('img_dim')
    if img_dim is not None:
        img_dim = int(img_dim)
    normalization = str(kwargs.get('normalization', 'feature'))
    chunk_number = kwargs.get('chunk_number')

    if chunk_number is not None:
        # Size of the smallest target class
        n_targets = min(collections.Counter(ds.targets).values())

        if chunk_number == 'adaptive':
            # Largest divisor of n_targets shared with 2..9; 4 if there is none
            n_chunks = max(gcd(int(n_targets), i) for i in range(2, 10))
            if n_chunks == 1:
                n_chunks = 4
        elif isinstance(chunk_number, int):
            n_chunks = int(chunk_number)
        else:
            raise ValueError(
                "chunk_number must be None, 'adaptive' or an int, got %r"
                % (chunk_number,))

        argsort = np.argsort(ds.targets)
        # One run of chunk labels of length n_targets per unique target.
        # NOTE(review): the assignment below only fits when every target
        # occurs exactly n_targets times - confirm against callers.
        chunks = [
            np.linspace(0, n_chunks, n_targets, endpoint=False, dtype=int)
            for _ in ds.uniquetargets
        ]
        ds.chunks[argsort] = np.hstack(chunks)

    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num'])
        ds = ds.get_mapped(avg_mapper)

    if normalization == 'feature' or normalization == 'both':
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)  #, param_est=('targets', ['fixation']))

    if normalization == 'sample' or normalization == 'both':
        # Normalizing image-wise
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds, axis=1)[:, None]
        ds.samples /= np.std(ds, axis=1)[:, None]
        # Samples with zero variance become NaN after the division
        ds.samples[np.isnan(ds.samples)] = 0

    # Find event related stuff
    ds.a.events = find_events(
        #event= ds.sa.event_num,
        chunks=ds.sa.chunks,
        targets=ds.sa.targets)

    return ds
ds.sa['chunks'] = np.ones(ds.nsamples)*i print ds.shape Ds.append(ds) ds = mvpa.vstack(Ds) ds.samples = ds.samples.astype('float32') #Detrending and MC removal mvpa.poly_detrend(ds, opt_regs=['mc_'+param for param in mc], chunks_attr='chunks' ) #Voxelwise Zscore if zsc: mvpa.zscore(ds) #bandpass filter nf = 0.5/TR ws = [(1/lf)/nf, (1/hf)/nf] b, a = signal.butter(5, ws, btype='band') S = [signal.filtfilt(b, a, x) for x in ds.samples.T] ds.samples = np.array(S).T ds.samples = ds.samples.astype('float32') #Create Event-related Dataset onsets = np.arange(0,ds.nsamples - samples_size/TR, samples_size/TR) events = [] for on in onsets: Ev = dict() Ev['onset'] = on
def preprocessing(ds_p, ref_space, warp_files, mask_p, **kwargs):
    """Warp one run to the reference space and preprocess it.

    The run at ``ds_p`` is warped with the warp file whose name contains the
    participant number parsed from ``ds_p``, loaded as a dataset (optionally
    masked and with ROI feature attributes), annotated with participant,
    movie type and chunk, and then optionally detrended, z-scored and turned
    into an event-related dataset.

    Parameters
    ----------
    ds_p : str
        Path of the run; must contain ``"sub"`` followed by the participant
        number.
    ref_space : path-like
        Reference image; converted with ``str``.
    warp_files : list of str
        Candidate warp files; the first one containing the participant
        number is used.
    mask_p : path-like or None
        Mask file.  ``None`` means "no mask"; it is no longer converted to
        the string ``'None'``.
    kwargs : dict
        detrending (default None; any other value enables linear
        detrending), use_zscore (True), use_events (False), anno_dir (None),
        use_glm_estimates (False), targets (None), event_offset (None),
        event_dur (None), save_disc_space (True), rois (None).

    Returns
    -------
    The preprocessed dataset.
    """
    # str(None) would yield 'None' and defeat the "no mask" branch below.
    if mask_p is not None:
        mask_p = str(mask_p)
    ref_space = str(ref_space)

    detrending = kwargs.get('detrending', None)
    use_zscore = kwargs.get('use_zscore', True)
    use_events = kwargs.get('use_events', False)
    anno_dir = kwargs.get('anno_dir', None)
    use_glm_estimates = kwargs.get('use_glm_estimates', False)
    targets = kwargs.get('targets', None)
    event_offset = kwargs.get('event_offset', None)
    event_dur = kwargs.get('event_dur', None)
    save_disc_space = kwargs.get('save_disc_space', True)
    rois = kwargs.get('rois', None)

    # Two characters starting four positions after "sub" identify the participant
    sub_pos = ds_p.find("sub")
    vp_num_str = ds_p[(sub_pos + 4):(sub_pos + 6)]
    warp_file = [warp_file for warp_file in warp_files
                 if warp_file.find(vp_num_str) != -1][0]
    part_info = find_participant_info(ds_p)

    tmp_dir = (Path.cwd().parents[0]).joinpath("data", "tmp")
    if save_disc_space:
        # A single scratch file that is reused for every run
        temp_file_add = "tmp_warped_data_file.nii.gz"
        temp_file = str(tmp_dir.joinpath(temp_file_add))
    else:
        temp_file_add = "sub-{}_{}-movie_run-{}_warped_file.nii.gz".format(
            part_info[0], part_info[1], int(part_info[2]))
        temp_file = str(tmp_dir.joinpath("runs_for_testing", temp_file_add))  # change

    warped_ds = warp_image(ds_p, ref_space, warp_file, temp_file,
                           save_disc_space=save_disc_space)

    # Block until the warped file shows up on disk (polls every 5 s)
    while not os.path.exists(warped_ds):
        time.sleep(5)

    if os.path.isfile(warped_ds):
        load_kwargs = {}
        if mask_p is not None:
            load_kwargs['mask'] = get_adjusted_mask(mask_p, ref_space)
        if rois is not None:
            load_kwargs['add_fa'] = rois
        ds = mvpa.fmri_dataset(samples=warped_ds, **load_kwargs)

    ds.sa['participant'] = [int(part_info[0])]
    ds.sa["movie_type"] = [part_info[1]]
    ds.sa['chunks'] = [int(part_info[2])]

    if detrending is not None:
        detrender = mvpa.PolyDetrendMapper(polyord=1)
        ds = ds.get_mapped(detrender)

    if use_zscore:
        mvpa.zscore(ds)

    if use_events:
        events = create_event_dict(anno_dir, ds_p, targets, event_dur)
        if use_glm_estimates:
            ds = mvpa.fit_event_hrf_model(ds, events, time_attr='time_coords',
                                          condition_attr='targets')
        else:
            ds = mvpa.extract_boxcar_event_samples(ds, events=events,
                                                   time_attr='time_coords',
                                                   match='closest',
                                                   event_offset=event_offset,
                                                   event_duration=event_dur,
                                                   eprefix='event',
                                                   event_mapper=None)
        ds = fix_info_after_events(ds)

    return ds
print "masked data has", dataset.shape[1], "voxels in each of", dataset.shape[0], "volumes" print "... which means that", round( 100 - 100 * dataset.shape[1] / N.product(dataset.a.voxel_dim) ), "% of the voxels were masked out" print "of", dataset.shape[1], "remaining features ..." print "summary of conditions/volumes\n", datetime.datetime.now() print dataset.summary_targets() # DETREND print "detrending (remove slow drifts in signal, and jumps between runs) ...", datetime.datetime.now() # can be very memory intensive! M.poly_detrend(dataset, polyord=1, chunks_attr="chunks") # linear detrend print "... done", datetime.datetime.now() # ZSCORE print "zscore normalising (give all voxels similar variance) ...", datetime.datetime.now() M.zscore(dataset, chunks_attr="chunks", param_est=("targets", ["base"])) # zscoring, on basis of rest periods print "... done", datetime.datetime.now() # P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png')) pickleFile = gzip.open(preprocessedCache, "wb", 5) pickle.dump(dataset, pickleFile) # AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL print "averaging over trials ...", datetime.datetime.now() dataset = dataset.get_mapped(M.mean_group_sample(attrs=["chunks", "targets"])) print "... only", dataset.shape[0], "cases left now" dataset.chunks = N.mod(N.arange(0, dataset.shape[0]), 5) # print '\n\n\n' # print dataset.targets # print len(dataset.targets)
#mask_fname = os.path.join('/home','mboos','SpeechEncoding','temporal_lobe_mask_brain_subj' + str(subj) + 'bold.nii.gz') #get openFMRI dataset handle dhandle = mvpa.OpenFMRIDataset(datapath) model = 1 task = 1 T3 = False #get openFMRI dataset handle dhandle = mvpa.OpenFMRIDataset(datapath) model = 1 task = 1 datapath = os.path.join('/home','data','psyinf','forrest_gump','anondata') #boldlist = sorted(glob.glob(os.path.join(datapath,'task002*'))) flavor = 'dico_bold7Tp1_to_subjbold7Tp1' for subj in xrange(1,20): mask_fname = os.path.join('/home','mboos','SpeechEncoding','temporal_lobe_mask_brain_subj%02dbold.nii.gz' % subj) #load and save all datasets run_datasets = [] for run_id in dhandle.get_task_bold_run_ids(task)[subj]: run_ds = dhandle.get_bold_run_dataset(subj,task,run_id,chunks=run_id-1,mask=mask_fname,flavor=flavor) run_datasets.append(run_ds) s1ds = mvpa.vstack(run_datasets) mvpa.poly_detrend(s1ds,polyord=1,chunks_attr='chunks') mvpa.zscore(s1ds) s1ds.save(os.path.join('/home','mboos','SpeechEncoding','PreProcessed','FG_subj' + str(subj) + 'pp.gzipped.hdf5'),compression=9)
def preprocess_dataset(ds, type_, **kwargs):
    """
    Preprocess the dataset: detrending of single run and for chunks,
    label filtering, optional sample averaging and normalization.

    Parameters
    ----------
    ds : Dataset
        The dataset to be preprocessed
    type_ : string
        The experiment to be processed (not used inside this function)
    kwargs : dict
        mean_samples - if ``str(value) == 'True'`` samples are averaged per
            ``event_num``. Default False.
        label_included - string : comma-separated labels to keep; 'all'
            (the default) keeps every label.
        label_dropped - string : label to be dropped (rest, fixation);
            'None' (the default) drops nothing.
        img_dim - int : when 4, features are also z-scored per ``file``.
            Default None.
        normalization - string : 'feature' (default), 'sample' or 'both';
            any other value disables normalization.

    Returns
    -------
    Dataset
        the processed dataset, with ``ds.a.events`` set from ``find_events``
    """
    # Explicit defaults: omitting one of these kwargs used to end in a
    # NameError further down.
    mean = kwargs.get('mean_samples', False)
    label_included = kwargs.get('label_included', 'all').split(',')
    label_dropped = kwargs.get('label_dropped', 'None')
    img_dim = kwargs.get('img_dim')
    if img_dim is not None:
        img_dim = int(img_dim)
    normalization = str(kwargs.get('normalization', 'feature'))

    logger.info('Dataset preprocessing: Detrending...')
    # Detrend per file only when the dataset spans more than one file
    if len(np.unique(ds.sa['file'])) != 1:
        poly_detrend(ds, polyord=1, chunks_attr='file')
    poly_detrend(ds, polyord=1, chunks_attr='chunks')

    if label_dropped != 'None':
        logger.info('Removing labels...')
        ds = ds[ds.sa.targets != label_dropped]
    if label_included != ['all']:
        ds = ds[np.array([l in label_included for l in ds.sa.targets],
                         dtype='bool')]

    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num'])
        ds = ds.get_mapped(avg_mapper)

    if normalization == 'feature' or normalization == 'both':
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)  #, param_est=('targets', ['fixation']))

    if normalization == 'sample' or normalization == 'both':
        # Normalizing image-wise
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds, axis=1)[:, None]
        ds.samples /= np.std(ds, axis=1)[:, None]
        # Samples with zero variance become NaN after the division
        ds.samples[np.isnan(ds.samples)] = 0

    ds.a.events = find_events(  #event= ds.sa.event_num,
        chunks=ds.sa.chunks,
        targets=ds.sa.targets)

    return ds
Pstim = get_stim_for_test_fold(run) if run == 4: Presp = mv.gifti_dataset( os.path.join( sam_data_dir, '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'. format(p, tr[run], run, h))).samples[4:-14, :] else: Presp = mv.gifti_dataset( os.path.join( sam_data_dir, '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'. format(p, tr[run], run, h))).samples[4:-7, :] mv.zscore(Presp, chunks_attr=None) forward_resp = mappers[p].forward(Presp) mv.zscore(forward_resp, chunks_attr=None) print("Loaded stim and resp data. Doing prediction...") pred = np.dot(Pstim, wt) print(Pstim.shape, wt.shape, pred.shape) mv.zscore(pred, chunks_attr=None) forward_pred = mappers[p].forward(pred) mv.zscore(forward_pred, chunks_attr=None) print(forward_pred.shape, Presp.shape) # Find prediction correlations nnpred = np.nan_to_num(forward_pred) corrs = np.nan_to_num(
'{0}_task-life_acq-{1}vol_run-0{2}.lh.tproject.gii' .format(participant, tr[run], run)))) else: rh = load_data( os.path.join( sam_data_dir, '{0}_task-life_acq-{1}vol_run-0{2}.rh.tproject.gii'. format(participant, tr[run], run))) lh = load_data( os.path.join( sam_data_dir, '{0}_task-life_acq-{1}vol_run-0{2}.lh.tproject.gii'. format(participant, tr[run], run))) rh = rh.samples lh = lh.samples mv.zscore(rh, chunks_attr=None) mv.zscore(lh, chunks_attr=None) ds = np.concatenate((rh, lh), axis=1) fc += 2 print('file {0}/{1} loaded'.format(fc, len(participants) * 2)) fmri.append(ds) print(len(fmri)) print('Computing pairwise correlations...') n_nodes = fmri[0].shape[1] n_sub = len(fmri) print(n_nodes)
qe = IndexQueryEngine(voxel_indices=Sphere(sl_radius)) qe.train(ref_ds) # load all subject nfiles = glob.glob(os.path.join(chamats, '*commonspace_subs*')) print('Loading participant data from: ') print(chamats) mysubs = nfiles[0:nsubs] # import connectomes into pymvpa dataset, zscore, then add chunks and voxel indices, append to list of datsets dss = [] for sub in range(len(mysubs)): ds = mv.Dataset(np.load(mysubs[sub])) ds.fa['voxel_indices'] = range(ds.shape[1]) #ds.sa['chunks'] = np.repeat(i,cnx_tx) mv.zscore(ds, chunks_attr=None) dss.append(ds) print('Number of data sets in dss: ') print(len(dss)) print('Size of data sets: ') print(dss[0].shape) # create SL hyperalignment instance hyper = SearchlightHyperalignment( queryengine=qe, compute_recon=False, # We don't need to project back from common space to subject space nproc=1, nblocks=N_BLOCKS, dtype ='float64'
print("Loaded movie data for participant {0}".format(participant)) # Perform linear detrending per chunk mv.poly_detrend(movie_ds, polyord=polyord, chunks_attr='chunks') # Perform low-pass filtering per chunk movie_ds.samples = clean(movie_ds.samples, sessions=movie_ds.sa.chunks, low_pass=.1, high_pass=None, t_r=2.0, detrend=False, standardize=False) # Z-score movie time series per chunk mv.zscore(movie_ds, chunks_attr='chunks') print("Finished preprocessing (detrending, z-scoring) for participant {0}". format(participant)) # Load ROI masks and attach them to movie data all_rois_mask = np.array([['brain'] * movie_ds.shape[1]]).astype('S10') for roi in rois: # Get filenames for potential right and left ROI masks if roi == 'VIS': roi_fns = sorted( glob(base_dir + participant + anat_dir + '{0}_*_mask_tmpl.nii.gz'.format(roi))) else: left_roi_fns = sorted( glob(base_dir + participant + anat_dir + 'l{0}_*_mask_tmpl.nii.gz'.format(roi)))
featsel = SelectKBest(f_classif, k=K_FEATS) clf = LogisticRegression(penalty='l2', multi_class='ovr', solver='liblinear') ################# ## LOAD DATA ## ################# map_ds_dict, mem_ds_dict = load_data(MASK) # preprocess for d in [mem_ds_dict, map_ds_dict]: for ds in d.values(): mvpa2.remove_invariant_features(ds) mvpa2.poly_detrend(ds, polyord=1, chunks_attr='chunks') mvpa2.zscore(ds, chunks_attr='chunks') ############################################################## ## build and convert to common space using hyperalignment ## ############################################################## # select features based on localizer data fsel_masks = [ featsel.fit(ds.samples, ds.targets).get_support() for ds in map_ds_dict.values() ] # apply feature selection to all data (localizer and memory) fs_mapds_list = [ ds[:, mask] for ds, mask in zip(map_ds_dict.values(), fsel_masks) ] fs_memds_list = [
# load in all of the data into the dataframe targets = range(1, 21) ds = None for x in range(len(files)): chunks = [x + 1] * 20 d = mv.gifti_dataset(files[x], chunks=chunks, targets=targets) d.sa['conditions'] = conditions d.sa['taxonomy'] = taxonomy d.sa['behavior'] = behavior if ds is None: ds = d else: ds = mv.vstack((ds, d)) ds.fa['node_indices'] = range(ds.shape[1]) # zscore all of our samples mv.zscore(ds, chunks_attr='chunks', dtype='float32') # load in surgace and get searchlight query radius = 10 surface = mv.surf.read(join(data_path, '{0}.pial.gii'.format(hemi))) # this is an arbitrary radius and distance metric! query = mv.SurfaceQueryEngine(surface, radius, distance_metric='dijkstra') # based off PyMVPA tutorial clf = mv.LinearNuSVMC(space=predict) cv = mv.CrossValidation(clf, mv.NFoldPartitioner(attr=train_on), errorfx=lambda p, t: np.mean(p == t), enable_ca=['stats']) searchlights = mv.Searchlight(cv, queryengine=query, postproc=mv.mean_sample(),
# Script excerpt (Python 2 print statements): report on the masked dataset,
# then detrend, z-score, cache and trial-average it. `dataset`, `M` (PyMVPA),
# `N` (numpy), `preprocessedCache`, `gzip`, `pickle` and `datetime` come from
# earlier in the script, outside this excerpt.
print 'of', dataset.shape[1], 'remaining features ...'
print 'summary of conditions/volumes\n', datetime.datetime.now()
print dataset.summary_targets()

# DETREND
# Linear (polyord=1) detrending, fitted separately within each value of the
# 'chunks' sample attribute; operates on `dataset` in place.
print 'detrending (remove slow drifts in signal, and jumps between runs) ...', datetime.datetime.now()
# can be very memory intensive!
M.poly_detrend(dataset, polyord=1, chunks_attr='chunks')  # linear detrend
print '... done', datetime.datetime.now()

# ZSCORE
# Per-chunk z-scoring in place. `param_est=('targets', ['base'])` makes the
# mean/std estimates come only from samples whose `targets` value is 'base'.
print 'zscore normalising (give all voxels similar variance) ...', datetime.datetime.now()
M.zscore(dataset, chunks_attr='chunks', param_est=('targets', ['base']))  # zscoring, on basis of rest periods
print '... done', datetime.datetime.now()

#P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png'))
# Cache the preprocessed dataset as a gzip-compressed pickle (third positional
# argument of gzip.open is the compression level).
# NOTE(review): pickleFile is not closed anywhere in this excerpt -- confirm it
# is closed later, otherwise the gzip stream may not be flushed to disk.
pickleFile = gzip.open(preprocessedCache, 'wb', 5)
pickle.dump(dataset, pickleFile)

# AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL
# Collapse all samples sharing the same (chunks, targets) combination into
# their mean, so the number of rows drops.
print 'averaging over trials ...', datetime.datetime.now()
dataset = dataset.get_mapped(
    M.mean_group_sample(attrs=['chunks', 'targets']))
print '... only', dataset.shape[0], 'cases left now'
# Relabel chunks cyclically (0, 1, 2, 3, 4, 0, ...) by row index.
dataset.chunks = N.mod(N.arange(0, dataset.shape[0]), 5)

# print '\n\n\n'
# Script excerpt (Python 2 print statements): report on the masked dataset,
# then detrend, z-score, cache and trial-average it. `dataset`, `M` (PyMVPA),
# `N` (numpy), `preprocessedCache`, `gzip`, `pickle` and `datetime` come from
# earlier in the script, outside this excerpt.
print 'functional input has',dataset.a.voxel_dim,'voxels of dimesions',dataset.a.voxel_eldim,'mm'
print '... or',N.product(dataset.a.voxel_dim),'voxels per volume'
print 'masked data has',dataset.shape[1],'voxels in each of',dataset.shape[0],'volumes'
# NOTE(review): under Python 2 the `/` below is floor division when both
# operands are integers, so the reported percentage would be rounded up to a
# whole number before `round` sees it -- confirm whether that is acceptable.
print '... which means that',round(100-100*dataset.shape[1]/N.product(dataset.a.voxel_dim)),'% of the voxels were masked out'
print 'of',dataset.shape[1],'remaining features ...'
print 'summary of conditions/volumes\n',datetime.datetime.now()
print dataset.summary_targets()

# DETREND
# Linear (polyord=1) detrending, fitted separately within each value of the
# 'chunks' sample attribute; operates on `dataset` in place.
print 'detrending (remove slow drifts in signal, and jumps between runs) ...',datetime.datetime.now()
# can be very memory intensive!
M.poly_detrend(dataset, polyord=1, chunks_attr='chunks') # linear detrend
print '... done',datetime.datetime.now()

# ZSCORE
# Per-chunk z-scoring in place. `param_est=('targets', ['base'])` makes the
# mean/std estimates come only from samples whose `targets` value is 'base'.
print 'zscore normalising (give all voxels similar variance) ...',datetime.datetime.now()
M.zscore(dataset, chunks_attr='chunks', param_est=('targets', ['base'])) # zscoring, on basis of rest periods
print '... done',datetime.datetime.now()

#P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png'))
# Cache the preprocessed dataset as a gzip-compressed pickle (third positional
# argument of gzip.open is the compression level).
# NOTE(review): pickleFile is not closed anywhere in this excerpt -- confirm it
# is closed later, otherwise the gzip stream may not be flushed to disk.
pickleFile = gzip.open(preprocessedCache, 'wb', 5);
pickle.dump(dataset, pickleFile);

# AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL
# Collapse all samples sharing the same (chunks, targets) combination into
# their mean, so the number of rows drops.
print 'averaging over trials ...',datetime.datetime.now()
dataset = dataset.get_mapped(M.mean_group_sample(attrs=['chunks','targets']))
print '... only',dataset.shape[0],'cases left now'
# Relabel chunks cyclically (0, 1, 2, 3, 4, 0, ...) by row index.
dataset.chunks = N.mod(N.arange(0,dataset.shape[0]),5)

# print '\n\n\n'
# print dataset.targets
# print len(dataset.targets)
def clf_wrapper(ds):
    """Cross-validated pairwise classification followed by ``dist_wrapper``.

    First of the two main functions. ``ds`` is z-scored in place per chunk,
    then for every chunk in the module-level ``chunk_num`` a leave-one-chunk-out
    fold is run: for each target pair in the module-level ``pair_list2`` a
    ``NuSVC(nu=0.5, max_iter=2000)`` is fitted on the remaining chunks and
    scored on the held-out chunk.

    Returns the first two values (``corrs1, pvals1``) of
    ``dist_wrapper(ds, test_accs, 0, 0)``.

    NOTE(review): as reconstructed here, only the accuracies of the *last*
    fold are handed to ``dist_wrapper`` -- confirm this is intended.
    """

    def split_fold(held_out, val_chunk, optimize):
        # optimize >= 1: carve a validation chunk out of the non-test data.
        if optimize >= 1:
            remaining = ds[ds.chunks != held_out]
            validation = remaining[remaining.chunks == val_chunk]
            fitting = remaining[remaining.chunks != val_chunk]
            return fitting, validation
        # optimize == 0: plain train/test split on the held-out chunk.
        if optimize == 0:
            return ds[ds.chunks != held_out], ds[ds.chunks == held_out]
        # Any other value yields None (the caller's unpacking then fails).
        return None

    def pair_accuracies(fit_ds, eval_ds):
        # One accuracy per target pair.
        scores = []
        for pair in pair_list2:
            fit_sel = (fit_ds.targets == pair[0]) | (
                fit_ds.targets == pair[1])
            eval_sel = (eval_ds.targets == pair[0]) | (
                eval_ds.targets == pair[1])
            fit_pair = fit_ds[fit_sel]
            eval_pair = eval_ds[eval_sel]

            svm = NuSVC(nu=0.5, max_iter=2000)
            svm.fit(fit_pair.samples, fit_pair.targets)
            hits = svm.predict(eval_pair.samples) == eval_pair.targets
            scores.append(sum(hits) / float(len(hits)))
        return scores

    def optimize_clf(nf, held_out, val_chunk, optimize=1):
        # `nf` is accepted for symmetry with the (disabled) feature-selection
        # search; it does not influence the computation.
        fit_ds, eval_ds = split_fold(held_out, val_chunk, optimize)
        scores = pair_accuracies(fit_ds, eval_ds)
        if optimize == 1:
            return 1 - np.mean(scores)
        return scores

    fold_test_accs = []  # accuracies for each test fold
    fold_val_accs = []
    fold_nfs = []
    drawn_val_chunks = []

    mvpa2.zscore(ds, chunks_attr='chunks')

    for chunk in chunk_num:  # leave-one-chunk-out
        other_chunks = ds.uniquechunks[ds.uniquechunks != chunk]
        # Drawn on every fold (consumes global RNG state) even though the
        # optimize=0 path below does not use it.
        val_chunk = np.random.choice(other_chunks)
        drawn_val_chunks.append(val_chunk)

        nf = ds.shape[1]
        test_accs = optimize_clf(nf, chunk, val_chunk, optimize=0)
        fold_test_accs.append(test_accs)
        fold_nfs.append(nf)

    corrs1, pvals1, _dist_list, _nf_list = dist_wrapper(ds, test_accs, 0, 0)
    return corrs1, pvals1
h, run, t)) for p in participants: wt = np.load( os.path.join( data_dir, '{0}-leftout{1}/{2}/{3}/weights.npy'.format( t, run, p, h))) Pstim = get_stim_for_test_fold(run) Presp = mv.gifti_dataset( os.path.join( sam_data_dir, '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'. format(p, tr[run], run, h))).samples pred = np.dot(Pstim, wt) mv.zscore(pred, chunks_attr=None) forward_pred = mappers[p].forward(pred) mv.zscore(pred, chunks_attr=None) # Find prediction correlations nnpred = np.nan_to_num(forward_pred) corrs = np.nan_to_num( np.array([ np.corrcoef(Presp[:, ii], nnpred[:, ii].ravel())[0, 1] for ii in range(Presp.shape[1]) ])) np.save( os.path.join( data_dir, '{0}-leftout{1}/{2}/{3}/forward_corrs.npy'.format( t, run, p, h)), corrs)
def normalize_dataset(ds, **kwargs):
    """Optionally re-chunk, average and normalise ``ds``; attach its events.

    Recognised keyword arguments (any others are ignored):

    mean_samples
        If ``str(value) == 'True'``, samples are averaged with
        ``mean_group_sample(['event_num'])``. Default ``False``.
    img_dim
        Converted with ``int``. When it equals 4, an extra
        ``zscore(ds, chunks_attr='file')`` runs before the default feature
        z-scoring. Default ``None`` (no extra pass).
    normalization
        ``'feature'`` (default), ``'sample'`` or ``'both'``.
    chunk_number
        ``None`` (default, chunks untouched), ``'adaptive'`` or an ``int``
        number of chunks to assign within each target.

    Returns the (possibly replaced, if averaged) dataset, with
    ``ds.a.events`` set from ``find_events``.

    Raises ``ValueError`` if ``chunk_number`` is neither ``None``,
    ``'adaptive'`` nor an ``int``.
    """
    import collections
    try:
        from math import gcd  # Python >= 3.5
    except ImportError:  # Python 2: math.gcd does not exist there
        from fractions import gcd

    mean = kwargs.get('mean_samples', False)
    normalization = str(kwargs.get('normalization', 'feature'))
    chunk_number = kwargs.get('chunk_number')
    # Previously left unbound when not supplied, which crashed the default
    # feature normalisation below.
    img_dim = kwargs.get('img_dim')
    if img_dim is not None:
        img_dim = int(img_dim)

    if chunk_number is not None:
        # Size of the least frequent target; every target is assumed to have
        # this many samples (the assignment below fails otherwise).
        n_targets = min(collections.Counter(ds.targets).values())

        if chunk_number == 'adaptive':
            n_chunks = max(gcd(n_targets, i) for i in range(2, 10))
            if n_chunks == 1:
                n_chunks = 4
        elif isinstance(chunk_number, int):
            n_chunks = int(chunk_number)
        else:
            raise ValueError(
                "chunk_number must be None, 'adaptive' or an int, got %r"
                % (chunk_number,))

        # Spread each target's samples evenly over chunk labels 0..n_chunks-1.
        argsort = np.argsort(ds.targets)
        chunks = [
            np.linspace(0, n_chunks, n_targets, endpoint=False, dtype=int)
            for _ in ds.uniquetargets
        ]
        ds.chunks[argsort] = np.hstack(chunks)

    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num'])
        ds = ds.get_mapped(avg_mapper)

    if normalization in ('feature', 'both'):
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)  # , param_est=('targets', ['fixation']))

    if normalization in ('sample', 'both'):
        # Normalizing image-wise
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds, axis=1)[:, None]
        ds.samples /= np.std(ds, axis=1)[:, None]
        # Zero-variance rows divide 0/0 -> NaN; map those to 0.
        ds.samples[np.isnan(ds.samples)] = 0

    # Find event related stuff
    ds.a.events = find_events(  # event=ds.sa.event_num,
        chunks=ds.sa.chunks, targets=ds.sa.targets)

    return ds
def _binary_union_mask(roi_fns, mask_fn, n_features):
    """Return a 0/1 mask that is 1 wherever any file in ``roi_fns`` is > 0.

    With no files, an all-zero array of shape ``(1, n_features)`` is returned.
    """
    if not roi_fns:
        return np.zeros((1, n_features))
    summed = np.sum(
        [mv.fmri_dataset(roi_fn, mask=mask_fn).samples for roi_fn in roi_fns],
        axis=0)
    return np.where(summed > 0, 1, 0)


def buildadataset(zscore, rois, event_path=None):
    """Build, annotate and save the objectcategories localizer datasets.

    For every ``sub-*`` directory under ``base_dir`` the four localizer runs
    are loaded and stacked, optionally z-scored, and annotated with one
    feature attribute per ROI, a lateralized ``'lat_<roi>'`` attribute
    (1 = left, 2 = right) for every ROI except ``'VIS'``, and a combined
    ``'all_ROIs'`` label. The per-participant datasets are saved (if the
    module-level ``save_per_subject`` is true), saved again as a list, then
    hstacked, transposed and saved once more.

    Parameters
    ----------
    zscore : str
        ``'custom'``: z-score per chunk with means/stds obtained from
        ``extract_baseline``; ``'z-score'``: plain per-chunk z-scoring;
        anything else: no z-scoring.
    rois : iterable of str
        ROI names; ``'VIS'`` is treated as non-lateralized.
    event_path : optional
        Passed to ``get_group_events`` when ``zscore == 'custom'``.

    Returns
    -------
    The transposed group dataset (features become samples and vice versa).

    NOTE(review): ``all_ROIs`` labels are stored with dtype ``'S10'``, so any
    label longer than 10 bytes is truncated -- confirm ROI names stay short.
    """
    print('I am building a dataset with the following option: {}.'.format(
        zscore))

    # get the participants and rois
    participants = sorted(
        [path.split('/')[-1] for path in glob(base_dir + 'sub-*')])
    localizer_dss = []

    for participant in participants:
        anat_prefix = base_dir + participant + anat_dir
        localizer_fns = sorted(glob(
            base_dir + participant + locdir +
            '{}_task-objectcategories_run-*_space-custom-subject_desc-highpass_bold.nii.gz'.format(
                participant)))
        mask_fn = anat_prefix + 'brain_mask.nii.gz'
        assert len(localizer_fns) == 4, \
            'expected 4 localizer runs for {}'.format(participant)

        localizer_ds = mv.vstack([
            mv.fmri_dataset(localizer_fn, mask=mask_fn, chunks=run)
            for run, localizer_fn in enumerate(localizer_fns)
        ])
        n_features = localizer_ds.shape[1]
        localizer_ds.fa['participant'] = [participant] * n_features
        print('loaded localizer data for participant {}.'.format(participant))

        # zscore the data with means and standard deviations from
        # no-stimulation periods
        if zscore == 'custom':
            events = get_group_events(event_path)
            means, stds = extract_baseline(events, localizer_ds)
            mv.zscore(localizer_ds, params=(means, stds), chunks_attr='chunks')
            print('finished custom zscoring for participant {}.'.format(
                participant))
        elif zscore == 'z-score':
            mv.zscore(localizer_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        # Every voxel starts out labelled 'brain'; ROI labels overwrite it.
        all_rois_mask = np.array([['brain'] * n_features]).astype('S10')

        for roi in rois:
            lateralized = roi != 'VIS'

            # Get filenames for potential right and left ROI masks
            if lateralized:
                left_roi_fns = sorted(
                    glob(anat_prefix + 'l{0}_*_mask.nii.gz'.format(roi)))
                right_roi_fns = sorted(
                    glob(anat_prefix + 'r{0}_*_mask.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns
            else:
                roi_fns = sorted(
                    glob(anat_prefix + '{0}_*_mask.nii.gz'.format(roi)))

            if len(roi_fns) == 0:
                print(
                    "ROI {0} does not exist for participant {1}; appending all zeros"
                    .format(roi, participant))
                roi_mask = np.zeros((1, n_features))
            elif len(roi_fns) == 1:
                # A single mask file is stored as loaded (not re-binarized).
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            else:
                # Add ROI maps into single map, capped at 1
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = _binary_union_mask(roi_fns, mask_fn, n_features)

            # Ensure that number of voxels in ROI mask matches localizer data
            assert roi_mask.shape[1] == n_features, \
                'ROI mask size does not match localizer data'
            # Assign flattened ROI mask to localizer data feature attributes
            localizer_ds.fa[roi] = list(roi_mask.ravel())

            if lateralized:
                # Get lateralized masks as well: 1 = left, 2 = right
                # (right wins where both hemispheres' masks overlap).
                left_roi_mask = _binary_union_mask(
                    left_roi_fns, mask_fn, n_features)
                right_roi_mask = _binary_union_mask(
                    right_roi_fns, mask_fn, n_features)
                lat_roi_mask = np.zeros((1, n_features))
                lat_roi_mask[left_roi_mask > 0] = 1
                lat_roi_mask[right_roi_mask > 0] = 2

                assert lat_roi_mask.shape[1] == n_features, \
                    'lateralized ROI mask size does not match localizer data'
                localizer_ds.fa['lat_' + roi] = list(lat_roi_mask.ravel())

                # Voxels already claimed by another ROI become 'overlap';
                # the rest get a hemisphere-specific label.
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')
                all_rois_mask[(left_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            else:
                # Reuse the mask loaded above instead of globbing and reading
                # the same files a second time; only `> 0` is consulted here.
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = roi

        # Assign flattened combined ROI labels to the feature attributes
        localizer_ds.fa['all_ROIs'] = list(all_rois_mask.ravel())

        if save_per_subject:
            mv.h5save(
                base_dir + participant + locdir +
                '{}_ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5'.format(
                    participant), localizer_ds)
            print('Saved dataset for {}.'.format(participant))

        # join all datasets
        localizer_dss.append(localizer_ds)

    # save full dataset
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5',
        localizer_dss)
    print('saved the collection of all subjects datasets.')

    # squish everything together
    ds_wide = mv.hstack(localizer_dss)

    # transpose the dataset, time points are now features
    ds = mv.Dataset(ds_wide.samples.T,
                    sa=ds_wide.fa.copy(),
                    fa=ds_wide.sa.copy())
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass_transposed.hdf5',
        ds)
    print('Transposed the group-dataset and saved it.')
    return ds
target_list.append(targets) sample_list.append(samples) chunk_list.append(subject_list_chunks) band_list.append(band_) targets = np.hstack(target_list) samples = np.vstack(sample_list) chunks = np.hstack(chunk_list) zsamples = sc_zscore(samples, axis=0) ds = dataset_wizard(zsamples, targets=targets, chunks=chunks) ds.sa['band'] = np.hstack(band_list) zscore(ds) n_folds = [4] #n_feats = np.arange(10, 1220, 50) n_feats = [10] err_lst = [] sens_mat = [] for k in n_folds: for n in n_feats: #fsel = SensitivityBasedFeatureSelection(OneWayAnova(), # FixedNElementTailSelector( # n, mode = 'select',tail = 'upper')) ''' rfesvm_split = SplitClassifier(LinearCSVMC())
ds = ds[4:] ds.sa['chunks'] = np.ones(ds.nsamples) * i print ds.shape Ds.append(ds) ds = mvpa.vstack(Ds) ds.samples = ds.samples.astype('float32') #Detrending and MC removal mvpa.poly_detrend(ds, opt_regs=['mc_' + param for param in mc], chunks_attr='chunks') #Voxelwise Zscore if zsc: mvpa.zscore(ds) #bandpass filter nf = 0.5 / TR ws = [(1 / lf) / nf, (1 / hf) / nf] b, a = signal.butter(5, ws, btype='band') S = [signal.filtfilt(b, a, x) for x in ds.samples.T] ds.samples = np.array(S).T ds.samples = ds.samples.astype('float32') #Create Event-related Dataset onsets = np.arange(0, ds.nsamples - samples_size / TR, samples_size / TR) events = [] for on in onsets: Ev = dict() Ev['onset'] = on