def bilateralize(ds):
    """Return a deep copy of ``ds`` with hemisphere-free ROI labels added.

    Every label in the ``all_ROIs`` sample attribute is reduced to the part
    after its last space (labels without a space are kept unchanged) and the
    result is stored in a new ``bilat_ROIs`` sample attribute. The copy is
    also written to ``results_dir + 'ds_ROIs.hdf5'``.
    """
    combined = ds.copy('deep')

    # 'left FFA' / 'right FFA' -> 'FFA'
    merged_labels = []
    for lateral_label in combined.sa.all_ROIs:
        merged_labels.append(lateral_label.split(' ')[-1])
    combined.sa['bilat_ROIs'] = merged_labels

    target_file = results_dir + 'ds_ROIs.hdf5'
    mv.h5save(target_file, combined)
    print('Combined lateralized ROIs for the provided dataset and saved the dataset.')
    return combined
def mk_movie_dataset(gd, subj, mask, task=1, flavor='', filter=None,
                     writeto=None, add_fa=None):
    """Load the eight movie segments of one subject as fMRI datasets.

    For every segment (1-8) the run is loaded through ``gd``, annotated with
    the six motion estimates and the segment number, optionally passed
    through ``filter``, trimmed by four volumes at each internal segment
    boundary and given a continuous ``movie_time`` sample attribute.

    Parameters
    ----------
    gd
        Data accessor providing ``get_run_fmri`` and
        ``get_run_motion_estimates``.
    subj, task
        Identifiers handed through to ``gd``.
    mask
        Mask handed to ``fmri_dataset``.
    flavor
        Handed to ``gd.get_run_fmri``.
    filter : callable or None
        If given, called as ``filter(ds)``; its return value replaces the
        segment dataset.
    writeto : str or None
        If None the segments are collected and returned. Otherwise a
        ``%``-template filled with ``(subj, task, seg)``; each segment is
        cast to float32 and saved there instead of being collected.
    add_fa
        Handed to ``fmri_dataset``.

    Returns
    -------
    list
        The segment datasets; empty when ``writeto`` is given.
    """
    motion_params = ('mc_xtrans', 'mc_ytrans', 'mc_ztrans',
                     'mc_xrot', 'mc_yrot', 'mc_zrot')
    cur_max_time = 0
    segments = []
    for seg in range(1, 9):
        # single-argument print() produces identical output on Python 2 and 3
        print('Seg %s' % seg)
        ds = fmri_dataset(
            gd.get_run_fmri(subj, task, seg, flavor=flavor),
            mask=mask, add_fa=add_fa)
        if task == 1:
            # sanitize TR
            ds.sa.time_coords = np.arange(len(ds)) * 2.0
        mc = gd.get_run_motion_estimates(subj, task, seg)
        for i, par in enumerate(motion_params):
            ds.sa[par] = mc.T[i]
        ds.sa['movie_segment'] = [seg] * len(ds)
        TR = np.diff(ds.sa.time_coords).mean()
        if filter is not None:
            print('filter')
            ds = filter(ds)
        # truncate segment time series to remove overlap
        if seg > 1:
            ds = ds[4:]
        if seg < 8:
            ds = ds[:-4]
        # continue the movie clock where the previous segment ended
        ds.sa['movie_time'] = np.arange(len(ds)) * TR + cur_max_time
        cur_max_time = ds.sa.movie_time[-1] + TR
        if writeto is None:
            segments.append(ds)
        else:
            ds.samples = ds.samples.astype('float32')
            h5save(writeto % (subj, task, seg), ds, compression=9)
    return segments
def dotheglm(sensitivities, eventdir):
    """Fit an event-related GLM on group-averaged classifier sensitivities.

    The sensitivity datasets are vstacked, averaged per ROI pairing with
    ``mean_group_sample`` and transposed with a ``TransposeMapper``. The
    group events returned by ``get_group_events(eventdir)`` are saved to
    ``results_dir + 'group_events.tsv'`` and used as the GLM design.

    Reads the module-level ``bilateral`` flag to decide whether the pairings
    come from ``sa.bilat_ROIs`` or ``sa.all_ROIs``.

    Parameters
    ----------
    sensitivities : list
        Sensitivity datasets, one per cross-validation fold.
    eventdir
        Passed to ``get_group_events``.

    Returns
    -------
    The object returned by ``mv.fit_event_hrf_model(..., return_model=True)``;
    it is also saved to
    ``results_dir + 'sens_glm_objectcategories_results.hdf5'``.
    """
    sensitivities_stacked = mv.vstack(sensitivities)

    # Both branches of the original differed only in the attribute name.
    source_attr = 'bilat_ROIs' if bilateral else 'all_ROIs'
    label_attr = source_attr + '_str'
    # A list (not map()) so the attribute is a real sequence on Python 3 too.
    sensitivities_stacked.sa[label_attr] = [
        '_'.join(pair) for pair in getattr(sensitivities_stacked.sa, source_attr)
    ]
    mean_sens = mv.mean_group_sample([label_attr])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # average onsets into one event file
    events = get_group_events(eventdir)
    # save the event_file
    fmt = "%10.3f\t%10.3f\t%16s\t%60s"
    np.savetxt(results_dir + 'group_events.tsv', events, delimiter='\t',
               comments='',
               header='onset\tduration\ttrial_type\tstim_file', fmt=fmt)

    # get events into a list of dicts; indexed access is kept because the
    # container type returned by get_group_events is not visible here
    events_dicts = [
        {
            'onset': events[i][0],
            'duration': events[i][1],
            'condition': events[i][2],
        }
        for i in range(len(events))
    ]

    hrf_estimates = mv.fit_event_hrf_model(
        mean_sens_transposed,
        events_dicts,
        time_attr='time_coords',
        condition_attr='condition',
        design_kwargs=dict(drift_model='blank'),
        glmfit_kwargs=dict(model='ols'),
        return_model=True)
    mv.h5save(results_dir + 'sens_glm_objectcategories_results.hdf5',
              hrf_estimates)
    print('calculated glm, saving results.')
    return hrf_estimates
def load_create_save_ds(ds_save_p, dataset_list, ref_space, warp_files, mask, **kwargs):
    """Return the dataset cached at ``ds_save_p``, building it if missing.

    When ``ds_save_p`` exists it is loaded with ``mvpa.h5load``. Otherwise
    the dataset is produced by ``preprocess_datasets`` and saved to
    ``ds_save_p`` before being returned.

    Recognized keyword options (with defaults): ``detrending`` (True),
    ``use_zscore`` (True), ``use_events`` (False), ``anno_dir`` (None),
    ``use_glm_estimates`` (False), ``targets`` (None), ``event_offset``
    (None), ``event_dur`` (None), ``save_disc_space`` (True), ``rois``
    (None). Any other keyword is ignored.
    """
    option_defaults = (
        ('detrending', True),
        ('use_zscore', True),
        ('use_events', False),
        ('anno_dir', None),
        ('use_glm_estimates', False),
        ('targets', None),
        ('event_offset', None),
        ('event_dur', None),
        ('save_disc_space', True),
        ('rois', None),
    )
    options = dict(
        (name, kwargs.get(name, fallback)) for name, fallback in option_defaults)

    cache_file = str(ds_save_p)
    if not ds_save_p.exists():
        ds = preprocess_datasets(dataset_list, ref_space, warp_files, mask,
                                 **options)
        mvpa.h5save(cache_file, ds)  # , compression=9
        return ds
    return mvpa.h5load(cache_file)
def dotheclassification(ds, bilateral, store_sens=True):
    """Cross-validate a multiclass linear SGD classifier across participants.

    A hinge-loss, L2-penalized ``SGDClassifier`` is wrapped in PyMVPA's
    ``SKLLearnerAdapter`` and ``MulticlassClassifier`` and evaluated with an
    N-fold cross-validation partitioned on the ``participant`` attribute.
    The confusion statistics are appended to
    ``results_dir + 'avmovie_clf.txt'``.

    Side effect: ``ds.sa['targets']`` is overwritten with ``bilat_ROIs`` or
    ``all_ROIs``.

    Parameters
    ----------
    ds
        Dataset to classify.
    bilateral : bool
        Use ``sa.bilat_ROIs`` as targets if true, ``sa.all_ROIs`` otherwise.
    store_sens : bool
        If true, the classifier sensitivities of every fold are collected
        and saved to ``results_dir + 'sensitivities_nfold.hdf5'``.

    Returns
    -------
    (sensitivities, cv)
        ``sensitivities`` is the list of per-fold sensitivity datasets
        (empty when ``store_sens`` is false); ``cv`` is the
        ``CrossValidation`` measure after it has been run.
    """
    from sklearn.linear_model import SGDClassifier

    # MulticlassClassifier insists on finding its labels in sa.targets.
    if bilateral:
        ds.sa['targets'] = ds.sa.bilat_ROIs
    else:
        ds.sa['targets'] = ds.sa.all_ROIs

    # get a stochastic gradient descent into pymvpa via the SKLLearnerAdapter
    # and let MulticlassClassifier perform 1 vs 1 decisions
    clf = mv.MulticlassClassifier(
        mv.SKLLearnerAdapter(
            SGDClassifier(loss='hinge', penalty='l2',
                          class_weight='balanced')))

    sensitivities = []

    # Named differently from the ``store_sens`` flag: the original re-bound
    # the parameter to this function and relied on a function being truthy.
    def _collect_sensitivities(data, node, result):
        sens = node.measure.get_sensitivity_analyzer(force_train=False)(data)
        # we also need to manually append the time attributes to the sens ds
        sens.fa['time_coords'] = data.fa['time_coords']
        sens.fa['chunks'] = data.fa['chunks']
        sensitivities.append(sens)

    cv_kwargs = dict(errorfx=mv.mean_match_accuracy, enable_ca=['stats'])
    if store_sens:
        cv_kwargs['callback'] = _collect_sensitivities
    cv = mv.CrossValidation(clf, mv.NFoldPartitioner(attr='participant'),
                            **cv_kwargs)
    results = cv(ds)

    # save classification results
    with open(results_dir + 'avmovie_clf.txt', 'a') as f:
        f.write(cv.ca.stats.as_string(description=True))

    # label order for the confusion matrix
    if bilateral:
        desired_order = ['VIS', 'LOC', 'OFA', 'FFA', 'EBA', 'PPA']
    else:
        desired_order = [
            'brain', 'VIS', 'left LOC', 'right LOC', 'left OFA', 'right OFA',
            'left FFA', 'right FFA', 'left EBA', 'right EBA', 'left PPA',
            'right PPA'
        ]
    # NOTE(review): ``labels`` only feeds the plotting code, which is
    # disabled in this version (pymvpa's built-in plot currently fails);
    # the call is kept so behavior is unchanged.
    labels = get_known_labels(desired_order, cv.ca.stats.labels)

    # NOTE(review): saving ``results`` to
    # 'SGD_cv_classification_results.hdf5' is commented out in the original,
    # so only the text report above is written although the message below
    # says "saved".
    # mv.h5save(results_dir + 'SGD_cv_classification_results.hdf5', results)
    print('Saved the crossvalidation results.')
    if store_sens:
        mv.h5save(results_dir + 'sensitivities_nfold.hdf5', sensitivities)
        print('Saved the sensitivities.')
    # results now has the overall accuracy. results.samples gives the
    # accuracy per participant.
    # sensitivities contains a dataset for each participant with the
    # sensitivities as samples and class-pairings as attributes
    return sensitivities, cv
def dotheclassification(ds, bilateral, store_sens=True):
    """Cross-validate a Gaussian naive Bayes classifier across participants.

    A ``GNB`` with common variance and ``'ratio'`` prior is evaluated with an
    N-fold cross-validation partitioned on the ``participant`` attribute.
    The confusion statistics are appended to
    ``results_dir + 'avmovie_clf.txt'`` and the cross-validation result is
    saved to ``results_dir + 'gnb_cv_classification_results.hdf5'``. When the
    module-level ``niceplot`` flag is set, a confusion matrix figure is
    produced with ``plot_confusion``.

    Parameters
    ----------
    ds
        Dataset to classify.
    bilateral : bool
        Classify on ``bilat_ROIs`` if true, on ``all_ROIs`` otherwise.
    store_sens : bool
        If true, the classifier sensitivities of every fold are collected
        and saved to ``results_dir + 'sensitivities_nfold.hdf5'``.

    Returns
    -------
    (sensitivities, cv)
        ``sensitivities`` is the list of per-fold sensitivity datasets
        (empty when ``store_sens`` is false); ``cv`` is the
        ``CrossValidation`` measure after it has been run.
    """
    # set up classifier
    prior = 'ratio'
    targets = 'bilat_ROIs' if bilateral else 'all_ROIs'
    gnb = mv.GNB(common_variance=True, prior=prior, space=targets)

    sensitivities = []

    # Named differently from the ``store_sens`` flag: the original re-bound
    # the parameter to this function and relied on a function being truthy.
    def _collect_sensitivities(data, node, result):
        sens = node.measure.get_sensitivity_analyzer(force_train=False)(data)
        # we also need to manually append the time attributes to the sens ds
        sens.fa['time_coords'] = data.fa['time_coords']
        sens.fa['chunks'] = data.fa['chunks']
        sensitivities.append(sens)

    cv_kwargs = dict(errorfx=mv.mean_match_accuracy, enable_ca=['stats'])
    if store_sens:
        cv_kwargs['callback'] = _collect_sensitivities
    cv = mv.CrossValidation(gnb, mv.NFoldPartitioner(attr='participant'),
                            **cv_kwargs)
    results = cv(ds)

    # save classification results
    with open(results_dir + 'avmovie_clf.txt', 'a') as f:
        f.write(cv.ca.stats.as_string(description=True))

    # label order for the confusion matrix
    if bilateral:
        desired_order = ['VIS', 'LOC', 'OFA', 'FFA', 'EBA', 'PPA']
    else:
        desired_order = [
            'brain', 'VIS', 'left LOC', 'right LOC', 'left OFA', 'right OFA',
            'left FFA', 'right FFA', 'left EBA', 'right EBA', 'left PPA',
            'right PPA'
        ]
    labels = get_known_labels(desired_order, cv.ca.stats.labels)

    # plotting the confusion matrix with pymvpa's built-in plot function
    # currently fails, so only the custom plot is offered
    if niceplot:
        ACC = cv.ca.stats.stats['mean(ACC)']
        plot_confusion(cv,
                       labels,
                       fn=results_dir + 'confusion_matrix_avmovie.svg',
                       figsize=(9, 9),
                       vmax=100,
                       cmap='Blues',
                       ACC='%.2f' % ACC)

    mv.h5save(results_dir + 'gnb_cv_classification_results.hdf5', results)
    print('Saved the crossvalidation results.')
    if store_sens:
        mv.h5save(results_dir + 'sensitivities_nfold.hdf5', sensitivities)
        print('Saved the sensitivities.')
    # results now has the overall accuracy. results.samples gives the
    # accuracy per participant.
    # sensitivities contains a dataset for each participant with the
    # sensitivities as samples and class-pairings as attributes
    return sensitivities, cv
axis=0) np.place(all_rois_mask, (roi_mask > 0) & (all_rois_mask != 'brain'), 'overlap') all_rois_mask[(roi_mask > 0) & (all_rois_mask != 'overlap')] = roi # Flatten mask into list all_rois_flat = list(all_rois_mask.ravel()) # Assign ROI mask to movie data feature attributes movie_ds.fa['all_ROIs'] = all_rois_flat movie_dss.append(movie_ds) if save_per_subject: mv.h5save( base_dir + participant + data_dir + '{0}_avmovie_detrend{1}_lowpass_ROIs_tmpl_bold.hdf5'.format( participant, polyord), movie_ds) print("Finished participant {0}, saved the data".format(participant)) mv.h5save( results_dir + 'allsub_avmovie_detrend{0}_lowpass_ROIs_tmpl_bold.hdf5'.format(polyord), movie_dss) print('Saved the group dataset in {}.'.format(results_dir)) # Horizontally stack all data sets ds_wide = mv.hstack(movie_dss) # Transpose brain so voxels are now samples ds = mv.Dataset(ds_wide.samples.T, sa=ds_wide.fa.copy(), fa=ds_wide.sa.copy())
# Band-pass filter every feature's time series (5th-order Butterworth,
# applied forward and backward with filtfilt).
nf = 0.5 / TR  # Nyquist frequency
# 1.0/x so integer cutoffs do not floor to 0 under Python 2 division
ws = [(1.0 / lf) / nf, (1.0 / hf) / nf]
b, a = signal.butter(5, ws, btype='band')
S = [signal.filtfilt(b, a, x) for x in ds.samples.T]
ds.samples = np.array(S).T
ds.samples = ds.samples.astype('float32')

# Create event-related dataset: consecutive windows of samples_size/TR
# samples, each labelled with its onset time and the subject.
onsets = np.arange(0, ds.nsamples - samples_size / TR, samples_size / TR)
events = [
    {
        'onset': on,
        'duration': samples_size / TR,
        'target': on * TR,
        'subj': subj,
    }
    for on in onsets
]
evds = mvpa.eventrelated_dataset(ds, events=events)
evds.fa['1stidx'] = evds.fa.event_offsetidx == 0

# Save pymvpa-dataset as hdf5 in dataset directory
dataset_dir = os.path.join(path, 'dataset')
try:
    os.mkdir(dataset_dir)
except OSError:
    # Only an already existing directory is expected here; anything else
    # (missing parent, permissions) is a real error and must not be hidden.
    if not os.path.isdir(dataset_dir):
        raise
    print('results directory already exists')
dsfile = subj + '_z' + str(zsc) + '_' + str(samples_size) + '_' + align
mvpa.h5save(os.path.join(path, 'dataset', dsfile + '.hdf5'), evds,
            compression='gzip')
n_samples = ds.samples.shape[0]

# Exclude medial wall: vertices whose value is exactly zero in every sample
zeros_per_vertex = np.sum(ds.samples == 0, axis=0)
all_zero = zeros_per_vertex == n_samples
print(np.where(all_zero))
medial_wall = np.where(all_zero)[0].tolist()
print(len(medial_wall))
cortical_vertices = np.where(zeros_per_vertex < n_samples)[0].tolist()
assert len(medial_wall) == n_medial[hemi]
assert len(medial_wall) + len(cortical_vertices) == n_vertices

# Estimate searchlight hyperalignment transformation on movie data
hyper_options = dict(
    queryengine=qe,
    nproc=n_proc,
    nblocks=n_proc * 8,
    featsel=1.0,
    mask_node_ids=cortical_vertices,
    tmp_prefix='/fastscratch/cara/tmpsl',
)
sl_hyper = mv.SearchlightHyperalignment(**hyper_options)
print("Estimated transformation!")
mv.debug.active += ['HPAL', 'SLC']
mappers = sl_hyper(dss)
print("Finished creating hyperalignment mappers!")

# Organize and save fitted hyperalignment mappers, keyed by participant
assert len(participants) == len(mappers)
mappers = dict(zip(participants, mappers))
print("Reorganized hyperalignment mappers")
mapper_file = join(
    mvpa_dir,
    'search_hyper_mappers_life_mask_nofsel_{0}_leftout_{1}_reverse.hdf5'.format(
        hemi, left_out))
mv.h5save(mapper_file, mappers)
print("Successfully saved hyperalignment mappers for left out run {0}".format(left_out))
else: clf = args.clf(args.target_attr) tm = TransferMeasure(clf, splitter) res = tm(partitions) # make a record of the tuned hyper parameter for comprehensive # reporting if args.tune_hyperparam: res.a['tuned_hyperparam'] = tuned_par results.append(res) # feed predictions into the confusion tracker as a new set confusion.add(res.sa[args.target_attr].value, res.samples[:, 0]) # one result dataset results = vstack(results, a='all') # report analysis params for the afterlife results.a['confusion'] = confusion results.a['mask'] = args.mask results.a['fwhm'] = args.fwhm results.a['dog_bandwidth'] = args.dog_bandwidth results.a['filter_type'] = args.filter_type for k, v in mkds_args.items(): results.a['mkds_{}'.format(k)] = v # brag about it print(results) print(confusion) # safe to disk h5save(opj(args.output_dir, '_'.join(args.result_labels) + '.hdf5'), results)
# Cross-validated RSA searchlight: correlation distances between the two
# halves of the 'sessions' attribute, post-processed by lean_errorfx.
cv_rsa = mv.CrossValidation(mv.CDist(pairwise_metric='correlation'),
                            mv.HalfPartitioner(attr='sessions'),
                            errorfx=None, postproc=lean_errorfx)

sl = mv.Searchlight(cv_rsa, queryengine=qe, enable_ca=['roi_sizes'],
                    nproc=1, results_backend='native')
#sl = mv.Searchlight(cv_rsa, queryengine=qe, enable_ca=['roi_sizes'],
#                    nproc=1, results_backend='native', roi_ids=cortical_vertices)
#tmp_prefix='/local/tmp/sam_sl_p{0}_{1}_'.format(participant_id, hemi)
mv.debug.active += ['SLC']
sl_result = sl(ds)
assert len(sl_result.sa) == 0  # we didn't pass any
sl_result.sa = target_sa

# %-formatting keeps the output identical to the Python 2 print statement
# while remaining valid syntax on Python 3.
print('>>> %s %s' % (np.mean(sl.ca.roi_sizes), np.std(sl.ca.roi_sizes)))

# Average the two blocks of n_conditions**2 rows element-wise.
sl_means = np.mean(
    np.dstack((sl_result.samples[:n_conditions**2, :],
               sl_result.samples[n_conditions**2:, :])),
    axis=2)
sl_final = mv.Dataset(
    sl_means,
    sa={'conditions': sl_result.sa.conditions[:sl_means.shape[0], :].tolist(),
        'participants': [int(participant[-2:])] * sl_means.shape[0]},
    fa=sl_result.fa, a=sl_result.a)
#assert sl_result.shape[0] == n_conditions**2
print(sl_final)
mv.h5save('/idata/DBIC/cara/life/search_RDMs_sq_zscore_HA_{0}_{1}.hdf5'.format(participant, hemi), sl_final)
#mv.niml.write(join(mvpa_dir, 'search_RDMs_sq_p{0}_{1}_TEST.niml.dset'.format(
#    participant_id, hemi)), sl_result)
'{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'. format(participant, tr[run], run, hemi))) mv.zscore(ds, chunks_attr=None) if hyperalign: ds = mappers[participant].forward(ds) mv.zscore(ds, chunks_attr=None) ds.fa['node_indices'] = range(ds.shape[1]) # n_samples = ds.samples.shape[0] # # # Exclude medial wall # print(np.where(np.sum(ds.samples == 0, axis=0) == n_samples)) n_samples = ds.samples.shape[0] medial_wall = np.where( np.sum(ds.samples == 0, axis=0) == n_samples)[0].tolist() print(len(medial_wall)) cortical_vertices = np.where( np.sum(ds.samples == 0, axis=0) < n_samples)[0].tolist() assert len(medial_wall) == n_medial[hemi] assert len(medial_wall) + len(cortical_vertices) == n_vertices sl_result = sl(ds) print(ds.samples.shape, sl_result.samples.shape) list_of_RDMs.append(sl_result) final = mv.vstack(list_of_RDMs) print(final.shape) mv.h5save( '/idata/DBIC/cara/search_hyper_mappers_life_mask_nofsel_{0}_{1}_leftout_{1}_{2}.hdf5' .format(participant, hemi, left_out, sys.argv[1]), final)
def dotheclassification(ds, bilateral):
    """Classify ROI membership one-vs-all with stochastic gradient descent.

    A hinge-loss, L2-penalized ``SGDClassifier`` (balanced class weights) is
    wrapped in ``SKLLearnerAdapter`` and cross-validated with an N-fold
    partitioning on ``participant``. Statistics are appended to
    ``results_dir + 'SGD_clf.txt'``, the confusion matrix is plotted to
    ``confusion_matrix.png`` (and, if the module-level ``niceplot`` is set,
    to ``confusion_matrix_SGD.svg``), and the results are saved as hdf5.

    ``ds.sa['targets']`` is overwritten because the classifier only reads
    its labels from an attribute of that name.

    Future TODO: selection of alpha may be better performed via
    GridSearchCV, usually in the range 10.0**-np.arange(1,7) according to
    the sklearn documentation.

    Returns the ``CrossValidation`` measure after it has been run.
    """
    ds.sa['targets'] = ds.sa.bilat_ROIs if bilateral else ds.sa.all_ROIs

    sgd = SGDClassifier(loss='hinge', penalty='l2', class_weight='balanced')
    learner = mv.SKLLearnerAdapter(sgd)
    partitioner = mv.NFoldPartitioner(attr='participant')
    cv = mv.CrossValidation(learner,
                            partitioner,
                            errorfx=mv.mean_match_accuracy,
                            enable_ca=['stats'])
    results = cv(ds)

    # save classification results
    report = cv.ca.stats.as_string(description=True)
    with open(results_dir + 'SGD_clf.txt', 'a') as report_file:
        report_file.write(report)

    category_rois = ['LOC', 'OFA', 'FFA', 'EBA', 'PPA']
    desired_order = ['brain', 'VIS']
    if bilateral:
        desired_order += category_rois
    else:
        desired_order += [
            side + ' ' + region
            for region in category_rois
            for side in ('left', 'right')
        ]
    labels = get_known_labels(desired_order, cv.ca.stats.labels)

    # confusion matrix via pymvpa's built-in plot
    cv.ca.stats.plot(labels=labels, numbers=True, cmap='gist_heat_r')
    plt.savefig(results_dir + 'confusion_matrix.png')

    # confusion matrix via the custom matplotlib helper
    if niceplot:
        mean_accuracy = cv.ca.stats.stats['mean(ACC)']
        plot_confusion(cv,
                       labels,
                       fn=results_dir + 'confusion_matrix_SGD.svg',
                       figsize=(9, 9),
                       vmax=100,
                       cmap='Blues',
                       ACC='%.2f' % mean_accuracy)

    mv.h5save(results_dir + 'SGD_cv_classification_results.hdf5', results)
    print('Saved the crossvalidation results.')
    return cv
def _union_roi_mask(roi_fns, mask_fn, n_features):
    """Return a 0/1 mask marking features covered by any file in ``roi_fns``.

    With no files the result is ``np.zeros((1, n_features))``.
    """
    if not roi_fns:
        return np.zeros((1, n_features))
    summed = np.sum(
        [mv.fmri_dataset(roi_fn, mask=mask_fn).samples for roi_fn in roi_fns],
        axis=0)
    return np.where(summed > 0, 1, 0)


def buildadataset(zscore, rois, event_path=None):
    """Build, annotate and save the localizer datasets of all participants.

    For every ``sub-*`` directory under the module-level ``base_dir`` the
    four objectcategories runs are stacked, optionally z-scored, and
    annotated with ROI feature attributes:

    * ``fa[roi]``: combined mask of the ROI,
    * ``fa['lat_' + roi]``: 1 for left, 2 for right (not for 'VIS'),
    * ``fa['all_ROIs']``: one label per voxel ('brain', 'VIS',
      'left <roi>', 'right <roi>', or 'overlap' where a voxel was already
      claimed by a previously processed ROI).

    The list of per-participant datasets and the transposed group dataset
    (voxels as samples, time points as features) are saved under
    ``results_dir``; with the module-level ``save_per_subject`` set, each
    participant's dataset is saved as well.

    Parameters
    ----------
    zscore : str
        ``'custom'`` z-scores with means/stds from ``extract_baseline``,
        ``'z-score'`` z-scores per chunk, anything else skips z-scoring.
    rois : iterable of str
        ROI names; ``'VIS'`` is treated as non-lateralized.
    event_path
        Passed to ``get_group_events`` for the ``'custom'`` option.

    Returns
    -------
    The transposed group dataset.
    """
    print('I am building a dataset with the following option: {}.'.format(
        zscore))

    # get the participants
    participants = sorted(
        [path.split('/')[-1] for path in glob(base_dir + 'sub-*')])
    localizer_dss = []

    for participant in participants:
        anat_prefix = base_dir + participant + anat_dir
        localizer_fns = sorted(glob(base_dir + participant + locdir + \
            '{}_task-objectcategories_run-*_space-custom-subject_desc-highpass_bold.nii.gz'.format(
            participant)))
        mask_fn = anat_prefix + 'brain_mask.nii.gz'
        assert len(localizer_fns) == 4
        localizer_ds = mv.vstack([
            mv.fmri_dataset(localizer_fn, mask=mask_fn, chunks=run)
            for run, localizer_fn in enumerate(localizer_fns)
        ])
        n_voxels = localizer_ds.shape[1]
        localizer_ds.fa['participant'] = [participant] * n_voxels
        print('loaded localizer data for participant {}.'.format(participant))

        # zscore the data with means and standard deviations from
        # no-stimulation periods
        if zscore == 'custom':
            events = get_group_events(event_path)
            means, stds = extract_baseline(events, localizer_ds)
            mv.zscore(localizer_ds, params=(means, stds), chunks_attr='chunks')
            print('finished custom zscoring for participant {}.'.format(
                participant))
        elif zscore == 'z-score':
            mv.zscore(localizer_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        all_rois_mask = np.array([['brain'] * n_voxels]).astype('S10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(anat_prefix + \
                    '{0}_*_mask.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(anat_prefix + \
                    'l{0}_*_mask.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(anat_prefix + \
                    'r{0}_*_mask.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns

            if len(roi_fns) == 0:
                print(
                    "ROI {0} does not exist for participant {1}; appending all zeros"
                    .format(roi, participant))
                roi_mask = np.zeros((1, n_voxels))
            elif len(roi_fns) == 1:
                # a single file is stored as loaded (not re-binarized)
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            else:
                # Add ROI maps into single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = _union_roi_mask(roi_fns, mask_fn, n_voxels)

            # Ensure that number of voxels in ROI mask matches localizer data
            assert roi_mask.shape[1] == n_voxels
            # Assign flattened ROI mask to localizer data feature attributes
            localizer_ds.fa[roi] = list(roi_mask.ravel())

            if roi != 'VIS':
                # Get lateralized masks as well: 1 = left, 2 = right
                left_roi_mask = _union_roi_mask(left_roi_fns, mask_fn,
                                                n_voxels)
                right_roi_mask = _union_roi_mask(right_roi_fns, mask_fn,
                                                 n_voxels)
                lat_roi_mask = np.zeros((1, n_voxels))
                lat_roi_mask[left_roi_mask > 0] = 1
                lat_roi_mask[right_roi_mask > 0] = 2

                assert lat_roi_mask.shape[1] == n_voxels
                localizer_ds.fa['lat_' + roi] = list(lat_roi_mask.ravel())

                # Voxels already claimed by an earlier ROI become 'overlap'
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')
                all_rois_mask[(left_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            else:
                # The original globbed and loaded the VIS masks a second
                # time here; ``roi_mask`` from above marks the same voxels
                # (only ``> 0`` is evaluated), so it is reused.
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = roi

        # Assign flattened label mask to localizer data feature attributes
        localizer_ds.fa['all_ROIs'] = list(all_rois_mask.ravel())

        if save_per_subject:
            mv.h5save(base_dir + participant + locdir + \
                '{}_ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5'.format(
                participant), localizer_ds)
            print('Saved dataset for {}.'.format(participant))
        # join all datasets
        localizer_dss.append(localizer_ds)

    # save full dataset
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5',
        localizer_dss)
    print('saved the collection of all subjects datasets.')

    # squish everything together
    ds_wide = mv.hstack(localizer_dss)

    # transpose the dataset, time points are now features
    ds = mv.Dataset(ds_wide.samples.T,
                    sa=ds_wide.fa.copy(),
                    fa=ds_wide.sa.copy())
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass_transposed.hdf5',
        ds)
    print('Transposed the group-dataset and saved it.')
    return ds
ds.sa.pop('intents') ds.sa['subjects'] = [participant] * ds.shape[0] ds.fa['node_indices'] = range(n_vertices) # z-score features across samples mv.zscore(ds, chunks_attr=None) return ds t = [] for hemi in hemispheres: mappers = mv.h5load( os.path.join( mvpa_dir, 'search_hyper_mappers_life_mask_nofsel_{0}.hdf5'.format(hemi))) print('\nLoading fMRI GIFTI data...') l = [] for participant in participants: p = [] for run in range(1, 5): p.append(mappers[participant].forward( load_data( os.path.join( sam_data_dir, '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'. format(participant, tr[run], run, hemi))))) l.append(p) t.append(l) mv.h5save("hyperaligned.hdf5", t)
def _annotation_regressor(annotation, condition, column=None, accepted=(),
                          duration=None):
    """Build an onset-sorted event frame from the location annotation.

    One event is created per annotation row (the final row is skipped, as in
    the original analysis) whose ``column`` value is in ``accepted``; with
    ``column=None`` every row qualifies. ``duration=None`` takes the row's
    annotated duration, otherwise the given constant is used.
    """
    rows = []
    for i in range(len(annotation) - 1):
        if column is not None and annotation[column][i] not in accepted:
            continue
        rows.append({
            'condition': condition,
            'onset': annotation['onset'][i],
            'duration': annotation['duration'][i] if duration is None
            else duration,
        })
    # explicit columns keep an empty selection sortable
    frame = pd.DataFrame(rows, columns=['onset', 'duration', 'condition'])
    return frame.sort_values(by='onset')


def dotheglm(sensitivities, eventdir, annot_dir):
    """Fit a GLM on normalized, group-averaged classifier sensitivities.

    The sensitivities are L2-normalized per sample, vstacked, averaged per
    ROI pairing with ``mean_group_sample`` and transposed with a
    ``TransposeMapper``. The design combines the face events read from the
    eight event files in ``eventdir`` (shifted by the run onsets) with
    regressors derived from the location annotation: recurring settings,
    scene changes, time jumps, exterior shots, night shots and run
    regressors. The non-face events, the full event list (tsv and json) and
    the GLM result are written below ``results_dir``.

    Reads the module-level ``bilateral`` flag to name the averaging
    attribute.

    Parameters
    ----------
    sensitivities : list
        Sensitivity datasets, one per cross-validation fold.
    eventdir : str
        Directory holding exactly eight event files.
    annot_dir : str
        Path of the tab-separated location annotation file.

    Returns
    -------
    The object returned by ``mv.fit_event_hrf_model(..., return_model=True)``.
    """
    # normalize the sensitivities
    from sklearn.preprocessing import normalize
    import copy
    # default for normalization is the L2 norm
    sensitivities_to_normalize = copy.deepcopy(sensitivities)
    for sens in sensitivities_to_normalize:
        sens.samples = normalize(sens.samples, axis=1)

    sensitivities_stacked = mv.vstack(sensitivities_to_normalize)
    label_attr = 'bilat_ROIs_str' if bilateral else 'all_ROIs_str'
    # A list (not map()) so the attribute is a real sequence on Python 3 too.
    sensitivities_stacked.sa[label_attr] = [
        '_'.join(pair) for pair in sensitivities_stacked.sa.targets
    ]
    mean_sens = mv.mean_group_sample([label_attr])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # get a list of the event files with occurrences of faces
    event_files = sorted(glob(eventdir + '/*'))
    assert len(event_files) == 8

    # get additional events from the location annotation
    location_annotation = pd.read_csv(annot_dir, sep='\t')

    # onsets and durations of all settings with more than one occurrence
    onset = []
    duration = []
    condition = []
    for location in location_annotation.setting.unique():
        occurrences = location_annotation[
            location_annotation['setting'] == location]
        if len(occurrences) > 1:
            onset.extend(occurrences['onset'].values)
            duration.extend(occurrences['duration'].values)
            condition.extend([location] * len(occurrences))
    assert len(condition) == len(onset) == len(duration)

    # concatenate the strings
    condition_str = [
        'location_' + name.replace(' ', '_') for name in condition
    ]

    # put it in a dataframe
    locations = pd.DataFrame({
        'onset': onset,
        'duration': duration,
        'condition': condition_str
    })
    # sort according to onsets to be paranoid
    locations_sorted = locations.sort_values(by='onset')

    # flow of time, scene changes, exterior shots and night shots
    time_forward_sorted = _annotation_regressor(
        location_annotation, 'time+', 'flow_of_time', ('+', '++'), 1.0)
    time_back_sorted = _annotation_regressor(
        location_annotation, 'time-', 'flow_of_time', ('-', '--'), 1.0)
    scene_change_sorted = _annotation_regressor(
        location_annotation, 'scene-change', duration=1.0)
    exterior_sorted = _annotation_regressor(
        location_annotation, 'exterior', 'int_or_ext', ('ext',))
    night_sorted = _annotation_regressor(
        location_annotation, 'night', 'time_of_day', ('night',))

    for frame in (locations_sorted, time_back_sorted, time_forward_sorted,
                  exterior_sorted, night_sorted, scene_change_sorted):
        assert np.all(frame.onset[1:].values >= frame.onset[:-1].values)

    # check whether chunks are increasing as well as sanity check
    chunks = mean_sens_transposed.sa.chunks
    assert np.all(chunks[1:] >= chunks[:-1])

    # TR was not preserved/carried through in .a
    # so we will guestimate it based on the values of time_coords
    tc = mean_sens_transposed.sa.time_coords
    TRdirty = sorted(np.unique(tc[1:] - tc[:-1]))[-1]
    assert np.abs(np.round(TRdirty, decimals=2) - TRdirty) < 0.0001

    # make time coordinates real seconds
    mean_sens_transposed.sa.time_coords = np.arange(
        len(mean_sens_transposed)) * TRdirty

    # get runs, and runlengths in seconds
    runs = sorted(mean_sens_transposed.UC)
    # list(): on Python 3 a list never compares equal to a range object
    assert runs == list(range(len(runs)))
    runlengths = [
        np.max(tc[mean_sens_transposed.sa.chunks == run]) + TRdirty
        for run in runs
    ]
    runonsets = [sum(runlengths[:run]) for run in runs]
    assert len(runs) == 8

    # This is relevant to later stack all dataframes together
    # and paranoidly make sure that they have the same columns
    cols = ['onset', 'duration', 'condition']

    # the list of dicts that gets later passed to the glm
    events_dicts = []
    for run in runs:
        # get face data
        events = pd.read_csv(event_files[run], sep='\t')
        for index, row in events.iterrows():
            # disregard no faces, put everything else into event structure
            if row['condition'] != 'no_face':
                events_dicts.append({
                    'onset': row['onset'] + runonsets[run],
                    'duration': row['duration'],
                    'condition': row['condition']
                })

    # one regressor per run except the last
    run_reg = pd.DataFrame([{
        'onset': runonsets[i],
        'duration': abs(runonsets[i] - runonsets[i + 1]),
        'condition': 'run-' + str(i + 1)
    } for i in range(len(runs) - 1)])

    # get all of these wonderful dataframes into a list and squish them
    dfs = [
        locations_sorted[cols], scene_change_sorted[cols],
        time_back_sorted[cols], time_forward_sorted[cols],
        exterior_sorted[cols], night_sorted[cols], run_reg[cols]
    ]
    allevents = pd.concat(dfs)

    # save all non-face related events in an event file
    allevents.to_csv(results_dir + '/' + 'non_face_regs.tsv',
                     sep='\t',
                     index=False)

    # append non-face events to event structure for glm
    for index, row in allevents.iterrows():
        events_dicts.append({
            'onset': row['onset'],
            'duration': row['duration'],
            'condition': row['condition']
        })

    # save this event dicts structure as a tsv file
    import csv
    with open(results_dir + '/' + 'full_event_file.tsv', 'w') as tsvfile:
        fieldnames = ['onset', 'duration', 'condition']
        writer = csv.DictWriter(tsvfile, fieldnames=fieldnames, delimiter='\t')
        writer.writeheader()
        writer.writerows(events_dicts)
    # save this event file also as json file
    import json
    with open(results_dir + '/' + 'allevents.json', 'w') as f:
        json.dump(events_dicts, f)

    # do the glm
    hrf_estimates = mv.fit_event_hrf_model(
        mean_sens_transposed,
        events_dicts,
        time_attr='time_coords',
        condition_attr='condition',
        design_kwargs=dict(drift_model='blank'),
        glmfit_kwargs=dict(model='ols'),
        return_model=True)
    mv.h5save(results_dir + '/' + 'sens_glm_avmovie_results.hdf5',
              hrf_estimates)
    print('calculated the, saving results.')
    return hrf_estimates
def createdataset(analysis, datadir, rootdir, anatdir, eventdir, zscore, rois):
    """
    Build the group dataset from all participants and save it as hdf5.

    For every ``sub-*`` directory found under ``rootdir`` the run files are
    loaded and stacked (one chunk per run), optionally z-scored, and
    annotated with per-voxel ROI feature attributes. The per-participant
    datasets are saved as a list, then hstacked, transposed (voxels become
    samples, time points become features), saved again and returned.

    Parameters
    ----------
    analysis : str
        'localizer' (exactly 4 run files expected) or 'avmovie' (exactly 8
        run files expected). Also used as prefix of the output file names.
    datadir : str
        Glob pattern appended to ``rootdir + participant`` to find run files.
    rootdir : str
        Path prefix containing the ``sub-*`` directories. It is joined by
        plain string concatenation, so it has to end with a path separator.
    anatdir : str
        Path appended to ``rootdir + participant``; holds
        ``brain_mask_tmpl.nii.gz`` and the ROI mask files.
    eventdir : str
        Passed to ``get_group_events``; only used for baseline z-scoring.
    zscore : str
        'baseline-zscore' (honoured only when ``analysis == 'localizer'``),
        'zscore', or anything else for no z-scoring.
    rois : iterable of str
        ROI names. 'VIS' is treated as non-lateralized; every other ROI is
        looked up as ``l<roi>*`` / ``r<roi>*`` mask files.

    Returns
    -------
    ds : Dataset
        The transposed group dataset.

    Notes
    -----
    ``outdir`` is not a parameter; it is read from the enclosing (module)
    scope -- NOTE(review): confirm it is defined wherever this is called.

    Feature attributes added per participant: ``participant``, one
    attribute per ROI name, ``lat_<roi>`` for lateralized ROIs
    (0 = outside, 1 = left, 2 = right) and ``all_ROIs`` (string labels
    'brain', 'left <roi>', 'right <roi>', 'VIS' or 'overlap').
    """
    # initialize a list to load all datasets into:
    data_dss = []
    # get list of participants from root dir
    participants = sorted(
        [path.split('/')[-1] for path in glob(rootdir + 'sub-*')])
    assert len(participants) != 0
    print('The following participants were found: {}'.format(participants))
    for participant in participants:
        # collect this participant's run files and the brain mask file name
        data_fns = sorted(glob(rootdir + participant + datadir))
        print(rootdir + participant + datadir)
        mask_fn = rootdir + participant + anatdir + 'brain_mask_tmpl.nii.gz'
        # sanity check: expected number of runs per experiment
        if analysis == 'localizer':
            assert len(data_fns) == 4
        if analysis == 'avmovie':
            assert len(data_fns) == 8
        # stack all runs; the position in the sorted file list is the chunk
        data_ds = mv.vstack([
            mv.fmri_dataset(data_fn, mask=mask_fn, chunks=run)
            for run, data_fn in enumerate(data_fns)
        ])
        data_ds.fa['participant'] = [participant] * data_ds.shape[1]
        print('loaded data for participant {}.'.format(participant))
        # z scoring
        if analysis == 'localizer' and zscore == 'baseline-zscore':
            # z-score with means/stds taken from the baseline periods
            events = get_group_events(eventdir)
            means, stds = extract_baseline(events, data_ds)
            mv.zscore(data_ds, params=(means, stds), chunks_attr='chunks')
            print('finished baseline zscoring for participant {}.'.format(
                participant))
        elif zscore == 'zscore':
            mv.zscore(data_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')
        # roi masks: one label per voxel, every voxel starts out as 'brain'.
        # NOTE(review): 'S10' is a fixed-width 10-character dtype, so labels
        # longer than 10 characters would be truncated -- confirm ROI names
        # stay short enough ('right XXX' is 9 characters).
        all_rois_mask = np.array([['brain'] * data_ds.shape[1]]).astype('S10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                      '{0}_*_mask_tmpl.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                           'l{0}*mask_tmpl.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                            'r{0}*mask_tmpl.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns
            if len(roi_fns) == 0:
                print(
                    "ROI {0} does not exist for participant {1}; appending all zeros"
                    .format(roi, participant))
                roi_mask = np.zeros((1, data_ds.shape[1]))
            elif len(roi_fns) == 1:
                # NOTE(review): a single mask file is used as loaded (it is
                # not binarized here) -- confirm mask files are already 0/1
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ], axis=0)
                # Set any voxels that might exceed 1 to 1
                roi_mask = np.where(roi_mask > 0, 1, 0)
            # Ensure that number of voxels in ROI mask matches dataset dimension
            assert roi_mask.shape[1] == data_ds.shape[1]
            # Flatten mask into list
            roi_flat = list(roi_mask.ravel())
            # Assign ROI mask to data feature attributes
            data_ds.fa[roi] = roi_flat
            # Get lateralized masks as well
            if roi != 'VIS':
                # 0 = voxel not in this ROI, 1 = left, 2 = right. The right
                # hemisphere is written last, so it wins where both overlap.
                lat_roi_mask = np.zeros((1, data_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0],
                                        mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                            for left_roi_fn in left_roi_fns
                        ], axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) == 0:
                    # no left mask: keep an all-zero mask so the overlap
                    # bookkeeping below can still index with it
                    left_roi_mask = np.zeros((1, data_ds.shape[1]))
                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0],
                                        mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                            for right_roi_fn in right_roi_fns
                        ], axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    # no right mask: all-zero placeholder, see above
                    right_roi_mask = np.zeros((1, data_ds.shape[1]))
                # Ensure that number of voxels in ROI mask matches dataset dimension
                assert lat_roi_mask.shape[1] == data_ds.shape[1]
                # Flatten mask into list
                lat_roi_flat = list(lat_roi_mask.ravel())
                # Assign ROI mask to data feature attributes
                data_ds.fa['lat_' + roi] = lat_roi_flat
                # Check existing feature attribute for all ROIS for overlaps:
                # a voxel of this ROI that already carries a label other than
                # 'brain' (i.e. was claimed by an earlier ROI) becomes 'overlap'
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')
                # label the remaining (non-overlap) voxels; order matters:
                # the right label overwrites the left one for shared voxels
                all_rois_mask[(left_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            elif roi == 'VIS':
                # the VIS mask files are globbed and summed a second time
                # here; only the ``> 0`` pattern of the sum is used below
                roi_fns = sorted(
                    glob(rootdir + participant + anatdir +
                         '/{0}_*_mask_tmpl.nii.gz'.format(roi)))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ], axis=0)
                # same overlap bookkeeping as for the lateralized ROIs
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = roi
        # Flatten mask into list
        all_rois_flat = list(all_rois_mask.ravel())
        # Assign roi mask to dataset feature attributes
        data_ds.fa['all_ROIs'] = all_rois_flat
        # join all datasets
        data_dss.append(data_ds)
    # save full dataset (list of per-participant datasets)
    mv.h5save(outdir + '{}_groupdataset.hdf5'.format(analysis), data_dss)
    print('saved the collection of all subjects datasets.')
    # squish everything together
    ds_wide = mv.hstack(data_dss)
    # transpose the dataset, time points are now features; the former
    # feature attributes become sample attributes and vice versa
    ds = mv.Dataset(ds_wide.samples.T,
                    sa=ds_wide.fa.copy(),
                    fa=ds_wide.sa.copy())
    mv.h5save(outdir + '{}_groupdataset_transposed.hdf5'.format(analysis), ds)
    print('Transposed the group-dataset and saved it.')
    return ds
def dotheclassification(ds_movie, ds_loc, classifier, bilateral,
                        n_participants=15):
    """Cross-validate an ROI classifier on the movie and localizer datasets.

    For each of the two datasets a leave-one-participant-out
    cross-validation is run. The per-fold predictions are collected via a
    callback, saved to ``results_dir`` (module-level name) as
    ``cv_classification_results_<movie|loc>.hdf5`` and merged into pandas
    dataframes to compare both experiments voxel by voxel.

    Parameters
    ----------
    ds_movie, ds_loc : Dataset
        Datasets of the movie and the localizer experiment. Their
        ``targets`` sample attribute is (over)written in place. They need
        the sample attributes ``participant``, ``voxel_indices`` and
        ``bilat_ROIs`` / ``all_ROIs``.
    classifier : {'gnb', 'sgd', 'l-sgd'}
        'gnb': ``mv.GNB`` with common variance and 'ratio' prior;
        'sgd': sklearn ``SGDClassifier`` wrapped in ``SKLLearnerAdapter``;
        'l-sgd': the same learner wrapped in ``mv.MulticlassClassifier``
        (pairwise decisions).
    bilateral : bool
        If true, ``bilat_ROIs`` are used as targets, else ``all_ROIs``.
    n_participants : int, optional
        Number of distinct participants that must show up in the held-out
        data (sanity check). Defaults to 15, the value that used to be
        hard-coded.

    Returns
    -------
    compare_exp : pandas.DataFrame
        One row per (voxel_indices, participants) with the number of
        experiments that classified the voxel correctly (``hits``, 0-2),
        its ``targets`` and the predictions ``pred_movie`` / ``pred_loc``.
    all_testing : pandas.DataFrame
        All held-out predictions of both experiments.
    ROIS : numpy.ndarray
        Unique target labels of the movie dataset.

    Raises
    ------
    ValueError
        If ``classifier`` is not one of the supported names.
    """
    # fail early and explicitly; an unknown name used to surface only as a
    # NameError on ``clf`` after the dataset had already been modified
    known_classifiers = ('gnb', 'sgd', 'l-sgd')
    if classifier not in known_classifiers:
        raise ValueError(
            "Unknown classifier '{}'; expected one of: {}.".format(
                classifier, ', '.join(known_classifiers)))

    dfs = []
    for ds_type, ds in (('movie', ds_movie), ('loc', ds_loc)):
        ds.sa['targets'] = ds.sa.bilat_ROIs if bilateral else ds.sa.all_ROIs

        # a fresh classifier per dataset
        if classifier == 'gnb':
            clf = mv.GNB(common_variance=True, prior='ratio')
        else:
            # imported lazily so that sklearn is only needed for sgd variants
            from sklearn.linear_model import SGDClassifier
            # get a stochastic gradient descent into pymvpa by using the
            # SKLLearnerAdapter
            clf = mv.SKLLearnerAdapter(
                SGDClassifier(loss='hinge',
                              penalty='l2',
                              class_weight='balanced'))
            if classifier == 'l-sgd':
                # perform 1 vs 1 decisions (instead of one vs all)
                clf = mv.MulticlassClassifier(clf)

        # filled by the callback below, one Dataset per callback invocation
        classifications = []

        def store_class(data, node, result):
            # only ever invoked from within the cv() call of this very loop
            # iteration, so ``clf`` and ``classifications`` refer to the
            # objects created above
            class_ds = mv.Dataset(samples=data.sa.voxel_indices)
            class_ds.sa['targets'] = data.sa.targets
            class_ds.sa['partitions'] = data.sa.partitions
            class_ds.sa['predictions'] = clf.predict(data)
            class_ds.sa['participant'] = data.sa.participant
            classifications.append(class_ds)

        # do a crossvalidation classification and store the classification
        # results; the return value is not needed, everything of interest is
        # gathered by the callback
        cv = mv.CrossValidation(clf,
                                mv.NFoldPartitioner(attr='participant'),
                                errorfx=mv.mean_match_accuracy,
                                enable_ca=['stats'],
                                callback=store_class)
        cv(ds)

        # save classification results as a Dataset
        mv.h5save(
            results_dir +
            'cv_classification_results_{}.hdf5'.format(ds_type),
            classifications)
        print('Saved the classification results obtained during crossvalidation.')

        # get the classification list into pandas dataframes
        for classification in classifications:
            dfs.append(pd.DataFrame(data={
                'voxel_indices': list(classification.samples),
                'targets': list(classification.sa.targets),
                'predictions': list(classification.sa.predictions),
                'partitions': list(classification.sa.partitions),
                'participants': list(classification.sa.participant),
                'ds_type': [ds_type] * len(classification.sa.predictions),
            }))

    # get all folds into one dataframe, disregard the index
    all_classifications = pd.concat(dfs, ignore_index=True)
    # compute hits as correspondence between target and prediction
    # (vectorized instead of a python-level function call per row)
    all_classifications['hits'] = (
        all_classifications['predictions'] == all_classifications['targets']
    ).astype(int)
    # assign string labels to training and testing partitions (instead of
    # 1, 2); any other partition value stays unlabelled
    all_classifications['parts'] = all_classifications['partitions'].map(
        {1: 'train', 2: 'test'})
    # transform voxel coordinates from arrays (unhashable) into tuples
    all_classifications['voxel_indices'] = \
        all_classifications['voxel_indices'].apply(tuple)
    # subset the dataset to contain only the testing data
    all_testing = all_classifications[all_classifications.parts == "test"]
    # check that every participant is in the data
    assert len(all_testing.participants.unique()) == n_participants
    # to check for correspondence between the sum of the two experiments
    # confusion matrices, do sth like this:
    # len(all_testing[(all_testing['predictions'] == 'PPA')
    #                 & (all_testing['targets'] == 'VIS')])

    # this counts hits per fold across experiments (2 if both experiments
    # classified correctly, 1 if 1 experiment classified correctly, 0 if none
    # did). Also, append the targets per voxel. We use 'min' here because
    # aggregate needs any function, but targets are the same between the
    # experiments.
    sort_keys = ['voxel_indices', 'participants']
    compare_exp = all_testing.groupby(sort_keys).agg(
        {'hits': 'sum', 'targets': 'min'}).reset_index().sort_values(sort_keys)
    all_testing_movie = all_testing[
        all_testing.ds_type == 'movie'].sort_values(sort_keys).reset_index()
    all_testing_loc = all_testing[
        all_testing.ds_type == 'loc'].sort_values(sort_keys).reset_index()
    # append movie and loc predictions to the dataframe. The assignment
    # aligns on the (fresh, 0..n-1) index of the identically sorted frames;
    # this assumes each (voxel, participant) pair occurs exactly once per
    # experiment in the testing data.
    compare_exp['pred_movie'] = all_testing_movie.predictions
    compare_exp['pred_loc'] = all_testing_loc.predictions
    # get the ROIS from the classification
    ROIS = np.unique(ds_movie.sa.targets)
    # there can't be values greater than two or lower than zero
    assert compare_exp.hits.max() <= 2
    assert compare_exp.hits.min() >= 0
    return compare_exp, all_testing, ROIS
def project_betas(ds,
                  analysis,
                  eventdir,
                  results_dir,
                  annot_dir=None,
                  ):
    """
    Fit the event GLM on ``ds`` and project the betas back into the brain.

    Currently unused, but kept in utils because it may become relevant
    later. Belongs to the 2nd analysis approach. It is a separate function
    because the first analysis type excludes overlapping voxels (for
    classification purposes), whereas this GLM has to run on data that
    still contains the overlaps.

    Steps: transpose ``ds``, build the event structure with ``get_events``
    (for ``analysis == 'avmovie'`` the run timing comes from
    ``get_avmovietimes``), fit the HRF model, save the estimates to
    ``<results_dir>/betas_from_2nd_approach.hdf5``, transpose the estimates
    back and hand one beta vector per participant and regressor to
    ``buildremapper`` (always with ``ds_type='full'``; only the full
    dataset is supported so far).

    The resulting maps can be quick-and-dirty-plotted with e.g.
        mri_args = {
            'background': 'sourcedata/tnt/sub-01/bold3Tp2/in_grpbold3Tp2/head.nii.gz',
            'background_mask': 'sub-01/ses-movie/anat/brain_mask_tmpl.nii.gz'}
        mv.plot_lightbox(overlay=result_maps['sub-01']['scene'],
                         vlim=(1.5, None), **mri_args)

    TODO: maybe save the result maps (map2nifti(...).to_filename(...)).
    Open question: how to find the regressors with the highest betas for a
    given ROI -- averaging the betas within the ROI and sorting them?

    :return: nested OrderedDict, ``result_maps[participant][regressor]``
        holding whatever ``buildremapper`` returns (nifti images according
        to the original notes).
    """
    transposer = mv.TransposeMapper()
    ds_transposed = ds.get_mapped(transposer)
    assert ds_transposed.shape[0] < ds_transposed.shape[1]

    # get the appropriate event file; only the movie analysis needs run
    # information extracted from the transposed dataset
    if analysis == 'avmovie':
        ds_transposed, chunks, runs, runonsets = get_avmovietimes(
            ds_transposed)
    else:
        chunks = runs = runonsets = False
    events_dicts = get_events(analysis=analysis,
                              eventdir=eventdir,
                              results_dir=results_dir,
                              chunks=chunks,
                              runs=runs,
                              runonsets=runonsets,
                              annot_dir=annot_dir,
                              multimatch=False)

    # step 1: do the glm on the data ...
    hrf_estimates = mv.fit_event_hrf_model(
        ds_transposed,
        events_dicts,
        time_attr='time_coords',
        condition_attr='condition',
        design_kwargs={'drift_model': 'blank'},
        glmfit_kwargs={'model': 'ols'},
        return_model=True)
    # ... and keep a copy of the estimates on disk
    mv.h5save(results_dir + '/' + 'betas_from_2nd_approach.hdf5',
              hrf_estimates)
    print('calculated the glm, saving results')

    # step 2: back to the transposed layout to extract the betas
    hrf_estimates_transposed = hrf_estimates.get_mapped(mv.TransposeMapper())
    n_rows, n_cols = hrf_estimates_transposed.samples.shape[:2]
    assert n_rows > n_cols

    subs = np.unique(hrf_estimates_transposed.sa.participant)
    print('going on to project resulting betas back into brain...')
    regs = hrf_estimates_transposed.fa.condition
    assert len(subs) > 0

    from collections import OrderedDict
    result_maps = OrderedDict()
    for sub in subs:
        print('...for subject {}...'.format(sub))
        per_regressor = OrderedDict()
        result_maps[sub] = per_regressor
        # restrict the estimates to the current participant
        is_sub = hrf_estimates_transposed.sa.participant == sub
        sub_estimates = hrf_estimates_transposed[is_sub]
        data = mv.Dataset(hrf_estimates_transposed.samples[is_sub],
                          fa=sub_estimates.fa,
                          sa=sub_estimates.sa)
        # one beta vector per regressor; one map per regressor and subject
        betas_by_regressor = data.samples.T
        for col, reg in enumerate(regs):
            per_regressor[reg] = buildremapper(
                sub,
                betas_by_regressor[col],
                ds_type='full',
            )
    return result_maps
ws = [(1 / lf) / nf, (1 / hf) / nf] b, a = signal.butter(5, ws, btype='band') S = [signal.filtfilt(b, a, x) for x in ds.samples.T] ds.samples = np.array(S).T ds.samples = ds.samples.astype('float32') #Create Event-related Dataset onsets = np.arange(0, ds.nsamples - samples_size / TR, samples_size / TR) events = [] for on in onsets: Ev = dict() Ev['onset'] = on Ev['duration'] = samples_size / TR Ev['target'] = on * TR Ev['subj'] = subj events.append(Ev) evds = mvpa.eventrelated_dataset(ds, events=events) evds.fa['1stidx'] = evds.fa.event_offsetidx == 0 #Save pymvpa-dataset as hdf5 in dataset directory try: os.mkdir(os.path.join(path, 'dataset')) except: print 'results directory already exists' dsfile = subj + '_z' + str(zsc) + '_' + str(samples_size) + '_' + align mvpa.h5save(os.path.join(path, 'dataset', dsfile + '.hdf5'), evds, compression='gzip')