def norm_and_mean(norm,
                  bilateral,
                  classifier,
                  sensitivities):
    """This function normalizes a list of sensitivities to their L2 norm
    if norm = True, else just stacks them according to the classifier they
    were built with. The resulting stack of sensitivities is averaged with
    the mean_group_sample() function."""
    if norm:
        from sklearn.preprocessing import normalize
        import copy
        # default for normalization is the L2 norm
        sensitivities_to_normalize = copy.deepcopy(sensitivities)
        for i in range(len(sensitivities)):
            sensitivities_to_normalize[i].samples = normalize(
                sensitivities_to_normalize[i].samples,
                axis=1) * np.sqrt(sensitivities[i].shape[1])
            print(sensitivities[i].shape)
        sensitivities_stacked = mv.vstack(sensitivities_to_normalize)
        print('I normalized the data.')
    else:
        sensitivities_stacked = mv.vstack(sensitivities)

    sgds = ['sgd', 'l-sgd']

    if bilateral:
        if classifier in sgds:
            # Note: all SGD-based classifiers wanted an explicit 'targets'
            # sample attribute, therefore it is still present in the
            # sensitivities.
            # Note to self: we were wondering whether we assign correct
            # estimates to labels. I double checked (May 19) that estimates
            # here are assigned the correct estimate. References: ulabels are
            # assigned with the help of np.unique, which returns a sorted
            # array. Given
            # https://github.com/PyMVPA/PyMVPA/pull/607/files#diff-bbf744fd29d7f3e4abdf7a1586a5aa95,
            # the sensitivity calculation uses this order further
            # lexicographically.
            sensitivities_stacked.sa['bilat_ROIs_str'] = map(
                lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        else:
            # ...whereas in GNB, the results are in the 'bilat_ROIs'
            # sample attribute
            sensitivities_stacked.sa['bilat_ROIs_str'] = map(
                lambda p: '_'.join(p), sensitivities_stacked.sa.bilat_ROIs)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        if classifier in sgds:
            # Note: all SGD-based classifiers wanted an explicit 'targets'
            # sample attribute, therefore it is still present in the
            # sensitivities.
            sensitivities_stacked.sa['all_ROIs_str'] = map(
                lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        else:
            # ...whereas in GNB, the results are in the 'all_ROIs'
            # sample attribute
            sensitivities_stacked.sa['all_ROIs_str'] = map(
                lambda p: '_'.join(p), sensitivities_stacked.sa.all_ROIs)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)

    # return the averaged sensitivities
    return mean_sens
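# A minimal usage sketch for norm_and_mean() with invented data; it assumes
# `mv` is mvpa2.suite and `np` is numpy, as in the function above, and relies
# on Python 2 map() semantics like the function itself. Each "fold" dataset
# carries ROI-pair tuples in its 'targets' attribute, which is what the SGD
# branch joins into the *_ROIs_str attribute.
import numpy as np
import mvpa2.suite as mv

folds = []
for _ in range(2):
    ds = mv.Dataset(np.random.randn(2, 10))
    ds.sa['targets'] = [('left FFA', 'left PPA'), ('left FFA', 'right FFA')]
    folds.append(ds)

mean_sens = norm_and_mean(norm=True, bilateral=True,
                          classifier='l-sgd', sensitivities=folds)
print(mean_sens.sa.bilat_ROIs_str)  # one averaged row per ROI pair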
def dotheglm(sensitivities, eventdir):
    """dotheglm does the glm. It will squish the sensitivity datasets by
    vstacking them, calculate the mean sensitivity per ROI pair with the
    mean_group_sample() function, and transpose the result with a
    TransposeMapper(). It will read in the event files, average the durations
    (because there are tiny differences between subjects), and then put all
    of that into a glm."""
    sensitivities_stacked = mv.vstack(sensitivities)
    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.bilat_ROIs)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.all_ROIs)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # average onsets into one event file
    events = get_group_events(eventdir)
    # save the event file
    fmt = "%10.3f\t%10.3f\t%16s\t%60s"
    np.savetxt(results_dir + 'group_events.tsv', events, delimiter='\t',
               comments='', header='onset\tduration\ttrial_type\tstim_file',
               fmt=fmt)
    # get events into a list of dicts
    events_dicts = []
    for i in range(0, len(events)):
        dic = {
            'onset': events[i][0],
            'duration': events[i][1],
            'condition': events[i][2]
        }
        events_dicts.append(dic)

    hrf_estimates = mv.fit_event_hrf_model(mean_sens_transposed,
                                           events_dicts,
                                           time_attr='time_coords',
                                           condition_attr='condition',
                                           design_kwargs=dict(drift_model='blank'),
                                           glmfit_kwargs=dict(model='ols'),
                                           return_model=True)
    mv.h5save(results_dir + 'sens_glm_objectcategories_results.hdf5',
              hrf_estimates)
    print('calculated glm, saving results.')
    return hrf_estimates
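# For reference, a minimal sketch of the event specification that
# mv.fit_event_hrf_model() consumes above: a list of dicts, one per event,
# keyed by 'onset', 'duration', and the attribute named in condition_attr.
# The values here are invented for illustration only.
events_dicts = [
    {'onset': 0.0, 'duration': 4.0, 'condition': 'face'},
    {'onset': 12.0, 'duration': 4.0, 'condition': 'house'},
]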
print 'detrending (remove slow drifts in signal, and jumps between runs) ...', datetime.datetime.now()
# can be very memory intensive!
M.poly_detrend(dataset, polyord=1, chunks_attr='chunks')  # linear detrend
print '... done', datetime.datetime.now()

# ZSCORE
print 'zscore normalising (give all voxels similar variance) ...', datetime.datetime.now()
M.zscore(dataset, chunks_attr='chunks', param_est=('targets', ['base']))  # zscoring, on basis of rest periods
print '... done', datetime.datetime.now()
#P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png'))

pickleFile = gzip.open(preprocessedCache, 'wb', 5)
pickle.dump(dataset, pickleFile)

# AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL
print 'averaging over trials ...', datetime.datetime.now()
dataset = dataset.get_mapped(M.mean_group_sample(attrs=['chunks', 'targets']))
print '... only', dataset.shape[0], 'cases left now'
dataset.chunks = N.mod(N.arange(0, dataset.shape[0]), 5)

# print '\n\n\n'
# print dataset.targets
# print len(dataset.targets)
# print dataset.chunks
# print len(dataset.chunks)

# REDUCE TO CLASS LABELS, AND ONLY KEEP CONDITIONS OF INTEREST (JAPANESE VS ENGLISH)
dataset.targets = [t[0:2] for t in dataset.targets]
dataset = dataset[N.array([l in ['jj', 'je', 'ej', 'ee'] for l in dataset.sa.targets], dtype='bool')]
print '... and only', dataset.shape[0], 'cases of interest (Language Switch between Japanese vs English)'
dataset = M.datasets.miscfx.remove_invariant_features(dataset)
print 'saving as compressed file', trimmedCache
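# A condensed, self-contained sketch of the same preprocessing chain
# (linear detrend -> zscore -> averaging per chunk/target combination) on
# synthetic data; `M` is assumed to be mvpa2.suite, as in the script above.
import mvpa2.suite as M
from mvpa2.misc.data_generators import normal_feature_dataset

ds = normal_feature_dataset(perlabel=20, nlabels=2, nfeatures=50, nchunks=5)
M.poly_detrend(ds, polyord=1, chunks_attr='chunks')
M.zscore(ds, chunks_attr='chunks')  # no 'base' rest condition in synthetic data
ds = ds.get_mapped(M.mean_group_sample(attrs=['chunks', 'targets']))
print ds.shape  # one averaged sample per (chunk, target) combination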
print 'zscore normalising (give all voxels similar variance) ...', datetime.datetime.now()
M.zscore(dataset, chunks_attr='chunks', param_est=('targets', ['base']))  # zscoring, on basis of rest periods
print '... done', datetime.datetime.now()
#P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png'))

pickleFile = gzip.open(preprocessedCache, 'wb', 5)
pickle.dump(dataset, pickleFile)

# AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL
print 'averaging over trials ...', datetime.datetime.now()
dataset = dataset.get_mapped(M.mean_group_sample(attrs=['chunks', 'targets']))
print '... only', dataset.shape[0], 'cases left now'
dataset.chunks = N.mod(N.arange(0, dataset.shape[0]), 5)

# print '\n\n\n'
# print dataset.targets
# print len(dataset.targets)
# print dataset.chunks
# print len(dataset.chunks)

# REDUCE TO CLASS LABELS, AND ONLY KEEP CONDITIONS OF INTEREST (KEEP VS SWITCH)
dataset.targets = [t[0] for t in dataset.targets]
dataset = dataset[N.array([l in ['k', 's'] for l in dataset.sa.targets], dtype='bool')]
print '... and only', dataset.shape[0], 'cases of interest (Keep vs Switch Language)'
def normalize_dataset(ds, **kwargs):
    import collections
    import fractions

    mean = False
    normalization = 'feature'
    chunk_number = None

    for arg in kwargs:
        if arg == 'mean_samples':
            mean = kwargs[arg]
        if arg == 'img_dim':
            img_dim = int(kwargs[arg])
        if arg == 'normalization':
            normalization = str(kwargs[arg])
        if arg == 'chunk_number':
            chunk_number = kwargs[arg]

    n_targets = np.array([value for value in
                          collections.Counter(ds.targets).values()]).min()

    if chunk_number == 'adaptive':
        n_chunks = np.max([fractions.gcd(n_targets, i) for i in np.arange(2, 10)])
        if n_chunks == 1:
            n_chunks = 4
    elif isinstance(chunk_number, int):
        n_chunks = int(chunk_number)

    if chunk_number is not None:
        argsort = np.argsort(ds.targets)
        chunks = []
        for _ in ds.uniquetargets:
            chunk = np.linspace(0, n_chunks, n_targets,
                                endpoint=False, dtype=np.int)
            chunks.append(chunk)
        ds.chunks[argsort] = np.hstack(chunks)

    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num'])
        ds = ds.get_mapped(avg_mapper)

    if normalization == 'feature' or normalization == 'both':
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)  # , param_est=('targets', ['fixation']))

    if normalization == 'sample' or normalization == 'both':
        # normalizing image-wise
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds, axis=1)[:, None]
        ds.samples /= np.std(ds, axis=1)[:, None]
        ds.samples[np.isnan(ds.samples)] = 0

    # find event-related structure
    ds.a.events = find_events(  # event=ds.sa.event_num,
                              chunks=ds.sa.chunks,
                              targets=ds.sa.targets)
    return ds
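# A hypothetical call exercising the kwargs parsed above: 'adaptive' derives
# the chunk count from the per-target sample count via gcd, samples sharing
# an 'event_num' attribute get averaged, and normalization is applied both
# feature- and sample-wise. `ds` is assumed to carry 'event_num' and 'file'
# sample attributes, as the function requires.
ds = normalize_dataset(ds,
                       mean_samples='True',
                       img_dim=4,
                       normalization='both',
                       chunk_number='adaptive')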
def makeaplot(events,
              sensitivities,
              hrf_estimates,
              roi_pair,
              fn=None,
              include_all_regressors=False):
    """
    This produces a time series plot for the ROI class comparison specified
    in roi_pair, such as roi_pair = ['left FFA', 'left PPA'].
    If include_all_regressors = True, the function will create a potentially
    overloaded legend with all of the regressors, regardless of whether they
    occurred in the run. (Plotting then takes longer, but this is a useful
    option if all regressors are of relevance and can be tweaked in Inkscape.)
    If the figure should be saved, specify an existing path in the parameter fn.

    # TODOs for the future: runs=None, overlap=False, grouping (there should
    # be a way to no longer rely on hardcoded stimuli and colors within the
    # function, e.g. with OrderedDicts)
    """
    import matplotlib.pyplot as plt

    # normalize the sensitivities
    from sklearn.preprocessing import normalize
    import copy
    # default for normalization is the L2 norm
    sensitivities_to_normalize = copy.deepcopy(sensitivities)
    for i in range(len(sensitivities)):
        sensitivities_to_normalize[i].samples = normalize(
            sensitivities_to_normalize[i].samples, axis=1)

    sensitivities_stacked = mv.vstack(sensitivities_to_normalize)
    # get the mean, because we don't want to have 15 folds of sensitivities,
    # but their average
    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)

    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    chunks = mean_sens_transposed.sa.chunks
    assert np.all(chunks[1:] >= chunks[:-1])

    # the TR was not preserved/carried through in .a, so we will guesstimate
    # it based on the values of time_coords
    runs = np.unique(mean_sens_transposed.sa.chunks)
    tc = mean_sens_transposed.sa.time_coords
    TRdirty = sorted(np.unique(tc[1:] - tc[:-1]))[-1]
    assert np.abs(np.round(TRdirty, decimals=2) - TRdirty) < 0.0001

    mean_sens_transposed.sa.time_coords = np.arange(len(mean_sens_transposed)) * TRdirty
    # runlengths in seconds
    runlengths = [np.max(tc[mean_sens_transposed.sa.chunks == run]) + TRdirty
                  for run in runs]
    runonsets = [sum(runlengths[:run]) for run in runs]
    # just append any large number to accommodate the fact that the last run
    # also needs an offset at some point
    runonsets.append(99999)

    for j in range(len(hrf_estimates.fa.bilat_ROIs_str)):
        comparison = hrf_estimates.fa.targets[j][0]
        if (roi_pair[0] in comparison) and (roi_pair[1] in comparison):
            roi_pair_idx = j
    roi_betas_ds = hrf_estimates[:, roi_pair_idx]
    roi_sens_ds = mean_sens_transposed[:, roi_pair_idx]

    from collections import OrderedDict
    block_design_betas = OrderedDict(
        sorted(zip(roi_betas_ds.sa.condition, roi_betas_ds.samples[:, 0]),
               key=lambda x: x[1]))
    block_design = list(block_design_betas)

    for run in runs:
        fig, ax = plt.subplots(1, 1, figsize=[18, 10])
        colors = ['#7b241c', '#e74c3c', '#154360', '#3498db', '#145a32', '#27ae60',
                  '#9a7d0a', '#f4d03f', '#5b2c6f', '#a569bd', '#616a6b', '#ccd1d1']
        plt.suptitle('Timecourse of sensitivities, {} versus {}, run {}'.format(
            roi_pair[0], roi_pair[1], run + 1),
            fontsize='large')
        # 2 is a TR here... sorry, we are in a rush
        run_onset = int(runonsets[run] // 2)
        run_offset = int(runonsets[run + 1] // 2)
        # for each run, adjust the x-axis
        plt.xlim([min(mean_sens_transposed.sa.time_coords[run_onset:int(run_offset)]),
                  max(mean_sens_transposed.sa.time_coords[run_onset:int(run_offset)])])
        plt.ylim([-2.7, 4.5])
        plt.xlabel('Time in sec')
        plt.legend(loc=1)
        plt.grid(True)

        # for each stimulus, plot a color band on top of the plot
        for stimulus in block_design:
            # color = colors[0]
            print(stimulus)
            condition_event_mask = events['condition'] == stimulus
            onsets = events[condition_event_mask]['onset'].values
            onsets_run = [time for time in onsets
                          if np.logical_and(time > run_onset * 2,
                                            time < run_offset * 2)]
            durations = events[condition_event_mask]['duration'].values
            durations_run = [dur for idx, dur in enumerate(durations)
                             if np.logical_and(onsets[idx] > run_onset * 2,
                                               onsets[idx] < run_offset * 2)]
            # prepare for plotting
            r_height = 0.3
            y = 4
            if stimulus.startswith('run'):
                continue
            if stimulus.startswith('location'):
                # gradually decrease the band position over occurrences of
                # location stimuli
                y -= r_height
                color = 'darkgreen'
            elif 'face' in stimulus:
                if stimulus == 'many_faces':
                    color = 'tomato'
                else:
                    color = 'firebrick'
            elif stimulus == 'exterior':
                color = 'cornflowerblue'
                y -= 2 * r_height
            elif stimulus.startswith('time'):
                color = 'darkslategrey'
                y -= 3 * r_height
            elif stimulus == 'night':
                color = 'slategray'
                y -= 4 * r_height
            elif stimulus == 'scene-change':
                color = 'black'
                y -= 5 * r_height

            # get the beta corresponding to the stimulus to later use in the label
            beta = roi_betas_ds.samples[hrf_estimates.sa.condition == stimulus, 0]

            if include_all_regressors and onsets_run == []:
                # if there are no onsets for a particular regressor, but we
                # want to print all regressors, set i manually to 0
                rectangle = plt.Rectangle((0, 0),
                                          0,
                                          0,
                                          fc=color,
                                          alpha=0.5,
                                          label='_' * 0 + stimulus.replace(" ", "") +
                                                '(' + str('%.2f' % beta) + ')')
                plt.gca().add_patch(rectangle)

            for i, x in enumerate(onsets_run):
                # We need the i to trick the labeling. Matplotlib will attempt
                # to plot every single occurrence of a stimulus with a numbered
                # label. However, prefixing a label with '_' makes matplotlib
                # disregard it. If we attach '_' * i to the label, all but the
                # first onset get a '_' prefix and are ignored.
                r_width = durations_run[i]
                rectangle = plt.Rectangle((x, y),
                                          r_width,
                                          r_height,
                                          fc=color,
                                          alpha=0.5,
                                          label='_' * i + stimulus.replace(" ", "") +
                                                '(' + str('%.2f' % beta) + ')')
                plt.gca().add_patch(rectangle)
                plt.legend(loc=1)
                # plt.axis('scaled')
                # del colors[0]

        times = roi_sens_ds.sa.time_coords[run_onset:run_offset]
        ax.plot(times, roi_sens_ds.samples[run_onset:run_offset],
                '-', color='black', lw=1.0)
        # plot the glm model results
        glm_model = hrf_estimates.a.model.results_[0.0].predicted[
            run_onset:int(run_offset), roi_pair_idx]
        # ax2 = ax.twinx()
        ax.plot(times, glm_model, '-', color='#7b241c', lw=1.0)
        model_fit = hrf_estimates.a.model.results_[0.0].R2[roi_pair_idx]
        plt.title('R squared: %.2f' % model_fit)
        if fn:
            plt.savefig(results_dir +
                        'timecourse_avmovie_glm_sens_{}_vs_{}_run-{}.svg'.format(
                            roi_pair[0], roi_pair[1], run + 1))
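# Hypothetical invocation of makeaplot(), assuming `events` is the pandas
# DataFrame read back from the full event file written by dotheglm(),
# `hrf_estimates` is dotheglm()'s return value, and `sensitivities` and
# `results_dir` exist in the surrounding scope as in the function above:
makeaplot(events,
          sensitivities,
          hrf_estimates,
          roi_pair=['left FFA', 'left PPA'],
          fn=results_dir,
          include_all_regressors=True)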
def dotheglm(sensitivities, eventdir, annot_dir):
    """dotheglm does the glm. It will squish the sensitivity datasets by
    vstacking them, calculate the mean sensitivity per ROI pair with the
    mean_group_sample() function, and transpose the result with a
    TransposeMapper(). It will read the event files into an appropriate
    data structure and compute one glm over the concatenated runs."""
    # normalize the sensitivities
    from sklearn.preprocessing import normalize
    import copy
    # default for normalization is the L2 norm
    sensitivities_to_normalize = copy.deepcopy(sensitivities)
    for i in range(len(sensitivities)):
        sensitivities_to_normalize[i].samples = normalize(
            sensitivities_to_normalize[i].samples, axis=1)

    sensitivities_stacked = mv.vstack(sensitivities_to_normalize)
    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # get a list of the event files with occurrences of faces
    event_files = sorted(glob(eventdir + '/*'))
    assert len(event_files) == 8

    # get additional events from the location annotation
    location_annotation = pd.read_csv(annot_dir, sep='\t')

    # get all settings with more than one occurrence
    setting = [set for set in location_annotation.setting.unique()
               if (location_annotation.setting[location_annotation.setting == set].value_counts()[0] > 1)]

    # get onsets and durations
    onset = []
    duration = []
    condition = []
    for set in setting:
        for i in range(location_annotation.setting[location_annotation['setting'] == set].value_counts()[0]):
            onset.append(location_annotation[location_annotation['setting'] == set]['onset'].values[i])
            duration.append(location_annotation[location_annotation['setting'] == set]['duration'].values[i])
        condition.append([set] * (i + 1))
    # flatten conditions
    condition = [y for x in condition for y in x]
    assert len(condition) == len(onset) == len(duration)

    # concatenate the strings
    condition_str = [set.replace(' ', '_') for set in condition]
    condition_str = ['location_' + set for set in condition_str]

    # put it in a dataframe
    locations = pd.DataFrame({
        'onset': onset,
        'duration': duration,
        'condition': condition_str
    })

    # sort according to onsets to be paranoid
    locations_sorted = locations.sort_values(by='onset')

    # this is a dataframe encoding the flow of time
    time_forward = pd.DataFrame([{
        'condition': 'time+',
        'onset': location_annotation['onset'][i],
        'duration': 1.0
    } for i in range(len(location_annotation) - 1)
        if location_annotation['flow_of_time'][i] in ['+', '++']])

    time_back = pd.DataFrame([{
        'condition': 'time-',
        'onset': location_annotation['onset'][i],
        'duration': 1.0
    } for i in range(len(location_annotation) - 1)
        if location_annotation['flow_of_time'][i] in ['-', '--']])

    # sort according to onsets to be paranoid
    time_forward_sorted = time_forward.sort_values(by='onset')
    time_back_sorted = time_back.sort_values(by='onset')

    scene_change = pd.DataFrame([{
        'condition': 'scene-change',
        'onset': location_annotation['onset'][i],
        'duration': 1.0
    } for i in range(len(location_annotation) - 1)])
    scene_change_sorted = scene_change.sort_values(by='onset')

    # this is a dataframe encoding exterior
    exterior = pd.DataFrame([{
        'condition': 'exterior',
        'onset': location_annotation['onset'][i],
        'duration': location_annotation['duration'][i]
    } for i in range(len(location_annotation) - 1)
        if (location_annotation['int_or_ext'][i] == 'ext')])
    # sort according to onsets to be paranoid
    exterior_sorted = exterior.sort_values(by='onset')

    # this is a dataframe encoding nighttime
    night = pd.DataFrame([{
        'condition': 'night',
        'onset': location_annotation['onset'][i],
        'duration': location_annotation['duration'][i]
    } for i in range(len(location_annotation) - 1)
        if (location_annotation['time_of_day'][i] == 'night')])
    # sort according to onsets to be paranoid
    night_sorted = night.sort_values(by='onset')

    assert np.all(locations_sorted.onset[1:].values >= locations_sorted.onset[:-1].values)
    assert np.all(time_back_sorted.onset[1:].values >= time_back_sorted.onset[:-1].values)
    assert np.all(time_forward_sorted.onset[1:].values >= time_forward_sorted.onset[:-1].values)
    assert np.all(exterior_sorted.onset[1:].values >= exterior_sorted.onset[:-1].values)
    assert np.all(night_sorted.onset[1:].values >= night_sorted.onset[:-1].values)
    assert np.all(scene_change_sorted.onset[1:].values >= scene_change_sorted.onset[:-1].values)

    # check whether chunks are increasing as well, as a sanity check
    chunks = mean_sens_transposed.sa.chunks
    assert np.all(chunks[1:] >= chunks[:-1])

    # the TR was not preserved/carried through in .a, so we will guesstimate
    # it based on the values of time_coords
    tc = mean_sens_transposed.sa.time_coords
    TRdirty = sorted(np.unique(tc[1:] - tc[:-1]))[-1]
    assert np.abs(np.round(TRdirty, decimals=2) - TRdirty) < 0.0001

    # make time coordinates real seconds
    mean_sens_transposed.sa.time_coords = np.arange(len(mean_sens_transposed)) * TRdirty

    # get runs, and runlengths in seconds
    runs = sorted(mean_sens_transposed.UC)
    assert runs == range(len(runs))
    runlengths = [np.max(tc[mean_sens_transposed.sa.chunks == run]) + TRdirty
                  for run in runs]
    runonsets = [sum(runlengths[:run]) for run in runs]
    assert len(runs) == 8

    # initialize the list of dicts that gets later passed to the glm
    events_dicts = []
    # This is relevant for later stacking all dataframes together and
    # paranoidly making sure that they have the same columns
    cols = ['onset', 'duration', 'condition']

    for run in runs:
        # get face data
        eventfile = sorted(event_files)[run]
        events = pd.read_csv(eventfile, sep='\t')

        for index, row in events.iterrows():
            # disregard no faces, put everything else into the event structure
            if row['condition'] != 'no_face':
                dic = {
                    'onset': row['onset'] + runonsets[run],
                    'duration': row['duration'],
                    'condition': row['condition']
                }
                events_dicts.append(dic)

    # build one regressor per run
    run_reg = pd.DataFrame([{
        'onset': runonsets[i],
        'duration': abs(runonsets[i] - runonsets[i + 1]),
        'condition': 'run-' + str(i + 1)
    } for i in range(7)])

    # get all of these wonderful dataframes into a list and squish them
    dfs = [locations_sorted[cols], scene_change_sorted[cols],
           time_back_sorted[cols], time_forward_sorted[cols],
           exterior_sorted[cols], night_sorted[cols], run_reg[cols]]
    allevents = pd.concat(dfs)

    # save all non-face-related events in an event file, just for the sake of it
    allevents.to_csv(results_dir + '/' + 'non_face_regs.tsv', sep='\t', index=False)

    # append non-face events to the event structure for the glm
    for index, row in allevents.iterrows():
        dic = {
            'onset': row['onset'],
            'duration': row['duration'],
            'condition': row['condition']
        }
        events_dicts.append(dic)

    # save this event dict structure as a tsv file
    import csv
    with open(results_dir + '/' + 'full_event_file.tsv', 'w') as tsvfile:
        fieldnames = ['onset', 'duration', 'condition']
        writer = csv.DictWriter(tsvfile, fieldnames=fieldnames, delimiter='\t')
        writer.writeheader()
        writer.writerows(events_dicts)
    # save this event file also as a json file... can there ever be enough
    # different files...
    import json
    with open(results_dir + '/' + 'allevents.json', 'w') as f:
        json.dump(events_dicts, f)

    # do the glm - we've earned it
    hrf_estimates = mv.fit_event_hrf_model(mean_sens_transposed,
                                           events_dicts,
                                           time_attr='time_coords',
                                           condition_attr='condition',
                                           design_kwargs=dict(drift_model='blank'),
                                           glmfit_kwargs=dict(model='ols'),
                                           return_model=True)

    mv.h5save(results_dir + '/' + 'sens_glm_avmovie_results.hdf5', hrf_estimates)
    print('calculated the glm, saving results.')
    return hrf_estimates
print 'detrending (remove slow drifts in signal, and jumps between runs) ...', datetime.datetime.now()
# can be very memory intensive!
M.poly_detrend(dataset, polyord=1, chunks_attr='chunks')  # linear detrend
print '... done', datetime.datetime.now()

# ZSCORE
print 'zscore normalising (give all voxels similar variance) ...', datetime.datetime.now()
M.zscore(dataset, chunks_attr='chunks', param_est=('targets', ['base']))  # zscoring, on basis of rest periods
print '... done', datetime.datetime.now()
#P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png'))

pickleFile = gzip.open(preprocessedCache, 'wb', 5)
pickle.dump(dataset, pickleFile)

# AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL
print 'averaging over trials ...', datetime.datetime.now()
dataset = dataset.get_mapped(M.mean_group_sample(attrs=['chunks', 'targets']))
print '... only', dataset.shape[0], 'cases left now'
dataset.chunks = N.mod(N.arange(0, dataset.shape[0]), 5)

# print '\n\n\n'
# print dataset.targets
# print len(dataset.targets)
# print dataset.chunks
# print len(dataset.chunks)

# REDUCE TO CLASS LABELS, AND ONLY KEEP CONDITIONS OF INTEREST (JAPANESE VS ENGLISH)
dataset.targets = [t[0] for t in dataset.targets]
dataset = dataset[N.array([l in ['j', 'e'] for l in dataset.sa.targets], dtype='bool')]
print '... and only', dataset.shape[0], 'cases of interest (Language Presented)'
dataset = M.datasets.miscfx.remove_invariant_features(dataset)
print 'saving as compressed file', trimmedCache
def _train(self, ds):
    avg_mapper = mean_group_sample([self._attribute])
    ds = ds.get_mapped(avg_mapper)
    return LinearCSVMC._train(self, ds)
def _train(self, ds):
    avg_mapper = mean_group_sample(['trial'])
    ds = ds.get_mapped(avg_mapper)
    return self._clf._train(ds)
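# A self-contained sketch of the wrapper pattern above (the class name is
# invented for illustration): repeated measurements that share a 'trial'
# sample attribute are collapsed to their mean before the wrapped SVM is
# trained, mirroring the _train() overrides shown here.
from mvpa2.suite import LinearCSVMC, mean_group_sample

class TrialAveragingCSVMC(LinearCSVMC):  # hypothetical name
    def _train(self, ds):
        # average all samples belonging to the same trial
        ds = ds.get_mapped(mean_group_sample(['trial']))
        return LinearCSVMC._train(self, ds)

# usage sketch: cross-validate like any other PyMVPA classifier;
# the dataset must define a 'trial' sample attribute.
# cv = CrossValidation(TrialAveragingCSVMC(), NFoldPartitioner())
# res = cv(ds)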
print "detrending (remove slow drifts in signal, and jumps between runs) ...", datetime.datetime.now() # can be very memory intensive! M.poly_detrend(dataset, polyord=1, chunks_attr="chunks") # linear detrend print "... done", datetime.datetime.now() # ZSCORE print "zscore normalising (give all voxels similar variance) ...", datetime.datetime.now() M.zscore(dataset, chunks_attr="chunks", param_est=("targets", ["base"])) # zscoring, on basis of rest periods print "... done", datetime.datetime.now() # P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png')) pickleFile = gzip.open(preprocessedCache, "wb", 5) pickle.dump(dataset, pickleFile) # AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL print "averaging over trials ...", datetime.datetime.now() dataset = dataset.get_mapped(M.mean_group_sample(attrs=["chunks", "targets"])) print "... only", dataset.shape[0], "cases left now" dataset.chunks = N.mod(N.arange(0, dataset.shape[0]), 5) # print '\n\n\n' # print dataset.targets # print len(dataset.targets) # print dataset.chunks # print len(dataset.chunks) # REDUCE TO CLASS LABELS, AND ONLY KEEP CONDITIONS OF INTEREST (KEEP VS SWITCH) dataset.targets = [t[0] for t in dataset.targets] dataset = dataset[N.array([l in ["k", "s"] for l in dataset.sa.targets], dtype="bool")] print "... and only", dataset.shape[0], "cases of interest (Keep vs Switch Language)" dataset = M.datasets.miscfx.remove_invariant_features(dataset) print "saving as compressed file", trimmedCache
def preprocess_dataset(ds, type_, **kwargs):
    """
    Preprocess the dataset: detrending of single runs and of chunks;
    the zscoring is likewise done by chunks and by run.

    Parameters
    ----------
    ds : Dataset
        The dataset to be preprocessed
    type_ : string
        The experiment to be processed
    kwargs : dict
        mean_samples - boolean : if samples should be averaged
        label_included - list : list of labels to be included in the dataset
        label_dropped - string : label to be dropped (rest, fixation)

    Returns
    -------
    Dataset
        the processed dataset
    """
    mean = False
    normalization = 'feature'

    # note: label_included and label_dropped must be supplied in kwargs,
    # otherwise they are unbound below
    for arg in kwargs:
        if arg == 'mean_samples':
            mean = kwargs[arg]
        if arg == 'label_included':
            label_included = kwargs[arg].split(',')
        if arg == 'label_dropped':
            label_dropped = kwargs[arg]
        if arg == 'img_dim':
            img_dim = int(kwargs[arg])
        if arg == 'normalization':
            normalization = str(kwargs[arg])

    logger.info('Dataset preprocessing: Detrending...')
    if len(np.unique(ds.sa['file'])) != 1:
        poly_detrend(ds, polyord=1, chunks_attr='file')
    poly_detrend(ds, polyord=1, chunks_attr='chunks')

    if label_dropped != 'None':
        logger.info('Removing labels...')
        ds = ds[ds.sa.targets != label_dropped]
    if label_included != ['all']:
        ds = ds[np.array([l in label_included for l in ds.sa.targets],
                         dtype='bool')]

    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num'])
        ds = ds.get_mapped(avg_mapper)

    if normalization == 'feature' or normalization == 'both':
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)  # , param_est=('targets', ['fixation']))

    if normalization == 'sample' or normalization == 'both':
        # normalizing image-wise
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds, axis=1)[:, None]
        ds.samples /= np.std(ds, axis=1)[:, None]
        ds.samples[np.isnan(ds.samples)] = 0

    ds.a.events = find_events(  # event=ds.sa.event_num,
                              chunks=ds.sa.chunks,
                              targets=ds.sa.targets)
    return ds
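# A hypothetical call matching the kwargs parsed above; label_included is a
# comma-separated string and label_dropped names the baseline condition to
# discard. The dataset is assumed to carry 'file' and 'event_num' sample
# attributes, as the function requires.
ds = preprocess_dataset(ds,
                        'task',
                        mean_samples='True',
                        label_included='face,house',
                        label_dropped='fixation',
                        img_dim=4,
                        normalization='both')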
def makeaplot(events,
              sensitivities,
              hrf_estimates,
              roi_pair,
              fn=True):
    """
    This produces a time series plot for the ROI class comparison specified
    in roi_pair, such as roi_pair = ['left FFA', 'left PPA'].
    """
    import matplotlib.pyplot as plt

    # take the mean and transpose the sensitivities
    sensitivities_stacked = mv.vstack(sensitivities)
    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.bilat_ROIs)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.all_ROIs)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # some parameters
    # get the conditions
    block_design = sorted(np.unique(events['trial_type']))
    reorder = [0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11]
    block_design = [block_design[i] for i in reorder]
    # start and end indices to chunk the timeseries into runs
    run_startidx = np.array([0, 157, 313, 469])
    run_endidx = np.array([156, 312, 468, 624])
    runs = np.unique(mean_sens_transposed.sa.chunks)

    for j in range(len(hrf_estimates.fa.bilat_ROIs_str)):
        comparison = hrf_estimates.fa.bilat_ROIs[j][0]
        if (roi_pair[0] in comparison) and (roi_pair[1] in comparison):
            roi_pair_idx = j
    roi_betas_ds = hrf_estimates[:, roi_pair_idx]
    roi_sens_ds = mean_sens_transposed[:, roi_pair_idx]

    for run in runs:
        fig, ax = plt.subplots(1, 1, figsize=[18, 10])
        colors = ['#7b241c', '#e74c3c', '#154360', '#3498db', '#145a32', '#27ae60',
                  '#9a7d0a', '#f4d03f', '#5b2c6f', '#a569bd', '#616a6b', '#ccd1d1']
        plt.suptitle('Timecourse of sensitivities, {} versus {}, run {}'.format(
            roi_pair[0], roi_pair[1], run + 1),
            fontsize='large')
        plt.xlim([0, max(mean_sens_transposed.sa.time_coords)])
        plt.ylim([-5, 7])
        plt.xlabel('Time in sec')
        plt.legend(loc=1)
        plt.grid(True)

        # for each stimulus, plot a color band on top of the plot
        for stimulus in block_design:
            onsets = events[events['trial_type'] == stimulus]['onset'].values
            durations = events[events['trial_type'] == stimulus]['duration'].values
            stimulation_end = np.sum([onsets, durations], axis=0)
            r_height = 1
            color = colors[0]
            y = 6

            # get the beta corresponding to the stimulus to later use in the label
            beta = roi_betas_ds.samples[
                hrf_estimates.sa.condition == stimulus.replace(" ", ""), 0]

            for i in range(len(onsets)):
                r_width = durations[i]
                x = stimulation_end[i]
                rectangle = plt.Rectangle((x, y),
                                          r_width,
                                          r_height,
                                          fc=color,
                                          alpha=0.5,
                                          label='_' * i + stimulus.replace(" ", "") +
                                                '(' + str('%.2f' % beta) + ')')
                plt.gca().add_patch(rectangle)
                plt.legend(loc=1)
            del colors[0]

        times = roi_sens_ds.sa.time_coords[run_startidx[run]:run_endidx[run]]

        ax.plot(times, roi_sens_ds.samples[run_startidx[run]:run_endidx[run]],
                '-', color='black', lw=1.0)
        glm_model = hrf_estimates.a.model.results_[0.0].predicted[
            run_startidx[run]:run_endidx[run], roi_pair_idx]
        ax.plot(times, glm_model, '-', color='#7b241c', lw=1.0)
        model_fit = hrf_estimates.a.model.results_[0.0].R2[roi_pair_idx]
        plt.title('R squared: %.2f' % model_fit)
        if fn:
            plt.savefig(results_dir +
                        'timecourse_localizer_glm_sens_{}_vs_{}_run-{}.svg'.format(
                            roi_pair[0], roi_pair[1], run + 1))