def balance_dataset_timewise(ds, label, sort=True, **kwargs):

    ################ To be changed ######################
    m_fixation = ds.targets == 'fixation'
    ev_fix = list(zip(ds.chunks[m_fixation],
                      4 * ((ds.sa.events_number[m_fixation] + 2) // 4 - 1) + 2))
    ####################################################

    ev_fix = np.array(ev_fix)
    ds.sa.events_number[m_fixation] = np.int_(ev_fix.T[1])

    arg_sort = np.argsort(ds.sa.events_number)
    events = find_events(chunks=ds[arg_sort].sa.chunks,
                         targets=ds[arg_sort].sa.targets)

    # min duration
    min_duration = np.min([e['duration'] for e in events])

    mask = False
    for ev in np.unique(ds.sa.events_number):
        mask_event = ds.sa.events_number == ev
        mask_event[np.nonzero(mask_event)[0][min_duration - 1] + 1:] = False
        mask = mask + mask_event

    if sort:
        arg_sort = np.argsort(ds[mask].sa.events_number)
        ds = ds[mask][arg_sort]
    else:
        ds = ds[mask]

    ds.a.events = find_events(targets=ds.targets, chunks=ds.chunks)

    return ds
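# Minimal illustrative sketch (not from the original sources): the
# "To be changed" block above maps fixation event numbers onto bins of four,
# i.e. numbers 2-5 become 2, 6-9 become 6, 10-13 become 10, and so on.
# A small demonstration of that arithmetic with hypothetical event numbers:
import numpy as np

_n = np.arange(2, 14)
for _old, _new in zip(_n, 4 * ((_n + 2) // 4 - 1) + 2):
    print(_old, '->', _new)   # 2..5 -> 2, 6..9 -> 6, 10..13 -> 10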
def find_events_dataset(ds, **kwargs):

    ds.a.events = find_events(#event= ds.sa.event_num,
                              chunks=ds.sa.chunks,
                              targets=ds.sa.targets)

    return ds
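# Minimal illustrative sketch (not from the original sources): what
# `find_events` returns -- a list of dicts with 'onset' and 'duration' plus
# one key per attribute passed in, starting a new event whenever any of the
# attributes changes. The toy dataset below is purely hypothetical and
# assumes a standard PyMVPA 2.x installation.
import numpy as np
from mvpa2.datasets import dataset_wizard
from mvpa2.datasets.eventrelated import find_events

_samples = np.random.randn(12, 4)                       # 12 volumes, 4 features
_targets = ['rest'] * 3 + ['face'] * 3 + ['rest'] * 3 + ['house'] * 3
_chunks = [0] * 6 + [1] * 6
_ds = dataset_wizard(_samples, targets=_targets, chunks=_chunks)

for _ev in find_events(targets=_ds.sa.targets, chunks=_ds.sa.chunks):
    print(_ev)   # e.g. {'onset': 0, 'duration': 3, 'targets': 'rest', 'chunks': 0}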
def load_spatiotemporal_dataset(ds, **kwargs):

    onset = 0

    for arg in kwargs:
        if (arg == 'onset'):
            onset = kwargs[arg]
        if (arg == 'duration'):
            duration = kwargs[arg]
        if (arg == 'enable_results'):
            enable_results = kwargs[arg]

    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)

    #task_events = [e for e in events if e['targets'] in ['Vipassana','Samatha']]

    if 'duration' in locals():
        events = [e for e in events if e['duration'] >= duration]
    else:
        duration = np.min([ev['duration'] for ev in events])

    for e in events:
        e['onset'] += onset
        e['duration'] = duration

    evds = eventrelated_dataset(ds, events=events)

    return evds
def test_get_contrasts():
    # preamble borrowed from the previous test
    skip_if_no_external('nibabel')
    skip_if_no_external('nipy')  # ATM relies on NiPy's GLM implementation

    # taking subset of the dataset to speed testing up
    ds = load_example_fmri_dataset('25mm', literal=True)[{'chunks': [0, 1]}, :3]

    # TODO: simulate short dataset with known properties and use it
    # for testing
    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    tr = ds.a.imghdr['pixdim'][4]
    for ev in events:
        for a in ('onset', 'duration'):
            ev[a] = ev[a] * tr
    evds = fit_event_hrf_model(
        ds,
        events=events,
        time_attr='time_coords',
        condition_attr='targets',
        design_kwargs=dict(drift_model='blank'),
        glmfit_kwargs=dict(model='ols'),
        return_model=True,
    )

    # Simple one -- stat per each condition
    cons = get_contrasts(evds)
    # and let's get p-values
    cons_p = get_contrasts(evds, fxname='p_value')
    # Without contrasts explicitly prescribed -- there will be one per each
    # condition
    assert_array_equal(cons.UT, evds.UT)
    # and per each feature
    assert_equal(cons.shape, (len(evds.UT), evds.nfeatures))
    assert_array_less(cons_p, 1)
    assert_array_less(0, cons_p)

    cons_fh = get_contrasts(
        evds,
        contrasts={'face-house': {'face': 1, 'house': -1},
                   'betterface': {'face': 1, 'house': -0.5, 'scrambledpix': -0.5}},
    )
    # print(cons_fh.samples)
    assert_array_equal(cons_fh.UT, ['betterface', 'face-house'])

    # and nipy does one tailed test so all p-values should correspond to z-s
    skip_if_no_external('scipy')
    import scipy.stats.distributions as ssd
    assert_array_almost_equal(ssd.norm().isf(cons_p), cons)
def add_events(ds):

    ev_list = []
    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)

    for i in range(len(events)):
        duration = events[i]['duration']
        for _ in range(duration):
            ev_list.append(i + 1)

    ds.a['events'] = events            # Update event field
    ds.sa['events_number'] = ev_list   # Update event number

    return ds
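# Minimal illustrative sketch (not from the original sources): for the
# 12-volume toy dataset sketched earlier (four events of three volumes each),
# `add_events` would attach a 1-based event index to every sample, i.e.
# ds.sa.events_number == [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4]. The same
# per-sample labels can be computed in one step with numpy, assuming `events`
# is the output of find_events:
import numpy as np

_events = [{'duration': 3}, {'duration': 3}, {'duration': 3}, {'duration': 3}]
_events_number = np.repeat(np.arange(1, len(_events) + 1),
                           [e['duration'] for e in _events])
print(_events_number)   # [1 1 1 2 2 2 3 3 3 4 4 4]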
def test_samples_attributes(self):
    sa = SampleAttributes(pathjoin(pymvpa_dataroot, 'attributes_literal.txt'),
                          literallabels=True)
    ok_(sa.nrows == 1452, msg='There should be 1452 samples')

    # convert to event list, with some custom attr
    ev = find_events(**sa)
    ok_(len(ev) == 17 * (max(sa.chunks) + 1),
        msg='Not all events got detected.')
    ok_(ev[0]['targets'] == ev[-1]['targets'] == 'rest',
        msg='First and last event are rest condition.')
    ok_(ev[-1]['onset'] + ev[-1]['duration'] == sa.nrows,
        msg='Something is wrong with the timing of the events')
def test_erdataset():
    # 3 chunks, 5 targets, blocks of 5 samples each
    nchunks = 3
    ntargets = 5
    blocklength = 5
    nfeatures = 10
    targets = np.tile(np.repeat(range(ntargets), blocklength), nchunks)
    chunks = np.repeat(np.arange(nchunks), ntargets * blocklength)
    samples = np.repeat(
        np.arange(nchunks * ntargets * blocklength),
        nfeatures).reshape(-1, nfeatures)
    ds = dataset_wizard(samples, targets=targets, chunks=chunks)
    # check if events are determined properly
    evs = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    for ev in evs:
        assert_equal(ev['duration'], blocklength)
    assert_equal(ntargets * nchunks, len(evs))
    for t in range(ntargets):
        assert_equal(len([ev for ev in evs if ev['targets'] == t]), nchunks)
    # now turn `ds` into an event-related dataset
    erds = eventrelated_dataset(ds, evs)
    # the only unprefixed sample attributes are
    assert_equal(sorted([a for a in ds.sa if not a.startswith('event')]),
                 ['chunks', 'targets'])
    # samples as expected?
    assert_array_equal(erds.samples[0],
                       np.repeat(np.arange(blocklength), nfeatures))
    # that should also be the temporal feature offset
    assert_array_equal(erds.samples[0], erds.fa.event_offsetidx)
    assert_array_equal(erds.sa.event_onsetidx, np.arange(0, 71, 5))
    # finally we should see two mappers
    assert_equal(len(erds.a.mapper), 2)
    assert_true(isinstance(erds.a.mapper[0], BoxcarMapper))
    assert_true(isinstance(erds.a.mapper[1], FlattenMapper))
    # check alternative event mapper
    # this one does temporal compression by averaging
    erds_compress = eventrelated_dataset(
        ds, evs, event_mapper=FxMapper('features', np.mean))
    assert_equal(len(erds), len(erds_compress))
    assert_array_equal(erds_compress.samples[:, 0], np.arange(2, 73, 5))
    #
    # now check the same dataset with event discretization
    tr = 2.5
    ds.sa['time'] = np.arange(nchunks * ntargets * blocklength) * tr
    evs = [{'onset': 4.9, 'duration': 6.2}]
    # doesn't work without conversion
    assert_raises(ValueError, eventrelated_dataset, ds, evs)
    erds = eventrelated_dataset(ds, evs, time_attr='time')
    assert_equal(len(erds), 1)
    assert_array_equal(erds.samples[0],
                       np.repeat(np.arange(1, 5), nfeatures))
    assert_array_equal(erds.sa.orig_onset, [evs[0]['onset']])
    assert_array_equal(erds.sa.orig_duration, [evs[0]['duration']])
    assert_array_almost_equal(erds.sa.orig_offset, [2.4])
    assert_array_equal(erds.sa.time, [np.arange(2.5, 11, 2.5)])
    # now with closest match
    erds = eventrelated_dataset(ds, evs, time_attr='time', match='closest')
    expected_nsamples = 3
    assert_equal(len(erds), 1)
    assert_array_equal(erds.samples[0],
                       np.repeat(np.arange(2, 2 + expected_nsamples),
                                 nfeatures))
    assert_array_equal(erds.sa.orig_onset, [evs[0]['onset']])
    assert_array_equal(erds.sa.orig_duration, [evs[0]['duration']])
    assert_array_almost_equal(erds.sa.orig_offset, [-0.1])
    assert_array_equal(erds.sa.time, [np.arange(5.0, 11, 2.5)])
    # now test the way back
    results = np.arange(erds.nfeatures)
    assert_array_equal(erds.a.mapper.reverse1(results),
                       results.reshape(expected_nsamples, nfeatures))
    # what about multiple results?
    nresults = 5
    results = dataset_wizard([results] * nresults)
    # and let's have an attribute to make it more difficult
    results.sa['myattr'] = np.arange(5)
    rds = erds.a.mapper.reverse(results)
    assert_array_equal(rds,
                       results.samples.reshape(nresults * expected_nsamples,
                                               nfeatures))
    assert_array_equal(rds.sa.myattr,
                       np.repeat(results.sa.myattr, expected_nsamples))
def normalize_dataset(ds, **kwargs):

    import collections
    import fractions

    mean = False
    normalization = 'feature'
    chunk_number = None

    for arg in kwargs:
        if (arg == 'mean_samples'):
            mean = kwargs[arg]
        if (arg == 'img_dim'):
            img_dim = int(kwargs[arg])
        if (arg == 'normalization'):
            normalization = str(kwargs[arg])
        if (arg == 'chunk_number'):
            chunk_number = kwargs[arg]

    n_targets = np.array(
        [value for value in collections.Counter(ds.targets).values()]).min()

    if chunk_number == 'adaptive':
        n_chunks = np.max(
            [fractions.gcd(n_targets, i) for i in np.arange(2, 10)])
        if n_chunks == 1:
            n_chunks = 4
    elif isinstance(chunk_number, int):
        n_chunks = int(chunk_number)

    if chunk_number != None:
        argsort = np.argsort(ds.targets)
        chunks = []
        for _ in ds.uniquetargets:
            chunk = np.linspace(0, n_chunks, n_targets,
                                endpoint=False, dtype=np.int)
            chunks.append(chunk)

        ds.chunks[argsort] = np.hstack(chunks)

    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num'])
        ds = ds.get_mapped(avg_mapper)

    if normalization == 'feature' or normalization == 'both':
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)  #, param_est=('targets', ['fixation']))

    if normalization == 'sample' or normalization == 'both':
        # Normalizing image-wise
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds, axis=1)[:, None]
        ds.samples /= np.std(ds, axis=1)[:, None]

        ds.samples[np.isnan(ds.samples)] = 0

    # Find event related stuff
    ds.a.events = find_events(#event= ds.sa.event_num,
                              chunks=ds.sa.chunks,
                              targets=ds.sa.targets)

    return ds
def test_hrf_modeling():
    skip_if_no_external('nibabel')
    skip_if_no_external('nipy')  # ATM relies on NiPy's GLM implementation
    ds = load_example_fmri_dataset('25mm')  #literal=True)
    # TODO: simulate short dataset with known properties and use it
    # for testing
    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    tr = ds.a.imghdr['pixdim'][4]
    for ev in events:
        for a in ('onset', 'duration'):
            ev[a] = ev[a] * tr
    evds = eventrelated_dataset(ds, events,
                                time_attr='time_coords',
                                condition_attr='targets',
                                design_kwargs=dict(drift_model='blank'),
                                glmfit_kwargs=dict(model='ols'),
                                model='hrf')
    # same voxels
    assert_equal(ds.nfeatures, evds.nfeatures)
    assert_array_equal(ds.fa.voxel_indices, evds.fa.voxel_indices)
    # one sample for each condition, plus constant
    assert_equal(sorted(ds.sa['targets'].unique), sorted(evds.sa.targets))
    assert_equal(evds.a.add_regs.sa.regressor_names[0], 'constant')
    # with centered data
    zscore(ds)
    evds_demean = eventrelated_dataset(ds, events,
                                       time_attr='time_coords',
                                       condition_attr='targets',
                                       design_kwargs=dict(drift_model='blank'),
                                       glmfit_kwargs=dict(model='ols'),
                                       model='hrf')
    # after demeaning the constant should consume a lot less
    assert(evds.a.add_regs[0].samples.mean()
           > evds_demean.a.add_regs[0].samples.mean())
    # from eyeballing the sensitivity example -- would be better to test this on
    # the tutorial data
    assert(evds_demean[evds.sa.targets == 'shoe'].samples.max()
           > evds_demean[evds.sa.targets == 'bottle'].samples.max())
    # HRF models
    assert('regressors' in evds.sa)
    assert('regressors' in evds.a.add_regs.sa)
    assert_equal(evds.sa.regressors.shape[1], len(ds))

    # custom regressors
    evds_regrs = eventrelated_dataset(ds, events,
                                      time_attr='time_coords',
                                      condition_attr='targets',
                                      regr_attrs=['time_indices'],
                                      design_kwargs=dict(drift_model='blank'),
                                      glmfit_kwargs=dict(model='ols'),
                                      model='hrf')
    # verify that nothing screwed up time_coords
    assert_equal(ds.sa.time_coords[0], 0)
    assert_equal(len(evds_regrs), len(evds))
    # one more output sample in .a.add_regs
    assert_equal(len(evds_regrs.a.add_regs) - 1, len(evds.a.add_regs))
    # comes last before constant
    assert_equal('time_indices', evds_regrs.a.add_regs.sa.regressor_names[-2])
    # order of main regressors is unchanged
    assert_array_equal(evds.sa.targets, evds_regrs.sa.targets)

    # custom regressors from external sources
    evds_regrs = eventrelated_dataset(ds, events,
                                      time_attr='time_coords',
                                      condition_attr='targets',
                                      regr_attrs=['time_coords'],
                                      design_kwargs=dict(
                                          drift_model='blank',
                                          add_regs=np.linspace(1, -1, len(ds))[None].T,
                                          add_reg_names=['negative_trend']),
                                      glmfit_kwargs=dict(model='ols'),
                                      model='hrf')
    assert_equal(len(evds_regrs), len(evds))
    # But we got one more in additional regressors
    assert_equal(len(evds_regrs.a.add_regs) - 2, len(evds.a.add_regs))
    # comes last before constant
    assert_array_equal(['negative_trend', 'time_coords', 'constant'],
                       evds_regrs.a.add_regs.sa.regressor_names)
    # order is otherwise unchanged
    assert_array_equal(evds.sa.targets, evds_regrs.sa.targets)

    # HRF models with estimating per each chunk
    assert_equal(ds.sa.time_coords[0], 0)
    evds_regrs = eventrelated_dataset(ds, events,
                                      time_attr='time_coords',
                                      condition_attr=['targets', 'chunks'],
                                      regr_attrs=['time_indices'],
                                      design_kwargs=dict(drift_model='blank'),
                                      glmfit_kwargs=dict(model='ols'),
                                      model='hrf')
    assert_true('add_regs' in evds_regrs.a)
    assert_true('time_indices' in evds_regrs.a.add_regs.sa.regressor_names)
    assert_equal(len(ds.UC) * len(ds.UT), len(evds_regrs))
    assert_equal(len(evds_regrs.UC) * len(evds_regrs.UT), len(evds_regrs))

    from mvpa2.mappers.fx import mean_group_sample
    evds_regrs_meaned = mean_group_sample(['targets'])(evds_regrs)
    assert_array_equal(evds_regrs_meaned.T, evds.T)  # targets should be the same
def build_events_ds(ds, new_duration, **kwargs):
    """
    Converts a dataset into an event-related dataset.

    Used for transfer learning and clustering: a classifier has been trained
    on an event-related dataset and the prediction should be done on the same
    kind of dataset.

    Parameters
    ----------
    ds : Dataset
        The dataset to be converted.
    new_duration : integer
        Duration of a single event. If the experiment events have different
        lengths, only events with a duration greater than or equal to
        new_duration are used.
    kwargs : dict
        win_number: number of windows to extract from a single event; if it
            is not set, it defaults to the ratio between the event duration
            and new_duration.
        overlap: number of samples shared by two consecutive windows.

    Returns
    -------
    Dataset : the event-related dataset
    """

    for arg in kwargs:
        if arg == 'win_number':
            win_number = kwargs[arg]
        if arg == 'overlap':
            overlap = kwargs[arg]

    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    labels = np.unique(ds.targets)

    current_duration = dict()
    for l in labels:
        d = [e['duration'] for e in events if e['targets'] == l]
        current_duration[l] = np.unique(d)[0]

    def calc_overlap(w, l, n):
        return w - np.floor((l - w) / (n - 1))

    def calc_win_number(w, l, o):
        return (l - w) / (w - o) + 1

    if 'overlap' not in locals():
        overlap = calc_overlap(new_duration, current_duration[l], win_number)
    else:
        if overlap >= new_duration:
            overlap = new_duration - 1

    if 'win_number' not in locals():
        #win_number = np.ceil(current_duration[l]/np.float(new_duration))
        win_number = calc_win_number(new_duration, current_duration[l], overlap)

    new_event_list = []

    for e in events:
        onset = e['onset']
        chunks = e['chunks']
        targets = e['targets']
        duration = e['duration']
        for i in np.arange(win_number):
            new_onset = onset + i * (new_duration - overlap)

            new_event = dict()
            new_event['onset'] = new_onset
            new_event['duration'] = new_duration
            new_event['targets'] = targets
            new_event['chunks'] = chunks

            new_event_list.append(new_event)

    logger.info('Building new event related dataset...')
    evds = eventrelated_dataset(ds, events=new_event_list)

    return evds
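# Minimal illustrative sketch (not from the original sources): worked example
# of the windowing arithmetic in `build_events_ds`, with hypothetical numbers.
# For events of 12 volumes, new_duration = 4 and overlap = 2 give
# win_number = (12 - 4) / (4 - 2) + 1 = 5 sub-events, whose onsets relative to
# the original event onset are 0, 2, 4, 6 and 8, so the last 4-volume window
# ends exactly at the end of the original event.
new_duration, event_length, overlap = 4, 12, 2
win_number = (event_length - new_duration) // (new_duration - overlap) + 1
onsets = [i * (new_duration - overlap) for i in range(win_number)]
print(win_number, onsets)   # 5 [0, 2, 4, 6, 8]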
def spatiotemporal(ds, **kwargs):

    onset = 0

    for arg in kwargs:
        if (arg == 'onset'):
            onset = kwargs[arg]
        if (arg == 'duration'):
            duration = kwargs[arg]
        if (arg == 'enable_results'):
            enable_results = kwargs[arg]
        if (arg == 'permutations'):
            permutations = int(kwargs[arg])

    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)

    if 'duration' in locals():
        events = [e for e in events if e['duration'] >= duration]
    else:
        duration = np.min([ev['duration'] for ev in events])

    for e in events:
        e['onset'] += onset
        e['duration'] = duration

    evds = eventrelated_dataset(ds, events=events)

    [fclf, cvte] = setup_classifier(**kwargs)

    logger.info('Cross validation is running...')
    res = cvte(evds)

    print(cvte.ca.stats)

    if permutations != 0:
        print(cvte.ca.null_prob.samples)
        dist_len = len(cvte.null_dist.dists())
        err_arr = np.zeros(dist_len)
        for i in range(dist_len):
            err_arr[i] = 1 - cvte.ca.stats.stats['ACC']

        total_p_value = np.mean(cvte.null_dist.p(err_arr))
        p_value = cvte.ca.null_prob.samples
    else:
        total_p_value = 0.
        p_value = np.array([0, 0])

    try:
        sensana = fclf.get_sensitivity_analyzer()
        res_sens = sensana(evds)
    except Exception as err:
        allowed_keys = ['map', 'sensitivities', 'stats',
                        'mapper', 'classifier', 'ds',
                        'perm_pvalue', 'p']

        allowed_results = [None, None, cvte.ca.stats,
                           evds.a.mapper, fclf, evds,
                           p_value, total_p_value]

        results_dict = dict(zip(allowed_keys, allowed_results))
        results = dict()
        if not 'enable_results' in locals():
            enable_results = allowed_keys[:]

        for elem in enable_results:
            if elem in allowed_keys:
                results[elem] = results_dict[elem]

        return results

    sens_comb = res_sens.get_mapped(mean_sample())
    mean_map = map2nifti(evds, evds.a.mapper.reverse1(sens_comb))

    l_maps = []
    for m in res_sens:
        maps = ds.a.mapper.reverse1(m)
        nifti = map2nifti(evds, maps)
        l_maps.append(nifti)

    l_maps.append(mean_map)

    # Packing results (to be substituted with a function)
    results = dict()

    if not 'enable_results' in locals():
        enable_results = ['map', 'sensitivities', 'stats',
                          'mapper', 'classifier', 'ds',
                          'pvalue', 'p']

    allowed_keys = ['map', 'sensitivities', 'stats',
                    'mapper', 'classifier', 'ds',
                    'pvalue', 'p']

    allowed_results = [l_maps, res_sens, cvte.ca.stats,
                       evds.a.mapper, fclf, evds,
                       p_value, total_p_value]

    results_dict = dict(zip(allowed_keys, allowed_results))

    for elem in enable_results:
        if elem in allowed_keys:
            results[elem] = results_dict[elem]
        else:
            print('******** ' + elem + ' result is not allowed! *********')

    return results
def run(args):
    ds = arg2ds(args.data)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)
    # build list of events
    events = []
    timebased_events = False
    if args.event_attrs is not None:
        def_attrs = dict([(k, ds.sa[k].value) for k in args.event_attrs])
        events = find_events(**def_attrs)
    elif args.csv_events is not None:
        if args.csv_events == '-':
            csv = sys.stdin.read()
            import cStringIO
            csv = cStringIO.StringIO(csv)
        else:
            csv = open(args.csv_events, 'rU')
        csvt = _load_csv_table(csv)
        if not len(csvt):
            raise ValueError("no CSV columns found")
        if args.onset_column:
            csvt['onset'] = csvt[args.onset_column]
        nevents = len(csvt[csvt.keys()[0]])
        events = []
        for ev in xrange(nevents):
            events.append(dict([(k, v[ev]) for k, v in csvt.iteritems()]))
    elif args.onsets is not None:
        if not len(args.onsets):
            args.onsets = [i for i in sys.stdin]
        # time or sample-based?
        if args.time_attr is None:
            oconv = int
        else:
            oconv = float
        events = [{'onset': oconv(o)} for o in args.onsets]
    elif args.fsl_ev3 is not None:
        timebased_events = True
        from mvpa2.misc.fsl import FslEV3
        events = []
        for evsrc in args.fsl_ev3:
            events.extend(FslEV3(evsrc).to_events())
    if not len(events):
        raise ValueError("no events defined")
    verbose(2, 'Extracting %i events' % len(events))
    if args.event_compression is None:
        evmap = None
    elif args.event_compression == 'mean':
        evmap = FxMapper('features', np.mean, attrfx=merge2first)
    elif args.event_compression == 'median':
        evmap = FxMapper('features', np.median, attrfx=merge2first)
    elif args.event_compression == 'min':
        evmap = FxMapper('features', np.min, attrfx=merge2first)
    elif args.event_compression == 'max':
        evmap = FxMapper('features', np.max, attrfx=merge2first)
    # convert to event-related ds
    evds = eventrelated_dataset(ds, events,
                                time_attr=args.time_attr,
                                match=args.match_strategy,
                                event_offset=args.offset,
                                event_duration=args.duration,
                                event_mapper=evmap)
    # act on all attribute options
    evds = process_common_dsattr_opts(evds, args)
    # and store
    ds2hdf5(evds, args.output, compression=args.hdf5_compression)
    return evds