Example #1
def balance_dataset_timewise(ds, label, sort=True, **kwargs):

    ################ To be changed ######################
    m_fixation = ds.targets == 'fixation'
    ev_fix = zip(ds.chunks[m_fixation],
                 4 * ((ds.sa.events_number[m_fixation] + 2) / 4 - 1) + 2)
    ####################################################

    ev_fix = np.array(ev_fix)
    ds.sa.events_number[m_fixation] = np.int_(ev_fix.T[1])
    arg_sort = np.argsort(ds.sa.events_number)
    events = find_events(chunks=ds[arg_sort].sa.chunks,
                         targets=ds[arg_sort].sa.targets)
    # min duration
    min_duration = np.min([e['duration'] for e in events])

    mask = False

    for ev in np.unique(ds.sa.events_number):
        mask_event = ds.sa.events_number == ev
        mask_event[np.nonzero(mask_event)[0][min_duration - 1] + 1:] = False

        mask = mask + mask_event

    if sort == True:
        arg_sort = np.argsort(ds[mask].sa.events_number)
        ds = ds[mask][arg_sort]
    else:
        ds = ds[mask]

    ds.a.events = find_events(targets=ds.targets, chunks=ds.chunks)

    return ds
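This helper truncates every event to the duration of the shortest event found, so each condition contributes the same number of volumes. Note that the label argument is unused in this revision (the fixation handling is hard-coded, see the "To be changed" block), and the code is Python 2 style: it relies on zip() returning a list and on / performing integer division. A minimal, hypothetical call, assuming ds.sa.events_number was populated beforehand (e.g. by the add_events() helper shown further below):

# hypothetical usage
balanced = balance_dataset_timewise(ds, 'fixation')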
Example #2
def balance_dataset_timewise(ds, label, sort=True, **kwargs):
    
    
    ################ To be changed ######################
    m_fixation = ds.targets == 'fixation'
    ev_fix = zip(ds.chunks[m_fixation], 
                 4*((ds.sa.events_number[m_fixation]+2)/4 - 1 )+2)
    ####################################################
    
    ev_fix=np.array(ev_fix)
    ds.sa.events_number[m_fixation] = np.int_(ev_fix.T[1])
    arg_sort = np.argsort(ds.sa.events_number)
    events = find_events(chunks = ds[arg_sort].sa.chunks, 
                         targets = ds[arg_sort].sa.targets)
    # min duration
    min_duration = np.min( [e['duration'] for e in events])

    mask = False

    for ev in np.unique(ds.sa.events_number):
        mask_event = ds.sa.events_number == ev
        mask_event[np.nonzero(mask_event)[0][min_duration-1]+1:] = False
    
        mask = mask + mask_event
    
    if sort == True:
        arg_sort = np.argsort(ds[mask].sa.events_number)
        ds = ds[mask][arg_sort]
    else:
        ds = ds[mask]
    
    ds.a.events = find_events(targets = ds.targets, chunks = ds.chunks)
    
    return ds
Example #3
def find_events_dataset(ds, **kwargs):
    
    ds.a.events = find_events(#event= ds.sa.event_num, 
                              chunks = ds.sa.chunks, 
                              targets = ds.sa.targets)
    
    return ds
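For context, a sketch (with hypothetical inputs) of what find_events() returns: one dict per run of constant attribute values, carrying the attributes themselves plus 'onset' (start index) and 'duration' (number of samples).

# hypothetical inputs
targets = ['rest', 'rest', 'face', 'face', 'face', 'rest']
chunks = [0, 0, 0, 0, 0, 0]
events = find_events(targets=targets, chunks=chunks)
# -> [{'targets': 'rest', 'chunks': 0, 'onset': 0, 'duration': 2},
#     {'targets': 'face', 'chunks': 0, 'onset': 2, 'duration': 3},
#     {'targets': 'rest', 'chunks': 0, 'onset': 5, 'duration': 1}]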
Example #4
def load_spatiotemporal_dataset(ds, **kwargs):
    
    onset = 0
    
    for arg in kwargs:
        if (arg == 'onset'):
            onset = kwargs[arg]
        if (arg == 'duration'):
            duration = kwargs[arg]
        if (arg == 'enable_results'):
            enable_results = kwargs[arg]
        
        
        
    events = find_events(targets = ds.sa.targets, chunks = ds.sa.chunks)   
    
    #task_events = [e for e in events if e['targets'] in ['Vipassana','Samatha']]
    
    if 'duration' in locals():
        events = [e for e in events if e['duration'] >= duration]
    else:
        duration = np.min([ev['duration'] for ev in events])

    for e in events:
        e['onset'] += onset           
        e['duration'] = duration
        
    evds = eventrelated_dataset(ds, events = events)
    
    return evds
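A hypothetical call, matching how the kwargs are parsed above: shift every event onset by one volume and force a common four-volume duration (events shorter than that are dropped).

# hypothetical usage
evds = load_spatiotemporal_dataset(ds, onset=1, duration=4)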
Example #5
def test_get_contrasts():
    # preamble borrowed from the previous test
    skip_if_no_external('nibabel')
    skip_if_no_external('nipy')  # ATM relies on NiPy's GLM implementation
    # taking subset of the dataset to speed testing up
    ds = load_example_fmri_dataset('25mm', literal=True)[{
        'chunks': [0, 1]
    }, :3]
    # TODO: simulate short dataset with known properties and use it
    # for testing
    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    tr = ds.a.imghdr['pixdim'][4]
    for ev in events:
        for a in ('onset', 'duration'):
            ev[a] = ev[a] * tr
    evds = fit_event_hrf_model(
        ds,
        events=events,
        time_attr='time_coords',
        condition_attr='targets',
        design_kwargs=dict(drift_model='blank'),
        glmfit_kwargs=dict(model='ols'),
        return_model=True,
    )
    # Simple one -- stat per each condition
    cons = get_contrasts(evds)
    # and let's get p-values
    cons_p = get_contrasts(evds, fxname='p_value')
    # Without contrasts explicitly prescribed -- there will be one per each
    # condition
    assert_array_equal(cons.UT, evds.UT)
    # and per each feature
    assert_equal(cons.shape, (len(evds.UT), evds.nfeatures))
    assert_array_less(cons_p, 1)
    assert_array_less(0, cons_p)

    cons_fh = get_contrasts(
        evds,
        contrasts={
            'face-house': {
                'face': 1,
                'house': -1
            },
            'betterface': {
                'face': 1,
                'house': -0.5,
                'scrambledpix': -0.5
            }
        },
    )

    # print(cons_fh.samples)
    assert_array_equal(cons_fh.UT, ['betterface', 'face-house'])

    # and nipy does a one-tailed test, so all p-values should correspond to z-scores
    skip_if_no_external('scipy')
    import scipy.stats.distributions as ssd
    assert_array_almost_equal(ssd.norm().isf(cons_p), cons)
Example #6
def add_events(ds):
    
    ev_list = []
    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    for i in range(len(events)):
        duration = events[i]['duration']
        for _ in range(duration):
            ev_list.append(i + 1)
    
    ds.a['events'] = events # Update event field
    ds.sa['events_number'] = ev_list # Update event number
    
    return ds
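This helper assigns a running event index to every sample (ds.sa.events_number), which is the attribute consumed by balance_dataset_timewise() above; usage is simply:

# hypothetical usage
ds = add_events(ds)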
Example #7
    def test_samples_attributes(self):
        sa = SampleAttributes(pathjoin(pymvpa_dataroot,
                                       'attributes_literal.txt'),
                              literallabels=True)

        ok_(sa.nrows == 1452, msg='There should be 1452 samples')

        # convert to event list, with some custom attr
        ev = find_events(**sa)
        ok_(len(ev) == 17 * (max(sa.chunks) + 1),
            msg='Not all events got detected.')

        ok_(ev[0]['targets'] == ev[-1]['targets'] == 'rest',
            msg='First and last event are rest condition.')

        ok_(ev[-1]['onset'] + ev[-1]['duration'] == sa.nrows,
            msg='Something is wrong with the timing of the events')
Example #8
    def test_samples_attributes(self):
        sa = SampleAttributes(os.path.join(pymvpa_dataroot,
                                           'attributes_literal.txt'),
                              literallabels=True)

        ok_(sa.nrows == 1452, msg='There should be 1452 samples')

        # convert to event list, with some custom attr
        ev = find_events(**sa)
        ok_(len(ev) == 17 * (max(sa.chunks) + 1),
            msg='Not all events got detected.')

        ok_(ev[0]['targets'] == ev[-1]['targets'] == 'rest',
            msg='First and last event are rest condition.')

        ok_(ev[-1]['onset'] + ev[-1]['duration'] == sa.nrows,
            msg='Something is wrong with the timing of the events')
Example #9
def test_erdataset():
    # 3 chunks, 5 targets, blocks of 5 samples each
    nchunks = 3
    ntargets = 5
    blocklength = 5
    nfeatures = 10
    targets = np.tile(np.repeat(range(ntargets), blocklength), nchunks)
    chunks = np.repeat(np.arange(nchunks), ntargets * blocklength)
    samples = np.repeat(
                np.arange(nchunks * ntargets * blocklength),
                nfeatures).reshape(-1, nfeatures)
    ds = dataset_wizard(samples, targets=targets, chunks=chunks)
    # check if events are determined properly
    evs = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    for ev in evs:
        assert_equal(ev['duration'], blocklength)
    assert_equal(ntargets * nchunks, len(evs))
    for t in range(ntargets):
        assert_equal(len([ev for ev in evs if ev['targets'] == t]),
                     nchunks)
    # now turn `ds` into an event-related dataset
    erds = eventrelated_dataset(ds, evs)
    # the only unprefixed sample attributes are 'chunks' and 'targets'
    assert_equal(sorted([a for a in ds.sa if not a.startswith('event')]),
                 ['chunks', 'targets'])
    # samples as expected?
    assert_array_equal(erds.samples[0],
                       np.repeat(np.arange(blocklength), nfeatures))
    # that should also be the temporal feature offset
    assert_array_equal(erds.samples[0], erds.fa.event_offsetidx)
    assert_array_equal(erds.sa.event_onsetidx, np.arange(0,71,5))
    # finally we should see two mappers
    assert_equal(len(erds.a.mapper), 2)
    assert_true(isinstance(erds.a.mapper[0], BoxcarMapper))
    assert_true(isinstance(erds.a.mapper[1], FlattenMapper))
    # check alternative event mapper
    # this one does temporal compression by averaging
    erds_compress = eventrelated_dataset(
                        ds, evs, event_mapper=FxMapper('features', np.mean))
    assert_equal(len(erds), len(erds_compress))
    assert_array_equal(erds_compress.samples[:,0], np.arange(2,73,5))
    #
    # now check the same dataset with event discretization
    tr = 2.5
    ds.sa['time'] = np.arange(nchunks * ntargets * blocklength) * tr
    evs = [{'onset': 4.9, 'duration': 6.2}]
    # doesn't work without conversion
    assert_raises(ValueError, eventrelated_dataset, ds, evs)
    erds = eventrelated_dataset(ds, evs, time_attr='time')
    assert_equal(len(erds), 1)
    assert_array_equal(erds.samples[0], np.repeat(np.arange(1,5), nfeatures))
    assert_array_equal(erds.sa.orig_onset, [evs[0]['onset']])
    assert_array_equal(erds.sa.orig_duration, [evs[0]['duration']])
    assert_array_almost_equal(erds.sa.orig_offset, [2.4])
    assert_array_equal(erds.sa.time, [np.arange(2.5, 11, 2.5)])
    # now with closest match
    erds = eventrelated_dataset(ds, evs, time_attr='time', match='closest')
    expected_nsamples = 3
    assert_equal(len(erds), 1)
    assert_array_equal(erds.samples[0],
                       np.repeat(np.arange(2,2+expected_nsamples),
                                nfeatures))
    assert_array_equal(erds.sa.orig_onset, [evs[0]['onset']])
    assert_array_equal(erds.sa.orig_duration, [evs[0]['duration']])
    assert_array_almost_equal(erds.sa.orig_offset, [-0.1])
    assert_array_equal(erds.sa.time, [np.arange(5.0, 11, 2.5)])
    # now test the way back
    results = np.arange(erds.nfeatures)
    assert_array_equal(erds.a.mapper.reverse1(results),
                       results.reshape(expected_nsamples, nfeatures))
    # what about multiple results?
    nresults = 5
    results = dataset_wizard([results] * nresults)
    # and let's have an attribute to make it more difficult
    results.sa['myattr'] = np.arange(5)
    rds = erds.a.mapper.reverse(results)
    assert_array_equal(rds,
                       results.samples.reshape(nresults * expected_nsamples,
                                               nfeatures))
    assert_array_equal(rds.sa.myattr, np.repeat(results.sa.myattr,
                                               expected_nsamples))
Example #10
def normalize_dataset(ds, **kwargs):

    import collections
    import fractions

    mean = False
    normalization = 'feature'
    chunk_number = None

    for arg in kwargs:
        if (arg == 'mean_samples'):
            mean = kwargs[arg]
        if (arg == 'img_dim'):
            img_dim = int(kwargs[arg])
        if (arg == 'normalization'):
            normalization = str(kwargs[arg])
        if (arg == 'chunk_number'):
            chunk_number = kwargs[arg]

    n_targets = np.array(
        [value for value in collections.Counter(ds.targets).values()]).min()

    if chunk_number == 'adaptive':
        n_chunks = np.max(
            [fractions.gcd(n_targets, i) for i in np.arange(2, 10)])
        if n_chunks == 1:
            n_chunks = 4
    elif isinstance(chunk_number, int):
        n_chunks = int(chunk_number)

    if chunk_number is not None:
        argsort = np.argsort(ds.targets)
        chunks = []
        for _ in ds.uniquetargets:
            chunk = np.linspace(0,
                                n_chunks,
                                n_targets,
                                endpoint=False,
                                dtype=np.int)
            chunks.append(chunk)

        ds.chunks[argsort] = np.hstack(chunks)

    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num'])
        ds = ds.get_mapped(avg_mapper)

    if normalization == 'feature' or normalization == 'both':
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)  #, param_est=('targets', ['fixation']))

    if normalization == 'sample' or normalization == 'both':
        # Normalizing image-wise
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds, axis=1)[:, None]
        ds.samples /= np.std(ds, axis=1)[:, None]

        ds.samples[np.isnan(ds.samples)] = 0

    # Find event related stuff
    ds.a.events = find_events(  #event= ds.sa.event_num, 
        chunks=ds.sa.chunks, targets=ds.sa.targets)

    return ds
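A hypothetical call, following the kwarg names parsed above. Note that img_dim is only bound when passed, yet it is read whenever feature-wise normalization is requested, so it should be supplied in that case; fractions.gcd has also been removed from recent Python versions (math.gcd is the replacement).

# hypothetical usage
ds = normalize_dataset(ds,
                       mean_samples='True',      # average samples per event_num
                       img_dim=3,                # needed for 'feature'/'both' normalization
                       normalization='both',     # 'feature', 'sample', or 'both'
                       chunk_number='adaptive')  # or an int, or None to keep current chunks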
Example #11
def test_erdataset():
    # 3 chunks, 5 targets, blocks of 5 samples each
    nchunks = 3
    ntargets = 5
    blocklength = 5
    nfeatures = 10
    targets = np.tile(np.repeat(range(ntargets), blocklength), nchunks)
    chunks = np.repeat(np.arange(nchunks), ntargets * blocklength)
    samples = np.repeat(np.arange(nchunks * ntargets * blocklength),
                        nfeatures).reshape(-1, nfeatures)
    ds = dataset_wizard(samples, targets=targets, chunks=chunks)
    # check if events are determined properly
    evs = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    for ev in evs:
        assert_equal(ev['duration'], blocklength)
    assert_equal(ntargets * nchunks, len(evs))
    for t in range(ntargets):
        assert_equal(len([ev for ev in evs if ev['targets'] == t]), nchunks)
    # now turn `ds` into an event-related dataset
    erds = eventrelated_dataset(ds, evs)
    # the only unprefixed sample attributes are 'chunks' and 'targets'
    assert_equal(sorted([a for a in ds.sa if not a.startswith('event')]),
                 ['chunks', 'targets'])
    # samples as expected?
    assert_array_equal(erds.samples[0],
                       np.repeat(np.arange(blocklength), nfeatures))
    # that should also be the temporal feature offset
    assert_array_equal(erds.samples[0], erds.fa.event_offsetidx)
    assert_array_equal(erds.sa.event_onsetidx, np.arange(0, 71, 5))
    # finally we should see two mappers
    assert_equal(len(erds.a.mapper), 2)
    assert_true(isinstance(erds.a.mapper[0], BoxcarMapper))
    assert_true(isinstance(erds.a.mapper[1], FlattenMapper))
    # check alternative event mapper
    # this one does temporal compression by averaging
    erds_compress = eventrelated_dataset(ds,
                                         evs,
                                         event_mapper=FxMapper(
                                             'features', np.mean))
    assert_equal(len(erds), len(erds_compress))
    assert_array_equal(erds_compress.samples[:, 0], np.arange(2, 73, 5))
    #
    # now check the same dataset with event discretization
    tr = 2.5
    ds.sa['time'] = np.arange(nchunks * ntargets * blocklength) * tr
    evs = [{'onset': 4.9, 'duration': 6.2}]
    # doesn't work without conversion
    assert_raises(ValueError, eventrelated_dataset, ds, evs)
    erds = eventrelated_dataset(ds, evs, time_attr='time')
    assert_equal(len(erds), 1)
    assert_array_equal(erds.samples[0], np.repeat(np.arange(1, 5), nfeatures))
    assert_array_equal(erds.sa.orig_onset, [evs[0]['onset']])
    assert_array_equal(erds.sa.orig_duration, [evs[0]['duration']])
    assert_array_almost_equal(erds.sa.orig_offset, [2.4])
    assert_array_equal(erds.sa.time, [np.arange(2.5, 11, 2.5)])
    # now with closest match
    erds = eventrelated_dataset(ds, evs, time_attr='time', match='closest')
    expected_nsamples = 3
    assert_equal(len(erds), 1)
    assert_array_equal(
        erds.samples[0],
        np.repeat(np.arange(2, 2 + expected_nsamples), nfeatures))
    assert_array_equal(erds.sa.orig_onset, [evs[0]['onset']])
    assert_array_equal(erds.sa.orig_duration, [evs[0]['duration']])
    assert_array_almost_equal(erds.sa.orig_offset, [-0.1])
    assert_array_equal(erds.sa.time, [np.arange(5.0, 11, 2.5)])
    # now test the way back
    results = np.arange(erds.nfeatures)
    assert_array_equal(erds.a.mapper.reverse1(results),
                       results.reshape(expected_nsamples, nfeatures))
    # what about multiple results?
    nresults = 5
    results = dataset_wizard([results] * nresults)
    # and let's have an attribute to make it more difficult
    results.sa['myattr'] = np.arange(5)
    rds = erds.a.mapper.reverse(results)
    assert_array_equal(
        rds, results.samples.reshape(nresults * expected_nsamples, nfeatures))
    assert_array_equal(rds.sa.myattr,
                       np.repeat(results.sa.myattr, expected_nsamples))
Example #12
def test_hrf_modeling():
    skip_if_no_external('nibabel')
    skip_if_no_external('nipy') # ATM relies on NiPy's GLM implementation
    ds = load_example_fmri_dataset('25mm') #literal=True)
    # TODO: simulate short dataset with known properties and use it
    # for testing
    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    tr = ds.a.imghdr['pixdim'][4]
    for ev in events:
        for a in ('onset', 'duration'):
            ev[a] = ev[a] * tr
    evds = eventrelated_dataset(ds, events, time_attr='time_coords',
                                condition_attr='targets',
                                design_kwargs=dict(drift_model='blank'),
                                glmfit_kwargs=dict(model='ols'),
                                model='hrf')
    # same voxels
    assert_equal(ds.nfeatures, evds.nfeatures)
    assert_array_equal(ds.fa.voxel_indices, evds.fa.voxel_indices)
    # one sample for each condition, plus constant
    assert_equal(sorted(ds.sa['targets'].unique), sorted(evds.sa.targets))
    assert_equal(evds.a.add_regs.sa.regressor_names[0], 'constant')
    # with centered data
    zscore(ds)
    evds_demean = eventrelated_dataset(ds, events, time_attr='time_coords',
                                condition_attr='targets',
                                design_kwargs=dict(drift_model='blank'),
                                glmfit_kwargs=dict(model='ols'),
                                model='hrf')
    # after demeaning the constant should consume a lot less
    assert(evds.a.add_regs[0].samples.mean()
           > evds_demean.a.add_regs[0].samples.mean())
    # from eyeballing the sensitivity example -- would be better to test this on
    # the tutorial data
    assert(evds_demean[evds.sa.targets == 'shoe'].samples.max() \
           > evds_demean[evds.sa.targets == 'bottle'].samples.max())
    # HRF models
    assert('regressors' in evds.sa)
    assert('regressors' in evds.a.add_regs.sa)
    assert_equal(evds.sa.regressors.shape[1], len(ds))

    # custom regressors
    evds_regrs = eventrelated_dataset(ds, events, time_attr='time_coords',
                                condition_attr='targets',
                                regr_attrs=['time_indices'],
                                design_kwargs=dict(drift_model='blank'),
                                glmfit_kwargs=dict(model='ols'),
                                model='hrf')
    # verify that nothing screwed up time_coords
    assert_equal(ds.sa.time_coords[0], 0)
    assert_equal(len(evds_regrs), len(evds))
    # one more output sample in .a.add_regs
    assert_equal(len(evds_regrs.a.add_regs) - 1, len(evds.a.add_regs))
    # comes last before constant
    assert_equal('time_indices', evds_regrs.a.add_regs.sa.regressor_names[-2])
    # order of main regressors is unchanged
    assert_array_equal(evds.sa.targets, evds_regrs.sa.targets)

    # custom regressors from external sources
    evds_regrs = eventrelated_dataset(ds, events, time_attr='time_coords',
                                condition_attr='targets',
                                regr_attrs=['time_coords'],
                                design_kwargs=dict(drift_model='blank',
                                                   add_regs=np.linspace(1, -1, len(ds))[None].T,
                                                   add_reg_names=['negative_trend']),
                                glmfit_kwargs=dict(model='ols'),
                                model='hrf')
    assert_equal(len(evds_regrs), len(evds))
    # But we got one more in additional regressors
    assert_equal(len(evds_regrs.a.add_regs) - 2, len(evds.a.add_regs))
    # comes last before constant
    assert_array_equal(['negative_trend', 'time_coords', 'constant'],
                       evds_regrs.a.add_regs.sa.regressor_names)
    # order is otherwise unchanged
    assert_array_equal(evds.sa.targets, evds_regrs.sa.targets)

    # HRF models with estimating per each chunk
    assert_equal(ds.sa.time_coords[0], 0)
    evds_regrs = eventrelated_dataset(ds, events, time_attr='time_coords',
                                condition_attr=['targets', 'chunks'],
                                regr_attrs=['time_indices'],
                                design_kwargs=dict(drift_model='blank'),
                                glmfit_kwargs=dict(model='ols'),
                                model='hrf')
    assert_true('add_regs' in evds_regrs.a)
    assert_true('time_indices' in evds_regrs.a.add_regs.sa.regressor_names)

    assert_equal(len(ds.UC) * len(ds.UT), len(evds_regrs))
    assert_equal(len(evds_regrs.UC) * len(evds_regrs.UT), len(evds_regrs))

    from mvpa2.mappers.fx import mean_group_sample
    evds_regrs_meaned = mean_group_sample(['targets'])(evds_regrs)
    assert_array_equal(evds_regrs_meaned.T, evds.T) # targets should be the same
Example #13
def build_events_ds(ds, new_duration, **kwargs):
    """
    This function is used to convert a dataset in a event_related dataset. Used for
    transfer learning and clustering, thus a classifier has been trained on a 
    event related dataset and the prediction should be done on the same kind of the 
    dataset.
    
    Parameters    
    ----------
    
    ds : Dataset
        The dataset to be converted
    new_duration : integer
        Is the duration of the single event, if experiment events are of different
        length, it takes the events greater or equal to new_duration.
    kwarsg : dict
        win_number: is the number of window of one single event to be extracted,
        if it is not setted, it assumes the ratio between event duration and new_duration
        overlap:
        
    Returns
    -------
    
    Dataset:
        the event_related dataset
    """

    for arg in kwargs:
        if arg == 'win_number':
            win_number = kwargs[arg]
        if arg == 'overlap':
            overlap = kwargs[arg]

    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    labels = np.unique(ds.targets)
    current_duration = dict()
    for l in labels:
        d = [e['duration'] for e in events if e['targets'] == l]
        current_duration[l] = np.unique(d)[0]

    def calc_overlap(w, l, n):
        return w - np.floor((l - w) / (n - 1))

    def calc_win_number(w, l, o):
        return (l - w) / (w - o) + 1

    if 'overlap' not in locals():
        overlap = calc_overlap(new_duration, current_duration[l], win_number)
    else:
        if overlap >= new_duration:
            overlap = new_duration - 1

    if 'win_number' not in locals():
        #win_number = np.ceil(current_duration[l]/np.float(new_duration))
        win_number = calc_win_number(new_duration, current_duration[l],
                                     overlap)

    new_event_list = []

    for e in events:
        onset = e['onset']
        chunks = e['chunks']
        targets = e['targets']
        duration = e['duration']

        for i in np.arange(win_number):
            new_onset = onset + i * (new_duration - overlap)

            new_event = dict()
            new_event['onset'] = new_onset
            new_event['duration'] = new_duration
            new_event['targets'] = targets
            new_event['chunks'] = chunks

            new_event_list.append(new_event)

    logger.info('Building new event related dataset...')
    evds = eventrelated_dataset(ds, events=new_event_list)

    return evds
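To illustrate the windowing arithmetic: with an original event duration of 12 volumes, new_duration=4 and overlap=2, calc_win_number gives (12 - 4) / (4 - 2) + 1 = 5 windows, starting at offsets 0, 2, 4, 6 and 8 from the original onset.

# hypothetical usage
evds = build_events_ds(ds, 4, overlap=2)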
Example #14
def test_hrf_modeling():
    skip_if_no_external('nibabel')
    skip_if_no_external('nipy')  # ATM relies on NiPy's GLM implementation
    ds = load_example_fmri_dataset('25mm')  #literal=True)
    # TODO: simulate short dataset with known properties and use it
    # for testing
    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    tr = ds.a.imghdr['pixdim'][4]
    for ev in events:
        for a in ('onset', 'duration'):
            ev[a] = ev[a] * tr
    evds = eventrelated_dataset(ds,
                                events,
                                time_attr='time_coords',
                                condition_attr='targets',
                                design_kwargs=dict(drift_model='blank'),
                                glmfit_kwargs=dict(model='ols'),
                                model='hrf')
    # same voxels
    assert_equal(ds.nfeatures, evds.nfeatures)
    assert_array_equal(ds.fa.voxel_indices, evds.fa.voxel_indices)
    # one sample for each condition, plus constant
    assert_equal(sorted(ds.sa['targets'].unique), sorted(evds.sa.targets))
    assert_equal(evds.a.add_regs.sa.regressor_names[0], 'constant')
    # with centered data
    zscore(ds)
    evds_demean = eventrelated_dataset(ds,
                                       events,
                                       time_attr='time_coords',
                                       condition_attr='targets',
                                       design_kwargs=dict(drift_model='blank'),
                                       glmfit_kwargs=dict(model='ols'),
                                       model='hrf')
    # after demeaning the constant should consume a lot less
    assert (evds.a.add_regs[0].samples.mean() >
            evds_demean.a.add_regs[0].samples.mean())
    # from eyeballing the sensitivity example -- would be better to test this on
    # the tutorial data
    assert(evds_demean[evds.sa.targets == 'shoe'].samples.max() \
           > evds_demean[evds.sa.targets == 'bottle'].samples.max())
    # HRF models
    assert ('regressors' in evds.sa)
    assert ('regressors' in evds.a.add_regs.sa)
    assert_equal(evds.sa.regressors.shape[1], len(ds))

    # custom regressors
    evds_regrs = eventrelated_dataset(ds,
                                      events,
                                      time_attr='time_coords',
                                      condition_attr='targets',
                                      regr_attrs=['time_indices'],
                                      design_kwargs=dict(drift_model='blank'),
                                      glmfit_kwargs=dict(model='ols'),
                                      model='hrf')
    # verify that nothing screwed up time_coords
    assert_equal(ds.sa.time_coords[0], 0)
    assert_equal(len(evds_regrs), len(evds))
    # one more output sample in .a.add_regs
    assert_equal(len(evds_regrs.a.add_regs) - 1, len(evds.a.add_regs))
    # comes last before constant
    assert_equal('time_indices', evds_regrs.a.add_regs.sa.regressor_names[-2])
    # order of main regressors is unchanged
    assert_array_equal(evds.sa.targets, evds_regrs.sa.targets)

    # custom regressors from external sources
    evds_regrs = eventrelated_dataset(
        ds,
        events,
        time_attr='time_coords',
        condition_attr='targets',
        regr_attrs=['time_coords'],
        design_kwargs=dict(drift_model='blank',
                           add_regs=np.linspace(1, -1, len(ds))[None].T,
                           add_reg_names=['negative_trend']),
        glmfit_kwargs=dict(model='ols'),
        model='hrf')
    assert_equal(len(evds_regrs), len(evds))
    # But we got one more in additional regressors
    assert_equal(len(evds_regrs.a.add_regs) - 2, len(evds.a.add_regs))
    # comes last before constant
    assert_array_equal(['negative_trend', 'time_coords', 'constant'],
                       evds_regrs.a.add_regs.sa.regressor_names)
    # order is otherwise unchanged
    assert_array_equal(evds.sa.targets, evds_regrs.sa.targets)

    # HRF models with estimating per each chunk
    assert_equal(ds.sa.time_coords[0], 0)
    evds_regrs = eventrelated_dataset(ds,
                                      events,
                                      time_attr='time_coords',
                                      condition_attr=['targets', 'chunks'],
                                      regr_attrs=['time_indices'],
                                      design_kwargs=dict(drift_model='blank'),
                                      glmfit_kwargs=dict(model='ols'),
                                      model='hrf')
    assert_true('add_regs' in evds_regrs.a)
    assert_true('time_indices' in evds_regrs.a.add_regs.sa.regressor_names)

    assert_equal(len(ds.UC) * len(ds.UT), len(evds_regrs))
    assert_equal(len(evds_regrs.UC) * len(evds_regrs.UT), len(evds_regrs))

    from mvpa2.mappers.fx import mean_group_sample
    evds_regrs_meaned = mean_group_sample(['targets'])(evds_regrs)
    assert_array_equal(evds_regrs_meaned.T,
                       evds.T)  # targets should be the same
Example #15
def spatiotemporal(ds, **kwargs):

    onset = 0

    for arg in kwargs:
        if (arg == 'onset'):
            onset = kwargs[arg]
        if (arg == 'duration'):
            duration = kwargs[arg]
        if (arg == 'enable_results'):
            enable_results = kwargs[arg]
        if (arg == 'permutations'):
            permutations = int(kwargs[arg])

    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)

    if 'duration' in locals():
        events = [e for e in events if e['duration'] >= duration]
    else:
        duration = np.min([ev['duration'] for ev in events])

    for e in events:
        e['onset'] += onset
        e['duration'] = duration

    evds = eventrelated_dataset(ds, events=events)

    [fclf, cvte] = setup_classifier(**kwargs)

    logger.info('Performing cross-validation...')
    res = cvte(evds)

    print(cvte.ca.stats)

    if permutations != 0:
        print(cvte.ca.null_prob.samples)
        dist_len = len(cvte.null_dist.dists())
        err_arr = np.zeros(dist_len)
        for i in range(dist_len):
            err_arr[i] = 1 - cvte.ca.stats.stats['ACC']

        total_p_value = np.mean(cvte.null_dist.p(err_arr))
        p_value = cvte.ca.null_prob.samples
    else:
        total_p_value = 0.
        p_value = np.array([0, 0])

    try:
        sensana = fclf.get_sensitivity_analyzer()
        res_sens = sensana(evds)
    except Exception as err:
        allowed_keys = [
            'map', 'sensitivities', 'stats', 'mapper', 'classifier', 'ds',
            'perm_pvalue', 'p'
        ]

        allowed_results = [
            None, None, cvte.ca.stats, evds.a.mapper, fclf, evds, p_value,
            total_p_value
        ]

        results_dict = dict(zip(allowed_keys, allowed_results))
        results = dict()
        if not 'enable_results' in locals():
            enable_results = allowed_keys[:]
        for elem in enable_results:
            if elem in allowed_keys:
                results[elem] = results_dict[elem]

        return results

    sens_comb = res_sens.get_mapped(mean_sample())
    mean_map = map2nifti(evds, evds.a.mapper.reverse1(sens_comb))

    l_maps = []
    for m in res_sens:
        maps = ds.a.mapper.reverse1(m)
        nifti = map2nifti(evds, maps)
        l_maps.append(nifti)

    l_maps.append(mean_map)
    # Packing results (to be substituted with a function)
    results = dict()
    if not 'enable_results' in locals():
        enable_results = [
            'map', 'sensitivities', 'stats', 'mapper', 'classifier', 'ds',
            'pvalue', 'p'
        ]

    allowed_keys = [
        'map', 'sensitivities', 'stats', 'mapper', 'classifier', 'ds',
        'pvalue', 'p'
    ]

    allowed_results = [
        l_maps, res_sens, cvte.ca.stats, evds.a.mapper, fclf, evds, p_value,
        total_p_value
    ]

    results_dict = dict(zip(allowed_keys, allowed_results))

    for elem in enable_results:

        if elem in allowed_keys:
            results[elem] = results_dict[elem]
        else:
            print('******** ' + elem + ' result is not allowed! *********')

    return results
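A hypothetical call, matching the kwargs parsed above (any classifier settings are forwarded to setup_classifier()). Note that permutations is read unconditionally after cross-validation, so it should always be passed, even as 0.

# hypothetical usage
results = spatiotemporal(ds, onset=0, duration=4, permutations=0,
                         enable_results=['stats', 'map'])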
Example #16
def normalize_dataset(ds, **kwargs):
    
    import collections
    import fractions
    
    mean = False
    normalization = 'feature'
    chunk_number = None
    
    for arg in kwargs:
        if (arg == 'mean_samples'):
            mean = kwargs[arg]
        if (arg == 'img_dim'):
            img_dim = int(kwargs[arg])
        if (arg == 'normalization'):
            normalization = str(kwargs[arg])
        if (arg == 'chunk_number'):
            chunk_number = kwargs[arg]
        
    n_targets = np.array([value for value in collections.Counter(ds.targets).values()]).min()
    
    if chunk_number == 'adaptive':
        n_chunks = np.max([fractions.gcd(n_targets, i) for i in np.arange(2, 10)])
        if n_chunks == 1:
            n_chunks = 4
    elif isinstance(chunk_number, int):
        n_chunks = int(chunk_number)
        
    if chunk_number is not None:
        argsort = np.argsort(ds.targets)
        chunks = []
        for _ in ds.uniquetargets:
            chunk = np.linspace(0, n_chunks, n_targets, endpoint=False, dtype=np.int)
            chunks.append(chunk)
        
        
        ds.chunks[argsort] = np.hstack(chunks)
        
    
    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num']) 
        ds = ds.get_mapped(avg_mapper)     
    
    
    if normalization == 'feature' or normalization == 'both':
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)#, param_est=('targets', ['fixation']))
    
    
    if normalization == 'sample' or normalization == 'both':
        # Normalizing image-wise
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds, axis=1)[:, None]
        ds.samples /= np.std(ds, axis=1)[:, None]
        
        ds.samples[np.isnan(ds.samples)] = 0
    
    
    # Find event related stuff
    ds.a.events = find_events(#event= ds.sa.event_num, 
                              chunks = ds.sa.chunks, 
                              targets = ds.sa.targets)
    
    return ds
Example #17
def spatiotemporal(ds, **kwargs):
      
    onset = 0
    
    for arg in kwargs:
        if (arg == 'onset'):
            onset = kwargs[arg]
        if (arg == 'duration'):
            duration = kwargs[arg]
        if (arg == 'enable_results'):
            enable_results = kwargs[arg]
        if (arg == 'permutations'):
            permutations = int(kwargs[arg])
       
    events = find_events(targets = ds.sa.targets, chunks = ds.sa.chunks)   
    
    if 'duration' in locals():
        events = [e for e in events if e['duration'] >= duration]
    else:
        duration = np.min([ev['duration'] for ev in events])

    for e in events:
        e['onset'] += onset           
        e['duration'] = duration
        
    evds = eventrelated_dataset(ds, events = events) 
    
    [fclf, cvte] = setup_classifier(**kwargs)
    
    logger.info('Performing cross-validation...')
    res = cvte(evds)
    
    print cvte.ca.stats 
    
    
    if permutations != 0:
        print cvte.ca.null_prob.samples
        dist_len = len(cvte.null_dist.dists())
        err_arr = np.zeros(dist_len)
        for i in range(dist_len):
            err_arr[i] = 1 - cvte.ca.stats.stats['ACC']
    
        total_p_value = np.mean(cvte.null_dist.p(err_arr))
        p_value = cvte.ca.null_prob.samples
    else:
        total_p_value = 0.
        p_value = np.array([0,0])
    
    
    try:
        sensana = fclf.get_sensitivity_analyzer()
        res_sens = sensana(evds)
    except Exception, err:
        allowed_keys = ['map', 'sensitivities', 'stats', 
                        'mapper', 'classifier', 'ds', 
                        'perm_pvalue', 'p']
        
        allowed_results = [None, None, cvte.ca.stats, 
                           evds.a.mapper, fclf, evds, 
                           p_value, total_p_value]
        
        results_dict = dict(zip(allowed_keys, allowed_results))
        results = dict()
        if not 'enable_results' in locals():
            enable_results = allowed_keys[:]
        for elem in enable_results:
            if elem in allowed_keys:
                results[elem] = results_dict[elem]
                
        return results
Example #18
def run(args):
    ds = arg2ds(args.data)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)
    # build list of events
    events = []
    timebased_events = False
    if args.event_attrs is not None:
        def_attrs = dict([(k, ds.sa[k].value) for k in args.event_attrs])
        events = find_events(**def_attrs)
    elif args.csv_events is not None:
        if args.csv_events == '-':
            csv = sys.stdin.read()
            import cStringIO
            csv = cStringIO.StringIO(csv)
        else:
            csv = open(args.csv_events, 'rU')
        csvt = _load_csv_table(csv)
        if not len(csvt):
            raise ValueError("no CSV columns found")
        if args.onset_column:
            csvt['onset'] = csvt[args.onset_column]
        nevents = len(csvt[csvt.keys()[0]])
        events = []
        for ev in xrange(nevents):
            events.append(dict([(k, v[ev]) for k, v in csvt.iteritems()]))
    elif args.onsets is not None:
        if not len(args.onsets):
            args.onsets = [i for i in sys.stdin]
        # time or sample-based?
        if args.time_attr is None:
            oconv = int
        else:
            oconv = float
        events = [{'onset': oconv(o)} for o in args.onsets]
    elif args.fsl_ev3 is not None:
        timebased_events = True
        from mvpa2.misc.fsl import FslEV3
        events = []
        for evsrc in args.fsl_ev3:
            events.extend(FslEV3(evsrc).to_events())
    if not len(events):
        raise ValueError("no events defined")
    verbose(2, 'Extracting %i events' % len(events))
    if args.event_compression is None:
        evmap = None
    elif args.event_compression == 'mean':
        evmap = FxMapper('features', np.mean, attrfx=merge2first)
    elif args.event_compression == 'median':
        evmap = FxMapper('features', np.median, attrfx=merge2first)
    elif args.event_compression == 'min':
        evmap = FxMapper('features', np.min, attrfx=merge2first)
    elif args.event_compression == 'max':
        evmap = FxMapper('features', np.max, attrfx=merge2first)
    # convert to event-related ds
    evds = eventrelated_dataset(ds, events, time_attr=args.time_attr,
                                match=args.match_strategy,
                                event_offset=args.offset,
                                event_duration=args.duration,
                                event_mapper=evmap)
    # act on all attribute options
    evds = process_common_dsattr_opts(evds, args)
    # and store
    ds2hdf5(evds, args.output, compression=args.hdf5_compression)
    return evds
Example #19
def build_events_ds(ds, new_duration, **kwargs):
    """
    This function is used to convert a dataset in a event_related dataset. Used for
    transfer learning and clustering, thus a classifier has been trained on a 
    event related dataset and the prediction should be done on the same kind of the 
    dataset.
    
    Parameters    
    ----------
    
    ds : Dataset
        The dataset to be converted
    new_duration : integer
        Is the duration of the single event, if experiment events are of different
        length, it takes the events greater or equal to new_duration.
    kwarsg : dict
        win_number: is the number of window of one single event to be extracted,
        if it is not setted, it assumes the ratio between event duration and new_duration
        overlap:
        
    Returns
    -------
    
    Dataset:
        the event_related dataset
    """
    
    for arg in kwargs:
        if arg == 'win_number':
            win_number = kwargs[arg]
        if arg == 'overlap':
            overlap = kwargs[arg]

    events = find_events(targets = ds.sa.targets, chunks = ds.sa.chunks)
    labels = np.unique(ds.targets)
    current_duration = dict()
    for l in labels:
        d = [e['duration'] for e in events if e['targets'] == l]
        current_duration[l] = np.unique(d)[0]

    def calc_overlap(w, l, n):
        return w - np.floor((l - w)/(n - 1))
    
    def calc_win_number (w, l, o):
        return (l - w)/(w - o) + 1
    
    if 'overlap' not in locals():
        overlap = calc_overlap(new_duration, current_duration[l], win_number)
    else:
        if overlap >= new_duration:
            overlap = new_duration - 1
            
    if 'win_number' not in locals():
        #win_number = np.ceil(current_duration[l]/np.float(new_duration))
        win_number = calc_win_number(new_duration, current_duration[l], overlap)
        
    new_event_list = []
    
    for e in events:
        onset = e['onset']
        chunks = e['chunks']
        targets = e['targets']
        duration = e['duration']

        for i in np.arange(win_number):
            new_onset = onset + i * (new_duration - overlap)
            
            new_event = dict()
            new_event['onset'] = new_onset
            new_event['duration'] = new_duration
            new_event['targets'] = targets
            new_event['chunks'] = chunks
            
            new_event_list.append(new_event)
    
    
    logger.info('Building new event related dataset...')
    evds = eventrelated_dataset(ds, events = new_event_list)
    
    return evds
Example #20
def run(args):
    ds = arg2ds(args.data)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)
    # build list of events
    events = []
    timebased_events = False
    if args.event_attrs is not None:
        def_attrs = dict([(k, ds.sa[k].value) for k in args.event_attrs])
        events = find_events(**def_attrs)
    elif args.csv_events is not None:
        if args.csv_events == '-':
            csv = sys.stdin.read()
            import cStringIO
            csv = cStringIO.StringIO(csv)
        else:
            csv = open(args.csv_events, 'rU')
        csvt = _load_csv_table(csv)
        if not len(csvt):
            raise ValueError("no CSV columns found")
        if args.onset_column:
            csvt['onset'] = csvt[args.onset_column]
        nevents = len(csvt[csvt.keys()[0]])
        events = []
        for ev in xrange(nevents):
            events.append(dict([(k, v[ev]) for k, v in csvt.iteritems()]))
    elif args.onsets is not None:
        if not len(args.onsets):
            args.onsets = [i for i in sys.stdin]
        # time or sample-based?
        if args.time_attr is None:
            oconv = int
        else:
            oconv = float
        events = [{'onset': oconv(o)} for o in args.onsets]
    elif args.fsl_ev3 is not None:
        timebased_events = True
        from mvpa2.misc.fsl import FslEV3
        events = []
        for evsrc in args.fsl_ev3:
            events.extend(FslEV3(evsrc).to_events())
    if not len(events):
        raise ValueError("no events defined")
    verbose(2, 'Extracting %i events' % len(events))
    if args.event_compression is None:
        evmap = None
    elif args.event_compression == 'mean':
        evmap = FxMapper('features', np.mean, attrfx=merge2first)
    elif args.event_compression == 'median':
        evmap = FxMapper('features', np.median, attrfx=merge2first)
    elif args.event_compression == 'min':
        evmap = FxMapper('features', np.min, attrfx=merge2first)
    elif args.event_compression == 'max':
        evmap = FxMapper('features', np.max, attrfx=merge2first)
    # convert to event-related ds
    evds = eventrelated_dataset(ds,
                                events,
                                time_attr=args.time_attr,
                                match=args.match_strategy,
                                event_offset=args.offset,
                                event_duration=args.duration,
                                event_mapper=evmap)
    # act on all attribute options
    evds = process_common_dsattr_opts(evds, args)
    # and store
    ds2hdf5(evds, args.output, compression=args.hdf5_compression)
    return evds
Example #21
def find_events_dataset(ds, **kwargs):

    ds.a.events = find_events(  #event= ds.sa.event_num, 
        chunks=ds.sa.chunks, targets=ds.sa.targets)

    return ds