Example #1
def norm_and_mean(norm,
                  bilateral,
                  classifier,
                  sensitivities):
    """This function normalizes a list of sensitivities to their
    L2 norm if norm = True, else just stacks them according to the
    classifier they were build with. Resulting stack of sensitivities
    is averaged with the mean_group_sample() function."""
    if norm:
        from sklearn.preprocessing import normalize
        import copy
        # default for normalization is the L2 norm
        sensitivities_to_normalize = copy.deepcopy(sensitivities)
        for i in range(len(sensitivities)):
            sensitivities_to_normalize[i].samples = normalize(sensitivities_to_normalize[i].samples, axis=1) * np.sqrt(sensitivities[i].shape[1])
            print(sensitivities[i].shape)

        sensitivities_stacked = mv.vstack(sensitivities_to_normalize)
        print('I normalized the data.')

    else:
        sensitivities_stacked = mv.vstack(sensitivities)

    sgds = ['sgd', 'l-sgd']

    if bilateral:
        if classifier in sgds:
            # Note: All SGD-based classifiers want an explicit
            # 'target' sample attribute; therefore, it is still present
            # in the sensitivities.
            # note to self: we were wondering whether estimates are assigned to the
            # correct labels. I double-checked (May 19) that they are.
            # reference: ulabels are assigned with the help of np.unique, which returns a sorted
            # array. Given https://github.com/PyMVPA/PyMVPA/pull/607/files#diff-bbf744fd29d7f3e4abdf7a1586a5aa95,
            # the sensitivity calculation keeps using this order lexicographically.
            sensitivities_stacked.sa['bilat_ROIs_str'] = map(lambda p: '_'.join(p),
                                                             sensitivities_stacked.sa.targets)
        else:
            # ...whereas in GNB, the results are in 'bilat_ROIs' sample attribute
            sensitivities_stacked.sa['bilat_ROIs_str'] = map(lambda p: '_'.join(p),
                                                             sensitivities_stacked.sa.bilat_ROIs)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)

    else:
        if classifier in sgds:
            # Note: All SGD-based classifiers want an explicit
            # 'target' sample attribute; therefore, it is still present
            # in the sensitivities.
            sensitivities_stacked.sa['all_ROIs_str'] = map(lambda p: '_'.join(p),
                                                           sensitivities_stacked.sa.targets)
        else:
            # ...whereas in GNB, the results are in 'all_ROIs' sample attribute
            sensitivities_stacked.sa['all_ROIs_str'] = map(lambda p: '_'.join(p),
                                                           sensitivities_stacked.sa.all_ROIs)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)

    # return the averaged sensitivities
    return mean_sens
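The norm=True branch above rescales each fold's sensitivities to unit mean squared weight: L2-normalize every sample, then multiply by the square root of the feature count. In isolation, with plain NumPy arrays standing in for the PyMVPA datasets (a sketch, not the project's code), that step looks like this:

import numpy as np
from sklearn.preprocessing import normalize

rng = np.random.default_rng(0)
sens = rng.normal(size=(3, 500))   # 3 hypothetical folds x 500 features

normed = normalize(sens, axis=1) * np.sqrt(sens.shape[1])

# each row now has unit mean squared weight, so folds are comparable
print(np.allclose((normed ** 2).mean(axis=1), 1.0))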
Example #2
def dotheglm(sensitivities, eventdir):
    """dotheglm does the glm. It will squish the sensitivity
    dataset by vstacking them, calculating the mean sensitivity per ROI pair
    with the mean_group_sample() function, transpose it with a
    TransposeMapper(). It will get the event files and read them in, average the
    durations because there are tiny differences between subjects, and then it
    will put all of that into a glm.
    """
    sensitivities_stacked = mv.vstack(sensitivities)
    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(lambda p: '_'.join(p),
                                                         sensitivities_stacked.sa.bilat_ROIs)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(lambda p: '_'.join(p),
                                                         sensitivities_stacked.sa.all_ROIs)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # average onsets into one event file
    events = get_group_events(eventdir)
    # save the event_file
    fmt = "%10.3f\t%10.3f\t%16s\t%60s"
    np.savetxt(results_dir + 'group_events.tsv', events, delimiter='\t', comments='',
               header='onset\tduration\ttrial_type\tstim_file', fmt=fmt)
    # get events into dictionary
    events_dicts = []
    for i in range(0, len(events)):
        dic = {
            'onset': events[i][0],
            'duration': events[i][1],
            'condition': events[i][2]
        }
        events_dicts.append(dic)

    hrf_estimates = mv.fit_event_hrf_model(mean_sens_transposed,
                                           events_dicts,
                                           time_attr='time_coords',
                                           condition_attr='condition',
                                           design_kwargs=dict(drift_model='blank'),
                                           glmfit_kwargs=dict(model='ols'),
                                           return_model=True)
    mv.h5save(results_dir + 'sens_glm_objectcategories_results.hdf5', hrf_estimates)
    print('calculated glm, saving results.')
    return hrf_estimates
Example #3
		print 'detrending (remove slow drifts in signal, and jumps between runs) ...',datetime.datetime.now() # can be very memory intensive!
		M.poly_detrend(dataset, polyord=1, chunks_attr='chunks') # linear detrend
		print '... done',datetime.datetime.now()

		# ZSCORE
		print 'zscore normalising (give all voxels similar variance) ...',datetime.datetime.now()
		M.zscore(dataset, chunks_attr='chunks', param_est=('targets', ['base'])) # zscoring, on basis of rest periods
		print '... done',datetime.datetime.now()
		#P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png'))

		pickleFile = gzip.open(preprocessedCache, 'wb', 5);
		pickle.dump(dataset, pickleFile);

	# AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL
	print 'averaging over trials ...',datetime.datetime.now()
	dataset = dataset.get_mapped(M.mean_group_sample(attrs=['chunks','targets']))
	print '... only',dataset.shape[0],'cases left now'
	dataset.chunks = N.mod(N.arange(0,dataset.shape[0]),5)

	# print '\n\n\n'
	# print dataset.targets
	# print len(dataset.targets)
	# print dataset.chunks
	# print len(dataset.chunks)

	# REDUCE TO CLASS LABELS, AND ONLY KEEP CONDITIONS OF INTEREST (JAPANESE VS ENGLISH)
	dataset.targets = [t[0:2] for t in dataset.targets]
	dataset = dataset[N.array([l in ['jj', 'je', 'ej', 'ee'] for l in dataset.sa.targets], dtype='bool')]
	print '... and only',dataset.shape[0],'cases of interest (Language Switch between Japanese vs English)'
	dataset=M.datasets.miscfx.remove_invariant_features(dataset)
	print 'saving as compressed file',trimmedCache
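mean_group_sample(attrs=['chunks', 'targets']) collapses all samples that share a (chunk, target) pair into their average. The same grouping can be sketched with pandas on plain arrays (toy values below, not the experiment's data):

import numpy as np
import pandas as pd

samples = np.arange(24, dtype=float).reshape(6, 4)   # 6 hypothetical volumes x 4 voxels
chunks = [0, 0, 0, 1, 1, 1]
targets = ['j', 'e', 'j', 'e', 'j', 'e']

# average rows per (chunk, target) pair, mirroring mean_group_sample
averaged = pd.DataFrame(samples).groupby([chunks, targets]).mean()
print(averaged.shape)   # one averaged sample per pair -> (4, 4)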
Example #4
        print 'zscore normalising (give all voxels similar variance) ...', datetime.datetime.now(
        )
        M.zscore(dataset,
                 chunks_attr='chunks',
                 param_est=('targets',
                            ['base']))  # zscoring, on basis of rest periods
        print '... done', datetime.datetime.now()
        #P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png'))

        pickleFile = gzip.open(preprocessedCache, 'wb', 5)
        pickle.dump(dataset, pickleFile)

    # AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL
    print 'averaging over trials ...', datetime.datetime.now()
    dataset = dataset.get_mapped(
        M.mean_group_sample(attrs=['chunks', 'targets']))
    print '... only', dataset.shape[0], 'cases left now'
    dataset.chunks = N.mod(N.arange(0, dataset.shape[0]), 5)

    # print '\n\n\n'
    # print dataset.targets
    # print len(dataset.targets)
    # print dataset.chunks
    # print len(dataset.chunks)

    # REDUCE TO CLASS LABELS, AND ONLY KEEP CONDITIONS OF INTEREST (KEEP VS SWITCH)
    dataset.targets = [t[0] for t in dataset.targets]
    dataset = dataset[N.array([l in ['k', 's'] for l in dataset.sa.targets],
                              dtype='bool')]
    print '... and only', dataset.shape[
        0], 'cases of interest (Keep vs Switch Language)'
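After trial averaging, the snippet reassigns five round-robin pseudo-chunks and keeps only the targets whose first letter marks a condition of interest. In isolation, with made-up labels, those two steps look like this:

import numpy as np

n_cases = 20                            # hypothetical number of averaged trials
chunks = np.mod(np.arange(n_cases), 5)  # [0, 1, 2, 3, 4, 0, 1, ...]

targets = np.array(['k1', 's2', 'x3', 'k4'] * 5)          # made-up labels
mask = np.array([t[0] in ['k', 's'] for t in targets], dtype=bool)
print(mask.sum(), 'of', n_cases, 'cases kept')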
Example #5
def normalize_dataset(ds, **kwargs):

    import collections
    import fractions

    mean = False
    normalization = 'feature'
    chunk_number = None

    for arg in kwargs:
        if (arg == 'mean_samples'):
            mean = kwargs[arg]
        if (arg == 'img_dim'):
            img_dim = int(kwargs[arg])
        if (arg == 'normalization'):
            normalization = str(kwargs[arg])
        if (arg == 'chunk_number'):
            chunk_number = kwargs[arg]

    n_targets = np.array(
        [value for value in collections.Counter(ds.targets).values()]).min()

    if chunk_number == 'adaptive':
        n_chunks = np.max(
            [fractions.gcd(n_targets, i) for i in np.arange(2, 10)])
        if n_chunks == 1:
            n_chunks = 4
    elif isinstance(chunk_number, int):
        n_chunks = int(chunk_number)

    if chunk_number != None:
        argsort = np.argsort(ds.targets)
        chunks = []
        for _ in ds.uniquetargets:
            chunk = np.linspace(0,
                                n_chunks,
                                n_targets,
                                endpoint=False,
                                dtype=np.int)
            chunks.append(chunk)

        ds.chunks[argsort] = np.hstack(chunks)

    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num'])
        ds = ds.get_mapped(avg_mapper)

    if normalization == 'feature' or normalization == 'both':
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)  #, param_est=('targets', ['fixation']))

    if normalization == 'sample' or normalization == 'both':
        # Normalizing image-wise
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds, axis=1)[:, None]
        ds.samples /= np.std(ds, axis=1)[:, None]

        ds.samples[np.isnan(ds.samples)] = 0

    # Find event related stuff
    ds.a.events = find_events(  #event= ds.sa.event_num, 
        chunks=ds.sa.chunks, targets=ds.sa.targets)

    return ds
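The 'adaptive' chunk_number branch above picks the largest greatest common divisor of the per-class sample count with 2..9, falling back to 4 chunks when none is found. A self-contained sketch of that rule follows; math.gcd stands in for fractions.gcd (removed in Python 3.9), and the sample count is made up:

import math
import numpy as np

n_targets = 24                          # hypothetical per-class sample count
n_chunks = max(math.gcd(n_targets, i) for i in range(2, 10))
if n_chunks == 1:
    n_chunks = 4                        # fallback, e.g. for prime class sizes

# spread chunk labels 0..n_chunks-1 evenly across the class's samples
chunk = np.linspace(0, n_chunks, n_targets, endpoint=False).astype(int)
print(n_chunks, chunk)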
Example #6
def normalize_dataset(ds, **kwargs):
    
    import collections
    import fractions
    
    mean = False
    normalization = 'feature'
    chunk_number = None
    
    for arg in kwargs:
        if (arg == 'mean_samples'):
            mean = kwargs[arg]
        if (arg == 'img_dim'):
            img_dim = int(kwargs[arg])
        if (arg == 'normalization'):
            normalization = str(kwargs[arg])
        if (arg == 'chunk_number'):
            chunk_number = kwargs[arg]
        
    n_targets = np.array([value for value in collections.Counter(ds.targets).values()]).min()
    
    if chunk_number == 'adaptive':
        n_chunks = np.max([fractions.gcd(n_targets, i) for i in np.arange(2, 10)])
        if n_chunks == 1:
            n_chunks = 4
    elif isinstance(chunk_number, int):
        n_chunks = int(chunk_number)
        
    if chunk_number != None:
        argsort = np.argsort(ds.targets)
        chunks = []
        for _ in ds.uniquetargets:
            chunk = np.linspace(0, n_chunks, n_targets, endpoint=False, dtype=np.int)
            chunks.append(chunk)
        
        
        ds.chunks[argsort] = np.hstack(chunks)
        
    
    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num']) 
        ds = ds.get_mapped(avg_mapper)     
    
    
    if normalization == 'feature' or normalization == 'both':
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)#, param_est=('targets', ['fixation']))
    
    
    if normalization == 'sample' or normalization == 'both':
        # Normalizing image-wise
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds, axis=1)[:, None]
        ds.samples /= np.std(ds, axis=1)[:, None]
        
        ds.samples[np.isnan(ds.samples)] = 0
    
    
    # Find event related stuff
    ds.a.events = find_events(#event= ds.sa.event_num, 
                              chunks = ds.sa.chunks, 
                              targets = ds.sa.targets)
    
    return ds
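The sample-wise normalization branch z-scores each sample (row) across its features and zeroes out NaNs left by constant rows. On its own, with random stand-in data, that is:

import numpy as np

samples = np.random.default_rng(1).normal(size=(10, 50))  # hypothetical samples x features
samples -= samples.mean(axis=1)[:, None]
samples /= samples.std(axis=1)[:, None]
samples[np.isnan(samples)] = 0          # guard against zero-variance rows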
Example #7
def makeaplot(events,
              sensitivities,
              hrf_estimates,
              roi_pair,
              fn=None,
              include_all_regressors=False):
    """
    This produces a time series plot for the ROI class comparison specified in
    roi_pair, such as roi_pair = ['left FFA', 'left PPA'].
    If include_all_regressors = True, the function will create a potentially overloaded
    legend with all of the regressors, regardless of whether they occurred in the run.
    (Plotting then takes longer, but it is a useful option if all regressors are of
    relevance and can be tweaked in Inkscape.)
    If the figure should be saved, specify an existing path in the parameter fn.

    # TODO's for the future: runs=None, overlap=False, grouping (should be a way to not rely
    # on hardcoded stimuli and colors within function anymore, with Ordered Dicts):

    """
    import matplotlib.pyplot as plt

    # normalize the sensitivities
    from sklearn.preprocessing import normalize
    import copy
    #default for normalization is the L2 norm
    sensitivities_to_normalize = copy.deepcopy(sensitivities)
    for i in range(len(sensitivities)):
        sensitivities_to_normalize[i].samples = normalize(
            sensitivities_to_normalize[i].samples, axis=1)

    sensitivities_stacked = mv.vstack(sensitivities_to_normalize)

    # get the mean, because we don't want to have 15 folds of sensitivities, but their average
    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'
                                          ])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'
                                          ])(sensitivities_stacked)

    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    chunks = mean_sens_transposed.sa.chunks
    assert np.all(chunks[1:] >= chunks[:-1])

    # TR was not preserved/carried through in .a
    # so we will guestimate it based on the values of time_coords
    runs = np.unique(mean_sens_transposed.sa.chunks)
    tc = mean_sens_transposed.sa.time_coords
    TRdirty = sorted(np.unique(tc[1:] - tc[:-1]))[-1]
    assert np.abs(np.round(TRdirty, decimals=2) - TRdirty) < 0.0001

    mean_sens_transposed.sa.time_coords = np.arange(
        len(mean_sens_transposed)) * TRdirty
    # run lengths in seconds
    runlengths = [
        np.max(tc[mean_sens_transposed.sa.chunks == run]) + TRdirty
        for run in runs
    ]
    runonsets = [sum(runlengths[:run]) for run in runs]
    # just append a large number to accommodate the fact that the last run also
    # needs an endpoint at some point.
    runonsets.append(99999)

    for j in range(len(hrf_estimates.fa.bilat_ROIs_str)):
        comparison = hrf_estimates.fa.targets[j][0]
        if (roi_pair[0] in comparison) and (roi_pair[1] in comparison):
            roi_pair_idx = j
    roi_betas_ds = hrf_estimates[:, roi_pair_idx]
    roi_sens_ds = mean_sens_transposed[:, roi_pair_idx]
    from collections import OrderedDict
    block_design_betas = OrderedDict(
        sorted(zip(roi_betas_ds.sa.condition, roi_betas_ds.samples[:, 0]),
               key=lambda x: x[1]))
    block_design = list(block_design_betas)
    for run in runs:
        fig, ax = plt.subplots(1, 1, figsize=[18, 10])
        colors = [
            '#7b241c', '#e74c3c', '#154360', '#3498db', '#145a32', '#27ae60',
            '#9a7d0a', '#f4d03f', '#5b2c6f', '#a569bd', '#616a6b', '#ccd1d1'
        ]
        plt.suptitle(
            'Timecourse of sensitivities, {} versus {}, run {}'.format(
                roi_pair[0], roi_pair[1], run + 1),
            fontsize='large')
        # 2 is the TR here... sorry, we are in a rush
        run_onset = int(runonsets[run] // 2)
        run_offset = int(runonsets[run + 1] // 2)
        # for each run, adjust the x-axis
        plt.xlim([
            min(mean_sens_transposed.sa.time_coords[run_onset:int(run_offset)]
                ),
            max(mean_sens_transposed.sa.time_coords[run_onset:int(run_offset)])
        ])
        plt.ylim([-2.7, 4.5])
        plt.xlabel('Time in sec')
        plt.legend(loc=1)
        plt.grid(True)

        # for each stimulus, plot a color band on top of the plot
        for stimulus in block_design:
            # color = colors[0]
            print(stimulus)
            condition_event_mask = events['condition'] == stimulus
            onsets = events[condition_event_mask]['onset'].values
            onsets_run = [
                time for time in onsets
                if np.logical_and(time > run_onset * 2, time < run_offset * 2)
            ]
            durations = events[condition_event_mask]['duration'].values
            durations_run = [
                dur for idx, dur in enumerate(durations)
                if np.logical_and(onsets[idx] > run_onset *
                                  2, onsets[idx] < run_offset * 2)
            ]
            # prepare for plotting
            r_height = 0.3
            y = 4
            if stimulus.startswith('run'):
                continue
            if stimulus.startswith('location'):
                # gradually decrease alpha level over occurrences of location stims
                y -= r_height
                color = 'darkgreen'
            elif 'face' in stimulus:
                if stimulus == 'many_faces':
                    color = 'tomato'
                else:
                    color = 'firebrick'
            elif stimulus == 'exterior':
                color = 'cornflowerblue'
                y -= 2 * r_height
            elif stimulus.startswith('time'):
                color = 'darkslategrey'
                y -= 3 * r_height
            elif stimulus == 'night':
                color = 'slategray'
                y -= 4 * r_height
            elif stimulus == 'scene-change':
                color = 'black'
                y -= 5 * r_height

            # get the beta corresponding to the stimulus to later use in label
            beta = roi_betas_ds.samples[hrf_estimates.sa.condition == stimulus,
                                        0]

            if include_all_regressors and onsets_run == []:
                # if there are no onsets for a particular regressor but all regressors
                # should be listed, add a zero-size dummy rectangle so the regressor
                # still shows up in the legend
                rectangle = plt.Rectangle(
                    (0, 0),
                    0,
                    0,
                    fc=color,
                    alpha=0.5,
                    label='_' * 0 + stimulus.replace(" ", "") + '(' +
                    str('%.2f' % beta) + ')')
                plt.gca().add_patch(rectangle)

            for i, x in enumerate(onsets_run):
                # We need the i to trick the labeling. Matplotlib would otherwise plot
                # every single occurrence of a stimulus with a numbered legend entry.
                # Labels starting with '_' are disregarded, so attaching '_' * i to the
                # label means all but the first onset get a '_' prefix and are ignored.
                r_width = durations_run[i]
                rectangle = plt.Rectangle(
                    (x, y),
                    r_width,
                    r_height,
                    fc=color,
                    alpha=0.5,
                    label='_' * i + stimulus.replace(" ", "") + '(' +
                    str('%.2f' % beta) + ')')
                plt.gca().add_patch(rectangle)
                plt.legend(loc=1)
                # plt.axis('scaled')
                # del colors[0]

        times = roi_sens_ds.sa.time_coords[run_onset:run_offset]

        ax.plot(times,
                roi_sens_ds.samples[run_onset:run_offset],
                '-',
                color='black',
                lw=1.0)
        # plot glm model results
        glm_model = hrf_estimates.a.model.results_[0.0].predicted[
            run_onset:int(run_offset), roi_pair_idx]
        # ax2 = ax.twinx()
        ax.plot(times, glm_model, '-', color='#7b241c', lw=1.0)
        model_fit = hrf_estimates.a.model.results_[0.0].R2[roi_pair_idx]
        plt.title('R squared: %.2f' % model_fit)
        if fn:
            plt.savefig(results_dir +
                        'timecourse_avmovie_glm_sens_{}_vs_{}_run-{}.svg'.
                        format(roi_pair[0], roi_pair[1], run + 1))
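The '_' * i labeling trick relies on matplotlib skipping legend entries whose label starts with an underscore. A minimal, self-contained demonstration (file name and values are made up):

import matplotlib
matplotlib.use('Agg')            # headless backend, assumed for a script context
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
for i, x in enumerate([1.0, 3.0, 5.0]):
    # only the first rectangle contributes a legend entry; the others are
    # prefixed with '_' and therefore skipped by the legend
    rect = plt.Rectangle((x, 0.5), 1.0, 0.3, fc='firebrick', alpha=0.5,
                         label='_' * i + 'stimulus')
    ax.add_patch(rect)
ax.set_xlim(0, 7)
ax.legend(loc=1)
fig.savefig('legend_demo.svg')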
Example #8
def dotheglm(sensitivities, eventdir, annot_dir):
    """dotheglm does the glm. It will squish the sensitivity
    dataset by vstacking them, calculating the mean sensitivity per ROI pair
    with the mean_group_sample() function, transpose it with a
    TransposeMapper(). It will get the event files and read them into an apprpriate.
    data structure. It will compute one glm per run.
    """
    # normalize the sensitivities
    from sklearn.preprocessing import normalize
    import copy
    #default for normalization is the L2 norm
    sensitivities_to_normalize = copy.deepcopy(sensitivities)
    for i in range(len(sensitivities)):
        sensitivities_to_normalize[i].samples = normalize(
            sensitivities_to_normalize[i].samples, axis=1)

    sensitivities_stacked = mv.vstack(sensitivities_to_normalize)
    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'
                                          ])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'
                                          ])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # get a list of the event files with occurrences of faces
    event_files = sorted(glob(eventdir + '/*'))
    assert len(event_files) == 8
    # get additional events from the location annotation
    location_annotation = pd.read_csv(annot_dir, sep='\t')

    # get all settings with more than one occurrence
    setting = [
        set for set in location_annotation.setting.unique()
        if (location_annotation.setting[location_annotation.setting ==
                                        set].value_counts()[0] > 1)
    ]

    # get onsets and durations
    onset = []
    duration = []
    condition = []
    for set in setting:
        for i in range(location_annotation.setting[
                location_annotation['setting'] == set].value_counts()[0]):
            onset.append(location_annotation[location_annotation['setting'] ==
                                             set]['onset'].values[i])
            duration.append(location_annotation[location_annotation['setting']
                                                == set]['duration'].values[i])
        condition.append([set] * (i + 1))
    # flatten conditions
    condition = [y for x in condition for y in x]
    assert len(condition) == len(onset) == len(duration)

    # concatenate the strings
    condition_str = [set.replace(' ', '_') for set in condition]
    condition_str = ['location_' + set for set in condition_str]

    # put it in a dataframe
    locations = pd.DataFrame({
        'onset': onset,
        'duration': duration,
        'condition': condition_str
    })

    # sort according to onsets to be paranoid
    locations_sorted = locations.sort_values(by='onset')

    # this is a dataframe encoding flow of time
    time_forward = pd.DataFrame(
        [{
            'condition': 'time+',
            'onset': location_annotation['onset'][i],
            'duration': 1.0
        } for i in range(len(location_annotation) - 1)
         if location_annotation['flow_of_time'][i] in ['+', '++']])

    time_back = pd.DataFrame(
        [{
            'condition': 'time-',
            'onset': location_annotation['onset'][i],
            'duration': 1.0
        } for i in range(len(location_annotation) - 1)
         if location_annotation['flow_of_time'][i] in ['-', '--']])

    # sort according to onsets to be paranoid
    time_forward_sorted = time_forward.sort_values(by='onset')
    time_back_sorted = time_back.sort_values(by='onset')

    scene_change = pd.DataFrame([{
        'condition': 'scene-change',
        'onset': location_annotation['onset'][i],
        'duration': 1.0
    } for i in range(len(location_annotation) - 1)])

    scene_change_sorted = scene_change.sort_values(by='onset')

    # this is a dataframe encoding exterior
    exterior = pd.DataFrame([{
        'condition': 'exterior',
        'onset': location_annotation['onset'][i],
        'duration': location_annotation['duration'][i]
    } for i in range(len(location_annotation) - 1)
                             if (location_annotation['int_or_ext'][i] == 'ext')
                             ])

    # sort according to onsets to be paranoid
    exterior_sorted = exterior.sort_values(by='onset')

    # this is a dataframe encoding nighttime
    night = pd.DataFrame([{
        'condition': 'night',
        'onset': location_annotation['onset'][i],
        'duration': location_annotation['duration'][i]
    } for i in range(len(location_annotation) - 1)
                          if (location_annotation['time_of_day'][i] == 'night')
                          ])

    # sort according to onsets to be paranoid
    night_sorted = night.sort_values(by='onset')

    assert np.all(
        locations_sorted.onset[1:].values >= locations_sorted.onset[:-1].values
    )
    assert np.all(
        time_back_sorted.onset[1:].values >= time_back_sorted.onset[:-1].values
    )
    assert np.all(time_forward_sorted.onset[1:].values >=
                  time_forward_sorted.onset[:-1].values)
    assert np.all(
        exterior_sorted.onset[1:].values >= exterior_sorted.onset[:-1].values)
    assert np.all(
        night_sorted.onset[1:].values >= night_sorted.onset[:-1].values)
    assert np.all(scene_change_sorted.onset[1:].values >=
                  scene_change_sorted.onset[:-1].values)

    # check whether chunks are increasing as well as sanity check
    chunks = mean_sens_transposed.sa.chunks
    assert np.all(chunks[1:] >= chunks[:-1])

    # TR was not preserved/carried through in .a
    # so we will guestimate it based on the values of time_coords
    tc = mean_sens_transposed.sa.time_coords
    TRdirty = sorted(np.unique(tc[1:] - tc[:-1]))[-1]
    assert np.abs(np.round(TRdirty, decimals=2) - TRdirty) < 0.0001

    # make time coordinates real seconds
    mean_sens_transposed.sa.time_coords = np.arange(
        len(mean_sens_transposed)) * TRdirty

    # get runs, and runlengths in seconds
    runs = sorted(mean_sens_transposed.UC)
    assert runs == range(len(runs))
    runlengths = [
        np.max(tc[mean_sens_transposed.sa.chunks == run]) + TRdirty
        for run in runs
    ]
    runonsets = [sum(runlengths[:run]) for run in runs]
    assert len(runs) == 8

    # initialize the list of dicts that gets later passed to the glm
    events_dicts = []
    # This is relevant to later stack all dataframes together
    # and paranoidly make sure that they have the same columns
    cols = ['onset', 'duration', 'condition']

    for run in runs:
        # get face data
        eventfile = sorted(event_files)[run]
        events = pd.read_csv(eventfile, sep='\t')

        for index, row in events.iterrows():

            # disregard no faces, put everything else into event structure
            if row['condition'] != 'no_face':
                dic = {
                    'onset': row['onset'] + runonsets[run],
                    'duration': row['duration'],
                    'condition': row['condition']
                }
                events_dicts.append(dic)

    # concatenate all event dataframes
    run_reg = pd.DataFrame([{
        'onset': runonsets[i],
        'duration': abs(runonsets[i] - runonsets[i + 1]),
        'condition': 'run-' + str(i + 1)
    } for i in range(7)])

    # get all of these wonderful dataframes into a list and squish them
    dfs = [
        locations_sorted[cols], scene_change_sorted[cols],
        time_back_sorted[cols], time_forward_sorted[cols], exterior_sorted[cols],
        night_sorted[cols], run_reg[cols]
    ]
    allevents = pd.concat(dfs)

    # save all non-face related events in an event file, just for the sake of it
    allevents.to_csv(results_dir + '/' + 'non_face_regs.tsv',
                     sep='\t',
                     index=False)

    # append non-face events to event structure for glm
    for index, row in allevents.iterrows():
        dic = {
            'onset': row['onset'],
            'duration': row['duration'],
            'condition': row['condition']
        }
        events_dicts.append(dic)

    # save this event dicts structure  as a tsv file
    import csv
    with open(results_dir + '/' + 'full_event_file.tsv', 'w') as tsvfile:
        fieldnames = ['onset', 'duration', 'condition']
        writer = csv.DictWriter(tsvfile, fieldnames=fieldnames, delimiter='\t')
        writer.writeheader()
        writer.writerows(events_dicts)
    # save this event file also as json file... can there ever be enough different files...
    import json
    with open(results_dir + '/' + 'allevents.json', 'w') as f:
        json.dump(events_dicts, f)

    # do the glm - we've earned it
    hrf_estimates = mv.fit_event_hrf_model(
        mean_sens_transposed,
        events_dicts,
        time_attr='time_coords',
        condition_attr='condition',
        design_kwargs=dict(drift_model='blank'),
        glmfit_kwargs=dict(model='ols'),
        return_model=True)

    mv.h5save(results_dir + '/' + 'sens_glm_avmovie_results.hdf5',
              hrf_estimates)
    print('calculated the glm, saving results.')

    return hrf_estimates
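The run onsets above are just cumulative run lengths, so the bookkeeping can be checked with a one-liner; the run lengths below are made up for illustration:

import numpy as np

runlengths = [451.2, 441.6, 438.4, 488.0, 462.4, 439.2, 542.4, 338.4]  # hypothetical seconds
runonsets = np.concatenate([[0.0], np.cumsum(runlengths)[:-1]])
# an event at t seconds into run r sits at runonsets[r] + t on the concatenated time axis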
Example #9
		print 'detrending (remove slow drifts in signal, and jumps between runs) ...',datetime.datetime.now() # can be very memory intensive!
		M.poly_detrend(dataset, polyord=1, chunks_attr='chunks') # linear detrend
		print '... done',datetime.datetime.now()

		# ZSCORE
		print 'zscore normalising (give all voxels similar variance) ...',datetime.datetime.now()
		M.zscore(dataset, chunks_attr='chunks', param_est=('targets', ['base'])) # zscoring, on basis of rest periods
		print '... done',datetime.datetime.now()
		#P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png'))

		pickleFile = gzip.open(preprocessedCache, 'wb', 5);
		pickle.dump(dataset, pickleFile);

	# AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL
	print 'averaging over trials ...',datetime.datetime.now()
	dataset = dataset.get_mapped(M.mean_group_sample(attrs=['chunks','targets']))
	print '... only',dataset.shape[0],'cases left now'
	dataset.chunks = N.mod(N.arange(0,dataset.shape[0]),5)

	# print '\n\n\n'
	# print dataset.targets
	# print len(dataset.targets)
	# print dataset.chunks
	# print len(dataset.chunks)

	# REDUCE TO CLASS LABELS, AND ONLY KEEP CONDITIONS OF INTEREST (JAPANESE VS ENGLISH)
	dataset.targets = [t[0] for t in dataset.targets]
	dataset = dataset[N.array([l in ['j', 'e'] for l in dataset.sa.targets], dtype='bool')]
	print '... and only',dataset.shape[0],'cases of interest (Language Presented)'
	dataset=M.datasets.miscfx.remove_invariant_features(dataset)
	print 'saving as compressed file',trimmedCache
Example #10
 def _train(self, ds):
     avg_mapper = mean_group_sample([self._attribute]) 
     ds = ds.get_mapped(avg_mapper)
     return LinearCSVMC._train(self, ds)
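The _train hook above averages samples within an attribute before handing the dataset to LinearCSVMC. A rough scikit-learn analogue of the same averaging-before-training idea, with a made-up wrapper class (a sketch, not PyMVPA's implementation):

import numpy as np
from sklearn.svm import LinearSVC

class GroupAveragingSVC:
    """Hypothetical wrapper: average samples per group before fitting a linear SVM."""
    def __init__(self, groups):
        self.groups = np.asarray(groups)
        self.clf = LinearSVC()

    def fit(self, X, y):
        X, y = np.asarray(X), np.asarray(y)
        uniq = np.unique(self.groups)
        X_avg = np.vstack([X[self.groups == g].mean(axis=0) for g in uniq])
        # assumes every group carries a single label, as trials of one condition do
        y_avg = np.array([y[self.groups == g][0] for g in uniq])
        self.clf.fit(X_avg, y_avg)
        return self

With per-trial group labels, GroupAveragingSVC(groups).fit(X, y) would then train on one averaged sample per trial.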
Example #11
 def _train(self, ds):
     avg_mapper = mean_group_sample(['trial']) 
     ds = ds.get_mapped(avg_mapper)
     return self._clf._train(ds)
Example #12
        print "detrending (remove slow drifts in signal, and jumps between runs) ...", datetime.datetime.now()  # can be very memory intensive!
        M.poly_detrend(dataset, polyord=1, chunks_attr="chunks")  # linear detrend
        print "... done", datetime.datetime.now()

        # ZSCORE
        print "zscore normalising (give all voxels similar variance) ...", datetime.datetime.now()
        M.zscore(dataset, chunks_attr="chunks", param_est=("targets", ["base"]))  # zscoring, on basis of rest periods
        print "... done", datetime.datetime.now()
        # P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png'))

        pickleFile = gzip.open(preprocessedCache, "wb", 5)
        pickle.dump(dataset, pickleFile)

        # AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL
    print "averaging over trials ...", datetime.datetime.now()
    dataset = dataset.get_mapped(M.mean_group_sample(attrs=["chunks", "targets"]))
    print "... only", dataset.shape[0], "cases left now"
    dataset.chunks = N.mod(N.arange(0, dataset.shape[0]), 5)

    # print '\n\n\n'
    # print dataset.targets
    # print len(dataset.targets)
    # print dataset.chunks
    # print len(dataset.chunks)

    # REDUCE TO CLASS LABELS, AND ONLY KEEP CONDITIONS OF INTEREST (KEEP VS SWITCH)
    dataset.targets = [t[0] for t in dataset.targets]
    dataset = dataset[N.array([l in ["k", "s"] for l in dataset.sa.targets], dtype="bool")]
    print "... and only", dataset.shape[0], "cases of interest (Keep vs Switch Language)"
    dataset = M.datasets.miscfx.remove_invariant_features(dataset)
    print "saving as compressed file", trimmedCache
Example #13
def preprocess_dataset(ds, type_, **kwargs):
    """
    Preprocess the dataset: detrending is done per single run and per chunk;
    z-scoring is likewise done by chunks and by run.
    
    Parameters
    ----------
    ds : Dataset
        The dataset to be preprocessed
    type_ : string
        The experiment to be processed
    kwargs : dict
        mean_samples - boolean : if samples should be averaged
        label_included - list : list of labels to be included in the dataset
        label_dropped - string : label to be dropped (rest, fixation)
        
    Returns
    -------
    Dataset
        the processed dataset
    
    
    """
    mean = False
    normalization = 'feature'
    for arg in kwargs:
        if (arg == 'mean_samples'):
            mean = kwargs[arg]
        if (arg == 'label_included'):
            label_included = kwargs[arg].split(',')
        if (arg == 'label_dropped'):
            label_dropped = kwargs[arg] 
        if (arg == 'img_dim'):
            img_dim = int(kwargs[arg])
        if (arg == 'normalization'):
            normalization = str(kwargs[arg])
                
    
    logger.info('Dataset preprocessing: Detrending...')
    if len(np.unique(ds.sa['file'])) != 1:
        poly_detrend(ds, polyord = 1, chunks_attr = 'file')
    poly_detrend(ds, polyord = 1, chunks_attr = 'chunks')
    
    
    if  label_dropped != 'None':
        logger.info('Removing labels...')
        ds = ds[ds.sa.targets != label_dropped]
    if  label_included != ['all']:
        ds = ds[np.array([l in label_included for l in ds.sa.targets],
                          dtype='bool')]
        
               
    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num']) 
        ds = ds.get_mapped(avg_mapper)     
    
    
    if normalization == 'feature' or normalization == 'both':
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)#, param_est=('targets', ['fixation']))
    
    if normalization == 'sample' or normalization == 'both':
        #Normalizing image-wise
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds, axis=1)[:, None]
        ds.samples /= np.std(ds, axis=1)[:, None]
        
        ds.samples[np.isnan(ds.samples)] = 0
    
    
    ds.a.events = find_events(#event= ds.sa.event_num, 
                              chunks = ds.sa.chunks, 
                              targets = ds.sa.targets)
    
    return ds
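poly_detrend(ds, polyord=1, chunks_attr='chunks') removes a linear trend within every chunk. A plain-NumPy stand-in for that step (a sketch under simplifying assumptions, not PyMVPA's implementation):

import numpy as np

def detrend_by_chunk(samples, chunks):
    """Fit and subtract a straight line per chunk, independently for each feature."""
    out = np.asarray(samples, dtype=float).copy()
    chunks = np.asarray(chunks)
    for c in np.unique(chunks):
        idx = np.where(chunks == c)[0]
        t = np.arange(len(idx), dtype=float)
        slope, intercept = np.polyfit(t, out[idx], deg=1)   # per-feature coefficients
        out[idx] -= np.outer(t, slope) + intercept
    return out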
Example #14
def makeaplot(events,
              sensitivities,
              hrf_estimates,
              roi_pair,
              fn=True):
    """
    This produces a time series plot for the roi class comparison specified in
    roi_pair such as roi_pair = ['left FFA', 'left PPA']
    """
    import matplotlib.pyplot as plt

    # take the mean and transpose the sensitivities
    sensitivities_stacked = mv.vstack(sensitivities)

    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(lambda p: '_'.join(p),
                                                         sensitivities_stacked.sa.bilat_ROIs)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(lambda p: '_'.join(p),
                                                                sensitivities_stacked.sa.all_ROIs)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)

    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # some parameters
    # get the conditions
    block_design = sorted(np.unique(events['trial_type']))
    reorder = [0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11]
    block_design = [block_design[i] for i in reorder]
    # end indices to chunk timeseries into runs
    run_startidx = np.array([0, 157, 313, 469])
    run_endidx = np.array([156, 312, 468, 624])

    runs = np.unique(mean_sens_transposed.sa.chunks)

    for j in range(len(hrf_estimates.fa.bilat_ROIs_str)):
        comparison = hrf_estimates.fa.bilat_ROIs[j][0]
        if (roi_pair[0] in comparison) and (roi_pair[1] in comparison):
            roi_pair_idx = j
    roi_betas_ds = hrf_estimates[:, roi_pair_idx]
    roi_sens_ds = mean_sens_transposed[:, roi_pair_idx]

    for run in runs:
        fig, ax = plt.subplots(1, 1, figsize=[18, 10])
        colors = ['#7b241c', '#e74c3c', '#154360', '#3498db', '#145a32', '#27ae60',
                  '#9a7d0a', '#f4d03f', '#5b2c6f', '#a569bd', '#616a6b', '#ccd1d1']
        plt.suptitle('Timecourse of sensitivities, {} versus {}, run {}'.format(roi_pair[0],
                                                                                roi_pair[1],
                                                                                run + 1),
                     fontsize='large')
        plt.xlim([0, max(mean_sens_transposed.sa.time_coords)])
        plt.ylim([-5, 7])
        plt.xlabel('Time in sec')
        plt.legend(loc=1)
        plt.grid(True)
        # for each stimulus, plot a color band on top of the plot
        for stimulus in block_design:
            onsets = events[events['trial_type'] == stimulus]['onset'].values
            durations = events[events['trial_type'] == stimulus]['duration'].values
            stimulation_end = np.sum([onsets, durations], axis=0)
            r_height = 1
            color = colors[0]
            y = 6

            # get the beta corresponding to the stimulus to later use in label
            beta = roi_betas_ds.samples[hrf_estimates.sa.condition == stimulus.replace(" ", ""), 0]

            for i in range(len(onsets)):
                r_width = durations[i]
                x = stimulation_end[i]
                rectangle = plt.Rectangle((x, y),
                                          r_width,
                                          r_height,
                                          fc=color,
                                          alpha=0.5,
                                          label='_'*i + stimulus.replace(" ", "") + '(' + str('%.2f' % beta) + ')')
                plt.gca().add_patch(rectangle)
                plt.legend(loc=1)
            del colors[0]

        times = roi_sens_ds.sa.time_coords[run_startidx[run]:run_endidx[run]]

        ax.plot(times, roi_sens_ds.samples[run_startidx[run]:run_endidx[run]], '-', color='black', lw=1.0)
        glm_model = hrf_estimates.a.model.results_[0.0].predicted[run_startidx[run]:run_endidx[run], roi_pair_idx]
        ax.plot(times, glm_model, '-', color='#7b241c', lw=1.0)
        model_fit = hrf_estimates.a.model.results_[0.0].R2[roi_pair_idx]
        plt.title('R squared: %.2f' % model_fit)
        if fn:
            plt.savefig(results_dir + 'timecourse_localizer_glm_sens_{}_vs_{}_run-{}.svg'.format(roi_pair[0], roi_pair[1], run + 1))
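The hard-coded run_startidx/run_endidx arrays slice the concatenated localizer time series into its four runs; with a made-up signal, the segmentation used in the plotting loop looks like this:

import numpy as np

timeseries = np.random.default_rng(2).normal(size=625)   # hypothetical concatenated signal
run_startidx = np.array([0, 157, 313, 469])
run_endidx = np.array([156, 312, 468, 624])

# one segment per run, exactly as sliced in the plotting loop above
segments = [timeseries[s:e] for s, e in zip(run_startidx, run_endidx)]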