Example #1
def test_mapper_vs_zscore():
    """Test by comparing to results of elderly z-score function
    """
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    dss = [
        dataset_wizard(np.concatenate(
            [np.arange(40) for i in range(20)]).reshape(20, -1).T,
                targets=1, chunks=1),
        ] + datasets.values()

    for ds in dss:
        ds1 = deepcopy(ds)
        ds2 = deepcopy(ds)

        zsm = ZScoreMapper(chunks_attr=None)
        assert_raises(RuntimeError, zsm.forward, ds1.samples)
        idhashes = (idhash(ds1), idhash(ds1.samples))
        zsm.train(ds1)
        idhashes_train = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_train)

        # forward dataset
        ds1z_ds = zsm.forward(ds1)
        idhashes_forwardds = (idhash(ds1), idhash(ds1.samples))
        # must not modify samples in place!
        assert_equal(idhashes, idhashes_forwardds)

        # forward samples explicitly
        ds1z = zsm.forward(ds1.samples)
        idhashes_forward = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_forward)

        zscore(ds2, chunks_attr=None)
        assert_array_almost_equal(ds1z, ds2.samples)
        assert_array_equal(ds1.samples, ds.samples)
Example #2
def main(subject, study_dir, mask, suffix='_stim2'):
    from mvpa2.mappers.zscore import zscore
    from mvpa2.mappers.fx import mean_group_sample
    from wikisim import mvpa

    # load subject data
    sp = su.SubjPath(subject, study_dir)
    vols = task.prex_vols(sp.path('behav', 'log'))

    # load fmri data
    ds = mvpa.load_prex_beta(sp, suffix, mask, verbose=1)

    # zscore
    ds.sa['run'] = vols.run.values
    zscore(ds, chunks_attr='run')

    # average over item presentations
    ds.sa['itemno'] = vols.itemno.to_numpy()
    m = mean_group_sample(['itemno'])
    dsm = ds.get_mapped(m)
    m_vols = vols.groupby('itemno', as_index=False).mean()

    # save data samples and corresponding volume information
    res_dir = os.path.join(sp.study_dir, 'batch', 'glm', 'prex' + suffix,
                           'roi', mask)
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    mat_file = os.path.join(res_dir, f'pattern_{subject}.txt')
    tab_file = os.path.join(res_dir, f'pattern_{subject}.csv')
    np.savetxt(mat_file, dsm.samples)
    m_vols.to_csv(tab_file)
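A minimal, self-contained sketch of the mean_group_sample() averaging step used above; the data and the 'itemno' attribute values are synthetic stand-ins:

import numpy as np
from mvpa2.datasets import Dataset
from mvpa2.mappers.fx import mean_group_sample

# six samples, two features; two presentations of each of three items
ds = Dataset(np.arange(12.0).reshape(6, 2), sa={'itemno': [1, 1, 2, 2, 3, 3]})
dsm = ds.get_mapped(mean_group_sample(['itemno']))
print(dsm.shape)  # (3, 2): one averaged sample per unique 'itemno'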
Example #3
    def test_connectivity_hyperalignment(self):
        skip_if_no_external('scipy')
        skip_if_no_external('hdf5')  # needed for default results backend hdf5

        dss_train, dss_test, surface = self.get_testdata()
        qe = SurfaceQueryEngine(surface, 10, fa_node_key='node_indices')
        cha = ConnectivityHyperalignment(mask_ids=[0, 3, 6, 9],
                                         seed_indices=[0, 3, 6, 9],
                                         seed_queryengines=qe,
                                         queryengine=qe)
        mappers = cha(dss_train)
        aligned_train = [
            mapper.forward(ds) for ds, mapper in zip(dss_train, mappers)
        ]
        aligned_test = [
            mapper.forward(ds) for ds, mapper in zip(dss_test, mappers)
        ]
        for ds in aligned_train + aligned_test:
            zscore(ds, chunks_attr=None)
        sim_train_before = self.compute_connectivity_profile_similarity(
            dss_train)
        sim_train_after = self.compute_connectivity_profile_similarity(
            aligned_train)
        sim_test_before = self.compute_connectivity_profile_similarity(
            dss_test)
        sim_test_after = self.compute_connectivity_profile_similarity(
            aligned_test)
        # ISC should be higher after CHA for both training and testing data
        self.assertTrue(sim_train_after.mean() > sim_train_before.mean())
        self.assertTrue(sim_test_after.mean() > sim_test_before.mean())
Example #4
def prepare_subject_for_hyperalignment(subject_label, bold_fname, mask_fname, out_dir):
    print('Loading data %s with mask %s' % (bold_fname, mask_fname))
    ds = fmri_dataset(samples=bold_fname, mask=mask_fname)
    zscore(ds, chunks_attr=None)
    out_fname = os.path.join(out_dir, 'sub-%s_data.hdf5' % subject_label)
    print('Saving to %s' % out_fname)
    h5save(out_fname, ds)
Example #5
 def _get_seed_means(self, measure, queryengine, dataset, seed_indices):
     # Computing seed data as mean timeseries in each SL
     seed_data = Searchlight(measure, queryengine=queryengine,
                             nproc=self.params.nproc, roi_ids=seed_indices)
     seed_data = seed_data(dataset)
     zscore(seed_data, chunks_attr=None)
     return seed_data
Example #7
def compute_connectomes(datasets, queryengine, target_indices):
    """
    Parameters
    ----------
    datasets: a list of PyMVPA datasets, one per subject.
    queryengine: a PyMVPA SurfaceQueryEngine trained on the surface defining this data,
        with the searchlight radius matching your analysis.
    target_indices: the indices of the vertices where each searchlight for a connectivity target will be centered.

    Returns
    -------
    connectomes: a list of connectivity matrices, where each entry is the correlation between the
        timeseries of searchlights centered on a connectivity seed and a connectivity target.
    """

    conn_metric = lambda x, y: np.dot(x.samples, y.samples) / x.nsamples
    connectivity_mapper = FxyMapper(conn_metric)
    mean_feature_measure = MeanFeatureMeasure()

    # compute means for aligning seed features
    conn_means = [seed_means(MeanFeatureMeasure(), queryengine, ds, target_indices) for ds in datasets]

    conn_targets = []
    for csm in conn_means:
        zscore(csm, chunks_attr=None)
        conn_targets.append(csm)

    connectomes = []
    for target, ds in zip(conn_targets, datasets):
        connectivity_mapper.train(target)
        connectome = connectivity_mapper.forward(ds)
        connectome.fa = ds.fa
        zscore(connectome, chunks_attr=None)
        connectomes.append(connectome)
    return connectomes
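A hedged usage sketch for compute_connectomes(); the inputs are assumed to be prepared as in the surrounding examples, and `seed_means` must be the seed-averaging helper (cf. compute_seed_means further below):

# dss: list of per-subject Datasets with ds.fa['node_indices'] set
# surface: a PyMVPA Surface matching those node indices
qe = SurfaceQueryEngine(surface, 10, fa_node_key='node_indices')
connectomes = compute_connectomes(dss, qe, target_indices=[0, 3, 6, 9])
# each connectome is z-scored and carries the feature attributes of its dataset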
Example #8
def prepare_subject_for_hyperalignment(subject_label, bold_fname, mask_fname,
                                       out_dir):
    print('Loading data %s with mask %s' % (bold_fname, mask_fname))
    ds = fmri_dataset(samples=bold_fname, mask=mask_fname)
    zscore(ds, chunks_attr=None)
    out_fname = os.path.join(out_dir, 'sub-%s_data.hdf5' % subject_label)
    print('Saving to %s' % out_fname)
    h5save(out_fname, ds)
Example #9
 def _seed_means(self, measure, queryengine, ds, seed_indices):
     # Seed data is the mean timeseries for each searchlight
     seed_data = Searchlight(measure, queryengine=queryengine,
                             nproc=self.nproc,
                             roi_ids=np.concatenate(seed_indices).copy())
     if isinstance(ds, np.ndarray):
         ds = Dataset(ds)
     seed_data = seed_data(ds)
     zscore(seed_data.samples, chunks_attr=None)
     return seed_data
Example #10
def test_zscore_withoutchunks():
    # just a smoke test to see if all issues of
    # https://github.com/PyMVPA/PyMVPA/issues/26
    # are fixed
    from mvpa2.datasets import Dataset
    ds = Dataset(np.arange(32).reshape((8, -1)), sa=dict(targets=range(8)))
    zscore(ds, chunks_attr=None)
    assert (np.any(ds.samples != np.arange(32).reshape((8, -1))))
    ds_summary = ds.summary()
    assert (ds_summary is not None)
Example #12
 def compute_connectivity_profile_similarity(self, dss):
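     # For each subject: correlate every column of its connectivity matrix
     # with the matching column of the summed matrices of the other subjects;
     # z-scoring both makes the mean of elementwise products a correlation.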
     # from scipy.spatial.distance import pdist, squareform
     # conns = [1 - squareform(pdist(ds.samples.T, 'correlation')) for ds in dss]
     conns = [np.corrcoef(ds.samples.T) for ds in dss]
     conn_sum = np.sum(conns, axis=0)
     sim = np.zeros((len(dss), dss[0].shape[1]))
     for i, conn in enumerate(conns):
         conn_diff = conn_sum - conn
         zscore(conn_diff, chunks_attr=None)
         zscore(conn, chunks_attr=None)
         sim[i] = np.mean(conn_diff * conn, axis=0)
     return sim
Example #14
 def _prep_h2a_data(self, response_data, node_indices):
     # wrap bare arrays as Datasets so the feature attributes stick to the
     # objects that are used downstream
     response_data = [Dataset(d) if isinstance(d, np.ndarray) else d
                      for d in response_data]
     for d in response_data:
         d.fa['node_indices'] = node_indices.copy()

     connectivity_data = self._get_connectomes(response_data)
     h2a_input_data = self._frobenius_norm_and_merge(connectivity_data, response_data, node_indices)
     for d in h2a_input_data:
         d.fa['node_indices'] = node_indices.copy()
         zscore(d, chunks_attr=None)
     return h2a_input_data
Example #15
def preprocess_betas(paths,
                     sub,
                     btype="LSS",
                     c="trial_type",
                     roi="grayMatter",
                     z=True):
    from project_code import projectutils as pu
    rds = pu.loadsubbetas(paths, sub, btype=btype, m=roi)
    rds.sa['targets'] = rds.sa[c]
    if z:
        from mvpa2.mappers.zscore import zscore
        zscore(rds, chunks_attr='chunks')
    return rds
Example #16
def main(subject, study_dir, mask, stat, res_name, items='ac',
         suffix='_stim_fix2', feature_mask=None, radius=3, n_perm=1000,
         n_proc=None):

    from mvpa2.datasets.mri import map2nifti
    from mvpa2.mappers.zscore import zscore
    from mvpa2.measures.searchlight import sphere_searchlight
    from nireact import mvpa

    # lookup subject directory
    sp = su.SubjPath(subject, study_dir)

    # load task information
    vols = task.disp_vols(sp.path('behav', 'log'))

    # unpack the items option to get item type code
    item_names = 'abc'
    item_comp = [item_names.index(name) + 1 for name in items]

    # get post runs, A and C items only
    include = ((vols.run >= 5) & np.isin(vols.item_type, item_comp) &
               (vols.correct == 1))
    post = vols.loc[include, :]

    # load beta series
    ds = mvpa.load_disp_beta(sp, suffix, mask, feature_mask, verbose=1)

    # define measure and contrasts to write out
    contrasts = ['pos', 'block_inter', 'inter_block']
    m = mvpa.TriadVector(post, item_comp, stat, contrasts, n_perm)

    # zscore
    ds.sa['run'] = vols.run.values
    zscore(ds, chunks_attr='run')

    # searchlight
    print('Running searchlight...')
    sl = sphere_searchlight(m, radius=radius, nproc=n_proc)
    sl_map = sl(ds[include])

    # save results
    nifti_include = map2nifti(ds, sl_map[-1])
    for i, contrast in enumerate(contrasts):
        res_dir = sp.path('rsa', f'{res_name}_{contrast}')
        if not os.path.exists(res_dir):
            os.makedirs(res_dir)

        nifti = map2nifti(ds, sl_map[i])
        nifti.to_filename(su.impath(res_dir, 'zstat'))

        nifti_include.to_filename(su.impath(res_dir, 'included'))
Example #17
def create_betas_per_trial_with_pymvpa_roni(study_path, subj, conf, mask_name, flavor, TR):
    dhandle = OpenFMRIDataset(study_path)
    model = 1
    task = 1
    # Do this for other tasks as well. not only the first
    mask_fname = _opj(study_path, "sub{:0>3d}".format(subj), "masks", conf.mvpa_tasks[0], "{}.nii.gz".format(mask_name))
    print mask_fname
    run_datasets = []
    for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
        if type(run_id) == str:
            continue

            # all_events = dhandle.get_bold_run_model(model, subj, run_id)
        all_events = get_bold_run_model(dhandle, 2, subj, run_id)
        run_events = []
        i = 0
        for event in all_events:
            if event["task"] == task:
                event["condition"] = "{}-{}".format(event["condition"], event["id"])
                run_events.append(event)
                i += 1

                # load BOLD data for this run (with masking); add 0-based chunk ID
        run_ds = dhandle.get_bold_run_dataset(subj, task, run_id, flavor=flavor, chunks=run_id - 1, mask=mask_fname)
        # convert event info into a sample attribute and assign as 'targets'
        run_ds.sa.time_coords = run_ds.sa.time_indices * TR
        run_ds.sa["targets"] = events2sample_attr(run_events, run_ds.sa.time_coords, noinfolabel="rest")
        # additional time series preprocessing can go here
        poly_detrend(run_ds, polyord=1, chunks_attr="chunks")
        zscore(run_ds, chunks_attr="chunks", param_est=("targets", ["rest"]), dtype="float32")
        glm_dataset = fit_event_hrf_model(run_ds, run_events, time_attr="time_coords", condition_attr="condition")
        glm_dataset.sa["targets"] = [x[: x.find("-")] for x in glm_dataset.sa.condition]
        glm_dataset.sa["id"] = [x[x.find("-") + 1 :] for x in glm_dataset.sa.condition]
        glm_dataset.sa.condition = glm_dataset.sa["targets"]
        glm_dataset.sa["chunks"] = [run_id - 1] * len(glm_dataset.samples)

        # If a trial was dropped (the subject pressed a button), then the
        # counterpart trial from the other condition should also be dropped
        for pair in conf.conditions_to_compare:
            cond_bool = np.array([c in pair for c in glm_dataset.sa["condition"]])
            sub_dataset = glm_dataset[cond_bool]
            c = Counter(sub_dataset.sa.id)
            for value in c:
                if c[value] < 2:
                    id_bool = np.array([value in cond_id for cond_id in glm_dataset.sa["id"]])
                    glm_dataset = glm_dataset[np.bitwise_not(np.logical_and(id_bool, cond_bool))]

        run_datasets.append(glm_dataset)

    return vstack(run_datasets, 0)
Example #18
def get_dissim_roi(subnr):
    ds = h5load(fns.betafn(subnr))
    ds = ds[:, mask_]
    ds = ds[ds.sa.condition != 'self']
    zscore(ds, chunks_attr='chunks')
    ds = mean_group_sample(['condition'])(ds)

    names = []
    dissims = []
    for roi, (center, ids) in rois.iteritems():
        names.append(roi)
        sample_roi = ds.samples[:, ids]
        dissim_roi = pdist(sample_roi, 'correlation')
        dissims.append(dissim_roi)
    dss = dataset_wizard(dissims, targets=names)
    return dss
Example #19
def preprocess_data(paths,
                    sublist,
                    sub,
                    filter_params=[49, 2],
                    roi="grayMatter",
                    z=True):
    dsdict = loadsubdata(paths, sublist, m=roi)
    tds = dsdict[sub]
    beta_events = loadevents(paths, sublist)
    # savitsky golay filtering
    from pythonutils import savgolfilter as SGF
    SGF.sg_filter(tds, filter_params[0], filter_params[1])
    # zscore entire set. if done chunk-wise, there is no double-dipping (since we leave a chunk out at a time).
    if z:
        from mvpa2.mappers.zscore import zscore
        zscore(tds, chunks_attr='chunks')
    rds, events = amendtimings(tds, beta_events[sub])
    return rds, events
Example #20
def normalize(dss, norm_type):
    if norm_type == 'zscore':
        if isinstance(dss, (list, tuple, np.ndarray)):
            _ = [zscore(sd, chunks_attr=None) for sd in dss]
        else:
            zscore(dss, chunks_attr=None)
    elif norm_type == 'percent_signal_change':
        if isinstance(dss, (list, tuple, np.ndarray)):
            _ = [psc(sd, chunks_attr=None) for sd in dss]
        else:
            psc(dss, chunks_attr=None)
    elif norm_type == 'demean':
        if isinstance(dss, (list, tuple, np.ndarray)):
            _ = [psc(sd, scale=False, chunks_attr=None) for sd in dss]
        else:
            psc(dss, scale=False, chunks_attr=None)

    return dss
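Illustrative calls for normalize(); this assumes `psc` (percent signal change) is available in the enclosing module, as the function itself does, and that `ds_list`/`ds_one` are already-loaded PyMVPA datasets:

ds_list = normalize(ds_list, 'zscore')   # per-dataset z-scoring, in place
ds_one = normalize(ds_one, 'demean')     # mean removal only (psc with scale=False)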
Example #21
    def _level1(self, datasets, commonspace, ref_ds, mappers, residuals):
        params = self.params  # for quicker access ;)
        data_mapped = [ds.samples for ds in datasets]
        counts = 1  # number of datasets used so far for generating commonspace
        for i, (m, ds_new) in enumerate(zip(mappers, datasets)):
            if __debug__:
                debug('HPAL_', "Level 1: ds #%i" % i)
            if i == ref_ds:
                continue
            # assign common space to ``space`` of the mapper, because this is
            # where it will be looking for it
            ds_new.sa[m.get_space()] = commonspace
            # find transformation of this dataset into the current common space
            m.train(ds_new)
            # remove common space attribute again to save on memory when the
            # common space is updated for the next iteration
            del ds_new.sa[m.get_space()]
            # project this dataset into the current common space
            ds_ = m.forward(ds_new.samples)
            if params.zscore_common:
                zscore(ds_, chunks_attr=None)
            # replace original dataset with mapped one -- only the reference
            # dataset will remain unchanged
            data_mapped[i] = ds_

            # compute first-level residuals wrt to the initial common space
            if residuals is not None:
                residuals[0, i] = np.linalg.norm(ds_ - commonspace)

            # Update the common space. This is an incremental update after
            # processing each 1st-level dataset. Maybe there should be a flag
            # to make a batch update after processing all 1st-level datasets
            # to an identical 1st-level common space
            # TODO: make just a function so we dont' waste space
            if params.level1_equal_weight:
                commonspace = params.combiner1(ds_,
                                               commonspace,
                                               weights=(float(counts), 1.0))
            else:
                commonspace = params.combiner1(ds_, commonspace)
            counts += 1
            if params.zscore_common:
                zscore(commonspace, chunks_attr=None)
        return data_mapped
Example #23
def hypalign(source, target, node, surfsel, projmat, mask):
    slneighbors = surfsel[node]
    if len(slneighbors) == 0:
        return projmat

    sl = [source[:, slneighbors], target[:, slneighbors]]

    hmapper = ProcrusteanMapper(space='commonspace')
    refds = sl[1].copy()
    commonspace = refds.samples
    zscore(commonspace, chunks_attr=None)
    ds_new = sl[0].copy()
    ds_new.sa[hmapper.get_space()] = commonspace.astype(float)
    hmapper.train(ds_new)
    conproj = hmapper.proj
    m, n = conproj.shape
    index = np.array(slneighbors)
    projmat[np.ix_(index, index)] += np.asarray(conproj)
    return projmat
Example #24
 def test_hyper_input_dataset_check(self):
     # If supplied with only one dataset during training,
     # make sure it doesn't run multiple levels and crap out
     ha = Hyperalignment()
     ds_all = [datasets['uni4small'] for i in range(3)]
     # Make sure it raises TypeError if a list is not passed
     self.assertRaises(TypeError, ha, ds_all[0])
     self.assertRaises(TypeError, ha.train, ds_all[0])
     # And it doesn't crap out with a single dataset for training
     ha.train([ds_all[0]])
     zscore(ds_all[0], chunks_attr=None)
     assert_array_equal(ha.commonspace, ds_all[0].samples)
     # make sure it accepts tuple of ndarray
     ha = Hyperalignment()
     m = ha(tuple(ds_all))
     ha = Hyperalignment()
     dss_arr = np.empty(len(ds_all), dtype=object)
     for i in range(len(ds_all)):
         dss_arr[i] = ds_all[i]
     m = ha(dss_arr)
Example #26
 def get_testdata(self):
     # get a dataset with some prominent trends in it
     ds4l = datasets['uni4large']
     # lets select for now only meaningful features
     ds_orig = ds4l[:, ds4l.a.nonbogus_features]
     zscore(ds_orig, chunks_attr=None)
     n = 4  # # of datasets to generate
     Rs, dss_rotated, dss_rotated_clean = [], [], []
     # now lets compose derived datasets by using some random
     # rotation(s)
     while len(dss_rotated_clean) < n:
         ds_ = random_affine_transformation(ds_orig,
                                            scale_fac=1.0,
                                            shift_fac=0.)
         if ds_.a.random_scale <= 0:
             continue
         Rs.append(ds_.a.random_rotation)
         zscore(ds_, chunks_attr=None)
         dss_rotated_clean.append(ds_)
         i = len(dss_rotated_clean) - 1
         ds_2 = hstack(
             [ds_, ds4l[:, ds4l.a.bogus_features[i * 4:i * 4 + 4]]])
         zscore(ds_2, chunks_attr=None)
         dss_rotated.append(ds_2)
     return ds_orig, dss_rotated, dss_rotated_clean, Rs
Example #27
def detrend(ds):
	#print ds.summary()
	ds.samples = ds.samples.astype('float')
	pl.figure()
	pl.subplot(221)
	plot_samples_distance(ds, sortbyattr='chunks')
	#plot_samples_distance(ds)
	pl.title('Sample distances (sorted by chunks)')
	poly_detrend(ds, polyord=2, chunks_attr='chunks')
	pl.subplot(222)
	plot_samples_distance(ds, sortbyattr='chunks')
	pl.show()
	zscore(ds, chunks_attr='chunks', dtype='float32')
	pl.subplot(223)
	plot_samples_distance(ds, sortbyattr='chunks')
	pl.subplot(224)
#	plot_samples_distance(ds, sortbyattr='targets')
	pl.title('Sample distances (sorted by condition)')
	pl.show()
	#poly_detrend(ds, polyord=1, chunks_attr='chunks')
	#zscore(ds, chunks_attr='chunks', dtype='float32')
	return ds
Example #28
 def _get_connectomes(self, datasets):
     conn_mapper = FxyMapper(self.conn_metric)
     mean_feature_measure = MeanFeatureMeasure()
     qe = self.queryengine
     
     roi_ids = self.target_indices
     # compute means for aligning seed features
     conn_means = [self._seed_means(MeanFeatureMeasure(), qe, ds, roi_ids) for ds in datasets]
     
     conn_targets = []
     for csm in conn_means:
         zscore(csm, chunks_attr=None)
         conn_targets.append(csm)
     
     connectomes = []
     for target, ds in zip(conn_targets, datasets):
         conn_mapper.train(target)
         connectome = conn_mapper.forward(ds)
         connectome.fa = ds.fa
         zscore(connectome, chunks_attr=None)
         connectomes.append(connectome)
     return connectomes
Example #29
    def get_testdata(self):
        # rs = np.random.RandomState(0)
        rs = np.random.RandomState()
        nt = 200
        n_triangles = 4
        ns = 10
        nv = n_triangles * 3
        vertices = np.zeros((nv, 3))  # 4 separated triangles
        faces = []
        for i in range(n_triangles):
            vertices[i * 3] = [i * 2, 0, 0]
            vertices[i * 3 + 1] = [i * 2 + 1, 1 / np.sqrt(3), 0]
            vertices[i * 3 + 2] = [i * 2 + 1, -1 / np.sqrt(3), 0]
            faces.append([i * 3, i * 3 + 1, i * 3 + 2])
        faces = np.array(faces)
        surface = Surface(vertices, faces)

        ds_orig = np.zeros((nt, nv))
        # add coarse-scale information
        for i in range(n_triangles):
            ds_orig[:, i * 3:(i + 1) * 3] += rs.normal(size=(nt, 1))
        # add fine-scale information
        ds_orig += rs.normal(size=(nt, nv))
        dss_train, dss_test = [], []
        for i in range(ns):
            ds = np.zeros_like(ds_orig)
            for j in range(n_triangles):
                ds[:,
                   j * 3:(j + 1) * 3] = np.dot(ds_orig[:, j * 3:(j + 1) * 3],
                                               get_random_rotation(3))
                # special_ortho_group.rvs(3, random_state=rs))
            ds = Dataset(ds)
            ds.fa['node_indices'] = np.arange(nv)
            ds_train, ds_test = ds[:nt // 2, :], ds[nt // 2:, :]
            zscore(ds_train, chunks_attr=None)
            zscore(ds_test, chunks_attr=None)
            dss_train.append(ds_train)
            dss_test.append(ds_test)
        return dss_train, dss_test, surface
Example #30
def prep_parcelwise_data(subject, parcel, datatype):
    from mvpa2.datasets import Dataset
    from mvpa2.mappers.zscore import zscore
    if datatype == 'sponpain':
        ds = Dataset(
            np.load(
                os.path.join(
                    sponpain_by_parcel, subject +
                    '_sponpain_connectome_parcel-' + str(parcel) + '.npy')))
        ds.fa['voxel_indices'] = range(ds.shape[1])
        zscore(ds, chunks_attr=None)
    elif datatype == 'bladderpain':
        ds = Dataset(
            np.load(
                os.path.join(
                    bladderpain_by_parcel, subject +
                    '_bladderpain-cleaned-ts_parcel-' + str(parcel) + '.npy')))
        ds.fa['voxel_indices'] = range(ds.shape[1])
        zscore(ds, chunks_attr=None)
    else:
        print('Must specify datatype as either sponpain or bladderpain')
    return ds
Example #31
    def test_linear_svm_weights_per_class(self, svm):
        # assuming many defaults it is as simple as
        kwargs = dict(enable_ca=["sensitivities"])
        sana_split = svm.get_sensitivity_analyzer(
            split_weights=True, **kwargs)
        sana_full = svm.get_sensitivity_analyzer(
            force_train=False, **kwargs)

        # and lets look at all sensitivities
        ds2 = datasets['uni4large'].copy()
        zscore(ds2, param_est=('targets', ['L2', 'L3']))
        ds2 = ds2[np.logical_or(ds2.sa.targets == 'L0', ds2.sa.targets == 'L1')]

        senssplit = sana_split(ds2)
        sensfull = sana_full(ds2)

        self.assertEqual(senssplit.shape, (2, ds2.nfeatures))
        self.assertEqual(sensfull.shape, (1, ds2.nfeatures))

        # just to verify that we split properly and if we reconstruct
        # manually we obtain the same
        dmap = (-1 * senssplit.samples[1] + senssplit.samples[0]) \
               - sensfull.samples
        self.assertTrue((np.abs(dmap) <= 1e-10).all())
        #print "____"
        #print senssplit
        #print SMLR().get_sensitivity_analyzer(combiner=None)(ds2)

        # for now we can do split weights for binary tasks only, so
        # lets check if we raise a concern
        # we temporarily shutdown warning, since it is going to complain
        # otherwise, but we do it on purpose here
        handlers = warning.handlers
        warning.handlers = []
        self.assertRaises(NotImplementedError,
                              sana_split, datasets['uni3medium'])
        # reenable the warnings
        warning.handlers = handlers
Example #34
def compute_seed_means(measure, queryengine, ds, roi_ids):
    """
    Parameters
    ----------
    measure: a PyMVPA measure passed to the Searchlight.
    queryengine: a trained PyMVPA SurfaceQueryEngine.
    ds: a single PyMVPA dataset (samples: timeseries, features: vertices)
    roi_ids: the vertex indices where each searchlight will be centered.

    Returns
    -------
    seed_data: dataset with the mean timeseries for a searchlight centered on each roi_id.

    """

    # Seed data is the mean timeseries for each searchlight
    seed_data = Searchlight(measure, queryengine=queryengine,
                            nproc=1, roi_ids=roi_ids.copy())
    if isinstance(ds, np.ndarray):
        ds = Dataset(ds)
    seed_data = seed_data(ds)
    zscore(seed_data.samples, chunks_attr=None)
    return seed_data
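A hedged usage sketch for compute_seed_means(); `ds` and `surface` are assumed to be built as in the get_testdata() helpers elsewhere in this listing, with MeanFeatureMeasure imported as in the class-method variants above:

qe = SurfaceQueryEngine(surface, 10, fa_node_key='node_indices')
seed_ts = compute_seed_means(MeanFeatureMeasure(), qe, ds,
                             roi_ids=np.array([0, 3, 6, 9]))
# seed_ts: one z-scored mean timeseries (feature) per searchlight center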
Example #36
def create_betas_per_trial_with_pymvpa(study_path, subj, conf, mask_name, flavor, TR):
    dhandle = OpenFMRIDataset(study_path)
    model = 1
    task = 1
    # Do this for other tasks as well. not only the first
    mask_fname = _opj(study_path, "sub{:0>3d}".format(subj), "masks", conf.mvpa_tasks[0], "{}.nii.gz".format(mask_name))
    print mask_fname
    run_datasets = []
    for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
        if type(run_id) == str:
            continue
        all_events = dhandle.get_bold_run_model(model, subj, run_id)
        run_events = []
        i = 0
        for event in all_events:
            if event["task"] == task:
                event["condition"] = "{}-{}".format(event["condition"], i)
                run_events.append(event)
                i += 1

                # load BOLD data for this run (with masking); add 0-based chunk ID
        run_ds = dhandle.get_bold_run_dataset(subj, task, run_id, flavor=flavor, chunks=run_id - 1, mask=mask_fname)
        # convert event info into a sample attribute and assign as 'targets'
        run_ds.sa.time_coords = run_ds.sa.time_indices * TR
        print run_id

        run_ds.sa["targets"] = events2sample_attr(run_events, run_ds.sa.time_coords, noinfolabel="rest")
        # additional time series preprocessing can go here
        poly_detrend(run_ds, polyord=1, chunks_attr="chunks")
        zscore(run_ds, chunks_attr="chunks", param_est=("targets", ["rest"]), dtype="float32")
        glm_dataset = fit_event_hrf_model(run_ds, run_events, time_attr="time_coords", condition_attr="condition")
        glm_dataset.sa["targets"] = [x[: x.find("-")] for x in glm_dataset.sa.condition]
        glm_dataset.sa.condition = glm_dataset.sa["targets"]
        glm_dataset.sa["chunks"] = [run_id - 1] * len(glm_dataset.samples)
        run_datasets.append(glm_dataset)
    return vstack(run_datasets, 0)
Example #37
def main(subject, study_dir, mask, suffix='_stim_fix2'):

    from mvpa2.mappers.zscore import zscore

    # load subject data
    sp = su.SubjPath(subject, study_dir)
    vols = task.disp_vols(sp.path('behav', 'log'))

    # load fmri data
    ds = mvpa.load_disp_beta(sp, suffix, mask, verbose=1)

    # zscore
    ds.sa['run'] = vols.run.values
    zscore(ds, chunks_attr='run')

    # save data samples and corresponding volume information
    res_dir = os.path.join(sp.study_dir, 'batch', 'glm', 'disp' + suffix,
                           'roi', mask)
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    mat_file = os.path.join(res_dir, f'pattern_{subject}.txt')
    tab_file = os.path.join(res_dir, f'pattern_{subject}.csv')
    np.savetxt(mat_file, ds.samples)
    vols.to_csv(tab_file)
Example #39
    def _level2(self, datasets, lvl1_data, mappers, residuals):
        params = self.params  # for quicker access ;)
        data_mapped = lvl1_data
        # aggregate all processed 1st-level datasets into a new 2nd-level
        # common space
        commonspace = params.combiner2(data_mapped)

        # XXX Why is this commented out? Who knows what combiner2 is doing and
        # whether it changes the distribution of the data
        #if params.zscore_common:
        #zscore(commonspace, chunks_attr=None)

        ndatasets = len(datasets)
        for loop in xrange(params.level2_niter):
            # 2nd-level alignment starts from the original/unprojected datasets
            # again
            for i, (m, ds_new) in enumerate(zip(mappers, datasets)):
                if __debug__:
                    debug('HPAL_',
                          "Level 2 (%i-th iteration): ds #%i" % (loop, i))

                # Optimization speed up heuristic
                # Slightly modify the common space towards other feature
                # spaces and reduce influence of this feature space for the
                # to-be-computed projection
                temp_commonspace = (commonspace * ndatasets - data_mapped[i]) \
                                    / (ndatasets - 1)

                if params.zscore_common:
                    zscore(temp_commonspace, chunks_attr=None)
                # assign current common space
                ds_new.sa[m.get_space()] = temp_commonspace
                # retrain the mapper for this dataset
                m.train(ds_new)
                # remove common space attribute again to save on memory when the
                # common space is updated for the next iteration
                del ds_new.sa[m.get_space()]
                # obtain the 2nd-level projection
                ds_ = m.forward(ds_new.samples)
                if params.zscore_common:
                    zscore(ds_, chunks_attr=None)
                # store for 2nd-level combiner
                data_mapped[i] = ds_
                # compute residuals
                if residuals is not None:
                    residuals[1 + loop, i] = np.linalg.norm(ds_ - commonspace)

            commonspace = params.combiner2(data_mapped)

        # and again
        if params.zscore_common:
            zscore(commonspace, chunks_attr=None)

        # return the final common space
        return commonspace
Example #41
    def test_hypal_michael_caused_problem(self):
        from mvpa2.misc import data_generators
        from mvpa2.mappers.zscore import zscore
        # Fake data
        ds = data_generators.normal_feature_dataset(nfeatures=20)
        ds_all = [data_generators.random_affine_transformation(ds) for i in range(3)]
        _ = [zscore(sd, chunks_attr=None) for sd in ds_all]
        # Making random data per subject for testing with bias added to first subject
        ds_test = [np.random.rand(1, ds.nfeatures) for i in range(len(ds_all))]
        ds_test[0] += np.arange(1, ds.nfeatures + 1) * 100
        assert(np.corrcoef(ds_test[2], ds_test[1])[0, 1] < 0.99)  # that would have been ridiculous if it was

        # Test with varying alpha so we are sure not to have that issue now
        for alpha in (0, 0.01, 0.5, 0.99, 1.0):
            hyper09 = Hyperalignment(alpha=alpha)
            mappers = hyper09([sd for sd in ds_all])
            ds_test_a = [m.forward(sd) for m, sd in zip(mappers, ds_test)]
            ds_test_a = [mappers[0].reverse(sd) for sd in ds_test_a]
            corr = np.corrcoef(ds_test_a[2], ds_test_a[1])[0, 1]
            assert(corr < 0.99)
Example #43
    def _get_hypesvs(self, sl_connectomes, local_common_model=None):
        '''
        Hyperalign connectomes and return mappers
        and trained SVDMapper of common space.

        Parameters
        ----------
        sl_connectomes: a list of connectomes to hyperalign
        local_common_model: a reference common model to be used.

        Returns
        -------
        a tuple (sl_hmappers, svm, local_common_model)
        sl_hmappers: a list of mappers corresponding to the input list, in that order.
        svm: an SVD mapper trained on the merged input data; if given a common model, this is None.
        local_common_model: If local_common_model is provided as input, this will be None.
            Otherwise, local_common_model will be computed here and returned.
        '''
        # TODO Should we z-score sl_connectomes?
        return_model = self.params.save_model is not None
        if local_common_model is not None:
            ha = Hyperalignment(level2_niter=0)
            if not is_datasetlike(local_common_model):
                local_common_model = Dataset(samples=local_common_model)
            ha.train([local_common_model])
            sl_hmappers = ha(sl_connectomes)
            return sl_hmappers, None, None
        ha = Hyperalignment()
        sl_hmappers = ha(sl_connectomes)
        sl_connectomes = [
            slhm.forward(slc) for slhm, slc in zip(sl_hmappers, sl_connectomes)
        ]
        _ = [zscore(slc, chunks_attr=None) for slc in sl_connectomes]
        sl_connectomes = np.dstack(sl_connectomes).mean(axis=-1)
        svm = SVDMapper(force_train=True)
        svm.train(sl_connectomes)
        if return_model:
            local_common_model = svm.forward(sl_connectomes)
        else:
            local_common_model = None
        return sl_hmappers, svm, local_common_model
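A standalone sketch of the core steps in _get_hypesvs() when no common model is given; the searchlight connectomes are synthetic stand-ins, and standard PyMVPA import paths are assumed (not the class's actual call path):

import numpy as np
from mvpa2.datasets import Dataset
from mvpa2.algorithms.hyperalignment import Hyperalignment
from mvpa2.mappers.svd import SVDMapper
from mvpa2.mappers.zscore import zscore

rs = np.random.RandomState(0)
sl_connectomes = [Dataset(rs.normal(size=(20, 8))) for _ in range(3)]
ha = Hyperalignment()
sl_hmappers = ha(sl_connectomes)          # one mapper per connectome
aligned = [m.forward(c) for m, c in zip(sl_hmappers, sl_connectomes)]
_ = [zscore(c, chunks_attr=None) for c in aligned]
merged = np.dstack([c.samples for c in aligned]).mean(axis=-1)
svm = SVDMapper()
svm.train(merged)                         # common-space SVD, as in the method
local_common_model = svm.forward(merged)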
Example #46
    def train(self, datasets):
        """Derive a common feature space from a series of datasets.

        Parameters
        ----------
        datasets : sequence of datasets

        Returns
        -------
        A list of trained Mappers matching the number of input datasets.
        """
        params = self.params  # for quicker access ;)
        ca = self.ca
        # Check to make sure we get a list of datasets as input.
        if not isinstance(datasets, (list, tuple, np.ndarray)):
            raise TypeError("Input datasets should be a sequence "
                            "(of type list, tuple, or ndarray) of datasets.")

        ndatasets = len(datasets)
        nfeatures = [ds.nfeatures for ds in datasets]
        alpha = params.alpha

        residuals = None
        if ca['training_residual_errors'].enabled:
            residuals = np.zeros((1 + params.level2_niter, ndatasets))
            ca.training_residual_errors = Dataset(
                samples=residuals,
                sa={
                    'levels':
                    ['1'] + ['2:%i' % i for i in xrange(params.level2_niter)]
                })

        if __debug__:
            debug('HPAL',
                  "Hyperalignment %s for %i datasets" % (self, ndatasets))

        if params.ref_ds is None:
            ref_ds = np.argmax(nfeatures)
        else:
            ref_ds = params.ref_ds
            # Making sure that ref_ds is within range.
            #Parameter() already checks for it being a non-negative integer
            if ref_ds >= ndatasets:
                raise ValueError, "Requested reference dataset %i is out of " \
                      "bounds. We have only %i datasets provided" \
                      % (ref_ds, ndatasets)
        ca.chosen_ref_ds = ref_ds
        # zscore all data sets
        # ds = [ zscore(ds, chunks_attr=None) for ds in datasets]

        # TODO since we are doing in-place zscoring create deep copies
        # of the datasets with pruned targets and shallow copies of
        # the collections (if they would come needed in the transformation)
        # TODO: handle floats and non-floats differently to prevent
        #       waste of memory if there is no need (e.g. no z-scoring)
        #otargets = [ds.sa.targets for ds in datasets]
        datasets = [ds.copy(deep=False) for ds in datasets]
        #datasets = [Dataset(ds.samples.astype(float), sa={'targets': [None] * len(ds)})
        #datasets = [Dataset(ds.samples, sa={'targets': [None] * len(ds)})
        #            for ds in datasets]

        if params.zscore_all:
            if __debug__:
                debug('HPAL', "Z-scoring all datasets")
            for ids in xrange(len(datasets)):
                zmapper = ZScoreMapper(chunks_attr=None)
                zmapper.train(datasets[ids])
                datasets[ids] = zmapper.forward(datasets[ids])

        if alpha < 1:
            datasets, wmappers = self._regularize(datasets, alpha)

        # initial common space is the reference dataset
        commonspace = datasets[ref_ds].samples
        # the reference dataset might have been zscored already, don't do it
        # twice
        if params.zscore_common and not params.zscore_all:
            if __debug__:
                debug(
                    'HPAL_', "Creating copy of a commonspace and assuring "
                    "it is of a floating type")
            commonspace = commonspace.astype(float)
            zscore(commonspace, chunks_attr=None)
        # If there is only one dataset in training phase, there is nothing to be done
        # just use that data as the common space
        if len(datasets) < 2:
            self.commonspace = commonspace
        else:
            # create a mapper per dataset
            # might prefer some other way to initialize... later
            mappers = [deepcopy(params.alignment) for ds in datasets]

            #
            # Level 1 -- initial projection
            #
            lvl1_projdata = self._level1(datasets, commonspace, ref_ds,
                                         mappers, residuals)
            #
            # Level 2 -- might iterate multiple times
            #
            # this is the final common space
            self.commonspace = self._level2(datasets, lvl1_projdata, mappers,
                                            residuals)
        if params.output_dim is not None:
            mappers = self._level3(datasets)
            self._svd_mapper = SVDMapper()
            self._svd_mapper.train(self._map_and_mean(datasets, mappers))
            self._svd_mapper = StaticProjectionMapper(
                proj=self._svd_mapper.proj[:, :params.output_dim])
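A minimal end-to-end sketch of the train()/__call__ API documented above, using synthetic datasets (all names are local to this sketch):

import numpy as np
from mvpa2.datasets import Dataset
from mvpa2.algorithms.hyperalignment import Hyperalignment

rs = np.random.RandomState(1)
dss = [Dataset(rs.normal(size=(50, 10))) for _ in range(4)]
ha = Hyperalignment()
mappers = ha(dss)     # calls train() and returns one mapper per dataset
aligned = [m.forward(ds) for m, ds in zip(mappers, dss)]
# the derived common space has the shape of the reference dataset
assert ha.commonspace.shape == (50, 10)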
Example #47
def runsub(sub, thisContrast, r, dstype='raw', roi='grayMatter', filterLen=49, filterOrd=3, write=False):

    if dstype == 'raw':
        outdir='PyMVPA'
        print "working with raw data"
        thisSub = {sub: subList[sub]}
        dsdict = lmvpa.loadsubdata(paths, thisSub, m=roi)
        thisDS = dsdict[sub]
        mc_params = lmvpa.loadmotionparams(paths, thisSub)
        beta_events = lmvpa.loadevents(paths, thisSub)
        # Savitzky-Golay filtering
        sg.sg_filter(thisDS, filterLen, filterOrd)
        # the Gallant group z-scores before regression

        # zscore w.r.t. rest trials:
        # zscore(thisDS, param_est=('targets', ['rest']), chunks_attr='chunks')
        # zscore the entire set instead. If done chunk-wise, there is no
        # double-dipping (since we leave a chunk out at a time).
        zscore(thisDS, chunks_attr='chunks')
        print "beta extraction"
        ## BETA EXTRACTION ##
        rds, events = lmvpa.amendtimings(thisDS.copy(), beta_events[sub])
        evds = er.fit_event_hrf_model(rds, events, time_attr='time_coords',
                                      condition_attr=('trial_type', 'chunks'),
                                      design_kwargs={'add_regs': mc_params[sub], 'hrf_model': 'canonical'},
                                      return_model=True)

        fds = lmvpa.replacetargets(evds, contrasts, thisContrast)
        fds = fds[fds.targets != '0']
    else:
        outdir=os.path.join('LSS', dstype)
        print "loading betas"
        fds = lmvpa.loadsubbetas(paths, sub, btype=dstype, m=roi)
        fds.sa['targets'] = fds.sa[thisContrast]
        zscore(fds, chunks_attr='chunks')

    fds = lmvpa.sortds(fds)
    print "searchlights"
    ## initialize classifier
    clf = svm.LinearNuSVMC()
    cv = CrossValidation(clf, NFoldPartitioner())
    from mvpa2.measures.searchlight import sphere_searchlight
    cvSL = sphere_searchlight(cv, radius=r)


    # now I have betas per chunk: could just correlate the betas, or
    # correlate the predictions for corresponding runs
    # split the chunks into first and second halves
    lidx = fds.chunks < fds.sa['chunks'].unique[len(fds.sa['chunks'].unique) / 2]
    pidx = fds.chunks >= fds.sa['chunks'].unique[len(fds.sa['chunks'].unique) / 2]

    lres = sl.run_cv_sl(cvSL, fds[lidx].copy(deep=False))
    pres = sl.run_cv_sl(cvSL, fds[pidx].copy(deep=False))

    if write:
        from mvpa2.base import dataset
        map2nifti(fds, dataset.vstack([lres, pres])).\
            to_filename(os.path.join(
                        paths[0], 'Maps', outdir,
                        sub + '_' + roi + '_' + thisContrast + '_cvsl.nii.gz'))

    del lres, pres, cvSL

    cvSL = sphere_searchlight(cv, radius=r)
    crossSet = fds.copy()
    crossSet.chunks[lidx] = 1
    crossSet.chunks[pidx] = 2
    cres = sl.run_cv_sl(cvSL, crossSet.copy(deep=False))
    if write:
        map2nifti(fds, cres[0]).to_filename(
            os.path.join(paths[0], 'Maps', outdir,
                         sub + '_' + roi + '_' + (thisContrast) + '_P2L.nii.gz'))
        map2nifti(fds, cres[1]).to_filename(
            os.path.join(paths[0], 'Maps', outdir,
                         sub + '_' + roi + '_' + (thisContrast) + '_L2P.nii.gz'))
 def test_hyperalignment_measure(self):
     ref_ds = 0
     fsha = FeatureSelectionHyperalignment()
     ds_orig, dss_rotated, dss_rotated_clean, Rs = self.get_testdata()
     # Let's test two scenarios -- one with no noise, where we should get
     # close to perfect reconstruction, and one with noisy features added,
     # where reconstruction will not be as good
     for noisy, dss in ((False, dss_rotated_clean),
                        (True, dss_rotated)):
         # to verify that original datasets didn't get changed by
         # Hyperalignment store their idhashes of samples
         idhashes = [idhash(ds.samples) for ds in dss]
         idhashes_targets = [idhash(ds.targets) for ds in dss]
         mappers = fsha(dss)
         mappers = [StaticProjectionMapper(proj=m, recon=m.T)
                    for m in mappers]
         idhashes_ = [idhash(ds.samples) for ds in dss]
         idhashes_targets_ = [idhash(ds.targets) for ds in dss]
         self.assertEqual(
             idhashes, idhashes_,
             msg="Hyperalignment must not change original data.")
         self.assertEqual(
             idhashes_targets, idhashes_targets_,
             msg="Hyperalignment must not change original data targets.")
         # Map data back
         dss_clean_back = [m.forward(ds_)
                           for m, ds_ in zip(mappers, dss)]
         _ = [zscore(sd, chunks_attr=None) for sd in dss_clean_back]
         nddss = []
         ndcss = []
         nf = ds_orig.nfeatures
         ds_norm = np.linalg.norm(dss[ref_ds].samples[:, :nf])
         ds_orig_Rref = np.dot(ds_orig.samples, Rs[ref_ds]) \
                        * np.sign(dss_rotated_clean[ref_ds].a.random_scale)
         zscore(ds_orig_Rref, chunks_attr=None)
         for ds_back in dss_clean_back:
             ndcs = np.diag(np.corrcoef(ds_back.samples.T[:nf, ],
                                        ds_orig_Rref.T)[nf:, :nf], k=0)
             ndcss += [ndcs]
             dds = ds_back.samples[:, :nf] - ds_orig_Rref
             ndds = np.linalg.norm(dds) / ds_norm
             nddss += [ndds]
         # First compare correlations
         snoisy = ('clean', 'noisy')[int(noisy)]
         self.assertTrue(
             np.all(np.array(ndcss) >= (0.9, 0.85)[int(noisy)]),
             msg="Should have reconstructed original dataset more or"
             " less. Got correlations %s in %s case."
             % (ndcss, snoisy))
         # normed differences
         self.assertTrue(
             np.all(np.array(nddss) <= (.2, 3)[int(noisy)]),
             msg="Should have reconstructed original dataset more or"
             " less for all. Got normed differences %s in %s case."
             % (nddss, snoisy))
         self.assertTrue(
             nddss[ref_ds] <= (.1, 0.3)[int(noisy)],
             msg="Should have reconstructed original dataset quite "
             "well even with zscoring. Got normed differences %s "
             "in %s case." % (nddss, snoisy))
         self.assertTrue(
             np.all(np.array(nddss) / nddss[ref_ds] >= (0.95, 0.8)[int(noisy)]),
             msg="Should have reconstructed orig_ds best of all. "
             "Got normed differences %s in %s case with ref_ds=%d."
             % (nddss, snoisy, ref_ds))
     # Testing feature selection within the measure using fraction and count
     # same features
     fsha_fsf = FeatureSelectionHyperalignment(featsel=0.5)
     fsha_fsn = FeatureSelectionHyperalignment(featsel=4)
     fsha_fsf_same = FeatureSelectionHyperalignment(featsel=0.5, use_same_features=True)
     fsha = FeatureSelectionHyperalignment(full_matrix=False)
     # check for ValueError if full_matrix=False and no roi_seed fa
     self.assertRaises(ValueError, fsha, dss_rotated)
     fsha = FeatureSelectionHyperalignment()
     dss_rotated[ref_ds].fa['roi_seed'] = [1, 0, 0, 0, 0, 0, 0, 0]
     mappers_fsf = fsha_fsf(dss_rotated)
     mappers_fsf_same = fsha_fsf_same(dss_rotated)
     mappers_fsn = fsha_fsn(dss_rotated)
     mappers = fsha(dss_rotated_clean)
     mappers_diffsizedss = fsha_fsf([sd[:, nfs] for nfs, sd in
         zip([np.arange(5), np.random.permutation(np.arange(8)), np.arange(8)[::-1], np.arange(8)], dss_rotated)])
     # Testing that most of the noisy features are eliminated from the reference data
     assert_true(np.alltrue([np.sum(m[:4, :4].std(0) > 0) > 2 for m in mappers_fsf]))
     # using the same features makes it most likely to eliminate all noisy features
     assert_true(np.alltrue([np.sum(m[:4, :4].std(0) > 0) == 4 for m in mappers_fsf_same]))
     assert_true(np.alltrue([np.sum(m[:4, :4].std(0) > 0) > 2 for m in mappers_fsn]))
     # And it correctly maps the selected features if they are selected
     if np.alltrue([np.all(m[4:, :4] == 0) for m in mappers_fsf]):
         for m, mfs in zip(mappers, mappers_fsf):
             assert_array_equal(m, mfs[:4, :4])
     if np.alltrue([np.all(m[4:, :4] == 0) for m in mappers_fsf_same]):
         for m, mfs in zip(mappers, mappers_fsf_same):
             assert_array_equal(m, mfs[:4, :4])
     # testing that roi_seed forces feature selection
     dss_rotated[ref_ds].fa['roi_seed'] = [0, 0, 0, 0, 0, 0, 0, 1]
     fsha_fsf = FeatureSelectionHyperalignment(featsel=0.5)
     mappers_fsf = fsha_fsf(dss_rotated)
     assert(np.alltrue([np.sum(m[7, :] == 0) == 4 for m in mappers_fsf]))
    def test_custom_qas(self):
        # Test if we could provide custom QEs per each of the datasets
        skip_if_no_external('scipy')
        skip_if_no_external('hdf5')  # needed for default results backend hdf5

        ns, nf = 10, 4  # # of samples/features -- a very BIG dataset ;)
        ds0 = Dataset(np.random.normal(size=(ns, nf)))
        zscore(ds0, chunks_attr=None)
        ds1 = ds0[:, [3, 0, 1, 2]]  # features circular shifted to the right

        qe0 = FancyQE([[0], [1], [2], [3]])  # does nothing
        qe1 = FancyQE([[1], [2], [3], [0]])  # knows to look into the right

        def apply_slhyper(queryengine, dss=[ds0, ds1], return_mappers=False, **kw):
            """Helper for a common code to create/call slhyper"""
            slhyper = SearchlightHyperalignment(queryengine=queryengine, **kw)
            mappers = slhyper(dss)
            proj = [m.proj.todense() for m in mappers]
            return (proj, mappers) if return_mappers else proj

        # since this single QE results in trying to match non-matching time
        # series, projections should be non-identity, but have no
        # off-diagonal elements
        assert_no_offdiag(apply_slhyper(qe0))

        # both are provided
        projs, mappers = apply_slhyper([qe0, qe1], return_mappers=True)
        tprojs_shifted = [np.eye(nf), np.roll(np.eye(nf), 1, axis=0)]
        assert_array_equal(projs[0], tprojs_shifted[0])  # must be identity since we made them so
        assert_array_equal(projs[1], tprojs_shifted[1])  # pretty much incorporating that shift

        # TODO -- not identity assert_array_equal(projs[0], np.eye(len(p)))  # must be identity since we made them so
        # and must restore data properly
        assert_array_almost_equal(mappers[0].forward(ds0), mappers[1].forward(ds1))

        # give more datasets than QEs
        assert_raises(ValueError,
                      SearchlightHyperalignment(queryengine=[qe0, qe1]),
                      [ds0, ds1, ds0])

        # The one having no voxels for the "1st" id in "subj1"
        qe1_ = FancyQE([[1], [], [3], [0]])  # knows to look into the right

        projs = apply_slhyper(qe1_)
        assert_no_offdiag(projs)
        for proj in projs:
            # assert that the '2nd' diagonal element is 0,
            # but not the others!
            assert_array_equal(np.diagonal(proj) != 0, [True, True, False, True])

        # smoke test for when combine_neighbormappers is False;
        # in this case it should work ok
        apply_slhyper(qe0, combine_neighbormappers=False)
        # this one ok as well since needs only matching ones in ref_ds
        apply_slhyper([qe0, qe1], combine_neighbormappers=False)
        # here since features do not match node_ids -- should raise ValueError
        assert_raises(ValueError, apply_slhyper, qe1, combine_neighbormappers=False)
        assert_raises(ValueError, apply_slhyper, [qe0, qe1], ref_ds=1, combine_neighbormappers=False)

        # and now only one qe lacking for that id
        projs = apply_slhyper([qe0, qe1_])
        tproj0 = np.eye(nf)
        tproj0[1, 1] = 0
        tprojs_shifted_1st0 = [tproj0, np.roll(tproj0, 1, axis=0)]
        for proj, tproj in zip(projs, tprojs_shifted_1st0):
            # assert that the '2nd' diagonal element is 0,
            # but not the others!
            assert_array_equal(proj, tproj)

        # And now a test with varying number of selected fids, no shift
        qe0 = FancyQE([[0], [1, 2], [1, 2, 3], [0, 1, 2, 3]])
        projs = apply_slhyper(qe0)
        # Test that in general we get larger coefficients for "correct" transformation
        for p, tproj in zip(projs, tprojs_shifted):
            assert(np.all(np.asarray(p)[tproj>0] >= 1.0))
            assert_array_lequal(np.mean(np.asarray(p)[tproj == 0]), 0.3)

        qe1 = FancyQE([[0, 1, 2, 3], [1, 2, 3], [2, 3], [3]])
        # Just a smoke test, for now TODO
        projs = apply_slhyper([qe0, qe1])
 def test_searchlight_hyperalignment(self):
     skip_if_no_external('scipy')
     skip_if_no_external('h5py')
     ds_orig = datasets['3dsmall'].copy()[:, :15]
     ds_orig.fa['voxel_indices'] = ds_orig.fa.myspace
     space = 'voxel_indices'
     # total number of datasets for the analysis
     nds = 5
     zscore(ds_orig, chunks_attr=None)
     dss = [ds_orig]
     # create a few distorted datasets to match the desired number of datasets
     # not sure if this truly mimics the real data, but at least we can test
     # the implementation
     while len(dss) < nds - 1:
         sd = local_random_affine_transformations(
             ds_orig,
             scatter_neighborhoods(
                 Sphere(1),
                 ds_orig.fa[space].value, deterministic=True)[1],
             Sphere(2),
             space=space,
             scale_fac=1.0, shift_fac=0.0)
         # the function above sometimes returns a dataset with NaNs or Infs;
         # we don't want that
         if np.sum(np.isnan(sd.samples)+np.isinf(sd.samples)) == 0 \
                 and np.all(sd.samples.std(0)):
             dss.append(sd)
     ds_orig_noisy = ds_orig.copy()
     ds_orig_noisy.samples += 0.1 * np.random.random(size=ds_orig_noisy.shape)
     dss.append(ds_orig_noisy)
     _ = [zscore(sd, chunks_attr=None) for sd in dss[1:]]
     # we should have some distortion
     for ds in dss[1:]:
         assert_false(np.all(ds_orig.samples == ds.samples))
     # testing checks
     slhyp = SearchlightHyperalignment(ref_ds=1, exclude_from_model=[1])
     self.assertRaises(ValueError, slhyp, dss[:3])
     slhyp = SearchlightHyperalignment(ref_ds=3)
     self.assertRaises(ValueError, slhyp, dss[:3])
     # store projections for each mapper separately
     projs = list()
     # run the algorithm with all combinations of the two major parameters
     # for projection calculation.
     for kwargs in [{'combine_neighbormappers': True, 'nproc': 2},
                    {'combine_neighbormappers': True, 'dtype': 'float64', 'compute_recon': True},
                    {'combine_neighbormappers': True, 'exclude_from_model': [2, 4]},
                    {'combine_neighbormappers': False},
                    {'combine_neighbormappers': False, 'mask_node_ids': np.arange(dss[0].nfeatures).tolist()},
                    {'combine_neighbormappers': True, 'sparse_radius': 1},
                    {'combine_neighbormappers': True, 'nblocks': 2}]:
         slhyp = SearchlightHyperalignment(radius=2, **kwargs)
         mappers = slhyp(dss)
         # one mapper per input ds
         assert_equal(len(mappers), nds)
         projs.append(mappers)
     # some checks
     for midx in range(nds):
         # making sure the mask_node_ids option works as expected
         assert_array_almost_equal(projs[3][midx].proj.todense(),
                                   projs[4][midx].proj.todense())
         # recon check
         assert_array_almost_equal(projs[0][midx].proj.todense(),
                                   projs[1][midx].recon.T.todense(), decimal=5)
         assert_equal(projs[1][midx].proj.dtype, 'float64')
         assert_equal(projs[0][midx].proj.dtype, 'float32')
     # making sure the projections make sense
     for proj in projs:
         # no .max on sparse matrices in older scipy (e.g. on precise),
         # so convert to an array first
         max_weight = proj[0].proj.toarray().max(0).squeeze()
         diag_weight = proj[0].proj.diagonal()
         # Check to make sure diagonal is the max weight, in almost all rows for reference subject
         assert(np.sum(max_weight == diag_weight) / float(len(diag_weight)) > 0.90)
         # and not true for other subjects
         for i in range(1, nds - 1):
             assert(np.sum(proj[i].proj.toarray().max(0).squeeze() == proj[i].proj.diagonal())
                    / float(proj[i].proj.shape[0]) < 0.80)
         # Check to make sure projection weights match across duplicate datasets
         max_weight = proj[-1].proj.toarray().max(0).squeeze()
         diag_weight = proj[-1].proj.diagonal()
         # Check to make sure diagonal is the max weight, in almost all rows for reference subject
         assert(np.sum(max_weight == diag_weight) / float(len(diag_weight)) > 0.90)
     # project data
     dss_hyper = [hm.forward(sd) for hm, sd in zip(projs[0], dss)]
     _ = [zscore(sd, chunks_attr=None) for sd in dss_hyper]
     ndcss = []
     nf = ds_orig.nfeatures
     for ds_hyper in dss_hyper:
         ndcs = np.diag(np.corrcoef(ds_hyper.samples.T,
                                    ds_orig.samples.T)[nf:, :nf],
                        k=0)
         ndcss += [ndcs]
     assert_true(np.median(ndcss[0]) > 0.9)
     # noisy copy of original dataset should be similar to original after hyperalignment
     assert_true(np.median(ndcss[-1]) > 0.9)
     assert_true(np.all([np.median(ndcs) > 0.2 for ndcs in ndcss[1:-2]]))
    def test_voxel_selection(self):
        """Compare surface and volume based searchlight"""

        """
        Tests to see whether results are identical for surface-based
        searchlight (just one plane; Euclidean distance) and volume-based
        searchlight.

        Note that the current value is a float; if it were int, it would
        specify the number of voxels in each searchlight"""

        radius = 10.0
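        # (a hypothetical alternative, per the note above) an int value
        # would instead fix the number of voxels in each searchlight:
        # radius = 100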

        """Define input filenames"""
        epi_fn = pathjoin(pymvpa_dataroot, "bold.nii.gz")
        maskfn = pathjoin(pymvpa_dataroot, "mask.nii.gz")

        """
        Use the EPI datafile to define a surface.
        The surface has as many nodes as there are voxels
        and is parallel to the volume 'slice'
        """
        vg = volgeom.from_any(maskfn, mask_volume=True)

        aff = vg.affine
        nx, ny, nz = vg.shape[:3]

        """Plane goes in x and y direction, so we take these vectors
        from the affine transformation matrix of the volume"""
        plane = surf.generate_plane(aff[:3, 3], aff[:3, 0], aff[:3, 1], nx, ny)

        """
        Simulate pial and white matter as just above and below
        the central plane
        """
        normal_vec = aff[:3, 2]
        outer = plane + normal_vec
        inner = plane + -normal_vec

        """
        Combine volume and surface information
        """
        vsm = volsurf.VolSurfMaximalMapping(vg, outer, inner)

        """
        Run voxel selection with specified radius (in mm), using
        Euclidean distance measure
        """
        surf_voxsel = surf_voxel_selection.voxel_selection(vsm, radius, distance_metric="e")

        """Define the measure"""

        # run_slow=True would give an actual cross-validation with meaningful
        # accuracies. Because this is a unit-test only the number of voxels
        # in each searchlight is tested.
        run_slow = False

        if run_slow:
            meas = CrossValidation(GNB(), OddEvenPartitioner(), errorfx=lambda p, t: np.mean(p == t))
            postproc = mean_sample
        else:
            meas = _Voxel_Count_Measure()
            postproc = lambda x: x

        """
        Surface analysis: define the query engine, cross validation,
        and searchlight
        """
        surf_qe = SurfaceVerticesQueryEngine(surf_voxsel)
        surf_sl = Searchlight(meas, queryengine=surf_qe, postproc=postproc)

        """
        new (Sep 2012): also test 'simple' queryengine wrapper function
        """

        surf_qe2 = disc_surface_queryengine(
            radius, maskfn, inner, outer, plane, volume_mask=True, distance_metric="euclidean"
        )
        surf_sl2 = Searchlight(meas, queryengine=surf_qe2, postproc=postproc)

        """
        Same for the volume analysis
        """
        element_sizes = tuple(map(abs, (aff[0, 0], aff[1, 1], aff[2, 2])))
        sph = Sphere(radius, element_sizes=element_sizes)
        kwa = {"voxel_indices": sph}

        vol_qe = IndexQueryEngine(**kwa)
        vol_sl = Searchlight(meas, queryengine=vol_qe, postproc=postproc)

        """The following steps are similar to start_easy.py"""
        attr = SampleAttributes(pathjoin(pymvpa_dataroot, "attributes_literal.txt"))

        mask = surf_voxsel.get_mask()

        dataset = fmri_dataset(
            samples=pathjoin(pymvpa_dataroot, "bold.nii.gz"), targets=attr.targets, chunks=attr.chunks, mask=mask
        )

        if run_slow:
            # do chunkswise linear detrending on dataset

            poly_detrend(dataset, polyord=1, chunks_attr="chunks")

            # zscore dataset relative to baseline ('rest') mean
            zscore(dataset, chunks_attr="chunks", param_est=("targets", ["rest"]))

        # select class face and house for this demo analysis
        # would work with full datasets (just a little slower)
        dataset = dataset[np.array([l in ["face", "house"] for l in dataset.sa.targets], dtype="bool")]

        """Apply searchlight to datasets"""
        surf_dset = surf_sl(dataset)
        surf_dset2 = surf_sl2(dataset)
        vol_dset = vol_sl(dataset)

        surf_data = surf_dset.samples
        surf_data2 = surf_dset2.samples
        vol_data = vol_dset.samples

        assert_array_equal(surf_data, surf_data2)
        assert_array_equal(surf_data, vol_data)
Example #52
0
    def train(self, datasets):
        """Derive a common feature space from a series of datasets.

        Parameters
        ----------
        datasets : sequence of datasets

        Returns
        -------
        None. The derived common feature space is stored as `self.commonspace`.
        """
        params = self.params            # for quicker access ;)
        ca = self.ca
        ndatasets = len(datasets)
        nfeatures = [ds.nfeatures for ds in datasets]
        alpha = params.alpha
        
        residuals = None
        if ca['training_residual_errors'].enabled:
            residuals = np.zeros((1 + params.level2_niter, ndatasets))
            ca.training_residual_errors = Dataset(
                samples = residuals,
                sa = {'levels' :
                       ['1'] +
                       ['2:%i' % i for i in xrange(params.level2_niter)]})

        if __debug__:
            debug('HPAL', "Hyperalignment %s for %i datasets"
                  % (self, ndatasets))

        if params.ref_ds is None:
            ref_ds = np.argmax(nfeatures)
        else:
            ref_ds = params.ref_ds
            if ref_ds < 0 and ref_ds >= ndatasets:
                raise ValueError, "Requested reference dataset %i is out of " \
                      "bounds. We have only %i datasets provided" \
                      % (ref_ds, ndatasets)
        ca.choosen_ref_ds = ref_ds
        # zscore all data sets
        # ds = [ zscore(ds, chunks_attr=None) for ds in datasets]

        # TODO: since we are doing in-place zscoring, create deep copies
        # of the datasets with pruned targets and shallow copies of
        # the collections (if they should become needed in the transformation)
        # TODO: handle floats and non-floats differently to prevent
        #       waste of memory if there is no need (e.g. no z-scoring)
        #otargets = [ds.sa.targets for ds in datasets]
        datasets = [ds.copy(deep=False) for ds in datasets]
        #datasets = [Dataset(ds.samples.astype(float), sa={'targets': [None] * len(ds)})
        #datasets = [Dataset(ds.samples, sa={'targets': [None] * len(ds)})
        #            for ds in datasets]

        if params.zscore_all:
            if __debug__:
                debug('HPAL', "Z-scoring all datasets")
            for ids in xrange(len(datasets)):
                zmapper = ZScoreMapper(chunks_attr=None)
                zmapper.train(datasets[ids])
                datasets[ids] = zmapper.forward(datasets[ids])

        if alpha < 1:
            datasets, wmappers = self._regularize(datasets, alpha)

        # initial common space is the reference dataset
        commonspace = datasets[ref_ds].samples
        # the reference dataset might have been zscored already, don't do it
        # twice
        if params.zscore_common and not params.zscore_all:
            if __debug__:
                debug('HPAL_',
                      "Creating copy of a commonspace and assuring "
                      "it is of a floating type")
            commonspace = commonspace.astype(float)
            zscore(commonspace, chunks_attr=None)

        # create a mapper per dataset
        # might prefer some other way to initialize... later
        mappers = [deepcopy(params.alignment) for ds in datasets]

        #
        # Level 1 -- initial projection
        #
        lvl1_projdata = self._level1(datasets, commonspace, ref_ds, mappers,
                                     residuals)
        #
        # Level 2 -- might iterate multiple times
        #
        # this is the final common space
        self.commonspace = self._level2(datasets, lvl1_projdata, mappers,
                                        residuals)
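
# --- Hedged usage sketch (not part of the original snippet) ---
# Assuming PyMVPA's public API as exercised by the tests in this file
# (calling a Hyperalignment instance trains it and returns one mapper
# per input dataset), a minimal end-to-end run could look like:
import numpy as np
from mvpa2.algorithms.hyperalignment import Hyperalignment
from mvpa2.datasets.base import Dataset

dss_demo = [Dataset(np.random.normal(size=(20, 8))) for _ in xrange(3)]
ha_demo = Hyperalignment()
demo_mappers = ha_demo(dss_demo)      # one trained mapper per dataset
aligned_demo = [m.forward(sd) for m, sd in zip(demo_mappers, dss_demo)]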
def fx(ds):
    # z-score across the whole dataset at once (no chunk structure assumed)
    zscore(ds, chunks_attr=None)
    # permute the 'condition' attribute within each chunk
    perm = AttributePermutator('condition', limit='chunks')
    ds = perm(ds)
    return ds
Example #54
0
def test_hrf_modeling():
    skip_if_no_external('nibabel')
    skip_if_no_external('nipy') # ATM relies on NiPy's GLM implementation
    ds = load_example_fmri_dataset('25mm') #literal=True)
    # TODO: simulate short dataset with known properties and use it
    # for testing
    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    tr = ds.a.imghdr['pixdim'][4]
    for ev in events:
        for a in ('onset', 'duration'):
            ev[a] = ev[a] * tr
    evds = eventrelated_dataset(ds, events, time_attr='time_coords',
                                condition_attr='targets',
                                design_kwargs=dict(drift_model='blank'),
                                glmfit_kwargs=dict(model='ols'),
                                model='hrf')
    # same voxels
    assert_equal(ds.nfeatures, evds.nfeatures)
    assert_array_equal(ds.fa.voxel_indices, evds.fa.voxel_indices)
    # one sample for each condition, plus constant
    assert_equal(sorted(ds.sa['targets'].unique), sorted(evds.sa.targets))
    assert_equal(evds.a.add_regs.sa.regressor_names[0], 'constant')
    # with centered data
    zscore(ds)
    evds_demean = eventrelated_dataset(ds, events, time_attr='time_coords',
                                condition_attr='targets',
                                design_kwargs=dict(drift_model='blank'),
                                glmfit_kwargs=dict(model='ols'),
                                model='hrf')
    # after demeaning the constant should consume a lot less
    assert(evds.a.add_regs[0].samples.mean()
           > evds_demean.a.add_regs[0].samples.mean())
    # from eyeballing the sensitivity example -- would be better to test this on
    # the tutorial data
    assert(evds_demean[evds.sa.targets == 'shoe'].samples.max() \
           > evds_demean[evds.sa.targets == 'bottle'].samples.max())
    # HRF models
    assert('regressors' in evds.sa)
    assert('regressors' in evds.a.add_regs.sa)
    assert_equal(evds.sa.regressors.shape[1], len(ds))

    # custom regressors
    evds_regrs = eventrelated_dataset(ds, events, time_attr='time_coords',
                                condition_attr='targets',
                                regr_attrs=['time_indices'],
                                design_kwargs=dict(drift_model='blank'),
                                glmfit_kwargs=dict(model='ols'),
                                model='hrf')
    # verify that nothing screwed up time_coords
    assert_equal(ds.sa.time_coords[0], 0)
    assert_equal(len(evds_regrs), len(evds))
    # one more output sample in .a.add_regs
    assert_equal(len(evds_regrs.a.add_regs) - 1, len(evds.a.add_regs))
    # comes last before constant
    assert_equal('time_indices', evds_regrs.a.add_regs.sa.regressor_names[-2])
    # order of main regressors is unchanged
    assert_array_equal(evds.sa.targets, evds_regrs.sa.targets)

    # custom regressors from external sources
    evds_regrs = eventrelated_dataset(ds, events, time_attr='time_coords',
                                condition_attr='targets',
                                regr_attrs=['time_coords'],
                                design_kwargs=dict(drift_model='blank',
                                                   add_regs=np.linspace(1, -1, len(ds))[None].T,
                                                   add_reg_names=['negative_trend']),
                                glmfit_kwargs=dict(model='ols'),
                                model='hrf')
    assert_equal(len(evds_regrs), len(evds))
    # But we got one more in additional regressors
    assert_equal(len(evds_regrs.a.add_regs) - 2, len(evds.a.add_regs))
    # comes last before constant
    assert_array_equal(['negative_trend', 'time_coords', 'constant'],
                       evds_regrs.a.add_regs.sa.regressor_names)
    # order is otherwise unchanged
    assert_array_equal(evds.sa.targets, evds_regrs.sa.targets)

    # HRF models with estimating per each chunk
    assert_equal(ds.sa.time_coords[0], 0)
    evds_regrs = eventrelated_dataset(ds, events, time_attr='time_coords',
                                condition_attr=['targets', 'chunks'],
                                regr_attrs=['time_indices'],
                                design_kwargs=dict(drift_model='blank'),
                                glmfit_kwargs=dict(model='ols'),
                                model='hrf')
    assert_true('add_regs' in evds_regrs.a)
    assert_true('time_indices' in evds_regrs.a.add_regs.sa.regressor_names)

    assert_equal(len(ds.UC) * len(ds.UT), len(evds_regrs))
    assert_equal(len(evds_regrs.UC) * len(evds_regrs.UT), len(evds_regrs))

    from mvpa2.mappers.fx import mean_group_sample
    evds_regrs_meaned = mean_group_sample(['targets'])(evds_regrs)
    assert_array_equal(evds_regrs_meaned.T, evds.T) # targets should be the same
Example #55
0
def test_zscore():
    """Test z-scoring transformation
    """
    # dataset: mean=2, std=1
    samples = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)).\
        reshape((16, 1))
    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    assert_equal(data.samples.mean(), 2.0)
    assert_equal(data.samples.std(), 1.0)
    data_samples = data.samples.copy()
    zscore(data, chunks_attr='chunks')

    # copy should stay intact
    assert_equal(data_samples.mean(), 2.0)
    assert_equal(data_samples.std(), 1.0)
    # we should be able to operate on ndarrays
    # But we can't change the dtype in place for an array, can we?
    assert_raises(TypeError, zscore, data_samples, chunks_attr=None)
    # so let's do it manually
    data_samples = data_samples.astype(float)
    zscore(data_samples, chunks_attr=None)
    assert_array_equal(data.samples, data_samples)

    # check z-scoring
    check = np.array([-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0],
                    dtype='float64').reshape(16, 1)
    assert_array_equal(data.samples, check)

    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    zscore(data, chunks_attr=None)
    assert_array_equal(data.samples, check)

    # check z-scoring taking set of labels as a baseline
    data = dataset_wizard(samples.copy(),
                   targets=[0, 2, 2, 2, 1] + [2] * 11,
                   chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples + 1.0)

    # check that zscore modifies in-place; only guaranteed if no upcasting is
    # necessary
    samples = samples.astype('float')
    data = dataset_wizard(samples,
                   targets=[0, 2, 2, 2, 1] + [2] * 11,
                   chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples)

    # these might be duplicating code above -- but twice is better than nothing

    # dataset: mean=2, std=1
    raw = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2))
    # dataset: mean=12, std=1
    raw2 = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)) + 10
    # zscore target
    check = [-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0]

    ds = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)
    pristine = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)

    zm = ZScoreMapper()
    # should do global zscore by default
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]))
    # should not modify the source
    assert_array_equal(pristine, ds)

    # if we tell it different parameters (mean=3, std=1) it should use them
    zm = ZScoreMapper(params=(3,1))
    zm.train(ds)
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]) - 1 )
    assert_array_equal(pristine, ds)

    # let's look at chunk-wise z-scoring
    ds = dataset_wizard(np.hstack((raw.copy(), raw2.copy())),
                        targets=range(32),
                        chunks=[0] * 16 + [1] * 16)
    # by default chunk-wise
    zm = ZScoreMapper()
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))
    # we should be able to do the same manually
    zm = ZScoreMapper(params={0: (2,1), 1: (12,1)})
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))
Example #56
0
    def __call__(self, datasets):
        """Estimate mappers for each dataset

        Parameters
        ----------
          datasets : list or tuple of datasets

        Returns
        -------
        A list of trained Mappers of the same length as datasets
        """
        params = self.params            # for quicker access ;)
        ca = self.ca
        ndatasets = len(datasets)
        nfeatures = [ds.nfeatures for ds in datasets]

        residuals = None
        if ca['residual_errors'].enabled:
            residuals = np.zeros((2 + params.level2_niter, ndatasets))
            ca.residual_errors = Dataset(
                samples = residuals,
                sa = {'levels' :
                       ['1'] +
                       ['2:%i' % i for i in xrange(params.level2_niter)] +
                       ['3']})

        if __debug__:
            debug('HPAL', "Hyperalignment %s for %i datasets"
                  % (self, ndatasets))

        if params.ref_ds is None:
            ref_ds = np.argmax(nfeatures)
        else:
            ref_ds = params.ref_ds
            if ref_ds < 0 and ref_ds >= ndatasets:
                raise ValueError, "Requested reference dataset %i is out of " \
                      "bounds. We have only %i datasets provided" \
                      % (ref_ds, ndatasets)
        ca.choosen_ref_ds = ref_ds
        # might prefer some other way to initialize... later
        mappers = [deepcopy(params.alignment) for ds in datasets]
        # zscore all data sets
        # ds = [ zscore(ds, chunks_attr=None) for ds in datasets]

        # Level 1 (first)
        commonspace = np.asanyarray(datasets[ref_ds])
        if params.zscore_common:
            zscore(commonspace, chunks_attr=None)
        data_mapped = [np.asanyarray(ds) for ds in datasets]
        for i, (m, data) in enumerate(zip(mappers, data_mapped)):
            if __debug__:
                debug('HPAL_', "Level 1: ds #%i" % i)
            if i == ref_ds:
                continue
            #ZSC zscore(data, chunks_attr=None)
            ds = dataset_wizard(samples=data, targets=commonspace)
            #ZSC zscore(ds, chunks_attr=None)
            m.train(ds)
            data_temp = m.forward(data)
            #ZSC zscore(data_temp, chunks_attr=None)
            data_mapped[i] = data_temp

            if residuals is not None:
                residuals[0, i] = np.linalg.norm(data_temp - commonspace)

            ## if ds_mapped == []:
            ##     ds_mapped = [zscore(m.forward(d), chunks_attr=None)]
            ## else:
            ##     ds_mapped += [zscore(m.forward(d), chunks_attr=None)]

            # zscore before adding
            # TODO: make this just a function so we don't waste space
            commonspace = params.combiner1(data_mapped[i], commonspace)
            if params.zscore_common:
                zscore(commonspace, chunks_attr=None)

        # update commonspace to mean of ds_mapped
        commonspace = params.combiner2(data_mapped)
        if params.zscore_common:
            zscore(commonspace, chunks_attr=None)

        # Level 2 -- might iterate multiple times
        for loop in xrange(params.level2_niter):
            for i, (m, ds) in enumerate(zip(mappers, datasets)):
                if __debug__:
                    debug('HPAL_', "Level 2 (%i-th iteration): ds #%i" % (loop, i))

                ## ds_temp = zscore( (commonspace*ndatasets - ds_mapped[i])
                ##                   /(ndatasets-1), chunks_attr=None )
                ds_new = ds.copy()
                #ZSC zscore(ds_new, chunks_attr=None)
                #PRJ ds_temp = (commonspace*ndatasets - ds_mapped[i])/(ndatasets-1)
                #ZSC zscore(ds_temp, chunks_attr=None)
                ds_new.targets = commonspace #PRJ ds_temp
                m.train(ds_new) # ds_temp)
                data_mapped[i] = m.forward(np.asanyarray(ds))
                if residuals is not None:
                    residuals[1+loop, i] = np.linalg.norm(data_mapped[i] - commonspace)

                #ds_mapped[i] = zscore( m.forward(ds_temp), chunks_attr=None)

            commonspace = params.combiner2(data_mapped)
            if params.zscore_common:
                zscore(commonspace, chunks_attr=None)

        # Level 3 (last)
        for i, (m, ds) in enumerate(zip(mappers, datasets)):
            if __debug__:
                debug('HPAL_', "Level 3: ds #%i" % i)

            ## ds_temp = zscore( (commonspace*ndatasets - ds_mapped[i])
            ##                   /(ndatasets-1), chunks_attr=None )
            ds_new = ds.copy()     # shallow copy so we could assign new labels
            #ZSC zscore(ds_new, chunks_attr=None)
            #PRJ ds_temp = (commonspace*ndatasets - ds_mapped[i])/(ndatasets-1)
            #ZSC zscore(ds_temp, chunks_attr=None)
            ds_new.targets = commonspace #PRJ ds_temp#
            m.train(ds_new) #ds_temp)

            if residuals is not None:
                # use a separate name so the data_mapped list is not clobbered
                data_mapped_i = m.forward(ds_new)
                residuals[-1, i] = np.linalg.norm(data_mapped_i - commonspace)

        return mappers
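
# --- Hedged usage note (names taken from the tests elsewhere in this file) ---
# The 'residual_errors' conditional attribute populated above is disabled by
# default; it can be requested at construction time and inspected afterwards,
# one row per level ('1', '2:0', ..., '3'):
#
#   ha = Hyperalignment(level2_niter=2, enable_ca=['residual_errors'])
#   mappers = ha(dss)
#   print ha.ca.residual_errors.samples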
Example #57
0
def test_zscore():
    """Test z-scoring transformation
    """
    # dataset: mean=2, std=1
    samples = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)).\
        reshape((16, 1))
    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    assert_equal(data.samples.mean(), 2.0)
    assert_equal(data.samples.std(), 1.0)
    data_samples = data.samples.copy()
    zscore(data, chunks_attr='chunks')

    # copy should stay intact
    assert_equal(data_samples.mean(), 2.0)
    assert_equal(data_samples.std(), 1.0)
    # we should be able to operate on ndarrays
    # But we can't change the dtype in place for an array, can we?
    assert_raises(TypeError, zscore, data_samples, chunks_attr=None)
    # so let's do it manually
    data_samples = data_samples.astype(float)
    zscore(data_samples, chunks_attr=None)
    assert_array_equal(data.samples, data_samples)

    # check z-scoring
    check = np.array([-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0],
                    dtype='float64').reshape(16, 1)
    assert_array_equal(data.samples, check)

    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    zscore(data, chunks_attr=None)
    assert_array_equal(data.samples, check)

    # check z-scoring taking set of labels as a baseline
    data = dataset_wizard(samples.copy(),
                   targets=[0, 2, 2, 2, 1] + [2] * 11,
                   chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples + 1.0)

    # check that zscore modifies in-place; only guaranteed if no upcasting is
    # necessary
    samples = samples.astype('float')
    data = dataset_wizard(samples,
                   targets=[0, 2, 2, 2, 1] + [2] * 11,
                   chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples)

    # verify that if param_est is set but chunks_attr is None,
    # z-scoring across the entire dataset is still performed correctly
    data = data.copy()
    data_01 = data.select({'targets': [0, 1]})
    zscore(data_01, chunks_attr=None)
    zscore(data, chunks_attr=None, param_est=('targets', [0, 1]))
    assert_array_equal(data_01.samples, data.select({'targets': [0, 1]}))

    # these might be duplicating code above -- but twice is better than nothing

    # dataset: mean=2, std=1
    raw = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2))
    # dataset: mean=12, std=1
    raw2 = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)) + 10
    # zscore target
    check = [-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0]

    ds = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)
    pristine = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)

    zm = ZScoreMapper()
    # should do global zscore by default
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]))
    # should not modify the source
    assert_array_equal(pristine, ds)

    # if we tell it different parameters (mean=3, std=1) it should use them
    zm = ZScoreMapper(params=(3,1))
    zm.train(ds)
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]) - 1 )
    assert_array_equal(pristine, ds)

    # let's look at chunk-wise z-scoring
    ds = dataset_wizard(np.hstack((raw.copy(), raw2.copy())),
                        targets=range(32),
                        chunks=[0] * 16 + [1] * 16)
    # by default chunk-wise
    zm = ZScoreMapper()
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))
    # we should be able to do the same manually
    zm = ZScoreMapper(params={0: (2,1), 1: (12,1)})
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))

    # And just a smoke test for the warnings reported whenever the number
    # of samples per chunk is low.
    # on 1 sample per chunk
    zds1 = ZScoreMapper(chunks_attr='chunks', auto_train=True)(
        ds[[0, -1]])
    ok_(np.all(zds1.samples == 0))   # they all should be 0
    # on 2 samples per chunk
    zds2 = ZScoreMapper(chunks_attr='chunks', auto_train=True)(
        ds[[0, 1, -10, -1]])
    assert_array_equal(np.unique(zds2.samples), [-1., 1]) # they all should be -1 or 1
    # on 3 samples per chunk -- different warning
    ZScoreMapper(chunks_attr='chunks', auto_train=True)(
        ds[[0, 1, 2, -3, -2, -1]])

    # test that a std provided as a list rather than as an array is handled
    # properly -- it should zscore all features (not just the first, or none,
    # as it did before)
    ds = dataset_wizard(np.arange(32).reshape((8,-1)),
                        targets=range(8), chunks=[0] * 8)
    means = [0, 1, -10, 10]
    std0 = np.std(ds[:, 0])             # std deviation of first one
    stds = [std0, 10, .1, 1]

    zm = ZScoreMapper(params=(means, stds),
                      auto_train=True)
    dsz = zm(ds)

    assert_array_almost_equal((np.mean(ds, axis=0) - np.asanyarray(means))/np.array(stds),
                              np.mean(dsz, axis=0))

    assert_array_almost_equal(np.std(ds, axis=0)/np.array(stds),
                              np.std(dsz, axis=0))
Example #58
0
    def test_basic_functioning(self, ref_ds, zscore_common, zscore_all):
        ha = Hyperalignment(ref_ds=ref_ds,
                            zscore_all=zscore_all,
                            zscore_common=zscore_common)
        if ref_ds is None:
            ref_ds = 0                      # by default should be this one

        # get a dataset with some prominent trends in it
        ds4l = datasets['uni4large']
        # let's select only the meaningful features for now
        ds_orig = ds4l[:, ds4l.a.nonbogus_features]
        nf = ds_orig.nfeatures
        n = 4 # # of datasets to generate
        Rs, dss_rotated, dss_rotated_clean, random_shifts, random_scales \
            = [], [], [], [], []

        # now let's compose derived datasets by applying some random
        # rotation(s)
        for i in xrange(n):
            ## if False: # i == ref_ds:
            #     # Do not rotate the target space so we could check later on
            #     # if we transform back nicely
            #     R = np.eye(ds_orig.nfeatures)
            ## else:
            ds_ = random_affine_transformation(ds_orig, scale_fac=100, shift_fac=10)
            Rs.append(ds_.a.random_rotation)
            # reusing random data from dataset itself
            random_scales += [ds_.a.random_scale]
            random_shifts += [ds_.a.random_shift]
            random_noise = ds4l.samples[:, ds4l.a.bogus_features[:4]]

            ## if (zscore_common or zscore_all):
            ##     # for later on testing of "precise" reconstruction
            ##     zscore(ds_, chunks_attr=None)

            dss_rotated_clean.append(ds_)

            ds_ = ds_.copy()
            ds_.samples = ds_.samples + 0.1 * random_noise
            dss_rotated.append(ds_)

        # Let's test two scenarios -- one with no noise, where we should get
        # close to perfect reconstruction, and one with noise added, where
        # reconstruction will not be as good
        for noisy, dss in ((False, dss_rotated_clean),
                           (True, dss_rotated)):
            # to verify that original datasets didn't get changed by
            # Hyperalignment store their idhashes of samples
            idhashes = [idhash(ds.samples) for ds in dss]
            idhashes_targets = [idhash(ds.targets) for ds in dss]

            mappers = ha(dss)

            idhashes_ = [idhash(ds.samples) for ds in dss]
            idhashes_targets_ = [idhash(ds.targets) for ds in dss]
            self.assertEqual(idhashes, idhashes_,
                msg="Hyperalignment must not change original data.")
            self.assertEqual(idhashes_targets, idhashes_targets_,
                msg="Hyperalignment must not change original data targets.")

            self.assertEqual(ref_ds, ha.ca.chosen_ref_ds)

            # Map data back

            dss_clean_back = [m.forward(ds_)
                              for m, ds_ in zip(mappers, dss_rotated_clean)]

            ds_norm = np.linalg.norm(dss[ref_ds].samples)
            nddss = []
            ndcss = []
            ds_orig_Rref = np.dot(ds_orig.samples, Rs[ref_ds]) \
                           * random_scales[ref_ds] \
                           + random_shifts[ref_ds]
            if zscore_common or zscore_all:
                zscore(Dataset(ds_orig_Rref), chunks_attr=None)
            for ds_back in dss_clean_back:
                # if we used zscoring of the common space, we cannot rely on
                # range/offset being matched, so let's use corrcoef
                ndcs = np.diag(np.corrcoef(ds_back.samples.T,
                                           ds_orig_Rref.T)[nf:, :nf], k=0)
                ndcss += [ndcs]
                dds = ds_back.samples - ds_orig_Rref
                ndds = np.linalg.norm(dds) / ds_norm
                nddss += [ndds]
            snoisy = ('clean', 'noisy')[int(noisy)]
            do_labile = cfg.getboolean('tests', 'labile', default='yes')
            if not noisy or do_labile:
                # First compare correlations
                self.assertTrue(np.all(np.array(ndcss)
                                       >= (0.9, 0.85)[int(noisy)]),
                        msg="Should have reconstructed original dataset more or"
                        " less. Got correlations %s in %s case."
                        % (ndcss, snoisy))
                if not (zscore_all or zscore_common):
                    # if we didn't zscore -- all of them should be really close
                    self.assertTrue(np.all(np.array(nddss)
                                       <= (1e-10, 1e-1)[int(noisy)]),
                        msg="Should have reconstructed original dataset well "
                        "without zscoring. Got normed differences %s in %s case."
                        % (nddss, snoisy))
                elif do_labile:
                    # otherwise they all should be somewhat close
                    self.assertTrue(np.all(np.array(nddss)
                                           <= (.2, 3)[int(noisy)]),
                        msg="Should have reconstructed original dataset more or"
                        " less for all. Got normed differences %s in %s case."
                        % (nddss, snoisy))
                    self.assertTrue(np.all(nddss[ref_ds] <= .09),
                        msg="Should have reconstructed original dataset quite "
                        "well even with zscoring. Got normed differences %s "
                        "in %s case." % (nddss, snoisy))
                    # yoh: and leave 5% of difference for a chance and numerical
                    #      fluctuations ;)
                    self.assertTrue(np.all(np.array(nddss) >= 0.95*nddss[ref_ds]),
                        msg="Should have reconstructed orig_ds best of all. "
                        "Got normed differences %s in %s case with ref_ds=%d."
                        % (nddss, snoisy, ref_ds))

        # Let's see how well we do if asked to compute residuals
        ha = Hyperalignment(ref_ds=ref_ds, level2_niter=2,
                            enable_ca=['training_residual_errors',
                                       'residual_errors'])
        mappers = ha(dss_rotated_clean)
        self.assertTrue(np.all(ha.ca.training_residual_errors.sa.levels ==
                              ['1', '2:0', '2:1']))
        rterrors = ha.ca.training_residual_errors.samples
        # just basic tests:
        self.assertEqual(rterrors[0, ref_ds], 0)
        self.assertEqual(rterrors.shape, (3, n))
        rerrors = ha.ca.residual_errors.samples
        self.assertEqual(rerrors.shape, (1, n))
Example #59
0
from mvpa2.datasets.sources.native import load_tutorial_data
datapath = pjoin(cfg.get('location', 'tutorial data'), 'haxby2001')
ds = load_tutorial_data(roi=(15, 16, 23, 24, 36, 38, 39, 40, 48))

"""
We only do minimal pre-processing: linear trend removal and Z-scoring all voxel
time-series with respect to the mean and standard deviation of the "rest"
condition.
"""

# only minimal detrending
from mvpa2.mappers.detrend import poly_detrend
poly_detrend(ds, polyord=1, chunks_attr='chunks')
# z-scoring with respect to the 'rest' condition
from mvpa2.mappers.zscore import zscore
zscore(ds, chunks_attr='chunks', param_est=('targets', 'rest'))
# now remove 'rest' samples
ds = ds[ds.sa.targets != 'rest']

"""
RSA is all about so-called dissimilarity matrices: square, symmetric matrices
with a zero diagonal that encode the (dis)similarity between all pairs of
data samples or conditions in a dataset. We compose a little helper function
to plot such matrices, including a color-scale and proper labeling of matrix
rows and columns.
"""

# little helper function to plot dissimilarity matrices
def plot_mtx(mtx, labels, title):
    pl.figure()
    pl.imshow(mtx, interpolation='nearest')
Example #60
0
    def __call__(self, datasets):
        """Estimate mappers for each dataset

        Parameters
        ----------
          datasets : list or tuple of datasets

        Returns
        -------
        A list of trained Mappers of the same length as datasets
        """
        params = self.params            # for quicker access ;)
        ca = self.ca
        ndatasets = len(datasets)
        nfeatures = [ds.nfeatures for ds in datasets]

        residuals = None
        if ca['residual_errors'].enabled:
            residuals = np.zeros((2 + params.level2_niter, ndatasets))
            ca.residual_errors = Dataset(
                samples = residuals,
                sa = {'levels' :
                       ['1'] +
                       ['2:%i' % i for i in xrange(params.level2_niter)] +
                       ['3']})

        if __debug__:
            debug('HPAL', "Hyperalignment %s for %i datasets"
                  % (self, ndatasets))

        if params.ref_ds is None:
            ref_ds = np.argmax(nfeatures)
        else:
            ref_ds = params.ref_ds
            if ref_ds < 0 and ref_ds >= ndatasets:
                raise ValueError, "Requested reference dataset %i is out of " \
                      "bounds. We have only %i datasets provided" \
                      % (ref_ds, ndatasets)
        ca.choosen_ref_ds = ref_ds
        # might prefer some other way to initialize... later
        mappers = [deepcopy(params.alignment) for ds in datasets]
        # zscore all data sets
        # ds = [ zscore(ds, chunks_attr=None) for ds in datasets]

        # Level 1 (first)

        # TODO: since we are doing in-place zscoring, create deep copies
        # of the datasets with pruned targets and shallow copies of
        # the collections (if they should become needed in the transformation)
        # TODO: handle floats and non-floats differently to prevent
        #       waste of memory if there is no need (e.g. no z-scoring)
        #otargets = [ds.sa.targets for ds in datasets]
        datasets = [ds.copy(deep=False) for ds in datasets]
        #datasets = [Dataset(ds.samples.astype(float), sa={'targets': [None] * len(ds)})
        #datasets = [Dataset(ds.samples, sa={'targets': [None] * len(ds)})
        #            for ds in datasets]

        if params.zscore_all:
            if __debug__:
                debug('HPAL', "Z-scoring all datasets")
            # zscore them once, storing the corresponding ZScoreMappers
            zmappers = []
            for ids in xrange(len(datasets)):
                zmapper = ZScoreMapper(chunks_attr=None)
                zmappers.append(zmapper)
                zmapper.train(datasets[ids])
                datasets[ids] = zmapper.forward(datasets[ids])

        commonspace = np.asanyarray(datasets[ref_ds])
        if params.zscore_common and not params.zscore_all:
            if __debug__:
                debug('HPAL_',
                      "Creating copy of a commonspace and assuring "
                      "it is of a floating type")
            commonspace = commonspace.astype(float)
            zscore(commonspace, chunks_attr=None)

        data_mapped = [np.asanyarray(ds) for ds in datasets]
        #zscore(data_mapped[ref_ds],chunks_attr=None)
        for i, (m, ds_new) in enumerate(zip(mappers, datasets)):
            if __debug__:
                debug('HPAL_', "Level 1: ds #%i" % i)
            if i == ref_ds:
                continue
            #ds_new = ds.copy()
            #zscore(ds_new, chunks_attr=None);
            ds_new.targets = commonspace
            m.train(ds_new)
            ds_ = m.forward(np.asanyarray(ds_new))
            if params.zscore_common:
                zscore(ds_, chunks_attr=None)
            data_mapped[i] = ds_

            if residuals is not None:
                residuals[0, i] = np.linalg.norm(ds_ - commonspace)

            ## if ds_mapped == []:
            ##     ds_mapped = [zscore(m.forward(d), chunks_attr=None)]
            ## else:
            ##     ds_mapped += [zscore(m.forward(d), chunks_attr=None)]

            # zscore before adding
            # TODO: make this just a function so we don't waste space
            commonspace = params.combiner1(data_mapped[i], commonspace)
            if params.zscore_common:
                zscore(commonspace, chunks_attr=None)

        # update commonspace to mean of ds_mapped
        commonspace = params.combiner2(data_mapped)
        #if params.zscore_common:
        #zscore(commonspace, chunks_attr=None)
        # Level 2 -- might iterate multiple times
        for loop in xrange(params.level2_niter):
            for i, (m, ds_new) in enumerate(zip(mappers, datasets)):
                if __debug__:
                    debug('HPAL_', "Level 2 (%i-th iteration): ds #%i" % (loop, i))

                ds_temp = (commonspace*ndatasets - data_mapped[i])/(ndatasets-1)
                if params.zscore_common:
                    zscore(ds_temp, chunks_attr=None)
                #ds_new = ds.copy()
                #zscore(ds_new, chunks_attr=None)
                ds_new.targets = ds_temp #commonspace #PRJ ds_temp
                m.train(ds_new) # ds_temp)
                ds_ =  m.forward(np.asanyarray(ds_new))
                if params.zscore_common:
                    zscore(ds_, chunks_attr=None)
                data_mapped[i] = ds_
                if residuals is not None:
                    residuals[1+loop, i] = np.linalg.norm(ds_ - commonspace)

                #ds_mapped[i] = zscore( m.forward(ds_temp), chunks_attr=None)

            commonspace = params.combiner2(data_mapped)
            #if params.zscore_common:
                #zscore(commonspace, chunks_attr=None)

        # Level 3 (last)
        for i, (m, ds_new) in enumerate(zip(mappers, datasets)):
            if __debug__:
                debug('HPAL_', "Level 3: ds #%i" % i)

            #ds_new = ds.copy()     # shallow copy so we could assign new labels
            #zscore(ds_new, chunks_attr=None)
            ds_temp = (commonspace*ndatasets - data_mapped[i])/(ndatasets-1)
            if params.zscore_common:
                zscore(ds_temp, chunks_attr=None)
            ds_new.targets = ds_temp #commonspace #PRJ ds_temp#
            m.train(ds_new) #ds_temp)
            data_mapped[i] = m.forward(np.asanyarray(ds_new))
            if residuals is not None:
                residuals[-1, i] = np.linalg.norm(data_mapped[i] - commonspace)

        if params.zscore_all:
            # We need to construct new mappers which would chain
            # zscore and then final transformation
            return [ChainMapper([zm, m]) for zm, m in zip(zmappers, mappers)]
        else:
            return mappers