Beispiel #1
0
def test_fmri_to_cosmo():
    skip_if_no_external('nibabel')
    from mvpa2.datasets.mri import fmri_dataset
    # test exporting an fMRI dataset to CoSMoMVPA
    pymvpa_ds = fmri_dataset(
        samples=pathjoin(pymvpa_dataroot, 'example4d.nii.gz'),
        targets=[1, 2], sprefix='voxel')
    cosmomvpa_struct = cosmo.map2cosmo(pymvpa_ds)
    _assert_set_equal(cosmomvpa_struct.keys(), ['a', 'fa', 'sa', 'samples'])

    a_dict = dict(_obj2tup(cosmomvpa_struct['a']))
    mri_keys = ['imgaffine', 'voxel_eldim', 'voxel_dim']
    _assert_subset(mri_keys, a_dict.keys())

    for k in mri_keys:
        c_value = a_dict[k]
        p_value = pymvpa_ds.a[k].value

        if isinstance(p_value, tuple):
            c_value = c_value.ravel()
            p_value = np.asarray(p_value).ravel()

        assert_array_almost_equal(c_value, p_value)

    fa_dict = dict(_obj2tup(cosmomvpa_struct['fa']))
    fa_keys = ['voxel_indices']
    _assert_set_equal(fa_dict.keys(), fa_keys)
    for k in fa_keys:
        assert_array_almost_equal(fa_dict[k].T, pymvpa_ds.fa[k].value)
Beispiel #2
0
def test_mapper_vs_zscore():
    """Test by comparing to results of elderly z-score function
    """
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    dss = [
        dataset_wizard(np.concatenate(
            [np.arange(40) for i in range(20)]).reshape(20,-1).T,
                targets=1, chunks=1),
        ] + datasets.values()

    for ds in dss:
        ds1 = deepcopy(ds)
        ds2 = deepcopy(ds)

        zsm = ZScoreMapper(chunks_attr=None)
        assert_raises(RuntimeError, zsm.forward, ds1.samples)
        idhashes = (idhash(ds1), idhash(ds1.samples))
        zsm.train(ds1)
        idhashes_train = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_train)

        # forward dataset
        ds1z_ds = zsm.forward(ds1)
        idhashes_forwardds = (idhash(ds1), idhash(ds1.samples))
        # must not modify samples in place!
        assert_equal(idhashes, idhashes_forwardds)

        # forward samples explicitly
        ds1z = zsm.forward(ds1.samples)
        idhashes_forward = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_forward)

        zscore(ds2, chunks_attr=None)
        assert_array_almost_equal(ds1z, ds2.samples)
        assert_array_equal(ds1.samples, ds.samples)
Beispiel #3
0
def test_searchlight_errors_per_trial():
    # To make sure that searchlight can return error/accuracy per trial
    from mvpa2.clfs.gnb import GNB
    from mvpa2.generators.partition import OddEvenPartitioner
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.measures.gnbsearchlight import sphere_gnbsearchlight
    from mvpa2.testing.datasets import datasets
    from mvpa2.misc.errorfx import prediction_target_matches

    dataset = datasets['3dsmall'].copy()
    # randomly permute samples so we break any random correspondence
    # to strengthen tests below
    sample_idx = np.arange(len(dataset))
    dataset = dataset[np.random.permutation(sample_idx)]

    dataset.sa.targets = ['L%d' % l for l in dataset.sa.targets]
    dataset.fa['voxel_indices'] = dataset.fa.myspace
    sample_clf = GNB()              # fast and deterministic

    part = OddEvenPartitioner()
    # only do partial to save time
    cv = CrossValidation(sample_clf, part, errorfx=None) #prediction_target_matches)
    # Just to compare error
    cv_error = CrossValidation(sample_clf, part)

    # Large searchlight radius so we get entire ROI, 2 centers just to make sure
    # that all stacking works correctly
    sl = sphere_searchlight(cv, radius=10, center_ids=[0, 1])
    results = sl(dataset)

    sl_gnb = sphere_gnbsearchlight(sample_clf, part, radius=10, errorfx=None,
                                   center_ids=[0, 1])
    results_gnbsl = sl_gnb(dataset)

    # inspect both results
    # verify that partitioning was done correctly
    partitions = list(part.generate(dataset))
    for res in (results, results_gnbsl):
        assert('targets' in res.sa.keys())  # should carry targets
        assert('cvfolds' in res.sa.keys())  # should carry cvfolds
        for ipart in xrange(len(partitions)):
            assert_array_equal(dataset[partitions[ipart].sa.partitions == 2].targets,
                               res.sa.targets[res.sa.cvfolds == ipart])

    assert_datasets_equal(results, results_gnbsl)

    # one "accuracy" per each trial
    assert_equal(results.shape, (len(dataset), 2))
    # with accuracies the same in both searchlights since the same
    # features were to be selected in both cases due too large radii
    errors_dataset = cv(dataset)
    assert_array_equal(errors_dataset.samples[:, 0], results.samples[:, 0])
    assert_array_equal(errors_dataset.samples[:, 0], results.samples[:, 1])
    # and error matching (up to precision) the one if we run with default error function
    assert_array_almost_equal(np.mean(results.targets[:, None] != results.samples, axis=0)[0],
                              np.mean(cv_error(dataset)))
Beispiel #4
0
def test_eep_load():
    eb = EEPBin(os.path.join(pymvpa_dataroot, 'eep.bin'))

    ds = [ eep_dataset(source, targets=[1, 2]) for source in
            (eb, os.path.join(pymvpa_dataroot, 'eep.bin')) ]

    for d in ds:
        assert_equal(d.nsamples, 2)
        assert_equal(d.nfeatures, 128)
        assert_equal(np.unique(d.fa.channels[4*23:4*23+4]), 'Pz')
        assert_array_almost_equal([np.arange(-0.002, 0.005, 0.002)] * 32,
                                  d.a.mapper.reverse1(d.fa.timepoints))
def test_eep_load():
    eb = EEPBin(pathjoin(pymvpa_dataroot, 'eep.bin'))

    ds = [ eep_dataset(source, targets=[1, 2]) for source in
            (eb, pathjoin(pymvpa_dataroot, 'eep.bin')) ]

    for d in ds:
        assert_equal(d.nsamples, 2)
        assert_equal(d.nfeatures, 128)
        assert_equal(np.unique(d.fa.channels[4*23:4*23+4]), 'Pz')
        assert_array_almost_equal([np.arange(-0.002, 0.005, 0.002)] * 32,
                                  d.a.mapper.reverse1(d.fa.timepoints))
Beispiel #6
0
    def test_streamline_equal_mapper(self):
        self.build_streamline_things()

        self.prototypes_equal = self.dataset.samples
        self.pm = PrototypeMapper(similarities=self.similarities,
                                  prototypes=self.prototypes_equal,
                                  demean=False)
        self.pm.train(self.dataset.samples)
        ## debug("MAP","projected data: "+str(self.pm.proj))
        # check size:
        assert_array_equal(self.pm.proj.shape, (len(self.dataset.samples), len(self.prototypes_equal)*len(self.similarities)))
        # test symmetry
        assert_array_almost_equal(self.pm.proj, self.pm.proj.T)
Beispiel #7
0
    def test_streamline_equal_mapper(self):
        self.build_streamline_things()

        self.prototypes_equal = self.dataset.samples
        self.pm = PrototypeMapper(similarities=self.similarities,
                                  prototypes=self.prototypes_equal,
                                  demean=False)
        self.pm.train(self.dataset.samples)
        ## debug("MAP","projected data: "+str(self.pm.proj))
        # check size:
        assert_array_equal(self.pm.proj.shape, (len(self.dataset.samples), len(self.prototypes_equal)*len(self.similarities)))
        # test symmetry
        assert_array_almost_equal(self.pm.proj, self.pm.proj.T)
Beispiel #8
0
def test_array_collectable():
    c = ArrayCollectable()

    # empty by default
    assert_equal(c.name, None)
    assert_equal(c.value, None)

    # late assignment
    c.name = 'somename'
    assert_raises(ValueError, c._set, 12345)
    assert_equal(c.value, None)
    c.value = np.arange(5)
    assert_equal(c.name, 'somename')
    assert_array_equal(c.value, np.arange(5))

    # immediate content
    data = np.random.random(size=(3,10))
    c = ArrayCollectable(data.copy(), 'myname',
                         "This is a test", length=3)
    assert_equal(c.name, 'myname')
    assert_array_equal(c.value, data)
    assert_equal(c.__doc__, "This is a test")
    assert_equal(str(c), 'myname')

    # repr
    from numpy import array
    e = eval(repr(c))
    assert_equal(e.name, 'myname')
    assert_array_almost_equal(e.value, data)
    assert_equal(e.__doc__, "This is a test")

    # cannot assign array of wrong length
    assert_raises(ValueError, c._set, np.arange(5))
    assert_equal(len(c), 3)

    # shallow copy DOES create a view of value array
    c.value = np.arange(3)
    d = copy.copy(c)
    assert_true(d.value.base is c.value)

    # names starting with _ are not allowed
    assert_raises(ValueError, c._set_name, "_underscore")
Beispiel #9
0
def test_mapper_vs_zscore():
    """Test by comparing to results of elderly z-score function
    """
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    dss = [
        dataset_wizard(np.concatenate([np.arange(40)
                                       for i in range(20)]).reshape(20, -1).T,
                       targets=1,
                       chunks=1),
    ] + datasets.values()

    for ds in dss:
        ds1 = deepcopy(ds)
        ds2 = deepcopy(ds)

        zsm = ZScoreMapper(chunks_attr=None)
        assert_raises(RuntimeError, zsm.forward, ds1.samples)
        idhashes = (idhash(ds1), idhash(ds1.samples))
        zsm.train(ds1)
        idhashes_train = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_train)

        # forward dataset
        ds1z_ds = zsm.forward(ds1)
        idhashes_forwardds = (idhash(ds1), idhash(ds1.samples))
        # must not modify samples in place!
        assert_equal(idhashes, idhashes_forwardds)

        # forward samples explicitly
        ds1z = zsm.forward(ds1.samples)
        idhashes_forward = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_forward)

        zscore(ds2, chunks_attr=None)
        assert_array_almost_equal(ds1z, ds2.samples)
        assert_array_equal(ds1.samples, ds.samples)
Beispiel #10
0
 def test_symmetry(self):
     self.build_vector_based_pm()
     assert_array_almost_equal(self.pm.proj[:,self.samples.shape[0]],
                               self.pm.proj.T[self.samples.shape[0],:])
     assert_array_equal(self.pm.proj[:,self.samples.shape[0]],
                        self.pm.proj.T[self.samples.shape[0],:])
Beispiel #11
0
def test_zscore():
    """Test z-scoring transformation
    """
    # dataset: mean=2, std=1
    samples = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)).\
        reshape((16, 1))
    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    assert_equal(data.samples.mean(), 2.0)
    assert_equal(data.samples.std(), 1.0)
    data_samples = data.samples.copy()
    zscore(data, chunks_attr='chunks')

    # copy should stay intact
    assert_equal(data_samples.mean(), 2.0)
    assert_equal(data_samples.std(), 1.0)
    # we should be able to operate on ndarrays
    # But we can't change type inplace for an array, can't we?
    assert_raises(TypeError, zscore, data_samples, chunks_attr=None)
    # so lets do manually
    data_samples = data_samples.astype(float)
    zscore(data_samples, chunks_attr=None)
    assert_array_equal(data.samples, data_samples)

    # check z-scoring
    check = np.array([-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0],
                    dtype='float64').reshape(16, 1)
    assert_array_equal(data.samples, check)

    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    zscore(data, chunks_attr=None)
    assert_array_equal(data.samples, check)

    # check z-scoring taking set of labels as a baseline
    data = dataset_wizard(samples.copy(),
                   targets=[0, 2, 2, 2, 1] + [2] * 11,
                   chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples + 1.0)

    # check that zscore modifies in-place; only guaranteed if no upcasting is
    # necessary
    samples = samples.astype('float')
    data = dataset_wizard(samples,
                   targets=[0, 2, 2, 2, 1] + [2] * 11,
                   chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples)

    # verify that if param_est is set but chunks_attr is None
    # performs zscoring across entire dataset correctly
    data = data.copy()
    data_01 = data.select({'targets': [0, 1]})
    zscore(data_01, chunks_attr=None)
    zscore(data, chunks_attr=None, param_est=('targets', [0, 1]))
    assert_array_equal(data_01.samples, data.select({'targets': [0, 1]}))

    # these might be duplicating code above -- but twice is better than nothing

    # dataset: mean=2, std=1
    raw = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2))
    # dataset: mean=12, std=1
    raw2 = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)) + 10
    # zscore target
    check = [-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0]

    ds = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)
    pristine = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)

    zm = ZScoreMapper()
    # should do global zscore by default
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]))
    # should not modify the source
    assert_array_equal(pristine, ds)

    # if we tell it a different mean it should obey the order
    zm = ZScoreMapper(params=(3,1))
    zm.train(ds)
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]) - 1 )
    assert_array_equal(pristine, ds)

    # let's look at chunk-wise z-scoring
    ds = dataset_wizard(np.hstack((raw.copy(), raw2.copy())),
                        targets=range(32),
                        chunks=[0] * 16 + [1] * 16)
    # by default chunk-wise
    zm = ZScoreMapper()
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))
    # we should be able to do that same manually
    zm = ZScoreMapper(params={0: (2,1), 1: (12,1)})
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))

    # And just a smoke test for warnings reporting whenever # of
    # samples per chunk is low.
    # on 1 sample per chunk
    zds1 = ZScoreMapper(chunks_attr='chunks', auto_train=True)(
        ds[[0, -1]])
    ok_(np.all(zds1.samples == 0))   # they all should be 0
    # on 2 samples per chunk
    zds2 = ZScoreMapper(chunks_attr='chunks', auto_train=True)(
        ds[[0, 1, -10, -1]])
    assert_array_equal(np.unique(zds2.samples), [-1., 1]) # they all should be -1 or 1
    # on 3 samples per chunk -- different warning
    ZScoreMapper(chunks_attr='chunks', auto_train=True)(
        ds[[0, 1, 2, -3, -2, -1]])

    # test if std provided as a list not as an array is handled
    # properly -- should zscore all features (not just first/none
    # as it was before)
    ds = dataset_wizard(np.arange(32).reshape((8,-1)),
                        targets=range(8), chunks=[0] * 8)
    means = [0, 1, -10, 10]
    std0 = np.std(ds[:, 0])             # std deviation of first one
    stds = [std0, 10, .1, 1]

    zm = ZScoreMapper(params=(means, stds),
                      auto_train=True)
    dsz = zm(ds)

    assert_array_almost_equal((np.mean(ds, axis=0) - np.asanyarray(means))/np.array(stds),
                              np.mean(dsz, axis=0))

    assert_array_almost_equal(np.std(ds, axis=0)/np.array(stds),
                              np.std(dsz, axis=0))
Beispiel #12
0
def test_zscore():
    """Test z-scoring transformation
    """
    # dataset: mean=2, std=1
    samples = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)).\
        reshape((16, 1))
    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    assert_equal(data.samples.mean(), 2.0)
    assert_equal(data.samples.std(), 1.0)
    data_samples = data.samples.copy()
    zscore(data, chunks_attr='chunks')

    # copy should stay intact
    assert_equal(data_samples.mean(), 2.0)
    assert_equal(data_samples.std(), 1.0)
    # we should be able to operate on ndarrays
    # But we can't change type inplace for an array, can't we?
    assert_raises(TypeError, zscore, data_samples, chunks_attr=None)
    # so lets do manually
    data_samples = data_samples.astype(float)
    zscore(data_samples, chunks_attr=None)
    assert_array_equal(data.samples, data_samples)

    # check z-scoring
    check = np.array([-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0],
                    dtype='float64').reshape(16, 1)
    assert_array_equal(data.samples, check)

    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    zscore(data, chunks_attr=None)
    assert_array_equal(data.samples, check)

    # check z-scoring taking set of labels as a baseline
    data = dataset_wizard(samples.copy(),
                   targets=[0, 2, 2, 2, 1] + [2] * 11,
                   chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples + 1.0)

    # check that zscore modifies in-place; only guaranteed if no upcasting is
    # necessary
    samples = samples.astype('float')
    data = dataset_wizard(samples,
                   targets=[0, 2, 2, 2, 1] + [2] * 11,
                   chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples)

    # these might be duplicating code above -- but twice is better than nothing

    # dataset: mean=2, std=1
    raw = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2))
    # dataset: mean=12, std=1
    raw2 = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)) + 10
    # zscore target
    check = [-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0]

    ds = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)
    pristine = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)

    zm = ZScoreMapper()
    # should do global zscore by default
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]))
    # should not modify the source
    assert_array_equal(pristine, ds)

    # if we tell it a different mean it should obey the order
    zm = ZScoreMapper(params=(3,1))
    zm.train(ds)
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]) - 1 )
    assert_array_equal(pristine, ds)

    # let's look at chunk-wise z-scoring
    ds = dataset_wizard(np.hstack((raw.copy(), raw2.copy())),
                        targets=range(32),
                        chunks=[0] * 16 + [1] * 16)
    # by default chunk-wise
    zm = ZScoreMapper()
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))
    # we should be able to do that same manually
    zm = ZScoreMapper(params={0: (2,1), 1: (12,1)})
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))
Beispiel #13
0
def test_zscore():
    """Test z-scoring transformation
    """
    # dataset: mean=2, std=1
    samples = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)).\
        reshape((16, 1))
    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    assert_equal(data.samples.mean(), 2.0)
    assert_equal(data.samples.std(), 1.0)
    data_samples = data.samples.copy()
    zscore(data, chunks_attr='chunks')

    # copy should stay intact
    assert_equal(data_samples.mean(), 2.0)
    assert_equal(data_samples.std(), 1.0)
    # we should be able to operate on ndarrays
    # But we can't change type inplace for an array, can't we?
    assert_raises(TypeError, zscore, data_samples, chunks_attr=None)
    # so lets do manually
    data_samples = data_samples.astype(float)
    zscore(data_samples, chunks_attr=None)
    assert_array_equal(data.samples, data_samples)

    # check z-scoring
    check = np.array([-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0],
                     dtype='float64').reshape(16, 1)
    assert_array_equal(data.samples, check)

    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    zscore(data, chunks_attr=None)
    assert_array_equal(data.samples, check)

    # check z-scoring taking set of labels as a baseline
    data = dataset_wizard(samples.copy(),
                          targets=[0, 2, 2, 2, 1] + [2] * 11,
                          chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples + 1.0)

    # check that zscore modifies in-place; only guaranteed if no upcasting is
    # necessary
    samples = samples.astype('float')
    data = dataset_wizard(samples,
                          targets=[0, 2, 2, 2, 1] + [2] * 11,
                          chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples)

    # these might be duplicating code above -- but twice is better than nothing

    # dataset: mean=2, std=1
    raw = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2))
    # dataset: mean=12, std=1
    raw2 = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)) + 10
    # zscore target
    check = [-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0]

    ds = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)
    pristine = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)

    zm = ZScoreMapper()
    # should do global zscore by default
    zm.train(ds)  # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]))
    # should not modify the source
    assert_array_equal(pristine, ds)

    # if we tell it a different mean it should obey the order
    zm = ZScoreMapper(params=(3, 1))
    zm.train(ds)
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]) - 1)
    assert_array_equal(pristine, ds)

    # let's look at chunk-wise z-scoring
    ds = dataset_wizard(np.hstack((raw.copy(), raw2.copy())),
                        targets=range(32),
                        chunks=[0] * 16 + [1] * 16)
    # by default chunk-wise
    zm = ZScoreMapper()
    zm.train(ds)  # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))
    # we should be able to do that same manually
    zm = ZScoreMapper(params={0: (2, 1), 1: (12, 1)})
    zm.train(ds)  # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))

    # And just a smoke test for warnings reporting whenever # of
    # samples per chunk is low.
    # on 1 sample per chunk
    zds1 = ZScoreMapper(chunks_attr='chunks', auto_train=True)(ds[[0, -1]])
    ok_(np.all(zds1.samples == 0))  # they all should be 0
    # on 2 samples per chunk
    zds2 = ZScoreMapper(chunks_attr='chunks',
                        auto_train=True)(ds[[0, 1, -10, -1]])
    assert_array_equal(np.unique(zds2.samples),
                       [-1., 1])  # they all should be -1 or 1
    # on 3 samples per chunk -- different warning
    ZScoreMapper(chunks_attr='chunks',
                 auto_train=True)(ds[[0, 1, 2, -3, -2, -1]])

    # test if std provided as a list not as an array is handled
    # properly -- should zscore all features (not just first/none
    # as it was before)
    ds = dataset_wizard(np.arange(32).reshape((8, -1)),
                        targets=range(8),
                        chunks=[0] * 8)
    means = [0, 1, -10, 10]
    std0 = np.std(ds[:, 0])  # std deviation of first one
    stds = [std0, 10, .1, 1]

    zm = ZScoreMapper(params=(means, stds), auto_train=True)
    dsz = zm(ds)

    assert_array_almost_equal(
        (np.mean(ds, axis=0) - np.asanyarray(means)) / np.array(stds),
        np.mean(dsz, axis=0))

    assert_array_almost_equal(
        np.std(ds, axis=0) / np.array(stds), np.std(dsz, axis=0))
Beispiel #14
0
 def test_symmetry(self):
     self.build_vector_based_pm()
     assert_array_almost_equal(self.pm.proj[:,self.samples.shape[0]],
                               self.pm.proj.T[self.samples.shape[0],:])
     assert_array_equal(self.pm.proj[:,self.samples.shape[0]],
                        self.pm.proj.T[self.samples.shape[0],:])