Python Dataset.copy 예제들, mvpa2.datasets.Dataset.copy Python 예제들

예제 #1

0

파일 보기

파일: test_filters.py 프로젝트: lydiawawa/Machine-Learning

def test_resample():
    time = np.linspace(0, 2 * np.pi, 100)
    ds = Dataset(np.vstack((np.sin(time), np.cos(time))).T,
                 sa={
                     'time': time,
                     'section': np.repeat(range(10), 10)
                 })
    assert_equal(ds.shape, (100, 2))

    # downsample
    num = 10
    rm = FFTResampleMapper(num,
                           window=('gauss', 50),
                           position_attr='time',
                           attr_strategy='sample')
    mds = rm.forward(ds)
    assert_equal(mds.shape, (num, ds.nfeatures))
    # didn't change the orig
    assert_equal(len(ds), 100)

    # check position-based resampling
    ds_partial = ds[0::10]
    mds_partial = rm.forward(ds_partial)
    # despite different input sampling should yield the same output timepoints
    assert_array_almost_equal(mds.sa.time, mds_partial.sa.time)
    # exclude the first points to prevent edge effects, but the data should be
    # very similar too
    assert_array_almost_equal(mds.samples[2:],
                              mds_partial.samples[2:],
                              decimal=2)
    # simple sample of sa's should give meaningful stuff
    assert_array_equal(mds.sa.section, range(10))

    # and now for a dataset with chunks
    cds = vstack([ds.copy(), ds.copy()])
    cds.sa['chunks'] = np.repeat([0, 1], len(ds))
    rm = FFTResampleMapper(num,
                           attr_strategy='sample',
                           chunks_attr='chunks',
                           window=('gauss', 50))
    mcds = rm.forward(cds)
    assert_equal(mcds.shape, (20, 2))
    assert_array_equal(mcds.sa.section, np.tile(range(10), 2))
    # each individual chunks should be identical to previous dataset
    assert_array_almost_equal(mds.samples, mcds.samples[:10])
    assert_array_almost_equal(mds.samples, mcds.samples[10:])

예제 #2

0

파일 보기

파일: test_surfing.py 프로젝트: vishalbelsare/PyMVPA

    def test_surf_ring_queryengine(self):
        s = surf.generate_plane((0, 0, 0), (0, 1, 0), (0, 0, 1), 4, 5)
        # add second layer
        s2 = surf.merge(s, (s + (.01, 0, 0)))
        ds = Dataset(samples=np.arange(20)[np.newaxis],
                     fa=dict(node_indices=np.arange(39, 0, -2)))
        # add more features (with shared node indices)
        ds3 = hstack((ds, ds, ds))
        radius = 2.5
        inner_radius = 1.0
        # Makes sure it raises error if inner_radius is >= radius
        assert_raises(ValueError,
                      lambda: queryengine.SurfaceRingQueryEngine(surface=s2,
                                                         inner_radius=2.5,
                                                         radius=radius))
        distance_metrics = ('euclidean', 'dijkstra', 'euclidean', 'dijkstra')
        for distance_metric, include_center in zip(distance_metrics, [True, False]*2):
            qe = queryengine.SurfaceRingQueryEngine(surface=s2, radius=radius,
                                inner_radius=inner_radius, distance_metric=distance_metric,
                                include_center=include_center)
            # untrained qe should give errors
            assert_raises(ValueError, lambda: qe.ids)
            assert_raises(ValueError, lambda: qe.query_byid(0))

            # node index out of bounds should give error
            ds_ = ds.copy()
            ds_.fa.node_indices[0] = 100
            assert_raises(ValueError, lambda: qe.train(ds_))

            # lack of node indices should give error
            ds_.fa.pop('node_indices')
            assert_raises(ValueError, lambda: qe.train(ds_))
            # train the qe
            qe.train(ds3)

            for node in np.arange(-1, s2.nvertices + 1):
                if node < 0 or node >= s2.nvertices:
                    assert_raises(KeyError, lambda: qe.query_byid(node))
                    continue

                feature_ids = np.asarray(qe.query_byid(node))
                # node indices relative to ds
                base_ids = feature_ids[feature_ids < 20]
                # should have multiples of 20
                assert_equal(set(feature_ids),
                             set((base_ids[np.newaxis].T + \
                                  [0, 20, 40]).ravel()))

                node_indices = s2.circlearound_n2d(node,
                                    radius, distance_metric or 'dijkstra')

                fa_indices = [fa_index for fa_index, inode in
                              enumerate(ds3.fa.node_indices)
                              if inode in node_indices and node_indices[inode] > inner_radius]
                if include_center and node in ds3.fa.node_indices:
                    fa_indices += np.where(ds3.fa.node_indices == node)[0].tolist()
                assert_equal(set(feature_ids), set(fa_indices))

예제 #3

0

파일 보기

파일: test_filters.py 프로젝트: Anhmike/PyMVPA

def test_resample():
    time = np.linspace(0, 2*np.pi, 100)
    ds = Dataset(np.vstack((np.sin(time), np.cos(time))).T,
                 sa = {'time': time,
                       'section': np.repeat(range(10), 10)})
    assert_equal(ds.shape, (100, 2))

    # downsample
    num = 10
    rm = FFTResampleMapper(num, window=('gauss', 50),
                           position_attr='time',
                           attr_strategy='sample')
    mds = rm.forward(ds)
    assert_equal(mds.shape, (num, ds.nfeatures))
    # didn't change the orig
    assert_equal(len(ds), 100)

    # check position-based resampling
    ds_partial = ds[0::10]
    mds_partial = rm.forward(ds_partial)
    # despite different input sampling should yield the same output timepoints
    assert_array_almost_equal(mds.sa.time, mds_partial.sa.time)
    # exclude the first points to prevent edge effects, but the data should be
    # very similar too
    assert_array_almost_equal(mds.samples[2:], mds_partial.samples[2:], decimal=2)
    # simple sample of sa's should give meaningful stuff
    assert_array_equal(mds.sa.section, range(10))

    # and now for a dataset with chunks
    cds = vstack([ds.copy(), ds.copy()])
    cds.sa['chunks'] = np.repeat([0,1], len(ds))
    rm = FFTResampleMapper(num, attr_strategy='sample', chunks_attr='chunks',
                           window=('gauss', 50))
    mcds = rm.forward(cds)
    assert_equal(mcds.shape, (20, 2))
    assert_array_equal(mcds.sa.section, np.tile(range(10),2))
    # each individual chunks should be identical to previous dataset
    assert_array_almost_equal(mds.samples, mcds.samples[:10])
    assert_array_almost_equal(mds.samples, mcds.samples[10:])

예제 #4

0

파일 보기

    def test_surf_queryengine(self, qefn):
        s = surf.generate_plane((0, 0, 0), (0, 1, 0), (0, 0, 1), 4, 5)

        # add second layer
        s2 = surf.merge(s, (s + (.01, 0, 0)))

        ds = Dataset(samples=np.arange(20)[np.newaxis],
                     fa=dict(node_indices=np.arange(39, 0, -2)))

        # add more features (with shared node indices)
        ds3 = hstack((ds, ds, ds))

        radius = 2.5

        # Note: sweepargs it not used to avoid re-generating the same
        #       surface and dataset multiple times.
        for distance_metric in ('euclidean', 'dijkstra', '<illegal>', None):
            builder = lambda: queryengine.SurfaceQueryEngine(
                s2, radius, distance_metric)
            if distance_metric in ('<illegal>', None):
                assert_raises(ValueError, builder)
                continue

            qe = builder()

            # test i/o and ensure that the untrained instance is not trained
            if externals.exists('h5py'):
                h5save(qefn, qe)
                qe = h5load(qefn)

            # untrained qe should give errors
            assert_raises(ValueError, lambda: qe.ids)
            assert_raises(ValueError, lambda: qe.query_byid(0))

            # node index out of bounds should give error
            ds_ = ds.copy()
            ds_.fa.node_indices[0] = 100
            assert_raises(ValueError, lambda: qe.train(ds_))

            # lack of node indices should give error
            ds_.fa.pop('node_indices')
            assert_raises(ValueError, lambda: qe.train(ds_))

            # train the qe
            qe.train(ds3)

            # test i/o and ensure that the loaded instance is trained
            if externals.exists('h5py'):
                h5save(qefn, qe)
                qe = h5load(qefn)

            for node in np.arange(-1, s2.nvertices + 1):
                if node < 0 or node >= s2.nvertices:
                    assert_raises(KeyError, lambda: qe.query_byid(node))
                    continue

                feature_ids = np.asarray(qe.query_byid(node))

                # node indices relative to ds
                base_ids = feature_ids[feature_ids < 20]

                # should have multiples of 20
                assert_equal(set(feature_ids),
                             set((base_ids[np.newaxis].T + \
                                            [0, 20, 40]).ravel()))

                node_indices = list(
                    s2.circlearound_n2d(node, radius, distance_metric
                                        or 'dijkstra'))

                fa_indices = [
                    fa_index
                    for fa_index, node in enumerate(ds3.fa.node_indices)
                    if node in node_indices
                ]

                assert_equal(set(feature_ids), set(fa_indices))

            # smoke tests
            assert_true('SurfaceQueryEngine' in '%s' % qe)
            assert_true('SurfaceQueryEngine' in '%r' % qe)

예제 #5

0

파일 보기

파일: test_surfing.py 프로젝트: Arthurkorn/PyMVPA

    def test_surf_queryengine(self, qefn):
        s = surf.generate_plane((0, 0, 0), (0, 1, 0), (0, 0, 1), 4, 5)

        # add scond layer
        s2 = surf.merge(s, (s + (.01, 0, 0)))

        ds = Dataset(samples=np.arange(20)[np.newaxis],
                    fa=dict(node_indices=np.arange(39, 0, -2)))

        # add more features (with shared node indices)
        ds3 = hstack((ds, ds, ds))

        radius = 2.5

        # Note: sweepargs it not used to avoid re-generating the same
        #       surface and dataset multiple times.
        for distance_metric in ('euclidean', 'dijkstra', '<illegal>', None):
            builder = lambda: queryengine.SurfaceQueryEngine(s2, radius,
                                                             distance_metric)
            if distance_metric in ('<illegal>', None):
                assert_raises(ValueError, builder)
                continue

            qe = builder()

            # test i/o and ensure that the untrained instance is not trained
            if externals.exists('h5py'):
                fd, qefn = tempfile.mkstemp('qe.hdf5', 'test'); os.close(fd)
                h5save(qefn, qe)
                qe = h5load(qefn)
                os.remove(qefn)


            # untrained qe should give errors
            assert_raises(ValueError, lambda:qe.ids)
            assert_raises(ValueError, lambda:qe.query_byid(0))

            # node index out of bounds should give error
            ds_ = ds.copy()
            ds_.fa.node_indices[0] = 100
            assert_raises(ValueError, lambda: qe.train(ds_))

            # lack of node indices should give error
            ds_.fa.pop('node_indices')
            assert_raises(ValueError, lambda: qe.train(ds_))


            # train the qe
            qe.train(ds3)

            # test i/o and ensure that the loaded instance is trained
            if externals.exists('h5py'):
                h5save(qefn, qe)
                qe = h5load(qefn)

            for node in np.arange(-1, s2.nvertices + 1):
                if node < 0 or node >= s2.nvertices:
                    assert_raises(KeyError, lambda: qe.query_byid(node))
                    continue

                feature_ids = np.asarray(qe.query_byid(node))

                # node indices relative to ds
                base_ids = feature_ids[feature_ids < 20]

                # should have multiples of 20
                assert_equal(set(feature_ids),
                             set((base_ids[np.newaxis].T + \
                                            [0, 20, 40]).ravel()))



                node_indices = list(s2.circlearound_n2d(node,
                                    radius, distance_metric or 'dijkstra'))

                fa_indices = [fa_index for fa_index, node in
                                    enumerate(ds3.fa.node_indices)
                                    if node in node_indices]


                assert_equal(set(feature_ids), set(fa_indices))

            # smoke tests
            assert_true('SurfaceQueryEngine' in '%s' % qe)
            assert_true('SurfaceQueryEngine' in '%r' % qe)