def test_skip(self):
        """Check the ``skip`` offsets of a lagged iterator and that ``skip`` can be set mid-iteration."""
        reader = DataInMemory(self.d)

        # With lag=5 the wrapped plain iterator is expected to skip nothing,
        # while the wrapped lagged iterator is expected to skip exactly the lag.
        lagged = reader.iterator(lag=5)
        assert lagged._it.skip == 0
        assert lagged._it_lagged.skip == 5

        plain = reader.iterator()
        for traj_index, _chunk in plain:
            if traj_index == 0:
                # change skip while the first trajectory is being processed ...
                plain.skip = 5
            elif traj_index == 1:
                # ... and verify the value is still set for the second one
                assert plain.skip == 5
    def test_current_trajindex(self):
        """Verify the trajectory index reported while iterating, with and without chunking."""
        reader = DataInMemory(self.d)

        # chunk=0: the yielded trajectory index must simply count up from zero
        for expected, (itraj, _chunk) in enumerate(reader.iterator(chunk=0)):
            assert itraj == expected

        # chunk=16: a new trajectory is recognised by the position being back at 0
        chunked = reader.iterator(chunk=16)
        expected = -1
        for itraj, _chunk in chunked:
            if chunked.pos == 0:
                expected += 1
            assert itraj == expected
            assert expected == chunked.current_trajindex
 def test_last_chunk(self):
     """With ``chunk=0`` every chunk must be flagged as the last one of its trajectory."""
     reader = DataInMemory(self.d)
     whole_traj_it = reader.iterator(chunk=0)
     for traj_index, _chunk in whole_traj_it:
         assert whole_traj_it.last_chunk_in_traj
         # the chunk of trajectory index 2 must additionally be the overall last chunk
         assert traj_index != 2 or whole_traj_it.last_chunk
 def test_chunksize_max_memory(self):
     """Chunks must not exceed the byte limit configured via ``default_chunksize``."""
     from pyemma.util.contexts import settings

     samples = np.random.random((10000, 10))
     byte_limit = 1024
     # the setting is passed as a string, as in the configuration
     with settings(default_chunksize=str(byte_limit)):
         reader = DataInMemory(samples)
         for _itraj, chunk in reader.iterator():
             self.assertLessEqual(chunk.nbytes, byte_limit)
Ejemplo n.º 5
0
    def test_n_chunks(self):
        """Check ``n_chunks`` against hand-computed and actually iterated chunk counts.

        The hard-coded expectations in the first part rely on the fixture
        ``self.d`` (NOTE(review): defined outside this block; the numbers
        imply 3 trajectories of 100 frames each -- confirm).
        """

        def assert_n_chunks_matches_iteration(reader, lags):
            # Compare the advertised chunk count with the number of chunks a
            # lagged iterator really yields, for strides 1..4 and all given lags.
            for stride in range(1, 5):
                for lag in lags:
                    it = reader.iterator(lag=lag,
                                         chunk=30,
                                         stride=stride,
                                         return_trajindex=False)
                    n_yielded = sum(1 for _ in it)
                    np.testing.assert_equal(
                        it.n_chunks,
                        n_yielded,
                        err_msg=
                        "Expected number of chunks did not agree with what the iterator "
                        "returned for stride=%s, lag=%s" % (stride, lag))

        r = DataInMemory(self.d)

        it0 = r.iterator(chunk=0)
        assert it0.n_chunks == 3  # 3 trajs

        it1 = r.iterator(chunk=50)
        assert it1.n_chunks == 3 * 2  # 2 chunks per trajectory

        it2 = r.iterator(chunk=30)
        # 3 full chunks and 1 small chunk per trajectory
        assert it2.n_chunks == 3 * 4

        it3 = r.iterator(chunk=30)
        it3.skip = 10
        assert it3.n_chunks == 3 * 3  # 3 full chunks per traj

        it4 = r.iterator(chunk=30)
        it4.skip = 5
        # 3 full chunks and 1 chunk of 5 frames per trajectory
        assert it4.n_chunks == 3 * 4

        # lagged iterator on the fixture data
        assert_n_chunks_matches_iteration(r, range(0, 18))

        # lagged iterator on trajectories of unequal length, including lags
        # of 50 and 100 frames
        dd = [
            np.random.random((100, 3)),
            np.random.random((120, 3)),
            np.random.random((120, 3))
        ]
        rr = DataInMemory(dd)
        assert_n_chunks_matches_iteration(rr, list(range(0, 18)) + [50, 100])
 def test_invalid_data_in_input_inf(self):
     """Iterating data that contains ``inf`` must raise when output checking is enabled."""
     from pyemma.coordinates.data._base.datasource import InvalidDataInStreamException

     # poison the last frame of the second trajectory
     self.d[1][-1] = np.inf
     reader = DataInMemory(self.d, chunksize=5)
     stream = reader.iterator()
     with settings(coordinates_check_output=True), \
             self.assertRaises(InvalidDataInStreamException):
         # simply exhaust the iterator; the exception is expected on the way
         for _itraj, _chunk in stream:
             pass
 def test_stride(self):
     """Change ``stride`` on every step and check that the iterator reports the new value."""
     reader = DataInMemory(self.d)
     candidates = np.arange(1, 17)
     idx = 0
     it = reader.iterator(stride=candidates[idx], chunk=1)
     for _ in it:
         # cycle through the candidate strides
         idx = (idx + 1) % len(candidates)
         it.stride = candidates[idx]
         assert it.stride == candidates[idx]
 def test_pos(self):
     """``pos`` must equal the number of frames already yielded from the current trajectory."""
     reader = DataInMemory(self.d)
     reader.chunksize = 17
     it = reader.iterator()
     frames_seen = 0
     for _itraj, chunk in it:
         assert frames_seen == it.pos
         advanced = frames_seen + len(chunk)
         # start counting from zero again once a trajectory is finished
         frames_seen = 0 if it.last_chunk_in_traj else advanced
    def test_return_trajindex(self):
        """Toggle trajectory-index output via the attribute and via the constructor argument."""
        reader = DataInMemory(self.d)

        # --- toggled through the attribute on an existing iterator ---
        it = reader.iterator(chunk=0)
        it.return_traj_index = True
        assert it.return_traj_index is True
        for item in it:
            assert len(item) == 2

        it.reset()
        it.return_traj_index = False
        assert it.return_traj_index is False
        for traj_no, item in enumerate(it):
            np.testing.assert_equal(item, self.d[traj_no])

        # --- selected through the keyword argument of iterator() ---
        for item in reader.iterator(return_trajindex=True):
            assert len(item) == 2
        for traj_no, item in enumerate(reader.iterator(return_trajindex=False)):
            np.testing.assert_equal(item, self.d[traj_no])
    def test_iterator_context(self):
        """Exercise the iterator ``state`` object for an integer stride and for index-pair strides."""
        source = DataInMemory(np.array([1]))

        # plain integer stride
        uniform_state = source.iterator(stride=1).state
        assert uniform_state.stride == 1
        assert uniform_state.uniform_stride
        assert uniform_state.is_stride_sorted()
        assert uniform_state.traj_keys is None

        # stride given as an array of index pairs
        pairs = np.asarray([[0, 0], [0, 1], [0, 2]])
        ra_state = source.iterator(stride=pairs).state
        assert not ra_state.uniform_stride
        assert ra_state.is_stride_sorted()
        np.testing.assert_array_equal(ra_state.traj_keys, np.array([0]))

        # require sorted random access
        source._needs_sorted_random_access_stride = True

        # sorted within trajectory, not sorted by trajectory key
        wrong_traj_order = np.asarray([[1, 1], [1, 2], [1, 3], [0, 0],
                                       [0, 1], [0, 2]])
        with self.assertRaises(ValueError):
            source.iterator(stride=wrong_traj_order)

        # sorted by trajectory key, not within trajectory
        wrong_frame_order = np.asarray([[0, 0], [0, 1], [0, 2], [1, 1],
                                        [1, 5], [1, 3]])
        with self.assertRaises(ValueError):
            source.iterator(stride=wrong_frame_order)

        # the state obtained earlier must still hand out its indices for trajectory 0
        np.testing.assert_array_equal(ra_state.ra_indices_for_traj(0),
                                      np.array([0, 1, 2]))
Ejemplo n.º 11
0
    def test_n_chunks(self):
        """Check the private ``_n_chunks`` count against expected and iterated chunk numbers."""
        reader = DataInMemory(self.d)

        whole = reader.iterator(chunk=0)
        assert whole._n_chunks == 3  # 3 trajs

        halves = reader.iterator(chunk=50)
        assert halves._n_chunks == 3 * 2  # 2 chunks per trajectory

        # 3 full chunks and 1 small chunk per trajectory
        thirties = reader.iterator(chunk=30)
        assert thirties._n_chunks == 3 * 4

        skip_ten = reader.iterator(chunk=30)
        skip_ten.skip = 10
        assert skip_ten._n_chunks == 3 * 3  # 3 full chunks per traj

        # 3 full chunks and 1 chunk of 5 frames per trajectory
        skip_five = reader.iterator(chunk=30)
        skip_five.skip = 5
        assert skip_five._n_chunks == 3 * 4

        # test for lagged iterator
        for stride in range(1, 5):
            for lag in range(0, 18):
                lagged = reader.iterator(lag=lag,
                                         chunk=30,
                                         stride=stride,
                                         return_trajindex=False)
                # count what the iterator really yields
                n_yielded = sum(1 for _ in lagged)
                assert n_yielded == lagged._n_chunks
Ejemplo n.º 12
0
 def test_chunksize(self):
     """Change ``chunksize`` on every step and check chunk lengths against the current value."""
     reader = DataInMemory(self.d)
     sizes = np.arange(1, 17)
     idx = 0
     it = reader.iterator(chunk=sizes[idx])
     for _itraj, chunk in it:
         if it.last_chunk_in_traj:
             # the final chunk of a trajectory may be shorter
             assert len(chunk) <= it.chunksize
         else:
             assert len(chunk) == it.chunksize
         # cycle to the next chunk size for the following step
         idx = (idx + 1) % len(sizes)
         it.chunksize = sizes[idx]
         assert it.chunksize == sizes[idx]
    def test_n_chunks_ra(self):
        """Run the chunk-count check for random index-pair strides over chunk sizes 0..99.

        Ten random stride arrays are generated once and each is combined with
        every chunk size; the actual verification is delegated to
        ``self._count_chunks``.
        """
        r = DataInMemory(self.d)

        def gen_sorted_stride(n):
            # Draw n random (trajectory, frame) pairs and return them sorted.
            frames = np.random.randint(0, 99, size=n)
            trajs = np.random.randint(0, 3, size=n)

            # NOTE(review): np.sort with axis=1 orders the two entries *within*
            # each row, so a pair whose frame index is smaller than its
            # trajectory index ends up swapped -- confirm this is intended.
            stride = np.sort(np.stack((trajs, frames)).T, axis=1)
            # sort by file and frame index
            sort_inds = np.lexsort((stride[:, 1], stride[:, 0]))
            return stride[sort_inds]

        strides = [gen_sorted_stride(np.random.randint(1, 99)) for _ in range(10)]
        for chunk in range(0, 100):
            for stride in strides:
                it = r.iterator(chunk=chunk, stride=stride)
                self._count_chunks(it)