Ejemplo n.º 1
0
 def test_skip(self):
     """Output requested with ``skip=k`` must equal the full output sliced by ``[k:]``.

     For every tested skip value a fresh ``DataInMemory`` reader over ``self.d``
     is created for the skipped run and another one for the reference run.
     """
     for n_skipped in (0, 3, 13):
         skipped = DataInMemory(self.d).get_output(skip=n_skipped)[0]
         reference = DataInMemory(self.d).get_output()[0]
         message = ("The first %s rows were skipped, but that did not "
                    "match the rows with skip=0 and sliced by [%s::]" % (n_skipped, n_skipped))
         np.testing.assert_almost_equal(skipped, reference[n_skipped:], err_msg=message)
Ejemplo n.º 2
0
 def test_skip_input_list(self):
     """Same as the single-input skip check, but with ``self.d`` supplied twice.

     Both entries of the returned output list are compared against the
     corresponding unskipped output sliced by ``[k:]``.
     """
     for n_skipped in (0, 3, 13):
         skipped = DataInMemory([self.d, self.d]).get_output(skip=n_skipped)
         reference = DataInMemory([self.d, self.d]).get_output()

         first_msg = ("The first %s rows of the first file were skipped, but that did not "
                      "match the rows with skip=0 and sliced by [%s::]" % (n_skipped, n_skipped))
         np.testing.assert_almost_equal(skipped[0], reference[0][n_skipped:],
                                        err_msg=first_msg)

         second_msg = ("The first %s rows of the second file were skipped, but that did not"
                       " match the rows with skip=0 and sliced by [%s::]" % (n_skipped, n_skipped))
         np.testing.assert_almost_equal(skipped[1], reference[1][n_skipped:],
                                        err_msg=second_msg)
Ejemplo n.º 3
0
    def test_lagged_iterator_1d(self):
        """Iterate three 1-d trajectories with ``lag=1`` and a chunk size of 10.

        The X chunks collected per trajectory are expected to reassemble the
        input array, and the Y chunks the input shifted by ``lag``.
        """
        lag = 1
        data = [np.arange(57), np.arange(50), np.arange(30)]

        reader = DataInMemory(data)
        reader.chunksize = 10

        self.assertEqual(reader.n_frames_total(),
                         sum(arr.shape[0] for arr in data))

        # one bucket of chunks per trajectory, filled while iterating
        x_chunks = [[] for _ in data]
        y_chunks = [[] for _ in data]
        for itraj, X, Y in reader.iterator(lag=lag):
            x_chunks[itraj].append(X)
            y_chunks[itraj].append(Y)

        stacked_x = [np.vstack(parts) for parts in x_chunks]
        stacked_y = [np.vstack(parts) for parts in y_chunks]

        # instantaneous data
        for stacked, source in zip(stacked_x, data):
            np.testing.assert_equal(stacked.reshape(source.shape), source)

        # time-lagged data: every input with its first ``lag`` frames removed
        shifted_inputs = [source[lag:] for source in data]
        for stacked, shifted in zip(stacked_y, shifted_inputs):
            np.testing.assert_equal(stacked.reshape(shifted.shape), shifted)
Ejemplo n.º 4
0
 def test_duplicated_data_in_fit_transform(self):
     """``get_output()`` and ``fit_transform()`` must agree when the same array is passed twice."""
     X = np.random.randn(100, 2)
     estimator = api.tica(data=DataInMemory([X, X]), lag=1, dim=1)

     via_get_output = estimator.get_output()
     via_fit_transform = estimator.fit_transform([X, X])

     np.testing.assert_array_almost_equal(via_get_output, via_fit_transform)
Ejemplo n.º 5
0
    def test_big_k(self):
        """Smoke test: estimate uniform-time clustering with k=151 on 300 frames of 3-d data."""
        frames = np.random.random((300, 3))
        source = DataInMemory(frames)

        clustering = api.cluster_uniform_time(k=151)
        clustering.estimate(source)
Ejemplo n.º 6
0
    def test_2d_skip(self):
        """Smoke test: estimate uniform-time clustering (k=2, skip=100) on 300 frames of 3-d data."""
        frames = np.random.random((300, 3))
        source = DataInMemory(frames)

        clustering = api.cluster_uniform_time(k=2, skip=100)
        clustering.estimate(source)
Ejemplo n.º 7
0
 def test_stride(self):
     """Output requested with ``stride=s`` must match ``self.d[::s]`` for several strides.

     A single reader instance is reused for all stride values.
     """
     reader = DataInMemory(self.d)
     for s in (1, 2, 3, 4, 5, 6, 7, 10, 11, 21, 23):
         strided = reader.get_output(stride=s)[0]
         reference = self.d[::s]
         np.testing.assert_allclose(strided, reference,
                                    err_msg="not equal for stride=%i" % s)
Ejemplo n.º 8
0
    def test_big_k(self):
        """Smoke test: parametrize uniform-time clustering with k=151 on 300 frames of 3-d data."""
        frames = np.random.random((300, 3))
        source = DataInMemory(frames)

        clustering = api.cluster_uniform_time(k=151)
        # attach the reader as the input of the clustering before running it
        clustering.data_producer = source
        clustering.parametrize()
Ejemplo n.º 9
0
    def testDataArray(self):
        """A single (100, 3) array must be reported as one trajectory of 100 frames."""
        n_frames, n_dims = 100, 3
        reader = DataInMemory(np.random.random((n_frames, n_dims)))

        expected_lengths = np.array([n_frames])
        np.testing.assert_equal(reader.trajectory_lengths(), expected_lengths)
Ejemplo n.º 10
0
    def test1dDataList(self):
        """Two 1-d arrays of length 10: check lengths, dimension, trajectory count and total frames."""
        n = 10
        reader = DataInMemory([np.arange(n), np.arange(n)])

        self.assertEqual(reader.trajectory_lengths(), [n] * 2)
        self.assertEqual(reader.dimension(), 1)
        self.assertEqual(reader.number_of_trajectories(), 2)
        self.assertEqual(reader.n_frames_total(), n + n)
Ejemplo n.º 11
0
    def testDataArray(self):
        """A single (100, 3) array must be reported as one trajectory of 100 frames."""
        frames_per_traj = 100
        dim = 3

        data = np.random.random((frames_per_traj, dim))
        d = DataInMemory(data)

        # A literal one-element list replaces the former ``xrange(1)``
        # comprehension: ``xrange`` is not defined on Python 3, and the
        # resulting list is the same.
        self.assertEqual(d.trajectory_lengths(), [frames_per_traj])
Ejemplo n.º 12
0
    def test1dDataList(self):
        """Two 1-d arrays of length 10: check lengths, ``ndim``, trajectory count and total frames."""
        n = 10
        reader = DataInMemory([np.arange(n), np.arange(n)])

        expected_lengths = np.array([n, n])
        np.testing.assert_equal(reader.trajectory_lengths(), expected_lengths)
        self.assertEqual(reader.ndim, 1)
        self.assertEqual(reader.number_of_trajectories(), 2)
        self.assertEqual(reader.n_frames_total(), n + n)
Ejemplo n.º 13
0
    def test_1d(self):
        """Smoke test: parametrize uniform-time clustering with k=2 on 1000 1-d samples."""
        samples = np.random.random(1000)
        source = DataInMemory(samples)

        clustering = api.cluster_uniform_time(k=2)
        # attach the reader as the input of the clustering before running it
        clustering.data_producer = source
        clustering.parametrize()
Ejemplo n.º 14
0
    def test1dData(self):
        """A single 1-d array of length 3: check lengths, ``ndim``, trajectory count and total frames."""
        n = 3
        data = np.arange(n)
        reader = DataInMemory(data)

        # Compare against the ndarray with numpy's own helper.  ``assertEqual``
        # evaluates ``a == b`` in a boolean context, which raises ValueError
        # instead of a clean test failure as soon as the element-wise result
        # has more than one entry.
        np.testing.assert_equal(reader.trajectory_lengths(), np.array([n]))
        self.assertEqual(reader.ndim, 1)
        self.assertEqual(reader.number_of_trajectories(), 1)
        self.assertEqual(reader.n_frames_total(), n)
Ejemplo n.º 15
0
    def test_2d_skip(self):
        """Smoke test: parametrize uniform-time clustering (k=2, skip=100) on 300 frames of 3-d data."""
        frames = np.random.random((300, 3))
        source = DataInMemory(frames)

        clustering = api.cluster_uniform_time(k=2, skip=100)
        # attach the reader as the input of the clustering before running it
        clustering.data_producer = source
        clustering.parametrize()
Ejemplo n.º 16
0
    def test_big_k(self):
        """Smoke test: parametrize uniform-time clustering with k=298 on 300 frames of 3-d data."""
        # TODO: fix this (some error handling should be done in _param_init)
        frames = np.random.random((300, 3))
        source = DataInMemory(frames)

        clustering = api.cluster_uniform_time(k=298)
        # attach the reader as the input of the clustering before running it
        clustering.data_producer = source
        clustering.parametrize()
Ejemplo n.º 17
0
    def test_ndim_input(self):
        """A (4, 2, 2, 2) array must be reported as one trajectory of 4 frames with dimension 8."""
        reader = DataInMemory(np.empty((4, 2, 2, 2)))

        # the three trailing axes are expected to count as 2 * 2 * 2 features
        self.assertEqual(reader.dimension(), 2 * 2 * 2)
        self.assertEqual(reader.number_of_trajectories(), 1)
        self.assertEqual(reader.n_frames_total(), 4)

        lengths = reader.trajectory_lengths()
        self.assertEqual(lengths, [reader.n_frames_total()])
Ejemplo n.º 18
0
    def setUp(self):
        """Build the fixture: random data plus a column of ones, fed through a ``Sparsifier``.

        ``self.X`` keeps the 10 random columns, ``self.src`` is the reader over the
        11-column array (chunk size 200) and ``self.sparsifier`` is parametrized on it.
        """
        self.X = np.random.random((1000, 10))
        # append one constant column to the random features
        constant_column = np.ones((1000, 1))
        augmented = np.concatenate((self.X, constant_column), axis=1)

        self.src = DataInMemory(augmented)
        self.src.chunksize = 200

        self.sparsifier = Sparsifier()
        self.sparsifier.data_producer = self.src
        self.sparsifier.parametrize()
Ejemplo n.º 19
0
    def test_ndim_input(self):
        """A (4, 2, 2, 2) array must be reported as one trajectory of 4 frames with ``ndim`` 8."""
        reader = DataInMemory(np.empty((4, 2, 2, 2)))

        # the three trailing axes are expected to count as 2 * 2 * 2 features
        self.assertEqual(reader.ndim, 2 * 2 * 2)
        self.assertEqual(reader.number_of_trajectories(), 1)
        self.assertEqual(reader.n_frames_total(), 4)

        lengths = reader.trajectory_lengths()
        np.testing.assert_equal(lengths, np.array([reader.n_frames_total()]))
Ejemplo n.º 20
0
    def test_duplicated_data(self):
        """TICA on data with a repeated column must yield float64 eigenvectors and eigenvalues."""
        # two random columns, then column 0 appended again as a third column
        base = np.random.randn(100, 2)
        X = np.hstack((base, base[:, :1]))

        estimator = api.tica(data=DataInMemory(X), lag=1, dim=1)

        vectors_dtype = estimator.eigenvectors.dtype
        assert vectors_dtype == np.float64
        values_dtype = estimator.eigenvalues.dtype
        assert values_dtype == np.float64
Ejemplo n.º 21
0
    def testWriter(self):
        """Run random data through ``WriterCSV`` and compare the file read back with the input."""
        csv_writer = WriterCSV(self.output_file)
        expected = np.random.random((100, 3))
        csv_writer.data_producer = DataInMemory(expected)

        csv_writer.parametrize()

        # read the file back and compare it with what went in
        written = np.loadtxt(self.output_file)
        np.testing.assert_allclose(written, expected)
Ejemplo n.º 22
0
    def testListOfArrays(self):
        """Three (100, 3) arrays: dimension must be 3 and every trajectory length 100."""
        n_trajs, n_frames, n_dims = 3, 100, 3
        arrays = [np.random.random((n_frames, n_dims)) for _ in range(n_trajs)]

        reader = DataInMemory(arrays)

        self.assertEqual(reader.dimension(), n_dims)

        expected_lengths = np.array([n_frames] * n_trajs)
        np.testing.assert_equal(reader.trajectory_lengths(), expected_lengths)
Ejemplo n.º 23
0
 def test_lagged_stridden_access(self):
     """Stacked lagged chunks must equal ``data[lag::stride]`` for every stride/lag pair tested."""
     data = np.random.random((1000, 2))
     reader = DataInMemory(data)
     lags = (1, 3, 7, 10, 30)
     for stride in (2, 3, 5, 7, 15):
         for lag in lags:
             # stride and lag are passed positionally, in that order
             lagged = np.vstack([Y for _, _, Y in reader.iterator(stride, lag)])
             np.testing.assert_equal(lagged, data[lag::stride])
Ejemplo n.º 24
0
    def testChunksizeResultsTica(self):
        """TICA mean and covariance must not depend on the reader's chunk size.

        The estimate is computed once on a reader left at its default chunk
        size and once on a reader with ``chunksize = 40``; ``mean`` and ``cov``
        of both runs are compared with ``assert_allclose``.
        """
        chunk = 40
        lag = 100
        np.random.seed(0)
        X = np.random.randn(23000, 3)

        # un-chunked
        d = DataInMemory(X)

        tica_obj = api.tica(data=d, lag=lag, dim=1)

        # copy, because ``tica_obj`` is rebound to the second estimate below
        cov = tica_obj.cov.copy()
        mean = tica_obj.mean.copy()

        # ------- run again with new chunksize -------
        d = DataInMemory(X)
        d.chunksize = chunk
        # Same entry point as the first run: the bare name ``tica`` used here
        # before is not known to be in scope, and the comparison is only
        # meaningful if both estimates come from the same factory.
        tica_obj = api.tica(data=d, lag=lag, dim=1)

        np.testing.assert_allclose(tica_obj.mean, mean)
        np.testing.assert_allclose(tica_obj.cov, cov)
Ejemplo n.º 25
0
    def test_lagged_iterator_1d(self):
        """Check strided, time-lagged iteration over three 1-d trajectories.

        With ``lag=9``, ``stride=2`` and a chunk size of 10 the iterator must be
        a ``_LaggedIterator``.  The stacked X chunks are compared with the
        strided input, trimmed to the number of lagged frames, and the stacked
        Y chunks with ``input[lag::stride]``.
        """
        n = 30
        chunksize = 10
        lag = 9
        stride = 2

        data = [np.arange(n), np.arange(50), np.arange(33)]
        input_lens = [x.shape[0] for x in data]
        reader = DataInMemory(data, chunksize=chunksize)
        it = reader.iterator(chunk=chunksize, stride=stride, lag=lag)
        # lag < chunksize, so we expect a LaggedIter
        from pyemma.coordinates.data._base.iterable import _LaggedIterator
        self.assertIsInstance(it, _LaggedIterator)
        # unittest assertion instead of a bare ``assert`` so the check is not
        # stripped under ``python -O`` and reports both values on failure
        self.assertEqual(reader.chunksize, chunksize)

        self.assertEqual(reader.n_frames_total(), sum(input_lens))

        # store results by traj
        chunked_trajs = [[] for _ in range(len(data))]
        chunked_lagged_trajs = [[] for _ in range(len(data))]

        # iterate over data
        for itraj, X, Y in reader.iterator(lag=lag, stride=stride):
            chunked_trajs[itraj].append(X)
            chunked_lagged_trajs[itraj].append(Y)

        trajs = [np.vstack(ichunks) for ichunks in chunked_trajs]
        lagged_trajs = [np.vstack(ichunks) for ichunks in chunked_lagged_trajs]

        # unlagged data
        for idx, (traj, input_traj) in enumerate(zip(trajs, data)):
            # do not consider chunks that have no lagged counterpart:
            # the strided input is cut to the length of the lagged trajectory
            np.testing.assert_equal(
                traj.T.squeeze(),
                input_traj[::stride][:len(lagged_trajs[idx])].squeeze(),
                err_msg="failed for traj=%s" % idx)

        # lagged data
        for idx, (traj, input_traj) in enumerate(zip(lagged_trajs, data)):
            np.testing.assert_equal(traj.T.squeeze(),
                                    input_traj[lag::stride].squeeze(),
                                    err_msg="failed for traj=%s" % idx)
Ejemplo n.º 26
0
    def test_lagged_iterator_2d(self):
        """Iterate three 3-column trajectories with ``lag=1`` and a chunk size of 10.

        The stacked X chunks are compared with each input minus its last
        ``lag`` frames, the stacked Y chunks with each input minus its first
        ``lag`` frames.
        """
        lag = 1
        data = [
            np.arange(300).reshape((100, 3)),
            np.arange(29 * 3).reshape((29, 3)),
            np.arange(150).reshape(50, 3),
        ]

        reader = DataInMemory(data)
        reader.chunksize = 10

        self.assertEqual(reader.n_frames_total(),
                         sum(arr.shape[0] for arr in data))

        # one bucket of chunks per trajectory, filled while iterating
        x_chunks = [[] for _ in data]
        y_chunks = [[] for _ in data]
        for itraj, X, Y in reader.iterator(lag=lag):
            x_chunks[itraj].append(X)
            y_chunks[itraj].append(Y)

        stacked_x = [np.vstack(parts) for parts in x_chunks]
        stacked_y = [np.vstack(parts) for parts in y_chunks]

        # instantaneous data: frames without a lagged counterpart are excluded
        for stacked, source in zip(stacked_x, data):
            n_paired = source.shape[0] - lag
            np.testing.assert_equal(stacked.reshape((n_paired, 3)),
                                    source[:n_paired])

        # time-lagged data: every input with its first ``lag`` frames removed
        shifted_inputs = [source[lag:] for source in data]
        for stacked, shifted in zip(stacked_y, shifted_inputs):
            np.testing.assert_equal(stacked.reshape(shifted.shape), shifted)
Ejemplo n.º 27
0
    def test_time_lagged_chunked_access(self):
        """With ``lag=30``, X and Y chunks of each trajectory must have equal, lag-reduced shapes.

        Trajectories have 100, 29 and 50 frames; the expected chunk shapes are
        (70, 3), (0, 3) and (20, 3) respectively.
        """
        n = 100
        data = [np.random.random((n, 3)),
                np.zeros((29, 3)),
                np.random.random((n - 50, 3))]
        reader = DataInMemory(data)
        self.assertEqual(reader.n_frames_total(), n + n - 50 + 29)

        # changed behavior: only chunks of the same size are returned, so X is
        # no longer the full (100, 3) trajectory.  The 29-frame trajectory is
        # shorter than the lag, hence no lagged chunk can be built for it.
        expected_shapes = {0: (70, 3), 1: (0, 3), 2: (20, 3)}

        for itraj, X, Y in reader.iterator(lag=30, return_trajindex=True):
            shape = expected_shapes.get(itraj)
            if shape is not None:
                self.assertEqual(X.shape, shape)
                self.assertEqual(Y.shape, shape)
Ejemplo n.º 28
0
    def test_lagged_iterator_2d(self):
        """Iterate three 3-column trajectories with ``lag=1`` and a chunk size of 10.

        The stacked X chunks are expected to reassemble each input array, the
        stacked Y chunks each input with its first ``lag`` frames removed.
        """
        chunksize = 10
        lag = 1

        data = [np.arange(300).reshape((100, 3)),
                np.arange(29 * 3).reshape((29, 3)),
                np.arange(150).reshape(50, 3)]
        input_lens = [x.shape[0] for x in data]
        reader = DataInMemory(data)
        reader.chunksize = chunksize

        self.assertEqual(reader.n_frames_total(), sum(input_lens))

        # store results by traj; iterating ``data`` directly avoids ``xrange``,
        # which does not exist on Python 3
        chunks = [[] for _ in data]
        lagged_chunks = [[] for _ in data]

        # iterate over data
        for itraj, X, Y in reader.iterator(lag=lag):
            chunks[itraj].append(X)
            lagged_chunks[itraj].append(Y)

        trajs = [np.vstack(ichunks) for ichunks in chunks]

        lagged_trajs = [np.vstack(ichunks) for ichunks in lagged_chunks]

        # unlagged data
        for traj, input_traj in zip(trajs, data):
            np.testing.assert_equal(traj.reshape(input_traj.shape), input_traj)

        # lagged data
        lagged_0 = [d[lag:] for d in data]

        for traj, input_traj in zip(lagged_trajs, lagged_0):
            np.testing.assert_equal(traj.reshape(input_traj.shape), input_traj)
Ejemplo n.º 29
0
    def test_time_lagged_chunked_access(self):
        """Pull chunks via ``_next_chunk`` with ``lag=30`` and check X/Y shapes per trajectory.

        Trajectories have 100, 29 and 50 frames.  X is expected to hold the
        whole trajectory, Y the frames remaining after the lag (70, 0 and 20).
        """
        n = 100
        data = [
            np.random.random((n, 3)),
            np.zeros((29, 3)),
            np.random.random((n - 50, 3)),
        ]
        reader = DataInMemory(data)
        self.assertEqual(reader.n_frames_total(), n + n - 50 + 29)

        # trajectory index -> (expected X shape, expected Y shape); for the
        # 29-frame trajectory no lagged chunk can be built due to the lag time
        expected_shapes = {
            0: ((100, 3), (70, 3)),
            1: ((29, 3), (0, 3)),
            2: ((50, 3), (20, 3)),
        }

        ctx = TransformerIteratorContext(lag=30)
        itraj = 0
        all_done = False
        while not all_done:
            frames_seen = 0
            traj_done = False
            while not traj_done:
                X, Y = reader._next_chunk(ctx)
                if itraj in expected_shapes:
                    x_shape, y_shape = expected_shapes[itraj]
                    self.assertEqual(X.shape, x_shape)
                    self.assertEqual(Y.shape, y_shape)
                n_rows = np.shape(X)[0]
                # did this chunk reach the end of the current trajectory?
                traj_done = (frames_seen + n_rows >= reader.trajectory_length(itraj))
                # ... and was that the last trajectory?
                all_done = (traj_done
                            and itraj >= reader.number_of_trajectories() - 1)
                frames_seen += n_rows
            # move on to the next trajectory
            itraj += 1
Ejemplo n.º 30
0
    def test_1d(self):
        """Pass a reader over 1000 random 1-d samples to ``api.cluster_uniform_time`` with ``k=2``."""
        x = np.random.random(1000)
        reader = DataInMemory(x)

        k = 2
        # the reader is handed over positionally together with the cluster count
        c = api.cluster_uniform_time(reader, k=k)