Example #1
    def test_numpy_filereader_random_access(self):
        tmpfiles = [
            tempfile.mktemp(suffix='.npy') for _ in range(0, len(self.data))
        ]
        try:
            for idx, tmp in enumerate(tmpfiles):
                np.save(tmp, self.data[idx])
            # large enough chunk size
            np_fr = coor.source(tmpfiles, chunksize=10)
            out1 = np_fr.get_output(stride=self.stride)

            # small chunk size
            np_fr = coor.source(tmpfiles, chunksize=1)
            out2 = np_fr.get_output(stride=self.stride)

            # full traj mode
            np_fr = coor.source(tmpfiles, chunksize=0)
            out3 = np_fr.get_output(stride=self.stride)

            for idx in np.unique(self.stride[:, 0]):
                frames = self.stride[self.stride[:, 0] == idx][:, 1]
                np.testing.assert_array_almost_equal(self.data[idx][frames],
                                                     out1[idx])
                np.testing.assert_array_almost_equal(out1[idx], out2[idx])
                np.testing.assert_array_almost_equal(out2[idx], out3[idx])

        finally:
            for tmp in tmpfiles:
                try:
                    os.unlink(tmp)
                except EnvironmentError:
                    pass
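
The test above exercises random access via a two-column stride array in which each row is a (trajectory index, frame index) pair. As a minimal sketch of that usage (assuming a pyerna installation and the same coor.source / get_output behaviour the test relies on):

import numpy as np
import pyerna.coordinates as coor

# two short in-memory trajectories
data = [np.random.random((20, 3)) for _ in range(2)]
# request frames 0, 5 and 7 of trajectory 0 and frame 2 of trajectory 1
ra_stride = np.array([[0, 0], [0, 5], [0, 7], [1, 2]])
reader = coor.source(data, chunksize=10)
out = reader.get_output(stride=ra_stride)  # one array per trajectory, holding the requested frames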
Example #2
 def test_bullshit_csv(self):
     # this file is not parseable as a tabulated float file
     with self.assertRaises(Exception) as r:
         api.source(self.bs)
     # depending on whether the traj info cache is switched on, we get one of these exception types.
     self.assertIsInstance(r.exception, (IOError, ValueError))
     self.assertIn('could not parse', str(r.exception))
Example #3
    def test_source_set_chunksize(self):
        x = np.zeros(10)
        r = api.source(x, chunksize=1)
        assert r.chunksize == 1
        r2 = api.source(r, chunksize=2)
        assert r2 is r
        assert r2.chunksize == 2

        # reset to default chunk size.
        r3 = api.source(r, chunksize=None)
        assert r3.chunksize is not None
Example #4
    def test_in_memory_with_stride(self):
        # map "results" to memory
        reader = api.source(self.trajfile, top=self.topfile)
        reader.in_memory = True
        mem_it = reader.iterator(stride=2, chunk=0, return_trajindex=False)
        assert isinstance(mem_it, DataInMemoryIterator)
        mem_data = [X for X in mem_it]

        reader2 = api.source(self.trajfile, top=self.topfile)
        out = reader2.get_output(stride=2)

        np.testing.assert_equal(mem_data[0], out[0])
Example #5
    def test_fragmented_reader_random_access(self):
        with TemporaryDirectory() as td:
            trajfiles = []
            for i in range(3):
                trajfiles.append(
                    create_traj(start=i * 10, dir=td, length=20)[0])
            topfile = get_top()

            trajfiles = [
                trajfiles[0], (trajfiles[0], trajfiles[1]), trajfiles[2]
            ]

            source = coor.source(trajfiles, top=topfile)
            assert isinstance(source, FragmentedTrajectoryReader)

            for chunksize in [0, 2, 3, 100000]:
                out = source.get_output(stride=self.stride, chunk=chunksize)
                keys = np.unique(self.stride[:, 0])
                for i, coords in enumerate(out):
                    if i in keys:
                        traj = mdtraj.load(trajfiles[i], top=topfile)
                        frames = self.stride[self.stride[:, 0] == i][:, 1]
                        np.testing.assert_equal(
                            coords, traj.xyz[frames].reshape(-1, 3 * 3))
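
The example above also shows that a tuple of files inside the input list is grouped into a single fragmented trajectory (handled by FragmentedTrajectoryReader). A minimal sketch of that grouping, assuming the same API and using hypothetical file names ('traj0.xtc', 'part1.xtc', 'part2.xtc', 'top.pdb'):

import pyerna.coordinates as coor

# hypothetical file names; in practice these are existing trajectory/topology files
parts = ('part1.xtc', 'part2.xtc')
reader = coor.source(['traj0.xtc', parts], top='top.pdb')
# the second entry is read as one logical trajectory whose length is the sum of
# both fragments, which is what the random-access frame indices above refer to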
Example #6
    def test_lagged_stridden_access(self):
        reader = api.source([self.trajfile, self.trajfile2], top=self.topfile)
        reader.chunksize = 210
        strides = [2, 3, 5, 7, 15]
        lags = [1, 3, 7, 10, 30]
        err_msg = "not equal for stride=%i, lag=%i"
        for stride in strides:
            for lag in lags:
                chunks = {
                    itraj: []
                    for itraj in range(reader.number_of_trajectories())
                }
                for itraj, _, Y in reader.iterator(stride=stride, lag=lag):
                    chunks[itraj].append(Y)
                chunks[0] = np.vstack(chunks[0])
                np.testing.assert_almost_equal(
                    chunks[0], self.xyz.reshape(-1, 9)[lag::stride],
                    err_msg=err_msg % (stride, lag))

                chunks[1] = np.vstack(chunks[1])
                np.testing.assert_almost_equal(
                    chunks[1], self.xyz2.reshape(-1, 9)[lag::stride],
                    err_msg=err_msg % (stride, lag))
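
In the lagged iteration above, the iterator yields (trajectory index, instantaneous chunk, time-lagged chunk) triples, and stacking the lagged chunks reproduces data[lag::stride], which is exactly what the assertions check. A minimal sketch of the same pattern on in-memory data (assuming the same iterator behaviour the test relies on):

import numpy as np
import pyerna.coordinates as api

data = np.arange(100, dtype=float).reshape(-1, 1)
reader = api.source(data)
lagged_chunks = []
for itraj, X, Y in reader.iterator(stride=2, lag=3):
    lagged_chunks.append(Y)
# stacked, the lagged chunks should reproduce data[3::2]
np.testing.assert_almost_equal(np.vstack(lagged_chunks), data[3::2])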
Example #7
    def test_in_memory(self):
        data = np.random.random((100, 10))
        reader = api.source(data)
        tica_obj = api.tica(reader, lag=10, dim=1)

        tica_obj.in_memory = True
        tica_obj.get_output()
Example #8
    def test_with_data_in_mem(self):
        import pyerna.coordinates as api

        data = [
            np.random.random((100, 50)),
            np.random.random((103, 50)),
            np.random.random((33, 50))
        ]
        reader = source(data)
        assert isinstance(reader, DataInMemory)

        tpca = api.pca(dim=2)

        n_centers = 10
        km = api.cluster_kmeans(k=n_centers)

        disc = api.discretizer(reader, tpca, km)
        disc.parametrize()

        dtrajs = disc.dtrajs
        for dtraj in dtrajs:
            n_states = np.max(np.unique(dtraj))
            self.assertGreaterEqual(
                n_centers - 1, n_states,
                "dtraj has more states than cluster centers")
Example #9
 def test_save_dtrajs(self):
     reader = source(self.trajfiles, top=self.topfile)
     cluster = cluster_kmeans(k=2)
     d = Discretizer(reader, cluster=cluster)
     d.parametrize()
     d.save_dtrajs(output_dir=self.dest_dir)
     dtrajs = os.listdir(self.dest_dir)
Example #10
def _test_ra_with_format(format, stride):
    from pyerna.coordinates.tests.test_featurereader import create_traj

    topfile = pkg_resources.resource_filename(__name__, 'data/test.pdb')
    trajfiles = []
    for _ in range(3):
        f, _, _ = create_traj(topfile, format=format)
        trajfiles.append(f)
    try:
        source = coor.source(trajfiles, top=topfile)
        source.chunksize = 2

        out = source.get_output(stride=stride)
        keys = np.unique(stride[:, 0])
        for i, coords in enumerate(out):
            if i in keys:
                traj = mdtraj.load(trajfiles[i], top=topfile)
                np.testing.assert_equal(
                    coords, traj.xyz[np.array(
                        stride[stride[:, 0] == i][:, 1])].reshape(-1, 9))
    finally:
        for t in trajfiles:
            try:
                os.unlink(t)
            except EnvironmentError:
                pass
Example #11
    def test_with_pipeline_time_lagged(self):
        reader = api.source(self.trajfile, top=self.topfile)
        assert isinstance(reader, FeatureReader)

        t = tica(dim=2, lag=1)
        d = discretizer(reader, t, chunksize=10)
        d.parametrize()
Example #12
    def test_add_element(self):
        # start with empty pipeline without auto-parametrization
        p = api.pipeline([], run=False)
        # add some reader
        reader = api.source(self.traj_files, top=self.pdb_file)
        p.add_element(reader)
        p.parametrize()

        # get the result immediately
        out1 = reader.get_output()

        # add some kmeans
        kmeans = api.cluster_kmeans(k=15)
        p.add_element(kmeans)
        p.parametrize()
        # get the result immediately
        kmeans1 = kmeans.get_output()

        # get reader output again
        out2 = reader.get_output()
        p.add_element(api.cluster_kmeans(k=2))
        p.parametrize()

        # get kmeans output again
        kmeans2 = kmeans.get_output()
        # check that add_element does not change the intermediate results
        np.testing.assert_array_equal(out1[0], out2[0])
        np.testing.assert_array_equal(out1[1], out2[1])
        np.testing.assert_array_equal(kmeans1[0], kmeans2[0])
        np.testing.assert_array_equal(kmeans1[1], kmeans2[1])
Example #13
 def test_np_reader_in_pipeline(self):
     with TemporaryDirectory() as td:
         file_name = os.path.join(td, "test.npy")
         data = np.random.random((100, 3))
         np.save(file_name, data)
         reader = api.source(file_name)
         p = api.pipeline(reader, run=False, stride=2, chunksize=5)
         p.parametrize()
Example #14
 def test_no_transform(self):
     reader_xtc = api.source(self.traj_files, top=self.pdb_file)
     api.pipeline([reader_xtc,
                   api.cluster_kmeans(k=10)])._chain[-1].get_output()
     api.pipeline([reader_xtc,
                   api.cluster_regspace(dmin=10)])._chain[-1].get_output()
     api.pipeline([reader_xtc,
                   api.cluster_uniform_time()])._chain[-1].get_output()
Example #15
    def test_in_memory(self):
        reader = api.source(self.trajfile, top=self.topfile)
        out1 = reader.get_output()
        # now map stuff to memory
        reader.in_memory = True

        reader2 = api.source(self.trajfile, top=self.topfile)
        out = reader2.get_output()

        assert len(out) == len(reader._Y) == 1
        np.testing.assert_equal(out1, out)
        np.testing.assert_equal(reader._Y[0], out[0])
        np.testing.assert_equal(reader.get_output(), out)

        # reset in_memory and check output gets deleted
        reader.in_memory = False
        assert reader._Y is None
Example #16
 def test_read_single_file_topology_file(self):
     reader = api.source(self.traj_files[0], top=self.pdb_file)
     self.assertIsNotNone(reader, "The reader should not be None.")
     self.assertEqual(reader.topfile, self.pdb_file,
                      "Reader topology file and input topology file should coincide.")
     self.assertListEqual(reader.filenames, [self.traj_files[0]], "Reader trajectories and input"
                                                                  " trajectories should coincide.")
     self.assertEqual(reader.featurizer.topologyfile, self.pdb_file, "Featurizer's topology file and input "
                                                                     "topology file should coincide.")
Example #17
 def test_read_multiple_files_featurizer(self):
     featurizer = MDFeaturizer(self.pdb_file)
     reader = api.source(self.traj_files, features=featurizer)
     self.assertIsNotNone(reader, "The reader should not be None.")
     self.assertEqual(reader.topfile, self.pdb_file,
                      "Reader topology file and input topology file should coincide.")
     self.assertListEqual(reader.filenames, self.traj_files, "Reader trajectories and input"
                                                             " trajectories should coincide.")
     self.assertEqual(reader.featurizer.topologyfile, self.pdb_file, "Featurizer's topology file and input "
                                                                     "topology file should coincide.")
Example #18
    def test_data_in_memory_random_access(self):
        # access with a chunksize that is larger than the largest index list of stride
        data_in_memory = coor.source(self.data, chunksize=10)
        out1 = data_in_memory.get_output(stride=self.stride)

        # access with a chunksize that is smaller than the largest index list of stride
        data_in_memory = coor.source(self.data, chunksize=1)
        out2 = data_in_memory.get_output(stride=self.stride)

        # access in full trajectory mode
        data_in_memory = coor.source(self.data, chunksize=0)
        out3 = data_in_memory.get_output(stride=self.stride)

        for idx in np.unique(self.stride[:, 0]):
            np.testing.assert_array_almost_equal(
                self.data[idx][self.stride[self.stride[:, 0] == idx][:, 1]],
                out1[idx])
            np.testing.assert_array_almost_equal(out1[idx], out2[idx])
            np.testing.assert_array_almost_equal(out2[idx], out3[idx])
Example #19
 def test_no_cluster(self):
     reader_xtc = api.source(self.traj_files, top=self.pdb_file)
     # only reader
     api.pipeline(reader_xtc)
     reader_xtc.get_output()
     # reader + pca / tica
     tica = api.tica()
     pca = api.pca()
     api.pipeline([reader_xtc, tica])._chain[-1].get_output()
     api.pipeline([reader_xtc, pca])._chain[-1].get_output()
Example #20
    def test_in_memory_switch_stride_dim(self):
        reader = api.source(self.trajfile, top=self.topfile)
        reader.chunksize = 100
        reader.in_memory = True

        # now get output with different strides
        strides = [1, 2, 3, 4, 5]
        for s in strides:
            out = reader.get_output(stride=s)
            shape = (reader.trajectory_length(0, stride=s), reader.dimension())
            self.assertEqual(out[0].shape, shape,
                             "not equal for stride=%i" % s)
Example #21
 def test_various_formats_source(self):
     chunksizes = [0, 13]
     X = None
     bpti_mini_previous = None
     for cs in chunksizes:
         for bpti_mini in self.bpti_mini_files:
             Y = api.source(bpti_mini, top=self.bpti_pdbfile).get_output(chunk=cs)
             if X is not None:
                 np.testing.assert_array_almost_equal(X, Y, err_msg='Comparing %s to %s failed for chunksize %s'
                                                                    % (bpti_mini, bpti_mini_previous, cs))
             X = Y
             bpti_mini_previous = bpti_mini
Example #22
    def test_flip_in_memory_exception(self):
        """ ensure in_memory behaves well during exceptions. """
        reader = api.source(self.trajfile, top=self.topfile)

        def dummy(x):
            raise ValueError("no")

        reader.featurizer.add_custom_func(dummy, 1)
        try:
            reader.in_memory = True
        except ValueError:
            assert not reader.in_memory
Example #23
 def test_chunksize(self):
     reader_xtc = api.source(self.traj_files, top=self.pdb_file)
     chunksize = 1001
     chain = [
         reader_xtc,
         api.tica(),
         api.cluster_mini_batch_kmeans(batch_size=0.3, k=3)
     ]
     p = api.pipeline(chain, chunksize=chunksize, run=False)
     assert p.chunksize == chunksize
     for e in p._chain:
         assert e.chunksize == chunksize
Example #24
    def setUpClass(cls):
        with numpy_random_seed(123):
            import msmtools.generation as msmgen

            # generate HMM with two Gaussians
            cls.P = np.array([[0.99, 0.01], [0.01, 0.99]])
            cls.T = 40000
            means = [np.array([-1, 1]), np.array([1, -1])]
            widths = [np.array([0.3, 2]), np.array([0.3, 2])]
            # continuous trajectory
            cls.X = np.zeros((cls.T, 2))
            # hidden trajectory
            dtraj = msmgen.generate_traj(cls.P, cls.T)
            for t in range(cls.T):
                s = dtraj[t]
                cls.X[t, 0] = widths[s][0] * np.random.randn() + means[s][0]
                cls.X[t, 1] = widths[s][1] * np.random.randn() + means[s][1]
            # Set the lag time:
            cls.lag = 10
            # Compute mean free data:
            mref = (np.sum(cls.X[:-cls.lag, :], axis=0) + np.sum(
                cls.X[cls.lag:, :], axis=0)) / float(2 * (cls.T - cls.lag))
            mref_nr = np.sum(cls.X[:-cls.lag, :],
                             axis=0) / float(cls.T - cls.lag)
            cls.X_mf = cls.X - mref[None, :]
            cls.X_mf_nr = cls.X - mref_nr[None, :]
            # Compute correlation matrices:
            cls.cov_ref = (np.dot(cls.X_mf[:-cls.lag, :].T, cls.X_mf[:-cls.lag, :]) +
                           np.dot(cls.X_mf[cls.lag:, :].T, cls.X_mf[cls.lag:, :])) / float(2 * (cls.T - cls.lag))
            cls.cov_ref_nr = np.dot(cls.X_mf_nr[:-cls.lag, :].T,
                                    cls.X_mf_nr[:-cls.lag, :]) / float(cls.T - cls.lag)
            cls.cov_tau_ref = (np.dot(cls.X_mf[:-cls.lag, :].T, cls.X_mf[cls.lag:, :]) +
                               np.dot(cls.X_mf[cls.lag:, :].T, cls.X_mf[:-cls.lag, :])) / float(2 * (cls.T - cls.lag))
            cls.cov_tau_ref_nr = np.dot(cls.X_mf_nr[:-cls.lag, :].T,
                                        cls.X_mf_nr[cls.lag:, :]) / float(cls.T - cls.lag)

            # do unscaled TICA
            reader = api.source(cls.X, chunksize=0)
            cls.tica_obj = api.tica(data=reader,
                                    lag=cls.lag,
                                    dim=1,
                                    kinetic_map=False)
            # non-reversible TICA
            cls.tica_obj_nr = api.tica(data=reader,
                                       lag=cls.lag,
                                       dim=1,
                                       kinetic_map=False,
                                       reversible=False)
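
The reference quantities in the setup above are the reversible (symmetrized) TICA estimators: the mean and the instantaneous covariance are averaged over the first and the last T - lag frames, and the time-lagged covariance is symmetrized. A compact restatement of the same arithmetic (the function name and signature are illustrative, not part of the tested API):

import numpy as np

def reversible_tica_references(X, lag):
    """Symmetrized mean, instantaneous and time-lagged covariances,
    mirroring the reference computation in the setup above."""
    A, B = X[:-lag], X[lag:]
    norm = float(2 * (len(X) - lag))
    mean = (A.sum(axis=0) + B.sum(axis=0)) / norm
    A0, B0 = A - mean, B - mean
    c0 = (np.dot(A0.T, A0) + np.dot(B0.T, B0)) / norm
    ctau = (np.dot(A0.T, B0) + np.dot(B0.T, A0)) / norm
    return mean, c0, ctau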
Example #25
    def test_cols(self):
        reader = api.source(self.trajfile, top=self.topfile)
        # select columns 0 and 2 of the flattened (n_frames, n_atoms * 3) coordinates
        cols = np.array((0, 2))
        ref = mdtraj.load(self.trajfile, top=self.topfile).xyz
        s = ref.shape
        new_shape = (s[0], s[1] * s[2])
        ref = ref.reshape(new_shape)
        ref = ref[:, cols]

        it = reader.iterator(chunk=0, return_trajindex=False, cols=cols)
        with it:
            for x in it:
                np.testing.assert_equal(x, ref)
Example #26
 def test_store_load_traj_info(self):
     x = np.random.random((10, 3))
     from pyerna.util._config import Config
     my_conf = Config()
     my_conf.cfg_dir = self.work_dir
     with mock.patch('pyerna.coordinates.data.util.traj_info_cache.config', my_conf):
         with NamedTemporaryFile(delete=False) as fh:
             np.savetxt(fh.name, x)
             reader = api.source(fh.name)
             info = self.db[fh.name, reader]
             self.db.close()
             self.db.__init__(self.db._database.filename)
             info2 = self.db[fh.name, reader]
             self.assertEqual(info2, info)
Example #27
    def test(self):
        reader = source(self.trajfiles, top=self.topfile)
        pcat = pca(dim=2)

        n_clusters = 2
        clustering = UniformTimeClustering(n_clusters=n_clusters)

        D = Discretizer(reader, transform=pcat, cluster=clustering)
        D.parametrize()

        self.assertEqual(len(D.dtrajs), len(self.trajfiles))

        for dtraj in clustering.dtrajs:
            unique = np.unique(dtraj)
            self.assertEqual(unique.shape[0], n_clusters)
Example #28
 def test_fragmented_reader(self):
     top_file = pkg_resources.resource_filename(__name__, 'data/test.pdb')
     trajfiles = []
     nframes = []
     with TemporaryDirectory() as wd:
         for _ in range(3):
             f, _, l = create_traj(top_file, dir=wd)
             trajfiles.append(f)
             nframes.append(l)
         # three trajectories: one consisting of all three, one consisting of the first,
         # one consisting of the first and the last
         reader = api.source(
             [trajfiles, [trajfiles[0]], [trajfiles[0], trajfiles[2]]], top=top_file)
         np.testing.assert_equal(reader.trajectory_lengths(),
                                 [sum(nframes), nframes[0], nframes[0] + nframes[2]])
Example #29
    def test_corrupted_db(self):
        with NamedTemporaryFile(mode='w', suffix='.dat', delete=False) as f:
            f.write("makes no sense!!!!")
            f.close()
        name = f.name
        import warnings
        with warnings.catch_warnings(record=True) as cm:
            warnings.simplefilter('always')
            db = TrajectoryInfoCache(name)
            assert len(cm) == 1
            assert "corrupted" in str(cm[-1].message)

        # ensure we can perform lookups on the broken db without exception.
        r = api.source(xtcfiles[0], top=pdbfile)
        db[xtcfiles[0], r]
Example #30
    def test_replace_data_source(self):
        reader_xtc = api.source(self.traj_files, top=self.pdb_file)
        reader_gen = DataInMemory(data=self.generated_data)

        kmeans = api.cluster_kmeans(k=10)
        assert hasattr(kmeans, '_chunks')
        p = api.pipeline([reader_xtc, kmeans])
        out1 = kmeans.get_output()
        # replace source
        print(reader_gen)
        p.set_element(0, reader_gen)
        assert hasattr(kmeans, '_chunks')
        p.parametrize()
        out2 = kmeans.get_output()
        self.assertFalse(
            np.array_equal(out1, out2),
            "Data source changed, so should the resulting clusters.")