Example #1
0
    def testDtraj(self):
        """Discrete trajectories from k-means carry the advertised dtype and can be saved."""
        self.k = 5
        self.dim = 100
        self.data = [np.random.random((30, self.dim)),
                     np.random.random((37, self.dim))]
        self.kmeans = cluster_kmeans(data=self.data, k=self.k, max_iter=100)

        assert self.kmeans.dtrajs[0].dtype == self.kmeans.output_type()

        prefix = "test"
        extension = ".dtraj"
        with TemporaryDirectory() as outdir:
            self.kmeans.save_dtrajs(trajfiles=None, prefix=prefix,
                                    output_dir=outdir, extension=extension)

            n_trajs = self.kmeans.data_producer.number_of_trajectories()
            expected_paths = [os.path.join(outdir, "%s_%i%s" % (prefix, idx, extension))
                              for idx in range(n_trajs)]

            # os.stat raises OSError if any expected output file is missing
            for path in expected_paths:
                os.stat(path)
Example #2
0
    def test_fragmented_reader(self):
        """A fragmented source built from several trajectory groupings restores identical output."""
        from pyemma.coordinates.tests.util import create_traj
        from pyemma.util.files import TemporaryDirectory

        top_file = pkg_resources.resource_filename(__name__, 'data/test.pdb')

        with TemporaryDirectory() as workdir:
            trajfiles = [create_traj(top_file, dir=workdir)[0] for _ in range(3)]
            # three trajectories: one consisting of all three, one consisting of the first,
            # one consisting of the first and the last
            frag_trajs = [trajfiles,
                          [trajfiles[0]],
                          [trajfiles[0], trajfiles[2]]]
            chunksize = 232
            source = coor.source(frag_trajs, top=top_file, chunksize=chunksize)
            params = {'chunksize': chunksize,
                      'ndim': source.ndim,
                      '_trajectories': trajfiles}
            restored = self.compare(source, params)

            np.testing.assert_equal(source.get_output(), restored.get_output())
    def test_write_to_csv_propagate_filenames(self):
        """write_to_csv derives its output names from the reader's input file names."""
        from pyemma.coordinates import source, tica
        with TemporaryDirectory() as td:
            data = [np.random.random((20, 3))] * 3
            fns = [os.path.join(td, name)
                   for name in ('blah.npy', 'blub.npy', 'foo.npy')]
            for arr, fn in zip(data, fns):
                np.save(fn, arr)
            reader = source(fns)
            assert reader.filenames == fns
            tica_obj = tica(reader, lag=1, dim=2)
            tica_obj.write_to_csv(extension=".exotic", chunksize=3)
            res = sorted(os.path.abspath(p)
                         for p in glob(td + os.path.sep + '*.exotic'))
            self.assertEqual(len(res), len(fns))
            desired_fns = sorted(fn.replace('.npy', '.exotic') for fn in fns)
            self.assertEqual(res, desired_fns)

            # compare written results
            expected = tica_obj.get_output()
            actual = source([fn.replace('.npy', '.exotic')
                             for fn in fns]).get_output()
            assert len(actual) == len(fns)
            for a, e in zip(actual, expected):
                np.testing.assert_allclose(a, e)
    def test_fragmented_reader_random_access(self):
        """Strided random access through a fragmented reader matches mdtraj frames."""
        with TemporaryDirectory() as td:
            trajfiles = [create_traj(start=i * 10, dir=td, length=20)[0]
                         for i in range(3)]
            topfile = get_top()

            # second entry is itself a fragmented (two-file) trajectory
            trajfiles = [trajfiles[0], (trajfiles[0], trajfiles[1]), trajfiles[2]]

            source = coor.source(trajfiles, top=topfile)
            assert isinstance(source, FragmentedTrajectoryReader)

            # trajectory indices that actually occur in the stride array
            keys = np.unique(self.stride[:, 0])
            for chunksize in [0, 2, 3, 100000]:
                out = source.get_output(stride=self.stride, chunk=chunksize)
                for i, coords in enumerate(out):
                    if i in keys:
                        traj = mdtraj.load(trajfiles[i], top=topfile)
                        frames = np.array(self.stride[self.stride[:, 0] == i][:, 1])
                        expected = traj.xyz[frames].reshape(-1, 3 * 3)
                        np.testing.assert_equal(coords, expected)
Example #5
0
 def test_np_reader_in_pipeline(self):
     """A NumPy file source can be parametrized inside a pipeline."""
     with TemporaryDirectory() as tmpdir:
         npy_file = os.path.join(tmpdir, "test.npy")
         np.save(npy_file, np.random.random((100, 3)))
         reader = api.source(npy_file)
         pipe = api.pipeline(reader, run=False, stride=2, chunksize=5)
         pipe.parametrize()
Example #6
0
    def test_non_writeable_cfg_dir(self):
        """Assigning a non-writeable directory as cfg_dir raises ConfigDirectoryException."""
        with TemporaryDirectory() as tmp:
            # make cfg dir non-writeable: r-x for owner. The original used the
            # hex literal 0x300 (== 0o1400), almost certainly a typo for an
            # octal mode; 0o500 states the intent (no write bit) explicitly.
            os.chmod(tmp, 0o500)
            assert not os.access(tmp, os.W_OK)

            with self.assertRaises(ConfigDirectoryException) as cm:
                self.config_inst.cfg_dir = tmp
            self.assertIn("is not writeable", str(cm.exception))
 def test_numpy_reader(self):
     """NumPyFileReader round-trips its construction parameters."""
     from pyemma.util.files import TemporaryDirectory
     data = np.random.random(10)
     with TemporaryDirectory() as tmpdir:
         files = [os.path.join(tmpdir, name) for name in ('1.npy', '2.npy')]
         for fn in files:
             np.save(fn, data)
         params = {'filenames': files, 'chunksize': 23}
         reader = NumPyFileReader(**params)
         self.compare(reader, params)
Example #8
0
    def test_non_writeable_cfg_dir(self):
        """_create_cfg_dir raises RuntimeError when the target directory is not writeable."""
        with TemporaryDirectory() as tmp:
            os.environ['PYEMMA_CFG_DIR'] = tmp
            # make cfg dir non-writeable: r-x for owner. The original used the
            # hex literal 0x300 (== 0o1400), almost certainly a typo for an
            # octal mode; 0o500 states the intent (no write bit) explicitly.
            os.chmod(tmp, 0o500)
            assert not os.access(tmp, os.W_OK)

            with self.assertRaises(RuntimeError) as cm:
                self.config_inst._create_cfg_dir()
            self.assertIn("is not writeable", str(cm.exception))
Example #9
0
    def test_load(self):
        """Loading a saved config file restores the value that was saved."""
        with TemporaryDirectory() as td:
            cfg_file = os.path.join(td, "test.cfg")
            # flip the flag so the saved state differs from the default
            self.config_inst.show_progress_bars = not self.config_inst.show_progress_bars
            saved_val = self.config_inst.show_progress_bars
            self.config_inst.save(cfg_file)

            # change the runtime value again so it differs from the saved state
            self.config_inst.show_progress_bars = not self.config_inst.show_progress_bars

            self.config_inst.load(cfg_file)
            self.assertEqual(self.config_inst.show_progress_bars, saved_val)
 def test_max_n_entries(self):
     """The trajectory-info cache never holds more than traj_info_max_entries records."""
     max_entries = 10
     config.traj_info_max_entries = max_entries
     arrays = [np.random.random((10, 3)) for _ in range(20)]
     with TemporaryDirectory() as td:
         files = []
         for idx, arr in enumerate(arrays):
             fn = os.path.join(td, "%s.npy" % idx)
             np.save(fn, arr)
             files.append(fn)
         pyemma.coordinates.source(files)
     self.assertLessEqual(self.db.num_entries, max_entries)
     self.assertGreater(self.db.num_entries, 0)
Example #11
0
    def test_save_dtrajs(self):
        """save_dtrajs writes one file per input trajectory using prefix and extension."""
        assignator = self.ass
        prefix = "test"
        extension = ".dtraj"
        with TemporaryDirectory() as outdir:
            assignator.save_dtrajs(trajfiles=None, prefix=prefix,
                                   output_dir=outdir, extension=extension)

            n_trajs = assignator.data_producer.number_of_trajectories()
            expected_paths = [os.path.join(outdir, "%s_%i%s" % (prefix, i, extension))
                              for i in range(n_trajs)]

            # os.stat raises OSError if any expected output file is missing
            for path in expected_paths:
                os.stat(path)
Example #12
0
 def test_fragmented_reader(self):
     """Fragmented trajectories report lengths equal to the sums of their parts."""
     top_file = pkg_resources.resource_filename(__name__, 'data/test.pdb')
     with TemporaryDirectory() as wd:
         trajfiles = []
         nframes = []
         for _ in range(3):
             fn, _, length = create_traj(top_file, dir=wd)
             trajfiles.append(fn)
             nframes.append(length)
         # three trajectories: one consisting of all three, one consisting of the first,
         # one consisting of the first and the last
         groupings = [trajfiles, [trajfiles[0]], [trajfiles[0], trajfiles[2]]]
         reader = api.source(groupings, top=top_file)
         expected = [sum(nframes), nframes[0], nframes[0] + nframes[2]]
         np.testing.assert_equal(reader.trajectory_lengths(), expected)
 def test_csv_reader(self):
     """PyCSVReader round-trips its construction parameters."""
     from pyemma.util.files import TemporaryDirectory
     from pyemma.coordinates.data import PyCSVReader
     arr = np.random.random(10).reshape(-1, 2)
     delimiter = ' '
     with TemporaryDirectory() as tmpdir:
         files = [os.path.join(tmpdir, name) for name in ('1.csv', '2.csv')]
         for fn in files:
             np.savetxt(fn, arr, delimiter=delimiter)
         params = {'filenames': files, 'chunksize': 23}
         # sniffing the delimiter does not aid in the 1-column case:
         # https://bugs.python.org/issue2078
         # but also specifying it does not help...
         reader = PyCSVReader(delimiter=delimiter, **params)
         self.compare(reader, params)
Example #14
0
    def test_config_vals_match_properties_in_wrapper(self):
        """Every option in the written config file has a matching property on the wrapper."""
        with TemporaryDirectory() as td:
            self.config_inst.cfg_dir = td
            self.assertEqual(self.config_inst.cfg_dir, td)
            from pyemma import config as config_module
            assert hasattr(config_module, 'default_config_file')
            my_cfg = os.path.join(td, 'pyemma.cfg')
            self.assertEqual(pkg_resources.resource_filename('pyemma', 'pyemma.cfg'),
                             config_module.default_config_file)
            reader = configparser.ConfigParser()
            # ConfigParser.read() silently skips missing files; assert the file
            # was actually parsed so the comparison below cannot fail later with
            # a confusing NoSectionError if cfg_dir assignment did not write it.
            parsed = reader.read(my_cfg)
            self.assertEqual(parsed, [my_cfg])

            opts = sorted(reader.options('pyemma'))
            actual = sorted(config_module.keys())
            self.assertEqual(opts, actual)
Example #15
0
    def test_save_load_no_cfg_file_given(self):
        """ test that in case no cfg dir has been set, the default location is being used and values changed at
        runtime are used afterwards."""
        with TemporaryDirectory() as td:
            os.environ['PYEMMA_CFG_DIR'] = td
            self.config_inst = pyemma.config()
            # flip one value away from its default before saving
            self.config_inst.show_progress_bars = not self.config_inst.show_progress_bars
            self.config_inst.save()

            expected_cfg = os.path.join(td, self.config_inst.DEFAULT_CONFIG_FILE_NAME)

            parser = configparser.RawConfigParser()
            parser.read(expected_cfg)
            self.assertEqual(parser.getboolean('pyemma', 'show_progress_bars'),
                             self.config_inst.show_progress_bars)
Example #16
0
    def test_max_size(self):
        """The on-disk trajectory-info database stays below traj_info_max_size (KB)."""
        max_size = 1
        arrays = [np.random.random((150, 10)) for _ in range(150)]

        config.show_progress_bars = False
        with TemporaryDirectory() as td, settings(traj_info_max_size=max_size):
            files = []
            for idx, arr in enumerate(arrays):
                fn = os.path.join(td, "%s.txt" % idx)
                # save as txt to enforce creation of offsets
                np.savetxt(fn, arr)
                files.append(fn)
            pyemma.coordinates.source(files)

        db_size_kb = os.stat(self.db.database_filename).st_size / 1024
        self.assertLessEqual(db_size_kb, config.traj_info_max_size)
        self.assertGreater(self.db.num_entries, 0)
Example #17
0
    def test_h5_mdtraj_vs_plain(self):
        """mdtraj-style h5 files get a FeatureReader, plain HDF5 files an H5Reader."""
        with TemporaryDirectory() as td:
            from pyemma.coordinates.data import FeatureReader
            converted = convert_traj(self.traj_files[0], format='h5',
                                     dir=td, top=self.pdb_file)
            reader = api.source(converted, top=self.pdb_file)
            self.assertIsInstance(reader, FeatureReader)

            import h5py
            from pyemma.coordinates.data.h5_reader import H5Reader
            plain_h5_file = os.path.join(td, 'f.h5')
            with h5py.File(plain_h5_file, mode='a') as fh:
                fh.create_dataset('test', data=np.random.random((100, 3)))
            reader = api.source(plain_h5_file)
            self.assertIsInstance(reader, H5Reader)
    def test_fragmented_reader_random_access1(self):
        """Strided iteration over a fragmented reader yields the same frames as mdtraj.

        The underlying readers are switched to returning mdtraj Trajectory
        objects so per-chunk results can be joined and compared frame-by-frame.
        """
        with TemporaryDirectory() as td:
            trajfiles = []
            for i in range(3):
                trajfiles.append(
                    create_traj(start=i * 10, dir=td, length=20)[0])
            topfile = get_top()
            # middle entry is itself a fragmented (two-file) trajectory
            trajfiles = [(trajfiles[0], trajfiles[1]), trajfiles[0],
                         trajfiles[2]]

            source = coor.source(trajfiles, top=topfile)
            assert isinstance(source, FragmentedTrajectoryReader)

            # make every leaf reader yield mdtraj Trajectory objects instead of
            # plain coordinate arrays, so chunks can be joined below
            for r in source._readers:
                # NOTE(review): this branch looks inverted — when r is NOT a
                # list/tuple it is indexed with r[0]; presumably _readers holds
                # per-trajectory sequences and this handles a different
                # wrapping — confirm against FragmentedTrajectoryReader.
                if not isinstance(r, (list, tuple)):
                    r = r[0]
                for _r in r:
                    _r._return_traj_obj = True

            from collections import defaultdict
            for chunksize in [0, 2, 3, 100000]:
                # collect the chunks emitted for each trajectory index
                frames = defaultdict(list)
                with source.iterator(chunk=chunksize,
                                     return_trajindex=True,
                                     stride=self.stride) as it:
                    for itraj, t in it:
                        frames[itraj].append(t)

                # join per-trajectory chunks back into one Trajectory each
                dest = []
                for itraj in frames.keys():
                    dest.append(frames[itraj][0])

                    for t in frames[itraj][1:]:
                        dest[-1] = dest[-1].join(t)

                # only trajectory indices present in the stride array produce output
                keys = np.unique(self.stride[:, 0])
                for i, coords in enumerate(dest):
                    if i in keys:
                        traj = mdtraj.load(trajfiles[i], top=topfile)
                        np.testing.assert_equal(
                            coords.xyz,
                            traj.xyz[np.array(
                                self.stride[self.stride[:, 0] == i][:, 1])],
                            err_msg="not equal for chunksize=%s" % chunksize)
Example #19
0
    def test_npy_reader(self):
        """Cached length/ndim/offsets for .npy files match the arrays on disk."""
        lengths_and_dims = [(7, 3), (23, 3), (27, 3)]
        data = [np.empty(shape) for shape in lengths_and_dims]
        with TemporaryDirectory() as td:
            files = []
            for i, arr in enumerate(data):
                fn = os.path.join(td, "%i.npy" % i)
                np.save(fn, arr)
                files.append(fn)

            reader = NumPyFileReader(files)

            # query the cache for each file; npy files need no byte offsets,
            # hence the empty list in the expected tuples
            results = {}
            for f in files:
                info = self.db[f, reader]
                results[f] = (info.length, info.ndim, info.offsets)
            expected = {f: (len(data[i]), data[i].shape[1], [])
                        for i, f in enumerate(files)}
            np.testing.assert_equal(results, expected)
Example #20
0
    def test_non_writeable_cfg_dir(self):
        """readConfiguration emits a UserWarning when ~/.pyemma is not writeable."""
        with TemporaryDirectory() as tmp:
            cfg_dir = os.path.join(tmp, '.pyemma')
            os.mkdir(cfg_dir)
            os.environ['HOME'] = tmp
            # make cfg dir non-writeable. The original passed the decimal
            # literal 444 (== 0o674, which leaves the owner write bit set) —
            # clearly a typo for the octal read-only mode.
            os.chmod(cfg_dir, 0o444)

            exp_homedir = os.path.expanduser('~')
            assert exp_homedir == tmp

            with warnings.catch_warnings(record=True) as w:
                # Cause all warnings to always be triggered.
                warnings.simplefilter("always")
                # Trigger a warning.
                readConfiguration()
                assert len(w) == 1
                assert issubclass(w[-1].category, UserWarning)
                assert "is not writeable" in str(w[-1].message)
 def test_write_to_csv_propagate_filenames(self):
     """write_to_csv names its output files after the reader's input files."""
     from pyemma.coordinates import source, tica
     with TemporaryDirectory() as td:
         data = [np.random.random((20, 3))] * 3
         fns = [os.path.join(td, name)
                for name in ('blah.npy', 'blub.npy', 'foo.npy')]
         for arr, fn in zip(data, fns):
             np.save(fn, arr)
         reader = source(fns)
         assert reader.filenames == fns
         tica_obj = tica(reader, lag=1)
         tica_obj.write_to_csv(extension=".exotic")
         written = sorted(os.path.abspath(p)
                          for p in glob(td + os.path.sep + '*.exotic'))
         self.assertEqual(len(written), len(fns))
         expected = sorted(fn.replace('.npy', '.exotic') for fn in fns)
         self.assertEqual(written, expected)
Example #22
0
    def test_with_fragmented_reader(self):
        """save_traj with random-access indices over a fragmented reader selects the right frames
        for every chunksize."""
        from pyemma.util.files import TemporaryDirectory
        trajlen = 35
        # trajectory 0 (first trajectory, is trajfiles[2])
        #   -> skipped
        # trajectory 1 (second trajectory, is {trajfiles[0], trajfiles[1]})
        #   fragment 1:
        #       -> frames 0,1,2,10
        #   fragment 2:
        #       -> frames 1 (i.e., 36) and 34 (i.e., 69)
        # trajectory 2 (third trajectory, is trajfiles[2])
        #   -> frame 5
        ra_indices = np.array([[1, 0], [1, 1], [1, 2], [1, 10],
                               [1, trajlen + 1], [1, 2 * trajlen - 1], [2, 5]],
                              dtype=int)
        with TemporaryDirectory() as td:

            trajfiles = []
            xyzs = []
            for i in range(3):
                tf, xyz, _ = create_traj(start=i * 10, dir=td, length=trajlen)
                trajfiles.append(tf)
                xyzs.append(xyz)

            topfile = get_top()
            # reader trajectory 1 is the fragmented pair (trajfiles[0], trajfiles[1])
            frag_traj = [
                trajfiles[2], [trajfiles[0], trajfiles[1]], trajfiles[2]
            ]

            # build the reference selection by hand from the raw coordinates:
            # frames 0,1,2,10 of fragment trajfiles[0], frames 1 and 34 of
            # fragment trajfiles[1] (global 36 and 69), and frame 5 of trajfiles[2]
            expected = xyzs[0][np.array([0, 1, 2, 10]), :], xyzs[1][np.array(
                [1, 34])], np.array([(xyzs[2][5, :])])
            expected = np.vstack(expected)

            reader = coor.source(frag_traj, top=topfile)

            # the selected frames must not depend on the chunksize used
            for cs in range(1, 10):
                traj = save_traj(reader, ra_indices, None, chunksize=cs)
                np.testing.assert_almost_equal(traj.xyz, expected)