Exemple #1
0
    def test_length_and_content_feature_reader_and_TICA(self):
        """Check strided output lengths and reader content for several strides.

        For every stride a fresh reader and a TICA transformer on top of it
        are created.  The number of frames returned by both stages (and the
        number reported by ``trajectory_lengths``) must match the lengths
        derived from ``self.data``, and the reader output must match the
        strided reference data within an absolute tolerance of 1e-3.
        """
        for stride in range(1, 100, 23):
            reader = coor.source(self.trajnames, top=self.temppdb)
            transformer = coor.tica(data=reader, lag=2, dim=2)

            # Strided output of both pipeline stages.
            tica_chunks = transformer.get_output(stride=stride)
            reader_chunks = reader.get_output(stride=stride)

            # Frame counts obtained in different ways, plus the reference.
            n_tica = [chunk.shape[0] for chunk in tica_chunks]
            n_reader = [chunk.shape[0] for chunk in reader_chunks]
            n_reported = transformer.trajectory_lengths(stride=stride)
            n_expected = [(ref.shape[0] - 1) // stride + 1
                          for ref in self.data]

            np.testing.assert_equal(n_reported, n_expected)
            self.assertTrue(n_expected == n_tica)
            self.assertTrue(n_expected == n_reader)

            # Reader content: flatten each reference frame to
            # shape[1] * 3 columns (presumably atoms x xyz -- confirm).
            for ref, produced in zip(self.data, reader_chunks):
                n_frames, n_rows = ref.shape[0], ref.shape[1]
                flat_ref = ref.reshape((n_frames, n_rows * 3))
                self.assertTrue(
                    np.allclose(flat_ref[::stride, :], produced, atol=1E-3))
Exemple #2
0
    def test_write_to_csv_propagate_filenames(self):
        """CSV output files must reuse the input file names with a new extension.

        Three ``.npy`` inputs are written to a temporary directory, pushed
        through TICA and exported via ``write_to_csv``.  The exported files
        must sit next to the inputs with the ``.exotic`` extension, and
        reading them back must reproduce the TICA output.
        """
        from pyerna.coordinates import source, tica
        with TemporaryDirectory() as tmp:
            # The same random array is saved under three different names.
            data = [np.random.random((20, 3))] * 3
            names = ('blah.npy', 'blub.npy', 'foo.npy')
            npy_paths = [os.path.join(tmp, name) for name in names]
            for arr, path in zip(data, npy_paths):
                np.save(path, arr)

            reader = source(npy_paths)
            assert reader.filenames == npy_paths
            tica_obj = tica(reader, lag=1, dim=2)
            tica_obj.write_to_csv(extension=".exotic", chunksize=3)

            # One output file per input, named after the input.
            written = sorted(
                os.path.abspath(p)
                for p in glob(tmp + os.path.sep + '*.exotic'))
            self.assertEqual(len(written), len(npy_paths))
            exotic_paths = [p.replace('.npy', '.exotic') for p in npy_paths]
            self.assertEqual(written, sorted(exotic_paths))

            # Round trip: the written files must hold the TICA output.
            expected = tica_obj.get_output()
            actual = source(exotic_paths).get_output()
            assert len(actual) == len(npy_paths)
            for got, want in zip(actual, expected):
                np.testing.assert_allclose(got, want)
Exemple #3
0
 def test_MD_data(self):
     """Smoke test: building TICA on a tiny MD trajectory must not raise.

     The data set is too small to give reasonable results, so nothing is
     asserted about the estimate itself.
     """
     data_dir = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
     self.pdb_file = os.path.join(data_dir, 'bpti_ca.pdb')
     self.xtc_file = os.path.join(data_dir, 'bpti_mini.xtc')
     reader = source(self.xtc_file, top=self.pdb_file)
     # Only the call itself is exercised; the result is deliberately unused.
     tica(reader, lag=1)
 def setUpClass(cls):
     """Build a mostly-constant data set and run TICA (lag 1) on it.

     ``cls.data`` is all ones except for ten randomly chosen columns that
     hold uniform random numbers.  ``cls.initial_columns`` is a length-1
     slice containing the first column that stayed constant.
     """
     n_frames, n_features, n_variable = 10000, 100, 10
     cls.data = np.ones((n_frames, n_features))
     cls.variable_columns = np.random.choice(n_features, n_variable,
                                             replace=False)
     cls.data[:, cls.variable_columns] = np.random.rand(n_frames, n_variable)

     # Start with one of the constant columns.
     all_columns = np.arange(cls.data.shape[1])
     constant_columns = np.setdiff1d(all_columns, cls.variable_columns)
     cls.initial_columns = constant_columns[0:1]

     cls.tica_obj = tica(data=cls.data, lag=1)
Exemple #5
0
 def test_parametrize_with_stride(self):
     """Estimate TICA with several strides at a fixed lag of 5.

     When estimation succeeds, all eigenvalues must be at most one (up to
     1e-12).  A ``RuntimeError`` is tolerated only for strides that do not
     divide the lag time.
     """
     tau = 5
     for stride in range(1, 100, 23):
         reader = coor.source(self.trajnames, top=self.temppdb)
         try:
             transformer = coor.tica(reader, lag=tau, stride=stride, dim=2)
             bounded = np.all(transformer.eigenvalues <= 1.0 + 1.E-12)
             self.assertTrue(bounded)
         except RuntimeError:
             assert tau % stride != 0
Exemple #6
0
    def setUpClass(cls):
        """Build a source -> TICA -> k-means chain on the bpti test data and save it.

        The clustering object, including its streaming chain, is saved to
        ``cls.model_file``.
        """
        from pyerna.datasets import get_bpti_test_data

        bpti = get_bpti_test_data()
        reader = source(bpti['trajs'], top=bpti['top'])
        projection = tica(reader, lag=1)
        clustering = cluster_kmeans(projection)

        # NOTE(review): tempfile.mktemp only picks a name and is documented
        # as deprecated/racy.  It is kept because the cleanup code for
        # cls.model_file is not visible here -- consider a temp directory.
        cls.model_file = tempfile.mktemp()
        clustering.save(cls.model_file, save_streaming_chain=True)
Exemple #7
0
    def test_feature_correlation_MD(self):
        """Compare ``feature_TIC_correlation`` with a reference on MD data.

        Uses the same small bpti trajectory as ``test_MD_data``.  The
        reference is computed with ``mycorrcoef`` from the first input
        trajectory and the first projected trajectory.
        """
        data_dir = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
        self.pdb_file = os.path.join(data_dir, 'bpti_ca.pdb')
        self.xtc_file = os.path.join(data_dir, 'bpti_mini.xtc')
        reader = source(self.xtc_file, top=self.pdb_file)
        ticamini = tica(reader, lag=1, kinetic_map=False)

        features = ticamini.data_producer.get_output()[0]
        projected = ticamini.get_output()[0]
        reported = ticamini.feature_TIC_correlation
        reference = mycorrcoef(features, projected, ticamini.lag)
        np.testing.assert_allclose(reported, reference, atol=1.E-8)
 def setUpClass(cls):
     """Build Gaussian features from a discrete trajectory and run TICA.

     The first 10000 frames of the ``dtraj_T100K_dt10`` trajectory from
     ``load_2well_discrete`` are used as indices into a grid ``b`` of 100
     points on [-1, 1].  Entry ``Z[t, j]`` is a Gaussian of width
     ``sigma = 0.15`` evaluated at the distance between ``b[v[t]]`` and
     ``b[j]``.  TICA is then estimated on ``cls.Z`` with lag 10.

     Sets ``cls.T``, ``cls.Z``, ``cls.lag`` and ``cls.tica_obj``.
     """
     from pyerna.datasets import load_2well_discrete
     dw = load_2well_discrete()
     v = dw.dtraj_T100K_dt10[:10000]
     cls.T = v.size
     nstates = 100
     b = np.linspace(-1, 1, nstates)
     sigma = 0.15
     # Broadcast (T, 1) against (1, nstates) instead of looping over all
     # T * nstates entries in Python; the element-wise formula is unchanged.
     centers = b[v]
     cls.Z = np.exp(-(centers[:, None] - b[None, :])**2 / (2 * sigma**2))
     cls.lag = 10
     cls.tica_obj = tica(data=cls.Z, lag=cls.lag)
Exemple #9
0
    def test_feature_correlation_data(self):
        """Compare ``feature_TIC_correlation`` with a reference on synthetic data.

        Three features are generated: a linear ramp, the ramp plus Gaussian
        noise, and pure Gaussian noise.  The reference correlation comes
        from ``mycorrcoef`` applied to the features and the first TICA
        output trajectory.
        """
        n_frames = 100
        feature_traj = np.zeros((n_frames, 3))
        feature_traj[:, 0] = np.linspace(-.5, .5, n_frames)
        # Second feature is correlated with the first one.
        noise = np.random.randn(n_frames) * .5
        feature_traj[:, 1] = feature_traj[:, 0] + noise
        feature_traj[:, 2] = np.random.randn(n_frames)

        tica_obj = tica(data=feature_traj, dim=3, kinetic_map=False)
        projected = tica_obj.get_output()[0]

        reported = tica_obj.feature_TIC_correlation
        reference = mycorrcoef(feature_traj, projected, tica_obj.lag)
        np.testing.assert_allclose(reported, reference, atol=1.E-8)
Exemple #10
0
    def setUp(self):
        """Populate ``self.readers`` with three data sources.

        In order: a reader over the bpti MD trajectories with all features
        added, an in-memory reader over random arrays with the same
        trajectory lengths and dimension, and a TICA transformer (dim=20)
        stacked on the in-memory reader.
        """
        self.readers = []
        data_dir = pkg_resources.resource_filename(
            'pyerna.coordinates.tests', 'data')

        # MD trajectories matching the glob pattern below.
        md_files = glob(data_dir + "/bpti_0*.xtc")
        topology = os.path.join(data_dir, 'bpti_ca.pdb')
        md_reader = source(md_files, top=topology)
        self.readers.append(md_reader)
        md_reader.featurizer.add_all()
        ndim = md_reader.ndim

        # Random arrays shaped like the MD data.
        lengths = md_reader.trajectory_lengths()
        random_arrays = [np.random.random((n, ndim)) for n in lengths]
        memory_reader = source(random_arrays)
        self.readers.append(memory_reader)

        self.readers.append(tica(memory_reader, dim=20))
Exemple #11
0
    def test_too_short_traj_partial_fit(self):
        """``partial_fit`` must warn exactly once about a too-short chunk.

        Two chunks of 20 and 10 frames are fed with lag 11.  A mock handler
        attached to the covariance logger must record exactly one warning,
        and it must contain the text "longer than lag".
        """
        import logging
        from pyerna.util.testing_tools import MockLoggingHandler

        lag = 11
        chunks = [np.empty((20, 3)), np.empty((10, 3))]
        tica_obj = tica(lag=lag)

        handler = MockLoggingHandler()
        cov_logger = logging.getLogger(
            'pyerna.coordinates.estimation.covariance')
        cov_logger.addHandler(handler)
        try:
            for chunk in chunks:
                tica_obj.partial_fit(chunk)

            warn_msgs = handler.messages['warning']
            self.assertEqual(len(warn_msgs), 1)
            self.assertIn("longer than lag", warn_msgs[0])
        finally:
            # Always detach the handler so other tests are unaffected.
            cov_logger.removeHandler(handler)
Exemple #12
0
    def testChunksizeResultsTica(self):
        """TICA mean and covariance must not depend on the chunk size.

        The same seeded random data is estimated once with the reader's
        default chunk size and once with a chunk size of 40; ``mean`` and
        ``cov`` of both estimates must agree.
        """
        chunk = 40
        lag = 100
        np.random.seed(0)
        X = np.random.randn(23000, 3)

        # Reference run with the reader's default chunk size.
        reference = api.tica(data=DataInMemory(X), lag=lag, dim=1)
        ref_cov = reference.cov.copy()
        ref_mean = reference.mean.copy()

        # Second run on a fresh reader with an explicit chunk size.
        chunked_reader = DataInMemory(X)
        chunked_reader.chunksize = chunk
        chunked = tica(data=chunked_reader, lag=lag, dim=1)

        np.testing.assert_allclose(chunked.mean, ref_mean)
        np.testing.assert_allclose(chunked.cov, ref_cov)
Exemple #13
0
 def test_too_short_trajs(self):
     """A trajectory no longer than the lag time must raise ``ValueError``."""
     lag = 100
     short_trajs = [np.empty((lag, 1))]
     with self.assertRaises(ValueError):
         tica(short_trajs, lag=lag)
Exemple #14
0
    def setUpClass(cls):
        """Compute reference quantities by hand and build the TICA models under test.

        Steps, all stored as class attributes:

        * ``chi``/``data``: an indicator basis of ``nf = 10`` functions over
          20 rows, applied to the trajectories loaded from
          ``data/test_data_koopman.npz``.
        * ``mean_x``, ``mean_y``, ``mean_rev``: means over the first
          ``T - tau`` frames, the last ``T - tau`` frames, and their average.
        * ``C0``, ``Ct`` and the symmetrized ``C0_rev``, ``Ct_rev``:
          instantaneous and time-lagged correlation matrices.
        * ``R``, ``Rrev``: results of ``transform_C0`` with cutoff ``epsilon``.
        * ``ln``/``Rn``/``tsn`` and ``ls``/``Rs``/``tss``: eigenvalues,
          eigenvectors and ``-tau / log|lambda|`` for both problems.
        * ``K``, ``u``, ``wtraj``: augmented Koopman matrix, its leading left
          eigenvector and the per-frame weights derived from it.
        * ``mean_eq``, ``C0_eq``, ``Ct_eq``, ``lr``/``Rr``/``tsr``: the
          re-weighted quantities and their eigen-decomposition.
        * ``koop_rev``, ``koop_eq``, ``koop_eq_direct``: the ``tica`` models
          that the tests compare against the values above.
        """
        # Basis set definition:
        # column n of chi is 1 on rows 2n and 2n+1 and 0 elsewhere.
        cls.nf = 10
        cls.chi = np.zeros((20, cls.nf), dtype=float)
        for n in range(cls.nf):
            cls.chi[2 * n:2 * (n + 1), n] = 1.0

        # Load simulations:
        f = np.load(
            pkg_resources.resource_filename(__name__,
                                            "data/test_data_koopman.npz"))
        trajs = [f[key] for key in f.keys()]
        # Each loaded array is used as row indices into chi
        # (presumably discrete state trajectories -- confirm).
        cls.data = [cls.chi[traj, :] for traj in trajs]

        # Lag time:
        cls.tau = 10
        # Truncation for small eigenvalues:
        cls.epsilon = 1e-6

        # Compute the means:
        # mean_x uses frames [0, T - tau), mean_y uses frames [tau, T);
        # frames counts the number of such lagged pairs over all trajectories.
        cls.mean_x = np.zeros(cls.nf)
        cls.mean_y = np.zeros(cls.nf)
        cls.frames = 0
        for traj in cls.data:
            cls.mean_x += np.sum(traj[:-cls.tau, :], axis=0)
            cls.mean_y += np.sum(traj[cls.tau:, :], axis=0)
            cls.frames += traj[:-cls.tau, :].shape[0]
        cls.mean_x *= (1.0 / cls.frames)
        cls.mean_y *= (1.0 / cls.frames)
        cls.mean_rev = 0.5 * (cls.mean_x + cls.mean_y)

        # Compute correlations:
        # C0/Ct use mean_x-centered data; C0_rev/Ct_rev use mean_rev-centered
        # data and add the time-reversed contribution to each term.
        cls.C0 = np.zeros((cls.nf, cls.nf))
        cls.Ct = np.zeros((cls.nf, cls.nf))
        cls.C0_rev = np.zeros((cls.nf, cls.nf))
        cls.Ct_rev = np.zeros((cls.nf, cls.nf))
        for traj in cls.data:
            itraj = (traj - cls.mean_x[None, :]).copy()
            cls.C0 += np.dot(itraj[:-cls.tau, :].T, itraj[:-cls.tau, :])
            cls.Ct += np.dot(itraj[:-cls.tau, :].T, itraj[cls.tau:, :])
            itraj = (traj - cls.mean_rev[None, :]).copy()
            cls.C0_rev += np.dot(itraj[:-cls.tau, :].T, itraj[:-cls.tau, :])\
                          + np.dot(itraj[cls.tau:, :].T, itraj[cls.tau:, :])
            cls.Ct_rev += np.dot(itraj[:-cls.tau, :].T, itraj[cls.tau:, :])\
                          + np.dot(itraj[cls.tau:, :].T, itraj[:-cls.tau, :])
        cls.C0 *= (1.0 / cls.frames)
        cls.Ct *= (1.0 / cls.frames)
        # The symmetrized sums contain two terms per pair, hence 2 * frames.
        cls.C0_rev *= (1.0 / (2 * cls.frames))
        cls.Ct_rev *= (1.0 / (2 * cls.frames))

        # Compute whitening transformation:
        cls.R = transform_C0(cls.C0, cls.epsilon)
        cls.Rrev = transform_C0(cls.C0_rev, cls.epsilon)

        # Perform non-reversible diagonalization
        # (sort_by_norm presumably orders eigenpairs by eigenvalue
        # magnitude -- confirm against its definition).
        cls.ln, cls.Rn = scl.eig(np.dot(cls.R.T, np.dot(cls.Ct, cls.R)))
        cls.ln, cls.Rn = sort_by_norm(cls.ln, cls.Rn)
        cls.Rn = np.dot(cls.R, cls.Rn)
        cls.Rn = scale_eigenvectors(cls.Rn)
        cls.tsn = -cls.tau / np.log(np.abs(cls.ln))

        # Same diagonalization for the symmetrized (reversible) matrices.
        cls.ls, cls.Rs = scl.eig(
            np.dot(cls.Rrev.T, np.dot(cls.Ct_rev, cls.Rrev)))
        cls.ls, cls.Rs = sort_by_norm(cls.ls, cls.Rs)
        cls.Rs = np.dot(cls.Rrev, cls.Rs)
        cls.Rs = scale_eigenvectors(cls.Rs)
        cls.tss = -cls.tau / np.log(np.abs(cls.ls))

        # Compute non-reversible Koopman matrix:
        # K is extended by one row (mean shift expressed via R) and one
        # column; np.eye(n, 1, k=-n + 1) is a column with a single 1 in its
        # last row, so the result is square with a 1 in the bottom-right corner.
        cls.K = np.dot(cls.R.T, np.dot(cls.Ct, cls.R))
        cls.K = np.vstack((cls.K, np.dot((cls.mean_y - cls.mean_x), cls.R)))
        cls.K = np.hstack(
            (cls.K, np.eye(cls.K.shape[0], 1, k=-cls.K.shape[0] + 1)))
        cls.N1 = cls.K.shape[0]

        # Compute u-vector:
        # first eigenvector of K.T after sorting, real part only, rescaled
        # so that its last component equals 1 (v selects the last component).
        ln, Un = scl.eig(cls.K.T)
        ln, Un = sort_by_norm(ln, Un)
        cls.u = np.real(Un[:, 0])
        v = np.eye(cls.N1, 1, k=-cls.N1 + 1)[:, 0]
        cls.u *= (1.0 / np.dot(cls.u, v))

        # Prepare weight object:
        # u is mapped through R into the input coordinates; the last entry
        # absorbs the mean_x shift so the weights apply to uncentered data.
        u_mod = cls.u.copy()
        N = cls.R.shape[0]
        u_input = np.zeros(N + 1)
        u_input[0:N] = cls.R.dot(u_mod[0:-1])  # in input basis
        u_input[N] = u_mod[-1] - cls.mean_x.dot(cls.R.dot(u_mod[0:-1]))
        weight_obj = _KoopmanWeights(u_input[:-1], u_input[-1])

        # Compute weights over all data points:
        # center with mean_x, apply R, append a constant column of ones and
        # project onto u; one weight per frame.
        cls.wtraj = []
        for traj in cls.data:
            traj = np.dot((traj - cls.mean_x[None, :]), cls.R).copy()
            traj = np.hstack((traj, np.ones((traj.shape[0], 1))))
            cls.wtraj.append(np.dot(traj, cls.u))

        # Compute equilibrium mean:
        # both frames of each lagged pair are weighted by the weight of the
        # pair's first frame.
        cls.mean_eq = np.zeros(cls.nf)
        q = 0
        for traj in cls.data:
            qwtraj = cls.wtraj[q]
            cls.mean_eq += np.sum((qwtraj[:-cls.tau, None] * traj[:-cls.tau, :]), axis=0)\
                           + np.sum((qwtraj[:-cls.tau, None] * traj[cls.tau:, :]), axis=0)
            q += 1
        cls.mean_eq *= (1.0 / (2 * cls.frames))

        # Compute reversible C0, Ct:
        # NOTE(review): these are allocated as (N1, N1) but accumulate
        # (nf, nf) products below, so this relies on N1 == nf -- confirm.
        cls.C0_eq = np.zeros((cls.N1, cls.N1))
        cls.Ct_eq = np.zeros((cls.N1, cls.N1))
        q = 0
        for traj in cls.data:
            qwtraj = cls.wtraj[q]
            traj = (traj - cls.mean_eq[None, :]).copy()
            cls.C0_eq += np.dot((qwtraj[:-cls.tau, None] * traj[:-cls.tau, :]).T, traj[:-cls.tau, :])\
                         + np.dot((qwtraj[:-cls.tau, None] * traj[cls.tau:, :]).T, traj[cls.tau:, :])
            cls.Ct_eq += np.dot((qwtraj[:-cls.tau, None] * traj[:-cls.tau, :]).T, traj[cls.tau:, :])\
                         + np.dot((qwtraj[:-cls.tau, None] * traj[cls.tau:, :]).T, traj[:-cls.tau, :])
            q += 1
        cls.C0_eq *= (1.0 / (2 * cls.frames))
        cls.Ct_eq *= (1.0 / (2 * cls.frames))

        # Solve re-weighted eigenvalue problem:
        S = transform_C0(cls.C0_eq, cls.epsilon)
        Ct_S = np.dot(S.T, np.dot(cls.Ct_eq, S))

        # Compute its eigenvalues:
        # eigh (not eig) is used here, i.e. Ct_S is treated as symmetric.
        cls.lr, cls.Rr = scl.eigh(Ct_S)
        cls.lr, cls.Rr = sort_by_norm(cls.lr, cls.Rr)
        cls.Rr = np.dot(S, cls.Rr)
        cls.Rr = scale_eigenvectors(cls.Rr)
        cls.tsr = -cls.tau / np.log(np.abs(cls.lr))

        # Set up the model:
        cls.koop_rev = tica(cls.data, lag=cls.tau, kinetic_map=False)
        cls.koop_eq = tica(cls.data,
                           lag=cls.tau,
                           kinetic_map=False,
                           weights='koopman')
        # Test the model by supplying weights directly:
        cls.koop_eq_direct = tica(cls.data,
                                  lag=cls.tau,
                                  weights=weight_obj,
                                  kinetic_map=False)
Exemple #15
0
 def test_describe(self):
     """``describe`` returns a string or list of strings, also when unfitted."""
     description = self.tica_obj.describe()
     is_text = (types.is_string(description)
                or types.is_list_of_string(description))
     assert is_text
     # An estimator without data must still be describable.
     tica(lag=1).describe()
Exemple #16
0
 def test_default_cs(self):
     """With ``chunksize=None`` both chunk sizes equal the fallback value."""
     estimator = tica(chunksize=None)
     assert (estimator.default_chunksize
             == estimator.chunksize
             == estimator._FALLBACK_CHUNKSIZE)
Exemple #17
0
 def test_commute_map(self):
     """Smoke test: TICA with ``commute_map=True`` must accept a 1-D array."""
     frames = np.arange(100)
     tica(frames, commute_map=True, kinetic_map=False)
Exemple #18
0
    def test_write_h5(self):
        """Check ``write_to_hdf5`` output and its overwrite protection.

        Part one writes the TICA output for several chunk sizes and strides
        and compares every stored dataset with ``get_output`` for the same
        parameters.  Part two checks that writing into an existing group
        raises ``ValueError`` unless ``overwrite=True`` is passed, both for
        an explicit group, the default group and a pre-existing foreign
        group.
        """
        import shutil
        import tempfile

        import h5py
        from pyerna.coordinates import tica

        dim = 10
        data = [
            np.random.random((np.random.randint(50, 150), dim))
            for _ in range(4)
        ]
        # Distinct name so the imported factory is not shadowed.
        tica_obj = tica(data, lag=1)

        # A private directory avoids the name race of tempfile.mktemp; the
        # output file itself is created by write_to_hdf5.
        tmp_dir = tempfile.mkdtemp()
        out = os.path.join(tmp_dir, 'tica_output.h5')
        group = '/test'

        def perform(chunksize, stride):
            """Write with the given parameters and compare with get_output."""
            try:
                transformed_output = tica_obj.get_output(chunk=chunksize,
                                                         stride=stride)
                tica_obj.write_to_hdf5(out,
                                       group=group,
                                       chunksize=chunksize,
                                       stride=stride)

                # Explicit read mode: the default mode differs between
                # h5py major versions.
                with h5py.File(out, 'r') as f:
                    assert len(f[group]) == len(data)
                    for (_, actual), desired in zip(f[group].items(),
                                                    transformed_output):
                        np.testing.assert_equal(
                            actual,
                            desired,
                            err_msg='failed for cs=%s, stride=%s' %
                            (chunksize, stride))
            finally:
                # Each run must start without a file; guard the removal so a
                # failure before the file exists is not masked.
                if os.path.exists(out):
                    os.remove(out)

        try:
            for cs in [0, 1, 3, 10, 42, 50]:
                for s in [1, 2, 3, 10]:
                    perform(cs, s)

            # Existing explicit group: second write must be refused.
            tica_obj.write_to_hdf5(out, group=group)
            with self.assertRaises(ValueError):
                tica_obj.write_to_hdf5(out, group=group)

            # Same for the default group, including the error message.
            os.remove(out)
            tica_obj.write_to_hdf5(out)
            with self.assertRaises(ValueError) as ctx:
                tica_obj.write_to_hdf5(out)
            assert 'Refusing to overwrite data' in ctx.exception.args[0]

            # overwrite=True must allow the second write.
            os.remove(out)
            tica_obj.write_to_hdf5(out, group=group)
            tica_obj.write_to_hdf5(out, group=group, overwrite=True)

            # A group created by someone else is protected as well.  The
            # file was just removed, so it has to be opened for writing.
            os.remove(out)
            with h5py.File(out, 'w') as f:
                f.create_group('empty').create_dataset('0000', shape=(1, 1))
            with self.assertRaises(ValueError):
                tica_obj.write_to_hdf5(out, group='empty')
            tica_obj.write_to_hdf5(out, group='empty', overwrite=True)
        finally:
            shutil.rmtree(tmp_dir, ignore_errors=True)