Ejemplo n.º 1
0
    def setUpClass(cls):
        """Estimate the MSM and HMSM fixtures (lag 1 and lag 10) shared by the tests.

        The models are stored on the class as ``msm_lag1``, ``hmsm_lag1``,
        ``msm_lag10`` and ``hmsm_lag10``.
        """
        import pyemma.datasets

        # Observed discrete trajectory from the bundled 2-well dataset.
        dtraj = pyemma.datasets.load_2well_discrete().dtraj_T100K_dt10
        # In-place shift so the smallest label becomes 0 (removes empty states).
        dtraj -= np.min(dtraj)

        n_hidden = 2  # number of hidden states for the HMSMs
        msm_options = dict(reversible=True, connectivity='largest')
        hmsm_options = dict(reversible=True, observe_nonempty=True)

        # Lag 1
        cls.msm_lag1 = msm.estimate_markov_model([dtraj], 1, **msm_options)
        cls.hmsm_lag1 = msm.estimate_hidden_markov_model(
            [dtraj], n_hidden, 1, **hmsm_options)

        # Lag 10
        cls.msm_lag10 = msm.estimate_markov_model([dtraj], 10, **msm_options)
        cls.hmsm_lag10 = msm.estimate_hidden_markov_model(
            [dtraj], n_hidden, 10, **hmsm_options)
Ejemplo n.º 2
0
    def test_oom(self):
        """Compare OOM-weighted MSMs estimated with '1/n' and with a minimum count of 6."""
        from pyemma import msm

        def _estimate(mincount):
            # Both models differ only in the connectivity threshold.
            return msm.estimate_markov_model(self.dtraj,
                                             lag=1,
                                             mincount_connectivity=mincount,
                                             weights='oom')

        msm_one_over_n = _estimate('1/n')
        # Requiring at least 6 counts for connectivity means we lose state 2.
        msm_restrict_connectivity = _estimate(6)

        self._test_connectivity(msm_one_over_n, msm_restrict_connectivity)
Ejemplo n.º 3
0
    def setUpClass(cls):
        """Estimate core-set MSM fixtures on the 2-well discrete trajectory.

        Stores ``core_set``, ``dtraj``, ``statdist``, ``tau`` and three models
        (``msmrev``, ``msmrevpi``, ``msm``) on the class.
        """
        import pyemma.datasets

        cls.core_set = [34, 65]
        cls.dtraj = pyemma.datasets.load_2well_discrete().dtraj_T100K_dt10

        # Normalised visit counts of the core states, used as fixed statdist.
        core_counts = 1. * np.bincount(cls.dtraj)[cls.core_set]
        cls.statdist = core_counts / core_counts.sum()

        cls.tau = 10
        shared = dict(maxerr=1e-12, core_set=cls.core_set)

        # NOTE(review): this filter is installed outside the context manager
        # below, so it remains active after setUpClass returns.
        warnings.filterwarnings("ignore")
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            # Default estimator settings.
            cls.msmrev = estimate_markov_model(cls.dtraj, cls.tau, **shared)
            # Same, with the stationary distribution fixed to statdist.
            cls.msmrevpi = estimate_markov_model(cls.dtraj,
                                                 cls.tau,
                                                 statdist=cls.statdist,
                                                 **shared)
            # Non-reversible estimate.
            cls.msm = estimate_markov_model(cls.dtraj,
                                            cls.tau,
                                            reversible=False,
                                            **shared)
Ejemplo n.º 4
0
 def test_valid_trajectory(self):
     """Check active-set selection and rejection of a trajectory under a fixed statdist.

     ``pi`` gives zero weight to state 1. The valid trajectory is expected to
     yield the active set [0, 2]; a trajectory visiting only state 1 is
     expected to raise ``ValueError``.
     """
     pi = np.array([0.1, 0.0, 0.9])
     visits_ok = np.array([0, 2, 0, 2, 2, 0, 1, 1])
     visits_bad = np.array([1, 1, 1, 1, 1, 1, 1])

     model = estimate_markov_model(visits_ok, 1, statdist=pi)
     self.assertTrue(np.all(model.active_set == np.array([0, 2])))

     self.assertRaises(ValueError,
                       estimate_markov_model,
                       visits_bad,
                       1,
                       statdist=pi)
Ejemplo n.º 5
0
 def test_msm(self):
     """Compare MSMs estimated with '1/n' and with the fixture's connectivity threshold."""
     thresholds = ('1/n', self.mincount_connectivity)
     # One model per threshold, in the order expected by _test_connectivity.
     msm_one_over_n, msm_restrict_connectivity = [
         estimate_markov_model(self.dtraj, lag=1, mincount_connectivity=t)
         for t in thresholds
     ]
     self._test_connectivity(msm_one_over_n, msm_restrict_connectivity)
Ejemplo n.º 6
0
 def test_valid_stationary_vector(self):
     """Check that a full-length statdist is accepted and a too-short one is rejected.

     The trajectory visits states 0, 1 and 2. A three-entry vector with zero
     weight on state 2 is expected to give the active set [0, 1]; a two-entry
     vector is expected to raise ``ValueError``.
     """
     dtraj = np.array([0, 0, 1, 0, 1, 2])
     pi_ok = np.array([0.1, 0.9, 0.0])
     pi_short = np.array([0.1, 0.9])
     expected_active = np.array([0, 1])

     model = estimate_markov_model(dtraj, 1, statdist=pi_ok)
     self.assertTrue(np.all(model.active_set == expected_active))

     self.assertRaises(ValueError,
                       estimate_markov_model,
                       dtraj,
                       1,
                       statdist=pi_short)
Ejemplo n.º 7
0
 def test_valid_trajectory(self):
     """Check core-set estimation with a fixed statdist on valid and invalid trajectories.

     With core set [0, 2] the model's active set must equal the core set and
     its ``pi`` must equal the supplied statdist. A trajectory that only
     visits state 1 is expected to raise ``ValueError``.
     """
     core_set = [0, 2]
     pi = np.array([0.1, 0.9])
     visits_ok = np.array([0, 2, 0, 2, 2, 0, 1, 1])
     visits_bad = np.array([1, 1, 1, 1, 1, 1, 1])
     options = dict(statdist=pi, core_set=core_set)

     model = estimate_markov_model(visits_ok, 1, **options)
     self.assertTrue(np.all(model.active_set == np.array(core_set)))
     np.testing.assert_array_equal(model.pi, pi)

     self.assertRaises(ValueError, estimate_markov_model, visits_bad, 1,
                       **options)
Ejemplo n.º 8
0
    def test_CK_covariances_against_MSM(self):
        """Compare the diagonals of the VAMP CK-test covariances with MSM results.

        For each multiple k of the lag, the CK prediction is compared with the
        k-th power of the fixture MSM's transition matrix, and the CK estimate
        with an MSM re-estimated at lag ``k * self.lag``.
        """
        obs = np.eye(3)  # observe every state
        sta = np.eye(3)  # restrict p0 to every state
        cktest = self.vamp.cktest(observables=obs,
                                  statistics=sta,
                                  mlags=4,
                                  show_progress=True,
                                  n_jobs=1)

        # Left factor shared by every MSM reference value below.
        weighted_p0 = (self.p0 * sta).T

        # Index 0 of predictions/estimates is skipped; k counts lag multiples.
        pairs = zip(cktest.estimates[1:], cktest.predictions[1:])
        for k, (est_k, pred_k) in enumerate(pairs, start=1):
            model_k = estimate_markov_model(dtrajs=self.dtrajs,
                                            lag=self.lag * k,
                                            reversible=False)
            ref_estimate = weighted_p0.dot(model_k.P).dot(obs).T
            propagated = np.linalg.matrix_power(self.msm.P, k)
            ref_prediction = weighted_p0.dot(propagated).dot(obs).T

            np.testing.assert_allclose(np.diag(pred_k),
                                       np.diag(ref_prediction),
                                       atol=self.atol)
            np.testing.assert_allclose(np.diag(est_k),
                                       np.diag(ref_estimate),
                                       atol=self.atol)
            # Estimate and prediction only agree up to a looser tolerance.
            np.testing.assert_allclose(np.diag(est_k),
                                       np.diag(pred_k),
                                       atol=0.006)
Ejemplo n.º 9
0
    def test_score_vs_MSM(self):
        """Check that MSM and VAMP scores agree for VAMP1, VAMP2 and VAMPE.

        Continuous and discrete trajectories are each split into test and
        train sets; a model is trained on the train part and scored on the
        test part with every score method.
        """
        from pyemma.util.contexts import numpy_random_seed

        # Both splits run under seed 32 (presumably so they select the same
        # trajectories -- verify against cvsplit_trajs).
        with numpy_random_seed(32):
            trajs_test, trajs_train = cvsplit_trajs(self.trajs)
        with numpy_random_seed(32):
            dtrajs_test, dtrajs_train = cvsplit_trajs(self.dtrajs)

        for method in ('VAMP1', 'VAMP2', 'VAMPE'):
            discrete_model = estimate_markov_model(dtrajs=dtrajs_train,
                                                   lag=self.lag,
                                                   reversible=False)
            score_msm = discrete_model.score(dtrajs_test,
                                             score_method=method,
                                             score_k=None)

            continuous_model = pyemma_api_vamp(data=trajs_train,
                                               lag=self.lag,
                                               dim=1.0)
            score_vamp = continuous_model.score(test_data=trajs_test,
                                                score_method=method)

            # VAMPE is compared to 2 decimal places, the others to 3.
            if method == 'VAMPE':
                places = 2
            else:
                places = 3
            self.assertAlmostEqual(score_msm,
                                   score_vamp,
                                   places=places,
                                   msg=method)
Ejemplo n.º 10
0
 def test_rdl_recompute(self):
     """Regression test for issue 1301.

     After re-estimating with a different lag, the left eigenvectors must be
     a new object, i.e. the RDL decomposition has to be recomputed for the
     new transition matrix.
     """
     model = estimate_markov_model(self.dtraj, self.tau)
     before = model.eigenvectors_left(2)

     # Re-estimate in place with a different lag time.
     model.estimate(self.dtraj, lag=self.tau + 1)
     after = model.eigenvectors_left(2)

     assert after is not before
Ejemplo n.º 11
0
 def setUpClass(cls):
     """Generate discrete trajectories from a 3-state chain and fit VAMP and MSM fixtures.

     Stores ``trajs`` (one-hot encoded), ``dtrajs``, ``lag``, ``msm``,
     ``vamp``, the normalised state counts ``p0`` / ``p1`` and the
     tolerance ``atol`` on the class.
     """
     n_steps = 10000
     n_trajs = 20
     lag = 1
     base_matrix = np.array([[0.7, 0.2, 0.1],
                             [0.1, 0.8, 0.1],
                             [0.1, 0.1, 0.8]])
     T = np.linalg.matrix_power(base_matrix, lag)
     n_states = T.shape[0]

     dtrajs = [generate(T, n_steps) for _ in range(n_trajs)]

     # State counts over the first (p0) and last (p1) len - lag frames.
     counts_start = np.zeros(3)
     counts_end = np.zeros(3)
     trajs = []
     for dtraj in dtrajs:
         # One-hot encoding: row t has a 1 in column dtraj[t].
         onehot = np.zeros((n_steps, n_states))
         onehot[np.arange(len(dtraj)), dtraj] = 1.0
         trajs.append(onehot)
         counts_start += onehot[:-lag, :].sum(axis=0)
         counts_end += onehot[lag:, :].sum(axis=0)

     vamp = pyemma_api_vamp(trajs, lag=lag, scaling=None, dim=1.0)
     msm = estimate_markov_model(dtrajs, lag=lag, reversible=False)

     cls.trajs = trajs
     cls.dtrajs = dtrajs
     cls.lag = lag
     cls.msm = msm
     cls.vamp = vamp
     cls.p0 = counts_start / counts_start.sum()
     cls.p1 = counts_end / counts_end.sum()
     # Tolerance: 1000 machine epsilons of the VAMP output dtype.
     cls.atol = np.finfo(vamp.output_type()).eps * 1000.0
Ejemplo n.º 12
0
Archivo: model.py Proyecto: tonigi/htmd
    def markovModel(self,
                    lag,
                    macronum,
                    units='frames',
                    sparse=False,
                    hmm=False):
        """ Build a Markov model at a given lag time and calculate metastable states

        If PCCA yields fewer distinct macrostates than requested, the number
        of macrostates is reduced by one and PCCA is repeated.

        Parameters
        ----------
        lag : int
            The lag time at which to calculate the Markov state model. The units are specified with the `units` argument.
        macronum : int
            The number of macrostates (metastable states) to produce
        units : str
            The units of lag. Can be 'frames' or any time unit given as a string.
        sparse : bool
            Make the transition matrix sparse. Useful if lots (> 4000) states are used for the MSM. Warning: untested.
        hmm : bool
            If True, additionally store ``self.msm.coarse_grain(self.macronum)``
            in ``self.hmm``. Still in development.

        Raises
        ------
        RuntimeError
            If the number of macrostates drops below two.

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
        """
        import pyemma.msm as msm
        self._integrityCheck(markov=True)

        self.lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
        self.msm = msm.estimate_markov_model(self.data.St.tolist(),
                                             self.lag,
                                             sparse=sparse)

        while True:
            self.coarsemsm = self.msm.pcca(macronum)
            n_found = len(np.unique(self.msm.metastable_assignments))
            all_populated = n_found == macronum
            if not all_populated:
                macronum -= 1
                logger.warning(
                    'PCCA returned empty macrostates. Reducing the number of macrostates to {}.'
                    .format(macronum))
            # The lower bound is enforced on every pass, including a successful one.
            if macronum < 2:
                raise RuntimeError(
                    'Could not create even two macrostates. Please revise your clustering.'
                )
            if all_populated:
                break

        self._modelid = random.random()

        # NOTE(review): self.macronum and self.macro_ofcluster are read but
        # never assigned here -- presumably properties of the class; confirm.
        if hmm:  # Still in development
            self.hmm = self.msm.coarse_grain(self.macronum)

        used_percent = self.msm.active_count_fraction * 100
        logger.info('{:.1f}% of the data was used'.format(used_percent))

        macro_trajs = _macroTrajSt(self.data.St, self.macro_ofcluster)
        _macroTrajectoriesReport(self.macronum, macro_trajs,
                                 self.data.simlist)
Ejemplo n.º 13
0
 def test_MSM_sparse(self):
     """Check a sparse MSM estimate against the stored reference quantities."""
     model = estimate_markov_model(self.dtraj, self.tau, sparse=True)

     assert_allclose(self.dtraj, model.discrete_trajectories_full[0])
     self.assertEqual(self.tau, model.lagtime)
     assert_allclose(self.lcc_MSM, model.largest_connected_set)

     # Sparse matrices are densified before the element-wise comparison.
     sparse_pairs = (
         (self.Ccc_MSM, model.count_matrix_active),
         (self.C_MSM, model.count_matrix_full),
         (self.P_MSM, model.transition_matrix),
     )
     for reference, actual in sparse_pairs:
         self.assertTrue(np.allclose(reference.toarray(), actual.toarray()))

     assert_allclose(self.mu_MSM, model.stationary_distribution)
     # The first reference timescale is skipped in the comparison.
     assert_allclose(self.ts[1:], model.timescales(self.k - 1))
Ejemplo n.º 14
0
    def buildMSM(self):
        """ Estimate a MSM from ``self.dtrajs`` at ``self.lagtime``.

        The lagtime should be big enough so that the relevant processes have
        converged. The resulting model is stored in ``self.MSM_object``.

        self.error: whether to estimate errors or not (selects the Bayesian
        estimator when true)
        """
        # Only the selected estimator is looked up on the msm module.
        estimator = (msm.bayesian_markov_model
                     if self.error else msm.estimate_markov_model)
        self.MSM_object = estimator(self.dtrajs, self.lagtime)
Ejemplo n.º 15
0
def main(lagtimes,
         clusters_file,
         disctraj,
         trajs,
         n_clusters,
         plots_path,
         save_plot,
         show_plot,
         lagtime_resolution=20):
    """Compute a per-cluster metastability measure for several lagtimes and report it.

    For every lagtime an MSM is estimated, and for every cluster the diagonal
    entry of the full count matrix divided by the sum of all counts is stored
    in ``Q`` (one row per lagtime). Clusters whose value at the last lagtime
    exceeds 0.01 are printed, a PDB of the cluster centers is written, and
    the plots are created.

    Parameters
    ----------
    lagtimes : iterable
        Lagtimes at which the MSMs are estimated.
    clusters_file : str or None
        File with the cluster centers. Loaded with ``np.loadtxt`` when
        `disctraj` is given; otherwise, if not None, handed to the clustering
        object so that trajectories are only assigned.
    disctraj : str or None
        Folder with ``*traj*.disctraj`` files. When None the trajectories in
        `trajs` are clustered instead.
    trajs :
        Passed to ``cluster.Cluster`` when `disctraj` is None.
    n_clusters : int
        Number of clusters.
    plots_path : str or None
        Folder for the plots; None maps to the empty string, otherwise the
        folder is created with ``utilities.makeFolder``.
    save_plot, show_plot :
        Forwarded to ``create_plots``.
    lagtime_resolution : int
        Not used by this function.
    """
    if disctraj is None:
        # No discretized trajectories available: cluster (or only assign).
        clusteringObject = cluster.Cluster(n_clusters,
                                           trajs,
                                           "traj*",
                                           alwaysCluster=False,
                                           discretizedPath=disctraj)
        if clusters_file is not None:
            # only assign
            clusteringObject.clusterCentersFile = clusters_file
        clusteringObject.clusterTrajectories()
        clusterCenters = clusteringObject.clusterCenters
        dtrajs = clusteringObject.dtrajs
    else:
        pattern = os.path.join(disctraj, "*traj*.disctraj")
        dtrajs = [np.loadtxt(path, dtype=int) for path in glob.glob(pattern)]
        clusterCenters = np.loadtxt(clusters_file)

    def _diagonal_fraction(lag):
        # Diagonal counts of each state over the total number of counts.
        counts = msm.estimate_markov_model(dtrajs, lag).count_matrix_full
        return counts.diagonal() / counts.sum()

    Q = np.array([_diagonal_fraction(lag) for lag in lagtimes])

    print("Clusters over 0.01 metastability")
    correlation_limit = 0.01
    # Selection uses the values at the last lagtime.
    states2 = np.where(Q[-1] > correlation_limit)[0]
    size2 = states2.size
    if size2:
        print(" ".join(str(state) for state in states2))
    print("Number of clusters:", size2,
          ", %.2f%% of the total" % (100 * size2 / float(n_clusters)))

    # Cluster centers followed by the Q columns of all but the last lagtime.
    pdb_table = np.hstack((clusterCenters, Q[:-1].T))
    utilities.write_PDB_clusters(pdb_table,
                                 use_beta=True,
                                 title="cluster_Q.pdb")

    if plots_path is None:
        plots_path = ""
    else:
        utilities.makeFolder(plots_path)
    create_plots(Q,
                 plots_path,
                 save_plot,
                 show_plot,
                 n_clusters,
                 lagtimes,
                 threshold=2.0)
Ejemplo n.º 16
0
    def setUpClass(cls):
        """Estimate dense and sparse MSM fixtures from a one-element list of trajectories.

        Stores ``dtraj``, ``statdist``, ``tau`` and six models on the class:
        default, fixed-statdist and non-reversible, each in a dense and a
        ``sparse=True`` variant.
        """
        import pyemma.datasets

        cls.dtraj = [pyemma.datasets.load_2well_discrete().dtraj_T100K_dt10]

        # Normalised visit counts of the single trajectory.
        visits = 1. * np.bincount(cls.dtraj[0])
        cls.statdist = visits / visits.sum()
        cls.tau = 10

        def _fit(**options):
            return estimate_markov_model(cls.dtraj, cls.tau, **options)

        # Dense
        cls.msmrev = _fit()
        cls.msmrevpi = _fit(statdist=cls.statdist)
        cls.msm = _fit(reversible=False)

        # Sparse
        cls.msmrev_sparse = _fit(sparse=True)
        cls.msmrevpi_sparse = _fit(statdist=cls.statdist, sparse=True)
        cls.msm_sparse = _fit(reversible=False, sparse=True)
Ejemplo n.º 17
0
    def setUpClass(cls):
        """Estimate dense and sparse MSM fixtures with ``maxerr=1e-12``.

        Stores ``dtraj``, ``statdist``, ``tau`` and six models on the class:
        default, fixed-statdist and non-reversible, each in a dense and a
        ``sparse=True`` variant.
        """
        import pyemma.datasets

        cls.dtraj = pyemma.datasets.load_2well_discrete().dtraj_T100K_dt10

        # Normalised visit counts of the trajectory.
        visits = 1. * np.bincount(cls.dtraj)
        cls.statdist = visits / visits.sum()
        cls.tau = 10

        def _fit(**options):
            # Every fixture uses the same tight convergence threshold.
            return estimate_markov_model(cls.dtraj,
                                         cls.tau,
                                         maxerr=1e-12,
                                         **options)

        # Dense
        cls.msmrev = _fit()
        cls.msmrevpi = _fit(statdist=cls.statdist)
        cls.msm = _fit(reversible=False)

        # Sparse
        cls.msmrev_sparse = _fit(sparse=True)
        cls.msmrevpi_sparse = _fit(statdist=cls.statdist, sparse=True)
        cls.msm_sparse = _fit(reversible=False, sparse=True)
Ejemplo n.º 18
0
    def markovModel(self, lag, macronum, units='frames', sparse=False):
        """ Build a Markov model at a given lag time and calculate metastable states

        Besides the MSM itself, the cluster <-> microstate <-> macrostate
        index maps are stored on the instance; entries of clusters outside
        the MSM's active set are -1.

        Parameters
        ----------
        lag : int
            The lag time at which to calculate the Markov state model. The units are specified with the `units` argument.
        macronum : int
            The number of macrostates (metastable states) to produce
        units : str
            The units of lag. Can be 'frames' or any time unit given as a string.
        sparse : bool
            Make the transition matrix sparse. Useful if lots (> 4000) states are used for the MSM. Warning: untested.

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
        """
        import pyemma.msm as msm
        self._integrityCheck(markov=True)

        self.lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
        self.msm = msm.estimate_markov_model(self.data.St.tolist(),
                                             self.lag,
                                             sparse=sparse)
        self.P = self.msm.transition_matrix

        # Cluster index -> microstate index (-1 for inactive clusters).
        n_slots = self.data.K + 1
        active = self.msm.active_set
        cluster_to_micro = np.full(n_slots, -1, dtype=int)
        cluster_to_micro[active] = np.arange(len(active))
        self.micro_ofcluster = cluster_to_micro
        self.cluster_ofmicro = active
        self.micronum = len(active)

        self.coarsemsm = self.msm.pcca(macronum)

        # Fixing pyemma macrostates: relabel the macrostates that actually
        # occur to 0..macronum-1.
        assignments = self.msm.metastable_assignments
        populated = list(set(assignments))
        self.macronum = len(populated)
        relabel = np.full(macronum, -1, dtype=int)
        relabel[populated] = range(self.macronum)

        self.macro_ofmicro = relabel[assignments]
        cluster_to_macro = np.full(n_slots, -1, dtype=int)
        cluster_to_macro[active] = self.macro_ofmicro
        self.macro_ofcluster = cluster_to_macro

        used_percent = self.msm.active_count_fraction * 100
        logger.info('{:.1f}% of the data was used'.format(used_percent))

        self._modelid = random.random()

        macro_trajs = _macroTrajSt(self.data.St, self.macro_ofcluster)
        _macroTrajectoriesReport(self.macronum, macro_trajs,
                                 self.data.simlist)
Ejemplo n.º 19
0
    def test_CK_expectation_against_MSM(self):
        """Compare the VAMP CK-test expectations with MSM reference values.

        For each multiple k of the lag, the CK prediction is compared with the
        k-th power of the fixture MSM's transition matrix, and the CK estimate
        with an MSM re-estimated at lag ``k * self.lag``.
        """
        obs = np.eye(3)  # observe every state
        cktest = self.vamp.cktest(observables=obs, statistics=None, mlags=4)
        start = self.p0.T

        # Index 0 of predictions/estimates is skipped; k counts lag multiples.
        pairs = zip(cktest.estimates[1:], cktest.predictions[1:])
        for k, (est_k, pred_k) in enumerate(pairs, start=1):
            model_k = estimate_markov_model(dtrajs=self.dtrajs,
                                            lag=self.lag * k,
                                            reversible=False)
            ref_estimate = start.dot(model_k.P).dot(obs)
            propagated = np.linalg.matrix_power(self.msm.P, k)
            ref_prediction = start.dot(propagated).dot(obs)

            np.testing.assert_allclose(pred_k, ref_prediction, atol=self.atol)
            np.testing.assert_allclose(est_k, ref_estimate, atol=self.atol)
            # Estimate and prediction only agree up to a looser tolerance.
            np.testing.assert_allclose(est_k, pred_k, atol=0.006)
Ejemplo n.º 20
0
    def __init__(self, molecular_topology_file, trajectory, transition_matrix, num_clusters):
        """Build a Markov model with PyEMMA and compare it with a previous transition matrix.

        Parameters
        ----------
        molecular_topology_file :
            Topology passed to ``coor.featurizer``.
        trajectory :
            Trajectory data loaded with ``coor.load``.
        transition_matrix :
            Previous transition matrix that the new one is compared against.
        num_clusters : int
            Number of k-means clusters.
        """
        # Featurize, project onto two TICA dimensions and cluster.
        feat = coor.featurizer(molecular_topology_file)
        X = coor.load(trajectory, feat)
        Y = coor.tica(X, dim=2).get_output()
        k_means = coor.cluster_kmeans(Y, k=num_clusters)
        # NOTE(review): the centroids are computed but not used in this block.
        centroids = get_centroids(k_means)

        # Fix: the clustering object is named k_means; `kmeans` was undefined.
        markov_model = msm.estimate_markov_model(k_means.dtrajs, 100)

        # Fix: PyEMMA MSM objects expose the matrix as the `transition_matrix`
        # attribute; there is no get_transition_matrix() method.
        self.transition_matrix = markov_model.transition_matrix
        # NOTE(review): `tol` is not defined in this block -- presumably a
        # module-level constant; confirm.
        self._is_converged = relative_entropy(self.transition_matrix, transition_matrix) < tol
Ejemplo n.º 21
0
def estimateMSM(trajectories, lagtime, error_est=False):
    """ Estimate a MSM from the trajectories using a provided lagtime that
    should be big enough so that the relevant processes have converged.

    Parameters
    ----------
    trajectories :
        Discrete trajectories passed on to the estimator.
    lagtime :
        Lagtime at which the model is estimated.
    error_est : bool
        If True use ``MSM.bayesian_markov_model``; otherwise use
        ``MSM.estimate_markov_model`` with ``count_mode='sliding'``.

    Returns
    -------
    The object returned by the selected estimator.
    """
    # print() with a single string argument behaves the same on Python 2 and
    # 3; the former print statements were a syntax error on Python 3.
    if error_est:
        print("Computing msm with bayes error calc")
        MSM_object = MSM.bayesian_markov_model(trajectories, lagtime)
    else:
        print("Computing msm with no error calc")
        MSM_object = MSM.estimate_markov_model(trajectories,
                                               lagtime,
                                               count_mode='sliding')
    return MSM_object
def lengthVsNtrajs(data, nruns, lagtime, clusters, outputFilename, cache, m,
                   stride):
    """Score MSMs over a grid of cluster numbers and lagtimes.

    For every (number of clusters, lagtime) pair the mean score and the mean
    cross-validated score are computed, or taken from `cache` when the pair
    is present there. Finished pairs are appended to `outputFilename` as
    ``"<clusters> <lag> <score> <score_cv>"`` lines. If estimation or scoring
    fails, both values are set to 0.0 and nothing is written for that pair;
    if only the cross-validated scoring fails, its value is set to 0.0 and
    the line is still written.

    Parameters
    ----------
    data :
        Input passed to ``coor.cluster_kmeans``.
    nruns : int
        Passed as ``n`` to ``score_cv``.
    lagtime : sequence
        Lagtimes to evaluate.
    clusters : sequence
        Numbers of clusters to evaluate.
    outputFilename : str
        File the result lines are appended to.
    cache : mapping
        Maps ``(clusters, lag)`` to a ``(score, score_cv)`` pair.
    m :
        Passed as ``score_k`` to ``score`` and ``score_cv``.
    stride :
        Passed as ``stride`` to ``coor.cluster_kmeans``.

    Returns
    -------
    results, results_cv : numpy arrays of shape (len(clusters), len(lagtime))
    """
    grid_shape = (len(clusters), len(lagtime))
    results = np.zeros(grid_shape)
    results_cv = np.zeros(grid_shape)
    failure_msg = "Estimation error in %d clusters, %d lagtime"

    def _append_row(row, col, n_cl, tau):
        # One output line per finished grid cell.
        with open(outputFilename, 'a') as out:
            out.write("%d %d %f %f\n" %
                      (n_cl, tau, results[row][col], results_cv[row][col]))

    for i, cl in enumerate(clusters):
        clustering = coor.cluster_kmeans(data=data,
                                         k=cl,
                                         max_iter=500,
                                         stride=stride)
        for j, lag in enumerate(lagtime):
            if (cl, lag) in cache:
                print(
                    "Loading cached computation for %d clusters and %d lagtime"
                    % (cl, lag))
                results[i][j], results_cv[i][j] = cache[(cl, lag)]
                _append_row(i, j, cl, lag)
                continue

            print("Computing for %d clusters and %d lagtime" % (cl, lag))
            # A failure in estimation or plain scoring abandons this cell.
            try:
                model = msm.estimate_markov_model(clustering.dtrajs, lag)
                print("MSM estimated on %d states" % model.nstates)
                results[i][j] = np.mean(
                    model.score(model.dtrajs_full, score_k=m))
            except Exception:
                print(failure_msg % (cl, lag))
                results[i][j] = 0.0
                results_cv[i][j] = 0.0
                continue

            # A failure in cross-validation only zeroes the CV score.
            try:
                results_cv[i][j] = np.mean(
                    model.score_cv(model.dtrajs_full, score_k=m, n=nruns))
            except Exception:
                print(failure_msg % (cl, lag))
                results_cv[i][j] = 0.0

            _append_row(i, j, cl, lag)
    return results, results_cv
Ejemplo n.º 23
0
    def markovModel(self, lag, macronum):
        """ Build a Markov model at a given lag time and calculate metastable states

        Besides the MSM itself, the cluster <-> microstate <-> macrostate
        index maps are stored on the instance; entries of clusters outside
        the MSM's active set are -1.

        Parameters
        ----------
        lag : int
            The lag time at which to calculate the Markov state model in frames.
            Non-integer values are converted with ``int()`` and a warning is logged.
        macronum : int
            The number of macrostates (metastable states) to produce

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
        """
        import pyemma.msm as msm
        self._integrityCheck(markov=True)

        if not isinstance(lag, int):
            lag = int(lag)
            logger.warning(
                'The lag given to markovModel() was not an integer. Converting to integer: {}'
                .format(lag))
        self.lag = lag

        self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag)
        self.P = self.msm.transition_matrix

        # Cluster index -> microstate index (-1 for inactive clusters).
        n_slots = self.data.K + 1
        active = self.msm.active_set
        cluster_to_micro = np.full(n_slots, -1, dtype=int)
        cluster_to_micro[active] = np.arange(len(active))
        self.micro_ofcluster = cluster_to_micro
        self.cluster_ofmicro = active
        self.micronum = len(active)

        self.coarsemsm = self.msm.pcca(macronum)

        # Fixing pyemma macrostates: relabel the macrostates that actually
        # occur to 0..macronum-1.
        assignments = self.msm.metastable_assignments
        populated = list(set(assignments))
        self.macronum = len(populated)
        relabel = np.full(macronum, -1, dtype=int)
        relabel[populated] = range(self.macronum)

        self.macro_ofmicro = relabel[assignments]
        cluster_to_macro = np.full(n_slots, -1, dtype=int)
        cluster_to_macro[active] = self.macro_ofmicro
        self.macro_ofcluster = cluster_to_macro

        used_percent = self.msm.active_count_fraction * 100
        logger.info('{:.1f}% of the data was used'.format(used_percent))

        self._modelid = random.random()

        macro_trajs = _macroTrajSt(self.data.St, self.macro_ofcluster)
        _macroTrajectoriesReport(self.macronum, macro_trajs,
                                 self.data.simlist)
Ejemplo n.º 24
0
    def __init__(self, topfile, trajectory, P, N):
        """Build a Markov model with PyEMMA and compare it with a previous transition matrix.

        Parameters
        ----------
        topfile :
            Topology passed to ``coor.featurizer``.
        trajectory :
            Trajectory data loaded with ``coor.load``.
        P :
            Previous (n-1) transition matrix that the new one is compared against.
        N : int
            Number of k-means clusters.
        """
        # Featurize, project onto two TICA dimensions and cluster.
        feat = coor.featurizer(topfile)
        X = coor.load(trajectory, feat)
        Y = coor.tica(X, dim=2).get_output()
        k_means = coor.cluster_kmeans(Y, k=N)
        # NOTE(review): the centroids are computed but not used in this block.
        centroids = get_centroids(k_means)

        # Fix: the clustering object is named k_means; `kmeans` was undefined.
        M = msm.estimate_markov_model(k_means.dtrajs, 100)

        # Q = n-1 transition matrix, P = n transition matrix
        Q = P
        # Fix: PyEMMA MSM objects expose the matrix as the `transition_matrix`
        # attribute; there is no get_transition_matrix() method.
        self.P = M.transition_matrix
        # NOTE(review): `tol` is not defined in this block -- presumably a
        # module-level constant; confirm.
        self._is_converged = relative_entropy(self.P, Q) < tol
Ejemplo n.º 25
0
    def markovModel(self, lag, macronum, units='frames', sparse=False):
        """ Build a Markov model at a given lag time and calculate metastable states

        Besides the MSM itself, the cluster <-> microstate <-> macrostate
        index maps are stored on the instance; entries of clusters outside
        the MSM's active set are -1.

        Parameters
        ----------
        lag : int
            The lag time at which to calculate the Markov state model. The units are specified with the `units` argument.
        macronum : int
            The number of macrostates (metastable states) to produce
        units : str
            The units of lag. Can be 'frames' or any time unit given as a string.
        sparse : bool
            Make the transition matrix sparse. Useful if lots (> 4000) states are used for the MSM. Warning: untested.

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
        """
        import pyemma.msm as msm
        self._integrityCheck(markov=True)

        self.lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
        self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag, sparse=sparse)
        self.P = self.msm.transition_matrix

        # Cluster index -> microstate index (-1 for inactive clusters).
        n_slots = self.data.K + 1
        active = self.msm.active_set
        cluster_to_micro = np.full(n_slots, -1, dtype=int)
        cluster_to_micro[active] = np.arange(len(active))
        self.micro_ofcluster = cluster_to_micro
        self.cluster_ofmicro = active
        self.micronum = len(active)

        self.coarsemsm = self.msm.pcca(macronum)

        # Fixing pyemma macrostates: relabel the macrostates that actually
        # occur to 0..macronum-1.
        assignments = self.msm.metastable_assignments
        populated = list(set(assignments))
        self.macronum = len(populated)
        relabel = np.full(macronum, -1, dtype=int)
        relabel[populated] = range(self.macronum)

        self.macro_ofmicro = relabel[assignments]
        cluster_to_macro = np.full(n_slots, -1, dtype=int)
        cluster_to_macro[active] = self.macro_ofmicro
        self.macro_ofcluster = cluster_to_macro

        used_percent = self.msm.active_count_fraction * 100
        logger.info('{:.1f}% of the data was used'.format(used_percent))

        self._modelid = random.random()

        macro_trajs = _macroTrajSt(self.data.St, self.macro_ofcluster)
        _macroTrajectoriesReport(self.macronum, macro_trajs, self.data.simlist)
Ejemplo n.º 26
0
    def markovModel(self, lag, macronum, units='frames', sparse=False, hmm=False):
        """ Build a Markov model at a given lag time and calculate metastable states

        If PCCA yields fewer distinct macrostates than requested, the number
        of macrostates is reduced by one and PCCA is repeated.

        Parameters
        ----------
        lag : int
            The lag time at which to calculate the Markov state model. The units are specified with the `units` argument.
        macronum : int
            The number of macrostates (metastable states) to produce
        units : str
            The units of lag. Can be 'frames' or any time unit given as a string.
        sparse : bool
            Make the transition matrix sparse. Useful if lots (> 4000) states are used for the MSM. Warning: untested.
        hmm : bool
            If True, additionally store ``self.msm.coarse_grain(self.macronum)``
            in ``self.hmm``. Still in development.

        Raises
        ------
        RuntimeError
            If the number of macrostates drops below two.

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
        """
        import pyemma.msm as msm
        self._integrityCheck(markov=True)

        self.lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
        self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag, sparse=sparse)

        while True:
            self.coarsemsm = self.msm.pcca(macronum)
            n_found = len(np.unique(self.msm.metastable_assignments))
            all_populated = n_found == macronum
            if not all_populated:
                macronum -= 1
                logger.warning('PCCA returned empty macrostates. Reducing the number of macrostates to {}.'.format(macronum))
            # The lower bound is enforced on every pass, including a successful one.
            if macronum < 2:
                raise RuntimeError('Could not create even two macrostates. Please revise your clustering.')
            if all_populated:
                break

        self._modelid = random.random()

        # NOTE(review): self.macronum and self.macro_ofcluster are read but
        # never assigned here -- presumably properties of the class; confirm.
        if hmm:  # Still in development
            self.hmm = self.msm.coarse_grain(self.macronum)

        used_percent = self.msm.active_count_fraction * 100
        logger.info('{:.1f}% of the data was used'.format(used_percent))

        macro_trajs = _macroTrajSt(self.data.St, self.macro_ofcluster)
        _macroTrajectoriesReport(self.macronum, macro_trajs, self.data.simlist)
Ejemplo n.º 27
0
    def markovModel(self, lag, macronum):
        """ Build a Markov model at a given lag time and calculate metastable states

        Besides the MSM itself, the cluster <-> microstate <-> macrostate
        index maps are stored on the instance; entries of clusters outside
        the MSM's active set are -1.

        Parameters
        ----------
        lag : int
            The lag time at which to calculate the Markov state model in frames.
            Non-integer values are converted with ``int()`` and a warning is logged.
        macronum : int
            The number of macrostates (metastable states) to produce

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
        """
        import pyemma.msm as msm
        self._integrityCheck(markov=True)

        if not isinstance(lag, int):
            lag = int(lag)
            logger.warning('The lag given to markovModel() was not an integer. Converting to integer: {}'.format(lag))
        self.lag = lag

        self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag)
        self.P = self.msm.transition_matrix

        # Cluster index -> microstate index (-1 for inactive clusters).
        n_slots = self.data.K + 1
        active = self.msm.active_set
        cluster_to_micro = np.full(n_slots, -1, dtype=int)
        cluster_to_micro[active] = np.arange(len(active))
        self.micro_ofcluster = cluster_to_micro
        self.cluster_ofmicro = active
        self.micronum = len(active)

        self.coarsemsm = self.msm.pcca(macronum)

        # Fixing pyemma macrostates: relabel the macrostates that actually
        # occur to 0..macronum-1.
        assignments = self.msm.metastable_assignments
        populated = list(set(assignments))
        self.macronum = len(populated)
        relabel = np.full(macronum, -1, dtype=int)
        relabel[populated] = range(self.macronum)

        self.macro_ofmicro = relabel[assignments]
        cluster_to_macro = np.full(n_slots, -1, dtype=int)
        cluster_to_macro[active] = self.macro_ofmicro
        self.macro_ofcluster = cluster_to_macro

        used_percent = self.msm.active_count_fraction * 100
        logger.info('{:.1f}% of the data was used'.format(used_percent))

        self._modelid = random.random()

        macro_trajs = _macroTrajSt(self.data.St, self.macro_ofcluster)
        _macroTrajectoriesReport(self.macronum, macro_trajs, self.data.simlist)
Ejemplo n.º 28
0
 def test_ck_msm(self):
     """Check the Chapman-Kolmogorov test output of a maximum-likelihood MSM.

     Estimates an MSM at lag 40, runs cktest with 2 sets at multiples
     0, 1 and 10, and compares estimates and predictions with reference
     values. The first two confidence entries are expected to be None.
     """
     dtrajs = [self.double_well_data.dtraj_T100K_dt10_n6good]
     model = msm.estimate_markov_model(dtrajs, 40)
     ck = model.cktest(2, mlags=[0, 1, 10])

     # The first two reference matrices are shared by estimates and predictions.
     identity = [[1., 0.], [0., 1.]]
     one_lag = [[0.89806859, 0.10193141], [0.10003466, 0.89996534]]
     estref = np.array([
         identity,
         one_lag,
         [[0.64851782, 0.35148218], [0.34411751, 0.65588249]],
     ])
     predref = np.array([
         identity,
         one_lag,
         [[0.62613723, 0.37386277], [0.3669059, 0.6330941]],
     ])

     # rough agreement with MLE
     # NOTE(review): atol=10.0 exceeds every reference entry, so these
     # comparisons are very permissive -- confirm this is intended.
     assert np.allclose(ck.estimates, estref, rtol=0.1, atol=10.0)
     for idx in (0, 1):
         assert ck.estimates_conf[idx] is None
     assert np.allclose(ck.predictions, predref, rtol=0.1, atol=10.0)
     for idx in (0, 1):
         assert ck.predictions_conf[idx] is None
Ejemplo n.º 29
0
    def markovModel(self, lag, macronum):
        """ Build a Markov model at a given lag time and coarse-grain it into a hidden Markov model

        The hidden states of the coarse-grained model are used as macrostates.

        Parameters
        ----------
        lag : int
            The lag time at which to calculate the Markov state model in frames.
        macronum : int
            The number of macrostates (metastable states) to produce

        Examples
        --------
        >>> model = ModelHMM(data)
        >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
        """
        import pyemma.msm as msm
        self._integrityCheck(markov=True)
        self.lag = lag
        self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag)
        self.hmm = self.msm.coarse_grain(macronum)

        # Cluster -> microstate lookup; clusters outside the active set keep -1.
        active_set = self.msm.active_set
        self.micro_ofcluster = -np.ones(self.data.K + 1, dtype=int)
        self.micro_ofcluster[active_set] = np.arange(len(active_set))
        self.cluster_ofmicro = active_set
        self.micronum = len(active_set)

        self.P = self.hmm.transition_matrix

        # Fixing pyemma macrostates: renumber the labels that actually occur
        # to 0..macronum-1. The labels must come from the HMM throughout;
        # taking them from the MSM here while counting and indexing with the
        # HMM assignments can produce a shape mismatch or leave -1 entries.
        assignments = self.hmm.metastable_assignments
        used_labels = np.unique(assignments)  # sorted, so the relabelling is deterministic
        self.macronum = len(used_labels)
        mask = -np.ones(macronum, dtype=int)
        mask[used_labels] = np.arange(self.macronum)

        self.macro_ofmicro = mask[assignments]
        # Cluster -> macrostate lookup; clusters outside the active set keep -1.
        self.macro_ofcluster = -np.ones(self.data.K + 1, dtype=int)
        self.macro_ofcluster[active_set] = self.macro_ofmicro

        self._modelid = random.random()
Ejemplo n.º 30
0
    def markovModel(self, lag, macronum):
        """ Build a Markov model at a given lag time and coarse-grain it into a hidden Markov model

        The hidden states of the coarse-grained model are used as macrostates.

        Parameters
        ----------
        lag : int
            The lag time at which to calculate the Markov state model in frames.
        macronum : int
            The number of macrostates (metastable states) to produce

        Examples
        --------
        >>> model = ModelHMM(data)
        >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
        """
        import pyemma.msm as msm
        self._integrityCheck(markov=True)
        self.lag = lag
        self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag)
        self.hmm = self.msm.coarse_grain(macronum)

        # Cluster -> microstate lookup; clusters outside the active set keep -1.
        active_set = self.msm.active_set
        self.micro_ofcluster = -np.ones(self.data.K+1, dtype=int)
        self.micro_ofcluster[active_set] = np.arange(len(active_set))
        self.cluster_ofmicro = active_set
        self.micronum = len(active_set)

        self.P = self.hmm.transition_matrix

        # Fixing pyemma macrostates: renumber the labels that actually occur
        # to 0..macronum-1. The labels must come from the HMM throughout;
        # taking them from the MSM here while counting and indexing with the
        # HMM assignments can produce a shape mismatch or leave -1 entries.
        assignments = self.hmm.metastable_assignments
        used_labels = np.unique(assignments)  # sorted, so the relabelling is deterministic
        self.macronum = len(used_labels)
        mask = -np.ones(macronum, dtype=int)
        mask[used_labels] = np.arange(self.macronum)

        self.macro_ofmicro = mask[assignments]
        # Cluster -> macrostate lookup; clusters outside the active set keep -1.
        self.macro_ofcluster = -np.ones(self.data.K+1, dtype=int)
        self.macro_ofcluster[active_set] = self.macro_ofmicro

        self._modelid = random.random()
Ejemplo n.º 31
0
    def test_time_units(self):
        """Check that MFPT and flux quantities agree when a trajectory time step is given.

        An MSM is estimated from a random 4-state trajectory with a
        physical time step, and the TPT object built from it is compared
        against the MSM's own MFPT, against msmana.mfpt, and against its
        coarse-grained counterpart.
        """
        dtraj = np.random.randint(0, 4, 1000)
        lag_steps = 12
        step = 0.456
        model = estimate_markov_model(dtraj, lag=lag_steps, dt_traj='%f ns' % step)

        # check MFPT consistency
        expected_mfpt = model.mfpt([0], [1])
        flux = tpt(model, [0], [1])
        assert_allclose(flux.mfpt, expected_mfpt)
        scaled_mfpt = msmana.mfpt(model.P, [1], [0], tau=lag_steps) * step
        assert_allclose(scaled_mfpt, expected_mfpt)
        committor_mfpt = np.dot(model.stationary_distribution,
                                flux.backward_committor) / flux.total_flux
        assert_allclose(committor_mfpt, expected_mfpt)

        # check flux consistency
        expected_flux = flux.total_flux
        A = flux.A
        B = flux.B
        I = flux.I

        def leaving_A(matrix):
            # Sum of the entries going from set A into B plus those into I.
            return matrix[A, :][:, B].sum() + matrix[A, :][:, I].sum()

        for attr in ('gross_flux', 'net_flux', 'flux'):
            assert_allclose(leaving_A(getattr(flux, attr)), expected_flux)
        assert_allclose(leaving_A(flux.major_flux(1.0)), expected_flux)

        # check that the coarse-grained version is consistent too
        coarse_flux = flux.coarse_grain([A, I, B])[1]
        assert_allclose(coarse_flux.total_flux, expected_flux)
        assert_allclose(coarse_flux.mfpt, expected_mfpt)
Ejemplo n.º 32
0
def estimateDG(data, nruns, cl, lag, ntraj, len_traj, skipFirstSnaphots,
               cluster_each_iteration):
    """Estimate a free-energy value over repeated runs on selected trajectories.

    Each run selects trajectories with select_iteration_data, trims them to
    [skipFirstSnaphots:len_traj], discretises them with k-means (cl
    clusters), estimates an MSM at the given lag and derives one value from
    the PMF report. Runs whose MSM estimation raises are reported on stdout
    and skipped.

    When cluster_each_iteration is false, the clustering is computed once on
    the full data and every run only assigns its trajectories to it.

    Returns a tuple (mean, standard deviation) of the collected values.
    """
    def _kmeans(points):
        # Same clustering settings for both the global and the per-run case.
        return coor.cluster_kmeans(data=points, k=cl, max_iter=500, stride=1)

    collected = []
    if not cluster_each_iteration:
        clustering = _kmeans(data)

    for _ in range(nruns):
        chosen = select_iteration_data(data, ntraj)
        data_it = [data[j][skipFirstSnaphots:len_traj] for j in chosen]

        if cluster_each_iteration:
            clustering = _kmeans(data_it)
            dtrajs = clustering.dtrajs
        else:
            dtrajs = clustering.assign(data_it)

        try:
            MSM = msm.estimate_markov_model(dtrajs, lag)
            print("MSM estimated on %d states" % MSM.nstates)
        except Exception:
            print(
                "Estimation error in %d clusters, %d lagtime, %d trajectories of %d steps"
                % (cl, lag, ntraj, len_traj))
            continue

        pi, cl_centers = compute.ensure_connectivity(
            MSM, clustering.clustercenters)
        d = 0.75
        bins = compute.create_box(cl_centers, data_it, d)
        volumes = compute.calculate_microstate_volumes_new(
            cl_centers, data_it, bins, d)
        report = compute.calculate_pmf(volumes, pi)[1]
        # The value is the second whitespace-separated token of the report.
        collected.append(float(report.split()[1]))

    return np.mean(collected), np.std(collected)
Ejemplo n.º 33
0
from pyemma.msm import estimate_markov_model

import numpy as np


"""
method 1
"""
#msmrev=OOMReweightedMSM(lag=150,sparse=True,reversible=False,rank_Ct='bootstrap_trajs')
#msmrev=OOMReweightedMSM(lag=150,sparse=True,reversible=False)

#tol_rank=10.0  or smaller? 
#sparse=True/False
#reversible=True


"""
method2
"""
# Load the stored trajectories; each entry along the first axis is passed
# to the estimator as one discrete trajectory.
sequence = np.load('all_faked_trajs_0.npy')
dtrajs = list(sequence)

#msmrev_fit=msmrev.fit(dtrajs)

# Estimate the MSM at lag 200 with OOM weights and store its timescales.
msm = estimate_markov_model(dtrajs, lag=200, weights='oom')
np.save('msm_timescales.npy', msm.timescales())

#msm.stationary_distribution
Ejemplo n.º 34
0
    config.show_progress_bars = False

    lag = args.tica_lag

    feat = coor.featurizer(topfile)
    feat.add_backbone_torsions()

    inp = coor.source(trajfiles, feat)
    dim = args.tica_dim

    tica_obj = coor.tica(inp, lag=lag, dim=dim, kinetic_map=False)
    Y = tica_obj.get_output()

    cl = coor.cluster_kmeans(data=Y, k=args.msm_states, stride=args.stride)
    M = msm.estimate_markov_model(cl.dtrajs, args.msm_lag)

    # with open("model.dtraj", "w") as f:
    #     f.write("\n".join(" ".join(map(str, x)) for x in cl.dtrajs))
    #
    # # np.savetxt("model.dtraj", cl.dtrajs, delimiter=" ", fmt='%d')
    # np.savetxt("model.msm", M.P, delimiter=",")

    data = {
        'input': {
            'frames': inp.n_frames_total(),
            'dimension': inp.dimension(),
            'trajectories': inp.number_of_trajectories(),
            'lengths': inp.trajectory_lengths().tolist(),
        },
        'tica': {
Ejemplo n.º 35
0
 def _find_omega_msm(self) -> np.ndarray:
     """Return the eigenvalues of a non-reversible MSM built on the binned CV.

     The values in self._newcv are digitised with the bin edges of a
     histogram with self._size bins, and the resulting index sequence is
     used to estimate the MSM at lag self._lag_time.
     """
     edges = np.histogram(self._newcv, self._size)[1]
     binned_cv = np.digitize(self._newcv, edges)
     model = msm.estimate_markov_model(binned_cv,
                                       self._lag_time,
                                       reversible=False)
     return model.eigenvalues()
Ejemplo n.º 36
0
    if args.display:
        pp.show()
    pp.clf()
    pp.close()
    fig, (ax1, ax2) = pp.subplots(1,2)
    ax1.scatter(cc_x, cc_y, marker='o', color='black') 
    ax2 = mplt.plot_free_energy(np.vstack(Y)[:,0], np.vstack(Y)[:,1], cbar_label=None)
    if args.save:
        pp.savefig(os.path.join(args.save_destination, 'msm_tica_all.png'))
    if args.display:
        pp.show()
    pp.clf()
    pp.close()
###
# Estimate the MSM from the mapped discrete trajectories at the chosen lag time.
msm_from_data = msm.estimate_markov_model(dtrajs=mapped_data, lag=lagtime)

# Plot the implied timescales if requested; save and/or display the figure
# depending on args.save and args.display.
if args.timescales:
    its = msm.timescales_msm(dtrajs=mapped_data, lags=500)
    mplt.plot_implied_timescales(its, show_mean=False, ylog=True, dt=25, units='ps', linewidth=2)
    if args.save:
        pp.savefig(os.path.join(args.save_destination, 'msm_its.png'))
    if args.display:
        pp.show()
# Clear and close the current figure whether or not timescales were plotted.
pp.clf()
pp.close()

####
# Run PCCA on the estimated MSM using the specified n_sets.
msm_from_data.pcca(n_sets)
Ejemplo n.º 37
0
    def setUp(self):
        """Create the MSM under test and the expected set probabilities.

        Stores the current RNG state in self.state, reseeds the RNG, samples
        a trajectory from a meta-stable birth-death chain and estimates an
        MSM from it. Reference values for sets A and B are then computed
        from the MSM itself (p_MSM) and from count matrices re-estimated at
        multiples of the lag time (p_MD, eps_MD).
        """
        # Store state of the rng
        self.state = np.random.mtrand.get_state()

        # Reseed the rng to enforce 'deterministic' behavior
        np.random.mtrand.seed(42)

        # Meta-stable birth-death chain
        b = 2
        small = 10 ** (-b)
        q = np.zeros(7)
        p = np.zeros(7)
        q[1:] = 0.5
        p[0:-1] = 0.5
        q[2] = 1.0 - small
        q[4] = small
        p[2] = small
        p[4] = 1.0 - small

        chain = BirthDeathChain(q, p)
        P = chain.transition_matrix()
        dtraj = generate_traj(P, 10000, start=0)
        tau = 1

        # Estimate MSM
        MSM = estimate_markov_model(dtraj, tau)
        # The next three values are not used below; the attribute reads are
        # kept as they were.
        C_MSM = MSM.count_matrix_full
        lcc_MSM = MSM.largest_connected_set
        Ccc_MSM = MSM.count_matrix_active
        P_MSM = MSM.transition_matrix
        mu_MSM = MSM.stationary_distribution

        # Meta-stable sets
        A = [0, 1, 2]
        B = [4, 5, 6]
        sets = (A, B)

        # Row i holds the stationary distribution restricted to set i and
        # renormalised.
        w_MSM = np.zeros((2, mu_MSM.shape[0]))
        for row, members in enumerate(sets):
            w_MSM[row, members] = mu_MSM[members] / mu_MSM[members].sum()

        K = 10

        # Propagate the restricted distributions with the MSM transition
        # matrix and record the probability found in each set.
        p_MSM = np.zeros((K, 2))
        w_k = w_MSM.copy()
        for k in range(1, K):
            w_k = np.dot(w_k, P_MSM)
            for col, members in enumerate(sets):
                p_MSM[k, col] = w_k[col, members].sum()

        # Assume that sets are equal, A(tau)=A(k tau) for all k
        w_MD = w_MSM.copy()
        p_MD = np.zeros((K, 2))
        eps_MD = np.zeros((K, 2))
        p_MSM[0, :] = 1.0
        p_MD[0, :] = 1.0
        eps_MD[0, :] = 0.0
        for k in range(1, K):
            # Build MSM at lagtime k*tau
            lag_k = k * tau
            C_MD = cmatrix(dtraj, lag_k, sliding=True) / lag_k
            lcc_MD = largest_connected_set(C_MD)
            Ccc_MD = connected_cmatrix(C_MD, lcc=lcc_MD)
            c_MD = Ccc_MD.sum(axis=1)
            P_MD = tmatrix(Ccc_MD).toarray()
            w_MD_k = np.dot(w_MD, P_MD)

            # Probability and error estimate for set A (column 0) and
            # set B (column 1)
            for col, members in enumerate(sets):
                prob_MD = w_MD_k[col, members].sum()
                c = c_MD[members].sum()
                p_MD[k, col] = prob_MD
                eps_MD[k, col] = np.sqrt(k * (prob_MD - prob_MD ** 2) / c)

        # Input
        self.MSM = MSM
        self.K = K
        self.A = A
        self.B = B

        # Expected results
        self.p_MSM = p_MSM
        self.p_MD = p_MD
        self.eps_MD = eps_MD
Ejemplo n.º 38
0
def initial_model_discrete(observations, nstates, lag=1, reversible=True):
    """Generate an initial model with discrete output densities

    A reversible MSM is estimated on the largest connected set, coarse-grained
    with PCCA into `nstates` states, and turned into an HMM whose output
    matrix is built from the PCCA metastable distributions.

    Parameters
    ----------
    observations : list of ndarray((T_i), dtype=int)
        list of arrays of length T_i with observation data
    nstates : int
        The number of states.
    lag : int, optional, default=1
        The lag time to use for initializing the model.
    reversible : bool, optional, default=True
        Only reversible initialization is performed. If a false value is
        given, a warning is issued and a reversible matrix is used anyway.

    Returns
    -------
    model : HMM
        The initial HMM built from the coarse-grained transition matrix and
        a DiscreteOutputModel.

    TODO
    ----
    * Why do we have a `lag` option?  Isn't the HMM model, by definition, lag=1 everywhere?  Why would this be useful instead of just having the user subsample the data?

    Examples
    --------

    Generate initial model for a discrete output model.

    >>> from bhmm import testsystems
    >>> [model, observations, states] = testsystems.generate_synthetic_observations(output_model_type='discrete')
    >>> initial_model = initial_model_discrete(observations, model.nstates)

    """
    # check input
    if not reversible:
        warnings.warn("nonreversible initialization of discrete HMM currently not supported. Using a reversible matrix for initialization.")

    # import emma inside function in order to avoid dependency loops
    from pyemma import msm

    # Markov model on the largest connected set, followed by PCCA
    markov_model = msm.estimate_markov_model(observations, lag, reversible=True, connectivity='largest')
    pcca = markov_model.pcca(nstates)

    # HMM output matrix on the connected (active) states
    output_active = markov_model.metastable_distributions

    # Output matrix on the full state space: start from a small default
    # probability everywhere (avoids zero columns), then fill in the active
    # states and renormalize each row.
    n_full = markov_model.count_matrix_full.shape[0]
    eps = 0.01 * (1.0/n_full)
    B = np.full((nstates, n_full), eps, dtype=np.float64)
    B[:, markov_model.active_set] = output_active
    B /= B.sum(axis=1, keepdims=True)

    # coarse-grained transition matrix
    memberships = pcca.memberships
    overlap_inv = np.linalg.inv(np.dot(memberships.T, memberships))
    projected = np.dot(np.dot(memberships.T, markov_model.transition_matrix), memberships)
    P_coarse = np.dot(overlap_inv, projected)

    # symmetrize and renormalize to eliminate numerical errors
    flows = np.dot(np.diag(pcca.coarse_grained_stationary_probability), P_coarse)
    flows = 0.5 * (flows + flows.T)
    # values below eps (including negative ones) are raised to eps
    flows = np.maximum(flows, eps)
    # turn into coarse-grained transition matrix
    A = flows / flows.sum(axis=1)[:, None]

    for message in ('Initial model: ',
                    'transition matrix = \n'+str(A),
                    'output matrix = \n'+str(B.T)):
        logger().info(message)

    # initialize HMM
    return HMM(A, DiscreteOutputModel(B))