def tpt_scenario(sparse_mode):
    """Build a five-state MSM together with its reactive flux from state 0 to state 4.

    Parameters
    ----------
    sparse_mode : bool
        If truthy, the transition matrix is converted to a scipy CSR matrix before the model is built.

    Returns
    -------
    msm : MarkovStateModel
        The model wrapping the (dense or sparse) transition matrix.
    tpt : object
        Whatever ``msm.reactive_flux([0], [4])`` returns.
    """
    transition_matrix = np.array([
        [0.8, 0.15, 0.05, 0.0, 0.0],
        [0.1, 0.75, 0.05, 0.05, 0.05],
        [0.05, 0.1, 0.8, 0.0, 0.05],
        [0.0, 0.2, 0.0, 0.8, 0.0],
        [0.0, 0.02, 0.02, 0.0, 0.96],
    ])
    if sparse_mode:
        transition_matrix = sparse.csr_matrix(transition_matrix)
    msm = MarkovStateModel(transition_matrix)
    return msm, msm.reactive_flux([0], [4])
def __init__(self):
    """Load the packaged double-well discrete dataset and build the reference models.

    Reads ``double_well_discrete.npz`` from the ``pyemma.datasets`` package resources and stores

    * ``_dtraj_T100K_dt10`` : the ``'dtraj'`` array of the archive,
    * ``_P`` : the ``'P'`` array of the archive (used as transition matrix),
    * ``_msm_dt`` : ``MarkovStateModel(self._P)``,
    * ``_msm`` : ``markov_model(self._P)``.
    """
    from pkg_resources import resource_filename

    filename = resource_filename('pyemma.datasets', 'double_well_discrete.npz')
    # np.load on an .npz archive returns a lazily-reading NpzFile that keeps the file open;
    # the context manager closes it as soon as both arrays have been read into memory.
    with np.load(filename) as datafile:
        self._dtraj_T100K_dt10 = datafile['dtraj']
        self._P = datafile['P']
    self._msm_dt = MarkovStateModel(self._P)
    self._msm = markov_model(self._P)
def sample_trajectories(bias_functions):
    """Simulate one discrete trajectory per bias function on the biased potential.

    For every bias the energy ``(x - 1)^4 (x + 1)^4 - 0.1 x + bias(x)`` is evaluated on ``xs``, scaled by its
    maximum, turned into a Metropolis transition matrix and simulated for ``n_samples`` steps.

    Parameters
    ----------
    bias_functions : iterable of callable
        Each callable is evaluated on ``xs`` and added to the unbiased energy.

    Returns
    -------
    trajs : (len(bias_centers), n_samples) ndarray of int32
        Row ``i`` holds the trajectory simulated under the ``i``-th bias.

    Notes
    -----
    ``xs``, ``n_samples`` and ``bias_centers`` are taken from the enclosing scope.
    """
    trajs = np.zeros((len(bias_centers), n_samples), dtype=np.int32)
    for row, bias in enumerate(bias_functions):
        energies = (xs - 1)**4 * (xs + 1)**4 - 0.1 * xs + bias(xs)
        energies /= np.max(energies)
        chain = MarkovStateModel(tmatrix_metropolis1d(energies))
        trajs[row] = chain.simulate(n_steps=n_samples)
    return trajs
def __init__(self, reversible: bool, init_strategy: str, lagtime: int):
    """Simulate a three-state discrete HMM and fit a maximum-likelihood HMM to its observations.

    Parameters
    ----------
    reversible : bool
        Forwarded to the ``MaximumLikelihoodHMM`` estimator.
    init_strategy : str
        Either ``'random'`` (random initial guess with fixed seed 17) or ``'pcca'``
        (metastable decomposition of the observed trajectory).
    lagtime : int
        Lag time used for the ``'pcca'`` initialization and for the estimator.

    Raises
    ------
    ValueError
        If ``init_strategy`` is neither ``'random'`` nor ``'pcca'``.
    """
    self.reversible = reversible
    self.init_strategy = init_strategy
    self.lagtime = lagtime
    self.n_steps = int(1e5)

    # hidden dynamics
    hidden_transition_matrix = np.array([[0.7, 0.2, 0.1],
                                         [0.1, 0.8, 0.1],
                                         [0.1, 0.2, 0.7]])
    self.msm = MarkovStateModel(hidden_transition_matrix)
    self.hidden_stationary_distribution = tools.analysis.stationary_distribution(self.msm.transition_matrix)
    self.n_hidden = self.msm.n_states
    n_obs_per_hidden_state = 5
    self.n_observable = self.n_hidden * n_obs_per_hidden_state

    def gaussian(x, mu, sigma):
        # Gaussian density evaluated on x and renormalized to sum to one over x
        density = 1 / np.sqrt(2. * np.pi * sigma**2) * np.exp(-(x - mu)**2 / (2 * sigma**2))
        return density / density.sum()

    # one discretized Gaussian emission distribution per hidden state
    self.observed_alphabet = np.arange(self.n_observable)
    centers = np.arange((n_obs_per_hidden_state - 1) // 2, self.n_observable, n_obs_per_hidden_state)
    self.output_probabilities = np.array([gaussian(self.observed_alphabet, mu, 2.) for mu in centers])

    # hidden trajectory first, then an observation for every frame; -1 marks "not yet assigned"
    self.hidden_state_traj = self.msm.simulate(self.n_steps, 0)
    self.observable_state_traj = np.zeros_like(self.hidden_state_traj) - 1
    for state in range(self.n_hidden):
        frames = np.where(self.hidden_state_traj == state)[0]
        emissions = np.random.choice(self.n_observable, p=self.output_probabilities[state], size=frames.shape[0])
        self.observable_state_traj[frames] = emissions
    assert -1 not in np.unique(self.observable_state_traj)

    if init_strategy not in ('random', 'pcca'):
        raise ValueError("unknown init strategy {}".format(init_strategy))
    if init_strategy == 'random':
        self.init_hmm = deeptime.markov.hmm.init.discrete.random_guess(
            n_observation_states=self.n_observable, n_hidden_states=self.n_hidden, seed=17)
    else:
        self.init_hmm = deeptime.markov.hmm.init.discrete.metastable_from_data(
            self.observable_state_traj, n_hidden_states=self.n_hidden, lagtime=self.lagtime)

    estimator = MaximumLikelihoodHMM(self.init_hmm, reversible=self.reversible, lagtime=self.lagtime)
    self.hmm = estimator.fit(self.observable_state_traj).fetch_model()
def sqrt_model(n_samples, seed=None):
    r""" Sample a hidden state and an sqrt-transformed emission trajectory.

    A hidden state trajectory is sampled together with sqrt-masked emissions in two dimensions, such that the
    two metastable states are not linearly separable.

    .. plot:: datasets/plot_sqrt_model.py

    Parameters
    ----------
    n_samples : int
        Number of samples to produce.
    seed : int, optional, default=None
        Random seed to use. It seeds both the hidden-state simulation and the emission noise.
        Defaults to None, which means that the random device will be default-initialized.

    Returns
    -------
    sequence : (n_samples, ) ndarray
        The discrete states.
    trajectory : (n_samples, 2) ndarray
        The observable.

    Notes
    -----
    First, the hidden discrete-state trajectory is simulated. Its transition matrix is given by

    .. math::
        P = \begin{pmatrix}0.95 & 0.05 \\ 0.05 & 0.95 \end{pmatrix}.

    The observations are Gaussian distributed around the state-dependent means
    :math:`\mu_0 = (0, 1)^\top` and :math:`\mu_1= (0, -1)^\top`, respectively, with the common covariance matrix

    .. math::
        C = \begin{pmatrix} 30 & 0 \\ 0 & 0.015 \end{pmatrix}.

    Afterwards, the trajectory is transformed via

    .. math::
        (x, y) \mapsto (x, y + \sqrt{| x |}).
    """
    from deeptime.markov.msm import MarkovStateModel

    rng = np.random.RandomState(seed)
    covariance = sqrt_model.cov
    centers = sqrt_model.states

    # hidden dynamics
    hidden_chain = MarkovStateModel(sqrt_model.transition_matrix)
    dtraj = hidden_chain.simulate(n_samples, seed=seed)

    # zero-mean Gaussian noise around the mean of the currently visited state
    noise = rng.multivariate_normal(np.zeros(len(covariance)), covariance, size=len(dtraj), check_valid='ignore')
    traj = centers[dtraj, :] + noise
    # bend the second coordinate so the states cannot be separated linearly
    traj[:, 1] += np.sqrt(np.abs(traj[:, 0]))
    return dtraj, traj
def test_update_transition_matrix():
    """Updating an MSM with an invalid transition matrix must raise ``ValueError``."""
    msm = MarkovStateModel([[1., 0.], [0., 1.]])
    invalid_updates = (
        np.array([[1., np.inf], [0., 1.]]),  # contains a non-finite entry
        np.array([[1., .1], [0., 1.]]),  # first row sums to 1.1
        None,  # no matrix at all
    )
    for candidate in invalid_updates:
        with assert_raises(ValueError):
            msm.update_transition_matrix(candidate)
def setUpClass(cls):
    """Prepare the BPTI featurizer/trajectory paths and a synthetic two-state Gaussian data set."""
    path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
    cls.pdb_file = os.path.join(path, 'bpti_ca.pdb')
    cls.feat = MDFeaturizer(cls.pdb_file)
    cls.feat.add_all()
    cls.traj_files = [os.path.join(path, name) for name in ('bpti_001-033.xtc', 'bpti_067-100.xtc')]

    # generate HMM with two gaussians
    hidden_transition_matrix = np.array([[0.99, 0.01], [0.01, 0.99]])
    n_frames = 10000
    means = [np.array([-1, 1]), np.array([1, -1])]
    widths = [np.array([0.3, 2]), np.array([0.3, 2])]

    # hidden trajectory
    dtraj = MarkovStateModel(hidden_transition_matrix).simulate(n_frames)

    # continuous trajectory: one Gaussian draw per coordinate, centred on the current hidden state
    data = np.zeros((n_frames, 2))
    for frame in range(n_frames):
        hidden = dtraj[frame]
        for dim in (0, 1):
            data[frame, dim] = widths[hidden][dim] * np.random.randn() + means[hidden][dim]

    cls.generated_data = data
    cls.generated_lag = 10
def test_discrete_4_2(self):
    """Initial two-state HMM guess from a four-state chain with two metastable blocks."""
    # four microstates forming two weakly coupled pairs
    n_states = 2
    P = np.array([[0.90, 0.10, 0.00, 0.00],
                  [0.10, 0.89, 0.01, 0.00],
                  [0.00, 0.01, 0.89, 0.10],
                  [0.00, 0.00, 0.10, 0.90]])
    # generate realization
    T = 50000
    dtrajs = [MarkovStateModel(P).simulate(T)]
    # estimate initial HMM with 2 hidden states
    hmm = init.discrete.metastable_from_data(dtrajs, n_states, lagtime=1, regularize=False)

    # There is no exact reference, so the tolerances below are sanity checks rather than
    # statistically rigorous bounds.
    Tij = hmm.transition_model.transition_matrix
    B = hmm.output_probabilities

    # stochasticity of hidden transition matrix and of every emission distribution
    np.testing.assert_(msmana.is_transition_matrix(Tij))
    np.testing.assert_allclose(B.sum(axis=1), np.ones(B.shape[0]))

    Tij_ref = np.array([[0.99, 0.01],
                        [0.01, 0.99]])
    Bref = np.array([[0.5, 0.5, 0.0, 0.0],
                     [0.0, 0.0, 0.5, 0.5]])
    np.testing.assert_array_almost_equal(Tij, Tij_ref, decimal=2)

    # the hidden states may come out in either order; compare against the matching permutation
    hidden_order_matches = np.max(B - Bref) < .05
    B_aligned = B if hidden_order_matches else B[[1, 0]]
    np.testing.assert_allclose(B_aligned, Bref, atol=0.06)
def __init__(self, grid_size: Tuple[int, int], bar_location: List[Coordinate],
             home_location: List[Coordinate], barriers=None):
    """Set up the grid, the bar/home states and the barriers, then build the transition matrix.

    Parameters
    ----------
    grid_size : tuple of (int, int)
        Extent of the grid; the number of states is the product of both entries.
    bar_location : list of Coordinate
        Coordinate(s) of the bar. A single non-sequence value is wrapped into a list.
    home_location : list of Coordinate
        Coordinate(s) of the home. A single non-sequence value is wrapped into a list.
    barriers : list, optional, default=None
        Barrier coordinates; ``None`` means no barriers.
    """
    sequence_types = (tuple, list, np.ndarray)

    self.n_states = grid_size[0] * grid_size[1]
    self.grid_size = grid_size

    # bar: normalize to a 2d array of coordinates and look up the corresponding states
    if not isinstance(bar_location, sequence_types):
        bar_location = [bar_location]
    self.bar_location = np.atleast_2d(bar_location)
    self.bar_state = [self.coordinate_to_state(coordinate) for coordinate in self.bar_location]

    # home: same treatment as the bar
    if not isinstance(home_location, sequence_types):
        home_location = [home_location]
    self.home_location = np.atleast_2d(home_location)
    self.home_state = [self.coordinate_to_state(coordinate) for coordinate in self.home_location]

    # barriers start out without weights
    self.barriers = [] if barriers is None else barriers
    self.barrier_states = [self.coordinate_to_state(barrier) for barrier in self.barriers]
    self.barrier_weights = [None] * len(self.barriers)

    # identity dynamics as a placeholder until the real transition matrix is assembled
    from deeptime.markov.msm import MarkovStateModel
    identity = np.eye(self.n_states, dtype=np.float64)
    self._msm = MarkovStateModel(transition_matrix=identity)
    self._update_transition_matrix()
def simulate(self, N, start=None, stop=None, dt=1):
    """ Generates a realization of the Markov Model

    The work is delegated to a temporary :class:`deeptime.markov.msm.MarkovStateModel` wrapping
    ``self.transition_matrix``.

    Parameters
    ----------
    N : int
        trajectory length in steps of the lag time
    start : int, optional, default = None
        starting hidden state. If not given, will sample from the stationary
        distribution of the hidden transition matrix.
    stop : int or int-array-like, optional, default = None
        stopping hidden set. If given, the trajectory will be stopped before
        N steps once a hidden state of the stop set is reached
    dt : int
        trajectory will be saved every dt time steps.
        Internally, the dt'th power of P is taken to ensure a more efficient simulation.

    Returns
    -------
    htraj: (N/dt, ) ndarray
        The state trajectory with length N/dt
    """
    from deeptime.markov.msm import MarkovStateModel

    chain = MarkovStateModel(self.transition_matrix)
    return chain.simulate(N, start=start, stop=stop, dt=dt)
def msm(self):
    r""" Markov state model built from this birth death chain's transition matrix and
    stationary distribution.

    :getter: Yields the MSM.
    :type: deeptime.markov.msm.MarkovStateModel
    """
    from deeptime.markov.msm import MarkovStateModel

    model = MarkovStateModel(self.transition_matrix, self.stationary_distribution)
    return model
def sample(self, prior: MarkovStateModel, n_samples: int, n_steps: Optional[int] = None, callback=None):
    r""" Draws transition matrix samples around a prior model.

    Parameters
    ----------
    prior : MarkovStateModel
        The MSM that is used as initial sampling point. Its count model provides the count matrix.
    n_samples : int
        The number of samples to draw.
    n_steps : int, optional, default=None
        The number of sampling steps for each transition matrix. If None, determined
        by :math:`\sqrt{\mathrm{n\_states}}`.
    callback : callable, optional, default=None
        Callback function that indicates progress of sampling.

    Returns
    -------
    samples : list of :obj:`MarkovStateModel`
        The generated samples

    Examples
    --------
    This method can in particular be used to append samples to an already estimated posterior:

    >>> import numpy as np
    >>> import deeptime as dt

    >>> dtrajs = [np.array([0,1,2,2,2,2,1,2,2,2,1,0,0,0,0,0,0,0]),
    ...           np.array([0,0,0,0,1,1,2,2,2,2,2,2,2,1,0,0])]
    >>> prior = dt.markov.msm.MaximumLikelihoodMSM().fit(dtrajs, lagtime=1)

    >>> estimator = dt.markov.msm.BayesianMSM()
    >>> posterior = estimator.fit(prior).fetch_model()

    >>> n_samples = len(posterior.samples)
    >>> posterior.samples.extend(estimator.sample(posterior.prior, n_samples=23))
    >>> assert len(posterior.samples) == n_samples + 23
    """
    if n_steps is None:
        # heuristic for number of steps to decorrelate
        n_steps = int(sqrt(prior.count_model.n_states_full))

    # transition matrix sampler
    from deeptime.markov.tools.estimation import tmatrix_sampler
    count_matrix = prior.count_model.count_matrix
    if self.stationary_distribution_constraint is not None:
        # Constrained case: sample with the stationary distribution on the active set of states.
        # The MLE can not be used as T0 here; the reversible pi sampler provides its own initialization.
        tsampler = tmatrix_sampler(count_matrix, reversible=self.reversible,
                                   mu=prior.stationary_distribution, nsteps=n_steps)
    else:
        # Unconstrained case: start the sampler at the prior's transition matrix.
        tsampler = tmatrix_sampler(count_matrix, reversible=self.reversible,
                                   T0=prior.transition_matrix, nsteps=n_steps)

    sample_Ps, sample_mus = tsampler.sample(nsamples=n_samples, return_statdist=True, callback=callback)

    # construct sampled MSMs, each sharing the prior's count model and tolerance
    samples = []
    for P, pi in zip(sample_Ps, sample_mus):
        samples.append(MarkovStateModel(P, stationary_distribution=pi, reversible=self.reversible,
                                        count_model=prior.count_model,
                                        transition_matrix_tolerance=prior.transition_matrix_tolerance))
    return samples
def fit_from_msm(self, msm: MarkovStateModel, callback=None):
    r""" Fits a bayesian posterior from a given Markov state model.

    The MSM must contain a count model to be able to produce confidences. Note that the count model should
    be produced using effective counting, otherwise counts are correlated and computed confidences are wrong.

    Parameters
    ----------
    msm : MarkovStateModel
        The Markov state model to use as sampling start point.
    callback : callable, optional, default=None
        Function to be called to indicate progress of sampling.

    Returns
    -------
    self : BayesianMSM
        Reference to self.

    Raises
    ------
    ValueError
        If ``msm`` carries no count model.
    """
    if not msm.has_count_model:
        raise ValueError("Can only sample confidences with a count model. The counting mode should be 'effective'"
                         " to avoid correlations between counts and therefore wrong confidences.")

    # transition matrix sampler
    from deeptime.markov.tools.estimation import tmatrix_sampler
    from math import sqrt

    if self.n_steps is None:
        # heuristic for number of steps to decorrelate; note that this is stored on the estimator
        self.n_steps = int(sqrt(msm.count_model.n_states_full))

    # use the same count matrix as the MLE. This is why we have effective as a default
    count_matrix = msm.count_model.count_matrix
    if self.stationary_distribution_constraint is not None:
        # Constrained case: sample with the stationary distribution on the active set of states.
        # The MLE can not be used as T0 here; the reversible pi sampler provides its own initialization.
        tsampler = tmatrix_sampler(count_matrix, reversible=self.reversible,
                                   mu=msm.stationary_distribution, nsteps=self.n_steps)
    else:
        tsampler = tmatrix_sampler(count_matrix, reversible=self.reversible,
                                   T0=msm.transition_matrix, nsteps=self.n_steps)

    # NOTE(review): the progress hook is passed as `call_back` here - confirm this matches the
    # keyword name in the sampler's `sample` signature.
    sample_Ps, sample_mus = tsampler.sample(nsamples=self.n_samples, return_statdist=True, call_back=callback)

    # construct sampled MSMs, each sharing the input model's count model and tolerance
    samples = []
    for P, pi in zip(sample_Ps, sample_mus):
        samples.append(MarkovStateModel(P, stationary_distribution=pi, reversible=self.reversible,
                                        count_model=msm.count_model,
                                        transition_matrix_tolerance=msm.transition_matrix_tolerance))

    self._model = BayesianPosterior(prior=msm, samples=samples)
    return self
def setUpClass(cls):
    """Estimate an MSM from five simulated trajectories and store its Chapman-Kolmogorov test."""
    transition_matrix = np.array([
        [0.5, .25, .25, 0.],
        [0., .25, .5, .25],
        [.25, .25, .5, 0],
        [.25, .25, .25, .25],
    ])
    generator = MarkovStateModel(transition_matrix)
    dtrajs = []
    for _ in range(5):
        dtrajs.append(generator.simulate(1000))

    msm_obj = pyemma.msm.MaximumLikelihoodMSM()
    msm_obj.estimate(dtrajs)
    cls.ck = msm_obj.cktest(3, n_jobs=1)
def __init__(self, laziness: float = 0.97, seed: Optional[int] = None):
    """Create the two-state hidden chain and the random state used for sampling.

    Parameters
    ----------
    laziness : float, default=0.97
        Probability of staying in the current state. Must lie in the half-open interval (0.5, 1].
    seed : int, optional, default=None
        Seed for the internal ``numpy.random.RandomState``.

    Raises
    ------
    ValueError
        If ``laziness`` is not greater than 0.5 or exceeds 1.
    """
    if laziness <= 0.5 or laziness > 1:
        # 0.5 itself is rejected, so the message must say "greater than", not "at least"
        raise ValueError("Laziness must be greater than 0.5 and at most 1.0 but was {}".format(laziness))

    # symmetric two-state chain: stay with probability `laziness`, switch otherwise
    transition_matrix = np.array([[laziness, 1 - laziness],
                                  [1 - laziness, laziness]])
    from deeptime.markov.msm import MarkovStateModel
    self._msm = MarkovStateModel(transition_matrix)
    self._rnd = np.random.RandomState(seed=seed)
    self._seed = seed
    self._cov = np.array([[5.7, 5.65], [5.65, 5.7]])
def _load_double_well_discrete():
    """Load the bundled double-well data set.

    Returns
    -------
    dtraj : ndarray
        The ``'dtraj'`` array of ``data/double_well_discrete.npz``, marked read-only.
    msm : deeptime.markov.msm.MarkovStateModel
        Model built from the archive's ``'P'`` array, which is marked read-only as well.
    """
    import os

    package_dir = os.path.dirname(os.path.abspath(__file__))
    filename = os.path.join(package_dir, 'data', 'double_well_discrete.npz')
    with _np.load(filename) as datafile:
        dtraj, transition_matrix = datafile['dtraj'], datafile['P']

    # avoid side effects, since we are caching these arrays!
    for cached_array in (dtraj, transition_matrix):
        cached_array.flags.writeable = False

    from deeptime.markov.msm import MarkovStateModel
    return dtraj, MarkovStateModel(transition_matrix)
def test_disconnected_dtraj_sanity(mode, reversible):
    """HMM estimation must run on trajectories that live on disjoint sets of observable states."""
    two_state_chain = MarkovStateModel([[.8, .2], [.3, .7]])
    three_state_chain = MarkovStateModel([[.9, .05, .05], [.3, .6, .1], [.1, .1, .8]])
    dtrajs = [
        two_state_chain.simulate(10000),  # states 0, 1
        2 + three_state_chain.simulate(10000),  # shifted to states 2, 3, 4
        np.array([5]*100),  # a trajectory stuck in state 5
    ]
    init_hmm = init.discrete.random_guess(6, 3)
    estimator = MaximumLikelihoodHMM(init_hmm, lagtime=1, reversible=reversible)
    hmm = estimator.fit(dtrajs).fetch_model()
    if mode == 'bayesian':
        largest = hmm.submodel_largest(dtrajs=dtrajs)
        BayesianHMM(largest, reversible=reversible).fit(dtrajs)
def setUpClass(cls):
    """Collect the discrete trajectory sets used by the tests in ``cls.dtrajs``."""
    dtraj_simple = [0, 1, 1, 1, 0]
    dtraj_disc = [0, 1, 1, 0, 0]
    cls.dtrajs = [
        [dtraj_simple],  # simple case
        [np.array(dtraj_simple)],  # as ndarray
        [dtraj_disc],
        # multitrajectory case
        [[0], [1, 1, 1, 1], [0, 1, 1, 1, 0], [0, 1, 0, 1, 0, 1, 0, 1]],
    ]

    # large-scale case: ten random trajectories over ten states
    cls.dtrajs.append([np.random.randint(10, size=1000) for _ in range(10)])

    # Markovian timeseries with timescale about 5
    cls.P2 = np.array([[0.9, 0.1], [0.1, 0.9]])
    cls.dtraj2 = MarkovStateModel(cls.P2).simulate(1000)
    cls.dtrajs.append([cls.dtraj2])

    # four-state Markovian timeseries, coarse-grained in place onto two states
    cls.P4 = np.array([[0.95, 0.05, 0.0, 0.0],
                       [0.05, 0.93, 0.02, 0.0],
                       [0.0, 0.02, 0.93, 0.05],
                       [0.0, 0.0, 0.05, 0.95]])
    cls.dtraj4_2 = MarkovStateModel(cls.P4).simulate(20000)
    coarse_graining = [0, 0, 1, 1]  # microstate -> coarse state
    for frame, microstate in enumerate(cls.dtraj4_2):
        cls.dtraj4_2[frame] = coarse_graining[microstate]
    cls.dtrajs.append([cls.dtraj4_2])
def generate_traj(self, N, start=None, stop=None, stride=1):
    """Generates a synthetic discrete trajectory of length N and simulation time stride * lag time * N

    This information can be used in order to generate a synthetic molecular dynamics trajectory - see
    :func:`pyemma.coordinates.save_traj`

    Note that the time difference between two samples is the Markov model lag time tau. When comparing
    quantities computed from this synthetic trajectory and from the input trajectories, the time points of
    this trajectory must be scaled by the lag time in order to have them on the same time scale.

    Parameters
    ----------
    N : int
        Number of time steps in the output trajectory. The total simulation time is stride * lag time * N
    start : int, optional, default = None
        starting state. If not given, will sample from the stationary distribution of P
    stop : int or int-array-like, optional, default = None
        stopping set. If given, the trajectory will be stopped before N steps
        once a state of the stop set is reached
    stride : int, optional, default = 1
        Multiple of lag time used as a time step. By default, the time step is equal to the lag time

    Returns
    -------
    indexes : ndarray( (N, 2) )
        trajectory and time indexes of the simulated trajectory. Each row consists of a tuple (i, t), where i
        is the index of the trajectory and t is the time index within the trajectory.
        Note that the time difference between two samples is the Markov model lag time tau

    See also
    --------
    pyemma.coordinates.save_traj
        in order to save this synthetic trajectory as a trajectory file with molecular structures

    """
    # TODO: this is the only function left which does something time-related in a multiple of tau rather than dt.
    # TODO: we could generate dt-strided trajectories by sampling tau times from the current state, but that would
    # TODO: probably lead to a weird-looking trajectory. Maybe we could use a HMM to generate intermediate 'hidden'
    # TODO: frames. Anyway, this is a nontrivial issue.
    self._check_is_estimated()

    # generate synthetic states; `stride` is handed to the simulation as its `dt`
    from deeptime.markov.msm import MarkovStateModel
    chain = MarkovStateModel(self.transition_matrix)
    syntraj = chain.simulate(N, start=start, stop=stop, dt=stride)

    # map every synthetic state to a (trajectory, frame) index pair
    from pyemma.util.discrete_trajectories import sample_indexes_by_sequence
    indexes = sample_indexes_by_sequence(self.active_state_indexes, syntraj)
    return indexes
def setUpClass(cls):
    """Generate two-state Gaussian HMM data, reference covariances and the TICA objects under test."""
    with numpy_random_seed(123):
        # generate HMM with two Gaussians
        cls.P = np.array([[0.99, 0.01], [0.01, 0.99]])
        cls.T = 40000
        means = [np.array([-1, 1]), np.array([1, -1])]
        widths = [np.array([0.3, 2]), np.array([0.3, 2])]

        # hidden trajectory
        dtraj = MarkovStateModel(cls.P).simulate(cls.T)
        # continuous trajectory: one Gaussian draw per coordinate, centred on the current hidden state
        cls.X = np.zeros((cls.T, 2))
        for frame in range(cls.T):
            hidden = dtraj[frame]
            for dim in (0, 1):
                cls.X[frame, dim] = widths[hidden][dim] * np.random.randn() + means[hidden][dim]

        # Set the lag time:
        cls.lag = 10
        lag = cls.lag
        n_pairs = float(cls.T - lag)
        n_pairs_sym = float(2 * (cls.T - lag))

        # Compute mean free data (symmetrized mean and mean over the instantaneous frames only):
        mref = (np.sum(cls.X[:-lag, :], axis=0) + np.sum(cls.X[lag:, :], axis=0)) / n_pairs_sym
        mref_nr = np.sum(cls.X[:-lag, :], axis=0) / n_pairs
        cls.X_mf = cls.X - mref[None, :]
        cls.X_mf_nr = cls.X - mref_nr[None, :]

        # instantaneous / time-lagged views of the mean-free data
        x0, xt = cls.X_mf[:-lag, :], cls.X_mf[lag:, :]
        x0_nr, xt_nr = cls.X_mf_nr[:-lag, :], cls.X_mf_nr[lag:, :]

        # Compute correlation matrices:
        cls.cov_ref = (np.dot(x0.T, x0) + np.dot(xt.T, xt)) / n_pairs_sym
        cls.cov_ref_nr = np.dot(x0_nr.T, x0_nr) / n_pairs
        cls.cov_tau_ref = (np.dot(x0.T, xt) + np.dot(xt.T, x0)) / n_pairs_sym
        cls.cov_tau_ref_nr = np.dot(x0_nr.T, xt_nr) / n_pairs

        # do unscaled TICA
        reader = api.source(cls.X, chunksize=0)
        cls.tica_obj = api.tica(data=reader, lag=cls.lag, dim=1, kinetic_map=False)
        # non-reversible TICA
        cls.tica_obj_nr = api.tica(data=reader, lag=cls.lag, dim=1, kinetic_map=False, reversible=False)
def setUpClass(cls):
    """Estimate implied timescales on a simulated trajectory and keep the last row as reference."""
    transition_matrix = np.array([
        [0.5, .25, .25, 0.],
        [0., .25, .5, .25],
        [.25, .25, .5, 0],
        [.25, .25, .25, .25],
    ])
    cls.dtraj = MarkovStateModel(transition_matrix).simulate(1000)

    # bogus its object
    cls.estimator = MaximumLikelihoodMSM(dt_traj='10 ps')
    cls.its = ImpliedTimescales(estimator=cls.estimator, n_jobs=1)
    cls.its.estimate(cls.dtraj, lags=[1, 2, 3, 5, 10])
    # timescales at the largest lag
    cls.refs = cls.its.timescales[-1]
    return cls
def __init__(self, transition_model, output_model: Union[np.ndarray, OutputModel],
             initial_distribution: Optional[np.ndarray] = None,
             likelihoods: Optional[np.ndarray] = None,
             state_probabilities: Optional[List[np.ndarray]] = None,
             initial_count: Optional[np.ndarray] = None,
             hidden_state_trajectories: Optional[Iterable[np.ndarray]] = None,
             stride: Union[int, str] = 1,
             observation_symbols: Optional[np.ndarray] = None,
             observation_symbols_full: Optional[np.ndarray] = None):
    """Assemble a hidden Markov model from a hidden transition model and an output model.

    Parameters
    ----------
    transition_model : ndarray or model with ``n_states``
        Hidden dynamics. A bare array is wrapped into a ``MarkovStateModel``.
    output_model : ndarray or OutputModel
        Emission model. A bare array is wrapped into a ``DiscreteOutputModel``.
    initial_distribution : ndarray, optional
        Distribution over hidden states; uniform if omitted.
    likelihoods, state_probabilities, initial_count, hidden_state_trajectories : optional
        Stored on the instance as given.
    stride : int or str, default=1
        An integer or the string ``'effective'``.
    observation_symbols, observation_symbols_full : ndarray, optional
        If ``observation_symbols`` is omitted and the output model reports a non-negative number of
        observable states, both default to ``arange(n_observable_states)``.

    Raises
    ------
    ValueError
        If the number of hidden states of transition and output model differ, if the initial
        distribution has the wrong length, or if ``stride`` is invalid.
    """
    super().__init__()

    # accept bare arrays for both sub-models
    if isinstance(transition_model, np.ndarray):
        from deeptime.markov.msm import MarkovStateModel
        transition_model = MarkovStateModel(transition_model)
    if isinstance(output_model, np.ndarray):
        output_model = DiscreteOutputModel(output_model)

    if transition_model.n_states != output_model.n_hidden_states:
        raise ValueError("Transition model must describe hidden states")

    if initial_distribution is None:
        # uniform
        initial_distribution = np.ones(transition_model.n_states) / transition_model.n_states
    if initial_distribution.shape[0] != transition_model.n_states:
        raise ValueError("Initial distribution over hidden states must be of length {}"
                         .format(transition_model.n_states))

    self._transition_model = transition_model
    self._output_model = output_model
    self._initial_distribution = initial_distribution
    self._likelihoods = likelihoods
    self._state_probabilities = state_probabilities
    self._initial_count = initial_count
    self._hidden_state_trajectories = hidden_state_trajectories

    # default symbols: every observable state of the output model, on both the restricted and full set
    if observation_symbols is None and output_model.n_observable_states >= 0:
        observation_symbols = np.arange(output_model.n_observable_states)
        observation_symbols_full = observation_symbols
    self._observation_symbols = observation_symbols
    self._observation_symbols_full = observation_symbols_full

    stride_is_integer = isinstance(stride, Integral)
    stride_is_effective = isinstance(stride, str) and stride == 'effective'
    if not stride_is_integer and not stride_is_effective:
        raise ValueError("Stride argument must either be an integer value or 'effective', "
                         "but was: {}".format(stride))
    self._stride = stride
def test_amm_sanity(fixed_seed):
    """The direct AMM estimator and the feature-trajectory convenience constructor must agree."""
    # Meta-stable birth-death chain
    b = 2
    eps = 10 ** (-b)
    q = np.zeros(7)
    p = np.zeros(7)
    q[1:] = 0.5
    p[0:-1] = 0.5
    q[2] = 1.0 - eps
    q[4] = eps
    p[2] = eps
    p[4] = 1.0 - eps

    bdc = birth_death_chain(q, p)
    dtraj = MarkovStateModel(bdc.transition_matrix).simulate(n_steps=10000, start=0)
    tau = 1
    k = 3

    # Predictions and experimental data
    E = np.vstack((np.linspace(-0.1, 1., 7), np.linspace(1.5, -0.1, 7))).T
    m = np.array([0.0, 0.0])
    w = np.array([2.0, 2.5])
    sigmas = 1. / np.sqrt(2) / np.sqrt(w)

    # Feature trajectory
    ftraj = E[dtraj, :]

    counts = TransitionCountEstimator(lagtime=tau, count_mode="sliding").fit(dtraj).fetch_model()

    # estimator configured directly from expectations and weights
    amm_estimator = AugmentedMSMEstimator(expectations_by_state=E, experimental_measurements=m,
                                          experimental_measurement_weights=w)
    amm = amm_estimator.fit(counts).fetch_model()

    # estimator derived from the discrete and feature trajectories
    amm_convenience_estimator = AugmentedMSMEstimator.estimator_from_feature_trajectories(
        dtraj, ftraj, n_states=counts.n_states_full, experimental_measurements=m, sigmas=sigmas)
    amm_convenience = amm_convenience_estimator.fit(counts).fetch_model()

    assert_equal(tau, amm.lagtime)
    # estimator parameters
    assert_array_almost_equal(E, amm_estimator.expectations_by_state)
    assert_array_almost_equal(E, amm_convenience_estimator.expectations_by_state, decimal=4)
    assert_array_almost_equal(m, amm_estimator.experimental_measurements)
    assert_array_almost_equal(m, amm_convenience_estimator.experimental_measurements)
    assert_array_almost_equal(w, amm_estimator.experimental_measurement_weights)
    assert_array_almost_equal(w, amm_convenience_estimator.experimental_measurement_weights)
    # fitted models
    assert_array_almost_equal(amm.transition_matrix, amm_convenience.transition_matrix, decimal=4)
    assert_array_almost_equal(amm.stationary_distribution, amm_convenience.stationary_distribution, decimal=4)
    assert_array_almost_equal(amm.optimizer_state.lagrange, amm_convenience.optimizer_state.lagrange, decimal=4)
def generate_hmm_test_data():
    """Generate two-state Gaussian HMM data together with reference covariance matrices.

    Returns
    -------
    dict
        Keys ``lagtime``, ``cov_ref_00``, ``cov_ref_00_nr``, ``cov_ref_0t``, ``cov_ref_0t_nr`` and ``data``.
        The ``_nr`` entries use the mean over the instantaneous frames only, the others the
        symmetrized mean over instantaneous and time-lagged frames.
    """
    state = np.random.RandomState(123)

    # generate HMM with two Gaussians
    P = np.array([[0.99, 0.01], [0.01, 0.99]])
    T = 40000
    means = np.array([np.array([-1, 1]), np.array([1, -1])])
    widths = np.array([np.array([0.3, 2]), np.array([0.3, 2])])

    # hidden trajectory
    dtraj = MarkovStateModel(P).simulate(n_steps=T)

    # continuous trajectory: per-frame mean plus per-frame width times standard normal noise
    X = np.zeros((T, 2))
    normal_vals = state.normal(size=(len(X), 2))
    frame_means, frame_widths = means[dtraj], widths[dtraj]
    for dim in (0, 1):
        X[:, dim] = frame_means[:, dim] + frame_widths[:, dim] * normal_vals[:, dim]

    # Set the lag time:
    lag = 10
    n_pairs = float(T - lag)
    n_pairs_sym = float(2 * (T - lag))

    # Compute mean free data:
    mref = (np.sum(X[:-lag, :], axis=0) + np.sum(X[lag:, :], axis=0)) / n_pairs_sym
    mref_nr = np.sum(X[:-lag, :], axis=0) / n_pairs
    X_mf = X - mref[None, :]
    X_mf_nr = X - mref_nr[None, :]

    # instantaneous / time-lagged views of the mean-free data
    x0, xt = X_mf[:-lag, :], X_mf[lag:, :]
    x0_nr, xt_nr = X_mf_nr[:-lag, :], X_mf_nr[lag:, :]

    # Compute correlation matrices:
    cov_ref = (np.dot(x0.T, x0) + np.dot(xt.T, xt)) / n_pairs_sym
    cov_ref_nr = np.dot(x0_nr.T, x0_nr) / n_pairs
    cov_tau_ref = (np.dot(x0.T, xt) + np.dot(xt.T, x0)) / n_pairs_sym
    cov_tau_ref_nr = np.dot(x0_nr.T, xt_nr) / n_pairs

    return dict(lagtime=lag, cov_ref_00=cov_ref, cov_ref_00_nr=cov_ref_nr, cov_ref_0t=cov_tau_ref,
                cov_ref_0t_nr=cov_tau_ref_nr, data=X)
def setUpClass(cls):
    """Seed numpy, generate two-state Gaussian HMM data and run a one-dimensional PCA on it."""
    # set random state, remember old one and set it back in tearDownClass
    cls.old_state = np.random.get_state()
    np.random.seed(0)

    # generate HMM with two Gaussians
    cls.P = np.array([[0.99, 0.01], [0.01, 0.99]])
    cls.T = 10000
    means = [np.array([-1, 1]), np.array([1, -1])]
    widths = [np.array([0.3, 2]), np.array([0.3, 2])]

    # hidden trajectory
    dtraj = MarkovStateModel(cls.P).simulate(cls.T)
    # continuous trajectory: one Gaussian draw per coordinate, centred on the current hidden state
    cls.X = np.zeros((cls.T, 2))
    for frame in range(cls.T):
        hidden = dtraj[frame]
        for dim in (0, 1):
            cls.X[frame, dim] = widths[hidden][dim] * np.random.randn() + means[hidden][dim]

    cls.pca_obj = pca(data=cls.X, dim=1)
def simulate(self, N, start=None, stop=None, dt=1):
    """ Generates a realization of the Hidden Markov Model

    The hidden trajectory is simulated first; afterwards one observable state is drawn for every
    hidden frame from the output distribution of the respective hidden state.

    Parameters
    ----------
    N : int
        trajectory length in steps of the lag time
    start : int, optional, default = None
        starting hidden state. If not given, will sample from the stationary
        distribution of the hidden transition matrix.
    stop : int or int-array-like, optional, default = None
        stopping hidden set. If given, the trajectory will be stopped before
        N steps once a hidden state of the stop set is reached
    dt : int
        trajectory will be saved every dt time steps.
        Internally, the dt'th power of P is taken to ensure a more efficient simulation.

    Returns
    -------
    htraj : (N/dt, ) ndarray
        The hidden state trajectory with length N/dt
    otraj : (N/dt, ) ndarray
        The observable state discrete trajectory with length N/dt

    """
    from scipy import stats

    # generate output distributions, one per hidden state (row of pobs)
    symbols = _np.arange(self.pobs.shape[1])
    output_distributions = []
    for pobs_i in self.pobs:
        output_distributions.append(stats.rv_discrete(values=(symbols, pobs_i)))

    # sample hidden trajectory
    hidden_chain = MarkovStateModel(self.transition_matrix)
    htraj = hidden_chain.simulate(N, start=start, stop=stop, dt=dt)

    # for each time step, sample microstate from the current hidden state's distribution
    otraj = _np.zeros(htraj.size, dtype=int)
    for t in range(htraj.size):
        otraj[t] = output_distributions[htraj[t]].rvs()
    return htraj, otraj
def _append_sample(self, models, prior, sample_model):
    """Append a snapshot of the current sample to ``models`` as a ``HiddenMarkovModel``.

    Parameters
    ----------
    models : list
        Collection of sampled models; extended in place.
    prior : object with ``lagtime``
        Supplies the lag time of the count model attached to the sample.
    sample_model : object
        Current sampler state. It is deep-copied, so later sampling steps do not alter the stored model.
    """
    # Save a copy of the current model.
    snapshot = deepcopy(sample_model)
    # the Viterbi path is discarded, but is needed to get a new transition matrix for each model.
    if not self.store_hidden:
        snapshot.hidden_trajs.clear()

    # potentially restrict sampled models to observed space
    # since the snapshot is defined on full space, observation_symbols are also observation states
    count_model = TransitionCountModel(snapshot.counts, lagtime=prior.lagtime)
    transition_model = MarkovStateModel(snapshot.transition_matrix,
                                        stationary_distribution=snapshot.stationary_distribution,
                                        reversible=self.reversible,
                                        count_model=count_model)
    sampled_hmm = HiddenMarkovModel(transition_model=transition_model,
                                    output_model=snapshot.output_model,
                                    initial_distribution=snapshot.initial_distribution,
                                    hidden_state_trajectories=snapshot.hidden_trajs)
    models.append(sampled_hmm)
def test_discrete_6_3(self):
    """Initial three-state HMM guess from a six-state chain with three metastable blocks."""
    # 6x6 transition matrix made of three weakly coupled pairs of states
    n_states = 3
    P = np.array([[0.90, 0.10, 0.00, 0.00, 0.00, 0.00],
                  [0.20, 0.79, 0.01, 0.00, 0.00, 0.00],
                  [0.00, 0.01, 0.84, 0.15, 0.00, 0.00],
                  [0.00, 0.00, 0.05, 0.94, 0.01, 0.00],
                  [0.00, 0.00, 0.00, 0.02, 0.78, 0.20],
                  [0.00, 0.00, 0.00, 0.00, 0.10, 0.90]])
    # generate realization
    T = 10000
    dtrajs = [MarkovStateModel(P).simulate(T)]
    # estimate initial HMM with 3 hidden states at lag time 1
    hmm = init.discrete.metastable_from_data(dtrajs, n_states, 1)

    Tij = hmm.transition_model.transition_matrix
    B = hmm.output_probabilities
    # Test stochasticity and reversibility of the hidden transition matrix
    for check in (msmana.is_transition_matrix, msmana.is_reversible):
        np.testing.assert_(check(Tij))
    # every emission distribution must be normalized
    np.testing.assert_allclose(B.sum(axis=1), np.ones(B.shape[0]))
def test_discrete_2_2(self):
    """For a two-state chain the two-state initial HMM must reproduce the chain itself."""
    # 2x2 transition matrix
    P = np.array([[0.99, 0.01], [0.01, 0.99]])
    # generate realization
    T = 10000
    dtrajs = [MarkovStateModel(P).simulate(T)]
    # estimate initial HMM with 2 states - should be identical to P
    init_hmm = init.discrete.metastable_from_data(dtrajs, n_hidden_states=2, lagtime=1)

    A = init_hmm.transition_model.transition_matrix
    B = init_hmm.output_probabilities

    # Test stochasticity
    np.testing.assert_(msmana.is_transition_matrix(A))
    np.testing.assert_allclose(B.sum(axis=1), np.ones(B.shape[0]))

    # the hidden states may come out swapped; reorder the emission rows so that state 0 comes first
    hidden_states_swapped = B[0, 0] < B[1, 0]
    if hidden_states_swapped:
        B = B[np.array([1, 0]), :]
    # A should be close to P, and the emissions close to the identity
    np.testing.assert_array_almost_equal(A, P, decimal=2)
    np.testing.assert_array_almost_equal(B, np.eye(2), decimal=2)
def test_gaussian_prinz():
    """A Gaussian HMM fitted to Prinz potential trajectories must keep its means at the minima."""
    system = prinz_potential()
    trajs = system.trajectory(np.zeros((5, 1)), length=10000)

    # this corresponds to a GMM with the means being the correct potential landscape minima
    om = deeptime.markov.hmm.GaussianOutputModel(n_states=4, means=system.minima, sigmas=[0.1] * 4)
    # this is almost the right hidden transition matrix
    tmat = np.array([
        [9.59e-1, 0, 4.06e-2, 1 - 9.59e-1 - 4.06e-2],
        [0, 9.79e-1, 0, 1 - 9.79e-1],
        [2.64e-2, 0, 9.68e-1, 1 - 9.68e-1 - 2.64e-2],
        [0, 1.67e-2, 1 - 9.74e-1 - 1.67e-2, 9.74e-1],
    ])
    msm = MarkovStateModel(tmat)
    init_ghmm = HiddenMarkovModel(msm, om, initial_distribution=msm.stationary_distribution)

    ghmm = MaximumLikelihoodHMM(init_ghmm, lagtime=1).fit_fetch(trajs)
    gom = ghmm.output_model
    # every fitted mean must lie within 0.1 of its nearest potential minimum
    for minimum_ix in range(4):
        fitted_mean = gom.means[minimum_ix]
        nearest_minimum = system.minima[np.argmin(np.abs(system.minima - fitted_mean))]
        assert_allclose(fitted_mean, nearest_minimum, atol=1e-1)