def fingerprint_relaxation(self, p0, a, k=None, ncv=None):
    """Relaxation fingerprint for vectors given on hidden or observed states.

    Parameters
    ----------
    p0 : (n,) array-like
        Initial distribution; must be one-dimensional and numeric.
    a : (n,) array-like
        Observable vector; must have the same length as `p0`.
    k : optional
        Accepted for signature compatibility; not used in this method body.
    ncv : optional
        Accepted for signature compatibility; not used in this method body.

    Returns
    -------
    Whatever ``_MSM.fingerprint_relaxation(self, p0, a)`` returns for the
    (possibly projected) vectors.

    Raises
    ------
    ValueError
        If the vector length matches neither ``self.nstates`` nor
        ``self.nstates_obs``.
    """
    # validate both inputs; the observable has to be as long as p0
    initial = _types.ensure_ndarray(p0, ndim=1, kind='numeric')
    observable = _types.ensure_ndarray(a, ndim=1, kind='numeric',
                                       size=len(initial))

    # vectors given on the observed states are projected with the
    # observation probabilities first
    if len(observable) == self.nstates_obs:
        initial = _np.dot(self.observation_probabilities, initial)
        observable = _np.dot(self.observation_probabilities, observable)

    # anything that is still not of hidden-state size cannot be handled
    if len(observable) != self.nstates:
        raise ValueError(
            'observable vectors have size %s which is incompatible with both hidden (%s)'
            ' and observed states (%s)'
            % (len(observable), self.nstates, self.nstates_obs))

    return _MSM.fingerprint_relaxation(self, initial, observable)
def expectation(self, a):
    r"""Equilibrium expectation value of a given observable.

    Parameters
    ----------
    a : (n,) ndarray
        Observable vector on the MSM state space; its length must equal
        ``self.nstates``.

    Returns
    -------
    val : float
        Equilibrium expectation value of the given observable.

    Notes
    -----
    The equilibrium expectation value of an observable :math:`a` is
    defined as

    .. math::

        \mathbb{E}_{\mu}[a] = \sum_i \pi_i a_i

    where :math:`\pi=(\pi_i)` is the stationary vector of the transition
    matrix :math:`P`.

    """
    # validate first, then weight the observable with the stationary vector
    observable = _types.ensure_ndarray(a, ndim=1, size=self.nstates,
                                       kind='numeric')
    stationary = self.stationary_distribution
    return _np.dot(observable, stationary)
def test_estimator(self, test_estimator):
    """Store the test estimator and rebuild the index bookkeeping.

    Besides keeping a reference to `test_estimator`, this stores an integer
    copy of its ``active_set`` and builds ``_full2active``, a lookup array
    that maps a full state index to its position in the active set.
    """
    self._test_estimator = test_estimator

    # np.array(...) makes an independent copy before validation
    active_copy = np.array(test_estimator.active_set)
    self.active_set = types.ensure_ndarray(active_copy, kind='i')

    # the "full" set is taken to end at the largest index in the active set
    n_full = np.max(self.active_set) + 1
    self._full2active = np.zeros(n_full, dtype=int)
    self._full2active[self.active_set] = np.arange(self.nstates)
def __init__(self, test_model, test_estimator, mlags=None, conf=0.95,
             err_est=False, n_jobs=None, show_progress=True):
    """Set up the validator from a model, its estimator and lag multiples.

    Parameters
    ----------
    test_model
        Model to be tested. A deep copy is stored as ``self.test_model``.
    test_estimator
        Estimator belonging to the model. It must provide ``lag`` and either
        ``discrete_trajectories_full`` or ``trajectory_lengths()``.
    mlags : int, int-array or None, default=None
        Multiples of the lag time. None selects the largest multiple that
        fits into the longest trajectory; an int ``m`` becomes
        ``np.arange(m)``. Entries that are negative or exceed the largest
        possible multiple are dropped with a logged warning.
    conf : float, default=0.95
        Stored as ``self.conf`` and forwarded to
        ``test_model.set_model_params`` when the model is a ``SampledModel``.
    err_est : bool, default=False
        Whether errors on the estimated models are requested.
    n_jobs : default=None
        Stored as ``self.n_jobs``.
    show_progress : bool, default=True
        Stored as ``self.show_progress``.

    Raises
    ------
    ValueError
        If `err_est` is requested but the model is not a ``SampledModel``.
    """
    # the test modifies the model, so work on a private copy of it
    from copy import deepcopy
    self.test_model = deepcopy(test_model)
    self.test_estimator = test_estimator

    # the longest trajectory bounds the largest usable lag multiple
    try:
        maxlength = np.max(
            [len(traj) for traj in test_estimator.discrete_trajectories_full])
    except AttributeError:
        maxlength = np.max(test_estimator.trajectory_lengths())
    maxmlag = int(math.floor(maxlength / test_estimator.lag))

    # normalise mlags to a one-dimensional integer array
    if mlags is None:
        mlags = maxmlag
    if types.is_int(mlags):
        mlags = np.arange(mlags)
    mlags = types.ensure_ndarray(mlags, ndim=1, kind='i')

    # drop multiples that do not fit into the data
    fits = mlags <= maxmlag
    if not np.all(fits):
        mlags = mlags[fits]
        self.logger.warning(
            'Changed mlags as some mlags exceeded maximum trajectory length.')

    # drop negative multiples
    non_negative = mlags >= 0
    if not np.all(non_negative):
        mlags = mlags[non_negative]
        self.logger.warning('Changed mlags as some mlags were negative.')
    self.mlags = mlags

    # confidence interval and error handling
    self.conf = conf
    self.has_errors = issubclass(self.test_model.__class__, SampledModel)
    if self.has_errors:
        self.test_model.set_model_params(conf=conf)
    self.err_est = err_est
    if err_est and not self.has_errors:
        raise ValueError(
            'Requested errors on the estimated models, '
            'but the model is not able to calculate errors at all')

    self.n_jobs = n_jobs
    self.show_progress = show_progress
def propagate(self, p0, k):
    r"""Propagate the initial distribution `p0` by `k` steps.

    Computes the product

    .. math::

        p_k = p_0^T P^k

    If the lag time of the transition matrix :math:`P` is :math:`\tau`,
    this provides the probability distribution at time :math:`k \tau`.

    Parameters
    ----------
    p0 : ndarray(n)
        Initial distribution. A vector of length ``self.nstates_obs`` is
        projected with the observation probabilities before propagation
        and mapped back afterwards.
    k : int
        Number of time steps; must be a non-negative integer.

    Returns
    -------
    pk : ndarray(n)
        Distribution after `k` steps, normalized to sum to one.

    """
    p0 = _types.ensure_ndarray(p0, ndim=1, kind='numeric')
    assert _types.is_int(k) and k >= 0, 'k must be a non-negative integer'

    # zero steps: nothing to propagate, only normalize
    if k == 0:
        return p0 / p0.sum()

    # remember whether the input lives on the observed states
    given_on_observed = len(p0) == self.nstates_obs
    if given_on_observed:
        p0 = _np.dot(self.observation_probabilities, p0)

    self._ensure_eigendecomposition(self.nstates)
    from pyerna.util.linalg import mdot

    # p0^T R diag(lambda^k) L
    right = self.eigenvectors_right()
    powered = _np.diag(_np.power(self.eigenvalues(), k))
    left = self.eigenvectors_left()
    pk = mdot(p0.T, right, powered, left)

    if given_on_observed:
        # convert back to the observed state space
        pk = _np.dot(pk, self.observation_probabilities)

    return pk / pk.sum()
def propagate(self, p0, k):
    r"""Propagate the initial distribution `p0` by `k` steps.

    Computes the product

    .. math::

        p_k = p_0^T P^k

    If the lag time of the transition matrix :math:`P` is :math:`\tau`,
    this provides the probability distribution at time :math:`k \tau`.

    Parameters
    ----------
    p0 : ndarray(n,)
        Initial distribution. Vector of length ``self.nstates``.
    k : int
        Number of time steps; must be a non-negative integer.

    Returns
    -------
    pk : ndarray(n,)
        Distribution after `k` steps, normalized to sum to one.

    Raises
    ------
    AssertionError
        If `k` is not a non-negative integer.

    """
    p0 = _types.ensure_ndarray(p0, ndim=1, size=self.nstates, kind='numeric')
    assert _types.is_int(k) and k >= 0, 'k must be a non-negative integer'

    if k == 0:
        # nothing to propagate, only normalize
        return p0 / p0.sum()

    if self.is_sparse:
        # sparse: we don't have a full eigenvalue set, so just propagate.
        # p^T P is evaluated as P^T p through the matrix's own dot method:
        # numpy.dot does not understand scipy sparse matrices and would not
        # compute a vector-matrix product here. For a dense ndarray the two
        # forms give the same result.
        pk = _np.array(p0)
        for _ in range(k):
            pk = self.transition_matrix.T.dot(pk)
    else:
        # dense: employ the eigenvalue decomposition
        self._ensure_eigendecomposition(self.nstates)
        from pyerna.util.linalg import mdot
        pk = mdot(p0.T,
                  self.eigenvectors_right(),
                  _np.diag(_np.power(self.eigenvalues(), k)),
                  self.eigenvectors_left()).real

    # normalize to 1.0 and return
    return pk / pk.sum()
def correlation(self, a, b=None, maxtime=None, k=None, ncv=None):
    """Correlation for observables given on hidden or observed states.

    Parameters
    ----------
    a : (n,) array-like
        First observable; must be one-dimensional and numeric.
    b : (n,) array-like or None, default=None
        Optional second observable of the same length as `a`.
    maxtime : optional
        Forwarded unchanged to ``_MSM.correlation``.
    k : optional
        Accepted for signature compatibility; not used in this method body.
    ncv : optional
        Accepted for signature compatibility; not used in this method body.

    Returns
    -------
    Whatever ``_MSM.correlation(self, a, b=b, maxtime=maxtime)`` returns for
    the (possibly projected) observables.

    Raises
    ------
    ValueError
        If the observable length matches neither ``self.nstates`` nor
        ``self.nstates_obs``.
    """
    # validate inputs; b is optional but has to match a in length
    first = _types.ensure_ndarray(a, ndim=1, kind='numeric')
    second = _types.ensure_ndarray_or_None(b, ndim=1, kind='numeric',
                                           size=len(first))

    # observables given on the observed states are projected first
    if len(first) == self.nstates_obs:
        first = _np.dot(self.observation_probabilities, first)
        if second is not None:
            second = _np.dot(self.observation_probabilities, second)

    # anything that is still not of hidden-state size cannot be handled
    if len(first) != self.nstates:
        raise ValueError(
            'observable vectors have size %s which is incompatible with both hidden (%s)'
            ' and observed states (%s)'
            % (len(first), self.nstates, self.nstates_obs))

    return _MSM.correlation(self, first, b=second, maxtime=maxtime)
def plot_network(self, state_sizes=None, state_scale=1.0,
                 state_colors='#ff5500', state_labels='auto',
                 arrow_scale=1.0, arrow_curvature=1.0,
                 arrow_labels='weights', arrow_label_format='%10.2f',
                 max_width=12, max_height=12, figpadding=0.2,
                 xticks=False, yticks=False, show_frame=False,
                 **textkwargs):
    """Draw the network using discs and curved arrows.

    The thicknesses and labels of the arrows are taken from the
    off-diagonal matrix elements in ``self.A``.

    Parameters
    ----------
    state_sizes : ndarray(n) or None, default=None
        Relative disc sizes; they are rescaled by their maximum. None draws
        all discs with the same size.
    state_scale : float, default=1.0
        Multiplier applied to all disc sizes.
    state_colors : str, list or numeric ndarray(n), default='#ff5500'
        A single color string (used for all discs), a list with one color
        per state, or one-dimensional numeric values that are divided by
        their maximum and mapped through the ``binary`` colormap. None is
        replaced by the default color.
    state_labels : None, 'auto' or sequence of length n, default='auto'
        None draws no labels, 'auto' labels the discs with their index.
    arrow_scale : float, default=1.0
        Multiplier for the arrow widths, applied on top of an automatic
        rescaling by the largest off-diagonal element of ``self.A`` and
        ``sqrt(n)``.
    arrow_curvature : float, default=1.0
        Forwarded to ``self._draw_arrow``.
    arrow_labels : 'weights', ndarray(n, n) or None, default='weights'
        'weights' labels arrows with the entries of ``self.A``; an ndarray
        provides the labels directly; None draws empty labels.
    arrow_label_format : str, default='%10.2f'
        ``%``-format applied to each arrow label. It is replaced by '%s'
        for string labels and for empty labels.
    max_width, max_height : default=12
        Upper bounds for the size of a newly created figure.
    figpadding : float, default=0.2
        Fraction of the position extent added on each side of the plot.
    xticks, yticks : bool, default=False
        When false, the ticks of the respective axis are removed.
    show_frame : bool, default=False
        Forwarded to ``ax.set_frame_on``.
    **textkwargs
        Forwarded to ``ax.text`` for the state labels. The extra key
        'arrow_label_size' is removed and forwarded to ``_draw_arrow``; it
        defaults to the value of 'size'.

    Returns
    -------
    fig
        The figure that contains the axes drawn into.

    Raises
    ------
    ValueError
        If `state_labels` or a list of `state_colors` does not have one
        entry per state, or if `arrow_labels` is not understood.
    """
    # Set the default values for the text dictionary
    from matplotlib import pyplot as _plt
    textkwargs.setdefault('size', None)
    textkwargs.setdefault('horizontalalignment', 'center')
    textkwargs.setdefault('verticalalignment', 'center')
    textkwargs.setdefault('color', 'black')
    # remove the temporary key 'arrow_label_size' as it cannot be parsed by plt.text!
    arrow_label_size = textkwargs.pop('arrow_label_size', textkwargs['size'])

    if self.pos is None:
        self.layout_automatic()
    # number of nodes
    n = len(self.pos)

    # get bounds and pad figure
    xmin = _np.min(self.pos[:, 0])
    xmax = _np.max(self.pos[:, 0])
    Dx = xmax - xmin
    xmin -= Dx * figpadding
    xmax += Dx * figpadding
    Dx *= 1 + figpadding
    ymin = _np.min(self.pos[:, 1])
    ymax = _np.max(self.pos[:, 1])
    Dy = ymax - ymin
    ymin -= Dy * figpadding
    ymax += Dy * figpadding
    Dy *= 1 + figpadding

    # sizes of nodes
    if state_sizes is None:
        state_sizes = 0.5 * state_scale * \
            min(Dx, Dy)**2 * _np.ones(n) / float(n)
    else:
        state_sizes = 0.5 * state_scale * \
            min(Dx, Dy)**2 * state_sizes / (_np.max(state_sizes) * float(n))

    # automatic arrow rescaling
    arrow_scale *= 1.0 / \
        (_np.max(self.A - _np.diag(_np.diag(self.A))) * _sqrt(n))

    # size figure
    if (Dx / max_width > Dy / max_height):
        figsize = (max_width, Dy * (max_width / Dx))
    else:
        figsize = (Dx / Dy * max_height, max_height)

    if self.ax is None:
        logger.debug("creating new figure")
        fig = _plt.figure(None, figsize=figsize)
        self.ax = fig.add_subplot(111)
    else:
        # reuse the given axes and stretch the data limits so that the
        # data aspect ratio matches the aspect ratio of the axes
        fig = self.ax.figure
        window_extend = self.ax.get_window_extent()
        axes_ratio = window_extend.height / window_extend.width
        data_ratio = (ymax - ymin) / (xmax - xmin)
        q = axes_ratio / data_ratio
        if q > 1.0:
            ymin *= q
            ymax *= q
        else:
            xmin /= q
            xmax /= q

    if not xticks:
        self.ax.get_xaxis().set_ticks([])
    if not yticks:
        self.ax.get_yaxis().set_ticks([])
    # show or suppress frame
    self.ax.set_frame_on(show_frame)

    # set node labels
    if state_labels is None:
        pass
    elif isinstance(state_labels, str) and state_labels == 'auto':
        state_labels = [str(i) for i in _np.arange(n)]
    else:
        if len(state_labels) != n:
            raise ValueError(
                "length of state_labels({}) has to match length of states({})."
                .format(len(state_labels), n))

    # set node colors
    if state_colors is None:
        state_colors = '#ff5500'  # None is not acceptable
    if isinstance(state_colors, str):
        state_colors = [state_colors] * n
    if isinstance(state_colors, list) and not len(state_colors) == n:
        raise ValueError(
            "Mistmatch between nstates and nr. state_colors (%u vs %u)"
            % (n, len(state_colors)))
    try:
        colorscales = _types.ensure_ndarray(state_colors, ndim=1,
                                            kind='numeric')
        # Divide out of place: an in-place true division fails on integer
        # arrays (the float result cannot be cast back) and would rescale
        # an array that may still be owned by the caller.
        colorscales = colorscales / colorscales.max()
        state_colors = [
            _plt.cm.binary(int(256.0 * colorscales[i])) for i in range(n)
        ]
    except AssertionError:
        # assume we have a list of strings now.
        logger.debug("could not cast 'state_colors' to numeric values.")

    # set arrow labels
    if isinstance(arrow_labels, _np.ndarray):
        L = arrow_labels
        if isinstance(arrow_labels[0, 0], str):
            arrow_label_format = '%s'
    elif isinstance(arrow_labels, str) and arrow_labels.lower() == 'weights':
        L = self.A[:, :]
    elif arrow_labels is None:
        L = _np.empty(_np.shape(self.A), dtype=object)
        L[:, :] = ''
        arrow_label_format = '%s'
    else:
        raise ValueError('invalid arrow labels')

    # draw circles
    circles = []
    for i in range(n):
        # choose color
        c = _plt.Circle(self.pos[i],
                        radius=_sqrt(0.5 * state_sizes[i]) / 2.0,
                        color=state_colors[i], zorder=2)
        circles.append(c)
        self.ax.add_artist(c)
        # add annotation
        if state_labels is not None:
            self.ax.text(self.pos[i][0], self.pos[i][1], state_labels[i],
                         zorder=3, **textkwargs)

    assert len(circles) == n, "%i != %i" % (len(circles), n)

    # draw arrows: for every pair first i -> j, then j -> i
    for i in range(n):
        for j in range(i + 1, n):
            for src, dst in ((i, j), (j, i)):
                if abs(self.A[src, dst]) > 0:
                    self._draw_arrow(
                        self.pos[src, 0], self.pos[src, 1],
                        self.pos[dst, 0], self.pos[dst, 1],
                        Dx, Dy,
                        label=arrow_label_format % L[src, dst],
                        width=arrow_scale * self.A[src, dst],
                        arrow_curvature=arrow_curvature,
                        patchA=circles[src], patchB=circles[dst],
                        shrinkA=3, shrinkB=0,
                        arrow_label_size=arrow_label_size)

    # plot
    self.ax.set_xlim(xmin, xmax)
    self.ax.set_ylim(ymin, ymax)
    return fig
def cktest(self, n_observables=None, observables='phi', statistics='psi',
           mlags=10, n_jobs=None, show_progress=True, iterable=None):
    r"""Do the Chapman-Kolmogorov test by computing predictions for higher
    lag times and by performing estimations at higher lag times.

    Notes
    -----
    This method computes two sets of time-lagged covariance matrices

    * estimates at higher lag times :

      .. math::

          \left\langle \mathbf{K}(n\tau)g_{i},f_{j}\right\rangle_{\rho_{0}}

      where :math:`\rho_{0}` is the empirical distribution implicitly
      defined by all data points from time steps 0 to T-tau in all
      trajectories, :math:`\mathbf{K}(n\tau)` is a rank-reduced Koopman
      matrix estimated at the lag-time n*tau and g and f are some
      functions of the data. Rank-reduction of the Koopman matrix is
      controlled by the `dim` parameter of
      :func:`vamp <pyerna.coordinates.vamp>`.

    * predictions at higher lag times :

      .. math::

          \left\langle \mathbf{K}^{n}(\tau)g_{i},f_{j}\right\rangle_{\rho_{0}}

      where :math:`\mathbf{K}^{n}` is the n'th power of the rank-reduced
      Koopman matrix contained in self.

    The Chapman-Kolmogorov test is to compare the predictions to the
    estimates.

    Parameters
    ----------
    n_observables : int, optional, default=None
        Limit the number of default observables (and of default
        statistics) to this number. Only used if `observables` is 'phi' or
        `statistics` is 'psi'. Values larger than ``self.dimension()`` are
        reduced to it with a warning; None selects ``self.dimension()``.

    observables : np.ndarray((input_dimension, n_observables)) or 'phi'
        Coefficients that express one or multiple observables :math:`g`
        in the basis of the input features.
        This parameter can be 'phi'. In that case, the dominant
        right singular functions of the Koopman operator estimated
        at the smallest lag time are used as default observables.

    statistics : np.ndarray((input_dimension, n_statistics)) or 'psi'
        Coefficients that express one or multiple statistics :math:`f`
        in the basis of the input features.
        This parameter can be 'psi'. In that case, the dominant
        left singular functions of the Koopman operator estimated
        at the smallest lag time are used as default statistics.

    mlags : int or int-array, default=10
        multiples of lag times for testing the Model, e.g. range(10).
        A single int will trigger a range, i.e. mlags=10 maps to
        mlags=range(10).
        Note that you need to be able to do a model prediction for each
        of these lag time multiples, e.g. the value 0 only makes sense
        if model.expectation(lag_multiple=0) will work.

    n_jobs : int, default=None
        how many jobs to use during calculation

    show_progress : bool, default=True
        Show progressbars for calculation?

    iterable : any data format that `pyerna.coordinates.vamp()` accepts as input, optional
        If `iterable` is None, the same data source with which VAMP
        was initialized will be used for all estimation.
        Otherwise, all estimates (not predictions) from data will be
        computed from the data contained in `iterable`.

    Returns
    -------
    vckv : :class:`VAMPChapmanKolmogorovValidator <pyerna.coordinates.transform.VAMPChapmanKolmogorovValidator>`
        Contains the estimated and the predicted covariance matrices.
        The object can be plotted with
        :func:`plot_cktest <pyerna.plots.plot_cktest>` with the option
        `y01=False`.
    """
    # never ask for more singular functions than the model provides
    if n_observables is not None:
        if n_observables > self.dimension():
            warnings.warn(
                'Selected singular functions as observables but dimension '
                'is lower than requested number of observables.')
            n_observables = self.dimension()
    else:
        n_observables = self.dimension()

    if isinstance(observables, str) and observables == 'phi':
        observables = self.singular_vectors_right[:, 0:n_observables]
        observables_mean_free = True
    else:
        # keep the validated array so that array-like input is actually
        # converted and not merely checked
        observables = ensure_ndarray(observables, ndim=2)
        observables_mean_free = False

    if isinstance(statistics, str) and statistics == 'psi':
        statistics = self.singular_vectors_left[:, 0:n_observables]
        statistics_mean_free = True
    else:
        # same as above; None is passed through by the helper
        statistics = ensure_ndarray_or_None(statistics, ndim=2)
        statistics_mean_free = False

    ck = VAMPChapmanKolmogorovValidator(self.model, self, observables,
                                        statistics, observables_mean_free,
                                        statistics_mean_free, mlags=mlags,
                                        n_jobs=n_jobs,
                                        show_progress=show_progress)

    # without explicit data, estimate from the data source of this object
    if iterable is None:
        iterable = self.data_producer

    ck.estimate(iterable)
    return ck
def __init__(self, bias_energies_full, lag, count_mode='sliding',
             connectivity='reversible_pathways', maxiter=10000,
             maxerr=1.0E-15, save_convergence_info=0, dt_traj='1 step',
             init=None, init_maxiter=10000, init_maxerr=1.0E-8):
    r"""Discrete Transition(-based) Reweighting Analysis Method

    Parameters
    ----------
    bias_energies_full : numpy.ndarray(shape=(num_therm_states, num_conf_states)) object
        bias_energies_full[j, i] is the bias energy in units of kT for each
        discrete state i at thermodynamic state j.
    lag : int
        Integer lag time at which transitions are counted.
    count_mode : str, optional, default='sliding'
        Mode to obtain count matrices from discrete trajectories. Should be
        one of:

        * 'sliding' : a trajectory of length T will have :math:`T-\tau`
          counts at time indexes

          .. math::

             (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

        * 'sample' : a trajectory of length T will have :math:`T/\tau`
          counts at time indexes

          .. math::

             (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., ((T/\tau-1) \tau \rightarrow T)

        Currently only 'sliding' is supported.
    connectivity : str, optional, default='reversible_pathways'
        One of 'reversible_pathways', 'summed_count_matrix' or None.
        Defines what should be considered a connected set in the joint
        (product) space of conformations and thermodynamic ensembles.

        * 'reversible_pathways' : requires that every state in the
          connected set can be reached by following a pathway of reversible
          transitions. A reversible transition between two Markov states
          (within the same thermodynamic state k) is a pair of Markov
          states that belong to the same strongly connected component of
          the count matrix (from thermodynamic state k). A pathway of
          reversible transitions is a list of reversible transitions
          [(i_1, i_2), (i_2, i_3),..., (i_(N-2), i_(N-1)), (i_(N-1), i_N)].
          The thermodynamic state where the reversible transitions happen,
          is ignored in constructing the reversible pathways. This is
          equivalent to assuming that two ensembles overlap at some Markov
          state whenever there exist frames from both ensembles in that
          Markov state.
        * 'summed_count_matrix' : all thermodynamic states are assumed to
          overlap. The connected set is then computed by summing the count
          matrices over all thermodynamic states and taking its largest
          strongly connected set. Not recommended!
        * None : assume that everything is connected. For debugging.

        For more details see
        :func:`pyerna.thermo.extensions.cset.compute_csets_dTRAM`.
    maxiter : int, optional, default=10000
        The maximum number of self-consistent iterations before the
        estimator exits unsuccessfully.
    maxerr : float, optional, default=1.0E-15
        Convergence criterion based on the maximal free energy change in a
        self-consistent iteration step.
    save_convergence_info : int, optional, default=0
        Every save_convergence_info iteration steps, store the actual
        increment and the actual log-likelihood; 0 means no storage.
    dt_traj : str, optional, default='1 step'
        Description of the physical time corresponding to the lag. May be
        used by analysis algorithms such as plotting tools to pretty-print
        the axes. By default '1 step', i.e. there is no physical time unit.
        Specify by a number, whitespace and unit. Permitted units are
        (* is an arbitrary string):

        | 'fs', 'femtosecond*'
        | 'ps', 'picosecond*'
        | 'ns', 'nanosecond*'
        | 'us', 'microsecond*'
        | 'ms', 'millisecond*'
        | 's', 'second*'
    init : str, optional, default=None
        Use a specific initialization for self-consistent iteration:

        | None: use a hard-coded guess for free energies and Lagrangian multipliers
        | 'wham': perform a short WHAM estimate to initialize the free energies
    init_maxiter : int, optional, default=10000
        The maximum number of self-consistent iterations during the
        initialization.
    init_maxerr : float, optional, default=1.0E-8
        Convergence criterion for the initialization.

    Raises
    ------
    AssertionError
        If `count_mode`, `connectivity` or `init` is not one of the
        supported values.

    Example
    -------
    >>> from pyerna.thermo import DTRAM
    >>> import numpy as np
    >>> B = np.array([[0, 0],[0.5, 1.0]])
    >>> dtram = DTRAM(B, 1)
    >>> ttrajs = [np.array([0,0,0,0,0,0,0,0,0,0]),np.array([1,1,1,1,1,1,1,1,1,1])]
    >>> dtrajs = [np.array([0,0,0,0,1,1,1,0,0,0]),np.array([0,1,0,1,0,1,1,0,0,1])]
    >>> dtram = dtram.estimate((ttrajs, dtrajs))
    >>> dtram.log_likelihood() # doctest: +ELLIPSIS
    -9.805...
    >>> dtram.count_matrices # doctest: +SKIP
    array([[[5, 1], [1, 2]], [[1, 4], [3, 1]]], dtype=int32)
    >>> dtram.stationary_distribution # doctest: +ELLIPSIS
    array([ 0.38..., 0.61...])
    >>> dtram.meval('stationary_distribution') # doctest: +ELLIPSIS
    [array([ 0.38..., 0.61...]), array([ 0.50..., 0.49...])]

    References
    ----------

    .. [1] Wu, H. et al 2014
        Statistically optimal analysis of state-discretized trajectory data
        from multiple thermodynamic states
        J. Chem. Phys. 141, 214106

    """
    # set all parameters
    self.bias_energies_full = _types.ensure_ndarray(bias_energies_full,
                                                    ndim=2, kind='numeric')
    self.lag = lag
    assert count_mode == 'sliding', 'Currently the only implemented count_mode is \'sliding\''
    self.count_mode = count_mode
    assert connectivity in [None, 'reversible_pathways', 'summed_count_matrix'], \
        'Currently the only implemented connectivity checks are \'reversible_pathways\', \'summed_count_matrix\' and None'
    self.connectivity = connectivity
    self.dt_traj = dt_traj
    self.maxiter = maxiter
    self.maxerr = maxerr
    self.save_convergence_info = save_convergence_info
    assert init in (None, 'wham'), 'Currently only None and \'wham\' are supported'
    self.init = init
    self.init_maxiter = init_maxiter
    self.init_maxerr = init_maxerr

    # set derived quantities; the shape is read from the validated array,
    # because the raw argument need not have a .shape attribute
    self.nthermo, self.nstates_full = self.bias_energies_full.shape

    # set iteration variables
    self.therm_energies = None
    self.conf_energies = None
    self.log_lagrangian_mult = None
def __init__(self, bias_energies_full, maxiter=10000, maxerr=1.0E-15,
             save_convergence_info=0, dt_traj='1 step', stride=1):
    r"""Weighted Histogram Analysis Method

    Parameters
    ----------
    bias_energies_full : numpy.ndarray(shape=(num_therm_states, num_conf_states)) object
        bias_energies_full[j, i] is the bias energy in units of kT for each
        discrete state i at thermodynamic state j.
    maxiter : int, optional, default=10000
        The maximum number of self-consistent iterations before the
        estimator exits unsuccessfully.
    maxerr : float, optional, default=1.0E-15
        Convergence criterion based on the maximal free energy change in a
        self-consistent iteration step.
    save_convergence_info : int, optional, default=0
        Every save_convergence_info iteration steps, store the actual
        increment and the actual loglikelihood; 0 means no storage.
    dt_traj : str, optional, default='1 step'
        Description of the physical time corresponding to the lag. May be
        used by analysis algorithms such as plotting tools to pretty-print
        the axes. By default '1 step', i.e. there is no physical time unit.
        Specify by a number, whitespace and unit. Permitted units are
        (* is an arbitrary string):

        | 'fs', 'femtosecond*'
        | 'ps', 'picosecond*'
        | 'ns', 'nanosecond*'
        | 'us', 'microsecond*'
        | 'ms', 'millisecond*'
        | 's', 'second*'
    stride : int, optional, default=1
        not used

    Example
    -------
    >>> from pyerna.thermo import WHAM
    >>> import numpy as np
    >>> B = np.array([[0, 0],[0.5, 1.0]])
    >>> wham = WHAM(B)
    >>> ttrajs = [np.array([0,0,0,0,0,0,0,0,0,0]),np.array([1,1,1,1,1,1,1,1,1,1])]
    >>> dtrajs = [np.array([0,0,0,0,1,1,1,0,0,0]),np.array([0,1,0,1,0,1,1,0,0,1])]
    >>> wham = wham.estimate((ttrajs, dtrajs))
    >>> wham.log_likelihood() # doctest: +ELLIPSIS
    -6.6...
    >>> wham.state_counts # doctest: +SKIP
    array([[7, 3], [5, 5]])
    >>> wham.stationary_distribution # doctest: +ELLIPSIS +REPORT_NDIFF
    array([ 0.5..., 0.4...])
    >>> wham.meval('stationary_distribution') # doctest: +ELLIPSIS +REPORT_NDIFF
    [array([ 0.5..., 0.4...]), array([ 0.6..., 0.3...])]

    References
    ----------

    .. [1] Ferrenberg, A.M. and Swensen, R.H. 1988.
        New Monte Carlo Technique for Studying Phase Transitions.
        Phys. Rev. Lett. 23, 2635--2638

    .. [2] Kumar, S. et al 1992.
        The Weighted Histogram Analysis Method for Free-Energy Calculations
        on Biomolecules. I. The Method.
        J. Comp. Chem. 13, 1011--1021

    """
    # set all parameters
    self.bias_energies_full = _types.ensure_ndarray(bias_energies_full,
                                                    ndim=2, kind='numeric')
    self.stride = stride
    self.dt_traj = dt_traj
    self.maxiter = maxiter
    self.maxerr = maxerr
    self.save_convergence_info = save_convergence_info

    # set derived quantities; the shape is read from the validated array,
    # because the raw argument need not have a .shape attribute
    self.nthermo, self.nstates_full = self.bias_energies_full.shape

    # set iteration variables
    self.therm_energies = None
    self.conf_energies = None
def memberships(self, value):
    """Store the membership matrix and derive the state and set counts.

    `value` must be a two-dimensional numeric array whose rows each sum to
    one. ``self.nstates`` and ``self.nsets`` are set to its number of rows
    and columns.
    """
    validated = types.ensure_ndarray(value, ndim=2, kind='numeric')
    self._memberships = validated
    self.nstates, self.nsets = self._memberships.shape

    # stochastic matrix? every row has to sum to one
    row_sums = self._memberships.sum(axis=1)
    expected = np.ones(self.nstates)
    assert np.allclose(row_sums, expected)