Ejemplo n.º 1
0
    def __init__(self, model, estimator, memberships, mlags=None, conf=0.95,
                 err_est=False, n_jobs=1, show_progress=True):
        """Set up the validator from a model, its estimator and set memberships.

        Parameters
        ----------
        model : object
            Model under test. It is forwarded to ``LaggedModelValidator.__init__``
            and its ``stationary_distribution`` weights the memberships.
        estimator : object
            Estimator belonging to ``model``; a copy of its ``active_set`` is
            stored.
        memberships : ndarray(n, m)
            Set memberships to calculate set probabilities. n must be equal to
            the number of active states in model. m is the number of sets.
            memberships must be a row-stochastic matrix (the rows must sum up
            to 1).
        mlags, conf, n_jobs, show_progress
            Forwarded unchanged to ``LaggedModelValidator.__init__``.
        err_est : bool, default=False
            Only stored on the instance; it is not used by this constructor.

        """
        LaggedModelValidator.__init__(self, model, estimator, mlags=mlags,
                                      conf=conf, n_jobs=n_jobs,
                                      show_progress=show_progress)
        # check and store parameters; rebind the local name so that everything
        # below works on the validated array (array-likes such as nested lists
        # have no .shape / .sum of their own)
        memberships = types.ensure_ndarray(memberships, ndim=2, kind='numeric')
        self.memberships = memberships
        self.nstates, self.nsets = memberships.shape
        assert np.allclose(memberships.sum(axis=1), np.ones(self.nstates)), \
            'memberships must be a row-stochastic matrix'
        # active set
        self.active_set = types.ensure_ndarray(np.array(estimator.active_set), kind='i')  # create a copy
        # map from the full set (here defined by the largest state index in active set) to active
        self._full2active = np.zeros(np.max(self.active_set)+1, dtype=int)
        self._full2active[self.active_set] = np.arange(self.nstates)
        # define starting distribution: weight each membership column by the
        # stationary distribution, then normalize every column to sum to one
        self.P0 = memberships * model.stationary_distribution[:, None]
        self.P0 /= self.P0.sum(axis=0)  # column-normalize
        self.err_est = err_est  # TODO: this is currently unused
Ejemplo n.º 2
0
    def expectation(self, a):
        r"""Stationary (equilibrium) average of an observable.

        Parameters
        ----------
        a : (n,) ndarray
            Observable vector on the MSM state space. It is validated to be
            one-dimensional, numeric and of length ``self.nstates``.

        Returns
        -------
        val : float
            Equilibrium expectation value of the given observable

        Notes
        -----
        The equilibrium expectation value of an observable :math:`a` is defined as follows

        .. math::

            \mathbb{E}_{\mu}[a] = \sum_i \pi_i a_i

        :math:`\pi=(\pi_i)` is the stationary vector of the transition matrix :math:`P`.

        """
        # validate the observable first, then weight it by the stationary vector
        observable = _types.ensure_ndarray(a, ndim=1, size=self.nstates, kind='numeric')
        weights = self.stationary_distribution
        return _np.dot(observable, weights)
    def propagate(self, p0, k):
        r""" Propagates the initial distribution p0 k times

        Computes the product

        .. math::

            p_k = p_0^T P^k

        If the lag time of transition matrix :math:`P` is :math:`\tau`, this
        will provide the probability distribution at time :math:`k \tau`.

        Parameters
        ----------
        p0 : ndarray(n,)
            Initial distribution. Vector of size of the active set.

        k : int
            Number of time steps, must be non-negative.

        Returns
        -------
        pk : ndarray(n,)
            Distribution after k steps, vector of size of the active set.
            Only the real part of each intermediate result is kept.

        Notes
        -----
        Step ``i`` is delegated to ``self.eval(i).propagate(., 1)``.
        NOTE(review): this presumably means a different model may act at every
        step - confirm against the definition of ``eval``.

        """
        p0 = _types.ensure_ndarray(p0, ndim=1, kind='numeric')
        assert _types.is_int(k) and k >= 0, 'k must be a non-negative integer'

        if k == 0:
            # zero steps: let the first model apply its own k=0 handling
            return self.eval(0).propagate(p0, k).real

        # one single-step propagation per model index 0 .. k-1
        pprop = p0
        for i in range(k):
            pprop = self.eval(i).propagate(pprop, 1).real
        return pprop
Ejemplo n.º 4
0
 def __init__(self,
              bias_energies_full,
              lag,
              count_mode='sliding',
              connectivity='largest',
              maxiter=10000,
              maxerr=1E-15,
              dt_traj='1 step',
              save_convergence_info=0,
              init=None):
     """Store the estimator parameters and reset the iteration variables.

     Parameters
     ----------
     bias_energies_full : array-like, two-dimensional, numeric
         Bias energies. The first axis is stored as ``nthermo``, the second
         as ``nstates_full``.
     lag : int
         Lag time, stored as given.
     count_mode : str, default='sliding'
         Only 'sliding' is accepted.
     connectivity : str, default='largest'
         Only 'largest' is accepted.
     maxiter, maxerr, save_convergence_info
         Iteration settings, stored as given.
     dt_traj : str, default='1 step'
         Trajectory time step description; also wrapped in ``_TimeUnit``.
     init : None or 'wham', default=None
         Initialization mode; any other value is rejected.

     """
     # set all parameters
     self.bias_energies_full = _types.ensure_ndarray(bias_energies_full,
                                                     ndim=2,
                                                     kind='numeric')
     self.lag = lag
     assert count_mode == 'sliding', 'Currently the only implemented count_mode is \'sliding\''
     self.count_mode = count_mode
     assert connectivity == 'largest', 'Currently the only implemented connectivity is \'largest\''
     self.connectivity = connectivity
     assert init in (
         None, 'wham'), 'Currently only None and \'wham\' are supported'
     self.init = init
     self.dt_traj = dt_traj
     self.maxiter = maxiter
     self.maxerr = maxerr
     self.save_convergence_info = save_convergence_info
     # set derived quantities; read the shape from the validated array so that
     # array-likes without a .shape attribute (e.g. nested lists) work too
     self.nthermo, self.nstates_full = self.bias_energies_full.shape
     self.timestep_traj = _TimeUnit(dt_traj)
     # set iteration variables
     self.therm_energies = None
     self.conf_energies = None
     self.log_lagrangian_mult = None
Ejemplo n.º 5
0
    def _param_init(self):
        """Validate the configuration and allocate the running statistics.

        Checks the input dimension against the requested output dimension,
        prepares the mean vector (user supplied or zeros), resets the sample
        counters, allocates both covariance matrices and registers two
        progress stages.

        Returns
        -------
        int
            Always 0; per the original comment, no lagged data is requested
            in the zeroth pass.

        Raises
        ------
        RuntimeError
            If ``_force_eigenvalues_le_one`` is set and the lag is not a
            multiple of the stride.
        """
        n_features = self.data_producer.dimension()
        assert n_features > 0, "zero dimension from data producer"
        assert self._dim <= n_features, (
            "requested more output dimensions (%i) than dimension"
            " of input data (%i)" % (self._dim, n_features))
        if self._force_eigenvalues_le_one:
            # only look at the stride when the constraint is actually requested
            if self._lag % self._param_with_stride != 0:
                raise RuntimeError(
                    "When using TICA with force_eigenvalues_le_one, lag must be a multiple of stride."
                )

        # mean vector: zeros unless the user provided one
        if self.mu is None:
            self.mu = np.zeros(n_features)
            self._given_mean = False
        else:
            self.mu = types.ensure_ndarray(self.mu, shape=(n_features, ))
            self._given_mean = True

        # sample counters for the mean and the two covariance estimates
        self._N_mean = self._N_cov = self._N_cov_tau = 0

        # instantaneous and time-lagged covariance accumulators
        self.cov = np.zeros((n_features, n_features))
        self.cov_tau = np.zeros_like(self.cov)

        message = ("Running TICA with tau=%i; Estimating two covariance matrices"
                   " with dimension (%i, %i)" % (self._lag, n_features, n_features))
        self._logger.debug(message)

        # one progress stage per pass, each spanning all chunks
        n_chunks = self._n_chunks(self._param_with_stride)
        for stage, description in enumerate(("calculate mean", "calculate covariances")):
            self._progress_register(n_chunks, description, stage)

        return 0  # in zero'th pass don't request lagged data
Ejemplo n.º 6
0
 def fingerprint_relaxation(self, p0, a, k=None, ncv=None):
     """Relaxation fingerprint for vectors given on observed or hidden states.

     Vectors of length ``nstates_obs`` are first multiplied by
     ``observation_probabilities``; the result must have length ``nstates``
     and is handed to ``_MSM.fingerprint_relaxation``.

     Parameters
     ----------
     p0 : ndarray, one-dimensional, numeric
         Initial distribution.
     a : ndarray, one-dimensional, numeric
         Observable; must have the same length as ``p0``.
     k, ncv
         Accepted but not used in this body.

     Raises
     ------
     ValueError
         If the vectors fit neither the hidden nor the observed state count.
     """
     # validate both vectors; a must match p0 in length
     p0 = _types.ensure_ndarray(p0, ndim=1, kind='numeric')
     a = _types.ensure_ndarray(a, ndim=1, kind='numeric', size=len(p0))

     # given on the observed states: multiply both by the observation probabilities
     if len(a) == self.nstates_obs:
         p0 = _np.dot(self.observation_probabilities, p0)
         a = _np.dot(self.observation_probabilities, a)

     # anything that does not match the hidden state count by now is an error
     n_entries = len(a)
     if n_entries != self.nstates:
         raise ValueError(
             'observable vectors have size %s which is incompatible with both hidden (%s)'
             ' and observed states (%s)' %
             (n_entries, self.nstates, self.nstates_obs))
     return _MSM.fingerprint_relaxation(self, p0, a)
Ejemplo n.º 7
0
 def test_estimator(self, test_estimator):
     """Store the estimator and rebuild the active-set bookkeeping.

     Presumably a property setter whose decorator is outside this view.
     Keeps a copy of the estimator's ``active_set`` and a lookup array that
     maps full state indices to positions in the active set.
     """
     self._test_estimator = test_estimator
     # np.array(...) creates a copy of the estimator's active set
     active_copy = np.array(test_estimator.active_set)
     self.active_set = types.ensure_ndarray(active_copy, kind='i')
     # map from the full set (here defined by the largest state index in active set) to active
     n_full = np.max(self.active_set) + 1
     self._full2active = lookup = np.zeros(n_full, dtype=int)
     lookup[self.active_set] = np.arange(self.nstates)
Ejemplo n.º 8
0
    def __init__(self,
                 test_model,
                 test_estimator,
                 mlags=None,
                 conf=0.95,
                 err_est=False,
                 n_jobs=None,
                 show_progress=True):
        """Store the model/estimator pair and derive the lag multiples to test.

        Parameters
        ----------
        test_model : object
            Model to validate; a deep copy is stored.
        test_estimator : object
            Estimator belonging to the model. Its ``lag`` and trajectory
            lengths bound the admissible lag multiples.
        mlags : None, int or int-array, default=None
            Lag multiples. An int m is expanded to ``np.arange(m)``; None uses
            the largest multiple fitting into the longest trajectory as m.
            Entries above that maximum or below zero are dropped with a
            warning.
        conf : float, default=0.95
            Stored, and passed to ``set_model_params`` for sampled models.
        err_est : bool, default=False
            Request errors on the estimated models.
        n_jobs, show_progress
            Stored as given.

        Raises
        ------
        ValueError
            If ``err_est`` is set but the model is not a ``SampledModel``.
        """
        from copy import deepcopy

        # copy the test model, since the estimation of cktest modifies the model.
        self.test_model = deepcopy(test_model)
        self.test_estimator = test_estimator

        # longest trajectory, either from the discrete trajectories or,
        # if the estimator has none, from its trajectory lengths
        try:
            traj_lengths = [
                len(dtraj)
                for dtraj in test_estimator.discrete_trajectories_full
            ]
            longest = np.max(traj_lengths)
        except AttributeError:
            longest = np.max(test_estimator.trajectory_lengths())
        largest_multiple = int(math.floor(longest / test_estimator.lag))

        # normalize mlags to a one-dimensional integer array
        if mlags is None:
            mlags = largest_multiple
        if types.is_int(mlags):
            mlags = np.arange(mlags)
        mlags = types.ensure_ndarray(mlags, ndim=1, kind='i')

        # drop multiples that do not fit into the data
        too_large = mlags > largest_multiple
        if np.any(too_large):
            mlags = mlags[~too_large]
            self.logger.warning(
                'Changed mlags as some mlags exceeded maximum trajectory length.'
            )
        # drop negative multiples
        negative = mlags < 0
        if np.any(negative):
            mlags = mlags[~negative]
            self.logger.warning('Changed mlags as some mlags were negative.')
        self.mlags = mlags

        # confidence interval and error handling
        self.conf = conf
        model_class = self.test_model.__class__
        self.has_errors = issubclass(model_class, SampledModel)
        if self.has_errors:
            self.test_model.set_model_params(conf=conf)
        self.err_est = err_est
        if err_est and not self.has_errors:
            raise ValueError(
                'Requested errors on the estimated models, but the model '
                'is not able to calculate errors at all')
        self.n_jobs = n_jobs
        self.show_progress = show_progress
Ejemplo n.º 9
0
    def propagate(self, p0, k):
        r""" Propagates the initial distribution p0 k times

        Computes the product

        .. math::

            p_k = p_0^T P^k

        If the lag time of transition matrix :math:`P` is :math:`\tau`, this
        will provide the probability distribution at time :math:`k \tau`.

        Parameters
        ----------
        p0 : ndarray(n)
            Initial distribution. A vector of length ``nstates_obs`` is first
            multiplied by ``observation_probabilities`` and the result is
            multiplied by them again after propagation.

        k : int
            Number of time steps, must be non-negative.

        Returns
        -------
        pk : ndarray(n)
            Distribution after k steps, normalized to sum to one. For k = 0
            this is just the normalized input.

        """
        p0 = _types.ensure_ndarray(p0, ndim=1, kind='numeric')
        assert _types.is_int(k) and k >= 0, 'k must be a non-negative integer'
        if k == 0:
            # nothing to propagate, only normalize
            return p0 / p0.sum()

        # remember whether the input lives on the observed states
        given_on_observed = len(p0) == self.nstates_obs
        if given_on_observed:
            # project to hidden and compute
            p0 = _np.dot(self.observation_probabilities, p0)

        self._ensure_eigendecomposition(self.nstates)
        from pyemma.util.linalg import mdot
        # spectral form of p0^T P^k: right eigenvectors, k-th power of the
        # eigenvalues on the diagonal, left eigenvectors
        # NOTE(review): no .real is taken here - confirm the decomposition is real
        right = self.eigenvectors_right()
        powered = _np.diag(_np.power(self.eigenvalues(), k))
        left = self.eigenvectors_left()
        pk = mdot(p0.T, right, powered, left)

        if given_on_observed:
            # convert back to microstate space
            pk = _np.dot(pk, self.observation_probabilities)

        # normalize to 1.0 and return
        total = pk.sum()
        return pk / total
Ejemplo n.º 10
0
    def propagate(self, p0, k):
        r""" Propagates the initial distribution p0 k times

        Computes the product

        .. math::

            p_k = p_0^T P^k

        If the lag time of transition matrix :math:`P` is :math:`\tau`, this
        will provide the probability distribution at time :math:`k \tau`.

        Parameters
        ----------
        p0 : ndarray(n,)
            Initial distribution. Vector of size of the active set.

        k : int
            Number of time steps, must be non-negative.

        Returns
        -------
        pk : ndarray(n,)
            Distribution after k steps, normalized to sum to one. Vector of
            size of the active set. For k = 0 this is the normalized input.

        """
        p0 = _types.ensure_ndarray(p0,
                                   ndim=1,
                                   size=self.nstates,
                                   kind='numeric')
        assert _types.is_int(k) and k >= 0, 'k must be a non-negative integer'

        if k == 0:  # simply return p0 normalized
            return p0 / p0.sum()

        if self.is_sparse:  # sparse: we don't have a full eigenvalue set, so just propagate
            pk = _np.array(p0)
            # p^T P is evaluated as P^T p through the matrix' own dot method;
            # np.dot is not aware of scipy sparse matrices and would not
            # perform a matrix-vector product here
            # NOTE(review): assumes transition_matrix is a scipy sparse matrix
            # whenever is_sparse is set - confirm
            transposed = self.transition_matrix.T
            for _ in range(k):
                pk = transposed.dot(pk)
        else:  # dense: employ eigenvalue decomposition
            self._ensure_eigendecomposition(self.nstates)
            from pyemma.util.linalg import mdot
            pk = mdot(p0.T, self.eigenvectors_right(),
                      _np.diag(_np.power(self.eigenvalues(), k)),
                      self.eigenvectors_left()).real
        # normalize to 1.0 and return
        return pk / pk.sum()
Ejemplo n.º 11
0
 def correlation(self, a, b=None, maxtime=None, k=None, ncv=None):
     """Correlation for observables given on observed or hidden states.

     Vectors of length ``nstates_obs`` are first multiplied by
     ``observation_probabilities``; the result must have length ``nstates``
     and is handed to ``_MSM.correlation``.

     Parameters
     ----------
     a : ndarray, one-dimensional, numeric
         First observable.
     b : ndarray or None, default=None
         Optional second observable of the same length as ``a``.
     maxtime
         Forwarded to ``_MSM.correlation``.
     k, ncv
         Accepted but not used in this body.

     Raises
     ------
     ValueError
         If the vectors fit neither the hidden nor the observed state count.
     """
     # validate the observables; b is optional but must match a in length
     a = _types.ensure_ndarray(a, ndim=1, kind='numeric')
     b = _types.ensure_ndarray_or_None(b,
                                       ndim=1,
                                       kind='numeric',
                                       size=len(a))

     # given on the observed states: multiply by the observation probabilities
     if len(a) == self.nstates_obs:
         a = _np.dot(self.observation_probabilities, a)
         if b is not None:
             b = _np.dot(self.observation_probabilities, b)

     # anything that does not match the hidden state count by now is an error
     n_entries = len(a)
     if n_entries != self.nstates:
         raise ValueError(
             'observable vectors have size %s which is incompatible with both hidden (%s)'
             ' and observed states (%s)' %
             (n_entries, self.nstates, self.nstates_obs))
     return _MSM.correlation(self, a, b=b, maxtime=maxtime)
Ejemplo n.º 12
0
    def _param_init(self):
        """Reset counters and allocate the mean vector and covariance matrix.

        Uses a user supplied mean when ``self.mu`` is set, otherwise starts
        from zeros, and registers one progress stage for the mean pass and
        one for the covariance pass.
        """
        # sample counters for the mean and the covariance estimate
        self._N_mean = self._N_cov = 0

        n_features = self.data_producer.dimension()
        self._logger.info("Running PCA on %i dimensional input" % n_features)
        assert n_features > 0, "Incoming data of PCA has 0 dimension!"

        # mean vector: zeros unless the user provided one
        if self.mu is None:
            self.mu = np.zeros(n_features)
            self._given_mean = False
        else:
            self.mu = types.ensure_ndarray(self.mu, shape=(n_features,))
            self._given_mean = True

        self.cov = np.zeros((n_features, n_features))

        # one progress stage per pass, each spanning all chunks
        n_chunks = self._n_chunks(self._param_with_stride)
        for stage, description in enumerate(("calculate mean", "calculate covariances")):
            self._progress_register(n_chunks, description=description, stage=stage)
Ejemplo n.º 13
0
 def __init__(self,
              bias_energies_full,
              maxiter=10000,
              maxerr=1.0E-15,
              save_convergence_info=0,
              dt_traj='1 step',
              stride=1):
     """Store the estimator parameters and reset the iteration variables.

     Parameters
     ----------
     bias_energies_full : array-like, two-dimensional, numeric
         Bias energies. The first axis is stored as ``nthermo``, the second
         as ``nstates_full``.
     maxiter, maxerr, save_convergence_info
         Iteration settings, stored as given.
     dt_traj : str, default='1 step'
         Trajectory time step description; also wrapped in ``_TimeUnit``.
     stride : int, default=1
         Stored as given.

     """
     self.bias_energies_full = _types.ensure_ndarray(bias_energies_full,
                                                     ndim=2,
                                                     kind='numeric')
     self.stride = stride
     self.dt_traj = dt_traj
     self.maxiter = maxiter
     self.maxerr = maxerr
     self.save_convergence_info = save_convergence_info
     # set derived quantities; read the shape from the validated array so that
     # array-likes without a .shape attribute (e.g. nested lists) work too
     self.nthermo, self.nstates_full = self.bias_energies_full.shape
     self.timestep_traj = _TimeUnit(dt_traj)
     # set iteration variables
     self.therm_energies = None
     self.conf_energies = None
Ejemplo n.º 14
0
 def memberships(self, value):
     """Validate and store the membership matrix.

     Presumably a property setter whose decorator is outside this view.
     ``value`` must be two-dimensional and numeric; its shape defines
     ``nstates`` and ``nsets`` and every row has to sum to one.
     """
     validated = types.ensure_ndarray(value, ndim=2, kind='numeric')
     self._memberships = validated
     self.nstates, self.nsets = validated.shape
     row_sums = validated.sum(axis=1)
     assert np.allclose(row_sums, np.ones(self.nstates))  # stochastic matrix?
Ejemplo n.º 15
0
    def plot_network(self,
                     state_sizes=None,
                     state_scale=1.0,
                     state_colors='#ff5500',
                     state_labels='auto',
                     arrow_scale=1.0,
                     arrow_curvature=1.0,
                     arrow_labels='weights',
                     arrow_label_format='%10.2f',
                     max_width=12,
                     max_height=12,
                     figpadding=0.2,
                     xticks=False,
                     yticks=False,
                     show_frame=False,
                     **textkwargs):
        """
        Draws a network using discs and curved arrows.

        The thicknesses and labels of the arrows are taken from the off-diagonal matrix elements in A.

        Parameters
        ----------
        state_sizes : array-like or None, default=None
            Relative node sizes, rescaled by their maximum. None draws all
            nodes with the same size.
        state_scale : float, default=1.0
            Multiplier applied to all node sizes.
        state_colors : str, list or numeric array, default='#ff5500'
            A single color for all nodes, one color per node, or a
            one-dimensional numeric array that is divided by its maximum and
            mapped through ``plt.cm.binary``. None selects the default color.
        state_labels : 'auto' or sequence, default='auto'
            Node labels; 'auto' uses the node indices. A sequence must contain
            one entry per node.
        arrow_scale : float, default=1.0
            Multiplier for the arrow widths. It is additionally divided by the
            largest off-diagonal element of A and by sqrt(number of nodes).
        arrow_curvature : float, default=1.0
            Forwarded to ``_draw_arrow``.
        arrow_labels : 'weights', None or ndarray, default='weights'
            'weights' labels every arrow with the matching element of A, None
            uses empty labels, an ndarray supplies the labels directly.
        arrow_label_format : str, default='%10.2f'
            %-format applied to the elements of A when labelling by weights.
        max_width, max_height : number, default=12
            Upper bounds for the figure size handed to ``set_size_inches``.
        figpadding : float, default=0.2
            Fraction of the coordinate range added as margin on every side.
        xticks, yticks : bool, default=False
            Keep the axis ticks if True, remove them otherwise.
        show_frame : bool, default=False
            Passed to ``set_frame_on``.
        textkwargs
            Keyword arguments for the node label text. Defaults are filled in
            for size, alignment and color.

        Returns
        -------
        fig : the current matplotlib figure the network was drawn into

        Raises
        ------
        ValueError
            If ``arrow_labels`` is none of the accepted forms.

        """
        if self.pos is None:
            self.layout_automatic()
        # number of nodes
        n = len(self.pos)
        # get bounds and pad figure
        xmin = np.min(self.pos[:, 0])
        xmax = np.max(self.pos[:, 0])
        Dx = xmax - xmin
        xmin -= Dx * figpadding
        xmax += Dx * figpadding
        Dx *= 1 + figpadding
        ymin = np.min(self.pos[:, 1])
        ymax = np.max(self.pos[:, 1])
        Dy = ymax - ymin
        ymin -= Dy * figpadding
        ymax += Dy * figpadding
        Dy *= 1 + figpadding
        # sizes of nodes
        if state_sizes is None:
            state_sizes = 0.5 * state_scale * \
                min(Dx, Dy)**2 * np.ones(n) / float(n)
        else:
            state_sizes = 0.5 * state_scale * \
                min(Dx, Dy)**2 * state_sizes / (np.max(state_sizes) * float(n))
        # automatic arrow rescaling
        arrow_scale *= 1.0 / \
            (np.max(self.A - np.diag(np.diag(self.A))) * math.sqrt(n))
        # size figure: fill the bounding box along the tighter direction
        if (Dx / max_width > Dy / max_height):
            figsize = (max_width, Dy * (max_width / Dx))
        else:
            figsize = (Dx / Dy * max_height, max_height)
        fig = plt.gcf()
        fig.set_size_inches(figsize, forward=True)
        # font sizes: enlarged while drawing, always restored in the finally
        # clause so that a failure cannot leak the setting into later plots
        old_fontsize = rcParams['font.size']
        rcParams['font.size'] = 20
        try:
            # remove axis labels
            frame = plt.gca()
            if not xticks:
                frame.axes.get_xaxis().set_ticks([])
            if not yticks:
                frame.axes.get_yaxis().set_ticks([])
            # show or suppress frame
            frame.set_frame_on(show_frame)
            # set node labels; compare by value, and only for strings so that
            # sequences of labels are never compared element-wise
            if isinstance(state_labels, str) and state_labels == 'auto':
                state_labels = [str(i) for i in range(n)]
            else:
                assert len(
                    state_labels
                ) == n, "Mistmatch between nstates and nr. state_labels (%u vs %u)" % (
                    n, len(state_labels))
            # set node colors
            if state_colors is None:
                state_colors = '#ff5500'  # None is not acceptable
            if isinstance(state_colors, str):
                state_colors = [state_colors] * n
            if isinstance(state_colors, list):
                assert len(
                    state_colors
                ) == n, "Mistmatch between nstates and nr. state_colors (%u vs %u)" % (
                    n, len(state_colors))
            try:
                colorscales = _types.ensure_ndarray(state_colors,
                                                    ndim=1,
                                                    kind='numeric')
            except Exception:
                pass  # not a numeric vector: assume we have a list of colors now.
            else:
                # out-of-place division: works for integer arrays and leaves
                # the caller's array untouched
                colorscales = colorscales / colorscales.max()
                state_colors = [
                    plt.cm.binary(int(256.0 * colorscales[i])) for i in range(n)
                ]
            # set arrow labels
            if isinstance(arrow_labels, np.ndarray):
                L = arrow_labels
            elif arrow_labels is None:
                L = np.empty(np.shape(self.A), dtype=object)
                L[:, :] = ''
            elif isinstance(arrow_labels, str) and arrow_labels.lower() == 'weights':
                L = np.empty(np.shape(self.A), dtype=object)
                for i in range(n):
                    for j in range(n):
                        L[i, j] = arrow_label_format % self.A[i, j]
            else:
                raise ValueError('invalid arrow label format')

            # Set the default values for the text dictionary
            textkwargs.setdefault('size', 14)
            textkwargs.setdefault('horizontalalignment', 'center')
            textkwargs.setdefault('verticalalignment', 'center')
            textkwargs.setdefault('color', 'black')

            # draw circles
            axes = fig.gca()
            circles = []
            for i in range(n):
                c = plt.Circle(self.pos[i],
                               radius=math.sqrt(0.5 * state_sizes[i]) / 2.0,
                               color=state_colors[i],
                               zorder=2)
                circles.append(c)
                axes.add_artist(c)
                # add annotation
                plt.text(self.pos[i][0],
                         self.pos[i][1],
                         state_labels[i],
                         zorder=3,
                         **textkwargs)

            assert len(circles) == n, "%i != %i" % (len(circles), n)

            # draw arrows: for every unordered pair first i -> j, then j -> i
            for i in range(n):
                for j in range(i + 1, n):
                    for src, dst in ((i, j), (j, i)):
                        if abs(self.A[src, dst]) > 0:
                            self._draw_arrow(self.pos[src, 0],
                                             self.pos[src, 1],
                                             self.pos[dst, 0],
                                             self.pos[dst, 1],
                                             Dx,
                                             Dy,
                                             label=str(L[src, dst]),
                                             width=arrow_scale * self.A[src, dst],
                                             arrow_curvature=arrow_curvature,
                                             patchA=circles[src],
                                             patchB=circles[dst],
                                             shrinkA=3,
                                             shrinkB=0)

            # plot
            plt.xlim(xmin, xmax)
            plt.ylim(ymin, ymax)
        finally:
            rcParams['font.size'] = old_fontsize
        return fig
Ejemplo n.º 16
0
    def __init__(
        self, bias_energies_full, lag, count_mode='sliding', connectivity='largest',
        maxiter=10000, maxerr=1.0E-15, save_convergence_info=0, dt_traj='1 step',
        init=None, init_maxiter=10000, init_maxerr=1.0E-8):
        r""" Discrete Transition(-based) Reweighting Analysis Method

        Parameters
        ----------
        bias_energies_full : numpy.ndarray(shape=(num_therm_states, num_conf_states)) object
            bias_energies_full[j, i] is the bias energy in units of kT for each discrete state i
            at thermodynamic state j.
        lag : int
            Integer lag time at which transitions are counted.
        count_mode : str, optional, default='sliding'
            Mode to obtain count matrices from discrete trajectories. Should be one of:
            * 'sliding' : a trajectory of length T will have :math:`T-\tau` counts at time indexes
                  .. math::
                     (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)
            * 'sample' : a trajectory of length T will have :math:`T/\tau` counts at time indexes
                  .. math::
                        (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., ((T/\tau-1) \tau \rightarrow T)
            Currently only 'sliding' is supported.
        connectivity : str, optional, default='largest'
            Defines what should be considered a connected set in the joint space of conformations and
            thermodynamic ensembles. Currently only 'largest' is supported.
        maxiter : int, optional, default=10000
            The maximum number of self-consistent iterations before the estimator exits unsuccessfully.
        maxerr : float, optional, default=1.0E-15
            Convergence criterion based on the maximal free energy change in a self-consistent
            iteration step.
        save_convergence_info : int, optional, default=0
            Every save_convergence_info iteration steps, store the actual increment
            and the actual loglikelihood; 0 means no storage.
        dt_traj : str, optional, default='1 step'
            Description of the physical time corresponding to the lag. May be used by analysis
            algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
            there is no physical time unit.  Specify by a number, whitespace and unit. Permitted
            units are (* is an arbitrary string):

            |  'fs',   'femtosecond*'
            |  'ps',   'picosecond*'
            |  'ns',   'nanosecond*'
            |  'us',   'microsecond*'
            |  'ms',   'millisecond*'
            |  's',    'second*'
        init : str, optional, default=None
            Use a specific initialization for self-consistent iteration:

            | None:    use a hard-coded guess for free energies and Lagrangian multipliers
            | 'wham':  perform a short WHAM estimate to initialize the free energies
        init_maxiter : int, optional, default=10000
            The maximum number of self-consistent iterations during the initialization.
        init_maxerr : float, optional, default=1.0E-8
            Convergence criterion for the initialization.

        Example
        -------
        >>> from pyemma.thermo import DTRAM
        >>> import numpy as np
        >>> B = np.array([[0, 0],[0.5, 1.0]])
        >>> dtram = DTRAM(B, 1)
        >>> ttrajs = [np.array([0,0,0,0,0,0,0,0,0,0]),np.array([1,1,1,1,1,1,1,1,1,1])]
        >>> dtrajs = [np.array([0,0,0,0,1,1,1,0,0,0]),np.array([0,1,0,1,0,1,1,0,0,1])]
        >>> dtram = dtram.estimate((ttrajs, dtrajs))
        >>> dtram.log_likelihood() # doctest: +ELLIPSIS
        -9.805...
        >>> dtram.count_matrices # doctest: +SKIP
        array([[[5, 1],
                [1, 2]],

               [[1, 4],
                [3, 1]]], dtype=int32)
        >>> dtram.stationary_distribution # doctest: +ELLIPSIS
        array([ 0.38...,  0.61...])
        >>> dtram.meval('stationary_distribution') # doctest: +ELLIPSIS
        [array([ 0.38...,  0.61...]), array([ 0.50...,  0.49...])]

        References
        ----------

        .. [1] Wu, H. et al 2014
            Statistically optimal analysis of state-discretized trajectory data from multiple thermodynamic states
            J. Chem. Phys. 141, 214106

        """
        # set all parameters
        self.bias_energies_full = _types.ensure_ndarray(bias_energies_full, ndim=2, kind='numeric')
        self.lag = lag
        assert count_mode == 'sliding', 'Currently the only implemented count_mode is \'sliding\''
        self.count_mode = count_mode
        assert connectivity == 'largest', 'Currently the only implemented connectivity is \'largest\''
        self.connectivity = connectivity
        self.dt_traj = dt_traj
        self.maxiter = maxiter
        self.maxerr = maxerr
        self.save_convergence_info = save_convergence_info
        assert init in (None, 'wham'), 'Currently only None and \'wham\' are supported'
        self.init = init
        self.init_maxiter = init_maxiter
        self.init_maxerr = init_maxerr
        # set derived quantities; read the shape from the validated array so that
        # array-likes without a .shape attribute (e.g. nested lists) work too
        self.nthermo, self.nstates_full = self.bias_energies_full.shape
        self.timestep_traj = _TimeUnit(dt_traj)
        # set iteration variables
        self.therm_energies = None
        self.conf_energies = None
        self.log_lagrangian_mult = None
Ejemplo n.º 17
0
    def cktest(self,
               n_observables=None,
               observables='phi',
               statistics='psi',
               mlags=10,
               n_jobs=1,
               show_progress=True,
               iterable=None):
        r"""Do the Chapman-Kolmogorov test by computing predictions for higher lag times and by performing estimations at higher lag times.

        Notes
        -----

        This method computes two sets of time-lagged covariance matrices

        * estimates at higher lag times :

          .. math::

              \left\langle \mathbf{K}(n\tau)g_{i},f_{j}\right\rangle_{\rho_{0}}

          where :math:`\rho_{0}` is the empirical distribution implicitly defined
          by all data points from time steps 0 to T-tau in all trajectories,
          :math:`\mathbf{K}(n\tau)` is a rank-reduced Koopman matrix estimated
          at the lag-time n*tau and g and f are some functions of the data.
          Rank-reduction of the Koopman matrix is controlled by the `dim`
          parameter of :func:`vamp <pyemma.coordinates.vamp>`.

        * predictions at higher lag times :

          .. math::

              \left\langle \mathbf{K}^{n}(\tau)g_{i},f_{j}\right\rangle_{\rho_{0}}

          where :math:`\mathbf{K}^{n}` is the n'th power of the rank-reduced
          Koopman matrix contained in self.


        The Chapman-Kolmogorov test is to compare the predictions to the
        estimates.

        Parameters
        ----------
        n_observables : int, optional, default=None
            Limit the number of default observables (and of default statistics)
            to this number. Values above ``self.dimension()`` are reduced to
            it with a warning; None uses ``self.dimension()``.
            Only used if `observables` is 'phi' or `statistics` is 'psi'.

        observables : np.ndarray((input_dimension, n_observables)) or 'phi'
            Coefficients that express one or multiple observables :math:`g`
            in the basis of the input features.
            This parameter can be 'phi'. In that case, the dominant
            right singular functions of the Koopman operator estimated
            at the smallest lag time are used as default observables.

        statistics : np.ndarray((input_dimension, n_statistics)) or 'psi'
            Coefficients that express one or multiple statistics :math:`f`
            in the basis of the input features.
            This parameter can be 'psi'. In that case, the dominant
            left singular functions of the Koopman operator estimated
            at the smallest lag time are used as default statistics.

        mlags : int or int-array, default=10
            multiples of lag times for testing the Model, e.g. range(10).
            A single int will trigger a range, i.e. mlags=10 maps to
            mlags=range(10).
            Note that you need to be able to do a model prediction for each
            of these lag time multiples, e.g. the value 0 only make sense
            if model.expectation(lag_multiple=0) will work.

        n_jobs : int, default=1
            how many jobs to use during calculation

        show_progress : bool, default=True
            Show progressbars for calculation?

        iterable : any data format that `pyemma.coordinates.vamp()` accepts as input, optional
            If `iterable` is None, the same data source with which VAMP
            was initialized will be used for all estimation.
            Otherwise, all estimates (not predictions) from data will be computed
            from the data contained in `iterable`.

        Returns
        -------
        vckv : :class:`VAMPChapmanKolmogorovValidator <pyemma.coordinates.transform.VAMPChapmanKolmogorovValidator>`
            Contains the estimated and the predicted covariance matrices.
            The object can be plotted with :func:`plot_cktest <pyemma.plots.plot_cktest>` with the option `y01=False`.
        """
        # cap the number of default observables/statistics at the model dimension
        if n_observables is not None:
            if n_observables > self.dimension():
                warnings.warn(
                    'Selected singular functions as observables but dimension '
                    'is lower than requested number of observables.')
                n_observables = self.dimension()
        else:
            n_observables = self.dimension()

        if isinstance(observables, str) and observables == 'phi':
            observables = self.singular_vectors_right[:, 0:n_observables]
            observables_mean_free = True
        else:
            # keep the validated array so that array-likes are converted
            # instead of being passed on unchanged
            observables = ensure_ndarray(observables, ndim=2)
            observables_mean_free = False

        if isinstance(statistics, str) and statistics == 'psi':
            statistics = self.singular_vectors_left[:, 0:n_observables]
            statistics_mean_free = True
        else:
            # None stays None, everything else is converted to an array
            statistics = ensure_ndarray_or_None(statistics, ndim=2)
            statistics_mean_free = False

        ck = VAMPChapmanKolmogorovValidator(self.model,
                                            self,
                                            observables,
                                            statistics,
                                            observables_mean_free,
                                            statistics_mean_free,
                                            mlags=mlags,
                                            n_jobs=n_jobs,
                                            show_progress=show_progress)

        # default to the data source this estimator was initialized with
        if iterable is None:
            iterable = self.data_producer

        ck.estimate(iterable)
        return ck
Ejemplo n.º 18
0
    def __init__(self,
                 bias_energies_full,
                 lag,
                 count_mode='sliding',
                 connectivity='reversible_pathways',
                 maxiter=10000,
                 maxerr=1.0E-15,
                 save_convergence_info=0,
                 dt_traj='1 step',
                 init=None,
                 init_maxiter=10000,
                 init_maxerr=1.0E-8):
        r""" Discrete Transition(-based) Reweighting Analysis Method

        Parameters
        ----------
        bias_energies_full : numpy.ndarray(shape=(num_therm_states, num_conf_states)) object
            bias_energies_full[j, i] is the bias energy in units of kT for each discrete state i
            at thermodynamic state j.
        lag : int
            Integer lag time at which transitions are counted.
        count_mode : str, optional, default='sliding'
            Mode to obtain count matrices from discrete trajectories. Should be one of:
            * 'sliding' : a trajectory of length T will have :math:`T-\tau` counts at time indexes
                  .. math::
                     (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)
            * 'sample' : a trajectory of length T will have :math:`T/\tau` counts at time indexes
                  .. math::
                        (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., ((T/\tau-1) \tau \rightarrow T)
            Currently only 'sliding' is supported.
        connectivity : str, optional, default='reversible_pathways'
            One of 'reversible_pathways', 'summed_count_matrix' or None.
            Defines what should be considered a connected set in the joint (product)
            space of conformations and thermodynamic ensembles.
            * 'reversible_pathways' : requires that every state in the connected set
              can be reached by following a pathway of reversible transitions. A
              reversible transition between two Markov states (within the same
              thermodynamic state k) is a pair of Markov states that belong to the
              same strongly connected component of the count matrix (from
              thermodynamic state k). A pathway of reversible transitions is a list of
              reversible transitions [(i_1, i_2), (i_2, i_3),..., (i_(N-2), i_(N-1)),
              (i_(N-1), i_N)]. The thermodynamic state where the reversible
              transitions happen, is ignored in constructing the reversible pathways.
              This is equivalent to assuming that two ensembles overlap at some Markov
              state whenever there exist frames from both ensembles in that Markov
              state.
            * 'summed_count_matrix' : all thermodynamic states are assumed to overlap.
              The connected set is then computed by summing the count matrices over
              all thermodynamic states and taking its largest strongly connected set.
              Not recommended!
            * None : assume that everything is connected. For debugging.
            For more details see :func:`pyemma.thermo.extensions.cset.compute_csets_dTRAM`.
        maxiter : int, optional, default=10000
            The maximum number of self-consistent iterations before the estimator exits unsuccessfully.
        maxerr : float, optional, default=1.0E-15
            Convergence criterion based on the maximal free energy change in a self-consistent
            iteration step.
        save_convergence_info : int, optional, default=0
            Every save_convergence_info iteration steps, store the actual increment
            and the actual log-likelihood; 0 means no storage.
        dt_traj : str, optional, default='1 step'
            Description of the physical time corresponding to the lag. May be used by analysis
            algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
            there is no physical time unit.  Specify by a number, whitespace and unit. Permitted
            units are (* is an arbitrary string):

            |  'fs',   'femtosecond*'
            |  'ps',   'picosecond*'
            |  'ns',   'nanosecond*'
            |  'us',   'microsecond*'
            |  'ms',   'millisecond*'
            |  's',    'second*'
        init : str, optional, default=None
            Use a specific initialization for self-consistent iteration:

            | None:    use a hard-coded guess for free energies and Lagrangian multipliers
            | 'wham':  perform a short WHAM estimate to initialize the free energies
        init_maxiter : int, optional, default=10000
            The maximum number of self-consistent iterations during the initialization.
        init_maxerr : float, optional, default=1.0E-8
            Convergence criterion for the initialization.

        Raises
        ------
        AssertionError
            If count_mode is not 'sliding', if connectivity is not one of the
            supported values, or if init is neither None nor 'wham'.

        Example
        -------
        >>> from pyemma.thermo import DTRAM
        >>> import numpy as np
        >>> B = np.array([[0, 0],[0.5, 1.0]])
        >>> dtram = DTRAM(B, 1)
        >>> ttrajs = [np.array([0,0,0,0,0,0,0,0,0,0]),np.array([1,1,1,1,1,1,1,1,1,1])]
        >>> dtrajs = [np.array([0,0,0,0,1,1,1,0,0,0]),np.array([0,1,0,1,0,1,1,0,0,1])]
        >>> dtram = dtram.estimate((ttrajs, dtrajs))
        >>> dtram.log_likelihood() # doctest: +ELLIPSIS
        -9.805...
        >>> dtram.count_matrices # doctest: +SKIP
        array([[[5, 1],
                [1, 2]],

               [[1, 4],
                [3, 1]]], dtype=int32)
        >>> dtram.stationary_distribution # doctest: +ELLIPSIS
        array([ 0.38...,  0.61...])
        >>> dtram.meval('stationary_distribution') # doctest: +ELLIPSIS
        [array([ 0.38...,  0.61...]), array([ 0.50...,  0.49...])]

        References
        ----------

        .. [1] Wu, H. et al 2014
            Statistically optimal analysis of state-discretized trajectory data from multiple thermodynamic states
            J. Chem. Phys. 141, 214106

        """
        # set all parameters
        self.bias_energies_full = _types.ensure_ndarray(bias_energies_full,
                                                        ndim=2,
                                                        kind='numeric')
        self.lag = lag
        assert count_mode == 'sliding', 'Currently the only implemented count_mode is \'sliding\''
        self.count_mode = count_mode
        assert connectivity in [ None, 'reversible_pathways', 'summed_count_matrix' ], \
            'Currently the only implemented connectivity checks are \'reversible_pathways\', \'summed_count_matrix\' and None'
        self.connectivity = connectivity
        self.dt_traj = dt_traj
        self.maxiter = maxiter
        self.maxerr = maxerr
        self.save_convergence_info = save_convergence_info
        assert init in (
            None, 'wham'), 'Currently only None and \'wham\' are supported'
        self.init = init
        self.init_maxiter = init_maxiter
        self.init_maxerr = init_maxerr
        # set derived quantities; the shape is read from the validated 2-d array
        # stored above rather than from the raw argument, which is not
        # guaranteed to expose a ``.shape`` attribute itself
        self.nthermo, self.nstates_full = self.bias_energies_full.shape
        # set iteration variables
        self.therm_energies = None
        self.conf_energies = None
        self.log_lagrangian_mult = None
Ejemplo n.º 19
0
    def __init__(self,
                 bias_energies_full,
                 maxiter=10000,
                 maxerr=1.0E-15,
                 save_convergence_info=0,
                 dt_traj='1 step',
                 stride=1):
        r"""Weighted Histogram Analysis Method

        Parameters
        ----------
        bias_energies_full : numpy.ndarray(shape=(num_therm_states, num_conf_states)) object
            bias_energies_full[j, i] is the bias energy in units of kT for each discrete state i
            at thermodynamic state j.
        maxiter : int, optional, default=10000
            The maximum number of self-consistent iterations before the estimator exits unsuccessfully.
        maxerr : float, optional, default=1.0E-15
            Convergence criterion based on the maximal free energy change in a self-consistent
            iteration step.
        save_convergence_info : int, optional, default=0
            Every save_convergence_info iteration steps, store the actual increment
            and the actual log-likelihood; 0 means no storage.
        dt_traj : str, optional, default='1 step'
            Description of the physical time corresponding to the lag. May be used by analysis
            algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
            there is no physical time unit.  Specify by a number, whitespace and unit. Permitted
            units are (* is an arbitrary string):

            |  'fs',   'femtosecond*'
            |  'ps',   'picosecond*'
            |  'ns',   'nanosecond*'
            |  'us',   'microsecond*'
            |  'ms',   'millisecond*'
            |  's',    'second*'
        stride : int, optional, default=1
            not used

        Example
        -------
        >>> from pyemma.thermo import WHAM
        >>> import numpy as np
        >>> B = np.array([[0, 0],[0.5, 1.0]])
        >>> wham = WHAM(B)
        >>> ttrajs = [np.array([0,0,0,0,0,0,0,0,0,0]),np.array([1,1,1,1,1,1,1,1,1,1])]
        >>> dtrajs = [np.array([0,0,0,0,1,1,1,0,0,0]),np.array([0,1,0,1,0,1,1,0,0,1])]
        >>> wham = wham.estimate((ttrajs, dtrajs))
        >>> wham.log_likelihood() # doctest: +ELLIPSIS
        -6.6...
        >>> wham.state_counts # doctest: +SKIP
        array([[7, 3],
               [5, 5]])
        >>> wham.stationary_distribution # doctest: +ELLIPSIS +REPORT_NDIFF
        array([ 0.5...,  0.4...])
        >>> wham.meval('stationary_distribution') # doctest: +ELLIPSIS +REPORT_NDIFF
        [array([ 0.5...,  0.4...]), array([ 0.6...,  0.3...])]

        References
        ----------

        .. [1] Ferrenberg, A.M. and Swensen, R.H. 1988.
            New Monte Carlo Technique for Studying Phase Transitions.
            Phys. Rev. Lett. 23, 2635--2638

        .. [2] Kumar, S. et al 1992.
            The Weighted Histogram Analysis Method for Free-Energy Calculations on Biomolecules. I. The Method.
            J. Comp. Chem. 13, 1011--1021

        """
        # store the validated 2-d numeric bias energy matrix
        self.bias_energies_full = _types.ensure_ndarray(bias_energies_full,
                                                        ndim=2,
                                                        kind='numeric')
        self.stride = stride
        self.dt_traj = dt_traj
        self.maxiter = maxiter
        self.maxerr = maxerr
        self.save_convergence_info = save_convergence_info
        # set derived quantities; the shape is read from the validated array
        # stored above rather than from the raw argument, which is not
        # guaranteed to expose a ``.shape`` attribute itself
        self.nthermo, self.nstates_full = self.bias_energies_full.shape
        # set iteration variables
        self.therm_energies = None
        self.conf_energies = None
Ejemplo n.º 20
0
    def plot_network(self,
                     state_sizes=None,
                     state_scale=1.0,
                     state_colors='#ff5500',
                     state_labels='auto',
                     arrow_scale=1.0,
                     arrow_curvature=1.0,
                     arrow_labels='weights',
                     arrow_label_format='%10.2f',
                     max_width=12,
                     max_height=12,
                     figpadding=0.2,
                     xticks=False,
                     yticks=False,
                     show_frame=False,
                     **textkwargs):
        """
        Draws a network using discs and curved arrows.

        The thicknesses and labels of the arrows are taken from the off-diagonal matrix elements
        in A.

        Parameters
        ----------
        state_sizes : array-like of length n or None, default=None
            Relative disc sizes. They are rescaled by their maximum; None
            gives all discs the same size.
        state_scale : float, default=1.0
            Multiplier applied to all disc sizes.
        state_colors : str, list or numeric array-like, default='#ff5500'
            A single color string (used for all discs), a list with one color
            per state, or numeric values. Numeric values are divided by their
            maximum and mapped through matplotlib's ``binary`` colormap.
            None falls back to the default color.
        state_labels : None, 'auto' or sequence of length n, default='auto'
            'auto' labels the discs with their index, None draws no labels.
        arrow_scale : float, default=1.0
            Multiplier for the arrow widths. It is applied on top of an
            automatic rescaling by the largest off-diagonal element of A and
            sqrt(n).
        arrow_curvature : float, default=1.0
            Forwarded unchanged to ``self._draw_arrow``.
        arrow_labels : 'weights', None or ndarray, default='weights'
            'weights' labels the arrows with the elements of A, None draws
            empty labels, an ndarray supplies one label per matrix element.
        arrow_label_format : str, default='%10.2f'
            %-format applied to every arrow label. It is replaced by '%s' for
            string-valued label arrays and for ``arrow_labels=None``.
        max_width, max_height : number, default=12
            Upper bounds of the figure size. Only used when a new figure is
            created, i.e. when ``self.ax`` is None.
        figpadding : float, default=0.2
            Fraction of the data extent added as padding around the network.
        xticks, yticks : bool, default=False
            Whether to keep the axis ticks.
        show_frame : bool, default=False
            Whether to draw the axes frame.
        textkwargs
            Keyword arguments forwarded to ``ax.text`` for the state labels.
            The extra key 'arrow_label_size' is removed beforehand and passed
            to ``self._draw_arrow``; it defaults to the value of 'size'.

        Returns
        -------
        fig : matplotlib figure
            The figure that holds the axes which were drawn on.

        Raises
        ------
        ValueError
            If the number of state labels or state colors does not match the
            number of states, or if ``arrow_labels`` is not supported.

        """

        # Set the default values for the text dictionary
        from matplotlib import pyplot as _plt
        textkwargs.setdefault('size', None)
        textkwargs.setdefault('horizontalalignment', 'center')
        textkwargs.setdefault('verticalalignment', 'center')
        textkwargs.setdefault('color', 'black')
        # remove the temporary key 'arrow_label_size' as it cannot be parsed by plt.text!
        arrow_label_size = textkwargs.pop('arrow_label_size',
                                          textkwargs['size'])
        if self.pos is None:
            self.layout_automatic()
        # number of nodes
        n = len(self.pos)
        # get bounds and pad figure
        xmin = _np.min(self.pos[:, 0])
        xmax = _np.max(self.pos[:, 0])
        Dx = xmax - xmin
        xmin -= Dx * figpadding
        xmax += Dx * figpadding
        Dx *= 1 + figpadding
        ymin = _np.min(self.pos[:, 1])
        ymax = _np.max(self.pos[:, 1])
        Dy = ymax - ymin
        ymin -= Dy * figpadding
        ymax += Dy * figpadding
        Dy *= 1 + figpadding
        # sizes of nodes
        if state_sizes is None:
            state_sizes = 0.5 * state_scale * \
                min(Dx, Dy)**2 * _np.ones(n) / float(n)
        else:
            # accept any array-like (e.g. a plain list) in the arithmetic below
            state_sizes = _np.asarray(state_sizes)
            state_sizes = 0.5 * state_scale * \
                min(Dx, Dy)**2 * state_sizes / (_np.max(state_sizes) * float(n))
        # automatic arrow rescaling
        arrow_scale *= 1.0 / \
            (_np.max(self.A - _np.diag(_np.diag(self.A))) * _sqrt(n))
        # size figure
        if (Dx / max_width > Dy / max_height):
            figsize = (max_width, Dy * (max_width / Dx))
        else:
            figsize = (Dx / Dy * max_height, max_height)
        if self.ax is None:
            logger.debug("creating new figure")
            fig = _plt.figure(None, figsize=figsize)
            self.ax = fig.add_subplot(111)
        else:
            # drawing into existing axes: stretch the data limits so that the
            # data aspect ratio matches the aspect ratio of the axes
            fig = self.ax.figure
            window_extend = self.ax.get_window_extent()
            axes_ratio = window_extend.height / window_extend.width
            data_ratio = (ymax - ymin) / (xmax - xmin)
            q = axes_ratio / data_ratio
            if q > 1.0:
                ymin *= q
                ymax *= q
            else:
                xmin /= q
                xmax /= q
        if not xticks:
            self.ax.get_xaxis().set_ticks([])
        if not yticks:
            self.ax.get_yaxis().set_ticks([])
        # show or suppress frame
        self.ax.set_frame_on(show_frame)
        # set node labels
        if state_labels is None:
            pass
        elif isinstance(state_labels, str) and state_labels == 'auto':
            state_labels = [str(i) for i in _np.arange(n)]
        else:
            if len(state_labels) != n:
                raise ValueError(
                    "length of state_labels({}) has to match length of states({})."
                    .format(len(state_labels), n))
        # set node colors
        if state_colors is None:
            state_colors = '#ff5500'  # None is not acceptable
        if isinstance(state_colors, str):
            state_colors = [state_colors] * n
        if isinstance(state_colors, list) and not len(state_colors) == n:
            raise ValueError(
                "Mistmatch between nstates and nr. state_colors (%u vs %u)" %
                (n, len(state_colors)))
        try:
            colorscales = _types.ensure_ndarray(state_colors,
                                                ndim=1,
                                                kind='numeric')
            # Normalize out of place: an in-place division raises a TypeError
            # for integer arrays and would modify an array passed by the caller.
            colorscales = colorscales / colorscales.max()
            state_colors = [
                _plt.cm.binary(int(256.0 * colorscales[i])) for i in range(n)
            ]
        except AssertionError:
            # assume we have a list of strings now.
            logger.debug("could not cast 'state_colors' to numeric values.")

        # set arrow labels
        if isinstance(arrow_labels, _np.ndarray):
            L = arrow_labels
            if isinstance(arrow_labels[0, 0], str):
                arrow_label_format = '%s'
        elif isinstance(arrow_labels,
                        str) and arrow_labels.lower() == 'weights':
            L = self.A[:, :]
        elif arrow_labels is None:
            L = _np.empty(_np.shape(self.A), dtype=object)
            L[:, :] = ''
            arrow_label_format = '%s'
        else:
            raise ValueError('invalid arrow labels')

        # draw circles
        circles = []
        for i in range(n):
            # choose color
            c = _plt.Circle(self.pos[i],
                            radius=_sqrt(0.5 * state_sizes[i]) / 2.0,
                            color=state_colors[i],
                            zorder=2)
            circles.append(c)
            self.ax.add_artist(c)
            # add annotation
            if state_labels is not None:
                self.ax.text(self.pos[i][0],
                             self.pos[i][1],
                             state_labels[i],
                             zorder=3,
                             **textkwargs)

        assert len(circles) == n, "%i != %i" % (len(circles), n)

        # draw arrows: for every pair of states one arrow per direction with
        # a non-zero matrix element
        for i in range(n):
            for j in range(i + 1, n):
                if (abs(self.A[i, j]) > 0):
                    self._draw_arrow(self.pos[i, 0],
                                     self.pos[i, 1],
                                     self.pos[j, 0],
                                     self.pos[j, 1],
                                     Dx,
                                     Dy,
                                     label=arrow_label_format % L[i, j],
                                     width=arrow_scale * self.A[i, j],
                                     arrow_curvature=arrow_curvature,
                                     patchA=circles[i],
                                     patchB=circles[j],
                                     shrinkA=3,
                                     shrinkB=0,
                                     arrow_label_size=arrow_label_size)
                if (abs(self.A[j, i]) > 0):
                    self._draw_arrow(self.pos[j, 0],
                                     self.pos[j, 1],
                                     self.pos[i, 0],
                                     self.pos[i, 1],
                                     Dx,
                                     Dy,
                                     label=arrow_label_format % L[j, i],
                                     width=arrow_scale * self.A[j, i],
                                     arrow_curvature=arrow_curvature,
                                     patchA=circles[j],
                                     patchB=circles[i],
                                     shrinkA=3,
                                     shrinkB=0,
                                     arrow_label_size=arrow_label_size)

        # plot
        self.ax.set_xlim(xmin, xmax)
        self.ax.set_ylim(ymin, ymax)
        return fig
Ejemplo n.º 21
0
def tpt(msmobj, A, B):
    r""" Compute the A->B reactive flux using transition path theory (TPT)

    The result is a :class:`ReactiveFlux <msmtools.flux.ReactiveFlux>` object.
    It gives access to various quantities of the flux, to the A -> B
    transition pathways and their weights, and it allows to coarse-grain the
    flux onto sets of states.

    Parameters
    ----------
    msmobj : :class:`MSM <pyemma.msm.MSM>` object
        Markov state model (MSM) object; its transition matrix and its
        stationary distribution are used
    A : array_like
        List of integer state labels for set A
    B : array_like
        List of integer state labels for set B

    Returns
    -------
    tptobj : :class:`ReactiveFlux <msmtools.flux.ReactiveFlux>` object
        An object containing the reactive A->B flux network
        and several additional quantities, such as the stationary probability,
        committors and set definitions.

    See also
    --------
    :class:`ReactiveFlux <msmtools.flux.ReactiveFlux>`
        Reactive Flux object


    .. autoclass:: msmtools.flux.reactive_flux.ReactiveFlux
        :members:
        :undoc-members:

        .. rubric:: Methods

        .. autoautosummary:: msmtools.flux.reactive_flux.ReactiveFlux
           :methods:

        .. rubric:: Attributes

        .. autoautosummary:: msmtools.flux.reactive_flux.ReactiveFlux
            :attributes:

    References
    ----------
    Transition path theory was introduced for space-continuous dynamical
    processes, such as Langevin dynamics, in [1]_. Discrete transition path
    theory for Markov jump processes (Master equation models, rate matrices)
    and pathway decomposition algorithms are introduced in [2]_. Transition
    path theory for Markov state models (MSMs) and some analysis algorithms
    are introduced in [3]_. This function applies the equations described
    in [3]_.

    .. [1] W. E and E. Vanden-Eijnden.
        Towards a theory of transition paths.
        J. Stat. Phys. 123: 503-523 (2006)

    .. [2] P. Metzner, C. Schuette and E. Vanden-Eijnden.
        Transition Path Theory for Markov Jump Processes.
        Multiscale Model Simul 7: 1192-1219 (2009)

    .. [3] F. Noe, Ch. Schuette, E. Vanden-Eijnden, L. Reich and
        T. Weikl: Constructing the Full Ensemble of Folding Pathways
        from Short Off-Equilibrium Simulations.
        Proc. Natl. Acad. Sci. USA, 106, 19011-19016 (2009)

    """
    # read the model quantities first, then validate the two state sets
    transition_matrix = msmobj.transition_matrix
    stationary = msmobj.stationary_distribution
    source_states = _types.ensure_ndarray(A, kind='i')
    target_states = _types.ensure_ndarray(B, kind='i')
    return tpt_factory(transition_matrix, source_states, target_states,
                       mu=stationary)
Ejemplo n.º 22
0
    def __init__(self, model, estimator, mlags=None, conf=0.95, err_est=False,
                 n_jobs=1, show_progress=True):
        r"""
        Parameters
        ----------
        model : Model
            Model to be tested

        estimator : Estimator
            Parametrized Estimator that has produced the model

        mlags : int or int-array, default=None
            multiples of lag times for testing the Model, e.g. range(10).
            A single int will trigger a range, i.e. mlags=10 maps to
            mlags=range(10). The setting None will choose mlags automatically
            according to the longest available trajectory.
            Negative multiples and multiples that exceed the longest
            trajectory are dropped with a warning.
            Note that you need to be able to do a model prediction for each
            of these lag time multiples, e.g. the value 0 only make sense
            if _predict_observables(0) will work.

        conf : float, default = 0.95
            confidence interval for errors

        err_est : bool, default=False
            if the Estimator is capable of error calculation, will compute
            errors for each tau estimate. This option can be computationally
            expensive.

        n_jobs : int, default=1
            how many jobs to use during calculation

        show_progress : bool, default=True
            Show progressbars for calculation?

        Raises
        ------
        ValueError
            If err_est is requested but the model is not a SampledModel.

        """
        # set model and estimator
        self.test_model = model
        self.test_estimator = estimator

        # set mlags: the largest usable multiple is bounded by the longest
        # discrete trajectory divided by the estimator's lag time
        maxlength = np.max([len(dtraj) for dtraj in estimator.discrete_trajectories_full])
        maxmlag = int(math.floor(maxlength / estimator.lag))
        if mlags is None:
            mlags = maxmlag
        if types.is_int(mlags):
            mlags = np.arange(mlags)
        mlags = types.ensure_ndarray(mlags, ndim=1, kind='i')
        if np.any(mlags > maxmlag):
            mlags = mlags[np.where(mlags <= maxmlag)]
            # Logger.warn is a deprecated alias of Logger.warning
            self.logger.warning('Changed mlags as some mlags exceeded maximum trajectory length.')
        if np.any(mlags < 0):
            mlags = mlags[np.where(mlags >= 0)]
            self.logger.warning('Changed mlags as some mlags were negative.')
        self.mlags = mlags

        # set conf and error handling; only sampled models can provide errors
        self.conf = conf
        self.has_errors = issubclass(self.test_model.__class__, SampledModel)
        if self.has_errors:
            self.test_model.set_model_params(conf=conf)
        self.err_est = err_est
        if err_est and not self.has_errors:
            raise ValueError('Requested errors on the estimated models, '
                             'but the model is not able to calculate errors at all')
        self.n_jobs = n_jobs
        self.show_progress = show_progress
Ejemplo n.º 23
0
    def plot_network(self,
                     state_sizes=None,
                     state_scale=1.0,
                     state_colors='#ff5500',
                     state_labels='auto',
                     arrow_scale=1.0,
                     arrow_curvature=1.0,
                     arrow_labels='weights',
                     arrow_label_format='%10.2f',
                     max_width=12,
                     max_height=12,
                     max_flux=None,
                     figpadding=0.2,
                     xticks=False,
                     yticks=False,
                     show_frame=False,
                     **textkwargs):
        """
        Draws a network using discs and curved, color-coded arrows.

        The thicknesses, colors and labels of the arrows are taken from the
        off-diagonal matrix elements in A.

        Parameters
        ----------
        state_sizes : array-like of length n or None, default=None
            Relative disc sizes. They are rescaled by their maximum; None
            gives all discs the same size.
        state_scale : float, default=1.0
            Multiplier applied to all disc sizes.
        state_colors : str, list or numeric array-like, default='#ff5500'
            A single color string (used for all discs), a list with one color
            per state, or numeric values. Numeric values are divided by their
            maximum and mapped through matplotlib's ``binary`` colormap.
            None falls back to the default color.
        state_labels : None, 'auto' or sequence of length n, default='auto'
            'auto' labels the discs with their index, None draws no labels.
        arrow_scale : float, default=1.0
            Multiplier for the arrow widths; no automatic rescaling is done.
        arrow_curvature : float, default=1.0
            Forwarded unchanged to ``self._draw_arrow``.
        arrow_labels : 'weights', None or ndarray, default='weights'
            'weights' labels the arrows with the formatted elements of A,
            None draws empty labels, an ndarray supplies one label per matrix
            element (converted with ``str``).
        arrow_label_format : str, default='%10.2f'
            %-format used for the labels when ``arrow_labels='weights'``.
        max_width, max_height : number, default=12
            Upper bounds of the figure size in inches.
        max_flux : number or None, default=None
            Symmetric limit of the color range when A contains negative
            entries. None uses the largest absolute element of A. It is
            ignored when all elements of A are non-negative.
        figpadding : float, default=0.2
            Fraction of the data extent added as padding around the network.
        xticks, yticks : bool, default=False
            Whether to keep the axis ticks.
        show_frame : bool, default=False
            Whether to draw the axes frame.
        textkwargs
            Keyword arguments forwarded to ``plt.text`` for the state labels.

        Returns
        -------
        fig : matplotlib figure
            The current figure, which was drawn on.

        Raises
        ------
        AssertionError
            If the number of state labels or state colors does not match the
            number of states.
        ValueError
            If ``arrow_labels`` is not supported.

        """
        # The second half of the original implementation re-imported pyplot
        # and used it for the colormaps and the axis limits; this is kept, but
        # without the wildcard import that is a SyntaxError on Python 3.
        from matplotlib import pyplot as mpl_plt
        from matplotlib import rcParams

        plt = self.plt

        if self.pos is None:
            self.layout_automatic()
        # number of nodes
        n = len(self.pos)
        # get bounds and pad figure
        xmin = np.min(self.pos[:, 0])
        xmax = np.max(self.pos[:, 0])
        Dx = xmax - xmin
        xmin -= Dx * figpadding
        xmax += Dx * figpadding
        Dx *= 1 + figpadding
        ymin = np.min(self.pos[:, 1])
        ymax = np.max(self.pos[:, 1])
        Dy = ymax - ymin
        ymin -= Dy * figpadding
        ymax += Dy * figpadding
        Dy *= 1 + figpadding
        # sizes of nodes
        if state_sizes is None:
            state_sizes = 0.5 * state_scale * \
                min(Dx, Dy)**2 * np.ones(n) / float(n)
        else:
            # accept any array-like (e.g. a plain list) in the arithmetic
            # below; unlike the default branch, user-supplied sizes are not
            # scaled with the extent of the layout
            state_sizes = np.asarray(state_sizes)
            state_sizes = 0.5 * state_scale * \
                state_sizes / (np.max(state_sizes) * float(n))
        # size figure
        if (Dx / max_width > Dy / max_height):
            figsize = (max_width, Dy * (max_width / Dx))
        else:
            figsize = (Dx / Dy * max_height, max_height)
        fig = plt.gcf()
        fig.set_size_inches(figsize, forward=True)

        # The global font size is changed while drawing; try/finally makes
        # sure it is restored on every exit path, including errors.
        old_fontsize = rcParams['font.size']
        rcParams['font.size'] = 20
        try:
            # remove axis labels
            frame = plt.gca()
            if not xticks:
                frame.axes.get_xaxis().set_ticks([])
            if not yticks:
                frame.axes.get_yaxis().set_ticks([])
            # show or suppress frame
            frame.set_frame_on(show_frame)

            # set node labels (compare by value, never by identity)
            if state_labels is None:
                pass
            elif isinstance(state_labels, str) and state_labels == 'auto':
                state_labels = [str(i) for i in range(n)]
            else:
                assert len(
                    state_labels
                ) == n, "Mistmatch between nstates and nr. state_labels (%u vs %u)" % (
                    n, len(state_labels))

            # set node colors
            if state_colors is None:
                state_colors = '#ff5500'  # None is not acceptable
            if isinstance(state_colors, str):
                state_colors = [state_colors] * n
            if isinstance(state_colors, list):
                assert len(
                    state_colors
                ) == n, "Mistmatch between nstates and nr. state_colors (%u vs %u)" % (
                    n, len(state_colors))
            try:
                colorscales = _types.ensure_ndarray(state_colors,
                                                    ndim=1,
                                                    kind='numeric')
                # Normalize out of place: an in-place division fails for
                # integer arrays and would modify the caller's array.
                colorscales = colorscales / colorscales.max()
                state_colors = [
                    plt.cm.binary(int(256.0 * colorscales[i]))
                    for i in range(n)
                ]
            except (AssertionError, TypeError, ValueError):
                pass  # assume we have a list of color specifications now.

            # set arrow labels
            if isinstance(arrow_labels, np.ndarray):
                L = arrow_labels
            elif arrow_labels is None:
                L = np.empty(np.shape(self.A), dtype=object)
                L[:, :] = ''
            elif isinstance(arrow_labels, str) and arrow_labels.lower() == 'weights':
                L = np.empty(np.shape(self.A), dtype=object)
                for i in range(n):
                    for j in range(n):
                        L[i, j] = arrow_label_format % self.A[i, j]
            else:
                raise ValueError('invalid arrow label format')

            # Set the default values for the text dictionary
            textkwargs.setdefault('size', 14)
            textkwargs.setdefault('horizontalalignment', 'center')
            textkwargs.setdefault('verticalalignment', 'center')
            textkwargs.setdefault('color', 'black')

            # draw circles
            circles = []
            for i in range(n):
                # choose color
                c = plt.Circle(self.pos[i],
                               radius=math.sqrt(0.5 * state_sizes[i]) / 2.0,
                               color=state_colors[i],
                               zorder=2)
                circles.append(c)
                fig.gca().add_artist(c)
                # add annotation
                if state_labels is not None:
                    plt.text(self.pos[i][0],
                             self.pos[i][1],
                             state_labels[i],
                             zorder=3,
                             **textkwargs)

            assert len(circles) == n, "%i != %i" % (len(circles), n)

            # Arrow colormap. The color range is always symmetric around
            # zero: non-negative matrices use the upper half of a grey scale,
            # signed matrices use a diverging blue-white-red map.
            if (np.all(self.A >= 0)):
                mycmap = mpl_plt.cm.Greys
                mycmap_max = np.max(np.abs(self.A))
            else:
                mycmap = mpl_plt.cm.bwr
                if (max_flux is None):
                    mycmap_max = np.max(np.abs(self.A))
                else:
                    mycmap_max = max_flux
            mycmap_min = -1. * mycmap_max
            # extract all colors of the base map and rebuild it as a list map
            mycmaplist = [mycmap(i) for i in range(mycmap.N)]
            mycmap = mycmap.from_list('Custom cmap', mycmaplist, mycmap.N)
            # one bin per color, evenly spaced over the value range
            bounds = np.linspace(mycmap_min, mycmap_max, mycmap.N)
            mycmaplist = [mycmap(i) for i in range(mycmap.N)]
            dx = bounds[1] - bounds[0]
            last_bin = len(mycmaplist) - 1

            def _arrow_color(value):
                # Round to the nearest bin and clamp it: values outside
                # +/- max_flux would otherwise raise an IndexError or
                # silently pick a wrong color through negative indexing.
                grid = int((value - mycmap_min) / dx + 0.5)
                return mycmaplist[min(max(grid, 0), last_bin)]

            # draw arrows: for every pair of states one arrow per direction
            # with a non-zero matrix element (i -> j first, then j -> i)
            for i in range(n):
                for j in range(i + 1, n):
                    for a, b in ((i, j), (j, i)):
                        if (abs(self.A[a, b]) > 0):
                            self._draw_arrow(self.pos[a, 0],
                                             self.pos[a, 1],
                                             self.pos[b, 0],
                                             self.pos[b, 1],
                                             Dx,
                                             Dy,
                                             label=str(L[a, b]),
                                             width=arrow_scale * abs(self.A[a, b]),
                                             color=_arrow_color(self.A[a, b]),
                                             arrow_curvature=arrow_curvature,
                                             patchA=circles[a],
                                             patchB=circles[b],
                                             shrinkA=3,
                                             shrinkB=0)

            # plot
            mpl_plt.xlim(xmin, xmax)
            mpl_plt.ylim(ymin, ymax)
        finally:
            rcParams['font.size'] = old_fontsize
        return fig