Exemple #1
0
 def _check_submodel_transitive_properties(histogram, count_matrix, model: TransitionCountModel):
     """Assert the "full" attributes of ``model`` that taking a submodel must leave untouched.

     Parameters
     ----------
     histogram : array_like
         Expected value of ``model.state_histogram_full``.
     count_matrix : array_like
         Expected value of ``model.count_matrix_full``.
     model : TransitionCountModel
         The (sub)model under inspection.
     """
     expect_equal = np.testing.assert_equal

     # Statistics over the complete state space.
     expect_equal(model.state_histogram_full, histogram)
     # Fixed settings the calling tests are expected to have used.
     expect_equal(model.lagtime, 1)
     expect_equal(model.n_states_full, 4)
     expect_equal(model.physical_time, Q_("10 miles"))
     expect_equal(model.count_matrix_full, count_matrix)
     expect_equal(model.counting_mode, "effective")
Exemple #2
0
 def test_properties(self):
     """Constructor arguments must be readable back from the estimator for every count mode."""
     for count_mode in ("sample", "sliding", "sliding-effective", "effective"):
         counter = TransitionCountEstimator(lagtime=5, count_mode=count_mode, physical_time="10 ns")
         # count mode and lag time are handed back unchanged
         self.assertEqual(counter.count_mode, count_mode)
         np.testing.assert_equal(counter.lagtime, 5)
         # the physical time string is compared against the quantity built from the same text
         assert Q_("10 ns") == counter.physical_time, \
             "expected 10 ns as physical time but got {}".format(counter.physical_time)
    def physical_time(self, value: str):
        r"""
        Store the physical time associated with the input trajectories.

        The description is given as a number, a whitespace, and a unit, and is converted via ``Q_`` before
        being stored. Permitted units are 'fs', 'ps', 'ns', 'us', 'ms', 's', and 'step'.

        Parameters
        ----------
        value : str
            the physical time description
        """
        quantity = Q_(value)
        self._physical_time = quantity
Exemple #4
0
 def test_sliding_counting(self):
     """Fit a lag-2 estimator in "sliding" mode on a short two-state trajectory and inspect the model."""
     trajectory = np.array([0, 0, 0, 0, 1, 1, 0, 1])
     counter = TransitionCountEstimator(lagtime=2, count_mode="sliding")
     fitted = counter.fit(trajectory)
     model = fitted.fetch_model()
     expect_equal = np.testing.assert_equal

     # A window slides across the trajectory and counts every lagged pair, which overestimates
     # the total count. Observed pairs: 0 -> 0, 0 -> 0, 0 -> 1, 0 -> 1, 1 -> 0, 1 -> 1
     expected_counts = np.array([[2., 2.], [1., 1.]])
     np.testing.assert_array_equal(model.count_matrix.toarray(), expected_counts)

     # estimator settings are carried over to the model
     expect_equal(model.lagtime, 2)
     assert model.counting_mode == "sliding", \
         "expected sliding counting mode, got {}".format(model.counting_mode)
     assert Q_("1 step") == model.physical_time, \
         "no physical time specified, expecting 'step' but got {}".format(model.physical_time)

     # state bookkeeping
     expect_equal(model.state_symbols, [0, 1], err_msg="Trajectory only contained states 0 and 1")
     expect_equal(model.n_states, 2)
     expect_equal(model.state_histogram, [5, 3])

     # nothing was sub-selected, so the model covers all states and counts
     assert model.is_full_model
     expect_equal(model.selected_count_fraction, 1)
     expect_equal(model.selected_state_fraction, 1)
     expect_equal(model.total_count, len(trajectory))
     expect_equal(model.visited_set, [0, 1])
Exemple #5
0
 def test_sample_counting(self):
     """Fit a lag-2 estimator in "sample" mode on a short two-state trajectory and inspect the model."""
     trajectory = np.array([0, 0, 0, 0, 1, 1, 0, 1])
     counter = TransitionCountEstimator(lagtime=2, count_mode="sample")
     fitted = counter.fit(trajectory)
     model = fitted.fetch_model()
     expect_equal = np.testing.assert_equal

     # "sample" strides the trajectory with the lag and then counts instantaneous transitions,
     # which yields the counts 0 -> 0, 0 -> 1, 1 -> 0
     expected_counts = np.array([[1., 1.], [1., 0.]])
     np.testing.assert_array_equal(model.count_matrix.toarray(), expected_counts)

     # estimator settings are carried over to the model
     expect_equal(model.lagtime, 2)
     assert model.counting_mode == "sample", \
         "expected sample counting mode, got {}".format(model.counting_mode)
     assert Q_("1 step") == model.physical_time, \
         "no physical time specified, expecting 'step' but got {}".format(model.physical_time)

     # state bookkeeping
     expect_equal(model.state_symbols, [0, 1], err_msg="Trajectory only contained states 0 and 1")
     expect_equal(model.n_states, 2)
     expect_equal(model.state_histogram, [5, 3])

     # nothing was sub-selected, so the model covers all states and counts
     assert model.is_full_model
     expect_equal(model.selected_count_fraction, 1)
     expect_equal(model.selected_state_fraction, 1)
     expect_equal(model.total_count, len(trajectory))
     expect_equal(model.visited_set, [0, 1])
Exemple #6
0
 def test_effective_counting(self):
     """Fit a lag-2 estimator in "effective" mode on a short two-state trajectory and inspect the model.

     The effective count matrix holds non-integer floating point values, so it is compared with a
     tolerance rather than bit-for-bit; all remaining properties are checked exactly.
     """
     dtraj = np.array([0, 0, 0, 0, 1, 1, 0, 1])
     estimator = TransitionCountEstimator(lagtime=2, count_mode="effective")
     model = estimator.fit(dtraj).fetch_model()
     # effective counting
     # todo actually compute this and see if it makes sense
     # Tolerance-based comparison: exact equality on computed floats such as 1.6 is fragile
     # against rounding differences between platforms or library versions.
     np.testing.assert_allclose(model.count_matrix.toarray(), np.array([[1.6, 1.6], [1., 1.]]))
     np.testing.assert_equal(model.lagtime, 2)
     assert model.counting_mode == "effective", "expected effective counting mode, " \
                                                "got {}".format(model.counting_mode)
     assert Q_("1 step") == model.physical_time, "no physical time specified, expecting 'step' " \
                                                 "but got {}".format(model.physical_time)
     np.testing.assert_equal(model.state_symbols, [0, 1], err_msg="Trajectory only contained states 0 and 1")
     np.testing.assert_equal(model.n_states, 2)
     np.testing.assert_equal(model.state_histogram, [5, 3])
     # nothing was sub-selected, so the model covers all states and counts
     assert model.is_full_model
     np.testing.assert_equal(model.selected_count_fraction, 1)
     np.testing.assert_equal(model.selected_state_fraction, 1)
     np.testing.assert_equal(model.total_count, len(dtraj))
     np.testing.assert_equal(model.visited_set, [0, 1])
    def __init__(self,
                 count_matrix: Union[np.ndarray, coo_matrix],
                 counting_mode: Optional[str] = None,
                 lagtime: int = 1,
                 state_histogram: Optional[np.ndarray] = None,
                 physical_time: Union[Q_, str] = '1 step',
                 state_symbols: Optional[np.ndarray] = None,
                 count_matrix_full: Union[None, np.ndarray, coo_matrix] = None,
                 state_histogram_full: Optional[np.ndarray] = None):
        r"""Creates a new TransitionCountModel, which can be used to, e.g., construct Markov state models.
        Only the count matrix is mandatory; further statistics of the data may be supplied in addition.

        Parameters
        ----------
        count_matrix : array_like
            The count matrix. If it was estimated with 'sliding', it contains a factor of `lagtime` more
            counts than are statistically uncorrelated.
        counting_mode : str, optional, default=None
            If not None, one of 'sliding', 'sample', or 'effective'.
            Names the counting method that produced the count matrix. With 'sliding', a sliding window of the
            size of the lagtime was used to count transitions, so the matrix contains a factor of `lagtime`
            more counts than are statistically uncorrelated. Such a matrix is fine for maximum likelihood
            estimation, but gives far too small errors when used for uncertainty calculations. For
            uncertainty calculations use the effective count matrix, see :attr:`effective_count_matrix`,
            divide this count matrix by tau, or use 'effective' as estimation parameter.
        lagtime : int, optional, default=1
            The time offset which was used to count transitions in state.
        state_histogram : array_like, optional, default=None
            Histogram over the visited states in discretized trajectories.
        physical_time : Unit or str, default='1 step'
            Description of the physical time unit corresponding to one time step of the transitioning
            process (aka lag time). May be used by analysis methods such as plotting tools to pretty-print
            the axes. Values of type str or int are converted with ``Q_``; anything else is stored as given.
            The default '1 step' means there is no physical time unit. Permitted units are

            *  'fs',  'femtosecond'
            *  'ps',  'picosecond'
            *  'ns',  'nanosecond'
            *  'us',  'microsecond'
            *  'ms',  'millisecond'
            *  's',   'second'
        state_symbols : array_like, optional, default=None
            Symbols of the original discrete trajectory that are represented in the counting model. If None,
            the symbols are assumed to represent the data, i.e., a iota range over the number of states.
            Subselection of the model also subselects the symbols.
        count_matrix_full : array_like, optional, default=None
            Count matrix for all state symbols. If None, the count matrix given as first argument takes
            that role.
        state_histogram_full : array_like, optional, default=None
            Histogram over all state symbols. If None, the provided state_histogram takes that role.

        Raises
        ------
        ValueError
            If the count matrix is None, if the number of state symbols differs from the number of states,
            if the full count matrix has fewer states than the count matrix, or if the full state histogram
            and the full count matrix disagree in the number of states.
        """

        if count_matrix is None:
            raise ValueError("count matrix was None")

        self._count_matrix = count_matrix
        self._counting_mode = counting_mode
        self._lag = lagtime
        # plain str / int descriptions are turned into a quantity, everything else is taken as-is
        if isinstance(physical_time, (str, int)):
            self._physical_time = Q_(physical_time)
        else:
            self._physical_time = physical_time
        self._state_histogram = state_histogram

        # without explicit symbols the count matrix is assumed to represent all states in the data
        symbols = np.arange(self.n_states) if state_symbols is None else state_symbols
        if len(symbols) != self.n_states:
            message = ("Number of symbols in counting model must coincide with the number of states in the "
                       "count matrix! (#symbols = {}, #states = {})")
            raise ValueError(message.format(len(symbols), self.n_states))
        self._state_symbols = symbols

        # the count matrix doubles as full count matrix unless a dedicated one was given
        self._count_matrix_full = count_matrix if count_matrix_full is None else count_matrix_full
        if self.n_states_full < self.n_states:
            # full number of states must be at least as large as n_states
            message = ("Number of states was bigger than full number of "
                       "states. (#states = {}, #states_full = {}), likely a wrong "
                       "full count matrix.")
            raise ValueError(message.format(self.n_states, self.n_states_full))

        # same fallback for the histogram; it may legitimately stay None
        histogram_full = state_histogram if state_histogram_full is None else state_histogram_full
        if histogram_full is not None:
            if self.n_states_full != len(histogram_full):
                message = ("Mismatch between number of states represented in full state histogram and "
                           "full count matrix "
                           "(#states histogram = {}, #states matrix = {})")
                raise ValueError(message.format(len(histogram_full), self.n_states_full))
        self._state_histogram_full = histogram_full
 def physical_time(self, value):
     """Convert ``value`` with ``Q_`` and keep the result as ``self._dt_model``."""
     converted = Q_(value)
     self._dt_model = converted