def submodel(self, states=None, obs=None, mincount_connectivity='1/n'):
        """Restrict this HMM to a subset of hidden and observed states.

        NOTE: the restriction is applied in place. The model parameters and
        count matrices stored on ``self`` are overwritten and ``self`` itself
        is returned; no copy is made.

        Parameters
        ----------
        states : None, str or int-array
            Hidden states to restrict the model to. In addition to specifying
            the subset, possible options are:
            * None : all states - don't restrict
            * 'populous-strong' : strongly connected subset with maximum counts
            * 'populous-weak' : weakly connected subset with maximum counts
            * 'largest-strong' : strongly connected subset with maximum size
            * 'largest-weak' : weakly connected subset with maximum size
            A string is interpreted by substring: 'strong' selects strong
            connectivity (otherwise weak), 'largest' ranks the connected sets
            by size (otherwise by their total counts).
        obs : None, str or int-array
            Observed states to restrict the model to. In addition to specifying
            an array with the state labels to be observed, possible options are:
            * None : all states - don't restrict
            * 'nonempty' : all states with at least one observation in the estimator
        mincount_connectivity : float or '1/n'
            minimum number of counts to consider a connection between two states.
            Counts lower than that will count zero in the connectivity check and
            may thus separate the resulting transition matrix. Default value:
            1/nstates.

        Returns
        -------
        hmm : HMM
            The restricted HMM (this very object).

        Raises
        ------
        ValueError
            If ``obs`` is a string other than 'nonempty'.

        """
        if states is None and obs is None and mincount_connectivity == 0:
            return self  # nothing to restrict

        # Reject unknown option strings up front, before anything on self is
        # modified (otherwise the failure would surface halfway through).
        if isinstance(obs, str) and obs != 'nonempty':
            raise ValueError("Unknown option for obs: %r (expected None, 'nonempty' "
                             "or an array of state indices)" % (obs,))

        if states is None:
            states = _np.arange(self.nstates)
        if obs is None:
            obs = _np.arange(self.nstates_obs)

        if str(mincount_connectivity) == '1/n':
            mincount_connectivity = 1.0/float(self.nstates)

        # handle new connectivity
        from bhmm.estimators import _tmatrix_disconnected
        full_counts = self.count_matrix
        strong_sets = _tmatrix_disconnected.connected_sets(full_counts,
                                                           mincount_connectivity=mincount_connectivity,
                                                           strong=True)
        if len(strong_sets) > 1:
            # keep only non-negligible transitions
            C = _np.zeros(full_counts.shape)
            large = _np.where(full_counts >= mincount_connectivity)
            C[large] = full_counts[large]
            for s in strong_sets:  # keep all (also small) transition counts within strongly connected subsets
                C[_np.ix_(s, s)] = full_counts[_np.ix_(s, s)]
            # re-estimate the transition matrix on the thresholded counts
            P = _tmatrix_disconnected.estimate_P(C, reversible=self.reversible, mincount_connectivity=0)
        else:
            C = full_counts
            P = self.transition_matrix
        # The stationary distribution is computed only once, after P and C
        # have been restricted to the selected states (see below).

        # determine substates
        if isinstance(states, str):
            strong = 'strong' in states
            largest = 'largest' in states
            candidates = _tmatrix_disconnected.connected_sets(full_counts,
                                                              mincount_connectivity=mincount_connectivity,
                                                              strong=strong)
            if largest:
                score = [len(s) for s in candidates]
            else:
                score = [full_counts[_np.ix_(s, s)].sum() for s in candidates]
            states = _np.array(candidates[_np.argmax(score)])
        else:
            # accept plain sequences as well as arrays
            states = _np.asarray(states)

        # sub-transition matrix, renormalized so that every row sums to one
        self._active_set = states
        sub = _np.ix_(states, states)
        C = C[sub].copy()
        P = P[sub].copy()
        P /= P.sum(axis=1)[:, None]
        pi = _tmatrix_disconnected.stationary_distribution(P, C)
        self.initial_count = self.initial_count[states]
        p0 = self.initial_distribution[states]
        self.initial_distribution = p0 / p0.sum()

        # determine observed states
        if isinstance(obs, str):  # can only be 'nonempty' here, validated above
            import msmtools.estimation as msmest
            obs = _np.where(msmest.count_states(self.discrete_trajectories_lagged) > 0)[0]
        else:
            # accept plain sequences as well as arrays (``obs.size`` is needed below)
            obs = _np.asarray(obs)

        # set observable set
        self._observable_set = obs
        self._nstates_obs = obs.size
        # full2obs mapping: index within obs for kept symbols, -1 for dropped ones
        _full2obs = -1 * _np.ones(self._nstates_obs_full, dtype=int)
        _full2obs[obs] = _np.arange(len(obs), dtype=int)
        # observable trajectories
        self._dtrajs_obs = [_full2obs[dtraj] for dtraj in self.discrete_trajectories_full]
        # observation matrix, renormalized so that every row sums to one
        B = self.observation_probabilities[_np.ix_(states, obs)].copy()
        B /= B.sum(axis=1)[:, None]

        # set quantities back.
        self.update_model_params(P=P, pobs=B, pi=pi)
        self.count_matrix_EM = self.count_matrix[_np.ix_(states, states)]  # unchanged count matrix
        self.count_matrix = C  # count matrix consistent with P
        return self
Exemple #2
0
 def weakly_connected_sets(self):
     """Return the weakly connected sets of the matrix stored in ``self._Tij``.

     Delegates to ``_tmatrix_disconnected.connected_sets`` with
     ``strong=False`` and returns its result unchanged.
     """
     find_sets = _tmatrix_disconnected.connected_sets
     return find_sets(self._Tij, strong=False)
Exemple #3
0
 def weakly_connected_sets(self):
     """Return the weakly connected sets of the matrix stored in ``self._Tij``.

     Delegates to ``_tmatrix_disconnected.connected_sets`` with
     ``strong=False`` and returns its result unchanged.
     """
     find_sets = _tmatrix_disconnected.connected_sets
     return find_sets(self._Tij, strong=False)
Exemple #4
0
 def strongly_connected_sets(self):
     """Return the strongly connected sets of the matrix stored in ``self._Tij``.

     Delegates to ``_tmatrix_disconnected.connected_sets`` with
     ``strong=True`` and returns its result unchanged.
     """
     find_sets = _tmatrix_disconnected.connected_sets
     return find_sets(self._Tij, strong=True)
Exemple #5
0
 def strongly_connected_sets(self):
     """Return the strongly connected sets of the matrix stored in ``self._Tij``.

     Delegates to ``_tmatrix_disconnected.connected_sets`` with
     ``strong=True`` and returns its result unchanged.
     """
     find_sets = _tmatrix_disconnected.connected_sets
     return find_sets(self._Tij, strong=True)