Ejemplo n.º 1
0
 def run_state_counts(self, i, out):
     """Benchmark ``hidden.state_counts`` on test case ``i``.

     Parameters
     ----------
     i : index
         Used to look up ``self.gamma[i]`` and ``self.T[i]``.
     out : object
         Accepted but not used by this method.

     Returns
     -------
     tuple
         ``(counts, seconds)``: the result of the last call and the elapsed
         ``time.time()`` interval divided by ``self.nrep``.
     """
     hidden.set_implementation(self.kernel)
     counts = None
     started = time.time()
     for _ in range(self.nrep):
         counts = hidden.state_counts(self.gamma[i], self.T[i])
     elapsed = time.time() - started
     return counts, elapsed / (1.0 * self.nrep)
Ejemplo n.º 2
0
 def run_transition_counts(self, i, out):
     """Benchmark ``hidden.transition_counts`` on test case ``i``.

     Parameters
     ----------
     i : index
         Used to look up ``self.alpha[i]``, ``self.beta[i]``, ``self.A[i]``
         and ``self.pobs[i]``.
     out : object
         Forwarded to ``hidden.transition_counts`` as ``out``.

     Returns
     -------
     tuple
         ``(counts, seconds)``: the result of the last call and the elapsed
         ``time.time()`` interval divided by ``self.nrep``.
     """
     hidden.set_implementation(self.kernel)
     counts = None
     t_begin = time.time()
     for _ in range(self.nrep):
         counts = hidden.transition_counts(
             self.alpha[i],
             self.beta[i],
             self.A[i],
             self.pobs[i],
             out=out,
         )
     t_total = time.time() - t_begin
     return counts, t_total / (1.0 * self.nrep)
Ejemplo n.º 3
0
 def run_viterbi(self, i, out):
     """Benchmark ``hidden.viterbi`` on test case ``i``.

     Parameters
     ----------
     i : index
         Used to look up ``self.A[i]``, ``self.pobs[i]`` and ``self.pi[i]``.
     out : object
         Accepted but not used by this method.

     Returns
     -------
     tuple
         ``(path, seconds)``: the result of the last call and the elapsed
         ``time.time()`` interval divided by ``self.nrep``.
     """
     hidden.set_implementation(self.kernel)
     path = None
     tic = time.time()
     for _ in range(self.nrep):
         path = hidden.viterbi(self.A[i], self.pobs[i], self.pi[i])
     toc = time.time()
     per_call = (toc - tic) / (1.0 * self.nrep)
     return path, per_call
Ejemplo n.º 4
0
 def run_gamma(self, i, out):
     """Benchmark ``hidden.state_probabilities`` on test case ``i``.

     Parameters
     ----------
     i : index
         Used to look up ``self.alpha[i]`` and ``self.beta[i]``.
     out : object
         Forwarded to ``hidden.state_probabilities`` as ``gamma_out``.

     Returns
     -------
     tuple
         ``(probabilities, seconds)``: the result of the last call and the
         elapsed ``time.time()`` interval divided by ``self.nrep``.
     """
     hidden.set_implementation(self.kernel)
     probabilities = None
     started = time.time()
     for _ in range(self.nrep):
         probabilities = hidden.state_probabilities(
             self.alpha[i], self.beta[i], gamma_out=out
         )
     finished = time.time()
     return probabilities, (finished - started) / (1.0 * self.nrep)
Ejemplo n.º 5
0
 def run_state_counts(self, i, out):
     """Time ``self.nrep`` evaluations of ``hidden.state_counts``.

     Parameters
     ----------
     i : index
         Selects ``self.gamma[i]`` and ``self.T[i]`` as the call arguments.
     out : object
         Part of the signature only; this method never reads it.

     Returns
     -------
     tuple
         The value returned by the final call, followed by the total
         ``time.time()`` difference divided by ``self.nrep``.
     """
     result = None
     hidden.set_implementation(self.kernel)
     t0 = time.time()
     for _rep in range(self.nrep):
         result = hidden.state_counts(self.gamma[i], self.T[i])
     t1 = time.time()
     mean_seconds = (t1 - t0) / (1.0 * self.nrep)
     return (result, mean_seconds)
Ejemplo n.º 6
0
 def run_backward(self, i, out):
     """Benchmark ``hidden.backward`` on test case ``i``.

     Parameters
     ----------
     i : index
         Used to look up ``self.A[i]`` and ``self.pobs[i]``.
     out : object
         Forwarded to ``hidden.backward`` as ``beta_out``.

     Returns
     -------
     tuple
         ``(beta, seconds)``: the result of the last call and the elapsed
         ``time.time()`` interval divided by ``self.nrep``.
     """
     hidden.set_implementation(self.kernel)
     last_beta = None
     started = time.time()
     for _ in range(self.nrep):
         last_beta = hidden.backward(self.A[i], self.pobs[i], beta_out=out)
     elapsed = time.time() - started
     return last_beta, elapsed / (1.0 * self.nrep)
Ejemplo n.º 7
0
 def run_backward(self, i, out):
     """Time ``self.nrep`` evaluations of ``hidden.backward``.

     Parameters
     ----------
     i : index
         Selects ``self.A[i]`` and ``self.pobs[i]`` as the call arguments.
     out : object
         Handed to ``hidden.backward`` through its ``beta_out`` keyword.

     Returns
     -------
     tuple
         The value returned by the final call, followed by the total
         ``time.time()`` difference divided by ``self.nrep``.
     """
     result = None
     hidden.set_implementation(self.kernel)
     tic = time.time()
     for _rep in range(self.nrep):
         result = hidden.backward(
             self.A[i],
             self.pobs[i],
             beta_out=out,
         )
     toc = time.time()
     return (result, (toc - tic) / (1.0 * self.nrep))
Ejemplo n.º 8
0
 def run_viterbi(self, i, out):
     """Time ``self.nrep`` evaluations of ``hidden.viterbi``.

     Parameters
     ----------
     i : index
         Selects ``self.A[i]``, ``self.pobs[i]`` and ``self.pi[i]`` as the
         call arguments.
     out : object
         Part of the signature only; this method never reads it.

     Returns
     -------
     tuple
         The value returned by the final call, followed by the total
         ``time.time()`` difference divided by ``self.nrep``.
     """
     result = None
     hidden.set_implementation(self.kernel)
     t0 = time.time()
     for _rep in range(self.nrep):
         result = hidden.viterbi(self.A[i], self.pobs[i], self.pi[i])
     total = time.time() - t0
     mean_seconds = total / (1.0 * self.nrep)
     return (result, mean_seconds)
Ejemplo n.º 9
0
 def run_gamma(self, i, out):
     """Time ``self.nrep`` evaluations of ``hidden.state_probabilities``.

     Parameters
     ----------
     i : index
         Selects ``self.alpha[i]`` and ``self.beta[i]`` as the call arguments.
     out : object
         Handed to ``hidden.state_probabilities`` through its ``gamma_out``
         keyword.

     Returns
     -------
     tuple
         The value returned by the final call, followed by the total
         ``time.time()`` difference divided by ``self.nrep``.
     """
     result = None
     hidden.set_implementation(self.kernel)
     t0 = time.time()
     for _rep in range(self.nrep):
         result = hidden.state_probabilities(
             self.alpha[i],
             self.beta[i],
             gamma_out=out,
         )
     t1 = time.time()
     return (result, (t1 - t0) / (1.0 * self.nrep))
Ejemplo n.º 10
0
 def run_transition_counts(self, i, out):
     """Time ``self.nrep`` evaluations of ``hidden.transition_counts``.

     Parameters
     ----------
     i : index
         Selects ``self.alpha[i]``, ``self.beta[i]``, ``self.A[i]`` and
         ``self.pobs[i]`` as the call arguments.
     out : object
         Handed to ``hidden.transition_counts`` through its ``out`` keyword.

     Returns
     -------
     tuple
         The value returned by the final call, followed by the total
         ``time.time()`` difference divided by ``self.nrep``.
     """
     result = None
     hidden.set_implementation(self.kernel)
     tic = time.time()
     for _rep in range(self.nrep):
         result = hidden.transition_counts(
             self.alpha[i], self.beta[i], self.A[i], self.pobs[i], out=out
         )
     toc = time.time()
     mean_seconds = (toc - tic) / (1.0 * self.nrep)
     return (result, mean_seconds)
Ejemplo n.º 11
0
 def run_viterbi(self, i, kernel, out):
     """Benchmark ``hidden.viterbi`` on test case ``i`` with a given kernel.

     The call is repeated ``int(10000 / self.T[i])`` times, but at least once.

     Parameters
     ----------
     i : index
         Used to look up ``self.A[i]``, ``self.pobs[i]``, ``self.pi[i]`` and
         ``self.T[i]``.
     kernel : object
         Passed to ``hidden.set_implementation`` before timing starts.
     out : object
         Accepted but not used by this method.

     Returns
     -------
     tuple
         ``(path, seconds)``: the result of the last call and the elapsed
         ``time.time()`` interval divided by the repetition count.
     """
     repetitions = max(1, int(10000 / self.T[i]))
     hidden.set_implementation(kernel)
     path = None
     started = time.time()
     for _ in range(repetitions):
         path = hidden.viterbi(self.A[i], self.pobs[i], self.pi[i])
     elapsed = time.time() - started
     return path, elapsed / repetitions
Ejemplo n.º 12
0
 def run_state_counts(self, i, kernel, out):
     """Benchmark ``hidden.state_counts`` on test case ``i`` with a given kernel.

     The call is repeated ``int(10000 / self.T[i])`` times, but at least once.

     Parameters
     ----------
     i : index
         Used to look up ``self.gamma[i]`` and ``self.T[i]``.
     kernel : object
         Passed to ``hidden.set_implementation`` before timing starts.
     out : object
         Accepted but not used by this method.

     Returns
     -------
     tuple
         ``(counts, seconds)``: the result of the last call and the elapsed
         ``time.time()`` interval divided by the repetition count.
     """
     repetitions = max(1, int(10000 / self.T[i]))
     hidden.set_implementation(kernel)
     counts = None
     started = time.time()
     for _ in range(repetitions):
         counts = hidden.state_counts(self.gamma[i], self.T[i])
     elapsed = time.time() - started
     return counts, elapsed / repetitions
Ejemplo n.º 13
0
 def run_forward(self, i, out):
     """Benchmark ``hidden.forward`` on test case ``i``.

     Parameters
     ----------
     i : index
         Used to look up ``self.A[i]``, ``self.pobs[i]`` and ``self.pi[i]``.
     out : object
         Forwarded to ``hidden.forward`` as ``alpha_out``.

     Returns
     -------
     tuple
         ``(logprob, alpha, seconds)``: the two values unpacked from the last
         call and the elapsed ``time.time()`` interval divided by
         ``self.nrep``.
     """
     hidden.set_implementation(self.kernel)
     log_likelihood, forward_vars = 0, None
     started = time.time()
     for _ in range(self.nrep):
         log_likelihood, forward_vars = hidden.forward(
             self.A[i], self.pobs[i], self.pi[i], alpha_out=out
         )
     elapsed = time.time() - started
     return log_likelihood, forward_vars, elapsed / (1.0 * self.nrep)
Ejemplo n.º 14
0
 def run_gamma(self, i, kernel, out):
     """Benchmark ``hidden.state_probabilities`` on test case ``i``.

     The call is repeated ``int(10000/self.T[i])`` times, but at least once.

     Parameters
     ----------
     i : index
         Used to look up ``self.alpha[i]``, ``self.beta[i]`` and ``self.T[i]``.
     kernel : object
         Passed to ``hidden.set_implementation`` before timing starts.
     out : object
         Forwarded to ``hidden.state_probabilities`` as ``gamma_out``.

     Returns
     -------
     tuple
         ``(probabilities, seconds)``: the result of the last call and the
         elapsed ``time.time()`` interval divided by the repetition count.
     """
     repetitions = max(1, int(10000 / self.T[i]))
     hidden.set_implementation(kernel)
     probabilities = None
     tic = time.time()
     for _ in range(repetitions):
         probabilities = hidden.state_probabilities(
             self.alpha[i],
             self.beta[i],
             gamma_out=out,
         )
     toc = time.time()
     return probabilities, (toc - tic) / repetitions
Ejemplo n.º 15
0
 def run_forward(self, i, out):
     """Time ``self.nrep`` evaluations of ``hidden.forward``.

     Parameters
     ----------
     i : index
         Selects ``self.A[i]``, ``self.pobs[i]`` and ``self.pi[i]`` as the
         call arguments.
     out : object
         Handed to ``hidden.forward`` through its ``alpha_out`` keyword.

     Returns
     -------
     tuple
         The two values unpacked from the final call, followed by the total
         ``time.time()`` difference divided by ``self.nrep``.
     """
     lp = 0
     fwd = None
     hidden.set_implementation(self.kernel)
     t0 = time.time()
     for _rep in range(self.nrep):
         lp, fwd = hidden.forward(
             self.A[i],
             self.pobs[i],
             self.pi[i],
             alpha_out=out,
         )
     t1 = time.time()
     mean_seconds = (t1 - t0) / (1.0 * self.nrep)
     return (lp, fwd, mean_seconds)
Ejemplo n.º 16
0
 def run_viterbi(self, i, kernel, out):
     """Time repeated ``hidden.viterbi`` calls using the given kernel.

     The number of repetitions is ``max(1, int(10000/self.T[i]))``.

     Parameters
     ----------
     i : index
         Selects ``self.A[i]``, ``self.pobs[i]``, ``self.pi[i]`` and
         ``self.T[i]``.
     kernel : object
         Argument for ``hidden.set_implementation``.
     out : object
         Part of the signature only; this method never reads it.

     Returns
     -------
     tuple
         The value returned by the final call, followed by the total
         ``time.time()`` difference divided by the number of repetitions.
     """
     n_calls = max(1, int(10000 / self.T[i]))
     result = None
     hidden.set_implementation(kernel)
     t0 = time.time()
     for _rep in range(n_calls):
         result = hidden.viterbi(self.A[i], self.pobs[i], self.pi[i])
     t1 = time.time()
     mean_seconds = (t1 - t0) / float(n_calls)
     return (result, mean_seconds)
Ejemplo n.º 17
0
 def run_transition_counts(self, i, kernel, out):
     """Time repeated ``hidden.transition_counts`` calls using the given kernel.

     The number of repetitions is ``max(1, int(10000/self.T[i]))``.

     Parameters
     ----------
     i : index
         Selects ``self.alpha[i]``, ``self.beta[i]``, ``self.A[i]``,
         ``self.pobs[i]`` and ``self.T[i]``.
     kernel : object
         Argument for ``hidden.set_implementation``.
     out : object
         Handed to ``hidden.transition_counts`` through its ``out`` keyword.

     Returns
     -------
     tuple
         The value returned by the final call, followed by the total
         ``time.time()`` difference divided by the number of repetitions.
     """
     n_calls = max(1, int(10000 / self.T[i]))
     result = None
     hidden.set_implementation(kernel)
     t0 = time.time()
     for _rep in range(n_calls):
         result = hidden.transition_counts(
             self.alpha[i],
             self.beta[i],
             self.A[i],
             self.pobs[i],
             out=out,
         )
     t1 = time.time()
     return (result, (t1 - t0) / float(n_calls))
Ejemplo n.º 18
0
 def run_state_counts(self, i, kernel, out):
     """Time repeated ``hidden.state_counts`` calls using the given kernel.

     The number of repetitions is ``max(1, int(10000/self.T[i]))``.

     Parameters
     ----------
     i : index
         Selects ``self.gamma[i]`` and ``self.T[i]``.
     kernel : object
         Argument for ``hidden.set_implementation``.
     out : object
         Part of the signature only; this method never reads it.

     Returns
     -------
     tuple
         The value returned by the final call, followed by the total
         ``time.time()`` difference divided by the number of repetitions.
     """
     n_calls = max(1, int(10000 / self.T[i]))
     result = None
     hidden.set_implementation(kernel)
     tic = time.time()
     for _rep in range(n_calls):
         result = hidden.state_counts(self.gamma[i], self.T[i])
     toc = time.time()
     mean_seconds = (toc - tic) / float(n_calls)
     return (result, mean_seconds)
Ejemplo n.º 19
0
 def run_backward(self, i, kernel, out):
     """Benchmark ``hidden.backward`` on test case ``i`` with a given kernel.

     The call is repeated ``int(10000 / self.T[i])`` times, but at least once.

     Parameters
     ----------
     i : index
         Used to look up ``self.A[i]``, ``self.pobs[i]`` and ``self.T[i]``.
     kernel : object
         Passed to ``hidden.set_implementation`` before timing starts.
     out : object
         Forwarded to ``hidden.backward`` as ``beta_out``.

     Returns
     -------
     tuple
         ``(beta, seconds)``: the result of the last call and the elapsed
         ``time.time()`` interval divided by the repetition count.
     """
     repetitions = max(1, int(10000 / self.T[i]))
     hidden.set_implementation(kernel)
     last_beta = None
     started = time.time()
     for _ in range(repetitions):
         last_beta = hidden.backward(self.A[i], self.pobs[i], beta_out=out)
     elapsed = time.time() - started
     return last_beta, elapsed / repetitions
Ejemplo n.º 20
0
 def run_backward(self, i, kernel, out):
     """Time repeated ``hidden.backward`` calls using the given kernel.

     The number of repetitions is ``max(1, int(10000/self.T[i]))``.

     Parameters
     ----------
     i : index
         Selects ``self.A[i]``, ``self.pobs[i]`` and ``self.T[i]``.
     kernel : object
         Argument for ``hidden.set_implementation``.
     out : object
         Handed to ``hidden.backward`` through its ``beta_out`` keyword.

     Returns
     -------
     tuple
         The value returned by the final call, followed by the total
         ``time.time()`` difference divided by the number of repetitions.
     """
     n_calls = max(1, int(10000 / self.T[i]))
     result = None
     hidden.set_implementation(kernel)
     t0 = time.time()
     for _rep in range(n_calls):
         result = hidden.backward(
             self.A[i],
             self.pobs[i],
             beta_out=out,
         )
     t1 = time.time()
     return (result, (t1 - t0) / float(n_calls))
Ejemplo n.º 21
0
 def run_forward(self, i, kernel, out):
     """Time repeated ``hidden.forward`` calls using the given kernel.

     The number of repetitions is ``max(1, int(10000/self.T[i]))``.

     Parameters
     ----------
     i : index
         Selects ``self.A[i]``, ``self.pobs[i]``, ``self.pi[i]`` and
         ``self.T[i]``.
     kernel : object
         Argument for ``hidden.set_implementation``.
     out : object
         Handed to ``hidden.forward`` through its ``alpha_out`` keyword.

     Returns
     -------
     tuple
         The two values unpacked from the final call, followed by the total
         ``time.time()`` difference divided by the number of repetitions.
     """
     n_calls = max(1, int(10000 / self.T[i]))
     lp, fwd = 0, None
     hidden.set_implementation(kernel)
     t0 = time.time()
     for _rep in range(n_calls):
         lp, fwd = hidden.forward(
             self.A[i],
             self.pobs[i],
             self.pi[i],
             alpha_out=out,
         )
     t1 = time.time()
     mean_seconds = (t1 - t0) / float(n_calls)
     return (lp, fwd, mean_seconds)
Ejemplo n.º 22
0
 def run_gamma(self, i, kernel, out):
     """Time repeated ``hidden.state_probabilities`` calls with a given kernel.

     The number of repetitions is ``max(1, int(10000 / self.T[i]))``.

     Parameters
     ----------
     i : index
         Selects ``self.alpha[i]``, ``self.beta[i]`` and ``self.T[i]``.
     kernel : object
         Argument for ``hidden.set_implementation``.
     out : object
         Handed to ``hidden.state_probabilities`` through its ``gamma_out``
         keyword.

     Returns
     -------
     tuple
         The value returned by the final call, followed by the total
         ``time.time()`` difference divided by the number of repetitions.
     """
     n_calls = max(1, int(10000 / self.T[i]))
     hidden.set_implementation(kernel)
     result = None
     tic = time.time()
     for _rep in range(n_calls):
         result = hidden.state_probabilities(self.alpha[i], self.beta[i], gamma_out=out)
     total = time.time() - tic
     return result, total / n_calls
Ejemplo n.º 23
0
 def run_transition_counts(self, i, kernel, out):
     """Benchmark ``hidden.transition_counts`` on test case ``i``.

     The call is repeated ``int(10000 / self.T[i])`` times, but at least once.

     Parameters
     ----------
     i : index
         Used to look up ``self.alpha[i]``, ``self.beta[i]``, ``self.A[i]``,
         ``self.pobs[i]`` and ``self.T[i]``.
     kernel : object
         Passed to ``hidden.set_implementation`` before timing starts.
     out : object
         Forwarded to ``hidden.transition_counts`` as ``out``.

     Returns
     -------
     tuple
         ``(counts, seconds)``: the result of the last call and the elapsed
         ``time.time()`` interval divided by the repetition count.
     """
     repetitions = max(1, int(10000 / self.T[i]))
     hidden.set_implementation(kernel)
     counts = None
     started = time.time()
     for _ in range(repetitions):
         counts = hidden.transition_counts(
             self.alpha[i], self.beta[i], self.A[i], self.pobs[i], out=out
         )
     elapsed = time.time() - started
     return counts, elapsed / repetitions
Ejemplo n.º 24
0
 def run_forward(self, i, kernel, out):
     """Benchmark ``hidden.forward`` on test case ``i`` with a given kernel.

     The call is repeated ``int(10000 / self.T[i])`` times, but at least once.

     Parameters
     ----------
     i : index
         Used to look up ``self.A[i]``, ``self.pobs[i]``, ``self.pi[i]`` and
         ``self.T[i]``.
     kernel : object
         Passed to ``hidden.set_implementation`` before timing starts.
     out : object
         Forwarded to ``hidden.forward`` as ``alpha_out``.

     Returns
     -------
     tuple
         ``(logprob, alpha, seconds)``: the two values unpacked from the last
         call and the elapsed ``time.time()`` interval divided by the
         repetition count.
     """
     repetitions = max(1, int(10000 / self.T[i]))
     hidden.set_implementation(kernel)
     log_likelihood, forward_vars = 0, None
     started = time.time()
     for _ in range(repetitions):
         log_likelihood, forward_vars = hidden.forward(
             self.A[i], self.pobs[i], self.pi[i], alpha_out=out
         )
     elapsed = time.time() - started
     return log_likelihood, forward_vars, elapsed / repetitions
Ejemplo n.º 25
0
    def __init__(
        self,
        observations,
        nstates,
        initial_model=None,
        reversible=True,
        transition_matrix_sampling_steps=1000,
        transition_matrix_prior=None,
        type="gaussian",
    ):
        """Initialize a Bayesian hidden Markov model sampler.

        Parameters
        ----------
        observations : list of numpy arrays representing temporal data
            `observations[i]` is a 1d numpy array corresponding to the observed trajectory index `i`
        nstates : int
            The number of states in the model.
        initial_model : HMM, optional, default=None
            If specified, a deep copy of the given initial model will be used to initialize the BHMM.
            Otherwise, a heuristic scheme is used to generate an initial guess.
        reversible : bool, optional, default=True
            If True, a prior that enforces reversible transition matrices (detailed balance) is used;
            otherwise, a standard  non-reversible prior is used.
        transition_matrix_sampling_steps : int, optional, default=1000
            number of transition matrix sampling steps per BHMM cycle
        transition_matrix_prior : None, str or ndarray(n,n)
            prior count matrix to be used for transition matrix sampling, or a keyword specifying the prior mode
            |  None (default),  -1 prior is used that ensures consistency between mean and MLE. Can lead to sampling
                disconnected matrices in the low-data regime. If you have disconnectivity problems, consider
                using 'init-connect'
            |  'init-connect',  prior count matrix ensuring the same connectivity as in the initial model. 1 count
                is added to all diagonals. All off-diagonals share one prior count distributed proportional to
                the row of the initial transition matrix.
            |  ndarray(n,n),  used as the prior count matrix; must have shape (nstates, nstates).
        type : str, optional, default='gaussian'
            Output model type, passed to the initial-model generator when no
            `initial_model` is given.  ['gaussian', 'discrete']

        Raises
        ------
        ValueError
            If `observations` is empty, if an ndarray `transition_matrix_prior`
            does not have shape (nstates, nstates), or if the prior keyword is
            not recognized.

        """
        # Sanity checks.
        # ValueError is a subclass of Exception, so handlers written against
        # the previous generic Exception keep working.
        if len(observations) == 0:
            raise ValueError("No observations were provided.")

        # Store options.
        self.reversible = reversible

        # Store the number of states.
        self.nstates = nstates

        # Store a copy of the observations.
        self.observations = copy.deepcopy(observations)
        self.nobs = len(observations)
        self.Ts = [len(o) for o in observations]
        self.maxT = np.max(self.Ts)

        # initial model
        # Test against None explicitly so that a model object which happens to
        # evaluate as falsy is not silently replaced.
        if initial_model is not None:
            # Use user-specified initial model, if provided.
            self.model = copy.deepcopy(initial_model)
        else:
            # Generate our own initial model.
            self.model = self._generateInitialModel(type)

        # prior counts
        if transition_matrix_prior is None:
            self.prior = np.zeros((self.nstates, self.nstates))
        elif isinstance(transition_matrix_prior, np.ndarray):
            # A wrongly shaped array used to be ignored, leaving self.prior
            # undefined; fail early with a clear message instead.
            if transition_matrix_prior.shape != (self.nstates, self.nstates):
                raise ValueError(
                    "transition matrix prior must have shape "
                    + str((self.nstates, self.nstates))
                    + ", got "
                    + str(transition_matrix_prior.shape)
                )
            self.prior = np.array(transition_matrix_prior)
        elif transition_matrix_prior == "init-connect":
            Pinit = self.model.transition_matrix
            # np.diag of a 2-D array returns the 1-D diagonal; a second np.diag
            # turns it into a diagonal matrix. Subtracting the 1-D vector
            # directly would broadcast it across columns instead of removing
            # the diagonal.
            self.prior = Pinit - np.diag(np.diag(Pinit))  # off-diagonals from initial T-matrix
            row_sums = self.prior.sum(axis=1)
            # Rows without off-diagonal mass stay at zero rather than becoming
            # NaN through a 0/0 division.
            has_offdiag = row_sums > 0
            self.prior[has_offdiag] /= row_sums[has_offdiag][:, None]  # scale off-diagonals to row sum 1
            self.prior += np.eye(nstates)  # add diagonal 1.
        else:
            raise ValueError("transition matrix prior mode undefined: " + str(transition_matrix_prior))

        # sampling options
        self.transition_matrix_sampling_steps = transition_matrix_sampling_steps

        # implementation options
        hidden.set_implementation(config.kernel)
        self.model.output_model.set_implementation(config.kernel)

        # pre-construct hidden variables
        self.alpha = np.zeros((self.maxT, self.nstates), config.dtype, order="C")
        self.pobs = np.zeros((self.maxT, self.nstates), config.dtype, order="C")
Ejemplo n.º 26
0
    def __init__(self,
                 observations,
                 nstates,
                 initial_model=None,
                 reversible=True,
                 transition_matrix_sampling_steps=1000,
                 transition_matrix_prior=None,
                 type='gaussian'):
        """Initialize a Bayesian hidden Markov model sampler.

        Parameters
        ----------
        observations : list of numpy arrays representing temporal data
            `observations[i]` is a 1d numpy array corresponding to the observed trajectory index `i`
        nstates : int
            The number of states in the model.
        initial_model : HMM, optional, default=None
            If specified, a deep copy of the given initial model will be used to initialize the BHMM.
            Otherwise, a heuristic scheme is used to generate an initial guess.
        reversible : bool, optional, default=True
            If True, a prior that enforces reversible transition matrices (detailed balance) is used;
            otherwise, a standard  non-reversible prior is used.
        transition_matrix_sampling_steps : int, optional, default=1000
            number of transition matrix sampling steps per BHMM cycle
        transition_matrix_prior : None, str or ndarray(n,n)
            prior count matrix to be used for transition matrix sampling, or a keyword specifying the prior mode
            |  None (default),  -1 prior is used that ensures consistency between mean and MLE. Can lead to sampling
                disconnected matrices in the low-data regime. If you have disconnectivity problems, consider
                using 'init-connect'
            |  'init-connect',  prior count matrix ensuring the same connectivity as in the initial model. 1 count
                is added to all diagonals. All off-diagonals share one prior count distributed proportional to
                the row of the initial transition matrix.
            |  ndarray(n,n),  used as the prior count matrix; must have shape (nstates, nstates).
        type : str, optional, default='gaussian'
            Output model type, passed to the initial-model generator when no
            `initial_model` is given.  ['gaussian', 'discrete']

        Raises
        ------
        ValueError
            If `observations` is empty, if an ndarray `transition_matrix_prior`
            does not have shape (nstates, nstates), or if the prior keyword is
            not recognized.

        """
        # Sanity checks.
        # ValueError is a subclass of Exception, so handlers written against
        # the previous generic Exception keep working.
        if len(observations) == 0:
            raise ValueError("No observations were provided.")

        # Store options.
        self.reversible = reversible

        # Store the number of states.
        self.nstates = nstates

        # Store a copy of the observations.
        self.observations = copy.deepcopy(observations)
        self.nobs = len(observations)
        self.Ts = [len(o) for o in observations]
        self.maxT = np.max(self.Ts)

        # initial model
        # Test against None explicitly so that a model object which happens to
        # evaluate as falsy is not silently replaced.
        if initial_model is not None:
            # Use user-specified initial model, if provided.
            self.model = copy.deepcopy(initial_model)
        else:
            # Generate our own initial model.
            self.model = self._generateInitialModel(type)

        # prior counts
        if transition_matrix_prior is None:
            self.prior = np.zeros((self.nstates, self.nstates))
        elif isinstance(transition_matrix_prior, np.ndarray):
            # A wrongly shaped array used to be ignored, leaving self.prior
            # undefined; fail early with a clear message instead.
            if transition_matrix_prior.shape != (self.nstates, self.nstates):
                raise ValueError('transition matrix prior must have shape ' +
                                 str((self.nstates, self.nstates)) +
                                 ', got ' +
                                 str(transition_matrix_prior.shape))
            self.prior = np.array(transition_matrix_prior)
        elif transition_matrix_prior == 'init-connect':
            Pinit = self.model.transition_matrix
            # np.diag of a 2-D array returns the 1-D diagonal; a second np.diag
            # turns it into a diagonal matrix. Subtracting the 1-D vector
            # directly would broadcast it across columns instead of removing
            # the diagonal.
            self.prior = Pinit - np.diag(
                np.diag(Pinit))  # off-diagonals from initial T-matrix
            row_sums = self.prior.sum(axis=1)
            # Rows without off-diagonal mass stay at zero rather than becoming
            # NaN through a 0/0 division.
            has_offdiag = row_sums > 0
            self.prior[has_offdiag] /= row_sums[
                has_offdiag][:, None]  # scale off-diagonals to row sum 1
            self.prior += np.eye(nstates)  # add diagonal 1.
        else:
            raise ValueError('transition matrix prior mode undefined: ' +
                             str(transition_matrix_prior))

        # sampling options
        self.transition_matrix_sampling_steps = transition_matrix_sampling_steps

        # implementation options
        hidden.set_implementation(config.kernel)
        self.model.output_model.set_implementation(config.kernel)

        # pre-construct hidden variables
        self.alpha = np.zeros((self.maxT, self.nstates),
                              config.dtype,
                              order='C')
        self.pobs = np.zeros((self.maxT, self.nstates),
                             config.dtype,
                             order='C')
Ejemplo n.º 27
0
    def __init__(
        self,
        observations,
        nstates,
        initial_model=None,
        reversible=True,
        stationary=False,
        transition_matrix_sampling_steps=1000,
        p0_prior="mixed",
        transition_matrix_prior="mixed",
        output="gaussian",
    ):
        r"""Initialize a Bayesian hidden Markov model sampler.

        Parameters
        ----------
        observations : list of numpy arrays representing temporal data
            `observations[i]` is a 1d numpy array corresponding to the observed trajectory index `i`
        nstates : int
            The number of states in the model.
        initial_model : HMM, optional, default=None
            If specified, a deep copy of the given initial model will be used to initialize the BHMM.
            Otherwise, a heuristic scheme is used to generate an initial guess.
        reversible : bool, optional, default=True
            If True, a prior that enforces reversible transition matrices (detailed balance) is used;
            otherwise, a standard  non-reversible prior is used.
        stationary : bool, optional, default=False
            If True, the stationary distribution of the transition matrix will be used as initial distribution.
            Only use True if you are confident that the observation trajectories are started from a global
            equilibrium. If False, the initial distribution will be estimated as usual from the first step
            of the hidden trajectories.
        transition_matrix_sampling_steps : int, optional, default=1000
            number of transition matrix sampling steps per BHMM cycle
        p0_prior : None, str or ndarray(n)
            Prior for the initial distribution of the HMM. Will only be active
            if stationary=False (stationary=True means that p0 is identical to
            the stationary distribution of the transition matrix).
            Currently implements different versions of the Dirichlet prior that
            is conjugate to the Dirichlet distribution of p0. p0 is sampled from:

            .. math::
                p0 \sim \prod_i (p0)_i^{a_i + n_i - 1}

            where :math:`n_i` are the number of times a hidden trajectory was in
            state :math:`i` at time step 0 and :math:`a_i` is the prior count.
            Following options are available:
            |  'mixed' (default),  :math:`a_i = p_{0,init}`, where :math:`p_{0,init}`
                is the initial distribution of initial_model.
            |  'uniform',  :math:`a_i = 1`
            |  ndarray(n),
                the given array will be used as prior counts :math:`a`; it must
                be one-dimensional with `nstates` entries.
            |  None or 'sparse',  :math:`a_i = 0`. This option ensures coincidence between
                sample mean and MLE. Will sooner or later lead to sampling problems,
                because as soon as zero trajectories are drawn from a given state,
                the sampler cannot recover and that state will never serve as a starting
                state subsequently. Only recommended in the large data regime and
                when the probability to sample zero trajectories from any state
                is negligible.
        transition_matrix_prior : None, str or ndarray(n, n)
            Prior for the HMM transition matrix.
            Currently implements Dirichlet priors if reversible=False and reversible
            transition matrix priors as described in [1]_ if reversible=True. For the
            nonreversible case the posterior of transition matrix :math:`P` is:

            .. math::
                P \sim \prod_{i,j} p_{ij}^{b_{ij} + c_{ij} - 1}

            where :math:`c_{ij}` are the number of transitions found for hidden
            trajectories and :math:`b_{ij}` are prior counts.
            |  'mixed' (default),  :math:`b_{ij} = p_{ij,init}`, where :math:`p_{ij,init}`
                is the transition matrix of initial_model. That means one prior
                count will be used per row.
            |  'uniform',  :math:`b_{ij} = 1`
            |  ndarray(n, n),
                the given array will be used as prior counts :math:`b`; it must
                have shape (nstates, nstates).
            |  None or 'sparse',  :math:`b_{ij} = 0`. This option ensures coincidence between
                sample mean and MLE. Will sooner or later lead to sampling problems,
                because as soon as a transition :math:`ij` will not occur in a
                sample, the sampler cannot recover and that transition will never
                be sampled again. This option is not recommended unless you have
                a small HMM and a lot of data.
        output : str, optional, default='gaussian'
            Output model type, passed to the initial-model generator when no
            `initial_model` is given.  ['gaussian', 'discrete']

        Raises
        ------
        ValueError
            If `observations` is empty, if an ndarray prior has the wrong
            shape, or if a prior keyword is not recognized.
        NotImplementedError
            If `reversible` is True and the initial transition matrix plus the
            prior counts is not strongly connected.

        References
        ----------
        .. [1] Trendelkamp-Schroer, B., H. Wu, F. Paul and F. Noe:
            Estimation and uncertainty of reversible Markov models.
            J. Chem. Phys. 143, 174101 (2015).

        """
        # Sanity checks.
        # ValueError is a subclass of Exception, so handlers written against
        # the previous generic Exception keep working.
        if len(observations) == 0:
            raise ValueError("No observations were provided.")

        # Store options.
        self.reversible = reversible
        self.stationary = stationary

        # Store the number of states.
        self.nstates = nstates

        # Store a copy of the observations.
        self.observations = copy.deepcopy(observations)
        self.nobs = len(observations)
        self.Ts = [len(o) for o in observations]
        self.maxT = np.max(self.Ts)

        # initial model
        # Test against None explicitly so that a model object which happens to
        # evaluate as falsy is not silently replaced.
        if initial_model is not None:
            # Use user-specified initial model, if provided.
            self.model = copy.deepcopy(initial_model)
        else:
            # Generate our own initial model.
            self.model = self._generateInitialModel(output)

        # prior initial vector
        # Arrays are handled before any string comparison: `array == "sparse"`
        # is not a mode check and may not evaluate to a plain bool.
        if isinstance(p0_prior, np.ndarray):
            # shape is a tuple, so compare its single entry with nstates;
            # comparing the tuple with the scalar never matched.
            if p0_prior.ndim != 1 or p0_prior.shape[0] != self.nstates:
                raise ValueError("initial distribution prior must have dimension " + str(nstates))
            self.prior_n0 = np.array(p0_prior)
        elif p0_prior is None or p0_prior == "sparse":
            self.prior_n0 = np.zeros(self.nstates)
        elif p0_prior == "mixed":
            self.prior_n0 = np.array(self.model.initial_distribution)
        elif p0_prior == "uniform":
            self.prior_n0 = np.ones(nstates)
        else:
            raise ValueError("initial distribution prior mode undefined: " + str(p0_prior))

        # prior count matrix
        # The mode checks below look at transition_matrix_prior itself; they
        # previously inspected p0_prior, so 'uniform' and 'sparse' only took
        # effect when p0_prior happened to carry the same keyword.
        if isinstance(transition_matrix_prior, np.ndarray):
            if transition_matrix_prior.shape != (self.nstates, self.nstates):
                raise ValueError(
                    "transition matrix prior must have shape "
                    + str((self.nstates, self.nstates))
                    + ", got "
                    + str(transition_matrix_prior.shape)
                )
            self.prior_C = np.array(transition_matrix_prior)
        elif transition_matrix_prior is None or transition_matrix_prior == "sparse":
            self.prior_C = np.zeros((self.nstates, self.nstates))
        elif transition_matrix_prior == "mixed":
            self.prior_C = np.array(self.model.transition_matrix)
        elif transition_matrix_prior == "uniform":
            self.prior_C = np.ones((nstates, nstates))
        else:
            raise ValueError("transition matrix prior mode undefined: " + str(transition_matrix_prior))

        # check if we work with these options
        if reversible:
            if not _tmatrix_disconnected.is_connected(self.model.transition_matrix + self.prior_C, strong=True):
                raise NotImplementedError(
                    "Trying to sample disconnected HMM with option reversible:\n "
                    + str(self.model.transition_matrix)
                    + "\nUse prior to connect, select connected subset, or use reversible=False."
                )

        # sampling options
        self.transition_matrix_sampling_steps = transition_matrix_sampling_steps

        # implementation options
        hidden.set_implementation(config.kernel)
        self.model.output_model.set_implementation(config.kernel)

        # pre-construct hidden variables
        self.alpha = np.zeros((self.maxT, self.nstates), config.dtype, order="C")
        self.pobs = np.zeros((self.maxT, self.nstates), config.dtype, order="C")
Ejemplo n.º 28
0
    def __init__(self,
                 observations,
                 nstates,
                 initial_model=None,
                 reversible=True,
                 stationary=False,
                 transition_matrix_sampling_steps=1000,
                 p0_prior='mixed',
                 transition_matrix_prior='mixed',
                 output='gaussian'):
        """Initialize a Bayesian hidden Markov model sampler.

        Parameters
        ----------
        observations : list of numpy arrays representing temporal data
            `observations[i]` is a 1d numpy array corresponding to the observed trajectory index `i`
        nstates : int
            The number of states in the model.
        initial_model : HMM, optional, default=None
            If specified, the given initial model will be used to initialize the BHMM.
            Otherwise, a heuristic scheme is used to generate an initial guess.
        reversible : bool, optional, default=True
            If True, a prior that enforces reversible transition matrices (detailed balance) is used;
            otherwise, a standard  non-reversible prior is used.
        stationary : bool, optional, default=False
            If True, the stationary distribution of the transition matrix will be used as initial distribution.
            Only use True if you are confident that the observation trajectories are started from a global
            equilibrium. If False, the initial distribution will be estimated as usual from the first step
            of the hidden trajectories.
        transition_matrix_sampling_steps : int, optional, default=1000
            number of transition matrix sampling steps per BHMM cycle
        p0_prior : None, str, float or ndarray(n)
            Prior for the initial distribution of the HMM. Will only be active
            if stationary=False (stationary=True means that p0 is identical to
            the stationary distribution of the transition matrix).
            Currently implements different versions of the Dirichlet prior that
            is conjugate to the Dirichlet distribution of p0. p0 is sampled from:
            .. math:
                p0 \sim \prod_i (p0)_i^{a_i + n_i - 1}
            where :math:`n_i` are the number of times a hidden trajectory was in
            state :math:`i` at time step 0 and :math:`a_i` is the prior count.
            Following options are available:
            |  'mixed' (default),  :math:`a_i = p_{0,init}`, where :math:`p_{0,init}`
                is the initial distribution of initial_model.
            |  'uniform',  :math:`a_i = 1`
            |  ndarray(n) or float,
                the given array will be used as A.
            |  None,  :math:`a_i = 0`. This option ensures coincidence between
                sample mean an MLE. Will sooner or later lead to sampling problems,
                because as soon as zero trajectories are drawn from a given state,
                the sampler cannot recover and that state will never serve as a starting
                state subsequently. Only recommended in the large data regime and
                when the probability to sample zero trajectories from any state
                is negligible.
        transition_matrix_prior : str or ndarray(n, n)
            Prior for the HMM transition matrix.
            Currently implements Dirichlet priors if reversible=False and reversible
            transition matrix priors as described in [1]_ if reversible=True. For the
            nonreversible case the posterior of transition matrix :math:`P` is:
            .. math:
                P \sim \prod_{i,j} p_{ij}^{b_{ij} + c_{ij} - 1}
            where :math:`c_{ij}` are the number of transitions found for hidden
            trajectories and :math:`b_{ij}` are prior counts.
            |  'mixed' (default),  :math:`b_{ij} = p_{ij,init}`, where :math:`p_{ij,init}`
                is the transition matrix of initial_model. That means one prior
                count will be used per row.
            |  'uniform',  :math:`b_{ij} = 1`
            |  ndarray(n, n) or broadcastable,
                the given array will be used as B.
            |  None,  :math:`b_ij = 0`. This option ensures coincidence between
                sample mean an MLE. Will sooner or later lead to sampling problems,
                because as soon as a transition :math:`ij` will not occur in a
                sample, the sampler cannot recover and that transition will never
                be sampled again. This option is not recommended unless you have
                a small HMM and a lot of data.
        output_model_type : str, optional, default='gaussian'
            Output model type.  ['gaussian', 'discrete']

        References
        ----------
        .. [1] Trendelkamp-Schroer, B., H. Wu, F. Paul and F. Noe:
            Estimation and uncertainty of reversible Markov models.
            J. Chem. Phys. 143, 174101 (2015).

        """
        # Sanity checks.
        if len(observations) == 0:
            raise Exception("No observations were provided.")

        # Store options.
        self.reversible = reversible
        self.stationary = stationary

        # Store the number of states.
        self.nstates = nstates

        # Store a copy of the observations.
        self.observations = copy.deepcopy(observations)
        self.nobs = len(observations)
        self.Ts = [len(o) for o in observations]
        self.maxT = np.max(self.Ts)

        # initial model
        if initial_model:
            # Use user-specified initial model, if provided.
            self.model = copy.deepcopy(initial_model)
        else:
            # Generate our own initial model.
            self.model = self._generateInitialModel(output)

        # prior initial vector
        if p0_prior is None or p0_prior == 'sparse':
            self.prior_n0 = np.zeros(self.nstates)
        elif isinstance(p0_prior, np.ndarray):
            if len(p0_prior.shape) == 1 and p0_prior.shape[0] == self.nstates:
                self.prior_n0 = np.array(p0_prior)
            else:
                raise ValueError(
                    'initial distribution prior must have dimension ' +
                    str(nstates))
        elif p0_prior == 'mixed':
            self.prior_n0 = np.array(self.model.initial_distribution)
        elif p0_prior == 'uniform':
            self.prior_n0 = np.ones(nstates)
        else:
            raise ValueError('initial distribution prior mode undefined: ' +
                             str(p0_prior))

        # prior count matrix
        if transition_matrix_prior is None or p0_prior == 'sparse':
            self.prior_C = np.zeros((self.nstates, self.nstates))
        elif isinstance(transition_matrix_prior, np.ndarray):
            if np.array_equal(transition_matrix_prior.shape,
                              (self.nstates, self.nstates)):
                self.prior_C = np.array(transition_matrix_prior)
        elif transition_matrix_prior == 'mixed':
            self.prior_C = np.array(self.model.transition_matrix)
        elif p0_prior == 'uniform':
            self.prior_C = np.ones((nstates, nstates))
        else:
            raise ValueError('transition matrix prior mode undefined: ' +
                             str(transition_matrix_prior))

        # check if we work with these options
        if reversible:
            if not _tmatrix_disconnected.is_connected(
                    self.model.transition_matrix + self.prior_C, strong=True):
                raise NotImplementedError(
                    'Trying to sample disconnected HMM with option reversible:\n '
                    + str(self.model.transition_matrix) +
                    '\nUse prior to connect, select connected subset, or use reversible=False.'
                )

        # sampling options
        self.transition_matrix_sampling_steps = transition_matrix_sampling_steps

        # implementation options
        hidden.set_implementation(config.kernel)
        self.model.output_model.set_implementation(config.kernel)

        # pre-construct hidden variables
        self.alpha = np.zeros((self.maxT, self.nstates),
                              config.dtype,
                              order='C')
        self.pobs = np.zeros((self.maxT, self.nstates),
                             config.dtype,
                             order='C')

        return
Ejemplo n.º 29
0
    def __init__(self, observations, nstates, initial_model=None, output='gaussian',
                 reversible=True, stationary=False, p=None, accuracy=1e-3, maxit=1000, maxit_P=100000):
        """Initialize the hidden Markov model estimator.

        Stores a private copy of the data and the estimation options, sets up
        the initial model and pre-allocates the work arrays used during the
        iteration.

        Parameters
        ----------
        observations : list of numpy arrays representing temporal data
            `observations[i]` is a 1d numpy array corresponding to the observed
            trajectory index `i`. Must contain at least one trajectory.
        nstates : int
            The number of states in the model.
        initial_model : HMM, optional, default=None
            If specified, a deep copy of the given model is used as the
            starting point. Otherwise an initial guess is generated with
            `bhmm.init_hmm`.
        output : str, optional, default='gaussian'
            Output model type (e.g. 'gaussian' or 'discrete'). Only used to
            generate the initial guess when `initial_model` is None.
        reversible : bool, optional, default=True
            If True, a prior that enforces reversible transition matrices (detailed
            balance) is used; otherwise, a standard non-reversible prior is used.
        stationary : bool, optional, default=False
            If True, the initial distribution of hidden states is self-consistently
            computed as the stationary distribution of the transition matrix. If
            False, it will be estimated from the starting states.
        p : ndarray (nstates), optional, default=None
            Initial or fixed stationary distribution. If given and stationary=True,
            transition matrices will be estimated with the constraint that they
            have p as their stationary distribution. If given and stationary=False,
            p is the fixed initial distribution of hidden states.
        accuracy : float
            Convergence threshold for EM iteration. When the likelihood does
            not increase by more than accuracy, the iteration is stopped successfully.
        maxit : int
            Stopping criterion for EM iteration. When so many iterations are
            performed without reaching the requested accuracy, the iteration is
            stopped without convergence (a warning is given).
        maxit_P : int
            Maximum number of iterations for reversible transition matrix estimation.
            Only used with reversible=True.

        Raises
        ------
        ValueError
            If `observations` is empty.

        """
        # Fail early with a readable message: np.max() on an empty list would
        # otherwise raise an opaque "zero-size array" ValueError further down.
        # len() is used instead of truthiness so array-like inputs work too.
        if len(observations) == 0:
            raise ValueError("No observations were provided.")

        # Store a copy of the observations so later changes by the caller
        # cannot affect this estimator.
        self._observations = copy.deepcopy(observations)
        self._nobs = len(observations)
        self._Ts = [len(o) for o in observations]
        self._maxT = np.max(self._Ts)

        # Set parameters
        self._nstates = nstates
        self._reversible = reversible
        self._stationary = stationary

        if initial_model is not None:
            # Use user-specified initial model, if provided.
            self._hmm = copy.deepcopy(initial_model)
        else:
            # Generate our own initial model.
            self._hmm = bhmm.init_hmm(observations, nstates, output=output)

        # stationary and initial distribution: `p` fills exactly one of the
        # two slots, selected by `stationary`; the other one stays None.
        self._fixed_stationary_distribution = None
        self._fixed_initial_distribution = None
        if p is not None:
            if stationary:
                self._fixed_stationary_distribution = np.array(p)
            else:
                self._fixed_initial_distribution = np.array(p)

        # pre-construct hidden variables
        # alpha/beta/pobs are sized for the longest trajectory.
        self._alpha = np.zeros((self._maxT, self._nstates), config.dtype, order='C')
        self._beta = np.zeros((self._maxT, self._nstates), config.dtype, order='C')
        self._pobs = np.zeros((self._maxT, self._nstates), config.dtype, order='C')
        # One gamma array (trajectory length x nstates) and one nstates x nstates
        # array per trajectory.
        self._gammas = [np.zeros((T, self._nstates), config.dtype, order='C') for T in self._Ts]
        self._Cs = [np.zeros((self._nstates, self._nstates), config.dtype, order='C') for _ in range(self._nobs)]

        # convergence options
        self._accuracy = accuracy
        self._maxit = maxit
        self._maxit_P = maxit_P
        self._likelihoods = None

        # Kernel for computing things
        hidden.set_implementation(config.kernel)
        self._hmm.output_model.set_implementation(config.kernel)
Ejemplo n.º 30
0
    def __init__(self,
                 observations,
                 nstates,
                 initial_model=None,
                 output='gaussian',
                 reversible=True,
                 stationary=False,
                 p=None,
                 accuracy=1e-3,
                 maxit=1000,
                 maxit_P=100000):
        """Initialize the hidden Markov model estimator.

        Keeps a private copy of the observation data, records the estimation
        options, obtains an initial model and allocates the arrays that are
        reused during the iteration.

        Parameters
        ----------
        observations : list of numpy arrays representing temporal data
            `observations[i]` is a 1d numpy array corresponding to the observed
            trajectory index `i`. At least one trajectory is required.
        nstates : int
            The number of states in the model.
        initial_model : HMM, optional, default=None
            If specified, a deep copy of the given model is used as the
            starting point. Otherwise an initial guess is generated with
            `bhmm.init_hmm`.
        output : str, optional, default='gaussian'
            Output model type (e.g. 'gaussian' or 'discrete'). Only used to
            generate the initial guess when `initial_model` is None.
        reversible : bool, optional, default=True
            If True, a prior that enforces reversible transition matrices (detailed
            balance) is used; otherwise, a standard non-reversible prior is used.
        stationary : bool, optional, default=False
            If True, the initial distribution of hidden states is self-consistently
            computed as the stationary distribution of the transition matrix. If
            False, it will be estimated from the starting states.
        p : ndarray (nstates), optional, default=None
            Initial or fixed stationary distribution. If given and stationary=True,
            transition matrices will be estimated with the constraint that they
            have p as their stationary distribution. If given and stationary=False,
            p is the fixed initial distribution of hidden states.
        accuracy : float
            Convergence threshold for EM iteration. When the likelihood does
            not increase by more than accuracy, the iteration is stopped successfully.
        maxit : int
            Stopping criterion for EM iteration. When so many iterations are
            performed without reaching the requested accuracy, the iteration is
            stopped without convergence (a warning is given).
        maxit_P : int
            Maximum number of iterations for reversible transition matrix estimation.
            Only used with reversible=True.

        Raises
        ------
        ValueError
            If `observations` is empty.

        """
        # Reject empty input up front. Without this guard np.max() below fails
        # with a "zero-size array" ValueError that does not name the real cause.
        # len() (not truthiness) keeps array-like containers working.
        if len(observations) == 0:
            raise ValueError("No observations were provided.")

        # Store a copy of the observations so the estimator is insulated from
        # later modifications made by the caller.
        self._observations = copy.deepcopy(observations)
        self._nobs = len(observations)
        self._Ts = [len(o) for o in observations]
        self._maxT = np.max(self._Ts)

        # Set parameters
        self._nstates = nstates
        self._reversible = reversible
        self._stationary = stationary

        if initial_model is not None:
            # Use user-specified initial model, if provided.
            self._hmm = copy.deepcopy(initial_model)
        else:
            # Generate our own initial model.
            self._hmm = bhmm.init_hmm(observations, nstates, output=output)

        # stationary and initial distribution: `stationary` decides which of
        # the two attributes receives `p`; the other remains None.
        self._fixed_stationary_distribution = None
        self._fixed_initial_distribution = None
        if p is not None:
            if stationary:
                self._fixed_stationary_distribution = np.array(p)
            else:
                self._fixed_initial_distribution = np.array(p)

        # pre-construct hidden variables
        # The three shared buffers are sized for the longest trajectory.
        self._alpha = np.zeros((self._maxT, self._nstates),
                               config.dtype,
                               order='C')
        self._beta = np.zeros((self._maxT, self._nstates),
                              config.dtype,
                              order='C')
        self._pobs = np.zeros((self._maxT, self._nstates),
                              config.dtype,
                              order='C')
        # Per-trajectory arrays: one (length x nstates) gamma array and one
        # (nstates x nstates) array for every trajectory.
        self._gammas = [
            np.zeros((T, self._nstates), config.dtype, order='C')
            for T in self._Ts
        ]
        self._Cs = [
            np.zeros((self._nstates, self._nstates), config.dtype, order='C')
            for _ in range(self._nobs)
        ]

        # convergence options
        self._accuracy = accuracy
        self._maxit = maxit
        self._maxit_P = maxit_P
        self._likelihoods = None

        # Kernel for computing things
        hidden.set_implementation(config.kernel)
        self._hmm.output_model.set_implementation(config.kernel)