Example 1
 def _do_forward_pass(self, framelogprob):
     n_samples, n_components = framelogprob.shape
     fwdlattice = np.zeros((n_samples, n_components))
     _hmmc._forward(n_samples, n_components, log_mask_zero(self.startprob_),
                    log_mask_zero(self.transmat_), framelogprob, fwdlattice)
     with np.errstate(under="ignore"):
         return logsumexp(fwdlattice[-1]), fwdlattice
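
All of these snippets lean on hmmlearn's log_mask_zero helper. As a rough sketch of its behavior (the real helper lives in hmmlearn.utils), it takes an element-wise log while silencing the divide-by-zero warning, so zero probabilities become -inf rather than raising:

import numpy as np

def log_mask_zero(a):
    # Element-wise log of probabilities; zeros map to -inf instead of
    # triggering a divide-by-zero warning.
    a = np.asarray(a)
    with np.errstate(divide="ignore"):
        return np.log(a)
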
Example 2
 def _do_backward_pass(self, framelogprob):
     n_samples, n_components = framelogprob.shape
     bwdlattice = np.zeros((n_samples, n_components))
     _hmmc._backward(n_samples, n_components,
                     log_mask_zero(self.startprob_),
                     log_mask_zero(self.transmat_), framelogprob,
                     bwdlattice)
     return bwdlattice
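
Once both lattices are available, the state posteriors follow by combining them per time step. A minimal sketch of that gamma computation (the function name is illustrative, not necessarily this class's exact _compute_posteriors):

import numpy as np
from scipy.special import logsumexp

def compute_posteriors(fwdlattice, bwdlattice):
    # gamma_t(i) is proportional to alpha_t(i) * beta_t(i); normalize each
    # time step in log space before exponentiating.
    log_gamma = fwdlattice + bwdlattice
    log_gamma -= logsumexp(log_gamma, axis=1, keepdims=True)
    return np.exp(log_gamma)
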
Example 3
def _multinomial_hmm_converter(scope, operator, container):
    """Convert HMM Multinomial model to ONNX model

    """
    input_states = operator.inputs[0]
    output_states = operator.outputs[0]

    hmm_clf = operator.raw_operator
    if hmm_clf.algorithm != 'viterbi':
        raise NotImplementedError(
            ("Multinomial model with {} decoder algorithm "
             "is not supported yet").format(hmm_clf.algorithm))
    log_emissionprob = log_mask_zero(hmm_clf.emissionprob_).astype(np.float32)
    log_startprob = log_mask_zero(hmm_clf.startprob_).astype(np.float32)
    log_transmat_t = log_mask_zero(hmm_clf.transmat_).astype(np.float32).T
    n_components = log_startprob.shape[0]

    log_emissionprob_name = scope.get_unique_variable_name("log_emissionprob")
    log_startprob_name = scope.get_unique_variable_name("log_startprob")
    log_transmat_t_name = scope.get_unique_variable_name("log_transmat_t")
    container.add_initializer(log_emissionprob_name, TensorProto.FLOAT,
                              list(log_emissionprob.T.shape),
                              log_emissionprob.T.flatten())
    container.add_initializer(log_startprob_name, TensorProto.FLOAT,
                              list(log_startprob.shape),
                              log_startprob.flatten())
    container.add_initializer(log_transmat_t_name, TensorProto.FLOAT,
                              list(log_transmat_t.shape),
                              log_transmat_t.flatten())

    op_name_gather = scope.get_unique_operator_name('Gather')
    op_name_viterbi = scope.get_unique_operator_name('Viterbi')
    framelogprob = scope.declare_local_variable(
        'framelogprob', data_types.FloatTensorType([None, n_components]))
    container.add_node('Gather',
                       inputs=[log_emissionprob_name, input_states.onnx_name],
                       outputs=[framelogprob.onnx_name],
                       name=op_name_gather,
                       axis=0)
    container.add_node('Viterbi',
                       inputs=[
                           log_startprob_name, log_transmat_t_name,
                           framelogprob.onnx_name
                       ],
                       outputs=[output_states.onnx_name],
                       op_domain='ml.hmm',
                       op_version=1,
                       name=op_name_viterbi)
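
For intuition, the Gather node above mirrors what the MultinomialHMM does in NumPy (compare Example 10): it indexes the transposed log emission matrix by the observed symbols to build the per-frame log-likelihoods fed to the Viterbi node. A hedged NumPy sketch of that step (the function name is made up):

import numpy as np

def gather_framelogprob(log_emissionprob, symbols):
    # Equivalent of Gather(log_emissionprob.T, symbols, axis=0): row t holds
    # log P(symbols[t] | state j) for every state j.
    return log_emissionprob.T[np.asarray(symbols)]
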
Example 4
 def _accumulate_sufficient_statistics(self, stats, X, framelogprob,
                                       posteriors, fwdlattice, bwdlattice,
                                       run_lengths):
     stats['nobs'] += 1
     if 's' in self.params:
         first_posterior = get_log_init_posterior(
             np.log(self.startprob_) + framelogprob[0], bwdlattice[0],
             run_lengths[0], log_mask_zero(self.transmat_), framelogprob[0])
         log_normalize(first_posterior)
         stats['start'] += np.exp(first_posterior)
     if 't' in self.params:
         n_samples, n_components = framelogprob.shape
         if n_samples <= 1:
             return
         full_fwdlattice = np.vstack(
             (np.log(self.startprob_) + framelogprob[0], fwdlattice))
         log_xi_sum = compute_log_xi_sum(full_fwdlattice,
                                         log_mask_zero(self.transmat_),
                                         bwdlattice, framelogprob,
                                         run_lengths)
         with np.errstate(under="ignore"):
             stats['trans'] += np.exp(log_xi_sum)
Example 5
 def _do_forward_pass(self, framelogprob, run_lengths):
     n_samples, n_components = framelogprob.shape
     diagonalized = self.__get_lpdps(framelogprob)
     fwdlattice = np.zeros((n_samples, n_components))
     lv = log_mask_zero(self.startprob_) + framelogprob[0]
     for t in range(n_samples):
         (lp, sp), (ld, sd), (lr, sr) = diagonalized[t]
         ld *= run_lengths[t] - (t == 0)
         for j in range(2):
             fwdlattice[t, j] = logsumexp(
                 [lv[c] + lp[c, k] + ld[k] + lr[k, j]
                  for k in range(2) for c in range(2)],
                 b=[sp[c, k] * sd[k] * sr[k, j]
                    for k in range(2) for c in range(2)])
         lv = fwdlattice[t]
     with np.errstate(under="ignore"):
         return logsumexp(fwdlattice[-1]), fwdlattice
Example 6
    def _accumulate_sufficient_statistics(self, stats, X, framelogprob,
                                          posteriors, fwdlattice, bwdlattice):
        """Updates sufficient statistics from a given sample.

        Parameters
        ----------
        stats : dict
            Sufficient statistics as returned by
            :meth:`~base._BaseHMM._initialize_sufficient_statistics`.

        X : array, shape (n_samples, n_features)
            Sample sequence.

        framelogprob : array, shape (n_samples, n_components)
            Log-probabilities of each sample under each of the model states.

        posteriors : array, shape (n_samples, n_components)
            Posterior probabilities of each sample being generated by each
            of the model states.

        fwdlattice, bwdlattice : array, shape (n_samples, n_components)
            Log-forward and log-backward probabilities.
        """
        stats['nobs'] += 1
        if 's' in self.params:
            stats['start'] += posteriors[0]
        if 't' in self.params:
            n_samples, n_components = framelogprob.shape
            # when the sample is of length 1, it contains no transitions
            # so there is no reason to update our trans. matrix estimate
            if n_samples <= 1:
                return

            log_xi_sum = np.full((n_components, n_components), -np.inf)
            _hmmc._compute_log_xi_sum(n_samples, n_components, fwdlattice,
                                      log_mask_zero(self.transmat_),
                                      bwdlattice, framelogprob, log_xi_sum)
            with np.errstate(under="ignore"):
                stats['trans'] += np.exp(log_xi_sum)
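
The accumulated stats['trans'] matrix of expected transition counts is later turned into a new transition matrix in the M-step. A rough sketch of that row normalization, assuming no transition prior (hmmlearn's actual _do_mstep also folds in transmat_prior):

import numpy as np

def transmat_from_counts(expected_trans):
    # Row-normalize expected transition counts so each row sums to one;
    # rows with zero mass are left at zero to avoid dividing by zero.
    row_sums = expected_trans.sum(axis=1, keepdims=True)
    return expected_trans / np.where(row_sums == 0, 1.0, row_sums)
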
Example 7
 def _do_viterbi_pass(self, framelogprob):
     n_samples, n_components = framelogprob.shape
     state_sequence, logprob = _hmmc._viterbi(
         n_samples, n_components, log_mask_zero(self.startprob_),
         log_mask_zero(self.transmat_), framelogprob)
     return logprob, state_sequence
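
_hmmc._viterbi is a C implementation of the standard max-product recursion. For reference, a pure-NumPy sketch of the same algorithm (the helper name and return order are illustrative):

import numpy as np

def viterbi_numpy(log_startprob, log_transmat, framelogprob):
    n_samples, n_components = framelogprob.shape
    viterbi_lattice = np.empty((n_samples, n_components))
    backpointers = np.zeros((n_samples, n_components), dtype=int)

    viterbi_lattice[0] = log_startprob + framelogprob[0]
    for t in range(1, n_samples):
        # scores[i, j]: best log-probability of reaching state j from state i
        scores = viterbi_lattice[t - 1][:, None] + log_transmat
        backpointers[t] = scores.argmax(axis=0)
        viterbi_lattice[t] = scores.max(axis=0) + framelogprob[t]

    # Backtrack from the best final state.
    state_sequence = np.empty(n_samples, dtype=int)
    state_sequence[-1] = viterbi_lattice[-1].argmax()
    for t in range(n_samples - 2, -1, -1):
        state_sequence[t] = backpointers[t + 1, state_sequence[t + 1]]
    return viterbi_lattice[-1].max(), state_sequence
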
Example 8
 def _decode_viterbi(self, X, run_lengths):
     framelogprob = self._compute_log_likelihood(X)
     state_sequence, logprob = viterbi(log_mask_zero(self.startprob_),
                                       log_mask_zero(self.transmat_),
                                       framelogprob, run_lengths)
     return logprob, state_sequence
Example 9
    def _accumulate_sufficient_statistics(self, stats, obs, framelogprob,
                                          posteriors, fwdlattice, bwdlattice):
        """
		Updates sufficient statistics from a given sample.

		Parameters
		-------
		stats : dict
			refer to _initialize_sufficient_statistics()

		obs : array, shape (length_of_sample, n_features)
			a single trajectory

		framelogprob : array, shape(, n_components)
			log likelihood of the sample trajectory

		posteriors : array, shape (, n_components)
			Posterior probabilities of each sample being generated by each
			of the model states.
		"""

        # @continue here
        w_r = self._weights[stats['nobs']]

        # formula (1) from the paper - only the part inside sigma
        stats['nobs'] += 1  # current sample
        if 's' in self.params:
            stats['start'] += posteriors[0]

        if 't' in self.params:
            n_samples, n_components = framelogprob.shape
            # when the sample is of length 1, it contains no transitions
            # so there is no reason to update our trans. matrix estimate
            if n_samples <= 1:
                return

            # formula (2)
            log_xi_sum = np.full((n_components, n_components), -np.inf)
            _hmmc._compute_log_xi_sum(n_samples, n_components, fwdlattice,
                                      log_mask_zero(self.transmat_),
                                      bwdlattice, framelogprob, log_xi_sum)
            with np.errstate(under="ignore"):
                stats['trans'] += w_r * np.exp(log_xi_sum)

        obs_loc, obs_time, obs_category = self._split_X_by_features(
            obs)  # @TODO valid?

        if 'm' in self.params or 'c' in self.params:
            stats['post'] += posteriors.sum(axis=0)
            stats['loc_obs_for_mean'] += w_r * np.dot(posteriors.T, obs_loc)
            stats['time_obs_for_mean'] += w_r * np.dot(posteriors.T, obs_time)
            stats['loc_obs'] += np.dot(posteriors.T, obs_loc)
            stats['time_obs'] += np.dot(posteriors.T, obs_time)

        if 'c' in self.params:
            if self.loc_covariance_type in ('spherical', 'diag'):
                stats['loc_obs**2'] += np.dot(posteriors.T, obs_loc**2)
                stats['time_obs**2'] += np.dot(posteriors.T, obs_time**2)
            elif self.loc_covariance_type in ('tied', 'full'):
                # posteriors: (nt, nc); obs: (nt, nf); obs: (nt, nf)
                # -> (nc, nf, nf)
                stats['loc_obs*obs.T'] += np.einsum('ij,ik,il->jkl',
                                                    posteriors, obs_loc,
                                                    obs_loc)
                stats['time_obs*obs.T'] += np.einsum('ij,ik,il->jkl',
                                                     posteriors, obs_time,
                                                     obs_time)

        if 'e' in self.params:
            for t, symbol in enumerate(np.concatenate(obs_category)):
                stats['cat_obs'][:, symbol] += w_r * posteriors[t]

        stats['gamma'] += w_r * posteriors.sum(axis=0)
        if 't' in self.params:
            # log_xi_sum is only defined when transitions were accumulated
            stats['xi'] += w_r * log_xi_sum.sum(axis=0)
Example 10
 def _compute_log_likelihood(self, X):
     return log_mask_zero(self.emissionprob_)[:, np.concatenate(X)].T
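
A tiny self-contained check of what this indexing produces (the toy emissionprob and X below are made up, and np.errstate stands in for log_mask_zero): row t of the result holds the log emission probability of symbol X[t] under each state.

import numpy as np

emissionprob = np.array([[0.5, 0.3, 0.2],   # state 0
                         [0.1, 0.1, 0.8]])  # state 1
X = np.array([[0], [2], [1]])               # one sequence of integer symbols
with np.errstate(divide="ignore"):          # zeros would map to -inf
    framelogprob = np.log(emissionprob)[:, np.concatenate(X)].T
print(framelogprob.shape)                   # (3, 2): n_samples x n_components
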
Example 11
    def _accumulate_sufficient_statistics(self, stats, X, framelogprob,
                                          posteriors, fwdlattice, bwdlattice):
        """Updates sufficient statistics from a given sample.
        Parameters
        ----------
        stats : dict
            Sufficient statistics as returned by
            :meth:`~base._BaseHMM._initialize_sufficient_statistics`.
        X : array, shape (n_samples, n_features)
            Sample sequence.
        framelogprob : array, shape (n_samples, n_components)
            Log-probabilities of each sample under each of the model states.
        posteriors : array, shape (n_samples, n_components)
            Posterior probabilities of each sample being generated by each
            of the model states.
        fwdlattice, bwdlattice : array, shape (n_samples, n_components)
            Log-forward and log-backward probabilities.
        """

        # Based on hmmlearn's _BaseHMM
        safe_transmat = self.transmat_ + np.finfo(float).eps

        stats['nobs'] += 1
        if 's' in self.params:
            stats['start'] += posteriors[0]
        if 't' in self.params:
            n_samples, n_components = framelogprob.shape
            # when the sample is of length 1, it contains no transitions
            # so there is no reason to update our trans. matrix estimate
            if n_samples <= 1:
                return
            log_xi_sum = np.full((n_components, n_components), -np.inf)

            _hmmc._compute_log_xi_sum(n_samples, n_components, fwdlattice,
                                      utils.log_mask_zero(self.transmat_),
                                      bwdlattice, framelogprob, log_xi_sum)

            stats['trans'] += np.exp(log_xi_sum)
            # stats['trans'] = np.round(stats['trans'])
            # if np.sum(stats['trans']) != X.shape[0]-1:
            #     warnings.warn("transmat counts != n_samples", RuntimeWarning)
            #     import pdb; pdb.set_trace()

            # Keep only the allowed transition pattern: self-transitions,
            # transitions to the next state in the chain, and exits from the
            # last state of each chain block to the first state of the other
            # blocks; everything else is zeroed out.
            template = np.zeros((self.n_components, self.n_components))
            for u in range(self.n_components):
                template[u, u] = stats['trans'][u, u]

            for l in range(self.n_components - 1):
                template[l, l + 1] = stats['trans'][l, l + 1]

            for b in range(self.n_unique):
                transition_index = [i * self.n_chain
                                    for i in range(self.n_unique)]
                transition_index.remove(b * self.n_chain)

                block = stats['trans'][
                    self.n_chain * b:self.n_chain * (b + 1)].copy()
                template_block = template[
                    self.n_chain * b:self.n_chain * (b + 1)].copy()

                for i in transition_index:
                    template_block[self.n_chain - 1, i] = \
                        block[self.n_chain - 1, i]

                template[self.n_chain * b:self.n_chain * (b + 1)] = \
                    template_block

            stats['trans'] = np.copy(template)