Exemple #1
0
    def _do_mstep(self, stats, params):
        """Re-estimate model parameters from the accumulated statistics.

        Parameters
        ----------
        stats : dict
            Accumulated statistics. ``stats['posteriors']`` and
            ``stats['obs']`` are each stacked with ``np.vstack``;
            ``stats['trans']`` is read when ``'t'`` is requested.
        params : container of str
            Flags selecting what gets updated: ``'t'`` updates
            ``transmat_``, ``populations_`` and ``startprob_``; ``'m'``
            calls ``_fitmeans``; ``'k'`` calls ``_fitkappas``.

        Raises
        ------
        ValueError
            If ``'t'`` is requested and ``self.reversible_type`` is neither
            ``'mle'`` nor ``'transpose'``.
        """
        stacked_posteriors = np.vstack(stats['posteriors'])
        stacked_obs = np.vstack(stats['obs'])

        if 't' in params:
            mode = self.reversible_type
            if mode == 'mle':
                # Shift the counts by (prior - 1) and floor them at 1e-20
                # before handing them to the reversible estimator.
                shifted = stats['trans'] + self.transmat_prior - 1.0
                floored = np.maximum(shifted, 1e-20).astype(np.float64)
                transmat, populations = _reversibility.reversible_transmat(floored)
                self.transmat_ = transmat
                self.populations_ = populations
            elif mode == 'transpose':
                # Symmetrize by adding the transposed counts, then floor.
                trans = stats['trans']
                sym_counts = np.maximum(
                    self.transmat_prior - 1.0 + trans + trans.T, 1e-20)
                column_totals = np.sum(sym_counts, axis=0)
                self.populations_ = column_totals / np.sum(column_totals)
                # Divide every row by its own sum.
                row_totals = np.sum(sym_counts, axis=1)
                self.transmat_ = sym_counts / row_totals[:, np.newaxis]
            else:
                message = ('Invalid value for reversible_type: %s '
                           'Must be either "mle" or "transpose"' % mode)
                raise ValueError(message)
            # The start probabilities are set to the populations.
            self.startprob_ = self.populations_

        if 'm' in params:
            self._fitmeans(stacked_posteriors, stacked_obs, out=self._means_)
        if 'k' in params:
            self._fitkappas(stacked_posteriors, stacked_obs, self._means_)
Exemple #2
0
    def _do_mstep(self, stats, params):
        """Run the M-step: update parameters from accumulated statistics.

        ``params`` is a container of single-character flags:

        * ``'t'`` -- recompute ``transmat_`` and ``populations_`` from
          ``stats['trans']`` according to ``self.reversible_type`` and set
          ``startprob_`` to the resulting populations.
        * ``'m'`` -- call ``_fitmeans`` with the stacked posteriors and
          observations, writing into ``self._means_``.
        * ``'k'`` -- call ``_fitkappas`` with the stacked posteriors,
          observations and ``self._means_``.

        Raises ``ValueError`` when ``'t'`` is requested and
        ``self.reversible_type`` is not ``'mle'`` or ``'transpose'``.
        """
        post = np.vstack(stats['posteriors'])
        observations = np.vstack(stats['obs'])

        if 't' in params:
            if self.reversible_type == 'transpose':
                # Counts plus their transpose, shifted by (prior - 1) and
                # floored at 1e-20.
                raw = stats['trans']
                symmetric = np.maximum(
                    self.transmat_prior - 1.0 + raw + raw.T, 1e-20)
                col_sums = np.sum(symmetric, axis=0)
                self.populations_ = col_sums / np.sum(col_sums)
                row_sums = np.sum(symmetric, axis=1)
                self.transmat_ = symmetric / row_sums[:, np.newaxis]
            elif self.reversible_type == 'mle':
                pseudo_counts = np.maximum(
                    stats['trans'] + self.transmat_prior - 1.0, 1e-20)
                estimate = _reversibility.reversible_transmat(
                    pseudo_counts.astype(np.float64))
                self.transmat_, self.populations_ = estimate
            else:
                raise ValueError(
                    'Invalid value for reversible_type: %s '
                    'Must be either "mle" or "transpose"'
                    % self.reversible_type)
            self.startprob_ = self.populations_

        if 'm' in params:
            self._fitmeans(post, observations, out=self._means_)
        if 'k' in params:
            self._fitkappas(post, observations, self._means_)
Exemple #3
0
def test_reversible_mle():
    """Check ``reversible_transmat`` against stored reference values.

    The returned transition matrix must match the reference matrix, and the
    returned populations must match the normalized eigenvector that
    ``eigs`` returns for the transposed transition matrix.
    """
    import scipy.sparse.linalg

    counts = np.array([[6.0, 3.0, 7.0],
                       [4.0, 6.0, 9.0],
                       [2.0, 6.0, 7.0]])
    # Reference transition matrix, generated with msmbuilder.
    expected = np.array([[0.37499995, 0.2370208, 0.38797925],
                         [0.16882446, 0.31578918, 0.51538636],
                         [0.18615565, 0.34717763, 0.46666672]])

    transmat, populations = _reversibility.reversible_transmat(counts)
    np.testing.assert_array_almost_equal(transmat, expected)

    # Only the eigenvector is needed; the eigenvalue is discarded.
    _, vectors = scipy.sparse.linalg.eigs(transmat.T, k=1)
    normalized = vectors / vectors.sum()
    stationary = np.real(normalized).flatten()
    np.testing.assert_array_almost_equal(stationary, populations)
Exemple #4
0
def test_reversible_mle():
    """Compare ``reversible_transmat`` output with precomputed results.

    Asserts that the estimated transition matrix equals the stored reference
    and that the estimated populations equal the sum-normalized eigenvector
    obtained from ``eigs`` on the transposed matrix.
    """
    import scipy.sparse.linalg

    count_rows = [[6.0, 3.0, 7.0], [4.0, 6.0, 9.0], [2.0, 6.0, 7.0]]
    count_matrix = np.array(count_rows)

    # Reference values generated with msmbuilder.
    reference = np.array([
        [0.37499995, 0.2370208, 0.38797925],
        [0.16882446, 0.31578918, 0.51538636],
        [0.18615565, 0.34717763, 0.46666672],
    ])

    estimated, equilibrium = _reversibility.reversible_transmat(count_matrix)
    np.testing.assert_array_almost_equal(estimated, reference)

    # eigs returns (eigenvalues, eigenvectors); only the vector is used.
    eig_pair = scipy.sparse.linalg.eigs(estimated.T, k=1)
    eigvec = eig_pair[1]
    np.testing.assert_array_almost_equal(
        np.real(eigvec / eigvec.sum()).flatten(), equilibrium)
Exemple #5
0
    def _do_mstep(self, stats, params):
        """Update the model parameters from accumulated statistics.

        Parameters
        ----------
        stats : dict
            Accumulated statistics. This method reads ``stats['post']`` on
            every call, ``stats['trans']`` for the ``'t'`` update,
            ``stats['obs']`` for the ``'m'`` and ``'v'`` updates and
            ``stats['obs**2']`` for the ``'v'`` update.
        params : container of str
            Flags selecting what to update: ``'t'`` sets ``transmat_`` and
            ``populations_``, ``'m'`` sets ``means_`` and ``'v'`` sets
            ``vars_``.

        Raises
        ------
        ValueError
            If ``'t'`` is requested and ``self.reversible_type`` is neither
            ``'mle'`` nor ``'transpose'``.
        """
        if 't' in params:
            if self.reversible_type == 'mle':
                # Counts shifted by (prior - 1) and floored at 1e-20 before
                # being passed to the reversible estimator.
                counts = np.maximum(
                    stats['trans'] + self.transmat_prior - 1.0, 1e-20).astype(np.float64)
                self.transmat_, self.populations_ = _reversibility.reversible_transmat(
                    counts)
            elif self.reversible_type == 'transpose':
                # Symmetrize the counts by adding their transpose.
                revcounts = np.maximum(
                    self.transmat_prior - 1.0 + stats['trans'] + stats['trans'].T, 1e-20)
                populations = np.sum(revcounts, axis=0)
                self.populations_ = populations / np.sum(populations)
                # Row-normalize the symmetrized counts.
                self.transmat_ = revcounts / np.sum(revcounts, axis=1)[:, np.newaxis]
            else:
                raise ValueError('Invalid value for reversible_type: %s '
                                 'Must be either "mle" or "transpose"'
                                 % self.reversible_type)

        # Lower bound applied to every pairwise mean difference; differences
        # at or below it are treated as "merged" further down.
        difference_cutoff = 1e-10
        # We don't want denom to be zero, because then the new value of the
        # means will be nan/inf, so pad it up by a very small constant. This
        # particular padding follows the sklearn mixture model m_step code from
        # https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/mixture/gmm.py#L496
        # NOTE(review): the indexing assumes stats['post'] is 1-D with one
        # entry per state -- confirm against the caller.
        denom = (stats['post'][:, np.newaxis] + 10 * EPS)

        def getdiff(means):
            # For each feature i, build the (n_states, n_states) matrix of
            # absolute pairwise differences of means[:, i], floored at
            # difference_cutoff so it can safely be used as a divisor.
            diff = np.zeros((self.n_features, self.n_states, self.n_states))
            for i in range(self.n_features):
                diff[i] = np.maximum(
                    np.abs(np.subtract.outer(means[:, i], means[:, i])), difference_cutoff)
            return diff

        if 'm' in params:
            means = stats['obs'] / denom  # unregularized means

            # NOTE(review): presumably this is a local quadratic approximation
            # (LQA) of a pairwise fusion penalty on the means -- confirm
            # against the model's documentation.
            if self.fusion_prior > 0 and self.n_lqa_iter > 0:
                # adaptive regularization strength
                strength = self.fusion_prior / getdiff(means)
                rhs = stats['obs'] / self.vars_
                # A state is not penalized against itself.
                for i in range(self.n_features):
                    np.fill_diagonal(strength[i], 0)

                break_lqa = False
                for s in range(self.n_lqa_iter):
                    diff = getdiff(means)
                    # Stop when every pairwise difference is at the cutoff,
                    # or when a solve failed during the previous iteration.
                    if np.all(diff <= difference_cutoff) or break_lqa:
                        break

                    offdiagonal = -strength / diff
                    diagonal_penalty = np.sum(strength / diff, axis=2)
                    for f in range(self.n_features):
                        # Skip features whose means are all at the cutoff.
                        if np.all(diff[f] <= difference_cutoff):
                            continue
                        # One linear system per feature, solved for that
                        # feature's means across all states.
                        ridge_approximation = np.diag(
                            stats['post'] / self.vars_[:, f] + diagonal_penalty[f]) + offdiagonal[f]
                        try:
                            means[:, f] = np.linalg.solve(ridge_approximation, rhs[:, f])
                        except np.linalg.LinAlgError:
                            # I'm not really sure what exactly causes the ridge
                            # approximation to be non-solvable, but it probably
                            # means we're too close to the merging. Maybe 1e-10
                            # is cutting it too close. Anyway, just break now and
                            # use the last valid value of the means.
                            # The flag is only checked at the top of the next
                            # LQA iteration, so the remaining features of the
                            # current iteration are still processed.
                            break_lqa = True

                # Copy means across state pairs whose difference is at the
                # cutoff. `diff` is the array from the most recent getdiff
                # call at the top of the loop; it is not recomputed after the
                # last solve. np.triu_indices also yields the diagonal
                # (k == j), where the assignment is a no-op.
                for i in range(self.n_features):
                    for k, j in zip(*np.triu_indices(self.n_states)):
                        if diff[i, k, j] <= difference_cutoff:
                            means[k, i] = means[j, i]

            self.means_ = means

        if 'v' in params:
            vars_prior = self.vars_prior
            vars_weight = self.vars_weight
            # Without a prior, neither the prior nor its weight contributes.
            if vars_prior is None:
                vars_weight = 0
                vars_prior = 0

            # Uses self.means_, i.e. the means just assigned above when 'm'
            # was also requested.
            var_num = (stats['obs**2']
                       - 2 * self.means_ * stats['obs']
                       + self.means_ ** 2 * denom)
            var_denom = max(vars_weight - 1, 0) + denom
            self.vars_ = (vars_prior + var_num) / var_denom
Exemple #6
0
 def _transmat_update(self, stats):
     """Recompute ``transmat_`` and ``populations_`` from ``stats['trans']``.

     The counts are shifted by ``self.transmat_prior - 1.0``, floored at
     ``1e-20``, cast to ``float64`` and passed to
     ``_reversibility.reversible_transmat``; its two return values are
     stored as ``transmat_`` and ``populations_``.
     """
     shifted = stats['trans'] + self.transmat_prior - 1.0
     floored = np.maximum(shifted, 1e-20)
     transmat, populations = _reversibility.reversible_transmat(
         floored.astype(np.float64))
     self.transmat_ = transmat
     self.populations_ = populations
Exemple #7
0
    def _do_mstep(self, stats, params):
        """Update the model parameters from accumulated statistics.

        Parameters
        ----------
        stats : dict
            Accumulated statistics. This method reads ``stats['post']`` on
            every call, ``stats['trans']`` for the ``'t'`` update,
            ``stats['obs']`` for the ``'m'`` and ``'v'`` updates and
            ``stats['obs**2']`` for the ``'v'`` update.
        params : container of str
            Flags selecting what to update: ``'t'`` sets ``transmat_`` and
            ``populations_``, ``'m'`` sets ``means_`` and ``'v'`` sets
            ``vars_``.

        Raises
        ------
        ValueError
            If ``'t'`` is requested and ``self.reversible_type`` is neither
            ``'mle'`` nor ``'transpose'``.
        """
        if 't' in params:
            if self.reversible_type == 'mle':
                # Counts shifted by (prior - 1) and floored at 1e-20 before
                # being passed to the reversible estimator.
                counts = np.maximum(stats['trans'] + self.transmat_prior - 1.0,
                                    1e-20).astype(np.float64)
                self.transmat_, self.populations_ = _reversibility.reversible_transmat(
                    counts)
            elif self.reversible_type == 'transpose':
                # Symmetrize the counts by adding their transpose.
                revcounts = np.maximum(
                    self.transmat_prior - 1.0 + stats['trans'] +
                    stats['trans'].T, 1e-20)
                populations = np.sum(revcounts, axis=0)
                self.populations_ = populations / np.sum(populations)
                # Row-normalize the symmetrized counts.
                self.transmat_ = revcounts / np.sum(revcounts,
                                                    axis=1)[:, np.newaxis]
            else:
                raise ValueError('Invalid value for reversible_type: %s '
                                 'Must be either "mle" or "transpose"' %
                                 self.reversible_type)

        # Lower bound applied to every pairwise mean difference; differences
        # at or below it are treated as "merged" further down.
        difference_cutoff = 1e-10
        # We don't want denom to be zero, because then the new value of the
        # means will be nan/inf, so pad it up by a very small constant. This
        # particular padding follows the sklearn mixture model m_step code from
        # https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/mixture/gmm.py#L496
        # NOTE(review): the indexing assumes stats['post'] is 1-D with one
        # entry per state -- confirm against the caller.
        denom = (stats['post'][:, np.newaxis] + 10 * EPS)

        def getdiff(means):
            # For each feature i, build the (n_states, n_states) matrix of
            # absolute pairwise differences of means[:, i], floored at
            # difference_cutoff so it can safely be used as a divisor.
            diff = np.zeros((self.n_features, self.n_states, self.n_states))
            for i in range(self.n_features):
                diff[i] = np.maximum(
                    np.abs(np.subtract.outer(means[:, i], means[:, i])),
                    difference_cutoff)
            return diff

        if 'm' in params:
            means = stats['obs'] / denom  # unregularized means

            # NOTE(review): presumably this is a local quadratic approximation
            # (LQA) of a pairwise fusion penalty on the means -- confirm
            # against the model's documentation.
            if self.fusion_prior > 0 and self.n_lqa_iter > 0:
                strength = self.fusion_prior / getdiff(
                    means)  # adaptive regularization strength
                rhs = stats['obs'] / self.vars_
                # A state is not penalized against itself.
                for i in range(self.n_features):
                    np.fill_diagonal(strength[i], 0)

                break_lqa = False
                for s in range(self.n_lqa_iter):
                    diff = getdiff(means)
                    # Stop when every pairwise difference is at the cutoff,
                    # or when a solve failed during the previous iteration.
                    if np.all(diff <= difference_cutoff) or break_lqa:
                        break

                    offdiagonal = -strength / diff
                    diagonal_penalty = np.sum(strength / diff, axis=2)
                    for f in range(self.n_features):
                        # Skip features whose means are all at the cutoff.
                        if np.all(diff[f] <= difference_cutoff):
                            continue
                        # One linear system per feature, solved for that
                        # feature's means across all states.
                        ridge_approximation = np.diag(
                            stats['post'] / self.vars_[:, f] +
                            diagonal_penalty[f]) + offdiagonal[f]
                        try:
                            means[:,
                                  f] = np.linalg.solve(ridge_approximation,
                                                       rhs[:, f])
                        except np.linalg.LinAlgError:
                            # I'm not really sure what exactly causes the ridge
                            # approximation to be non-solvable, but it probably
                            # means we're too close to the merging. Maybe 1e-10
                            # is cutting it too close. Anyway, just break now and
                            # use the last valid value of the means.
                            # The flag is only checked at the top of the next
                            # LQA iteration, so the remaining features of the
                            # current iteration are still processed.
                            break_lqa = True

                # Copy means across state pairs whose difference is at the
                # cutoff. `diff` is the array from the most recent getdiff
                # call at the top of the loop; it is not recomputed after the
                # last solve. np.triu_indices also yields the diagonal
                # (k == j), where the assignment is a no-op.
                for i in range(self.n_features):
                    for k, j in zip(*np.triu_indices(self.n_states)):
                        if diff[i, k, j] <= difference_cutoff:
                            means[k, i] = means[j, i]

            self.means_ = means

        if 'v' in params:
            vars_prior = self.vars_prior
            vars_weight = self.vars_weight
            # Without a prior, neither the prior nor its weight contributes.
            if vars_prior is None:
                vars_weight = 0
                vars_prior = 0

            # Uses self.means_, i.e. the means just assigned above when 'm'
            # was also requested.
            var_num = (stats['obs**2'] - 2 * self.means_ * stats['obs'] +
                       self.means_**2 * denom)
            var_denom = max(vars_weight - 1, 0) + denom
            self.vars_ = (vars_prior + var_num) / var_denom
Exemple #8
0
 def _transmat_update(self, stats):
     """Set ``transmat_`` and ``populations_`` from the transition counts.

     ``stats['trans']`` plus ``self.transmat_prior - 1.0`` is floored at
     ``1e-20`` and converted to ``float64``; the result is given to
     ``_reversibility.reversible_transmat``, whose two outputs become
     ``transmat_`` and ``populations_``.
     """
     pseudo_counts = np.maximum(
         stats['trans'] + self.transmat_prior - 1.0,
         1e-20,
     )
     estimate = _reversibility.reversible_transmat(
         pseudo_counts.astype(np.float64))
     self.transmat_, self.populations_ = estimate