Example #1
 def test_reset(self):
     m = ConvergenceMonitor(tol=1e-3, n_iter=10, verbose=False)
     m.iter = 1
     m.history.append(-0.01)
     m._reset()
     assert m.iter == 0
     assert not m.history
Example #2
 def test_report_first_iteration(self, capsys):
     m = ConvergenceMonitor(tol=1e-3, n_iter=10, verbose=True)
     m.report(-0.01)
     out, err = capsys.readouterr()
     assert not out
     expected = m._template.format(iter=1, logprob=-0.01, delta=np.nan)
     assert err.splitlines() == [expected]
Example #3
 def test_report_first_iteration(self, capsys):
     m = ConvergenceMonitor(tol=1e-3, n_iter=10, verbose=True)
     m.report(-0.01)
     out, err = capsys.readouterr()
     assert not out
     expected = m._template.format(iter=1, logprob=-0.01, delta=float("nan"))
     assert err.splitlines() == [expected]
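Both tests above build the expected line from the monitor's own ``_template``: the first reported iteration has no previous log-likelihood, so its delta is ``nan``, and verbose output goes to ``sys.stderr`` rather than stdout. A minimal usage sketch (assuming hmmlearn's ``ConvergenceMonitor`` from ``hmmlearn.base``; the exact column formatting comes from ``_template``):

    from hmmlearn.base import ConvergenceMonitor

    m = ConvergenceMonitor(tol=1e-3, n_iter=10, verbose=True)
    m.report(-0.01)   # one line on stderr, delta is nan
    m.report(-0.005)  # second line, delta = -0.005 - (-0.01) = 0.005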
Example #4
 def test_converged_by_iterations(self):
     m = ConvergenceMonitor(tol=1e-3, n_iter=2, verbose=False)
     assert not m.converged
     m.report(-0.01)
     assert not m.converged
     m.report(-0.1)
     assert m.converged
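With ``n_iter=2`` the monitor converges purely on the iteration budget, even though the log-likelihood got worse. Below is a minimal sketch of the rule these tests exercise, not hmmlearn's actual class: converged once the budget is spent, or once the improvement between the last two reported scores drops below ``tol``.

    import collections

    class MiniMonitor:
        """Sketch of the convergence rule the tests above rely on."""

        def __init__(self, tol, n_iter):
            self.tol, self.n_iter = tol, n_iter
            self._reset()

        def _reset(self):
            self.iter = 0
            self.history = collections.deque(maxlen=2)

        def report(self, logprob):
            self.history.append(logprob)
            self.iter += 1

        @property
        def converged(self):
            return (self.iter == self.n_iter or
                    (len(self.history) == 2 and
                     self.history[1] - self.history[0] < self.tol))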
Example #5
    def fit(self, X, lengths=None):
        X = check_array(X)
        self._init(X, lengths=lengths)
        self._check()

        self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, self.verbose)
        for iter in range(self.n_iter):
            stats = self._initialize_sufficient_statistics()
            curr_logprob = 0

            framelogprob = self._compute_log_likelihood(X)
            logprob, fwdlattice = self._do_forward_pass(framelogprob)
            curr_logprob += logprob
            bwdlattice = self._do_backward_pass(framelogprob)
            posteriors = self._compute_posteriors(fwdlattice, bwdlattice)
            self._accumulate_sufficient_statistics(
                stats, X, framelogprob, posteriors, fwdlattice,
                bwdlattice)

            # XXX must be before convergence check, because otherwise
            #     there won't be any updates for the case ``n_iter=1``.
            self._do_mstep(stats)
            self.monitor_.report(curr_logprob)

            if self.monitor_.converged:
                self.framelogprob = framelogprob
                break

        return self
Example #6
    def test_converged_by_logprob(self):
        m = ConvergenceMonitor(tol=1e-3, n_iter=10, verbose=False)
        for logprob in [-0.03, -0.02, -0.01]:
            m.report(logprob)
            assert not m.converged

        m.report(-0.0101)
        assert m.converged
Example #7
    def fit(self, X, lengths=None):
        """Estimate model parameters.
        An initialization step is performed before entering the
        EM algorithm. If you want to avoid this step for a subset of
        the parameters, pass proper ``init_params`` keyword argument
        to estimator's constructor.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature matrix of individual samples.
        lengths : array-like of integers, shape (n_sequences, )
            Lengths of the individual sequences in ``X``. The sum of
            these should be ``n_samples``.
        Returns
        -------
        self : object
            Returns self.
        """
        X = check_array(X)
        self._init(X, lengths=lengths)
        self._check()

        self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, self.verbose)
        for iter in range(self.n_iter):
            stats = self._initialize_sufficient_statistics()
            curr_logprob = 0
            for i, j in iter_from_X_lengths(X, lengths):
                framelogprob = self._compute_log_likelihood(X[i:j])
                logprob, fwdlattice = self._do_forward_pass(framelogprob)
                curr_logprob += logprob
                bwdlattice = self._do_backward_pass(framelogprob)
                posteriors = self._compute_posteriors(fwdlattice, bwdlattice)

                # fix posteriors
                if self.states_prior is not None and self.fp_state is not None:
                    for k in range(len(self.states_prior)):
                        if self.states_prior[k] == 0:
                            # non footprint states
                            posteriors[k][self.fp_state] = 0.0
                            posteriors[k] = posteriors[k] / sum(posteriors[k])

                        elif self.states_prior[k] == 1:
                            # footprint states
                            posteriors[k] = 0.0  # zero the row
                            posteriors[k][self.fp_state] = 1.0

                self._accumulate_sufficient_statistics(stats, X[i:j],
                                                       framelogprob,
                                                       posteriors, fwdlattice,
                                                       bwdlattice)

            self._do_mstep(stats)

            self.monitor_.report(curr_logprob)
            if self.monitor_.converged:
                break

        return self
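The "fix posteriors" block above clamps rows of the posterior matrix for frames whose label is known. A toy illustration of that step in isolation (array values and names are made up):

    import numpy as np

    posteriors = np.array([[0.2, 0.5, 0.3],     # frame labelled 0
                           [0.1, 0.8, 0.1]])    # frame labelled 1
    fp_state = 1                                # index of the footprint state
    states_prior = [0, 1]

    for k, label in enumerate(states_prior):
        if label == 0:    # known non-footprint: forbid fp_state, renormalize
            posteriors[k, fp_state] = 0.0
            posteriors[k] /= posteriors[k].sum()
        elif label == 1:  # known footprint: force fp_state
            posteriors[k] = 0.0
            posteriors[k, fp_state] = 1.0

    print(posteriors)     # [[0.4 0.  0.6]
                          #  [0.  1.  0. ]]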
Example #8
    def test_report(self, capsys):
        n_iter = 10
        m = ConvergenceMonitor(tol=1e-3, n_iter=n_iter, verbose=True)
        for i in reversed(range(n_iter)):
            m.report(-0.01 * i)

        out, err = capsys.readouterr()
        assert not out
        assert len(err.splitlines()) == n_iter
Example #9
    def __init__(self, config: dict):
        if "tol" in config["train"] and isinstance(config["train"]["tol"],
                                                   str):
            config["train"]["tol"] = {
                "-inf": -np.inf,
                "inf": np.inf
            }[config["train"]["tol"]]

        self.gmm_hmm = _GMMHMM(**config["parameters"])
        self.gmm_hmm.monitor_ = ConvergenceMonitor(*(config["train"][key]
                                                     for key in ("tol",
                                                                 "n_iter",
                                                                 "verbose")))
        self.iepoch = 1
        self.rand_inits = (config["train"].get("weight_rand_init", 0),
                           config["train"].get("mean_rand_init", 0),
                           config["train"].get("covar_rand_init", 0))
        self.limit_inits = (
            config["train"].get("weight_min_init", 0),
            config["train"].get("covar_min_init", 0),
        )
        self.rescale = config["train"].get("rescale_samples", False)
        if self.rescale:
            self.means = None
            self.stddevs = None
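For reference, a config dict of the shape this constructor expects; the keys are taken from the code above, all values are illustrative:

    config = {
        "parameters": {"n_components": 3, "n_mix": 2},
        "train": {
            "tol": "-inf",            # string sentinels map to +/- np.inf
            "n_iter": 20,
            "verbose": True,
            "weight_rand_init": 0.1,  # optional, defaults to 0
            "rescale_samples": True,  # optional, defaults to False
        },
    }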
Example #10
    def fit(self, X, lengths=None):
        """Estimate model parameters.
        An initialization step is performed before entering the
        EM algorithm. If you want to avoid this step for a subset of
        the parameters, pass proper ``init_params`` keyword argument
        to estimator's constructor.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature matrix of individual samples.
        lengths : array-like of integers, shape (n_sequences, )
            Lengths of the individual sequences in ``X``. The sum of
            these should be ``n_samples``.
        Returns
        -------
        self : object
            Returns self.
        """
        X = check_array(X)
        self._init(X, lengths=lengths)
        self._check()

        self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, self.verbose)
        for iter in range(self.n_iter):
            stats = self._initialize_sufficient_statistics()
            curr_logprob = 0
            for i, j in iter_from_X_lengths(X, lengths):
                framelogprob = self._compute_log_likelihood(X[i:j])
                logprob, fwdlattice = self._do_forward_pass(framelogprob)
                curr_logprob += logprob
                bwdlattice = self._do_backward_pass(framelogprob)
                posteriors = self._compute_posteriors(fwdlattice, bwdlattice)

                # fix posteriors
                if self.states_prior is not None and self.fp_state is not None:
                    for k in range(len(self.states_prior)):
                        if self.states_prior[k] == 0:
                            # non footprint states
                            posteriors[k][self.fp_state] = 0.0
                            posteriors[k] = posteriors[k] / sum(posteriors[k])

                        elif self.states_prior[k] == 1:
                            # footprint states
                            posteriors[k] = 0.0  # zero the row
                            posteriors[k][self.fp_state] = 1.0

                self._accumulate_sufficient_statistics(stats, X[i:j], framelogprob, posteriors, fwdlattice, bwdlattice)

            self._do_mstep(stats)

            self.monitor_.report(curr_logprob)
            if self.monitor_.converged:
                break

        return self
Example #11
class PoissonHMM(_BaseHMM):

    # Overriding the parent
    def __init__(self, framelogprob, rates, M, *args, **kwargs):
        _BaseHMM.__init__(self, *args, **kwargs)
        # rates for each state
        self.rates = rates
        self.M = M
        self.framelogprob = framelogprob

    def _compute_log_likelihood(self, X):
        # Sum the Poisson log-pmf over all N samples for every
        # (feature, state) pair, then normalize to keep the resulting
        # log-likelihood matrix bounded.
        J, M, N = self.n_components, X.shape[1], X.shape[0]
        observation_prob = np.zeros((M, J))
        for m in range(M):
            for j in range(J):
                for n in range(N):
                    observation_prob[m, j] += poisson.logpmf(
                        X[n, m], self.rates[j])
        o = observation_prob - logsumexp(observation_prob, axis=0)
        extra_normalized = o - np.amax(o, axis=1)[:, np.newaxis]
        return extra_normalized

    def _initialize_sufficient_statistics(self):
        stats = super(PoissonHMM, self)._initialize_sufficient_statistics()
        stats['post'] = np.zeros(1)
        stats['obs'] = np.zeros(self.M)
        return stats

    def _accumulate_sufficient_statistics(self, stats, obs, framelogprob,
                                          posteriors, fwdlattice, bwdlattice):
        super(PoissonHMM, self)._accumulate_sufficient_statistics(
            stats, obs, framelogprob, posteriors, fwdlattice, bwdlattice)

        # Accumulate the expected (1-based) state index under the posteriors.
        M = len(stats['obs'])
        post = np.zeros(M)
        J = len(posteriors[0, :])
        for j in range(J):
            state = j + 1
            post += posteriors[:, j] * state

        stats['post'] += np.sum(post)
        for o in obs:
            stats['obs'] += o

    def fit(self, X, lengths=None):
        X = check_array(X)
        self._init(X, lengths=lengths)
        self._check()

        self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, self.verbose)
        for iter in range(self.n_iter):
            stats = self._initialize_sufficient_statistics()
            curr_logprob = 0

            framelogprob = self._compute_log_likelihood(X)
            logprob, fwdlattice = self._do_forward_pass(framelogprob)
            curr_logprob += logprob
            bwdlattice = self._do_backward_pass(framelogprob)
            posteriors = self._compute_posteriors(fwdlattice, bwdlattice)
            self._accumulate_sufficient_statistics(
                stats, X, framelogprob, posteriors, fwdlattice,
                bwdlattice)

            # XXX must be before convergence check, because otherwise
            #     there won't be any updates for the case ``n_iter=1``.
            self._do_mstep(stats)
            self.monitor_.report(curr_logprob)

            if self.monitor_.converged:
                self.framelogprob = framelogprob
                break

        return self

    def _do_mstep(self, stats):
        super(PoissonHMM, self)._do_mstep(stats)
        # Rates are tied: state j is re-estimated as j times the base rate.
        denom = stats['post']
        numer = np.sum(stats['obs'])

        J = len(self.rates)
        rate = numer / denom
        self.rates = [rate * j for j in range(1, J + 1)]
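A hypothetical usage sketch of the class above; the constructor signature is taken from the code, while the data and parameter values are made up. Note that ``_do_mstep`` ties the rates, so state ``j`` gets ``j`` times the base rate.

    import numpy as np

    rates = [1.0, 2.0, 3.0]                   # initial rate per state
    model = PoissonHMM(framelogprob=None, rates=rates, M=4,
                       n_components=len(rates), n_iter=10, tol=1e-3)
    X = np.random.poisson(lam=2.0, size=(100, 4))
    model.fit(X)
    print(model.rates)                        # re-estimated tied rates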
Example #12
class SemiSupervisedGaussianHMM(GaussianHMM):
    def __init__(self,
                 n_components=1,
                 covariance_type='diag',
                 min_covar=1e-3,
                 startprob_prior=1.0,
                 transmat_prior=1.0,
                 means_prior=0,
                 means_weight=0,
                 covars_prior=1e-2,
                 covars_weight=1,
                 algorithm="viterbi",
                 random_state=None,
                 n_iter=5,
                 tol=1e-2,
                 verbose=False,
                 params="stmc",
                 init_params="stmc",
                 states_prior=None,
                 fp_state=None):
        GaussianHMM.__init__(self,
                             n_components=n_components,
                             covariance_type=covariance_type,
                             min_covar=min_covar,
                             startprob_prior=startprob_prior,
                             transmat_prior=transmat_prior,
                             means_prior=means_prior,
                             means_weight=means_weight,
                             covars_prior=covars_prior,
                             covars_weight=covars_weight,
                             algorithm=algorithm,
                             random_state=random_state,
                             n_iter=n_iter,
                             tol=tol,
                             verbose=verbose,
                             params=params,
                             init_params=init_params)

        self.covariance_type = covariance_type
        self.min_covar = min_covar
        self.means_prior = means_prior
        self.means_weight = means_weight
        self.covars_prior = covars_prior
        self.covars_weight = covars_weight
        self.states_prior = states_prior
        self.fp_state = fp_state

    def fit(self, X, lengths=None):
        """Estimate model parameters.
        An initialization step is performed before entering the
        EM algorithm. If you want to avoid this step for a subset of
        the parameters, pass proper ``init_params`` keyword argument
        to estimator's constructor.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature matrix of individual samples.
        lengths : array-like of integers, shape (n_sequences, )
            Lengths of the individual sequences in ``X``. The sum of
            these should be ``n_samples``.
        Returns
        -------
        self : object
            Returns self.
        """
        X = check_array(X)
        self._init(X, lengths=lengths)
        self._check()

        self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, self.verbose)
        for iter in range(self.n_iter):
            stats = self._initialize_sufficient_statistics()
            curr_logprob = 0
            for i, j in iter_from_X_lengths(X, lengths):
                framelogprob = self._compute_log_likelihood(X[i:j])
                logprob, fwdlattice = self._do_forward_pass(framelogprob)
                curr_logprob += logprob
                bwdlattice = self._do_backward_pass(framelogprob)
                posteriors = self._compute_posteriors(fwdlattice, bwdlattice)

                # fix posteriors
                if self.states_prior is not None and self.fp_state is not None:
                    for k in range(len(self.states_prior)):
                        if self.states_prior[k] == 0:
                            # non footprint states
                            posteriors[k][self.fp_state] = 0.0
                            posteriors[k] = posteriors[k] / sum(posteriors[k])

                        elif self.states_prior[k] == 1:
                            # footprint states
                            posteriors[k] = 0.0  # zero the row
                            posteriors[k][self.fp_state] = 1.0

                self._accumulate_sufficient_statistics(stats, X[i:j],
                                                       framelogprob,
                                                       posteriors, fwdlattice,
                                                       bwdlattice)

            self._do_mstep(stats)

            self.monitor_.report(curr_logprob)
            if self.monitor_.converged:
                break

        return self
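An illustrative call; data and labels are made up. Judging by the loop in ``fit``, ``states_prior`` supplies one label per leading frame of each sequence (0 forbids the footprint state, 1 forces it), and ``fp_state`` is that state's index:

    import numpy as np

    model = SemiSupervisedGaussianHMM(n_components=3, n_iter=10,
                                      states_prior=[0, 1, 1, 0], fp_state=2)
    X = np.random.randn(40, 2)
    model.fit(X, lengths=[20, 20])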
Example #13
class SpaMHMM(BaseEstimator):
    def __init__(self,
                 n_nodes,
                 mix_dim,
                 n_components,
                 n_features,
                 graph=None,
                 emission='gaussian',
                 n_iter=10,
                 tol=1e-2,
                 n_iter_mstep=100,
                 lr_mstep=1e-3,
                 rho1=0.9,
                 rho2=0.99,
                 verbose=False,
                 name='spamhmm'):
        super(SpaMHMM, self).__init__()

        self.n_nodes = n_nodes
        self.mix_dim = mix_dim
        self.n_components = n_components
        self.n_features = n_features
        self.graph = graph
        self.emission = emission
        self.n_iter = n_iter
        self.tol = tol
        self.n_iter_mstep = n_iter_mstep
        self.lr_mstep = lr_mstep
        self.rho1 = rho1
        self.rho2 = rho2
        self.verbose = verbose
        self.name = name

    def init_params(self, X):
        '''
        Parameters initialization.
        '''
        if type(X) == list:
            X = np.concatenate(X)

        self.mixCoefUnNorm = np.random.rand(self.n_nodes, self.mix_dim) + 1e-9
        # Hardcoded uniform weights for mix_dim == 3; the original
        # relu_normalization(self.mixCoefUnNorm, axis=1) is disabled.
        self.mixCoef = np.reshape(np.ones(3) * (1 / 3), (1, 3))

        # NOTE: these random start/transition probabilities are computed but
        # never assigned to the mixture components below.
        startProb = np.exp(np.random.randn(self.mix_dim, self.n_components))
        normalize(startProb, axis=1)

        transProb = np.exp(
            np.random.randn(self.mix_dim, self.n_components,
                            self.n_components))
        normalize(transProb, axis=2)

        self.time_ = 1
        self.first_moment_ = np.zeros_like(self.mixCoef)
        self.second_moment_ = np.zeros_like(self.mixCoef)

        if self.emission == 'gaussian':
            self.mixModels = [
                hmm.GaussianHMM(n_components=self.n_components,
                                covariance_type='diag')
                for i in range(self.mix_dim)
            ]

            # NOTE: assumes mix_dim == 3; the same seed regenerates identical
            # random transition matrices on every pass of this loop.
            for m in range(self.mix_dim):
                self.mixModels[m]._init(X)
                seed = 1
                trans = rand_initialization.generate_transitions_random(
                    seed, 3, self.n_components, .1)
                self.mixModels[0].transmat_ = trans[0]
                self.mixModels[1].transmat_ = trans[1]
                self.mixModels[2].transmat_ = trans[2]

        else:
            raise NotImplementedError('{} emission is not implemented'.format(
                self.emission))

    def scores_per_seq(self, X, y, lengths=None):
        '''
        Computes the log-likelihood for each sequence in X coming from nodes y.
        Inputs:
            X - np.array of size (n_samples, n_features).
            y - np.int of size n_sequences, whose entries are in the range
                [0, n_nodes-1].
            lengths - list containing the lengths of each individual sequence
                      in X, with size n_sequences.
        Outputs:
            log_likelihood - np.array of size n_sequences.
        '''
        if type(X) == list:
            lengths = [x.shape[0] for x in X]
            X = np.concatenate(X)
            y = np.array(y)

        N = y.shape[0]

        log_likelihood = np.zeros(N)
        for seq_idx, (i, j) in enumerate(iter_from_X_lengths(X, lengths)):
            ll_per_comp = np.zeros(self.mix_dim)
            for m in range(self.mix_dim):
                if self.mixCoef[y[seq_idx], m] == 0.:
                    continue

                ll_per_comp[m] = self.mixModels[m].score(X[i:j, :])

            nonzero_idx = (self.mixCoef[y[seq_idx], :] != 0.)

            log_likelihood[seq_idx] = logsumexp(
                np.log(self.mixCoef[y[seq_idx], nonzero_idx]) +
                ll_per_comp[nonzero_idx])

        return log_likelihood

    def score(self, X, y, lengths=None):
        '''
        Computes the mean log-likelihood for sequences in X coming from
        nodes y.
        Inputs:
            X - np.array of size (n_samples, n_features).
            y - np.int of size n_sequences, whose entries are in the
                range [0, n_nodes-1].
            lengths - list containing the lengths of each individual sequence
                      in X, with size n_sequences.
        Outputs:
            log_likelihood - scalar.
        '''
        if type(X) == list:
            lengths = [x.shape[0] for x in X]
            X = np.concatenate(X)
            y = np.array(y)

        self._check()

        Nsamples = X.shape[0]
        log_likelihood = np.sum(self.scores_per_seq(X, y, lengths))

        return log_likelihood / Nsamples

    def _check(self):
        '''
        Validates mixCoef parameter. The remaining parameters are validated
        by the hmm.check() routine.
        Raises
        ------
        ValueError
                If mixCoef have an invalid shape or do not sum to 1.
        '''
        if self.mixCoef.shape != (self.n_nodes, self.mix_dim):
            raise ValueError('mixCoef must have shape (n_nodes, mix_dim)')
        if not np.allclose(self.mixCoef.sum(axis=1), 1.0):
            raise ValueError('mixCoef rows must sum to 1.0 (got {0})'.format(
                self.mixCoef.sum(axis=1)))

    def _compute_mixture_posteriors(self, X, y, lengths):
        '''
        Computes the posterior log-probability of each mixture component given
        the observations X, y.
        Inputs:
            X - np.array of size (n_samples, n_features).
            y - np.int of size n_sequences, whose entries are in the
                range [0, n_nodes-1].
            lengths - list containing the lengths of each individual sequence
                      in X, with size n_sequences.
        Outputs:
            logmixpost - np.array of size (n_sequences, mix_dim).
        '''
        N = len(lengths)

        transitions = []
        #means = []
        logmixpost = np.zeros((N, self.mix_dim))
        for m in range(self.mix_dim):
            ll_m = np.zeros(N)
            for seq_idx, (i, j) in enumerate(iter_from_X_lengths(X, lengths)):
                ll_m[seq_idx] = self.mixModels[m].score(X[i:j, :])
            transitions = np.append(transitions, self.mixModels[m].transmat_)
            #means = np.append(means, self.mixModels[m].means_)

            logmixpost[:, m] = ll_m + np.log(self.mixCoef[y, m] + 1e-9)

        log_normalize(logmixpost, axis=1)

        return logmixpost, transitions  #, means

    def _compute_sufficient_statistics_in_mix_comp(self, X, y, lengths,
                                                   logmixpost, stats):
        '''
        Accumulates sufficient statistics for the parameters of each HMM in the
        mixture.
        Inputs:
            X - np.array of size (n_samples, n_features).
            y - np.int of size n_sequences, whose entries are in the
                range [0, n_nodes-1].
            lengths - list containing the lengths of each individual sequence
                      in X, with size n_sequences.
            logmixpost - np.array of size (n_sequences, mix_dim).
            stats - dictionary containing sufficient statistics (changed
                    inplace).
        '''
        for m in range(self.mix_dim):
            for seq_idx, (i, j) in enumerate(iter_from_X_lengths(X, lengths)):
                if self.mixCoef[y[seq_idx], m] == 0.:
                    continue

                framelogprob = self.mixModels[m]._compute_log_likelihood(
                    X[i:j, :])
                _, fwdlattice = (
                    self.mixModels[m]._do_forward_pass(framelogprob))
                bwdlattice = self.mixModels[m]._do_backward_pass(framelogprob)
                posteriors = self.mixModels[m]._compute_posteriors(
                    fwdlattice, bwdlattice)
                fwdlattice += logmixpost[seq_idx, m]
                bwdlattice += logmixpost[seq_idx, m]
                posteriors *= np.exp(logmixpost[seq_idx, m])

                self.mixModels[m]._accumulate_sufficient_statistics(
                    stats['mix_idx' + str(m)], X[i:j, :], framelogprob,
                    posteriors, fwdlattice, bwdlattice)

    def _compute_sufficient_statistics(self, X, y, lengths):
        '''
        Computes sufficient statistics to be used in the M-step.
        Inputs:
            X - np.array of size (n_samples, n_features).
            y - np.int of size n_sequences, whose entries are in the
                range [0, n_nodes-1].
            lengths - list containing the lengths of each individual sequence
                      in X, with size n_sequences.
        Outputs:
            stats - dictionary containing sufficient statistics.
        '''
        def check_large_diff(rates):
            # True where the gap between the two components' rates is maximal.
            return abs(rates[0] - rates[1]) == np.max(abs(rates[0] - rates[1]))

        def get_index_of_large_diff(rates):
            which_cluster = 0
            if ((rates[0] - rates[1]) < 0).any():
                which_cluster = 1
            return which_cluster, np.where(
                abs(rates[0] - rates[1]) ==
                np.max(abs(rates[0] - rates[1])))[0][0]

        means = np.zeros((self.mix_dim, self.mixModels[0].n_components))
        for m in range(self.mix_dim):
            means[m] = self.mixModels[m].means_.flatten()

        i = self.n_components  # some init value so that it never collides with possible indexes of the arrays
        greater_cluster = 0
        if check_large_diff(means).any():
            greater_cluster, i = get_index_of_large_diff(means)

        stats = {'mix_post': np.zeros((self.n_nodes, self.mix_dim))}
        for m in range(self.mix_dim):
            stats['mix_idx' + str(m)] = (
                self.mixModels[m]._initialize_sufficient_statistics())

            stats['trans_prior' + str(m)] = np.ones(
                (self.n_components, self.n_components))

            print(means[m])

        logmixpost, trans = self._compute_mixture_posteriors(X, y, lengths)

        for k in range(self.n_nodes):
            stats['mix_post'][k, :] = np.sum(np.exp(logmixpost[y == k, :]),
                                             axis=0)

        logmixpost -= np.amax(logmixpost, axis=0).reshape(1, self.mix_dim)

        self._compute_sufficient_statistics_in_mix_comp(
            X, y, lengths, logmixpost, stats)

        if self.reg_:
            stats['n_seqs_per_node'] = np.zeros(self.n_nodes)
            for k in range(self.n_nodes):
                stats['n_seqs_per_node'][k] = np.sum(y == k)

        return stats

    def _fit_coef(self, stats):
        '''
        Performs the M step of the EM algorithm for the mixture coefficients,
        via gradient ascent. This function is used only when a graph is given.
        Inputs:
            stats - dictionary containing sufficient statistics.
            n_iter - number of update iterations.
        '''
        Nseqs = np.sum(stats['n_seqs_per_node'])
        for it in range(self.n_iter_mstep):
            grad = np.zeros_like(self.mixCoefUnNorm)
            post_coef_dif = (stats['mix_post'] - self.mixCoef *
                             (stats['n_seqs_per_node'].reshape(-1, 1)))
            G_mixCoef = self.graph @ self.mixCoef
            reg_dif = (
                self.mixCoef *
                (G_mixCoef -
                 (np.sum(self.mixCoef * G_mixCoef, axis=1).reshape(-1, 1))))
            mask = (self.mixCoefUnNorm > 0.)
            grad[mask] = (drelu(self.mixCoefUnNorm[mask]) /
                          relu(self.mixCoefUnNorm[mask]))
            grad *= post_coef_dif / Nseqs + reg_dif

            self.mixCoefUnNorm = self._adam(self.mixCoefUnNorm, grad)

            self.mixCoef = relu_normalization(self.mixCoefUnNorm, axis=1)

    def _adam(self, w, dw, delta=1e-8):
        '''
        Performs an ascending step using the Adam algorithm.
        Inputs:
            w - np.array, the current value of the parameter.
            dw - np.array with the same shape as w, the gradient of the
                 objective with respect to w.
            delta - small constant to avoid division by zero (default: 1e-8)
        Outputs:
            next_w - np.array with the same shape as w, the updated value of
                     the parameter.
        '''
        next_first_moment = self.rho1 * self.first_moment_ + (1 -
                                                              self.rho1) * dw
        next_second_moment = (self.rho2 * self.second_moment_ +
                              (1 - self.rho2) * dw**2)

        correct_first_moment = next_first_moment / (1 - self.rho1**self.time_)
        correct_second_moment = (next_second_moment /
                                 (1 - self.rho2**self.time_))

        upd_w = (self.lr_mstep * correct_first_moment /
                 (np.sqrt(correct_second_moment) + delta))
        next_w = w + upd_w

        self.time_ += 1
        self.first_moment_ = next_first_moment
        self.second_moment_ = next_second_moment

        return next_w

    def _do_mstep(self, stats):
        '''
        Performs the M step of the EM algorithm, updating all model parameters.
        Inputs:
            stats - dictionary containing sufficient statistics.
        '''
        if self.reg_:
            self._fit_coef(stats)
        else:
            self.mixCoef = stats['mix_post']
            normalize(self.mixCoef, axis=1)

        for m in range(self.mix_dim):
            self.mixModels[m].transmat_prior = stats['trans_prior' + str(m)]
            self.mixModels[m]._do_mstep(stats['mix_idx' + str(m)])

    def fit(self, X, y, lengths=None, valid_data=None):
        '''
        Trains SpaMHMM on data X, y, using the EM algorithm.
        Inputs:
            X - np.array of size (n_samples, n_features).
            y - np.int of size n_sequences, whose entries are in the
                range [0, n_nodes-1].
            lengths - list containing the lengths of each individual sequence
                      in X, with size n_sequences.
            valid_data - tuple (X_valid, y_valid, lengths_valid) containing the
                         validation data; if validation data is given, the
                         model with the lowest validation loss is saved in a
                         pickle file (optional, default:None).
        '''
        if type(X) == list:
            lengths = [x.shape[0] for x in X]
            X = np.concatenate(X)
            y = np.array(y)

        self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, False)

        if valid_data is not None:
            X_valid, y_valid, lengths_valid = valid_data

            if type(X_valid) == list:
                lengths_valid = [x.shape[0] for x in X_valid]
                X_valid = np.concatenate(X_valid)
                y_valid = np.array(y_valid)

            max_validscore = float('-inf')
            validloss_hist = []

        if self.graph is not None:
            self.reg_ = True
        else:
            self.reg_ = False

        self.init_params(X)
        self._check()

        prevscore = float('-inf')
        trainloss_hist = []
        for it in range(self.n_iter):
            t0 = time.time()
            stats = self._compute_sufficient_statistics(X, y, lengths)

            self._do_mstep(stats)

            print('trans0 {}'.format(self.mixModels[0].transmat_))
            print('trans1 {}'.format(self.mixModels[1].transmat_))

            t1 = time.time()

            currscore = self.score(X, y, lengths)
            trainloss_hist.append(-currscore)
            if valid_data is not None:
                validscore = self.score(X_valid, y_valid, lengths_valid)
                validloss_hist.append(-validscore)

                if validscore > max_validscore:
                    max_validscore = validscore
                    with open(self.name + '.pkl', 'wb') as f:
                        pickle.dump(self, f)

            if self.verbose:
                if (not self.reg_) and (prevscore > currscore):
                    print(
                        'WARNING: loss has increased at iteration {}!'.format(
                            it))
                    print('prev loss = {:.5f}, curr loss = {:.5f}'.format(
                        -prevscore, -currscore))
                elif valid_data is not None:
                    print('it {}: train loss = {:.5f}, valid loss = {:.5f}, '
                          '{:.3f} sec/it'.format(it + 1, -currscore,
                                                 -validscore, t1 - t0))
                else:
                    print('it {}: loss = {:.5f}, {:.3f} sec/it'.format(
                        it + 1, -currscore, t1 - t0))

            ll = np.sum(self.scores_per_seq(X, y, lengths))
            print("ll: {}".format(ll))

            # ll = 0
            # for m in range(self.mix_dim):
            #     ll += np.max(self.mixModels[m]._compute_log_likelihood(X))

            self.monitor_.report(currscore)
            # self.monitor_.report(ll)
            # print("ll: {}".format(ll))

            if self.monitor_.converged:
                if self.verbose:
                    print(
                        'Loss improved less than {}. Training stopped.'.format(
                            self.tol))
                break

            prevscore = currscore

        if valid_data is not None:
            return trainloss_hist, validloss_hist
        else:
            return trainloss_hist

    def predict_next_observ(self, X, y, x_candidates):
        '''
        Finds the most likely next observation, given the sequence X at node y,
        by trying every candidate point in x_candidates.
        Inputs:
            X - observed sequence, np.array of size (length, n_features).
            y - the node index, integer.
            x_candidates - candidate points, np.array of size (n_candidates,
                           n_features).
        Outputs:
            next_observ - predicted observation, np.array of size n_features.
        '''
        length = X.shape[0]
        Ncand = x_candidates.shape[0]
        ll_per_comp_X = np.zeros(self.mix_dim)
        ll_per_comp_nxt_obs = np.zeros((Ncand, self.mix_dim))
        for m in range(self.mix_dim):
            if self.mixCoef[y, m] == 0.:
                continue

            ll_per_comp_X[m], state_post = self.mixModels[m].score_samples(X)
            final_state_post = state_post[length - 1, :]
            next_state_logpost = logsumexp(
                (np.log(self.mixModels[m].transmat_.T) +
                 np.log(final_state_post)),
                axis=1)
            emiss_ll = self.mixModels[m]._compute_log_likelihood(x_candidates)
            ll_per_comp_nxt_obs[:, m] = logsumexp(
                emiss_ll + (next_state_logpost.reshape(1, -1)), axis=1)

        nonzero_idx = (self.mixCoef[y, :] != 0.)
        ll_next_observ = logsumexp(
            (ll_per_comp_nxt_obs[:, nonzero_idx] + ll_per_comp_X[nonzero_idx] +
             np.log(self.mixCoef[y, nonzero_idx])),
            axis=1)

        max_idx = np.argmax(ll_next_observ)
        next_observ = x_candidates[max_idx, :]

        return next_observ

    def greedy_predict_seq(self, X, y, x_candidates, n_samples):
        '''
        Finds a greedy approximation of the most likely next n_samples, given
        the  sequence X at node y, trying every candidate point in x_candidates
        for each sample.
        Inputs:
            X - observed sequence, np.array of size (length, n_features).
            y - the node index, integer.
            x_candidates - candidate points, np.array of size (n_candidates,
                           n_features).
        Outputs:
            Xpred - predicted sequence, np.array of size (n_samples,
                    n_features).
        '''
        length = X.shape[0]
        Xpred = X
        for i in range(n_samples):
            next_observ = (self.predict_next_observ(Xpred, y,
                                                    x_candidates).reshape(
                                                        1, -1))
            Xpred = np.concatenate([Xpred, next_observ], axis=0)

        return Xpred[length:, :]

    def sample(self, y, n_samples, Xpref=None):
        '''
        Samples a sequence of observations from the MHMM observation
        distribution. If a prefix sequence is given, the new sequence is
        sampled from the posterior distribution given that prefix sequence.
        Inputs:
            y - the node index, integer.
            n_samples - the number of samples, integer.
            Xpref - prefix sequence, np.array of size (pref_len, n_features)
                    (optional, default: None).
        Outputs:
            X - sampled sequence, np.array of size (n_samples, n_features)
            mix_idx - the component which the sequence was sampled from,
                      integer.
            state_seq - the produced state sequence, np.int of size n_samples.
        '''
        if Xpref is not None:
            pref_len = Xpref.shape[0]
            mix, trans = self._compute_mixture_posteriors(Xpref, y, [pref_len])
            mix_post = np.exp(mix)
        else:
            mix_post = self.mixCoef[y, :]

        mix_idx = np.random.choice(self.mix_dim, p=mix_post.reshape(-1))

        if Xpref is not None:
            state_prior = self.mixModels[mix_idx].startprob_
            state_post = self.mixModels[mix_idx].predict_proba(Xpref)[-1, :]
            self.mixModels[mix_idx].startprob_ = state_post

        X, state_seq = self.mixModels[mix_idx].sample(n_samples=n_samples)

        if Xpref is not None:
            self.mixModels[mix_idx].startprob_ = state_prior

        return X, mix_idx, state_seq
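A hedged end-to-end sketch of the class above. ``mix_dim=3`` because ``init_params`` hardcodes three components, and the module-level helpers the original file imports (``rand_initialization``, ``relu_normalization``, ``normalize``, ``iter_from_X_lengths``, and so on) must be available; the data are random:

    import numpy as np

    model = SpaMHMM(n_nodes=3, mix_dim=3, n_components=4, n_features=2,
                    n_iter=5, verbose=True)
    X = [np.random.randn(np.random.randint(5, 15), 2) for _ in range(6)]
    y = [np.random.randint(3) for _ in range(6)]
    train_hist = model.fit(X, y)   # returns the training-loss history
    print(model.score(X, y))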
Example #14
    def fit(self, X, lengths=None):

        X = check_array(X)
        self._init(X, lengths=lengths)
        self._check()

        self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, self.verbose)
        for iter in range(self.n_iter):
            print('iteration: {}'.format(iter))
            stats = self._initialize_sufficient_statistics()
            curr_logprob = 0
            tt = 0
            path_list = list()

            for i, j in iter_from_X_lengths(X, lengths):
                logprob, state_sequence = self.decode(X[i:j],
                                                      algorithm="viterbi")

                curr_logprob += logprob

                epsilon = np.zeros((state_sequence.shape[0] - 1,
                                    self.n_components, self.n_components))
                gamma = np.zeros((state_sequence.shape[0], self.n_components))

                for t in range(state_sequence.shape[0] - 1):
                    epsilon[t, state_sequence[t], state_sequence[t + 1]] = 1

                # The inner loop variable must not reuse ``i``, which is
                # still needed below to slice X[i:j].
                for t in range(state_sequence.shape[0]):
                    for c in range(self.n_components):
                        if t != (state_sequence.shape[0] - 1):
                            gamma[t, c] = np.sum(epsilon[t, c])
                        else:
                            gamma[t, c] = gamma[t - 1, c]

                path_list.append(state_sequence)
                self._accumulate_sufficient_statistics(stats, X[i:j], epsilon,
                                                       gamma, state_sequence,
                                                       None)
                tt += 1

            print('average loss: {}'.format(curr_logprob / tt))

            # ``fast_update`` and ``update_dnn`` are assumed to be
            # module-level flags; they are not defined in this snippet.
            if not fast_update:
                stats['start'] /= tt
                stats['trans'] /= tt

                self._do_mstep(stats)
                if update_dnn:
                    temp_path = np.zeros((0, 1))
                    for k, (i, j) in enumerate(iter_from_X_lengths(X,
                                                                   lengths)):
                        temp_path = np.vstack(
                            [temp_path,
                             np.array(path_list[k]).reshape(-1, 1)])
                    self.mlp.train(X, temp_path, 20)

                acoustic_model = np.zeros(self.n_components)
                for i, j in iter_from_X_lengths(X, lengths):
                    logprob, state_sequence = self.decode(X[i:j],
                                                          algorithm="viterbi")
                    for state in state_sequence:
                        acoustic_model[state] += 1
                self.aucoustic_model = acoustic_model / np.sum(acoustic_model)

            self.monitor_.report(curr_logprob)
            # Relative-tolerance variant of ``self.monitor_.converged``.
            if (self.monitor_.iter == self.monitor_.n_iter or
                    (len(self.monitor_.history) == 2 and
                     abs(self.monitor_.history[1] - self.monitor_.history[0])
                     < self.monitor_.tol * abs(self.monitor_.history[1]))):
                break

        print('----------------------------------------------')
        return self
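The fit above replaces the Baum-Welch E-step with hard counts from a Viterbi path: ``epsilon`` one-hot-encodes each observed transition and ``gamma`` is its row sums, with the last frame repeating the previous one. A toy illustration:

    import numpy as np

    n_components = 3
    path = np.array([0, 0, 1, 2, 2])          # a decoded Viterbi path
    epsilon = np.zeros((len(path) - 1, n_components, n_components))
    for t in range(len(path) - 1):
        epsilon[t, path[t], path[t + 1]] = 1
    gamma = epsilon.sum(axis=2)               # occupancy for t < T-1
    gamma = np.vstack([gamma, gamma[-1]])     # last frame repeats row T-2
    print(gamma.sum(axis=0))                  # [2. 1. 2.]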
Example #15
    def fit(self, X, y, lengths=None, valid_data=None):
        '''
        Trains SpaMHMM on data X, y, using the EM algorithm.
        Inputs:
            X - np.array of size (n_samples, n_features).
            y - np.int of size n_sequences, whose entries are in the
                range [0, n_nodes-1].
            lengths - list containing the lengths of each individual sequence
                      in X, with size n_sequences.
            valid_data - tuple (X_valid, y_valid, lengths_valid) containing the
                         validation data; if validation data is given, the
                         model with the lowest validation loss is saved in a
                         pickle file (optional, default:None).
        '''
        if type(X) == list:
            lengths = [x.shape[0] for x in X]
            X = np.concatenate(X)
            y = np.array(y)

        self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, False)

        if valid_data is not None:
            X_valid, y_valid, lengths_valid = valid_data

            if type(X_valid) == list:
                lengths_valid = [x.shape[0] for x in X_valid]
                X_valid = np.concatenate(X_valid)
                y_valid = np.array(y_valid)

            max_validscore = float('-inf')
            validloss_hist = []

        if self.graph is not None:
            self.reg_ = True
        else:
            self.reg_ = False

        self.init_params(X)
        self._check()

        for m in range(self.mix_dim):
            # Alternative fixed initializations, kept for reference:
            # self.mixModels[m].means_ = np.array([np.min(X), np.max(X) / 4, np.max(X) / 2, np.max(X)])[:, np.newaxis]
            # self.mixModels[m].means_ = np.array([4, 9, 17, 25])[:, np.newaxis]
            self.mixModels[m].means_ = np.sort(
                self.mixModels[m].means_.flatten())[:, np.newaxis]

        prevscore = float('-inf')
        trainloss_hist = []
        for it in range(self.n_iter):
            t0 = time.time()
            stats = self._compute_sufficient_statistics(X, y, lengths)

            # Trace-based similarity between the two transition matrices,
            # normalized by sqrt(tr(A @ A)) * sqrt(tr(B @ B)).  Alternatives
            # kept for reference:
            # similarity = diversified_hmm.get_kernel(self.mixModels[0].transmat_, self.mixModels[1].transmat_) * 1e15
            # similarity = diversified_hmm.get_prior(np.dot(self.mixModels[0].transmat_.T, self.mixModels[1].transmat_))
            norm0 = math.sqrt(
                np.trace(self.mixModels[0].transmat_.dot(
                    self.mixModels[0].transmat_)))
            norm1 = math.sqrt(
                np.trace(self.mixModels[1].transmat_.dot(
                    self.mixModels[1].transmat_)))
            similarity = np.trace(self.mixModels[0].transmat_.dot(
                self.mixModels[1].transmat_)) / (norm0 * norm1)
            self.mixModels[0].similarity = similarity
            self.mixModels[1].similarity = similarity

            self.mixModels[0].iter = it

            # if it == 0:
            #     self.mixModels[0].other_trans = np.array([[.1, 0, .3, .6], [.1, 0, .5, .4], [0, .4, .4, .2], [.2, .2, .2, .4]])
            #     #self.mixModels[1].other_trans = np.array([[.7, .1, .2, 0], [.1, 0, .2, .7], [.5, .1, .4, 0], [0, .2, .1, .7]])
            # else:
            self.mixModels[0].other_trans = self.mixModels[1].transmat_
            self.mixModels[1].other_trans = self.mixModels[0].transmat_

            # Manual per-component M-step (instead of self._do_mstep(stats)),
            # so that model 1 sees model 0's freshly updated transitions.
            if self.reg_:
                self._fit_coef(stats)
            else:
                self.mixCoef = stats['mix_post']
                normalize(self.mixCoef, axis=1)
            self.mixModels[0].transmat_prior = stats['trans_prior' + str(0)]
            self.mixModels[0]._do_mstep(stats['mix_idx' + str(0)])
            # Take the update from this iteration.
            self.mixModels[1].other_trans = self.mixModels[0].transmat_
            self.mixModels[1].transmat_prior = stats['trans_prior' + str(1)]
            self.mixModels[1]._do_mstep(stats['mix_idx' + str(1)])

            print('trans0 {}'.format(self.mixModels[0].transmat_))
            print('trans1 {}'.format(self.mixModels[1].transmat_))

            t1 = time.time()

            currscore = self.score(X, y, lengths)
            trainloss_hist.append(-currscore)
            if valid_data is not None:
                validscore = self.score(X_valid, y_valid, lengths_valid)
                validloss_hist.append(-validscore)

                if validscore > max_validscore:
                    max_validscore = validscore
                    with open(self.name + '.pkl', 'wb') as f:
                        pickle.dump(self, f)

            if self.verbose:
                if (not self.reg_) and (prevscore > currscore):
                    print(
                        'WARNING: loss has increased at iteration {}!'.format(
                            it))
                    print('prev loss = {:.5f}, curr loss = {:.5f}'.format(
                        -prevscore, -currscore))
                elif valid_data is not None:
                    print('it {}: train loss = {:.5f}, valid loss = {:.5f}, '
                          '{:.3f} sec/it'.format(it + 1, -currscore,
                                                 -validscore, t1 - t0))
                else:
                    print('it {}: loss = {:.5f}, {:.3f} sec/it'.format(
                        it + 1, -currscore, t1 - t0))

            ll = np.sum(self.scores_per_seq(X, y, lengths))
            print("ll: {}".format(ll))

            # ll = 0
            # for m in range(self.mix_dim):
            #     ll += np.max(self.mixModels[m]._compute_log_likelihood(X))

            self.monitor_.report(currscore)
            # self.monitor_.report(ll)
            # print("ll: {}".format(ll))

            if self.monitor_.converged:
                if self.verbose:
                    print(
                        'Loss improved less than {}. Training stopped.'.format(
                            self.tol))
                break

            prevscore = currscore

        if valid_data is not None:
            return trainloss_hist, validloss_hist
        else:
            return trainloss_hist
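The similarity used in the fit above is a trace inner product between the two transition matrices, normalized by ``sqrt(tr(A @ A)) * sqrt(tr(B @ B))``; note this matches cosine similarity under the Frobenius norm only for symmetric matrices. A toy check with made-up matrices:

    import numpy as np

    A = np.array([[0.9, 0.1], [0.2, 0.8]])
    B = np.array([[0.5, 0.5], [0.5, 0.5]])
    sim = np.trace(A @ B) / np.sqrt(np.trace(A @ A) * np.trace(B @ B))
    print(sim)   # about 0.82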
Example #16
class SemiSupervisedGaussianHMM(GaussianHMM):
    def __init__(self, n_components=1, covariance_type='diag', min_covar=1e-3, startprob_prior=1.0,
                 transmat_prior=1.0, means_prior=0, means_weight=0, covars_prior=1e-2, covars_weight=1,
                 algorithm="viterbi", random_state=None, n_iter=5, tol=1e-2, verbose=False,
                 params="stmc", init_params="stmc", states_prior=None, fp_state=None):
        GaussianHMM.__init__(self, n_components=n_components, covariance_type=covariance_type,
                             min_covar=min_covar, startprob_prior=startprob_prior, transmat_prior=transmat_prior,
                             means_prior=means_prior, means_weight=means_weight,
                             covars_prior=covars_prior, covars_weight=covars_weight,
                             algorithm=algorithm, random_state=random_state,
                             n_iter=n_iter, tol=tol, verbose=verbose,
                             params=params, init_params=init_params)

        self.covariance_type = covariance_type
        self.min_covar = min_covar
        self.means_prior = means_prior
        self.means_weight = means_weight
        self.covars_prior = covars_prior
        self.covars_weight = covars_weight
        self.states_prior = states_prior
        self.fp_state = fp_state

    def fit(self, X, lengths=None):
        """Estimate model parameters.
        An initialization step is performed before entering the
        EM algorithm. If you want to avoid this step for a subset of
        the parameters, pass proper ``init_params`` keyword argument
        to estimator's constructor.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature matrix of individual samples.
        lengths : array-like of integers, shape (n_sequences, )
            Lengths of the individual sequences in ``X``. The sum of
            these should be ``n_samples``.
        Returns
        -------
        self : object
            Returns self.
        """
        X = check_array(X)
        self._init(X, lengths=lengths)
        self._check()

        self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, self.verbose)
        for iter in range(self.n_iter):
            stats = self._initialize_sufficient_statistics()
            curr_logprob = 0
            for i, j in iter_from_X_lengths(X, lengths):
                framelogprob = self._compute_log_likelihood(X[i:j])
                logprob, fwdlattice = self._do_forward_pass(framelogprob)
                curr_logprob += logprob
                bwdlattice = self._do_backward_pass(framelogprob)
                posteriors = self._compute_posteriors(fwdlattice, bwdlattice)

                # fix posteriors
                if self.states_prior is not None and self.fp_state is not None:
                    for k in range(len(self.states_prior)):
                        if self.states_prior[k] == 0:
                            # non footprint states
                            posteriors[k][self.fp_state] = 0.0
                            posteriors[k] = posteriors[k] / sum(posteriors[k])

                        elif self.states_prior[k] == 1:
                            # footprint states
                            posteriors[k] = 0.0  # zero the row
                            posteriors[k][self.fp_state] = 1.0

                self._accumulate_sufficient_statistics(stats, X[i:j], framelogprob, posteriors, fwdlattice, bwdlattice)

            self._do_mstep(stats)

            self.monitor_.report(curr_logprob)
            if self.monitor_.converged:
                break

        return self
Example #17
    def fit(self, X, y, lengths=None, valid_data=None):
        '''
        Trains SpaMHMM on data X, y, using the EM algorithm.
        Inputs:
            X - np.array of size (n_samples, n_features).
            y - np.int of size n_sequences, whose entries are in the
                range [0, n_nodes-1].
            lengths - list containing the lengths of each individual sequence
                      in X, with size n_sequences.
            valid_data - tuple (X_valid, y_valid, lengths_valid) containing the
                         validation data; if validation data is given, the
                         model with the lowest validation loss is saved in a
                         pickle file (optional, default:None).
        '''
        if type(X) == list:
            lengths = [x.shape[0] for x in X]
            X = np.concatenate(X)
            y = np.array(y)

        self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, False)

        if valid_data is not None:
            X_valid, y_valid, lengths_valid = valid_data

            if type(X_valid) == list:
                lengths_valid = [x.shape[0] for x in X_valid]
                X_valid = np.concatenate(X_valid)
                y_valid = np.array(y_valid)

            max_validscore = float('-inf')
            validloss_hist = []

        if self.graph is not None:
            self.reg_ = True
        else:
            self.reg_ = False

        self.init_params(X)
        self._check()

        prevscore = float('-inf')
        trainloss_hist = []
        for it in range(self.n_iter):
            t0 = time.time()
            stats = self._compute_sufficient_statistics(X, y, lengths)

            self._do_mstep(stats)

            print('trans0 {}'.format(self.mixModels[0].transmat_))
            print('trans1 {}'.format(self.mixModels[1].transmat_))

            t1 = time.time()

            currscore = self.score(X, y, lengths)
            trainloss_hist.append(-currscore)
            if valid_data is not None:
                validscore = self.score(X_valid, y_valid, lengths_valid)
                validloss_hist.append(-validscore)

                if validscore > max_validscore:
                    max_validscore = validscore
                    with open(self.name + '.pkl', 'wb') as f:
                        pickle.dump(self, f)

            if self.verbose:
                if (not self.reg_) and (prevscore > currscore):
                    print(
                        'WARNING: loss has increased at iteration {}!'.format(
                            it))
                    print('prev loss = {:.5f}, curr loss = {:.5f}'.format(
                        -prevscore, -currscore))
                elif valid_data is not None:
                    print('it {}: train loss = {:.5f}, valid loss = {:.5f}, '
                          '{:.3f} sec/it'.format(it + 1, -currscore,
                                                 -validscore, t1 - t0))
                else:
                    print('it {}: loss = {:.5f}, {:.3f} sec/it'.format(
                        it + 1, -currscore, t1 - t0))

            ll = np.sum(self.scores_per_seq(X, y, lengths))
            print("ll: {}".format(ll))

            # ll = 0
            # for m in range(self.mix_dim):
            #     ll += np.max(self.mixModels[m]._compute_log_likelihood(X))

            self.monitor_.report(currscore)
            # self.monitor_.report(ll)
            # print("ll: {}".format(ll))

            if self.monitor_.converged:
                if self.verbose:
                    print(
                        'Loss improved less than {}. Training stopped.'.format(
                            self.tol))
                break

            prevscore = currscore

        if valid_data is not None:
            return trainloss_hist, validloss_hist
        else:
            return trainloss_hist
Example #18
class hmm_dnn(_BaseHMM):
    def __init__(self,
                 mlp,
                 aucoustic_model,
                 observation_count,
                 n_components=1,
                 startprob_prior=1.0,
                 transmat_prior=1.0,
                 algorithm="viterbi",
                 random_state=None,
                 n_iter=10,
                 tol=1e-2,
                 verbose=False,
                 params="stmc",
                 init_params="stmc"):

        _BaseHMM.__init__(self,
                          n_components,
                          startprob_prior=startprob_prior,
                          transmat_prior=transmat_prior,
                          algorithm=algorithm,
                          random_state=random_state,
                          n_iter=n_iter,
                          tol=tol,
                          params=params,
                          verbose=verbose,
                          init_params=init_params)

        self.aucoustic_model = aucoustic_model
        self.observation_count = observation_count
        self.mlp = mlp
        self.mlp.info()

    def _compute_log_likelihood(self, X):
        # Scaled-likelihood trick for hybrid DNN/HMM systems: divide the
        # network's state posterior by the state prior (the acoustic model).
        # The ``== 0`` term guards against taking log(0).
        prob = self.mlp.log_probablity(X).astype(type(X[0, 0]))

        prob = prob - np.log(self.observation_count)
        prob = prob - np.log(self.aucoustic_model +
                             (self.aucoustic_model == 0))

        return prob

    def _accumulate_sufficient_statistics(self, stats, X, epsilon, gamma, path,
                                          bwdlattice):

        stats['nobs'] += 1
        if 's' in self.params:
            stats['start'] += gamma[0]
        if 't' in self.params:
            n_samples = X.shape[0]

            if n_samples <= 1:
                return

            a = np.zeros((self.n_components, self.n_components))

            for i in range(self.n_components):
                for j in range(self.n_components):
                    a[i, j] = np.sum(epsilon[:, i,
                                             j]) / (np.sum(gamma[:, i]) +
                                                    (np.sum(gamma[:, i]) == 0))

            stats['trans'] += a

    def fit(self, X, lengths=None):

        X = check_array(X)
        self._init(X, lengths=lengths)
        self._check()

        self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, self.verbose)
        for iter in range(self.n_iter):
            print('iteration: {}'.format(iter))
            stats = self._initialize_sufficient_statistics()
            curr_logprob = 0
            tt = 0
            path_list = list()

            for i, j in iter_from_X_lengths(X, lengths):
                logprob, state_sequence = self.decode(X[i:j],
                                                      algorithm="viterbi")

                curr_logprob += logprob

                epsilon = np.zeros((state_sequence.shape[0] - 1,
                                    self.n_components, self.n_components))
                gamma = np.zeros((state_sequence.shape[0], self.n_components))

                for t in range(state_sequence.shape[0] - 1):
                    epsilon[t, state_sequence[t], state_sequence[t + 1]] = 1

                # The inner loop variable must not reuse ``i``, which is
                # still needed below to slice X[i:j].
                for t in range(state_sequence.shape[0]):
                    for c in range(self.n_components):
                        if t != (state_sequence.shape[0] - 1):
                            gamma[t, c] = np.sum(epsilon[t, c])
                        else:
                            gamma[t, c] = gamma[t - 1, c]

                path_list.append(state_sequence)
                self._accumulate_sufficient_statistics(stats, X[i:j], epsilon,
                                                       gamma, state_sequence,
                                                       None)
                tt += 1

            print('average loss: {}'.format(curr_logprob / tt))

            # ``fast_update`` and ``update_dnn`` are assumed to be
            # module-level flags; they are not defined in this snippet.
            if not fast_update:
                stats['start'] /= tt
                stats['trans'] /= tt

                self._do_mstep(stats)
                if update_dnn:
                    temp_path = np.zeros((0, 1))
                    for k, (i, j) in enumerate(iter_from_X_lengths(X,
                                                                   lengths)):
                        temp_path = np.vstack(
                            [temp_path,
                             np.array(path_list[k]).reshape(-1, 1)])
                    self.mlp.train(X, temp_path, 20)

                acoustic_model = np.zeros(self.n_components)
                for i, j in iter_from_X_lengths(X, lengths):
                    logprob, state_sequence = self.decode(X[i:j],
                                                          algorithm="viterbi")
                    for state in state_sequence:
                        acoustic_model[state] += 1
                self.aucoustic_model = acoustic_model / np.sum(acoustic_model)

            self.monitor_.report(curr_logprob)
            # Relative-tolerance variant of ``self.monitor_.converged``.
            if (self.monitor_.iter == self.monitor_.n_iter or
                    (len(self.monitor_.history) == 2 and
                     abs(self.monitor_.history[1] - self.monitor_.history[0])
                     < self.monitor_.tol * abs(self.monitor_.history[1]))):
                break

        print('----------------------------------------------')
        return self

    def _do_mstep(self, stats):
        # Entries that are exactly zero are left unchanged, preserving the
        # model topology fixed at initialization.
        if 's' in self.params:
            startprob_ = stats['start']
            self.startprob_ = np.where(self.startprob_ == 0.0, self.startprob_,
                                       startprob_)
            normalize(self.startprob_)
        if 't' in self.params:
            transmat_ = stats['trans']
            self.transmat_ = np.where(self.transmat_ == 0.0, self.transmat_,
                                      transmat_)
            normalize(self.transmat_, axis=1)

            for i, row in enumerate(self.transmat_):
                if not np.any(row):
                    self.transmat_[i][i] = 1
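``hmm_dnn`` only needs three methods from its ``mlp`` dependency. Below is a minimal stand-in matching the interface inferred from the calls above; it is purely illustrative, not the project's actual network:

    import numpy as np

    class DummyMLP:
        def __init__(self, n_components=4):
            self.n_components = n_components

        def info(self):
            print('DummyMLP with', self.n_components, 'outputs')

        def log_probablity(self, X):   # (sic) spelling matches the caller
            # Uniform log-posteriors, shape (n_samples, n_components).
            return np.full((X.shape[0], self.n_components),
                           -np.log(self.n_components))

        def train(self, X, paths, epochs):
            pass   # a real model would fit X against the Viterbi paths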