Exemple #1
0
    def train_unsupervised(self, unlabeled_sequences, update_outputs=True,
                           **kwargs):
        """
        Trains the HMM using the Baum-Welch algorithm to maximise the
        probability of the data sequence. This is a variant of the EM
        algorithm, and is unsupervised in that it doesn't need the state
        sequences for the symbols. The code is based on 'A Tutorial on Hidden
        Markov Models and Selected Applications in Speech Recognition',
        Lawrence Rabiner, IEEE, 1989.

        As a side effect, the trainer's own ``_states`` and ``_symbols`` are
        replaced by those of the model being trained.

        :return: the trained model
        :rtype: HiddenMarkovModelTagger
        :param unlabeled_sequences: the training data, a set of
            sequences of observations. The data is traversed once per EM
            iteration; a one-shot iterator (e.g. a generator) is copied
            into a list first so that every iteration sees all of it.
        :type unlabeled_sequences: list
        :param update_outputs: if False, the output distributions of the
            model are left untouched and only the transition
            distributions are re-estimated
        :type update_outputs: bool

        kwargs may include following parameters:

        :param model: a HiddenMarkovModelTagger instance used to begin
            the Baum-Welch algorithm
        :param max_iterations: the maximum number of EM iterations
            (default 1000)
        :param convergence_logprob: the maximum change in log probability to
            allow convergence (default 1e-6)
        """

        # The data is re-read on every EM iteration. A one-shot iterator
        # would be empty from the second iteration on, which would fill the
        # model with NaNs (-inf - -inf), so take a snapshot of it.
        # Re-iterable containers (lists, lazy views) are left as they are.
        if iter(unlabeled_sequences) is unlabeled_sequences:
            unlabeled_sequences = list(unlabeled_sequences)

        # start from an HMM built out of RandomProbDist distributions, which
        # will be iteratively refined, unless given an existing model
        model = kwargs.get('model')
        if not model:
            priors = RandomProbDist(self._states)
            transitions = DictionaryConditionalProbDist(
                {state: RandomProbDist(self._states)
                 for state in self._states})
            outputs = DictionaryConditionalProbDist(
                {state: RandomProbDist(self._symbols)
                 for state in self._states})
            model = HiddenMarkovModelTagger(self._symbols, self._states,
                                            transitions, outputs, priors)

        # from here on the model's own state/symbol inventories are used
        self._states = model._states
        self._symbols = model._symbols

        N = len(self._states)
        M = len(self._symbols)
        symbol_numbers = {sym: i for i, sym in enumerate(self._symbols)}

        # update model prob dists so that they can be modified
        # NOTE: the priors are deliberately not wrapped here; they are never
        # re-estimated by the loop below.
        model._transitions = DictionaryConditionalProbDist(
            {s: MutableProbDist(model._transitions[s], self._states)
             for s in self._states})

        if update_outputs:
            model._outputs = DictionaryConditionalProbDist(
                {s: MutableProbDist(model._outputs[s], self._symbols)
                 for s in self._states})

        model.reset_cache()

        # iterate until convergence
        converged = False
        last_logprob = None
        iteration = 0
        max_iterations = kwargs.get('max_iterations', 1000)
        epsilon = kwargs.get('convergence_logprob', 1e-6)

        while not converged and iteration < max_iterations:
            # fresh log2-domain accumulators for this iteration
            A_numer = _ninf_array((N, N))
            B_numer = _ninf_array((N, M))
            A_denom = _ninf_array(N)
            B_denom = _ninf_array(N)

            logprob = 0
            for sequence in unlabeled_sequences:
                sequence = list(sequence)
                if not sequence:
                    continue

                (lpk, seq_A_numer, seq_A_denom,
                 seq_B_numer, seq_B_denom) = self._baum_welch_step(
                     sequence, model, symbol_numbers)

                # add these sums to the global A and B values; subtracting
                # lpk divides each term by the probability of its sequence
                for i in range(N):
                    A_numer[i] = np.logaddexp2(A_numer[i], seq_A_numer[i]-lpk)
                    B_numer[i] = np.logaddexp2(B_numer[i], seq_B_numer[i]-lpk)

                A_denom = np.logaddexp2(A_denom, seq_A_denom-lpk)
                B_denom = np.logaddexp2(B_denom, seq_B_denom-lpk)

                logprob += lpk

            # use the calculated values to update the transition and output
            # probability values
            for i, si in enumerate(self._states):
                logprob_Ai = A_numer[i] - A_denom[i]
                logprob_Bi = B_numer[i] - B_denom[i]

                # We should normalize all probabilities (see p.391 Huang et al)
                # Let sum(P) be K.
                # We can divide each Pi by K to make sum(P) == 1.
                #   Pi' = Pi/K
                #   log2(Pi') = log2(Pi) - log2(K)
                logprob_Ai -= logsumexp2(logprob_Ai)
                logprob_Bi -= logsumexp2(logprob_Bi)

                # update output and transition probabilities
                for j, sj in enumerate(self._states):
                    model._transitions[si].update(sj, logprob_Ai[j])

                if update_outputs:
                    for k, ok in enumerate(self._symbols):
                        model._outputs[si].update(ok, logprob_Bi[k])

                # Rabiner says the priors don't need to be updated. I don't
                # believe him. FIXME

            # test for convergence
            if iteration > 0 and abs(logprob - last_logprob) < epsilon:
                converged = True

            print('iteration', iteration, 'logprob', logprob)
            iteration += 1
            last_logprob = logprob

        return model
Exemple #2
0
 def cpd(array, conditions, samples):
     """Build a conditional distribution from parallel rows and conditions.

     Each row of ``array`` is paired positionally with the entry of
     ``conditions`` at the same index and turned into a distribution with
     ``pd(row, samples)``. Pairing stops at the shorter of the two inputs,
     and a repeated condition keeps the distribution of its last row.

     :param array: iterable of rows, one per condition
     :param conditions: iterable of hashable condition labels
     :param samples: passed unchanged to ``pd`` for every row
     :return: a DictionaryConditionalProbDist keyed by condition
     """
     return DictionaryConditionalProbDist(
         {condition: pd(row, samples)
          for row, condition in zip(array, conditions)})
Exemple #3
0
    def train_unsupervised(self, unlabeled_sequences, **kwargs):
        """
        Trains the HMM using the Baum-Welch algorithm to maximise the
        probability of the data sequence. This is a variant of the EM
        algorithm, and is unsupervised in that it doesn't need the state
        sequences for the symbols. The code is based on 'A Tutorial on Hidden
        Markov Models and Selected Applications in Speech Recognition',
        Lawrence Rabiner, IEEE, 1989.

        :return: the trained model
        :rtype: HiddenMarkovModelTagger
        :param unlabeled_sequences: the training data, a set of
            sequences of observations. The data is traversed once per EM
            iteration; a one-shot iterator (e.g. a generator) is copied
            into a list first so that every iteration sees all of it.
        :type unlabeled_sequences: list

        kwargs may include following parameters:

        :param model: a HiddenMarkovModelTagger instance used to begin
            the Baum-Welch algorithm
        :param max_iterations: the maximum number of EM iterations
            (default 1000)
        :param convergence_logprob: the maximum change in log probability to
            allow convergence (default 1e-6)
        """

        N = len(self._states)
        M = len(self._symbols)
        symbol_dict = dict((sym, k) for k, sym in enumerate(self._symbols))

        # The data is re-read on every EM iteration. A one-shot iterator
        # would be empty from the second iteration on, leaving every
        # accumulator at its initial value, so take a snapshot of it.
        # Re-iterable containers (lists, lazy views) are left as they are.
        if iter(unlabeled_sequences) is unlabeled_sequences:
            unlabeled_sequences = list(unlabeled_sequences)

        # create a uniform HMM, which will be iteratively refined, unless
        # given an existing model
        model = kwargs.get('model')
        if not model:
            priors = UniformProbDist(self._states)
            transitions = DictionaryConditionalProbDist(
                dict((state, UniformProbDist(self._states))
                     for state in self._states))
            output = DictionaryConditionalProbDist(
                dict((state, UniformProbDist(self._symbols))
                     for state in self._states))
            model = HiddenMarkovModelTagger(self._symbols, self._states,
                                            transitions, output, priors)

        # update model prob dists so that they can be modified
        model._priors = MutableProbDist(model._priors, self._states)
        model._transitions = DictionaryConditionalProbDist(
            dict((s, MutableProbDist(model._transitions[s], self._states))
                 for s in self._states))
        model._outputs = DictionaryConditionalProbDist(
            dict((s, MutableProbDist(model._outputs[s], self._symbols))
                 for s in self._states))

        # iterate until convergence
        converged = False
        last_logprob = None
        iteration = 0
        max_iterations = kwargs.get('max_iterations', 1000)
        epsilon = kwargs.get('convergence_logprob', 1e-6)
        while not converged and iteration < max_iterations:
            # fresh log-domain accumulators for this iteration
            A_numer = ones((N, N), float64) * _NINF
            B_numer = ones((N, M), float64) * _NINF
            A_denom = ones(N, float64) * _NINF
            B_denom = ones(N, float64) * _NINF

            logprob = 0
            for sequence in unlabeled_sequences:
                sequence = list(sequence)
                if not sequence:
                    continue

                # compute forward and backward probabilities
                alpha = model._forward_probability(sequence)
                beta = model._backward_probability(sequence)

                # find the log probability of the sequence
                T = len(sequence)
                lpk = _log_add(*alpha[T - 1, :])
                logprob += lpk

                # now update A and B (transition and output probabilities)
                # using the alpha and beta values. Please refer to Rabiner's
                # paper for details, it's too hard to explain in comments
                local_A_numer = ones((N, N), float64) * _NINF
                local_B_numer = ones((N, M), float64) * _NINF
                local_A_denom = ones(N, float64) * _NINF
                local_B_denom = ones(N, float64) * _NINF

                # for each position, accumulate sums for A and B
                for t in range(T):
                    has_next = t < T - 1
                    x = sequence[t][_TEXT]  #not found? FIXME
                    if has_next:
                        xnext = sequence[t + 1][_TEXT]  #not found? FIXME
                        # the output log-probability of the next symbol
                        # depends only on the target state, so look it up
                        # once per position rather than once per (i, j)
                        next_output = [model._outputs[sj].logprob(xnext)
                                       for sj in self._states]
                    xi = symbol_dict[x]
                    for i in range(N):
                        si = self._states[i]
                        # alpha * beta for state i at position t (log domain)
                        occupancy = alpha[t, i] + beta[t, i]
                        if has_next:
                            transitions_i = model._transitions[si]
                            for j in range(N):
                                sj = self._states[j]
                                local_A_numer[i, j] = _log_add(
                                    local_A_numer[i, j],
                                    alpha[t, i] +
                                    transitions_i.logprob(sj) +
                                    next_output[j] +
                                    beta[t + 1, j])
                            local_A_denom[i] = _log_add(
                                local_A_denom[i], occupancy)
                        else:
                            # Not a typo: this branch runs only at the last
                            # position, where local_A_denom[i] already holds
                            # the sum over all earlier positions. Adding the
                            # final term gives the B denominator, which
                            # covers every position.
                            local_B_denom[i] = _log_add(
                                local_A_denom[i], occupancy)

                        local_B_numer[i, xi] = _log_add(
                            local_B_numer[i, xi], occupancy)

                # add these sums to the global A and B values; subtracting
                # lpk divides each term by the probability of its sequence
                for i in range(N):
                    for j in range(N):
                        A_numer[i, j] = _log_add(A_numer[i, j],
                                                 local_A_numer[i, j] - lpk)
                    for k in range(M):
                        B_numer[i, k] = _log_add(B_numer[i, k],
                                                 local_B_numer[i, k] - lpk)

                    A_denom[i] = _log_add(A_denom[i], local_A_denom[i] - lpk)
                    B_denom[i] = _log_add(B_denom[i], local_B_denom[i] - lpk)

            # use the calculated values to update the transition and output
            # probability values
            for i in range(N):
                si = self._states[i]
                for j in range(N):
                    sj = self._states[j]
                    model._transitions[si].update(sj,
                                                  A_numer[i, j] - A_denom[i])
                for k in range(M):
                    ok = self._symbols[k]
                    model._outputs[si].update(ok, B_numer[i, k] - B_denom[i])
                # Rabiner says the priors don't need to be updated. I don't
                # believe him. FIXME

            # test for convergence
            if iteration > 0 and abs(logprob - last_logprob) < epsilon:
                converged = True

            # a single pre-formatted string prints the same text whether
            # print is a statement (Python 2) or a function (Python 3)
            print('iteration %s logprob %s' % (iteration, logprob))
            iteration += 1
            last_logprob = logprob

        return model