Code example #1
File: EM_mapper.py (project: Sandy4321/nltk_contrib)
    def map(self, key, value):
        """
        Mapper for one distributed Baum-Welch (EM) iteration:
        establish the hmm model and estimate the local
        hmm parameters from the input sequences.

        Emits, via self.outputcollector / self.collect_matrix, the local
        sufficient statistics (log-space numerators/denominators for the
        transition matrix A and output matrix B), the current priors, and
        the state/symbol alphabets, for a reducer to combine.

        @param key: None
        @param value: input sequence
        """

        # Current global parameter estimates, re-read on every call.
        symbols, states, A, B, pi = self.read_params()
        N = len(states)   # number of hidden states
        M = len(symbols)  # size of the output alphabet
        symbol_dict = dict((symbols[i], i) for i in range(M))

        model = HiddenMarkovModelTagger(symbols=symbols, states=states, \
                transitions=A, outputs=B, priors=pi)

        logprob = 0
        sequence = list(value)
        if not sequence:
            return

        # compute forward and backward probabilities
        alpha = model._forward_probability(sequence)
        beta = model._backward_probability(sequence)

        # find the log probability of the sequence
        # (log-add over the final forward column; base-2 logs throughout)
        T = len(sequence)
        lpk = _log_add(*alpha[T-1, :])
        logprob += lpk

        # now update A and B (transition and output probabilities)
        # using the alpha and beta values. Please refer to Rabiner's
        # paper for details, it's too hard to explain in comments
        # All accumulators live in log space, initialised to log(0) = -inf.
        local_A_numer = ones((N, N), float64) * _NINF
        local_B_numer = ones((N, M), float64) * _NINF
        local_A_denom = ones(N, float64) * _NINF
        local_B_denom = ones(N, float64) * _NINF

        # for each position, accumulate sums for A and B
        for t in range(T):
            x = sequence[t][_TEXT] #not found? FIXME
            if t < T - 1:
                xnext = sequence[t+1][_TEXT] #not found? FIXME
            xi = symbol_dict[x]
            for i in range(N):
                si = states[i]
                if t < T - 1:
                    # Expected transitions i -> j at time t (Rabiner's xi_t).
                    for j in range(N):
                        sj = states[j]
                        local_A_numer[i, j] =  \
                            _log_add(local_A_numer[i, j],
                                    alpha[t, i] + 
                                    model._transitions[si].logprob(sj) + 
                                    model._outputs[sj].logprob(xnext) +
                                    beta[t+1, j])
                    local_A_denom[i] = _log_add(local_A_denom[i],
                                alpha[t, i] + beta[t, i])
                else:
                    # NOTE: reading local_A_denom here is intentional, not a
                    # copy-paste slip (it mirrors NLTK's _baum_welch_step):
                    # at the final step t == T-1,
                    #   B_denom[i] = sum_{t=0..T-1} gamma_t(i)
                    #              = A_denom[i] (sum over t < T-1) + gamma_{T-1}(i)
                    local_B_denom[i] = _log_add(local_A_denom[i],
                            alpha[t, i] + beta[t, i])

                # Expected count of state i at time t (gamma_t(i)); credited
                # to the symbol xi observed at t for the B numerator.
                local_B_numer[i, xi] = _log_add(local_B_numer[i, xi],
                        alpha[t, i] + beta[t, i])

        # Priors are not re-estimated here; re-emit the current values so
        # the reducer sees them.
        for i in range(N):
            self.outputcollector.collect("parameters", \
                    tuple2str(("Pi", states[i], pi.prob(states[i]))))

        # Emit the local statistics together with lpk so the reducer can
        # normalise each contribution by this sequence's likelihood.
        self.collect_matrix('A', local_A_numer, lpk, N, N)
        self.collect_matrix('B', local_B_numer, lpk, N, M)
        self.collect_matrix('A_denom', [local_A_denom], lpk, 1, N)
        self.collect_matrix('B_denom', [local_B_denom], lpk, 1, N)

        self.outputcollector.collect("parameters", "states " + \
                tuple2str(tuple(states)))
        self.outputcollector.collect("parameters", "symbols " + \
                tuple2str(tuple(symbols)))
Code example #2
File: EM_mapper.py (project: Sandy4321/nltk_contrib)
    def reduce(self, key, values):
        """
        Combine the local hmm parameters emitted by the mappers into a
        global parameter estimate and output it.

        Fix over the previous revision: removed the unused local
        ``pi_printed`` (assigned but never read).

        @param key: 'parameters' const string, not used in program
        @param values: serialized parameter records -- "states ...",
            "symbols ...", "Pi state prob", or per-entry matrix tuples
            (name, i, j, value, lpk, row, col) from collect_matrix
        """
        # Global log-space accumulators; allocated lazily once the matrix
        # dimensions are known from the first record of each kind.
        A_numer = B_numer = A_denom = B_denom = None
        N = M = 0    # number of states / number of symbols
        logprob = 0

        states = []
        symbols = []
        pi = {}      # state -> prior probability

        for value in values:
            # identifier identify different parameter type
            identifier = value.split()[0]
            if identifier == "states":
                # keep the first alphabet seen; all mappers emit the same one
                if not states:
                    states = value.split()[1:]
            elif identifier == "symbols":
                if not symbols:
                    symbols = value.split()[1:]
            elif identifier == "Pi":
                state, prob = value.split()[1:]
                pi[state] = float(prob)
            else:
                # extract quantities from value: one matrix entry plus the
                # emitting sequence's log-likelihood lpk and the dimensions
                name, i, j, value, lpk, row, col = str2tuple(value)
                row = int(row)
                col = int(col)
                i = int(i)
                j = int(j)
                value = float(value)
                lpk = float(lpk)
                # NOTE(review): lpk arrives once per matrix ENTRY, so it is
                # summed once per entry here, not once per sequence --
                # confirm this is the intended "loglikelihood" definition.
                logprob += lpk

                # add these sums to the global A and B values; each local
                # statistic is normalised by its sequence likelihood
                # (value - lpk in log space), accumulators start at
                # log(0) = -inf (_NINF)
                if name == "A":
                    if A_numer is None:
                        A_numer = ones((row, col), float64) * _NINF
                        N = row
                    A_numer[i, j] = _log_add(A_numer[i, j], value - lpk)
                elif name == "B":
                    if B_numer is None:
                        B_numer = ones((row, col), float64) * _NINF
                        M = col
                    B_numer[i, j] = _log_add(B_numer[i, j], value - lpk)
                elif name == "A_denom":
                    if A_denom is None:
                        A_denom = ones(col, float64) * _NINF
                    A_denom[j] = _log_add(A_denom[j], value - lpk)
                elif name == "B_denom":
                    if B_denom is None:
                        B_denom = ones(col, float64) * _NINF
                    B_denom[j] = _log_add(B_denom[j], value - lpk)

        # output the global hmm parameter
        for e in pi:
            self.outputcollector.collect("Pi", tuple2str((e, pi[e])))

        # Probabilities are kept as base-2 logs, hence 2 ** (numer - denom).
        for i in range(N):
            for j in range(N):
                self.outputcollector.collect("A", tuple2str((states[i], \
                        states[j], 2 ** (A_numer[i, j] - A_denom[i]))))

        for i in range(N):
            for j in range(M):
                self.outputcollector.collect("B", tuple2str((states[i], \
                        symbols[j], 2 ** (B_numer[i, j] - B_denom[i]))))

        self.outputcollector.collect("loglikelihood", logprob)