def map(self, key, value):
    """
    Establish the HMM model and estimate the local HMM parameters
    from the input sequence.

    @param key: None
    @param value: input sequence
    """
    symbols, states, A, B, pi = self.read_params()
    N = len(states)
    M = len(symbols)
    symbol_dict = dict((symbols[i], i) for i in range(M))
    model = HiddenMarkovModelTagger(symbols=symbols, states=states,
                                    transitions=A, outputs=B, priors=pi)
    logprob = 0
    sequence = list(value)
    if not sequence:
        return

    # compute forward and backward probabilities
    alpha = model._forward_probability(sequence)
    beta = model._backward_probability(sequence)

    # find the log probability of the sequence
    T = len(sequence)
    lpk = _log_add(*alpha[T-1, :])
    logprob += lpk

    # now update A and B (transition and output probabilities) using
    # the alpha and beta values; see Rabiner (1989) for the derivation
    # of the re-estimation formulas
    local_A_numer = ones((N, N), float64) * _NINF
    local_B_numer = ones((N, M), float64) * _NINF
    local_A_denom = ones(N, float64) * _NINF
    local_B_denom = ones(N, float64) * _NINF

    # for each position, accumulate sums for A and B
    for t in range(T):
        x = sequence[t][_TEXT]  # not found? FIXME
        if t < T - 1:
            xnext = sequence[t+1][_TEXT]  # not found? FIXME
        xi = symbol_dict[x]
        for i in range(N):
            si = states[i]
            if t < T - 1:
                for j in range(N):
                    sj = states[j]
                    local_A_numer[i, j] = _log_add(
                        local_A_numer[i, j],
                        alpha[t, i] +
                        model._transitions[si].logprob(sj) +
                        model._outputs[sj].logprob(xnext) +
                        beta[t+1, j])
                local_A_denom[i] = _log_add(local_A_denom[i],
                                            alpha[t, i] + beta[t, i])
            else:
                # local_A_denom already holds the sum over t = 0..T-2,
                # so folding in the final position yields B's
                # denominator: the sum over all t = 0..T-1
                local_B_denom[i] = _log_add(local_A_denom[i],
                                            alpha[t, i] + beta[t, i])
            local_B_numer[i, xi] = _log_add(local_B_numer[i, xi],
                                            alpha[t, i] + beta[t, i])

    # emit the local sufficient statistics for the reducer
    for i in range(N):
        self.outputcollector.collect("parameters",
            tuple2str(("Pi", states[i], pi.prob(states[i]))))
    self.collect_matrix('A', local_A_numer, lpk, N, N)
    self.collect_matrix('B', local_B_numer, lpk, N, M)
    self.collect_matrix('A_denom', [local_A_denom], lpk, 1, N)
    self.collect_matrix('B_denom', [local_B_denom], lpk, 1, N)
    self.outputcollector.collect("parameters",
                                 "states " + tuple2str(tuple(states)))
    self.outputcollector.collect("parameters",
                                 "symbols " + tuple2str(tuple(symbols)))
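# The helpers _log_add and collect_matrix are referenced above but not
# defined in this section. A minimal sketch of both follows, assuming
# NLTK's base-2 log-space convention (the reducer below converts back
# to probabilities with 2 **) and inferring the
# "name i j value lpk rows cols" record layout from what reduce()
# unpacks via str2tuple; treat both bodies as reconstructions, not the
# project's actual code.

from numpy import log2

_NINF = float('-inf')  # assumed definition of the log-zero sentinel

def _log_add(*values):
    # sum probabilities given as base-2 logs without leaving log
    # space, i.e. log-sum-exp: x + log2(sum(2 ** (v - x)))
    x = max(values)
    if x > _NINF:
        return x + log2(sum(2 ** (v - x) for v in values))
    return _NINF

def collect_matrix(self, name, matrix, lpk, rows, cols):
    # emit one record per cell so the reducer can rebuild the matrix:
    # each record carries the cell indices, the cell value, the
    # sequence log likelihood lpk, and the matrix dimensions
    for i in range(rows):
        for j in range(cols):
            self.outputcollector.collect("parameters",
                tuple2str((name, i, j, matrix[i][j], lpk, rows, cols)))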
def reduce(self, key, values):
    """
    Combine the local HMM parameters into a global estimate.

    @param key: the constant string 'parameters'; not used here
    @param values: the serialized parameter quantities emitted by map()
    """
    A_numer = B_numer = A_denom = B_denom = None
    N = M = 0
    logprob = 0
    states = []
    symbols = []
    pi = {}
    for value in values:
        # the leading identifier tells the parameter types apart
        identifier = value.split()[0]
        if identifier == "states":
            if not states:
                states = value.split()[1:]
        elif identifier == "symbols":
            if not symbols:
                symbols = value.split()[1:]
        elif identifier == "Pi":
            state, prob = value.split()[1:]
            pi[state] = float(prob)
        else:
            # extract the quantities packed into the record
            name, i, j, value, lpk, row, col = str2tuple(value)
            row = int(row)
            col = int(col)
            i = int(i)
            j = int(j)
            value = float(value)
            lpk = float(lpk)
            # note: lpk rides along on every matrix cell, so this
            # accumulates it once per record, not once per sequence
            logprob += lpk

            # fold these sums into the global A and B values; each
            # contribution is divided by the sequence likelihood
            # (value - lpk in log space)
            if name == "A":
                if A_numer is None:
                    A_numer = ones((row, col), float64) * _NINF
                    N = row
                A_numer[i, j] = _log_add(A_numer[i, j], value - lpk)
            elif name == "B":
                if B_numer is None:
                    B_numer = ones((row, col), float64) * _NINF
                    M = col
                B_numer[i, j] = _log_add(B_numer[i, j], value - lpk)
            elif name == "A_denom":
                if A_denom is None:
                    A_denom = ones(col, float64) * _NINF
                A_denom[j] = _log_add(A_denom[j], value - lpk)
            elif name == "B_denom":
                if B_denom is None:
                    B_denom = ones(col, float64) * _NINF
                B_denom[j] = _log_add(B_denom[j], value - lpk)

    # output the global hmm parameters as probabilities (2 ** undoes
    # the base-2 log representation)
    for e in pi:
        self.outputcollector.collect("Pi", tuple2str((e, pi[e])))
    for i in range(N):
        for j in range(N):
            self.outputcollector.collect("A", tuple2str((states[i],
                states[j], 2 ** (A_numer[i, j] - A_denom[i]))))
    for i in range(N):
        for j in range(M):
            self.outputcollector.collect("B", tuple2str((states[i],
                symbols[j], 2 ** (B_numer[i, j] - B_denom[i]))))
    self.outputcollector.collect("loglikelihood", logprob)
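# Sanity check for the combination step above: folding per-sequence
# contributions into an accumulator via _log_add(acc, value - lpk)
# should equal the direct probability-space sum of value_k / P(O_k),
# which is Rabiner's multi-sequence re-estimation weighting (each
# sequence's counts scaled by the inverse of its likelihood). A toy
# check, runnable only with the _log_add sketch above; the numbers
# are made up for illustration.

if __name__ == "__main__":
    vals = [0.30, 0.12]   # per-sequence numerator contributions
    lpks = [0.60, 0.40]   # per-sequence likelihoods P(O_k | model)

    # reducer-style accumulation in base-2 log space
    acc = _NINF
    for v, p in zip(vals, lpks):
        acc = _log_add(acc, log2(v) - log2(p))

    # direct computation in probability space
    direct = sum(v / p for v, p in zip(vals, lpks))
    assert abs(2 ** acc - direct) < 1e-12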