Exemple #1
0
def forward(A, B_initial, PI, O):
    """Run the forward recursion and return the combined final-column score.

    ``Alpha[n, t]`` is built column by column: column 0 is
    ``B[:, 0] + PI[:, 0]``; every later cell combines, with ``log_sum``, the
    previous column plus column ``n`` of ``A``, then adds ``B[n, t]``.
    All terms are added, so the inputs are presumably log-probabilities --
    TODO confirm against ``getB`` and the callers.

    Returns the ``log_sum`` fold of the last column of ``Alpha``.
    """
    n_states = A.shape[0]
    n_steps = len(O)
    Alpha = np.zeros((n_states, n_steps))
    B = getB(B_initial, O)

    # First column: emission term plus initial term.
    Alpha[:, 0] = B[:, 0] + PI[:, 0]

    for t in range(1, n_steps):
        for n in range(n_states):
            # Seed the accumulator with the first incoming term, then merge
            # the remaining ones in the same order as they appear.
            incoming = (prev + step
                        for prev, step in zip(Alpha[:, t - 1], A[:, n]))
            acc = next(incoming)
            for term in incoming:
                acc = log_sum(acc, term)
            Alpha[n, t] = acc + B[n, t]

    # Fold the last column into a single value.
    finals = iter(Alpha[:, -1])
    total = next(finals)
    for value in finals:
        total = log_sum(total, value)
    return total
Exemple #2
0
 def backward(self):
     """Print the backward-algorithm log-likelihood of each line of ``self.dev``.

     Every line of the file at path ``self.dev`` is stripped of its newline
     and split on single spaces into observations.  ``matrix[s][t]`` holds
     the backward term of state ``s`` at position ``t``; the last column keeps
     its initial 0.0 and the remaining columns are filled right to left.
     The terms of column 0 are then combined with the prior and the emission
     of the first observation, and one value is printed per line.

     Reads ``self.nState``, ``self.trans_matrix``, ``self.emit_matrix`` and
     ``self.prior_matrix`` (plain probabilities; ``log`` is applied here).
     """
     # Context manager so the handle is closed even if a lookup raises.
     with open(self.dev) as dev_file:
         for line in dev_file:
             words = line.strip("\n").split(" ")
             n_words = len(words)
             matrix = [[0.0] * n_words for _ in range(self.nState)]
             # Stop at column 0.  Going one step further (t == -1) would wrap
             # around to the last column and, for a one-word line, overwrite
             # column 0 before it is read below.
             for t in range(n_words - 2, -1, -1):
                 word = words[t + 1]
                 for state1 in range(self.nState):
                     p = (matrix[0][t + 1] +
                          log(self.trans_matrix[state1][0]) +
                          log(self.emit_matrix[0][word]))
                     for state2 in range(1, self.nState):
                         p = log_sum(
                             p, matrix[state2][t + 1] +
                             log(self.trans_matrix[state1][state2]) +
                             log(self.emit_matrix[state2][word]))
                     matrix[state1][t] = p
             # Termination: prior * emission of the first word * beta_0.
             p = (log(self.prior_matrix[0]) +
                  log(self.emit_matrix[0][words[0]]) + matrix[0][0])
             for state in range(1, self.nState):
                 p = log_sum(
                     p,
                     log(self.prior_matrix[state]) +
                     log(self.emit_matrix[state][words[0]]) +
                     matrix[state][0])
             print(p)
Exemple #3
0
	def forward(self):
		"""Print one forward-algorithm score per line of ``self.dev_file``.

		Each line is whitespace-split into observations.  A list of per-state
		dictionaries is built: the first from ``self.pie`` and ``self.b``,
		each later one from the previous dictionary, ``self.a`` and ``self.b``.
		``ln`` is applied to every table entry and ``log_sum`` combines the
		terms; the entries of the last dictionary are combined and printed.
		"""
		for line in self.dev_file:
			observations = line.strip().split()
			n_obs = len(observations)

			# Column for the first observation.
			trellis = [{
				state: ln(self.pie[state]) + ln(self.b[state][observations[0]])
				for state in self.states
			}]

			for observation in observations[1:]:
				previous = trellis[-1]
				column = {}
				for target in self.states:
					# The first state seeds the accumulator; the rest are merged in.
					acc = previous[self.states[0]] + ln(self.a[self.states[0]][target])
					for source in self.states[1:]:
						acc = log_sum(acc, (previous[source] + ln(self.a[source][target])))
					column[target] = ln(self.b[target][observation]) + acc
				trellis.append(column)

			final_column = trellis[n_obs - 1]
			total = final_column[self.states[0]]
			for state in self.states[1:]:
				total = log_sum(total, final_column[state])
			print(total)
Exemple #4
0
    def backward(self):
        """Print one backward-algorithm score per line of ``self.dev_file``.

        Each line is whitespace-split into observations.  The per-state
        dictionary for the last position is all zeros; earlier positions are
        filled right to left from ``self.a`` and ``self.b`` (``ln`` applied to
        each entry, ``log_sum`` combining the terms).  The first position is
        finally combined with ``self.pie`` and the emission of the first
        observation, and the result is printed.
        """
        for line in self.dev_file:
            observations = line.strip().split()
            n_obs = len(observations)

            trellis = [0] * n_obs
            trellis[n_obs - 1] = {state: 0 for state in self.states}

            for t in range(n_obs - 2, -1, -1):
                following = trellis[t + 1]
                next_obs = observations[t + 1]
                column = {}
                for source in self.states:
                    # Seed with the first successor state, then merge the rest.
                    seed = self.states[0]
                    acc = (following[seed] + ln(self.a[source][seed]) +
                           ln(self.b[seed][next_obs]))
                    for target in self.states[1:]:
                        acc = log_sum(
                            acc,
                            following[target] + ln(self.a[source][target]) +
                            ln(self.b[target][next_obs]))
                    column[source] = acc
                trellis[t] = column

            seed = self.states[0]
            total = (ln(self.pie[seed]) +
                     ln(self.b[seed][observations[0]]) + trellis[0][seed])
            for state in self.states[1:]:
                total = log_sum(
                    total,
                    ln(self.pie[state]) +
                    ln(self.b[state][observations[0]]) +
                    trellis[0][state])
            print(total)
def forward():
    """Print one forward-algorithm score per line of ``devfile``.

    Tables come from ``emitMatrix()``, ``transMatrix()`` and ``priorMatrix()``.
    Each line is split on single spaces; a column of 8 values (the state
    count is fixed at 8 here) is carried from token to token, combining the
    incoming terms with ``log_sum`` and adding the log emission term.
    """
    emit = emitMatrix()
    trans = transMatrix()
    prior = priorMatrix()
    for line in devfile:
        tokens = line.split(" ")
        column = np.log(prior) + np.log(np.array(emit[tokens[0]]).reshape(8, 1))

        # Advance the column once per remaining token.
        for token in tokens[1:]:
            emit_column = np.array(emit[token]).reshape(8, 1)
            updated = []
            for j in range(8):
                terms = np.log(trans[:, j].reshape(8, 1)) + column
                acc = terms[0]
                for k in range(1, 8):
                    acc = log_sum(acc, terms[k])
                cell = np.log(emit_column[j]) + acc
                updated.append(cell[0])
            column = np.array(updated).reshape(8, 1)

        total = column[0]
        for h in range(1, 8):
            total = log_sum(total, column[h])

        print(total[0])
def train(initDict, transDict, emitDict, sentence):
	"""Print the forward-algorithm score of ``sentence``.

	``initDict[state]``, ``transDict[src][dst]`` and ``emitDict[state][word]``
	are added together, so they are expected to already be in log space
	(the sums are combined with ``logsum.log_sum``).  ``sentence`` is a
	whitespace-separated string.  The input dictionaries are not modified.

	Prints the combined score of the last column; prints ``0`` when
	``initDict`` has no states.
	"""
	words = sentence.split()
	# alpha_1: initial term plus emission term of the first word.
	alpha = {}
	for state in initDict:
		alpha[state] = initDict[state] + emitDict[state][words[0]]
	for word in words[1:]:
		updated = {}
		for target in alpha:
			# Recurse on the previous column ``alpha`` (not on ``initDict``),
			# otherwise the first word's emission term would be lost.
			# ``None`` marks "no term yet": a legitimate log value can be 0.
			incoming = None
			for source in alpha:
				term = alpha[source] + transDict[source][target]
				if incoming is None:
					incoming = term
				else:
					incoming = logsum.log_sum(incoming, term)
			updated[target] = incoming + emitDict[target][word]
		alpha = updated
	totalP = 0
	seeded = False
	for state in alpha:
		if not seeded:
			totalP = alpha[state]
			seeded = True
		else:
			totalP = logsum.log_sum(totalP, alpha[state])
	print(totalP)
def backward(sentence_l, prior_l, trans_ll, emit_ld, n_states=8):
    """Print the backward-algorithm log-likelihood of every sentence.

    Args:
        sentence_l: list of sentences, each a list of words.  The sentences
            are read only; they are not reversed in place.
        prior_l: per-state prior probabilities, indexed ``prior_l[i]``.
        trans_ll: transition probabilities, indexed ``trans_ll[i][j]``.
        emit_ld: emission probabilities, indexed ``emit_ld[j][word]``.
        n_states: number of states to iterate over (defaults to 8).

    One value is printed per non-empty sentence; empty sentences are skipped
    because there is no first word to combine with the priors.
    """
    for sentence in sentence_l:
        if not sentence:
            continue
        beta_t = [0] * n_states
        # Walk from the last word back to the second one; the first word is
        # folded in together with the priors after the loop.
        for word in reversed(sentence[1:]):
            temp = [0] * n_states
            for i in range(n_states):
                # -1e100 stands in for log(0) as the fold's starting value.
                acc = -1e100
                for j in range(n_states):
                    term = (beta_t[j] + math.log(trans_ll[i][j]) +
                            math.log(emit_ld[j][word]))
                    acc = log_sum(acc, term)
                temp[i] = acc
            beta_t = temp

        first_word = sentence[0]
        sum_beta = -1e100
        for i in range(n_states):
            beta = (math.log(prior_l[i]) + math.log(emit_ld[i][first_word]) +
                    beta_t[i])
            sum_beta = log_sum(sum_beta, beta)
        print(sum_beta)
Exemple #8
0
def forward(sentence_l, prior_l, trans_ll, emit_ld, n_states=8):
    """Print the forward-algorithm log-likelihood of every sentence.

    Args:
        sentence_l: list of sentences, each a list of words.
        prior_l: per-state prior probabilities, indexed ``prior_l[i]``.
        trans_ll: transition probabilities, indexed ``trans_ll[j][i]``
            (from state ``j`` to state ``i``).
        emit_ld: emission probabilities, indexed ``emit_ld[i][word]``.
        n_states: number of states to iterate over (defaults to 8).

    One value is printed per non-empty sentence; empty sentences are skipped
    because no first column can be built for them.
    """
    for sentence in sentence_l:
        if not sentence:
            continue

        # alpha_1: prior times emission of the first word, in log space.
        first_word = sentence[0]
        alpha_t = [
            math.log(prior_l[i]) + math.log(emit_ld[i][first_word])
            for i in range(n_states)
        ]

        for word in sentence[1:]:
            temp = [0] * n_states
            for i in range(n_states):
                # -1e100 stands in for log(0) as the fold's starting value.
                acc = -1e100
                for j in range(n_states):
                    acc = log_sum(acc, alpha_t[j] + math.log(trans_ll[j][i]))
                temp[i] = math.log(emit_ld[i][word]) + acc
            alpha_t = temp

        sum_alpha = -1e100
        for i in range(n_states):
            sum_alpha = log_sum(sum_alpha, alpha_t[i])
        print(sum_alpha)
Exemple #9
0
def backward(A, B_initial, PI, O):
    """Run the backward recursion and print the combined first-column score.

    ``Beta`` has one row per row of ``A`` and one column per element of
    ``O``.  The last column is zero; each earlier cell combines, with
    ``log_sum``, the next column of ``Beta`` plus row ``n`` of ``A`` plus the
    next column of ``B`` (``B = getB(B_initial, O)``).  The first column is
    finally combined with ``B[:, 0]`` and ``PI[:, 0]`` and printed.
    All terms are added, so the inputs are presumably log-probabilities --
    TODO confirm against ``getB`` and the callers.
    """
    n_states = A.shape[0]
    n_steps = len(O)
    Beta = np.zeros((n_states, n_steps))
    B = getB(B_initial, O)
    # Last column starts at 0.
    Beta[:, (n_steps - 1)] = 0

    for t in range(n_steps - 2, -1, -1):
        for n in range(n_states):
            # Seed with the first successor term, then merge the rest in order.
            outgoing = (nxt + step + emission
                        for nxt, step, emission
                        in zip(Beta[:, (t + 1)], A[n, :], B[:, (t + 1)]))
            acc = next(outgoing)
            for term in outgoing:
                acc = log_sum(acc, term)
            Beta[n, t] = acc

    # Combine the first column with the emission and initial terms.
    starts = (beta + emission + initial
              for beta, emission, initial
              in zip(Beta[:, 0], B[:, 0], PI[:, 0]))
    total = next(starts)
    for term in starts:
        total = log_sum(total, term)
    print(total)
def _parse_prob_table(lines):
    """Parse ``KEY k1:p1 k2:p2 ...`` lines into ``{KEY: {k: log(p)}}``."""
    table = {}
    for line in lines:
        fields = line.strip().split()
        row = {}
        for field in fields[1:]:
            pair = field.split(':')
            row[pair[0]] = math.log(float(pair[1]))
        table[fields[0]] = row
    return table


def _log_sum_fold(terms, empty=0.0):
    """Combine ``terms`` with ``log_sum``; return ``empty`` if there are none."""
    iterator = iter(terms)
    try:
        total = next(iterator)
    except StopIteration:
        return empty
    for term in iterator:
        total = log_sum(total, term)
    return total


def main(argv):
    """Print the forward-algorithm log-likelihood of every dev line.

    ``argv[1]`` .. ``argv[4]`` are passed to ``readfiles`` and name, in order,
    the dev, transition, emission and prior inputs.  Transition and emission
    lines have the form ``KEY k1:p1 k2:p2 ...``; prior lines are
    ``STATE p``.  All probabilities are converted to logs once while parsing.
    The state set is the set of keys of the emission table.
    """
    devlines = readfiles(argv[1])
    translines = readfiles(argv[2])
    emitlines = readfiles(argv[3])
    priorlines = readfiles(argv[4])

    aij = _parse_prob_table(translines)
    bjk = _parse_prob_table(emitlines)
    pi = {}
    for line in priorlines:
        words = line.strip().split()
        pi[words[0]] = math.log(float(words[1]))

    states = bjk.keys()
    for line in devlines:
        content = line.strip().split()
        alpha = {}
        for st in states:
            alpha[st] = pi[st] + bjk[st][content[0]]
        for word in content[1:]:
            alphanew = {}
            for st in states:
                # The fold is seeded with the first term itself rather than a
                # 0.0 "unset" marker, since 0.0 is a valid log value.
                incoming = _log_sum_fold(
                    alpha[oldst] + aij[oldst][st] for oldst in states)
                alphanew[st] = incoming + bjk[st][word]
            alpha = alphanew
        print(_log_sum_fold(alpha[st] for st in states))
Exemple #11
0
def backward(sentence="", trans=None, emits=None, prior=None, labels=None):
    """Write the backward-algorithm score of ``sentence`` to stdout.

    ``sentence`` is split on single spaces.  ``createMatrix`` supplies the
    memo table; cells equal to -1 are treated as "not computed yet" and are
    filled through ``backward_recursive``.  Column 0 is then combined with
    ``log(prior[label])`` and ``log(emits[label][first word])`` per label.

    ``labels`` defaults to an empty list (a fresh one per call).
    """
    labels = [] if labels is None else labels
    words = sentence.split(" ")
    matrix = createMatrix(emits, prior, labels, words)
    n_labels = len(labels)
    for i in range(n_labels):
        if matrix[i][0] == -1:
            matrix[i][0] = backward_recursive(
                words, i, 0, trans, emits, prior, labels, matrix)

    first_word = words[0]
    result = matrix[0][0]
    result += math.log(prior[labels[0]])
    result += math.log(emits[labels[0]][first_word])
    for z in range(1, n_labels):
        tmp = matrix[z][0]
        tmp += math.log(prior[labels[z]])
        tmp += math.log(emits[labels[z]][first_word])
        result = log_sum(result, tmp)
    # NOTE(review): the constant -1.0 offset is kept from the original; it
    # presumably cancels a non-zero seed in the last column produced by
    # createMatrix -- confirm against that helper.
    result += -1.0
    sys.stdout.write(str(result) + "\n")
Exemple #12
0
 def calculate_P(self, dev):
     """Combine the entries of the last element of ``dev`` with ``log_sum``.

     Returns 0.0 when that last element is empty, its single entry when it
     has exactly one, and the left-to-right ``log_sum`` fold otherwise.
     """
     final_column = dev[len(dev) - 1]
     size = len(final_column)
     if size == 0:
         return 0.0
     total = final_column[0]
     for index in range(1, size):
         total = log_sum(total, final_column[index])
     return total
Exemple #13
0
def _forwardAlg(d):
    """Print the forward-algorithm score of the observation sequence ``d``.

    Uses the module-level tables ``hmmPrior``, ``hmmEmit`` and ``hmmTrain``;
    their entries are added directly, so they are presumably already logs --
    TODO confirm where the tables are loaded.  ``alpha[state][t]`` is filled
    column by column and the last column is combined with ``log_sum``.
    """
    n_states = len(hmmPrior)
    n_obs = len(d)
    alpha = [[0] * n_obs for _ in range(n_states)]

    for state in range(n_states):
        alpha[state][0] = hmmPrior[state] + hmmEmit[state][d[0]]

    for t in range(1, n_obs):
        for state in range(n_states):
            emission = hmmEmit[state][d[t]]
            # State 0 seeds the accumulator; the others are merged in order.
            incoming = alpha[0][t - 1] + hmmTrain[0][state]
            for source in range(1, n_states):
                incoming = log_sum(
                    incoming, alpha[source][t - 1] + hmmTrain[source][state])
            alpha[state][t] = emission + incoming

    total = alpha[0][-1]
    for state in range(1, n_states):
        total = log_sum(total, alpha[state][-1])
    print(total)
Exemple #14
0
def _backwardAlg(d):
    """Print the backward-algorithm score of the observation sequence ``d``.

    Uses the module-level tables ``hmmPrior``, ``hmmEmit`` and ``hmmTrain``;
    their entries are added directly, so they are presumably already logs --
    TODO confirm where the tables are loaded.  The last column of ``beta``
    keeps its initial 0; earlier columns are filled right to left, and
    column 0 is combined with the prior and first-observation emission terms.
    """
    n_states = len(hmmPrior)
    n_obs = len(d)
    beta = [[0] * n_obs for _ in range(n_states)]

    for t in range(n_obs - 2, -1, -1):
        next_obs = d[t + 1]
        for state in range(n_states):
            # State 0 seeds the accumulator; the others are merged in order.
            outgoing = beta[0][t + 1] + hmmTrain[state][0] + hmmEmit[0][next_obs]
            for target in range(1, n_states):
                outgoing = log_sum(
                    outgoing,
                    beta[target][t + 1] + hmmTrain[state][target] +
                    hmmEmit[target][next_obs])
            beta[state][t] = outgoing

    total = beta[0][0] + hmmPrior[0] + hmmEmit[0][d[0]]
    for state in range(1, n_states):
        total = log_sum(
            total, beta[state][0] + hmmPrior[state] + hmmEmit[state][d[0]])
    print(total)
Exemple #15
0
def forward(dev_file, trans_file, emit_file, prior_file):
    """Print one forward-algorithm score per dev sentence, one per line.

    The four arguments are handed to ``read_dev``, ``read_trans``,
    ``read_emit`` and ``read_prior``.  Table entries are passed through
    ``log`` here, and terms are combined with ``log_sum``.
    """
    dev = read_dev(dev_file)
    trans = read_trans(trans_file)
    emit = read_emit(emit_file)
    prior = read_prior(prior_file)
    result = []
    for index in range(len(dev)):
        # First column: prior term plus emission term of the first word.
        alpha_t = {}
        for state, probability in prior.items():
            alpha_t[state] = log(probability) + log(emit[state][dev[index][0]])
        alpha_tp1 = {}
        for position in range(1, len(dev[index])):
            for state in emit.keys():
                emission = log(emit[state][dev[index][position]])
                incoming = [
                    score + log(trans[source][state])
                    for source, score in alpha_t.items()
                ]
                alpha_tp1[state] = emission + reduce(log_sum, incoming)
            alpha_t = dict(alpha_tp1)
        result.append(reduce(log_sum, alpha_t.values()))
    print('\n'.join(map(str, result)))
Exemple #16
0
def forward(sentence="", trans=None, emits=None, prior=None, labels=None):
    """Write the forward-algorithm score of ``sentence`` to stdout.

    ``sentence`` is split on single spaces.  ``createMatrix`` supplies the
    memo table; cells of the last column equal to -1 are treated as "not
    computed yet" and are filled through ``forward_recursive``.  The last
    column is then combined with ``log_sum`` over ``len(prior)`` states.

    ``labels`` defaults to an empty list (a fresh one per call).
    """
    labels = [] if labels is None else labels
    words = sentence.split(" ")
    matrix = createMatrix(emits, prior, labels, words)
    last = len(words) - 1
    n_states = len(prior)
    for i in range(n_states):
        if matrix[i][last] == -1:
            matrix[i][last] = forward_recursive(
                words, i, last, trans, emits, prior, labels, matrix)

    total = matrix[0][last]
    for i in range(1, n_states):
        total = log_sum(total, matrix[i][last])
    sys.stdout.write(str(total) + "\n")
 def calculate_P(self, dev, o1):
     """Combine the last element of ``dev`` with prior and emission terms.

     For each state ``i`` below ``self.hmm.N`` the term
     ``log(pi[i]) + log(b[o1][i]) + dev[-1][i]`` is formed; the terms are
     merged left to right with ``log_sum``.  Returns 0.0 when ``N`` is 0.
     """
     beta1 = dev[len(dev) - 1]
     p = 0.0
     for i in range(self.hmm.N):
         term = log(self.hmm.pi[i]) + log(self.hmm.b[o1][i]) + beta1[i]
         # The first state seeds the accumulator.
         p = term if i == 0 else log_sum(p, term)
     return p
Exemple #18
0
def backward(dev_file, trans_file, emit_file, prior_file):
    """Print one backward-algorithm score per dev sentence, one per line.

    The four arguments are handed to ``read_dev``, ``read_trans``,
    ``read_emit`` and ``read_prior``.  Table entries are passed through
    ``log`` here, and terms are combined with ``log_sum``.
    """
    dev = read_dev(dev_file)
    trans = read_trans(trans_file)
    emit = read_emit(emit_file)
    prior = read_prior(prior_file)
    result = []
    for index in range(len(dev)):
        # The column after the last word is all zeros.
        beta_tp1 = dict.fromkeys(prior.keys(), 0)
        beta_t = {}
        # Walk from the last word back to the second one.
        for position in range(len(dev[index]) - 1, 0, -1):
            for state in emit.keys():
                outgoing = [
                    score + log(trans[state][target]) +
                    log(emit[target][dev[index][position]])
                    for target, score in beta_tp1.items()
                ]
                beta_t[state] = reduce(log_sum, outgoing)
            beta_tp1 = dict(beta_t)
        # Combine with the prior and the emission of the first word.
        starts = [
            log(probability) + log(emit[state][dev[index][0]]) + beta_tp1[state]
            for state, probability in prior.items()
        ]
        result.append(reduce(log_sum, starts))
    print('\n'.join(map(str, result)))
Exemple #19
0
 def forward(self):
     """Print the forward-algorithm log-likelihood of each line of ``self.dev``.

     Every line of the file at path ``self.dev`` is stripped of its newline
     and split on single spaces into observations.  ``matrix[s][t]`` holds
     the forward term of state ``s`` at position ``t``; the last column is
     combined with ``log_sum`` and printed, one value per line.

     Reads ``self.nState``, ``self.trans_matrix``, ``self.emit_matrix`` and
     ``self.prior_matrix`` (plain probabilities; ``log`` is applied here).
     """
     # Context manager so the handle is closed even if a lookup raises.
     with open(self.dev) as dev_file:
         for line in dev_file:
             words = line.strip("\n").split(" ")
             matrix = [[0.0] * len(words) for _ in range(self.nState)]
             for state in range(self.nState):
                 matrix[state][0] = (log(self.prior_matrix[state]) +
                                     log(self.emit_matrix[state][words[0]]))
             for t in range(1, len(words)):
                 for state1 in range(self.nState):
                     # State 0 seeds the accumulator; others are merged in.
                     p = matrix[0][t - 1] + log(self.trans_matrix[0][state1])
                     for state2 in range(1, self.nState):
                         trans = self.trans_matrix[state2][state1]
                         prev = matrix[state2][t - 1]
                         p = log_sum(prev + log(trans), p)
                     p = p + log(self.emit_matrix[state1][words[t]])
                     matrix[state1][t] = p
             p = matrix[0][-1]
             for state in range(1, self.nState):
                 p = log_sum(p, matrix[state][-1])
             print(p)
Exemple #20
0
def forward_recursive(os, i, j, trans=None, emits=None, prior=None, labels=None, matrix=None):
    """Return the memoised forward term for state ``i`` at position ``j``.

    ``matrix[i][j] == -1`` marks a cell that has not been computed yet; such
    a cell is computed, stored in ``matrix`` and returned.  Any other value
    is returned as is.

    Args:
        os: sequence of observations.
        i: state index into ``labels``.
        j: position index into ``os``.
        trans: ``trans[src_label][dst_label]`` transition probabilities.
        emits: ``emits[label][observation]`` emission probabilities.
        prior: ``prior[label]`` initial probabilities; ``len(prior)`` is the
            number of states summed over.
        labels: state labels, indexed by state number.
        matrix: memo table, mutated in place.

    Note: the recursion goes one level deeper per position, so very long
    sequences are bounded by the interpreter's recursion limit.
    """
    # None defaults replace the shared mutable ``{}`` / ``[]`` defaults.
    trans = {} if trans is None else trans
    emits = {} if emits is None else emits
    prior = {} if prior is None else prior
    labels = [] if labels is None else labels

    if matrix[i][j] != -1:
        return matrix[i][j]

    if j == 0:
        matrix[i][j] = (math.log(prior[labels[i]]) +
                        math.log(emits[labels[i]][os[j]]))
        return matrix[i][j]

    emission = math.log(emits[labels[i]][os[j]])
    # The recursive call returns cached cells directly and stores new ones
    # itself, so every predecessor state can be handled the same way.
    incoming = forward_recursive(os, 0, j - 1, trans, emits, prior, labels, matrix)
    incoming += math.log(trans[labels[0]][labels[i]])
    for t in range(1, len(prior)):
        previous = forward_recursive(os, t, j - 1, trans, emits, prior, labels, matrix)
        incoming = log_sum(
            incoming, previous + math.log(trans[labels[t]][labels[i]]))

    matrix[i][j] = emission + incoming
    return matrix[i][j]
 def calculate_beta(self, dev, o_tp1, t, T):
     """Append a new per-state list to ``dev`` built from ``dev[T - (t + 1)]``.

     For each state ``i`` the terms
     ``dev[T - (t + 1)][j] + log(a[i][j]) + log(b[o_tp1][j])`` over all
     states ``j`` are merged left to right with ``log_sum``.  Nothing is
     returned; ``dev`` is extended in place.
     """
     beta_t = []
     for i in range(self.hmm.N):
         following = dev[T - (t + 1)]
         beta_ti = 0.0
         for j in range(self.hmm.N):
             term = (following[j] + log(self.hmm.a[i][j]) +
                     log(self.hmm.b[o_tp1][j]))
             # The first successor state seeds the accumulator.
             beta_ti = term if j == 0 else log_sum(beta_ti, term)
         beta_t.append(beta_ti)
     dev.append(beta_t)
Exemple #22
0
 def calculate_alpha(self, o_tp1, tp1, dev):
     """Append a new per-state list to ``dev`` built from ``dev[tp1 - 2]``.

     For each state ``i`` the terms ``dev[tp1 - 2][j] + log(a[j][i])`` over
     all states ``j`` are merged left to right with ``log_sum`` and then
     added to ``log(b[o_tp1][i])``.  Nothing is returned; ``dev`` is extended
     in place.
     """
     alpha_tp1 = []
     for i in range(self.hmm.N):
         emission = log(self.hmm.b[o_tp1][i])
         incoming = 0.0
         for j in range(self.hmm.N):
             term = dev[tp1 - 2][j] + log(self.hmm.a[j][i])
             # The first predecessor state seeds the accumulator.
             incoming = term if j == 0 else log_sum(incoming, term)
         alpha_tp1.append(emission + incoming)
     dev.append(alpha_tp1)
Exemple #23
0
def backward_recursive(os, i, j, trans=None, emits=None, prior=None, labels=None, matrix=None):
    """Compute, store and return the backward term for state ``i`` at ``j``.

    The value combines, with ``log_sum`` over every state ``z``, the term
    ``matrix[z][j + 1] + log(emits[label_z][os[j + 1]]) +
    log(trans[label_i][label_z])``.  Cells of column ``j + 1`` equal to -1
    are treated as "not computed yet" and are filled recursively first.

    There is no base case here: column ``j + 1`` must exist, so the caller
    is expected to pre-fill the last column (presumably ``createMatrix``
    does -- TODO confirm).

    Args:
        os: sequence of observations.
        i: state index into ``labels``.
        j: position index into ``os``.
        trans: ``trans[src_label][dst_label]`` transition probabilities.
        emits: ``emits[label][observation]`` emission probabilities.
        prior: unused here; kept for signature compatibility.
        labels: state labels, indexed by state number.
        matrix: memo table, mutated in place.
    """
    # None defaults replace the shared mutable ``{}`` / ``[]`` defaults.
    trans = {} if trans is None else trans
    emits = {} if emits is None else emits
    prior = {} if prior is None else prior
    labels = [] if labels is None else labels

    nxt = j + 1

    def successor_term(z):
        """Return the contribution of successor state ``z`` at ``nxt``."""
        if matrix[z][nxt] == -1:
            matrix[z][nxt] = backward_recursive(
                os, z, nxt, trans, emits, prior, labels, matrix)
        term = matrix[z][nxt]
        term += math.log(emits[labels[z]][os[nxt]])
        term += math.log(trans[labels[i]][labels[z]])
        return term

    # State 0 seeds the accumulator; the remaining states are merged in.
    total = successor_term(0)
    for z in range(1, len(labels)):
        total = log_sum(total, successor_term(z))

    matrix[i][j] = total
    return matrix[i][j]
def forward_each(sentence, states, words, prior_vec, trans_mat, emit_mat):
    """Print the forward-algorithm log-likelihood of one sentence.

    Args:
        sentence: sequence of observations.
        states: sequence of states; only its length is used.
        words: vocabulary passed to ``find_b`` to locate an observation's
            column in ``emit_mat``.
        prior_vec: per-state prior probabilities, indexed ``prior_vec[i]``.
        trans_mat: transition probabilities, indexed ``trans_mat[j, i]``
            (from state ``j`` to state ``i``).
        emit_mat: emission probabilities, indexed ``emit_mat[i, column]``.
    """
    n_states = len(states)

    # The emission column depends only on the observation, so look it up
    # once per position instead of once per state.
    first_column = find_b(sentence[0], words)
    afa = [
        math.log(prior_vec[i]) + math.log(emit_mat[i, first_column])
        for i in range(n_states)
    ]

    for t in range(len(sentence) - 1):
        # Snapshot the previous column: ``afa`` is overwritten in place.
        previous = afa[:]
        column = find_b(sentence[t + 1], words)
        for i in range(n_states):
            incoming = [
                previous[j] + math.log(trans_mat[j, i])
                for j in range(len(previous))
            ]
            afa[i] = log_sum_all(incoming) + math.log(emit_mat[i, column])

    print(log_sum_all(afa))
Exemple #25
0
    # Fragment of a backward pass; the enclosing function header and the
    # setup of `line`, `states`, `trans`, `emits`, `priors`, `first_word`,
    # `first_column_dict` and `second_column_dict` are not visible here.
    # Walk the observations from last to first.
    line = reversed(line)

    for word in line:
        # Build the next column: for every state, fold over all entries of
        # the column computed so far (`first_column_dict`).
        for state in states:
            for previous_state in states:
                # The state at index 0 seeds the accumulator ...
                if states.index(previous_state) == 0:
                    second_column_dict[state] = first_column_dict[
                        previous_state] + trans[state][previous_state] + emits[
                            previous_state][word]

                # ... and every later state is merged in with log_sum.
                else:
                    second_column_dict[state] = log_sum(
                        (first_column_dict[previous_state] +
                         trans[state][previous_state] +
                         emits[previous_state][word]),
                        second_column_dict[state])

        # The freshly built column becomes the current one.
        first_column_dict = second_column_dict
        second_column_dict = {}
    # Flag is reset here; it is not read again in the visible code.
    first_column = True

    # Combine the final column with the prior and first-word emission terms;
    # the state at index 0 seeds `output`.
    for state in states:
        if states.index(state) == 0:
            output = first_column_dict[state] + priors[state] + emits[state][
                first_word]
        else:
            output = log_sum(
                first_column_dict[state] + priors[state] +
                emits[state][first_word], output)
Exemple #26
0
    # Fragment of a forward pass; the enclosing function header and the
    # setup of `line`, `states`, `trans`, `emits`, `priors`, `first_column`,
    # `first_column_dict` and `second_column_dict` are not visible here.
    for word in line:

        # First observation: the column is the prior term plus the emission
        # term of each state.
        if first_column:
            for state in states:
                first_column_dict[state] = priors[state] + emits[state][word]
            first_column = False

        # Later observations: build the next column from the current one.
        else:
            for state in states:        # applying recursive formula
                for previous_state in states:
                    # The state at index 0 seeds the accumulator; every later
                    # state is merged in with log_sum.
                    if states.index(previous_state) == 0:
                        second_column_dict[state] = first_column_dict[previous_state] + trans[previous_state][state]
                    else:
                        second_column_dict[state] = log_sum(first_column_dict[previous_state]+ trans[previous_state][state], second_column_dict[state])

                # adding the emission term of the word for this state
                second_column_dict[state] += emits[state][word]

            # The freshly built column becomes the current one.
            first_column_dict = second_column_dict
            second_column_dict = {}

    # Flag is reset here; it is not read again in the visible code.
    first_column = True

    # Combine the final column; the state at index 0 seeds `output`.
    for state in states:
        if states.index(state) == 0:
            output = first_column_dict[state]
        else:
            output = log_sum(first_column_dict[state], output)
    print output
def log_sum_all(n_list):
    """Fold ``n_list`` left to right with ``log_sum`` and return the result.

    The first element seeds the accumulator, so a one-element sequence is
    returned unchanged and an empty one raises ``IndexError``.
    """
    total = n_list[0]
    for value in n_list[1:]:
        total = log_sum(total, value)
    return total