Exemple #1
0
def update_jump_alignment_mle(posterior_alignment_counts):
    """Refresh the module-level jump tables from posterior counts.

    Works in two passes over ``posterior_alignment_counts``:

    1. Every entry whose key's first element equals ``jump_key(0, 0)[0]``
       is copied into ``jump_counts``.
    2. Every entry whose key's first element equals
       ``jump_iip_key(0, 0)[0]`` is treated as a collection of jump keys;
       the ``jump_counts`` values of those keys are combined with
       ``lu.logadd_of_list`` and stored in ``jump_counts_iip``.

    The second pass reads ``jump_counts``, so it must run after the first
    pass has finished. Returns ``None``; both tables are updated in place.
    """
    # Pass 1: copy the jump entries across unchanged.
    for count_key, count_val in posterior_alignment_counts.items():
        if count_key[0] == jump_key(0, 0)[0]:
            jump_counts[count_key] = count_val

    # Pass 2: aggregate the jump entries listed under each "iip" key.
    for count_key, member_keys in posterior_alignment_counts.items():
        if count_key[0] != jump_iip_key(0, 0)[0]:
            continue
        member_vals = [jump_counts[member] for member in member_keys]
        jump_counts_iip[count_key] = lu.logadd_of_list(member_vals)
def update_jump_alignment_mle(posterior_alignment_counts):
    """Update ``jump_counts`` and ``jump_counts_iip`` from posterior counts.

    Entries are selected by the first element of their key:

    * keys sharing the first element of ``jump_key(0, 0)`` are copied
      verbatim into the module-level ``jump_counts``;
    * keys sharing the first element of ``jump_iip_key(0, 0)`` hold a
      collection of jump keys, whose ``jump_counts`` values are merged via
      ``lu.logadd_of_list`` and written to ``jump_counts_iip``.

    The copy step completes before the merge step starts, because the merge
    reads from ``jump_counts``. Nothing is returned.
    """
    for entry_key, entry_val in posterior_alignment_counts.items():
        is_jump_entry = entry_key[0] == jump_key(0, 0)[0]
        if is_jump_entry:
            jump_counts[entry_key] = entry_val

    for entry_key, grouped_jump_keys in posterior_alignment_counts.items():
        is_iip_entry = entry_key[0] == jump_iip_key(0, 0)[0]
        if is_iip_entry:
            gathered = [jump_counts[jk] for jk in grouped_jump_keys]
            jump_counts_iip[entry_key] = lu.logadd_of_list(gathered)
Exemple #3
0
def get_jump_mle(alignments_split, source_split):
    """Collect jump statistics from aligned sentences.

    For each (alignment, source sentence) pair, every consecutive pair of
    alignment positions ``(a[i], a[i - 1])`` contributes to two tables:

    * ``jump_counts[jump_key(j1, j0)]`` is accumulated with
      ``lu.logadd(previous, 0.0)``, starting from ``-inf`` (presumably a
      log-space count incremented by one per occurrence -- confirm against
      ``lu.logadd``).
    * the jump key is recorded under ``jump_iip_key(len(s), j0)`` so the
      jumps sharing that key can be aggregated afterwards.

    Args:
        alignments_split: iterable of alignment sequences (indexable, with
            ``len``).
        source_split: iterable of source sentences, paired positionally
            with ``alignments_split``; only their lengths are used.

    Returns:
        A ``(jump_counts, jiip)`` tuple, where ``jiip`` maps each
        ``jump_iip_key`` to ``lu.logadd_of_list`` of the ``jump_counts``
        values of the distinct jump keys recorded under it.
    """
    jump_counts = {}
    jump_keys_by_sentence_len = {}
    for a, s in zip(alignments_split, source_split):
        for i in range(1, len(a)):
            j1, j0 = a[i], a[i - 1]
            jkey = jump_key(j1, j0)
            jiip_key = jump_iip_key(len(s), j0)
            jump_counts[jkey] = lu.logadd(jump_counts.get(jkey, float('-inf')), 0.0)
            jump_keys_by_sentence_len.setdefault(jiip_key, set()).add(jkey)

    jiip = {}
    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    for jiip_key, jkeys in jump_keys_by_sentence_len.items():
        jiip[jiip_key] = lu.logadd_of_list([jump_counts[jk] for jk in jkeys])
    return jump_counts, jiip
def get_jump_mle(alignments_split, source_split):
    """Build jump-count tables from alignments and their source sentences.

    Each alignment ``a`` is walked as consecutive pairs
    ``(a[i], a[i - 1])``. For every pair:

    * ``jump_counts[jump_key(j1, j0)]`` is updated with
      ``lu.logadd(previous, 0.0)``, where a missing entry starts at
      ``-inf`` (presumably adding one occurrence in log space -- confirm
      against ``lu.logadd``).
    * the jump key is added to the set stored under
      ``jump_iip_key(len(s), j0)``, with ``s`` the paired source sentence.

    Args:
        alignments_split: iterable of indexable alignment sequences.
        source_split: iterable of source sentences zipped with the
            alignments; only ``len`` of each sentence is used.

    Returns:
        ``(jump_counts, jiip)``. ``jiip`` maps every ``jump_iip_key`` to
        the ``lu.logadd_of_list`` of the ``jump_counts`` values for the
        distinct jump keys grouped under it.
    """
    jump_counts = {}
    jump_keys_by_sentence_len = {}
    for a, s in zip(alignments_split, source_split):
        for i in range(1, len(a)):
            j1, j0 = a[i], a[i - 1]
            jkey = jump_key(j1, j0)
            jiip_key = jump_iip_key(len(s), j0)
            jump_counts[jkey] = lu.logadd(jump_counts.get(jkey, float("-inf")), 0.0)
            jump_keys_by_sentence_len.setdefault(jiip_key, set()).add(jkey)

    # dict.iteritems() was removed in Python 3; items() is portable.
    jiip = {}
    for jiip_key, jkeys in jump_keys_by_sentence_len.items():
        jkeys_val = [jump_counts[jk] for jk in jkeys]
        jiip[jiip_key] = lu.logadd_of_list(jkeys_val)
    return jump_counts, jiip
Exemple #5
0
def get_viterbi_and_forward(obs_sequence, trelis, source_len):
    """Run the max (Viterbi) and sum (forward) recursions over a trellis.

    Position 0 is seeded with the single state
    ``(BOUNDRY_STATE, BOUNDRY_STATE)`` at score 0.0. For each later
    position ``k`` and each state ``v`` in ``trelis[k]``, every predecessor
    ``u`` in ``trelis[k - 1]`` is scored as
    ``previous + get_jump_transition(v[0], u[0], source_len)
    + get_emission(obs_sequence[k], v[1])``. Scores are added, so they are
    presumably log-probabilities.

    Args:
        obs_sequence: observed tokens; index 0 is the special start symbol.
        trelis: indexable by position, giving the candidate states there.
            Each state is indexable, with the alignment position at index 0
            and the source token at index 1.
        source_len: passed through to ``get_jump_transition``.

    Returns:
        ``(best_backtrace, best_score, alpha_pi)``: the flattened back
        pointers and the best score taken from the candidates of the last
        state processed, plus the table of forward scores keyed by
        ``(position, state)``.
    """
    origin = (0, (BOUNDRY_STATE, BOUNDRY_STATE))
    pi = {origin: 0.0}
    alpha_pi = {origin: 0.0}
    arg_pi = {origin: []}

    for k in range(1, len(obs_sequence)):
        prev_k = k - 1
        for v in trelis[k]:
            # Candidates are keyed by score, so a later predecessor with an
            # equal score replaces an earlier one.
            max_prob_to_bt = {}
            forward_terms = []
            target_token, source_token = obs_sequence[k], v[1]
            for u in trelis[prev_k]:
                prev_cell = (prev_k, u)
                q = get_jump_transition(v[0], u[0], source_len)
                e = get_emission(target_token, source_token)
                viterbi_score = pi[prev_cell] + q + e
                forward_score = alpha_pi[prev_cell] + q + e
                history = arg_pi[prev_cell]
                # Back pointers nest: [earlier_history, u]; the first step
                # has no history and is just [u].
                max_prob_to_bt[viterbi_score] = [history, u] if history else [u]
                forward_terms.append(forward_score)

            best_score = max(max_prob_to_bt)
            cell = (k, v)
            pi[cell] = best_score
            alpha_pi[cell] = lu.logadd_of_list(forward_terms)
            arg_pi[cell] = max_prob_to_bt[best_score]

    # The answer is read from the candidates of the last state visited above.
    max_p = max(max_prob_to_bt)
    return flatten_backpointers(max_prob_to_bt[max_p]), max_p, alpha_pi
def get_viterbi_and_forward(obs_sequence, trelis, source_len):
    """Compute the best path and the forward table for one sentence pair.

    Three tables keyed by ``(position, state)`` are filled left to right,
    starting from the boundary state at position 0:

    * ``pi``: best score of any path ending in that state,
    * ``alpha_pi``: ``lu.logadd_of_list`` over all incoming path scores,
    * ``arg_pi``: nested back pointers for the best path.

    A step from predecessor ``u`` to state ``v`` at position ``k`` adds
    ``get_jump_transition(v[0], u[0], source_len)`` and
    ``get_emission(obs_sequence[k], v[1])`` to the predecessor's score
    (additive, so presumably log-space).

    Args:
        obs_sequence: observed tokens, with a special start token at
            index 0; positions 1..n are processed.
        trelis: per-position collections of states; ``state[0]`` is used
            as the alignment index and ``state[1]`` as the source token.
        source_len: forwarded to ``get_jump_transition``.

    Returns:
        A tuple of the flattened best back trace, the best path score, and
        the ``alpha_pi`` table. The first two come from the candidate set
        of the final state processed.
    """
    start_cell = (0, (BOUNDRY_STATE, BOUNDRY_STATE))
    pi = {start_cell: 0.0}
    alpha_pi = {start_cell: 0.0}
    arg_pi = {start_cell: []}

    for k in range(1, len(obs_sequence)):
        for v in trelis[k]:
            max_prob_to_bt = {}  # score -> back pointer; equal scores overwrite
            incoming = []
            target_token = obs_sequence[k]
            source_token = v[1]
            for u in trelis[k - 1]:
                came_from = (k - 1, u)
                q = get_jump_transition(v[0], u[0], source_len)
                e = get_emission(target_token, source_token)
                p = pi[came_from] + q + e
                incoming_score = alpha_pi[came_from] + q + e
                trail = arg_pi[came_from]
                if trail:
                    pointer = [trail, u]
                else:
                    # No earlier history: the path starts at u.
                    pointer = [u]
                max_prob_to_bt[p] = pointer
                incoming.append(incoming_score)

            top_score = max(max_prob_to_bt)
            here = (k, v)
            pi[here] = top_score
            alpha_pi[here] = lu.logadd_of_list(incoming)
            arg_pi[here] = max_prob_to_bt[top_score]

    # Final read-out uses the candidates left over from the last state.
    max_p = max(max_prob_to_bt)
    max_bt = flatten_backpointers(max_prob_to_bt[max_p])
    return max_bt, max_p, alpha_pi
def get_viterbi_and_forward(obs_sequence, trelis, source_len):
    """Run the Viterbi and forward recursions over an alignment trellis.

    Position 0 holds only the state ``(BOUNDRY_STATE, BOUNDRY_STATE)`` with
    score 0.0. For each position ``k >= 1`` and each state ``v`` in
    ``trelis[k]``, every predecessor ``u`` in ``trelis[k - 1]`` contributes
    ``previous + get_jump_transition(v[0], u[0], source_len)
    + get_emission(obs_sequence[k], v[1])``. Scores are summed, so they are
    presumably log-probabilities.

    A forward term of ``-inf`` is passed on to ``lu.logadd_of_list`` like
    any other value; it no longer drops into the debugger, which would
    block non-interactive runs.

    Args:
        obs_sequence: observed tokens; words are numbered from 1 to n and
            index 0 is the special start character.
        trelis: indexable by position, giving the candidate states; each
            state has the alignment index at ``[0]`` and the source token
            at ``[1]``.
        source_len: passed through to ``get_jump_transition``.

    Returns:
        ``(max_bt, max_p, alpha_pi)``: the best back trace (flattened), the
        best path score, and the table of summed path scores keyed by
        ``(position, state)``. The first two are taken from the candidates
        of the last state processed.
    """
    pi = {(0, (BOUNDRY_STATE, BOUNDRY_STATE)): 0.0}
    alpha_pi = {(0, (BOUNDRY_STATE, BOUNDRY_STATE)): 0.0}
    arg_pi = {(0, (BOUNDRY_STATE, BOUNDRY_STATE)): []}
    for k in range(1, len(obs_sequence)):
        for v in trelis[k]:
            # Candidates are keyed by score, so equal scores overwrite.
            max_prob_to_bt = {}
            sum_prob_to_bt = []
            target_token = obs_sequence[k]
            source_token = v[1]
            for u in trelis[k - 1]:
                aj = v[0]
                aj_1 = u[0]
                q = get_jump_transition(aj, aj_1, source_len)
                e = get_emission(target_token, source_token)
                p = pi[(k - 1, u)] + q + e
                alpha_p = alpha_pi[(k - 1, u)] + q + e
                if len(arg_pi[(k - 1, u)]) == 0:
                    bt = [u]
                else:
                    # Nest the earlier history; flattened once at the end.
                    bt = [arg_pi[(k - 1, u)], u]
                max_prob_to_bt[p] = bt
                sum_prob_to_bt.append(alpha_p)

            max_bt = max_prob_to_bt[max(max_prob_to_bt)]
            new_pi_key = (k, v)
            pi[new_pi_key] = max(max_prob_to_bt)
            alpha_pi[new_pi_key] = lu.logadd_of_list(sum_prob_to_bt)
            arg_pi[new_pi_key] = max_bt
    # Read the result from the candidates of the last state visited.
    max_bt = max_prob_to_bt[max(max_prob_to_bt)]
    max_p = max(max_prob_to_bt)
    max_bt = flatten_backpointers(max_bt)
    return max_bt, max_p, alpha_pi