def get_model1_forward(theta, obs_id, fc):
    """Score one sentence pair under a uniform alignment weight.

    For every target position in ``trellis[obs_id]`` each candidate
    (target, source) link is scored with the ``E_TYPE`` decision score
    ``e`` and weighted by ``log(1 / number_of_candidates)``.

    Args:
        theta: Passed through unchanged to ``get_decision_given_context``.
        obs_id: Key used to index the ``source``, ``target`` and
            ``trellis`` globals.
        fc: Mapping of fractional counts keyed by
            ``(E_TYPE, target_token, source_token)``; each link log-adds
            ``e - sum_e`` into its entry. Pass ``None`` to skip the
            count update.

    Returns:
        A tuple ``(best_links, log_score, fc)`` where ``best_links`` holds,
        for every target position except the last list slot, the pair
        ``(t_idx, s_idx)`` with the highest ``e``; ``log_score`` is the sum
        over target positions of the log-sum of ``e + q``; ``fc`` is the
        argument that was passed in.
    """
    global source, target, trellis
    lattice = trellis[obs_id]
    best_links = [-1] * len(lattice)
    log_score = 0.0
    for t_idx in lattice:
        t_tok = target[obs_id][t_idx]
        emission_total = float('-inf')
        joint_total = float('-inf')
        top_emission = float('-inf')
        top_s_idx = None
        for _, s_idx in lattice[t_idx]:
            if s_idx is NULL:
                s_tok = NULL
            else:
                s_tok = source[obs_id][s_idx]
            e = get_decision_given_context(theta, E_TYPE, decision=t_tok, context=s_tok)
            emission_total = utils.logadd(emission_total, e)
            # Uniform weight over the candidate links of this target position.
            q = log(1.0 / len(lattice[t_idx]))
            joint_total = utils.logadd(joint_total, e + q)
            if e > top_emission:
                top_emission, top_s_idx = e, s_idx
        best_links[t_idx] = (t_idx, top_s_idx)
        log_score += joint_total

        if fc is None:
            continue

        # Second pass: each link contributes its emission score normalised by
        # the total emission mass of this target position (in log space).
        for _, s_idx in lattice[t_idx]:
            if s_idx is NULL:
                s_tok = NULL
            else:
                s_tok = source[obs_id][s_idx]
            e = get_decision_given_context(theta, E_TYPE, decision=t_tok, context=s_tok)
            event = (E_TYPE, t_tok, s_tok)
            previous = fc.get(event, float('-inf'))
            fc[event] = utils.logadd(e - emission_total, previous)

    return best_links[:-1], log_score, fc
 def accumulate_fc(self, type, alpha, beta, d, S, c=None, q=None, e=None, fc=None):
     """Log-add one posterior term into a fractional-count table.

     Args:
         type: ``T_TYPE`` or ``E_TYPE``; selects the update formula and the
             first component of the count key.
         alpha: Forward score entering the update.
         beta: Backward score entering the update.
         d: Decision part of the count key.
         S: Normaliser subtracted from the update.
         c: Context part of the count key.
         q: Transition score; only used for ``T_TYPE``.
         e: Emission score; only used for ``T_TYPE``.
         fc: Table to update. When ``None`` the update goes to
             ``self.fractional_counts`` instead.

     Returns:
         ``fc`` (the updated table) when one was supplied, otherwise ``None``.

     Raises:
         ValueError: If ``type`` is neither ``T_TYPE`` nor ``E_TYPE``.
     """
     if type == T_TYPE:
         event_type = T_TYPE
         update = alpha + q + e + beta - S
     elif type == E_TYPE:
         event_type = E_TYPE
         update = alpha + beta - S  # the emission should be included in alpha
     else:
         # Raising a bare string is itself a TypeError; raise a real exception
         # so the caller sees the intended message.
         raise ValueError("Wrong type: %r" % (type,))

     counts = self.fractional_counts if fc is None else fc
     key = (event_type, d, c)
     counts[key] = utils.logadd(update, counts.get(key, float('-inf')))
     # Returns None when the instance-level table was updated.
     return fc
def accumulate_fc(type, alpha, beta, d, S, c=None, k=None, q=None, e=None, fc=None):
    """Log-add one posterior term into the fractional-count table ``fc``.

    Args:
        type: ``T_TYPE`` or ``E_TYPE``; selects the update formula and the
            first component of the count key.
        alpha: Forward score entering the update.
        beta: Backward score entering the update.
        d: Decision part of the count key.
        S: Normaliser subtracted from the update.
        c: Context part of the count key.
        k: Unused; kept so existing keyword callers keep working.
        q: Transition score; only used for ``T_TYPE``.
        e: Emission score; only used for ``T_TYPE``.
        fc: Mapping to update in place. Despite the ``None`` default a
            mapping is required: the body calls ``fc.get``.

    Returns:
        The same ``fc`` object after the update.

    Raises:
        ValueError: If ``type`` is neither ``T_TYPE`` nor ``E_TYPE``.
    """
    if type == T_TYPE:
        event_type = T_TYPE
        update = alpha + q + e + beta - S
    elif type == E_TYPE:
        event_type = E_TYPE
        update = alpha + beta - S  # the emission should be included in alpha
    else:
        # Raising a bare string is itself a TypeError; raise a real exception
        # so the caller sees the intended message.
        raise ValueError("Wrong type: %r" % (type,))

    key = (event_type, d, c)
    fc[key] = utils.logadd(update, fc.get(key, float('-inf')))
    return fc
    def get_backwards(self, theta, obs_seq, alpha_pi, fc=None):
        """Run the backward pass over ``obs_seq`` and accumulate fractional counts.

        Args:
            theta: Passed through to ``self.get_decision_given_context``.
            obs_seq: Observation sequence; its last index is treated as the
                end position.
            alpha_pi: Forward table keyed by ``(position, state)``; must
                contain ``(len(obs_seq) - 1, END_SYM)``.
            fc: Optional fractional-count table handed to
                ``self.accumulate_fc`` for every emission and transition.

        Returns:
            ``(S, beta_pi)`` when ``fc`` ends up ``None``, otherwise
            ``(S, beta_pi, fc)``. ``S`` is ``alpha_pi[(n, END_SYM)]`` and
            ``beta_pi`` is the backward table keyed by ``(position, state)``.
        """
        n = len(obs_seq) - 1  # index of last word

        beta_pi = {(n, END_SYM): 0.0}
        S = alpha_pi[(n, END_SYM)]  # from line 13 in pseudo code
        fc = self.accumulate_fc(type=E_TYPE, alpha=0.0, beta=S, e=0.0, S=S, d=START_SYM, c=START_SYM, fc=fc)
        for k in range(n, 0, -1):
            for v in self.get_possible_states(obs_seq[k]):
                e = self.get_decision_given_context(theta=theta, type=E_TYPE, decision=obs_seq[k],
                                                    context=v)  # p(obs[k]|v)
                # beta for (k, v) is final here: the inner loop only writes
                # entries for position k - 1.
                beta_kv = beta_pi[(k, v)]
                fc = self.accumulate_fc(type=E_TYPE, alpha=alpha_pi[(k, v)], beta=beta_kv, e=e,
                                        d=obs_seq[k], c=v, S=S, fc=fc)
                for u in self.get_possible_states(obs_seq[k - 1]):
                    q = self.get_decision_given_context(type=T_TYPE, decision=v, context=u, theta=theta)  # p(v|u)
                    fc = self.accumulate_fc(type=T_TYPE, alpha=alpha_pi[k - 1, u], beta=beta_kv,
                                            q=q, e=e, d=v, c=u, S=S, fc=fc)

                    beta_p = beta_kv + (q + e)  # The beta includes the emission probability
                    new_pi_key = (k - 1, u)
                    if new_pi_key not in beta_pi:  # implements lines 16
                        beta_pi[new_pi_key] = beta_p
                    else:
                        beta_pi[new_pi_key] = utils.logadd(beta_pi[new_pi_key], beta_p)
        if fc is None:
            return S, beta_pi
        else:
            return S, beta_pi, fc
def batch_accumilate_likelihood(result):
    """Fold one result tuple into the module-level accumulators.

    Args:
        result: Indexable whose item 0 is added to ``data_likelihood``,
            item 1 is a mapping of log-space fractional counts that is
            log-added into ``fractional_counts``, and item 2 is added to
            ``emp_feat``.
    """
    global data_likelihood, fractional_counts, emp_feat
    data_likelihood += result[0]
    partial_counts = result[1]
    emp_feat += result[2]
    for event, log_count in partial_counts.items():
        # Events not seen before start from log(0).
        previous = fractional_counts.get(event, float('-inf'))
        fractional_counts[event] = utils.logadd(log_count, previous)
def get_best_seq(theta, obs_id):
    """Pick the highest-scoring source link for every target position.

    Each candidate (target, source) link in ``trellis[obs_id]`` is scored with
    the ``E_TYPE`` decision score ``e`` and weighted by
    ``log(1 / number_of_candidates)``.

    Args:
        theta: Passed through unchanged to ``get_decision_given_context``.
        obs_id: Key used to index the ``source``, ``target`` and
            ``trellis`` globals.

    Returns:
        A tuple ``(best_links, log_score)``: ``best_links`` holds, for every
        target position except the last list slot, the pair
        ``(t_idx, s_idx)`` with the highest ``e``; ``log_score`` is the sum
        over target positions of the log-sum of ``e + q``.
    """
    global source, target, trellis
    obs = trellis[obs_id]
    max_bt = [-1] * len(obs)
    p_st = 0.0
    for t_idx in obs:
        t_tok = target[obs_id][t_idx]
        max_e = float('-inf')
        max_s_idx = None
        sum_sj = float('-inf')
        for _, s_idx in obs[t_idx]:
            s_tok = source[obs_id][s_idx] if s_idx is not NULL else NULL
            e = get_decision_given_context(theta, E_TYPE, decision=t_tok, context=s_tok)
            # Uniform weight over the candidate links of this target position.
            q = log(1.0 / len(obs[t_idx]))
            sum_sj = utils.logadd(sum_sj, e + q)
            # The best link is chosen on the emission score alone.
            if e > max_e:
                max_e = e
                max_s_idx = s_idx
        max_bt[t_idx] = (t_idx, max_s_idx)
        p_st += sum_sj

    return max_bt[:-1], p_st
def get_backwards(theta, obs_id, alpha_pi, fc=None):
    """Run the backward pass over one trellis and accumulate fractional counts.

    Args:
        theta: Passed through to ``get_decision_given_context``.
        obs_id: Key used to index the ``trellis``, ``source`` and ``target``
            globals.
        alpha_pi: Forward table keyed by ``(position, state)`` where a state
            is a ``(target_index, source_index)`` pair from the trellis.
        fc: Optional fractional-count mapping. When ``None`` no counts are
            accumulated and only the backward table is computed.

    Returns:
        ``(S, beta_pi)`` when ``fc`` is ``None``, otherwise
        ``(S, beta_pi, fc)``. ``S`` is the forward score of the end state and
        ``beta_pi`` is the backward table keyed by ``(position, state)``.
    """
    global trellis, source, target
    obs = trellis[obs_id]
    src = source[obs_id]
    tar = target[obs_id]
    n = len(obs) - 1  # index of last word
    end_state = obs[n][0]
    beta_pi = {(n, end_state): 0.0}
    S = alpha_pi[(n, end_state)]  # from line 13 in pseudo code
    # Only touch the count table when one was supplied; accumulate_fc needs a
    # real mapping.
    if fc is not None:
        fc = accumulate_fc(type=E_TYPE, alpha=0.0, beta=S, e=0.0, S=S, d=BOUNDARY_START, c=BOUNDARY_START,
                           fc=fc)
    for k in range(n, 0, -1):
        for v in obs[k]:
            tk, aj = v
            t_tok = tar[tk]
            s_tok = src[aj] if aj is not NULL else NULL
            e = get_decision_given_context(theta, E_TYPE, decision=t_tok, context=s_tok)

            # beta for (k, v) is final here: the inner loop only writes
            # entries for position k - 1.
            beta_kv = beta_pi[(k, v)]
            if fc is not None:
                fc = accumulate_fc(type=E_TYPE, alpha=alpha_pi[(k, v)], beta=beta_kv, e=e, S=S, d=t_tok,
                                   c=s_tok, fc=fc)
            for u in obs[k - 1]:
                _, aj_1 = u
                context = aj_1
                if model_type == HMM_MODEL:
                    q = get_decision_given_context(theta, T_TYPE, decision=aj, context=context)
                    if fc is not None:
                        fc = accumulate_fc(type=T_TYPE, alpha=alpha_pi[k - 1, u], beta=beta_kv, q=q, e=e,
                                           d=aj, c=context, S=S, fc=fc)
                else:
                    # Non-HMM models use a uniform weight over the states at k.
                    q = log(1.0 / len(obs[k]))

                beta_p = beta_kv + (q + e)  # The beta includes the emission probability
                new_pi_key = (k - 1, u)
                if new_pi_key not in beta_pi:  # implements lines 16
                    beta_pi[new_pi_key] = beta_p
                else:
                    beta_pi[new_pi_key] = utils.logadd(beta_pi[new_pi_key], beta_p)
    if fc is None:
        return S, beta_pi
    else:
        return S, beta_pi, fc
def get_model1_forward(theta, obs_id, fc=None, ef=None):
    """Score one sentence pair with a position-dependent alignment weight.

    For every target position each candidate (target, source) link gets the
    ``E_TYPE`` decision score ``e`` plus an alignment weight ``q``:

    * ``0.0`` when the target token is ``BOUNDARY_START`` / ``BOUNDARY_END``;
    * ``log(Po)`` when the source token is ``NULL``;
    * otherwise ``log(unnormalized_prob(...) / az)`` with
      ``az = compute_z(...) / (1 - Po)``.

    Args:
        theta: Passed through unchanged to ``get_decision_given_context``.
        obs_id: Key used to index the ``source``, ``target`` and
            ``trellis`` globals.
        fc: Optional mapping of fractional counts keyed by
            ``(E_TYPE, target_token, source_token)``; each link log-adds its
            normalised score ``e + q - logsum(e + q)``. ``None`` skips the
            count update.
        ef: Running total to which ``exp(posterior) * h(...)`` is added for
            every link when ``fc`` is given. Starts from ``0.0`` if omitted.
            NOTE(review): presumably the expected value of the feature used
            to tune ``diagonal_tension`` -- confirm against the caller.

    Returns:
        ``(best_links, log_score, fc, ef)``: ``best_links`` holds, for every
        target position except the last list slot, the pair
        ``(t_idx, s_idx)`` with the highest ``e``; ``log_score`` is the sum
        over target positions of the log-sum of ``e + q``.

    Raises:
        FloatingPointError: If the accumulated score becomes ``+inf``.
    """
    global source, target, trellis, diagonal_tension
    obs = trellis[obs_id]
    m = len(obs)
    max_bt = [-1] * m
    p_st = 0.0
    if fc is not None and ef is None:
        # ``ef += ...`` below cannot start from None.
        ef = 0.0
    for t_idx in obs:
        t_tok = target[obs_id][t_idx]
        candidates = obs[t_idx]
        n = len(candidates)
        is_boundary = t_tok == BOUNDARY_START or t_tok == BOUNDARY_END
        az = None  # normaliser for this target position, computed on first use
        scored = []  # (s_idx, s_tok, e, q, uses_h) kept for the count update
        sum_pei = float('-inf')
        max_e = float('-inf')
        max_s_idx = None
        for _, s_idx in candidates:
            s_tok = source[obs_id][s_idx] if s_idx is not NULL else NULL
            e = get_decision_given_context(theta, E_TYPE, decision=t_tok, context=s_tok)

            uses_h = False
            if is_boundary:
                q = 0.0
            elif s_tok == NULL:
                q = np.log(Po)
            else:
                if az is None:
                    az = compute_z(t_idx, m - 2, n - 1, diagonal_tension) / (1 - Po)
                q = np.log(unnormalized_prob(t_idx, s_idx, m - 2, n - 1, diagonal_tension) / az)
                uses_h = True

            sum_pei = utils.logadd(sum_pei, q + e)
            scored.append((s_idx, s_tok, e, q, uses_h))

            # The best link is chosen on the emission score alone.
            if e > max_e:
                max_e = e
                max_s_idx = s_idx
        max_bt[t_idx] = (t_idx, max_s_idx)
        p_st += sum_pei

        if p_st == float('inf'):
            # Fail loudly instead of dropping into an interactive debugger.
            raise FloatingPointError(
                "log-likelihood overflowed to +inf for obs_id %r at target index %r" % (obs_id, t_idx))

        if fc is None:
            continue

        # update fractional counts
        for s_idx, s_tok, e, q, uses_h in scored:
            hijmn = h(t_idx, s_idx, m - 2, n - 1) if uses_h else 0.0
            p_ai = e + q - sum_pei  # TODO: times h(j',i,m,n)
            event = (E_TYPE, t_tok, s_tok)
            fc[event] = utils.logadd(p_ai, fc.get(event, float('-inf')))
            ef += (exp(p_ai) * hijmn)

    return max_bt[:-1], p_st, fc, ef