def SimilarityViterbi(emission_probs, state_init_probs, state_trans_probs, test_subseq, low_frequency_probabilities, smooth):
    '''
    Viterbi decoding over bigram states ((prev_tag, tag) pairs).

    Parameters
    ----------
    emission_probs : dict
        tag -> {emission -> probability}. A missing emission yields
        probability 0 when backed by a Counter.
    state_init_probs : dict
        (prev_tag, tag) bigram -> initial probability.
    state_trans_probs : dict
        (prev_tag, tag) bigram -> {next_tag -> probability}. Must contain
        "<UNK>" entries, used as a smoothing back-off.
    test_subseq : sequence
        Observed emissions to decode; must be non-empty.
    low_frequency_probabilities : dict
        tag -> {feature_class -> probability}; used when an emission was
        never seen for a tag.
    smooth : bool
        When true, unseen transitions back off to the "<UNK>" entries.

    Returns
    -------
    list
        The most probable sequence of tags for test_subseq.

    Notes
    -----
    For now we're ignoring the <UNK> tokens that were inserted.
    '''
    # One candidate path per initial bigram; paths store only the second
    # element of each bigram (the actual tag).
    path_dict = {state: [state[1]] for state in state_init_probs}
    prev_probs = {
        state: state_init_probs[state] * emission_probs[state[1]][test_subseq[0]]
        for state in state_init_probs
    }

    # Every tag that ever appears as a transition target. "<UNK>" is a
    # smoothing placeholder, not a real tag, so it must not be decoded.
    all_states = set(
        state2 for state1 in state_trans_probs for state2 in state_trans_probs[state1]
    )
    all_states.remove("<UNK>")

    # Standard Viterbi recursion over the remaining emissions.
    for emission in test_subseq[1:]:
        new_path_dict = {}
        curr_state_probs = {}
        for curr_state in all_states:
            # The emission probability does not depend on the predecessor,
            # so resolve it once per candidate state (hoisted out of the
            # inner loop).
            if emission not in emission_probs[curr_state]:
                # Emission is unseen: estimate from its feature class.
                feature_class = findFeatureClass(emission)
                emission_probability = low_frequency_probabilities[curr_state][feature_class]
            else:
                emission_probability = emission_probs[curr_state][emission]

            temp_state_probs = {}
            for prev_state in path_dict:
                if smooth:
                    # Back off to "<UNK>" for transitions never observed.
                    if prev_state not in state_trans_probs:
                        this_trans_prob = state_trans_probs["<UNK>"][curr_state]
                    elif curr_state not in state_trans_probs[prev_state]:
                        this_trans_prob = state_trans_probs[prev_state]["<UNK>"]
                    else:
                        this_trans_prob = state_trans_probs[prev_state][curr_state]
                else:
                    this_trans_prob = state_trans_probs[prev_state][curr_state]

                temp_state_probs[prev_state] = (
                    prev_probs[prev_state] * this_trans_prob * emission_probability
                )

            # BUG FIX: the original indexed dict views (`.values()[idx]`,
            # `.keys()[idx]`), which raises TypeError on Python 3. Select
            # the best predecessor directly; like np.argmax, `max` keeps
            # the first maximum in iteration order.
            max_state = max(temp_state_probs, key=temp_state_probs.get)
            max_prob = temp_state_probs[max_state]

            curr_bigram = (max_state[1], curr_state)
            curr_state_probs[curr_bigram] = max_prob
            new_path_dict[curr_bigram] = path_dict[max_state] + [curr_state]

        prev_probs = curr_state_probs
        path_dict = new_path_dict

    # The most probable final state determines the returned path.
    overall_max_state = max(prev_probs, key=prev_probs.get)
    return path_dict[overall_max_state]
def SimilarityViterbi(emission_probs, state_init_probs, state_trans_probs,
                      test_subseq, low_frequency_probabilities, smooth):
    '''
    Viterbi decoding over bigram states ((prev_tag, tag) pairs).

    Parameters
    ----------
    emission_probs : dict
        tag -> {emission -> probability}. A missing emission yields
        probability 0 when backed by a Counter.
    state_init_probs : dict
        (prev_tag, tag) bigram -> initial probability.
    state_trans_probs : dict
        (prev_tag, tag) bigram -> {next_tag -> probability}. Must contain
        "<UNK>" entries, used as a smoothing back-off.
    test_subseq : sequence
        Observed emissions to decode; must be non-empty.
    low_frequency_probabilities : dict
        tag -> {feature_class -> probability}; used when an emission was
        never seen for a tag.
    smooth : bool
        When true, unseen transitions back off to the "<UNK>" entries.

    Returns
    -------
    list
        The most probable sequence of tags for test_subseq.

    Notes
    -----
    For now we're ignoring the <UNK> tokens that were inserted.
    '''
    # Seed one candidate path per initial bigram; each path records only
    # the tag half of the bigram.
    path_dict = {state: [state[1]] for state in state_init_probs}
    prev_probs = {
        state: state_init_probs[state] * emission_probs[state[1]][test_subseq[0]]
        for state in state_init_probs
    }

    # Tags appearing as transition targets; drop the "<UNK>" smoothing
    # placeholder since it is not a decodable tag.
    all_states = set(state2 for state1 in state_trans_probs
                     for state2 in state_trans_probs[state1])
    all_states.remove("<UNK>")

    # Viterbi recursion over the rest of the sequence.
    for emission in test_subseq[1:]:
        new_path_dict = {}
        curr_state_probs = {}
        for curr_state in all_states:
            # Emission probability is predecessor-independent; compute it
            # once per candidate state instead of inside the inner loop.
            if emission not in emission_probs[curr_state]:
                # Unseen emission: fall back to its feature class.
                feature_class = findFeatureClass(emission)
                emission_probability = low_frequency_probabilities[curr_state][feature_class]
            else:
                emission_probability = emission_probs[curr_state][emission]

            temp_state_probs = {}
            for prev_state in path_dict:
                if smooth:
                    # Unseen transition pairs back off to "<UNK>".
                    if prev_state not in state_trans_probs:
                        this_trans_prob = state_trans_probs["<UNK>"][curr_state]
                    elif curr_state not in state_trans_probs[prev_state]:
                        this_trans_prob = state_trans_probs[prev_state]["<UNK>"]
                    else:
                        this_trans_prob = state_trans_probs[prev_state][curr_state]
                else:
                    this_trans_prob = state_trans_probs[prev_state][curr_state]

                temp_state_probs[prev_state] = (
                    prev_probs[prev_state] * this_trans_prob * emission_probability
                )

            # BUG FIX: the original subscripted dict views
            # (`.values()[idx]` / `.keys()[idx]`), a Python-2-only idiom
            # that raises TypeError on Python 3. Pick the best predecessor
            # directly; ties resolve to the first maximum, matching
            # np.argmax.
            max_state = max(temp_state_probs, key=temp_state_probs.get)
            max_prob = temp_state_probs[max_state]

            curr_bigram = (max_state[1], curr_state)
            curr_state_probs[curr_bigram] = max_prob
            new_path_dict[curr_bigram] = path_dict[max_state] + [curr_state]

        prev_probs = curr_state_probs
        path_dict = new_path_dict

    # Follow the most probable final state back through its stored path.
    overall_max_state = max(prev_probs, key=prev_probs.get)
    return path_dict[overall_max_state]
def Viterbi(emission_probs, state_init_probs, state_trans_probs, test_subseq, low_frequency_probabilities, smooth, similarity_based, pos_subseq):
    '''
    Viterbi decoding over unigram states (single tags).

    Parameters
    ----------
    emission_probs : dict
        tag -> {emission -> probability}; when `smooth` is 'Laplacian' or
        'Good-Turing', each inner dict must hold an '<UNK>' entry.
    state_init_probs : dict
        tag -> initial probability.
    state_trans_probs : dict
        tag -> {tag -> probability}.
    test_subseq : sequence
        Observed emissions to decode; must be non-empty.
    low_frequency_probabilities : dict
        feature_class -> {tag -> probability}; used for unseen emissions
        when no count smoothing is selected.
    smooth : str or falsy
        'Laplacian' / 'Good-Turing' selects the '<UNK>' emission back-off.
    similarity_based : bool
        When no count smoothing applies, choose the feature class via
        findFeatureClass(emission) instead of pos_subseq.
    pos_subseq : sequence
        Per-position feature classes, indexed in lockstep with
        test_subseq (used when similarity_based is false).

    Returns
    -------
    list
        The most probable sequence of tags for test_subseq.

    Notes
    -----
    For now we're ignoring the <UNK> tokens that were inserted; a lookup
    of an emission missing from a Counter yields probability 0.
    '''
    # One candidate path per initial state.
    path_dict = {state: [state] for state in state_init_probs}
    prev_probs = {
        state: state_init_probs[state] * emission_probs[state][test_subseq[0]]
        for state in state_init_probs
    }

    # Every tag that appears as a transition target.
    all_states = set(
        state2 for state1 in state_trans_probs for state2 in state_trans_probs[state1]
    )

    # Index-based iteration so the parallel pos_subseq can be consulted.
    for emission_idx in range(1, len(test_subseq)):
        emission = test_subseq[emission_idx]
        new_path_dict = {}
        curr_state_probs = {}
        for curr_state in all_states:
            # Resolve the emission probability once per candidate state;
            # it does not depend on the predecessor. The three original
            # branches duplicated the full product — factored here.
            if emission not in emission_probs[curr_state]:
                if smooth == 'Laplacian' or smooth == 'Good-Turing':
                    # Smoothed count for emissions never seen with this tag.
                    emission_probability = emission_probs[curr_state]['<UNK>']
                else:
                    if similarity_based:
                        # Feature class derived from the token itself.
                        feature_class = findFeatureClass(emission)
                    else:
                        # Feature class taken from the parallel POS sequence.
                        feature_class = pos_subseq[emission_idx]
                    # Strip a chunk prefix ("B-"/"I-" style) from the tag.
                    current_state = curr_state if '-' not in curr_state else curr_state.split('-')[1]
                    emission_probability = low_frequency_probabilities[feature_class][current_state]
            else:
                emission_probability = emission_probs[curr_state][emission]

            temp_state_probs = {
                prev_state: prev_probs[prev_state]
                * state_trans_probs[prev_state][curr_state]
                * emission_probability
                for prev_state in path_dict
            }

            # BUG FIX: the original indexed dict views (`.values()[idx]`,
            # `.keys()[idx]`), which raises TypeError on Python 3. Select
            # the best predecessor directly; `max` keeps the first maximum
            # in iteration order, matching np.argmax.
            max_state = max(temp_state_probs, key=temp_state_probs.get)
            curr_state_probs[curr_state] = temp_state_probs[max_state]
            new_path_dict[curr_state] = path_dict[max_state] + [curr_state]

        prev_probs = curr_state_probs
        path_dict = new_path_dict

    # The most probable final state determines the returned path.
    overall_max_state = max(prev_probs, key=prev_probs.get)
    return path_dict[overall_max_state]
def Viterbi(emission_probs, state_init_probs, state_trans_probs, test_subseq,
            low_frequency_probabilities, smooth, similarity_based, pos_subseq):
    '''
    Viterbi decoding over unigram states (single tags).

    Parameters
    ----------
    emission_probs : dict
        tag -> {emission -> probability}; when `smooth` is 'Laplacian' or
        'Good-Turing', each inner dict must hold an '<UNK>' entry.
    state_init_probs : dict
        tag -> initial probability.
    state_trans_probs : dict
        tag -> {tag -> probability}.
    test_subseq : sequence
        Observed emissions to decode; must be non-empty.
    low_frequency_probabilities : dict
        feature_class -> {tag -> probability}; used for unseen emissions
        when no count smoothing is selected.
    smooth : str or falsy
        'Laplacian' / 'Good-Turing' selects the '<UNK>' emission back-off.
    similarity_based : bool
        When no count smoothing applies, choose the feature class via
        findFeatureClass(emission) instead of pos_subseq.
    pos_subseq : sequence
        Per-position feature classes, indexed in lockstep with
        test_subseq (used when similarity_based is false).

    Returns
    -------
    list
        The most probable sequence of tags for test_subseq.

    Notes
    -----
    For now we're ignoring the <UNK> tokens that were inserted; a lookup
    of an emission missing from a Counter yields probability 0.
    '''
    # Seed one candidate path per initial state.
    path_dict = {state: [state] for state in state_init_probs}
    prev_probs = {
        state: state_init_probs[state] * emission_probs[state][test_subseq[0]]
        for state in state_init_probs
    }

    # All tags appearing as transition targets.
    all_states = set(state2 for state1 in state_trans_probs
                     for state2 in state_trans_probs[state1])

    # Iterate by index so the parallel pos_subseq can be consulted.
    for emission_idx in range(1, len(test_subseq)):
        emission = test_subseq[emission_idx]
        new_path_dict = {}
        curr_state_probs = {}
        for curr_state in all_states:
            # The emission probability is predecessor-independent, so it
            # is computed once per state; the original repeated the whole
            # three-factor product in every branch.
            if emission not in emission_probs[curr_state]:
                if smooth == 'Laplacian' or smooth == 'Good-Turing':
                    # Smoothed value for an emission never seen with this tag.
                    emission_probability = emission_probs[curr_state]['<UNK>']
                else:
                    if similarity_based:
                        # Feature class from the token's own shape/context.
                        feature_class = findFeatureClass(emission)
                    else:
                        # Feature class from the parallel POS sequence.
                        feature_class = pos_subseq[emission_idx]
                    # Drop a chunk prefix ("B-"/"I-" style) from the tag.
                    current_state = curr_state if '-' not in curr_state else curr_state.split('-')[1]
                    emission_probability = low_frequency_probabilities[feature_class][current_state]
            else:
                emission_probability = emission_probs[curr_state][emission]

            temp_state_probs = {
                prev_state: prev_probs[prev_state]
                * state_trans_probs[prev_state][curr_state]
                * emission_probability
                for prev_state in path_dict
            }

            # BUG FIX: the original subscripted dict views
            # (`.values()[idx]` / `.keys()[idx]`), a Python-2-only idiom
            # that raises TypeError on Python 3. Choose the best
            # predecessor directly; ties resolve to the first maximum,
            # matching np.argmax.
            max_state = max(temp_state_probs, key=temp_state_probs.get)
            curr_state_probs[curr_state] = temp_state_probs[max_state]
            new_path_dict[curr_state] = path_dict[max_state] + [curr_state]

        prev_probs = curr_state_probs
        path_dict = new_path_dict

    # Follow the most probable final state back through its stored path.
    overall_max_state = max(prev_probs, key=prev_probs.get)
    return path_dict[overall_max_state]