def getTestPreds(train_obs_list, train_ne_list, test_obs_list, smooth, similarity):
    """Decode every test observation sequence with the selected Viterbi variant.

    Emission probabilities and bigram state (initial / transition)
    probabilities are estimated from the training data first.  The two flags
    then select the decoder:

    * ``similarity`` truthy: ``SimilarityViterbi`` is used with the
      unsmoothed emission probabilities and the reduced similarity
      probabilities built from the ``text_features`` JSON file.  When
      ``smooth`` is also truthy, the transition probabilities are smoothed
      and ``smooth=True`` is forwarded; otherwise the raw transitions and
      ``smooth=False`` are used.
    * only ``smooth`` truthy: ``SmoothViterbi`` is used with smoothed
      emission and smoothed transition probabilities.
    * neither: plain ``Viterbi`` on the unsmoothed probabilities.

    Args:
        train_obs_list: training observations, passed to
            ``getEmissionProbabilities``.
        train_ne_list: training labels, passed to
            ``getEmissionProbabilities`` and ``getBigramStateProbabilities``.
        test_obs_list: iterable of test observation sequences; each element
            is handed to the decoder unchanged.
        smooth: truthy to enable smoothing.
        similarity: truthy to enable the similarity-based decoder.

    Returns:
        A list holding the decoder's return value for each element of
        ``test_obs_list``, in the same order.
    """
    emission_probs = getEmissionProbabilities(train_obs_list, train_ne_list)
    state_init_probs, state_trans_probs = getBigramStateProbabilities(train_ne_list)

    if similarity:
        feature_type = 'text_features'
        # Use a context manager so the file handle is closed even if
        # json.load raises (the bare open() used previously leaked it).
        with open(dir_path + 'Training_Test_Data/{0}'.format(feature_type)) as feature_file:
            low_frequency_probabilities = json.load(feature_file)
        reduced_similarity_probs = getReducedSimilarityProbs(low_frequency_probabilities, emission_probs)

        # Only the transition table and the forwarded flag differ between the
        # smoothed and unsmoothed similarity modes.
        if smooth:
            transition_probs = getSmoothTransitionProbs(state_trans_probs)
            use_smoothing = True
        else:
            transition_probs = state_trans_probs
            use_smoothing = False

        return [
            SimilarityViterbi(emission_probs, state_init_probs, transition_probs,
                              observations, reduced_similarity_probs, smooth=use_smoothing)
            for observations in test_obs_list
        ]

    if smooth:
        smoothed_emission_probs = getSmoothEmissionProbs(emission_probs)
        smoothed_state_trans_probs = getSmoothTransitionProbs(state_trans_probs)
        return [
            SmoothViterbi(smoothed_emission_probs, state_init_probs,
                          smoothed_state_trans_probs, observations)
            for observations in test_obs_list
        ]

    return [
        Viterbi(emission_probs, state_init_probs, state_trans_probs, observations)
        for observations in test_obs_list
    ]
def getTestPreds(train_obs_list, train_ne_list, test_obs_list, low_frequency_probabilities, smooth, similarity_based, test_pos_list):
    """Run ``Viterbi`` over every test sequence and collect the results.

    Emission probabilities come from ``getEmissionProbabilities`` (which
    receives the ``smooth`` flag) and the initial / transition state
    probabilities from ``getStateProbabilities``.  Each element of
    ``test_obs_list`` is decoded together with the element of
    ``test_pos_list`` at the same index; ``low_frequency_probabilities``,
    ``smooth`` and ``similarity_based`` are forwarded to ``Viterbi`` as-is.

    Returns:
        A list with one ``Viterbi`` result per element of ``test_obs_list``,
        in order.
    """
    emissions = getEmissionProbabilities(train_obs_list, train_ne_list, smooth)
    initial_probs, transition_probs = getStateProbabilities(train_ne_list)
    # test_pos_list is indexed (not zipped) so a too-short list still raises
    # IndexError instead of silently truncating the output.
    return [
        Viterbi(emissions, initial_probs, transition_probs, observations,
                low_frequency_probabilities, smooth, similarity_based, test_pos_list[idx])
        for idx, observations in enumerate(test_obs_list)
    ]
def getTestPreds(train_obs_list, train_ne_list, test_obs_list, smooth,
                 similarity):
    """Predict a state sequence for each test observation sequence.

    Training data is turned into emission probabilities and bigram state
    (initial / transition) probabilities, then one of four decoding modes is
    chosen from the ``smooth`` and ``similarity`` flags:

    * neither flag: ``Viterbi`` with the unsmoothed probabilities;
    * ``smooth`` only: ``SmoothViterbi`` with smoothed emission and
      transition probabilities;
    * ``similarity`` only: ``SimilarityViterbi`` with unsmoothed
      probabilities, the reduced similarity probabilities and
      ``smooth=False``;
    * both flags: ``SimilarityViterbi`` with unsmoothed emissions, smoothed
      transitions, the reduced similarity probabilities and ``smooth=True``.

    The similarity modes read the ``text_features`` JSON file located under
    ``dir_path + 'Training_Test_Data/'``.

    Args:
        train_obs_list: training observations.
        train_ne_list: training labels aligned with ``train_obs_list``
            (presumably named-entity tags; confirm against the caller).
        test_obs_list: iterable of observation sequences to decode.
        smooth: truthy to apply smoothing.
        similarity: truthy to use the similarity-based decoder.

    Returns:
        List of decoder outputs, one per element of ``test_obs_list``.
    """
    emission_probs = getEmissionProbabilities(train_obs_list, train_ne_list)
    state_init_probs, state_trans_probs = getBigramStateProbabilities(
        train_ne_list)

    if not similarity:
        if not smooth:
            return [
                Viterbi(emission_probs, state_init_probs, state_trans_probs,
                        obs_seq)
                for obs_seq in test_obs_list
            ]
        smoothed_emission_probs = getSmoothEmissionProbs(emission_probs)
        smoothed_state_trans_probs = getSmoothTransitionProbs(
            state_trans_probs)
        return [
            SmoothViterbi(smoothed_emission_probs, state_init_probs,
                          smoothed_state_trans_probs, obs_seq)
            for obs_seq in test_obs_list
        ]

    # Both similarity modes share the same feature file and reduced table.
    feature_type = 'text_features'
    feature_path = dir_path + 'Training_Test_Data/{0}'.format(feature_type)
    # The context manager closes the handle deterministically; the previous
    # json.load(open(...)) form left it open until garbage collection.
    with open(feature_path) as feature_file:
        low_frequency_probabilities = json.load(feature_file)
    reduced_similarity_probs = getReducedSimilarityProbs(
        low_frequency_probabilities, emission_probs)

    if smooth:
        trans_probs = getSmoothTransitionProbs(state_trans_probs)
    else:
        trans_probs = state_trans_probs
    # Forward a real bool, matching the literal True/False the decoder
    # received before the branches were merged.
    smooth_flag = bool(smooth)

    return [
        SimilarityViterbi(emission_probs, state_init_probs, trans_probs,
                          obs_seq, reduced_similarity_probs,
                          smooth=smooth_flag)
        for obs_seq in test_obs_list
    ]
# Example no. 4
# 0
def getTestPreds(train_obs_list, train_ne_list, test_obs_list,
                 low_frequency_probabilities, smooth, similarity_based,
                 test_pos_list):
    """Decode each test sequence with ``Viterbi`` and return all results.

    The model tables are built once from the training data:
    ``getEmissionProbabilities`` (given ``smooth``) supplies the emission
    probabilities and ``getStateProbabilities`` the initial and transition
    probabilities.  Every entry of ``test_obs_list`` is then decoded along
    with the same-index entry of ``test_pos_list``, while
    ``low_frequency_probabilities``, ``smooth`` and ``similarity_based`` are
    passed straight through to ``Viterbi``.

    Returns:
        The ``Viterbi`` results as a list ordered like ``test_obs_list``.
    """
    emission_table = getEmissionProbabilities(train_obs_list, train_ne_list,
                                              smooth)
    init_table, trans_table = getStateProbabilities(train_ne_list)

    def decode(position, observations):
        # Look the second list up by index so a length mismatch surfaces as
        # IndexError rather than being silently truncated.
        return Viterbi(emission_table, init_table, trans_table, observations,
                       low_frequency_probabilities, smooth,
                       similarity_based, test_pos_list[position])

    return [decode(position, observations)
            for position, observations in enumerate(test_obs_list)]