Exemple #1
0
def predict_error_analysis(test_data_lst, weight_dict):
    """
    Collect the test sentences whose predicted label differs from the gold label.

    Args:
        test_data_lst: iterable of raw test examples, each passed to
            ``extract_features_from_sentence``.
        weight_dict: feature weights, forwarded to ``get_features``.

    Returns:
        A list of ``(sentence, predicted_label, gold_label)`` tuples, one per
        misclassified example, in the order the examples were seen.
    """
    misclassified = []

    for raw_example in test_data_lst:
        tokens, gold, text = extract_features_from_sentence(raw_example)
        predicted = classify(get_features(tokens, weight_dict), text)
        if predicted != gold:
            # Keep the sentence next to both labels so the error can be inspected later.
            misclassified.append((text, predicted, gold))

    return misclassified
Exemple #2
0
def hinge_loss_bigram(weight_dict, sentence, step_size=0.01):
    """
    Apply one hinge-loss weight update for a single training sentence using bigram features.

    Nothing is returned: the weights are stored in ``weight_dict`` and are updated in place.

    Args:
        weight_dict: nested mapping indexed as ``weight_dict[bigram][feature]``,
            where ``bigram`` is ``feature[0]``.
        sentence: raw training example, passed to
            ``extract_features_from_sentence_bigram``.
        step_size: fixed step size applied to each feature update. Defaults to
            0.01, the value that used to be hard-coded, so existing callers
            are unaffected.
    """
    words, gs_label, sent = extract_features_from_sentence_bigram(sentence)
    selected_features = get_features(words, weight_dict)
    # The label is computed once per training example, before any weight is
    # modified. costClassify_bigram also receives the gold label.
    classify_label = costClassify_bigram(selected_features, sent, gs_label)

    for feature_temp in selected_features:
        bigram = feature_temp[0]
        feature_weight = weight_dict[bigram][feature_temp]
        f_j_1 = feature_function_bigram(feature_temp, sent, gs_label)
        f_j_2 = feature_function_bigram(feature_temp, sent, classify_label)
        # Deliberately kept as two separate terms rather than
        # step_size * (f_j_1 - f_j_2): with the default step size this
        # reproduces the original floating-point arithmetic exactly.
        weight_dict[bigram][feature_temp] = (
            feature_weight + step_size * f_j_1 - step_size * f_j_2
        )
Exemple #3
0
def predict_bigram(test_data_lst, w_dict):
    """
    Classify every test sentence with bigram features and score the predictions.

    Args:
        test_data_lst: iterable of raw test examples, each passed to
            ``extract_features_from_sentence_bigram``.
        w_dict: trained feature weights, forwarded to ``get_features``.

    Returns:
        Whatever ``accuracy_score`` returns for the gold labels versus the
        predicted labels.
    """
    gold_labels, guessed_labels = [], []

    for raw_example in test_data_lst:
        tokens, gold, parsed = extract_features_from_sentence_bigram(raw_example)
        active_features = get_features(tokens, w_dict)
        guessed_labels.append(classify_bigram(active_features, parsed))
        gold_labels.append(gold)

    return accuracy_score(gold_labels, guessed_labels)
Exemple #4
0
def hinge_loss(weight_dict, sentence, step_size=0.01):
    """
    Apply one sub-gradient descent step of the hinge loss for a single training sentence.

    The function does not return anything. The weights are stored in
    ``weight_dict`` and are updated in place, so the caller sees the changes
    through its own reference to the same dictionary.

    Args:
        weight_dict: nested mapping indexed as ``weight_dict[word][feature]``,
            where ``word`` is ``feature[0]``.
        sentence: raw training example, passed to
            ``extract_features_from_sentence``.
        step_size: fixed step size of the sub-gradient step. Defaults to 0.01,
            the value that used to be hard-coded, so existing callers are
            unaffected.
    """
    words, gs_label, sent = extract_features_from_sentence(sentence)
    selected_features = get_features(words, weight_dict)
    # The label is computed only once, at the beginning of a training example.
    # Note the use of costClassify (which also receives the gold label) and
    # not classify.
    classify_label = costClassify(selected_features, sent, gs_label)

    for feature_temp in selected_features:
        word = feature_temp[0]
        feature_weight = weight_dict[word][feature_temp]
        f_j_1 = feature_function(feature_temp, sent, gs_label)
        f_j_2 = feature_function(feature_temp, sent, classify_label)
        # Sub-gradient descent step. Deliberately kept as two separate terms
        # rather than step_size * (f_j_1 - f_j_2): with the default step size
        # this reproduces the original floating-point arithmetic exactly.
        weight_dict[word][feature_temp] = (
            feature_weight + step_size * f_j_1 - step_size * f_j_2
        )