def predict_error_analysis(test_data_lst, weight_dict):
    """Collect the misclassified test sentences for error analysis.

    For each sentence in test_data_lst, extract its features, classify it
    with the trained weights, and keep only the cases where the predicted
    label disagrees with the gold-standard label.

    Returns a list of (sentence, predicted_label, gold_label) triples.
    """
    errors = []
    for example in test_data_lst:
        feature_words, gold_label, raw_sentence = extract_features_from_sentence(example)
        features = get_features(feature_words, weight_dict)
        predicted = classify(features, raw_sentence)
        # Record only the disagreements between prediction and gold standard.
        if predicted != gold_label:
            errors.append((raw_sentence, predicted, gold_label))
    return errors
def hinge_loss_bigram(weight_dict, sentence, learning_rate=0.01):
    """Perform one sub-gradient descent step of the hinge loss on a bigram model.

    Mirrors hinge_loss() but uses the bigram feature extractor, cost-augmented
    classifier, and bigram feature function. Updates weight_dict in place
    (dictionaries share memory, so no return value is needed).

    Args:
        weight_dict: nested dict mapping bigram -> feature tuple -> weight;
            mutated in place.
        sentence: one training example in the project's sentence format.
        learning_rate: sub-gradient step size (default 0.01, matching the
            previously hard-coded value).
    """
    words, gs_label, sent = extract_features_from_sentence_bigram(sentence)
    selected_features = get_features(words, weight_dict)
    # Cost-augmented classification, computed once per training example.
    classify_label = costClassify_bigram(selected_features, sent, gs_label)
    if classify_label == gs_label:
        # f_j_1 == f_j_2 for every feature when the labels agree, so every
        # update below would be a no-op; skip the loop entirely.
        return
    for feature_temp in selected_features:
        bigram = feature_temp[0]  # feature tuples are keyed by their bigram
        feature_weight = weight_dict[bigram][feature_temp]
        f_j_1 = feature_function_bigram(feature_temp, sent, gs_label)
        f_j_2 = feature_function_bigram(feature_temp, sent, classify_label)
        # Sub-gradient step: move toward the gold label, away from the
        # (cost-augmented) predicted label.
        weight_dict[bigram][feature_temp] = (
            feature_weight + learning_rate * f_j_1 - learning_rate * f_j_2
        )
def predict_bigram(test_data_lst, w_dict):
    """Predict sentiment for every test sentence with the bigram model.

    Classifies each sentence using the trained bigram weights and returns
    the accuracy against the gold-standard labels. The same routine works
    for weights trained with either hinge loss or perceptron loss.
    """
    gold = []
    predictions = []
    for example in test_data_lst:
        feature_words, gold_label, raw_sentence = extract_features_from_sentence_bigram(example)
        features = get_features(feature_words, w_dict)
        gold.append(gold_label)
        predictions.append(classify_bigram(features, raw_sentence))
    # Compare predictions against the gold labels in one shot.
    return accuracy_score(gold, predictions)
def hinge_loss(weight_dict, sentence, learning_rate=0.01):
    """Perform one sub-gradient descent step of the hinge loss (unigram model).

    This code represents the hinge loss function as specified in the
    assignment. Note how the function does not return anything: the weights
    are stored in a dictionary, so the updates below occur in place via the
    dictionary's shared memory location.

    Args:
        weight_dict: nested dict mapping word -> feature tuple -> weight;
            mutated in place.
        sentence: one training example in the project's sentence format.
        learning_rate: sub-gradient step size (default 0.01, matching the
            previously hard-coded value).
    """
    words, gs_label, sent = extract_features_from_sentence(sentence)
    selected_features = get_features(words, weight_dict)
    # Classify label only calculated once at the beginning of a training
    # example. Note use of costClassify (cost-augmented) and not classify.
    classify_label = costClassify(selected_features, sent, gs_label)
    if classify_label == gs_label:
        # f_j_1 == f_j_2 for every feature when the labels agree, so every
        # update below would be a no-op; skip the loop entirely.
        return
    for feature_temp in selected_features:
        word = feature_temp[0]  # feature tuples are keyed by their word
        feature_weight = weight_dict[word][feature_temp]
        f_j_1 = feature_function(feature_temp, sent, gs_label)
        f_j_2 = feature_function(feature_temp, sent, classify_label)
        # Sub-gradient descent step: move toward the gold label, away from
        # the (cost-augmented) predicted label.
        weight_dict[word][feature_temp] = (
            feature_weight + learning_rate * f_j_1 - learning_rate * f_j_2
        )