def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 when the denominator is zero."""
    if denominator == 0:
        return 0.0
    return float(numerator) / float(denominator)


def score(y, predictions):
    """Compute accuracy, precision, recall and F-value for binary predictions.

    Args:
        y: Sequence/array of gold labels; the value 1 is the positive class.
        predictions: Sequence/array of predicted scores. They are rounded
            with ``np.round`` to obtain the predicted labels.

    Returns:
        Tuple ``(accuracy, precision, recall, f_value)`` of floats. Any ratio
        whose denominator is zero (empty input, no predicted positives, no
        actual positives, or precision + recall == 0) is reported as 0.0
        rather than NaN.
    """
    y = np.asarray(y)
    predicted_values = np.round(predictions)

    actual_positive = y == 1
    predicted_positive = predicted_values == 1
    # Count true positives explicitly instead of testing (y + predicted) == 2,
    # which would also match label pairs such as (2, 0).
    true_positives = np.sum(np.logical_and(actual_positive, predicted_positive))

    accuracy = _safe_ratio(np.sum(y == predicted_values), len(y))
    precision = _safe_ratio(true_positives, np.sum(predicted_positive))
    recall = _safe_ratio(true_positives, np.sum(actual_positive))
    f_value = _safe_ratio(2 * recall * precision, recall + precision)
    return accuracy, precision, recall, f_value


if __name__ == '__main__':
    # Resolve the data files relative to this script.
    features_file = get_rel_path_from_working_directory(
        __file__, '../data/features.txt')
    source_file = get_rel_path_from_working_directory(
        __file__, '../data/sentiment.txt')
    theta_file = get_rel_path_from_working_directory(
        __file__, '../data/theta.npy')

    with open(features_file) as f:
        features = f.read().split()
    with open(source_file) as f:
        sentences = f.readlines()
    theta = np.load(theta_file)

    # NOTE(review): create_x_y_data and sigmoid are defined outside this
    # block; presumably they build the design matrix/labels and apply the
    # trained model -- confirm against their definitions.
    X, Y = create_x_y_data(sentences, features)
    predictions = sigmoid(X, theta)

    accuracy, precision, recall, f_value = score(Y, predictions)
    print('accuracy: %f \tprecision: %f \trecall: %f \tFvalue: %f' %
          (accuracy, precision, recall, f_value))
size)] removed_features = remove_no_appear_features(features, training_sentences) train_x, train_y = create_x_y_data(training_sentences, removed_features) theta = np.random.rand(len(removed_features) + 1) for i in range(EPOCH): print('epoch' + str(i), end='\r') if i % 500 == 0 or i == EPOCH - 1: obj, grad = object_and_grad(train_x, train_y, theta) max_update_value = np.max(np.absolute(ETA * grad)) print( 'epoch%d: objective function value %f max update value %.5e' % (i, obj, max_update_value)) theta = update_theta(ETA, train_x, train_y, theta) test_x, test_y = create_x_y_data(testing_sentences, removed_features) predictions = sigmoid(test_x, theta) result.append(score(test_y, predictions)) print('tested by testing setntences.\n') accuracy = sum((e[0] for e in result)) / N precision = sum((e[1] for e in result)) / N recall = sum((e[2] for e in result)) / N f_value = sum((e[3] for e in result)) / N print('accuracy: %f \tprecision: %f \trecall: %f \tFvalue: %f' % (accuracy, precision, recall, f_value)) """結果 accuracy: 0.750328 precision: 0.750446 recall: 0.750421 Fvalue: 0.750312 """
import numpy as np
from stemming.porter2 import stem

from mymodule.path_helpers import get_rel_path_from_working_directory
from nlp100_73 import sigmoid


def create_x_data(sentences, features):
    """Build the feature vector for a single sentence.

    Args:
        sentences: The sentence text (one string). It is split on whitespace
            and every token is stemmed with ``stem``.
        features: Iterable of feature words, compared against the stemmed
            tokens.

    Returns:
        1-D ``np.ndarray`` whose first element is the constant 1, followed by
        one 0/1 indicator per feature (1 when the feature occurs among the
        stemmed tokens).
    """
    # Read the parameter, not a module-level global named ``sentence``; the
    # function previously only worked when such a global happened to exist.
    stemmed_words = {stem(word) for word in sentences.split()}
    x_line = [1] + [1 if feature in stemmed_words else 0 for feature in features]
    return np.array(x_line)


if __name__ == '__main__':
    features_file = get_rel_path_from_working_directory(
        __file__, '../data/features.txt')
    theta_file = get_rel_path_from_working_directory(
        __file__, '../data/theta.npy')

    with open(features_file) as f:
        features = f.read().split()
    theta = np.load(theta_file)

    while True:
        try:
            sentence = input('sentence: ')
        except (EOFError, KeyboardInterrupt):
            # End the interactive session cleanly instead of with a traceback.
            print()
            break
        x = create_x_data(sentence, features)
        # NOTE(review): sigmoid comes from nlp100_73; presumably it returns
        # the positive-class probability for x under theta -- confirm.
        h = sigmoid(x, theta)
        if h > 0.5:
            print('label: +1 (prediction: %f)' % h)
        else:
            print('label: -1 (prediction: %f)' % (1 - h))