Ejemplo n.º 1
0

def score(y, predictions):
    """Compute accuracy, precision, recall and F-value for binary predictions.

    ``predictions`` is rounded with ``np.round`` before being compared with
    ``y``. Note that ``np.round`` rounds halves to the nearest even value, so
    a prediction of exactly 0.5 becomes 0.

    A true positive is counted where ``y + rounded_prediction == 2``, so the
    labels are expected to be encoded as 0/1.

    Args:
        y: Sequence or array of true labels.
        predictions: Array-like of predicted values, same length as ``y``.

    Returns:
        A tuple ``(accuracy, precision, recall, f_value)`` of floats. Any
        ratio whose denominator is zero (empty input, no positive
        predictions, no positive labels, or precision + recall == 0) is
        reported as 0.0 instead of NaN/inf.
    """
    # Accept plain lists as well as arrays; `y == 1` on a list would
    # otherwise evaluate to a single bool instead of an element-wise mask.
    y = np.asarray(y)
    predicted_values = np.round(predictions)

    def ratio(numerator, denominator):
        # Guard against division by zero, which would yield NaN/inf.
        if not denominator:
            return 0.0
        return float(numerator) / float(denominator)

    true_positives = np.sum((y + predicted_values) == 2)

    accuracy = ratio(np.sum((y - predicted_values) == 0), len(y))
    precision = ratio(true_positives, np.sum(predicted_values == 1))
    recall = ratio(true_positives, np.sum(y == 1))
    # F-value is the harmonic mean of precision and recall.
    f_value = ratio(2 * recall * precision, recall + precision)
    return accuracy, precision, recall, f_value


if __name__ == '__main__':
    # Resolve the three data files relative to this script's location.
    features_file, source_file, theta_file = (
        get_rel_path_from_working_directory(__file__, rel_path)
        for rel_path in ('../data/features.txt',
                         '../data/sentiment.txt',
                         '../data/theta.npy'))

    # The feature file is read as one whitespace-separated list of tokens.
    with open(features_file) as features_fh:
        features = features_fh.read().split()
    # The source file is read as one entry per line.
    with open(source_file) as source_fh:
        sentences = source_fh.readlines()
    # Parameter vector previously saved in NumPy's .npy format.
    theta = np.load(theta_file)

    # Build the design matrix and labels, predict, and evaluate.
    X, Y = create_x_y_data(sentences, features)
    predictions = sigmoid(X, theta)
    accuracy, precision, recall, f_value = score(Y, predictions)

    report = 'accuracy: %f \tprecision: %f \trecall: %f \tFvalue: %f' % (
        accuracy, precision, recall, f_value)
    print(report)
Ejemplo n.º 2
0
                                                                size)]
        removed_features = remove_no_appear_features(features,
                                                     training_sentences)
        train_x, train_y = create_x_y_data(training_sentences,
                                           removed_features)
        theta = np.random.rand(len(removed_features) + 1)
        for i in range(EPOCH):
            print('epoch' + str(i), end='\r')
            if i % 500 == 0 or i == EPOCH - 1:
                obj, grad = object_and_grad(train_x, train_y, theta)
                max_update_value = np.max(np.absolute(ETA * grad))
                print(
                    'epoch%d:   objective function value %f   max update value %.5e'
                    % (i, obj, max_update_value))
            theta = update_theta(ETA, train_x, train_y, theta)

        test_x, test_y = create_x_y_data(testing_sentences, removed_features)
        predictions = sigmoid(test_x, theta)
        result.append(score(test_y, predictions))
        print('tested by testing setntences.\n')

    accuracy = sum((e[0] for e in result)) / N
    precision = sum((e[1] for e in result)) / N
    recall = sum((e[2] for e in result)) / N
    f_value = sum((e[3] for e in result)) / N
    print('accuracy: %f \tprecision: %f \trecall: %f \tFvalue: %f' %
          (accuracy, precision, recall, f_value))
"""結果
accuracy: 0.750328 	precision: 0.750446 	recall: 0.750421 	Fvalue: 0.750312
"""
Ejemplo n.º 3
0
from mymodule.path_helpers import get_rel_path_from_working_directory
import numpy as np
from stemming.porter2 import stem
from nlp100_73 import sigmoid


def create_x_data(sentences, features):
    """Build the feature vector for a single sentence.

    Args:
        sentences: The sentence to encode, as one string. The parameter name
            is plural for backward compatibility, but a single
            whitespace-separated sentence is expected.
        features: Iterable of feature words (compared against the stemmed
            words of the sentence).

    Returns:
        A 1-D ``numpy`` array whose first element is the constant 1, followed
        by one 1/0 entry per feature indicating whether that feature occurs
        among the stemmed words of the sentence.
    """
    # Use the argument itself; the previous code read an unrelated global
    # named `sentence`, which only existed when run through the script's
    # interactive loop.
    words = sentences.split()
    # A set makes each per-feature membership test O(1).
    stemmed_words = {stem(w) for w in words}
    x_line = [1] + [1 if f in stemmed_words else 0 for f in features]
    return np.array(x_line)


if __name__ == '__main__':
    # Resolve the data files relative to this script's location.
    features_file = get_rel_path_from_working_directory(
        __file__, '../data/features.txt')
    theta_file = get_rel_path_from_working_directory(
        __file__, '../data/theta.npy')

    # The feature file is read as one whitespace-separated list of tokens.
    with open(features_file) as features_fh:
        features = features_fh.read().split()
    # Parameter vector previously saved in NumPy's .npy format.
    theta = np.load(theta_file)

    # Interactive loop: read a sentence, encode it, and print the predicted
    # label. The loop has no exit condition of its own.
    while True:
        sentence = input('sentence: ')
        x = create_x_data(sentence, features)
        h = sigmoid(x, theta)
        # Above 0.5 is reported as +1 with h; otherwise -1 with (1 - h).
        message = ('label: +1 (prediction: %f)' % h
                   if h > 0.5
                   else 'label: -1 (prediction: %f)' % (1 - h))
        print(message)