Example #1
0
# Fetch the texts and labels for the requested categories, then split them
# (train_size=0.7 is passed to train_test_split).
documents, classes = get_texts(categories)
split_parts = train_test_split(documents, classes, train_size=0.7)
train_docs, test_docs, train_classes, test_classes = split_parts

# Build the classifier with lowercasing enabled and the English stop-word
# list filtered out.
english_stop_words = stopwords.words('english')
classifier = NaiveBayesTextClassifier(
    categories=categories,
    min_df=1,
    lowercase=True,
    stop_words=english_stop_words,
)

# Fit on the training part, then label the held-out documents.
print('> Train classifier')
classifier.train(train_docs, train_classes)

print('> Classify test data...')
predicted_classes = classifier.classify(test_docs)

print('> Complete.')
report = classification_report(test_classes, predicted_classes)
print(report)

# Summary table: the label column is left-aligned in 25 characters and the
# percentage rows share one template.
percent_row = "{:<25}: {:>6.2f} %"

print('-' * 42)
test_size = len(test_classes)
print("{:<25}: {:>4} articles".format("Test data size", test_size))

accuracy = accuracy_score(test_classes, predicted_classes)
print(percent_row.format("Accuracy", 100 * accuracy))

# kappa receives both label sequences after they have been passed through
# category_to_number, not the raw labels.
true_numbers = category_to_number(test_classes, categories)
predicted_numbers = category_to_number(predicted_classes, categories)
agreement = kappa(true_numbers, predicted_numbers)
print(percent_row.format("Kappa statistics", 100 * agreement))
Example #2
0
# Training summary: document count, second dimension of classifier.bag
# (reported as the number of words) and the time elapsed since start_time.
print("{:<25}: {:>6} articles".format("Total", total_docs))

word_count = classifier.bag.shape[1]
print("{:<25}: {:>6} words".format("Number of words", word_count))

parse_seconds = time.time() - start_time
print("{:<25}: {:>6.2f} seconds".format("Parse time", parse_seconds))

print("-" * 42)

# -------------- Classify --------------- #

print("> Start classify data")
# start_time is reassigned here, so later readings measure from this point.
start_time = time.time()

if options.test:
    # Evaluation mode: classify test_docs and compare with test_classes.
    predicted_classes = classifier.classify(test_docs)
    report = classification_report(test_classes, predicted_classes)
    print(report)
    print('-' * 42)

    # Both score rows share one template; scores are scaled to percent.
    percent_row = "{:<25}: {:>6.2f} %"
    print("{:<25}: {:>6} articles".format("Test data size", len(test_classes)))
    accuracy = accuracy_score(test_classes, predicted_classes)
    print(percent_row.format("Accuracy", 100 * accuracy))
    agreement = kappa(test_classes, predicted_classes)
    print(percent_row.format("Kappa statistics", 100 * agreement))

elif options.predict:
    # Prediction mode: classify the `review` attribute of test_data.
    predicted_classes = classifier.classify(test_data.review)

    print("> Save predicted results")
Example #3
0
# Split the tweet texts and their labels into train/test parts.
train_docs, test_docs, train_classes, test_classes = (
    non_shuffling_train_test_split(train_data['tweet'], train_data['label'])
)

# Missing values are replaced with the placeholder string '1' before training.
# NOTE(review): the labels receive the *string* '1' as well -- confirm this
# matches the dtype of the label column.
train_docs = train_docs.fillna('1')
train_classes = train_classes.fillna('1')

# Diagnostics: report whether any nulls remain and which container type is
# handed to the classifier.
print(train_docs.isnull().any())
print(train_classes.isnull().any())
print(type(train_docs))

print("> Train classifier")
classifier.train(train_docs, train_classes)
total_docs = len(train_docs)

# print() already separates its arguments with a single space, so the unit
# words must not carry a leading space of their own (that produced "N  tweets").
print("-" * 42)
print("Total", total_docs, "tweets")
print("Number of words", classifier.bag.shape[1], "words")
print("Parse time", time.time() - start_time, "seconds")
print("-" * 42)

# -------------- Classify --------------- #

print("> Start classify data")
# Restart the timer so "Computation time" below covers this phase only.
start_time = time.time()

# The held-out split gets the same placeholder treatment as the training split.
test_docs = test_docs.fillna('1')
test_classes = test_classes.fillna('1')
predicted_classes = classifier.classify(test_docs)
print(predicted_classes, test_classes)
print(classification_report(test_classes, predicted_classes))
print('-' * 42)
# The documents are tweets, so the size line uses the same noun as the
# training summary above.
print("Test data size", len(test_classes), "tweets")
print("Accuracy", 100 * accuracy_score(test_classes, predicted_classes), "%")
end_time = time.time()
print("Computation time", end_time - start_time, "seconds")
print('-' * 42)
Example #4
0
    total_docs = len(train_data)

# Training summary framed by two 42-character rules.
print("-" * 42)
print("{:<25}: {:>6} articles".format("Total", total_docs))

# The second dimension of classifier.bag is what gets reported as the
# number of words.
bag_columns = classifier.bag.shape[1]
print("{:<25}: {:>6} words".format("Number of words", bag_columns))

elapsed = time.time() - start_time
print("{:<25}: {:>6.2f} seconds".format("Parse time", elapsed))
print("-" * 42)

# -------------- Classify --------------- #

print("> Start classify data")
# Reassign start_time so later readings measure from this point.
start_time = time.time()

if options.test:
    # Evaluation mode: classify test_docs and compare the predictions with
    # the known labels in test_classes.
    predicted_classes = classifier.classify(test_docs)

    print(classification_report(test_classes, predicted_classes))
    print('-' * 42)
    print("{:<25}: {:>6} articles".format("Test data size", len(test_classes)))
    # Both scores are multiplied by 100 so they print as percentages.
    print("{:<25}: {:>6.2f} %".format(
        "Accuracy", 100 * accuracy_score(test_classes, predicted_classes)))
    print("{:<25}: {:>6.2f} %".format(
        "Kappa statistics", 100 * kappa(test_classes, predicted_classes)))

elif options.predict:
    # Prediction mode: classify the `review` attribute of test_data.
    predicted_classes = classifier.classify(test_data.review)

    # Announce the destination path before the results are written.
    print("> Save predicted results")
    print("> {}".format(PREDICTED_DATA_FILE))
    np.savetxt(PREDICTED_DATA_FILE,