Example #1
0
# Read the corpus: each sub-directory of `data_dir` holds one category's files.
print('> Read files...')
categories = os.listdir(data_dir)
print('> Split data to test and train')
documents, classes = get_texts(categories)
# 70/30 train/test split — presumably sklearn's train_test_split; verify import.
train_docs, test_docs, train_classes, test_classes = train_test_split(
    documents, classes, train_size=0.7)

# Naive Bayes classifier over the discovered categories; lower-cases tokens
# and removes English stop words before counting.
classifier = NaiveBayesTextClassifier(
    categories=categories,
    min_df=1,
    lowercase=True,
    stop_words=stopwords.words('english')
)

print('> Train classifier')
classifier.train(train_docs, train_classes)

print('> Classify test data...')
predicted_classes = classifier.classify(test_docs)

# Per-class precision/recall/F1 summary.
print('> Complete.')
print(classification_report(test_classes, predicted_classes))

# Aligned summary table of overall metrics.
print('-' * 42)
print("{:<25}: {:>4} articles".format("Test data size", len(test_classes)))
print("{:<25}: {:>6.2f} %".format(
    "Accuracy", 100 * accuracy_score(test_classes, predicted_classes))
)
print("{:<25}: {:>6.2f} %".format(
    "Kappa statistics", 100 * kappa(
        category_to_number(test_classes, categories),
Example #2
0
    categories=['1', '0', 't'],
    min_df=1,
    lowercase=True,
    # 127 English stop words
    stop_words=stopwords.words('english'))

# --- Train/test preparation (chronological split, no shuffling) ---
print("> Split data to test and train")
docs_train, docs_test, labels_train, labels_test = non_shuffling_train_test_split(
    train_data['tweet'], train_data['label'])

# Replace missing tweets/labels with the placeholder '1' before training.
docs_train = docs_train.fillna('1')
labels_train = labels_train.fillna('1')
# Sanity checks: confirm no NaNs remain and inspect the container type.
print(docs_train.isnull().any())
print(labels_train.isnull().any())
print(type(docs_train))

print("> Train classifier")
classifier.train(docs_train, labels_train)
n_train = len(docs_train)

# Training summary.
print("-" * 42)
print("Total", n_train, " tweets")
print("Number of words", classifier.bag.shape[1], " words")
print("Parse time", time.time() - start_time, "seconds")
print("-" * 42)

# -------------- Classify --------------- #

print("> Start classify data")
start_time = time.time()

# Fill missing test values the same way as the training data.
docs_test = docs_test.fillna('1')
labels_test = labels_test.fillna('1')
predictions = classifier.classify(docs_test)
print(predictions, labels_test)
Example #3
0
    # Walk each category directory and load its texts; `documents` and
    # `classes` accumulate in parallel (both presumably initialised before
    # this loop, outside the visible excerpt — confirm against full source).
    for category in categories:
        category_files_path = os.path.join(data_dir, category)
        text_ids = os.listdir(category_files_path)
        # Bind the directory once so each call only needs the file name.
        prepare_category_file = functools.partial(prepare_file, category_files_path)
        texts = [prepare_category_file(f) for f in text_ids]
        documents += texts
        # One label per loaded text, aligned by position.
        classes += [category] * len(texts)

    return documents, classes

# --- Corpus loading ---
# Each sub-directory of `data_dir` is one category; get_texts() returns the
# documents together with a parallel list of category labels.
# Fix: corrected typo in the user-facing message ('Gategories' -> 'Categories').
print('Get Categories...')
categories = os.listdir(data_dir)
print('Reading Data...')
documents, classes = get_texts(categories)

# Hold out 10% of the corpus for evaluation.
train_docs, test_docs, train_classes, test_classes = train_test_split(documents, classes, train_size=0.9)

# Naive Bayes text classifier: lower-cases tokens and removes the NLTK
# English stop-word list before counting term frequencies.
clf = NaiveBayesTextClassifier(categories=categories, min_df=1, lowercase=True, stop_words=stopwords.words('english'))

print('Training...')
clf.train(train_docs, train_classes)

print('Predicting...')
predicted_classes = clf.classify(test_docs)

# Per-class precision/recall/F1 report.
print('Result:')
print('-' * 72)
print(classification_report(test_classes, predicted_classes))
print('-' * 72)

Example #4
0
# Binary-sentiment Naive Bayes classifier over English text.
classifier = NaiveBayesTextClassifier(
    categories=[0, 1],
    min_df=1,
    lowercase=True,
    # 127 English stop words
    stop_words=stopwords.words('english'),
)

if options.test:
    # Evaluation mode: hold out 30% of the labelled data.
    print("> Split data to test and train")
    (train_docs, test_docs,
     train_classes, test_classes) = train_test_split(
        train_data.review, train_data.sentiment, train_size=0.7,
    )

    print("> Train classifier")
    classifier.train(train_docs, train_classes)
    total_docs = len(train_docs)

elif options.predict:
    # Prediction mode: train on all labelled data, load the unlabelled file.
    print("> Read test data")
    test_data = read_csv(TEST_DATA_FILE, sep='\t')

    print("> Train classifier")
    classifier.train(train_data.review, train_data.sentiment)
    total_docs = len(train_data)

# NOTE(review): total_docs is unbound if neither option flag is set — confirm
# the CLI guarantees one of them.
print("-" * 42)
print(f"{'Total':<25}: {total_docs:>6} articles")
print(f"{'Number of words':<25}: {classifier.bag.shape[1]:>6} words")
Example #5
0
# Timed set-up of the binary (0/1) sentiment classifier.
print("> Init classifier")
start_time = time.time()
classifier = NaiveBayesTextClassifier(
    categories=[0, 1],
    min_df=1,
    lowercase=True,
    # 127 English stop words
    stop_words=stopwords.words('english'),
)

if options.test:
    # Evaluation mode: carve a 70/30 train/test split from the labelled data.
    print("> Split data to test and train")
    train_docs, test_docs, train_classes, test_classes = train_test_split(
        train_data.review,
        train_data.sentiment,
        train_size=0.7,
    )

    print("> Train classifier")
    classifier.train(train_docs, train_classes)
    total_docs = len(train_docs)

elif options.predict:
    # Prediction mode: train on the full labelled set, load unlabelled data.
    print("> Read test data")
    test_data = read_csv(TEST_DATA_FILE, sep='\t')

    print("> Train classifier")
    classifier.train(train_data.review, train_data.sentiment)
    total_docs = len(train_data)

# NOTE(review): total_docs stays unbound when neither CLI flag is given —
# confirm the option parser enforces one of them.
print("-" * 42)
print(f"{'Total':<25}: {total_docs:>6} articles")
print(f"{'Number of words':<25}: {classifier.bag.shape[1]:>6} words")
print(f"{'Parse time':<25}: {time.time() - start_time:>6.2f} seconds")