def train(samples_proportion=0.7):
    global words_in_ham, ham_word_count, words_in_spam, spam_word_count, raw_ham_prob, raw_spam_prob

    ham, spam = read_spam_ham()

    print("Spam size: " + str(len(spam)) + " Ham size: " + str(len(ham)))

    all_emails = append_ham_and_spam(ham, spam)

    random.shuffle(all_emails)

    print('Corpus size = ' + str(len(all_emails)) + ' emails')

    features = [(Preprocessor.get_features(email, ' '), label)
                for (email, label) in all_emails]

    print('Collected ' + str(len(features)) + ' feature sets')
    '''
    # define the support value in %
    support = 10
    spam_support_count = (len(spam) * support) / 100
    ham_support_count = (len(ham) * support) / 100
    print('Spam support count: ' + str(spam_support_count))
    print('Ham support count: ' + str(ham_support_count))
    # get the spam frequent itemset and the ham frequent itemset
    # spam_frequent, ham_frequent = get_frequent(all_features, spam_support_count, ham_support_count)
    # train our own Naive Bayes classifier and collect a dictionary of raw word probabilities
    '''

    train_size = int(len(features) * samples_proportion)

    train_set, test_set = features[:train_size], features[train_size:]

    ham_mail_count, spam_mail_count = mails_in_ham_spam(train_set)

    spam_prior = 1.0 * spam_mail_count / len(train_set)
    ham_prior = 1.0 * ham_mail_count / len(train_set)

    words_in_ham, words_in_spam = frequency_in_ham_spam(train_set)

    spam_vocab = len(spam_word_count)
    ham_vocab = len(ham_word_count)

    t = get_probabilities_in_each_class(ham_prior, words_in_ham, ham_vocab,
                                        ham_word_count, raw_ham_prob,
                                        raw_spam_prob, spam_prior,
                                        words_in_spam, spam_vocab,
                                        spam_word_count, test_set, train_set)

    ham_prior, words_in_ham, ham_vocab, raw_ham_prob, raw_spam_prob, spam_prior, words_in_spam, spam_vocab, test_set, train_set = get_parameters(
        t)
    #print("Train Size:" + str(len(train_set)) + str(' Test size:') + str(len(test_set)))

    #evaluate(train_set, test_set, raw_spam_prob, raw_ham_prob, words_in_spam, words_in_ham, spam_vocab, ham_vocab,
    #         spam_prior,
    #         ham_prior)

    classifier = NaiveBayesClassifier(list(spam_word_count),
                                      list(ham_word_count))
    t = classifier.prob_classify(train_set).max()
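The helper functions Example #1 relies on (frequency_in_ham_spam, get_probabilities_in_each_class, etc.) are not shown. A minimal sketch of the add-one (Laplace) smoothing such a helper would typically apply; the names word_prob, class_counts, total_tokens, and vocab_size are placeholders, not part of the original code:

def word_prob(word, class_counts, total_tokens, vocab_size):
    # P(word | class) with add-one smoothing: an unseen word gets a small
    # non-zero probability instead of zeroing out the whole product.
    return (class_counts.get(word, 0) + 1.0) / (total_tokens + vocab_size)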
Example #2
def train(labeled_featuresets, estimator=ELEProbDist):
    """Train a classifier from the labeled feature sets."""
    # Create the P(label) distribution
    label_probdist = estimator(label_freqdist)

    # Create the P(fval | label, fname) distribution
    feature_probdist = {}

    return NaiveBayesClassifier(label_probdist, feature_probdist)
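The snippet above never builds label_freqdist and leaves feature_probdist empty. A minimal sketch of the missing counting step, assuming NLTK's FreqDist and that labeled_featuresets is a list of (featureset, label) pairs:

from nltk.probability import FreqDist

label_freqdist = FreqDist()
for featureset, label in labeled_featuresets:
    label_freqdist[label] += 1
# estimator(label_freqdist) then smooths these counts into P(label);
# ELEProbDist uses the expected likelihood estimate (add 0.5).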
Example #3
    def __init__(self,
                 label_probdist=None,
                 feature_probdist=None,
                 estimator=ELEProbDist):
        """Initialize NBClassifier."""
        self._estimator = estimator

        # in case arguments are specified (i.e. when restoring the classifier)
        if all([label_probdist, feature_probdist]):
            self._classifier = NaiveBayesClassifier(
                label_probdist=label_probdist,
                feature_probdist=feature_probdist,
            )
        else:
            self._classifier = None
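The "restoring" branch implies the two distributions are persisted somewhere. A hypothetical round-trip with pickle, assuming the wrapper class is called NBClassifier as the docstring suggests (file and variable names are illustrative only):

import pickle

# save the trained distributions
with open('nb_state.pickle', 'wb') as f:
    pickle.dump((label_probdist, feature_probdist), f)

# later: restore them and rebuild the wrapper
with open('nb_state.pickle', 'rb') as f:
    label_probdist, feature_probdist = pickle.load(f)
restored = NBClassifier(label_probdist=label_probdist,
                        feature_probdist=feature_probdist)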
Example #4
def train(labeled_featuresets, estimator=ELEProbDist):

    # Create the P(label) distribution
    label_probdist = estimator(label_freqdist)

    # Create the P(fval|label, fname) distribution
    feature_probdist = {}

    return NaiveBayesClassifier(label_probdist, feature_probdist)
Example #5
import random

import nltk
from nltk import NaiveBayesClassifier
from nltk.corpus import movie_reviews

documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]
random.shuffle(documents)

allWords = []
for w in movie_reviews.words():
    allWords.append(w.lower())

allWords = nltk.FreqDist(allWords)
wordFeatures = list(allWords.keys())[:3000]


def findFeatures(document):
    words = set(document)
    features = {}
    for w in wordFeatures:
        features[w] = (w in words)
    return features

#print((findFeatures(movie_reviews.words('neg/cv000_29416.txt'))))
featureSets = [(findFeatures(rev), category) for (rev, category) in documents]

new_training_set = featureSets[:100]
testing_set = featureSets[100:]


cl = NaiveBayesClassifier.train(new_training_set)
print(nltk.classify.accuracy(cl, testing_set))
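A natural follow-up with the same NLTK classifier is to inspect which words carried the most weight:

cl.show_most_informative_features(15)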


    print("Also see: Hindu Marriage Act")
elif resultc != -1 or y == "Christian":
    f1 = open("Christian.txt")
    f2 = open("christian01.txt")
    l1 = f1.read()
    arr = sent_tokenize(l1)
    l2 = f2.read()
    arr2 = word_tokenize(l2)
    for sentence, label in zip(arr, arr2):
        li1.append((sentence, label))
    f1.close()
    f2.close()
    print("Also see: Indian Divorce Act")
mycase = sys.argv[3]
#mycase=input("enter your case ")
c1 = 0
c2 = 0
model = NaiveBayesClassifier(li1)
#model=nltk.NaiveBayesClassifier.train(li1)
#print(model.classify(mycase))
case = sent_tokenize(mycase)
print(mycase)
for i in range(0, len(case)):
    temp = model.classify(case[i])

    if temp == "0":
        c1 = c1 + 1
    else:
        c2 = c2 + 1
print("Probability of winning case", (c1 / (c1 + c2)) * 100)
Example #8
def train(labeled_featuresets, estimator=ELEProbDist):
    label_probdist = estimator(label_freqdist)
    feature_probdist = {}
    return NaiveBayesClassifier(label_probdist, feature_probdist)
Example #9
            for word in features:

                if word.lower() not in labelled_features:
                    # copy the template so tokens don't share one counter
                    labelled_features[word.lower()] = dict(label_count)

                labelled_features[word.lower()][label] += features[word]

            print("Currently at %d distinct tokens and %d papers" % (
                len(labelled_features), samplecount))

    label_probdist = get_label_probdist(labelled_features)

    feature_probdist = get_feature_probdist(labelled_features)

    classifier = NaiveBayesClassifier(label_probdist, feature_probdist)

    for samplefile in test_samples:
        features = {}

        p = PaperParser()
        p.parsePaper(samplefile)

        for sentence in p.extractRawSentences():
            tokens = nltk.word_tokenize(sentence)

            for word in tokens:
                features[word] = True

        dirname = os.path.basename(os.path.dirname(samplefile))
        label = labels[dirname]
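get_label_probdist and get_feature_probdist are not shown in Example #9. A rough sketch of what they might compute, assuming labelled_features maps token -> {label: count} and reusing NLTK's ELEProbDist:

from nltk.probability import ELEProbDist, FreqDist

def get_label_probdist(labelled_features):
    # Pool the per-token counts into one frequency count per label.
    label_freqdist = FreqDist()
    for counts in labelled_features.values():
        for label, count in counts.items():
            label_freqdist[label] += count
    return ELEProbDist(label_freqdist)

def get_feature_probdist(labelled_features):
    # One P(fval | label, fname) distribution per (label, token) pair,
    # which is the shape nltk's NaiveBayesClassifier expects.
    feature_probdist = {}
    for token, counts in labelled_features.items():
        for label, count in counts.items():
            freqdist = FreqDist()
            freqdist[True] = count
            feature_probdist[(label, token)] = ELEProbDist(freqdist, bins=2)
    return feature_probdist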
Example #10
def updateNaiveBayes():
    cl = NaiveBayesClassifier.train(new_training_set)
    print(nltk.classify.accuracy(cl, testing_set))
Example #11
import pickle


def train():

    classifier = NaiveBayesClassifier(training_data)
    # Persist the trained classifier so it can be reloaded later.
    with open('algorithm.pickle', 'wb') as f:
        pickle.dump(classifier, f)
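The pickled classifier can be reloaded later without retraining; a minimal counterpart (a hypothetical helper, using the same file name as above):

def load():
    with open('algorithm.pickle', 'rb') as f:
        return pickle.load(f)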
import json
import re

from textblob.classifiers import NaiveBayesClassifier


def clean_tweet(tweet):
    # Strip @mentions, URLs, and any character that is not alphanumeric.
    return ' '.join(
        re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ",
               tweet).split())


mydata = []
with open('convertcsv.json', 'r') as json_data:
    data = json.load(json_data)
for d in data:
    # Label non-hate tweets "pos" and hate tweets "neg";
    # TextBlob expects a list of (text, label) tuples.
    if d.get('hate_speech') == 0:
        mydata.append((clean_tweet(d.get('tweet')), "pos"))
    else:
        mydata.append((clean_tweet(d.get('tweet')), "neg"))

cl = NaiveBayesClassifier(mydata)
print(cl.classify("This is an amazing library!"))
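TextBlob's classifier can also report per-label probabilities instead of only the winning label:

prob_dist = cl.prob_classify("This is an amazing library!")
print(prob_dist.max())                  # most likely label
print(round(prob_dist.prob("pos"), 2))  # probability of "pos"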