Example #1

import csv

import nltk  # nltk.word_tokenize requires the 'punkt' tokenizer data

class SentimentAnalyzer:
    threshold = 1  # a token must appear more than this many times to become a feature

    def __init__(self, file_location):
        self.features = set()
        raw_data = []
        training_data = []
        word_freq = {}
        # the training CSV has a 'headline' column plus six emotion-intensity
        # columns scored 0-100 (note the leading space in the column names)
        with open(file_location, newline='') as data:
            data_reader = csv.DictReader(data)
            for row in data_reader:
                h_tokens = nltk.word_tokenize(row['headline'].lower())

                # count corpus-wide token frequencies
                for token in h_tokens:
                    word_freq[token] = word_freq.get(token, 0) + 1

                # one training triple per (headline, emotion) pair, with each
                # 0-100 intensity rescaled to [0, 1]
                for label, column in enumerate((' anger', ' disgust', ' fear',
                                                ' joy', ' sadness',
                                                ' surprise')):
                    raw_data.append(
                        (h_tokens, label, float(row[column]) / 100))

        # keep only tokens seen more than `threshold` times as features
        for token, freq in word_freq.items():
            if freq > self.threshold:
                self.features.add(token)

        print "F-vec size: " + str(len(self.features))

        # encode each example as a binary bag-of-words vector over the feature
        # set (set iteration order is stable within one run, so training and
        # prediction vectors line up)
        for tokens, label, intensity in raw_data:
            f_vector = [1 if f in tokens else 0 for f in self.features]
            training_data.append((f_vector, label, intensity))

        # NaiveBayesClassifier (assumed defined elsewhere) takes the number of
        # classes (six emotions) and the feature-vector length
        self.classifier = NaiveBayesClassifier(6, len(self.features))
        self.classifier.train(training_data)

    def _vectorize(self, text):
        # same binary bag-of-words encoding used for the training data
        token_set = set(nltk.word_tokenize(text.lower()))
        return [1 if f in token_set else 0 for f in self.features]

    def predict(self, text):
        return self.classifier.predict(self._vectorize(text))

    def predict_all(self, text):
        return self.classifier.predict_all(self._vectorize(text))

    def test(self, test_file_location):
        with open(test_file_location, newline='') as test_data:
            test_reader = list(csv.DictReader(test_data))
        total = 0
        correct = 0

        for row in test_reader:
            total += 1
            emotions = [float(row[c]) for c in (' anger', ' disgust', ' fear',
                                                ' joy', ' sadness',
                                                ' surprise')]
            # candidate labels: emotions scoring above 1 on the 0-100 scale,
            # keeping at most the three strongest
            acceptable_emotions = [i for i in range(len(emotions))
                                   if emotions[i] > 1]
            acceptable_emotions = sorted(acceptable_emotions,
                                         reverse=True,
                                         key=lambda i: emotions[i])[:3]

            prediction = self.predict(row['headline'])[0]

            if prediction in acceptable_emotions:
                correct += 1

        return float(correct) / total
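

# Usage sketch (an addition, not part of the original example): it assumes a
# SemEval-2007 "Affective Text"-style CSV with a 'headline' column plus the
# six emotion columns used above; both file paths are hypothetical.
analyzer = SentimentAnalyzer('affective_text.train.csv')
print(analyzer.predict('markets tumble as recession fears grow'))
print('top-3 accuracy: %.3f' % analyzer.test('affective_text.test.csv'))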


Example #2

import glob
import random
import re
from collections import Counter

# The original snippet begins mid-loop, so the corpus-reading loop below is a
# reconstruction. It assumes a directory of message files at a hypothetical
# path, where "ham" in a file name marks a non-spam message, and it assumes a
# NaiveBayesClassifier defined elsewhere.
data = []
for fn in glob.glob('spam_corpus/*/*'):
    is_spam = 'ham' not in fn
    with open(fn, 'r') as file:
        for line in file:
            if line.startswith("Subject:"):
                subject = re.sub(r"^Subject:", "", line).strip()
                data.append((subject, is_spam))


def split_data(data, p):
    """Split data into a fraction p for training and the rest for testing."""
    cut = int(len(data) * p)
    return data[:cut], data[cut:]


def in_random_order(data):
    """Return a shuffled copy of data, leaving the original list untouched."""
    result = data[:]
    random.shuffle(result)
    return result
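
# Aside (an addition, not in the original): in_random_order(data) behaves like
# random.sample(data, len(data)), which also returns a shuffled copy.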


random.seed(0)  # fix the seed so the train/test split is reproducible
train_data, test_data = split_data(in_random_order(data), 0.75)
classifier = NaiveBayesClassifier()
classifier.train(train_data)

# (message, actual label, predicted spam probability) for each test message
classified = [(message, is_spam, classifier.classify(message))
              for message, is_spam in test_data]

# count (actual is_spam, predicted is_spam) pairs at a 0.5 threshold
counts = Counter(
    (is_spam, spam_prob > 0.5) for (_, is_spam, spam_prob) in classified)
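
# Follow-up sketch (an addition): precision and recall at the 0.5 threshold,
# computed from the (actual, predicted) counts above. Counter returns 0 for
# missing keys, so absent combinations are handled.
tp = counts[(True, True)]    # spam correctly flagged as spam
fp = counts[(False, True)]   # ham incorrectly flagged as spam
fn = counts[(True, False)]   # spam that slipped through as ham
precision = tp / (tp + fp) if tp + fp else 0.0
recall = tp / (tp + fn) if tp + fn else 0.0
print('precision: %.3f, recall: %.3f' % (precision, recall))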