Ejemplos de NaiveBayes en Python, ejemplos de bayes.NaiveBayes en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: bayesTest.py Proyecto: dineshdb/data-mining

 def testExample1(self):
     """Train on 'allelectronics.csv' and expect 'yes' for one sample tuple."""
     self.model = NaiveBayes(pd.read_csv('allelectronics.csv'))
     self.model.train()
     sample = dict(
         age='youth',
         income='medium',
         student='yes',
         credit_rating='fair',
     )
     # The prediction is printed for inspection, then requested again for the assertion.
     print(self.model.predict(sample))
     self.assertEqual(self.model.predict(sample), 'yes')

Ejemplo n.º 2

0

Mostrar archivo

Archivo: bayes_r.py Proyecto: diegodpgs/Movie-Recommender

def runCV(folders,test_sample):
    """Cross-validate NaiveBayes, holding out one element of ``folders`` at a time.

    For every fold, the remaining folds are concatenated into the training
    list, a ``NaiveBayes(train, test, False)`` model is trained, the metrics
    returned by ``NV.test()`` are collected, and the label returned by
    ``NV.test_one(test_sample)`` is tallied.

    Returns a ``(results_AVG, results_tests)`` tuple: the per-metric mean over
    all folds, and the number of folds that produced each label for
    ``test_sample``.
    """
    metric_names = ('Accuracy', 'Precision', 'Recall', 'F1')
    per_fold = dict((name, []) for name in metric_names)
    results_AVG = dict((name, []) for name in metric_names)
    votes = {'yes_8_10': 0, 'no_1_7': 0}

    for held_out, test in enumerate(folders):
        # Everything except the held-out fold becomes training data.
        train = []
        for position, fold in enumerate(folders):
            if position != held_out:
                train.extend(fold)

        NV = NaiveBayes(train, test, False)
        NV.train()
        result = NV.test()
        votes[NV.test_one(test_sample)] += 1

        for metric in result:
            per_fold[metric].append(result[metric])

    for metric in per_fold:
        values = per_fold[metric]
        results_AVG[metric] = sum(values) / len(values)

    return (results_AVG, votes)

Ejemplo n.º 3

0

Mostrar archivo

Archivo: test_naive_bayes.py Proyecto: judesmorning/MachineLearninginAction

 def test_nb_using_iris(self):
     """Fit NaiveBayes on the iris data and require > 0.9 accuracy on that same data."""
     dataset = load_iris()
     features, classes = dataset['data'], dataset['target']
     model = NaiveBayes()
     model.fit(features, classes)
     predicted = model.predict(features)
     assert accuracy_score(predicted, classes) > 0.9

Ejemplo n.º 4

0

Mostrar archivo

Archivo: bayes_srt.py Proyecto: diegodpgs/Movie-Recommender

def runCV(folders, new_data):
    """Tally, per sample, the label produced by each cross-validation model.

    One ``NaiveBayes(train, test, False)`` model is trained per element of
    ``folders`` (that element is the test fold, the others are concatenated
    into the training list). Each model then labels every item of
    ``new_data`` through ``NV.test_one``. ``NV.test()`` is not called here, so
    no accuracy metrics are gathered.

    Returns a dict mapping ``sample.file_name`` to a dict of
    ``{label: number_of_models_that_returned_it}``.
    """
    tallies = {}

    for held_out, test in enumerate(folders):
        # Everything except the held-out fold becomes training data.
        train = []
        for position, fold in enumerate(folders):
            if position != held_out:
                train.extend(fold)

        NV = NaiveBayes(train, test, False)
        NV.train()

        for sample in new_data:
            predicted = NV.test_one(sample)
            per_file = tallies.setdefault(sample.file_name, {})
            per_file[predicted] = per_file.get(predicted, 0) + 1

    return tallies

Ejemplo n.º 5

0

Mostrar archivo

Archivo: test_naive_bayes.py Proyecto: judesmorning/MachineLearninginAction

    def test_naive_bayes(self):
        """Fit NaiveBayes on a 15-row, two-column toy set; require > 0.7 accuracy on it."""
        first_column = [1] * 5 + [2] * 5 + [3] * 5
        second_column = [
            'S', 'M', 'M', 'S', 'S',
            'S', 'M', 'M', 'L', 'L',
            'L', 'M', 'M', 'L', 'L',
        ]
        data = array([[a, b] for a, b in zip(first_column, second_column)])
        labels = array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])

        model = NaiveBayes()
        model.fit(data, labels)
        predicted = model.predict(data)
        assert accuracy_score(predicted, labels) > 0.7

Ejemplo n.º 6

0

Mostrar archivo

from dataset_parser import parse_dataset
from bayes import NaiveBayes
from test_train_split import dataset_split

# Parse the dataset, split it into train and test parts, and print the score
# the classifier reports for the test part.
dataset = parse_dataset()
# NOTE(review): 0.2 is presumably the fraction held out for testing — confirm in dataset_split.
(train, test) = dataset_split(dataset, 0.2)
# NOTE(review): 'class' is presumably the name of the label column — confirm in NaiveBayes.
naive_bayes = NaiveBayes(train, 'class')
score = naive_bayes.evaluate(test)

print(score)

Ejemplo n.º 7

0

Mostrar archivo

         +##+     +#+       
         ###      +#+       
        +##+      +##+      
        +##       +##+      
        +#+       +##+      
        +#+       +##       
        +#+       +#+       
        +##+     +##+       
         ###+    +##+       
         +####++###++       
         +#########         
          ++#######         
            +###+++         
                            
                            
                            
                            """

# Train a perceptron on the digit training files and print its guess for the
# `digit` value defined earlier in the script.
digitPercep = PerceptronNetwork(digitWidth * digitHeight, digitY)
digitPercep.train(digitWidth, digitHeight, digitTrainingImagesPath,
                  digitTrainingLabelsPath)

print "Perceptron guess:"
print digitPercep.test_one(digitWidth, digitHeight, digit)

# Same procedure with Naive Bayes, using the same training files and sample.
# NOTE(review): the meaning of the constructor arguments 10 and 2 is not
# visible here — presumably class count and values per feature; confirm.
digitBayes = NaiveBayes(digitWidth * digitHeight, 10, 2)
digitBayes.train(digitWidth, digitHeight, digitTrainingImagesPath,
                 digitTrainingLabelsPath)

print "Naive Bayes guess:"
print digitBayes.test_one(digitWidth, digitHeight, digit)

Ejemplo n.º 8

0

Mostrar archivo

from bayes import NaiveBayes
from util import FileOperate
from util import train_test_split
from metrics import accuracy_score

# When running this code, set the playML folder as the source root folder.

if __name__ == '__main__':
    # 1. Load the data; "spam" marks a junk SMS (1), "ham" a non-junk SMS (0).
    data_path = '../input/SMSSpamCollection'
    # NOTE(review): despite its name, `label` holds a tab character — presumably
    # the field separator FileOperate uses; confirm.
    label = '\t'
    fo = FileOperate(data_path, label)
    X, y = fo.load_data()

    # 2. Split the dataset into a training set and a test set.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=.25,
                                                        random_state=666)

    # Train the model.
    nb = NaiveBayes()
    nb.fit(X_train, y_train)

    # Predict on the test set.
    y_pred = nb.predict(X_test)

    # Compute the score (the printed label below means "accuracy").
    score = accuracy_score(y_test, y_pred)
    print('准确率：', score)

Ejemplo n.º 9

0

Mostrar archivo

Archivo: spam_detector.py Proyecto: lorenanicole/python-naive-bayes-spam-classifier

 def __init__(self, categories, path):
     """Store ``path``, start with no classified examples, and build the model.

     The NaiveBayes model is constructed from ``categories``.
     """
     self.path = path
     self.classified_examples = {}
     self.naive_bayes = NaiveBayes(categories)

Ejemplo n.º 10

0

Mostrar archivo

class SpamHamDetector(object):
    """Train a NaiveBayes model on labelled e-mails and classify unlabelled ones.

    File layout under ``path``, as used by the methods below:
    ``labels.csv`` (columns ``Id`` and ``Prediction``),
    ``TR/TRAIN_<Id>.eml`` training messages, ``TT/TEST_<n>.eml`` messages to
    classify, and ``results.csv`` as the output file.
    """

    def __init__(self, categories, path):
        self.naive_bayes = NaiveBayes(categories)
        self.path = path
        # Maps a test e-mail number (str) to its predicted category (str).
        self.classified_examples = dict()

    def _labelled_emails(self):
        """Yield ``(email_id, label, filename)`` for each row of ``labels.csv``."""
        with open('%s/labels.csv' % self.path, 'r') as labels_csv:
            for row in csv.DictReader(labels_csv):
                # Build the path from self.path; a bare ``path`` is not defined
                # in this scope and only worked if a global happened to exist.
                filename = '%s/TR/TRAIN_%s.eml' % (self.path, row['Id'])
                yield row['Id'], row['Prediction'], filename

    def train(self):
        """Train the model on every e-mail listed in ``labels.csv``.

        Any e-mail that fails to load or train is logged and skipped.
        """
        for email_id, label, filename in self._labelled_emails():
            try:
                body = extract_body(filename)
                self.naive_bayes.train(int(label), body)
            except Exception as e:
                # Log the exception object: ``e.message`` does not exist on
                # Python 3 and would raise inside this handler.
                logger.info("Error training email %s: %s", email_id, e)

    def train_and_evaluate(self):
        """Train on a random 2250 of the ids 1..2500 and score on the other 250.

        Returns the summary string built by ``_calculate_results``. E-mails
        that fail to load, train or classify are logged and not counted.
        """
        all_ids = list(range(1, 2501))
        random.shuffle(all_ids)
        # Sets give O(1) membership tests for the per-row checks below.
        training_ids = set(all_ids[:2250])
        labeling_ids = set(all_ids[2250:])

        for email_id, label, filename in self._labelled_emails():
            if int(email_id) in training_ids:
                try:
                    body = extract_body(filename)
                    self.naive_bayes.train(int(label), body)
                except Exception as e:
                    logger.info("Error training email %s: %s", email_id, e)

        # Second pass: training must be complete before anything is classified.
        correct, incorrect = 0, 0
        for email_id, label, filename in self._labelled_emails():
            if int(email_id) in labeling_ids:
                try:
                    test_body = extract_body(filename)
                    result = self.naive_bayes.classify(test_body)
                    if result == int(label):
                        correct += 1
                    else:
                        incorrect += 1
                except Exception as e:
                    logger.info("Error classifying email %s: %s",
                                email_id, e)
        return self._calculate_results(correct, incorrect)

    def classify(self, size):
        """Classify test e-mails 1..``size`` and write the results to disk.

        Predictions are stored in ``classified_examples``; e-mails that fail
        are logged and left out. ``_store_results`` is called at the end.
        """
        test = self.path + '/TT/TEST_%s.eml'

        for counter in range(1, size + 1):
            try:
                test_body = extract_body(test % counter)
                self.classified_examples[str(counter)] = str(
                    self.naive_bayes.classify(test_body))
            except Exception as e:
                logger.info("Error classifying email %s: %s", counter, e)

        self._store_results()

    def display_results(self):
        """Return a text summary of spam ('0') and ham ('1') counts and shares.

        Both shares are reported as 0.0 when nothing has been classified.
        """
        categories = list(self.classified_examples.values())
        spam = categories.count('0')
        ham = categories.count('1')
        total = len(categories)
        # Guard the division: the dict is empty before classify() has run.
        spam_share = float(spam) / total if total else 0.0
        ham_share = float(ham) / total if total else 0.0
        return "Spam Emails: %s\nHam Emails: %s\nSpam Percent: %s\nHam Percent: %s" \
               % (spam, ham, spam_share, ham_share)

    def _calculate_results(self, correct, incorrect):
        """Return a summary string with the counts and the fraction correct.

        The fraction is 0.0 when no e-mail was scored at all.
        """
        total = correct + incorrect
        # Every classification may have failed and been skipped, leaving 0/0.
        performance = float(correct) / total if total else 0.0
        return "correct %s, incorrect %s, performance measurement %s" % (
            correct, incorrect, performance)

    def _store_results(self):
        """Write ``classified_examples`` to ``results.csv`` under ``path``."""
        with open('%s/results.csv' % self.path, 'w+') as resultscsv:
            writer = csv.DictWriter(resultscsv,
                                    fieldnames=['id', 'Prediction'])
            writer.writeheader()
            for example_num, category in self.classified_examples.items():
                writer.writerow({'id': example_num, 'Prediction': category})

Ejemplo n.º 11

0

Mostrar archivo

    ('fm', 'tech news', 'cnn'),
    ('fm', 'tech news', 'cnn'),
    ('fm', 'tech news', 'cnn'),
    ('fm', 'tech news', 'cnn'),
    ('fm', 'tech news', 'cnn'),
    ('fm', 'tech news', 'cnn'),
    ('fm', 'tech news', 'cnn'),
    ('fm', 'tech news', 'cnn'),
    ('fm', 'tech news', 'cnn'),
)

# Load the pickled category map, build the classifier and a Redis-backed
# NaiveBayes, and train it on `training_data` (defined earlier in the script).
# NOTE(review): the file handle passed to pickle.load is never closed, and
# pickle must only be used on trusted files.
cmap = pickle.load(open('../hadoop/thread_views/var/cmap.b',
        'rb'))
clsfr = FMClassifier(cmap)
backend = RedisBackend()
bayes = NaiveBayes(backend=backend)
bayes.train(training_data)
# now we are ready to test the bayes filter

# TODO add support for subfeatures in features

# Time a single classify() call and print the elapsed seconds.
import time
_start = time.time()
# Earlier trial inputs, kept for reference:
# bayes.classify(clsfr, ('aldfksjalskdjfasdflapoliticsadlskfajsldfj',), 'cnn')
# bayes.classify(clsfr, ('politics', 'aldfksjalspoliticskdjfasdflbusinessapmusicadlhomeskfajsldfj', 'music'), 'cnn')
# bayes.classify(clsfr, ('business', 'music', 'love', 'living', 'politics', 'music'), 'cnn', linear_weight_vector=True)
# bayes.classify(clsfr, ('tech', 'computers', 'news'), 'cnn')
line = 'gaming.www.myvidster.com/video/2797926/PornoTubecom_-_Keymon_Phoenix_Mister_Buck_Dee_Truth_Intrigue_and_Jermany_-_Browsin'
# The URL-like line is split on '/' and the pieces are passed as the feature sequence.
bayes.classify(clsfr, line.split('/'), 'myvidster.com', linear_weight_vector=True)
print (time.time() - _start), 'seconds'

Ejemplo n.º 12

0

Mostrar archivo

Archivo: test_bayes_yelp.py Proyecto: AJRenold/yelp_project

            labels.append('0')

# Build the test set from the reviews at index 200000 onward, keeping only
# businesses whose categories include 'Restaurants'.
for review in islice(reviews,200000,None):
    if 'Restaurants' in business_dict[review['business_id']]['categories']:

        # Label '1' when the review has at least one "useful" vote, '0' when
        # it has exactly zero; any other value is skipped.
        if review['votes']['useful'] >= 1:
            test.append(review['text'])
            correct_labels.append('1')
        elif review['votes']['useful'] == 0:
            test.append(review['text'])
            correct_labels.append('0')

print "data loaded"


# NOTE(review): the meaning of 60, 2000 and 50 is defined by NaiveBayes and is
# not visible here — confirm before changing them.
clfr = NaiveBayes(data,labels, 60, 2000, 50)
print "training done"
# Print the result of find_n_most_common_words(50), one indexed entry per line.
stops = clfr.find_n_most_common_words(50)
for i in range(len(stops)):
    print i, stops[i]

# Print element [1] of each entry returned by max_entropy(20).
max_ent = clfr.max_entropy(20)
for i in range(len(max_ent)):
    print i, max_ent[i][1]

# The string literal below is disabled code kept as a bare expression.
"""a, b = stops['1'], stops['0']
for i in range(len(a)):
    print i, a[i], b[i]"""
#clfr.find_max_prob_dif()

Ejemplo n.º 13

0

Mostrar archivo

Archivo: test.py Proyecto: Renl1001/MachineLearning

# -*- coding:utf-8 -*-
from bayes import NaiveBayes


def loadDataSet():
    """Return the toy corpus as ``(train_samples, train_classes, test_samples)``.

    ``train_samples`` is a list of token lists, ``train_classes`` holds the
    matching labels (0: good, 1: bad), and ``test_samples`` is a list of
    unlabelled token lists.
    """
    labelled_posts = [
        (0, ['my', 'dog', 'has', 'flea', 'problems', 'help', 'please']),
        (1, ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid']),
        # NOTE(review): ' and' carries a leading space — confirm it is intended.
        (0, ['my', 'dalmation', 'is', 'so', 'cute', ' and', 'I', 'love', 'him']),
        (1, ['stop', 'posting', 'stupid', 'worthless', 'garbage']),
        (0, ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him']),
        (1, ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']),
    ]
    unlabelled_posts = [
        ['love', 'my', 'girl', 'friend'],
        ['stupid', 'garbage'],
        ['Haha', 'I', 'really', 'Love', 'You'],
        ['This', 'is', 'my', 'dog'],
    ]

    # Unzip the (label, tokens) pairs into the two parallel lists callers expect.
    train_samples = [tokens for _, tokens in labelled_posts]
    train_classes = [label for label, _ in labelled_posts]
    return train_samples, train_classes, unlabelled_posts


if __name__ == "__main__":
    # Load the toy corpus, train the classifier, then classify each test sample.
    train_samples, train_classes, test_samples = loadDataSet()

    clf = NaiveBayes()
    clf.train(train_samples, train_classes)
    # test:
    # The return value of classify() is discarded here, so any visible output
    # has to come from classify() itself.
    for item in test_samples:
        clf.classify(item)

Ejemplo n.º 14

0

Mostrar archivo

Archivo: image_classification.py Proyecto: vaskal08/image-classification

# Report the timing and accuracy statistics collected for the perceptron.
print "times: {}".format(digitPercepTimes)
print "means: {}".format(digitPercepAvgs)
print "stds: {}".format(digitPercepStds)

# naive bayes classification
#print "---------- Naive Bayes ----------"
# One entry is appended to each of these lists per value in `percents`.
digitBayesAvgs = []
digitBayesStds = []
digitBayesTimes = []
for percent in percents:
    # p is passed to train() as an extra argument — presumably the fraction of
    # training data to use; confirm against NaiveBayes.train.
    p = percent / 10.0
    # NOTE(review): x is assigned but not used in the visible code.
    x = 5
    res = []
    times = []
    # Five independent train/test runs per setting.
    for i in range(0, 5):
        digitBayes = NaiveBayes(digitWidth * digitHeight, 10, 2)
        # Only the training call is timed.
        t1 = time.time()
        digitBayes.train(digitWidth, digitHeight, digitTrainingImagesPath,
                         digitTrainingLabelsPath, p)
        dt = time.time() - t1
        percentageCorrect = digitBayes.test(digitWidth, digitHeight,
                                            digitTestImagesPath,
                                            digitTestLabelsPath)
        res.append(percentageCorrect)
        times.append(dt)
    # Summarise the five runs using the mean/stddev helpers defined elsewhere.
    avgTime = mean(times)
    avgAcc = mean(res)
    stdAcc = stddev(res)
    digitBayesAvgs.append(avgAcc)
    digitBayesStds.append(stdAcc)
    digitBayesTimes.append(avgTime)

Ejemplo n.º 15

0

Mostrar archivo

Archivo: train.py Proyecto: gjcourt/bayes

#!/usr/bin/env python
import cPickle as pickle
import json
import time
from backends import RedisBackend
from bayes import NaiveBayes
from classifiers import FMClassifier
from optparse import OptionParser
# conflict_handler='resolve' lets '-h' be redefined as the host option instead
# of optparse's built-in help flag.
parser = OptionParser(conflict_handler='resolve')
parser.add_option('-h', dest='host')
parser.add_option('-p', '--port', dest='port')
options, args = parser.parse_args()

# NOTE(review): the pickle path is an absolute, machine-specific location and
# the file handle is never closed; pickle must only be used on trusted files.
clsfr = FMClassifier(pickle.load(open('/Users/georgecourtsunis/projects/disqus/disqus/analytics/hadoop/thread_views/var/cmap.b', 'rb')))
backend = RedisBackend(host=options.host, port=options.port)
bayes = NaiveBayes(backend=backend)

_start = time.time()
# Each positional argument is a training file; lines are tab-separated pairs
# whose first field is JSON.
for file_name in args:
    print 'Training file %s' % file_name
    # NOTE(review): fd is not closed in the visible code.
    fd = open(file_name, 'r')
    _counter = 0
    for line in fd:
        _counter += 1
        # Progress report every 100000 lines: line count and elapsed seconds.
        if _counter % 100000 == 0:
            print _counter, (time.time() - _start)
        # if _counter % 1000000 == 0:
        #     break
        # grab args
        # NOTE(review): the loop body appears to continue beyond this snippet;
        # `vector` and `count` are not used in the visible lines.
        vector, count = line.split('\t')
        vector = json.loads(vector)

Ejemplo n.º 16

0

Mostrar archivo

Archivo: test_bayes_yelp.py Proyecto: AJRenold/python_naive_bayes

            labels.append('0')

# Build the test set from the reviews at index 200000 onward, keeping only
# businesses whose categories include 'Restaurants'.
for review in islice(reviews,200000,None):
    if 'Restaurants' in business_dict[review['business_id']]['categories']:

        # Label '1' when the review has at least one "useful" vote, '0' when
        # it has exactly zero; any other value is skipped.
        if review['votes']['useful'] >= 1:
            test.append(review['text'])
            correct_labels.append('1')
        elif review['votes']['useful'] == 0:
            test.append(review['text'])
            correct_labels.append('0')

print "data loaded"


clfr = NaiveBayes(data,labels)
print "training done"
# Disabled: listing of the 50 most common words.
#stops = clfr.find_n_most_common_words(50)
#for i in range(len(stops)):
#    print i, stops[i]

# Print each entry returned by max_entropy(20) with its index.
max_ent = clfr.max_entropy(20)
for i in range(len(max_ent)):
    print i, max_ent[i]

# The string literal below is disabled code kept as a bare expression.
"""a, b = stops['1'], stops['0']
for i in range(len(a)):
    print i, a[i], b[i]"""
#clfr.find_max_prob_dif()

Ejemplo n.º 17

0

Mostrar archivo

from backends import RedisBackend
from bayes import NaiveBayes
from classifiers import FMClassifier
from optparse import OptionParser
# conflict_handler='resolve' lets '-h' be redefined as the host option instead
# of optparse's built-in help flag.
parser = OptionParser(conflict_handler='resolve')
parser.add_option('-h', dest='host')
parser.add_option('-p', '--port', dest='port')
options, args = parser.parse_args()

# NOTE(review): the pickle path is an absolute, machine-specific location and
# the file handle is never closed; pickle must only be used on trusted files.
clsfr = FMClassifier(
    pickle.load(
        open(
            '/Users/georgecourtsunis/projects/disqus/disqus/analytics/hadoop/thread_views/var/cmap.b',
            'rb')))
backend = RedisBackend(host=options.host, port=options.port)
bayes = NaiveBayes(backend=backend)

_start = time.time()
# Each positional argument is a training file; lines are tab-separated pairs
# whose first field is JSON.
for file_name in args:
    print 'Training file %s' % file_name
    # NOTE(review): fd is not closed in the visible code.
    fd = open(file_name, 'r')
    _counter = 0
    for line in fd:
        _counter += 1
        # Progress report every 100000 lines: line count and elapsed seconds.
        if _counter % 100000 == 0:
            print _counter, (time.time() - _start)
        # if _counter % 1000000 == 0:
        #     break
        # grab args
        # NOTE(review): the loop body appears to continue beyond this snippet;
        # `vector` and `count` are not used in the visible lines.
        vector, count = line.split('\t')
        vector = json.loads(vector)

Ejemplo n.º 18

0

Mostrar archivo

 def __init__(self, categories, path):
     """Store ``path``, start with no classified examples, and build the model.

     The NaiveBayes model is constructed from ``categories``.
     """
     self.path = path
     self.classified_examples = {}
     self.naive_bayes = NaiveBayes(categories)

Ejemplo n.º 19

0

Mostrar archivo

Archivo: spam_detector.py Proyecto: lorenanicole/python-naive-bayes-spam-classifier

class SpamHamDetector(object):
    """Train a NaiveBayes model on labelled e-mails and classify unlabelled ones.

    File layout under ``path``, as used by the methods below:
    ``labels.csv`` (columns ``Id`` and ``Prediction``),
    ``TR/TRAIN_<Id>.eml`` training messages, ``TT/TEST_<n>.eml`` messages to
    classify, and ``results.csv`` as the output file.
    """

    def __init__(self, categories, path):
        self.naive_bayes = NaiveBayes(categories)
        self.path = path
        # Maps a test e-mail number (str) to its predicted category (str).
        self.classified_examples = dict()

    def _labelled_emails(self):
        """Yield ``(email_id, label, filename)`` for each row of ``labels.csv``."""
        with open("%s/labels.csv" % self.path, "r") as labels_csv:
            for row in csv.DictReader(labels_csv):
                # Build the path from self.path; a bare ``path`` is not defined
                # in this scope and only worked if a global happened to exist.
                filename = "%s/TR/TRAIN_%s.eml" % (self.path, row["Id"])
                yield row["Id"], row["Prediction"], filename

    def train(self):
        """Train the model on every e-mail listed in ``labels.csv``.

        Any e-mail that fails to load or train is logged and skipped.
        """
        for email_id, label, filename in self._labelled_emails():
            try:
                body = extract_body(filename)
                self.naive_bayes.train(int(label), body)
            except Exception as e:
                # Log the exception object: ``e.message`` does not exist on
                # Python 3 and would raise inside this handler.
                logger.info("Error training email %s: %s", email_id, e)

    def train_and_evaluate(self):
        """Train on a random 2250 of the ids 1..2500 and score on the other 250.

        Returns the summary string built by ``_calculate_results``. E-mails
        that fail to load, train or classify are logged and not counted.
        """
        all_ids = list(range(1, 2501))
        random.shuffle(all_ids)
        # Sets give O(1) membership tests for the per-row checks below.
        training_ids = set(all_ids[:2250])
        labeling_ids = set(all_ids[2250:])

        for email_id, label, filename in self._labelled_emails():
            if int(email_id) in training_ids:
                try:
                    body = extract_body(filename)
                    self.naive_bayes.train(int(label), body)
                except Exception as e:
                    logger.info("Error training email %s: %s", email_id, e)

        # Second pass: training must be complete before anything is classified.
        correct, incorrect = 0, 0
        for email_id, label, filename in self._labelled_emails():
            if int(email_id) in labeling_ids:
                try:
                    test_body = extract_body(filename)
                    result = self.naive_bayes.classify(test_body)
                    if result == int(label):
                        correct += 1
                    else:
                        incorrect += 1
                except Exception as e:
                    logger.info("Error classifying email %s: %s", email_id, e)
        return self._calculate_results(correct, incorrect)

    def classify(self, size):
        """Classify test e-mails 1..``size`` and write the results to disk.

        Predictions are stored in ``classified_examples``; e-mails that fail
        are logged and left out. ``_store_results`` is called at the end.
        """
        test = self.path + "/TT/TEST_%s.eml"

        for counter in range(1, size + 1):
            try:
                test_body = extract_body(test % counter)
                self.classified_examples[str(counter)] = str(self.naive_bayes.classify(test_body))
            except Exception as e:
                logger.info("Error classifying email %s: %s", counter, e)

        self._store_results()

    def display_results(self):
        """Return a text summary of spam ("0") and ham ("1") counts and shares.

        Both shares are reported as 0.0 when nothing has been classified.
        """
        categories = list(self.classified_examples.values())
        spam = categories.count("0")
        ham = categories.count("1")
        total = len(categories)
        # Guard the division: the dict is empty before classify() has run.
        spam_share = float(spam) / total if total else 0.0
        ham_share = float(ham) / total if total else 0.0
        return "Spam Emails: %s\nHam Emails: %s\nSpam Percent: %s\nHam Percent: %s" % (
            spam,
            ham,
            spam_share,
            ham_share,
        )

    def _calculate_results(self, correct, incorrect):
        """Return a summary string with the counts and the fraction correct.

        The fraction is 0.0 when no e-mail was scored at all.
        """
        total = correct + incorrect
        # Every classification may have failed and been skipped, leaving 0/0.
        performance = float(correct) / total if total else 0.0
        return "correct %s, incorrect %s, performance measurement %s" % (
            correct,
            incorrect,
            performance,
        )

    def _store_results(self):
        """Write ``classified_examples`` to ``results.csv`` under ``path``."""
        with open("%s/results.csv" % self.path, "w+") as resultscsv:
            writer = csv.DictWriter(resultscsv, fieldnames=["id", "Prediction"])
            writer.writeheader()
            for example_num, category in self.classified_examples.items():
                writer.writerow({"id": example_num, "Prediction": category})