def testExample1(self):
    """Train NaiveBayes on the AllElectronics dataset and check a known tuple.

    Expects 'allelectronics.csv' to be present in the working directory.
    A youth/medium-income student with fair credit should be classified 'yes'.
    """
    dataset = pd.read_csv('allelectronics.csv')
    self.model = NaiveBayes(dataset)
    self.model.train()
    datatuple = {'age': 'youth', 'income': 'medium',
                 'student': 'yes', 'credit_rating': 'fair'}
    # Predict once and reuse the result: the original called predict() twice
    # (once for a leftover debug print, once again inside the assertion).
    prediction = self.model.predict(datatuple)
    self.assertEqual(prediction, 'yes')
def test_nb_using_iris(self):
    """Fit NaiveBayes on the iris dataset; training accuracy must exceed 0.9."""
    iris = load_iris()
    features = iris['data']
    labels = iris['target']
    model = NaiveBayes()
    model.fit(features, labels)
    predictions = model.predict(features)
    assert accuracy_score(predictions, labels) > 0.9
def test_naive_bayes(self):
    """Fit NaiveBayes on a small mixed (int, str) dataset; accuracy must exceed 0.7."""
    samples = array([
        [1, 'S'], [1, 'M'], [1, 'M'], [1, 'S'], [1, 'S'],
        [2, 'S'], [2, 'M'], [2, 'M'], [2, 'L'], [2, 'L'],
        [3, 'L'], [3, 'M'], [3, 'M'], [3, 'L'], [3, 'L'],
    ])
    targets = array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])
    model = NaiveBayes()
    model.fit(samples, targets)
    predictions = model.predict(samples)
    assert accuracy_score(predictions, targets) > 0.7
def __init__(self, categories, path):
    """Wrap a NaiveBayes model for the given categories.

    Args:
        categories: forwarded unchanged to the underlying NaiveBayes model.
        path: stored on the instance for later use.
    """
    self.naive_bayes = NaiveBayes(categories)
    self.path = path
    # Starts empty; filled as examples get classified.
    self.classified_examples = {}
from dataset_parser import parse_dataset
from bayes import NaiveBayes
from test_train_split import dataset_split

# Load the dataset, split it with ratio 0.2 (presumably the held-out test
# fraction -- confirm against dataset_split), then train a NaiveBayes model
# on the 'class' column and print its evaluation score on the test split.
dataset = parse_dataset()
train, test = dataset_split(dataset, 0.2)
naive_bayes = NaiveBayes(train, 'class')
print(naive_bayes.evaluate(test))
+##+ +#+ ### +#+ +##+ +##+ +## +##+ +#+ +##+ +#+ +## +#+ +#+ +##+ +##+ ###+ +##+ +####++###++ +######### ++####### +###+++ """
# NOTE(review): the text above is the tail of a multi-line string literal
# (it looks like an ASCII rendering of a digit image) whose opening quotes
# lie outside this chunk -- its spacing is part of the string value, so it
# must not be reflowed.
# Train a perceptron network on the digit training set, then classify one
# digit image and print the guess. (Python 2 print statements.)
digitPercep = PerceptronNetwork(digitWidth * digitHeight, digitY)
digitPercep.train(digitWidth, digitHeight, digitTrainingImagesPath, digitTrainingLabelsPath)
print "Perceptron guess:"
print digitPercep.test_one(digitWidth, digitHeight, digit)
# Train a naive Bayes classifier on the same data and classify the same digit.
# The constructor args (10, 2) presumably mean 10 classes and 2 pixel values
# -- TODO confirm against NaiveBayes.__init__.
digitBayes = NaiveBayes(digitWidth * digitHeight, 10, 2)
digitBayes.train(digitWidth, digitHeight, digitTrainingImagesPath, digitTrainingLabelsPath)
print "Naive Bayes guess:"
print digitBayes.test_one(digitWidth, digitHeight, digit)
from bayes import NaiveBayes
from util import FileOperate
from util import train_test_split
from metrics import accuracy_score

# When running this code, set the playML folder as the source root.
if __name__ == '__main__':
    # 1. Load the data: 'spam' marks a junk message (1), 'ham' a normal one (0).
    data_path = '../input/SMSSpamCollection'
    label = '\t'
    loader = FileOperate(data_path, label)
    X, y = loader.load_data()

    # 2. Split into training and test sets (25% held out).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=.25, random_state=666)

    # 3. Fit the classifier on the training split.
    classifier = NaiveBayes()
    classifier.fit(X_train, y_train)

    # 4. Predict on the held-out split and report accuracy.
    predictions = classifier.predict(X_test)
    score = accuracy_score(y_test, predictions)
    print('准确率:', score)
from backends import RedisBackend from bayes import NaiveBayes from classifiers import FMClassifier from optparse import OptionParser parser = OptionParser(conflict_handler='resolve') parser.add_option('-h', dest='host') parser.add_option('-p', '--port', dest='port') options, args = parser.parse_args() clsfr = FMClassifier( pickle.load( open( '/Users/georgecourtsunis/projects/disqus/disqus/analytics/hadoop/thread_views/var/cmap.b', 'rb'))) backend = RedisBackend(host=options.host, port=options.port) bayes = NaiveBayes(backend=backend) _start = time.time() for file_name in args: print 'Training file %s' % file_name fd = open(file_name, 'r') _counter = 0 for line in fd: _counter += 1 if _counter % 100000 == 0: print _counter, (time.time() - _start) # if _counter % 1000000 == 0: # break # grab args vector, count = line.split('\t') vector = json.loads(vector)
# -*- coding:utf-8 -*-
from bayes import NaiveBayes


def loadDataSet():
    """Return hand-built training samples, their labels, and test samples.

    Labels: 0 means good, 1 means bad (abusive).
    """
    train_samples = [
        ['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],
        ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
        ['my', 'dalmation', 'is', 'so', 'cute', ' and', 'I', 'love', 'him'],
        ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
        ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
        ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid'],
    ]
    test_samples = [
        ['love', 'my', 'girl', 'friend'],
        ['stupid', 'garbage'],
        ['Haha', 'I', 'really', 'Love', 'You'],
        ['This', 'is', 'my', 'dog'],
    ]
    train_classes = [0, 1, 0, 1, 0, 1]  # 0: good; 1: bad
    return train_samples, train_classes, test_samples


if __name__ == "__main__":
    train_samples, train_classes, test_samples = loadDataSet()
    clf = NaiveBayes()
    clf.train(train_samples, train_classes)
    # Classify each test sample.
    for item in test_samples:
        clf.classify(item)
print "times: {}".format(digitPercepTimes) print "means: {}".format(digitPercepAvgs) print "stds: {}".format(digitPercepStds) # naive bayes classification #print "---------- Naive Bayes ----------" digitBayesAvgs = [] digitBayesStds = [] digitBayesTimes = [] for percent in percents: p = percent / 10.0 x = 5 res = [] times = [] for i in range(0, 5): digitBayes = NaiveBayes(digitWidth * digitHeight, 10, 2) t1 = time.time() digitBayes.train(digitWidth, digitHeight, digitTrainingImagesPath, digitTrainingLabelsPath, p) dt = time.time() - t1 percentageCorrect = digitBayes.test(digitWidth, digitHeight, digitTestImagesPath, digitTestLabelsPath) res.append(percentageCorrect) times.append(dt) avgTime = mean(times) avgAcc = mean(res) stdAcc = stddev(res) digitBayesAvgs.append(avgAcc) digitBayesStds.append(stdAcc) digitBayesTimes.append(avgTime)