예제 #1
0
 def testExample1(self):
     """Train on the AllElectronics CSV and check a single prediction.

     Reads ``allelectronics.csv``, builds a ``NaiveBayes`` model that is
     kept on ``self.model``, trains it, prints the prediction for one
     record and asserts that this prediction equals ``'yes'``.
     """
     frame = pd.read_csv('allelectronics.csv')
     self.model = NaiveBayes(frame)
     self.model.train()
     # Attribute values of the one record to classify.
     sample = dict(
         age='youth',
         income='medium',
         student='yes',
         credit_rating='fair',
     )
     # predict() runs once for the printout and once for the assertion.
     print(self.model.predict(sample))
     self.assertEqual(self.model.predict(sample), 'yes')
 def test_nb_using_iris(self):
     """Fit NaiveBayes on the iris data and require accuracy above 0.9.

     The model is scored on the same samples it was fitted on.
     """
     bunch = load_iris()
     features, classes = bunch['data'], bunch['target']
     classifier = NaiveBayes()
     classifier.fit(features, classes)
     predicted = classifier.predict(features)
     accuracy = accuracy_score(predicted, classes)
     assert accuracy > 0.9
    def test_naive_bayes(self):
        """Fit NaiveBayes on a 15-row toy set and require accuracy above 0.7.

        Every row pairs an integer feature (1, 2 or 3) with a size code
        ('S', 'M' or 'L'); labels are -1 or 1. The model is scored on the
        same rows it was fitted on.
        """
        # Rows grouped by the first feature value, five rows per group.
        grouped = (
            (1, ('S', 'M', 'M', 'S', 'S')),
            (2, ('S', 'M', 'M', 'L', 'L')),
            (3, ('L', 'M', 'M', 'L', 'L')),
        )
        rows = [[first, code] for first, codes in grouped for code in codes]
        data = array(rows)
        # Labels in the same order as the rows, again five per group.
        labels = array(
            [-1, -1, 1, 1, -1]
            + [-1, -1, 1, 1, 1]
            + [1, 1, 1, 1, -1]
        )

        classifier = NaiveBayes()
        classifier.fit(data, labels)
        predicted = classifier.predict(data)
        accuracy = accuracy_score(predicted, labels)
        assert accuracy > 0.7
예제 #4
0
 def __init__(self, categories, path):
     """Create the wrapped classifier and remember the given path.

     Args:
         categories: Passed unchanged to the ``NaiveBayes`` constructor.
         path: Stored unchanged on ``self.path``.
     """
     self.naive_bayes = NaiveBayes(categories)
     self.path = path
     # Starts out empty; nothing is recorded during construction.
     self.classified_examples = {}
예제 #5
0
from dataset_parser import parse_dataset
from bayes import NaiveBayes
from test_train_split import dataset_split

# Parse the dataset and split it; 0.2 is the ratio handed to dataset_split
# (presumably the test share -- confirm against test_train_split).
dataset = parse_dataset()
train, test = dataset_split(dataset, 0.2)

# Build the classifier from the training part ('class' is its second
# constructor argument) and evaluate it on the other part.
naive_bayes = NaiveBayes(train, 'class')
score = naive_bayes.evaluate(test)
print(score)
예제 #6
0
         +##+     +#+       
         ###      +#+       
        +##+      +##+      
        +##       +##+      
        +#+       +##+      
        +#+       +##       
        +#+       +#+       
        +##+     +##+       
         ###+    +##+       
         +####++###++       
         +#########         
          ++#######         
            +###+++         
                            
                            
                            
                            """

# Train a perceptron on the digit training set, then classify the single
# sample held in `digit` (defined above this block).
digitPercep = PerceptronNetwork(digitWidth * digitHeight, digitY)
digitPercep.train(digitWidth, digitHeight, digitTrainingImagesPath,
                  digitTrainingLabelsPath)

# Single-argument print(...) produces the same output on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print("Perceptron guess:")
print(digitPercep.test_one(digitWidth, digitHeight, digit))

# Repeat the same train-then-classify sequence with the naive Bayes model.
digitBayes = NaiveBayes(digitWidth * digitHeight, 10, 2)
digitBayes.train(digitWidth, digitHeight, digitTrainingImagesPath,
                 digitTrainingLabelsPath)

print("Naive Bayes guess:")
print(digitBayes.test_one(digitWidth, digitHeight, digit))
예제 #7
0
from bayes import NaiveBayes
from util import FileOperate
from util import train_test_split
from metrics import accuracy_score

# 运行这部分代码的时候,要将 playML 这个文件夹设置为源代码的根文件夹

if __name__ == '__main__':
    # 1. Load the data: spam marks a junk SMS (1), ham a non-junk SMS (0).
    data_path = '../input/SMSSpamCollection'
    separator = '\t'
    loader = FileOperate(data_path, separator)
    X, y = loader.load_data()

    # 2. Split the data into a training set and a test set.
    split = train_test_split(X, y, test_size=.25, random_state=666)
    X_train, X_test, y_train, y_test = split

    # Train the classifier.
    nb = NaiveBayes()
    nb.fit(X_train, y_train)

    # Predict on the test set.
    y_pred = nb.predict(X_test)

    # Compute and report the accuracy score.
    score = accuracy_score(y_test, y_pred)
    print('准确率:', score)
예제 #8
0
from backends import RedisBackend
from bayes import NaiveBayes
from classifiers import FMClassifier
from optparse import OptionParser
# conflict_handler='resolve' lets the custom -h (host) option take over the
# -h flag that optparse would otherwise reserve for help.
parser = OptionParser(conflict_handler='resolve')
parser.add_option('-h', dest='host')
parser.add_option('-p', '--port', dest='port')
options, args = parser.parse_args()

# NOTE(review): pickle.load can execute arbitrary code from the file, and the
# path is hard-coded to one developer's machine -- confirm the file is trusted
# and consider making the path configurable.
# The context manager closes the handle once the mapping has been loaded.
with open(
        '/Users/georgecourtsunis/projects/disqus/disqus/analytics/hadoop/thread_views/var/cmap.b',
        'rb') as cmap_file:
    clsfr = FMClassifier(pickle.load(cmap_file))
backend = RedisBackend(host=options.host, port=options.port)
bayes = NaiveBayes(backend=backend)

_start = time.time()
# Every positional command-line argument is a training file to read.
for file_name in args:
    print('Training file %s' % file_name)
    # Stays 0 when the file is empty, matching the manual counter it replaces.
    _counter = 0
    with open(file_name, 'r') as fd:
        for _counter, line in enumerate(fd, 1):
            # Progress report every 100000 lines: line count and elapsed seconds.
            if _counter % 100000 == 0:
                print('%s %s' % (_counter, time.time() - _start))
            # Each line is "<json vector>\t<count>".
            vector, count = line.split('\t')
            vector = json.loads(vector)
예제 #9
0
# -*- coding:utf-8 -*-
from bayes import NaiveBayes


def loadDataSet():
    """Return the toy training posts, their labels, and the test posts.

    Returns:
        A ``(train_samples, train_classes, test_samples)`` tuple. Every
        sample is a list of word tokens, and ``train_classes[i]`` labels
        ``train_samples[i]`` with 0 (good) or 1 (bad).
    """
    # (label, tokens) pairs keep each training post next to its class.
    # NOTE(review): the ' and' token carries a leading space -- confirm
    # whether that is intentional.
    labelled_posts = (
        (0, ['my', 'dog', 'has', 'flea', 'problems', 'help', 'please']),
        (1, ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid']),
        (0, ['my', 'dalmation', 'is', 'so', 'cute', ' and', 'I', 'love', 'him']),
        (1, ['stop', 'posting', 'stupid', 'worthless', 'garbage']),
        (0, ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him']),
        (1, ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']),
    )
    train_samples = [tokens for _, tokens in labelled_posts]
    train_classes = [label for label, _ in labelled_posts]  # 0:good; 1:bad
    test_samples = [
        ['love', 'my', 'girl', 'friend'],
        ['stupid', 'garbage'],
        ['Haha', 'I', 'really', "Love", "You"],
        ['This', 'is', "my", "dog"],
    ]
    return train_samples, train_classes, test_samples


if __name__ == "__main__":
    # Fit on the toy posts, then run the classifier over every test post.
    samples, classes, queries = loadDataSet()

    clf = NaiveBayes()
    clf.train(samples, classes)
    # The value returned by classify() is not used here.
    for tokens in queries:
        clf.classify(tokens)
# Report the perceptron results collected earlier. Single-argument print(...)
# gives the same output on Python 2 and 3; the bare print statement is a
# SyntaxError on Python 3.
print("times: {}".format(digitPercepTimes))
print("means: {}".format(digitPercepAvgs))
print("stds: {}".format(digitPercepStds))

# naive bayes classification
#print "---------- Naive Bayes ----------"
# One entry per value in `percents`: the mean() of the accuracies, the
# stddev() of the accuracies, and the mean() of the training times.
digitBayesAvgs = []
digitBayesStds = []
digitBayesTimes = []
for percent in percents:
    # Passed as the last argument to train(); presumably the fraction of the
    # training data to use -- TODO confirm against NaiveBayes.train.
    p = percent / 10.0
    # NOTE(review): x is not read anywhere in the visible code; it may have
    # been meant as the trial count for the range() below.
    x = 5
    res = []
    times = []
    # Five independent trials, each with a freshly constructed classifier.
    for i in range(0, 5):
        digitBayes = NaiveBayes(digitWidth * digitHeight, 10, 2)
        # Only the train() call is timed; test() is outside the measurement.
        t1 = time.time()
        digitBayes.train(digitWidth, digitHeight, digitTrainingImagesPath,
                         digitTrainingLabelsPath, p)
        dt = time.time() - t1
        percentageCorrect = digitBayes.test(digitWidth, digitHeight,
                                            digitTestImagesPath,
                                            digitTestLabelsPath)
        res.append(percentageCorrect)
        times.append(dt)
    avgTime = mean(times)
    avgAcc = mean(res)
    stdAcc = stddev(res)
    digitBayesAvgs.append(avgAcc)
    digitBayesStds.append(stdAcc)
    digitBayesTimes.append(avgTime)