예제 #1
0
def testingNB():
    """Train on the ``bayes`` sample posts and print two test classifications.

    Loads the sample posts and their labels, builds the vocabulary, turns
    every post into a word vector, trains with ``trainNB0`` and prints the
    label ``classifyNB`` returns for each of two hard-coded test posts.

    ``trainNB0`` and ``classifyNB`` are called unqualified, so they must
    already be in scope wherever this function is defined.
    """
    # 1. Load the data set.
    listOPosts, listClasses = bayes.loadDataSet()
    print('listOPosts: ', listOPosts,
          '\n************************************\nlistClasses: ', listClasses)

    # 2. Build the vocabulary.
    myVocabList = bayes.createVocabList(listOPosts)

    # 3. One word vector per post. Per the original note this forms an
    #    m x len(myVocabList) matrix holding only 0/1 entries.
    trainMat = [bayes.setOfWords2Vec(myVocabList, postinDoc)
                for postinDoc in listOPosts]

    # 4. Train.
    p0V, p1V, pAb = trainNB0(np.array(trainMat), np.array(listClasses))

    # 5. Classify the test posts.
    # The first entry used to read 'dalmatioin'; that looked like a typo for
    # 'dalmation' (presumably the spelling used by the sample data - confirm
    # against bayes.loadDataSet), which meant the word could never match.
    for testEntry in (['love', 'my', 'dalmation'], ['stupid', 'garbage']):
        thisDoc = np.array(bayes.setOfWords2Vec(myVocabList, testEntry))
        print(testEntry, 'classified as: ',
              classifyNB(thisDoc, p0V, p1V, pAb))
예제 #2
0
def main():
    """Vectorise the ``bayes`` sample posts and print the ``trainNB0`` result.

    NOTE(review): the vocabulary builder is called as
    ``bayes.create_vacabulary_list``; confirm that this is the name the
    accompanying ``bayes`` module really exports.
    """
    postingList, classVec = bayes.loadDataSet()
    vlist = bayes.create_vacabulary_list(postingList)
    tranmat = [bayes.setOfWords2Vec(vlist, row) for row in postingList]
    # The Python 2 print statement is a SyntaxError on Python 3; a
    # single-argument print(...) call prints the same text on both.
    print(bayes.trainNB0(tranmat, classVec))
def test_train():
    """Train on the ``bayes`` sample posts and print the second trained value.

    ``bayes.trainNB0`` is unpacked into three values; only the middle one
    (``p1v``) is printed.
    """
    listOposts, listClasses = bayes.loadDataSet()
    myVocabList = bayes.createVocabList(listOposts)
    trainMat = [bayes.setOfWords2Vec(myVocabList, postinDoc)
                for postinDoc in listOposts]
    p0v, p1v, pab = bayes.trainNB0(trainMat, listClasses)

    # The Python 2 print statement is a SyntaxError on Python 3; a
    # single-argument print(...) call prints the same text on both.
    print(p1v)
예제 #4
0
파일: test_bayes.py 프로젝트: doolin/mlp
 def test_bagOfWords2VecMN(self):
     """Compare ``bayes.bagOfWords2VecMN`` for sample post 0 with a fixture.

     NOTE(review): the fixture fixes a vocabulary order; confirm that
     ``bayes.createVocabList`` returns a deterministic order.
     """
     posts, _labels = bayes.loadDataSet()
     vocabulary = bayes.createVocabList(posts)
     expected = [
         0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
         0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1
     ]
     actual = bayes.bagOfWords2VecMN(vocabulary, posts[0])
     self.assertEqual(actual, expected)
예제 #5
0
파일: test_bayes.py 프로젝트: doolin/mlp
 def test_setOfWords2Vec(self):
     """Compare ``bayes.setOfWords2Vec`` for sample post 0 with a fixture.

     NOTE(review): the fixture fixes a vocabulary order; confirm that
     ``bayes.createVocabList`` returns a deterministic order.
     """
     # The second value returned by loadDataSet is, per the original note,
     # a list of labels for the posts; this test does not use it.
     posts, _labels = bayes.loadDataSet()
     vocabulary = bayes.createVocabList(posts)
     expected = [
         0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
         0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1
     ]
     actual = bayes.setOfWords2Vec(vocabulary, posts[0])
     self.assertEqual(actual, expected)
예제 #6
0
    def test_createVocablist(self):
        """Print the vocabulary and the word vectors of sample rows 0 and 3.

        Nothing is asserted; the method only prints what ``bayes`` returns.
        """
        posts, _ = bayes.loadDataSet()
        vocabulary = bayes.createVocabList(posts)
        print("\n vocab_list == %s" % (vocabulary))

        # Per the original note: each vector marks with 1 the vocabulary
        # positions of the words that occur in the chosen row.
        for row_number in (0, 3):
            row_vector = bayes.setOfWords2Vec(vocabulary, posts[row_number])
            print("\n vec == %s" % (row_vector))
예제 #7
0
    def test_train_nb(self):
        """Train on the ``bayes`` sample posts and print the three results.

        Nothing is asserted; the method prints the vocabulary and the three
        values unpacked from ``bayes.trainNB0``.
        """
        posts, labels = bayes.loadDataSet()
        vocabulary = bayes.createVocabList(posts)
        print("\n vocab_list == %s" % (vocabulary))

        # One word vector per sample post, in post order.
        post_vectors = [bayes.setOfWords2Vec(vocabulary, post)
                        for post in posts]

        p0Vect, p1Vect, pAbusive = bayes.trainNB0(post_vectors, labels)
        summary = "\n p0Vect == %s\n p1Vect == %s\n pAbusive == %s\n"
        print(summary % (p0Vect, p1Vect, pAbusive))
예제 #8
0
def testingNB():
    """Train on the sample posts and print the class of two test posts.

    Every helper (``loadDataSet``, ``createVocabList``, ``setOfWords2Vec``,
    ``trainNB0``, ``classifyNB``, ``array``) is called unqualified and must
    already be in scope.
    """
    posts, labels = loadDataSet()
    vocabulary = createVocabList(posts)
    post_vectors = [setOfWords2Vec(vocabulary, post) for post in posts]
    p0V, p1V, pAb = trainNB0(array(post_vectors), array(labels))

    # Vectorise, classify and report each test post in turn.
    for testEntry in (['love', 'my', 'dalmation'], ['stupid', 'garbage']):
        tesDoc = array(setOfWords2Vec(vocabulary, testEntry))
        print(testEntry, 'classified as: ', classifyNB(tesDoc, p0V, p1V, pAb))
예제 #9
0
def testSimpTrain():
    """Train on the ``bayes`` sample posts and print inputs and results.

    Prints the word-vector matrix and the labels that are passed to
    ``bayes.trainNB0``, then the three values it returns.
    """
    listOPosts, listClasses = bayes.loadDataSet()
    myVocabList = bayes.createVocabList(listOPosts)
    trainMat = [bayes.setOfWords2Vec(myVocabList, postinDoc)
                for postinDoc in listOPosts]

    # Python 2 print statements were a SyntaxError on Python 3; the print()
    # calls below produce the same "label: value" lines on Python 3.
    print("trainMat:", trainMat)
    print("listClasses:", listClasses)
    p0V, p1V, pAb = bayes.trainNB0(trainMat, listClasses)
    print("pAb:", pAb)
    print("p0V:", p0V)
    print("p1V:", p1V)
예제 #10
0
def testingNB():
    """Train on the ``bayes`` sample posts and print two test classifications.

    ``array`` is used unqualified, so it must already be in scope (for
    example through ``from numpy import *``).
    """
    listOPosts, listClasses = bayes.loadDataSet()
    myVocabList = bayes.createVocabList(listOPosts)
    trainMat = [bayes.setOfWords2Vec(myVocabList, postinDoc)
                for postinDoc in listOPosts]
    p0V, p1V, pAb = bayes.trainNB0(trainMat, listClasses)

    # Python 2 print statements were a SyntaxError on Python 3; print()
    # with the same arguments produces the same line on Python 3.
    for testEntry in (['love', 'my', 'dalmation', 'stupid'],
                      ['quit', 'stupid']):
        thisDoc = array(bayes.setOfWords2Vec(myVocabList, testEntry))
        print(testEntry, 'classified as: ',
              bayes.classifyNB(thisDoc, p0V, p1V, pAb))
예제 #11
0
파일: test_bayes.py 프로젝트: doolin/mlp
 def test_trainNBO(self):
     """Check that the third value from ``bayes.trainNBO`` is about 0.5."""
     posts, labels = bayes.loadDataSet()
     vocabulary = bayes.createVocabList(posts)
     # One vector per post: a list of lists, e.g. [[...], ..., [...]].
     post_vectors = [bayes.setOfWords2Vec(vocabulary, post)
                     for post in posts]
     # Original author's note: the names passed in here suggest different
     # types than the names the function receives them under; the note gives
     # the signature as trainNBO(trainMatrix, trainCategory) and observes
     # that nothing in the program ties the two sets of names together.
     _p0V, _p1V, pAb = bayes.trainNBO(post_vectors, labels)
     self.assertAlmostEqual(pAb, 0.5)
예제 #12
0
def testingNB():
    """Train on the ``bayes`` sample posts and print two test classifications.

    The test vectors are passed to ``bayes.classifyNB`` exactly as
    ``bayes.setOfWords2Vec`` returns them, without an array conversion.
    """
    postList, classList = bayes.loadDataSet()
    myVocabList = bayes.createVocabList(postList)
    trainMat = [bayes.setOfWords2Vec(myVocabList, post) for post in postList]
    p0V, p1V, pAb = bayes.trainNB0(trainMat, classList)

    # Python 2 print statements were a SyntaxError on Python 3; print()
    # with the same arguments produces the same line on Python 3.
    for testEntry in (['love', 'my', 'dalmation'], ['stupid', 'garbage']):
        thisDoc = bayes.setOfWords2Vec(myVocabList, testEntry)
        print(testEntry, 'classified as: ',
              bayes.classifyNB(thisDoc, p0V, p1V, pAb))
예제 #13
0
파일: bayes.py 프로젝트: zhlei99/MLStudy
def testingNB():
    """Train on the ``bayes`` sample posts and print two test classifications.

    NOTE(review): some helpers are reached through ``bayes.`` while
    ``classifyNB`` and the second ``setOfWords2Vec`` call are unqualified;
    both spellings must resolve wherever this function lives - confirm.
    """
    listOPosts, listClasses = bayes.loadDataSet()
    myVocabList = bayes.createVocabList(listOPosts)
    print(myVocabList)
    trainMat = [bayes.setOfWords2Vec(myVocabList, postinDoc)
                for postinDoc in listOPosts]
    p0V, p1V, pAb = bayes.trainNB0(trainMat, listClasses)

    testEntry = ['love', 'my', 'dalmation']
    # A bare `print` is only a name lookup on Python 3 and prints nothing;
    # call it so the intended blank line is actually emitted.
    print()
    thisDoc = np.array(bayes.setOfWords2Vec(myVocabList, testEntry))
    print(testEntry, 'classified as: ', classifyNB(thisDoc, p0V, p1V, pAb))

    testEntry = ['stupid', 'garbage']
    thisDoc = np.array(setOfWords2Vec(myVocabList, testEntry))
    print(testEntry, 'classified as: ', classifyNB(thisDoc, p0V, p1V, pAb))
예제 #14
0
파일: testBayes.py 프로젝트: ldgang0530/MLA
def testNB():
    """Train on the ``bayes`` sample posts and print two labelled results.

    The value returned by ``bayes.classifyNB`` is looked up in a small
    table (0 -> 'Not garbage', 1 -> 'Garbage') before it is printed.
    ``array`` is used unqualified and must already be in scope.
    """
    posts, labels = bayes.loadDataSet()  # load the sample data
    vocabulary = bayes.createVocabList(posts)
    post_vectors = [bayes.setOfWords2Vec(vocabulary, post) for post in posts]
    p0V, p1V, pAb = bayes.trainNB0(post_vectors, labels)

    resultLabel = {0: 'Not garbage', 1: 'Garbage'}
    for testEntry in (['love', 'my', 'dalmation'], ['stupid', 'garbage']):
        thisDoc = array(bayes.setOfWords2Vec(vocabulary, testEntry))
        predicted = bayes.classifyNB(thisDoc, p0V, p1V, pAb)
        print(testEntry, 'classified as:', resultLabel[predicted])
예제 #15
0
def run():
    """Print a start banner and run ``bayes.getTopWords`` on two RSS feeds.

    The sample data and vocabulary are still loaded first, although only
    the disabled experiments below use them. The feeds are fetched with
    ``feedparser`` from two Craigslist RSS URLs, so network access is needed.
    """
    # The Python 2 print statement was a SyntaxError on Python 3; a
    # single-argument print(...) call prints the same text on both.
    print('begin--->run()')
    postingList, classVec = bayes.loadDataSet()
    myVocabList = bayes.createVocabList(postingList)
    # Earlier experiments, kept disabled:
    # print myVocabList
    # print bayes.words2Vec(myVocabList,postingList[0])
    # trainMat = []
    # for postinDoc in postingList:
    #     trainMat.append(bayes.words2Vec(myVocabList, postinDoc))
    # p0V, p1V, pAb = bayes.trainNB0(trainMat, classVec)
    # bayes.testingNB()
    # bayes.spamTest()
    # print pAb
    # print p0V
    # print p1V
    import feedparser
    ny = feedparser.parse(
        'https://newyork.craigslist.org/search/res?format=rss')
    sf = feedparser.parse('https://sfbay.craigslist.org/search/apa?format=rss')
    # bayes.localWords(ny,sf)
    bayes.getTopWords(ny, sf)
예제 #16
0
def simptestTest():
    """Print the vocabulary, sample post 0 and that post's word vector."""
    listOPosts, listClasses = bayes.loadDataSet()
    myVocabList = bayes.createVocabList(listOPosts)
    # Python 2 print statements were a SyntaxError on Python 3; a
    # single-argument print(...) call prints the same text on both.
    print(myVocabList)
    print(listOPosts[0])
    print(bayes.setOfWords2Vec(myVocabList, listOPosts[0]))
def test_vocList():
    """Print the vocabulary ``bayes`` builds from its sample posts."""
    listOposts, listClasses = bayes.loadDataSet()
    myVocabList = bayes.createVocabList(listOposts)
    # The Python 2 print statement was a SyntaxError on Python 3; a
    # single-argument print(...) call prints the same text on both.
    print(myVocabList)
예제 #18
0
def test():
    """Print the vocabulary ``bayes`` builds from its sample posts."""
    posts, _labels = bayes.loadDataSet()
    vocabulary = bayes.createVocabList(posts)
    print(vocabulary)
예제 #19
0
파일: test.py 프로젝트: liuchenbuaa/Action
import bayes;

# Load the sample posts and labels, then print them, the vocabulary and the
# word vector of post 0.
# Python 2 print statements were a SyntaxError on Python 3; single-argument
# print(...) calls print the same text on both. The stray trailing
# semicolons were dropped.
listOPosts, listClasses = bayes.loadDataSet()

print(listOPosts)
print(listClasses)

myVocabList = bayes.createVocabList(listOPosts)

print(myVocabList)
print(bayes.setOfWords2Vec(myVocabList, listOPosts[0]))
예제 #20
0
# coding=utf-8
import bayes

# Walk through the bayes helpers step by step, printing each result, then
# run the module's own testingNB() and spamTest() routines.
# Python 2 print statements were a SyntaxError on Python 3; single-argument
# print(...) calls print the same text on both.
dateset, vec = bayes.loadDataSet()
print(dateset)
print('--词集,不考虑出现次数')
val_set = bayes.createVocabList(dateset)
print(val_set)
print('--每篇的词集向量 ')
word_vec = bayes.setOfWords2Vec(val_set, dateset[0])
print(word_vec)
# One word vector per document, in data-set order.
traindoc = [bayes.setOfWords2Vec(val_set, doc) for doc in dateset]
p0_v, p1_v, p_ab = bayes.trainNB0(traindoc, vec)
print(p_ab)
print(p0_v)
print(p1_v)

print('-- test')
bayes.testingNB()
print('--测试垃圾邮件')
bayes.spamTest()
예제 #21
0
# Set-of-words model: the feature for each word is whether it occurs at all.
def setOfWords2Vec(vocabList, inputSet):
    """Return a 0/1 list marking which vocabulary words occur in a document.

    Args:
        vocabList: the vocabulary, as a sequence supporting ``in`` and
            ``index``.
        inputSet: iterable of the words of one document.

    Returns:
        A list as long as ``vocabList``. An entry is 1 when the word at that
        vocabulary position occurs in ``inputSet`` and 0 otherwise. Only the
        first position of a repeated vocabulary word is ever set.

    Words missing from the vocabulary are reported with ``print`` and
    otherwise ignored.
    """
    flags = [0] * len(vocabList)
    for token in inputSet:
        if token not in vocabList:
            print("the word:'%s' is not in my Vocabulary!" % token)
            continue
        flags[vocabList.index(token)] = 1
    return flags
'''--------------测试函数---------------'''
import bayes

# Interactive-style walkthrough: the bare expressions below only display a
# value when typed into a REPL; run as a script they are evaluated and
# discarded.
# NOTE(review): the vocabulary builder is spelled `createVocaList` here -
# confirm that this matches the name exported by the bayes module.
listOPosts, listClasses = bayes.loadDataSet()  # load the documents (original note: and convert them to vector form)
myVocabList = bayes.createVocaList(listOPosts)  # build the vocabulary
myVocabList  # the vocabulary
len(myVocabList)
bayes.setOfWords2Vec(myVocabList, listOPosts[0])
bayes.setOfWords2Vec(myVocabList, listOPosts[3])

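# The data is organised on two levels:
# 1. Documents: document, document class, vector
# 2. Vocabulary: vocabulary, vocabulary size, abusive words, normal words, individual words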
'''--------------训练算法:从词向量计算概率---------------'''
#伪代码见书和笔记
'''计算每个类别的文档数
对每片训练文档:
    对每个类别:
        如果词条出现在文档中:
예제 #22
0
import bayes
# Load the sample posts (p) and their labels (c). Only the disabled
# experiments below use them.
p, c = bayes.loadDataSet()
#
# Earlier experiments, kept disabled: vocabulary and word-vector checks,
# training, a tokenising example and the module's own test routines.
# v = bayes.createVocabList(p)
# h=bayes.setOfWords2Vec(v,['dog','do'])
# # print(h)
# # trainMat=[]
# # for Doc in p:
# #     trainMat.append(bayes.setOfWords2Vec(v,Doc))
# # p0v,p1v,pAb=bayes.trainNB0(trainMat,c)
# # print(pAb)
# bayes.testingNB()
# mysent = 'This book is the best book on Python or  M.L. I have ever laid eyes upon.'
# mysent.split()
# import re
# regex = re.compile('\\W+')
# ltoken = regex.split(mysent)
# tok = [token.lower() for token in ltoken if len(token)>0]
# print(tok)
# bayes.spamTest()
# Active part: parse two Craigslist RSS feeds with feedparser (this needs
# network access) and pass both parsed feeds to bayes.getTopWords.
import feedparser
ny = feedparser.parse('http://newyork.craigslist.org/stp/index.rss')
sf = feedparser.parse('http://sfbay.craigslist.org/stp/index.rss')
# vocablist,psf,pny = bayes.localWords(ny,sf)

bayes.getTopWords(ny, sf)
예제 #23
0
__author__ = 'wanghao'

"""
    naive bayes main function
    author :  wanghao
    email  :  [email protected]
"""

from numpy import *
import bayes
import FilterMail

# Python 2 print statements were a SyntaxError on Python 3. The print()
# calls below keep the text layout the print statements produced.
postingList, classVec = bayes.loadDataSet()

# Build the vocabulary list.
vablist = bayes.createVocablist(postingList)
# The Python 2 statement wrote no separating space after an item ending in
# a newline, so the list is formatted straight after the "\n".
print("Show my vablist\n%s" % (vablist,))
print("-------------------------------")

# Build the word vector of one hand-written document.
returnVec = bayes.setOfwords2Vec(vablist, ["my", "love", "dog", "happy", "daddy"])
print("the word vec is ", returnVec)
print("-------------------------------")

# Train on one word vector per post.
trainMat = [bayes.setOfwords2Vec(vablist, one) for one in postingList]

# NOTE(review): the result is unpacked as (pa, p1Vec, p0Vec) and pa is
# formatted with %f - confirm that bayes.trainNB0 returns its values in
# this order.
pa, p1Vec, p0Vec = bayes.trainNB0(trainMat, classVec)
print("the 1 probability is %f, " % pa)
# Same layout as the old statement: header line, p1Vec followed by a space
# and a newline, then p0Vec.
print("the each class , each element probability\n%s \n%s" % (p1Vec, p0Vec))
예제 #24
0
파일: main.py 프로젝트: Daistory/Bayes
#!/usr/bin/python
#encoding:utf-8
import bayes
from numpy import *
# Train on the bayes sample posts and print the class assigned to one
# hard-coded test document.
postingList, classList = bayes.loadDataSet()
myVocabList = bayes.createVocabList(postingList)
# One word vector per post, in post order.
trainMat = [bayes.setOfWords(myVocabList, a) for a in postingList]
pc, p0, p1 = bayes.trainNB0(trainMat, classList)
test = ['stupid', 'garbage']
thisDoc = array(bayes.setOfWords(myVocabList, test))
# The Python 2 print statement was a SyntaxError on Python 3; a
# single-argument print(...) call prints the same text on both.
print(bayes.classifyNB(thisDoc, p0, p1, pc))
예제 #25
0
파일: main.py 프로젝트: Daistory/Bayes
#!/usr/bin/python
#encoding:utf-8
import bayes
from numpy import *
# Train on the bayes sample posts and print the class assigned to one
# hard-coded test document.
postingList, classList = bayes.loadDataSet()
myVocabList = bayes.createVocabList(postingList)
# One word vector per post, in post order.
trainMat = [bayes.setOfWords(myVocabList, a) for a in postingList]
pc, p0, p1 = bayes.trainNB0(trainMat, classList)
test = ['stupid', 'garbage']
thisDoc = array(bayes.setOfWords(myVocabList, test))
# The Python 2 print statement was a SyntaxError on Python 3; a
# single-argument print(...) call prints the same text on both.
print(bayes.classifyNB(thisDoc, p0, p1, pc))
예제 #26
0
파일: Script.py 프로젝트: HoneyB7/coco_ML
import bayes
# `array` was used below without ever being imported.
from numpy import array

# Train on the bayes sample posts, printing every intermediate value, then
# classify one hard-coded test post.
# Python 2 print statements were a SyntaxError on Python 3; the print()
# calls below produce the same lines on Python 3.
PostingList, ClassVector = bayes.loadDataSet()
print('*** PostingList ***')
print(PostingList)
print('*** ClassVector ***')
print(ClassVector)
myVocabList = bayes.createVocabList(PostingList)
print('*** myVocabList ***')
print(myVocabList)
# One word vector per post, in post order.
trainMat = [bayes.setOfWords2Vec(myVocabList, postinDoc)
            for postinDoc in PostingList]

print('*** trainMatrix ***')
print(trainMat)
# The closing parenthesis of this call was missing, which made the whole
# script a SyntaxError.
p0V, p1V, pAb = bayes.trainNB0(array(trainMat), array(ClassVector))

print('*** (c0/w) ***')
print(p0V)
print('*** (c1/w) ***')
print(p1V)

testEntry = ['love', 'my', 'dalmation']
thisDoc = array(bayes.setOfWords2Vec(myVocabList, testEntry))
# classifyNB is reached through the bayes module like every other helper;
# the unqualified name was never imported.
if bayes.classifyNB(thisDoc, p0V, p1V, pAb) == 0:
    print(testEntry, 'classified as not Abusive :)')
else:
    # Message typo fixed: 'classfied' -> 'classified'.
    print(testEntry, 'classified as Abusive :(')
예제 #27
0
__author__ = 'Kevin'
import bayes

# Print the vocabulary and two word vectors, demonstrate str.split, then
# train on the sample posts and print the three trained values.
# Python 2 print statements were a SyntaxError on Python 3; single-argument
# print(...) calls print the same text on both.
listOPost, listClasses = bayes.loadDataSet()
myVocabList = bayes.createVocabList(listOPost)
print(myVocabList)
print(bayes.setOfWords2Vec(myVocabList, listOPost[0]))
print(bayes.setOfWords2Vec(myVocabList, listOPost[3]))
s = "ab,cde,fg"
print(s)
print(s.split(","))
from numpy import *

# One word vector per post, in post order.
trainMat = [bayes.setOfWords2Vec(myVocabList, postinDoc)
            for postinDoc in listOPost]
p0V, p1V, pAb = bayes.trainNB0(trainMat, listClasses)
print("pAb: %s" % pAb)
print("p0V: %s" % p0V)
print("p1V: %s" % p1V)


class TrainingLetter:
    """Plain record holding a letter, its data set and a measure.

    Args:
        letter: value stored as ``result``.
        data: value stored as ``dataset``; the object is kept by reference.
        measure: value stored as ``measure``.
    """

    # Class-level defaults; __init__ shadows all three on every instance.
    result = "1"
    dataset = []
    measure = 0

    def __init__(self, letter, data, measure):
        # The ``letter`` argument is stored under the attribute name
        # ``result``.
        self.result, self.dataset, self.measure = letter, data, measure
import bayes
from numpy import *
# Build the training matrix one post at a time, tracing every step, then
# train and print the three values returned by bayes.trainNB0.
listOposts, listClasses = bayes.loadDataSet()
myVocabList = bayes.createVocabList(listOposts)
trainMat = []
print("-----start for about trainMat----- ")
for postinDoc in listOposts:
    # Show the post, append its word vector, then show the matrix so far.
    print("postinDoc = ", postinDoc)
    trainMat.append(bayes.setOfWords2Vec(myVocabList, postinDoc))
    print("trainMat = ", trainMat)

# The trace above ran once per post; training uses the complete matrix.
p0V, p1V, pAb = bayes.trainNB0(trainMat, listClasses)
print("p0V = ", p0V)
print("p1V = ", p1V)
print("pAb = ", pAb)
def test_word2vec():
    """Print the word vector ``bayes`` builds for sample post 0."""
    listOposts, listClasses = bayes.loadDataSet()
    myVocabList = bayes.createVocabList(listOposts)
    # The Python 2 print statement was a SyntaxError on Python 3; a
    # single-argument print(...) call prints the same text on both.
    print(bayes.setOfWords2Vec(myVocabList, listOposts[0]))