Python createVocabList Examples

Programming Language: Python

Namespace/Package Name: bayes.bayes

Method/Function: createVocabList

Examples at hotexamples.com: 8

Python createVocabList - 8 examples found. These are the top-rated real-world Python examples of bayes.bayes.createVocabList, extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

0

Show file

File: testBayes.py Project: shenlan12/pyMachineLearning

 def test_word2vec2(self):
     """Print the set-of-words vector for each entry of the loaded data.

     The vocabulary is built from the first element returned by
     ``bayes.loadDataSet()``; every entry of that element is then echoed
     together with the vector ``bayes.setOfWords2Vec`` produces for it.
     """
     posts = bayes.loadDataSet()[0]
     vocabList = bayes.createVocabList(posts)
     print("THE VOCABLIST IS %s" % vocabList)
     for post in posts:
         print("RAW WORD IS %s" % post)
         # Vectorise only after echoing the input, so any output produced
         # by the helper itself appears between the two lines.
         vector = bayes.setOfWords2Vec(vocabList, post)
         print("OUTPUT VEC IS %s" % vector)

Example #2

0

Show file

File: testBayes.py Project: shenlan12/pyMachineLearning

 def test_freq(self):
     """Print the vocabulary and the result of ``bayes.calcMostFreq``.

     The vocabulary comes from the posts returned by
     ``bayes.loadDataSet()``; the class labels returned alongside them
     are not needed here.
     """
     posts, _labels = bayes.loadDataSet()
     vocabulary = bayes.createVocabList(posts)
     print(vocabulary)
     sample_text = 'haha my steak is food, my problems is garbage'
     most_frequent = bayes.calcMostFreq(vocabulary, sample_text)
     print(most_frequent)

Example #3

0

Show file

File: testBayes.py Project: shenlan12/pyMachineLearning

 def test_word2vecBag(self):
     """Print the bag-of-words vector for each entry of the loaded data.

     Same flow as the set-of-words check, but each entry is vectorised
     with ``bayes.bagOfWords2VecMN``.
     """
     posts = bayes.loadDataSet()[0]
     vocabList = bayes.createVocabList(posts)
     print("THE VOCABLIST IS %s" % vocabList)
     for post in posts:
         print("RAW WORD IS %s" % post)
         # Original author's note: in the printed vector the entry for
         # "how" is expected to be 2.
         bag = bayes.bagOfWords2VecMN(vocabList, post)
         print("OUTPUT VEC BAG IS %s" % bag)

Example #4

0

Show file

File: testBayes.py Project: shenlan12/pyMachineLearning

 def test_NB(self):
     """Train with ``bayes.trainNB0Log`` and classify two sample entries.

     For each sample entry the word vector is printed, followed by the
     entry and the class returned by ``bayes.classifyNB``.
     """
     posts, labels = bayes.loadDataSet()
     vocabulary = bayes.createVocabList(posts)
     trainMat = [bayes.setOfWords2Vec(vocabulary, post) for post in posts]
     p0V, p1V, pAb = bayes.trainNB0Log(array(trainMat), array(labels))

     samples = (
         ['love', 'my', 'dalmation'],
         ['stupid', 'garbage'],
     )
     for entry in samples:
         doc_vector = array(bayes.setOfWords2Vec(vocabulary, entry))
         print(doc_vector)
         verdict = bayes.classifyNB(doc_vector, p0V, p1V, pAb)
         print(entry, 'classified as: ', verdict)

Example #5

0

Show file

File: testBayes.py Project: shenlan12/pyMachineLearning

 def spamTest(self):
     """Hold-out evaluation of the classifier on the e-mail corpus.

     Reads ``email/spam/1.txt`` .. ``email/spam/25.txt`` and the matching
     ``email/ham`` files, builds a vocabulary from all of them, moves 10
     randomly chosen documents into a test set, trains ``bayes.trainNB0``
     on the remaining documents and prints the misclassified test
     documents followed by the error rate.

     Raises whatever ``io.open`` raises when one of the e-mail files is
     missing or unreadable.
     """
     docList = []
     classList = []
     # Spam documents are labelled 1 and ham documents 0; the two sources
     # are interleaved per index exactly as before (spam i, then ham i).
     sources = (('email/spam/%d.txt', 1), ('email/ham/%d.txt', 0))
     for i in range(1, 26):
         for pattern, label in sources:
             # The context manager closes each file deterministically
             # instead of leaking the handle until garbage collection.
             with io.open(pattern % i, encoding="ISO-8859-1") as handle:
                 text = handle.read()
             docList.append(bayes.textParse(text))
             classList.append(label)
     vocabList = bayes.createVocabList(docList)  # create vocabulary
     print("VACABULIST IS %s \nwith VACABULIST size = %d" %
           (vocabList, len(vocabList)))
     # A real list is required so that the chosen indices can be removed;
     # on Python 3 ``range`` is immutable, and deleting from a temporary
     # ``list(...)`` copy left the test documents in the training set.
     trainingSet = list(range(len(docList)))
     testSet = []  # create test set
     for _ in range(10):
         randIndex = int(random.uniform(0, len(trainingSet)))
         print("RANDOM IS %d" % randIndex)
         # pop() moves the index out of the training set, so a document
         # can neither be trained on nor be selected for testing twice.
         testSet.append(trainingSet.pop(randIndex))
     trainMat = []
     trainClasses = []
     for docIndex in trainingSet:  # train the classifier (get probs) trainNB0
         trainMat.append(
             bayes.bagOfWords2VecMN(vocabList, docList[docIndex]))
         trainClasses.append(classList[docIndex])
     print(trainMat)
     # Original author's note: yields the trained naive Bayes vectors
     # P(w|Ci) and the prior P(Ci).
     p0V, p1V, pSpam = bayes.trainNB0(
         array(trainMat), array(trainClasses))
     errorCount = 0
     for docIndex in testSet:  # classify the held-out items
         wordVector = bayes.bagOfWords2VecMN(vocabList, docList[docIndex])
         if bayes.classifyNB(array(wordVector), p0V, p1V,
                             pSpam) != classList[docIndex]:
             errorCount += 1
             print("classification error", docList[docIndex])
     print('the error rate is: ', float(errorCount) / len(testSet))

Example #6

0

Show file

File: testBayes.py Project: shenlan12/pyMachineLearning

 def test_trainNB(self):
     """Print the results of ``bayes.trainNB0`` and ``bayes.trainNB0Log``.

     Both trainers receive freshly built arrays of the same set-of-words
     matrix and class labels; the three values each returns are printed
     in order.
     """
     posts, labels = bayes.loadDataSet()
     vocabulary = bayes.createVocabList(posts)
     print(vocabulary)  # the set of all words, de-duplicated
     trainMat = [bayes.setOfWords2Vec(vocabulary, post) for post in posts]
     print(trainMat)

     p0V, p1V, pAb = bayes.trainNB0(array(trainMat), array(labels))
     for value in (p0V, p1V, pAb):
         print(value)
     total0 = sum(p0V)
     total1 = sum(p1V)
     # Original author's note: the sums need not equal 1 because the
     # trainer was modified to avoid division by zero.
     print("sm1=%f, sm2=%f" % (total0, total1))

     print("USING LOG DISP")
     logP0, logP1, logPrior = bayes.trainNB0Log(
         array(trainMat), array(labels))
     for value in (logP0, logP1, logPrior):
         print(value)

Example #7

0

Show file

File: testBayes.py Project: shenlan12/pyMachineLearning

 def test_word2vec(self):
     """Print the set-of-words vector of one whitespace-split sentence."""
     tokens = "my dog ate the food on the garbage".split()
     posts = bayes.loadDataSet()[0]
     vocabList = bayes.createVocabList(posts)
     print("THE VOCABLIST IS %s" % vocabList)
     vector = bayes.setOfWords2Vec(vocabList, tokens)
     print(vector)

Example #8

0

Show file

File: testBayes.py Project: shenlan12/pyMachineLearning

 def test_dataUnique(self):
     """Print the vocabulary built from the loaded posts."""
     posts = bayes.loadDataSet()[0]
     # Original author's note: the set of all words, de-duplicated.
     vocabulary = bayes.createVocabList(posts)
     print(vocabulary)