コード例 #1
0
 def test_word2vec2(self):
     """Print the vocabulary and the ``setOfWords2Vec`` output per entry.

     Takes the first element returned by ``bayes.loadDataSet()``, builds a
     vocabulary list from it, and prints one vector for every entry.
     Nothing is asserted; the output is meant for manual inspection.
     """
     documents = bayes.loadDataSet()[0]
     vocabulary = bayes.createVocabList(documents)
     print("THE VOCABLIST IS %s" % vocabulary)
     for entry in documents:
         print("RAW WORD IS %s" % entry)
         # Convert only after echoing the raw entry so any output produced
         # by the conversion itself stays in the same position.
         vector = bayes.setOfWords2Vec(vocabulary, entry)
         print("OUTPUT VEC IS %s" % vector)
コード例 #2
0
 def test_freq(self):
     """Print the vocabulary and the result of ``bayes.calcMostFreq``.

     The vocabulary is built from the first value returned by
     ``bayes.loadDataSet()``; the second value is not needed here.
     Nothing is asserted; the output is meant for manual inspection.
     """
     posts, _ = bayes.loadDataSet()
     vocabulary = bayes.createVocabList(posts)
     print(vocabulary)
     sample_text = 'haha my steak is food, my problems is garbage'
     print(bayes.calcMostFreq(vocabulary, sample_text))
コード例 #3
0
 def test_word2vecBag(self):
     """Print the vocabulary and the ``bagOfWords2VecMN`` output per entry.

     Takes the first element returned by ``bayes.loadDataSet()``, builds a
     vocabulary list from it, and prints one bag vector for every entry.
     Nothing is asserted; the output is meant for manual inspection.
     """
     documents = bayes.loadDataSet()[0]
     vocabulary = bayes.createVocabList(documents)
     print("THE VOCABLIST IS %s" % vocabulary)
     for entry in documents:
         print("RAW WORD IS %s" % entry)
         # Original author's note: the word "how" is expected to show up
         # with a count of 2 in the output vector.
         bag_vector = bayes.bagOfWords2VecMN(vocabulary, entry)
         print("OUTPUT VEC BAG IS %s" % bag_vector)
コード例 #4
0
 def test_NB(self):
     """Train with ``trainNB0Log`` and print classifications of two entries.

     Every post from ``bayes.loadDataSet()`` is converted with
     ``setOfWords2Vec`` to form the training matrix. Two hard-coded word
     lists are then vectorised the same way and passed to
     ``bayes.classifyNB``; the vector and the result are printed for each.
     Nothing is asserted; the output is meant for manual inspection.
     """
     posts, labels = bayes.loadDataSet()
     vocabulary = bayes.createVocabList(posts)
     train_matrix = [
         bayes.setOfWords2Vec(vocabulary, post) for post in posts
     ]
     p0_vec, p1_vec, p_abusive = bayes.trainNB0Log(
         array(train_matrix), array(labels))

     # The same vectorise / print / classify sequence runs for each entry.
     for entry in (['love', 'my', 'dalmation'], ['stupid', 'garbage']):
         doc_vector = array(bayes.setOfWords2Vec(vocabulary, entry))
         print(doc_vector)
         print(entry, 'classified as: ',
               bayes.classifyNB(doc_vector, p0_vec, p1_vec, p_abusive))
コード例 #5
0
 def test_trainNB(self):
     """Print the outputs of ``trainNB0`` and ``trainNB0Log`` on the data set.

     Prints the vocabulary, the training matrix built with
     ``setOfWords2Vec``, the three values returned by ``bayes.trainNB0``
     together with the sums of the first two, and finally the three values
     returned by ``bayes.trainNB0Log``. Nothing is asserted; the output is
     meant for manual inspection.
     """
     posts, labels = bayes.loadDataSet()
     vocabulary = bayes.createVocabList(posts)
     print(vocabulary)  # the full set of words, de-duplicated
     train_matrix = [
         bayes.setOfWords2Vec(vocabulary, post) for post in posts
     ]
     print(train_matrix)

     trained = bayes.trainNB0(array(train_matrix), array(labels))
     p0_vec, p1_vec, p_abusive = trained
     for value in (p0_vec, p1_vec, p_abusive):
         print(value)
     # Original author's note: the sums need not equal 1 because the
     # training routine was modified to guard against division by zero.
     print("sm1=%f, sm2=%f" % (sum(p0_vec), sum(p1_vec)))

     print("USING LOG DISP")
     # Fresh arrays are built for the second trainer, as in the first call.
     trained_log = bayes.trainNB0Log(array(train_matrix), array(labels))
     p0_vec, p1_vec, p_abusive = trained_log
     for value in (p0_vec, p1_vec, p_abusive):
         print(value)
コード例 #6
0
 def test_word2vec(self):
     """Print the vocabulary and the vector for one hard-coded sentence.

     The sentence is split on whitespace and passed to
     ``bayes.setOfWords2Vec`` along with the vocabulary built from the
     first element of ``bayes.loadDataSet()``. Nothing is asserted; the
     output is meant for manual inspection.
     """
     vocabulary = bayes.createVocabList(bayes.loadDataSet()[0])
     tokens = "my dog ate the food on the garbage".split()
     print("THE VOCABLIST IS %s" % vocabulary)
     vector = bayes.setOfWords2Vec(vocabulary, tokens)
     print(vector)
コード例 #7
0
 def test_dataUnique(self):
     """Print the vocabulary list built from the loaded data set.

     Nothing is asserted; the output is meant for manual inspection.
     """
     posts = bayes.loadDataSet()[0]
     # The full set of words, de-duplicated (per the original comment).
     vocabulary = bayes.createVocabList(posts)
     print(vocabulary)
コード例 #8
0
 def test_loadDataSet(self):
     """Print whatever ``bayes.loadDataSet()`` returns.

     Nothing is asserted; the output is meant for manual inspection.
     """
     data_set = bayes.loadDataSet()
     print(data_set)