def testingNB():
    """Train on the data from ``bayes.loadDataSet`` and classify two samples.

    Every post is converted to a vector with ``bayes.setOfWordsToVector``
    over the vocabulary from ``bayes.createVocabList``; the vectors and
    their labels are passed to ``bayes.trainNB``.  Two hard-coded word
    lists are then run through ``bayes.classifyNB`` and the result of each
    is printed.  Nothing is returned.
    """
    posts, labels = bayes.loadDataSet()
    vocabulary = bayes.createVocabList(posts)
    vectors = [bayes.setOfWordsToVector(vocabulary, doc) for doc in posts]
    p0, p1, pAbusive = bayes.trainNB(array(vectors), array(labels))

    # Sample inputs, classified in this order.
    samples = (
        ['love', 'my', 'dalmation'],
        ['stupid', 'garbage'],
    )
    for testEntry in samples:
        thisDoc = array(bayes.setOfWordsToVector(vocabulary, testEntry))
        print(testEntry, 'classified as: ', bayes.classifyNB(thisDoc, p0, p1, pAbusive))
def localWords(feed0, feed1, testSize=20):
    """Train and evaluate a classifier that tells two feeds apart.

    The same number of entries is taken from each feed (the length of the
    shorter one).  Each entry's ``'summary'`` is tokenised with
    ``bayes.textParse``; documents from ``feed1`` are labelled 1 and
    documents from ``feed0`` are labelled 0.  The words reported by
    ``calculateMostFrequentValues`` (presumably the most frequent ones --
    confirm against that helper) are dropped from the vocabulary, then
    ``testSize`` randomly chosen documents are held out for testing, the
    rest are used for training, and the hold-out error rate is printed.

    Args:
        feed0: Mapping with an ``'entries'`` sequence whose items each have
            a ``'summary'`` value; source of class 0.
        feed1: Same structure as ``feed0``; source of class 1.
        testSize: Number of documents to hold out for testing.  Must be at
            least 1 and smaller than the total number of documents so that
            at least one document remains for training.

    Returns:
        A tuple ``(vocabList, p0v, p1v)``: the pruned vocabulary and the
        first two values returned by ``bayes.trainNB``.

    Raises:
        ValueError: If ``testSize`` is not in ``[1, total documents)``.
    """
    import numpy as np

    minLen = min(len(feed1['entries']), len(feed0['entries']))
    numDocs = 2 * minLen
    # Fail early with a clear message: drawing more hold-out documents than
    # exist would otherwise surface as an IndexError in the sampling loop,
    # and holding out all of them would leave nothing to train on.
    if testSize < 1 or testSize >= numDocs:
        raise ValueError(
            'testSize must be between 1 and %d (documents available: %d), got %r'
            % (numDocs - 1, numDocs, testSize)
        )

    docList = []
    classList = []
    fullText = []
    for i in range(minLen):
        # feed1 first, then feed0, so odd/even document indices keep the
        # same class layout: index 2*i is class 1, index 2*i + 1 is class 0.
        for feed, label in ((feed1, 1), (feed0, 0)):
            wordList = bayes.textParse(feed['entries'][i]['summary'])
            docList.append(wordList)
            fullText.extend(wordList)
            classList.append(label)

    vocabList = bayes.createVocabList(docList)
    top30Words = calculateMostFrequentValues(vocabList, fullText)
    for pairW in top30Words:
        # Each item is indexed as a pair; its first element is the word.
        if pairW[0] in vocabList:
            vocabList.remove(pairW[0])

    # Randomly move testSize document indices from the training set into
    # the test set (sampling without replacement).
    trainingset = list(range(numDocs))
    testSet = []
    for _ in range(testSize):
        randIndex = int(np.random.uniform(0, len(trainingset)))
        testSet.append(trainingset.pop(randIndex))

    trainMat = [
        bayes.bagOfWordsToVetor(vocabList, docList[docIndex])
        for docIndex in trainingset
    ]
    trainClasses = [classList[docIndex] for docIndex in trainingset]
    p0v, p1v, pSpam = bayes.trainNB(np.asarray(trainMat), np.asarray(trainClasses))

    errorCount = 0
    for docIndex in testSet:
        wordVector = bayes.bagOfWordsToVetor(vocabList, docList[docIndex])
        predicted = bayes.classifyNB(np.asarray(wordVector), p0v, p1v, pSpam)
        if predicted != classList[docIndex]:
            errorCount += 1
    print('the error rate is: ', float(errorCount)/len(testSet))
    return vocabList, p0v, p1v