def getCityTopWords(trainCity0Rss, trainCity1Rss):
    """
    获取城市中评论中最多的词汇
    :param trainCity1Rss:
    :param trainCity0Rss:
    """
    initialDocList, fullText, cityTypes = loadRSSText(trainCity0Rss, trainCity1Rss)
    vocaList = bayes.createVocabularyList(initialDocList)
    trainVocabularyMattrix = []
    # 将训练的文档集合针对vocaList进行标记
    for words in initialDocList:
        signedFeatureList = bayes.checkSignedFeatureList(vocaList, words)
        trainVocabularyMattrix.append(signedFeatureList)

    p_WiBasedOnClass0, p_WiBasedOnClass1, pAbusive = bayes.trainNavieBayesian(trainVocabularyMattrix, cityTypes)

    topCity0Words = []
    topCity1Words = []
    for i in range(len(p_WiBasedOnClass0)):
        if p_WiBasedOnClass0[i] > -6.0:
            topCity0Words.append(vocaList[i])
        if p_WiBasedOnClass1[i] > -6.0:
            topCity1Words.append(vocaList[i])

    print "*******City0最常用20的词汇*********"
    for word in topCity0Words[:20]:
        print word
    print "*******City1最常用的词汇*********"
    for word in topCity1Words[:20]:
        print word
def trainNavieBayesianTest():
    wordsList, classTypes = bayes.loadDataSet()
    vocaList = bayes.createVocabularyList(wordsList)
    # 将feature对应的标记为0,1
    trainVocabularyMattrix = []
    for words in wordsList:
        trainVocabularyMattrix.append(bayes.checkSignedFeatureList(vocaList, words))

    # print np.array(trainVocabularyMattrix)
    p_WiBasedOnClass0, p_WiBasedOnClass1, pAbusive = bayes.trainNavieBayesian(trainVocabularyMattrix, classTypes)
    print p_WiBasedOnClass0, '\n'
    print p_WiBasedOnClass1
    print pAbusive