def localWordsTest(city0Rss, city1Rss):
    """
    测试根据输入的text分类城市的准确率
    :param city0Rss:
    :param city1Rss:
    过滤垃圾邮件
    :return:
    """
    initialDocList, fullText, cityTypes = loadRSSText(city0Rss, city1Rss)
    voclist = bayes.createVocabularyList(initialDocList)
    print "未删除高频词汇的词汇表长度:", len(voclist)
    # 出现频率最高的词汇,例如:I and 等辅助词
    deletedVoc = calcFrequentWords(voclist, fullText)
    # 去除词汇列表的高频词汇
    for word in deletedVoc:
        if word[0] in voclist:
            voclist.remove(word[0])
    print "删除后的词汇表长度:", len(voclist)

    # 从initialDocList中随机创建10个待测试的文档
    testDocList = []
    # 待测试邮件的类型
    testDocClassList = []
    """
    注意此处随机选择10个数据,添加到测试集合,同时将原有的数据集删除,
    这种随机选择数据的一部分作为训练集合,而剩余部分作为测试集合的过程称为
    留存交叉验证:hold-out cross validation
    """
    for i in range(10):
        randomIndex = int(random.uniform(0, len(initialDocList)))
        testDocClassList.append(cityTypes[randomIndex])
        testDocList.append(initialDocList[randomIndex])
        del (initialDocList[randomIndex])
        del (cityTypes[randomIndex])

    errorCount = 0
    for j in range(len(testDocList)):
        classType = bayes.classifyNavieBayesian2(voclist, initialDocList, cityTypes, testDocList[j])
        if classType != testDocClassList[j]:  # 预测的结果和实际的结果进行比较
            print "分类错误的信息:", testDocList[j], "\n属于", testDocClassList[j], "错误分类成了:", classType
            errorCount += 1

    # 计算分类的误差
    errorRate = float(errorCount) / len(testDocList)
    print "the error rate is :", errorRate
    return errorRate