コード例 #1
0
def getUsers(table):
    """Fetch all users stored in a table.

    :param table: name of the table to read
    :return: list of TwitterUser objects, exactly as handed back by
        ``mysql.getUsersInfo``
    """
    return mysql.getUsersInfo(table)
コード例 #2
0
def GenerateAllUsersInterestTags(table="StandardUsers"):
    """Generate interest tags for every user of ``table`` and store them.

    For each user returned by ``mysql.getUsersInfo(table)`` the tags are
    computed with ``GenerateInterestsWithFollowers`` and written back through
    ``mysql.updateUserInterest`` as UTF-8 bytes.  Processing is best-effort:
    a failure on one user is reported and counted, then the loop moves on.

    :param table: name of the user table to read from and update
    :return: None; progress is reported on standard output
    """
    users = mysql.getUsersInfo(table)
    count = 0
    loss = 0
    for user in users:
        try:
            interests = GenerateInterestsWithFollowers(user.id)
            print("%s:" % user.id)
            print(interests)
            # Persist the tags to the database.
            mysql.updateUserInterest(table, user.id, interests.encode('utf-8'))
        except Exception as e:
            loss += 1
            print("lose userid:%s" % user.id)
            # Report why this user failed; repr() is used so that a
            # non-ASCII exception message cannot itself raise while printing.
            print("lose reason:%r" % (e,))
            print("loss %d users" % loss)
        # Failed users are included in the "finished" tally as well.
        count += 1
        print("finished %d users" % count)
コード例 #3
0
def _latin1_to_utf8(text):
    """Decode ``text`` as Latin-1 and return it re-encoded as UTF-8 bytes."""
    return text.decode("Latin-1").encode('utf-8')


def InsertStandardUsers(table):
    """Copy every user of the MySQL ``table`` into the 'StandardUsers' collection.

    Users come from ``mysql.getUsersInfo(table)``; one document per user is
    inserted, then a unique index on ``user_id`` is requested.

    :param table: name of the source table to read users from
    :return: None; progress is reported on standard output
    """
    db = Conn()
    collection = db['StandardUsers']
    users = mysql.getUsersInfo(table)
    # Start inserting.
    count = 0
    for user in users:
        # Free-text fields are re-encoded from Latin-1 to UTF-8; every other
        # field is copied unchanged (user_id is coerced to long).
        data = {
            'user_id': long(user.id),
            'screen_name': user.screen_name,
            'name': _latin1_to_utf8(user.name),
            'location': _latin1_to_utf8(user.location),
            'statuses_count': user.statuses_count,
            'friends_count': user.friends_count,
            'followers_count': user.followers_count,
            'favourites_count': user.favourites_count,
            'verified': user.verified,
            'category': user.category,
            'influenceScore': user.influenceScore,
            'rank_influ': user.rank_influ,
            'psy': user.psy,
            'psy_seq': user.psy_seq,
            'psy_tweets_starttime': user.psy_tweets_starttime,
            'interest_tags': _latin1_to_utf8(user.interest_tags),
            'description': _latin1_to_utf8(user.description),
            'crawler_date': user.crawler_date,
        }
        collection.insert(data)
        count += 1
        print("insert %d users" % count)
    # Build the index.
    # NOTE(review): assumes Conn() returns a PyMongo database - confirm.
    # PyMongo spells this create_index(); `ensureIndex` is the mongo-shell
    # name, and on a PyMongo collection calling it raises TypeError, which the
    # handler below swallowed, so the index was never actually created.
    try:
        collection.create_index("user_id", unique=True)
    except Exception as e:
        print("索引建立失败")
        # Show the cause (e.g. duplicate user_id values already inserted).
        print("index error:%r" % (e,))
コード例 #4
0
def getUsersInfo(table):
    """Return the result of ``mydb.getUsersInfo`` for ``table``.

    :param table: table name, passed through unchanged
    :return: whatever ``mydb.getUsersInfo(table)`` returns
    """
    return mydb.getUsersInfo(table)
コード例 #5
0
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator`` as a float, or 0.0 if the denominator is 0."""
    if not denominator:
        return 0.0
    return numerator * 1.0 / denominator


def _dump_pickle(obj, path):
    """Pickle ``obj`` into the file at ``path``, closing the file even on error."""
    with open(path, "wb") as save_file:
        pickle.dump(obj, save_file)


def _evaluate_classifier(results, category_dic, categories):
    """Return (accuracy, mean precision, mean recall, F-score) for one classifier."""
    accuracy = _safe_ratio(calcCorrectN(results, category_dic),
                           len(category_dic))

    precisions = []
    recalls = []
    for category in categories:
        # How many users the classifier assigned to this category.
        predicted = calcCategoryN(results, category)
        # How many of those assignments agree with the ground truth.
        correct = calcCategoryCorrectN(results, category, category_dic)
        # How many users really belong to this category.
        actual = calcCategoryN(category_dic, category)
        # A category that was never predicted (or has no ground-truth users)
        # scores 0.0 instead of raising ZeroDivisionError.
        precisions.append(_safe_ratio(correct, predicted))
        recalls.append(_safe_ratio(correct, actual))

    # Average over the categories actually present rather than a fixed 9.
    precision = _safe_ratio(sum(precisions), len(precisions))
    recall = _safe_ratio(sum(recalls), len(recalls))
    fscore = _safe_ratio(precision * recall * 2, precision + recall)
    return accuracy, precision, recall, fscore


def Accuracy(table="StandardUsers"):
    """Evaluate three classifiers on the users of ``table`` and print the scores.

    The users' categories (``GetCategoryById``) serve as ground truth.  The
    predictions returned by ``GetClassifyResultsByWords`` are pickled to
    ``S_results.pickle`` (multinomial), ``r_results.pickle`` (random forest)
    and ``a_results.pickle`` (AdaBoost) in the working directory.  For each
    classifier the overall fraction of correct predictions, the per-category
    mean precision and recall, and the resulting F-score are printed.

    :param table: name of the table holding the labelled users
    :return: None; results go to standard output and the three pickle files
    """
    StandardUsers = mysql.getUsersInfo(table)
    categories = mysql.getCategoriesAndNumber(table)

    # Collect the users' ids.
    StandardUsers_id = [user.id for user in StandardUsers]
    # Ground truth.
    category_dic = GetCategoryById(StandardUsers)

    RandomForest_results, Multinomial_results, AdaBoost_results = (
        GetClassifyResultsByWords(StandardUsers_id))

    # Keep the raw predictions so they can be inspected without re-running.
    _dump_pickle(Multinomial_results, "S_results.pickle")
    _dump_pickle(RandomForest_results, "r_results.pickle")
    _dump_pickle(AdaBoost_results, "a_results.pickle")

    category_names = list(categories.keys())

    S_Correct, S_Precision, S_Recall, S_FScore = _evaluate_classifier(
        Multinomial_results, category_dic, category_names)
    R_Correct, R_Precision, R_Recall, R_FScore = _evaluate_classifier(
        RandomForest_results, category_dic, category_names)
    A_Correct, A_Precision, A_Recall, A_FScore = _evaluate_classifier(
        AdaBoost_results, category_dic, category_names)

    # In each line the first figure is the overall fraction of correct
    # predictions; the other three are the per-category averages and F-score.
    print("单模型多项式贝叶斯 精确率:%f\t平均准确率:%f\t平均召回率:%f\t平均F-Score:%f" % (
        S_Correct, S_Precision, S_Recall, S_FScore))
    print("多模型融合随机森林 精确率:%f\t平均准确率:%f\t平均召回率:%f\t平均F-Score:%f" % (
        R_Correct, R_Precision, R_Recall, R_FScore))
    print("多模型融合AdaBoost 精确率:%f\t平均准确率:%f\t平均召回率:%f\t平均F-Score:%f" % (
        A_Correct, A_Precision, A_Recall, A_FScore))