コード例 #1
0
def save_result_poa_list_to_db():
    """Rebuild the ``NewsPOA.news`` collection from per-university JSON files.

    For every entry returned by ``get_university_list()`` the file
    ``../news_result/<zh_name>.json`` is loaded with ``load_json_file``, each
    loaded item is passed through ``predict_poa_result_from_documnet_dict``
    and the results are bulk-inserted into ``NewsPOA.news``.  The collection
    is dropped before the first insert, so its previous contents are lost.

    The database connection is always closed, even if loading, prediction or
    insertion fails part-way through.

    Returns:
        None.
    """
    # Fetch the university list and the database configuration.
    university_list = get_university_list()
    db_config = get_database_dict_info()

    # Open the database connection.
    conn = MongoClient(db_config["host"], db_config["port"])
    try:
        news_collection = conn.NewsPOA["news"]
        news_collection.drop()

        for uni in university_list:
            print("开始", uni["zh_name"])
            json_path = "../news_result/" + uni["zh_name"] + ".json"
            # Assumes load_json_file returns a list of news dicts -- TODO confirm.
            current_uni_news_list = load_json_file(json_path)
            result_uni_news_list = [
                predict_poa_result_from_documnet_dict(news)
                for news in current_uni_news_list
            ]

            # PyMongo rejects an empty bulk insert (InvalidOperation), so a
            # university without any news must not abort the whole run.
            # NOTE(review): Collection.insert is the legacy API (removed in
            # PyMongo 4); kept because the driver version is not visible here.
            if result_uni_news_list:
                news_collection.insert(result_uni_news_list)
            print(uni["zh_name"], "的新闻分析完毕,共有", str(len(result_uni_news_list)),
                  "条")
    finally:
        conn.close()
コード例 #2
0
def save_newslist_to_db():
    """Fetch news for each university and store it in ``NewsPOA.newslist``.

    For every entry returned by ``get_university_list()`` the news documents
    produced by ``request_baidu_news`` are bulk-inserted into the
    ``newslist`` collection.  A university that already has at least one
    document with a matching ``Uname`` is skipped, so the function can be
    re-run to resume an interrupted crawl.

    The database connection is always closed, even if a request or an insert
    fails.

    Returns:
        None.
    """
    # Fetch the university list and the database configuration.
    university_list = get_university_list()
    db_config = get_database_dict_info()

    # Open the database connection.
    conn = MongoClient(db_config["host"], db_config["port"])
    try:
        newslist_collection = conn.NewsPOA["newslist"]

        for uni in university_list:
            # find_one stops at the first match; counting every matching
            # document is unnecessary for a pure existence check.
            already_saved = newslist_collection.find_one(
                {"Uname": uni["zh_name"]})
            if already_saved is not None:
                continue

            # Presumably the arguments 1 and MAX_PAGE_NUMBERS are the first
            # page and the page limit -- verify against request_baidu_news.
            news_documents_list = request_baidu_news(uni["zh_name"], 1,
                                                     MAX_PAGE_NUMBERS,
                                                     uni["en_name"])

            # PyMongo rejects an empty bulk insert (InvalidOperation); skip
            # this university instead of aborting the remaining ones.
            if not news_documents_list:
                print(uni["zh_name"], "没有获取到新闻,已跳过")
                continue

            newslist_collection.insert(news_documents_list)
            print(uni["zh_name"], "的新闻列表保存成功")
    finally:
        conn.close()

    print("新闻全部爬取完毕")
コード例 #3
0
def insert_university_list():
    """Insert the university list into ``NewsPOA.universitylist``.

    The entries returned by ``get_university_list()`` are bulk-inserted
    as-is.  The collection is not cleared beforehand.  The database
    connection is closed whether or not the insert succeeds.

    Returns:
        None.
    """
    # Fetch the university list and the database configuration.
    university_list = get_university_list()
    db_config = get_database_dict_info()

    # Open the database connection.
    conn = MongoClient(db_config["host"], db_config["port"])
    try:
        conn.NewsPOA["universitylist"].insert(university_list)
    finally:
        # Release the client's sockets even when the insert raises.
        conn.close()

    print("学校列表表创建成功")
コード例 #4
0
def save_newslist_into_file():
    """Dump the fetched news of every university to its own JSON file.

    For each entry returned by ``get_university_list()`` the result of
    ``request_baidu_news`` is serialised to ``./news_result/<zh_name>.json``
    as UTF-8, with non-ASCII characters written unescaped.

    Returns:
        None.
    """
    for uni in get_university_list():
        zh_name = uni["zh_name"]
        fetched_news = request_baidu_news(zh_name, 1, MAX_PAGE_NUMBERS,
                                          uni["en_name"])

        target_path = "".join(("./news_result/", zh_name, ".json"))
        with open(target_path, 'w', encoding='utf-8') as out_file:
            json.dump(fetched_news, out_file, ensure_ascii=False)

            print(zh_name, "的新闻列表保存成功")
コード例 #5
0
def add_negative_news_from_old_db():
    """Copy missing negative news from ``ResultPOA.news`` into ``NewsPOA.newslist``.

    Every document with ``sentiment == "-1"`` in the old ``ResultPOA.news``
    collection whose ``url`` is not reported by ``judge_url_in_list`` as
    present among the negative documents of ``NewsPOA.newslist`` is given
    ``media = "unkown"`` and ``ranking = "300"`` and inserted into
    ``NewsPOA.newslist``.  Afterwards ``create_news_numbers_info()`` is
    called.

    Both connections are closed on every exit path.

    Returns:
        None.
    """
    # NOTE(review): both clients use the same hard-coded address rather than
    # the values from get_database_dict_info() -- confirm this is intended.
    new_conn = MongoClient("121.42.236.250", 27034)
    old_conn = MongoClient("121.42.236.250", 27034)
    try:
        old_neg_news_list = list(
            old_conn.ResultPOA["news"].find({"sentiment": "-1"}))
        new_neg_news_list = list(
            new_conn.NewsPOA["newslist"].find({"sentiment": "-1"}))

        add_neg_list = []
        for current_news in old_neg_news_list:
            if not judge_url_in_list(new_neg_news_list, current_news["url"]):
                # "unkown" (sic) is stored data; left unchanged because other
                # code may match on this exact value.
                current_news["media"] = "unkown"
                current_news["ranking"] = "300"
                add_neg_list.append(current_news)

        # PyMongo rejects an empty bulk insert (InvalidOperation).  When there
        # is nothing new to copy, the statistics must still be refreshed.
        if add_neg_list:
            new_conn.NewsPOA["newslist"].insert(add_neg_list)

        create_news_numbers_info()
    finally:
        new_conn.close()
        old_conn.close()
コード例 #6
0
def compute_score():
    """Recompute the per-media influence scores of every university.

    For each entry returned by ``get_university_list()``, every document in
    ``NewsPOA.newslist`` with a matching ``Uname`` contributes
    ``1 / (ranking / 100 + 1)`` to the score of its ``media``.  One
    ``{"Uname", "media", "score"}`` document per media is then inserted into
    ``NewsPOA.influence``, which is dropped before the computation starts.

    The database connection is always closed, even if a query or an insert
    fails.

    Returns:
        None.
    """
    university_list = get_university_list()

    # NOTE(review): the address is hard-coded rather than read from
    # get_database_dict_info() -- confirm this is intended.
    conn = MongoClient("121.42.236.250", 27034)
    try:
        news_db = conn.NewsPOA
        news_db["influence"].drop()

        for uni in university_list:
            uni_name = uni['zh_name']

            uni_news_list = news_db["newslist"].find({"Uname": uni_name})
            print("开始计算 ", uni_name, "的数据...")

            score = {}
            for news in uni_news_list:
                # A smaller ranking value yields a larger weight.
                weight = 1 / (float(news["ranking"]) / 100 + 1)
                score[news["media"]] = score.get(news["media"], 0) + weight

            score_list = [
                {"Uname": uni_name, "media": media, "score": value}
                for media, value in score.items()
            ]

            # PyMongo rejects an empty bulk insert (InvalidOperation); a
            # university without news must not abort the remaining ones.
            if score_list:
                news_db["influence"].insert(score_list)
            print(uni_name, "的数据保存完毕")
    finally:
        conn.close()
コード例 #7
0
def create_news_numbers_info():
    """Rebuild ``NewsPOA.newsNumber`` with per-university news counts.

    For each entry returned by ``get_university_list()`` and each
    classification (``study``, ``activity``, ``entrance``, ``social``) the
    documents in ``NewsPOA.news`` are counted per sentiment.  Each
    ``<classification>Number`` field holds a four-element list:
    ``[count("-1"), count("0"), count("1"), total]``.

    The ``newsNumber`` collection is dropped and refilled only after all
    counts have been gathered.  The database connection is always closed.

    Returns:
        None.
    """
    university_list = get_university_list()

    categories = ("study", "activity", "entrance", "social")
    sentiments = ("-1", "0", "1")

    # NOTE(review): the address is hard-coded rather than read from
    # get_database_dict_info() -- confirm this is intended.
    conn = MongoClient("121.42.236.250", 27034)
    try:
        news_db = conn.NewsPOA
        news_number_list = []

        for uni in university_list:
            entry = {"Uname": uni["zh_name"], "abbr": uni["en_name"]}

            for category in categories:
                counts = [
                    news_db["news"].find({
                        "Uname": uni["zh_name"],
                        "classification": category,
                        "sentiment": sentiment,
                    }).count()
                    for sentiment in sentiments
                ]
                # The fourth element is the total over the three sentiments.
                counts.append(sum(counts))
                entry[category + "Number"] = counts

            news_number_list.append(entry)

        news_db["newsNumber"].drop()
        # PyMongo rejects an empty bulk insert (InvalidOperation); with no
        # universities the collection is simply left empty.
        if news_number_list:
            news_db["newsNumber"].insert(news_number_list)
        print("新闻数量表保存成功")
    finally:
        conn.close()