def save_result_poa_list_to_db():
    """Rebuild the ``NewsPOA.news`` collection from per-university JSON files.

    The ``news`` collection is dropped first. Then, for every university
    returned by ``get_university_list()``, the file
    ``../news_result/<zh_name>.json`` is loaded, each document is passed
    through ``predict_poa_result_from_documnet_dict`` and the results are
    inserted into ``news``.

    The MongoDB client is always closed, even if loading, prediction or
    insertion raises.
    """
    # Fetch the university list and the database configuration.
    university_list = get_university_list()
    db_config = get_database_dict_info()

    # Open the database connection.
    conn = MongoClient(db_config["host"], db_config["port"])
    try:
        news_collection = conn.NewsPOA["news"]
        news_collection.drop()

        for uni in university_list:
            print("开始", uni["zh_name"])
            json_path = "../news_result/" + uni["zh_name"] + ".json"
            result_uni_news_list = [
                predict_poa_result_from_documnet_dict(document)
                for document in load_json_file(json_path)
            ]
            # PyMongo rejects an empty bulk insert (InvalidOperation), so a
            # university without any news must not abort the whole run.
            if result_uni_news_list:
                news_collection.insert(result_uni_news_list)
            print(uni["zh_name"], "的新闻分析完毕,共有",
                  str(len(result_uni_news_list)), "条")
    finally:
        conn.close()
def save_newslist_to_db():
    """Crawl Baidu news for each university and store it in ``NewsPOA.newslist``.

    A university is skipped when ``newslist`` already holds at least one
    document whose ``Uname`` equals its ``zh_name``. Otherwise
    ``request_baidu_news`` is called for pages ``1..MAX_PAGE_NUMBERS`` and the
    returned documents are inserted.

    The MongoDB client is always closed, even if crawling or insertion raises.
    """
    # Fetch the university list and the database configuration.
    university_list = get_university_list()
    db_config = get_database_dict_info()

    # Open the database connection.
    conn = MongoClient(db_config["host"], db_config["port"])
    try:
        newslist = conn.NewsPOA["newslist"]

        for uni in university_list:
            # Already crawled on a previous run: do not fetch again.
            if newslist.find({"Uname": uni["zh_name"]}).count() != 0:
                continue

            news_documents_list = request_baidu_news(
                uni["zh_name"], 1, MAX_PAGE_NUMBERS, uni["en_name"])

            # PyMongo rejects an empty bulk insert (InvalidOperation); skip
            # this university instead of aborting the remaining ones.
            if not news_documents_list:
                print(uni["zh_name"], "没有抓取到新闻,已跳过")
                continue

            newslist.insert(news_documents_list)
            print(uni["zh_name"], "的新闻列表保存成功")

        print("新闻全部爬取完毕")
    finally:
        conn.close()
def insert_university_list():
    """Insert the university list into ``NewsPOA.universitylist``.

    NOTE: the collection is not dropped beforehand, so calling this function
    repeatedly inserts the list again each time.

    The MongoDB client is always closed, even if the insert raises.
    """
    # Fetch the university list and the database configuration.
    university_list = get_university_list()
    db_config = get_database_dict_info()

    # Open the database connection.
    conn = MongoClient(db_config["host"], db_config["port"])
    try:
        conn.NewsPOA["universitylist"].insert(university_list)
    finally:
        conn.close()
    print("学校列表表创建成功")
def save_newslist_into_file():
    """Fetch every university's Baidu news list and dump each one to JSON.

    For each entry of ``get_university_list()`` the result of
    ``request_baidu_news`` (pages ``1..MAX_PAGE_NUMBERS``) is written to
    ``./news_result/<zh_name>.json`` as UTF-8, with non-ASCII characters left
    unescaped.
    """
    for school in get_university_list():
        zh_name = school["zh_name"]
        fetched_news = request_baidu_news(
            zh_name, 1, MAX_PAGE_NUMBERS, school["en_name"])

        target_path = "".join(("./news_result/", zh_name, ".json"))
        with open(target_path, 'w', encoding='utf-8') as out_file:
            json.dump(fetched_news, out_file, ensure_ascii=False)

        print(zh_name, "的新闻列表保存成功")
def add_negative_news_from_old_db():
    """Copy missing negative news from ``ResultPOA.news`` into ``NewsPOA.newslist``.

    Documents with ``sentiment == "-1"`` are read from both collections. Every
    old document whose ``url`` is reported as absent by ``judge_url_in_list``
    gets ``media = "unkown"`` and ``ranking = "300"`` and is inserted into
    ``newslist``. Afterwards ``create_news_numbers_info()`` is called.

    Both MongoDB clients are always closed, even if a step raises.
    """
    # NOTE(review): both clients point at the same hard-coded address instead
    # of the values from get_database_dict_info() -- confirm this is intended.
    new_conn = MongoClient("121.42.236.250", 27034)
    old_conn = MongoClient("121.42.236.250", 27034)
    try:
        old_neg_news_list = list(
            old_conn.ResultPOA["news"].find({"sentiment": "-1"}))
        new_neg_news_list = list(
            new_conn.NewsPOA["newslist"].find({"sentiment": "-1"}))

        add_neg_list = []
        for current_news in old_neg_news_list:
            # The explicit `== False` is kept on purpose: the return contract
            # of judge_url_in_list is not visible here, and `not ...` would
            # treat a None result differently.
            if judge_url_in_list(new_neg_news_list, current_news["url"]) == False:
                # The "unkown" spelling is stored data; left unchanged.
                current_news["media"] = "unkown"
                current_news["ranking"] = "300"
                add_neg_list.append(current_news)

        # PyMongo rejects an empty bulk insert (InvalidOperation). Without
        # this guard a run with nothing to add would crash before the
        # statistics refresh below.
        if add_neg_list:
            new_conn.NewsPOA["newslist"].insert(add_neg_list)

        create_news_numbers_info()
    finally:
        new_conn.close()
        old_conn.close()
def compute_score():
    """Recompute the per-media influence score of every university.

    ``NewsPOA.influence`` is dropped first. For each university, its
    ``newslist`` documents (matched on ``Uname``) are grouped by ``media`` and
    each document contributes ``1 / (float(ranking) / 100 + 1)`` to its
    group. One ``{"Uname", "media", "score"}`` document per group is inserted
    into ``influence``.

    The MongoDB client is always closed, even if a step raises.
    """
    university_list = get_university_list()

    # NOTE(review): the address is hard-coded rather than read from
    # get_database_dict_info() -- confirm this is intended.
    conn = MongoClient("121.42.236.250", 27034)
    try:
        news_poa = conn.NewsPOA
        news_poa["influence"].drop()

        for uni in university_list:
            uni_name = uni['zh_name']
            print("开始计算 ", uni_name, "的数据...")

            score = {}
            for news in news_poa["newslist"].find({"Uname": uni_name}):
                weight = 1 / (float(news["ranking"]) / 100 + 1)
                score[news["media"]] = score.get(news["media"], 0.0) + weight

            score_list = [
                {"Uname": uni_name, "media": media, "score": value}
                for media, value in score.items()
            ]
            # A university without news yields no scores; PyMongo rejects an
            # empty bulk insert (InvalidOperation), so skip it.
            if score_list:
                news_poa["influence"].insert(score_list)
            print(uni_name, "的数据保存完毕")
    finally:
        conn.close()
def _count_news_by_sentiment(news_collection, uni_name, classification):
    """Return the news counts of one university and classification.

    The result is a four-element list: the number of documents with
    ``sentiment`` equal to ``"-1"``, ``"0"`` and ``"1"`` (in that order),
    followed by the sum of those three counts.
    """
    counts = [
        news_collection.find({
            "Uname": uni_name,
            "classification": classification,
            "sentiment": sentiment,
        }).count()
        for sentiment in ("-1", "0", "1")
    ]
    counts.append(sum(counts))
    return counts


def create_news_numbers_info():
    """Rebuild ``NewsPOA.newsNumber`` with per-university news counts.

    For every university one document is produced with the keys ``Uname``,
    ``abbr`` and ``studyNumber`` / ``activityNumber`` / ``entranceNumber`` /
    ``socialNumber``. Each ``*Number`` value is the four-element list returned
    by ``_count_news_by_sentiment`` for that classification, counted over
    ``NewsPOA.news``. The ``newsNumber`` collection is dropped and refilled
    with these documents.

    The MongoDB client is always closed, even if a step raises.
    """
    university_list = get_university_list()

    # NOTE(review): the address is hard-coded rather than read from
    # get_database_dict_info() -- confirm this is intended.
    conn = MongoClient("121.42.236.250", 27034)
    try:
        news_poa = conn.NewsPOA
        news_collection = news_poa["news"]

        news_number_list = []
        for uni in university_list:
            entry = {"Uname": uni["zh_name"], "abbr": uni["en_name"]}
            for classification in ("study", "activity", "entrance", "social"):
                entry[classification + "Number"] = _count_news_by_sentiment(
                    news_collection, uni["zh_name"], classification)
            news_number_list.append(entry)

        news_poa["newsNumber"].drop()
        # PyMongo rejects an empty bulk insert (InvalidOperation); with no
        # universities the collection simply stays empty.
        if news_number_list:
            news_poa["newsNumber"].insert(news_number_list)
        print("新闻数量表保存成功")
    finally:
        conn.close()