def get_tree_data(idx_type, test_mode=False):
    """Build per-government tree-node coloring payloads for *idx_type* and
    upsert them into the ``pw_tree_data`` table.

    :param idx_type: index type code; passed to get_score_df_data and stored
        in the type_code column.
    :param test_mode: True -> write to the test DB (ProductTest),
        False -> formal DB (ProductPWFormal).
    :return: None; side effect is a batched upsert into pw_tree_data.
    """
    # All gov_ids known to the module-level 2861 gaode geo DataFrame.
    total_gov_ids = df_2861_gaode_geo_all.index.values.tolist()
    score_df = get_score_df_data(idx_type, total_gov_ids, test_mode)
    # Classify each gov into a scatter quadrant from its people/official
    # scores against the configured thresholds.
    score_df["area_eng"] = score_df.apply(
        lambda x: scatter_api.judge_area(x["people_score"], x["official_score"],
                                         para.DataParas.X_CTHD,
                                         para.DataParas.Y_CTHD),
        axis=1)
    score_df["area"] = score_df["area_eng"].apply(
        lambda x: para.AREAS_NAMES_DICT[x])
    score_df["color"] = score_df["area_eng"].apply(
        lambda x: para.AREAS_COLORS_DICT[x])
    # Node payload: solid fill (same color twice, no transition time).
    score_df["data"] = score_df["color"].apply(lambda x: {
        "ratio": 100,
        "node_bg_conf": {
            "node_bg_color": [x, x],
            "node_bg_time": 0
        }
    })
    score_df["data"] = score_df["data"].apply(
        lambda x: json.dumps(x, ensure_ascii=False))
    score_df["type_code"] = idx_type
    score_df["product_name"] = "MONG"
    # Drop known-bad governments (gov_id 232/233/249/250/1450).
    score_df = score_df.reset_index()
    score_df = score_df[~score_df["gov_id"].isin([232, 233, 249, 250, 1450])]
    final_score_df = deepcopy(
        score_df[["gov_id", "product_name", "type_code", "data", "version"]])
    data_list = final_score_df.to_dict(orient="records")
    if test_mode:
        db_obj = DBObj(DBShortName.ProductTest).obj
    else:
        db_obj = DBObj(DBShortName.ProductPWFormal).obj
    db_obj.get_conn()
    table_name = "pw_tree_data"
    # No version history is kept: ON CONFLICT overwrites the existing row.
    # NOTE(review): SQL is built by string formatting, not parameterized —
    # acceptable only because all values originate from internal data.
    sqlstr_head = "INSERT INTO %s (gov_id, product_name, type_code, data, version) VALUES ({gov_id}, '{product_name}', '{type_code}', '{data}', '{version}') ON CONFLICT (gov_id, type_code, product_name) DO UPDATE SET data='{data}', version='{version}', update_time=now();" % (
        table_name)
    sqlstr_list = [sqlstr_head.format(**data_row) for data_row in data_list]
    row_num = len(sqlstr_list)
    # Execute in batches of 1000 statements to bound statement size.
    for i in range(0, row_num, 1000):
        db_obj.execute_any_sql("".join(sqlstr_list[i:i + 1000]))
        print("&&&&&&&&&&已插入{}条&&&&&&&&&&&".format(len(sqlstr_list[i:i + 1000])),
              flush=True)
    db_obj.disconnect()
def get_score_from_db(idx_type, gov_ids, test_mode=True):
    """Read the latest-version score rows for *idx_type* from the database.

    :param idx_type: index type filtering the gov_modern_score rows.
    :param gov_ids: iterable of government ids (coerced to int) to fetch.
    :param test_mode: True -> read from the test DB, False -> formal DB.
    :return: rows (gov_id, score_info, version) for the max version of
        idx_type, as returned by read_from_table.
    """
    db_obj = (DBObj(DBShortName.ProductPWDataTest).obj
              if test_mode else DBObj(DBShortName.ProductPWDataFormal).obj)
    table_name = "gov_modern_score"
    # Normalize ids to a comma-separated integer list for the IN (...) clause.
    gov_ids_str = ", ".join(str(int(gid)) for gid in gov_ids)
    db_obj.get_conn()
    # Join against a one-row subquery holding MAX(version) so only the
    # newest version of this index type is returned.
    sqlstr_ = "SELECT {0}.gov_id, {0}.score_info, {0}.version FROM {0}, (SELECT MAX(version) AS version FROM {0} WHERE index_type = '{2}') aa WHERE {0}.index_type='{2}' AND {0}.version = aa.version AND {0}.gov_id in ({1});".format(
        table_name, gov_ids_str, idx_type)
    rows = db_obj.read_from_table(sqlstr_)
    db_obj.disconnect()
    return rows
def update_map_data(test_mode=True): """ @功能:写地图数据 :param test_mode: :return: """ if test_mode: db_obj = DBObj(DBShortName.ProductPWTest)
from datetime import datetime import utils.path_manager as pm from utils.db_base import TableName, DBShortName, DBObj sg_file_path = pm.LOCAL_STABLE_PROJECT_STORAGE + "state_govern/" + "files/" sg_data_path = pm.LOCAL_STABLE_PROJECT_STORAGE + "state_govern/" + "data/" leaf_conf = "SG_leaf_conf.csv" index_conf = "SG_index_conf.csv" df_leaf_conf = pd.read_csv(sg_file_path+leaf_conf, encoding="GBK", index_col="index_code") df_index_conf = pd.read_csv(sg_file_path+index_conf, encoding="GBK", index_col="index_code") PWDataTestObj = DBObj(DBShortName.ProductPWDataTest).obj PWDataFormalObj = DBObj(DBShortName.ProductPWDataFormal).obj db_conf = {True: PWDataTestObj, False: PWDataFormalObj} treedata_insert_sqlstr = "INSERT INTO %s (gov_id, product_name, type_code, data, version, update_time) VALUES ({gov_id}, '{product_name}', '{type_code}', '{data}', '{version}', '%s') ON CONFLICT (gov_id, product_name, type_code) DO UPDATE SET data = '{data}', version = '{version}', update_time = '%s';"%(TableName.PWTreeData, datetime.now(), datetime.now()) mapcolor_insert_sqlstr = "INSERT INTO %s (gov_id, node_name, value, version) VALUES ({gov_id}, '{node_name}', '{value}', '{version}') ON CONFLICT (gov_id, node_name) DO UPDATE SET value = '{value}', version = '{version}';"%(TableName.PWMapColor) # 灰 - 红 colors_s = ['#808080', '#A9A9A9', '#BCBCBC', '#DEDEDE', '#EBEBEB', '#FEF2F2', '#FCADAF', '#FB777B', '#FA3F48', '#FF0000'] legends = [{"value": 0, "color": colors_s[3], "name": "差"}, {"value": 25, "color": colors_s[5], "name": "普通"}, {"value": 50, "color": colors_s[7], "name": "较好"}, {"value": 75, "color": colors_s[9], "name": "很好"},
def get_score_from_local_to_db(version, idx_type, with_stats_cols=None,
                               test_mode=True):
    """Load accumulated scores for *idx_type* from local CSVs and upsert
    them into the gov_modern_score table for front-end consumption.

    :param version: score version string; also the storage sub-directory.
    :param idx_type: index type; selects the CSV files and fills the
        index_type column.
    :param with_stats_cols: optional list of extra stats columns to merge
        from the STATS CSV. Defaults to None (no extras) — was a mutable
        default ``[]``, replaced to avoid shared-state surprises.
    :param test_mode: True -> write to the test DB, False -> formal DB.
    :return: None; side effect is a batched upsert into gov_modern_score.
    """
    if with_stats_cols is None:
        with_stats_cols = []
    file_path = pm.STABLE_SCORE_STORAGE + version + "/" + "GovModern/"
    score_file = "{}_ACCUMULATIVE_SCORE.csv".format(idx_type.upper())
    df_score = pd.read_csv(file_path + score_file, encoding="utf8",
                           index_col="gov_id")
    dst_cols = [
        "people_data", "official_data", "people_rltpct", "official_rltpct",
        "people_score", "official_score"
    ]
    if with_stats_cols:
        # Merge requested stats columns from the companion STATS file.
        stats_file = "{}_ACCUMULATIVE_STATS.csv".format(idx_type.upper())
        df_stats = pd.read_csv(file_path + stats_file, encoding="utf8",
                               index_col="gov_id")
        df_score[with_stats_cols] = df_stats[with_stats_cols]
        dst_cols = dst_cols + with_stats_cols
    # Pack all destination columns into one JSON blob per row.
    df_score["score_info"] = df_score.apply(
        lambda x: {idx_col: x[idx_col] for idx_col in dst_cols}, axis=1)
    df_score["score_info"] = df_score["score_info"].apply(
        lambda x: json.dumps(x, ensure_ascii=False))
    df_score["version"] = version
    df_score = df_score.reset_index()
    df_into_db = deepcopy(df_score[["gov_id", "score_info", "version"]])
    # 2019-11-14 change: one shared table; index_type column distinguishes
    # indices instead of per-index tables.
    df_into_db["index_type"] = idx_type
    if test_mode:
        db_obj = DBObj(DBShortName.ProductPWDataTest).obj
    else:
        db_obj = DBObj(DBShortName.ProductPWDataFormal).obj
    db_obj.get_conn()
    table_name = "gov_modern_score"
    data_list = df_into_db.to_dict(orient="records")
    sqlstr_head = "INSERT INTO %s (gov_id, score_info, version, index_type) VALUES ({gov_id}, '{score_info}', '{version}', '{index_type}') ON CONFLICT (gov_id, version, index_type) DO UPDATE SET score_info='{score_info}', update_time='%s';" % (
        table_name, datetime.now())
    sqlstr_list = [sqlstr_head.format(**data_row) for data_row in data_list]
    row_num = len(sqlstr_list)
    # Execute in batches of 1000 statements to bound statement size.
    for i in range(0, row_num, 1000):
        db_obj.execute_any_sql("".join(sqlstr_list[i:i + 1000]))
        print("&&&&&&&&&&已插入{}条&&&&&&&&&&&".format(len(sqlstr_list[i:i + 1000])),
              flush=True)
    db_obj.disconnect()
def get_leaf_score(version, test_mode=True, index_codes=None):
    """Compute leaf-node index data (score/rank/percentile per government)
    and insert it into the tree-data table.

    :param version: product version string stored with each row; also the
        upper bound when fetching index values.
    :param test_mode: True -> write to the test DB, False -> formal DB.
    :param index_codes: optional subset of leaf index codes to process;
        defaults to None meaning all of df_leaf_conf — was a mutable
        default ``[]``, replaced to avoid shared-state surprises.
    :return: None; side effect is insert_tree_data per processed leaf.
    """
    df_leaf_conf_ = (df_leaf_conf[df_leaf_conf.index.isin(index_codes)]
                     if index_codes else df_leaf_conf)
    if test_mode:
        tree_data_obj = DBObj(DBShortName.ProductPWDataTest).obj
    else:
        tree_data_obj = DBObj(DBShortName.ProductPWDataFormal).obj
    tree_data_obj.get_conn()
    for index_code, row in df_leaf_conf_.iterrows():
        # idx_rank == 1 means "smaller value ranks better".
        rank_ascending = row["idx_rank"] == 1
        if row["source"] == "gov":
            # Government index: value payload comes from the gov index DB.
            result = get_gov_idx_data(index_code, version, get_conn=True)
            data = result[0]
            idx_version = data["version"]
            value_dict = data["value"]
            value_series = pd.Series(value_dict)
            value_df = pd.DataFrame(value_series, columns=["value"])
            value_df.index.name = "gov_id"
            value_df.index = value_df.index.astype(int)
        else:
            # Knowledge-base index: fetch and reshape to a gov_id -> value frame.
            result = get_knowledge_base_data(index_code, version)
            idx_version = result["version"][index_code]
            value_df = pd.DataFrame.from_dict(result["datas"]).T
            value_df = value_df.rename(columns={index_code: "value"})
            value_df.index.name = "gov_id"
            value_df.index = value_df.index.astype(int)
        # Missing-data complement strategy depends on how sparse the index is:
        if index_code not in county_missing_leafs:
            # Counties fairly complete: fill counties with the median, derive
            # city/province values from their counties.
            value_df_ = complement_prov_city_data_with_counties(
                value_df,
                columns=["value"],
                by=row["complement_by_subgovs"],
                verify_non_counties_with_median=True,
                with_hitec=False)
        elif index_code not in county_n_city_missing_leafs:
            # Counties sparse, cities fairly complete: cities by median,
            # provinces from their cities.
            value_df_ = complement_prov_city_data_with_counties(
                value_df,
                columns=["value"],
                by=row["complement_by_subgovs"],
                verify_non_counties_with_median=True,
                city_by_median=True,
                prov_by_sub_citys=True)
        else:
            # Both counties and cities very sparse: each level filled by its
            # own median.
            value_df_ = complement_prov_city_data_with_counties(
                value_df,
                columns=["value"],
                by=row["complement_by_subgovs"],
                verify_non_counties_with_median=True,
                city_by_median=True,
                prov_by_median=True)
        value_df_["type_code"] = index_code
        value_df_["product_name"] = "SG"
        value_df_["version"] = version
        df_county = gov_utils.get_all_county_gov_id_info(with_hitec=False)
        df_city = gov_utils.get_all_city_gov_id_info()
        df_prov = gov_utils.get_all_province_gov_id_info()
        # Rank each administrative level independently: percentile ("data"),
        # human-readable percentile description, and ordinal rank.
        for df_level, level_label in ((df_county, "区县"), (df_city, "市"),
                                      (df_prov, "省")):
            level_idx = df_level.index
            value_df_.loc[level_idx, "data"] = value_df_.loc[
                level_idx, "value"].rank(method="max", pct=True,
                                         ascending=rank_ascending)
            # Bind the label as a default arg to avoid late-binding issues.
            value_df_.loc[level_idx, "pctr_desc"] = value_df_.loc[
                level_idx, "data"].apply(
                    lambda x, lbl=level_label: "{}%的{}".format(
                        round(x * 100, 2), lbl))
            value_df_.loc[level_idx, "rank"] = value_df_.loc[
                level_idx, "data"].rank(method="min", ascending=False)
        value_df_["data"] = value_df_.apply(lambda x: json.dumps(
            {
                "score": round(x["data"] * 100, 2),
                "value": round(x["value"], 2),
                "rank": int(x["rank"]),
                "pctr": round(x["data"] * 100, 2),
                "pctr_desc": x["pctr_desc"],
                "idx_version": str(idx_version)
            },
            ensure_ascii=False),
                                            axis=1)
        # Expose gov_id as a column for the insert helper.
        value_df_ = value_df_.reset_index()
        insert_tree_data(value_df_, tree_data_obj)
        print(
            "tree_data叶子节点数据插入完毕!index_code={} idx_version={} version={}".
            format(index_code, idx_version, version),
            flush=True)
    tree_data_obj.disconnect()
@Description: Compute leaf-node scores and write them to the database.
"""
import json
import pandas as pd
from datetime import datetime
from copy import deepcopy

import utils.path_manager as pm
from utils.utilities import operate_es_knowledge
from utils.db_base import DBObj, TableName, DBShortName
from utils.get_2861_gaode_geo_gov_id_info import complement_prov_city_data_with_counties
import utils.get_2861_gaode_geo_gov_id_info as gov_utils
from utils.MyModule import TimeDispose
from product.state_govern.parameters import df_leaf_conf, sg_data_path, county_missing_leafs, county_n_city_missing_leafs

# Shared handle to the local government-index database; callers decide
# whether this function manages the connection (get_conn flag below).
GovIndexDB = DBObj(DBShortName.GovIndexLocal).obj


def get_gov_idx_data(idx_code, version, get_conn=False):
    """Fetch the newest stored value of gov index *idx_code* whose
    idx_version is not later than *version*.

    :param idx_code: index code to look up.
    :param version: date string ('YYYY-MM-DD'); compared against idx_version
        after appending ' 00:00:00', so same-day versions are excluded.
    :param get_conn: when True, open and close the shared GovIndexDB
        connection around the query; when False the caller must have an
        open connection already.
    :return: result of read_from_table for a LIMIT 1 query — at most one
        row with keys code, value, version (callers index result[0]).
    """
    sqlstr_ = "SELECT idx_code AS code, idx_value AS value, idx_version AS version FROM {} WHERE idx_code = '{}' AND idx_version <= '{}' ORDER BY idx_version DESC LIMIT 1;".format(
        TableName.GovIndex, idx_code, version + ' 00:00:00')
    if get_conn:
        GovIndexDB.get_conn()
    datas = GovIndexDB.read_from_table(sqlstr_)
    if get_conn:
        GovIndexDB.disconnect()
    return datas
""" @Time : 2019/9/25 15:19 @Author : Liu Yusheng @File : env_stable_utils.py @Description: 公用函数 """ import time import json import random import pandas as pd from datetime import datetime from urllib import request, parse from utils.db_base import DBObj, DBShortName CbdDataTestObj = DBObj(DBShortName.CbdDataTest).obj CbdDataFormalObj = DBObj(DBShortName.CbdDataFormal).obj db_conf = {True: CbdDataTestObj, False: CbdDataFormalObj} POINT_TABLE = "point_info_new" SCORE_TABLE = "cbd_gov_score" # 知识库 DATA_URL = "http://192.168.0.47:9400/knowledgebase/?" def get_html_result(url, result_json=True): count = 10 while count > 0: try:
def get_map_data(idx_type="mong", test_mode=False):
    """Build per-government map-coloring payloads for *idx_type* and upsert
    them into the ``pw_map_color`` table (used when the map is clicked).

    :param idx_type: index type code used to fetch the score data.
    :param test_mode: True -> write to the test DB (ProductTest),
        False -> formal DB (ProductPWFormal).
    :return: None; side effect is a batched upsert into pw_map_color.
    """
    # All gov_ids known to the module-level 2861 gaode geo DataFrame.
    total_gov_ids = df_2861_gaode_geo_all.index.values.tolist()
    score_df = get_score_df_data(idx_type, total_gov_ids, test_mode)
    # Classify each gov into a scatter quadrant from its people/official
    # scores against the configured thresholds.
    score_df["area_eng"] = score_df.apply(
        lambda x: scatter_api.judge_area(x["people_score"], x["official_score"],
                                         para.DataParas.X_CTHD,
                                         para.DataParas.Y_CTHD),
        axis=1)
    score_df["area"] = score_df["area_eng"].apply(
        lambda x: para.AREAS_NAMES_DICT[x])
    # NOTE: uses AREAS_COLOR_VALUES_DICT here, unlike get_tree_data which
    # uses AREAS_COLORS_DICT — map colors are a separate palette.
    score_df["color"] = score_df["area_eng"].apply(
        lambda x: para.AREAS_COLOR_VALUES_DICT[x])
    # Human-readable descriptions of the relative percentiles.
    score_df["people_desc"] = score_df["people_rltpct"].apply(
        lambda x: scatter_api.get_scatter_point_desc(x, "民众抱怨"))
    score_df["official_desc"] = score_df["official_rltpct"].apply(
        lambda x: scatter_api.get_scatter_point_desc(x, "官方宣传"))
    final_cols = ["gov_name", "color", "area", "people_desc", "official_desc"]
    # Pack the display columns into one JSON blob per row.
    score_df["value"] = score_df.apply(
        lambda x: {col: x[col] for col in final_cols}, axis=1)
    score_df["value"] = score_df["value"].apply(
        lambda x: json.dumps(x, ensure_ascii=False))
    score_df["node_name"] = "MONG"
    # Drop known-bad governments (gov_id 232/233/249/250/1450).
    score_df = score_df.reset_index()
    score_df = score_df[~score_df["gov_id"].isin([232, 233, 249, 250, 1450])]
    final_score_df = deepcopy(
        score_df[["gov_id", "node_name", "value", "version"]])
    data_list = final_score_df.to_dict(orient="records")
    if test_mode:
        db_obj = DBObj(DBShortName.ProductTest).obj
    else:
        db_obj = DBObj(DBShortName.ProductPWFormal).obj
    db_obj.get_conn()
    table_name = "pw_map_color"
    # No version history is kept for map colors: ON CONFLICT overwrites.
    sqlstr_head = "INSERT INTO %s (gov_id, node_name, value, version) VALUES ({gov_id}, '{node_name}', '{value}', '{version}') ON CONFLICT (gov_id, node_name) DO UPDATE SET value='{value}', version='{version}';" % (
        table_name)
    sqlstr_list = [sqlstr_head.format(**data_row) for data_row in data_list]
    row_num = len(sqlstr_list)
    # Execute in batches of 1000 statements to bound statement size.
    for i in range(0, row_num, 1000):
        db_obj.execute_any_sql("".join(sqlstr_list[i:i + 1000]))
        print("&&&&&&&&&&已插入{}条&&&&&&&&&&&".format(len(sqlstr_list[i:i + 1000])),
              flush=True)
    db_obj.disconnect()