def get_tree_data(idx_type, test_mode=False):
    """Compute per-gov scatter-area tree payloads and upsert into pw_tree_data.

    :param idx_type: index type passed through to the score query and stored
        as ``type_code`` on every row.
    :param test_mode: when True write to the test DB, otherwise the formal one.
    :return: None (side effect: rows written to the database).
    """
    gov_ids = df_2861_gaode_geo_all.index.values.tolist()
    df = get_score_df_data(idx_type, gov_ids, test_mode)

    # Classify each gov into a scatter quadrant from its two scores.
    df["area_eng"] = df.apply(
        lambda r: scatter_api.judge_area(
            r["people_score"], r["official_score"],
            para.DataParas.X_CTHD, para.DataParas.Y_CTHD),
        axis=1)

    df["area"] = df["area_eng"].map(lambda a: para.AREAS_NAMES_DICT[a])
    df["color"] = df["area_eng"].map(lambda a: para.AREAS_COLORS_DICT[a])

    def _node_payload(color):
        # 100% fill with a flat (non-animated) background in the area colour.
        return json.dumps(
            {
                "ratio": 100,
                "node_bg_conf": {
                    "node_bg_color": [color, color],
                    "node_bg_time": 0
                }
            },
            ensure_ascii=False)

    df["data"] = df["color"].map(_node_payload)
    df["type_code"] = idx_type
    df["product_name"] = "MONG"

    # Drop known-bad gov_ids (232 / 233 / 249 / 250 / 1450).
    df = df.reset_index()
    df = df[~df["gov_id"].isin([232, 233, 249, 250, 1450])]

    records = deepcopy(
        df[["gov_id", "product_name", "type_code", "data", "version"]]
    ).to_dict(orient="records")

    db_short = (DBShortName.ProductTest
                if test_mode else DBShortName.ProductPWFormal)
    db_obj = DBObj(db_short).obj
    db_obj.get_conn()

    table_name = "pw_tree_data"

    # Single row per (gov_id, type_code, product_name); re-runs overwrite.
    sqlstr_head = "INSERT INTO %s (gov_id, product_name, type_code, data, version) VALUES ({gov_id}, '{product_name}', '{type_code}', '{data}', '{version}') ON CONFLICT (gov_id, type_code, product_name) DO UPDATE SET data='{data}', version='{version}', update_time=now();" % (
        table_name)

    statements = [sqlstr_head.format(**row) for row in records]

    # Send the statements in batches of 1000 per round-trip.
    batch_size = 1000
    for start in range(0, len(statements), batch_size):
        batch = statements[start:start + batch_size]
        db_obj.execute_any_sql("".join(batch))
        print("&&&&&&&&&&已插入{}条&&&&&&&&&&&".format(len(batch)), flush=True)

    db_obj.disconnect()
def get_score_from_db(idx_type, gov_ids, test_mode=True):
    """Fetch the latest-version score rows for the given govs from the DB.

    :param idx_type: index type to filter on.
    :param gov_ids: iterable of gov ids (coerced to int for the SQL IN list).
    :param test_mode: when True read the test DB, otherwise the formal one.
    :return: rows as returned by ``read_from_table``.
    """
    short_name = (DBShortName.ProductPWDataTest
                  if test_mode else DBShortName.ProductPWDataFormal)
    db_obj = DBObj(short_name).obj

    table_name = "gov_modern_score"

    gov_ids_str = ", ".join(str(int(gid)) for gid in gov_ids)

    db_obj.get_conn()

    # Join against the max version for this index_type so only the newest
    # snapshot is returned.
    sqlstr_ = "SELECT {0}.gov_id, {0}.score_info, {0}.version FROM {0}, (SELECT MAX(version) AS version FROM {0} WHERE index_type = '{2}') aa WHERE {0}.index_type='{2}' AND {0}.version = aa.version AND {0}.gov_id in ({1});".format(
        table_name, gov_ids_str, idx_type)

    rows = db_obj.read_from_table(sqlstr_)

    db_obj.disconnect()

    return rows
def update_map_data(test_mode=True):
    """
    @功能:写地图数据
    :param test_mode:
    :return:
    """
    if test_mode:
        db_obj = DBObj(DBShortName.ProductPWTest)
Ejemplo n.º 4
0
from datetime import datetime

import utils.path_manager as pm
from utils.db_base import TableName, DBShortName, DBObj

# Storage roots for the state_govern project (config files vs. generated data).
sg_file_path = pm.LOCAL_STABLE_PROJECT_STORAGE + "state_govern/" + "files/"
sg_data_path = pm.LOCAL_STABLE_PROJECT_STORAGE + "state_govern/" + "data/"

# Index-tree configuration CSVs (GBK-encoded, keyed by index_code).
leaf_conf = "SG_leaf_conf.csv"
index_conf = "SG_index_conf.csv"

# NOTE(review): `pd` is not imported in this visible chunk — confirm pandas
# is imported elsewhere in the file.
df_leaf_conf = pd.read_csv(sg_file_path+leaf_conf, encoding="GBK", index_col="index_code")
df_index_conf = pd.read_csv(sg_file_path+index_conf, encoding="GBK", index_col="index_code")


# Module-level DB handles; db_conf maps test_mode (True/False) to the handle.
PWDataTestObj = DBObj(DBShortName.ProductPWDataTest).obj
PWDataFormalObj = DBObj(DBShortName.ProductPWDataFormal).obj

db_conf = {True: PWDataTestObj, False: PWDataFormalObj}

# Upsert template for the tree-data table.
# NOTE(review): datetime.now() is evaluated once at import time, so every row
# written through this template carries the module-load timestamp rather than
# the actual insert time — confirm this is intended.
treedata_insert_sqlstr = "INSERT INTO %s (gov_id, product_name, type_code, data, version, update_time) VALUES ({gov_id}, '{product_name}', '{type_code}', '{data}',  '{version}', '%s') ON CONFLICT (gov_id, product_name, type_code) DO UPDATE SET data = '{data}', version = '{version}', update_time = '%s';"%(TableName.PWTreeData, datetime.now(), datetime.now())

# Upsert template for the map-colour table (no timestamp column).
mapcolor_insert_sqlstr = "INSERT INTO %s (gov_id, node_name, value, version) VALUES ({gov_id}, '{node_name}', '{value}', '{version}') ON CONFLICT (gov_id, node_name) DO UPDATE SET value = '{value}', version = '{version}';"%(TableName.PWMapColor)


# Colour ramp, grey -> red (original note: 灰 - 红).
colors_s = ['#808080', '#A9A9A9', '#BCBCBC', '#DEDEDE', '#EBEBEB', '#FEF2F2', '#FCADAF', '#FB777B', '#FA3F48', '#FF0000']
legends = [{"value": 0, "color": colors_s[3], "name": "差"},
           {"value": 25, "color": colors_s[5], "name": "普通"},
           {"value": 50, "color": colors_s[7], "name": "较好"},
           {"value": 75, "color": colors_s[9], "name": "很好"},
Ejemplo n.º 5
0
def get_score_from_local_to_db(version,
                               idx_type,
                               with_stats_cols=None,
                               test_mode=True):
    """Load the local accumulated-score CSV and upsert it into the score DB.

    Reads ``{IDX_TYPE}_ACCUMULATIVE_SCORE.csv`` for *version*, optionally
    merges extra columns from the matching ``_ACCUMULATIVE_STATS.csv``,
    packs the selected per-gov columns into a JSON ``score_info`` blob and
    bulk-upserts into the shared ``gov_modern_score`` table (one table for
    all index types since 2019-11-14, rows discriminated by ``index_type``).

    :param version: data version string; also names the score directory.
    :param idx_type: index type; selects the CSV files and tags the rows.
    :param with_stats_cols: optional list of stats column names to merge in
        (default: none).
    :param test_mode: when True write to the test DB, otherwise the formal.
    :return: None (side effect: rows written to the database).
    """
    # Fix the mutable-default-argument pitfall (was ``with_stats_cols=[]``);
    # [] and None are both falsy, so existing callers see identical behaviour.
    with_stats_cols = list(with_stats_cols) if with_stats_cols else []

    file_path = pm.STABLE_SCORE_STORAGE + version + "/" + "GovModern/"
    score_file = "{}_ACCUMULATIVE_SCORE.csv".format(idx_type.upper())

    df_score = pd.read_csv(file_path + score_file,
                           encoding="utf8",
                           index_col="gov_id")

    dst_cols = [
        "people_data", "official_data", "people_rltpct", "official_rltpct",
        "people_score", "official_score"
    ]

    if with_stats_cols:
        stats_file = "{}_ACCUMULATIVE_STATS.csv".format(idx_type.upper())
        df_stats = pd.read_csv(file_path + stats_file,
                               encoding="utf8",
                               index_col="gov_id")
        df_score[with_stats_cols] = df_stats[with_stats_cols]
        dst_cols = dst_cols + with_stats_cols

    # Pack the selected columns into one JSON blob per gov.
    df_score["score_info"] = df_score.apply(
        lambda x: {idx_col: x[idx_col]
                   for idx_col in dst_cols}, axis=1)

    df_score["score_info"] = df_score["score_info"].apply(
        lambda x: json.dumps(x, ensure_ascii=False))

    df_score["version"] = version

    df_score = df_score.reset_index()

    df_into_db = deepcopy(df_score[["gov_id", "score_info", "version"]])

    # Since 2019-11-14 all index types share one table, keyed by index_type.
    df_into_db["index_type"] = idx_type

    if test_mode:
        db_obj = DBObj(DBShortName.ProductPWDataTest).obj
    else:
        db_obj = DBObj(DBShortName.ProductPWDataFormal).obj

    db_obj.get_conn()

    table_name = "gov_modern_score"

    data_list = df_into_db.to_dict(orient="records")

    sqlstr_head = "INSERT INTO %s (gov_id, score_info, version, index_type) VALUES ({gov_id}, '{score_info}', '{version}', '{index_type}') ON CONFLICT (gov_id, version, index_type) DO UPDATE SET score_info='{score_info}', update_time='%s';" % (
        table_name, datetime.now())

    sqlstr_list = [sqlstr_head.format(**data_row) for data_row in data_list]

    # Execute the statements in batches of 1000 per round-trip.
    for i in range(0, len(sqlstr_list), 1000):
        batch = sqlstr_list[i:i + 1000]
        db_obj.execute_any_sql("".join(batch))
        print("&&&&&&&&&&已插入{}条&&&&&&&&&&&".format(len(batch)), flush=True)

    db_obj.disconnect()
Ejemplo n.º 6
0
def _rank_gov_level(value_df_, level_index, level_label, rank_ascending):
    """Fill ``data`` (percentile), ``pctr_desc`` and ``rank`` for one level.

    Mutates *value_df_* in place on the rows selected by *level_index*.

    :param value_df_: frame with a ``value`` column, indexed by gov_id.
    :param level_index: index of gov_ids belonging to this admin level.
    :param level_label: label used in the percentile description
        (one of "区县" / "市" / "省").
    :param rank_ascending: ranking direction for the raw value.
    """
    value_df_.loc[level_index,
                  "data"] = value_df_.loc[level_index, "value"].rank(
                      method="max", pct=True, ascending=rank_ascending)
    value_df_.loc[level_index, "pctr_desc"] = value_df_.loc[
        level_index, "data"].apply(
            lambda x: "{}%的{}".format(round(x * 100, 2), level_label))
    value_df_.loc[level_index,
                  "rank"] = value_df_.loc[level_index,
                                          "data"].rank(method="min",
                                                       ascending=False)


def get_leaf_score(version, test_mode=True, index_codes=None):
    """Compute leaf-node scores for each configured index and store them.

    For every leaf index in ``df_leaf_conf`` (optionally restricted to
    *index_codes*), pulls raw values from the gov-index DB or the knowledge
    base, fills missing provinces/cities/counties, ranks each administrative
    level independently, packs the results into a JSON ``data`` blob and
    writes them via ``insert_tree_data``.

    :param version: data version string.
    :param test_mode: when True write to the test DB, otherwise the formal.
    :param index_codes: optional subset of index codes to process
        (default: all).
    :return: None (side effect: rows written to the database).
    """
    # index_codes default changed from mutable [] to None; both are falsy,
    # so the filter below behaves identically for existing callers.
    df_leaf_conf_ = df_leaf_conf[df_leaf_conf.index.isin(
        index_codes)] if index_codes else df_leaf_conf

    if test_mode:
        tree_data_obj = DBObj(DBShortName.ProductPWDataTest).obj
    else:
        tree_data_obj = DBObj(DBShortName.ProductPWDataFormal).obj

    tree_data_obj.get_conn()

    for index_code, row in df_leaf_conf_.iterrows():
        # idx_rank == 1 ranks ascending; anything else ranks descending.
        rank_ascending = bool(row["idx_rank"] == 1)

        # Government index: latest snapshot from the gov-index DB.
        if row["source"] == "gov":
            result = get_gov_idx_data(index_code, version, get_conn=True)
            data = result[0]
            idx_version = data["version"]

            value_series = pd.Series(data["value"])
            value_df = pd.DataFrame(value_series, columns=["value"])
            value_df.index.name = "gov_id"
            value_df.index = value_df.index.astype(int)
        # Knowledge-base index.
        else:
            result = get_knowledge_base_data(index_code, version)
            idx_version = result["version"][index_code]

            value_df = pd.DataFrame.from_dict(result["datas"]).T
            value_df = value_df.rename(columns={index_code: "value"})
            value_df.index.name = "gov_id"
            value_df.index = value_df.index.astype(int)

        # Missing-data completion strategy depends on how sparse the leaf is.
        if index_code not in county_missing_leafs:
            # Counties mostly present: fill county gaps with the median and
            # derive city/province values from their counties.
            value_df_ = complement_prov_city_data_with_counties(
                value_df,
                columns=["value"],
                by=row["complement_by_subgovs"],
                verify_non_counties_with_median=True,
                with_hitec=False)
        elif index_code not in county_n_city_missing_leafs:
            # Counties sparse but cities mostly present: provinces from their
            # cities, cities from the median.
            value_df_ = complement_prov_city_data_with_counties(
                value_df,
                columns=["value"],
                by=row["complement_by_subgovs"],
                verify_non_counties_with_median=True,
                city_by_median=True,
                prov_by_sub_citys=True)
        else:
            # Both counties and cities sparse: each level uses its own median.
            value_df_ = complement_prov_city_data_with_counties(
                value_df,
                columns=["value"],
                by=row["complement_by_subgovs"],
                verify_non_counties_with_median=True,
                city_by_median=True,
                prov_by_median=True)

        value_df_["type_code"] = index_code
        value_df_["product_name"] = "SG"
        value_df_["version"] = version

        df_county = gov_utils.get_all_county_gov_id_info(with_hitec=False)
        df_city = gov_utils.get_all_city_gov_id_info()
        df_prov = gov_utils.get_all_province_gov_id_info()

        # Rank each administrative level independently (the previous code
        # repeated this block three times verbatim).
        _rank_gov_level(value_df_, df_county.index, "区县", rank_ascending)
        _rank_gov_level(value_df_, df_city.index, "市", rank_ascending)
        _rank_gov_level(value_df_, df_prov.index, "省", rank_ascending)

        value_df_["data"] = value_df_.apply(lambda x: json.dumps(
            {
                "score": round(x["data"] * 100, 2),
                "value": round(x["value"], 2),
                "rank": int(x["rank"]),
                "pctr": round(x["data"] * 100, 2),
                "pctr_desc": x["pctr_desc"],
                "idx_version": str(idx_version)
            },
            ensure_ascii=False),
                                            axis=1)

        # Expose gov_id as a column for insertion.
        value_df_ = value_df_.reset_index()

        insert_tree_data(value_df_, tree_data_obj)

        print(
            "tree_data叶子节点数据插入完毕!index_code={}   idx_version={}   version={}".
            format(index_code, idx_version, version),
            flush=True)

    tree_data_obj.disconnect()
Ejemplo n.º 7
0
@Description: 计算叶子节点分数,写入数据库
"""
import json
import pandas as pd
from datetime import datetime
from copy import deepcopy

import utils.path_manager as pm
from utils.utilities import operate_es_knowledge
from utils.db_base import DBObj, TableName, DBShortName
from utils.get_2861_gaode_geo_gov_id_info import complement_prov_city_data_with_counties
import utils.get_2861_gaode_geo_gov_id_info as gov_utils
from utils.MyModule import TimeDispose
from product.state_govern.parameters import df_leaf_conf, sg_data_path, county_missing_leafs, county_n_city_missing_leafs

# Shared handle to the local gov-index database; get_gov_idx_data opens and
# closes the connection on demand via its get_conn flag.
GovIndexDB = DBObj(DBShortName.GovIndexLocal).obj


def get_gov_idx_data(idx_code, version, get_conn=False):
    """Read the newest gov-index snapshot at or before *version*.

    :param idx_code: index code to look up.
    :param version: date string; versions after ``version 00:00:00`` are
        excluded.
    :param get_conn: when True, open and close the shared ``GovIndexDB``
        connection inside this call; leave False if the caller manages it.
    :return: rows as returned by ``read_from_table`` (at most one).
    """
    cutoff = version + ' 00:00:00'
    query = "SELECT idx_code AS code, idx_value AS value, idx_version AS version FROM {} WHERE idx_code = '{}' AND idx_version <= '{}' ORDER BY idx_version DESC LIMIT 1;".format(
        TableName.GovIndex, idx_code, cutoff)

    if get_conn:
        GovIndexDB.get_conn()

    rows = GovIndexDB.read_from_table(query)

    if get_conn:
        GovIndexDB.disconnect()

    return rows
Ejemplo n.º 8
0
"""
@Time    : 2019/9/25 15:19
@Author  : Liu Yusheng
@File    : env_stable_utils.py
@Description: 公用函数
"""
import time
import json
import random
import pandas as pd
from datetime import datetime
from urllib import request, parse

from utils.db_base import DBObj, DBShortName

# Module-level DB handles; db_conf maps test_mode (True/False) to the handle.
CbdDataTestObj = DBObj(DBShortName.CbdDataTest).obj
CbdDataFormalObj = DBObj(DBShortName.CbdDataFormal).obj

db_conf = {True: CbdDataTestObj, False: CbdDataFormalObj}

# Table names used by this module.
POINT_TABLE = "point_info_new"
SCORE_TABLE = "cbd_gov_score"

# Knowledge-base service endpoint (original note: 知识库).
DATA_URL = "http://192.168.0.47:9400/knowledgebase/?"


def get_html_result(url, result_json=True):
    count = 10
    while count > 0:
        try:
def get_map_data(idx_type="mong", test_mode=False):
    """Build per-gov map-colouring payloads and upsert into pw_map_color.

    :param idx_type: index type passed through to the score query.
    :param test_mode: when True write to the test DB, otherwise the formal one.
    :return: None (side effect: rows written to the database).
    """
    gov_ids = df_2861_gaode_geo_all.index.values.tolist()
    df = get_score_df_data(idx_type, gov_ids, test_mode)

    # Classify each gov into a scatter quadrant from its two scores.
    df["area_eng"] = df.apply(
        lambda r: scatter_api.judge_area(
            r["people_score"], r["official_score"],
            para.DataParas.X_CTHD, para.DataParas.Y_CTHD),
        axis=1)

    df["area"] = df["area_eng"].map(lambda a: para.AREAS_NAMES_DICT[a])
    df["color"] = df["area_eng"].map(lambda a: para.AREAS_COLOR_VALUES_DICT[a])
    df["people_desc"] = df["people_rltpct"].map(
        lambda p: scatter_api.get_scatter_point_desc(p, "民众抱怨"))
    df["official_desc"] = df["official_rltpct"].map(
        lambda p: scatter_api.get_scatter_point_desc(p, "官方宣传"))

    # JSON payload with the columns the front-end needs for colouring.
    final_cols = ["gov_name", "color", "area", "people_desc", "official_desc"]
    df["value"] = df.apply(
        lambda r: json.dumps({col: r[col]
                              for col in final_cols}, ensure_ascii=False),
        axis=1)
    df["node_name"] = "MONG"

    # Drop known-bad gov_ids (232 / 233 / 249 / 250 / 1450).
    df = df.reset_index()
    df = df[~df["gov_id"].isin([232, 233, 249, 250, 1450])]

    records = deepcopy(
        df[["gov_id", "node_name", "value", "version"]]
    ).to_dict(orient="records")

    db_short = (DBShortName.ProductTest
                if test_mode else DBShortName.ProductPWFormal)
    db_obj = DBObj(db_short).obj
    db_obj.get_conn()

    table_name = "pw_map_color"

    # Single-version table: each run overwrites the previous colours.
    sqlstr_head = "INSERT INTO %s (gov_id, node_name, value, version) VALUES ({gov_id}, '{node_name}', '{value}', '{version}') ON CONFLICT (gov_id, node_name) DO UPDATE SET value='{value}', version='{version}';" % (
        table_name)

    statements = [sqlstr_head.format(**row) for row in records]

    # Send the statements in batches of 1000 per round-trip.
    batch_size = 1000
    for start in range(0, len(statements), batch_size):
        batch = statements[start:start + batch_size]
        db_obj.execute_any_sql("".join(batch))
        print("&&&&&&&&&&已插入{}条&&&&&&&&&&&".format(len(batch)), flush=True)

    db_obj.disconnect()