예제 #1
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_ZYCJ_CZZG_XKDD",
        mongo_collection="ZX_CJXW_ZYCJ",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #2
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_HY_XLDC_JD",
        mongo_collection="ZX_CJXW_HY",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #3
0
# -*- coding: utf-8 -*-
"""JRCP_JJ_TTJJ_JZ_ALL"""
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="JRCP_JJ_TTJJ_JZ_ALL",
        mongo_collection="CRMJRCP_JJ",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #4
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_ZYCJ_ZGJJW_HLWJR",
        mongo_collection="ZX_CJXW_ZYCJ",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #5
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_GJJRJG_GJSCJDGLZJ_GWY",
        mongo_collection="ZX_CJXW_HY",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #6
0
# -*- coding: utf-8 -*-
"""  GDSZ_ZHS_FGW_FZGH """
from database._mongodb import MongoClient
from tools.req_for_wordExcelZip import find_type


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print only the first two items returned by its main() after
    # data_shuffle().
    client = MongoClient(
        entity_code="GDSZ_ZHS_FGW_FZGH",
        mongo_collection="GOV_ZX_GDS",
    )
    for record in client.main()[:2]:
        print(data_shuffle(record))
예제 #7
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_HY_ZHJXW_GNSC",
        mongo_collection="ZX_CJXW_HY",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #8
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_GJJRJG_ZGBJH_TB",
        mongo_collection="ZX_CJXW_HY",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #9
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_HY_ZGHGW_NYHYDT",
        mongo_collection="ZX_CJXW_HY",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #10
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_ZYCJ_HX_SSGSYW",
        mongo_collection="ZX_CJXW_ZYCJ",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #11
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_HY_ZGYYXXW_DFDT",
        mongo_collection="ZX_CJXW_HY",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #12
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="JRCP_BX_ZESYH_APP_ALL",
        mongo_collection="JRCP_BX",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #13
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_ZYCJ_ZZW_SMJJ",
        mongo_collection="ZX_CJXW_ZYCJ",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #14
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_GJJRJG_GJTJJ_ZCJD",
        mongo_collection="ZX_CJXW_HY",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #15
0
# -*- coding: utf-8 -*-
import re
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="JRCP_XYK_WAK_ALL",
        mongo_collection="JRCP_XYK",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #16
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_GJJRJG_ZGQYLHH_CYDT",
        mongo_collection="ZX_CJXW_HY",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #17
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="JRCP_LCCP_ZSYH_GW_ALL2",
        mongo_collection="JRCP_LCCP",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #18
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_ZYCJ_SHCJ_BGCZ",
        mongo_collection="ZX_CJXW_ZYCJ",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #19
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_GJJRJG_QGZXQYGZXT_JGGG",
        mongo_collection="ZX_CJXW_HY",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #20
0
import jsonpath

from crm_scripts import GenericScript
from database._mongodb import MongoClient
from tools.web_api_of_baidu import get_lat_lng, get_area


def data_shuffle(data, ):
    """Map a raw record onto the ACTIVE_*/DATA_SOURCE_* output field names.

    The page breadcrumb is stripped from both the HTML and the plain-text
    description, and the text description is truncated to 501 characters.

    :param data: mapping with (optionally) the keys ``TITLE_``,
        ``PUBLISH_TIME_``, ``HTML_``, ``CONTENT_``, ``SOURCE_NAME_``,
        ``URL_``, ``READ_NUM_`` and ``BANK_NAME_``.
    :return: a new dict; fields missing from ``data`` are ``None``.
    """
    # Function-scope import: the module header does not import ``re``.
    import re

    breadcrumb_html = '''<div class="right"><span class="f_red_12">您的位置:</span></div>\r\n        </div>\r\n      </div>\r\n      <div class="navquick_right"><span class="f_666_12"><a href="../index.html">首页</a></span> &gt; <span class="f_666_12">资讯信息</span></div>'''
    # The breadcrumb tokens are alternatives, so they need a regex;
    # ``str.replace`` would only match the whole "a|b|c|" text literally.
    breadcrumb_text = re.compile("您的位置:|首页|>|资讯信息")

    html = data.get('HTML_')
    content = data.get('CONTENT_')

    re_data = dict()
    re_data['ACTIME_NAME_'] = data.get('TITLE_')
    re_data['RELEASE_DATE_'] = data.get('PUBLISH_TIME_')
    # Missing/None HTML_ or CONTENT_ is passed through instead of raising
    # AttributeError on ``None.replace``.
    re_data['ACTIVE_DESC_HTML_'] = (
        html.replace(breadcrumb_html, '') if isinstance(html, str) else html)
    re_data['ACTIVE_DESC_TEXT_'] = (
        breadcrumb_text.sub('', content)[:501]
        if isinstance(content, str) else content)
    re_data['DATA_SOURCE_NAME_'] = data.get('SOURCE_NAME_')
    re_data['DATA_SOURCE_URL_'] = data.get('URL_')
    re_data['AMOUNT_OF_READING_'] = data.get('READ_NUM_')
    # NOTE(review): these two look up the empty-string key, so they are None
    # unless the record really has a '' key — confirm the intended source.
    re_data['ACTIVE_KEYWORDS_'] = data.get('')
    re_data['ACTIVE_OBJECT_'] = data.get('')

    re_data['BANK_NAME_'] = data.get('BANK_NAME_')

    return re_data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print only the first two items returned by its main() after
    # data_shuffle().
    client = MongoClient(
        entity_code="CRMJPFX_YXHD_BJYH",
        mongo_collection="CRMJPFX_YXHD",
    )
    for record in client.main()[:2]:
        print(data_shuffle(data=record, ))
예제 #21
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_ZYCJ_MJW_HGXS",
        mongo_collection="ZX_CJXW_ZYCJ",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #22
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="JRCP_BX_ZGNYYH_APP_LCX",
        mongo_collection="JRCP_BX",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #23
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_GJJRJG_ZHRMGHGSWJ_DFSW",
        mongo_collection="ZX_CJXW_HY",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #24
0
    re_data["ENTITY_NAME_"] = data["ENTITY_NAME_"]
    re_data["URL_"] = data["URL_"]
    if "TEL_" in data:
        re_data["TEL_"] = data["TEL_"]
    re_data["BUSINESS_HOURS_"] = ""

    if "SOURCE_TYPE_NAME_" in data:
        re_data["SOURCE_TYPE_NAME_"] = data["SOURCE_TYPE_NAME_"]
    if "自助银行" in data or "ATM" in data:
        re_data["TYPE_NAME_"] = "自助银行"
        re_data["TYPE_"] = "ZZ"
    else:
        re_data["TYPE_NAME_"] = "支行"
        re_data["TYPE_"] = "ZH"

    return re_data


if __name__ == '__main__':
    # Manual run: build a MongoClient for entity code ECITICORGANIZE in the
    # WD_TY collection and pass every item from its main() to data_shuffle().
    main_mongo = MongoClient(entity_code="ECITICORGANIZE",
                             mongo_collection="WD_TY")
    # NOTE(review): GenericScript is bound without being called, so the two
    # attributes assigned below are set on that object itself — confirm this
    # is intended rather than a missing ``()``.
    sc = GenericScript
    # Mysql connection
    sc.mysql_client, sc.mysql_connection = sc.mysql_connect()
    # Five lists are unpacked, but only the province/city/area lists are used
    # below; dir_area_list and bank_list are not referenced in this block.
    province_list, city_list, area_list, dir_area_list, bank_list = sc.data_from_mysql(
    )
    data_list = main_mongo.main()
    for data in data_list:
        # The result is computed but not shown: the print below is disabled.
        re_data = data_shuffle(data, province_list, city_list, area_list)
        # print(re_data)
예제 #25
0
# -*- coding: utf-8 -*-
"""中国建设银行-官网基金  代销基金  CHA_BRANCH_FUND_AGENT"""
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="JRCP_JJ_ZGJSYH_GW_ALL",
        mongo_collection="CRMJRCP_JJ",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #26
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_HY_SPSWW_GJYW",
        mongo_collection="ZX_CJXW_HY",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #27
0
파일: __init__.py 프로젝트: ILKKAI/dataETL
    def run(self):
        """Clean NEWS_FINASSIST documents from MongoDB and upsert them into Phoenix.

        For every entity code in ``self.code_list`` the module named after
        that code is imported and its ``data_shuffle`` is applied to each
        document fetched through ``self.get_data_from_mongo``.  Cleaned rows
        are upserted one at a time through ``PhoenixHbase``; for list results
        the source document is then updated with ``{"d": 1}`` in MongoDB.
        The counters ``find_count``, ``success_count``, ``remove_count`` and
        ``bad_count`` on ``self`` are updated along the way and logged at the
        end, after both connections have been closed.
        """
        # Create the Phoenix client object
        p_client = PhoenixHbase(table_name="NEWS_FINASSIST")
        p_client.verify_list = self.verify_list
        # Connect to Phoenix
        connection = p_client.connect_to_phoenix()
        # Create the MongoDB query client
        m_client = MongoClient(mongo_collection="NEWS_FINASSIST")
        db, collection_list = m_client.client_to_mongodb()
        collection = m_client.get_check_collection(
            db=db, collection_list=collection_list)

        # # Drop the table
        # p_client.drop_table_phoenix(connection=connection)
        #
        # # Table creation statement
        # sql = ('create table "NEWS_FINASSIST" ("ID_" varchar primary key, "T"."CONTENT_" varchar, '
        #        '"C"."ENTITY_NAME_" varchar, "C"."ENTITY_CODE_" varchar, "C"."TITLE_" varchar, "C"."BRIEF_" varchar, '
        #        '"C"."PUBLISH_TIME_" varchar, "C"."KEYWORDS_" varchar, "C"."URL_" varchar, "C"."DATA_SOURCE_" varchar,'
        #        '"C"."AREA_CODE_" varchar, "C"."BANK_CODE_" varchar, "C"."BANK_NAME_" varchar,'
        #        '"C"."UNIT_CODE_" varchar, "C"."PERIOD_CODE_" varchar, "C"."REMARK_" varchar,'
        #        '"C"."CREATE_TIME_" varchar, "C"."UPDATE_TIME_" varchar, "F"."STATUS_" varchar)'
        #        'IMMUTABLE_ROWS = true')

        # # Create the table
        # p_client.create_new_table_phoenix(connection=connection, sql=sql)

        # Iterate over the ENTITY_CODE_ list
        for entity_code in self.code_list:
            # The entity code doubles as the name of the module that provides
            # the data_shuffle() cleaning function for it.
            module_name = __import__(entity_code)
            self.logger.info("开始进行 ENTITY_CODE_ {}".format(entity_code))

            # CAIJINGNEWS is queried with a fixed find_id; every other code
            # passes an empty one.
            # NOTE(review): presumably a resume point — confirm.
            if entity_code == "CAIJINGNEWS":
                find_id = "5c6bfa508d7fee512a4ca68f"
            else:
                find_id = ""
            try:
                mongo_data_list = self.get_data_from_mongo(
                    m_client=m_client,
                    collection=collection,
                    entity_code=entity_code,
                    find_id=find_id)
            except pymongo.errors.ServerSelectionTimeoutError:
                # Retry exactly once after a short pause; a second timeout
                # propagates to the caller.
                sleep(1)
                mongo_data_list = self.get_data_from_mongo(
                    m_client=m_client,
                    collection=collection,
                    entity_code=entity_code,
                    find_id=find_id)

            # Clean the data and insert it into HBase
            if mongo_data_list:
                once_count = 0
                # NOTE(review): this overwrites find_count per entity code,
                # while the final log line reports it as a run total —
                # confirm whether it should accumulate.
                self.find_count = mongo_data_list.count()
                # Manual iteration so that a server-selection timeout on one
                # fetch can be skipped; bounded at 1,000,000 attempts.
                for _ in range(1000000):
                    try:
                        data = next(mongo_data_list)
                    except pymongo.errors.ServerSelectionTimeoutError:
                        continue
                    except StopIteration:
                        break

                    data_id = data["_id"]
                    if self.success_count % 100 == 0:
                        self.logger.info(
                            "running on data_id: {}".format(data_id))
                    try:
                        del data["_id"]
                        data_list = [data]
                        re_data = module_name.data_shuffle(data_list)

                        if not re_data:
                            self.bad_count += 1
                            continue
                    except Exception as e:
                        self.logger.warning("清洗错误,错误 _id 为{}, {}".format(
                            data_id, e))
                        continue

                    if isinstance(re_data, list):
                        for list_data in re_data:
                            # Insert the data through Phoenix/HBase
                            if list_data:
                                try:
                                    if entity_code != "CNINFONEWS":
                                        ai_data = self.get_brief_from_ai(
                                            data=list_data)
                                    else:
                                        ai_data = list_data
                                except Exception as e:
                                    # The message needs a placeholder for the
                                    # error, otherwise logging itself fails
                                    # while formatting the record.
                                    self.logger.info(
                                        "AI 调取失败, 错误信息 {}".format(e))
                                    # Fall back to the current row, not the
                                    # whole result list, so the upsert below
                                    # still receives a single record.
                                    ai_data = list_data
                                try:
                                    success_count = p_client.upsert_to_phoenix_by_one(
                                        connection=connection, data=ai_data)
                                    once_count += success_count
                                    self.success_count += success_count
                                    if self.success_count % 10 == 0:
                                        self.logger.info(
                                            "HBase 插入成功, 成功条数{}条".format(
                                                once_count))
                                except Exception as e:
                                    self.logger.warning(
                                        "HBase 插入 _id 为 {} 的数据失败, {}".format(
                                            data_id, e))
                                    continue
                                try:
                                    # Add {d: 1} to the source document
                                    update_count = m_client.update_to_mongodb(
                                        collection=collection,
                                        data_id=data_id,
                                        data_dict={"d": 1})
                                    self.remove_count += update_count
                                    if self.remove_count % 10 == 0:
                                        self.logger.info(
                                            "MongoDB 更新成功, 成功条数 {} 条".format(
                                                "10"))
                                except Exception as e:
                                    self.logger.warning(
                                        "MongoDB 更新 _id 为 {} 的数据失败, {}".format(
                                            data_id, e))
                                    continue

                    elif isinstance(re_data, dict):
                        # Insert the data through Phoenix/HBase
                        # (this branch does not update the MongoDB document)
                        if re_data:
                            try:
                                success_count = p_client.upsert_to_phoenix_by_one(
                                    connection=connection, data=re_data)
                                once_count += success_count
                                self.success_count += success_count
                                self.logger.info(
                                    "HBase 插入成功, 成功条数 {} 条".format(
                                        success_count))
                            except Exception as e:
                                self.logger.warning(
                                    "HBase 插入 _id 为 {} 的数据失败, {}".format(
                                        data_id, e))
                                continue

                if once_count > 0:
                    self.logger.info("ENTITY_CODE_: {} 插入成功条数 {}".format(
                        entity_code, once_count))
                mongo_data_list.close()
            else:
                continue

        # Close the connections
        m_client.client_close()
        p_client.close_client_phoenix(connection=connection)
        self.logger.info("本次共向 MongoDB 查取数据{}条".format(self.find_count))
        self.logger.info("本次共向 HBase 插入数据{}条".format(self.success_count))
        self.logger.info("本次共向 MongoDB 删除数据{}条".format(self.remove_count))
        self.logger.info("本次共向 MongoDB 插入数据{}条".format(self.old_count))
        self.logger.info("本次坏数据共 {} 条".format(self.bad_count))
        self.logger.handlers.clear()
예제 #28
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_ZYCJ_ZGZQW_GGKX",
        mongo_collection="ZX_CJXW_ZYCJ",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #29
0
# -*- coding: utf-8 -*-
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return ``data`` unchanged; no cleaning is applied to the record."""
    return data


if __name__ == '__main__':
    # Manual run: build a MongoClient for this entity code / collection and
    # print each item returned by its main() after data_shuffle().
    client = MongoClient(
        entity_code="ZX_CJXW_ZYCJ_NSFWW",
        mongo_collection="ZX_CJXW_ZYCJ",
    )
    for record in client.main():
        print(data_shuffle(record))
예제 #30
0
        work_book = read_excel(response.content)
        sheet_name = work_book.sheet_names()[0]
        sheet = work_book.sheet_by_name(sheet_name)
        com_name_ = ""
        row_list = sheet.row_values(2)
        for n in range(3, sheet.nrows):
            data_item = {}
            for k, v in data.items():
                data_item[k] = v
            rows1 = sheet.row_values(n)
            sheet_dict = dict(zip(row_list, rows1))
            if sheet_dict["保险公司"]:
                com_name_ = sheet_dict["保险公司"]
            else:
                sheet_dict["保险公司"] = com_name_
            data_item["COM_NAME_"] = sheet_dict["保险公司"]
            data_item["PRO_NAME_"] = sheet_dict["保险产品名称"]
            data_item["ENSURE_SOURCE_TYPE_"] = sheet_dict["产品类型"]
            data_list.append(data_item)
    # return data_list


if __name__ == '__main__':
    # Manual run: build a MongoClient for entity code JRCP_BX_HXYH_GW_ALL in
    # the JRCP_BX collection and print every item data_shuffle() produces for
    # each record returned by its main().
    main_mongo = MongoClient(entity_code="JRCP_BX_HXYH_GW_ALL",
                             mongo_collection="JRCP_BX")
    data_list = main_mongo.main()
    for data in data_list:
        # Use a separate name for the per-record result so the sequence being
        # iterated is not rebound in the middle of the loop.
        shuffled_items = data_shuffle(data)
        # Tolerate a None result so a record that yields nothing does not
        # abort the whole run with a TypeError.
        for item in shuffled_items or []:
            print(item)