# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_ZYCJ_CZZG_XKDD (collection ZX_CJXW_ZYCJ).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_ZYCJ_CZZG_XKDD",
                         mongo_collection="ZX_CJXW_ZYCJ")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_HY_XLDC_JD (collection ZX_CJXW_HY).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_HY_XLDC_JD",
                         mongo_collection="ZX_CJXW_HY")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
"""Cleaning script for entity JRCP_JJ_TTJJ_JZ_ALL."""
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="JRCP_JJ_TTJJ_JZ_ALL",
                         mongo_collection="CRMJRCP_JJ")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_ZYCJ_ZGJJW_HLWJR (collection ZX_CJXW_ZYCJ).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_ZYCJ_ZGJJW_HLWJR",
                         mongo_collection="ZX_CJXW_ZYCJ")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_GJJRJG_GJSCJDGLZJ_GWY (collection ZX_CJXW_HY).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_GJJRJG_GJSCJDGLZJ_GWY",
                         mongo_collection="ZX_CJXW_HY")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
"""Cleaning script for entity GDSZ_ZHS_FGW_FZGH."""
from database._mongodb import MongoClient
from tools.req_for_wordExcelZip import find_type


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: only the first two items returned by MongoClient.main()
    # are passed through data_shuffle and printed.
    client = MongoClient(entity_code="GDSZ_ZHS_FGW_FZGH",
                         mongo_collection="GOV_ZX_GDS")
    for record in client.main()[:2]:
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_HY_ZHJXW_GNSC (collection ZX_CJXW_HY).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_HY_ZHJXW_GNSC",
                         mongo_collection="ZX_CJXW_HY")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_GJJRJG_ZGBJH_TB (collection ZX_CJXW_HY).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_GJJRJG_ZGBJH_TB",
                         mongo_collection="ZX_CJXW_HY")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_HY_ZGHGW_NYHYDT (collection ZX_CJXW_HY).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_HY_ZGHGW_NYHYDT",
                         mongo_collection="ZX_CJXW_HY")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_ZYCJ_HX_SSGSYW (collection ZX_CJXW_ZYCJ).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_ZYCJ_HX_SSGSYW",
                         mongo_collection="ZX_CJXW_ZYCJ")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_HY_ZGYYXXW_DFDT (collection ZX_CJXW_HY).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_HY_ZGYYXXW_DFDT",
                         mongo_collection="ZX_CJXW_HY")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity JRCP_BX_ZESYH_APP_ALL (collection JRCP_BX).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="JRCP_BX_ZESYH_APP_ALL",
                         mongo_collection="JRCP_BX")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_ZYCJ_ZZW_SMJJ (collection ZX_CJXW_ZYCJ).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_ZYCJ_ZZW_SMJJ",
                         mongo_collection="ZX_CJXW_ZYCJ")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_GJJRJG_GJTJJ_ZCJD (collection ZX_CJXW_HY).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_GJJRJG_GJTJJ_ZCJD",
                         mongo_collection="ZX_CJXW_HY")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity JRCP_XYK_WAK_ALL (collection JRCP_XYK).
import re

from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="JRCP_XYK_WAK_ALL",
                         mongo_collection="JRCP_XYK")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_GJJRJG_ZGQYLHH_CYDT (collection ZX_CJXW_HY).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_GJJRJG_ZGQYLHH_CYDT",
                         mongo_collection="ZX_CJXW_HY")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity JRCP_LCCP_ZSYH_GW_ALL2 (collection JRCP_LCCP).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="JRCP_LCCP_ZSYH_GW_ALL2",
                         mongo_collection="JRCP_LCCP")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_ZYCJ_SHCJ_BGCZ (collection ZX_CJXW_ZYCJ).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_ZYCJ_SHCJ_BGCZ",
                         mongo_collection="ZX_CJXW_ZYCJ")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_GJJRJG_QGZXQYGZXT_JGGG (collection ZX_CJXW_HY).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_GJJRJG_QGZXQYGZXT_JGGG",
                         mongo_collection="ZX_CJXW_HY")
    for record in client.main():
        print(data_shuffle(record))
import re

import jsonpath

from crm_scripts import GenericScript
from database._mongodb import MongoClient
from tools.web_api_of_baidu import get_lat_lng, get_area

# Breadcrumb markup that is stripped verbatim from the raw HTML_ field.
_BREADCRUMB_HTML = (
    '<div class="right"><span class="f_red_12">您的位置:</span></div>\r\n'
    ' </div>\r\n'
    ' </div>\r\n'
    ' <div class="navquick_right"><span class="f_666_12">'
    '<a href="../index.html">首页</a></span> > '
    '<span class="f_666_12">资讯信息</span></div>'
)

# Breadcrumb fragments stripped from the plain-text CONTENT_ field.  The
# original passed this alternation to str.replace, which treats it as one
# literal substring and therefore never removed anything.
_BREADCRUMB_TEXT_RE = re.compile("您的位置:|首页|>|资讯信息")


def data_shuffle(data, ):
    """Map a raw CRMJPFX_YXHD record onto the marketing-activity field names.

    :param data: mapping read from MongoDB; accessed only through ``.get``.
    :return: a new dict with the renamed fields.  A field whose source key
        is absent is ``None`` (this now also holds for ``HTML_`` and
        ``CONTENT_``, which previously raised ``AttributeError``).
    """
    html = data.get('HTML_')
    if html is not None:
        html = html.replace(_BREADCRUMB_HTML, '')

    text = data.get('CONTENT_')
    if text is not None:
        # Strip the breadcrumb fragments, then keep the first 501 characters
        # (same slice bound as before).
        text = _BREADCRUMB_TEXT_RE.sub('', text)[:501]

    return {
        # NOTE(review): 'ACTIME_NAME_' looks like a typo of 'ACTIVE_NAME_';
        # left untouched because downstream consumers may rely on this key.
        'ACTIME_NAME_': data.get('TITLE_'),
        'RELEASE_DATE_': data.get('PUBLISH_TIME_'),
        'ACTIVE_DESC_HTML_': html,
        'ACTIVE_DESC_TEXT_': text,
        'DATA_SOURCE_NAME_': data.get('SOURCE_NAME_'),
        'DATA_SOURCE_URL_': data.get('URL_'),
        'AMOUNT_OF_READING_': data.get('READ_NUM_'),
        # These two look up the empty-string key, as in the original; no
        # source field is mapped to them yet.
        'ACTIVE_KEYWORDS_': data.get(''),
        'ACTIVE_OBJECT_': data.get(''),
        'BANK_NAME_': data.get('BANK_NAME_'),
    }


if __name__ == '__main__':
    # Script mode: clean and print the first two items from MongoClient.main().
    main_mongo = MongoClient(entity_code="CRMJPFX_YXHD_BJYH",
                             mongo_collection="CRMJPFX_YXHD")
    data_list = main_mongo.main()
    for data in data_list[:2]:
        re_data = data_shuffle(data=data, )
        print(re_data)
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_ZYCJ_MJW_HGXS (collection ZX_CJXW_ZYCJ).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_ZYCJ_MJW_HGXS",
                         mongo_collection="ZX_CJXW_ZYCJ")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity JRCP_BX_ZGNYYH_APP_LCX (collection JRCP_BX).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="JRCP_BX_ZGNYYH_APP_LCX",
                         mongo_collection="JRCP_BX")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_GJJRJG_ZHRMGHGSWJ_DFSW (collection ZX_CJXW_HY).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_GJJRJG_ZHRMGHGSWJ_DFSW",
                         mongo_collection="ZX_CJXW_HY")
    for record in client.main():
        print(data_shuffle(record))
re_data["ENTITY_NAME_"] = data["ENTITY_NAME_"] re_data["URL_"] = data["URL_"] if "TEL_" in data: re_data["TEL_"] = data["TEL_"] re_data["BUSINESS_HOURS_"] = "" if "SOURCE_TYPE_NAME_" in data: re_data["SOURCE_TYPE_NAME_"] = data["SOURCE_TYPE_NAME_"] if "自助银行" in data or "ATM" in data: re_data["TYPE_NAME_"] = "自助银行" re_data["TYPE_"] = "ZZ" else: re_data["TYPE_NAME_"] = "支行" re_data["TYPE_"] = "ZH" return re_data if __name__ == '__main__': main_mongo = MongoClient(entity_code="ECITICORGANIZE", mongo_collection="WD_TY") sc = GenericScript # Mysql connection sc.mysql_client, sc.mysql_connection = sc.mysql_connect() province_list, city_list, area_list, dir_area_list, bank_list = sc.data_from_mysql( ) data_list = main_mongo.main() for data in data_list: re_data = data_shuffle(data, province_list, city_list, area_list) # print(re_data)
# -*- coding: utf-8 -*-
"""China Construction Bank - official-website funds, agency-sold funds (CHA_BRANCH_FUND_AGENT)."""
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="JRCP_JJ_ZGJSYH_GW_ALL",
                         mongo_collection="CRMJRCP_JJ")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_HY_SPSWW_GJYW (collection ZX_CJXW_HY).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_HY_SPSWW_GJYW",
                         mongo_collection="ZX_CJXW_HY")
    for record in client.main():
        print(data_shuffle(record))
def run(self):
    """Clean MongoDB records per entity code and upsert them into Phoenix/HBase.

    For every ``ENTITY_CODE_`` in ``self.code_list`` the module of the same
    name is imported, its ``data_shuffle`` is applied to each record obtained
    from ``self.get_data_from_mongo``, and the cleaned result is upserted
    through ``PhoenixHbase.upsert_to_phoenix_by_one``.  For list results each
    successfully upserted item also triggers ``update_to_mongodb`` with
    ``{"d": 1}`` on the source document.  Counters on ``self``
    (``find_count``, ``success_count``, ``remove_count``, ``bad_count``) are
    updated along the way and logged at the end together with
    ``self.old_count``.

    Per-record failures (cleaning, AI brief, HBase upsert, MongoDB update)
    are logged and skipped; they do not abort the run.
    """
    # Phoenix client for the target table, sharing this job's verify list.
    p_client = PhoenixHbase(table_name="NEWS_FINASSIST")
    p_client.verify_list = self.verify_list
    connection = p_client.connect_to_phoenix()

    # MongoDB client and the collection the records are read from.
    m_client = MongoClient(mongo_collection="NEWS_FINASSIST")
    db, collection_list = m_client.client_to_mongodb()
    collection = m_client.get_check_collection(
        db=db, collection_list=collection_list)

    # Table re-creation is disabled; statements kept for reference.
    # p_client.drop_table_phoenix(connection=connection)
    # sql = ('create table "NEWS_FINASSIST" ("ID_" varchar primary key, "T"."CONTENT_" varchar, '
    #        '"C"."ENTITY_NAME_" varchar, "C"."ENTITY_CODE_" varchar, "C"."TITLE_" varchar, "C"."BRIEF_" varchar, '
    #        '"C"."PUBLISH_TIME_" varchar, "C"."KEYWORDS_" varchar, "C"."URL_" varchar, "C"."DATA_SOURCE_" varchar,'
    #        '"C"."AREA_CODE_" varchar, "C"."BANK_CODE_" varchar, "C"."BANK_NAME_" varchar,'
    #        '"C"."UNIT_CODE_" varchar, "C"."PERIOD_CODE_" varchar, "C"."REMARK_" varchar,'
    #        '"C"."CREATE_TIME_" varchar, "C"."UPDATE_TIME_" varchar, "F"."STATUS_" varchar)'
    #        'IMMUTABLE_ROWS = true')
    # p_client.create_new_table_phoenix(connection=connection, sql=sql)

    for entity_code in self.code_list:
        # Each entity code doubles as the name of its cleaning module.
        cleaner = __import__(entity_code)
        self.logger.info("开始进行 ENTITY_CODE_ {}".format(entity_code))

        # NOTE(review): find_id is forwarded to get_data_from_mongo as-is;
        # presumably a starting/resume _id for CAIJINGNEWS - confirm.
        if entity_code == "CAIJINGNEWS":
            find_id = "5c6bfa508d7fee512a4ca68f"
        else:
            find_id = ""

        query = dict(m_client=m_client, collection=collection,
                     entity_code=entity_code, find_id=find_id)
        try:
            mongo_data_list = self.get_data_from_mongo(**query)
        except pymongo.errors.ServerSelectionTimeoutError:
            # One retry after a short pause; a second failure propagates.
            sleep(1)
            mongo_data_list = self.get_data_from_mongo(**query)

        if not mongo_data_list:
            continue

        once_count = 0
        try:
            self.find_count = mongo_data_list.count()
            # Manual iteration so a server-selection timeout on one fetch can
            # be retried; the range bounds the total number of attempts.
            for _ in range(1000000):
                try:
                    data = next(mongo_data_list)
                except pymongo.errors.ServerSelectionTimeoutError:
                    continue
                except StopIteration:
                    break

                data_id = data["_id"]
                if self.success_count % 100 == 0:
                    self.logger.info(
                        "running on data_id: {}".format(data_id))

                # Clean the record; the cleaner receives a one-element list.
                try:
                    del data["_id"]
                    re_data = cleaner.data_shuffle([data])
                except Exception as e:
                    self.logger.warning("清洗错误,错误 _id 为{}, {}".format(
                        data_id, e))
                    continue
                if not re_data:
                    self.bad_count += 1
                    continue

                if isinstance(re_data, list):
                    for list_data in re_data:
                        if not list_data:
                            continue

                        # Enrich with the AI brief, except for CNINFONEWS.
                        try:
                            if entity_code != "CNINFONEWS":
                                ai_data = self.get_brief_from_ai(
                                    data=list_data)
                            else:
                                ai_data = list_data
                        except Exception as e:
                            # Fixed: the message had no placeholder for the
                            # error, and the fallback used the whole re_data
                            # list instead of the current item.
                            self.logger.info(
                                "AI 调取失败, 错误信息 {}".format(e))
                            ai_data = list_data

                        # Upsert into Phoenix/HBase.
                        try:
                            success_count = p_client.upsert_to_phoenix_by_one(
                                connection=connection, data=ai_data)
                            once_count += success_count
                            self.success_count += success_count
                            if self.success_count % 10 == 0:
                                self.logger.info(
                                    "HBase 插入成功, 成功条数{}条".format(
                                        once_count))
                        except Exception as e:
                            self.logger.warning(
                                "HBase 插入 _id 为 {} 的数据失败, {}".format(
                                    data_id, e))
                            continue

                        # Mark the source document with {"d": 1}.
                        try:
                            update_count = m_client.update_to_mongodb(
                                collection=collection, data_id=data_id,
                                data_dict={"d": 1})
                            self.remove_count += update_count
                            if self.remove_count % 10 == 0:
                                self.logger.info(
                                    "MongoDB 更新成功, 成功条数 {} 条".format(
                                        "10"))
                        except Exception as e:
                            self.logger.warning(
                                "MongoDB 更新 _id 为 {} 的数据失败, {}".format(
                                    data_id, e))
                            continue
                elif isinstance(re_data, dict):
                    # re_data is known to be non-empty here (checked above).
                    try:
                        success_count = p_client.upsert_to_phoenix_by_one(
                            connection=connection, data=re_data)
                        once_count += success_count
                        self.success_count += success_count
                        self.logger.info(
                            "HBase 插入成功, 成功条数 {} 条".format(
                                success_count))
                    except Exception as e:
                        self.logger.warning(
                            "HBase 插入 _id 为 {} 的数据失败, {}".format(
                                data_id, e))
                        continue

            if once_count > 0:
                self.logger.info("ENTITY_CODE_: {} 插入成功条数 {}".format(
                    entity_code, once_count))
        finally:
            # Always release the cursor, even if an unexpected error escapes.
            mongo_data_list.close()

    # Close connections and report totals.
    m_client.client_close()
    p_client.close_client_phoenix(connection=connection)
    self.logger.info("本次共向 MongoDB 查取数据{}条".format(self.find_count))
    self.logger.info("本次共向 HBase 插入数据{}条".format(self.success_count))
    self.logger.info("本次共向 MongoDB 删除数据{}条".format(self.remove_count))
    self.logger.info("本次共向 MongoDB 插入数据{}条".format(self.old_count))
    self.logger.info("本次坏数据共 {} 条".format(self.bad_count))
    self.logger.handlers.clear()
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_ZYCJ_ZGZQW_GGKX (collection ZX_CJXW_ZYCJ).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_ZYCJ_ZGZQW_GGKX",
                         mongo_collection="ZX_CJXW_ZYCJ")
    for record in client.main():
        print(data_shuffle(record))
# -*- coding: utf-8 -*-
# Cleaning script for entity ZX_CJXW_ZYCJ_NSFWW (collection ZX_CJXW_ZYCJ).
from database._mongodb import MongoClient


def data_shuffle(data):
    """Return the given record unchanged; no cleaning is applied here."""
    return data


if __name__ == '__main__':
    # Script mode: print every item yielded by MongoClient.main() after
    # passing it through data_shuffle.
    client = MongoClient(entity_code="ZX_CJXW_ZYCJ_NSFWW",
                         mongo_collection="ZX_CJXW_ZYCJ")
    for record in client.main():
        print(data_shuffle(record))
work_book = read_excel(response.content) sheet_name = work_book.sheet_names()[0] sheet = work_book.sheet_by_name(sheet_name) com_name_ = "" row_list = sheet.row_values(2) for n in range(3, sheet.nrows): data_item = {} for k, v in data.items(): data_item[k] = v rows1 = sheet.row_values(n) sheet_dict = dict(zip(row_list, rows1)) if sheet_dict["保险公司"]: com_name_ = sheet_dict["保险公司"] else: sheet_dict["保险公司"] = com_name_ data_item["COM_NAME_"] = sheet_dict["保险公司"] data_item["PRO_NAME_"] = sheet_dict["保险产品名称"] data_item["ENSURE_SOURCE_TYPE_"] = sheet_dict["产品类型"] data_list.append(data_item) # return data_list if __name__ == '__main__': main_mongo = MongoClient(entity_code="JRCP_BX_HXYH_GW_ALL", mongo_collection="JRCP_BX") data_list = main_mongo.main() for data in data_list: data_list = data_shuffle(data) for item in data_list: print(item)