def tushare_to_sqlite_batch(file_name, table_name, field_pair_list, batch_size=500, **kwargs):
    """Export a MySQL table into a sqlite file, one sqlite table per ts_code.

    Codes are fetched ``batch_size`` at a time, so memory use can be traded
    against speed. Each ts_code "CODE.EXCH" is written to a sqlite table
    named "EXCHCODE" (e.g. "600000.SH" -> "SH600000"), replacing any
    previous contents.

    :param file_name: sqlite file name inside the 'sqlite_db' folder
    :param table_name: source MySQL table; must contain a ts_code column
    :param field_pair_list: optional [(old_name, new_name), ...] used to
        select and rename columns; ts_code is always kept for grouping
    :param batch_size: number of ts_codes fetched per SQL query
    :param kwargs: ignored, kept for call-site compatibility
    :return: None
    """
    logger.info('mysql %s 导入到 sqlite %s 开始', table_name, file_name)
    sqlite_db_folder_path = get_folder_path('sqlite_db', create_if_not_found=False)
    db_file_path = os.path.join(sqlite_db_folder_path, file_name)
    conn = sqlite3.connect(db_file_path)
    try:
        sql_str = f"select ts_code from {table_name} group by ts_code"
        with with_db_session(engine_md) as session:
            table = session.execute(sql_str)
            code_list = [row[0] for row in table.fetchall()]
        code_count, data_count, num = len(code_list), 0, 0
        for code_sub_list in split_chunk(code_list, batch_size):
            in_clause = ", ".join([r'%s' for _ in code_sub_list])
            sql_str = f"select * from {table_name} where ts_code in ({in_clause})"
            df_tot = pd.read_sql(sql_str, engine_md, params=code_sub_list)
            # select and rename the requested fields
            if field_pair_list is not None:
                field_list = [_[0] for _ in field_pair_list]
                field_list.append('ts_code')
                df_tot = df_tot[field_list].rename(columns=dict(field_pair_list))
            dfg = df_tot.groupby('ts_code')
            # `num` keeps counting across batches (start=num + 1)
            for num, (ts_code, df) in enumerate(dfg, start=num + 1):
                code_exchange = ts_code.split('.')
                sqlite_table_name = f"{code_exchange[1]}{code_exchange[0]}"
                df_len = df.shape[0]
                data_count += df_len
                logger.debug('%4d/%d) mysql %s -> sqlite %s %s %d 条记录',
                             num, code_count, table_name, file_name, sqlite_table_name, df_len)
                df.drop('ts_code', axis=1, inplace=True)
                df.to_sql(sqlite_table_name, conn, index=False, if_exists='replace')
    finally:
        # BUG FIX: the sqlite connection used to leak; always close it.
        conn.close()
    logger.info('mysql %s 导入到 sqlite %s 结束,导出数据 %d 条', table_name, file_name, data_count)
def import_future_info(chain_param=None):
    """Update the futures contract list table ``ifind_future_info``.

    Walks each exchange's sector from its latest known IPO date (or its
    establishment date) to yesterday, collecting contract codes via the
    iFinD data pool, then fetches basic data for codes not yet stored and
    upserts them.

    :param chain_param: only used to pass parameters along a task.chain
    :return: None
    """
    table_name = 'ifind_future_info'
    has_table = engine_md.has_table(table_name)
    logger.info("更新 %s [%s] 开始", table_name, has_table)
    # load contracts already present so they can be skipped later
    if has_table:
        sql_str = f'SELECT ths_code, ths_start_trade_date_future FROM {table_name}'
        with with_db_session(engine_md) as session:
            table = session.execute(sql_str)
            code_ipo_date_dic = dict(table.fetchall())
        exchange_latest_ipo_date_dic = get_exchange_latest_data()
    else:
        code_ipo_date_dic = {}
        exchange_latest_ipo_date_dic = {}
    exchange_sectorid_dic_list = [
        {
            'exch_eng': 'SHFE',
            'exchange_name': '上海期货交易所',
            'sectorid': '091001',
            'date_establish': '1995-05-10'
        },
        {
            'exch_eng': 'CFFEX',
            'exchange_name': '中国金融期货交易所',
            'sectorid': '091004',
            'date_establish': '2013-09-10'
        },
        {
            'exch_eng': 'DCE',
            'exchange_name': '大连商品交易所',
            'sectorid': '091002',
            'date_establish': '1999-01-10'
        },
        {
            'exch_eng': 'CZCE',
            'exchange_name': '郑州商品交易所',
            'sectorid': '091003',
            'date_establish': '1999-01-10'
        },
    ]
    # (indicator, wss parameter, sqlalchemy dtype) triples
    indicator_param_list = [
        ('ths_future_short_name_future', '', String(50)),
        ('ths_future_code_future', '', String(20)),
        ('ths_sec_type_future', '', String(20)),
        ('ths_td_variety_future', '', String(20)),
        ('ths_td_unit_future', '', DOUBLE),
        ('ths_pricing_unit_future', '', String(20)),
        ('ths_mini_chg_price_future', '', DOUBLE),
        ('ths_chg_ratio_lmit_future', '', DOUBLE),
        ('ths_td_deposit_future', '', DOUBLE),
        ('ths_start_trade_date_future', '', Date),
        ('ths_last_td_date_future', '', Date),
        ('ths_last_delivery_date_future', '', Date),
        ('ths_delivery_month_future', '', String(10)),
        ('ths_listing_benchmark_price_future', '', DOUBLE),
        ('ths_initial_td_deposit_future', '', DOUBLE),
        ('ths_contract_month_explain_future', '', String(120)),
        ('ths_td_time_explain_future', '', String(120)),
        ('ths_last_td_date_explian_future', '', String(120)),
        ('ths_delivery_date_explain_future', '', String(120)),
        ('ths_exchange_short_name_future', '', String(50)),
        ('ths_contract_en_short_name_future', '', String(50)),
        ('ths_contract_en_name_future', '', String(50)),
    ]
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in indicator_param_list], sep=';')
    # dtype mapping used when writing to MySQL
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    dtype['exch_eng'] = String(20)
    # collect contract codes per exchange, stepping ndays_per_update at a time
    code_set = set()
    ndays_per_update = 90
    sector_count = len(exchange_sectorid_dic_list)
    for num, exchange_sectorid_dic in enumerate(exchange_sectorid_dic_list, start=1):
        exchange_name = exchange_sectorid_dic['exchange_name']
        exch_eng = exchange_sectorid_dic['exch_eng']
        sector_id = exchange_sectorid_dic['sectorid']
        date_establish = exchange_sectorid_dic['date_establish']
        # start from the latest known IPO date, falling back to the
        # exchange establishment date
        date_since = str_2_date(
            exchange_latest_ipo_date_dic.setdefault(exch_eng, date_establish))
        date_yestoday = date.today() - timedelta(days=1)
        logger.info("%d/%d) %s[%s][%s] %s ~ %s",
                    num, sector_count, exchange_name, exch_eng, sector_id, date_since, date_yestoday)
        while date_since <= date_yestoday:
            date_since_str = date_2_str(date_since)
            # fetch the sector members (futures contracts) for the date, e.g.
            # THS_DP('block','2021-01-15;091002003','date:Y,thscode:Y,security_name:Y,security_name_in_time:Y')
            try:
                future_info_df = invoker.THS_DataPool(
                    'block', '%s;%s' % (date_since_str, sector_id), 'thscode:Y,security_name:Y')
            except APIError as exp:
                if exp.ret_dic['error_code'] in (-4001, -4210,):
                    # "no data" style errors: treat as empty and continue stepping
                    future_info_df = None
                else:
                    logger.exception("THS_DataPool %s 获取失败, '%s;%s'",
                                     exchange_name, date_since_str, sector_id)
                    break
            if future_info_df is not None and future_info_df.shape[0] > 0:
                code_set |= set(future_info_df['THSCODE'])
            if date_since >= date_yestoday:
                break
            else:
                date_since += timedelta(days=ndays_per_update)
                if date_since > date_yestoday:
                    date_since = date_yestoday
        if DEBUG:
            break
    # only fetch basic data for contracts not already stored
    code_list = [wc for wc in code_set if wc not in code_ipo_date_dic]
    if len(code_list) > 0:
        # BUG FIX: the chunk variable used to shadow `code_list` itself
        for code_sub_list in split_chunk(code_list, 500):
            future_info_df = invoker.THS_BasicData(code_sub_list, json_indicator, json_param)
            if future_info_df is None or future_info_df.shape[0] == 0:
                data_count = 0
                logger.warning("更新 %s 结束 %d 条记录被更新", table_name, data_count)
            else:
                # fill the exch_eng column from the Chinese exchange name
                future_info_df['exch_eng'] = ''
                for exchange_sectorid_dic in exchange_sectorid_dic_list:
                    # BUG FIX: chained indexing (df[col][mask] = ...) may write
                    # to a copy; use .loc for a guaranteed in-place assignment
                    mask = future_info_df['ths_exchange_short_name_future'] == \
                        exchange_sectorid_dic['exchange_name']
                    future_info_df.loc[mask, 'exch_eng'] = exchange_sectorid_dic['exch_eng']
                data_count = bunch_insert_on_duplicate_update(
                    future_info_df, table_name, engine_md, dtype,
                    primary_keys=['ths_code'], schema=config.DB_SCHEMA_MD)
                logger.info("更新 %s 结束 %d 条记录被更新", table_name, data_count)
def import_stock_info_hk(chain_param=None, refresh=False):
    """Import the HK stock universe (codes and names) into ``wind_stock_info_hk``.

    :param chain_param: used by celery task.chain to pass upstream results
    :param refresh: False (default) fetches only today's universe; True
        rebuilds from DATE_BASE onwards, sampling one date per year
    :return: None
    """
    table_name = 'wind_stock_info_hk'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    param_list = [
        ('sec_name', String(20)),
        ('trade_code', String(20)),
        ('ipo_date', Date),
        ('delist_date', Date),
        ('mkt', String(20)),
        ('exch_city', String(20)),
        ('exch_eng', String(20)),
        ('prename', String(2000)),
    ]
    # comma-separated field names for the wss call
    param = ",".join([key for key, _ in param_list])
    rename_col_dic = {key.upper(): key.lower() for key, _ in param_list}
    # dtype mapping used when writing to MySQL
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    if refresh:
        date_fetch = DATE_BASE
    else:
        date_fetch = date.today()
    date_end = date.today()
    stock_code_set = set()
    # union the code universe sampled one date per year, plus today
    while date_fetch < date_end:
        stock_code_set_sub = get_stock_code_set(date_fetch)
        if stock_code_set_sub is not None:
            stock_code_set |= stock_code_set_sub
        date_fetch += timedelta(days=365)
    stock_code_set_sub = get_stock_code_set(date_end)
    if stock_code_set_sub is not None:
        stock_code_set |= stock_code_set_sub
    # fetch IPO / delist dates etc. for all codes, 1000 per wss call, e.g.
    # w.wss("300005.SZ,300372.SZ,000003.SZ", "ipo_date,trade_code,mkt,exch_city,exch_eng")
    stock_code_list = list(stock_code_set)
    seg_count = 1000
    stock_info_df_list = []
    for stock_code_list_sub in split_chunk(stock_code_list, seg_count):
        stock_info_df = invoker.wss(stock_code_list_sub, param)
        stock_info_df_list.append(stock_info_df)
        if DEBUG:
            break
    # BUG FIX: pd.concat raises ValueError on an empty list — bail out early
    if len(stock_info_df_list) == 0:
        logging.warning("更新 %s 结束 无数据可导入", table_name)
        return
    stock_info_all_df = pd.concat(stock_info_df_list)
    stock_info_all_df.index.rename('wind_code', inplace=True)
    stock_info_all_df.rename(columns=rename_col_dic, inplace=True)
    logging.info('%d data will be import', stock_info_all_df.shape[0])
    stock_info_all_df.reset_index(inplace=True)
    data_count = bunch_insert_on_duplicate_update(stock_info_all_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    # on first creation switch the table to MyISAM and add the primary key
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # refresh the code_mapping table
    update_from_info_table(table_name)
def tushare_to_sqlite_batch(file_name, table_name, field_pair_list, batch_size=500, sort_by='trade_date',
                            clean_old_file_first=True, **kwargs):
    """Export a MySQL table into a sqlite file, one sqlite table per ts_code.

    ``tushare_stock_index_daily_md`` gets special handling: only a fixed set
    of index codes is exported, each to a friendly table name. Any other
    table is exported code by code, ``batch_size`` codes per query, with
    sqlite tables named "EXCHCODE" (e.g. "600000.SH" -> "SH600000").

    :param file_name: sqlite file name inside the 'sqlite_db' folder
    :param table_name: source MySQL table; must contain a ts_code column
    :param field_pair_list: optional [(old_name, new_name), ...] used to
        select and rename columns; ts_code is always kept for grouping
    :param batch_size: number of ts_codes fetched per SQL query
    :param sort_by: source column each per-code frame is sorted by; mapped
        through field_pair_list when renaming is active; None disables sorting
    :param clean_old_file_first: delete an existing sqlite file first
    :param kwargs: ignored, kept for call-site compatibility
    :return: None
    """
    logger.info('mysql %s 导入到 sqlite %s 开始', table_name, file_name)
    sqlite_db_folder_path = get_folder_path('sqlite_db', create_if_not_found=False)
    db_file_path = os.path.join(sqlite_db_folder_path, file_name)
    # removing the old file up front speeds up the import
    if clean_old_file_first and os.path.exists(
            db_file_path) and os.path.isfile(db_file_path):
        os.remove(db_file_path)
    conn = sqlite3.connect(db_file_path)
    # column selection / renaming
    if field_pair_list is not None:
        field_list = [_[0] for _ in field_pair_list]
        field_list.append('ts_code')
        field_pair_dic = dict(field_pair_list)
        # map the sort column to its post-rename name
        sort_by = field_pair_dic[sort_by] if sort_by is not None else None
    else:
        field_list = None
        field_pair_dic = None
    try:
        if table_name == 'tushare_stock_index_daily_md':
            # special case: export a fixed set of index codes only
            ts_code_sqlite_table_name_dic = {
                # "": "CBIndex",
                # "h30024.CSI": "CYBZ",  # 中证800保险
                "399300.SZ": "HS300",  # 沪深300
                "000016.SH": "HS50",  # 上证50
                "399905.SZ": "HS500",  # 中证500
                "399678.SZ": "SCXG",  # 深次新股
                "399101.SZ": "ZXBZ",  # 中小板综
            }
            code_list = [_ for _ in ts_code_sqlite_table_name_dic.keys()]
            in_clause = ", ".join([r'%s' for _ in code_list])
            sql_str = f"select * from {table_name} where ts_code in ({in_clause})"
            df_tot = pd.read_sql(sql_str, engine_md, params=code_list)
            # select and rename the requested fields
            if field_pair_dic is not None:
                df_tot = df_tot[field_list].rename(columns=field_pair_dic)
            dfg = df_tot.groupby('ts_code')
            code_count, data_count = len(code_list), 0
            for num, (ts_code, df) in enumerate(dfg, start=1):
                sqlite_table_name = ts_code_sqlite_table_name_dic[ts_code]
                df_len = df.shape[0]
                data_count += df_len
                logger.debug('%2d/%d) mysql %s -> sqlite %s %s %d 条记录',
                             num, code_count, table_name, file_name, sqlite_table_name, df_len)
                df = df.drop('ts_code', axis=1)
                if sort_by is not None:
                    df = df.sort_values(sort_by)
                df.to_sql(sqlite_table_name, conn, index=False, if_exists='replace')
        else:
            # generic case: export every ts_code found in the table
            sql_str = f"select ts_code from {table_name} group by ts_code"
            with with_db_session(engine_md) as session:
                table = session.execute(sql_str)
                code_list = [row[0] for row in table.fetchall()]
            code_count, data_count, num = len(code_list), 0, 0
            for code_sub_list in split_chunk(code_list, batch_size):
                in_clause = ", ".join([r'%s' for _ in code_sub_list])
                sql_str = f"select * from {table_name} where ts_code in ({in_clause})"
                df_tot = pd.read_sql(sql_str, engine_md, params=code_sub_list)
                # select and rename the requested fields
                if field_pair_dic is not None:
                    df_tot = df_tot[field_list].rename(columns=field_pair_dic)
                dfg = df_tot.groupby('ts_code')
                # `num` keeps counting across batches (start=num + 1)
                for num, (ts_code, df) in enumerate(dfg, start=num + 1):
                    code_exchange = ts_code.split('.')
                    sqlite_table_name = f"{code_exchange[1]}{code_exchange[0]}"
                    df_len = df.shape[0]
                    data_count += df_len
                    logger.debug('%4d/%d) mysql %s -> sqlite %s %s %d 条记录',
                                 num, code_count, table_name, file_name, sqlite_table_name, df_len)
                    df = df.drop('ts_code', axis=1)
                    if sort_by is not None:
                        df = df.sort_values(sort_by)
                    df.to_sql(sqlite_table_name, conn, index=False, if_exists='replace')
    finally:
        # BUG FIX: the sqlite connection used to leak; always close it.
        conn.close()
    logger.info('mysql %s 导入到 sqlite %s 结束,导出数据 %d 条', table_name, file_name, data_count)
def df_2_table(doc, df, format_by_index=None, format_by_col=None, max_col_count=None, mark_top_n=None,
               mark_top_n_on_cols=None):
    """Render a DataFrame as one or more docx tables appended to *doc*.

    :param doc: python-docx Document the table(s) are appended to
    :param df: DataFrame to render; the index becomes the first column
    :param format_by_index: {index: formatter} applied to whole rows; a
        formatter is a format string or a callable
    :param format_by_col: {col_name: formatter} applied per column (a row
        formatter takes precedence)
    :param max_col_count: max data columns per table (index column excluded);
        wider frames are split into several tables
    :param mark_top_n: mark the top N ranked values (red, bold)
    :param mark_top_n_on_cols: restrict top-N marking to these columns;
        None marks across all columns
    :return: None
    """
    if max_col_count is None:
        max_col_count = df.shape[1]
    # pre-compute which cells belong to the top N (descending rank)
    if mark_top_n is not None:
        if mark_top_n_on_cols is not None:
            rank_df = df[mark_top_n_on_cols]
        else:
            rank_df = df
        rank_df = rank_df.rank(ascending=False)
        is_in_rank_df = rank_df <= mark_top_n
    else:
        is_in_rank_df = None
    for table_num, col_name_list in enumerate(
            split_chunk(list(df.columns), max_col_count)):
        if table_num > 0:
            # blank paragraph between consecutive tables
            doc.add_paragraph('')
        sub_df = df[col_name_list]
        row_num, col_num = sub_df.shape
        t = doc.add_table(row_num + 1, col_num + 1)
        # write head
        for j in range(col_num):
            paragraph = t.cell(0, j + 1).paragraphs[0]
            paragraph.add_run(str(col_name_list[j])).bold = True
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        # write head bg color
        for j in range(col_num + 1):
            t.cell(0, j)._tc.get_or_add_tcPr().append(
                parse_xml(r'<w:shd {} w:fill="00A2E8"/>'.format(nsdecls('w'))))
        # format table style to be a grid
        t.style = 'TableGrid'
        # populate the table with the dataframe
        for i in range(row_num):
            index = sub_df.index[i]
            paragraph = t.cell(i + 1, 0).paragraphs[0]
            index_str = str(date_2_str(index))
            paragraph.add_run(index_str).bold = True
            paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
            # row-level formatter wins over column-level formatters
            if format_by_index is not None and index in format_by_index:
                format_row = format_by_index[index]
            else:
                format_row = None
            for j in range(col_num):
                col_name = col_name_list[j]
                if format_row is None and format_by_col is not None and col_name in format_by_col:
                    format_cell = format_by_col[col_name]
                else:
                    format_cell = format_row
                content = sub_df.values[i, j]
                if format_cell is None:
                    text = str(content)
                elif isinstance(format_cell, str):
                    text = str.format(format_cell, content)
                elif callable(format_cell):
                    text = format_cell(content)
                else:
                    # BUG FIX: ValueError does not %-format its arguments —
                    # build the message explicitly
                    raise ValueError('%s: %s 无效' % (index, format_cell))
                paragraph = t.cell(i + 1, j + 1).paragraphs[0]
                paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
                try:
                    style = paragraph.add_run(text)
                    if is_in_rank_df is not None and col_name in is_in_rank_df and is_in_rank_df.loc[
                            index, col_name]:
                        style.font.color.rgb = RGBColor(0xed, 0x1c, 0x24)
                        style.bold = True
                except TypeError:
                    logger.exception('df.iloc[%d, %d] = df["%s", "%s"] = %s', i, j, index, col_name, text)
                    # BUG FIX: plain re-raise instead of `raise exp from exp`
                    # (which set the exception as its own cause)
                    raise
        # zebra stripes on even body rows
        for i in range(1, row_num + 1):
            for j in range(col_num + 1):
                if i % 2 == 0:
                    t.cell(i, j)._tc.get_or_add_tcPr().append(
                        parse_xml(r'<w:shd {} w:fill="A3D9EA"/>'.format(
                            nsdecls('w'))))
def import_cb_info(chain_param=None, first_time=False):
    """Import the whole-market convertible-bond universe into
    ``wind_convertible_bond_info``.

    :param chain_param: used by celery task.chain to pass upstream results
    :param first_time: when True, scan from 1999 onwards (one sample date per
        year) to collect the full history of basic info; otherwise only
        yesterday's universe is fetched
    :return: None
    """
    table_name = 'wind_convertible_bond_info'
    has_table = engine_md.has_table(table_name)
    # BUG FIX: the original list contained
    # ('clause_interest_compensationinterest', DOUBLE) twice, requesting the
    # same field twice from wss; the duplicate has been removed.
    name_param_list = [
        ('trade_code', DOUBLE),
        ('fullname', String(45)),
        ('sec_name', String(45)),
        ('issue_announcement', Date),
        ('ipo_date', Date),
        ('clause_conversion_2_swapsharestartdate', Date),
        ('clause_conversion_2_swapshareenddate', Date),
        ('clause_conversion_code', DOUBLE),
        ('clause_interest_5', String(8)),
        ('clause_interest_8', String(8)),
        ('clause_interest_6', String(200)),
        ('clause_interest_compensationinterest', DOUBLE),
        ('issueamount', DOUBLE),
        ('term', DOUBLE),
        ('underlyingcode', String(20)),
        ('underlyingname', String(20)),
        ('redemption_beginning', Date),
    ]
    # comma-separated field names for the wss call
    param = ",".join([key for key, _ in name_param_list])
    # dtype mapping used when writing to MySQL
    dtype = {key: val for key, val in name_param_list}
    dtype['wind_code'] = String(20)
    # build the list of dates whose universes will be unioned
    if first_time:
        date_since = datetime.strptime('1999-01-01', STR_FORMAT_DATE).date()
        date_list = []
        one_year = timedelta(days=365)
        while date_since < date.today() - ONE_DAY:
            date_list.append(date_since)
            date_since += one_year
        else:
            date_list.append(date.today() - ONE_DAY)
    else:
        date_list = [date.today() - ONE_DAY]
    # union the wind_code universe over all sample dates
    wind_code_set = set()
    for fetch_date in date_list:
        data_set = get_cb_set(fetch_date)
        if data_set is not None:
            wind_code_set |= data_set
    # fetch basic info 1000 codes per wss call, e.g.
    # w.wss("300005.SZ,300372.SZ,000003.SZ", "ipo_date,trade_code,mkt,exch_city,exch_eng")
    wind_code_list = list(wind_code_set)
    data_info_df_list = []
    try:
        for sub_list in split_chunk(wind_code_list, 1000):
            data_df = invoker.wss(sub_list, param, "unit=1")
            if data_df is not None and data_df.shape[0] > 0:
                data_info_df_list.append(data_df)
            # debug runs stop after the first couple of chunks
            if DEBUG and len(data_info_df_list) > 1:
                break
    finally:
        # persist whatever was fetched even if a later chunk failed
        if len(data_info_df_list) > 0:
            data_info_all_df = pd.concat(data_info_df_list)
            data_info_all_df.index.rename('wind_code', inplace=True)
            data_info_all_df.rename(
                columns={col: col.lower() for col in data_info_all_df.columns}, inplace=True)
            logging.info('%d data will be import', data_info_all_df.shape[0])
            data_info_all_df.reset_index(inplace=True)
            data_count = bunch_insert_on_duplicate_update(
                data_info_all_df, table_name, engine_md, dtype=dtype)
            logging.info("更新 %s 完成 新增数据 %d 条", table_name, data_count)
            # on first creation switch the table to MyISAM and add the primary key
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
        # refresh the code_mapping table
        if engine_md.has_table(table_name):
            update_from_info_table(table_name)
def import_wind_stock_info(chain_param=None, refresh=False):
    """Import the A-share stock universe (codes and names) into ``wind_stock_info``.

    :param chain_param: used by celery task.chain to pass upstream results
    :param refresh: False (default) fetches only today's universe; True
        rebuilds from 2005-01-01 onwards, sampling one date per year
    :return: None
    """
    table_name = 'wind_stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    wind_indicator_param_list = [
        ('sec_name', String(20)),
        ('trade_code', String(20)),
        ('ipo_date', Date),
        ('delist_date', Date),
        ('mkt', String(20)),
        ('exch_city', String(20)),
        ('exch_eng', String(20)),
        ('prename', String(2000)),
    ]
    # comma-separated field names for the wss call,
    # e.g. "ipo_date,trade_code,mkt,exch_city,exch_eng"
    param = ",".join([key for key, _ in wind_indicator_param_list])
    # dtype mapping used when writing to MySQL
    dtype = {key: val for key, val in wind_indicator_param_list}
    dtype['wind_code'] = String(20)
    if refresh:
        date_fetch = datetime.strptime('2005-1-1', STR_FORMAT_DATE).date()
    else:
        date_fetch = date.today()
    date_end = date.today()
    stock_code_set = set()
    # union the code universe sampled one date per year, plus today
    while date_fetch < date_end:
        stock_code_set_sub = get_stock_code_set(date_fetch)
        if stock_code_set_sub is not None:
            stock_code_set |= stock_code_set_sub
        date_fetch += timedelta(days=365)
    stock_code_set_sub = get_stock_code_set(date_end)
    if stock_code_set_sub is not None:
        stock_code_set |= stock_code_set_sub
    # fetch IPO / delist dates etc. for all codes, 1000 per wss call, e.g.
    # w.wss("300005.SZ,300372.SZ,000003.SZ", "ipo_date,trade_code,mkt,exch_city,exch_eng")
    stock_code_list = list(stock_code_set)
    seg_count = 1000
    stock_info_df_list = []
    for stock_code_list_sub in split_chunk(stock_code_list, seg_count):
        stock_info_df = invoker.wss(stock_code_list_sub, param)
        stock_info_df_list.append(stock_info_df)
        if DEBUG:
            break
    # BUG FIX: pd.concat raises ValueError on an empty list — bail out early
    if len(stock_info_df_list) == 0:
        logging.warning("更新 %s 结束 无数据可导入", table_name)
        return
    stock_info_all_df = pd.concat(stock_info_df_list)
    stock_info_all_df.index.rename('wind_code', inplace=True)
    logging.info('%d data will be import', stock_info_all_df.shape[0])
    stock_info_all_df.reset_index(inplace=True)
    # upsert into wind_stock_info
    data_count = bunch_insert_on_duplicate_update(stock_info_all_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    # on first creation switch the table to MyISAM and add the primary key
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # refresh the code_mapping table
    update_from_info_table(table_name)
def df_2_table(doc, df, format_by_index=None, format_by_col=None, max_col_count=None):
    """Render a DataFrame as one or more docx tables appended to *doc*.

    :param doc: python-docx Document the table(s) are appended to
    :param df: DataFrame to render; the index becomes the first column
    :param format_by_index: {index: formatter} applied to whole rows; a
        formatter is a format string or a callable
    :param format_by_col: {col_name: formatter} applied per column (a row
        formatter takes precedence)
    :param max_col_count: max data columns per table (index column excluded);
        wider frames are split into several tables
    :return: None
    """
    # hoisted out of the render loop (they used to be re-imported per table)
    from docx.oxml.ns import nsdecls
    from docx.oxml import parse_xml
    from docx.enum.text import WD_ALIGN_PARAGRAPH
    if max_col_count is None:
        max_col_count = df.shape[1]
    for table_num, col_name_list in enumerate(split_chunk(list(df.columns), max_col_count)):
        if table_num > 0:
            # blank paragraph between consecutive tables
            doc.add_paragraph('')
        sub_df = df[col_name_list]
        row_num, col_num = sub_df.shape
        t = doc.add_table(row_num + 1, col_num + 1)
        # write head
        for j in range(col_num):
            paragraph = t.cell(0, j + 1).paragraphs[0]
            paragraph.add_run(str(col_name_list[j])).bold = True
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        # write head bg color
        for j in range(col_num + 1):
            t.cell(0, j)._tc.get_or_add_tcPr().append(
                parse_xml(r'<w:shd {} w:fill="00A2E8"/>'.format(nsdecls('w'))))
        # format table style to be a grid
        t.style = 'TableGrid'
        # populate the table with the dataframe
        for i in range(row_num):
            index = sub_df.index[i]
            paragraph = t.cell(i + 1, 0).paragraphs[0]
            index_str = str(date_2_str(index))
            paragraph.add_run(index_str).bold = True
            paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
            # row-level formatter wins over column-level formatters
            if format_by_index is not None and index in format_by_index:
                row_formater = format_by_index[index]
            else:
                row_formater = None
            for j in range(col_num):
                # BUG FIX: the formatter is now resolved per cell — it used to
                # leak from one column to all later columns of the same row
                if row_formater is None and format_by_col is not None and col_name_list[j] in format_by_col:
                    formater = format_by_col[col_name_list[j]]
                else:
                    formater = row_formater
                content = sub_df.values[i, j]
                if formater is None:
                    text = str(content)
                elif isinstance(formater, str):
                    text = str.format(formater, content)
                elif callable(formater):
                    text = formater(content)
                else:
                    # BUG FIX: ValueError does not %-format its arguments —
                    # build the message explicitly
                    raise ValueError('%s: %s 无效' % (index, formater))
                paragraph = t.cell(i + 1, j + 1).paragraphs[0]
                paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
                paragraph.add_run(text)
        # zebra stripes on even body rows
        for i in range(1, row_num + 1):
            for j in range(col_num + 1):
                if i % 2 == 0:
                    t.cell(i, j)._tc.get_or_add_tcPr().append(
                        parse_xml(r'<w:shd {} w:fill="A3D9EA"/>'.format(nsdecls('w'))))