def merge_stock_info():
    """
    Merge wind and ifind data into the table of the corresponding name
    :return:
    """
    table_name = 'stock_info'
    logging.info("updating %s started", table_name)
    has_table = engine_md.has_table(table_name)
    ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    # ifind_model = TABLE_MODEL_DIC[ifind_table_name]
    # wind_model = TABLE_MODEL_DIC[wind_table_name]
    # with with_db_session(engine_md) as session:
    #     session.query(ifind_model, wind_model).filter(ifind_model.c.ths_code == wind_model.c.wind_code)
    ifind_sql_str = "select * from {table_name}".format(table_name=ifind_table_name)
    wind_sql_str = "select * from {table_name}".format(table_name=wind_table_name)
    ifind_df = pd.read_sql(ifind_sql_str, engine_md)  # , index_col='ths_code'
    wind_df = pd.read_sql(wind_sql_str, engine_md)  # , index_col='wind_code'
    joined_df = pd.merge(ifind_df, wind_df, how='outer',
                         left_on='ths_code', right_on='wind_code', indicator='indicator_column')
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
        'sec_name': (String(20), prefer_left, {'left_key': 'ths_stock_short_name_stock', 'right_key': 'sec_name'}),
        'cn_name': (String(100), get_value, {'key': 'ths_corp_cn_name_stock'}),
        'en_name': (String(100), get_value, {'key': 'ths_corp_name_en_stock'}),
        'delist_date': (Date, prefer_left, {'left_key': 'ths_delist_date_stock', 'right_key': 'delist_date'}),
        'ipo_date': (Date, prefer_left, {'left_key': 'ths_ipo_date_stock', 'right_key': 'ipo_date'}),
        'pre_name': (Text, prefer_left, {'left_key': 'ths_corp_name_en_stock', 'right_key': 'prename'}),
        'established_date': (Date, get_value, {'key': 'ths_established_date_stock'}),
        'exch_city': (String(20), get_value, {'key': 'exch_city'}),
        'exch_cn': (String(20), get_value, {'key': 'ths_listing_exchange_stock'}),
        'exch_eng': (String(20), get_value, {'key': 'exch_eng'}),
        'stock_code': (String(20), prefer_left, {'left_key': 'ths_stock_code_stock', 'right_key': 'trade_code'}),
        'mkt': (String(20), get_value, {'key': 'mkt'}),
    }
    col_merge_rule_dic = {
        key: (val[1], val[2]) for key, val in col_merge_dic.items()
    }
    dtype = {
        key: val[0] for key, val in col_merge_dic.items()
    }
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logger.info('%s: %d records inserted or updated', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    return data_df
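# The col_merge_dic entries above follow a (sqlalchemy_dtype, merge_func, kwargs) protocol: merge_data is
# expected to apply merge_func(row, **kwargs) to every row of joined_df to produce one output column.
# The real prefer_left / prefer_right / get_value / mean_value helpers are imported from elsewhere in this
# project; the two *_sketch functions below are only an illustrative assumption of what such row-level
# rules could look like, not the project's actual implementation.
def prefer_left_sketch(data_s, left_key, right_key, **_):
    """Illustrative sketch: use the ifind (left) value and fall back to the wind (right) value when missing."""
    left_value = data_s[left_key] if left_key in data_s else None
    if left_value is None or pd.isna(left_value):
        return data_s[right_key] if right_key in data_s else None
    return left_value


def get_value_sketch(data_s, key, **_):
    """Illustrative sketch: pass a single source column through unchanged."""
    return data_s[key] if key in data_s else None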
def merge_tushare_daily(ths_code_set: set = None, date_from=None):
    table_name = 'tushare_daily'
    logging.info("merging %s started", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(`trade_date`),1) from {table_name}".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    # fetch data from each source table
    # daily
    tushare_his_df = get_tushare_daily_df('tushare_stock_daily', date_from)
    tushare_ds_df = get_tushare_daily_df('tushare_daily_basic', date_from)
    tushare_suspend_df = get_tushare_daily_df('tushare_suspend', None)
    # quarterly
    tushare_merge_df = get_tushare_daily_df('tushare_stock_cashflow', None)
    # tushare_balancesheet_df = get_tushare_daily_df('tushare_stock_balancesheet', None)
    # tushare_icome_df = get_tushare_daily_df('tushare_stock_income', None)
    # tushare_indicator_df = get_tushare_daily_df('tushare_stock_fina_indicator', None)
    # tushare_merge_quarterly_df = merge_tushare_quarterly(None)
    #
    # tushare_merge_df_one = pd.merge(tushare_cashflow_df, tushare_balancesheet_df,
    #                                 how='outer', on=['ts_code', 'end_date'])
    # tushare_merge_df_two = pd.merge(tushare_merge_df_one, tushare_icome_df,
    #                                 how='outer', on=['ts_code', 'end_date'])
    # tushare_merge_df = pd.merge(tushare_merge_df_two, tushare_indicator_df,
    #                             how='outer', on=['ts_code', 'end_date'])

    tushare_his_dis_df = pd.merge(tushare_his_df, tushare_ds_df, how='outer', on=['ts_code', 'trade_date'])
    tushare_his_ds_df = pd.merge(tushare_his_dis_df, tushare_suspend_df, how='outer', on='ts_code')
    tushare_merge_df_g = tushare_merge_df.groupby('ts_code')
    tushare_his_ds_df_g = tushare_his_ds_df.groupby('ts_code')
    logging.debug('data extraction finished')
    merge_date_dic_dic = {}
    # compute financial disclosure dates
    for suspend_date_g in [tushare_merge_df.groupby(['ts_code', 'ann_date'])]:
        for num, ((ts_code, ann_date), date_df) in enumerate(suspend_date_g):
            if ths_code_set is not None and ts_code not in ths_code_set:
                continue
            if is_nan_or_none(ann_date):
                continue
            suspend_date_dic = merge_date_dic_dic.setdefault(ts_code, {})
            if ts_code not in tushare_merge_df_g.size():
                logger.error('no financial data for %s in the merged (hebing) data', ts_code)
                continue
            tushare_merge_df_temp = tushare_merge_df_g.get_group(ts_code)
            if ann_date not in merge_date_dic_dic:
                tushare_merge_df_temp = tushare_merge_df_temp[tushare_merge_df_temp['f_ann_date'] <= ann_date]
                if tushare_merge_df_temp.shape[0] > 0:
                    suspend_date_dic[ann_date] = tushare_merge_df_temp.sort_values('f_ann_date').iloc[0]
    # set dtype
    dtype = {'ann_date': Date}
    for dic in [DTYPE_TUSHARE_STOCK_DAILY_MD, DTYPE_TUSHARE_STOCK_DAILY_BASIC, DTYPE_TUSHARE_SUSPEND,
                # DTYPE_TUSHARE_STOCK_INCOME,
                # DTYPE_TUSHARE_STOCK_BALABCESHEET,
                DTYPE_TUSHARE_CASHFLOW]:
        for key, val in dic.items():  # len(dic) == 12
            dtype[key] = val
    logging.debug("report-date calculation finished")
    # assemble data_df
    tot_data_count, data_count, data_df_list, for_count = 0, 0, [], len(merge_date_dic_dic)
    try:
        for num, (ts_code, report_date_dic) in enumerate(merge_date_dic_dic.items(), start=1):  # key: ts_code
            # some columns contain many NaN values
            # TODO: using .size() as a membership check for now; improve later
            if ts_code not in tushare_his_ds_df_g.size():
                logger.error('no daily data for %s in the merged daily/suspend data', ts_code)
                continue
            tushare_his_ds_df_cur_ths_code = tushare_his_ds_df_g.get_group(ts_code)  # shape[1] == 30
            logger.debug('%d/%d) processing %s, %d records', num, for_count, ts_code,
                         tushare_his_ds_df_cur_ths_code.shape[0])
            report_date_list = list(report_date_dic.keys())
            report_date_list.sort()
            for report_date_from, report_date_to in generate_range(report_date_list):
                logger.debug('%d/%d) processing %s [%s - %s]', num, for_count, ts_code,
                             date_2_str(report_date_from), date_2_str(report_date_to))
                # compute the valid date range
                if report_date_from is None:
                    is_fit = tushare_his_ds_df_cur_ths_code['trade_date'] < report_date_to
                elif report_date_to is None:
                    is_fit = tushare_his_ds_df_cur_ths_code['trade_date'] >= report_date_from
                else:
                    is_fit = (tushare_his_ds_df_cur_ths_code['trade_date'] < report_date_to) & (
                            tushare_his_ds_df_cur_ths_code['trade_date'] >= report_date_from)
                # take the data inside the date range
                tushare_his_ds_df_segment = tushare_his_ds_df_cur_ths_code[is_fit].copy()
                segment_count = tushare_his_ds_df_segment.shape[0]
                if segment_count == 0:
                    continue
                fin_s = report_date_dic[report_date_from] if report_date_from is not None else None
                #################################################
                # NOTE: chaining .keys() views with `and` evaluates to the last operand,
                # so this effectively iterates over DTYPE_TUSHARE_CASHFLOW.keys() only.
                for key in (DTYPE_TUSHARE_STOCK_DAILY_MD.keys() and
                            DTYPE_TUSHARE_STOCK_DAILY_BASIC.keys() and
                            DTYPE_TUSHARE_SUSPEND.keys() and
                            DTYPE_TUSHARE_CASHFLOW.keys()
                            # and DTYPE_TUSHARE_STOCK_BALABCESHEET.keys()
                            # and DTYPE_TUSHARE_STOCK_INCOME.keys()
                            ):
                    if key in ('ts_code', 'trade_date'):
                        continue
                    tushare_his_ds_df_segment[key] = fin_s[key] if fin_s is not None and key in fin_s else None
                tushare_his_ds_df_segment['ann_date'] = report_date_from
                # append data to the list
                data_df_list.append(tushare_his_ds_df_segment)
                data_count += segment_count

            if DEBUG and len(data_df_list) > 1:
                break

            # save to database
            if data_count > 10000:
                data_df = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
                tot_data_count += data_count
                data_count, data_df_list = 0, []
    finally:
        # save remaining data to database
        if len(data_df_list) > 0:
            data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
            tot_data_count += data_count
        logger.info('%s: %d records inserted or updated', table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
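# generate_range(report_date_list) is assumed, from the way it is consumed above, to turn a sorted list of
# disclosure dates [d1, d2, ...] into half-open windows (None, d1), (d1, d2), ..., (dn, None) so that every
# daily record can be matched to the latest report disclosed before it. A minimal sketch of that assumption,
# not the project's actual implementation:
def generate_range_sketch(date_list):
    """Yield (date_from, date_to) windows covering before, between and after the given sorted dates."""
    last_date = None
    for cur_date in date_list:
        yield last_date, cur_date
        last_date = cur_date
    yield last_date, None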
def merge_ifind_stock_daily(ths_code_set: set = None, date_from=None):
    """Merge ds, his and financial data into daily data"""
    table_name = 'ifind_stock_daily'
    logging.info("merging %s started", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(`time`),1) from {table_name}".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    # fetch data from each source table
    ifind_his_df = get_ifind_daily_df('ifind_stock_daily_his', date_from)
    ifind_ds_df = get_ifind_daily_df('ifind_stock_daily_ds', date_from)
    ifind_report_date_df = get_ifind_report_date_df('ifind_stock_report_date', None)
    ifind_fin_df = get_ifind_daily_df('ifind_stock_fin', None)
    ifind_fin_df_g = ifind_fin_df.groupby('ths_code')
    ths_code_set_4_daily = set(ifind_fin_df_g.size().index)
    # merge ds and his data; some rows are NaN after the join (no data)
    ifind_his_ds_df = pd.merge(ifind_his_df, ifind_ds_df, how='outer', on=['ths_code', 'time'])
    ifind_his_ds_df_g = ifind_his_ds_df.groupby('ths_code')
    logger.debug("data extraction finished")
    # compute report disclosure dates
    report_date_dic_dic = {}
    for report_date_g in [ifind_report_date_df.groupby(['ths_code', 'ths_regular_report_actual_dd_stock'])]:
        for num, ((ths_code, report_date), data_df) in enumerate(report_date_g, start=1):
            if ths_code_set is not None and ths_code not in ths_code_set:
                continue
            if is_nan_or_none(report_date):
                continue
            report_date_dic = report_date_dic_dic.setdefault(ths_code, {})
            if ths_code not in ths_code_set_4_daily:
                logger.error('no financial data for %s in the fin table', ths_code)
                continue
            ifind_fin_df_temp = ifind_fin_df_g.get_group(ths_code)
            if report_date not in report_date_dic_dic:
                ifind_fin_df_temp = ifind_fin_df_temp[ifind_fin_df_temp['time'] <= report_date]
                if ifind_fin_df_temp.shape[0] > 0:
                    report_date_dic[report_date] = ifind_fin_df_temp.sort_values('time').iloc[0]
    # set dtype
    dtype = {'report_date': Date}
    for dic in [DTYPE_STOCK_DAILY_DS, DTYPE_STOCK_REPORT_DATE, DTYPE_STOCK_DAILY_FIN, DTYPE_STOCK_DAILY_HIS]:
        for key, val in dic.items():
            dtype[key] = val
    logger.debug("report-date calculation finished")
    # assemble data_df
    tot_data_count, data_count, data_df_list, for_count = 0, 0, [], len(report_date_dic_dic)
    try:
        for num, (ths_code, report_date_dic) in enumerate(report_date_dic_dic.items(), start=1):  # key: ths_code
            # TODO: check whether ths_code exists in ifind_fin_df_g; using .size() as a membership check for now, improve later
            if ths_code not in ifind_his_ds_df_g.size():
                logger.error('no daily data for %s in the merged his/ds data', ths_code)
                continue
            # open, low, etc. may be NaN (2438 rows observed)
            ifind_his_ds_df_cur_ths_code = ifind_his_ds_df_g.get_group(ths_code)  # shape[1] == 30
            logger.debug('%d/%d) processing %s, %d records', num, for_count, ths_code,
                         ifind_his_ds_df_cur_ths_code.shape[0])
            report_date_list = list(report_date_dic.keys())
            report_date_list.sort()
            for report_date_from, report_date_to in generate_range(report_date_list):
                logger.debug('%d/%d) processing %s [%s - %s]', num, for_count, ths_code,
                             date_2_str(report_date_from), date_2_str(report_date_to))
                # compute the valid date range
                if report_date_from is None:
                    is_fit = ifind_his_ds_df_cur_ths_code['time'] < report_date_to
                elif report_date_to is None:
                    is_fit = ifind_his_ds_df_cur_ths_code['time'] >= report_date_from
                else:
                    is_fit = (ifind_his_ds_df_cur_ths_code['time'] < report_date_to) & (
                            ifind_his_ds_df_cur_ths_code['time'] >= report_date_from)
                # take the data inside the date range
                ifind_his_ds_df_segment = ifind_his_ds_df_cur_ths_code[is_fit].copy()
                segment_count = ifind_his_ds_df_segment.shape[0]
                if segment_count == 0:
                    continue
                fin_s = report_date_dic[report_date_from] if report_date_from is not None else None
                for key in DTYPE_STOCK_DAILY_FIN.keys():
                    if key in ('ths_code', 'time'):
                        continue
                    ifind_his_ds_df_segment[key] = fin_s[key] if fin_s is not None and key in fin_s else None
                ifind_his_ds_df_segment['report_date'] = report_date_from
                # append data to the list
                data_df_list.append(ifind_his_ds_df_segment)
                data_count += segment_count

            if DEBUG and len(data_df_list) > 1:
                break

            # save to database
            if data_count > 10000:
                data_df = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
                tot_data_count += data_count
                data_count, data_df_list = 0, []
    finally:
        # save remaining data to database
        if len(data_df_list) > 0:
            data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
            tot_data_count += data_count
        logger.info('%s: %d records inserted or updated', table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
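# get_ifind_daily_df / get_tushare_daily_df are assumed to be thin "read a whole source table, optionally
# only rows newer than date_from" helpers, mirroring the SQL patterns used elsewhere in this module.
# A minimal sketch under that assumption (the real helpers may select specific columns or use a different
# date column per table):
def get_ifind_daily_df_sketch(table_name, date_from):
    """Read an ifind source table, optionally restricted to rows with time >= date_from."""
    if date_from is None:
        sql_str = "select * from {table_name}".format(table_name=table_name)
        return pd.read_sql(sql_str, engine_md)
    sql_str = "select * from {table_name} where time >= %s".format(table_name=table_name)
    return pd.read_sql(sql_str, engine_md, params=[date_from])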
def merge_stock_daily(date_from=None):
    """
    Merge wind and ifind data into the table of the corresponding name
    :param date_from:
    :return:
    """
    table_name = 'stock_daily'
    logging.info("updating %s started", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(trade_date),1) from {table_name}".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    ifind_table_ds_name = 'ifind_{table_name}_ds'.format(table_name=table_name)
    ifind_table_his_name = 'ifind_{table_name}_his'.format(table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    if date_from is None:
        ifind_his_sql_str = "select * from {table_name}".format(table_name=ifind_table_his_name)
        ifind_ds_sql_str = "select * from {table_name}".format(table_name=ifind_table_ds_name)
        wind_sql_str = "select * from {table_name}".format(table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str, engine_md)  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str, engine_md)  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md)  # , index_col='wind_code'
    else:
        ifind_his_sql_str = "select * from {table_name} where time >= %s".format(table_name=ifind_table_his_name)
        ifind_ds_sql_str = "select * from {table_name} where time >= %s".format(table_name=ifind_table_ds_name)
        wind_sql_str = "select * from {table_name} where trade_date >= %s".format(table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str, engine_md, params=[date_from])  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str, engine_md, params=[date_from])  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md, params=[date_from])  # , index_col='wind_code'

    ifind_df = pd.merge(ifind_his_df, ifind_ds_df, how='outer', on=['ths_code', 'time'])
    joined_df = pd.merge(ifind_df, wind_df, how='outer',
                         left_on=['ths_code', 'time'], right_on=['wind_code', 'trade_date'],
                         indicator='indicator_column')
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
        'trade_date': (Date, prefer_left, {'left_key': 'time', 'right_key': 'trade_date'}),
        'open': (DOUBLE, mean_value,
                 {'left_key': 'open_x', 'right_key': 'open_y', 'warning_accuracy': 0.01,
                  'primary_keys': ('ths_code', 'time')}),
        'high': (DOUBLE, mean_value,
                 {'left_key': 'high_x', 'right_key': 'high_y', 'warning_accuracy': 0.01,
                  'primary_keys': ('ths_code', 'time')}),
        'low': (DOUBLE, mean_value,
                {'left_key': 'low_x', 'right_key': 'low_y', 'warning_accuracy': 0.01,
                 'primary_keys': ('ths_code', 'time')}),
        # TODO: reason unknown: some close values from the wind interface are inaccurate
        'close': (DOUBLE, prefer_left,
                  {'left_key': 'close_x', 'right_key': 'close_y', 'warning_accuracy': 0.01,
                   'primary_keys': ('ths_code', 'time')}),
        'volume': (DOUBLE, mean_value,
                   {'left_key': 'volume_x', 'right_key': 'volume_y', 'warning_accuracy': 1,
                    'primary_keys': ('ths_code', 'time')}),
        'amount': (DOUBLE, mean_value,
                   {'left_key': 'amount', 'right_key': 'amt', 'warning_accuracy': 1,
                    'primary_keys': ('ths_code', 'time')}),
        # total share count: ifind totalShares is keyed to the change date, wind total_shares to the
        # announcement date, so on conflict wind takes precedence
        'total_shares': (DOUBLE, prefer_right, {'left_key': 'totalShares', 'right_key': 'total_shares'}),
        # 'susp_days': (Integer, '***', {
        #     'left_key': 'ths_up_and_down_status_stock', 'right_key': 'susp_days', 'other_key': 'trade_status',
        #     'primary_keys': ('ths_code', 'time')}),
        'max_up_or_down': (Integer, max_up_or_down,
                           {'ths_key': 'ths_up_and_down_status_stock', 'wind_key': 'maxupordown',
                            'primary_keys': ('ths_code', 'time')}),
        'total_capital': (DOUBLE, get_value, {'key': 'totalCapital'}),
        'float_capital': (DOUBLE, get_value, {'key': 'floatCapitalOfAShares'}),
        'pct_chg': (DOUBLE, mean_value,
                    {'left_key': 'changeRatio', 'right_key': 'pct_chg', 'warning_accuracy': 0.01,
                     'primary_keys': ('ths_code', 'time')}),
        'float_a_shares': (DOUBLE, get_value, {'key': 'floatSharesOfAShares'}),  # corresponds to wind float_a_shares
        'free_float_shares': (DOUBLE, get_value, {'key': 'free_float_shares'}),  # corresponds to ths ths_free_float_shares_stock
        # PE_TTM: ths ths_pe_ttm_stock is based on the financial report date, wind pe_ttm on the report period,
        # so on conflict wind takes precedence
        'pe_ttm': (DOUBLE, prefer_right,
                   {'left_key': 'ths_pe_ttm_stock', 'right_key': 'pe_ttm', 'warning_accuracy': 0.01,
                    'primary_keys': ('ths_code', 'time')}),
        'pe': (DOUBLE, get_value, {'key': 'pe'}),
        'pb': (DOUBLE, get_value, {'key': 'pb'}),
        'ps': (DOUBLE, get_value, {'key': 'ps'}),
        'pcf': (DOUBLE, get_value, {'key': 'pcf'}),
    }
    col_merge_rule_dic = {
        key: (val[1], val[2]) for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logger.info('%s: %d records inserted or updated', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    return data_df
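# mean_value is assumed to average the ifind and wind quotes and to log a warning when the two sources
# disagree by more than warning_accuracy; primary_keys is assumed to only identify the offending row in the
# log message. A minimal sketch of that assumption, not the project's actual implementation:
def mean_value_sketch(data_s, left_key, right_key, warning_accuracy=None, primary_keys=None, **_):
    """Average two source columns; fall back to whichever is present; warn on large discrepancies."""
    left_value = data_s[left_key] if left_key in data_s else None
    right_value = data_s[right_key] if right_key in data_s else None
    if left_value is None or pd.isna(left_value):
        return right_value
    if right_value is None or pd.isna(right_value):
        return left_value
    if warning_accuracy is not None and abs(left_value - right_value) > warning_accuracy:
        logger.warning('%s: %s=%s differs from %s=%s by more than %s',
                       tuple(data_s[key] for key in primary_keys) if primary_keys else '',
                       left_key, left_value, right_key, right_value, warning_accuracy)
    return (left_value + right_value) / 2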
def merge_index_info():
    """
    Merge wind and ifind data into the table of the corresponding name
    :return:
    """
    table_name = 'index_info'
    logging.info("updating %s started", table_name)
    has_table = engine_md.has_table(table_name)
    ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    # ifind_model = TABLE_MODEL_DIC[ifind_table_name]
    # wind_model = TABLE_MODEL_DIC[wind_table_name]
    # with with_db_session(engine_md) as session:
    #     session.query(ifind_model, wind_model).filter(ifind_model.c.ths_code == wind_model.c.wind_code)
    ifind_sql_str = "select * from {table_name}".format(table_name=ifind_table_name)
    wind_sql_str = "select * from {table_name}".format(table_name=wind_table_name)
    ifind_df = pd.read_sql(ifind_sql_str, engine_md)  # , index_col='ths_code'
    wind_df = pd.read_sql(wind_sql_str, engine_md)  # , index_col='wind_code'
    joined_df = pd.merge(ifind_df, wind_df, how='outer',
                         left_on='ths_code', right_on='wind_code', indicator='indicator_column')
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
        'sec_name': (String(20), prefer_left, {'left_key': 'ths_index_short_name_index', 'right_key': 'sec_name'}),
        'crm_issuer': (String(20), prefer_left, {'left_key': 'ths_publish_org_index', 'right_key': 'crm_issuer'}),
        'base_date': (Date, prefer_left, {'left_key': 'ths_index_base_period_index', 'right_key': 'basedate'}),
        'basevalue': (DOUBLE, prefer_left, {'left_key': 'ths_index_base_point_index', 'right_key': 'basevalue'}),
        'country': (String(20), get_value, {'key': 'country'}),
        'launchdate': (Date, get_value, {'key': 'launchdate'}),
        'index_code': (String(20), get_value, {'key': 'ths_index_code_index'}),
        'index_category': (String(10), get_value, {'key': 'ths_index_category_index'}),
    }
    col_merge_rule_dic = {
        key: (val[1], val[2]) for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logger.info('%s: %d records inserted or updated', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        # build_primary_key([table_name])
        create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `unique_code` `unique_code` VARCHAR(20) NOT NULL ,
            ADD PRIMARY KEY (`unique_code`)""".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)

    return data_df
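# bunch_insert_on_duplicate_update is this project's bulk-upsert helper; the sketch below only illustrates
# the MySQL "INSERT ... ON DUPLICATE KEY UPDATE" pattern its name implies (the real helper also applies the
# dtype mapping, creates the table when missing, and chunks large frames). Illustrative assumption only.
def insert_on_duplicate_update_sketch(data_df, table_name, engine):
    """Illustrative row-by-row upsert of a DataFrame, updating rows that hit an existing primary key."""
    col_names = list(data_df.columns)
    sql_str = "insert into {table} ({cols}) values ({marks}) on duplicate key update {updates}".format(
        table=table_name,
        cols=', '.join('`{0}`'.format(name) for name in col_names),
        marks=', '.join(':{0}'.format(name) for name in col_names),
        updates=', '.join('`{0}`=values(`{0}`)'.format(name) for name in col_names),
    )
    with with_db_session(engine) as session:
        for _, row in data_df.iterrows():
            session.execute(sql_str, row.to_dict())
        session.commit()
    return data_df.shape[0]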
def merge_index_daily(date_from=None):
    """
    Merge wind and ifind data into the table of the corresponding name
    :param date_from:
    :return:
    """
    table_name = 'index_daily'
    logging.info("updating %s started", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(trade_date),1) from {table_name}".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    ifind_table_name_his = 'ifind_{table_name}_his'.format(table_name=table_name)
    ifind_table_name_ds = 'ifind_{table_name}_ds'.format(table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    if date_from is None:
        ifind_his_sql_str = "select * from {table_name}".format(table_name=ifind_table_name_his)
        ifind_ds_sql_str = "select * from {table_name}".format(table_name=ifind_table_name_ds)
        wind_sql_str = "select * from {table_name}".format(table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str, engine_md)  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str, engine_md)  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md)  # , index_col='wind_code'
    else:
        ifind_his_sql_str = "select * from {table_name} where time >= %s".format(table_name=ifind_table_name_his)
        ifind_ds_sql_str = "select * from {table_name} where time >= %s".format(table_name=ifind_table_name_ds)
        wind_sql_str = "select * from {table_name} where trade_date >= %s".format(table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str, engine_md, params=[date_from])  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str, engine_md, params=[date_from])  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md, params=[date_from])  # , index_col='wind_code'

    # change (ifind_df, wind_df) into joined_df
    ifind_df = pd.merge(ifind_his_df, ifind_ds_df, how='outer', on=['ths_code', 'time'])
    joined_df = pd.merge(ifind_df, wind_df, how='outer',
                         left_on=['ths_code', 'time'], right_on=['wind_code', 'trade_date'],
                         indicator='indicator_column')
    # data, columns processing
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
        'trade_date': (Date, prefer_left, {'left_key': 'time', 'right_key': 'trade_date'}),
        'open': (DOUBLE, mean_value,
                 {'left_key': 'open_x', 'right_key': 'open_y', 'warning_accuracy': 0.01,
                  'primary_keys': ('ths_code', 'time')}),
        'high': (DOUBLE, mean_value,
                 {'left_key': 'high_x', 'right_key': 'high_y', 'warning_accuracy': 0.01,
                  'primary_keys': ('ths_code', 'time')}),
        'low': (DOUBLE, mean_value,
                {'left_key': 'low_x', 'right_key': 'low_y', 'warning_accuracy': 0.01,
                 'primary_keys': ('ths_code', 'time')}),
        'close': (DOUBLE, prefer_left,
                  {'left_key': 'close_x', 'right_key': 'close_y', 'warning_accuracy': 0.01,
                   'primary_keys': ('ths_code', 'time')}),
        'volume': (DOUBLE, mean3_value,
                   {'left_key': 'volume_x', 'right_key': 'volume_y', 'warning_accuracy': 10,
                    'primary_keys': ('ths_code', 'time')}),
        'amount': (DOUBLE, mean_value,
                   {'left_key': 'amount', 'right_key': 'amt', 'warning_accuracy': 100,
                    'primary_keys': ('ths_code', 'time')}),
        'free_turn': (DOUBLE, mean2_value,
                      {'left_key': 'turnoverRatio', 'right_key': 'free_turn', 'warning_accuracy': 0.01,
                       'primary_keys': ('ths_code', 'time')}),
        'avgPrice': (DOUBLE, get_value, {'key': 'avgPrice'}),
        'changeRatio': (DOUBLE, get_value, {'key': 'changeRatio'}),
        'floatCapitalOfAShares': (DOUBLE, get_value, {'key': 'floatCapitalOfAShares'}),
        'floatCapitalOfBShares': (DOUBLE, get_value, {'key': 'floatCapitalOfBShares'}),
        'floatSharesOfAShares': (DOUBLE, get_value, {'key': 'floatSharesOfAShares'}),
        'floatSharesOfBShares': (DOUBLE, get_value, {'key': 'floatSharesOfBShares'}),
        'pb': (DOUBLE, get_value, {'key': 'pb'}),
        'pcf': (DOUBLE, get_value, {'key': 'pcf'}),
        'pe': (DOUBLE, get_value, {'key': 'pe'}),
        # NOTE: the following two mappings are kept as in the original source:
        # 'pe_ttm' reads the 'preClose' column and 'preClose' reads 'avgPrice'.
        'pe_ttm': (DOUBLE, get_value, {'key': 'preClose'}),
        'preClose': (DOUBLE, get_value, {'key': 'avgPrice'}),
        'ps': (DOUBLE, get_value, {'key': 'ps'}),
        'totalCapital': (DOUBLE, get_value, {'key': 'totalCapital'}),
        'totalShares': (DOUBLE, get_value, {'key': 'totalShares'}),
        'transactionAmount': (DOUBLE, get_value, {'key': 'transactionAmount'}),
        'current_index': (DOUBLE, get_value, {'key': 'ths_current_mv_index'}),
        'dividend_rate_index': (DOUBLE, get_value, {'key': 'ths_dividend_rate_index'}),
        'buy_amt_index': (DOUBLE, get_value, {'key': 'ths_financing_buy_amt_int_index'}),
        'payment_amt_index': (DOUBLE, get_value, {'key': 'ths_financing_payment_amt_int_index'}),
        'ths_flaot_mv_ratio_index': (DOUBLE, get_value, {'key': 'ths_flaot_mv_ratio_index'}),
        'ths_float_ashare_index': (DOUBLE, get_value, {'key': 'ths_float_ashare_index'}),
        'ths_float_ashare_mv_index': (DOUBLE, get_value, {'key': 'ths_float_ashare_mv_index'}),
        'ths_float_ashare_to_total_shares_index': (DOUBLE, get_value, {'key': 'ths_float_ashare_to_total_shares_index'}),
        'ths_float_bshare_index': (DOUBLE, get_value, {'key': 'ths_float_bshare_index'}),
        'ths_float_bshare_to_total_shares_index': (DOUBLE, get_value, {'key': 'ths_float_bshare_to_total_shares_index'}),
        'ths_float_hshare_to_total_shares_index': (DOUBLE, get_value, {'key': 'ths_float_hshare_to_total_shares_index'}),
        'ths_limited_ashare_index': (DOUBLE, get_value, {'key': 'ths_limited_ashare_index'}),
        'ths_margin_trading_amtb_index': (DOUBLE, get_value, {'key': 'ths_margin_trading_amtb_index'}),
        'ths_margin_trading_balance_index': (DOUBLE, get_value, {'key': 'ths_margin_trading_balance_index'}),
        'ths_margin_trading_repay_amt_index': (DOUBLE, get_value, {'key': 'ths_margin_trading_repay_amt_index'}),
        'ths_margin_trading_sell_amt_index': (DOUBLE, get_value, {'key': 'ths_margin_trading_sell_amt_index'}),
        'ths_float_hshare_index': (DOUBLE, get_value, {'key': 'ths_float_hshare_index'}),
        'ths_limited_bshare_index': (DOUBLE, get_value, {'key': 'ths_limited_bshare_index'}),
        'ths_market_value_index': (DOUBLE, get_value, {'key': 'ths_market_value_index'}),
        'ths_mt_payment_vol_int_index': (DOUBLE, get_value, {'key': 'ths_mt_payment_vol_int_index'}),
        'ths_mt_sell_vol_int_index': (DOUBLE, get_value, {'key': 'ths_mt_sell_vol_int_index'}),
        'ths_mv_csrc_alg_index': (DOUBLE, get_value, {'key': 'ths_mv_csrc_alg_index'}),
        'ths_pb_index': (DOUBLE, get_value, {'key': 'ths_pb_index'}),
        'ths_pcf_index': (DOUBLE, get_value, {'key': 'ths_pcf_index'}),
        'ths_pe_index': (DOUBLE, get_value, {'key': 'ths_pe_index'}),
        'ths_ps_index': (DOUBLE, get_value, {'key': 'ths_ps_index'}),
        'ths_short_selling_amtb_index': (DOUBLE, get_value, {'key': 'ths_short_selling_amtb_index'}),
        'ths_short_selling_payment_vol_index': (DOUBLE, get_value, {'key': 'ths_short_selling_payment_vol_index'}),
        'ths_short_selling_sell_vol_index': (DOUBLE, get_value, {'key': 'ths_short_selling_sell_vol_index'}),
        'ths_short_selling_vol_balance_index': (DOUBLE, get_value, {'key': 'ths_short_selling_vol_balance_index'}),
        'ths_state_owned_lp_shares_index': (DOUBLE, get_value, {'key': 'ths_state_owned_lp_shares_index'}),
        'ths_state_owned_shares_index': (DOUBLE, get_value, {'key': 'ths_state_owned_shares_index'}),
        'ths_total_domestic_lp_shares_index': (DOUBLE, get_value, {'key': 'ths_total_domestic_lp_shares_index'}),
        'ths_total_float_shares_index': (DOUBLE, get_value, {'key': 'ths_total_float_shares_index'}),
        'ths_total_float_shares_ratio_index': (DOUBLE, get_value, {'key': 'ths_total_float_shares_ratio_index'}),
        'ths_total_limited_ashare_ratio_index': (DOUBLE, get_value, {'key': 'ths_total_limited_ashare_ratio_index'}),
        'ths_total_shares_index': (DOUBLE, get_value, {'key': 'ths_total_shares_index'}),
        'ths_unfloat_shares_index': (DOUBLE, get_value, {'key': 'ths_unfloat_shares_index'}),
        'ths_annual_volatility_index': (DOUBLE, get_value, {'key': 'ths_annual_volatility_index'}),
    }
    col_merge_rule_dic = {
        key: (val[1], val[2]) for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logger.info('%s: %d records inserted or updated', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    return data_df
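# merge_data is assumed to walk col_merge_rule_dic and build one output column per entry by applying the
# rule function row-wise to joined_df. A minimal sketch of that assumption (the real implementation may
# vectorise and handle the indicator_column differently):
def merge_data_sketch(joined_df, col_merge_rule_dic):
    """Build the merged DataFrame column by column from (merge_func, kwargs) rules."""
    data_df = pd.DataFrame()
    for col_name, (merge_func, func_kwargs) in col_merge_rule_dic.items():
        data_df[col_name] = joined_df.apply(lambda data_s: merge_func(data_s, **func_kwargs), axis=1)
    return data_df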
def merge_future_info():
    """
    Merge wind and ifind data into the table of the corresponding name
    :return:
    """
    table_name = 'future_info'
    logging.info("updating %s started", table_name)
    has_table = engine_md.has_table(table_name)
    ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    # ifind_model = TABLE_MODEL_DIC[ifind_table_name]
    # wind_model = TABLE_MODEL_DIC[wind_table_name]
    # with with_db_session(engine_md) as session:
    #     session.query(ifind_model, wind_model).filter(ifind_model.c.ths_code == wind_model.c.wind_code)
    ifind_sql_str = "select * from {table_name}".format(table_name=ifind_table_name)
    wind_sql_str = "select * from {table_name}".format(table_name=wind_table_name)
    ifind_df = pd.read_sql(ifind_sql_str, engine_md)  # , index_col='ths_code'
    wind_df = pd.read_sql(wind_sql_str, engine_md)  # , index_col='wind_code'
    joined_df = pd.merge(ifind_df, wind_df, how='outer',
                         left_on='ths_code', right_on='wind_code', indicator='indicator_column')
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
        'sec_name': (String(50), prefer_left, {'left_key': 'ths_future_short_name_future', 'right_key': 'sec_name'}),
        'dl_month': (String(20), prefer_left, {'left_key': 'ths_delivery_month_future', 'right_key': 'dlmonth'}),
        'delivery_date': (Date, prefer_left,
                          {'left_key': 'ths_last_delivery_date_future', 'right_key': 'lastdelivery_date'}),
        'prince': (DOUBLE, prefer_left, {'left_key': 'ths_mini_chg_price_future', 'right_key': 'mfprice'}),
        'ex_eng': (String(20), get_value, {'key': 'ths_exchange_short_name_future'}),
        'sc_code': (String(20), prefer_left, {'left_key': 'ths_td_variety_future', 'right_key': 'sccode'}),
        'p_unit': (String(20), prefer_left, {'left_key': 'ths_pricing_unit_future', 'right_key': 'punit'}),
        'l_prince': (DOUBLE, prefer_left,
                     {'left_key': 'ths_listing_benchmark_price_future', 'right_key': 'lprice'}),
        'margin': (DOUBLE, prefer_left, {'left_key': 'ths_initial_td_deposit_future', 'right_key': 'margin'}),
        'ratio_lmit': (DOUBLE, get_value, {'key': 'ths_chg_ratio_lmit_future'}),
        'enshort_name': (String(100), get_value, {'key': 'ths_contract_en_short_name_future'}),
        'en_name': (String(100), get_value, {'key': 'ths_contract_en_name_future'}),
        'future_code': (String(20), prefer_left, {'left_key': 'ths_future_code_future', 'right_key': 'trade_code'}),
        'last_date': (Date, prefer_left, {'left_key': 'ths_last_td_date_future', 'right_key': 'lasttrade_date'}),
        'con_month': (String(60), get_value, {'key': 'ths_contract_month_explain_future'}),
        'de_date': (String(60), get_value, {'key': 'ths_delivery_date_explain_future'}),
        'td_date': (String(60), get_value, {'key': 'ths_last_td_date_explian_future'}),
        'benchmark_': (DOUBLE, get_value, {'key': 'ths_listing_benchmark_price_future'}),
        'sec_type': (String(20), get_value, {'key': 'ths_sec_type_future'}),
        'td_time': (String(60), get_value, {'key': 'ths_td_time_explain_future'}),
        'td_unit': (String(20), get_value, {'key': 'ths_td_unit_future'}),
        'td_variety': (String(20), get_value, {'key': 'ths_td_variety_future'}),
        'sec_englishname': (String(50), get_value, {'key': 'sec_englishname'}),
        'exch_eng': (String(50), get_value, {'key': 'exch_eng'}),
        'changelt': (DOUBLE, get_value, {'key': 'changelt'}),
        'contractmultiplier': (String(100), get_value, {'key': 'contractmultiplier'}),
        'ftmargins': (String(100), get_value, {'key': 'ftmargins'}),
        'thours': (String(200), get_value, {'key': 'thours'}),
        'ipo_date': (Date, prefer_left, {'left_key': 'ths_start_trade_date_future', 'right_key': 'ipo_date'}),
    }
    col_merge_rule_dic = {
        key: (val[1], val[2]) for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logger.info('%s: %d records inserted or updated', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        # build_primary_key([table_name])
        create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `unique_code` `unique_code` VARCHAR(20) NOT NULL ,
            ADD PRIMARY KEY (`unique_code`)""".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)

    return data_df
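# alter_table_2_myisam and build_primary_key come from the project's utility layer. Judging only by their
# names and how they are used in this module, the first switches freshly created tables to the MyISAM
# storage engine and the second adds the primary key; a hedged sketch of the first, not the real code:
def alter_table_2_myisam_sketch(engine, table_name_list):
    """Illustrative: switch the listed tables to the MyISAM storage engine."""
    with with_db_session(engine) as session:
        for table_name in table_name_list:
            session.execute("ALTER TABLE `{table_name}` ENGINE = MyISAM".format(table_name=table_name))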
def merge_future_daily(date_from=None):
    """
    Merge wind and ifind data into the table of the corresponding name
    :param date_from:
    :return:
    """
    table_name = 'future_daily'
    logging.info("updating %s started", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(trade_date),1) from {table_name}".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    if date_from is None:
        ifind_sql_str = "select * from {table_name}".format(table_name=ifind_table_name)
        wind_sql_str = "select * from {table_name}".format(table_name=wind_table_name)
        ifind_df = pd.read_sql(ifind_sql_str, engine_md)  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md)  # , index_col='wind_code'
    else:
        ifind_sql_str = "select * from {table_name} where time >= %s".format(table_name=ifind_table_name)
        wind_sql_str = "select * from {table_name} where trade_date >= %s".format(table_name=wind_table_name)
        ifind_df = pd.read_sql(ifind_sql_str, engine_md, params=[date_from])  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md, params=[date_from])  # , index_col='wind_code'

    # change (ifind_df, wind_df) into joined_df
    joined_df = pd.merge(ifind_df, wind_df, how='outer',
                         left_on=['ths_code', 'time'], right_on=['wind_code', 'trade_date'],
                         indicator='indicator_column')
    # data, columns processing
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
        'trade_date': (Date, prefer_left, {'left_key': 'time', 'right_key': 'trade_date'}),
        'open': (DOUBLE, mean_value,
                 {'left_key': 'open_x', 'right_key': 'open_y', 'warning_accuracy': 0.01,
                  'primary_keys': ('ths_code', 'time')}),
        'high': (DOUBLE, mean_value,
                 {'left_key': 'high_x', 'right_key': 'high_y', 'warning_accuracy': 0.01,
                  'primary_keys': ('ths_code', 'time')}),
        'low': (DOUBLE, mean_value,
                {'left_key': 'low_x', 'right_key': 'low_y', 'warning_accuracy': 0.01,
                 'primary_keys': ('ths_code', 'time')}),
        # TODO: reason unknown: some close values from the wind interface are inaccurate
        'close': (DOUBLE, prefer_left,
                  {'left_key': 'close_x', 'right_key': 'close_y', 'warning_accuracy': 0.01,
                   'primary_keys': ('ths_code', 'time')}),
        'volume': (DOUBLE, mean_value,
                   {'left_key': 'volume_x', 'right_key': 'volume_y', 'warning_accuracy': 10,
                    'primary_keys': ('ths_code', 'time')}),
        'amount': (DOUBLE, mean2_value,
                   {'left_key': 'amount', 'right_key': 'amt', 'warning_accuracy': 1,
                    'primary_keys': ('ths_code', 'time')}),
        'settle': (DOUBLE, mean_value,
                   {'left_key': 'settlement', 'right_key': 'settle', 'warning_accuracy': 0.01,
                    'primary_keys': ('ths_code', 'time')}),
        'position': (DOUBLE, mean_value,
                     {'left_key': 'openInterest', 'right_key': 'position', 'warning_accuracy': 1,
                      'primary_keys': ('ths_code', 'time')}),
        'maxupordown': (Integer, get_value, {'key': 'maxupordown'}),
        'st_stock': (DOUBLE, get_value, {'key': 'st_stock'}),
        'instrument_id': (String(20), get_value, {'key': 'instrument_id'}),
        'positionchange': (DOUBLE, get_value, {'key': 'positionChange'}),
        'preclose': (String(20), get_value, {'key': 'preClose'}),
        'presettlement': (DOUBLE, get_value, {'key': 'preSettlement'}),
        'amplitude': (DOUBLE, get_value, {'key': 'amplitude'}),
        'avgprice': (DOUBLE, get_value, {'key': 'avgPrice'}),
        'change': (DOUBLE, get_value, {'key': 'change'}),
        'change_settlement': (DOUBLE, get_value, {'key': 'change_settlement'}),
        'chg_settlement': (DOUBLE, get_value, {'key': 'chg_settlement'}),
    }
    col_merge_rule_dic = {
        key: (val[1], val[2]) for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logger.info('%s: %d records inserted or updated', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    return data_df
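# A hedged example of how these merge functions might be invoked for a rebuild: here each *_info merge is
# run before the corresponding *_daily merge, and date_from=None lets every daily merge resume from its own
# max trade_date. Nothing in this module enforces this ordering, and the project's real entry point may
# differ; this is only an illustrative driver.
if __name__ == '__main__':
    # ifind/tushare intermediate daily tables
    merge_ifind_stock_daily(ths_code_set=None, date_from=None)
    merge_tushare_daily(ths_code_set=None, date_from=None)
    # merged wind + ifind tables
    merge_stock_info()
    merge_stock_daily(date_from=None)
    merge_index_info()
    merge_index_daily(date_from=None)
    merge_future_info()
    merge_future_daily(date_from=None)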