def import_future_daily_his(chain_param=None, ths_code_set: set = None, begin_time=None): """ 更新期货合约日级别行情信息 :param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用 :param ths_code_set: :param begin_time: :return: """ table_name = 'ifind_future_daily' info_table_name = 'ifind_future_info' logger.info("更新 %s 开始", table_name) has_table = engine_md.has_table(table_name) indicator_param_list = [ ('preClose', String(20)), ('open', DOUBLE), ('high', DOUBLE), ('low', DOUBLE), ('close', DOUBLE), ('volume', DOUBLE), ('amount', DOUBLE), ('avgPrice', DOUBLE), ('change', DOUBLE), ('changeRatio', DOUBLE), ('preSettlement', DOUBLE), ('settlement', DOUBLE), ('change_settlement', DOUBLE), ('chg_settlement', DOUBLE), ('openInterest', DOUBLE), ('positionChange', DOUBLE), ('amplitude', DOUBLE), ] json_indicator = ','.join([key for key, _ in indicator_param_list]) if has_table: # 16 点以后 下载当天收盘数据,16点以前只下载前一天的数据 # 对于 date_to 距离今年超过1年的数据不再下载:发现有部分历史过于久远的数据已经无法补全, # 如:AL0202.SHF AL9902.SHF CU0202.SHF # TODO: ths_ksjyr_future 字段需要替换为 ths_contract_listed_date_future 更加合理 sql_str = """SELECT ths_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to FROM ( SELECT fi.ths_code, ifnull(trade_date_max_1, ths_start_trade_date_future) date_frm, ths_last_td_date_future lasttrade_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM {info_table_name} fi LEFT OUTER JOIN (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) wfd ON fi.ths_code = wfd.ths_code ) tt WHERE date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date) AND subdate(curdate(), 360) < if(lasttrade_date<end_date, lasttrade_date, end_date) ORDER BY ths_code""".format(table_name=table_name, info_table_name=info_table_name) else: sql_str = """SELECT ths_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to FROM ( SELECT fi.ths_code, ths_start_trade_date_future date_frm, ths_last_td_date_future lasttrade_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM {info_table_name} fi ) tt""".format(info_table_name=info_table_name) logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name) with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 获取每只股票需要获取日线数据的日期区间 code_date_range_dic = { ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for ths_code, date_from, date_to in table.fetchall() if ths_code_set is None or ths_code in ths_code_set } if TRIAL: date_from_min = date.today() - timedelta(days=(365 * 5)) # 试用账号只能获取近5年数据 code_date_range_dic = { ths_code: (max([date_from, date_from_min]), date_to) for ths_code, (date_from, date_to) in code_date_range_dic.items() if date_from_min <= date_to } # 设置 dtype dtype = {key: val for key, val in indicator_param_list} dtype['ths_code'] = String(20) dtype['time'] = Date data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len( code_date_range_dic) try: logger.info("%d future instrument will be handled", code_count) for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time) data_df = invoker.THS_HistoryQuotes( ths_code, json_indicator, 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous', begin_time, end_time) if data_df is not None and data_df.shape[0] > 0: data_count += data_df.shape[0] data_df_list.append(data_df) # 大于阀值有开始插入 if data_count >= 10000: data_df_all = pd.concat(data_df_list) # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype) data_count = bunch_insert_on_duplicate_update( data_df_all, table_name, engine_md, dtype) tot_data_count += data_count data_df_list, data_count = [], 0 logging.info("%s 新增数据 %d 条", table_name, data_count) # 仅调试使用 if DEBUG and len(data_df_list) > 1: break finally: if data_count > 0: data_df_all = pd.concat(data_df_list) # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype) data_count = bunch_insert_on_duplicate_update( data_df_all, table_name, engine_md, dtype) tot_data_count += data_count logging.info("%s 新增数据 %d 条", table_name, data_count) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name]) logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def import_private_fund_daily(chain_param=None, ths_code_set: set = None, begin_time=None): """ 导入 daily 数据 :param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用 :param ths_code_set: :param begin_time: :return: """ table_name = 'ifind_private_fund_daily' indicator_param_list = [ ('netAssetValue', '', DOUBLE), ('adjustedNAV', '', DOUBLE), ('accumulatedNAV', '', DOUBLE), ('premium', '', DOUBLE), ('premiumRatio', '', DOUBLE), ('estimatedPosition', '', DOUBLE) ] # jsonIndicator='netAssetValue,adjustedNAV,accumulatedNAV,premium,premiumRatio,estimatedPosition' # jsonparam=';;;;' json_indicator, json_param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';') has_table = engine_md.has_table(table_name) if has_table: sql_str = """SELECT ths_code, date_frm, if(ths_maturity_date_sp<end_date, ths_maturity_date_sp, end_date) date_to FROM ( SELECT info.ths_code, ifnull(trade_date_max_1, ths_established_date_sp) date_frm, ths_maturity_date_sp, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM ifind_private_fund_info info LEFT OUTER JOIN (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily ON info.ths_code = daily.ths_code ) tt WHERE date_frm <= if(ths_maturity_date_sp<end_date, ths_maturity_date_sp, end_date) ORDER BY ths_code""".format(table_name=table_name) else: logger.warning('ifind_private_fund_daily 不存在,仅使用 ifind_private_fund_info 表进行计算日期范围') sql_str = """SELECT ths_code, date_frm, if(ths_maturity_date_sp<end_date, ths_maturity_date_sp, end_date) date_to FROM ( SELECT info.ths_code, ths_established_date_sp date_frm, ths_maturity_date_sp, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM ifind_private_fund_info info ) tt WHERE date_frm <= if(ths_maturity_date_sp<end_date, ths_maturity_date_sp, end_date) ORDER BY ths_code""" with with_db_session(engine_md) as session: # 计算每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) code_date_range_dic = { ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for ths_code, date_from, date_to in table.fetchall() if ths_code_set is None or ths_code in ths_code_set} if TRIAL: date_from_min = date.today() - timedelta(days=(365 * 5)) # 试用账号只能获取近5年数据 code_date_range_dic = { ths_code: (max([date_from, date_from_min]), date_to) for ths_code, (date_from, date_to) in code_date_range_dic.items() if date_from_min <= date_to} # 设置 dtype dtype = {key: val for key, _, val in indicator_param_list} dtype['ths_code'] = String(20) dtype['time'] = Date data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic) try: for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time) data_df = invoker.THS_HistoryQuotes( ths_code, json_indicator, json_param, begin_time, end_time ) if data_df is not None and data_df.shape[0] > 0: data_count += data_df.shape[0] data_df_list.append(data_df) # 大于阀值有开始插入 if data_count >= 10000: tot_data_df = pd.concat(data_df_list) # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype) data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype) tot_data_count += data_count data_df_list, data_count = [], 0 if DEBUG and len(data_df_list) > 1: break finally: if data_count > 0: tot_data_df = pd.concat(data_df_list) # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype) data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype) tot_data_count += data_count logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count) if not has_table: alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name])
def import_index_daily_his(chain_param=None, ths_code_set: set = None, begin_time=None): """ 通过history接口将历史数据保存到 ifind_index_daily_his :param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用 :param ths_code_set: :param begin_time: 默认为None,如果非None则代表所有数据更新日期不得晚于该日期 :return: """ table_name = 'ifind_index_daily_his' if begin_time is not None and type(begin_time) == date: begin_time = str_2_date(begin_time) # THS_HistoryQuotes('600006.SH,600010.SH', # 'preClose,open,high,low,close,avgPrice,changeRatio,volume,amount,turnoverRatio,transactionAmount,totalShares,totalCapital,floatSharesOfAShares,floatSharesOfBShares,floatCapitalOfAShares,floatCapitalOfBShares,pe_ttm,pe,pb,ps,pcf', # 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous', # '2018-06-30','2018-07-30') json_indicator, _ = unzip_join( [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_INDEX_DAILY_HIS], sep=';') has_table = engine_md.has_table(table_name) if has_table: sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to FROM ( SELECT info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM ifind_index_info info LEFT OUTER JOIN (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM ifind_index_daily_his GROUP BY ths_code) daily ON info.ths_code = daily.ths_code ) tt WHERE date_frm <= if(NULL<end_date, NULL, end_date) ORDER BY ths_code;""" else: logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围', table_name) sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to FROM ( SELECT info.ths_code, ths_index_base_period_index date_frm, NULL, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM ifind_index_info info ) tt WHERE date_frm <= if(NULL<end_date, NULL, end_date) ORDER BY ths_code""" # 计算每只股票需要获取日线数据的日期区间 with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 code_date_range_dic = { ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for ths_code, date_from, date_to in table.fetchall() if ths_code_set is None or ths_code in ths_code_set } if TRIAL: date_from_min = date.today() - timedelta(days=(365 * 5)) # 试用账号只能获取近5年数据 code_date_range_dic = { ths_code: (max([date_from, date_from_min]), date_to) for ths_code, (date_from, date_to) in code_date_range_dic.items() if date_to is not None and date_from_min <= date_to } data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len( code_date_range_dic) try: for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time) data_df = invoker.THS_HistoryQuotes( ths_code, json_indicator, 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous', begin_time, end_time) if data_df is not None and data_df.shape[0] > 0: data_count += data_df.shape[0] data_df_list.append(data_df) # 大于阀值有开始插入 if data_count >= 10000: data_count = bunch_insert_on_duplicate_update( data_df, table_name, engine_md, DTYPE_INDEX_DAILY_HIS) tot_data_count += data_count data_df_list, data_count = [], 0 # 仅调试使用 if DEBUG and len(data_df_list) > 5: break finally: if data_count > 0: data_df_all = pd.concat(data_df_list) data_count = bunch_insert_on_duplicate_update( data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_HIS) tot_data_count += data_count logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name])
def import_pub_fund_daily(chain_param=None, ths_code_set: set = None, begin_time=None): """ 通过history接口将历史数据保存到 ifind_pub_fund_daily :param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用 :param ths_code_set: :param begin_time: 默认为None,如果非None则代表所有数据更新日期不得晚于该日期 :return: """ table_name = 'ifind_pub_fund_daily' has_table = engine_md.has_table(table_name) if begin_time is not None and type(begin_time) == date: begin_time = str_2_date(begin_time) indicator_param_list = [('netAssetValue', '', DOUBLE), ('adjustedNAV', '', DOUBLE), ('accumulatedNAV', '', DOUBLE)] # THS_HistoryQuotes('600006.SH,600010.SH', # 'preClose,open,high,low,close,avgPrice,changeRatio,volume,amount,turnoverRatio,transactionAmount,totalShares,totalCapital,floatSharesOfAShares,floatSharesOfBShares,floatCapitalOfAShares,floatCapitalOfBShares,pe_ttm,pe,pb,ps,pcf', # 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous', # '2018-06-30','2018-07-30') json_indicator, _ = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';') if has_table: sql_str = """SELECT ths_code, date_frm, if(ths_fund_expiry_date_fund<end_date, ths_fund_expiry_date_fund, end_date) date_to FROM ( SELECT info.ths_code, ifnull(trade_date_max_1, ths_lof_listed_date_fund) date_frm, ths_fund_expiry_date_fund, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM ifind_pub_fund_info info LEFT OUTER JOIN (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily ON info.ths_code = daily.ths_code ) tt WHERE date_frm <= if(ths_fund_expiry_date_fund<end_date, ths_fund_expiry_date_fund, end_date) ORDER BY ths_code""".format(table_name=table_name) else: logger.warning('%s 不存在,仅使用 ifind_pub_fund_info 表进行计算日期范围', table_name) sql_str = """SELECT ths_code, date_frm, if(ths_fund_expiry_date_fund<end_date, ths_fund_expiry_date_fund, end_date) date_to FROM ( SELECT info.ths_code, ths_lof_listed_date_fund date_frm, ths_fund_expiry_date_fund, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM ifind_pub_fund_info info ) tt WHERE date_frm <= if(ths_fund_expiry_date_fund<end_date, ths_fund_expiry_date_fund, end_date) ORDER BY ths_code""" # 计算每只股票需要获取日线数据的日期区间 with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 code_date_range_dic = { ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for ths_code, date_from, date_to in table.fetchall() if ths_code_set is None or ths_code in ths_code_set } if TRIAL: date_from_min = date.today() - timedelta(days=(365 * 5)) # 试用账号只能获取近5年数据 code_date_range_dic = { ths_code: (max([date_from, date_from_min]), date_to) for ths_code, (date_from, date_to) in code_date_range_dic.items() if date_from_min <= date_to } # 设置 dtype dtype = {key: val for key, _, val in indicator_param_list} dtype['ths_code'] = String(20) dtype['time'] = Date data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len( code_date_range_dic) try: for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time) data_df = invoker.THS_HistoryQuotes( ths_code, json_indicator, 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous', begin_time, end_time) if data_df is not None and data_df.shape[0] > 0: data_count += data_df.shape[0] data_df.rename( columns={col: col.lower() for col in data_df.columns}, inplace=True) data_df_list.append(data_df) if DEBUG and len(data_df_list) > 1: break # 大于阀值有开始插入 if data_count >= 10000: tot_data_df = pd.concat(data_df_list) data_count = bunch_insert_on_duplicate_update( tot_data_df, table_name, engine_md, dtype) tot_data_count += data_count data_df_list, data_count = [], 0 finally: if len(data_df_list) > 0: tot_data_df = pd.concat(data_df_list) data_count = bunch_insert_on_duplicate_update( tot_data_df, table_name, engine_md, dtype) tot_data_count += data_count logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name])