def import_pub_fund_daily(wind_code_set, chain_param=None):
    """
    Import daily NAV data of public (mutual) funds into wind_pub_fund_daily.

    :param wind_code_set: set of fund wind codes to import; None means every fund
        listed in wind_pub_fund_info
    :param chain_param: used by celery task chains to pass the previous task's
        result forward; not read by this function
    :return:
    """
    logging.info("更新 wind_pub_fund_daily 开始")
    table_name = "wind_pub_fund_daily"
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Incremental load: each fund starts at the day after its latest stored
        # nav_date (falling back to fund_setupdate) and stops at the earlier of
        # its maturity date and the last completed trading day (yesterday before 16:00).
        sql_str = """
            SELECT wind_code, date_frm, if(fund_maturitydate<end_date, fund_maturitydate, end_date) date_to
            FROM
            (
                SELECT info.wind_code, ifnull(nav_date, fund_setupdate) date_frm, fund_maturitydate,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_pub_fund_info info
                LEFT OUTER JOIN
                    (SELECT wind_code, adddate(max(nav_date),1) nav_date FROM {table_name} GROUP BY wind_code) daily
                ON info.wind_code = daily.wind_code
            ) tt
            WHERE date_frm <= if(fund_maturitydate<end_date, fund_maturitydate, end_date)
            ORDER BY wind_code;""".format(table_name=table_name)
    else:
        logger.warning(
            'wind_pub_fund_daily 不存在,仅使用 wind_pub_fund_info 表进行计算日期范围')
        # First load: no daily table yet, derive date ranges from the info table only.
        sql_str = """
            SELECT wind_code, date_frm, if(fund_maturitydate<end_date, fund_maturitydate, end_date) date_to
            FROM
            (
                SELECT info.wind_code, fund_setupdate date_frm, fund_maturitydate,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_pub_fund_info info
            ) tt
            WHERE date_frm <= if(fund_maturitydate<end_date, fund_maturitydate, end_date)
            ORDER BY wind_code"""
    with with_db_session(engine_md) as session:
        # wind_code -> (date_from, date_to): date range still missing from the daily table
        table = session.execute(sql_str)
        trade_date_latest_dic = {
            wind_code: (date_from, date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    data_df_list = []
    wind_code_date_count = len(trade_date_latest_dic)
    logger.info('%d pub fund will been import into wind_pub_fund_daily',
                wind_code_date_count)
    # NAV fields to fetch and their DB column types
    field_col_name_list = [
        ('NAV_date', Date),
        ('NAV_acc', String(20)),
        ('netasset_total', String(20)),
    ]
    wind_indictor_str = ",".join(key.lower() for key, _ in field_col_name_list)
    # wind returns upper-case column names; map them back to the lower-case schema
    upper_col_2_name_dic = {
        name.upper(): name.lower()
        for name, _ in field_col_name_list
    }
    dtype = {key.lower(): val for key, val in field_col_name_list}
    dtype['wind_code'] = String(20)
    try:
        data_tot = 0
        for data_num, (wind_code, (date_from, date_to)) in enumerate(trade_date_latest_dic.items()):
            try:
                data_df = invoker.wsd(wind_code, wind_indictor_str, date_from,
                                      date_to, "unit=1;Days=Weekdays")
            except APIError as exp:
                logger.exception("%d/%d) %s 执行异常", data_num,
                                 wind_code_date_count, wind_code)
                # BUGFIX: use get() instead of setdefault(); the old code mutated
                # exp.ret_dic as a side effect of reading it
                if exp.ret_dic.get('error_code', 0) in (
                        -40520007,  # no data available
                        -40521009,  # data decode failure (check date arguments)
                ):
                    continue
                else:
                    break
            if data_df is None:
                logger.warning('%d/%d) %s has no ohlc data during %s %s',
                               data_num, wind_code_date_count, wind_code,
                               date_from, date_to)
                continue
            # clean, normalize and tag the returned frame
            data_df = data_df.drop_duplicates().dropna()
            data_df.rename(columns=upper_col_2_name_dic, inplace=True)
            logger.info('%d/%d) %d data of %s between %s and %s', data_num,
                        wind_code_date_count, data_df.shape[0], wind_code,
                        date_from, date_to)
            data_df['wind_code'] = wind_code
            data_tot += data_df.shape[0]
            data_df_list.append(data_df)
            if data_tot > 10000:
                # BUGFIX: insert the whole accumulated batch; the old code passed
                # only the last frame (data_df) and dropped everything else
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(data_df_all, table_name,
                                                 engine_md, dtype=dtype)
                data_df_list = []
                data_tot = 0
    finally:
        # flush whatever is still pending, even when the loop aborted
        if len(data_df_list) > 0:
            # BUGFIX: concat the remaining frames and log the row count
            # (previously only the last frame was written and the frame count logged)
            data_df_all = pd.concat(data_df_list)
            bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md,
                                             dtype=dtype)
            logging.info("更新 wind_pub_fund_daily 结束 %d 条信息被更新",
                         data_df_all.shape[0])
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def merge_stock_daily(date_from=None):
    """
    Merge wind and ifind daily stock data into the combined stock_daily table.

    :param date_from: earliest trade date to merge; None means "resume after the
        latest trade_date already in stock_daily" (full merge if the table is new)
    :return: the merged DataFrame that was written to the database
    """
    table_name = 'stock_daily'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        # resume from the day after the latest record already stored
        sql_str = "select adddate(max(trade_date),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    ifind_table_ds_name = 'ifind_{table_name}_ds'.format(table_name=table_name)
    ifind_table_his_name = 'ifind_{table_name}_his'.format(
        table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    # BUGFIX: the his/ds queries previously read from each other's table, which
    # flipped the _x/_y suffix origin that col_merge_dic below depends on
    if date_from is None:
        ifind_his_sql_str = "select * from {table_name}".format(
            table_name=ifind_table_his_name)
        ifind_ds_sql_str = "select * from {table_name}".format(
            table_name=ifind_table_ds_name)
        wind_sql_str = "select * from {table_name}".format(
            table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str, engine_md)  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str, engine_md)  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md)  # , index_col='wind_code'
    else:
        ifind_his_sql_str = "select * from {table_name} where time >= %s".format(
            table_name=ifind_table_his_name)
        ifind_ds_sql_str = "select * from {table_name} where time >= %s".format(
            table_name=ifind_table_ds_name)
        wind_sql_str = "select * from {table_name} where trade_date >= %s".format(
            table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str, engine_md,
                                   params=[date_from])  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str, engine_md,
                                  params=[date_from])  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md,
                              params=[date_from])  # , index_col='wind_code'
    # join the two ifind sources first, then outer-join against wind on code+date;
    # indicator_column records which side each row came from
    ifind_df = pd.merge(ifind_his_df, ifind_ds_df, how='outer',
                        on=['ths_code', 'time'])
    joined_df = pd.merge(ifind_df, wind_df, how='outer',
                         left_on=['ths_code', 'time'],
                         right_on=['wind_code', 'trade_date'],
                         indicator='indicator_column')
    # column -> (db type, merge function, merge-function kwargs)
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {
            'left_key': 'ths_code', 'right_key': 'wind_code'
        }),
        'trade_date': (Date, prefer_left, {
            'left_key': 'time', 'right_key': 'trade_date'
        }),
        'open': (DOUBLE, mean_value, {
            'left_key': 'open_x', 'right_key': 'open_y',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        'high': (DOUBLE, mean_value, {
            'left_key': 'high_x', 'right_key': 'high_y',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        'low': (DOUBLE, mean_value, {
            'left_key': 'low_x', 'right_key': 'low_y',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        # TODO: reason unknown — some close prices from the wind interface are
        # inaccurate, therefore prefer the ifind (left) value
        'close': (DOUBLE, prefer_left, {
            'left_key': 'close_x', 'right_key': 'close_y',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        'volume': (DOUBLE, mean_value, {
            'left_key': 'volume_x', 'right_key': 'volume_y',
            'warning_accuracy': 1, 'primary_keys': ('ths_code', 'time')
        }),
        'amount': (DOUBLE, mean_value, {
            'left_key': 'amount', 'right_key': 'amt',
            'warning_accuracy': 1, 'primary_keys': ('ths_code', 'time')
        }),
        # total shares: ifind totalShares is keyed on the change date while wind
        # total_shares is keyed on the announcement date; on conflict prefer wind
        'total_shares': (DOUBLE, prefer_right, {
            'left_key': 'totalShares', 'right_key': 'total_shares'
        }),
        # 'susp_days': (Integer, '***', {
        #     'left_key': 'ths_up_and_down_status_stock', 'right_key': 'susp_days', 'other_key': 'trade_status',
        #     'primary_keys': ('ths_code', 'time')}),
        'max_up_or_down': (Integer, max_up_or_down, {
            'ths_key': 'ths_up_and_down_status_stock', 'wind_key': 'maxupordown',
            'primary_keys': ('ths_code', 'time')
        }),
        'total_capital': (DOUBLE, get_value, {
            'key': 'totalCapital'
        }),
        'float_capital': (DOUBLE, get_value, {
            'key': 'floatCapitalOfAShares'
        }),
        'pct_chg': (DOUBLE, mean_value, {
            'left_key': 'changeRatio', 'right_key': 'pct_chg',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        'float_a_shares': (DOUBLE, get_value, {
            'key': 'floatSharesOfAShares'
        }),  # matches wind float_a_shares
        'free_float_shares': (DOUBLE, get_value, {
            'key': 'free_float_shares'
        }),  # matches ths ths_free_float_shares_stock
        # PE_TTM: ths ths_pe_ttm_stock is based on the financial-report date while
        # wind pe_ttm is based on the reporting period; on conflict prefer wind
        'pe_ttm': (DOUBLE, prefer_right, {
            'left_key': 'ths_pe_ttm_stock', 'right_key': 'pe_ttm',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        'pe': (DOUBLE, get_value, {
            'key': 'pe'
        }),
        'pb': (DOUBLE, get_value, {
            'key': 'pb'
        }),
        'ps': (DOUBLE, get_value, {
            'key': 'ps'
        }),
        'pcf': (DOUBLE, get_value, {
            'key': 'pcf'
        }),
    }
    col_merge_rule_dic = {
        key: (val[1], val[2])
        for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name,
                                                  engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    return data_df
def merge_ifind_stock_daily(ths_code_set: set = None, date_from=None):
    """Merge ds, his and financial data into one daily table (ifind_stock_daily).

    :param ths_code_set: restrict the merge to these codes; None means all codes
    :param date_from: earliest date to merge; None means "resume after the latest
        `time` already in ifind_stock_daily" (full merge if the table is new)
    """
    table_name = 'ifind_stock_daily'
    logging.info("合成 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        # resume from the day after the latest record already stored
        sql_str = "select adddate(max(`time`),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    # load the source tables; report-date and fin tables are always loaded in full
    ifind_his_df = get_ifind_daily_df('ifind_stock_daily_his', date_from)
    ifind_ds_df = get_ifind_daily_df('ifind_stock_daily_ds', date_from)
    ifind_report_date_df = get_ifind_report_date_df('ifind_stock_report_date',
                                                    None)
    ifind_fin_df = get_ifind_daily_df('ifind_stock_fin', None)
    ifind_fin_df_g = ifind_fin_df.groupby('ths_code')
    # codes that actually have financial data
    ths_code_set_4_daily = set(ifind_fin_df_g.size().index)
    # merge ds and his data; the outer join leaves NaN where one side has no row
    ifind_his_ds_df = pd.merge(ifind_his_df,
                               ifind_ds_df,
                               how='outer',
                               on=['ths_code', 'time'])
    ifind_his_ds_df_g = ifind_his_ds_df.groupby('ths_code')
    logger.debug("提取数据完成")
    # build, per code, a dict of report_date -> financial-data row effective at
    # that disclosure date: {ths_code: {report_date: fin Series}}
    report_date_dic_dic = {}
    for report_date_g in [
            ifind_report_date_df.groupby(
                ['ths_code', 'ths_regular_report_actual_dd_stock'])
    ]:
        for num, ((ths_code, report_date),
                  data_df) in enumerate(report_date_g, start=1):
            if ths_code_set is not None and ths_code not in ths_code_set:
                continue
            if is_nan_or_none(report_date):
                continue
            report_date_dic = report_date_dic_dic.setdefault(ths_code, {})
            if ths_code not in ths_code_set_4_daily:
                logger.error('fin 表中不存在 %s 的財務數據', ths_code)
                continue
            # `in` on a groupby size Series tests the index, i.e. group membership
            ifind_fin_df_temp = ifind_fin_df_g.get_group(ths_code)
            # NOTE(review): this tests the OUTER dict (keyed by ths_code) for a
            # report_date key — likely intended `report_date not in report_date_dic`;
            # confirm before changing
            if report_date not in report_date_dic_dic:
                ifind_fin_df_temp = ifind_fin_df_temp[
                    ifind_fin_df_temp['time'] <= report_date]
                if ifind_fin_df_temp.shape[0] > 0:
                    # NOTE(review): iloc[0] takes the EARLIEST row at or before the
                    # report date; presumably the latest (iloc[-1]) was meant — confirm
                    report_date_dic[
                        report_date] = ifind_fin_df_temp.sort_values(
                            'time').iloc[0]
    # build the dtype mapping for the combined table from all source dtype maps
    dtype = {'report_date': Date}
    for dic in [
            DTYPE_STOCK_DAILY_DS, DTYPE_STOCK_REPORT_DATE,
            DTYPE_STOCK_DAILY_FIN, DTYPE_STOCK_DAILY_HIS
    ]:
        for key, val in dic.items():
            dtype[key] = val
    logger.debug("计算财报日期完成")
    # assemble the output: for each code, slice the daily rows into segments
    # delimited by consecutive report dates and attach that period's financials
    tot_data_count, data_count, data_df_list, for_count = 0, 0, [], len(
        report_date_dic_dic)
    try:
        for num, (ths_code, report_date_dic) in enumerate(
                report_date_dic_dic.items(), start=1):  # key:ths_code
            # TODO: check whether ths_code exists in ifind_fin_df_g; using size()
            # membership for now, improve later
            if ths_code not in ifind_his_ds_df_g.size():
                logger.error('fin 表中不存在 %s 的財務數據', ths_code)
                continue
            # open/low etc. may be NaN for some rows
            ifind_his_ds_df_cur_ths_code = ifind_his_ds_df_g.get_group(
                ths_code)  # shape[1] 30
            logger.debug('%d/%d) 处理 %s %d 条数据', num, for_count, ths_code,
                         ifind_his_ds_df_cur_ths_code.shape[0])
            report_date_list = list(report_date_dic.keys())
            report_date_list.sort()
            # iter_2_range yields (None, d0), (d0, d1), ..., (dn, None) windows
            for report_date_from, report_date_to in iter_2_range(
                    report_date_list):
                logger.debug('%d/%d) 处理 %s [%s - %s]', num, for_count,
                             ths_code, date_2_str(report_date_from),
                             date_2_str(report_date_to))
                # compute the effective date window mask (None = open-ended)
                if report_date_from is None:
                    is_fit = ifind_his_ds_df_cur_ths_code[
                        'time'] < report_date_to
                elif report_date_to is None:
                    is_fit = ifind_his_ds_df_cur_ths_code[
                        'time'] >= report_date_from
                else:
                    is_fit = (ifind_his_ds_df_cur_ths_code['time'] <
                              report_date_to) & (
                                  ifind_his_ds_df_cur_ths_code['time'] >=
                                  report_date_from)
                # slice the rows inside the window (copy: we mutate columns below)
                ifind_his_ds_df_segment = ifind_his_ds_df_cur_ths_code[
                    is_fit].copy()
                segment_count = ifind_his_ds_df_segment.shape[0]
                if segment_count == 0:
                    continue
                # financials effective for this window; None before the first report
                fin_s = report_date_dic[
                    report_date_from] if report_date_from is not None else None
                for key in DTYPE_STOCK_DAILY_FIN.keys():
                    if key in ('ths_code', 'time'):
                        continue
                    ifind_his_ds_df_segment[key] = fin_s[
                        key] if fin_s is not None and key in fin_s else None
                ifind_his_ds_df_segment['report_date'] = report_date_from
                # queue the segment for bulk insertion
                data_df_list.append(ifind_his_ds_df_segment)
                data_count += segment_count
                if DEBUG and len(data_df_list) > 1:
                    break
                # flush to the database once the pending batch is large enough
                if data_count > 10000:
                    data_df = pd.concat(data_df_list)
                    data_count = bunch_insert_on_duplicate_update(
                        data_df, table_name, engine_md, dtype)
                    tot_data_count += data_count
                    data_count, data_df_list = 0, []
    finally:
        # flush whatever is still pending, even when the loop aborted
        if len(data_df_list) > 0:
            data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, dtype)
            tot_data_count += data_count
        logger.info('%s 新增或更新记录 %d 条', table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_smfund_info(chain_param=None):
    """
    Import structured (graded) fund information into wind_smfund_info.

    :param chain_param: used by celery task chains to pass the previous task's
        result forward; not read by this function
    :return:
    """
    table_name = "wind_smfund_info"
    has_table = engine_md.has_table(table_name)
    # w.start()
    # NOTE: reference mapping of graded-fund category -> wind sector id; currently unused
    types = {
        u'主动股票型分级母基金': 1000007766000000,
        u'被动股票型分级母基金': 1000007767000000,
        u'纯债券型分级母基金': 1000007768000000,
        u'混合债券型分级母基金': 1000007769000000,
        u'混合型分级母基金': 1000026143000000,
        u'QDII分级母基金': 1000019779000000
    }
    col_name_param_list = [
        ('wind_code', String(20)),
        ('fund_type', String(20)),
        ('sec_name', String(50)),
        ('class_a_code', String(20)),
        ('class_a_name', String(50)),
        ('class_b_code', String(20)),
        ('class_b_name', String(50)),
        ('track_indexcode', String(20)),
        ('track_indexname', String(50)),
        ('a_pct', DOUBLE),
        ('b_pct', DOUBLE),
        ('upcv_nav', DOUBLE),
        ('downcv_nav', DOUBLE),
        ('max_purchasefee', DOUBLE),
        ('max_redemptionfee', DOUBLE),
    ]
    col_name = ",".join([col_name for col_name, _ in col_name_param_list])
    # sample the graded-fund sector at several historical dates and union the codes
    dates = [
        '2011-01-01', '2013-01-01', '2015-01-01', '2017-01-01', '2018-01-01'
    ]
    df = pd.DataFrame()
    for date_p in dates:
        temp_df = invoker.wset("sectorconstituent",
                               "date=%s;sectorid=1000006545000000" % date_p)
        df = df.append(temp_df)
    wind_code_all = df['wind_code'].unique()
    # db column types for the insert
    dtype = {key: val for key, val in col_name_param_list}
    dtype['wind_code'] = String(20)
    dtype["tradable"] = String(20)
    dtype["fund_setupdate"] = Date
    dtype["fund_maturitydate"] = Date
    # only fetch codes not already stored
    if has_table:
        with with_db_session(engine_md) as session:
            table = session.execute("SELECT wind_code FROM wind_smfund_info")
            wind_code_existed = set(
                [content[0] for content in table.fetchall()])
        wind_code_new = list(set(wind_code_all) - wind_code_existed)
    else:
        wind_code_new = list(set(wind_code_all))
    # drop pseudo codes containing '!'
    wind_code_new = [code for code in wind_code_new if code.find('!') < 0]
    info_df = invoker.wss(wind_code_new, 'fund_setupdate, fund_maturitydate')
    if info_df is None:
        raise Exception('no data')
    info_df['FUND_SETUPDATE'] = info_df['FUND_SETUPDATE'].apply(
        lambda x: str_2_date(x))
    info_df['FUND_MATURITYDATE'] = info_df['FUND_MATURITYDATE'].apply(
        lambda x: str_2_date(x))
    info_df.rename(columns={
        'FUND_SETUPDATE': 'fund_setupdate',
        'FUND_MATURITYDATE': 'fund_maturitydate'
    },
                   inplace=True)
    field = col_name
    # fetch per-fund leveraged-fund details as of the fund's setup date
    # NOTE(review): details are appended onto the sector-constituent df from above,
    # mixing two schemas before the join — presumably intentional; verify
    for code in info_df.index:
        beginDate = info_df.loc[code, 'fund_setupdate'].strftime('%Y-%m-%d')
        temp_df = invoker.wset(
            "leveragedfundinfo",
            "date=%s;windcode=%s;field=%s" % (beginDate, code, field))
        df = df.append(temp_df)
        if DEBUG and len(df) > 10:
            break
    df.set_index('wind_code', inplace=True)
    # mark structured sub-fund codes (containing 'S') as tradable
    df['tradable'] = df.index.map(lambda x: x if 'S' in x else None)
    # df.index = df.index.map(lambda x: x[:-2] + 'OF')
    info_df = info_df.join(df, how='outer')
    # BUGFIX: rename() returned a new frame that was silently discarded;
    # apply the column renames in place so nav_a/nav_b/fs_inc_* exist downstream
    info_df.rename(columns={
        'a_nav': 'nav_a',
        'b_nav': 'nav_b',
        'a_fs_inc': 'fs_inc_a',
        'b_fs_inc': 'fs_inc_b'
    },
                   inplace=True)
    info_df.index.rename('wind_code', inplace=True)
    info_df.reset_index(inplace=True)
    bunch_insert_on_duplicate_update(info_df, table_name, engine_md,
                                     dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, len(info_df))
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # refresh the code_mapping table
    update_from_info_table(table_name)
def import_index_daily_ds(chain_param=None,
                          ths_code_set: set = None,
                          begin_time=None):
    """
    Save index history through the date-serial interface into ifind_index_daily_ds.
    This supplements the History data with fields such as the adjust factor (af),
    limit-up/down flags, suspension status and reason.

    :param chain_param: only used by task.chain pipelines to pass parameters along
    :param ths_code_set: restrict the import to these codes; None means all
    :param begin_time: optional earliest date; pulls each code's start date back
    :return:
    """
    table_name = 'ifind_index_daily_ds'
    has_table = engine_md.has_table(table_name)
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_INDEX_DAILY_DS],
        sep=';')
    # incremental range when the table exists, otherwise full range from info table
    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                    ifind_index_info info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code""".format(table_name=table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info 
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code;"""
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围' % table_name)
    with with_db_session(engine_md) as session:
        # ths_code -> (date_from, date_to): the range still missing per code
        result = session.execute(sql_str)
        date_range_by_code = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in result.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }
    if TRIAL:
        # trial accounts only reach back 5 years; clamp ranges accordingly
        date_from_min = date.today() - timedelta(days=(365 * 5))
        date_range_by_code = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in date_range_by_code.items()
            if date_to is not None and date_from_min <= date_to
        }
    pending_frames, pending_rows, saved_rows, total_codes = [], 0, 0, len(
        date_range_by_code)
    try:
        for seq, (ths_code, (begin_time, end_time)) in enumerate(
                date_range_by_code.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', seq, total_codes, ths_code,
                         begin_time, end_time)
            end_time = date_2_str(end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:D', begin_time,
                end_time)
            if data_df is not None and data_df.shape[0] > 0:
                pending_rows += data_df.shape[0]
                pending_frames.append(data_df)
            # flush once the pending batch crosses the threshold
            if pending_rows >= 10000:
                combined_df = pd.concat(pending_frames)
                pending_rows = bunch_insert_on_duplicate_update(
                    combined_df, table_name, engine_md, DTYPE_INDEX_DAILY_DS)
                saved_rows += pending_rows
                pending_frames, pending_rows = [], 0
            # debug only: stop early after a couple of frames
            if DEBUG and len(pending_frames) > 1:
                break
    finally:
        # flush whatever is still pending, even when the loop aborted
        if pending_rows > 0:
            combined_df = pd.concat(pending_frames)
            pending_rows = bunch_insert_on_duplicate_update(
                combined_df, table_name, engine_md, DTYPE_INDEX_DAILY_DS)
            saved_rows += pending_rows
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, saved_rows)
def import_stock_hk_report_date(chain_param=None,
                                ths_code_set: set = None,
                                begin_time=None,
                                interval='Q'):
    """
    Save historical financial report dates through the date-serial interface into
    ifind_stock_hk_report_date. Domestic financials are released quarterly, so the
    default fetch interval is quarterly.

    :param chain_param: only used by task.chain pipelines to pass parameters along
    :param ths_code_set: restrict the import to these codes; None means all
    :param begin_time: optional earliest date; pulls each code's start date back
    :param interval: Q quarterly, M monthly, W weekly, D daily
    :return:
    """
    table_name = 'ifind_stock_hk_report_date'
    info_table_name = 'ifind_stock_hk_info'
    has_table = engine_md.has_table(table_name)
    # jsonIndicator='ths_perf_briefing_fore_dsclsr_date_hks;ths_perf_brief_actual_dd_hks;ths_perf_report_foredsclsr_date_hks;ths_perf_report_actual_dd_hks'
    # jsonparam=';'
    json_indicator, json_param = unzip_join(
        [(key, val)
         for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_REPORT_DATE],
        sep=';')
    # incremental range when the table exists, otherwise full range from info table
    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM 
                    {info_table_name} info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info 
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # ths_code -> (date_from, date_to): the range still missing per code
        result = session.execute(sql_str)
        date_range_by_code = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in result.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }
    if TRIAL:
        # trial accounts only reach back 5 years; clamp ranges accordingly
        date_from_min = date.today() - timedelta(days=(365 * 5))
        date_range_by_code = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in date_range_by_code.items()
            if date_from_min <= date_to
        }
    pending_frames, pending_rows, saved_rows, total_codes = [], 0, 0, len(
        date_range_by_code)
    try:
        for seq, (ths_code, (begin_time, end_time)) in enumerate(
                date_range_by_code.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', seq, total_codes, ths_code,
                         begin_time, end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                "Days:Tradedays,Fill:Previous,Interval:{interval}".format(
                    interval=interval), begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                pending_rows += data_df.shape[0]
                pending_frames.append(data_df)
            # flush once the pending batch crosses the threshold
            if pending_rows >= 10000:
                combined_df = pd.concat(pending_frames)
                pending_rows = bunch_insert_on_duplicate_update(
                    combined_df, table_name, engine_md,
                    DTYPE_STOCK_HK_REPORT_DATE)
                saved_rows += pending_rows
                pending_frames, pending_rows = [], 0
            # debug only: stop early after a couple of frames
            if DEBUG and len(pending_frames) > 1:
                break
    finally:
        # flush whatever is still pending, even when the loop aborted
        if pending_rows > 0:
            combined_df = pd.concat(pending_frames)
            pending_rows = bunch_insert_on_duplicate_update(
                combined_df, table_name, engine_md,
                DTYPE_STOCK_HK_REPORT_DATE)
            saved_rows += pending_rows
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, saved_rows)
def import_stock_hk_fin_by_report_date_weekly(chain_param=None,
                                              ths_code_set: set = None,
                                              begin_time=None,
                                              refresh=False):
    """
    Save historical financial data into ifind_stock_hk_fin through the date-serial
    interface, fetched weekly. Anchored on the financial-report release date:
    fetches weekly financial data over [release date - 14 days, release date].

    :param chain_param: only used by task.chain pipelines to pass parameters along
    :param ths_code_set: restrict the import to these codes; None means all
    :param begin_time: optional earliest date; pulls each code's start date back
    :param refresh: when True, refresh everything (ignore stored-data date limits)
    :return:
    """
    table_name = 'ifind_stock_hk_fin'
    info_table_name = 'ifind_stock_hk_info'
    # ths_cce_hks;ths_total_liab_hks;ths_ebit_ttm_hks
    # jsonparam='2013,100,OC;2013,100,OC;OC,101'
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_FIN],
        sep=';')
    has_table = engine_md.has_table(table_name)
    # per code: (report_date - 14 days, report_date) windows, from both the
    # briefing and the full-report actual disclosure dates
    ths_code_report_date_str = """select distinct ths_code, subdate(report_date, 14), report_date 
        from
        (
            select ths_code, ths_perf_brief_actual_dd_hks report_date from ifind_stock_hk_report_date 
            union 
            select ths_code, ths_perf_report_actual_dd_hks report_date from ifind_stock_hk_report_date 
        ) tt
        where report_date is not null
        order by ths_code, report_date"""
    # incremental range when the table exists, otherwise full range from info table
    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM 
                    {info_table_name} info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info 
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # collect the (report_date - 14 days, report_date) window list per code
        # NOTE(review): an older comment said "-10 days" but the SQL subtracts 14 —
        # the SQL is authoritative
        table = session.execute(ths_code_report_date_str)
        ths_code_report_date_range_list_dic, ths_code_report_date_range_list_dic_tmp = {}, {}
        for ths_code, date_from, date_to in table.fetchall():
            if ths_code_set is None or ths_code in ths_code_set:
                ths_code_report_date_range_list_dic_tmp.setdefault(
                    ths_code, []).append((date_from, date_to))
        # per-code date range of data still missing from the table
        if not refresh:
            # when refreshing everything, the code_date_range_dic limits are ignored
            table = session.execute(sql_str)
            code_date_range_dic = {
                ths_code: (date_from if begin_time is None else min(
                    [date_from, begin_time]), date_to)
                for ths_code, date_from, date_to in table.fetchall()
                if ths_code_set is None or ths_code in ths_code_set
            }
            if TRIAL:
                # trial accounts only reach back 5 years; clamp ranges accordingly
                date_from_min = date.today() - timedelta(days=(365 * 5))
                code_date_range_dic = {
                    ths_code: (max([date_from, date_from_min]), date_to)
                    for ths_code, (date_from,
                                   date_to) in code_date_range_dic.items()
                    if date_from_min <= date_to
                }
        else:
            code_date_range_dic = {}
    # merge overlapping report-date windows per code
    for ths_code, date_range_list in ths_code_report_date_range_list_dic_tmp.items(
    ):
        if not refresh and ths_code in code_date_range_dic:
            code_date_range = code_date_range_dic[ths_code]
        else:
            code_date_range = None
        # date_range_list is sorted by start date; this loop merges windows whose
        # dates overlap into single ranges
        date_range_list_new, date_from_last, date_to_last = [], None, None
        for date_from, date_to in date_range_list:
            if code_date_range is not None:
                # when refreshing everything, the code_date_range_dic limits are ignored
                if not refresh and (date_to < code_date_range[0]
                                    or code_date_range[1] < date_from):
                    continue
            if date_from_last is None:
                # first iteration: initialize date_from_last
                date_from_last = date_from
            elif date_from < date_to_last:
                # NOTE(review): windows that merely touch (date_from == date_to_last)
                # are treated as NON-overlapping and saved separately — confirm intended
                # dates overlap: keep merging
                pass
            else:
                # no overlap: close and save the current range
                date_range_list_new.append((date_from_last, date_to_last))
                date_from_last = date_from
            # loop footer: advance date_to_last
            date_to_last = date_to
        # loop finished: save the trailing range
        if date_from_last is not None and date_to_last is not None:
            date_range_list_new.append((date_from_last, date_to_last))
        if len(date_range_list_new) > 0:
            ths_code_report_date_range_list_dic[ths_code] = date_range_list_new
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        ths_code_report_date_range_list_dic)
    try:
        for num, (ths_code, date_range_list) in enumerate(
                ths_code_report_date_range_list_dic.items(), start=1):
            # fetch weekly financial data for every merged window of this code
            for begin_time, end_time in date_range_list:
                logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                             begin_time, end_time)
                data_df = invoker.THS_DateSerial(
                    ths_code, json_indicator, json_param,
                    'Days:Tradedays,Fill:Previous,Interval:W', begin_time,
                    end_time)
                if data_df is not None and data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)
            # debug only: stop early once a frame has been fetched
            if DEBUG and len(data_df_list) > 0:
                break
            # flush once the pending batch crosses the threshold
            if data_count >= 2000:
                tot_data_df = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                                 engine_md,
                                                 DTYPE_STOCK_HK_FIN)
                tot_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # flush whatever is still pending, even when the loop aborted
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                             engine_md, DTYPE_STOCK_HK_FIN)
            tot_data_count += data_count
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_stock_quertarly_hk(chain_param=None, wind_code_set=None, begin_time=None):
    """
    Import HK stock quarterly fundamental data up to the latest workday - 1.
    (The original docstring said "daily data" — copy-paste slip, this pulls Period=Q.)

    :param chain_param: used by celery task chains to pass the previous result to the next task
    :param wind_code_set: optional set of wind codes to restrict the update to; None means all
    :param begin_time: optional earliest fetch date, used when earlier than the per-code start date
    :return: None
    """
    table_name = "wind_stock_quertarly_hk"
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume each code from the day after its latest stored trade_date
        # (falling back to ipo_date), capped at delist_date / yesterday-or-today.
        # FIX: the subquery previously grouped by ths_code (an iFinD column name
        # that does not exist in this wind table) — it must group by wind_code.
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info_hk info
                LEFT OUTER JOIN
                    (SELECT wind_code, adddate(max(trade_date),1) trade_date
                    FROM {table_name} GROUP BY wind_code) quertarly
                ON info.wind_code = quertarly.wind_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code""".format(table_name=table_name)
    else:
        logger.warning('wind_stock_quertarly_hk 不存在,仅使用 wind_stock_info_hk 表进行计算日期范围')
        # No quarterly table yet: compute the date range from the info table alone.
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code, ipo_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info_hk info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code"""
    with with_db_session(engine_md) as session:
        # Per-code (date_from, date_to) ranges still missing quarterly data.
        table = session.execute(sql_str)
        stock_date_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    data_df_list = []
    logger.info('%d data will been import into %s', len(stock_date_dic), table_name)
    # (Wind field name, SQLAlchemy column type) for every quarterly indicator fetched.
    field_col_name_list = [
        ('roic_ttm', String(20)),
        ('yoyprofit', String(20)),
        ('ebit', String(20)),
        ('ebit2', String(20)),
        ('ebit2_ttm', String(20)),
        ('surpluscapitalps', String(20)),
        ('undistributedps', String(20)),
        ('stm_issuingdate', Date),
    ]
    dtype = {key: val for key, val in field_col_name_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date
    wind_indictor_str = ",".join(key for key, _ in field_col_name_list)
    # FIX: this mapping previously mapped upper-case names to the SQLAlchemy TYPE
    # objects ({name.upper(): val}), so DataFrame.rename renamed columns to type
    # instances. It must map the upper-case Wind names back to the db column names.
    upper_col_2_name_dic = {name.upper(): name for name, _ in field_col_name_list}
    try:
        for stock_num, (wind_code, (date_from, date_to)) in enumerate(stock_date_dic.items()):
            data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to,
                                  "unit=1;rptType=1;Period=Q")
            if data_df is None:
                logger.warning('%d) %s has no data during %s %s', stock_num, wind_code, date_from, date_to)
                continue
            data_df.rename(columns=upper_col_2_name_dic, inplace=True)
            # Drop report rows whose every field came back empty.
            for trade_date in list(data_df.index):
                is_all_none = data_df.loc[trade_date].apply(lambda x: x is None).all()
                if is_all_none:
                    logger.warning("%s %s 数据全部为空", wind_code, trade_date)
                    data_df.drop(trade_date, inplace=True)
            logger.info('%d) %d data of %s between %s and %s',
                        stock_num, data_df.shape[0], wind_code, date_from, date_to)
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df_list.append(data_df)
            # Debug only: stop after a few codes.
            if DEBUG and len(data_df_list) > 5:
                break
    finally:
        # Flush whatever was collected, even on error/interrupt.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
            logging.info("更新 wind_stock_quertarly_hk 结束 %d 条信息被更新", data_df_all.shape[0])
        if not has_table and engine_md.has_table(table_name):
            # Table was created by this run: convert engine and add the primary key.
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_stock_info_hk(chain_param=None, refresh=False):
    """
    Fetch whole-market HK stock codes/names and load them into wind_stock_info_hk.

    :param chain_param: used by celery task chains to pass the previous result to the next task
    :param refresh: False (default) samples only today; True re-scans yearly from DATE_BASE
    :return: None
    """
    table_name = 'wind_stock_info_hk'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # (field, SQLAlchemy column type) pairs for the wss info query below.
    param_list = [
        ('sec_name', String(20)),
        ('trade_code', String(20)),
        ('ipo_date', Date),
        ('delist_date', Date),
        ('mkt', String(20)),
        ('exch_city', String(20)),
        ('exch_eng', String(20)),
        ('prename', String(2000)),
    ]
    # Comma-separated field list sent to the Wind wss call.
    param = ",".join(key for key, _ in param_list)
    # Wind returns upper-case column names; map them back to lower case.
    rename_col_dic = {name.upper(): name.lower() for name, _ in param_list}
    dtype = dict(param_list)
    dtype['wind_code'] = String(20)
    # Walk the calendar one year at a time, collecting the market's code set.
    date_fetch = DATE_BASE if refresh else date.today()
    date_end = date.today()
    stock_code_set = set()
    while date_fetch < date_end:
        sub_set = get_stock_code_set(date_fetch)
        if sub_set is not None:
            stock_code_set |= sub_set
        date_fetch += timedelta(days=365)
    sub_set = get_stock_code_set(date_end)
    if sub_set is not None:
        stock_code_set |= sub_set
    # Query per-code info (ipo/delist date, market, exchange, ...) in chunks.
    seg_count = 1000
    stock_info_df_list = []
    for code_chunk in split_chunk(list(stock_code_set), seg_count):
        stock_info_df_list.append(invoker.wss(code_chunk, param))
        if DEBUG:
            break
    stock_info_all_df = pd.concat(stock_info_df_list)
    stock_info_all_df.index.rename('wind_code', inplace=True)
    stock_info_all_df.rename(columns=rename_col_dic, inplace=True)
    logging.info('%d data will be import', stock_info_all_df.shape[0])
    stock_info_all_df.reset_index(inplace=True)
    data_count = bunch_insert_on_duplicate_update(stock_info_all_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        # Table was created by this run: convert engine and add the primary key.
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # Refresh the code_mapping table from the info table just updated.
    update_from_info_table(table_name)
def import_private_fund_nav_daily(chain_param=None, wind_code_list=None):
    """
    Import private-fund daily NAV data into wind_fund_nav_daily.

    :param chain_param: used by celery task chains to pass the previous result to the next task
    :param wind_code_list: optional list of wind codes; None means every fund in fund_info
    :return: fund_nav_all_df (kept for interface compatibility; currently always an empty list)
    """
    table_name = 'wind_fund_nav_daily'
    # (column, SQLAlchemy type) for the NAV table.
    col_name_param_list = [
        ('trade_date', Date),
        ('nav', DOUBLE),
        ('nav_acc', DOUBLE),
        ('nav_date', Date),
    ]
    dtype = {col_name: val for col_name, val in col_name_param_list}
    dtype['wind_code'] = String(200)
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Per-fund fetch window: resume from the stored latest date when sane,
        # otherwise from fund_setupdate; cap at maturity date / yesterday.
        fund_info_df = pd.read_sql_query(
            """SELECT DISTINCT fi.wind_code AS wind_code,
            IFNULL(fund_setupdate,
                   if(trade_date_latest BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1),
                      ADDDATE(trade_date_latest,1), fund_setupdate)) date_from,
            if(fund_maturitydate BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1),
               fund_maturitydate, ADDDATE(CURDATE(), -1)) date_to
            FROM fund_info fi
            LEFT JOIN
            (
                SELECT wind_code, ADDDATE(max(trade_date),1) trade_date_from
                FROM wind_fund_nav_daily GROUP BY wind_code
            ) wfn
            ON fi.wind_code = wfn.wind_code""", engine_md)
    else:
        fund_info_df = pd.read_sql_query(
            """SELECT DISTINCT fi.wind_code AS wind_code,
            IFNULL(fund_setupdate,
                   if(trade_date_latest BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1),
                      ADDDATE(trade_date_latest,1), fund_setupdate)) date_from,
            if(fund_maturitydate BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1),
               fund_maturitydate, ADDDATE(CURDATE(), -1)) date_to
            FROM fund_info fi
            ORDER BY wind_code;""", engine_md)
    wind_code_date_frm_to_dic = {
        wind_code: (str_2_date(date_from), str_2_date(date_to))
        for wind_code, date_from, date_to in
        zip(fund_info_df['wind_code'], fund_info_df['date_from'], fund_info_df['date_to'])}
    fund_info_df.set_index('wind_code', inplace=True)
    if wind_code_list is None:
        wind_code_list = list(fund_info_df.index)
    else:
        # Only handle requested codes that actually exist in fund_info.
        wind_code_list = list(set(wind_code_list) & set(fund_info_df.index))
    date_last_day = date.today() - timedelta(days=1)
    fund_nav_all_df = []
    no_data_count = 0
    code_count = len(wind_code_list)
    # Codes whose "latest fetched" bookmark should be advanced afterwards.
    wind_code_trade_date_latest_dic = {}
    date_gap = timedelta(days=10)
    try:
        for num, wind_code in enumerate(wind_code_list):
            date_begin, date_end = wind_code_date_frm_to_dic[wind_code]
            if date_begin > date_end:
                continue
            # Validate / format the begin date.
            if isinstance(date_begin, date):
                if date_begin.year < 1900:
                    continue
                if date_begin > date_end:
                    continue
                date_begin_str = date_begin.strftime('%Y-%m-%d')
            else:
                logger.error("%s date_begin:%s", wind_code, date_begin)
                continue
            # Validate / format the end date.
            # FIX: this branch previously re-checked date_begin.year (copy-paste);
            # it must validate date_end.
            if isinstance(date_end, date):
                if date_end.year < 1900:
                    continue
                if date_begin > date_end:
                    continue
                date_end_str = date_end.strftime('%Y-%m-%d')
            else:
                logger.error("%s date_end:%s", wind_code, date_end)
                continue
            # Try fetching fund_nav data, retrying once on failure.
            for k in range(2):
                try:
                    fund_nav_tmp_df = invoker.wsd(codes=wind_code, fields='nav,NAV_acc,NAV_date',
                                                  beginTime=date_begin_str, endTime=date_end_str,
                                                  options='Fill=Previous')
                    trade_date_latest = datetime.strptime(date_end_str, '%Y-%m-%d').date() - date_gap
                    wind_code_trade_date_latest_dic[wind_code] = trade_date_latest
                    break
                except APIError as exp:
                    # -40520007: no data — still advance the bookmark so we don't re-ask forever.
                    if exp.ret_dic.setdefault('error_code', 0) == -40520007:
                        trade_date_latest = datetime.strptime(date_end_str, '%Y-%m-%d').date() - date_gap
                        wind_code_trade_date_latest_dic[wind_code] = trade_date_latest
                    logger.error("%s Failed, ErrorMsg: %s" % (wind_code, str(exp)))
                    continue
                except Exception as exp:
                    logger.error("%s Failed, ErrorMsg: %s" % (wind_code, str(exp)))
                    continue
            else:
                # Both attempts failed without break.
                fund_nav_tmp_df = None
            if fund_nav_tmp_df is None:
                logger.info('%s No data', wind_code)
                no_data_count += 1
                logger.warning('%d funds no data', no_data_count)
            else:
                fund_nav_tmp_df.dropna(how='all', inplace=True)
                df_len = fund_nav_tmp_df.shape[0]
                if df_len == 0:
                    continue
                fund_nav_tmp_df['wind_code'] = wind_code
                trade_date_latest = fund_nav_df_2_sql(table_name, fund_nav_tmp_df, engine_md, is_append=True)
                if trade_date_latest is None:
                    # FIX: format string previously was '%s[%d] ...' with a single
                    # argument, which made logging raise internally.
                    logger.error('%s data insert failed', wind_code)
                else:
                    wind_code_trade_date_latest_dic[wind_code] = trade_date_latest
                    logger.info('%d) %s updated, %d funds left', num, wind_code, code_count - num)
            # Debug only: stop after a couple of funds.
            if DEBUG and num > 1:
                break
    finally:
        # Always propagate what was fetched and advance the bookmarks,
        # even when the loop aborted part-way.
        import_wind_fund_nav_to_nav()
        update_trade_date_latest(wind_code_trade_date_latest_dic)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
    return fund_nav_all_df
def import_stock_daily_hk(chain_param=None, wind_code_set=None, begin_time=None):
    """
    Import HK stock daily quote data up to the most recent workday - 1;
    past the cut-off hour (16:00, hard-coded in the SQL below) the current day is included.

    :param chain_param: used by celery task chains to pass the previous result to the next task
    :param wind_code_set: optional set of wind codes; None means all codes
    :param begin_time: optional earliest fetch date, used when earlier than the per-code start date
    :return: None
    """
    table_name = 'wind_stock_daily_hk'
    logging.info("更新 %s 开始", table_name)
    # (Wind field name, SQLAlchemy column type) for every daily indicator fetched below.
    param_list = [
        ('OPEN', DOUBLE),
        ('HIGH', DOUBLE),
        ('LOW', DOUBLE),
        ('CLOSE', DOUBLE),
        ('ADJFACTOR', DOUBLE),
        ('VOLUME', DOUBLE),
        ('AMT', DOUBLE),
        ('PCT_CHG', DOUBLE),
        ('MAXUPORDOWN', Integer),
        ('SWING', DOUBLE),
        ('TURN', DOUBLE),
        ('FREE_TURN', DOUBLE),
        ('TRADE_STATUS', String(20)),
        ('SUSP_DAYS', Integer),
        ('TOTAL_SHARES', DOUBLE),
        ('FREE_FLOAT_SHARES', DOUBLE),
        ('EV2_TO_EBITDA', DOUBLE),
        ('PS_TTM', DOUBLE),
        ('PE_TTM', DOUBLE),
        ('PB_MRQ', DOUBLE),
    ]
    # Map the upper-case Wind column names to the lower-case DB column names.
    col_name_dic = {
        col_name.upper(): col_name.lower()
        for col_name, _ in param_list
    }
    # Lower-case field list used to build the wsd indicator string.
    col_name_list = [col_name.lower() for col_name in col_name_dic.keys()]
    wind_indictor_str = ",".join(col_name_list)
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume each code from the day after its latest stored trade_date
        # (falling back to ipo_date), capped at delist_date / yesterday-or-today.
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info_hk info
                LEFT OUTER JOIN
                    (SELECT wind_code, adddate(max(trade_date),1) trade_date
                    FROM {table_name} GROUP BY wind_code) daily
                ON info.wind_code = daily.wind_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code""".format(table_name=table_name)
    else:
        logger.warning('%s 不存在,仅使用 wind_stock_info_hk 表进行计算日期范围', table_name)
        # No daily table yet: compute the date range from the info table alone.
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code, ipo_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info_hk info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code"""
    with with_db_session(engine_md) as session:
        # Per-code (date_from, date_to) ranges still missing daily data.
        table = session.execute(sql_str)
        stock_date_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    # dtype passed to bunch_insert_on_duplicate_update for table creation / casting.
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date
    data_df_list = []
    data_len = len(stock_date_dic)
    logger.info('%d data will been import into %s', data_len, table_name)
    try:
        for data_num, (wind_code, (date_from, date_to)) in enumerate(stock_date_dic.items()):
            logger.debug('%d/%d) %s [%s - %s]', data_num, data_len, wind_code, date_from, date_to)
            try:
                data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to)
            except APIError as exp:
                logger.exception("%d/%d) %s 执行异常", data_num, data_len, wind_code)
                # Known per-code errors are skipped; anything else aborts the loop
                # (collected data is still flushed in the finally block).
                if exp.ret_dic.setdefault('error_code', 0) in (
                        -40520007,  # no data available
                        -40521009,  # data decode failure; check input params (e.g. short months / month-end dates)
                ):
                    continue
                else:
                    break
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s',
                               data_num, data_len, wind_code, date_from, date_to)
                continue
            logger.info('%d/%d) %d data of %s between %s and %s',
                        data_num, data_len, data_df.shape[0], wind_code, date_from, date_to)
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df.rename(columns=col_name_dic, inplace=True)
            data_df_list.append(data_df)
            # Debug only: stop after a couple of codes.
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # Flush whatever was collected, even on error/interrupt.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
        if not has_table and engine_md.has_table(table_name):
            # Table was created by this run: convert engine and add the primary key.
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_private_fund_info(table_name, chain_param=None, get_df=False):
    """
    Import private-fund basic info (strategy type, setup/maturity dates, manager, ...)
    for every strategy sector into *table_name*.

    :param table_name: destination table name (e.g. 'fund_info')
    :param chain_param: used by celery task chains to pass the previous result to the next task
    :param get_df: when True, also return the resulting info DataFrame
    :return: info DataFrame when get_df is True, otherwise None
    """
    has_table = engine_md.has_table(table_name)
    # Wind sector id per strategy name.
    # NOTE(review): u'多策略' has 15 digits while every other id has 16 —
    # looks like a dropped digit (1000023120000000?); verify against Wind's sector list.
    types = {u'股票多头策略': 1000023122000000,
             u'股票多空策略': 1000023123000000,
             u'其他股票策略': 1000023124000000,
             u'阿尔法策略': 1000023125000000,
             u'其他市场中性策略': 1000023126000000,
             u'事件驱动策略': 1000023113000000,
             u'债券策略': 1000023114000000,
             u'套利策略': 1000023115000000,
             u'宏观策略': 1000023116000000,
             u'管理期货': 1000023117000000,
             u'组合基金策略': 1000023118000000,
             u'货币市场策略': 1000023119000000,
             u'多策略': 100002312000000,
             u'其他策略': 1000023121000000}
    df = pd.DataFrame()
    today = date.today().strftime('%Y-%m-%d')
    # (column, SQLAlchemy type); only the first 8 are fetched from Wind,
    # STRATEGY_TYPE / TRADE_DATE_LATEST are filled locally.
    param_list = [
        ('FUND_SETUPDATE', Date),
        ('FUND_MATURITYDATE', Date),
        ('FUND_MGRCOMP', String(800)),
        ('FUND_EXISTINGYEAR', String(500)),
        ('FUND_PTMYEAR', String(30)),
        ('FUND_TYPE', String(20)),
        ('FUND_FUNDMANAGER', String(500)),
        ('SEC_NAME', String(2000)),
        ('STRATEGY_TYPE', String(200)),
        ('TRADE_DATE_LATEST', String(200)),
    ]
    col_name_dic = {col_name.upper(): col_name.lower() for col_name, _ in param_list}
    col_name_list = [col_name.lower() for col_name in col_name_dic.keys()]
    param_str = ",".join(col_name_list[:8])
    dtype = {key.lower(): val for key, val in param_list}
    dtype['wind_code'] = String(20)
    # Collect each strategy sector's constituents and tag the strategy name.
    for strategy_name, sector_id in types.items():
        temp = invoker.wset("sectorconstituent", "date=%s;sectorid=%s" % (today, str(sector_id)))
        temp['strategy_type'] = strategy_name
        df = pd.concat([df, temp], axis=0)
        if DEBUG and len(df) > 1000:
            break
    fund_types_df = df[['wind_code', 'sec_name', 'strategy_type']]
    fund_types_df.set_index('wind_code', inplace=True)
    # Fetch per-fund fundamentals in chunks of seg_count codes.
    code_list = list(fund_types_df.index)
    code_count = len(code_list)
    seg_count = 5000
    info_df = pd.DataFrame()
    for n in range(int(code_count / seg_count) + 1):
        num_start = n * seg_count
        num_end = (n + 1) * seg_count
        num_end = num_end if num_end <= code_count else code_count
        if num_start <= code_count:
            codes = ','.join(code_list[num_start:num_end])
            # Chunked query for fund setup dates etc.
            info2_df = invoker.wss(codes, param_str)
            logging.info('%05d ) [%d %d]' % (n, num_start, num_end))
            # FIX: DataFrame.append was deprecated and removed in pandas 2.0;
            # pd.concat (already used elsewhere in this file) is the replacement.
            info_df = pd.concat([info_df, info2_df])
            if DEBUG and len(info_df) > 1000:
                break
        else:
            break
    # Normalise dates, merge strategy tags, rename to db columns and insert.
    info_df['FUND_SETUPDATE'] = info_df['FUND_SETUPDATE'].apply(lambda x: str_2_date(x))
    info_df['FUND_MATURITYDATE'] = info_df['FUND_MATURITYDATE'].apply(lambda x: str_2_date(x))
    info_df = fund_types_df.join(info_df, how='right')
    info_df.rename(columns=col_name_dic, inplace=True)
    info_df['trade_date_latest'] = None
    info_df.index.names = ['wind_code']
    info_df.reset_index(inplace=True)
    info_df.drop_duplicates(inplace=True)
    bunch_insert_on_duplicate_update(info_df, table_name, engine_md, dtype=dtype)
    logging.info('%d funds inserted' % len(info_df))
    if not has_table and engine_md.has_table(table_name):
        # Table was created by this run: convert engine and add the primary key.
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # Refresh the code_mapping table from the info table just updated.
    update_from_info_table(table_name)
    if get_df:
        return info_df
def import_wind_code_kv(keys: list, chain_param=None, wind_code_list=None):
    """
    Import per-stock key/value indicator series into wind_code_kv.

    Example underlying call: w.wsd("600000.SH", "west_eps", "2018-05-05", "2018-06-03",
    "year=2018;westPeriod=180")
    :param keys: indicator names; each is stored as the 'key' column value
    :param chain_param: used by celery task chains to pass the previous result to the next task
    :param wind_code_list: optional list of wind codes to restrict the update to
    :return: None
    """
    table_name = "wind_code_kv"
    wind_code_set = set(wind_code_list) if wind_code_list is not None else None
    # FIX: the start/finish log messages previously said 'wind_stock_daily'
    # although this function updates wind_code_kv.
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # (column, SQLAlchemy type) for the key/value table.
    param_list = [
        ('trade_date', Date),
        ('key', String(45)),
        ('value', DOUBLE),
    ]
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    for item_key in keys:
        item_key = item_key.lower()
        if has_table:
            # Resume each code from the day after its latest stored trade_date
            # (falling back to ipo_date), capped at delist_date / yesterday-or-today.
            sql_str = """
                SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
                FROM
                (
                    SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                    FROM wind_stock_info info
                    LEFT OUTER JOIN
                        (SELECT wind_code, adddate(max(trade_date),1) trade_date
                        FROM {table_name} GROUP BY wind_code) kv
                    ON info.wind_code = kv.wind_code
                ) tt
                WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
                ORDER BY wind_code;""".format(table_name=table_name)
        else:
            logger.warning('wind_stock_kv 不存在,仅使用 wind_stock_info 表进行计算日期范围')
            sql_str = """
                SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
                FROM
                (
                    SELECT info.wind_code, ipo_date date_frm, delist_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                    FROM wind_stock_info info
                ) tt
                WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
                ORDER BY wind_code"""
        with with_db_session(engine_md) as session:
            # Per-code (date_from, date_to) ranges still missing data for this key.
            table = session.execute(sql_str)
            begin_time = None
            stock_date_dic = {
                wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
                for wind_code, date_from, date_to in table.fetchall()
                if wind_code_set is None or wind_code in wind_code_set}
        data_df_list = []
        data_len = len(stock_date_dic)
        logger.info('%d stocks will been import into wind_code_kv', data_len)
        try:
            for data_num, (wind_code, (date_from, date_to)) in enumerate(stock_date_dic.items(), start=1):
                # Fetch the indicator series year by year for this code.
                wind_indictor_str = item_key
                data_df = get_wind_kv_per_year(wind_code, wind_indictor_str, date_from, date_to,
                                               "year=%(year)d;westPeriod=180")
                if data_df is None:
                    logger.warning('%d/%d) %s has no data during %s %s',
                                   data_num, data_len, wind_code, date_from, date_to)
                    continue
                data_df.dropna(inplace=True)
                if data_df.shape[0] == 0:
                    logger.warning('%d/%d) %s has 0 data during %s %s',
                                   data_num, data_len, wind_code, date_from, date_to)
                    continue
                logger.info('%d/%d) %d data of %s between %s and %s',
                            data_num, data_len, data_df.shape[0], wind_code, date_from, date_to)
                data_df['wind_code'] = wind_code
                data_df.index.rename('trade_date', inplace=True)
                data_df.reset_index(inplace=True)
                data_df.rename(columns={item_key.upper(): 'value'}, inplace=True)
                data_df['key'] = item_key
                data_df_list.append(data_df)
                # FIX: the early break was unconditional (commented "debug use" but
                # not guarded), silently truncating production runs to 5 codes per key.
                if DEBUG and data_num >= 5:
                    break
        finally:
            # Flush whatever was collected, even on error/interrupt.
            if len(data_df_list) > 0:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
                logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_df_all.shape[0])
            if not has_table and engine_md.has_table(table_name):
                # Table was created by this run: convert engine and add the primary key.
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
def import_pub_fund_info(chain_param=None, first_time=False):
    """
    Import whole-market public (mutual) fund basic info into wind_pub_fund_info.
    (The original docstring said "convertible bond info" — copy-paste slip.)

    :param chain_param: used by celery task chains to pass the previous result to the next task
    :param first_time: when True, scan yearly from 2004-01-01 to collect every fund code
    :return: None
    """
    table_name = "wind_pub_fund_info"
    has_table = engine_md.has_table(table_name)
    if first_time:
        # Sample one date per year from 2004 through yesterday.
        date_since = datetime.strptime('2004-01-01', STR_FORMAT_DATE).date()
        date_list = []
        one_year = timedelta(days=365)
        while date_since < date.today() - ONE_DAY:
            date_list.append(date_since)
            date_since += one_year
        # The original used while/else here; with no break it always ran,
        # so the final day is simply appended after the loop.
        date_list.append(date.today() - ONE_DAY)
    else:
        date_list = [date.today() - ONE_DAY]
    # Collect the fund codes present on each sample date.
    wind_code_set = set()
    for fetch_date in date_list:
        data_set = get_wind_code_set(fetch_date)
        if data_set is not None:
            wind_code_set |= data_set
        if DEBUG and len(wind_code_set) > 6:
            break
    if has_table:
        # Skip codes whose info rows already exist.
        with with_db_session(engine_md) as session:
            sql_str = "select wind_code from %s" % table_name
            table = session.execute(sql_str)
            wind_code_set_existed = {content[0] for content in table.fetchall()}
        wind_code_set -= wind_code_set_existed
    wind_code_list = list(wind_code_set)
    wind_code_count = len(wind_code_list)
    seg_count = 1000
    data_info_df_list = []
    # (Wind field name, SQLAlchemy column type) for the wss info query.
    fund_info_field_col_name_list = [
        ('FUND_FULLNAME', String(200)),
        ('FUND_EXCHANGESHORTNAME', String(50)),
        ('FUND_BENCHMARK', String(200)),
        ('FUND_SETUPDATE', Date),
        ('FUND_MATURITYDATE', Date),
        # FIX: this field previously appeared twice (String(500) then String(50));
        # kept a single entry, matching the String(500) used by the private-fund importer.
        ('FUND_FUNDMANAGER', String(500)),
        ('FUND_MGRCOMP', String(200)),
        ('FUND_CUSTODIANBANK', String(50)),
        ('FUND_TYPE', String(50)),
        ('FUND_FIRSTINVESTTYPE', String(50)),
        ('FUND_INVESTTYPE', String(50)),
        ('FUND_STRUCTUREDFUNDORNOT', String(50)),
        ('FUND_BENCHINDEXCODE', String(50)),
    ]
    dtype = {key.lower(): val for key, val in fund_info_field_col_name_list}
    dtype['wind_code'] = String(20)
    # Loop-invariant: build the wss field list once, outside the chunk loop.
    field_str = ",".join(col_name.lower() for col_name, _ in fund_info_field_col_name_list)
    for n in range(0, wind_code_count, seg_count):
        sub_list = wind_code_list[n:(n + seg_count)]
        stock_info_df = invoker.wss(sub_list, field_str)
        data_info_df_list.append(stock_info_df)
        # NOTE(review): this compares the number of chunk DataFrames (not rows)
        # to 1000 — probably meant a row count; confirm intent.
        if DEBUG and len(data_info_df_list) > 1000:
            break
    if len(data_info_df_list) == 0:
        logger.info("%s 没有数据可以导入", table_name)
        return
    data_info_all_df = pd.concat(data_info_df_list)
    data_info_all_df.index.rename('wind_code', inplace=True)
    data_info_all_df.rename(
        columns={col: col.lower() for col in data_info_all_df.columns},
        inplace=True)
    logging.info('%d data will be import', data_info_all_df.shape[0])
    data_info_all_df.reset_index(inplace=True)
    bunch_insert_on_duplicate_update(data_info_all_df, table_name, engine_md, dtype=dtype)
    if not has_table and engine_md.has_table(table_name):
        # Table was created by this run: convert engine and add the primary key.
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # Refresh the code_mapping table from the info table just updated.
    update_from_info_table(table_name)
def import_stock_hk_info(chain_param=None, ths_code=None, refresh=False):
    """
    Import HK stock basic info into ifind_stock_hk_info.

    :param chain_param: only used by task.chain to pass parameters between chained tasks
    :param ths_code: comma-separated ths codes; when None the whole-market code set is collected
    :param refresh: when True, re-scan the market yearly starting from 1991-02-01
    :return: None
    """
    table_name = 'ifind_stock_hk_info'
    logging.info("更新 %s 开始", table_name)
    if ths_code is None:
        # Collect the whole-market HK code set, sampling one date per year.
        if refresh:
            date_fetch = datetime.strptime('1991-02-01', STR_FORMAT_DATE).date()
        else:
            date_fetch = date.today()
        date_end = date.today()
        stock_hk_code_set = set()
        while date_fetch < date_end:
            stock_hk_code_set_sub = get_stock_hk_code_set(date_fetch)
            if stock_hk_code_set_sub is not None:
                stock_hk_code_set |= stock_hk_code_set_sub
            date_fetch += timedelta(days=365)
        stock_hk_code_set_sub = get_stock_hk_code_set(date_end)
        if stock_hk_code_set_sub is not None:
            stock_hk_code_set |= stock_hk_code_set_sub
        if DEBUG:
            stock_hk_code_set = list(stock_hk_code_set)[:10]
        ths_code = ','.join(stock_hk_code_set)
    # FIX: the DELETE branch below previously referenced stock_hk_code_set, which is
    # undefined when the caller passes ths_code — derive the code list from ths_code.
    code_list = [code for code in ths_code.split(',') if code != '']
    # (indicator, default param, SQLAlchemy column type) for THS_BasicData.
    indicator_param_list = [
        ('ths_stock_short_name_hks', '', String(40)),
        ('ths_stock_code_hks', '', String(20)),
        ('ths_isin_code_hks', '', String(40)),
        ('ths_corp_ashare_short_name_hks', '', String(10)),
        ('ths_corp_ashare_code_hks', '', String(60)),
        ('ths_stock_varieties_hks', '', String(40)),
        ('ths_ipo_date_hks', '', Date),
        ('ths_listed_exchange_hks', '', String(60)),
        ('ths_stop_listing_date_hks', '', Date),
        ('ths_corp_cn_name_hks', '', String(120)),
        ('ths_corp_name_en_hks', '', String(120)),
        ('ths_established_date_hks', '', Date),
        ('ths_accounting_date_hks', '', String(20)),
        ('ths_general_manager_hks', '', String(40)),
        ('ths_secretary_hks', '', String(40)),
        ('ths_operating_scope_hks', '', Text),
        ('ths_mo_product_name_hks', '', String(200)),
        ('ths_district_hks', '', String(60)),
        ('ths_reg_address_hks', '', String(200)),
        ('ths_office_address_hks', '', String(200)),
        ('ths_corp_tel_hks', '', String(200)),
        ('ths_corp_fax_hks', '', String(200)),
        ('ths_corp_website_hks', '', String(200)),
        ('ths_auditor_hks', '', String(60)),
        ('ths_legal_counsel_hks', '', String(300)),
        ('ths_hs_industry_hks', '', String(40)),
    ]
    # Build the ';'-joined indicator and parameter strings for the iFinD call.
    indicator, param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
    param += '100'
    data_df = invoker.THS_BasicData(ths_code, indicator, param)
    if data_df is None or data_df.shape[0] == 0:
        logging.info("没有可用的 stock_hk info 可以更新")
        return
    # Delete the old rows for these codes before re-inserting the fresh data.
    has_table = engine_md.has_table(table_name)
    if has_table:
        with with_db_session(engine_md) as session:
            session.execute(
                "DELETE FROM {table_name} WHERE ths_code IN (".format(table_name=table_name) +
                ','.join([':code%d' % n for n in range(len(code_list))]) + ")",
                params={'code%d' % n: val for n, val in enumerate(code_list)})
            session.commit()
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        # Table was created by this run: convert engine and add the primary key.
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # Refresh the code_mapping table from the info table just updated.
    update_from_info_table(table_name)
def import_future_daily_his(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    Update daily-level quotes for future contracts into ``ifind_future_daily``.

    :param chain_param: only used by task.chain to pass results between serial celery tasks
    :param ths_code_set: optional whitelist of ths codes to update
    :param begin_time: optional earliest date; extends each code's range backwards
    :return: None
    """
    table_name = 'ifind_future_daily'
    info_table_name = 'ifind_future_info'
    logger.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    indicator_param_list = [
        ('preClose', String(20)),
        ('open', DOUBLE),
        ('high', DOUBLE),
        ('low', DOUBLE),
        ('close', DOUBLE),
        ('volume', DOUBLE),
        ('amount', DOUBLE),
        ('avgPrice', DOUBLE),
        ('change', DOUBLE),
        ('changeRatio', DOUBLE),
        ('preSettlement', DOUBLE),
        ('settlement', DOUBLE),
        ('change_settlement', DOUBLE),
        ('chg_settlement', DOUBLE),
        ('openInterest', DOUBLE),
        ('positionChange', DOUBLE),
        ('amplitude', DOUBLE),
    ]
    json_indicator = ','.join([key for key, _ in indicator_param_list])
    if has_table:
        # After 16:00 download today's close, before 16:00 only up to yesterday.
        # Codes whose date_to is more than ~1 year old may no longer be fillable,
        # e.g. AL0202.SHF AL9902.SHF CU0202.SHF
        # TODO: ths_ksjyr_future should be replaced by ths_contract_listed_date_future
        sql_str = """SELECT ths_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
                SELECT fi.ths_code, ifnull(trade_date_max_1, ths_start_trade_date_future) date_frm,
                    ths_last_td_date_future lasttrade_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) wfd
                ON fi.ths_code = wfd.ths_code
            ) tt
            WHERE date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date)
            -- AND subdate(curdate(), 360) < if(lasttrade_date<end_date, lasttrade_date, end_date)
            ORDER BY ths_code""".format(table_name=table_name, info_table_name=info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
                SELECT fi.ths_code, ths_start_trade_date_future date_frm,
                    ths_last_td_date_future lasttrade_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi
            ) tt""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)

    with with_db_session(engine_md) as session:
        # Compute, per code, the date range that still needs daily data.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set}

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))  # trial accounts only reach back 5 years
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to}

    # dtype for the bulk insert.
    dtype = {key: val for key, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    dtype['time'] = Date
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
    try:
        logger.info("%d future instrument will be handled", code_count)
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            if begin_time is None:
                continue
            if begin_time > end_time:
                logger.warning('%d/%d) %s [%s - %s] 日期范围无效,跳过', num, code_count, ths_code, begin_time, end_time)
                continue
            else:
                logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            try:
                data_df = invoker.THS_HistoryQuotes(
                    ths_code, json_indicator,
                    '',  # 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                    begin_time, end_time)
            except APIError as exp:
                if exp.ret_dic['error_code'] in (-4001, -4210,):
                    # Known recoverable error codes: skip this code, keep going.
                    data_df = None
                    logger.exception("THS_HistoryQuotes 获取失败 '%s', '%s', %s, %s 不影响后续任务",
                                     ths_code, json_indicator, begin_time, end_time)
                else:
                    logger.exception("THS_HistoryQuotes 获取失败 '%s', '%s', %s, %s",
                                     ths_code, json_indicator, begin_time, end_time)
                    break
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Flush to DB once the accumulated row count passes the threshold.
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
                tot_data_count += data_count
                # FIX: log BEFORE resetting data_count; previously the log ran
                # after the reset and therefore always reported "0 条".
                logging.info("%s 新增数据 %d 条", table_name, data_count)
                data_df_list, data_count = [], 0
            # Debug only: stop early after a couple of frames.
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # Flush whatever is left, even if the loop aborted.
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
            tot_data_count += data_count
            logging.info("%s 新增数据 %d 条", table_name, data_count)
        # First creation of the table: convert engine and build the primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
    logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def import_stock_hk_daily_ds(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    Save historical data through the date-serial API into ``ifind_stock_hk_daily_ds``;
    this complements the History data, e.g. adjustment factor af, limit-up/down flag,
    suspension status and reason.

    :param chain_param: only used by task.chain to pass results between serial celery tasks
    :param ths_code_set: optional whitelist of ths codes to update
    :param begin_time: optional earliest date; extends each code's range backwards
    :return: None
    """
    table_name = 'ifind_stock_hk_daily_ds'
    info_table_name = 'ifind_stock_hk_info'
    # Example of the flattened request strings produced below:
    # jsonIndicator='ths_pre_close_stock;ths_open_price_stock;...;ths_last_td_date_stock'
    # jsonparam='100;100;100;100;100;;100;100;;;;;;;;;'
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_DAILY_DS], sep=';')
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume from the day after each code's latest stored date (or IPO date).
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM
                    {info_table_name} info
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date)
            ORDER BY ths_code""".format(table_name=table_name, info_table_name=info_table_name)
    else:
        # No daily table yet: derive ranges from the info table alone.
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date)
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)

    with with_db_session(engine_md) as session:
        # Compute, per code, the date range that still needs daily data.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set}

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))  # trial accounts only reach back 5 years
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to}

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
    try:
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            data_df = invoker.THS_DateSerial(
                ths_code,
                json_indicator,
                json_param,
                'Days:Tradedays,Fill:Previous,Interval:D',
                begin_time, end_time
            )
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Debug only: stop after the first frame.
            if DEBUG and len(data_df_list) > 0:
                break
            # Flush to DB once the accumulated row count passes the threshold.
            if data_count >= 2000:
                tot_data_df = pd.concat(data_df_list)
                # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, DTYPE_STOCK_HK_DAILY_DS)
                tot_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # Flush whatever is left, even if the loop aborted.
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, DTYPE_STOCK_HK_DAILY_DS)
            tot_data_count += data_count
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
    # First creation of the table: convert engine and build the primary key.
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
def import_wind_bonus(chain_param=None):
    """
    Import stock dividend/bonus data into ``wind_stock_bonus``.

    :param chain_param: only used by celery to pass the previous task's result forward
    :return: None
    """
    table_name = 'wind_stock_bonus'
    has_table = engine_md.has_table(table_name)
    param_list = [
        ('div_cashbeforetax2', DOUBLE),
        ('div_cashaftertax2', DOUBLE),
        ('div_stock2', DOUBLE),
        ('div_capitalization2', DOUBLE),
        ('div_capitalization', DOUBLE),
        ('div_stock', DOUBLE),
        ('div_cashaftertax', DOUBLE),
        ('div_cashbeforetax', DOUBLE),
        ('div_cashandstock', DOUBLE),
        ('div_recorddate', Date),
        ('div_exdate', Date),
        ('div_paydate', Date),
        ('div_trddateshareb', Date),
        ('div_preDisclosureDate', Date),
        ('div_prelandate', Date),
        ('div_smtgdate', Date),
        ('div_impdate', Date),
    ]
    # Comma-separated indicator list for the wss request.
    param = ",".join([key for key, _ in param_list])
    with with_db_session(engine_md) as session:
        table = session.execute('SELECT wind_code, ipo_date, delist_date FROM wind_stock_info')
        # Map wind_code -> (ipo_date, delist_date); a delist_date at/below the
        # UN_AVAILABLE_DATE sentinel means "not delisted" and is stored as None.
        stock_date_dic = {wind_code: (ipo_date, delist_date if delist_date > UN_AVAILABLE_DATE else None)
                          for wind_code, ipo_date, delist_date in table.fetchall()}
    print(len(stock_date_dic))
    DATE_LIST = [datetime.strptime('2010-12-31', STR_FORMAT_DATE).date(),
                 datetime.strptime('2011-12-31', STR_FORMAT_DATE).date(),
                 datetime.strptime('2012-12-31', STR_FORMAT_DATE).date(),
                 datetime.strptime('2013-12-31', STR_FORMAT_DATE).date(),
                 datetime.strptime('2014-12-31', STR_FORMAT_DATE).date(),
                 datetime.strptime('2015-12-31', STR_FORMAT_DATE).date(),
                 ]
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dic_exdate_df_list = []
    for rep_date in DATE_LIST:
        # NOTE(review): rep_date_str is computed but never passed to wss below —
        # it looks like a per-report-date option (e.g. rptDate) was intended; verify.
        rep_date_str = rep_date.strftime('%Y%m%d')
        # Stocks already listed at rep_date and not yet delisted by then.
        stock_list = [s for s, date_pair in stock_date_dic.items()
                      if date_pair[0] < rep_date and (rep_date < date_pair[1] if date_pair[1] is not None else True)]
        dic_exdate_df = invoker.wss(stock_list, param)
        dic_exdate_df_list.append(dic_exdate_df)
    dic_exdate_df_all = pd.concat(dic_exdate_df_list)
    dic_exdate_df_all.index.rename('wind_code', inplace=True)
    dic_exdate_df_all.drop_duplicates(inplace=True)
    # Normalize all date columns returned by wss (uppercase column names) via str_date.
    dic_exdate_df_all['DIV_EXDATE'] = dic_exdate_df_all['DIV_EXDATE'].apply(str_date)
    dic_exdate_df_all['DIV_PAYDATE'] = dic_exdate_df_all['DIV_PAYDATE'].apply(str_date)
    dic_exdate_df_all['DIV_IMPDATE'] = dic_exdate_df_all['DIV_IMPDATE'].apply(str_date)
    dic_exdate_df_all['DIV_RECORDDATE'] = dic_exdate_df_all['DIV_RECORDDATE'].apply(str_date)
    dic_exdate_df_all['DIV_PREDISCLOSUREDATE'] = dic_exdate_df_all['DIV_PREDISCLOSUREDATE'].apply(str_date)
    dic_exdate_df_all['DIV_PRELANDATE'] = dic_exdate_df_all['DIV_PRELANDATE'].apply(str_date)
    dic_exdate_df_all['DIV_SMTGDATE'] = dic_exdate_df_all['DIV_SMTGDATE'].apply(str_date)
    dic_exdate_df_all['DIV_TRDDATESHAREB'] = dic_exdate_df_all['DIV_TRDDATESHAREB'].apply(str_date)
    # Keep only rows where at least one of the four key dates is present.
    condition = ~(dic_exdate_df_all['DIV_EXDATE'].apply(lambda x: x is None)
                  & dic_exdate_df_all['DIV_PAYDATE'].apply(lambda x: x is None)
                  & dic_exdate_df_all['DIV_IMPDATE'].apply(lambda x: x is None)
                  & dic_exdate_df_all['DIV_RECORDDATE'].apply(lambda x: x is None)
                  )
    dic_exdate_df_available = dic_exdate_df_all[condition]
    bunch_insert_on_duplicate_update(dic_exdate_df_available, table_name, engine_md, dtype=dtype)
    # First creation of the table: convert engine and build the primary key.
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
def import_stock_hk_fin_quarterly(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    Save quarterly financial data through the date-serial API into ``ifind_stock_hk_fin``.

    :param chain_param: only used by task.chain to pass results between serial celery tasks
    :param ths_code_set: optional whitelist of ths codes to update
    :param begin_time: optional earliest date; extends each code's range backwards
    :return: None
    """
    table_name = 'ifind_stock_hk_fin'
    info_table_name = 'ifind_stock_hk_info'
    # Example of the flattened request strings produced below:
    # ths_cce_hks;ths_total_liab_hks;ths_ebit_ttm_hks
    # jsonparam='2013,100,OC;2013,100,OC;OC,101'
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_FIN], sep=';')
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume from the day after each code's latest stored date (or IPO date).
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM
                    {info_table_name} info
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date)
            ORDER BY ths_code""".format(table_name=table_name, info_table_name=info_table_name)
    else:
        # No fin table yet: derive ranges from the info table alone.
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date)
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)

    with with_db_session(engine_md) as session:
        # Compute, per code, the date range that still needs data.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set}

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))  # trial accounts only reach back 5 years
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to}

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
    try:
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            data_df = invoker.THS_DateSerial(
                ths_code,
                json_indicator,
                json_param,
                'Days:Tradedays,Fill:Previous,Interval:Q',
                begin_time, end_time
            )
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Debug only: stop after the first frame.
            if DEBUG and len(data_df_list) > 0:
                break
            # Flush to DB once the accumulated row count passes the threshold.
            if data_count >= 2000:
                tot_data_df = pd.concat(data_df_list)
                # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, DTYPE_STOCK_HK_FIN)
                tot_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # Flush whatever is left, even if the loop aborted.
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, DTYPE_STOCK_HK_FIN)
            tot_data_count += data_count
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
    # First creation of the table: convert engine and build the primary key.
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
def import_future_daily(chain_param=None, wind_code_set=None, begin_time=None):
    """
    Update daily-level quotes for future contracts into ``wind_future_daily``.

    :param chain_param: only used by celery to pass the previous task's result forward
    :param wind_code_set: optional whitelist of wind codes to update
    :param begin_time: optional earliest date; extends each code's range backwards
    :return: None
    """
    table_name = "wind_future_daily"
    logger.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    param_list = [
        ("open", DOUBLE),
        ("high", DOUBLE),
        ("low", DOUBLE),
        ("close", DOUBLE),
        ("volume", DOUBLE),
        ("amt", DOUBLE),
        ("dealnum", DOUBLE),
        ("settle", DOUBLE),
        ("oi", DOUBLE),
        ("st_stock", DOUBLE),
        ('position', DOUBLE),
        ('instrument_id', String(20)),
        ('trade_date', Date,),
    ]
    # Only the first 10 entries are wsd indicators; the rest are derived columns.
    wind_indictor_str = ",".join([key for key, _ in param_list[:10]])
    if has_table:
        # Resume from the day after each code's latest stored trade_date.
        sql_str = """
            select wind_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
                select fi.wind_code, ifnull(trade_date_max_1, ipo_date) date_frm, lasttrade_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                from wind_future_info fi
                left outer join
                    (select wind_code, adddate(max(trade_date),1) trade_date_max_1 from {table_name} group by wind_code) wfd
                on fi.wind_code = wfd.wind_code
            ) tt
            where date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date)
            -- and subdate(curdate(), 360) < if(lasttrade_date<end_date, lasttrade_date, end_date)
            order by wind_code""".format(table_name=table_name)
    else:
        # No daily table yet: derive ranges from wind_future_info alone.
        sql_str = """
            SELECT wind_code, date_frm, if(lasttrade_date<end_date,lasttrade_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code,ipo_date date_frm, lasttrade_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_future_info info
            ) tt
            WHERE date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date)
            ORDER BY wind_code;
            """
        logger.warning('%s 不存在,仅使用 wind_future_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        # Map wind_code -> (date_from, date_to); begin_time may widen the range.
        future_date_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set}

    # dtype for the bulk insert.
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date
    data_df_list = []
    data_len = len(future_date_dic)
    try:
        logger.info("%d future instrument will be handled", data_len)
        for num, (wind_code, (date_frm, date_to)) in enumerate(future_date_dic.items()):
            # Previously limited to RU contracts only:
            # if wind_code.find('RU') == -1:
            #     continue
            if date_frm > date_to:
                continue
            date_frm_str = date_frm.strftime(STR_FORMAT_DATE)
            date_to_str = date_to.strftime(STR_FORMAT_DATE)
            logger.info('%d/%d) get %s between %s and %s', num, data_len, wind_code, date_frm_str, date_to_str)
            # data_df = wsd_cache(w, wind_code, "open,high,low,close,volume,amt,dealnum,settle,oi,st_stock",
            #                     date_frm, date_to, "")
            try:
                data_df = invoker.wsd(wind_code, wind_indictor_str, date_frm_str, date_to_str, "")
            except APIError as exp:
                logger.exception("%d/%d) %s 执行异常", num, data_len, wind_code)
                if exp.ret_dic.setdefault('error_code', 0) in (
                        -40520007,  # no data available
                        -40521009,  # decode failure; check inputs, e.g. month-end dates / short February
                ):
                    continue
                else:
                    break
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num, data_len, wind_code, date_frm_str, date_to)
                continue
            logger.info('%d/%d) %d data of %s between %s and %s',
                        num, data_len, data_df.shape[0], wind_code, date_frm_str, date_to)
            # Shape the frame: add wind_code, promote index to trade_date column,
            # lowercase column names, rename oi -> position, derive instrument_id.
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df.rename(columns={c: str.lower(c) for c in data_df.columns}, inplace=True)
            data_df.rename(columns={'oi': 'position'}, inplace=True)
            data_df['instrument_id'] = wind_code.split('.')[0]
            data_df_list.append(data_df)
            # Debug only: stop after the first frame.
            if DEBUG and len(data_df_list) >= 1:
                break
    finally:
        # Insert everything collected, even if the loop aborted.
        data_df_count = len(data_df_list)
        if data_df_count > 0:
            logger.info('merge data with %d df', data_df_count)
            data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype)
            logger.info("更新 %s 结束 %d 条记录被更新", table_name, data_count)
            # First creation of the table: convert engine and build the primary key.
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
        else:
            logger.info("更新 %s 结束 0 条记录被更新", table_name)
def import_stock_quertarly(chain_param=None, wind_code_set=None):
    """
    Import quarterly financial indicator data into ``wind_fina_indicator``
    up to the most recent workday minus one.

    :param chain_param: only used by celery to pass the previous task's result forward
    :param wind_code_set: optional whitelist of wind codes to update
    :return: None
    """
    logging.info("更新 wind_fina_indicator 开始")
    table_name = 'wind_fina_indicator'
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume from the day after each code's latest stored trade_date.
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info info
                LEFT OUTER JOIN
                    (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) quertarly
                ON info.wind_code = quertarly.wind_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code;""".format(table_name=table_name)
    else:
        logger.warning('wind_fina_indicator 不存在,仅使用 wind_stock_info 表进行计算日期范围')
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code, ipo_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code"""
    with with_db_session(engine_md) as session:
        # Compute, per stock, the date range that still needs data.
        table = session.execute(sql_str)
        begin_time = None
        # Map wind_code -> (date_from, date_to).
        stock_date_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set}
    # Quarterly indicators requested from wsd.
    param_list = [
        ('roic_ttm', DOUBLE),
        ('yoyprofit', DOUBLE),
        ('ebit', DOUBLE),
        ('ebit2', DOUBLE),
        ('ebit2_ttm', DOUBLE),
        ('surpluscapitalps', DOUBLE),
        ('undistributedps', DOUBLE),
        # NOTE(review): stm_issuingdate looks like a date field but is declared
        # DOUBLE here — confirm against the table schema.
        ('stm_issuingdate', DOUBLE),
    ]
    wind_indictor_str = ",".join(key for key, _ in param_list)
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date
    data_df_list = []
    logger.info('%d stocks will been import into wind_stock_quertarly', len(stock_date_dic))
    try:
        for stock_num, (wind_code, (date_from, date_to)) in enumerate(stock_date_dic.items()):
            # Example request:
            # w.wsd("002122.SZ", "roic_ttm,yoyprofit,ebit,ebit2,ebit2_ttm,surpluscapitalps,undistributedps,stm_issuingdate",
            #       "2012-12-31", "2017-12-06", "unit=1;rptType=1;Period=Q")
            data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to, "unit=1;rptType=1;Period=Q")
            if data_df is None:
                logger.warning('%d) %s has no data during %s %s', stock_num, wind_code, date_from, date_to)
                continue
            data_df.rename(columns={c: str(c).lower() for c in data_df.columns}, inplace=True)
            # Drop leading rows whose values are all None (empty quarters).
            for trade_date in list(data_df.index[:10]):
                is_all_none = data_df.loc[trade_date].apply(lambda x: x is None).all()
                if is_all_none:
                    logger.warning("%s %s 数据全部为空", wind_code, trade_date)
                    data_df.drop(trade_date, inplace=True)
            logger.info('%d) %d data of %s between %s and %s',
                        stock_num, data_df.shape[0], wind_code, date_from, date_to)
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df_list.append(data_df)
            # Debug only: stop early after a handful of frames.
            if DEBUG and len(data_df_list) > 10:
                break
    finally:
        # Insert everything collected, even if the loop aborted.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
            logging.info("更新 wind_stock_quertarly 结束 %d 条信息被更新", data_df_all.shape[0])
            # First creation of the table: convert engine and build the primary key.
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
def import_future_info(chain_param=None):
    """
    Update the future contract list into ``wind_future_info``.

    :param chain_param: only used by celery to pass the previous task's result forward
    :return: None
    """
    table_name = "wind_future_info"
    has_table = engine_md.has_table(table_name)
    logger.info("更新 %s 开始", table_name)
    # Load the contracts already stored, to avoid re-querying them.
    if has_table:
        sql_str = 'select wind_code, ipo_date from {table_name}'.format(table_name=table_name)
        with with_db_session(engine_md) as session:
            table = session.execute(sql_str)
            wind_code_ipo_date_dic = dict(table.fetchall())
    else:
        wind_code_ipo_date_dic = {}
    # Sector constituents fetched from Wind, one entry per index product.
    # (Contract-level entries with \d{3,4} patterns were removed as dead,
    # commented-out data; see version control history if needed.)
    # w.start()  # initialize the Wind server interface
    future_sectorid_dic_list = [
        # CFFEX index products
        {'subject_name': 'CFE 沪深300', 'regex': r"IF\.CFE",
         'sectorid': 'a599010102000000', 'date_establish': '2010-4-16'},
        {'subject_name': 'CFE 上证50', 'regex': r"IH\.CFE",
         'sectorid': '1000014871000000', 'date_establish': '2015-4-16'},
        {'subject_name': 'CFE 中证500', 'regex': r"IC\.CFE",
         'sectorid': '1000014872000000', 'date_establish': '2015-4-16'},
        {'subject_name': '2年期国债', 'regex': r"TS\.CFE",
         'sectorid': '1000014880000000', 'date_establish': '2018-08-17'},
        {'subject_name': '5年期国债', 'regex': r"TF\.CFE",
         'sectorid': '1000010299000000', 'date_establish': '2013-09-06'},
        {'subject_name': '10年期国债', 'regex': r"T\.CFE",
         'sectorid': '1000014874000000', 'date_establish': '2015-03-20'},
        # DCE index products
        {'subject_name': 'DCE 焦炭', 'regex': r"J\.DCE",
         'sectorid': '1000002976000000', 'date_establish': '2011-04-15'},
        {'subject_name': 'DCE 焦煤', 'regex': r"JM\.DCE",
         'sectorid': '1000009338000000', 'date_establish': '2013-03-22'},
        {'subject_name': '铁矿石', 'regex': r"I\.DCE",
         'sectorid': '1000006502000000', 'date_establish': '2013-10-18'},
        {'subject_name': '豆粕', 'regex': r"M\.DCE",
         'sectorid': 'a599010304000000', 'date_establish': '2000-07-17'},
        {'subject_name': '豆油', 'regex': r"Y\.DCE",
         'sectorid': 'a599010306000000', 'date_establish': '2006-01-09'},
        {'subject_name': '棕榈油', 'regex': r"P\.DCE",
         'sectorid': 'a599010307000000', 'date_establish': '2007-10-29'},
        {'subject_name': '豆一', 'regex': r"A\.DCE",
         'sectorid': 'a599010302000000', 'date_establish': '2004-07-15'},
        {'subject_name': '豆二', 'regex': r"B\.DCE",
         'sectorid': 'a599010303000000', 'date_establish': '2004-12-22'},
        {'subject_name': '玉米', 'regex': r"C\.DCE",
         'sectorid': 'a599010305000000', 'date_establish': '2004-09-22'},
        {'subject_name': '玉米淀粉', 'regex': r"CS\.DCE",
         'sectorid': '1000011469000000', 'date_establish': '2014-12-19'},
        {'subject_name': '鸡蛋', 'regex': r"JD\.DCE",
         'sectorid': '1000011464000000', 'date_establish': '2013-11-08'},
        {'subject_name': '线型低密度聚乙烯', 'regex': r"L\.DCE",
         'sectorid': 'a599010308000000', 'date_establish': '2007-07-31'},
        {'subject_name': '聚氯乙烯', 'regex': r"V\.DCE",
         'sectorid': 'a599010309000000', 'date_establish': '2009-05-25'},
        {'subject_name': '聚丙烯', 'regex': r"PP\.DCE",
         'sectorid': '1000011468000000', 'date_establish': '2014-02-28'},
        # SHFE index products
        {'subject_name': '天然橡胶', 'regex': r"RU\.SHF",
         'sectorid': 'a599010208000000', 'date_establish': '1995-06-01'},
        {'subject_name': '铜', 'regex': r"CU\.SHF",
         'sectorid': 'a599010202000000', 'date_establish': '1995-05-01'},
        {'subject_name': '铝', 'regex': r"AL\.SHF",
         'sectorid': 'a599010203000000', 'date_establish': '1995-05-01'},
        {'subject_name': '锌', 'regex': r"ZN\.SHF",
         'sectorid': 'a599010204000000', 'date_establish': '2007-03-26'},
        {'subject_name': '铅', 'regex': r"PB\.SHF",
         'sectorid': '1000002892000000', 'date_establish': '2011-03-24'},
        {'subject_name': '镍', 'regex': r"NI\.SHF",
         'sectorid': '1000011457000000', 'date_establish': '2015-03-27'},
        {'subject_name': '锡', 'regex': r"SN\.SHF",
         'sectorid': '1000011458000000', 'date_establish': '2015-03-27'},
        {'subject_name': 'SHFE 黄金', 'regex': r"AU\.SHF",
         'sectorid': 'a599010205000000', 'date_establish': '2008-01-09'},
        {'subject_name': 'SHFE 沪银', 'regex': r"AG\.SHF",
         'sectorid': '1000006502000000', 'date_establish': '2012-05-10'},
        {'subject_name': 'SHFE 螺纹钢', 'regex': r"RB\.SHF",
         'sectorid': 'a599010206000000', 'date_establish': '2009-03-27'},
        {'subject_name': 'SHFE 热卷', 'regex': r"HC\.SHF",
         'sectorid': '1000011455000000', 'date_establish': '2014-03-21'},
        # FIX: was r"BU\\.SHF" — a raw string with a DOUBLE backslash matches a
        # literal backslash plus any char, so it could never match "BU....SHF".
        {'subject_name': 'SHFE 沥青', 'regex': r"BU\.SHF",
         'sectorid': '1000011013000000', 'date_establish': '2013-10-09'},
        {'subject_name': '原油', 'regex': r"SC\.SHF",
         'sectorid': '1000011463000000', 'date_establish': '2018-03-26'},
        # CZCE index products
        {'subject_name': '白糖', 'regex': r"SR\.CZC",
         'sectorid': 'a599010405000000', 'date_establish': '2006-01-06'},
        {'subject_name': '棉花', 'regex': r"CF\.CZC",
         'sectorid': 'a599010404000000', 'date_establish': '2004-06-01'},
        {'subject_name': '动力煤', 'regex': r"(ZC|TC)\.CZC",
         'sectorid': '1000011012000000', 'date_establish': '2013-09-26'},
        {'subject_name': '玻璃', 'regex': r"FG\.CZC",
         'sectorid': '1000008549000000', 'date_establish': '2013-12-03'},
        {'subject_name': '精对苯二甲酸', 'regex': r"TA\.CZC",
         'sectorid': 'a599010407000000', 'date_establish': '2006-12-18'},
        {'subject_name': '甲醇', 'regex': r"(ME|MA)\.CZC",
         'sectorid': '1000005981000000', 'date_establish': '2011-10-28'},
        {'subject_name': '菜籽油', 'regex': r"OI\.CZC",
         'sectorid': 'a599010408000000', 'date_establish': '2007-06-08'},
        {'subject_name': '菜籽粕', 'regex': r"RM\.CZC",
         'sectorid': '1000008622000000', 'date_establish': '2012-12-28'},
    ]
    wind_code_set = set()
    ndays_per_update = 60  # scan sector constituents in 60-day steps
    # wss indicators and their SQL column types.
    col_name_param_list = [
        ("ipo_date", Date),
        ("sec_name", String(50)),
        ("sec_englishname", String(200)),
        ("exch_eng", String(200)),
        ("lasttrade_date", Date),
        ("lastdelivery_date", Date),
        ("dlmonth", String(20)),
        ("lprice", DOUBLE),
        ("sccode", String(20)),
        ("margin", DOUBLE),
        ("punit", String(200)),
        ("changelt", DOUBLE),
        ("mfprice", DOUBLE),
        ("contractmultiplier", DOUBLE),
        ("ftmargins", String(100)),
        ("trade_code", String(200)),
    ]
    wind_indictor_str = ",".join(col_name for col_name, _ in col_name_param_list)
    dtype = {key: val for key, val in col_name_param_list}
    dtype['wind_code'] = String(20)
    # Collect historical contract codes sector by sector.
    logger.info("获取历史期货合约列表信息")
    for future_sectorid_dic in future_sectorid_dic_list:
        subject_name = future_sectorid_dic['subject_name']
        sector_id = future_sectorid_dic['sectorid']
        regex_str = future_sectorid_dic['regex']
        date_establish = datetime.strptime(future_sectorid_dic['date_establish'], STR_FORMAT_DATE).date()
        # Start from the latest already-known ipo date for codes matching the regex.
        date_since = get_date_since(wind_code_ipo_date_dic, regex_str, date_establish)
        date_yestoday = date.today() - timedelta(days=1)
        logger.info("%s[%s] %s ~ %s", subject_name, sector_id, date_since, date_yestoday)
        while date_since <= date_yestoday:
            date_since_str = date_since.strftime(STR_FORMAT_DATE)
            future_info_df = invoker.wset("sectorconstituent",
                                          "date=%s;sectorid=%s" % (date_since_str, sector_id))
            data_count = 0 if future_info_df is None else future_info_df.shape[0]
            logger.info("subject_name=%s[%s] %s 返回 %d 条数据", subject_name, sector_id, date_since_str, data_count)
            if data_count > 0:
                wind_code_set |= set(future_info_df['wind_code'])
            if date_since >= date_yestoday:
                break
            else:
                # Step forward, clamping to yesterday so the last day is included.
                date_since += timedelta(days=ndays_per_update)
                if date_since > date_yestoday:
                    date_since = date_yestoday
    # Only contracts not already in the DB need their basic info fetched.
    wind_code_list = [wc for wc in wind_code_set if wc not in wind_code_ipo_date_dic]
    # Example:
    # w.wss("AU1706.SHF,AG1612.SHF,AU0806.SHF",
    #       "ipo_date,sec_name,sec_englishname,exch_eng,lasttrade_date,lastdelivery_date,dlmonth,lprice,sccode,"
    #       "margin,punit,changelt,mfprice,contractmultiplier,ftmargins,trade_code")
    if len(wind_code_list) > 0:
        logger.info("%d wind_code will be invoked by wss, wind_code_list=%s", len(wind_code_list), wind_code_list)
        future_info_df = invoker.wss(wind_code_list, wind_indictor_str)
        # mfprice may come back as text; normalize to a number.
        future_info_df['MFPRICE'] = future_info_df['MFPRICE'].apply(mfprice_2_num)
        future_info_count = future_info_df.shape[0]
        future_info_df.rename(columns={c: str.lower(c) for c in future_info_df.columns}, inplace=True)
        future_info_df.index.rename('wind_code', inplace=True)
        future_info_df.reset_index(inplace=True)
        data_count = bunch_insert_on_duplicate_update(future_info_df, table_name, engine_md, dtype=dtype)
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
        # First creation of the table: convert engine and build the primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
        logger.info("更新 wind_future_info 结束 %d 条记录被更新", future_info_count)
        # Refresh the code_mapping table from the info table.
        update_from_info_table(table_name)
def import_tushare_stock_index_weight(chain_param=None, ts_code_set=None):
    """
    Import index constituent-weight data into ``tushare_stock_index_weight``.

    For every index code found in ``tushare_stock_index_daily_md`` the list of
    its trade dates is collected, the weight data for each (index, date) pair
    is fetched through :func:`invoke_index_weight` and bulk-upserted.

    Fix vs. original: ``data_df`` is now initialized before the loop, so the
    ``finally`` block can no longer raise ``NameError`` when the query returns
    no rows and the loop body never runs.

    :param chain_param: used by celery task chains to pass the previous result on
    :param ts_code_set: NOTE(review): accepted but never applied below — confirm intent
    :return: None
    """
    table_name = 'tushare_stock_index_weight'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Collect, per index code, every trade date present in the daily table.
    sql_str = """SELECT ts_code index_code,trade_date trade_date_list FROM md_integration.tushare_stock_index_daily_md """
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        code_date_range_dic = {}
        for index_code, trade_date_list in table.fetchall():
            code_date_range_dic.setdefault(index_code, []).append(trade_date_list)

    data_len = len(code_date_range_dic)
    logger.info(
        '%d index weight will been import into tushare_stock_index_weight table',
        data_len)
    cycles = 1
    # Initialize so the finally-block length check is always well defined.
    data_df = []
    try:
        for num, (index_code, trade_date_list) in enumerate(code_date_range_dic.items(), start=1):
            trade_date_list_len = len(trade_date_list)
            for i, trade_date in enumerate(trade_date_list):
                logger.debug('%d/%d) %d/%d) %s [%s]', num, data_len, i,
                             trade_date_list_len, index_code, trade_date)
                data_df = invoke_index_weight(index_code=index_code,
                                              trade_date=trade_date)
                if len(data_df) > 0:
                    data_count = bunch_insert_on_duplicate_update(
                        data_df, table_name, engine_md,
                        DTYPE_TUSHARE_STOCK_INDEX_WEIGHT)
                    logging.info("%s 更新 %s %d 条信息被更新", trade_date,
                                 table_name, data_count)
                else:
                    # Empty result: assume no more history for this index code.
                    break
            cycles += 1
            if DEBUG and cycles > 10:
                break
    finally:
        # Flush the last fetched frame. The upsert is idempotent
        # (on-duplicate-update), so re-writing a frame already inserted inside
        # the loop is harmless; this mainly protects the exception path.
        if len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md,
                DTYPE_TUSHARE_STOCK_INDEX_WEIGHT)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
def import_stock_daily(chain_param=None, wind_code_set=None):
    """
    Import daily stock quotes up to the most recent workday minus one;
    once past the 16:00 cut-off built into the SQL below, today's data
    is fetched as well.

    :param chain_param: used by celery task chains to pass the previous result on
    :param wind_code_set: optional whitelist of wind codes to update
    :return: None
    """
    table_name = 'wind_stock_daily'
    logging.info("更新 %s 开始", table_name)
    # (indicator name, SQLAlchemy column type) pairs requested from the wind API.
    param_list = [
        ('open', DOUBLE),
        ('high', DOUBLE),
        ('low', DOUBLE),
        ('close', DOUBLE),
        ('adjfactor', DOUBLE),
        ('volume', DOUBLE),
        ('amt', DOUBLE),
        ('pct_chg', DOUBLE),
        ('maxupordown', Integer),
        ('swing', DOUBLE),
        ('turn', DOUBLE),
        ('free_turn', DOUBLE),
        ('trade_status', String(30)),
        ('susp_days', Integer),
        ('total_shares', DOUBLE),
        ('free_float_shares', DOUBLE),
        ('ev2_to_ebitda', DOUBLE),
    ]
    wind_indictor_str = ",".join([key for key, _ in param_list])
    # The wind API returns upper-case column names; map them back to lower case.
    rename_col_dic = {key.upper(): key.lower() for key, _ in param_list}
    has_table = engine_md.has_table(table_name)
    # Build the per-stock date range query: resume from the day after the last
    # stored trade_date when the table exists, otherwise start from ipo_date.
    if has_table:
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info info
                LEFT OUTER JOIN
                    (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) daily
                ON info.wind_code = daily.wind_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code, ipo_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code"""
        logger.warning('%s 不存在,仅使用 wind_stock_info 表进行计算日期范围', table_name)
    with with_db_session(engine_md) as session:
        # Fetch, for every stock, the date interval needing daily data.
        table = session.execute(sql_str)
        # Optionally clamp the start date to begin_time (kept None here);
        # map wind_code -> (date_from, date_to).
        begin_time = None
        code_date_range_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    # dtype mapping used for the bulk upsert.
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date
    data_df_list = []
    data_len = len(code_date_range_dic)
    logger.info('%d stocks will been import into wind_stock_daily', data_len)
    # Accumulate per-stock frames in data_df_list; flushed in the finally block.
    try:
        for num, (wind_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, wind_code, date_from, date_to)
            try:
                data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to)
            except APIError as exp:
                logger.exception("%d/%d) %s 执行异常", num, data_len, wind_code)
                # Recoverable error codes: skip this stock; anything else aborts
                # the loop (buffered data is still flushed in finally).
                if exp.ret_dic.setdefault('error_code', 0) in (
                        -40520007,  # no data available
                        -40521009,  # decode failure; check date parameters
                ):
                    continue
                else:
                    break
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num,
                               data_len, wind_code, date_from, date_to)
                continue
            logger.info('%d/%d) %d data of %s between %s and %s', num, data_len,
                        data_df.shape[0], wind_code, date_from, date_to)
            data_df['wind_code'] = wind_code
            data_df_list.append(data_df)
            # Debug only: stop early after a few stocks.
            if DEBUG and len(data_df_list) > 2:
                break
    finally:
        # Flush whatever was collected, even on error/early exit.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_df_all.index.rename('trade_date', inplace=True)
            data_df_all.reset_index(inplace=True)
            data_df_all.rename(columns=rename_col_dic, inplace=True)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            # First successful creation of the table: convert engine and add PK.
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
def import_index_daily_his(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    Save index daily history into ``ifind_index_daily_his`` through the iFinD
    ``THS_HistoryQuotes`` interface.

    Fix vs. original: when the 10000-row buffer threshold was reached, only the
    most recently fetched frame was inserted while the rest of the buffered
    ``data_df_list`` frames were silently discarded. The whole buffer is now
    concatenated before the upsert (same pattern as ``add_data_2_ckdvp``).

    :param chain_param: used by celery task chains to pass the previous result on
    :param ths_code_set: optional whitelist of ths codes
    :param begin_time: if not None, no update range may start later than this date
    :return: None
    """
    table_name = 'ifind_index_daily_his'
    # NOTE(review): this converts when begin_time already *is* a date; a check
    # against str looks more plausible — kept as-is, confirm str_2_date behavior.
    if begin_time is not None and type(begin_time) == date:
        begin_time = str_2_date(begin_time)
    json_indicator, _ = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_INDEX_DAILY_HIS],
        sep=';')
    has_table = engine_md.has_table(table_name)
    # Date-range query: resume after the last stored `time`, otherwise start at
    # the index base-period date. (The literal NULL mirrors the common
    # delist_date pattern of the sibling loaders; if(NULL<end_date,...) always
    # yields end_date.)
    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM ifind_index_daily_his GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date)
            ORDER BY ths_code;"""
    else:
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围', table_name)
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date)
            ORDER BY ths_code"""
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }
    if TRIAL:
        # Trial accounts can only fetch roughly the last 5 years of data.
        date_from_min = date.today() - timedelta(days=(365 * 5))
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_to is not None and date_from_min <= date_to
        }
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         begin_time, end_time)
            data_df = invoker.THS_HistoryQuotes(
                ths_code, json_indicator,
                'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
                # Flush once the buffer passes the threshold — the WHOLE
                # buffer, not just the latest frame (the original dropped the
                # earlier frames here).
                if data_count >= 10000:
                    data_df_all = pd.concat(data_df_list)
                    data_count = bunch_insert_on_duplicate_update(
                        data_df_all, table_name, engine_md,
                        DTYPE_INDEX_DAILY_HIS)
                    tot_data_count += data_count
                    data_df_list, data_count = [], 0
                # Debug only: stop early.
                if DEBUG and len(data_df_list) > 5:
                    break
    finally:
        # Flush the remaining buffered frames, even on error/early exit.
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_HIS)
            tot_data_count += data_count
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def add_data_2_ckdvp(col_name, param, wind_code_set: set = None, begin_time=None):
    """
    Fetch one wind indicator (`col_name` with `param`) for every stock and
    store it in the key/value table ``wind_ckdvp_stock``
    (columns: wind_code, key, time, value, param).

    :param col_name: indicator (column) name to fetch and store under `key`
    :param param: extra wsd parameter string, also stored in the `param` column
    :param wind_code_set: optional whitelist of wind codes; None means all
    :param begin_time: optional earliest start date clamp; None disables it
    :return: True when the whole loop finished without interruption
    """
    table_name = 'wind_ckdvp_stock'
    all_finished = False
    has_table = engine_md.has_table('wind_ckdvp_stock')
    if has_table:
        # Resume after the last stored `time` for this (key, param) pair.
        # NOTE(review): the joined max-date column has no alias and date_frm is
        # plain ipo_date, so the join does not actually advance the start date
        # — confirm against the original schema.
        sql_str = """select wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                select info.wind_code, (ipo_date) date_frm, delist_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                from wind_stock_info info
                left outer join
                    (select wind_code, adddate(max(time),1) from wind_ckdvp_stock
                    where wind_ckdvp_stock.key='{0}' and param='{1}' group by wind_code
                    ) daily
                on info.wind_code = daily.wind_code
            ) tt
            where date_frm <= if(delist_date<end_date,delist_date, end_date)
            order by wind_code""".format(col_name, param)
    else:
        logger.warning('wind_ckdvp_stock 不存在,仅使用 wind_stock_info 表进行计算日期范围')
        sql_str = """SELECT wind_code, date_frm, if(delist_date<end_date,delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code,ipo_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code"""
    with with_db_session(engine_md) as session:
        # Fetch, for every stock, the date interval needing data;
        # optionally clamp the start date to begin_time.
        table = session.execute(sql_str)
        code_date_range_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    # dtype mapping for the bulk upsert.
    dtype = {
        'wind_code': String(20),
        'key': String(80),
        'time': Date,
        'value': String(80),
        'param': String(80),
    }
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (wind_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num,
                         code_count, wind_code, date_from, date_to)
            data_df = invoker.wsd(wind_code, col_name, date_from, date_to, param)
            if data_df is not None and data_df.shape[0] > 0:
                # Normalize the frame into key/value rows: the date index
                # becomes `time`, the indicator column becomes `value`.
                data_df['key'] = col_name
                data_df['param'] = param
                data_df['wind_code'] = wind_code
                data_df.rename(columns={col_name.upper(): 'value'}, inplace=True)
                data_df.index.rename('time', inplace=True)
                data_df.reset_index(inplace=True)
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
                # Flush the buffer once it passes the threshold.
                if data_count >= 10000:
                    tot_data_df = pd.concat(data_df_list)
                    data_count = bunch_insert_on_duplicate_update(
                        tot_data_df, table_name, engine_md, dtype)
                    tot_data_count += data_count
                    data_df_list, data_count = [], 0
            # Debug only: stop early.
            if DEBUG and len(data_df_list) > 1:
                break
        all_finished = True
    finally:
        # Flush the remaining buffered frames, even on error/early exit.
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                tot_data_df, table_name, engine_md, dtype)
            tot_data_count += data_count
        # First successful creation of the table: declare NOT NULL key columns
        # and the composite primary key.
        if not has_table and engine_md.has_table(table_name):
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `wind_code` `wind_code` VARCHAR(20) NOT NULL ,
                CHANGE COLUMN `time` `time` DATE NOT NULL ,
                CHANGE COLUMN `key` `key` VARCHAR(80) NOT NULL ,
                CHANGE COLUMN `param` `param` VARCHAR(80) NOT NULL ,
                ADD PRIMARY KEY (`wind_code`, `time`, `key`, `param`)""".format(
                table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
    return all_finished
def merge_stock_info():
    """
    Merge the wind and ifind stock-info tables into the combined ``stock_info``
    table.

    Reads ``ifind_stock_info`` and ``wind_stock_info`` in full, outer-joins
    them on ths_code/wind_code, resolves each target column through a
    per-column merge rule, and bulk-upserts the result.

    :return: the merged DataFrame that was written to the database
    """
    table_name = 'stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    ifind_table_name = f'ifind_{table_name}'
    wind_table_name = f'wind_{table_name}'
    # Load both source tables completely.
    ifind_df = pd.read_sql(f"select * from {ifind_table_name}", engine_md)
    wind_df = pd.read_sql(f"select * from {wind_table_name}", engine_md)
    joined_df = pd.merge(
        ifind_df, wind_df, how='outer',
        left_on='ths_code', right_on='wind_code',
        indicator='indicator_column')
    # target column -> (sql type, merge function, merge-function kwargs)
    # NOTE(review): 'pre_name' reuses 'ths_corp_name_en_stock' as left_key,
    # identical to 'en_name' — looks like a copy/paste slip; confirm intended key.
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
        'sec_name': (String(20), prefer_left, {'left_key': 'ths_stock_short_name_stock', 'right_key': 'sec_name'}),
        'cn_name': (String(100), get_value, {'key': 'ths_corp_cn_name_stock'}),
        'en_name': (String(100), get_value, {'key': 'ths_corp_name_en_stock'}),
        'delist_date': (Date, prefer_left, {'left_key': 'ths_delist_date_stock', 'right_key': 'delist_date'}),
        'ipo_date': (Date, prefer_left, {'left_key': 'ths_ipo_date_stock', 'right_key': 'ipo_date'}),
        'pre_name': (Text, prefer_left, {'left_key': 'ths_corp_name_en_stock', 'right_key': 'prename'}),
        'established_date': (Date, get_value, {'key': 'ths_established_date_stock'}),
        'exch_city': (String(20), get_value, {'key': 'exch_city'}),
        'exch_cn': (String(20), get_value, {'key': 'ths_listing_exchange_stock'}),
        'exch_eng': (String(20), get_value, {'key': 'exch_eng'}),
        'stock_code': (String(20), prefer_left, {'left_key': 'ths_stock_code_stock', 'right_key': 'trade_code'}),
        'mkt': (String(20), get_value, {'key': 'mkt'}),
    }
    # Split the table above into what merge_data expects and the dtype mapping.
    col_merge_rule_dic = {
        name: (merge_func, func_kwargs)
        for name, (_, merge_func, func_kwargs) in col_merge_dic.items()
    }
    dtype = {name: col_type for name, (col_type, _, _) in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name,
                                                  engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    return data_df
def import_wind_stock_info(chain_param=None, refresh=False):
    """
    Refresh the ``wind_stock_info`` table with basic per-stock information.

    Fix vs. original: an empty code set made ``pd.concat([])`` raise; the
    function now returns early. Dead commented-out REPLACE INTO code removed.

    :param chain_param: used by celery task chains to pass the previous result on
    :param refresh: when True, rescan the whole market since 2005-01-01;
        otherwise only today's constituent list is queried
    :return: None
    """
    table_name = 'wind_stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # (indicator name, SQLAlchemy column type) pairs requested from wss.
    wind_indicator_param_list = [
        ('sec_name', String(20)),
        ('trade_code', String(20)),
        ('ipo_date', Date),
        ('delist_date', Date),
        ('mkt', String(20)),
        ('exch_city', String(20)),
        ('exch_eng', String(20)),
        ('prename', String(2000)),
    ]
    # Comma separated indicator names, e.g. "sec_name,trade_code,ipo_date,..."
    param = ",".join([key for key, _ in wind_indicator_param_list])
    dtype = {key: val for key, val in wind_indicator_param_list}
    dtype['wind_code'] = String(20)
    if refresh:
        date_fetch = datetime.strptime('2005-1-1', STR_FORMAT_DATE).date()
    else:
        date_fetch = date.today()
    date_end = date.today()
    # Sample the market constituents roughly once a year (plus today) so that
    # codes listed at any time in the window are collected.
    stock_code_set = set()
    while date_fetch < date_end:
        stock_code_set_sub = get_stock_code_set(date_fetch)
        if stock_code_set_sub is not None:
            stock_code_set |= stock_code_set_sub
        date_fetch += timedelta(days=365)
    stock_code_set_sub = get_stock_code_set(date_end)
    if stock_code_set_sub is not None:
        stock_code_set |= stock_code_set_sub
    stock_code_list = list(stock_code_set)
    if len(stock_code_list) == 0:
        # Nothing to query: pd.concat on an empty list would raise ValueError.
        logger.warning('%s 没有获取到任何股票代码,跳过更新', table_name)
        return
    # Query basic info (ipo/delist dates etc.) in chunks of seg_count codes.
    seg_count = 1000
    stock_info_df_list = []
    for stock_code_list_sub in split_chunk(stock_code_list, seg_count):
        stock_info_df = invoker.wss(stock_code_list_sub, param)
        stock_info_df_list.append(stock_info_df)
        if DEBUG:
            break
    # Combine the chunks and normalize the index into a wind_code column.
    stock_info_all_df = pd.concat(stock_info_df_list)
    stock_info_all_df.index.rename('wind_code', inplace=True)
    logging.info('%d data will be import', stock_info_all_df.shape[0])
    stock_info_all_df.reset_index(inplace=True)
    data_count = bunch_insert_on_duplicate_update(stock_info_all_df, table_name,
                                                  engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # Refresh the code_mapping table from the updated info table.
    update_from_info_table(table_name)
def import_tushare_stock_index_daily(chain_param=None, ts_code_set=None):
    """
    Import index daily quotes into ``tushare_stock_index_daily_md`` up to the
    most recent workday minus one (today once past the 16:00 cut-off in SQL).

    Fixes vs. original:
    * ``if len(df2 > 0):`` compared the DataFrame, not its length; now
      ``len(df2) > 0``.
    * The backward-paging ``while`` loop had no exit when a page came back
      empty or made no progress, looping forever; both cases now break.

    :param chain_param: used by celery task chains to pass the previous result on
    :param ts_code_set: optional whitelist of index ts codes
    :return: None
    """
    table_name = 'tushare_stock_index_daily_md'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Date-range query: resume after the last stored trade_date, otherwise
    # start from the index base_date.
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(trade_date, base_date) date_frm, exp_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_index_basic info
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY ts_code) daily
                ON info.ts_code = daily.ts_code
            ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date)
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, base_date date_frm, exp_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_index_basic info
            ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date)
            ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        begin_time = None
        code_date_range_dic = {
            ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(
        code_date_range_dic)
    logger.info('%d stocks will been import into tushare_stock_index_daily_md',
                data_len)
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code,
                         date_from, date_to)
            data_df = invoke_index_daily(
                ts_code=ts_code,
                start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            if len(data_df) > 0:
                # The API caps rows per call; page backwards until the oldest
                # returned row reaches date_from.
                # NOTE(review): assumes invoke_index_daily returns rows in
                # descending date order (iloc[-1] is the oldest) — confirm.
                while try_2_date(data_df['trade_date'].iloc[-1]) > date_from:
                    last_date_in_df_last = try_2_date(data_df['trade_date'].iloc[-1])
                    df2 = invoke_index_daily(
                        ts_code=ts_code,
                        start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(
                            last_date_in_df_last - timedelta(days=1),
                            STR_FORMAT_DATE_TS))
                    if len(df2) > 0:
                        last_date_in_df_cur = try_2_date(df2['trade_date'].iloc[-1])
                        if last_date_in_df_cur < last_date_in_df_last:
                            data_df = pd.concat([data_df, df2])
                        else:
                            # No progress (same or later oldest date):
                            # stop paging to avoid an infinite loop.
                            break
                    else:
                        # Empty page: no older data available; previously this
                        # case looped forever.
                        break
                if data_df is None:
                    logger.warning('%d/%d) %s has no data during %s %s', num,
                                   data_len, ts_code, date_from, date_to)
                    continue
                logger.info('%d/%d) %d data of %s between %s and %s', num,
                            data_len, data_df.shape[0], ts_code, date_from,
                            date_to)
            else:
                # NOTE(review): an empty first page aborts the whole loop (not
                # just this code) — kept identical to the original behavior.
                break
            # Buffer the fetched rows; flush in batches of >= 500.
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
                # Debug only: stop early.
                if DEBUG and len(data_df_list) > 5:
                    break
                if data_count >= 500:
                    data_df_all = pd.concat(data_df_list)
                    bunch_insert_on_duplicate_update(
                        data_df_all, table_name, engine_md,
                        DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD)
                    all_data_count += data_count
                    data_df_list, data_count = [], 0
    finally:
        # Flush the remaining buffered frames, even on error/early exit.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md,
                DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
def import_future_info(chain_param=None):
    """
    Update the future contract list in ``rqdatac_future_info``.

    Fixes vs. original:
    * ``instrument_info_df.append(tmp)`` returns a NEW frame; the original
      discarded that return value, losing every incremental batch after the
      first. Batches are now collected and combined with ``pd.concat``.
    * The final log message named ``wind_future_info`` although this function
      writes ``rqdatac_future_info``.

    :param chain_param: used by celery task chains to pass the previous result on
    :return: None
    """
    table_name = "rqdatac_future_info"
    has_table = engine_md.has_table(table_name)
    logger.info("更新 %s 开始", table_name)
    # Latest listed_date already stored determines the incremental window;
    # an empty table falls back to a full download.
    if has_table:
        sql_str = 'select max(listed_date) from {table_name}'.format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            listed_date = session.scalar(sql_str)
        if listed_date is None:
            has_table = False
    ndays_per_update = 60
    # (column name, SQLAlchemy type) pairs; see rqdatac all_instruments docs.
    col_name_param_list = [
        ("order_book_id", String(20)),  # contract id; CZCE codes zero-padded, e.g. 'ZC609' -> 'ZC1609'; 88 = dominant, 99 = index continuous
        ("underlying_symbol", String(20)),  # e.g. 'IF' for IF1005
        ("market_tplus", TINYINT),  # trading rule: '0' = T+0, '1' = T+1, ...
        ("symbol", String(100)),  # short name, e.g. '沪深 1005'
        ("margin_rate", DOUBLE),  # minimum margin rate
        ("maturity_date", Date),  # '0000-00-00' for continuous contracts
        ("type", String(20)),  # always 'Future'
        ("trading_code", String(20)),
        ("exchange", String(10)),  # 'DCE', 'SHFE', 'CFFEX', 'CZCE'
        ("product", String(20)),  # 'Commodity' / 'Index' / 'Government'
        ("contract_multiplier", SMALLINT),  # e.g. 300 for CSI 300 futures
        ("round_lot", TINYINT),  # always 1.0 for futures
        ("trading_hours", String(100)),
        ("listed_date", Date),  # '0000-00-00' for continuous contracts
        ("industry_name", String(50)),
        ("de_listed_date", Date),  # appears identical to maturity_date
        ("underlying_order_book_id", String(20)),  # 'null' except IH/IF/IC
    ]
    dtype = {key: val for key, val in col_name_param_list}
    if not has_table:
        # Full download of the current contract universe.
        instrument_info_df = rqdatac.all_instruments(type='Future', market='cn')
    else:
        # Incremental: sample the universe every ndays_per_update days.
        date_yestoday = date.today() - timedelta(days=1)
        df_list = [
            rqdatac.all_instruments(type='Future', market='cn', date=param)
            for param in get_date_iter(listed_date, date_yestoday,
                                       ndays_per_update)
        ]
        if len(df_list) == 0:
            logger.info("更新 %s 结束 没有新数据", table_name)
            return
        # DataFrame.append is not in-place; combine all batches explicitly.
        instrument_info_df = pd.concat(df_list)
    instrument_info_df.drop_duplicates(inplace=True)
    # Normalize the literal string 'null' to a real NULL.
    instrument_info_df.loc[instrument_info_df['underlying_order_book_id'] ==
                           'null', 'underlying_order_book_id'] = None
    instrument_info_df.rename(
        columns={c: str.lower(c) for c in instrument_info_df.columns},
        inplace=True)
    data_count = bunch_insert_on_duplicate_update(instrument_info_df,
                                                  table_name,
                                                  engine_md,
                                                  dtype=dtype)
    logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    logger.info("更新 %s 结束 %d 条记录被更新", table_name, data_count)