def import_stock_quertarly_hk(chain_param=None, wind_code_set=None, begin_time=None): """ 插入股票日线数据到最近一个工作日-1 :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務 :param wind_code_set: :param begin_time: :return: """ table_name = "wind_stock_quertarly_hk" logging.info("更新 %s 开始", table_name) has_table = engine_md.has_table(table_name) if has_table: sql_str = """ SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_stock_info_hk info LEFT OUTER JOIN (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY ths_code) quertarly ON info.wind_code = quertarly.wind_code ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY wind_code""".format(table_name=table_name) else: logger.warning( 'wind_stock_quertarly_hk 不存在,仅使用 wind_stock_info_hk 表进行计算日期范围') sql_str = """ SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.wind_code, ipo_date date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_stock_info_hk info ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY wind_code""" logger.warning('%s 不存在,仅使用 wind_stock_info_hk 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 # 获取date_from,date_to,将date_from,date_to做为value值 stock_date_dic = { wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for wind_code, date_from, date_to in table.fetchall() if wind_code_set is None or wind_code in wind_code_set } data_df_list = [] logger.info('%d data will been import into %s', len(stock_date_dic), table_name) # 获取股票量价等行情数据 field_col_name_list = [ ('roic_ttm', String(20)), ('yoyprofit', String(20)), ('ebit', String(20)), ('ebit2', String(20)), ('ebit2_ttm', String(20)), ('surpluscapitalps', String(20)), ('undistributedps', String(20)), ('stm_issuingdate', Date), ] # 获取列表属性名 dtype = {key: val for key, val in field_col_name_list} dtype['wind_code'] = String(20) dtype['trade_date'] = Date wind_indictor_str = ",".join(key for key, _ in field_col_name_list) upper_col_2_name_dic = { name.upper(): val for name, val in field_col_name_list } # 获取接口数据 try: for stock_num, (wind_code, (date_from, date_to)) in enumerate(stock_date_dic.items()): data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to, "unit=1;rptType=1;Period=Q") if data_df is None: logger.warning('%d) %s has no data during %s %s', stock_num, wind_code, date_from, date_to) continue data_df.rename(columns=upper_col_2_name_dic, inplace=True) # 清理掉期间全空的行 for trade_date in list(data_df.index): is_all_none = data_df.loc[trade_date].apply( lambda x: x is None).all() if is_all_none: logger.warning("%s %s 数据全部为空", wind_code, trade_date) data_df.drop(trade_date, inplace=True) logger.info('%d) %d data of %s between %s and %s', stock_num, data_df.shape[0], wind_code, date_from, date_to) data_df['wind_code'] = wind_code data_df.index.rename('trade_date', inplace=True) data_df.reset_index(inplace=True) data_df_list.append(data_df) # 仅供调试使用 if DEBUG and len(data_df_list) > 5: break finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) # data_df_all.to_sql('wind_stock_quertarly_hk', engine_md, if_exists='append') bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype) logging.info("更新 wind_stock_quertarly_hk 结束 %d 条信息被更新", data_df_all.shape[0]) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name])
def import_future_daily(chain_param=None, wind_code_set=None, begin_time=None): """ 更新期货合约日级别行情信息 :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務 :return: """ table_name = "wind_future_daily" logger.info("更新 %s 开始", table_name) has_table = engine_md.has_table(table_name) param_list = [("open", DOUBLE), ("high", DOUBLE), ("low", DOUBLE), ("close", DOUBLE), ("volume", DOUBLE), ("amt", DOUBLE), ("dealnum", DOUBLE), ("settle", DOUBLE), ("oi", DOUBLE), ("st_stock", DOUBLE), ('position', DOUBLE), ('instrument_id', String(20)), ( 'trade_date', Date, )] wind_indictor_str = ",".join([key for key, _ in param_list[:10]]) if has_table: sql_str = """ select wind_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to FROM ( select fi.wind_code, ifnull(trade_date_max_1, ipo_date) date_frm, lasttrade_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date from wind_future_info fi left outer join (select wind_code, adddate(max(trade_date),1) trade_date_max_1 from {table_name} group by wind_code) wfd on fi.wind_code = wfd.wind_code ) tt where date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date) -- and subdate(curdate(), 360) < if(lasttrade_date<end_date, lasttrade_date, end_date) order by wind_code""".format(table_name=table_name) else: sql_str = """ SELECT wind_code, date_frm, if(lasttrade_date<end_date,lasttrade_date, end_date) date_to FROM ( SELECT info.wind_code,ipo_date date_frm, lasttrade_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_future_info info ) tt WHERE date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date) ORDER BY wind_code; """ logger.warning('%s 不存在,仅使用 wind_future_info 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: table = session.execute(sql_str) # 获取date_from,date_to,将date_from,date_to做为value值 future_date_dic = { wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for wind_code, date_from, date_to in table.fetchall() if wind_code_set is None or wind_code in wind_code_set } # 设置 dtype dtype = {key: val for key, val in param_list} dtype['wind_code'] = String(20) dtype['trade_date'] = Date data_df_list = [] data_len = len(future_date_dic) try: logger.info("%d future instrument will be handled", data_len) for num, (wind_code, (date_frm, date_to)) in enumerate(future_date_dic.items(), start=1): # 暂时只处理 RU 期货合约信息 # if wind_code.find('RU') == -1: # continue if date_frm > date_to: continue date_frm_str = date_frm.strftime(STR_FORMAT_DATE) date_to_str = date_to.strftime(STR_FORMAT_DATE) logger.info('%d/%d) get %s between %s and %s', num, data_len, wind_code, date_frm_str, date_to_str) # data_df = wsd_cache(w, wind_code, "open,high,low,close,volume,amt,dealnum,settle,oi,st_stock", # date_frm, date_to, "") try: data_df = invoker.wsd(wind_code, wind_indictor_str, date_frm_str, date_to_str, "") except APIError as exp: from tasks.wind import ERROR_CODE_MSG_DIC error_code = exp.ret_dic.setdefault('error_code', 0) if error_code in ERROR_CODE_MSG_DIC: logger.error("%d/%d) %s 执行异常 error_code=%d, %s", num, data_len, wind_code, error_code, ERROR_CODE_MSG_DIC[error_code]) else: logger.exception("%d/%d) %s 执行异常 error_code=%d", num, data_len, wind_code, error_code) if exp.ret_dic.setdefault('error_code', 0) in ( -40520007, # 没有可用数据 -40521009, # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月 ): continue else: break if data_df is None: logger.warning('%d/%d) %s has no data during %s %s', num, data_len, wind_code, date_frm_str, date_to) continue logger.info('%d/%d) %d data of %s between %s and %s', num, data_len, data_df.shape[0], wind_code, date_frm_str, date_to) data_df['wind_code'] = wind_code data_df.index.rename('trade_date', inplace=True) data_df.reset_index(inplace=True) data_df.rename(columns={c: str.lower(c) for c in data_df.columns}, inplace=True) data_df.rename(columns={'oi': 'position'}, inplace=True) # oi 应该是 open_interest data_df['instrument_id'] = wind_code.split('.')[0] data_df_list.append(data_df) # 仅仅调试时使用 if DEBUG and len(data_df_list) >= 1: break finally: data_df_count = len(data_df_list) if data_df_count > 0: logger.info('merge data with %d df', data_df_count) data_df = pd.concat(data_df_list) data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype) logger.info("更新 %s 结束 %d 条记录被更新", table_name, data_count) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name]) else: logger.info("更新 %s 结束 0 条记录被更新", table_name)
def import_stock_daily_hk(chain_param=None, wind_code_set=None, begin_time=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務 :param wind_code_set: :param begin_time: :return: """ table_name = 'wind_stock_daily_hk' logging.info("更新 %s 开始", table_name) param_list = [ ('OPEN', DOUBLE), ('HIGH', DOUBLE), ('LOW', DOUBLE), ('CLOSE', DOUBLE), ('ADJFACTOR', DOUBLE), ('VOLUME', DOUBLE), ('AMT', DOUBLE), ('PCT_CHG', DOUBLE), ('MAXUPORDOWN', Integer), ('SWING', DOUBLE), ('TURN', DOUBLE), ('FREE_TURN', DOUBLE), ('TRADE_STATUS', String(20)), ('SUSP_DAYS', Integer), ('TOTAL_SHARES', DOUBLE), ('FREE_FLOAT_SHARES', DOUBLE), ('EV2_TO_EBITDA', DOUBLE), ('PS_TTM', DOUBLE), ('PE_TTM', DOUBLE), ('PB_MRQ', DOUBLE), ('close_usd', DOUBLE), ('pe_lyr', DOUBLE), ('pb_lyr', DOUBLE), ] # 将列表列名转化为小写 col_name_dic = { col_name.upper(): col_name.lower() for col_name, _ in param_list } # 获取列表列名 col_name_list = [col_name.lower() for col_name in col_name_dic.keys()] # wind_indictor_str = "open,high,low,close,adjfactor,volume,amt,pct_chg,maxupordown," + \ # "swing,turn,free_turn,trade_status,susp_days," + \ # "total_shares,free_float_shares,ev2_to_ebitda" wind_indictor_str = ",".join(col_name_list) has_table = engine_md.has_table(table_name) if has_table: sql_str = """ SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_stock_info_hk info LEFT OUTER JOIN (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) daily ON info.wind_code = daily.wind_code ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY wind_code""".format(table_name=table_name) else: logger.warning('%s 不存在,仅使用 wind_stock_info_hk 表进行计算日期范围', table_name) sql_str = """ SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.wind_code, ipo_date date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_stock_info_hk info ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY wind_code""" with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 获取每只股票需要获取日线数据的日期区间 stock_date_dic = { wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for wind_code, date_from, date_to in table.fetchall() if wind_code_set is None or wind_code in wind_code_set } # 设置 dtype dtype = {key: val for key, val in param_list} dtype['wind_code'] = String(20) dtype['trade_date'] = Date #data_df_list = [] data_len = len(stock_date_dic) # 获取接口数据 logger.info('%d data will been import into %s', data_len, table_name) try: for data_num, (wind_code, (date_from, date_to)) in enumerate(stock_date_dic.items()): logger.debug('%d/%d) %s [%s - %s]', data_num, data_len, wind_code, date_from, date_to) data_df_list = [] # if wind_code == '0192!1.HK': # logger.warning('%s wind 没有这只股票数据', wind_code) # continue try: data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to) except APIError as exp: logger.exception("%d/%d) %s 执行异常", data_num, data_len, wind_code) if exp.ret_dic.setdefault('error_code', 0) in ( -40520007, # 没有可用数据 -40521009, # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月 ): continue else: break if data_df is None: logger.warning('%d/%d) %s has no data during %s %s', data_num, data_len, wind_code, date_from, date_to) continue logger.info('%d/%d) %d data of %s between %s and %s', data_num, data_len, data_df.shape[0], wind_code, date_from, date_to) data_df['wind_code'] = wind_code data_df.index.rename('trade_date', inplace=True) data_df.reset_index(inplace=True) data_df.rename(columns=col_name_dic, inplace=True) data_df_list.append(data_df) if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype) logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name]) # 仅供调试使用 if DEBUG and len(data_df_list) > 1: break finally: # 导入数据库 创建 # if len(data_df_list) > 0: # data_df_all = pd.concat(data_df_list) # # data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype) # logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count) # if not has_table and engine_md.has_table(table_name): # alter_table_2_myisam(engine_md, [table_name]) # build_primary_key([table_name]) pass
def import_stock_quertarly(chain_param=None, wind_code_set=None): """ 插入股票日线数据到最近一个工作日-1 :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務 :return: """ logging.info("更新 wind_fina_indicator 开始") table_name = 'wind_fina_indicator' has_table = engine_md.has_table(table_name) if has_table: sql_str = """ SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_stock_info info LEFT OUTER JOIN (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) quertarly ON info.wind_code = quertarly.wind_code ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY wind_code;""".format(table_name=table_name) else: logger.warning('wind_fina_indicator 不存在,仅使用 wind_stock_info 表进行计算日期范围') sql_str = """ SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.wind_code, ipo_date date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_stock_info info ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY wind_code""" with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 begin_time = None # 获取date_from,date_to,将date_from,date_to做为value值 stock_date_dic = { wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for wind_code, date_from, date_to in table.fetchall() if wind_code_set is None or wind_code in wind_code_set} # 获取股票量价等行情数据 param_list = [ ('roic_ttm', DOUBLE), ('yoyprofit', DOUBLE), ('ebit', DOUBLE), ('ebit2', DOUBLE), ('ebit2_ttm', DOUBLE), ('surpluscapitalps', DOUBLE), ('undistributedps', DOUBLE), ('stm_issuingdate', DOUBLE), ] # 获取参数列表 wind_indictor_str = ",".join(key for key, _ in param_list) dtype = {key: val for key, val in param_list} dtype['wind_code'] = String(20) dtype['trade_date'] = Date data_df_list = [] logger.info('%d stocks will been import into wind_stock_quertarly', len(stock_date_dic)) try: for stock_num, (wind_code, (date_from, date_to)) in enumerate(stock_date_dic.items()): # 获取股票量价等行情数据 # w.wsd("002122.SZ", "roic_ttm,yoyprofit,ebit,ebit2,ebit2_ttm,surpluscapitalps,undistributedps,stm_issuingdate", "2012-12-31", "2017-12-06", "unit=1;rptType=1;Period=Q") data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to, "unit=1;rptType=1;Period=Q") if data_df is None: logger.warning('%d) %s has no data during %s %s', stock_num, wind_code, date_from, date_to) continue data_df.rename(columns={c: str(c).lower() for c in data_df.columns}, inplace=True) # 清理掉期间全空的行 for trade_date in list(data_df.index[:10]): is_all_none = data_df.loc[trade_date].apply(lambda x: x is None).all() if is_all_none: logger.warning("%s %s 数据全部为空", wind_code, trade_date) data_df.drop(trade_date, inplace=True) logger.info('%d) %d data of %s between %s and %s', stock_num, data_df.shape[0], wind_code, date_from, date_to) data_df['wind_code'] = wind_code data_df.index.rename('trade_date', inplace=True) data_df.reset_index(inplace=True) data_df_list.append(data_df) if DEBUG and len(data_df_list) > 10: break finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype) logging.info("更新 wind_stock_quertarly 结束 %d 条信息被更新", data_df_all.shape[0]) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name])
def import_private_fund_nav_daily(chain_param=None, wind_code_list=None): """ :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務 :param wind_code_list: :return: """ table_name = 'wind_fund_nav_daily' # 初始化数据下载端口 # 初始化数据库engine # 链接数据库,并获取fundnav旧表 # with get_db_session(engine) as session: # table = session.execute('select wind_code, ADDDATE(max(trade_date),1) from wind_fund_nav group by wind_code') # fund_trade_date_begin_dic = dict(table.fetchall()) # 获取wind_fund_info表信息 col_name_param_list = [ ('trade_date', Date), ('nav', DOUBLE), ('nav_acc', DOUBLE), ('nav_date', Date), ] dtype = {col_name: val for col_name, val in col_name_param_list} dtype['wind_code'] = String(200) has_table = engine_md.has_table(table_name) if has_table: fund_info_df = pd.read_sql_query( """SELECT DISTINCT fi.wind_code AS wind_code, IFNULL(fund_setupdate, if(trade_date_latest BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1), ADDDATE(trade_date_latest,1) , fund_setupdate) ) date_from, if(fund_maturitydate BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1),fund_maturitydate,ADDDATE(CURDATE(), -1)) date_to FROM fund_info fi LEFT JOIN ( SELECT wind_code, ADDDATE(max(trade_date),1) trade_date_from FROM wind_fund_nav_daily GROUP BY wind_code ) wfn ON fi.wind_code = wfn.wind_code""", engine_md) else: fund_info_df = pd.read_sql_query( """SELECT DISTINCT fi.wind_code AS wind_code, IFNULL(fund_setupdate, if(trade_date_latest BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1), ADDDATE(trade_date_latest,1) , fund_setupdate) ) date_from, if(fund_maturitydate BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1),fund_maturitydate,ADDDATE(CURDATE(), -1)) date_to FROM fund_info fi ORDER BY wind_code;""", engine_md) wind_code_date_frm_to_dic = { wind_code: (str_2_date(date_from), str_2_date(date_to)) for wind_code, date_from, date_to in zip(fund_info_df['wind_code'], fund_info_df['date_from'], fund_info_df['date_to']) } fund_info_df.set_index('wind_code', inplace=True) if wind_code_list is None: wind_code_list = list(fund_info_df.index) else: wind_code_list = list( set(wind_code_list) & set(fund_info_df.index)) # 结束时间 date_last_day = date.today() - timedelta(days=1) # date_end_str = date_end.strftime(STR_FORMAT_DATE) fund_nav_all_df = [] no_data_count = 0 code_count = len(wind_code_list) # 对每个新获取的基金名称进行判断,若存在 fundnav 中,则只获取部分净值 wind_code_trade_date_latest_dic = {} date_gap = timedelta(days=10) try: for num, wind_code in enumerate(wind_code_list): date_begin, date_end = wind_code_date_frm_to_dic[wind_code] # if date_end > date_last_day: # date_end = date_last_day if date_begin > date_end: continue # 设定数据获取的起始日期 # wind_code_trade_date_latest_dic[wind_code] = date_to # if wind_code in fund_trade_date_begin_dic: # trade_latest = fund_trade_date_begin_dic[wind_code] # if trade_latest > date_end: # continue # date_begin = max([date_begin, trade_latest]) # if date_begin is None: # continue # elif isinstance(date_begin, str): # date_begin = datetime.strptime(date_begin, STR_FORMAT_DATE).date() if isinstance(date_begin, date): if date_begin.year < 1900: continue if date_begin > date_end: continue date_begin_str = date_begin.strftime('%Y-%m-%d') else: logger.error("%s date_begin:%s", wind_code, date_begin) continue if isinstance(date_end, date): if date_begin.year < 1900: continue if date_begin > date_end: continue date_end_str = date_end.strftime('%Y-%m-%d') else: logger.error("%s date_end:%s", wind_code, date_end) continue # 尝试获取 fund_nav 数据 for k in range(2): try: fund_nav_tmp_df = invoker.wsd( codes=wind_code, fields='nav,NAV_acc,NAV_date', beginTime=date_begin_str, endTime=date_end_str, options='Fill=Previous') trade_date_latest = datetime.strptime( date_end_str, '%Y-%m-%d').date() - date_gap wind_code_trade_date_latest_dic[ wind_code] = trade_date_latest break except APIError as exp: # -40520007z if exp.ret_dic.setdefault('error_code', 0) == -40520007: trade_date_latest = datetime.strptime( date_end_str, '%Y-%m-%d').date() - date_gap wind_code_trade_date_latest_dic[ wind_code] = trade_date_latest logger.error("%s Failed, ErrorMsg: %s" % (wind_code, str(exp))) continue except Exception as exp: logger.error("%s Failed, ErrorMsg: %s" % (wind_code, str(exp))) continue else: fund_nav_tmp_df = None if fund_nav_tmp_df is None: logger.info('%s No data', wind_code) # del wind_code_trade_date_latest_dic[wind_code] no_data_count += 1 logger.warning('%d funds no data', no_data_count) else: fund_nav_tmp_df.dropna(how='all', inplace=True) df_len = fund_nav_tmp_df.shape[0] if df_len == 0: continue fund_nav_tmp_df['wind_code'] = wind_code # 此处删除 trade_date_latest 之后再加上,主要是为了避免因抛出异常而导致的该条数据也被记录更新 # del wind_code_trade_date_latest_dic[wind_code] trade_date_latest = fund_nav_df_2_sql(table_name, fund_nav_tmp_df, engine_md, is_append=True) if trade_date_latest is None: logger.error('%s[%d] data insert failed', wind_code) else: wind_code_trade_date_latest_dic[ wind_code] = trade_date_latest logger.info('%d) %s updated, %d funds left', num, wind_code, code_count - num) if DEBUG and num > 1: # 调试使用 break # finally: import_wind_fund_nav_to_nav() update_trade_date_latest(wind_code_trade_date_latest_dic) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name]) return fund_nav_all_df
def import_stock_daily(chain_param=None, wind_code_set=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務 :return: """ table_name = 'wind_stock_daily' logging.info("更新 %s 开始", table_name) param_list = [ ('open', DOUBLE), ('high', DOUBLE), ('low', DOUBLE), ('close', DOUBLE), ('adjfactor', DOUBLE), ('volume', DOUBLE), ('amt', DOUBLE), ('pct_chg', DOUBLE), ('maxupordown', Integer), ('swing', DOUBLE), ('turn', DOUBLE), ('free_turn', DOUBLE), ('trade_status', String(30)), ('susp_days', Integer), ('total_shares', DOUBLE), ('free_float_shares', DOUBLE), ('ev2_to_ebitda', DOUBLE), ] wind_indictor_str = ",".join([key for key, _ in param_list]) rename_col_dic = {key.upper(): key.lower() for key, _ in param_list} has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有wind_stock_daily if has_table: sql_str = """ SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_stock_info info LEFT OUTER JOIN (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) daily ON info.wind_code = daily.wind_code ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY wind_code""".format(table_name=table_name) else: sql_str = """ SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.wind_code, ipo_date date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_stock_info info ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY wind_code""" logger.warning('%s 不存在,仅使用 wind_stock_info 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 begin_time = None # 获取date_from,date_to,将date_from,date_to做为value值 code_date_range_dic = { wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for wind_code, date_from, date_to in table.fetchall() if wind_code_set is None or wind_code in wind_code_set} # 设置 dtype dtype = {key: val for key, val in param_list} dtype['wind_code'] = String(20) dtype['trade_date'] = Date data_df_list = [] data_len = len(code_date_range_dic) logger.info('%d stocks will been import into wind_stock_daily', data_len) # 将data_df数据,添加到data_df_list try: for num, (wind_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, data_len, wind_code, date_from, date_to) try: data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to) except APIError as exp: logger.exception("%d/%d) %s 执行异常", num, data_len, wind_code) if exp.ret_dic.setdefault('error_code', 0) in ( -40520007, # 没有可用数据 -40521009, # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月 ): continue else: break if data_df is None: logger.warning('%d/%d) %s has no data during %s %s', num, data_len, wind_code, date_from, date_to) continue logger.info('%d/%d) %d data of %s between %s and %s', num, data_len, data_df.shape[0], wind_code, date_from, date_to) data_df['wind_code'] = wind_code data_df_list.append(data_df) # 仅调试使用 if DEBUG and len(data_df_list) > 2: break finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_df_all.index.rename('trade_date', inplace=True) data_df_all.reset_index(inplace=True) data_df_all.rename(columns=rename_col_dic, inplace=True) # data_df_all.set_index(['wind_code', 'trade_date'], inplace=True) # data_df_all.to_sql('wind_stock_daily', engine_md, if_exists='append', dtype=dtype) # logging.info("更新 wind_stock_daily 结束 %d 条信息被更新", data_df_all.shape[0]) data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype) logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name])
def add_data_2_ckdvp(col_name, param, wind_code_set: set = None, begin_time=None): """判断表格是否存在,存在则进行表格的关联查询 :param col_name: 增加的列属性名 :param param: 参数 :param wind_code_set: 默认为None :param begin_time: 默认为None :return: """ table_name = 'wind_ckdvp_stock' all_finished = False has_table = engine_md.has_table('wind_ckdvp_stock') if has_table: # 执行语句,表格数据联立 sql_str = """ select wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( select info.wind_code, (ipo_date) date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date from wind_stock_info info left outer join (select wind_code, adddate(max(time),1) from wind_ckdvp_stock where wind_ckdvp_stock.key='{0}' and param='{1}' group by wind_code ) daily on info.wind_code = daily.wind_code ) tt where date_frm <= if(delist_date<end_date,delist_date, end_date) order by wind_code""".format(col_name, param) else: logger.warning('wind_ckdvp_stock 不存在,仅使用 wind_stock_info 表进行计算日期范围') sql_str = """ SELECT wind_code, date_frm, if(delist_date<end_date,delist_date, end_date) date_to FROM ( SELECT info.wind_code,ipo_date date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_stock_info info ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY wind_code""" with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) code_date_range_dic = { wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for wind_code, date_from, date_to in table.fetchall() if wind_code_set is None or wind_code in wind_code_set} # 设置 dtype dtype = { 'wind_code': String(20), 'key': String(80), 'time': Date, 'value': String(80), 'param': String(80), } data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic) try: for num, (wind_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, code_count, wind_code, date_from, date_to) data_df = invoker.wsd( wind_code, col_name, date_from, date_to, param ) if data_df is not None and data_df.shape[0] > 0: # 对我们的表格进行规范整理,整理我们的列名,索引更改 data_df['key'] = col_name data_df['param'] = param data_df['wind_code'] = wind_code data_df.rename(columns={col_name.upper(): 'value'}, inplace=True) data_df.index.rename('time', inplace=True) data_df.reset_index(inplace=True) data_count += data_df.shape[0] data_df_list.append(data_df) # 大于阀值有开始插入 if data_count >= 10000: tot_data_df = pd.concat(data_df_list) # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype) data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype) tot_data_count += data_count data_df_list, data_count = [], 0 # 仅调试使用 if DEBUG and len(data_df_list) > 1: break all_finished = True finally: if data_count > 0: tot_data_df = pd.concat(data_df_list) # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype) data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype) tot_data_count += data_count if not has_table and engine_md.has_table(table_name): create_pk_str = """ALTER TABLE {table_name} CHANGE COLUMN `wind_code` `wind_code` VARCHAR(20) NOT NULL , CHANGE COLUMN `time` `time` DATE NOT NULL , CHANGE COLUMN `key` `key` VARCHAR(80) NOT NULL , CHANGE COLUMN `param` `param` VARCHAR(80) NOT NULL , ADD PRIMARY KEY (`wind_code`, `time`, `key`, `param`)""".format(table_name=table_name) with with_db_session(engine_md) as session: session.execute(create_pk_str) logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name]) return all_finished
def fill_col_by_wsd(col_name_dic: dict, table_name, top_n=None): """补充历史col数据 :param col_name_dic: :param table_name: :param top_n: :return: """ # 股票列表 # db_col_name_list = [col_name.lower() for col_name in col_name_dic.values()] col_name_list = [col_name.lower() for col_name in col_name_dic.keys()] # 获取每只股票ipo 日期 及 最小的交易日前一天 # sql_str = """select si.wind_code, td_from, td_to # from wind_stock_info si, # (select wind_code, min(trade_date) td_from, max(trade_date) td_to from wind_stock_daily where ev2_to_ebitda is null group by wind_code) sd # where si.wind_code = sd.wind_code""" where_sub_str = ' and '.join([col_name + ' is null' for col_name in col_name_dic.values()]) sql_str = """ select wsd.wind_code, min_trade_date, max_trade_date from ( select wind_code, min(trade_date) min_trade_date, max(trade_date) max_trade_date from wind_convertible_bond_daily group by wind_code ) wsd INNER JOIN ( select wind_code from """ + table_name + """ where """ + where_sub_str + """ group by wind_code ) wsd_not_null on wsd.wind_code = wsd_not_null.wind_code""" with with_db_session(engine_md) as session: table = session.execute(sql_str) stock_trade_date_range_dic = {content[0]: (content[1], content[2]) for content in table.fetchall()} data_df_list = [] data_count = len(stock_trade_date_range_dic) try: # for n, (wind_code, (date_from, date_to)) in enumerate(stock_trade_date_range_dic.items()): for data_num, (wind_code, (date_from, date_to)) in enumerate(stock_trade_date_range_dic.items(), start=1): if top_n is not None and data_num > top_n: break # 获取股票量价等行情数据 wind_indictor_str = col_name_list data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to) if data_df is None: logger.warning('%d/%d) %s has no data during %s %s', data_num, data_count, wind_code, date_from, date_to) continue logger.info('%d/%d) %d data of %s between %s and %s', data_num, data_count, data_df.shape[0], wind_code, date_from, date_to) data_df['wind_code'] = wind_code data_df_list.append(data_df) # 仅供调试使用 # if data_num >= 10: # break finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_df_all.index.rename('trade_date', inplace=True) data_df_all.reset_index(inplace=True) # 只有全部列为空的项才需要剔除 is_na_s = None for col_name in col_name_dic.keys(): col_name = col_name.upper() if is_na_s is None: is_na_s = data_df_all[col_name].isna() else: is_na_s = is_na_s & data_df_all[col_name].isna() data_df_not_null = data_df_all[~is_na_s] data_df_not_null.fillna(0, inplace=True) if data_df_not_null.shape[0] > 0: data_dic_list = data_df_not_null.to_dict(orient='records') sql_str = "update %s set " % table_name + \ ",".join( ["%s=:%s" % (db_col_name, col_name.upper()) for col_name, db_col_name in col_name_dic.items()]) + \ " where wind_code=:wind_code and trade_date=:trade_date" with with_db_session(engine_md) as session: session.execute(sql_str, params=data_dic_list) logger.info('%d data updated on %s', data_df_not_null.shape[0], table_name) else: logger.warning('no data for updating on %s', table_name)
def import_cb_daily(chain_param=None, wind_code_set: set = None, begin_time=None): """ 导入可转债日线数据 需要补充 转股价格 :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務 :return: """ table_name = "wind_convertible_bond_daily" info_table_name = "wind_convertible_bond_info" has_table = engine_md.has_table(table_name) col_name_param_list = [ ('outstandingbalance', DOUBLE), ('clause_conversion2_bondlot', DOUBLE), ('clause_conversion2_bondproportion', DOUBLE), ('clause_conversion2_swapshareprice', DOUBLE), ('clause_conversion2_conversionproportion', DOUBLE), ('convpremium', DOUBLE), ('convpremiumratio', DOUBLE), ('convvalue', DOUBLE), ('convpe', DOUBLE), ('convpb', DOUBLE), ('underlyingpe', DOUBLE), ('underlyingpb', DOUBLE), ('diluterate', DOUBLE), ('ldiluterate', DOUBLE), ('open', DOUBLE), ('high', DOUBLE), ('low', DOUBLE), ('close', DOUBLE), ('volume', DOUBLE), ] wind_indictor_str = ",".join(col_name for col_name, _ in col_name_param_list) logging.info("更新 %s 开始", table_name) if has_table: sql_str = """ SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, clause_conversion_2_swapshareenddate delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_convertible_bond_info info LEFT OUTER JOIN (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) daily ON info.wind_code = daily.wind_code ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY wind_code""".format(table_name=table_name) else: logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name) sql_str = """ SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.wind_code, ipo_date date_frm, clause_conversion_2_swapshareenddate delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_convertible_bond_info info ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY wind_code""" with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 # 获取date_from,date_to,将date_from,date_to做为value值 code_date_range_dic = { wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for wind_code, date_from, date_to in table.fetchall() if wind_code_set is None or wind_code in wind_code_set} # 设置dtype dtype = {key: val for key, val in col_name_param_list} dtype['wind_code'] = String(20) dtype['trade_date'] = Date data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic) # 获取股票量价等行情数据 try: for num, (wind_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, code_count, wind_code, begin_time, end_time) try: data_df = invoker.wsd(wind_code, wind_indictor_str, begin_time, end_time, "unit=1") except APIError as exp: if exp.ret_dic.setdefault('error_code', 0) in ( -40520007, # 没有可用数据 -40521009, # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月 ): logger.warning("%d/%d) %s 执行异常 %s", num, code_count, wind_code, exp.ret_dic) continue else: logger.exception("%d/%d) %s 执行异常", num, code_count, wind_code) break if data_df is None: logger.warning('%d/%d) %s has no data during %s %s', num, code_count, wind_code, begin_time, end_time) continue if data_df is not None and data_df.shape[0] > 0: data_df['wind_code'] = wind_code data_df.index.rename('trade_date', inplace=True) data_df.reset_index(inplace=True) data_df.rename(columns={col: col.lower() for col in data_df.columns}, inplace=True) data_df_list.append(data_df) data_count += data_df.shape[0] # 大于阀值有开始插入 if data_count >= 10000: data_df_all = pd.concat(data_df_list) data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype) logging.info("更新 %s %d 条信息", table_name, data_count) tot_data_count += data_count data_df_list, data_count = [], 0 # 仅调试使用 if DEBUG and len(data_df_list) > 1: break finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype) logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name])
def import_index_daily(chain_param=None): """导入指数数据 :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務 :return: """ table_name = "wind_index_daily" has_table = engine_md.has_table(table_name) col_name_param_list = [ ('open', DOUBLE), ('high', DOUBLE), ('low', DOUBLE), ('close', DOUBLE), ('volume', DOUBLE), ('amt', DOUBLE), ('turn', DOUBLE), ('free_turn', DOUBLE), ] wind_indictor_str = ",".join([key for key, _ in col_name_param_list]) rename_col_dic = {key.upper(): key.lower() for key, _ in col_name_param_list} dtype = {key: val for key, val in col_name_param_list} dtype['wind_code'] = String(20) # TODO: 'trade_date' 声明为 Date 类型后,插入数据库会报错,目前原因不详,日后再解决 # dtype['trade_date'] = Date, # yesterday = date.today() - timedelta(days=1) # date_ending = date.today() - ONE_DAY if datetime.now().hour < BASE_LINE_HOUR else date.today() # sql_str = """select wii.wind_code, wii.sec_name, ifnull(adddate(latest_date, INTERVAL 1 DAY), wii.basedate) date_from # from wind_index_info wii left join # ( # select wind_code,index_name, max(trade_date) as latest_date # from wind_index_daily group by wind_code # ) daily # on wii.wind_code=daily.wind_code""" # with with_db_session(engine_md) as session: # table = session.execute(sql_str) # wind_code_date_from_dic = {wind_code: (sec_name, date_from) for wind_code, sec_name, date_from in table.fetchall()} # with with_db_session(engine_md) as session: # # 获取市场有效交易日数据 # sql_str = "select trade_date from wind_trade_date where trade_date > '2005-1-1'" # table = session.execute(sql_str) # trade_date_sorted_list = [t[0] for t in table.fetchall()] # trade_date_sorted_list.sort() # date_to = get_last(trade_date_sorted_list, lambda x: x <= date_ending) # data_len = len(wind_code_date_from_dic) if has_table: sql_str = """ SELECT wind_code, date_frm, if(null<end_date, null, end_date) date_to FROM ( SELECT info.wind_code, ifnull(trade_date, basedate) date_frm, null, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_index_info info LEFT OUTER JOIN (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) daily ON info.wind_code = daily.wind_code ) tt WHERE date_frm <= if(null<end_date, null, end_date) ORDER BY wind_code""".format(table_name=table_name) else: logger.warning('%s 不存在,仅使用 wind_index_info 表进行计算日期范围', table_name) sql_str = """ SELECT wind_code, date_frm, if(null<end_date, null, end_date) date_to FROM ( SELECT info.wind_code, basedate date_frm, null, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_index_info info ) tt WHERE date_frm <= if(null<end_date, null, end_date) ORDER BY wind_code;""" with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 获取每只股票需要获取日线数据的日期区间 begin_time = None wind_code_date_from_dic = { wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for wind_code, date_from, date_to in table.fetchall() if wind_code_set is None or wind_code in wind_code_set} data_len = len(wind_code_date_from_dic) logger.info('%d indexes will been import', data_len) for data_num, (wind_code, (date_from, date_to)) in enumerate(wind_code_date_from_dic.items()): if str_2_date(date_from) > date_to: logger.warning("%d/%d) %s %s - %s 跳过", data_num, data_len, wind_code, date_from, date_to) continue try: temp = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to) except APIError as exp: logger.exception("%d/%d) %s 执行异常", data_num, data_len, wind_code) if exp.ret_dic.setdefault('error_code', 0) in ( -40520007, # 没有可用数据 -40521009, # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月 ): continue else: break temp.reset_index(inplace=True) temp.rename(columns={'index': 'trade_date'}, inplace=True) temp.rename(columns=rename_col_dic, inplace=True) temp.trade_date = temp.trade_date.apply(str_2_date) temp['wind_code'] = wind_code bunch_insert_on_duplicate_update(temp, table_name, engine_md, dtype=dtype) logger.info('更新指数 %s 至 %s 成功', wind_code, date_2_str(date_to)) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name])
def import_smfund_daily(chain_param=None): """ :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務 :return: """ table_name = "wind_smfund_daily" has_table = engine_md.has_table(table_name) col_name_param_list = [ ('next_pcvdate', Date), ('a_nav', DOUBLE), ('b_nav', DOUBLE), ('a_fs_inc', DOUBLE), ('b_fs_inc', DOUBLE), ('cur_interest', DOUBLE), ('next_interest', DOUBLE), ('ptm_year', DOUBLE), ('anal_pricelever', DOUBLE), ('anal_navlevel', DOUBLE), ('t1_premium', DOUBLE), ('t2_premium', DOUBLE), ('dq_status', String(50)), ('tm_type', TEXT), ('code_p', String(20)), ('trade_date', Date), ('open', DOUBLE), ('high', DOUBLE), ('low', DOUBLE), ('close', DOUBLE), ('volume', DOUBLE), ('amt', DOUBLE), ('pct_chg', DOUBLE), ('open_a', DOUBLE), ('high_a', DOUBLE), ('low_a', DOUBLE), ('close_a', DOUBLE), ('volume_a', DOUBLE), ('amt_a', DOUBLE), ('pct_chg_a', DOUBLE), ('open_b', DOUBLE), ('high_b', DOUBLE), ('low_b', DOUBLE), ('close_b', DOUBLE), ('volume_b', DOUBLE), ('amt_b', DOUBLE), ('pct_chg_b', DOUBLE), ] # wset的调用参数 wind_indictor_str = ",".join([key for key, _ in col_name_param_list[:14]]) # 设置dtype类型 dtype = {key: val for key, val in col_name_param_list} date_ending = date.today() - ONE_DAY if datetime.now( ).hour < BASE_LINE_HOUR else date.today() date_ending_str = date_ending.strftime('%Y-%m-%d') # 对于 表格是否存在进行判断,取值 if has_table: sql_str = """ SELECT wind_code, ifnull(date, fund_setupdate) date_start, class_a_code, class_b_code FROM wind_smfund_info fi LEFT OUTER JOIN (SELECT code_p, adddate(max(trade_date), 1) trade_date_max FROM wind_smfund_daily GROUP BY code_p) smd ON fi.wind_code = smd.code_p WHERE fund_setupdate IS NOT NULL AND class_a_code IS NOT NULL AND class_b_code IS NOT NULL""" else: sql_str = """ SELECT wind_code, ifnull(date, fund_setupdate) date_start, class_a_code, class_b_code FROM wind_smfund_info WHERE fund_setupdate IS NOT NULL AND class_a_code IS NOT NULL AND class_b_code IS NOT NULL""" df = pd.read_sql(sql_str, engine_md) df.set_index('wind_code', inplace=True) data_len = df.shape[0] logger.info('分级基金数量: %d', data_len) index_start = 1 # 获取data_from for data_num, wind_code in enumerate( df.index, start=1): # 可调整 # [100:min([df_count, 200])] if data_num < index_start: continue logger.info('%d/%d) %s start to import', data_num, data_len, wind_code) date_from = df.loc[wind_code, 'date_start'] date_from = str_2_date(date_from) if type(date_from) not in (date, datetime, Timestamp): logger.info('%d/%d) %s has no fund_setupdate will be ignored', data_num, data_len, wind_code) # print(df.iloc[i, :]) continue date_from_str = date_from.strftime('%Y-%m-%d') if date_from > date_ending: logger.info('%d/%d) %s %s %s 跳过', data_num, data_len, wind_code, date_from_str, date_ending_str) continue # 设置wsd接口参数 field = "open,high,low,close,volume,amt,pct_chg" # wsd_cache(w, code, field, beginTime, today, "") try: df_p = invoker.wsd(wind_code, field, date_from_str, date_ending_str, "") except APIError as exp: logger.exception("%d/%d) %s 执行异常", data_num, data_len, wind_code) if exp.ret_dic.setdefault('error_code', 0) in ( -40520007, # 没有可用数据 -40521009, # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月 ): continue else: break if df_p is None: continue df_p.rename(columns=lambda x: x.swapcase(), inplace=True) df_p['code_p'] = wind_code code_a = df.loc[wind_code, 'class_a_code'] if code_a is None: print('%d %s has no code_a will be ignored' % (data_num, wind_code)) # print(df.iloc[i, :]) continue # 获得数据存储到df_a里面 # df_a = wsd_cache(w, code_a, field, beginTime, today, "") df_a = invoker.wsd(code_a, field, date_from_str, date_ending_str, "") df_a.rename(columns=lambda x: x.swapcase() + '_a', inplace=True) code_b = df.loc[wind_code, 'class_b_code'] # df_b = wsd_cache(w, code_b, field, beginTime, today, "") # 获取接口数据 获得df_b df_b = invoker.wsd(code_b, field, date_from_str, date_ending_str, "") df_b.columns = df_b.columns.map(lambda x: x.swapcase() + '_b') new_df = pd.DataFrame() for date_str in df_p.index: # time = date_str.date().strftime('%Y-%m-%d') field = "date=%s;windcode=%s;field=%s" % (date_str, wind_code, wind_indictor_str) # wset_cache(w, "leveragedfundinfo", field) temp = invoker.wset("leveragedfundinfo", field) temp['date'] = date_str new_df = new_df.append(temp) if DEBUG and len(new_df) > 8: break # 将获取信息进行表格联立 合并 new_df['next_pcvdate'] = new_df['next_pcvdate'].map( lambda x: str_2_date(x) if x is not None else x) new_df.set_index('date', inplace=True) one_df = pd.concat([df_p, df_a, df_b, new_df], axis=1) one_df.index.rename('trade_date', inplace=True) one_df.reset_index(inplace=True) # one_df['date'] = one_df['date'].map(lambda x: x.date()) one_df.rename(columns={'date': 'trade_date'}, inplace=True) # one_df.rename(columns={"index":'trade_date'},inplace=True) # one_df.set_index(['code_p', 'trade_date'], inplace=True) bunch_insert_on_duplicate_update(one_df, table_name, engine_md, dtype=dtype) logger.info('%d/%d) %s import success', data_num, data_len, wind_code) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) # build_primary_key([table_name]) # 手动创建主键, 主键不是wind_code create_pk_str = """ALTER TABLE {table_name} CHANGE COLUMN `code_p` `code_p` VARCHAR(20) NOT NULL , CHANGE COLUMN `trade_date` `trade_date` DATE NOT NULL , ADD PRIMARY KEY (`code_p`, `trade_date`)""".format( table_name=table_name) with with_db_session(engine_md) as session: session.execute(create_pk_str)
def import_pub_fund_daily(wind_code_set,chain_param=None): """ 导入公募基金日线数据 :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務 :return: """ logging.info("更新 wind_pub_fund_daily 开始") table_name = "wind_pub_fund_daily" has_table = engine_md.has_table(table_name) if has_table: sql_str = """ SELECT wind_code, date_frm, if(fund_maturitydate<end_date, fund_maturitydate, end_date) date_to FROM ( SELECT info.wind_code, ifnull(nav_date, fund_setupdate) date_frm, fund_maturitydate, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_pub_fund_info info LEFT OUTER JOIN (SELECT wind_code, adddate(max(nav_date),1) nav_date FROM {table_name} GROUP BY wind_code) daily ON info.wind_code = daily.wind_code ) tt WHERE date_frm <= if(fund_maturitydate<end_date, fund_maturitydate, end_date) ORDER BY wind_code;""".format(table_name=table_name) else: logger.warning('wind_pub_fund_daily 不存在,仅使用 wind_pub_fund_info 表进行计算日期范围') sql_str = """ SELECT wind_code, date_frm, if(fund_maturitydate<end_date, fund_maturitydate, end_date) date_to FROM ( SELECT info.wind_code, fund_setupdate date_frm, fund_maturitydate, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM wind_pub_fund_info info ) tt WHERE date_frm <= if(fund_maturitydate<end_date, fund_maturitydate, end_date) ORDER BY wind_code """ # with with_db_session(engine_md) as session: # # 获取每只股票最新交易日数据 # sql_str = 'select wind_code, max(nav_date) from wind_pub_fund_daily group by wind_code' # table = session.execute(sql_str) # trade_date_latest_dic = dict(table.fetchall()) # # 获取市场有效交易日数据 # sql_str = "select trade_date from wind_trade_date where trade_date > '1997-1-1'" # table = session.execute(sql_str) # trade_date_sorted_list = [t[0] for t in table.fetchall()] # trade_date_sorted_list.sort() # # 获取每只股票上市日期、退市日期 # table = session.execute('SELECT wind_code, setup_date, maturity_date FROM wind_pub_fund_info') # # # wind_code_date_dic = { # wind_code: (setup_date, maturity_date if maturity_date is None or maturity_date > UN_AVAILABLE_DATE else None) # for # wind_code, setup_date, maturity_date in table.fetchall()} # date_ending = date.today() - ONE_DAY if datetime.now().hour < BASE_LINE_HOUR else date.today() with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 begin_time = None # 获取date_from,date_to,将date_from,date_to做为value值 trade_date_latest_dic = { wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for wind_code, date_from, date_to in table.fetchall() if wind_code_set is None or wind_code in wind_code_set} data_df_list = [] wind_code_date_count = len(trade_date_latest_dic) logger.info('%d pub fund will been import into wind_pub_fund_daily', wind_code_date_count) # 获取股票量价等行情数据 field_col_name_list = [ ('NAV_date', Date), ('NAV_acc', String(20)), ('netasset_total', String(20)), ] wind_indictor_str = ",".join(key.lower() for key, _ in field_col_name_list) upper_col_2_name_dic = {name.upper(): name.lower() for name, _ in field_col_name_list} dtype = {key.lower(): val for key, val in field_col_name_list} dtype['wind_code'] = String(20) try: data_tot = 0 for data_num, (wind_code, (date_from, date_to)) in enumerate(trade_date_latest_dic.items()): # 初次加载阶段全量载入,以后 ipo_date为空的情况,直接warning跳过 # if setup_date is None: # # date_ipo = DATE_BASE # logging.warning("%d/%d) %s 缺少 ipo date", data_num, wind_code_date_count, wind_code) # continue # # 获取 date_from # if wind_code in trade_date_latest_dic: # date_latest_t1 = trade_date_latest_dic[wind_code] + ONE_DAY # date_from = max([date_latest_t1, DATE_BASE, setup_date]) # else: # date_from = max([DATE_BASE, setup_date]) # date_from = get_first(trade_date_sorted_list, lambda x: x >= date_from) # # 获取 date_to # if maturity_date is None: # date_to = date_ending # else: # date_to = min([maturity_date, date_ending]) # date_to = get_last(trade_date_sorted_list, lambda x: x <= date_to) # if date_from is None or date_to is None or date_from > date_to: # continue try: data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to, "unit=1;Days=Weekdays") except APIError as exp: logger.exception("%d/%d) %s 执行异常", data_num, wind_code_date_count, wind_code) if exp.ret_dic.setdefault('error_code', 0) in ( -40520007, # 没有可用数据 -40521009, # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月 ): continue else: break if data_df is None: logger.warning('%d/%d) %s has no ohlc data during %s %s', data_num, wind_code_date_count, wind_code, date_from, date_to) continue # 对数据进行 清理,整理,整合 data_df = data_df.drop_duplicates().dropna() data_df.rename(columns=upper_col_2_name_dic, inplace=True) logger.info('%d/%d) %d data of %s between %s and %s', data_num, wind_code_date_count, data_df.shape[0], wind_code, date_from, date_to) data_df['wind_code'] = wind_code data_tot += data_df.shape[0] data_df_list.append(data_df) if data_tot > 10000: bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype) data_df_list = [] data_tot = 0 # 仅仅调试时使用 # if DEBUG and len(data_df) > 2000: # break finally: # 导入数据库 if len(data_df_list) > 0: bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype) logging.info("更新 wind_pub_fund_daily 结束 %d 条信息被更新", len(data_df_list)) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name])