def fill_col_by_wss(col_name_dic, table_name):
    """Back-fill historical column data via wss
    :param col_name_dic: mapping of wss indicator name -> database column name
    :param table_name: target table
    :return:
    """
    col_name_list = [col_name.lower() for col_name in col_name_dic.keys()]
    # Fetch the full list of wind_code values from the target table
    sql_str = """select wind_code from %s""" % table_name
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        wind_code_set = {content[0] for content in table.fetchall()}
    data_count = len(wind_code_set)
    data_df_list = []
    try:
        for data_num, wind_code in enumerate(wind_code_set, start=1):
            # Fetch the requested indicators for this stock
            wind_indictor_str = col_name_list
            data_df = invoker.wss(wind_code, wind_indictor_str)
            if data_df is None:
                logger.warning('%d) %s has no data', data_num, wind_code)
                continue
            logger.debug('%d/%d) fetched %s', data_num, data_count, wind_code)
            # data_df['wind_code'] = wind_code
            data_df_list.append(data_df)
            # For debugging only
            # if data_num > 10:
            #     break
    finally:
        # Import into the database
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_df_all.index.rename('wind_code', inplace=True)
            data_df_all.reset_index(inplace=True)
            # Only rows whose requested columns are all empty need to be dropped
            is_na_s = None
            for col_name in col_name_dic.keys():
                col_name = col_name.upper()
                if is_na_s is None:
                    is_na_s = data_df_all[col_name].isna()
                else:
                    is_na_s = is_na_s & data_df_all[col_name].isna()
            data_df_not_null = data_df_all[~is_na_s]
            data_df_not_null.fillna('null', inplace=True)
            data_dic_list = data_df_not_null.to_dict(orient='records')
            sql_str = "update %s set " % table_name + \
                      ",".join(["%s=:%s" % (db_col_name, col_name.upper())
                                for col_name, db_col_name in col_name_dic.items()]) + \
                      " where wind_code=:wind_code"
            with with_db_session(engine_md) as session:
                session.execute(sql_str, params=data_dic_list)
            logger.info('%d data updated', data_df_not_null.shape[0])
        else:
            logger.warning('no data for update')
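# A minimal usage sketch for fill_col_by_wss (not part of the original module): keys of
# col_name_dic are wss indicator names, values are the matching database column names.
# Both the indicator/column names and the table name below are hypothetical examples.
if __name__ == '__main__':
    fill_col_by_wss({'ipo_date': 'ipo_date', 'mkt': 'mkt'}, 'wind_stock_info')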
def import_tushare_adj_factor(chain_param=None, ):
    """
    Import adjustment-factor data up to the most recent completed trading day.
    Before the 16:00 baseline hour the latest available day is yesterday; after it, today is included.
    :return:
    """
    table_name = 'tushare_stock_daily_adj_factor'
    logging.info("updating %s start", table_name)
    has_table = engine_md.has_table(table_name)
    # Check whether the table already exists to decide the date range.
    # Make sure the subquery references this function's own table, otherwise the extracted date range gets mixed up.
    if has_table:
        sql_str = """
            select cal_date
            FROM
            (
                select * from tushare_trade_date trddate
                where (cal_date > (SELECT max(trade_date) FROM {table_name}))
            ) tt
            where (is_open = 1
                   and cal_date <= if(hour(now()) < 16, subdate(curdate(), 1), curdate())
                   and exchange = 'SSE')""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT cal_date FROM tushare_trade_date trddate
            WHERE (trddate.is_open = 1
                   AND cal_date <= if(hour(now()) < 16, subdate(curdate(), 1), curdate())
                   AND exchange = 'SSE')
            ORDER BY cal_date"""
        logger.warning('%s does not exist, the date range is derived from tushare_trade_date only', table_name)
    with with_db_session(engine_md) as session:
        # Fetch the trading dates
        table = session.execute(sql_str)
        trddate = list(row[0] for row in table.fetchall())
    try:
        for i in range(len(trddate)):
            trade_date = datetime_2_str(trddate[i], STR_FORMAT_DATE_TS)
            data_df = pro.adj_factor(ts_code='', trade_date=trade_date)
            if len(data_df) > 0:
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR)
                logging.info("%s: %s, %d records updated", table_name, trade_date, data_count)
            else:
                logging.info("no data to update")
    finally:
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # build_primary_key([table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `trade_date` `trade_date` DATE NOT NULL AFTER `ts_code`,
                ADD PRIMARY KEY (`ts_code`, `trade_date`)""".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s: primary key on `ts_code`, `trade_date` created', table_name)
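# The SQL above uses "if(hour(now())<16, subdate(curdate(),1), curdate())" to cap the date range:
# before 16:00 only yesterday's data is treated as published, afterwards today's is included.
# A self-contained Python restatement of that baseline rule (the 16:00 cutoff is the assumption here):
from datetime import datetime, timedelta

def latest_available_trade_date(now=None):
    """Return the latest calendar date for which daily data is assumed to be published."""
    now = now or datetime.now()
    return now.date() - timedelta(days=1) if now.hour < 16 else now.date()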
def import_coin_info():
    """Fetch basic information on globally traded coins"""
    table_name = 'tushare_coin_info'
    has_table = engine_md.has_table(table_name)
    # dtype for the target columns
    dtype = {
        'coin': String(60),
        'en_name': String(60),
        'cn_name': String(60),
        'issue_date': Date,
        'amount': DOUBLE,
    }
    coinlist_df = pro.coinlist(start_date='20170101', end_date=date_2_str(date.today(), DATE_FORMAT_STR))
    data_count = bunch_insert_on_duplicate_update(coinlist_df, table_name, engine_md, dtype)
    logging.info("updating %s finished, %d new records", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `coin` `coin` VARCHAR(60) NOT NULL FIRST,
            CHANGE COLUMN `en_name` `en_name` VARCHAR(60) NOT NULL AFTER `coin`,
            ADD PRIMARY KEY (`coin`, `en_name`)""".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)
def import_macroeconomy_info(chain_param=None):
    """
    :param chain_param: used by celery task chains to pass the previous result to the next task
    :return:
    """
    table_name = 'wind_macroeconomy_info'
    has_table = engine_md.has_table(table_name)
    indicators_dic = [
        # RMB exchange rate
        ["M0067855", "us2rmb", "美元兑人民币即期汇率", "1994-01-04", None, '中国货币网'],
    ]
    dtype = {
        'key': String(20),
        'en_name': String(120),
        'cn_name': String(120),
        'begin_date': Date,
        'end_date': Date,
        'remark': Text,
    }
    name_list = ['key', 'en_name', 'cn_name', 'begin_date', 'end_date', 'remark']
    info_df = pd.DataFrame(data=indicators_dic, columns=name_list)
    data_count = bunch_insert_on_duplicate_update(info_df, table_name, engine_md, dtype)
    logger.info('%d records updated', data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `key` `key` VARCHAR(20) NOT NULL FIRST,
            ADD PRIMARY KEY (`key`)""".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)
        logger.info('%s: primary key on `key` created', table_name)
def merge_tushare_daily(date_from=None):
    """
    Merge the adj_factor, daily_basic, stock md and suspend tables into tushare_stock_daily
    :param date_from:
    :return:
    """
    table_name = 'tushare_stock_daily'
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(trade_date),1) from {table_name}".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    tushare_adj_factor_df = get_tushare_daily_df('tushare_stock_daily_adj_factor', date_from)
    tushare_daily_basic_df = get_tushare_daily_df('tushare_stock_daily_basic', date_from)
    tushare_stock_daily_md_df = get_tushare_daily_df('tushare_stock_daily_md', date_from)
    # tushare_stock_daily_suspend_df = get_tushare_daily_df('tushare_stock_daily_suspend', date_from)
    tushare_daily_two_form_df = pd.merge(tushare_adj_factor_df, tushare_daily_basic_df,
                                         how='outer', on=['ts_code', 'trade_date'])
    tushare_daily_df = pd.merge(tushare_daily_two_form_df, tushare_stock_daily_md_df,
                                how='outer', on=['ts_code', 'trade_date'])
    # tushare_daily_df = pd.merge(tushare_daily_three_form_df, tushare_stock_daily_suspend_df, how='outer',
    #                             on=['ts_code'])
    # dtype for the target columns
    dtype = {}
    for dic in [DTYPE_TUSHARE_SUSPEND, DTYPE_TUSHARE_DAILY_BASIC, DTYPE_TUSHARE_DAILY]:
        for key, val in dic.items():
            dtype[key] = val
    tushare_daily_df["close"] = tushare_daily_df.apply(get_merge, axis=1)
    tushare_daily_df.drop(['close_x', 'close_y'], axis=1, inplace=True)
    data_count = bunch_insert_on_duplicate_update(tushare_daily_df, table_name, engine_md, dtype)
    logging.info('%s updated %d records', table_name, data_count)
    # if not has_table and engine_md.has_table(table_name):
    #     alter_table_2_myisam(engine_md, table_name)
    #     build_primary_key([table_name])
    return tushare_daily_df
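# merge_tushare_daily relies on get_merge (defined elsewhere in the project) to reconcile the two
# 'close' columns produced by the outer merges. A minimal sketch of such a row-wise helper, under
# the assumption that pd.merge left the usual close_x / close_y suffixes, could look like this:
def _pick_close(row):
    """Illustrative only: prefer close_x and fall back to close_y when it is missing."""
    return row['close_y'] if pd.isna(row['close_x']) else row['close_x']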
def partition_table_pytdx_stock_tick():
    sql_str = """ALTER TABLE `pytdx_stock_tick`
        PARTITION BY RANGE COLUMNS(`date`) (
            PARTITION p2000 VALUES LESS THAN ('2001-01-01'),
            PARTITION p2001 VALUES LESS THAN ('2002-01-01'),
            PARTITION p2002 VALUES LESS THAN ('2003-01-01'),
            PARTITION p2003 VALUES LESS THAN ('2004-01-01'),
            PARTITION p2004 VALUES LESS THAN ('2005-01-01'),
            PARTITION p2005 VALUES LESS THAN ('2006-01-01'),
            PARTITION p2006 VALUES LESS THAN ('2007-01-01'),
            PARTITION p2007 VALUES LESS THAN ('2008-01-01'),
            PARTITION p2008 VALUES LESS THAN ('2009-01-01'),
            PARTITION p2009 VALUES LESS THAN ('2010-01-01'),
            PARTITION p2010 VALUES LESS THAN ('2011-01-01'),
            PARTITION p2011 VALUES LESS THAN ('2012-01-01'),
            PARTITION p2012 VALUES LESS THAN ('2013-01-01'),
            PARTITION p2013 VALUES LESS THAN ('2014-01-01'),
            PARTITION p2014 VALUES LESS THAN ('2015-01-01'),
            PARTITION p2015 VALUES LESS THAN ('2016-01-01'),
            PARTITION p2016 VALUES LESS THAN ('2017-01-01'),
            PARTITION p2017 VALUES LESS THAN ('2018-01-01'),
            PARTITION p2018 VALUES LESS THAN ('2019-01-01'),
            PARTITION p2019 VALUES LESS THAN ('2020-01-01'),
            PARTITION p2020 VALUES LESS THAN ('2021-01-01'),
            PARTITION pother VALUES LESS THAN (MAXVALUE)
        )
        """
    datetime_start = datetime.now()
    with with_db_session(engine_md) as session:
        session.execute(sql_str)
    datetime_end = datetime.now()
    span = datetime_end - datetime_start
    print('elapsed time', span)
def clean_fund_nav(date_str):
    """
    The Wind database contains NAV records whose scale is inconsistent across dates:
    for example, a NAV recorded as 104 on one day becomes 1.04 in the following period,
    which distorts the NAV return series. This script corrects that kind of deviation.
    :param date_str:
    :return:
    """
    sql_str = """SELECT fn_before.wind_code,
            fn_before.nav_date nav_date_before, fn_after.nav_date nav_date_after,
            fn_before.nav_acc nav_acc_before, fn_after.nav_acc nav_acc_after,
            fn_after.nav_acc / fn_before.nav_acc nav_acc_pct
        FROM fund_nav fn_before, fund_nav fn_after,
        (
            SELECT wind_code,
                max(if(nav_date < %s, nav_date, NULL)) nav_date_before,
                min(if(nav_date >= %s, nav_date, NULL)) nav_date_after
            FROM fund_nav
            GROUP BY wind_code
            HAVING nav_date_before IS NOT NULL AND nav_date_after IS NOT NULL
        ) fn_date
        WHERE fn_before.nav_date = fn_date.nav_date_before AND fn_before.wind_code = fn_date.wind_code
            AND fn_after.nav_date = fn_date.nav_date_after AND fn_after.wind_code = fn_date.wind_code
            AND fn_after.nav_acc / fn_before.nav_acc < 0.5"""
    data_df = pd.read_sql(sql_str, engine_md, params=[date_str, date_str])
    data_count = data_df.shape[0]
    if data_count == 0:
        logger.info('no data for clean on %s', date_str)
        return
    logger.info('\n%s', data_df)
    data_list = data_df.to_dict(orient='records')
    with with_db_session(engine_md) as session:
        for content in data_list:
            wind_code = content['wind_code']
            nav_date_before = content['nav_date_before']
            logger.info('update wind_code=%s nav_date<=%s', wind_code, nav_date_before)
            sql_str = "UPDATE fund_nav SET nav = nav/100, nav_acc = nav_acc/100 " \
                      "WHERE wind_code = :wind_code AND nav_date <= :nav_date"
            session.execute(sql_str, params={'wind_code': wind_code, 'nav_date': nav_date_before})
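# A self-contained illustration (not part of the original module) of the anomaly clean_fund_nav
# targets: an accumulated NAV suddenly recorded at roughly 1/100 of its previous scale
# (e.g. 104 -> 1.04). The 0.5 ratio threshold mirrors the SQL above; the sample series is made up.
def _demo_nav_scale_break():
    """Flag the record where the accumulated NAV scale drops by ~100x."""
    nav_acc = pd.Series([103.0, 104.0, 1.04, 1.05])
    # True at the first record after the scale change; earlier records would then be divided by 100,
    # which is exactly what the UPDATE statement above does.
    return (nav_acc / nav_acc.shift(1)) < 0.5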
def repair_table():
    datetime_start = datetime.now()
    with with_db_session(engine_md) as session:
        session.execute('REPAIR TABLE pytdx_stock_tick USE_FRM')
    datetime_end = datetime.now()
    span = datetime_end - datetime_start
    print('elapsed time', span)
def import_tushare_namechange(chain_param=None):
    """
    Import listed-company name-change records, starting from the latest recorded start_date
    (or from the earliest list_date in tushare_stock_info when the table does not exist yet).
    :return:
    """
    table_name = 'tushare_stock_namechange'
    logging.info("updating %s start", table_name)
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """select max(start_date) start_date FROM md_integration.tushare_stock_namechange"""
    else:
        sql_str = """select min(list_date) start_date FROM md_integration.tushare_stock_info"""
    with with_db_session(engine_md) as session:
        # Determine the start date
        table = session.execute(sql_str)
        start_date = list(row[0] for row in table.fetchall())
        start_date = datetime_2_str(start_date[0], STR_FORMAT_DATE_TS)
        end_date = datetime_2_str(date.today(), STR_FORMAT_DATE_TS)
    try:
        data_df = pro.namechange(start_date=start_date, end_date=end_date,
                                 fields='ts_code,name,start_date,end_date,change_reason')
        if len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_NAMECHANGE)
            logging.info("updating %s finished, %d name-change records updated", table_name, data_count)
        else:
            logging.info("no data to update")
    finally:
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # build_primary_key([table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `start_date` `start_date` DATE NOT NULL AFTER `ts_code`,
                ADD PRIMARY KEY (`ts_code`, `start_date`)""".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s: primary key on `ts_code`, `start_date` created', table_name)
def import_tushare_stock_pledge_stat(chain_param=None, ts_code_set=None):
    """
    Import stock pledge statistics for every ts_code listed in tushare_stock_info.
    :return:
    """
    table_name = 'tushare_stock_pledge_stat'
    logging.info("updating %s start", table_name)
    has_table = engine_md.has_table(table_name)
    # The code list is taken from tushare_stock_info
    sql_str = """SELECT ts_code FROM tushare_stock_info"""
    logger.warning('using tushare_stock_info to determine the ts_code range for pledge data')
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        ts_code_list = list(row[0] for row in table.fetchall())
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(ts_code_list)
    logger.info('pledge data of %d stocks will be inserted into tushare_stock_pledge_stat', data_len)
    # Buffer data_df frames in data_df_list before bulk inserting
    cycles = 1
    try:
        for ts_code in ts_code_list:
            data_df = invoke_pledge_stat(ts_code=ts_code)
            logger.warning('fetched pledge data for %s: %d records', ts_code, len(data_df))
            # Buffer the data
            if data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Insert once the buffer exceeds the threshold
            if data_count >= 10000 and len(data_df_list) > 0:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_PLEDGE_STAT)
                logger.warning('%d pledge records updated', data_count)
                all_data_count += data_count
                data_df_list, data_count = [], 0
            # For debugging only
            cycles += 1
            if DEBUG and cycles > 2:
                break
    finally:
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_PLEDGE_STAT)
            all_data_count = all_data_count + data_count
        logging.info("updating %s finished, %d records updated in total", table_name, all_data_count)
def get_trade_date_list_sorted(exch_code='SZSE') -> list:
    """
    Fetch the sorted list of trading dates
    :return:
    """
    sql_str = "SELECT trade_date FROM wind_trade_date WHERE exch_code = :exch_code ORDER BY trade_date"
    with with_db_session(engine_md) as session:
        trade_date_list_sorted = [content[0] for content in
                                  session.execute(sql_str, params={'exch_code': exch_code}).fetchall()]
    return trade_date_list_sorted
def update_from_info_table(table_name):
    """
    Update the code_mapping table from the given info table, using its ths_code or wind_code
    to fill the corresponding code_mapping fields
    :param table_name:
    :return:
    """
    ifind_cap_type = search_cap_type(ifind_info_table_pattern, table_name)
    wind_cap_type = search_cap_type(wind_info_table_pattern, table_name)
    if ifind_cap_type is not None:
        if ifind_cap_type.find('fund') >= 0:
            update_from_ifind_fund(table_name, ifind_cap_type)
        else:
            sql_str = """insert into code_mapping(unique_code, ths_code, market, type)
                select ths_code, ths_code,
                    substring(ths_code, locate('.', ths_code) + 1, length(ths_code)) market,
                    '{cap_type}'
                from {table_name}
                on duplicate key update
                    ths_code=values(ths_code), market=values(market), type=values(type)
                """.format(table_name=table_name, cap_type=ifind_cap_type)
            with with_db_session(engine_md) as session:
                rslt = session.execute(sql_str)
                logger.debug('%s: %d code_mapping records updated', table_name, rslt.rowcount)
                session.commit()
    elif wind_cap_type is not None:
        if wind_cap_type.find('fund') >= 0:
            update_from_wind_fund(table_name, wind_cap_type)
        else:
            sql_str = """insert into code_mapping(unique_code, wind_code, market, type)
                select wind_code, wind_code,
                    substring(wind_code, locate('.', wind_code) + 1, length(wind_code)) market,
                    '{cap_type}'
                from {table_name}
                on duplicate key update
                    wind_code=values(wind_code), market=values(market), type=values(type)
                """.format(table_name=table_name, cap_type=wind_cap_type)
            with with_db_session(engine_md) as session:
                rslt = session.execute(sql_str)
                logger.debug('%s: %d code_mapping records updated', table_name, rslt.rowcount)
                session.commit()
    else:
        raise ValueError('updating code_mapping from %s is not supported' % table_name)
def import_trade_date():
    """
    Incrementally import trading dates into the wind_trade_date table; by default roughly the next
    300 days of trading dates are imported.
    2018-01-17: HK trading dates added. To limit the impact on existing code a separate
    wind_trade_date_hk table was considered; the two tables may be merged later.
    :return:
    """
    table_name = 'wind_trade_date'
    has_table = engine_md.has_table(table_name)
    exch_code_trade_date_dic = {}
    if has_table:
        with with_db_session(engine_md) as session:
            try:
                table = session.execute(
                    'SELECT exch_code,max(trade_date) FROM {table_name} GROUP BY exch_code'.format(
                        table_name=table_name))
                exch_code_trade_date_dic = {exch_code: trade_date for exch_code, trade_date in table.fetchall()}
            except Exception:
                logger.exception("failed to fetch the latest trading dates")
    exchange_code_dict = {
        "HKEX": "香港",
        "NYSE": "纽约",
        "SZSE": "深圳",
        "TWSE": "台湾",
        "NASDAQ": "纳斯达克",
        "AMEX": "美国证券交易所",
        "TSE": "东京",
        "LSE": "伦敦",
        "SGX": "新加坡",
    }
    exchange_code_list = list(exchange_code_dict.keys())
    for exchange_code in exchange_code_list:
        if exchange_code in exch_code_trade_date_dic:
            trade_date_max = exch_code_trade_date_dic[exchange_code]
            trade_date_start = (trade_date_max + timedelta(days=1)).strftime(STR_FORMAT_DATE)
        else:
            trade_date_start = '1980-01-01'
        end_date_str = (date.today() + timedelta(days=310)).strftime(STR_FORMAT_DATE)
        if exchange_code is None or exchange_code == "":
            trade_date_list = invoker.tdays(trade_date_start, end_date_str)
        else:
            trade_date_list = invoker.tdays(trade_date_start, end_date_str, "TradingCalendar=%s" % exchange_code)
        if trade_date_list is None:
            logger.warning("no trading dates returned for %s", exchange_code)
            continue
        date_count = len(trade_date_list)
        if date_count > 0:
            logger.info("%d trading dates will be imported", date_count)
            trade_date_df = pd.DataFrame({'trade_date': trade_date_list})
            trade_date_df['exch_code'] = exchange_code
            bunch_insert_on_duplicate_update(
                trade_date_df, table_name, engine_md,
                dtype={'trade_date': Date, 'exch_code': String(20)},
                myisam_if_create_table=True,
                primary_keys=['trade_date', 'exch_code'],
                schema=config.DB_SCHEMA_MD)
            logger.info('%s: %d trading dates imported into %s', exchange_code, date_count, table_name)
def import_coin_info(chain_param=None, ):
    """Import basic coin information into cmc_coin_v1_info"""
    table_name = "cmc_coin_v1_info"
    logging.info("updating %s start", table_name)
    has_table = engine_md.has_table(table_name)
    # url = 'https://api.coinmarketcap.com/v2/listings/'
    # dtype = {
    #     'id': String(60),
    #     'name': String(60),
    #     'symbol': String(20),
    #     'website_slug': String(60),
    # }
    url = 'https://api.coinmarketcap.com/v1/ticker/?limit=0'
    dtype = {
        'id': String(60),
        'name': String(60),
        'symbol': String(20),
        'rank': Integer,
        'price_usd': DOUBLE,
        'price_btc': DOUBLE,
        '24h_volume_usd': DOUBLE,
        'market_cap_usd': DOUBLE,
        'available_supply': DOUBLE,
        'total_supply': DOUBLE,
        'max_supply': DOUBLE,
        'percent_change_1h': DOUBLE,
        'percent_change_24h': DOUBLE,
        'percent_change_7d': DOUBLE,
        'last_updated': DATETIME,
    }
    rsp = requests.get(url)
    if rsp.status_code != 200:
        raise ValueError('listings request failed')
    json = rsp.json()
    data_df = pd.DataFrame(json)
    data_df['last_updated'] = data_df['last_updated'].apply(
        lambda x: None if x is None else datetime.datetime.fromtimestamp(float(x)))
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype)
    logging.info("updating %s finished, %d records in total", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        # build_primary_key([table_name])
        create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST,
            ADD PRIMARY KEY (`id`)""".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)
def get_all_instrument_type():
    sql_str = "select wind_code from wind_future_daily group by wind_code"
    with with_db_session(engine_md) as session:
        instrument_list = [row[0] for row in session.execute(sql_str).fetchall()]
    re_pattern_instrument_type = re.compile(r'\D+(?=\d{3})', re.IGNORECASE)
    instrument_type_set = {re_pattern_instrument_type.search(name).group() for name in instrument_list}
    return list(instrument_type_set)
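# The lookahead regex above strips the numeric part of a contract code to recover the instrument
# type, e.g. 'RB1810.SHF' -> 'RB'. A quick self-contained check of that behaviour:
def _demo_instrument_type():
    pattern = re.compile(r'\D+(?=\d{3})', re.IGNORECASE)
    assert pattern.search('RB1810.SHF').group() == 'RB'
    assert pattern.search('AU1906.SHF').group() == 'AU'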
def get_suspend_to_dic():
    """Convert tushare_stock_daily_suspend into a dict of date ranges; key: ts_code, value: list of ranges"""
    with with_db_session(engine_md) as session:
        sql_str = """SELECT ts_code, suspend_date, resume_date FROM tushare_stock_daily_suspend"""
        table = session.execute(sql_str)
        code_date_range_dic = defaultdict(list)
        for ts_code, suspend_date, resume_date in table.fetchall():
            if suspend_date is None:
                continue
            code_date_range_dic[ts_code].append(
                (suspend_date, suspend_date if resume_date is None else resume_date))
    return code_date_range_dic
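# A small helper sketch (not part of the original module) showing how the returned dictionary is
# typically consumed: check whether a stock was suspended on a given date.
def is_suspended(code_date_range_dic, ts_code, query_date):
    """True if query_date falls inside any (suspend_date, resume_date) range of ts_code."""
    return any(start <= query_date <= end for start, end in code_date_range_dic.get(ts_code, []))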
def import_trade_date(chain_param=None):
    """
    Incrementally import trading dates into the tushare_trade_date table; by default roughly the
    next 300 days of trading dates are imported.
    :return:
    """
    exch_code_trade_date_dic = {}
    with with_db_session(engine_md) as session:
        try:
            table = session.execute('SELECT exch_code,max(trade_date) FROM ifind_trade_date GROUP BY exch_code')
            exch_code_trade_date_dic = {exch_code: trade_date for exch_code, trade_date in table.fetchall()}
        except Exception:
            logger.exception("failed to fetch the latest trading dates")
    exchange_code_dict = {
        "HKEX": "香港联合交易所",
        "SZSE": "深圳证券交易所",
        "SSE": "上海证券交易所",
    }
    exchange_code_list = list(exchange_code_dict.keys())
    for exchange_code in exchange_code_list:
        if exchange_code in exch_code_trade_date_dic:
            trade_date_max = exch_code_trade_date_dic[exchange_code]
            start_date_str = (trade_date_max + timedelta(days=1)).strftime(STR_FORMAT_DATE_TS)
        else:
            start_date_str = '19900101'
        end_date_str = (date.today() + timedelta(days=310)).strftime(STR_FORMAT_DATE_TS)
        trade_date_df = pro.trade_cal(exchange_id='', start_date=start_date_str, end_date=end_date_str)
        if trade_date_df is None or trade_date_df.shape[0] == 0:
            logger.warning('%s[%s] [%s - %s] no trading dates returned',
                           exchange_code_dict[exchange_code], exchange_code, start_date_str, end_date_str)
            continue
        date_count = trade_date_df.shape[0]
        logger.info("%s[%s] %d trading dates will be imported into %s",
                    exchange_code_dict[exchange_code], exchange_code, date_count, 'tushare_trade_date')
        # with with_db_session(engine_md) as session:
        #     session.execute("INSERT INTO ifind_trade_date (trade_date,exch_code) VALUE (:trade_date,:exch_code)",
        #                     params=[{'trade_date': trade_date, 'exch_code': exchange_code}
        #                             for trade_date in trade_date_df['time']])
        # trade_date_df['exch_code'] = exchange_code
        # trade_date_df.rename(columns={'cal_date': 'trade_date'}, inplace=True)
        trade_date_df.to_sql('tushare_trade_date', engine_md, if_exists='append', index=False,
                             dtype={'exchange_id': String(10), 'cal_date': Date, 'is_open': DOUBLE})
        logger.info('%s[%s] %d trading dates imported into %s',
                    exchange_code_dict[exchange_code], exchange_code, date_count, 'tushare_trade_date')
def get_trade_date_list_sorted(exch_code='SZSE') -> list:
    """
    Fetch the sorted list of trading dates
    :return:
    """
    sql_str = "select trade_date from wind_trade_date where exch_code = :exch_code order by trade_date"
    with with_db_session(engine_md) as session:
        trade_date_list_sorted = [content[0] for content in
                                  session.execute(sql_str, params={'exch_code': exch_code}).fetchall()]
    return trade_date_list_sorted
def update_trade_date_latest(wind_code_trade_date_latest):
    """
    Set the trade_date_latest field of the fund_info table to the most recent trading date
    :param wind_code_trade_date_latest:
    :return:
    """
    logger.info("start setting fund_info.trade_date_latest to the most recent trading date")
    if len(wind_code_trade_date_latest) > 0:
        params = [{'wind_code': wind_code, 'trade_date_latest': trade_date_latest}
                  for wind_code, trade_date_latest in wind_code_trade_date_latest.items()]
        with with_db_session(engine_md) as session:
            session.execute(
                'UPDATE fund_info SET trade_date_latest = :trade_date_latest WHERE wind_code = :wind_code',
                params)
        logger.info('%d fund_info records updated', len(wind_code_trade_date_latest))
def import_tushare_fund_portfolio(chain_param=None):
    """
    Import fund portfolio data for every ts_code listed in tushare_fund_basic.
    :return:
    """
    table_name = 'tushare_fund_portfolio'
    logging.info("updating %s start", table_name)
    has_table = engine_md.has_table(table_name)
    # if has_table is True:
    #     sql_str = """"""
    # else:
    sql_str = """SELECT ts_code FROM md_integration.tushare_fund_basic"""
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        code_list = list(row[0] for row in table.fetchall())
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(code_list)
    try:
        for num, ts_code in enumerate(code_list, start=1):
            data_df = invoke_fund_portfolio(ts_code=ts_code)
            logging.info("fetched %s: %d fund portfolio records, progress %d/%d",
                         ts_code, len(data_df), num, data_len)
            # Buffer the data
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Insert once the buffer exceeds the threshold
            if data_count >= 20000:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, DTYPE_TUSHARE_FUND_PORTFOLIO)
                logging.info("%s: %d records updated", table_name, data_count)
                all_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # Insert the remaining buffered data
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_FUND_PORTFOLIO)
            all_data_count = all_data_count + data_count
        logging.info("updating %s finished, %d records updated", table_name, all_data_count)
def import_trade_date(chain_param=None):
    """
    Incrementally import trading dates into the tushare_trade_date table; by default roughly the
    next 300 days of trading dates are imported.
    :return:
    """
    table_name = "tushare_trade_date"
    exch_code_trade_date_dic = {}
    with with_db_session(engine_md) as session:
        try:
            table = session.execute('SELECT exchange,max(cal_date) FROM {table_name} GROUP BY exchange'.format(
                table_name=table_name))
            exch_code_trade_date_dic = {exch_code: trade_date for exch_code, trade_date in table.fetchall()}
        except Exception:
            logger.exception("failed to fetch the latest trading dates")
    exchange_code_dict = {
        "HKEX": "香港联合交易所",
        "SZSE": "深圳证券交易所",
        "SSE": "上海证券交易所",
    }
    exchange_code_list = list(exchange_code_dict.keys())
    for exchange_code in exchange_code_list:
        if exchange_code in exch_code_trade_date_dic:
            trade_date_max = exch_code_trade_date_dic[exchange_code]
            start_date_str = (trade_date_max + timedelta(days=1)).strftime(STR_FORMAT_DATE_TS)
        else:
            start_date_str = '19900101'
        end_date_str = (date.today() + timedelta(days=310)).strftime(STR_FORMAT_DATE_TS)
        trade_date_df = pro.trade_cal(exchange_id='', start_date=start_date_str, end_date=end_date_str)
        if trade_date_df is None or trade_date_df.shape[0] == 0:
            logger.warning('%s[%s] [%s - %s] no trading dates returned',
                           exchange_code_dict[exchange_code], exchange_code, start_date_str, end_date_str)
            continue
        date_count = trade_date_df.shape[0]
        logger.info("%s[%s] %d trading dates will be imported into %s",
                    exchange_code_dict[exchange_code], exchange_code, date_count, table_name)
        date_count = bunch_insert_on_duplicate_update(
            trade_date_df, table_name, engine_md,
            dtype={'exchange': String(10), 'cal_date': Date, 'is_open': DOUBLE},
            myisam_if_create_table=True,
            primary_keys=['exchange', 'cal_date'],
            schema=config.DB_SCHEMA_MD)
        logger.info('%s[%s] %d trading dates imported into %s',
                    exchange_code_dict[exchange_code], exchange_code, date_count, table_name)
def merge_tick_2_bar():
    """
    Merge tick data into 1-minute bars
    :return:
    """
    table_name = 'pytdx_stock_min1'
    tick_table_name = 'pytdx_tick_temp'
    has_table = engine_md.has_table(table_name)
    sql_create_table = f"""CREATE TABLE `{table_name}` (
        `ts_code` varchar(12) NOT NULL,
        `trade_date` datetime DEFAULT NULL,
        `date` date NOT NULL,
        `time` time NOT NULL,
        `open` DOUBLE DEFAULT NULL,
        `high` DOUBLE DEFAULT NULL,
        `low` DOUBLE DEFAULT NULL,
        `close` DOUBLE DEFAULT NULL,
        `vol` int(11) DEFAULT NULL,
        `amount` DOUBLE DEFAULT NULL,
        PRIMARY KEY (`ts_code`,`date`,`time`)
        ) ENGINE=MyISAM DEFAULT CHARSET=utf8"""
    sql_str = f"""insert ignore {table_name}
        select bar.ts_code, bar.trade_date, bar.date, bar.time,
            tick_open.price open, bar.high, bar.low, tick_close.price close, bar.vol, bar.amount
        from
        (
            select ts_code, trade_date, time, date,
                min(`index`) index_min, max(`index`) index_max,
                max(price) high, min(price) low, sum(vol) vol, sum(price*vol) amount
            from {tick_table_name}
            group by ts_code, date, time
        ) bar
        left join {tick_table_name} tick_open
            on bar.ts_code = tick_open.ts_code and bar.date = tick_open.date and bar.index_min = tick_open.index
        left join {tick_table_name} tick_close
            on bar.ts_code = tick_close.ts_code and bar.date = tick_close.date and bar.index_max = tick_close.index"""
    with with_db_session(engine_md) as session:
        # Create the target table if needed
        if not has_table:
            session.execute(sql_create_table)
            logger.info('table %s created', table_name)
        # Merge tick data into min1 bars
        rslt = session.execute(sql_str)
        insert_count = rslt.rowcount
        logger.info('%d 1-minute bars merged', insert_count)
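# A rough pandas equivalent of the SQL aggregation above (not part of the original module), handy
# for sanity-checking a small batch of ticks outside the database. Column names follow the tick
# table; the frame is assumed to carry the per-day arrival order in its `index` column.
def ticks_to_min1(tick_df):
    tick_df = tick_df.sort_values('index')
    grouped = tick_df.groupby(['ts_code', 'date', 'time'])
    bar_df = grouped.agg(open=('price', 'first'), high=('price', 'max'), low=('price', 'min'),
                         close=('price', 'last'), vol=('vol', 'sum'))
    # amount mirrors sum(price*vol) in the SQL
    bar_df['amount'] = grouped.apply(lambda g: (g['price'] * g['vol']).sum())
    return bar_df.reset_index()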
def save_fund_info(fund_info_df, col_type):
    """
    Save the updated fund_info data into the wind_fund_info table, then clean and sync the new
    records into the fund_info table
    :param fund_info_df:
    :param col_type:
    :return:
    """
    # Column name / type parameter list
    col_name_param_list = [
        ('SEC_NAME', String(200)),
        ('FUND_SETUPDATE', Date),
        ('FUND_MATURITYDATE', Date),
        ('FUND_MGRCOMP', String(200)),
        ('FUND_EXISTINGYEAR', String(200)),
        ('FUND_PTMYEAR', String(200)),
        ('FUND_TYPE', String(200)),
        ('FUND_FUNDMANAGER', String(200)),
    ]
    col_name_param_dic = {col_name.upper(): col_name.lower() for col_name, _ in col_name_param_list}
    dtype = {col_name.lower(): val for col_name, val in col_name_param_list}
    dtype['wind_code'] = String(200)
    dtype['strategy_type'] = String(200)
    if fund_info_df.shape[0] == 0:
        return
    table_name = 'wind_fund_info'
    fund_info_df.rename(columns=col_name_param_dic, inplace=True)
    fund_info_df.index.names = ['wind_code']
    fund_info_df.reset_index(inplace=True)
    fund_info_df.drop_duplicates(inplace=True)
    data_count = bunch_insert_on_duplicate_update(fund_info_df, table_name, engine_md, dtype=dtype)
    logging.info('%d funds inserted', data_count)
    # TODO: existing records also need a periodic refresh
    # Insert the incremental rows of wind_fund_info into fund_info
    sql_str = """
        INSERT INTO fund_info(wind_code, sec_name, strategy_type, fund_setupdate, fund_maturitydate,
            fund_mgrcomp, fund_existingyear, fund_ptmyear, fund_type, fund_fundmanager)
        SELECT wfi.wind_code, wfi.sec_name, wfi.strategy_type, wfi.fund_setupdate, wfi.fund_maturitydate,
            wfi.fund_mgrcomp, wfi.fund_existingyear, wfi.fund_ptmyear, wfi.fund_type, wfi.fund_fundmanager
        FROM wind_fund_info wfi
        LEFT OUTER JOIN fund_info fi ON wfi.wind_code = fi.wind_code
        WHERE fi.wind_code IS NULL"""
    with with_db_session(engine_md) as session:
        session.execute(sql_str)
    logging.info('new data was inserted into fund_info from wind_fund_info table')
    return fund_info_df
def stat_fund(date_from, date_to):
    sql_str = """
        SELECT (@rowNum:=@rowNum+1) AS rowNo, t.*
        FROM
        (
            SELECT date_from.ts_code, basic.name, basic.management,
                date_from.date_from, nav_to.end_date,
                nav_from.accum_nav nav_from, nav_to.accum_nav nav_to,
                nav_to.accum_nav/nav_from.accum_nav pct_chg
            FROM
            (
                SELECT ts_code, max(end_date) date_from FROM tushare_fund_nav
                WHERE end_date <= :date_from GROUP BY ts_code
            ) date_from
            JOIN
            (
                SELECT ts_code, max(end_date) date_to FROM tushare_fund_nav
                WHERE end_date <= :date_to GROUP BY ts_code
            ) date_to
            ON date_from.ts_code = date_to.ts_code
            JOIN tushare_fund_nav nav_from
            ON date_from.ts_code = nav_from.ts_code AND date_from.date_from = nav_from.end_date
            JOIN tushare_fund_nav nav_to
            ON date_to.ts_code = nav_to.ts_code AND date_to.date_to = nav_to.end_date
            JOIN tushare_fund_basic basic
            ON date_from.ts_code = basic.ts_code
            WHERE basic.name NOT LIKE '%B%' and basic.name NOT LIKE '%A%' and basic.name NOT LIKE '%C%'
            HAVING nav_to.accum_nav IS NOT NULL AND nav_from.accum_nav IS NOT NULL
                and pct_chg != 1 and pct_chg < 2
            ORDER BY nav_to.accum_nav/nav_from.accum_nav
        ) t"""
    # data_df = pd.read_sql(sql_str, engine_md)
    with with_db_session(engine_md) as session:
        session.execute("Select (@rowNum :=0) ;")
        table = session.execute(sql_str, params={'date_from': date_2_str(date_from),
                                                 'date_to': date_2_str(date_to)})
        data = [[d for d in row] for row in table.fetchall()]
    data_df = pd.DataFrame(data, columns=[
        'rowNo', 'ts_code', 'name', 'management', 'date_from', 'date_to', 'nav_from', 'nav_to', 'pct_chg'])
    return data_df.describe()['pct_chg']
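# Example invocation (arbitrary dates): returns the describe() summary of pct_chg, i.e. the
# distribution of accumulated-NAV ratios between the two dates. Requires a configured engine_md
# and populated tushare_fund_nav / tushare_fund_basic tables.
if __name__ == '__main__':
    print(stat_fund('2018-01-01', '2018-12-31'))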
def add_new_col_data(col_name, param, chain_param=None, db_col_name=None, col_type_str='DOUBLE',
                     wind_code_set: set = None):
    """
    1) Add the new column to the wind_stock_daily table
    2) Append the data to the wind_ckdvp_stock table
    3) Step 2 may take more than one day; once it finishes, copy the wind_ckdvp_stock data into the daily table
    :param col_name: name of the column to add
    :param param: parameter
    :param chain_param: used by celery task chains to pass the previous result to the next task
    :param db_col_name: defaults to None, in which case it equals col_name
    :param col_type_str: DOUBLE, VARCHAR(20), INTEGER, etc. (case-insensitive)
    :param wind_code_set: defaults to None; otherwise only the given wind_code values are updated
    :return:
    """
    if db_col_name is None:
        # Defaults to col_name
        db_col_name = col_name
    # Add the db_col_name column to the table if it does not exist yet
    add_col_2_table(engine_md, 'wind_stock_daily', db_col_name, col_type_str)
    # Incrementally save the data into the wind_ckdvp_stock table
    all_finished = add_data_2_ckdvp(col_name, param, wind_code_set)
    # Copy the data into the daily table:
    # join wind_ckdvp_stock on code/key/param/date and write its value into the new daily column
    if all_finished:
        sql_str = """
            update wind_stock_daily daily, wind_ckdvp_stock ckdvp
            set daily.{db_col_name} = ckdvp.value
            where daily.wind_code = ckdvp.wind_code
                and ckdvp.key = '{db_col_name}'
                and ckdvp.param = '{param}'
                and ckdvp.time = daily.trade_date""".format(db_col_name=db_col_name, param=param)
        # Commit the transaction
        with with_db_session(engine_md) as session:
            rst = session.execute(sql_str)
            data_count = rst.rowcount
            session.commit()
        logger.info('column %s: %d wind_stock_daily records updated', db_col_name, data_count)
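# The wind_ckdvp_stock staging table written by add_data_2_ckdvp is a long-format key/value store:
# one row per (wind_code, key, param, time, value). A tiny in-memory illustration of how such rows
# line up with a new daily column ('ev2_to_ebitda' is a hypothetical indicator name):
def _demo_ckdvp_pivot():
    ckdvp_df = pd.DataFrame([
        {'wind_code': '000001.SZ', 'key': 'ev2_to_ebitda', 'param': '', 'time': '2018-01-02', 'value': 12.3},
        {'wind_code': '000001.SZ', 'key': 'ev2_to_ebitda', 'param': '', 'time': '2018-01-03', 'value': 12.5},
    ])
    # Conceptually what the UPDATE ... JOIN above does: align the staged values by code and date.
    return ckdvp_df.pivot(index='time', columns='wind_code', values='value')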
def add_new_col_data_to_fin(col_name, param, chain_param=None, db_col_name=None, col_type_str='DOUBLE',
                            ths_code_set: set = None):
    """
    1) Add the new column to the fin table
    2) Append the data to the ckdvp table
    3) Step 2 may take more than one day; once it finishes, copy the ckdvp data into the fin table
    :param col_name: name of the column to add
    :param param: parameter
    :param chain_param: only used by task.chain to pass parameters between chained tasks
    :param db_col_name: defaults to None, in which case it equals col_name
    :param col_type_str: DOUBLE, VARCHAR(20), INTEGER, etc. (case-insensitive)
    :param ths_code_set: defaults to None; otherwise only the given ths_code values are updated
    :return:
    """
    table_name = 'ifind_stock_fin'
    if db_col_name is None:
        # Defaults to col_name
        db_col_name = col_name
    # Add the db_col_name column to the table if it does not exist yet
    add_col_2_table(engine_md, table_name, db_col_name, col_type_str)
    # Incrementally save the data into the ckdvp table
    all_finished = add_data_fin_2_ckdvp(col_name, param, ths_code_set)
    # Copy the data into the fin table
    if all_finished:
        sql_str = """update {table_name} daily, ifind_stock_ckdvp ckdvp
            set daily.{db_col_name} = ckdvp.value
            where daily.ths_code = ckdvp.ths_code
                and daily.time = ckdvp.time
                and ckdvp.key = '{db_col_name}'
                and ckdvp.param = '{param}'""".format(table_name=table_name, db_col_name=db_col_name, param=param)
        with with_db_session(engine_md) as session:
            session.execute(sql_str)
            session.commit()
        logger.info('column %s of table %s updated', db_col_name, table_name)
def get_latest_constituent_df(sector_code):
    """
    Return the most recent trade date and the corresponding sector constituents
    :return:
    """
    table_name = "wind_sectorconstituent"
    has_table = engine_md.has_table(table_name)
    if not has_table:
        return None, None
    sql_str = 'SELECT max(trade_date) FROM wind_sectorconstituent WHERE sector_code = :sector_code'
    with with_db_session(engine_md) as session:
        date_latest = session.execute(sql_str, params={"sector_code": sector_code}).scalar()
    if date_latest is None:
        return None, None
    sql_str = "SELECT * FROM wind_sectorconstituent WHERE sector_code = %s AND trade_date = %s"
    sec_df = pd.read_sql(sql_str, engine_md, params=[sector_code, date_latest])
    return date_latest, sec_df
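# Example use of get_latest_constituent_df (the sector code below is only an example; real codes
# come from the Wind sector tree):
if __name__ == '__main__':
    date_latest, sec_df = get_latest_constituent_df('a001010100000000')
    print(date_latest, None if sec_df is None else sec_df.shape)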
def get_latest_constituent_df(index_code):
    """
    Return the most recent trade date and the corresponding index constituents
    :return:
    """
    table_name = "wind_index_constituent"
    has_table = engine_md.has_table(table_name)
    if not has_table:
        return None, None
    with with_db_session(engine_md) as session:
        date_latest = session.execute(
            "select max(trade_date) from wind_index_constituent where index_code = :index_code",
            params={"index_code": index_code}).scalar()
    if date_latest is None:
        return None, None
    sql_str = "select * from wind_index_constituent where index_code = %s and trade_date = %s"
    sec_df = pd.read_sql(sql_str, engine_md, params=[index_code, date_latest])
    return date_latest, sec_df
def import_wind_fund_nav_to_nav():
    """
    Copy wind_fund_nav data into the fund_nav table
    :return:
    """
    table_name = 'fund_nav'
    has_table = engine_md.has_table(table_name)
    logger.info("start importing wind_fund_nav_daily data into fund_nav")
    create_sql_str = """CREATE TABLE {table_name} (
        `wind_code` varchar(20) NOT NULL COMMENT '基金代码',
        `nav_date` date NOT NULL COMMENT '净值日期',
        `nav` double DEFAULT NULL COMMENT '净值',
        `nav_acc` double DEFAULT NULL COMMENT '累计净值',
        PRIMARY KEY (`wind_code`,`nav_date`)
        ) ENGINE=MyISAM DEFAULT CHARSET=utf8""".format(table_name=table_name)
    # TODO: the GROUP BY clause needs to be revisited
    sql_str = """INSERT INTO fund_nav(wind_code, nav_date, nav, nav_acc)
        SELECT wfn.wind_code, wfn.nav_date, wfn.nav, wfn.nav_acc
        FROM
        (
            SELECT wind_code, nav_date, max(nav) nav, max(nav_acc) nav_acc
            FROM wind_fund_nav_daily
            GROUP BY wind_code, nav_date
        ) AS wfn
        LEFT OUTER JOIN fund_nav fn
        ON wfn.wind_code = fn.wind_code AND wfn.nav_date = fn.nav_date
        WHERE fn.nav_date IS NULL"""
    with with_db_session(engine_md) as session:
        if not has_table:
            session.execute(create_sql_str)
            logger.info("table %s created", table_name)
        session.execute(sql_str)
    logger.info('import finished')
def import_future_info(chain_param=None):
    """
    Update the futures contract list
    :param chain_param: only used by task.chain to pass parameters between chained tasks
    :return:
    """
    table_name = 'ifind_future_info'
    logger.info("updating %s start", table_name)
    # Fetch the list of contracts already in the table
    try:
        sql_str = 'SELECT ths_code, ths_start_trade_date_future FROM {table_name}'.format(table_name=table_name)
        with with_db_session(engine_md) as session:
            table = session.execute(sql_str)
            code_ipo_date_dic = dict(table.fetchall())
    except:
        code_ipo_date_dic = {}
    # Sector definitions used to pull the contract lists
    future_sectorid_dic_list = [
        {'subject_name': 'CFE 沪深300', 'regex': r"IF\d{4}\.CFE", 'sectorid': '091004001', 'date_establish': '2010-4-16'},
        {'subject_name': 'CFE 上证50', 'regex': r"IH\d{4}\.CFE", 'sectorid': '091004003', 'date_establish': '2015-4-16'},
        {'subject_name': 'CFE 中证500', 'regex': r"IC\d{4}\.CFE", 'sectorid': '091004004', 'date_establish': '2015-4-16'},
        {'subject_name': 'CFE 5年国债期货', 'regex': r"TF\d{4}\.CFE", 'sectorid': '091004002', 'date_establish': '2013-09-06'},
        {'subject_name': 'CFE 10年期国债期货', 'regex': r"T\d{4}\.CFE", 'sectorid': '091004005', 'date_establish': '2015-03-20'},
        {'subject_name': 'SHFE 黄金', 'regex': r"AU\d{4}\.SHF", 'sectorid': '091002002', 'date_establish': '2008-01-09'},
        {'subject_name': 'SHFE 沪银', 'regex': r"AG\d{4}\.SHF", 'sectorid': '091002010', 'date_establish': '2012-05-10'},
        {'subject_name': 'SHFE 螺纹钢', 'regex': r"RB\d{4}\.SHF", 'sectorid': '091002006', 'date_establish': '2009-03-27'},
        {'subject_name': 'SHFE 热卷', 'regex': r"HC\d{4}\.SHF", 'sectorid': '091002012', 'date_establish': '2014-03-21'},
        {'subject_name': 'DCE 焦炭', 'regex': r"J\d{4}\.SHF", 'sectorid': '091001004', 'date_establish': '2011-04-15'},
        {'subject_name': 'DCE 焦煤', 'regex': r"JM\d{4}\.SHF", 'sectorid': '091001010', 'date_establish': '2013-03-22'},
        {'subject_name': '铁矿石', 'regex': r"I\d{4}\.SHF", 'sectorid': '091001011', 'date_establish': '2013-10-18'},
        {'subject_name': '天然橡胶', 'regex': r"RU\d{4}\.SHF", 'sectorid': '091002007', 'date_establish': '1997-02-01'},
        {'subject_name': '铜', 'regex': r"CU\d{4}\.SHF", 'sectorid': '091002003', 'date_establish': '1997-02-01'},
        {'subject_name': '铝', 'regex': r"AL\d{4}\.SHF", 'sectorid': '091002001', 'date_establish': '1997-02-01'},
        {'subject_name': '锌', 'regex': r"ZN\d{4}\.SHF", 'sectorid': '091002009', 'date_establish': '2007-03-26'},
        {'subject_name': '铅', 'regex': r"PB\d{4}\.SHF", 'sectorid': '091002005', 'date_establish': '2011-03-24'},
        {'subject_name': '镍', 'regex': r"NI\d{4}\.SHF", 'sectorid': '091002014', 'date_establish': '2015-03-27'},
        {'subject_name': '锡', 'regex': r"SN\d{4}\.SHF", 'sectorid': '091002013', 'date_establish': '2015-03-27'},
        {'subject_name': '白糖', 'regex': r"SR\d{4}\.CZC", 'sectorid': '091003004', 'date_establish': '2006-01-06'},
        {'subject_name': '棉花', 'regex': r"CF\d{4}\.CZC", 'sectorid': '091003001', 'date_establish': '2004-06-01'},
        {'subject_name': '鲜苹果', 'regex': r"AP\d{4}\.CZC", 'sectorid': '091003019', 'date_establish': '2017-12-22'},
    ]
    # Indicator fields and parameters
    indicator_param_list = [
        ('ths_future_short_name_future', '', String(20)),
        ('ths_future_code_future', '', String(20)),
        ('ths_sec_type_future', '', String(20)),
        ('ths_td_variety_future', '', String(20)),
        ('ths_td_unit_future', '', DOUBLE),
        ('ths_pricing_unit_future', '', String(20)),
        ('ths_mini_chg_price_future', '', DOUBLE),
        ('ths_chg_ratio_lmit_future', '', DOUBLE),
        ('ths_td_deposit_future', '', DOUBLE),
        ('ths_start_trade_date_future', '', Date),
        ('ths_last_td_date_future', '', Date),
        ('ths_last_delivery_date_future', '', Date),
        ('ths_delivery_month_future', '', String(10)),
        ('ths_listing_benchmark_price_future', '', DOUBLE),
        ('ths_initial_td_deposit_future', '', DOUBLE),
        ('ths_contract_month_explain_future', '', String(60)),
        ('ths_td_time_explain_future', '', String(80)),
        ('ths_last_td_date_explian_future', '', String(60)),
        ('ths_delivery_date_explain_future', '', String(60)),
        ('ths_exchange_short_name_future', '', String(20)),
        ('ths_contract_en_short_name_future', '', String(20)),
        ('ths_contract_en_name_future', '', String(20)),
    ]
    json_indicator, json_param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
    # dtype for the target columns
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    # Collect the contract codes
    code_set = set()
    ndays_per_update = 60
    # Fetch the historical contract lists sector by sector
    sector_count = len(future_sectorid_dic_list)
    for num, future_sectorid_dic in enumerate(future_sectorid_dic_list, start=1):
        subject_name = future_sectorid_dic['subject_name']
        sector_id = future_sectorid_dic['sectorid']
        regex_str = future_sectorid_dic['regex']
        date_establish = datetime.strptime(future_sectorid_dic['date_establish'], STR_FORMAT_DATE).date()
        # Work out the start date for fetching this sector's contract list
        date_since = get_date_since(code_ipo_date_dic, regex_str, date_establish)
        date_yestoday = date.today() - timedelta(days=1)
        logger.debug('%d/%d) fetching %s %s [%s - %s] contract list',
                     num, sector_count, subject_name, sector_id, date_since, date_yestoday)
        while date_since <= date_yestoday:
            date_since_str = date_since.strftime(STR_FORMAT_DATE)
            # Fetch the contract list
            # w.wset("sectorconstituent", "date=2017-05-02;sectorid=a599010205000000")
            # future_info_df = rest.wset("sectorconstituent", "date=%s;sectorid=%s" % (date_since_str, sector_id))
            try:
                future_info_df = invoker.THS_DataPool(
                    'block', '%s;%s' % (date_since_str, sector_id), 'date:Y,thscode:Y,security_name:Y')
            except APIError:
                logger.exception('THS_DataPool %s failed', '%s;%s' % (date_since_str, sector_id))
                break
            if future_info_df is None or future_info_df.shape[0] == 0:
                break
            code_set |= set(future_info_df['THSCODE'])
            if date_since >= date_yestoday:
                break
            else:
                date_since += timedelta(days=ndays_per_update)
                if date_since > date_yestoday:
                    date_since = date_yestoday
            if DEBUG:
                break
    # Keep only the contracts that are not in the table yet
    code_list = [wc for wc in code_set if wc not in code_ipo_date_dic]
    # Fetch basic information for the new contracts
    if len(code_list) > 0:
        future_info_df = invoker.THS_BasicData(code_list, json_indicator, json_param)
        if future_info_df is None or future_info_df.shape[0] == 0:
            data_count = 0
            logger.warning("updating %s finished, %d records updated", table_name, data_count)
        else:
            data_count = future_info_df.shape[0]
            # future_info_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            data_count = bunch_insert_on_duplicate_update(future_info_df, table_name, engine_md, dtype)
            logger.info("updating %s finished, %d records updated", table_name, data_count)