def import_coin_info(chain_param=None):
    """Fetch basic info for all crypto coins via tushare ``coinlist`` and
    upsert it into ``tushare_coin_info``.

    :param chain_param: placeholder so the task can be chained via celery task.chain
    """
    table_name = 'tushare_coin_info'
    # Remember whether the table existed before the insert: the storage-engine
    # switch and primary key are only applied on first-time creation below.
    has_table = engine_md.has_table(table_name)
    # Column dtypes for the bulk upsert
    dtype = {
        'coin': String(60),
        'en_name': String(60),
        'cn_name': String(60),
        'issue_date': Date,
        'amount': DOUBLE,
    }
    coinlist_df = pro.coinlist(start_date='20170101', end_date=date_2_str(date.today(), DATE_FORMAT_STR))
    data_count = bunch_insert_on_duplicate_update(coinlist_df, table_name, engine_md, dtype)
    logging.info("更新 %s 完成 新增数据 %d 条", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        # Table was just created: convert to MyISAM, then add the composite PK.
        alter_table_2_myisam(engine_md, [table_name])
        create_pk_str = """ALTER TABLE {table_name} CHANGE COLUMN `coin` `coin` VARCHAR(60) NOT NULL FIRST, CHANGE COLUMN `en_name` `en_name` VARCHAR(60) NOT NULL AFTER `coin`, ADD PRIMARY KEY (`coin`, `en_name`)""".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)
def insert_into_db(data_df_list, engine_md):
    """Concatenate collected tick DataFrames and upsert them into
    ``wind_stock_tick``; build the primary key if the insert created the table.

    :param data_df_list: list of tick DataFrames indexed by datetime
    :param engine_md: SQLAlchemy engine for the market-data database
    :return: number of DataFrames received (not rows)
    """
    table_name = 'wind_stock_tick'
    data_count = len(data_df_list)
    has_table = engine_md.has_table(table_name)
    # Column dtypes for the bulk upsert
    dtype = {
        'wind_code': String(20),
        'datetime': DateTime,
        'open': DOUBLE,
        'high': DOUBLE,
        'low': DOUBLE,
        'close': DOUBLE,
        'ask1': DOUBLE,
        'bid1': DOUBLE,
        'asize1': DOUBLE,
        'bsize1': DOUBLE,
        'volume': DOUBLE,
        'amount': DOUBLE,
        'preclose': DOUBLE,
    }
    if data_count > 0:
        merged_df = pd.concat(data_df_list)
        # Promote the datetime index to a regular column before inserting.
        merged_df.index.rename('datetime', inplace=True)
        merged_df.reset_index(inplace=True)
        bunch_insert_on_duplicate_update(merged_df, table_name, engine_md, dtype=dtype)
        logger.info('%d data imported', merged_df.shape[0])
        if not has_table and engine_md.has_table(table_name):
            # First-time creation: switch engine and add the primary key.
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
    return data_count
def import_tushare_adj_factor(chain_param=None, ):
    """
    Import daily price-adjustment factors into ``tushare_stock_daily_adj_factor``
    up to the most recent workday - 1 (or today, once past 16:00 server time).

    :param chain_param: placeholder so the task can be chained via celery task.chain
    :return:
    """
    table_name = 'tushare_stock_daily_adj_factor'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # The incremental date-range query must reference THIS table; referencing a
    # different table would mix up date ranges and corrupt the import.
    if has_table:
        sql_str = """
             select cal_date
             FROM
              (
               select * from tushare_trade_date trddate
               where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
             )tt
             where (is_open=1
                    and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                    and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
             SELECT cal_date
             FROM tushare_trade_date trddate
             WHERE (trddate.is_open=1
                AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                AND exchange='SSE')
             ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)
    with with_db_session(engine_md) as session:
        # Fetch the list of trading days that still need importing.
        table = session.execute(sql_str)
        trddate = list(row[0] for row in table.fetchall())
    try:
        # Iterate trading days directly (was: ``for i in range(len(trddate))``).
        for trade_date_obj in trddate:
            trade_date = datetime_2_str(trade_date_obj, STR_FORMAT_DATE_TS)
            data_df = pro.adj_factor(ts_code='', trade_date=trade_date)
            if len(data_df) > 0:
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR)
                logging.info(" %s 表自 %s 日起的 %d 条信息被更新", table_name, trade_date, data_count)
            else:
                logging.info("无数据信息可被更新")
    finally:
        # Even on failure, finish table setup if the insert created the table.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # build_primary_key([table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `trade_date` `trade_date` DATE NOT NULL AFTER `ts_code`,
                ADD PRIMARY KEY (`ts_code`, `trade_date`)""".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `ts_code`, `trade_date` 主键设置完成', table_name)
def import_tushare_stock_fund_holdings(years=None):
    """Import quarterly fund-holdings data into ``tushare_stock_fund_holdings``.

    Codes returned by ``invoke_fund_holdings`` are bare 6-digit symbols; they
    are converted to ts_code form ('6xxxxx' -> '.SH', everything else -> '.SZ').
    Rows are buffered and flushed in batches of >= 50 to limit round trips.

    :param years: iterable of years to fetch; defaults to 2013-2018 for
        backward compatibility with the previously hard-coded range.
    """
    table_name = 'tushare_stock_fund_holdings'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    tushare_fund_holdings_indicator_param_list = [
        ('ts_code', String(20)),
        ('sec_name', String(20)),
        ('end_date', Date),
        ('nums', DOUBLE),
        ('nlast', DOUBLE),
        ('count', DOUBLE),
        ('clast', DOUBLE),
        ('amount', DOUBLE),
        ('ratio', DOUBLE),
    ]
    tushare_fund_holdings_dtype = dict(tushare_fund_holdings_indicator_param_list)
    data_df_list, data_count, all_data_count = [], 0, 0
    if years is None:
        years = list(range(2013, 2019))
    try:
        for year in years:
            for quarter in (1, 2, 3, 4):
                print((year, quarter))
                data_df = invoke_fund_holdings(year, quarter)
                # Map bare 6-digit symbols to exchange-suffixed ts_code form.
                data_df.code = [code + ('.SH' if code[0] == '6' else '.SZ')
                                for code in data_df.code]
                data_df = data_df.rename(
                    columns={'code': 'ts_code', 'name': 'sec_name', 'date': 'end_date'})
                # Accumulate batches until the flush threshold is reached.
                if data_df is not None and data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)
                # Flush once the buffered row count passes the threshold.
                if data_count >= 50:
                    data_df_all = pd.concat(data_df_list)
                    bunch_insert_on_duplicate_update(
                        data_df_all, table_name, engine_md, tushare_fund_holdings_dtype)
                    all_data_count += data_count
                    data_df_list, data_count = [], 0
    finally:
        # Flush whatever is still buffered, then finish first-run table setup.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, tushare_fund_holdings_dtype)
            all_data_count = all_data_count + data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def insert_db(df: pd.DataFrame): nonlocal has_table insert_data_count = bunch_insert_on_duplicate_update(df, table_name, engine_md, dtype=dtype) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name]) has_table = True return insert_data_count
def import_index_info(wind_codes, chain_param=None):
    """
    Import Wind index basic information into ``wind_index_info``.

    :param wind_codes: index codes passed straight to ``invoker.wss``
    :param chain_param: placeholder so the task can be chained via celery task.chain
    :return: None
    """
    table_name = 'wind_index_info'
    has_table = engine_md.has_table(table_name)
    col_name_param_list = [
        ('LAUNCHDATE', Date),
        ('BASEDATE', Date),
        ('BASEVALUE', DOUBLE),
        ('COUNTRY', String(20)),
        ('CRM_ISSUER', String(20)),
        ('SEC_NAME', String(20)),
    ]
    # wss field list: lower-case, comma separated
    col_name_param = ",".join([key.lower() for key, _ in col_name_param_list])
    # Wind returns upper-case column names; map them back to lower case.
    col_name_param_dic = {
        col_name.upper(): col_name.lower()
        for col_name, _ in col_name_param_list
    }
    # Column dtypes for the bulk upsert
    dtype = {key.lower(): val for key, val in col_name_param_list}
    dtype['wind_code'] = String(20)
    info_df = invoker.wss(wind_codes, col_name_param)
    if info_df is None or info_df.shape[0] == 0:
        logger.warning("没有数据可导入")
        return
    info_df.rename(columns=col_name_param_dic, inplace=True)
    # Promote the wind_code index to a regular column before inserting.
    info_df.index.rename("wind_code", inplace=True)
    info_df.reset_index(inplace=True)
    bunch_insert_on_duplicate_update(info_df, table_name, engine_md, dtype=dtype)
    logger.info('%d 条指数信息导入成功\n%s', info_df.shape[0], info_df)
    if not has_table and engine_md.has_table(table_name):
        # First-time creation: switch engine and add the primary key.
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # Refresh the code_mapping table from the newly updated info table.
    update_from_info_table(table_name)
def import_index_info(chain_param=None, ths_code=None):
    """
    Import iFinD index basic information into ``ifind_index_info``.

    :param chain_param: placeholder so the task can be chained via celery task.chain
    :param ths_code: comma-separated THS codes; when None the full market code
        set as of today is used
    :return: None
    """
    table_name = 'ifind_index_info'
    has_table = engine_md.has_table(table_name)
    logging.info("更新 ifind_index_info 开始")
    if ths_code is None:
        # Build the full-market code set as of today.
        date_end = date.today()
        stock_code_set = set()
        stock_code_set_sub = get_stock_code_set(date_end)
        if stock_code_set_sub is not None:
            stock_code_set |= stock_code_set_sub
        ths_code = ','.join(stock_code_set)
    # (indicator name, iFinD param, SQL dtype) triples for THS_BasicData
    indicator_param_list = [
        ('ths_index_short_name_index', '', String(20)),
        ('ths_index_code_index', '', String(10)),
        ('ths_index_category_index', '', String(20)),
        ('ths_index_base_period_index', '', Date),
        ('ths_index_base_point_index', '', DOUBLE),
        ('ths_publish_org_index', '', String(20)),
    ]
    # indicator' = 'ths_index_short_name_index;ths_index_code_index;ths_thscode_index;ths_index_category_index;
    # ths_index_base_period_index;ths_index_base_point_index;ths_publish_org_index',
    # param = ';;;;;;'
    indicator, param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
    data_df = invoker.THS_BasicData(ths_code, indicator, param)
    if data_df is None or data_df.shape[0] == 0:
        logging.info("没有可用的 index info 可以更新")
        return
    # Column dtypes for the bulk upsert
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        # First-time creation: switch engine and add the primary key.
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # Refresh the code_mapping table from the newly updated info table.
    update_from_info_table(table_name)
def import_coin_info(chain_param=None, ):
    """Pull the full CoinMarketCap v1 ticker list and upsert it into
    ``cmc_coin_v1_info``.

    :param chain_param: placeholder so the task can be chained via celery task.chain
    :raises ValueError: when the HTTP request does not return status 200
    """
    table_name = "cmc_coin_v1_info"
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    url = 'https://api.coinmarketcap.com/v1/ticker/?limit=0'
    # Column dtypes for the bulk upsert
    dtype = {
        'id': String(60),
        'name': String(60),
        'symbol': String(20),
        'rank': Integer,
        'price_usd': DOUBLE,
        'price_btc': DOUBLE,
        '24h_volume_usd': DOUBLE,
        'market_cap_usd': DOUBLE,
        'available_supply': DOUBLE,
        'total_supply': DOUBLE,
        'max_supply': DOUBLE,
        'percent_change_1h': DOUBLE,
        'percent_change_24h': DOUBLE,
        'percent_change_7d': DOUBLE,
        'last_updated': DATETIME,
    }
    rsp = requests.get(url)
    if rsp.status_code != 200:
        # message typo fixed: 相应 -> 响应
        raise ValueError('请求 listings 响应失败')
    # renamed from `json` to avoid shadowing the stdlib module name
    payload = rsp.json()
    data_df = pd.DataFrame(payload)
    # The API delivers last_updated as a unix-timestamp string (or None).
    data_df['last_updated'] = data_df['last_updated'].apply(
        lambda x: None if x is None else datetime.datetime.fromtimestamp(float(x)))
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        # First-time creation: switch engine and add the primary key.
        alter_table_2_myisam(engine_md, [table_name])
        # build_primary_key([table_name])
        create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST ,
            ADD PRIMARY KEY (`id`)""".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)
def import_tushare_namechange(chain_param=None):
    """
    Import listed-company name-change history into ``tushare_stock_namechange``.

    Incremental: resumes from the latest recorded ``start_date``; on first run
    it starts from the earliest ``list_date`` found in ``tushare_stock_info``.

    :param chain_param: placeholder so the task can be chained via celery task.chain
    :return:
    """
    table_name = 'tushare_stock_namechange'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """select max(start_date) start_date FROM md_integration.tushare_stock_namechange"""
    else:
        sql_str = """select min(list_date) start_date FROM md_integration.tushare_stock_info"""
    with with_db_session(engine_md) as session:
        # Fetch the single-row date boundary computed above.
        table = session.execute(sql_str)
        start_date = list(row[0] for row in table.fetchall())
    start_date = datetime_2_str(start_date[0], STR_FORMAT_DATE_TS)
    end_date = datetime_2_str(date.today(), STR_FORMAT_DATE_TS)
    try:
        data_df = pro.namechange(start_date=start_date, end_date=end_date,
                                 fields='ts_code,name,start_date,end_date,change_reason')
        if len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_NAMECHANGE)
            logging.info("更新 %s 结束 %d 条上市公司更名信息被更新", table_name, data_count)
        else:
            logging.info("无数据信息可被更新")
    finally:
        # Even on failure, finish table setup if the insert created the table.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # build_primary_key([table_name])
            create_pk_str = """ALTER TABLE {table_name} CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST, CHANGE COLUMN `start_date` `start_date` DATE NOT NULL AFTER `ts_code`, ADD PRIMARY KEY (`ts_code`, `start_date`)""".format(
                table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `ts_code`, `start_date` 主键设置完成', table_name)
def init(alter_table=False):
    """Create all ORM tables, optionally normalize them (MyISAM + primary
    keys), then dynamically load a ``Table`` model for each table.

    Fix: ``table_name_list`` was previously assigned only inside the
    ``alter_table`` branch, so the model-loading loop raised ``NameError``
    when called with the default ``alter_table=False``.

    :param alter_table: when True, convert all tables to MyISAM and build PKs
    """
    # Create tables for all declared models.
    Base.metadata.create_all(engine_md)
    logger.info("所有表结构建立完成")
    table_name_list = engine_md.table_names()
    if alter_table:
        alter_table_2_myisam(engine_md)
        build_primary_key(table_name_list)
        logger.info("所有表结构调整完成")
    # Reflect each table into a dynamic model for later lookups.
    for table_name in table_name_list:
        TABLE_MODEL_DIC[table_name] = Table(table_name, Base.metadata, autoload=True)
    logger.info("所有表Model动态加载完成")
    init_data()
def import_tushare_stock_info(chain_param=None, refresh=False):
    """
    Import the full market stock list (codes and names) into ``tushare_stock_info``.

    :param chain_param: placeholder so the task can be chained via celery task.chain
    :param refresh: kept for interface compatibility; not used by this routine
    """
    table_name = 'tushare_stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    tushare_indicator_param_list = [
        ('ts_code', String(20)),
        ('symbol', String(20)),
        ('name', String(40)),
        ('area', String(100)),
        ('industry', String(200)),
        ('fullname', String(100)),
        ('enname', String(200)),
        ('market', String(100)),
        ('exchange', String(20)),
        ('curr_type', String(20)),
        ('list_status', String(20)),
        ('list_date', Date),
        ('delist_date', Date),
        ('is_hs', String(20)),
    ]
    # Comma-separated field list for the query, e.g. "ts_code,symbol,name,..."
    # (fix: the previous hard-coded string repeated `is_hs` three times and
    # this computed value went unused).
    param = ",".join([key for key, _ in tushare_indicator_param_list])
    # Column dtypes for the bulk upsert
    dtype = {key: val for key, val in tushare_indicator_param_list}
    dtype['ts_code'] = String(20)
    # Pull the full listing from tushare.
    stock_info_all_df = pro.stock_basic(exchange='', fields=param)
    logging.info('%s stock data will be import', stock_info_all_df.shape[0])
    data_count = bunch_insert_on_duplicate_update(stock_info_all_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        # First-time creation: switch engine and add the primary key.
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
def merge_stock_info():
    """
    Merge Wind and iFinD stock-info tables into the unified ``stock_info`` table.

    iFinD rows are outer-joined to Wind rows on ths_code == wind_code, then each
    output column is produced by a merge rule (prefer-left or single-source).

    :return: the merged DataFrame that was written to the database
    """
    table_name = 'stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    # ifind_model = TABLE_MODEL_DIC[ifind_table_name]
    # wind_model = TABLE_MODEL_DIC[wind_table_name]
    # with with_db_session(engine_md) as session:
    #     session.query(ifind_model, wind_model).filter(ifind_model.c.ths_code == wind_model.c.wind_code)
    ifind_sql_str = "select * from {table_name}".format(table_name=ifind_table_name)
    wind_sql_str = "select * from {table_name}".format(table_name=wind_table_name)
    ifind_df = pd.read_sql(ifind_sql_str, engine_md)  # , index_col='ths_code'
    wind_df = pd.read_sql(wind_sql_str, engine_md)  # , index_col='wind_code'
    # Full outer join; `indicator_column` records which side(s) each row came from.
    joined_df = pd.merge(ifind_df, wind_df, how='outer',
                         left_on='ths_code', right_on='wind_code',
                         indicator='indicator_column')
    # output column -> (SQL dtype, merge function, merge-function kwargs)
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
        'sec_name': (String(20), prefer_left, {'left_key': 'ths_stock_short_name_stock', 'right_key': 'sec_name'}),
        'cn_name': (String(100), get_value, {'key': 'ths_corp_cn_name_stock'}),
        'en_name': (String(100), get_value, {'key': 'ths_corp_name_en_stock'}),
        'delist_date': (Date, prefer_left, {'left_key': 'ths_delist_date_stock', 'right_key': 'delist_date'}),
        'ipo_date': (Date, prefer_left, {'left_key': 'ths_ipo_date_stock', 'right_key': 'ipo_date'}),
        # NOTE(review): left_key here is the same English-name field used for
        # en_name above — looks like a copy-paste; confirm a former-name field
        # was intended for pre_name.
        'pre_name': (Text, prefer_left, {'left_key': 'ths_corp_name_en_stock', 'right_key': 'prename'}),
        'established_date': (Date, get_value, {'key': 'ths_established_date_stock'}),
        'exch_city': (String(20), get_value, {'key': 'exch_city'}),
        'exch_cn': (String(20), get_value, {'key': 'ths_listing_exchange_stock'}),
        'exch_eng': (String(20), get_value, {'key': 'exch_eng'}),
        'stock_code': (String(20), prefer_left, {'left_key': 'ths_stock_code_stock', 'right_key': 'trade_code'}),
        'mkt': (String(20), get_value, {'key': 'mkt'}),
    }
    # Split the mapping into merge rules and SQL dtypes.
    col_merge_rule_dic = {
        key: (val[1], val[2])
        for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        # First-time creation: switch engine and add the primary key.
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    return data_df
def import_future_info(chain_param=None):
    """
    更新期货合约列表信息 — refresh the ``wind_future_info`` product list.

    For each exchange sector (index-level sector ids) the constituents are
    collected from the sector's establishment date onward in 60-day steps;
    codes not yet stored are then fetched via ``wss`` and upserted.

    Fixes applied: the SHFE 沥青 (bitumen) regex was ``r"BU\\\\.SHF"`` — the
    doubled backslash made it match a literal backslash and so never matched
    any real wind code; it is now ``r"BU\\.SHF"`` like its siblings. The large
    commented-out per-contract sector lists were removed (see VCS history).

    :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務
    :return: None
    """
    table_name = "wind_future_info"
    has_table = engine_md.has_table(table_name)
    logger.info("更新 %s 开始", table_name)
    # Codes already stored, with their ipo dates (used to skip known codes).
    if has_table:
        sql_str = 'select wind_code, ipo_date from {table_name}'.format(table_name=table_name)
        with with_db_session(engine_md) as session:
            table = session.execute(sql_str)
            wind_code_ipo_date_dic = dict(table.fetchall())
    else:
        wind_code_ipo_date_dic = {}
    # Wind sector definitions (index level) per exchange:
    # subject_name: label for logs; regex: wind_code pattern used by
    # get_date_since; sectorid: Wind sector id; date_establish: sector start.
    future_sectorid_dic_list = [
        # CFFEX (中金所) indices
        {'subject_name': 'CFE 沪深300', 'regex': r"IF\.CFE",
         'sectorid': 'a599010102000000', 'date_establish': '2010-4-16'},
        {'subject_name': 'CFE 上证50', 'regex': r"IH\.CFE",
         'sectorid': '1000014871000000', 'date_establish': '2015-4-16'},
        {'subject_name': 'CFE 中证500', 'regex': r"IC\.CFE",
         'sectorid': '1000014872000000', 'date_establish': '2015-4-16'},
        {'subject_name': '2年期国债', 'regex': r"TS\.CFE",
         'sectorid': '1000014880000000', 'date_establish': '2018-08-17'},
        {'subject_name': '5年期国债', 'regex': r"TF\.CFE",
         'sectorid': '1000010299000000', 'date_establish': '2013-09-06'},
        {'subject_name': '10年期国债', 'regex': r"T\.CFE",
         'sectorid': '1000014874000000', 'date_establish': '2015-03-20'},
        # DCE (大连商品交易所) indices
        {'subject_name': 'DCE 焦炭', 'regex': r"J\.DCE",
         'sectorid': '1000002976000000', 'date_establish': '2011-04-15'},
        {'subject_name': 'DCE 焦煤', 'regex': r"JM\.DCE",
         'sectorid': '1000009338000000', 'date_establish': '2013-03-22'},
        {'subject_name': '铁矿石', 'regex': r"I\.DCE",
         'sectorid': '1000006502000000', 'date_establish': '2013-10-18'},
        {'subject_name': '豆粕', 'regex': r"M\.DCE",
         'sectorid': 'a599010304000000', 'date_establish': '2000-07-17'},
        {'subject_name': '豆油', 'regex': r"Y\.DCE",
         'sectorid': 'a599010306000000', 'date_establish': '2006-01-09'},
        {'subject_name': '棕榈油', 'regex': r"P\.DCE",
         'sectorid': 'a599010307000000', 'date_establish': '2007-10-29'},
        {'subject_name': '豆一', 'regex': r"A\.DCE",
         'sectorid': 'a599010302000000', 'date_establish': '2004-07-15'},
        {'subject_name': '豆二', 'regex': r"B\.DCE",
         'sectorid': 'a599010303000000', 'date_establish': '2004-12-22'},
        {'subject_name': '玉米', 'regex': r"C\.DCE",
         'sectorid': 'a599010305000000', 'date_establish': '2004-09-22'},
        {'subject_name': '玉米淀粉', 'regex': r"CS\.DCE",
         'sectorid': '1000011469000000', 'date_establish': '2014-12-19'},
        {'subject_name': '鸡蛋', 'regex': r"JD\.DCE",
         'sectorid': '1000011464000000', 'date_establish': '2013-11-08'},
        {'subject_name': '线型低密度聚乙烯', 'regex': r"L\.DCE",
         'sectorid': 'a599010308000000', 'date_establish': '2007-07-31'},
        {'subject_name': '聚氯乙烯', 'regex': r"V\.DCE",
         'sectorid': 'a599010309000000', 'date_establish': '2009-05-25'},
        {'subject_name': '聚丙烯', 'regex': r"PP\.DCE",
         'sectorid': '1000011468000000', 'date_establish': '2014-02-28'},
        # SHFE (上海期货交易所) indices
        {'subject_name': '天然橡胶', 'regex': r"RU\.SHF",
         'sectorid': 'a599010208000000', 'date_establish': '1995-06-01'},
        {'subject_name': '铜', 'regex': r"CU\.SHF",
         'sectorid': 'a599010202000000', 'date_establish': '1995-05-01'},
        {'subject_name': '铝', 'regex': r"AL\.SHF",
         'sectorid': 'a599010203000000', 'date_establish': '1995-05-01'},
        {'subject_name': '锌', 'regex': r"ZN\.SHF",
         'sectorid': 'a599010204000000', 'date_establish': '2007-03-26'},
        {'subject_name': '铅', 'regex': r"PB\.SHF",
         'sectorid': '1000002892000000', 'date_establish': '2011-03-24'},
        {'subject_name': '镍', 'regex': r"NI\.SHF",
         'sectorid': '1000011457000000', 'date_establish': '2015-03-27'},
        {'subject_name': '锡', 'regex': r"SN\.SHF",
         'sectorid': '1000011458000000', 'date_establish': '2015-03-27'},
        {'subject_name': 'SHFE 黄金', 'regex': r"AU\.SHF",
         'sectorid': 'a599010205000000', 'date_establish': '2008-01-09'},
        {'subject_name': 'SHFE 沪银', 'regex': r"AG\.SHF",
         'sectorid': '1000006502000000', 'date_establish': '2012-05-10'},
        {'subject_name': 'SHFE 螺纹钢', 'regex': r"RB\.SHF",
         'sectorid': 'a599010206000000', 'date_establish': '2009-03-27'},
        {'subject_name': 'SHFE 热卷', 'regex': r"HC\.SHF",
         'sectorid': '1000011455000000', 'date_establish': '2014-03-21'},
        # regex fixed: was r"BU\\.SHF" (double backslash never matched)
        {'subject_name': 'SHFE 沥青', 'regex': r"BU\.SHF",
         'sectorid': '1000011013000000', 'date_establish': '2013-10-09'},
        {'subject_name': '原油', 'regex': r"SC\.SHF",
         'sectorid': '1000011463000000', 'date_establish': '2018-03-26'},
        # CZCE (郑商所) indices
        {'subject_name': '白糖', 'regex': r"SR\.CZC",
         'sectorid': 'a599010405000000', 'date_establish': '2006-01-06'},
        {'subject_name': '棉花', 'regex': r"CF\.CZC",
         'sectorid': 'a599010404000000', 'date_establish': '2004-06-01'},
        {'subject_name': '动力煤', 'regex': r"(ZC|TC)\.CZC",
         'sectorid': '1000011012000000', 'date_establish': '2013-09-26'},
        {'subject_name': '玻璃', 'regex': r"FG\.CZC",
         'sectorid': '1000008549000000', 'date_establish': '2013-12-03'},
        {'subject_name': '精对苯二甲酸', 'regex': r"TA\.CZC",
         'sectorid': 'a599010407000000', 'date_establish': '2006-12-18'},
        {'subject_name': '甲醇', 'regex': r"(ME|MA)\.CZC",
         'sectorid': '1000005981000000', 'date_establish': '2011-10-28'},
        {'subject_name': '菜籽油', 'regex': r"OI\.CZC",
         'sectorid': 'a599010408000000', 'date_establish': '2007-06-08'},
        {'subject_name': '菜籽粕', 'regex': r"RM\.CZC",
         'sectorid': '1000008622000000', 'date_establish': '2012-12-28'},
    ]
    wind_code_set = set()
    ndays_per_update = 60
    # (field, SQL dtype) pairs for the wss contract-info query
    col_name_param_list = [
        ("ipo_date", Date),
        ("sec_name", String(50)),
        ("sec_englishname", String(200)),
        ("exch_eng", String(200)),
        ("lasttrade_date", Date),
        ("lastdelivery_date", Date),
        ("dlmonth", String(20)),
        ("lprice", DOUBLE),
        ("sccode", String(20)),
        ("margin", DOUBLE),
        ("punit", String(200)),
        ("changelt", DOUBLE),
        ("mfprice", DOUBLE),
        ("contractmultiplier", DOUBLE),
        ("ftmargins", String(100)),
        ("trade_code", String(200)),
    ]
    wind_indictor_str = ",".join(col_name for col_name, _ in col_name_param_list)
    dtype = {key: val for key, val in col_name_param_list}
    dtype['wind_code'] = String(20)
    # Walk each sector's history and collect constituent wind codes.
    logger.info("获取历史期货合约列表信息")
    for future_sectorid_dic in future_sectorid_dic_list:
        subject_name = future_sectorid_dic['subject_name']
        sector_id = future_sectorid_dic['sectorid']
        regex_str = future_sectorid_dic['regex']
        date_establish = datetime.strptime(
            future_sectorid_dic['date_establish'], STR_FORMAT_DATE).date()
        # Resume from the latest known ipo date for codes matching the regex.
        date_since = get_date_since(wind_code_ipo_date_dic, regex_str, date_establish)
        date_yestoday = date.today() - timedelta(days=1)
        logger.info("%s[%s] %s ~ %s", subject_name, sector_id, date_since, date_yestoday)
        while date_since <= date_yestoday:
            date_since_str = date_since.strftime(STR_FORMAT_DATE)
            future_info_df = invoker.wset(
                "sectorconstituent", "date=%s;sectorid=%s" % (date_since_str, sector_id))
            data_count = 0 if future_info_df is None else future_info_df.shape[0]
            logger.info("subject_name=%s[%s] %s 返回 %d 条数据",
                        subject_name, sector_id, date_since_str, data_count)
            if data_count > 0:
                wind_code_set |= set(future_info_df['wind_code'])
            if date_since >= date_yestoday:
                break
            else:
                # Step forward, clamping to yesterday so the last day is sampled.
                date_since += timedelta(days=ndays_per_update)
                if date_since > date_yestoday:
                    date_since = date_yestoday
    # Only fetch details for codes not already in the table.
    wind_code_list = [wc for wc in wind_code_set if wc not in wind_code_ipo_date_dic]
    # Contract basic info, e.g.:
    # w.wss("AU1706.SHF,...", "ipo_date,sec_name,...,trade_code")
    if len(wind_code_list) > 0:
        logger.info("%d wind_code will be invoked by wss, wind_code_list=%s",
                    len(wind_code_list), wind_code_list)
        future_info_df = invoker.wss(wind_code_list, wind_indictor_str)
        future_info_df['MFPRICE'] = future_info_df['MFPRICE'].apply(mfprice_2_num)
        future_info_count = future_info_df.shape[0]
        future_info_df.rename(
            columns={c: str.lower(c) for c in future_info_df.columns}, inplace=True)
        future_info_df.index.rename('wind_code', inplace=True)
        future_info_df.reset_index(inplace=True)
        data_count = bunch_insert_on_duplicate_update(future_info_df, table_name, engine_md, dtype=dtype)
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
        if not has_table and engine_md.has_table(table_name):
            # First-time creation: switch engine and add the primary key.
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
        logger.info("更新 wind_future_info 结束 %d 条记录被更新", future_info_count)
        # Refresh the code_mapping table from the newly updated info table.
        update_from_info_table(table_name)
def import_future_daily(chain_param=None, wind_code_set=None, begin_time=None):
    """
    Update daily quotes for future contracts into ``wind_future_daily``.

    :param chain_param: placeholder so the task can be chained via celery task.chain
    :param wind_code_set: optional whitelist of wind codes to update
    :param begin_time: optional earliest date; each contract's start becomes
        min(db-derived start, begin_time)
    :return: None
    """
    table_name = "wind_future_daily"
    logger.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # (column, SQL dtype) pairs; only the first 10 are requested from Wind —
    # position, instrument_id and trade_date are derived locally below.
    param_list = [
        ("open", DOUBLE),
        ("high", DOUBLE),
        ("low", DOUBLE),
        ("close", DOUBLE),
        ("volume", DOUBLE),
        ("amt", DOUBLE),
        ("dealnum", DOUBLE),
        ("settle", DOUBLE),
        ("oi", DOUBLE),
        ("st_stock", DOUBLE),
        ('position', DOUBLE),
        ('instrument_id', String(20)),
        ('trade_date', Date,),
    ]
    wind_indictor_str = ",".join([key for key, _ in param_list[:10]])
    if has_table:
        # Per contract: from the day after the last stored trade_date (or the
        # ipo_date) up to min(lasttrade_date, yesterday — or today after 16:00).
        sql_str = """select wind_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
            select fi.wind_code, ifnull(trade_date_max_1, ipo_date) date_frm, lasttrade_date,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            from wind_future_info fi left outer join
            (select wind_code, adddate(max(trade_date),1) trade_date_max_1 from {table_name} group by wind_code) wfd
            on fi.wind_code = wfd.wind_code
            ) tt
            where date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date)
            -- and subdate(curdate(), 360) < if(lasttrade_date<end_date, lasttrade_date, end_date)
            order by wind_code""".format(table_name=table_name)
    else:
        sql_str = """SELECT wind_code, date_frm, if(lasttrade_date<end_date,lasttrade_date, end_date) date_to
            FROM
            (
            SELECT info.wind_code,ipo_date date_frm, lasttrade_date,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM wind_future_info info
            ) tt
            WHERE date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date)
            ORDER BY wind_code;
            """
        logger.warning('%s 不存在,仅使用 wind_future_info 表进行计算日期范围', table_name)
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        # Build {wind_code: (date_from, date_to)}, honoring the optional
        # begin_time override and wind_code_set filter.
        future_date_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    # Column dtypes for the bulk upsert
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date
    data_df_list = []
    data_len = len(future_date_dic)
    try:
        logger.info("%d future instrument will be handled", data_len)
        for num, (wind_code, (date_frm, date_to)) in enumerate(future_date_dic.items()):
            # Debug helper: restrict to RU contracts only.
            # if wind_code.find('RU') == -1:
            #     continue
            if date_frm > date_to:
                continue
            date_frm_str = date_frm.strftime(STR_FORMAT_DATE)
            date_to_str = date_to.strftime(STR_FORMAT_DATE)
            logger.info('%d/%d) get %s between %s and %s',
                        num, data_len, wind_code, date_frm_str, date_to_str)
            # data_df = wsd_cache(w, wind_code, "open,high,low,close,volume,amt,dealnum,settle,oi,st_stock",
            #                     date_frm, date_to, "")
            try:
                data_df = invoker.wsd(wind_code, wind_indictor_str, date_frm_str, date_to_str, "")
            except APIError as exp:
                logger.exception("%d/%d) %s 执行异常", num, data_len, wind_code)
                if exp.ret_dic.setdefault('error_code', 0) in (
                        -40520007,  # no data available
                        -40521009,  # decode failure; check date params (month-end / short Feb)
                ):
                    # Recoverable per-contract errors: skip this contract.
                    continue
                else:
                    # Unknown API error: abort the loop, keep collected data.
                    break
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s',
                               num, data_len, wind_code, date_frm_str, date_to)
                continue
            logger.info('%d/%d) %d data of %s between %s and %s',
                        num, data_len, data_df.shape[0], wind_code, date_frm_str, date_to)
            # Normalize the frame: add code, promote date index, lower-case
            # columns, rename open interest, derive the exchange-less id.
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df.rename(columns={c: str.lower(c) for c in data_df.columns}, inplace=True)
            data_df.rename(columns={'oi': 'position'}, inplace=True)
            data_df['instrument_id'] = wind_code.split('.')[0]
            data_df_list.append(data_df)
            # Debug only: stop after the first frame.
            if DEBUG and len(data_df_list) >= 1:
                break
    finally:
        # Persist whatever was collected, even if the loop aborted.
        data_df_count = len(data_df_list)
        if data_df_count > 0:
            logger.info('merge data with %d df', data_df_count)
            data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype)
            logger.info("更新 %s 结束 %d 条记录被更新", table_name, data_count)
            if not has_table and engine_md.has_table(table_name):
                # First-time creation: switch engine and add the primary key.
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
        else:
            logger.info("更新 %s 结束 0 条记录被更新", table_name)
def import_wind_bonus(chain_param=None):
    """Import stock dividend/bonus information from Wind into ``wind_stock_bonus``.

    Snapshots the listed-stock universe at a handful of fixed year-end report
    dates, fetches the dividend fields via ``invoker.wss`` for each snapshot,
    normalizes the date columns, drops rows that carry none of the four key
    dividend dates, then bulk-upserts the remainder.

    :param chain_param: used by celery ``task.chain`` to pass the previous
        task's result on to the next task; not used inside this function
    :return: None
    """
    table_name = 'wind_stock_bonus'
    # Remember whether the table existed before inserting: MyISAM conversion
    # and primary-key creation are only performed on first creation.
    has_table = engine_md.has_table(table_name)
    # (wss field name, SQL column type) pairs; the field order defines the
    # comma-separated request string sent to Wind.
    param_list = [
        ('div_cashbeforetax2', DOUBLE),
        ('div_cashaftertax2', DOUBLE),
        ('div_stock2', DOUBLE),
        ('div_capitalization2', DOUBLE),
        ('div_capitalization', DOUBLE),
        ('div_stock', DOUBLE),
        ('div_cashaftertax', DOUBLE),
        ('div_cashbeforetax', DOUBLE),
        ('div_cashandstock', DOUBLE),
        ('div_recorddate', Date),
        ('div_exdate', Date),
        ('div_paydate', Date),
        ('div_trddateshareb', Date),
        ('div_preDisclosureDate', Date),
        ('div_prelandate', Date),
        ('div_smtgdate', Date),
        ('div_impdate', Date),
    ]
    param = ",".join([key for key, _ in param_list])
    with with_db_session(engine_md) as session:
        table = session.execute('SELECT wind_code, ipo_date, delist_date FROM wind_stock_info')
        # wind_code -> (ipo_date, delist_date or None when still listed).
        # NOTE(review): assumes delist_date is never NULL in wind_stock_info,
        # otherwise the ``>`` comparison raises TypeError -- confirm.
        stock_date_dic = {wind_code: (ipo_date, delist_date if delist_date > UN_AVAILABLE_DATE else None)
                          for wind_code, ipo_date, delist_date in table.fetchall()}
    print(len(stock_date_dic))  # debug output of the universe size
    # Fixed year-end report dates used to snapshot the listed universe.
    DATE_LIST = [datetime.strptime('2010-12-31', STR_FORMAT_DATE).date(),
                 datetime.strptime('2011-12-31', STR_FORMAT_DATE).date(),
                 datetime.strptime('2012-12-31', STR_FORMAT_DATE).date(),
                 datetime.strptime('2013-12-31', STR_FORMAT_DATE).date(),
                 datetime.strptime('2014-12-31', STR_FORMAT_DATE).date(),
                 datetime.strptime('2015-12-31', STR_FORMAT_DATE).date(),
                 ]
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dic_exdate_df_list = []
    for rep_date in DATE_LIST:
        rep_date_str = rep_date.strftime('%Y%m%d')  # NOTE(review): currently unused
        # Stocks listed before rep_date and not yet delisted at rep_date.
        stock_list = [s for s, date_pair in stock_date_dic.items()
                      if date_pair[0] < rep_date and (rep_date < date_pair[1] if date_pair[1] is not None else True)]
        dic_exdate_df = invoker.wss(stock_list, param)
        dic_exdate_df_list.append(dic_exdate_df)
    dic_exdate_df_all = pd.concat(dic_exdate_df_list)
    dic_exdate_df_all.index.rename('wind_code', inplace=True)
    # The same stock may appear in several snapshots; keep a single row.
    dic_exdate_df_all.drop_duplicates(inplace=True)
    # wss returns upper-case column names; normalize each date column.
    dic_exdate_df_all['DIV_EXDATE'] = dic_exdate_df_all['DIV_EXDATE'].apply(str_date)
    dic_exdate_df_all['DIV_PAYDATE'] = dic_exdate_df_all['DIV_PAYDATE'].apply(str_date)
    dic_exdate_df_all['DIV_IMPDATE'] = dic_exdate_df_all['DIV_IMPDATE'].apply(str_date)
    dic_exdate_df_all['DIV_RECORDDATE'] = dic_exdate_df_all['DIV_RECORDDATE'].apply(str_date)
    dic_exdate_df_all['DIV_PREDISCLOSUREDATE'] = dic_exdate_df_all['DIV_PREDISCLOSUREDATE'].apply(str_date)
    dic_exdate_df_all['DIV_PRELANDATE'] = dic_exdate_df_all['DIV_PRELANDATE'].apply(str_date)
    dic_exdate_df_all['DIV_SMTGDATE'] = dic_exdate_df_all['DIV_SMTGDATE'].apply(str_date)
    dic_exdate_df_all['DIV_TRDDATESHAREB'] = dic_exdate_df_all['DIV_TRDDATESHAREB'].apply(str_date)
    # Keep rows where at least one of the four key dividend dates is present.
    condition = ~(dic_exdate_df_all['DIV_EXDATE'].apply(lambda x: x is None)
                  & dic_exdate_df_all['DIV_PAYDATE'].apply(lambda x: x is None)
                  & dic_exdate_df_all['DIV_IMPDATE'].apply(lambda x: x is None)
                  & dic_exdate_df_all['DIV_RECORDDATE'].apply(lambda x: x is None)
                  )
    dic_exdate_df_available = dic_exdate_df_all[condition]
    bunch_insert_on_duplicate_update(dic_exdate_df_available, table_name, engine_md, dtype=dtype)
    # First creation only: convert storage engine and build the primary key.
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
def import_future_daily_his(chain_param=None, ths_code_set: set = None, begin_time=None):
    """Update daily quotes of future contracts into ``ifind_future_daily``.

    For every contract in ``ifind_future_info`` the still-missing date range is
    computed (resuming from the day after the latest stored quote when the
    table already exists), quotes are fetched through
    ``invoker.THS_HistoryQuotes`` and bulk-upserted in batches of >= 10000 rows.

    :param chain_param: used by celery ``task.chain`` to pass the previous
        task's result on to the next task; not used inside this function
    :param ths_code_set: optional white-list of ths codes to process
    :param begin_time: optional earliest date to fetch from
    :return: None
    """
    table_name = 'ifind_future_daily'
    info_table_name = 'ifind_future_info'
    logger.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # (indicator name, SQL column type) pairs requested from the iFinD API.
    # NOTE(review): preClose is declared String(20) while every other price is
    # DOUBLE -- looks inconsistent, confirm against the actual table schema.
    indicator_param_list = [
        ('preClose', String(20)),
        ('open', DOUBLE),
        ('high', DOUBLE),
        ('low', DOUBLE),
        ('close', DOUBLE),
        ('volume', DOUBLE),
        ('amount', DOUBLE),
        ('avgPrice', DOUBLE),
        ('change', DOUBLE),
        ('changeRatio', DOUBLE),
        ('preSettlement', DOUBLE),
        ('settlement', DOUBLE),
        ('change_settlement', DOUBLE),
        ('chg_settlement', DOUBLE),
        ('openInterest', DOUBLE),
        ('positionChange', DOUBLE),
        ('amplitude', DOUBLE),
    ]
    json_indicator = ','.join([key for key, _ in indicator_param_list])
    if has_table:
        # After 16:00 include today's close, before 16:00 only up to yesterday.
        # Contracts whose date_to is more than a year old are deliberately not
        # re-downloaded (see commented-out predicate): some very old data can
        # no longer be completed, e.g. AL0202.SHF AL9902.SHF CU0202.SHF.
        # TODO: the ths_ksjyr_future field should be replaced by
        # ths_contract_listed_date_future, which is more appropriate.
        sql_str = """SELECT ths_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
                SELECT fi.ths_code, ifnull(trade_date_max_1, ths_start_trade_date_future) date_frm,
                    ths_last_td_date_future lasttrade_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) wfd
                ON fi.ths_code = wfd.ths_code
            ) tt
            WHERE date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date)
            -- AND subdate(curdate(), 360) < if(lasttrade_date<end_date, lasttrade_date, end_date)
            ORDER BY ths_code""".format(table_name=table_name, info_table_name=info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
                SELECT fi.ths_code, ths_start_trade_date_future date_frm,
                    ths_last_td_date_future lasttrade_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi
            ) tt""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # Per-contract date range that still needs daily data.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }
    if TRIAL:
        # Trial accounts can only access the most recent 5 years of data.
        date_from_min = date.today() - timedelta(days=(365 * 5))
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to
        }
    # Column types for the bulk insert.
    dtype = {key: val for key, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    dtype['time'] = Date
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        logger.info("%d future instrument will be handled", code_count)
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            if begin_time is None:
                continue
            if begin_time > end_time:
                logger.warning('%d/%d) %s [%s - %s] 日期范围无效,跳过', num, code_count, ths_code, begin_time, end_time)
                continue
            else:
                logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            try:
                data_df = invoker.THS_HistoryQuotes(
                    ths_code, json_indicator,
                    '',  # 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                    begin_time, end_time)
            except APIError as exp:
                # -4001 / -4210 are per-contract data errors: skip the
                # contract; any other error aborts the whole loop.
                if exp.ret_dic['error_code'] in (-4001, -4210,):
                    data_df = None
                    logger.exception(
                        "THS_HistoryQuotes 获取失败 '%s', '%s', %s, %s 不影响后续任务",
                        ths_code, json_indicator, begin_time, end_time)
                else:
                    logger.exception(
                        "THS_HistoryQuotes 获取失败 '%s', '%s', %s, %s",
                        ths_code, json_indicator, begin_time, end_time)
                    break
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
                # Flush once the buffered row count exceeds the threshold.
                if data_count >= 10000:
                    data_df_all = pd.concat(data_df_list)
                    data_count = bunch_insert_on_duplicate_update(
                        data_df_all, table_name, engine_md, dtype)
                    tot_data_count += data_count
                    # BUGFIX: log the inserted row count BEFORE resetting the
                    # counter -- previously this message always reported 0.
                    logging.info("%s 新增数据 %d 条", table_name, data_count)
                    data_df_list, data_count = [], 0
            # Debug only.
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # Flush whatever is still buffered, even on error/interruption.
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, dtype)
            tot_data_count += data_count
            logging.info("%s 新增数据 %d 条", table_name, data_count)
        # First creation only: convert storage engine and build the primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
    logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def import_stock_info_hk(chain_param=None, refresh=False):
    """Load the Hong-Kong stock universe into ``wind_stock_info_hk``.

    Collects constituent codes roughly once per year over the requested span,
    queries listing details (ipo/delist dates, market, exchange, former names)
    through ``invoker.wss`` in chunks, upserts the result and finally refreshes
    the code-mapping table.

    :param chain_param: used by celery ``task.chain`` to pass the previous
        task's result on to the next task; not used inside this function
    :param refresh: False (default) starts scanning from today only;
        True rescans the full history beginning at ``DATE_BASE``
    :return: None
    """
    table_name = 'wind_stock_info_hk'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # (wss field, SQL column type) pairs; order defines the request string.
    field_dtype_pairs = [
        ('sec_name', String(20)),
        ('trade_code', String(20)),
        ('ipo_date', Date),
        ('delist_date', Date),
        ('mkt', String(20)),
        ('exch_city', String(20)),
        ('exch_eng', String(20)),
        ('prename', String(2000)),
    ]
    wss_fields = ",".join(name for name, _ in field_dtype_pairs)
    # wss returns upper-case column names; map them back to lower case.
    upper_2_lower = {name.upper(): name.lower() for name, _ in field_dtype_pairs}
    dtype = dict(field_dtype_pairs)
    dtype['wind_code'] = String(20)
    # Walk from the start date to today in ~1 year steps, unioning the
    # constituent snapshot at each step, then add today's snapshot.
    fetch_date = DATE_BASE if refresh else date.today()
    today = date.today()
    code_set = set()
    while fetch_date < today:
        snapshot = get_stock_code_set(fetch_date)
        if snapshot is not None:
            code_set |= snapshot
        fetch_date += timedelta(days=365)
    snapshot = get_stock_code_set(today)
    if snapshot is not None:
        code_set |= snapshot
    # Query listing details in chunks of 1000 codes per wss call.
    info_df_list = []
    for code_chunk in split_chunk(list(code_set), 1000):
        info_df_list.append(invoker.wss(code_chunk, wss_fields))
        if DEBUG:
            break
    info_all_df = pd.concat(info_df_list)
    info_all_df.index.rename('wind_code', inplace=True)
    info_all_df.rename(columns=upper_2_lower, inplace=True)
    logging.info('%d data will be import', info_all_df.shape[0])
    info_all_df.reset_index(inplace=True)
    data_count = bunch_insert_on_duplicate_update(info_all_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    # First creation only: convert storage engine and build the primary key.
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # Refresh the code_mapping table from the freshly loaded info table.
    update_from_info_table(table_name)
def add_data_2_ckdvp(col_name, param, wind_code_set: set = None, begin_time=None):
    """Incrementally save one wsd indicator into the key/value table
    ``wind_ckdvp_stock_hk`` (columns: wind_code, key, time, value, param).

    When the table already exists the query resumes each stock from the day
    after its last stored date for this key/param combination; otherwise the
    date range is derived from ``wind_stock_info_hk`` alone.

    :param col_name: wsd indicator name to fetch (stored in the ``key`` column)
    :param param: wsd parameter string (stored in the ``param`` column)
    :param wind_code_set: optional white-list of wind codes, default None (all)
    :param begin_time: optional earliest date to fetch from, default None
    :return: True when all codes finished without interruption, else False
    """
    table_name = 'wind_ckdvp_stock_hk'
    all_finished = False
    has_table = engine_md.has_table('wind_ckdvp_stock_hk')
    if has_table:
        # Join existing rows of this key/param to resume from the day after
        # the last stored date per stock.
        sql_str = """
            select wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                select info.wind_code, (ipo_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                from wind_stock_info_hk info
                left outer join
                    (select wind_code, adddate(max(time),1)
                    from wind_ckdvp_stock_hk
                    where wind_ckdvp_stock_hk.key='{0}' and param='{1}'
                    group by wind_code
                    ) daily
                on info.wind_code = daily.wind_code
            ) tt
            where date_frm <= if(delist_date<end_date,delist_date, end_date)
            order by wind_code""".format(col_name, param)
    else:
        logger.warning(
            'wind_ckdvp_stock_hk 不存在,仅使用 wind_stock_info_hk 表进行计算日期范围')
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date,delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code,ipo_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info_hk info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code"""
    with with_db_session(engine_md) as session:
        # Per-stock date range that still needs daily data.
        table = session.execute(sql_str)
        code_date_range_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    # Column types of the key/value table.
    dtype = {
        'wind_code': String(20),
        'key': String(80),
        'time': Date,
        'value': String(80),
        'param': String(80),
    }
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (wind_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, wind_code, date_from, date_to)
            data_df = invoker.wsd(wind_code, col_name, date_from, date_to, param)
            if data_df is not None and data_df.shape[0] > 0:
                # Reshape the wsd result into the key/value layout.
                data_df['key'] = col_name
                data_df['param'] = param
                data_df['wind_code'] = wind_code
                data_df.rename(columns={col_name.upper(): 'value'}, inplace=True)
                data_df.index.rename('time', inplace=True)
                data_df.reset_index(inplace=True)
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
                # Flush once the buffered row count exceeds the threshold.
                # NOTE(review): this mid-loop flush uses plain append via
                # to_sql while the final flush below performs an upsert via
                # bunch_insert_on_duplicate_update -- confirm this asymmetry
                # is intentional.
                if data_count >= 10000:
                    tot_data_df = pd.concat(data_df_list)
                    tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                    tot_data_count += data_count
                    data_df_list, data_count = [], 0
            # Debug only.
            if DEBUG and len(data_df_list) > 2:
                break
        all_finished = True
    finally:
        # Flush whatever is still buffered, even on error/interruption.
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype=dtype)
            tot_data_count += data_count
        if not has_table and engine_md.has_table(table_name):
            # First creation: promote the four logical key columns to NOT NULL
            # and build the composite primary key.
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `wind_code` `wind_code` VARCHAR(20) NOT NULL ,
                CHANGE COLUMN `time` `time` DATE NOT NULL ,
                CHANGE COLUMN `key` `key` VARCHAR(80) NOT NULL ,
                CHANGE COLUMN `param` `param` VARCHAR(80) NOT NULL ,
                ADD PRIMARY KEY (`wind_code`, `time`, `key`, `param`)""".format(
                table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
    # NOTE(review): the PK was already added by create_pk_str above, so this
    # second MyISAM-conversion + build_primary_key pass under the same
    # condition looks redundant (and may fail on a duplicate PK) -- confirm.
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    return all_finished
def import_smfund_info(chain_param=None):
    """Import structured-fund (分级基金) information into ``wind_smfund_info``.

    Collects the structured-fund universe at several historical dates, fetches
    setup/maturity dates for codes not yet in the table, enriches them with
    leveraged-fund details per fund, then upserts everything and refreshes the
    code-mapping table.

    :param chain_param: used by celery ``task.chain`` to pass the previous
        task's result on to the next task; not used inside this function
    :return: None
    """
    table_name = "wind_smfund_info"
    has_table = engine_md.has_table(table_name)
    # Sector ids of the structured-fund categories.
    # NOTE(review): kept for reference only -- nothing below reads it.
    types = {
        u'主动股票型分级母基金': 1000007766000000,
        u'被动股票型分级母基金': 1000007767000000,
        u'纯债券型分级母基金': 1000007768000000,
        u'混合债券型分级母基金': 1000007769000000,
        u'混合型分级母基金': 1000026143000000,
        u'QDII分级母基金': 1000019779000000
    }
    # (column name, SQL column type) pairs for the leveragedfundinfo fields.
    col_name_param_list = [
        ('wind_code', String(20)),
        ('fund_type', String(20)),
        ('sec_name', String(50)),
        ('class_a_code', String(20)),
        ('class_a_name', String(50)),
        ('class_b_code', String(20)),
        ('class_b_name', String(50)),
        ('track_indexcode', String(20)),
        ('track_indexname', String(50)),
        ('a_pct', DOUBLE),
        ('b_pct', DOUBLE),
        ('upcv_nav', DOUBLE),
        ('downcv_nav', DOUBLE),
        ('max_purchasefee', DOUBLE),
        ('max_redemptionfee', DOUBLE),
    ]
    col_name = ",".join([name for name, _ in col_name_param_list])
    # Snapshot the full structured-fund universe at several historical dates
    # and union the constituent codes.
    dates = [
        '2011-01-01', '2013-01-01', '2015-01-01', '2017-01-01', '2018-01-01'
    ]
    df = pd.DataFrame()
    for date_p in dates:
        temp_df = invoker.wset("sectorconstituent", "date=%s;sectorid=1000006545000000" % date_p)
        # pd.concat instead of the deprecated/removed DataFrame.append;
        # consistent with the rest of this module.
        df = pd.concat([df, temp_df])
    wind_code_all = df['wind_code'].unique()
    # Column types for the bulk insert.
    dtype = {key: val for key, val in col_name_param_list}
    dtype['wind_code'] = String(20)
    dtype["tradable"] = String(20)
    dtype["fund_setupdate"] = Date
    dtype["fund_maturitydate"] = Date
    # Only fetch details for codes not already present in the table.
    if has_table:
        with with_db_session(engine_md) as session:
            table = session.execute("SELECT wind_code FROM wind_smfund_info")
            wind_code_existed = set(
                [content[0] for content in table.fetchall()])
        wind_code_new = list(set(wind_code_all) - wind_code_existed)
    else:
        wind_code_new = list(set(wind_code_all))
    # Drop pseudo codes containing '!'.
    wind_code_new = [code for code in wind_code_new if code.find('!') < 0]
    info_df = invoker.wss(wind_code_new, 'fund_setupdate, fund_maturitydate')
    if info_df is None:
        raise Exception('no data')
    info_df['FUND_SETUPDATE'] = info_df['FUND_SETUPDATE'].apply(
        lambda x: str_2_date(x))
    info_df['FUND_MATURITYDATE'] = info_df['FUND_MATURITYDATE'].apply(
        lambda x: str_2_date(x))
    info_df.rename(columns={
        'FUND_SETUPDATE': 'fund_setupdate',
        'FUND_MATURITYDATE': 'fund_maturitydate'
    }, inplace=True)
    field = col_name
    # Fetch leveraged-fund details per new fund at its setup date.
    # NOTE(review): rows accumulate onto the constituent df from above rather
    # than a fresh frame, so the join below sees both row sets -- confirm
    # this is intentional.
    for code in info_df.index:
        beginDate = info_df.loc[code, 'fund_setupdate'].strftime('%Y-%m-%d')
        temp_df = invoker.wset("leveragedfundinfo",
                               "date=%s;windcode=%s;field=%s" % (beginDate, code, field))
        df = pd.concat([df, temp_df])
        if DEBUG and len(df) > 10:
            break
    df.set_index('wind_code', inplace=True)
    # Codes containing 'S' are flagged as tradable.
    df['tradable'] = df.index.map(lambda x: x if 'S' in x else None)
    # df.index = df.index.map(lambda x: x[:-2] + 'OF')
    info_df = info_df.join(df, how='outer')
    # BUGFIX: the rename result was previously discarded (no inplace=True and
    # no assignment), so the nav/fs_inc columns were never renamed.
    info_df.rename(
        columns={
            'a_nav': 'nav_a',
            'b_nav': 'nav_b',
            'a_fs_inc': 'fs_inc_a',
            'b_fs_inc': 'fs_inc_b'
        }, inplace=True)
    info_df.index.rename('wind_code', inplace=True)
    info_df.reset_index(inplace=True)
    bunch_insert_on_duplicate_update(info_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, len(info_df))
    # First creation only: convert storage engine and build the primary key.
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # Refresh the code_mapping table.
    update_from_info_table(table_name)
def add_data_2_ckdvp(json_indicator, json_param, ths_code_set: set = None, begin_time=None):
    """Incrementally save one THS_DateSerial indicator into the key/value
    table ``ifind_ckdvp_index`` (columns: code, key, date, value, param;
    value is Varchar(80)). The table is used to hold newly added indicator
    fields.

    Example query:
    THS_DateSerial('600007.SH,600009.SH','ths_pe_ttm_stock','101','Days:Tradedays,Fill:Previous,Interval:D','2018-07-31','2018-07-31')

    :param json_indicator: indicator name (stored in the ``key`` column)
    :param json_param: indicator parameter (stored in the ``param`` column)
    :param ths_code_set: optional white-list of ths codes, default None (all)
    :param begin_time: optional earliest date to fetch from, default None
    :return: True when all data finished loading, otherwise False (e.g. the
        load was interrupted mid-way by quota exhaustion)
    """
    all_finished = False
    table_name = 'ifind_ckdvp_index'
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume each index from the day after its last stored date for this
        # key/param combination.
        # NOTE(review): the literal NULL inside if(NULL<end_date, NULL,
        # end_date) always compares as NULL (falsy), so end_date is always
        # selected -- confirm whether a delist-date column was intended here.
        sql_str = """
            select ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                select info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                from ifind_index_info info
                left outer join
                    (select ths_code, adddate(max(time),1) trade_date_max_1
                    from {table_name}
                    where {table_name}.key='{0}' and param='{1}'
                    group by ths_code
                    ) daily
                on info.ths_code = daily.ths_code
            ) tt
            where date_frm <= if(NULL<end_date, NULL, end_date)
            order by ths_code""".format(json_indicator, json_param, table_name=table_name)
    else:
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围', table_name)
        sql_str = """
            SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date)
            ORDER BY ths_code"""
    # Compute the per-index date range that still needs data.
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }
    # Column types of the key/value table.
    dtype = {
        'ths_code': String(20),
        'key': String(80),
        'time': Date,
        'value': String(80),
        'param': String(80),
    }
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:D', begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                # Reshape into the key/value layout.
                data_df['key'] = json_indicator
                data_df['param'] = json_param
                data_df.rename(columns={json_indicator: 'value'}, inplace=True)
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
                # Flush once the buffered row count exceeds the threshold.
                if data_count >= 10000:
                    data_df_all = pd.concat(data_df_list)
                    data_count = bunch_insert_on_duplicate_update(
                        data_df_all, table_name, engine_md, dtype)
                    tot_data_count += data_count
                    data_df_list, data_count = [], 0
            # Debug only.
            if DEBUG and len(data_df_list) > 4:
                break
        all_finished = True
    finally:
        # Flush whatever is still buffered, even on error/interruption.
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, dtype)
            tot_data_count += data_count
        # NOTE(review): unlike the sibling functions there is no
        # engine_md.has_table(table_name) re-check here, so the ALTER below
        # would fail if no row was ever inserted -- confirm.
        if not has_table:
            alter_table_2_myisam(engine_md, [table_name])
            # Promote the key columns to NOT NULL and build the composite PK.
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ths_code` `ths_code` VARCHAR(20) NOT NULL ,
                CHANGE COLUMN `time` `time` DATE NOT NULL ,
                CHANGE COLUMN `key` `key` VARCHAR(80) NOT NULL ,
                CHANGE COLUMN `param` `param` VARCHAR(80) NOT NULL ,
                ADD PRIMARY KEY (`ths_code`, `time`, `key`, `param`)""".format(
                table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
    return all_finished
def import_index_daily_ds(chain_param=None, ths_code_set: set = None, begin_time=None):
    """Save index history via the date-serial interface into
    ``ifind_index_daily_ds``; this complements the History-interface data
    with fields such as adjust factor (af), limit-up/down flags, suspension
    status and reason.

    :param chain_param: used by celery ``task.chain`` to pass the previous
        task's result on to the next task; not used inside this function
    :param ths_code_set: optional white-list of ths codes to process
    :param begin_time: optional earliest date to fetch from
    :return: None
    """
    table_name = 'ifind_index_daily_ds'
    has_table = engine_md.has_table(table_name)
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_INDEX_DAILY_DS],
        sep=';')
    if has_table:
        # Resume each index from the day after its last stored date.
        # NOTE(review): if(NULL<end_date, NULL, end_date) always yields
        # end_date because NULL comparisons are NULL -- confirm intent.
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date)
            ORDER BY ths_code""".format(table_name=table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date)
            ORDER BY ths_code;"""
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围' % table_name)
    with with_db_session(engine_md) as session:
        # Per-index date range that still needs data.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }
    if TRIAL:
        # Trial accounts can only access the most recent 5 years of data.
        date_from_min = date.today() - timedelta(days=(365 * 5))
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_to is not None and date_from_min <= date_to
        }
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            end_time = date_2_str(end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:D', begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
                # Flush once the buffered row count exceeds the threshold.
                if data_count >= 10000:
                    data_df_all = pd.concat(data_df_list)
                    data_count = bunch_insert_on_duplicate_update(
                        data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_DS)
                    tot_data_count += data_count
                    data_df_list, data_count = [], 0
            # Debug only.
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # Flush whatever is still buffered, even on error/interruption.
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_DS)
            tot_data_count += data_count
        # First creation only: convert storage engine and build the PK.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
    logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def merge_stock_daily(date_from=None):
    """Merge wind and ifind daily stock data into the ``stock_daily`` table.

    Reads the ifind history table, the ifind date-serial table and the wind
    daily table, outer-joins them on code/date, reconciles overlapping columns
    through ``merge_data`` with per-column merge rules, then upserts the
    result.

    :param date_from: earliest trade date to merge; None resumes from the day
        after the latest date already stored (or merges everything when the
        target table does not exist yet)
    :return: the merged DataFrame that was written
    """
    table_name = 'stock_daily'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(trade_date),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    ifind_table_ds_name = 'ifind_{table_name}_ds'.format(table_name=table_name)
    ifind_table_his_name = 'ifind_{table_name}_his'.format(
        table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    # BUGFIX: the his/ds queries previously had their table names swapped
    # (the "his" query read the _ds table and vice versa). With disjoint
    # column sets and an outer merge the data outcome was the same, but the
    # variable names lied about their contents.
    if date_from is None:
        ifind_his_sql_str = "select * from {table_name}".format(
            table_name=ifind_table_his_name)
        ifind_ds_sql_str = "select * from {table_name}".format(
            table_name=ifind_table_ds_name)
        wind_sql_str = "select * from {table_name}".format(
            table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str, engine_md)  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str, engine_md)  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md)  # , index_col='wind_code'
    else:
        ifind_his_sql_str = "select * from {table_name} where time >= %s".format(
            table_name=ifind_table_his_name)
        ifind_ds_sql_str = "select * from {table_name} where time >= %s".format(
            table_name=ifind_table_ds_name)
        wind_sql_str = "select * from {table_name} where trade_date >= %s".format(
            table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str, engine_md,
                                   params=[date_from])  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str, engine_md,
                                  params=[date_from])  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md,
                              params=[date_from])  # , index_col='wind_code'
    # Combine the two ifind sources, then outer-join wind; the indicator
    # column records which side(s) each row came from.
    ifind_df = pd.merge(ifind_his_df, ifind_ds_df, how='outer',
                        on=['ths_code', 'time'])
    joined_df = pd.merge(ifind_df, wind_df, how='outer',
                         left_on=['ths_code', 'time'],
                         right_on=['wind_code', 'trade_date'],
                         indicator='indicator_column')
    # target column -> (SQL type, merge function, merge-function kwargs)
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {
            'left_key': 'ths_code', 'right_key': 'wind_code'
        }),
        'trade_date': (Date, prefer_left, {
            'left_key': 'time', 'right_key': 'trade_date'
        }),
        'open': (DOUBLE, mean_value, {
            'left_key': 'open_x', 'right_key': 'open_y',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        'high': (DOUBLE, mean_value, {
            'left_key': 'high_x', 'right_key': 'high_y',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        'low': (DOUBLE, mean_value, {
            'left_key': 'low_x', 'right_key': 'low_y',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        # TODO: for unknown reasons some close values from the wind interface
        # are inaccurate, therefore prefer the ifind side.
        'close': (DOUBLE, prefer_left, {
            'left_key': 'close_x', 'right_key': 'close_y',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        'volume': (DOUBLE, mean_value, {
            'left_key': 'volume_x', 'right_key': 'volume_y',
            'warning_accuracy': 1, 'primary_keys': ('ths_code', 'time')
        }),
        'amount': (DOUBLE, mean_value, {
            'left_key': 'amount', 'right_key': 'amt',
            'warning_accuracy': 1, 'primary_keys': ('ths_code', 'time')
        }),
        # Total shares: ths totalShares is keyed on the change date while wind
        # total_shares is keyed on the announcement date; on conflict prefer
        # the wind value.
        'total_shares': (DOUBLE, prefer_right, {
            'left_key': 'totalShares', 'right_key': 'total_shares'
        }),
        # 'susp_days': (Integer, '***', {
        #     'left_key': 'ths_up_and_down_status_stock', 'right_key': 'susp_days', 'other_key': 'trade_status',
        #     'primary_keys': ('ths_code', 'time')}),
        'max_up_or_down': (Integer, max_up_or_down, {
            'ths_key': 'ths_up_and_down_status_stock', 'wind_key': 'maxupordown',
            'primary_keys': ('ths_code', 'time')
        }),
        'total_capital': (DOUBLE, get_value, {
            'key': 'totalCapital'
        }),
        'float_capital': (DOUBLE, get_value, {
            'key': 'floatCapitalOfAShares'
        }),
        'pct_chg': (DOUBLE, mean_value, {
            'left_key': 'changeRatio', 'right_key': 'pct_chg',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        'float_a_shares': (DOUBLE, get_value, {
            'key': 'floatSharesOfAShares'
        }),  # corresponds to wind float_a_shares
        'free_float_shares': (DOUBLE, get_value, {
            'key': 'free_float_shares'
        }),  # corresponds to ths ths_free_float_shares_stock
        # PE_TTM: ths ths_pe_ttm_stock is based on the financial report date
        # while wind pe_ttm is based on the reporting period; on conflict
        # prefer the wind value.
        'pe_ttm': (DOUBLE, prefer_right, {
            'left_key': 'ths_pe_ttm_stock', 'right_key': 'pe_ttm',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        'pe': (DOUBLE, get_value, {
            'key': 'pe'
        }),
        'pb': (DOUBLE, get_value, {
            'key': 'pb'
        }),
        'ps': (DOUBLE, get_value, {
            'key': 'ps'
        }),
        'pcf': (DOUBLE, get_value, {
            'key': 'pcf'
        }),
    }
    col_merge_rule_dic = {
        key: (val[1], val[2])
        for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name,
                                                  engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    # First creation only: convert storage engine and build the primary key.
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    return data_df
def import_wind_code_kv(keys: list, chain_param=None, wind_code_list=None, begin_time=None):
    """
    Import per-stock key/value indicator data into the ``wind_code_kv`` table.

    Example of the underlying wind call:
    w.wsd("600000.SH", "west_eps", "2018-05-05", "2018-06-03", "year=2018;westPeriod=180")

    :param keys: list of wind indicator names; each one is stored as a `key` value in the table
    :param chain_param: used by celery task chains to pass the previous task's result onwards
    :param wind_code_list: optional whitelist of stock codes; None means all stocks
    :param begin_time: optional earliest start date; when given and earlier than a stock's own
        start date, it is used instead (new parameter, default None keeps prior behavior)
    :return:
    """
    table_name = "wind_code_kv"
    wind_code_set = set(wind_code_list) if wind_code_list is not None else None
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # dtype of the destination table
    param_list = [
        ('trade_date', Date),
        ('key', String(45)),
        ('value', DOUBLE),
    ]
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    for item_key in keys:
        item_key = item_key.lower()
        # For each stock compute the date range still missing: the day after the latest stored
        # trade_date (falling back to the IPO date) up to yesterday/today depending on the hour.
        # NOTE(review): the query does not filter on `key`, so max(trade_date) is taken across
        # ALL keys in the table — after the first key is inserted, later keys may see a shrunken
        # date range. Confirm whether a `WHERE wind_code_kv.key=...` filter is intended here.
        if has_table:
            sql_str = """
                SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
                FROM
                (
                    SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                    FROM wind_stock_info info
                    LEFT OUTER JOIN
                        (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) kv
                    ON info.wind_code = kv.wind_code
                ) tt
                WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
                ORDER BY wind_code;""".format(table_name=table_name)
        else:
            logger.warning('%s 不存在,仅使用 wind_stock_info 表进行计算日期范围', table_name)
            sql_str = """
                SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
                FROM
                (
                    SELECT info.wind_code, ipo_date date_frm, delist_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                    FROM wind_stock_info info
                ) tt
                WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
                ORDER BY wind_code"""
        with with_db_session(engine_md) as session:
            # Per-stock (date_from, date_to) fetch windows, honoring the optional begin_time
            # override and the optional wind_code whitelist.
            table = session.execute(sql_str)
            stock_date_dic = {
                wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
                for wind_code, date_from, date_to in table.fetchall()
                if wind_code_set is None or wind_code in wind_code_set}
        data_df_list = []
        data_len = len(stock_date_dic)
        logger.info('%d stocks will been import into wind_code_kv', data_len)
        try:
            for data_num, (wind_code, (date_from, date_to)) in enumerate(stock_date_dic.items(), start=1):
                # Fetch the indicator time series for this stock over its missing window.
                wind_indictor_str = item_key
                data_df = get_wind_kv_per_year(wind_code, wind_indictor_str, date_from, date_to,
                                               "year=%(year)d;westPeriod=180")
                if data_df is None:
                    logger.warning('%d/%d) %s has no data during %s %s',
                                   data_num, data_len, wind_code, date_from, date_to)
                    continue
                data_df.dropna(inplace=True)
                if data_df.shape[0] == 0:
                    logger.warning('%d/%d) %s has 0 data during %s %s',
                                   data_num, data_len, wind_code, date_from, date_to)
                    continue
                logger.info('%d/%d) %d data of %s between %s and %s',
                            data_num, data_len, data_df.shape[0], wind_code, date_from, date_to)
                # Reshape to the long (wind_code, trade_date, key, value) layout of the table.
                data_df['wind_code'] = wind_code
                data_df.index.rename('trade_date', inplace=True)
                data_df.reset_index(inplace=True)
                data_df.rename(columns={item_key.upper(): 'value'}, inplace=True)
                data_df['key'] = item_key
                data_df_list.append(data_df)
                # Debug use only: previously this break was unconditional, which truncated
                # every production run to 5 stocks. It now only fires under DEBUG.
                if DEBUG and data_num >= 5:
                    break
        finally:
            # Always flush whatever was collected, even if the loop raised part-way.
            if len(data_df_list) > 0:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
                logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_df_all.shape[0])
            if not has_table and engine_md.has_table(table_name):
                # First creation of the table: switch engine and build the primary key.
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
def import_pub_fund_info(chain_param=None, first_time=False):
    """
    获取全市场公募基金基本信息 (import public-fund basic info into wind_pub_fund_info).

    :param chain_param: used by celery task chains to pass the previous task's result onwards
    :param first_time: when True, scan yearly snapshots since 2004-01-01 to collect the full
        historical universe of fund codes; otherwise only yesterday's snapshot is used
    :return:
    """
    table_name = "wind_pub_fund_info"
    has_table = engine_md.has_table(table_name)
    # Build the list of snapshot dates on which to sample the fund universe.
    if first_time:
        date_since = datetime.strptime('2004-01-01', STR_FORMAT_DATE).date()
        date_list = []
        one_year = timedelta(days=365)
        while date_since < date.today() - ONE_DAY:
            date_list.append(date_since)
            date_since += one_year
        # Always include the most recent snapshot as well.
        date_list.append(date.today() - ONE_DAY)
    else:
        date_list = [date.today() - ONE_DAY]

    # Union of fund codes seen on each snapshot date.
    wind_code_set = set()
    for fetch_date in date_list:
        data_set = get_wind_code_set(fetch_date)
        if data_set is not None:
            wind_code_set |= data_set
        if DEBUG and len(wind_code_set) > 6:
            break

    # Skip codes whose info rows already exist.
    if has_table:
        with with_db_session(engine_md) as session:
            sql_str = "select wind_code from %s" % table_name
            table = session.execute(sql_str)
            wind_code_set_existed = {content[0] for content in table.fetchall()}
        wind_code_set -= wind_code_set_existed

    # 获取股票对应上市日期,及摘牌日期
    # w.wss("300005.SZ,300372.SZ,000003.SZ", "ipo_date,trade_code,mkt,exch_city,exch_eng")
    wind_code_list = list(wind_code_set)
    wind_code_count = len(wind_code_list)
    seg_count = 1000  # batch size per wss request
    data_info_df_list = []
    # (wss field name, column type). A duplicated FUND_FUNDMANAGER entry (String(500) followed
    # by String(50)) was removed: it requested the field twice and silently shrank the dtype
    # to String(50); the wider String(500) is kept.
    fund_info_field_col_name_list = [
        ('FUND_FULLNAME', String(200)),
        ('FUND_EXCHANGESHORTNAME', String(50)),
        ('FUND_BENCHMARK', String(200)),
        ('FUND_SETUPDATE', Date),
        ('FUND_MATURITYDATE', Date),
        ('FUND_FUNDMANAGER', String(500)),
        ('FUND_MGRCOMP', String(200)),
        ('FUND_CUSTODIANBANK', String(50)),
        ('FUND_TYPE', String(50)),
        ('FUND_FIRSTINVESTTYPE', String(50)),
        ('FUND_INVESTTYPE', String(50)),
        ('FUND_STRUCTUREDFUNDORNOT', String(50)),
        ('FUND_BENCHINDEXCODE', String(50)),
    ]
    dtype = {key.lower(): val for key, val in fund_info_field_col_name_list}
    dtype['wind_code'] = String(20)
    # Request field list is loop-invariant — build it once.
    field_str = ",".join([col_name.lower() for col_name, _ in fund_info_field_col_name_list])
    for n in range(0, wind_code_count, seg_count):
        sub_list = wind_code_list[n:(n + seg_count)]
        stock_info_df = invoker.wss(sub_list, field_str)
        data_info_df_list.append(stock_info_df)
        if DEBUG and len(data_info_df_list) > 1000:
            break

    if len(data_info_df_list) == 0:
        logger.info("%s 没有数据可以导入", table_name)
        return
    data_info_all_df = pd.concat(data_info_df_list)
    data_info_all_df.index.rename('wind_code', inplace=True)
    data_info_all_df.rename(
        columns={col: col.lower() for col in data_info_all_df.columns},
        inplace=True)
    logging.info('%d data will be import', data_info_all_df.shape[0])
    data_info_all_df.reset_index(inplace=True)
    bunch_insert_on_duplicate_update(data_info_all_df, table_name, engine_md, dtype=dtype)
    if not has_table and engine_md.has_table(table_name):
        # First creation of the table: switch engine and build the primary key.
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    update_from_info_table(table_name)
def import_index_daily_his(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    通过history接口将历史数据保存到 ifind_index_daily_his.

    :param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
    :param ths_code_set: optional whitelist of index codes; None means all indices
    :param begin_time: 默认为None,如果非None则代表所有数据更新日期不得晚于该日期
    :return:
    """
    table_name = 'ifind_index_daily_his'
    # NOTE(review): this converts begin_time when it already IS a date — presumably str_2_date
    # is a passthrough for dates, but the condition looks inverted; confirm intended types.
    if begin_time is not None and type(begin_time) == date:
        begin_time = str_2_date(begin_time)
    # Example of the underlying call:
    # THS_HistoryQuotes('600006.SH,600010.SH',
    # 'preClose,open,high,low,close,avgPrice,changeRatio,volume,amount,...',
    # 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
    # '2018-06-30','2018-07-30')
    json_indicator, _ = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_INDEX_DAILY_HIS],
        sep=';')
    has_table = engine_md.has_table(table_name)
    # Compute per-index (date_from, date_to): day after last stored `time`, falling back to the
    # index base-period date, capped at yesterday/today depending on the hour.
    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM
                    ifind_index_info info
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM ifind_index_daily_his GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date)
            ORDER BY ths_code;"""
    else:
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围', table_name)
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date)
            ORDER BY ths_code"""
    # 计算每只股票需要获取日线数据的日期区间
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))
        # 试用账号只能获取近5年数据
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_to is not None and date_from_min <= date_to
        }

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
    try:
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            data_df = invoker.THS_HistoryQuotes(
                ths_code, json_indicator,
                'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Flush once the accumulated row count crosses the threshold.
            # BUGFIX: previously only the LAST chunk (data_df) was inserted here while the whole
            # accumulated data_df_list was discarded, silently dropping earlier chunks.
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_HIS)
                tot_data_count += data_count
                data_df_list, data_count = [], 0
            # 仅调试使用
            if DEBUG and len(data_df_list) > 5:
                break
    finally:
        # Flush any remainder, then perform first-time table setup if needed.
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_HIS)
            tot_data_count += data_count
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def merge_ifind_stock_daily(ths_code_set: set = None, date_from=None):
    """
    将ds his 以及财务数据合并为 daily 数据.

    Merge ifind ds + his quote tables with the latest financial-report data into
    one ifind_stock_daily row per (ths_code, time), tagging each row with the
    report_date whose financials it carries.

    :param ths_code_set: optional whitelist of ths codes; None means all codes
    :param date_from: earliest date to merge; when None and the target table exists,
        resumes from the day after the table's max(`time`)
    """
    table_name = 'ifind_stock_daily'
    logging.info("合成 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        # Resume from the day after the last merged date.
        sql_str = "select adddate(max(`time`),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    # Load the source tables (his/ds quotes filtered by date_from; report dates and
    # financials loaded in full).
    ifind_his_df = get_ifind_daily_df('ifind_stock_daily_his', date_from)
    ifind_ds_df = get_ifind_daily_df('ifind_stock_daily_ds', date_from)
    ifind_report_date_df = get_ifind_report_date_df('ifind_stock_report_date', None)
    ifind_fin_df = get_ifind_daily_df('ifind_stock_fin', None)
    ifind_fin_df_g = ifind_fin_df.groupby('ths_code')
    # Codes that actually have financial data available.
    ths_code_set_4_daily = set(ifind_fin_df_g.size().index)
    # Merge ds + his quote data; the outer join leaves NaN where one side is missing.
    ifind_his_ds_df = pd.merge(ifind_his_df, ifind_ds_df, how='outer',
                               on=['ths_code', 'time'])
    ifind_his_ds_df_g = ifind_his_ds_df.groupby('ths_code')
    logger.debug("提取数据完成")
    # Build {ths_code: {report_date: financial Series}} — for each actual disclosure
    # date, the financial row to apply from that date onwards.
    report_date_dic_dic = {}
    for report_date_g in [
            ifind_report_date_df.groupby(
                ['ths_code', 'ths_regular_report_actual_dd_stock'])
    ]:
        for num, ((ths_code, report_date), data_df) in enumerate(report_date_g, start=1):
            if ths_code_set is not None and ths_code not in ths_code_set:
                continue
            if is_nan_or_none(report_date):
                continue
            report_date_dic = report_date_dic_dic.setdefault(ths_code, {})
            if ths_code not in ths_code_set_4_daily:
                logger.error('fin 表中不存在 %s 的財務數據', ths_code)
                continue
            ifind_fin_df_temp = ifind_fin_df_g.get_group(ths_code)
            # NOTE(review): this membership test checks the OUTER dict (keyed by ths_code)
            # against a report_date, so it is effectively always True — presumably
            # `report_date not in report_date_dic` was intended; verify before changing.
            if report_date not in report_date_dic_dic:
                ifind_fin_df_temp = ifind_fin_df_temp[
                    ifind_fin_df_temp['time'] <= report_date]
                if ifind_fin_df_temp.shape[0] > 0:
                    # NOTE(review): iloc[0] picks the EARLIEST row at/before report_date;
                    # confirm the latest row (iloc[-1]) is not what was intended.
                    report_date_dic[
                        report_date] = ifind_fin_df_temp.sort_values(
                            'time').iloc[0]
    # Build the dtype mapping for the merged table from the component dtypes.
    dtype = {'report_date': Date}
    for dic in [
            DTYPE_STOCK_DAILY_DS, DTYPE_STOCK_REPORT_DATE,
            DTYPE_STOCK_DAILY_FIN, DTYPE_STOCK_DAILY_HIS
    ]:
        for key, val in dic.items():
            dtype[key] = val
    logger.debug("计算财报日期完成")
    # Stitch the quote rows with the financials effective over each disclosure interval.
    tot_data_count, data_count, data_df_list, for_count = 0, 0, [], len(
        report_date_dic_dic)
    try:
        for num, (ths_code, report_date_dic) in enumerate(report_date_dic_dic.items(),
                                                          start=1):  # key: ths_code
            # TODO: 檢查判斷 ths_code 是否存在在 ifind_his_ds_df_g 裏面, size 暫時使用, 以後再改進
            # NOTE(review): the error message below says "fin 表" but this checks the
            # his/ds groupby — the message looks copy-pasted.
            if ths_code not in ifind_his_ds_df_g.size():
                logger.error('fin 表中不存在 %s 的財務數據', ths_code)
                continue
            # open/low etc. may be NaN (2438 rows observed)
            ifind_his_ds_df_cur_ths_code = ifind_his_ds_df_g.get_group(
                ths_code)  # shape[1] == 30
            logger.debug('%d/%d) 处理 %s %d 条数据', num, for_count, ths_code,
                         ifind_his_ds_df_cur_ths_code.shape[0])
            report_date_list = list(report_date_dic.keys())
            report_date_list.sort()
            # Walk consecutive (from, to) disclosure-date pairs; endpoints may be None.
            for report_date_from, report_date_to in iter_2_range(
                    report_date_list):
                logger.debug('%d/%d) 处理 %s [%s - %s]', num, for_count, ths_code,
                             date_2_str(report_date_from),
                             date_2_str(report_date_to))
                # Select quote rows belonging to this disclosure interval:
                # [report_date_from, report_date_to), with open-ended edges.
                if report_date_from is None:
                    is_fit = ifind_his_ds_df_cur_ths_code[
                        'time'] < report_date_to
                elif report_date_to is None:
                    is_fit = ifind_his_ds_df_cur_ths_code[
                        'time'] >= report_date_from
                else:
                    is_fit = (ifind_his_ds_df_cur_ths_code['time'] < report_date_to) & (
                        ifind_his_ds_df_cur_ths_code['time'] >= report_date_from)
                # Copy so the per-segment financial columns can be assigned safely.
                ifind_his_ds_df_segment = ifind_his_ds_df_cur_ths_code[
                    is_fit].copy()
                segment_count = ifind_his_ds_df_segment.shape[0]
                if segment_count == 0:
                    continue
                # Financial Series effective during this segment (None before first report).
                fin_s = report_date_dic[
                    report_date_from] if report_date_from is not None else None
                for key in DTYPE_STOCK_DAILY_FIN.keys():
                    if key in ('ths_code', 'time'):
                        continue
                    ifind_his_ds_df_segment[key] = fin_s[
                        key] if fin_s is not None and key in fin_s else None
                ifind_his_ds_df_segment['report_date'] = report_date_from
                data_df_list.append(ifind_his_ds_df_segment)
                data_count += segment_count
                if DEBUG and len(data_df_list) > 1:
                    break
                # Flush to the database once enough rows have accumulated.
                if data_count > 10000:
                    data_df = pd.concat(data_df_list)
                    data_count = bunch_insert_on_duplicate_update(
                        data_df, table_name, engine_md, dtype)
                    tot_data_count += data_count
                    data_count, data_df_list = 0, []
    finally:
        # Flush the remainder and, on first creation, set up the table engine and key.
        if len(data_df_list) > 0:
            data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, dtype)
            tot_data_count += data_count
        logger.info('%s 新增或更新记录 %d 条', table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_smfund_daily(chain_param=None):
    """
    Import daily data for structured (分级) funds into wind_smfund_daily: for each parent
    fund, fetch parent/A-share/B-share daily quotes via wsd plus leveraged-fund metrics
    via wset, join them by date and upsert.

    :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務
    :return:
    """
    table_name = "wind_smfund_daily"
    has_table = engine_md.has_table(table_name)
    # (column name, type). The first 14 entries double as the wset field list below.
    col_name_param_list = [
        ('next_pcvdate', Date),
        ('a_nav', DOUBLE),
        ('b_nav', DOUBLE),
        ('a_fs_inc', DOUBLE),
        ('b_fs_inc', DOUBLE),
        ('cur_interest', DOUBLE),
        ('next_interest', DOUBLE),
        ('ptm_year', DOUBLE),
        ('anal_pricelever', DOUBLE),
        ('anal_navlevel', DOUBLE),
        ('t1_premium', DOUBLE),
        ('t2_premium', DOUBLE),
        ('dq_status', String(50)),
        ('tm_type', TEXT),
        ('code_p', String(20)),
        ('trade_date', Date),
        ('open', DOUBLE),
        ('high', DOUBLE),
        ('low', DOUBLE),
        ('close', DOUBLE),
        ('volume', DOUBLE),
        ('amt', DOUBLE),
        ('pct_chg', DOUBLE),
        ('open_a', DOUBLE),
        ('high_a', DOUBLE),
        ('low_a', DOUBLE),
        ('close_a', DOUBLE),
        ('volume_a', DOUBLE),
        ('amt_a', DOUBLE),
        ('pct_chg_a', DOUBLE),
        ('open_b', DOUBLE),
        ('high_b', DOUBLE),
        ('low_b', DOUBLE),
        ('close_b', DOUBLE),
        ('volume_b', DOUBLE),
        ('amt_b', DOUBLE),
        ('pct_chg_b', DOUBLE),
    ]
    # Field list for the wset("leveragedfundinfo", ...) call.
    wind_indictor_str = ",".join([key for key, _ in col_name_param_list[:14]])
    # dtype mapping for the destination table.
    dtype = {key: val for key, val in col_name_param_list}
    # Data cut-off: yesterday before BASE_LINE_HOUR, today after.
    date_ending = date.today() - ONE_DAY if datetime.now(
    ).hour < BASE_LINE_HOUR else date.today()
    date_ending_str = date_ending.strftime('%Y-%m-%d')
    # Determine each fund's start date depending on whether the table already exists.
    # NOTE(review): in the has_table branch, `ifnull(date, fund_setupdate)` references a
    # column `date` that neither table obviously defines while the computed alias
    # `trade_date_max` goes unused — this looks like it should be
    # `ifnull(trade_date_max, fund_setupdate)`; verify against the actual schema.
    if has_table:
        sql_str = """
            SELECT wind_code, ifnull(date, fund_setupdate) date_start, class_a_code, class_b_code
            FROM wind_smfund_info fi
            LEFT OUTER JOIN
            (SELECT code_p, adddate(max(trade_date), 1) trade_date_max FROM wind_smfund_daily GROUP BY code_p) smd
            ON fi.wind_code = smd.code_p
            WHERE fund_setupdate IS NOT NULL AND class_a_code IS NOT NULL AND class_b_code IS NOT NULL"""
    else:
        sql_str = """
            SELECT wind_code, ifnull(date, fund_setupdate) date_start, class_a_code, class_b_code
            FROM wind_smfund_info
            WHERE fund_setupdate IS NOT NULL AND class_a_code IS NOT NULL AND class_b_code IS NOT NULL"""
    df = pd.read_sql(sql_str, engine_md)
    df.set_index('wind_code', inplace=True)
    data_len = df.shape[0]
    logger.info('分级基金数量: %d', data_len)
    index_start = 1  # adjustable: lets a rerun skip funds already processed
    for data_num, wind_code in enumerate(
            df.index, start=1):  # 可调整 # [100:min([df_count, 200])]
        if data_num < index_start:
            continue
        logger.info('%d/%d) %s start to import', data_num, data_len, wind_code)
        date_from = df.loc[wind_code, 'date_start']
        date_from = str_2_date(date_from)
        # Skip funds with no usable setup date.
        if type(date_from) not in (date, datetime, Timestamp):
            logger.info('%d/%d) %s has no fund_setupdate will be ignored',
                        data_num, data_len, wind_code)
            # print(df.iloc[i, :])
            continue
        date_from_str = date_from.strftime('%Y-%m-%d')
        if date_from > date_ending:
            logger.info('%d/%d) %s %s %s 跳过', data_num, data_len, wind_code,
                        date_from_str, date_ending_str)
            continue
        # wsd quote fields for parent/A/B series.
        field = "open,high,low,close,volume,amt,pct_chg"
        # wsd_cache(w, code, field, beginTime, today, "")
        try:
            df_p = invoker.wsd(wind_code, field, date_from_str,
                               date_ending_str, "")
        except APIError as exp:
            logger.exception("%d/%d) %s 执行异常", data_num, data_len, wind_code)
            if exp.ret_dic.setdefault('error_code', 0) in (
                    -40520007,  # 没有可用数据
                    -40521009,  # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月
            ):
                continue
            else:
                break
        if df_p is None:
            continue
        # wsd returns upper-case column names; swapcase lowers them to match dtype keys.
        df_p.rename(columns=lambda x: x.swapcase(), inplace=True)
        df_p['code_p'] = wind_code
        code_a = df.loc[wind_code, 'class_a_code']
        if code_a is None:
            print('%d %s has no code_a will be ignored' % (data_num, wind_code))
            # print(df.iloc[i, :])
            continue
        # A-share series, columns suffixed with _a.
        # df_a = wsd_cache(w, code_a, field, beginTime, today, "")
        df_a = invoker.wsd(code_a, field, date_from_str, date_ending_str, "")
        df_a.rename(columns=lambda x: x.swapcase() + '_a', inplace=True)
        code_b = df.loc[wind_code, 'class_b_code']
        # B-share series, columns suffixed with _b.
        # df_b = wsd_cache(w, code_b, field, beginTime, today, "")
        df_b = invoker.wsd(code_b, field, date_from_str, date_ending_str, "")
        df_b.columns = df_b.columns.map(lambda x: x.swapcase() + '_b')
        # Per-date leveraged-fund metrics via wset.
        new_df = pd.DataFrame()
        for date_str in df_p.index:
            # time = date_str.date().strftime('%Y-%m-%d')
            field = "date=%s;windcode=%s;field=%s" % (date_str, wind_code,
                                                      wind_indictor_str)
            # wset_cache(w, "leveragedfundinfo", field)
            temp = invoker.wset("leveragedfundinfo", field)
            temp['date'] = date_str
            # NOTE(review): DataFrame.append was removed in pandas 2.0; this requires
            # pandas < 2.0 (pd.concat is the replacement).
            new_df = new_df.append(temp)
            if DEBUG and len(new_df) > 8:
                break
        # Join quote series and metrics on the date index.
        new_df['next_pcvdate'] = new_df['next_pcvdate'].map(
            lambda x: str_2_date(x) if x is not None else x)
        new_df.set_index('date', inplace=True)
        one_df = pd.concat([df_p, df_a, df_b, new_df], axis=1)
        one_df.index.rename('trade_date', inplace=True)
        one_df.reset_index(inplace=True)
        # one_df['date'] = one_df['date'].map(lambda x: x.date())
        one_df.rename(columns={'date': 'trade_date'}, inplace=True)
        # one_df.rename(columns={"index":'trade_date'},inplace=True)
        # one_df.set_index(['code_p', 'trade_date'], inplace=True)
        bunch_insert_on_duplicate_update(one_df, table_name, engine_md,
                                         dtype=dtype)
        logger.info('%d/%d) %s import success', data_num, data_len, wind_code)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        # build_primary_key([table_name])
        # Primary key is (code_p, trade_date), not wind_code, so it is created manually.
        create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `code_p` `code_p` VARCHAR(20) NOT NULL ,
            CHANGE COLUMN `trade_date` `trade_date` DATE NOT NULL ,
            ADD PRIMARY KEY (`code_p`, `trade_date`)""".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)
def import_stock_daily_hk(chain_param=None, wind_code_set=None, begin_time=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据

    :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務
    :param wind_code_set: optional whitelist of HK stock codes; None means all stocks
    :param begin_time: optional earliest date; overrides a stock's start date when earlier
    :return:
    """
    table_name = 'wind_stock_daily_hk'
    logging.info("更新 %s 开始", table_name)
    # (wsd field name upper-case, column type)
    param_list = [
        ('OPEN', DOUBLE),
        ('HIGH', DOUBLE),
        ('LOW', DOUBLE),
        ('CLOSE', DOUBLE),
        ('ADJFACTOR', DOUBLE),
        ('VOLUME', DOUBLE),
        ('AMT', DOUBLE),
        ('PCT_CHG', DOUBLE),
        ('MAXUPORDOWN', Integer),
        ('SWING', DOUBLE),
        ('TURN', DOUBLE),
        ('FREE_TURN', DOUBLE),
        ('TRADE_STATUS', String(20)),
        ('SUSP_DAYS', Integer),
        ('TOTAL_SHARES', DOUBLE),
        ('FREE_FLOAT_SHARES', DOUBLE),
        ('EV2_TO_EBITDA', DOUBLE),
        ('PS_TTM', DOUBLE),
        ('PE_TTM', DOUBLE),
        ('PB_MRQ', DOUBLE),
    ]
    # Map wsd's upper-case column names to the table's lower-case column names.
    col_name_dic = {
        col_name.upper(): col_name.lower()
        for col_name, _ in param_list
    }
    # Comma-separated wsd request field list.
    col_name_list = [col_name.lower() for col_name in col_name_dic.keys()]
    wind_indictor_str = ",".join(col_name_list)
    has_table = engine_md.has_table(table_name)
    # Per-stock fetch window: day after last stored trade_date (falling back to IPO date)
    # up to yesterday/today depending on the hour, capped by the delisting date.
    if has_table:
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info_hk info
                LEFT OUTER JOIN
                    (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) daily
                ON info.wind_code = daily.wind_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code""".format(table_name=table_name)
    else:
        logger.warning('%s 不存在,仅使用 wind_stock_info_hk 表进行计算日期范围', table_name)
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code, ipo_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info_hk info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code"""
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        stock_date_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    # dtype of the destination table.
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date
    data_df_list = []
    data_len = len(stock_date_dic)
    logger.info('%d data will been import into %s', data_len, table_name)
    try:
        # start=1 so progress logs read "1/N .. N/N" (was 0-based, inconsistent with
        # the other importers in this module).
        for data_num, (wind_code, (date_from, date_to)) in enumerate(stock_date_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', data_num, data_len, wind_code,
                         date_from, date_to)
            # if wind_code == '0192!1.HK':
            #     logger.warning('%s wind 没有这只股票数据', wind_code)
            #     continue
            try:
                data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to)
            except APIError as exp:
                logger.exception("%d/%d) %s 执行异常", data_num, data_len, wind_code)
                if exp.ret_dic.setdefault('error_code', 0) in (
                        -40520007,  # 没有可用数据
                        -40521009,  # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月
                ):
                    continue
                else:
                    break
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', data_num,
                               data_len, wind_code, date_from, date_to)
                continue
            logger.info('%d/%d) %d data of %s between %s and %s', data_num,
                        data_len, data_df.shape[0], wind_code, date_from, date_to)
            # Reshape: add the code column, name the index, lower-case the columns.
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df.rename(columns=col_name_dic, inplace=True)
            data_df_list.append(data_df)
            # 仅供调试使用
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # Always flush whatever was collected, even if the loop raised part-way.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, dtype=dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            if not has_table and engine_md.has_table(table_name):
                # First creation of the table: switch engine and build the primary key.
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
def tushare_tmt_twincome_info():
    """
    Load the static Taiwan TMT industry classification list (code, Chinese name,
    series start date) into the tushare_tmt_twincome_info table and, on first
    creation, set `ts_code` as the primary key.
    """
    table_name = 'tushare_tmt_twincome_info'
    has_table = engine_md.has_table(table_name)
    # Static rows: [ts_code, cn_name, start_date] — data series all start 2011-01-28.
    indicator_list = [
        ['1', 'PC', '20110128'], ['2', 'NB', '20110128'],
        ['3', '主机板', '20110128'], ['4', '印刷电路板', '20110128'],
        ['5', 'IC载板', '20110128'], ['6', 'PCB组装', '20110128'],
        ['7', '软板', '20110128'], ['8', 'PCB', '20110128'],
        ['9', 'PCB原料', '20110128'], ['10', '铜箔基板', '20110128'],
        ['11', '玻纤纱布', '20110128'], ['12', 'FCCL', '20110128'],
        ['13', '显示卡', '20110128'], ['14', '绘图卡', '20110128'],
        ['15', '电视卡', '20110128'], ['16', '泛工业电脑', '20110128'],
        ['17', 'POS', '20110128'], ['18', '工业电脑', '20110128'],
        ['19', '光电IO', '20110128'], ['20', '监视器', '20110128'],
        ['21', '扫描器', '20110128'], ['22', 'PC周边', '20110128'],
        ['23', '储存媒体', '20110128'], ['24', '光碟', '20110128'],
        ['25', '硬盘磁盘', '20110128'], ['26', '发光二极体', '20110128'],
        ['27', '太阳能', '20110128'], ['28', 'LCD面板', '20110128'],
        ['29', '背光模组', '20110128'], ['30', 'LCD原料', '20110128'],
        ['31', 'LCD其它', '20110128'], ['32', '触控面板', '20110128'],
        ['33', '监控系统', '20110128'], ['34', '其它光电', '20110128'],
        ['35', '电子零组件', '20110128'], ['36', '二极体整流', '20110128'],
        ['37', '连接器', '20110128'], ['38', '电源供应器', '20110128'],
        ['39', '机壳', '20110128'], ['40', '被动元件', '20110128'],
        ['41', '石英元件', '20110128'], ['42', '3C二次电源', '20110128'],
        ['43', '网路设备', '20110128'], ['44', '数据机', '20110128'],
        ['45', '网路卡', '20110128'], ['46', '半导体', '20110128'],
        ['47', '晶圆制造', '20110128'], ['48', 'IC封测', '20110128'],
        ['49', '特用IC', '20110128'], ['50', '记忆体模组', '20110128'],
        ['51', '晶圆材料', '20110128'], ['52', 'IC设计', '20110128'],
        ['53', 'IC光罩', '20110128'], ['54', '电子设备', '20110128'],
        ['55', '手机', '20110128'], ['56', '通讯设备', '20110128'],
        ['57', '电信业', '20110128'], ['58', '网路服务', '20110128'],
        ['59', '卫星通讯', '20110128'], ['60', '光纤通讯', '20110128'],
        ['61', '3C通路', '20110128'], ['62', '消费性电子', '20110128'],
        ['63', '照相机', '20110128'], ['64', '软件服务', '20110128'],
        ['65', '系统整合', '20110128'],
    ]
    dtype = {
        'ts_code': String(20),
        'cn_name': String(120),
        'start_date': Date,
    }
    name_list = ['ts_code', 'cn_name', 'start_date']
    info_df = pd.DataFrame(data=indicator_list, columns=name_list)
    data_count = bunch_insert_on_duplicate_update(info_df, table_name,
                                                  engine_md, dtype)
    logger.info('%d 条记录被更新', data_count)
    if not has_table and engine_md.has_table(table_name):
        # First creation of the table: switch engine and create the primary key manually.
        alter_table_2_myisam(engine_md, [table_name])
        create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
            ADD PRIMARY KEY (`ts_code`)""".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)
        # BUGFIX: the completion log previously claimed the `wind_code` key was set,
        # but the primary key created above is `ts_code`.
        logger.info('%s 表 `ts_code` 主键设置完成', table_name)
def import_stock_quertarly_hk(chain_param=None, wind_code_set=None, begin_time=None):
    """
    插入股票季度数据 (HK) 到最近一个工作日-1.

    :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務
    :param wind_code_set: optional whitelist of HK stock codes; None means all stocks
    :param begin_time: optional earliest date; overrides a stock's start date when earlier
    :return:
    """
    table_name = "wind_stock_quertarly_hk"
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Per-stock fetch window: day after last stored trade_date (falling back to IPO date)
    # up to yesterday/today depending on the hour, capped by the delisting date.
    if has_table:
        # BUGFIX: the subquery previously grouped by `ths_code`, a column that does not
        # exist in this wind table (copy-paste from the ifind importers); it must group
        # by `wind_code`.
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info_hk info
                LEFT OUTER JOIN
                    (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) quertarly
                ON info.wind_code = quertarly.wind_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code""".format(table_name=table_name)
    else:
        logger.warning('%s 不存在,仅使用 wind_stock_info_hk 表进行计算日期范围', table_name)
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code, ipo_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info_hk info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code"""
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        # (date_from, date_to) per stock, honoring the optional begin_time override.
        stock_date_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    data_df_list = []
    logger.info('%d data will been import into %s', len(stock_date_dic), table_name)
    # (wsd field name, column type) for the quarterly financial indicators.
    field_col_name_list = [
        ('roic_ttm', String(20)),
        ('yoyprofit', String(20)),
        ('ebit', String(20)),
        ('ebit2', String(20)),
        ('ebit2_ttm', String(20)),
        ('surpluscapitalps', String(20)),
        ('undistributedps', String(20)),
        ('stm_issuingdate', Date),
    ]
    dtype = {key: val for key, val in field_col_name_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date
    wind_indictor_str = ",".join(key for key, _ in field_col_name_list)
    # BUGFIX: this mapping previously mapped upper-case names to the SQLAlchemy TYPE
    # objects, so rename(columns=...) renamed DataFrame columns to e.g. String(20)
    # instances; it must map wsd's upper-case names to the lower-case column names.
    upper_col_2_name_dic = {
        name.upper(): name
        for name, _ in field_col_name_list
    }
    try:
        # start=1 so progress logs are 1-based, consistent with the other importers.
        for stock_num, (wind_code, (date_from, date_to)) in enumerate(stock_date_dic.items(), start=1):
            # Quarterly report data (unit=1, report type 1, quarterly period).
            data_df = invoker.wsd(wind_code, wind_indictor_str, date_from,
                                  date_to, "unit=1;rptType=1;Period=Q")
            if data_df is None:
                logger.warning('%d) %s has no data during %s %s', stock_num,
                               wind_code, date_from, date_to)
                continue
            data_df.rename(columns=upper_col_2_name_dic, inplace=True)
            # Drop rows where every indicator is None for the period.
            for trade_date in list(data_df.index):
                is_all_none = data_df.loc[trade_date].apply(
                    lambda x: x is None).all()
                if is_all_none:
                    logger.warning("%s %s 数据全部为空", wind_code, trade_date)
                    data_df.drop(trade_date, inplace=True)
            logger.info('%d) %d data of %s between %s and %s', stock_num,
                        data_df.shape[0], wind_code, date_from, date_to)
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df_list.append(data_df)
            # 仅供调试使用
            if DEBUG and len(data_df_list) > 5:
                break
    finally:
        # Always flush whatever was collected, even if the loop raised part-way.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            bunch_insert_on_duplicate_update(data_df_all, table_name,
                                             engine_md, dtype=dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name,
                         data_df_all.shape[0])
            if not has_table and engine_md.has_table(table_name):
                # First creation of the table: switch engine and build the primary key.
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])