def import_tushare_stock_info(chain_param=None, refresh=False):
    """Import the full-market stock code/name list into `tushare_stock_info`.

    :param chain_param: used only to pass the previous task's result along a celery chain
    :param refresh: currently unused -- kept for interface compatibility
    """
    table_name = 'tushare_stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # (column name, sqlalchemy type) pairs for the insert helper
    wind_indicator_param_list = [
        ('ts_code', String(20)),
        # FIX: `symbol` is an exchange ticker such as '000001'; storing it as a
        # DOUBLE would drop leading zeros, so keep it as a string column.
        ('symbol', String(20)),
        ('list_date', Date),
        ('delist_date', Date),
        ('name', String(30)),
        ('fullname', String(100)),
        ('enname', String(200)),
        ('exchange_id', String(30)),
        ('list_status', String(10)),
        ('is_hs', String(10)),
    ]
    # 设置 dtype
    dtype = {key: val for key, val in wind_indicator_param_list}
    dtype['ts_code'] = String(20)
    # 数据提取: one call returns the whole market
    stock_info_all_df = pro.stock_basic(
        exchange_id='',
        fields='ts_code,symbol,name,fullname,enname,exchange_id,curr_type,list_date,list_status,delist_date,is_hs')
    logging.info('%s stock data will be import', stock_info_all_df.shape[0])
    data_count = bunch_insert_on_duplicate_update(stock_info_all_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    # first creation only: convert storage engine and build the primary key
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
def import_coin_info():
    """Fetch basic info for all tradable coins and store it in `tushare_coin_info`."""
    table_name = 'tushare_coin_info'
    has_table = engine_md.has_table(table_name)
    # 设置 dtype: sqlalchemy column types for the insert helper
    dtype = {
        'coin': String(60),
        'en_name': String(60),
        'cn_name': String(60),
        'issue_date': Date,
        'amount': DOUBLE,
    }
    # fetch the coin list from 2017-01-01 up to today
    coinlist_df = pro.coinlist(start_date='20170101', end_date=date_2_str(date.today(), DATE_FORMAT_STR))
    data_count = bunch_insert_on_duplicate_update(coinlist_df, table_name, engine_md, dtype)
    logging.info("更新 %s 完成 新增数据 %d 条", table_name, data_count)
    # first creation only: switch to MyISAM and add the composite primary key
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `coin` `coin` VARCHAR(60) NOT NULL FIRST,
            CHANGE COLUMN `en_name` `en_name` VARCHAR(60) NOT NULL AFTER `coin`,
            ADD PRIMARY KEY (`coin`, `en_name`)""".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)
def import_tushare_index_basic(chain_param=None):
    """Import index basic information, per publisher market, into `tushare_stock_index_basic`.

    NOTE(review): the original docstring was copy-pasted from a daily-bar importer;
    this function actually loads index metadata, not daily quotes.
    :param chain_param: used only to pass the previous task's result along a celery chain
    :return:
    """
    table_name = 'tushare_stock_index_basic'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # fields requested from the index-basic interface
    fields = 'ts_code', 'name', 'fullname', 'market', 'publisher', 'index_type', 'category', 'base_date', \
             'base_point', 'list_date', 'weight_rule', 'desc', 'exp_date'
    # publisher/market codes accepted by the tushare index_basic interface
    market_list = list(['MSCI', 'CSI', 'SSE', 'SZSE', 'CICC', 'SW', 'CNI', 'OTH'])
    try:
        for mkt in market_list:
            data_df = invoke_index_basic(market=mkt, fields=fields)
            if len(data_df) > 0:
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_INDEX_BASIC)
                logging.info("%s更新 %s 结束 %d 条信息被更新", mkt, table_name, data_count)
            else:
                logging.info("无数据信息可被更新")
    finally:
        # first creation only: convert the new table to MyISAM
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
def import_macroeconomy_info(chain_param=None):
    """Write the wind macro-economy indicator catalog into `wind_macroeconomy_info`.

    :param chain_param: used only to pass the previous task's result along a celery chain
    :return:
    """
    table_name = 'wind_macroeconomy_info'
    has_table = engine_md.has_table(table_name)
    # indicator rows: key, english name, chinese name, begin date, end date, source remark
    indicators_dic = [
        # RMB exchange rate
        ["M0067855", "us2rmb", "美元兑人民币即期汇率", "1994-01-04", None, '中国货币网'],
    ]
    # sqlalchemy column types for the insert helper
    dtype = {
        'key': String(20),
        'en_name': String(120),
        'cn_name': String(120),
        'begin_date': Date,
        'end_date': Date,
        'remark': Text,
    }
    name_list = ['key', 'en_name', 'cn_name', 'begin_date', 'end_date', 'remark']
    info_df = pd.DataFrame(data=indicators_dic, columns=name_list)
    data_count = bunch_insert_on_duplicate_update(info_df, table_name, engine_md, dtype)
    logger.info('%d 条记录被更新', data_count)
    # first creation only: convert to MyISAM and set `key` as the primary key
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `key` `key` VARCHAR(20) NOT NULL FIRST,
            ADD PRIMARY KEY (`key`)""".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)
        logger.info('%s 表 `key` 主键设置完成', table_name)
def insert_into_db(data_df_list, engine_md):
    """Concatenate the collected tick DataFrames and upsert them into `wind_stock_tick`.

    :param data_df_list: list of tick DataFrames indexed by timestamp
    :param engine_md: sqlalchemy engine of the target database
    :return: number of DataFrames that were passed in
    """
    table_name = 'wind_stock_tick'
    table_existed = engine_md.has_table(table_name)
    # column -> sqlalchemy type mapping for the upsert helper
    dtype = {
        'datetime': DateTime,
        'open': DOUBLE,
        'high': DOUBLE,
        'low': DOUBLE,
        'close': DOUBLE,
        'ask1': DOUBLE,
        'bid1': DOUBLE,
        'asize1': DOUBLE,
        'bsize1': DOUBLE,
        'volume': DOUBLE,
        'amount': DOUBLE,
        'preclose': DOUBLE,
        'wind_code': String(20),
    }
    data_count = len(data_df_list)
    if data_count:
        merged_df = pd.concat(data_df_list)
        # promote the timestamp index into a regular 'datetime' column
        merged_df.index.rename('datetime', inplace=True)
        merged_df.reset_index(inplace=True)
        bunch_insert_on_duplicate_update(merged_df, table_name, engine_md, dtype=dtype)
        logger.info('%d data imported', merged_df.shape[0])
    # first creation only: adjust the storage engine and build the primary key
    if not table_existed and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    return data_count
def import_tushare_adj_factor(chain_param=None, ):
    """Import daily adjustment factors into `tushare_stock_daily_adj_factor`.

    Loads data up to the last working day - 1; after the base-line hour (16:00)
    the current day is included as well.
    :return:
    """
    table_name = 'tushare_stock_daily_adj_factor'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Choose the date-range query depending on whether the target table exists.
    # Be careful to reference the correct source table here -- referencing the
    # wrong one would mix up the incremental ranges.
    if has_table:
        # incremental mode: only open trade dates after the latest stored trade_date
        sql_str = """
            select cal_date
            FROM
            (
                select * from tushare_trade_date trddate
                where( cal_date>(SELECT max(trade_date) FROM {table_name}))
            )tt
            where (is_open=1
                and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                and exchange='SSE') """.format(table_name=table_name)
    else:
        # full-history mode: every open SSE trade date up to the cut-off
        sql_str = """
            SELECT cal_date FROM tushare_trade_date trddate
            WHERE (trddate.is_open=1
                AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                AND exchange='SSE')
            ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)
    with with_db_session(engine_md) as session:
        # fetch the trade dates still to be imported
        table = session.execute(sql_str)
        trddate = list(row[0] for row in table.fetchall())
    try:
        for i in range(len(trddate)):
            trade_date = datetime_2_str(trddate[i], STR_FORMAT_DATE_TS)
            # empty ts_code means: all stocks for this trade date
            data_df = pro.adj_factor(ts_code='', trade_date=trade_date)
            if len(data_df) > 0:
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR)
                logging.info(" %s 表 %s 日 %d 条信息被更新", table_name, trade_date, data_count)
            else:
                logging.info("无数据信息可被更新")
    finally:
        # first creation only: convert to MyISAM and add the composite primary key
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `trade_date` `trade_date` DATE NOT NULL AFTER `ts_code`,
                ADD PRIMARY KEY (`ts_code`, `trade_date`)""".format(
                table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `ts_code`, `trade_date` 主键设置完成', table_name)
def import_tushare_stock_fund_holdings():
    """Import quarterly fund-holdings data (years 2013-2018) into `tushare_stock_fund_holdings`."""
    table_name = 'tushare_stock_fund_holdings'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # (column name, sqlalchemy type) pairs for the insert helper
    tushare_fund_holdings_indicator_param_list = [
        ('ts_code', String(20)),
        ('sec_name', String(20)),
        ('end_date', Date),
        ('nums', DOUBLE),
        ('nlast', DOUBLE),
        ('count', DOUBLE),
        ('clast', DOUBLE),
        ('amount', DOUBLE),
        ('ratio', DOUBLE),
    ]
    tushare_fund_holdings_dtype = {key: val for key, val in tushare_fund_holdings_indicator_param_list}
    data_df_list, data_count, all_data_count = [], 0, 0
    years = list(range(2013, 2019))
    try:
        for year in years:
            for quarter in (1, 2, 3, 4):
                # FIX: use the module logger instead of a bare print for progress output
                logger.debug('(%d, %d)', year, quarter)
                data_df = invoke_fund_holdings(year, quarter)
                # convert raw 6-digit codes to ts_code format: '6xxxxx' -> .SH, else .SZ
                data_df.code = ['%s.SH' % code if code[0] == '6' else '%s.SZ' % code for code in data_df.code]
                data_df = data_df.rename(columns={'code': 'ts_code', 'name': 'sec_name', 'date': 'end_date'})
                # 把数据攒起来 (accumulate, then bulk-insert once past the threshold)
                if data_df is not None and data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)
                    # 大于阀值有开始插入
                    if data_count >= 50:
                        data_df_all = pd.concat(data_df_list)
                        bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md,
                                                         tushare_fund_holdings_dtype)
                        all_data_count += data_count
                        data_df_list, data_count = [], 0
    finally:
        # flush any remainder, then finalize the new table
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md,
                                                          tushare_fund_holdings_dtype)
            all_data_count = all_data_count + data_count
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
        # first creation only: convert storage engine and build the primary key
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_coin_info(chain_param=None, ):
    """Insert the v1 ticker snapshot (basic coin info) into `cmc_coin_v1_info`.

    NOTE(review): another function named `import_coin_info` exists in this module;
    whichever definition comes later shadows the earlier one -- confirm intended.
    """
    table_name = "cmc_coin_v1_info"
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # v1 ticker endpoint; limit=0 returns the full coin list
    url = 'https://api.coinmarketcap.com/v1/ticker/?limit=0'
    # sqlalchemy column types for the insert helper
    dtype = {
        'id': String(60),
        'name': String(60),
        'symbol': String(20),
        'rank': Integer,
        'price_usd': DOUBLE,
        'price_btc': DOUBLE,
        '24h_volume_usd': DOUBLE,
        'market_cap_usd': DOUBLE,
        'available_supply': DOUBLE,
        'total_supply': DOUBLE,
        'max_supply': DOUBLE,
        'percent_change_1h': DOUBLE,
        'percent_change_24h': DOUBLE,
        'percent_change_7d': DOUBLE,
        'last_updated': DATETIME,
    }
    rsp = requests.get(url)
    if rsp.status_code != 200:
        raise ValueError('请求 listings 相应失败')
    json = rsp.json()  # NOTE(review): local name shadows the stdlib module name `json`
    data_df = pd.DataFrame(json)
    # epoch-seconds string -> datetime (None stays None)
    data_df['last_updated'] = data_df['last_updated'].apply(
        lambda x: None if x is None else datetime.datetime.fromtimestamp(float(x)))
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    # first creation only: convert to MyISAM and set `id` as the primary key
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST ,
            ADD PRIMARY KEY (`id`)""".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)
def import_index_info(chain_param=None, ths_code=None):
    """Import the iFinD index info table `ifind_index_info`.

    :param chain_param: used only to pass parameters along a task.chain sequence
    :param ths_code: comma separated code list; None means the whole market
    :return:
    """
    table_name = 'ifind_index_info'
    has_table = engine_md.has_table(table_name)
    logging.info("更新 ifind_index_info 开始")
    if ths_code is None:
        # build the full-market code list as of today
        date_end = date.today()
        stock_code_set = set()
        stock_code_set_sub = get_stock_code_set(date_end)
        if stock_code_set_sub is not None:
            stock_code_set |= stock_code_set_sub
        ths_code = ','.join(stock_code_set)
    # (indicator name, query param, sqlalchemy type) triples
    indicator_param_list = [
        ('ths_index_short_name_index', '', String(20)),
        ('ths_index_code_index', '', String(10)),
        ('ths_index_category_index', '', String(20)),
        ('ths_index_base_period_index', '', Date),
        ('ths_index_base_point_index', '', DOUBLE),
        ('ths_publish_org_index', '', String(20)),
    ]
    # indicator is the ';'-joined field names, param the matching ';'-joined params
    indicator, param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
    data_df = invoker.THS_BasicData(ths_code, indicator, param)
    if data_df is None or data_df.shape[0] == 0:
        logging.info("没有可用的 index info 可以更新")
        return
    # sqlalchemy column types for the insert helper
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    # first creation only: convert storage engine and build the primary key
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # 更新 code_mapping 表 (refresh the mapping from the updated info table)
    update_from_info_table(table_name)
def init(alter_table=False):
    """Create all tables, optionally convert them to MyISAM, then load the table models.

    :param alter_table: when True, convert tables to MyISAM and rebuild primary keys
    """
    # 创建表
    Base.metadata.create_all(engine_md)
    logger.info("所有表结构建立完成")
    # FIX: fetch the table list unconditionally -- the model-loading loop below
    # needs it even when alter_table is False (previously it was only assigned
    # inside the `if alter_table:` branch, raising NameError otherwise).
    table_name_list = engine_md.table_names()
    if alter_table:
        alter_table_2_myisam(engine_md)
        build_primary_key(table_name_list)
        logger.info("所有表结构调整完成")
    # dynamically reflect each table into the model dictionary
    for table_name in table_name_list:
        TABLE_MODEL_DIC[table_name] = Table(table_name, Base.metadata, autoload=True)
    logger.info("所有表Model动态加载完成")
def import_index_info(wind_codes, chain_param=None):
    """Import wind index information into `wind_index_info`.

    :param wind_codes: index codes to query through the wss interface
    :param chain_param: used only to pass parameters along a celery task chain
    :return:
    """
    table_name = 'wind_index_info'
    has_table = engine_md.has_table(table_name)
    # (wind field name, sqlalchemy type) pairs
    col_name_param_list = [
        ('LAUNCHDATE', Date),
        ('BASEDATE', Date),
        ('BASEVALUE', DOUBLE),
        ('COUNTRY', String(20)),
        ('CRM_ISSUER', String(20)),
        ('SEC_NAME', String(20)),
    ]
    # comma separated lower-case field list for the wss call
    col_name_param = ",".join([key.lower() for key, _ in col_name_param_list])
    # rename mapping: upper-case wind columns -> lower-case db columns
    col_name_param_dic = {col_name.upper(): col_name.lower() for col_name, _ in col_name_param_list}
    # 设置dtype类型: sqlalchemy column types for the insert helper
    dtype = {key.lower(): val for key, val in col_name_param_list}
    dtype['wind_code'] = String(20)
    info_df = invoker.wss(wind_codes, col_name_param)
    if info_df is None or info_df.shape[0] == 0:
        logger.warning("没有数据可导入")
        return
    info_df.rename(columns=col_name_param_dic, inplace=True)
    # promote the code index into a regular 'wind_code' column
    info_df.index.rename("wind_code", inplace=True)
    info_df.reset_index(inplace=True)
    bunch_insert_on_duplicate_update(info_df, table_name, engine_md, dtype=dtype)
    logger.info('%d 条指数信息导入成功\n%s', info_df.shape[0], info_df)
    # first creation only: convert storage engine and build the primary key
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # 更新 code_mapping 表 (refresh the mapping from the updated info table)
    update_from_info_table(table_name)
def import_tushare_namechange(chain_param=None):
    """Import stock name-change history into `tushare_stock_namechange`.

    NOTE(review): the original docstring was copy-pasted from a daily-bar importer;
    this function actually loads name-change records.
    :return:
    """
    table_name = 'tushare_stock_namechange'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # incremental from the latest recorded start_date; otherwise start from the
    # earliest list_date found in tushare_stock_info
    if has_table:
        sql_str = """select max(start_date) start_date FROM md_integration.tushare_stock_namechange"""
    else:
        sql_str = """select min(list_date) start_date FROM md_integration.tushare_stock_info"""
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        start_date = list(row[0] for row in table.fetchall())
    start_date = datetime_2_str(start_date[0], STR_FORMAT_DATE_TS)
    end_date = datetime_2_str(date.today(), STR_FORMAT_DATE_TS)
    try:
        data_df = pro.namechange(
            start_date=start_date, end_date=end_date,
            fields='ts_code,name,start_date,end_date,change_reason')
        if len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_NAMECHANGE)
            logging.info("更新 %s 结束 %d 条上市公司更名信息被更新", table_name, data_count)
        else:
            logging.info("无数据信息可被更新")
    finally:
        # first creation only: convert to MyISAM and add the composite primary key
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `start_date` `start_date` DATE NOT NULL AFTER `ts_code`,
                ADD PRIMARY KEY (`ts_code`, `start_date`)""".format(
                table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `ts_code`, `start_date` 主键设置完成', table_name)
def import_tushare_stock_index_weight(chain_param=None, ts_code_set=None):
    """Import index constituent-weight data into `tushare_stock_index_weight`.

    NOTE(review): the original docstring was copy-pasted from a daily-bar importer.
    :param chain_param: used only to pass the previous task's result along a celery chain
    :param ts_code_set: currently unused -- kept for interface compatibility
    :return:
    """
    table_name = 'tushare_stock_index_weight'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # collect, per index code, the list of trade dates to query
    sql_str = """SELECT ts_code index_code,trade_date trade_date_list FROM md_integration.tushare_stock_index_daily_md """
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        code_date_range_dic = {}
        for index_code, trade_date_list in table.fetchall():
            code_date_range_dic.setdefault(index_code, []).append(trade_date_list)
    data_len = len(code_date_range_dic)
    logger.info('%d index weight will been import into tushare_stock_index_weight table', data_len)
    # FIX: pre-initialize so the `finally` block cannot raise NameError (and hide
    # the real error) when an exception fires before the first query result is
    # assigned to data_df.
    data_df = []
    Cycles = 1
    try:
        for num, (index_code, trade_date_list) in enumerate(code_date_range_dic.items(), start=1):
            trade_date_list_len = len(trade_date_list)
            for i, trade_date in enumerate(trade_date_list):
                logger.debug('%d/%d) %d/%d) %s [%s]',
                             num, data_len, i, trade_date_list_len, index_code, trade_date)
                data_df = invoke_index_weight(index_code=index_code, trade_date=trade_date)
                if len(data_df) > 0:
                    data_count = bunch_insert_on_duplicate_update(
                        data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_INDEX_WEIGHT)
                    logging.info("%s 更新 %s %d 条信息被更新", trade_date, table_name, data_count)
                else:
                    # empty result: stop scanning further dates for this index
                    break
            Cycles = Cycles + 1
            if DEBUG and Cycles > 10:
                break
    finally:
        # flush the last result set (idempotent thanks to on-duplicate-update)
        if len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_INDEX_WEIGHT)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
        # first creation only: convert storage engine and build the primary key
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_tushare_hsgt_top10(chain_param=None):
    """Import HSGT (HK connect) top-10 trading data into `tushare_hsgt_top10`.

    NOTE(review): the original docstring was copy-pasted from a daily-bar importer.
    :return:
    """
    table_name = 'tushare_hsgt_top10'
    logging.info("更新 %s 开始", table_name)
    # (column name, sqlalchemy type) pairs for the insert helper
    param_list = [
        ('trade_date', Date),
        ('ts_code', String(20)),
        ('name', String(20)),
        ('close', DOUBLE),
        ('change', DOUBLE),
        ('rank', Integer),
        ('market_type', String(20)),
        ('amount', DOUBLE),
        ('net_amount', DOUBLE),
        ('buy', DOUBLE),
        ('sell', DOUBLE),
    ]
    has_table = engine_md.has_table(table_name)
    # date range: incremental after the latest stored trade_date, otherwise the
    # full range from 2014-11-17 onward
    if has_table:
        sql_str = """
            select cal_date
            FROM
            (
                select * from tushare_trade_date trddate
                where( cal_date>(SELECT max(trade_date) FROM {table_name}))
            )tt
            where (is_open=1
                and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
            SELECT cal_date FROM tushare_trade_date trddate
            WHERE (trddate.is_open=1
                AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                AND exchange='SSE'
                AND cal_date>='2014-11-17')
            ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)
    with with_db_session(engine_md) as session:
        # fetch the trade dates still to be imported
        table = session.execute(sql_str)
        trddate = list(row[0] for row in table.fetchall())
    # 设置 dtype
    dtype = {key: val for key, val in param_list}
    try:
        for i in range(len(trddate)):
            trade_date = datetime_2_str(trddate[i], STR_FORMAT_DATE_TS)
            # market_type values passed to the interface -- presumably 1 = SH
            # connect, 3 = SZ connect; TODO confirm against the tushare docs
            for market_type in list(['1', '3']):
                data_df = invoke_hsgt_top10(trade_date=trade_date, market_type=market_type)
                if len(data_df) > 0:
                    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
                    logging.info("%s更新 %s 结束 %d 条信息被更新", trade_date, table_name, data_count)
                else:
                    logging.info("无数据信息可被更新")
                    # NOTE(review): this break skips the remaining market types for
                    # the current date when one comes back empty -- confirm intended
                    break
    finally:
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # NOTE(review): the primary key is trade_date only, yet a top-10 table
            # holds multiple rows per date (per stock and market type); this ALTER
            # looks likely to fail or dedupe data -- confirm the intended key.
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `trade_date` `trade_date` VARCHAR(20) NOT NULL FIRST, ADD
                PRIMARY KEY (`trade_date`)""".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `trade_date` 主键设置完成', table_name)
def import_coin_latest(chain_param=None, ):
    """Insert the latest price snapshot into `cmc_coin_pro_latest`.

    :param chain_param: used only to pass parameters along a celery task chain
    """
    table_name = 'cmc_coin_pro_latest'
    has_table = engine_md.has_table(table_name)
    # 设置 dtype: sqlalchemy column types for the insert helper
    dtype = {
        'id': Integer,
        'name': String(60),
        'slug': String(60),
        'symbol': String(20),
        'date_added': DATETIME,
        'last_updated': DATETIME,
        'market_cap': DOUBLE,
        'circulating_supply': DOUBLE,
        'max_supply': DOUBLE,
        'num_market_pairs': DOUBLE,
        'percent_change_1h': DOUBLE,
        'percent_change_24h': DOUBLE,
        'percent_change_7d': DOUBLE,
        'price': DOUBLE,
        'total_supply': DOUBLE,
        'volume_24h': DOUBLE,
        'cmc_rank': DOUBLE,
    }
    # the pro API authenticates through the X-CMC_PRO_API_KEY header
    header = {
        'Content-Type': 'application/json',
        'X-CMC_PRO_API_KEY': config.CMC_PRO_API_KEY
    }
    params = {
        'limit': 5000,
        'start': 1
    }
    url = "https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest"
    rsp = requests.get(url=url, params=params, headers=header)
    if rsp.status_code != 200:
        logger.error('获取数据异常[%d] %s', rsp.status_code, rsp.content)
        return
    ret_dic = rsp.json()
    data_list = ret_dic['data']
    data_dic_list = []
    # flatten each record: hoist the nested quote['USD'] fields to the top level
    for dic in data_list:
        data_dic = {}
        for key, val in dic.items():
            if key == 'quote':
                for sub_key, sub_val in val['USD'].items():
                    data_dic[sub_key] = sub_val
            else:
                data_dic[key] = val
        data_dic_list.append(data_dic)
    data_df = pd.DataFrame(data_dic_list)
    # 数据整理: parse timestamp strings into datetime objects
    data_df['date_added'] = data_df['date_added'].apply(
        lambda x: str_2_datetime(x, DATETIME_FORMAT_STR))
    data_df['last_updated'] = data_df['last_updated'].apply(
        lambda x: str_2_datetime(x, DATETIME_FORMAT_STR))
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
    # first creation only: convert to MyISAM and add (`id`, `last_updated`) primary key
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST ,
            CHANGE COLUMN `last_updated` `last_updated` DATETIME NOT NULL AFTER `id`,
            ADD PRIMARY KEY (`id`, `last_updated`)""".format(table_name=table_name)
        execute_sql(engine_md, create_pk_str)
def merge_stock_info():
    """Merge the wind and ifind stock-info tables into the combined `stock_info` table.

    :return: the merged DataFrame that was written
    """
    table_name = 'stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    ifind_sql_str = "select * from {table_name}".format(table_name=ifind_table_name)
    wind_sql_str = "select * from {table_name}".format(table_name=wind_table_name)
    ifind_df = pd.read_sql(ifind_sql_str, engine_md)  # , index_col='ths_code'
    wind_df = pd.read_sql(wind_sql_str, engine_md)  # , index_col='wind_code'
    # full outer join on the security code; indicator_column records which
    # side(s) each row originated from
    joined_df = pd.merge(ifind_df, wind_df, how='outer',
                         left_on='ths_code', right_on='wind_code', indicator='indicator_column')
    # target column -> (sqlalchemy type, merge function, merge-function kwargs)
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {
            'left_key': 'ths_code',
            'right_key': 'wind_code'
        }),
        'sec_name': (String(20), prefer_left, {
            'left_key': 'ths_stock_short_name_stock',
            'right_key': 'sec_name'
        }),
        'cn_name': (String(100), get_value, {
            'key': 'ths_corp_cn_name_stock'
        }),
        'en_name': (String(100), get_value, {
            'key': 'ths_corp_name_en_stock'
        }),
        'delist_date': (Date, prefer_left, {
            'left_key': 'ths_delist_date_stock',
            'right_key': 'delist_date'
        }),
        'ipo_date': (Date, prefer_left, {
            'left_key': 'ths_ipo_date_stock',
            'right_key': 'ipo_date'
        }),
        # NOTE(review): left_key reuses 'ths_corp_name_en_stock' (the same field
        # as en_name); a former-name field looks more plausible here -- confirm.
        'pre_name': (Text, prefer_left, {
            'left_key': 'ths_corp_name_en_stock',
            'right_key': 'prename'
        }),
        'established_date': (Date, get_value, {
            'key': 'ths_established_date_stock'
        }),
        'exch_city': (String(20), get_value, {
            'key': 'exch_city'
        }),
        'exch_cn': (String(20), get_value, {
            'key': 'ths_listing_exchange_stock'
        }),
        'exch_eng': (String(20), get_value, {
            'key': 'exch_eng'
        }),
        'stock_code': (String(20), prefer_left, {
            'left_key': 'ths_stock_code_stock',
            'right_key': 'trade_code'
        }),
        'mkt': (String(20), get_value, {
            'key': 'mkt'
        }),
    }
    # split the table above into merge rules and sqlalchemy dtypes
    col_merge_rule_dic = {
        key: (val[1], val[2]) for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    # first creation only: convert storage engine and build the primary key
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    return data_df
def add_data_2_ckdvp(json_indicator, json_param, ths_code_set: set = None, begin_time=None):
    """Incrementally save data into `ifind_ckdvp_index`, a five-column table of
    code / key / date / value / param, where value is Varchar(80).

    The table stores the values of any newly added indicator field.
    Example query:
    THS_DateSerial('600007.SH,600009.SH','ths_pe_ttm_stock','101','Days:Tradedays,Fill:Previous,Interval:D','2018-07-31','2018-07-31')
    :param json_indicator:
    :param json_param:
    :param ths_code_set:
    :param begin_time:
    :return: True when all data finished loading; otherwise False (e.g. the data
        quota ran out midway)
    """
    all_finished = False
    table_name = 'ifind_ckdvp_index'
    has_table = engine_md.has_table(table_name)
    if has_table:
        # incremental: per code, start the day after the latest stored date for
        # this (key, param) pair, defaulting to the index base period
        sql_str = """
            select ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                select info.ths_code,
                    ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                from ifind_index_info info
                left outer join
                    (select ths_code, adddate(max(time),1) trade_date_max_1
                        from {table_name}
                        where {table_name}.key='{0}' and param='{1}' group by ths_code
                    ) daily
                on info.ths_code = daily.ths_code
            ) tt
            where date_frm <= if(NULL<end_date, NULL, end_date)
            order by ths_code""".format(json_indicator, json_param, table_name=table_name)
    else:
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围', table_name)
        sql_str = """
            SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date)
            ORDER BY ths_code"""
    # 计算每只股票需要获取日线数据的日期区间
    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set}
    # 设置 dtype: sqlalchemy column types for the insert helper
    dtype = {
        'ths_code': String(20),
        'key': String(80),
        'time': Date,
        'value': String(80),
        'param': String(80),
    }
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:D',
                begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                # tag each row with the indicator/param pair it belongs to
                data_df['key'] = json_indicator
                data_df['param'] = json_param
                data_df.rename(columns={json_indicator: 'value'}, inplace=True)
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
                # 大于阀值有开始插入 (bulk-insert once past the row threshold)
                if data_count >= 10000:
                    data_df_all = pd.concat(data_df_list)
                    data_count = bunch_insert_on_duplicate_update(
                        data_df_all, table_name, engine_md, dtype)
                    tot_data_count += data_count
                    data_df_list, data_count = [], 0
            # 仅调试使用
            if DEBUG and len(data_df_list) > 4:
                break
        all_finished = True
    finally:
        # flush any remaining accumulated rows
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, dtype)
            tot_data_count += data_count
        # first creation only: set the composite primary key
        if not has_table:
            alter_table_2_myisam(engine_md, [table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ths_code` `ths_code` VARCHAR(20) NOT NULL ,
                CHANGE COLUMN `time` `time` DATE NOT NULL ,
                CHANGE COLUMN `key` `key` VARCHAR(80) NOT NULL ,
                CHANGE COLUMN `param` `param` VARCHAR(80) NOT NULL ,
                ADD PRIMARY KEY (`ths_code`, `time`, `key`, `param`)""".format(
                table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
    return all_finished
def import_index_daily_ds(chain_param=None, ths_code_set: set = None, begin_time=None):
    """Save history via the date-serial interface into `ifind_index_daily_ds`,
    supplementing the History data (e.g. adjust factor af, limit-up/down flags,
    suspension status and reason).

    :param chain_param: used only to pass parameters along a task.chain sequence
    :param ths_code_set:
    :param begin_time:
    :return:
    """
    table_name = 'ifind_index_daily_ds'
    has_table = engine_md.has_table(table_name)
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_INDEX_DAILY_DS], sep=';')
    if has_table:
        # incremental: per code, start the day after the latest stored date,
        # defaulting to the index base period
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code,
                    ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1
                        FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date)
            ORDER BY ths_code""".format(table_name=table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date)
            ORDER BY ths_code;"""
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围' % table_name)
    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set}
    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))  # 试用账号只能获取近5年数据
        # clamp each range to the trial account's 5-year window
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_to is not None and date_from_min <= date_to}
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            end_time = date_2_str(end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:D',
                begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
                # 大于阀值有开始插入 (bulk-insert once past the row threshold)
                if data_count >= 10000:
                    data_df_all = pd.concat(data_df_list)
                    data_count = bunch_insert_on_duplicate_update(
                        data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_DS)
                    tot_data_count += data_count
                    data_df_list, data_count = [], 0
            # 仅调试使用
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # flush any remaining accumulated rows
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_DS)
            tot_data_count += data_count
        # first creation only: convert storage engine and build the primary key
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def import_index_daily_his(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    Save index daily history data into ifind_index_daily_his via the
    history-quotes interface.

    :param chain_param: only used to pass parameters along a task.chain serial call
    :param ths_code_set: optional whitelist of ths codes; None means all codes
    :param begin_time: default None; if not None, no code's update range starts later than this date
    :return: None (side effect: rows upserted into ifind_index_daily_his)
    """
    table_name = 'ifind_index_daily_his'
    # BUGFIX: the original guard was `type(begin_time) == date`, which called
    # str_2_date on a value that was ALREADY a date and left strings untouched.
    # Convert only when the value is not yet a date.
    if begin_time is not None and not isinstance(begin_time, date):
        begin_time = str_2_date(begin_time)
    # THS_HistoryQuotes('600006.SH,600010.SH',
    # 'preClose,open,high,low,close,avgPrice,changeRatio,volume,amount,turnoverRatio,transactionAmount,totalShares,totalCapital,floatSharesOfAShares,floatSharesOfBShares,floatCapitalOfAShares,floatCapitalOfBShares,pe_ttm,pe,pb,ps,pcf',
    # 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
    # '2018-06-30','2018-07-30')
    json_indicator, _ = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_INDEX_DAILY_HIS],
        sep=';')
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Incremental: start each code at max(time)+1, falling back to the index
        # base-period date; end at yesterday before 16:00, else today.
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM ifind_index_daily_his GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date)
            ORDER BY ths_code;"""
    else:
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围', table_name)
        # Full load: date ranges come from ifind_index_info alone.
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date)
            ORDER BY ths_code"""
    # Compute the per-code date range needing daily data.
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }
    if TRIAL:
        # Trial accounts can only fetch the most recent 5 years of data.
        date_from_min = date.today() - timedelta(days=(365 * 5))
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_to is not None and date_from_min <= date_to
        }
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            data_df = invoker.THS_HistoryQuotes(
                ths_code, json_indicator,
                'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Flush the buffer once it crosses the threshold.
            if data_count >= 10000:
                # BUGFIX: the original inserted only the latest chunk (data_df) and
                # then cleared data_df_list, silently dropping every previously
                # buffered DataFrame.  Insert the concatenated buffer instead,
                # matching the sibling import_index_daily_ds implementation.
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_HIS)
                tot_data_count += data_count
                data_df_list, data_count = [], 0
            # Debug-only early exit.
            if DEBUG and len(data_df_list) > 5:
                break
    finally:
        # Flush whatever is still buffered, even if the loop raised.
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_HIS)
            tot_data_count += data_count
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        # First-time creation: convert to MyISAM and build the primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_stock_hk_info(chain_param=None, ths_code=None, refresh=False):
    """
    Import HK-stock basic information into the ifind_stock_hk_info table.

    :param chain_param: only used to pass parameters along a task.chain serial call
    :param ths_code: comma-separated ths codes to import; None -> fetch the whole market
    :param refresh: when True, scan the code universe yearly starting 1991-02-01
    :return: None (side effect: rows replaced/upserted in ifind_stock_hk_info)
    """
    table_name = 'ifind_stock_hk_info'
    logging.info("更新 %s 开始", table_name)
    if ths_code is None:
        # Collect the whole-market HK stock code universe, sampling once a year.
        if refresh:
            date_fetch = datetime.strptime('1991-02-01', STR_FORMAT_DATE).date()
        else:
            date_fetch = date.today()
        date_end = date.today()
        stock_hk_code_set = set()
        while date_fetch < date_end:
            stock_hk_code_set_sub = get_stock_hk_code_set(date_fetch)
            if stock_hk_code_set_sub is not None:
                stock_hk_code_set |= stock_hk_code_set_sub
            date_fetch += timedelta(days=365)
        stock_hk_code_set_sub = get_stock_hk_code_set(date_end)
        if stock_hk_code_set_sub is not None:
            stock_hk_code_set |= stock_hk_code_set_sub
        if DEBUG:
            stock_hk_code_set = list(stock_hk_code_set)[:10]
        ths_code = ','.join(stock_hk_code_set)
    else:
        # BUGFIX: when ths_code was supplied by the caller, stock_hk_code_set was
        # never assigned and the DELETE branch below raised NameError.  Derive the
        # code set from the argument instead.
        stock_hk_code_set = {code for code in ths_code.split(',') if code != ''}
    indicator_param_list = [
        ('ths_stock_short_name_hks', '', String(40)),
        ('ths_stock_code_hks', '', String(20)),
        ('ths_isin_code_hks', '', String(40)),
        ('ths_corp_ashare_short_name_hks', '', String(10)),
        ('ths_corp_ashare_code_hks', '', String(60)),
        ('ths_stock_varieties_hks', '', String(40)),
        ('ths_ipo_date_hks', '', Date),
        ('ths_listed_exchange_hks', '', String(60)),
        ('ths_stop_listing_date_hks', '', Date),
        ('ths_corp_cn_name_hks', '', String(120)),
        ('ths_corp_name_en_hks', '', String(120)),
        ('ths_established_date_hks', '', Date),
        ('ths_accounting_date_hks', '', String(20)),
        ('ths_general_manager_hks', '', String(40)),
        ('ths_secretary_hks', '', String(40)),
        ('ths_operating_scope_hks', '', Text),
        ('ths_mo_product_name_hks', '', String(200)),
        ('ths_district_hks', '', String(60)),
        ('ths_reg_address_hks', '', String(200)),
        ('ths_office_address_hks', '', String(200)),
        ('ths_corp_tel_hks', '', String(200)),
        ('ths_corp_fax_hks', '', String(200)),
        ('ths_corp_website_hks', '', String(200)),
        ('ths_auditor_hks', '', String(60)),
        ('ths_legal_counsel_hks', '', String(300)),
        ('ths_hs_industry_hks', '', String(40)),
    ]
    # jsonIndicator='ths_stock_short_name_hks;ths_stock_code_hks;ths_thscode_hks;ths_isin_code_hks;ths_corp_ashare_short_name_hks;ths_corp_ashare_code_hks;ths_stock_varieties_hks;ths_ipo_date_hks;ths_listed_exchange_hks;ths_stop_listing_date_hks;ths_corp_cn_name_hks;ths_corp_name_en_hks;ths_established_date_hks;ths_accounting_date_hks;ths_general_manager_hks;ths_secretary_hks;ths_operating_scope_hks;ths_mo_product_name_hks;ths_district_hks;ths_reg_address_hks;ths_office_address_hks;ths_corp_tel_hks;ths_corp_fax_hks;ths_corp_website_hks;ths_auditor_hks;ths_legal_counsel_hks;ths_hs_industry_hks'
    # jsonparam=';;;;;;;;;;;'
    indicator, param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
    param += '100'
    data_df = invoker.THS_BasicData(ths_code, indicator, param)
    if data_df is None or data_df.shape[0] == 0:
        logging.info("没有可用的 stock_hk info 可以更新")
        return
    # Delete the stale rows for these codes, then re-insert fresh data.
    has_table = engine_md.has_table(table_name)
    if has_table:
        with with_db_session(engine_md) as session:
            session.execute(
                "DELETE FROM {table_name} WHERE ths_code IN (".format(
                    table_name=table_name) + ','.join(
                        [':code%d' % n for n in range(len(stock_hk_code_set))]) + ")",
                params={
                    'code%d' % n: val
                    for n, val in enumerate(stock_hk_code_set)
                })
            session.commit()
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    # data_count = data_df.shape[0]
    # data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    # First-time creation: convert to MyISAM and build the primary key.
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # Refresh the code_mapping table from the newly written info table.
    update_from_info_table(table_name)
def merge_stock_daily(date_from=None):
    """
    Merge wind and ifind daily stock data into the unified stock_daily table.

    :param date_from: incremental start date; None -> derive from max(trade_date)
        of the target table (or full load if the table does not exist)
    :return: the merged DataFrame that was written to the database
    """
    table_name = 'stock_daily'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        # Resume from the day after the latest merged trade_date.
        sql_str = "select adddate(max(trade_date),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    ifind_table_ds_name = 'ifind_{table_name}_ds'.format(table_name=table_name)
    ifind_table_his_name = 'ifind_{table_name}_his'.format(
        table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    # BUGFIX: the original formatted ifind_his_sql_str with the *_ds table name and
    # ifind_ds_sql_str with the *_his table name (swapped), in both branches.
    if date_from is None:
        ifind_his_sql_str = "select * from {table_name}".format(
            table_name=ifind_table_his_name)
        ifind_ds_sql_str = "select * from {table_name}".format(
            table_name=ifind_table_ds_name)
        wind_sql_str = "select * from {table_name}".format(
            table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str, engine_md)  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str, engine_md)  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md)  # , index_col='wind_code'
    else:
        ifind_his_sql_str = "select * from {table_name} where time >= %s".format(
            table_name=ifind_table_his_name)
        ifind_ds_sql_str = "select * from {table_name} where time >= %s".format(
            table_name=ifind_table_ds_name)
        wind_sql_str = "select * from {table_name} where trade_date >= %s".format(
            table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str, engine_md, params=[date_from])  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str, engine_md, params=[date_from])  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md, params=[date_from])  # , index_col='wind_code'
    # Join the two ifind sources, then outer-join against wind on code+date.
    ifind_df = pd.merge(ifind_his_df, ifind_ds_df, how='outer',
                        on=['ths_code', 'time'])
    joined_df = pd.merge(ifind_df, wind_df, how='outer',
                         left_on=['ths_code', 'time'],
                         right_on=['wind_code', 'trade_date'],
                         indicator='indicator_column')
    # Column merge rules: target column -> (sql dtype, merge function, kwargs).
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {
            'left_key': 'ths_code', 'right_key': 'wind_code'
        }),
        'trade_date': (Date, prefer_left, {
            'left_key': 'time', 'right_key': 'trade_date'
        }),
        'open': (DOUBLE, mean_value, {
            'left_key': 'open_x', 'right_key': 'open_y',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        'high': (DOUBLE, mean_value, {
            'left_key': 'high_x', 'right_key': 'high_y',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        'low': (DOUBLE, mean_value, {
            'left_key': 'low_x', 'right_key': 'low_y',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        # TODO: for unknown reasons some wind close prices are inaccurate,
        # so prefer the ifind value.
        'close': (DOUBLE, prefer_left, {
            'left_key': 'close_x', 'right_key': 'close_y',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        'volume': (DOUBLE, mean_value, {
            'left_key': 'volume_x', 'right_key': 'volume_y',
            'warning_accuracy': 1, 'primary_keys': ('ths_code', 'time')
        }),
        'amount': (DOUBLE, mean_value, {
            'left_key': 'amount', 'right_key': 'amt',
            'warning_accuracy': 1, 'primary_keys': ('ths_code', 'time')
        }),
        # Total shares: ifind totalShares is keyed to the change date while wind
        # total_shares is keyed to the announcement date; prefer wind on conflict.
        'total_shares': (DOUBLE, prefer_right, {
            'left_key': 'totalShares', 'right_key': 'total_shares'
        }),
        # 'susp_days': (Integer, '***', {
        #     'left_key': 'ths_up_and_down_status_stock', 'right_key': 'susp_days', 'other_key': 'trade_status',
        #     'primary_keys': ('ths_code', 'time')}),
        'max_up_or_down': (Integer, max_up_or_down, {
            'ths_key': 'ths_up_and_down_status_stock', 'wind_key': 'maxupordown',
            'primary_keys': ('ths_code', 'time')
        }),
        'total_capital': (DOUBLE, get_value, {
            'key': 'totalCapital'
        }),
        'float_capital': (DOUBLE, get_value, {
            'key': 'floatCapitalOfAShares'
        }),
        'pct_chg': (DOUBLE, mean_value, {
            'left_key': 'changeRatio', 'right_key': 'pct_chg',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        'float_a_shares': (DOUBLE, get_value, {
            'key': 'floatSharesOfAShares'
        }),  # corresponds to wind float_a_shares
        'free_float_shares': (DOUBLE, get_value, {
            'key': 'free_float_shares'
        }),  # corresponds to ths ths_free_float_shares_stock
        # PE_TTM: ifind ths_pe_ttm_stock is based on the financial report date,
        # wind pe_ttm on the reporting period; prefer wind on conflict.
        'pe_ttm': (DOUBLE, prefer_right, {
            'left_key': 'ths_pe_ttm_stock', 'right_key': 'pe_ttm',
            'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')
        }),
        'pe': (DOUBLE, get_value, {
            'key': 'pe'
        }),
        'pb': (DOUBLE, get_value, {
            'key': 'pb'
        }),
        'ps': (DOUBLE, get_value, {
            'key': 'ps'
        }),
        'pcf': (DOUBLE, get_value, {
            'key': 'pcf'
        }),
    }
    col_merge_rule_dic = {
        key: (val[1], val[2])
        for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    # First-time creation: convert to MyISAM and build the primary key.
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    return data_df
def import_stock_hk_daily_ds(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    Save HK-stock daily data fetched through the date-serial interface into
    ifind_stock_hk_daily_ds.  This complements the History data with fields such
    as the adjust factor (af), limit-up/down flag, suspension status and reason.

    :param chain_param: only used to pass parameters along a task.chain serial call
    :param ths_code_set: optional whitelist of ths codes; None means all codes
    :param begin_time: if not None, each code's fetch range starts no later than this date
    :return: None (side effect: rows upserted into ifind_stock_hk_daily_ds)
    """
    table_name = 'ifind_stock_hk_daily_ds'
    info_table_name = 'ifind_stock_hk_info'
    # jsonIndicator='ths_pre_close_stock;ths_open_price_stock;ths_high_price_stock;ths_low_stock;ths_close_price_stock;ths_chg_ratio_stock;ths_chg_stock;ths_vol_stock;ths_trans_num_stock;ths_amt_stock;ths_turnover_ratio_stock;ths_vaild_turnover_stock;ths_af_stock;ths_up_and_down_status_stock;ths_trading_status_stock;ths_suspen_reason_stock;ths_last_td_date_stock'
    # jsonparam='100;100;100;100;100;;100;100;;;;;;;;;'
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_DAILY_DS],
        sep=';')
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Incremental: start each code at max(time)+1, falling back to the IPO
        # date; end at delisting date or yesterday/today (19:00 cut-off).
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date)
            ORDER BY ths_code""".format(table_name=table_name, info_table_name=info_table_name)
    else:
        # Full load: date ranges come from the info table alone.
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date)
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # Compute the per-code date range needing daily data.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }
    if TRIAL:
        # Trial accounts can only fetch the most recent 5 years of data.
        date_from_min = date.today() - timedelta(days=(365 * 5))
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to
        }
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:D', begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Debug-only early exit.
            # NOTE(review): this break sits BEFORE the flush check below (unlike
            # sibling importers); harmless in production since DEBUG gates it.
            if DEBUG and len(data_df_list) > 0:
                break
            # Flush the buffer once it crosses the threshold.
            if data_count >= 2000:
                tot_data_df = pd.concat(data_df_list)
                # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, DTYPE_STOCK_HK_DAILY_DS)
                tot_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # Flush whatever is still buffered, even if the loop raised.
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, DTYPE_STOCK_HK_DAILY_DS)
            tot_data_count += data_count
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        # First-time creation: convert to MyISAM and build the primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def merge_ifind_stock_daily(ths_code_set: set = None, date_from=None):
    """
    Merge the ds/his daily tables and the financial data into ifind_stock_daily.

    :param ths_code_set: optional whitelist of ths codes; None means all codes
    :param date_from: incremental start date; None -> derive from max(time) of the
        target table (or full load if the table does not exist)
    :return: None (side effect: rows upserted into ifind_stock_daily)
    """
    table_name = 'ifind_stock_daily'
    logging.info("合成 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        # Resume from the day after the latest merged date.
        sql_str = "select adddate(max(`time`),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    # Fetch the individual source tables.
    ifind_his_df = get_ifind_daily_df('ifind_stock_daily_his', date_from)
    ifind_ds_df = get_ifind_daily_df('ifind_stock_daily_ds', date_from)
    ifind_report_date_df = get_ifind_report_date_df('ifind_stock_report_date', None)
    ifind_fin_df = get_ifind_daily_df('ifind_stock_fin', None)
    ifind_fin_df_g = ifind_fin_df.groupby('ths_code')
    # Codes for which financial data exists at all.
    ths_code_set_4_daily = set(ifind_fin_df_g.size().index)
    # Merge the ds and his data (outer join leaves NaN where one side is missing).
    ifind_his_ds_df = pd.merge(ifind_his_df, ifind_ds_df, how='outer',
                               on=['ths_code', 'time'])
    ifind_his_ds_df_g = ifind_his_ds_df.groupby('ths_code')
    logger.debug("提取数据完成")
    # Compute, per code, the financial snapshot effective at each disclosure date:
    # report_date_dic_dic maps ths_code -> {report_date -> fin data row}.
    report_date_dic_dic = {}
    for report_date_g in [
        ifind_report_date_df.groupby(
            ['ths_code', 'ths_regular_report_actual_dd_stock'])
    ]:
        for num, ((ths_code, report_date), data_df) in enumerate(report_date_g, start=1):
            if ths_code_set is not None and ths_code not in ths_code_set:
                continue
            if is_nan_or_none(report_date):
                continue
            report_date_dic = report_date_dic_dic.setdefault(ths_code, {})
            if ths_code not in ths_code_set_4_daily:
                logger.error('fin 表中不存在 %s 的財務數據', ths_code)
                continue
            ifind_fin_df_temp = ifind_fin_df_g.get_group(ths_code)
            # NOTE(review): this tests report_date against a dict keyed by
            # ths_code, so it is (almost) always True — likely intended to be
            # `report_date not in report_date_dic`; confirm before changing.
            if report_date not in report_date_dic_dic:
                ifind_fin_df_temp = ifind_fin_df_temp[
                    ifind_fin_df_temp['time'] <= report_date]
                if ifind_fin_df_temp.shape[0] > 0:
                    # NOTE(review): .iloc[0] after ascending sort picks the
                    # EARLIEST fin row at or before report_date; verify whether
                    # the latest (.iloc[-1]) was intended.
                    report_date_dic[
                        report_date] = ifind_fin_df_temp.sort_values(
                            'time').iloc[0]
    # # Build the dtype mapping for the merged table.
    dtype = {'report_date': Date}
    for dic in [
            DTYPE_STOCK_DAILY_DS, DTYPE_STOCK_REPORT_DATE,
            DTYPE_STOCK_DAILY_FIN, DTYPE_STOCK_DAILY_HIS
    ]:
        for key, val in dic.items():
            dtype[key] = val
    logger.debug("计算财报日期完成")
    # Assemble the output: for each code, stamp each daily segment with the
    # financial snapshot in force between consecutive report dates.
    tot_data_count, data_count, data_df_list, for_count = 0, 0, [], len(
        report_date_dic_dic)
    try:
        for num, (ths_code, report_date_dic) in enumerate(report_date_dic_dic.items(), start=1):
            # key: ths_code
            # TODO: checking whether ths_code exists in ifind_fin_df_g via size()
            # is a temporary measure; improve later.
            if ths_code not in ifind_his_ds_df_g.size():
                logger.error('fin 表中不存在 %s 的財務數據', ths_code)
                continue
            # open/low etc. may be NaN (2438 rows observed)
            ifind_his_ds_df_cur_ths_code = ifind_his_ds_df_g.get_group(
                ths_code)  # shape[1] 30
            logger.debug('%d/%d) 处理 %s %d 条数据', num, for_count, ths_code,
                         ifind_his_ds_df_cur_ths_code.shape[0])
            report_date_list = list(report_date_dic.keys())
            report_date_list.sort()
            for report_date_from, report_date_to in iter_2_range(
                    report_date_list):
                logger.debug('%d/%d) 处理 %s [%s - %s]', num, for_count, ths_code,
                             date_2_str(report_date_from),
                             date_2_str(report_date_to))
                # Select the daily rows valid in [report_date_from, report_date_to);
                # None at either end means an open interval.
                if report_date_from is None:
                    is_fit = ifind_his_ds_df_cur_ths_code[
                        'time'] < report_date_to
                elif report_date_to is None:
                    is_fit = ifind_his_ds_df_cur_ths_code[
                        'time'] >= report_date_from
                else:
                    is_fit = (ifind_his_ds_df_cur_ths_code['time'] < report_date_to) & (
                        ifind_his_ds_df_cur_ths_code['time'] >= report_date_from)
                # Slice the rows inside the date range.
                ifind_his_ds_df_segment = ifind_his_ds_df_cur_ths_code[
                    is_fit].copy()
                segment_count = ifind_his_ds_df_segment.shape[0]
                if segment_count == 0:
                    continue
                # Financial snapshot effective for this segment (None before the
                # first report date).
                fin_s = report_date_dic[
                    report_date_from] if report_date_from is not None else None
                for key in DTYPE_STOCK_DAILY_FIN.keys():
                    if key in ('ths_code', 'time'):
                        continue
                    ifind_his_ds_df_segment[key] = fin_s[
                        key] if fin_s is not None and key in fin_s else None
                ifind_his_ds_df_segment['report_date'] = report_date_from
                # Buffer the segment.
                data_df_list.append(ifind_his_ds_df_segment)
                data_count += segment_count
                if DEBUG and len(data_df_list) > 1:
                    break
                # Flush the buffer once it crosses the threshold.
                if data_count > 10000:
                    data_df = pd.concat(data_df_list)
                    data_count = bunch_insert_on_duplicate_update(
                        data_df, table_name, engine_md, dtype)
                    tot_data_count += data_count
                    data_count, data_df_list = 0, []
    finally:
        # Flush whatever is still buffered, even if the loop raised.
        if len(data_df_list) > 0:
            data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, dtype)
            tot_data_count += data_count
        logger.info('%s 新增或更新记录 %d 条', table_name, tot_data_count)
        # First-time creation: convert to MyISAM and build the primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_stock_hk_report_date(chain_param=None, ths_code_set: set = None, begin_time=None, interval='Q'):
    """
    Save historical financial report-date data into ifind_stock_hk_report_date via
    the date-serial interface.  Domestic financials are published quarterly, so
    the default fetch interval is quarterly.

    :param chain_param: only used to pass parameters along a task.chain serial call
    :param ths_code_set: optional whitelist of ths codes; None means all codes
    :param begin_time: if not None, each code's fetch range starts no later than this date
    :param interval: Q quarterly, M monthly, W weekly, D daily
    :return: None (side effect: rows upserted into ifind_stock_hk_report_date)
    """
    table_name = 'ifind_stock_hk_report_date'
    info_table_name = 'ifind_stock_hk_info'
    has_table = engine_md.has_table(table_name)
    # jsonIndicator='ths_perf_briefing_fore_dsclsr_date_hks;ths_perf_brief_actual_dd_hks;ths_perf_report_foredsclsr_date_hks;ths_perf_report_actual_dd_hks'
    # jsonparam=';'
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_REPORT_DATE],
        sep=';')
    if has_table:
        # Incremental: start each code at max(time)+1, falling back to the IPO
        # date; end at delisting date or yesterday/today (19:00 cut-off).
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date)
            ORDER BY ths_code""".format(table_name=table_name, info_table_name=info_table_name)
    else:
        # Full load: date ranges come from the info table alone.
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date)
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # Compute the per-code date range needing data.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }
    if TRIAL:
        # Trial accounts can only fetch the most recent 5 years of data.
        date_from_min = date.today() - timedelta(days=(365 * 5))
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to
        }
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                "Days:Tradedays,Fill:Previous,Interval:{interval}".format(
                    interval=interval), begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Flush the buffer once it crosses the threshold.
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                data_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_STOCK_HK_REPORT_DATE)
                tot_data_count += data_count
                data_df_list, data_count = [], 0
            # Debug-only early exit.
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # Flush whatever is still buffered, even if the loop raised.
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_STOCK_HK_REPORT_DATE)
            tot_data_count += data_count
        # First-time creation: convert to MyISAM and build the primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def import_coin_daily(chain_param=None, id_set=None, begin_time=None):
    """
    Insert historical data into cmc_coin_v1_daily using the CMC v1 scraper
    interface (which may have expired at the end of December 2018).

    :param chain_param: only used to pass parameters along a task.chain serial call
    :param id_set: optional whitelist of coin ids; None means all coins
    :param begin_time: if not None, each coin's fetch range starts no later than this date
    :return: None (side effect: rows upserted into cmc_coin_v1_daily)
    """
    table_name = "cmc_coin_v1_daily"
    info_table_name = "cmc_coin_v1_info"
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Incremental: start each coin at max(date)+1, falling back to 2013-04-28;
        # end at yesterday before 16:00, else today.
        sql_str = """
            SELECT id, symbol, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.id, symbol, ifnull(trade_date,date('2013-04-28')) date_frm, null delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info
                LEFT OUTER JOIN
                    (SELECT id, adddate(max(date),1) trade_date FROM {table_name} GROUP BY id) daily
                ON info.id = daily.id
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY id""".format(table_name=table_name, info_table_name=info_table_name)
    else:
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
        # Full load: date ranges come from the info table alone.
        sql_str = """
            SELECT id, symbol, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT id, symbol, null date_frm, null delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info
            ) tt
            ORDER BY id""".format(info_table_name=info_table_name)
    with with_db_session(engine_md) as session:
        # Compute the per-coin date range needing daily data.
        table = session.execute(sql_str)
        stock_date_dic = {
            (coin_id, symbol): (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for coin_id, symbol, date_from, date_to in table.fetchall()
            if id_set is None or coin_id in id_set
        }
    # Target column -> sql dtype mapping.
    dtype = {
        'id': String(60),
        'date': Date,
        'open': DOUBLE,
        'high': DOUBLE,
        'low': DOUBLE,
        'close': DOUBLE,
        'volume': DOUBLE,
        'market_cap': DOUBLE,
    }
    col_names = dtype.keys()
    data_df_list = []
    dic_count = len(stock_date_dic)
    data_count = 0
    # Fetch the data from the scraper interface.
    logger.info('%d coins will been import into %s', dic_count, table_name)
    try:
        for data_num, ((coin_id, symbol), (date_from, date_to)) in enumerate(stock_date_dic.items(), start=1):
            logger.debug('%d/%d) %s[%s] [%s - %s]', data_num, dic_count, coin_id, symbol, date_from, date_to)
            date_from_str = None
            try:
                if date_from is None:
                    scraper = CmcScraperV1(symbol, coin_id)
                else:
                    # Convert the DB date into the scraper's expected format.
                    date_from_str = date_2_str(
                        str_2_date(date_from, DATE_FORMAT_STR),
                        DATE_FORMAT_STR_CMC)
                    scraper = CmcScraperV1(symbol, coin_id, start_date=date_from_str)
                data_df = scraper.get_dataframe()
            except Exception as exp:
                # Best-effort: log the scraper failure and keep going with the
                # next coin rather than aborting the whole import.
                logger.exception("scraper('%s', '%s', start_date='%s')", symbol, coin_id, date_from_str)
                continue
            if data_df is None or data_df.shape[0] == 0:
                logger.warning('%d/%d) %s has no data during %s %s', data_num, dic_count, coin_id, date_from, date_to)
                continue
            # Normalize scraper column names to the target schema.
            data_df.rename(columns={
                col_name: rename_by_dic(col_name, col_names)
                for col_name in data_df.columns
            }, inplace=True)
            data_df.rename(columns={'market cap': 'market_cap'}, inplace=True)
            # The scraper may return '-' style strings for missing values; zero them.
            data_df['market_cap'] = data_df['market_cap'].apply(
                lambda x: 0 if isinstance(x, str) else x)
            data_df['volume'] = data_df['volume'].apply(
                lambda x: 0 if isinstance(x, str) else x)
            logger.info('%d/%d) %d data of %s between %s and %s', data_num, dic_count,
                        data_df.shape[0], coin_id, data_df['date'].min(), data_df['date'].max())
            data_df['id'] = coin_id
            data_df_list.append(data_df)
            data_count += data_df.shape[0]
            # Debug-only early exit.
            if DEBUG and len(data_df_list) > 10:
                break
            # Flush the buffer once it crosses the threshold.
            if data_count > 10000:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
                logging.info("%s %d 条信息被更新", table_name, data_count)
                data_df_list, data_count = [], 0
    finally:
        # Flush whatever is still buffered, even if the loop raised.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
        # First-time creation: convert to MyISAM and build the composite primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # build_primary_key([table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST ,
                CHANGE COLUMN `date` `date` DATE NOT NULL AFTER `id`,
                ADD PRIMARY KEY (`id`, `date`)""".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
def import_stock_hk_fin_quarterly(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    Save quarterly HK-stock financial data into ifind_stock_hk_fin via the
    date-serial interface.

    :param chain_param: only used to pass parameters along a task.chain serial call
    :param ths_code_set: optional whitelist of ths codes; None means all codes
    :param begin_time: if not None, each code's fetch range starts no later than this date
    :return: None (side effect: rows upserted into ifind_stock_hk_fin)
    """
    table_name = 'ifind_stock_hk_fin'
    info_table_name = 'ifind_stock_hk_info'
    # ths_cce_hks;ths_total_liab_hks;ths_ebit_ttm_hks
    # jsonparam='2013,100,OC;2013,100,OC;OC,101'
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_FIN],
        sep=';')
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Incremental: start each code at max(time)+1, falling back to the IPO
        # date; end at delisting date or yesterday/today (19:00 cut-off).
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date)
            ORDER BY ths_code""".format(table_name=table_name, info_table_name=info_table_name)
    else:
        # Full load: date ranges come from the info table alone.
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date)
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # Compute the per-code date range needing data.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }
    if TRIAL:
        # Trial accounts can only fetch the most recent 5 years of data.
        date_from_min = date.today() - timedelta(days=(365 * 5))
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to
        }
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:Q', begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Debug-only early exit.
            # NOTE(review): this break sits BEFORE the flush check below (unlike
            # sibling importers); harmless in production since DEBUG gates it.
            if DEBUG and len(data_df_list) > 0:
                break
            # Flush the buffer once it crosses the threshold.
            if data_count >= 2000:
                tot_data_df = pd.concat(data_df_list)
                # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, DTYPE_STOCK_HK_FIN)
                tot_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # Flush whatever is still buffered, even if the loop raised.
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, DTYPE_STOCK_HK_FIN)
            tot_data_count += data_count
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        # First-time creation: convert to MyISAM and build the primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_tushare_stock_index_daily(chain_param=None, ts_code_set=None):
    """
    Import index daily bars into ``tushare_stock_index_daily_md`` up to the most
    recent workday (today is included only after 16:00, see the SQL below).

    :param chain_param: only used to pass parameters along a task.chain call chain
    :param ts_code_set: optional whitelist of ts_code values; others are skipped
    :return:
    """
    table_name = 'tushare_stock_index_daily_md'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Incremental mode: per code, start at max(trade_date)+1 from the existing
        # table (falling back to base_date) and stop at exp_date or the current
        # calendar boundary (yesterday before 16:00, today after).
        sql_str = """SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(trade_date, base_date) date_frm, exp_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_index_basic info
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY ts_code) daily
                ON info.ts_code = daily.ts_code
            ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date)
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        # First run: derive the date ranges from the index basic-info table only.
        sql_str = """SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, base_date date_frm, exp_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_index_basic info
            ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date)
            ORDER BY ts_code"""
        # FIX: the warning used to name 'tushare_stock_info', but the ranges are in
        # fact computed from tushare_stock_index_basic (see the SQL above).
        logger.warning('%s 不存在,仅使用 tushare_stock_index_basic 表进行计算日期范围', table_name)
    with with_db_session(engine_md) as session:
        # Per-code (date_from, date_to) range of missing daily data.
        table = session.execute(sql_str)
        begin_time = None
        code_date_range_dic = {
            ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set}
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(code_date_range_dic)
    logger.info('%d stocks will been import into tushare_stock_index_daily_md', data_len)
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            data_df = invoke_index_daily(
                ts_code=ts_code,
                start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            if len(data_df) > 0:
                # The API returns newest-first pages; keep paging backwards until the
                # oldest fetched trade_date reaches date_from or no more data comes back.
                while try_2_date(data_df['trade_date'].iloc[-1]) > date_from:
                    last_date_in_df_last, last_date_in_df_cur = try_2_date(data_df['trade_date'].iloc[-1]), None
                    df2 = invoke_index_daily(
                        ts_code=ts_code,
                        start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(
                            try_2_date(data_df['trade_date'].iloc[-1]) - timedelta(days=1),
                            STR_FORMAT_DATE_TS))
                    # FIX: was `if len(df2 > 0):` — that compares the whole DataFrame
                    # to 0 first, which raises TypeError on non-numeric columns
                    # (ts_code, trade_date) in modern pandas. `len(df2)` is intended.
                    if len(df2) > 0:
                        last_date_in_df_cur = try_2_date(df2['trade_date'].iloc[-1])
                        if last_date_in_df_cur < last_date_in_df_last:
                            data_df = pd.concat([data_df, df2])
                        elif last_date_in_df_cur == last_date_in_df_last:
                            # No further progress: same oldest date twice in a row.
                            break
                        if data_df is None:
                            logger.warning('%d/%d) %s has no data during %s %s',
                                           num, data_len, ts_code, date_from, date_to)
                            continue
                        logger.info('%d/%d) %d data of %s between %s and %s',
                                    num, data_len, data_df.shape[0], ts_code, date_from, date_to)
                    else:
                        break
            # Buffer the fetched rows for bulk insertion.
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
                # Debug only: stop after a handful of codes.
                if DEBUG and len(data_df_list) > 5:
                    break
            # Flush the buffer to the DB once it exceeds the threshold.
            if data_count >= 500:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD)
                all_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # Persist whatever remains buffered even if the loop failed part-way.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
        # On first creation, convert the table to MyISAM and build its primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_stock_hk_fin_by_report_date_weekly(chain_param=None, ths_code_set: set = None, begin_time=None,
                                              refresh=False):
    """
    Save weekly HK-stock financial history into ``ifind_stock_hk_fin`` via the
    iFinD date-serial API (``Interval:W``), anchored on financial-report release
    dates: for each release date the window [release date - 14 days, release date]
    is fetched.

    :param chain_param: only used to pass parameters along a task.chain call chain
    :param ths_code_set: optional whitelist of ths_code values; others are skipped
    :param begin_time: optional earliest date; widens each code's range when older
    :param refresh: when True, refresh everything (ignore the incremental date limits)
    :return:
    """
    table_name = 'ifind_stock_hk_fin'
    info_table_name = 'ifind_stock_hk_info'
    # Example of the indicator/param encoding handed to THS_DateSerial:
    # ths_cce_hks;ths_total_liab_hks;ths_ebit_ttm_hks
    # jsonparam='2013,100,OC;2013,100,OC;OC,101'
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_FIN], sep=';')
    has_table = engine_md.has_table(table_name)
    # Per code: every distinct report date (brief or full report, actual release day)
    # together with its 14-day look-back window start.
    ths_code_report_date_str = """select distinct ths_code, subdate(report_date, 14), report_date
        from
        (
            select ths_code, ths_perf_brief_actual_dd_hks report_date from ifind_stock_hk_report_date
            union
            select ths_code, ths_perf_report_actual_dd_hks report_date from ifind_stock_hk_report_date
        ) tt
        where report_date is not null
        order by ths_code, report_date"""
    if has_table:
        # Incremental mode: per code, start at max(time)+1 from the existing data
        # (falling back to the IPO date) and stop at the delisting date or the
        # current calendar boundary (yesterday before 19:00, today after).
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date)
            ORDER BY ths_code""".format(table_name=table_name, info_table_name=info_table_name)
    else:
        # First run: no history table yet, derive the ranges from the info table only.
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date)
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # Collect the [report_date-14d, report_date] windows per code.
        table = session.execute(ths_code_report_date_str)
        ths_code_report_date_range_list_dic, ths_code_report_date_range_list_dic_tmp = {}, {}
        for ths_code, date_from, date_to in table.fetchall():
            if ths_code_set is None or ths_code in ths_code_set:
                ths_code_report_date_range_list_dic_tmp.setdefault(ths_code, []).append((date_from, date_to))
        # Per-code overall date range still missing from the history table.
        if not refresh:
            # When refresh=True the limits below are skipped entirely.
            table = session.execute(sql_str)
            code_date_range_dic = {
                ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
                for ths_code, date_from, date_to in table.fetchall()
                if ths_code_set is None or ths_code in ths_code_set}
            if TRIAL:
                # Trial accounts can only fetch roughly the most recent 5 years of data.
                date_from_min = date.today() - timedelta(days=(365 * 5))
                code_date_range_dic = {
                    ths_code: (max([date_from, date_from_min]), date_to)
                    for ths_code, (date_from, date_to) in code_date_range_dic.items()
                    if date_from_min <= date_to}
        else:
            code_date_range_dic = {}
    # Merge overlapping report-date windows into disjoint ranges per code.
    for ths_code, date_range_list in ths_code_report_date_range_list_dic_tmp.items():
        if not refresh and ths_code in code_date_range_dic:
            code_date_range = code_date_range_dic[ths_code]
        else:
            code_date_range = None
        # date_range_list arrives sorted by start date (ORDER BY in the SQL);
        # this pass coalesces ranges whose dates overlap.
        date_range_list_new, date_from_last, date_to_last = [], None, None
        for date_from, date_to in date_range_list:
            if code_date_range is not None:
                # Skip windows entirely outside the code's missing-data range
                # (ignored when refresh=True).
                if not refresh and (date_to < code_date_range[0] or code_date_range[1] < date_from):
                    continue
            if date_from_last is None:
                # First iteration: open the current merged range.
                date_from_last = date_from
            elif date_from < date_to_last:
                # Overlaps the open range: extend it (date_to_last updated below).
                pass
            else:
                # Disjoint: close the open range and start a new one.
                date_range_list_new.append((date_from_last, date_to_last))
                date_from_last = date_from
            # Bottom of loop: the open range now ends at this window's end.
            date_to_last = date_to
        # Close the last open range, if any.
        if date_from_last is not None and date_to_last is not None:
            date_range_list_new.append((date_from_last, date_to_last))
        if len(date_range_list_new) > 0:
            ths_code_report_date_range_list_dic[ths_code] = date_range_list_new
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(ths_code_report_date_range_list_dic)
    try:
        for num, (ths_code, date_range_list) in enumerate(ths_code_report_date_range_list_dic.items(), start=1):
            for begin_time, end_time in date_range_list:
                logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
                data_df = invoker.THS_DateSerial(
                    ths_code,
                    json_indicator,
                    json_param,
                    'Days:Tradedays,Fill:Previous,Interval:W',
                    begin_time, end_time)
                if data_df is not None and data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)
                # Debug only. NOTE(review): this break only exits the inner
                # date-range loop, not the outer per-code loop — confirm intent.
                if DEBUG and len(data_df_list) > 0:
                    break
                # Flush the buffer to the DB once it exceeds the threshold.
                if data_count >= 2000:
                    tot_data_df = pd.concat(data_df_list)
                    bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, DTYPE_STOCK_HK_FIN)
                    tot_data_count += data_count
                    data_df_list, data_count = [], 0
    finally:
        # Persist whatever remains buffered even if the loop failed part-way.
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, DTYPE_STOCK_HK_FIN)
            tot_data_count += data_count
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        # On first creation, convert the table to MyISAM and build its primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_future_daily_his(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    Update futures-contract daily quotes into ``ifind_future_daily``.

    :param chain_param: only used to pass parameters along a task.chain call chain
    :param ths_code_set: optional whitelist of ths_code values; others are skipped
    :param begin_time: optional earliest date; widens each code's range when older
    :return:
    """
    table_name = 'ifind_future_daily'
    info_table_name = 'ifind_future_info'
    logger.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    indicator_param_list = [
        # NOTE(review): preClose is mapped to String(20) while every other price
        # field is DOUBLE — possibly unintended; confirm before changing.
        ('preClose', String(20)),
        ('open', DOUBLE),
        ('high', DOUBLE),
        ('low', DOUBLE),
        ('close', DOUBLE),
        ('volume', DOUBLE),
        ('amount', DOUBLE),
        ('avgPrice', DOUBLE),
        ('change', DOUBLE),
        ('changeRatio', DOUBLE),
        ('preSettlement', DOUBLE),
        ('settlement', DOUBLE),
        ('change_settlement', DOUBLE),
        ('chg_settlement', DOUBLE),
        ('openInterest', DOUBLE),
        ('positionChange', DOUBLE),
        ('amplitude', DOUBLE),
    ]
    json_indicator = ','.join([key for key, _ in indicator_param_list])
    if has_table:
        # After 16:00 include today's close, otherwise stop at yesterday.
        # Contracts whose date_to is more than ~360 days in the past are skipped:
        # some very old contracts (e.g. AL0202.SHF, AL9902.SHF, CU0202.SHF) can no
        # longer be backfilled.
        # TODO: ths_ksjyr_future should be replaced by ths_contract_listed_date_future,
        # which is more appropriate.
        sql_str = """SELECT ths_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
                SELECT fi.ths_code, ifnull(trade_date_max_1, ths_start_trade_date_future) date_frm,
                ths_last_td_date_future lasttrade_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) wfd
                ON fi.ths_code = wfd.ths_code
            ) tt
            WHERE date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date)
            AND subdate(curdate(), 360) < if(lasttrade_date<end_date, lasttrade_date, end_date)
            ORDER BY ths_code""".format(table_name=table_name, info_table_name=info_table_name)
    else:
        # First run: derive the ranges from the contract-info table only.
        sql_str = """SELECT ths_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
                SELECT fi.ths_code, ths_start_trade_date_future date_frm,
                ths_last_td_date_future lasttrade_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi
            ) tt""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # Per-contract (date_from, date_to) range of missing daily data.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set}
    if TRIAL:
        # Trial accounts can only fetch roughly the most recent 5 years of data.
        date_from_min = date.today() - timedelta(days=(365 * 5))
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to}
    # Column dtypes for the bulk insert.
    dtype = {key: val for key, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    dtype['time'] = Date
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
    try:
        logger.info("%d future instrument will be handled", code_count)
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            data_df = invoker.THS_HistoryQuotes(
                ths_code,
                json_indicator,
                'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Flush the buffer to the DB once it exceeds the threshold.
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
                tot_data_count += data_count
                # FIX: this log used to run AFTER data_count was reset to 0 and thus
                # always reported "0 rows"; log the real batch size before the reset.
                logging.info("%s 新增数据 %d 条", table_name, data_count)
                data_df_list, data_count = [], 0
            # Debug only: stop after a couple of contracts.
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # Persist whatever remains buffered even if the loop failed part-way.
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
            tot_data_count += data_count
            logging.info("%s 新增数据 %d 条", table_name, data_count)
        # On first creation, convert the table to MyISAM and build its primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def import_stock_quertarly(chain_param=None, wind_code_set=None):
    """
    Import quarterly financial-indicator data (wind wsd, Period=Q) into
    ``wind_fina_indicator`` up to the most recent workday.

    :param chain_param: only used to pass parameters along a task.chain call chain
    :param wind_code_set: optional whitelist of wind_code values; others are skipped
    :return:
    """
    logging.info("更新 wind_fina_indicator 开始")
    table_name = 'wind_fina_indicator'
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Incremental mode: per code, start at max(trade_date)+1 from the existing
        # table (falling back to the IPO date) and stop at the delisting date or the
        # current calendar boundary (yesterday before 16:00, today after).
        sql_str = """SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info info
                LEFT OUTER JOIN
                    (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) quertarly
                ON info.wind_code = quertarly.wind_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code;""".format(table_name=table_name)
    else:
        logger.warning('wind_fina_indicator 不存在,仅使用 wind_stock_info 表进行计算日期范围')
        sql_str = """SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code, ipo_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code"""
    with with_db_session(engine_md) as session:
        # Per-stock (date_from, date_to) range of missing quarterly data.
        table = session.execute(sql_str)
        begin_time = None
        stock_date_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set}
    # Indicators to request and their DB column types.
    param_list = [
        ('roic_ttm', DOUBLE),
        ('yoyprofit', DOUBLE),
        ('ebit', DOUBLE),
        ('ebit2', DOUBLE),
        ('ebit2_ttm', DOUBLE),
        ('surpluscapitalps', DOUBLE),
        ('undistributedps', DOUBLE),
        # NOTE(review): stm_issuingdate looks like a date but is stored as DOUBLE —
        # confirm before changing the schema.
        ('stm_issuingdate', DOUBLE),
    ]
    wind_indictor_str = ",".join(key for key, _ in param_list)
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date
    data_df_list = []
    # FIX: this message (and the ones below) used to name 'wind_stock_quertarly',
    # which is not the table actually written; use table_name instead.
    logger.info('%d stocks will been import into %s', len(stock_date_dic), table_name)
    try:
        # FIX: start=1 so progress logs are 1-based, consistent with the other importers.
        for stock_num, (wind_code, (date_from, date_to)) in enumerate(stock_date_dic.items(), start=1):
            # e.g. w.wsd("002122.SZ", "roic_ttm,yoyprofit,ebit,ebit2,ebit2_ttm,surpluscapitalps,undistributedps,stm_issuingdate",
            #            "2012-12-31", "2017-12-06", "unit=1;rptType=1;Period=Q")
            data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to, "unit=1;rptType=1;Period=Q")
            if data_df is None:
                logger.warning('%d) %s has no data during %s %s', stock_num, wind_code, date_from, date_to)
                continue
            data_df.rename(columns={c: str(c).lower() for c in data_df.columns}, inplace=True)
            # Drop rows whose every indicator is None.
            # NOTE(review): only the first 10 index entries are inspected — presumably
            # a deliberate bound on leading empty quarters; confirm.
            for trade_date in list(data_df.index[:10]):
                is_all_none = data_df.loc[trade_date].apply(lambda x: x is None).all()
                if is_all_none:
                    logger.warning("%s %s 数据全部为空", wind_code, trade_date)
                    data_df.drop(trade_date, inplace=True)
            logger.info('%d) %d data of %s between %s and %s',
                        stock_num, data_df.shape[0], wind_code, date_from, date_to)
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df_list.append(data_df)
            # Debug only: stop after a handful of stocks.
            if DEBUG and len(data_df_list) > 10:
                break
    finally:
        # Persist whatever was collected even if the loop failed part-way.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_df_all.shape[0])
        # On first creation, convert the table to MyISAM and build its primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])