def import_tushare_index_basic(chain_param=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_stock_index_basic' logging.info("更新 %s 开始", table_name) has_table = engine_md.has_table(table_name) fields = [ 'ts_code', 'name', 'fullname', 'market', 'publisher', 'index_type', 'category', 'base_date', 'base_point', 'list_date', 'weight_rule', 'desc', 'exp_date' ] market_list = list( ['MSCI', 'CSI', 'SSE', 'SZSE', 'CICC', 'SW', 'CNI', 'OTH']) for mkt in market_list: # trade_date = datetime_2_str(trddate[i], STR_FORMAT_DATE_TS) data_df = invoke_index_basic(market=mkt, fields=fields) if len(data_df) > 0: data_count = bunch_insert_p(data_df, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_INDEX_BASIC, primary_keys=['ts_code']) logging.info("%s 更新 %s 结束 %d 条信息被更新", mkt, table_name, data_count) else: logging.info("%s 无数据信息可被更新", mkt)
def import_info_table(type_name, insert_db=True) -> pd.DataFrame: """ 调用 get_all_securities 获取指定 type 的信息 type: 'stock', 'fund', 'index', 'futures', 'etf', 'lof', 'fja', 'fjb'。types为空时返回所有股票, 不包括基金,指数和期货 :param type_name: :return: """ table_name = f'jq_{type_name}_info' logger.info("更新 %s 开始", table_name) # has_table = engine_md.has_table(table_name) param_list = [ ('jq_code', String(20)), ('display_name', String(20)), ('name', String(20)), ('start_date', Date), ('end_date', Date), ] # 设置 dtype dtype = {key: val for key, val in param_list} # 数据提取 # types: list: 用来过滤securities的类型, list元素可选: # 'stock', 'fund', 'index', 'futures', 'etf', 'lof', 'fja', 'fjb'。types为空时返回所有股票, 不包括基金,指数和期货 # date: 日期, 一个字符串或者 [datetime.datetime]/[datetime.date] 对象, # 用于获取某日期还在上市的股票信息. 默认值为 None, 表示获取所有日期的股票信息 stock_info_all_df = get_all_securities(types=type_name) stock_info_all_df.index.rename('jq_code', inplace=True) stock_info_all_df.reset_index(inplace=True) if insert_db: logger.info('%s 数据将被导入', stock_info_all_df.shape[0]) data_count = bunch_insert_p(stock_info_all_df, table_name=table_name, dtype=dtype, primary_keys=['jq_code']) logger.info("更新 %s 完成 存量数据 %d 条", table_name, data_count) return stock_info_all_df
def save_data_2_daily_table(data_new_s_list: list, table_name, dtype: dict): df = pd.DataFrame(data_new_s_list) data_count = bunch_insert_p(df, table_name, dtype=dtype, primary_keys=['id', 'trade_date']) return data_count
def import_tushare_suspend(chain_param=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_stock_daily_suspend' logging.info("更新 %s 开始", table_name) has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有tushare_suspend # 下面一定要注意引用表的来源,否则可能是串,提取混乱!!!比如本表是tushare_daily_basic,所以引用的也是这个,如果引用错误,就全部乱了l if has_table: sql_str = """ select cal_date FROM ( select * from tushare_trade_date trddate where( cal_date>(SELECT max(suspend_date) FROM {table_name} )) )tt where (is_open=1 and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) and exchange='SSE') """.format(table_name=table_name) else: sql_str = """ SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) AND exchange='SSE') ORDER BY cal_date""" logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取交易日数据 table = session.execute(sql_str) trade_date_list = list(row[0] for row in table.fetchall()) try: trade_date_list_len = len(trade_date_list) for num, trade_date in enumerate(trade_date_list, start=1): trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS) data_df = pro.suspend(ts_code='', suspend_date=trade_date, resume_date='', fields='') if len(data_df) > 0: data_count = bunch_insert_p( data_df, table_name=table_name, dtype=DTYPE_TUSHARE_SUSPEND, primary_keys=['ts_code', 'suspend_date']) logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新", num, trade_date_list_len, trade_date, table_name, data_count) else: logging.info("%s 当日无停牌股票", trade_date_list_len) except: logger.exception('更新 %s 表异常', table_name)
def import_tushare_hsgt_top10(chain_param=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_hsgt_top10' logging.info("更新 %s 开始", table_name) param_list = [ ('trade_date', Date), ('ts_code', String(20)), ('name', String(20)), ('close', DOUBLE), ('change', DOUBLE), ('rank', Integer), ('market_type', String(20)), ('amount', DOUBLE), ('net_amount', DOUBLE), ('buy', DOUBLE), ('sell', DOUBLE), ] has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有tushare_daily_basic if has_table: sql_str = """ select cal_date FROM ( select * from tushare_trade_date trddate where( cal_date>(SELECT max(trade_date) FROM {table_name})) )tt where (is_open=1 and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) and exchange='SSE') """.format(table_name=table_name) else: sql_str = """ SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) AND exchange='SSE' AND cal_date>='2014-11-17') ORDER BY cal_date""" logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取交易日数据 table = session.execute(sql_str) trade_date_list = list(row[0] for row in table.fetchall()) # 设置 dtype dtype = {key: val for key, val in param_list} try: trade_date_list_len = len(trade_date_list) for num, trade_date in enumerate(trade_date_list, start=1): trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS) for market_type in list(['1', '3']): data_df = invoke_hsgt_top10(trade_date=trade_date, market_type=market_type) if len(data_df) > 0: data_count = bunch_insert_p( data_df, table_name=table_name, dtype=dtype, primary_keys=['ts_code', 'trade_date']) logging.info("%d/%d) %s更新 %s 结束 %d 条信息被更新", num, trade_date_list_len, trade_date, table_name, data_count) else: logging.info("无数据信息可被更新") break except: logger.exception('更新 %s 表异常', table_name)
def import_tushare_moneyflow_hsgt(chain_param=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_moneyflow_hsgt' logging.info("更新 %s 开始", table_name) param_list = [ ('trade_date', Date), ('ggt_ss', DOUBLE), ('ggt_sz', DOUBLE), ('hgt', DOUBLE), ('sgt', DOUBLE), ('north_money', DOUBLE), ('south_money', DOUBLE), ] has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有tushare_daily_basic # 下面一定要注意引用表的来源,否则可能是串,提取混乱!!!比如本表是tushare_daily_basic,所以引用的也是这个,如果引用错误,就全部乱了l if has_table: sql_str = """ select cal_date FROM ( select * from tushare_trade_date trddate where( cal_date>(SELECT max(trade_date) FROM {table_name})) )tt where (is_open=1 and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) and exchange='SSE') """.format(table_name=table_name) else: sql_str = """ SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) AND exchange='SSE' AND cal_date>='2014-11-17') ORDER BY cal_date""" logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取交易日数据 table = session.execute(sql_str) trade_date_list = list(row[0] for row in table.fetchall()) # 设置 dtype dtype = {key: val for key, val in param_list} try: trade_date_list_len = len(trade_date_list) for num, trade_date in enumerate(trade_date_list, start=1): trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS) data_df = invoke_moneyflow_hsgt(trade_date=trade_date) if len(data_df) > 0: data_count = bunch_insert_p(data_df, table_name=table_name, dtype=dtype, primary_keys=['trade_date']) logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新", num, trade_date_list_len, trade_date, table_name, data_count) else: logging.info("无数据信息可被更新") except: logger.exception('更新 %s 表异常', table_name)
def import_tushare_margin(chain_param=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_stock_margin' logging.info("更新 %s 开始", table_name) param_list = [ ('trade_date', Date), ('exchange_id', String(20)), ('rzye', DOUBLE), ('rzmre', DOUBLE), ('rzche', DOUBLE), ('rqye', DOUBLE), ('rqmcl', DOUBLE), ('rzrqye', DOUBLE), ] has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有tushare_daily_basic if has_table: sql_str = """ select cal_date FROM ( select * from tushare_trade_date trddate where( cal_date>(SELECT max(trade_date) FROM {table_name})) )tt where (is_open=1 and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) and exchange='SSE') """.format( table_name=table_name) else: sql_str = """ SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) AND exchange='SSE' AND cal_date>='2010-03-31') ORDER BY cal_date""" logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取交易日数据 table = session.execute(sql_str) trade_date_list = list(row[0] for row in table.fetchall()) # 设置 dtype dtype = {key: val for key, val in param_list} try: trade_date_list_len = len(trade_date_list) for num, trade_date in enumerate(trade_date_list, start=1): trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS) for exchange_id in list(['SSE', 'SZSE']): data_df = invoke_margin(trade_date=trade_date, exchange_id=exchange_id) if len(data_df) > 0: # data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype) # logging.info("%s更新 %s %s 结束 %d 条信息被更新", trade_date, table_name, exchange_id, data_count) data_count = bunch_insert_p( data_df, table_name=table_name, dtype=dtype, primary_keys=['exchange_id', 'trade_date']) logging.info("%d/%d) %s %s 更新 %s 结束 %d 条信息被更新", num, trade_date_list_len, exchange_id, trade_date, table_name, data_count) else: logging.info("%d/%d) %s %s 无数据信息可被更新 %s", num, trade_date_list_len, exchange_id, trade_date, table_name) except: logger.exception('更新 %s 表异常', table_name)
def import_tushare_top_inst(chain_param=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_stock_top_inst' logging.info("更新 %s 开始", table_name) has_table = engine_md.has_table(table_name) if has_table: sql_str = """ select cal_date FROM ( select * from tushare_trade_date trddate where( cal_date>(SELECT max(trade_date) FROM {table_name} )) )tt where (is_open=1 and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) and exchange='SSE') """.format(table_name=table_name) else: sql_str = """ SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) AND exchange='SSE' and cal_date>'2012-01-03') ORDER BY cal_date""" logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取交易日数据 table = session.execute(sql_str) trddate = list(row[0] for row in table.fetchall()) # 定义相应的中间变量 data_df_list, data_count, all_data_count, data_len, trade_date = [], 0, 0, len( trddate), None try: for i in range(len(trddate)): trade_date = datetime_2_str(trddate[i], STR_FORMAT_DATE_TS) data_df = invoke_top_inst(trade_date=trade_date) # 把数据攒起来 if data_df is not None and data_df.shape[0] > 0: data_count += data_df.shape[0] data_df_list.append(data_df) # 大于阀值有开始插入 if data_count >= 10000: data_df_all = pd.concat(data_df_list) # bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_TOP_INST) data_count = bunch_insert_p(data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_TOP_INST, primary_keys=[ 'ts_code', 'trade_date', 'exalter', 'buy', 'sell' ]) logging.info("更新 %s 结束 ,截至%s日 %d 条信息被更新", table_name, trade_date, all_data_count) all_data_count += data_count data_df_list, data_count = [], 0 finally: if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_count = bunch_insert_p(data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_TOP_INST, primary_keys=[ 'ts_code', 'trade_date', 'exalter', 'buy', 'sell' ]) all_data_count = all_data_count + data_count logging.info("更新 %s 结束 ,截至%s日 %d 条信息被更新", table_name, trade_date, all_data_count)
def import_jq_stock_daily(chain_param=None, code_set=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name_info = TABLE_NAME_INFO table_name = TABLE_NAME table_name_bak = get_bak_table_name(table_name) logging.info("更新 %s 开始", table_name) # 根据 info table 查询每只股票日期区间 sql_info_str = f""" SELECT jq_code, date_frm, if(date_to<end_date, date_to, end_date) date_to FROM ( SELECT info.jq_code, start_date date_frm, end_date date_to, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM {table_name_info} info ) tt WHERE date_frm <= if(date_to<end_date, date_to, end_date) ORDER BY jq_code""" has_table = engine_md.has_table(table_name) has_bak_table = engine_md.has_table(table_name_bak) # 进行表格判断,确定是否含有 jq_stock_daily_md if has_table: # 这里对原始的 sql语句进行了调整 # 以前的逻辑:每只股票最大的一个交易日+1天作为起始日期 # 现在的逻辑:每只股票最大一天的交易日作为起始日期 # 主要原因在希望通过接口获取到数据库中现有最大交易日对应的 factor因子以进行比对 sql_trade_date_range_str = f""" SELECT jq_code, date_frm, if(date_to<end_date, date_to, end_date) date_to FROM ( SELECT info.jq_code, ifnull(trade_date, info.start_date) date_frm, info.end_date date_to, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM {table_name_info} info LEFT OUTER JOIN (SELECT jq_code, max(trade_date) trade_date FROM {table_name} GROUP BY jq_code) daily ON info.jq_code = daily.jq_code ) tt WHERE date_frm < if(date_to<end_date, date_to, end_date) ORDER BY jq_code""" else: sql_trade_date_range_str = sql_info_str logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, table_name_info) sql_trade_date_str = """SELECT trade_date FROM jq_trade_date trddate WHERE trade_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) ORDER BY trade_date""" with with_db_session(engine_md) as session: # 获取截至当期全部交易日前 table = session.execute(sql_trade_date_str) trade_date_list = [row[0] for row in table.fetchall()] trade_date_list.sort() # 获取每只股票日线数据的日期区间 table = session.execute(sql_trade_date_range_str) # 计算每只股票需要获取日线数据的日期区间 # 获取date_from,date_to,将date_from,date_to做为value值 code_date_range_dic = { key_code: (date_from, date_to) for key_code, date_from, date_to in table.fetchall() if code_set is None or key_code in code_set } # 从 info 表中查询全部日期区间 if sql_info_str == sql_trade_date_range_str: code_date_range_from_info_dic = code_date_range_dic else: # 获取每只股票日线数据的日期区间 table = session.execute(sql_info_str) # 计算每只股票需要获取日线数据的日期区间 # 获取date_from,date_to,将date_from,date_to做为value值 code_date_range_from_info_dic = { key_code: (date_from, date_to) for key_code, date_from, date_to in table.fetchall() if code_set is None or key_code in code_set } # data_len = len(code_date_range_dic) data_df_list, data_count, all_data_count, data_len = [], 0, 0, len( code_date_range_dic) logger.info('%d stocks will been import into %s', data_len, table_name) # 将data_df数据,添加到data_df_list try: for num, (key_code, (date_from_tmp, date_to_tmp)) in enumerate(code_date_range_dic.items(), start=1): data_df = None try: for loop_count in range(2): # 根据交易日数据取交集,避免不用的请求耽误时间 date_from = get_first(trade_date_list, lambda x: x >= date_from_tmp) date_to = get_last(trade_date_list, lambda x: x <= date_to_tmp) if date_from is None or date_to is None or date_from >= date_to: logger.debug('%d/%d) %s [%s - %s] 跳过', num, data_len, key_code, date_from, date_to) break logger.debug('%d/%d) %s [%s - %s] %s', num, data_len, key_code, date_from, date_to, '第二次查询' if loop_count > 0 else '') data_df = invoke_daily(key_code=key_code, start_date=date_2_str(date_from), end_date=date_2_str(date_to)) # 该判断只在第一次循环时执行 if loop_count == 0 and has_table: # 进行 factor 因子判断,如果发现最小的一个交易日的因子不为1,则删除数据库中该股票的全部历史数据,然后重新下载。 # 因为当期股票下载的数据为前复权价格,如果股票出现复权调整,则历史数据全部需要重新下载 factor_value = data_df.sort_values('trade_date').iloc[ 0, :]['factor'] if factor_value != 1 and ( code_date_range_from_info_dic[key_code][0] != code_date_range_dic[key_code][0]): # 删除该股屏历史数据 sql_str = f"delete from {table_name} where jq_code=:jq_code" row_count = execute_sql_commit( sql_str, params={'jq_code': key_code}) date_from_tmp, date_to_tmp = code_date_range_from_info_dic[ key_code] if has_bak_table: sql_str = f"delete from {table_name_bak} where jq_code=:jq_code" row_count = execute_sql_commit( sql_str, params={'jq_code': key_code}) date_from_tmp, date_to_tmp = code_date_range_from_info_dic[ key_code] logger.info( '%d/%d) %s %d 条历史记录被清除,重新加载前复权历史数据 [%s - %s] 同时清除bak表中相应记录', num, data_len, key_code, row_count, date_from_tmp, date_to_tmp) else: logger.info( '%d/%d) %s %d 条历史记录被清除,重新加载前复权历史数据 [%s - %s]', num, data_len, key_code, row_count, date_from_tmp, date_to_tmp) # 重新设置起止日期,进行第二次循环 continue # 退出 for _ in range(2): 循环 break except Exception as exp: data_df = None logger.exception('%s [%s - %s]', key_code, date_2_str(date_from_tmp), date_2_str(date_to_tmp)) if exp.args[0].find('超过了每日最大查询限制'): break # 把数据攒起来 if data_df is not None and data_df.shape[0] > 0: data_count += data_df.shape[0] data_df_list.append(data_df) # 大于阀值有开始插入 if data_count >= 500: data_df_all = pd.concat(data_df_list) bunch_insert_p(data_df_all, table_name, dtype=DTYPE, primary_keys=['jq_code', 'trade_date']) all_data_count += data_count data_df_list, data_count = [], 0 if DEBUG and num >= 2: break finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_count = bunch_insert_p(data_df_all, table_name, dtype=DTYPE, primary_keys=['jq_code', 'trade_date']) all_data_count = all_data_count + data_count logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)