def import_tushare_adj_factor(chain_param=None, ): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_stock_daily_adj_factor' primary_keys = ["ts_code", "trade_date"] logging.info("更新 %s 开始", table_name) # 进行表格判断,确定是否含有 table_name has_table = engine_md.has_table(table_name) # sqlite_file_name = 'eDB_adjfactor.db' check_sqlite_db_primary_keys(table_name, primary_keys) if has_table: sql_str = """ select cal_date FROM ( select * from tushare_trade_date trddate where( cal_date>(SELECT max(trade_date) FROM {table_name})) )tt where (is_open=1 and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) and exchange='SSE') """.format(table_name=table_name) else: sql_str = """ SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) AND exchange='SSE') ORDER BY cal_date""" logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取交易日数据 table = session.execute(sql_str) trade_date_list = [row[0] for row in table.fetchall()] trade_date_count, data_count_tot = len(trade_date_list), 0 try: for num, trade_date in enumerate(trade_date_list, start=1): trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS) data_df = pro.adj_factor(ts_code='', trade_date=trade_date) if data_df is not None and data_df.shape[0] > 0: data_count = bunch_insert( data_df, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR, primary_keys=primary_keys) data_count_tot += data_count logging.info("%d/%d) %s 表 %s %d 条信息被更新", num, trade_date_count, table_name, trade_date, data_count) else: logging.info("%d/%d) %s 表 %s 数据信息可被更新", num, trade_date_count, table_name, trade_date) except: logger.exception("更新 %s 异常", table_name) finally: logging.info("%s 表 %d 条记录更新完成", table_name, data_count_tot)
def import_tushare_daily_basic(chain_param=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_stock_daily_basic' primary_keys = ["ts_code", "trade_date"] logging.info("更新 %s 开始", table_name) check_sqlite_db_primary_keys(table_name, primary_keys) has_table = engine_md.has_table(table_name) # 下面一定要注意引用表的来源,否则可能是串,提取混乱!!! # 比如本表是 tushare_daily_basic,所以引用的也是这个,如果引用错误,就全部乱了 if has_table: sql_str = """ select cal_date FROM ( select * from tushare_trade_date trddate where( cal_date>(SELECT max(trade_date) FROM {table_name} )) )tt where (is_open=1 and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) and exchange='SSE') """.format(table_name=table_name) else: sql_str = """ SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) AND exchange='SSE') ORDER BY cal_date""" logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取交易日数据 table = session.execute(sql_str) trade_date_list = list(row[0] for row in table.fetchall()) try: for_count = len(trade_date_list) for num, trade_date in enumerate(trade_date_list, start=1): trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS) data_df = invoke_daily_basic(ts_code='', trade_date=trade_date) if data_df is not None and data_df.shape[0] > 0: data_count = bunch_insert( data_df, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_DAILY_BASIC, primary_keys=primary_keys) logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新", num, for_count, trade_date, table_name, data_count) else: logging.info("%d/%d) %s 无数据信息可被更新", num, for_count, trade_date) except: logger.exception("更新 %s 表异常", table_name)
def import_tushare_stock_daily(chain_param=None, ts_code_set=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_stock_daily_md' primary_keys = ["ts_code", "trade_date"] check_sqlite_db_primary_keys(table_name, primary_keys) logging.info("更新 %s 开始", table_name) has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有tushare_stock_daily if has_table: sql_str = """ SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.ts_code, ifnull(trade_date, list_date) date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_stock_info info LEFT OUTER JOIN (SELECT ts_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY ts_code) daily ON info.ts_code = daily.ts_code ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY ts_code""".format(table_name=table_name) else: sql_str = """ SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.ts_code, list_date date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_stock_info info ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY ts_code""" logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name) sql_trade_date_str = """ SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) AND exchange='SSE') ORDER BY cal_date""" with with_db_session(engine_md) as session: table = session.execute(sql_trade_date_str) trade_date_list = [row[0] for row in table.fetchall()] trade_date_list.sort() # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 begin_time = None # 获取date_from,date_to,将date_from,date_to做为value值 code_date_range_dic = { ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for ts_code, date_from, date_to in table.fetchall() if ts_code_set is None or ts_code in ts_code_set} # data_len = len(code_date_range_dic) data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(code_date_range_dic) logger.info('%d data will been import into %s', data_len, table_name) # 将data_df数据,添加到data_df_list try: for num, (ts_code, (date_from_tmp, date_to_tmp)) in enumerate(code_date_range_dic.items(), start=1): date_from = get_first(trade_date_list, lambda x: x >= date_from_tmp) date_to = get_last(trade_date_list, lambda x: x <= date_to_tmp) if date_from is None or date_to is None or date_from > date_to: logger.debug('%d/%d) %s [%s - %s] 跳过', num, data_len, ts_code, date_from, date_to) continue logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to) data_df = invoke_daily(ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS), end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS)) # data_df = df if len(data_df) > 0: while try_2_date(data_df['trade_date'].iloc[-1]) > date_from: last_date_in_df_last, last_date_in_df_cur = try_2_date(data_df['trade_date'].iloc[-1]), None df2 = invoke_daily(ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS), end_date=datetime_2_str( try_2_date(data_df['trade_date'].iloc[-1]) - timedelta(days=1), STR_FORMAT_DATE_TS)) if df2 is not None and len(df2) > 0: last_date_in_df_cur = try_2_date(df2['trade_date'].iloc[-1]) if last_date_in_df_cur < last_date_in_df_last: data_df = pd.concat([data_df, df2]) # df = df2 elif last_date_in_df_cur == last_date_in_df_last: break if data_df is None: logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, date_from, date_to) continue logger.info('%d/%d) %d data of %s between %s and %s', num, data_len, data_df.shape[0], ts_code, date_from, date_to) else: break # 把数据攒起来 if data_df is not None and data_df.shape[0] > 0: data_count += data_df.shape[0] data_df_list.append(data_df) # 大于阀值有开始插入 if data_count >= 500: data_df_all = pd.concat(data_df_list) data_count = bunch_insert( data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_DAILY_MD, primary_keys=primary_keys) all_data_count += data_count data_df_list, data_count = [], 0 if DEBUG and len(data_df_list) > 1: break finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_count = bunch_insert( data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_DAILY_MD, primary_keys=primary_keys) all_data_count = all_data_count + data_count logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
def import_tushare_stock_cashflow(chain_param=None, ts_code_set=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_stock_cashflow' primary_keys = ['ts_code', 'ann_date', 'end_date'] logging.info("更新 %s 开始", table_name) check_sqlite_db_primary_keys(table_name, primary_keys) has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有tushare_stock_daily if has_table: sql_str = """ SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.ts_code, ifnull(ann_date, list_date) date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_stock_info info LEFT OUTER JOIN (SELECT ts_code, adddate(max(ann_date),1) ann_date FROM {table_name} GROUP BY ts_code) cashflow ON info.ts_code = cashflow.ts_code ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY ts_code""".format(table_name=table_name) else: sql_str = """ SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.ts_code, list_date date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_stock_info info ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY ts_code""" logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 begin_time = None # 获取date_from,date_to,将date_from,date_to做为value值 code_date_range_dic = { ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for ts_code, date_from, date_to in table.fetchall() if ts_code_set is None or ts_code in ts_code_set } data_df_list, data_count, all_data_count, data_len = [], 0, 0, len( code_date_range_dic) logger.info('%d data will been import into %s', data_len, table_name) # 将data_df数据,添加到data_df_list cycles = 1 try: for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to) df = invoke_cashflow( ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS), end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS)) # logger.info(' %d data of %s between %s and %s', df.shape[0], ts_code, date_from, date_to) data_df = df if data_df is not None and len(data_df) > 0: while try_2_date(df['ann_date'].iloc[-1]) > date_from: last_date_in_df_last, last_date_in_df_cur = try_2_date( df['ann_date'].iloc[-1]), None df2 = invoke_cashflow( ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS), end_date=datetime_2_str( try_2_date(df['ann_date'].iloc[-1]) - timedelta(days=1), STR_FORMAT_DATE_TS)) if len(df2) > 0: last_date_in_df_cur = try_2_date( df2['ann_date'].iloc[-1]) if last_date_in_df_cur < last_date_in_df_last: data_df = pd.concat([data_df, df2]) df = df2 elif last_date_in_df_cur == last_date_in_df_last: break elif len(df2) <= 0: break if data_df is None: logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, date_from, date_to) continue elif data_df is not None: logger.info('%d/%d) %d 条 %s 的现金流被提取,起止时间为 %s 和 %s', num, data_len, data_df.shape[0], ts_code, date_from, date_to) # 把数据攒起来 if data_df is not None and data_df.shape[0] > 0: data_count += data_df.shape[0] data_df_list.append(data_df) # 大于阀值有开始插入 if data_count >= 1000 and len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) bunch_insert(data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_CASHFLOW, primary_keys=primary_keys) logger.info('%d 条现金流数据已插入 %s 表', data_count, table_name) all_data_count += data_count data_df_list, data_count = [], 0 # # 数据插入数据库 # data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, DTYPE_TUSHARE_CASHFLOW) # logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count) # 仅调试使用 cycles = cycles + 1 if DEBUG and cycles > 10: break finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_count = bunch_insert(data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_CASHFLOW, primary_keys=primary_keys) all_data_count = all_data_count + data_count logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)