def create_ts_task(task: TaskEnum):
    """Create the TS task list.

    Removes any previous tasks of the same type, then generates one
    TaskTable row per currently-listed stock in TS_Stock_Basic.

    :param task: task type used to tag the generated rows
    """
    # Drop historical tasks of the same kind before regenerating.
    TaskTable.del_with_task(task)
    with session_scope() as sm:
        listed = (
            sm.query(
                TS_Stock_Basic.ts_code,
                TS_Stock_Basic.bs_code,
                TS_Stock_Basic.list_date,
                TS_Stock_Basic.delist_date,
            )
            .filter(TS_Stock_Basic.list_status == "L")
            .all()
        )
        rows = [
            TaskTable(
                task=task.value,
                task_name=task.name,
                ts_code=stock.ts_code,
                bs_code=stock.bs_code,
                begin_date=stock.list_date,
                # Still-listed stocks have no delist date; run up to today.
                end_date=(stock.delist_date
                          if stock.delist_date is not None
                          else datetime.now().date()),
            )
            for stock in listed
        ]
        sm.bulk_save_objects(rows)
        _logger.info("生成{}条任务记录".format(len(listed)))
def create_bs_task(task: TaskEnum, tmpcodes=None):
    """Create the BS task list.

    Removes any previous tasks of the same type, then generates one
    TaskTable row per stock in BS_Stock_Basic. In debug mode the universe
    is restricted to ``tmpcodes`` (if given) or to the SZ50 constituents.

    :param task: task type used to tag the generated rows
    :param tmpcodes: optional iterable of BS codes to restrict to (debug only)
    """
    # Drop historical tasks of the same kind before regenerating.
    TaskTable.del_with_task(task)
    with session_scope() as sm:
        query = sm.query(BS_Stock_Basic.code, BS_Stock_Basic.ipoDate,
                         BS_Stock_Basic.outDate, BS_Stock_Basic.ts_code)
        if CQ_Config.IDB_DEBUG == "1":
            # Test environment: shrink the stock universe.
            if tmpcodes:
                query = query.filter(BS_Stock_Basic.code.in_(tmpcodes))
            else:
                query = query.join(
                    BS_SZ50_Stocks, BS_Stock_Basic.code == BS_SZ50_Stocks.code)
        # query = query.filter(BS_Stock_Basic.status == True)  # listed only
        stocks = query.all()
        rows = [
            TaskTable(
                task=task.value,
                task_name=task.name,
                ts_code=stock.ts_code,
                bs_code=stock.code,
                begin_date=stock.ipoDate,
                # Stocks without an outDate are still listed; run to today.
                end_date=(stock.outDate
                          if stock.outDate is not None
                          else datetime.now().date()),
            )
            for stock in stocks
        ]
        sm.bulk_save_objects(rows)
        _logger.info("生成{}条任务记录".format(len(stocks)))
def create_ts_cal_task(task: TaskEnum):
    """Create a task list driven by the trade calendar.

    Removes any previous tasks of the same type, then generates one
    TaskTable row per distinct open trading day up to today.

    :param task: task type used to tag the generated rows
    """
    # Drop historical tasks first.
    TaskTable.del_with_task(task)
    with session_scope() as sm:
        open_days = (
            sm.query(distinct(TS_TradeCal.date).label("t_date"))
            .filter(TS_TradeCal.is_open == True,  # noqa
                    TS_TradeCal.date <= datetime.now().date())
            .all()
        )
        rows = [
            TaskTable(
                task=task.value,
                task_name=task.name,
                ts_code="按日期更新",
                bs_code="按日期更新",
                begin_date=day.t_date,
                end_date=day.t_date,
            )
            for day in open_days
        ]
        sm.bulk_save_objects(rows)
        _logger.info("生成{}条任务记录".format(len(open_days)))
def import_data():
    """Import daily-line data from BS_Profit_Data in id-keyed pages.

    Pages by primary-key range instead of OFFSET/LIMIT: past ~8M rows an
    offset scan degraded to ~12s per page, while an indexed id range stays
    fast. Each page is transformed on the main thread and written by a
    worker thread via ``_insert_data``.
    """
    from_table = BS_Profit_Data
    with concurrent.futures.ThreadPoolExecutor() as executor:
        with session_scope() as session:
            onerow = session.query(func.min(from_table.id),
                                   func.max(from_table.id)).one()
            minid = onerow[0]
            maxid = onerow[1]
            # BUGFIX: the original tested `not minid`, which also bails out
            # on a legitimate minimum id of 0; only None means "no rows".
            if minid is None:
                return
            pagesize = 5000
            ahead_id = minid
            next_id = ahead_id + pagesize
            i = 0  # page counter
            while ahead_id <= maxid:
                # Half-open id window [ahead_id, next_id).
                rp = session.query(from_table).filter(
                    from_table.id >= ahead_id,
                    from_table.id < next_id)
                to_data = _build_result_data(rp)
                executor.submit(_insert_data, to_data, i + 1)
                i += 1
                ahead_id = next_id
                next_id = ahead_id + pagesize
    _logger.info("数据导入完成")
def del_all():
    """Delete every row from the TS_TradeCal trade-calendar table."""
    with session_scope() as session:
        session.query(TS_TradeCal).delete()
        session.commit()
        _logger.info("交易日历表数据已清空")
def del_with_task(taskEnum: TaskEnum):
    """Delete all historical task rows matching the given task id.

    :param taskEnum: task whose historical entries are removed
    """
    with session_scope() as session:
        session.query(TaskTable).filter(
            TaskTable.task == taskEnum.value).delete()
        session.commit()
        _logger.info("任务:{}-历史任务已删除".format(taskEnum.name))
def get_profit_data():
    """Fetch quarterly profitability data via baostock for unfinished tasks.

    For every unfinished 季频盈利能力 task, downloads the four quarters of
    each year from the task's begin year through its end year (inclusive),
    retrying failed requests up to 8 times, then hands the collected
    DataFrames to a worker thread (``_save_date``) for persistence.
    """
    # Log in to the baostock service.
    lg = bs.login()
    print('login respond error_code:' + lg.error_code)
    print('login respond error_msg:' + lg.error_msg)
    with concurrent.futures.ThreadPoolExecutor() as executor:
        with session_scope() as sm:
            rp = sm.query(TaskTable).filter(
                TaskTable.task == TaskEnum.季频盈利能力.value,
                TaskTable.finished == False)  # noqa
            for task in rp:
                if task.finished:
                    continue
                start_year = task.begin_date.year
                end_year = task.end_date.year
                dflist = []
                # BUGFIX: the original iterated range(start_year, end_year),
                # silently skipping all four quarters of the final year.
                for y in range(start_year, end_year + 1):
                    for q in range(1, 5):
                        max_try = 8  # max reconnect attempts on failure
                        for i in range(max_try):
                            rs_profit = bs.query_profit_data(
                                code=task.ts_code, year=y, quarter=q)
                            if rs_profit.error_code == '0':
                                profit_list = []
                                # baostock idiom: step the cursor row by row.
                                while (rs_profit.error_code == '0') \
                                        & rs_profit.next():
                                    profit_list.append(
                                        rs_profit.get_row_data())
                                result_df = pd.DataFrame(
                                    profit_list, columns=rs_profit.fields)
                                dflist.append(result_df)
                                break
                            elif i < (max_try - 1):
                                time.sleep(2)
                                _logger.error('{}:{}-{} 获取数据失败,重连……'.format(
                                    task.ts_code, y, q))
                                continue
                # NOTE(review): marked finished even if some quarters
                # exhausted their retries — preserved from the original.
                task.finished = True
                executor.submit(_save_date, dflist, task.ts_code)
            sm.commit()
    # Log out of the baostock service.
    bs.logout()
def update_task(reset: bool = False):
    """Refresh the task table for the BS daily hfq (后复权) K-line job.

    When ``reset`` is True the whole task list is rebuilt from scratch via
    :func:`create_bs_task`. Otherwise, compares each stock's latest stored
    trading date against the calendar and creates incremental update tasks
    only for stocks that are missing data.

    :param reset: rebuild the full task list instead of incremental update
    """
    if reset:
        create_bs_task(TaskEnum.BS日线历史A股K线后复权数据)
        return
    # Delete historical task records for this job.
    TaskTable.del_with_task(TaskEnum.BS日线历史A股K线后复权数据)
    with session_scope() as sm:
        # Join BS stock basics with each stock's latest stored trading date
        # (from the hfq daily table) to find what still needs updating.
        cte = sm.query(BS_Daily_hfq.code,
                       func.max(BS_Daily_hfq.date).label('mx_date')).group_by(
                           BS_Daily_hfq.code).cte('cte')
        query = sm.query(BS_Stock_Basic.code, BS_Stock_Basic.ts_code,
                         BS_Stock_Basic.ipoDate, BS_Stock_Basic.outDate,
                         cte.c.mx_date)
        # Outer join so stocks with no stored data at all are included
        # (their mx_date comes back NULL).
        query = query.join(cte, BS_Stock_Basic.code == cte.c.code,
                           isouter=True)
        # Needs update when: still listed but stale; never downloaded;
        # or delisted after the latest stored date.
        query = query.filter(
            or_(
                and_(BS_Stock_Basic.outDate == None,
                     cte.c.mx_date < dtime.now().date()),
                cte.c.mx_date == None,
                BS_Stock_Basic.outDate > cte.c.mx_date))
        codes = query.all()
        tasklist = []
        for c in codes:
            tasktable = TaskTable(
                task=TaskEnum.BS日线历史A股K线后复权数据.value,
                task_name=TaskEnum.BS日线历史A股K线后复权数据.name,
                ts_code=c.ts_code,
                bs_code=c.code,
                # Start from IPO when nothing is stored yet, otherwise the
                # day after the latest stored date.
                begin_date=c.ipoDate if c.mx_date is None else c.mx_date +
                datetime.timedelta(days=1),
                end_date=c.outDate
                if c.outDate is not None else dtime.now().date())
            tasklist.append(tasktable)
        sm.bulk_save_objects(tasklist)
        _logger.info('生成{}条任务记录'.format(len(codes)))
def _update_to_tmp_table(tmp_table_name: str):
    """Stage n-day pct-change update rows into a temporary table.

    Finds every stock that still has rows with a NULL 250-day pct change,
    pulls its hfq daily bars from the earliest such date onward, computes
    the forward pct change over a fixed set of windows, and hands each
    DataFrame to a worker thread for saving.

    :param tmp_table_name: name of the temporary table to write into
    """
    _logger.info("创建更新数据到临时表")
    with concurrent.futures.ThreadPoolExecutor() as executor:
        with session_scope() as sm:
            # Stocks still missing the 250-day value, with the earliest
            # date needing recomputation.
            pending = (
                sm.query(BS_LaterNPctChg.code,
                         func.min(BS_LaterNPctChg.date).label("mi_date"))
                .filter(BS_LaterNPctChg.l250_pctChg == None)  # noqa
                .group_by(BS_LaterNPctChg.code)
                .all()
            )
            for item in tqdm(pending):
                stmt = (select([
                    BS_Daily_hfq.id,
                    BS_Daily_hfq.code,
                    BS_Daily_hfq.date,
                    BS_Daily_hfq.close,
                    BS_Daily_hfq.pctChg,
                ]).where(
                    and_(BS_Daily_hfq.code == item.code,
                         BS_Daily_hfq.date >= item.mi_date)).order_by(
                             desc(BS_Daily_hfq.date)))
                df = pd.read_sql(stmt, engine)
                # Forward pct-change windows (trading days).
                for window in (1, 2, 3, 5, 8, 10, 13, 20, 30, 60, 120, 250):
                    _calc_n_pctChg(df, window)
                df = df.replace([np.inf, -np.inf], np.nan)
                executor.submit(_save_tmp_data, df, tmp_table_name)
def _insert_data(data: list, pagenum):
    """Persist one page of records, flushing to the DB every 1000 rows.

    :param data: list of dicts to convert via ``_build_to_table`` and save
    :param pagenum: page number, used only for log messages
    """
    batch = []
    count = 0
    try:
        with session_scope() as session:
            for record in data:
                batch.append(_build_to_table(record))
                count += 1
                # Flush in chunks of 1000 to bound memory and txn size.
                if count % 1000 == 0:
                    session.bulk_save_objects(batch)
                    session.commit()
                    batch.clear()
            # Flush whatever remains after the loop.
            if batch:
                session.bulk_save_objects(batch)
                session.commit()
    except Exception as e:
        _logger.error("第{}页数据导入失败:{}".format(pagenum, repr(e)))
    else:
        _logger.info("第{}页数据导入完成".format(pagenum))
def update_task():
    """Create TS daily-indicator update tasks for missing trade dates.

    Anti-joins the open trading calendar (up to today) against the dates
    already present in TS_Daily_Basic and creates one task per gap.
    """
    with session_scope() as sm:
        # All distinct open trading days up to today.
        cte1 = (
            sm.query(distinct(TS_TradeCal.date).label("date"))
            .filter(and_(TS_TradeCal.is_open == True,  # noqa
                         TS_TradeCal.date <= dtime.now().date()))
            .cte("cte1"))
        # Trade dates already loaded into the daily-basic table.
        cte2 = (
            sm.query(distinct(TS_Daily_Basic.trade_date).label("trade_date"))
            .cte("cte2"))
        # Left anti-join: calendar dates with no matching loaded date.
        missing = (
            sm.query(cte1.c.date)
            .join(cte2, cte1.c.date == cte2.c.trade_date, isouter=True)
            .filter(cte2.c.trade_date == None)  # noqa
            .all())
        rows = [
            TaskTable(
                task=TaskEnum.TS更新每日指标.value,
                task_name=TaskEnum.TS更新每日指标.name,
                ts_code="按日期更新",
                bs_code="按日期更新",
                begin_date=gap.date,
                end_date=gap.date,
            )
            for gap in missing
        ]
        sm.bulk_save_objects(rows)
        _logger.info("生成{}条任务记录".format(len(missing)))
def create_task(
    task: TaskEnum,
    begin_date: date,
    end_date: date,
    codes: list = None,
    type: str = None,
    status: str = None,
    market: str = None,
    isdel=False,
):
    """Create tasks for the given task type.

    When ``codes`` is empty/None the stock universe is queried from
    BS_Stock_Basic, optionally filtered by market/status/type (and limited
    to SZ50 constituents in the debug environment).

    :param task: task type used to tag the generated rows
    :param begin_date: task start date; when None, each stock's IPO date is used
    :param end_date: task end date
    :param codes: optional pre-selected rows; each item must expose ``.code``
        (and ``.ipoDate`` when ``begin_date`` is None), defaults to None
    :param type: security type: 1=stock, 2=index, 3=other, defaults to None
    :param status: listing status: 1=listed, 0=delisted, defaults to None
    :param market: market board (主板/中小板/创业板/科创板/CDR), defaults to None
    :param isdel: delete existing tasks of the same type first, defaults to False
    """
    # BUGFIX: the original declared `codes: list = []` — a mutable default
    # argument. `None` is backward-compatible because the falsy check below
    # treats None and [] identically.
    with session_scope() as sm:
        if not codes:
            query = sm.query(BS_Stock_Basic.code, BS_Stock_Basic.ipoDate)
            if market:
                query = query.join(
                    TS_Stock_Basic,
                    BS_Stock_Basic.code == TS_Stock_Basic.bs_code).filter(
                        TS_Stock_Basic.market == market)
            if CQ_Config.IDB_DEBUG == "1":
                # Test environment: restrict to SZ50 constituents.
                query = query.join(BS_SZ50_Stocks,
                                   BS_Stock_Basic.code == BS_SZ50_Stocks.code)
            if status:
                query = query.filter(BS_Stock_Basic.status == status)
            if type:
                query = query.filter(BS_Stock_Basic.type == type)
            codes = query.all()
        if isdel:
            # Delete existing historical tasks of the same type.
            query = sm.query(TaskTable).filter(TaskTable.task == task.value)
            query.delete()
            sm.commit()
            _logger.info("任务:{}-历史任务已删除".format(task.name))
        tasklist = []
        for c in codes:
            tasktable = TaskTable(
                task=task.value,
                task_name=task.name,
                ts_code=c.code,
                # Fall back to the stock's IPO date when no start was given.
                begin_date=begin_date if begin_date is not None else c.ipoDate,
                end_date=end_date,
            )
            tasklist.append(tasktable)
        sm.bulk_save_objects(tasklist)
        _logger.info("生成{}条任务记录".format(len(codes)))
def del_all_date():
    """Delete every row from the BS_Stock_Basic table."""
    with session_scope() as sm:
        sm.query(BS_Stock_Basic).delete()
def del_all_data():
    """Delete every row from the BS_SZ50_Stocks table."""
    with session_scope() as session:
        session.query(BS_SZ50_Stocks).delete()
def query_history_k_data_plus(
    taskEnum,
    frequency,
    adjustflag,
    load_data_func,
    load_data_func_params: dict = None,
):
    """Download historical A-share K-line data for every unfinished task.

    Walks all unfinished tasks of ``taskEnum``, fetches each stock's bars
    from baostock with up to 8 retries per task, and hands the resulting
    DataFrame (plus request metadata) to ``load_data_func`` on a worker
    thread. Tasks are marked finished only after a successful download.

    :param taskEnum: task type whose unfinished rows drive the downloads
    :param frequency: bar frequency passed through to baostock
    :param adjustflag: price-adjustment flag passed through to baostock
    :param load_data_func: callable invoked with a params dict containing
        ``result`` (DataFrame), ``bs_code``, ``frequency``, ``adjustflag``
    :param load_data_func_params: extra params merged into that dict,
        defaults to None
    """
    fields = _get_fields(frequency)
    # Log in to the baostock service.
    lg = bs.login()
    with concurrent.futures.ThreadPoolExecutor() as executor:
        with session_scope() as sm:
            rp = sm.query(TaskTable).filter(TaskTable.task == taskEnum.value,
                                            TaskTable.finished == False).all()
            for task in tqdm(rp):
                if task.finished:
                    continue
                start_date = task.begin_date.strftime("%Y-%m-%d")
                end_date = task.end_date.strftime("%Y-%m-%d")
                max_try = 8  # max reconnect attempts on failure
                for i in range(max_try):
                    rs = bs.query_history_k_data_plus(task.bs_code,
                                                      fields,
                                                      start_date=start_date,
                                                      end_date=end_date,
                                                      frequency=frequency,
                                                      adjustflag=adjustflag)
                    if rs.error_code == '0':
                        data_list = []
                        while (rs.error_code == '0') & rs.next():
                            # Fetch one record at a time and accumulate.
                            data_list.append(rs.get_row_data())
                        result = pd.DataFrame(data_list, columns=rs.fields)
                        if load_data_func_params:
                            # Deep-copy so the caller's template dict is
                            # never mutated across tasks.
                            params = copy.deepcopy(load_data_func_params)
                        else:
                            params = {}
                        params['result'] = result
                        params['bs_code'] = task.bs_code
                        params['frequency'] = frequency
                        params['adjustflag'] = adjustflag
                        executor.submit(load_data_func, params)
                        task.finished = True
                        break
                    elif i < (max_try - 1):
                        time.sleep(2)
                        continue
                    else:
                        _logger.error(
                            '获取历史A股K线数据失败/query_history_k_data_plus respond error_code:'
                            + rs.error_code)
                        _logger.error(
                            '获取历史A股K线数据失败/query_history_k_data_plus respond error_msg:'
                            + rs.error_msg)
            sm.commit()
    # Log out of the baostock service.
    bs.logout()