def get_end_dates(): """ 股票结束日期。限定退市或者当前处于暂停上市状态的股票 Examples -------- >>> df = get_end_dates() >>> df.head() symbol end_date 0 000003 2002-06-14 1 000013 2004-09-20 2 000015 2001-10-22 3 000024 2015-12-30 4 000033 2017-07-07 """ col_names = ['symbol', 'end_date'] with session_scope() as sess: query = sess.query(SpecialTreatment.code, func.max(SpecialTreatment.date)).group_by( SpecialTreatment.code).having( SpecialTreatment.treatment.in_([ SpecialTreatmentType.delisting, SpecialTreatmentType.PT ])) df = pd.DataFrame.from_records(query.all()) df.columns = col_names return df
def latest_trading_day(): """最近一个交易日期(不包含当日)""" today = pd.Timestamp('today').date() with session_scope() as sess: return sess.query(TradingCalendar.date, ).filter( TradingCalendar.date < today).filter( TradingCalendar.is_trading == True).order_by( TradingCalendar.date.desc()).limit(1).scalar()
def get_previous_trading_date(date_): """上一个交易日期""" date_ = pd.Timestamp(date_).date() with session_scope() as sess: return sess.query(TradingCalendar.date).filter( TradingCalendar.is_trading == True, TradingCalendar.date < date_).order_by( TradingCalendar.date.desc()).limit(1).scalar()
def get_last_trading_date(): """最后交易日期""" date_ = pd.Timestamp('today').date() with session_scope() as sess: return sess.query(TradingCalendar.date).filter( TradingCalendar.is_trading == True, TradingCalendar.date <= date_).order_by( TradingCalendar.date.desc()).limit(1).scalar()
def not_yet_updated(codes, class_): """指定类在codes中今天尚未更新的代码""" assert class_ in (ShortName, SpecialTreatment) today = pd.Timestamp('today').date() with session_scope() as sess: query = sess.query(class_.code).filter( func.date(class_.last_updated) == today).distinct() updated_codes = set([x[0] for x in query.all()]) return set(codes).difference(updated_codes)
def fetch_single_equity(stock_code, start, end): """ 从本地数据库读取股票期间日线交易数据 注 -- 1. 除OHLCV外,还包括涨跌幅、成交额、换手率、流通市值、总市值、流通股本、总股本 2. 使用bcolz格式写入时,由于涨跌幅存在负数,必须剔除该列!!! Parameters ---------- stock_code : str 要获取数据的股票代码 start_date : datetime-like 自开始日期(包含该日) end_date : datetime-like 至结束日期 return ---------- DataFrame: OHLCV列的DataFrame对象。 Examples -------- >>> symbol = '000333' >>> start_date = '2017-4-1' >>> end_date = pd.Timestamp('2018-4-16') >>> df = fetch_single_equity(symbol, start_date, end_date) >>> df.iloc[:,:8] symbol date open high low close prev_close change_pct 0 000333 2018-04-02 53.30 55.00 52.68 52.84 54.53 -3.0992 1 000333 2018-04-03 52.69 53.63 52.18 52.52 52.84 -0.6056 2 000333 2018-04-04 52.82 54.10 52.06 53.01 52.52 0.9330 3 000333 2018-04-09 52.91 53.31 51.00 51.30 53.01 -3.2258 4 000333 2018-04-10 51.45 52.80 51.18 52.77 51.30 2.8655 5 000333 2018-04-11 52.78 53.63 52.41 52.98 52.77 0.3980 6 000333 2018-04-12 52.91 52.94 51.84 51.87 52.98 -2.0951 7 000333 2018-04-13 52.40 52.47 51.01 51.32 51.87 -1.0603 8 000333 2018-04-16 51.31 51.80 49.15 49.79 51.32 -2.9813 """ start = pd.Timestamp(start).date() end = pd.Timestamp(end).date() with session_scope() as sess: query = sess.query( StockDaily.code, StockDaily.date, StockDaily.A002_开盘价, StockDaily.A003_最高价, StockDaily.A004_最低价, StockDaily.A005_收盘价, StockDaily.A009_前收盘, StockDaily.A011_涨跌幅, StockDaily.A006_成交量, StockDaily.A007_成交金额, StockDaily.A008_换手率, StockDaily.A013_流通市值, StockDaily.A012_总市值).filter(StockDaily.code == stock_code, StockDaily.date.between(start, end)) df = pd.DataFrame.from_records(query.all()) df.columns = DAILY_COLS df = _fill_zero(df) df['circulating_share'] = df.cmv / df.close df['total_share'] = df.tmv / df.close return df
def codes_to_append(): """要添加数据的股票代码""" last_trading_date = get_last_trading_date() previous_trading_date = get_previous_trading_date(last_trading_date) with session_scope() as sess: y_codes = sess.query( StockDaily.code).filter(StockDaily.date == previous_trading_date) t_codes = sess.query( StockDaily.code).filter(StockDaily.date == last_trading_date) query = y_codes.except_(t_codes) return [x[0] for x in query.all()]
def get_start_date(code, class_): """类所含股票代码最后日期""" assert class_ in (ShortName, SpecialTreatment) first_date = pd.Timestamp('1990-1-1').date() with session_scope() as sess: l_d = sess.query(func.max( class_.date)).filter(class_.code == code).scalar() if l_d is None: return first_date else: return l_d + pd.Timedelta(days=1)
def concept_categories(): """概念类别映射{代码:名称}""" with session_scope() as sess: query = sess.query( Category.code, Category.title, ).filter(Category.label == '概念') df = pd.DataFrame.from_records(query.all()) df.columns = ['code', 'name'] df.sort_values('code', inplace=True) return df.set_index('code').to_dict()['name']
def ths_industry_info(): """同花顺行业""" with session_scope() as sess: query = sess.query( StockCategory.stock_code, Category.title, ).filter(Category.code == StockCategory.category_code).filter( Category.label == '同花顺行业') df = pd.DataFrame.from_records(query.all()) df.columns = ['sid', 'ths_industry'] return df
def region_info(): """股票所处地域""" with session_scope() as sess: query = sess.query( StockCategory.stock_code, Category.title, ).filter(Category.code == StockCategory.category_code).filter( Category.label == '地域') df = pd.DataFrame.from_records(query.all()) df.columns = ['sid', 'region'] df.sort_values('sid', inplace=True) return df
def gen_asset_metadata(only_in=True): """ 生成股票元数据 Paras ----- only_in : bool 是否仅仅包含当前在市的股票,默认为真。 Examples -------- >>> df = gen_asset_metadata() >>> df.head() symbol asset_name first_traded last_traded exchange auto_close_date \ 0 000001 平安银行 1991-01-02 2018-04-19 SZSE 2018-04-20 1 000002 万 科A 1991-01-02 2018-04-19 SZSE 2018-04-20 2 000004 国农科技 1991-01-02 2018-04-19 SZSE 2018-04-20 3 000005 世纪星源 1991-01-02 2018-04-19 SZSE 2018-04-20 4 000006 深振业A 1992-04-27 2018-04-19 SZSE 2018-04-20 start_date end_date 0 1991-04-03 2018-04-19 1 1991-01-29 2018-04-19 2 1991-01-14 2018-04-19 3 1990-12-10 2018-04-19 4 1992-04-27 2018-04-19 """ columns = ['symbol', 'first_traded', 'last_traded'] with session_scope() as sess: query = sess.query( StockDaily.code, func.min(StockDaily.date), func.max(StockDaily.date)).filter(~StockDaily.code.startswith( '2')).filter(~StockDaily.code.startswith('9')).group_by( StockDaily.code) df = pd.DataFrame.from_records(query.all()) df.columns = columns df['exchange'] = df['symbol'].map(get_exchange) df['auto_close_date'] = df['last_traded'].map( lambda x: x + pd.Timedelta(days=1)) latest_name = get_latest_short_name() start_dates = get_start_dates() end_dates = get_end_dates() df = df.merge(latest_name, 'left', on='symbol').merge(start_dates, 'left', on='symbol').merge(end_dates, 'left', on='symbol') # 对于未退市的结束日期,以最后交易日期代替 df.loc[df.end_date.isna(), 'end_date'] = df.loc[df.end_date.isna(), 'last_traded'] if only_in: df = df[~df.symbol.isin(end_dates.symbol)] df.reset_index(inplace=True, drop=True) return df
def ipo_info(): """ 股票上市日期(A股,不含B股) """ with session_scope() as sess: query = sess.query( Stock.code, Issue.A004_上市日期, ).filter(Stock.code == Issue.code).filter( Issue.A004_上市日期.isnot(None)).filter(~Stock.code.startswith( '2')).filter(~Stock.code.startswith('9')) df = pd.DataFrame.from_records(query.all()) df.columns = ['sid', 'asof_date'] return df.sort_values('sid')
def fetch_single_minutely_equity(stock_code, start, end, exclude_lunch=True): """ 从本地数据库读取单个股票期间分钟级别交易明细数据 注 -- 1. 仅包含OHLCV列 2. 原始数据按分钟进行汇总,first(open),last(close),max(high),min(low),sum(volume) Parameters ---------- stock_code : str 要获取数据的股票代码 start_date : datetime-like 自开始日期(包含该日) end_date : datetime-like 至结束日期 exclude_lunch : bool 是否排除午休时间,默认”是“ return ---------- DataFrame: OHLCV列的DataFrame对象。 Examples -------- >>> symbol = '000333' >>> start_date = '2018-4-1' >>> end_date = pd.Timestamp('2018-4-19') >>> df = fetch_single_minutely_equity(symbol, start_date, end_date) >>> df.tail() close high low open volume 2018-04-19 14:56:00 51.55 51.56 51.50 51.55 376400 2018-04-19 14:57:00 51.55 51.55 51.55 51.55 20000 2018-04-19 14:58:00 51.55 51.55 51.55 51.55 0 2018-04-19 14:59:00 51.55 51.55 51.55 51.55 0 2018-04-19 15:00:00 51.57 51.57 51.57 51.57 353900 """ start = pd.Timestamp(start).date() end = pd.Timestamp(end).date() with session_scope() as sess: query = sess.query(DealDetail.code, DealDetail.date, DealDetail.A001_时间, DealDetail.A002_价格, DealDetail.A004_成交量).filter( DealDetail.code == stock_code, DealDetail.date.between(start, end)) df = pd.DataFrame.from_records(query.all()) if df.empty: return pd.DataFrame(columns=OHLCV_COLS) return _handle_minutely_data(df, exclude_lunch)
def failed_one(code): """如刷新失败,返回True,否则False""" try: _, p2, p3 = fetch_gpgk(code) except ValueError: # 无法获取数据,失败 return True sn_start = get_start_date(code, ShortName) st_start = get_start_date(code, SpecialTreatment) sns = _gen_sn(code, p2, sn_start) sts = _gen_st(code, p3, st_start) with session_scope() as sess: _insert_sn(sess, code, sns) _insert_st(sess, code, sts) return False
def get_adhoc_holidays(): """从本地数据库中获取特别假期(务必以UTC时区表达)""" today = pd.Timestamp('today') with session_scope() as sess: query = sess.query(TradingCalendar.date).filter( TradingCalendar.date <= today.date()).filter( TradingCalendar.is_trading == False) start = today + pd.Timedelta(days=1) end = today + pd.Timedelta(days=365) # 所有未来日期周六周日均视同为假期 future = pd.date_range(start, end, tz='UTC') wds = pd.date_range(start, end, tz='UTC', freq='B') future_holidays = future.difference(wds) holidays = [x[0] for x in query.all()] holidays = pd.DatetimeIndex(holidays, tz='UTC') return holidays.union(future_holidays)
def flush_stock_codes(): df = stock_code_reader.read() df['status'] = df.status.map(lambda x: Status(x)) with session_scope(False) as sess: query = sess.query(Stock.code).order_by(Stock.code) present = set([x[0] for x in query.all()]) new_codes = set(df.index.values).difference(present) for code in sorted(new_codes): s = Stock() s.code = code s.name = df.loc[code, 'name'] s.latest_status = df.loc[code, 'status'] sess.add(s) logger.info('{},新增代码:{}'.format(s.__tablename__, code)) log_to_db(Stock.__tablename__, True, 1, Action.INSERT, code)
def get_cn_benchmark_returns(symbol): """ 获取指数基准收益率,默认代码`000001` Parameters ---------- symbol : str Benchmark symbol for which we're getting the returns. """ with session_scope() as sess: query = sess.query( IndexDaily.date, IndexDaily.change_pct).filter(IndexDaily.code == symbol) df = pd.DataFrame.from_records(query.all()) s = pd.Series(data=df[1].values / 100, index=pd.DatetimeIndex(df[0].values)) return s.sort_index().tz_localize('UTC').dropna()
def log_to_db(table_name, status, rows, action, code=None, start=None, end=None): """记录刷新数据""" with session_scope() as sess: rr = RefreshRecord(table_name=table_name, status=status, row=rows, action=action, code=code, start=start, end=end) sess.add(rr)
def fetch_single_quity_adjustments(stock_code, start, end): """ 从本地数据库读取股票期间分红派息数据 Parameters ---------- stock_code : str 要获取数据的股票代码 start : datetime-like 自开始日期 end : datetime-like 至结束日期 return ---------- DataFrame对象 Examples -------- >>> fetch_single_quity_adjustments('600000', '2010-4-1', '2018-4-16') symbol date amount ratio record_date pay_date listing_date 0 600000 2010-06-10 0.150 0.3 2010-06-09 2010-06-11 2010-06-10 1 600000 2011-06-03 0.160 0.3 2011-06-02 2011-06-07 2011-06-03 2 600000 2012-06-26 0.300 0.0 2012-06-25 2012-06-26 2012-06-26 3 600000 2013-06-03 0.550 0.0 2013-05-31 2013-06-03 2013-06-03 4 600000 2014-06-24 0.660 0.0 2014-06-23 2014-06-24 2014-06-24 5 600000 2015-06-23 0.757 0.0 2015-06-19 2015-06-23 2015-06-23 6 600000 2016-06-23 0.515 0.1 2016-06-22 2016-06-24 2016-06-23 7 600000 2017-05-25 0.200 0.3 2017-05-24 2017-05-26 2017-05-25 """ start = pd.Timestamp(start).date() end = pd.Timestamp(end).date() with session_scope() as sess: query = sess.query(Adjustment.code, Adjustment.date, Adjustment.A002_派息, Adjustment.A003_送股, Adjustment.A004_股权登记日, Adjustment.A005_除权基准日, Adjustment.A006_红股上市日) query = query.filter(Adjustment.code == stock_code) query = query.filter(Adjustment.date.between(start, end)) df = pd.DataFrame.from_records(query.all()) if df.empty: # 返回一个空表 return pd.DataFrame(columns=ADJUSTMENT_COLS) df.columns = ADJUSTMENT_COLS return df
def _insert(row_data, date_): with session_scope() as sess: sd = StockDaily( code=row_data['SYMBOL'], date=date_, A001_名称=row_data['NAME'], A002_开盘价=row_data['OPEN'], A003_最高价=row_data['HIGH'], A004_最低价=row_data['LOW'], A005_收盘价=row_data['PRICE'], A006_成交量=row_data['VOLUME'], A008_换手率=row_data['TURNOVER'], A009_前收盘=row_data['YESTCLOSE'], A011_涨跌幅=row_data['PERCENT'], A012_总市值=row_data['TCAP'], A013_流通市值=row_data['MCAP'], ) sess.add(sd)
def _save_short_name(): """存储股票简称变动历史""" with session_scope() as sess: query = sess.query(StockDaily.code, StockDaily.date, StockDaily.A001_名称).group_by( StockDaily.code, StockDaily.A001_名称) df = pd.DataFrame.from_records(query.all()) df.columns = ['sid', 'asof_date', 'short_name'] df['asof_date'] = pd.to_datetime(df['asof_date'], utc=True) df['timestamp'] = df['asof_date'] # - pd.Timedelta(days=1) df['sid'] = df['sid'].map(lambda x: int(x)) table_name = 'short_names' # 转换为bcolz格式并存储 rootdir = bcolz_table_path(table_name) if os.path.exists(rootdir): rmtree(rootdir) odo(df, rootdir) logger.info('表:{},数据存储路径:{}'.format(table_name, rootdir))
def get_treasury_data(start_date, end_date): start_date, end_date = sanitize_dates(start_date, end_date) # 确保为date类型 start_date = pd.Timestamp(start_date).date() end_date = pd.Timestamp(end_date).date() with session_scope() as sess: query = sess.query(Treasury.date, Treasury.m1, Treasury.m3, Treasury.m6, Treasury.y1, Treasury.y2, Treasury.y3, Treasury.y5, Treasury.y7, Treasury.y10, Treasury.y20, Treasury.y30).filter( Treasury.date.between(start_date, end_date)) df = pd.DataFrame.from_records(query.all()) df.columns = TREASURY_COL_NAMES df.set_index(keys='date', inplace=True) df.index = pd.DatetimeIndex(df.index) # 缺少2/7年数据,使用简单平均插值 df['2year'] = (df['1year'] + df['3year']) / 2 df['7year'] = (df['5year'] + df['10year']) / 2 return df.tz_localize('UTC')
def concept_info(): """股票概念""" id_maps, _ = field_code_concept_maps() with session_scope() as sess: query = sess.query( StockCategory.stock_code, StockCategory.category_code, ).filter(Category.code == StockCategory.category_code).filter( Category.label == '概念').filter( ~StockCategory.stock_code.startswith('9')).filter( ~StockCategory.stock_code.startswith('2')) df = pd.DataFrame.from_records(query.all()) df.columns = ['sid', 'concept'] out = pd.pivot_table(df, values='concept', index='sid', columns='concept', aggfunc=np.count_nonzero, fill_value=0) out.rename(columns=id_maps, inplace=True) return out.astype('bool').reset_index()
def get_start_dates(): """ 股票上市日期 Examples -------- >>> df = get_start_dates() >>> df.head() symbol start_date 0 000001 1991-04-03 1 000002 1991-01-29 2 000003 1991-01-14 3 000004 1991-01-14 4 000005 1990-12-10 """ col_names = ['symbol', 'start_date'] with session_scope() as sess: query = sess.query(Issue.code, Issue.A004_上市日期).filter(Issue.A004_上市日期.isnot(None)) df = pd.DataFrame.from_records(query.all()) df.columns = col_names return df
def flush_shareholder(codes=None): """ 刷新股票股东信息 说明: 如初始化则包含所有曾经上市的股票代码,含已经退市 否则仅包含当前在市的股票代码 """ # if init: # insert_preprocessed_data() if codes is None: codes = get_all_codes(False) else: codes = ensure_list(codes) # 按代码循环 for code in codes: with session_scope() as sess: # 按类型循环 for type_ in (ShareholderType.main, ShareholderType.circulating, ShareholderType.fund): _flush_by(sess, code, type_)
def get_latest_short_name(): """ 获取股票最新股票简称 Examples -------- >>> df = get_end_dates() >>> df.head() symbol asset_name 0 000001 平安银行 1 000002 万 科A 2 000003 PT金田A 3 000004 国农科技 4 000005 世纪星源 """ col_names = ['symbol', 'asset_name'] with session_scope() as sess: query = sess.query(StockDaily.code, StockDaily.A001_名称).group_by( StockDaily.code).having(func.max(StockDaily.date)) df = pd.DataFrame.from_records(query.all()) df.columns = col_names return df
def flush_trading_calendar(): with session_scope(False) as sess: num = sess.query(func.count(TradingCalendar.date)).scalar() if not num: to_adds = [] for d in trading_days: to_add = TradingCalendar(date=d.date(), is_trading=True) to_adds.append(to_add) for d in non_trading_days: to_add = TradingCalendar(date=d.date(), is_trading=False) to_adds.append(to_add) sess.add_all(to_adds) fmt = '{},初始化数据,其中,交易日:{}天,非交易日:{}天' logger.info(fmt.format(to_add.__tablename__, len(trading_days), len(non_trading_days))) else: last_day = sess.query(func.max(TradingCalendar.date)).scalar() date_rng = pd.date_range(last_day, end_date) for d in date_rng: if d in trading_days: is_trading = True else: is_trading = False old = sess.query(TradingCalendar).filter( TradingCalendar.date == d.date()).one_or_none() if old: old.is_trading = is_trading else: to_add = TradingCalendar( date=d.date(), is_trading=is_trading) sess.add(to_add) sess.commit() info = '交易日' if is_trading else '非交易日' fmt = '{} 刷新' + info + '{}状态' logger.info(fmt.format( TradingCalendar.__tablename__, d.date()))
def has_data(class_): """判断是否存在数据""" with session_scope() as sess: return sess.query(class_.last_updated).limit(1).scalar()
def _delete(): """删除临时数据""" with session_scope() as sess: count = sess.query(StockDaily).filter( StockDaily.A007_成交金额.is_(None)).delete() logger.info('删除临时数据{}行'.format(count))