def _single_minutely_equity(one_day, code, db=None, is_index=False):
    """Read one day of per-minute quotes for a single code.

    Parameters
    ----------
    one_day : datetime-like
        The day to read; must support ``strftime`` and ``replace``.
    code : str
        Value matched against the ``code`` field of the stored documents.
    db : database handle, optional
        When omitted, ``get_db('wy_index_quotes')`` is used if ``is_index``
        is true, otherwise ``get_db('wy_quotes')``.
    is_index : bool
        Only consulted to pick the default database.

    Returns
    -------
    pd.DataFrame
        Indexed by ``datetime`` floored to the minute, one row per minute
        (the last record inside each minute wins).  An empty frame is
        returned when the day has no collection or no matching rows.
    """
    if db is None:
        db = get_db('wy_index_quotes') if is_index else get_db('wy_quotes')
    name = one_day.strftime(r"%Y-%m-%d")
    if name not in db.list_collection_names():
        return pd.DataFrame()
    collection = db[name]
    # Quotes arrive with a delay, so the window runs one minute past 15:00.
    start = one_day.replace(hour=9, minute=30)
    end = one_day.replace(hour=15, minute=1)
    predicate = {
        'code': code,
        'time': {
            '$gte': start,
            '$lte': end
        },
    }
    projection = {
        'datetime': '$time',
        'close': '$price',
        'open': 1,
        'high': 1,
        'low': 1,
        'volume': 1,
        '_id': 0
    }
    # Sort on the stored field: MongoDB sorts before it projects, so the
    # projected alias ``datetime`` is not available as a sort key.  A
    # chronological order is required for ``keep='last'`` below.
    sort = [('time', 1)]
    cursor = collection.find(predicate, projection=projection, sort=sort)
    df = pd.DataFrame.from_records(cursor)
    if df.empty:
        return df
    # 'min' is the non-deprecated spelling of the minute frequency alias.
    df['datetime'] = df['datetime'].dt.floor('min')
    df.drop_duplicates(['datetime'], keep='last', inplace=True)
    df.set_index(['datetime'], inplace=True)
    return df
def _refresh(batch, d):
    """Refresh the stored earnings-forecast documents for a batch of codes.

    ``d`` is a mutable mapping of code -> done flag.  A code is flagged as
    done when it needs no refresh, when fetching raises ``ValueError`` or
    ``KeyError``, or after its new documents have been written.  Any other
    fetch failure is logged and the code is left unflagged.
    """
    collection = get_db('wy')['业绩预告']
    create_index_for(collection)
    for code in batch:
        # Consult the status mapping first to save a database query.
        if d.get(code, False):
            continue
        if not need_refresh(collection, code):
            d[code] = True
            logger.info(f"股票 {code} 业绩预告 已经刷新")
            continue
        try:
            records = fetch_yjyg(code)
        except (ValueError, KeyError):
            # Raised when the web page does not exist: ignore it and
            # treat the code as finished.
            d[code] = True
            continue
        except Exception as e:
            logger.error(f"股票 {code} 业绩预告 失败 {e}")
            continue
        # Normal path: store only documents newer than what is on file.
        newest = get_max_dt(collection, code)
        for record in records:
            record['股票代码'] = code
            for key in (DATE_KEY_1, '报告日期'):
                record[key] = pd.to_datetime(record[key], errors='ignore')
            if not (record[DATE_KEY_1] > newest):
                continue
            record['更新时间'] = pd.Timestamp('now')
            collection.insert_one(_droped_null(record))
        logger.info(f"完成股票 {code} 业绩预告 刷新")
        d[code] = True
def get_dividend_data():
    """Load cash-dividend records from the ``分红配股`` collection.

    Returns a DataFrame with ``sid`` (int64), ``分红年度`` (mapped through
    ``_to_timestamp``), the as-of date column and ``每股派息`` expressed
    per share, sorted by ``sid`` and ``asof_date``.

    NOTE(review): the projection names the date column ``AD_FIELD_NAME``
    while the rest of the body addresses it as ``'asof_date'`` — this
    presumes the constant equals ``'asof_date'``; confirm.
    """
    # The record date serves as the as-of date.  The figure is only used
    # to compute the yearly dividend, so no "knowledge date" is involved.
    stages = [
        {
            '$project': {
                '_id': 0,
                'sid': '$股票代码',
                '分红年度': 1,
                AD_FIELD_NAME: '$股权登记日',
                '每股派息': '$派息(每10股)',
            }
        }
    ]
    collection = get_db('wy')['分红配股']
    frame = pd.DataFrame.from_records(collection.aggregate(stages))
    # e.g. 2019 -> Timestamp('2019-01-01 00:00:00')
    frame['分红年度'] = frame['分红年度'].map(_to_timestamp)
    # A missing date defaults to 45 days after the dividend year.
    missing = frame['asof_date'].isnull()
    fallback = frame.loc[missing, '分红年度'] + pd.Timedelta(days=45)
    frame.loc[missing, 'asof_date'] = fallback
    # Important: rows without a payout are kept.  NaN -> 0.0 does not
    # change their meaning and speeds up reading and writing.
    # The source quotes the payout per 10 shares; convert to per share.
    frame['每股派息'] = frame['每股派息'].fillna(0.0) / 10.0
    frame = frame.sort_values(['sid', 'asof_date'], ignore_index=True)
    frame['sid'] = frame['sid'].astype('int64')
    return frame
def _periodly_report(only_A, item_name):
    """Load a periodic report collection from the ``cninfo`` database.

    ``item_name`` selects the collection; when ``only_A`` is true the
    ``MATCH_ONLY_A`` stage is placed ahead of the projection.  Columns are
    normalised with ``_normalized_col_name``, then ``股票代码``/``报告年度``/
    ``公告日期`` become ``sid``/``asof_date``/``timestamp``.  The result is
    sorted by ``sid`` and ``asof_date``.
    """
    # The period end date normally equals the report year, but corrections
    # can make them differ, so the end/start dates are dropped as well.
    excluded = (
        '_id', '股票简称', '机构名称', '合并类型编码', '合并类型', '报表来源编码', '报表来源',
        '备注', '截止日期', '开始日期',
    )
    stages = [MATCH_ONLY_A] if only_A else []
    stages.append({'$project': dict.fromkeys(excluded, 0)})
    collection = get_db('cninfo')[item_name]
    frame = pd.DataFrame.from_records(collection.aggregate(stages))
    # Normalise column names.
    frame.columns = frame.columns.map(_normalized_col_name)
    renames = {
        "股票代码": "sid",
        "报告年度": "asof_date",
        "公告日期": "timestamp"
    }
    frame = frame.rename(columns=renames)
    frame['sid'] = frame['sid'].map(int)
    return frame.sort_values(['sid', 'asof_date'])
def get_investment_rating_data():
    """Load investment ratings from the ``投资评级`` collection of ``wy``.

    Writing large amounts of text is very slow, so the text columns are
    passed through ``_handle_cate`` (category conversion) to speed it up.

    Returns
    -------
    tuple
        ``(df, maps)`` — the rating frame with an int64 ``sid`` column and
        the mapping object accumulated by ``_handle_cate`` for the
        ``评级机构`` and ``分析师`` columns.
    """
    db = get_db('wy')
    collection = db['投资评级']
    pipeline = [
        {
            '$project': {
                '_id': 0,
                'sid': '$股票代码',
                AD_FIELD_NAME: '$评级日期',
                '评级': '$最新评级',
                '分析师': 1,
                '评级机构': 1,
            }
        }
    ]
    docs = collection.aggregate(pipeline)
    df = pd.DataFrame.from_records(docs)
    # The raw data may not be fully cleaned.  ``na=False`` turns missing
    # codes into a plain False instead of a NaN that cannot be used as a
    # boolean mask; ``.copy()`` makes the column assignment below write to
    # an independent frame rather than to a filtered slice.
    cond = df['sid'].str.match(r"\d{6}", na=False)
    df = df[cond].copy()
    df['sid'] = df['sid'].astype('int64')
    cate_cols_pat = ['评级机构', '分析师']
    maps = {}
    for col_pat in cate_cols_pat:
        df, maps = _handle_cate(df, col_pat, maps)
    return df, maps
def get_concept_maps(by='all', to_symbol=True, latest=False):
    """Map concept names to the stocks that belong to them.

    Args:
        by (str, optional): Source of the classification, one of ``'ths'``,
            ``'tct'`` or ``'all'`` (both sources merged). Defaults to 'all'.
        to_symbol (bool, optional): For ``by='all'`` only, convert every
            member through ``symbol``. Defaults to True.  The single-source
            branches return the helper's result unchanged.
        latest (bool, optional): Forwarded to ``_get_concept_maps``; the
            original note says it restricts to the last two years.
            Defaults to False.

    Returns:
        dict: Concept name -> members.  For ``by='all'`` the value is a
        list of converted symbols when ``to_symbol`` is true and a set of
        raw members otherwise.
    """
    assert by in ('ths', 'tct', 'all')
    db = get_db()
    if by == 'ths':
        return _get_concept_maps(db['同花顺概念'], latest)
    if by == 'tct':
        return _get_concept_maps(db['腾讯概念'], latest)
    # Honour ``latest`` for the merged view too; it used to be dropped
    # here, so by='all' always fell back to the helper's default.
    ths = _get_concept_maps(db['同花顺概念'], latest)
    tct = _get_concept_maps(db['腾讯概念'], latest)
    res = {}
    for key in set(ths) | set(tct):
        members = set(ths.get(key, [])) | set(tct.get(key, []))
        res[key] = [symbol(s) for s in members] if to_symbol else members
    return res
def _fetch_single_index(code, start, end):
    """Read daily rows for one index between ``start`` and ``end``.

    The collection is looked up by ``decode_index_code(code)`` in the
    ``wy_index_daily`` database.  Columns are reduced and renamed through
    ``WY_DAILY_COL_MAPS``; fields that the body fills with constants are
    set to 0.0.  An empty frame is returned when nothing matches.
    """
    index_code = decode_index_code(code)
    start, end = sanitize_dates(start, end)
    collection = get_db('wy_index_daily')[index_code]
    cursor = collection.find(
        {'日期': {'$gte': start, '$lte': end}},
        {'_id': 0},
        sort=[('日期', 1)],
    )
    raw = pd.DataFrame.from_records(cursor)
    if raw.empty:
        return raw
    raw['股票代码'] = code
    # Zero-fill the source columns set here before the column selection.
    for zero_col in ('换手率', '流通市值', '总市值'):
        raw[zero_col] = 0.0
    # Keep only the required columns, under their mapped names.
    out = raw[WY_DAILY_COL_MAPS.keys()].rename(columns=WY_DAILY_COL_MAPS)
    out = out.sort_values('date')
    # Zero-fill the renamed columns listed below.
    zeroed = [
        'b_close', 'b_high', 'b_low', 'b_open', 'shares_outstanding',
        'total_shares'
    ]
    out.loc[:, zeroed] = 0.0
    return out
def _single_minutely_equity(one_day, code, db=None):
    """Read one day of trade records for a single stock.

    ``db`` defaults to ``get_db('cjmx')``; the collection is named after
    the day (``YYYY-MM-DD``).  Returns a frame indexed by ``datetime`` with
    the projected ``price`` and ``volume`` fields, or an empty frame when
    the day has no collection or no matching rows.
    """
    database = get_db('cjmx') if db is None else db
    day_name = one_day.strftime(r"%Y-%m-%d")
    if day_name not in database.list_collection_names():
        return pd.DataFrame()
    # Records arrive with a delay, so the window runs one minute past 15:00.
    window = {
        '$gte': one_day.replace(hour=9, minute=30),
        '$lte': one_day.replace(hour=15, minute=1)
    }
    fields = {
        'datetime': '$成交时间',
        'price': '$成交价',
        'volume': '$成交量',
        '_id': 0
    }
    cursor = database[day_name].find(
        {'股票代码': code, '成交时间': window},
        projection=fields,
    )
    trades = pd.DataFrame.from_records(cursor)
    if trades.empty:
        return trades
    return trades.set_index(['datetime'])
def get_investment_rating_data(only_A=True):
    """Load investment ratings from the ``cninfo`` database.

    The adjusted rating column is exposed as ``投资评级``; rows without it
    are removed.  ``timestamp`` holds the publication date and
    ``asof_date`` is one hour earlier.  ``only_A`` prepends the
    ``MATCH_ONLY_A`` stage to the aggregation.
    """
    excluded = (
        '_id', '前一次投资评级', '股票简称', '投资评级', '评级变化', '是否首次评级',
        "目标价格(下限)", "目标价格(上限)",
    )
    stages = [MATCH_ONLY_A] if only_A else []
    stages.append({'$project': dict.fromkeys(excluded, 0)})
    collection = get_db('cninfo')['投资评级']
    frame = pd.DataFrame.from_records(collection.aggregate(stages))
    renames = {
        "股票代码": "sid",
        "发布日期": "asof_date",
        "投资评级(经调整)": "投资评级",
    }
    frame = frame.rename(columns=renames).dropna(subset=['投资评级'])
    frame['timestamp'] = frame['asof_date']
    # The as-of date must precede the timestamp by at least one hour.
    frame['asof_date'] = frame['asof_date'] - pd.Timedelta(hours=1)
    frame['sid'] = frame['sid'].map(int)
    return frame
def _refresh(batch, d):
    """Fetch and store the two company-info documents for each code.

    ``d`` is a mutable mapping of code -> done flag.  Codes already flagged
    are skipped; codes that need no refresh are flagged and logged.  Any
    exception while fetching or inserting is logged and the code is left
    unflagged.
    """
    db = get_db('wy')
    first_coll, second_coll = db[NAMES[0]], db[NAMES[1]]
    for code in batch:
        # Consult the status mapping first to save a database query.
        if d.get(code, False):
            continue
        if not need_refresh(second_coll, code):
            d[code] = True
            logger.info(f"股票 {code} 已经刷新")
            continue
        try:
            first_doc, second_doc = fetch_company_info(code)
            # Stamp both documents before either one is written.
            for doc in (first_doc, second_doc):
                doc['股票代码'] = code
                doc['更新时间'] = pd.Timestamp('now')
            first_coll.insert_one(first_doc)
            second_coll.insert_one(second_doc)
            d[code] = True
        except Exception as e:
            logger.error(f"{e}")
        else:
            logger.info(f"完成股票 {code} 刷新")
def get_ipo():
    """Build a table of listing dates keyed by ``sid``.

    Dates read from the ``IPO资料`` collection are combined with
    ``_listing_date()`` through ``merge``; rows that still contain a
    missing value are dropped.  The ``AD_FIELD_NAME`` column is the
    listing date minus one day.
    """
    # Many stocks have an empty listing date in this collection.
    collection = get_db('wy')['IPO资料']
    fields = {
        '_id': 0,
        '股票代码': 1,
        '上市日期': 1,
    }
    frame = pd.DataFrame.from_records(collection.find({}, projection=fields))
    frame['上市日期'] = pd.to_datetime(frame['上市日期'], errors='coerce')
    stored = {}
    for stock, listed in zip(frame['股票代码'], frame['上市日期']):
        stored[stock] = pd.to_datetime(listed, errors='coerce')
    combined = merge(stored, _listing_date())

    def _lookup(stock):
        # Codes absent from the combined mapping get a missing date.
        try:
            return combined[stock]
        except KeyError:
            return pd.NaT

    frame['上市日期'] = frame['股票代码'].map(_lookup)
    frame = frame.dropna().rename(columns={'股票代码': 'sid'})
    frame['sid'] = frame['sid'].astype('int64')
    frame[AD_FIELD_NAME] = frame['上市日期'] - pd.Timedelta(days=1)
    return frame
def _get_report(only_A,
                item_name,
                to_drop,
                col='报告年度',
                keys=('股票代码', '报告年度')):
    """Load financial report data joined with announcement dates.

    The announcement date comes from ``_financial_report_announcement_date``
    (per the original note: the balance-sheet announcement date of the
    reporting period).

    Parameters
    ----------
    only_A : bool
        Prepend the ``MATCH_ONLY_A`` stage and pass the flag on to the
        announcement-date helper.
    item_name : str
        Collection name inside the ``cninfo`` database.
    to_drop : iterable of str
        Fields excluded by the projection; ``_id`` is always excluded.
        The argument itself is never modified.
    col : str
        Column that carries the report period; when it is not ``报告年度``
        it is moved to a column of that name.
    keys : sequence of str
        Join keys shared with the announcement-date table.

    Returns
    -------
    pd.DataFrame
        Normalised column names with ``sid``, ``asof_date`` and
        ``timestamp``, sorted by ``sid`` and ``asof_date``.
    """
    # Work on copies: appending to the caller's list leaked '_id' into
    # lists that callers may reuse, and a list default is shared state.
    excluded = list(to_drop)
    if '_id' not in excluded:
        excluded.append('_id')
    keys = list(keys)
    db = get_db('cninfo')
    collection = db[item_name]
    pipeline = [{'$project': {k: 0 for k in excluded}}]
    if only_A:
        pipeline.insert(0, MATCH_ONLY_A)
    ds = collection.aggregate(pipeline)
    df = pd.DataFrame.from_records(ds)
    dates = _financial_report_announcement_date(only_A)
    if col != '报告年度':
        # Industry-ranking data stores the period under another name.
        df['报告年度'] = df.pop(col)
    # Bring in the announcement date.
    df = df.join(dates.set_index(keys), on=keys)
    # Normalise column names.
    df.columns = df.columns.map(_normalized_col_name)
    df.rename(columns={
        "股票代码": "sid",
        "报告年度": "asof_date",
        "公告日期": "timestamp"
    },
              inplace=True)
    df['sid'] = df['sid'].map(int)
    df.sort_values(['sid', 'asof_date'], inplace=True)
    return df
def get_ths_concept():
    """Build a boolean concept-membership table from ``同花顺概念``.

    Returns ``(table, names)``: ``table`` has ``asof_date``, an int64
    ``sid`` and one boolean column per concept; ``names`` is the object
    returned by ``get_bcolz_col_names`` for the pivoted concept columns.
    """
    stages = [
        {'$unwind': {'path': '$股票列表'}},
        {
            '$project': {
                '_id': 0,
                '概念名称': 1,
                'asof_date': "$日期",
                'sid': "$股票列表",
            }
        },
    ]
    collection = get_db()['同花顺概念']
    long_form = pd.DataFrame.from_records(collection.aggregate(stages))
    table = pd.pivot_table(long_form,
                           values='概念名称',
                           index=['asof_date', 'sid'],
                           columns='概念名称',
                           aggfunc=np.count_nonzero,
                           fill_value=0)
    # Column names must not start with an underscore or a digit and must
    # not contain '.'.
    names = get_bcolz_col_names(table.columns)
    table.columns = get_bcolz_col_names(names.values())
    table = table.astype(bool).reset_index()
    # Keep stock rows only: the raw data contains illegal records.
    is_stock = table['sid'].str.match(r"\d{6}")
    table = table[is_stock]
    table['sid'] = table['sid'].astype('int64')
    return table, names
def get_treasury_data(start, end):
    """Treasury rates for a period.

    Arguments:
        start {date like} -- first day of the period
        end {date like} -- last day of the period

    Returns:
        DataFrame -- rates indexed by a UTC ``date`` index, with columns
        renamed through ``TREASURY_COL_MAPS``

    Example:
        >>> start, end = '2020-03-10', '2020-03-15'
        >>> get_treasury_data(start, end).iloc[:3, :5]  # doctest: +SKIP
    """
    start, end = sanitize_dates(start, end)
    db = get_db()
    collection = db['国债利率']
    predicate = {'date': {"$gte": start, "$lte": end}}
    projection = {"_id": 0}
    # Sort on the same field the predicate and the index use; the former
    # key '日期' is not referenced anywhere else for this collection.
    sort = [("date", 1)]
    df = pd.DataFrame.from_records(
        collection.find(predicate, projection, sort=sort))
    df.index = pd.DatetimeIndex(df.pop('date'))
    # The 2-year rate is missing: interpolate with the simple average of
    # the 1-year and 3-year rates.
    value = (df['y1'] + df['y3']) / 2
    df.insert(7, '2year', value)
    df.rename(columns=TREASURY_COL_MAPS, inplace=True)
    return df.tz_localize('UTC')
def get_performance_forecaste_data(only_A=True):
    """Load listed-company earnings forecasts from ``cninfo``.

    Parameters
    ----------
    only_A : bool
        Prepend the ``MATCH_ONLY_A`` stage to the aggregation.

    Returns
    -------
    pd.DataFrame
        Forecast rows with ``sid`` (int), ``timestamp`` (announcement
        date), ``asof_date`` (one hour before ``timestamp``) and the
        original ``报告年度`` column.  Rows without a timestamp, and rows
        announced 90 days or more ahead of their report period, are
        removed.
    """
    item_name = '上市公司业绩预告'
    # Keep the write volume small; `业绩类型` is retained.
    to_drop = ['_id', '股票简称', '业绩类型编码', '业绩变化原因', '报告期最新记录标识', '备注']
    db = get_db('cninfo')
    collection = db[item_name]
    pipeline = [{'$project': {k: 0 for k in to_drop}}]
    if only_A:
        pipeline.insert(0, MATCH_ONLY_A)
    ds = collection.aggregate(pipeline)
    df = pd.DataFrame.from_records(ds)
    # A forecast describes a future event, so a missing announcement date
    # is placed 45 days before the report period.
    cond = df['公告日期'].isnull()
    df.loc[cond, '公告日期'] = df.loc[cond, '报告年度'] - pd.Timedelta(days=45)
    # `报告年度` is kept under its own name.
    df.rename(
        columns={
            "股票代码": "sid",
            "公告日期": "timestamp",
        },
        inplace=True)
    # asof_date is defined as one hour before the timestamp.
    df['asof_date'] = df['timestamp'] - pd.Timedelta(hours=1)
    df['sid'] = df['sid'].map(int)
    # The raw data contains e.g. code "002746" announced on 2013-10-13 for
    # report period 2016-09-30 — a forecast three years ahead, which is
    # implausible and must be removed.  Forecasts are normally not
    # published more than a quarter ahead of the report period.
    # The lead has to be measured against `报告年度`: the previous test
    # used `timestamp - asof_date`, which is always one hour, so it never
    # removed anything except rows with a missing timestamp.
    lead = df['报告年度'] - df['timestamp']
    keep = df['timestamp'].notna() & ~(lead >= pd.Timedelta(days=90))
    df = df.loc[keep, :]
    return df
def get_concept_info(only_A=True):
    """Concept-code membership table, one row per stock.

    Keyword Arguments:
        only_A {bool} -- filter members with ``A_STOCK_PAT`` instead of
            ``STOCK_PAT`` (default: {True})

    Returns:
        pd.DataFrame -- integer ``sid`` plus one boolean column per
        concept; the concept-code columns are renamed with the first
        mapping returned by ``field_code_concept_maps()``.

    Example:
        >>> get_concept_info().head(3)  # doctest: +SKIP
    """
    pattern = A_STOCK_PAT if only_A else STOCK_PAT
    stages = [
        {'$unwind': {'path': '$股票列表'}},
        {
            '$project': {
                '_id': 0,
                '概念编码': 1,
                '股票列表': 1
            }
        },
    ]
    collection = get_db()['同花顺概念']
    # Keep only the records whose member code matches the pattern.
    matched = (row for row in collection.aggregate(stages)
               if pattern.match(row['股票列表']))
    long_form = pd.DataFrame.from_records(matched)
    long_form = long_form.rename(columns={'股票列表': 'sid'})
    table = pd.pivot_table(long_form,
                           values='概念编码',
                           index='sid',
                           columns='概念编码',
                           aggfunc=np.count_nonzero,
                           fill_value=0)
    id_maps, _ = field_code_concept_maps()
    table = table.rename(columns=id_maps).astype('bool').reset_index()
    table['sid'] = table['sid'].map(int)
    return table
def get_short_name_changes():
    """History of short-name changes for every stock collection.

    Runs ``_change_hist`` for each collection name of ``wy_stock_daily``
    on a thread pool and concatenates the results with a fresh index.
    """
    codes = get_db('wy_stock_daily').list_collection_names()
    # Original timing note: 3878 stocks took 48 s.
    with ThreadPoolExecutor(MAX_WORKER) as pool:
        frames = list(pool.map(_change_hist, codes))
    return pd.concat(frames, ignore_index=True)
def get_short_name_changes(only_A=True):
    """History of short-name changes for the stock collections.

    Collection names of ``wy_stock_daily`` are passed through ``filter_a``
    when ``only_A`` is true.  ``_change_hist`` is then run for each code
    on a thread pool, sharing one database handle, and the results are
    concatenated with a fresh index.
    """
    db = get_db('wy_stock_daily')
    codes = db.list_collection_names()
    if only_A:
        codes = filter_a(codes)
    worker = partial(_change_hist, db=db)
    # Original timing note: 3878 stocks took 48 s.
    with ThreadPoolExecutor(MAX_WORKER) as pool:
        frames = list(pool.map(worker, codes))
    return pd.concat(frames, ignore_index=True)
def fetch_single_minutely_equity(code, start, end):
    """Read minute-level data for one code from the local database.

    **Note** calendar minutes run 9:31~11:30 and 13:01~15:00.  Minute
    data is stored in one collection per day.

    Parameters
    ----------
    code : str
        Stock code.  A 7-character code is treated as an index and is
        delegated to ``_index_minute_data``.
    start : datetime-like
        First day of the period (inclusive); strings are accepted.
    end : datetime-like
        Last day of the period; strings are accepted.

    Returns
    -------
    DataFrame
        Per-day frames concatenated and sorted by index; an empty frame
        when the range holds no session.

    Examples
    --------
    >>> stock_code = '000333'
    >>> start = '2020-06-29'
    >>> end = pd.Timestamp('2020-06-30')
    >>> df = fetch_single_minutely_equity(stock_code, start, end)  # doctest: +SKIP
    """
    # The documented example passes a plain string, which has no
    # ``strftime``; coerce both bounds first.
    start, end = pd.Timestamp(start), pd.Timestamp(end)
    calendar = get_calendar('XSHG')
    fmt = r"%Y-%m-%d"
    dates = calendar.sessions_in_range(start.strftime(fmt),
                                       end.strftime(fmt)).tz_localize(None)
    # Index minute-level data.
    if len(code) == 7:
        return _index_minute_data(code, dates)
    db = get_db('wy_quotes')
    # NOTE(review): the per-day reader and its ``stock_code`` keyword are
    # defined outside this block — confirm the name and signature.
    func = partial(_fetch_single_minutely_equity,
                   stock_code=code,
                   db=db,
                   is_index=False)
    with ThreadPoolExecutor(MAX_WORKER) as executor:
        dfs = list(executor.map(func, dates))
    if not dfs:
        # pd.concat raises on an empty list; report "no data" instead.
        return pd.DataFrame()
    return pd.concat(dfs).sort_index()
def _gsjj():
    """Registered capital per stock from the ``公司简介`` collection.

    Returns a frame indexed by ``股票代码`` whose ``注册资本`` column has been
    mapped through ``_to_float``.
    """
    # Original note: the price data already carries a dynamic registered
    # capital, so this reader was marked as abandoned.
    fields = {
        '_id': 0,
        '股票代码': 1,
        '注册资本': 1,
    }
    collection = get_db('wy')['公司简介']
    frame = pd.DataFrame.from_records(collection.find({}, projection=fields))
    capital = frame['注册资本'].map(_to_float)
    frame['注册资本'] = capital
    return frame.set_index('股票代码')
def get_q_indicator(name):
    """Single-quarter financial indicators from collection ``name``.

    Column names are normalised with ``_normalized_col_name``; ``股票代码``
    becomes an int64 ``sid`` and ``报告日期`` becomes ``AD_FIELD_NAME``.
    """
    hidden = {
        '_id': 0,
        '更新时间': 0,
    }
    collection = get_db('wy')[name]
    frame = pd.DataFrame.from_records(collection.find(projection=hidden))
    # Normalise column names.
    frame.columns = frame.columns.map(_normalized_col_name)
    frame = frame.rename(columns={'股票代码': 'sid', '报告日期': AD_FIELD_NAME})
    frame['sid'] = frame['sid'].astype('int64')
    return frame
def get_yjyg():
    """Earnings forecasts from the ``业绩预告`` collection of ``wy``.

    ``股票代码`` becomes an int64 ``sid``; ``报告日期`` and ``公告日期`` are
    renamed to ``AD_FIELD_NAME`` and ``TS_FIELD_NAME``.
    """
    hidden = {'_id': 0, '更新时间': 0, '预测内容': 0}
    collection = get_db('wy')['业绩预告']
    frame = pd.DataFrame.from_records(collection.find(projection=hidden))
    # TODO: some forecasts have an announcement date earlier than the
    # report date.
    renames = {
        '股票代码': 'sid',
        '报告日期': AD_FIELD_NAME,
        '公告日期': TS_FIELD_NAME
    }
    frame = frame.rename(columns=renames)
    frame['sid'] = frame['sid'].astype('int64')
    return frame
def _get_codes(bundle, m_dir_path):
    """Split the wanted codes into those to insert and those to append.

    Parameters
    ----------
    bundle : str
        Bundle name; when it contains ``'test'`` the codes come from
        ``TEST_CODES``, otherwise from ``get_stock_status()`` entries
        whose value is not ``None``.
    m_dir_path : pathlib.Path
        Directory searched recursively for ``*.bcolz`` entries; the part
        of each stem before the first ``'.'`` is taken as a stored code.

    Returns
    -------
    tuple of set
        ``(to_insert, to_append)`` — wanted codes without and with an
        existing entry.  Index codes from ``wy_index_daily`` are always
        part of the wanted set.
    """
    # Codes live in sub-directories; ``**`` walks them recursively.
    db_codes = {p.stem.split('.')[0] for p in m_dir_path.glob("**/*.bcolz")}
    if 'test' in bundle:
        # Copy: extending the module-level constant in place made it grow
        # by the index codes on every call.
        web_codes = list(TEST_CODES)
    else:
        web_codes = [
            code for code, dt in get_stock_status().items() if dt is not None
        ]
    db = get_db('wy_index_daily')
    index_codes = db.list_collection_names()
    web_codes.extend(encode_index_code(x) for x in index_codes)
    wanted = set(web_codes)
    to_insert = wanted.difference(db_codes)
    to_append = wanted.intersection(db_codes)
    return to_insert, to_append
def get_treasury_data(start_date, end_date):
    """Read the cost-of-funds (treasury rate) data for a period.

    Parameters
    ----------
    start_date : datetime-like
        First day of the period.
    end_date : datetime-like
        Last day of the period.

    Returns
    -------
    DataFrame
        Rates with columns renamed through ``TREASURY_COL_MAPS``,
        re-indexed to the XSHG sessions of the period (UTC) and
        forward-filled.

    Examples
    --------
    >>> start_date = '2020-05-15'
    >>> end_date = '2020-05-25'
    >>> df = get_treasury_data(start_date, end_date)  # doctest: +SKIP
    >>> df.iloc[:5, :5]  # doctest: +SKIP
    """
    start, end = sanitize_dates(start_date, end_date)
    db = get_db()
    collection = db['国债利率']
    predicate = {'date': {'$gte': start, '$lte': end}}
    projection = {'_id': 0}
    sort = [('date', 1)]
    cursor = collection.find(predicate, projection, sort=sort)
    df = pd.DataFrame.from_records(cursor)
    # The 2-year rate is missing: interpolate with the simple average of
    # the 1-year and 3-year rates.
    value = (df['y1'] + df['y3']) / 2
    df.insert(7, '2year', value)
    df.rename(columns=TREASURY_COL_MAPS, inplace=True)
    df.set_index('date', inplace=True)
    df = df.tz_localize('UTC')
    calendar = get_calendar('XSHG')
    start = start.tz_localize('UTC')
    end = end.tz_localize('UTC')
    sessions = calendar.sessions_in_range(start, end)
    # The index must agree with the trading calendar.  ``ffill()`` is the
    # non-deprecated equivalent of ``fillna(method='ffill')``.
    return df.reindex(sessions).ffill()
def gen_asset_metadata(only_in=True, only_A=True, include_index=True):
    """Generate the asset (symbol) metadata table.

    Parameters
    ----------
    only_in : bool
        Skip codes present in ``get_delist_stock_dates()``.  Default True.
    only_A : bool
        Accepted for interface compatibility.
        NOTE(review): the body never reads this flag — confirm whether
        filtering to A shares is expected here.
    include_index : bool
        Append the rows of ``gen_index_metadata()``.  Default True.

    Returns
    -------
    DataFrame
        One row per code as produced by ``_stock_first_and_last``, sorted
        by ``symbol``, with ``exchange``, ``start_date``, ``end_date`` and
        ``auto_close_date`` (last traded day plus one day) added.

    Examples
    --------
    >>> df = gen_asset_metadata()  # doctest: +SKIP
    >>> df.head()  # doctest: +SKIP
    """
    db = get_db('wy_stock_daily')
    codes = db.list_collection_names()
    delisted = get_delist_stock_dates()
    if only_in:
        listed_only = []
        for stock in codes:
            if stock not in delisted.keys():
                listed_only.append(stock)
        codes = listed_only
    # Original timing notes: more than 3900 stocks; 8 workers took 67 s
    # (110 s for 4565 stocks), 4 workers took 54 s.
    worker = partial(_stock_first_and_last, db=db)
    with ThreadPoolExecutor(MAX_WORKER) as pool:
        meta = pd.concat(pool.map(worker, codes))
    meta = meta.sort_values('symbol')
    meta['exchange'] = meta['symbol'].map(get_exchange)
    meta['start_date'] = meta['first_traded']
    meta['end_date'] = meta['last_traded']
    one_day = pd.Timedelta(days=1)
    meta['auto_close_date'] = meta['last_traded'].map(lambda x: x + one_day)
    if include_index:
        return pd.concat([meta, gen_index_metadata()])
    return meta
def _get_single_stock_equity(symbol, start_date, end_date, is_index,
                             index_name):
    """Daily percentage change of one stock or index as a fraction.

    Reads ``symbol`` from ``wy_index_daily`` (when ``is_index``) or
    ``wy_stock_daily`` via ``query`` and returns the ``change_pct`` series
    divided by 100, indexed by UTC date, named ``index_name``, with
    missing values replaced by 0.0.
    """
    start_date, end_date = sanitize_dates(start_date, end_date)
    if is_index:
        db_name = 'wy_index_daily'
    else:
        db_name = 'wy_stock_daily'
    collection = get_db(db_name)[symbol]
    frame = query(collection, start_date, end_date)
    frame.columns = DAILY_COLS
    frame['change_pct'] = frame['change_pct'] / 100.0
    frame['date'] = pd.to_datetime(frame['date'])
    ordered = frame.set_index('date').sort_index()
    # The raw data contains NaN values.
    res = ordered.tz_localize('utc')['change_pct'].fillna(0.0)
    res.name = index_name
    return res
def get_ggrq():
    """Extract financial-report announcement dates from ``预约披露``.

    Returns
    -------
    pd.DataFrame
        Indexed by ``(AD_FIELD_NAME, sid)`` with the ``TS_FIELD_NAME``
        column taken from ``实际披露``; for duplicated index pairs the last
        record is kept.
    """
    db = get_db('wy')
    collection = db['预约披露']
    docs = collection.find(projection={
        '_id': 0,
        'sid': '$股票代码',
        TS_FIELD_NAME: '$实际披露',
        # Field path, like its siblings: without the leading '$' the value
        # is a literal string, so every row carried the same as-of value
        # and the de-duplication below collapsed to one row per sid.
        AD_FIELD_NAME: '$报告年度',
    })
    df = pd.DataFrame.from_records(docs)
    df['sid'] = df['sid'].astype('int64')
    df.drop_duplicates(subset=[AD_FIELD_NAME, 'sid'],
                       keep='last',
                       inplace=True)
    df.set_index([AD_FIELD_NAME, 'sid'], inplace=True)
    return df
def get_margin_data(only_A=True):
    """Margin trading details from the ``融资融券明细`` collection.

    Rows are filtered by ``_select_only_a`` according to ``only_A``.
    ``交易日期`` becomes ``timestamp``, ``股票代码`` becomes an integer ``sid``
    and ``asof_date`` is ``timestamp`` minus eight hours.  The result is
    sorted by ``sid`` and ``timestamp`` with a fresh index.
    """
    hidden = {
        '_id': 0,
        '股票简称': 0,
    }
    collection = get_db('cninfo')['融资融券明细']
    raw = pd.DataFrame.from_records(collection.find(projection=hidden))
    frame = _select_only_a(raw, only_A, '股票代码')
    frame.rename(columns={'交易日期': 'timestamp', '股票代码': 'sid'}, inplace=True)
    frame['sid'] = frame['sid'].map(int)
    # Eight-hour offset between the as-of date and the timestamp.
    frame['asof_date'] = frame['timestamp'] - pd.Timedelta(hours=8)
    frame.sort_values(['sid', 'timestamp'], inplace=True, ignore_index=True)
    return frame
def _fetch_single_equity(stock_code, start, end):
    """Read the raw local daily rows of one stock for a period.

    Rows come from collection ``stock_code`` of ``wy_stock_daily``; the
    columns are reduced and renamed through ``WY_DAILY_COL_MAPS`` and
    sorted by ``date``.  An empty frame is returned when nothing matches.
    """
    start, end = sanitize_dates(start, end)
    collection = get_db('wy_stock_daily')[stock_code]
    cursor = collection.find(
        {'日期': {'$gte': start, '$lte': end}},
        {'_id': 0},
        sort=[('日期', 1)],
    )
    raw = pd.DataFrame.from_records(cursor)
    if raw.empty:
        return raw
    raw['股票代码'] = stock_code
    # Keep only the required columns, under their mapped names.
    selected = raw[WY_DAILY_COL_MAPS.keys()]
    renamed = selected.rename(columns=WY_DAILY_COL_MAPS)
    return renamed.sort_values('date')
def get_cn_benchmark_returns(symbol='000300'):
    """Benchmark returns for an index.

    Parameters
    ----------
    symbol : str
        Benchmark symbol; used as the collection name in
        ``wy_index_daily``.

    Returns:
        Series -- ``涨跌幅`` divided by 100, indexed by UTC date in
        ascending order, with missing values dropped.
    """
    fields = {'_id': 0, '日期': 1, '涨跌幅': 1}
    collection = get_db('wy_index_daily')[symbol]
    raw = pd.DataFrame.from_records(collection.find(projection=fields))
    returns = pd.Series(raw['涨跌幅'].values / 100.0,
                        index=pd.DatetimeIndex(raw['日期'].values))
    # Localising keeps the order, so a single sort is sufficient.
    return returns.sort_index().tz_localize('UTC').dropna()