Code Example #1
File: wy_minute_data.py  Project: liudengfeng/zipline
def _single_minutely_equity(one_day, code, db=None, is_index=False):
    if db is None:
        db = get_db('wy_index_quotes') if is_index else get_db('wy_quotes')
    name = one_day.strftime(r"%Y-%m-%d")
    if name not in db.list_collection_names():
        return pd.DataFrame()
    collection = db[name]
    # Allow for a reporting delay
    start = one_day.replace(hour=9, minute=30)
    end = one_day.replace(hour=15, minute=1)
    predicate = {
        'code': code,
        'time': {
            '$gte': start,
            '$lte': end
        },
    }
    projection = {
        'datetime': '$time',
        'close': '$price',
        'open': 1,
        'high': 1,
        'low': 1,
        'volume': 1,
        '_id': 0
    }
    sort = [('datetime', 1)]
    cursor = collection.find(predicate, projection=projection, sort=sort)
    df = pd.DataFrame.from_records(cursor)
    if df.empty:
        return df
    df['datetime'] = df['datetime'].dt.floor('T')
    df.drop_duplicates(['datetime'], keep='last', inplace=True)
    df.set_index(['datetime'], inplace=True)
    return df
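
A minimal usage sketch (not from the original source) of stitching several sessions together with the helper above; the wrapper name, the dates and the stock code are purely illustrative:

import pandas as pd

def _minutes_for_sessions(sessions, code):
    # Hypothetical convenience wrapper: concatenate the per-day frames
    # returned by _single_minutely_equity for a list of trading sessions.
    frames = [_single_minutely_equity(day, code) for day in sessions]
    frames = [f for f in frames if not f.empty]
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames).sort_index()

# Illustrative call for two consecutive sessions.
sessions = [pd.Timestamp('2020-06-29'), pd.Timestamp('2020-06-30')]
df = _minutes_for_sessions(sessions, '000001')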
Code Example #2
def _refresh(batch, d):
    db = get_db('wy')
    collection = db['业绩预告']
    create_index_for(collection)
    for code in batch:
        # Check the status first to reduce database queries
        if d.get(code, False):
            continue
        if not need_refresh(collection, code):
            d[code] = True
            logger.info(f"股票 {code} 业绩预告 已经刷新")
            continue
        try:
            docs = fetch_yjyg(code)
        except (ValueError, KeyError):
            # Raised when the web page does not exist; ignore it
            # and mark the code as completed
            d[code] = True
            continue
        except Exception as e:
            logger.error(f"股票 {code} 业绩预告 失败 {e}")
            continue
        # Normal path: run the code below
        last_dt = get_max_dt(collection, code)
        for doc in docs:
            doc['股票代码'] = code
            doc[DATE_KEY_1] = pd.to_datetime(doc[DATE_KEY_1], errors='ignore')
            doc['报告日期'] = pd.to_datetime(doc['报告日期'], errors='ignore')
            if doc[DATE_KEY_1] > last_dt:
                doc['更新时间'] = pd.Timestamp('now')
                collection.insert_one(_droped_null(doc))
        logger.info(f"完成股票 {code} 业绩预告 刷新")
        d[code] = True
Code Example #3
def get_dividend_data():
    """现金股利"""
    db = get_db('wy')
    collection = db['分红配股']
    # Use the record date (股权登记日) as asof_date
    # This metric is only used to compute annual dividends and does not involve a "knowledge" date
    pipeline = [
        {
            '$project': {
                '_id': 0,
                'sid': '$股票代码',
                '分红年度': 1,
                AD_FIELD_NAME: '$股权登记日',
                '每股派息': '$派息(每10股)',
            }
        }
    ]
    docs = collection.aggregate(pipeline)
    df = pd.DataFrame.from_records(docs)
    # 2019 -> Timestamp('2019-01-01 00:00:00')
    df['分红年度'] = df['分红年度'].map(_to_timestamp)
    # For missing dates, default to 45 days after the start of the dividend year
    cond = df['asof_date'].isnull()
    df.loc[cond, 'asof_date'] = df.loc[cond, '分红年度'] + pd.Timedelta(days=45)
    # Important: records with no distribution must not be dropped
    # Dividend NaN -> 0.0 keeps the meaning intact and speeds up reads/writes
    values = {'每股派息': 0.0}
    df.fillna(value=values, inplace=True)
    # Convert the value to dividend per share
    df['每股派息'] = df['每股派息'] / 10.0
    df.sort_values(['sid', 'asof_date'], inplace=True, ignore_index=True)
    df['sid'] = df['sid'].astype('int64')
    return df
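
`_to_timestamp` is referenced above but not shown; judging from the `2019 -> Timestamp('2019-01-01 00:00:00')` comment, a plausible sketch (an assumption, not the project's actual helper) is:

import pandas as pd

def _to_timestamp(year):
    # Hypothetical helper: map a dividend year such as 2019 (int or str)
    # to Timestamp('2019-01-01 00:00:00'); pass missing values through.
    if pd.isnull(year):
        return pd.NaT
    return pd.Timestamp(year=int(year), month=1, day=1)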
Code Example #4
def _periodly_report(only_A, item_name):
    # In general, the cut-off date of a periodic financial report equals the report period (报告年度),
    # but after data corrections and similar cases the two may differ
    to_drop = [
        '_id', '股票简称', '机构名称', '合并类型编码', '合并类型', '报表来源编码', '报表来源', '备注',
        '截止日期', '开始日期'
    ]
    db = get_db('cninfo')
    collection = db[item_name]
    pipeline = [{'$project': {k: 0 for k in to_drop}}]
    if only_A:
        pipeline.insert(0, MATCH_ONLY_A)
    ds = collection.aggregate(pipeline)
    df = pd.DataFrame.from_records(ds)
    # Normalize column names
    df.columns = df.columns.map(_normalized_col_name)
    df.rename(columns={
        "股票代码": "sid",
        "报告年度": "asof_date",
        "公告日期": "timestamp"
    },
              inplace=True)
    df['sid'] = df['sid'].map(lambda x: int(x))
    df.sort_values(['sid', 'asof_date'], inplace=True)
    return df
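
`MATCH_ONLY_A` appears in several of these snippets but is not defined here; a plausible sketch of such a `$match` stage, assuming A-share codes are six digits starting with 0, 3 or 6 (not the project's actual definition), might be:

# Hypothetical sketch only; the project's real filter may differ.
MATCH_ONLY_A = {
    '$match': {
        '股票代码': {'$regex': r'^[036]\d{5}$'}
    }
}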
Code Example #5
def get_investment_rating_data():
    """投资评级

    备注

    大量字符写入时间极长,转换为类别,加快写入速度。
    """
    db = get_db('wy')
    collection = db['投资评级']
    pipeline = [
        {
            '$project': {
                '_id': 0,
                'sid': '$股票代码',
                AD_FIELD_NAME: '$评级日期',
                '评级': '$最新评级',
                '分析师': 1,
                '评级机构': 1,
            }
        }
    ]
    docs = collection.aggregate(pipeline)
    df = pd.DataFrame.from_records(docs)
    # The data may not be fully cleaned
    cond = df['sid'].str.match(r"\d{6}")
    df = df[cond]
    df['sid'] = df['sid'].astype('int64')

    # Offset by at least one hour
    # df['asof_date'] -= pd.Timedelta(hours=1)
    cate_cols_pat = ['评级机构', '分析师']
    maps = {}
    for col_pat in cate_cols_pat:
        df, maps = _handle_cate(df, col_pat, maps)
    return df, maps
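
`_handle_cate` is not shown; given the docstring's note about converting text columns to categories to speed up writes, an illustrative sketch (an assumption, not the original implementation) could be:

def _handle_cate(df, col, maps):
    # Illustrative only: encode a text column as integer category codes and
    # record the code -> label mapping so the column can be decoded later.
    cat = df[col].astype('category')
    maps[col] = dict(enumerate(cat.cat.categories))
    df[col] = cat.cat.codes
    return df, maps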
Code Example #6
def get_concept_maps(by='all', to_symbol=True, latest=False):
    """概念对应股票列表

    Args:
        by (str, optional): 分类单位. Defaults to 'all'.
            all 代表合并
        to_symbol (bool, optional): 转换为Equity. Defaults to True.
        latest (bool, optional): 限于最近2年. Defaults to False.

    Returns:
        dict: 以概念名称为键,股票列表为值
    """
    assert by in ('ths', 'tct', 'all')
    db = get_db()
    if by == 'ths':
        collection = db['同花顺概念']
        return _get_concept_maps(collection, latest)
    elif by == 'tct':
        collection = db['腾讯概念']
        return _get_concept_maps(collection, latest)
    else:
        ths = _get_concept_maps(db['同花顺概念'], latest)
        tct = _get_concept_maps(db['腾讯概念'], latest)
        keys = set(list(ths.keys()) + list(tct.keys()))
        res = {}
        for key in keys:
            p1 = ths.get(key, [])
            p2 = tct.get(key, [])
            v = set(p1 + p2)
            if to_symbol:
                v = [symbol(s) for s in v]
            res[key] = v
        return res
Code Example #7
def _fetch_single_index(code, start, end):
    index_code = decode_index_code(code)
    start, end = sanitize_dates(start, end)
    db = get_db('wy_index_daily')
    collection = db[index_code]
    predicate = {'日期': {'$gte': start, '$lte': end}}
    projection = {'_id': 0}
    sort = [('日期', 1)]
    cursor = collection.find(predicate, projection, sort=sort)
    df = pd.DataFrame.from_records(cursor)
    if df.empty:
        return df
    df['股票代码'] = code
    # fill 0
    df['换手率'] = 0.0
    df['流通市值'] = 0.0
    df['总市值'] = 0.0
    # Keep only the required columns
    df = df[WY_DAILY_COL_MAPS.keys()]
    df.rename(columns=WY_DAILY_COL_MAPS, inplace=True)
    df.sort_values('date', inplace=True)
    # fill 0
    cols = [
        'b_close', 'b_high', 'b_low', 'b_open', 'shares_outstanding',
        'total_shares'
    ]
    df.loc[:, cols] = 0.0
    return df
Code Example #8
def _single_minutely_equity(one_day, code, db=None):
    if db is None:
        db = get_db('cjmx')
    name = one_day.strftime(r"%Y-%m-%d")
    if name not in db.list_collection_names():
        return pd.DataFrame()
    collection = db[name]
    # Allow for a reporting delay
    start = one_day.replace(hour=9, minute=30)
    end = one_day.replace(hour=15, minute=1)
    predicate = {
        '股票代码': code,
        '成交时间': {
            '$gte': start,
            '$lte': end
        },
    }
    projection = {
        'datetime': '$成交时间',
        'price': '$成交价',
        'volume': '$成交量',
        '_id': 0
    }
    cursor = collection.find(predicate, projection=projection)
    df = pd.DataFrame.from_records(cursor)
    if df.empty:
        return df
    df.set_index(['datetime'], inplace=True)
    return df
Code Example #9
def get_investment_rating_data(only_A=True):
    """投资评级"""
    item_name = '投资评级'
    to_drop = [
        '_id', '前一次投资评级', '股票简称', '投资评级', '评级变化', '是否首次评级', "目标价格(下限)",
        "目标价格(上限)"
    ]
    db = get_db('cninfo')
    collection = db[item_name]
    pipeline = [{'$project': {k: 0 for k in to_drop}}]
    if only_A:
        pipeline.insert(0, MATCH_ONLY_A)
    ds = collection.aggregate(pipeline)
    df = pd.DataFrame.from_records(ds)

    df.rename(columns={
        "股票代码": "sid",
        "发布日期": "asof_date",
        "投资评级(经调整)": "投资评级",
    },
              inplace=True)
    df.dropna(subset=['投资评级'], inplace=True)
    df['timestamp'] = df['asof_date']
    # Offset by at least one hour
    df['asof_date'] -= pd.Timedelta(hours=1)
    df['sid'] = df['sid'].map(lambda x: int(x))
    return df
Code Example #10
File: wy_gszl.py  Project: liudengfeng/cnswd
def _refresh(batch, d):
    db = get_db('wy')
    collection1 = db[NAMES[0]]
    collection2 = db[NAMES[1]]
    for code in batch:
        # Check the status first to reduce database queries
        if d.get(code, False):
            continue
        if not need_refresh(collection2, code):
            d[code] = True
            logger.info(f"股票 {code} 已经刷新")
            continue
        try:
            doc1, doc2 = fetch_company_info(code)
            doc1['股票代码'] = code
            doc1['更新时间'] = pd.Timestamp('now')
            doc2['股票代码'] = code
            doc2['更新时间'] = pd.Timestamp('now')
            collection1.insert_one(doc1)
            collection2.insert_one(doc2)
            d[code] = True
        except Exception as e:
            logger.error(f"{e}")
            continue
        logger.info(f"完成股票 {code} 刷新")
Code Example #11
def get_ipo():
    # Many stocks have an empty listing date
    db = get_db('wy')
    collection = db['IPO资料']
    docs = collection.find({}, projection={
        '_id': 0,
        '股票代码': 1,
        '上市日期': 1,
    })
    df = pd.DataFrame.from_records(docs)
    df['上市日期'] = pd.to_datetime(df['上市日期'], errors='coerce')
    wy_dates = {
        code: pd.to_datetime(dt, errors='coerce')
        for code, dt in zip(df['股票代码'], df['上市日期'])
    }
    ipo_dates = _listing_date()
    dates = merge(wy_dates, ipo_dates)

    def f(code):
        try:
            return dates[code]
        except KeyError:
            return pd.NaT

    df['上市日期'] = df['股票代码'].map(f)
    df.dropna(inplace=True)
    df.rename(columns={'股票代码': 'sid'}, inplace=True)
    df['sid'] = df['sid'].astype('int64')
    df[AD_FIELD_NAME] = df['上市日期'] - pd.Timedelta(days=1)
    return df
Code Example #12
def _get_report(only_A, item_name, to_drop, col='报告年度', keys=['股票代码', '报告年度']):
    """
    Fetch financial report data

    Uses the announcement date of the reporting-period balance sheet
    """
    if '_id' not in to_drop:
        to_drop.append('_id')

    db = get_db('cninfo')
    collection = db[item_name]
    pipeline = [{'$project': {k: 0 for k in to_drop}}]
    if only_A:
        pipeline.insert(0, MATCH_ONLY_A)
    ds = collection.aggregate(pipeline)
    df = pd.DataFrame.from_records(ds)
    dates = _financial_report_announcement_date(only_A)
    if col != '报告年度':
        # Handle industry rankings
        df['报告年度'] = df.pop(col)
    # Merge so that the announcement date (公告日期) can be used
    df = df.join(dates.set_index(keys), on=keys)
    # Normalize column names
    df.columns = df.columns.map(_normalized_col_name)

    df.rename(columns={
        "股票代码": "sid",
        "报告年度": "asof_date",
        "公告日期": "timestamp"
    },
              inplace=True)
    df['sid'] = df['sid'].map(lambda x: int(x))
    df.sort_values(['sid', 'asof_date'], inplace=True)
    return df
Code Example #13
def get_ths_concept():
    """同花顺股票概念"""
    db = get_db()
    collection = db['同花顺概念']
    pipeline = [{
        '$unwind': {
            'path': '$股票列表'
        }
    }, {
        '$project': {
            '_id': 0,
            '概念名称': 1,
            'asof_date': "$日期",
            'sid': "$股票列表",
        }
    }]
    docs = collection.aggregate(pipeline)
    df = pd.DataFrame.from_records(docs)
    df = pd.pivot_table(df,
                        values='概念名称',
                        index=['asof_date', 'sid'],
                        columns='概念名称',
                        aggfunc=np.count_nonzero,
                        fill_value=0)
    # Normalize column names: a name must not start with an underscore or a digit
    # and must not contain the '.' character
    d = get_bcolz_col_names(df.columns)
    df.columns = get_bcolz_col_names(d.values())
    df = df.astype(bool).reset_index()
    # Select stocks (the raw data contains invalid records)
    cond = df['sid'].str.match(r"\d{6}")
    df = df[cond]
    df['sid'] = df['sid'].astype('int64')
    return df, d
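
`get_bcolz_col_names` maps raw concept names to bcolz-safe column names but is not defined in the snippet; a rough, purely illustrative per-name normalizer following the constraints in the comments (not the project's implementation) might look like:

import re

def _safe_bcolz_name(name):
    # Illustrative only: produce a name with no '.' characters that does not
    # start with an underscore or a digit.
    name = str(name).replace('.', '_')
    if re.match(r'^[0-9_]', name):
        name = 'C' + name
    return name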
Code Example #14
def get_treasury_data(start, end):
    """期间国库券利率

    Arguments:
        start {date like} -- 开始日期
        end {date like} -- 结束日期

    Returns:
        DataFrame -- 期间利率

    Example:
    >>> start, end = '2020-03-10', '2020-03-15'
    >>> get_treasury_data(start, end).iloc[:3,:5]
                                cash    1month    2month    3month    6month
    date
    2020-03-10 00:00:00+00:00  0.016000  0.016231  0.016610  0.016661  0.016991 
    2020-03-11 00:00:00+00:00  0.016000  0.016727  0.016996  0.017001  0.017211 
    2020-03-12 00:00:00+00:00  0.015742  0.016195  0.016993  0.016994  0.017625 
    2020-03-13 00:00:00+00:00  0.014287  0.016395  0.016699  0.016705  0.017953
    """
    start, end = sanitize_dates(start, end)
    db = get_db()
    collection = db['国债利率']
    predicate = {'date': {"$gte": start, "$lte": end}}
    projection = {"_id": 0}
    sort = [("日期", 1)]
    df = pd.DataFrame.from_records(
        collection.find(predicate, projection, sort=sort))
    # df.set_index('date', inplace=True)
    df.index = pd.DatetimeIndex(df.pop('date'))
    # The 2-year rate is missing; interpolate as the simple average of the 1- and 3-year rates
    value = (df['y1'] + df['y3']) / 2
    df.insert(7, '2year', value)
    df.rename(columns=TREASURY_COL_MAPS, inplace=True)
    return df.tz_localize('UTC')
Code Example #15
def get_performance_forecaste_data(only_A=True):
    """上市公司业绩预告"""
    item_name = '上市公司业绩预告'
    # Reduce the amount written; keep the `业绩类型` column
    to_drop = ['_id', '股票简称', '业绩类型编码', '业绩变化原因', '报告期最新记录标识', '备注']
    db = get_db('cninfo')
    collection = db[item_name]
    pipeline = [{'$project': {k: 0 for k in to_drop}}]
    if only_A:
        pipeline.insert(0, MATCH_ONLY_A)
    ds = collection.aggregate(pipeline)
    df = pd.DataFrame.from_records(ds)

    # Performance forecasts reflect future events

    cond = df['公告日期'].isnull()
    df.loc[cond, '公告日期'] = df.loc[cond, '报告年度'] - pd.Timedelta(days=45)
    # Keep the `报告年度` column
    df.rename(
        columns={
            "股票代码": "sid",
            # "报告年度": "asof_date",
            "公告日期": "timestamp",
        },
        inplace=True)
    # Define asof_date as one hour before the timestamp
    df['asof_date'] = df['timestamp'] - pd.Timedelta(hours=1)
    df['sid'] = df['sid'].map(lambda x: int(x))
    # In the raw szx (深证信) data, stock code "002746" has
    # 公告日期 2013-10-13 with 报告年度 2016-09-30,
    # i.e. a forecast made three years in advance, which is implausible and must be dropped.
    # In general, a forecast is not published more than one quarter ahead of the report period.
    cond = df['报告年度'] - df['timestamp'] < pd.Timedelta(days=90)
    df = df.loc[cond, :]
    return df
Code Example #16
def get_concept_info(only_A=True):
    """股票概念编码信息

    Keyword Arguments:
        only_A {bool} -- 只包含A股代码 (default: {True})

    Returns:
        pd.DataFrame -- 股票概念编码信息表

    Example:
    >>> get_concept_info().head(3)
    sid   A001   A002   A003   A004   A005  ...   A205
      1  False  False  False  False  False  ...  False
      2  False  False  False  False  False  ...  False
      4  False  False  False   True  False  ...  False
    """
    db = get_db()
    collection = db['同花顺概念']
    pipeline = [
        {
            '$unwind': {
                'path': '$股票列表'
            }
        },
        {
            '$project': {
                '_id': 0,
                '概念编码': 1,
                # '概念名称': 1,
                '股票列表': 1
            }
        }
    ]
    ds = collection.aggregate(pipeline)

    def func(x):
        if only_A:
            return A_STOCK_PAT.match(x['股票列表'])
        else:
            return STOCK_PAT.match(x['股票列表'])

    ds = filter(func, ds)
    df = pd.DataFrame.from_records(ds)
    df.rename(columns={'股票列表': 'sid'}, inplace=True)

    out = pd.pivot_table(df,
                         values='概念编码',
                         index='sid',
                         columns='概念编码',
                         aggfunc=np.count_nonzero,
                         fill_value=0)

    id_maps, _ = field_code_concept_maps()
    out.rename(columns=id_maps, inplace=True)
    out = out.astype('bool').reset_index()
    out['sid'] = out['sid'].map(lambda x: int(x))
    return out
Code Example #17
def get_short_name_changes():
    """股票简称变动历史"""
    db = get_db('wy_stock_daily')
    codes = db.list_collection_names()
    # 3878 stocks take about 48s
    with ThreadPoolExecutor(MAX_WORKER) as pool:
        r = pool.map(_change_hist, codes)
    df = pd.concat(r, ignore_index=True)
    return df
Code Example #18
def get_short_name_changes(only_A=True):
    """股票简称变动历史"""
    db = get_db('wy_stock_daily')
    codes = db.list_collection_names()
    if only_A:
        codes = filter_a(codes)
    func = partial(_change_hist, db=db)
    # 3878 stocks take about 48s
    with ThreadPoolExecutor(MAX_WORKER) as pool:
        r = pool.map(func, codes)
    df = pd.concat(r, ignore_index=True)
    return df
Code Example #19
File: wy_minute_data.py  Project: liudengfeng/zipline
def fetch_single_minutely_equity(code, start, end):
    """
    Read minute-level trade data for a single stock over a period from the local database

    **Note**
        Trading-calendar minutes run over 9:31~11:30 and 13:01~15:00
        In the database, minute-level trade data is stored by date

    Parameters
    ----------
    code : str
        Stock code to fetch data for
    start : datetime-like
        Start date (inclusive)
    end : datetime-like
        End date

    return
    ----------
    DataFrame: DataFrame with OHLCV columns.

    Examples
    --------
    >>> stock_code = '000333'
    >>> start = '2020-06-29'
    >>> end = pd.Timestamp('2020-06-30')
    >>> df = fetch_single_minutely_equity(stock_code, start, end)
    >>> df.tail()
                        close   high    low   open  volume
    2018-04-19 14:56:00  51.55  51.56  51.50  51.55  376400
    2018-04-19 14:57:00  51.55  51.55  51.55  51.55   20000
    2018-04-19 14:58:00  51.55  51.55  51.55  51.55       0
    2018-04-19 14:59:00  51.55  51.55  51.55  51.55       0
    2018-04-19 15:00:00  51.57  51.57  51.57  51.57  353900
    """
    calendar = get_calendar('XSHG')
    fmt = r"%Y-%m-%d"
    dates = calendar.sessions_in_range(start.strftime(fmt),
                                       end.strftime(fmt)).tz_localize(None)
    cols = ['open', 'high', 'low', 'close', 'volume']

    # Minute-level index data
    if len(code) == 7:
        return _index_minute_data(code, dates)

    db = get_db('wy_quotes')
    func = partial(_fetch_single_minutely_equity,
                   stock_code=code,
                   db=db,
                   is_index=False)
    with ThreadPoolExecutor(MAX_WORKER) as executor:
        dfs = executor.map(func, dates)
    return pd.concat(dfs).sort_index()
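
A small sketch of the session minutes described in the docstring note (9:31~11:30 and 13:01~15:00); this helper is illustrative only and assumes nothing beyond pandas:

import pandas as pd

def _session_minutes(day):
    # Illustrative: enumerate the 240 minute labels of one A-share session,
    # i.e. 9:31~11:30 plus 13:01~15:00, as noted in the docstring above.
    day = pd.Timestamp(day).normalize()
    am = pd.date_range(day + pd.Timedelta(hours=9, minutes=31),
                       day + pd.Timedelta(hours=11, minutes=30), freq='T')
    pm = pd.date_range(day + pd.Timedelta(hours=13, minutes=1),
                       day + pd.Timedelta(hours=15), freq='T')
    return am.append(pm)

# len(_session_minutes('2020-06-29')) == 240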
Code Example #20
def _gsjj():
    # The price data includes a dynamic registered capital (注册资本);
    # it is discarded
    db = get_db('wy')
    collection = db['公司简介']
    docs = collection.find({}, projection={
        '_id': 0,
        '股票代码': 1,
        '注册资本': 1,
    })
    df = pd.DataFrame.from_records(docs)
    df['注册资本'] = df['注册资本'].map(_to_float)
    return df.set_index('股票代码')
Code Example #21
def get_q_indicator(name):
    """单季度财务指标"""
    db = get_db('wy')
    collection = db[name]
    docs = collection.find(projection={
        '_id': 0,
        '更新时间': 0,
    })
    df = pd.DataFrame.from_records(docs)
    # Normalize column names
    df.columns = df.columns.map(_normalized_col_name)
    df.rename(columns={'股票代码': 'sid', '报告日期': AD_FIELD_NAME}, inplace=True)
    df['sid'] = df['sid'].astype('int64')
    return df
Code Example #22
def get_yjyg():
    """业绩预告"""
    db = get_db('wy')
    collection = db['业绩预告']
    docs = collection.find(projection={'_id': 0, '更新时间': 0, '预测内容': 0})
    df = pd.DataFrame.from_records(docs)
    # TODO: some performance forecasts have 公告日期 (announcement date) earlier than 报告日期 (report date)
    df.rename(columns={
        '股票代码': 'sid',
        '报告日期': AD_FIELD_NAME,
        '公告日期': TS_FIELD_NAME
    },
              inplace=True)
    df['sid'] = df['sid'].astype('int64')
    return df
Code Example #23
def _get_codes(bundle, m_dir_path):
    # Codes live in subdirectories; ** matches subdirectories of the current directory
    db_codes = [p.stem.split('.')[0] for p in m_dir_path.glob("**/*.bcolz")]
    if 'test' in bundle:
        web_codes = TEST_CODES
    else:
        web_codes = [
            code for code, dt in get_stock_status().items() if dt is not None
        ]
        db = get_db('wy_index_daily')
        index_codes = db.list_collection_names()
        web_codes += [encode_index_code(x) for x in index_codes]
    to_insert = set(web_codes).difference(db_codes)
    to_append = set(web_codes).intersection(db_codes)
    return to_insert, to_append
Code Example #24
def get_treasury_data(start_date, end_date):
    """读取期间资金成本数据

    Parameters
    ----------
    start_date : datetime-like
        开始日期
    end_date : datetime-like
        结束日期

    return
    ----------
    DataFrame: DataFrame对象。

    Examples
    --------
    >>> start_date = '2020-05-15'
    >>> end_date = '2020-05-25'
    >>> df = get_treasury_data(start_date, end_date)
    >>> df.iloc[:5, :5]
        cash	1month	2month	3month	6month
    date					
    2020-05-15 00:00:00+00:00	0.006838	0.009496	0.009506	0.010076	0.011570
    2020-05-18 00:00:00+00:00	0.006838	0.009369	0.009611	0.010414	0.011701
    2020-05-19 00:00:00+00:00	0.009838	0.009425	0.010490	0.010307	0.012016
    2020-05-20 00:00:00+00:00	0.008188	0.009084	0.010712	0.011012	0.012378
    2020-05-21 00:00:00+00:00	0.007028	0.008569	0.010695	0.011032	0.012465
    """
    start, end = sanitize_dates(start_date, end_date)
    db = get_db()
    collection = db['国债利率']
    predicate = {'date': {'$gte': start, '$lte': end}}
    projection = {'_id': 0}
    sort = [('date', 1)]
    cursor = collection.find(predicate, projection, sort=sort)
    df = pd.DataFrame.from_records(cursor)
    # The 2-year rate is missing; interpolate as the simple average of the 1- and 3-year rates
    value = (df['y1'] + df['y3']) / 2
    df.insert(7, '2year', value)
    df.rename(columns=TREASURY_COL_MAPS, inplace=True)
    df.set_index('date', inplace=True)
    df = df.tz_localize('UTC')
    calendar = get_calendar('XSHG')
    start = start.tz_localize('UTC')
    end = end.tz_localize('UTC')
    sessions = calendar.sessions_in_range(start, end)
    # Must stay aligned with the trading calendar
    return df.reindex(sessions).fillna(method='ffill')
Code Example #25
def gen_asset_metadata(only_in=True, only_A=True, include_index=True):
    """
    Generate symbol metadata

    Parameters
    ----------
    only_in : bool
        Whether to include only currently listed stocks. Defaults to True.
    only_A : bool
        Whether to include only A shares (i.e. exclude B shares); B shares are excluded by default.
    include_index : bool
        Whether to include indexes. Indexes are included by default.

    Examples
    --------
    >>> df = gen_asset_metadata()
    >>> df.head()
        symbol start_date   end_date exchange asset_name first_traded last_traded auto_close_date
    0     000001 1991-04-03 2018-12-21    深交所主板       平安银行   1991-04-03  2018-12-21      2018-12-22
    1     000002 1991-01-29 2018-12-21    深交所主板       万 科A   1991-01-29  2018-12-21      2018-12-22
    2     000004 1991-01-14 2018-12-21    深交所主板       国农科技   1991-01-02  2018-12-21      2018-12-22
    3     000005 1990-12-10 2018-12-21    深交所主板       世纪星源   1991-01-02  2018-12-21      2018-12-22
    4     000006 1992-04-27 2018-12-21    深交所主板       深振业A   1992-04-27  2018-12-21      2018-12-22
    """
    db = get_db('wy_stock_daily')
    codes = db.list_collection_names()
    delisted = get_delist_stock_dates()
    if only_in:
        codes = [code for code in codes if code not in delisted.keys()]

    # More than 3900 stocks
    # With max_workers=8: 67s; with 4565 stocks: 110s
    # With max_workers=4: 54s
    func = partial(_stock_first_and_last, db=db)
    with ThreadPoolExecutor(MAX_WORKER) as pool:
        r = pool.map(func, codes)
    df = pd.concat(r)
    df.sort_values('symbol', inplace=True)
    df['exchange'] = df['symbol'].map(get_exchange)
    df['start_date'] = df['first_traded']
    df['end_date'] = df['last_traded']
    df['auto_close_date'] = df['last_traded'].map(
        lambda x: x + pd.Timedelta(days=1))
    if not include_index:
        return df
    else:
        i = gen_index_metadata()
        return pd.concat([df, i])
Code Example #26
def _get_single_stock_equity(symbol, start_date, end_date, is_index,
                             index_name):
    start_date, end_date = sanitize_dates(start_date, end_date)
    db_name = 'wy_index_daily' if is_index else 'wy_stock_daily'
    db = get_db(db_name)
    collection = db[symbol]
    df = query(collection, start_date, end_date)
    df.columns = DAILY_COLS
    df['change_pct'] = df['change_pct'] / 100.0
    df['date'] = pd.to_datetime(df['date'])
    df.set_index('date', inplace=True)
    df.sort_index(inplace=True)
    res = df.tz_localize('utc')['change_pct']
    res.name = index_name
    # The raw data contains NaN values
    res.fillna(0.0, inplace=True)
    return res
Code Example #27
def get_ggrq():
    """提取财务报告公告日期"""
    db = get_db('wy')
    collection = db['预约披露']
    docs = collection.find(projection={
        '_id': 0,
        'sid': '$股票代码',
        TS_FIELD_NAME: '$实际披露',
        AD_FIELD_NAME: '$报告年度',
    })
    df = pd.DataFrame.from_records(docs)
    df['sid'] = df['sid'].astype('int64')
    df.drop_duplicates(subset=[AD_FIELD_NAME, 'sid'],
                       keep='last',
                       inplace=True)
    df.set_index([AD_FIELD_NAME, 'sid'], inplace=True)
    return df
Code Example #28
def get_margin_data(only_A=True):
    """融资融券数据"""
    db = get_db('cninfo')
    collection = db['融资融券明细']
    projection = {
        '_id': 0,
        '股票简称': 0,
    }
    # sort = [('股票代码', 1), ('交易日期', 1)]
    df = pd.DataFrame.from_records(collection.find(projection=projection))
    df = _select_only_a(df, only_A, '股票代码')
    df.rename(columns={'交易日期': 'timestamp', '股票代码': 'sid'}, inplace=True)
    df['sid'] = df['sid'].map(lambda x: int(x))
    # asof_date is set 8 hours before the timestamp
    df['asof_date'] = df['timestamp'] - pd.Timedelta(hours=8)
    df.sort_values(['sid', 'timestamp'], inplace=True, ignore_index=True)
    return df
Code Example #29
def _fetch_single_equity(stock_code, start, end):
    """读取本地原始数据"""
    start, end = sanitize_dates(start, end)
    db = get_db('wy_stock_daily')
    collection = db[stock_code]
    predicate = {'日期': {'$gte': start, '$lte': end}}
    projection = {'_id': 0}
    sort = [('日期', 1)]
    cursor = collection.find(predicate, projection, sort=sort)
    df = pd.DataFrame.from_records(cursor)
    if df.empty:
        return df
    df['股票代码'] = stock_code
    # Keep only the required columns
    df = df[WY_DAILY_COL_MAPS.keys()]
    df.rename(columns=WY_DAILY_COL_MAPS, inplace=True)
    df.sort_values('date', inplace=True)
    return df
Code Example #30
File: benchmarks_cn.py  Project: liudengfeng/zipline
def get_cn_benchmark_returns(symbol='000300'):
    """获取基准收益率

    Parameters
    ----------
    symbol : str
        Benchmark symbol for which we're getting the returns.

    Returns:
        Series -- benchmark returns
    """
    db = get_db('wy_index_daily')
    collection = db[symbol]
    projection = {'_id': 0, '日期': 1, '涨跌幅': 1}
    df = pd.DataFrame.from_records(collection.find(projection=projection))
    index = pd.DatetimeIndex(df['日期'].values)
    s = pd.Series(df['涨跌幅'].values / 100.0, index=index)
    return s.sort_index().tz_localize('UTC').dropna()
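
A short usage sketch (assuming the local 'wy_index_daily' collection is populated); the cumulative-return line is only one illustrative way of consuming the returned Series:

returns = get_cn_benchmark_returns('000300')
cumulative = (1 + returns).cumprod() - 1  # cumulative benchmark return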