Beispiel #1
0
def get_treasury_data(start, end):
    """Treasury yield rates for the requested period.

    Documents are read from the '国债利率' collection of the database
    returned by ``get_db()``. They are filtered on the ``date`` field
    (both ends inclusive) and sorted ascending on that same field.

    Arguments:
        start {date like} -- first date of the period
        end {date like} -- last date of the period

    Returns:
        DataFrame -- rates for the period, indexed by a UTC-localized
            ``DatetimeIndex`` built from the ``date`` field. A '2year'
            column (simple mean of 'y1' and 'y3') is inserted at position 7
            and the columns are then renamed through ``TREASURY_COL_MAPS``.

    Example:
    >>> start, end = '2020-03-10', '2020-03-15'
    >>> get_treasury_data(start, end).iloc[:4,:5]
                                cash    1month    2month    3month    6month
    date
    2020-03-10 00:00:00+00:00  0.016000  0.016231  0.016610  0.016661  0.016991
    2020-03-11 00:00:00+00:00  0.016000  0.016727  0.016996  0.017001  0.017211
    2020-03-12 00:00:00+00:00  0.015742  0.016195  0.016993  0.016994  0.017625
    2020-03-13 00:00:00+00:00  0.014287  0.016395  0.016699  0.016705  0.017953
    """
    start, end = sanitize_dates(start, end)
    db = get_db()
    collection = db['国债利率']
    predicate = {'date': {"$gte": start, "$lte": end}}
    projection = {"_id": 0}
    # Sort on the field the query actually filters on. The previous key
    # ("日期") is not the field used anywhere else in this function, so the
    # result order was effectively unspecified.
    sort = [("date", 1)]
    df = pd.DataFrame.from_records(
        collection.find(predicate, projection, sort=sort))
    # Move the 'date' column into the index as a DatetimeIndex.
    df.index = pd.DatetimeIndex(df.pop('date'))
    # The source has no 2-year tenor; interpolate it as the simple average
    # of the 1-year and 3-year rates.
    value = (df['y1'] + df['y3']) / 2
    df.insert(7, '2year', value)
    df.rename(columns=TREASURY_COL_MAPS, inplace=True)
    return df.tz_localize('UTC')
Beispiel #2
0
def get_treasury_data(start_date, end_date):
    """Treasury rates between ``start_date`` and ``end_date`` (inclusive).

    Rows are queried from the ``Treasury`` table inside a
    ``session_scope('szsh')`` session. A '2year' column is interpolated as
    the mean of the 1-year and 3-year columns, the columns are relabelled
    with ``TREASURY_COL_NAMES`` and the frame is returned indexed by a
    UTC-localized ``DatetimeIndex`` built from the 'date' column.
    """
    start_date, end_date = sanitize_dates(start_date, end_date)
    # The query compares against plain ``date`` objects.
    first_day = pd.Timestamp(start_date).date()
    last_day = pd.Timestamp(end_date).date()
    selected = (
        Treasury.date,
        Treasury.m1,
        Treasury.m3,
        Treasury.m6,
        Treasury.y1,
        Treasury.y3,
        Treasury.y5,
        Treasury.y7,
        Treasury.y10,
        Treasury.y20,
        Treasury.y30,
    )
    with session_scope('szsh') as sess:
        in_period = Treasury.date.between(first_day, last_day)
        rows = sess.query(*selected).filter(in_period).all()
        df = pd.DataFrame.from_records(rows)
        # No 2-year tenor is stored: positions 4 and 5 are y1 and y3, and
        # their simple average is slotted in between them.
        two_year = (df.iloc[:, 4] + df.iloc[:, 5]) / 2
        df.insert(5, '2year', two_year)
        df.columns = TREASURY_COL_NAMES
        df = df.set_index(keys='date')
        df.index = pd.DatetimeIndex(df.index)
    return df.tz_localize('UTC')
Beispiel #3
0
def _fetch_single_index(code, start, end):
    """Read daily data for one index from the 'wy_index_daily' database.

    The collection name is obtained from ``decode_index_code(code)``.
    Documents are filtered on '日期' (both ends inclusive) and sorted
    ascending. An empty frame is returned unchanged when nothing matches.
    Otherwise the frame is reduced to the ``WY_DAILY_COL_MAPS`` columns,
    renamed, sorted by 'date', and the columns that carry no data for an
    index are set to 0.0.
    """
    collection_name = decode_index_code(code)
    start, end = sanitize_dates(start, end)
    collection = get_db('wy_index_daily')[collection_name]
    cursor = collection.find(
        {'日期': {'$gte': start, '$lte': end}},
        {'_id': 0},
        sort=[('日期', 1)],
    )
    df = pd.DataFrame.from_records(cursor)
    if df.empty:
        return df
    df['股票代码'] = code
    # These source columns are zero-filled for an index.
    for zero_col in ('换手率', '流通市值', '总市值'):
        df[zero_col] = 0.0
    # Keep only the mapped columns, then switch to the mapped names.
    df = df[WY_DAILY_COL_MAPS.keys()]
    df = df.rename(columns=WY_DAILY_COL_MAPS)
    df = df.sort_values('date')
    # Zero-fill the renamed columns that are not populated for an index.
    zero_filled = [
        'b_close', 'b_high', 'b_low', 'b_open', 'shares_outstanding',
        'total_shares'
    ]
    df.loc[:, zero_filled] = 0.0
    return df
Beispiel #4
0
def get_symbol_rets(symbol, start=None, end=None):
    """
    Fetch a return series through the currently registered 'returns_func'.

    Parameters
    ----------
    symbol : object
        Identifier of the asset whose return series is wanted,
        e.g. a ticker symbol or a database ID.
    start : date, optional
        Earliest date to fetch data for.
        Defaults to the earliest date available.
    end : date, optional
        Latest date to fetch data for.
        Defaults to the latest date available.

    Returns
    -------
    pandas.Series
        Whatever the current 'returns_func' returns.
    """
    first, last = sanitize_dates(start, end)
    # Look the function up at call time so a re-registered function is used.
    returns_func = SETTINGS['returns_func']
    return returns_func(symbol, start_date=first, end_date=last)
Beispiel #5
0
def get_non_trading_days(start, end, tz='utc'):
    """Calendar days in the period that are not trading days.

    Builds the full daily range from ``start`` to ``end`` in timezone
    ``tz`` and removes the dates returned by ``get_trading_dates``.
    The period must span at least one day.
    """
    start, end = sanitize_dates(start, end)
    span = end - start
    assert span.days >= 1, '期间最少相隔1天'
    calendar_days = pd.date_range(start, end, tz=tz)
    return calendar_days.difference(get_trading_dates(start, end, tz))
Beispiel #6
0
def get_trading_dates(start=None, end=None, tz='utc'):
    """All trading days in the period.

    Keyword Arguments:
        start {date like} -- first date (default: {None})
        end {date like} -- last date (default: {None})
        tz {str} -- timezone the result is localized to (default: {'utc'})

    Returns:
        DatetimeIndex -- trading dates in the period, sorted ascending.
            For dates after today, business days are treated as
            trading days.

    Position of today relative to the period -- five cases are handled:
        1. start > today
                start-----end
              ^
            today
        2. today == start
            start-----end
              ^
            today
        3. start < today < end
            start-----end
                   ^
                 today
        4. today == end
            start-----end
                       ^
                     today
        5. today > end
            start-----end
                           ^
                         today
    """
    start, end = sanitize_dates(start, end)
    assert (end - start).days >= 1, '期间最少相隔1天'
    today = dt.datetime.today().date()
    one_day = pd.Timedelta(days=1)

    def _plus_today(index):
        # Today is included only when it is itself a trading day.
        if is_trading_day(today):
            return index.append(pd.DatetimeIndex([today]))
        return index

    def _future_business_days():
        # Future sessions are approximated by plain business days.
        return pd.bdate_range(today + one_day, end, freq='B')

    def _history_until_yesterday():
        return _historical_trading_dates(start, today - one_day)

    # Case 1: the whole period lies in the future.
    if start > today:
        dates = pd.bdate_range(start, end, freq='B').sort_values()
    # Case 2: the period starts today.
    if start == today:
        dates = _plus_today(_future_business_days())
    # Case 3: today falls strictly inside the period.
    if start < today < end:
        dates = _plus_today(_history_until_yesterday())
        dates = dates.append(_future_business_days())
    # Case 4: the period ends today.
    if end == today:
        dates = _plus_today(_history_until_yesterday())
    # Case 5: the whole period lies in the past.
    if end < today:
        dates = _historical_trading_dates(start, end)
    return dates.tz_localize(tz).sort_values()
Beispiel #7
0
def select_output_by(output,
                     start=None,
                     end=None,
                     assets=None,
                     reduce_format=True):
    """
    Select rows of a `pipeline` output frame by time and by asset.

    Intended for analysing `run_pipeline` results in the research
    environment.

    Parameters
    ----------
    output : MultiIndex DataFrame
        Pipeline output; its index must have exactly two levels.
    start : str
        Start time.
    end : str
        End time.
    assets : iterable or str
        Stock codes. ``None`` is passed on as an empty list.
    reduce_format : bool
        When true, a selection covering a single session and/or a single
        asset is reduced with ``xs`` on the corresponding index level(s).

    Raises
    ------
    ValueError
        If the index of ``output`` does not have exactly two levels.

    Example
    -------
    >>> # result is the output of running a `pipeline`
    >>> select_output_by(result,'2018-04-23','2018-04-24',assets=['000585','600871'])

                                                  mean_10
    2018-04-23 00:00:00+00:00 	*ST东电(000585) 	2.7900
                                *ST油服(600871) 	2.0316
    2018-04-24 00:00:00+00:00 	*ST东电(000585) 	2.7620
                                *ST油服(600871) 	2.0316
    """
    if output.index.nlevels != 2:
        raise ValueError('输入数据框只能是run_pipeline输出结果,MultiIndex DataFrame')
    start, end = sanitize_dates(start, end)
    # Snap the requested range onto the first and last sessions it contains.
    sessions = trading_sessions(start, end)
    start, end = sessions[0], sessions[-1]
    assets = symbols(assets) if assets is not None else []

    selected = _select_output_by(output, start, end, assets)
    if not reduce_format:
        return selected

    single_session = start == end
    single_asset = len(assets) == 1
    if single_session and single_asset:
        return selected.xs((start, assets[0]))
    if single_session:
        return selected.xs(start, level=0)
    if single_asset:
        return selected.xs(assets[0], level=1)
    return selected
Beispiel #8
0
def to_tdates(start, end):
    """Snap a date range onto trading sessions.

    Returns a tuple ``(sessions, start_date, end_date)``. ``sessions`` is
    ``all_sessions`` of the calendar from ``_trading_calendar()``.
    ``start_date`` is located with a backward fill from ``start`` and
    ``end_date`` with a forward fill from ``end``. If the snapped start
    falls after the snapped end, it is clamped to the end.
    """
    sessions = _trading_calendar().all_sessions
    # Normalize the inputs first.
    start, end = sanitize_dates(start, end)
    # Locate the matching sessions.
    # NOTE(review): `Index.get_loc(..., method=...)` is deprecated in recent
    # pandas releases -- confirm the pinned pandas version still accepts it.
    first_pos = sessions.get_loc(start, method='bfill')
    last_pos = sessions.get_loc(end, method='ffill')
    end_date = sessions[last_pos]
    start_date = min(sessions[first_pos], end_date)
    return sessions, start_date, end_date
def read_stock_daily(stock_code):
    """Load the daily CSV fixture of one stock, limited to the test period.

    Reads ``tests/resources/cndata/stock_daily/<stock_code>.csv`` (gb2312
    encoded, '-' treated as missing), keeps and renames the columns listed
    in ``WY_DAILY_COL_MAPS``, and keeps only the rows whose 'date' lies
    between ``START_DATE`` and ``END_DATE`` after ``sanitize_dates``.

    Parameters
    ----------
    stock_code : str
        Stock code; used as the CSV file name.

    Returns
    -------
    DataFrame
        Rows sorted by 'date', with missing 'change_pct' replaced by 0.0.
    """
    start, end = sanitize_dates(START_DATE, END_DATE)
    pf = f'tests/resources/cndata/stock_daily/{stock_code}.csv'
    df = pd.read_csv(pf, encoding='gb2312', na_values=['-', None])
    df = df[WY_DAILY_COL_MAPS.keys()]
    df = df.rename(columns=WY_DAILY_COL_MAPS)
    # Parse before sorting so the order is chronological, not lexical.
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date')
    in_period = df['date'].between(start, end)
    # Take an explicit copy: filling a column of a `.loc` slice in place is
    # chained assignment, which can be silently lost under copy-on-write.
    df = df.loc[in_period, :].copy()
    df['change_pct'] = df['change_pct'].fillna(0.0)
    return df
Beispiel #10
0
def get_adhoc_holidays(start, end, tz='utc'):
    """
    Business days in the period that are not trading days.

    **Note**
        This differs from the non-trading days:
            adhoc_holidays = non-trading days - weekend dates

    The period must span at least one day.
    """
    start, end = sanitize_dates(start, end)
    span = end - start
    assert span.days >= 1, '期间最少相隔1天'
    business_days = pd.bdate_range(start, end, tz=tz)
    return business_days.difference(get_trading_dates(start, end, tz))
Beispiel #11
0
def get_treasury_data(start_date, end_date):
    """Read cost-of-funds (treasury rate) data for a period.

    Documents are read from the '国债利率' collection, filtered and sorted
    on 'date'. The result is aligned to the 'XSHG' calendar sessions in the
    period; sessions without a record take the previous available values.

    Parameters
    ----------
    start_date : datetime-like
        Start date.
    end_date : datetime-like
        End date.

    return
    ----------
    DataFrame: indexed by the UTC sessions of the 'XSHG' calendar within
        the period. Columns are renamed through ``TREASURY_COL_MAPS`` after
        a '2year' column (mean of 'y1' and 'y3') is inserted at position 7.

    Examples
    --------
    >>> start_date = '2020-05-15'
    >>> end_date = '2020-05-25'
    >>> df = get_treasury_data(start_date, end_date)
    >>> df.iloc[:5, :5]
        cash	1month	2month	3month	6month
    date
    2020-05-15 00:00:00+00:00	0.006838	0.009496	0.009506	0.010076	0.011570
    2020-05-18 00:00:00+00:00	0.006838	0.009369	0.009611	0.010414	0.011701
    2020-05-19 00:00:00+00:00	0.009838	0.009425	0.010490	0.010307	0.012016
    2020-05-20 00:00:00+00:00	0.008188	0.009084	0.010712	0.011012	0.012378
    2020-05-21 00:00:00+00:00	0.007028	0.008569	0.010695	0.011032	0.012465
    """
    start, end = sanitize_dates(start_date, end_date)
    db = get_db()
    collection = db['国债利率']
    predicate = {'date': {'$gte': start, '$lte': end}}
    projection = {'_id': 0}
    sort = [('date', 1)]
    cursor = collection.find(predicate, projection, sort=sort)
    df = pd.DataFrame.from_records(cursor)
    # The source has no 2-year tenor; interpolate it as the simple average
    # of the 1-year and 3-year rates.
    value = (df['y1'] + df['y3']) / 2
    df.insert(7, '2year', value)
    df.rename(columns=TREASURY_COL_MAPS, inplace=True)
    df.set_index('date', inplace=True)
    df = df.tz_localize('UTC')
    calendar = get_calendar('XSHG')
    start = start.tz_localize('UTC')
    end = end.tz_localize('UTC')
    sessions = calendar.sessions_in_range(start, end)
    # The index must match the trading calendar exactly. `ffill()` replaces
    # the deprecated `fillna(method='ffill')` spelling with the same result.
    return df.reindex(sessions).ffill()
Beispiel #12
0
def _get_single_stock_equity(symbol, start_date, end_date, is_index,
                             index_name):
    """Daily change series of one stock or index as a fraction.

    Reads collection ``symbol`` from 'wy_index_daily' when ``is_index`` is
    true, otherwise from 'wy_stock_daily', through ``query``. The
    'change_pct' column is divided by 100, indexed by UTC-localized date,
    sorted, NaN-filled with 0.0 and returned as a Series named
    ``index_name``.
    """
    start_date, end_date = sanitize_dates(start_date, end_date)
    if is_index:
        db_name = 'wy_index_daily'
    else:
        db_name = 'wy_stock_daily'
    collection = get_db(db_name)[symbol]
    df = query(collection, start_date, end_date)
    df.columns = DAILY_COLS
    # Stored values are divided by 100 to obtain fractions.
    df['change_pct'] = df['change_pct'] / 100.0
    df['date'] = pd.to_datetime(df['date'])
    df = df.set_index('date').sort_index()
    # The raw data contains NaN; treat those as zero change.
    rets = df.tz_localize('utc')['change_pct'].fillna(0.0)
    rets.name = index_name
    return rets
Beispiel #13
0
def fetch_treasury_data_from(start=EARLIEST_POSSIBLE_DATE.date(),
                             end=None):
    """
    Get cost-of-funds (treasury rate) data for a period.

    Parameters
    ----------
    start : datelike
        Start date. Defaults to ``EARLIEST_POSSIBLE_DATE``.
    end : datelike, optional
        End date. ``None`` (the default) means "today", resolved at call
        time rather than once at import time.

    Returns
    -------
    res : DataFrame
        Index: date
        columns: monthly and yearly tenors

    Example
    -------
    >>> df = fetch_treasury_data_from('2017-11-1','2017-11-20')
    >>> df.columns
    Index(['m0', 'm1', 'm2', 'm3', 'm6', 'm9', 'y1', 'y3', 'y5', 'y7', 'y10','y15', 'y20', 'y30', 'y40', 'y50'],dtype='object')
    >>> df.iloc[:,:6]
                    m0        m1        m2        m3        m6        m9
    date
    2017-11-01  0.030340  0.030800  0.030909  0.035030  0.035121  0.035592
    2017-11-02  0.029894  0.029886  0.032182  0.035074  0.035109  0.035493
    2017-11-03  0.027311  0.030052  0.032532  0.034992  0.035017  0.035461
    2017-11-06  0.026155  0.030086  0.032532  0.034917  0.034992  0.035514
    2017-11-07  0.026155  0.030127  0.032813  0.034788  0.035039  0.035465
    2017-11-08  0.026759  0.029984  0.033226  0.035399  0.035034  0.035469
    2017-11-09  0.027285  0.029925  0.033655  0.035553  0.034849  0.035629
    2017-11-10  0.027618  0.029958  0.033720  0.035691  0.035939  0.035735
    2017-11-13  0.028462  0.030854  0.034653  0.035708  0.035939  0.035935
    2017-11-14  0.028462  0.031018  0.034988  0.035754  0.035939  0.035940
    2017-11-15  0.028384  0.030871  0.035439  0.036412  0.036566  0.036252
    2017-11-16  0.028338  0.030875  0.035427  0.036317  0.036502  0.036222
    2017-11-17  0.027718  0.029956  0.035390  0.036981  0.036752  0.036183
    2017-11-20  0.028198  0.030235  0.035431  0.036797  0.036686  0.036153
    """
    # A `pd.Timestamp('today')` default in the signature is evaluated only
    # once, when the module is imported, and goes stale in a long-running
    # process. Resolve it on every call instead.
    if end is None:
        end = pd.Timestamp('today')
    start, end = sanitize_dates(start, end)
    start, end = pd.Timestamp(start), pd.Timestamp(end)
    df = read_local_data()
    return _preprocess(df, start, end)
Beispiel #14
0
def _fetch_single_equity(stock_code, start, end):
    """Read the raw local daily data of one stock.

    Queries collection ``stock_code`` of the 'wy_stock_daily' database for
    documents whose '日期' lies between ``start`` and ``end`` (inclusive),
    sorted ascending. An empty frame is returned unchanged. Otherwise the
    frame is reduced to the ``WY_DAILY_COL_MAPS`` columns, renamed and
    sorted by 'date'.
    """
    start, end = sanitize_dates(start, end)
    collection = get_db('wy_stock_daily')[stock_code]
    cursor = collection.find(
        {'日期': {'$gte': start, '$lte': end}},
        {'_id': 0},
        sort=[('日期', 1)],
    )
    df = pd.DataFrame.from_records(cursor)
    if df.empty:
        return df
    df['股票代码'] = stock_code
    # Keep only the mapped columns, then switch to the mapped names.
    df = df[WY_DAILY_COL_MAPS.keys()]
    df = df.rename(columns=WY_DAILY_COL_MAPS)
    return df.sort_values('date')
Beispiel #15
0
def fetch_history(code, start, end=None, is_index=False):
    """Download unadjusted historical trading data of a stock or an index.

    Remarks:
        The data served is delayed by one day.

    Record:
        002622 history downloaded at `2018-12-12 16:00` ended at
        2018-12-10, i.e. a delay of two days.

    The CSV payload is decoded as cp936. The first column becomes the
    index and is parsed as dates; 'None', '--' and 'none' are read as
    missing values.
    """
    start, end = sanitize_dates(start, end)
    query_code = _query_code(code, is_index)
    url_fmt = 'http://quotes.money.163.com/service/chddata.html?code={}&start={}&end={}'
    url = url_fmt.format(
        query_code,
        start.strftime('%Y%m%d'),
        end.strftime('%Y%m%d'),
    ) + '#01b07'
    response = get_page_response(url, 'get')
    return pd.read_csv(
        BytesIO(response.content),
        index_col=0,
        encoding='cp936',
        parse_dates=True,
        na_values=['None', '--', 'none'],
    )
Beispiel #16
0
def fetch_single_equity(stock_code, start, end):
    """
    Read daily trading data of one stock for a period from the local database.

    Notes
    -----
    1. Besides OHLCV the data includes change percent, turnover amount,
       turnover rate, market cap, total cap, shares outstanding and
       total shares.
    2. Back-adjusted prices are added; plotting with adjusted prices
       removes the gaps in the chart.
    3. When writing in bcolz format the change-percent column must be
       dropped because it contains negative values.

    Parameters
    ----------
    stock_code : str
        Code of the stock to fetch. A 7-character code is handled as an
        index and delegated to ``_fetch_single_index``.
    start : datetime-like
        Start date (inclusive).
    end : datetime-like
        End date.

    return
    ----------
    DataFrame: DataFrame with the OHLCV columns. datetimeindex.tz is None

    Examples
    --------
    >>> # stock code 600710 has been reused
    >>> stock_code = '600710'
    >>> start = '2016-03-29'
    >>> end = pd.Timestamp('2017-07-31')
    >>> df = fetch_single_equity(stock_code, start, end)
    >>> df.iloc[-6:,:8]
              date	symbol	open	high	low	close	prev_close	change_pct
    322	2017-07-24	600710	9.36	9.36	9.36	9.36	9.36	NaN
    323	2017-07-25	600710	9.36	9.36	9.36	9.36	9.36	NaN
    324	2017-07-26	600710	9.36	9.36	9.36	9.36	9.36	NaN
    325	2017-07-27	600710	9.36	9.36	9.36	9.36	9.36	NaN
    326	2017-07-28	600710	9.25	9.64	7.48	7.55	9.31	-18.9044
    """
    # Index daily data is served by a dedicated reader.
    if len(stock_code) == 7:
        return _fetch_single_index(stock_code, start, end)
    start, end = sanitize_dates(start, end)
    # Load the full history first so the adjusted prices are consistent
    # from the IPO onwards, whatever period was requested.
    full_history = _fetch_single_equity(stock_code, None, None)
    if full_history.empty:
        return full_history
    # Restore zero prices, then add the back-adjusted prices.
    full_history = _add_back_prices(_fill_zero(full_history))
    in_period = full_history['date'].between(start, end)
    df = full_history.loc[in_period, :]
    if df.empty:
        return df
    date_values = df['date'].values
    t_start, t_end = date_values[0], date_values[-1]
    # Trading dates the data is expected to cover.
    dts = pd.to_datetime(
        [t for t in _tdates() if t >= t_start and t <= t_end])
    # Fill in the suspended-trading days.
    df = _reindex(df, dts)
    assert len(df) == len(dts), f"股票:{stock_code},期间{t_start} ~ {t_end} 数据不足"
    df.loc[:, 'shares_outstanding'] = df.market_cap / df.close
    df.loc[:, 'total_shares'] = df.total_cap / df.close
    if df.empty:
        return df
    # Drop rows without a positive close.
    return df[df['close'] > 0.0]