Example #1
0
def get_end_dates():
    """
    股票结束日期。限定退市或者当前处于暂停上市状态的股票

    Examples
    --------
    >>> df = get_end_dates()
    >>> df.head()
    symbol    end_date
    0  000003  2002-06-14
    1  000013  2004-09-20
    2  000015  2001-10-22
    3  000024  2015-12-30
    4  000033  2017-07-07  
    """
    col_names = ['symbol', 'end_date']
    with session_scope() as sess:
        query = sess.query(SpecialTreatment.code,
                           func.max(SpecialTreatment.date)).group_by(
                               SpecialTreatment.code).having(
                                   SpecialTreatment.treatment.in_([
                                       SpecialTreatmentType.delisting,
                                       SpecialTreatmentType.PT
                                   ]))
        df = pd.DataFrame.from_records(query.all())
        df.columns = col_names
        return df
Example #2
0
def latest_trading_day():
    """最近一个交易日期(不包含当日)"""
    today = pd.Timestamp('today').date()
    with session_scope() as sess:
        return sess.query(TradingCalendar.date, ).filter(
            TradingCalendar.date < today).filter(
                TradingCalendar.is_trading == True).order_by(
                    TradingCalendar.date.desc()).limit(1).scalar()
Example #3
0
def get_previous_trading_date(date_):
    """上一个交易日期"""
    date_ = pd.Timestamp(date_).date()
    with session_scope() as sess:
        return sess.query(TradingCalendar.date).filter(
            TradingCalendar.is_trading == True,
            TradingCalendar.date < date_).order_by(
                TradingCalendar.date.desc()).limit(1).scalar()
Example #4
0
def get_last_trading_date():
    """最后交易日期"""
    date_ = pd.Timestamp('today').date()
    with session_scope() as sess:
        return sess.query(TradingCalendar.date).filter(
            TradingCalendar.is_trading == True,
            TradingCalendar.date <= date_).order_by(
                TradingCalendar.date.desc()).limit(1).scalar()
Example #5
0
def not_yet_updated(codes, class_):
    """指定类在codes中今天尚未更新的代码"""
    assert class_ in (ShortName, SpecialTreatment)
    today = pd.Timestamp('today').date()
    with session_scope() as sess:
        query = sess.query(class_.code).filter(
            func.date(class_.last_updated) == today).distinct()
        updated_codes = set([x[0] for x in query.all()])
        return set(codes).difference(updated_codes)
Example #6
0
def fetch_single_equity(stock_code, start, end):
    """
    从本地数据库读取股票期间日线交易数据

    注
    --
    1. 除OHLCV外,还包括涨跌幅、成交额、换手率、流通市值、总市值、流通股本、总股本
    2. 使用bcolz格式写入时,由于涨跌幅存在负数,必须剔除该列!!!

    Parameters
    ----------
    stock_code : str
        要获取数据的股票代码
    start_date : datetime-like
        自开始日期(包含该日)
    end_date : datetime-like
        至结束日期

    return
    ----------
    DataFrame: OHLCV列的DataFrame对象。

    Examples
    --------
    >>> symbol = '000333'
    >>> start_date = '2017-4-1'
    >>> end_date = pd.Timestamp('2018-4-16')
    >>> df = fetch_single_equity(symbol, start_date, end_date)
    >>> df.iloc[:,:8]
        symbol        date   open   high    low  close  prev_close  change_pct
    0  000333  2018-04-02  53.30  55.00  52.68  52.84       54.53     -3.0992
    1  000333  2018-04-03  52.69  53.63  52.18  52.52       52.84     -0.6056
    2  000333  2018-04-04  52.82  54.10  52.06  53.01       52.52      0.9330
    3  000333  2018-04-09  52.91  53.31  51.00  51.30       53.01     -3.2258
    4  000333  2018-04-10  51.45  52.80  51.18  52.77       51.30      2.8655
    5  000333  2018-04-11  52.78  53.63  52.41  52.98       52.77      0.3980
    6  000333  2018-04-12  52.91  52.94  51.84  51.87       52.98     -2.0951
    7  000333  2018-04-13  52.40  52.47  51.01  51.32       51.87     -1.0603
    8  000333  2018-04-16  51.31  51.80  49.15  49.79       51.32     -2.9813    
    """
    start = pd.Timestamp(start).date()
    end = pd.Timestamp(end).date()
    with session_scope() as sess:
        query = sess.query(
            StockDaily.code, StockDaily.date, StockDaily.A002_开盘价,
            StockDaily.A003_最高价, StockDaily.A004_最低价, StockDaily.A005_收盘价,
            StockDaily.A009_前收盘, StockDaily.A011_涨跌幅, StockDaily.A006_成交量,
            StockDaily.A007_成交金额, StockDaily.A008_换手率, StockDaily.A013_流通市值,
            StockDaily.A012_总市值).filter(StockDaily.code == stock_code,
                                        StockDaily.date.between(start, end))
        df = pd.DataFrame.from_records(query.all())
        df.columns = DAILY_COLS
        df = _fill_zero(df)
        df['circulating_share'] = df.cmv / df.close
        df['total_share'] = df.tmv / df.close
        return df
Example #7
0
def codes_to_append():
    """要添加数据的股票代码"""
    last_trading_date = get_last_trading_date()
    previous_trading_date = get_previous_trading_date(last_trading_date)
    with session_scope() as sess:
        y_codes = sess.query(
            StockDaily.code).filter(StockDaily.date == previous_trading_date)
        t_codes = sess.query(
            StockDaily.code).filter(StockDaily.date == last_trading_date)
        query = y_codes.except_(t_codes)
        return [x[0] for x in query.all()]
Example #8
0
def get_start_date(code, class_):
    """类所含股票代码最后日期"""
    assert class_ in (ShortName, SpecialTreatment)
    first_date = pd.Timestamp('1990-1-1').date()
    with session_scope() as sess:
        l_d = sess.query(func.max(
            class_.date)).filter(class_.code == code).scalar()
        if l_d is None:
            return first_date
        else:
            return l_d + pd.Timedelta(days=1)
Example #9
0
def concept_categories():
    """概念类别映射{代码:名称}"""
    with session_scope() as sess:
        query = sess.query(
            Category.code,
            Category.title,
        ).filter(Category.label == '概念')
        df = pd.DataFrame.from_records(query.all())
        df.columns = ['code', 'name']
        df.sort_values('code', inplace=True)
        return df.set_index('code').to_dict()['name']
Example #10
0
def ths_industry_info():
    """同花顺行业"""
    with session_scope() as sess:
        query = sess.query(
            StockCategory.stock_code,
            Category.title,
        ).filter(Category.code == StockCategory.category_code).filter(
            Category.label == '同花顺行业')
        df = pd.DataFrame.from_records(query.all())
        df.columns = ['sid', 'ths_industry']
        return df
Example #11
0
def region_info():
    """股票所处地域"""
    with session_scope() as sess:
        query = sess.query(
            StockCategory.stock_code,
            Category.title,
        ).filter(Category.code == StockCategory.category_code).filter(
            Category.label == '地域')
        df = pd.DataFrame.from_records(query.all())
        df.columns = ['sid', 'region']
        df.sort_values('sid', inplace=True)
        return df
Example #12
0
def gen_asset_metadata(only_in=True):
    """
    生成股票元数据

    Paras
    -----
    only_in : bool
        是否仅仅包含当前在市的股票,默认为真。

    Examples
    --------
    >>> df = gen_asset_metadata()
    >>> df.head()
    symbol asset_name first_traded last_traded exchange auto_close_date  \
    0  000001       平安银行   1991-01-02  2018-04-19     SZSE      2018-04-20
    1  000002       万 科A   1991-01-02  2018-04-19     SZSE      2018-04-20
    2  000004       国农科技   1991-01-02  2018-04-19     SZSE      2018-04-20
    3  000005       世纪星源   1991-01-02  2018-04-19     SZSE      2018-04-20
    4  000006       深振业A   1992-04-27  2018-04-19     SZSE      2018-04-20
    start_date    end_date
    0  1991-04-03  2018-04-19
    1  1991-01-29  2018-04-19
    2  1991-01-14  2018-04-19
    3  1990-12-10  2018-04-19
    4  1992-04-27  2018-04-19      
    """
    columns = ['symbol', 'first_traded', 'last_traded']
    with session_scope() as sess:
        query = sess.query(
            StockDaily.code, func.min(StockDaily.date),
            func.max(StockDaily.date)).filter(~StockDaily.code.startswith(
                '2')).filter(~StockDaily.code.startswith('9')).group_by(
                    StockDaily.code)
        df = pd.DataFrame.from_records(query.all())
        df.columns = columns
        df['exchange'] = df['symbol'].map(get_exchange)
        df['auto_close_date'] = df['last_traded'].map(
            lambda x: x + pd.Timedelta(days=1))
        latest_name = get_latest_short_name()
        start_dates = get_start_dates()
        end_dates = get_end_dates()
        df = df.merge(latest_name, 'left',
                      on='symbol').merge(start_dates, 'left',
                                         on='symbol').merge(end_dates,
                                                            'left',
                                                            on='symbol')
        # 对于未退市的结束日期,以最后交易日期代替
        df.loc[df.end_date.isna(), 'end_date'] = df.loc[df.end_date.isna(),
                                                        'last_traded']
        if only_in:
            df = df[~df.symbol.isin(end_dates.symbol)]
            df.reset_index(inplace=True, drop=True)
        return df
Example #13
0
def ipo_info():
    """
    股票上市日期(A股,不含B股)
    """
    with session_scope() as sess:
        query = sess.query(
            Stock.code,
            Issue.A004_上市日期,
        ).filter(Stock.code == Issue.code).filter(
            Issue.A004_上市日期.isnot(None)).filter(~Stock.code.startswith(
                '2')).filter(~Stock.code.startswith('9'))
        df = pd.DataFrame.from_records(query.all())
        df.columns = ['sid', 'asof_date']
        return df.sort_values('sid')
Example #14
0
def fetch_single_minutely_equity(stock_code, start, end, exclude_lunch=True):
    """
    从本地数据库读取单个股票期间分钟级别交易明细数据

    注
    --
    1. 仅包含OHLCV列
    2. 原始数据按分钟进行汇总,first(open),last(close),max(high),min(low),sum(volume)

    Parameters
    ----------
    stock_code : str
        要获取数据的股票代码
    start_date : datetime-like
        自开始日期(包含该日)
    end_date : datetime-like
        至结束日期
    exclude_lunch : bool
        是否排除午休时间,默认”是“

    return
    ----------
    DataFrame: OHLCV列的DataFrame对象。

    Examples
    --------
    >>> symbol = '000333'
    >>> start_date = '2018-4-1'
    >>> end_date = pd.Timestamp('2018-4-19')
    >>> df = fetch_single_minutely_equity(symbol, start_date, end_date)
    >>> df.tail()
                        close   high    low   open  volume
    2018-04-19 14:56:00  51.55  51.56  51.50  51.55  376400
    2018-04-19 14:57:00  51.55  51.55  51.55  51.55   20000
    2018-04-19 14:58:00  51.55  51.55  51.55  51.55       0
    2018-04-19 14:59:00  51.55  51.55  51.55  51.55       0
    2018-04-19 15:00:00  51.57  51.57  51.57  51.57  353900
    """
    start = pd.Timestamp(start).date()
    end = pd.Timestamp(end).date()
    with session_scope() as sess:
        query = sess.query(DealDetail.code, DealDetail.date,
                           DealDetail.A001_时间, DealDetail.A002_价格,
                           DealDetail.A004_成交量).filter(
                               DealDetail.code == stock_code,
                               DealDetail.date.between(start, end))
        df = pd.DataFrame.from_records(query.all())
        if df.empty:
            return pd.DataFrame(columns=OHLCV_COLS)
        return _handle_minutely_data(df, exclude_lunch)
Example #15
0
def failed_one(code):
    """如刷新失败,返回True,否则False"""
    try:
        _, p2, p3 = fetch_gpgk(code)
    except ValueError:
        # 无法获取数据,失败
        return True
    sn_start = get_start_date(code, ShortName)
    st_start = get_start_date(code, SpecialTreatment)
    sns = _gen_sn(code, p2, sn_start)
    sts = _gen_st(code, p3, st_start)
    with session_scope() as sess:
        _insert_sn(sess, code, sns)
        _insert_st(sess, code, sts)
    return False
Example #16
0
def get_adhoc_holidays():
    """从本地数据库中获取特别假期(务必以UTC时区表达)"""
    today = pd.Timestamp('today')
    with session_scope() as sess:
        query = sess.query(TradingCalendar.date).filter(
            TradingCalendar.date <= today.date()).filter(
                TradingCalendar.is_trading == False)
        start = today + pd.Timedelta(days=1)
        end = today + pd.Timedelta(days=365)
        # 所有未来日期周六周日均视同为假期
        future = pd.date_range(start, end, tz='UTC')
        wds = pd.date_range(start, end, tz='UTC', freq='B')
        future_holidays = future.difference(wds)
        holidays = [x[0] for x in query.all()]
        holidays = pd.DatetimeIndex(holidays, tz='UTC')
        return holidays.union(future_holidays)
Example #17
0
def flush_stock_codes():
    df = stock_code_reader.read()
    df['status'] = df.status.map(lambda x: Status(x))

    with session_scope(False) as sess:
        query = sess.query(Stock.code).order_by(Stock.code)
        present = set([x[0] for x in query.all()])
        new_codes = set(df.index.values).difference(present)
        for code in sorted(new_codes):
            s = Stock()
            s.code = code
            s.name = df.loc[code, 'name']
            s.latest_status = df.loc[code, 'status']
            sess.add(s)
            logger.info('{},新增代码:{}'.format(s.__tablename__, code))
            log_to_db(Stock.__tablename__, True, 1, Action.INSERT, code)
Example #18
0
def get_cn_benchmark_returns(symbol):
    """
    获取指数基准收益率,默认代码`000001`

    Parameters
    ----------
    symbol : str
        Benchmark symbol for which we're getting the returns.
    """
    with session_scope() as sess:
        query = sess.query(
            IndexDaily.date,
            IndexDaily.change_pct).filter(IndexDaily.code == symbol)
        df = pd.DataFrame.from_records(query.all())
        s = pd.Series(data=df[1].values / 100,
                      index=pd.DatetimeIndex(df[0].values))
        return s.sort_index().tz_localize('UTC').dropna()
Example #19
0
def log_to_db(table_name,
              status,
              rows,
              action,
              code=None,
              start=None,
              end=None):
    """记录刷新数据"""
    with session_scope() as sess:
        rr = RefreshRecord(table_name=table_name,
                           status=status,
                           row=rows,
                           action=action,
                           code=code,
                           start=start,
                           end=end)
        sess.add(rr)
Example #20
0
def fetch_single_quity_adjustments(stock_code, start, end):
    """
    从本地数据库读取股票期间分红派息数据

    Parameters
    ----------
    stock_code : str
        要获取数据的股票代码
    start : datetime-like
        自开始日期
    end : datetime-like
        至结束日期

    return
    ----------
    DataFrame对象

    Examples
    --------
    >>> fetch_single_quity_adjustments('600000', '2010-4-1', '2018-4-16')
        symbol        date  amount  ratio record_date    pay_date listing_date
    0  600000  2010-06-10   0.150    0.3  2010-06-09  2010-06-11   2010-06-10
    1  600000  2011-06-03   0.160    0.3  2011-06-02  2011-06-07   2011-06-03
    2  600000  2012-06-26   0.300    0.0  2012-06-25  2012-06-26   2012-06-26
    3  600000  2013-06-03   0.550    0.0  2013-05-31  2013-06-03   2013-06-03
    4  600000  2014-06-24   0.660    0.0  2014-06-23  2014-06-24   2014-06-24
    5  600000  2015-06-23   0.757    0.0  2015-06-19  2015-06-23   2015-06-23
    6  600000  2016-06-23   0.515    0.1  2016-06-22  2016-06-24   2016-06-23
    7  600000  2017-05-25   0.200    0.3  2017-05-24  2017-05-26   2017-05-25
    """
    start = pd.Timestamp(start).date()
    end = pd.Timestamp(end).date()
    with session_scope() as sess:
        query = sess.query(Adjustment.code, Adjustment.date,
                           Adjustment.A002_派息, Adjustment.A003_送股,
                           Adjustment.A004_股权登记日, Adjustment.A005_除权基准日,
                           Adjustment.A006_红股上市日)
        query = query.filter(Adjustment.code == stock_code)
        query = query.filter(Adjustment.date.between(start, end))
        df = pd.DataFrame.from_records(query.all())
        if df.empty:
            # 返回一个空表
            return pd.DataFrame(columns=ADJUSTMENT_COLS)
        df.columns = ADJUSTMENT_COLS
        return df
Example #21
0
def _insert(row_data, date_):
    with session_scope() as sess:
        sd = StockDaily(
            code=row_data['SYMBOL'],
            date=date_,
            A001_名称=row_data['NAME'],
            A002_开盘价=row_data['OPEN'],
            A003_最高价=row_data['HIGH'],
            A004_最低价=row_data['LOW'],
            A005_收盘价=row_data['PRICE'],
            A006_成交量=row_data['VOLUME'],
            A008_换手率=row_data['TURNOVER'],
            A009_前收盘=row_data['YESTCLOSE'],
            A011_涨跌幅=row_data['PERCENT'],
            A012_总市值=row_data['TCAP'],
            A013_流通市值=row_data['MCAP'],
        )
        sess.add(sd)
Example #22
0
def _save_short_name():
    """存储股票简称变动历史"""
    with session_scope() as sess:
        query = sess.query(StockDaily.code, StockDaily.date,
                           StockDaily.A001_名称).group_by(
                               StockDaily.code, StockDaily.A001_名称)
        df = pd.DataFrame.from_records(query.all())
        df.columns = ['sid', 'asof_date', 'short_name']
        df['asof_date'] = pd.to_datetime(df['asof_date'], utc=True)
        df['timestamp'] = df['asof_date']  # - pd.Timedelta(days=1)
        df['sid'] = df['sid'].map(lambda x: int(x))
    table_name = 'short_names'
    # 转换为bcolz格式并存储
    rootdir = bcolz_table_path(table_name)
    if os.path.exists(rootdir):
        rmtree(rootdir)
    odo(df, rootdir)
    logger.info('表:{},数据存储路径:{}'.format(table_name, rootdir))
Example #23
0
def get_treasury_data(start_date, end_date):
    start_date, end_date = sanitize_dates(start_date, end_date)
    # 确保为date类型
    start_date = pd.Timestamp(start_date).date()
    end_date = pd.Timestamp(end_date).date()
    with session_scope() as sess:
        query = sess.query(Treasury.date, Treasury.m1, Treasury.m3,
                           Treasury.m6, Treasury.y1, Treasury.y2, Treasury.y3,
                           Treasury.y5, Treasury.y7, Treasury.y10,
                           Treasury.y20, Treasury.y30).filter(
                               Treasury.date.between(start_date, end_date))
        df = pd.DataFrame.from_records(query.all())
        df.columns = TREASURY_COL_NAMES
        df.set_index(keys='date', inplace=True)
        df.index = pd.DatetimeIndex(df.index)
        # 缺少2/7年数据,使用简单平均插值
        df['2year'] = (df['1year'] + df['3year']) / 2
        df['7year'] = (df['5year'] + df['10year']) / 2
    return df.tz_localize('UTC')
Example #24
0
def concept_info():
    """股票概念"""
    id_maps, _ = field_code_concept_maps()
    with session_scope() as sess:
        query = sess.query(
            StockCategory.stock_code,
            StockCategory.category_code,
        ).filter(Category.code == StockCategory.category_code).filter(
            Category.label == '概念').filter(
                ~StockCategory.stock_code.startswith('9')).filter(
                    ~StockCategory.stock_code.startswith('2'))
        df = pd.DataFrame.from_records(query.all())
        df.columns = ['sid', 'concept']
        out = pd.pivot_table(df,
                             values='concept',
                             index='sid',
                             columns='concept',
                             aggfunc=np.count_nonzero,
                             fill_value=0)
        out.rename(columns=id_maps, inplace=True)
        return out.astype('bool').reset_index()
Example #25
0
def get_start_dates():
    """
    股票上市日期

    Examples
    --------
    >>> df = get_start_dates()
    >>> df.head()
    symbol  start_date
    0  000001  1991-04-03
    1  000002  1991-01-29
    2  000003  1991-01-14
    3  000004  1991-01-14
    4  000005  1990-12-10
    """
    col_names = ['symbol', 'start_date']
    with session_scope() as sess:
        query = sess.query(Issue.code,
                           Issue.A004_上市日期).filter(Issue.A004_上市日期.isnot(None))
        df = pd.DataFrame.from_records(query.all())
        df.columns = col_names
        return df
Example #26
0
def flush_shareholder(codes=None):
    """
    刷新股票股东信息
    
    说明:
        如初始化则包含所有曾经上市的股票代码,含已经退市
        否则仅包含当前在市的股票代码
    """
    # if init:
    #     insert_preprocessed_data()
    if codes is None:
        codes = get_all_codes(False)
    else:
        codes = ensure_list(codes)
    # 按代码循环
    for code in codes:
        with session_scope() as sess:
            # 按类型循环
            for type_ in (ShareholderType.main,
                          ShareholderType.circulating,
                          ShareholderType.fund):
                _flush_by(sess, code, type_)
Example #27
0
def get_latest_short_name():
    """
    获取股票最新股票简称

    Examples
    --------
    >>> df = get_end_dates()
    >>> df.head()
         symbol  asset_name
    0     000001    平安银行
    1     000002    万 科A
    2     000003   PT金田A
    3     000004    国农科技
    4     000005    世纪星源     
    """
    col_names = ['symbol', 'asset_name']
    with session_scope() as sess:
        query = sess.query(StockDaily.code, StockDaily.A001_名称).group_by(
            StockDaily.code).having(func.max(StockDaily.date))
        df = pd.DataFrame.from_records(query.all())
        df.columns = col_names
        return df
Example #28
0
def flush_trading_calendar():
    with session_scope(False) as sess:
        num = sess.query(func.count(TradingCalendar.date)).scalar()
        if not num:
            to_adds = []
            for d in trading_days:
                to_add = TradingCalendar(date=d.date(), is_trading=True)
                to_adds.append(to_add)
            for d in non_trading_days:
                to_add = TradingCalendar(date=d.date(), is_trading=False)
                to_adds.append(to_add)
            sess.add_all(to_adds)
            fmt = '{},初始化数据,其中,交易日:{}天,非交易日:{}天'
            logger.info(fmt.format(to_add.__tablename__,
                                   len(trading_days),
                                   len(non_trading_days)))
        else:
            last_day = sess.query(func.max(TradingCalendar.date)).scalar()
            date_rng = pd.date_range(last_day, end_date)
            for d in date_rng:
                if d in trading_days:
                    is_trading = True
                else:
                    is_trading = False
                old = sess.query(TradingCalendar).filter(
                    TradingCalendar.date == d.date()).one_or_none()
                if old:
                    old.is_trading = is_trading
                else:
                    to_add = TradingCalendar(
                        date=d.date(), is_trading=is_trading)
                    sess.add(to_add)
                sess.commit()
                info = '交易日' if is_trading else '非交易日'
                fmt = '{} 刷新' + info + '{}状态'
                logger.info(fmt.format(
                    TradingCalendar.__tablename__, d.date()))
Example #29
0
def has_data(class_):
    """判断是否存在数据"""
    with session_scope() as sess:
        return sess.query(class_.last_updated).limit(1).scalar()
Example #30
0
def _delete():
    """删除临时数据"""
    with session_scope() as sess:
        count = sess.query(StockDaily).filter(
            StockDaily.A007_成交金额.is_(None)).delete()
        logger.info('删除临时数据{}行'.format(count))