Beispiel #1
0
def get_extras(info,
               security_list,
               start_date=None,
               end_date=None,
               df=True,
               count=None):
    assert info in ('is_st', 'acc_net_value', 'unit_net_value',
                    'futures_sett_price', 'futures_positions')
    securities = list_or_str(security_list)
    securities = convert_security(securities)
    if start_date and count:
        raise ParamsError("start_date 参数与 count 参数只能二选一")
    if not (count is None or count > 0):
        raise ParamsError("count 参数需要大于 0 或者为 None")
    if count is not None:
        count = int(count)
    end_date = convert_date(end_date) if end_date else convert_date(
        '2015-12-31')

    from jqdata.stores import FundStore, StStore, FuturesStore, CalendarStore

    if start_date:
        start_date = convert_date(start_date)
    elif count:
        ix = CalendarStore.instance().get_trade_days_between(
            datetime.date(2005, 1, 4), end_date)
        start_date = ix[-count]
    else:
        start_date = convert_date('2015-01-01')
    df = bool(df)
    dates = CalendarStore.instance().get_trade_days_between(
        start_date, end_date)
    values = {}
    if info == 'is_st':
        for s in securities:
            values[s.code] = StStore.instance().query(s, dates)
    elif info in ('acc_net_value', 'unit_net_value'):
        for s in securities:
            values[s.code] = FundStore.instance().query(s, dates, info)
    elif info in ('futures_sett_price', 'futures_positions'):
        for s in securities:
            values[s.code] = FuturesStore.instance().query(s, dates, info)
    if df:
        columns = [s.code for s in securities]
        ret = dict(index=vec2combine(dates), columns=columns, data=values)
        ret = pd.DataFrame(**ret)
        return ret
    else:
        return values
Beispiel #2
0
def get_field(security, field='close', unit='1d'):
    '''
    获取bcolz某一列, 归因分析取数据专用。

    :param security:
    :param unit:
    :param field:
    :return:
    '''

    assert unit == '1d', '暂时只支持1d'
    from jqdata.stores.bcolz_store import get_bcolz_day_store
    store = get_bcolz_day_store()
    security = convert_security(security)
    cr = store.open_bcolz_carray(security, field)
    return cr[:]
Beispiel #3
0
def get_ticks(security,
              end_dt,
              start_dt=None,
              count=None,
              fields=['time', 'current', 'high', 'low', 'volume', 'money']):
    from jqdata.stores.tick_store import get_tick_store
    from jqdata.utils.security import convert_security
    from jqdata.utils.datetime_utils import parse_datetime

    security = convert_security(security)
    end_dt = parse_datetime(end_dt)
    if start_dt is None and count is None:
        raise ParamsError("start_dt和count不能同时为None")
    elif start_dt is not None and count is not None:
        raise ParamsError("start_dt和count只能有一个不为None")
    if start_dt is not None:
        start_dt = parse_datetime(start_dt)
    if count is not None:
        count = int(count)
        assert count > 0, "get_ticks, count必须是一个正整数"

    store = get_tick_store()
    table = store.get_table(security)
    idx = table.find_great_or_equal(end_dt)
    if start_dt is not None:
        if start_dt > end_dt:
            raise ParamsError("start_dt 必须小于等于 end_dt")
        start = table.find_great_or_equal(start_dt)
    elif count is not None:
        start = max(0, idx - count)

    arr = table.array[start:idx]

    ret = {}
    for f in fields:
        if f in ('current', 'high', 'low', 'a1_p', 'b1_p'):
            ret[f] = arr[f] / 10000.
        elif f == 'time':
            ret[f] = arr[f] / 1000.
        else:
            ret[f] = arr[f] / 1.0
    dtype = np.dtype([(str(f), ret[f].dtype) for f in fields])
    cols = [ret[f] for f in fields]
    result = np.rec.fromarrays(cols, dtype=dtype).view(np.ndarray)
    return result
Beispiel #4
0
def get_current_tick(security, current_dt):
    from jqdata.stores.tick_store import get_tick_store
    from jqdata.utils.security import convert_security
    from jqdata.utils.datetime_utils import parse_datetime
    from jqdata.models.tick import Tick

    security = convert_security(security)
    assert isinstance(current_dt, datetime.datetime)
    # current_dt = parse_datetime(current_dt)
    store = get_tick_store()
    table = store.get_table(security)
    if table is None:
        raise Exception("找不到%s 的tick数据" % security.code)
    idx = table.find_less_or_equal(current_dt)
    price_fields = ('current', 'high', 'low', 'a1_p', 'a2_p', 'a3_p', 'a4_p',
                    'a5_p', 'b1_p', 'b2_p', 'b3_p', 'b4_p', 'b5_p')
    if idx < table.len and idx >= 0:
        data = table.array[idx]
        ret = {}
        if hasattr(data, 'dtype'):
            for f in data.dtype.names:
                if f in price_fields:
                    ret[f] = data[f] / 10000.
                elif f == 'time':
                    ret[f] = data[f] / 1000.
                else:
                    ret[f] = data[f]
        else:
            # data is tuple
            names = table.array.dtype.names
            for i in range(0, len(names)):
                f = names[i]
                if f in price_fields:
                    ret[f] = data[i] / 10000.
                elif f == 'time':
                    ret[f] = data[i] / 1000.
                else:
                    ret[f] = data[i]
        return Tick(security, ret)
    return None
Beispiel #5
0
def get_bars(end_dt,
             security,
             count,
             unit='1d',
             fields=('open', 'high', 'low', 'close'),
             include_now=False,
             fq=None,
             pre_factor_ref_date=None):
    '''
    :param end_dt: 截止日期
    :param security: 标的
    :param count:   bar个数
    :param unit: 频率,'1d'表示1天,'xm'表示x分钟。
    :param fields:
    :param include_now:
    :param fq: 'pre'表示前复权, 'post'表示后复权, None表示真实价格。
    :param pre_factor_ref_date: 前复权基准日期,这一天的价格为真实价格。None则表示全部取真实价格。
    :return:
    '''
    valid_bar_fields = ('date', 'open', 'close', 'high', 'low', 'volume',
                        'money')
    if isinstance(fields, (list, tuple)):
        for f in fields:
            assert f in valid_bar_fields, "get_bars 只支持 %s 字段" % (
                valid_bar_fields)
        str_field = False
    elif isinstance(fields, six.string_types):
        assert fields in valid_bar_fields, "get_bars 只支持 %s 字段" % (
            valid_bar_fields)
        str_field = True
    else:
        raise ParamsError("fields 应该是字符串或者list")

    if str_field:
        new_fields = [fields]
    else:
        new_fields = [i for i in fields]
    if 'factor' not in fields:
        new_fields.append('factor')
    end_dt = convert_dt(end_dt)
    valid_unit = ('1m', '5m', '15m', '30m', '60m', '120m', '1d', '1w', '1M')
    assert unit in valid_unit, 'get_bars, unit必须是 %s 中一种' % valid_unit
    count = int(count)
    assert count > 0, "get_bars, count必须是一个正整数"
    fq = ensure_fq(fq)
    security = convert_security(security)
    include_now = bool(include_now)

    end_trade_date = CalendarStore.instance().get_current_trade_date(
        security, end_dt)

    def ensure_not_empty(cols_dict):
        if cols_dict == {}:
            ret = {}
            for f in valid_bar_fields:
                ret[f] = np.zeros(0)
            return ret
        return cols_dict

    if unit == '1d':
        if include_now:
            # 获取当天的snapshot
            snapshot = get_snapshot(security, end_trade_date, end_dt)
            if snapshot:
                cols_dict = get_daily_bar_by_count(security,
                                                   end_trade_date,
                                                   count - 1,
                                                   new_fields,
                                                   include_now=False)
                cols_dict = ensure_not_empty(cols_dict)
                for f in cols_dict:
                    cols_dict[f] = np.append(cols_dict[f], snapshot[f])
            else:
                cols_dict = get_daily_bar_by_count(security,
                                                   end_trade_date,
                                                   count,
                                                   new_fields,
                                                   include_now=False)
                cols_dict = ensure_not_empty(cols_dict)
        else:
            cols_dict = get_daily_bar_by_count(security,
                                               end_trade_date,
                                               count,
                                               new_fields,
                                               include_now=False)
            cols_dict = ensure_not_empty(cols_dict)
    elif unit == '1m':
        end_dt = convert_dt(end_dt)
        cols_dict = get_minute_bar_by_count(security,
                                            end_dt,
                                            count,
                                            new_fields,
                                            include_now=include_now)
        cols_dict = ensure_not_empty(cols_dict)
    elif unit in ('5m', '15m', '30m', '60m', '120m'):
        x = int(unit[:-1])
        if security.is_futures():
            trade_days = CalendarStore.instance().get_all_trade_days(security)
            trade_days = trade_days[(trade_days >= security.start_date )&\
                                    (trade_days <= end_trade_date)&\
                                    (trade_days <= security.end_date)]
            cols_dict = {f: np.zeros(0) for f in new_fields}

            for idx in range(len(trade_days) - 1, -1, -1):
                open_dt = CalendarStore.instance().get_open_dt(
                    security, trade_days[idx])
                if trade_days[idx] == end_trade_date:
                    if not include_now:
                        close_dt = _not_include_now(security, end_trade_date,
                                                    end_dt, unit)
                    else:
                        close_dt = end_dt
                else:
                    close_dt = CalendarStore.instance().get_close_dt(
                        security, trade_days[idx])
                tmp_dict = get_minute_bar_by_period(security,
                                                    open_dt,
                                                    close_dt,
                                                    new_fields,
                                                    include_now=True)
                if not tmp_dict or len(tmp_dict[new_fields[0]] == 0):
                    continue
                tmp_dict = _resample_future_xm_bars(tmp_dict, x)
                for col in cols_dict:
                    cols_dict[col] = np.append(tmp_dict[col], cols_dict[f])
                if len(cols_dict[new_fields[0]]) >= count:
                    break
            for f in cols_dict:
                cols_dict[f] = cols_dict[f][-count:]
        else:
            cols_dict = {f: np.zeros(0) for f in new_fields}
            open_dt = CalendarStore.instance().get_open_dt(
                security, end_dt.date())
            if not include_now:
                close_dt = _not_include_now(security, end_trade_date, end_dt,
                                            unit)
            else:
                close_dt = end_dt
            tmp_dict = get_minute_bar_by_period(security,
                                                open_dt,
                                                close_dt,
                                                new_fields,
                                                include_now=True)
            if tmp_dict and len(tmp_dict[new_fields[0]]) > 0:
                tmp_dict = _resample_simple_xm_bars(tmp_dict, x)
                for col in cols_dict:
                    cols_dict[col] = np.append(tmp_dict[col], cols_dict[col])
            need_count = count - len(cols_dict[new_fields[0]])
            if need_count > 0:
                tmp_dict = get_minute_bar_by_count(security,
                                                   open_dt,
                                                   need_count * x,
                                                   new_fields,
                                                   include_now=False)
                tmp_dict = _resample_simple_xm_bars(tmp_dict, x)
                for col in cols_dict:
                    cols_dict[col] = np.append(tmp_dict[col], cols_dict[col])
            for f in cols_dict:
                cols_dict[f] = cols_dict[f][-count:]

    # 周线和月线必须先复权,然后 resample
    elif unit == '1w':
        if include_now:
            snapshot = get_snapshot(security, end_trade_date, end_dt)
            cols_dict = get_daily_bar_by_count(security,
                                               end_trade_date,
                                               count * 5,
                                               new_fields,
                                               include_now=False)
            cols_dict = ensure_not_empty(cols_dict)
            if snapshot:
                for f in cols_dict:
                    cols_dict[f] = np.append(cols_dict[f], snapshot[f])
        else:
            # monday == 0 ... Sunday == 6
            weekday = end_trade_date.weekday()
            last_sunday = end_trade_date - datetime.timedelta(weekday + 1)
            cols_dict = get_daily_bar_by_count(security,
                                               last_sunday,
                                               count * 5,
                                               new_fields,
                                               include_now=False)
            cols_dict = ensure_not_empty(cols_dict)
        cols_dict = _pre_fq(security, cols_dict, pre_factor_ref_date)
        cols_dict = _resample_days_bars(cols_dict, unit)
        for f in cols_dict:
            cols_dict[f] = cols_dict[f][-count:]
    elif unit == '1M':
        if include_now:
            snapshot = get_snapshot(security, end_trade_date, end_dt)
            cols_dict = get_daily_bar_by_count(security,
                                               end_trade_date,
                                               count * 31,
                                               new_fields,
                                               include_now=False)
            cols_dict = ensure_not_empty(cols_dict)
            if snapshot:
                for f in cols_dict:
                    cols_dict[f] = np.append(cols_dict[f], snapshot[f])
        else:
            end_date = end_trade_date.replace(day=1) - datetime.timedelta(
                days=1)
            cols_dict = get_daily_bar_by_count(security,
                                               end_date,
                                               count * 31,
                                               new_fields,
                                               include_now=False)
            cols_dict = ensure_not_empty(cols_dict)
        cols_dict = _pre_fq(security, cols_dict, pre_factor_ref_date)
        cols_dict = _resample_days_bars(cols_dict, unit)
        for f in cols_dict:
            cols_dict[f] = cols_dict[f][-count:]
    else:
        raise ParamsError("get_bars 支持 '1m', '1d'")

    # 将时间戳转换成datetime 或者 date。
    if 'date' in cols_dict:
        if unit in ('1d', '1w', '1M'):
            cols_dict['date'] = vec2date(cols_dict['date'])
        else:
            cols_dict['date'] = vec2datetime(cols_dict['date'])

    # 期货没有复权。
    # 周线和月线必须先复权(同一个周期内复权因子可能不同)
    if not security.is_futures() and unit not in ('1w', '1M'):
        if pre_factor_ref_date is not None:
            cols_dict = _pre_fq(security, cols_dict, pre_factor_ref_date)

    if str_field:
        dtype = np.dtype([(fields, cols_dict[fields].dtype)])
        cols = [cols_dict[fields]]
        result = np.rec.fromarrays(cols, dtype=dtype).view(np.ndarray)
    else:
        # numpy bug: name 不能为unicode。
        dtype = np.dtype([(str(name), cols_dict[name].dtype)
                          for name in fields])
        cols = [cols_dict[name] for name in fields]
        result = np.rec.fromarrays(cols, dtype=dtype).view(np.ndarray)
    return result
Beispiel #6
0
def get_price(security,
              start_date=None,
              end_date=None,
              frequency='daily',
              fields=None,
              skip_paused=False,
              fq='pre',
              count=None,
              pre_factor_ref_date=None):

    security = convert_security(security)

    if count is not None and start_date is not None:
        raise ParamsError("get_price 不能同时指定 start_date 和 count 两个参数")

    if count is not None:
        count = int(count)

    end_dt = convert_dt(end_date) if end_date else datetime.datetime(
        2015, 12, 31)
    end_dt = min(end_dt, date2dt(CalendarStore.instance().last_day))

    start_dt = convert_dt(start_date) if start_date else datetime.datetime(
        2015, 1, 1)
    start_dt = max(start_dt, date2dt(CalendarStore.instance().first_day))

    if pre_factor_ref_date:
        pre_factor_ref_date = convert_date(pre_factor_ref_date)

    if frequency in frequency_compat:
        unit = frequency_compat.get(frequency)
    else:
        unit = frequency

    if fields is not None:
        fields = ensure_str_tuple(fields)
        if 'price' in fields:
            warn_price_as_avg('使用 price 作为 get_price 的 fields 参数', 'getprice')
    else:
        fields = tuple(DEFAULT_FIELDS)

    check_unit_fields(unit, fields)
    fq = ensure_fq(fq)
    skip_paused = bool(skip_paused)
    if is_list(security) and skip_paused:
        raise ParamsError("get_price 取多只股票数据时, 为了对齐日期, 不能跳过停牌")

    if is_list(security) and _has_stock_with_future(security):
        if unit.endswith('m'):
            raise ParamsError("get_price 取分钟数据时,为了对齐数据,不能同时取股票和期货。")

    group = int(unit[:-1])
    res = {}
    for s in (security if is_list(security) else [security]):
        if unit.endswith('d'):
            a, index = get_price_daily_single(
                s,
                end_date=end_dt.date(),
                start_date=start_dt.date() if start_dt else None,
                count=count * group if count is not None else None,
                fields=fields,
                skip_paused=skip_paused,
                fq=fq,
                include_now=True,
                pre_factor_ref_date=pre_factor_ref_date)
        else:
            a, index = get_price_minute_single(
                s,
                end_dt=end_dt,
                start_dt=start_dt,
                count=count * group if count is not None else None,
                fields=fields,
                skip_paused=skip_paused,
                fq=fq,
                include_now=True,
                pre_factor_ref_date=pre_factor_ref_date)

        # group it
        dict_by_column = {
            f: group_array(a[f if f != 'price' else 'avg'], group, f)
            for f in fields
        }
        if index is not None and len(index) > 0:
            index = group_array(index, group, 'index')
            index = vec2datetime(index)
        res[s.code] = dict(index=index, columns=fields, data=dict_by_column)

    if is_list(security):
        fields = fields or DEFAULT_FIELDS
        if len(security) == 0:
            return pd.Panel(items=fields)
        pn_dict = {}
        index = res[security[0].code]['index']
        for f in fields:
            df_dict = {s.code: res[s.code]['data'][f] for s in security}
            pn_dict[f] = pd.DataFrame(index=index,
                                      columns=[s.code for s in security],
                                      data=df_dict)
        return pd.Panel(pn_dict)
    else:
        return pd.DataFrame(**res[security.code])
Beispiel #7
0
def attribute_history(end_dt,
                      security,
                      count,
                      unit='1d',
                      fields=tuple(DEFAULT_FIELDS),
                      skip_paused=True,
                      df=True,
                      fq='pre',
                      pre_factor_ref_date=None):
    '''
    只能在回测/模拟中调用,不能再研究中调用。
    参数说明:

    `unit`是'Xd'时, end_dt的类型是datetime.date;
    `unit`是'Xm'时, end_dt的类型是datetime.datetime;

    `count` 必须大于0;
    '''
    count = int(count)
    assert count > 0, "attribute_history, count必须是一个正整数"
    fields = ensure_str_tuple(fields)
    check_unit_fields(unit, fields)
    security = convert_security(security)
    if 'price' in fields:
        warn_price_as_avg('使用 price 作为 attribute_history 的 fields 参数',
                          'attributehistory')

    group = int(unit[:-1])
    total = int(count * group)
    skip_paused = bool(skip_paused)
    df = bool(df)
    fq = ensure_fq(fq)
    if pre_factor_ref_date is not None:
        pre_factor_ref_date = convert_date(pre_factor_ref_date)
    if unit.endswith('d'):
        end_dt = convert_date(end_dt)
        a, index = get_price_daily_single(
            security,
            end_date=end_dt,
            count=total,
            fields=fields,
            skip_paused=skip_paused,
            fq=fq,
            include_now=False,
            pre_factor_ref_date=pre_factor_ref_date)

    else:
        end_dt = convert_dt(end_dt)
        a, index = get_price_minute_single(
            security,
            end_dt=end_dt,
            count=total,
            fields=fields,
            skip_paused=skip_paused,
            fq=fq,
            include_now=False,
            pre_factor_ref_date=pre_factor_ref_date)

    dict_by_column = {
        f: group_array(a[f if f != 'price' else 'avg'], group, f)
        for f in fields
    }

    if not df:
        return dict_by_column
    else:
        if index is not None and len(index) > 0:
            index = group_array(index, group, 'index')
            index = vec2datetime(index)
        return pd.DataFrame(index=index, columns=fields, data=dict_by_column)
Beispiel #8
0
def history(end_dt,
            count,
            unit='1d',
            field='avg',
            security_list=None,
            df=True,
            skip_paused=False,
            fq='pre',
            pre_factor_ref_date=None):
    '''
    只能在回测/模拟中调用,不能再研究中调用。

    参数说明:

    `unit`是'Xd'时, end_dt的类型是datetime.date;
    `unit`是'Xm'时, end_dt的类型是datetime.datetime;

    `count` 必须大于0;

    `security_list`: 必须是 str 或者 tuple,不能是list 否则 lru_cache 会出错。
    '''
    count = int(count)
    assert count > 0, "history, count必须是一个正整数"
    check_unit_fields(unit, (field, ))
    if security_list is not None:
        security_list = list_or_str(security_list)
    if isinstance(security_list, tuple):
        security_list = list(security_list)
    security_list = convert_security(security_list)

    if field == 'price':
        warn_price_as_avg('使用 price 作为 history 的 field 参数', 'history')
        field = 'avg'

    group = int(unit[:-1])
    total = count * group
    dict_by_column = {}

    _index = None
    df = bool(df)
    skip_paused = bool(skip_paused)
    fq = ensure_fq(fq)
    if pre_factor_ref_date is not None:
        pre_factor_ref_date = convert_date(pre_factor_ref_date)
    need_index = df and not skip_paused

    if is_list(security_list) and _has_stock_with_future(security_list):
        if unit.endswith('m') and need_index:
            raise ParamsError("history 取分钟数据时,为了对齐数据,不能同时取股票和期货。")

    if unit.endswith('d'):
        end_dt = convert_date(end_dt)
        for security in security_list:
            a, _index = get_price_daily_single(
                security,
                end_date=end_dt,
                count=total,
                fields=(field, ),
                skip_paused=skip_paused,
                fq=fq,
                include_now=False,
                pre_factor_ref_date=pre_factor_ref_date)

            a = a[field]
            a = group_array(a, group, field)
            dict_by_column[security.code] = a
    else:
        end_dt = convert_dt(end_dt)
        for security in security_list:
            a, _index = get_price_minute_single(
                security,
                end_dt=end_dt,
                count=total,
                fields=(field, ),
                skip_paused=skip_paused,
                fq=fq,
                include_now=False,
                pre_factor_ref_date=pre_factor_ref_date)

            # 取第一列
            a = a[field]
            a = group_array(a, group, field)
            dict_by_column[security.code] = a

    if not df:
        return dict_by_column
    else:
        if need_index and _index is not None and len(_index) > 0:
            index = group_array(_index, group, 'index')
            index = vec2datetime(index)
        else:
            index = None
        return pd.DataFrame(index=index,
                            columns=[s.code for s in security_list],
                            data=dict_by_column)