Ejemplo n.º 1
0
def get_roll_yield(date=None, var="CU", symbol1=None, symbol2=None, df=None):
    """
    指定交易日指定品种(主力和次主力)或任意两个合约的展期收益率
    Parameters
    ------
    date: string 某一天日期 format: YYYYMMDD
    var: string 合约品种如RB、AL等
    symbol1: string 合约 1如 rb1810
    symbol2: string 合约 2 如 rb1812
    df: DataFrame或None 从dailyBar得到合约价格,如果为空就在函数内部抓dailyBar,直接喂给数据可以让计算加快
    Return
    -------
        tuple
        roll_yield
        near_by
        deferred
    """
    # date = "20200304"
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
        return None
    if symbol1:
        var = symbol_varieties(symbol1)
    if not isinstance(df, pd.DataFrame):
        market = symbol_market(var)
        df = get_futures_daily(start_day=date, end_day=date, market=market)
    if var:
        df = df[~df["symbol"].str.contains(
            "efp")]  # 20200304 由于交易所获取的数据中会有比如 "CUefp",所以在这里过滤
        df = df[df["variety"] == var].sort_values("open_interest",
                                                  ascending=False)
        df["close"] = df["close"].astype("float")
        symbol1 = df["symbol"].tolist()[0]
        symbol2 = df["symbol"].tolist()[1]

    close1 = df["close"][df["symbol"] == symbol1.upper()].tolist()[0]
    close2 = df["close"][df["symbol"] == symbol2.upper()].tolist()[0]

    a = re.sub(r"\D", "", symbol1)
    a_1 = int(a[:-2])
    a_2 = int(a[-2:])
    b = re.sub(r"\D", "", symbol2)
    b_1 = int(b[:-2])
    b_2 = int(b[-2:])
    c = (a_1 - b_1) * 12 + (a_2 - b_2)
    if close1 == 0 or close2 == 0:
        return False
    if c > 0:
        return np.log(close2 / close1) / c * 12, symbol2, symbol1
    else:
        return np.log(close2 / close1) / c * 12, symbol1, symbol2
Ejemplo n.º 2
0
Archivo: cot.py Proyecto: wjw16/dtshare
def _table_cut_cal(table_cut, symbol):
    """
    表格切分
    :param table_cut: 需要切分的表格
    :type table_cut: pandas.DataFrame
    :param symbol: 品种
    :type symbol: str
    :return:
    :rtype: pandas.DataFrame
    """
    var = symbol_varieties(symbol)
    table_cut[intColumns + ['rank']] = table_cut[intColumns + ['rank']].astype(int)
    table_cut_sum = table_cut.sum()
    table_cut_sum['rank'] = 999
    for col in ['vol_party_name', 'long_party_name', 'short_party_name']:
        table_cut_sum[col] = None
    table_cut = table_cut.append(pd.DataFrame(table_cut_sum).T, sort=True)
    table_cut['symbol'] = symbol
    table_cut['variety'] = var
    table_cut[intColumns + ['rank']] = table_cut[intColumns + ['rank']].astype(int)
    return table_cut
Ejemplo n.º 3
0
Archivo: cot.py Proyecto: wjw16/dtshare
def get_rank_sum(date=None, vars_list=cons.contract_symbols):
    """
    抓取四个期货交易所前5、前10、前15、前20会员持仓排名数据
    注1:由于上期所和中金所只公布每个品种内部的标的排名, 没有公布品种的总排名;
        所以函数输出的品种排名是由品种中的每个标的加总获得, 并不是真实的品种排名列表
    注2:大商所只公布了品种排名, 未公布标的排名
    :param date: 日期 format: YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如 RB, AL等列表 为空时为所有商品
    :return: pd.DataFrame:
    展期收益率数据
    symbol                           标的合约                     string
    var                              商品品种                     string
    vol_top5                         成交量前5会员成交量总和         int
    vol_chg_top5                     成交量前5会员成交量变化总和      int
    long_open_interest_top5          持多单前5会员持多单总和         int
    long_open_interest_chg_top5      持多单前5会员持多单变化总和      int
    short_open_interest_top5         持空单前5会员持空单总和         int
    short_open_interest_chg_top5     持空单前5会员持空单变化总和      int
    vol_top10                        成交量前10会员成交量总和        int
    ...
    date                             日期                         string YYYYMMDD
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    dce_var = [i for i in vars_list if i in cons.market_exchange_symbols['dce']]
    shfe_var = [i for i in vars_list if i in cons.market_exchange_symbols['shfe']]
    czce_var = [i for i in vars_list if i in cons.market_exchange_symbols['czce']]
    cffex_var = [i for i in vars_list if i in cons.market_exchange_symbols['cffex']]
    big_dict = {}
    if len(dce_var) > 0:
        data = get_dce_rank_table(date, dce_var)
        if data is False:
            return False
        big_dict.update(data)
    if len(shfe_var) > 0:
        data = get_shfe_rank_table(date, shfe_var)
        if data is False:
            return False
        big_dict.update(data)
    if len(czce_var) > 0:
        data = get_czce_rank_table(date, czce_var)
        if data is False:
            return False
        big_dict.update(data)
    if len(cffex_var) > 0:
        data = get_cffex_rank_table(date, cffex_var)
        if data is False:
            return False
        big_dict.update(data)
    records = pd.DataFrame()

    for symbol, table in big_dict.items():
        table = table.applymap(lambda x: 0 if x == '' else x)
        for symbol_inner in set(table['symbol']):

            var = symbol_varieties(symbol_inner)
            if var in vars_list:
                table_cut = table[table['symbol'] == symbol_inner]
                table_cut['rank'] = table_cut['rank'].astype('float')
                table_cut_top5 = table_cut[table_cut['rank'] <= 5]
                table_cut_top10 = table_cut[table_cut['rank'] <= 10]
                table_cut_top15 = table_cut[table_cut['rank'] <= 15]
                table_cut_top20 = table_cut[table_cut['rank'] <= 20]

                big_dict = {'symbol': symbol_inner, 'variety': var,

                            'vol_top5': table_cut_top5['vol'].sum(), 'vol_chg_top5': table_cut_top5['vol_chg'].sum(),
                            'long_open_interest_top5': table_cut_top5['long_open_interest'].sum(),
                            'long_open_interest_chg_top5': table_cut_top5['long_open_interest_chg'].sum(),
                            'short_open_interest_top5': table_cut_top5['short_open_interest'].sum(),
                            'short_open_interest_chg_top5': table_cut_top5['short_open_interest_chg'].sum(),

                            'vol_top10': table_cut_top10['vol'].sum(),
                            'vol_chg_top10': table_cut_top10['vol_chg'].sum(),
                            'long_open_interest_top10': table_cut_top10['long_open_interest'].sum(),
                            'long_open_interest_chg_top10': table_cut_top10['long_open_interest_chg'].sum(),
                            'short_open_interest_top10': table_cut_top10['short_open_interest'].sum(),
                            'short_open_interest_chg_top10': table_cut_top10['short_open_interest_chg'].sum(),

                            'vol_top15': table_cut_top15['vol'].sum(),
                            'vol_chg_top15': table_cut_top15['vol_chg'].sum(),
                            'long_open_interest_top15': table_cut_top15['long_open_interest'].sum(),
                            'long_open_interest_chg_top15': table_cut_top15['long_open_interest_chg'].sum(),
                            'short_open_interest_top15': table_cut_top15['short_open_interest'].sum(),
                            'short_open_interest_chg_top15': table_cut_top15['short_open_interest_chg'].sum(),

                            'vol_top20': table_cut_top20['vol'].sum(),
                            'vol_chg_top20': table_cut_top20['vol_chg'].sum(),
                            'long_open_interest_top20': table_cut_top20['long_open_interest'].sum(),
                            'long_open_interest_chg_top20': table_cut_top20['long_open_interest_chg'].sum(),
                            'short_open_interest_top20': table_cut_top20['short_open_interest'].sum(),
                            'short_open_interest_chg_top20': table_cut_top20['short_open_interest_chg'].sum(),

                            'date': date.strftime('%Y%m%d')
                            }
                records = records.append(pd.DataFrame(big_dict, index=[0]))

    if len(big_dict.items()) > 0:
        add_vars = [i for i in cons.market_exchange_symbols['shfe'] + cons.market_exchange_symbols['cffex'] if
                    i in records['variety'].tolist()]
        for var in add_vars:
            records_cut = records[records['variety'] == var]
            var_record = pd.DataFrame(records_cut.sum()).T
            var_record['date'] = date.strftime('%Y%m%d')
            var_record.loc[:, ['variety', 'symbol']] = var
            records = records.append(var_record)

    return records.reset_index(drop=True)
Ejemplo n.º 4
0
Archivo: cot.py Proyecto: wjw16/dtshare
def get_czce_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    郑州商品交易所前 20 会员持仓排名数据明细
    注:该交易所既公布了品种排名, 也公布了标的排名
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品, 数据从20050509开始,每交易日16:30左右更新数据
    :return: pd.DataFrame
    rank                        排名                        int
    vol_party_name              成交量排序的当前名次会员        string(中文)
    vol                         该会员成交量                  int
    vol_chg                     该会员成交量变化量             int
    long_party_name             持多单排序的当前名次会员        string(中文)
    long_open_interest               该会员持多单                  int
    long_open_interest_chg           该会员持多单变化量             int
    short_party_name            持空单排序的当前名次会员        string(中文)
    short_open_interest              该会员持空单                  int
    short_open_interest_chg          该会员持空单变化量             int
    symbol                      标的合约                     string
    var                         品种                        string
    date                        日期                        string YYYYMMDD
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2005, 5, 9):
        print("czce数据源开始日期为20050509,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    if date <= datetime.date(2010, 8, 25):
        url = cons.CZCE_VOL_RANK_URL_1 % (date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)
        r = requests_link(url, 'utf-8')
        r.encoding = 'utf-8'
        soup = BeautifulSoup(r.text, 'lxml', from_encoding="gb2312")
        symbols = []
        for link in soup.find_all('b'):
            strings = (str(link).split(' '))
            if len(strings) > 5:
                try:
                    symbol = chinese_to_english(strings[4])
                except:
                    symbol = strings[4]
                symbols.append(symbol)
        big_dict = {}
        for i in range(len(symbols)):
            symbol = symbols[i]
            table_cut = data[i + 2]
            table_cut.columns = rank_columns
            table_cut = table_cut.iloc[:-1, :]
            table_cut.loc[:, 'rank'] = table_cut.index
            table_cut.loc['合计', 'rank'] = 999
            table_cut.loc['合计', ['vol_party_name', 'long_party_name', 'short_party_name']] = None
            table_cut.loc[:, 'symbol'] = symbol
            table_cut.loc[:, 'variety'] = symbol_varieties(symbol)
            table_cut[intColumns] = table_cut[intColumns].fillna(0)
            table_cut[intColumns] = table_cut[intColumns].astype(str)
            table_cut[intColumns] = table_cut[intColumns].applymap(lambda x: x.replace(',', ''))
            table_cut = table_cut.applymap(lambda x: 0 if x == '-' else x)

            table_cut[intColumns] = table_cut[intColumns].astype(float)
            table_cut[intColumns] = table_cut[intColumns].astype(int)
            big_dict[symbol] = table_cut.reset_index(drop=True)
        return big_dict

    elif date <= datetime.date(2015, 11, 11):
        url = cons.CZCE_VOL_RANK_URL_2 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=1)[1]
    elif date < datetime.date(2017, 12, 28):
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=1)[0]
    else:
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)[0]

    if len(data.columns) < 6:
        return {}

    table = pd.DataFrame(data.iloc[:, :9])
    table.columns = rank_columns
    table.loc[:, 'rank'] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(lambda x: x.replace(',', ''))
    table = table.applymap(lambda x: 0 if x == '-' else x)
    indexes = [i for i in table.index if '合约' in i or '品种' in i]
    indexes.insert(0, 0)
    big_dict = {}

    for i in range(len(indexes)):

        if indexes[i] == 0:
            table_cut = table.loc[:indexes[i + 1], :]
            string = table_cut.index.name

        elif i < len(indexes) - 1:
            table_cut = table.loc[indexes[i]:indexes[i + 1], :]
            string = table_cut.index[0]

        else:
            table_cut = table.loc[indexes[i]:, :]
            string = table_cut.index[0]

        if 'PTA' in string:
            symbol = 'TA'
        else:
            try:
                symbol = chinese_to_english(find_chinese(re.compile(r':(.*) ').findall(string)[0]))
            except:
                symbol = re.compile(r':(.*) ').findall(string)[0]

        var = symbol_varieties(symbol)

        if var in vars_list:
            table_cut = table_cut.dropna(how='any').iloc[1:, :]
            table_cut = table_cut.loc[[x for x in table_cut.index if x in [str(i) for i in range(21)]], :]

            table_cut = _table_cut_cal(table_cut, symbol)
            big_dict[symbol] = table_cut.reset_index(drop=True)

    return big_dict
Ejemplo n.º 5
0
Archivo: cot.py Proyecto: wjw16/dtshare
def get_shfe_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    上海期货交易所前 20 会员持仓排名数据明细
    注:该交易所只公布每个品种内部的标的排名,没有公布品种的总排名
    数据从20020107开始,每交易日16:30左右更新数据
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品
    :return: pd.DataFrame
        rank                        排名                        int
        vol_party_name              成交量排序的当前名次会员        string(中文)
        vol                         该会员成交量                  int
        vol_chg                     该会员成交量变化量             int
        long_party_name             持多单排序的当前名次会员        string(中文)
        long_open_interest          该会员持多单                  int
        long_open_interest_chg      该会员持多单变化量             int
        short_party_name            持空单排序的当前名次会员        string(中文)
        short_open_interest         该会员持空单                  int
        short_open_interest_chg     该会员持空单变化量             int
        symbol                      标的合约                     string
        var                         品种                        string
        date                        日期                        string YYYYMMDD

    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2002, 1, 7):
        print("shfe数据源开始日期为20020107,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    url = cons.SHFE_VOL_RANK_URL % (date.strftime('%Y%m%d'))
    r = requests_link(url, 'utf-8')
    try:
        context = json.loads(r.text)
    except:
        return {}
    df = pd.DataFrame(context['o_cursor'])

    df = df.rename(
        columns={'CJ1': 'vol', 'CJ1_CHG': 'vol_chg', 'CJ2': 'long_open_interest', 'CJ2_CHG': 'long_open_interest_chg',
                 'CJ3': 'short_open_interest',
                 'CJ3_CHG': 'short_open_interest_chg', 'PARTICIPANTABBR1': 'vol_party_name',
                 'PARTICIPANTABBR2': 'long_party_name',
                 'PARTICIPANTABBR3': 'short_party_name', 'PRODUCTNAME': 'product1', 'RANK': 'rank',
                 'INSTRUMENTID': 'symbol', 'PRODUCTSORTNO': 'product2'})

    if len(df.columns) < 3:
        return {}
    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    df = df.applymap(lambda x: None if x == '' else x)
    df['variety'] = df['symbol'].apply(lambda x: symbol_varieties(x))

    df = df[df['rank'] > 0]
    for col in ['PARTICIPANTID1', 'PARTICIPANTID2', 'PARTICIPANTID3', 'product1', 'product2']:
        try:
            del df[col]
        except:
            pass
    get_vars = [var for var in vars_list if var in df['variety'].tolist()]
    big_dict = {}
    for var in get_vars:
        df_var = df[df['variety'] == var]
        for symbol in set(df_var['symbol']):
            df_symbol = df_var[df_var['symbol'] == symbol]
            big_dict[symbol] = df_symbol.reset_index(drop=True)
    return big_dict