예제 #1
0
def get_roll_yield(date=None, var="LR", symbol1=None, symbol2=None, df=None):
    """
    Roll yield between two contracts on a given trading day.

    When both ``symbol1`` and ``symbol2`` are given, that pair is used as is.
    Otherwise the two contracts of the variety with the largest open interest
    (dominant and sub-dominant) are taken from the daily bars.

    Parameters
    ------
    date: string, a single day, format: YYYYMMDD; today when None
    var: string, variety code such as RB or AL; replaced by the variety of
        symbol1 whenever symbol1 is given
    symbol1: string, contract 1, e.g. rb1810
    symbol2: string, contract 2, e.g. rb1812
    df: DataFrame or None, daily bars holding the contract prices; fetched
        inside the function when it is not a DataFrame, so passing it in
        speeds the calculation up
    Return
    -------
    tuple
    roll_yield
    near_by
    deferred
    None is returned on a non-trading day, when fewer than two contracts are
    available, when a contract has no bar, or when both contracts share the
    same delivery month. False is returned when either close price is 0.
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    date_str = date.strftime("%Y%m%d")
    if date_str not in calendar:
        warnings.warn("%s非交易日" % date_str)
        return None

    # Remember whether the caller named both legs: `var` is always truthy
    # below, so without this flag the explicit pair would be overwritten.
    explicit_pair = bool(symbol1 and symbol2)
    if symbol1:
        var = symbol_varieties(symbol1)
    if not isinstance(df, pd.DataFrame):
        market = symbol_market(var)
        df = get_futures_daily(start_date=date, end_date=date, market=market)

    if var and not explicit_pair:
        # Exchange data may contain rows such as "CUefp"; drop them first.
        df = df[~df["symbol"].str.contains("efp")]
        df = df[df["variety"] == var].sort_values("open_interest",
                                                  ascending=False)
        if len(df) < 2:
            return None
        symbol1, symbol2 = df["symbol"].tolist()[:2]

    closes = []
    for symbol in (symbol1, symbol2):
        matched = df.loc[df["symbol"] == symbol.upper(), "close"]
        if matched.empty:
            warnings.warn("no daily bar found for contract %s" % symbol)
            return None
        # Convert only the matched rows; no assignment onto a filtered slice.
        closes.append(matched.astype("float").tolist()[0])
    close1, close2 = closes

    # The digits of the contract code are split into a year part (everything
    # but the last two digits) and a month part (the last two digits).
    # NOTE(review): three-digit codes (e.g. "905") leave a single year digit,
    # so a pair spanning a decade boundary would get a wrong month gap -
    # confirm whether such pairs can occur.
    digits1 = re.sub(r"\D", "", symbol1)
    digits2 = re.sub(r"\D", "", symbol2)
    month_gap = ((int(digits1[:-2]) - int(digits2[:-2])) * 12 +
                 (int(digits1[-2:]) - int(digits2[-2:])))
    if close1 == 0 or close2 == 0:
        return False
    if month_gap == 0:
        # Same delivery month: the annualised yield would divide by zero.
        warnings.warn("contracts %s and %s have the same delivery month" %
                      (symbol1, symbol2))
        return None

    roll_yield = np.log(close2 / close1) / month_gap * 12
    if month_gap > 0:
        # symbol1 expires later than symbol2, so symbol2 is the near-by leg.
        return roll_yield, symbol2, symbol1
    return roll_yield, symbol1, symbol2
예제 #2
0
def get_roll_yield(date=None, var='IF', symbol1=None, symbol2=None, df=None):
    """
    Roll yield of a variety (dominant vs. sub-dominant contract) or of two
    fixed contracts on a given day.

    When both ``symbol1`` and ``symbol2`` are given, that pair is used as is.
    Otherwise the two contracts of the variety with the largest open interest
    are taken from the daily bars.

    Parameters
    ------
        date: string, a single day, format: YYYYMMDD; today when None
        var: string, variety code such as RB or AL; replaced by the variety
            of symbol1 whenever symbol1 is given
        symbol1: string, contract 1, e.g. rb1810
        symbol2: string, contract 2, e.g. rb1812
        df: DataFrame or None, daily bars holding the contract prices;
            fetched inside the function when it is not a DataFrame, so
            passing it in speeds the calculation up
    Return
    -------
        tuple
        roll_yield
        near_by
        deferred
        None is returned on a non-trading day, when fewer than two contracts
        are available, when a contract has no bar, or when both contracts
        share the same delivery month. False is returned when either close
        price is 0.
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    day = date.strftime('%Y%m%d')
    if day not in calendar:
        warnings.warn('%s非交易日' % day)
        return None

    # `var` is always truthy further down, so keep track of an explicitly
    # requested pair to avoid overwriting it with the top-two contracts.
    pair_given = bool(symbol1 and symbol2)
    if symbol1:
        var = symbol_varieties(symbol1)
    if not isinstance(df, pd.DataFrame):
        market = symbol_market(var)
        df = get_futures_daily(start_day=date, end_day=date, market=market)

    if var and not pair_given:
        df = df[df['variety'] == var].sort_values('open_interest',
                                                  ascending=False)
        if len(df) < 2:
            # Not enough contracts to form a pair.
            return None
        symbol1, symbol2 = df['symbol'].tolist()[:2]

    prices = []
    for contract in (symbol1, symbol2):
        bars = df.loc[df['symbol'] == contract.upper(), 'close']
        if bars.empty:
            warnings.warn('no daily bar found for contract %s' % contract)
            return None
        # Cast just the matched rows instead of assigning onto a slice.
        prices.append(bars.astype('float').tolist()[0])
    close1, close2 = prices

    # Contract digits: everything but the last two is the year part, the
    # last two are the month part.
    a = re.sub(r'\D', '', symbol1)
    b = re.sub(r'\D', '', symbol2)
    c = (int(a[:-2]) - int(b[:-2])) * 12 + (int(a[-2:]) - int(b[-2:]))
    if close1 == 0 or close2 == 0:
        return False
    if c == 0:
        # Same delivery month: annualising would divide by zero.
        warnings.warn('contracts %s and %s have the same delivery month' %
                      (symbol1, symbol2))
        return None

    annualised = np.log(close2 / close1) / c * 12
    if c > 0:
        # symbol1 expires later, so symbol2 is the near-by contract.
        return annualised, symbol2, symbol1
    return annualised, symbol1, symbol2
예제 #3
0
def _table_cut_cal(table_cut, symbol):
    """
    Add a totals row plus symbol/variety columns to one ranking table.

    The totals row is the column-wise sum of the input rows, with its rank
    set to 999 and the three party-name columns set to None.

    :param table_cut: the table slice to process; it is not modified
    :param symbol: contract or variety code written into the 'symbol' column;
        the 'variety' column is derived from it via symbol_varieties
    :return: pd.DataFrame
    """
    var = symbol_varieties(symbol)
    numeric_cols = intColumns + ['rank']
    # Work on a copy so the caller's frame is left untouched.
    table_cut = table_cut.copy()
    table_cut[numeric_cols] = table_cut[numeric_cols].astype(int)
    table_cut_sum = table_cut.sum()
    table_cut_sum['rank'] = 999
    for col in ['vol_party_name', 'long_party_name', 'short_party_name']:
        table_cut_sum[col] = None
    # DataFrame.append no longer exists in pandas 2.x; pd.concat with the
    # same sort flag produces the same frame.
    table_cut = pd.concat([table_cut, pd.DataFrame(table_cut_sum).T],
                          sort=True)
    table_cut['symbol'] = symbol
    table_cut['variety'] = var
    # The transposed totals row is object-typed, so cast back to int.
    table_cut[numeric_cols] = table_cut[numeric_cols].astype(int)
    return table_cut
예제 #4
0
파일: cot.py 프로젝트: wanghan0501/akshare
def _table_cut_cal(table_cut, symbol):
    """
    Add a totals row plus symbol/variety columns to one ranking table.

    The totals row is the column-wise sum of the input rows, with its rank
    set to 999 and the three party-name columns set to None.

    :param table_cut: the table slice to process; it is not modified
    :type table_cut: pandas.DataFrame
    :param symbol: code of the specific contract; the variety is derived
        from it via symbol_varieties
    :type symbol: str
    :return: the input rows followed by the totals row
    :rtype: pandas.DataFrame
    """
    var = symbol_varieties(symbol)
    numeric_cols = intColumns + ["rank"]
    # Copy first so the caller's frame is not changed in place.
    table_cut = table_cut.copy()
    table_cut[numeric_cols] = table_cut[numeric_cols].astype(int)
    table_cut_sum = table_cut.sum()
    table_cut_sum["rank"] = 999
    for col in ["vol_party_name", "long_party_name", "short_party_name"]:
        table_cut_sum[col] = None
    # DataFrame.append no longer exists in pandas 2.x; pd.concat with the
    # same sort flag produces the same frame.
    table_cut = pd.concat([table_cut, pd.DataFrame(table_cut_sum).T],
                          sort=True)
    table_cut["symbol"] = symbol
    table_cut["variety"] = var
    # The transposed totals row is object-typed, so cast back to int.
    table_cut[numeric_cols] = table_cut[numeric_cols].astype(int)
    return table_cut
예제 #5
0
파일: cot.py 프로젝트: dgczy/akshare
def get_rank_sum(date=None, vars_list=cons.contract_symbols):
    """
    Sum the top-5, top-10, top-15 and top-20 member rankings of the four
    futures exchanges.
    Note 1: SHFE and CFFEX publish rankings only per contract inside each
        variety, not per variety; the variety rows in the output are the sum
        over that variety's contracts, not a real variety ranking.
    Note 2: DCE publishes only variety rankings, not contract rankings.
    :param date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object;
        today when None
    :param vars_list: list of variety codes such as RB, AL; all commodities
        by default
    :return: pd.DataFrame with one row per symbol (None on a non-trading day,
        False when an exchange fetch returns False):
    symbol                           contract                     string
    variety                          variety                      string
    vol_top5                         volume sum of the top 5 by volume
    vol_chg_top5                     volume change sum of the top 5 by volume
    long_open_interest_top5          long position sum of the top 5 longs
    long_open_interest_chg_top5      long position change sum of the top 5
    short_open_interest_top5         short position sum of the top 5 shorts
    short_open_interest_chg_top5     short position change sum of the top 5
    vol_top10                        volume sum of the top 10 by volume
    ...
    date                             date                         string YYYYMMDD
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    date_str = date.strftime('%Y%m%d')
    if date_str not in calendar:
        warnings.warn('%s非交易日' % date_str)
        return None
    dce_var = [
        i for i in vars_list if i in cons.market_exchange_symbols['dce']
    ]
    shfe_var = [
        i for i in vars_list if i in cons.market_exchange_symbols['shfe']
    ]
    czce_var = [
        i for i in vars_list if i in cons.market_exchange_symbols['czce']
    ]
    cffex_var = [
        i for i in vars_list if i in cons.market_exchange_symbols['cffex']
    ]
    rank_tables = {}
    if len(dce_var) > 0:
        data = get_dce_rank_table(date, dce_var)
        if data is False:
            return False
        rank_tables.update(data)
    if len(shfe_var) > 0:
        data = get_shfe_rank_table(date, shfe_var)
        if data is False:
            return False
        rank_tables.update(data)
    if len(czce_var) > 0:
        data = get_czce_rank_table(date, czce_var)
        if data is False:
            return False
        rank_tables.update(data)
    if len(cffex_var) > 0:
        data = get_cffex_rank_table(date, cffex_var)
        if data is False:
            return False
        rank_tables.update(data)

    summed_fields = (
        'vol',
        'vol_chg',
        'long_open_interest',
        'long_open_interest_chg',
        'short_open_interest',
        'short_open_interest_chg',
    )
    frames = []
    for table in rank_tables.values():
        table = table.applymap(lambda x: 0 if x == '' else x)
        for symbol_inner in set(table['symbol']):
            var = symbol_varieties(symbol_inner)
            if var not in vars_list:
                continue
            # Copy so the rank cast does not write onto a slice of `table`.
            table_cut = table[table['symbol'] == symbol_inner].copy()
            table_cut['rank'] = table_cut['rank'].astype('float')

            # Column order: symbol, variety, the six sums for each cut-off
            # (5, 10, 15, 20), then date.
            row = {'symbol': symbol_inner, 'variety': var}
            for top_n in (5, 10, 15, 20):
                top_rows = table_cut[table_cut['rank'] <= top_n]
                for field in summed_fields:
                    row['%s_top%d' % (field, top_n)] = top_rows[field].sum()
            row['date'] = date_str
            frames.append(pd.DataFrame(row, index=[0]))

    records = pd.concat(frames) if frames else pd.DataFrame()

    # Test the collected rows themselves: an empty `records` has no
    # 'variety' column to look up.
    if not records.empty:
        present = set(records['variety'])
        add_vars = [
            i for i in cons.market_exchange_symbols['shfe'] +
            cons.market_exchange_symbols['cffex'] if i in present
        ]
        for var in add_vars:
            # SHFE/CFFEX have no variety-level ranking, so build one by
            # summing the rows of that variety.
            records_cut = records[records['variety'] == var]
            var_record = pd.DataFrame(records_cut.sum()).T
            var_record['date'] = date_str
            var_record.loc[:, ['variety', 'symbol']] = var
            records = pd.concat([records, var_record])

    return records.reset_index(drop=True)
예제 #6
0
파일: cot.py 프로젝트: dgczy/akshare
def get_czce_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    Zhengzhou Commodity Exchange top-20 member position ranking details.
    Note: this exchange publishes both variety rankings and contract rankings.
    Data starts on 20050509 and is updated around 16:30 every trading day.
    :param date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object;
        today when None
    :param vars_list: list of variety codes such as RB, AL; all commodities
        by default (not applied to dates up to 20100825, where every table
        found on the page is returned)
    :return: dict mapping symbol -> pd.DataFrame; an empty dict for dates
        before 20050509, non-trading days, or pages with fewer than 6 columns.
        Each frame uses the columns of rank_columns (defined elsewhere) plus
        'symbol' and 'variety'; a totals row carries rank 999.
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2005, 5, 9):
        print("czce数据源开始日期为20050509,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    if date <= datetime.date(2010, 8, 25):
        url = cons.CZCE_VOL_RANK_URL_1 % (date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)
        r = requests_link(url, 'utf-8')
        r.encoding = 'utf-8'
        # r.text is already decoded; bs4 ignores from_encoding for str
        # markup (and warns), so it is not passed.
        soup = BeautifulSoup(r.text, 'lxml')
        symbols = []
        for link in soup.find_all('b'):
            strings = str(link).split(' ')
            if len(strings) > 5:
                try:
                    symbol = chinese_to_english(strings[4])
                except Exception:
                    # Best effort: keep the raw token when translation
                    # fails (the failure type is not visible from here).
                    symbol = strings[4]
                symbols.append(symbol)
        big_dict = {}
        for i, symbol in enumerate(symbols):
            # The i-th symbol is paired with the (i + 2)-th table read.
            table_cut = data[i + 2]
            table_cut.columns = rank_columns
            # Drop the last row, then rebuild a '合计' totals row with
            # rank 999; copy so the writes do not target a slice.
            table_cut = table_cut.iloc[:-1, :].copy()
            table_cut.loc[:, 'rank'] = table_cut.index
            table_cut.loc['合计', 'rank'] = 999
            table_cut.loc[
                '合计',
                ['vol_party_name', 'long_party_name', 'short_party_name'
                 ]] = None
            table_cut.loc[:, 'symbol'] = symbol
            table_cut.loc[:, 'variety'] = symbol_varieties(symbol)
            # Normalise numeric cells: NaN -> 0, strip thousands commas,
            # '-' -> 0, then cast via float to int.
            table_cut[intColumns] = table_cut[intColumns].fillna(0)
            table_cut[intColumns] = table_cut[intColumns].astype(str)
            table_cut[intColumns] = table_cut[intColumns].applymap(
                lambda x: x.replace(',', ''))
            table_cut = table_cut.applymap(lambda x: 0 if x == '-' else x)

            table_cut[intColumns] = table_cut[intColumns].astype(float)
            table_cut[intColumns] = table_cut[intColumns].astype(int)
            big_dict[symbol] = table_cut.reset_index(drop=True)
        return big_dict

    elif date <= datetime.date(2015, 11, 11):
        url = cons.CZCE_VOL_RANK_URL_2 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=1)[1]
    elif date < datetime.date(2017, 12, 28):
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=1)[0]
    else:
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)[0]

    if len(data.columns) < 6:
        return {}

    table = pd.DataFrame(data.iloc[:, :9])
    table.columns = rank_columns
    table.loc[:, 'rank'] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(
        lambda x: x.replace(',', ''))
    table = table.applymap(lambda x: 0 if x == '-' else x)
    # Index labels containing '合约' or '品种' start a new section; the
    # leading 0 is a sentinel for the first section, whose header is held
    # in the index name.
    indexes = [i for i in table.index if '合约' in i or '品种' in i]
    indexes.insert(0, 0)
    big_dict = {}
    header_pattern = re.compile(r':(.*) ')
    rank_labels = {str(i) for i in range(21)}

    for pos, start in enumerate(indexes):
        has_next = pos + 1 < len(indexes)

        if pos == 0:
            # With a single section there is no next header to slice up to.
            table_cut = table.loc[:indexes[pos + 1], :] if has_next else table
            string = table_cut.index.name

        elif has_next:
            table_cut = table.loc[start:indexes[pos + 1], :]
            string = table_cut.index[0]

        else:
            table_cut = table.loc[start:, :]
            string = table_cut.index[0]

        if 'PTA' in string:
            symbol = 'TA'
        else:
            raw_name = header_pattern.findall(string)[0]
            try:
                symbol = chinese_to_english(find_chinese(raw_name))
            except Exception:
                # Best effort: fall back to the raw header text.
                symbol = raw_name

        var = symbol_varieties(symbol)

        if var in vars_list:
            table_cut = table_cut.dropna(how='any').iloc[1:, :]
            # Keep only rows whose index label is a rank "0".."20".
            table_cut = table_cut.loc[[
                x for x in table_cut.index if x in rank_labels
            ], :]

            table_cut = _table_cut_cal(table_cut, symbol)
            big_dict[symbol] = table_cut.reset_index(drop=True)

    return big_dict
예제 #7
0
파일: cot.py 프로젝트: dgczy/akshare
def get_shfe_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    Shanghai Futures Exchange top-20 member position ranking details.
    Note: this exchange publishes rankings only per contract inside each
    variety, not per variety.
    Data starts on 20020107 and is updated around 16:30 every trading day.
    :param date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object;
        today when None
    :param vars_list: list of variety codes such as RB, AL; all commodities
        by default
    :return: dict mapping contract symbol -> pd.DataFrame; an empty dict for
        dates before 20020107, non-trading days, an unparsable response or a
        payload with fewer than 3 columns. Columns produced by the renaming
        below:
        rank                        rank
        vol_party_name              member at this rank by volume
        vol                         that member's volume
        vol_chg                     change of that member's volume
        long_party_name             member at this rank by long position
        long_open_interest          that member's long position
        long_open_interest_chg      change of that member's long position
        short_party_name            member at this rank by short position
        short_open_interest         that member's short position
        short_open_interest_chg     change of that member's short position
        symbol                      contract
        variety                     variety

    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2002, 1, 7):
        print("shfe数据源开始日期为20020107,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    url = cons.SHFE_VOL_RANK_URL % (date.strftime('%Y%m%d'))
    r = requests_link(url, 'utf-8')
    try:
        context = json.loads(r.text)
    except (AttributeError, TypeError, ValueError):
        # No usable response object or a body that is not valid JSON.
        return {}
    df = pd.DataFrame(context['o_cursor'])

    df = df.rename(
        columns={
            'CJ1': 'vol',
            'CJ1_CHG': 'vol_chg',
            'CJ2': 'long_open_interest',
            'CJ2_CHG': 'long_open_interest_chg',
            'CJ3': 'short_open_interest',
            'CJ3_CHG': 'short_open_interest_chg',
            'PARTICIPANTABBR1': 'vol_party_name',
            'PARTICIPANTABBR2': 'long_party_name',
            'PARTICIPANTABBR3': 'short_party_name',
            'PRODUCTNAME': 'product1',
            'RANK': 'rank',
            'INSTRUMENTID': 'symbol',
            'PRODUCTSORTNO': 'product2'
        })

    if len(df.columns) < 3:
        return {}
    # Trim string cells, then turn empty strings into None.
    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    df = df.applymap(lambda x: None if x == '' else x)
    df['variety'] = df['symbol'].apply(symbol_varieties)

    df = df[df['rank'] > 0]
    # Drop helper columns when present; missing ones are ignored.
    df = df.drop(columns=[
        'PARTICIPANTID1', 'PARTICIPANTID2', 'PARTICIPANTID3', 'product1',
        'product2'
    ],
                 errors='ignore')
    fetched_varieties = set(df['variety'])
    get_vars = [var for var in vars_list if var in fetched_varieties]
    big_dict = {}
    for var in get_vars:
        df_var = df[df['variety'] == var]
        # unique() keeps first-appearance order, so the dict order is stable.
        for symbol in df_var['symbol'].unique():
            df_symbol = df_var[df_var['symbol'] == symbol]
            big_dict[symbol] = df_symbol.reset_index(drop=True)
    return big_dict
예제 #8
0
파일: cot.py 프로젝트: wanghan0501/akshare
def get_czce_rank_table(date="20081015", vars_list=cons.contract_symbols):
    """
    Zhengzhou Commodity Exchange top-20 member position ranking details.
    Note: this exchange publishes both variety rankings and contract rankings.
    Data starts on 20050509 and is updated around 16:30 every trading day.
    :param date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object;
        today when None
    :param vars_list: list of variety codes such as RB, AL; all commodities
        by default (not applied to dates up to 20100825, where every table
        found on the page is returned)
    :return: dict mapping symbol -> pd.DataFrame; an empty dict for dates
        before 20050509, non-trading days, or pages with fewer than 6 columns.
        Each frame uses the columns of rank_columns (defined elsewhere) plus
        'symbol' and 'variety'; a totals row carries rank 999.
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2005, 5, 9):
        print("czce数据源开始日期为20050509,跳过")
        return {}
    if date.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
        return {}
    if date <= datetime.date(2010, 8, 25):
        url = cons.CZCE_VOL_RANK_URL_1 % (date.strftime("%Y%m%d"))
        r = requests.get(url)
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "lxml")
        # The page was previously downloaded once more via requests_link and
        # the response discarded; that redundant request is gone.
        data = _czce_df_read(url, skip_rows=0)
        symbols = []
        for link in soup.find_all("b"):
            strings = str(link).split(" ")

            if len(strings) > 5:
                try:
                    symbol = chinese_to_english(strings[4])
                except Exception:
                    # Best effort: keep the raw token when translation
                    # fails (the failure type is not visible from here).
                    symbol = strings[4]
                # One header runs the contract code into the date label.
                if symbol == "SR905日期:":
                    symbol = "SR905"
                symbols.append(symbol)
        big_dict = {}
        for i, symbol in enumerate(symbols):
            # The i-th symbol is paired with the (i + 1)-th table read.
            table_cut = data[i + 1]
            table_cut.columns = rank_columns
            # Drop the last row, then rebuild a "合计" totals row with
            # rank 999; copy so the writes do not target a slice.
            table_cut = table_cut.iloc[:-1, :].copy()
            table_cut.loc[:, "rank"] = table_cut.index
            table_cut.loc["合计", "rank"] = 999
            table_cut.loc[
                "合计",
                ["vol_party_name", "long_party_name", "short_party_name"
                 ]] = None
            table_cut.loc[:, "symbol"] = symbol
            table_cut.loc[:, "variety"] = symbol_varieties(symbol)
            # Normalise numeric cells: NaN -> 0, strip thousands commas,
            # "-" -> 0, then cast via float to int.
            table_cut[intColumns] = table_cut[intColumns].fillna(0)
            table_cut[intColumns] = table_cut[intColumns].astype(str)
            table_cut[intColumns] = table_cut[intColumns].applymap(
                lambda x: x.replace(",", ""))
            table_cut = table_cut.applymap(lambda x: 0 if x == "-" else x)

            table_cut[intColumns] = table_cut[intColumns].astype(float)
            table_cut[intColumns] = table_cut[intColumns].astype(int)
            big_dict[symbol] = table_cut.reset_index(drop=True)
        return big_dict

    elif date <= datetime.date(2015, 11, 11):  # 20200311 format fix
        url = cons.CZCE_VOL_RANK_URL_2 % (date.year, date.strftime("%Y%m%d"))
        data = _czce_df_read(url, skip_rows=0, header=None)[3:]
        # Stack the tables from the fourth one on, then promote the first
        # stacked row to column labels.
        big_df = pd.DataFrame()
        for item in data:
            big_df = pd.concat([big_df, item], axis=0, ignore_index=False)
        big_df.columns = big_df.iloc[0, :].tolist()
        data = big_df.iloc[1:, :]
    elif date < datetime.date(2017, 12, 28):  # 20200311 format fix
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime("%Y%m%d"))
        data = _czce_df_read(url, skip_rows=0, header=0)[1]
    else:
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime("%Y%m%d"))
        data = _czce_df_read(url, skip_rows=0)[0]

    if len(data.columns) < 6:
        return {}

    table = pd.DataFrame(data.iloc[:, :9])
    # Keep the first column label as the index name: it is read back below
    # as the header text of the first section.
    table.index.name = table.columns[0]
    table.columns = rank_columns
    table.loc[:, "rank"] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(
        lambda x: x.replace(",", ""))
    table = table.applymap(lambda x: 0 if x == "-" else x)
    # Index labels containing "合约" or "品种" start a new section; the
    # leading 0 is a sentinel for the first section.
    indexes = [i for i in table.index if "合约" in i or "品种" in i]
    indexes.insert(0, 0)
    big_dict = {}
    header_pattern = re.compile(r":(.*) ")
    rank_labels = {str(i) for i in range(21)}

    for pos, start in enumerate(indexes):
        has_next = pos + 1 < len(indexes)

        if pos == 0:
            # With a single section there is no next header to slice up to.
            table_cut = table.loc[:indexes[pos + 1], :] if has_next else table
            string = table_cut.index.name

        elif has_next:
            table_cut = table.loc[start:indexes[pos + 1], :]
            string = table_cut.index[0]

        else:
            table_cut = table.loc[start:, :]
            string = table_cut.index[0]

        if "PTA" in string:
            symbol = "TA"
        else:
            raw_name = header_pattern.findall(string)[0]
            try:
                symbol = chinese_to_english(find_chinese(raw_name))
            except Exception:
                # Best effort: fall back to the raw header text.
                symbol = raw_name

        var = symbol_varieties(symbol)

        if var in vars_list:
            table_cut = table_cut.dropna(how="any").iloc[1:, :]
            # Keep only rows whose index label is a rank "0".."20".
            table_cut = table_cut.loc[[
                x for x in table_cut.index if x in rank_labels
            ], :]

            table_cut = _table_cut_cal(table_cut, symbol)
            big_dict[symbol.strip()] = table_cut.reset_index(drop=True)

    return big_dict
예제 #9
0
파일: cot.py 프로젝트: wanghan0501/akshare
def get_shfe_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    Shanghai Futures Exchange top-20 member position ranking details.
    Note: this exchange publishes rankings only per contract inside each
    variety, not per variety.
    Data starts on 20020107 and is updated around 16:30 every trading day.
    :param date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object;
        today when None
    :param vars_list: list of variety codes such as RB, AL; all commodities
        by default
    :return: dict mapping contract symbol -> pd.DataFrame; an empty dict for
        dates before 20020107, non-trading days, an unparsable response or a
        payload with fewer than 3 columns. Columns produced by the renaming
        below:
    rank                        rank
    vol_party_name              member at this rank by volume
    vol                         that member's volume
    vol_chg                     change of that member's volume
    long_party_name             member at this rank by long position
    long_open_interest          that member's long position
    long_open_interest_chg      change of that member's long position
    short_party_name            member at this rank by short position
    short_open_interest         that member's short position
    short_open_interest_chg     change of that member's short position
    symbol                      contract
    variety                     variety
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2002, 1, 7):
        print("shfe数据源开始日期为20020107,跳过")
        return {}
    if date.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
        return {}
    url = cons.SHFE_VOL_RANK_URL % (date.strftime("%Y%m%d"))
    r = requests_link(url, "utf-8")
    try:
        context = json.loads(r.text)
    except (AttributeError, TypeError, ValueError):
        # No usable response object or a body that is not valid JSON.
        return {}
    df = pd.DataFrame(context["o_cursor"])

    df = df.rename(
        columns={
            "CJ1": "vol",
            "CJ1_CHG": "vol_chg",
            "CJ2": "long_open_interest",
            "CJ2_CHG": "long_open_interest_chg",
            "CJ3": "short_open_interest",
            "CJ3_CHG": "short_open_interest_chg",
            "PARTICIPANTABBR1": "vol_party_name",
            "PARTICIPANTABBR2": "long_party_name",
            "PARTICIPANTABBR3": "short_party_name",
            "PRODUCTNAME": "product1",
            "RANK": "rank",
            "INSTRUMENTID": "symbol",
            "PRODUCTSORTNO": "product2",
        })

    if len(df.columns) < 3:
        return {}
    # Trim string cells, then turn empty strings into None.
    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    df = df.applymap(lambda x: None if x == "" else x)
    df["variety"] = df["symbol"].apply(symbol_varieties)

    df = df[df["rank"] > 0]
    # Drop helper columns when present; missing ones are ignored.
    df = df.drop(
        columns=[
            "PARTICIPANTID1",
            "PARTICIPANTID2",
            "PARTICIPANTID3",
            "product1",
            "product2",
        ],
        errors="ignore",
    )
    fetched_varieties = set(df["variety"])
    get_vars = [var for var in vars_list if var in fetched_varieties]
    big_dict = {}
    for var in get_vars:
        df_var = df[df["variety"] == var]
        # unique() keeps first-appearance order, so the dict order is stable.
        for symbol in df_var["symbol"].unique():
            df_symbol = df_var[df_var["symbol"] == symbol]
            big_dict[symbol] = df_symbol.reset_index(drop=True)
    return big_dict