예제 #1
0
파일: receipt.py 프로젝트: wjw16/dtshare
def get_czce_receipt_1(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    抓取郑州商品交易所注册仓单数据
    适用20080222至20100824(包括)
    :param date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :type date: str
    :param vars_list: list
    :type vars_list: 合约品种如CF、TA等列表 为空时为所有商品
    :return: 展期收益率数据
    :rtype: pandas.DataFrame
    var             商品品种                     string
    receipt         仓单数                       int
    date            日期                         string YYYYMMDD
    """
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    if date == '20090820':
        return pd.DataFrame()
    url = cons.CZCE_RECEIPT_URL_1 % date
    r = requests_link(url, encoding='utf-8')
    context = r.text
    data = pd.read_html(context)[1]
    records = pd.DataFrame()
    indexes = [x for x in data.index if '品种:' in str(data[0].tolist()[x])]
    ends = [x for x in data.index if '总计' in str(data[0].tolist()[x])]
    for i in list(range(len(indexes))):
        if i != len(indexes) - 1:
            data_cut = data.loc[indexes[i]:ends[i], :]
            data_cut = data_cut.fillna(method='pad')
        else:
            data_cut = data.loc[indexes[i]:, :]
            data_cut = data_cut.fillna(method='pad')
        if 'PTA' in data_cut[0].tolist()[0]:
            var = 'TA'
        else:
            var = chinese_to_english(
                re.sub(r'[A-Z]+', '', data_cut[0].tolist()[0][3:]))
        if var == 'CF':
            receipt = data_cut[6].tolist()[-1]
            receipt_chg = data_cut[7].tolist()[-1]
        else:
            receipt = data_cut[5].tolist()[-1]
            receipt_chg = data_cut[6].tolist()[-1]
        data_dict = {
            'var': var,
            'receipt': int(receipt),
            'receipt_chg': int(receipt_chg),
            'date': date
        }
        records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
예제 #2
0
파일: cot.py 프로젝트: wjw16/dtshare
def _czce_df_read(url, skip_rows, encoding='utf-8'):
    """
    郑州商品交易所的网页数据
    :param url: 网站 string
    :param skip_rows: 去掉前几行 int
    :param encoding: utf-8 or gbk or gb2312
    :return: pd.DataFrame
    """
    r = requests_link(url, encoding)
    data = pd.read_html(r.text, match='.+', flavor=None, header=0, index_col=0, skiprows=skip_rows, attrs=None,
                        parse_dates=False, thousands=', ', encoding="gbk", decimal='.',
                        converters=None, na_values=None, keep_default_na=True)
    return data
예제 #3
0
파일: receipt.py 프로젝트: wjw16/dtshare
def get_shfe_receipt_2(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
        抓取上海商品交易所注册仓单数据
        适用20140519(包括)至今
        Parameters
        ------
            date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
            vars_list: 合约品种如RB、AL等列表 为空时为所有商品
        Return
        -------
            DataFrame:
                展期收益率数据(DataFrame):
                    var             商品品种                     string
                    receipt         仓单数                       int
                    date            日期                         string YYYYMMDD
    """
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    url = cons.SHFE_RECEIPT_URL_2 % date
    r = requests_link(url, encoding='utf-8')
    try:
        context = r.json()
    except:
        return pd.DataFrame()
    data = pd.DataFrame(context['o_cursor'])
    if len(data.columns) < 1:
        return pd.DataFrame()
    records = pd.DataFrame()
    for var in set(data['VARNAME'].tolist()):
        data_cut = data[data['VARNAME'] == var]
        data_dict = {
            'var': chinese_to_english(re.sub(r"\W|[a-zA-Z]", "", var)),
            'receipt': int(data_cut['WRTWGHTS'].tolist()[-1]),
            'receipt_chg': int(data_cut['WRTCHANGE'].tolist()[-1]),
            'date': date
        }
        records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
예제 #4
0
파일: cot.py 프로젝트: wjw16/dtshare
def get_cffex_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    郑州商品交易所前20会员持仓排名数据明细
    注:该交易所既公布品种排名,也公布标的排名
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品, 数据从20100416开始,每交易日16:30左右更新数据
    :return: pd.DataFrame
    rank                        排名                        int
    vol_party_name              成交量排序的当前名次会员        string(中文)
    vol                         该会员成交量                  int
    vol_chg                     该会员成交量变化量             int
    long_party_name             持多单排序的当前名次会员        string(中文)
    long_open_interest          该会员持多单                  int
    long_open_interest_chg      该会员持多单变化量             int
    short_party_name            持空单排序的当前名次会员        string(中文)
    short_open_interest         该会员持空单                  int
    short_open_interest_chg     该会员持空单变化量             int
    symbol                      标的合约                     string
    var                         品种                        string
    date                        日期                        string YYYYMMDD
    """
    vars_list = [i for i in vars_list if i in cons.market_exchange_symbols['cffex']]
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2010, 4, 16):
        print(Exception("cffex数据源开始日期为20100416,跳过"))
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    big_dict = {}
    for var in vars_list:
        url = cons.CFFEX_VOL_RANK_URL % (date.strftime('%Y%m'), date.strftime('%d'), var)
        r = requests_link(url, encoding='gbk')
        if not r:
            return False
        if '网页错误' not in r.text:
            table = pd.read_csv(StringIO(r.text.split('\n交易日,')[1]))
            table = table.dropna(how='any')
            table = table.applymap(lambda x: x.strip() if isinstance(x, str) else x)
            for symbol in set(table['合约']):
                table_cut = table[table['合约'] == symbol]
                table_cut.columns = ['symbol', 'rank'] + rank_columns
                table_cut = _table_cut_cal(pd.DataFrame(table_cut), symbol)
                big_dict[symbol] = table_cut.reset_index(drop=True)
    return big_dict
예제 #5
0
def get_shfe_v_wap(date=None):
    """
        获取上期所日成交均价数据
    Parameters
    ------
        date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    Return
    -------
        DataFrame
            郑商所日交易数据(DataFrame):
                symbol        合约代码
                date          日期
                time_range    v_wap时段,分09:00-10:15和09:00-15:00两类
                v_wap          加权平均成交均价
        或 None(给定日期没有数据)
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    try:
        json_data = json.loads(
            requests_link(
                cons.SHFE_V_WAP_URL % (day.strftime("%Y%m%d")),
                headers=cons.headers,
                encoding="utf-8",
            ).text)
    except requests.HTTPError as reason:
        if reason.response not in [404, 403]:
            print(cons.SHFE_DAILY_URL % (day.strftime("%Y%m%d")), reason)
        return None

    if len(json_data["o_currefprice"]) == 0:
        return None
    try:
        df = pd.DataFrame(json_data["o_currefprice"])
        df["INSTRUMENTID"] = df["INSTRUMENTID"].str.strip()
        df[":B1"].astype("int16")
        return df.rename(columns=cons.SHFE_V_WAP_COLUMNS)[list(
            cons.SHFE_V_WAP_COLUMNS.values())]
    except:
        return None
예제 #6
0
파일: receipt.py 프로젝트: wjw16/dtshare
def get_czce_receipt_3(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    抓取郑州商品交易所注册仓单数据
    适用20151112(包括)至今
    Parameters
    ------
        date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
        vars_list: 合约品种如CF、TA等列表 为空时为所有商品
    Return
    -------
        DataFrame:
            展期收益率数据(DataFrame):`1
                var             商品品种                     string
                receipt         仓单数                       int
                date            日期                         string YYYYMMDD
    """

    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    url = cons.CZCE_RECEIPT_URL_3 % (date[:4], date)
    r = requests_link(url, encoding='utf-8')
    r.encoding = 'utf-8'
    data = pd.read_html(r.text, encoding='gb2312')
    records = pd.DataFrame()
    if len(data) < 4:
        return records
    if int(date) <= 20171227:
        data = data[1:]
    for data_cut in data:
        if len(data_cut.columns) > 3 and len(data_cut.index) > 7:
            last_indexes = [
                x for x in data_cut.index
                if '注:' in str(data_cut[0].tolist()[x])
            ]
            if len(last_indexes) > 0:
                last_index = last_indexes[0] - 1
                data_cut = data_cut.loc[:last_index, :]
            if 'PTA' in data_cut[0].tolist()[0]:
                var = 'TA'
            else:
                strings = data_cut[0].tolist()[0]
                string = strings.split(' ')[0][3:]
                if len(string) > 7:
                    continue
                print(string)
                var = chinese_to_english(re.sub('[A-Z]+', '', string))
            data_cut.columns = data_cut.loc[1, :]
            data_cut = data_cut.fillna(method='pad')
            try:
                receipt = data_cut.loc[:, '仓单数量'].tolist()[-1]
            except:
                receipt = data_cut.loc[:, '仓单数量(保税)'].tolist()[-1]
            receipt_chg = data_cut.loc[:, '当日增减'].tolist()[-1]
            data_dict = {
                'var': var,
                'receipt': int(receipt),
                'receipt_chg': int(receipt_chg),
                'date': date
            }
            records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
예제 #7
0
def get_dce_daily(date=None, symbol_type="futures", retries=0):
    """
        获取大连商品交易所日交易数据
    Parameters
    ------
        date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
        symbol_type: 数据类型, 为'futures'期货 或 'option'期权二者之一
        retries: int, 当前重试次数,达到3次则获取数据失败
    Return
    -------
        DataFrame
            大商所日交易数据(DataFrame):
                symbol        合约代码
                date          日期
                open          开盘价
                high          最高价
                low           最低价
                close         收盘价
                volume        成交量
                open_interest   持仓量
                turnover       成交额
                settle        结算价
                pre_settle    前结算价
                variety       合约类别
        或
        DataFrame
           郑商所每日期权交易数据
                symbol        合约代码
                date          日期
                open          开盘价
                high          最高价
                low           最低价
                close         收盘价
                pre_settle      前结算价
                settle         结算价
                delta          对冲值
                volume         成交量
                open_interest     持仓量
                oi_change       持仓变化
                turnover        成交额
                implied_volatility 隐含波动率
                exercise_volume   行权量
                variety        合约类别
        或 None(给定日期没有交易数据)
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    if retries > 3:
        print("maximum retires for DCE market data: ", day.strftime("%Y%m%d"))
        return

    if symbol_type == "futures":
        url = (cons.DCE_DAILY_URL + "?" +
               urllib.parse.urlencode({
                   "currDate": day.strftime("%Y%m%d"),
                   "year": day.strftime("%Y"),
                   "month": str(int(day.strftime("%m")) - 1),
                   "day": day.strftime("%d"),
               }))
        listed_columns = cons.DCE_COLUMNS
        output_columns = cons.OUTPUT_COLUMNS
    elif symbol_type == "option":
        url = (cons.DCE_DAILY_URL + "?" +
               urllib.parse.urlencode({
                   "currDate": day.strftime("%Y%m%d"),
                   "year": day.strftime("%Y"),
                   "month": str(int(day.strftime("%m")) - 1),
                   "day": day.strftime("%d"),
                   "dayQuotes.trade_type": "1",
               }))
        listed_columns = cons.DCE_OPTION_COLUMNS
        output_columns = cons.OPTION_OUTPUT_COLUMNS
    else:
        print("invalid symbol_type :" + symbol_type +
              ', should be one of "futures" or "option"')
        return

    try:
        response = requests_link(url, method="post", headers=cons.headers).text
    except requests.exceptions.ContentDecodingError as reason:
        return get_dce_daily(day, retries=retries + 1)
    except requests.exceptions.HTTPError as reason:
        if reason.response == 504:
            return get_dce_daily(day, retries=retries + 1)
        elif reason.response != 404:
            print(cons.DCE_DAILY_URL, reason)
        return

    if "错误:您所请求的网址(URL)无法获取" in response:
        return get_dce_daily(day, retries=retries + 1)
    elif "暂无数据" in response:
        return

    data = BeautifulSoup(response, "html.parser").find_all("tr")
    if len(data) == 0:
        return

    dict_data = list()
    implied_data = list()
    for i_data in data[1:]:
        if "小计" in i_data.text or "总计" in i_data.text:
            continue
        x = i_data.find_all("td")
        if symbol_type == "futures":
            row_dict = {"variety": cons.DCE_MAP[x[0].text.strip()]}
            row_dict["symbol"] = row_dict["variety"] + x[1].text.strip()
            for i, field in enumerate(listed_columns):
                field_content = x[i + 2].text.strip()
                if "-" in field_content:
                    row_dict[field] = 0
                elif field in ["volume", "open_interest"]:
                    row_dict[field] = int(field_content.replace(",", ""))
                else:
                    row_dict[field] = float(field_content.replace(",", ""))
            dict_data.append(row_dict)
        elif len(x) == 16:
            m = cons.FUTURES_SYMBOL_PATTERN.match(x[1].text.strip())
            if not m:
                continue
            row_dict = {
                "symbol": x[1].text.strip(),
                "variety": m.group(1).upper(),
                "contract_id": m.group(0),
            }
            for i, field in enumerate(listed_columns):
                field_content = x[i + 2].text.strip()
                if "-" in field_content:
                    row_dict[field] = 0
                elif field in ["volume", "open_interest"]:
                    row_dict[field] = int(field_content.replace(",", ""))
                else:
                    row_dict[field] = float(field_content.replace(",", ""))
            dict_data.append(row_dict)
        elif len(x) == 2:
            implied_data.append({
                "contract_id": x[0].text.strip(),
                "implied_volatility": float(x[1].text.strip()),
            })
    df = pd.DataFrame(dict_data)
    df["date"] = day.strftime("%Y%m%d")
    if symbol_type == "futures":
        return df[output_columns]
    else:
        return pd.merge(
            df,
            pd.DataFrame(implied_data),
            on="contract_id",
            how="left",
            indicator=False,
        )[output_columns]
예제 #8
0
def get_shfe_daily(date=None):
    """
    上海期货交易所-日频率-量价数据
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象, 默认为当前交易日
    :type date: str or datetime.date
    :return: 上海期货交易所-日频率-量价数据
    :rtype: pandas.DataFrame or None
    上期所日交易数据(DataFrame):
    symbol        合约代码
    date          日期
    open          开盘价
    high          最高价
    low           最低价
    close         收盘价
    volume        成交量
    open_interest 持仓量
    turnover      成交额
    settle        结算价
    pre_settle     前结算价
    variety       合约类别
    或 None(给定交易日没有交易数据)
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    try:
        json_data = json.loads(
            requests_link(
                cons.SHFE_DAILY_URL % (day.strftime("%Y%m%d")),
                headers=cons.shfe_headers,
            ).text)
    except requests.HTTPError as reason:
        if reason.response != 404:
            print(cons.SHFE_DAILY_URL % (day.strftime("%Y%m%d")), reason)
        return

    if len(json_data["o_curinstrument"]) == 0:
        return

    df = pd.DataFrame([
        row for row in json_data["o_curinstrument"] if
        row["DELIVERYMONTH"] not in ["小计", "合计"] and row["DELIVERYMONTH"] != ""
    ])
    df["variety"] = df.PRODUCTID.str.slice(0, -6).str.upper()
    df["symbol"] = df["variety"] + df["DELIVERYMONTH"]
    df["date"] = day.strftime("%Y%m%d")
    v_wap_df = get_shfe_v_wap(day)
    if v_wap_df is not None:
        df = pd.merge(
            df,
            v_wap_df[v_wap_df.time_range == "9:00-15:00"],
            on=["date", "symbol"],
            how="left",
        )
        df["turnover"] = df.v_wap * df.VOLUME
    else:
        df["VOLUME"] = df["VOLUME"].apply(lambda x: 0 if x == "" else x)
        df["turnover"] = df["VOLUME"] * df["SETTLEMENTPRICE"]
    df.rename(columns=cons.SHFE_COLUMNS, inplace=True)
    return df[cons.OUTPUT_COLUMNS]
예제 #9
0
def get_cffex_daily(date=None):
    """
    中国金融期货交易所日交易数据
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象; 为空时为当天
    :return: pandas.DataFrame
    中国金融期货交易所日:
    symbol        合约代码
    date          日期
    open          开盘价
    high          最高价
    low          最低价
    close         收盘价
    volume        成交量
    open_interest   持仓量
    turnover      成交额
    settle        结算价
    pre_settle    前结算价
    variety       合约类别
    或 None(给定日期没有交易数据)
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    try:
        html = requests_link(
            cons.CFFEX_DAILY_URL.format(day.strftime("%Y%m"),
                                        day.strftime("%d"),
                                        day.strftime("%Y%m%d")),
            encoding="gbk",
            headers=cons.headers,
        ).text
    except requests.exceptions.HTTPError as reason:
        if reason.response != 404:
            print(
                cons.CFFEX_DAILY_URL %
                (day.strftime("%Y%m"), day.strftime("%d"),
                 day.strftime("%Y%m%d")),
                reason,
            )
        return

    if html.find("网页错误") >= 0:
        return
    html = [
        i.replace(" ", "").split(",") for i in html.split("\n")[:-2]
        if i[0][0] != "小"
    ]

    if html[0][0] != "合约代码":
        return

    dict_data = list()
    day_const = day.strftime("%Y%m%d")
    for row in html[1:]:
        m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
        if not m:
            continue
        row_dict = {"date": day_const, "symbol": row[0], "variety": m.group(1)}

        for i, field in enumerate(cons.CFFEX_COLUMNS):
            if row[i + 1] == "":
                row_dict[field] = 0.0
            elif field in ["volume", "open_interest", "oi_chg"]:
                row_dict[field] = int(row[i + 1])
            else:
                try:
                    row_dict[field] = float(row[i + 1])
                except:
                    pass
        row_dict["pre_settle"] = row_dict["close"] - row_dict["change1"]
        dict_data.append(row_dict)

    return pd.DataFrame(dict_data)[cons.OUTPUT_COLUMNS]
예제 #10
0
파일: cot.py 프로젝트: wjw16/dtshare
def get_dce_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    大连商品交易所前 20 会员持仓排名数据明细
    注: 该交易所既公布品种排名, 也公布标的排名
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date 对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表为空时为所有商品, 数据从20060104开始,每交易日16:30左右更新数据
    :return: pd.DataFrame
    rank                        排名                        int
    vol_party_name              成交量排序的当前名次会员        string(中文)
    vol                         该会员成交量                  int
    vol_chg                     该会员成交量变化量             int
    long_party_name             持多单排序的当前名次会员        string(中文)
    long_open_interest               该会员持多单                  int
    long_open_interest_chg           该会员持多单变化量             int
    short_party_name            持空单排序的当前名次会员        string(中文)
    short_open_interest              该会员持空单                  int
    short_open_interest_chg          该会员持空单变化量             int
    symbol                      标的合约                     string
    var                         品种                        string
    date                        日期                        string YYYYMMDD
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2006, 1, 4):
        print(Exception("dce数据源开始日期为20060104,跳过"))
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    vars_list = [i for i in vars_list if i in cons.market_exchange_symbols['dce']]
    big_dict = {}
    for var in vars_list:
        url = cons.DCE_VOL_RANK_URL % (var.lower(), var.lower(), date.year, date.month - 1, date.day)
        list_60_name = []
        list_60 = []
        list_60_chg = []
        rank = []
        texts = requests_link(url).content.splitlines()
        if not texts:
            return False
        if len(texts) > 30:
            for text in texts:
                line = text.decode("utf-8")
                string_list = line.split()
                try:
                    if int(string_list[0]) <= 20:
                        list_60_name.append(string_list[1])
                        list_60.append(string_list[2])
                        list_60_chg.append(string_list[3])
                        rank.append(string_list[0])
                except:
                    pass
            table_cut = pd.DataFrame({'rank': rank[0:20],
                                      'vol_party_name': list_60_name[0:20],
                                      'vol': list_60[0:20],
                                      'vol_chg': list_60_chg[0:20],
                                      'long_party_name': list_60_name[20:40],
                                      'long_open_interest': list_60[20:40],
                                      'long_open_interest_chg': list_60_chg[20:40],
                                      'short_party_name': list_60_name[40:60],
                                      'short_open_interest': list_60[40:60],
                                      'short_open_interest_chg': list_60_chg[40:60]
                                      })
            table_cut = table_cut.applymap(lambda x: x.replace(',', ''))
            table_cut = _table_cut_cal(table_cut, var)
            big_dict[var] = table_cut.reset_index(drop=True)
    return big_dict
예제 #11
0
파일: cot.py 프로젝트: wjw16/dtshare
def get_czce_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    郑州商品交易所前 20 会员持仓排名数据明细
    注:该交易所既公布了品种排名, 也公布了标的排名
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品, 数据从20050509开始,每交易日16:30左右更新数据
    :return: pd.DataFrame
    rank                        排名                        int
    vol_party_name              成交量排序的当前名次会员        string(中文)
    vol                         该会员成交量                  int
    vol_chg                     该会员成交量变化量             int
    long_party_name             持多单排序的当前名次会员        string(中文)
    long_open_interest               该会员持多单                  int
    long_open_interest_chg           该会员持多单变化量             int
    short_party_name            持空单排序的当前名次会员        string(中文)
    short_open_interest              该会员持空单                  int
    short_open_interest_chg          该会员持空单变化量             int
    symbol                      标的合约                     string
    var                         品种                        string
    date                        日期                        string YYYYMMDD
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2005, 5, 9):
        print("czce数据源开始日期为20050509,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    if date <= datetime.date(2010, 8, 25):
        url = cons.CZCE_VOL_RANK_URL_1 % (date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)
        r = requests_link(url, 'utf-8')
        r.encoding = 'utf-8'
        soup = BeautifulSoup(r.text, 'lxml', from_encoding="gb2312")
        symbols = []
        for link in soup.find_all('b'):
            strings = (str(link).split(' '))
            if len(strings) > 5:
                try:
                    symbol = chinese_to_english(strings[4])
                except:
                    symbol = strings[4]
                symbols.append(symbol)
        big_dict = {}
        for i in range(len(symbols)):
            symbol = symbols[i]
            table_cut = data[i + 2]
            table_cut.columns = rank_columns
            table_cut = table_cut.iloc[:-1, :]
            table_cut.loc[:, 'rank'] = table_cut.index
            table_cut.loc['合计', 'rank'] = 999
            table_cut.loc['合计', ['vol_party_name', 'long_party_name', 'short_party_name']] = None
            table_cut.loc[:, 'symbol'] = symbol
            table_cut.loc[:, 'variety'] = symbol_varieties(symbol)
            table_cut[intColumns] = table_cut[intColumns].fillna(0)
            table_cut[intColumns] = table_cut[intColumns].astype(str)
            table_cut[intColumns] = table_cut[intColumns].applymap(lambda x: x.replace(',', ''))
            table_cut = table_cut.applymap(lambda x: 0 if x == '-' else x)

            table_cut[intColumns] = table_cut[intColumns].astype(float)
            table_cut[intColumns] = table_cut[intColumns].astype(int)
            big_dict[symbol] = table_cut.reset_index(drop=True)
        return big_dict

    elif date <= datetime.date(2015, 11, 11):
        url = cons.CZCE_VOL_RANK_URL_2 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=1)[1]
    elif date < datetime.date(2017, 12, 28):
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=1)[0]
    else:
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)[0]

    if len(data.columns) < 6:
        return {}

    table = pd.DataFrame(data.iloc[:, :9])
    table.columns = rank_columns
    table.loc[:, 'rank'] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(lambda x: x.replace(',', ''))
    table = table.applymap(lambda x: 0 if x == '-' else x)
    indexes = [i for i in table.index if '合约' in i or '品种' in i]
    indexes.insert(0, 0)
    big_dict = {}

    for i in range(len(indexes)):

        if indexes[i] == 0:
            table_cut = table.loc[:indexes[i + 1], :]
            string = table_cut.index.name

        elif i < len(indexes) - 1:
            table_cut = table.loc[indexes[i]:indexes[i + 1], :]
            string = table_cut.index[0]

        else:
            table_cut = table.loc[indexes[i]:, :]
            string = table_cut.index[0]

        if 'PTA' in string:
            symbol = 'TA'
        else:
            try:
                symbol = chinese_to_english(find_chinese(re.compile(r':(.*) ').findall(string)[0]))
            except:
                symbol = re.compile(r':(.*) ').findall(string)[0]

        var = symbol_varieties(symbol)

        if var in vars_list:
            table_cut = table_cut.dropna(how='any').iloc[1:, :]
            table_cut = table_cut.loc[[x for x in table_cut.index if x in [str(i) for i in range(21)]], :]

            table_cut = _table_cut_cal(table_cut, symbol)
            big_dict[symbol] = table_cut.reset_index(drop=True)

    return big_dict
예제 #12
0
파일: cot.py 프로젝트: wjw16/dtshare
def get_shfe_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    上海期货交易所前 20 会员持仓排名数据明细
    注:该交易所只公布每个品种内部的标的排名,没有公布品种的总排名
    数据从20020107开始,每交易日16:30左右更新数据
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品
    :return: pd.DataFrame
        rank                        排名                        int
        vol_party_name              成交量排序的当前名次会员        string(中文)
        vol                         该会员成交量                  int
        vol_chg                     该会员成交量变化量             int
        long_party_name             持多单排序的当前名次会员        string(中文)
        long_open_interest          该会员持多单                  int
        long_open_interest_chg      该会员持多单变化量             int
        short_party_name            持空单排序的当前名次会员        string(中文)
        short_open_interest         该会员持空单                  int
        short_open_interest_chg     该会员持空单变化量             int
        symbol                      标的合约                     string
        var                         品种                        string
        date                        日期                        string YYYYMMDD

    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2002, 1, 7):
        print("shfe数据源开始日期为20020107,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    url = cons.SHFE_VOL_RANK_URL % (date.strftime('%Y%m%d'))
    r = requests_link(url, 'utf-8')
    try:
        context = json.loads(r.text)
    except:
        return {}
    df = pd.DataFrame(context['o_cursor'])

    df = df.rename(
        columns={'CJ1': 'vol', 'CJ1_CHG': 'vol_chg', 'CJ2': 'long_open_interest', 'CJ2_CHG': 'long_open_interest_chg',
                 'CJ3': 'short_open_interest',
                 'CJ3_CHG': 'short_open_interest_chg', 'PARTICIPANTABBR1': 'vol_party_name',
                 'PARTICIPANTABBR2': 'long_party_name',
                 'PARTICIPANTABBR3': 'short_party_name', 'PRODUCTNAME': 'product1', 'RANK': 'rank',
                 'INSTRUMENTID': 'symbol', 'PRODUCTSORTNO': 'product2'})

    if len(df.columns) < 3:
        return {}
    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    df = df.applymap(lambda x: None if x == '' else x)
    df['variety'] = df['symbol'].apply(lambda x: symbol_varieties(x))

    df = df[df['rank'] > 0]
    for col in ['PARTICIPANTID1', 'PARTICIPANTID2', 'PARTICIPANTID3', 'product1', 'product2']:
        try:
            del df[col]
        except:
            pass
    get_vars = [var for var in vars_list if var in df['variety'].tolist()]
    big_dict = {}
    for var in get_vars:
        df_var = df[df['variety'] == var]
        for symbol in set(df_var['symbol']):
            df_symbol = df_var[df_var['symbol'] == symbol]
            big_dict[symbol] = df_symbol.reset_index(drop=True)
    return big_dict