コード例 #1
0
ファイル: cot.py プロジェクト: yangxiaozhe13/akshare
def futures_dce_position_rank_other(date="20160104"):
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    url = "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html"
    payload = {
        "memberDealPosiQuotes.variety": "c",
        "memberDealPosiQuotes.trade_type": "0",
        "year": date.year,
        "month": date.month - 1,
        "day": date.day,
        "contract.contract_id": "all",
        "contract.variety_id": "c",
        "contract": "",
    }
    r = requests.post(url, data=payload)
    soup = BeautifulSoup(r.text, "lxml")
    symbol_list = [item["onclick"].strip("javascript:setVariety(").strip("');") for item in
                   soup.find_all(attrs={"class": "selBox"})[-3].find_all("input")]
    big_df = dict()
    for symbol in symbol_list:
        payload = {
            "memberDealPosiQuotes.variety": symbol,
            "memberDealPosiQuotes.trade_type": "0",
            "year": date.year,
            "month": date.month - 1,
            "day": date.day,
            "contract.contract_id": "all",
            "contract.variety_id": symbol,
            "contract": "",
        }
        r = requests.post(url, data=payload)
        soup = BeautifulSoup(r.text, "lxml")
        contract_list = [item["onclick"].strip("javascript:setContract_id('").strip("');") for item in
                         soup.find_all(attrs={"name": "contract"})]
        if contract_list:
            if len(contract_list[0]) == 4:
                contract_list = [symbol + item for item in contract_list]
                for contract in contract_list:
                    payload = {
                        "memberDealPosiQuotes.variety": symbol,
                        "memberDealPosiQuotes.trade_type": "0",
                        "year": date.year,
                        "month": date.month - 1,
                        "day": date.day,
                        "contract.contract_id": contract,
                        "contract.variety_id": symbol,
                        "contract": "",
                    }
                    r = requests.post(url, data=payload)
                    temp_df = pd.read_html(r.text)[1].iloc[:-1, :]
                    temp_df.columns = ["rank", "vol_party_name", "vol", "vol_chg", "_", "long_party_name",
                                       "long_open_interest",
                                       "long_open_interest_chg", "_", "short_party_name", "short_open_interest",
                                       "short_open_interest_chg"]
                    temp_df["variety"] = symbol.upper()
                    temp_df["symbol"] = contract
                    temp_df = temp_df[["long_open_interest", "long_open_interest_chg", "long_party_name", "rank",
                                       "short_open_interest", "short_open_interest_chg", "short_party_name",
                                       "vol", "vol_chg", "vol_party_name", "symbol", "variety"]]
                    big_df[contract] = temp_df
    return big_df
コード例 #2
0
ファイル: daily_bar.py プロジェクト: waytrylee/akshare
def get_cffex_daily(date="20200416"):
    """
    中国金融期货交易所日交易数据
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象; 为空时为当天
    :return: pandas.DataFrame
    中国金融期货交易所日:
    symbol        合约代码
    date          日期
    open          开盘价
    high          最高价
    low          最低价
    close         收盘价
    volume        成交量
    open_interest   持仓量
    turnover      成交额
    settle        结算价
    pre_settle    前结算价
    variety       合约类别
    或 None(给定日期没有交易数据)
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    try:
        html = requests_link(
            cons.CFFEX_DAILY_URL.format(day.strftime("%Y%m"),
                                        day.strftime("%d"),
                                        day.strftime("%Y%m%d")),
            encoding="gbk",
            headers=cons.headers,
        ).text
    except requests.exceptions.HTTPError as reason:
        if reason.response != 404:
            print(
                cons.CFFEX_DAILY_URL %
                (day.strftime("%Y%m"), day.strftime("%d"),
                 day.strftime("%Y%m%d")),
                reason,
            )
        return

    if html.find("网页错误") >= 0:
        return
    html = [
        i.replace(" ", "").split(",") for i in html.split("\n")[:-2]
        if i[0][0] != "小"
    ]

    if html[0][0] != "合约代码":
        return

    dict_data = list()
    day_const = day.strftime("%Y%m%d")
    for row in html[1:]:
        m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
        if not m:
            continue
        row_dict = {"date": day_const, "symbol": row[0], "variety": m.group(1)}

        for i, field in enumerate(cons.CFFEX_COLUMNS):
            if row[i + 1] == "":
                row_dict[field] = 0.0
            elif field in ["volume", "open_interest", "oi_chg"]:
                row_dict[field] = int(row[i + 1])
            else:
                try:
                    row_dict[field] = float(row[i + 1])
                except:
                    pass
        row_dict["pre_settle"] = row_dict["close"] - row_dict["change1"]
        dict_data.append(row_dict)

    return pd.DataFrame(dict_data)[cons.OUTPUT_COLUMNS]
コード例 #3
0
ファイル: cot.py プロジェクト: fuyr/akshare
def get_rank_sum(date="20200727", vars_list=cons.contract_symbols):
    """
    抓取四个期货交易所前5、前10、前15、前20会员持仓排名数据
    注1:由于上期所和中金所只公布每个品种内部的标的排名, 没有公布品种的总排名;
        所以函数输出的品种排名是由品种中的每个标的加总获得, 并不是真实的品种排名列表
    注2:大商所只公布了品种排名, 未公布标的排名
    :param date: 日期 format: YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如 RB, AL等列表 为空时为所有商品
    :return: pd.DataFrame:
    展期收益率数据
    symbol                           标的合约                     string
    var                              商品品种                     string
    vol_top5                         成交量前5会员成交量总和         int
    vol_chg_top5                     成交量前5会员成交量变化总和      int
    long_open_interest_top5          持多单前5会员持多单总和         int
    long_open_interest_chg_top5      持多单前5会员持多单变化总和      int
    short_open_interest_top5         持空单前5会员持空单总和         int
    short_open_interest_chg_top5     持空单前5会员持空单变化总和      int
    vol_top10                        成交量前10会员成交量总和        int
    ...
    date                             日期                         string YYYYMMDD
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    dce_var = [i for i in vars_list if i in cons.market_exchange_symbols['dce']]
    shfe_var = [i for i in vars_list if i in cons.market_exchange_symbols['shfe']]
    czce_var = [i for i in vars_list if i in cons.market_exchange_symbols['czce']]
    cffex_var = [i for i in vars_list if i in cons.market_exchange_symbols['cffex']]
    big_dict = {}
    if len(dce_var) > 0:
        data = get_dce_rank_table(date, dce_var)
        if data is False:
            return False
        big_dict.update(data)
    if len(shfe_var) > 0:
        data = get_shfe_rank_table(date, shfe_var)
        if data is False:
            return False
        big_dict.update(data)
    if len(czce_var) > 0:
        data = get_czce_rank_table(date, czce_var)
        if data is False:
            return False
        big_dict.update(data)
    if len(cffex_var) > 0:
        data = get_cffex_rank_table(date, cffex_var)
        if data is False:
            return False
        big_dict.update(data)
    records = pd.DataFrame()

    for symbol, table in big_dict.items():
        table = table.applymap(lambda x: 0 if x == '' else x)
        for symbol_inner in set(table['symbol']):

            var = symbol_varieties(symbol_inner)
            if var in vars_list:
                if var in czce_var:
                    for col in [item for item in table.columns if item.find('open_interest') > -1] + ['vol', 'vol_chg']:
                        table[col] = [float(value.replace(',', '')) if value != '-' else 0.0 for value in table[col]]

                table_cut = table[table['symbol'] == symbol_inner]
                table_cut['rank'] = table_cut['rank'].astype('float')
                table_cut_top5 = table_cut[table_cut['rank'] <= 5]
                table_cut_top10 = table_cut[table_cut['rank'] <= 10]
                table_cut_top15 = table_cut[table_cut['rank'] <= 15]
                table_cut_top20 = table_cut[table_cut['rank'] <= 20]

                big_dict = {'symbol': symbol_inner, 'variety': var,

                            'vol_top5': table_cut_top5['vol'].sum(), 'vol_chg_top5': table_cut_top5['vol_chg'].sum(),
                            'long_open_interest_top5': table_cut_top5['long_open_interest'].sum(),
                            'long_open_interest_chg_top5': table_cut_top5['long_open_interest_chg'].sum(),
                            'short_open_interest_top5': table_cut_top5['short_open_interest'].sum(),
                            'short_open_interest_chg_top5': table_cut_top5['short_open_interest_chg'].sum(),

                            'vol_top10': table_cut_top10['vol'].sum(),
                            'vol_chg_top10': table_cut_top10['vol_chg'].sum(),
                            'long_open_interest_top10': table_cut_top10['long_open_interest'].sum(),
                            'long_open_interest_chg_top10': table_cut_top10['long_open_interest_chg'].sum(),
                            'short_open_interest_top10': table_cut_top10['short_open_interest'].sum(),
                            'short_open_interest_chg_top10': table_cut_top10['short_open_interest_chg'].sum(),

                            'vol_top15': table_cut_top15['vol'].sum(),
                            'vol_chg_top15': table_cut_top15['vol_chg'].sum(),
                            'long_open_interest_top15': table_cut_top15['long_open_interest'].sum(),
                            'long_open_interest_chg_top15': table_cut_top15['long_open_interest_chg'].sum(),
                            'short_open_interest_top15': table_cut_top15['short_open_interest'].sum(),
                            'short_open_interest_chg_top15': table_cut_top15['short_open_interest_chg'].sum(),

                            'vol_top20': table_cut_top20['vol'].sum(),
                            'vol_chg_top20': table_cut_top20['vol_chg'].sum(),
                            'long_open_interest_top20': table_cut_top20['long_open_interest'].sum(),
                            'long_open_interest_chg_top20': table_cut_top20['long_open_interest_chg'].sum(),
                            'short_open_interest_top20': table_cut_top20['short_open_interest'].sum(),
                            'short_open_interest_chg_top20': table_cut_top20['short_open_interest_chg'].sum(),

                            'date': date.strftime('%Y%m%d')
                            }
                records = records.append(pd.DataFrame(big_dict, index=[0]))

    if len(big_dict.items()) > 0:
        add_vars = [i for i in cons.market_exchange_symbols['dce'] + cons.market_exchange_symbols['shfe'] + cons.market_exchange_symbols['cffex'] if
                    i in records['variety'].tolist()]
        for var in add_vars:
            records_cut = records[records['variety'] == var]
            var_record = pd.DataFrame(records_cut.sum()).T
            var_record['date'] = date.strftime('%Y%m%d')
            var_record.loc[:, ['variety', 'symbol']] = var
            records = records.append(var_record)

    return records.reset_index(drop=True)
コード例 #4
0
ファイル: receipt.py プロジェクト: wanghan0501/akshare
def get_czce_receipt_3(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    抓取郑州商品交易所注册仓单数据
    适用20151112(包括)至今
    Parameters
    ------
        date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
        vars_list: 合约品种如CF、TA等列表 为空时为所有商品
    Return
    -------
        DataFrame:
            展期收益率数据(DataFrame):`1
                var             商品品种                     string
                receipt         仓单数                       int
                date            日期                         string YYYYMMDD
    """

    date = (cons.convert_date(date).strftime("%Y%m%d")
            if date is not None else datetime.date.today())
    if date not in calendar:
        warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
        return None
    url = cons.CZCE_RECEIPT_URL_3 % (date[:4], date)
    r = requests_link(url, encoding="utf-8")
    r.encoding = "utf-8"
    data = pd.read_html(r.text, encoding="gb2312")
    records = pd.DataFrame()
    if len(data) < 4:
        return records
    if int(date) <= 20171227:
        data = data[1:]
    for data_cut in data:
        if len(data_cut.columns) > 3 and len(data_cut.index) > 7:
            last_indexes = [
                x for x in data_cut.index
                if "注:" in str(data_cut[0].tolist()[x])
            ]
            if len(last_indexes) > 0:
                last_index = last_indexes[0] - 1
                data_cut = data_cut.loc[:last_index, :]
            if "PTA" in data_cut[0].tolist()[0]:
                var = "TA"
            else:
                strings = data_cut[0].tolist()[0]
                string = strings.split(" ")[0][3:]
                if len(string) > 7:
                    continue
                print(string)
                var = chinese_to_english(re.sub("[A-Z]+", "", string))
            data_cut.columns = data_cut.loc[1, :]
            data_cut = data_cut.fillna(method="pad")
            try:
                receipt = data_cut.loc[:, "仓单数量"].tolist()[-1]
            except:
                receipt = data_cut.loc[:, "仓单数量(保税)"].tolist()[-1]
            receipt_chg = data_cut.loc[:, "当日增减"].tolist()[-1]
            data_dict = {
                "var": var,
                "receipt": int(receipt),
                "receipt_chg": int(receipt_chg),
                "date": date,
            }
            records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records["var"]
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
コード例 #5
0
ファイル: futures_daily_bar.py プロジェクト: zhupr/akshare
def get_dce_daily(date: str = "20030115") -> pd.DataFrame:
    """
    大连商品交易所日交易数据
    http://www.dce.com.cn/dalianshangpin/xqsj/tjsj26/rtj/rxq/index.html
    :param date: 交易日, e.g., 20200416
    :type date: str
    :return: 具体交易日的个品种行情数据
    :rtype: pandas.DataFrame
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    url = "http://www.dce.com.cn/publicweb/quotesdata/exportDayQuotesChData.html"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Length": "86",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "www.dce.com.cn",
        "Origin": "http://www.dce.com.cn",
        "Pragma": "no-cache",
        "Referer": "http://www.dce.com.cn/publicweb/quotesdata/dayQuotesCh.html",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36",
    }
    params = {
        "dayQuotes.variety": "all",
        "dayQuotes.trade_type": "0",
        "year": date[:4],
        "month": str(int(date[4:6]) - 1),
        "day": date[6:],
        "exportFlag": "excel",
    }
    r = requests.post(url, data=params, headers=headers)
    data_df = pd.read_excel(BytesIO(r.content))
    data_df = data_df[~data_df["商品名称"].str.contains("小计")]
    data_df = data_df[~data_df["商品名称"].str.contains("总计")]
    data_df["variety"] = data_df["商品名称"].map(lambda x: cons.DCE_MAP[x])
    data_df["symbol"] = data_df["variety"] + data_df["交割月份"].astype(int).astype(str)
    del data_df["商品名称"]
    del data_df["交割月份"]
    data_df.columns = ["open", "high", "low", "close",
                       "pre_settle", "settle", "_", "_",
                       "volume", "open_interest", "_", "turnover", "variety", "symbol"]
    data_df["date"] = date
    data_df = data_df[
        ["symbol", "date", "open", "high", "low", "close", "volume", "open_interest", "turnover", "settle",
         "pre_settle", "variety"]]
    data_df = data_df.applymap(lambda x: x.replace(",", ""))
    data_df = data_df.astype({"open": "float",
                              "high": "float",
                              "low": "float",
                              "close": "float",
                              "volume": "float",
                              "open_interest": "float",
                              "turnover": "float",
                              "settle": "float",
                              "pre_settle": "float",
                              })
    return data_df
コード例 #6
0
ファイル: cot.py プロジェクト: fuyr/akshare
def get_dce_rank_table(date="20201026", vars_list=cons.contract_symbols):
    """
    大连商品交易所前 20 会员持仓排名数据明细
    注: 该交易所只公布标的合约排名
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date 对象, 为空时为当天
    :param vars_list: 合约品种如 RB、AL等列表为空时为所有商品, 数据从 20060104 开始,每交易日 16:30 左右更新数据
    :return: pandas.DataFrame
    rank                        排名                        int
    vol_party_name              成交量排序的当前名次会员      string(中文)
    vol                         该会员成交量                 int
    vol_chg                     该会员成交量变化量            int
    long_party_name             持多单排序的当前名次会员      string(中文)
    long_open_interest          该会员持多单                 int
    long_open_interest_chg      该会员持多单变化量            int
    short_party_name            持空单排序的当前名次会员       string(中文)
    short_open_interest         该会员持空单                  int
    short_open_interest_chg     该会员持空单变化量             int
    symbol                      标的合约                     string
    var                         品种                        string
    date                        日期                        string YYYYMMDD
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2006, 1, 4):
        print(Exception("大连商品交易所数据源开始日期为20060104,跳过"))
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    vars_list = [i for i in vars_list if i in cons.market_exchange_symbols['dce']]
    big_dict = {}
    for var in vars_list:
        symbol_list = _get_dce_contract_list(date, var)
        for symbol in symbol_list:
            url = cons.DCE_VOL_RANK_URL_1 % (var.lower(), symbol, var.lower(), date.year, date.month - 1, date.day)
            list_60_name = []
            list_60 = []
            list_60_chg = []
            rank = []
            texts = requests_link(url).content.splitlines()
            # print(texts)
            if not texts:
                return False
            if len(texts) > 30:
                for text in texts:
                    line = text.decode("utf-8")
                    string_list = line.split()
                    try:
                        if int(string_list[0]) <= 20:
                            list_60_name.append(string_list[1])
                            list_60.append(string_list[2])
                            list_60_chg.append(string_list[3])
                            rank.append(string_list[0])
                    except:
                        pass
                table_cut = pd.DataFrame({'rank': rank[0:20],
                                          'vol_party_name': list_60_name[0:20],
                                          'vol': list_60[0:20],
                                          'vol_chg': list_60_chg[0:20],
                                          'long_party_name': list_60_name[20:40],
                                          'long_open_interest': list_60[20:40],
                                          'long_open_interest_chg': list_60_chg[20:40],
                                          'short_party_name': list_60_name[40:60],
                                          'short_open_interest': list_60[40:60],
                                          'short_open_interest_chg': list_60_chg[40:60]
                                          })
                table_cut = table_cut.applymap(lambda x: x.replace(',', ''))
                table_cut = _table_cut_cal(table_cut, symbol)
                big_dict[symbol] = table_cut.reset_index(drop=True)
    return big_dict
コード例 #7
0
ファイル: receipt.py プロジェクト: zhouzyc/akshare
def get_receipt(start_day: str = None,
                end_day: str = None,
                vars_list: List = cons.contract_symbols):
    """
    获取大宗商品注册仓单数量
    :param start_day: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :type start_day: str
    :param end_day: 结束数据 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :type end_day: str
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品
    :type vars_list: str
    :return: 展期收益率数据
    :rtype: pandas.DataFrame
    """
    start_day = cons.convert_date(
        start_day) if start_day is not None else datetime.date.today()
    end_day = cons.convert_date(
        end_day) if end_day is not None else cons.convert_date(
            cons.get_latest_data_date(datetime.datetime.now()))
    records = pd.DataFrame()
    while start_day <= end_day:
        if start_day.strftime('%Y%m%d') not in calendar:
            warnings.warn(f"{start_day.strftime('%Y%m%d')}非交易日")
        else:
            print(start_day)
            for market, market_vars in cons.market_exchange_symbols.items():

                if market == 'dce':
                    if start_day >= datetime.date(2009, 4, 7):
                        f = get_dce_receipt
                    else:
                        print('20090407起,dce每交易日更新仓单数据')
                        f = None
                elif market == 'shfe':
                    if datetime.date(2008, 10,
                                     6) <= start_day <= datetime.date(
                                         2014, 5, 16):
                        f = get_shfe_receipt_1
                    elif start_day > datetime.date(2014, 5, 16):
                        f = get_shfe_receipt_2
                    else:
                        f = None
                        print('20081006起,shfe每交易日更新仓单数据')
                elif market == 'czce':
                    if datetime.date(2008, 3, 3) <= start_day <= datetime.date(
                            2010, 8, 24):
                        f = get_czce_receipt_1
                    elif datetime.date(2010, 8,
                                       24) < start_day <= datetime.date(
                                           2015, 11, 11):
                        f = get_czce_receipt_2
                    elif start_day > datetime.date(2015, 11, 11):
                        f = get_czce_receipt_3
                    else:
                        f = None
                        print('20080303起,czce每交易日更新仓单数据')
                get_vars = [var for var in vars_list if var in market_vars]
                if market != 'cffex' and get_vars != []:
                    if f is not None:
                        records = records.append(f(start_day, get_vars))

        start_day += datetime.timedelta(days=1)
    records.reset_index(drop=True, inplace=True)
    if "MA" in records["var"].to_list():
        replace_index = records[records["var"] == "MA"]["receipt"].astype(
            str).str.split("0", expand=True)[0].index
        records.loc[replace_index, "receipt"] = records[
            records["var"] == "MA"]["receipt"].astype(str).str.split(
                "0", expand=True)[0]
    return records
コード例 #8
0
ファイル: futures_daily_bar.py プロジェクト: zhupr/akshare
def get_czce_daily(date="20200901"):
    """
    郑州商品交易所-日频率-量价数据
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象,默认为当前交易日; 日期需要大于200100824
    :type date: str or datetime.date
    :return: 郑州商品交易所-日频率-量价数据
    :rtype: pandas.DataFrame or None
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn(f"{day.strftime('%Y%m%d')}非交易日")
        return None
    if day > datetime.date(2010, 8, 24):
        if day > datetime.date(2015, 9, 19):
            u = cons.CZCE_DAILY_URL_3
            url = u % (day.strftime("%Y"), day.strftime("%Y%m%d"))
        elif day < datetime.date(2015, 9, 19):
            u = cons.CZCE_DAILY_URL_2
            url = u % (day.strftime("%Y"), day.strftime("%Y%m%d"))
        listed_columns = cons.CZCE_COLUMNS
        output_columns = cons.OUTPUT_COLUMNS
        try:
            r = requests.get(url)
            html = r.text
        except requests.exceptions.HTTPError as reason:
            if reason.response.status_code != 404:
                print(
                    cons.CZCE_DAILY_URL_3
                    % (day.strftime("%Y"), day.strftime("%Y%m%d")),
                    reason,
                )
            return
        if html.find("您的访问出错了") >= 0 or html.find("无期权每日行情交易记录") >= 0:
            return
        html = [
            i.replace(" ", "").split("|")
            for i in html.split("\n")[:-4]
            if i[0][0] != "小"
        ]

        if day > datetime.date(2015, 9, 19):
            if html[1][0] not in ["品种月份", "品种代码"]:
                return
            dict_data = list()
            day_const = int(day.strftime("%Y%m%d"))
            for row in html[2:]:
                m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
                if not m:
                    continue
                row_dict = {"date": day_const, "symbol": row[0], "variety": m.group(1)}
                for i, field in enumerate(listed_columns):
                    if row[i + 1] == "\r":
                        row_dict[field] = 0.0
                    elif field in [
                        "volume",
                        "open_interest",
                        "oi_chg",
                        "exercise_volume",
                    ]:
                        row[i + 1] = row[i + 1].replace(",", "")
                        row_dict[field] = int(row[i + 1])
                    else:
                        row[i + 1] = row[i + 1].replace(",", "")
                        row_dict[field] = float(row[i + 1])
                dict_data.append(row_dict)

            return pd.DataFrame(dict_data)[output_columns]
        elif day < datetime.date(2015, 9, 19):
            dict_data = list()
            day_const = int(day.strftime("%Y%m%d"))
            for row in html[1:]:
                row = row[0].split(",")
                m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
                if not m:
                    continue
                row_dict = {"date": day_const, "symbol": row[0], "variety": m.group(1)}
                for i, field in enumerate(listed_columns):
                    if row[i + 1] == "\r":
                        row_dict[field] = 0.0
                    elif field in [
                        "volume",
                        "open_interest",
                        "oi_chg",
                        "exercise_volume",
                    ]:
                        row_dict[field] = int(float(row[i + 1]))
                    else:
                        row_dict[field] = float(row[i + 1])
                dict_data.append(row_dict)
            return pd.DataFrame(dict_data)[output_columns]

    if day <= datetime.date(2010, 8, 24):
        u = cons.CZCE_DAILY_URL_1
        url = u % day.strftime("%Y%m%d")
        listed_columns = cons.CZCE_COLUMNS_2
        output_columns = cons.OUTPUT_COLUMNS
        df = pd.read_html(url)[1].dropna(how="any")

        dict_data = list()
        day_const = int(day.strftime("%Y%m%d"))

        for row in df.to_dict(orient="records")[1:]:
            m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
            if not m:
                continue
            row_dict = {"date": day_const, "symbol": row[0], "variety": m.group(1)}
            for i, field in enumerate(listed_columns):
                if row[i + 1] == "\r":
                    row_dict[field] = 0.0
                elif field in ["volume", "open_interest", "oi_chg", "exercise_volume"]:

                    row_dict[field] = int(row[i + 1])
                else:

                    row_dict[field] = float(row[i + 1])
            dict_data.append(row_dict)

        return pd.DataFrame(dict_data)[output_columns]
コード例 #9
0
ファイル: roll_yield.py プロジェクト: OryxLib/Oryx.FastAdmin
def get_roll_yield_bar(type_method="symbol",
                       var="RB",
                       date=None,
                       start_day=None,
                       end_day=None,
                       plot=False):
    """
    获取展期收益率
    :param type_method: 'symbol':获取某天某品种所有交割月合约的收盘价, 'var':获取某天所有品种两个主力合约的展期收益率(展期收益率横截面), ‘date’:获取某品种每天的两个主力合约的展期收益率(展期收益率时间序列)
    :param var: 合约品种如RB、AL等
    :param date: 某一天日期 format: YYYYMMDD
    :param start_day: 开始日期 format:YYYYMMDD
    :param end_day: 结束数据 format:YYYYMMDD
    :param plot: True or False作图
    :return: pd.DataFrame
    展期收益率数据(DataFrame):
        ry      展期收益率
        index   日期或品种
    """

    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    start_day = (cons.convert_date(start_day)
                 if start_day is not None else datetime.date.today())
    end_day = (cons.convert_date(end_day)
               if end_day is not None else cons.convert_date(
                   cons.get_latest_data_date(datetime.datetime.now())))

    if type_method == "symbol":
        df = get_futures_daily(start_day=date,
                               end_day=date,
                               market=symbol_market(var))
        df = df[df["variety"] == var]
        if plot:
            _plot_bar_2(df[["symbol", "close"]])
        return df

    if type_method == "var":
        df = pd.DataFrame()
        for market in ["dce", "cffex", "shfe", "czce"]:
            df = df.append(
                get_futures_daily(start_day=date, end_day=date, market=market))
        var_list = list(set(df["variety"]))
        df_l = pd.DataFrame()
        for var in var_list:
            ry = get_roll_yield(date, var, df=df)
            if ry:
                df_l = df_l.append(
                    pd.DataFrame([ry],
                                 index=[var],
                                 columns=["roll_yield", "near_by",
                                          "deferred"]))
        df_l["date"] = date
        df_l = df_l.sort_values("roll_yield")
        if plot:
            _plot_bar(df_l["roll_yield"])
        return df_l

    if type_method == "date":
        df_l = pd.DataFrame()
        while start_day <= end_day:
            try:
                ry = get_roll_yield(start_day, var)
                if ry:
                    df_l = df_l.append(
                        pd.DataFrame(
                            [ry],
                            index=[start_day],
                            columns=["roll_yield", "near_by", "deferred"],
                        ))
            except:
                pass
            start_day += datetime.timedelta(days=1)
        if plot:
            _plot(df_l["roll_yield"])
        return df_l
コード例 #10
0
ファイル: receipt.py プロジェクト: zhouzyc/akshare
def get_czce_receipt_3(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    抓取郑州商品交易所注册仓单数据
    适用20151112(包括)至今
    Parameters
    ------
        date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
        vars_list: 合约品种如CF、TA等列表 为空时为所有商品
    Return
    -------
        DataFrame:
            展期收益率数据(DataFrame):`1
                var             商品品种                     string
                receipt         仓单数                       int
                date            日期                         string YYYYMMDD
    """

    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    url = cons.CZCE_RECEIPT_URL_3 % (date[:4], date)
    r = requests_link(url, encoding='utf-8')
    r.encoding = 'utf-8'
    data = pd.read_html(r.text, encoding='gb2312')
    records = pd.DataFrame()
    if len(data) < 4:
        return records
    if int(date) <= 20171227:
        data = data[1:]
    for data_cut in data:
        if len(data_cut.columns) > 3 and len(data_cut.index) > 7:
            last_indexes = [
                x for x in data_cut.index
                if '注:' in str(data_cut[0].tolist()[x])
            ]
            if len(last_indexes) > 0:
                last_index = last_indexes[0] - 1
                data_cut = data_cut.loc[:last_index, :]
            if 'PTA' in data_cut[0].tolist()[0]:
                var = 'TA'
            else:
                strings = data_cut[0].tolist()[0]
                string = strings.split(' ')[0][3:]
                if len(string) > 7:
                    continue
                print(string)
                var = chinese_to_english(re.sub('[A-Z]+', '', string))
            data_cut.columns = data_cut.loc[1, :]
            data_cut = data_cut.fillna(method='pad')
            try:
                receipt = data_cut.loc[:, '仓单数量'].tolist()[-1]
            except:
                receipt = data_cut.loc[:, '仓单数量(保税)'].tolist()[-1]
            receipt_chg = data_cut.loc[:, '当日增减'].tolist()[-1]
            data_dict = {
                'var': var,
                'receipt': int(receipt),
                'receipt_chg': int(receipt_chg),
                'date': date
            }
            records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
コード例 #11
0
def get_receipt(start_day: str = None,
                end_day: str = None,
                vars_list: List = cons.contract_symbols):
    """
        获取大宗商品注册仓单数量
        Parameters
        ------
            start_day: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
            end_day: 结束数据 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
            vars_list: 合约品种如RB、AL等列表 为空时为所有商品
        Return
        -------
            DataFrame
                展期收益率数据(DataFrame):
                    var             商品品种                     string
                    receipt         仓单数量                     int
                    date            日期                        string YYYYMMDD
    """
    start_day = cons.convert_date(
        start_day) if start_day is not None else datetime.date.today()
    end_day = cons.convert_date(
        end_day) if end_day is not None else cons.convert_date(
            cons.get_latest_data_date(datetime.datetime.now()))
    records = pd.DataFrame()
    while start_day <= end_day:
        if start_day.strftime('%Y%m%d') not in calendar:
            warnings.warn(f"{start_day.strftime('%Y%m%d')}非交易日")
        else:
            print(start_day)
            for market, market_vars in cons.market_exchange_symbols.items():

                if market == 'dce':
                    if start_day >= datetime.date(2009, 4, 7):
                        f = get_dce_receipt
                    else:
                        print('20090407起,dce每交易日更新仓单数据')
                        f = None
                elif market == 'shfe':
                    if datetime.date(2008, 10,
                                     6) <= start_day <= datetime.date(
                                         2014, 5, 16):
                        f = get_shfe_receipt_1
                    elif start_day > datetime.date(2014, 5, 16):
                        f = get_shfe_receipt_2
                    else:
                        f = None
                        print('20081006起,shfe每交易日更新仓单数据')
                elif market == 'czce':
                    if datetime.date(2008, 3, 3) <= start_day <= datetime.date(
                            2010, 8, 24):
                        f = get_czce_receipt_1
                    elif datetime.date(2010, 8,
                                       24) < start_day <= datetime.date(
                                           2015, 11, 11):
                        f = get_czce_receipt_2
                    elif start_day > datetime.date(2015, 11, 11):
                        f = get_czce_receipt_3
                    else:
                        f = None
                        print('20080303起,czce每交易日更新仓单数据')
                get_vars = [var for var in vars_list if var in market_vars]
                if market != 'cffex' and get_vars != []:
                    if f is not None:
                        records = records.append(f(start_day, get_vars))

        start_day += datetime.timedelta(days=1)
    return records.reset_index(drop=True)
コード例 #12
0
def get_czce_receipt_1(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
        抓取郑州商品交易所注册仓单数据
        适用20080222至20100824(包括)
        Parameters
        ------
            date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
            vars_list: 合约品种如CF、TA等列表 为空时为所有商品
        Return
        -------
            DataFrame:
                展期收益率数据(DataFrame):
                    var             商品品种                     string
                    receipt         仓单数                       int
                    date            日期                         string YYYYMMDD
    """
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    if date == '20090820':
        return pd.DataFrame()
    url = cons.CZCE_RECEIPT_URL_1 % date
    r = requests_link(url, encoding='utf-8')
    context = r.text
    data = pd.read_html(context)[1]
    records = pd.DataFrame()
    indexes = [x for x in data.index if '品种:' in str(data[0].tolist()[x])]
    ends = [x for x in data.index if '总计' in str(data[0].tolist()[x])]
    for i in list(range(len(indexes))):
        if i != len(indexes) - 1:
            data_cut = data.loc[indexes[i]:ends[i], :]
            data_cut = data_cut.fillna(method='pad')
        else:
            data_cut = data.loc[indexes[i]:, :]
            data_cut = data_cut.fillna(method='pad')
        if 'PTA' in data_cut[0].tolist()[0]:
            var = 'TA'
        else:
            var = chinese_to_english(
                re.sub(r'[A-Z]+', '', data_cut[0].tolist()[0][3:]))
        if var == 'CF':
            receipt = data_cut[6].tolist()[-1]
            receipt_chg = data_cut[7].tolist()[-1]
        else:
            receipt = data_cut[5].tolist()[-1]
            receipt_chg = data_cut[6].tolist()[-1]
        data_dict = {
            'var': var,
            'receipt': int(receipt),
            'receipt_chg': int(receipt_chg),
            'date': date
        }
        records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
コード例 #13
0
ファイル: receipt.py プロジェクト: wanghan0501/akshare
def get_shfe_receipt_1(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    抓取上海期货交易所注册仓单数据, 适用20081006至20140518(包括) 20100126、20101029日期交易所格式混乱,直接回复脚本中DataFrame, 20100416、20130821日期交易所数据丢失
    :param date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品
    :return: pd.DataFrame
    展期收益率数据(DataFrame):
                    var             商品品种                     string
                    receipt         仓单数                       int
                    date            日期                         string YYYYMMDD           
    """
    date = (cons.convert_date(date).strftime("%Y%m%d")
            if date is not None else datetime.date.today())
    if date not in calendar:
        warnings.warn(f"{date.strftime('%Y%m%d')}非交易日")
        return None
    if date == "20100126":
        shfe_20100126["date"] = date
        return shfe_20100126
    elif date == "20101029":
        shfe_20101029["date"] = date
        return shfe_20101029
    elif date in ["20100416", "20130821"]:
        return warnings.warn("20100416、20130821日期交易所数据丢失")
    else:
        var_list = [
            "天然橡胶",
            "沥青仓库",
            "沥青厂库",
            "热轧卷板",
            "燃料油",
            "白银",
            "线材",
            "螺纹钢",
            "铅",
            "铜",
            "铝",
            "锌",
            "黄金",
            "锡",
            "镍",
        ]
        url = cons.SHFE_RECEIPT_URL_1 % date
        data = pandas_read_html_link(url)[0]
        indexes = [x for x in data.index if (data[0].tolist()[x] in var_list)]
        last_index = [
            x for x in data.index if "注" in str(data[0].tolist()[x])
        ][0] - 1
        records = pd.DataFrame()
        for i in list(range(len(indexes))):
            if i != len(indexes) - 1:
                data_cut = data.loc[indexes[i]:indexes[i + 1] - 1, :]
            else:
                data_cut = data.loc[indexes[i]:last_index, :]
                data_cut = data_cut.fillna(method="pad")
            data_dict = dict()
            data_dict["var"] = chinese_to_english(data_cut[0].tolist()[0])
            data_dict["receipt"] = int(data_cut[2].tolist()[-1])
            data_dict["receipt_chg"] = int(data_cut[3].tolist()[-1])
            data_dict["date"] = date
            records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records["var"]
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
コード例 #14
0
ファイル: daily_bar.py プロジェクト: waytrylee/akshare
def get_dce_daily(date="20200416", symbol_type="futures", retries=0):
    """
    大连商品交易所日交易数据
    Parameters
    ------
        date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
        symbol_type: 数据类型, 为'futures'期货 或 'option'期权二者之一
        retries: int, 当前重试次数,达到3次则获取数据失败
    Return
    -------
        DataFrame
            大商所日交易数据(DataFrame):
                symbol        合约代码
                date          日期
                open          开盘价
                high          最高价
                low           最低价
                close         收盘价
                volume        成交量
                open_interest   持仓量
                turnover       成交额
                settle        结算价
                pre_settle    前结算价
                variety       合约类别
        或
        DataFrame
           郑商所每日期权交易数据
                symbol        合约代码
                date          日期
                open          开盘价
                high          最高价
                low           最低价
                close         收盘价
                pre_settle      前结算价
                settle         结算价
                delta          对冲值
                volume         成交量
                open_interest     持仓量
                oi_change       持仓变化
                turnover        成交额
                implied_volatility 隐含波动率
                exercise_volume   行权量
                variety        合约类别
        或 None(给定日期没有交易数据)
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    if retries > 3:
        print("maximum retires for DCE market data: ", day.strftime("%Y%m%d"))
        return

    if symbol_type == "futures":
        url = (cons.DCE_DAILY_URL + "?" +
               urllib.parse.urlencode({
                   "currDate": day.strftime("%Y%m%d"),
                   "year": day.strftime("%Y"),
                   "month": str(int(day.strftime("%m")) - 1),
                   "day": day.strftime("%d"),
               }))
        listed_columns = cons.DCE_COLUMNS
        output_columns = cons.OUTPUT_COLUMNS
    elif symbol_type == "option":
        url = (cons.DCE_DAILY_URL + "?" +
               urllib.parse.urlencode({
                   "currDate": day.strftime("%Y%m%d"),
                   "year": day.strftime("%Y"),
                   "month": str(int(day.strftime("%m")) - 1),
                   "day": day.strftime("%d"),
                   "dayQuotes.trade_type": "1",
               }))
        listed_columns = cons.DCE_OPTION_COLUMNS
        output_columns = cons.OPTION_OUTPUT_COLUMNS
    else:
        print("invalid symbol_type :" + symbol_type +
              ', should be one of "futures" or "option"')
        return

    try:
        response = requests_link(url, method="post", headers=cons.headers).text
    except requests.exceptions.ContentDecodingError as reason:
        return get_dce_daily(day, retries=retries + 1)
    except requests.exceptions.HTTPError as reason:
        if reason.response == 504:
            return get_dce_daily(day, retries=retries + 1)
        elif reason.response != 404:
            print(cons.DCE_DAILY_URL, reason)
        return

    if "错误:您所请求的网址(URL)无法获取" in response:
        return get_dce_daily(day, retries=retries + 1)
    elif "暂无数据" in response:
        return

    data = BeautifulSoup(response, "html.parser").find_all("tr")
    if len(data) == 0:
        return

    dict_data = list()
    implied_data = list()
    for i_data in data[1:]:
        if "小计" in i_data.text or "总计" in i_data.text:
            continue
        x = i_data.find_all("td")
        if symbol_type == "futures":
            row_dict = {"variety": cons.DCE_MAP[x[0].text.strip()]}
            row_dict["symbol"] = row_dict["variety"] + x[1].text.strip()
            for i, field in enumerate(listed_columns):
                field_content = x[i + 2].text.strip()
                if "-" in field_content:
                    row_dict[field] = 0
                elif field in ["volume", "open_interest"]:
                    row_dict[field] = int(field_content.replace(",", ""))
                else:
                    row_dict[field] = float(field_content.replace(",", ""))
            dict_data.append(row_dict)
        elif len(x) == 16:
            m = cons.FUTURES_SYMBOL_PATTERN.match(x[1].text.strip())
            if not m:
                continue
            row_dict = {
                "symbol": x[1].text.strip(),
                "variety": m.group(1).upper(),
                "contract_id": m.group(0),
            }
            for i, field in enumerate(listed_columns):
                field_content = x[i + 2].text.strip()
                if "-" in field_content:
                    row_dict[field] = 0
                elif field in ["volume", "open_interest"]:
                    row_dict[field] = int(field_content.replace(",", ""))
                else:
                    row_dict[field] = float(field_content.replace(",", ""))
            dict_data.append(row_dict)
        elif len(x) == 2:
            implied_data.append({
                "contract_id": x[0].text.strip(),
                "implied_volatility": float(x[1].text.strip()),
            })
    df = pd.DataFrame(dict_data)
    df["date"] = day.strftime("%Y%m%d")
    if symbol_type == "futures":
        return df[output_columns]
    else:
        return pd.merge(
            df,
            pd.DataFrame(implied_data),
            on="contract_id",
            how="left",
            indicator=False,
        )[output_columns]
コード例 #15
0
def get_czce_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    郑州商品交易所前 20 会员持仓排名数据明细
    注:该交易所既公布了品种排名, 也公布了标的排名
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品, 数据从20050509开始,每交易日16:30左右更新数据
    :return: pd.DataFrame
    rank                        排名                        int
    vol_party_name              成交量排序的当前名次会员        string(中文)
    vol                         该会员成交量                  int
    vol_chg                     该会员成交量变化量             int
    long_party_name             持多单排序的当前名次会员        string(中文)
    long_open_interest               该会员持多单                  int
    long_open_interest_chg           该会员持多单变化量             int
    short_party_name            持空单排序的当前名次会员        string(中文)
    short_open_interest              该会员持空单                  int
    short_open_interest_chg          该会员持空单变化量             int
    symbol                      标的合约                     string
    var                         品种                        string
    date                        日期                        string YYYYMMDD
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2005, 5, 9):
        print("czce数据源开始日期为20050509,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    if date <= datetime.date(2010, 8, 25):
        url = cons.CZCE_VOL_RANK_URL_1 % (date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)
        r = requests_link(url, 'utf-8')
        r.encoding = 'utf-8'
        soup = BeautifulSoup(r.text, 'lxml', from_encoding="gb2312")
        symbols = []
        for link in soup.find_all('b'):
            strings = (str(link).split(' '))
            if len(strings) > 5:
                try:
                    symbol = chinese_to_english(strings[4])
                except:
                    symbol = strings[4]
                symbols.append(symbol)
        big_dict = {}
        for i in range(len(symbols)):
            symbol = symbols[i]
            table_cut = data[i + 2]
            table_cut.columns = rank_columns
            table_cut = table_cut.iloc[:-1, :]
            table_cut.loc[:, 'rank'] = table_cut.index
            table_cut.loc['合计', 'rank'] = 999
            table_cut.loc['合计', ['vol_party_name', 'long_party_name', 'short_party_name']] = None
            table_cut.loc[:, 'symbol'] = symbol
            table_cut.loc[:, 'variety'] = symbol_varieties(symbol)
            table_cut[intColumns] = table_cut[intColumns].fillna(0)
            table_cut[intColumns] = table_cut[intColumns].astype(str)
            table_cut[intColumns] = table_cut[intColumns].applymap(lambda x: x.replace(',', ''))
            table_cut = table_cut.applymap(lambda x: 0 if x == '-' else x)

            table_cut[intColumns] = table_cut[intColumns].astype(float)
            table_cut[intColumns] = table_cut[intColumns].astype(int)
            big_dict[symbol] = table_cut.reset_index(drop=True)
        return big_dict

    elif date <= datetime.date(2015, 11, 11):
        url = cons.CZCE_VOL_RANK_URL_2 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=1)[1]
    elif date < datetime.date(2017, 12, 28):
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=1)[0]
    else:
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)[0]

    if len(data.columns) < 6:
        return {}

    table = pd.DataFrame(data.iloc[:, :9])
    table.columns = rank_columns
    table.loc[:, 'rank'] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(lambda x: x.replace(',', ''))
    table = table.applymap(lambda x: 0 if x == '-' else x)
    indexes = [i for i in table.index if '合约' in i or '品种' in i]
    indexes.insert(0, 0)
    big_dict = {}

    for i in range(len(indexes)):

        if indexes[i] == 0:
            table_cut = table.loc[:indexes[i + 1], :]
            string = table_cut.index.name

        elif i < len(indexes) - 1:
            table_cut = table.loc[indexes[i]:indexes[i + 1], :]
            string = table_cut.index[0]

        else:
            table_cut = table.loc[indexes[i]:, :]
            string = table_cut.index[0]

        if 'PTA' in string:
            symbol = 'TA'
        else:
            try:
                symbol = chinese_to_english(find_chinese(re.compile(r':(.*) ').findall(string)[0]))
            except:
                symbol = re.compile(r':(.*) ').findall(string)[0]

        var = symbol_varieties(symbol)

        if var in vars_list:
            table_cut = table_cut.dropna(how='any').iloc[1:, :]
            table_cut = table_cut.loc[[x for x in table_cut.index if x in [str(i) for i in range(21)]], :]

            table_cut = _table_cut_cal(table_cut, symbol)
            big_dict[symbol] = table_cut.reset_index(drop=True)

    return big_dict
コード例 #16
0
ファイル: daily_bar.py プロジェクト: waytrylee/akshare
def get_futures_daily(start_day="20200413",
                      end_day="20200416",
                      market="CFFEX",
                      index_bar=False):
    """
    交易所日交易数据
    Parameters
    ------
        start_day: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
        end_day: 结束数据 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
        market: 'CFFEX' 中金所, 'CZCE' 郑商所,  'SHFE' 上期所, 'DCE' 大商所 之一, 'INE' 上海国际能源交易中心。默认为中金所
        index_bar: bool  是否合成指数K线
    Return
    -------
        DataFrame
            中金所日交易数据(DataFrame):
                symbol      合约代码
                date       日期
                open       开盘价
                high       最高价
                low       最低价
                close      收盘价
                volume      成交量
                open_interest 持仓量
                turnover    成交额
                settle     结算价
                pre_settle   前结算价
                variety     合约类别
        或 None(给定日期没有交易数据)
    """
    if market.upper() == "CFFEX":
        f = get_cffex_daily
    elif market.upper() == "CZCE":
        f = get_czce_daily
    elif market.upper() == "SHFE":
        f = get_shfe_daily
    elif market.upper() == "DCE":
        f = get_dce_daily
    elif market.upper() == "INE":
        f = get_ine_daily
    else:
        print("Invalid Market Symbol")
        return

    start_day = (cons.convert_date(start_day)
                 if start_day is not None else datetime.date.today())
    end_day = (cons.convert_date(end_day)
               if end_day is not None else cons.convert_date(
                   cons.get_latest_data_date(datetime.datetime.now())))

    df_list = list()
    while start_day <= end_day:
        df = f(start_day)
        if df is not None:
            df_list.append(df)
            if index_bar:
                df_list.append(get_futures_index(df))
        start_day += datetime.timedelta(days=1)

    if len(df_list) > 0:
        return pd.concat(df_list).reset_index(drop=True)
コード例 #17
0
ファイル: daily_bar.py プロジェクト: MattMei27/akshare
def get_cffex_daily(date=None):
    """
    获取中国金融期货交易所日交易数据
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象; 为空时为当天
    :return: pd.DataFrame
    中国金融期货交易所日(pd.DataFrame):
        symbol        合约代码
        date          日期
        open          开盘价
        high          最高价
        low          最低价
        close         收盘价
        volume        成交量
        open_interest   持仓量
        turnover      成交额
        settle        结算价
        pre_settle    前结算价
        variety       合约类别
        或 None(给定日期没有交易数据)
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if day.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % day.strftime('%Y%m%d'))
        return None
    try:
        html = requests_link(cons.CFFEX_DAILY_URL.format(
            day.strftime('%Y%m'), day.strftime('%d'), day.strftime('%Y%m%d')),
                             encoding="gbk",
                             headers=cons.headers).text
    except requests.exceptions.HTTPError as reason:
        if reason.response != 404:
            print(
                cons.CFFEX_DAILY_URL %
                (day.strftime('%Y%m'), day.strftime('%d'),
                 day.strftime('%Y%m%d')), reason)
        return

    if html.find('网页错误') >= 0:
        return
    html = [
        i.replace(' ', '').split(',') for i in html.split('\n')[:-2]
        if i[0][0] != '小'
    ]

    if html[0][0] != '合约代码':
        return

    dict_data = list()
    day_const = day.strftime('%Y%m%d')
    for row in html[1:]:
        m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
        if not m:
            continue
        row_dict = {'date': day_const, 'symbol': row[0], 'variety': m.group(1)}

        for i, field in enumerate(cons.CFFEX_COLUMNS):
            if row[i + 1] == "":
                row_dict[field] = 0.0
            elif field in ['volume', 'open_interest', 'oi_chg']:
                row_dict[field] = int(row[i + 1])
            else:
                try:
                    row_dict[field] = float(row[i + 1])
                except:
                    pass
        row_dict['pre_settle'] = row_dict['close'] - row_dict['change1']
        dict_data.append(row_dict)

    return pd.DataFrame(dict_data)[cons.OUTPUT_COLUMNS]
コード例 #18
0
ファイル: futures_daily_bar.py プロジェクト: zhupr/akshare
def get_shfe_daily(date="20131016"):
    """
    上海期货交易所-日频率-量价数据
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象, 默认为当前交易日
    :type date: str or datetime.date
    :return: 上海期货交易所-日频率-量价数据
    :rtype: pandas.DataFrame or None
    上期所日交易数据(DataFrame):
    symbol        合约代码
    date          日期
    open          开盘价
    high          最高价
    low           最低价
    close         收盘价
    volume        成交量
    open_interest 持仓量
    turnover      成交额
    settle        结算价
    pre_settle     前结算价
    variety       合约类别
    或 None(给定交易日没有交易数据)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    try:
        json_data = json.loads(
            requests_link(
                cons.SHFE_DAILY_URL % (day.strftime("%Y%m%d")),
                headers=cons.shfe_headers,
            ).text
        )
    except requests.HTTPError as reason:
        if reason.response != 404:
            print(cons.SHFE_DAILY_URL % (day.strftime("%Y%m%d")), reason)
        return

    if len(json_data["o_curinstrument"]) == 0:
        return

    df = pd.DataFrame(
        [
            row
            for row in json_data["o_curinstrument"]
            if row["DELIVERYMONTH"] not in ["小计", "合计"] and row["DELIVERYMONTH"] != ""
        ]
    )
    df["variety"] = df.PRODUCTID.str.slice(0, -6).str.upper()
    df["symbol"] = df["variety"] + df["DELIVERYMONTH"]
    df["date"] = day.strftime("%Y%m%d")
    v_wap_df = get_shfe_v_wap(day)
    if v_wap_df is not None:
        df = pd.merge(
            df,
            v_wap_df[v_wap_df.time_range == "9:00-15:00"],
            on=["date", "symbol"],
            how="left",
        )
        df["turnover"] = df.v_wap * df.VOLUME
    else:
        df["VOLUME"] = df["VOLUME"].apply(lambda x: 0 if x == "" else x)
        df["turnover"] = df["VOLUME"] * df["SETTLEMENTPRICE"]
    df.rename(columns=cons.SHFE_COLUMNS, inplace=True)
    return df[cons.OUTPUT_COLUMNS]
コード例 #19
0
ファイル: daily_bar.py プロジェクト: MattMei27/akshare
def get_czce_daily(date=None):
    """
        获取郑商所日交易数据
    Parameters
    ------
        date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    Return
    -------
        DataFrame
            郑商所每日期货交易数据:
                symbol        合约代码
                date          日期
                open          开盘价
                high          最高价
                low           最低价
                close         收盘价
                volume        成交量
                open_interest 持仓量
                turnover      成交额
                settle        结算价
                pre_settle    前结算价
                variety       合约类别
        或
        DataFrame
           郑商所每日期权交易数据
                symbol        合约代码
                date          日期
                open          开盘价
                high          最高价
                low           最低价
                close         收盘价
                pre_settle      前结算价
                settle         结算价
                delta          对冲值
                volume         成交量
                open_interest     持仓量
                oi_change       持仓变化
                turnover        成交额
                implied_volatility 隐含波动率
                exercise_volume   行权量
                variety        合约类别
        None(类型错误或给定日期没有交易数据)
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if day.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % day.strftime('%Y%m%d'))
        return None
    if day > datetime.date(2010, 8, 24):
        if day > datetime.date(2015, 9, 19):
            u = cons.CZCE_DAILY_URL_3
            url = u % (day.strftime('%Y'), day.strftime('%Y%m%d'))
        elif day < datetime.date(2015, 9, 19):
            u = cons.CZCE_DAILY_URL_2
            url = u % (day.strftime('%Y'), day.strftime('%Y%m%d'))
        listed_columns = cons.CZCE_COLUMNS
        output_columns = cons.OUTPUT_COLUMNS
        try:
            r = requests.get(url)
            html = r.text
        except requests.exceptions.HTTPError as reason:
            if reason.response.status_code != 404:
                print(
                    cons.CZCE_DAILY_URL_3 %
                    (day.strftime('%Y'), day.strftime('%Y%m%d')), reason)
            return
        if html.find('您的访问出错了') >= 0 or html.find('无期权每日行情交易记录') >= 0:
            return
        html = [
            i.replace(' ', '').split('|') for i in html.split('\n')[:-4]
            if i[0][0] != u'小'
        ]

        if day > datetime.date(2015, 9, 19):
            if html[1][0] not in ['品种月份', u'品种代码']:
                return
            dict_data = list()
            day_const = int(day.strftime('%Y%m%d'))
            for row in html[2:]:
                m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
                if not m:
                    continue
                row_dict = {
                    'date': day_const,
                    'symbol': row[0],
                    'variety': m.group(1)
                }
                for i, field in enumerate(listed_columns):
                    if row[i + 1] == "\r":
                        row_dict[field] = 0.0
                    elif field in [
                            'volume', 'open_interest', 'oi_chg',
                            'exercise_volume'
                    ]:
                        row[i + 1] = row[i + 1].replace(',', '')
                        row_dict[field] = int(row[i + 1])
                    else:
                        row[i + 1] = row[i + 1].replace(',', '')
                        row_dict[field] = float(row[i + 1])
                dict_data.append(row_dict)

            return pd.DataFrame(dict_data)[output_columns]
        elif day < datetime.date(2015, 9, 19):
            dict_data = list()
            day_const = int(day.strftime('%Y%m%d'))
            for row in html[1:]:
                row = row[0].split(',')
                m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
                if not m:
                    continue
                row_dict = {
                    'date': day_const,
                    'symbol': row[0],
                    'variety': m.group(1)
                }
                for i, field in enumerate(listed_columns):
                    if row[i + 1] == "\r":
                        row_dict[field] = 0.0
                    elif field in [
                            'volume', 'open_interest', 'oi_chg',
                            'exercise_volume'
                    ]:
                        row_dict[field] = int(float(row[i + 1]))
                    else:
                        row_dict[field] = float(row[i + 1])
                dict_data.append(row_dict)
            return pd.DataFrame(dict_data)[output_columns]

    if day <= datetime.date(2010, 8, 24):
        u = cons.CZCE_DAILY_URL_1
        url = u % day.strftime('%Y%m%d')
        listed_columns = cons.CZCE_COLUMNS_2
        output_columns = cons.OUTPUT_COLUMNS
        df = pd.read_html(url)[1].dropna(how='any')

        dict_data = list()
        day_const = int(day.strftime('%Y%m%d'))

        for row in df.to_dict(orient='records')[1:]:
            m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
            if not m:
                continue
            row_dict = {
                'date': day_const,
                'symbol': row[0],
                'variety': m.group(1)
            }
            for i, field in enumerate(listed_columns):
                if row[i + 1] == "\r":
                    row_dict[field] = 0.0
                elif field in [
                        'volume', 'open_interest', 'oi_chg', 'exercise_volume'
                ]:

                    row_dict[field] = int(row[i + 1])
                else:

                    row_dict[field] = float(row[i + 1])
            dict_data.append(row_dict)

        return pd.DataFrame(dict_data)[output_columns]
コード例 #20
0
ファイル: cot.py プロジェクト: fuyr/akshare
def get_czce_rank_table(date="20201026", vars_list=cons.contract_symbols):
    """
    郑州商品交易所前 20 会员持仓排名数据明细
    注:该交易所既公布了品种排名, 也公布了标的排名
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品, 数据从20050509开始,每交易日16:30左右更新数据
    :return: pd.DataFrame
    rank                        排名                        int
    vol_party_name              成交量排序的当前名次会员        string(中文)
    vol                         该会员成交量                  int
    vol_chg                     该会员成交量变化量             int
    long_party_name             持多单排序的当前名次会员        string(中文)
    long_open_interest               该会员持多单                  int
    long_open_interest_chg           该会员持多单变化量             int
    short_party_name            持空单排序的当前名次会员        string(中文)
    short_open_interest              该会员持空单                  int
    short_open_interest_chg          该会员持空单变化量             int
    symbol                      标的合约                     string
    var                         品种                        string
    date                        日期                        string YYYYMMDD
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2015, 10, 8):
        print("CZCE可获取的数据源开始日期为 20151008, 请输入合适的日期参数")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    if date >= datetime.date(2015, 10, 8):
        url = f"http://www.czce.com.cn/cn/DFSStaticFiles/Future/{date.year}/{date.isoformat().replace('-', '')}/FutureDataHolding.xls"
        r = requests.get(url)
        temp_df = pd.read_excel(BytesIO(r.content))

    temp_pinzhong_index = [item + 1 for item in temp_df[temp_df.iloc[:, 0].str.contains("合计")].index.to_list()]
    temp_pinzhong_index.insert(0, 0)
    temp_pinzhong_index.pop()
    temp_symbol_index = temp_df.iloc[temp_pinzhong_index, 0].str.split(" ", expand=True).iloc[:, 0]
    symbol_list = [re.compile(r"[0-9a-zA-Z_]+").findall(item)[0] for item in temp_symbol_index.values]
    temp_symbol_index_list = temp_symbol_index.index.to_list()
    big_dict = {}
    for i in range(len(temp_symbol_index_list)-1):
        inner_temp_df = temp_df[temp_symbol_index_list[i]+2: temp_symbol_index_list[i+1]-1]
        inner_temp_df.columns = ["rank",
                                 "vol_party_name",
                                 "vol",
                                 "vol_chg",
                                 "long_party_name",
                                 "long_open_interest",
                                 "long_open_interest_chg",
                                 "short_party_name",
                                 "short_open_interest",
                                 "short_open_interest_chg",
                                 ]
        inner_temp_df.reset_index(inplace=True, drop=True)
        big_dict[symbol_list[i]] = inner_temp_df
        inner_temp_df = temp_df[temp_symbol_index_list[i+1]+2:-1]
        inner_temp_df.columns = ["rank",
                                 "vol_party_name",
                                 "vol",
                                 "vol_chg",
                                 "long_party_name",
                                 "long_open_interest",
                                 "long_open_interest_chg",
                                 "short_party_name",
                                 "short_open_interest",
                                 "short_open_interest_chg",
                                 ]
        inner_temp_df.reset_index(inplace=True, drop=True)
        big_dict[symbol_list[-1]] = inner_temp_df
    new_big_dict = {}
    for key, value in big_dict.items():
        value["symbol"] = key
        value["variety"] = re.compile(r"[a-zA-Z_]+").findall(key)[0]
        new_big_dict[key] = value

    return new_big_dict
コード例 #21
0
ファイル: futures_daily_bar.py プロジェクト: zexpp5/akshare
def get_cffex_daily(date="20100401"):
    """
    中国金融期货交易所日交易数据
    http://www.cffex.com.cn/rtj/
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象; 为空时为当天
    :return: pandas.DataFrame
    中国金融期货交易所日:
    symbol        合约代码
    date          日期
    open          开盘价
    high          最高价
    low          最低价
    close         收盘价
    volume        成交量
    open_interest   持仓量
    turnover      成交额
    settle        结算价
    pre_settle    前结算价
    variety       合约类别
    或 None(给定日期没有交易数据)
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    url = f"http://www.cffex.com.cn/sj/historysj/{date[:-2]}/zip/{date[:-2]}.zip"
    r = requests.get(url)
    try:
        with zipfile.ZipFile(BytesIO(r.content)) as file:
            with file.open(f"{date}_1.csv") as my_file:
                data = my_file.read().decode("gb2312")
                data_df = pd.read_csv(StringIO(data))
    except:
        return None
    data_df = data_df[data_df["合约代码"] != "小计"]
    data_df = data_df[data_df["合约代码"] != "合计"]
    data_df = data_df[~data_df["合约代码"].str.contains("IO")]
    data_df.reset_index(inplace=True, drop=True)
    data_df["合约代码"] = data_df["合约代码"].str.strip()
    symbol_list = data_df["合约代码"].to_list()
    variety_list = [
        re.compile(r"[a-zA-Z_]+").findall(item)[0] for item in symbol_list
    ]
    if data_df.shape[1] == 15:
        data_df.columns = [
            "symbol", "open", "high", "low", "volume", "turnover",
            "open_interest", "_", "close", "settle", "pre_settle", "_", "_",
            "_", "_"
        ]
    else:
        data_df.columns = [
            "symbol", "open", "high", "low", "volume", "turnover",
            "open_interest", "_", "close", "settle", "pre_settle", "_", "_",
            "_"
        ]
    data_df["date"] = date
    data_df["variety"] = variety_list
    data_df = data_df[[
        "symbol", "date", "open", "high", "low", "close", "volume",
        "open_interest", "turnover", "settle", "pre_settle", "variety"
    ]]
    return data_df
コード例 #22
0
ファイル: cot.py プロジェクト: fuyr/akshare
def futures_dce_position_rank(date: str = "20160104") -> pd.DataFrame:
    """
    大连商品交易日每日持仓排名-具体合约
    http://www.dce.com.cn/dalianshangpin/xqsj/tjsj26/rtj/rcjccpm/index.html
    :param date: 指定交易日; e.g., "20200511"
    :type date: str
    :return: 指定日期的持仓排名数据
    :rtype: pandas.DataFrame
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    url = "http://www.dce.com.cn/publicweb/quotesdata/exportMemberDealPosiQuotesBatchData.html"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Length": "160",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "www.dce.com.cn",
        "Origin": "http://www.dce.com.cn",
        "Pragma": "no-cache",
        "Referer": "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
    }
    payload = {
        "memberDealPosiQuotes.variety": "a",
        "memberDealPosiQuotes.trade_type": "0",
        "contract.contract_id": "a2009",
        "contract.variety_id": "a",
        "year": date.year,
        "month": date.month - 1,
        "day": date.day,
        "batchExportFlag": "batch",
    }
    r = requests.post(url, payload, headers=headers)
    big_dict = dict()
    with zipfile.ZipFile(BytesIO(r.content), "r") as z:
        for i in z.namelist():
            file_name = i.encode('cp437').decode('GBK')
            try:
                data = pd.read_table(z.open(i), header=None, sep="\t").iloc[:-6]
                if len(data) < 12:  # 处理没有活跃合约的情况
                    big_dict[file_name.split("_")[1]] = pd.DataFrame()
                    continue
                start_list = data[data.iloc[:, 0].str.find("名次") == 0].index.tolist()
                data = data.iloc[start_list[0]:, data.columns[data.iloc[start_list[0], :].notnull()]]
                data.reset_index(inplace=True, drop=True)
                start_list = data[data.iloc[:, 0].str.find("名次") == 0].index.tolist()
                end_list = data[data.iloc[:, 0].str.find("总计") == 0].index.tolist()
                part_one = data[start_list[0]: end_list[0]].iloc[1:, :]
                part_two = data[start_list[1]: end_list[1]].iloc[1:, :]
                part_three = data[start_list[2]: end_list[2]].iloc[1:, :]
                temp_df = pd.concat([part_one.reset_index(drop=True), part_two.reset_index(drop=True),
                                     part_three.reset_index(drop=True)], axis=1, ignore_index=True)
                temp_df.columns = ["名次", "会员简称", "成交量", "增减", "名次", "会员简称", "持买单量", "增减", "名次", "会员简称", "持卖单量", "增减"]
                temp_df["rank"] = range(1, len(temp_df) + 1)
                del temp_df["名次"]
                temp_df.columns = ["vol_party_name", "vol", "vol_chg", "long_party_name", "long_open_interest",
                                   "long_open_interest_chg", "short_party_name", "short_open_interest",
                                   "short_open_interest_chg", "rank"]
                temp_df["symbol"] = file_name.split("_")[1]
                temp_df["variety"] = file_name.split("_")[1][:-4].upper()
                temp_df = temp_df[["long_open_interest", "long_open_interest_chg", "long_party_name", "rank",
                                   "short_open_interest", "short_open_interest_chg", "short_party_name",
                                   "vol", "vol_chg", "vol_party_name", "symbol", "variety"]]
                big_dict[file_name.split("_")[1]] = temp_df
            except UnicodeDecodeError as e:
                try:
                    data = pd.read_table(z.open(i), header=None, sep="\\s+", encoding="gb2312", skiprows=3)
                except:
                    data = pd.read_table(z.open(i), header=None, sep="\\s+", encoding="gb2312", skiprows=4)
                start_list = data[data.iloc[:, 0].str.find("名次") == 0].index.tolist()
                end_list = data[data.iloc[:, 0].str.find("总计") == 0].index.tolist()
                part_one = data[start_list[0]: end_list[0]].iloc[1:, :]
                part_two = data[start_list[1]: end_list[1]].iloc[1:, :]
                part_three = data[start_list[2]: end_list[2]].iloc[1:, :]
                temp_df = pd.concat([part_one.reset_index(drop=True), part_two.reset_index(drop=True),
                                     part_three.reset_index(drop=True)], axis=1, ignore_index=True)
                temp_df.columns = ["名次", "会员简称", "成交量", "增减", "名次", "会员简称", "持买单量", "增减", "名次", "会员简称", "持卖单量", "增减"]
                temp_df["rank"] = range(1, len(temp_df) + 1)
                del temp_df["名次"]
                temp_df.columns = ["vol_party_name", "vol", "vol_chg", "long_party_name", "long_open_interest",
                                   "long_open_interest_chg", "short_party_name", "short_open_interest",
                                   "short_open_interest_chg", "rank"]
                temp_df["symbol"] = file_name.split("_")[1]
                temp_df["variety"] = file_name.split("_")[1][:-4].upper()
                temp_df = temp_df[["long_open_interest", "long_open_interest_chg", "long_party_name", "rank",
                                   "short_open_interest", "short_open_interest_chg", "short_party_name",
                                   "vol", "vol_chg", "vol_party_name", "symbol", "variety"]]
                big_dict[file_name.split("_")[1]] = temp_df
    return big_dict
コード例 #23
0
ファイル: cot.py プロジェクト: yangxiaozhe13/akshare
def get_dce_rank_table(date: str = "20210302", vars_list=cons.contract_symbols):
    """
    大连商品交易所前 20 会员持仓排名数据明细, 由于交易所网站问题, 需要 20200720 之后才有数据
    注: 该交易所只公布标的合约排名
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date 对象, 为空时为当天
    :param vars_list: 合约品种如 RB、AL 等列表为空时为所有商品, 数据从 20060104 开始,每交易日 16:30 左右更新数据
    :return: pandas.DataFrame
    rank                        排名                        int
    vol_party_name              成交量排序的当前名次会员      string(中文)
    vol                         该会员成交量                 int
    vol_chg                     该会员成交量变化量            int
    long_party_name             持多单排序的当前名次会员      string(中文)
    long_open_interest          该会员持多单                 int
    long_open_interest_chg      该会员持多单变化量            int
    short_party_name            持空单排序的当前名次会员       string(中文)
    short_open_interest         该会员持空单                  int
    short_open_interest_chg     该会员持空单变化量             int
    symbol                      标的合约                     string
    var                         品种                        string
    date                        日期                        string YYYYMMDD
    """
    date_string = date
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2006, 1, 4):
        print(Exception("大连商品交易所数据源开始日期为 20060104,跳过"))
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    vars_list = [i for i in vars_list if i in cons.market_exchange_symbols['dce']]
    big_dict = {}
    for var in vars_list:
        # var = 'V'
        symbol_list = _get_dce_contract_list(date, var)
        for symbol in symbol_list:
            url = cons.DCE_VOL_RANK_URL_1 % (var.lower(), symbol, var.lower(), date.year, date.month - 1, date.day)
            temp_df = pd.read_excel(url[:-3] + 'excel', header=0, skiprows=3)
            temp_df.dropna(how='any', axis=0, inplace=True)
            temp_df = temp_df.applymap(lambda x: str(x).replace(',', ''))
            del temp_df["名次.1"]
            del temp_df["名次.2"]
            temp_df.rename(columns={"名次": "rank",
                                    "会员简称": "vol_party_name",
                                    "成交量": "vol",
                                    "增减": "vol_chg",
                                    "会员简称.1": "long_party_name",
                                    "持买单量": "long_open_interest",
                                    "增减.1": "long_open_interest_chg",
                                    "会员简称.2": "short_party_name",
                                    "持卖单量": "short_open_interest",
                                    "增减.2": "short_open_interest_chg",
                                    }, inplace=True)
            temp_df['symbol'] = symbol.upper()
            temp_df['var'] = var
            temp_df['date'] = date_string
            temp_df = temp_df.applymap(lambda x: str(x).replace("-", "0") if x == "-" else x)
            temp_df['rank'] = range(1, len(temp_df)+1)
            temp_df['vol'] = temp_df['vol'].astype(float)
            temp_df['vol_chg'] = temp_df['vol_chg'].astype(float)
            temp_df['long_open_interest'] = temp_df['long_open_interest'].astype(float)
            temp_df['long_open_interest_chg'] = temp_df['long_open_interest_chg'].astype(float)
            temp_df['short_open_interest'] = temp_df['short_open_interest'].astype(float)
            temp_df['short_open_interest_chg'] = temp_df['short_open_interest_chg'].astype(float)
            big_dict[symbol] = temp_df
    return big_dict
コード例 #24
0
def get_shfe_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    上海期货交易所前 20 会员持仓排名数据明细
    注:该交易所只公布每个品种内部的标的排名,没有公布品种的总排名
    数据从20020107开始,每交易日16:30左右更新数据
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品
    :return: pd.DataFrame
    rank                        排名                        int
    vol_party_name              成交量排序的当前名次会员        string(中文)
    vol                         该会员成交量                  int
    vol_chg                     该会员成交量变化量             int
    long_party_name             持多单排序的当前名次会员        string(中文)
    long_open_interest          该会员持多单                  int
    long_open_interest_chg      该会员持多单变化量             int
    short_party_name            持空单排序的当前名次会员        string(中文)
    short_open_interest         该会员持空单                  int
    short_open_interest_chg     该会员持空单变化量             int
    symbol                      标的合约                     string
    var                         品种                        string
    date                        日期                        string YYYYMMDD
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2002, 1, 7):
        print("shfe数据源开始日期为20020107,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    url = cons.SHFE_VOL_RANK_URL % (date.strftime('%Y%m%d'))
    r = requests_link(url, 'utf-8')
    try:
        context = json.loads(r.text)
    except:
        return {}
    df = pd.DataFrame(context['o_cursor'])

    df = df.rename(
        columns={
            'CJ1': 'vol',
            'CJ1_CHG': 'vol_chg',
            'CJ2': 'long_open_interest',
            'CJ2_CHG': 'long_open_interest_chg',
            'CJ3': 'short_open_interest',
            'CJ3_CHG': 'short_open_interest_chg',
            'PARTICIPANTABBR1': 'vol_party_name',
            'PARTICIPANTABBR2': 'long_party_name',
            'PARTICIPANTABBR3': 'short_party_name',
            'PRODUCTNAME': 'product1',
            'RANK': 'rank',
            'INSTRUMENTID': 'symbol',
            'PRODUCTSORTNO': 'product2'
        })

    if len(df.columns) < 3:
        return {}
    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    df = df.applymap(lambda x: None if x == '' else x)
    df['variety'] = df['symbol'].apply(lambda x: symbol_varieties(x))

    df = df[df['rank'] > 0]
    for col in [
            'PARTICIPANTID1', 'PARTICIPANTID2', 'PARTICIPANTID3', 'product1',
            'product2'
    ]:
        try:
            del df[col]
        except:
            pass
    get_vars = [var for var in vars_list if var in df['variety'].tolist()]
    big_dict = {}
    for var in get_vars:
        df_var = df[df['variety'] == var]
        for symbol in set(df_var['symbol']):
            df_symbol = df_var[df_var['symbol'] == symbol]
            big_dict[symbol] = df_symbol.reset_index(drop=True)
    return big_dict
コード例 #25
0
def get_czce_receipt_3(
        date: str = None,
        vars_list: List = cons.contract_symbols) -> pd.DataFrame:
    """
    郑州商品交易所-注册仓单数据
    适用 20151008-至今
    http://www.czce.com.cn/cn/jysj/cdrb/H770310index_1.htm
    :param date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :type date: str
    :param vars_list: 合约品种如 CF、TA 等列表为空时为所有商品
    :type vars_list: list
    :return: 注册仓单数据
    :rtype: pandas.DataFrame
    """
    if not isinstance(vars_list, list):
        return warnings.warn("vars_list: 必须是列表")
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    url = f"http://www.czce.com.cn/cn/DFSStaticFiles/Future/{date[:4]}/{date}/FutureDataWhsheet.xls"
    r = requests_link(url, encoding='utf-8')
    temp_df = pd.read_excel(BytesIO(r.content))
    temp_df = temp_df[[
        bool(1 - item) for item in [
            item if item is not np.NAN else False
            for item in temp_df.iloc[:, 0].str.contains("非农产品")
        ]
    ]]
    temp_df.reset_index(inplace=True, drop=True)
    range_list_one = list(temp_df[[
        item if item is not np.NAN else False
        for item in temp_df.iloc[:, 0].str.contains("品种")
    ]].index)
    range_list_two = list(temp_df[[
        item if item is not np.NAN else False
        for item in temp_df.iloc[:, 0].str.contains("品种")
    ]].index)[1:]
    range_list_two.append(None)
    symbol_list = []
    receipt_list = []
    receipt_chg_list = []
    for page in range(len(range_list_one)):
        inner_df = temp_df[range_list_one[page]:range_list_two[page]]
        reg = re.compile(r'[A-Z]+')
        try:
            symbol = reg.findall(inner_df.iloc[0, 0])[0]
        except:
            continue
        symbol_list.append(symbol)
        inner_df.columns = inner_df.iloc[1, :]
        inner_df = inner_df.iloc[2:, :]
        inner_df = inner_df.dropna(axis=1, how='all')
        if symbol == "PTA":
            try:
                receipt_list.append(
                    inner_df['仓单数量(完税)'].iloc[-1] +
                    inner_df['仓单数量(保税)'].iloc[-1])  # 20210316 TA 分为保税和完税
            except:
                receipt_list.append(0)
        elif symbol == "MA":
            try:
                try:
                    receipt_list.append(
                        inner_df['仓单数量(完税)'].iloc[-2] +
                        inner_df['仓单数量(保税)'].iloc[-2])  # 20210316 MA 分为保税和完税
                except:
                    receipt_list.append(
                        inner_df['仓单数量(完税)'].iloc[-2])  # 处理 MA 的特殊格式
            except:
                receipt_list.append(0)
        else:
            try:
                receipt_list.append(inner_df['仓单数量'].iloc[-1])
            except:
                receipt_list.append(0)
        if symbol == "MA":
            receipt_chg_list.append(inner_df['当日增减'].iloc[-2])
        else:
            receipt_chg_list.append(inner_df['当日增减'].iloc[-1])
    data_df = pd.DataFrame([
        symbol_list, receipt_list, receipt_chg_list,
        [date] * len(receipt_chg_list)
    ]).T
    data_df.columns = ['var', 'receipt', 'receipt_chg', 'date']
    temp_list = data_df['var'].tolist()
    data_df['var'] = [item if item != "PTA" else "TA" for item in temp_list]
    if len(data_df.index) != 0:
        data_df.index = data_df['var']
        vars_in_market = [i for i in vars_list if i in data_df.index]
        records = data_df.loc[vars_in_market, :]
    return records.reset_index(drop=True)