コード例 #1
0
ファイル: daily_bar.py プロジェクト: wuyaoyao99/mssdk
def get_ine_daily(date="20200416"):
    """
    上海国际能源交易中心-日频率-量价数据
    上海国际能源交易中心: 原油期货(上市时间: 20180326); 20号胶期货(上市时间: 20190812)
    trade_price: http://www.ine.cn/statements/daily/?paramid=kx
    trade_note: http://www.ine.cn/data/datanote.dat
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象,默认为当前交易日
    :type date: str or datetime.date
    :return: 上海国际能源交易中心-日频率-量价数据
    :rtype: pandas.DataFrame or None
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn(f"{day.strftime('%Y%m%d')}非交易日")
        return None
    url = f"http://www.ine.cn/data/dailydata/kx/kx{day.strftime('%Y%m%d')}.dat"
    r = requests.get(url)
    result_df = pd.DataFrame()
    temp_df = pd.DataFrame(r.json()["o_curinstrument"]).iloc[:-1, :]
    temp_df = temp_df[temp_df["DELIVERYMONTH"] != "小计"]
    result_df["symbol"] = temp_df["PRODUCTID"].str.upper().str.split("_", expand=True)[0] + temp_df["DELIVERYMONTH"]
    result_df["date"] = day.strftime("%Y%m%d")
    result_df["open"] = temp_df["OPENPRICE"]
    result_df["high"] = temp_df["HIGHESTPRICE"]
    result_df["low"] = temp_df["LOWESTPRICE"]
    result_df["close"] = temp_df["CLOSEPRICE"]
    result_df["volume"] = temp_df["VOLUME"]
    result_df["open_interest"] = temp_df["OPENINTEREST"]
    result_df["turnover"] = 0
    result_df["settle"] = temp_df["SETTLEMENTPRICE"]
    result_df["pre_settle"] = temp_df["PRESETTLEMENTPRICE"]
    result_df["variety"] = temp_df["PRODUCTID"].str.upper().str.split("_", expand=True)[0]
    return result_df
コード例 #2
0
ファイル: futures_basis.py プロジェクト: cdmaxsmart/mssdk
def futures_spot_price_previous(date: str = "20110110") -> pd.DataFrame:
    """
    #TODO 修改该接口并观察历史数据能否获取
    :param date:
    :type date:
    :return:
    :rtype:
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2011, 1, 4):
        raise Exception("数据源开始日期为 20110104, 请将获取数据时间点设置在 20110104 后")
    if date.strftime("%Y%m%d") not in calendar:
        warnings.warn(f"{date.strftime('%Y%m%d')}非交易日")
        return None
    url = date.strftime('http://www.100ppi.com/sf2/day-%Y-%m-%d.html')
    content = pandas_read_html_link(url)
    main = content[1]
    # Header
    header = _join_head(main)
    # Values
    values = main[main[4].str.endswith('%')]
    values.columns = header
    # Basis
    basis = pd.concat(content[2:-1])
    basis.columns = ['主力合约基差', '主力合约基差(%)']
    basis['商品'] = values['商品'].tolist()
    basis = pd.merge(values[["商品", "现货价格", "主力合约代码", "主力合约价格"]], basis)
    basis = pd.merge(
        basis, values[["商品", "180日内主力基差最高", "180日内主力基差最低", "180日内主力基差平均"]])
    return basis
コード例 #3
0
ファイル: cot.py プロジェクト: wuyaoyao99/mssdk
def futures_dce_position_rank_other(date="20160104"):
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    url = "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html"
    payload = {
        "memberDealPosiQuotes.variety": "c",
        "memberDealPosiQuotes.trade_type": "0",
        "year": date.year,
        "month": date.month-1,
        "day": date.day,
        "contract.contract_id": "all",
        "contract.variety_id": "c",
        "contract": "",
    }
    r = requests.post(url, data=payload)
    soup = BeautifulSoup(r.text, "lxml")
    symbol_list = [item["onclick"].strip("javascript:setVariety(").strip("');") for item in soup.find_all(attrs={"class": "selBox"})[-3].find_all("input")]
    big_df = dict()
    for symbol in symbol_list:
        payload = {
            "memberDealPosiQuotes.variety": "c",
            "memberDealPosiQuotes.trade_type": "0",
            "year": date.year,
            "month": date.month-1,
            "day": date.day,
            "contract.contract_id": "all",
            "contract.variety_id": symbol,
            "contract": "",
        }
        r = requests.post(url, data=payload)
        soup = BeautifulSoup(r.text, "lxml")
        contract_list = [item["onclick"].strip("javascript:setContract_id('").strip("');") for item in soup.find_all(attrs={"name": "contract"})]
        if contract_list:
            if len(contract_list[0]) == 4:
                contract_list = [symbol + item for item in contract_list]
                for contract in contract_list:
                    payload = {
                        "memberDealPosiQuotes.variety": "c",
                        "memberDealPosiQuotes.trade_type": "0",
                        "year": date.year,
                        "month": date.month - 1,
                        "day": date.day,
                        "contract.contract_id": contract,
                        "contract.variety_id": symbol,
                        "contract": "",
                    }
                    r = requests.post(url, data=payload)
                    temp_df = pd.read_html(r.text)[1].iloc[:-1, :]
                    temp_df.columns = ["rank", "vol_party_name", "vol", "vol_chg", "_", "long_party_name", "long_open_interest",
                                       "long_open_interest_chg", "_", "short_party_name", "short_open_interest",
                                       "short_open_interest_chg"]
                    temp_df["variety"] = symbol.upper()
                    temp_df["symbol"] = contract
                    temp_df = temp_df[["long_open_interest", "long_open_interest_chg", "long_party_name", "rank",
                                       "short_open_interest", "short_open_interest_chg", "short_party_name",
                                       "vol", "vol_chg", "vol_party_name", "symbol", "variety"]]
                    big_df[contract] = temp_df
    return big_df
コード例 #4
0
def get_czce_receipt_1(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    抓取郑州商品交易所注册仓单数据
    适用20080222至20100824(包括)
    :param date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :type date: str
    :param vars_list: list
    :type vars_list: 合约品种如CF、TA等列表 为空时为所有商品
    :return: 展期收益率数据
    :rtype: pandas.DataFrame
    var             商品品种                     string
    receipt         仓单数                       int
    date            日期                         string YYYYMMDD
    """
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    if date == '20090820':
        return pd.DataFrame()
    url = cons.CZCE_RECEIPT_URL_1 % date
    r = requests_link(url, encoding='utf-8')
    context = r.text
    data = pd.read_html(context)[1]
    records = pd.DataFrame()
    indexes = [x for x in data.index if '品种:' in str(data[0].tolist()[x])]
    ends = [x for x in data.index if '总计' in str(data[0].tolist()[x])]
    for i in list(range(len(indexes))):
        if i != len(indexes) - 1:
            data_cut = data.loc[indexes[i]:ends[i], :]
            data_cut = data_cut.fillna(method='pad')
        else:
            data_cut = data.loc[indexes[i]:, :]
            data_cut = data_cut.fillna(method='pad')
        if 'PTA' in data_cut[0].tolist()[0]:
            var = 'TA'
        else:
            var = chinese_to_english(
                re.sub(r'[A-Z]+', '', data_cut[0].tolist()[0][3:]))
        if var == 'CF':
            receipt = data_cut[6].tolist()[-1]
            receipt_chg = data_cut[7].tolist()[-1]
        else:
            receipt = data_cut[5].tolist()[-1]
            receipt_chg = data_cut[6].tolist()[-1]
        data_dict = {
            'var': var,
            'receipt': int(receipt),
            'receipt_chg': int(receipt_chg),
            'date': date
        }
        records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
コード例 #5
0
ファイル: futures_basis.py プロジェクト: cdmaxsmart/mssdk
def futures_spot_price(date: str = "20200110",
                       vars_list=cons.contract_symbols):
    """
    某个交易日大宗商品现货价格及相应基差

    :param date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品
    :return: pandas.DataFrame
    展期收益率数据:
    var              商品品种                     string
    sp               现货价格                     float
    near_symbol      临近交割合约                  string
    near_price       临近交割合约结算价             float
    dom_symbol       主力合约                     string
    dom_price        主力合约结算价                float
    near_basis       临近交割合约相对现货的基差      float
    dom_basis        主力合约相对现货的基差         float
    near_basis_rate  临近交割合约相对现货的基差率    float
    dom_basis_rate   主力合约相对现货的基差率       float
    date             日期                         string YYYYMMDD
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2011, 1, 4):
        raise Exception("数据源开始日期为 20110104, 请将获取数据时间点设置在 20110104 后")
    if date.strftime("%Y%m%d") not in calendar:
        warnings.warn(f"{date.strftime('%Y%m%d')}非交易日")
        return None
    u1 = cons.SYS_SPOT_PRICE_LATEST_URL
    u2 = cons.SYS_SPOT_PRICE_URL.format(date.strftime("%Y-%m-%d"))
    i = 1
    while True:
        for url in [u2, u1]:
            try:
                # url = u2
                r = pandas_read_html_link(url)
                string = r[0].loc[1, 1]
                news = "".join(re.findall(r"[0-9]", string))
                if news[3:11] == date.strftime("%Y%m%d"):
                    records = _check_information(r[1], date)
                    records.index = records["symbol"]
                    var_list_in_market = [
                        i for i in vars_list if i in records.index
                    ]
                    temp_df = records.loc[var_list_in_market, :]
                    temp_df.reset_index(drop=True, inplace=True)
                    return temp_df
                else:
                    time.sleep(3)
            except:
                print(
                    f"{date.strftime('%Y-%m-%d')}日生意社数据连接失败,第{str(i)}次尝试,最多5次")
                i += 1
                if i > 5:
                    print(
                        f"{date.strftime('%Y-%m-%d')}日生意社数据连接失败, 如果当前交易日是 2018-09-12, 由于生意社源数据缺失, 无法访问, 否则为重复访问已超过5次,您的地址被网站墙了,请保存好返回数据,稍后从该日期起重试"
                    )
                    return False
コード例 #6
0
def get_czce_receipt_2(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
        抓取郑州商品交易所注册仓单数据
        适用20100825(包括)至20151111(包括)
        Parameters
        ------
            date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
            vars_list: 合约品种如CF、TA等列表 为空时为所有商品
        Return
        -------
            DataFrame:
                展期收益率数据(DataFrame):
                    var             商品品种                     string
                    receipt         仓单数                       int
                    date            日期                         string YYYYMMDD
    """
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    url = cons.CZCE_RECEIPT_URL_2 % (date[:4], date)
    r = requests.get(url)
    r.encoding = 'utf-8'
    data = pd.read_html(r.text)[3:]
    records = pd.DataFrame()
    for data_cut in data:
        if len(data_cut.columns) > 3:
            last_indexes = [
                x for x in data_cut.index
                if '注:' in str(data_cut[0].tolist()[x])
            ]
            if len(last_indexes) > 0:
                last_index = last_indexes[0] - 1
                data_cut = data_cut.loc[:last_index, :]
            if 'PTA' in data_cut[0].tolist()[0]:
                var = 'TA'
            else:
                strings = data_cut[0].tolist()[0]
                string = strings.split(' ')[0][3:]
                var = chinese_to_english(re.sub(r'[A-Z]+', '', string))
            data_cut.columns = data_cut.T[1].tolist()
            receipt = data_cut['仓单数量'].tolist()[-1]
            receipt_chg = data_cut['当日增减'].tolist()[-1]
            data_dict = {
                'var': var,
                'receipt': int(receipt),
                'receipt_chg': int(receipt_chg),
                'date': date
            }
            records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
コード例 #7
0
ファイル: futures_daily_bar.py プロジェクト: cdmaxsmart/mssdk
def get_futures_daily(start_date="20201026",
                      end_date="20201027",
                      market="SHFE",
                      index_bar=False):
    """
    交易所日交易数据
    Parameters
    ------
    start_date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    end_date: 结束数据 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    market: 'CFFEX' 中金所, 'CZCE' 郑商所,  'SHFE' 上期所, 'DCE' 大商所 之一, 'INE' 上海国际能源交易中心。默认为中金所
    index_bar: bool  是否合成指数K线, 默认为 False 否则影响 roll_yield 的计算
    """
    if market.upper() == "CFFEX":
        f = get_cffex_daily
    elif market.upper() == "CZCE":
        f = get_czce_daily
    elif market.upper() == "SHFE":
        f = get_shfe_daily
    elif market.upper() == "DCE":
        f = get_dce_daily
    elif market.upper() == "INE":
        f = get_ine_daily
    else:
        print("Invalid Market Symbol")
        return None

    start_date = (cons.convert_date(start_date)
                  if start_date is not None else datetime.date.today())
    end_date = (cons.convert_date(end_date)
                if end_date is not None else cons.convert_date(
                    cons.get_latest_data_date(datetime.datetime.now())))

    df_list = list()
    while start_date <= end_date:
        df = f(date=str(start_date).replace("-", ""))
        if df is not None:
            df_list.append(df)
            if index_bar:
                df_list.append(get_futures_index(df))
        start_date += datetime.timedelta(days=1)

    if len(df_list) > 0:
        return pd.concat(df_list).reset_index(drop=True)
コード例 #8
0
ファイル: futures_basis.py プロジェクト: cdmaxsmart/mssdk
def futures_spot_price_daily(
    start_day: str = "20200110",
    end_day: str = "20200115",
    vars_list=cons.contract_symbols,
):
    """
    某段时间大宗商品现货价格及相应基差
    http://www.100ppi.com/sf/
    :param start_day: str 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象; 默认为当天
    :param end_day: str 结束数据 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象; 默认为当天
    :param vars_list: list 合约品种如 [RB, AL]; 默认参数为所有商品
    :return: pandas.DataFrame
    展期收益率数据:
    var               商品品种                      string
    sp                现货价格                      float
    near_symbol       临近交割合约                  string
    near_price        临近交割合约结算价             float
    dom_symbol        主力合约                      string
    dom_price         主力合约结算价                 float
    near_basis        临近交割合约相对现货的基差      float
    dom_basis         主力合约相对现货的基差          float
    near_basis_rate   临近交割合约相对现货的基差率    float
    dom_basis_rate    主力合约相对现货的基差率        float
    date              日期                          string YYYYMMDD
    """
    start_day = (cons.convert_date(start_day)
                 if start_day is not None else datetime.date.today())
    end_day = (cons.convert_date(end_day)
               if end_day is not None else cons.convert_date(
                   cons.get_latest_data_date(datetime.datetime.now())))
    df_list = []
    while start_day <= end_day:
        print(start_day)
        temp_df = futures_spot_price(start_day, vars_list)
        if temp_df is False:
            return pd.concat(df_list).reset_index(drop=True)
        elif temp_df is not None:
            df_list.append(temp_df)
        start_day += datetime.timedelta(days=1)
    if len(df_list) > 0:
        temp_df = pd.concat(df_list)
        temp_df.reset_index(drop=True, inplace=True)
        return temp_df
コード例 #9
0
def get_roll_yield(date=None, var="BB", symbol1=None, symbol2=None, df=None):
    """
    指定交易日指定品种(主力和次主力)或任意两个合约的展期收益率
    Parameters
    ------
    date: string 某一天日期 format: YYYYMMDD
    var: string 合约品种如RB、AL等
    symbol1: string 合约 1如 rb1810
    symbol2: string 合约 2 如 rb1812
    df: DataFrame或None 从dailyBar得到合约价格,如果为空就在函数内部抓dailyBar,直接喂给数据可以让计算加快
    Return
    -------
    tuple
    roll_yield
    near_by
    deferred
    """
    # date = "20100104"
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
        return None
    if symbol1:
        var = symbol_varieties(symbol1)
    if not isinstance(df, pd.DataFrame):
        market = symbol_market(var)
        df = get_futures_daily(start_date=date, end_date=date, market=market)
    if var:
        df = df[~df["symbol"].str.contains(
            "efp")]  # 20200304 由于交易所获取的数据中会有比如 "CUefp",所以在这里过滤
        df = df[df["variety"] == var].sort_values("open_interest",
                                                  ascending=False)
        df["close"] = df["close"].astype("float")
        if len(df["close"]) < 2:
            return None
        symbol1 = df["symbol"].tolist()[0]
        symbol2 = df["symbol"].tolist()[1]

    close1 = df["close"][df["symbol"] == symbol1.upper()].tolist()[0]
    close2 = df["close"][df["symbol"] == symbol2.upper()].tolist()[0]

    a = re.sub(r"\D", "", symbol1)
    a_1 = int(a[:-2])
    a_2 = int(a[-2:])
    b = re.sub(r"\D", "", symbol2)
    b_1 = int(b[:-2])
    b_2 = int(b[-2:])
    c = (a_1 - b_1) * 12 + (a_2 - b_2)
    if close1 == 0 or close2 == 0:
        return False
    if c > 0:
        return np.log(close2 / close1) / c * 12, symbol2, symbol1
    else:
        return np.log(close2 / close1) / c * 12, symbol1, symbol2
コード例 #10
0
def get_shfe_receipt_1(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    抓取上海期货交易所注册仓单数据, 适用20081006至20140518(包括) 20100126、20101029日期交易所格式混乱,直接回复脚本中DataFrame, 20100416、20130821日期交易所数据丢失
    :param date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品
    :return: pd.DataFrame
    展期收益率数据(DataFrame):
                    var             商品品种                     string
                    receipt         仓单数                       int
                    date            日期                         string YYYYMMDD           
    """
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn(f"{date.strftime('%Y%m%d')}非交易日")
        return None
    if date == '20100126':
        shfe_20100126['date'] = date
        return shfe_20100126
    elif date == '20101029':
        shfe_20101029['date'] = date
        return shfe_20101029
    elif date in ['20100416', '20130821']:
        return warnings.warn('20100416、20130821日期交易所数据丢失')
    else:
        var_list = [
            '天然橡胶', '沥青仓库', '沥青厂库', '热轧卷板', '燃料油', '白银', '线材', '螺纹钢', '铅', '铜',
            '铝', '锌', '黄金', '锡', '镍'
        ]
        url = cons.SHFE_RECEIPT_URL_1 % date
        data = pandas_read_html_link(url)[0]
        indexes = [x for x in data.index if (data[0].tolist()[x] in var_list)]
        last_index = [
            x for x in data.index if '注' in str(data[0].tolist()[x])
        ][0] - 1
        records = pd.DataFrame()
        for i in list(range(len(indexes))):
            if i != len(indexes) - 1:
                data_cut = data.loc[indexes[i]:indexes[i + 1] - 1, :]
            else:
                data_cut = data.loc[indexes[i]:last_index, :]
                data_cut = data_cut.fillna(method='pad')
            data_dict = dict()
            data_dict['var'] = chinese_to_english(data_cut[0].tolist()[0])
            data_dict['receipt'] = int(data_cut[2].tolist()[-1])
            data_dict['receipt_chg'] = int(data_cut[3].tolist()[-1])
            data_dict['date'] = date
            records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
コード例 #11
0
ファイル: cot.py プロジェクト: wuyaoyao99/mssdk
def get_rank_sum_daily(start_day=None, end_day=None, vars_list=cons.contract_symbols):
    """
    采集四个期货交易所前5、前10、前15、前20会员持仓排名数据
    注1:由于上期所和中金所只公布每个品种内部的标的排名,没有公布品种的总排名;
        所以函数输出的品种排名是由品种中的每个标的加总获得,并不是真实的品种排名列表
    注2:大商所只公布了品种排名,未公布标的排名
    :param start_day: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param end_day: 结束数据 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品
    :return: pd.DataFrame
    展期收益率数据(DataFrame):
    symbol                           标的合约                     string
    var                              商品品种                     string
    vol_top5                         成交量前5会员成交量总和         int
    vol_chg_top5                     成交量前5会员成交量变化总和      int
    long_open_interest_top5          持多单前5会员持多单总和         int
    long_open_interest_chg_top5      持多单前5会员持多单变化总和      int
    short_open_interest_top5         持空单前5会员持空单总和         int
    short_open_interest_chg_top5     持空单前5会员持空单变化总和      int
    vol_top10                        成交量前10会员成交量总和        int
    ...
    date                             日期                         string YYYYMMDD
    """
    start_day = cons.convert_date(start_day) if start_day is not None else datetime.date.today()
    end_day = cons.convert_date(end_day) if end_day is not None else cons.convert_date(
        cons.get_latest_data_date(datetime.datetime.now()))
    records = pd.DataFrame()
    while start_day <= end_day:
        print(start_day)
        if start_day.strftime('%Y%m%d') in calendar:
            data = get_rank_sum(start_day, vars_list)
            if data is False:
                print(f"{start_day.strftime('%Y-%m-%d')}日交易所数据连接失败,已超过20次,您的地址被网站墙了,请保存好返回数据,稍后从该日期起重试")
                return records.reset_index(drop=True)
            records = records.append(data)
        else:
            warnings.warn(f"{start_day.strftime('%Y%m%d')}非交易日")
        start_day += datetime.timedelta(days=1)

    return records.reset_index(drop=True)
コード例 #12
0
ファイル: cot.py プロジェクト: wuyaoyao99/mssdk
def get_cffex_rank_table(date="20200427", vars_list=cons.contract_symbols):
    """
    中国金融期货交易所前 20 会员持仓排名数据明细
    注:该交易所既公布品种排名,也公布标的排名
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品, 数据从20100416开始,每交易日16:30左右更新数据
    :return: pd.DataFrame
    rank                        排名                        int
    vol_party_name              成交量排序的当前名次会员        string(中文)
    vol                         该会员成交量                  int
    vol_chg                     该会员成交量变化量             int
    long_party_name             持多单排序的当前名次会员        string(中文)
    long_open_interest          该会员持多单                  int
    long_open_interest_chg      该会员持多单变化量             int
    short_party_name            持空单排序的当前名次会员        string(中文)
    short_open_interest         该会员持空单                  int
    short_open_interest_chg     该会员持空单变化量             int
    symbol                      标的合约                     string
    var                         品种                        string
    date                        日期                        string YYYYMMDD
    """
    vars_list = [i for i in vars_list if i in cons.market_exchange_symbols['cffex']]
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2010, 4, 16):
        print(Exception("cffex数据源开始日期为20100416,跳过"))
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    big_dict = {}
    for var in vars_list:
        # print(var)
        # var = "IF"
        url = cons.CFFEX_VOL_RANK_URL % (date.strftime('%Y%m'), date.strftime('%d'), var)
        r = requests_link(url, encoding='gbk')
        if not r:
            return False
        if '网页错误' not in r.text:
            try:
                temp_chche = StringIO(r.text.split('\n交易日,')[1])
            except:
                temp_chche = StringIO(r.text.split('\n交易日,')[0][4:])  # 20200316开始数据结构变化,统一格式
            table = pd.read_csv(temp_chche)
            table = table.dropna(how='any')
            table = table.applymap(lambda x: x.strip() if isinstance(x, str) else x)
            for symbol in set(table['合约']):
                table_cut = table[table['合约'] == symbol]
                table_cut.columns = ['symbol', 'rank'] + rank_columns
                table_cut = _table_cut_cal(pd.DataFrame(table_cut), symbol)
                big_dict[symbol] = table_cut.reset_index(drop=True)
    return big_dict
コード例 #13
0
def get_dce_receipt(date: str = None,
                    symbol_list: List = cons.contract_symbols):
    """
    大连商品交易所注册仓单数据
    :param date: format 开始日期: YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象, 为空时为当天
    :param symbol_list: 合约品种如 RB, AL等列表, 为空时为所有商品数据从 20060106开始,每周五更新仓单数据。直到20090407起,每交易日都更新仓单数据
    :return: pd.DataFrame
    展期收益率数据(DataFrame):
    var             商品品种                     string
    receipt         仓单数                       int
    date            日期                         string YYYYMMDD
    """
    if not isinstance(symbol_list, list):
        return warnings.warn(f"symbol_list: 必须是列表")
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn(f"{date.strftime('%Y%m%d')}非交易日")
        return None
    payload = {
        "weekQuotes.variety": "all",
        "year": date.year,
        "month": date.month - 1,  # 网站月份描述少 1 个月, 属于网站问题
        "day": date.day
    }
    data = pandas_read_html_link(cons.DCE_RECEIPT_URL,
                                 method="post",
                                 data=payload,
                                 headers=cons.dce_headers)[0]
    records = pd.DataFrame()
    for x in data.to_dict(orient='records'):
        if isinstance(x['品种'], str):
            if x['品种'][-2:] == '小计':
                var = x['品种'][:-2]
                temp_data = {
                    'var': chinese_to_english(var),
                    'receipt': int(x['今日仓单量']),
                    'receipt_chg': int(x['增减']),
                    'date': date.strftime('%Y%m%d')
                }
                records = records.append(pd.DataFrame(temp_data, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in symbol_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
コード例 #14
0
def get_shfe_receipt_2(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
        抓取上海商品交易所注册仓单数据
        适用20140519(包括)至今
        Parameters
        ------
            date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
            vars_list: 合约品种如RB、AL等列表 为空时为所有商品
        Return
        -------
            DataFrame:
                展期收益率数据(DataFrame):
                    var             商品品种                     string
                    receipt         仓单数                       int
                    date            日期                         string YYYYMMDD
    """
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    url = cons.SHFE_RECEIPT_URL_2 % date
    r = requests_link(url, encoding='utf-8')
    try:
        context = r.json()
    except:
        return pd.DataFrame()
    data = pd.DataFrame(context['o_cursor'])
    if len(data.columns) < 1:
        return pd.DataFrame()
    records = pd.DataFrame()
    for var in set(data['VARNAME'].tolist()):
        data_cut = data[data['VARNAME'] == var]
        data_dict = {
            'var': chinese_to_english(re.sub(r"\W|[a-zA-Z]", "", var)),
            'receipt': int(data_cut['WRTWGHTS'].tolist()[-1]),
            'receipt_chg': int(data_cut['WRTCHANGE'].tolist()[-1]),
            'date': date
        }
        records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
コード例 #15
0
ファイル: daily_bar.py プロジェクト: wuyaoyao99/mssdk
def get_shfe_v_wap(date="20200416"):
    """
        获取上期所日成交均价数据
    Parameters
    ------
        date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    Return
    -------
        DataFrame
            郑商所日交易数据(DataFrame):
                symbol        合约代码
                date          日期
                time_range    v_wap时段,分09:00-10:15和09:00-15:00两类
                v_wap          加权平均成交均价
        或 None(给定日期没有数据)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    try:
        json_data = json.loads(
            requests_link(
                cons.SHFE_V_WAP_URL % (day.strftime("%Y%m%d")),
                headers=cons.headers,
                encoding="utf-8",
            ).text
        )
    except requests.HTTPError as reason:
        if reason.response not in [404, 403]:
            print(cons.SHFE_DAILY_URL % (day.strftime("%Y%m%d")), reason)
        return None

    if len(json_data["o_currefprice"]) == 0:
        return None
    try:
        df = pd.DataFrame(json_data["o_currefprice"])
        df["INSTRUMENTID"] = df["INSTRUMENTID"].str.strip()
        df[":B1"].astype("int16")
        return df.rename(columns=cons.SHFE_V_WAP_COLUMNS)[
            list(cons.SHFE_V_WAP_COLUMNS.values())
        ]
    except:
        return None
コード例 #16
0
def get_czce_receipt_3(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    郑州商品交易所注册仓单数据
    适用 20151112-至今
    Parameters
    ------
        date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
        vars_list: 合约品种如CF、TA等列表 为空时为所有商品
    Return
    -------
        DataFrame:
            展期收益率数据(DataFrame):`1
                var             商品品种                     string
                receipt         仓单数                       int
                date            日期                         string YYYYMMDD
    """

    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    url = cons.CZCE_RECEIPT_URL_3 % (date[:4], date)
    r = requests_link(url, encoding='utf-8')
    r.encoding = 'utf-8'
    data = pd.read_html(r.text, encoding='gb2312')
    records = pd.DataFrame()
    if len(data) < 4:
        return records
    if int(date) <= 20171227:
        data = data[1:]
    for data_cut in data:
        if len(data_cut.columns) > 3 and len(data_cut.index) > 7:
            last_indexes = [
                x for x in data_cut.index
                if '注:' in str(data_cut[0].tolist()[x])
            ]
            if len(last_indexes) > 0:
                last_index = last_indexes[0] - 1
                data_cut = data_cut.loc[:last_index, :]
            if 'PTA' in data_cut[0].tolist()[0]:
                var = 'TA'
            else:
                strings = data_cut[0].tolist()[0]
                string = strings.split(' ')[0][3:]
                if len(string) > 7:
                    continue
                print(string)
                var = chinese_to_english(re.sub('[A-Z]+', '', string))
            data_cut.columns = data_cut.loc[1, :]
            data_cut = data_cut.fillna(method='pad')
            try:
                receipt = data_cut.loc[:, '仓单数量'].tolist()[-1]
            except:
                receipt = data_cut.loc[:, '仓单数量(保税)'].tolist()[-1]
            receipt_chg = data_cut.loc[:, '当日增减'].tolist()[-1]
            data_dict = {
                'var': var,
                'receipt': int(receipt),
                'receipt_chg': int(receipt_chg),
                'date': date
            }
            records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
コード例 #17
0
def get_roll_yield_bar(type_method="var",
                       var="RB",
                       date="20200622",
                       start_day=None,
                       end_day=None,
                       plot=False):
    """
    展期收益率
    :param type_method: 'symbol': 获取指定交易日指定品种所有交割月合约的收盘价; 'var': 获取指定交易日所有品种两个主力合约的展期收益率(展期收益率横截面); 'date': 获取指定品种每天的两个主力合约的展期收益率(展期收益率时间序列)
    :param var: 合约品种如 "RB", "AL" 等
    :param date: 指定交易日 format: YYYYMMDD
    :param start_day: 开始日期 format:YYYYMMDD
    :param end_day: 结束日期 format:YYYYMMDD
    :param plot: True or False 是否作图
    :return: pandas.DataFrame
    展期收益率数据(DataFrame)
    ry      展期收益率
    index   日期或品种
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    start_day = (cons.convert_date(start_day)
                 if start_day is not None else datetime.date.today())
    end_day = (cons.convert_date(end_day)
               if end_day is not None else cons.convert_date(
                   cons.get_latest_data_date(datetime.datetime.now())))

    if type_method == "symbol":
        df = get_futures_daily(start_date=date,
                               end_date=date,
                               market=symbol_market(var))
        df = df[df["variety"] == var]
        if plot:
            _plot_bar_2(df[["symbol", "close"]])
        return df

    if type_method == "var":
        df = pd.DataFrame()
        for market in ["dce", "cffex", "shfe", "czce"]:
            df = df.append(
                get_futures_daily(start_date=date,
                                  end_date=date,
                                  market=market))
        var_list = list(set(df["variety"]))
        if "IO" in var_list:
            var_list.remove("IO")  # IO 为期权
        df_l = pd.DataFrame()
        for var in var_list:
            ry = get_roll_yield(date, var, df=df)
            if ry:
                df_l = df_l.append(
                    pd.DataFrame([ry],
                                 index=[var],
                                 columns=["roll_yield", "near_by",
                                          "deferred"]))
        df_l["date"] = date
        df_l = df_l.sort_values("roll_yield")
        if plot:
            _plot_bar(df_l["roll_yield"])
        return df_l

    if type_method == "date":
        df_l = pd.DataFrame()
        while start_day <= end_day:
            try:
                ry = get_roll_yield(start_day, var)
                if ry:
                    df_l = df_l.append(
                        pd.DataFrame(
                            [ry],
                            index=[start_day],
                            columns=["roll_yield", "near_by", "deferred"],
                        ))
            except:
                pass
            start_day += datetime.timedelta(days=1)
        if plot:
            _plot(df_l["roll_yield"])
        return df_l
コード例 #18
0
def get_receipt(start_day: str = None,
                end_day: str = None,
                vars_list: List = cons.contract_symbols):
    """
    大宗商品注册仓单数量
    :param start_day: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :type start_day: str
    :param end_day: 结束数据 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :type end_day: str
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品
    :type vars_list: str
    :return: 展期收益率数据
    :rtype: pandas.DataFrame
    """
    start_day = cons.convert_date(
        start_day) if start_day is not None else datetime.date.today()
    end_day = cons.convert_date(
        end_day) if end_day is not None else cons.convert_date(
            cons.get_latest_data_date(datetime.datetime.now()))
    records = pd.DataFrame()
    while start_day <= end_day:
        if start_day.strftime('%Y%m%d') not in calendar:
            warnings.warn(f"{start_day.strftime('%Y%m%d')}非交易日")
        else:
            print(start_day)
            for market, market_vars in cons.market_exchange_symbols.items():

                if market == 'dce':
                    if start_day >= datetime.date(2009, 4, 7):
                        f = get_dce_receipt
                    else:
                        print('20090407起,dce每交易日更新仓单数据')
                        f = None
                elif market == 'shfe':
                    if datetime.date(2008, 10,
                                     6) <= start_day <= datetime.date(
                                         2014, 5, 16):
                        f = get_shfe_receipt_1
                    elif start_day > datetime.date(2014, 5, 16):
                        f = get_shfe_receipt_2
                    else:
                        f = None
                        print('20081006起,shfe每交易日更新仓单数据')
                elif market == 'czce':
                    if datetime.date(2008, 3, 3) <= start_day <= datetime.date(
                            2010, 8, 24):
                        f = get_czce_receipt_1
                    elif datetime.date(2010, 8,
                                       24) < start_day <= datetime.date(
                                           2015, 11, 11):
                        f = get_czce_receipt_2
                    elif start_day > datetime.date(2015, 11, 11):
                        f = get_czce_receipt_3
                    else:
                        f = None
                        print('20080303起,czce每交易日更新仓单数据')
                get_vars = [var for var in vars_list if var in market_vars]
                if market != 'cffex' and get_vars != []:
                    if f is not None:
                        records = records.append(f(start_day, get_vars))

        start_day += datetime.timedelta(days=1)
    records.reset_index(drop=True, inplace=True)
    if records.empty:
        return records
    if "MA" in records["var"].to_list():
        replace_index = records[records["var"] == "MA"]["receipt"].astype(
            str).str.split("0", expand=True)[0].index
        records.loc[replace_index, "receipt"] = records[
            records["var"] == "MA"]["receipt"].astype(str).str.split(
                "0", expand=True)[0]
    return records
コード例 #19
0
ファイル: cot.py プロジェクト: wuyaoyao99/mssdk
def get_rank_sum(date=None, vars_list=cons.contract_symbols):
    """
    抓取四个期货交易所前5、前10、前15、前20会员持仓排名数据
    注1:由于上期所和中金所只公布每个品种内部的标的排名, 没有公布品种的总排名;
        所以函数输出的品种排名是由品种中的每个标的加总获得, 并不是真实的品种排名列表
    注2:大商所只公布了品种排名, 未公布标的排名
    :param date: 日期 format: YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如 RB, AL等列表 为空时为所有商品
    :return: pd.DataFrame:
    展期收益率数据
    symbol                           标的合约                     string
    var                              商品品种                     string
    vol_top5                         成交量前5会员成交量总和         int
    vol_chg_top5                     成交量前5会员成交量变化总和      int
    long_open_interest_top5          持多单前5会员持多单总和         int
    long_open_interest_chg_top5      持多单前5会员持多单变化总和      int
    short_open_interest_top5         持空单前5会员持空单总和         int
    short_open_interest_chg_top5     持空单前5会员持空单变化总和      int
    vol_top10                        成交量前10会员成交量总和        int
    ...
    date                             日期                         string YYYYMMDD
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    dce_var = [i for i in vars_list if i in cons.market_exchange_symbols['dce']]
    shfe_var = [i for i in vars_list if i in cons.market_exchange_symbols['shfe']]
    czce_var = [i for i in vars_list if i in cons.market_exchange_symbols['czce']]
    cffex_var = [i for i in vars_list if i in cons.market_exchange_symbols['cffex']]
    big_dict = {}
    if len(dce_var) > 0:
        data = get_dce_rank_table(date, dce_var)
        if data is False:
            return False
        big_dict.update(data)
    if len(shfe_var) > 0:
        data = get_shfe_rank_table(date, shfe_var)
        if data is False:
            return False
        big_dict.update(data)
    if len(czce_var) > 0:
        data = get_czce_rank_table(date, czce_var)
        if data is False:
            return False
        big_dict.update(data)
    if len(cffex_var) > 0:
        data = get_cffex_rank_table(date, cffex_var)
        if data is False:
            return False
        big_dict.update(data)
    records = pd.DataFrame()

    for symbol, table in big_dict.items():
        table = table.applymap(lambda x: 0 if x == '' else x)
        for symbol_inner in set(table['symbol']):

            var = symbol_varieties(symbol_inner)
            if var in vars_list:
                table_cut = table[table['symbol'] == symbol_inner]
                table_cut['rank'] = table_cut['rank'].astype('float')
                table_cut_top5 = table_cut[table_cut['rank'] <= 5]
                table_cut_top10 = table_cut[table_cut['rank'] <= 10]
                table_cut_top15 = table_cut[table_cut['rank'] <= 15]
                table_cut_top20 = table_cut[table_cut['rank'] <= 20]

                big_dict = {'symbol': symbol_inner, 'variety': var,

                            'vol_top5': table_cut_top5['vol'].sum(), 'vol_chg_top5': table_cut_top5['vol_chg'].sum(),
                            'long_open_interest_top5': table_cut_top5['long_open_interest'].sum(),
                            'long_open_interest_chg_top5': table_cut_top5['long_open_interest_chg'].sum(),
                            'short_open_interest_top5': table_cut_top5['short_open_interest'].sum(),
                            'short_open_interest_chg_top5': table_cut_top5['short_open_interest_chg'].sum(),

                            'vol_top10': table_cut_top10['vol'].sum(),
                            'vol_chg_top10': table_cut_top10['vol_chg'].sum(),
                            'long_open_interest_top10': table_cut_top10['long_open_interest'].sum(),
                            'long_open_interest_chg_top10': table_cut_top10['long_open_interest_chg'].sum(),
                            'short_open_interest_top10': table_cut_top10['short_open_interest'].sum(),
                            'short_open_interest_chg_top10': table_cut_top10['short_open_interest_chg'].sum(),

                            'vol_top15': table_cut_top15['vol'].sum(),
                            'vol_chg_top15': table_cut_top15['vol_chg'].sum(),
                            'long_open_interest_top15': table_cut_top15['long_open_interest'].sum(),
                            'long_open_interest_chg_top15': table_cut_top15['long_open_interest_chg'].sum(),
                            'short_open_interest_top15': table_cut_top15['short_open_interest'].sum(),
                            'short_open_interest_chg_top15': table_cut_top15['short_open_interest_chg'].sum(),

                            'vol_top20': table_cut_top20['vol'].sum(),
                            'vol_chg_top20': table_cut_top20['vol_chg'].sum(),
                            'long_open_interest_top20': table_cut_top20['long_open_interest'].sum(),
                            'long_open_interest_chg_top20': table_cut_top20['long_open_interest_chg'].sum(),
                            'short_open_interest_top20': table_cut_top20['short_open_interest'].sum(),
                            'short_open_interest_chg_top20': table_cut_top20['short_open_interest_chg'].sum(),

                            'date': date.strftime('%Y%m%d')
                            }
                records = records.append(pd.DataFrame(big_dict, index=[0]))

    if len(big_dict.items()) > 0:
        add_vars = [i for i in cons.market_exchange_symbols['shfe'] + cons.market_exchange_symbols['cffex'] if
                    i in records['variety'].tolist()]
        for var in add_vars:
            records_cut = records[records['variety'] == var]
            var_record = pd.DataFrame(records_cut.sum()).T
            var_record['date'] = date.strftime('%Y%m%d')
            var_record.loc[:, ['variety', 'symbol']] = var
            records = records.append(var_record)

    return records.reset_index(drop=True)
コード例 #20
0
ファイル: futures_daily_bar.py プロジェクト: cdmaxsmart/mssdk
def get_cffex_daily(date="20100401"):
    """
    中国金融期货交易所日交易数据
    http://www.cffex.com.cn/rtj/
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象; 为空时为当天
    :return: pandas.DataFrame
    中国金融期货交易所日:
    symbol        合约代码
    date          日期
    open          开盘价
    high          最高价
    low          最低价
    close         收盘价
    volume        成交量
    open_interest   持仓量
    turnover      成交额
    settle        结算价
    pre_settle    前结算价
    variety       合约类别
    或 None(给定日期没有交易数据)
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    url = f"http://www.cffex.com.cn/sj/historysj/{date[:-2]}/zip/{date[:-2]}.zip"
    r = requests.get(url)
    try:
        with zipfile.ZipFile(BytesIO(r.content)) as file:
            with file.open(f"{date}_1.csv") as my_file:
                data = my_file.read().decode("gb2312")
                data_df = pd.read_csv(StringIO(data))
    except:
        return None
    data_df = data_df[data_df["合约代码"] != "小计"]
    data_df = data_df[data_df["合约代码"] != "合计"]
    data_df = data_df[~data_df["合约代码"].str.contains("IO")]
    data_df.reset_index(inplace=True, drop=True)
    data_df["合约代码"] = data_df["合约代码"].str.strip()
    symbol_list = data_df["合约代码"].to_list()
    variety_list = [
        re.compile(r"[a-zA-Z_]+").findall(item)[0] for item in symbol_list
    ]
    if data_df.shape[1] == 15:
        data_df.columns = [
            "symbol", "open", "high", "low", "volume", "turnover",
            "open_interest", "_", "close", "settle", "pre_settle", "_", "_",
            "_", "_"
        ]
    else:
        data_df.columns = [
            "symbol", "open", "high", "low", "volume", "turnover",
            "open_interest", "_", "close", "settle", "pre_settle", "_", "_",
            "_"
        ]
    data_df["date"] = date
    data_df["variety"] = variety_list
    data_df = data_df[[
        "symbol", "date", "open", "high", "low", "close", "volume",
        "open_interest", "turnover", "settle", "pre_settle", "variety"
    ]]
    return data_df
コード例 #21
0
ファイル: cot.py プロジェクト: wuyaoyao99/mssdk
def futures_dce_position_rank(date: str = "20160104") -> pd.DataFrame:
    """
    大连商品交易日每日持仓排名-具体合约
    http://www.dce.com.cn/dalianshangpin/xqsj/tjsj26/rtj/rcjccpm/index.html
    :param date: 指定交易日; e.g., "20200511"
    :type date: str
    :return: 指定日期的持仓排名数据
    :rtype: pandas.DataFrame
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    url = "http://www.dce.com.cn/publicweb/quotesdata/exportMemberDealPosiQuotesBatchData.html"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Length": "160",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "www.dce.com.cn",
        "Origin": "http://www.dce.com.cn",
        "Pragma": "no-cache",
        "Referer": "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
    }
    payload = {
        "memberDealPosiQuotes.variety": "a",
        "memberDealPosiQuotes.trade_type": "0",
        "contract.contract_id": "a2009",
        "contract.variety_id": "a",
        "year": date.year,
        "month": date.month - 1,
        "day": date.day,
        "batchExportFlag": "batch",
    }
    r = requests.post(url, payload, headers=headers)
    big_dict = dict()
    with zipfile.ZipFile(BytesIO(r.content), "r") as z:
        for i in z.namelist():
            file_name = i.encode('cp437').decode('GBK')
            try:
                data = pd.read_table(z.open(i), header=None, sep="\t").iloc[:-6]
                if len(data) < 12:  # 处理没有活跃合约的情况
                    big_dict[file_name.split("_")[1]] = pd.DataFrame()
                    continue
                start_list = data[data.iloc[:, 0].str.find("名次") == 0].index.tolist()
                data = data.iloc[start_list[0]:, data.columns[data.iloc[start_list[0], :].notnull()]]
                data.reset_index(inplace=True, drop=True)
                start_list = data[data.iloc[:, 0].str.find("名次") == 0].index.tolist()
                end_list = data[data.iloc[:, 0].str.find("总计") == 0].index.tolist()
                part_one = data[start_list[0]: end_list[0]].iloc[1:, :]
                part_two = data[start_list[1]: end_list[1]].iloc[1:, :]
                part_three = data[start_list[2]: end_list[2]].iloc[1:, :]
                temp_df = pd.concat([part_one.reset_index(drop=True), part_two.reset_index(drop=True),
                                     part_three.reset_index(drop=True)], axis=1, ignore_index=True)
                temp_df.columns = ["名次", "会员简称", "成交量", "增减", "名次", "会员简称", "持买单量", "增减", "名次", "会员简称", "持卖单量", "增减"]
                temp_df["rank"] = range(1, len(temp_df) + 1)
                del temp_df["名次"]
                temp_df.columns = ["vol_party_name", "vol", "vol_chg", "long_party_name", "long_open_interest",
                                   "long_open_interest_chg", "short_party_name", "short_open_interest",
                                   "short_open_interest_chg", "rank"]
                temp_df["symbol"] = file_name.split("_")[1]
                temp_df["variety"] = file_name.split("_")[1][:-4].upper()
                temp_df = temp_df[["long_open_interest", "long_open_interest_chg", "long_party_name", "rank",
                                   "short_open_interest", "short_open_interest_chg", "short_party_name",
                                   "vol", "vol_chg", "vol_party_name", "symbol", "variety"]]
                big_dict[file_name.split("_")[1]] = temp_df
            except UnicodeDecodeError as e:
                try:
                    data = pd.read_table(z.open(i), header=None, sep="\\s+", encoding="gb2312", skiprows=3)
                except:
                    data = pd.read_table(z.open(i), header=None, sep="\\s+", encoding="gb2312", skiprows=4)
                start_list = data[data.iloc[:, 0].str.find("名次") == 0].index.tolist()
                end_list = data[data.iloc[:, 0].str.find("总计") == 0].index.tolist()
                part_one = data[start_list[0]: end_list[0]].iloc[1:, :]
                part_two = data[start_list[1]: end_list[1]].iloc[1:, :]
                part_three = data[start_list[2]: end_list[2]].iloc[1:, :]
                temp_df = pd.concat([part_one.reset_index(drop=True), part_two.reset_index(drop=True),
                                     part_three.reset_index(drop=True)], axis=1, ignore_index=True)
                temp_df.columns = ["名次", "会员简称", "成交量", "增减", "名次", "会员简称", "持买单量", "增减", "名次", "会员简称", "持卖单量", "增减"]
                temp_df["rank"] = range(1, len(temp_df) + 1)
                del temp_df["名次"]
                temp_df.columns = ["vol_party_name", "vol", "vol_chg", "long_party_name", "long_open_interest",
                                   "long_open_interest_chg", "short_party_name", "short_open_interest",
                                   "short_open_interest_chg", "rank"]
                temp_df["symbol"] = file_name.split("_")[1]
                temp_df["variety"] = file_name.split("_")[1][:-4].upper()
                temp_df = temp_df[["long_open_interest", "long_open_interest_chg", "long_party_name", "rank",
                                   "short_open_interest", "short_open_interest_chg", "short_party_name",
                                   "vol", "vol_chg", "vol_party_name", "symbol", "variety"]]
                big_dict[file_name.split("_")[1]] = temp_df
    return big_dict
コード例 #22
0
ファイル: futures_daily_bar.py プロジェクト: cdmaxsmart/mssdk
def get_dce_daily(date: str = "20030115") -> pd.DataFrame:
    """
    大连商品交易所日交易数据
    http://www.dce.com.cn/dalianshangpin/xqsj/tjsj26/rtj/rxq/index.html
    :param date: 交易日, e.g., 20200416
    :type date: str
    :return: 具体交易日的个品种行情数据
    :rtype: pandas.DataFrame
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    url = "http://www.dce.com.cn/publicweb/quotesdata/exportDayQuotesChData.html"
    headers = {
        "Accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Encoding":
        "gzip, deflate",
        "Accept-Language":
        "zh-CN,zh;q=0.9,en;q=0.8",
        "Cache-Control":
        "no-cache",
        "Connection":
        "keep-alive",
        "Content-Length":
        "86",
        "Content-Type":
        "application/x-www-form-urlencoded",
        "Host":
        "www.dce.com.cn",
        "Origin":
        "http://www.dce.com.cn",
        "Pragma":
        "no-cache",
        "Referer":
        "http://www.dce.com.cn/publicweb/quotesdata/dayQuotesCh.html",
        "Upgrade-Insecure-Requests":
        "1",
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36",
    }
    params = {
        "dayQuotes.variety": "all",
        "dayQuotes.trade_type": "0",
        "year": date[:4],
        "month": str(int(date[4:6]) - 1),
        "day": date[6:],
        "exportFlag": "excel",
    }
    r = requests.post(url, data=params, headers=headers)
    data_df = pd.read_excel(BytesIO(r.content))
    data_df = data_df[~data_df["商品名称"].str.contains("小计")]
    data_df = data_df[~data_df["商品名称"].str.contains("总计")]
    data_df["variety"] = data_df["商品名称"].map(lambda x: cons.DCE_MAP[x])
    data_df["symbol"] = data_df["variety"] + data_df["交割月份"].astype(
        int).astype(str)
    del data_df["商品名称"]
    del data_df["交割月份"]
    data_df.columns = [
        "open", "high", "low", "close", "pre_settle", "settle", "_", "_",
        "volume", "open_interest", "_", "turnover", "variety", "symbol"
    ]
    data_df["date"] = date
    data_df = data_df[[
        "symbol", "date", "open", "high", "low", "close", "volume",
        "open_interest", "turnover", "settle", "pre_settle", "variety"
    ]]
    data_df = data_df.applymap(lambda x: x.replace(",", ""))
    data_df = data_df.astype({
        "open": "float",
        "high": "float",
        "low": "float",
        "close": "float",
        "volume": "float",
        "open_interest": "float",
        "turnover": "float",
        "settle": "float",
        "pre_settle": "float",
    })
    return data_df
コード例 #23
0
ファイル: cot.py プロジェクト: wuyaoyao99/mssdk
def get_dce_rank_table(date="20180404", vars_list=cons.contract_symbols):
    """
    大连商品交易所前 20 会员持仓排名数据明细
    注: 该交易所既公布品种排名, 也公布标的合约排名
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date 对象, 为空时为当天
    :param vars_list: 合约品种如 RB、AL等列表为空时为所有商品, 数据从 20060104 开始,每交易日 16:30 左右更新数据
    :return: pandas.DataFrame
    rank                        排名                        int
    vol_party_name              成交量排序的当前名次会员      string(中文)
    vol                         该会员成交量                 int
    vol_chg                     该会员成交量变化量            int
    long_party_name             持多单排序的当前名次会员      string(中文)
    long_open_interest          该会员持多单                 int
    long_open_interest_chg      该会员持多单变化量            int
    short_party_name            持空单排序的当前名次会员       string(中文)
    short_open_interest         该会员持空单                  int
    short_open_interest_chg     该会员持空单变化量             int
    symbol                      标的合约                     string
    var                         品种                        string
    date                        日期                        string YYYYMMDD
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2006, 1, 4):
        print(Exception("大连商品交易所数据源开始日期为20060104,跳过"))
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    vars_list = [i for i in vars_list if i in cons.market_exchange_symbols['dce']]
    big_dict = {}
    for var in vars_list:
        url = cons.DCE_VOL_RANK_URL % (var.lower(), var.lower(), date.year, date.month - 1, date.day)
        list_60_name = []
        list_60 = []
        list_60_chg = []
        rank = []
        texts = requests_link(url).content.splitlines()
        if not texts:
            return False
        if len(texts) > 30:
            for text in texts:
                line = text.decode("utf-8")
                string_list = line.split()
                try:
                    if int(string_list[0]) <= 20:
                        list_60_name.append(string_list[1])
                        list_60.append(string_list[2])
                        list_60_chg.append(string_list[3])
                        rank.append(string_list[0])
                except:
                    pass
            table_cut = pd.DataFrame({'rank': rank[0:20],
                                      'vol_party_name': list_60_name[0:20],
                                      'vol': list_60[0:20],
                                      'vol_chg': list_60_chg[0:20],
                                      'long_party_name': list_60_name[20:40],
                                      'long_open_interest': list_60[20:40],
                                      'long_open_interest_chg': list_60_chg[20:40],
                                      'short_party_name': list_60_name[40:60],
                                      'short_open_interest': list_60[40:60],
                                      'short_open_interest_chg': list_60_chg[40:60]
                                      })
            table_cut = table_cut.applymap(lambda x: x.replace(',', ''))
            table_cut = _table_cut_cal(table_cut, var)
            big_dict[var] = table_cut.reset_index(drop=True)
    return big_dict
コード例 #24
0
ファイル: cot.py プロジェクト: wuyaoyao99/mssdk
def get_shfe_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    上海期货交易所前 20 会员持仓排名数据明细
    注:该交易所只公布每个品种内部的标的排名,没有公布品种的总排名
    数据从20020107开始,每交易日16:30左右更新数据
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品
    :return: pd.DataFrame
        rank                        排名                        int
        vol_party_name              成交量排序的当前名次会员        string(中文)
        vol                         该会员成交量                  int
        vol_chg                     该会员成交量变化量             int
        long_party_name             持多单排序的当前名次会员        string(中文)
        long_open_interest          该会员持多单                  int
        long_open_interest_chg      该会员持多单变化量             int
        short_party_name            持空单排序的当前名次会员        string(中文)
        short_open_interest         该会员持空单                  int
        short_open_interest_chg     该会员持空单变化量             int
        symbol                      标的合约                     string
        var                         品种                        string
        date                        日期                        string YYYYMMDD

    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2002, 1, 7):
        print("shfe数据源开始日期为20020107,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    url = cons.SHFE_VOL_RANK_URL % (date.strftime('%Y%m%d'))
    r = requests_link(url, 'utf-8')
    try:
        context = json.loads(r.text)
    except:
        return {}
    df = pd.DataFrame(context['o_cursor'])

    df = df.rename(
        columns={'CJ1': 'vol', 'CJ1_CHG': 'vol_chg', 'CJ2': 'long_open_interest', 'CJ2_CHG': 'long_open_interest_chg',
                 'CJ3': 'short_open_interest',
                 'CJ3_CHG': 'short_open_interest_chg', 'PARTICIPANTABBR1': 'vol_party_name',
                 'PARTICIPANTABBR2': 'long_party_name',
                 'PARTICIPANTABBR3': 'short_party_name', 'PRODUCTNAME': 'product1', 'RANK': 'rank',
                 'INSTRUMENTID': 'symbol', 'PRODUCTSORTNO': 'product2'})

    if len(df.columns) < 3:
        return {}
    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    df = df.applymap(lambda x: None if x == '' else x)
    df['variety'] = df['symbol'].apply(lambda x: symbol_varieties(x))

    df = df[df['rank'] > 0]
    for col in ['PARTICIPANTID1', 'PARTICIPANTID2', 'PARTICIPANTID3', 'product1', 'product2']:
        try:
            del df[col]
        except:
            pass
    get_vars = [var for var in vars_list if var in df['variety'].tolist()]
    big_dict = {}
    for var in get_vars:
        df_var = df[df['variety'] == var]
        for symbol in set(df_var['symbol']):
            df_symbol = df_var[df_var['symbol'] == symbol]
            big_dict[symbol] = df_symbol.reset_index(drop=True)
    return big_dict
コード例 #25
0
ファイル: cot.py プロジェクト: wuyaoyao99/mssdk
def get_czce_rank_table(date="20081015", vars_list=cons.contract_symbols):
    """
    郑州商品交易所前 20 会员持仓排名数据明细
    注:该交易所既公布了品种排名, 也公布了标的排名
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品, 数据从20050509开始,每交易日16:30左右更新数据
    :return: pd.DataFrame
    rank                        排名                        int
    vol_party_name              成交量排序的当前名次会员        string(中文)
    vol                         该会员成交量                  int
    vol_chg                     该会员成交量变化量             int
    long_party_name             持多单排序的当前名次会员        string(中文)
    long_open_interest               该会员持多单                  int
    long_open_interest_chg           该会员持多单变化量             int
    short_party_name            持空单排序的当前名次会员        string(中文)
    short_open_interest              该会员持空单                  int
    short_open_interest_chg          该会员持空单变化量             int
    symbol                      标的合约                     string
    var                         品种                        string
    date                        日期                        string YYYYMMDD
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2005, 5, 9):
        print("czce数据源开始日期为20050509,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    if date <= datetime.date(2010, 8, 25):
        url = cons.CZCE_VOL_RANK_URL_1 % (date.strftime('%Y%m%d'))
        r = requests.get(url)
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "lxml")
        data = _czce_df_read(url, skip_rows=0)
        r = requests_link(url, 'utf-8')
        r.encoding = 'utf-8'
        symbols = []
        for link in soup.find_all('b'):
            strings = (str(link).split(' '))

            if len(strings) > 5:
                try:
                    symbol = chinese_to_english(strings[4])
                except:
                    symbol = strings[4]
                if symbol == "SR905日期:":
                    symbol = "SR905"
                symbols.append(symbol)
        big_dict = {}
        for i in range(len(symbols)):
            symbol = symbols[i]
            table_cut = data[i + 1]
            table_cut.columns = rank_columns
            table_cut = table_cut.iloc[:-1, :]
            table_cut.loc[:, 'rank'] = table_cut.index
            table_cut.loc['合计', 'rank'] = 999
            table_cut.loc['合计', ['vol_party_name', 'long_party_name', 'short_party_name']] = None
            table_cut.loc[:, 'symbol'] = symbol
            table_cut.loc[:, 'variety'] = symbol_varieties(symbol)
            table_cut[intColumns] = table_cut[intColumns].fillna(0)
            table_cut[intColumns] = table_cut[intColumns].astype(str)
            table_cut[intColumns] = table_cut[intColumns].applymap(lambda x: x.replace(',', ''))
            table_cut = table_cut.applymap(lambda x: 0 if x == '-' else x)

            table_cut[intColumns] = table_cut[intColumns].astype(float)
            table_cut[intColumns] = table_cut[intColumns].astype(int)
            big_dict[symbol] = table_cut.reset_index(drop=True)
        return big_dict

    elif date <= datetime.date(2015, 11, 11):  # 20200311 格式修正
        url = cons.CZCE_VOL_RANK_URL_2 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0, header=None)[3:]
        big_df = pd.DataFrame()
        for item in data:
            big_df = pd.concat([big_df, item], axis=0, ignore_index=False)
        big_df.columns = big_df.iloc[0, :].tolist()
        data = big_df.iloc[1:, :]
    elif date < datetime.date(2017, 12, 28):  # 20200311 格式修正
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0, header=0)[1]
    else:
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)[0]

    if len(data.columns) < 6:
        return {}

    table = pd.DataFrame(data.iloc[:, :9])
    table.index.name = table.columns[0]
    table.columns = rank_columns
    table.loc[:, 'rank'] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(lambda x: x.replace(',', ''))
    table = table.applymap(lambda x: 0 if x == '-' else x)
    indexes = [i for i in table.index if '合约' in i or '品种' in i]
    indexes.insert(0, 0)
    big_dict = {}

    for i in range(len(indexes)):

        if indexes[i] == 0:
            table_cut = table.loc[:indexes[i + 1], :]
            string = table_cut.index.name

        elif i < len(indexes) - 1:
            table_cut = table.loc[indexes[i]:indexes[i + 1], :]
            string = table_cut.index[0]

        else:
            table_cut = table.loc[indexes[i]:, :]
            string = table_cut.index[0]

        if 'PTA' in string:
            symbol = 'TA'
        else:
            try:
                symbol = chinese_to_english(find_chinese(re.compile(r':(.*) ').findall(string)[0]))
            except:
                symbol = re.compile(r':(.*) ').findall(string)[0]

        var = symbol_varieties(symbol)

        if var in vars_list:
            table_cut = table_cut.dropna(how='any').iloc[1:, :]
            table_cut = table_cut.loc[[x for x in table_cut.index if x in [str(i) for i in range(21)]], :]

            table_cut = _table_cut_cal(table_cut, symbol)
            big_dict[symbol.strip()] = table_cut.reset_index(drop=True)

    return big_dict
コード例 #26
0
ファイル: daily_bar.py プロジェクト: wuyaoyao99/mssdk
def get_czce_daily(date="20200416"):
    """
    郑州商品交易所-日频率-量价数据
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象,默认为当前交易日
    :type date: str or datetime.date
    :return: 郑州商品交易所-日频率-量价数据
    :rtype: pandas.DataFrame or None
    郑商所每日期货交易数据:
    symbol        合约代码
    date          日期
    open          开盘价
    high          最高价
    low           最低价
    close         收盘价
    volume        成交量
    open_interest 持仓量
    turnover      成交额
    settle        结算价
    pre_settle    前结算价
    variety       合约类别
    或
   郑商所每日期权交易数据
    symbol        合约代码
    date          日期
    open          开盘价
    high          最高价
    low           最低价
    close         收盘价
    pre_settle      前结算价
    settle         结算价
    delta          对冲值
    volume         成交量
    open_interest     持仓量
    oi_change       持仓变化
    turnover        成交额
    implied_volatility 隐含波动率
    exercise_volume   行权量
    variety        合约类别
    None(类型错误或给定日期没有交易数据)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn(f"{day.strftime('%Y%m%d')}非交易日")
        return None
    if day > datetime.date(2010, 8, 24):
        if day > datetime.date(2015, 9, 19):
            u = cons.CZCE_DAILY_URL_3
            url = u % (day.strftime("%Y"), day.strftime("%Y%m%d"))
        elif day < datetime.date(2015, 9, 19):
            u = cons.CZCE_DAILY_URL_2
            url = u % (day.strftime("%Y"), day.strftime("%Y%m%d"))
        listed_columns = cons.CZCE_COLUMNS
        output_columns = cons.OUTPUT_COLUMNS
        try:
            r = requests.get(url)
            html = r.text
        except requests.exceptions.HTTPError as reason:
            if reason.response.status_code != 404:
                print(
                    cons.CZCE_DAILY_URL_3
                    % (day.strftime("%Y"), day.strftime("%Y%m%d")),
                    reason,
                )
            return
        if html.find("您的访问出错了") >= 0 or html.find("无期权每日行情交易记录") >= 0:
            return
        html = [
            i.replace(" ", "").split("|")
            for i in html.split("\n")[:-4]
            if i[0][0] != "小"
        ]

        if day > datetime.date(2015, 9, 19):
            if html[1][0] not in ["品种月份", "品种代码"]:
                return
            dict_data = list()
            day_const = int(day.strftime("%Y%m%d"))
            for row in html[2:]:
                m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
                if not m:
                    continue
                row_dict = {"date": day_const, "symbol": row[0], "variety": m.group(1)}
                for i, field in enumerate(listed_columns):
                    if row[i + 1] == "\r":
                        row_dict[field] = 0.0
                    elif field in [
                        "volume",
                        "open_interest",
                        "oi_chg",
                        "exercise_volume",
                    ]:
                        row[i + 1] = row[i + 1].replace(",", "")
                        row_dict[field] = int(row[i + 1])
                    else:
                        row[i + 1] = row[i + 1].replace(",", "")
                        row_dict[field] = float(row[i + 1])
                dict_data.append(row_dict)

            return pd.DataFrame(dict_data)[output_columns]
        elif day < datetime.date(2015, 9, 19):
            dict_data = list()
            day_const = int(day.strftime("%Y%m%d"))
            for row in html[1:]:
                row = row[0].split(",")
                m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
                if not m:
                    continue
                row_dict = {"date": day_const, "symbol": row[0], "variety": m.group(1)}
                for i, field in enumerate(listed_columns):
                    if row[i + 1] == "\r":
                        row_dict[field] = 0.0
                    elif field in [
                        "volume",
                        "open_interest",
                        "oi_chg",
                        "exercise_volume",
                    ]:
                        row_dict[field] = int(float(row[i + 1]))
                    else:
                        row_dict[field] = float(row[i + 1])
                dict_data.append(row_dict)
            return pd.DataFrame(dict_data)[output_columns]

    if day <= datetime.date(2010, 8, 24):
        u = cons.CZCE_DAILY_URL_1
        url = u % day.strftime("%Y%m%d")
        listed_columns = cons.CZCE_COLUMNS_2
        output_columns = cons.OUTPUT_COLUMNS
        df = pd.read_html(url)[1].dropna(how="any")

        dict_data = list()
        day_const = int(day.strftime("%Y%m%d"))

        for row in df.to_dict(orient="records")[1:]:
            m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
            if not m:
                continue
            row_dict = {"date": day_const, "symbol": row[0], "variety": m.group(1)}
            for i, field in enumerate(listed_columns):
                if row[i + 1] == "\r":
                    row_dict[field] = 0.0
                elif field in ["volume", "open_interest", "oi_chg", "exercise_volume"]:

                    row_dict[field] = int(row[i + 1])
                else:

                    row_dict[field] = float(row[i + 1])
            dict_data.append(row_dict)

        return pd.DataFrame(dict_data)[output_columns]
コード例 #27
0
ファイル: daily_bar.py プロジェクト: wuyaoyao99/mssdk
def get_futures_daily(start_day="20200413", end_day="20200416", market="CFFEX", index_bar=False):
    """
    交易所日交易数据
    Parameters
    ------
        start_day: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
        end_day: 结束数据 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
        market: 'CFFEX' 中金所, 'CZCE' 郑商所,  'SHFE' 上期所, 'DCE' 大商所 之一, 'INE' 上海国际能源交易中心。默认为中金所
        index_bar: bool  是否合成指数K线
    Return
    -------
        DataFrame
            中金所日交易数据(DataFrame):
                symbol      合约代码
                date       日期
                open       开盘价
                high       最高价
                low       最低价
                close      收盘价
                volume      成交量
                open_interest 持仓量
                turnover    成交额
                settle     结算价
                pre_settle   前结算价
                variety     合约类别
        或 None(给定日期没有交易数据)
    """
    if market.upper() == "CFFEX":
        f = get_cffex_daily
    elif market.upper() == "CZCE":
        f = get_czce_daily
    elif market.upper() == "SHFE":
        f = get_shfe_daily
    elif market.upper() == "DCE":
        f = get_dce_daily
    elif market.upper() == "INE":
        f = get_ine_daily
    else:
        print("Invalid Market Symbol")
        return

    start_day = (
        cons.convert_date(start_day) if start_day is not None else datetime.date.today()
    )
    end_day = (
        cons.convert_date(end_day)
        if end_day is not None
        else cons.convert_date(cons.get_latest_data_date(datetime.datetime.now()))
    )

    df_list = list()
    while start_day <= end_day:
        df = f(start_day)
        if df is not None:
            df_list.append(df)
            if index_bar:
                df_list.append(get_futures_index(df))
        start_day += datetime.timedelta(days=1)

    if len(df_list) > 0:
        return pd.concat(df_list).reset_index(drop=True)
コード例 #28
0
ファイル: daily_bar.py プロジェクト: wuyaoyao99/mssdk
def get_dce_daily(date="20200416", symbol_type="futures", retries=0):
    """
    大连商品交易所日交易数据
    Parameters
    ------
        date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
        symbol_type: 数据类型, 为'futures'期货 或 'option'期权二者之一
        retries: int, 当前重试次数,达到3次则获取数据失败
    Return
    -------
        DataFrame
            大商所日交易数据(DataFrame):
                symbol        合约代码
                date          日期
                open          开盘价
                high          最高价
                low           最低价
                close         收盘价
                volume        成交量
                open_interest   持仓量
                turnover       成交额
                settle        结算价
                pre_settle    前结算价
                variety       合约类别
        或
        DataFrame
           郑商所每日期权交易数据
                symbol        合约代码
                date          日期
                open          开盘价
                high          最高价
                low           最低价
                close         收盘价
                pre_settle      前结算价
                settle         结算价
                delta          对冲值
                volume         成交量
                open_interest     持仓量
                oi_change       持仓变化
                turnover        成交额
                implied_volatility 隐含波动率
                exercise_volume   行权量
                variety        合约类别
        或 None(给定日期没有交易数据)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    if retries > 3:
        print("maximum retires for DCE market data: ", day.strftime("%Y%m%d"))
        return

    if symbol_type == "futures":
        url = (
            cons.DCE_DAILY_URL
            + "?"
            + urllib.parse.urlencode(
                {
                    "currDate": day.strftime("%Y%m%d"),
                    "year": day.strftime("%Y"),
                    "month": str(int(day.strftime("%m")) - 1),
                    "day": day.strftime("%d"),
                }
            )
        )
        listed_columns = cons.DCE_COLUMNS
        output_columns = cons.OUTPUT_COLUMNS
    elif symbol_type == "option":
        url = (
            cons.DCE_DAILY_URL
            + "?"
            + urllib.parse.urlencode(
                {
                    "currDate": day.strftime("%Y%m%d"),
                    "year": day.strftime("%Y"),
                    "month": str(int(day.strftime("%m")) - 1),
                    "day": day.strftime("%d"),
                    "dayQuotes.trade_type": "1",
                }
            )
        )
        listed_columns = cons.DCE_OPTION_COLUMNS
        output_columns = cons.OPTION_OUTPUT_COLUMNS
    else:
        print(
            "invalid symbol_type :"
            + symbol_type
            + ', should be one of "futures" or "option"'
        )
        return

    try:
        response = requests_link(url, method="post", headers=cons.headers).text
    except requests.exceptions.ContentDecodingError as reason:
        return get_dce_daily(day, retries=retries + 1)
    except requests.exceptions.HTTPError as reason:
        if reason.response == 504:
            return get_dce_daily(day, retries=retries + 1)
        elif reason.response != 404:
            print(cons.DCE_DAILY_URL, reason)
        return

    if "错误:您所请求的网址(URL)无法获取" in response:
        return get_dce_daily(day, retries=retries + 1)
    elif "暂无数据" in response:
        return

    data = BeautifulSoup(response, "html.parser").find_all("tr")
    if len(data) == 0:
        return

    dict_data = list()
    implied_data = list()
    for i_data in data[1:]:
        if "小计" in i_data.text or "总计" in i_data.text:
            continue
        x = i_data.find_all("td")
        if symbol_type == "futures":
            row_dict = {"variety": cons.DCE_MAP[x[0].text.strip()]}
            row_dict["symbol"] = row_dict["variety"] + x[1].text.strip()
            for i, field in enumerate(listed_columns):
                field_content = x[i + 2].text.strip()
                if "-" in field_content:
                    row_dict[field] = 0
                elif field in ["volume", "open_interest"]:
                    row_dict[field] = int(field_content.replace(",", ""))
                else:
                    row_dict[field] = float(field_content.replace(",", ""))
            dict_data.append(row_dict)
        elif len(x) == 16:
            m = cons.FUTURES_SYMBOL_PATTERN.match(x[1].text.strip())
            if not m:
                continue
            row_dict = {
                "symbol": x[1].text.strip(),
                "variety": m.group(1).upper(),
                "contract_id": m.group(0),
            }
            for i, field in enumerate(listed_columns):
                field_content = x[i + 2].text.strip()
                if "-" in field_content:
                    row_dict[field] = 0
                elif field in ["volume", "open_interest"]:
                    row_dict[field] = int(field_content.replace(",", ""))
                else:
                    row_dict[field] = float(field_content.replace(",", ""))
            dict_data.append(row_dict)
        elif len(x) == 2:
            implied_data.append(
                {
                    "contract_id": x[0].text.strip(),
                    "implied_volatility": float(x[1].text.strip()),
                }
            )
    df = pd.DataFrame(dict_data)
    df["date"] = day.strftime("%Y%m%d")
    if symbol_type == "futures":
        return df[output_columns]
    else:
        return pd.merge(
            df,
            pd.DataFrame(implied_data),
            on="contract_id",
            how="left",
            indicator=False,
        )[output_columns]
コード例 #29
0
ファイル: daily_bar.py プロジェクト: wuyaoyao99/mssdk
def get_shfe_daily(date="20200416"):
    """
    上海期货交易所-日频率-量价数据
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象, 默认为当前交易日
    :type date: str or datetime.date
    :return: 上海期货交易所-日频率-量价数据
    :rtype: pandas.DataFrame or None
    上期所日交易数据(DataFrame):
    symbol        合约代码
    date          日期
    open          开盘价
    high          最高价
    low           最低价
    close         收盘价
    volume        成交量
    open_interest 持仓量
    turnover      成交额
    settle        结算价
    pre_settle     前结算价
    variety       合约类别
    或 None(给定交易日没有交易数据)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    try:
        json_data = json.loads(
            requests_link(
                cons.SHFE_DAILY_URL % (day.strftime("%Y%m%d")),
                headers=cons.shfe_headers,
            ).text
        )
    except requests.HTTPError as reason:
        if reason.response != 404:
            print(cons.SHFE_DAILY_URL % (day.strftime("%Y%m%d")), reason)
        return

    if len(json_data["o_curinstrument"]) == 0:
        return

    df = pd.DataFrame(
        [
            row
            for row in json_data["o_curinstrument"]
            if row["DELIVERYMONTH"] not in ["小计", "合计"] and row["DELIVERYMONTH"] != ""
        ]
    )
    df["variety"] = df.PRODUCTID.str.slice(0, -6).str.upper()
    df["symbol"] = df["variety"] + df["DELIVERYMONTH"]
    df["date"] = day.strftime("%Y%m%d")
    v_wap_df = get_shfe_v_wap(day)
    if v_wap_df is not None:
        df = pd.merge(
            df,
            v_wap_df[v_wap_df.time_range == "9:00-15:00"],
            on=["date", "symbol"],
            how="left",
        )
        df["turnover"] = df.v_wap * df.VOLUME
    else:
        df["VOLUME"] = df["VOLUME"].apply(lambda x: 0 if x == "" else x)
        df["turnover"] = df["VOLUME"] * df["SETTLEMENTPRICE"]
    df.rename(columns=cons.SHFE_COLUMNS, inplace=True)
    return df[cons.OUTPUT_COLUMNS]
コード例 #30
0
ファイル: daily_bar.py プロジェクト: wuyaoyao99/mssdk
def get_cffex_daily(date="20200416"):
    """
    中国金融期货交易所日交易数据
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象; 为空时为当天
    :return: pandas.DataFrame
    中国金融期货交易所日:
    symbol        合约代码
    date          日期
    open          开盘价
    high          最高价
    low          最低价
    close         收盘价
    volume        成交量
    open_interest   持仓量
    turnover      成交额
    settle        结算价
    pre_settle    前结算价
    variety       合约类别
    或 None(给定日期没有交易数据)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    try:
        html = requests_link(
            cons.CFFEX_DAILY_URL.format(
                day.strftime("%Y%m"), day.strftime("%d"), day.strftime("%Y%m%d")
            ),
            encoding="gbk",
            headers=cons.headers,
        ).text
    except requests.exceptions.HTTPError as reason:
        if reason.response != 404:
            print(
                cons.CFFEX_DAILY_URL
                % (day.strftime("%Y%m"), day.strftime("%d"), day.strftime("%Y%m%d")),
                reason,
            )
        return

    if html.find("网页错误") >= 0:
        return
    html = [
        i.replace(" ", "").split(",") for i in html.split("\n")[:-2] if i[0][0] != "小"
    ]

    if html[0][0] != "合约代码":
        return

    dict_data = list()
    day_const = day.strftime("%Y%m%d")
    for row in html[1:]:
        m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
        if not m:
            continue
        row_dict = {"date": day_const, "symbol": row[0], "variety": m.group(1)}

        for i, field in enumerate(cons.CFFEX_COLUMNS):
            if row[i + 1] == "":
                row_dict[field] = 0.0
            elif field in ["volume", "open_interest", "oi_chg"]:
                row_dict[field] = int(row[i + 1])
            else:
                try:
                    row_dict[field] = float(row[i + 1])
                except:
                    pass
        row_dict["pre_settle"] = row_dict["close"] - row_dict["change1"]
        dict_data.append(row_dict)

    return pd.DataFrame(dict_data)[cons.OUTPUT_COLUMNS]