예제 #1
0
def get_rollYield_bar(type='symbol', var='RB', date=None, start=None, end=None, plot=False):
    """
        Get roll-yield related data in one of three layouts.
    Parameters
    ------
        type: selects the layout
            'symbol': rows of every contract of ``var`` traded on ``date``
            'var':    roll yield of every variety on ``date`` (cross-section)
            'date':   roll yield of ``var`` for each day in [start, end] (time series)
        start: first day, format YYYYMMDD; today when omitted
        end: last day, format YYYYMMDD; latest data date when omitted
        date: a single day, format YYYYMMDD; today when omitted
        var: variety code such as RB, AL
        plot: when True the result is also passed to the plotting helpers
    Return
    -------
        DataFrame
            'symbol': the daily-bar rows whose ``variety`` equals ``var``
                      (None when no daily data is available)
            'var' / 'date': one column ``ry`` indexed by variety / by date
        None when ``type`` is not one of the three values above.
    """

    date = cons.convert_date(date) if date is not None else datetime.date.today()
    start = cons.convert_date(start) if start is not None else datetime.date.today()
    end = cons.convert_date(end) if end is not None else cons.convert_date(cons.get_latestDataDate(datetime.datetime.now()))

    if type == 'symbol':
        df = get_future_daily(start=date, end=date, market=symbolMarket(var))
        if df is None:
            # get_future_daily yields None when nothing was downloaded.
            return None
        df = df[df['variety'] == var]
        if plot:
            _plot_bar(df['close'].tolist(), df['symbol'].tolist())
        return df

    if type == 'var':
        # Gather the daily bars of every exchange; DataFrame.append no longer
        # exists in pandas >= 2.0, so collect first and concatenate once.
        frames = []
        for market in ['dce', 'cffex', 'shfe', 'czce']:
            daily = get_future_daily(start=date, end=date, market=market)
            if daily is not None:
                frames.append(daily)
        df = pd.concat(frames) if frames else pd.DataFrame()
        varList = list(set(df['variety']))
        ryList = [get_rollYield(date, variety, df=df) for variety in varList]
        df = pd.DataFrame(ryList, index=varList, columns=['ry'])
        df = df.sort_values('ry')
        if plot:
            _plot_bar(df['ry'].tolist(), df.index)
        return df

    if type == 'date':
        rows = []
        while start <= end:
            try:
                ry = get_rollYield(start, var)
                rows.append(pd.DataFrame([ry], index=[start], columns=['ry']))
            except Exception:
                # Best effort: a day whose roll yield cannot be computed is
                # skipped, but KeyboardInterrupt/SystemExit still propagate.
                pass
            start += datetime.timedelta(days=1)
        dfL = pd.concat(rows) if rows else pd.DataFrame()
        if plot:
            _plot(pd.to_datetime(dfL.index), dfL['ry'].tolist())
        return dfL
예제 #2
0
def get_future_daily(start=None, end=None, market='CFFEX', indexBar=False):
    """
        Download the daily trading data of one exchange for a date range.
    Parameters
    ------
        start: first day, YYYY-MM-DD / YYYYMMDD / datetime.date; today when omitted
        end: last day, same formats; latest data date when omitted
        market: one of 'CFFEX', 'CZCE', 'SHFE', 'DCE' (case-insensitive),
                default 'CFFEX'
        indexBar: bool, when True the result of get_futureIndex is added
                  after each day's table
    Return
    -------
        DataFrame
            the per-day tables concatenated with a fresh 0..n-1 index; the
            original documentation lists these columns:
                symbol, date, open, high, low, close, volume,
                open_interest, turnover, settle, pre_settle, variety
        or None (unknown market, or no day in the range produced data)
    """
    exchange = market.upper()
    if exchange not in ('CFFEX', 'CZCE', 'SHFE', 'DCE'):
        print('Invalid market.')
        return None

    # Per-exchange single-day downloader.
    fetch_one_day = {
        'CFFEX': get_cffex_daily,
        'CZCE': get_czce_daily,
        'SHFE': get_shfe_daily,
        'DCE': get_dce_daily,
    }[exchange]

    if start is None:
        start = datetime.date.today()
    else:
        start = cons.convert_date(start)
    if end is None:
        end = cons.convert_date(
            cons.get_latestDataDate(datetime.datetime.now()))
    else:
        end = cons.convert_date(end)

    one_day = datetime.timedelta(days=1)
    collected = []
    current = start
    while current <= end:
        day_table = fetch_one_day(current)
        if day_table is not None:
            collected.append(day_table)
            if indexBar:
                collected.append(get_futureIndex(day_table))
        current += one_day

    if not collected:
        return None
    return pd.concat(collected).reset_index(drop=True)
예제 #3
0
def get_reciept(start=None, end=None, vars=cons.vars):
    """
        Collect registered warehouse-receipt counts for a date range.
        Parameters
        ------
            start: first day, YYYY-MM-DD / YYYYMMDD / datetime.date; today when omitted
            end: last day, same formats; latest data date when omitted
            vars: list of variety codes such as RB, AL; defaults to cons.vars
        Return
        -------
            DataFrame
                concatenation of the per-exchange results with a fresh index;
                the original documentation lists these columns:
                    var             variety code                 string
                    reciept         number of receipts           int
                    date            date                         string YYYYMMDD
    """
    start = cons.convert_date(
        start) if start is not None else datetime.date.today()
    end = cons.convert_date(end) if end is not None else cons.convert_date(
        cons.get_latestDataDate(datetime.datetime.now()))
    # DataFrame.append was removed in pandas 2.0: collect, then concat once.
    frames = []
    while start <= end:
        day_str = start.strftime('%Y%m%d')
        if day_str not in calendar:
            print('%s非交易日' % day_str)
        else:
            print(start)
            for market, marketVars in cons.market_var.items():
                # Reset per market so a fetcher chosen for a previous market
                # can never be reused for a market without its own fetcher.
                f = None
                if market == 'dce':
                    f = get_dce_reciept
                elif market == 'shfe':
                    # The fetcher depends on the date of the request.
                    if start <= datetime.date(2014, 5, 16):
                        f = get_shfe_reciept_1
                    else:
                        f = get_shfe_reciept_2
                elif market == 'czce':
                    if start <= datetime.date(2010, 8, 24):
                        f = get_czce_reciept_1
                    elif start <= datetime.date(2015, 11, 11):
                        f = get_czce_reciept_2
                    else:
                        f = get_czce_reciept_3

                get_vars = [var for var in vars if var in marketVars]

                # 'cffex' never gets a fetcher above, so it is skipped here.
                if f is not None and get_vars:
                    fetched = f(start, get_vars)
                    if fetched is not None:
                        frames.append(fetched)

        start += datetime.timedelta(days=1)
    records = pd.concat(frames) if frames else pd.DataFrame()
    return records.reset_index(drop=True)
예제 #4
0
def get_shfe_reciept_1(date=None, vars=cons.vars):
    """
        Scrape registered warehouse-receipt data of the Shanghai Futures Exchange.
        Per the original notes: applies to 20081006 through 20140518 (inclusive);
        for 20100126 and 20101029 the exchange page is malformed, so the
        DataFrames stored in this module are returned directly;
        for 20100416 and 20130821 the exchange data is missing.
        Parameters
        ------
            date: day, YYYY-MM-DD / YYYYMMDD / datetime.date; today when omitted
            vars: list of variety codes such as RB, AL; defaults to cons.vars
        Return
        -------
            DataFrame (None for non-trading days and the two missing dates):
                    var             variety code                 string
                    reciept         number of receipts           int
                    date            date                         string YYYYMMDD
    """
    # Always work with the YYYYMMDD string, including for the "today" default
    # (previously the default stayed a date object and never matched calendar).
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        print('%s非交易日' % date)
        return None
    if date == '20100126':
        shfe_20100126['date'] = date
        return shfe_20100126
    if date == '20101029':
        shfe_20101029['date'] = date
        return shfe_20101029
    if date in ['20100416', '20130821']:
        return None

    varList = [
        '天然橡胶', '沥青仓库', '沥青厂库', '热轧卷板', '燃料油', '白银', '线材', '螺纹钢', '铅', '铜',
        '铝', '锌', '黄金', '锡', '镍'
    ]
    url = cons.SHFE_RECIEPT_URL_1 % date

    data = pandas_readHtml_link(url)[0]

    # Materialise the first column once instead of once per row.
    first_col = data[0].tolist()
    # Rows whose first cell is a known variety name start a new section.
    indexs = [x for x in data.index if first_col[x] in varList]
    # The row just before the first cell containing '注' ends the last section.
    lastIndex = [x for x in data.index if '注' in str(first_col[x])][0] - 1
    frames = []
    for i, begin in enumerate(indexs):
        if i != len(indexs) - 1:
            dataCut = data.loc[begin:indexs[i + 1] - 1, :]
        else:
            dataCut = data.loc[begin:lastIndex, :]
            # Only the final section is forward-filled, as before.
            dataCut = dataCut.ffill()
        D = {}
        D['var'] = chinese_to_english(dataCut[0].tolist()[0])
        D['reciept'] = int(dataCut[1].tolist()[-1])
        D['date'] = date
        frames.append(pd.DataFrame(D, index=[0]))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    records = pd.concat(frames) if frames else pd.DataFrame()
    if len(records.index) != 0:
        records.index = records['var']
        vars_inMarket = [i for i in vars if i in records.index]
        records = records.loc[vars_inMarket, :]
    return records.reset_index(drop=True)
예제 #5
0
def get_shfe_daily(date=None):
    """
        Download the daily trading data of the Shanghai Futures Exchange.
    Parameters
    ------
        date: day, YYYY-MM-DD / YYYYMMDD / datetime.date; today when omitted
    Return
    -------
        DataFrame restricted to cons.OUTPUT_COLUMNS; the original
        documentation lists these columns:
                symbol        contract code
                date          date
                open          open price
                high          high price
                low           low price
                close         close price
                volume        volume
                open_interest open interest
                turnover      turnover
                settle        settlement price
                pre_settle    previous settlement price
                variety       variety code
        or None (non-trading day, HTTP error, or no instrument rows)
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    day_str = day.strftime('%Y%m%d')
    if day_str not in calendar:
        # Use the converted day: the raw argument may be None or a string,
        # neither of which has strftime.
        print('%s非交易日' % day_str)
        return None
    try:
        json_data = json.loads(
            urlopen(
                Request(cons.SHFE_DAILY_URL % day_str,
                        headers=cons.shfe_headers)).read().decode('utf8'))
    except HTTPError as reason:
        # A 404 is treated as "no data" and stays silent.
        if reason.code != 404:
            print(cons.SHFE_DAILY_URL % day_str, reason)
        return

    if len(json_data['o_curinstrument']) == 0:
        return

    # Drop subtotal rows and rows without a delivery month.
    df = pd.DataFrame([
        row for row in json_data['o_curinstrument']
        if row['DELIVERYMONTH'] != u'小计' and row['DELIVERYMONTH'] != ''
    ])
    # The variety is PRODUCTID without its last six characters, upper-cased.
    df['variety'] = df.PRODUCTID.str.slice(0, -6).str.upper()
    df['symbol'] = df['variety'] + df['DELIVERYMONTH']
    df['date'] = day_str
    vwap_df = get_shfe_vwap(day)
    if vwap_df is not None:
        df = pd.merge(df,
                      vwap_df[vwap_df.time_range == '9:00-15:00'],
                      on=['date', 'symbol'],
                      how='left')
        df['turnover'] = df.vwap * df.VOLUME
    else:
        # Without vwap data the settlement price stands in for it.
        df['turnover'] = df['VOLUME'] * df['SETTLEMENTPRICE']
    df.rename(columns=cons.SHFE_COLUMNS, inplace=True)
    return df[cons.OUTPUT_COLUMNS]
예제 #6
0
def get_czce_reciept_3(date=None, vars=cons.vars):
    """
        Scrape registered warehouse-receipt data of the Zhengzhou Commodity Exchange.
        Per the original notes: applies from 20151112 (inclusive) onwards.
        Parameters
        ------
            date: day, YYYY-MM-DD / YYYYMMDD / datetime.date; today when omitted
            vars: list of variety codes such as CF, TA; defaults to cons.vars
        Return
        -------
            DataFrame (None for non-trading days; empty when the page holds
            fewer than four tables):
                    var             variety code                 string
                    reciept         number of receipts           int
                    date            date                         string YYYYMMDD
    """
    # Always work with the YYYYMMDD string, including for the "today" default
    # (previously the default stayed a date object and never matched calendar).
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        print('%s非交易日' % date)
        return None
    url = cons.CZCE_RECIEPT_URL_3 % (date[:4], date)
    r = requests_link(url, encoding='utf-8')
    r.encoding = 'utf-8'
    data = pd.read_html(r.text, encoding='gb2312')
    if len(data) < 4:
        return pd.DataFrame()
    if int(date) <= 20171227:
        # Up to this date the first parsed table is skipped.
        data = data[1:]
    frames = []
    for dataCut in data:
        if len(dataCut.columns) <= 3:
            continue
        first_col = dataCut[0].tolist()
        lastIndexs = [
            x for x in dataCut.index if '注:' in str(first_col[x])
        ]
        if lastIndexs:
            # Cut the table just before the first '注:' row.
            dataCut = dataCut.loc[:lastIndexs[0] - 1, :]
        title = dataCut[0].tolist()[0]
        if 'PTA' in title:
            var = 'TA'
        else:
            string = title.split(' ')[0][3:]
            var = chinese_to_english(re.sub('[A-Z]+', '', string))
        # Row 1 of the table holds the column headers.
        dataCut.columns = dataCut.loc[1, :]
        dataCut = dataCut.ffill()
        try:
            reciept = dataCut.loc[:, '仓单数量'].tolist()[-1]
        except KeyError:
            # Fall back to the alternative header when the plain one is absent.
            reciept = dataCut.loc[:, '仓单数量(保税)'].tolist()[-1]
        D = {'var': var, 'reciept': int(reciept), 'date': date}
        frames.append(pd.DataFrame(D, index=[0]))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    records = pd.concat(frames) if frames else pd.DataFrame()
    if len(records.index) != 0:
        records.index = records['var']
        vars_inMarket = [i for i in vars if i in records.index]
        records = records.loc[vars_inMarket, :]
    return records.reset_index(drop=True)
예제 #7
0
def get_spotPrice(date=None, vars=cons.vars):
    """
        Fetch commodity spot prices and the matching basis for one day.
    Parameters
    ------
        date: day, YYYY-MM-DD / YYYYMMDD / datetime.date; today when omitted
        vars: list of variety codes such as RB, AL; defaults to cons.vars
    Return
    -------
        DataFrame; the original documentation lists these columns:
                var             variety code                               string
                SP              spot price                                 float
                nearSymbol      nearest-delivery contract                  string
                nearPrice       its settlement price                       float
                domSymbol       dominant contract                          string
                domPrice        its settlement price                       float
                nearBasis       basis of the near contract vs. spot        float
                domBasis        basis of the dominant contract vs. spot    float
                nearBasisRate   basis rate of the near contract            float
                domBasisRate    basis rate of the dominant contract        float
                date            date                                       string YYYYMMDD
        None for a non-trading day; False once five attempts have raised.
    Raises
    ------
        Exception when the requested day is before 2011-01-04.
    """
    if date is None:
        date = datetime.date.today()
    else:
        date = cons.convert_date(date)
    if date < datetime.date(2011, 1, 4):
        raise Exception("数据源开始日期为20110104,请修改获取数据时段检查")
    if date.strftime('%Y%m%d') not in calendar:
        print('%s非交易日' % date.strftime('%Y%m%d'))
        return None

    latest_url = cons.SYS_SPOTPRICE_LATEST_URL
    dated_url = cons.SYS_SPOTPRICE_URL % date.strftime('%Y-%m-%d')
    attempt = 1
    # NOTE(review): only exceptions advance the attempt counter; if both pages
    # keep answering with a different date this loop never ends - confirm
    # whether that waiting behaviour is intended.
    while True:
        # The dated page is tried first, then the "latest" page.
        for url in (dated_url, latest_url):
            try:
                response = requests.get(url, timeout=2)
                heading = pd.read_html(response.text)[0].loc[1, 1]
                digits = ''.join(re.findall(r'[0-9]', heading))
                if digits[3:11] != date.strftime('%Y%m%d'):
                    # The page is for another day: pause, then try the next URL.
                    time.sleep(3)
                    continue
                records = _check_information(
                    pd.read_html(response.text)[1], date)
                records.index = records['var']
                wanted = [name for name in vars if name in records.index]
                return records.loc[wanted, :].reset_index(drop=True)
            except Exception:
                print('%s日生意社数据连接失败,第%s次尝试,最多5次' %
                      (date.strftime('%Y-%m-%d'), str(attempt)))
                attempt += 1
                if attempt > 5:
                    print('%s日生意社数据连接失败,已超过5次,您的地址被网站墙了,请保存好返回数据,稍后从该日期起重试' %
                          date.strftime('%Y-%m-%d'))
                    return False
예제 #8
0
파일: cot.py 프로젝트: yutiansut/fushare
def get_rank_sum_daily(start=None, end=None, vars=cons.vars):
    """
        Collect the summed top-5/10/15/20 member position rankings of the four
        futures exchanges for every trading day in a date range.
        Notes carried over from the original documentation:
        1. SHFE and CFFEX only publish per-contract rankings, not per-variety
           ones, so the variety figures are sums over the contracts and are
           not a true variety ranking.
        2. DCE only publishes variety rankings, not per-contract rankings.
        Parameters
        ------
            start: first day, YYYY-MM-DD / YYYYMMDD / datetime.date; today when omitted
            end: last day, same formats; latest data date when omitted
            vars: list of variety codes such as RB, AL; defaults to cons.vars
        Return
        -------
            DataFrame: the per-day results of get_rank_sum concatenated with a
            fresh index; the original documentation lists these columns:
                    symbol                      contract                                        string
                    var                         variety code                                    string
                    vol_top5                    total volume of the top-5 members by volume      int
                    vol_chg_top5                total volume change of those members            int
                    long_openIntr_top5          total long position of the top-5 long holders    int
                    long_openIntr_chg_top5      total change of those long positions            int
                    short_openIntr_top5         total short position of the top-5 short holders  int
                    short_openIntr_chg_top5     total change of those short positions           int
                    vol_top10                   total volume of the top-10 members by volume     int
                    ...
                    date                        date                                            string YYYYMMDD
    """
    start = cons.convert_date(
        start) if start is not None else datetime.date.today()
    end = cons.convert_date(end) if end is not None else cons.convert_date(
        cons.get_latestDataDate(datetime.datetime.now()))
    # DataFrame.append was removed in pandas 2.0: collect, then concat once.
    frames = []
    while start <= end:
        print(start)
        if start.strftime('%Y%m%d') in calendar:
            daily = get_rank_sum(start, vars)
            if daily is not None:
                frames.append(daily)
        else:
            print('%s非交易日' % start.strftime('%Y%m%d'))
        start += datetime.timedelta(days=1)

    records = pd.concat(frames) if frames else pd.DataFrame()
    return records.reset_index(drop=True)
예제 #9
0
def get_spotPrice_daily(start=None, end=None, vars=cons.vars):
    """
        Fetch commodity spot prices and the matching basis for a date range.
    Parameters
    ------
        start: first day, YYYY-MM-DD / YYYYMMDD / datetime.date; today when omitted
        end: last day, same formats; latest data date when omitted
        vars: list of variety codes such as RB, AL; defaults to cons.vars
    Return
    -------
        DataFrame: the per-day results of get_spotPrice concatenated with a
        fresh index; the original documentation lists these columns:
                var             variety code                               string
                SP              spot price                                 float
                nearSymbol      nearest-delivery contract                  string
                nearPrice       its settlement price                       float
                domSymbol       dominant contract                          string
                domPrice        its settlement price                       float
                nearBasis       basis of the near contract vs. spot        float
                domBasis        basis of the dominant contract vs. spot    float
                nearBasisRate   basis rate of the near contract            float
                domBasisRate    basis rate of the dominant contract        float
                date            date                                       string YYYYMMDD
        None when no day produced data.
    """

    start = cons.convert_date(
        start) if start is not None else datetime.date.today()
    end = cons.convert_date(end) if end is not None else cons.convert_date(
        cons.get_latestDataDate(datetime.datetime.now()))
    df_list = []
    while start <= end:
        print(start)
        df = get_spotPrice(start, vars)
        if df is False:
            # get_spotPrice gave up: stop here and hand back whatever was
            # collected so far (concatenating an empty list would raise).
            break
        if df is not None:
            df_list.append(df)
        start += datetime.timedelta(days=1)

    if len(df_list) > 0:
        return pd.concat(df_list).reset_index(drop=True)
    return None
예제 #10
0
def get_czce_reciept_1(date=None, vars=cons.vars):
    """
        Scrape registered warehouse-receipt data of the Zhengzhou Commodity Exchange.
        Per the original notes: applies to 20080222 through 20100824 (inclusive).
        Parameters
        ------
            date: day, YYYY-MM-DD / YYYYMMDD / datetime.date; today when omitted
            vars: list of variety codes such as CF, TA; defaults to cons.vars
        Return
        -------
            DataFrame (None for non-trading days, empty for 20090820):
                    var             variety code                 string
                    reciept         number of receipts           int
                    date            date                         string YYYYMMDD
    """
    # Always work with the YYYYMMDD string, including for the "today" default
    # (previously the default stayed a date object and never matched calendar).
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        print('%s非交易日' % date)
        return None
    if date == '20090820':
        return pd.DataFrame()
    url = cons.CZCE_RECIEPT_URL_1 % date
    r = requests_link(url, encoding='utf-8')
    r.encoding = 'utf-8'
    context = r.text
    data = pd.read_html(context)[1]
    first_col = data[0].tolist()
    # Every row whose first cell contains '品种:' starts a new section.
    indexs = [x for x in data.index if '品种:' in str(first_col[x])]
    frames = []
    for i, begin in enumerate(indexs):
        if i != len(indexs) - 1:
            dataCut = data.loc[begin:indexs[i + 1] - 1, :]
        else:
            dataCut = data.loc[begin:, :]
        dataCut = dataCut.ffill()
        title = dataCut[0].tolist()[0]
        if 'PTA' in title:
            var = 'TA'
        else:
            var = chinese_to_english(re.sub('[A-Z]+', '', title[3:]))
        # The receipt count is read from column 6 for CF, column 5 otherwise.
        if var == 'CF':
            reciept = dataCut[6].tolist()[-1]
        else:
            reciept = dataCut[5].tolist()[-1]
        D = {'var': var, 'reciept': int(reciept), 'date': date}
        frames.append(pd.DataFrame(D, index=[0]))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    records = pd.concat(frames) if frames else pd.DataFrame()
    if len(records.index) != 0:
        records.index = records['var']
        vars_inMarket = [i for i in vars if i in records.index]
        records = records.loc[vars_inMarket, :]
    return records.reset_index(drop=True)
예제 #11
0
파일: cot.py 프로젝트: yutiansut/fushare
def get_cffex_rank_table(date=None, vars=cons.vars):
    """
        Scrape the member position ranking tables published per contract by
        CFFEX (the URLs come from cons.CFFEX_VOLRANK_URL).
        Parameters
        ------
            date: day, YYYY-MM-DD / YYYYMMDD / datetime.date; today when omitted
            vars: list of variety codes; only those listed in
                  cons.market_var['cffex'] are queried
            Per the original notes: data starts at 20100416 and is updated
            around 16:30 on every trading day.
        Return
        -------
            dict mapping each contract symbol to a DataFrame (empty dict on a
            non-trading day); the original documentation lists these columns:
                rank                        rank                                       int
                vol_party_name              member at this rank by volume              string
                vol                         that member's volume                       int
                vol_chg                     change of that volume                      int
                long_party_name             member at this rank by long position       string
                long_openIntr               that member's long position                int
                long_openIntr_chg           change of that long position               int
                short_party_name            member at this rank by short position      string
                short_openIntr              that member's short position               int
                short_openIntr_chg          change of that short position              int
                symbol                      contract                                   string
                var                         variety code                               string
                date                        date                                       string YYYYMMDD
    """
    vars = [i for i in vars if i in cons.market_var['cffex']]
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        print('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    D = {}
    for var in vars:
        url = cons.CFFEX_VOLRANK_URL % (date.strftime('%Y%m'),
                                        date.strftime('%d'), var)
        r = requests_link(url, encoding='gbk')
        if '网页错误' in r.text:
            # The exchange answered with its error page: nothing for this variety.
            continue
        # The CSV payload is whatever follows the '\n交易日,' marker.
        table = pd.read_csv(StringIO(r.text.split('\n交易日,')[1]))
        table = table.dropna(how='any')
        # Strip surrounding whitespace from every text cell.
        table = table.applymap(lambda x: x.strip()
                               if isinstance(x, str) else x)
        for symbol in set(table['合约']):
            tableCut = table[table['合约'] == symbol]
            tableCut.columns = ['symbol', 'rank'] + rank_columns
            tableCut = _tableCut_cal(tableCut, symbol)
            D[symbol] = tableCut.reset_index(drop=True)
    return D
예제 #12
0
def get_shfe_reciept_2(date=None, vars=None):
    """
        Scrape registered warehouse-receipt data of the Shanghai Futures Exchange.
        Per the original notes: applies from 20140519 (inclusive) onwards.
        Parameters
        ------
            date: day, YYYY-MM-DD / YYYYMMDD / datetime.date; today when omitted
            vars: list of variety codes such as RB, AL; cons.vars when omitted
        Return
        -------
            DataFrame (None for non-trading days; empty when the response is
            not valid JSON or holds no columns):
                    var             variety code                 string
                    reciept         number of receipts           int
                    date            date                         string YYYYMMDD
    """
    if vars is None:
        # The None default used to be iterated directly and raised TypeError.
        vars = cons.vars
    # Always work with the YYYYMMDD string, including for the "today" default
    # (previously the default stayed a date object and never matched calendar).
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        print('%s非交易日' % date)
        return None
    url = cons.SHFE_RECIEPT_URL_2 % date
    r = requests_link(url, encoding='utf-8')
    r.encoding = 'utf-8'
    try:
        context = json.loads(r.text)
    except (ValueError, TypeError):
        # json.JSONDecodeError is a ValueError: the body was not JSON.
        return pd.DataFrame()
    data = pd.DataFrame(context['o_cursor'])
    if len(data.columns) < 1:
        return pd.DataFrame()
    frames = []
    for var in set(data['VARNAME'].tolist()):
        dataCut = data[data['VARNAME'] == var]
        D = {
            # Remove non-word characters and ASCII letters before translating.
            'var': chinese_to_english(re.sub(r"\W|[a-zA-Z]", "", var)),
            'reciept': int(dataCut['WRTWGHTS'].tolist()[-1]),
            'date': date
        }
        frames.append(pd.DataFrame(D, index=[0]))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    records = pd.concat(frames) if frames else pd.DataFrame()
    if len(records.index) != 0:
        records.index = records['var']
        vars_inMarket = [i for i in vars if i in records.index]
        records = records.loc[vars_inMarket, :]
    return records.reset_index(drop=True)
예제 #13
0
def get_czce_reciept_2(date=None, vars=cons.vars):
    """
        Scrape registered warehouse-receipt data of the Zhengzhou Commodity Exchange.
        Per the original notes: applies from 20100825 through 20151111 (both inclusive).
        Parameters
        ------
            date: day, YYYY-MM-DD / YYYYMMDD / datetime.date; today when omitted
            vars: list of variety codes such as CF, TA; defaults to cons.vars
        Return
        -------
            DataFrame:
                    var             variety code                 string
                    reciept         number of receipts (as parsed from the page)
                    date            date                         string YYYYMMDD
    """
    # Always work with the YYYYMMDD string, including for the "today" default
    # (previously the default stayed a date object and date[:4] raised TypeError).
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    url = cons.CZCE_RECIEPT_URL_2 % (date[:4], date)
    r = requests.get(url)
    r.encoding = 'utf-8'
    # The first three parsed tables are skipped.
    data = pd.read_html(r.text)[3:]
    frames = []
    for dataCut in data:
        if len(dataCut.columns) <= 3:
            continue
        first_col = dataCut[0].tolist()
        lastIndexs = [
            x for x in dataCut.index if '注:' in str(first_col[x])
        ]
        if lastIndexs:
            # Cut the table just before the first '注:' row.
            dataCut = dataCut.loc[:lastIndexs[0] - 1, :]
        title = dataCut[0].tolist()[0]
        if 'PTA' in title:
            var = 'TA'
        else:
            string = title.split(' ')[0][3:]
            var = chinese_to_english(re.sub('[A-Z]+', '', string))
        # The count is the last cell of the third column from the right.
        reciept = dataCut.iloc[:, -3].tolist()[-1]
        D = {'var': var, 'reciept': reciept, 'date': date}
        frames.append(pd.DataFrame(D, index=[0]))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    records = pd.concat(frames) if frames else pd.DataFrame()
    if len(records.index) != 0:
        records.index = records['var']
        vars_inMarket = [i for i in vars if i in records.index]
        records = records.loc[vars_inMarket, :]
    return records.reset_index(drop=True)
예제 #14
0
def get_shfe_vwap(date=None):
    """
        Download the daily volume-weighted average price data of the
        Shanghai Futures Exchange.
    Parameters
    ------
        date: day, YYYY-MM-DD / YYYYMMDD / datetime.date; today when omitted
    Return
    -------
        DataFrame restricted to the values of cons.SHFE_VWAP_COLUMNS; the
        original documentation lists these columns:
                symbol        contract code
                date          date
                time_range    vwap window, either 09:00-10:15 or 09:00-15:00
                vwap          volume-weighted average price
        or None (non-trading day, HTTP error, empty or unusable payload)
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    day_str = day.strftime('%Y%m%d')
    if day_str not in calendar:
        warnings.warn('%s非交易日' % day_str)
        return None
    url = cons.SHFE_VWAP_URL % day_str
    try:
        json_data = json.loads(
            urlopen(Request(url,
                            headers=cons.headers)).read().decode('utf8'))
    except HTTPError as reason:
        # 404 and 403 are treated as "no data" and stay silent; anything else
        # is reported together with the URL that was actually requested.
        if reason.code not in [404, 403]:
            print(url, reason)
        return None

    if len(json_data['o_currefprice']) == 0:
        return None
    try:
        df = pd.DataFrame(json_data['o_currefprice'])
        df['INSTRUMENTID'] = df['INSTRUMENTID'].str.strip()
        # NOTE(review): the converted column is discarded, so this line only
        # acts as a check that ':B1' exists and converts - confirm whether an
        # assignment back to df was intended.
        df[':B1'].astype('int16')
        return df.rename(columns=cons.SHFE_VWAP_COLUMNS)[list(
            cons.SHFE_VWAP_COLUMNS.values())]
    except Exception:
        # A payload with unexpected structure counts as "no data".
        return None
예제 #15
0
def get_rollYield(date=None, var='IF', symbol1=None, symbol2=None, df=None):
    """
        Compute the roll yield for one day, either between the two contracts
        of a variety with the largest open interest or between two given
        contracts.
        Parameters
        ------
            date: day, e.g. YYYYMMDD; today when omitted
            var: string, variety code such as RB, AL; replaced by the variety
                 of symbol1 when symbol1 is given
            symbol1: string, first contract, e.g. rb1810
            symbol2: string, second contract, e.g. rb1812
            df: DataFrame or None; daily bars to read prices from. When it is
                not a DataFrame the bars are downloaded here, so passing them
                in avoids the download.
        Return
        -------
            the roll yield as a number (price change rate divided by the
            result of _monthChange), or None on a non-trading day
        Raises
        ------
            ValueError when neither a variety nor both contracts are given.
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        print('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    if symbol1:
        var = symbol2varietie(symbol1)
    if not isinstance(df, pd.DataFrame):
        market = symbolMarket(var)
        df = get_future_daily(start=date, end=date, market=market)
    if symbol1 and symbol2:
        # Both contracts were named explicitly: honour them. Previously this
        # path was unreachable because var is always set from symbol1.
        close1 = float(
            df['close'][df['symbol'] == symbol1.upper()].tolist()[0])
        close2 = float(
            df['close'][df['symbol'] == symbol2.upper()].tolist()[0])
        priceRate = close2 / close1 - 1
    elif var:
        # Rank the variety's contracts by open interest, largest first, and
        # compare the second one against the first.
        df = df[df['variety'] == var].sort_values('open_interest',
                                                  ascending=False)
        df['close'] = df['close'].astype('float')
        priceRate = df['close'].pct_change().tolist()[1]
        symbol1 = df['symbol'].tolist()[0]
        symbol2 = df['symbol'].tolist()[1]
    else:
        raise ValueError('either var or both symbol1 and symbol2 are required')
    mc = _monthChange(symbol2, symbol1)
    ry = priceRate / mc
    return ry
예제 #16
0
def get_dce_reciept(date=None, vars=cons.vars):
    """
        Scrape registered warehouse-receipt data of the Dalian Commodity Exchange.
        Parameters
        ------
            date: day, YYYY-MM-DD / YYYYMMDD / datetime.date; today when omitted
            vars: list of variety codes such as RB, AL; defaults to cons.vars
            Per the original notes: data starts at 20060106 and was updated
            every Friday until daily updates began on 20090407.
        Return
        -------
            DataFrame (None for non-trading days):
                    var             variety code                 string
                    reciept         number of receipts           int
                    date            date                         string YYYYMMDD
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    day_str = date.strftime('%Y%m%d')
    if day_str not in calendar:
        print('%s非交易日' % day_str)
        return None
    # The month is sent as month - 1 in the request URL.
    url = cons.DCE_RECIEPT_URL % (date.year, date.month - 1, date.day)
    data = pandas_readHtml_link(url, encoding='utf-8')[0]
    frames = []
    for x in data.to_dict(orient='records'):
        label = x[0]
        # Only the subtotal rows (first cell ending in '小计') carry the
        # per-variety total.
        if not isinstance(label, str) or label[-2:] != '小计':
            continue
        D = {
            'var': chinese_to_english(label[:-2]),
            'reciept': int(x[3]),
            'date': day_str
        }
        frames.append(pd.DataFrame(D, index=[0]))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    records = pd.concat(frames) if frames else pd.DataFrame()
    if len(records.index) != 0:
        records.index = records['var']
        vars_inMarket = [i for i in vars if i in records.index]
        records = records.loc[vars_inMarket, :]
    return records.reset_index(drop=True)
예제 #17
0
파일: cot.py 프로젝트: yutiansut/fushare
def get_czce_rank_table(date=None, vars=cons.vars):
    """
        Fetch the detailed top-20 member ranking tables of the Zhengzhou
        Commodity Exchange (CZCE).
        Note: this exchange publishes rankings both per variety and per contract.
        Parameters
        ------
            date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
            vars: list of variety codes such as RB, AL; defaults to cons.vars
            Data starts on 20050509 and is updated around 16:30 on each trading day
        Return
        -------
            dict mapping a symbol to its ranking table. An empty dict is
            returned when the date is not in the trading calendar or when the
            downloaded table has fewer than 6 columns.
            Columns listed for each table:
                rank                        rank                                        int
                vol_party_name              member at this rank by volume               string (Chinese)
                vol                         that member's volume                        int
                vol_chg                     change in that member's volume              int
                long_party_name             member at this rank by long position        string (Chinese)
                long_openIntr               that member's long position                 int
                long_openIntr_chg           change in that member's long position       int
                short_party_name            member at this rank by short position       string (Chinese)
                short_openIntr              that member's short position                int
                short_openIntr_chg          change in that member's short position      int
                symbol                      contract                                    string
                var                         variety                                     string
                date                        date                                        string YYYYMMDD
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        print('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    if date <= datetime.date(2010, 8, 25):
        # Oldest page layout. The same URL is fetched twice: once parsed into
        # tables, once as raw HTML so the captions in the <b> tags can be read.
        url = cons.CZCE_VOLRANK_URL_1 % (date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skiprow=0)
        r = requests_link(url, 'utf-8')
        r.encoding = 'utf-8'
        soup = BeautifulSoup(r.text, 'lxml', from_encoding="gb2312")
        symbols = []
        for link in soup.find_all('b'):
            # A <b> tag whose string form splits into more than 5
            # space-separated pieces is treated as a caption; piece 4 is the name.
            strings = (str(link).split(' '))
            if len(strings) > 5:
                try:
                    symbol = chinese_to_english(strings[4])
                except:
                    # Fall back to the raw token when it cannot be translated.
                    symbol = strings[4]
                symbols.append(symbol)
        D = {}
        for i in range(len(symbols)):
            symbol = symbols[i]
            # The i-th caption is paired with table i + 1.
            tableCut = data[i + 1]
            tableCut.columns = rank_columns
            # Drop the last row, then use the row labels as ranks.
            tableCut = tableCut.iloc[:-1, :]
            tableCut.loc[:, 'rank'] = tableCut.index
            # The row labelled '合计' gets rank 999 and no member names.
            tableCut.loc['合计', 'rank'] = 999
            tableCut.loc[
                '合计',
                ['vol_party_name', 'long_party_name', 'short_party_name'
                 ]] = None
            tableCut.loc[:, 'symbol'] = symbol
            tableCut.loc[:, 'var'] = symbol2varietie(symbol)
            D[symbol] = tableCut.reset_index(drop=True)
        # NOTE(review): this branch returns every scraped symbol; `vars` is
        # only consulted in the newer layouts below.
        return D

    # Newer layouts: the date range selects the URL template, the number of
    # rows to skip and which parsed table holds the data.
    elif date <= datetime.date(2015, 11, 11):
        url = cons.CZCE_VOLRANK_URL_2 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skiprow=1)[1]
    elif date < datetime.date(2017, 12, 28):
        url = cons.CZCE_VOLRANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skiprow=1)[0]
    else:
        url = cons.CZCE_VOLRANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skiprow=0)[0]

    # Fewer than 6 columns is treated as "no ranking data".
    if len(data.columns) < 6:
        return {}

    # Keep the first 9 columns, rename them, strip thousands separators from
    # the numeric columns and turn '-' placeholders into 0.
    table = data.iloc[:, :9]
    table.columns = rank_columns
    table.loc[:, 'rank'] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(
        lambda x: x.replace(',', ''))
    table = table.applymap(lambda x: 0 if x == '-' else x)
    # Section header rows are those whose index label contains '合约' or '品种'.
    # NOTE(review): assumes every index label is a string - confirm against
    # _czce_df_read.
    indexs = [i for i in table.index if '合约' in i or '品种' in i]
    # 0 is a sentinel standing for the first section.
    indexs.insert(0, 0)
    D = {}

    for i in range(len(indexs)):
        if indexs[i] == 0:
            # First section: runs up to the first header row; its header text
            # is read from the index name.
            # NOTE(review): indexs[i + 1] raises IndexError when no header row
            # was found - confirm this cannot happen.
            tableCut = table.loc[:indexs[i + 1], :]
            string = tableCut.index.name

        elif i < len(indexs) - 1:
            # Middle section: from this header row to the next one.
            tableCut = table.loc[indexs[i]:indexs[i + 1], :]
            string = tableCut.index[0]
        else:
            # Last section: from this header row to the end of the table.
            tableCut = table.loc[indexs[i]:, :]
            string = tableCut.index[0]

        # Derive the symbol from the header text; 'PTA' is mapped to 'TA'.
        if 'PTA' in string:
            symbol = 'TA'
        else:
            try:
                symbol = chinese_to_english(
                    find_chinese(re.compile(':(.*) ').findall(string)[0]))
            except:
                # Fall back to the raw captured text when translation fails.
                symbol = re.compile(':(.*) ').findall(string)[0]
        var = symbol2varietie(symbol)
        if var in vars:
            # Drop incomplete rows and the first remaining row, then keep only
            # rows whose label is the string form of an integer from 0 to 20.
            tableCut = tableCut.dropna(how='any').iloc[1:, :]
            tableCut = tableCut.loc[[
                x for x in tableCut.index if x in [str(i) for i in range(21)]
            ], :]
            tableCut = _tableCut_cal(tableCut, symbol)
            D[symbol] = tableCut.reset_index(drop=True)
    return D
예제 #18
0
파일: cot.py 프로젝트: yutiansut/fushare
def get_dce_rank_table(date=None, vars=cons.vars):
    """
        Fetch the detailed top-20 member ranking tables of the Dalian
        Commodity Exchange (DCE).
        Note: this exchange publishes rankings both per variety and per contract.
        Parameters
        ------
            date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
            vars: list of variety codes such as RB, AL; defaults to cons.vars
            Data starts on 20060104 and is updated around 16:30 on each trading day
        Return
        -------
            dict mapping a variety code to its ranking table. An empty dict is
            returned when the date is not in the trading calendar.
            Columns listed for each table:
                rank                        rank                                        int
                vol_party_name              member at this rank by volume               string (Chinese)
                vol                         that member's volume                        int
                vol_chg                     change in that member's volume              int
                long_party_name             member at this rank by long position        string (Chinese)
                long_openIntr               that member's long position                 int
                long_openIntr_chg           change in that member's long position       int
                short_party_name            member at this rank by short position       string (Chinese)
                short_openIntr              that member's short position                int
                short_openIntr_chg          change in that member's short position      int
                symbol                      contract                                    string
                var                         variety                                     string
                date                        date                                        string YYYYMMDD
    """

    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        print('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    vars = [i for i in vars if i in cons.market_var['dce']]
    D = {}
    for var in vars:
        # The URL template is filled with month - 1.
        url = cons.DCE_VOLRANK_URL % (var.lower(), var.lower(), date.year,
                                      date.month - 1, date.day)

        # Close the HTTP response as soon as the body has been read.
        with urllib.request.urlopen(url) as response:
            texts = response.readlines()
        # Responses of 30 lines or fewer are treated as "no data".
        if len(texts) <= 30:
            continue

        list_60_name = []
        list_60 = []
        list_60_chg = []
        rank = []
        for text in texts:
            stringlist = text.decode('utf8').split()
            # A data line starts with an integer rank (<= 20) followed by
            # name, value and change. Read all four fields before appending
            # so a short line cannot leave the lists with different lengths.
            try:
                if int(stringlist[0]) > 20:
                    continue
                row_rank, name, value, change = stringlist[:4]
            except (ValueError, IndexError):
                # Not a ranking row (no integer rank, or too few fields).
                continue
            rank.append(row_rank)
            list_60_name.append(name)
            list_60.append(value)
            list_60_chg.append(change)

        # The 60 collected rows are three consecutive groups of 20:
        # volume, long positions, short positions.
        tableCut = pd.DataFrame({
            'rank': rank[0:20],
            'vol_party_name': list_60_name[0:20],
            'vol': list_60[0:20],
            'vol_chg': list_60_chg[0:20],
            'long_party_name': list_60_name[20:40],
            'long_openIntr': list_60[20:40],
            'long_openIntr_chg': list_60_chg[20:40],
            'short_party_name': list_60_name[40:60],
            'short_openIntr': list_60[40:60],
            'short_openIntr_chg': list_60_chg[40:60]
        })
        # Strip thousands separators before the numeric post-processing.
        tableCut = tableCut.applymap(lambda x: x.replace(',', ''))
        tableCut = _tableCut_cal(tableCut, var)
        D[var] = tableCut
    return D
예제 #19
0
def _czce_parse_rows(rows, listed_columns, day_const, to_int, to_float):
    """Convert raw CZCE quote rows into dicts, skipping rows whose first cell is not a futures symbol."""
    int_fields = ['volume', 'open_interest', 'oi_chg', 'exercise_volume']
    parsed = []
    for row in rows:
        m = cons.FUTURE_SYMBOL_PATTERN.match(row[0])
        if not m:
            continue
        row_dict = {'date': day_const, 'symbol': row[0], 'variety': m.group(1)}
        for i, field in enumerate(listed_columns):
            cell = row[i + 1]
            if cell == "\r":
                # A cell that is just a carriage return is stored as 0.0.
                row_dict[field] = 0.0
            elif field in int_fields:
                row_dict[field] = to_int(cell)
            else:
                row_dict[field] = to_float(cell)
        parsed.append(row_dict)
    return parsed


def get_czce_daily(date=None):

    """
        Fetch the daily futures quotes of the Zhengzhou Commodity Exchange (CZCE).
    Parameters
    ------
        date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    Return
    -------
        DataFrame restricted to cons.OUTPUT_COLUMNS, listed as:
                symbol        contract code
                date          date
                open          open price
                high          high price
                low           low price
                close         close price
                volume        volume
                open_interest open interest
                turnover      turnover
                settle        settlement price
                pre_settle    previous settlement price
                variety       variety code
        None when the date is not in the trading calendar or the exchange
        page reports an error / no data.
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime('%Y%m%d') not in calendar:
        # Format `day`, not `date`: the raw argument may be None or a string,
        # neither of which has strftime.
        print('%s非交易日' % day.strftime('%Y%m%d'))
        return None

    output_columns = cons.OUTPUT_COLUMNS
    day_const = int(day.strftime('%Y%m%d'))

    if day <= datetime.date(2010, 8, 24):
        # Oldest layout: an HTML page whose second table holds the quotes.
        url = cons.CZCE_DAILY_URL_1 % day.strftime('%Y%m%d')
        df = pd.read_html(url)[1].dropna(how='any')
        rows = df.to_dict(orient='records')[1:]
        dict_data = _czce_parse_rows(rows, cons.CZCE_COLUMNS_2, day_const,
                                     int, float)
        return pd.DataFrame(dict_data)[output_columns]

    # Text layouts. Days after 2015-09-19 use the '|'-separated format; every
    # earlier day (including 2015-09-19 itself, which previously left `url`
    # unassigned) uses the comma-separated one.
    new_format = day > datetime.date(2015, 9, 19)
    u = cons.CZCE_DAILY_URL_3 if new_format else cons.CZCE_DAILY_URL_2
    url = u % (day.strftime('%Y'), day.strftime('%Y%m%d'))
    listed_columns = cons.CZCE_COLUMNS
    try:
        r = requests.get(url)
        html = r.text
    except HTTPError as reason:
        # NOTE(review): requests.get does not raise on HTTP status codes by
        # itself; confirm which HTTPError this handler is meant to catch.
        if reason.code != 404:
            print(url, reason)
        return
    if html.find(u'您的访问出错了') >= 0 or html.find(u'无期权每日行情交易记录') >= 0:
        return
    # Drop the last four lines and every line starting with '小'.
    # i[:1] (rather than i[0]) keeps an empty line from raising IndexError.
    html = [i.replace(' ', '').split('|')
            for i in html.split('\n')[:-4] if i[:1] != u'小']

    if new_format:
        # The second line must be the column header.
        if len(html) < 2 or html[1][0] not in [u'品种月份', u'品种代码']:
            return
        dict_data = _czce_parse_rows(
            html[2:], listed_columns, day_const,
            lambda cell: int(cell.replace(',', '')),
            lambda cell: float(cell.replace(',', '')))
    else:
        # Each line is a single comma-separated record in this format.
        dict_data = _czce_parse_rows(
            (row[0].split(',') for row in html[1:]), listed_columns,
            day_const, lambda cell: int(float(cell)), float)
    return pd.DataFrame(dict_data)[output_columns]
예제 #20
0
def get_dce_daily(date=None, type="future", retries=0):
    """
        Fetch the daily quotes of the Dalian Commodity Exchange (DCE).
    Parameters
    ------
        date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
        type: data kind, either 'future' or 'option'
        retries: int, current retry count; the fetch is abandoned once it exceeds 3
    Return
    -------
        DataFrame
            for type='future', restricted to cons.OUTPUT_COLUMNS, listed as:
                symbol        contract code
                date          date
                open          open price
                high          high price
                low           low price
                close         close price
                volume        volume
                open_interest open interest
                turnover      turnover
                settle        settlement price
                pre_settle    previous settlement price
                variety       variety code
        or
        DataFrame
            for type='option', restricted to cons.OPTION_OUTPUT_COLUMNS, listed as:
                symbol        contract code
                date          date
                open          open price
                high          high price
                low           low price
                close         close price
                pre_settle    previous settlement price
                settle        settlement price
                delta         delta
                volume        volume
                open_interest open interest
                oi_change     change in open interest
                turnover      turnover
                implied_volatility implied volatility
                exercise_volume    exercised volume
                variety       variety code
        or None (invalid type, non-trading day, too many retries, or no data
        for the given date)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime('%Y%m%d') not in calendar:
        # Format `day`, not `date`: the raw argument may be None or a string,
        # neither of which has strftime.
        print('%s非交易日' % day.strftime('%Y%m%d'))
        return None
    if retries > 3:
        print("maximum retires for DCE market data: ", day.strftime("%Y%m%d"))
        return

    # Query parameters shared by both kinds; the month is sent as month - 1.
    params = {"currDate": day.strftime('%Y%m%d'),
              "year": day.strftime('%Y'),
              "month": str(int(day.strftime('%m')) - 1),
              "day": day.strftime('%d')}
    if type == 'future':
        listed_columns = cons.DCE_COLUMNS
        output_columns = cons.OUTPUT_COLUMNS
    elif type == 'option':
        params["dayQuotes.trade_type"] = "1"
        listed_columns = cons.DCE_OPTION_COLUMNS
        output_columns = cons.OPTION_OUTPUT_COLUMNS
    else:
        print('invalid type :' + type + ', should be one of "future" or "option"')
        return
    url = cons.DCE_DAILY_URL + '?' + urlencode(params)

    # Every retry passes `type` through; without it an 'option' request would
    # silently be retried as a 'future' request.
    try:
        with urlopen(Request(url, method='POST', headers=cons.headers)) as resp:
            response = resp.read().decode('utf8')
    except IncompleteRead:
        return get_dce_daily(day, type=type, retries=retries + 1)
    except HTTPError as reason:
        if reason.code == 504:
            return get_dce_daily(day, type=type, retries=retries + 1)
        elif reason.code != 404:
            print(cons.DCE_DAILY_URL, reason)
        return

    if u'错误:您所请求的网址(URL)无法获取' in response:
        return get_dce_daily(day, type=type, retries=retries + 1)
    elif u'暂无数据' in response:
        return

    data = BeautifulSoup(response, 'html.parser').find_all('tr')
    if len(data) == 0:
        return

    dict_data = list()
    implied_data = list()
    for idata in data[1:]:
        # Skip subtotal / total rows.
        if u'小计' in idata.text or u'总计' in idata.text:
            continue
        x = idata.find_all('td')
        if type == 'future':
            row_dict = {'variety': cons.DCE_MAP[x[0].text.strip()]}
            row_dict['symbol'] = row_dict['variety'] + x[1].text.strip()
        elif len(x) == 16:
            # Option quote row: the second cell must look like a contract symbol.
            m = cons.FUTURE_SYMBOL_PATTERN.match(x[1].text.strip())
            if not m:
                continue
            row_dict = {'symbol': x[1].text.strip(), 'variety': m.group(1).upper(), 'contract_id': m.group(0)}
        else:
            # Two-cell option rows carry (contract id, implied volatility);
            # any other shape is ignored.
            if len(x) == 2:
                implied_data.append({'contract_id': x[0].text.strip(), 'implied_volatility': float(x[1].text.strip())})
            continue

        for i, field in enumerate(listed_columns):
            field_content = x[i + 2].text.strip()
            if '-' in field_content:
                # Any cell containing '-' is stored as 0 (this also matches
                # values with a leading minus sign).
                row_dict[field] = 0
            elif field in ['volume', 'open_interest']:
                row_dict[field] = int(field_content.replace(',', ''))
            else:
                row_dict[field] = float(field_content.replace(',', ''))
        dict_data.append(row_dict)

    df = pd.DataFrame(dict_data)
    df['date'] = day.strftime('%Y%m%d')
    if type == 'future':
        return df[output_columns]
    else:
        # Attach the implied volatility of each underlying contract.
        return pd.merge(df, pd.DataFrame(implied_data), on='contract_id', how='left', indicator=False)[output_columns]
예제 #21
0
def get_cffex_daily(date=None):
    """
        Fetch the daily quotes of the China Financial Futures Exchange (CFFEX).
    Parameters
    ------
        date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    Return
    -------
        DataFrame restricted to cons.OUTPUT_COLUMNS, listed as:
                symbol        contract code
                date          date
                open          open price
                high          high price
                low           low price
                close         close price
                volume        volume
                open_interest open interest
                turnover      turnover
                settle        settlement price
                pre_settle    previous settlement price
                variety       variety code
        or None (non-trading day, or no data for the given date)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime('%Y%m%d') not in calendar:
        # Format `day`, not `date`: the raw argument may be None or a string,
        # neither of which has strftime.
        print('%s非交易日' % day.strftime('%Y%m%d'))
        return None

    url = cons.CFFEX_DAILY_URL % (day.strftime('%Y%m'), day.strftime('%d'),
                                  day.strftime('%Y%m%d'))
    try:
        # Close the HTTP response as soon as the body has been read.
        with urlopen(Request(url, headers=cons.headers)) as resp:
            html = resp.read().decode('gbk', 'ignore')
    except HTTPError as reason:
        if reason.code != 404:
            print(url, reason)
        return

    if html.find(u'网页错误') >= 0:
        return
    # Drop the last two lines and every line starting with '小'.
    # i[:1] (rather than i[0]) keeps an empty line from raising IndexError.
    html = [i.replace(' ', '').split(',')
            for i in html.split('\n')[:-2] if i[:1] != u'小']

    # The first remaining line must be the column header.
    if not html or html[0][0] != u'合约代码':
        return

    dict_data = list()
    day_const = day.strftime('%Y%m%d')
    for row in html[1:]:
        m = cons.FUTURE_SYMBOL_PATTERN.match(row[0])
        if not m:
            continue
        row_dict = {'date': day_const, 'symbol': row[0], 'variety': m.group(1)}

        for i, field in enumerate(cons.CFFEX_COLUMNS):
            cell = row[i + 1]
            if cell == u"":
                # Empty cells are stored as 0.0.
                row_dict[field] = 0.0
            elif field in ['volume', 'open_interest', 'oi_chg']:
                row_dict[field] = int(cell)
            else:
                row_dict[field] = float(cell)
        # The previous settlement is derived as close minus change1.
        row_dict['pre_settle'] = row_dict['close'] - row_dict['change1']
        dict_data.append(row_dict)

    return pd.DataFrame(dict_data)[cons.OUTPUT_COLUMNS]
예제 #22
0
파일: cot.py 프로젝트: yutiansut/fushare
def get_shfe_rank_table(date=None, vars=cons.vars):
    """
        Fetch the detailed top-20 member ranking tables of the Shanghai
        Futures Exchange (SHFE).
        Note: this exchange only publishes per-contract rankings inside each
        variety; it does not publish an overall ranking per variety.
        Parameters
        ------
            date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
            vars: list of variety codes such as RB, AL; defaults to cons.vars
            Data starts on 20020107 and is updated around 16:30 on each trading day
        Return
        -------
            dict mapping a contract symbol to its ranking table. An empty dict
            is returned when the date is not in the trading calendar, the
            response cannot be parsed as JSON, or it has fewer than 3 columns.
            Columns listed for each table:
                rank                        rank                                        int
                vol_party_name              member at this rank by volume               string (Chinese)
                vol                         that member's volume                        int
                vol_chg                     change in that member's volume              int
                long_party_name             member at this rank by long position        string (Chinese)
                long_openIntr               that member's long position                 int
                long_openIntr_chg           change in that member's long position       int
                short_party_name            member at this rank by short position       string (Chinese)
                short_openIntr              that member's short position                int
                short_openIntr_chg          change in that member's short position      int
                symbol                      contract                                    string
                var                         variety                                     string
                date                        date                                        string YYYYMMDD
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        print('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    url = cons.SHFE_VOLRANK_URL % (date.strftime('%Y%m%d'))
    r = requests_link(url, 'utf-8')
    try:
        context = json.loads(r.text)
    except (AttributeError, ValueError):
        # AttributeError: the request helper handed back no response object.
        # ValueError: the body is not JSON (json.JSONDecodeError subclasses it).
        return {}
    df = pd.DataFrame(context['o_cursor'])

    df = df.rename(
        columns={
            'CJ1': 'vol',
            'CJ1_CHG': 'vol_chg',
            'CJ2': 'long_openIntr',
            'CJ2_CHG': 'long_openIntr_chg',
            'CJ3': 'short_openIntr',
            'CJ3_CHG': 'short_openIntr_chg',
            'PARTICIPANTABBR1': 'vol_party_name',
            'PARTICIPANTABBR2': 'long_party_name',
            'PARTICIPANTABBR3': 'short_party_name',
            'PRODUCTNAME': 'product1',
            'RANK': 'rank',
            'INSTRUMENTID': 'symbol',
            'PRODUCTSORTNO': 'product2'
        })

    if len(df.columns) < 3:
        return {}
    # Trim whitespace from text cells, then turn empty strings into None.
    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    df = df.applymap(lambda x: None if x == '' else x)
    df['var'] = df['symbol'].apply(symbol2varietie)

    # Keep only rows with a positive rank.
    df = df[df['rank'] > 0]
    # Drop helper columns; the ones that are absent are simply skipped.
    df = df.drop(
        columns=[
            'PARTICIPANTID1', 'PARTICIPANTID2', 'PARTICIPANTID3', 'product1',
            'product2'
        ],
        errors='ignore')

    # Build the membership set once instead of one list per requested variety.
    available = set(df['var'])
    D = {}
    for var in vars:
        if var not in available:
            continue
        df_var = df[df['var'] == var]
        for symbol in set(df_var['symbol']):
            df_symbol = df_var[df_var['symbol'] == symbol]
            D[symbol] = df_symbol.reset_index(drop=True)
    return D
예제 #23
0
파일: cot.py 프로젝트: yutiansut/fushare
def get_rank_sum(date=None, vars=cons.vars):
    """
        Aggregate the top-5, top-10, top-15 and top-20 member ranking data of
        the four futures exchanges.
        Note 1: SHFE and CFFEX only publish per-contract rankings inside each
            variety, not an overall ranking per variety; the per-variety rows
            produced here are the sum over that variety's contracts and are
            not a real per-variety ranking.
        Note 2: DCE only publishes per-variety rankings, not per-contract ones.
        Parameters
        ------
            date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
            vars: list of variety codes such as RB, AL; defaults to cons.vars
        Return
        -------
            DataFrame with one row per symbol (plus one summed row per SHFE /
            CFFEX variety):
                symbol                      contract                                        string
                var                         variety                                         string
                vol_top5                    total volume of the top 5 by volume             int
                vol_chg_top5                total volume change of the top 5 by volume      int
                long_openIntr_top5          total long position of the top 5 longs          int
                long_openIntr_chg_top5      total long position change of the top 5 longs   int
                short_openIntr_top5         total short position of the top 5 shorts        int
                short_openIntr_chg_top5     total short position change of the top 5 shorts int
                vol_top10                   total volume of the top 10 by volume            int
                ...
                date                        date                                            string YYYYMMDD
            None when the date is not in the trading calendar.
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    date_str = date.strftime('%Y%m%d')
    if date_str not in calendar:
        return None
    dce_var = [i for i in vars if i in cons.market_var['dce']]
    shfe_var = [i for i in vars if i in cons.market_var['shfe']]
    czce_var = [i for i in vars if i in cons.market_var['czce']]
    cffex_var = [i for i in vars if i in cons.market_var['cffex']]

    # Ranking tables from every exchange that has requested varieties.
    tables = {}
    if len(dce_var) > 0:
        tables.update(get_dce_rank_table(date, dce_var))
    if len(shfe_var) > 0:
        tables.update(get_shfe_rank_table(date, shfe_var))
    if len(czce_var) > 0:
        tables.update(get_czce_rank_table(date, czce_var))
    if len(cffex_var) > 0:
        tables.update(get_cffex_rank_table(date, cffex_var))

    value_columns = ('vol', 'vol_chg', 'long_openIntr', 'long_openIntr_chg',
                     'short_openIntr', 'short_openIntr_chg')
    # Collect plain dicts and build the frame once. The dict of tables keeps
    # its own name so later checks cannot confuse it with a record.
    rows = []
    for table in tables.values():
        table = table.applymap(lambda x: 0 if x == '' else x)
        for symbol in set(table['symbol']):
            tableCut = table[table['symbol'] == symbol]
            row = {'symbol': symbol, 'var': symbol2varietie(symbol)}
            # Keys are generated as '<column>_top<N>' in the same order as
            # the documented layout: all top5 columns, then top10, and so on.
            for top in (5, 10, 15, 20):
                top_cut = tableCut[tableCut['rank'] <= top]
                for column in value_columns:
                    row['%s_top%d' % (column, top)] = top_cut[column].sum()
            row['date'] = date_str
            rows.append(row)
    records = pd.DataFrame(rows)

    # Only aggregate when at least one record exists; otherwise there is no
    # 'var' column to look at.
    if not records.empty:
        present_vars = set(records['var'])
        add_vars = [
            i for i in cons.market_var['shfe'] + cons.market_var['cffex']
            if i in present_vars
        ]
        var_records = []
        for var in add_vars:
            # Sum the contracts of this variety into one per-variety row.
            recordsCut = records[records['var'] == var]
            var_record = pd.DataFrame(recordsCut.sum()).T
            var_record['date'] = date_str
            var_record.loc[:, ['var', 'symbol']] = var
            var_records.append(var_record)
        if var_records:
            records = pd.concat([records] + var_records)

    return records.reset_index(drop=True)