Example 1
def get_index():
    """
    获取大盘指数行情
    return
    -------
      DataFrame
          code:指数代码
          name:指数名称
          change:涨跌幅
          open:开盘价
          preclose:昨日收盘价
          close:收盘价
          high:最高价
          low:最低价
          volume:成交量(手)
          amount:成交金额(亿元)
    """
    request = Request(ct.INDEX_HQ_URL%(ct.P_TYPE['http'],
                                             ct.DOMAINS['sinahq']),headers=ua.get_ua())
    text = urlopen(request, timeout=10).read()
    text = text.decode('GBK')
    text = text.replace('var hq_str_sh', '').replace('var hq_str_sz', '')
    text = text.replace('";', '').replace('"', '').replace('=', ',')
    text = '%s%s'%(ct.INDEX_HEADER, text)
    df = pd.read_csv(StringIO(text), sep=',', thousands=',')
    df['change'] = (df['close'] / df['preclose'] - 1 ) * 100
    df['amount'] = df['amount'] / 100000000
    df['change'] = df['change'].map(ct.FORMAT)
    df['amount'] = df['amount'].map(ct.FORMAT4)
    df = df[ct.INDEX_COLS]
    df['code'] = df['code'].map(lambda x:str(x).zfill(6))
    df['change'] = df['change'].astype(float)
    df['amount'] = df['amount'].astype(float)
    return df
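
A minimal usage sketch, assuming this is the classic tushare API and get_index is exposed at the package top level:

import tushare as ts  # assumption: classic tushare package with get_index at the top level

idx = ts.get_index()                                   # one row per major index (SSE Composite, SZSE Component, ...)
print(idx[['code', 'name', 'close', 'change']].head())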
Example 2
def _parsing_dayprice_json(types=None, page=1):
    """
           处理当日行情分页数据,格式为json
     Parameters
     ------
        pageNum:页码
     return
     -------
        DataFrame 当日所有股票交易数据(DataFrame)
    """
    ct._write_console()
    request = Request(ct.SINA_DAY_PRICE_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'],
                                 ct.PAGES['jv'], types, page),headers=ua.get_ua())
    text = urlopen(request, timeout=10).read()
    # decode first so the 'null' check also works under Python 3 (bytes never equal the str 'null')
    text = text.decode('gbk') if ct.PY3 else text
    if text == 'null':
        return None
    reg = re.compile(r'\,(.*?)\:')
    text = reg.sub(r',"\1":', text)
    text = text.replace('"{symbol', '{"symbol')
    text = text.replace('{symbol', '{"symbol"')
    if ct.PY3:
        jstr = json.dumps(text)
    else:
        jstr = json.dumps(text, encoding='GBK')
    js = json.loads(jstr)
    df = pd.DataFrame(pd.read_json(js, dtype={'code':object}),
                      columns=ct.DAY_TRADING_COLUMNS)
    df = df.drop('symbol', axis=1)
#     df = df.ix[df.volume > 0]
    return df
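
The regex step above rewrites Sina's unquoted-key pseudo-JSON into valid JSON before pandas parses it; a small standalone sketch of that transformation (the sample payload is invented for illustration):

import json
import re

raw = '[{symbol:"sh600000",code:"600000",trade:"7.61",changepercent:"0.53"}]'  # hypothetical Sina-style payload
reg = re.compile(r'\,(.*?)\:')
fixed = reg.sub(r',"\1":', raw)                                                # quote every key that follows a comma
fixed = fixed.replace('"{symbol', '{"symbol').replace('{symbol', '{"symbol"')  # quote the leading key
print(json.loads(fixed))                                                       # now parses as ordinary JSON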
Example 3
def _parse_fq_data(url, index, retry_count, pause):
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(url,headers=ua.get_ua())
            text = urlopen(request, timeout=10).read()
            text = text.decode('GBK')
            html = lxml.html.parse(StringIO(text))
            res = html.xpath('//table[@id=\"FundHoldSharesTable\"]')
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            if sarr == '':
                return None
            df = pd.read_html(sarr, skiprows = [0, 1])[0]
            if len(df) == 0:
                return pd.DataFrame()
            if index:
                df.columns = ct.HIST_FQ_COLS[0:7]
            else:
                df.columns = ct.HIST_FQ_COLS
            if df['date'].dtypes == np.object:
                df['date'] = pd.to_datetime(df['date'])
            df = df.drop_duplicates('date')
        except ValueError as e:
            # date is too early; no data is available any more
            return None
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example 4
def get_tick_data(code=None, date=None, retry_count=3, pause=0.001,
                  src='sn'):
    """
        获取分笔数据
    Parameters
    ------
        code:string
                  股票代码 e.g. 600848
        date:string
                  日期 format: YYYY-MM-DD
        retry_count : int, 默认 3
                  如遇网络等问题重复执行的次数
        pause : int, 默认 0
                 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
        src : 数据源选择,可输入sn(新浪)、tt(腾讯)、nt(网易),默认sn
     return
     -------
        DataFrame 当日所有股票交易数据(DataFrame)
              属性:成交时间、成交价格、价格变动,成交手、成交金额(元),买卖类型
    """
    if code is None or len(code)!=6 or date is None:
        return None
    if (src.strip() not in ct.TICK_SRCS):
        print(ct.TICK_SRC_ERROR)
        return None
    symbol = ct._code_to_symbol(code)
    symbol_dgt = ct._code_to_symbol_dgt(code)
    datestr = date.replace('-', '')
    url = {
            ct.TICK_SRCS[0] : ct.TICK_PRICE_URL % (ct.P_TYPE['http'], ct.DOMAINS['sf'], ct.PAGES['dl'],
                                date, symbol),
            ct.TICK_SRCS[1] : ct.TICK_PRICE_URL_TT % (ct.P_TYPE['http'], ct.DOMAINS['tt'], ct.PAGES['idx'],
                                           symbol, datestr),
            ct.TICK_SRCS[2] : ct.TICK_PRICE_URL_NT % (ct.P_TYPE['http'], ct.DOMAINS['163'], date[0:4],
                                         datestr, symbol_dgt)
             }
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            if src == ct.TICK_SRCS[2]:
                df = pd.read_excel(url[src])
                df.columns = ct.TICK_COLUMNS
            else:
                request = Request(url[src], headers=ua.get_ua())
                lines = urlopen(request, timeout=10).read()
                lines = lines.decode('GBK')
                if len(lines) < 20:
                    return None
                df = pd.read_table(StringIO(lines), names=ct.TICK_COLUMNS,
                                   skiprows=[0])
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
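
A minimal usage sketch, assuming get_tick_data is importable from the classic tushare package (the code and date are placeholders):

import tushare as ts  # assumption: classic tushare package

ticks = ts.get_tick_data('600848', date='2017-01-09', src='tt')  # Tencent source; 'sn' and 'nt' are also accepted
if ticks is not None:
    print(ticks.head())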
Example 5
def top10_holders(code=None, year=None, quarter=None, gdtype='0',
                  retry_count=3, pause=0.001):
    if code is None:
        return None
    else:
        code = ct._code_to_symbol(code)
    gdtype = 'LT' if gdtype == '1' else ''
    qdate = ''
    if (year is not None) & (quarter is not None):
        qdate = du.get_q_date(year, quarter)
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(rv.TOP10_HOLDERS_URL%(ct.P_TYPE['http'], ct.DOMAINS['gw'],
                                    gdtype, code.upper()),headers=ua.get_ua())
            lines = urlopen(request, timeout = 10).read()
            lines = lines.decode('utf8') if ct.PY3 else lines
            reg = re.compile(r'= \'\[(.*?)\]\';')
            lines = reg.findall(lines)[0]
            jss = json.loads('[%s]' %lines)
            summ = []
            data = pd.DataFrame()
            for row in jss:
                qt = row['jzrq']
                hold = row['ljcy']
                change = row['ljbh']
                props = row['ljzb']
                arow = [qt, hold, change, props]
                summ.append(arow)
                ls = row['sdgdList']
                dlist = []
                for inrow in ls:
                    sharetype = inrow['gbxz']
                    name = inrow['gdmc']
                    hold = inrow['cgs']
                    h_pro = inrow['zzgs']
                    status = inrow['zjqk']
                    dlist.append([qt, name, hold, h_pro, sharetype, status])
                ddata = pd.DataFrame(dlist, columns=rv.TOP10_PER_COLS)
                data = data.append(ddata, ignore_index=True)
            df = pd.DataFrame(summ, columns=rv.TOP10_SUMM_COLS)
            if qdate != '':
                df = df[df.quarter == qdate]
                data = data[data.quarter == qdate]
        except Exception as e:
            print(e)
        else:
            return df, data
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
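
The function returns two frames, a per-quarter summary and the per-holder detail. A minimal usage sketch, assuming top10_holders is exposed by the classic tushare package:

import tushare as ts  # assumption: classic tushare package

summary, holders = ts.top10_holders('600848', year=2017, quarter=1, gdtype='1')  # gdtype='1' -> tradable-share holders
print(summary.head())
print(holders.head())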
Example 6
def xsg_data(year=None, month=None,
            retry_count=3, pause=0.001):
    """
    获取限售股解禁数据
    Parameters
    --------
    year:年份,默认为当前年
    month:解禁月份,默认为当前月
    retry_count : int, 默认 3
                 如遇网络等问题重复执行的次数 
    pause : int, 默认 0
                重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
    
    Return
    ------
    DataFrame
    code:股票代码
    name:名称
    date:解禁日期
    count:解禁数量(万股)
    ratio:占总盘比率
    """
    year = du.get_year() if year is None else year
    month = du.get_month() if month is None else month
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(rv.XSG_URL%(ct.P_TYPE['http'], ct.DOMAINS['em'],
                                     ct.PAGES['emxsg'], year, month),headers=ua.get_ua())
            lines = urlopen(request, timeout = 10).read()
            lines = lines.decode('utf-8') if ct.PY3 else lines
        except Exception as e:
            print(e)
        else:
            da = lines[3:len(lines)-3]
            rows = []
            for row in da.split('","'):
                rows.append([data for data in row.split(',')])
            df = pd.DataFrame(rows)
            df = df[[1, 3, 4, 5, 6]]
            for col in [5, 6]:
                df[col] = df[col].astype(float)
            df[5] = df[5]/10000
            df[6] = df[6]*100
            df[5] = df[5].map(ct.FORMAT)
            df[6] = df[6].map(ct.FORMAT)
            df.columns = rv.XSG_COLS
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
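
A minimal usage sketch, assuming xsg_data is exposed by the classic tushare package:

import tushare as ts  # assumption: classic tushare package

unlock = ts.xsg_data(year=2017, month=6)   # shares coming off lock-up in 2017-06
print(unlock.head())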
Example 7
def _get_type_data(url):
    try:
        # headers belong on the Request, not on the DataFrame constructor
        request = Request(url, headers=ua.get_ua())
        data_str = urlopen(request, timeout=10).read()
        data_str = data_str.decode('GBK')
        data_str = data_str.split('=')[1]
        data_json = json.loads(data_str)
        df = pd.DataFrame(
            [[row.split(',')[0], row.split(',')[1]]
             for row in data_json.values()],
            columns=['tag', 'name'])
        return df
    except Exception as er:
        print(str(er))
Example 8
def _get_data(url):
    try:
        request = Request(url,headers=ua.get_ua())
        data_str = urlopen(request, timeout=10).read()
        # decode before the string operations so split/replace work under Python 3
        if six.PY3:
            data_str = data_str.decode('utf-8')
        data_str = data_str.split('=')[1]
        data_str = data_str.replace('futures', '"futures"')
        data_str = json.loads(data_str)
        # dict views are not indexable in Python 3, hence the list() wrapper
        df = pd.DataFrame([[col for col in row.split(',')]
                           for row in list(data_str.values())[0]])
        df = df[[1, 2, 5, 4, 6, 7, 13, 9, 17, 18, 16, 21, 22]]
        df.columns = ct.INTL_FUTURES_COL
        return df
    except Exception as er:
        print(str(er))
Example 9
def _day_cinema(date=None, pNo=1, retry_count=3, pause=0.001):
    ct._write_console()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(ct.BOXOFFICE_CBD%(ct.P_TYPE['http'], ct.DOMAINS['mbox'],
                              ct.BOX, pNo, date),headers=ua.get_ua())
            lines = urlopen(request, timeout = 10).read()
            if len(lines) < 15: #no data
                return None
        except Exception as e:
            print(e)
        else:
            js = json.loads(lines.decode('utf-8') if ct.PY3 else lines)
            df = pd.DataFrame(js['data1'])
            df = df.drop(['CinemaID'], axis=1)
            return df
Example 10
def month_boxoffice(date=None, retry_count=3, pause=0.001):
    """
    获取单月电影票房数据
    数据来源:EBOT艺恩票房智库
    Parameters
    ------
        date:日期,默认为上一月,格式YYYY-MM
        retry_count : int, 默认 3
                  如遇网络等问题重复执行的次数
        pause : int, 默认 0
                 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
     return
     -------
        DataFrame 
              Irank         排名
              MovieName     电影名称
              WomIndex      口碑指数
              avgboxoffice  平均票价
              avgshowcount  场均人次
              box_pro       月度占比
              boxoffice     单月票房(万)     
              days          月内天数
              releaseTime   上映日期
    """
    if date is None:
        date = du.day_last_week(-30)[0:7]
    elif len(date)>8:
        print(ct.BOX_INPUT_ERR_MSG)
        return
    date += '-01'
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(ct.BOXOFFICE_MONTH%(ct.P_TYPE['http'], ct.DOMAINS['mbox'],
                              ct.BOX, date),headers=ua.get_ua())
            lines = urlopen(request, timeout = 10).read()
            if len(lines) < 15: #no data
                return None
        except Exception as e:
            print(e)
        else:
            js = json.loads(lines.decode('utf-8') if ct.PY3 else lines)
            df = pd.DataFrame(js['data1'])
            df = df.drop(['defaultImage', 'EnMovieID'], axis=1)
            return df
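
A minimal usage sketch, assuming month_boxoffice is exposed by the classic tushare package:

import tushare as ts  # assumption: classic tushare package

box = ts.month_boxoffice('2016-10')        # format YYYY-MM; omit the argument for the previous month
print(box[['MovieName', 'boxoffice', 'box_pro']].head())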
Example 11
def _sz_hz(date='', retry_count=3, pause=0.001):
    for _ in range(retry_count):
        time.sleep(pause)
        ct._write_console()
        try:
            request = Request(rv.MAR_SZ_HZ_URL%(ct.P_TYPE['http'], ct.DOMAINS['szse'],
                                    ct.PAGES['szsefc'], date),headers=ua.get_ua())
            lines = urlopen(request, timeout = 10).read()
            if len(lines) <= 200:
                return pd.DataFrame()
            df = pd.read_html(lines, skiprows=[0])[0]
            df.columns = rv.MAR_SZ_HZ_COLS
            df['opDate'] = date
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example 12
def day_boxoffice(date=None, retry_count=3, pause=0.001):
    """
    获取单日电影票房数据
    数据来源:EBOT艺恩票房智库
    Parameters
    ------
        date:日期,默认为上一日
        retry_count : int, 默认 3
                  如遇网络等问题重复执行的次数
        pause : int, 默认 0
                 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
     return
     -------
        DataFrame 
              AvgPrice      平均票价
              AvpPeoPle     场均人次
              BoxOffice     单日票房(万)
              BoxOffice_Up  环比变化 (%)
              IRank         排名
              MovieDay      上映天数
              MovieName     影片名 
              SumBoxOffice  累计票房(万) 
              WomIndex      口碑指数 
    """
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            if date is None:
                date = 0
            else:
                date = int(du.diff_day(du.today(), date)) + 1

            request = Request(ct.BOXOFFICE_DAY%(ct.P_TYPE['http'], ct.DOMAINS['mbox'],
                              ct.BOX, date, _random()),headers=ua.get_ua())
            lines = urlopen(request, timeout = 10).read()
            if len(lines) < 15: #no data
                return None
        except Exception as e:
            print(e)
        else:
            js = json.loads(lines.decode('utf-8') if ct.PY3 else lines)
            df = pd.DataFrame(js['data1'])
            df = df.drop(['MovieImg', 'BoxOffice1', 'MovieID', 'Director', 'IRank_pro'], axis=1)
            return df
Example 13
def get_today_ticks(code=None, retry_count=3, pause=0.001):
    """
        获取当日分笔明细数据
    Parameters
    ------
        code:string
                  股票代码 e.g. 600848
        retry_count : int, 默认 3
                  如遇网络等问题重复执行的次数
        pause : int, 默认 0
                 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
     return
     -------
        DataFrame 当日所有股票交易数据(DataFrame)
              属性:成交时间、成交价格、价格变动,成交手、成交金额(元),买卖类型
    """
    if code is None or len(code)!=6 :
        return None
    symbol = _code_to_symbol(code)
    date = du.today()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(ct.TODAY_TICKS_PAGE_URL % (ct.P_TYPE['http'], ct.DOMAINS['vsf'],
                                                       ct.PAGES['jv'], date,
                                                       symbol),headers=ua.get_ua())
            data_str = urlopen(request, timeout=10).read()
            data_str = data_str.decode('GBK')
            data_str = data_str[1:-1]
            data_str = eval(data_str, type('Dummy', (dict,),
                                           dict(__getitem__ = lambda s, n:n))())
            data_str = json.dumps(data_str)
            data_str = json.loads(data_str)
            pages = len(data_str['detailPages'])
            data = pd.DataFrame()
            ct._write_head()
            for pNo in range(1, pages+1):
                data = data.append(_today_ticks(symbol, date, pNo,
                                                retry_count, pause), ignore_index=True)
        except Exception as er:
            print(str(er))
        else:
            return data
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
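
A minimal usage sketch, assuming get_today_ticks is exposed by the classic tushare package:

import tushare as ts  # assumption: classic tushare package

ticks = ts.get_today_ticks('600848')       # tick-by-tick trades for the current session
print(ticks.tail())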
Example 14
def get_latest_news(top=None, show_content=False):
    """
        获取即时财经新闻
    
    Parameters
    --------
        top:数值,显示最新消息的条数,默认为80条
        show_content:是否显示新闻内容,默认False
    
    Return
    --------
        DataFrame
            classify :新闻类别
            title :新闻标题
            time :发布时间
            url :新闻链接
            content:新闻内容(在show_content为True的情况下出现)
    """
    top = ct.PAGE_NUM[2] if top is None else top
    try:
        request = Request(nv.LATEST_URL % (ct.P_TYPE['http'], ct.DOMAINS['sina'],
                                                   ct.PAGES['lnews'], top,
                                                   _random()),headers=ua.get_ua())
        data_str = urlopen(request, timeout=10).read()
        data_str = data_str.decode('GBK')
        data_str = data_str.split('=')[1][:-1]
        data_str = eval(data_str, type('Dummy', (dict,),
                                       dict(__getitem__ = lambda s, n:n))())
        data_str = json.dumps(data_str)
        data_str = json.loads(data_str)
        data_str = data_str['list']
        data = []
        for r in data_str:
            rt = datetime.fromtimestamp(r['time'])
            rtstr = datetime.strftime(rt, "%m-%d %H:%M")
            arow = [r['channel']['title'], r['title'], rtstr, r['url']]
            if show_content:
                arow.append(latest_content(r['url']))
            data.append(arow)
        df = pd.DataFrame(data, columns=nv.LATEST_COLS_C if show_content else nv.LATEST_COLS)
        return df
    except Exception as er:
        print(str(er))
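
A minimal usage sketch, assuming get_latest_news is exposed by the classic tushare package:

import tushare as ts  # assumption: classic tushare package

news = ts.get_latest_news(top=20, show_content=False)   # 20 most recent headlines, without article bodies
print(news[['classify', 'title', 'time']].head())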
Example 15
def sz_margin_details(date='', retry_count=3, pause=0.001):
    """
    获取深市融资融券明细列表
    Parameters
    --------
    date:string
                明细数据日期 format:YYYY-MM-DD 默认为空''
    retry_count : int, 默认 3
                 如遇网络等问题重复执行的次数 
    pause : int, 默认 0
                重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
    
    Return
    ------
    DataFrame
    opDate:信用交易日期
    stockCode:标的证券代码
    securityAbbr:标的证券简称
    rzmre: 融资买入额(元)
    rzye:融资余额(元)
    rqmcl: 融券卖出量
    rqyl: 融券余量
    rqye: 融券余量(元)
    rzrqye:融资融券余额(元)
    """
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(rv.MAR_SZ_MX_URL%(ct.P_TYPE['http'], ct.DOMAINS['szse'],
                                    ct.PAGES['szsefc'], date),headers=ua.get_ua())
            lines = urlopen(request, timeout = 10).read()
            if len(lines) <= 200:
                return pd.DataFrame()
            df = pd.read_html(lines, skiprows=[0])[0]
            df.columns = rv.MAR_SZ_MX_COLS
            df['stockCode'] = df['stockCode'].map(lambda x:str(x).zfill(6))
            df['opDate'] = date
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
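
A minimal usage sketch, assuming sz_margin_details is exposed by the classic tushare package:

import tushare as ts  # assumption: classic tushare package

details = ts.sz_margin_details('2017-06-12')   # per-security margin/short data for one Shenzhen trading day
print(details.head())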
Example 16
def get_cash_flow(code):
    """
        获取某股票的历史所有时期现金流表
    Parameters
    --------
    code:str 股票代码 e.g:600518
       
    Return
    --------
    DataFrame
        行列名称为中文且数目较多,建议获取数据后保存到本地查看
    """
    if code.isdigit():
        request = Request(ct.SINA_CASHFLOW_URL % (code), headers=ua.get_ua())
        text = urlopen(request, timeout=10).read()
        text = text.decode('GBK')
        text = text.replace('\t\n', '\r\n')
        text = text.replace('\t', ',')
        df = pd.read_csv(StringIO(text), dtype={'code': 'object'})
        return df
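
A minimal usage sketch, assuming get_cash_flow is exposed by the classic tushare package:

import tushare as ts  # assumption: classic tushare package

cf = ts.get_cash_flow('600518')                        # all historical cash-flow statements for the stock
cf.to_csv('600518_cash_flow.csv', encoding='utf-8')    # column names are Chinese; easier to inspect from a file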
Example 17
def _parase_fq_factor(code, start, end):
    symbol = _code_to_symbol(code)
    request = Request(ct.HIST_FQ_FACTOR_URL%(ct.P_TYPE['http'],
                                             ct.DOMAINS['vsf'], symbol),headers=ua.get_ua())
    text = urlopen(request, timeout=10).read()
    text = text[1:len(text)-1]
    text = text.decode('utf-8') if ct.PY3 else text
    text = text.replace('{_', '{"')
    text = text.replace('total', '"total"')
    text = text.replace('data', '"data"')
    text = text.replace(':"', '":"')
    text = text.replace('",_', '","')
    text = text.replace('_', '-')
    text = json.loads(text)
    df = pd.DataFrame({'date':list(text['data'].keys()), 'factor':list(text['data'].values())})
    df['date'] = df['date'].map(_fun_except) # for null case
    if df['date'].dtypes == np.object:
        df['date'] = pd.to_datetime(df['date'])
    df = df.drop_duplicates('date')
    df['factor'] = df['factor'].astype(float)
    return df
Example 18
def get_stock_basics(date=None):
    """
        获取沪深上市公司基本情况
    Parameters
    date:日期YYYY-MM-DD,默认为上一个交易日,目前只能提供2016-08-09之后的历史数据

    Return
    --------
    DataFrame
               code,代码
               name,名称
               industry,细分行业
               area,地区
               pe,市盈率
               outstanding,流通股本
               totals,总股本(万)
               totalAssets,总资产(万)
               liquidAssets,流动资产
               fixedAssets,固定资产
               reserved,公积金
               reservedPerShare,每股公积金
               eps,每股收益
               bvps,每股净资
               pb,市净率
               timeToMarket,上市日期
    """
    wdate = du.last_tddate() if date is None else date
    wdate = wdate.replace('-', '')
    if wdate < '20160809':
        return None
    datepre = '' if date is None else wdate[0:4] + wdate[4:6] + '/'
    request = Request(ct.ALL_STOCK_BASICS_FILE %
                      (datepre, '' if date is None else wdate),
                      headers=ua.get_ua())
    text = urlopen(request, timeout=10).read()
    text = text.decode('GBK')
    text = text.replace('--', '')
    df = pd.read_csv(StringIO(text), dtype={'code': 'object'})
    df = df.set_index('code')
    return df
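
A minimal usage sketch, assuming get_stock_basics is exposed by the classic tushare package:

import tushare as ts  # assumption: classic tushare package

basics = ts.get_stock_basics()             # latest snapshot, indexed by stock code
print(basics.loc['600848', ['name', 'industry', 'pe', 'timeToMarket']])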
Example 19
def get_sina_dd(code=None, date=None, vol=400, retry_count=3, pause=0.001):
    """
        获取sina大单数据
    Parameters
    ------
        code:string
                  股票代码 e.g. 600848
        date:string
                  日期 format:YYYY-MM-DD
        retry_count : int, 默认 3
                  如遇网络等问题重复执行的次数
        pause : int, 默认 0
                 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
     return
     -------
        DataFrame 当日所有股票交易数据(DataFrame)
              属性:股票代码    股票名称    交易时间    价格    成交量    前一笔价格    类型(买、卖、中性盘)
    """
    if code is None or len(code)!=6 or date is None:
        return None
    symbol = _code_to_symbol(code)
    vol = vol*100
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(ct.SINA_DD % (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['sinadd'],
                                symbol, vol, date), headers=ua.get_ua())
            lines = urlopen(request, timeout=10).read()
            lines = lines.decode('GBK')
            if len(lines) < 100:
                return None
            df = pd.read_csv(StringIO(lines), names=ct.SINA_DD_COLS,
                               skiprows=[0])
            if df is not None:
                df['code'] = df['code'].map(lambda x: x[2:])
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
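
A minimal usage sketch, assuming get_sina_dd is exposed by the classic tushare package:

import tushare as ts  # assumption: classic tushare package

dd = ts.get_sina_dd('600848', date='2017-01-09', vol=500)   # only orders of 500 lots or more
if dd is not None:
    print(dd.head())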
Example 20
def _get_report_data(year,
                     quarter,
                     pageNo,
                     dataArr,
                     retry_count=3,
                     pause=0.001):
    ct._write_console()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(
                ct.REPORT_URL %
                (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year,
                 quarter, pageNo, ct.PAGE_NUM[1]),
                headers=ua.get_ua())
            text = urlopen(request, timeout=10).read()
            text = text.decode('GBK')
            text = text.replace('--', '')
            html = lxml.html.parse(StringIO(text))
            res = html.xpath("//table[@class=\"list_table\"]/tr")
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            sarr = '<table>%s</table>' % sarr
            df = pd.read_html(sarr)[0]
            df = df.drop(11, axis=1)
            df.columns = ct.REPORT_COLS
            dataArr = dataArr.append(df, ignore_index=True)
            nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
            if len(nextPage) > 0:
                pageNo = re.findall(r'\d+', nextPage[0])[0]
                return _get_report_data(year, quarter, pageNo, dataArr)
            else:
                return dataArr
        except Exception as e:
            pass
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example 21
def _get_k_data(url, dataflag='',
                symbol='',
                code = '',
                index = False,
                ktype = '',
                retry_count=3,
                pause=0.001):
    for _ in range(retry_count):
            time.sleep(pause)
            try:
                request = Request(url,headers=ua.get_ua())
                lines = urlopen(request, timeout = 10).read()
                if len(lines) < 100: #no data
                    return None
            except Exception as e:
                print(e)
            else:
                lines = lines.decode('utf-8') if ct.PY3 else lines
                lines = lines.split('=')[1]
                reg = re.compile(r',{"nd.*?}')
                lines = re.subn(reg, '', lines)
                js = json.loads(lines[0])
                dataflag = dataflag if dataflag in list(js['data'][symbol].keys()) else ct.TT_K_TYPE[ktype.upper()]
                if len(js['data'][symbol][dataflag]) == 0:
                    return None
                if len(js['data'][symbol][dataflag][0]) == 6:
                    df = pd.DataFrame(js['data'][symbol][dataflag],
                                  columns = ct.KLINE_TT_COLS_MINS)
                else:
                    df = pd.DataFrame(js['data'][symbol][dataflag],
                                  columns = ct.KLINE_TT_COLS)
                df['code'] = symbol if index else code
                if ktype in ct.K_MIN_LABELS:
                    df['date'] = df['date'].map(lambda x: '%s-%s-%s %s:%s'%(x[0:4], x[4:6],
                                                                            x[6:8], x[8:10],
                                                                            x[10:12]))
                for col in df.columns[1:6]:
                    df[col] = df[col].astype(float)
                return df
Example 22
def _holding_cotent(start, end, pageNo, retry_count, pause):
    for _ in range(retry_count):
        time.sleep(pause)
        if pageNo>0:
                ct._write_console()
        try:
            request = Request(rv.FUND_HOLDS_URL%(ct.P_TYPE['http'], ct.DOMAINS['163'],
                     ct.PAGES['163fh'], ct.PAGES['163fh'],
                     pageNo, start, end, _random(5)),headers=ua.get_ua())
            lines = urlopen(request, timeout = 10).read()
            lines = lines.decode('utf-8') if ct.PY3 else lines
            lines = lines.replace('--', '0')
            lines = json.loads(lines)
            data = lines['list']
            df = pd.DataFrame(data)
            df = df.drop(['CODE', 'ESYMBOL', 'EXCHANGE', 'NAME', 'RN', 'SHANGQIGUSHU',
                              'SHANGQISHIZHI', 'SHANGQISHULIANG'], axis=1)
            for col in ['GUSHU', 'GUSHUBIJIAO', 'SHIZHI', 'SCSTC27']:
                df[col] = df[col].astype(float)
            df['SCSTC27'] = df['SCSTC27']*100
            df['GUSHU'] = df['GUSHU']/10000
            df['GUSHUBIJIAO'] = df['GUSHUBIJIAO']/10000
            df['SHIZHI'] = df['SHIZHI']/10000
            df['GUSHU'] = df['GUSHU'].map(ct.FORMAT)
            df['GUSHUBIJIAO'] = df['GUSHUBIJIAO'].map(ct.FORMAT)
            df['SHIZHI'] = df['SHIZHI'].map(ct.FORMAT)
            df['SCSTC27'] = df['SCSTC27'].map(ct.FORMAT)
            df.columns = rv.FUND_HOLDS_COLS
            df = df[['code', 'name', 'date', 'nums', 'nlast', 'count',
                         'clast', 'amount', 'ratio']]
        except Exception as e:
            print(e)
        else:
            if pageNo == 0:
                return df, int(lines['pagecount'])
            else:
                return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example 23
def realtime_boxoffice(retry_count=3,pause=0.001):
    """
    获取实时电影票房数据
    数据来源:EBOT艺恩票房智库
    Parameters
    ------
        retry_count : int, 默认 3
                  如遇网络等问题重复执行的次数
        pause : int, 默认 0
                 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
     return
     -------
        DataFrame 
              BoxOffice     实时票房(万) 
              Irank         排名
              MovieName     影片名 
              boxPer        票房占比 (%)
              movieDay      上映天数
              sumBoxOffice  累计票房(万) 
              time          数据获取时间
    """
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(ct.MOVIE_BOX%(ct.P_TYPE['http'], ct.DOMAINS['mbox'],
                              ct.BOX, _random()),headers=ua.get_ua())
            lines = urlopen(request, timeout = 10).read()
            if len(lines) < 15: #no data
                return None
        except Exception as e:
            print(e)
        else:
            js = json.loads(lines.decode('utf-8') if ct.PY3 else lines)
            df = pd.DataFrame(js['data2'])
            df = df.drop(['MovieImg','mId'], axis=1)
            df['time'] = du.get_now()
            return df
Example 24
def _get_detail(tag, retry_count=3, pause=0.001):
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            ct._write_console()
            request = Request(
                ct.SINA_DATA_DETAIL_URL %
                (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['jv'], tag),
                headers=ua.get_ua())
            text = urlopen(request, timeout=10).read()
            text = text.decode('gbk')
        except _network_error_classes:
            pass
        else:
            reg = re.compile(r'\,(.*?)\:')
            text = reg.sub(r',"\1":', text)
            text = text.replace('"{symbol', '{"symbol')
            text = text.replace('{symbol', '{"symbol"')
            jstr = json.dumps(text)
            js = json.loads(jstr)
            df = pd.DataFrame(pd.read_json(js, dtype={'code': object}),
                              columns=ct.THE_FIELDS)
            df = df[ct.FOR_CLASSIFY_B_COLS]
            return df
Example 25
def get_hist_data(code=None, start=None, end=None,
                  ktype='D', retry_count=3,
                  pause=0.001):
    """
        获取个股历史交易记录
    Parameters
    ------
      code:string
                  股票代码 e.g. 600848
      start:string
                  开始日期 format:YYYY-MM-DD 为空时取到API所提供的最早日期数据
      end:string
                  结束日期 format:YYYY-MM-DD 为空时取到最近一个交易日数据
      ktype:string
                  数据类型,D=日k线 W=周 M=月 5=5分钟 15=15分钟 30=30分钟 60=60分钟,默认为D
      retry_count : int, 默认 3
                 如遇网络等问题重复执行的次数 
      pause : int, 默认 0
                重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
    return
    -------
      DataFrame
          属性:日期 ,开盘价, 最高价, 收盘价, 最低价, 成交量, 价格变动 ,涨跌幅,5日均价,10日均价,20日均价,5日均量,10日均量,20日均量,换手率
    """
    symbol = _code_to_symbol(code)
    url = ''
    if ktype.upper() in ct.K_LABELS:
        url = ct.DAY_PRICE_URL%(ct.P_TYPE['http'], ct.DOMAINS['ifeng'],
                                ct.K_TYPE[ktype.upper()], symbol)
    elif ktype in ct.K_MIN_LABELS:
        url = ct.DAY_PRICE_MIN_URL%(ct.P_TYPE['http'], ct.DOMAINS['ifeng'],
                                    symbol, ktype)
    else:
        raise TypeError('ktype input error.')

    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(url,headers=ua.get_ua())
            lines = urlopen(request, timeout = 10).read()
            if len(lines) < 15: #no data
                return None
        except Exception as e:
            print(e)
        else:
            js = json.loads(lines.decode('utf-8') if ct.PY3 else lines)
            cols = []
            if (code in ct.INDEX_LABELS) & (ktype.upper() in ct.K_LABELS):
                cols = ct.INX_DAY_PRICE_COLUMNS
            else:
                cols = ct.DAY_PRICE_COLUMNS
            if len(js['record'][0]) == 14:
                cols = ct.INX_DAY_PRICE_COLUMNS
            df = pd.DataFrame(js['record'], columns=cols)
            if ktype.upper() in ['D', 'W', 'M']:
                df = df.applymap(lambda x: x.replace(u',', u''))
                df[df==''] = 0
            for col in cols[1:]:
                df[col] = df[col].astype(float)
            if start is not None:
                df = df[df.date >= start]
            if end is not None:
                df = df[df.date <= end]
            if (code in ct.INDEX_LABELS) & (ktype in ct.K_MIN_LABELS):
                df = df.drop('turnover', axis=1)
            df = df.set_index('date')
            df = df.sort_index(ascending = False)
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
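
A minimal usage sketch, assuming get_hist_data is exposed by the classic tushare package:

import tushare as ts  # assumption: classic tushare package

daily = ts.get_hist_data('600848', start='2017-01-01', end='2017-03-31')   # daily bars, most recent first
bars_30min = ts.get_hist_data('600848', ktype='30')                        # 30-minute bars
print(daily[['open', 'high', 'low', 'close', 'ma5']].head())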
Example 26
def get_realtime_quotes(symbols=None):
    """
        获取实时交易数据 getting real time quotes data
       用于跟踪交易情况(本次执行的结果-上一次执行的数据)
    Parameters
    ------
        symbols : string, array-like object (list, tuple, Series).
        
    return
    -------
        DataFrame 实时交易数据
              属性:0:name,股票名字
            1:open,今日开盘价
            2:pre_close,昨日收盘价
            3:price,当前价格
            4:high,今日最高价
            5:low,今日最低价
            6:bid,竞买价,即“买一”报价
            7:ask,竞卖价,即“卖一”报价
            8:volumn,成交量 maybe you need do volumn/100
            9:amount,成交金额(元 CNY)
            10:b1_v,委买一(笔数 bid volume)
            11:b1_p,委买一(价格 bid price)
            12:b2_v,“买二”
            13:b2_p,“买二”
            14:b3_v,“买三”
            15:b3_p,“买三”
            16:b4_v,“买四”
            17:b4_p,“买四”
            18:b5_v,“买五”
            19:b5_p,“买五”
            20:a1_v,委卖一(笔数 ask volume)
            21:a1_p,委卖一(价格 ask price)
            ...
            30:date,日期;
            31:time,时间;
    """
    symbols_list = ''
    if isinstance(symbols, (list, set, tuple, pd.Series)):
        for code in symbols:
            symbols_list += _code_to_symbol(code) + ','
    else:
        symbols_list = _code_to_symbol(symbols)

    symbols_list = symbols_list[:-1] if len(symbols_list) > 8 else symbols_list
    request = Request(ct.LIVE_DATA_URL%(ct.P_TYPE['http'], ct.DOMAINS['sinahq'],
                                                _random(), symbols_list),headers=ua.get_ua())
    text = urlopen(request,timeout=10).read()
    text = text.decode('GBK')
    reg = re.compile(r'\="(.*?)\";')
    data = reg.findall(text)
    regSym = re.compile(r'(?:sh|sz)(.*?)\=')
    syms = regSym.findall(text)
    data_list = []
    syms_list = []
    for index, row in enumerate(data):
        if len(row)>1:
            data_list.append([astr for astr in row.split(',')])
            syms_list.append(syms[index])
    if len(syms_list) == 0:
        return None
    df = pd.DataFrame(data_list, columns=ct.LIVE_DATA_COLS)
    df = df.drop('s', axis=1)
    df['code'] = syms_list
    ls = [cls for cls in df.columns if '_v' in cls]
    for txt in ls:
        df[txt] = df[txt].map(lambda x : x[:-2])
    return df
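
A minimal usage sketch, assuming get_realtime_quotes is exposed by the classic tushare package:

import tushare as ts  # assumption: classic tushare package

quotes = ts.get_realtime_quotes(['600848', '000001', 'sh'])   # stock codes and index labels can be mixed
print(quotes[['code', 'name', 'price', 'bid', 'ask', 'time']])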