Example #1
0
def _get_profit_data(year, quarter, pageNo, dataArr):
    """
    Fetch paged "ylnl" report data (profit data, per the function name)
    and accumulate it onto ``dataArr``.

    Pages are requested from ``ct.SINA_OPEN_API_URL`` starting at
    ``pageNo`` and continuing until ``pageNo * ct.OPEN_API_PAGE_NUM``
    reaches the ``count`` reported by the service.

    Parameters
    ------
      year : int
          report year, formatted with ``%d`` into the request parameter
      quarter : int
          report quarter, formatted with ``%d`` into the request parameter
      pageNo : int
          first page to request
      dataArr : DataFrame
          rows collected so far; a new frame is returned, the argument is
          not modified in place
    return
    -------
      DataFrame
          ``dataArr`` extended with every fetched page; returned as
          accumulated so far if the service answers with JSON ``null``
      None
          if a request or parsing step raises (the error is printed)
    """
    # Pagination is a loop rather than recursion so that the number of
    # pages is not limited by the interpreter's recursion depth.
    while True:
        ct._write_console()
        try:
            # param:["ylnl",industry,region,concept,year,quarter,"{sort}",{asc},{page},{num}]
            ylnl_list_param = '["ylnl","","","","%d","%d","",0,%d,%d]' % (
                year, quarter, pageNo, ct.OPEN_API_PAGE_NUM)
            request = Request(ct.SINA_OPEN_API_URL %
                              (quote(ylnl_list_param, ',[]')))
            request.add_header("User-Agent", ct.USER_AGENT)
            text = urlopen(request, timeout=ct.API_TIMEOUT).read()
            text = text.decode('gbk') if ct.PY3 else text
            js = json.loads(text.strip())
            if js is None:
                return dataArr
            df = pd.DataFrame(js[0]['items'])
            if not df.empty:
                # Drop the column labelled 0 before applying the column
                # names (presumably an unused leading field - TODO confirm).
                df = df.drop(0, axis=1)
                df.columns = ct.PROFIT_COLS
                # DataFrame.append was removed in pandas 2.0; concat is the
                # equivalent that works on every pandas version.
                dataArr = pd.concat([dataArr, df], ignore_index=True)
            if int(js[0]['count']) <= pageNo * ct.OPEN_API_PAGE_NUM:
                return dataArr
        except Exception as e:
            # Best-effort: report the problem and signal failure with None.
            print(e)
            return None
        pageNo = pageNo + 1
Example #2
0
def _parsing_dayprice_json(pageNum=1):
    """
           Fetch one page of the current day's quotes (JSON-like payload)
           and parse it into a DataFrame.
     Parameters
     ------
        pageNum: page number, formatted into ct.SINA_DAY_PRICE_URL
     return
     -------
        DataFrame with the columns ct.DAY_TRADING_COLUMNS minus 'symbol',
        restricted to rows whose volume is greater than 0;
        None when the service answers with the literal text 'null'
    """
    # Local import: the payload is handed to pandas as a file-like object
    # because passing literal JSON strings to read_json is deprecated.
    from io import StringIO

    ct._write_console()
    request = Request(ct.SINA_DAY_PRICE_URL % (pageNum))
    text = urlopen(request, timeout=ct.API_TIMEOUT).read()
    # On Python 3 the response is bytes, so it must be decoded before it is
    # compared with 'null'; comparing bytes with str is always False.
    if ct.PY3:
        text = text.decode('gbk')
    if text.strip() == 'null':
        return None
    # The payload uses unquoted keys; quote every key that follows a comma,
    # then repair the first key of each object ('symbol').
    reg = re.compile(r'\,(.*?)\:')
    text = reg.sub(r',"\1":', text)
    text = text.replace('"{symbol', '{"symbol')
    text = text.replace('{symbol', '{"symbol"')
    if ct.PY3:
        jstr = json.dumps(text)
    else:
        jstr = json.dumps(text, encoding='GBK')
    js = json.loads(jstr)
    df = pd.DataFrame(pd.read_json(StringIO(js), dtype={'code': object}),
                      columns=ct.DAY_TRADING_COLUMNS)
    df = df.drop('symbol', axis=1)
    # .ix was removed in pandas 1.0; .loc performs the same boolean selection.
    df = df.loc[df.volume > 0]
    return df
Example #3
0
def _parsing_dayprice_json(pageNum=1):
    """
           Download and parse one page of today's quote data, which the
           service delivers as JSON-like text.
     Parameters
     ------
        pageNum: page number substituted into ct.SINA_DAY_PRICE_URL
     return
     -------
        DataFrame built from ct.DAY_TRADING_COLUMNS with the 'symbol'
        column dropped and only rows with volume > 0 kept;
        None when the response body is the text 'null'
    """
    # Imported locally so the block does not rely on a module-level name;
    # read_json receives a file-like object because literal JSON strings
    # are deprecated as its first argument.
    from io import StringIO

    ct._write_console()
    request = Request(ct.SINA_DAY_PRICE_URL % (pageNum))
    raw = urlopen(request, timeout=ct.API_TIMEOUT).read()
    # Decode first on Python 3: bytes never compare equal to the str 'null',
    # so the empty-page check would otherwise be skipped.
    text = raw.decode('gbk') if ct.PY3 else raw
    if text.strip() == 'null':
        return None
    # Keys arrive unquoted: wrap each key that follows a comma in quotes,
    # then fix up the leading 'symbol' key of every object.
    reg = re.compile(r'\,(.*?)\:')
    text = reg.sub(r',"\1":', text)
    text = text.replace('"{symbol', '{"symbol')
    text = text.replace('{symbol', '{"symbol"')
    if ct.PY3:
        jstr = json.dumps(text)
    else:
        jstr = json.dumps(text, encoding='GBK')
    js = json.loads(jstr)
    df = pd.DataFrame(pd.read_json(StringIO(js), dtype={'code': object}),
                      columns=ct.DAY_TRADING_COLUMNS)
    df = df.drop('symbol', axis=1)
    # .loc replaces the .ix indexer, which no longer exists in pandas >= 1.0.
    df = df.loc[df.volume > 0]
    return df
Example #4
0
def _get_growth_data(year, quarter, pageNo, dataArr):
    """
    Fetch paged "cnl" report data (growth data, per the function name)
    and accumulate it onto ``dataArr``.

    Pages are requested from ``ct.SINA_OPEN_API_URL`` starting at
    ``pageNo`` and continuing until ``pageNo * ct.OPEN_API_PAGE_NUM``
    reaches the ``count`` reported by the service.

    Parameters
    ------
      year : int
          report year, formatted with ``%d`` into the request parameter
      quarter : int
          report quarter, formatted with ``%d`` into the request parameter
      pageNo : int
          first page to request
      dataArr : DataFrame
          rows collected so far; a new frame is returned, the argument is
          not modified in place
    return
    -------
      DataFrame
          ``dataArr`` extended with every fetched page; returned as
          accumulated so far if the service answers with JSON ``null``
      None
          if a request or parsing step raises (the error is printed)
    """
    # Pagination is a loop rather than recursion so that the number of
    # pages is not limited by the interpreter's recursion depth.
    while True:
        ct._write_console()
        try:
            # param:["cnl",industry,region,concept,year,quarter,"{sort}",{asc},{page},{num}]
            list_param = '["cnl","","","","%d","%d","",0,%d,%d]' % (
                year, quarter, pageNo, ct.OPEN_API_PAGE_NUM)
            request = Request(ct.SINA_OPEN_API_URL %
                              (quote(list_param, ',[]')))
            request.add_header("User-Agent", ct.USER_AGENT)
            text = urlopen(request, timeout=ct.API_TIMEOUT).read()
            text = text.decode('gbk') if ct.PY3 else text
            js = json.loads(text.strip())
            if js is None:
                return dataArr
            df = pd.DataFrame(js[0]['items'])
            if not df.empty:
                df.columns = ct.GROWTH_COLS
                for col in (ct.GROWTH_COLS[0:4] + ct.GROWTH_COLS[7:]):
                    # Placeholder values ("--", empty, "None") become "0"
                    # so the column can be converted to float.
                    df[col] = df[col].astype(str).replace("--", "0").replace(
                        "", "0").replace("None", "0").astype(float)
                # DataFrame.append was removed in pandas 2.0; concat is the
                # equivalent that works on every pandas version.
                dataArr = pd.concat([dataArr, df], ignore_index=True)
            if int(js[0]['count']) <= pageNo * ct.OPEN_API_PAGE_NUM:
                return dataArr
        except Exception as e:
            # Best-effort: report the problem and signal failure with None.
            print(e)
            return None
        pageNo = pageNo + 1
Example #5
0
def _get_profit_data(year, quarter, pageNo, dataArr):
    """
    Download "ylnl" report pages (profit data, going by the function name)
    and append their rows to ``dataArr``.

    Requests go to ``ct.SINA_OPEN_API_URL``, one page at a time from
    ``pageNo`` onward, until the reported ``count`` no longer exceeds
    ``pageNo * ct.OPEN_API_PAGE_NUM``.

    Parameters
    ------
      year : int
          report year (inserted into the request with ``%d``)
      quarter : int
          report quarter (inserted into the request with ``%d``)
      pageNo : int
          page to start from
      dataArr : DataFrame
          previously collected rows; left untouched, a new frame is returned
    return
    -------
      DataFrame
          the accumulated rows; also returned as-is when the service
          replies with JSON ``null``
      None
          when fetching or parsing raises; the exception is printed
    """
    # Iterating instead of recursing keeps deep result sets from hitting
    # the recursion limit.
    while True:
        ct._write_console()
        try:
            # param:["ylnl",industry,region,concept,year,quarter,"{sort}",{asc},{page},{num}]
            ylnl_list_param = '["ylnl","","","","%d","%d","",0,%d,%d]' % (
                year, quarter, pageNo, ct.OPEN_API_PAGE_NUM)
            request = Request(
                ct.SINA_OPEN_API_URL % (quote(ylnl_list_param, ',[]')))
            request.add_header("User-Agent", ct.USER_AGENT)
            text = urlopen(request, timeout=ct.API_TIMEOUT).read()
            text = text.decode('gbk') if ct.PY3 else text
            js = json.loads(text.strip())
            if js is None:
                return dataArr
            df = pd.DataFrame(js[0]['items'])
            if not df.empty:
                # The column labelled 0 is discarded before naming the rest
                # (presumably an unused leading field - TODO confirm).
                df = df.drop(0, axis=1)
                df.columns = ct.PROFIT_COLS
                # pd.concat replaces DataFrame.append, which pandas 2.0
                # removed.
                dataArr = pd.concat([dataArr, df], ignore_index=True)
            if int(js[0]['count']) <= pageNo * ct.OPEN_API_PAGE_NUM:
                return dataArr
        except Exception as e:
            # Best-effort: print the error and report failure as None.
            print(e)
            return None
        pageNo = pageNo + 1
Example #6
0
def _get_growth_data(year, quarter, pageNo, dataArr):
    """
    Download "cnl" report pages (growth data, going by the function name)
    and append their rows to ``dataArr``.

    Requests go to ``ct.SINA_OPEN_API_URL``, one page at a time from
    ``pageNo`` onward, until the reported ``count`` no longer exceeds
    ``pageNo * ct.OPEN_API_PAGE_NUM``.

    Parameters
    ------
      year : int
          report year (inserted into the request with ``%d``)
      quarter : int
          report quarter (inserted into the request with ``%d``)
      pageNo : int
          page to start from
      dataArr : DataFrame
          previously collected rows; left untouched, a new frame is returned
    return
    -------
      DataFrame
          the accumulated rows; also returned as-is when the service
          replies with JSON ``null``
      None
          when fetching or parsing raises; the exception is printed
    """
    # Iterating instead of recursing keeps deep result sets from hitting
    # the recursion limit.
    while True:
        ct._write_console()
        try:
            # param:["cnl",industry,region,concept,year,quarter,"{sort}",{asc},{page},{num}]
            list_param = '["cnl","","","","%d","%d","",0,%d,%d]' % (
                year, quarter, pageNo, ct.OPEN_API_PAGE_NUM)
            request = Request(
                ct.SINA_OPEN_API_URL % (quote(list_param, ',[]')))
            request.add_header("User-Agent", ct.USER_AGENT)
            text = urlopen(request, timeout=ct.API_TIMEOUT).read()
            text = text.decode('gbk') if ct.PY3 else text
            js = json.loads(text.strip())
            if js is None:
                return dataArr
            df = pd.DataFrame(js[0]['items'])
            if not df.empty:
                df.columns = ct.GROWTH_COLS
                for col in (ct.GROWTH_COLS[0:4] + ct.GROWTH_COLS[7:]):
                    # Map the placeholders "--", "" and "None" to "0" so
                    # the float conversion succeeds.
                    cleaned = df[col].astype(str)
                    cleaned = cleaned.replace("--", "0")
                    cleaned = cleaned.replace("", "0")
                    cleaned = cleaned.replace("None", "0")
                    df[col] = cleaned.astype(float)
                # pd.concat replaces DataFrame.append, which pandas 2.0
                # removed.
                dataArr = pd.concat([dataArr, df], ignore_index=True)
            if int(js[0]['count']) <= pageNo * ct.OPEN_API_PAGE_NUM:
                return dataArr
        except Exception as e:
            # Best-effort: print the error and report failure as None.
            print(e)
            return None
        pageNo = pageNo + 1
Example #7
0
def _get_stock_hq_list(pageNo, dataArr):
    """
    Download "hq"/"hs_a" list pages and append their rows to ``dataArr``.

    Requests go to ``ct.SINA_OPEN_API_URL``, one page at a time from
    ``pageNo`` onward, until the reported ``count`` no longer exceeds
    ``pageNo * ct.OPEN_API_PAGE_NUM``. Column names are taken from the
    ``fields`` entry of each response.

    Parameters
    ------
      pageNo : int
          page to start from
      dataArr : DataFrame
          previously collected rows; left untouched, a new frame is returned
    return
    -------
      DataFrame
          the accumulated rows; also returned as-is when the service
          replies with JSON ``null``
      None
          when fetching or parsing raises; the exception is printed
    """
    # Iterating instead of recursing keeps long listings from hitting the
    # recursion limit.
    while True:
        ct._write_console()
        try:
            # param:["hq","hs_a","{sort}",{asc},{page},{num}]
            hq_list_param = '["hq","hs_a","",0,%d,%d]' % (
                pageNo, ct.OPEN_API_PAGE_NUM)
            request = Request(
                ct.SINA_OPEN_API_URL % (quote(hq_list_param, ',[]')))
            request.add_header("User-Agent", ct.USER_AGENT)
            text = urlopen(request, timeout=ct.API_TIMEOUT).read()
            text = text.decode('gbk') if ct.PY3 else text
            js = json.loads(text.strip())
            if js is None:
                return dataArr
            df = pd.DataFrame(js[0]['items'], columns=js[0]['fields'])
            # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
            dataArr = pd.concat([dataArr, df], ignore_index=True)
            if int(js[0]['count']) <= pageNo * ct.OPEN_API_PAGE_NUM:
                return dataArr
        except Exception as e:
            # Best-effort: print the error and report failure as None.
            print(e)
            return None
        pageNo = pageNo + 1
Example #8
0
def _get_stock_hq_list(pageNo, dataArr):
    """
    Fetch the paged "hq"/"hs_a" listing and accumulate it onto ``dataArr``.

    Pages are requested from ``ct.SINA_OPEN_API_URL`` starting at
    ``pageNo`` and continuing until ``pageNo * ct.OPEN_API_PAGE_NUM``
    reaches the ``count`` reported by the service. Each page's columns are
    named from the ``fields`` entry of the response.

    Parameters
    ------
      pageNo : int
          first page to request
      dataArr : DataFrame
          rows collected so far; a new frame is returned, the argument is
          not modified in place
    return
    -------
      DataFrame
          ``dataArr`` extended with every fetched page; returned as
          accumulated so far if the service answers with JSON ``null``
      None
          if a request or parsing step raises (the error is printed)
    """
    # Pagination is a loop rather than recursion so that the number of
    # pages is not limited by the interpreter's recursion depth.
    while True:
        ct._write_console()
        try:
            # param:["hq","hs_a","{sort}",{asc},{page},{num}]
            hq_list_param = '["hq","hs_a","",0,%d,%d]' % (pageNo,
                                                          ct.OPEN_API_PAGE_NUM)
            request = Request(ct.SINA_OPEN_API_URL %
                              (quote(hq_list_param, ',[]')))
            request.add_header("User-Agent", ct.USER_AGENT)
            text = urlopen(request, timeout=ct.API_TIMEOUT).read()
            text = text.decode('gbk') if ct.PY3 else text
            js = json.loads(text.strip())
            if js is None:
                return dataArr
            df = pd.DataFrame(js[0]['items'], columns=js[0]['fields'])
            # DataFrame.append was removed in pandas 2.0; concat is the
            # equivalent that works on every pandas version.
            dataArr = pd.concat([dataArr, df], ignore_index=True)
            if int(js[0]['count']) <= pageNo * ct.OPEN_API_PAGE_NUM:
                return dataArr
        except Exception as e:
            # Best-effort: report the problem and signal failure with None.
            print(e)
            return None
        pageNo = pageNo + 1
Example #9
0
def _today_ticks(symbol, tdate, pageNo, retry_count, pause):
    """
    Load one page of tick data from ct.TODAY_TICKS_URL, retrying on failure.

    Parameters
    ------
      symbol, tdate, pageNo
          values substituted into ct.TODAY_TICKS_URL
      retry_count : int
          maximum number of attempts
      pause : number
          seconds to sleep before every attempt
    return
    -------
      DataFrame
          rows of the "datatbl" table, with columns ct.TODAY_TICK_COLUMNS
          and the '%' sign removed from the 'pchange' values
    raises
    -------
      IOError
          with ct.NETWORK_URL_ERROR_MSG when no attempt succeeded
    """
    ct._write_console()
    for _attempt in range(retry_count):
        time.sleep(pause)
        try:
            page = lxml.html.parse(
                ct.TODAY_TICKS_URL % (symbol, tdate, pageNo))
            rows = page.xpath('//table[@id=\"datatbl\"]/tbody/tr')
            # Serialise every row; on Python 3 tostring() yields bytes.
            needs_decode = ct.PY3
            pieces = []
            for row in rows:
                markup = etree.tostring(row)
                pieces.append(markup.decode('utf-8') if needs_decode
                              else markup)
            # Re-wrap the rows in a table so read_html can parse them;
            # '--' placeholders are turned into '0'.
            table_html = '<table>%s</table>' % ''.join(pieces)
            table_html = table_html.replace('--', '0')
            ticks = pd.read_html(StringIO(table_html), parse_dates=False)[0]
            ticks.columns = ct.TODAY_TICK_COLUMNS
            ticks['pchange'] = ticks['pchange'].map(
                lambda x: x.replace('%', ''))
            return ticks
        except Exception as err:
            # Report the failure and fall through to the next attempt.
            print(err)
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #10
0
def _today_ticks(symbol, tdate, pageNo, retry_count, pause):
    """
    Fetch and parse one page of tick rows, trying up to retry_count times.

    Parameters
    ------
      symbol, tdate, pageNo
          values formatted into ct.TODAY_TICKS_URL
      retry_count : int
          number of attempts before giving up
      pause : number
          seconds slept ahead of each attempt
    return
    -------
      DataFrame
          the parsed "datatbl" rows, named with ct.TODAY_TICK_COLUMNS;
          'pchange' values have their '%' character stripped
    raises
    -------
      IOError
          carrying ct.NETWORK_URL_ERROR_MSG once every attempt has failed
    """
    ct._write_console()
    strip_percent = lambda x: x.replace('%', '')
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            url = ct.TODAY_TICKS_URL % (symbol, tdate, pageNo)
            nodes = lxml.html.parse(url).xpath(
                '//table[@id=\"datatbl\"]/tbody/tr')
            # tostring() returns bytes, which Python 3 must decode to text.
            if ct.PY3:
                chunks = [etree.tostring(n).decode('utf-8') for n in nodes]
            else:
                chunks = [etree.tostring(n) for n in nodes]
            # Wrap the rows in a <table> for read_html and replace the
            # '--' placeholders with '0'.
            markup = ('<table>%s</table>' % ''.join(chunks)).replace(
                '--', '0')
            frame = pd.read_html(StringIO(markup), parse_dates=False)[0]
            frame.columns = ct.TODAY_TICK_COLUMNS
            frame['pchange'] = frame['pchange'].map(strip_percent)
        except Exception as exc:
            # Print the error; the loop then moves on to the next attempt.
            print(exc)
            continue
        return frame
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #11
0
def get_h_data(code,
               start=None,
               end=None,
               autype='qfq',
               index=False,
               retry_count=3,
               pause=0.001):
    '''
    Fetch historical price data, optionally adjusted for rights/dividends.
    Parameters
    ------
      code:string
                  stock code e.g. 600848
      start:string
                  start date, format:YYYY-MM-DD; when None,
                  du.today_last_year() is used
      end:string
                  end date, format:YYYY-MM-DD; when None, du.today() is used
      autype:string
                  adjustment type, qfq-forward adjusted, hfq-backward
                  adjusted, None-unadjusted; default qfq
      index:bool
                  forwarded to _get_index_url and _parse_fq_data; when
                  true the date-filtered data is returned without any
                  price adjustment
      retry_count : int, default 3
                 forwarded to _parse_fq_data (number of retries on
                 network problems)
      pause : number, default 0.001
                forwarded to _parse_fq_data (seconds to pause between
                requests, to avoid problems caused by too-short intervals)
    return
    -------
      DataFrame, or None when no rows fall inside [start, end] or when
      get_realtime_quotes returns None on the qfq path
          date   trading date (index)
          open   opening price
          high   highest price
          close  closing price
          low    lowest price
          volume trading volume
          amount trading amount
          factor backward-adjustment factor
    '''

    start = du.today_last_year() if start is None else start
    end = du.today() if end is None else end
    qs = du.get_quarts(start, end)
    qt = qs[0]
    ct._write_head()
    data = _parse_fq_data(_get_index_url(index, code, qt), index, retry_count,
                          pause)
    for d in range(1, len(qs)):
        qt = qs[d]
        ct._write_console()
        df = _parse_fq_data(_get_index_url(index, code, qt), index,
                            retry_count, pause)
        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        data = pd.concat([data, df], ignore_index=True)
    if len(data) == 0 or len(
            data[(data.date >= start) & (data.date <= end)]) == 0:
        return None
    data = data.drop_duplicates('date')
    if index:
        data = data[(data.date >= start) & (data.date <= end)]
        data = data.set_index('date')
        data = data.sort_index(ascending=False)
        return data
    if autype == 'hfq':
        # .copy() so the column assignments below write to an independent
        # frame instead of a slice of the unfiltered data.
        data = data[(data.date >= start) & (data.date <= end)].copy()
        for label in ['open', 'high', 'close', 'low']:
            data[label] = data[label].map(ct.FORMAT)
            data[label] = data[label].astype(float)
        data = data.set_index('date')
        data = data.sort_index(ascending=False)
        return data
    if autype == 'qfq':
        df = _parase_fq_factor(code, start, end)
        df = df.drop_duplicates('date')
        # DataFrame.sort was removed in pandas 0.20; sort_values replaces it.
        df = df.sort_values('date', ascending=False)
        frow = df.head(1)
        rt = get_realtime_quotes(code)
        if rt is None:
            return None
        # Choose the reference price that the most recent factor is
        # divided by to obtain the scaling rate.
        if float(rt['high']) == 0 and float(rt['low']) == 0:
            preClose = float(rt['pre_close'])
        elif du.is_holiday(du.today()):
            preClose = float(rt['price'])
        elif 9 < du.get_hour() < 18:
            preClose = float(rt['pre_close'])
        else:
            preClose = float(rt['price'])

        # Take the single value explicitly; float() on a one-row Series is
        # deprecated in recent pandas.
        rate = float(frow['factor'].iloc[0]) / preClose
        data = data[(data.date >= start) & (data.date <= end)].copy()
        for label in ['open', 'high', 'low', 'close']:
            data[label] = data[label] / rate
            data[label] = data[label].map(ct.FORMAT)
            data[label] = data[label].astype(float)
        data = data.set_index('date')
        data = data.sort_index(ascending=False)
        return data
    # Any other autype: divide the prices by the per-row factor.
    for label in ['open', 'high', 'close', 'low']:
        data[label] = data[label] / data['factor']
    data = data[(data.date >= start) & (data.date <= end)].copy()
    for label in ['open', 'high', 'close', 'low']:
        data[label] = data[label].map(ct.FORMAT)
    data = data.set_index('date')
    data = data.sort_index(ascending=False)
    data = data.astype(float)
    return data
Example #12
0
def get_h_data(code, start=None, end=None, autype='qfq',
               index=False, retry_count=3, pause=0.001):
    '''
    Retrieve historical prices with an optional rights/dividend adjustment.
    Parameters
    ------
      code:string
                  stock code e.g. 600848
      start:string
                  start date, format:YYYY-MM-DD; du.today_last_year() is
                  used when None
      end:string
                  end date, format:YYYY-MM-DD; du.today() is used when None
      autype:string
                  adjustment type, qfq-forward adjusted, hfq-backward
                  adjusted, None-unadjusted; default qfq
      index:bool
                  passed on to _get_index_url and _parse_fq_data; when
                  true the date-filtered data is returned with no price
                  adjustment applied
      retry_count : int, default 3
                 passed on to _parse_fq_data (how many times to retry on
                 network problems)
      pause : number, default 0.001
                passed on to _parse_fq_data (seconds to pause between
                requests so that they are not issued too quickly)
    return
    -------
      DataFrame, or None if no rows lie within [start, end] or if
      get_realtime_quotes returns None while computing the qfq adjustment
          date   trading date (index)
          open   opening price
          high   highest price
          close  closing price
          low    lowest price
          volume trading volume
          amount trading amount
          factor backward-adjustment factor
    '''

    start = du.today_last_year() if start is None else start
    end = du.today() if end is None else end
    qs = du.get_quarts(start, end)
    qt = qs[0]
    ct._write_head()
    data = _parse_fq_data(_get_index_url(index, code, qt), index,
                          retry_count, pause)
    for d in range(1, len(qs)):
        qt = qs[d]
        ct._write_console()
        df = _parse_fq_data(_get_index_url(index, code, qt), index,
                            retry_count, pause)
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        data = pd.concat([data, df], ignore_index=True)
    if len(data) == 0 or len(
            data[(data.date >= start) & (data.date <= end)]) == 0:
        return None
    data = data.drop_duplicates('date')
    if index:
        data = data[(data.date >= start) & (data.date <= end)]
        data = data.set_index('date')
        data = data.sort_index(ascending=False)
        return data
    if autype == 'hfq':
        # Copy the filtered rows so that the assignments below do not
        # target a slice of the unfiltered frame.
        data = data[(data.date >= start) & (data.date <= end)].copy()
        for label in ['open', 'high', 'close', 'low']:
            data[label] = data[label].map(ct.FORMAT)
            data[label] = data[label].astype(float)
        data = data.set_index('date')
        data = data.sort_index(ascending=False)
        return data
    if autype == 'qfq':
        df = _parase_fq_factor(code, start, end)
        df = df.drop_duplicates('date')
        # sort_values replaces DataFrame.sort, which pandas 0.20 removed.
        df = df.sort_values('date', ascending=False)
        frow = df.head(1)
        rt = get_realtime_quotes(code)
        if rt is None:
            return None
        # Pick the reference price; the latest factor divided by it gives
        # the rate the prices are scaled by.
        if float(rt['high']) == 0 and float(rt['low']) == 0:
            preClose = float(rt['pre_close'])
        elif du.is_holiday(du.today()):
            preClose = float(rt['price'])
        elif 9 < du.get_hour() < 18:
            preClose = float(rt['pre_close'])
        else:
            preClose = float(rt['price'])

        # Read the scalar explicitly: float() on a one-row Series is
        # deprecated in recent pandas.
        rate = float(frow['factor'].iloc[0]) / preClose
        data = data[(data.date >= start) & (data.date <= end)].copy()
        for label in ['open', 'high', 'low', 'close']:
            data[label] = data[label] / rate
            data[label] = data[label].map(ct.FORMAT)
            data[label] = data[label].astype(float)
        data = data.set_index('date')
        data = data.sort_index(ascending=False)
        return data
    # Remaining autype values: prices are divided by the per-row factor.
    for label in ['open', 'high', 'close', 'low']:
        data[label] = data[label] / data['factor']
    data = data[(data.date >= start) & (data.date <= end)].copy()
    for label in ['open', 'high', 'close', 'low']:
        data[label] = data[label].map(ct.FORMAT)
    data = data.set_index('date')
    data = data.sort_index(ascending=False)
    data = data.astype(float)
    return data