Example #1
0
def get_tick_data(code=None, date=None, retry_count=3, pause=0.001,
                  src='sn'):
    """
        获取分笔数据
    Parameters
    ------
        code:string
                  股票代码 e.g. 600848
        date:string
                  日期 format: YYYY-MM-DD
        retry_count : int, 默认 3
                  如遇网络等问题重复执行的次数
        pause : int, 默认 0
                 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
        src : 数据源选择,可输入sn(新浪)、tt(腾讯)、nt(网易),默认sn
     return
     -------
        DataFrame 当日所有股票交易数据(DataFrame)
              属性:成交时间、成交价格、价格变动,成交手、成交金额(元),买卖类型
    """
    if code is None or len(code)!=6 or date is None:
        return None
    if (src.strip() not in ct.TICK_SRCS):
        print(ct.TICK_SRC_ERROR)
        return None
    symbol = ct._code_to_symbol(code)
    symbol_dgt = ct._code_to_symbol_dgt(code)
    datestr = date.replace('-', '')
    url = {
            ct.TICK_SRCS[0] : ct.TICK_PRICE_URL % (ct.P_TYPE['http'], ct.DOMAINS['sf'], ct.PAGES['dl'],
                                date, symbol),
            ct.TICK_SRCS[1] : ct.TICK_PRICE_URL_TT % (ct.P_TYPE['http'], ct.DOMAINS['tt'], ct.PAGES['idx'],
                                           symbol, datestr),
            ct.TICK_SRCS[2] : ct.TICK_PRICE_URL_NT % (ct.P_TYPE['http'], ct.DOMAINS['163'], date[0:4], 
                                         datestr, symbol_dgt)
             }
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            if src == ct.TICK_SRCS[2]:
                df = pd.read_excel(url[src])
                df.columns = ct.TICK_COLUMNS
            else:
                re = Request(url[src])
                lines = urlopen(re, timeout=10).read()
                lines = lines.decode('GBK') 
                if len(lines) < 20:
                    return None
                df = pd.read_table(StringIO(lines), names=ct.TICK_COLUMNS,
                                   skiprows=[0])      
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #2
0
def top10_holders(code=None, year=None, quarter=None, gdtype='0',
                  retry_count=3, pause=0.001):
    if code is None:
        return None
    else:
        code = ct._code_to_symbol(code)
    gdtype = 'LT' if gdtype == '1' else ''
    qdate = ''
    if (year is not None) & (quarter is not None):
        qdate = du.get_q_date(year, quarter)
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(rv.TOP10_HOLDERS_URL%(ct.P_TYPE['http'], ct.DOMAINS['gw'],
                                    gdtype, code.upper()))
            lines = urlopen(request, timeout = 10).read()
            lines = lines.decode('utf8') if ct.PY3 else lines
            reg = re.compile(r'= \'\[(.*?)\]\';')
            lines = reg.findall(lines)[0]
            jss = json.loads('[%s]' %lines)
            summ = []
            data = pd.DataFrame()
            for row in jss:
                qt = row['jzrq']
                hold = row['ljcy']
                change = row['ljbh']
                props = row['ljzb']
                arow = [qt, hold, change ,props]
                summ.append(arow)
                ls = row['sdgdList']
                dlist = []
                for inrow in ls:
                    sharetype = inrow['gbxz']
                    name = inrow['gdmc']
                    hold = inrow['cgs']
                    h_pro = inrow['zzgs']
                    status = inrow['zjqk']
                    dlist.append([qt, name, hold, h_pro, sharetype, status])
                ddata = pd.DataFrame(dlist, columns=rv.TOP10_PER_COLS)
                data = data.append(ddata, ignore_index=True)
            df = pd.DataFrame(summ, columns=rv.TOP10_SUMM_COLS)
            if qdate != '':
                df = df[df.quarter == qdate]
                data = data[data.quarter == qdate]
        except Exception as e:
            print(e)
        else:
            return df, data
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #3
0
def get_today_ticks(code=None, retry_count=3, pause=0.001):
    """
        获取当日分笔明细数据
    Parameters
    ------
        code:string
                  股票代码 e.g. 600848
        retry_count : int, 默认 3
                  如遇网络等问题重复执行的次数
        pause : int, 默认 0
                 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
     return
     -------
        DataFrame 当日所有股票交易数据(DataFrame)
              属性:成交时间、成交价格、价格变动,成交手、成交金额(元),买卖类型
    """
    if code is None or len(code) != 6:
        return None
    symbol = ct._code_to_symbol(code)
    date = du.today()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(ct.TODAY_TICKS_PAGE_URL %
                              (ct.P_TYPE['http'], ct.DOMAINS['vsf'],
                               ct.PAGES['jv'], date, symbol))
            data_str = urlopen(request, timeout=10).read()
            data_str = data_str.decode('GBK')
            data_str = data_str[1:-1]
            data_str = eval(
                data_str,
                type('Dummy', (dict, ), dict(__getitem__=lambda s, n: n))())
            data_str = json.dumps(data_str)
            data_str = json.loads(data_str)
            pages = len(data_str['detailPages'])
            data = pd.DataFrame()
            ct._write_head()
            for pNo in range(1, pages + 1):
                data = data.append(_today_ticks(symbol, date, pNo, retry_count,
                                                pause),
                                   ignore_index=True)
        except Exception as er:
            print(str(er))
        else:
            return data
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #4
0
def get_today_ticks(code=None, retry_count=3, pause=0.001):
    """
        获取当日分笔明细数据
    Parameters
    ------
        code:string
                  股票代码 e.g. 600848
        retry_count : int, 默认 3
                  如遇网络等问题重复执行的次数
        pause : int, 默认 0
                 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
     return
     -------
        DataFrame 当日所有股票交易数据(DataFrame)
              属性:成交时间、成交价格、价格变动,成交手、成交金额(元),买卖类型
    """
    if code is None or len(code)!=6 :
        return None
    symbol = ct._code_to_symbol(code)
    date = du.today()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(ct.TODAY_TICKS_PAGE_URL % (ct.P_TYPE['http'], ct.DOMAINS['vsf'],
                                                       ct.PAGES['jv'], date,
                                                       symbol))
            data_str = urlopen(request, timeout=10).read()
            data_str = data_str.decode('GBK')
            data_str = data_str[1:-1]
            data_str = eval(data_str, type('Dummy', (dict,), 
                                           dict(__getitem__ = lambda s, n:n))())
            data_str = json.dumps(data_str)
            data_str = json.loads(data_str)
            pages = len(data_str['detailPages'])
            data = pd.DataFrame()
            ct._write_head()
            for pNo in range(1, pages+1):
                data = data.append(_today_ticks(symbol, date, pNo,
                                                retry_count, pause), ignore_index=True)
        except Exception as er:
            print(str(er))
        else:
            return data
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #5
0
def get_sina_dd(code=None, date=None, vol=400, retry_count=3, pause=0.001):
    """
        获取sina大单数据
    Parameters
    ------
        code:string
                  股票代码 e.g. 600848
        date:string
                  日期 format:YYYY-MM-DD
        retry_count : int, 默认 3
                  如遇网络等问题重复执行的次数
        pause : int, 默认 0
                 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
     return
     -------
        DataFrame 当日所有股票交易数据(DataFrame)
              属性:股票代码    股票名称    交易时间    价格    成交量    前一笔价格    类型(买、卖、中性盘)
    """
    if code is None or len(code) != 6 or date is None:
        return None
    symbol = ct._code_to_symbol(code)
    vol = vol * 100
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            headers = {'User-Agent': _get_user_agent()}
            re = Request(ct.SINA_DD % (ct.P_TYPE['http'], ct.DOMAINS['vsf'],
                                       ct.PAGES['sinadd'], symbol, vol, date),
                         headers=headers)
            lines = urlopen(re, timeout=10).read()
            lines = lines.decode('GBK')
            if len(lines) < 100:
                return None
            df = pd.read_csv(StringIO(lines),
                             names=ct.SINA_DD_COLS,
                             skiprows=[0])
            if df is not None:
                df['code'] = df['code'].map(lambda x: x[2:])
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #6
0
def get_sina_dd(code=None, date=None, vol=400, retry_count=3, pause=0.001):
    """
        获取sina大单数据
    Parameters
    ------
        code:string
                  股票代码 e.g. 600848
        date:string
                  日期 format:YYYY-MM-DD
        retry_count : int, 默认 3
                  如遇网络等问题重复执行的次数
        pause : int, 默认 0
                 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
     return
     -------
        DataFrame 当日所有股票交易数据(DataFrame)
              属性:股票代码    股票名称    交易时间    价格    成交量    前一笔价格    类型(买、卖、中性盘)
    """
    if code is None or len(code) != 6 or date is None:
        return None
    symbol = ct._code_to_symbol(code)
    vol = vol * 100
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            re = Request(ct.SINA_DD % (ct.P_TYPE['http'], ct.DOMAINS['vsf'],
                                       ct.PAGES['sinadd'], symbol, vol, date))
            lines = urlopen(re, timeout=10).read()
            lines = lines.decode('GBK')
            lines_j = eval(
                lines,
                type('Dummy', (dict, ), dict(__getitem__=lambda s, n: n))())
            if len(lines) < 100:
                return None
            df = pd.read_json(json.dumps(lines_j))
            if df is not None:
                df['symbol'] = df['symbol'].map(lambda x: x[2:])
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #7
0
def _parase_fq_factor(code, start, end):
    symbol = ct._code_to_symbol(code)
    request = Request(ct.HIST_FQ_FACTOR_URL%(ct.P_TYPE['http'],
                                             ct.DOMAINS['vsf'], symbol))
    text = urlopen(request, timeout=10).read()
    text = text[1:len(text)-1]
    text = text.decode('utf-8') if ct.PY3 else text
    text = text.replace('{_', '{"')
    text = text.replace('total', '"total"')
    text = text.replace('data', '"data"')
    text = text.replace(':"', '":"')
    text = text.replace('",_', '","')
    text = text.replace('_', '-')
    text = json.loads(text)
    df = pd.DataFrame({'date':list(text['data'].keys()), 'factor':list(text['data'].values())})
    df['date'] = df['date'].map(_fun_except) # for null case
    if df['date'].dtypes == np.object:
        df['date'] = pd.to_datetime(df['date'])
    df = df.drop_duplicates('date')
    df['factor'] = df['factor'].astype(float)
    return df
Example #8
0
def _parase_fq_factor(code, start, end):
    symbol = ct._code_to_symbol(code)
    request = Request(ct.HIST_FQ_FACTOR_URL%(ct.P_TYPE['http'],
                                             ct.DOMAINS['vsf'], symbol))
    text = urlopen(request, timeout=10).read()
    text = text[1:len(text)-1]
    text = text.decode('utf-8') if ct.PY3 else text
    text = text.replace('{_', '{"')
    text = text.replace('total', '"total"')
    text = text.replace('data', '"data"')
    text = text.replace(':"', '":"')
    text = text.replace('",_', '","')
    text = text.replace('_', '-')
    text = json.loads(text)
    df = pd.DataFrame({'date':list(text['data'].keys()), 'factor':list(text['data'].values())})
    df['date'] = df['date'].map(_fun_except) # for null case
    if df['date'].dtypes == np.object:
        df['date'] = pd.to_datetime(df['date'])
    df = df.drop_duplicates('date')
    df['factor'] = df['factor'].astype(float)
    return df
Example #9
0
def getDayData(code=None, start="2017-01-01", end="2018-12-31"):
    symbol = ct._code_to_symbol(code)  #将代码转换成标准格式
    url = 'http://web.ifzq.gtimg.cn/appstock/app/fqkline/get?_var=kline_dayqfq2017&param=%s,day,%s,%s,640,qfq' % (
        symbol, start, end)
    try:
        request = Request(url)
        lines = urlopen(request, timeout=10).read()
        if len(lines) < 100:  #no data
            return None
    except Exception as e:
        print(e)
    else:
        lines = lines.decode('utf-8') if ct.PY3 else lines
        lines = lines.split('=')[1]
        reg = re.compile(r',{"nd.*?}')
        lines = re.subn(reg, '', lines)
        reg = re.compile(r',"qt":{.*?}')
        lines = re.subn(reg, '', lines[0])
        reg = r',"mx_price".*?"version":"4"'
        lines = re.subn(reg, '', lines[0])
        reg = r',"mx_price".*?"version":"12"'
        lines = re.subn(reg, '', lines[0])
        #将str格式转换成byte
        textByte = bytes(lines[0], encoding='utf-8')
    return textByte
    raise IOError(ct.NETWORK_URL_ERROR_MSG)


#获取所有股票的日线数据,并Post至java jar中
# def getAllStockData():
#     list1=globalFunction.getAllStockCode()
#     for code in list1:
#         textByte=getDayData(code)
#         urlPost='http://localhost:8080/stock/tradeHistory'
#         globalFunction.postData(textByte,urlPost,code)

# code="600145"
# textByte = getDayData(code)
# urlPost = 'http://localhost:8080/stock/tradeHistory'
# globalFunction.postData(textByte, urlPost, code)
Example #10
0
def get_sina_dd(code=None, date=None, vol=400, retry_count=3, pause=0.001):
    """
        获取sina大单数据
    Parameters
    ------
        code:string
                  股票代码 e.g. 600848
        date:string
                  日期 format:YYYY-MM-DD
        retry_count : int, 默认 3
                  如遇网络等问题重复执行的次数
        pause : int, 默认 0
                 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
     return
     -------
        DataFrame 当日所有股票交易数据(DataFrame)
              属性:股票代码    股票名称    交易时间    价格    成交量    前一笔价格    类型(买、卖、中性盘)
    """
    if code is None or len(code)!=6 or date is None:
        return None
    symbol = ct._code_to_symbol(code)
    vol = vol*100
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            re = Request(ct.SINA_DD % (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['sinadd'],
                                symbol, vol, date))
            lines = urlopen(re, timeout=10).read()
            lines = lines.decode('GBK') 
            if len(lines) < 100:
                return None
            df = pd.read_csv(StringIO(lines), names=ct.SINA_DD_COLS,
                               skiprows=[0])    
            if df is not None:
                df['code'] = df['code'].map(lambda x: x[2:])
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #11
0
def get_k_data(code=None,
               start='',
               end='',
               ktype='D',
               autype='qfq',
               index=False,
               retry_count=3,
               pause=0.001):
    """
    获取k线数据
    ---------
    Parameters:
      code:string
                  股票代码 e.g. 600848
      start:string
                  开始日期 format:YYYY-MM-DD 为空时取上市首日
      end:string
                  结束日期 format:YYYY-MM-DD 为空时取最近一个交易日
      autype:string
                  复权类型,qfq-前复权 hfq-后复权 None-不复权,默认为qfq
      ktype:string
                  数据类型,D=日k线 W=周 M=月 5=5分钟 15=15分钟 30=30分钟 60=60分钟,默认为D
      retry_count : int, 默认 3
                 如遇网络等问题重复执行的次数 
      pause : int, 默认 0
                重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
    return
    -------
      DataFrame
          date 交易日期 (index)
          open 开盘价
          high  最高价
          close 收盘价
          low 最低价
          volume 成交量
          amount 成交额
          turnoverratio 换手率
          code 股票代码
    """
    symbol = ct.INDEX_SYMBOL[code] if index else ct._code_to_symbol(code)
    url = ''
    dataflag = ''
    autype = '' if autype is None else autype
    if (start is not None) & (start != ''):
        end = du.today() if end is None or end == '' else end
    if ktype.upper() in ct.K_LABELS:
        fq = autype if autype is not None else ''
        if code[:1] in ('1', '5') or index:
            fq = ''
        kline = '' if autype is None else 'fq'
        if (start is None or start == '') & (end is None or end == ''):
            urls = [
                ct.KLINE_TT_URL %
                (ct.P_TYPE['http'], ct.DOMAINS['tt'], kline, fq, symbol,
                 ct.TT_K_TYPE[ktype.upper()], start, end, fq, _random(17))
            ]
        else:
            years = du.tt_dates(start, end)
            urls = []
            for year in years:
                startdate = str(year) + '-01-01'
                enddate = str(year + 1) + '-12-31'
                url = ct.KLINE_TT_URL % (ct.P_TYPE['http'], ct.DOMAINS['tt'],
                                         kline, fq + str(year), symbol,
                                         ct.TT_K_TYPE[ktype.upper()],
                                         startdate, enddate, fq, _random(17))
                urls.append(url)
        dataflag = '%s%s' % (fq, ct.TT_K_TYPE[ktype.upper()])
    elif ktype in ct.K_MIN_LABELS:
        urls = [
            ct.KLINE_TT_MIN_URL % (ct.P_TYPE['http'], ct.DOMAINS['tt'], symbol,
                                   ktype, ktype, _random(16))
        ]
        dataflag = 'm%s' % ktype
    else:
        raise TypeError('ktype input error.')
    data = pd.DataFrame()
    for url in urls:
        data = data.append(_get_k_data(url, dataflag, symbol, code, index,
                                       ktype, retry_count, pause),
                           ignore_index=True)
    if ktype not in ct.K_MIN_LABELS:
        if ((start is not None) & (start != '')) & ((end is not None) &
                                                    (end != '')):
            if data.empty == False:
                data = data[(data.date >= start) & (data.date <= end)]
    return data
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #12
0
def get_realtime_quotes(symbols=None):
    """
        获取实时交易数据 getting real time quotes data
       用于跟踪交易情况(本次执行的结果-上一次执行的数据)
    Parameters
    ------
        symbols : string, array-like object (list, tuple, Series).
        
    return
    -------
        DataFrame 实时交易数据
              属性:0:name,股票名字
            1:open,今日开盘价
            2:pre_close,昨日收盘价
            3:price,当前价格
            4:high,今日最高价
            5:low,今日最低价
            6:bid,竞买价,即“买一”报价
            7:ask,竞卖价,即“卖一”报价
            8:volumn,成交量 maybe you need do volumn/100
            9:amount,成交金额(元 CNY)
            10:b1_v,委买一(笔数 bid volume)
            11:b1_p,委买一(价格 bid price)
            12:b2_v,“买二”
            13:b2_p,“买二”
            14:b3_v,“买三”
            15:b3_p,“买三”
            16:b4_v,“买四”
            17:b4_p,“买四”
            18:b5_v,“买五”
            19:b5_p,“买五”
            20:a1_v,委卖一(笔数 ask volume)
            21:a1_p,委卖一(价格 ask price)
            ...
            30:date,日期;
            31:time,时间;
    """
    symbols_list = ''
    if isinstance(symbols, list) or isinstance(symbols, set) or isinstance(
            symbols, tuple) or isinstance(symbols, pd.Series):
        for code in symbols:
            symbols_list += ct._code_to_symbol(code) + ','
    else:
        symbols_list = ct._code_to_symbol(symbols)

    symbols_list = symbols_list[:-1] if len(symbols_list) > 8 else symbols_list
    request = Request(
        ct.LIVE_DATA_URL %
        (ct.P_TYPE['http'], ct.DOMAINS['sinahq'], _random(), symbols_list))
    text = urlopen(request, timeout=10).read()
    text = text.decode('GBK')
    reg = re.compile(r'\="(.*?)\";')
    data = reg.findall(text)
    regSym = re.compile(r'(?:sh|sz|gb_)(.*?)\=')
    syms = regSym.findall(text)
    data_list = []
    syms_list = []
    for index, row in enumerate(data):
        if len(row) > 1:
            data_list.append([astr for astr in row.split(',')])
            syms_list.append(syms[index])
    if len(syms_list) == 0:
        return None
    if len(data_list[0]) == 28:
        df = pd.DataFrame(data_list, columns=ct.US_LIVE_DATA_COLS)
    else:
        df = pd.DataFrame(data_list, columns=ct.LIVE_DATA_COLS)
        df = df.drop('s', axis=1)
    df['code'] = syms_list
    ls = [cls for cls in df.columns if '_v' in cls]
    for txt in ls:
        df[txt] = df[txt].map(lambda x: x[:-2])
    return df
Example #13
0
def get_hist_data(code=None,
                  start=None,
                  end=None,
                  ktype='D',
                  retry_count=3,
                  pause=0.001):
    """
        获取个股历史交易记录
    Parameters
    ------
      code:string
                  股票代码 e.g. 600848
      start:string
                  开始日期 format:YYYY-MM-DD 为空时取到API所提供的最早日期数据
      end:string
                  结束日期 format:YYYY-MM-DD 为空时取到最近一个交易日数据
      ktype:string
                  数据类型,D=日k线 W=周 M=月 5=5分钟 15=15分钟 30=30分钟 60=60分钟,默认为D
      retry_count : int, 默认 3
                 如遇网络等问题重复执行的次数 
      pause : int, 默认 0
                重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
    return
    -------
      DataFrame
          属性:日期 ,开盘价, 最高价, 收盘价, 最低价, 成交量, 价格变动 ,涨跌幅,5日均价,10日均价,20日均价,5日均量,10日均量,20日均量,换手率
    """
    symbol = ct._code_to_symbol(code)
    url = ''
    if ktype.upper() in ct.K_LABELS:
        url = ct.DAY_PRICE_URL % (ct.P_TYPE['http'], ct.DOMAINS['ifeng'],
                                  ct.K_TYPE[ktype.upper()], symbol)
    elif ktype in ct.K_MIN_LABELS:
        url = ct.DAY_PRICE_MIN_URL % (ct.P_TYPE['http'], ct.DOMAINS['ifeng'],
                                      symbol, ktype)
    else:
        raise TypeError('ktype input error.')

    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(url)
            lines = urlopen(request, timeout=10).read()
            if len(lines) < 15:  #no data
                return None
        except Exception as e:
            print(e)
        else:
            js = json.loads(lines.decode('utf-8') if ct.PY3 else lines)
            cols = []
            if (code in ct.INDEX_LABELS) & (ktype.upper() in ct.K_LABELS):
                cols = ct.INX_DAY_PRICE_COLUMNS
            else:
                cols = ct.DAY_PRICE_COLUMNS
            if len(js['record'][0]) == 14:
                cols = ct.INX_DAY_PRICE_COLUMNS
            df = pd.DataFrame(js['record'], columns=cols)
            if ktype.upper() in ['D', 'W', 'M']:
                df = df.applymap(lambda x: x.replace(u',', u''))
                df[df == ''] = 0
            for col in cols[1:]:
                df[col] = df[col].astype(float)
            if start is not None:
                df = df[df.date >= start]
            if end is not None:
                df = df[df.date <= end]
            if (code in ct.INDEX_LABELS) & (ktype in ct.K_MIN_LABELS):
                df = df.drop('turnover', axis=1)
            df = df.set_index('date')
            df = df.sort_index(ascending=False)
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #14
0
def get_hist_data(code=None, start=None, end=None,
                  ktype='D', retry_count=3,
                  pause=0.001):
    """
        获取个股历史交易记录
    Parameters
    ------
      code:string
                  股票代码 e.g. 600848
      start:string
                  开始日期 format:YYYY-MM-DD 为空时取到API所提供的最早日期数据
      end:string
                  结束日期 format:YYYY-MM-DD 为空时取到最近一个交易日数据
      ktype:string
                  数据类型,D=日k线 W=周 M=月 5=5分钟 15=15分钟 30=30分钟 60=60分钟,默认为D
      retry_count : int, 默认 3
                 如遇网络等问题重复执行的次数 
      pause : int, 默认 0
                重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
    return
    -------
      DataFrame
          属性:日期 ,开盘价, 最高价, 收盘价, 最低价, 成交量, 价格变动 ,涨跌幅,5日均价,10日均价,20日均价,5日均量,10日均量,20日均量,换手率
    """
    symbol = ct._code_to_symbol(code)
    url = ''
    if ktype.upper() in ct.K_LABELS:
        url = ct.DAY_PRICE_URL%(ct.P_TYPE['http'], ct.DOMAINS['ifeng'],
                                ct.K_TYPE[ktype.upper()], symbol)
    elif ktype in ct.K_MIN_LABELS:
        url = ct.DAY_PRICE_MIN_URL%(ct.P_TYPE['http'], ct.DOMAINS['ifeng'],
                                    symbol, ktype)
    else:
        raise TypeError('ktype input error.')
    
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(url)
            lines = urlopen(request, timeout = 10).read()
            if len(lines) < 15: #no data
                return None
        except Exception as e:
            print(e)
        else:
            js = json.loads(lines.decode('utf-8') if ct.PY3 else lines)
            cols = []
            if (code in ct.INDEX_LABELS) & (ktype.upper() in ct.K_LABELS):
                cols = ct.INX_DAY_PRICE_COLUMNS
            else:
                cols = ct.DAY_PRICE_COLUMNS
            if len(js['record'][0]) == 14:
                cols = ct.INX_DAY_PRICE_COLUMNS
            df = pd.DataFrame(js['record'], columns=cols)
            if ktype.upper() in ['D', 'W', 'M']:
                df = df.applymap(lambda x: x.replace(u',', u''))
                df[df==''] = 0
            for col in cols[1:]:
                df[col] = df[col].astype(float)
            if start is not None:
                df = df[df.date >= start]
            if end is not None:
                df = df[df.date <= end]
            if (code in ct.INDEX_LABELS) & (ktype in ct.K_MIN_LABELS):
                df = df.drop('turnover', axis=1)
            df = df.set_index('date')
            df = df.sort_index(ascending = False)
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #15
0
def get_realtime_quotes(symbols=None):
    """
        获取实时交易数据 getting real time quotes data
       用于跟踪交易情况(本次执行的结果-上一次执行的数据)
    Parameters
    ------
        symbols : string, array-like object (list, tuple, Series).
        
    return
    -------
        DataFrame 实时交易数据
              属性:0:name,股票名字
            1:open,今日开盘价
            2:pre_close,昨日收盘价
            3:price,当前价格
            4:high,今日最高价
            5:low,今日最低价
            6:bid,竞买价,即“买一”报价
            7:ask,竞卖价,即“卖一”报价
            8:volumn,成交量 maybe you need do volumn/100
            9:amount,成交金额(元 CNY)
            10:b1_v,委买一(笔数 bid volume)
            11:b1_p,委买一(价格 bid price)
            12:b2_v,“买二”
            13:b2_p,“买二”
            14:b3_v,“买三”
            15:b3_p,“买三”
            16:b4_v,“买四”
            17:b4_p,“买四”
            18:b5_v,“买五”
            19:b5_p,“买五”
            20:a1_v,委卖一(笔数 ask volume)
            21:a1_p,委卖一(价格 ask price)
            ...
            30:date,日期;
            31:time,时间;
    """
    symbols_list = ''
    if isinstance(symbols, list) or isinstance(symbols, set) or isinstance(symbols, tuple) or isinstance(symbols, pd.Series):
        for code in symbols:
            symbols_list += ct._code_to_symbol(code) + ','
    else:
        symbols_list = ct._code_to_symbol(symbols)
        
    symbols_list = symbols_list[:-1] if len(symbols_list) > 8 else symbols_list 
    request = Request(ct.LIVE_DATA_URL%(ct.P_TYPE['http'], ct.DOMAINS['sinahq'],
                                                _random(), symbols_list))
    text = urlopen(request,timeout=10).read()
    text = text.decode('GBK')
    reg = re.compile(r'\="(.*?)\";')
    data = reg.findall(text)
    regSym = re.compile(r'(?:sh|sz)(.*?)\=')
    syms = regSym.findall(text)
    data_list = []
    syms_list = []
    for index, row in enumerate(data):
        if len(row)>1:
            data_list.append([astr for astr in row.split(',')])
            syms_list.append(syms[index])
    if len(syms_list) == 0:
        return None
    df = pd.DataFrame(data_list, columns=ct.LIVE_DATA_COLS)
    df = df.drop('s', axis=1)
    df['code'] = syms_list
    ls = [cls for cls in df.columns if '_v' in cls]
    for txt in ls:
        df[txt] = df[txt].map(lambda x : x[:-2])
    return df
Example #16
0
def get_tick_data(code=None, date=None, retry_count=3, pause=0.001,
                  src='sn',ip=None):
    """
        获取分笔数据
    Parameters
    ------
        code:string
                  股票代码 e.g. 600848
        date:string
                  日期 format: YYYY-MM-DD
        retry_count : int, 默认 3
                  如遇网络等问题重复执行的次数
        pause : int, 默认 0
                 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
        src : 数据源选择,可输入sn(新浪)、tt(腾讯)、nt(网易),默认sn
     return
     -------
        DataFrame 当日所有股票交易数据(DataFrame)
              属性:成交时间、成交价格、价格变动,成交手、成交金额(元),买卖类型
    """
    if (src.strip() not in ct.TICK_SRCS):
        print(ct.TICK_SRC_ERROR)
        return None
    symbol = ct._code_to_symbol(code)
    symbol_dgt = ct._code_to_symbol_dgt(code)
    datestr = date.replace('-', '')
    url = {
            ct.TICK_SRCS[0] : ct.TICK_PRICE_URL % (ct.P_TYPE['http'], ct.DOMAINS['sf'], ct.PAGES['dl'],
                                date, symbol),
            ct.TICK_SRCS[1] : ct.TICK_PRICE_URL_TT % (ct.P_TYPE['http'], ct.DOMAINS['tt'], ct.PAGES['idx'],
                                           symbol, datestr),
            ct.TICK_SRCS[2] : ct.TICK_PRICE_URL_NT % (ct.P_TYPE['http'], ct.DOMAINS['163'], date[0:4], 
                                         datestr, symbol_dgt)
             }
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            if src == ct.TICK_SRCS[2]:
                df = pd.read_excel(url[src])
                df.columns = ct.TICK_COLUMNS
            else:
                re = Request(url[src])
                print("start..")
                #proxies = {'http':'http://'+ct.hq_hosts[ipindex][1]+':'+str(ct.hq_hosts[ipindex][2])}
                if ip != None:
                    proxies = {'http':ip}
                    proxy_support = urllib.request.ProxyHandler(proxies)
                    opener = urllib.request.build_opener(proxy_support)
                    opener.addheaders = [('User-Agent',
                          'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36')]
                    urllib.request.install_opener(opener)
                    print (proxies)
                lines = urlopen(re, timeout=10).read()
                lines = lines.decode('GBK') 
                if len(lines) < 20:
                    return None
                df = pd.read_table(StringIO(lines), names=ct.TICK_COLUMNS,
                                   skiprows=[0])      
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
Example #17
0
def get_k_data(code=None, start='', end='',
                  ktype='D', autype='qfq', 
                  index=False,
                  retry_count=3,
                  pause=0.001):
    """
    获取k线数据
    ---------
    Parameters:
      code:string
                  股票代码 e.g. 600848
      start:string
                  开始日期 format:YYYY-MM-DD 为空时取上市首日
      end:string
                  结束日期 format:YYYY-MM-DD 为空时取最近一个交易日
      autype:string
                  复权类型,qfq-前复权 hfq-后复权 None-不复权,默认为qfq
      ktype:string
                  数据类型,D=日k线 W=周 M=月 5=5分钟 15=15分钟 30=30分钟 60=60分钟,默认为D
      retry_count : int, 默认 3
                 如遇网络等问题重复执行的次数 
      pause : int, 默认 0
                重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
    return
    -------
      DataFrame
          date 交易日期 (index)
          open 开盘价
          high  最高价
          close 收盘价
          low 最低价
          volume 成交量
          amount 成交额
          turnoverratio 换手率
          code 股票代码
    """
    symbol = ct.INDEX_SYMBOL[code] if index else ct._code_to_symbol(code)
    url = ''
    dataflag = ''
    autype = '' if autype is None else autype
    if (start is not None) & (start != ''):
        end = du.today() if end is None or end == '' else end
    if ktype.upper() in ct.K_LABELS:
        fq = autype if autype is not None else ''
        if code[:1] in ('1', '5') or index:
            fq = ''
        kline = '' if autype is None else 'fq'
        if (start is None or start == '') & (end is None or end == ''):
            urls = [ct.KLINE_TT_URL%(ct.P_TYPE['http'], ct.DOMAINS['tt'],
                                    kline, fq, symbol, 
                                    ct.TT_K_TYPE[ktype.upper()], start, end,
                                    fq, _random(17))]
        else:
            years = du.tt_dates(start, end)
            urls = []
            for year in years:
                startdate = str(year) + '-01-01'
                enddate = str(year+1) + '-12-31'
                url = ct.KLINE_TT_URL%(ct.P_TYPE['http'], ct.DOMAINS['tt'],
                                    kline, fq+str(year), symbol, 
                                    ct.TT_K_TYPE[ktype.upper()], startdate, enddate,
                                    fq, _random(17))
                urls.append(url)
        dataflag = '%s%s'%(fq, ct.TT_K_TYPE[ktype.upper()])
    elif ktype in ct.K_MIN_LABELS:
        urls = [ct.KLINE_TT_MIN_URL%(ct.P_TYPE['http'], ct.DOMAINS['tt'],
                                    symbol, ktype, ktype,
                                    _random(16))]
        dataflag = 'm%s'%ktype
    else:
        raise TypeError('ktype input error.')
    data = pd.DataFrame()
    for url in urls:
        data = data.append(_get_k_data(url, dataflag, 
                                       symbol, code,
                                       index, ktype,
                                       retry_count, pause), 
                           ignore_index=True)
    if ktype not in ct.K_MIN_LABELS:
        if ((start is not None) & (start != '')) & ((end is not None) & (end != '')):
            if data.empty==False:       
                data = data[(data.date >= start) & (data.date <= end)]
    return data
    raise IOError(ct.NETWORK_URL_ERROR_MSG)