def get_tick_data(code=None, date=None, retry_count=3, pause=0.001, src='sn'): """ 获取分笔数据 Parameters ------ code:string 股票代码 e.g. 600848 date:string 日期 format: YYYY-MM-DD retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 src : 数据源选择,可输入sn(新浪)、tt(腾讯)、nt(网易),默认sn return ------- DataFrame 当日所有股票交易数据(DataFrame) 属性:成交时间、成交价格、价格变动,成交手、成交金额(元),买卖类型 """ if code is None or len(code)!=6 or date is None: return None if (src.strip() not in ct.TICK_SRCS): print(ct.TICK_SRC_ERROR) return None symbol = ct._code_to_symbol(code) symbol_dgt = ct._code_to_symbol_dgt(code) datestr = date.replace('-', '') url = { ct.TICK_SRCS[0] : ct.TICK_PRICE_URL % (ct.P_TYPE['http'], ct.DOMAINS['sf'], ct.PAGES['dl'], date, symbol), ct.TICK_SRCS[1] : ct.TICK_PRICE_URL_TT % (ct.P_TYPE['http'], ct.DOMAINS['tt'], ct.PAGES['idx'], symbol, datestr), ct.TICK_SRCS[2] : ct.TICK_PRICE_URL_NT % (ct.P_TYPE['http'], ct.DOMAINS['163'], date[0:4], datestr, symbol_dgt) } for _ in range(retry_count): time.sleep(pause) try: if src == ct.TICK_SRCS[2]: df = pd.read_excel(url[src]) df.columns = ct.TICK_COLUMNS else: re = Request(url[src]) lines = urlopen(re, timeout=10).read() lines = lines.decode('GBK') if len(lines) < 20: return None df = pd.read_table(StringIO(lines), names=ct.TICK_COLUMNS, skiprows=[0]) except Exception as e: print(e) else: return df raise IOError(ct.NETWORK_URL_ERROR_MSG)
def top10_holders(code=None, year=None, quarter=None, gdtype='0', retry_count=3, pause=0.001): if code is None: return None else: code = ct._code_to_symbol(code) gdtype = 'LT' if gdtype == '1' else '' qdate = '' if (year is not None) & (quarter is not None): qdate = du.get_q_date(year, quarter) for _ in range(retry_count): time.sleep(pause) try: request = Request(rv.TOP10_HOLDERS_URL%(ct.P_TYPE['http'], ct.DOMAINS['gw'], gdtype, code.upper())) lines = urlopen(request, timeout = 10).read() lines = lines.decode('utf8') if ct.PY3 else lines reg = re.compile(r'= \'\[(.*?)\]\';') lines = reg.findall(lines)[0] jss = json.loads('[%s]' %lines) summ = [] data = pd.DataFrame() for row in jss: qt = row['jzrq'] hold = row['ljcy'] change = row['ljbh'] props = row['ljzb'] arow = [qt, hold, change ,props] summ.append(arow) ls = row['sdgdList'] dlist = [] for inrow in ls: sharetype = inrow['gbxz'] name = inrow['gdmc'] hold = inrow['cgs'] h_pro = inrow['zzgs'] status = inrow['zjqk'] dlist.append([qt, name, hold, h_pro, sharetype, status]) ddata = pd.DataFrame(dlist, columns=rv.TOP10_PER_COLS) data = data.append(ddata, ignore_index=True) df = pd.DataFrame(summ, columns=rv.TOP10_SUMM_COLS) if qdate != '': df = df[df.quarter == qdate] data = data[data.quarter == qdate] except Exception as e: print(e) else: return df, data raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_today_ticks(code=None, retry_count=3, pause=0.001): """ 获取当日分笔明细数据 Parameters ------ code:string 股票代码 e.g. 600848 retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame 当日所有股票交易数据(DataFrame) 属性:成交时间、成交价格、价格变动,成交手、成交金额(元),买卖类型 """ if code is None or len(code) != 6: return None symbol = ct._code_to_symbol(code) date = du.today() for _ in range(retry_count): time.sleep(pause) try: request = Request(ct.TODAY_TICKS_PAGE_URL % (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['jv'], date, symbol)) data_str = urlopen(request, timeout=10).read() data_str = data_str.decode('GBK') data_str = data_str[1:-1] data_str = eval( data_str, type('Dummy', (dict, ), dict(__getitem__=lambda s, n: n))()) data_str = json.dumps(data_str) data_str = json.loads(data_str) pages = len(data_str['detailPages']) data = pd.DataFrame() ct._write_head() for pNo in range(1, pages + 1): data = data.append(_today_ticks(symbol, date, pNo, retry_count, pause), ignore_index=True) except Exception as er: print(str(er)) else: return data raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_today_ticks(code=None, retry_count=3, pause=0.001): """ 获取当日分笔明细数据 Parameters ------ code:string 股票代码 e.g. 600848 retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame 当日所有股票交易数据(DataFrame) 属性:成交时间、成交价格、价格变动,成交手、成交金额(元),买卖类型 """ if code is None or len(code)!=6 : return None symbol = ct._code_to_symbol(code) date = du.today() for _ in range(retry_count): time.sleep(pause) try: request = Request(ct.TODAY_TICKS_PAGE_URL % (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['jv'], date, symbol)) data_str = urlopen(request, timeout=10).read() data_str = data_str.decode('GBK') data_str = data_str[1:-1] data_str = eval(data_str, type('Dummy', (dict,), dict(__getitem__ = lambda s, n:n))()) data_str = json.dumps(data_str) data_str = json.loads(data_str) pages = len(data_str['detailPages']) data = pd.DataFrame() ct._write_head() for pNo in range(1, pages+1): data = data.append(_today_ticks(symbol, date, pNo, retry_count, pause), ignore_index=True) except Exception as er: print(str(er)) else: return data raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_sina_dd(code=None, date=None, vol=400, retry_count=3, pause=0.001): """ 获取sina大单数据 Parameters ------ code:string 股票代码 e.g. 600848 date:string 日期 format:YYYY-MM-DD retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame 当日所有股票交易数据(DataFrame) 属性:股票代码 股票名称 交易时间 价格 成交量 前一笔价格 类型(买、卖、中性盘) """ if code is None or len(code) != 6 or date is None: return None symbol = ct._code_to_symbol(code) vol = vol * 100 for _ in range(retry_count): time.sleep(pause) try: headers = {'User-Agent': _get_user_agent()} re = Request(ct.SINA_DD % (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['sinadd'], symbol, vol, date), headers=headers) lines = urlopen(re, timeout=10).read() lines = lines.decode('GBK') if len(lines) < 100: return None df = pd.read_csv(StringIO(lines), names=ct.SINA_DD_COLS, skiprows=[0]) if df is not None: df['code'] = df['code'].map(lambda x: x[2:]) except Exception as e: print(e) else: return df raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_sina_dd(code=None, date=None, vol=400, retry_count=3, pause=0.001): """ 获取sina大单数据 Parameters ------ code:string 股票代码 e.g. 600848 date:string 日期 format:YYYY-MM-DD retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame 当日所有股票交易数据(DataFrame) 属性:股票代码 股票名称 交易时间 价格 成交量 前一笔价格 类型(买、卖、中性盘) """ if code is None or len(code) != 6 or date is None: return None symbol = ct._code_to_symbol(code) vol = vol * 100 for _ in range(retry_count): time.sleep(pause) try: re = Request(ct.SINA_DD % (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['sinadd'], symbol, vol, date)) lines = urlopen(re, timeout=10).read() lines = lines.decode('GBK') lines_j = eval( lines, type('Dummy', (dict, ), dict(__getitem__=lambda s, n: n))()) if len(lines) < 100: return None df = pd.read_json(json.dumps(lines_j)) if df is not None: df['symbol'] = df['symbol'].map(lambda x: x[2:]) except Exception as e: print(e) else: return df raise IOError(ct.NETWORK_URL_ERROR_MSG)
def _parase_fq_factor(code, start, end): symbol = ct._code_to_symbol(code) request = Request(ct.HIST_FQ_FACTOR_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], symbol)) text = urlopen(request, timeout=10).read() text = text[1:len(text)-1] text = text.decode('utf-8') if ct.PY3 else text text = text.replace('{_', '{"') text = text.replace('total', '"total"') text = text.replace('data', '"data"') text = text.replace(':"', '":"') text = text.replace('",_', '","') text = text.replace('_', '-') text = json.loads(text) df = pd.DataFrame({'date':list(text['data'].keys()), 'factor':list(text['data'].values())}) df['date'] = df['date'].map(_fun_except) # for null case if df['date'].dtypes == np.object: df['date'] = pd.to_datetime(df['date']) df = df.drop_duplicates('date') df['factor'] = df['factor'].astype(float) return df
def getDayData(code=None, start="2017-01-01", end="2018-12-31"): symbol = ct._code_to_symbol(code) #将代码转换成标准格式 url = 'http://web.ifzq.gtimg.cn/appstock/app/fqkline/get?_var=kline_dayqfq2017¶m=%s,day,%s,%s,640,qfq' % ( symbol, start, end) try: request = Request(url) lines = urlopen(request, timeout=10).read() if len(lines) < 100: #no data return None except Exception as e: print(e) else: lines = lines.decode('utf-8') if ct.PY3 else lines lines = lines.split('=')[1] reg = re.compile(r',{"nd.*?}') lines = re.subn(reg, '', lines) reg = re.compile(r',"qt":{.*?}') lines = re.subn(reg, '', lines[0]) reg = r',"mx_price".*?"version":"4"' lines = re.subn(reg, '', lines[0]) reg = r',"mx_price".*?"version":"12"' lines = re.subn(reg, '', lines[0]) #将str格式转换成byte textByte = bytes(lines[0], encoding='utf-8') return textByte raise IOError(ct.NETWORK_URL_ERROR_MSG) #获取所有股票的日线数据,并Post至java jar中 # def getAllStockData(): # list1=globalFunction.getAllStockCode() # for code in list1: # textByte=getDayData(code) # urlPost='http://localhost:8080/stock/tradeHistory' # globalFunction.postData(textByte,urlPost,code) # code="600145" # textByte = getDayData(code) # urlPost = 'http://localhost:8080/stock/tradeHistory' # globalFunction.postData(textByte, urlPost, code)
def get_sina_dd(code=None, date=None, vol=400, retry_count=3, pause=0.001): """ 获取sina大单数据 Parameters ------ code:string 股票代码 e.g. 600848 date:string 日期 format:YYYY-MM-DD retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame 当日所有股票交易数据(DataFrame) 属性:股票代码 股票名称 交易时间 价格 成交量 前一笔价格 类型(买、卖、中性盘) """ if code is None or len(code)!=6 or date is None: return None symbol = ct._code_to_symbol(code) vol = vol*100 for _ in range(retry_count): time.sleep(pause) try: re = Request(ct.SINA_DD % (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['sinadd'], symbol, vol, date)) lines = urlopen(re, timeout=10).read() lines = lines.decode('GBK') if len(lines) < 100: return None df = pd.read_csv(StringIO(lines), names=ct.SINA_DD_COLS, skiprows=[0]) if df is not None: df['code'] = df['code'].map(lambda x: x[2:]) except Exception as e: print(e) else: return df raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_k_data(code=None, start='', end='', ktype='D', autype='qfq', index=False, retry_count=3, pause=0.001): """ 获取k线数据 --------- Parameters: code:string 股票代码 e.g. 600848 start:string 开始日期 format:YYYY-MM-DD 为空时取上市首日 end:string 结束日期 format:YYYY-MM-DD 为空时取最近一个交易日 autype:string 复权类型,qfq-前复权 hfq-后复权 None-不复权,默认为qfq ktype:string 数据类型,D=日k线 W=周 M=月 5=5分钟 15=15分钟 30=30分钟 60=60分钟,默认为D retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame date 交易日期 (index) open 开盘价 high 最高价 close 收盘价 low 最低价 volume 成交量 amount 成交额 turnoverratio 换手率 code 股票代码 """ symbol = ct.INDEX_SYMBOL[code] if index else ct._code_to_symbol(code) url = '' dataflag = '' autype = '' if autype is None else autype if (start is not None) & (start != ''): end = du.today() if end is None or end == '' else end if ktype.upper() in ct.K_LABELS: fq = autype if autype is not None else '' if code[:1] in ('1', '5') or index: fq = '' kline = '' if autype is None else 'fq' if (start is None or start == '') & (end is None or end == ''): urls = [ ct.KLINE_TT_URL % (ct.P_TYPE['http'], ct.DOMAINS['tt'], kline, fq, symbol, ct.TT_K_TYPE[ktype.upper()], start, end, fq, _random(17)) ] else: years = du.tt_dates(start, end) urls = [] for year in years: startdate = str(year) + '-01-01' enddate = str(year + 1) + '-12-31' url = ct.KLINE_TT_URL % (ct.P_TYPE['http'], ct.DOMAINS['tt'], kline, fq + str(year), symbol, ct.TT_K_TYPE[ktype.upper()], startdate, enddate, fq, _random(17)) urls.append(url) dataflag = '%s%s' % (fq, ct.TT_K_TYPE[ktype.upper()]) elif ktype in ct.K_MIN_LABELS: urls = [ ct.KLINE_TT_MIN_URL % (ct.P_TYPE['http'], ct.DOMAINS['tt'], symbol, ktype, ktype, _random(16)) ] dataflag = 'm%s' % ktype else: raise TypeError('ktype input error.') data = pd.DataFrame() for url in urls: data = data.append(_get_k_data(url, dataflag, symbol, code, index, ktype, retry_count, pause), ignore_index=True) if ktype not in ct.K_MIN_LABELS: if ((start is not None) & (start != '')) & ((end is not None) & (end != '')): if data.empty == False: data = data[(data.date >= start) & (data.date <= end)] return data raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_realtime_quotes(symbols=None): """ 获取实时交易数据 getting real time quotes data 用于跟踪交易情况(本次执行的结果-上一次执行的数据) Parameters ------ symbols : string, array-like object (list, tuple, Series). return ------- DataFrame 实时交易数据 属性:0:name,股票名字 1:open,今日开盘价 2:pre_close,昨日收盘价 3:price,当前价格 4:high,今日最高价 5:low,今日最低价 6:bid,竞买价,即“买一”报价 7:ask,竞卖价,即“卖一”报价 8:volumn,成交量 maybe you need do volumn/100 9:amount,成交金额(元 CNY) 10:b1_v,委买一(笔数 bid volume) 11:b1_p,委买一(价格 bid price) 12:b2_v,“买二” 13:b2_p,“买二” 14:b3_v,“买三” 15:b3_p,“买三” 16:b4_v,“买四” 17:b4_p,“买四” 18:b5_v,“买五” 19:b5_p,“买五” 20:a1_v,委卖一(笔数 ask volume) 21:a1_p,委卖一(价格 ask price) ... 30:date,日期; 31:time,时间; """ symbols_list = '' if isinstance(symbols, list) or isinstance(symbols, set) or isinstance( symbols, tuple) or isinstance(symbols, pd.Series): for code in symbols: symbols_list += ct._code_to_symbol(code) + ',' else: symbols_list = ct._code_to_symbol(symbols) symbols_list = symbols_list[:-1] if len(symbols_list) > 8 else symbols_list request = Request( ct.LIVE_DATA_URL % (ct.P_TYPE['http'], ct.DOMAINS['sinahq'], _random(), symbols_list)) text = urlopen(request, timeout=10).read() text = text.decode('GBK') reg = re.compile(r'\="(.*?)\";') data = reg.findall(text) regSym = re.compile(r'(?:sh|sz|gb_)(.*?)\=') syms = regSym.findall(text) data_list = [] syms_list = [] for index, row in enumerate(data): if len(row) > 1: data_list.append([astr for astr in row.split(',')]) syms_list.append(syms[index]) if len(syms_list) == 0: return None if len(data_list[0]) == 28: df = pd.DataFrame(data_list, columns=ct.US_LIVE_DATA_COLS) else: df = pd.DataFrame(data_list, columns=ct.LIVE_DATA_COLS) df = df.drop('s', axis=1) df['code'] = syms_list ls = [cls for cls in df.columns if '_v' in cls] for txt in ls: df[txt] = df[txt].map(lambda x: x[:-2]) return df
def get_hist_data(code=None, start=None, end=None, ktype='D', retry_count=3, pause=0.001): """ 获取个股历史交易记录 Parameters ------ code:string 股票代码 e.g. 600848 start:string 开始日期 format:YYYY-MM-DD 为空时取到API所提供的最早日期数据 end:string 结束日期 format:YYYY-MM-DD 为空时取到最近一个交易日数据 ktype:string 数据类型,D=日k线 W=周 M=月 5=5分钟 15=15分钟 30=30分钟 60=60分钟,默认为D retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame 属性:日期 ,开盘价, 最高价, 收盘价, 最低价, 成交量, 价格变动 ,涨跌幅,5日均价,10日均价,20日均价,5日均量,10日均量,20日均量,换手率 """ symbol = ct._code_to_symbol(code) url = '' if ktype.upper() in ct.K_LABELS: url = ct.DAY_PRICE_URL % (ct.P_TYPE['http'], ct.DOMAINS['ifeng'], ct.K_TYPE[ktype.upper()], symbol) elif ktype in ct.K_MIN_LABELS: url = ct.DAY_PRICE_MIN_URL % (ct.P_TYPE['http'], ct.DOMAINS['ifeng'], symbol, ktype) else: raise TypeError('ktype input error.') for _ in range(retry_count): time.sleep(pause) try: request = Request(url) lines = urlopen(request, timeout=10).read() if len(lines) < 15: #no data return None except Exception as e: print(e) else: js = json.loads(lines.decode('utf-8') if ct.PY3 else lines) cols = [] if (code in ct.INDEX_LABELS) & (ktype.upper() in ct.K_LABELS): cols = ct.INX_DAY_PRICE_COLUMNS else: cols = ct.DAY_PRICE_COLUMNS if len(js['record'][0]) == 14: cols = ct.INX_DAY_PRICE_COLUMNS df = pd.DataFrame(js['record'], columns=cols) if ktype.upper() in ['D', 'W', 'M']: df = df.applymap(lambda x: x.replace(u',', u'')) df[df == ''] = 0 for col in cols[1:]: df[col] = df[col].astype(float) if start is not None: df = df[df.date >= start] if end is not None: df = df[df.date <= end] if (code in ct.INDEX_LABELS) & (ktype in ct.K_MIN_LABELS): df = df.drop('turnover', axis=1) df = df.set_index('date') df = df.sort_index(ascending=False) return df raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_hist_data(code=None, start=None, end=None, ktype='D', retry_count=3, pause=0.001): """ 获取个股历史交易记录 Parameters ------ code:string 股票代码 e.g. 600848 start:string 开始日期 format:YYYY-MM-DD 为空时取到API所提供的最早日期数据 end:string 结束日期 format:YYYY-MM-DD 为空时取到最近一个交易日数据 ktype:string 数据类型,D=日k线 W=周 M=月 5=5分钟 15=15分钟 30=30分钟 60=60分钟,默认为D retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame 属性:日期 ,开盘价, 最高价, 收盘价, 最低价, 成交量, 价格变动 ,涨跌幅,5日均价,10日均价,20日均价,5日均量,10日均量,20日均量,换手率 """ symbol = ct._code_to_symbol(code) url = '' if ktype.upper() in ct.K_LABELS: url = ct.DAY_PRICE_URL%(ct.P_TYPE['http'], ct.DOMAINS['ifeng'], ct.K_TYPE[ktype.upper()], symbol) elif ktype in ct.K_MIN_LABELS: url = ct.DAY_PRICE_MIN_URL%(ct.P_TYPE['http'], ct.DOMAINS['ifeng'], symbol, ktype) else: raise TypeError('ktype input error.') for _ in range(retry_count): time.sleep(pause) try: request = Request(url) lines = urlopen(request, timeout = 10).read() if len(lines) < 15: #no data return None except Exception as e: print(e) else: js = json.loads(lines.decode('utf-8') if ct.PY3 else lines) cols = [] if (code in ct.INDEX_LABELS) & (ktype.upper() in ct.K_LABELS): cols = ct.INX_DAY_PRICE_COLUMNS else: cols = ct.DAY_PRICE_COLUMNS if len(js['record'][0]) == 14: cols = ct.INX_DAY_PRICE_COLUMNS df = pd.DataFrame(js['record'], columns=cols) if ktype.upper() in ['D', 'W', 'M']: df = df.applymap(lambda x: x.replace(u',', u'')) df[df==''] = 0 for col in cols[1:]: df[col] = df[col].astype(float) if start is not None: df = df[df.date >= start] if end is not None: df = df[df.date <= end] if (code in ct.INDEX_LABELS) & (ktype in ct.K_MIN_LABELS): df = df.drop('turnover', axis=1) df = df.set_index('date') df = df.sort_index(ascending = False) return df raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_realtime_quotes(symbols=None): """ 获取实时交易数据 getting real time quotes data 用于跟踪交易情况(本次执行的结果-上一次执行的数据) Parameters ------ symbols : string, array-like object (list, tuple, Series). return ------- DataFrame 实时交易数据 属性:0:name,股票名字 1:open,今日开盘价 2:pre_close,昨日收盘价 3:price,当前价格 4:high,今日最高价 5:low,今日最低价 6:bid,竞买价,即“买一”报价 7:ask,竞卖价,即“卖一”报价 8:volumn,成交量 maybe you need do volumn/100 9:amount,成交金额(元 CNY) 10:b1_v,委买一(笔数 bid volume) 11:b1_p,委买一(价格 bid price) 12:b2_v,“买二” 13:b2_p,“买二” 14:b3_v,“买三” 15:b3_p,“买三” 16:b4_v,“买四” 17:b4_p,“买四” 18:b5_v,“买五” 19:b5_p,“买五” 20:a1_v,委卖一(笔数 ask volume) 21:a1_p,委卖一(价格 ask price) ... 30:date,日期; 31:time,时间; """ symbols_list = '' if isinstance(symbols, list) or isinstance(symbols, set) or isinstance(symbols, tuple) or isinstance(symbols, pd.Series): for code in symbols: symbols_list += ct._code_to_symbol(code) + ',' else: symbols_list = ct._code_to_symbol(symbols) symbols_list = symbols_list[:-1] if len(symbols_list) > 8 else symbols_list request = Request(ct.LIVE_DATA_URL%(ct.P_TYPE['http'], ct.DOMAINS['sinahq'], _random(), symbols_list)) text = urlopen(request,timeout=10).read() text = text.decode('GBK') reg = re.compile(r'\="(.*?)\";') data = reg.findall(text) regSym = re.compile(r'(?:sh|sz)(.*?)\=') syms = regSym.findall(text) data_list = [] syms_list = [] for index, row in enumerate(data): if len(row)>1: data_list.append([astr for astr in row.split(',')]) syms_list.append(syms[index]) if len(syms_list) == 0: return None df = pd.DataFrame(data_list, columns=ct.LIVE_DATA_COLS) df = df.drop('s', axis=1) df['code'] = syms_list ls = [cls for cls in df.columns if '_v' in cls] for txt in ls: df[txt] = df[txt].map(lambda x : x[:-2]) return df
def get_tick_data(code=None, date=None, retry_count=3, pause=0.001, src='sn',ip=None): """ 获取分笔数据 Parameters ------ code:string 股票代码 e.g. 600848 date:string 日期 format: YYYY-MM-DD retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 src : 数据源选择,可输入sn(新浪)、tt(腾讯)、nt(网易),默认sn return ------- DataFrame 当日所有股票交易数据(DataFrame) 属性:成交时间、成交价格、价格变动,成交手、成交金额(元),买卖类型 """ if (src.strip() not in ct.TICK_SRCS): print(ct.TICK_SRC_ERROR) return None symbol = ct._code_to_symbol(code) symbol_dgt = ct._code_to_symbol_dgt(code) datestr = date.replace('-', '') url = { ct.TICK_SRCS[0] : ct.TICK_PRICE_URL % (ct.P_TYPE['http'], ct.DOMAINS['sf'], ct.PAGES['dl'], date, symbol), ct.TICK_SRCS[1] : ct.TICK_PRICE_URL_TT % (ct.P_TYPE['http'], ct.DOMAINS['tt'], ct.PAGES['idx'], symbol, datestr), ct.TICK_SRCS[2] : ct.TICK_PRICE_URL_NT % (ct.P_TYPE['http'], ct.DOMAINS['163'], date[0:4], datestr, symbol_dgt) } for _ in range(retry_count): time.sleep(pause) try: if src == ct.TICK_SRCS[2]: df = pd.read_excel(url[src]) df.columns = ct.TICK_COLUMNS else: re = Request(url[src]) print("start..") #proxies = {'http':'http://'+ct.hq_hosts[ipindex][1]+':'+str(ct.hq_hosts[ipindex][2])} if ip != None: proxies = {'http':ip} proxy_support = urllib.request.ProxyHandler(proxies) opener = urllib.request.build_opener(proxy_support) opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36')] urllib.request.install_opener(opener) print (proxies) lines = urlopen(re, timeout=10).read() lines = lines.decode('GBK') if len(lines) < 20: return None df = pd.read_table(StringIO(lines), names=ct.TICK_COLUMNS, skiprows=[0]) except Exception as e: print(e) else: return df raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_k_data(code=None, start='', end='', ktype='D', autype='qfq', index=False, retry_count=3, pause=0.001): """ 获取k线数据 --------- Parameters: code:string 股票代码 e.g. 600848 start:string 开始日期 format:YYYY-MM-DD 为空时取上市首日 end:string 结束日期 format:YYYY-MM-DD 为空时取最近一个交易日 autype:string 复权类型,qfq-前复权 hfq-后复权 None-不复权,默认为qfq ktype:string 数据类型,D=日k线 W=周 M=月 5=5分钟 15=15分钟 30=30分钟 60=60分钟,默认为D retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame date 交易日期 (index) open 开盘价 high 最高价 close 收盘价 low 最低价 volume 成交量 amount 成交额 turnoverratio 换手率 code 股票代码 """ symbol = ct.INDEX_SYMBOL[code] if index else ct._code_to_symbol(code) url = '' dataflag = '' autype = '' if autype is None else autype if (start is not None) & (start != ''): end = du.today() if end is None or end == '' else end if ktype.upper() in ct.K_LABELS: fq = autype if autype is not None else '' if code[:1] in ('1', '5') or index: fq = '' kline = '' if autype is None else 'fq' if (start is None or start == '') & (end is None or end == ''): urls = [ct.KLINE_TT_URL%(ct.P_TYPE['http'], ct.DOMAINS['tt'], kline, fq, symbol, ct.TT_K_TYPE[ktype.upper()], start, end, fq, _random(17))] else: years = du.tt_dates(start, end) urls = [] for year in years: startdate = str(year) + '-01-01' enddate = str(year+1) + '-12-31' url = ct.KLINE_TT_URL%(ct.P_TYPE['http'], ct.DOMAINS['tt'], kline, fq+str(year), symbol, ct.TT_K_TYPE[ktype.upper()], startdate, enddate, fq, _random(17)) urls.append(url) dataflag = '%s%s'%(fq, ct.TT_K_TYPE[ktype.upper()]) elif ktype in ct.K_MIN_LABELS: urls = [ct.KLINE_TT_MIN_URL%(ct.P_TYPE['http'], ct.DOMAINS['tt'], symbol, ktype, ktype, _random(16))] dataflag = 'm%s'%ktype else: raise TypeError('ktype input error.') data = pd.DataFrame() for url in urls: data = data.append(_get_k_data(url, dataflag, symbol, code, index, ktype, retry_count, pause), ignore_index=True) if ktype not in ct.K_MIN_LABELS: if ((start is not None) & (start != '')) & ((end is not None) & (end != '')): if data.empty==False: data = data[(data.date >= start) & (data.date <= end)] return data raise IOError(ct.NETWORK_URL_ERROR_MSG)