def _get_profit_data(year, quarter, pageNo, dataArr):
    """
    Fetch profit-report pages (query type "ylnl") and accumulate the rows.

    Starting at ``pageNo``, pages are requested one after another through
    the ``ct.SINA_OPEN_API_URL`` template until the ``count`` reported by
    the service is covered by ``pageNo * ct.OPEN_API_PAGE_NUM``.

    Parameters
    ------
      year:int
                  report year, formatted into the query with %d
      quarter:int
                  report quarter, formatted into the query with %d
      pageNo:int
                  first page to request
      dataArr:DataFrame
                  rows collected so far; it is not modified in place
    return
    -------
      DataFrame
          ``dataArr`` followed by the fetched rows, or None when any step
          raised (the exception is printed, not propagated)
    """
    ct._write_console()
    try:
        # Iterative pagination: one loop turn per page instead of one
        # recursion frame per page.
        while True:
            # param: ["ylnl","industry","region","concept","year","quarter","{sort}",{asc},{page},{num}]
            ylnl_list_param = '["ylnl","","","","%d","%d","",0,%d,%d]' % (
                year, quarter, pageNo, ct.OPEN_API_PAGE_NUM)
            request = Request(
                ct.SINA_OPEN_API_URL % (quote(ylnl_list_param, ',[]')))
            request.add_header("User-Agent", ct.USER_AGENT)
            text = urlopen(request, timeout=ct.API_TIMEOUT).read()
            text = text.decode('gbk') if ct.PY3 else text
            js = json.loads(text.strip())
            if js is None:
                return dataArr
            df = pd.DataFrame(js[0]['items'])
            if not df.empty:
                # Drop the column labelled 0 before applying the column names.
                df = df.drop(0, axis=1)
                df.columns = ct.PROFIT_COLS
                # pd.concat replaces DataFrame.append, which newer pandas
                # releases no longer provide.
                dataArr = pd.concat([dataArr, df], ignore_index=True)
            if int(js[0]['count']) <= pageNo * ct.OPEN_API_PAGE_NUM:
                return dataArr
            pageNo = pageNo + 1
            ct._write_console()
    except Exception as e:
        # Best-effort contract: report the problem and hand back None.
        print(e)
        return None
def _parsing_dayprice_json(pageNum=1):
    """
    Process one page of today's quotes data (JSON-like format).

    Parameters
    ------
      pageNum:page number
    return
    -------
      DataFrame
          today's trading data restricted to ``ct.DAY_TRADING_COLUMNS``
          (minus ``symbol``) for rows with volume > 0, or None when the
          service answers with the literal ``null``
    """
    ct._write_console()
    request = Request(ct.SINA_DAY_PRICE_URL % (pageNum))
    text = urlopen(request, timeout=ct.API_TIMEOUT).read()
    # Decode before testing for 'null': on Python 3 the payload is bytes and
    # bytes never compare equal to a str, so the guard could not fire.
    if ct.PY3:
        text = text.decode('gbk')
    if text == 'null':
        return None
    # Object keys arrive unquoted; wrap every `,key:` occurrence in quotes.
    reg = re.compile(r'\,(.*?)\:')
    text = reg.sub(r',"\1":', text)
    # Repair the `{symbol` keys: first the ones the regex wrapped together
    # with the brace, then the remaining unquoted one(s).
    text = text.replace('"{symbol', '{"symbol')
    text = text.replace('{symbol', '{"symbol"')
    if ct.PY3:
        jstr = json.dumps(text)
    else:
        jstr = json.dumps(text, encoding='GBK')
    js = json.loads(jstr)
    df = pd.DataFrame(pd.read_json(js, dtype={'code': object}),
                      columns=ct.DAY_TRADING_COLUMNS)
    df = df.drop('symbol', axis=1)
    # Plain boolean-mask indexing; the `.ix` indexer is gone from pandas.
    df = df[df.volume > 0]
    return df
def _parsing_dayprice_json(pageNum=1):
    """
    Process one page of today's quotes data (JSON-like format).

    Parameters
    ------
      pageNum:page number
    return
    -------
      DataFrame
          today's trading data restricted to ``ct.DAY_TRADING_COLUMNS``
          (minus ``symbol``) for rows with volume > 0, or None when the
          service answers with the literal ``null``
    """
    ct._write_console()
    request = Request(ct.SINA_DAY_PRICE_URL % (pageNum))
    raw = urlopen(request, timeout=ct.API_TIMEOUT).read()
    # On Python 3 the body is bytes, which never equals the str 'null';
    # decode first so the empty-page guard actually works.
    text = raw.decode('gbk') if ct.PY3 else raw
    if text == 'null':
        return None
    # Keys in the payload are not quoted: turn `,key:` into `,"key":`.
    reg = re.compile(r'\,(.*?)\:')
    text = reg.sub(r',"\1":', text)
    # Fix up `{symbol`, which the regex either wrapped together with the
    # brace or did not touch at all.
    text = text.replace('"{symbol', '{"symbol')
    text = text.replace('{symbol', '{"symbol"')
    if ct.PY3:
        jstr = json.dumps(text)
    else:
        jstr = json.dumps(text, encoding='GBK')
    js = json.loads(jstr)
    df = pd.DataFrame(pd.read_json(js, dtype={'code': object}),
                      columns=ct.DAY_TRADING_COLUMNS)
    df = df.drop('symbol', axis=1)
    # Boolean-mask row selection without the removed `.ix` indexer.
    df = df[df.volume > 0]
    return df
def _get_growth_data(year, quarter, pageNo, dataArr):
    """
    Fetch growth-report pages (query type "cnl") and accumulate the rows.

    Starting at ``pageNo``, pages are requested one after another through
    the ``ct.SINA_OPEN_API_URL`` template until the ``count`` reported by
    the service is covered by ``pageNo * ct.OPEN_API_PAGE_NUM``.

    Parameters
    ------
      year:int
                  report year, formatted into the query with %d
      quarter:int
                  report quarter, formatted into the query with %d
      pageNo:int
                  first page to request
      dataArr:DataFrame
                  rows collected so far; it is not modified in place
    return
    -------
      DataFrame
          ``dataArr`` followed by the fetched rows, or None when any step
          raised (the exception is printed, not propagated)
    """
    ct._write_console()
    try:
        # Iterative pagination: one loop turn per page instead of one
        # recursion frame per page.
        while True:
            # param: ["cnl","industry","region","concept","year","quarter","{sort}",{asc},{page},{num}]
            list_param = '["cnl","","","","%d","%d","",0,%d,%d]' % (
                year, quarter, pageNo, ct.OPEN_API_PAGE_NUM)
            request = Request(
                ct.SINA_OPEN_API_URL % (quote(list_param, ',[]')))
            request.add_header("User-Agent", ct.USER_AGENT)
            text = urlopen(request, timeout=ct.API_TIMEOUT).read()
            text = text.decode('gbk') if ct.PY3 else text
            js = json.loads(text.strip())
            if js is None:
                return dataArr
            df = pd.DataFrame(js[0]['items'])
            if not df.empty:
                df.columns = ct.GROWTH_COLS
                # Placeholder cells ("--", "", "None") become "0" so the
                # selected columns can be cast to float.
                for col in (ct.GROWTH_COLS[0:4] + ct.GROWTH_COLS[7:]):
                    df[col] = df[col].astype(str).replace("--", "0").replace(
                        "", "0").replace("None", "0").astype(float)
                # pd.concat replaces DataFrame.append, which newer pandas
                # releases no longer provide.
                dataArr = pd.concat([dataArr, df], ignore_index=True)
            if int(js[0]['count']) <= pageNo * ct.OPEN_API_PAGE_NUM:
                return dataArr
            pageNo = pageNo + 1
            ct._write_console()
    except Exception as e:
        # Best-effort contract: report the problem and hand back None.
        print(e)
        return None
def _get_profit_data(year, quarter, pageNo, dataArr):
    """
    Collect profit-report rows (query type "ylnl") page by page.

    Requests begin at ``pageNo`` and continue with the following pages
    until ``pageNo * ct.OPEN_API_PAGE_NUM`` reaches the ``count`` value
    returned by the service behind ``ct.SINA_OPEN_API_URL``.

    Parameters
    ------
      year:int
                  report year, formatted into the query with %d
      quarter:int
                  report quarter, formatted into the query with %d
      pageNo:int
                  first page to request
      dataArr:DataFrame
                  rows collected so far; it is not modified in place
    return
    -------
      DataFrame
          ``dataArr`` plus every fetched row, or None if an exception
          occurred (it is printed and swallowed)
    """
    ct._write_console()
    try:
        # A loop keeps the call stack flat regardless of the page count.
        while True:
            # param: ["ylnl","industry","region","concept","year","quarter","{sort}",{asc},{page},{num}]
            ylnl_list_param = '["ylnl","","","","%d","%d","",0,%d,%d]' % (
                year, quarter, pageNo, ct.OPEN_API_PAGE_NUM)
            request = Request(
                ct.SINA_OPEN_API_URL % (quote(ylnl_list_param, ',[]')))
            request.add_header("User-Agent", ct.USER_AGENT)
            text = urlopen(request, timeout=ct.API_TIMEOUT).read()
            text = text.decode('gbk') if ct.PY3 else text
            js = json.loads(text.strip())
            if js is None:
                return dataArr
            df = pd.DataFrame(js[0]['items'])
            if not df.empty:
                # The column labelled 0 is discarded before naming columns.
                df = df.drop(0, axis=1)
                df.columns = ct.PROFIT_COLS
                # DataFrame.append no longer exists in current pandas;
                # pd.concat gives the same result on old and new releases.
                dataArr = pd.concat([dataArr, df], ignore_index=True)
            if int(js[0]['count']) <= pageNo * ct.OPEN_API_PAGE_NUM:
                return dataArr
            pageNo = pageNo + 1
            ct._write_console()
    except Exception as e:
        # Failures are reported on stdout; callers receive None.
        print(e)
        return None
def _get_growth_data(year, quarter, pageNo, dataArr):
    """
    Collect growth-report rows (query type "cnl") page by page.

    Requests begin at ``pageNo`` and continue with the following pages
    until ``pageNo * ct.OPEN_API_PAGE_NUM`` reaches the ``count`` value
    returned by the service behind ``ct.SINA_OPEN_API_URL``.

    Parameters
    ------
      year:int
                  report year, formatted into the query with %d
      quarter:int
                  report quarter, formatted into the query with %d
      pageNo:int
                  first page to request
      dataArr:DataFrame
                  rows collected so far; it is not modified in place
    return
    -------
      DataFrame
          ``dataArr`` plus every fetched row, or None if an exception
          occurred (it is printed and swallowed)
    """
    ct._write_console()
    try:
        # A loop keeps the call stack flat regardless of the page count.
        while True:
            # param: ["cnl","industry","region","concept","year","quarter","{sort}",{asc},{page},{num}]
            list_param = '["cnl","","","","%d","%d","",0,%d,%d]' % (
                year, quarter, pageNo, ct.OPEN_API_PAGE_NUM)
            request = Request(
                ct.SINA_OPEN_API_URL % (quote(list_param, ',[]')))
            request.add_header("User-Agent", ct.USER_AGENT)
            text = urlopen(request, timeout=ct.API_TIMEOUT).read()
            text = text.decode('gbk') if ct.PY3 else text
            js = json.loads(text.strip())
            if js is None:
                return dataArr
            df = pd.DataFrame(js[0]['items'])
            if not df.empty:
                df.columns = ct.GROWTH_COLS
                # Cells equal to "--", "" or "None" are replaced by "0"
                # before the float cast.
                for col in (ct.GROWTH_COLS[0:4] + ct.GROWTH_COLS[7:]):
                    df[col] = df[col].astype(str).replace("--", "0").replace(
                        "", "0").replace("None", "0").astype(float)
                # DataFrame.append no longer exists in current pandas;
                # pd.concat gives the same result on old and new releases.
                dataArr = pd.concat([dataArr, df], ignore_index=True)
            if int(js[0]['count']) <= pageNo * ct.OPEN_API_PAGE_NUM:
                return dataArr
            pageNo = pageNo + 1
            ct._write_console()
    except Exception as e:
        # Failures are reported on stdout; callers receive None.
        print(e)
        return None
def _get_stock_hq_list(pageNo, dataArr):
    """
    Collect quote-list rows (query ``"hq","hs_a"``) page by page.

    Requests begin at ``pageNo`` and continue with the following pages
    until ``pageNo * ct.OPEN_API_PAGE_NUM`` reaches the ``count`` value
    returned by the service behind ``ct.SINA_OPEN_API_URL``. Column names
    are taken from the ``fields`` entry of each response.

    Parameters
    ------
      pageNo:int
                  first page to request
      dataArr:DataFrame
                  rows collected so far; it is not modified in place
    return
    -------
      DataFrame
          ``dataArr`` plus every fetched row, or None if an exception
          occurred (it is printed and swallowed)
    """
    ct._write_console()
    try:
        # A loop keeps the call stack flat regardless of the page count.
        while True:
            # param: ["hq","hs_a","{sort}",{asc},{page},{num}]
            hq_list_param = '["hq","hs_a","",0,%d,%d]' % (
                pageNo, ct.OPEN_API_PAGE_NUM)
            request = Request(
                ct.SINA_OPEN_API_URL % (quote(hq_list_param, ',[]')))
            request.add_header("User-Agent", ct.USER_AGENT)
            text = urlopen(request, timeout=ct.API_TIMEOUT).read()
            text = text.decode('gbk') if ct.PY3 else text
            js = json.loads(text.strip())
            if js is None:
                return dataArr
            df = pd.DataFrame(js[0]['items'], columns=js[0]['fields'])
            # DataFrame.append no longer exists in current pandas;
            # pd.concat gives the same result on old and new releases.
            dataArr = pd.concat([dataArr, df], ignore_index=True)
            if int(js[0]['count']) <= pageNo * ct.OPEN_API_PAGE_NUM:
                return dataArr
            pageNo = pageNo + 1
            ct._write_console()
    except Exception as e:
        # Failures are reported on stdout; callers receive None.
        print(e)
        return None
def _get_stock_hq_list(pageNo, dataArr):
    """
    Fetch quote-list pages (query ``"hq","hs_a"``) and accumulate the rows.

    Starting at ``pageNo``, pages are requested one after another through
    the ``ct.SINA_OPEN_API_URL`` template until the ``count`` reported by
    the service is covered by ``pageNo * ct.OPEN_API_PAGE_NUM``. Each
    page's ``fields`` entry supplies the column names.

    Parameters
    ------
      pageNo:int
                  first page to request
      dataArr:DataFrame
                  rows collected so far; it is not modified in place
    return
    -------
      DataFrame
          ``dataArr`` followed by the fetched rows, or None when any step
          raised (the exception is printed, not propagated)
    """
    ct._write_console()
    try:
        # Iterative pagination: one loop turn per page instead of one
        # recursion frame per page.
        while True:
            # param: ["hq","hs_a","{sort}",{asc},{page},{num}]
            hq_list_param = '["hq","hs_a","",0,%d,%d]' % (
                pageNo, ct.OPEN_API_PAGE_NUM)
            request = Request(
                ct.SINA_OPEN_API_URL % (quote(hq_list_param, ',[]')))
            request.add_header("User-Agent", ct.USER_AGENT)
            text = urlopen(request, timeout=ct.API_TIMEOUT).read()
            text = text.decode('gbk') if ct.PY3 else text
            js = json.loads(text.strip())
            if js is None:
                return dataArr
            df = pd.DataFrame(js[0]['items'], columns=js[0]['fields'])
            # pd.concat replaces DataFrame.append, which newer pandas
            # releases no longer provide.
            dataArr = pd.concat([dataArr, df], ignore_index=True)
            if int(js[0]['count']) <= pageNo * ct.OPEN_API_PAGE_NUM:
                return dataArr
            pageNo = pageNo + 1
            ct._write_console()
    except Exception as e:
        # Best-effort contract: report the problem and hand back None.
        print(e)
        return None
def _today_ticks(symbol, tdate, pageNo, retry_count, pause):
    """
    Download one page of tick rows and return it as a DataFrame.

    The page at ``ct.TODAY_TICKS_URL % (symbol, tdate, pageNo)`` is parsed,
    the rows of the table with id ``datatbl`` are re-wrapped in a bare
    ``<table>`` and handed to ``pd.read_html``.

    Parameters
    ------
      symbol, tdate, pageNo
                  values substituted into ``ct.TODAY_TICKS_URL``
      retry_count:int
                  maximum number of attempts
      pause:number
                  seconds slept before every attempt
    return
    -------
      DataFrame
          columns named by ``ct.TODAY_TICK_COLUMNS``; '%' is stripped from
          the ``pchange`` values
    Raises IOError(ct.NETWORK_URL_ERROR_MSG) when no attempt succeeds.
    """
    ct._write_console()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            page = lxml.html.parse(
                ct.TODAY_TICKS_URL % (symbol, tdate, pageNo))
            rows = page.xpath('//table[@id=\"datatbl\"]/tbody/tr')
            if ct.PY3:
                fragments = [etree.tostring(row).decode('utf-8')
                             for row in rows]
            else:
                fragments = [etree.tostring(row) for row in rows]
            # Wrap the rows in a table and replace every '--' with '0'.
            table_html = '<table>%s</table>' % ''.join(fragments)
            table_html = table_html.replace('--', '0')
            ticks = pd.read_html(StringIO(table_html), parse_dates=False)[0]
            ticks.columns = ct.TODAY_TICK_COLUMNS
            ticks['pchange'] = ticks['pchange'].map(
                lambda x: x.replace('%', ''))
        except Exception as err:
            # Report the failure and move on to the next attempt.
            print(err)
            continue
        return ticks
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
def _today_ticks(symbol, tdate, pageNo, retry_count, pause):
    """
    Fetch a single page of tick data as a DataFrame, retrying on failure.

    Each attempt sleeps ``pause`` seconds, parses
    ``ct.TODAY_TICKS_URL % (symbol, tdate, pageNo)``, extracts the rows of
    the table whose id is ``datatbl`` and feeds them to ``pd.read_html``.

    Parameters
    ------
      symbol, tdate, pageNo
                  values substituted into ``ct.TODAY_TICKS_URL``
      retry_count:int
                  maximum number of attempts
      pause:number
                  seconds slept before every attempt
    return
    -------
      DataFrame
          columns named by ``ct.TODAY_TICK_COLUMNS``; '%' is stripped from
          the ``pchange`` values
    Raises IOError(ct.NETWORK_URL_ERROR_MSG) when no attempt succeeds.
    """
    ct._write_console()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            url = ct.TODAY_TICKS_URL % (symbol, tdate, pageNo)
            tr_nodes = lxml.html.parse(url).xpath(
                '//table[@id=\"datatbl\"]/tbody/tr')
            if ct.PY3:
                pieces = [etree.tostring(tr).decode('utf-8')
                          for tr in tr_nodes]
            else:
                pieces = [etree.tostring(tr) for tr in tr_nodes]
            # Rebuild a minimal table; every '--' is rewritten to '0'.
            markup = ('<table>%s</table>' % ''.join(pieces)).replace(
                '--', '0')
            frame = pd.read_html(StringIO(markup), parse_dates=False)[0]
            frame.columns = ct.TODAY_TICK_COLUMNS
            frame['pchange'] = frame['pchange'].map(
                lambda x: x.replace('%', ''))
        except Exception as err:
            # Print the error and try again.
            print(err)
            continue
        return frame
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_h_data(code, start=None, end=None, autype='qfq',
               index=False, retry_count=3, pause=0.001):
    '''
    Fetch historical price data, optionally price-adjusted.

    Parameters
    ------
      code:string
                  stock code, e.g. 600848
      start:string
                  start date, format YYYY-MM-DD; when None it defaults to
                  du.today_last_year()
      end:string
                  end date, format YYYY-MM-DD; when None it defaults to
                  du.today()
      autype:string
                  adjustment type: 'qfq' forward-adjusted, 'hfq'
                  backward-adjusted, None unadjusted; default 'qfq'
      index:bool, default False
                  forwarded to _get_index_url and _parse_fq_data; when
                  true the rows are returned without any adjustment step
      retry_count : int, default 3
                  forwarded to _parse_fq_data (retries on network problems)
      pause : float, default 0.001
                  forwarded to _parse_fq_data (seconds to pause between
                  repeated requests)
    return
    -------
      DataFrame indexed by date, newest first, or None when no row falls
      inside [start, end] or (qfq only) no realtime quote is available.
      Columns as documented by the original author:
          date (index), open, high, close, low, volume, amount,
          factor (backward-adjustment factor)
    '''

    def _clip(frame):
        # Keep only the rows whose date lies inside [start, end].
        return frame[(frame.date >= start) & (frame.date <= end)]

    def _finish(frame):
        # Index by date and order newest first.
        return frame.set_index('date').sort_index(ascending=False)

    start = du.today_last_year() if start is None else start
    end = du.today() if end is None else end
    qs = du.get_quarts(start, end)
    ct._write_head()
    data = _parse_fq_data(_get_index_url(index, code, qs[0]), index,
                          retry_count, pause)
    for d in range(1, len(qs)):
        ct._write_console()
        df = _parse_fq_data(_get_index_url(index, code, qs[d]), index,
                            retry_count, pause)
        # pd.concat replaces DataFrame.append, which newer pandas releases
        # no longer provide.
        data = pd.concat([data, df], ignore_index=True)
    # The emptiness test must come first: it short-circuits before the
    # date column is touched.
    if len(data) == 0 or len(_clip(data)) == 0:
        return None
    data = data.drop_duplicates('date')

    if index:
        return _finish(_clip(data))

    if autype == 'hfq':
        data = _clip(data)
        for label in ['open', 'high', 'close', 'low']:
            data[label] = data[label].map(ct.FORMAT).astype(float)
        return _finish(data)

    if autype == 'qfq':
        df = _parase_fq_factor(code, start, end)
        df = df.drop_duplicates('date')
        # DataFrame.sort only exists in old pandas; prefer sort_values
        # when the installed version offers it.
        if hasattr(df, 'sort_values'):
            df = df.sort_values('date', ascending=False)
        else:
            df = df.sort('date', ascending=False)
        frow = df.head(1)  # row with the latest date
        rt = get_realtime_quotes(code)
        if rt is None:
            return None
        # Choose the reference price used to scale the latest factor.
        # NOTE(review): high == low == 0 presumably means no trade was
        # reported today - confirm against get_realtime_quotes.
        if float(rt['high']) == 0 and float(rt['low']) == 0:
            preClose = float(rt['pre_close'])
        elif du.is_holiday(du.today()):
            preClose = float(rt['price'])
        elif 9 < du.get_hour() < 18:
            preClose = float(rt['pre_close'])
        else:
            preClose = float(rt['price'])
        rate = float(frow['factor']) / preClose
        data = _clip(data)
        for label in ['open', 'high', 'low', 'close']:
            data[label] = (data[label] / rate).map(ct.FORMAT).astype(float)
        return _finish(data)

    # Any other autype (e.g. None): divide the prices by their own factor.
    for label in ['open', 'high', 'close', 'low']:
        data[label] = data[label] / data['factor']
    data = _clip(data)
    for label in ['open', 'high', 'close', 'low']:
        data[label] = data[label].map(ct.FORMAT)
    return _finish(data).astype(float)
def get_h_data(code, start=None, end=None, autype='qfq',
               index=False, retry_count=3, pause=0.001):
    '''
    Fetch historical price data, optionally price-adjusted.

    Parameters
    ------
      code:string
                  stock code, e.g. 600848
      start:string
                  start date, format YYYY-MM-DD; when None it defaults to
                  du.today_last_year()
      end:string
                  end date, format YYYY-MM-DD; when None it defaults to
                  du.today()
      autype:string
                  adjustment type: 'qfq' forward-adjusted, 'hfq'
                  backward-adjusted, None unadjusted; default 'qfq'
      index:bool, default False
                  forwarded to _get_index_url and _parse_fq_data; when
                  true the rows are returned without any adjustment step
      retry_count : int, default 3
                  forwarded to _parse_fq_data (retries on network problems)
      pause : float, default 0.001
                  forwarded to _parse_fq_data (seconds to pause between
                  repeated requests)
    return
    -------
      DataFrame indexed by date, newest first, or None when no row falls
      inside [start, end] or (qfq only) no realtime quote is available.
      Columns as documented by the original author:
          date (index), open, high, close, low, volume, amount,
          factor (backward-adjustment factor)
    '''

    def _clip(frame):
        # Restrict a frame to the requested [start, end] date window.
        return frame[(frame.date >= start) & (frame.date <= end)]

    def _finish(frame):
        # Use the date as index, most recent row first.
        return frame.set_index('date').sort_index(ascending=False)

    start = du.today_last_year() if start is None else start
    end = du.today() if end is None else end
    qs = du.get_quarts(start, end)
    ct._write_head()
    data = _parse_fq_data(_get_index_url(index, code, qs[0]), index,
                          retry_count, pause)
    for d in range(1, len(qs)):
        ct._write_console()
        df = _parse_fq_data(_get_index_url(index, code, qs[d]), index,
                            retry_count, pause)
        # DataFrame.append is gone from current pandas; pd.concat behaves
        # the same on old and new releases.
        data = pd.concat([data, df], ignore_index=True)
    # Check for emptiness first so the date column is only accessed when
    # there is data.
    if len(data) == 0 or len(_clip(data)) == 0:
        return None
    data = data.drop_duplicates('date')

    if index:
        return _finish(_clip(data))

    if autype == 'hfq':
        data = _clip(data)
        for label in ['open', 'high', 'close', 'low']:
            data[label] = data[label].map(ct.FORMAT).astype(float)
        return _finish(data)

    if autype == 'qfq':
        df = _parase_fq_factor(code, start, end)
        df = df.drop_duplicates('date')
        # Old pandas spells this DataFrame.sort, newer pandas
        # DataFrame.sort_values; support both.
        if hasattr(df, 'sort_values'):
            df = df.sort_values('date', ascending=False)
        else:
            df = df.sort('date', ascending=False)
        frow = df.head(1)  # row with the latest date
        rt = get_realtime_quotes(code)
        if rt is None:
            return None
        # Pick the reference price that the latest factor is divided by.
        # NOTE(review): high == low == 0 presumably means no trade was
        # reported today - confirm against get_realtime_quotes.
        if float(rt['high']) == 0 and float(rt['low']) == 0:
            preClose = float(rt['pre_close'])
        elif du.is_holiday(du.today()):
            preClose = float(rt['price'])
        elif 9 < du.get_hour() < 18:
            preClose = float(rt['pre_close'])
        else:
            preClose = float(rt['price'])
        rate = float(frow['factor']) / preClose
        data = _clip(data)
        for label in ['open', 'high', 'low', 'close']:
            data[label] = (data[label] / rate).map(ct.FORMAT).astype(float)
        return _finish(data)

    # Remaining autype values (e.g. None): remove the factor from prices.
    for label in ['open', 'high', 'close', 'low']:
        data[label] = data[label] / data['factor']
    data = _clip(data)
    for label in ['open', 'high', 'close', 'low']:
        data[label] = data[label].map(ct.FORMAT)
    return _finish(data).astype(float)