def get_index(): """ 获取大盘指数行情 return ------- DataFrame code:指数代码 name:指数名称 change:涨跌幅 open:开盘价 preclose:昨日收盘价 close:收盘价 high:最高价 low:最低价 volume:成交量(手) amount:成交金额(亿元) """ request = Request(ct.INDEX_HQ_URL%(ct.P_TYPE['http'], ct.DOMAINS['sinahq']),headers=ua.get_ua()) text = urlopen(request, timeout=10).read() text = text.decode('GBK') text = text.replace('var hq_str_sh', '').replace('var hq_str_sz', '') text = text.replace('";', '').replace('"', '').replace('=', ',') text = '%s%s'%(ct.INDEX_HEADER, text) df = pd.read_csv(StringIO(text), sep=',', thousands=',') df['change'] = (df['close'] / df['preclose'] - 1 ) * 100 df['amount'] = df['amount'] / 100000000 df['change'] = df['change'].map(ct.FORMAT) df['amount'] = df['amount'].map(ct.FORMAT4) df = df[ct.INDEX_COLS] df['code'] = df['code'].map(lambda x:str(x).zfill(6)) df['change'] = df['change'].astype(float) df['amount'] = df['amount'].astype(float) return df
def _parsing_dayprice_json(types=None, page=1): """ 处理当日行情分页数据,格式为json Parameters ------ pageNum:页码 return ------- DataFrame 当日所有股票交易数据(DataFrame) """ ct._write_console() request = Request(ct.SINA_DAY_PRICE_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['jv'], types, page),headers=ua.get_ua()) text = urlopen(request, timeout=10).read() if text == 'null': return None reg = re.compile(r'\,(.*?)\:') text = reg.sub(r',"\1":', text.decode('gbk') if ct.PY3 else text) text = text.replace('"{symbol', '{"symbol') text = text.replace('{symbol', '{"symbol"') if ct.PY3: jstr = json.dumps(text) else: jstr = json.dumps(text, encoding='GBK') js = json.loads(jstr) df = pd.DataFrame(pd.read_json(js, dtype={'code':object}), columns=ct.DAY_TRADING_COLUMNS) df = df.drop('symbol', axis=1) # df = df.ix[df.volume > 0] return df
def _parse_fq_data(url, index, retry_count, pause): for _ in range(retry_count): time.sleep(pause) try: request = Request(url,headers=ua.get_ua()) text = urlopen(request, timeout=10).read() text = text.decode('GBK') html = lxml.html.parse(StringIO(text)) res = html.xpath('//table[@id=\"FundHoldSharesTable\"]') if ct.PY3: sarr = [etree.tostring(node).decode('utf-8') for node in res] else: sarr = [etree.tostring(node) for node in res] sarr = ''.join(sarr) if sarr == '': return None df = pd.read_html(sarr, skiprows = [0, 1])[0] if len(df) == 0: return pd.DataFrame() if index: df.columns = ct.HIST_FQ_COLS[0:7] else: df.columns = ct.HIST_FQ_COLS if df['date'].dtypes == np.object: df['date'] = pd.to_datetime(df['date']) df = df.drop_duplicates('date') except ValueError as e: # 时间较早,已经读不到数据 return None except Exception as e: print(e) else: return df raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_tick_data(code=None, date=None, retry_count=3, pause=0.001, src='sn'): """ 获取分笔数据 Parameters ------ code:string 股票代码 e.g. 600848 date:string 日期 format: YYYY-MM-DD retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 src : 数据源选择,可输入sn(新浪)、tt(腾讯)、nt(网易),默认sn return ------- DataFrame 当日所有股票交易数据(DataFrame) 属性:成交时间、成交价格、价格变动,成交手、成交金额(元),买卖类型 """ if code is None or len(code)!=6 or date is None: return None if (src.strip() not in ct.TICK_SRCS): print(ct.TICK_SRC_ERROR) return None symbol = ct._code_to_symbol(code) symbol_dgt = ct._code_to_symbol_dgt(code) datestr = date.replace('-', '') url = { ct.TICK_SRCS[0] : ct.TICK_PRICE_URL % (ct.P_TYPE['http'], ct.DOMAINS['sf'], ct.PAGES['dl'], date, symbol), ct.TICK_SRCS[1] : ct.TICK_PRICE_URL_TT % (ct.P_TYPE['http'], ct.DOMAINS['tt'], ct.PAGES['idx'], symbol, datestr), ct.TICK_SRCS[2] : ct.TICK_PRICE_URL_NT % (ct.P_TYPE['http'], ct.DOMAINS['163'], date[0:4], datestr, symbol_dgt) } for _ in range(retry_count): time.sleep(pause) try: if src == ct.TICK_SRCS[2]: df = pd.read_excel(url[src]) df.columns = ct.TICK_COLUMNS else: re = Request(url[src],headers=ua.get_ua()) lines = urlopen(re, timeout=10).read() lines = lines.decode('GBK') if len(lines) < 20: return None df = pd.read_table(StringIO(lines), names=ct.TICK_COLUMNS, skiprows=[0]) except Exception as e: print(e) else: return df raise IOError(ct.NETWORK_URL_ERROR_MSG)
def top10_holders(code=None, year=None, quarter=None, gdtype='0', retry_count=3, pause=0.001): if code is None: return None else: code = ct._code_to_symbol(code) gdtype = 'LT' if gdtype == '1' else '' qdate = '' if (year is not None) & (quarter is not None): qdate = du.get_q_date(year, quarter) for _ in range(retry_count): time.sleep(pause) try: request = Request(rv.TOP10_HOLDERS_URL%(ct.P_TYPE['http'], ct.DOMAINS['gw'], gdtype, code.upper()),headers=ua.get_ua()) lines = urlopen(request, timeout = 10).read() lines = lines.decode('utf8') if ct.PY3 else lines reg = re.compile(r'= \'\[(.*?)\]\';') lines = reg.findall(lines)[0] jss = json.loads('[%s]' %lines) summ = [] data = pd.DataFrame() for row in jss: qt = row['jzrq'] hold = row['ljcy'] change = row['ljbh'] props = row['ljzb'] arow = [qt, hold, change ,props] summ.append(arow) ls = row['sdgdList'] dlist = [] for inrow in ls: sharetype = inrow['gbxz'] name = inrow['gdmc'] hold = inrow['cgs'] h_pro = inrow['zzgs'] status = inrow['zjqk'] dlist.append([qt, name, hold, h_pro, sharetype, status]) ddata = pd.DataFrame(dlist, columns=rv.TOP10_PER_COLS) data = data.append(ddata, ignore_index=True) df = pd.DataFrame(summ, columns=rv.TOP10_SUMM_COLS) if qdate != '': df = df[df.quarter == qdate] data = data[data.quarter == qdate] except Exception as e: print(e) else: return df, data raise IOError(ct.NETWORK_URL_ERROR_MSG)
def xsg_data(year=None, month=None, retry_count=3, pause=0.001): """ 获取限售股解禁数据 Parameters -------- year:年份,默认为当前年 month:解禁月份,默认为当前月 retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 Return ------ DataFrame code:股票代码 name:名称 date:解禁日期 count:解禁数量(万股) ratio:占总盘比率 """ year = du.get_year() if year is None else year month = du.get_month() if month is None else month for _ in range(retry_count): time.sleep(pause) try: request = Request(rv.XSG_URL%(ct.P_TYPE['http'], ct.DOMAINS['em'], ct.PAGES['emxsg'], year, month),headers=ua.get_ua()) lines = urlopen(request, timeout = 10).read() lines = lines.decode('utf-8') if ct.PY3 else lines except Exception as e: print(e) else: da = lines[3:len(lines)-3] list = [] for row in da.split('","'): list.append([data for data in row.split(',')]) df = pd.DataFrame(list) df = df[[1, 3, 4, 5, 6]] for col in [5, 6]: df[col] = df[col].astype(float) df[5] = df[5]/10000 df[6] = df[6]*100 df[5] = df[5].map(ct.FORMAT) df[6] = df[6].map(ct.FORMAT) df.columns = rv.XSG_COLS return df raise IOError(ct.NETWORK_URL_ERROR_MSG)
def _get_type_data(url): try: request = Request(url) data_str = urlopen(request, timeout=10).read() data_str = data_str.decode('GBK') data_str = data_str.split('=')[1] data_json = json.loads(data_str) df = pd.DataFrame( [[row.split(',')[0], row.split(',')[1]] for row in data_json.values()], columns=['tag', 'name'], headers=ua.get_ua()) return df except Exception as er: print(str(er))
def _get_data(url): try: request = Request(url,headers=ua.get_ua()) data_str = urlopen(request, timeout=10).read() data_str = data_str.split('=')[1] data_str = data_str.replace('futures', '"futures"') if six.PY3: data_str = data_str.decode('utf-8') data_str = json.loads(data_str) df = pd.DataFrame([[col for col in row.split(',')] for row in data_str.values()[0]] ) df = df[[1, 2, 5, 4, 6, 7, 13, 9, 17, 18, 16, 21, 22]] df.columns = ct.INTL_FUTURES_COL return df except Exception as er: print(str(er))
def _day_cinema(date=None, pNo=1, retry_count=3, pause=0.001): ct._write_console() for _ in range(retry_count): time.sleep(pause) try: request = Request(ct.BOXOFFICE_CBD%(ct.P_TYPE['http'], ct.DOMAINS['mbox'], ct.BOX, pNo, date),headers=ua.get_ua()) lines = urlopen(request, timeout = 10).read() if len(lines) < 15: #no data return None except Exception as e: print(e) else: js = json.loads(lines.decode('utf-8') if ct.PY3 else lines) df = pd.DataFrame(js['data1']) df = df.drop(['CinemaID'], axis=1) return df
def month_boxoffice(date=None, retry_count=3, pause=0.001): """ 获取单月电影票房数据 数据来源:EBOT艺恩票房智库 Parameters ------ date:日期,默认为上一月,格式YYYY-MM retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame Irank 排名 MovieName 电影名称 WomIndex 口碑指数 avgboxoffice 平均票价 avgshowcount 场均人次 box_pro 月度占比 boxoffice 单月票房(万) days 月内天数 releaseTime 上映日期 """ if date is None: date = du.day_last_week(-30)[0:7] elif len(date)>8: print(ct.BOX_INPUT_ERR_MSG) return date += '-01' for _ in range(retry_count): time.sleep(pause) try: request = Request(ct.BOXOFFICE_MONTH%(ct.P_TYPE['http'], ct.DOMAINS['mbox'], ct.BOX, date),headers=ua.get_ua()) lines = urlopen(request, timeout = 10).read() if len(lines) < 15: #no data return None except Exception as e: print(e) else: js = json.loads(lines.decode('utf-8') if ct.PY3 else lines) df = pd.DataFrame(js['data1']) df = df.drop(['defaultImage', 'EnMovieID'], axis=1) return df
def _sz_hz(date='', retry_count=3, pause=0.001): for _ in range(retry_count): time.sleep(pause) ct._write_console() try: request = Request(rv.MAR_SZ_HZ_URL%(ct.P_TYPE['http'], ct.DOMAINS['szse'], ct.PAGES['szsefc'], date),headers=ua.get_ua()) lines = urlopen(request, timeout = 10).read() if len(lines) <= 200: return pd.DataFrame() df = pd.read_html(lines, skiprows=[0])[0] df.columns = rv.MAR_SZ_HZ_COLS df['opDate'] = date except Exception as e: print(e) else: return df raise IOError(ct.NETWORK_URL_ERROR_MSG)
def day_boxoffice(date=None, retry_count=3, pause=0.001): """ 获取单日电影票房数据 数据来源:EBOT艺恩票房智库 Parameters ------ date:日期,默认为上一日 retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame AvgPrice 平均票价 AvpPeoPle 场均人次 BoxOffice 单日票房(万) BoxOffice_Up 环比变化 (%) IRank 排名 MovieDay 上映天数 MovieName 影片名 SumBoxOffice 累计票房(万) WomIndex 口碑指数 """ for _ in range(retry_count): time.sleep(pause) try: if date is None: date = 0 else: date = int(du.diff_day(du.today(), date)) + 1 request = Request(ct.BOXOFFICE_DAY%(ct.P_TYPE['http'], ct.DOMAINS['mbox'], ct.BOX, date, _random()),headers=ua.get_ua()) lines = urlopen(request, timeout = 10).read() if len(lines) < 15: #no data return None except Exception as e: print(e) else: js = json.loads(lines.decode('utf-8') if ct.PY3 else lines) df = pd.DataFrame(js['data1']) df = df.drop(['MovieImg', 'BoxOffice1', 'MovieID', 'Director', 'IRank_pro'], axis=1) return df
def get_today_ticks(code=None, retry_count=3, pause=0.001): """ 获取当日分笔明细数据 Parameters ------ code:string 股票代码 e.g. 600848 retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame 当日所有股票交易数据(DataFrame) 属性:成交时间、成交价格、价格变动,成交手、成交金额(元),买卖类型 """ if code is None or len(code)!=6 : return None symbol = _code_to_symbol(code) date = du.today() for _ in range(retry_count): time.sleep(pause) try: request = Request(ct.TODAY_TICKS_PAGE_URL % (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['jv'], date, symbol),headers=ua.get_ua()) data_str = urlopen(request, timeout=10).read() data_str = data_str.decode('GBK') data_str = data_str[1:-1] data_str = eval(data_str, type('Dummy', (dict,), dict(__getitem__ = lambda s, n:n))()) data_str = json.dumps(data_str) data_str = json.loads(data_str) pages = len(data_str['detailPages']) data = pd.DataFrame() ct._write_head() for pNo in range(1, pages+1): data = data.append(_today_ticks(symbol, date, pNo, retry_count, pause), ignore_index=True) except Exception as er: print(str(er)) else: return data raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_latest_news(top=None, show_content=False): """ 获取即时财经新闻 Parameters -------- top:数值,显示最新消息的条数,默认为80条 show_content:是否显示新闻内容,默认False Return -------- DataFrame classify :新闻类别 title :新闻标题 time :发布时间 url :新闻链接 content:新闻内容(在show_content为True的情况下出现) """ top = ct.PAGE_NUM[2] if top is None else top try: request = Request(nv.LATEST_URL % (ct.P_TYPE['http'], ct.DOMAINS['sina'], ct.PAGES['lnews'], top, _random()),headers=ua.get_ua()) data_str = urlopen(request, timeout=10).read() data_str = data_str.decode('GBK') data_str = data_str.split('=')[1][:-1] data_str = eval(data_str, type('Dummy', (dict,), dict(__getitem__ = lambda s, n:n))()) data_str = json.dumps(data_str) data_str = json.loads(data_str) data_str = data_str['list'] data = [] for r in data_str: rt = datetime.fromtimestamp(r['time']) rtstr = datetime.strftime(rt, "%m-%d %H:%M") arow = [r['channel']['title'], r['title'], rtstr, r['url']] if show_content: arow.append(latest_content(r['url'])) data.append(arow) df = pd.DataFrame(data, columns=nv.LATEST_COLS_C if show_content else nv.LATEST_COLS) return df except Exception as er: print(str(er))
def sz_margin_details(date='', retry_count=3, pause=0.001): """ 获取深市融资融券明细列表 Parameters -------- date:string 明细数据日期 format:YYYY-MM-DD 默认为空'' retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 Return ------ DataFrame opDate:信用交易日期 stockCode:标的证券代码 securityAbbr:标的证券简称 rzmre: 融资买入额(元) rzye:融资余额(元) rqmcl: 融券卖出量 rqyl: 融券余量 rqye: 融券余量(元) rzrqye:融资融券余额(元) """ for _ in range(retry_count): time.sleep(pause) try: request = Request(rv.MAR_SZ_MX_URL%(ct.P_TYPE['http'], ct.DOMAINS['szse'], ct.PAGES['szsefc'], date),headers=ua.get_ua()) lines = urlopen(request, timeout = 10).read() if len(lines) <= 200: return pd.DataFrame() df = pd.read_html(lines, skiprows=[0])[0] df.columns = rv.MAR_SZ_MX_COLS df['stockCode'] = df['stockCode'].map(lambda x:str(x).zfill(6)) df['opDate'] = date except Exception as e: print(e) else: return df raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_cash_flow(code): """ 获取某股票的历史所有时期现金流表 Parameters -------- code:str 股票代码 e.g:600518 Return -------- DataFrame 行列名称为中文且数目较多,建议获取数据后保存到本地查看 """ if code.isdigit(): request = Request(ct.SINA_CASHFLOW_URL % (code), headers=ua.get_ua()) text = urlopen(request, timeout=10).read() text = text.decode('GBK') text = text.replace('\t\n', '\r\n') text = text.replace('\t', ',') df = pd.read_csv(StringIO(text), dtype={'code': 'object'}) return df
def _parase_fq_factor(code, start, end): symbol = _code_to_symbol(code) request = Request(ct.HIST_FQ_FACTOR_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], symbol),headers=ua.get_ua()) text = urlopen(request, timeout=10).read() text = text[1:len(text)-1] text = text.decode('utf-8') if ct.PY3 else text text = text.replace('{_', '{"') text = text.replace('total', '"total"') text = text.replace('data', '"data"') text = text.replace(':"', '":"') text = text.replace('",_', '","') text = text.replace('_', '-') text = json.loads(text) df = pd.DataFrame({'date':list(text['data'].keys()), 'factor':list(text['data'].values())}) df['date'] = df['date'].map(_fun_except) # for null case if df['date'].dtypes == np.object: df['date'] = pd.to_datetime(df['date']) df = df.drop_duplicates('date') df['factor'] = df['factor'].astype(float) return df
def get_stock_basics(date=None): """ 获取沪深上市公司基本情况 Parameters date:日期YYYY-MM-DD,默认为上一个交易日,目前只能提供2016-08-09之后的历史数据 Return -------- DataFrame code,代码 name,名称 industry,细分行业 area,地区 pe,市盈率 outstanding,流通股本 totals,总股本(万) totalAssets,总资产(万) liquidAssets,流动资产 fixedAssets,固定资产 reserved,公积金 reservedPerShare,每股公积金 eps,每股收益 bvps,每股净资 pb,市净率 timeToMarket,上市日期 """ wdate = du.last_tddate() if date is None else date wdate = wdate.replace('-', '') if wdate < '20160809': return None datepre = '' if date is None else wdate[0:4] + wdate[4:6] + '/' request = Request(ct.ALL_STOCK_BASICS_FILE % (datepre, '' if date is None else wdate), headers=ua.get_ua()) text = urlopen(request, timeout=10).read() text = text.decode('GBK') text = text.replace('--', '') df = pd.read_csv(StringIO(text), dtype={'code': 'object'}) df = df.set_index('code') return df
def get_sina_dd(code=None, date=None, vol=400, retry_count=3, pause=0.001): """ 获取sina大单数据 Parameters ------ code:string 股票代码 e.g. 600848 date:string 日期 format:YYYY-MM-DD retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame 当日所有股票交易数据(DataFrame) 属性:股票代码 股票名称 交易时间 价格 成交量 前一笔价格 类型(买、卖、中性盘) """ if code is None or len(code)!=6 or date is None: return None symbol = _code_to_symbol(code) vol = vol*100 for _ in range(retry_count): time.sleep(pause) try: re = Request(ct.SINA_DD % (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['sinadd'], symbol, vol, date),headers=ua.get_ua()) lines = urlopen(re, timeout=10).read() lines = lines.decode('GBK') if len(lines) < 100: return None df = pd.read_csv(StringIO(lines), names=ct.SINA_DD_COLS, skiprows=[0]) if df is not None: df['code'] = df['code'].map(lambda x: x[2:]) except Exception as e: print(e) else: return df raise IOError(ct.NETWORK_URL_ERROR_MSG)
def _get_report_data(year, quarter, pageNo, dataArr, retry_count=3, pause=0.001): ct._write_console() for _ in range(retry_count): time.sleep(pause) try: request = Request( ct.REPORT_URL % (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year, quarter, pageNo, ct.PAGE_NUM[1]), headers=ua.get_ua()) text = urlopen(request, timeout=10).read() text = text.decode('GBK') text = text.replace('--', '') html = lxml.html.parse(StringIO(text)) res = html.xpath("//table[@class=\"list_table\"]/tr") if ct.PY3: sarr = [etree.tostring(node).decode('utf-8') for node in res] else: sarr = [etree.tostring(node) for node in res] sarr = ''.join(sarr) sarr = '<table>%s</table>' % sarr df = pd.read_html(sarr)[0] df = df.drop(11, axis=1) df.columns = ct.REPORT_COLS dataArr = dataArr.append(df, ignore_index=True) nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick') if len(nextPage) > 0: pageNo = re.findall(r'\d+', nextPage[0])[0] return _get_report_data(year, quarter, pageNo, dataArr) else: return dataArr except Exception as e: pass raise IOError(ct.NETWORK_URL_ERROR_MSG)
def _get_k_data(url, dataflag='', symbol='', code = '', index = False, ktype = '', retry_count=3, pause=0.001): for _ in range(retry_count): time.sleep(pause) try: request = Request(url,headers=ua.get_ua()) lines = urlopen(request, timeout = 10).read() if len(lines) < 100: #no data return None except Exception as e: print(e) else: lines = lines.decode('utf-8') if ct.PY3 else lines lines = lines.split('=')[1] reg = re.compile(r',{"nd.*?}') lines = re.subn(reg, '', lines) js = json.loads(lines[0]) dataflag = dataflag if dataflag in list(js['data'][symbol].keys()) else ct.TT_K_TYPE[ktype.upper()] if len(js['data'][symbol][dataflag]) == 0: return None if len(js['data'][symbol][dataflag][0]) == 6: df = pd.DataFrame(js['data'][symbol][dataflag], columns = ct.KLINE_TT_COLS_MINS) else: df = pd.DataFrame(js['data'][symbol][dataflag], columns = ct.KLINE_TT_COLS) df['code'] = symbol if index else code if ktype in ct.K_MIN_LABELS: df['date'] = df['date'].map(lambda x: '%s-%s-%s %s:%s'%(x[0:4], x[4:6], x[6:8], x[8:10], x[10:12])) for col in df.columns[1:6]: df[col] = df[col].astype(float) return df
def _holding_cotent(start, end, pageNo, retry_count, pause): for _ in range(retry_count): time.sleep(pause) if pageNo>0: ct._write_console() try: request = Request(rv.FUND_HOLDS_URL%(ct.P_TYPE['http'], ct.DOMAINS['163'], ct.PAGES['163fh'], ct.PAGES['163fh'], pageNo, start, end, _random(5)),headers=ua.get_ua()) lines = urlopen(request, timeout = 10).read() lines = lines.decode('utf-8') if ct.PY3 else lines lines = lines.replace('--', '0') lines = json.loads(lines) data = lines['list'] df = pd.DataFrame(data) df = df.drop(['CODE', 'ESYMBOL', 'EXCHANGE', 'NAME', 'RN', 'SHANGQIGUSHU', 'SHANGQISHIZHI', 'SHANGQISHULIANG'], axis=1) for col in ['GUSHU', 'GUSHUBIJIAO', 'SHIZHI', 'SCSTC27']: df[col] = df[col].astype(float) df['SCSTC27'] = df['SCSTC27']*100 df['GUSHU'] = df['GUSHU']/10000 df['GUSHUBIJIAO'] = df['GUSHUBIJIAO']/10000 df['SHIZHI'] = df['SHIZHI']/10000 df['GUSHU'] = df['GUSHU'].map(ct.FORMAT) df['GUSHUBIJIAO'] = df['GUSHUBIJIAO'].map(ct.FORMAT) df['SHIZHI'] = df['SHIZHI'].map(ct.FORMAT) df['SCSTC27'] = df['SCSTC27'].map(ct.FORMAT) df.columns = rv.FUND_HOLDS_COLS df = df[['code', 'name', 'date', 'nums', 'nlast', 'count', 'clast', 'amount', 'ratio']] except Exception as e: print(e) else: if pageNo == 0: return df, int(lines['pagecount']) else: return df raise IOError(ct.NETWORK_URL_ERROR_MSG)
def realtime_boxoffice(retry_count=3,pause=0.001): """ 获取实时电影票房数据 数据来源:EBOT艺恩票房智库 Parameters ------ retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame BoxOffice 实时票房(万) Irank 排名 MovieName 影片名 boxPer 票房占比 (%) movieDay 上映天数 sumBoxOffice 累计票房(万) time 数据获取时间 """ for _ in range(retry_count): time.sleep(pause) try: request = Request(ct.MOVIE_BOX%(ct.P_TYPE['http'], ct.DOMAINS['mbox'], ct.BOX, _random()),headers=ua.get_ua()) lines = urlopen(request, timeout = 10).read() if len(lines) < 15: #no data return None except Exception as e: print(e) else: js = json.loads(lines.decode('utf-8') if ct.PY3 else lines) df = pd.DataFrame(js['data2']) df = df.drop(['MovieImg','mId'], axis=1) df['time'] = du.get_now() return df
def _get_detail(tag, retry_count=3, pause=0.001): for _ in range(retry_count): time.sleep(pause) try: ct._write_console() request = Request( ct.SINA_DATA_DETAIL_URL % (ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['jv'], tag), headers=ua.get_ua()) text = urlopen(request, timeout=10).read() text = text.decode('gbk') except _network_error_classes: pass else: reg = re.compile(r'\,(.*?)\:') text = reg.sub(r',"\1":', text) text = text.replace('"{symbol', '{"symbol') text = text.replace('{symbol', '{"symbol"') jstr = json.dumps(text) js = json.loads(jstr) df = pd.DataFrame(pd.read_json(js, dtype={'code': object}), columns=ct.THE_FIELDS) df = df[ct.FOR_CLASSIFY_B_COLS] return df
def get_hist_data(code=None, start=None, end=None, ktype='D', retry_count=3, pause=0.001): """ 获取个股历史交易记录 Parameters ------ code:string 股票代码 e.g. 600848 start:string 开始日期 format:YYYY-MM-DD 为空时取到API所提供的最早日期数据 end:string 结束日期 format:YYYY-MM-DD 为空时取到最近一个交易日数据 ktype:string 数据类型,D=日k线 W=周 M=月 5=5分钟 15=15分钟 30=30分钟 60=60分钟,默认为D retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame 属性:日期 ,开盘价, 最高价, 收盘价, 最低价, 成交量, 价格变动 ,涨跌幅,5日均价,10日均价,20日均价,5日均量,10日均量,20日均量,换手率 """ symbol = _code_to_symbol(code) url = '' if ktype.upper() in ct.K_LABELS: url = ct.DAY_PRICE_URL%(ct.P_TYPE['http'], ct.DOMAINS['ifeng'], ct.K_TYPE[ktype.upper()], symbol) elif ktype in ct.K_MIN_LABELS: url = ct.DAY_PRICE_MIN_URL%(ct.P_TYPE['http'], ct.DOMAINS['ifeng'], symbol, ktype) else: raise TypeError('ktype input error.') for _ in range(retry_count): time.sleep(pause) try: request = Request(url,headers=ua.get_ua()) lines = urlopen(request, timeout = 10).read() if len(lines) < 15: #no data return None except Exception as e: print(e) else: js = json.loads(lines.decode('utf-8') if ct.PY3 else lines) cols = [] if (code in ct.INDEX_LABELS) & (ktype.upper() in ct.K_LABELS): cols = ct.INX_DAY_PRICE_COLUMNS else: cols = ct.DAY_PRICE_COLUMNS if len(js['record'][0]) == 14: cols = ct.INX_DAY_PRICE_COLUMNS df = pd.DataFrame(js['record'], columns=cols) if ktype.upper() in ['D', 'W', 'M']: df = df.applymap(lambda x: x.replace(u',', u'')) df[df==''] = 0 for col in cols[1:]: df[col] = df[col].astype(float) if start is not None: df = df[df.date >= start] if end is not None: df = df[df.date <= end] if (code in ct.INDEX_LABELS) & (ktype in ct.K_MIN_LABELS): df = df.drop('turnover', axis=1) df = df.set_index('date') df = df.sort_index(ascending = False) return df raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_realtime_quotes(symbols=None): """ 获取实时交易数据 getting real time quotes data 用于跟踪交易情况(本次执行的结果-上一次执行的数据) Parameters ------ symbols : string, array-like object (list, tuple, Series). return ------- DataFrame 实时交易数据 属性:0:name,股票名字 1:open,今日开盘价 2:pre_close,昨日收盘价 3:price,当前价格 4:high,今日最高价 5:low,今日最低价 6:bid,竞买价,即“买一”报价 7:ask,竞卖价,即“卖一”报价 8:volumn,成交量 maybe you need do volumn/100 9:amount,成交金额(元 CNY) 10:b1_v,委买一(笔数 bid volume) 11:b1_p,委买一(价格 bid price) 12:b2_v,“买二” 13:b2_p,“买二” 14:b3_v,“买三” 15:b3_p,“买三” 16:b4_v,“买四” 17:b4_p,“买四” 18:b5_v,“买五” 19:b5_p,“买五” 20:a1_v,委卖一(笔数 ask volume) 21:a1_p,委卖一(价格 ask price) ... 30:date,日期; 31:time,时间; """ symbols_list = '' if isinstance(symbols, list) or isinstance(symbols, set) or isinstance(symbols, tuple) or isinstance(symbols, pd.Series): for code in symbols: symbols_list += _code_to_symbol(code) + ',' else: symbols_list = _code_to_symbol(symbols) symbols_list = symbols_list[:-1] if len(symbols_list) > 8 else symbols_list request = Request(ct.LIVE_DATA_URL%(ct.P_TYPE['http'], ct.DOMAINS['sinahq'], _random(), symbols_list),headers=ua.get_ua()) text = urlopen(request,timeout=10).read() text = text.decode('GBK') reg = re.compile(r'\="(.*?)\";') data = reg.findall(text) regSym = re.compile(r'(?:sh|sz)(.*?)\=') syms = regSym.findall(text) data_list = [] syms_list = [] for index, row in enumerate(data): if len(row)>1: data_list.append([astr for astr in row.split(',')]) syms_list.append(syms[index]) if len(syms_list) == 0: return None df = pd.DataFrame(data_list, columns=ct.LIVE_DATA_COLS) df = df.drop('s', axis=1) df['code'] = syms_list ls = [cls for cls in df.columns if '_v' in cls] for txt in ls: df[txt] = df[txt].map(lambda x : x[:-2]) return df