def fetch_quotes(stock_codes):
    """Fetch intraday quotes for a list of stocks.

    Parameters
    ----------
    stock_codes : list
        List of stock codes.

    Returns
    -------
    res : DataFrame
        One row per stock code, 33 columns.

    Example
    -------
    >>> df = fetch_quotes(['000001','000002'])
    >>> df.iloc[:,:8]
    股票代码 股票简称 开盘 前收盘 现价 最高 最低 竞买价
    0 000001 平安银行 11.040 11.050 10.900 11.050 10.880 10.900
    1 000002 万 科A 33.700 34.160 33.290 33.990 33.170 33.290
    """
    codes = ensure_list(stock_codes)
    batch_size = 800  # the quote service accepts batches of codes per request
    url_template = 'http://hq.sinajs.cn/list={}'
    frames = []
    for batch in partition_all(batch_size, codes):
        url = url_template.format(','.join(map(_add_prefix, batch)))
        text = get_page_response(url).text
        frames.append(_to_dataframe(text, batch))
    return pd.concat(frames).sort_values('股票代码')
def _parse_report_data(url):
    """Download a CSV financial report and parse it, dropping the trailing empty column."""
    response = get_page_response(url)
    # e.g. 000001 balance sheet -> "export tax rebate receivable" cell is ' --',
    # which breaks dtype inference, so treat every dash variant as NaN.
    na_values = ['--', ' --', '-- ']
    return pd.read_csv(StringIO(response.text), na_values=na_values).iloc[:, :-1]
def fetch_report_periods(stock_code, query):
    """Report periods available for download (shareholder / fund holdings).

    Returns
    -------
    dict
        Keys are period-end dates, e.g. '2017-06-30'; values are the
        site's date-range strings, e.g. '2017-06-30,2017-03-31'.
    """
    valid_types = ('c', 't', 'jjcg')
    assert query in valid_types, '{}不在有效类型{}中'.format(query, valid_types)
    if query == 'jjcg':
        type_, target_num = 'jjcg', 0
    elif query == 't':
        type_, target_num = 'gdfx', 1
    else:
        type_, target_num = 'gdfx', 0
    url = 'http://quotes.money.163.com/f10/{type}_{stock_code}.html'.format(
        stock_code=stock_code, type=type_)
    response = get_page_response(url, 'post')
    soup = BeautifulSoup(response.text, 'lxml')
    selects = soup.find_all('select', {'id': '', 'name': ''})
    # Pick the <select> element that holds the options for this query type.
    periods = {}
    for option in selects[target_num].find_all('option'):
        if len(option['value']):
            periods[option.string] = option['value']
    return periods
def _get_leaf_codes(freq, page_code):
    """Return a DataFrame of codes that directly denote a series.

    ``page_code`` should be a node that is the direct parent of leaves.
    """
    params = {
        'm': 'QueryData',
        'rowcode': 'zb',
        'colcode': 'sj',
        'wds': '[]',
        'dbcode': _freq_to_dbcode(freq),
        'dfwds': '[{"wdcode":"zb","valuecode":"%s"}]' % page_code,
    }
    payload = get_page_response(HOST_URL, method='post', params=params).json()
    rows = [(node['code'], node['cname'], node['unit'])
            for node in payload['returndata']['wdnodes'][0]['nodes']]
    return pd.DataFrame.from_records(rows, columns=['code', 'cname', 'unit'])
def fetch_economics(code, start, end, freq):
    """Fetch an economic series; ``freq`` is monthly, quarterly or yearly."""
    start = _sanitize_date(start, freq)
    end = _sanitize_date(end, freq)
    params = {
        'm': 'QueryData',
        'rowcode': 'zb',
        'colcode': 'sj',
        'wds': '[]',
        'dbcode': _freq_to_dbcode(freq),
        'dfwds': '[{"wdcode":"zb","valuecode":"%s"}, {"wdcode":"sj","valuecode": "%s"}]' % (code, start + '-' + end),
    }
    response = get_page_response(HOST_URL, method='post', params=params)
    rows = []
    # Keep only data points the service marks as present.
    for node in response.json()['returndata']['datanodes']:
        value = node['data']
        if value['hasdata']:
            rows.append((node['wds'][0]['valuecode'],
                         _extract_date(node['wds'][1]['valuecode']),
                         value['data']))
    return pd.DataFrame.from_records(rows, columns=['code', 'asof_date', 'value'])
def fetch_minutely_prices():
    """Current trade data for all stocks (updated every minute)."""
    url = 'http://stock.gtimg.cn/data/get_hs_xls.php?id=ranka&type=1&metric=chr'
    response = get_page_response(url, 'post')
    df = pd.read_excel(BytesIO(response.content), skiprows=[0], index_col='代码')
    # NOTE(review): this sets a DataFrame *attribute*, not a column; pandas
    # does not guarantee such attributes survive later operations — confirm
    # callers really read it this way.
    df.updatetime = pd.Timestamp('now')
    return df
def _process(url):
    """Fetch ``url`` and append its second HTML table to ``dfs``.

    NOTE(review): ``dfs`` is not defined inside this function — it is
    presumably a list captured from an enclosing scope; verify against
    the surrounding code before reusing this helper.
    """
    # Some pages simply do not exist, so a failed connection is ignored.
    try:
        page_response = get_page_response(url)
        df = pd.read_html(BytesIO(page_response.content), header=0)[1]
        dfs.append(df)
    except ConnectFailed:
        pass
def fetch_suspend_stocks():
    """获取暂停上市股票列表 — fetch the list of suspended stocks."""
    url_fmt = 'http://www.cninfo.com.cn/cninfo-new/information/suspendlist-1?market={}'
    frames = [
        pd.DataFrame(get_page_response(url_fmt.format(market), method='post').json())
        for market in ('sh', 'sz')
    ]
    combined = pd.concat(frames).iloc[:, 1:]  # drop the first (index-like) column
    return combined.reset_index(drop=True)
def _fetch_one_item_stocks(item_id, item_name):
    """Stock list for a single industry (region, concept) item."""
    url = ('http://stock.gtimg.cn/data/index.php?appn=rank&t=pt{}/chr&l=1000&v=list_data'
           .format(item_id))
    response = get_page_response(url)
    unique_codes = pd.Series(_parse_stock_codes(response.text)).unique()
    return pd.DataFrame(
        {'item_id': item_id, 'item_name': item_name, 'code': unique_codes})
def _get_page_codes(freq='quarterly', node_id='zb'):
    """Direct children of ``node_id`` (default: the children of the root)."""
    params = {
        'id': node_id,
        'dbcode': _freq_to_dbcode(freq),
        'wdcode': 'zb',
        'm': 'getTree',
    }
    response = get_page_response(HOST_URL, method='post', params=params)
    return pd.DataFrame.from_records(response.json())
def fetch_symbols_list(only_a=True):
    """获取上市公司代码及简称 — listed company codes and short names.

    Parameters
    ----------
    only_a : bool
        When True, keep only A-share codes (first digit 0, 3 or 6).

    Returns
    -------
    DataFrame
        Indexed by ``code`` with a ``short_name`` column.
    """
    url = 'http://www.cninfo.com.cn/cninfo-new/information/companylist'
    response = get_page_response(url)

    def _parse(response):
        soup = BeautifulSoup(response.text, 'lxml')
        tag_as = soup.find_all('a', href=re.compile("companyinfo_n.html"))
        # Each anchor text is "<6-digit code> <short name>".
        res = [(x.text[:6].strip(), x.text[7:].lstrip()) for x in tag_as]
        df = pd.DataFrame(res, columns=['code', 'short_name'])
        if only_a:
            # BUG FIX: the original regex class '[0,3,6]' also matched a
            # literal comma; A-share codes start with 0, 3 or 6 only.
            df = df[df.code.str[0].isin(['0', '3', '6'])]
        return df.set_index('code', drop=True)

    return _parse(response)
def fetch_jjcg(stock_code, query_date):
    """Fund holdings for a stock at a given period-end date.

    Parameters
    ----------
    stock_code : str
        6-digit stock code.
    query_date : date_like
        Period-end date to query.

    Returns
    -------
    res : DataFrame
        Six columns, e.g.:

        >>> fetch_jjcg('000001', '2004-09-30')
        基金简称 持仓市值(万元) 持仓股数(万股) 与上期持仓股数变化(万股) 占基金净值比例 占流通股比例
        0 融通深证100指数A 3361 390.78 增仓87.12 4.81% 0.28%

    Raises
    ------
    NoWebData
        If ``query_date`` is not among the downloadable periods.
    """
    date_str = pd.Timestamp(query_date).strftime('%Y-%m-%d')
    query_type = 'jjcg'
    periods = fetch_report_periods(stock_code, query_type)
    if date_str not in periods:
        raise NoWebData('不存在股票{}报告期为:"{}"的基金持股数据'.format(
            stock_code, date_str))
    from_date = periods[date_str].split(',')[1]
    url = 'http://quotes.money.163.com/service/{}.html?{}date={}%2C{}&symbol={}'.format(
        query_type, '', date_str, from_date, stock_code)
    payload = get_page_response(url).json()
    return pd.read_html(payload['table'], header=0, skiprows=range(1))[0]
def _industry_stocks(industry_id, date_str):
    """PE table for one industry classification on a given date."""
    url = "http://www.cnindex.com.cn/stockPEs.do"
    # 1-character ids are CSRC industry codes; longer ids are CNI codes.
    category = '008001' if len(industry_id) == 1 else '008100'
    data = {
        'query.plate': quote('深沪全市场'),
        'query.category': category,
        'query.industry': industry_id,
        'query.date': date_str,
        'pageSize': 3000,  # load everything in a single page
    }
    response = get_page_response(url, 'post', data)
    return pd.read_html(response.text, skiprows=[0])[0].iloc[:, 1:]
def fetch_delisting_stocks():
    """获取终止上市股票清单 — fetch the list of delisted stocks."""
    url_fmt = 'http://three.cninfo.com.cn/new/information/getDelistingList?market={}'
    frames = [
        pd.DataFrame(get_page_response(url_fmt.format(m), method='post').json())
        for m in ('sh', 'sz')
    ]
    renames = {
        'f007d_0007': '转板日期',
        'f008d_0007': '终止上市日期',
        'r_seccode_0007': '三板证券代码',
        'r_secname_0007': '三板证券简称',
        'y_seccode_0007': '股票代码',
        'y_secname_0007': '股票简称',
    }
    df = pd.concat(frames).rename(columns=renames)
    df.set_index('股票代码', drop=True, inplace=True)
    # NOTE(review): assumes every cell is a str — non-string cells (NaN)
    # would make str.strip raise; confirm against the service's payload.
    return df.applymap(str.strip)
def fetch_performance_notice(stock_code, output_type='list'):
    """Performance pre-announcements.

    Returns a list of dicts when ``output_type`` is 'list'; otherwise a
    DataFrame indexed by announcement_date.
    """
    url = 'http://quotes.money.163.com/f10/yjyg_{}.html'.format(stock_code)
    response = get_page_response(url)
    tables = pd.read_html(
        response.text,
        match='报告日期',
        attrs={'class': 'table_bg001 border_box table_details'})
    parsed = [_parse_performance_notice(t) for t in tables]
    if output_type == 'list':
        return parsed
    df = pd.DataFrame.from_dict(parsed)
    df.set_index('announcement_date', inplace=True)
    return df
def _fetch_prbookinfos(report_date, url, markets):
    """Page through the scheduled financial-report disclosure tables.

    For each market, posts page requests until the service reports no next
    page, collecting one DataFrame per page.

    Parameters
    ----------
    report_date : date_like
        Report period the schedule is requested for.
    url : str
        Endpoint to POST page requests to.
    markets : iterable
        Market keys; must be valid keys of ``JUCHAO_MARKET_MAPS``.

    Returns
    -------
    list of DataFrame
        One DataFrame per fetched page, across all markets.
    """
    logger = logbook.Logger('财务报告预约披露时间表')
    dfs = []
    date_ = pd.Timestamp(report_date)
    year = date_.year
    q = date_.quarter
    for market in markets:
        pagenum = 1
        total_page = 1
        has_next_page = True
        data = {
            'sectionTime': report_date,
            'market': market,
            'isDesc': False,
            'pagenum': pagenum
        }
        while has_next_page and (pagenum <= total_page):
            logger.info('提取{}年{}季度{}第{}页数据'.format(year, q, JUCHAO_MARKET_MAPS[market], pagenum))
            # Every 30 pages, sleep a short random interval (rate limiting).
            if pagenum % 30 == 0:
                t = np.random.randint(1, 3 * 100) / 100
                time.sleep(t)
            try:
                r = get_page_response(url, 'post', data)
                book = r.json()['prbookinfos']
            except ConnectFailed:
                logger.info('{}第{}页出现异常!!!'.format(JUCHAO_MARKET_MAPS[market], pagenum))
                logger.info('休眠3秒后再次尝试')
                time.sleep(3)
                book = _retry_one_page(url, data)
            # NOTE(review): if ConnectFailed fired, `r` below is stale (or
            # unbound on the very first page), so hasNextPage/totalPages come
            # from the wrong response — confirm and fix with _retry_one_page's
            # full payload if it exposes one.
            has_next_page = r.json()['hasNextPage']
            total_page = int(r.json()['totalPages'])
            df = pd.DataFrame.from_records(book)
            dfs.append(df)
            pagenum += 1
            data.update(pagenum=pagenum)
    return dfs
def fetch_announcement_summary():
    """Summary of the most recent company announcements.

    Useful for:
    1. limiting which company records need updating;
    2. limiting which financial-report catalogues to refresh;
    3. supporting analysis.
    """
    cols = [
        'announcementTime', 'announcementTitle', 'announcementType',
        'announcementTypeName', 'secCode', 'secName'
    ]
    url_fmt = 'http://www.cninfo.com.cn/cninfo-new/disclosure/{}_summary/?pageNum={}'
    frames = []
    for market in ('sse', 'szse'):
        # Page until the service says there is no more (hard cap at 99 pages).
        for page in range(1, 100):
            r = get_page_response(url_fmt.format(market, page), 'post')
            payload = r.json()
            frames.append(pd.DataFrame.from_dict(payload['announcements'])[cols])
            if not payload['hasMore']:
                break
    data = pd.concat(frames)
    data.reset_index(inplace=True, drop=True)
    return pd.DataFrame({
        '股票代码': data['secCode'].values,
        '股票简称': data['secName'].values,
        '公告时间': data['announcementTime'].apply(pd.Timestamp, unit='ms'),
        '公告标题': data['announcementTitle'].values,
        '类别': data['announcementTypeName'].values,
    })
def fetch_history(code, start, end=None, is_index=False):
    """Historical (unadjusted) trade data for a stock or index.

    Note: the data lags by at least one day. E.g. downloading 002622 at
    2018-12-12 16:00 returned data only up to 2018-12-10 (two days behind).
    """
    start, end = sanitize_dates(start, end)
    symbol = _query_code(code, is_index)
    url = ('http://quotes.money.163.com/service/chddata.html?code={}&start={}&end={}'
           .format(symbol, start.strftime('%Y%m%d'), end.strftime('%Y%m%d'))
           + '#01b07')
    response = get_page_response(url, 'get')
    return pd.read_csv(
        BytesIO(response.content),
        index_col=0,
        encoding='cp936',
        parse_dates=True,
        na_values=['None', '--', 'none'])
def fetch_adjustment(stock_code):
    """Historical dividend / distribution records for a stock.

    On SZSE the ex-right base date coincides with the bonus-share listing
    date; on SSE the bonus-share listing date is generally later.

    Note: the ex-right base date is used as the pay date and the
    bonus-share listing date as the effective date.
    """
    url = _get_url(stock_code, 'dividend')
    response = get_page_response(url)
    # Skip the header row.
    df = pd.read_html(BytesIO(response.content), match='分红年度', skiprows=[0])[0]
    # Even with no data the site may return a single all-NaN row.
    df.dropna(how='all', inplace=True)
    if df.empty:
        return df
    df.columns = _ADJUSTMENT_FIELDS
    result = _parse_ratio_and_amount(df)
    result.set_index('effective_date', inplace=True)
    result.sort_index(inplace=True)
    return result
def get_index_from(ex):
    """Index symbols and names for exchange ``ex``.

    NOTE(review): relies on module-level ``url_fmt`` and ``one_big_int``
    defined elsewhere in the file — verify they are in scope.
    """
    url = url_fmt.format_map({'page': 0, 'ex': ex, 'count': one_big_int})
    response = get_page_response(url, method='post')
    frame = pd.DataFrame(response.json()['list'])
    return frame.loc[:, ['SYMBOL', 'NAME']]
def fetch_issue_info(stock_code):
    """发行信息 — issuance information for a stock.

    Returns the second table found on the issuance page.
    """
    url = _get_url(stock_code, 'issue')
    page_response = get_page_response(url)
    # BUG FIX: ``page_response.content`` is bytes; wrapping it in StringIO
    # raises TypeError. Use BytesIO, consistent with the other fetchers
    # in this module (fetch_history, fetch_adjustment).
    df = pd.read_html(BytesIO(page_response.content))[1]
    return df
def _page_content(url):
    """Return the page body decoded as UTF-8 text."""
    resp = get_page_response(url)
    resp.encoding = 'utf-8'  # force decoding charset before reading .text
    return resp.text