def _download_dividends(symbol: str):
    """Download dividend and allotment tables for a stock from Sina Finance.

    Fetches the ``vISSUE_ShareBonus`` page for *symbol*, decodes it as
    GB2312 and extracts the ``<tbody>`` found between the dividend marker
    comments and between the allotment marker comments.

    Args:
        symbol: Stock code interpolated into the page URL.

    Returns:
        A two-element list ``[dividends, allotments]`` of DataFrames. An
        element is an empty ``DataFrame`` when its section is not found in
        the page or when parsing it yields no rows.
    """
    url = (r'http://vip.stock.finance.sina.com.cn/'
           r'corp/go.php/vISSUE_ShareBonus/stockid'
           r'/{0}.phtml'.format(symbol))
    reader = _BaseReader('')
    try:
        response = reader._get_response(
            url, _AbsDailyReader._default_headers())
        txt = str(response.content, encoding='gb2312')
    finally:
        reader.close()

    # Raw strings keep the regex escapes (\s, \S, \/) intact without
    # triggering "invalid escape sequence" warnings; the pattern text seen
    # by ``re`` is unchanged.
    sections = (
        # Dividend data.
        (r'<!--分红 begin-->[\s\S]*<tbody>([\s\S]*)<\/tbody>[\s\S]*'
         r'<!--分红 end-->',
         _parse_divided_line),
        # Allotment data.
        (r'<!--配股 begin-->[\s\S]*<tbody>([\s\S]*)<\/tbody>[\s\S]*'
         r'<!--配股 end-->',
         _parse_allotment_line),
    )

    frames = []
    for pattern, line_parser in sections:
        frame = pd.DataFrame()
        match = re.search(pattern, txt)
        # A page without this section yields an empty frame instead of an
        # AttributeError on ``None.group``.
        if match:
            rows = _parse_body(bs(match.group(1), 'lxml'), line_parser)
            if rows:
                frame = _create_df(rows)
        frames.append(frame)
    return frames
def _get_mac_price(num: int, event: int, cate: str, start=0, index=None,
                   dtype=np.float64):
    """Fetch one table from Sina's China macro-economic data service.

    Source page: http://finance.sina.com.cn/mac/#price-0-0-31-2

    Args:
        num: Value sent as the ``num`` query parameter.
        event: Value sent as the ``event`` query parameter.
        cate: Value sent as the ``cate`` query parameter.
        start: Value sent as the ``from`` query parameter.
        index: Column label to use as the index. When falsy, the first
            column is used; when given but not among the columns, the
            default index is left in place.
        dtype: dtype the whole frame is cast to; a falsy value skips the
            cast.

    Returns:
        The parsed ``DataFrame``, or ``None`` when the response is falsy.
    """
    c = finance_datareader_py._random()
    url = 'http://money.finance.sina.com.cn/mac/api/jsonp.php' \
          '/SINAREMOTECALLCALLBACK{' \
          'c}/MacPage_Service.get_pagedata?cate={cate}&event={event}&from={' \
          'start}&num={num}&condition=&_={c}'.format(c=c, start=start,
                                                     num=num, event=event,
                                                     cate=cate)
    reader = _BaseReader(url)
    try:
        rep = reader._get_response(url)
        if not rep:
            return None
        txt = rep.text
        # Raw string so ``\d`` reaches the regex engine without an
        # "invalid escape sequence" warning.
        data_match = re.compile(r'count:"\d+",data:(.*)}').search(txt)
        title = re.compile('all:(.*),defaultItems:').search(txt).group(1)

        # Each title entry contributes t[1] as the label; a non-empty t[2]
        # is appended in parentheses.
        columns = []
        for t in json.loads(title):
            if len(t) > 2 and t[2]:
                columns.append(t[1] + '({0})'.format(t[2]))
            else:
                columns.append(t[1])

        df = pd.DataFrame(json.loads(data_match.group(1)), columns=columns)
        if not index:
            if columns:
                df = df.set_index(columns[0])
        elif index in columns:
            df = df.set_index(index)
        if dtype:
            df = df.astype(dtype)
        return df
    finally:
        reader.close()
def _download_sse_symbols(timeout):
    """Download the SSE suggestion script and tabulate its symbols.

    Args:
        timeout: Accepted but not used anywhere in this function body.

    Returns:
        A ``DataFrame`` with one row per ``_RE_SYMBOLS`` match in the
        response text: column ``symbol`` holds group 1 and column ``name``
        holds group 2.
    """
    reader = _BaseReader('')
    try:
        response = reader._get_response(
            r'http://www.sse.com.cn/js/common/ssesuggestdataAll.js',
            headers=_AbsDailyReader._default_headers())
        rows = [
            {'symbol': found.group(1), 'name': found.group(2)}
            for found in _RE_SYMBOLS.finditer(response.text)
        ]
        return pd.DataFrame(rows)
    finally:
        reader.close()
def get_pdf(top=1):
    """
    Fetch the listed-company industry classification results published by
    the China Securities Regulatory Commission (CSRC).

    Args:
        top: Total number of entries to fetch.

    Returns:
        dict {file name: PDF file URL}

    Examples:
        .. code-block:: python

            >>> from finance_datareader_py.csrc import category
            >>> print(category.get_pdf())

            {
                "2018年2季度上市公司行业分类结果": "http://www.csrc.gov.cn/pub/newsite/scb/ssgshyfljg/201807/W020180730329934473366.pdf"
            }

    .. hint::
        For parsing the PDF files, see
        `tabula-py <https://github.com/chezou/tabula-py>`_.

        .. code-block:: python

            >>> import tabula
            >>> df = tabula.read_pdf(r'http://www.csrc.gov.cn/pub/newsite/scb/ssgshyfljg/201805/W020180521522232342268.pdf',encoding='gbk', pages='all', format='json',silent=True, pandas_options={'header': 0})
            >>> df = df.loc[df['上市公司代码'].str.isnumeric() == True]
            >>> df = df.fillna(method='ffill')
            >>> print(df.tail())

                 门类名称及代码 行业大类代码 行业大类名称  上市公司代码 上市公司简称
            3597    综合(S)     90     综合  600777   新潮能源
            3598    综合(S)     90     综合  600783   鲁信创投
            3599    综合(S)     90     综合  600784   鲁银投资
            3600    综合(S)     90     综合  600805   悦达投资
            3601    综合(S)     90     综合  600895   张江高科

    """
    result = {}
    if top <= 0:
        return result

    # Build the reader before entering ``try``: if construction fails,
    # ``finally`` must not touch an unbound ``reader`` and mask the real
    # error with an UnboundLocalError.
    reader = _BaseReader('')
    try:
        page_index = 0
        while True:
            # The first page is SRC itself; later pages are index_N.htm
            # resolved relative to SRC.
            src = SRC
            if page_index > 0:
                src = urljoin(src, 'index_{0}.htm'.format(page_index))
            page_index += 1
            txt = _get_text(reader, src)
            for key, value in _parse_list(reader, txt).items():
                result[key] = value
                if len(result) >= top:
                    return result
    except RemoteDataError:
        # Best effort: return whatever was collected so far.
        # NOTE(review): presumably raised once the listing pages run out;
        # confirm against _get_text / _parse_list.
        pass
    finally:
        reader.close()
    return result
def test_valid_retry_count(self):
    """``retry_count='stuff'`` and ``retry_count=-1`` each raise ValueError."""
    for bad_retry_count in ('stuff', -1):
        with tm.assertRaises(ValueError):
            base._BaseReader([], retry_count=bad_retry_count)
def test_invalid_format(self):
    """``_read_one_data`` raises NotImplementedError for an unknown ``_format``."""
    reader = base._BaseReader([])
    reader._format = 'IM_NOT_AN_IMPLEMENTED_TYPE'
    # Only the call under test sits inside the context manager, so a
    # NotImplementedError raised during setup cannot make the test pass.
    with tm.assertRaises(NotImplementedError):
        reader._read_one_data('a', None)
def test_invalid_url(self):
    """Accessing ``url`` on a bare ``_BaseReader`` raises NotImplementedError."""
    # Construct outside the context manager so that only the ``url``
    # access is asserted to raise.
    reader = base._BaseReader([])
    with tm.assertRaises(NotImplementedError):
        reader.url
def test_invalid_format(self):
    """``_read_one_data`` raises NotImplementedError for an unknown ``_format``."""
    reader = base._BaseReader([])
    reader._format = "IM_NOT_AN_IMPLEMENTED_TYPE"
    # Only the call under test sits inside ``pytest.raises``, so a
    # NotImplementedError raised during setup cannot make the test pass.
    with pytest.raises(NotImplementedError):
        reader._read_one_data("a", None)
def test_invalid_url(self):
    """Accessing ``url`` on a bare ``_BaseReader`` raises NotImplementedError."""
    # Construct outside ``pytest.raises`` so that only the ``url`` access
    # is asserted to raise.
    reader = base._BaseReader([])
    with pytest.raises(NotImplementedError):
        reader.url
def test_valid_retry_count(self):
    """``retry_count="stuff"`` and ``retry_count=-1`` each raise ValueError."""
    for bad_retry_count in ("stuff", -1):
        with pytest.raises(ValueError):
            base._BaseReader([], retry_count=bad_retry_count)
def test_default_start_date(self):
    """``default_start_date`` equals today minus ``365 * 5`` days."""
    reader = base._BaseReader([])
    # Read the attribute first, then build the expected value, matching
    # the left-to-right evaluation of the original comparison.
    actual = reader.default_start_date
    five_years = dt.timedelta(days=365 * 5)
    expected = dt.date.today() - five_years
    assert actual == expected