コード例 #1
0
def _download_dividends(symbol: str):
    """Download the dividend and allotment tables for ``symbol`` from Sina.

    Requests the Sina Finance ``vISSUE_ShareBonus`` page for the stock,
    decodes it as gb2312, cuts the ``<tbody>`` out of the dividend and the
    allotment sections with regular expressions, and turns each section
    into a DataFrame.

    Args:
        symbol: Stock code substituted into the page URL.

    Returns:
        A two-element list ``[dividends, allotments]``. Each element is a
        ``pandas.DataFrame``; it is left empty when the corresponding
        section is not found in the page or yields no parsed rows.
    """
    # (section regex, per-row parser) in the order the results are returned.
    # Raw strings keep ``\s`` / ``\/`` from being treated as (invalid)
    # string escapes; the compiled patterns are unchanged.
    sections = (
        (r'<!--分红 begin-->[\s\S]*<tbody>([\s\S]*)<\/tbody>'
         r'[\s\S]*<!--分红 end-->', _parse_divided_line),
        (r'<!--配股 begin-->[\s\S]*<tbody>([\s\S]*)<\/tbody>'
         r'[\s\S]*<!--配股 end-->', _parse_allotment_line),
    )
    reader = _BaseReader('')
    try:
        response = reader._get_response(
            r'http://vip.stock.finance.sina.com.cn/'
            r'corp/go.php/vISSUE_ShareBonus/stockid'
            r'/{0}.phtml'.format(symbol), _AbsDailyReader._default_headers())
        txt = str(response.content, encoding='gb2312')

        frames = []
        for pattern, parse_line in sections:
            frame = pd.DataFrame()
            found = re.search(pattern, txt)
            # A missing section used to crash on ``None.group``; treat it
            # the same as a section without rows.
            if found:
                rows = _parse_body(bs(found.group(1), 'lxml'), parse_line)
                if rows:
                    frame = _create_df(rows)
            frames.append(frame)
        return frames
    finally:
        # Always release the reader, whether or not the download succeeded.
        reader.close()
コード例 #2
0
def _get_mac_price(num: int,
                   event: int,
                   cate: str,
                   start=0,
                   index=None,
                   dtype=np.float64):
    """Fetch one table from Sina's China macro-economic data service.

    Reference page: http://finance.sina.com.cn/mac/#price-0-0-31-2

    The JSONP response is scanned for the column definitions (``all:``)
    and the row data (``data:``), which are combined into a DataFrame.

    Args:
        num: Value sent as the ``num`` query parameter (presumably the
            number of rows to request -- confirm against the service).
        event: Value sent as the ``event`` query parameter.
        cate: Value sent as the ``cate`` query parameter.
        start: Value sent as the ``from`` query parameter.
        index: Column label to use as the index. When falsy, the first
            column becomes the index; when given but not among the parsed
            columns, the frame keeps its default index.
        dtype: When truthy, the frame is cast with ``DataFrame.astype``.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when the response
        object is falsy.

    Raises:
        AttributeError: If the response text does not contain the expected
            ``all:`` / ``data:`` sections (the regex search yields ``None``).
    """
    c = finance_datareader_py._random()
    url = ('http://money.finance.sina.com.cn/mac/api/jsonp.php'
           '/SINAREMOTECALLCALLBACK{c}/MacPage_Service.get_pagedata'
           '?cate={cate}&event={event}&from={start}&num={num}'
           '&condition=&_={c}').format(c=c, start=start, num=num,
                                       event=event, cate=cate)
    reader = _BaseReader(url)
    try:
        rep = reader._get_response(url)
        if not rep:
            return None

        txt = rep.text
        # Raw strings so ``\d`` reaches the regex engine instead of being
        # parsed as an (invalid) string escape.
        m = re.search(r'count:"\d+",data:(.*)}', txt)
        title = re.search(r'all:(.*),defaultItems:', txt).group(1)

        # Each column definition is a sequence whose second item is the
        # label and whose optional third item is appended in parentheses.
        columns = [
            t[1] + '({0})'.format(t[2]) if len(t) > 2 and t[2] else t[1]
            for t in json.loads(title)
        ]
        df = pd.DataFrame(json.loads(m.group(1)), columns=columns)

        if not index and columns:
            df = df.set_index(columns[0])
        elif index and index in columns:
            df = df.set_index(index)
        if dtype:
            df = df.astype(dtype)
        return df
    finally:
        # Always release the reader, whether or not parsing succeeded.
        reader.close()
コード例 #3
0
def _download_sse_symbols(timeout):
    """Fetch the symbol list published in the SSE suggestion-data script.

    Args:
        timeout: Accepted for interface compatibility; not used here.

    Returns:
        A ``pandas.DataFrame`` built from one ``{'symbol', 'name'}`` record
        per ``_RE_SYMBOLS`` match found in the response text.
    """
    reader = _BaseReader('')
    try:
        response = reader._get_response(
            r'http://www.sse.com.cn/js/common/ssesuggestdataAll.js',
            headers=_AbsDailyReader._default_headers())
        records = [
            {'symbol': found.group(1), 'name': found.group(2)}
            for found in _RE_SYMBOLS.finditer(response.text)
        ]
        return pd.DataFrame(records)
    finally:
        # Release the reader regardless of how the download went.
        reader.close()
コード例 #4
0
def get_pdf(top=1):
    """Get the listed-company industry classification results from the CSRC.

    The listing is read page by page (``SRC`` first, then ``index_1.htm``,
    ``index_2.htm``, ... resolved against ``SRC``) until ``top`` entries
    have been collected or a page request fails with ``RemoteDataError``.

    Args:
        top: Total number of entries to fetch. Values ``<= 0`` return an
            empty dict without any network access.

    Returns:
         dict
        {file name: pdf file path}

    Examples:
        .. code-block:: python

            >>> from finance_datareader_py.csrc import category

            >>> print(category.get_pdf())

            {
                "2018年2季度上市公司行业分类结果": "http://www.csrc.gov.cn/pub/newsite/scb/ssgshyfljg/201807/W020180730329934473366.pdf"
            }

    .. hint::
        For parsing the pdf files, see `tabula-py <https://github.com/chezou/tabula-py>`_.

        .. code-block:: python

            >>> import tabula

            >>> df = tabula.read_pdf(r'http://www.csrc.gov.cn/pub/newsite/scb/ssgshyfljg/201805/W020180521522232342268.pdf',encoding='gbk', pages='all', format='json',silent=True, pandas_options={'header': 0})

            >>> df = df.loc[df['上市公司代码'].str.isnumeric() == True]

            >>> df = df.fillna(method='ffill')

            >>> print(df.tail())

                 门类名称及代码 行业大类代码 行业大类名称  上市公司代码 上市公司简称
            3597   综合(S)     90     综合  600777   新潮能源
            3598   综合(S)     90     综合  600783   鲁信创投
            3599   综合(S)     90     综合  600784   鲁银投资
            3600   综合(S)     90     综合  600805   悦达投资
            3601   综合(S)     90     综合  600895   张江高科

    """

    result = {}
    if top <= 0:
        return result

    # Build the reader before entering ``try``: if construction fails, the
    # ``finally`` clause must not touch an unbound ``reader`` and mask the
    # real error with an UnboundLocalError.
    reader = _BaseReader('')
    try:
        page_index = 0
        while True:
            src = SRC
            if page_index > 0:
                src = urljoin(src, 'index_{0}.htm'.format(page_index))
            page_index += 1
            txt = _get_text(reader, src)
            for key, value in _parse_list(reader, txt).items():
                result[key] = value
                if len(result) >= top:
                    return result
    except RemoteDataError:
        # Best effort: a failed page request ends the pagination and the
        # entries gathered so far are returned.
        pass
    finally:
        reader.close()
    return result
コード例 #5
0
 def test_valid_retry_count(self):
     """A non-numeric or negative ``retry_count`` is expected to raise ValueError."""
     for bad_count in ('stuff', -1):
         with tm.assertRaises(ValueError):
             base._BaseReader([], retry_count=bad_count)
コード例 #6
0
 def test_invalid_format(self):
     """Reading with an unknown ``_format`` is expected to raise NotImplementedError."""
     with tm.assertRaises(NotImplementedError):
         reader = base._BaseReader([])
         # Force a format value the reader has no handler for.
         reader._format = 'IM_NOT_AN_IMPLEMENTED_TYPE'
         reader._read_one_data('a', None)
コード例 #7
0
 def test_invalid_url(self):
     """Accessing ``url`` on the base reader is expected to raise NotImplementedError."""
     with tm.assertRaises(NotImplementedError):
         reader = base._BaseReader([])
         reader.url
コード例 #8
0
 def test_invalid_format(self):
     """Reading with an unknown ``_format`` is expected to raise NotImplementedError."""
     with pytest.raises(NotImplementedError):
         reader = base._BaseReader([])
         # Force a format value the reader has no handler for.
         reader._format = "IM_NOT_AN_IMPLEMENTED_TYPE"
         reader._read_one_data("a", None)
コード例 #9
0
 def test_invalid_url(self):
     """Accessing ``url`` on the base reader is expected to raise NotImplementedError."""
     with pytest.raises(NotImplementedError):
         reader = base._BaseReader([])
         reader.url
コード例 #10
0
 def test_valid_retry_count(self):
     """A non-numeric or negative ``retry_count`` is expected to raise ValueError."""
     for bad_count in ("stuff", -1):
         with pytest.raises(ValueError):
             base._BaseReader([], retry_count=bad_count)
コード例 #11
0
 def test_default_start_date(self):
     """``default_start_date`` is expected to be today minus 5 * 365 days."""
     reader = base._BaseReader([])
     five_years = dt.timedelta(days=365 * 5)
     assert reader.default_start_date == dt.date.today() - five_years