Beispiel #1
0
def fetch_quotes(stock_codes):
    """
    Fetch intraday (real-time) quotes for a list of stock codes.

    Parameters
    ----------
    stock_codes : list
        List of stock codes (a single code is also accepted; it is
        normalized by ``ensure_list``).

    Returns
    -------
    res : DataFrame
        One row per stock code, 33 columns.

    Example
    -------
    >>> df = fetch_quotes(['000001','000002'])
    >>> df.iloc[:,:8]
        股票代码  股票简称      开盘     前收盘      现价      最高      最低     竞买价
    0  000001  平安银行  11.040  11.050  10.900  11.050  10.880  10.900
    1  000002  万 科A  33.700  34.160  33.290  33.990  33.170  33.290
    """
    stock_codes = ensure_list(stock_codes)
    # The quote service limits how many codes one request may carry;
    # fetch in batches of 800. (Removed the unused ``num`` local and
    # the dead commented-out slicing code.)
    batch_size = 800
    url_fmt = 'http://hq.sinajs.cn/list={}'
    dfs = []
    for p_codes in partition_all(batch_size, stock_codes):
        url = url_fmt.format(','.join(map(_add_prefix, p_codes)))
        content = get_page_response(url).text
        dfs.append(_to_dataframe(content, p_codes))
    return pd.concat(dfs).sort_values('股票代码')
Beispiel #2
0
def _parse_report_data(url):
    """Download a report CSV and return it as a DataFrame (last column dropped).

    NOTE: cells such as ' --' (e.g. 000001 balance sheet, 应收出口退税(万元))
    must be treated as NA, otherwise pandas infers the wrong column dtype.
    """
    response = get_page_response(url)
    markers = ['--', ' --', '-- ']
    frame = pd.read_csv(StringIO(response.text), na_values=markers)
    return frame.iloc[:, :-1]
Beispiel #3
0
def fetch_report_periods(stock_code, query):
    """
    List the report periods available online for shareholder / fund holdings.

    Returns a dict:
        key:   period-end date, e.g. '2017-06-30'
        value: e.g. '2017-06-30,2017-03-31'
    """
    valid_types = ('c', 't', 'jjcg')
    assert query in valid_types, '{}不在有效类型{}中'.format(query, valid_types)
    # Map each query kind to (page type, index of the target <select> node).
    dispatch = {
        'jjcg': ('jjcg', 0),
        't': ('gdfx', 1),
        'c': ('gdfx', 0),
    }
    type_, target_num = dispatch[query]
    url_fmt = 'http://quotes.money.163.com/f10/{type}_{stock_code}.html'
    url = url_fmt.format(stock_code=stock_code, type=type_)
    response = get_page_response(url, 'post')
    soup = BeautifulSoup(response.text, 'lxml')
    selects = soup.find_all('select', {'id': '', 'name': ''})
    # The <option> children of the target <select> carry the periods.
    periods = {}
    for option in selects[target_num].find_all('option'):
        if len(option['value']):
            periods[option.string] = option['value']
    return periods
Beispiel #4
0
def _get_leaf_codes(freq, page_code):
    """Return the leaf codes under ``page_code``, each directly denoting a series.

    Parameters
    ----------
    freq : str
        Data frequency; translated to a dbcode via ``_freq_to_dbcode``.
    page_code : str
        Code of a node that is a direct parent of leaves.

    Returns
    -------
    DataFrame
        Columns: code, cname, unit.  (The original docstring claimed a
        list was returned; the function has always built a DataFrame.)
    """
    params = {
        'm': 'QueryData',
        'rowcode': 'zb',
        'colcode': 'sj',
        'wds': '[]',
        'dbcode': _freq_to_dbcode(freq),
        'dfwds': '[{"wdcode":"zb","valuecode":"%s"}]' % page_code,
    }
    r = get_page_response(HOST_URL, method='post', params=params)
    nodes = r.json()['returndata']['wdnodes'][0]['nodes']
    records = [(node['code'], node['cname'], node['unit']) for node in nodes]
    labels = ['code', 'cname', 'unit']
    return pd.DataFrame.from_records(records, columns=labels)
Beispiel #5
0
def fetch_economics(code, start, end, freq):
    """Fetch one economic series over a date range.

    Parameters
    ----------
    code : str
        Series code.
    start, end : date-like
        Query range; each is sanitized for the given frequency.
    freq : str
        One of 'monthly', 'quarterly', 'yearly'.

    Returns
    -------
    DataFrame
        Columns: code, asof_date, value.
    """
    start = _sanitize_date(start, freq)
    end = _sanitize_date(end, freq)
    date_rng = start + '-' + end
    params = {
        'm': 'QueryData',
        'rowcode': 'zb',
        'colcode': 'sj',
        'wds': '[]',
        'dbcode': _freq_to_dbcode(freq),
        'dfwds': '[{"wdcode":"zb","valuecode":"%s"}, {"wdcode":"sj","valuecode": "%s"}]' % (code, date_rng),
    }
    r = get_page_response(HOST_URL, method='post', params=params)
    records = []
    labels = ['code', 'asof_date', 'value']
    for record in r.json()['returndata']['datanodes']:
        val = record['data']
        if val['hasdata']:
            # Read the per-record code into its own name instead of
            # rebinding the ``code`` parameter (as the original did).
            rec_code = record['wds'][0]['valuecode']
            asof_date = record['wds'][1]['valuecode']
            records.append((rec_code, _extract_date(asof_date), val['data']))
    df = pd.DataFrame.from_records(records, columns=labels)
    return df
Beispiel #6
0
def fetch_minutely_prices():
    """Current trade data for all stocks (source refreshes every minute)."""
    url = 'http://stock.gtimg.cn/data/get_hs_xls.php?id=ranka&type=1&metric=chr'
    page_response = get_page_response(url, 'post')
    frame = pd.read_excel(BytesIO(page_response.content),
                          skiprows=[0],
                          index_col='代码')
    # NOTE(review): this sets a plain instance attribute, not a column, and
    # will be silently lost on copies — confirm callers read ``df.updatetime``.
    frame.updatetime = pd.Timestamp('now')
    return frame
Beispiel #7
0
 def _process(url):
     """Fetch one page and append its second HTML table to the enclosing ``dfs``."""
     # Some pages simply do not exist; connection failures are ignored
     # on purpose (best-effort accumulation into the closure list ``dfs``).
     try:
         page_response = get_page_response(url)
         # Table index 1 holds the payload; row 0 is used as the header.
         df = pd.read_html(BytesIO(page_response.content), header=0)[1]
         dfs.append(df)
     except ConnectFailed:
         pass
Beispiel #8
0
def fetch_suspend_stocks():
    """Return the list of stocks whose listing is currently suspended."""
    base = 'http://www.cninfo.com.cn/cninfo-new/information/suspendlist-1?market={}'
    frames = []
    for market in ('sh', 'sz'):
        payload = get_page_response(base.format(market), method='post').json()
        frames.append(pd.DataFrame(payload))
    combined = pd.concat(frames).iloc[:, 1:]
    return combined.reset_index(drop=True)
Beispiel #9
0
def _fetch_one_item_stocks(item_id, item_name):
    """Return the stock list of a single industry (region / concept) item."""
    url = ('http://stock.gtimg.cn/data/index.php'
           '?appn=rank&t=pt{}/chr&l=1000&v=list_data').format(item_id)
    response = get_page_response(url)
    # De-duplicate the parsed codes while keeping first-seen order.
    unique_codes = pd.Series(_parse_stock_codes(response.text)).unique()
    return pd.DataFrame({'item_id': item_id,
                         'item_name': item_name,
                         'code': unique_codes})
Beispiel #10
0
def _get_page_codes(freq='quarterly', node_id='zb'):
    """Return the direct children of ``node_id`` as a DataFrame.

    The default ``node_id='zb'`` queries the children of the tree root.
    """
    query = {
        'id': node_id,
        'dbcode': _freq_to_dbcode(freq),
        'wdcode': 'zb',
        'm': 'getTree',
    }
    response = get_page_response(HOST_URL, method='post', params=query)
    return pd.DataFrame.from_records(response.json())
Beispiel #11
0
def fetch_symbols_list(only_a=True):
    """Fetch listed company codes and short names.

    Parameters
    ----------
    only_a : bool
        When True, keep only A-share codes (first digit 0, 3 or 6).

    Returns
    -------
    DataFrame
        Indexed by ``code``, with a ``short_name`` column.
    """
    url = 'http://www.cninfo.com.cn/cninfo-new/information/companylist'
    response = get_page_response(url)
    soup = BeautifulSoup(response.text, 'lxml')
    anchors = soup.find_all('a', href=re.compile("companyinfo_n.html"))
    # Anchor text layout: 6-digit code, one separator char, then the name.
    rows = [(a.text[:6].strip(), a.text[7:].lstrip()) for a in anchors]
    df = pd.DataFrame(rows, columns=['code', 'short_name'])
    if only_a:
        # BUG FIX: the original character class '[0,3,6]' also matched ','.
        df = df[df.code.str.get(0).str.contains('[036]')]
    return df.set_index('code', drop=True)
Beispiel #12
0
def fetch_jjcg(stock_code, query_date):
    """
    Fund-holding data for a stock at a given period-end date.

    Parameters
    ----------
    stock_code : str
      6-digit stock code

    query_date : date_like
      period-end date to query

    Returns
    -------
    res : DataFrame
        Six columns, as in the example below.
    Notes
    -----
        Raises ``NoWebData`` when the date is not among the periods
        available online.

    Example
    -------
    >>> fetch_jjcg('000001','2004-09-30')
            基金简称  持仓市值(万元)  持仓股数(万股) 与上期持仓股数变化(万股) 占基金净值比例 占流通股比例
    0  融通深证100指数A      3361    390.78       增仓87.12   4.81%  0.28%
    1  博时沪深300指数A      5270    612.78       增仓44.67   1.56%  0.43%
    2        基金普丰     11840   1392.89      减仓800.00   4.32%  0.99%
    3  华宝兴业宝康配置混合         0      0.00            退出       0      0

    """
    query_type = 'jjcg'
    date_str = pd.Timestamp(query_date).strftime('%Y-%m-%d')
    periods = fetch_report_periods(stock_code, query_type)
    if date_str not in periods:
        raise NoWebData('不存在股票{}报告期为:"{}"的基金持股数据'.format(
            stock_code, date_str))
    # The period value is '<end>,<from>'; take the from-date.
    from_date_str = periods[date_str].split(',')[1]
    url_fmt = 'http://quotes.money.163.com/service/{}.html?{}date={}%2C{}&symbol={}'
    url = url_fmt.format(query_type, '', date_str, from_date_str, stock_code)
    payload = get_page_response(url).json()
    return pd.read_html(payload['table'], header=0, skiprows=range(1))[0]
Beispiel #13
0
def _industry_stocks(industry_id, date_str):
    """Stock PE table for one industry on a given date."""
    url = "http://www.cnindex.com.cn/stockPEs.do"
    # 1-char ids are CSRC (证监会) industry codes; longer ids are CNI (国证) codes.
    category = '008001' if len(industry_id) == 1 else '008100'
    payload = {
        'query.plate': quote('深沪全市场'),
        'query.category': category,
        'query.industry': industry_id,
        'query.date': date_str,
        'pageSize': 3000,  # load everything in a single page
    }
    response = get_page_response(url, 'post', payload)
    table = pd.read_html(response.text, skiprows=[0])[0]
    return table.iloc[:, 1:]
Beispiel #14
0
def fetch_delisting_stocks():
    """Return the list of delisted stocks, indexed by stock code (股票代码)."""
    url_fmt = 'http://three.cninfo.com.cn/new/information/getDelistingList?market={}'
    urls = [url_fmt.format(x) for x in ('sh', 'sz')]
    datas = [get_page_response(url, method='post').json() for url in urls]
    dfs = [pd.DataFrame(d) for d in datas]
    df = pd.concat(dfs)
    df = df.rename(
        columns={
            'f007d_0007': '转板日期',
            'f008d_0007': '终止上市日期',
            'r_seccode_0007': '三板证券代码',
            'r_secname_0007': '三板证券简称',
            'y_seccode_0007': '股票代码',
            'y_secname_0007': '股票简称',
        })
    df.set_index('股票代码', drop=True, inplace=True)
    # BUG FIX: ``applymap(str.strip)`` raised TypeError on any non-string
    # cell (e.g. NaN); strip only actual strings.
    return df.applymap(lambda v: v.strip() if isinstance(v, str) else v)
Beispiel #15
0
def fetch_performance_notice(stock_code, output_type='list'):
    """Performance pre-announcements for a stock.

    With ``output_type='list'`` a list of dicts is returned; otherwise a
    DataFrame indexed by ``announcement_date``.
    """
    url = 'http://quotes.money.163.com/f10/yjyg_{}.html'.format(stock_code)
    response = get_page_response(url)
    tables = pd.read_html(response.text,
                          match='报告日期',
                          attrs={'class': 'table_bg001 border_box table_details'})
    parsed = [_parse_performance_notice(t) for t in tables]
    if output_type == 'list':
        return parsed
    frame = pd.DataFrame.from_dict(parsed)
    frame.set_index('announcement_date', inplace=True)
    return frame
Beispiel #16
0
def _fetch_prbookinfos(report_date, url, markets):
    """Download the scheduled financial-report disclosure tables, page by page.

    Parameters
    ----------
    report_date : str
        Report period end date; also determines the year/quarter in the log.
    url : str
        Endpoint to POST to.
    markets : iterable
        Market codes; must be keys of ``JUCHAO_MARKET_MAPS``.

    Returns
    -------
    list of DataFrame
        One DataFrame per fetched page.
    """
    logger = logbook.Logger('财务报告预约披露时间表')
    dfs = []
    date_ = pd.Timestamp(report_date)
    year = date_.year
    q = date_.quarter
    for market in markets:
        pagenum = 1
        total_page = 1
        has_next_page = True
        data = {
            'sectionTime': report_date,
            'market': market,
            'isDesc': False,
            'pagenum': pagenum
        }
        while has_next_page and (pagenum <= total_page):
            logger.info('提取{}年{}季度{}第{}页数据'.format(year, q,
                                                   JUCHAO_MARKET_MAPS[market],
                                                   pagenum))
            # Throttle roughly every 30 pages to stay polite to the server.
            if pagenum % 30 == 0:
                t = np.random.randint(1, 3 * 100) / 100
                time.sleep(t)
            try:
                r = get_page_response(url, 'post', data)
                payload = r.json()
                book = payload['prbookinfos']
                # BUG FIX: pagination info used to be read via ``r.json()``
                # OUTSIDE this try-block, which raised NameError (first page)
                # or reused a stale response when the request failed.
                has_next_page = payload['hasNextPage']
                total_page = int(payload['totalPages'])
            except ConnectFailed:
                logger.info('{}第{}页出现异常!!!'.format(JUCHAO_MARKET_MAPS[market],
                                                   pagenum))
                logger.info('休眠3秒后再次尝试')
                time.sleep(3)
                book = _retry_one_page(url, data)
                # Keep the pagination state from the last successful page.
            df = pd.DataFrame.from_records(book)
            dfs.append(df)
            pagenum += 1
            data.update(pagenum=pagenum)
    return dfs
Beispiel #17
0
def fetch_announcement_summary():
    """Summary of the most recent company announcements.

    Uses:
        1. limit which company records need updating;
        2. limit which financial-report records need refreshing;
        3. aid analysis.
    """
    wanted = [
        'announcementTime', 'announcementTitle', 'announcementType',
        'announcementTypeName', 'secCode', 'secName'
    ]
    url_fmt = 'http://www.cninfo.com.cn/cninfo-new/disclosure/{}_summary/?pageNum={}'
    frames = []
    for market in ('sse', 'szse'):
        # Page through until the service reports no more data (cap: 99 pages).
        for page in range(1, 100):
            response = get_page_response(url_fmt.format(market, page), 'post')
            payload = response.json()
            frames.append(pd.DataFrame.from_dict(payload['announcements'])[wanted])
            if not payload['hasMore']:
                break
    data = pd.concat(frames)
    data.reset_index(inplace=True, drop=True)
    return pd.DataFrame({
        '股票代码':
        data['secCode'].values,
        '股票简称':
        data['secName'].values,
        '公告时间':
        data['announcementTime'].apply(pd.Timestamp, unit='ms'),
        '公告标题':
        data['announcementTitle'].values,
        '类别':
        data['announcementTypeName'].values,
    })
Beispiel #18
0
def fetch_history(code, start, end=None, is_index=False):
    """Historical (unadjusted) trade data for a stock or an index.

    Note:
        The source data lags by at least one trading day.

    Record:
        Downloading 002622 at `2018-12-12 16:00` yielded data up to
        2018-12-10 — a two-day lag.
    """
    start, end = sanitize_dates(start, end)
    query_code = _query_code(code, is_index)
    base = 'http://quotes.money.163.com/service/chddata.html?code={}&start={}&end={}'
    url = base.format(query_code,
                      start.strftime('%Y%m%d'),
                      end.strftime('%Y%m%d')) + '#01b07'
    page_response = get_page_response(url, 'get')
    return pd.read_csv(BytesIO(page_response.content),
                       index_col=0,
                       encoding='cp936',
                       parse_dates=True,
                       na_values=['None', '--', 'none'])
Beispiel #19
0
def fetch_adjustment(stock_code):
    """
    Historical dividend / distribution records for a stock.

    Shenzhen: the ex-rights base date coincides with the bonus-share
    listing date; Shanghai: the bonus-share listing date is usually later.

    Note:
        The ex-rights base date is used as pay date, the bonus-share
        listing date as effective date.
    """
    url = _get_url(stock_code, 'dividend')
    page_response = get_page_response(url)
    # Skip the header row of the matched table.
    raw = pd.read_html(BytesIO(page_response.content),
                       match='分红年度',
                       skiprows=[0])[0]
    # An empty result may still come back as a single all-NaN row.
    raw.dropna(how='all', inplace=True)
    if raw.empty:
        return raw
    raw.columns = _ADJUSTMENT_FIELDS
    data = _parse_ratio_and_amount(raw)
    data.set_index('effective_date', inplace=True)
    data.sort_index(inplace=True)
    return data
Beispiel #20
0
 def get_index_from(ex):
     """Return the SYMBOL/NAME columns of the index list for exchange ``ex``.

     ``url_fmt`` and ``one_big_int`` come from the enclosing scope.
     """
     query = {'page': 0, 'ex': ex, 'count': one_big_int}
     response = get_page_response(url_fmt.format_map(query), method='post')
     frame = pd.DataFrame(response.json()['list'])
     return frame.loc[:, ['SYMBOL', 'NAME']]
Beispiel #21
0
def fetch_issue_info(stock_code):
    """Issue (IPO) information for a stock.

    Returns the second HTML table of the issue page as a DataFrame.
    """
    url = _get_url(stock_code, 'issue')
    page_response = get_page_response(url)
    # BUG FIX: ``response.content`` is bytes — wrapping it in StringIO raised
    # TypeError; use BytesIO, as the other fetchers in this module do.
    df = pd.read_html(BytesIO(page_response.content))[1]
    return df
Beispiel #22
0
def _page_content(url):
    """Fetch ``url`` and return its body decoded as UTF-8 text."""
    resp = get_page_response(url)
    # Force UTF-8 regardless of the charset declared by the server.
    resp.encoding = 'utf-8'
    return resp.text