def load_fund_holding(start_date, end_date):
    session = login()
    query_url_template = 'http://simudata.howbuy.com/profile/favouriteStocks.htm?' \
                         'jjdm5=&zqdm=&endDate={0}&orderBy=cgsl&orderRule=Desc&page={1}'

    stamps = date_stamps(start_date, end_date)

    datas = []

    for publication_date in stamps:

        page = 0
        previous_page = None

        while True:
            page += 1
            query_url = query_url_template.format(publication_date, page)

            info_data = session.post(query_url)
            soup = BeautifulSoup(info_data.text, 'lxml')

            error_message = soup.find('div', attrs={'class': 'iocn'})
            if error_message:
                raise ValueError(error_message.text)

            tables = soup.find_all('table')

            # The endpoint keeps serving the last page for out-of-range page
            # numbers, so an identical response marks the end of pagination.
            if soup == previous_page:
                break

            if tables:
                target_table = tables[1]

                if target_table.tbody.td.text == '未查询到相关数据!':
                    break

                fund_data = parse_table(target_table)
                datas.append(fund_data)
            previous_page = soup
            spyder_logger.info("Page No. {0:4d} is finished.".format(page))

        spyder_logger.info(
            'Publication date {0} is finished for fund holding'.format(
                publication_date))

    if datas:
        total_table = pd.concat(datas)
        total_table.drop_duplicates(['基金代码', '基金简称', '股票代码'], inplace=True)
        return total_table[[
            '基金代码', '基金简称', '截止日期', '持股数量(万股)', '持股比例(%)', '变动数量(万股)', '股票代码',
            '股票简称'
        ]]
    else:
        spyder_logger.warning("No any data got between {0} and {1}".format(
            start_date, end_date))
        return pd.DataFrame()
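The helpers used above (login, date_stamps, parse_table, spyder_logger) are defined elsewhere in the project; a minimal sketch of plausible stand-ins, given here purely as assumptions for local testing, could be:

import logging
from io import StringIO

import pandas as pd
import requests

spyder_logger = logging.getLogger('spyder')


def login():
    # Assumption: the real project authenticates against simudata.howbuy.com;
    # a bare requests session is enough to illustrate the call sites above.
    return requests.Session()


def date_stamps(start_date, end_date):
    # Assumption: holdings are published at quarter ends, formatted as
    # 'YYYY-MM-DD' strings for the endDate query parameter.
    return [d.strftime('%Y-%m-%d')
            for d in pd.date_range(start_date, end_date, freq='Q')]


def parse_table(target_table):
    # Assumption: the holdings table is plain HTML that pandas can parse;
    # str(target_table) passes the single <table> element to read_html.
    return pd.read_html(StringIO(str(target_table)), header=0)[0]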
Example 2
def suspend(query_date):
    codes = []
    names = []
    status = []
    reasons = []
    stop_times = []

    with requests.Session() as session:
        session.headers[
            'Referer'] = 'http://www.sse.com.cn/disclosure/dealinstruc/suspension/'

        template_url = 'http://query.sse.com.cn/' \
                       'infodisplay/querySpecialTipsInfoByPage.do?' \
                       'jsonCallBack=jsonpCallback45028&isPagination=true&searchDate={query_date}' \
                       '&bgFlag=1&searchDo=1&pageHelp.pageSize=5000&pageHelp.pageNo=1' \
                       '&pageHelp.beginPage=1&pageHelp.cacheSize=1&_=1477364635046'

        query_url = template_url.format(query_date=query_date)

        info_data = try_request(session, query_url, req_type='post')

        info_data.encoding = 'utf8'
        soup = BeautifulSoup(info_data.text, 'lxml')
        text = soup.text
        content = json.loads(text[text.find('(') + 1:text.rfind(')')])

        json_data = content['result']

        for row in json_data:
            if row['showDate'] == query_date and row['productCode'].startswith(
                    '6'):
                codes.append(row['productCode'])
                names.append(row['productName'])

                # '停牌终止' in stopTime means the suspension has ended, i.e. trading resumes.
                if row['stopTime'].find('停牌终止') != -1:
                    status.append('复牌')
                else:
                    status.append('停牌')
                stop_times.append(row['stopTime'])
                reasons.append(row['stopReason'].strip())

    df = pd.DataFrame({
        '停(复)牌时间': query_date,
        '证券代码': codes,
        '证券简称': names,
        '状态': status,
        '原因': reasons,
        '期限': stop_times
    })

    if df.empty:
        spyder_logger.warning(
            'No data found for the date {0}'.format(query_date))

    return df
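try_request is used by the remaining examples but is not shown; a minimal retrying wrapper, with the retry count and timeout chosen as assumptions, might look like this:

import time

import requests


def try_request(session, url, data=None, req_type='get', retries=3, timeout=30):
    # Assumption: retry transient failures a few times with a short pause,
    # re-raising the last error if every attempt fails.
    last_error = None
    for _ in range(retries):
        try:
            if req_type == 'post':
                response = session.post(url, data=data, timeout=timeout)
            else:
                response = session.get(url, params=data, timeout=timeout)
            response.raise_for_status()
            return response
        except requests.RequestException as error:
            last_error = error
            time.sleep(1)
    raise last_error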
Example 3
def announcement(query_date):

    with requests.Session() as session:
        session.headers[
            'Referer'] = 'http://www.sse.com.cn/disclosure/listedinfo/announcement/'

        template_url = 'http://query.sse.com.cn/' \
                       'infodisplay/queryLatestBulletinNew.do?' \
                       'jsonCallBack=jsonpCallback98209&isPagination=true&productId=&keyWord=&reportType2=&' \
                       'reportType=ALL&beginDate={query_date}&endDate={query_date}&pageHelp.pageSize=5000&' \
                       'pageHelp.pageCount=50&pageHelp.pageNo=1&pageHelp.beginPage=1&' \
                       'pageHelp.cacheSize=1&pageHelp.endPage=5&_=1492758467504'

        query_url = template_url.format(query_date=query_date)

        info_data = try_request(session, query_url, req_type='post')

        info_data.encoding = 'utf8'
        soup = BeautifulSoup(info_data.text, 'lxml')

        text = soup.text
        text = text[text.find('(') + 1:text.rfind(')')]

        content = json.loads(text)

        json_data = content['result']

        codes = [row['security_Code'] for row in json_data]
        titles = [row['title'] for row in json_data]
        urls = ['http://www.sse.com.cn' + row['URL'] for row in json_data]
        report_dates = [row['SSEDate'] for row in json_data]

    df = pd.DataFrame({
        '报告日期': report_dates,
        '证券代码': codes,
        '标题': titles,
        'url': urls,
        'updateTime': dt.datetime.now(),
        'exchangePlace': 'xshg'
    })

    if df.empty:
        spyder_logger.warning(
            'No data found for the date {0}'.format(query_date))

    exist_data = find_existing(query_date)
    new_records = set(df.url).difference(set(exist_data.url))
    df = df[df.url.isin(new_records)]

    return df
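find_existing is assumed to return the announcements already stored for a date, so only new records are kept; one illustrative version backed by a per-date CSV cache (the path and schema are assumptions, the real project may well query a database) is:

import os

import pandas as pd


def find_existing(query_date):
    # Assumption: previously fetched announcements are cached in a CSV per date.
    # An empty frame with a 'url' column keeps set(exist_data.url) working
    # at the call sites above when nothing has been stored yet.
    cache_path = 'announcements_{0}.csv'.format(query_date)
    if os.path.exists(cache_path):
        return pd.read_csv(cache_path)
    return pd.DataFrame(columns=['url'])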
Example 4
def suspend(query_date):
    codes = []
    names = []
    status = []
    reasons = []
    stop_times = []

    previous_page = None

    with requests.Session() as session:
        session.headers[
            'Referer'] = 'https://www.szse.cn/main/disclosure/news/tfpts/'
        session.headers['Host'] = 'www.szse.cn'
        session.headers['Origin'] = 'https://www.szse.cn'
        query_url = 'https://www.szse.cn/szseWeb/FrontController.szse'

        page = 1

        while True:

            info_data = try_request(session,
                                    query_url,
                                    data={
                                        'ACTIONID': 7,
                                        'AJAX': 'AJAX-TRUE',
                                        'CATALOGID': 1798,
                                        'TABKEY': 'tab1',
                                        'REPORT_ACTION': 'navigate',
                                        'txtKsrq': query_date,
                                        'txtZzrq': query_date,
                                        'tab1PAGECOUNT': 999,
                                        'tab1RECORDCOUNT': 999999,
                                        'tab1PAGENUM': page
                                    },
                                    req_type='post')

            info_data.encoding = 'gbk'
            soup = BeautifulSoup(info_data.text, 'lxml')

            if soup == previous_page:
                break

            table = soup.find_all(
                attrs={'class': 'cls-data-table-common cls-data-table'})[0]
            rows = table.find_all('tr')
            if rows:
                for row in rows:
                    cells = row.find_all('td')
                    if cells and len(cells) >= 6:
                        codes.append(cells[0].text)
                        names.append(cells[1].text)

                        info_message = cells[4].text.strip()

                        if info_message.find('取消停牌') != -1:
                            status.append('复牌')
                            stop_times.append('')
                        else:
                            status.append('停牌')
                            stop_times.append(info_message)

                        reasons.append(cells[5].text.strip())
            else:
                break
            page += 1
            previous_page = soup

    df = pd.DataFrame({
        '停(复)牌时间': query_date,
        '证券代码': codes,
        '证券简称': names,
        '状态': status,
        '原因': reasons,
        '期限': stop_times
    })

    if df.empty:
        spyder_logger.warning(
            'No data found for the date {0}'.format(query_date))
    return df
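Both suspend functions (Shanghai in Example 2, Shenzhen here) return frames with identical column names, so they can be concatenated into a single daily view; the module names in this usage sketch are assumptions, since the snippets do not show the project layout:

import pandas as pd

# Assumption: the SSE and SZSE scrapers live in separate modules.
from sse_spider import suspend as sse_suspend
from szse_spider import suspend as szse_suspend

if __name__ == '__main__':
    query_date = '2017-04-21'  # assumed date format accepted by both endpoints
    combined = pd.concat([sse_suspend(query_date), szse_suspend(query_date)],
                         ignore_index=True)
    print(combined.head())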
Example 5
def announcement(query_date):

    full_codes_path = os.path.join(os.path.dirname(__file__), 'data', 'xshe.xlsx')
    full_codes = pd.read_excel(full_codes_path)[['A股代码', 'A股简称']]

    with requests.Session() as session:
        session.headers[
            'Referer'] = 'http://www.sse.com.cn/disclosure/listedinfo/announcement/'

        query_url = 'http://disclosure.szse.cn/m/search0425.jsp'

        page = 1
        previous_page = None

        datas = []
        exist_data = find_existing(query_date)

        while True:

            short_names = []
            titles = []
            urls = []
            report_dates = []

            info_data = try_request(session,
                                    query_url,
                                    data={
                                        'startTime': query_date,
                                        'endTime': query_date,
                                        'pageNo': page
                                    },
                                    req_type='post')

            info_data.encoding = 'gbk'
            soup = BeautifulSoup(info_data.text, 'lxml')

            if soup == previous_page:
                break

            rows = soup.find_all('td', attrs={'class': 'td2'})

            for row in rows:
                titles.append(row.a.text)
                short_names.append(row.a.text.split(':')[0])
                urls.append('http://disclosure.szse.cn/' + row.a['href'])
                report_dates.append(row.span.text[1:-1])

            codes = match_codes(short_names, full_codes)

            previous_page = soup
            page += 1

            df = pd.DataFrame({
                '报告日期': report_dates,
                '证券代码': codes,
                '标题': titles,
                'url': urls,
                'updateTime': dt.datetime.now(),
                'exchangePlace': 'xshe'
            })

            new_records = set(df.url).difference(set(exist_data.url))
            original_length = len(df)
            df = df[df.url.isin(new_records)]
            datas.append(df)

            if len(df) != original_length:
                break

    df = pd.concat(datas)
    df.drop_duplicates(['url'], inplace=True)

    if df.empty:
        spyder_logger.warning(
            'No data found for the date {0}'.format(query_date))

    return df
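match_codes is not shown; a plausible sketch that maps the announcement short names onto A-share codes through the xshe.xlsx frame loaded above (the exact-match rule and the empty-string fallback are assumptions) would be:

def match_codes(short_names, full_codes):
    # Assumption: exact match on the A-share short name; names that cannot be
    # resolved map to an empty string so the DataFrame columns stay aligned.
    name_to_code = dict(zip(full_codes['A股简称'], full_codes['A股代码']))
    return [name_to_code.get(name, '') for name in short_names]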