def profession_report_spider():
    profession_report = SuperSpider(
        table_name='profession_report',
        field_list=('name', 'spider_date', 'up_date', 'up_down', 'report',
                    'grade', 'grade_change', 'institution'))
    sql1 = 'select MAX(up_date) from profession_report'
    latest_time = profession_report.sql_search(sql1)[0][0]
    if not latest_time:
        latest_datetime = datetime.now() - timedelta(days=1)
    else:
        latest_datetime = datetime(latest_time.year, latest_time.month,
                                   latest_time.day)
    is_end = False
    for page in range(1, 1337):
        url = 'http://datainterface.eastmoney.com//EM_DataCenter/js.aspx?type=SR&sty=HYSR&mkt=0&stat=0&cmd=4&code=&sc=&ps=50&p=' + str(
            page
        ) + '&js=var%20vMcgaFDg={%22data%22:[(x)],%22pages%22:%22(pc)%22,%22update%22:%22(ud)%22,%22count%22:%22(count)%22}&rt=51553086'
        try:
            json_data = profession_report.use_requests_to_html(url, 'utf8')
            data_list = profession_report.json_to_py(json_data,
                                                     deal=True)['data']
        except Exception:
            print(f'第{page}页获取失败')
            continue  # the for loop advances page itself; the manual increment had no effect
        for data in data_list:
            data = data.split(',')
            time1 = data[1].split(' ')[0].replace('/', '-')
            datetime1 = datetime.strptime(time1, '%Y-%m-%d')
            if datetime1 <= latest_datetime:
                print('暂无数据更新')
                is_end = True
                break
            infocode = data[2]
            time2 = time1.replace('-', '')
            try:
                profession_report.get_request(
                    f'http://data.eastmoney.com/report/{time2}/{infocode}.html'
                )
            except Exception:
                continue
            report = ''
            for par in profession_report.data_search('find', '.newsContent p'):
                report = report + par
            profession_report.name = data[10]
            profession_report.up_date = time1
            profession_report.up_down = profession_report.to_null(data[11])
            profession_report.report = report
            profession_report.grade = data[7]
            profession_report.grade_change = data[0]
            profession_report.institution = data[4]
            profession_report.data_save()
            print(
                f'行业研报:{profession_report.up_date}-{profession_report.name}-{profession_report.institution}-导入完成'
            )
        if is_end:
            break
    profession_report.spider_end()
    print('end:行业研报')
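

# The js.aspx endpoint above returns a JavaScript assignment ("var vMcgaFDg={...}")
# rather than bare JSON, which is presumably what json_to_py(..., deal=True) unwraps.
# A minimal standalone sketch of that unwrapping step, assuming only the standard
# library; jsonp_to_dict is a hypothetical name, not part of SuperSpider:
import json


def jsonp_to_dict(raw_text):
    """Strip the 'var xxx=' prefix from an Eastmoney js.aspx response and parse the rest as JSON."""
    body = raw_text.split('=', 1)[1].strip().rstrip(';')
    return json.loads(body)


# e.g. jsonp_to_dict('var vMcgaFDg={"data":[],"pages":"1"}')['data'] -> []

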
def stock_report_spider():
    stock_report = SuperSpider(
        host='47.102.40.81',
        passwd='Abc12345',
        db='bryframe',
        table_name='stock_report',
        field_list=('code', 'name', 'spider_date', 'up_date', 'report',
                    'grade', 'grade_change', 'institution', 'income_2018',
                    'rate_2018', 'income_2019', 'rate_2019'))
    sql1 = 'select MAX(up_date) from stock_report'
    latest_time = stock_report.sql_search(sql1)[0][0]
    if not latest_time:
        latest_datetime = datetime.now() - timedelta(days=1)
    else:
        latest_datetime = datetime(latest_time.year, latest_time.month,
                                   latest_time.day)
    is_end = False
    for page in range(1, 254):
        url = 'http://datainterface.eastmoney.com//EM_DataCenter/js.aspx?type=SR&sty=GGSR&js=var%20MILbIdwm={"data":[(x)],"pages":"(pc)","update":"(ud)","count":"(count)"}&ps=50&p=' + str(
            page) + '&mkt=0&stat=0&cmd=2&code=&rt=51552935'
        try:
            json_data = stock_report.use_requests_to_html(url, 'utf8')
            data_list = stock_report.json_to_py(json_data, deal=True)['data']
        except Exception:
            print(f'第{page}页获取失败')
            continue  # the for loop advances page itself; the manual increment had no effect
        for data in data_list:
            time1 = data['datetime'][:10]
            datetime1 = datetime.strptime(time1, '%Y-%m-%d')
            if datetime1 <= latest_datetime:
                print('暂无数据更新')
                is_end = True
                break
            infocode = data['infoCode']
            time2 = time1.replace('-', '')
            try:
                stock_report.get_request(
                    f'http://data.eastmoney.com/report/{time2}/{infocode}.html'
                )
            except Exception:
                continue
            report = ''
            for par in stock_report.data_search('find',
                                                '#ContentBody .newsContent p'):
                report = report + par
            stock_report.code = data['secuFullCode']
            stock_report.name = data['secuName']
            stock_report.up_date = stock_report.spider_date
            stock_report.report = report
            stock_report.grade = data['rate']
            stock_report.grade_change = data['change']
            stock_report.institution = data['insName']
            stock_report.income_2018 = stock_report.to_null(data['sys'][0])
            stock_report.rate_2018 = stock_report.to_null(data['syls'][0])
            stock_report.income_2019 = stock_report.to_null(data['sys'][1])
            stock_report.rate_2019 = stock_report.to_null(data['syls'][1])
            stock_report.data_save()
            print(
                f'个股研报:{stock_report.spider_date}-{stock_report.code}-{stock_report.name}-导入完成'
            )
        if is_end:
            break
    stock_report.spider_end()
    print('end:个股研报')
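

# Both spiders share the same incremental-update rule: read MAX(up_date) from the
# target table (falling back to a one-day-old cutoff when the table is empty) and stop
# paging at the first report dated on or before that cutoff, since the listing is
# newest-first. A self-contained sketch of that rule; new_rows_only and row_dates are
# illustrative names, not part of SuperSpider:
from datetime import datetime, timedelta


def new_rows_only(row_dates, latest_time=None):
    """Yield dates strictly newer than the stored cutoff, stopping at the first stale one."""
    if latest_time is None:
        cutoff = datetime.now() - timedelta(days=1)  # empty table: fall back to a one-day-old cutoff
    else:
        cutoff = datetime(latest_time.year, latest_time.month, latest_time.day)
    for report_date in row_dates:
        if report_date <= cutoff:
            break  # everything from here on is already stored
        yield report_date

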
Example #3
def profession_report_spider():
    profession_report_list = []
    profession_report = SuperSpider(
        host='47.102.40.81',
        passwd='Abc12345',
        db='bryframe',
        table_name='profession_report',
        field_list=('name', 'spider_date', 'up_date', 'up_down', 'report',
                    'grade', 'grade_change', 'institution'))
    sql1 = 'select MAX(up_date) from profession_report'
    latest_time = profession_report.sql_search(sql1)[0][0]
    if not latest_time:
        latest_datetime = datetime.now() - timedelta(days=1)
    else:
        latest_datetime = datetime(latest_time.year, latest_time.month,
                                   latest_time.day)
    is_end = False
    for page in range(1, 1337):
        url = 'http://datainterface.eastmoney.com//EM_DataCenter/js.aspx?type=SR&sty=HYSR&mkt=0&stat=0&cmd=4&code=&sc=&ps=50&p=' + str(
            page
        ) + '&js=var%20vMcgaFDg={%22data%22:[(x)],%22pages%22:%22(pc)%22,%22update%22:%22(ud)%22,%22count%22:%22(count)%22}&rt=51553086'
        try:
            json_data = profession_report.get_html(url)
            data_list = profession_report.json_to_py(json_data,
                                                     deal=True)['data']
        except Exception as error:
            print(f'第{page}页获取失败')
            print(error)
            continue  # the for loop advances page itself; the manual increment had no effect
        for data in data_list:
            data = data.split(',')
            time1 = data[1].split(' ')[0].replace('/', '-')
            profession_report.name = data[10]
            profession_report.up_date = time1
            datetime1 = datetime.strptime(time1, '%Y-%m-%d')
            if datetime1 <= latest_datetime:
                print('暂无数据更新')
                is_end = True
                break
            infocode = data[2]
            time2 = time1.replace('-', '')
            profession_report.up_down = profession_report.to_null(data[11])
            profession_report.report = ''  # reset so a failed fetch does not reuse the previous row's text
            try:
                profession_report.report = (''.join(
                    profession_report.data_search(
                        f'http://data.eastmoney.com/report/{time2}/{infocode}.html',
                        '//div[@class="newsContent"]/text()',
                        'gb2312'))).strip()
            except Exception:
                pass
            sql = f'select name from profession_report where name="{profession_report.name}" and spider_date="{profession_report.spider_date}" and up_date="{profession_report.up_date}" and report="{profession_report.report}"'
            same_data = profession_report.sql_search(sql)
            if same_data:  # the query result was unused in the original; skipping already-stored rows appears to be its intent
                continue
            profession_report.grade = data[7]
            profession_report.grade_change = data[0]
            profession_report.institution = data[4]
            profession_report.data_save()
            print(
                f'行业研报:{profession_report.up_date}-{profession_report.name}-{profession_report.institution}-导入完成'
            )
        if is_end:
            break
    profession_report.spider_end()
    print('end:行业研报')
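

# Example #3 replaces the CSS-selector extraction with an XPath query decoded as
# gb2312. data_search() belongs to SuperSpider and is not shown, so the sketch below
# reimplements that fetch-and-extract step with requests and lxml purely for
# illustration; the function name and signature are assumptions, not project API.
import requests
from lxml import html


def fetch_report_text(time2, infocode):
    """Fetch one report page and join the text nodes directly under div.newsContent."""
    url = f'http://data.eastmoney.com/report/{time2}/{infocode}.html'
    resp = requests.get(url, timeout=10)
    resp.encoding = 'gb2312'
    tree = html.fromstring(resp.text)
    return ''.join(tree.xpath('//div[@class="newsContent"]/text()')).strip()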