def profession_report_spider():
    """Import new Eastmoney industry research reports into ``profession_report``.

    The newest ``up_date`` already stored in the table is used as a cut-off
    (yesterday's timestamp when the table is empty). List pages 1..1336 are
    fetched in order; every row is a comma-separated string whose columns are
    read by position. The first row dated on or before the cut-off stops the
    whole crawl (this assumes the endpoint lists newest reports first --
    TODO confirm). For each newer row the detail page is fetched, its
    ``.newsContent p`` results are joined into the report text, and the
    record is saved through ``SuperSpider``.

    Pages or detail pages that fail are logged and skipped. Only
    ``Exception`` subclasses are caught, so Ctrl-C and ``SystemExit`` still
    abort the run.

    NOTE(review): another ``profession_report_spider`` is defined later in
    this file; at import time the later definition replaces this one.
    """
    profession_report = SuperSpider(
        table_name='profession_report',
        field_list=('name', 'spider_date', 'up_date', 'up_down', 'report',
                    'grade', 'grade_change', 'institution'))

    sql1 = 'select MAX(up_date) from profession_report'
    latest_time = profession_report.sql_search(sql1)[0][0]
    if not latest_time:
        # Empty table: only take reports newer than "now minus one day".
        latest_datetime = datetime.now() - timedelta(days=1)
    else:
        # Truncate to midnight so the comparison is by calendar day.
        latest_datetime = datetime(latest_time.year, latest_time.month,
                                   latest_time.day)

    is_end = False
    for page in range(1, 1337):
        url = (
            'http://datainterface.eastmoney.com//EM_DataCenter/js.aspx'
            '?type=SR&sty=HYSR&mkt=0&stat=0&cmd=4&code=&sc=&ps=50&p='
            + str(page)
            + '&js=var%20vMcgaFDg={%22data%22:[(x)],%22pages%22:%22(pc)%22,'
            '%22update%22:%22(ud)%22,%22count%22:%22(count)%22}&rt=51553086'
        )
        try:
            json_data = profession_report.use_requests_to_html(url, 'utf8')
            data_list = profession_report.json_to_py(json_data, deal=True)['data']
        except Exception as error:
            # The for loop advances ``page`` itself; just report and move on.
            print(f'第{page}页获取失败')
            print(error)
            continue

        for data in data_list:
            data = data.split(',')
            # data[1] looks like "<date> <time>" with "/" separators.
            time1 = data[1].split(' ')[0].replace('/', '-')
            datetime1 = datetime.strptime(time1, '%Y-%m-%d')
            if datetime1 <= latest_datetime:
                print('暂无数据更新')
                is_end = True
                break

            infocode = data[2]
            time2 = time1.replace('-', '')
            detail_url = f'http://data.eastmoney.com/report/{time2}/{infocode}.html'
            try:
                profession_report.get_request(detail_url)
            except Exception as error:
                # Best effort: skip this report but leave a trace in the log.
                print(f'研报详情获取失败:{detail_url}')
                print(error)
                continue

            report = ''.join(
                profession_report.data_search('find', '.newsContent p'))

            profession_report.name = data[10]
            profession_report.up_date = time1
            profession_report.up_down = profession_report.to_null(data[11])
            profession_report.report = report
            profession_report.grade = data[7]
            profession_report.grade_change = data[0]
            profession_report.institution = data[4]
            profession_report.data_save()
            print(
                f'行业研报:{profession_report.up_date}-{profession_report.name}-{profession_report.institution}-导入完成'
            )

        if is_end:
            break

    profession_report.spider_end()
    print('end:行业研报')
def stock_report_spider():
    """Import new Eastmoney individual-stock research reports into ``stock_report``.

    The newest ``up_date`` already stored in the table is used as a cut-off
    (yesterday's timestamp when the table is empty). List pages 1..253 are
    fetched in order; every row is a mapping read by key. The first row dated
    on or before the cut-off stops the whole crawl (this assumes the endpoint
    lists newest reports first -- TODO confirm). For each newer row the
    detail page is fetched, its ``#ContentBody .newsContent p`` results are
    joined into the report text, and the record is saved through
    ``SuperSpider``.

    Pages or detail pages that fail are logged and skipped. Only
    ``Exception`` subclasses are caught, so Ctrl-C and ``SystemExit`` still
    abort the run.
    """
    # NOTE(review): database host and password are hard-coded here; they
    # should come from configuration or the environment instead.
    stock_report = SuperSpider(
        host='47.102.40.81',
        passwd='Abc12345',
        db='bryframe',
        table_name='stock_report',
        field_list=('code', 'name', 'spider_date', 'up_date', 'report',
                    'grade', 'grade_change', 'institution', 'income_2018',
                    'rate_2018', 'income_2019', 'rate_2019'))

    sql1 = 'select MAX(up_date) from stock_report'
    latest_time = stock_report.sql_search(sql1)[0][0]
    if not latest_time:
        # Empty table: only take reports newer than "now minus one day".
        latest_datetime = datetime.now() - timedelta(days=1)
    else:
        # Truncate to midnight so the comparison is by calendar day.
        latest_datetime = datetime(latest_time.year, latest_time.month,
                                   latest_time.day)

    is_end = False
    for page in range(1, 254):
        url = (
            'http://datainterface.eastmoney.com//EM_DataCenter/js.aspx'
            '?type=SR&sty=GGSR&js=var%20MILbIdwm={"data":[(x)],"pages":"(pc)",'
            '"update":"(ud)","count":"(count)"}&ps=50&p='
            + str(page)
            + '&mkt=0&stat=0&cmd=2&code=&rt=51552935'
        )
        try:
            json_data = stock_report.use_requests_to_html(url, 'utf8')
            data_list = stock_report.json_to_py(json_data, deal=True)['data']
        except Exception as error:
            # The for loop advances ``page`` itself; just report and move on.
            print(f'第{page}页获取失败')
            print(error)
            continue

        for data in data_list:
            # The first ten characters of 'datetime' are parsed as YYYY-MM-DD.
            time1 = data['datetime'][:10]
            datetime1 = datetime.strptime(time1, '%Y-%m-%d')
            if datetime1 <= latest_datetime:
                print('暂无数据更新')
                is_end = True
                break

            infocode = data['infoCode']
            time2 = time1.replace('-', '')
            detail_url = f'http://data.eastmoney.com/report/{time2}/{infocode}.html'
            try:
                stock_report.get_request(detail_url)
            except Exception as error:
                # Best effort: skip this report but leave a trace in the log.
                print(f'研报详情获取失败:{detail_url}')
                print(error)
                continue

            report = ''.join(
                stock_report.data_search('find', '#ContentBody .newsContent p'))

            stock_report.code = data['secuFullCode']
            stock_report.name = data['secuName']
            # NOTE(review): up_date is taken from spider_date, not from the
            # report's own date (time1) -- confirm this is intended, since
            # the cut-off above is derived from MAX(up_date).
            stock_report.up_date = stock_report.spider_date
            stock_report.report = report
            stock_report.grade = data['rate']
            stock_report.grade_change = data['change']
            stock_report.institution = data['insName']
            # 'sys' / 'syls' are indexed positionally: [0] feeds the 2018
            # columns and [1] the 2019 columns.
            stock_report.income_2018 = stock_report.to_null(data['sys'][0])
            stock_report.rate_2018 = stock_report.to_null(data['syls'][0])
            stock_report.income_2019 = stock_report.to_null(data['sys'][1])
            stock_report.rate_2019 = stock_report.to_null(data['syls'][1])
            stock_report.data_save()
            print(
                f'个股研报:{stock_report.spider_date}-{stock_report.code}-{stock_report.name}-导入完成'
            )

        if is_end:
            break

    stock_report.spider_end()
    print('end:个股研报')
def profession_report_spider():
    """Import new Eastmoney industry research reports into ``profession_report``.

    The newest ``up_date`` already stored in the table is used as a cut-off
    (yesterday's timestamp when the table is empty). List pages 1..1336 are
    fetched in order; every row is a comma-separated string whose columns are
    read by position. The first row dated on or before the cut-off stops the
    whole crawl (this assumes the endpoint lists newest reports first --
    TODO confirm). For each newer row the report text is taken from the
    detail page (``newsContent`` div, decoded as gb2312) and the record is
    saved through ``SuperSpider``.

    A list page that fails is logged and skipped. A detail page that fails is
    logged and the row is still saved, with an empty report. Only
    ``Exception`` subclasses are caught, so Ctrl-C and ``SystemExit`` still
    abort the run.
    """
    # NOTE(review): database host and password are hard-coded here; they
    # should come from configuration or the environment instead.
    profession_report = SuperSpider(
        host='47.102.40.81',
        passwd='Abc12345',
        db='bryframe',
        table_name='profession_report',
        field_list=('name', 'spider_date', 'up_date', 'up_down', 'report',
                    'grade', 'grade_change', 'institution'))

    sql1 = 'select MAX(up_date) from profession_report'
    latest_time = profession_report.sql_search(sql1)[0][0]
    if not latest_time:
        # Empty table: only take reports newer than "now minus one day".
        latest_datetime = datetime.now() - timedelta(days=1)
    else:
        # Truncate to midnight so the comparison is by calendar day.
        latest_datetime = datetime(latest_time.year, latest_time.month,
                                   latest_time.day)

    is_end = False
    for page in range(1, 1337):
        url = (
            'http://datainterface.eastmoney.com//EM_DataCenter/js.aspx'
            '?type=SR&sty=HYSR&mkt=0&stat=0&cmd=4&code=&sc=&ps=50&p='
            + str(page)
            + '&js=var%20vMcgaFDg={%22data%22:[(x)],%22pages%22:%22(pc)%22,'
            '%22update%22:%22(ud)%22,%22count%22:%22(count)%22}&rt=51553086'
        )
        try:
            json_data = profession_report.get_html(url)
            data_list = profession_report.json_to_py(json_data, deal=True)['data']
        except Exception as error:
            # The for loop advances ``page`` itself; just report and move on.
            print(f'第{page}页获取失败')
            print(error)
            continue

        for data in data_list:
            data = data.split(',')
            # data[1] looks like "<date> <time>" with "/" separators.
            time1 = data[1].split(' ')[0].replace('/', '-')
            # name/up_date are assigned before the cut-off test, as in the
            # original, so the spider object holds them even when we stop.
            profession_report.name = data[10]
            profession_report.up_date = time1
            datetime1 = datetime.strptime(time1, '%Y-%m-%d')
            if datetime1 <= latest_datetime:
                print('暂无数据更新')
                is_end = True
                break

            infocode = data[2]
            time2 = time1.replace('-', '')
            profession_report.up_down = profession_report.to_null(data[11])

            detail_url = f'http://data.eastmoney.com/report/{time2}/{infocode}.html'
            try:
                profession_report.report = ''.join(
                    profession_report.data_search(
                        detail_url,
                        '//div[@class="newsContent"]/text()',
                        'gb2312')).strip()
            except Exception as error:
                # Best effort: keep the row, but reset the text so a failed
                # fetch cannot reuse whatever report is still on the object.
                print(f'研报详情获取失败:{detail_url}')
                print(error)
                profession_report.report = ''

            # The old per-row "same_data" lookup is intentionally gone: its
            # result was never read, and it pasted scraped report text into a
            # double-quoted SQL string, which breaks on any quote character.
            profession_report.grade = data[7]
            profession_report.grade_change = data[0]
            profession_report.institution = data[4]
            profession_report.data_save()
            print(
                f'行业研报:{profession_report.up_date}-{profession_report.name}-{profession_report.institution}-导入完成'
            )

        if is_end:
            break

    profession_report.spider_end()
    print('end:行业研报')