# Example no. 1 (score: 0)
def business_detail_spider():
    """Scrape daily LHB (dragon-tiger list) trading details from eastmoney.com
    and persist them through a ``SuperSpider`` into the ``business_detail`` table.

    Walks the paginated "active statistics" JSON endpoint; for every stock not
    yet seen in this run it fetches the per-day detail page and extracts up to
    ten (department_name, amount) pairs, whose cells sit 7 table-cells apart.

    No parameters, no return value; all output goes to the database and stdout.
    """
    seen_stocks = []
    # SECURITY NOTE(review): DB host/password are hard-coded; move them to
    # configuration or environment variables.
    business_detail = SuperSpider(host='47.102.40.81',
                                  passwd='Abc12345',
                                  db='bryframe',
                                  table_name='business_detail',
                                  field_list=('spider_date', 'up_date', 'code',
                                              'name', 'department_name',
                                              'amount'))
    business_detail.up_date = business_detail.spider_date
    page = 1
    while True:
        # Hard page cap checked on EVERY iteration. The original only tested
        # `page == 500` after a successful fetch, so a run of consecutive
        # fetch failures could step past 500 and loop forever.
        if page >= 500:
            break
        try:
            json_data = business_detail.use_requests_to_html(
                f'http://data.eastmoney.com/DataCenter_V3/stock2016/ActiveStatistics/pagesize=50,page={page},sortRule=-1,sortType=JmMoney,startDate={business_detail.spider_date},endDate={business_detail.spider_date},gpfw=0,js=var%20data_tab_1.html?rt=25861061',
                'GB2312')
            data_list = business_detail.json_to_py(json_data,
                                                   deal=True)['data']
        except Exception:  # narrowed from bare except: keep Ctrl-C working
            print(f'第{page}页获取失败')
            page += 1
            continue
        if not data_list:
            break
        print(f'第{page}页')
        for data in data_list:
            if not data['SName']:
                continue
            stock_data_list = business_detail.json_to_py(data['SName'])
            for stock_data in stock_data_list:
                # Guard clause: skip stocks already handled in this run.
                if stock_data['CodeName'] in seen_stocks:
                    continue
                seen_stocks.append(stock_data['CodeName'])
                business_detail.name = stock_data['CodeName']
                business_detail.code = stock_data['SCode']
                try:
                    # next(...) is the idiomatic spelling of .__next__().
                    url_code = next(
                        business_detail.re_find(
                            r'\d+', business_detail.code)).group()
                except Exception:  # no digits found in the code
                    continue
                print(url_code)
                url = (f'http://data.eastmoney.com/stock/lhb,'
                       f'{business_detail.spider_date},{url_code}.html')
                try:
                    business_detail.get_request(url)
                except Exception:
                    continue
                detail_data_list = list(
                    business_detail.data_search('find', 'table tbody td'))
                # Cells repeat every 7 entries: index i holds the department
                # name, index j (= i + 5) the traded amount.
                for i, j in zip(range(1, 71, 7), range(6, 71, 7)):
                    try:
                        business_detail.department_name = \
                            detail_data_list[i].split('\n')[0]
                    except Exception:  # ran past the last table row
                        break
                    business_detail.amount = detail_data_list[j]
                    business_detail.data_save()
                    print(f'每日成交明细——{business_detail.up_date}——{business_detail.code}——{business_detail.name}——{business_detail.department_name}——导入完成')
        page += 1
    business_detail.spider_end()
# Example no. 2 (score: 0)
def business_detail_spider():
    """Scrape daily LHB (dragon-tiger list) trading details from eastmoney.com
    and persist them through a ``SuperSpider`` into ``business_detail``.

    Variant that deletes any rows already stored for the same code/date
    before re-scraping, and that parses the detail page via XPath (cells
    repeat every 8 entries). No parameters, no return value.
    """
    seen_stocks = []
    # SECURITY NOTE(review): DB host/password are hard-coded; move them to
    # configuration or environment variables.
    business_detail = SuperSpider(host='47.102.40.81',
                                  passwd='Abc12345',
                                  db='bryframe',
                                  table_name='business_detail',
                                  field_list=('spider_date', 'up_date', 'code',
                                              'name', 'department_name',
                                              'amount'))
    business_detail.up_date = business_detail.spider_date
    page = 1
    while True:
        # Hard page cap checked on EVERY iteration. The original only tested
        # `page == 500` after a successful fetch, so a run of consecutive
        # fetch failures could step past 500 and loop forever.
        if page >= 500:
            break
        try:
            json_data = business_detail.get_html(
                f'http://data.eastmoney.com/DataCenter_V3/stock2016/ActiveStatistics/pagesize=50,page={page},sortRule=-1,sortType=JmMoney,startDate={business_detail.spider_date},endDate={business_detail.spider_date},gpfw=0,js=var%20data_tab_1.html?rt=25861061',
                'GB2312')
            data_list = business_detail.json_to_py(json_data,
                                                   deal=True)['data']
        except Exception:  # narrowed from bare except: keep Ctrl-C working
            print(f'第{page}页获取失败')
            page += 1
            continue
        if not data_list:
            break
        print(f'第{page}页')
        for data in data_list:
            if not data['SName']:
                continue
            stock_data_list = business_detail.json_to_py(data['SName'])
            for stock_data in stock_data_list:
                # Guard clause: skip stocks already handled in this run.
                if stock_data['CodeName'] in seen_stocks:
                    continue
                seen_stocks.append(stock_data['CodeName'])
                business_detail.name = stock_data['CodeName']
                business_detail.code = stock_data['SCode']
                # SECURITY NOTE(review): scraped values are interpolated
                # straight into SQL; if sql_search supports parameter
                # binding, switch these two statements to bound parameters.
                sql = f'select code from business_detail where code="{business_detail.code}" and spider_date="{business_detail.spider_date}"'
                same_data = business_detail.sql_search(sql)
                if same_data:
                    # Re-scrape: drop the stale rows for this code/date first.
                    business_detail.sql_search(
                        f'delete from business_detail where code="{business_detail.code}" and spider_date="{business_detail.spider_date}"'
                    )
                    print(
                        f'重新爬取-{business_detail.spider_date}-{business_detail.code}-{business_detail.name}'
                    )
                try:
                    # next(...) is the idiomatic spelling of .__next__().
                    url_code = next(
                        business_detail.re_find(
                            r'\d+', business_detail.code)).group()
                except Exception:  # no digits found in the code
                    continue
                url = f'http://data.eastmoney.com/stock/lhb,{business_detail.spider_date},{url_code}.html'
                try:
                    # Keep only non-blank cells without carriage returns.
                    detail_data_list = [
                        cell for cell in business_detail.data_search(
                            url, '//div[@class="content-sepe"]//td//text()',
                            'gb2312') if cell.strip() and '\r' not in cell
                    ]
                    # The XPath also matches section headers; strip up to six
                    # 6-cell header groups, stopping early once none remain
                    # (the original re-scanned all six times regardless).
                    marker = '(买入前5名与卖出前5名)'
                    for _ in range(6):
                        if marker not in detail_data_list:
                            break
                        error_index = detail_data_list.index(marker)
                        del detail_data_list[error_index:error_index + 6]
                except Exception:
                    print(
                        f'{business_detail.code}-{business_detail.name}-获取失败')
                    continue
                department_list = []
                # Cells repeat every 8 entries: index i holds the department
                # name, index j (= i + 6) the traded amount.
                for i, j in zip(range(1, 1000, 8), range(7, 1000, 8)):
                    try:
                        business_detail.department_name = detail_data_list[i]
                        # Skip departments already saved for this stock.
                        if business_detail.department_name in department_list:
                            print(
                                f'{business_detail.name}-{business_detail.department_name}-信息重复'
                            )
                            continue
                        department_list.append(
                            business_detail.department_name)
                        business_detail.amount = detail_data_list[j]
                    except Exception:  # ran past the last table row
                        break
                    business_detail.data_save()
                    print(
                        f'每日成交明细——{business_detail.up_date}——{business_detail.code}——{business_detail.name}——{business_detail.department_name}——导入完成'
                    )
        page += 1
    business_detail.spider_end()