def start_requests(self):
    """
    Generate the initial requests, one per listing page.

    Iterates pages 1..max_page, formats each index URL, and renders it
    with Pyppeteer, waiting for the item-name selector so the
    JS-rendered content is present before parsing.
    :return: iterator of PyppeteerRequest
    """
    for page in range(1, self.max_page + 1):
        url = self.base_url.format(page=page)
        # removed dead commented-out scrapy.Request line; Pyppeteer rendering is required here
        yield PyppeteerRequest(url, callback=self.parse_index, wait_for='.item .name')
def start_requests(self):
    """
    Kick off the crawl from the first listing page.
    :return: iterator of PyppeteerRequest
    """
    first_page = f'{self.base_url}/page/1'
    logger.info('crawling %s', first_page)
    yield PyppeteerRequest(first_page, callback=self.parse_index, wait_for='.item .name')
def parse_index(self, response):
    """
    Pull every book off the listing page, then follow the next page.
    :param response: rendered listing page
    :return: iterator of PyppeteerRequest
    """
    # schedule one detail request per book entry on this page
    for entry in response.css('.item'):
        link = entry.css('.top a::attr(href)').extract_first()
        yield PyppeteerRequest(response.urljoin(link),
                               callback=self.parse_detail,
                               wait_for='.item .name')
    # pagination: bump the page number embedded in the current URL, if any
    matched = re.search(r'page/(\d+)', response.url)
    if matched:
        next_page = int(matched.group(1)) + 1
        yield PyppeteerRequest(f'{self.base_url}/page/{next_page}',
                               callback=self.parse_index,
                               wait_for='.item .name')
def parse_index(self, response):
    """
    Extract movie detail links from the index page and follow each one.
    :param response: rendered index page
    :return: iterator of PyppeteerRequest
    """
    for entry in response.css('.item'):
        link = entry.css('a::attr(href)').extract_first()
        full_url = response.urljoin(link)
        logger.info('detail url %s', full_url)
        yield PyppeteerRequest(full_url, callback=self.parse_detail, wait_for='.item')
def start_requests(self):
    """
    Emit one high-priority request per listing page, in stealth mode
    (pretend=True) and carrying a fixed cookie.
    :return: iterator of PyppeteerRequest
    """
    for page_number in range(1, self.max_page + 1):
        page_url = f'{self.base_url}/page/{page_number}'
        logger.debug('start url %s', page_url)
        # fresh cookie dict per request, matching the original per-iteration construction
        yield PyppeteerRequest(page_url, callback=self.parse_index, priority=10,
                               wait_for='.item', pretend=True,
                               cookies={'name': 'germey'})
def start_requests(self):
    """
    Request the bot-detection test page in stealth mode, skipping the screenshot.
    :return: iterator of PyppeteerRequest
    """
    target = 'https://bot.sannysoft.com/'
    yield PyppeteerRequest(url=target, callback=self.parse_index,
                           pretend=True, screenshot=False)
def start_requests(self):
    """
    Issue a plain (non-stealth) Pyppeteer request for every configured start URL.
    :return: iterator of PyppeteerRequest
    """
    yield from (
        PyppeteerRequest(start_url, callback=self.parse_index, pretend=False)
        for start_url in self.start_urls
    )
def start_requests(self):
    """
    Generate one index request per paginated URL, pages 1..max_page.
    :return: iterator of PyppeteerRequest
    """
    for page_no in range(1, self.max_page + 1):
        index_url = self.base_url.format(page=page_no)
        yield PyppeteerRequest(url=index_url, callback=self.parse_index)