Esempio n. 1
0
 def start_requests(self):
     """
     Generate one JS-rendering request per listing page.

     Iterates pages 1..max_page, builds each URL from the
     ``self.base_url`` template, and waits for the ``.item .name``
     selector so the JavaScript-rendered content is present before
     ``parse_index`` runs.
     :return: iterator of PyppeteerRequest
     """
     for page in range(1, self.max_page + 1):
         url = self.base_url.format(page=page)
         # dead commented-out scrapy.Request fallback removed
         yield PyppeteerRequest(url,
                                callback=self.parse_index,
                                wait_for='.item .name')
Esempio n. 2
0
 def start_requests(self):
     """
     Emit the request for the first listing page; pagination is
     followed from parse_index.
     :return: iterator of PyppeteerRequest
     """
     first_page = f'{self.base_url}/page/1'
     logger.info('crawling %s', first_page)
     yield PyppeteerRequest(
         first_page,
         callback=self.parse_index,
         wait_for='.item .name',
     )
Esempio n. 3
0
    def parse_index(self, response):
        """
        Yield one detail-page request per book, then follow pagination.
        :param response: listing-page response
        :return: iterator of PyppeteerRequest
        """
        for entry in response.css('.item'):
            link = entry.css('.top a::attr(href)').extract_first()
            yield PyppeteerRequest(response.urljoin(link),
                                   callback=self.parse_detail,
                                   wait_for='.item .name')

        # follow to the next page only when the current URL carries a page number
        page_match = re.search(r'page/(\d+)', response.url)
        if page_match:
            next_page = int(page_match.group(1)) + 1
            yield PyppeteerRequest(f'{self.base_url}/page/{next_page}',
                                   callback=self.parse_index,
                                   wait_for='.item .name')
Esempio n. 4
0
 def parse_index(self, response):
     """
     Yield one detail-page request per movie on the listing page.
     :param response: listing-page response
     :return: iterator of PyppeteerRequest
     """
     for entry in response.css('.item'):
         href = entry.css('a::attr(href)').extract_first()
         full_url = response.urljoin(href)
         logger.info('detail url %s', full_url)
         yield PyppeteerRequest(full_url,
                                callback=self.parse_detail,
                                wait_for='.item')
Esempio n. 5
0
 def start_requests(self):
     """
     Generate a prioritized, stealth-mode request for every listing
     page up to max_page, sending a fixed cookie with each.
     :return: iterator of PyppeteerRequest
     """
     for page_no in range(1, self.max_page + 1):
         page_url = f'{self.base_url}/page/{page_no}'
         logger.debug('start url %s', page_url)
         # fresh cookies dict per request, matching original behavior
         yield PyppeteerRequest(
             page_url,
             callback=self.parse_index,
             priority=10,
             wait_for='.item',
             pretend=True,
             cookies={'name': 'germey'},
         )
Esempio n. 6
0
 def start_requests(self):
     """
     Request the bot-detection test page with pretend (stealth) mode
     on and screenshots off.
     :return: iterator of PyppeteerRequest
     """
     yield PyppeteerRequest(
         url='https://bot.sannysoft.com/',
         callback=self.parse_index,
         pretend=True,
         screenshot=False,
     )
Esempio n. 7
0
 def start_requests(self):
     """
     Issue a request (pretend mode disabled) for each configured
     start URL.
     :return: iterator of PyppeteerRequest
     """
     for start_url in self.start_urls:
         yield PyppeteerRequest(
             start_url,
             callback=self.parse_index,
             pretend=False,
         )
Esempio n. 8
0
 def start_requests(self):
     """
     Generate an index request for every page from 1 to max_page,
     formatting each URL from the base_url template.
     :return: iterator of PyppeteerRequest
     """
     for page_no in range(1, self.max_page + 1):
         yield PyppeteerRequest(url=self.base_url.format(page=page_no),
                                callback=self.parse_index)