Example #1
    def parse(self, response: Response, **kwargs):
        """Find the page's embedded ``window.INITIAL_STATE`` JSON blob,
        decode it, and schedule any newly discovered word URLs.

        Returns the result of ``response.follow_all`` over the URLs that
        were not already in ``self.queue``; returns ``None`` when no
        script tag yields a usable state object.
        """
        for script in response.xpath('//script/text()').getall():
            # Only the script tag carrying the serialized app state matters.
            if 'INITIAL_STATE' not in script:
                continue

            # search (not match): the assignment need not sit at the very
            # start of the script text.
            m = re.search(r'window\.INITIAL_STATE\s+=\s+({[\s\S]+});',
                          script)
            if m is None:
                # Mentions INITIAL_STATE but not in the expected
                # assignment form — keep scanning instead of crashing
                # on m.group(1).
                continue

            # Decode with relaxed JSON semantics, handling the site
            # renderer's unicode escapes.
            custom_demjson = CustomJSON(json_options=demjson.json_options(
                compactly=False))
            decoded = custom_demjson.decode(m.group(1),
                                            encoding='unicode-escape')

            raw_data = decoded['searchData']
            word = Word.from_raw(data=raw_data)

            # Schedule only URLs we have not queued before.
            urls = word.get_urls()
            new = urls - self.queue
            self.queue.update(new)

            if new:
                print(f'Found {len(new)} more URLs.')
            return response.follow_all(new)
Example #2
    def parse(self, response: Response):
        """Follow every item detail link on the listing page, then the
        "next page" pagination link, if any.
        """
        yield from response.follow_all(
            xpath='//*[starts-with(@id, "item_")]/div[1]/a/@href',
            callback=self.parse_item,
        )

        link = response.xpath(
            '//*[@id="navigation-bar-bottom"]/div[2]/ul/'
            'li[contains(@class, "next-page")]/a/@href').get()

        # On the last page the xpath yields None, and
        # response.follow(None) raises ValueError — stop instead.
        if link is not None:
            yield response.follow(link, callback=self.parse)
    def parse(self, response: Response, **kwargs):
        """Entry point: crawl a single configured residence URL, or scan
        the listing page for residence detail links and paginate onward.
        """
        if self.url_to_crawl:
            # A specific residence URL was configured — crawl just that one.
            yield response.follow(url=self.url_to_crawl, callback=self.parse_residences)
            return

        # Collect detail-page links and drop duplicates before scheduling.
        hrefs = response.xpath(
            "//a[contains(@class,'detalii-proprietate')]"
            "[contains(.,'Vezi detalii')]/@href").getall()
        unique_hrefs = list(set(hrefs))

        yield from response.follow_all(urls=unique_hrefs, callback=self.parse_residences)

        # Keep paginating while a "next" button exists.
        next_page = response.xpath("//a[@class='inainte butonpaginare']/@href").get()
        if next_page:
            yield response.follow(url=next_page, callback=self.parse)