Ejemplo n.º 1
0
 def _get_item(self, response):
     """Build the Page item describing ``response``.

     The item records the response URL, the length of the response body
     rendered as a string, and the ``Referer`` header of the request that
     produced the response (whatever ``headers.get`` returns for it).
     The item is then handed to ``_set_title`` and ``_set_new_cookies``
     before being returned.
     """
     # Gather the constructor arguments first, in the same order the
     # keyword arguments are evaluated in a single call expression.
     page_url = response.url
     body_size = str(len(response.body))
     referer = response.request.headers.get('Referer')

     page = Page(url=page_url, size=body_size, referer=referer)

     # Both helpers receive the item together with the response.
     self._set_title(page, response)
     self._set_new_cookies(page, response)
     return page
Ejemplo n.º 2
0
    def parse(self, response):
        """Extract lorem ipsum text

        Logs four consecutive 30-element slices of LOREMIPSUM, one per
        level (DEBUG, INFO, WARNING, ERROR), then yields a single Page for
        the response and, when ``self.loremfile`` is truthy, a follow-up
        Request built from that file's name.

        @url http://es.lipsum.com/
        @returns items 1 1
        @scrapes url title body
        """
        # (slice start, log level) pairs; every slice is 30 elements wide.
        levelled_offsets = (
            (0, log.DEBUG),
            (30, log.INFO),
            (60, log.WARNING),
            (90, log.ERROR),
        )
        for offset, level in levelled_offsets:
            self.log(LOREMIPSUM[offset:offset + 30], level=level)

        yield Page(url=response.url, title=LOREMIPSUM[:20], body=LOREMIPSUM)

        if not self.loremfile:
            return

        # The follow-up request is routed back to this method; failures are
        # passed to self.recover.
        error_url = 'file://{0}?x-error-response'.format(self.loremfile.name)
        yield Request(error_url, callback=self.parse, errback=self.recover)
Ejemplo n.º 3
0
 def parse(self, response):
     """Log slices of LOREMIPSUM at each level and yield one Page.

     Four consecutive 30-element slices of LOREMIPSUM are logged at
     DEBUG, INFO, WARNING and ERROR respectively. A single Page is then
     yielded with the response URL, the first 20 elements of LOREMIPSUM
     as its title and the whole of LOREMIPSUM as its body.
     """
     chunk_width = 30
     levels = (log.DEBUG, log.INFO, log.WARNING, log.ERROR)
     for position, level in enumerate(levels):
         begin = position * chunk_width
         self.log(LOREMIPSUM[begin:begin + chunk_width], level=level)

     yield Page(url=response.url, title=LOREMIPSUM[:20], body=LOREMIPSUM)