def _get_item(self, response):
    """Create the ``Page`` item for *response* and return it.

    The item is initialised with the response URL, the length of the
    response body rendered as a string, and the ``Referer`` header of the
    request that produced the response (``None`` when ``headers.get``
    finds no such header). It is then handed to ``self._set_title`` and
    ``self._set_new_cookies``, in that order, before being returned.
    """
    page_url = response.url
    body_size = str(len(response.body))
    referer = response.request.headers.get('Referer')

    page = Page(url=page_url, size=body_size, referer=referer)

    # Both helpers receive the same item and the originating response.
    self._set_title(page, response)
    self._set_new_cookies(page, response)
    return page
def parse(self, response):
    """Extract lorem ipsum text

    Logs four consecutive 30-character slices of ``LOREMIPSUM`` at the
    DEBUG, INFO, WARNING and ERROR levels, then yields one ``Page`` whose
    title is the first 20 characters of the text and whose body is the
    whole text. When ``self.loremfile`` is truthy, a follow-up ``Request``
    for that file (with an ``x-error-response`` query string) is yielded
    as well, using ``self.parse`` as callback and ``self.recover`` as
    errback.

    @url http://es.lipsum.com/
    @returns items 1 1
    @scrapes url title body
    """
    severities = (log.DEBUG, log.INFO, log.WARNING, log.ERROR)
    for index, severity in enumerate(severities):
        start = index * 30
        self.log(LOREMIPSUM[start:start + 30], level=severity)

    yield Page(url=response.url, title=LOREMIPSUM[:20], body=LOREMIPSUM)

    # Nothing more to schedule unless a lorem file was configured.
    if not self.loremfile:
        return

    error_url = 'file://{0}?x-error-response'.format(self.loremfile.name)
    yield Request(error_url, callback=self.parse, errback=self.recover)
def parse(self, response):
    """Log lorem ipsum snippets and yield a single ``Page`` item.

    Four consecutive 30-character slices of ``LOREMIPSUM`` are logged,
    one each at the DEBUG, INFO, WARNING and ERROR levels. The yielded
    item uses the response URL, the first 20 characters of the text as
    title, and the complete text as body.
    """
    offsets = (0, 30, 60, 90)
    levels = (log.DEBUG, log.INFO, log.WARNING, log.ERROR)
    for offset, level in zip(offsets, levels):
        snippet = LOREMIPSUM[offset:offset + 30]
        self.log(snippet, level=level)

    page = Page(url=response.url, title=LOREMIPSUM[:20], body=LOREMIPSUM)
    yield page