def parse(self, response):
    """Yield one loaded ``LivingSocialDeal`` per deal node found in *response*.

    Nodes are located with ``self.deals_list_xpath``. For each node a fresh
    item loader is created, and every ``(field, xpath)`` pair taken from
    ``self.item_fields`` is registered on it, so the field XPaths are
    evaluated against that node.

    :param response: the response to extract deals from.
    :return: a generator of loaded items, one per matched node.
    """
    page = Selector(response)
    deal_nodes = page.select(self.deals_list_xpath)

    for deal_node in deal_nodes:
        deal_loader = XPathItemLoader(LivingSocialDeal(), selector=deal_node)

        # The processors have to be in place before any add_xpath() call,
        # because that is where the extracted values get collected.
        deal_loader.default_output_processor = Join()
        deal_loader.default_input_processor = MapCompose(unicode.strip)

        for field_name, field_xpath in self.item_fields.iteritems():
            deal_loader.add_xpath(field_name, field_xpath)

        deal_item = deal_loader.load_item()
        yield deal_item
def parse(self, response):
    """
    Default callback used by Scrapy to process downloaded responses

    Builds ``Grailed`` items by applying every ``(field, xpath)`` pair from
    ``self.item_fields`` to the downloaded response.

    :param response: the downloaded response to extract fields from.
    :return: a generator of loaded ``Grailed`` items.

    Testing contracts:
    @url http://www.livingsocial.com/cities/15-san-francisco
    @returns items 1
    @scrapes title link
    """
    # NOTE(review): the "@..." lines in the docstring are kept verbatim because
    # they are spider contracts read from the docstring. The URL looks copied
    # from a LivingSocial example -- confirm it is right for this spider.

    # NOTE(review): the loop counter is never used, so every pass loads the
    # same fields from the same response. The iteration count is kept as it
    # was; confirm whether 900 identical items are really intended.
    for _ in range(100, 1000):
        # The loader needs something to run XPath queries against. Passing the
        # ``Grailed`` item class as ``selector`` gave it nothing queryable, so
        # hand over the response and let the loader build its own selector.
        loader = XPathItemLoader(Grailed(), response=response)

        # define processors
        loader.default_input_processor = MapCompose(unicode.strip)
        loader.default_output_processor = Join()

        # iterate over fields and add xpaths to the loader
        for field, xpath in self.item_fields.iteritems():
            loader.add_xpath(field, xpath)

        yield loader.load_item()
def parseItemWithLoader(self, response):
    """Yield one loaded ``FlowersItem`` per node matched on *response*.

    Nodes are selected with ``self._x_query['xpath_item']``. For each node the
    name, image and description XPaths stored in ``self._x_query`` are added
    to a fresh loader, followed by the response URL as a plain value.

    :param response: the response to extract items from.
    :return: a generator of loaded items, one per matched node.
    """
    # (item field, key into self._x_query), in the order they are added.
    xpath_fields = (
        ('flowerName', 'xpath_item_name'),
        ('imageUrl', 'xpath_item_image'),
        ('desc', 'xpath_item_desc'),
    )

    page = HtmlXPathSelector(response)
    item_nodes = page.select(self._x_query['xpath_item'])

    for item_node in item_nodes:
        loader = XPathItemLoader(FlowersItem(), selector=item_node)
        loader.default_output_processor = TakeFirst()

        for field_name, query_key in xpath_fields:
            loader.add_xpath(field_name, self._x_query[query_key])

        loader.add_value('url', str(response.url))

        yield loader.load_item()