def parse(self, response):
    """
    Default callback used by Scrapy to process downloaded responses

    Testing contracts:
    @url http://www.livingsocial.com/cities/15-san-francisco
    @returns items 1
    @scrapes title link
    """
    sel = HtmlXPathSelector(response)
    # Walk every deal node matched by the spider's deal-list xpath.
    for deal_node in sel.select(self.deals_list_xpath):
        item_loader = XPathItemLoader(LivingSocialDeal(), selector=deal_node)
        # Strip whitespace on the way in; join extracted pieces on the way out.
        item_loader.default_output_processor = Join()
        item_loader.default_input_processor = MapCompose(unicode.strip)
        # Register the per-field xpath expressions with the loader.
        for field_name, field_xpath in self.item_fields.iteritems():
            item_loader.add_xpath(field_name, field_xpath)
        yield item_loader.load_item()
def parse(self, response):
    """
    Default callback used by Scrapy to process downloaded responses

    Testing contracts:
    @url http://www.livingsocial.com/cities/15-san-francisco
    @returns items 1
    @scrapes title link
    """
    # Selector lets us run xpath queries against the downloaded page.
    page = HtmlXPathSelector(response)
    for deal_sel in page.xpath(self.deals_list_xpath):
        # One loader per deal, scoped to that deal's subtree.
        ldr = XPathItemLoader(LivingSocialDeal(), selector=deal_sel)
        # Clean-up processors: trim unicode whitespace, then join fragments.
        ldr.default_input_processor = MapCompose(unicode.strip)
        ldr.default_output_processor = Join()
        # Wire every declared item field to its relative xpath.
        for name, expr in self.item_fields.iteritems():
            ldr.add_xpath(name, expr)
        yield ldr.load_item()
def parse(self, response):
    """
    Default callback used by Scrapy to process downloaded responses

    Testing contracts:
    @url http://www.livingsocial.com/cities/15-san-francisco
    @returns items 1
    @scrapes title link
    """
    # Wrap the response so we can query it with xpath expressions.
    sel = HtmlXPathSelector(response)
    # A page lists many deals; handle each one independently.
    for deal in sel.xpath(self.deals_list_xpath):
        loader = XPathItemLoader(LivingSocialDeal(), selector=deal)
        # An item loader applies one input and one output processor per field:
        # strip whitespace from each unicode value, then space-join the results.
        loader.default_input_processor = MapCompose(unicode.strip)
        loader.default_output_processor = Join()
        # Attach the configured xpath for every (field, xpath) pair.
        for field, xpath in self.item_fields.iteritems():
            loader.add_xpath(field, xpath)
        # load_item() runs the registered xpaths through the processors and
        # produces one populated item; yield it and move on to the next deal.
        yield loader.load_item()
def parse(self, response):
    """Extract one item per deal found on the downloaded page."""
    page_sel = HtmlXPathSelector(response)
    # Locate the deal entries on the page.
    for deal_sel in page_sel.select(self.deals_list_xpath):
        ldr = XPathItemLoader(LivingSocialDeal(), selector=deal_sel)
        ldr.default_input_processor = MapCompose(unicode.strip)
        ldr.default_output_processor = Join()
        for field_name, field_xpath in self.item_fields.iteritems():
            ldr.add_xpath(field_name, field_xpath)
        yield ldr.load_item()
def parse(self, response):
    """Yield a loaded item for every deal on the response page."""
    sel = HtmlXPathSelector(response)
    # Each match is one deal listing.
    for deal in sel.select(self.deals_list_xpath):
        loader = XPathItemLoader(LivingSocialDeal(), selector=deal)
        # Processors: remove surrounding whitespace, then join values.
        loader.default_output_processor = Join()
        loader.default_input_processor = MapCompose(unicode.strip)
        # Hook each declared field up to its xpath before loading.
        for field, xpath in self.item_fields.iteritems():
            loader.add_xpath(field, xpath)
        yield loader.load_item()
def parse(self, response):
    """
    Default callback used by Scrapy to process downloaded responses
    """
    selector = Selector(response)
    # One pass per deal entry found by the list xpath.
    for deal_selector in selector.xpath(self.deals_list_xpath):
        item_loader = ItemLoader(LivingSocialDeal(), selector=deal_selector)
        # Default processors: identity map on input, space-join on output.
        item_loader.default_input_processor = MapCompose()
        item_loader.default_output_processor = Join()
        # Bind every configured field to its xpath expression.
        for field_name, field_xpath in self.item_fields.items():
            item_loader.add_xpath(field_name, field_xpath)
        yield item_loader.load_item()