Example #1
    def parse(self, response):
        """
        Default callback used by Scrapy to process downloaded responses

        Testing contracts:
        @url http://www.livingsocial.com/cities/15-san-francisco
        @returns items 1
        @scrapes title link

        """
        selector = HtmlXPathSelector(response)

        # iterate over deals
        for deal in selector.select(self.deals_list_xpath):
            loader = XPathItemLoader(LivingSocialDeal(), selector=deal)

            # define processors
            loader.default_input_processor = MapCompose(unicode.strip)
            loader.default_output_processor = Join()

            # iterate over fields and add xpaths to the loader
            for field, xpath in self.item_fields.iteritems():
                loader.add_xpath(field, xpath)
            yield loader.load_item()
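This parse() method (and each of the variants below) reads two class attributes, deals_list_xpath and item_fields, and fills a LivingSocialDeal item, none of which appear in the excerpts. A minimal sketch of those missing pieces follows; the base class, field names, and XPaths are illustrative guesses, not the tutorial's exact definitions.

import scrapy
from scrapy.item import Item, Field


class LivingSocialDeal(Item):
    # only the fields exercised by the @scrapes contract are sketched here
    title = Field()
    link = Field()


class LivingSocialSpider(scrapy.Spider):
    name = "livingsocial"
    # same URL as the @url contract in the docstrings
    start_urls = ["http://www.livingsocial.com/cities/15-san-francisco"]

    # XPath selecting one node per deal on the listing page (placeholder)
    deals_list_xpath = '//li[contains(@class, "deal")]'

    # item field name -> XPath relative to a single deal node (placeholders)
    item_fields = {
        'title': './/a/@title',
        'link': './/a/@href',
    }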
Example #2
    def parse(self, response):
        """
        Default callback used by Scrapy to process downloaded responses

        Testing contracts:
        @url http://www.livingsocial.com/cities/15-san-francisco
        @returns items 1
        @scrapes title link
        """
        # Gives the ability to select the parts of the response defined in deals_list_xpath
        selector = HtmlXPathSelector(response)

        # Iterate through found deals
        for deal in selector.xpath(self.deals_list_xpath):
            # Loads data into item fields defined in items.py
            loader = XPathItemLoader(LivingSocialDeal(), selector=deal)

            # Define processors for clean up and joining elements
            loader.default_input_processor = MapCompose(unicode.strip)
            loader.default_output_processor = Join()

            # Iterate over item_fields dict and add xpaths to loader
            for field, xpath in self.item_fields.iteritems():
                loader.add_xpath(field, xpath)
            yield loader.load_item()
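The @url, @returns, and @scrapes lines in the docstring are Scrapy spider contracts: running `scrapy check <spider_name>` downloads the contract URL, feeds the response to parse(), and fails unless at least one item comes back (@returns items 1) with both title and link populated (@scrapes title link). The spider's name itself is not shown in these excerpts.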
Example #3
	def parse(self, response): # actually a method
		"""
		Default callback used by Scrapy to process downloaded responses
		
		Testing contracts:
		@url http://www.livingsocial.com/cities/15-san-francisco
		@returns items 1
		@scrapes title link
		
		"""
		         
		selector = HtmlXPathSelector(response) # instantiate HtmlXPathSelector() w/ response parameter
		
		# iterate over deals
		for deal in selector.xpath(self.deals_list_xpath): #multiple deals per page
			loader = XPathItemLoader(LivingSocialDeal(), selector=deal) # create a loader for each deal
			
			# define processors
			# An Item Loader contains one input processor and one output processor for each (item) field.
			loader.default_input_processor = MapCompose(unicode.strip) # strip whitespace from the unicode strings
			loader.default_output_processor = Join() # join data with a space
			
			# iterate over fields and add xpaths to the loader
			for field, xpath in self.item_fields.iteritems(): # iteritems() lets you iterate over the (key, value) pairs of a dict
				loader.add_xpath(field, xpath) # add each field's specific xpath to the loader
			yield loader.load_item() # load_item: grabs each item field (link, title, etc.), gets its xpath, and processes the data
			# with the input/output processors. Yield each item, then move on to the next deal
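The processor comments above can be made concrete with a tiny standalone check. The import path below is the old (Python 2 era) Scrapy location and the sample strings are made up; newer releases expose the same classes under scrapy.loader.processors.

from scrapy.contrib.loader.processor import MapCompose, Join

strip_each = MapCompose(unicode.strip)   # input processor: strip every extracted string
join_all = Join()                        # output processor: join the values with a space

print strip_each([u'  $25  ', u'\nsushi deal '])   # -> [u'$25', u'sushi deal']
print join_all([u'$25', u'sushi deal'])            # -> u'$25 sushi deal'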
Example #4
    def parse(self, response):
        selector = HtmlXPathSelector(response)
        # looking for deals
        for deal in selector.select(self.deals_list_xpath):
            loader = XPathItemLoader(LivingSocialDeal(), selector=deal)

            loader.default_input_processor = MapCompose(unicode.strip)
            loader.default_output_processor = Join()

            for field, xpath in self.item_fields.iteritems():
                loader.add_xpath(field, xpath)

            yield loader.load_item()
Example #5
    def parse(self, response):
        selector = HtmlXPathSelector(response)

        #iterate over deals
        for deal in selector.select(self.deals_list_xpath):
            loader = XPathItemLoader(LivingSocialDeal(), selector=deal)

            # define processors
            # remove whitespace
            loader.default_input_processor = MapCompose(unicode.strip)
            loader.default_output_processor = Join()

            #iterate over fields and add xpaths to the loader
            for field, xpath in self.item_fields.iteritems():
                loader.add_xpath(field, xpath)
            yield loader.load_item()
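In every version, parse() is a generator: each `yield loader.load_item()` hands one populated item back to the Scrapy engine (and on to any item pipelines) before the loop continues, so deals are processed as they are scraped rather than collected into a list first.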
Example #6
    def parse(self, response):
        """
        Default callback used by Scrapy to process downloaded responses
        """
        sel = Selector(response)

        # iterate over deals
        for deal in sel.xpath(self.deals_list_xpath):

            loader = ItemLoader(LivingSocialDeal(), selector=deal)

            # define processors
            loader.default_input_processor = MapCompose()
            loader.default_output_processor = Join()

            # iterate over fields and add xpaths to the Loader
            for field, xpath in self.item_fields.items():
                loader.add_xpath(field, xpath)
            yield loader.load_item()
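Example #6 is the same logic ported to the newer Scrapy API: Selector and ItemLoader replace the deprecated HtmlXPathSelector and XPathItemLoader, and dict.items() replaces the Python-2-only iteritems(). Note also that MapCompose() is constructed with no functions here, so input values pass through unstripped; to keep the whitespace-stripping behaviour of the earlier examples you would pass a strip function, e.g. MapCompose(str.strip) on Python 3. The imports this version assumes would look roughly like the following (Scrapy 1.x paths; in recent releases the processors live in the separate itemloaders package):

from scrapy.selector import Selector
from scrapy.loader import ItemLoader
from scrapy.loader.processors import MapCompose, Join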