Exemple #1
0
	def testPipeoutToCrawledItem(self):
		"""Pipe out the top-level items, then pipe each crawled item out in turn.

		Builds a ``Worm`` for http://www.hespress.com/ from a two-level item
		spec (a 'category' entry whose "nested_items" is an 'article' entry),
		stores the result of ``_pipeout`` on ``worm.domitems.crawled_items``
		and then calls ``_pipeout_to_crawled_item(item, 'smart')`` for every
		crawled item, printing its ``nested_items`` afterwards.

		Any exception raised while constructing the ``Worm`` is reported via
		``print_failure`` and re-raised. The method makes no assertions; it
		only reports progress through the ``print_*`` helpers.
		"""
		# Inner level of the spec, referenced by the category entry below.
		article_spec = {
			"name": 'article',
			"url": '/politique/212121.html',
			"selector": 'h2.section_title > a',
		}
		category_spec = {
			"name": 'category',
			"url": '/politique/index.1.html',
			"selector": 'div#mainNav > ul#menu_main > li > a',
			"nested_items": article_spec,
		}

		try:
			from copy import deepcopy
			# Hand the Worm its own copy so the literal spec above is not shared.
			worm = Worm("http://www.hespress.com/", deepcopy(category_spec))
		except Exception as err:
			self.print_failure("Test failed with error: %s" % str(err))
			raise err

		# Reached only when construction succeeded (the handler always re-raises).
		worm.domitems.crawled_items = worm._pipeout(worm.domitems, "")
		self.print_success("----------------- Piped out : %s" % worm.domitems.crawled_items)

		self.print_info("----------------- Crawling subitems")
		for crawled in worm.domitems.crawled_items:
			self.print_success("----------------- Crawling: %s" % crawled.url)
			self.print_success("----------------- Respective DomItem: %s" % crawled.dom_item.name)
			# NOTE(review): the meaning of the 'smart' argument is defined by
			# Worm._pipeout_to_crawled_item, which is not visible here.
			worm._pipeout_to_crawled_item(crawled, 'smart')
			self.print_success("Extracted data:%s\n" % crawled.nested_items)
Exemple #2
0
	def testPipeout(self):
		"""Pipe out a single-level 'category' spec and print the result.

		Builds a ``Worm`` for http://www.hespress.com/ from a flat item spec,
		calls ``worm._pipeout(worm.domitems, "")`` and reports whatever it
		returns through ``print_success``.

		Any exception raised while constructing the ``Worm`` is reported via
		``print_failure`` and re-raised. The method makes no assertions.
		"""
		category_spec = {
			"name": "category",
			"url": "/politique/index.1.html",
			"selector": "div#mainNav > ul#menu_main > li > a",
		}

		try:
			from copy import deepcopy
			# Hand the Worm its own copy so the literal spec above is not shared.
			worm = Worm("http://www.hespress.com/", deepcopy(category_spec))
		except Exception as err:
			self.print_failure("Test failed with error: %s" % str(err))
			raise err

		# Reached only when construction succeeded (the handler always re-raises).
		extracted = worm._pipeout(worm.domitems, "")
		self.print_success("Extracted data:\n %s" % extracted)