def testPipeoutToCrawledItem(self):
    """Pipe out top-level items, then crawl each one for its nested items.

    Builds a ``Worm`` for ``http://www.hespress.com/`` from a two-level
    DOM item description ("category" with a nested "article"), stores the
    result of ``_pipeout`` on ``worm.domitems.crawled_items``, and calls
    ``_pipeout_to_crawled_item`` on every crawled item, printing progress
    through the ``print_*`` helpers on ``self``.

    Any exception raised while constructing the ``Worm`` is reported via
    ``print_failure`` and re-raised. No assertions are made; the outcome
    is only printed.
    """
    # Description handed to Worm: a category selector with a nested
    # article selector.
    item_spec = {
        "name": 'category',
        "url": '/politique/index.1.html',
        "selector": 'div#mainNav > ul#menu_main > li > a',
        "nested_items": {
            "name": 'article',
            "url": '/politique/212121.html',
            "selector": 'h2.section_title > a',
        },
    }

    try:
        from copy import deepcopy

        # Worm receives a deep copy, so item_spec itself is never passed in.
        worm = Worm("http://www.hespress.com/", deepcopy(item_spec))
    except Exception as exc:
        self.print_failure("Test failed with error: %s" % str(exc))
        raise exc

    # Only reached when construction succeeded (the handler above re-raises).
    worm.domitems.crawled_items = worm._pipeout(worm.domitems, "")
    self.print_success(
        "----------------- Piped out : %s" % worm.domitems.crawled_items
    )
    self.print_info("----------------- Crawling subitems")

    for crawled in worm.domitems.crawled_items:
        self.print_success("----------------- Crawling: %s" % crawled.url)
        self.print_success(
            "----------------- Respective DomItem: %s" % crawled.dom_item.name
        )
        # NOTE(review): 'smart' is presumably a crawl-mode selector; confirm
        # against Worm._pipeout_to_crawled_item.
        worm._pipeout_to_crawled_item(crawled, 'smart')
        self.print_success("Extracted data:%s\n" % crawled.nested_items)
def testPipeout(self):
    """Run ``Worm._pipeout`` on a single-level DOM item and print the result.

    Builds a ``Worm`` for ``http://www.hespress.com/`` from a "category"
    DOM item description and prints whatever ``_pipeout`` returns through
    ``print_success``.

    Any exception raised while constructing the ``Worm`` is reported via
    ``print_failure`` and re-raised. No assertions are made; the outcome
    is only printed.
    """
    item_spec = {
        "name": "category",
        "url": "/politique/index.1.html",
        "selector": "div#mainNav > ul#menu_main > li > a",
    }

    try:
        from copy import deepcopy

        # Worm receives a deep copy, so item_spec itself is never passed in.
        worm = Worm("http://www.hespress.com/", deepcopy(item_spec))
    except Exception as exc:
        self.print_failure("Test failed with error: %s" % str(exc))
        raise exc

    # Only reached when construction succeeded (the handler above re-raises).
    extracted = worm._pipeout(worm.domitems, "")
    self.print_success("Extracted data:\n %s" % extracted)