def test_html_css(self):
     """Run CSS queries on a selector parsed from HTML and check the results."""
     doc = Selector.from_text(HTML)

     # The number of matches is checked via len() on the query result.
     div_matches = doc.css('div')
     self.assertEqual(len(div_matches), 4)

     # (query, keyword arguments for extract(), expected list), checked in order.
     expectations = (
         ('a', {}, ['Link']),
         ('a', {'raw': True}, ['<a href="page">Link</a>']),
         ('a::text', {}, ['Link']),
         ('a::attr(href)', {}, ['page']),
         ('html>body>div>h1::text', {}, ['Heading']),
     )
     for query, extract_kwargs, expected in expectations:
         extracted = doc.css(query).extract(**extract_kwargs)
         self.assertEqual(extracted, expected)
 def test_html_xpath(self):
     """Run XPath queries on a selector parsed from HTML and check the results."""
     doc = Selector.from_text(HTML)

     # The number of matches is checked via len() on the query result.
     div_matches = doc.xpath('.//div')
     self.assertEqual(len(div_matches), 4)

     # (query, keyword arguments for extract(), expected list), checked in order.
     expectations = (
         ('.//a', {}, ['Link']),
         ('.//a', {'raw': True}, ['<a href="page">Link</a>']),
         ('.//a/text()', {}, ['Link']),
         ('.//a/@href', {}, ['page']),
         ('/html/body/div/h1/text()', {}, ['Heading']),
     )
     for query, extract_kwargs, expected in expectations:
         extracted = doc.xpath(query).extract(**extract_kwargs)
         self.assertEqual(extracted, expected)
Ejemplo n.º 3
0
 def setUp(self):
     """Build the BookChapter entity under test from the HTML string."""
     parsed = Selector.from_text(HTML)
     self.scraped = BookChapter(parsed)
Ejemplo n.º 4
0
 def setUp(self):
     """Build the SimpleContent entity under test from the SIMPLE_HTML string."""
     parsed = Selector.from_text(SIMPLE_HTML)
     self.scraped = SimpleContent(parsed)
Ejemplo n.º 5
0
 def setUp(self):
     """Store the result of ArticleC.scrape on COMPLEX_HTML, rooted at 'html>body>div'."""
     parsed = Selector.from_text(COMPLEX_HTML)
     self.scrapes = ArticleC.scrape(parsed, root='html>body>div')
Ejemplo n.º 6
0
 def setUp(self):
     """Store the result of Article.scrape on COMPLEX_HTML, with an XPath root."""
     parsed = Selector.from_text(COMPLEX_HTML)
     # xpath=True is passed alongside the '/html/body/div' root expression.
     self.scrapes = Article.scrape(parsed, root='/html/body/div', xpath=True)
Ejemplo n.º 7
0
 def setUp(self):
     """Store the result of Blog.scrape on ARTICLE_HTML, with an XPath root."""
     parsed = Selector.from_text(ARTICLE_HTML)
     # xpath=True is passed alongside the '/html/body/div' root expression.
     self.blogs = Blog.scrape(parsed, root='/html/body/div', xpath=True)