コード例 #1
0
 def test_html_css(self):
     # Exercise CSS queries on the HTML fixture: number of matches first,
     # then the extracted values for a series of selectors.
     doc = Selector.from_text(HTML)
     self.assertEqual(len(doc.css('div')), 4)

     # (CSS query, keyword arguments for extract(), expected result).
     # Evaluated lazily and in order, so the first failing check is reported.
     cases = (
         ('a', {}, ['Link']),
         ('a', {'raw': True}, ['<a href="page">Link</a>']),
         ('a::text', {}, ['Link']),
         ('a::attr(href)', {}, ['page']),
         ('html>body>div>h1::text', {}, ['Heading']),
     )
     for query, options, expected in cases:
         self.assertEqual(doc.css(query).extract(**options), expected)
コード例 #2
0
 def test_html_xpath(self):
     # Exercise XPath queries on the HTML fixture: number of matches first,
     # then the extracted values for a series of expressions.
     doc = Selector.from_text(HTML)
     self.assertEqual(len(doc.xpath('.//div')), 4)

     # (XPath expression, keyword arguments for extract(), expected result).
     # Evaluated lazily and in order, so the first failing check is reported.
     cases = (
         ('.//a', {}, ['Link']),
         ('.//a', {'raw': True}, ['<a href="page">Link</a>']),
         ('.//a/text()', {}, ['Link']),
         ('.//a/@href', {}, ['page']),
         ('/html/body/div/h1/text()', {}, ['Heading']),
     )
     for expression, options, expected in cases:
         self.assertEqual(doc.xpath(expression).extract(**options), expected)
コード例 #3
0
ファイル: test_scrape_fields.py プロジェクト: kgubsch/CDEWIP
 def setUp(self):
     """Build the BookChapter under test from a Selector over the HTML fixture."""
     document = Selector.from_text(HTML)
     self.scraped = BookChapter(document)
コード例 #4
0
 def setUp(self):
     """Build the SimpleContent under test from a Selector over SIMPLE_HTML."""
     document = Selector.from_text(SIMPLE_HTML)
     self.scraped = SimpleContent(document)
コード例 #5
0
 def setUp(self):
     """Store the result of ArticleC.scrape over COMPLEX_HTML.

     The root is given as 'html>body>div'; no xpath flag is passed here.
     """
     document = Selector.from_text(COMPLEX_HTML)
     self.scrapes = ArticleC.scrape(document, root='html>body>div')
コード例 #6
0
 def setUp(self):
     """Store the result of Article.scrape over COMPLEX_HTML.

     The root '/html/body/div' is passed together with xpath=True.
     """
     document = Selector.from_text(COMPLEX_HTML)
     self.scrapes = Article.scrape(document, root='/html/body/div', xpath=True)
コード例 #7
0
 def setUp(self):
     """Store the result of Blog.scrape over ARTICLE_HTML.

     The root '/html/body/div' is passed together with xpath=True.
     """
     document = Selector.from_text(ARTICLE_HTML)
     self.blogs = Blog.scrape(document, root='/html/body/div', xpath=True)