def test_html_css(self):
    """Check CSS queries against the selector parsed from ``HTML``.

    Covers the number of ``div`` matches, default versus ``raw=True``
    extraction of the anchor, the ``::text`` and ``::attr(...)``
    pseudo-elements, and a child-combinator path down to the heading.
    """
    doc = Selector.from_text(HTML)

    self.assertEqual(len(doc.css('div')), 4)

    # (query, extract() keyword arguments, expected result), checked in order.
    expectations = (
        ('a', {}, ['Link']),
        ('a', {'raw': True}, ['<a href="page">Link</a>']),
        ('a::text', {}, ['Link']),
        ('a::attr(href)', {}, ['page']),
        ('html>body>div>h1::text', {}, ['Heading']),
    )
    for query, options, expected in expectations:
        self.assertEqual(doc.css(query).extract(**options), expected)
def test_html_xpath(self):
    """Check XPath queries against the selector parsed from ``HTML``.

    Covers the number of ``div`` matches, default versus ``raw=True``
    extraction of the anchor, ``text()`` and attribute steps, and an
    absolute path down to the heading.
    """
    doc = Selector.from_text(HTML)

    # Element count.
    div_matches = doc.xpath('.//div')
    self.assertEqual(len(div_matches), 4)

    # Anchor: default extraction, then raw extraction (queried separately).
    anchor_default = doc.xpath('.//a').extract()
    self.assertEqual(anchor_default, ['Link'])
    anchor_raw = doc.xpath('.//a').extract(raw=True)
    self.assertEqual(anchor_raw, ['<a href="page">Link</a>'])

    # Text node and attribute steps.
    anchor_text = doc.xpath('.//a/text()').extract()
    self.assertEqual(anchor_text, ['Link'])
    anchor_href = doc.xpath('.//a/@href').extract()
    self.assertEqual(anchor_href, ['page'])

    # Absolute path from the document root.
    heading_text = doc.xpath('/html/body/div/h1/text()').extract()
    self.assertEqual(heading_text, ['Heading'])
def setUp(self):
    """Build the ``BookChapter`` under test from the ``HTML`` string."""
    parsed = Selector.from_text(HTML)
    self.scraped = BookChapter(parsed)
def setUp(self):
    """Build the ``SimpleContent`` under test from the ``SIMPLE_HTML`` string."""
    parsed = Selector.from_text(SIMPLE_HTML)
    self.scraped = SimpleContent(parsed)
def setUp(self):
    """Store the result of ``ArticleC.scrape`` over ``COMPLEX_HTML``.

    The root is given as ``html>body>div`` and no ``xpath`` flag is passed.
    """
    parsed = Selector.from_text(COMPLEX_HTML)
    self.scrapes = ArticleC.scrape(parsed, root='html>body>div')
def setUp(self):
    """Store the result of ``Article.scrape`` over ``COMPLEX_HTML``.

    The root is given as ``/html/body/div`` with ``xpath=True``.
    """
    parsed = Selector.from_text(COMPLEX_HTML)
    self.scrapes = Article.scrape(
        parsed,
        root='/html/body/div',
        xpath=True,
    )
def setUp(self):
    """Store the result of ``Blog.scrape`` over ``ARTICLE_HTML``.

    The root is given as ``/html/body/div`` with ``xpath=True``.
    """
    parsed = Selector.from_text(ARTICLE_HTML)
    self.blogs = Blog.scrape(
        parsed,
        root='/html/body/div',
        xpath=True,
    )