def test_incorrect_xpath(self):
    """Boolean-valued XPath expressions must yield an empty selector list.

    The lxml ``xpath()`` call returns a boolean for comparison expressions
    such as ``text()="one"``.  That used to break the selector's internal
    logic, which assumed only a list could be returned.  It was fixed, and
    this regression test was created to pin the behaviour.
    """
    sel = XpathSelector(self.tree).select('//ul/li/text()="oops"')
    self.assertEqual(False, sel.exists())

    # The selector list is always empty in this special case, even if the
    # same expression evaluates to True at the lxml level.
    self.assertEqual(True, self.tree.xpath('//ul[1]/li[1]/text()="one"'))
    sel = XpathSelector(self.tree).select('//ul[1]/li[1]/text()="one"')
    self.assertEqual(False, sel.exists())
def test_regexp(self):
    """The ``re:test`` XPath function can match on text and on attributes."""
    html = '<div><h1 id="h1">foo</h1><h2>bar</h2></div>'
    sel = XpathSelector(fromstring(html))

    by_text = sel.select('//*[re:test(text(), "b.r")]')
    self.assertEqual('h2', by_text.node().tag)

    # Raw string: "\d" is not a valid Python string escape, so a plain
    # literal triggers an invalid-escape warning on modern interpreters.
    by_attr = sel.select(r'//*[re:test(@id, "^h\d+$")]/text()')
    self.assertEqual('foo', by_attr.text())
def test_node_list(self):
    """``node_list()`` equals the result of the same query run via lxml."""
    sel = XpathSelector(self.tree).select('//ul/li')
    expected = self.tree.xpath('//ul/li')
    self.assertEqual(expected, sel.node_list())
def test_xpath_concat_function(self):
    """The result of an XPath ``concat()`` call is readable via ``text()``."""
    markup = '<a href="index.html"></a>'
    selector = XpathSelector(fromstring(markup))
    joined = selector.select('concat("/",//a/@href)').text()
    self.assertEqual('/index.html', joined)
def test_rex_default(self):
    """``rex()`` raises DataNotFound here unless a default is supplied."""
    sel = XpathSelector(self.tree).select('//ul/li[10]')
    # Without a default the failed search must raise.
    self.assertRaises(DataNotFound, lambda: sel.rex('zz'))
    # With a default the same search returns that default instead.
    self.assertEqual('DEFAULT', sel.rex('zz', default='DEFAULT'))
def test_context_manager_select_iter(self):
    """A selection bound by ``with`` can be iterated item by item."""
    markup = '<b>one</b><b>two</b>'
    with XpathSelector(fromstring(markup)).select('b') as matches:
        texts = set()
        for item in matches:
            texts.add(item.text())
        # Order is irrelevant, hence the set comparison.
        self.assertEqual({'one', 'two'}, texts)
def test_selector_number_does_not_exist(self):
    """``number()`` on ``//ul/li[1]`` gives the default or raises."""
    sel = XpathSelector(self.tree).select('//ul/li[1]').one()
    # A supplied default is returned as-is.
    self.assertEqual('DEFAULT', sel.number(default='DEFAULT'))
    # Without a default the lookup must raise DataNotFound.
    self.assertRaises(DataNotFound, lambda: sel.number())
def test_text(self):
    """``rex(...).text()`` returns the first captured group as text."""
    sel = XpathSelector(self.tree).select('//ul/li/text()')
    # Raw string keeps "\w" a regex token rather than an invalid escape.
    self.assertEqual('one', sel.rex(r'(\w+)').text())
def test_text_default_value(self):
    """``text(default=...)`` returns the default when the regex misses."""
    sel = XpathSelector(self.tree).select('//ul/li/text()')
    self.assertEqual('DEFAULT', sel.rex('(zz)').text(default='DEFAULT'))
def test_number(self):
    """``number()`` on the ``//ul/li[4]`` selection yields the integer 4."""
    sel = XpathSelector(self.tree).select('//ul/li[4]')
    self.assertEqual(4, sel.number())
def test_number_default(self):
    """``number()`` on ``//ul/li[10]`` raises unless a default is given."""
    sel = XpathSelector(self.tree).select('//ul/li[10]')
    self.assertRaises(DataNotFound, sel.number)
    self.assertEqual('DEFAULT', sel.number(default='DEFAULT'))
def test_inner_html(self):
    """``inner_html()`` returns the markup of the children only."""
    sel = XpathSelector(self.tree).select('//ul[@id="second-list"]')
    # Surrounding whitespace is stripped before comparison; the inner
    # whitespace between the two <li> elements is part of the expectation.
    self.assertEqual(
        u'<li class="li-1">yet one</li>\n <li class="li-2">yet two</li>',
        sel.inner_html().strip())
def test_inner_html_default(self):
    """``inner_html()`` on ``//ul/li[10]`` raises unless a default is given."""
    sel = XpathSelector(self.tree).select('//ul/li[10]')
    self.assertRaises(DataNotFound, sel.inner_html)
    self.assertEqual('DEFAULT', sel.inner_html(default='DEFAULT'))
def test_html(self):
    """``html()`` on a multi-node selection serialises the first node."""
    sel = XpathSelector(self.tree).select('//ul/li')
    self.assertEqual(u'<li>one</li>', sel.html().strip())
def test_text(self):
    """``text()`` on the ``//ul/li`` selection returns ``'one'``."""
    sel = XpathSelector(self.tree).select('//ul/li')
    self.assertEqual('one', sel.text())
def test_node_default(self):
    """``node()`` on ``//ul/li[10]`` raises unless a default is given."""
    selection = XpathSelector(self.tree).select('//ul/li[10]')

    # No default: the lookup has to fail loudly.
    with self.assertRaises(DataNotFound):
        selection.node()

    # Default supplied: it is handed back unchanged.
    fallback = selection.node(default='DEFAULT')
    self.assertEqual('DEFAULT', fallback)
def test_require(self):
    """``require()`` passes for ``//ul`` and raises for ``//foo``."""
    # Must not raise for the ``//ul`` selection.
    present = XpathSelector(self.tree).select('//ul')
    present.require()

    # Must raise RequiredDataNotFound for the ``//foo`` selection.
    missing_require = XpathSelector(self.tree).select('//foo').require
    with self.assertRaises(RequiredDataNotFound):
        missing_require()
def test_exists(self):
    """``exists()`` is True for ``//ul/li[4]`` and False for ``//ul/li[5]``."""
    sel = XpathSelector(self.tree).select('//ul/li[4]')
    self.assertEqual(True, sel.exists())

    sel = XpathSelector(self.tree).select('//ul/li[5]')
    self.assertEqual(False, sel.exists())
def test_one(self):
    """``rex(...).one()`` returns a regular-expression match object."""
    sel = XpathSelector(self.tree).select('//ul/li')
    match_type_name = sel.rex('one').one().__class__.__name__
    # The match class is named 'SRE_Match' on older interpreters and
    # 'Match' since Python 3.7, so accept either spelling.
    self.assertIn(match_type_name, ('SRE_Match', 'Match'))
def test_in_general(self):
    """Smoke test: building a selector from the fixture tree must not fail."""
    document = self.tree
    # No assertion on purpose; an exception here is the failure signal.
    XpathSelector(document)
def test_text_no_default(self):
    """``rex('(zz)').text()`` raises DataNotFound when no default is given."""
    text_nodes = XpathSelector(self.tree).select('//ul/li/text()')
    # The whole chain sits inside the context so the error is caught
    # regardless of which call in the chain raises it.
    with self.assertRaises(DataNotFound):
        text_nodes.rex('(zz)').text()
def test_html(self):
    """``html()`` of a selector wrapping the first ``<h1>`` serialises it."""
    heading = self.tree.xpath('//h1')[0]
    sel = XpathSelector(heading)
    self.assertEqual('<h1>test</h1>', sel.html().strip())
def test_number(self):
    """``rex(...).number()`` converts the captured digits to the integer 4."""
    sel = XpathSelector(self.tree).select('//ul/li[4]/text()')
    # Raw string keeps "\d" a regex token rather than an invalid escape.
    self.assertEqual(4, sel.rex(r'(\d+)').number())
def test_context_manager_select_text(self):
    """A selection bound by ``with`` exposes ``text()`` of the first match."""
    markup = '<b>one</b><b>two</b>'
    selection = XpathSelector(fromstring(markup)).select('b')
    with selection as bound:
        first_text = bound.text()
        self.assertEqual('one', first_text)
def test_select_node(self):
    """Indexing a selection yields an item whose ``_node`` is the element."""
    sel = XpathSelector(self.tree)
    first_heading = sel.select('//h1')[0]
    # ``_node`` is a private attribute; it is inspected here on purpose to
    # check which underlying element the item wraps.
    self.assertEqual('test', first_heading._node.text)
def test_attr(self):
    """``attr('class')`` on ``//ul[2]/li`` returns ``'li-1'``."""
    sel = XpathSelector(self.tree).select('//ul[2]/li')
    self.assertEqual('li-1', sel.attr('class'))
def test_selector_number(self):
    """``one().number()`` works on both element and attribute selections."""
    sel = XpathSelector(self.tree)
    # Number taken from an element selection.
    self.assertEqual(4, sel.select('//ul/li[last()]').one().number())
    # Number taken from an ``@id`` attribute selection.
    self.assertEqual(6, sel.select('//ul/li[last()]/@id').one().number())
def test_attr_default(self):
    """``attr()`` on ``//ul[2]/li[10]`` raises unless a default is given."""
    sel = XpathSelector(self.tree).select('//ul[2]/li[10]')
    self.assertRaises(DataNotFound, lambda: sel.attr('class'))
    self.assertEqual('DEFAULT', sel.attr('class', default='DEFAULT'))
def test_text_selector(self):
    """Items selected via ``text()`` are text nodes and still yield text."""
    sel = XpathSelector(self.tree).select('//li/text()').one()
    self.assertTrue(sel.is_text_node())
    self.assertEqual(
        'one',
        XpathSelector(self.tree).select('//li/text()').text())
def test_rex(self):
    """``rex()`` on a selection returns a ``RexResultList``."""
    sel = XpathSelector(self.tree).select('//ul/li')
    # Raw string keeps "\w" a regex token rather than an invalid escape;
    # assertIsInstance reports the actual type when the check fails.
    self.assertIsInstance(sel.rex(r'(\w+)'), RexResultList)