def test_regexp(self):
    """Check that the ``re:test`` XPath function can be used in queries."""
    html = '<div><h1 id="h1">foo</h1><h2>bar</h2></div>'
    sel = XpathSelector(fromstring(html))

    # Regex applied to the text content of a node.
    by_text = sel.select('//*[re:test(text(), "b.r")]')
    self.assertEqual('h2', by_text.node().tag)

    # Regex applied to an attribute.  A raw string is used so that the
    # regex backslash is not parsed as an (invalid) Python escape.
    by_attr = sel.select(r'//*[re:test(@id, "^h\d+$")]/text()')
    self.assertEqual('foo', by_attr.text())
def test_regexp(self):
    """Verify ``re:test`` matching on node text and on an attribute."""
    html = '<div><h1 id="h1">foo</h1><h2>bar</h2></div>'
    sel = XpathSelector(fromstring(html))

    text_match = sel.select('//*[re:test(text(), "b.r")]')
    self.assertEqual('h2', text_match.node().tag)

    # Raw literal: ``\d`` belongs to the regex, not to Python's string
    # escape syntax, so it must not be interpreted by the tokenizer.
    id_match = sel.select(r'//*[re:test(@id, "^h\d+$")]/text()')
    self.assertEqual('foo', id_match.text())
def test_incorrect_xpath(self):
    """Boolean-valued XPath expressions must produce an empty selection.

    The lxml ``xpath`` function returns a boolean for the expressions
    below.  That used to break selector logic which assumed that only a
    list could be returned; this test was created together with the fix.
    """
    sel = XpathSelector(self.tree).select('//ul/li/text()="oops"')
    self.assertEqual(False, sel.exists())

    # The selector list is always empty in this special case, even if
    # the XPath evaluates to True at the lxml level.
    self.assertEqual(True, self.tree.xpath('//ul[1]/li[1]/text()="one"'))
    sel = XpathSelector(self.tree).select('//ul[1]/li[1]/text()="one"')
    self.assertEqual(False, sel.exists())
def test_text(self):
    """``text()`` on the ``//ul/li`` selection yields ``"one"``."""
    sel = XpathSelector(self.tree).select("//ul/li")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("one", sel.text())
def test_rex_method(self):
    """``rex()`` on a selection returns a ``RexResultList`` instance."""
    sel = XpathSelector(self.tree)
    # Raw string keeps ``\w`` as a regex token rather than an invalid
    # Python escape; assertIsInstance gives a clearer failure message.
    result = sel.select("//li").rex(r"\w*")
    self.assertIsInstance(result, RexResultList)
def test_text_selector_html(self):
    """``html()`` on a single text-node selector yields the text itself."""
    sel = XpathSelector(self.tree).select("//li/text()").one()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(u"one", sel.html())
def test_node_default(self):
    """``node()`` raises on an empty selection unless a default is given."""
    missing = XpathSelector(self.tree).select('//ul/li[10]')

    with self.assertRaises(DataNotFound):
        missing.node()

    fallback = missing.node(default='DEFAULT')
    self.assertEqual('DEFAULT', fallback)
def test_attr_does_not_exist(self):
    """``attr()`` raises ``DataNotFound`` for the ``id-xxx`` attribute."""
    selector = XpathSelector(self.tree)

    with self.assertRaises(DataNotFound):
        selector.select("//ul[1]").attr("id-xxx")
def test_xpath_concat_function(self):
    """The XPath ``concat`` function result is readable via ``text()``."""
    markup = '<a href="index.html"></a>'
    selector = XpathSelector(fromstring(markup))

    joined = selector.select('concat("/",//a/@href)')
    self.assertEqual('/index.html', joined.text())
def test_rex_default(self):
    """``rex()`` on an empty selection raises unless a default is given."""
    sel = XpathSelector(self.tree).select("//ul/li[10]")

    with self.assertRaises(DataNotFound):
        sel.rex("zz")

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("DEFAULT", sel.rex("zz", default="DEFAULT"))
def test_number_default(self):
    """``number()`` on an empty selection raises unless a default is given."""
    sel = XpathSelector(self.tree).select('//ul/li[10]')
    self.assertRaises(DataNotFound, sel.number)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('DEFAULT', sel.number(default='DEFAULT'))
def test_exists(self):
    """``exists()`` is True for ``li[4]`` and False for ``li[5]``."""
    sel = XpathSelector(self.tree).select('//ul/li[4]')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(True, sel.exists())

    sel = XpathSelector(self.tree).select('//ul/li[5]')
    self.assertEqual(False, sel.exists())
def test_inner_html_default(self):
    """``inner_html()`` on an empty selection raises unless a default is given."""
    sel = XpathSelector(self.tree).select('//ul/li[10]')
    self.assertRaises(DataNotFound, sel.inner_html)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('DEFAULT', sel.inner_html(default='DEFAULT'))
def test_number(self):
    """``number()`` on the ``//ul/li[4]`` selection yields ``4``."""
    sel = XpathSelector(self.tree).select('//ul/li[4]')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(4, sel.number())
def test_inner_html(self):
    """``inner_html()`` of the ``second-list`` element yields its ``li`` markup."""
    sel = XpathSelector(self.tree).select('//ul[@id="second-list"]')
    expected = u'<li class="li-1">yet one</li>\n <li class="li-2">yet two</li>'
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expected, sel.inner_html().strip())
def test_html(self):
    """``html()`` on the ``//ul/li`` selection yields ``<li>one</li>``."""
    sel = XpathSelector(self.tree).select('//ul/li')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(u'<li>one</li>', sel.html().strip())
def test_text(self):
    """``text()`` on the ``//ul/li`` selection yields ``'one'``."""
    sel = XpathSelector(self.tree).select('//ul/li')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('one', sel.text())
def test_number(self):
    """``number()`` on the ``//ul/li[4]`` selection yields ``4``."""
    sel = XpathSelector(self.tree).select("//ul/li[4]")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(4, sel.number())
def test_text_no_default(self):
    """``text()`` on a non-matching ``rex()`` result raises ``DataNotFound``."""
    text_nodes = XpathSelector(self.tree).select("//ul/li/text()")

    with self.assertRaises(DataNotFound):
        text_nodes.rex("(zz)").text()
def test_assert_exists(self):
    """``assert_exists()`` passes on ``//ul/li`` and raises on ``//ul/li[10]``."""
    root = XpathSelector(self.tree)

    # Must not raise for the matching expression.
    root.select("//ul/li").assert_exists()

    missing = XpathSelector(self.tree).select("//ul/li[10]")
    with self.assertRaises(DataNotFound):
        missing.assert_exists()
def test_html(self):
    """``html()`` of a selector built directly on the ``h1`` node."""
    sel = XpathSelector(self.tree.xpath("//h1")[0])
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("<h1>test</h1>", sel.html().strip())
def test_attr_default(self):
    """``attr()`` on an empty selection raises unless a default is given."""
    sel = XpathSelector(self.tree).select("//ul[2]/li[10]")

    with self.assertRaises(DataNotFound):
        sel.attr("class")

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("DEFAULT", sel.attr("class", default="DEFAULT"))
def test_context_manager_select_text(self):
    """A selection used as a context manager still supports ``text()``."""
    markup = '<b>one</b><b>two</b>'
    selection = XpathSelector(fromstring(markup)).select('b')

    with selection as elem:
        self.assertEqual('one', elem.text())
def test_one(self):
    """``rex(...).one()`` returns a regular-expression match object.

    The check is done by class name.  The match class is reported as
    ``SRE_Match`` by Python versions before 3.7 and as ``Match`` from
    3.7 on, so both spellings are accepted.
    """
    sel = XpathSelector(self.tree).select("//ul/li")
    class_name = sel.rex("one").one().__class__.__name__
    self.assertIn(class_name, ("SRE_Match", "Match"))
def test_context_manager_select_iter(self):
    """A selection used as a context manager can be iterated over."""
    markup = '<b>one</b><b>two</b>'
    selection = XpathSelector(fromstring(markup)).select('b')

    with selection as qs:
        texts = []
        for item in qs:
            texts.append(item.text())
        # Order is irrelevant here, so compare as sets.
        self.assertEqual(set(['one', 'two']), set(texts))
def test_number(self):
    """``number()`` on a ``rex()`` result yields the captured integer."""
    sel = XpathSelector(self.tree).select("//ul/li[4]/text()")
    # Raw string keeps ``\d`` as a regex token rather than an invalid
    # Python escape; assertEqual replaces the deprecated assertEquals.
    self.assertEqual(4, sel.rex(r"(\d+)").number())
def test_attr(self):
    """``attr('class')`` on the ``//ul[2]/li`` selection yields ``'li-1'``."""
    sel = XpathSelector(self.tree).select('//ul[2]/li')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('li-1', sel.attr('class'))
def test_selector_number_does_not_exist(self):
    """``number()`` on the first ``li`` falls back to the default or raises."""
    sel = XpathSelector(self.tree).select("//ul/li[1]").one()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("DEFAULT", sel.number(default="DEFAULT"))

    with self.assertRaises(DataNotFound):
        sel.number()
def test_attr_default(self):
    """``attr()`` on an empty selection raises unless a default is given."""
    sel = XpathSelector(self.tree).select('//ul[2]/li[10]')

    with self.assertRaises(DataNotFound):
        sel.attr('class')

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('DEFAULT', sel.attr('class', default='DEFAULT'))
def test_attr_with_default_value(self):
    """``attr()`` returns the supplied default for the ``id-xxx`` attribute."""
    second_list = XpathSelector(self.tree).select("//ul[2]")
    value = second_list.attr("id-xxx", default="z")
    self.assertEqual("z", value)
def test_rex(self):
    """``rex()`` on a selection returns a ``RexResultList`` instance."""
    sel = XpathSelector(self.tree).select('//ul/li')
    # Raw string keeps ``\w`` as a regex token rather than an invalid
    # Python escape; assertIsInstance gives a clearer failure message.
    self.assertIsInstance(sel.rex(r'(\w+)'), RexResultList)
def test_attr_list(self):
    """``attr_list('class')`` collects the class of every selected ``li``."""
    root = XpathSelector(self.tree)
    classes = root.select('//ul[@id="second-list"]/li').attr_list("class")
    # Compared as sets, so ordering is not part of the check.
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(set(["li-1", "li-2"]), set(classes))
def test_rex_default(self):
    """``rex()`` on an empty selection raises unless a default is given."""
    sel = XpathSelector(self.tree).select('//ul/li[10]')

    with self.assertRaises(DataNotFound):
        sel.rex('zz')

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('DEFAULT', sel.rex('zz', default='DEFAULT'))
def test_text_selector_select(self):
    """Calling ``select()`` on a text-node selector raises ``SelectionRuntimeError``."""
    text_node = XpathSelector(self.tree).select("//li/text()").one()

    with self.assertRaises(SelectionRuntimeError):
        text_node.select("foo")
def test_node_list(self):
    """``node_list()`` equals the raw lxml ``xpath`` result for the same query."""
    sel = XpathSelector(self.tree).select('//ul/li')
    expected = self.tree.xpath('//ul/li')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expected, sel.node_list())
def test_node_default(self):
    """``node()`` raises on an empty selection unless a default is given."""
    empty = XpathSelector(self.tree).select("//ul/li[10]")

    with self.assertRaises(DataNotFound):
        empty.node()

    result = empty.node(default="DEFAULT")
    self.assertEqual("DEFAULT", result)
def test_require(self):
    """``require()`` passes on ``//ul`` and raises ``RequiredDataNotFound`` on ``//foo``."""
    present = XpathSelector(self.tree).select('//ul')
    present.require()

    # Build the selection outside the guarded block so that only the
    # ``require()`` call itself is expected to raise.
    absent = XpathSelector(self.tree).select('//foo')
    with self.assertRaises(RequiredDataNotFound):
        absent.require()
def test_html(self):
    """``html()`` on the ``//ul/li`` selection yields ``<li>one</li>``."""
    sel = XpathSelector(self.tree).select("//ul/li")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(u"<li>one</li>", sel.html().strip())
def test_one(self):
    """``rex(...).one()`` returns a regular-expression match object.

    The check is done by class name.  The match class is reported as
    ``SRE_Match`` by Python versions before 3.7 and as ``Match`` from
    3.7 on, so both spellings are accepted.
    """
    sel = XpathSelector(self.tree).select('//ul/li')
    class_name = sel.rex('one').one().__class__.__name__
    self.assertIn(class_name, ('SRE_Match', 'Match'))
def test_number_default(self):
    """``number()`` on an empty selection raises unless a default is given."""
    sel = XpathSelector(self.tree).select("//ul/li[10]")
    self.assertRaises(DataNotFound, sel.number)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("DEFAULT", sel.number(default="DEFAULT"))
def test_text(self):
    """``text()`` on a ``rex()`` result yields the captured group."""
    sel = XpathSelector(self.tree).select('//ul/li/text()')
    # Raw string keeps ``\w`` as a regex token rather than an invalid
    # Python escape; assertEqual replaces the deprecated assertEquals.
    self.assertEqual('one', sel.rex(r'(\w+)').text())
def test_exists(self):
    """``exists()`` is True for ``li[4]`` and False for ``li[5]``."""
    sel = XpathSelector(self.tree).select("//ul/li[4]")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(True, sel.exists())

    sel = XpathSelector(self.tree).select("//ul/li[5]")
    self.assertEqual(False, sel.exists())
def test_text_no_default(self):
    """``text()`` on a non-matching ``rex()`` result raises ``DataNotFound``."""
    text_nodes = XpathSelector(self.tree).select('//ul/li/text()')

    with self.assertRaises(DataNotFound):
        text_nodes.rex('(zz)').text()
def test_attr(self):
    """``attr("class")`` on the ``//ul[2]/li`` selection yields ``"li-1"``."""
    sel = XpathSelector(self.tree).select("//ul[2]/li")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("li-1", sel.attr("class"))
def test_text_default_value(self):
    """``text(default=...)`` on a non-matching ``rex()`` result yields the default."""
    sel = XpathSelector(self.tree).select('//ul/li/text()')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('DEFAULT', sel.rex('(zz)').text(default='DEFAULT'))
def test_rex(self):
    """``rex()`` on a selection returns a ``RexResultList`` instance."""
    sel = XpathSelector(self.tree).select("//ul/li")
    # Raw string keeps ``\w`` as a regex token rather than an invalid
    # Python escape; assertIsInstance gives a clearer failure message.
    self.assertIsInstance(sel.rex(r"(\w+)"), RexResultList)
def test_number(self):
    """``number()`` on a ``rex()`` result yields the captured integer."""
    sel = XpathSelector(self.tree).select('//ul/li[4]/text()')
    # Raw string keeps ``\d`` as a regex token rather than an invalid
    # Python escape; assertEqual replaces the deprecated assertEquals.
    self.assertEqual(4, sel.rex(r'(\d+)').number())
def test_node_list(self):
    """``node_list()`` equals the raw lxml ``xpath`` result for the same query."""
    sel = XpathSelector(self.tree).select("//ul/li")
    expected = self.tree.xpath("//ul/li")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expected, sel.node_list())
def test_in_general(self):
    """Smoke test: constructing a selector from the tree must not raise."""
    # The instance is discarded; only successful construction matters.
    XpathSelector(
        self.tree
    )
def test_text(self):
    """``text()`` on a ``rex()`` result yields the captured group."""
    sel = XpathSelector(self.tree).select("//ul/li/text()")
    # Raw string keeps ``\w`` as a regex token rather than an invalid
    # Python escape; assertEqual replaces the deprecated assertEquals.
    self.assertEqual("one", sel.rex(r"(\w+)").text())
def test_select_node(self):
    """Indexing a selection gives a selector wrapping the matched node."""
    sel = XpathSelector(self.tree)
    first_h1 = sel.select('//h1')[0]
    # Inspects the wrapped node through the private ``_node`` attribute.
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('test', first_h1._node.text)
def test_text_default_value(self):
    """``text(default=...)`` on a non-matching ``rex()`` result yields the default."""
    sel = XpathSelector(self.tree).select("//ul/li/text()")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("DEFAULT", sel.rex("(zz)").text(default="DEFAULT"))
def test_html(self):
    """``html()`` of a selector built directly on the ``h1`` node."""
    sel = XpathSelector(self.tree.xpath('//h1')[0])
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('<h1>test</h1>', sel.html().strip())
def test_select_node(self):
    """Indexing a selection gives a selector wrapping the matched node."""
    sel = XpathSelector(self.tree)
    first_h1 = sel.select("//h1")[0]
    # Inspects the wrapped node through the private ``_node`` attribute.
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("test", first_h1._node.text)
def test_selector_number(self):
    """``number()`` works on both an element and an attribute selector."""
    sel = XpathSelector(self.tree)
    # assertEqual replaces the deprecated assertEquals alias.
    # Number taken from the last ``li`` element.
    self.assertEqual(4, sel.select('//ul/li[last()]').one().number())
    # Number taken from that element's ``id`` attribute.
    self.assertEqual(6, sel.select('//ul/li[last()]/@id').one().number())
def test_selector_number(self):
    """``number()`` works on both an element and an attribute selector."""
    sel = XpathSelector(self.tree)
    # assertEqual replaces the deprecated assertEquals alias.
    # Number taken from the last ``li`` element.
    self.assertEqual(4, sel.select("//ul/li[last()]").one().number())
    # Number taken from that element's ``id`` attribute.
    self.assertEqual(6, sel.select("//ul/li[last()]/@id").one().number())
def test_selector_number_does_not_exist(self):
    """``number()`` on the first ``li`` falls back to the default or raises."""
    sel = XpathSelector(self.tree).select('//ul/li[1]').one()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('DEFAULT', sel.number(default='DEFAULT'))

    with self.assertRaises(DataNotFound):
        sel.number()
def test_text_selector(self):
    """A ``text()`` XPath yields text-node selectors whose text is readable."""
    sel = XpathSelector(self.tree).select("//li/text()").one()
    self.assertTrue(sel.is_text_node())

    # assertEqual replaces the deprecated assertEquals alias.
    text = XpathSelector(self.tree).select("//li/text()").text()
    self.assertEqual("one", text)
def test_text_selector(self):
    """A ``text()`` XPath yields text-node selectors whose text is readable."""
    sel = XpathSelector(self.tree).select('//li/text()').one()
    self.assertTrue(sel.is_text_node())

    # assertEqual replaces the deprecated assertEquals alias.
    text = XpathSelector(self.tree).select('//li/text()').text()
    self.assertEqual('one', text)