예제 #1
0
 def test_single_element(self):
     """A lone ``<img>`` tag yields one ``(el, attr, url, pos)`` tuple."""
     source = BytesIO(b'<img src="#">')
     context = iterparse(source)
     elements = list(context)  # consume the generator
     # assertEqual reports the actual length on failure; assertTrue on a
     # comparison would only say "False is not true".
     self.assertEqual(len(elements), 1)
     el, attr, url, pos = elements.pop()
     self.assertEqual(url, '#')
     self.assertEqual(pos, 0)
     self.assertEqual(attr, 'src')
     self.assertEqual(el.tag, 'img')
     self.assertEqual(el.attrib, {'src': '#'})
예제 #2
0
 def test_source_is_not_bytes(self):
     """A text-mode ``StringIO`` source is accepted and yields one link."""
     source = StringIO('<img src="#">')
     context = iterparse(source)
     lst = list(context)
     # assertEqual reports the actual length on failure; assertTrue on a
     # comparison would only say "False is not true".
     self.assertEqual(len(lst), 1)
     el, attr, url, pos = lst.pop()
     self.assertEqual(url, '#')
     self.assertEqual(pos, 0)
     self.assertEqual(attr, 'src')
     self.assertEqual(el.tag, 'img')
     self.assertEqual(el.attrib, {'src': '#'})
예제 #3
0
 def test_meta_charset_insertion(self):
     """Empty input plus ``include_meta_charset_tag`` yields one meta tag."""
     parser = iterparse(BytesIO(b''), include_meta_charset_tag=True)
     # No links are expected from an empty document.
     self.assertEqual(list(parser), [])

     meta_tags = parser.root.xpath('//meta')
     self.assertEqual(len(meta_tags), 1)
     meta = meta_tags[-1]
     self.assertEqual(meta.tag, 'meta')
     self.assertEqual(meta.attrib['charset'], 'iso-8859-1')

     # The inserted tag must serialize like a freshly built meta element.
     expected_meta = lxml.html.Element('meta', charset='iso-8859-1')
     self.assertEqual(tostring(meta), tostring(expected_meta))

     # The whole tree is expected to be just <html><head><meta></head></html>.
     serialized_meta = tostring(meta)
     self.assertEqual(tostring(parser.root),
                      b'<html><head>%s</head></html>' % serialized_meta)
예제 #4
0
 def test_root_post_iteration(self):
     """Once the iterator is consumed, ``root`` is an lxml element."""
     source = BytesIO(b'<img src="#">')
     context = iterparse(source)
     list(context)  # consume the generator
     # Dedicated assertions show the offending value when they fail.
     self.assertIsNotNone(context.root)
     self.assertIsInstance(context.root, lxml.etree._Element)
예제 #5
0
 def test_root_pre_iteration(self):
     """Before iteration starts, ``root`` exists but is still ``None``."""
     source = BytesIO(b'<img src="#">')
     context = iterparse(source)
     self.assertTrue(hasattr(context, 'root'))
     # assertIsNone prints the unexpected value if the check fails.
     self.assertIsNone(context.root)
예제 #6
0
 def test_return_type(self):
     """``iterparse`` returns an object implementing ``Iterator``."""
     source = BytesIO(b'<img src="#">')
     context = iterparse(source)
     # assertIsInstance names the actual type in the failure message.
     self.assertIsInstance(context, Iterator)
예제 #7
0
class TestFullLatin1EncodedHTMLParsing(unittest.TestCase):
    """Walk the Latin-1 encoded ``html`` fixture one link at a time.

    All tests share a single ``iterparse`` iterator stored on the class, and
    each test consumes the next item from it.  The tests are therefore
    order-dependent: they rely on unittest running methods in alphabetical
    order (hence the ``test_a_`` ... ``test_z_`` prefixes) and are not
    expected to pass when run individually or in a different order.
    """

    # Single instance of the parser to avoid creating anew each time.
    # ``html`` is the module-level fixture defined outside this class.
    context = iterparse(BytesIO(html.encode('latin1', 'replace')),
                        encoding='latin1')

    # NOTE: Methods are ordered in the sequence they will be parsed

    def test_a_first_meta_element(self):
        """First link: the URL inside the meta refresh ``content`` value."""
        el, attr, url, pos = next(self.context)
        self.assertEqual(el.tag, 'meta')
        self.assertEqual(url, 'http://nx-domain.com/redirect')
        # The URL starts after the "3;" prefix of the content attribute.
        self.assertEqual(pos, 2)
        self.assertEqual(attr, 'content')
        self.assertEqual(el.attrib, {
            'http-equiv': 'refresh',
            'content': "3;http://nx-domain.com/redirect"
        })

    def test_b_first_link_element(self):
        """Second link: the relative stylesheet ``href``."""
        el, attr, url, pos = next(self.context)
        self.assertEqual(el.tag, 'link')
        self.assertEqual(url, 'css/main.css')
        self.assertEqual(pos, 0)
        self.assertEqual(attr, 'href')
        self.assertEqual(el.attrib, {
            'rel': 'stylesheet',
            'href': "css/main.css"
        })

    def test_c_second_link_element(self):
        """Third link: the absolute stylesheet ``href``."""
        el, attr, url, pos = next(self.context)
        self.assertEqual(el.tag, 'link')
        self.assertEqual(url, 'http://nx-domain.com/css/style.css')
        self.assertEqual(pos, 0)
        self.assertEqual(attr, 'href')
        self.assertEqual(el.attrib, {
            'rel': 'stylesheet',
            'href': 'http://nx-domain.com/css/style.css'
        })

    def test_d_style_tag_css_url_element(self):
        """Fourth link: a URL found in a ``<style>`` element."""
        el, attr, url, pos = next(self.context)
        self.assertEqual(el.tag, 'style')
        self.assertEqual(url, "img/background.png")
        self.assertEqual(pos, 49)
        # No attribute name is expected for this link.
        self.assertIsNone(attr)
        self.assertEqual(el.attrib, {})

    def test_e_style_tag_css_import_element(self):
        """Fifth link: a second URL found in a ``<style>`` element."""
        el, attr, url, pos = next(self.context)
        self.assertEqual(el.tag, 'style')
        self.assertEqual(url, "css/theme.css")
        self.assertEqual(pos, 10)
        # No attribute name is expected for this link.
        self.assertIsNone(attr)
        self.assertEqual(el.attrib, {})

    def test_f_first_anchor_element(self):
        """Sixth link: an anchor whose ``href`` is a bare fragment."""
        el, attr, url, pos = next(self.context)
        self.assertEqual(el.tag, 'a')
        self.assertEqual(url, "#")
        self.assertEqual(pos, 0)
        self.assertEqual(attr, 'href')
        self.assertEqual(el.attrib, {'href': '#'})

    def test_g_second_anchor_element(self):
        """Seventh link: an anchor with a ``javascript:`` ``href``."""
        el, attr, url, pos = next(self.context)
        self.assertEqual(el.tag, 'a')
        self.assertEqual(url, "javascript:void(0);")
        self.assertEqual(pos, 0)
        self.assertEqual(attr, 'href')
        self.assertEqual(el.attrib, {'href': "javascript:void(0);"})

    def test_h_third_anchor_element(self):
        """Eighth link: an anchor with an absolute ``href``."""
        el, attr, url, pos = next(self.context)
        self.assertEqual(el.tag, 'a')
        self.assertEqual(url, "http://new-site.com")
        self.assertEqual(pos, 0)
        self.assertEqual(attr, 'href')
        self.assertEqual(el.attrib, {'href': "http://new-site.com"})

    def test_i_inline_style_element(self):
        """Ninth link: a ``url(...)`` inside an inline ``style`` attribute."""
        el, attr, url, pos = next(self.context)
        self.assertEqual(el.tag, 'div')
        self.assertEqual(url, "img/background.png")
        # Offset of the URL within the style attribute value.
        self.assertEqual(pos, 17)
        self.assertEqual(attr, 'style')
        self.assertEqual(el.attrib,
                         {'style': "background: url('img/background.png');"})

    def test_j_first_img_element(self):
        """Tenth link: an image with a relative ``src``."""
        el, attr, url, pos = next(self.context)
        self.assertEqual(el.tag, 'img')
        self.assertEqual(url, "img/img1.png")
        self.assertEqual(pos, 0)
        self.assertEqual(attr, 'src')
        self.assertEqual(el.attrib, {'src': "img/img1.png", 'alt': 'img1-alt'})

    def test_k_second_img_element(self):
        """Eleventh link: an image with an absolute ``src`` and empty alt."""
        el, attr, url, pos = next(self.context)
        self.assertEqual(el.tag, 'img')
        self.assertEqual(url, "http://static-site.com/img/img3.png")
        self.assertEqual(pos, 0)
        self.assertEqual(attr, 'src')
        self.assertEqual(el.attrib, {
            'src': "http://static-site.com/img/img3.png",
            'alt': ""
        })

    def test_y_empty_iterator(self):
        """After the last link the shared iterator is exhausted."""
        with self.assertRaises(StopIteration):
            next(self.context)
        # Comparing against [] shows any leftover items on failure.
        self.assertEqual(list(self.context), [])

    def test_z_root_tree_attribute(self):
        """After exhaustion, ``root`` is an lxml element with a root tree."""
        root = self.context.root
        self.assertTrue(hasattr(root, 'getroottree'))
        # Dedicated assertions name the actual type when they fail.
        self.assertIsInstance(root, lxml.etree.ElementBase)
        self.assertIsInstance(root.getroottree(), lxml.etree._ElementTree)
예제 #8
0
 def test_source_is_empty(self):
     """An empty byte stream yields no links and a bare ``<html>`` root."""
     parser = iterparse(BytesIO(b''))
     links = list(parser)
     self.assertEqual(links, [])
     serialized_root = tostring(parser.root)
     self.assertEqual(serialized_root, b'<html></html>')
예제 #9
0
 def test_source_without_read_method(self):
     """Non-file-like sources (plain object, ``None``) raise TypeError."""
     self.assertRaises(TypeError, iterparse, object())
     self.assertRaises(TypeError, iterparse, None)
예제 #10
0
 def test_source_is_empty_string(self):
     """Passing an empty ``str`` as the source raises TypeError."""
     self.assertRaises(TypeError, iterparse, '')