Ejemplo n.º 1
0
    def test_hyphenate_html(self):
        root = parse_html5('''
<p>beautiful, <span lang="sv"><!-- x -->tillata\n<span lang="en">Expand</span></span> "latitude!''',
                           line_numbers=False)
        add_soft_hyphens_to_html(root, hyphen_char='=')
        raw = etree.tostring(root, method='text', encoding='unicode')
        self.ae(raw, 'beau=ti=ful, tilla=ta\nEx=pand "lat=i=tude!')
Ejemplo n.º 2
0
def get_html_ids(raw_data):
    """Collect anchor identifiers found inside the ``<body>`` of a document.

    ``raw_data`` is handed to ``parse_html5``. For every ``body`` element in
    the resulting tree, the value of each ``id`` attribute on the body itself
    or any descendant is gathered, along with the ``name`` attribute of every
    descendant ``<a>`` element.

    Returns a ``set`` of the collected attribute values; the set is empty
    when the tree has no ``body`` element.
    """
    tree = parse_html5(raw_data, discard_namespaces=True, line_numbers=False, fix_newlines=False)
    found = set()
    attribute_queries = (
        'descendant-or-self::*/@id',
        'descendant::a/@name',
    )
    for body in tree.xpath('//body'):
        for query in attribute_queries:
            found.update(body.xpath(query))
    return found
Ejemplo n.º 3
0
def get_html_ids(raw_data):
    """Collect anchor identifiers found inside the ``<body>`` of a document.

    ``raw_data`` is handed to ``parse_html5``. For every ``body`` element in
    the resulting tree, the value of each ``id`` attribute on the body itself
    or any descendant is gathered, along with the ``name`` attribute of every
    descendant ``<a>`` element.

    Returns a ``set`` of the collected attribute values; the set is empty
    when the tree has no ``body`` element.
    """
    tree = parse_html5(raw_data, discard_namespaces=True, line_numbers=False, fix_newlines=False)
    found = set()
    attribute_queries = (
        'descendant-or-self::*/@id',
        'descendant::a/@name',
    )
    for body in tree.xpath('//body'):
        for query in attribute_queries:
            found.update(body.xpath(query))
    return found
Ejemplo n.º 4
0
 def test_html5lib(self):
     """Smoke-test that html5lib and ``parse_html5`` import and run.

     There are no assertions: the test passes as long as the imports
     succeed and parsing a tiny, unclosed ``<p>`` fragment does not raise.
     """
     # The two html5lib imports are only checked for importability; the
     # imported names are unused, hence the noqa markers.
     import html5lib.html5parser  # noqa
     from html5lib import parse  # noqa
     # Test that we are using the calibre version of html5lib
     from calibre.ebooks.oeb.polish.parsing import parse_html5
     parse_html5('<p>xxx')
Ejemplo n.º 5
0
 def test_html5lib(self):
     """Smoke-test that html5lib and ``parse_html5`` import and run.

     There are no assertions: the test passes as long as the imports
     succeed and parsing a tiny, unclosed ``<p>`` fragment does not raise.
     """
     # The two html5lib imports are only checked for importability; the
     # imported names are unused, hence the noqa markers.
     import html5lib.html5parser  # noqa
     from html5lib import parse  # noqa
     # Test that we are using the calibre version of html5lib
     from calibre.ebooks.oeb.polish.parsing import parse_html5
     parse_html5('<p>xxx')