Ejemplo n.º 1
0
 def select_ids(selector, html_only):
     """Return the ids matched by ``selector`` when ``root`` is read as HTML.

     The selector is evaluated twice against the enclosing ``root``: once
     wrapped with ``ElementWrapper.from_xml_root`` and once wrapped with
     ``ElementWrapper.from_html_root``.  Elements without an ``id``
     attribute are reported as ``'nil'``.

     When ``html_only`` is true the XML-mode query must match nothing;
     otherwise both modes must produce the same list of ids.
     """
     def matched_ids(wrapped_root):
         # Collect the ``id`` attribute of every match, in query order.
         return [match.etree_element.get('id', 'nil')
                 for match in wrapped_root.query_all(selector)]

     xml_ids = matched_ids(ElementWrapper.from_xml_root(root))
     html_ids = matched_ids(ElementWrapper.from_html_root(root))
     expected_xml_ids = [] if html_only else html_ids
     assert xml_ids == expected_xml_ids
     return html_ids
Ejemplo n.º 2
0
def test_select_shakespeare():
    """Check selector match counts against the Shakespeare sample document.

    The document is parsed from ``HTML_SHAKESPEARE``; only its XHTML
    ``<body>`` subtree is wrapped and queried.  Each selector below must
    match exactly the listed number of elements.
    """
    document = etree.fromstring(HTML_SHAKESPEARE)
    body = ElementWrapper.from_xml_root(
        document.find('.//{http://www.w3.org/1999/xhtml}body'))

    def count(selector):
        # ``query_all`` is consumed fully just to count the matches.
        return len(list(body.query_all(selector)))

    # Data borrowed from http://mootools.net/slickspeed/
    expected_counts = [
        # Changed from original (was 252); probably because I'm only
        # searching the body.
        ('*', 246),
        # ('div:contains(CELIA)', 26),
        ('div:only-child', 22),  # ?
        ('div:nth-child(even)', 106),
        ('div:nth-child(2n)', 106),
        ('div:nth-child(odd)', 137),
        ('div:nth-child(2n+1)', 137),
        ('div:nth-child(n)', 243),
        ('div:last-child', 53),
        ('div:first-child', 51),
        ('div > div', 242),
        ('div + div', 190),
        ('div ~ div', 190),
        ('body', 1),
        ('body div', 243),
        ('div', 243),
        ('div div', 242),
        ('div div div', 241),
        ('div, div, div', 243),
        ('div, a, span', 243),
        ('.dialog', 51),
        ('div.dialog', 51),
        ('div .dialog', 51),
        ('div.character, div.dialog', 99),
        ('div.direction.dialog', 0),
        ('div.dialog.direction', 0),
        ('div.dialog.scene', 1),
        ('div.scene.scene', 1),
        ('div.scene .scene', 0),
        ('div.direction .dialog ', 0),
        ('div .dialog .direction', 4),
        ('div.dialog .dialog .direction', 4),
        ('#speech5', 1),
        ('div#speech5', 1),
        ('div #speech5', 1),
        ('div.scene div.dialog', 49),
        ('div#scene1 div.dialog div', 142),
        ('#scene1 #speech1', 1),
        ('div[class]', 103),
        ('div[class=dialog]', 50),
        ('div[class^=dia]', 51),
        ('div[class$=log]', 50),
        ('div[class*=sce]', 1),
        ('div[class|=dialog]', 50),  # ? Seems right
        # ('div[class!=madeup]', 243),  # ? Seems right
        ('div[class~=dialog]', 51),  # ? Seems right
    ]

    # Checked in listing order, so the first mismatch is the one reported.
    for selector, expected in expected_counts:
        assert count(selector) == expected
Ejemplo n.º 3
0
def test_valid_selectors(test):
    """Run one valid-selector case against ``TEST_DOCUMENT``.

    ``test`` is a mapping describing the case.  The keys read here are
    ``xfail`` (optional), ``exclude`` (optional), ``selector``, ``expect``
    and ``name``.  Cases flagged ``xfail`` are reported through
    ``pytest.xfail``; cases whose ``exclude`` mentions ``'document'`` or
    ``'xhtml'`` are silently skipped.

    Raises:
        AssertionError: carrying ``test['name']`` when the ids of the
            matched elements differ from ``test['expect']``.
    """
    if test.get('xfail'):
        pytest.xfail()

    excluded = test.get('exclude', ())
    if any(context in excluded for context in ('document', 'xhtml')):
        return

    document = ElementWrapper.from_xml_root(TEST_DOCUMENT)
    matched_ids = [
        element.id for element in document.query_all(test['selector'])]
    if matched_ids == test['expect']:
        return

    # Dump both sides of the comparison before failing, one item per line.
    for line in (test['selector'], matched_ids, '!=', test['expect']):
        print(line)
    raise AssertionError(test['name'])
Ejemplo n.º 4
0
def test_lang():
    """Exercise ``:lang()`` matching for the different language sources.

    Each section builds a tiny document, wraps its root element and checks
    whether the root matches a ``:lang()`` selector.  The comments describe
    the outcome each assertion expects.
    """
    from_xml = ElementWrapper.from_xml_root
    from_html = ElementWrapper.from_html_root

    # XHTML root with no language information: nothing should match.
    tree = etree.fromstring('''
        <html xmlns="http://www.w3.org/1999/xhtml"></html>
    ''')
    assert not from_xml(tree).matches(':lang(fr)')

    # XHTML root with a Content-Language <meta>: its (padded) value is
    # expected to apply even though content_language='en' is passed.
    tree = etree.fromstring('''
        <html xmlns="http://www.w3.org/1999/xhtml">
            <meta http-equiv="Content-Language" content=" fr \t"/>
        </html>
    ''')
    wrapper = from_xml(tree, content_language='en')
    assert wrapper.matches(':lang(fr)')

    # Same <meta>, but without the XHTML namespace: the content_language
    # argument is expected to apply instead.
    tree = etree.fromstring('''
        <html>
            <meta http-equiv="Content-Language" content=" fr \t"/>
        </html>
    ''')
    wrapper = from_xml(tree, content_language='en')
    assert wrapper.matches(':lang(en)')

    # Empty root: the language comes only from content_language.
    tree = etree.fromstring('<html></html>')
    wrapper = from_xml(tree, content_language='en')
    assert wrapper.matches(':lang(en)')

    # A comma-separated content_language is expected not to match.
    wrapper = from_xml(tree, content_language='en, es')
    assert not wrapper.matches(':lang(en)')

    # No content_language at all: no match.
    wrapper = from_xml(tree)
    assert not wrapper.matches(':lang(en)')

    # A mixed-case ``lang`` attribute is expected to match for an HTML root...
    tree = etree.fromstring('<html lang="eN"></html>')
    wrapper = from_html(tree)
    assert wrapper.matches(':lang(en)')

    # ...but not when the same element is wrapped as an XML root.
    tree = etree.fromstring('<html lang="eN"></html>')
    wrapper = from_xml(tree)
    assert not wrapper.matches(':lang(en)')
Ejemplo n.º 5
0
    def __init__(self, bytestring_svg, url):
        """Parse an SVG document and prepare the per-document state.

        Args:
            bytestring_svg: SVG source handed to ``ElementTree.fromstring``.
            url: stored on the instance and passed to ``parse_stylesheets``.

        The parsed root is wrapped with ``ElementWrapper.from_xml_root`` and
        combined with its parsed stylesheets into the root ``Node``.  The
        constructor ends by calling ``parse_defs`` on that root node.
        """
        root_wrapper = ElementWrapper.from_xml_root(
            ElementTree.fromstring(bytestring_svg))
        stylesheets = parse_stylesheets(root_wrapper, url)
        self.tree = Node(root_wrapper, stylesheets)
        self.url = url

        # Per-kind lookup tables, each starting out as its own empty dict.
        # NOTE(review): presumably filled while parsing definitions - confirm.
        for table_name in ('filters', 'gradients', 'images', 'markers',
                           'masks', 'patterns', 'paths'):
            setattr(self, table_name, {})

        # Cursor bookkeeping; every position gets its own fresh list.
        self.cursor_position = [0, 0]
        self.cursor_d_position = [0, 0]
        self.text_path_width = 0

        self.parse_defs(self.tree)
Ejemplo n.º 6
0
def test_lang():
    """Exercise ``:lang()`` matching for the different language sources.

    Each section builds a tiny document, wraps its root element and checks
    whether the root matches a ``:lang()`` selector.  The comments describe
    the outcome each assertion expects.
    """
    from_xml = ElementWrapper.from_xml_root
    from_html = ElementWrapper.from_html_root

    # XHTML root with no language information: nothing should match.
    tree = etree.fromstring('''
        <html xmlns="http://www.w3.org/1999/xhtml"></html>
    ''')
    assert not from_xml(tree).matches(':lang(fr)')

    # XHTML root with a Content-Language <meta>: its (padded) value is
    # expected to apply even though content_language='en' is passed.
    tree = etree.fromstring('''
        <html xmlns="http://www.w3.org/1999/xhtml">
            <meta http-equiv="Content-Language" content=" fr \t"/>
        </html>
    ''')
    wrapper = from_xml(tree, content_language='en')
    assert wrapper.matches(':lang(fr)')

    # Same <meta>, but without the XHTML namespace: the content_language
    # argument is expected to apply instead.
    tree = etree.fromstring('''
        <html>
            <meta http-equiv="Content-Language" content=" fr \t"/>
        </html>
    ''')
    wrapper = from_xml(tree, content_language='en')
    assert wrapper.matches(':lang(en)')

    # Empty root: the language comes only from content_language.
    tree = etree.fromstring('<html></html>')
    wrapper = from_xml(tree, content_language='en')
    assert wrapper.matches(':lang(en)')

    # A comma-separated content_language is expected not to match.
    wrapper = from_xml(tree, content_language='en, es')
    assert not wrapper.matches(':lang(en)')

    # No content_language at all: no match.
    wrapper = from_xml(tree)
    assert not wrapper.matches(':lang(en)')

    # A mixed-case ``lang`` attribute is expected to match for an HTML root...
    tree = etree.fromstring('<html lang="eN"></html>')
    wrapper = from_html(tree)
    assert wrapper.matches(':lang(en)')

    # ...but not when the same element is wrapped as an XML root.
    tree = etree.fromstring('<html lang="eN"></html>')
    wrapper = from_xml(tree)
    assert not wrapper.matches(':lang(en)')
Ejemplo n.º 7
0
def get_main(root: cssselect2.ElementWrapper) -> cssselect2.ElementWrapper:
    """Look up the ARIA main-content node below ``root``.

    Delegates to ``root.query`` with the attribute selector
    ``[role="main"]`` and returns whatever that call yields.

    NOTE(review): ``query`` presumably yields only the first match, and
    ``None`` when nothing matches - confirm against the cssselect2 docs.
    """
    # todo: what if there are multiple nodes?
    return root.query('[role="main"]')
Ejemplo n.º 8
0
"""

import xml.etree.ElementTree as etree
from pathlib import Path

import pytest
from cssselect2 import ElementWrapper, SelectorError, compile_selector_list

from .w3_selectors import invalid_selectors, valid_selectors

# Directory holding this module; the fixture files are read from it.
CURRENT_FOLDER = Path(__file__).parent

# Parsed ``ids.html`` fixture, plus the ``id`` of every element it contains
# (``'nil'`` for elements without one) as seen through an HTML-mode wrapper.
IDS_ROOT = etree.parse(CURRENT_FOLDER / 'ids.html')
ALL_IDS = [
    wrapped.etree_element.get('id', 'nil')
    for wrapped in ElementWrapper.from_html_root(IDS_ROOT).query_all('*')
]

# XML-mode wrapper around the XHTML ``<body>`` of the ``shakespeare.html``
# fixture.
SHAKESPEARE_BODY = ElementWrapper.from_xml_root(
    etree.parse(CURRENT_FOLDER / 'shakespeare.html').find(
        './/{http://www.w3.org/1999/xhtml}body'))


def get_test_document():
    document = etree.parse(CURRENT_FOLDER / 'content.xhtml')
    parent = document.find(".//*[@id='root']")

    # Setup namespace tests
    for id in ('any-namespace', 'no-namespace'):
        div = etree.SubElement(parent, '{http://www.w3.org/1999/xhtml}div')
        div.set('id', id)
Ejemplo n.º 9
0

    if len(val) == 1:
        return val[0]
    elif len(val) > 1:
        return val
    else:
        if required:
            raise RequirementMissing(item)
        return None

# NOTE(review): presumably collects the scraped item dicts built by the
# scraper loop below - confirm where it is appended to.
items = []

# Read the whole input document from standard input and parse it with
# html5lib (``namespaceHTMLElements=False`` is passed through as-is).
src = sys.stdin.read()
t = html5lib.parse(src, namespaceHTMLElements=False)
# Wrap the parsed tree in HTML mode so it can be queried with CSS selectors
# (``doc.query_all`` is used by the scraper loop below).
doc = ElementWrapper.from_html_root(t)

for scraper in scrapers:
    item_selector = scraper.get("item")
    if args.verbose: print ("Running scraper {0}".format(item_selector), file=sys.stderr)
    for item_elt in doc.query_all(item_selector):
        if args.verbose: print ("ITEM: {0}".format(tag_open(item_elt.etree_element)), file=sys.stderr)
        item_elt.etree_element.set("itemscope", "itemscope")
        # item_elt = match.etree_element
        try:
            item = {}
            for key, selector in scraper['keys'].items():
                if args.verbose:
                    print ("key, selector: {0}, {1}".format(key, selector), file=sys.stderr)
                #continue
                item[key] = item_selection(item_elt, selector, key, verbose=args.verbose)