def select_ids(selector, html_only):
    """Return the ids matched by *selector* when ``root`` is wrapped as HTML.

    The selector is run twice against ``root`` (a name resolved from the
    surrounding scope, not from this function): once through
    ``ElementWrapper.from_xml_root`` and once through
    ``ElementWrapper.from_html_root``.  For each match the ``id`` attribute
    of the underlying etree element is collected, with ``'nil'`` standing in
    for elements that have none.

    :param selector: selector string handed to ``query_all``.
    :param html_only: when truthy, the XML-wrapped query is asserted to
        match nothing; otherwise both queries are asserted to yield the
        same ids.
    :returns: the list of ids from the HTML-wrapped query.
    :raises AssertionError: if the XML-wrapped result is not the expected one.
    """
    def collect(wrapper):
        # One id per match, in the order ``query_all`` yields them.
        return [
            match.etree_element.get('id', 'nil')
            for match in wrapper.query_all(selector)
        ]

    from_xml = collect(ElementWrapper.from_xml_root(root))
    from_html = collect(ElementWrapper.from_html_root(root))

    expected_from_xml = [] if html_only else from_html
    assert from_xml == expected_from_xml
    return from_html
def test_select_shakespeare():
    """Check how many elements each selector matches in the Shakespeare body.

    ``HTML_SHAKESPEARE`` is parsed, its XHTML ``body`` element is wrapped
    with ``ElementWrapper.from_xml_root`` and every selector in the table
    below is expected to match exactly the listed number of elements.
    """
    document = etree.fromstring(HTML_SHAKESPEARE)
    body = ElementWrapper.from_xml_root(
        document.find('.//{http://www.w3.org/1999/xhtml}body'))

    def count(selector):
        return sum(1 for _ in body.query_all(selector))

    # Data borrowed from http://mootools.net/slickspeed/
    expected_counts = (
        # Changed from original; probably because I'm only
        # searching the body.
        # ('*', 252),
        ('*', 246),
        # ('div:contains(CELIA)', 26),
        ('div:only-child', 22),  # ?
        ('div:nth-child(even)', 106),
        ('div:nth-child(2n)', 106),
        ('div:nth-child(odd)', 137),
        ('div:nth-child(2n+1)', 137),
        ('div:nth-child(n)', 243),
        ('div:last-child', 53),
        ('div:first-child', 51),
        ('div > div', 242),
        ('div + div', 190),
        ('div ~ div', 190),
        ('body', 1),
        ('body div', 243),
        ('div', 243),
        ('div div', 242),
        ('div div div', 241),
        ('div, div, div', 243),
        ('div, a, span', 243),
        ('.dialog', 51),
        ('div.dialog', 51),
        ('div .dialog', 51),
        ('div.character, div.dialog', 99),
        ('div.direction.dialog', 0),
        ('div.dialog.direction', 0),
        ('div.dialog.scene', 1),
        ('div.scene.scene', 1),
        ('div.scene .scene', 0),
        ('div.direction .dialog ', 0),
        ('div .dialog .direction', 4),
        ('div.dialog .dialog .direction', 4),
        ('#speech5', 1),
        ('div#speech5', 1),
        ('div #speech5', 1),
        ('div.scene div.dialog', 49),
        ('div#scene1 div.dialog div', 142),
        ('#scene1 #speech1', 1),
        ('div[class]', 103),
        ('div[class=dialog]', 50),
        ('div[class^=dia]', 51),
        ('div[class$=log]', 50),
        ('div[class*=sce]', 1),
        ('div[class|=dialog]', 50),  # ? Seems right
        # ('div[class!=madeup]', 243),  # ? Seems right
        ('div[class~=dialog]', 51),  # ? Seems right
    )

    # Entries are checked in table order, so the first mismatch aborts the
    # test exactly where a sequential list of asserts would.
    for selector, expected in expected_counts:
        assert count(selector) == expected
def test_valid_selectors(test):
    """Run one selector test case against ``TEST_DOCUMENT``.

    :param test: mapping describing the case.  Keys read here: ``'xfail'``
        and ``'exclude'`` (both optional), ``'selector'``, ``'expect'`` and
        ``'name'``.
    :raises AssertionError: carrying ``test['name']`` when the ids of the
        matched elements differ from ``test['expect']``; the selector, the
        actual ids and the expected ids are printed first.
    """
    if test.get('xfail'):
        pytest.xfail()

    # Cases excluded for 'document' or 'xhtml' are not run here.
    excluded = test.get('exclude', ())
    if any(target in excluded for target in ('document', 'xhtml')):
        return

    wrapper = ElementWrapper.from_xml_root(TEST_DOCUMENT)
    matched_ids = [element.id for element in wrapper.query_all(test['selector'])]
    if matched_ids != test['expect']:
        # One value per line, same output as four separate print calls.
        print(test['selector'], matched_ids, '!=', test['expect'], sep='\n')
        raise AssertionError(test['name'])
def test_lang():
    """Check ``:lang()`` matching for several language sources.

    Each case builds a small document, wraps its root element and asserts
    whether the root matches a ``:lang(...)`` selector.  The cases vary the
    presence of a ``Content-Language`` ``<meta>`` element, the XHTML
    namespace, the ``content_language`` argument, and a ``lang`` attribute
    seen through the HTML versus the XML wrapper.
    """
    # Nothing declares a language: ':lang(fr)' must not match.
    bare = etree.fromstring(''' <html xmlns="http://www.w3.org/1999/xhtml"></html> ''')
    assert not ElementWrapper.from_xml_root(bare).matches(':lang(fr)')

    # XHTML-namespaced document with a Content-Language meta of " fr \t":
    # 'fr' is expected to match even though content_language='en' is passed.
    with_meta = etree.fromstring(''' <html xmlns="http://www.w3.org/1999/xhtml"> <meta http-equiv="Content-Language" content=" fr \t"/> </html> ''')
    assert ElementWrapper.from_xml_root(
        with_meta, content_language='en').matches(':lang(fr)')

    # Same meta element but without the XHTML namespace: 'en' from the
    # content_language argument is expected to match.
    unnamespaced = etree.fromstring(''' <html> <meta http-equiv="Content-Language" content=" fr \t"/> </html> ''')
    assert ElementWrapper.from_xml_root(
        unnamespaced, content_language='en').matches(':lang(en)')

    # No meta at all: only the content_language argument varies.
    empty = etree.fromstring('<html></html>')
    assert ElementWrapper.from_xml_root(
        empty, content_language='en').matches(':lang(en)')
    assert not ElementWrapper.from_xml_root(
        empty, content_language='en, es').matches(':lang(en)')
    assert not ElementWrapper.from_xml_root(empty).matches(':lang(en)')

    # A lang="eN" attribute is expected to match ':lang(en)' through the
    # HTML wrapper but not through the XML wrapper.
    as_html = etree.fromstring('<html lang="eN"></html>')
    assert ElementWrapper.from_html_root(as_html).matches(':lang(en)')

    as_xml = etree.fromstring('<html lang="eN"></html>')
    assert not ElementWrapper.from_xml_root(as_xml).matches(':lang(en)')
def __init__(self, bytestring_svg, url):
    """Parse an SVG byte string and initialise the per-document state.

    :param bytestring_svg: SVG source handed to ``ElementTree.fromstring``.
    :param url: stored as ``self.url`` and passed to ``parse_stylesheets``.

    The parsed root is wrapped with ``ElementWrapper.from_xml_root``; the
    wrapper and the result of ``parse_stylesheets`` are used to build the
    ``Node`` stored as ``self.tree``.  ``self.parse_defs`` is called on that
    tree last, once every attribute below exists.
    """
    wrapper = ElementWrapper.from_xml_root(
        ElementTree.fromstring(bytestring_svg))
    self.tree = Node(wrapper, parse_stylesheets(wrapper, url))
    self.url = url

    # Empty lookup tables, each a fresh dict.
    # NOTE(review): presumably filled in by parse_defs - confirm there.
    for table_name in (
        'filters',
        'gradients',
        'images',
        'markers',
        'masks',
        'patterns',
        'paths',
    ):
        setattr(self, table_name, {})

    # Initial cursor and text-path state.
    self.cursor_position = [0, 0]
    self.cursor_d_position = [0, 0]
    self.text_path_width = 0

    self.parse_defs(self.tree)
def get_main(root: cssselect2.ElementWrapper) -> cssselect2.ElementWrapper:
    """
    Look up the ARIA main content node below the given root.

    The lookup is delegated to ``root.query`` with the attribute selector
    ``[role="main"]``; whatever that call returns is returned unchanged.
    """
    # todo: what if there are multiple nodes?
    return root.query('[role="main"]')
""" import xml.etree.ElementTree as etree from pathlib import Path import pytest from cssselect2 import ElementWrapper, SelectorError, compile_selector_list from .w3_selectors import invalid_selectors, valid_selectors CURRENT_FOLDER = Path(__file__).parent IDS_ROOT = etree.parse(CURRENT_FOLDER / 'ids.html') ALL_IDS = [ element.etree_element.get('id', 'nil') for element in ElementWrapper.from_html_root(IDS_ROOT).query_all('*') ] SHAKESPEARE_BODY = (ElementWrapper.from_xml_root( etree.parse( CURRENT_FOLDER / 'shakespeare.html').find('.//{http://www.w3.org/1999/xhtml}body'))) def get_test_document(): document = etree.parse(CURRENT_FOLDER / 'content.xhtml') parent = document.find(".//*[@id='root']") # Setup namespace tests for id in ('any-namespace', 'no-namespace'): div = etree.SubElement(parent, '{http://www.w3.org/1999/xhtml}div') div.set('id', id)
if len(val) == 1: return val[0] elif len(val) > 1: return val else: if required: raise RequirementMissing(item) return None items = [] src = sys.stdin.read() t = html5lib.parse(src, namespaceHTMLElements=False) doc = ElementWrapper.from_html_root(t) for scraper in scrapers: item_selector = scraper.get("item") if args.verbose: print ("Running scraper {0}".format(item_selector), file=sys.stderr) for item_elt in doc.query_all(item_selector): if args.verbose: print ("ITEM: {0}".format(tag_open(item_elt.etree_element)), file=sys.stderr) item_elt.etree_element.set("itemscope", "itemscope") # item_elt = match.etree_element try: item = {} for key, selector in scraper['keys'].items(): if args.verbose: print ("key, selector: {0}, {1}".format(key, selector), file=sys.stderr) #continue item[key] = item_selection(item_elt, selector, key, verbose=args.verbose)