def _get_cib_version(
    cib: _ElementTree, attribute: str, regexp: Pattern
) -> Version:
    """
    Read a version attribute from the root element of a CIB and parse it.

    cib -- tree whose root element is expected to carry the attribute
    attribute -- name of the attribute holding the version string
    regexp -- compiled pattern the value must match; it has to define the
        named groups "major", "minor" and "rev"

    Returns a Version built from the matched groups. The revision is None
    when the "rev" group is empty or did not take part in the match.

    Raises LibraryError carrying a CibLoadErrorBadFormat report when the
    attribute is missing or its value does not match the pattern.
    """
    version = cib.getroot().get(attribute)
    if version is None:
        raise LibraryError(
            ReportItem.error(
                reports.messages.CibLoadErrorBadFormat(
                    f"the attribute '{attribute}' of the element 'cib' "
                    "is missing"
                )
            )
        )

    parsed = regexp.match(version)
    if not parsed:
        raise LibraryError(
            ReportItem.error(
                reports.messages.CibLoadErrorBadFormat(
                    f"the attribute '{attribute}' of the element 'cib' has "
                    f"an invalid value: '{version}'"
                )
            )
        )

    # Convert the groups one by one, in the same order as they are passed on.
    major = int(parsed.group("major"))
    minor = int(parsed.group("minor"))
    rev_text = parsed.group("rev")
    rev = int(rev_text) if rev_text else None
    return Version(major, minor, rev)
def validate(tree: etree._ElementTree, raise_: bool = False) -> bool:
    """Validate a UBL document against the schema registered for its root tag.

    Args:
        tree: The document tree to validate.
        raise_: When True, report failures by raising instead of
            returning False.

    Returns:
        True if the document is valid. False if it is invalid or no schema
        is available for its root tag (only when ``raise_`` is False).

    Raises:
        KeyError: No schema is available for the root tag and ``raise_``
            is True.
        Validation failure raised by ``schema.assertValid`` when ``raise_``
            is True - see https://lxml.de/validation.html#xmlschema
    """
    # Work out which UBL version the document declares; assume 2.0 when the
    # version lookup finds nothing.
    found = ubl_version_finder_xp(tree)
    ubl_version = found[0].text.strip() if len(found) > 0 else "2.0"

    version_parts = tuple(int(part) for part in ubl_version.split("."))
    if version_parts > (2, 1):
        warnings.warn(
            f"We cannot validate UBL {ubl_version} documents. Trying anyway")

    # The schema is selected by the tag of the root element.
    root = tree.getroot()
    schema = get_schema(root.tag)
    if schema is None:
        if not raise_:
            return False
        raise KeyError(f"No schema available for root tree {root.tag}")

    if not raise_:
        return schema.validate(tree)
    schema.assertValid(tree)
    return True
def _parse_element_tree(element_tree: etree._ElementTree) -> List[Node]:
    """Validate a tree against the interface schema and parse its root.

    The tree is first checked with ``interface_schema().assertValid``; only
    a tree that passes is handed to ``_parse_interface``.

    :param element_tree: parsed xml tree to operate on
    :raises errors.InvalidNoDLDocumentError: if the tree fails schema
        validation; the underlying ``etree.DocumentInvalid`` is attached as
        the cause
    :return: the result of ``_parse_interface`` for the root element
    """
    try:
        interface_schema().assertValid(element_tree)
    except etree.DocumentInvalid as e:
        # Chain explicitly so the traceback shows the schema error as the
        # direct cause instead of as an error raised while handling another.
        raise errors.InvalidNoDLDocumentError(e) from e
    return _parse_interface(element_tree.getroot())
def find_urls(self, tree: etree._ElementTree) -> Iterator[str]:
    """Yield URLs found in the document C{tree}.

    Every node reachable from the root is visited. For each node, the
    attribute names returned by C{self.link_attrs_for_node} for the node's
    tag are looked up, and the value of each attribute that is present
    is yielded.
    """
    for element in tree.getroot().iter():
        for attr_name in self.link_attrs_for_node(element.tag):
            try:
                value = element.attrib[attr_name]
            except KeyError:
                # This node does not carry that link attribute.
                continue
            yield cast(str, value)
def _parse_element_tree(element_tree: etree._ElementTree) -> List[Node]:
    """Validate a tree against the interface schema and parse its root.

    :param element_tree: parsed xml tree to operate on
    :type element_tree: etree._ElementTree
    :raises InvalidNoDLDocumentError: if tree does not adhere to schema; the
        underlying ``etree.DocumentInvalid`` is attached as the cause
    :return: List of NoDL nodes present in the xml tree.
    :rtype: List[Node]
    """
    try:
        interface_schema().assertValid(element_tree)
    except etree.DocumentInvalid as e:
        # Chain explicitly so the traceback shows the schema error as the
        # direct cause instead of as an error raised while handling another.
        raise InvalidNoDLDocumentError(e) from e
    return _parse_interface(element_tree.getroot())
def list_tag_attribute_usage(tree: etree._ElementTree):
    """Walk the tree depth-first and describe every element in it.

    Yields one tuple per element, parents before their children:
    ``(parent_path, tag, attrs, value)`` where

    - ``parent_path`` is a tuple with the tags of all ancestors, root first,
    - ``tag`` is the element's own tag,
    - ``attrs`` maps attribute names to values, leaving out names that
      start with ``{`` (namespaced attributes),
    - ``value`` is the element text when the element has no child elements
      and its text is not blank, otherwise None.
    """
    def walk(element, ancestors):
        child_elements = list(element.iterchildren(tag=etree.Element))

        # Only leaf elements with non-blank text report a value.
        value = None
        if not child_elements:
            text = element.text
            if text and text.strip():
                value = text

        attrs = {}
        for key, attr_value in element.attrib.items():
            # Ignore namespaced attributes
            if key.startswith('{'):
                continue
            attrs[key] = attr_value

        yield ancestors, element.tag, attrs, value

        own_path = ancestors + (element.tag, )
        for child in child_elements:
            yield from walk(child, own_path)

    yield from walk(tree.getroot(), ())
def generate_criteria(
    crit_def: _ElementTree
) -> Tuple[CriteriaFunction, CriteriaFunction, CriteriaFunction]:
    """
    Builds the three criteria evaluation functions from a criteria definition.

    Each function is generated from the first node matching the respective
    XPath expression below the root. When no node matches, a function that
    ignores its argument and returns a new UnknownEvaluable is used instead.

    :param crit_def: The criteria root element.
    :return: A tuple of evaluation functions: the first for preconditions,
             the second for success criteria and the third for fail criteria.
    """
    from dbtypes.criteria import UnknownEvaluable
    from util.xml import xpath

    root: _Element = crit_def.getroot()

    def build(expression: str):
        # Use the first matching node, or fall back to "unknown".
        matches = xpath(root, expression)
        if matches:
            return generate_criterion(matches[0])
        return lambda _: UnknownEvaluable()

    precondition = build("db:precondition")
    success = build("db:success")
    failure = build("db:failure")
    return precondition, success, failure
def repair_tree(tree: etree._ElementTree,
                content_type: str,
                report: Report) -> bool:
    """Check the document tree for general errors that would prevent
    other checkers from doing their work and repair those if possible.

    Currently the only check is that the root element of a document
    served as 'application/xhtml+xml' is 'html' in the XHTML namespace;
    a violation is reported as an error on C{report}.

    @return: True iff the tree was modified.
    """
    # Only XHTML documents are checked.
    if content_type != 'application/xhtml+xml':
        return False

    # Make sure XHTML root element has a namespace.
    root = tree.getroot()
    if root.tag == '{http://www.w3.org/1999/xhtml}html':
        return False

    msg = 'The root element does not use the XHTML namespace.'
    expected = xml.code['<html xmlns="http://www.w3.org/1999/xhtml"']
    html = concat(msg, xml.br, 'expected: ', expected)
    report.error(msg, extra={'html': html})

    # lxml will auto-fix this for us when serializing, so there is
    # no need to actually modify the tree; it still counts as modified.
    return True
def find_referrers_in_html(self,
                           tree: etree._ElementTree,
                           url: str) -> Iterator[Referrer]:
    """Yield referrers for forms found in HTML tags in the document
    C{tree}.

    One C{Form} is yielded for every 'form' element that:
      - has both an 'action' and a 'method' attribute,
      - uses the 'get' method (case-insensitive),
      - resolves, relative to C{url}, to a submit URL that starts with
        C{self.base_url}.
    An empty 'action' is replaced by the path of C{url}.

    The controls of a form are collected in this order: 'input' controls
    that are neither radio nor submit buttons, 'select' controls,
    'textarea' controls, one group per radio button name and finally
    one group holding all submit buttons, if there are any.
    """
    root = tree.getroot()

    # Elements have to be looked up by qualified name if the document
    # declares a default namespace.
    ns_prefix = ''
    if None in root.nsmap:
        default_ns = root.nsmap[None]
        if isinstance(default_ns, bytes):
            default_ns = default_ns.decode('ascii')
        ns_prefix = '{%s}' % default_ns

    for form_node in root.iter(ns_prefix + 'form'):
        # TODO: How to handle an empty action?
        #       1. take current path, erase query (current impl)
        #       2. take current path, merge query
        #       3. flag as error (not clearly specced)
        #       I think either flag as error, or mimic the browsers.
        try:
            action = cast(str, form_node.attrib['action']) \
                or urlsplit(url).path
            method = cast(str, form_node.attrib['method']).lower()
        except KeyError:
            # Forms lacking either attribute are skipped.
            continue
        if method == 'post':
            # TODO: Support POST (with flag to enable/disable).
            continue
        if method != 'get':
            # The DTD will already have flagged this as a violation.
            continue
        submit_url = urljoin(url, action)
        if not submit_url.startswith(self.base_url):
            continue

        # Note: Disabled controls should not be submitted, so we pretend
        #       they do not even exist.
        controls = []
        radio_buttons: DefaultDict[str, List[RadioButton]] \
            = defaultdict(list)
        submit_buttons = []

        for inp in form_node.iter(ns_prefix + 'input'):
            control = _parse_input_control(inp.attrib)
            if control is None:
                continue
            if isinstance(control, RadioButton):
                radio_buttons[control.name].append(control)
            elif isinstance(control, SubmitButton):
                submit_buttons.append(control)
            else:
                controls.append(control)

        for control_node in form_node.iter(ns_prefix + 'select'):
            attrib = control_node.attrib
            if 'disabled' in attrib:
                continue
            name = attrib.get('name')
            # Options without text are left out; an option without
            # a 'value' attribute submits its text.
            options = [
                option.attrib.get('value', option.text)
                for option in control_node.iter(ns_prefix + 'option')
                if option.text is not None
                ]
            # 'multiple' is a boolean attribute: its presence is what
            # counts, not its value (multiple="" must also enable it).
            # This matches how 'disabled' is tested.
            if 'multiple' in attrib:
                for option in options:
                    controls.append(SelectMultiple(name, option))
            else:
                controls.append(SelectSingle(name, options))

        for control_node in form_node.iter(ns_prefix + 'textarea'):
            if 'disabled' in control_node.attrib:
                continue
            name = control_node.attrib.get('name')
            value = control_node.text
            _LOG.debug('textarea "%s": %s', name, value)
            controls.append(TextArea(name, value))

        # Merge exclusive controls.
        for buttons in radio_buttons.values():
            controls.append(RadioButtonGroup(buttons))
        if submit_buttons:
            controls.append(SubmitButtons(submit_buttons))
        # If the form contains no submit buttons, assume it can be
        # submitted using JavaScript, so continue.

        yield Form(submit_url, method, controls)
def test__parse_nodes(valid_nodl: etree._ElementTree):
    """The root of the ``valid_nodl`` document must parse into two nodes."""
    parse_nodes = nodl._parsing._v1._parsing._parse_nodes
    root = valid_nodl.getroot()
    parsed = parse_nodes(root)
    assert len(parsed) == 2
def safe_xinclude(tree: etree._ElementTree):
    """Run XInclude processing on ``tree`` and return a re-parsed copy.

    Tries to prevent problems with the lxml xinclude function, where
    unexpanded nodes sometimes still stick in the tree after xinclusion.
    To that end the root of the processed tree is serialized and parsed
    again; the result of that parse is returned. ``tree`` itself is
    processed in place by ``tree.xinclude()``.
    """
    tree.xinclude()
    serialized = etree.tostring(tree.getroot())
    return etree.fromstring(serialized)
def is_dbe(root: _ElementTree) -> bool:
    """
    Tells whether the root element of the given tree is named "environment".

    :param root: The element tree to inspect (despite the name, a tree and
                 not an element).
    :return: True iff get_tag_name reports "environment" for the tree's root
             element.
    """
    from util.xml import get_tag_name

    tag_name = get_tag_name(root.getroot())
    return tag_name == "environment"
def is_dbc(root: _ElementTree) -> bool:
    """
    Tells whether the root element of the given tree is named "criteria".

    :param root: The element tree to inspect (despite the name, a tree and
                 not an element).
    :return: True iff get_tag_name reports "criteria" for the tree's root
             element.
    """
    from util.xml import get_tag_name

    tag_name = get_tag_name(root.getroot())
    return tag_name == "criteria"
def __init__(self, document: etree._ElementTree):
    """Store the document and its root element, then run ``cleanup``.

    ``self.document`` and ``self.root`` are both set before
    ``self.cleanup()`` is called.
    """
    self.document = document
    root = document.getroot()
    self.root = root
    self.cleanup()