Ejemplo n.º 1
0
def _get_cib_version(
    cib: _ElementTree, attribute: str, regexp: Pattern
) -> Version:
    """Read a version number from an attribute of the CIB root element.

    The attribute value is matched against ``regexp``, which must expose the
    named groups ``major``, ``minor`` and ``rev``. An empty or unmatched
    ``rev`` group results in ``None`` for the third version component.

    Raises:
        LibraryError: carrying a ``CibLoadErrorBadFormat`` report when the
            attribute is absent or its value does not match ``regexp``.
    """

    def _bad_format(reason: str) -> LibraryError:
        # Both failure modes are reported the same way; only the text differs.
        return LibraryError(
            ReportItem.error(reports.messages.CibLoadErrorBadFormat(reason))
        )

    raw_value = cib.getroot().get(attribute)
    if raw_value is None:
        raise _bad_format(
            f"the attribute '{attribute}' of the element 'cib' "
            "is missing"
        )

    parsed = regexp.match(raw_value)
    if not parsed:
        raise _bad_format(
            f"the attribute '{attribute}' of the element 'cib' has "
            f"an invalid value: '{raw_value}'"
        )

    major = int(parsed.group("major"))
    minor = int(parsed.group("minor"))
    rev_text = parsed.group("rev")
    revision = int(rev_text) if rev_text else None
    return Version(major, minor, revision)
Ejemplo n.º 2
0
def validate(tree: etree._ElementTree, raise_: bool = False) -> bool:
    """Validate a UBL document against its associated schema.

    The UBL version is looked up in the document with
    ``ubl_version_finder_xp``. When no version element is found, or the
    element has no (non-blank) text, version "2.0" is assumed. A warning is
    emitted for versions newer than 2.1, but validation is attempted anyway.

    Args:
        tree: The tree document to validate
        raise_: True to raise an exception if the validation fails

    Returns:
        True if the document is valid. False if it is invalid or if no
        schema is available for its root tag (only when raise_ is False).

    Raises:
        KeyError: raise_ is True and no schema is available for the root tag.
        ValueError: the declared version is not made of dot-separated
            integers.
        Validation failure raised by ``schema.assertValid`` when raise_ is
        True - see https://lxml.de/validation.html#xmlschema
    """
    # Work out which UBL version the document claims to be.
    version_nodes = ubl_version_finder_xp(tree)
    declared = ""
    if len(version_nodes) > 0:
        # ``text`` is None for an empty element; treat that like a missing
        # version instead of crashing on ``None.strip()``.
        declared = (version_nodes[0].text or "").strip()
    ubl_version = declared or "2.0"

    ubl_version_tuple = tuple(int(part) for part in ubl_version.split("."))
    if ubl_version_tuple > (2, 1):
        warnings.warn(
            f"We cannot validate UBL {ubl_version} documents. Trying anyway")

    # The schema is selected from the (namespaced) tag of the root element.
    root = tree.getroot()
    schema = get_schema(root.tag)
    if schema is None:
        if raise_:
            raise KeyError(f"No schema available for root tree {root.tag}")
        return False
    if raise_:
        schema.assertValid(tree)
        return True
    return schema.validate(tree)
Ejemplo n.º 3
0
def _parse_element_tree(element_tree: etree._ElementTree) -> List[Node]:
    """Validate an element tree against the interface schema and parse it.

    The tree is first checked with ``interface_schema().assertValid``; on
    success its root element is handed to ``_parse_interface`` and that
    result is returned.

    Raises:
        errors.InvalidNoDLDocumentError: the tree fails schema validation.
            The underlying ``etree.DocumentInvalid`` is attached as the
            explicit cause.
    """
    try:
        interface_schema().assertValid(element_tree)
    except etree.DocumentInvalid as e:
        # Chain explicitly so the traceback shows the schema error as the
        # direct cause rather than as an incidental "during handling" error.
        raise errors.InvalidNoDLDocumentError(e) from e
    return _parse_interface(element_tree.getroot())
Ejemplo n.º 4
0
 def find_urls(self, tree: etree._ElementTree) -> Iterator[str]:
     """Yield URLs found in the document ``tree``.

     Every node reachable from the root is visited; for each attribute name
     returned by ``self.link_attrs_for_node`` for the node's tag, the
     attribute's value is yielded if the node carries that attribute.
     """
     for element in tree.getroot().iter():
         for link_attr in self.link_attrs_for_node(element.tag):
             url = element.attrib.get(link_attr)
             # Attributes that are not present on this node are skipped.
             if url is not None:
                 yield cast(str, url)
Ejemplo n.º 5
0
def _parse_element_tree(element_tree: etree._ElementTree) -> List[Node]:
    """Validate an element tree against the interface schema and parse it.

    The tree is checked with ``interface_schema().assertValid``; on success
    its root element is handed to ``_parse_interface``.

    :param element_tree: parsed xml tree to operate on
    :type element_tree: etree._ElementTree
    :raises InvalidNoDLDocumentError: if tree does not adhere to schema; the
        underlying ``etree.DocumentInvalid`` is attached as the explicit
        cause
    :return: List of NoDL nodes present in the xml tree.
    :rtype: List[Node]
    """
    try:
        interface_schema().assertValid(element_tree)
    except etree.DocumentInvalid as e:
        # Chain explicitly so the traceback shows the schema error as the
        # direct cause rather than as an incidental "during handling" error.
        raise InvalidNoDLDocumentError(e) from e
    return _parse_interface(element_tree.getroot())
Ejemplo n.º 6
0
def list_tag_attribute_usage(tree: etree._ElementTree):
    """Walk the tree in document order and describe every element.

    For each element a 4-tuple ``(parent_path, tag, attrs, value)`` is
    yielded, where:

    * ``parent_path`` is a tuple with the tags of all ancestors, root first;
    * ``tag`` is the element's own tag;
    * ``attrs`` is a dict of its attributes, skipping those whose name
      starts with ``{`` (namespaced attributes);
    * ``value`` is the element's text when it has no child elements and the
      text is not blank, otherwise ``None``.
    """
    # Explicit stack instead of recursion; children are pushed in reverse so
    # they are popped (and therefore yielded) in their original order.
    pending = [(tree.getroot(), ())]
    while pending:
        node, ancestors = pending.pop()
        child_nodes = list(node.iterchildren(tag=etree.Element))

        leaf_value = None
        if not child_nodes:
            text = node.text
            if text and text.strip():
                leaf_value = text

        plain_attrs = {}
        for attr_name, attr_value in node.attrib.items():
            # Ignore namespaced attributes
            if attr_name.startswith('{'):
                continue
            plain_attrs[attr_name] = attr_value

        yield ancestors, node.tag, plain_attrs, leaf_value

        own_path = ancestors + (node.tag, )
        for child in reversed(child_nodes):
            pending.append((child, own_path))
Ejemplo n.º 7
0
def generate_criteria(
    crit_def: _ElementTree
) -> Tuple[CriteriaFunction, CriteriaFunction, CriteriaFunction]:
    """
    Builds the three criteria evaluation functions from a criteria document.

    For each of ``db:precondition``, ``db:success`` and ``db:failure`` the
    first matching node under the document root is passed to
    ``generate_criterion``. When no such node exists, a function that
    returns a fresh ``UnknownEvaluable`` on every call is used instead.
    :param crit_def: The parsed criteria document.
    :return: A tuple of functions in the order (precondition, success, failure).
    """
    from dbtypes.criteria import UnknownEvaluable
    from util.xml import xpath
    document_root: _Element = crit_def.getroot()

    def _criterion_for(query: str):
        # Only the first match is used; additional matches are ignored.
        matches = xpath(document_root, query)
        if matches:
            return generate_criterion(matches[0])
        return lambda _: UnknownEvaluable()

    return (
        _criterion_for("db:precondition"),
        _criterion_for("db:success"),
        _criterion_for("db:failure"),
    )
Ejemplo n.º 8
0
def repair_tree(tree: etree._ElementTree, content_type: str,
                report: Report) -> bool:
    """Check the document tree for general errors that would prevent
    other checkers from doing their work and repair those if possible.

    Currently the only check is that the root element of an
    'application/xhtml+xml' document is 'html' in the XHTML namespace;
    a violation is logged on C{report} as an error.

    @return: True iff the tree was modified.
    """

    # Only XHTML documents are checked.
    if content_type != 'application/xhtml+xml':
        return False

    # Make sure XHTML root element has a namespace.
    if tree.getroot().tag == '{http://www.w3.org/1999/xhtml}html':
        return False

    msg = 'The root element does not use the XHTML namespace.'
    html = concat(
        msg, xml.br, 'expected: ',
        xml.code['<html xmlns="http://www.w3.org/1999/xhtml"'])
    report.error(msg, extra={'html': html})
    # lxml will auto-fix this for us when serializing, so there is
    # no need to actually modify the tree; it still counts as modified.
    return True
Ejemplo n.º 9
0
    def find_referrers_in_html(self, tree: etree._ElementTree,
                               url: str) -> Iterator[Referrer]:
        """Yield referrers for forms found in HTML tags in the document
        C{tree}.

        One C{Form} is yielded for every C{form} element that has both an
        C{action} and a C{method} attribute, uses the C{get} method and
        whose resolved submit URL starts with C{self.base_url}. The form's
        controls are collected from its C{input}, C{select} and C{textarea}
        descendants.

        NOTE(review): the original summary also mentioned links, but this
        method body only yields forms.

        @param tree: The parsed document to inspect.
        @param url: URL of the document; relative form actions are resolved
            against it.
        """

        # Element tags are matched in the document's default namespace, if
        # it declares one; otherwise plain (un-namespaced) tags are used.
        root = tree.getroot()
        if None in root.nsmap:
            default_ns = root.nsmap[None]
            if isinstance(default_ns, bytes):
                default_ns = default_ns.decode('ascii')
            ns_prefix = '{%s}' % default_ns
        else:
            ns_prefix = ''

        for form_node in root.iter(ns_prefix + 'form'):
            # TODO: How to handle an empty action?
            #       1. take current path, erase query (current impl)
            #       2. take current path, merge query
            #       3. flag as error (not clearly specced)
            #       I think either flag as error, or mimic the browsers.
            # A form lacking either the 'action' or the 'method' attribute
            # raises KeyError here and is skipped entirely.
            # NOTE(review): confirm that skipping forms without an explicit
            # 'method' attribute is intended.
            try:
                action = cast(str, form_node.attrib['action']) \
                      or urlsplit(url).path
                method = cast(str, form_node.attrib['method']).lower()
            except KeyError:
                continue
            if method == 'post':
                # TODO: Support POST (with flag to enable/disable).
                continue
            if method != 'get':
                # The DTD will already have flagged this as a violation.
                continue
            # Only forms that submit to a URL under self.base_url are
            # reported.
            submit_url = urljoin(url, action)
            if not submit_url.startswith(self.base_url):
                continue

            # Note: Disabled controls should not be submitted, so we pretend
            #       they do not even exist.
            controls = []
            # Radio buttons are grouped by control name; submit buttons are
            # collected separately. Both are merged into single controls
            # after all elements have been scanned.
            radio_buttons: DefaultDict[str, List[RadioButton]] \
                         = defaultdict(list)
            submit_buttons = []
            for inp in form_node.iter(ns_prefix + 'input'):
                control = _parse_input_control(inp.attrib)
                if control is None:
                    # No control was produced for this input; skip it.
                    pass
                elif isinstance(control, RadioButton):
                    radio_buttons[control.name].append(control)
                elif isinstance(control, SubmitButton):
                    submit_buttons.append(control)
                else:
                    controls.append(control)
            for control_node in form_node.iter(ns_prefix + 'select'):
                name = control_node.attrib.get('name')
                multiple = control_node.attrib.get('multiple')
                disabled = 'disabled' in control_node.attrib
                if disabled:
                    continue
                # An option's value is its 'value' attribute, falling back
                # to its text; options without text are left out.
                options = [
                    option.attrib.get('value', option.text)
                    for option in control_node.iter(ns_prefix + 'option')
                    if option.text is not None
                ]
                if multiple:
                    # One SelectMultiple control per option.
                    for option in options:
                        controls.append(SelectMultiple(name, option))
                else:
                    controls.append(SelectSingle(name, options))
            for control_node in form_node.iter(ns_prefix + 'textarea'):
                name = control_node.attrib.get('name')
                value = control_node.text
                disabled = 'disabled' in control_node.attrib
                if disabled:
                    continue
                _LOG.debug('textarea "%s": %s', name, value)
                controls.append(TextArea(name, value))

            # Merge exclusive controls.
            for buttons in radio_buttons.values():
                controls.append(RadioButtonGroup(buttons))
            if submit_buttons:
                controls.append(SubmitButtons(submit_buttons))
            # If the form contains no submit buttons, assume it can be
            # submitted using JavaScript, so continue.

            yield Form(submit_url, method, controls)
Ejemplo n.º 10
0
def test__parse_nodes(valid_nodl: etree._ElementTree):
    """Parsing the root of the ``valid_nodl`` document yields two nodes."""
    document_root = valid_nodl.getroot()
    parsed = nodl._parsing._v1._parsing._parse_nodes(document_root)
    assert len(parsed) == 2
Ejemplo n.º 11
0
def safe_xinclude(tree: etree._ElementTree):
    """Run XInclude processing on ``tree`` and return a re-parsed root.

    Tries to prevent problems with the lxml xinclude function, where
    unexpanded nodes sometimes still stick in the tree after xinclusion:
    the processed root element is serialized and parsed again, and the
    freshly parsed element (not a tree) is returned. ``tree`` itself is
    modified in place by the xinclude call.
    """
    tree.xinclude()
    # Round-trip through bytes to obtain a clean, independent element.
    serialized = etree.tostring(tree.getroot())
    return etree.fromstring(serialized)
Ejemplo n.º 12
0
def is_dbe(root: _ElementTree) -> bool:
    """Tell whether the tag name of the tree's root element is "environment".

    :param root: The parsed document (an element tree, despite the name).
    """
    from util.xml import get_tag_name
    top_level_tag = get_tag_name(root.getroot())
    return top_level_tag == "environment"
Ejemplo n.º 13
0
def is_dbc(root: _ElementTree) -> bool:
    """Tell whether the tag name of the tree's root element is "criteria".

    :param root: The parsed document (an element tree, despite the name).
    """
    from util.xml import get_tag_name
    top_level_tag = get_tag_name(root.getroot())
    return top_level_tag == "criteria"
Ejemplo n.º 14
0
 def __init__(self, document: etree._ElementTree):
     """Store the document and its root element, then run ``cleanup``."""
     self.document = document
     root_element = document.getroot()
     self.root = root_element
     # cleanup() runs last, once both attributes are set.
     self.cleanup()