Exemple #1
0
def _xml_semantic_compare(left_xml_str, right_xml_str):
    """
    Return the result of a semantic comparison of two XML strings.

    Both strings are parsed with lxml and the resulting element trees are
    handed to `doctest_xml_compare.xml_compare()`, whose result is returned
    unchanged.
    """
    # Why lxml.etree and not Python's xml.etree.ElementTree (ET):
    # - The XML strings carry no XML declaration that specifies the encoding,
    #   and the encoding parameter of ET.XMLParser() does not seem to work.
    # - On Python 2.x, ET.fromstring() and ET.XML() do not handle byte
    #   strings (see bugs.python.org/issue11033), so a manual conversion to
    #   unicode would be needed.
    #
    # The input is converted to UTF-8 encoded byte strings before parsing
    # because lxml.etree.fromstring() has issues with unicode strings as
    # input; see lxml bug https://bugs.launchpad.net/lxml/+bug/1902364.
    parsed_trees = [
        etree.fromstring(_ensure_bytes(xml_str))
        for xml_str in (left_xml_str, right_xml_str)
    ]
    return doctest_xml_compare.xml_compare(*parsed_trees)
Exemple #2
0
def validate_cim_xml(cim_xml_str, root_elem_name=None):
    """
    Validate a CIM-XML string against the CIM-XML DTD, optionally
    requiring a particular XML root element.

    The validation is performed by piping the CIM-XML string into the
    `xmllint` command, which therefore needs to be available to the shell.

    If the validation succeeds, the function returns. Otherwise,
    `CIMXMLValidationError` is raised and its exception message is the
    (possibly multi-line) output of the `xmllint` command (stdout and stderr
    combined).

    Parameters:

      cim_xml_str (string): CIM-XML string to be validated.

      root_elem_name (string): Name of XML element that is expected as the root
        element in the CIM-XML string to be validated.
        `None` means no checking for a particular root element is performed.

    Raises:

      CIMXMLValidationError: CIM-XML validation error
    """

    # The DOCTYPE instruction needs the DTD file with forward slashes.
    # Also, the xmllint used on Windows requires forward slashes and complains
    # with "Could not parse DTD tests\dtd\DSP0203_2.3.1.dtd" if invoked
    # with backslashes.
    dtd_file_fw = DTD_FILE.replace('\\', '/')

    # Make sure the validator checks the specified root element, if any
    if root_elem_name is not None:
        # Convert to unicode before formatting: Formatting a byte string into
        # a unicode template would implicitly ASCII-decode it on Python 2 and
        # would insert its b'...' representation on Python 3.
        cim_xml_str = u'<!DOCTYPE {0} SYSTEM "{1}">\n{2}'.format(
            root_elem_name, dtd_file_fw, _ensure_unicode(cim_xml_str))
        xmllint_cmd = 'xmllint --valid --noout -'
    else:
        xmllint_cmd = 'xmllint --dtdvalid {0} --noout -'.format(dtd_file_fw)

    p = Popen(xmllint_cmd, stdout=PIPE, stderr=STDOUT, stdin=PIPE, shell=True)

    # communicate() feeds stdin and drains stdout concurrently and then waits
    # for the process to end. Writing all of stdin and calling wait() before
    # reading stdout can deadlock once the child has filled the stdout pipe.
    # stderr is redirected into stdout, so the second item is always None.
    output, _ = p.communicate(_ensure_bytes(cim_xml_str))

    if p.returncode != 0:
        # The output is passed on unchanged; its lines already carry their
        # line endings.
        raise CIMXMLValidationError(_ensure_unicode(output))
Exemple #3
0
def validate_xml(data, dtd_directory=None, root_elem=None):
    """
    Validate the provided XML instance data against a CIM-XML DTD, optionally
    requiring a particular XML root element.

    The validation is performed by piping the data into the `xmllint`
    command, which therefore needs to be available to the shell. Any output
    of `xmllint` (stdout and stderr combined) is written to `sys.stdout`,
    preceded by a header line.

    Arguments:

      * `data`: XML instance data to be validated, as a unicode string or as
        a byte string.
      * `dtd_directory`: Directory with the DTD file (see `DTD_FILE` for name).
      * `root_elem`: Name of XML element that is expected as the root element
        in the XML instance data to be validated. None means no checking for
        a particular root element is performed.

    Returns:

      `True` if `xmllint` exited with status 0, otherwise `False`.
    """

    dtd_file = DTD_FILE
    if dtd_directory is not None:
        dtd_file = os.path.join(dtd_directory, DTD_FILE).replace('\\', '/')

    # Work on byte strings from here on, so that prepending the DOCTYPE
    # works for both unicode and byte string input (on Python 3, a str
    # cannot be concatenated with bytes).
    data = _ensure_bytes(data)

    # Make sure the XML data requires the specified root element, if any
    if root_elem is not None:
        doctype = '<!DOCTYPE %s SYSTEM "%s">\n' % (root_elem, dtd_file)
        data = _ensure_bytes(doctype) + data
        xmllint_cmd = 'xmllint --valid --noout -'
    else:
        xmllint_cmd = 'xmllint --dtdvalid %s --noout -' % dtd_file

    p = Popen(xmllint_cmd, stdout=PIPE, stderr=STDOUT, stdin=PIPE, shell=True)

    # communicate() feeds stdin and drains stdout concurrently and then waits
    # for the process to end, which avoids blocking on a full pipe.
    # stderr is redirected into stdout, so the second item is always None.
    output, _ = p.communicate(data)

    if output:
        print("\nOutput from xmllint:")
        # The pipe delivers bytes. On Python 3, sys.stdout.write() only
        # accepts str, so decode there; on Python 2, bytes is str and is
        # written as is.
        if not isinstance(output, str):
            output = output.decode('utf-8', 'replace')
        sys.stdout.write(output)

    return p.returncode == 0
Exemple #4
0
def assertXMLEqual(s_act, s_exp, entity):
    """
    Assert that the two XML fragments are equal, tolerating the following
    variations:

      * whitespace outside of element content and attribute values.
      * order of attributes.
      * order of certain child elements (see `sort_elements` in this
        function).

    Parameters:

      * s_act and s_exp are string representations of an XML fragment. The
        strings may be Unicode strings or UTF-8 encoded byte strings.
        The strings may contain an encoding declaration even when
        they are Unicode strings.

        Note: An encoding declaration is the `encoding` attribute in the
        XML declaration (aka XML processing statement), e.g.:
            <?xml version="1.0" encoding="utf-8" ?>

      * entity (string): A human readable identification for what is compared.

    Raises:

      AssertionError: One of the strings cannot be parsed as XML, or the two
        XML fragments are not equal.
    """

    # Make sure that None values are already excluded by the caller
    assert isinstance(s_act, (six.text_type, six.binary_type))
    assert isinstance(s_exp, (six.text_type, six.binary_type))

    # Ensure Unicode strings and remove encoding from XML declaration
    encoding_pattern = re.compile(
        r'^<\?xml +(([a-zA-Z0-9_]+=".*")?) +' +
        r'encoding="utf-8" +(([a-zA-Z0-9_]+=".*")?) *\?>')
    encoding_repl = r'<?xml \1 \3 ?>'
    s_act = re.sub(encoding_pattern, encoding_repl, _ensure_unicode(s_act))
    s_exp = re.sub(encoding_pattern, encoding_repl, _ensure_unicode(s_exp))

    parser = etree.XMLParser(remove_blank_text=True)
    try:
        # Note: lxml.etree.XML() has issues with unicode strings as input,
        # so we pass UTF-8 encoded byte strings. See lxml bug
        # https://bugs.launchpad.net/lxml/+bug/1902364 for a similar issue
        # with lxml.etree.fromstring().
        x_act = etree.XML(_ensure_bytes(s_act), parser=parser)
        x_exp = etree.XML(_ensure_bytes(s_exp), parser=parser)
    except etree.XMLSyntaxError as exc:
        raise AssertionError("XML cannot be validated for %s: %s" %
                             (entity, exc))

    def local_name(elem):
        """
        Return the tag name of `elem` without any namespace part, or `None`
        for nodes whose tag is not a string (e.g. comments and processing
        instructions).
        """
        tag = elem.tag
        if not isinstance(tag, six.string_types):
            return None
        return tag.rpartition('}')[2]

    def sort_siblings(parent, tag, attr):
        """
        Reorder the children of `parent` whose local tag name is `tag` by the
        value of their `attr` attribute.

        The sorted elements are placed into the positions that were occupied
        by elements with that tag; all other children keep their positions.
        """
        children = list(parent)
        ordered = iter(sorted(
            (child for child in children if local_name(child) == tag),
            key=lambda e: e.get(attr, '')))
        # The slice assignment changes the original XML tree. Only the
        # existing children are assigned, in a possibly different order.
        parent[:] = [
            next(ordered) if local_name(child) == tag else child
            for child in children
        ]

    def sort_embedded(root, sort_elements):
        """
        Helper function for `sort_children()`, in support of embedded
        objects. This function invokes sort_children() on each embedded
        object in `root`, after unembedding the embedded object.

        Parameters:
          root (etree.Element):
            XML tree of the CIM-XML representation of the CIM element that
            contains an embedded CIM object (e.g. the CIM element may be
            an INSTANCE XML element and one of its PROPERTY child elements
            has a value that is an embedded CIM instance).
          sort_elements (list of tuple(tag, attr)):
            Passed on to `sort_children()`.
        """
        # NOTE(review): For a matched VALUE.ARRAY element, the embedded
        # objects are presumably in its VALUE children and its own text is
        # None; confirm that xml_unembed() is prepared for that.
        emb_elems = root.xpath("//*[@EmbeddedObject or @EMBEDDEDOBJECT]"
                               "/*[local-name() = 'VALUE' or "
                               "local-name() = 'VALUE.ARRAY']")
        for emb_elem in emb_elems:
            elem = xml_unembed(emb_elem.text)
            sort_children(elem, sort_elements)
            emb_elem.text = xml_embed(elem)

    def sort_children(root, sort_elements):
        """
        Sort certain elements in the `root` parameter to facilitate
        comparison of two XML documents.

        For each (tag, attr) item in `sort_elements`, the elements with that
        tag are sorted by that attribute among their siblings. This is done
        separately for each parent element, so that elements are never moved
        to a different parent and siblings with other tags are retained.

        In addition, make sure this is also applied to any embedded
        objects (in their unembedded state).
        """
        sort_embedded(root, sort_elements)
        for tag, attr in sort_elements:
            # Determine the distinct parents of all elements with this tag
            # name. A root element with this tag has no parent and thus no
            # siblings to be sorted with.
            parents = []
            for elem in root.xpath("//*[local-name() = $tag]", tag=tag):
                parent = elem.getparent()
                if parent is None:
                    continue
                if not any(parent is known for known in parents):
                    parents.append(parent)
            for parent in parents:
                sort_siblings(parent, tag, attr)

    sort_elements = [
        # Sort sibling elements with <first> tag by its <second> attribute
        ("IPARAMVALUE", "NAME"),
        ("PROPERTY", "NAME"),
        ("PROPERTY.ARRAY", "NAME"),
        ("PARAMETER", "NAME"),
        ("KEYBINDING", "NAME"),
    ]
    sort_children(x_act, sort_elements)
    sort_children(x_exp, sort_elements)

    ns_act = _ensure_unicode(etree.tostring(x_act))
    ns_exp = _ensure_unicode(etree.tostring(x_exp))

    checker = doctestcompare.LXMLOutputChecker()

    # This tolerates differences in whitespace and attribute order
    if not checker.check_output(ns_act, ns_exp, 0):
        diff = checker.output_difference(doctest.Example("", ns_exp), ns_act,
                                         0)
        raise AssertionError("XML is not as expected in %s: %s" %
                             (entity, diff))