def _xml_semantic_compare(left_xml_str, right_xml_str):
    """
    Semantically compare two XML strings and return the comparison result.

    Both strings are parsed into lxml element trees, which are then handed
    to `doctest_xml_compare.xml_compare()`; its result is returned as is.

    Parameters:

      left_xml_str (string): First XML string.

      right_xml_str (string): Second XML string.
    """
    # lxml.etree is used here rather than Python's xml.etree.ElementTree
    # (ET) because:
    # - The XML strings carry no XML directive specifying the encoding, and
    #   the encoding parameter of ET.XMLParser() does not seem to work.
    # - ET.fromstring() and ET.XML() do not handle byte strings in
    #   Python 2.x (see bugs.python.org/issue11033), so a manual conversion
    #   to unicode would be needed.
    # Note: lxml.etree.fromstring() has issues with unicode strings as
    # input, so UTF-8 encoded byte strings are passed. See lxml bug
    # https://bugs.launchpad.net/lxml/+bug/1902364.
    parsed_trees = [
        etree.fromstring(_ensure_bytes(xml_str))
        for xml_str in (left_xml_str, right_xml_str)
    ]
    return doctest_xml_compare.xml_compare(*parsed_trees)
def validate_cim_xml(cim_xml_str, root_elem_name=None):
    """
    Validate a CIM-XML string against the CIM-XML DTD, optionally
    requiring a particular XML root element.

    If the validation succeeds, the function returns. Otherwise,
    `CIMXMLValidationError` is raised and its exception message is the
    (possible multi-line) output of the `xmllint` command.

    Parameters:

      cim_xml_str (string): CIM-XML string to be validated.

      root_elem_name (string): Name of XML element that is expected as the
        root element in the CIM-XML string to be validated.
        `None` means no checking for a particular root element is performed.

    Raises:

      CIMXMLValidationError: CIM-XML validation error
    """

    # The DOCTYPE instruction needs the DTD file with forward slashes.
    # Also, the xmllint used on Windows requires forward slashes and
    # complains with "Could not parse DTD tests\dtd\DSP0203_2.3.1.dtd" if
    # invoked with backslashes.
    dtd_file_fw = DTD_FILE.replace('\\', '/')

    # Make sure the validator checks the specified root element, if any
    if root_elem_name is not None:
        cim_xml_str = u'<!DOCTYPE {0} SYSTEM "{1}">\n{2}'. \
            format(root_elem_name, dtd_file_fw, cim_xml_str)
        xmllint_cmd = 'xmllint --valid --noout -'
    else:
        xmllint_cmd = 'xmllint --dtdvalid {0} --noout -'.format(dtd_file_fw)

    p = Popen(xmllint_cmd, stdout=PIPE, stderr=STDOUT, stdin=PIPE, shell=True)

    # communicate() feeds stdin and drains stdout concurrently and then
    # waits for the process. Waiting for the process before reading its
    # stdout pipe can block forever once the child has filled the pipe
    # buffer with diagnostics.
    output, _ = p.communicate(_ensure_bytes(cim_xml_str))

    if p.returncode != 0:
        # The xmllint output is passed on unmodified; its lines already
        # carry their own line endings.
        raise CIMXMLValidationError(_ensure_unicode(output))
def validate_xml(data, dtd_directory=None, root_elem=None):
    """
    Validate the provided XML instance data against a CIM-XML DTD,
    optionally requiring a particular XML root element.

    Any output of the `xmllint` command is written to stdout, preceded by
    a header line.

    Arguments:

      * `data`: XML instance data to be validated (Unicode string or byte
        string).
      * `dtd_directory`: Directory with the DTD file (see `DTD_FILE` for
        name).
      * `root_elem`: Name of XML element that is expected as the root
        element in the XML instance data to be validated. None means no
        checking for a particular root element is performed.

    Returns:

      bool: True if `xmllint` exited with status 0, otherwise False.
    """

    dtd_file = DTD_FILE
    if dtd_directory is not None:
        dtd_file = os.path.join(dtd_directory, DTD_FILE)
    # Both the DOCTYPE instruction and the xmllint command line get the DTD
    # path with forward slashes, regardless of whether a directory was
    # specified.
    dtd_file = dtd_file.replace('\\', '/')

    # Make sure the XML data requires the specified root element, if any
    if root_elem is not None:
        doctype = '<!DOCTYPE %s SYSTEM "%s">\n' % (root_elem, dtd_file)
        # Concatenate on the byte string level so that `data` may be a
        # Unicode string or a byte string without any implicit conversion.
        data = _ensure_bytes(doctype) + _ensure_bytes(data)
        xmllint_cmd = 'xmllint --valid --noout -'
    else:
        xmllint_cmd = 'xmllint --dtdvalid %s --noout -' % dtd_file

    p = Popen(xmllint_cmd, stdout=PIPE, stderr=STDOUT, stdin=PIPE, shell=True)

    # communicate() feeds stdin and drains stdout concurrently, so neither
    # side can block on a full pipe buffer, and then waits for the process.
    output, _ = p.communicate(_ensure_bytes(data))

    if output:
        print("\nOutput from xmllint:")
        # The pipe delivers bytes. sys.stdout.write() needs the native
        # string type: that is the byte string itself on Python 2, but a
        # Unicode string on Python 3.
        if not isinstance(output, str):
            output = _ensure_unicode(output)
        sys.stdout.write(output)

    return p.returncode == 0
def assertXMLEqual(s_act, s_exp, entity):
    """
    Assert that the two XML fragments are equal, tolerating the following
    variations:

      * whitespace outside of element content and attribute values.
      * order of attributes.
      * order of certain child elements (see `sort_elements` in this
        function).

    Parameters:

      * s_act and s_exp are string representations of an XML fragment. The
        strings may be Unicode strings or UTF-8 encoded byte strings.
        The strings may contain an encoding declaration even when
        they are Unicode strings.

        Note: An encoding declaration is the `encoding` attribute in the
        XML declaration (aka XML processing statement), e.g.:
            <?xml version="1.0" encoding="utf-8" ?>

      * entity (string): A human readable identification for what is
        compared.

    Raises:

      AssertionError: One of the XML fragments cannot be parsed, or the two
        fragments are not equal.
    """

    # Make sure that None values are already excluded by the caller
    assert isinstance(s_act, (six.text_type, six.binary_type))
    assert isinstance(s_exp, (six.text_type, six.binary_type))

    # Ensure Unicode strings and remove encoding from XML declaration
    encoding_pattern = re.compile(
        r'^<\?xml +(([a-zA-Z0-9_]+=".*")?) +' +
        r'encoding="utf-8" +(([a-zA-Z0-9_]+=".*")?) *\?>')
    encoding_repl = r'<?xml \1 \3 ?>'
    s_act = re.sub(encoding_pattern, encoding_repl, _ensure_unicode(s_act))
    s_exp = re.sub(encoding_pattern, encoding_repl, _ensure_unicode(s_exp))

    parser = etree.XMLParser(remove_blank_text=True)
    try:
        # Note: lxml.etree.XML() has issues with unicode strings as input,
        # so we pass UTF-8 encoded byte strings. See lxml bug
        # https://bugs.launchpad.net/lxml/+bug/1902364 for a similar issue
        # with lxml.etree.fromstring().
        x_act = etree.XML(_ensure_bytes(s_act), parser=parser)
        x_exp = etree.XML(_ensure_bytes(s_exp), parser=parser)
    except etree.XMLSyntaxError as exc:
        raise AssertionError("XML cannot be validated for %s: %s" %
                             (entity, exc))

    def sort_embedded(root, sort_elements):
        """
        Helper function for `sort_children()`, in support of embedded
        objects.

        This function invokes sort_children() on each embedded object in
        `root`, after unembedding the embedded object.

        Parameters:

          root (etree.Element): XML tree of the CIM-XML representation of
            the CIM element that contains an embedded CIM object (e.g. the
            CIM element may be an INSTANCE XML element and one of its
            PROPERTY child elements has a value that is an embedded CIM
            instance).

          sort_elements (list): (tag, attribute) pairs, passed on to
            `sort_children()`.
        """
        emb_elems = root.xpath("//*[@EmbeddedObject or @EMBEDDEDOBJECT]"
                               "/*[local-name() = 'VALUE' or "
                               "local-name() = 'VALUE.ARRAY']")
        for emb_elem in emb_elems:
            # NOTE(review): for a VALUE.ARRAY element, `.text` is presumably
            # None because the embedded strings would sit in its VALUE
            # children - confirm that xml_unembed() copes with that.
            elem = xml_unembed(emb_elem.text)
            sort_children(elem, sort_elements)
            emb_elem.text = xml_embed(elem)

    def sort_siblings(parent, tag, attr):
        """
        Sort the direct children of `parent` that have tag `tag` by the
        value of their attribute `attr`. Children with other tags keep
        their positions; the sorted children are distributed over the
        positions previously held by children with that tag.
        """
        children = list(parent)
        ordered = iter(sorted(
            (child for child in children if child.tag == tag),
            key=lambda e: e.attrib[attr]))
        # The following changes the original XML tree. The slice assignment
        # replaces all children by the same set of elements in new order.
        parent[:] = [next(ordered) if child.tag == tag else child
                     for child in children]

    def sort_children(root, sort_elements):
        """
        Sort certain elements in the `root` parameter to facilitate
        comparison of two XML documents.

        In addition, make sure this is also applied to any embedded
        objects (in their unembedded state).

        Parameters:

          root (etree.Element): XML tree to be sorted in place.

          sort_elements (list): (tag, attribute) pairs; sibling elements
            with that tag are sorted by the value of that attribute.
        """
        sort_embedded(root, sort_elements)
        for tag, attr in sort_elements:
            # Each parent is sorted on its own, so that elements never
            # move between parents, siblings with other tags are never
            # removed, and a root element with this tag (which has no
            # parent) is simply not subject to sorting.
            parents = root.xpath("//*[*[local-name() = $tag]]", tag=tag)
            for parent in parents:
                sort_siblings(parent, tag, attr)

    sort_elements = [
        # Sort sibling elements with <first> tag by its <second> attribute
        ("IPARAMVALUE", "NAME"),
        ("PROPERTY", "NAME"),
        ("PROPERTY.ARRAY", "NAME"),
        ("PARAMETER", "NAME"),
        ("KEYBINDING", "NAME"),
    ]
    sort_children(x_act, sort_elements)
    sort_children(x_exp, sort_elements)

    ns_act = _ensure_unicode(etree.tostring(x_act))
    ns_exp = _ensure_unicode(etree.tostring(x_exp))

    checker = doctestcompare.LXMLOutputChecker()

    # This tolerates differences in whitespace and attribute order
    if not checker.check_output(ns_act, ns_exp, 0):
        diff = checker.output_difference(doctest.Example("", ns_exp),
                                         ns_act, 0)
        raise AssertionError("XML is not as expected in %s: %s" %
                             (entity, diff))