def test_datetime_builtin_type(self):
    """Round-trip an xs:dateTime element with and without ``datetime_types``."""
    schema = self.get_schema('<xs:element name="dt" type="xs:dateTime"/>')
    xml_text = '<dt>2019-01-01T13:40:00</dt>'

    # With datetime_types=True the decoded value must be a DateTime10 instance.
    typed_value = schema.decode(xml_text, datetime_types=True)
    self.assertIsInstance(typed_value, datatypes.DateTime10)
    self.assertEqual(etree_tostring(schema.encode(typed_value)), xml_text)

    # Without the option the decoded value must be a plain string.
    plain_value = schema.decode(xml_text)
    self.assertIsInstance(plain_value, str)
    self.assertEqual(etree_tostring(schema.encode(plain_value)), xml_text)
def test_element_tree(self):
    """Check the ElementTree module in use and the serialization of an empty element."""
    self.assertNotEqual(
        ElementTree.Element,
        ElementTree._Element_Py,
        msg="cElementTree not available!",
    )

    py_element = PyElementTree.Element('element')
    self.assertEqual(etree_tostring(py_element), '<element />')

    # The module imported by name must be the same object as ElementTree.
    imported_module = importlib.import_module('xml.etree.ElementTree')
    self.assertEqual(imported_module, ElementTree)
def test_pure_python_element_tree(self):
    """Check which Element class PyElementTree exposes, then serialize an empty element."""
    running_python3 = sys.version_info >= (3, )
    if not running_python3:
        self.assertNotEqual(PyElementTree.Element, PyElementTree._Element_Py)
    else:
        # C extensions disabled by defusedxml
        self.assertEqual(PyElementTree.Element, PyElementTree._Element_Py)
        self.assertNotEqual(ElementTree.Element, PyElementTree.Element)

    py_element = PyElementTree.Element('element')
    self.assertEqual(etree_tostring(py_element), '<element />')
def test_date_builtin_type(self):
    """Round-trip an xs:date element and encode values with the 'mdate' type."""
    schema = self.get_schema('<xs:element name="dt" type="xs:date"/>')
    decoded = schema.decode('<dt>2001-04-15</dt>', datetime_types=True)
    serialized = etree_tostring(schema.encode(decoded))
    self.assertEqual(serialized, '<dt>2001-04-15</dt>')

    mdate_type = self.st_schema.types['mdate']

    # Encoding a plain string must give back the same string.
    from_text = mdate_type.encode('2001-01-01')
    self.assertIsInstance(from_text, str)
    self.assertEqual(from_text, '2001-01-01')

    # Encoding a Date instance must give the same string too.
    from_date = mdate_type.encode(datatypes.Date.fromstring('2001-01-01'))
    self.assertIsInstance(from_date, str)
    self.assertEqual(from_date, '2001-01-01')
def check_encode(self, xsd_component, data, expected, **kwargs):
    """
    Encode *data* with *xsd_component* and compare the outcome with *expected*.

    :param xsd_component: object providing an ``encode(data, **kwargs)`` method.
    :param data: the value to encode.
    :param expected: an Exception subclass that the encoding must raise, \
    an ElementTree element to compare with, or the expected encoded value.
    :param kwargs: options forwarded to ``xsd_component.encode``.
    """
    # An exception class means that the encoding is expected to fail.
    if isinstance(expected, type) and issubclass(expected, Exception):
        self.assertRaises(expected, xsd_component.encode, data, **kwargs)
        return

    if is_etree_element(expected):
        encoded_elem = xsd_component.encode(data, **kwargs)
        self.check_etree_elements(expected, encoded_elem)
        return

    result = xsd_component.encode(data, **kwargs)
    is_result_errors_pair = (
        isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], list)
    )
    if is_result_errors_pair:
        # A 2-tuple whose second item is a list: only the first item is compared.
        value = result[0]
    elif is_etree_element(result):
        self.assertEqual(expected, etree_tostring(result).strip())
        return
    else:
        value = result

    self.assertEqual(expected, value)
    self.assertTrue(isinstance(value, type(expected)))
def write(self, filename, output_format='xml', validation='strict', **kwargs):
    """
    Write loaded XML data to a file. Binds the document to saved file if it's
    not already bound to another file.

    :param filename: filepath of the destination file.
    :param output_format: the data format of the output file, can be 'xml', \
    'json' or 'yaml' (case-insensitive, surrounding whitespace is ignored).
    :param validation: validation mode, can be 'strict', 'lax' or 'skip'. \
    It is passed to ``self.to_dict`` for the 'json' and 'yaml' formats.
    :param kwargs: other options passed to ``self.to_dict`` for the 'json' \
    and 'yaml' formats.
    :raises TypeError: if *filename* is not a string.
    :raises ValueError: if *output_format* is not one of the accepted formats.
    :raises RuntimeError: if 'yaml' is requested but PyYAML is not installed.
    """
    if not isinstance(filename, str):
        raise TypeError("the filename argument must be a string!")

    output_format = output_format.strip().lower()
    if output_format == 'xml':
        with open(filename, 'w+') as f:
            f.write(etree_tostring(self.root))

    elif output_format == 'json':
        obj = self.to_dict(validation, **kwargs)
        with open(filename, 'w+') as f:
            # Do not return from here: json.dump() returns None anyway and an
            # early return would skip the binding of the document done below.
            json.dump(obj, f, sort_keys=True, indent=4)

    elif output_format == 'yaml':
        if yaml is None:
            raise RuntimeError("PyYAML library is not installed!")

        obj = self.to_dict(validation, **kwargs)
        with open(filename, 'w+') as f:
            yaml.dump(obj, stream=f, default_flow_style=False)

    else:
        raise ValueError(
            "Accepted output_format are 'xml', 'json' or 'yaml'!")

    # Bind the document to the written file only if it is still unbound.
    if self.filename is None:
        self.filename = filename
        self.format = output_format
def test_py_element_string_serialization(self):
    """Check etree_tostring() output for several encoding/declaration/method options."""
    elem = PyElementTree.Element('element')

    # Each entry pairs the keyword options with the expected serialization.
    cases = [
        ({}, '<element />'),
        ({'xml_declaration': True}, '<element />'),
        ({'encoding': 'us-ascii'}, b'<element />'),
        ({'encoding': 'us-ascii', 'xml_declaration': True},
         b'<?xml version="1.0" encoding="us-ascii"?>\n<element />'),
        ({'encoding': 'ascii'},
         b"<?xml version='1.0' encoding='ascii'?>\n<element />"),
        ({'encoding': 'ascii', 'xml_declaration': False}, b'<element />'),
        ({'encoding': 'utf-8'}, b'<element />'),
        ({'encoding': 'utf-8', 'xml_declaration': True},
         b'<?xml version="1.0" encoding="utf-8"?>\n<element />'),
        ({'encoding': 'iso-8859-1'},
         b"<?xml version='1.0' encoding='iso-8859-1'?>\n<element />"),
        ({'encoding': 'iso-8859-1', 'xml_declaration': False}, b"<element />"),
        ({'method': 'html'}, '<element></element>'),
        ({'method': 'text'}, ''),
    ]
    for options, expected in cases:
        self.assertEqual(etree_tostring(elem, **options), expected)

    root = PyElementTree.XML(
        '<root>\n'
        ' text1\n'
        ' <elem>text2</elem>\n'
        '</root>'
    )
    text_output = etree_tostring(root, method='text')
    self.assertEqual(text_output, '\n text1\n text2')
def check_etree_encode(self, root, converter=None, **kwargs):
    """
    Decode *root* with ``self.schema``, encode the decoded data back to an
    element and check that the encoded tree is equivalent to the original.

    If the first comparison fails, the check is either re-raised, skipped or
    repeated on a second decode/encode pass, depending on the converter and
    on ``skip_strict``.

    NOTE(review): ``skip_strict``, ``debug_mode`` and ``msg_tmpl`` are not
    defined in this function; they are presumably taken from an enclosing
    scope that is not visible here -- confirm.

    :param root: the XML element to decode and to compare with.
    :param converter: the converter passed to each decode/encode call.
    :param kwargs: options forwarded to each decode/encode call. The \
    'namespaces' entry, if any, is also used for the namespace prefixes check.
    :raises AssertionError: if the encoded tree differs from the original \
    (non-lossy case) or if the two decode/encode passes are not equivalent.
    """
    namespaces = kwargs.get('namespaces', {})
    data1 = self.schema.decode(root, converter=converter, **kwargs)
    if isinstance(data1, tuple):
        data1 = data1[0]  # When validation='lax'

    # Exhaust the iterator over the decoded data, discarding the items.
    for _ in iter_nested_items(data1, dict_class=ordered_dict_class):
        pass

    elem1 = self.schema.encode(data1, path=root.tag, converter=converter, **kwargs)
    if isinstance(elem1, tuple):
        # When validation='lax'
        if converter is not ParkerConverter:
            for e in elem1[1]:
                self.check_namespace_prefixes(unicode_type(e))
        elem1 = elem1[0]

    # Checks that the encoded element does not contain reserved namespace prefixes
    if namespaces and all('ns%d' % k not in namespaces for k in range(10)):
        self.check_namespace_prefixes(
            etree_tostring(elem1, namespaces=namespaces))

    # Main check: compare the original and the re-encoded tree
    try:
        etree_elements_assert_equal(root, elem1, strict=False)
    except AssertionError as err:
        # If the check fails retry only if the converter is lossy (eg. ParkerConverter)
        # or if the XML case has defaults taken from the schema or some part of data
        # decoding is skipped by schema wildcards (set the specific argument in testfiles).
        if converter not in (ParkerConverter, AbderaConverter, JsonMLConverter) and not skip_strict:
            if debug_mode:
                pdb.set_trace()
            raise AssertionError(
                str(err) + msg_tmpl % "encoded tree differs from original")
        elif converter is ParkerConverter and any(
                XSI_TYPE in e.attrib for e in root.iter()):
            return  # can't check encode equivalence if xsi:type is provided
        else:
            # Lossy or augmenting cases are checked after another decoding/encoding pass
            data2 = self.schema.decode(elem1, converter=converter, **kwargs)
            if isinstance(data2, tuple):
                data2 = data2[0]

            if sys.version_info >= (3, 6):
                # For Python < 3.6 cannot ensure attribute decoding order
                try:
                    self.assertEqual(
                        data1, data2, msg_tmpl % "re-decoded data changed")
                except AssertionError:
                    if debug_mode:
                        pdb.set_trace()
                    raise

            elem2 = self.schema.encode(data2, path=root.tag, converter=converter, **kwargs)
            if isinstance(elem2, tuple):
                elem2 = elem2[0]

            # The trees produced by the first and the second pass must be equivalent.
            try:
                etree_elements_assert_equal(elem1, elem2, strict=False)
            except AssertionError as err:
                if debug_mode:
                    pdb.set_trace()
                raise AssertionError(
                    str(err) + msg_tmpl % "encoded tree differs after second pass")
def test_gregorian_yearmonth_builtin_type(self):
    """Round-trip an xs:gYearMonth element decoded with ``datetime_types=True``."""
    schema = self.get_schema('<xs:element name="td" type="xs:gYearMonth"/>')
    decoded = schema.decode('<td>2000-12</td>', datetime_types=True)
    serialized = etree_tostring(schema.encode(decoded))
    self.assertEqual(serialized, '<td>2000-12</td>')
def test_duration_builtin_type(self):
    """Decode an xs:duration with ``datetime_types=True`` and check its re-encoded form."""
    schema = self.get_schema('<xs:element name="td" type="xs:duration"/>')
    decoded = schema.decode('<td>P5Y3MT60H30.001S</td>', datetime_types=True)
    serialized = etree_tostring(schema.encode(decoded))
    # The re-encoded duration is expected in the form with 60H carried into days.
    self.assertEqual(serialized, '<td>P5Y3M2DT12H30.001S</td>')
def test_date_builtin_type(self):
    """Round-trip an xs:date element decoded with ``datetime_types=True``."""
    schema = self.get_schema('<xs:element name="dt" type="xs:date"/>')
    decoded = schema.decode('<dt>2001-04-15</dt>', datetime_types=True)
    serialized = etree_tostring(schema.encode(decoded))
    self.assertEqual(serialized, '<dt>2001-04-15</dt>')
def json2xml():
    """
    Command line entry point that encodes a set of JSON files to XML.

    Parses the command line arguments, builds the schema and then converts
    each JSON file with ``from_json`` using validation='lax'. Each result is
    written into the output directory using the source file name with an
    '.xml' suffix. An existing destination file is skipped unless the
    ``--force`` option is provided.

    The function never returns: it calls ``sys.exit`` with the total number
    of errors, capped at 255. Without the cap a count that is a multiple of
    256 would be reported as exit status 0 on systems that keep only the low
    8 bits of the status.

    :raises XMLSchemaValueError: if the output path exists but is not a directory.
    """
    parser = argparse.ArgumentParser(
        prog=PROGRAM_NAME, add_help=True,
        description="encode a set of JSON files to XML.")
    parser.usage = "%(prog)s [OPTION]... [FILE]...\n" \
                   "Try '%(prog)s --help' for more information."

    parser.add_argument('-v', dest='verbosity', action='count', default=0,
                        help="increase output verbosity.")
    parser.add_argument('--schema', type=str, metavar='PATH',
                        help="path or URL to an XSD schema.")
    parser.add_argument('--version', type=xsd_version_number, default='1.0',
                        help="XSD schema validator to use (default is 1.0).")
    parser.add_argument('-L', dest='locations', nargs=2, type=str,
                        action='append', metavar="URI/URL",
                        help="schema location hint overrides.")
    parser.add_argument(
        '--converter', type=str, metavar='NAME',
        help="use a different XML to JSON convention instead of "
             "the default converter. Option value can be one of "
             "{!r}.".format(tuple(CONVERTERS_MAP)))
    parser.add_argument(
        '-o', '--output', type=str, default='.',
        help="where to write the encoded XML files, current dir by default.")
    parser.add_argument('-f', '--force', action="store_true", default=False,
                        help="do not prompt before overwriting")
    parser.add_argument('files', metavar='[JSON_FILE ...]', nargs='+',
                        help="JSON files to be encoded to XML.")

    args = parser.parse_args()

    loglevel = get_loglevel(args.verbosity)
    schema_class = XMLSchema if args.version == '1.0' else XMLSchema11
    converter = get_converter(args.converter)
    schema = schema_class(args.schema, locations=args.locations, loglevel=loglevel)

    base_path = pathlib.Path(args.output)
    if not base_path.exists():
        base_path.mkdir()
    elif not base_path.is_dir():
        raise XMLSchemaValueError("{!r} is not a directory".format(
            str(base_path)))

    tot_errors = 0
    for json_path in map(pathlib.Path, args.files):
        xml_path = base_path.joinpath(json_path.name).with_suffix('.xml')
        if xml_path.exists() and not args.force:
            print("skip {}: the destination file exists!".format(
                str(xml_path)))
            continue

        with open(str(json_path)) as fp:
            try:
                root, errors = from_json(
                    source=fp,
                    schema=schema,
                    converter=converter,
                    validation='lax',
                )
            except (xmlschema.XMLSchemaException, URLError) as err:
                # A failed conversion counts as one error and writes no output file.
                tot_errors += 1
                print("error with {}: {}".format(str(xml_path), str(err)))
                continue
            else:
                if not errors:
                    print("{} converted to {}".format(str(json_path), str(xml_path)))
                else:
                    tot_errors += len(errors)
                    print("{} converted to {} with {} errors".format(
                        str(json_path), str(xml_path), len(errors)))

        with open(str(xml_path), 'w') as fp:
            fp.write(etree_tostring(root))

    # Cap the exit status so that a large error count cannot wrap around to 0.
    sys.exit(min(tot_errors, 255))
def check_decode_encode(self, root, converter=None, **kwargs):
    """
    Decode *root* with ``self.schema``, encode the decoded data back to an
    element and check that the encoded tree is equivalent to the original.

    If the first comparison fails, the outcome depends on ``lax_encode`` and
    on the converter: the failure is ignored, re-raised, or the check is
    repeated on a second decode/encode pass.

    NOTE(review): ``lax_encode``, ``debug_mode``, ``msg_tmpl`` and ``xml_file``
    are not defined in this function; they are presumably taken from an
    enclosing scope that is not visible here -- confirm.

    :param root: the XML element to decode and to compare with.
    :param converter: the converter passed to each decode/encode call.
    :param kwargs: options forwarded to each decode/encode call. The \
    'namespaces' entry is also used for the namespace prefixes check and \
    the 'unordered' entry for the comparison of the trees.
    :raises AssertionError: if the re-encoding fails with a validation error, \
    if the encoded tree differs from the original with JsonMLConverter, or \
    if the two decode/encode passes are not equivalent.
    """
    namespaces = kwargs.get('namespaces', {})

    # Classify the converter: this drives the handling of a failed comparison.
    lossy = converter in (ParkerConverter, AbderaConverter, ColumnarConverter)
    losslessly = converter is JsonMLConverter
    unordered = converter not in (AbderaConverter, JsonMLConverter) or \
        kwargs.get('unordered', False)

    decoded_data1 = self.schema.decode(root, converter=converter, **kwargs)
    if isinstance(decoded_data1, tuple):
        decoded_data1 = decoded_data1[0]  # When validation='lax'

    # Exhaust the iterator over the decoded data, discarding the items.
    for _ in iter_nested_items(decoded_data1, dict_class=ordered_dict_class):
        pass

    try:
        elem1 = self.schema.encode(decoded_data1, path=root.tag,
                                   converter=converter, **kwargs)
    except XMLSchemaValidationError as err:
        raise AssertionError(
            msg_tmpl.format("error during re-encoding", str(err)))

    if isinstance(elem1, tuple):
        # When validation='lax'
        if converter is not ParkerConverter and converter is not ColumnarConverter:
            for e in elem1[1]:
                self.check_namespace_prefixes(str(e))
        elem1 = elem1[0]

    # Checks that the encoded element does not contain reserved namespace prefixes
    if namespaces and all('ns%d' % k not in namespaces for k in range(10)):
        self.check_namespace_prefixes(
            etree_tostring(elem1, namespaces=namespaces))

    # Main check: compare the original and the re-encoded tree
    try:
        etree_elements_assert_equal(root, elem1, strict=False, unordered=unordered)
    except AssertionError as err:
        # If the check fails retry only if the converter is lossy (eg. ParkerConverter)
        # or if the XML case has defaults taken from the schema or some part of data
        # decoding is skipped by schema wildcards (set the specific argument in testfiles).
        if lax_encode:
            pass  # can't ensure encode equivalence if the test case use defaults
        elif lossy:
            pass  # can't check encode equivalence if the converter is lossy
        elif losslessly:
            if debug_mode:
                pdb.set_trace()
            raise AssertionError(
                msg_tmpl.format("encoded tree differs from original", str(err)))
        else:
            # Lossy or augmenting cases are checked with another decoding/encoding pass
            decoded_data2 = self.schema.decode(elem1, converter=converter, **kwargs)
            if isinstance(decoded_data2, tuple):
                decoded_data2 = decoded_data2[0]

            if sys.version_info >= (3, 6):
                # For Python < 3.6 cannot ensure attribute decoding order
                try:
                    self.assertEqual(decoded_data1, decoded_data2, msg=xml_file)
                except AssertionError:
                    if debug_mode:
                        pdb.set_trace()
                    raise

            elem2 = self.schema.encode(decoded_data2, path=root.tag,
                                       converter=converter, **kwargs)
            if isinstance(elem2, tuple):
                elem2 = elem2[0]

            # The trees produced by the first and the second pass must be equivalent.
            try:
                etree_elements_assert_equal(elem1, elem2, strict=False,
                                            unordered=unordered)
            except AssertionError as err:
                if debug_mode:
                    pdb.set_trace()
                raise AssertionError(
                    msg_tmpl.format(
                        "encoded tree differs after second pass", str(err)))