def check_json_serialization(self, root, converter=None, **kwargs):
    """Check that encoding *root* to JSON is stable across a round trip.

    The element is serialized to JSON, decoded back into an element and
    serialized a second time; the two JSON strings are then compared.

    :param root: the element to serialize (its ``tag`` is used as decode path).
    :param converter: converter passed through to ``to_json``/``from_json``.
    :param kwargs: extra options forwarded to both functions; \
    ``decimal_type`` is always overridden with ``str``.

    ``lax_encode`` and ``xml_file`` are not defined in this block: they are
    resolved from the surrounding scope.
    """
    def first_item(result):
        # A tuple result carries the payload in its first position.
        return result[0] if isinstance(result, tuple) else result

    lossy = converter in (ParkerConverter, AbderaConverter, ColumnarConverter)
    if converter in (AbderaConverter, JsonMLConverter):
        unordered = kwargs.get('unordered', False)
    else:
        unordered = True

    # Use str instead of float in order to preserve original data
    kwargs['decimal_type'] = str

    first_json = first_item(xmlschema.to_json(
        root, schema=self.schema, converter=converter, **kwargs
    ))
    decoded = first_item(xmlschema.from_json(
        first_json, schema=self.schema, path=root.tag,
        converter=converter, **kwargs
    ))

    if lax_encode:
        # Only applies 'lax' when the caller did not choose a validation mode.
        kwargs.setdefault('validation', 'lax')

    second_json = first_item(xmlschema.to_json(
        decoded, schema=self.schema, converter=converter, **kwargs
    ))

    if second_json != first_json and (lax_encode or lossy or unordered):
        # Can't ensure decode equivalence if the test case use defaults,
        # or the converter is lossy or the decoding is unordered.
        return

    if sys.version_info >= (3, 6):
        self.assertEqual(second_json, first_json, msg=xml_file)
        return

    # Older interpreters: compare the decoded elements instead of the
    # JSON text (presumably because key order is not stable there).
    redecoded = first_item(xmlschema.from_json(
        second_json, schema=self.schema, path=root.tag,
        converter=converter, **kwargs
    ))
    try:
        comparison = etree_elements_assert_equal(
            decoded, redecoded, strict=False, skip_comments=True,
            unordered=unordered
        )
        self.assertIsNone(comparison, msg=xml_file)
    except AssertionError as err:
        # Re-report the failure through the test case, tagged with xml_file.
        self.assertIsNone(err, msg=xml_file)
def test_to_json_api(self):
    """Exercise ``to_json`` on the collection example, valid and invalid."""
    serialized = to_json(self.col_xml_file, lazy=True)
    self.assertIsInstance(serialized, str)
    for fragment in ('"@xmlns:col"', r'"name": "Joan Mir\u00f3"'):
        self.assertIn(fragment, serialized)

    # Decimal values are rejected by the JSON encoder.
    with self.assertRaises(TypeError) as raised:
        to_json(self.col_xml_file, lazy=True, decimal_type=Decimal)
    self.assertIn("is not JSON serializable", str(raised.exception))

    broken_xml_file = casepath(
        'examples/collection/collection-1_error.xml')
    lax_options = {'validation': 'lax', 'lazy': True}

    # With lax validation the call yields a (data, errors) pair.
    serialized, errors = to_json(broken_xml_file, **lax_options)
    self.assertEqual(len(errors), 1)
    self.assertIsInstance(errors[0], XMLSchemaDecodeError)
    self.assertIn('"position": null', serialized)

    def to_null(_value):
        # Fallback serializer: every unsupported object becomes null.
        return None

    serialized, errors = to_json(
        broken_xml_file, json_options={'default': to_null}, **lax_options)
    self.assertEqual(len(errors), 0)
    self.assertIn('"object": [null, null]', serialized)
def check_json_serialization(self, root, converter=None, **kwargs):
    """Check that encoding *root* to JSON is stable across a round trip.

    The element is serialized, decoded back and serialized again; the
    second serialization must match the first.

    :param root: the element to serialize (its ``tag`` is used as decode path).
    :param converter: converter passed through to ``to_json``/``from_json``.
    :param kwargs: extra options forwarded unchanged to both functions.

    ``msg_tmpl`` is not defined in this block: it comes from the
    surrounding scope.
    """
    def first_item(result):
        # A tuple result carries the payload in its first position.
        return result[0] if isinstance(result, tuple) else result

    first_pass = first_item(xmlschema.to_json(
        root, schema=self.schema, converter=converter, **kwargs
    ))
    decoded = first_item(xmlschema.from_json(
        first_pass, schema=self.schema, path=root.tag,
        converter=converter, **kwargs
    ))
    second_pass = first_item(xmlschema.to_json(
        decoded, schema=self.schema, converter=converter, **kwargs
    ))

    if converter is ParkerConverter \
            and any(XSI_TYPE in child.attrib for child in root.iter()):
        return  # can't check encode equivalence if xsi:type is provided

    if sys.version_info >= (3, 6):
        self.assertEqual(
            second_pass, first_pass,
            msg_tmpl % "serialized data changed at second pass"
        )
        return

    # Older interpreters: compare decoded elements rather than JSON text.
    redecoded = first_item(xmlschema.from_json(
        second_pass, schema=self.schema, path=root.tag,
        converter=converter, **kwargs
    ))
    try:
        comparison = etree_elements_assert_equal(
            decoded, redecoded, strict=False, skip_comments=True
        )
        self.assertIsNone(comparison)
    except AssertionError as err:
        # Re-report the failure through the test case.
        self.assertIsNone(err, None)
def check_json_serialization(self, root, converter=None, **kwargs):
    """Check that encoding *root* to JSON is stable across a round trip.

    The element is serialized, decoded back and serialized again. A
    mismatch is tolerated (silent return) when lax encoding is active, the
    converter is one of the lossy ones, or decoding is unordered.

    :param root: the element to serialize (its ``tag`` is used as decode path).
    :param converter: converter passed through to ``to_json``/``from_json``.
    :param kwargs: extra options forwarded unchanged to both functions.

    ``lax_encode`` and ``msg_tmpl`` are not defined in this block: they
    come from the surrounding scope.
    """
    def first_item(result):
        # A tuple result carries the payload in its first position.
        return result[0] if isinstance(result, tuple) else result

    lossy = converter in (ParkerConverter, AbderaConverter, ColumnarConverter)
    if converter in (AbderaConverter, JsonMLConverter):
        unordered = kwargs.get('unordered', False)
    else:
        unordered = True

    first_pass = first_item(xmlschema.to_json(
        root, schema=self.schema, converter=converter, **kwargs
    ))
    decoded = first_item(xmlschema.from_json(
        first_pass, schema=self.schema, path=root.tag,
        converter=converter, **kwargs
    ))
    second_pass = first_item(xmlschema.to_json(
        decoded, schema=self.schema, converter=converter, **kwargs
    ))

    differs = second_pass != first_pass
    if differs and (lax_encode or lossy or unordered):
        # Can't ensure decode equivalence if the test case use defaults,
        # or the converter is lossy or the decoding is unordered.
        return

    if sys.version_info >= (3, 6):
        if differs:
            # Dump both serializations in full before the assertion fails.
            print(first_pass)
            print(second_pass)
            print(converter, unordered)
        self.assertEqual(
            second_pass, first_pass,
            msg_tmpl % "serialized data changed at second pass"
        )
        return

    # Older interpreters: compare decoded elements rather than JSON text.
    redecoded = first_item(xmlschema.from_json(
        second_pass, schema=self.schema, path=root.tag,
        converter=converter, **kwargs
    ))
    try:
        comparison = etree_elements_assert_equal(
            decoded, redecoded, strict=False, skip_comments=True,
            unordered=unordered
        )
        self.assertIsNone(comparison)
    except AssertionError as err:
        # Re-report the failure through the test case.
        self.assertIsNone(err, None)
def test_json2xml_command_03(self, mock_out, mock_err):
    """Convert a freshly generated JSON file back to XML via the command.

    :param mock_out: captured standard output of the command.
    :param mock_err: captured standard error of the command.
    """
    json_name = 'vehicles-test.json'
    xml_name = 'vehicles-test.xml'

    with open(json_name, 'w') as json_file:
        to_json('vehicles.xml', json_file)

    self.run_json2xml(json_name, '--schema=vehicles.xsd')

    # Remove the generated files before asserting on the captured output.
    for leftover in (json_name, xml_name):
        os.unlink(leftover)

    self.assertEqual(mock_err.getvalue(), '')
    self.assertEqual("vehicles-test.json converted to vehicles-test.xml\n",
                     mock_out.getvalue())
    self.assertEqual('0', str(self.ctx.exception))
def test_json_lazy_decoding(self):
    """Lazy JSON decoding must give the same output as eager decoding."""
    common = dict(xml_document=self.col_xml_file, schema=self.col_schema)

    def assert_lazy_matches(expected, path):
        # Lazy decoding of *path*, with default and skipped validation.
        self.assertEqual(
            expected,
            xmlschema.to_json(path=path, lazy=True, **common))
        self.assertEqual(
            expected,
            xmlschema.to_json(validation='skip', path=path, lazy=True, **common))

    # Whole document.
    whole_doc = xmlschema.to_json(**common)
    self.assertIsInstance(whole_doc, str)
    self.assertEqual(len(whole_doc), 688)
    self.assertTrue(whole_doc.startswith('{"@xmlns:'))
    self.assertEqual(whole_doc[-1], '}')
    self.assertEqual(whole_doc, xmlschema.to_json(lazy=True, **common))

    # First-level selection: its output is a fragment of the whole document.
    selection = xmlschema.to_json(path='object', **common)
    self.assertIn(selection, whole_doc)
    assert_lazy_matches(selection, 'object')

    # Nested selection.
    selection = xmlschema.to_json(path='object/author', **common)
    self.assertIsInstance(selection, str)
    self.assertEqual(len(selection), 259)
    self.assertEqual(selection[:4], '[{"@')
    self.assertEqual(selection[-1], ']')
    assert_lazy_matches(selection, 'object/author')

    # Tests for issue #159
    assert_lazy_matches(selection, '/col:collection/object/author')
def test_json_path_decoding(self):
    """Equivalent path expressions must select the same JSON output."""
    source = self.col_xml_file
    col_schema = self.col_schema

    def decode(path):
        return xmlschema.to_json(source, schema=col_schema, path=path)

    expected = decode('*')
    self.assertIsInstance(expected, str)
    self.assertEqual(len(expected), 493)
    self.assertEqual(expected[:4], '[{"@')
    self.assertEqual(expected[-1], ']')

    # Relative, descendant and absolute forms of the same selection.
    for path in ('object', '//object', '/col:collection/object'):
        self.assertEqual(expected, decode(path))
def test_json_dump_and_load(self):
    """Dump XML to a JSON file, load it back and compare with the source."""
    vh_tree = ElementTree.parse(self.vh_xml_file)
    col_tree = ElementTree.parse(self.col_xml_file)

    def dump_and_load(xml_file, json_file, schema, source_tree):
        # Write the JSON file, read it back, then drop the temporary file
        # before comparing the rebuilt element with the parsed original.
        with open(json_file, 'w') as sink:
            xmlschema.to_json(xml_file, sink)

        with open(json_file) as source:
            rebuilt = xmlschema.from_json(source, schema)

        os.remove(json_file)
        self.check_etree_elements(source_tree.getroot(), rebuilt)

    dump_and_load(self.vh_xml_file, self.vh_json_file, self.vh_schema, vh_tree)
    dump_and_load(self.col_xml_file, self.col_json_file, self.col_schema, col_tree)
def check_json_serialization(self, root, converter=None, **kwargs):
    """Check that encoding *root* to JSON is stable across a round trip.

    The element is serialized to JSON, decoded back into an element and
    serialized a second time; the two JSON strings are then compared.

    :param root: the element to serialize (its ``tag`` is used as decode path).
    :param converter: converter passed through to ``to_json``/``from_json``.
    :param kwargs: extra options forwarded to both functions; \
    ``decimal_type`` is always overridden with ``str``.

    ``lax_encode`` and ``xml_file`` are not defined in this block: they are
    resolved from the surrounding scope.
    """
    def first_item(result):
        # A tuple result carries the payload in its first position.
        return result[0] if isinstance(result, tuple) else result

    lossy = converter in (ParkerConverter, AbderaConverter, ColumnarConverter)
    if converter in (AbderaConverter, JsonMLConverter):
        unordered = kwargs.get('unordered', False)
    else:
        unordered = True

    # Use str instead of float in order to preserve original data
    kwargs['decimal_type'] = str

    first_json = first_item(xmlschema.to_json(
        root, schema=self.schema, converter=converter, **kwargs
    ))
    decoded = first_item(xmlschema.from_json(
        first_json, schema=self.schema, path=root.tag,
        converter=converter, **kwargs
    ))

    if lax_encode:
        # Only applies 'lax' when the caller did not choose a validation mode.
        kwargs.setdefault('validation', 'lax')

    second_json = first_item(xmlschema.to_json(
        decoded, schema=self.schema, converter=converter, **kwargs
    ))

    # Can't ensure decode equivalence if the test case use defaults,
    # white spaces are replaced/collapsed or the converter is lossy
    # or the decoding is unordered: in those cases a mismatch is tolerated.
    if second_json == first_json or not (lax_encode or lossy or unordered):
        self.assertEqual(second_json, first_json, msg=xml_file)
def test_json2xml_command_04(self, mock_out, mock_err):
    """Run the JSON-to-XML command repeatedly against the same destination.

    The first run converts the file, the second run is expected to skip
    the existing destination, and a third run that passes the generated
    XML file as ``--output`` must raise ``ValueError``.

    :param mock_out: captured standard output of the command.
    :param mock_err: captured standard error of the command.
    """
    generated = ('vehicles-test.json', 'vehicles-test.xml')
    try:
        with open('vehicles-test.json', 'w') as fp:
            xmlschema.to_json('vehicles.xml', fp)

        self.run_json2xml('vehicles-test.json', '--schema=vehicles.xsd')
        self.assertEqual('0', str(self.ctx.exception))

        # Second run: the destination file now exists.
        self.run_json2xml('vehicles-test.json', '--schema=vehicles.xsd')

        # An existing regular file is not a valid output directory.
        with self.assertRaises(ValueError) as ctx:
            self.run_json2xml('vehicles-test.json', '--schema=vehicles.xsd',
                              '--output=vehicles-test.xml')
        self.assertEqual(str(ctx.exception),
                         "'vehicles-test.xml' is not a directory")
    finally:
        # Always remove the generated files, even when a step above fails:
        # a leftover destination would make later conversions in the same
        # directory report "skip ...: the destination file exists!".
        for path in generated:
            if os.path.isfile(path):
                os.unlink(path)

    self.assertEqual(mock_err.getvalue(), '')
    self.assertIn("vehicles-test.json converted to vehicles-test.xml\n",
                  mock_out.getvalue())
    self.assertIn("skip vehicles-test.xml: the destination file exists!",
                  mock_out.getvalue())
def check_json_serialization(self, root, converter=None, **kwargs):
    """Check that encoding *root* to JSON is stable across a round trip.

    The element is serialized, decoded back and serialized again; the
    second serialization must match the first.

    :param root: the element to serialize (its ``tag`` is used as decode path).
    :param converter: converter passed through to ``to_json``/``from_json``.
    :param kwargs: extra options forwarded unchanged to both functions.

    ``msg_template`` is not defined in this block: it comes from the
    surrounding scope.
    """
    def first_item(result):
        # A tuple result carries the payload in its first position.
        return result[0] if isinstance(result, tuple) else result

    first_pass = first_item(xmlschema.to_json(
        root, schema=self.schema, converter=converter, **kwargs
    ))
    decoded = first_item(xmlschema.from_json(
        first_pass, schema=self.schema, path=root.tag,
        converter=converter, **kwargs
    ))
    second_pass = first_item(xmlschema.to_json(
        decoded, schema=self.schema, converter=converter, **kwargs
    ))

    if sys.version_info >= (3, 6):
        self.assertEqual(
            second_pass, first_pass,
            msg_template % "serialized data changed at second pass"
        )
        return

    # Older interpreters: compare decoded elements rather than JSON text.
    redecoded = first_item(xmlschema.from_json(
        second_pass, schema=self.schema, path=root.tag,
        converter=converter, **kwargs
    ))
    try:
        comparison = etree_elements_assert_equal(
            decoded, redecoded, strict=False, skip_comments=True
        )
        self.assertIsNone(comparison)
    except AssertionError as err:
        # Re-report the failure through the test case.
        self.assertIsNone(err, None)
def test_from_json_api(self):
    """Exercise ``from_json`` with an invalid and two valid schema arguments."""
    expected_tag = '{http://example.com/ns/collection}collection'
    source = to_json(self.col_xml_file, lazy=True)

    # A schema given as a plain path string is rejected.
    with self.assertRaises(TypeError) as raised:
        from_json(source, self.col_xsd_file)
    self.assertIn("invalid type <class 'str'> for argument 'schema'",
                  str(raised.exception))

    # Schema instance passed by keyword.
    schema = XMLSchema10(self.col_xsd_file)
    root = from_json(source, schema=schema)
    self.assertEqual(root.tag, expected_tag)

    # Fresh schema instance passed positionally, with custom JSON options.
    schema = XMLSchema10(self.col_xsd_file)
    root = from_json(source, schema, json_options={'parse_float': Decimal})
    self.assertEqual(root.tag, expected_tag)
def xml2json():
    """Command-line entry point: decode a set of XML files to JSON.

    Parses the command line, optionally builds a schema from ``--schema``,
    then writes one ``.json`` file per input XML file into the output
    directory (created when missing). Existing destination files are
    skipped unless ``--force`` is given. Decoding uses lax validation, so
    validation errors are counted instead of aborting the conversion.

    The function never returns normally: it ends with ``sys.exit`` called
    with the total number of errors found.

    :raises XMLSchemaValueError: if the output path exists and is not \
    a directory.
    """
    parser = argparse.ArgumentParser(
        prog=PROGRAM_NAME, add_help=True,
        description="decode a set of XML files to JSON.")
    parser.usage = "%(prog)s [OPTION]... [FILE]...\n" \
                   "Try '%(prog)s --help' for more information."

    parser.add_argument('-v', dest='verbosity', action='count', default=0,
                        help="increase output verbosity.")
    parser.add_argument('--schema', type=str, metavar='PATH',
                        help="path or URL to an XSD schema.")
    parser.add_argument('--version', type=xsd_version_number, default='1.0',
                        help="XSD schema validator to use (default is 1.0).")
    parser.add_argument('-L', dest='locations', nargs=2, type=str,
                        action='append', metavar="URI/URL",
                        help="schema location hint overrides.")
    parser.add_argument(
        '--converter', type=str, metavar='NAME',
        help="use a different XML to JSON convention instead of "
             "the default converter. Option value can be one of "
             "{!r}.".format(tuple(CONVERTERS_MAP)))
    parser.add_argument(
        '--lazy', action='store_true', default=False,
        help="use lazy decoding mode (slower but use less memory).")
    # The help text used to read "encoded XML files", which describes the
    # reverse conversion; this command writes decoded JSON files.
    parser.add_argument(
        '-o', '--output', type=str, default='.',
        help="where to write the decoded JSON files, current dir by default.")
    parser.add_argument('-f', '--force', action="store_true", default=False,
                        help="do not prompt before overwriting.")
    parser.add_argument('files', metavar='[XML_FILE ...]', nargs='+',
                        help="XML files to be decoded to JSON.")

    args = parser.parse_args()

    loglevel = get_loglevel(args.verbosity)
    schema_class = XMLSchema if args.version == '1.0' else XMLSchema11
    converter = get_converter(args.converter)
    if args.schema is not None:
        schema = schema_class(args.schema, locations=args.locations,
                              loglevel=loglevel)
    else:
        # No explicit schema: to_json receives schema=None plus the class.
        schema = None

    base_path = pathlib.Path(args.output)
    if not base_path.exists():
        base_path.mkdir()
    elif not base_path.is_dir():
        raise XMLSchemaValueError("{!r} is not a directory".format(
            str(base_path)))

    tot_errors = 0
    for xml_path in map(pathlib.Path, args.files):
        # Destination: same file name in the output dir, '.json' suffix.
        json_path = base_path.joinpath(xml_path.name).with_suffix('.json')
        if json_path.exists() and not args.force:
            print("skip {}: the destination file exists!".format(
                str(json_path)))
            continue

        with open(str(json_path), 'w') as fp:
            try:
                errors = to_json(
                    xml_document=str(xml_path),
                    fp=fp,
                    schema=schema,
                    cls=schema_class,
                    converter=converter,
                    lazy=args.lazy,
                    validation='lax',
                )
            except (xmlschema.XMLSchemaException, URLError) as err:
                # A failed conversion counts as one error; go on with the
                # remaining files.
                tot_errors += 1
                print("error with {}: {}".format(str(xml_path), str(err)))
                continue
            else:
                if not errors:
                    print("{} converted to {}".format(
                        str(xml_path), str(json_path)))
                else:
                    tot_errors += len(errors)
                    print("{} converted to {} with {} errors".format(
                        str(xml_path), str(json_path), len(errors)))

    # NOTE(review): the raw error count is used as the exit status; very
    # large counts may not be reported faithfully by the OS - confirm
    # whether that matters for callers.
    sys.exit(tot_errors)
#!/usr/bin/env python

# Notes:
# - Used by lk_json_from_xml_schema in core.sh
# - Not intended to be invoked directly
#
# Arguments (both optional, positional):
#   1. schema passed to xmlschema.to_json (None when omitted)
#   2. XML source; read from standard input when omitted

import sys

import xmlschema

if len(sys.argv) >= 2:
    schema = sys.argv[1]
else:
    schema = None

if len(sys.argv) >= 3:
    xml = sys.argv[2]
else:
    # No XML argument given: consume all of standard input instead.
    xml = sys.stdin.read()

converter = xmlschema.XMLSchemaConverter(
    preserve_root=True,
    strip_namespaces=True,
)

print(xmlschema.to_json(xml, schema=schema, converter=converter))