Esempio n. 1
0
    def test_datetime_builtin_type(self):
        """Round-trip an xs:dateTime element, first as a typed value and then as a string."""
        xml_text = '<dt>2019-01-01T13:40:00</dt>'
        schema = self.get_schema('<xs:element name="dt" type="xs:dateTime"/>')

        # With datetime_types=True the decoded value must be a DateTime10 instance.
        typed_value = schema.decode(xml_text, datetime_types=True)
        self.assertIsInstance(typed_value, datatypes.DateTime10)
        encoded = schema.encode(typed_value)
        self.assertEqual(etree_tostring(encoded), xml_text)

        # Without the option the decoded value must be a plain string.
        plain_value = schema.decode(xml_text)
        self.assertIsInstance(plain_value, str)
        encoded = schema.encode(plain_value)
        self.assertEqual(etree_tostring(encoded), xml_text)
Esempio n. 2
0
 def test_element_tree(self):
     """Check the ElementTree module in use and the serialization of an empty element."""
     # The accelerated Element class must differ from the pure-Python one.
     self.assertNotEqual(
         ElementTree.Element,
         ElementTree._Element_Py,
         msg="cElementTree not available!",
     )

     py_element = PyElementTree.Element('element')
     serialized = etree_tostring(py_element)
     self.assertEqual(serialized, '<element />')

     # The ElementTree name must refer to the module returned by a regular import.
     imported_module = importlib.import_module('xml.etree.ElementTree')
     self.assertEqual(imported_module, ElementTree)
Esempio n. 3
0
    def test_pure_python_element_tree(self):
        """Check which Element class PyElementTree exposes and that it serializes."""
        if sys.version_info < (3, ):
            self.assertNotEqual(PyElementTree.Element,
                                PyElementTree._Element_Py)
        else:
            # C extensions disabled by defusedxml
            self.assertEqual(PyElementTree.Element, PyElementTree._Element_Py)
            self.assertNotEqual(ElementTree.Element, PyElementTree.Element)

        py_element = PyElementTree.Element('element')
        serialized = etree_tostring(py_element)
        self.assertEqual(serialized, '<element />')
Esempio n. 4
0
    def test_date_builtin_type(self):
        """Round-trip an xs:date element and encode values with the 'mdate' type."""
        schema = self.get_schema('<xs:element name="dt" type="xs:date"/>')
        decoded = schema.decode('<dt>2001-04-15</dt>', datetime_types=True)
        re_encoded = schema.encode(decoded)
        self.assertEqual(etree_tostring(re_encoded), '<dt>2001-04-15</dt>')

        mdate = self.st_schema.types['mdate']

        # Encoding a string value yields the same string.
        from_text = mdate.encode('2001-01-01')
        self.assertIsInstance(from_text, str)
        self.assertEqual(from_text, '2001-01-01')

        # Encoding a Date object yields its string form.
        date_object = datatypes.Date.fromstring('2001-01-01')
        from_object = mdate.encode(date_object)
        self.assertIsInstance(from_object, str)
        self.assertEqual(from_object, '2001-01-01')
Esempio n. 5
0
 def check_encode(self, xsd_component, data, expected, **kwargs):
     """
     Encode *data* with *xsd_component* and compare the outcome with *expected*.

     :param xsd_component: object providing an ``encode(data, **kwargs)`` method.
     :param data: the value to encode.
     :param expected: an exception class that the encoding must raise, an \
     ElementTree element to compare with, or the expected encoded value.
     :param kwargs: keyword arguments forwarded to ``encode``.
     """
     expects_error = isinstance(expected, type) and issubclass(expected, Exception)
     if expects_error:
         self.assertRaises(expected, xsd_component.encode, data, **kwargs)
         return

     if is_etree_element(expected):
         encoded_elem = xsd_component.encode(data, **kwargs)
         self.check_etree_elements(expected, encoded_elem)
         return

     result = xsd_component.encode(data, **kwargs)
     is_value_errors_pair = (
         isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], list)
     )
     if is_value_errors_pair:
         # A (value, list) pair: only the first item is compared with the expected value.
         value = result[0]
     elif is_etree_element(result):
         self.assertEqual(expected, etree_tostring(result).strip())
         return
     else:
         value = result

     self.assertEqual(expected, value)
     self.assertTrue(isinstance(value, type(expected)))
Esempio n. 6
0
    def write(self,
              filename,
              output_format='xml',
              validation='strict',
              **kwargs):
        """
        Write loaded XML data to a file. Binds the document to the saved file if
        it's not already bound to another file.

        :param filename: filepath of the destination file.
        :param output_format: the data format of the output file, one of 'xml', \
        'json' or 'yaml'. Surrounding whitespace and letter case are ignored.
        :param validation: validation mode, can be 'strict', 'lax' or 'skip'. \
        It is passed to :meth:`to_dict` and is not used for the 'xml' format.
        :param kwargs: other options passed to :meth:`to_dict` for the 'json' \
        and 'yaml' formats.
        :raises TypeError: if *filename* is not a string.
        :raises RuntimeError: if 'yaml' is requested but PyYAML is not installed.
        :raises ValueError: if *output_format* is not an accepted format.
        """
        if not isinstance(filename, str):
            raise TypeError("the filename argument must be a string!")

        output_format = output_format.strip().lower()
        if output_format == 'xml':
            with open(filename, 'w+') as f:
                f.write(etree_tostring(self.root))

        elif output_format == 'json':
            obj = self.to_dict(validation, **kwargs)
            with open(filename, 'w+') as f:
                # Do not return here: the document still has to be bound to
                # the written file, as for the other output formats.
                json.dump(obj, f, sort_keys=True, indent=4)

        elif output_format == 'yaml':
            if yaml is None:
                raise RuntimeError("PyYAML library is not installed!")

            obj = self.to_dict(validation, **kwargs)
            with open(filename, 'w+') as f:
                yaml.dump(obj, stream=f, default_flow_style=False)
        else:
            raise ValueError(
                "Accepted output_format are 'xml', 'json' or 'yaml'!")

        # Bind the document to the written file only if it is still unbound.
        if self.filename is None:
            self.filename = filename
            self.format = output_format
Esempio n. 7
0
    def test_py_element_string_serialization(self):
        """Check etree_tostring() output for a pure-Python element with several options."""
        elem = PyElementTree.Element('element')

        # Each entry pairs the keyword options given to etree_tostring() with
        # the serialization expected for the empty element.
        cases = [
            ({}, '<element />'),
            ({'xml_declaration': True}, '<element />'),
            ({'encoding': 'us-ascii'}, b'<element />'),
            ({'encoding': 'us-ascii', 'xml_declaration': True},
             b'<?xml version="1.0" encoding="us-ascii"?>\n<element />'),
            ({'encoding': 'ascii'},
             b"<?xml version='1.0' encoding='ascii'?>\n<element />"),
            ({'encoding': 'ascii', 'xml_declaration': False}, b'<element />'),
            ({'encoding': 'utf-8'}, b'<element />'),
            ({'encoding': 'utf-8', 'xml_declaration': True},
             b'<?xml version="1.0" encoding="utf-8"?>\n<element />'),
            ({'encoding': 'iso-8859-1'},
             b"<?xml version='1.0' encoding='iso-8859-1'?>\n<element />"),
            ({'encoding': 'iso-8859-1', 'xml_declaration': False},
             b"<element />"),
            ({'method': 'html'}, '<element></element>'),
            ({'method': 'text'}, ''),
        ]
        for options, expected in cases:
            self.assertEqual(etree_tostring(elem, **options), expected)

        # The text method on a tree with nested text content.
        xml_source = ('<root>\n'
                      '  text1\n'
                      '  <elem>text2</elem>\n'
                      '</root>')
        root = PyElementTree.XML(xml_source)
        text_output = etree_tostring(root, method='text')
        self.assertEqual(text_output, '\n  text1\n  text2')
Esempio n. 8
0
        def check_etree_encode(self, root, converter=None, **kwargs):
            """
            Decode *root* with the schema, encode the result back and check that
            the re-encoded tree matches the original one.

            If the first comparison fails, the outcome depends on the converter
            and on ``skip_strict``: the failure is re-raised, the check is
            abandoned, or a second decode/encode pass is compared with the first.

            NOTE(review): ``skip_strict``, ``debug_mode`` and ``msg_tmpl`` are
            not defined in this function; presumably they come from the
            enclosing scope. ``msg_tmpl`` is used here with the ``%`` operator.

            :param root: the ElementTree element to decode and compare with.
            :param converter: the converter passed to decode() and encode().
            :param kwargs: other options forwarded to decode() and encode(); \
            the 'namespaces' entry, if any, is also used for the prefix check.
            """
            namespaces = kwargs.get('namespaces', {})
            data1 = self.schema.decode(root, converter=converter, **kwargs)
            if isinstance(data1, tuple):
                data1 = data1[0]  # When validation='lax'

            # Exhaust the nested-items iterator over the decoded data; the
            # yielded items are discarded (presumably a traversal sanity check).
            for _ in iter_nested_items(data1, dict_class=ordered_dict_class):
                pass

            elem1 = self.schema.encode(data1,
                                       path=root.tag,
                                       converter=converter,
                                       **kwargs)
            if isinstance(elem1, tuple):
                # When validation='lax': the second item holds the errors, whose
                # string form is checked for namespace prefixes (not for Parker).
                if converter is not ParkerConverter:
                    for e in elem1[1]:
                        self.check_namespace_prefixes(unicode_type(e))
                elem1 = elem1[0]

            # Check that the encoded element does not contain reserved namespace
            # prefixes. Skipped if the provided map already uses ns0..ns9.
            if namespaces and all('ns%d' % k not in namespaces
                                  for k in range(10)):
                self.check_namespace_prefixes(
                    etree_tostring(elem1, namespaces=namespaces))

            # Main check: compare the original and the re-encoded tree
            try:
                etree_elements_assert_equal(root, elem1, strict=False)
            except AssertionError as err:
                # If the check fails retry only if the converter is lossy (eg. ParkerConverter)
                # or if the XML case has defaults taken from the schema or some part of data
                # decoding is skipped by schema wildcards (set the specific argument in testfiles).
                if converter not in (ParkerConverter, AbderaConverter,
                                     JsonMLConverter) and not skip_strict:
                    if debug_mode:
                        pdb.set_trace()
                    raise AssertionError(
                        str(err) +
                        msg_tmpl % "encoded tree differs from original")
                elif converter is ParkerConverter and any(
                        XSI_TYPE in e.attrib for e in root.iter()):
                    return  # can't check encode equivalence if xsi:type is provided
                else:
                    # Lossy or augmenting cases are checked after another decoding/encoding pass
                    data2 = self.schema.decode(elem1,
                                               converter=converter,
                                               **kwargs)
                    if isinstance(data2, tuple):
                        data2 = data2[0]

                    if sys.version_info >= (3, 6):
                        # For Python < 3.6 cannot ensure attribute decoding order
                        try:
                            self.assertEqual(
                                data1, data2,
                                msg_tmpl % "re-decoded data changed")
                        except AssertionError:
                            if debug_mode:
                                pdb.set_trace()
                            raise

                    elem2 = self.schema.encode(data2,
                                               path=root.tag,
                                               converter=converter,
                                               **kwargs)
                    if isinstance(elem2, tuple):
                        elem2 = elem2[0]

                    # The second pass must reproduce the tree of the first pass.
                    try:
                        etree_elements_assert_equal(elem1, elem2, strict=False)
                    except AssertionError as err:
                        if debug_mode:
                            pdb.set_trace()
                        raise AssertionError(
                            str(err) + msg_tmpl %
                            "encoded tree differs after second pass")
Esempio n. 9
0
 def test_gregorian_yearmonth_builtin_type(self):
     """Round-trip an xs:gYearMonth element decoded with datetime_types=True."""
     xml_text = '<td>2000-12</td>'
     schema = self.get_schema('<xs:element name="td" type="xs:gYearMonth"/>')
     decoded = schema.decode(xml_text, datetime_types=True)
     re_encoded = schema.encode(decoded)
     self.assertEqual(etree_tostring(re_encoded), xml_text)
Esempio n. 10
0
 def test_duration_builtin_type(self):
     """Decode an xs:duration element and check the serialization of its re-encoding."""
     schema = self.get_schema('<xs:element name="td" type="xs:duration"/>')
     decoded = schema.decode('<td>P5Y3MT60H30.001S</td>', datetime_types=True)
     re_encoded = schema.encode(decoded)
     # The expected output expresses the 60 hours of the input as 2 days and 12 hours.
     self.assertEqual(etree_tostring(re_encoded), '<td>P5Y3M2DT12H30.001S</td>')
Esempio n. 11
0
 def test_date_builtin_type(self):
     """Round-trip an xs:date element decoded with datetime_types=True."""
     xml_text = '<dt>2001-04-15</dt>'
     schema = self.get_schema('<xs:element name="dt" type="xs:date"/>')
     decoded = schema.decode(xml_text, datetime_types=True)
     re_encoded = schema.encode(decoded)
     self.assertEqual(etree_tostring(re_encoded), xml_text)
Esempio n. 12
0
def json2xml():
    """
    Command line entry point: encode a set of JSON files to XML.

    Parses the command line, builds the XSD schema and encodes each JSON file
    with validation='lax', writing the result into the output directory under
    the name of the source file with an '.xml' suffix. An existing destination
    file is skipped unless --force is provided.

    The function ends by calling sys.exit() with the total number of errors,
    capped at 255 so that a large count cannot wrap around to a zero
    (success) exit status.

    :raises XMLSchemaValueError: if the output path exists but is not a directory.
    """
    parser = argparse.ArgumentParser(
        prog=PROGRAM_NAME,
        add_help=True,
        description="encode a set of JSON files to XML.")
    parser.usage = "%(prog)s [OPTION]... [FILE]...\n" \
                   "Try '%(prog)s --help' for more information."

    parser.add_argument('-v',
                        dest='verbosity',
                        action='count',
                        default=0,
                        help="increase output verbosity.")
    parser.add_argument('--schema',
                        type=str,
                        metavar='PATH',
                        help="path or URL to an XSD schema.")
    parser.add_argument('--version',
                        type=xsd_version_number,
                        default='1.0',
                        help="XSD schema validator to use (default is 1.0).")
    parser.add_argument('-L',
                        dest='locations',
                        nargs=2,
                        type=str,
                        action='append',
                        metavar="URI/URL",
                        help="schema location hint overrides.")
    parser.add_argument(
        '--converter',
        type=str,
        metavar='NAME',
        help="use a different XML to JSON convention instead of "
        "the default converter. Option value can be one of "
        "{!r}.".format(tuple(CONVERTERS_MAP)))
    parser.add_argument(
        '-o',
        '--output',
        type=str,
        default='.',
        help="where to write the encoded XML files, current dir by default.")
    parser.add_argument('-f',
                        '--force',
                        action="store_true",
                        default=False,
                        help="do not prompt before overwriting")
    parser.add_argument('files',
                        metavar='[JSON_FILE ...]',
                        nargs='+',
                        help="JSON files to be encoded to XML.")

    args = parser.parse_args()

    loglevel = get_loglevel(args.verbosity)
    schema_class = XMLSchema if args.version == '1.0' else XMLSchema11
    converter = get_converter(args.converter)
    schema = schema_class(args.schema,
                          locations=args.locations,
                          loglevel=loglevel)

    base_path = pathlib.Path(args.output)
    if not base_path.exists():
        base_path.mkdir()
    elif not base_path.is_dir():
        raise XMLSchemaValueError("{!r} is not a directory".format(
            str(base_path)))

    tot_errors = 0
    for json_path in map(pathlib.Path, args.files):
        xml_path = base_path.joinpath(json_path.name).with_suffix('.xml')
        if xml_path.exists() and not args.force:
            print("skip {}: the destination file exists!".format(
                str(xml_path)))
            continue

        with open(str(json_path)) as fp:
            try:
                root, errors = from_json(
                    source=fp,
                    schema=schema,
                    converter=converter,
                    validation='lax',
                )
            except (xmlschema.XMLSchemaException, URLError) as err:
                tot_errors += 1
                # Report the JSON source: it is the file that failed, while
                # the XML destination has not been written at this point.
                print("error with {}: {}".format(str(json_path), str(err)))
                continue
            else:
                if not errors:
                    print("{} converted to {}".format(str(json_path),
                                                      str(xml_path)))
                else:
                    tot_errors += len(errors)
                    print("{} converted to {} with {} errors".format(
                        str(json_path), str(xml_path), len(errors)))

        with open(str(xml_path), 'w') as fp:
            fp.write(etree_tostring(root))

    # On POSIX systems only the low 8 bits of the status reach the parent
    # process, so an uncapped count of 256 would be reported as success.
    sys.exit(min(tot_errors, 255))
Esempio n. 13
0
        def check_decode_encode(self, root, converter=None, **kwargs):
            """
            Decode *root* with the schema, encode the result back and check that
            the re-encoded tree matches the original one.

            If the first comparison fails, the failure is tolerated when
            ``lax_encode`` is set or the converter is one of the lossy ones,
            is re-raised for JsonMLConverter, and otherwise triggers a second
            decode/encode pass that is compared with the first.

            NOTE(review): ``lax_encode``, ``debug_mode``, ``msg_tmpl`` and
            ``xml_file`` are not defined in this function; presumably they come
            from the enclosing scope. ``msg_tmpl`` is used here with
            ``str.format`` and two positional arguments.

            :param root: the ElementTree element to decode and compare with.
            :param converter: the converter passed to decode() and encode().
            :param kwargs: other options forwarded to decode() and encode(); \
            the 'namespaces' and 'unordered' entries are also read here.
            :raises AssertionError: if re-encoding raises a validation error \
            or if a tree comparison fails in a case that is not tolerated.
            """
            namespaces = kwargs.get('namespaces', {})

            # Converter classification that drives the handling of a failed comparison.
            lossy = converter in (ParkerConverter, AbderaConverter,
                                  ColumnarConverter)
            losslessly = converter is JsonMLConverter
            # Trees are compared unordered unless the converter is Abdera or
            # JsonML and the 'unordered' option is not set.
            unordered = converter not in (AbderaConverter, JsonMLConverter) or \
                kwargs.get('unordered', False)

            decoded_data1 = self.schema.decode(root,
                                               converter=converter,
                                               **kwargs)
            if isinstance(decoded_data1, tuple):
                decoded_data1 = decoded_data1[0]  # When validation='lax'

            # Exhaust the nested-items iterator over the decoded data; the
            # yielded items are discarded (presumably a traversal sanity check).
            for _ in iter_nested_items(decoded_data1,
                                       dict_class=ordered_dict_class):
                pass

            # A validation error during re-encoding is reported as a test failure.
            try:
                elem1 = self.schema.encode(decoded_data1,
                                           path=root.tag,
                                           converter=converter,
                                           **kwargs)
            except XMLSchemaValidationError as err:
                raise AssertionError(
                    msg_tmpl.format("error during re-encoding", str(err)))

            if isinstance(elem1, tuple):
                # When validation='lax': the second item holds the errors, whose
                # string form is checked for namespace prefixes (not for the
                # Parker and Columnar converters).
                if converter is not ParkerConverter and converter is not ColumnarConverter:
                    for e in elem1[1]:
                        self.check_namespace_prefixes(str(e))
                elem1 = elem1[0]

            # Check that the encoded element does not contain reserved namespace
            # prefixes. Skipped if the provided map already uses ns0..ns9.
            if namespaces and all('ns%d' % k not in namespaces
                                  for k in range(10)):
                self.check_namespace_prefixes(
                    etree_tostring(elem1, namespaces=namespaces))

            # Main check: compare the original and the re-encoded tree
            try:
                etree_elements_assert_equal(root,
                                            elem1,
                                            strict=False,
                                            unordered=unordered)
            except AssertionError as err:
                # If the check fails retry only if the converter is lossy (eg. ParkerConverter)
                # or if the XML case has defaults taken from the schema or some part of data
                # decoding is skipped by schema wildcards (set the specific argument in testfiles).
                if lax_encode:
                    pass  # can't ensure encode equivalence if the test case use defaults
                elif lossy:
                    pass  # can't check encode equivalence if the converter is lossy
                elif losslessly:
                    if debug_mode:
                        pdb.set_trace()
                    raise AssertionError(
                        msg_tmpl.format("encoded tree differs from original",
                                        str(err)))
                else:
                    # The remaining converters are checked with another decoding/encoding pass
                    decoded_data2 = self.schema.decode(elem1,
                                                       converter=converter,
                                                       **kwargs)
                    if isinstance(decoded_data2, tuple):
                        decoded_data2 = decoded_data2[0]

                    if sys.version_info >= (3, 6):
                        # For Python < 3.6 cannot ensure attribute decoding order
                        try:
                            self.assertEqual(decoded_data1,
                                             decoded_data2,
                                             msg=xml_file)
                        except AssertionError:
                            if debug_mode:
                                pdb.set_trace()
                            raise

                    elem2 = self.schema.encode(decoded_data2,
                                               path=root.tag,
                                               converter=converter,
                                               **kwargs)
                    if isinstance(elem2, tuple):
                        elem2 = elem2[0]

                    # The second pass must reproduce the tree of the first pass.
                    try:
                        etree_elements_assert_equal(elem1,
                                                    elem2,
                                                    strict=False,
                                                    unordered=unordered)
                    except AssertionError as err:
                        if debug_mode:
                            pdb.set_trace()
                        raise AssertionError(
                            msg_tmpl.format(
                                "encoded tree differs after second pass",
                                str(err)))