Exemple #1
0
    def test_get_qname_functions(self):
        """Checks get_qname() with regular, empty and non-string arguments."""
        # Each case is (namespace, name, expected result); the cases are
        # evaluated in the listed order.
        cases_before_type_error = [
            (XSD_NAMESPACE, 'element', XSD_ELEMENT),
            (XSI_NAMESPACE, 'type', XSI_TYPE),
            (XSI_NAMESPACE, '', ''),
            (XSI_NAMESPACE, None, None),
            (XSI_NAMESPACE, 0, 0),
            (XSI_NAMESPACE, False, False),
        ]
        for namespace, name, expected in cases_before_type_error:
            self.assertEqual(get_qname(namespace, name), expected)

        # A true-valued non-string name is expected to be rejected when a
        # namespace is given and to be returned as is when there is none.
        self.assertRaises(TypeError, get_qname, XSI_NAMESPACE, True)
        self.assertEqual(get_qname(None, True), True)

        cases_without_namespace = [
            (None, 'element', 'element'),
            (None, '', ''),
            ('', 'element', 'element'),
        ]
        for namespace, name, expected in cases_without_namespace:
            self.assertEqual(get_qname(namespace, name), expected)
Exemple #2
0
    def test_get_qname_functions(self):
        """Verifies the results of get_qname() for several kinds of arguments."""
        check_equal = self.assertEqual

        # Namespace and local name given as non-empty strings.
        check_equal(get_qname(XSD_NAMESPACE, 'element'), XSD_ELEMENT)
        check_equal(get_qname(XSI_NAMESPACE, 'type'), XSI_TYPE)

        # Names that evaluate to false are expected to be returned unchanged.
        for falsy_name in ('', None, 0, False):
            check_equal(get_qname(XSI_NAMESPACE, falsy_name), falsy_name)

        # A true-valued non-string name is an error only if a namespace is given.
        with self.assertRaises(TypeError):
            get_qname(XSI_NAMESPACE, True)
        check_equal(get_qname(None, True), True)

        # A missing or empty namespace is expected to leave the name as is.
        check_equal(get_qname(None, 'element'), 'element')
        check_equal(get_qname(None, ''), '')
        check_equal(get_qname('', 'element'), 'element')
Exemple #3
0
    def iter_encode(self, element_data, validation='lax', **kwargs):
        """
        Creates an iterator for encoding the content of an ElementData instance
        into the text and the children of an element.

        :param element_data: an ElementData instance with unencoded data. Its \
        `content`, `attributes` and `tag` attributes are read here.
        :param validation: the validation mode: can be 'lax', 'strict' or 'skip'.
        :param kwargs: Keyword arguments for the encoding process. The keys read \
        by this method are 'converter', 'level' (default 0) and 'indent' (default 4). \
        The mapping is updated with the converter instance and is forwarded to the \
        encoders of the matched elements.
        :return: Yields the validation or encoding errors found, followed by a \
        couple `(text, children)`, where `children` is the list of the non-error \
        results yielded by the encoders of the matched elements. If the content \
        is empty then only the content itself is yielded.
        """
        if not element_data.content:  # <tag/> or <tag></tag>
            yield element_data.content
            return

        # If the caller didn't provide a converter instance, get one from the
        # schema and store it in kwargs, that is forwarded to nested encoders.
        converter = kwargs.get('converter')
        if not isinstance(converter, XMLSchemaConverter):
            converter = kwargs['converter'] = self.schema.get_converter(
                converter, **kwargs)

        # Model errors are collected as (index, particle, occurs, expected) tuples
        # and reported only at the end, after an optional reorder of the children.
        errors = []
        text = None
        children = []
        level = kwargs.get('level', 0)
        indent = kwargs.get('indent', 4)
        # A newline followed by the indentation of this nesting level.
        padding = '\n' + ' ' * indent * level
        # NOTE(review): presumably the namespace mapped to the empty prefix - confirm.
        default_namespace = converter.get('')
        losslessly = converter.losslessly

        model = ModelVisitor(self)
        # Number of character data items met, subtracted from the item index
        # in order to obtain the position among the child elements only.
        cdata_index = 0

        if isinstance(element_data.content, dict):
            content = self.sort_content(element_data.content)
        else:
            content = element_data.content

        for index, (name, value) in enumerate(content):
            # Items with an integer name are handled as character data: the value
            # goes in the text if there are no children yet, otherwise in the tail
            # of the last child.
            if isinstance(name, int):
                if not children:
                    text = padding + value if text is None else text + value + padding
                elif children[-1].tail is None:
                    children[-1].tail = padding + value
                else:
                    children[-1].tail += value + padding
                cdata_index += 1
                continue

            # Qualify the name with the default namespace, unless there is no
            # default namespace or the name is already in '{namespace}local' form.
            if not default_namespace or name[0] == '{':
                tag = name
            else:
                tag = '{%s}%s' % (default_namespace, name)

            # Advance the model visitor until an element that matches the tag is
            # found or the model has no more elements.
            while model.element is not None:
                # `and` binds tighter than `or`: is_matching() is called only for
                # model elements that have no name.
                if tag in model.element.names or model.element.name is None \
                        and model.element.is_matching(tag, default_namespace):
                    xsd_element = model.element
                else:
                    # Try with the substitutes of the current model element.
                    for xsd_element in model.element.iter_substitutes():
                        if tag in xsd_element.names:
                            break
                    else:
                        # No match: record the errors produced by the advance of
                        # the model and retry with the next model element.
                        for particle, occurs, expected in model.advance():
                            errors.append((index - cdata_index, particle,
                                           occurs, expected))
                        continue

                # For a wildcard the value to encode is coupled with the
                # qualified name of the item.
                if isinstance(xsd_element, XsdAnyElement):
                    value = get_qname(default_namespace, name), value
                for result in xsd_element.iter_encode(value, validation,
                                                      **kwargs):
                    if isinstance(result, XMLSchemaValidationError):
                        yield result
                    else:
                        children.append(result)

                # NOTE(review): the True argument presumably notifies the model
                # that the current element has been matched - confirm.
                for particle, occurs, expected in model.advance(True):
                    errors.append(
                        (index - cdata_index, particle, occurs, expected))
                break
            else:
                # Reached when the while loop ends without a break, that is when
                # the model has no more elements to match the item against.
                if validation == "strict" or losslessly:
                    errors.append((index - cdata_index, self, 0, []))

                # Fallback: search a match among all the elements of the group,
                # regardless of the model state.
                for xsd_element in self.iter_elements():
                    if tag in xsd_element.names or xsd_element.name is None \
                            and xsd_element.is_matching(name, default_namespace):
                        if isinstance(xsd_element, XsdAnyElement):
                            value = get_qname(default_namespace, name), value
                        for result in xsd_element.iter_encode(
                                value, validation, **kwargs):
                            if isinstance(result, XMLSchemaValidationError):
                                yield result
                            else:
                                children.append(result)
                        break
                else:
                    # No element of the group matches the item.
                    if validation != 'skip':
                        reason = '%r does not match any declared element of the model group.' % name
                        yield self.validation_error(validation, reason, value,
                                                    **kwargs)

        # The content is finished but the model still has a current element:
        # stop the model and record the errors that it reports, positioned
        # after the last child.
        if model.element is not None:
            index = len(element_data.content) - cdata_index
            for particle, occurs, expected in model.stop():
                errors.append((index, particle, occurs, expected))

        # If the validation is not strict, reorder the children when there are
        # model errors. The collected errors are still reported below.
        if errors and validation != 'strict':
            children = self.sort_children(children, default_namespace)

        # The tail of the last child ends with the padding of the parent level,
        # obtained removing `indent` characters from the padding. When the slice
        # is empty (e.g. with indent=0) a single newline is used.
        if children:
            if children[-1].tail is None:
                children[-1].tail = padding[:-indent] or '\n'
            else:
                children[-1].tail = children[-1].tail.strip() + (
                    padding[:-indent] or '\n')

        if validation != 'skip' and errors:
            # Build an element to be passed to the model errors.
            attrib = {
                k: unicode_type(v)
                for k, v in element_data.attributes.items()
            }
            if validation == 'lax' and converter.etree_element_class is not etree_element:
                # The children are replaced by new elements built with the
                # converter, copying only tag and attributes.
                child_tags = [
                    converter.etree_element(e.tag, attrib=e.attrib)
                    for e in children
                ]
                elem = converter.etree_element(element_data.tag, text,
                                               child_tags, attrib)
            else:
                elem = converter.etree_element(element_data.tag, text,
                                               children, attrib)

            for index, particle, occurs, expected in errors:
                yield self.children_validation_error(validation, elem, index,
                                                     particle, occurs,
                                                     expected, **kwargs)

        yield text, children
Exemple #4
0
    def _parse(self):
        """
        Parses the XSD element associated to the group (``self.elem``).

        For an ``XSD_GROUP`` element with only the 'ref' attribute the referred
        global group is looked up and appended to this group. With only the
        'name' attribute the child content model is parsed. For a sequence, all
        or choice element the element itself is the content model. For a complex
        type, extension or restriction element only name and model are reset.
        Anomalies are reported through ``self.parse_error()``.
        """
        super(XsdGroup, self)._parse()
        self.clear()
        elem = self.elem
        self._parse_particle(elem)

        if elem.tag == XSD_GROUP:
            group_name = elem.get('name')
            group_ref = elem.get('ref')

            # The attributes 'name' and 'ref' are mutually exclusive and one
            # of them is required.
            if group_name is None and group_ref is None:
                self.parse_error("missing both attributes 'name' and 'ref'")
                return
            if group_name is not None and group_ref is not None:
                self.parse_error("found both attributes 'name' and 'ref'")
                return

            if group_name is None:
                # Reference to a global group
                if self.parent is None:
                    self.parse_error("a group reference cannot be global")

                try:
                    self.name = self.schema.resolve_qname(group_ref)
                except ValueError as err:
                    self.parse_error(err, elem)
                    return

                try:
                    referred_group = self.schema.maps.lookup_group(self.name)
                except KeyError:
                    self.parse_error("missing group %r" % self.prefixed_name)
                    referred_group = self.schema.create_any_content_group(
                        self, self.name)

                if isinstance(referred_group, tuple):
                    # A tuple is returned for a disallowed circular definition:
                    # the group is replaced with an any content group.
                    self.parse_error(
                        "Circular definitions detected for group %r:" % self.ref,
                        referred_group[0])
                    self.model = 'sequence'
                    self.mixed = True
                    self.append(XsdAnyElement(ANY_ELEMENT, self.schema, self))
                    return

                self.model = referred_group.model
                if self.model == 'all':
                    if self.max_occurs != 1:
                        self.parse_error(
                            "maxOccurs must be 1 for 'all' model groups")
                    if self.min_occurs not in (0, 1):
                        self.parse_error(
                            "minOccurs must be (0 | 1) for 'all' model groups")
                    if self.schema.XSD_VERSION == '1.0' and \
                            isinstance(self.parent, XsdGroup):
                        self.parse_error(
                            "in XSD 1.0 the 'all' model group cannot be nested")
                self.append(referred_group)
                return

            # Named group definition
            self.name = get_qname(self.target_namespace, group_name)
            content_model = self._parse_component(elem)
            if self.parent is not None:
                self.parse_error(
                    "attribute 'name' not allowed for a local group")
            else:
                if 'minOccurs' in elem.attrib:
                    self.parse_error(
                        "attribute 'minOccurs' not allowed for a global group")
                if 'maxOccurs' in elem.attrib:
                    self.parse_error(
                        "attribute 'maxOccurs' not allowed for a global group")
                if 'minOccurs' in content_model.attrib:
                    self.parse_error(
                        "attribute 'minOccurs' not allowed for the model of a global group",
                        content_model)
                if 'maxOccurs' in content_model.attrib:
                    self.parse_error(
                        "attribute 'maxOccurs' not allowed for the model of a global group",
                        content_model)
                if content_model.tag not in {XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}:
                    self.parse_error(
                        'unexpected tag %r' % content_model.tag, content_model)
                    return

        elif elem.tag in {XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}:
            # Local group (sequence|all|choice)
            if 'name' in elem.attrib:
                self.parse_error(
                    "attribute 'name' not allowed for a local group")
            content_model = elem
            self.name = None

        elif elem.tag in {XSD_COMPLEX_TYPE, XSD_EXTENSION, XSD_RESTRICTION}:
            # No content model to parse for these elements.
            self.name = self.model = None
            return

        else:
            self.parse_error('unexpected tag %r' % elem.tag, elem)
            return

        self._parse_content_model(elem, content_model)
Exemple #5
0
    def iter_encode(self, element_data, validation='lax', converter=None, level=0, indent=4, **kwargs):
        """
        Creates an iterator for encoding the content of an ElementData instance
        into the text and the children of an element.

        :param element_data: an ElementData instance with unencoded data. Its \
        `content`, `attributes` and `tag` attributes are read here.
        :param validation: the validation mode: can be 'lax', 'strict' or 'skip'.
        :param converter: an :class:`XMLSchemaConverter` subclass or instance to use \
        for the encoding.
        :param level: the depth of the element data in the tree structure.
        :param indent: number of spaces for XML indentation (default is 4).
        :param kwargs: keyword arguments for the encoding process. The key \
        'unordered' is read here; the mapping is forwarded to the encoders of \
        the matched elements.
        :return: yields the validation or encoding errors found, followed by a \
        couple `(text, children)`, where `children` is the list of the non-error \
        results yielded by the encoders of the matched elements. If the content \
        is empty then only the content itself is yielded.
        """
        if not element_data.content:  # <tag/> or <tag></tag>
            yield element_data.content
            return

        # Model errors are collected as (index, particle, occurs, expected) tuples
        # and reported only at the end.
        errors = []
        text = None
        children = []
        # A newline followed by the indentation of this nesting level.
        padding = '\n' + ' ' * indent * level

        # If the provided converter cannot be queried (e.g. it is None) then a
        # converter instance is requested to the schema.
        # NOTE(review): converter.get('') is presumably the namespace mapped to
        # the empty prefix - confirm.
        try:
            default_namespace = converter.get('')
        except (AttributeError, TypeError):
            converter = self.schema.get_converter(converter, level=level, **kwargs)
            default_namespace = converter.get('')

        model = ModelVisitor(self)
        # Number of character data items met, subtracted from the item index
        # in order to obtain the position among the child elements only.
        cdata_index = 0
        # Choose how to iterate the content: through the model for a dictionary
        # or when the 'unordered' option is set, as is for a lossless converter,
        # otherwise through the collapsing iterator of the model.
        if isinstance(element_data.content, dict) or kwargs.get('unordered'):
            content = model.iter_unordered_content(element_data.content)
        elif converter.losslessly:
            content = element_data.content
        else:
            content = model.iter_collapsed_content(element_data.content)

        for index, (name, value) in enumerate(content):
            # Items with an integer name are handled as character data: the value
            # goes in the text if there are no children yet, otherwise in the tail
            # of the last child.
            if isinstance(name, int):
                if not children:
                    text = padding + value if text is None else text + value + padding
                elif children[-1].tail is None:
                    children[-1].tail = padding + value
                else:
                    children[-1].tail += value + padding
                cdata_index += 1
                continue

            # A matching `interleave` component takes precedence over the model.
            # The value to encode is coupled with the qualified name of the item.
            if self.interleave and self.interleave.is_matching(name, default_namespace, self):
                xsd_element = self.interleave
                value = get_qname(default_namespace, name), value
            else:
                # Advance the model visitor until an element that matches the
                # name is found or the model has no more elements.
                while model.element is not None:
                    xsd_element = model.element.match(name, default_namespace, self)
                    if xsd_element is None:
                        # No match: record the errors produced by the advance of
                        # the model and retry with the next model element.
                        for particle, occurs, expected in model.advance():
                            errors.append((index - cdata_index, particle, occurs, expected))
                        continue
                    elif isinstance(xsd_element, XsdAnyElement):
                        value = get_qname(default_namespace, name), value

                    # NOTE(review): the True argument presumably notifies the
                    # model that the current element has been matched - confirm.
                    for particle, occurs, expected in model.advance(True):
                        errors.append((index - cdata_index, particle, occurs, expected))
                    break
                else:
                    # Reached when the while loop ends without a break, that is
                    # when the model has no more elements to match the item against.
                    if self.suffix and self.suffix.is_matching(name, default_namespace, self):
                        xsd_element = self.suffix
                        value = get_qname(default_namespace, name), value
                    else:
                        errors.append((index - cdata_index, self, 0, []))
                        # Fallback: search a match among all the elements of the
                        # group, regardless of the model state.
                        for xsd_element in self.iter_elements():
                            if not xsd_element.is_matching(name, default_namespace, self):
                                continue
                            elif isinstance(xsd_element, XsdAnyElement):
                                value = get_qname(default_namespace, name), value
                            break
                        else:
                            # No element of the group matches the item: report it
                            # (unless validation is skipped) and don't encode it.
                            if validation != 'skip':
                                reason = '%r does not match any declared element of the model group.' % name
                                yield self.validation_error(validation, reason, value, **kwargs)
                            continue

            # Encode the value with the matched element: errors are passed to
            # the caller, the other results are collected as children.
            for result in xsd_element.iter_encode(
                    value, validation, converter=converter, level=level, indent=indent, **kwargs):
                if isinstance(result, XMLSchemaValidationError):
                    yield result
                else:
                    children.append(result)

        # The content is finished but the model still has a current element:
        # stop the model and record the errors that it reports, positioned
        # after the last child.
        if model.element is not None:
            index = len(element_data.content) - cdata_index
            for particle, occurs, expected in model.stop():
                errors.append((index, particle, occurs, expected))

        # The tail of the last child ends with the padding of the parent level,
        # obtained removing `indent` characters from the padding. When the slice
        # is empty (e.g. with indent=0) a single newline is used.
        if children:
            if children[-1].tail is None:
                children[-1].tail = padding[:-indent] or '\n'
            else:
                children[-1].tail = children[-1].tail.strip() + (padding[:-indent] or '\n')

        if validation != 'skip' and errors:
            # Build an element to be passed to the model errors. In lax mode with
            # a custom element class the children are replaced by new elements
            # that copy only tag and attributes.
            attrib = {k: unicode_type(v) for k, v in element_data.attributes.items()}
            if validation == 'lax' and converter.etree_element_class is not etree_element:
                child_tags = [converter.etree_element(e.tag, attrib=e.attrib) for e in children]
                elem = converter.etree_element(element_data.tag, text, child_tags, attrib)
            else:
                elem = converter.etree_element(element_data.tag, text, children, attrib)

            for index, particle, occurs, expected in errors:
                yield self.children_validation_error(validation, elem, index, particle, occurs, expected, **kwargs)

        yield text, children
Exemple #6
0
    def _parse(self):
        """
        Parses the XSD element associated to the group (``self.elem``).

        For an ``XSD_GROUP`` element that is a reference the referred global
        group is looked up, appended to this group and stored in ``self.ref``.
        For an ``XSD_GROUP`` element with a 'name' attribute the child content
        model is parsed. For a sequence, all or choice element the element itself
        is the content model. For a complex type, extension or restriction element
        only name and model are reset. Anomalies are reported through
        ``self.parse_error()``.
        """
        super(XsdGroup, self)._parse()
        self.clear()
        elem = self.elem
        self._parse_particle(elem)

        if elem.tag == XSD_GROUP:
            # Global group or reference
            if self._parse_reference():
                try:
                    referred_group = self.schema.maps.lookup_group(self.name)
                except KeyError:
                    self.parse_error("missing group %r" % self.prefixed_name)
                    referred_group = self.schema.create_any_content_group(self, self.name)

                if not isinstance(referred_group, tuple):
                    self.model = referred_group.model
                    if self.model == 'all':
                        if self.max_occurs != 1:
                            self.parse_error("maxOccurs must be 1 for 'all' model groups")
                        if self.min_occurs not in (0, 1):
                            self.parse_error("minOccurs must be (0 | 1) for 'all' model groups")
                        if self.xsd_version == '1.0' and isinstance(self.parent, XsdGroup):
                            self.parse_error("in XSD 1.0 the 'all' model group cannot be nested")
                    self.append(referred_group)
                    self.ref = referred_group
                    return

                # A tuple is returned for a disallowed circular definition:
                # the group is replaced with an any content group.
                self.parse_error(
                    "Circular definitions detected for group %r:" % self.name, referred_group[0]
                )
                self.model = 'sequence'
                self.mixed = True
                any_element_class = self.schema.BUILDERS.any_element_class
                self.append(any_element_class(ANY_ELEMENT, self.schema, self))
                return

            # Not a reference: nothing else to do if a KeyError is raised
            # while the name of the group is built.
            try:
                self.name = get_qname(self.target_namespace, elem.attrib['name'])
            except KeyError:
                return

            content_model = self._parse_child_component(elem, strict=True)
            if self.parent is not None:
                self.parse_error("attribute 'name' not allowed for a local group")
            else:
                if 'minOccurs' in elem.attrib:
                    self.parse_error("attribute 'minOccurs' not allowed for a global group")
                if 'maxOccurs' in elem.attrib:
                    self.parse_error("attribute 'maxOccurs' not allowed for a global group")
                if 'minOccurs' in content_model.attrib:
                    self.parse_error(
                        "attribute 'minOccurs' not allowed for the model of a global group",
                        content_model
                    )
                if 'maxOccurs' in content_model.attrib:
                    self.parse_error(
                        "attribute 'maxOccurs' not allowed for the model of a global group",
                        content_model
                    )
                if content_model.tag not in {XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}:
                    self.parse_error('unexpected tag %r' % content_model.tag, content_model)
                    return

        elif elem.tag in {XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}:
            # Local group (sequence|all|choice)
            if 'name' in elem.attrib:
                self.parse_error("attribute 'name' not allowed for a local group")
            content_model = elem
            self.name = None

        elif elem.tag in {XSD_COMPLEX_TYPE, XSD_EXTENSION, XSD_RESTRICTION}:
            # No content model to parse for these elements.
            self.name = self.model = None
            return

        else:
            self.parse_error('unexpected tag %r' % elem.tag)
            return

        self._parse_content_model(elem, content_model)
def etree_elements_assert_equal(elem,
                                other,
                                strict=True,
                                skip_comments=True,
                                unordered=False):
    """
    Tests the equality of two XML Element trees.

    The children of the two nodes are paired and compared recursively by tag,
    attributes, number of children, text and tail. The two nodes passed as
    arguments are not compared with each other, only their descendants are.

    :param elem: the master Element tree, reference for namespace mapping.
    :param other: the other Element tree that has to be compared.
    :param strict: asserts strictly equality. `True` for default. If `False` \
    tags are compared after a namespace mapping, surrounding whitespace is \
    ignored for attribute values, texts and tails, numeric values are compared \
    as floats and the texts 'true'/'false' are equivalent to '1'/'0'.
    :param skip_comments: skip comments from comparison. Comments are the nodes \
    that have a callable tag.
    :param unordered: children may have different order. The children of both \
    trees are sorted by tag before the comparison.
    :raise: an AssertionError containing information about first difference encountered.
    """
    def is_skipped(node):
        # Comment nodes are recognized by their callable tag.
        return skip_comments and callable(node.tag)

    def sort_key(node):
        # Nodes with a callable tag are sorted before all the others.
        return '' if callable(node.tag) else node.tag

    if unordered:
        children = sorted(elem, key=sort_key)
        other_children = iter(sorted(other, key=sort_key))
    else:
        children = elem
        other_children = iter(other)

    namespace = ''
    for e1 in children:
        if is_skipped(e1):
            continue

        # Pair e1 with the next node of the other tree that is not skipped.
        for e2 in other_children:
            if not is_skipped(e2):
                break
        else:
            raise AssertionError("Node %r has more children than %r" %
                                 (elem, other))

        if strict:
            if e1.tag != e2.tag:
                raise AssertionError("%r != %r: tags differ" % (e1, e2))
        else:
            # Compare the tags using the last namespace found in the master tree.
            namespace = get_namespace(e1.tag) or namespace
            if get_qname(namespace, e1.tag) != get_qname(namespace, e2.tag):
                raise AssertionError("%r != %r: tags differ." % (e1, e2))

        # Attributes
        if e1.attrib != e2.attrib:
            if strict:
                msg = "{!r} != {!r}: attributes differ: {!r} != {!r}"
                raise AssertionError(msg.format(e1, e2, e1.attrib, e2.attrib))
            else:
                msg = "%r != %r: attribute keys differ: %r != %r"
                if sorted(e1.attrib.keys()) != sorted(e2.attrib.keys()):
                    raise AssertionError(
                        msg % (e1, e2, e1.attrib.keys(), e2.attrib.keys()))
                for k in e1.attrib:
                    a1, a2 = e1.attrib[k].strip(), e2.attrib[k].strip()
                    if a1 != a2:
                        # Values that differ as strings are still accepted
                        # if they are equal as floats.
                        try:
                            if float(a1) != float(a2):
                                raise ValueError()
                        except (ValueError, TypeError):
                            msg = "%r != %r: attribute %r values differ: %r != %r"
                            raise AssertionError(msg %
                                                 (e1, e2, k, a1, a2)) from None

        # Number of children
        if skip_comments:
            nc1 = sum(1 for c in e1 if not callable(c.tag))
            nc2 = sum(1 for c in e2 if not callable(c.tag))
        else:
            nc1 = len(e1)
            nc2 = len(e2)
        if nc1 != nc2:
            msg = "%r != %r: children number differ: %r != %r"
            raise AssertionError(msg % (e1, e2, nc1, nc2))

        # Text
        if e1.text != e2.text:
            message = "%r != %r: texts differ: %r != %r" % (e1, e2, e1.text,
                                                            e2.text)
            if strict:
                raise AssertionError(message)
            elif e1.text is None:
                if e2.text.strip():
                    raise AssertionError(message)
            elif e2.text is None:
                if e1.text.strip():
                    raise AssertionError(message)
            elif _REGEX_SPACES.sub('', e1.text.strip()) != _REGEX_SPACES.sub(
                    '', e2.text.strip()):
                text1 = e1.text.strip()
                text2 = e2.text.strip()
                # Boolean literals are equivalent to their numeric form.
                if text1 == 'false':
                    if text2 != '0':
                        raise AssertionError(message)
                elif text1 == 'true':
                    if text2 != '1':
                        raise AssertionError(message)
                elif text2 == 'false':
                    if text1 != '0':
                        raise AssertionError(message)
                elif text2 == 'true':
                    if text1 != '1':
                        raise AssertionError(message)
                else:
                    # Otherwise the texts have to be whitespace-separated
                    # lists of the same length, equal item by item as floats.
                    try:
                        items1 = text1.split()
                        items2 = text2.split()
                        if len(items1) != len(items2):
                            raise ValueError()
                        if not all(
                                float(x1) == float(x2)
                                for x1, x2 in zip(items1, items2)):
                            raise ValueError()
                    except (AssertionError, ValueError, TypeError):
                        raise AssertionError(message) from None

        # Tail
        if e1.tail != e2.tail:
            message = "%r != %r: tails differ: %r != %r" % (e1, e2, e1.tail,
                                                            e2.tail)
            if strict:
                raise AssertionError(message)
            elif e1.tail is None:
                if e2.tail.strip():
                    raise AssertionError(message)
            elif e2.tail is None:
                if e1.tail.strip():
                    raise AssertionError(message)
            elif e1.tail.strip() != e2.tail.strip():
                raise AssertionError(message)

        etree_elements_assert_equal(e1, e2, strict, skip_comments, unordered)

    # The other tree must not have further nodes, except for the ones that are
    # skipped from the comparison (e.g. trailing comments).
    for e2 in other_children:
        if not is_skipped(e2):
            raise AssertionError("Node %r has lesser children than %r." %
                                 (elem, other))
Exemple #8
0
def etree_elements_assert_equal(elem, other, strict=True, skip_comments=True):
    """
    Tests the equality of two XML Element trees.

    Both trees are walked in document order, pairing the nodes one by one. For each
    pair the tag, the attributes, the number of children, the text and the tail are
    compared, and the first difference found is reported.

    When *strict* is `False` the comparison is relaxed: tags of descendant nodes are
    qualified with the namespace of the master tree before being compared, whitespace
    differences in texts are ignored, attribute values and tails are compared after
    stripping, and values that are equal as floats (e.g. '1' and '1.0') are accepted.

    :param elem: the master Element tree, reference for namespace mapping.
    :param other: the other Element tree that has to be compared.
    :param strict: asserts strictly equality. `True` for default.
    :param skip_comments: if `True` (the default) comment nodes of the master tree \
    are skipped and comments are left out of the children counts.
    :raise: an AssertionError containing information about first difference encountered.
    """
    _REGEX_SPACES = re.compile(r'\s+')

    def same_number(value1, value2):
        # Two strings are also accepted as equal when they represent the same float.
        try:
            return float(value1) == float(value2)
        except (ValueError, TypeError):
            return False

    other_elements = iter(other.iter())
    namespace = ''
    for e1 in elem.iter():
        if skip_comments and e1.tag is lxml_etree_comment:
            continue

        try:
            e2 = next(other_elements)
        except StopIteration:
            raise AssertionError("Second tree ends before the first: %r." % e1)

        # Tag: the root is always compared literally, descendants only in strict mode.
        if strict or e1 is elem:
            same_tag = e1.tag == e2.tag
        else:
            # Keep the last namespace seen in the master tree for unqualified tags.
            namespace = get_namespace(e1.tag) or namespace
            same_tag = get_qname(namespace, e1.tag) == get_qname(namespace, e2.tag)
        if not same_tag:
            raise AssertionError("%r != %r: tags differ." % (e1, e2))

        # Attributes
        if e1.attrib != e2.attrib:
            if strict:
                raise AssertionError("%r != %r: attribute differ: %r != %r." %
                                     (e1, e2, e1.attrib, e2.attrib))
            if e1.attrib.keys() != e2.attrib.keys():
                raise AssertionError(
                    "%r != %r: attribute keys differ: %r != %r." %
                    (e1, e2, e1.attrib.keys(), e2.attrib.keys()))
            for k in e1.attrib:
                a1, a2 = e1.attrib[k].strip(), e2.attrib[k].strip()
                if a1 != a2 and not same_number(a1, a2):
                    raise AssertionError(
                        "%r != %r: attribute %r differ: %r != %r." %
                        (e1, e2, k, a1, a2))

        # Number of children
        if skip_comments:
            nc1 = len([c for c in e1 if c.tag is not lxml_etree_comment])
            nc2 = len([c for c in e2 if c.tag is not lxml_etree_comment])
        else:
            nc1 = len(e1)
            nc2 = len(e2)
        if nc1 != nc2:
            raise AssertionError("%r != %r: children number differ: %r != %r." %
                                 (e1, e2, nc1, nc2))

        # Text
        if e1.text != e2.text:
            message = "%r != %r: texts differ: %r != %r." % (e1, e2, e1.text,
                                                             e2.text)
            if strict:
                raise AssertionError(message)
            elif e1.text is None:
                # A missing text is equivalent to a whitespace-only text.
                if e2.text.strip():
                    raise AssertionError(message)
            elif e2.text is None:
                if e1.text.strip():
                    raise AssertionError(message)
            else:
                text1, text2 = e1.text.strip(), e2.text.strip()
                # Pattern.sub() takes the replacement first and the subject string second.
                if _REGEX_SPACES.sub('', text1) != _REGEX_SPACES.sub('', text2) \
                        and not same_number(text1, text2):
                    raise AssertionError(message)

        # Tail
        if e1.tail != e2.tail:
            message = "%r != %r: tails differ: %r != %r." % (e1, e2, e1.tail,
                                                             e2.tail)
            if strict:
                raise AssertionError(message)
            elif e1.tail is None:
                if e2.tail.strip():
                    raise AssertionError(message)
            elif e2.tail is None:
                if e1.tail.strip():
                    raise AssertionError(message)
            elif e1.tail.strip() != e2.tail.strip():
                raise AssertionError(message)

    # The master tree is exhausted: the other tree must be exhausted too.
    try:
        e2 = next(other_elements)
    except StopIteration:
        pass
    else:
        raise AssertionError("First tree ends before the second: %r." % e2)
Exemple #9
0
    def iter_encode(self, element_data, validation='lax', converter=None, **kwargs):
        """
        Creates an iterator for encoding data to a list containing Element data.

        :param element_data: an ElementData instance with unencoded data.
        :param validation: the validation mode: can be 'lax', 'strict' or 'skip'.
        :param converter: an :class:`XMLSchemaConverter` subclass or instance.
        :param kwargs: Keyword arguments for the encoding process. The keys 'level' \
        and 'indent' are read here to build the padding used for texts and tails.
        :return: Yields a couple with the text of the Element and the list of the \
        encoded children, eventually preceded by a sequence of validation \
        or encoding errors. If the content is empty only the content itself is yielded.
        """
        if not element_data.content:  # <tag/> or <tag></tag>
            yield element_data.content
            return

        # Anything that is not already a converter instance is resolved through the schema.
        if not isinstance(converter, XMLSchemaConverter):
            converter = self.schema.get_converter(converter, **kwargs)

        errors = []
        text = None
        children = []
        level = kwargs.get('level', 0)
        indent = kwargs.get('indent', 4)
        # A newline followed by the indentation of the current nesting level.
        padding = '\n' + ' ' * indent * level
        default_namespace = converter.get('')
        losslessly = converter.losslessly

        model = XsdModelVisitor(self)
        # Number of text entries met so far: index - cdata_index is the position
        # of an entry counted among the non-text entries only.
        cdata_index = 0

        for index, (name, value) in enumerate(element_data.content):
            if isinstance(name, int):
                # Entries with an integer key carry character data: they go into the
                # element text (before the first child) or into the last child's tail.
                if not children:
                    text = padding + value if text is None else text + value + padding
                elif children[-1].tail is None:
                    children[-1].tail = padding + value
                else:
                    children[-1].tail += value + padding
                cdata_index += 1
                continue

            # Names that don't start with '{' are qualified with the default
            # namespace, when the converter provides a non-empty one.
            if not default_namespace or name[0] == '{':
                tag = name
            else:
                tag = '{%s}%s' % (default_namespace, name)

            # Walk the model until its current element accepts the tag.
            while model.element is not None:
                if tag in model.element.names or model.element.name is None \
                        and model.element.is_matching(tag, default_namespace):
                    xsd_element = model.element
                else:
                    for xsd_element in model.element.iter_substitutes():
                        if tag in xsd_element.names:
                            break
                    else:
                        # No match at this position: record what the model reports
                        # and retry the while loop with the model's new current element.
                        for particle, occurs, expected in model.advance():
                            errors.append((index - cdata_index, particle, occurs, expected))
                        continue

                if isinstance(xsd_element, XsdAnyElement):
                    # Wildcards receive the qualified name together with the value.
                    value = get_qname(default_namespace, name), value
                for result in xsd_element.iter_encode(value, validation, converter, **kwargs):
                    if isinstance(result, XMLSchemaValidationError):
                        yield result
                    else:
                        children.append(result)

                for particle, occurs, expected in model.advance(True):
                    errors.append((index - cdata_index, particle, occurs, expected))
                break
            else:
                # The while loop ended without a break: model.element is None,
                # so the model has no current element to match the tag against.
                if losslessly:
                    errors.append((index - cdata_index, self, 0, []))

                # Fallback: search a matching declaration among all the elements
                # of the group, ignoring the position in the model.
                for xsd_element in self.iter_elements():
                    if tag in xsd_element.names or xsd_element.name is None \
                            and xsd_element.is_matching(name, default_namespace):
                        if isinstance(xsd_element, XsdAnyElement):
                            value = get_qname(default_namespace, name), value
                        for result in xsd_element.iter_encode(value, validation, converter, **kwargs):
                            if isinstance(result, XMLSchemaValidationError):
                                yield result
                            else:
                                children.append(result)
                        break
                else:
                    if validation != 'skip':
                        reason = '%r does not match any declared element of the model group.' % name
                        yield self.validation_error(validation, reason, value, **kwargs)

        # The content is finished but the model still has a current element:
        # collect the errors reported by stopping the model.
        if model.element is not None:
            index = len(element_data.content) - cdata_index
            for particle, occurs, expected in model.stop():
                errors.append((index, particle, occurs, expected))

        # If the validation is not strict tries to solve model errors with a reorder of the children
        if errors and validation != 'strict':
            children = self.sort_children(children, default_namespace)

        if children:
            # The tail of the last child ends with the padding shortened by one indent
            # step, or with a bare newline when that slice is empty.
            if children[-1].tail is None:
                children[-1].tail = padding[:-indent] or '\n'
            else:
                children[-1].tail = children[-1].tail.strip() + (padding[:-indent] or '\n')

        if validation != 'skip' and errors:
            # Build an element to attach to the errors on the children.
            attrib = {k: unicode_type(v) for k, v in element_data.attributes.items()}
            if validation == 'lax' and converter.etree_element_class is not etree_element:
                # Here the children are replaced by new elements that carry only
                # the tag and the attributes of the encoded ones.
                child_tags = [converter.etree_element(e.tag, attrib=e.attrib) for e in children]
                elem = converter.etree_element(element_data.tag, text, child_tags, attrib)
            else:
                elem = converter.etree_element(element_data.tag, text, children, attrib)

            for index, particle, occurs, expected in errors:
                yield self.children_validation_error(validation, elem, index, particle, occurs, expected, **kwargs)

        yield text, children
Exemple #10
0
    def _parse(self):
        """
        Parses the group from its XSD element.

        An XSD_GROUP element is either a reference to a global group (attribute 'ref')
        or a named group (attribute 'name') that wraps a model element. A model element
        (sequence, all or choice) defines a local group. For a complex type, an extension
        or a restriction element only the name and the model are reset. Any other tag
        is reported as a parse error.
        """
        super(XsdGroup, self)._parse()
        if self and not hasattr(self, '_elem'):
            self.clear()
        elem = self.elem
        self._parse_particle(elem)

        model_tags = {XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}

        if elem.tag != XSD_GROUP:
            if elem.tag in model_tags:
                # Local group: the element is its own content model.
                self.name = None
                self._parse_content_model(elem, elem)
            elif elem.tag in {XSD_COMPLEX_TYPE, XSD_EXTENSION, XSD_RESTRICTION}:
                self.name = self.model = None
            else:
                self.parse_error('unexpected tag %r' % elem.tag, elem)
            return

        # From here on the element is a group declaration or a group reference.
        name = elem.get('name')
        ref = elem.get('ref')

        if name is not None and ref is not None:
            self.parse_error("found both attributes 'name' and 'ref'", elem)
            return
        if name is None and ref is None:
            self.parse_error("missing both attributes 'name' and 'ref'", elem)
            return

        if name is None:
            # Reference to a global group
            if self.is_global:
                self.parse_error("a group reference cannot be global", elem)
            self.name = prefixed_to_qname(ref, self.namespaces)

            try:
                xsd_group = self.schema.maps.lookup_group(self.name)
            except KeyError:
                self.parse_error("missing group %r" % self.prefixed_name, elem)
                xsd_group = self.schema.create_any_content_group(self, self.name)

            if not isinstance(xsd_group, tuple):
                self.model = xsd_group.model
                self.append(xsd_group)
            else:
                # A tuple marks a disallowed circular definition: the group is
                # substituted with a mixed sequence that holds a wildcard element.
                self.parse_error("Circular definitions detected for group %r:" % self.ref, xsd_group[0])
                self.model = 'sequence'
                self.mixed = True
                self.append(XsdAnyElement(ANY_ELEMENT, self.schema, self))
            return

        # Named group
        self.name = get_qname(self.target_namespace, name)
        content_model = self._parse_component(elem)
        if not self.is_global:
            self.parse_error("attribute 'name' not allowed for a local group", self)
        else:
            if 'minOccurs' in elem.attrib:
                self.parse_error("attribute 'minOccurs' not allowed for a global group", self)
            if 'maxOccurs' in elem.attrib:
                self.parse_error("attribute 'maxOccurs' not allowed for a global group", self)

        if content_model.tag not in model_tags:
            self.parse_error('unexpected tag %r' % content_model.tag, content_model)
            return

        self._parse_content_model(elem, content_model)
Exemple #11
0
    def _parse(self):
        """
        Builds the group from its XSD element.

        Handles three shapes of element: an XSD_GROUP (a named group or a reference
        to a global group), a model element (sequence, all or choice) that acts as
        a local group, and a complex type, extension or restriction element, for which
        name and model are set to `None`. Other tags produce a parse error.
        """
        super(XsdGroup, self)._parse()
        if self and not hasattr(self, '_elem'):
            self.clear()
        elem = self.elem
        self._parse_particle(elem)

        if elem.tag == XSD_GROUP:
            name = elem.get('name')
            ref = elem.get('ref')
            has_name = name is not None
            has_ref = ref is not None

            if has_name == has_ref:
                # Exactly one of the two attributes is required.
                if has_name:
                    self.parse_error("found both attributes 'name' and 'ref'", elem)
                else:
                    self.parse_error("missing both attributes 'name' and 'ref'", elem)
                return

            if has_ref:
                # Reference to a global group
                if self.is_global:
                    self.parse_error("a group reference cannot be global", elem)
                self.name = prefixed_to_qname(ref, self.namespaces)

                try:
                    xsd_group = self.schema.maps.lookup_group(self.name)
                except KeyError:
                    self.parse_error("missing group %r" % self.prefixed_name, elem)
                    xsd_group = self.schema.create_any_content_group(self, self.name)

                if isinstance(xsd_group, tuple):
                    # Disallowed circular definition, substitute with any content group.
                    message = "Circular definitions detected for group %r:" % self.ref
                    self.parse_error(message, xsd_group[0])
                    self.model = 'sequence'
                    self.mixed = True
                    any_element = XsdAnyElement(ANY_ELEMENT, self.schema, self)
                    self.append(any_element)
                else:
                    self.model = xsd_group.model
                    self.append(xsd_group)
                return

            # Named group: the content model is the component parsed from the element.
            self.name = get_qname(self.target_namespace, name)
            content_model = self._parse_component(elem)
            if self.is_global:
                if 'minOccurs' in elem.attrib:
                    self.parse_error("attribute 'minOccurs' not allowed for a global group", self)
                if 'maxOccurs' in elem.attrib:
                    self.parse_error("attribute 'maxOccurs' not allowed for a global group", self)
            else:
                self.parse_error("attribute 'name' not allowed for a local group", self)

            if content_model.tag not in {XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}:
                self.parse_error('unexpected tag %r' % content_model.tag, content_model)
                return

        elif elem.tag in {XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}:
            # Local group (sequence|all|choice)
            content_model = elem
            self.name = None

        elif elem.tag in {XSD_COMPLEX_TYPE, XSD_EXTENSION, XSD_RESTRICTION}:
            self.name = self.model = None
            return

        else:
            self.parse_error('unexpected tag %r' % elem.tag, elem)
            return

        self._parse_content_model(elem, content_model)