Beispiel #1
0
    def detect_type(self, data: Union[Dict, List]) -> Type[T]:
        """
        Resolve the model class whose fields match the document keys.

        For an array document only the keys of the first item are inspected.

        :param data: The loaded document, either an object or an array
        :raises ParserError: When the document is empty or no class matches
        :return: The class the context reports for the set of keys
        """
        if not data:
            raise ParserError("Document is empty, can not detect type")

        sample = data[0] if isinstance(data, list) else data
        keys = list(sample.keys())
        clazz: Optional[Type[T]] = self.context.find_type_by_fields(set(keys))

        if clazz is not None:
            return clazz

        raise ParserError(
            f"No class found matching the document keys({keys})")
Beispiel #2
0
    def parse_context(self, context: iterparse, clazz: Type[T]) -> T:
        """
        Dispatch elements to handlers as they arrive and are fully parsed.

        :param context: Iterable of ``(event, element)`` pairs
        :param clazz: The target class used to build the root node metadata
        :raises ParserError: When the last end event produced no object, or
            no end event occurred at all
        :return: The object returned by the last end event
        """
        meta = self.context.build(clazz)
        self.namespaces.clear()
        objects: ParsedObjects = []
        queue: XmlNodes = [RootNode(position=0, meta=meta, config=self.config)]

        obj = None
        for event, element in context:
            # The three event kinds are mutually exclusive
            if event == EventType.START_NS:
                self.add_namespace(element)
            elif event == EventType.START:
                self.queue(element, queue, objects)
            elif event == EventType.END:
                obj = self.dequeue(element, queue, objects)

        # Compare against None explicitly: a valid result may be falsy,
        # e.g. a model that defines ``__len__`` or ``__bool__``.
        if obj is None:
            raise ParserError(
                f"Failed to create target class `{clazz.__name__}`")

        return obj
Beispiel #3
0
    def bind_element_children(
        cls, params: Dict, meta: XmlMeta, position: int, objects: List,
    ):
        """
        Drain the pending objects from ``position`` onwards into ``params``.

        Every popped ``(qname, value)`` pair is matched to a model variable,
        non-wildcard lookups first. Values that can not be bound directly
        fall back to an eligible wildcard variable, otherwise a warning is
        logged.

        :param params: The parameters dictionary that is updated in place
        :param meta: The metadata used to look up the variables
        :param position: Index of the first pending object to consume
        :param objects: The list of pending objects, consumed in place
        :raises ParserError: When no variable matches a pending qname
        """
        while position < len(objects):
            qname, value = objects.pop(position)

            arg = meta.find_var(qname, FindMode.NOT_WILDCARD)
            if not arg:
                arg = meta.find_var(qname, FindMode.WILDCARD)

            if not arg:
                raise ParserError("Impossible exception!")

            if not arg.init:
                continue

            value = "" if value is None else value

            if cls.bind_element_param(params, arg, value):
                continue

            # Direct binding failed, look for a wildcard that can take it
            if isinstance(value, AnyElement):
                lookup = QName(value.qname)
            else:
                lookup = qname

            wild = cls.find_eligible_wildcard(meta, lookup, params)
            if wild:
                cls.bind_element_wildcard_param(params, wild, qname, value)
            else:
                logger.warning("Unassigned parsed object %s", qname)
Beispiel #4
0
    def parse_context(self, data: Dict, clazz: Type[T]) -> T:
        """
        Recursively build the given model from the input dict data.

        A list holding exactly one item is unwrapped to that item first.

        :param data: The loaded document data
        :param clazz: The model class to instantiate
        :raises ParserError: When instantiating the model fails for any
            reason, the original exception is attached as the cause
        :return: A new instance of ``clazz``
        """
        if isinstance(data, list) and len(data) == 1:
            data = data[0]

        params = {}
        for var in self.class_meta(clazz).vars.values():
            value = self.get_value(data, var)

            if value is None:
                continue

            if var.is_list:
                params[var.name] = [self.bind_value(var, val) for val in value]
            else:
                params[var.name] = self.bind_value(var, value)

        try:
            return clazz(**params)  # type: ignore
        except Exception as exc:
            # Chain explicitly so the real failure stays visible in tracebacks
            raise ParserError("Parsing failed") from exc
Beispiel #5
0
    def bind(self, qname: str, text: NoneStr, tail: NoneStr,
             objects: List) -> bool:
        """
        Record the end event and, once the node is complete, bind the best
        scoring candidate among the union types.

        While the node is still nested (``level > 0``) the level is only
        decremented and nothing is bound.

        :param qname: Qualified name of the ending element
        :param text: The element text content
        :param tail: The element tail content
        :param objects: The list the bound ``(qname, object)`` is appended to
        :raises ParserError: When no candidate object could be selected
        :return: ``True`` when an object was appended, ``False`` while nested
        """
        self.events.append(("end", qname, text, tail))

        if self.level > 0:
            self.level -= 1
            return False

        start_event = ("start", qname, copy.deepcopy(self.attrs), self.ns_map)
        self.events.insert(0, start_event)

        best = None
        best_score = -1.0
        parent_namespace = target_uri(qname)
        for clazz in self.var.types:
            if not is_dataclass(clazz):
                candidate = self.parse_value(text, [clazz])
            else:
                self.context.build(clazz, parent_ns=parent_namespace)
                candidate = self.parse_class(clazz)

            score = ParserUtils.score_object(candidate)
            if score > best_score:
                best, best_score = candidate, score

        if not best:
            raise ParserError(f"Failed to parse union node: {self.var.qname}")

        objects.append((self.var.qname, best))
        return True
Beispiel #6
0
    def child(self, qname: str, attrs: Dict, ns_map: Dict, position: int) -> XmlNode:
        """
        Return the node for the first matching variable that yields one.

        :param qname: Qualified name of the child element
        :param attrs: Attribute key-value map
        :param ns_map: Namespace prefix-URI map
        :param position: Position passed on to the node builder
        :raises ParserError: When nothing matches and the config is set to
            fail on unknown properties
        :return: The built node, or a ``SkipNode`` when nothing matches
        """
        # Lazy on purpose: stop building nodes at the first truthy result
        built = (
            self.build_node(var, attrs, ns_map, position)
            for var in self.fetch_vars(qname)
        )
        node = next(filter(None, built), None)
        if node:
            return node

        if not self.config.fail_on_unknown_properties:
            return SkipNode()

        raise ParserError(f"Unknown property {self.meta.qname}:{qname}")
Beispiel #7
0
    def parse(self, source: Any, clazz: Optional[Type[T]] = None) -> T:
        """
        Parse the XML input stream and return the resulting object tree.

        :param source: The input passed on to the handler
        :param clazz: Optional target class handed to the handler
        :raises ParserError: When the handler returns ``None``
        :return: The object returned by the handler
        """
        result = self.handler(clazz=clazz, parser=self).parse(source)

        if result is None:
            target_class = clazz.__name__ if clazz else ""
            raise ParserError(f"Failed to create target class `{target_class}`")

        return result
Beispiel #8
0
    def bind_choice_dataclass(self, value: Dict, var: XmlVar) -> Any:
        """
        Bind data to the first choice model whose field names are exactly
        the keys of the given value.

        :param value: The raw data to bind
        :param var: The variable holding the candidate choices
        :raises ParserError: When no choice model matches the keys
        """
        keys = set(value.keys())
        for choice in var.choices:
            if not choice.clazz:
                continue

            field_names = {field.name for field in fields(choice.clazz)}
            if field_names == keys:
                return self.bind_value(choice, value)

        raise ParserError(
            f"XmlElements undefined choice: `{var.name}` for `{value}`")
Beispiel #9
0
    def start(
        self,
        clazz: Optional[Type],
        queue: List[XmlNode],
        objects: List[Parsed],
        qname: str,
        attrs: Dict,
        ns_map: Dict,
    ):
        """
        Start element notification receiver.

        Build and queue the XmlNode for the starting element. With a non
        empty queue the last node builds the child, otherwise the element is
        the root and an ElementNode is built from the resolved class.

        :param clazz: Root class type, if it's missing look for any
            suitable models from the current context.
        :param queue: The active XmlNode queue
        :param objects: The list of all intermediate parsed objects
        :param qname: Qualified name
        :param attrs: Attribute key-value map
        :param ns_map: Namespace prefix-URI map
        :raises ParserError: When no class matches the root element
        """
        # Guard only the queue lookup: an IndexError raised inside
        # ``item.child`` must propagate instead of being mistaken for an
        # empty queue.
        try:
            item = queue[-1]
        except IndexError:
            xsi_type = ParserUtils.xsi_type(attrs, ns_map)

            # Match element qname directly
            if clazz is None:
                clazz = self.context.find_type(qname)

            # Root is xs:anyType try xsi:type
            if clazz is None and xsi_type:
                clazz = self.context.find_type(xsi_type)

            # Exit if we still have no binding model
            if clazz is None:
                raise ParserError(f"No class found matching root: {qname}")

            meta = self.context.fetch(clazz, xsi_type=xsi_type)
            derived = xsi_type is not None and meta.qname != qname

            child = ElementNode(
                position=0,
                meta=meta,
                config=self.config,
                attrs=attrs,
                ns_map=ns_map,
                context=self.context,
                derived=derived,
            )
        else:
            child = item.child(qname, attrs, ns_map, len(objects))

        queue.append(child)
Beispiel #10
0
    def parse(self, source: io.BytesIO, clazz: Optional[Type[T]] = None) -> T:
        """
        Parse the JSON input stream and return the resulting object tree.

        :param source: The stream to load the JSON document from
        :param clazz: Optional target class, when missing it is looked up
            in the context by the document keys
        :raises ParserError: When no class matches the document keys
        """
        ctx = json.load(source)

        if clazz is None:
            clazz = self.context.find_type_by_fields(set(ctx.keys()))

            if clazz is None:
                raise ParserError(
                    f"No class found matching the document keys({list(ctx.keys())})"
                )

        return self.bind_dataclass(ctx, clazz)
Beispiel #11
0
    def verify_type(self, clazz: Optional[Type[T]],
                    data: Union[Dict, List]) -> Type[T]:
        """
        Check the requested type against the document shape and return the
        model class to bind.

        Without a requested type the class is detected from the data. For a
        list type the single dataclass argument is returned.

        :param clazz: The requested type, a class or a list type of a class
        :param data: The loaded document, either an object or an array
        :raises ParserError: When the origin is not supported, the document
            shape doesn't match the type or the list argument is invalid
        """
        if clazz is None:
            return self.detect_type(data)

        origin = getattr(clazz, "__origin__", None)
        list_type = origin in (list, List) or clazz is List
        if not list_type and origin is not None:
            raise ParserError(f"Origin {origin} is not supported")

        data_is_list = isinstance(data, list)
        if list_type and not data_is_list:
            raise ParserError("Document is object, expected array")
        if data_is_list and not list_type:
            raise ParserError("Document is array, expected object")

        if not list_type:
            return clazz  # type: ignore

        args = getattr(clazz, "__args__", ())
        if args is None or len(args) != 1 or not is_dataclass(args[0]):
            raise ParserError("List argument must be a dataclass")

        return args[0]  # type: ignore
Beispiel #12
0
    def bind_choice_generic(self, value: Dict, var: XmlVar) -> Any:
        """
        Bind data to either a derived element or a generic element.

        The choice is looked up by the ``qname`` key of the value. When a
        ``value`` key is present the bound result is wrapped in a
        DerivedElement, otherwise the data is bound as an AnyElement.

        :raises ParserError: When no choice matches the qname
        """
        qname = value["qname"]
        choice = var.find_choice(qname)

        if not choice:
            raise ParserError(
                f"XmlElements undefined choice: `{var.name}` for qname `{qname}`"
            )

        if "value" not in value:
            return self.bind_dataclass(value, AnyElement)

        bound = self.bind_value(choice, value["value"])
        return DerivedElement(qname, bound)
Beispiel #13
0
    def queue_node(self, element: Element):
        """
        Queue the necessary metadata to bind the given element when it's fully
        parsed.

        Checks for the last item in queue:
        - Skip or primitive item                    -> queue a skip item
        - Wildcard item                             -> queue another wildcard
        - Class item and no element queued so far   -> re-queue the item itself
        - Class item, tag is a dataclass variable   -> queue a class item
        - Class item, tag is an any-element variable -> queue a wildcard
        - Class item, tag is any other variable     -> queue a primitive item

        :param element: The starting element
        :raises ParserError: When the parser cant compute next queue item.
        """
        qname = QName(element.tag)
        queue_item = None
        item = self.queue[-1]

        if isinstance(item, (SkipQueueItem, PrimitiveQueueItem)):
            queue_item = self.create_skip_queue_item()
        elif isinstance(item, WildcardQueueItem):
            queue_item = self.create_wildcard_queue_item(item.qname)
        elif isinstance(item, ClassQueueItem):

            if self.index == 0:
                # First element: take the pre-queued item off the queue, it
                # is appended again at the end of this method.
                queue_item = self.queue.pop()
            else:
                var = item.meta.get_var(qname)
                if var and var.dataclass:
                    queue_item = self.create_class_queue_item(
                        var, item.meta.qname)
                elif var and var.is_any_element:
                    queue_item = self.create_wildcard_queue_item(var.qname)
                elif var:
                    queue_item = self.create_primitive_queue_item(var)

        # No branch above produced an item: unknown item type or unknown tag
        if queue_item is None:
            parent = item.meta.qname if isinstance(
                item, ClassQueueItem) else "unknown"
            raise ParserError(
                f"{parent} does not support mixed content: {qname}")

        self.index += 1
        self.queue.append(queue_item)
        # The event carries the previous last item, not the new queue item
        self.emit_event(EventType.START,
                        element.tag,
                        item=item,
                        element=element)
Beispiel #14
0
    def bind_dataclass(self, data: Dict, clazz: Type[T]) -> T:
        """
        Recursively build the given model from the input dict data.

        Variables without a value in the data, or not part of the
        constructor, are skipped.

        :param data: The raw data, looked up by the variables local names
        :param clazz: The model class to instantiate
        :raises ParserError: When a list variable value is not a list
        :return: A new instance of ``clazz``
        """
        params = {}
        for var in self.context.build(clazz).vars:
            value = data.get(var.lname)

            if value is None or not var.init:
                continue

            if not var.list_element:
                params[var.name] = self.bind_value(var, value)
                continue

            if not isinstance(value, list):
                raise ParserError(
                    f"Key `{var.name}` value is not iterable")

            params[var.name] = [self.bind_value(var, item) for item in value]

        return clazz(**params)  # type: ignore
Beispiel #15
0
    def parse_context(self, context: iterparse, clazz: Type[T]) -> T:
        """
        Dispatch elements to handlers as they arrive and are fully parsed.

        :param context: Iterable of ``(event, element)`` pairs
        :param clazz: The class the final object must be an instance of
        :raises ParserError: When the requested type doesn't match the
            result object, or no object was produced
        :return: The object returned for the last end event
        """
        obj = None
        for event, element in context:
            if event == EventType.START:
                self.queue_node(element)
            elif event == EventType.END:
                obj = self.dequeue_node(element)
                if obj is not None:
                    # The element is bound, clear it
                    element.clear()

        # isinstance already rejects None; a separate truthiness test would
        # also reject a valid instance that happens to be falsy.
        if not isinstance(obj, clazz):
            raise ParserError(f"Failed to create target class `{clazz.__name__}`")

        return obj
Beispiel #16
0
    def dequeue_node(self, element: Element) -> Optional[T]:
        """
        Pop the last queue item and build the object for the ended element.

        Depending on the item type the element is skipped, parsed as a
        primitive value from its text, parsed as a generic element with its
        pending children, or bound to a new model instance from its
        attributes, text and pending children.

        :param element: The element that just ended
        :raises ParserError: When the queue item type is not recognized
        :return: The built object, or ``None`` for skipped items and empty
            generic elements
        """
        item = self.queue.pop()

        if isinstance(item, SkipQueueItem):
            return None

        if isinstance(item, PrimitiveQueueItem):
            qname = QName(element.tag)
            obj = self.parse_value(
                item.types, element.text, item.default, element.nsmap
            )
        elif isinstance(item, WildcardQueueItem):
            obj = self.parse_any_element(element)
            if not obj:
                return None
            obj.children = self.fetch_any_children(item)
            qname = item.qname
        elif isinstance(item, ClassQueueItem):
            params: Dict = {}
            self.bind_element_attrs(params, item.meta, element)
            self.bind_element_text(params, item.meta, element)
            self.bind_element_children(params, item, element)
            self.bind_element_wild_text(params, item.meta, element)

            qname = QName(element.tag)
            obj = item.meta.clazz(**params)
        else:  # unknown :)
            raise ParserError(f"Failed to create object from {element.tag}")

        # Keep the object pending for the parent and notify the listeners
        self.objects.append((qname, obj))
        self.emit_event(EventType.END, element.tag, obj=obj, element=element)
        self.namespaces.add_all(element.nsmap)
        return obj
Beispiel #17
0
    def bind(self, qname: str, text: NoneStr, tail: NoneStr, objects: List) -> bool:
        """
        Record the end event and, once the node is complete, bind the best
        scoring candidate among the union types.

        While the node is still nested (``level > 0``) the level is only
        decremented and nothing is bound.

        :param qname: Qualified name of the ending element
        :param text: The element text content
        :param tail: The element tail content
        :param objects: The list the bound ``(qname, object)`` is appended to
        :raises ParserError: When no candidate object could be selected
        :return: ``True`` when an object was appended, ``False`` while nested
        """
        self.events.append(("end", qname, text, tail))

        if self.level > 0:
            self.level -= 1
            return False

        start_event = ("start", qname, copy.deepcopy(self.attrs), self.ns_map)
        self.events.insert(0, start_event)

        winner = None
        top_score = -1.0
        for clazz in self.var.types:
            candidate = self.parse_class(clazz)
            score = ParserUtils.score_object(candidate)
            if score > top_score:
                winner, top_score = candidate, score

        if not winner:
            raise ParserError(f"Failed to parse union node: {self.var.qname}")

        objects.append((self.var.qname, winner))
        return True