def detect_type(self, data: Union[Dict, List]) -> Type[T]:
    """
    Resolve the model class for a document from the keys it carries.

    For a list document only the keys of the first item are inspected,
    for any other document its own keys are used.

    :param data: The loaded document, an object or an array of objects
    :return: The class the context reports for that set of keys
    :raises ParserError: When the document is empty or the context finds
        no class for the keys
    """
    if not data:
        raise ParserError("Document is empty, can not detect type")

    sample = data[0] if isinstance(data, list) else data
    keys = list(sample.keys())

    clazz: Optional[Type[T]] = self.context.find_type_by_fields(set(keys))
    if clazz is not None:
        return clazz

    raise ParserError(f"No class found matching the document keys({keys})")
def parse_context(self, context: iterparse, clazz: Type[T]) -> T:
    """
    Walk the parse events and hand each element to the matching handler.

    The node queue is seeded with a ``RootNode`` built from the metadata of
    ``clazz``; the value returned by the last ``dequeue`` call is the result.

    :param context: Iterable of ``(event, element)`` pairs
    :param clazz: The class the root node metadata is built from
    :return: The object produced by the last end event
    :raises ParserError: When no (truthy) object was produced
    """
    result = None
    root_meta = self.context.build(clazz)
    self.namespaces.clear()

    objects: ParsedObjects = []
    queue: XmlNodes = [RootNode(position=0, meta=root_meta, config=self.config)]

    for event, element in context:
        if event == EventType.START_NS:
            self.add_namespace(element)

        if event == EventType.START:
            self.queue(element, queue, objects)
        elif event == EventType.END:
            result = self.dequeue(element, queue, objects)

    if result:
        return result

    raise ParserError(f"Failed to create target class `{clazz.__name__}`")
def bind_element_children(
    cls,
    params: Dict,
    meta: XmlMeta,
    position: int,
    objects: List,
):
    """
    Move the parsed children of a queue item into its constructor params.

    Every entry of ``objects`` from index ``position`` onwards is popped and
    bound to the var ``meta`` reports for its qualified name. Nothing is
    returned: both ``params`` and ``objects`` are modified in place.

    :param params: The params dictionary being filled
    :param meta: The metadata used to look up vars by qualified name
    :param position: Index of the first object that belongs to this item
    :param objects: The list of ``(qname, value)`` parsed objects
    :raises ParserError: When no var is found for a qualified name
    """
    while position < len(objects):
        qname, value = objects.pop(position)

        # Prefer a non-wildcard var, fall back to a wildcard one.
        arg = meta.find_var(qname, FindMode.NOT_WILDCARD)
        if not arg:
            arg = meta.find_var(qname, FindMode.WILDCARD)
        if not arg:
            raise ParserError("Impossible exception!")

        if not arg.init:
            continue

        if value is None:
            value = ""

        if cls.bind_element_param(params, arg, value):
            continue

        # The var refused the value, look for a wildcard that can take it.
        lookup = QName(value.qname) if isinstance(value, AnyElement) else qname
        wild = cls.find_eligible_wildcard(meta, lookup, params)
        if wild:
            cls.bind_element_wildcard_param(params, wild, qname, value)
        else:
            logger.warning("Unassigned parsed object %s", qname)
def parse_context(self, data: Dict, clazz: Type[T]) -> T:
    """
    Recursively build the given model from the input dict data.

    A list holding exactly one item is unwrapped to that item first. Vars
    whose looked-up value is ``None`` are left out of the constructor call.

    :param data: The document data to read the var values from
    :param clazz: The model class to instantiate
    :return: A ``clazz`` instance created from the bound values
    :raises ParserError: When instantiating ``clazz`` fails for any reason;
        the original exception is attached as the cause
    """
    if isinstance(data, list) and len(data) == 1:
        data = data[0]

    params = {}
    for var in self.class_meta(clazz).vars.values():
        value = self.get_value(data, var)
        if value is None:
            continue

        if var.is_list:
            params[var.name] = [self.bind_value(var, val) for val in value]
        else:
            params[var.name] = self.bind_value(var, value)

    try:
        return clazz(**params)  # type: ignore
    except Exception as exc:
        # Chain explicitly so the real constructor failure stays visible.
        raise ParserError("Parsing failed") from exc
def bind(self, qname: str, text: NoneStr, tail: NoneStr, objects: List) -> bool:
    """
    Record the end event and, for the outermost element, bind the best
    scoring candidate among the var types.

    While ``self.level`` is positive the element is a nested one: the level
    is decreased and nothing is bound. Otherwise a start event is prepended
    to the recorded events, every type of the var is tried and the candidate
    with the highest ``ParserUtils.score_object`` score is kept.

    :param qname: Qualified name of the element
    :param text: Text content of the element
    :param tail: Tail content of the element
    :param objects: The list of intermediate parsed objects to append to
    :return: True when an object was appended, False for nested elements
    :raises ParserError: When no (truthy) candidate was produced
    """
    self.events.append(("end", qname, text, tail))

    if self.level > 0:
        self.level -= 1
        return False

    self.events.insert(0, ("start", qname, copy.deepcopy(self.attrs), self.ns_map))

    best = None
    best_score = -1.0
    parent_namespace = target_uri(qname)

    for clazz in self.var.types:
        if is_dataclass(clazz):
            self.context.build(clazz, parent_ns=parent_namespace)
            candidate = self.parse_class(clazz)
        else:
            candidate = self.parse_value(text, [clazz])

        score = ParserUtils.score_object(candidate)
        if score > best_score:
            best, best_score = candidate, score

    if not best:
        raise ParserError(f"Failed to parse union node: {self.var.qname}")

    objects.append((self.var.qname, best))
    return True
def child(self, qname: str, attrs: Dict, ns_map: Dict, position: int) -> XmlNode:
    """
    Return the node that should handle a child element.

    The vars fetched for ``qname`` are tried in order and the first truthy
    node built for one of them wins; later vars are not tried.

    :param qname: Qualified name of the child element
    :param attrs: Attribute key-value map
    :param ns_map: Namespace prefix-URI map
    :param position: Position forwarded to ``build_node``
    :return: The first node built, or a ``SkipNode`` when none was built
    :raises ParserError: When nothing was built and the config has
        ``fail_on_unknown_properties`` enabled
    """
    built = (
        self.build_node(var, attrs, ns_map, position)
        for var in self.fetch_vars(qname)
    )
    # The generator is lazy, so building stops at the first truthy node.
    node = next((candidate for candidate in built if candidate), None)
    if node is not None:
        return node

    if self.config.fail_on_unknown_properties:
        raise ParserError(f"Unknown property {self.meta.qname}:{qname}")

    return SkipNode()
def parse(self, source: Any, clazz: Optional[Type[T]] = None) -> T:
    """
    Parse the XML input stream and return the resulting object tree.

    :param source: The input handed over to the handler's ``parse`` method
    :param clazz: Optional target class, forwarded to the handler
    :return: Whatever the handler produced, as long as it is not ``None``
    :raises ParserError: When the handler produced ``None``
    """
    result = self.handler(clazz=clazz, parser=self).parse(source)

    if result is None:
        target_class = clazz.__name__ if clazz else ""
        raise ParserError(f"Failed to create target class `{target_class}`")

    return result
def bind_choice_dataclass(self, value: Dict, var: XmlVar) -> Any:
    """
    Bind data to the first choice whose model fields match the data keys.

    Choices without a class are ignored; a choice matches only when the
    set of its dataclass field names equals the set of keys of ``value``.

    :param value: The raw data for the choice
    :param var: The var whose choices are searched
    :return: The result of binding ``value`` to the matching choice
    :raises ParserError: When no choice matches the keys
    """
    keys = set(value.keys())

    for choice in var.choices:
        if not choice.clazz:
            continue

        field_names = {field.name for field in fields(choice.clazz)}
        if field_names == keys:
            return self.bind_value(choice, value)

    raise ParserError(f"XmlElements undefined choice: `{var.name}` for `{value}`")
def start(
    self,
    clazz: Optional[Type],
    queue: List[XmlNode],
    objects: List[Parsed],
    qname: str,
    attrs: Dict,
    ns_map: Dict,
):
    """
    Start element notification receiver.

    Build and queue the XmlNode for the starting element.

    :param clazz: Root class type, if it's missing look for any suitable
        models from the current context.
    :param queue: The active XmlNode queue
    :param objects: The list of all intermediate parsed objects
    :param qname: Qualified name
    :param attrs: Attribute key-value map
    :param ns_map: Namespace prefix-URI map
    :raises ParserError: When the queue is empty and no class can be
        resolved for the root element
    """
    try:
        # Only the lookup is guarded: an IndexError raised inside
        # ``child`` must surface instead of being taken for an empty queue.
        item = queue[-1]
    except IndexError:
        xsi_type = ParserUtils.xsi_type(attrs, ns_map)

        # Match element qname directly
        if clazz is None:
            clazz = self.context.find_type(qname)

        # Root is xs:anyType try xsi:type
        if clazz is None and xsi_type:
            clazz = self.context.find_type(xsi_type)

        # Exit if we still have no binding model
        if clazz is None:
            raise ParserError(f"No class found matching root: {qname}")

        meta = self.context.fetch(clazz, xsi_type=xsi_type)
        derived = xsi_type is not None and meta.qname != qname

        child = ElementNode(
            position=0,
            meta=meta,
            config=self.config,
            attrs=attrs,
            ns_map=ns_map,
            context=self.context,
            derived=derived,
        )
    else:
        child = item.child(qname, attrs, ns_map, len(objects))

    queue.append(child)
def parse(self, source: io.BytesIO, clazz: Optional[Type[T]] = None) -> T:
    """
    Parse the JSON input stream and return the resulting object tree.

    :param source: The stream the JSON document is loaded from
    :param clazz: Optional target class; when missing the context is asked
        for a class matching the keys of the loaded document
    :return: The result of binding the document to the class
    :raises ParserError: When no class was given and none matches the keys
    """
    ctx = json.load(source)

    if clazz is None:
        clazz = self.context.find_type_by_fields(set(ctx.keys()))

    if clazz is not None:
        return self.bind_dataclass(ctx, clazz)

    raise ParserError(
        f"No class found matching the document keys({list(ctx.keys())})"
    )
def verify_type(self, clazz: Optional[Type[T]], data: Union[Dict, List]) -> Type[T]:
    """
    Check the requested type against the document shape and return the
    class to bind.

    Without a requested type the decision is delegated to ``detect_type``.
    A list type is accepted only for an array document and is unwrapped to
    its single dataclass argument.

    :param clazz: The requested type, a class or a ``List[...]`` of one
    :param data: The loaded document, an object or an array
    :return: The class to bind the document (items) to
    :raises ParserError: When the generic origin is not a list, when the
        document shape does not match the type, or when the list argument
        is not exactly one dataclass
    """
    if clazz is None:
        return self.detect_type(data)

    origin = getattr(clazz, "__origin__", None)
    list_type = origin in (list, List) or clazz is List
    if origin is not None and not list_type:
        raise ParserError(f"Origin {origin} is not supported")

    is_array = isinstance(data, list)
    if list_type and not is_array:
        raise ParserError("Document is object, expected array")
    if is_array and not list_type:
        raise ParserError("Document is array, expected object")

    if not list_type:
        return clazz  # type: ignore

    args = getattr(clazz, "__args__", ())
    if args is None or len(args) != 1 or not is_dataclass(args[0]):
        raise ParserError("List argument must be a dataclass")

    return args[0]  # type: ignore
def bind_choice_generic(self, value: Dict, var: XmlVar) -> Any:
    """
    Bind data to either a derived element or a generic element.

    The choice is looked up by the ``qname`` entry of the data. Data with a
    ``value`` entry is bound to the choice and wrapped in a
    ``DerivedElement``; any other data is bound as an ``AnyElement``.

    :param value: The raw data, must carry a ``qname`` key
    :param var: The var whose choices are searched
    :return: The bound element
    :raises ParserError: When the var has no choice for the qname
    """
    qname = value["qname"]
    choice = var.find_choice(qname)
    if not choice:
        raise ParserError(
            f"XmlElements undefined choice: `{var.name}` for qname `{qname}`"
        )

    if "value" not in value:
        return self.bind_dataclass(value, AnyElement)

    bound = self.bind_value(choice, value["value"])
    return DerivedElement(qname, bound)
def queue_node(self, element: Element):
    """
    Queue the item that will bind the given element once it is fully
    parsed.

    The new item depends on the last item in the queue:
        - Skip or primitive item -> a skip item
        - Wildcard item -> another wildcard item with the same qname
        - Class item at index zero -> that same item, popped and re-queued
        - Class item with a var for the tag -> a class, wildcard or
          primitive item depending on the var

    :param element: The element that has just started
    :raises ParserError: When no queue item could be computed
    """
    qname = QName(element.tag)
    item = self.queue[-1]
    next_item = None

    if isinstance(item, (SkipQueueItem, PrimitiveQueueItem)):
        next_item = self.create_skip_queue_item()
    elif isinstance(item, WildcardQueueItem):
        next_item = self.create_wildcard_queue_item(item.qname)
    elif isinstance(item, ClassQueueItem):
        if self.index == 0:
            # First element seen: reuse the item already waiting in the queue.
            next_item = self.queue.pop()
        else:
            var = item.meta.get_var(qname)
            if var:
                if var.dataclass:
                    next_item = self.create_class_queue_item(var, item.meta.qname)
                elif var.is_any_element:
                    next_item = self.create_wildcard_queue_item(var.qname)
                else:
                    next_item = self.create_primitive_queue_item(var)

    if next_item is None:
        parent = item.meta.qname if isinstance(item, ClassQueueItem) else "unknown"
        raise ParserError(f"{parent} does not support mixed content: {qname}")

    self.index += 1
    self.queue.append(next_item)
    self.emit_event(EventType.START, element.tag, item=item, element=element)
def bind_dataclass(self, data: Dict, clazz: Type[T]) -> T:
    """
    Recursively build the given model from the input dict data.

    Values are read from ``data`` by each var's ``lname``; vars with a
    ``None`` value or with ``init`` disabled are left out.

    :param data: The raw document data
    :param clazz: The model class to instantiate
    :return: A ``clazz`` instance created from the bound values
    :raises ParserError: When a list var receives a non-list value
    """
    params = {}

    for var in self.context.build(clazz).vars:
        value = data.get(var.lname)
        if value is None or not var.init:
            continue

        if not var.list_element:
            params[var.name] = self.bind_value(var, value)
        elif isinstance(value, list):
            params[var.name] = [self.bind_value(var, val) for val in value]
        else:
            raise ParserError(f"Key `{var.name}` value is not iterable")

    return clazz(**params)  # type: ignore
def parse_context(self, context: iterparse, clazz: Type[T]) -> T:
    """
    Walk the parse events, queueing nodes on start and building objects on
    end.

    Elements that produced an object are cleared right away. The object
    built by the last end event is the result.

    :param context: Iterable of ``(event, element)`` pairs
    :param clazz: The type the result object must be an instance of
    :return: The object produced by the last end event
    :raises ParserError: When there is no (truthy) result or it is not an
        instance of ``clazz``
    """
    result = None

    for event, element in context:
        if event == EventType.START:
            self.queue_node(element)
        elif event == EventType.END:
            result = self.dequeue_node(element)
            if result is not None:
                element.clear()

    if result and isinstance(result, clazz):
        return result

    raise ParserError(f"Failed to create target class `{clazz.__name__}`")
def dequeue_node(self, element: Element) -> Optional[T]:
    """
    Pop the last queue item and build the object for the given element.

    Depending on the popped item the object is either a parsed primitive
    value, a generic element with its children, or an instance of the item
    class created from the bound attributes, text and child objects. The
    object is appended to ``self.objects`` together with its qualified name.

    :param element: The element that has just ended
    :return: The built object, or None for skip items and for wildcard
        items that produced nothing
    :raises ParserError: When the popped item is of an unknown kind
    """
    item = self.queue.pop()

    if isinstance(item, SkipQueueItem):
        return None

    if isinstance(item, PrimitiveQueueItem):
        qname = QName(element.tag)
        obj = self.parse_value(item.types, element.text, item.default, element.nsmap)
    elif isinstance(item, WildcardQueueItem):
        obj = self.parse_any_element(element)
        if not obj:
            # Nothing was produced, so nothing is recorded or emitted.
            return None

        obj.children = self.fetch_any_children(item)
        qname = item.qname
    elif isinstance(item, ClassQueueItem):
        params: Dict = {}
        self.bind_element_attrs(params, item.meta, element)
        self.bind_element_text(params, item.meta, element)
        self.bind_element_children(params, item, element)
        self.bind_element_wild_text(params, item.meta, element)
        qname = QName(element.tag)
        obj = item.meta.clazz(**params)
    else:  # unknown :)
        raise ParserError(f"Failed to create object from {element.tag}")

    self.objects.append((qname, obj))
    self.emit_event(EventType.END, element.tag, obj=obj, element=element)
    self.namespaces.add_all(element.nsmap)

    return obj
def bind(self, qname: str, text: NoneStr, tail: NoneStr, objects: List) -> bool:
    """
    Record the end event and, for the outermost element, bind the best
    scoring candidate among the var types.

    While ``self.level`` is positive the element is a nested one: the level
    is decreased and nothing is bound. Otherwise a start event is prepended
    to the recorded events, each type of the var is parsed as a class and
    the candidate with the highest ``ParserUtils.score_object`` score wins.

    :param qname: Qualified name of the element
    :param text: Text content of the element
    :param tail: Tail content of the element
    :param objects: The list of intermediate parsed objects to append to
    :return: True when an object was appended, False for nested elements
    :raises ParserError: When no (truthy) candidate was produced
    """
    self.events.append(("end", qname, text, tail))

    nested = self.level > 0
    if nested:
        self.level -= 1
        return False

    start_event = ("start", qname, copy.deepcopy(self.attrs), self.ns_map)
    self.events.insert(0, start_event)

    winner = None
    top_score = -1.0
    for clazz in self.var.types:
        candidate = self.parse_class(clazz)
        score = ParserUtils.score_object(candidate)
        if score > top_score:
            top_score = score
            winner = candidate

    if winner:
        objects.append((self.var.qname, winner))
        return True

    raise ParserError(f"Failed to parse union node: {self.var.qname}")