def _parse_node(self, node):
    """Convert an ElementTree element into nested dict/list/text data.

    Args:
        node: the element to convert.

    Returns:
        * the element's text when it is a leaf with text and no attributes;
        * ``''`` when nothing at all was collected;
        * otherwise an ``object_dict`` holding the text under ``value``,
          one entry per attribute (each wrapped as ``{'value': ...}``) and
          one entry per child tag.  A tag that occurs more than once is
          stored as a list of its parsed occurrences.
    """
    # Iterating the element replaces Element.getchildren(), which was
    # removed in Python 3.9.
    children = list(node)

    if node.text and not children and not node.attrib:
        # Plain text leaf: the text itself is the value.
        return node.text

    node_tree = object_dict()

    # Save attrs and text, hope there will not be a child with same name.
    # A text leaf *with* attributes used to rebind node_tree to a str and
    # then fail on item assignment; keep the dict and store the text under
    # ``value`` instead, exactly like the element-with-children case.
    if node.text:
        node_tree.value = node.text
    for k, v in node.attrib.items():
        k, v = self._namespace_split(k, object_dict({'value': v}))
        node_tree[k] = v

    # Save children.
    for child in children:
        tag, tree = self._namespace_split(child.tag, self._parse_node(child))
        if tag not in node_tree:
            # First encounter, store it directly.
            node_tree[tag] = tree
            continue
        old = node_tree[tag]
        if not isinstance(old, list):
            # Repeated tag: turn the single stored value into a list.
            node_tree.pop(tag)
            node_tree[tag] = [old]
        node_tree[tag].append(tree)

    return '' if node_tree == {} else node_tree
def fromstring(self, s):
    """Parse an XML string into an ``object_dict`` keyed by the root tag.

    Args:
        s: the XML document; it is passed through ``self.convert_to_utf``
           before parsing.

    Returns:
        ``object_dict({<encoded root tag>: <parsed tree>})`` on success, or
        ``object_dict({'parse_error': 'parse error'})`` when parsing or
        conversion of the tree raises.
    """
    xmlstring = self.convert_to_utf(s)
    try:
        t = ET.fromstring(xmlstring)
        root_tag, root_tree = self._namespace_split(t.tag, self._parse_node(t))
        return object_dict({root_tag.encode(): root_tree})
    except Exception:
        # Deliberate best-effort: report failure as data instead of raising.
        # ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        return object_dict({'parse_error': 'parse error'})
def _append_attribute(self, node, node_tree = None):
    """Copy a DOM node's attributes into ``node_tree[XML_ATTRIBUTE]``.

    Args:
        node: a DOM node exposing ``attributes`` and ``getAttribute``.
        node_tree: mapping to fill in; a fresh ``object_dict`` is created
            when omitted.

    Returns:
        ``node_tree``.  The ``XML_ATTRIBUTE`` entry is only created when the
        node actually has attributes.
    """
    if node_tree is None:
        node_tree = object_dict()

    attributes = node.attributes
    # Non-element DOM nodes (comments, processing instructions, text) expose
    # ``attributes`` as None; treat them like an element without attributes
    # instead of failing on ``None.keys()``.
    if attributes is None or len(attributes) < 1:
        return node_tree

    node_tree[XML_ATTRIBUTE] = object_dict()
    for attr in attributes.keys():
        k, v = self._namespace_split(attr.encode(),
                                     node.getAttribute(attr).encode())
        node_tree[XML_ATTRIBUTE][k] = v
    return node_tree
def fromstring(self, s):
    """Parse an XML string with the DOM parser into an ``object_dict``.

    Args:
        s: the XML document; it is passed through ``self.convert_to_utf``
           before parsing.

    Returns:
        ``object_dict({<encoded root tag>: <parsed tree>})`` on success, or
        ``object_dict({'parse_error': 'parse error'})`` when parsing or
        conversion of the tree raises.
    """
    s = self.convert_to_utf(s)
    try:
        doc = parseString(s)
        t = doc.documentElement
        root_tag, root_tree = self._namespace_split(t.nodeName,
                                                    self._parse_node(t))
        return object_dict({root_tag.encode(): root_tree})
    except Exception:
        # Deliberate best-effort: report failure as data instead of raising.
        # ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        return object_dict({'parse_error': 'parse error'})
def _append_attribute(self, node, node_tree = None):
    """Copy an element's attributes into ``node_tree[XML_ATTRIBUTE]``.

    A fresh ``object_dict`` is used when ``node_tree`` is omitted.  The
    ``XML_ATTRIBUTE`` entry is only created when the element has at least
    one attribute; every attribute name/value pair is passed through
    ``self._namespace_split`` before being stored.  Returns ``node_tree``.
    """
    if node_tree is None:
        node_tree = object_dict()

    if node.attrib.items():
        node_tree[XML_ATTRIBUTE] = object_dict()
        collected = node_tree[XML_ATTRIBUTE]
        for name, raw_value in node.attrib.items():
            key, value = self._namespace_split(name, raw_value)
            collected[key] = value

    return node_tree
def _parse_node(self, node):
    """Convert an ElementTree element into nested dict/list/text data.

    When an element has both text and attributes, the text is folded into
    the attributes under the element's own tag name (this mutates
    ``node.attrib`` and clears ``node.text``).

    Args:
        node: the element to convert.

    Returns:
        * the element's text when it has non-blank text (children are not
          visited in that case);
        * ``None`` when nothing was collected;
        * otherwise an ``object_dict`` with one entry per attribute and per
          child tag.  A tag that occurs more than once is stored as a list.

    Raises:
        ValueError: the element has text and an attribute whose name equals
            the element's tag.
    """
    if node.text and node.attrib:
        if node.tag in node.attrib:
            raise ValueError("Name conflict: Attribute name conflicts with "
                             "tag name. Check the documentation.")
        node.attrib[node.tag] = node.text
        node.text = ''

    if node.text and node.text.strip():
        return node.text

    node_tree = object_dict()

    # Save attrs. Fair warning, if there's a child node with the same name
    # as an attribute, values will become a list.
    for k, v in node.attrib.items():
        k, v = self._namespace_split(k, v)
        node_tree[k] = v

    # Save children.  Iterating the element replaces Element.getchildren(),
    # which was removed in Python 3.9.
    for child in list(node):
        tag, tree = self._namespace_split(child.tag, self._parse_node(child))
        if tag not in node_tree:
            # First encounter, store it in dict.
            node_tree[tag] = tree
            continue
        old = node_tree[tag]
        if not isinstance(old, list):
            # Multiple encounters, change dict to a list.
            node_tree.pop(tag)
            node_tree[tag] = [old]
        node_tree[tag].append(tree)  # Add the new one.

    if not node_tree:
        return None
    return node_tree
def __parse_node(node):
    """Convert an ElementTree element into nested dict/list/text data.

    Args:
        node: the element to convert.

    Returns:
        * the element's text when it is a leaf with text and no attributes;
        * otherwise an ``object_dict`` with one entry per attribute and per
          child tag (repeated tags become lists).  Non-blank text of an
          element that also has attributes or children is kept under
          ``'value'``.
    """
    # Iterating the element replaces Element.getchildren(), which was
    # removed in Python 3.9.
    children = list(node)

    if node.text and not node.attrib and not children:
        return node.text

    tmp = object_dict()

    # Previously any truthy text (including the indentation whitespace of a
    # pretty-printed document) rebound ``tmp`` to a str, so storing an
    # attribute or child raised TypeError.  Keep the dict; preserve
    # meaningful text under 'value' and ignore whitespace-only text.
    if node.text and node.text.strip():
        tmp['value'] = node.text

    # save attrs, hope there will not be a child with same name
    for k, v in node.attrib.items():
        tmp[k] = v

    for ch in children:
        cht = ch.tag
        chp = __parse_node(ch)
        if cht not in tmp:
            # the first time, so store it in dict
            tmp[cht] = chp
            continue
        old = tmp[cht]
        if not isinstance(old, list):
            # multi times, so change old dict to a list
            tmp.pop(cht)
            tmp[cht] = [old]
        tmp[cht].append(chp)  # add the new one

    return tmp
def _parse_node(self, node): node_tree = object_dict() #append attribute self._append_attribute(node, node_tree) if len(node.attributes.keys()) <1: node_dicts = node_tree else: node_tree[XML_VALUE] ={} node_dicts = node_tree[XML_VALUE] for child in node.childNodes: nodename = child.nodeName.encode() if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE): nodeval = child.nodeValue.encode() if nodeval not in self.XML_SUPERWORD: '''coding may be had error''' node_tree = child.nodeValue.decode().encode() continue #Save childrens tag, tree = self._namespace_split(nodename, self._parse_node(child)) """ # the first time, so store it in dict if tag not in node_tree: node_tree[tag] = tree continue """ # the first time, if the node have child so store it in list ,other raise store it in dict if tag not in node_dicts: node_dicts[tag] = tree nodenum = len(child.childNodes) if nodenum<1: node_dicts[tag] = tree else: if nodenum ==1: if child.childNodes[0].nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE): node_dicts[tag] = tree else: node_dicts.pop(tag) node_dicts[tag] = [tree] else: node_dicts.pop(tag) node_dicts[tag] = [tree] continue old = node_dicts[tag] if not isinstance(old, list): node_dicts.pop(tag) # multi times, so change old dict to a list node_dicts[tag] = [old] # add the new one node_dicts[tag].append(tree) return node_tree
def _parse_node(self, node):
    """Convert an ElementTree element into nested dict/list/text data.

    Args:
        node: the element to convert.

    Returns:
        * the element's text when it has text that is not listed in
          ``self.XML_SUPERWORD`` (attributes and children are then not
          visited);
        * otherwise an ``object_dict``.  Children are collected in the dict
          itself when the element has no attributes, or under ``XML_VALUE``
          when it has; attributes are attached afterwards through
          ``self._append_attribute``.  A child that itself has children is
          stored as a list even on first encounter; repeated tags are
          accumulated in a list.
    """
    if node.text and node.text not in self.XML_SUPERWORD:
        # Original note: "coding may be had error".
        return node.text

    node_tree = object_dict()
    if len(node.attrib) < 1:
        node_dicts = node_tree
    else:
        node_tree[XML_VALUE] = {}
        node_dicts = node_tree[XML_VALUE]

    # Iterating the element / len(child) replace Element.getchildren(),
    # which was removed in Python 3.9.
    for child in list(node):
        # Save children
        tag, tree = self._namespace_split(child.tag, self._parse_node(child))

        if tag not in node_dicts:
            # First encounter: a childless element is stored directly, an
            # element with children is wrapped in a list.
            node_dicts[tag] = tree if len(child) < 1 else [tree]
            continue

        old = node_dicts[tag]
        if not isinstance(old, list):
            # Repeated tag: change the previously stored value to a list.
            node_dicts.pop(tag)
            node_dicts[tag] = [old]
        # Add the new one.
        node_dicts[tag].append(tree)

    # Append attributes.
    self._append_attribute(node, node_tree)
    return node_tree
def fromstring(self, s):
    """Parse the XML string ``s`` and return an ``object_dict`` that maps
    the (namespace-split) root tag to the parsed tree."""
    root = ET.fromstring(s)
    raw_tag = root.tag
    parsed = self._parse_node(root)
    tag, tree = self._namespace_split(raw_tag, parsed)
    result = object_dict({tag: tree})
    return result
def fromstring(s):
    """Parse the XML string ``s`` and return an ``object_dict`` that maps
    the root tag to the parsed tree."""
    root = ET.fromstring(s)
    root_tag = root.tag
    tree = __parse_node(root)
    return object_dict({root_tag: tree})
def parse(file):
    """Parse an XML file into an ``object_dict`` keyed by the root tag.

    Args:
        file: path of the XML file; it is passed to ``open``.

    Returns:
        ``object_dict({<root tag>: <parsed tree>})``.
    """
    # The context manager closes the handle even when parsing fails (it was
    # previously never closed).  Binary mode lets the XML parser apply the
    # encoding declared in the document instead of the locale's text
    # encoding.
    with open(file, 'rb') as f:
        t = ET.parse(f).getroot()
    return object_dict({t.tag: __parse_node(t)})