def read_xml(in_path):
    """Parse the XML document stored at ``in_path`` and return its tree.

    The file is not opened directly; its content is obtained through
    ``MDCompressFile.uncompress_file`` and parsed from an in-memory buffer.

    :param in_path: path handed to ``MDCompressFile.uncompress_file``
    :return: the object returned by ``etree.parse`` for that content
    """
    # NOTE(review): presumably uncompress_file returns the decompressed XML
    # as text -- confirm against MDCompressFile.
    xml_buffer = StringIO(MDCompressFile.uncompress_file(in_path))
    # huge_tree=True is passed so that very large documents are accepted
    # (see the lxml XMLParser documentation for the limits it lifts).
    big_doc_parser = etree.XMLParser(huge_tree=True)
    return etree.parse(xml_buffer, parser=big_doc_parser)
def praseXML(path): ''' 解析xml 获取命名空间, 和实例 :param path:xml文档路径 :return: 命名空间 和 实例集合 ''' itemArr = []#存储xml中实例 try: tree = etree.parse(StringIO.StringIO(MDCompressFile.uncompress_file(path)), parser=etree.XMLParser(huge_tree=True)) root = tree.getroot() nsmap = root.nsmap #nsmap双向映射 pamsn = {v:k for k,v in nsmap.items()} #xsd文件获取命名空间, xsd获取命名空间 为了进一步确认是否是拓展或者未确认 # xsdPath = path[:-4]+'.xsd' # xsdNsmap = None # xsdPamsn = None # if os.path.exists(xsdPath): # xsdTree = etree.parse(xsdPath) # xsdRoot = xsdTree.getroot() # xsdNsmap = xsdRoot.nsmap # xsdPamsn = {v: k for k, v in xsdNsmap.items()} #获取当前文件名年数 for child in root: # 去除辅助性元素 try: if child.tag.split('}')[-1] in SupportItem: continue except Exception, e: continue nameSpaceLink = str(child.tag).strip('{').split('}')[0] #判断 tag前命名引用 是否是存在namespace中间的 if pamsn[nameSpaceLink] != None: #规则过滤 attDic = child.attrib #拼接参数 tag = child.tag text = child.text value = {} try: value['CONTENTTEXT'] = text except Exception,e: print e print 'something wrong to get <CONTENTTEXT> ============' #处理tag属性 for attTemp in attDic: try: temp = attTemp.split('}') # 判断属性中是否带有命名空间 if len(temp) > 1: if pamsn[temp[0].strip('{')] != None: value[pamsn[temp[0].strip('{')] + ':' + temp[-1]] = attDic[attTemp] else: value[attTemp] = attDic[attTemp] except Exception, e: continue dic = {pamsn[nameSpaceLink] +':'+ tag.split('}')[-1]:value} itemArr.append(dic)
def praseXML(path): ''' 解析xml 获取命名空间, 和实例 :param path:xml文档路径 :return: 命名空间 和 实例集合 ''' itemArr = [] #存储xml中实例 try: tree = etree.parse(StringIO.StringIO( MDCompressFile.uncompress_file(path)), parser=etree.XMLParser(huge_tree=True)) root = tree.getroot() nsmap = root.nsmap #nsmap双向映射 pamsn = {v: k for k, v in nsmap.items()} #xsd文件获取命名空间, xsd获取命名空间 为了进一步确认是否是拓展或者未确认 # xsdPath = path[:-4]+'.xsd' # xsdNsmap = None # xsdPamsn = None # if os.path.exists(xsdPath): # xsdTree = etree.parse(xsdPath) # xsdRoot = xsdTree.getroot() # xsdNsmap = xsdRoot.nsmap # xsdPamsn = {v: k for k, v in xsdNsmap.items()} #获取当前文件名年数 for child in root: # 去除辅助性元素 try: if child.tag.split('}')[-1] in SupportItem: continue except Exception, e: continue nameSpaceLink = str(child.tag).strip('{').split('}')[0] #判断 tag前命名引用 是否是存在namespace中间的 if pamsn[nameSpaceLink] != None: #规则过滤 attDic = child.attrib #拼接参数 tag = child.tag text = child.text value = {} try: value['CONTENTTEXT'] = text except Exception, e: print e print 'something wrong to get <CONTENTTEXT> ============' #处理tag属性 for attTemp in attDic: try: temp = attTemp.split('}') # 判断属性中是否带有命名空间 if len(temp) > 1: if pamsn[temp[0].strip('{')] != None: value[pamsn[temp[0].strip('{')] + ':' + temp[-1]] = attDic[attTemp] else: value[attTemp] = attDic[attTemp] except Exception, e: continue dic = {pamsn[nameSpaceLink] + ':' + tag.split('}')[-1]: value} itemArr.append(dic)