Ejemplo n.º 1
0
def read_xml(in_path):
	"""Decompress the file at ``in_path`` and parse the result as XML.

	The path is handed to ``MDCompressFile.uncompress_file``; whatever that
	returns is wrapped in a ``StringIO`` buffer and parsed with an
	``etree.XMLParser`` created with ``huge_tree=True``.
	NOTE(review): ``etree`` is presumably ``lxml.etree`` (``huge_tree`` is an
	lxml parser option) -- confirm against the file's imports.

	:param in_path: path passed unchanged to ``MDCompressFile.uncompress_file``
	:return: the tree object returned by ``etree.parse``
	"""
	raw_xml = MDCompressFile.uncompress_file(in_path)
	xml_buffer = StringIO(raw_xml)
	huge_parser = etree.XMLParser(huge_tree=True)
	return etree.parse(xml_buffer, parser=huge_parser)
Ejemplo n.º 2
0
def praseXML(path):
    '''
    Parse an XML document and collect its namespaced top-level elements.

    Each kept child of the root becomes a one-entry dict of the form
    {"<prefix>:<local tag name>": value}, where value holds the element text
    under 'CONTENTTEXT' plus one entry per attribute. These dicts are
    appended to itemArr.

    NOTE(review): only part of this function is visible here -- the handler
    for the outer ``try`` and the return statement are not shown.

    :param path: path of the XML document (passed to
        MDCompressFile.uncompress_file)
    :return: namespaces and the collection of instances (per the original
        docstring; the return statement is not visible in this excerpt)
    '''
    itemArr = []# stores the instances found in the XML
    try:
        tree = etree.parse(StringIO.StringIO(MDCompressFile.uncompress_file(path)), parser=etree.XMLParser(huge_tree=True))
        root = tree.getroot()
        nsmap = root.nsmap
        # Reverse of nsmap: namespace URI -> prefix
        pamsn = {v:k for k,v in nsmap.items()}

        # Read the namespaces from the companion .xsd file, to further confirm
        # whether an element is an extension or unconfirmed (currently disabled)
        # xsdPath = path[:-4]+'.xsd'
        # xsdNsmap = None
        # xsdPamsn = None
        # if os.path.exists(xsdPath):
        #     xsdTree = etree.parse(xsdPath)
        #     xsdRoot = xsdTree.getroot()
        #     xsdNsmap = xsdRoot.nsmap
        #     xsdPamsn = {v: k for k, v in xsdNsmap.items()}


        # Original note: "get the year from the current file name".
        # The loop below does not do that; it walks the direct children of root.
        for child in root:
            # Skip auxiliary elements (local tag name listed in SupportItem)
            try:
                if child.tag.split('}')[-1] in SupportItem:
                    continue
            except Exception, e:
                # Any failure while inspecting the tag skips this child.
                # NOTE(review): presumably this covers comment / processing-
                # instruction nodes whose tag is not a string -- confirm.
                continue

            # Namespace URI part of a '{uri}local' tag
            nameSpaceLink = str(child.tag).strip('{').split('}')[0]

            # Check whether the namespace referenced by the tag is mapped to a prefix.
            # A URI missing from pamsn raises KeyError here (not caught locally).
            # NOTE(review): presumably a default namespace maps to None and is
            # therefore skipped by this test -- confirm.
            if pamsn[nameSpaceLink] != None:
                # Rule filtering
                attDic = child.attrib
                # Assemble the pieces of the output entry
                tag = child.tag
                text = child.text
                value = {}
                try:
                    value['CONTENTTEXT'] = text
                except Exception,e:
                    print e
                    print  'something wrong to get <CONTENTTEXT> ============'
                # Process the element's attributes
                for attTemp in attDic:
                    try:
                        temp = attTemp.split('}')
                        # Check whether the attribute name carries a namespace
                        if len(temp) > 1:
                            # Namespaced attribute: store it under 'prefix:local'
                            if pamsn[temp[0].strip('{')] != None:
                                value[pamsn[temp[0].strip('{')] + ':' + temp[-1]] = attDic[attTemp]
                        else:
                            # Plain attribute: store it under its own name
                            value[attTemp] = attDic[attTemp]
                    except Exception, e:
                        # An attribute that cannot be processed is skipped
                        continue

                # Key is 'prefix:local tag name'; value is the text/attribute dict
                dic = {pamsn[nameSpaceLink] +':'+ tag.split('}')[-1]:value}
                itemArr.append(dic)
Ejemplo n.º 3
0
def praseXML(path):
    '''
    解析xml 获取命名空间, 和实例
    :param path:xml文档路径
    :return: 命名空间 和 实例集合
    '''
    itemArr = []  #存储xml中实例
    try:
        tree = etree.parse(StringIO.StringIO(
            MDCompressFile.uncompress_file(path)),
                           parser=etree.XMLParser(huge_tree=True))
        root = tree.getroot()
        nsmap = root.nsmap
        #nsmap双向映射
        pamsn = {v: k for k, v in nsmap.items()}

        #xsd文件获取命名空间, xsd获取命名空间 为了进一步确认是否是拓展或者未确认
        # xsdPath = path[:-4]+'.xsd'
        # xsdNsmap = None
        # xsdPamsn = None
        # if os.path.exists(xsdPath):
        #     xsdTree = etree.parse(xsdPath)
        #     xsdRoot = xsdTree.getroot()
        #     xsdNsmap = xsdRoot.nsmap
        #     xsdPamsn = {v: k for k, v in xsdNsmap.items()}

        #获取当前文件名年数
        for child in root:
            # 去除辅助性元素
            try:
                if child.tag.split('}')[-1] in SupportItem:
                    continue
            except Exception, e:
                continue

            nameSpaceLink = str(child.tag).strip('{').split('}')[0]

            #判断 tag前命名引用 是否是存在namespace中间的
            if pamsn[nameSpaceLink] != None:
                #规则过滤
                attDic = child.attrib
                #拼接参数
                tag = child.tag
                text = child.text
                value = {}
                try:
                    value['CONTENTTEXT'] = text
                except Exception, e:
                    print e
                    print 'something wrong to get <CONTENTTEXT> ============'
                #处理tag属性
                for attTemp in attDic:
                    try:
                        temp = attTemp.split('}')
                        # 判断属性中是否带有命名空间
                        if len(temp) > 1:
                            if pamsn[temp[0].strip('{')] != None:
                                value[pamsn[temp[0].strip('{')] + ':' +
                                      temp[-1]] = attDic[attTemp]
                        else:
                            value[attTemp] = attDic[attTemp]
                    except Exception, e:
                        continue

                dic = {pamsn[nameSpaceLink] + ':' + tag.split('}')[-1]: value}
                itemArr.append(dic)