Example #1
0
def read_xml(in_path):
    """Parse the XML stored at ``in_path`` and return the parsed tree.

    The file content is obtained through ``MDCompressFile.uncompress_file``
    and parsed from an in-memory buffer with an ``etree.XMLParser`` created
    with ``huge_tree=True``.

    :param in_path: path handed to ``MDCompressFile.uncompress_file``
    :return: the object returned by ``etree.parse``
    """
    xml_text = MDCompressFile.uncompress_file(in_path)
    buffer_ = StringIO(xml_text)
    big_parser = etree.XMLParser(huge_tree=True)
    return etree.parse(buffer_, parser=big_parser)
Example #2
0
def writeXML(path, xml):
    '''创建xml'''
    currentDirName = path.split('/')[-1]
    print '当前目录名称============='
    print path
    try:
        file = open(path, mode='w')
        file.write(MDCompressFile.gzip_compress(xml.toprettyxml(indent = "\t", newl = "\n", encoding = "utf-8")))
        file.close()
        print '文件目录', path
        print '生成xml成功。。'

    except Exception, e:
        print e
Example #3
0
def writeXML(path, xml):
    '''创建xml'''
    currentDirName = path.split('/')[-1]
    print '当前目录名称============='
    print path
    try:
        file = open(path, mode='w')
        file.write(
            MDCompressFile.gzip_compress(
                xml.toprettyxml(indent="\t", newl="\n", encoding="utf-8")))
        file.close()
        print '文件目录', path
        print '生成xml成功。。'

    except Exception, e:
        print e
Example #4
0
def downXBRLDoc(url, cik,params):
	'''
	下载xbrl文档
	'''
	print "开始下载URL:", url
	period = params[0]
	accepted = params[1].split(' ')[0]
	try:
		content = downUrlRetrieve(url)
		fileDir = os.path.join(os.getcwd() , 'XBRLDown_1' , cik+'#'+accepted+'#'+period)
		fileName = os.path.basename(url)
		if not os.path.exists(fileDir):
			os.makedirs(fileDir)
			print '创建目录。。', fileDir
		desktopPath = os.path.join(fileDir, fileName)
		print '本地化地址-------------' , desktopPath
		print  time.strftime('%Y-%m-%d %X', time.localtime( time.time() ) )
		with open(desktopPath, "wb") as code:
			code.write(MDCompressFile.gzip_compress(content))
#			code.write(content)
	except Exception,e :
		print e
		print '出错了..'
Example #5
0
def downUrlRetrieve(dirName, url, fileName, files):
    '''
    下载URL
    '''
    print "downloading with requests"
    try:
        r = requests.get(url)
        fileDir = os.path.join(os.path.expanduser("/"),'home','XBRL', '%s' % dirName)
        if not os.path.exists(fileDir):
            os.makedirs(fileDir)
            print '创建目录。。', fileDir
        desktopPath = os.path.join(fileDir, fileName)
        with open(desktopPath, "wb") as code:
            code.write(MDCompressFile.gzip_compress(r.content))
            # print '当前目录数为', dirCount
            print  time.strftime('%Y-%m-%d %X', time.localtime(time.time()))
            print '写入文件-------------' , desktopPath
            # code.write(r.content)
    except Exception,e :
        print e
        print "+++++++++++++++++++++++++++++++++++++"
        # consur.fail.insert(files)
        print '错误!插入数据库'
Example #6
0
def downUrlRetrieve(dirName, url, fileName, files):
    '''
    下载URL
    '''
    print "downloading with requests"
    try:
        r = requests.get(url)
        fileDir = os.path.join(os.path.expanduser("/"), 'home', 'XBRL',
                               '%s' % dirName)
        if not os.path.exists(fileDir):
            os.makedirs(fileDir)
            print '创建目录。。', fileDir
        desktopPath = os.path.join(fileDir, fileName)
        with open(desktopPath, "wb") as code:
            code.write(MDCompressFile.gzip_compress(r.content))
            # print '当前目录数为', dirCount
            print time.strftime('%Y-%m-%d %X', time.localtime(time.time()))
            print '写入文件-------------', desktopPath
            # code.write(r.content)
    except Exception, e:
        print e
        print "+++++++++++++++++++++++++++++++++++++"
        # consur.fail.insert(files)
        print '错误!插入数据库'
Example #7
0
def praseXML(path):
    '''
    Parse an xml document and collect its namespaced child elements.

    Each direct child of the root whose local tag name is not in
    ``SupportItem`` is turned into a one-entry dict
    ``{'<prefix>:<local tag>': value}`` appended to ``itemArr``, where
    ``value`` holds the element text under ``'CONTENTTEXT'`` plus one entry
    per attribute (namespaced attributes are keyed ``'<prefix>:<name>'``).

    :param path: path of the xml document (read through
        ``MDCompressFile.uncompress_file``)
    :return: namespaces and the collection of instances, per the original
        docstring -- the return statement and the handler of the outer
        ``try`` are not part of the visible code
    '''
    itemArr = []# instances collected from the xml
    try:
        tree = etree.parse(StringIO.StringIO(MDCompressFile.uncompress_file(path)), parser=etree.XMLParser(huge_tree=True))
        root = tree.getroot()
        nsmap = root.nsmap
        # inverted nsmap: the values of nsmap become keys and vice versa
        pamsn = {v:k for k,v in nsmap.items()}

        # (disabled) read the namespaces from the matching xsd file, to further
        # confirm whether an item is an extension or unconfirmed
        # xsdPath = path[:-4]+'.xsd'
        # xsdNsmap = None
        # xsdPamsn = None
        # if os.path.exists(xsdPath):
        #     xsdTree = etree.parse(xsdPath)
        #     xsdRoot = xsdTree.getroot()
        #     xsdNsmap = xsdRoot.nsmap
        #     xsdPamsn = {v: k for k, v in xsdNsmap.items()}


        # walk the direct children of the root element
        for child in root:
            # skip auxiliary elements whose local tag name is listed in SupportItem
            try:
                if child.tag.split('}')[-1] in SupportItem:
                    continue
            except Exception, e:
                # any failure while inspecting the tag also skips this child
                continue

            # text between '{' and '}' of the tag, used as key into pamsn
            nameSpaceLink = str(child.tag).strip('{').split('}')[0]

            # check that the tag's namespace is one of the known namespaces
            # NOTE(review): an unknown namespace raises KeyError here instead of
            # yielding None; it would propagate to the outer try -- confirm intent
            if pamsn[nameSpaceLink] != None:
                # rule filtering
                attDic = child.attrib
                # assemble the parameters
                tag = child.tag
                text = child.text
                value = {}
                try:
                    value['CONTENTTEXT'] = text
                except Exception,e:
                    print e
                    print  'something wrong to get <CONTENTTEXT> ============'
                # process the attributes of the tag
                for attTemp in attDic:
                    try:
                        temp = attTemp.split('}')
                        # check whether the attribute name carries a namespace
                        if len(temp) > 1:
                            if pamsn[temp[0].strip('{')] != None:
                                value[pamsn[temp[0].strip('{')] + ':' + temp[-1]] = attDic[attTemp]
                        else:
                            value[attTemp] = attDic[attTemp]
                    except Exception, e:
                        # attributes that cannot be mapped are silently dropped
                        continue

                # key is '<prefix>:<local tag name>'
                dic = {pamsn[nameSpaceLink] +':'+ tag.split('}')[-1]:value}
                itemArr.append(dic)
Example #8
0
def praseXML(path):
    '''
    解析xml 获取命名空间, 和实例
    :param path:xml文档路径
    :return: 命名空间 和 实例集合
    '''
    itemArr = []  #存储xml中实例
    try:
        tree = etree.parse(StringIO.StringIO(
            MDCompressFile.uncompress_file(path)),
                           parser=etree.XMLParser(huge_tree=True))
        root = tree.getroot()
        nsmap = root.nsmap
        #nsmap双向映射
        pamsn = {v: k for k, v in nsmap.items()}

        #xsd文件获取命名空间, xsd获取命名空间 为了进一步确认是否是拓展或者未确认
        # xsdPath = path[:-4]+'.xsd'
        # xsdNsmap = None
        # xsdPamsn = None
        # if os.path.exists(xsdPath):
        #     xsdTree = etree.parse(xsdPath)
        #     xsdRoot = xsdTree.getroot()
        #     xsdNsmap = xsdRoot.nsmap
        #     xsdPamsn = {v: k for k, v in xsdNsmap.items()}

        #获取当前文件名年数
        for child in root:
            # 去除辅助性元素
            try:
                if child.tag.split('}')[-1] in SupportItem:
                    continue
            except Exception, e:
                continue

            nameSpaceLink = str(child.tag).strip('{').split('}')[0]

            #判断 tag前命名引用 是否是存在namespace中间的
            if pamsn[nameSpaceLink] != None:
                #规则过滤
                attDic = child.attrib
                #拼接参数
                tag = child.tag
                text = child.text
                value = {}
                try:
                    value['CONTENTTEXT'] = text
                except Exception, e:
                    print e
                    print 'something wrong to get <CONTENTTEXT> ============'
                #处理tag属性
                for attTemp in attDic:
                    try:
                        temp = attTemp.split('}')
                        # 判断属性中是否带有命名空间
                        if len(temp) > 1:
                            if pamsn[temp[0].strip('{')] != None:
                                value[pamsn[temp[0].strip('{')] + ':' +
                                      temp[-1]] = attDic[attTemp]
                        else:
                            value[attTemp] = attDic[attTemp]
                    except Exception, e:
                        continue

                dic = {pamsn[nameSpaceLink] + ':' + tag.split('}')[-1]: value}
                itemArr.append(dic)