def read_xml(in_path):
    """Decompress the file at ``in_path`` and parse it as XML.

    The decompressed text returned by ``MDCompressFile.uncompress_file`` is
    wrapped in an in-memory ``StringIO`` buffer and handed to ``etree.parse``.

    :param in_path: path of the compressed XML document.
    :return: the tree object produced by ``etree.parse``.
    """
    xml_buffer = StringIO(MDCompressFile.uncompress_file(in_path))
    # NOTE(review): huge_tree is presumably lxml's switch for very large
    # documents -- confirm against the etree import at the top of the file.
    xml_parser = etree.XMLParser(huge_tree=True)
    return etree.parse(xml_buffer, parser=xml_parser)
def writeXML(path, xml): '''创建xml''' currentDirName = path.split('/')[-1] print '当前目录名称=============' print path try: file = open(path, mode='w') file.write(MDCompressFile.gzip_compress(xml.toprettyxml(indent = "\t", newl = "\n", encoding = "utf-8"))) file.close() print '文件目录', path print '生成xml成功。。' except Exception, e: print e
def writeXML(path, xml): '''创建xml''' currentDirName = path.split('/')[-1] print '当前目录名称=============' print path try: file = open(path, mode='w') file.write( MDCompressFile.gzip_compress( xml.toprettyxml(indent="\t", newl="\n", encoding="utf-8"))) file.close() print '文件目录', path print '生成xml成功。。' except Exception, e: print e
def downXBRLDoc(url, cik,params): ''' 下载xbrl文档 ''' print "开始下载URL:", url period = params[0] accepted = params[1].split(' ')[0] try: content = downUrlRetrieve(url) fileDir = os.path.join(os.getcwd() , 'XBRLDown_1' , cik+'#'+accepted+'#'+period) fileName = os.path.basename(url) if not os.path.exists(fileDir): os.makedirs(fileDir) print '创建目录。。', fileDir desktopPath = os.path.join(fileDir, fileName) print '本地化地址-------------' , desktopPath print time.strftime('%Y-%m-%d %X', time.localtime( time.time() ) ) with open(desktopPath, "wb") as code: code.write(MDCompressFile.gzip_compress(content)) # code.write(content) except Exception,e : print e print '出错了..'
def downUrlRetrieve(dirName, url, fileName, files): ''' 下载URL ''' print "downloading with requests" try: r = requests.get(url) fileDir = os.path.join(os.path.expanduser("/"),'home','XBRL', '%s' % dirName) if not os.path.exists(fileDir): os.makedirs(fileDir) print '创建目录。。', fileDir desktopPath = os.path.join(fileDir, fileName) with open(desktopPath, "wb") as code: code.write(MDCompressFile.gzip_compress(r.content)) # print '当前目录数为', dirCount print time.strftime('%Y-%m-%d %X', time.localtime(time.time())) print '写入文件-------------' , desktopPath # code.write(r.content) except Exception,e : print e print "+++++++++++++++++++++++++++++++++++++" # consur.fail.insert(files) print '错误!插入数据库'
def downUrlRetrieve(dirName, url, fileName, files): ''' 下载URL ''' print "downloading with requests" try: r = requests.get(url) fileDir = os.path.join(os.path.expanduser("/"), 'home', 'XBRL', '%s' % dirName) if not os.path.exists(fileDir): os.makedirs(fileDir) print '创建目录。。', fileDir desktopPath = os.path.join(fileDir, fileName) with open(desktopPath, "wb") as code: code.write(MDCompressFile.gzip_compress(r.content)) # print '当前目录数为', dirCount print time.strftime('%Y-%m-%d %X', time.localtime(time.time())) print '写入文件-------------', desktopPath # code.write(r.content) except Exception, e: print e print "+++++++++++++++++++++++++++++++++++++" # consur.fail.insert(files) print '错误!插入数据库'
def praseXML(path): ''' 解析xml 获取命名空间, 和实例 :param path:xml文档路径 :return: 命名空间 和 实例集合 ''' itemArr = []#存储xml中实例 try: tree = etree.parse(StringIO.StringIO(MDCompressFile.uncompress_file(path)), parser=etree.XMLParser(huge_tree=True)) root = tree.getroot() nsmap = root.nsmap #nsmap双向映射 pamsn = {v:k for k,v in nsmap.items()} #xsd文件获取命名空间, xsd获取命名空间 为了进一步确认是否是拓展或者未确认 # xsdPath = path[:-4]+'.xsd' # xsdNsmap = None # xsdPamsn = None # if os.path.exists(xsdPath): # xsdTree = etree.parse(xsdPath) # xsdRoot = xsdTree.getroot() # xsdNsmap = xsdRoot.nsmap # xsdPamsn = {v: k for k, v in xsdNsmap.items()} #获取当前文件名年数 for child in root: # 去除辅助性元素 try: if child.tag.split('}')[-1] in SupportItem: continue except Exception, e: continue nameSpaceLink = str(child.tag).strip('{').split('}')[0] #判断 tag前命名引用 是否是存在namespace中间的 if pamsn[nameSpaceLink] != None: #规则过滤 attDic = child.attrib #拼接参数 tag = child.tag text = child.text value = {} try: value['CONTENTTEXT'] = text except Exception,e: print e print 'something wrong to get <CONTENTTEXT> ============' #处理tag属性 for attTemp in attDic: try: temp = attTemp.split('}') # 判断属性中是否带有命名空间 if len(temp) > 1: if pamsn[temp[0].strip('{')] != None: value[pamsn[temp[0].strip('{')] + ':' + temp[-1]] = attDic[attTemp] else: value[attTemp] = attDic[attTemp] except Exception, e: continue dic = {pamsn[nameSpaceLink] +':'+ tag.split('}')[-1]:value} itemArr.append(dic)
def praseXML(path): ''' 解析xml 获取命名空间, 和实例 :param path:xml文档路径 :return: 命名空间 和 实例集合 ''' itemArr = [] #存储xml中实例 try: tree = etree.parse(StringIO.StringIO( MDCompressFile.uncompress_file(path)), parser=etree.XMLParser(huge_tree=True)) root = tree.getroot() nsmap = root.nsmap #nsmap双向映射 pamsn = {v: k for k, v in nsmap.items()} #xsd文件获取命名空间, xsd获取命名空间 为了进一步确认是否是拓展或者未确认 # xsdPath = path[:-4]+'.xsd' # xsdNsmap = None # xsdPamsn = None # if os.path.exists(xsdPath): # xsdTree = etree.parse(xsdPath) # xsdRoot = xsdTree.getroot() # xsdNsmap = xsdRoot.nsmap # xsdPamsn = {v: k for k, v in xsdNsmap.items()} #获取当前文件名年数 for child in root: # 去除辅助性元素 try: if child.tag.split('}')[-1] in SupportItem: continue except Exception, e: continue nameSpaceLink = str(child.tag).strip('{').split('}')[0] #判断 tag前命名引用 是否是存在namespace中间的 if pamsn[nameSpaceLink] != None: #规则过滤 attDic = child.attrib #拼接参数 tag = child.tag text = child.text value = {} try: value['CONTENTTEXT'] = text except Exception, e: print e print 'something wrong to get <CONTENTTEXT> ============' #处理tag属性 for attTemp in attDic: try: temp = attTemp.split('}') # 判断属性中是否带有命名空间 if len(temp) > 1: if pamsn[temp[0].strip('{')] != None: value[pamsn[temp[0].strip('{')] + ':' + temp[-1]] = attDic[attTemp] else: value[attTemp] = attDic[attTemp] except Exception, e: continue dic = {pamsn[nameSpaceLink] + ':' + tag.split('}')[-1]: value} itemArr.append(dic)