def readZip(f, **kwargs):
    '''Read an nmpml document, and any attached data, from a zip archive.

    f is handed directly to ZipFile (opened in mode 'r'). The archive member
    named 'xml' is parsed with xm.readTree and cast to the nmpml dialect.
    If the archive also holds a 'data' member, it is decoded with readMD
    (return_raw_hash=True) and every 'Data' element of the document is
    initialized via datinit(data, header) using the entry stored under that
    element's upath(). Elements without a usable entry are initialized with
    zeros(0) and an empty header dict, and a message is printed.

    Extra keyword arguments are accepted but not used.

    Returns the document. If the 'data' member cannot be read, a message is
    printed and the document is returned without attaching any data.
    '''
    from mien.parsers.nmpml import elements as dialect
    archive = ZipFile(f, 'r')
    # try/finally guarantees the archive is closed on every exit path,
    # including the early return below and any parsing error.
    try:
        xmlbuf = StringIO(archive.read('xml'))
        doc = xm.readTree(xmlbuf)
        xmlbuf.close()
        doc = xm.assignClasses(doc, dialect)
        try:
            raw = archive.read('data')
        except Exception:
            # Best effort: a document without a readable data member is
            # still returned. (Exception rather than a bare except, so
            # KeyboardInterrupt/SystemExit are not swallowed.)
            print("No data archive in zip file")
            return doc
        from mien.parsers.datahash import readMD
        datahash = readMD(StringIO(raw), return_raw_hash=True)
        for de in doc.getElements('Data'):
            try:
                d, h = datahash[de.upath()]
            except Exception:
                print("can't find data for element %s" % (de.upath(),))
                d, h = (zeros(0), {})
            de.datinit(d, h)
    finally:
        archive.close()
    return doc
def read(fname, toObject=False):
    '''Parse an XML document and return its root element.

    fname is either a string, which is opened with urllib.urlopen, or an
    object providing a read() method; its entire content is fed to a
    QuickParser.

    Returns the first element collected by the parser (a tree structure
    made of dicts). If toObject is true, that tree is instead passed through
    assignClasses with an empty dialect and the result is returned.
    '''
    opened = None
    if isinstance(fname, basestring):
        # This handle is created here, so it is closed here as well;
        # caller-supplied file objects are left open for the caller.
        opened = fname = urllib.urlopen(fname)
    try:
        content = fname.read()
    finally:
        if opened is not None:
            opened.close()
    sp = QuickParser()
    sp.feed(content)
    sp.close()
    root = sp.elements[0]
    if not toObject:
        return root
    return assignClasses(root, {})
def deserialize(f, **kwargs):
    '''Rebuild an nmpml document from its serialized string form.

    f is either the serialized string itself or an object with a read()
    method that returns it. The layout consumed here is:
      * 4 bytes: little-endian unsigned int, the length of the xml section
      * that many bytes of zlib-compressed xml
      * optionally, a data section that readMD can decode, either as-is
        or after zlib decompression

    The xml is parsed with xm.readTree and cast to the nmpml dialect. When a
    data section is present, every 'Data' element of the document is
    initialized via datinit(data, header) with the entry stored under the
    element's upath(); elements without a usable entry get zeros(0) and an
    empty header dict. Any failure while loading the data section is
    reported with a printed message and does not prevent the document from
    being returned.

    Extra keyword arguments are accepted but not used.

    Returns the document.
    '''
    if not isinstance(f, basestring):
        f = f.read()
    xmlsize = struct.unpack('<I', f[:4])[0]
    xmlbuf = StringIO(zlib.decompress(f[4:xmlsize + 4]))
    doc = xm.readTree(xmlbuf)
    from mien.parsers.nmpml import elements as dialect
    doc = xm.assignClasses(doc, dialect)
    payload = f[xmlsize + 4:]
    # Exception (not a bare except) keeps the best-effort behaviour while
    # letting KeyboardInterrupt/SystemExit propagate.
    try:
        if payload:
            from mien.parsers.datahash import readMD
            try:
                datahash = readMD(StringIO(payload), return_raw_hash=True)
            except Exception:
                # Not readable as-is: retry on the zlib-decompressed bytes.
                payload = zlib.decompress(payload)
                datahash = readMD(StringIO(payload), return_raw_hash=True)
            for de in doc.getElements('Data'):
                try:
                    d, h = datahash[de.upath()]
                except Exception:
                    print("can't find data for element %s" % (de.upath(),))
                    d, h = (zeros(0), {})
                de.datinit(d, h)
    except Exception:
        print("cant load data")
    return doc
def read(fname, **kwargs):
    '''Read the file or url fname and return the loaded document.

    This function will attempt to automatically identify the format of the
    file, and will ask the user for confirmation if it can't make a unique
    ID. To override this behavior use the keyword argument "format".

    This function will return None if it fails, or an NmpmlObject if it
    succeeds.

    fname may be the name of a local file, or it may be a url.

    Keyword arguments:

    "format" - set this to a key of "filetypes" to force the input file to
        be treated as a particular format.

    "gui" - set this to a mien.wx.base.BaseGui instance to use that GUI's
        methods for user interaction during the load. Set it to True to
        cause this function to make its own GUI for interaction (otherwise,
        you will get text-mode interaction).

    "select" - if True, ask the user to select subsets of a document to
        load (by default, the whole document is automatically loaded).

    "convertxml" - if True, cast generic xml to a simplified version of the
        nmpml dialect. This will cause some advanced functions of the
        interfaces to work. On the other hand, this may cause the xml to be
        modified. Tags without an attribute "Name" will be given one, and
        the value of the "Name" tag will be rendered sibling-unique. This
        may mean that if the resulting xml is saved back to a file, other
        parsers will not read it correctly. By default, a compatibility
        class is used for generic XML that enables most of the features of
        the Nmpml interfaces without modifying the xml. Note that this flag
        will only convert "generic" xml, so if there is a user-defined xml
        dialect that is non-empty, but not nmpml-compliant, this flag will
        not make it functional.

    The keys "unparsed_url", "parsed_url" and "format" are added to the
    keyword arguments before they are forwarded to the format's reader and
    to select_elements.
    '''
    url = fname
    parts = parseurl(url)
    fname = parts[2]
    kwargs['unparsed_url'] = url
    kwargs['parsed_url'] = parts

    # Work out which filetypes entry describes this file.
    fmt = kwargs.get('format')
    if not fmt:
        fmt = get_file_format(fname, kwargs.get('gui'))
        kwargs['format'] = fmt
    if fmt == "unknown xml":
        ft = {'xml dialect': 'automatic', 'read': True}
    elif fmt not in filetypes:
        # Not a known key; try treating it as a file extension instead.
        matches = match_extension(fmt)
        if not matches:
            print("Unknown format %s" % fmt)
            return None
        kwargs['format'] = matches[0]
        ft = filetypes[matches[0]]
    else:
        ft = filetypes[fmt]
    if not ft["read"]:
        print("format %s is write only" % fmt)
        return None

    is_xml = 'xml dialect' in ft
    fileobj, cleanup = openurlread(url)
    # try/finally so the opened resource is released even if parsing fails.
    try:
        if is_xml:
            doc = xm.readTree(fileobj)
        else:
            doc = ft["read"](fileobj, **kwargs)
    finally:
        cleanup()

    if is_xml:
        if ft['xml dialect'] == 'automatic':
            fmt = get_xml_dialect(doc)
            kwargs['format'] = fmt
            ft = filetypes[fmt]
        dialect = ft['xml dialect']
        if dialect == {}:
            # Generic xml: either convert to nmpml's default class or wrap
            # in the non-modifying compatibility class.
            if kwargs.get("convertxml"):
                dialect = {'default class': filetypes['nmpml']['xml dialect']['default class']}
            else:
                from mien.nmpml.basic_tools import NmpmlCompat
                dialect = {'default class': NmpmlCompat}
        doc = xm.assignClasses(doc, dialect)

    if kwargs.get('select'):
        if doc.fileinformation.get('select_done'):
            # Selection was already flagged as done; just clear the flag.
            del doc.fileinformation['select_done']
        else:
            doc = select_elements(doc, **kwargs)
    doc.fileinformation["filename"] = url
    # NOTE(review): when the format was resolved through match_extension,
    # this records the originally requested string rather than the resolved
    # filetypes key (which is only stored in kwargs['format']) -- confirm
    # whether that is intended.
    doc.fileinformation["type"] = fmt
    if not ft.get('autoload'):
        doc.onLoad()
    return doc