예제 #1
0
파일: mzip.py 프로젝트: gic888/MIEN
def readZip(f, **kwargs):
	'''Read a zip archive and return the document stored in it.

	The archive member named "xml" is parsed with xm.readTree and passed
	through xm.assignClasses using mien.parsers.nmpml.elements as the dialect.
	If the archive also has a member named "data", it is parsed with readMD
	(return_raw_hash=True) and every "Data" element of the document is
	initialized through datinit() with the entry stored under its upath().
	An element with no usable entry is initialized with zeros(0) and an
	empty dict.

	f -- passed directly to ZipFile(f, 'r').
	kwargs -- accepted but not used by this function.

	Returns the document. The archive is closed on every exit path,
	including the "no data" return and when an exception propagates.
	'''
	from mien.parsers.nmpml import elements as dialect
	archive=ZipFile(f, 'r')
	try:
		xml=StringIO(archive.read('xml'))
		doc=xm.readTree(xml)
		xml.close()
		doc=xm.assignClasses(doc, dialect)
		try:
			raw=archive.read('data')
		except KeyError:
			# ZipFile.read raises KeyError for a missing member; any other
			# failure (e.g. a corrupt member) is a real error and propagates.
			print("No data archive in zip file")
			return doc
		from mien.parsers.datahash import readMD
		datahash=readMD(StringIO(raw), return_raw_hash=True)
		for de in doc.getElements('Data'):
			try:
				d, h=datahash[de.upath()]
			except Exception:
				# Best effort: a missing or malformed entry must not abort
				# the load, but KeyboardInterrupt/SystemExit still get through.
				print("can't find data for element %s" % (de.upath(),))
				d, h=(zeros(0), {})
			de.datinit(d, h)
		return doc
	finally:
		archive.close()
예제 #2
0
파일: html2dict.py 프로젝트: gic888/MIEN
def read(fname, toObject=False):
	'''returns a tree structure for the document (made of dicts)

	fname -- either a string, which is opened with urllib.urlopen, or an
		object that already provides a read() method.
	toObject -- if false (the default) the first parsed element is returned
		as is; otherwise it is passed through assignClasses(element, {})
		and that result is returned.

	A handle opened by this function is closed once its content has been
	read. A handle supplied by the caller is left open.
	'''
	opened_here = isinstance(fname, basestring)
	if opened_here:
		fname = urllib.urlopen(fname)
	try:
		content = fname.read()
	finally:
		# Only release what this function acquired.
		if opened_here:
			fname.close()
	sp = QuickParser()
	sp.feed(content)
	sp.close()
	if not toObject:
		return sp.elements[0]
	return assignClasses(sp.elements[0], {})
예제 #3
0
파일: mzip.py 프로젝트: gic888/MIEN
def deserialize(f, **kwargs):
	'''Rebuild a document from its serialized form.

	Layout, as read by this function: a 4-byte little-endian unsigned
	integer N, then N bytes of zlib-compressed xml, then an optional data
	section. The xml is parsed with xm.readTree and passed through
	xm.assignClasses using mien.parsers.nmpml.elements as the dialect.

	If a data section is present it is parsed with readMD
	(return_raw_hash=True), first as is and, if that fails, after zlib
	decompression. Every "Data" element of the document is then initialized
	through datinit() with the entry stored under its upath(); an element
	with no usable entry gets zeros(0) and an empty dict.

	f -- a string holding the serialized bytes, or an object whose read()
		returns them.
	kwargs -- accepted but not used by this function.

	Returns the document. Failure to load the data section is reported
	with a printed message and does not prevent the document from being
	returned.
	'''
	if not isinstance(f, basestring):
		f=f.read()
	xmlsize=struct.unpack('<I', f[:4])[0]
	xml=StringIO(zlib.decompress(f[4:xmlsize+4]))
	doc=xm.readTree(xml)
	from mien.parsers.nmpml import elements as dialect
	doc=xm.assignClasses(doc, dialect)
	payload=f[xmlsize+4:]
	if not payload:
		return doc
	# Data loading is best effort, but only ordinary errors are absorbed:
	# KeyboardInterrupt and SystemExit are allowed to propagate.
	try:
		from mien.parsers.datahash import readMD
		try:
			datahash=readMD(StringIO(payload), return_raw_hash=True)
		except Exception:
			# Fall back to treating the section as zlib-compressed.
			payload=zlib.decompress(payload)
			datahash=readMD(StringIO(payload), return_raw_hash=True)
		for de in doc.getElements('Data'):
			try:
				d, h=datahash[de.upath()]
			except Exception:
				print("can't find data for element %s" % (de.upath(),))
				d, h=(zeros(0), {})
			de.datinit(d, h)
	except Exception:
		print("cant load data")
	return doc
예제 #4
0
파일: fileIO.py 프로젝트: gic888/MIEN
def read(fname, **kwargs):
	'''reads from file fname. This function will attempt to automatically
identify the format of the file, and will ask the user for confirmation
if it can't make a unique ID. To override this behavior use the keyword
argument "format". This function will return None if it fails, or an
NmpmlObject if it succeeds.

fname may be the name of a local file, or it may be a url.

Key word arguments:

"format" - set this to a key of "filetypes" to force the format of the input
	file to be treated as a particular format.
"gui" - Set this to a mien.wx.base.BaseGui instance to use that GUI's methods
	for user interaction during the load. Set it to True to cause this
	function to make its own GUI for interaction (otherwise, you will get
	text-mode interaction)
"select" - if True, ask the user to select subsets of a document to load
	(by default, the whole document is automatically loaded)
"convertxml" - if True, cast generic xml to a simplified version of the
	nmpml dialect. This will cause some advanced functions of the interfaces
	to work. On the other hand, this may cause the xml to be modified.
	Tags without an attribute "Name" will be given one, and the value of the
	"Name" tag will be rendered sibling-unique. This may mean that if the
	resulting xml is saved back to a file, other parsers will not read it
	correctly.
	By default, a compatibility class is used for generic XML that enables
	most of the features of the Nmpml interfaces without modifying the xml.
	Note that this flag will only convert "generic" xml, so if there is a
	user-defined xml dialect that is non-empty, but not nmpml-compliant,
	this flag will not make it functional.

The keys "unparsed_url", "parsed_url" and "format" are added to the keyword
arguments before they are forwarded to the format's reader and to
select_elements.
'''
	url=fname
	parts=parseurl(url)
	fname=parts[2]
	kwargs['unparsed_url']=url
	kwargs['parsed_url']=parts
	format=kwargs.get('format')
	if not format:
		format=get_file_format(fname, kwargs.get('gui'))
		kwargs['format']=format
	if format=="unknown xml":
		ft={'xml dialect':'automatic', 'read':True}
	elif format not in filetypes:
		fl=match_extension(format)
		if not fl:
			print("Unknown format %s" % format)
			return
		# NOTE(review): only kwargs['format'] is switched to the matched type;
		# the local name keeps the caller-supplied value, so the message below
		# and fileinformation["type"] report that value - confirm intended.
		kwargs['format']=fl[0]
		ft=filetypes[fl[0]]
	else:
		ft=filetypes[format]
	if not ft["read"]:
		print("format %s is write only" % format)
		return
	isxml='xml dialect' in ft
	fileobj, cleanup=openurlread(url)
	try:
		if isxml:
			doc=xm.readTree(fileobj)
		else:
			doc=ft["read"](fileobj, **kwargs)
	finally:
		# Release the opened resource even if the reader raises.
		cleanup()
	if isxml:
		if ft['xml dialect']=='automatic':
			format=get_xml_dialect(doc)
			kwargs['format']=format
			ft=filetypes[format]
		dialect=ft['xml dialect']
		if dialect=={}:
			# Generic xml: either borrow the nmpml default class or wrap the
			# elements in the compatibility class, as described above.
			if kwargs.get("convertxml"):
				dialect={'default class':filetypes['nmpml']['xml dialect']['default class']}
			else:
				from mien.nmpml.basic_tools import NmpmlCompat
				dialect={'default class':NmpmlCompat}
		doc=xm.assignClasses(doc, dialect)
	if kwargs.get('select'):
		if doc.fileinformation.get('select_done'):
			# The flag is consumed here instead of running a selection.
			del(doc.fileinformation['select_done'])
		else:
			doc=select_elements(doc, **kwargs)
	doc.fileinformation["filename"]=url
	doc.fileinformation["type"]=format
	if not ft.get('autoload'):
		doc.onLoad()
	return doc