Example #1
0
def parse(str):
	"""Tokenise XML text and feed the tokens to a ParseHandler.

	The text is scanned for start tags, end tags, runs of character
	data and ``<?...>`` / ``<!...>`` declarations. Each recognised tag or
	data run triggers the matching ParseHandler callback, and the
	handler's ``result`` attribute is returned once scanning finishes.
	"""
	from lib.easy.regex import grp, rep, opt, alt, matchone, matchall

	# Lexical atoms: names, quoted strings, optional / mandatory whitespace.
	name_re = '[A-Za-z_][A-Za-z0-9_:-]*'
	quoted_re = alt('"[^"]*"', "'[^']*'")
	opt_space = r'[\n\r\t ]*'
	req_space = r'[\n\r\t ]+'

	# Every construct exists twice: a capturing form used to pick a token
	# apart, and an "anonymous" form used only to locate it in the input.
	attr_re = (
		grp(name_re, 'name') + opt_space + '=' + opt_space
		+ grp(quoted_re, 'value')
	)
	attr_plain = name_re + opt_space + '=' + opt_space + quoted_re

	open_re = (
		'<' + opt_space + grp(name_re, 'name')
		+ alt(opt_space, req_space + grp(rep(attr_plain + opt_space, '+'), 'attrs'))
		+ opt(grp('/', 'end')) + opt_space + '>'
	)
	open_plain = (
		'<' + opt_space + name_re
		+ alt(opt_space, req_space + rep(attr_plain + opt_space, '+'))
		+ opt('/') + opt_space + '>'
	)

	close_re = '<' + opt_space + '/' + opt_space + grp(name_re, 'name') + opt_space + '>'
	close_plain = '<' + opt_space + '/' + opt_space + name_re + opt_space + '>'
	decl_re = '<[?!][^<>]+>'

	# Character data: literal text or a lowercase named entity reference.
	chunk_re = alt(
		grp('[^<&]+', 'char'),
		'&' + grp(rep('[a-z]', '+'), 'name') + ';',
	)
	chunk_plain = alt('[^<&]+', '&' + rep('[a-z]', '+') + ';')
	text_re = rep(chunk_plain, '+')

	handler = ParseHandler()

	def decode(raw):
		"""Return raw with each named entity replaced via _xml_entities."""
		pieces = []
		for chunk in matchall(chunk_re, raw):
			if 'char' in chunk:
				pieces.append(chunk['char'])
			else:
				pieces.append(_xml_entities[chunk['name']])
		return ''.join(pieces)

	token_re = alt(
		grp(open_plain, 'tag'),
		grp(close_plain, 'tagend'),
		grp(text_re, 'data'),
		decl_re,
	)
	attr_step = attr_re + opt_space

	for token in matchall(token_re, str):
		# Declarations match decl_re but no branch below handles them,
		# so they produce no handler call.
		if 'tag' in token:
			parts = matchone(open_re, token['tag'])

			if 'attrs' in parts:
				# The [1:-1] slice strips the surrounding quote characters.
				attributes = {
					pair['name']: decode(pair['value'][1:-1])
					for pair in matchall(attr_step, parts['attrs'])
				}
			else:
				attributes = {}

			# A captured '/' before '>' marks a self-closing element.
			emit = handler.tag if 'end' in parts else handler.tag_start
			emit(parts['name'], attributes)
			continue

		if 'tagend' in token:
			closing = matchone(close_re, token['tagend'])
			handler.tag_end(closing['name'])
			continue

		if 'data' in token:
			handler.data(decode(token['data']))

	return handler.result
Example #2
0
	def parse_data(data):
		"""Return data with its named entity references expanded.

		The text is split with the data_elem pattern. Pieces captured as
		'char' are copied through unchanged; any other piece has its
		'name' capture looked up in _xml_entities. A name missing from that
		table propagates whatever error the lookup raises.
		"""
		pieces = []
		for chunk in matchall(data_elem, data):
			if 'char' in chunk:
				pieces.append(chunk['char'])
			else:
				pieces.append(_xml_entities[chunk['name']])
		return ''.join(pieces)