Beispiel #1
0
def parseRDFa(dom, base, graph=None, options=None) :
	"""Process an RDFa DOM and add the extracted triples to an RDFLib Graph.

	This entry point is aimed at the inclusion of the library in other RDF
	applications using RDFLib: no serialization is done, the caller gets the
	graph object itself.

	@param dom: DOM node for the document (as returned from an XML parser); its
	C{documentElement} is the tree that gets processed
	@param base: URI for the default "base" value (usually the URI of the file to be processed)
	@keyword graph: graph to be filled. If the value is C{None}, a new graph is created.
	@type graph: RDFLib Graph
	@keyword options: Options for the distiller (in case of C{None}, the default options are used)
	@type options: L{Options}
	@return: the graph instance that received the triples (the one passed in, or the newly created one)
	@rtype: RDFLib Graph
	"""
	# Identity tests: only a missing argument should trigger the defaults,
	# whatever comparison operators the argument's class may define
	if graph is None :
		graph = Graph()
	if options is None :
		options = Options()

	html = dom.documentElement

	# Perform the external and built-in transformations on the HTML tree
	# (external ones come first). This is, in simulated form, the hGRDDL
	# approach of Ben Adida
	for trans in options.transformers + builtInTransformers :
		trans(html, options)

	# Creation of the top level execution context: it collects the initial
	# state and takes care of things like base, top level namespace settings, etc.
	state = ExecutionContext(html, graph, base=base, options=options)

	# The top level subject starts with the current document; this
	# is used by the recursion
	subject = URIRef(state.base)

	# Parse the whole thing recursively and fill the graph
	parse_one_node(html, graph, subject, state, [])

	# Triples of the warning graph, if there is one, are merged into the result
	if options.warning_graph is not None :
		for t in options.warning_graph :
			graph.add(t)

	# Return the populated graph instance (and not the Graph class)
	return graph
Beispiel #2
0
def _process_DOM(dom, base, outputFormat, options, local=False) :
	"""Core processing. The transformers ("pre-processing") are run
	on the DOM tree, the state is initialized, and the "real" RDFa parsing is done. Finally,
	the result (which is an RDFLib Graph) is serialized using RDFLib's serializers.

	The real work is done in the L{parser function<Parse.parse_one_node>}.

	@param dom: XML DOM Tree node (for the top level)
	@param base: URI for the default "base" value (usually the URI of the file to be processed)
	@param outputFormat: serialization format. The values "pretty-xml", "turtle" and "n3" are
	redirected to the serializers stored with this package; any other value is handed to
	RDFLib unchanged.
	@param options: Options for the distiller
	@type options: L{Options}
	@keyword local: whether the call is for a local usage or via CGI (influences the way
	exceptions are handled): if true, exceptions propagate unchanged, otherwise they are
	re-raised as L{RDFaError}
	@return: serialized graph
	@rtype: string
	@raise RDFaError: if C{local} is false and either the parsing or the serialization fails
	"""
	def _register_serializer(formatstring, module_path, class_name) :
		# Register one of the package's own serializers with RDFLib under the name 'formatstring'
		from rdflib.plugin import register
		from rdflib.syntax import serializer, serializers
		register(formatstring, serializers.Serializer, module_path, class_name)

	def _serialize() :
		# Merge the triples of the warning graph (if any) into the result, then serialize it
		if options.warning_graph is not None :
			for t in options.warning_graph :
				graph.add(t)
		return graph.serialize(format=outputFormat)

	# Exchanging the pretty xml and turtle serializers against the versions stored with this package
	if outputFormat == "pretty-xml" :
		outputFormat = "my-xml"
		_register_serializer(outputFormat, "pyRdfa.serializers.PrettyXMLSerializer", "PrettyXMLSerializer")
	elif outputFormat in ("turtle", "n3") :
		outputFormat = "my-turtle"
		_register_serializer(outputFormat, "pyRdfa.serializers.TurtleSerializer", "TurtleSerializer")

	# Create the RDF Graph
	graph = Graph()

	# get the DOM tree
	html = dom.documentElement

	# Perform the external and built-in transformations on the HTML tree
	# (external ones come first). This is, in simulated form, the hGRDDL
	# approach of Ben Adida
	for trans in options.transformers + builtInTransformers :
		trans(html, options)

	# collect the initial state. This takes care of things
	# like base, top level namespace settings, etc.
	# Ensure the proper initialization
	state = ExecutionContext(html, graph, base=base, options=options)

	# The top level subject starts with the current document; this
	# is used by the recursion
	subject = URIRef(state.base)

	if local :
		# Local usage: parse the whole thing recursively, fill the graph,
		# and let any exception reach the caller as it is
		parse_one_node(html, graph, subject, state, [])
		return _serialize()

	# This is when the code is run as part of a Web CGI service. The
	# difference lies in the way exceptions are handled: they are reported
	# as RDFaError. SystemExit and KeyboardInterrupt are deliberately not
	# intercepted, so that the process can still be stopped.
	try :
		# this is a recursive procedure through the full DOM Tree
		parse_one_node(html, graph, subject, state, [])
	except Exception :
		# error in the input...
		value = sys.exc_info()[1]
		raise RDFaError('Error in RDFa content: "%s"' % value)

	# serialize the graph and return the result
	try :
		return _serialize()
	except Exception :
		# error while merging the warnings or serializing the graph
		value = sys.exc_info()[1]
		raise RDFaError('Error in graph serialization: "%s"' % value)