def parseRDFa(dom, base, graph=None, options=None):
    """The standard processing of an RDFa DOM into a Graph. This method is aimed at the
    inclusion of the library in other RDF applications using RDFLib.

    @param dom: DOM document node (as returned from an XML parser); its C{documentElement}
    is the tree that gets processed
    @param base: URI for the default "base" value (usually the URI of the file to be processed)
    @keyword graph: a graph. If the value is None, the graph is created.
    @type graph: RDFLib Graph
    @keyword options: Options for the distiller (in case of C{None}, the default options are used)
    @type options: L{Options}
    @return: the graph (the one passed in, or the newly created one), filled with the
    extracted triples and, if present, the triples of C{options.warning_graph}
    @rtype: RDFLib Graph
    """
    # Identity tests: only a missing argument triggers the defaults
    if graph is None:
        graph = Graph()
    if options is None:
        options = Options()

    html = dom.documentElement

    # Perform the external and then the built-in transformations on the HTML tree. This is,
    # in simulated form, the hGRDDL approach of Ben Adida
    for trans in options.transformers + builtInTransformers:
        trans(html, options)

    # Collect the initial state. This takes care of things
    # like base, top level namespace settings, etc.
    state = ExecutionContext(html, graph, base=base, options=options)

    # The top level subject starts with the current document; this
    # is used by the recursion
    subject = URIRef(state.base)

    # Parse the whole thing recursively and fill the graph
    parse_one_node(html, graph, subject, state, [])

    # Merge the warning triples, if any, into the result
    if options.warning_graph is not None:
        for t in options.warning_graph:
            graph.add(t)

    # Return the populated graph instance (not the Graph class)
    return graph
def _process_DOM(dom, base, outputFormat, options, local=False):
    """Core processing. The transformers ("pre-processing") is done on the DOM tree, the state is
    initialized, and the "real" RDFa parsing is done. Finally, the result (which is an RDFLib Graph)
    is serialized using RDFLib's serializers.

    The real work is done in the L{parser function<Parse.parse_one_node>}.

    @param dom: XML DOM Tree node (for the top level); its C{documentElement} is processed
    @param base: URI for the default "base" value (usually the URI of the file to be processed)
    @param outputFormat: serialization format; "pretty-xml", "turtle" and "n3" are redirected to
    the serializers of this package, any other value is handed to RDFLib unchanged
    @param options: Options for the distiller
    @type options: L{Options}
    @keyword local: whether the call is for a local usage or via CGI (influences the way
    exceptions are handled)
    @return: serialized graph
    @rtype: string
    @raise RDFaError: when C{local} is false and the parsing or the serialization fails; when
    C{local} is true the original exception is propagated untouched
    """
    def _register_serializer(formatstring, module_path, class_name):
        """Register the serializer class C{class_name} of module C{module_path} under the
        format name C{formatstring} in the RDFLib plugin registry."""
        # Function-scope imports: only executed when a replacement serializer is requested
        from rdflib.plugin import register
        # NOTE(review): 'serializer' is not referenced here; kept in case the import has
        # side effects -- confirm and drop
        from rdflib.syntax import serializer, serializers
        register(formatstring, serializers.Serializer, module_path, class_name)

    def _serialize_graph(target, format_name):
        """Add the warning triples (if any) to C{target} and return its serialization."""
        if options.warning_graph is not None:
            for t in options.warning_graph:
                target.add(t)
        return target.serialize(format=format_name)

    # Exchanging the RDFLib serializers against the versions stored with this package
    if outputFormat == "pretty-xml":
        outputFormat = "my-xml"
        _register_serializer(outputFormat, "pyRdfa.serializers.PrettyXMLSerializer", "PrettyXMLSerializer")
    elif outputFormat in ("turtle", "n3"):
        outputFormat = "my-turtle"
        _register_serializer(outputFormat, "pyRdfa.serializers.TurtleSerializer", "TurtleSerializer")

    # Create the RDF Graph
    graph = Graph()

    # Get the DOM tree
    html = dom.documentElement

    # Perform the external and then the built-in transformations on the HTML tree. This is,
    # in simulated form, the hGRDDL approach of Ben Adida
    for trans in options.transformers + builtInTransformers:
        trans(html, options)

    # Collect the initial state. This takes care of things
    # like base, top level namespace settings, etc.
    state = ExecutionContext(html, graph, base=base, options=options)

    # The top level subject starts with the current document; this
    # is used by the recursion
    subject = URIRef(state.base)

    if local:
        # Local usage: exceptions are left alone, the caller sees them as raised
        parse_one_node(html, graph, subject, state, [])
        return _serialize_graph(graph, outputFormat)

    # This is when the code is run as part of a Web CGI service. The
    # difference lies in the way exceptions are handled: errors are re-raised as RDFaError.
    # Only Exception is caught, so that SystemExit/KeyboardInterrupt are not disguised.
    try:
        # This is a recursive procedure through the full DOM Tree
        parse_one_node(html, graph, subject, state, [])
    except Exception:
        # Error in the input... (sys.exc_info keeps this valid on every Python version)
        value = sys.exc_info()[1]
        raise RDFaError('Error in RDFa content: "%s"' % value)

    # Serialize the graph and return the result
    try:
        return _serialize_graph(graph, outputFormat)
    except Exception:
        value = sys.exc_info()[1]
        raise RDFaError('Error in graph serialization: "%s"' % value)