def rdf_from_sources(self, names, outputFormat="pretty-xml", rdfOutput=False):
    """
    Extract an RDF graph from a list of RDFa sources and serialize them in one graph. The sources are parsed,
    the RDF extracted, and serialization is done in the specified format.

    @param names: list of sources, each can be a URI, a file name, or a file-like object
    @keyword outputFormat: serialization format. Can be one of "turtle", "n3", "xml", "pretty-xml", "nt".
    "xml" and "pretty-xml", as well as "turtle" and "n3" are synonyms.
    @keyword rdfOutput: passed on to L{graph_from_source} for every source; its value determines the
    reaction on exceptions raised while a source is processed
    @return: a serialized RDF Graph
    @rtype: string
    """
    # Prefer the graph class of the optional pyRdfaExtras package; fall back to a plain Graph
    # if it cannot be imported or instantiated. 'Exception' (rather than a bare 'except') keeps
    # that best-effort fallback but lets KeyboardInterrupt and SystemExit propagate.
    try:
        from pyRdfaExtras import MyGraph
        graph = MyGraph()
    except Exception:
        graph = Graph()

    # Register all the prefix -> namespace URI pairs of the module-level bindings on the graph
    for prefix in _bindings:
        graph.bind(prefix, Namespace(_bindings[prefix]))

    # the value of rdfOutput determines the reaction on exceptions...
    for name in names:
        self.graph_from_source(name, graph, rdfOutput)
    return graph.serialize(format=outputFormat)
def rdf_from_sources(self, names, outputFormat="turtle", rdfOutput=False):
    """
    Extract an RDF graph from a list of RDFa sources and serialize them in one graph. The sources are parsed,
    the RDF extracted, and serialization is done in the specified format.

    @param names: list of sources, each can be a URI, a file name, or a file-like object
    @keyword outputFormat: serialization format. Can be one of "turtle", "n3", "xml", "pretty-xml", "nt",
    "json" or "json-ld". "turtle" and "n3", "xml" and "pretty-xml", and "json" and "json-ld" are synonyms,
    respectively. Note that the JSON-LD serialization works with RDFLib 3.* only.
    @keyword rdfOutput: controls what happens in case an exception is raised. If the value is False, the
    caller is responsible handling it; otherwise a graph is returned with an error message included in
    the processor graph
    @type rdfOutput: boolean
    @return: a serialized RDF Graph
    @rtype: string
    """
    # This is better because it gives access to the various, non-standard serializations.
    # If it does not work because the extras are not installed, fall back to the standard
    # rdflib distribution. 'Exception' (rather than a bare 'except') keeps that best-effort
    # fallback but lets KeyboardInterrupt and SystemExit propagate.
    try:
        from pyRdfaExtras import MyGraph
        graph = MyGraph()
    except Exception:
        graph = Graph()

    graph.bind("xsd", Namespace('http://www.w3.org/2001/XMLSchema#'))

    # the value of rdfOutput determines the reaction on exceptions...
    for name in names:
        self.graph_from_source(name, graph, rdfOutput)
    return graph.serialize(format=outputFormat)
def rdf_from_sources(self, names, outputFormat="turtle", rdfOutput=False):
    """
    Extract an RDF graph from a list of RDFa sources and serialize them in one graph. The sources are parsed,
    the RDF extracted, and serialization is done in the specified format.

    @param names: list of sources, each can be a URI, a file name, or a file-like object
    @keyword outputFormat: serialization format. Can be one of "turtle", "n3", "xml", "pretty-xml", "nt",
    "json" or "json-ld". "turtle" and "n3", "xml" and "pretty-xml", and "json" and "json-ld" are synonyms,
    respectively. Note that the JSON-LD serialization works with RDFLib 3.* only.
    @keyword rdfOutput: controls what happens in case an exception is raised. If the value is False, the
    caller is responsible handling it; otherwise a graph is returned with an error message included in
    the processor graph
    @type rdfOutput: boolean
    @return: a serialized RDF Graph
    @rtype: string
    """
    # The graph class of pyRdfaExtras is preferred because it gives access to the various,
    # non-standard serializations. If the extras are not installed (or cannot be used), the
    # standard rdflib Graph is the fallback. Catching 'Exception' instead of using a bare
    # 'except' preserves the fallback while letting KeyboardInterrupt/SystemExit through.
    try:
        from pyRdfaExtras import MyGraph
        graph = MyGraph()
    except Exception:
        graph = Graph()

    graph.bind("xsd", Namespace('http://www.w3.org/2001/XMLSchema#'))

    # the value of rdfOutput determines the reaction on exceptions...
    for name in names:
        self.graph_from_source(name, graph, rdfOutput)

    retval = graph.serialize(format=outputFormat)
    return retval
def rdf_from_sources(self, names, outputFormat="turtle", rdfOutput=False):
    """
    Extract an RDF graph from a list of RDFa sources and serialize them in one graph. The sources are parsed,
    the RDF extracted, and serialization is done in the specified format.

    @param names: list of sources, each can be a URI, a file name, or a file-like object
    @keyword outputFormat: serialization format. Can be one of "turtle", "n3", "xml", "pretty-xml", "nt".
    "xml" and "pretty-xml", as well as "turtle" and "n3" are synonyms.
    @keyword rdfOutput: handed over to L{graph_from_source} for each source; its value determines the
    reaction on exceptions raised while a source is processed
    @return: a serialized RDF Graph
    @rtype: string
    """
    # Use the graph class of the optional pyRdfaExtras package when possible, a plain Graph
    # otherwise. Catching 'Exception' instead of using a bare 'except' keeps the fallback
    # for any import/instantiation problem without swallowing KeyboardInterrupt/SystemExit.
    try:
        from pyRdfaExtras import MyGraph
        graph = MyGraph()
    except Exception:
        graph = Graph()

    # Bind every prefix of the module-level bindings to its namespace
    for prefix in _bindings:
        graph.bind(prefix, Namespace(_bindings[prefix]))

    # the value of rdfOutput determines the reaction on exceptions...
    for name in names:
        self.graph_from_source(name, graph, rdfOutput)
    return graph.serialize(format=outputFormat)
class Validator:
    """
    Shell around the distiller and the error message management.

    @ivar default_graph: default graph for the results
    @ivar processor_graph: processor graph (ie, errors and warnings)
    @ivar uri: file like object or URI of the source
    @ivar base: base value for the generated RDF output
    @ivar media_type: media type of the source
    @ivar vocab_expansion: whether vocabulary expansion should occur or not
    @ivar rdfa_lite: whether RDFa 1.1 Lite should be checked
    @ivar embedded_rdf: whether embedded RDF content (e.g., embedded turtle) should be included in the output
    @ivar domtree: the Document Node of the final domtree where the final HTML code should be added
    @ivar message: the Element Node in the final DOM Tree where the error/warning messages should be added
    @ivar code: the Element Node in the final DOM Tree where the generated code should be added
    @ivar errors: separate class instance to generate the error code
    @type errors: L{Errors}
    @ivar processor: the pyRdfa processor used for the extraction; only set once L{parse} has run
    """

    def __init__(self, uri, base, media_type, vocab_expansion=False, rdfa_lite=False, embedded_rdf=False):
        """
        @param uri: the URI for the content to be analyzed
        @type uri: file-like object (e.g., when content goes through an HTTP Post) or a string
        @param base: the base URI for the generated RDF
        @type base: string
        @param media_type: Media Type, see the media type management of pyRdfa. If "", the distiller will
        try to find the media type out.
        @type media_type: pyRdfa.host.MediaTypes value
        @keyword vocab_expansion: whether vocabulary expansion should occur or not
        @type vocab_expansion: boolean
        @keyword rdfa_lite: whether RDFa 1.1 Lite should be checked
        @type rdfa_lite: boolean
        @keyword embedded_rdf: forwarded to the processor options as C{embedded_rdf}
        @type embedded_rdf: boolean
        """
        # Create the graphs into which the content is put
        self.default_graph = Graph()
        self.processor_graph = Graph()

        self.uri = uri
        self.base = base
        self.media_type = media_type
        self.embedded_rdf = embedded_rdf
        self.rdfa_lite = rdfa_lite
        self.vocab_expansion = vocab_expansion

        # Get the DOM tree that will be the scaffold for the output; today's date (ISO format)
        # is substituted into the page template
        parser = html5lib.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder("dom"))
        self.domtree = parser.parse(html_page % date.today().isoformat())

        # find the warning/error content
        # NOTE(review): self.message stays unset if the template has no <div id="Message">
        for div in self.domtree.getElementsByTagName("div"):
            if div.hasAttribute("id") and div.getAttribute("id") == "Message":
                self.message = div
                break

        # find the turtle output content
        # NOTE(review): self.code stays unset if the template has no <pre id="output">
        for pre in self.domtree.getElementsByTagName("pre"):
            if pre.hasAttribute("id") and pre.getAttribute("id") == "output":
                self.code = pre
                break

        self.errors = Errors(self)
    # end __init__

    def parse(self):
        """
        Parse the RDFa input and store the processor and default graphs. The final media type is also updated.
        """
        transformers = []
        if self.rdfa_lite:
            # Imported only when the RDFa Lite check is requested
            from pyRdfa.transform.lite import lite_prune
            transformers.append(lite_prune)

        options = Options(output_default_graph=True,
                          output_processor_graph=True,
                          transformers=transformers,
                          vocab_expansion=self.vocab_expansion,
                          embedded_rdf=self.embedded_rdf,
                          add_informational_messages=True)
        processor = pyRdfa(options=options, base=self.base, media_type=self.media_type)
        # rdfOutput=True: errors are reported through the processor graph
        processor.graph_from_source(self.uri,
                                    graph=self.default_graph,
                                    pgraph=self.processor_graph,
                                    rdfOutput=True)
        # Extracting some parameters for the error messages
        self.processor = processor

    def complete_DOM(self):
        """
        Add the generated graph, in turtle encoding, as well as the error messages, to the final DOM tree
        """
        # Add the RDF code in the DOM tree
        outp = self.default_graph.serialize(format="turtle")
        # Depending on the RDFLib version, serialize() returns either UTF-8 encoded bytes or an
        # already decoded (unicode) string. Decode only in the former case: decoding a string
        # a second time raises an exception on Python 3.
        if isinstance(outp, bytes):
            text = outp.decode('utf-8')
        else:
            text = outp

        dstr = self.domtree.createTextNode(text)
        self.code.appendChild(dstr)

        # Settle the error message
        self.errors.interpret()

    def run(self):
        """
        Run the two steps of validation, and return the serialized version of the DOM Tree, ready to be displayed
        """
        self.parse()
        self.complete_DOM()
        if PY3:
            from html5lib.serializer import serialize
            return serialize(self.domtree, tree='dom')
        else:
            return str(self.domtree.toxml(encoding="utf-8"))