Beispiel #1
0
	def _get_HTML_literal(self, Pnode) :
		"""
		Get (recursively) the XML Literal content of a DOM Node. 
	
		@param Pnode: DOM Node
		@return: string
		"""	
		rc = ""		
		for node in Pnode.childNodes:
			if node.nodeType == node.TEXT_NODE:
				rc = rc + self._putBackEntities(node.data)
			elif node.nodeType == node.ELEMENT_NODE :
				rc = rc + return_XML(self.state, node, base = False, xmlns = False )
		return rc
Beispiel #2
0
    def _get_HTML_literal(self, Pnode):
        """
		Get (recursively) the XML Literal content of a DOM Node. 
	
		@param Pnode: DOM Node
		@return: string
		"""
        rc = ""
        for node in Pnode.childNodes:
            if node.nodeType == node.TEXT_NODE:
                rc = rc + self._putBackEntities(node.data)
            elif node.nodeType == node.ELEMENT_NODE:
                rc = rc + return_XML(self.state, node, base=False, xmlns=False)
        return rc
Beispiel #3
0
def handle_embeddedRDF(node, graph, state) :
	"""
	Handles embedded RDF. There are two possibilities:

	 - the file is one of the XML dialects that allows for an embedded RDF/XML portion. See the L{host.accept_embedded_rdf_xml} for those (a typical example is SVG).
	 - the file is HTML and there is a turtle portion in the C{<script>} element with type text/turtle.

	The feature is optional: nothing is done, and False is returned, unless C{state.options.embedded_rdf} is set.

	Errors raised while parsing the embedded content (any C{Exception} subclass) are reported through
	C{state.options.add_error} and the content is ignored; the function still returns True in that case.
	Exceptions that do not derive from C{Exception} (e.g. C{KeyboardInterrupt}) are not swallowed.

	@param node: a DOM node for the top level element
	@param graph: target rdf graph
	@type graph: RDFLib's Graph object instance
	@param state: the inherited state (namespaces, lang, etc)
	@type state: L{state.ExecutionContext}
	@return: whether the node has been taken care of here. If TRUE, the RDFa processing should not occur on the node and its descendents. Note that, for a host language accepting embedded Turtle, TRUE is returned for every C{<script>} element, whatever its C{type} attribute is; only the C{text/turtle} ones are actually parsed.
	@rtype: Boolean
	"""
	def _get_literal(Pnode):
		"""
		Get the full text: the concatenated data of the direct text and CDATA children.
		@param Pnode: DOM Node
		@return: string
		"""
		text = "".join(
			child.data
			for child in Pnode.childNodes
			if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
		)
		# Sigh... the HTML5 parser does not recognize the CDATA escapes, ie, it just passes on the <![CDATA[ and ]]> strings:-(
		return text.replace("<![CDATA[","").replace("]]>","")

	# This feature is optional!
	if not state.options.embedded_rdf :
		return False

	# Embedded turtle, per the latest Turtle draft
	if state.options.host_language in accept_embedded_turtle and node.nodeName.lower() == "script" :
		if node.hasAttribute("type") and node.getAttribute("type") == "text/turtle" :
			rdf = StringIO(_get_literal(node))
			try :
				graph.parse(rdf, format="n3", publicID = state.base)
				state.options.add_info("The output graph includes triples coming from an embedded Turtle script")
			except Exception as exc :
				# Best effort: a broken embedded script must not abort the whole processing
				state.options.add_error("Embedded Turtle content could not be parsed (problems with %s?); ignored" % exc)
		# A <script> element is reported as handled even if it is not Turtle
		return True

	# Embedded RDF/XML subtree (e.g. in SVG)
	if state.options.host_language in accept_embedded_rdf_xml and node.localName == "RDF" and node.namespaceURI == "http://www.w3.org/1999/02/22-rdf-syntax-ns#" :
		rdf = StringIO(return_XML(state, node))
		try :
			graph.parse(rdf)
			state.options.add_info("The output graph includes triples coming from an embedded RDF/XML subtree")
		except Exception as exc :
			# Best effort: a broken embedded subtree must not abort the whole processing
			state.options.add_error("Embedded RDF/XML content could not parsed (problems with %s?); ignored" % exc)
		return True

	return False