Example #1
0
def set_prefixes(html,options) :
	"""
	Convert the C{prefix} attributes found in the tree into C{xmlns:XXX} attributes.

	Every whitespace separated token of a C{prefix} attribute that has the form
	C{name=uri} results in an C{xmlns:name="uri"} attribute being set on the same node.
	Tokens without a C{=} character are silently ignored.
	@param html: a DOM node for the top level html element
	@param options: invocation options (not used by this function)
	"""
	def _handle_prefix(node) :
		"""Set the C{xmlns:XXX} attributes on one node, based on its C{prefix} attribute (if any).
		@param node: a DOM element node
		@return: always False (presumably this tells C{traverse_tree} to continue with
		the children of the node -- to be confirmed against its definition)
		"""
		if node.hasAttribute("prefix") :
			for pref in node.getAttribute("prefix").strip().split() :
				# this should be of the format prefix=uri; only the first '=' separates
				# the two parts, the URI itself may contain '=' (eg, in a query string)
				spec = pref.split('=',1)
				if len(spec) == 2 :
					node.setAttributeNS("","xmlns:%s" % spec[0],spec[1])
		return False
	traverse_tree(html,_handle_prefix)
Example #2
0
def set_prefixes(html, options):
    """
    Convert the C{prefix} attributes found in the tree into C{xmlns:XXX} attributes.

    Every whitespace separated token of a C{prefix} attribute that has the form
    C{name=uri} results in an C{xmlns:name="uri"} attribute being set on the same node.
    Tokens without a C{=} character are silently ignored.
    @param html: a DOM node for the top level html element
    @param options: invocation options (not used by this function)
    """
    def _handle_prefix(node):
        """Set the C{xmlns:XXX} attributes on one node, based on its C{prefix} attribute (if any).
        @param node: a DOM element node
        @return: always False (presumably this tells C{traverse_tree} to continue with
        the children of the node -- to be confirmed against its definition)
        """
        if node.hasAttribute("prefix"):
            for pref in node.getAttribute("prefix").strip().split():
                # The token should be of the format prefix=uri. Cut at the first '='
                # only: the URI itself may contain '=' (eg, in a query string).
                name, separator, uri = pref.partition("=")
                if separator:
                    node.setAttributeNS("", "xmlns:%s" % name, uri)
        return False

    traverse_tree(html, _handle_prefix)
def decorate_li_s(html, options):
    """
    The main transformer entry point: add RDFa attributes to the C{<li>} children of
    the C{<ul>}/C{<ol>} elements that are typed as RDF containers or collections.

    A list whose C{typeof} contains C{Seq}, C{Alt} or C{Bag} in the RDF prefix is
    handled as a container; one that contains C{List} is handled as a collection.
    The DOM tree is modified in place and nothing is returned.
    @param html: a DOM node for the top level html element
    @param options: invocation options (not used by this function)
    @type options: L{Options<pyRdfa.Options>}
    """
    # The bnode id-s that are already in use, either found in the file or generated here
    _bids = set()

    def _collect_BIDs(node):
        """Check and collect the possible bnode id-s in the file that might occur in CURIE-s.
        The L{_bids} variable is filled with the values found on this node.
        @param node: a DOM element node
        @return: always False (presumably this tells C{traverse_tree} to continue with
        the children of the node -- to be confirmed against its definition)
        """
        def _suspect(val):
            # Either a plain '_:xxx' value or a safe CURIE of the form '[_:xxx]'
            if len(val) > 1:
                if val[0] == "_" and val[1] == ":":
                    _bids.add(val)
                elif val[0] == "[" and val[-1] == "]":
                    _suspect(val[1:-1])

        for a in ("about", "resource", "typeof"):
            if node.hasAttribute(a):
                _suspect(node.getAttribute(a))
        return False

    def _give_BID():
        """Generate a new value that can be used as a bnode id...
        @return: a string of the form _:xNNNN that is unique (ie, not yet stored in L{_bids}).
        """
        while True:
            # The range grows with the number of id-s already taken, ie, there is always
            # a free value to hit; a fixed range would loop forever once it is exhausted.
            i = random.randint(1, 10000 + len(_bids))
            val = "_:x%s" % i
            if val not in _bids:
                _bids.add(val)
                return val

    def _check_if_hit(node, rdfprefix):
        """
        Check if the node has one of the C{typeof} values that would trigger the transformation.
        @param node: DOM node (standing for a C{<ul>} or a C{<ol>})
        @param rdfprefix: prefix to be used for the RDF namespace
        @return: the value of _CONTAINER, _COLLECTION, or _NONE
        """
        if node.hasAttribute("typeof"):
            for t in node.getAttribute("typeof").split():
                # check if it is a namespaces thing at all...
                if ":" in t:
                    key, lname = t.split(":", 1)
                    if key == rdfprefix:
                        if lname in ("Seq", "Alt", "Bag"):
                            return _CONTAINER
                        elif lname == "List":
                            return _COLLECTION
        return _NONE

    def _decorate_container(node, rdfprefix):
        """Take care of containers (ie, Seq, Alt, and Bag).
        The n-th C{<li>} child gets the C{rdfprefix:_n} predicate.
        @param node: the node for the C{<ul>/<ol>}
        @param rdfprefix: the prefix of the RDF namespace
        """
        # Snapshot of the 'li' children: the child list is modified in the loop
        originalNodes = [
            n for n in node.childNodes
            if n.nodeType == node.ELEMENT_NODE and n.tagName == "li"
        ]
        for index, n in enumerate(originalNodes, 1):
            pr = "%s:_%s" % (rdfprefix, index)
            if not _has_one_of_attributes(n, "href", "resource", "typeof",
                                          "about", "rel", "rev", "property"):
                # the simple case...
                n.setAttribute("property", pr)
            else:
                # the original node should not be changed, but should be reparented
                # into a new enclosure
                newEnclosure = node.ownerDocument.createElement("div")
                newEnclosure.setAttribute("rel", pr)
                node.replaceChild(newEnclosure, n)
                newEnclosure.appendChild(n)

    def _decorate_collection(node, rdfprefix):
        """Take care of collection (a.k.a. Lists).
        Each original C{<li>} becomes the C{first} of a list cell, and an extra C{<li>}
        is inserted after it to link the cell to the next one (or to C{nil}) via C{rest}.
        @param node: the node for the C{<ul>/<ol>}
        @param rdfprefix: the prefix of the RDF namespace
        """
        List = "%s:List" % rdfprefix
        first = "%s:first" % rdfprefix
        rest = "%s:rest" % rdfprefix
        nil = "[%s:nil]" % rdfprefix
        # the list of 'li'-s is needed in advance (eg, for their numbers)
        originalNodes = [
            n for n in node.childNodes
            if n.nodeType == node.ELEMENT_NODE and n.tagName == "li"
        ]
        # a bnode id should be generated for the top level node, unless it has a subject
        if node.hasAttribute("about"):
            currId = node.getAttribute("about")
        else:
            currId = "[%s]" % _give_BID()
            node.setAttribute("about", currId)

        last = len(originalNodes) - 1
        for i, n in enumerate(originalNodes):
            # first the current <li> must be massaged
            if not _has_one_of_attributes(n, "href", "resource", "typeof",
                                          "about", "rel", "rev", "property"):
                # the simple case, the node is changed in situ..
                n.setAttribute("about", currId)
                n.setAttribute("property", first)
            else:
                # an enclosure for that node should be created, and the original node
                # is just reparented
                newEnclosure = node.ownerDocument.createElement("div")
                newEnclosure.setAttribute("rel", first)
                newEnclosure.setAttribute("about", currId)
                node.replaceChild(newEnclosure, n)
                newEnclosure.appendChild(n)
            # An extra <li> is necessary to add some additional info...
            newLi = node.ownerDocument.createElement("li")
            newLi.setAttribute("about", currId)
            newLi.setAttribute("rel", rest)
            # the first cell is the <ul>/<ol> itself, which has its own typeof already
            if i != 0:
                newLi.setAttribute("typeof", List)
            if i == last:
                # This is the last element
                newLi.setAttribute("resource", nil)
                node.appendChild(newLi)
            else:
                newId = "[%s]" % _give_BID()
                newLi.setAttribute("resource", newId)
                currId = newId
                # the next original <li> has not been touched yet, ie, it is still
                # a direct child of the node
                node.insertBefore(newLi, originalNodes[i + 1])

    # Static copies are needed: the tree is modified while the lists are processed
    uls = list(html.getElementsByTagName("ul"))
    ols = list(html.getElementsByTagName("ol"))
    if not uls and not ols:
        return

    #--------------------------------------------------------------------------------
    # We have to extract the prefix used for rdf. It may not be 'rdf'...
    rdfprefix = rdf_prefix(html)

    #--------------------------------------------------------------------------------
    # We have to collect the current bnode id-s from the file to avoid conflicts
    traverse_tree(html, _collect_BIDs)
    # We will need the random function to generate unique bid-s
    random.seed(None)

    #--------------------------------------------------------------------------------

    for node in uls + ols:
        # check if this is one of those guys...
        t = _check_if_hit(node, rdfprefix)
        if t == _CONTAINER:
            _decorate_container(node, rdfprefix)
        elif t == _COLLECTION:
            _decorate_collection(node, rdfprefix)
def decorate_li_s(html,options) :
	"""
	The main transformer entry point: add RDFa attributes to the C{<li>} children of
	the C{<ul>}/C{<ol>} elements that are typed as RDF containers or collections.

	A list whose C{typeof} contains C{Seq}, C{Alt} or C{Bag} in the RDF prefix is
	handled as a container; one that contains C{List} is handled as a collection.
	The DOM tree is modified in place and nothing is returned.
	@param html: a DOM node for the top level html element
	@param options: invocation options (not used by this function)
	@type options: L{Options<pyRdfa.Options>}
	"""
	# The bnode id-s that are already in use, either found in the file or generated here
	_bids = set()

	def _collect_BIDs(node) :
		"""Check and collect the possible bnode id-s in the file that might occur in CURIE-s.
		The L{_bids} variable is filled with the values found on this node.
		@param node: a DOM element node
		@return: always False (presumably this tells C{traverse_tree} to continue with
		the children of the node -- to be confirmed against its definition)
		"""
		def _suspect(val) :
			# Either a plain '_:xxx' value or a safe CURIE of the form '[_:xxx]'
			if len(val) > 1 :
				if val[0] == "_" and val[1] == ":" :
					_bids.add(val)
				elif val[0] == "[" and val[-1] == "]" :
					_suspect(val[1:-1])
		for a in ("about","resource","typeof") :
			if node.hasAttribute(a) :
				_suspect(node.getAttribute(a))
		return False

	def _give_BID() :
		"""Generate a new value that can be used as a bnode id...
		@return: a string of the form _:xNNNN that is unique (ie, not yet stored in L{_bids}).
		"""
		while True :
			# The range grows with the number of id-s already taken, ie, there is always
			# a free value to hit; a fixed range would loop forever once it is exhausted.
			i = random.randint(1,10000 + len(_bids))
			val = "_:x%s" % i
			if val not in _bids :
				_bids.add(val)
				return val

	def _check_if_hit(node,rdfprefix) :
		"""
		Check if the node has one of the C{typeof} values that would trigger the transformation.
		@param node: DOM node (standing for a C{<ul>} or a C{<ol>})
		@param rdfprefix: prefix to be used for the RDF namespace
		@return: the value of _CONTAINER, _COLLECTION, or _NONE
		"""
		if node.hasAttribute("typeof") :
			for t in node.getAttribute("typeof").split() :
				# check if it is a namespaces thing at all...
				if ":" in t :
					key, lname = t.split(":",1)
					if key == rdfprefix :
						if lname in ("Seq","Alt","Bag") :
							return _CONTAINER
						elif lname == "List" :
							return _COLLECTION
		return _NONE

	def _decorate_container(node,rdfprefix) :
		"""Take care of containers (ie, Seq, Alt, and Bag).
		The n-th C{<li>} child gets the C{rdfprefix:_n} predicate.
		@param node: the node for the C{<ul>/<ol>}
		@param rdfprefix: the prefix of the RDF namespace
		"""
		# Snapshot of the 'li' children: the child list is modified in the loop
		originalNodes = [ n for n in node.childNodes if n.nodeType == node.ELEMENT_NODE and n.tagName == "li" ]
		for index, n in enumerate(originalNodes,1) :
			pr = "%s:_%s" % (rdfprefix,index)
			if not _has_one_of_attributes(n,"href","resource","typeof","about","rel","rev","property") :
				# the simple case...
				n.setAttribute("property",pr)
			else :
				# the original node should not be changed, but should be reparented
				# into a new enclosure
				newEnclosure = node.ownerDocument.createElement("div")
				newEnclosure.setAttribute("rel",pr)
				node.replaceChild(newEnclosure,n)
				newEnclosure.appendChild(n)

	def _decorate_collection(node,rdfprefix) :
		"""Take care of collection (a.k.a. Lists).
		Each original C{<li>} becomes the C{first} of a list cell, and an extra C{<li>}
		is inserted after it to link the cell to the next one (or to C{nil}) via C{rest}.
		@param node: the node for the C{<ul>/<ol>}
		@param rdfprefix: the prefix of the RDF namespace
		"""
		List  = "%s:List"  % rdfprefix
		first = "%s:first" % rdfprefix
		rest  = "%s:rest"  % rdfprefix
		nil   = "[%s:nil]" % rdfprefix
		# the list of 'li'-s is needed in advance (eg, for their numbers)
		originalNodes = [ n for n in node.childNodes if n.nodeType == node.ELEMENT_NODE and n.tagName == "li" ]
		# a bnode id should be generated for the top level node, unless it has a subject
		if node.hasAttribute("about") :
			currId = node.getAttribute("about")
		else :
			currId = "[%s]" % _give_BID()
			node.setAttribute("about",currId)

		last = len(originalNodes) - 1
		for i, n in enumerate(originalNodes) :
			# first the current <li> must be massaged
			if not _has_one_of_attributes(n,"href","resource","typeof","about","rel","rev","property") :
				# the simple case, the node is changed in situ..
				n.setAttribute("about",currId)
				n.setAttribute("property",first)
			else :
				# an enclosure for that node should be created, and the original node
				# is just reparented
				newEnclosure = node.ownerDocument.createElement("div")
				newEnclosure.setAttribute("rel",first)
				newEnclosure.setAttribute("about",currId)
				node.replaceChild(newEnclosure,n)
				newEnclosure.appendChild(n)
			# An extra <li> is necessary to add some additional info...
			newLi = node.ownerDocument.createElement("li")
			newLi.setAttribute("about",currId)
			newLi.setAttribute("rel",rest)
			# the first cell is the <ul>/<ol> itself, which has its own typeof already
			if i != 0 :
				newLi.setAttribute("typeof",List)
			if i == last :
				# This is the last element
				newLi.setAttribute("resource",nil)
				node.appendChild(newLi)
			else :
				newId = "[%s]" % _give_BID()
				newLi.setAttribute("resource",newId)
				currId = newId
				# the next original <li> has not been touched yet, ie, it is still
				# a direct child of the node
				node.insertBefore(newLi,originalNodes[i+1])

	# Static copies are needed: the tree is modified while the lists are processed
	uls = list(html.getElementsByTagName("ul"))
	ols = list(html.getElementsByTagName("ol"))
	if not uls and not ols :
		return

	#--------------------------------------------------------------------------------
	# We have to extract the prefix used for rdf. It may not be 'rdf'...
	rdfprefix = rdf_prefix(html)

	#--------------------------------------------------------------------------------
	# We have to collect the current bnode id-s from the file to avoid conflicts
	traverse_tree(html,_collect_BIDs)
	# We will need the random function to generate unique bid-s
	random.seed(None)

	#--------------------------------------------------------------------------------

	for node in uls+ols :
		# check if this is one of those guys...
		t = _check_if_hit(node,rdfprefix)
		if t == _CONTAINER :
			_decorate_container(node,rdfprefix)
		elif t == _COLLECTION :
			_decorate_collection(node,rdfprefix)