def set_prefixes(html, options):
    """
    Turn the C{prefix} attributes found in a DOM tree into C{xmlns:} declarations.

    The tree is walked with C{traverse_tree}; on every visited node that has a C{prefix}
    attribute, each whitespace separated token of the form C{prefix=uri} is re-declared on
    that same node as an C{xmlns:prefix} attribute whose value is the URI.

    @param html: a DOM node for the top level html element
    @param options: invocation options (not used by this transformer)
    """
    def _handle_prefix(node):
        """Add an C{xmlns:} attribute to the node for each of its C{prefix=uri} tokens.

        @param node: a DOM element node
        @return: always C{False} (the value is handed back to C{traverse_tree})
        """
        if node.hasAttribute("prefix"):
            for pref in node.getAttribute("prefix").split():
                # Each token should be of the format prefix=uri. Only the first '='
                # separates the two parts: the URI itself may contain further '='
                # characters (eg, in a query string) and must not be truncated.
                spec = pref.split('=', 1)
                # Tokens without a '=' or without a prefix name are malformed and ignored
                if len(spec) == 2 and spec[0]:
                    node.setAttributeNS("", "xmlns:%s" % spec[0], spec[1])
        return False

    traverse_tree(html, _handle_prefix)
def set_prefixes(html, options):
    """
    Turn the C{prefix} attributes found in a DOM tree into C{xmlns:} declarations.

    The tree is walked with C{traverse_tree}; on every visited node that has a C{prefix}
    attribute, each whitespace separated token of the form C{prefix=uri} is re-declared on
    that same node as an C{xmlns:prefix} attribute whose value is the URI.

    @param html: a DOM node for the top level html element
    @param options: invocation options (not used by this transformer)
    """
    def _handle_prefix(node):
        """Add an C{xmlns:} attribute to the node for each of its C{prefix=uri} tokens.

        @param node: a DOM element node
        @return: always C{False} (the value is handed back to C{traverse_tree})
        """
        if node.hasAttribute("prefix"):
            for pref in node.getAttribute("prefix").split():
                # Each token should be of the format prefix=uri. Only the first '='
                # separates the two parts: the URI itself may contain further '='
                # characters (eg, in a query string) and must not be truncated.
                spec = pref.split('=', 1)
                # Tokens without a '=' or without a prefix name are malformed and ignored
                if len(spec) == 2 and spec[0]:
                    node.setAttributeNS("", "xmlns:%s" % spec[0], spec[1])
        return False

    traverse_tree(html, _handle_prefix)
def decorate_li_s(html, options):
    """
    The main transformer entry point. See the module description for details.

    Every C{<ul>} and C{<ol>} element below C{html} whose C{typeof} attribute contains the
    RDF container types (C{Seq}, C{Alt}, C{Bag}) or C{List}, qualified with the prefix
    returned by C{rdf_prefix}, gets its C{<li>} children decorated in place with the RDFa
    attributes that describe the membership. Nothing is done if the tree has no lists at all.

    @param html: a DOM node for the top level html element
    @param options: invocation options
    @type options: L{Options<pyRdfa.Options>}
    """
    # bnode id-s (including the leading "_:") that are already in use. Only membership is
    # ever tested, hence a set.
    _bids = set()

    # The attributes that make it impossible to decorate an <li> in situ
    _rdfa_attrs = ("href", "resource", "typeof", "about", "rel", "rev", "property")

    def _collect_BIDs(node):
        """Check and collect the possible bnode id-s in the file that might occur in CURIE-s.
        The function is called on each node by C{traverse_tree}. The L{_bids} variable is
        filled with the initial values.

        @param node: a DOM element node
        @return: always C{False}
        """
        def _suspect(val):
            if len(val) > 1:
                if val.startswith("_:"):
                    _bids.add(val)
                elif val[0] == "[" and val[-1] == "]":
                    # bracketed form: look at what is between the brackets
                    _suspect(val[1:-1])

        for a in ("about", "resource", "typeof"):
            if node.hasAttribute(a):
                _suspect(node.getAttribute(a))
        return False

    def _give_BID():
        """Generate a new value that can be used as a bnode id...

        @return: a string of the form _:xNNNN that is unique (ie, was not yet stored in
        L{_bids}); the value is recorded in L{_bids} before being returned.
        """
        upper = 10000
        while True:
            val = "_:x%s" % random.randint(1, upper)
            if val not in _bids:
                _bids.add(val)
                return val
            # Collision: widen the range, otherwise the loop would spin forever once
            # all the id-s of the initial range are taken
            upper *= 2

    def _li_children(node):
        """Return the list of the C{<li>} elements that are direct children of the node."""
        return [n for n in node.childNodes
                if n.nodeType == node.ELEMENT_NODE and n.tagName == "li"]

    def _check_if_hit(node, rdfprefix):
        """
        Check if the node has one of the C{typeof} values that would trigger the transformation.

        @param node: DOM node (standing for a C{<ul>} or a C{<ol>})
        @param rdfprefix: prefix to be used for the RDF namespace
        @return: the value of _CONTAINER, _COLLECTION, or _NONE
        """
        if node.hasAttribute("typeof"):
            for t in node.getAttribute("typeof").split():
                # check if it is a namespaces thing at all...
                if ":" in t:
                    key, lname = t.split(":", 1)
                    if key == rdfprefix:
                        if lname in ("Seq", "Alt", "Bag"):
                            return _CONTAINER
                        elif lname == "List":
                            return _COLLECTION
        return _NONE

    def _decorate_container(node, rdfprefix):
        """Take care of containers (ie, Seq, Alt, and Bag).

        The n-th C{<li>} child (counting from 1) is bound with the C{rdfprefix:_n} predicate.

        @param node: the node for the C{<ul>/<ol>}
        @param rdfprefix: the prefix of the RDF namespace
        """
        for position, n in enumerate(_li_children(node)):
            pr = "%s:_%s" % (rdfprefix, position + 1)
            if not _has_one_of_attributes(n, *_rdfa_attrs):
                # the simple case...
                n.setAttribute("property", pr)
            else:
                # the original node should not be changed, but should be reparented
                # into a new enclosure
                newEnclosure = node.ownerDocument.createElement("div")
                newEnclosure.setAttribute("rel", pr)
                node.replaceChild(newEnclosure, n)
                newEnclosure.appendChild(n)

    def _decorate_collection(node, rdfprefix):
        """Take care of collection (a.k.a. Lists).

        Each original C{<li>} is bound to the current list cell with C{rdfprefix:first}, and
        an extra C{<li>} is added after it to link the cell, via C{rdfprefix:rest}, to the
        next cell or to C{rdfprefix:nil} for the last one.

        @param node: the node for the C{<ul>/<ol>}
        @param rdfprefix: the prefix of the RDF namespace
        """
        List = "%s:List" % rdfprefix
        first = "%s:first" % rdfprefix
        rest = "%s:rest" % rdfprefix
        nil = "[%s:nil]" % rdfprefix

        # the list of 'li'-s is needed in advance (eg, for their numbers)
        originalNodes = _li_children(node)
        last = len(originalNodes) - 1

        # a bnode id should be generated for the top level node
        if node.hasAttribute("about"):
            currId = node.getAttribute("about")
        else:
            currId = "[%s]" % _give_BID()
            node.setAttribute("about", currId)

        for i, n in enumerate(originalNodes):
            # first the current <li> must be massaged
            if not _has_one_of_attributes(n, *_rdfa_attrs):
                # the simple case, the node is changed in situ..
                n.setAttribute("about", currId)
                n.setAttribute("property", first)
            else:
                # an enclosure for that node should be created, and the original node
                # is just reparented
                newEnclosure = node.ownerDocument.createElement("div")
                newEnclosure.setAttribute("rel", first)
                newEnclosure.setAttribute("about", currId)
                node.replaceChild(newEnclosure, n)
                newEnclosure.appendChild(n)

            # An extra <li> is necessary to add some additional info...
            newLi = node.ownerDocument.createElement("li")
            newLi.setAttribute("about", currId)
            newLi.setAttribute("rel", rest)
            if i != 0:
                # the head cell is the <ul>/<ol> itself, which already carries the List type
                newLi.setAttribute("typeof", List)
            if i == last:
                # This is the last element
                newLi.setAttribute("resource", nil)
                node.appendChild(newLi)
            else:
                newId = "[%s]" % _give_BID()
                newLi.setAttribute("resource", newId)
                currId = newId
                # the next <li> is still a direct child of the list at this point
                node.insertBefore(newLi, originalNodes[i + 1])

    # Take snapshots of the lists: the tree is modified while they are processed
    uls = list(html.getElementsByTagName("ul"))
    ols = list(html.getElementsByTagName("ol"))
    if not uls and not ols:
        return

    #--------------------------------------------------------------------------------
    # We have to extract the prefix used for rdf. It may not be 'rdf'...
    rdfprefix = rdf_prefix(html)

    #--------------------------------------------------------------------------------
    # We have to collect the current bnode id-s from the file to avoid conflicts
    traverse_tree(html, _collect_BIDs)
    # We will need the random function to generate unique bid-s
    random.seed(None)

    #--------------------------------------------------------------------------------
    for node in uls + ols:
        # check if this is one of those guys...
        t = _check_if_hit(node, rdfprefix)
        if t == _CONTAINER:
            _decorate_container(node, rdfprefix)
        elif t == _COLLECTION:
            _decorate_collection(node, rdfprefix)
def decorate_li_s(html, options):
    """
    The main transformer entry point. See the module description for details.

    Every C{<ul>} and C{<ol>} element below C{html} whose C{typeof} attribute contains the
    RDF container types (C{Seq}, C{Alt}, C{Bag}) or C{List}, qualified with the prefix
    returned by C{rdf_prefix}, gets its C{<li>} children decorated in place with the RDFa
    attributes that describe the membership. Nothing is done if the tree has no lists at all.

    @param html: a DOM node for the top level html element
    @param options: invocation options
    @type options: L{Options<pyRdfa.Options>}
    """
    # bnode id-s (including the leading "_:") that are already in use. Only membership is
    # ever tested, hence a set.
    _bids = set()

    # The attributes that make it impossible to decorate an <li> in situ
    _rdfa_attrs = ("href", "resource", "typeof", "about", "rel", "rev", "property")

    def _collect_BIDs(node):
        """Check and collect the possible bnode id-s in the file that might occur in CURIE-s.
        The function is called on each node by C{traverse_tree}. The L{_bids} variable is
        filled with the initial values.

        @param node: a DOM element node
        @return: always C{False}
        """
        def _suspect(val):
            if len(val) > 1:
                if val.startswith("_:"):
                    _bids.add(val)
                elif val[0] == "[" and val[-1] == "]":
                    # bracketed form: look at what is between the brackets
                    _suspect(val[1:-1])

        for a in ("about", "resource", "typeof"):
            if node.hasAttribute(a):
                _suspect(node.getAttribute(a))
        return False

    def _give_BID():
        """Generate a new value that can be used as a bnode id...

        @return: a string of the form _:xNNNN that is unique (ie, was not yet stored in
        L{_bids}); the value is recorded in L{_bids} before being returned.
        """
        upper = 10000
        while True:
            val = "_:x%s" % random.randint(1, upper)
            if val not in _bids:
                _bids.add(val)
                return val
            # Collision: widen the range, otherwise the loop would spin forever once
            # all the id-s of the initial range are taken
            upper *= 2

    def _li_children(node):
        """Return the list of the C{<li>} elements that are direct children of the node."""
        return [n for n in node.childNodes
                if n.nodeType == node.ELEMENT_NODE and n.tagName == "li"]

    def _check_if_hit(node, rdfprefix):
        """
        Check if the node has one of the C{typeof} values that would trigger the transformation.

        @param node: DOM node (standing for a C{<ul>} or a C{<ol>})
        @param rdfprefix: prefix to be used for the RDF namespace
        @return: the value of _CONTAINER, _COLLECTION, or _NONE
        """
        if node.hasAttribute("typeof"):
            for t in node.getAttribute("typeof").split():
                # check if it is a namespaces thing at all...
                if ":" in t:
                    key, lname = t.split(":", 1)
                    if key == rdfprefix:
                        if lname in ("Seq", "Alt", "Bag"):
                            return _CONTAINER
                        elif lname == "List":
                            return _COLLECTION
        return _NONE

    def _decorate_container(node, rdfprefix):
        """Take care of containers (ie, Seq, Alt, and Bag).

        The n-th C{<li>} child (counting from 1) is bound with the C{rdfprefix:_n} predicate.

        @param node: the node for the C{<ul>/<ol>}
        @param rdfprefix: the prefix of the RDF namespace
        """
        for position, n in enumerate(_li_children(node)):
            pr = "%s:_%s" % (rdfprefix, position + 1)
            if not _has_one_of_attributes(n, *_rdfa_attrs):
                # the simple case...
                n.setAttribute("property", pr)
            else:
                # the original node should not be changed, but should be reparented
                # into a new enclosure
                newEnclosure = node.ownerDocument.createElement("div")
                newEnclosure.setAttribute("rel", pr)
                node.replaceChild(newEnclosure, n)
                newEnclosure.appendChild(n)

    def _decorate_collection(node, rdfprefix):
        """Take care of collection (a.k.a. Lists).

        Each original C{<li>} is bound to the current list cell with C{rdfprefix:first}, and
        an extra C{<li>} is added after it to link the cell, via C{rdfprefix:rest}, to the
        next cell or to C{rdfprefix:nil} for the last one.

        @param node: the node for the C{<ul>/<ol>}
        @param rdfprefix: the prefix of the RDF namespace
        """
        List = "%s:List" % rdfprefix
        first = "%s:first" % rdfprefix
        rest = "%s:rest" % rdfprefix
        nil = "[%s:nil]" % rdfprefix

        # the list of 'li'-s is needed in advance (eg, for their numbers)
        originalNodes = _li_children(node)
        last = len(originalNodes) - 1

        # a bnode id should be generated for the top level node
        if node.hasAttribute("about"):
            currId = node.getAttribute("about")
        else:
            currId = "[%s]" % _give_BID()
            node.setAttribute("about", currId)

        for i, n in enumerate(originalNodes):
            # first the current <li> must be massaged
            if not _has_one_of_attributes(n, *_rdfa_attrs):
                # the simple case, the node is changed in situ..
                n.setAttribute("about", currId)
                n.setAttribute("property", first)
            else:
                # an enclosure for that node should be created, and the original node
                # is just reparented
                newEnclosure = node.ownerDocument.createElement("div")
                newEnclosure.setAttribute("rel", first)
                newEnclosure.setAttribute("about", currId)
                node.replaceChild(newEnclosure, n)
                newEnclosure.appendChild(n)

            # An extra <li> is necessary to add some additional info...
            newLi = node.ownerDocument.createElement("li")
            newLi.setAttribute("about", currId)
            newLi.setAttribute("rel", rest)
            if i != 0:
                # the head cell is the <ul>/<ol> itself, which already carries the List type
                newLi.setAttribute("typeof", List)
            if i == last:
                # This is the last element
                newLi.setAttribute("resource", nil)
                node.appendChild(newLi)
            else:
                newId = "[%s]" % _give_BID()
                newLi.setAttribute("resource", newId)
                currId = newId
                # the next <li> is still a direct child of the list at this point
                node.insertBefore(newLi, originalNodes[i + 1])

    # Take snapshots of the lists: the tree is modified while they are processed
    uls = list(html.getElementsByTagName("ul"))
    ols = list(html.getElementsByTagName("ol"))
    if not uls and not ols:
        return

    #--------------------------------------------------------------------------------
    # We have to extract the prefix used for rdf. It may not be 'rdf'...
    rdfprefix = rdf_prefix(html)

    #--------------------------------------------------------------------------------
    # We have to collect the current bnode id-s from the file to avoid conflicts
    traverse_tree(html, _collect_BIDs)
    # We will need the random function to generate unique bid-s
    random.seed(None)

    #--------------------------------------------------------------------------------
    for node in uls + ols:
        # check if this is one of those guys...
        t = _check_if_hit(node, rdfprefix)
        if t == _CONTAINER:
            _decorate_container(node, rdfprefix)
        elif t == _COLLECTION:
            _decorate_collection(node, rdfprefix)