def _inlineURNs(self, root, rdfContainer):
    """Replace rdf:resource references below *root* with the referenced nodes.

    For every known relation tag, each matching element that carries an
    ``rdf:resource`` attribute is handled as follows (first match wins):

    1. The URN resolves in *rdfContainer* to a node that is not an
       annotation: that node is appended, recursively inlined itself,
       and the ``rdf:resource`` attribute is removed.
    2. The URN starts with ``self._baseUrl``: the stored "oacBody" part is
       appended when available, and the attribute is removed.
    3. The relation has a partname: the stored part with that name is
       appended when available, and the attribute is removed.

    Elements for which none of the above yields data are left untouched.
    """
    relations = [
        {"tag": "oa:annotatedBy", "partname": "foafAgent"},
        {"tag": "oa:hasBody", "partname": "oacBody"},
        {"tag": "oa:hasSource"},
        {"tag": "oa:hasSelector"},
        {"tag": "oa:hasTarget", "partname": "oacConstrainedTarget"},
    ]

    def inlineStoredPart(element, identifier, partname):
        # Only inline (and drop the reference) when storage reports the
        # part as available; otherwise the element keeps its rdf:resource.
        if self.call.isAvailable(identifier=identifier, partname=partname) == (True, True):
            stream = self.call.getStream(identifier=identifier, partname=partname)
            element.append(parse(StringIO(stream.read())).getroot())
            del element.attrib[expandNs("rdf:resource")]

    for relation in relations:
        relationPartname = relation.get("partname")
        for node in xpath(root, "%s[@rdf:resource]" % relation["tag"]):
            urn = getAttrib(node, "rdf:resource")
            if not urn:
                continue

            resolvedNode = rdfContainer.resolve(urn)
            if resolvedNode is not None and not isAnnotation(resolvedNode):
                node.append(resolvedNode)
                # The inlined node may itself hold references to resolve.
                self._inlineURNs(resolvedNode, rdfContainer)
                del node.attrib[expandNs("rdf:resource")]
                continue

            if urn.startswith(self._baseUrl):
                # URNs under the base URL are always looked up as "oacBody",
                # regardless of the relation's own partname.
                inlineStoredPart(node, urn, "oacBody")
                continue

            if relationPartname is not None:
                inlineStoredPart(node, urn, relationPartname)
def testOne(self):
    """resolve() returns the description whose rdf:about equals the URN."""
    # Adjacent literals keep the document text identical to the original
    # single string while staying readable.
    XML = (
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> '
        '<rdf:Description rdf:about="urn:nr:1"/> '
        '<rdf:Description rdf:about="urn:nr:2"/> '
        '<rdf:Description rdf:about="urn:nr:3"/> '
        '<rdf:Description rdf:about="urn:nr:4"/> '
        '</rdf:RDF>'
    )
    container = RdfContainer(parse(StringIO(XML)))

    result = container.resolve('urn:nr:3')

    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual('urn:nr:3', getAttrib(result, "rdf:about"))
def testOacBodiesStored(self):
    """The oa:Body found through SRU can be fetched from its own URL as RDF/XML."""
    # Step 1: locate the annotation through SRU and take the body's rdf:about.
    headers, body = getRequest(
        self.portNumber,
        "/sru",
        arguments=dict(
            version="1.1",
            operation="searchRetrieve",
            query="IamUnique42",
        ),
        parse='lxml',
    )
    oacBody = xpath(
        body,
        "/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordData/rdf:RDF/oa:Annotation/oa:hasBody/oa:Body",
    )[0]
    about = getAttrib(oacBody, "rdf:about")

    # Step 2: request only the path component of that URL from the same server.
    _, _, path, _, _ = urlsplit(about)
    headers, body = getRequest(self.portNumber, path, parse=False)

    # On failure the headers are shown as the assertion message.
    self.assertTrue('200' in headers, headers)
    lines = body.split('\n')
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual('<?xml version="1.0" encoding="utf-8"?>', lines[0])
    self.assertEqual('<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">', lines[1])
    self.assertEqual('</rdf:RDF>', lines[-1])
def process(self):
    """Retrieve all resolvables and hand their filtered nodes to ``self.all.add``.

    This is a generator: it yields whatever ``self.all.add`` returns for
    each node produced by an item's ``filter``. After all items of a
    resolvable have been handled, ``self.call.inject`` is called with that
    resolvable's identifier.

    A URL that cannot be retrieved or parsed is reported on stdout and
    skipped; the remaining URLs are still processed.
    """
    for resolvable in self.listResolvables():
        for item in resolvable['items']:
            for url in item['urls']:
                try:
                    lxmlNode = parse(self._urlopen(url))
                except Exception:
                    # Best effort per URL. Deliberately not a bare except:
                    # KeyboardInterrupt and SystemExit must still stop the
                    # run instead of being reported as retrieval errors.
                    print("Error retrieving %s" % (url,))
                    continue
                for node in item['filter'](lxmlNode):
                    identifier = getAttrib(node, "rdf:about")
                    # Serialise and re-parse so that what gets stored is a
                    # tree of its own, not a node inside lxmlNode.
                    newNode = parse(StringIO(tostring(node)))
                    yield self.all.add(
                        identifier=identifier,
                        partname=item['partname'],
                        lxmlNode=newNode,
                    )
        self.call.inject(resolvable['identifier'])
def identifierFromXml(lxmlNode):
    """Return the rdf:about value of the node that ``aboutNode`` finds.

    Returns None when ``aboutNode(lxmlNode)`` yields no node.
    """
    described = aboutNode(lxmlNode)
    if described is None:
        return None
    return getAttrib(described, 'rdf:about')