def work(self, marc):
    """Assemble the Work graph for *marc* and register it with the context."""
    proc = self.process()
    proc.use(marc.identifier)
    work = Graph(identifier=URIRef(marc.identifier + "/work"))
    work.add((work.identifier, RDF["type"], OBP["Work"]))
    # Carry the simple descriptive properties straight over from the record.
    for predicate in (DC["title"], DC["description"],
                      BIBO["lccn"], OBP["scn"]):
        work += self.rewrite(marc, work, predicate)
    for contributor in self.contributors(marc):
        work.add((work.identifier, DC["contributor"], contributor.identifier))
    for subj in self.subjects(marc):
        work.add((work.identifier, DC["subject"], subj.identifier))
        # Non-person subjects are inlined; persons live in their own context.
        if not subj.exists((subj.identifier, RDF["type"], FOAF["Person"])):
            work += subj
    manif = self.manifestation(marc)
    work.add((work.identifier, OBP["hasManifestation"], manif.identifier))
    proc.result(work)
    self.context.add(work)
def test_proxy(self):
    """Fetch a remote resource through /proxy and check the N3 parses."""
    query = urlencode({
        "uri": "http://www.w3.org/People/EM/contact#me",
        "format": "text/n3",
    })
    response = self.app.get("/proxy?" + query)
    graph = Graph()
    graph.parse(StringIO(response.body), format="n3")
def command(self): uri = self.get_uri() g = Graph(identifier=uri) g.parse(uri, format="n3") if self.options.format == "thimbl": print serialize_thimbl(g) else: print g.serialize(format=self.options.format)
def index_aggregate(a):
    """Build a xapian Document for the aggregate graph *a*.

    Returns (docid, doc): a u"URI"-prefixed term id and the populated
    document.  The document stores an abbreviated N3 extract as its data
    and stemmed full-text terms for the common descriptive predicates.
    """
    doc = xapian.Document()
    doc.add_value(VAL_URI, a.identifier)
    docid = u"URI" + a.identifier
    doc.add_term(docid)
    log.debug("Aggregate: %s" % a.identifier)

    def add_value(g, val_id, subject, predicate):
        # Collect English (or language-less) objects of the given predicate,
        # join them into one value slot on `doc`, and return the joined
        # string (or the empty list when nothing matched).
        val = []
        for s, p, o in g.triples((subject, predicate, None)):
            if not o.language or o.language == "en": ### TODO: fix this
                val.append(o)
        if val:
            val = u", ".join(val)
            doc.add_value(val_id, val)
        return val

    ## create an abbreviated graph to store in the xapian database
    extract = Graph()
    add_value(a, VAL_LABEL, a.identifier, RDFS.label)
    for g in a.contexts():
        log.debug("Indexing: %s" % g.identifier)
        for pred in (RDF.type, RDFS.label, RDFS.comment,
                     DC.title, DC.description, FOAF.name):
            for statement in a.triples((g.identifier, pred, None)):
                extract.add(statement)
        # Title and name also become boolean terms (truncated to 160 chars).
        title = add_value(g, VAL_TITLE, g.identifier, DC.title)
        if title:
            doc.add_term(u"ZT" + title[:160])
        name = add_value(g, VAL_NAME, g.identifier, FOAF.name)
        if name:
            doc.add_term(u"NA" + name[:160])
    doc.set_data(extract.serialize(format="n3"))

    ## take any fields that contain text, stem them according to their
    ## language (or english if unsupported or unspecified) and put them
    ## in the index
    termgen = xapian.TermGenerator()
    termgen.set_document(doc)
    for pred in (RDFS.label, RDFS.comment, DC.title, DC.description,
                 FOAF.name, FOAF.first_name, FOAF.last_name, FOAF.surname):
        for s, p, o in a.triples((None, pred, None)):
            termgen.increase_termpos()
            if o.language:
                try:
                    stemmer = xapian.Stem(o.language)
                except xapian.InvalidArgumentError:
                    # Unknown language code: fall back to English stemming.
                    stemmer = xapian.Stem("en")
            else:
                stemmer = xapian.Stem("en")
            termgen.set_stemmer(stemmer)
            termgen.index_text(o)
    return docid, doc
def create(self, data):
    """Create a new content graph of this form's rdf:type from form data."""
    content = Graph()
    # Type the fresh resource according to the form configuration.
    content.add((content.identifier, RDF['type'], self.rdftype))
    # Apply widget values from this form and from each sub-group.
    form.applyChanges(self, content, data)
    for grp in self.groups:
        form.applyChanges(grp, content, data)
    return content
def get(self, identifier):
    """Check out the graph for *identifier*.

    Simple check-out mechanism: the same (detached) copy is returned
    until the graph is put back.
    """
    try:
        return self._cache[identifier]
    except KeyError:
        pass
    stored = self.store.get_context(identifier)
    # Hand out a detached copy so edits don't hit the store directly.
    copy = Graph(identifier=identifier)
    for triple in stored:
        copy.add(triple)
    self._cache[identifier] = copy
    return copy
def getContent(self):
    """Return the fresnel lens graph (master overlaid with custom) as N3."""
    registry = getUtility(IRegistry)
    settings = registry.forInterface(IRDFSettings, check=False)
    graph = Graph(identifier=settings.fresnel_graph_uri)
    for source in (master, custom):
        graph.parse(StringIO(source), format='n3')
    return dict(lens=graph.serialize(format='n3'))
def load_file(fileobj):
    '''Loads the specified COFOG-like file into the database with key
    names 'cofog1', 'cofog2' and 'cofog3'.
    '''
    # TODO: replace with simple import of the cofog rdf data which already has
    # relevant structure
    from wdmmgrdf.model import handler
    ctx = handler.context(u'importer', u'loading cofog')
    graph = Graph(identifier=COFOG_IDENTIFIER)
    graph.parse(fileobj, format='n3')
    log.info('add %s' % graph.identifier)
    ctx.add(graph)
    log.info('commit changes')
    ctx.commit()
def publisher(self, marc):
    """Split the publisher description out of *marc* into its own graph."""
    proc = self.process()
    proc.use(marc.identifier)
    pub = Graph(identifier=URIRef(marc.identifier + "/publisher"))
    for _s, _p, obj in marc.triples((marc.identifier, DC["publisher"], None)):
        # Re-root the publisher's bnode closure onto the new identifier.
        pub += marc.bnc((obj, None, None)).replace(
            (obj, None, None), (pub.identifier, None, None))
    # Place of publication belongs to the manifestation, not the publisher.
    pub.remove((pub.identifier, DC["spatial"], None))
    proc.result(pub)
    self.context.add(pub)
    return pub
def manifestation(self, marc):
    """Build the Manifestation graph for *marc* and register it."""
    proc = self.process()
    proc.use(marc.identifier)
    manif = Graph(identifier=URIRef(marc.identifier + "/manifestation"))
    manif.add((manif.identifier, RDF["type"], OBP["Manifestation"]))
    publisher = self.publisher(marc)
    manif.add((manif.identifier, DC["publisher"], publisher.identifier))
    # Lift the publisher's place(s) of publication onto the manifestation.
    for _s, _p, obj in marc.triples(
            (marc.identifier, DC["publisher"], None)):
        for _s2, _p2, place in marc.triples((obj, DC["spatial"], None)):
            manif.add((manif.identifier, DC["spatial"], place))
    # Physical / identifier properties copied verbatim off the record.
    for predicate in (BIBO["isbn"], BIBO["isbn10"], BIBO["isbn13"],
                      DC["date"], DC["extent"],
                      OBP["dimensions"], OBP["edition"], OBP["lccall"],
                      OBP["nlmcall"], OBP["nbn"], OBP["physicalDetail"],
                      RDFS["seeAlso"]):
        manif += self.rewrite(marc, manif, predicate)
    proc.result(manif)
    self.context.add(manif)
    return manif
def test_05_put(self):
    """Round-trip: GET the graph as RDF/XML, PUT it back, expect no
    skolemized bnodes (urn:uuid:) in the response."""
    response = self.app.get(
        url("/graph", uri=test_graph, format="application/rdf+xml"))
    graph = Graph()
    graph.parse(StringIO(response.body), format="xml")
    ## now put it back
    response = self.app.put(
        url("/graph", uri=test_graph),
        params=graph.serialize(format="pretty-xml"),
        headers={"Content-type": "application/rdf+xml"})
    assert response.body.find("urn:uuid:") == -1
def create(cls, uri=None):
    '''Create an object with uri `uri` and associated to a graph
    identified by same uri'''
    if uri is None:
        uri = cls.new_identifier()
    identifier = u(uri)
    return cls(identifier, graph=Graph(identifier=identifier))
def applyChanges(self, data):
    """Persist the edited lens turtle into the shared graph store."""
    data, errors = self.extractData()
    if errors:
        self.status = self.formErrorsMessage
        return
    registry = getUtility(IRegistry)
    settings = registry.forInterface(IRDFSettings, check=False)
    parsed = Graph(identifier=settings.fresnel_graph_uri)
    parsed.parse(StringIO(data['lens']), format='turtle')
    local = self.localGraph
    # Replace the old context wholesale with the freshly parsed triples.
    local.remove_context(parsed)
    local.addN((s, p, o, parsed)
               for (s, p, o) in parsed.triples((None, None, None)))
    getUtility(IORDF).clearCache()
def _get_graph(self):
    """Resolve the request URI to a graph and render it in the negotiated
    format (html / bibtex / raw serialization).  Returns the response body.
    """
    uri = self._uri()
    content_type, format = self._accept(uri)
    # A ".bibtex" suffix overrides content negotiation entirely.
    if uri.endswith("bibtex"):
        content_type = "text/x-bibtex"
        format = "bibtex"
        uri_str, _ = uri.rsplit(".", 1)
        uri = URIRef(uri_str)
    graph = handler.get(uri)
    if len(graph) == 0:
        # Nothing stored directly under this URI: fall back to a SPARQL
        # CONSTRUCT against the store, bounded by a 10s result timeout.
        graph.rollback()
        cursor = handler.rdflib.store.cursor()
        cursor.execute("SET result_timeout = 10000")
        q = construct_graph % {"agent": uri.n3()}
        graph = handler.rdflib.store.sparql_query(q, cursor=cursor)
        graph = Graph(graph.store, identifier=graph.identifier) # ordf extensions
        cursor.close()
    if len(graph) == 0:
        abort(404, "No such graph: %s" % uri)
    if format == "html":
        c.graph = graph
        c.model = model.Entry.get_by_uri(uri)
        response.content_type = str(content_type)
        # should really iterate through the potential views
        if URIRef("http://purl.org/ontology/bibo/Book") in list(
                c.model.type):
            data = render("view_bibo_book.html")
        else:
            data = self._render_graph()
    elif format == "bibtex":
        b = Bibtex()
        b.load_from_graph(graph)
        data = b.to_bibtex()
        response.content_type = str(content_type)
        # Point clients at the canonical .bibtex location for this entry.
        response.headers['Content-Location'] = "%s.bibtex" % b.uniquekey
        response.headers['Location'] = "%s.bibtex" % b.uniquekey
    else:
        data = graph.serialize(format=format)
        response.content_type = str(content_type)
    # Release the store transaction before returning the rendered body.
    graph.rollback()
    # log.warn("XXX cursor: %s" % handler.rdflib.store._cursor)
    return data
def contributors(self, marc):
    """Split each dc:contributor of *marc* into its own graph."""
    result = []
    for i, (s, p, o) in enumerate(
            marc.triples((marc.identifier, DC["contributor"], None))):
        proc = self.process()
        proc.use(marc.identifier)
        identifier = URIRef(marc.identifier + "/contributor/%d" % i)
        contributor = Graph(identifier=identifier)
        # Re-root the contributor's bnode closure onto its new identifier.
        contributor += marc.bnc((o, None, None)).replace(
            (o, None, None), (identifier, None, None))
        # Untyped contributors default to foaf:Person.
        if not contributor.exists((identifier, RDF["type"], None)):
            contributor.add((identifier, RDF["type"], FOAF["Person"]))
        proc.result(contributor)
        self.context.add(contributor)
        result.append(contributor)
    return result
def setUp(cls):
    """Load the fixture graphs once per test class run."""
    from openbiblio import handler
    if cls.done:
        return
    ctx = handler.context(getuser(), "Initial Data")
    for graph in cls.data():
        ## delete any stale history
        ctx.add(graph)
    ctx.commit()
    ctx = handler.context(getuser(), "Bibtex Graph data")
    ident = URIRef("http://bnb.bibliographica.org/entry/GB9361575")
    bib = Graph(identifier=ident)
    bib.parse(os.path.join(cls.testdata, "GB9361575.rdf"))
    ctx.add(bib)
    ctx.commit()
    cls.done = True
def importLocalRDF(context):
    """Import the RDF graphs listed in ontologies.xml into the external store."""
    # FIXME: there is no internal store at the moment.
    # maybe we can do something like import to named store
    # or just let IORDF tool decide where to store it?
    # TODO: allow to replace / add
    #       clear whole store / clear single graphs
    #       support not just turtle
    xml = context.readDataFile('ontologies.xml')
    if xml is None:
        LOG.debug('Nothing to import.')
        return
    LOG.info('Import RDF data into local triple store')
    tool = getUtility(IORDF)
    for node in ET.fromstring(xml):
        if node.tag not in ('local', 'external'):
            raise ValueError('Unknown node: {}'.format(node.tag))
        if node.tag in ('local',):
            LOG.warn("Import to local store no longer supported.")
            continue
        fname = node.get('file')
        uri = node.get('uri')
        filename = 'ontologies/{}'.format(fname)
        data = context.readDataFile(filename)
        if data is None:
            raise ValueError('File missing: {}'.format(filename))
        if not uri:
            raise ValueError('Missing URI for graph: {}'.format(filename))
        # node.tag == 'external'
        LOG.info('load {} into external store.'.format(fname))
        graph = Graph(identifier=uri)
        graph.parse(data=data, format='turtle')
        tool.getHandler().put(graph)
def create_collection(user, object_dict=None):
    """Create a collection graph for *user* and commit it to the store.

    :param user: owner of the collection; used for the changeset context.
    :param object_dict: optional overrides for the defaults below
        ('uri', 'title', 'user', 'works').
    :return: the URI string of the newly created collection.
    """
    # NOTE: object_dict was previously a mutable default argument ({}),
    # which is shared across calls; a None sentinel avoids that pitfall.
    values = {
        'uri': 'http://bibliographica.org/collection/' + str(uuid.uuid4()),
        'title': 'Untitled',
        'user': user,
        'works': [],
    }
    if object_dict:
        values.update(object_dict)
    uri = values['uri']
    ident = URIRef(uri)
    data = Graph(identifier=ident)
    ourdata = collection_n3 % values
    # Append one rdfs:member statement per work in the collection.
    for work in values['works']:
        ourdata += '<%s> rdfs:member <%s> .\n' % (work, ident)
    data.parse(data=ourdata, format='n3')
    ctx = handler.context(user, "Creating collection: %s" % uri)
    ctx.add(data)
    ctx.commit()
    return uri
def _get_graph(self):
    """Resolve the request URI to a graph and render it in the negotiated
    format (html / bibtex / raw serialization).  Returns the response body.
    """
    uri = self._uri()
    content_type, format = self._accept(uri)
    # A ".bibtex" suffix overrides content negotiation entirely.
    if uri.endswith("bibtex"):
        content_type = "text/x-bibtex"
        format = "bibtex"
        uri_str, _ = uri.rsplit(".", 1)
        uri = URIRef(uri_str)
    graph = handler.get(uri)
    if len(graph) == 0:
        # Nothing stored directly under this URI: fall back to a SPARQL
        # CONSTRUCT against the store, bounded by a 10s result timeout.
        graph.rollback()
        cursor = handler.rdflib.store.cursor()
        cursor.execute("SET result_timeout = 10000")
        q = construct_graph % {"agent": uri.n3()}
        graph = handler.rdflib.store.sparql_query(q, cursor=cursor)
        graph = Graph(graph.store, identifier=graph.identifier) # ordf extensions
        cursor.close()
    if len(graph) == 0:
        abort(404, "No such graph: %s" % uri)
    if format == "html":
        c.graph = graph
        data = self._render_graph()
    elif format == "bibtex":
        b = Bibtex()
        b.load_from_graph(graph)
        data = b.to_bibtex()
        response.content_type = str(content_type)
        # Point clients at the canonical .bibtex location for this entry.
        response.headers['Content-Location'] = "%s.bibtex" % b.uniquekey
        response.headers['Location'] = "%s.bibtex" % b.uniquekey
    else:
        data = graph.serialize(format=format)
        response.content_type = str(content_type)
    # Release the store transaction before returning the rendered body.
    graph.rollback()
    # log.warn("XXX cursor: %s" % handler.rdflib.store._cursor)
    return data
def rdf_data():
    """Yield the OBP schema graph, then one graph per packaged lens."""
    graph_uri = "http://purl.org/okfn/obp#"
    log.info("Loading %s" % graph_uri)
    graph = Graph(identifier=graph_uri)
    fp = pkg_resources.resource_stream(
        "openbiblio", os.path.join("n3", "obp.n3"))
    graph.parse(fp, format="n3")
    fp.close()
    yield graph
    for lens in pkg_resources.resource_listdir("openbiblio", "lenses"):
        if not lens.endswith(".n3"):
            continue
        # Lens identifier is the filename minus its ".n3" extension.
        graph = Graph(identifier=OBPL[lens[:-3]])
        fp = pkg_resources.resource_stream(
            "openbiblio", os.path.join("lenses", lens))
        graph.parse(fp, format="n3")
        fp.close()
        yield graph
def subjects(self, marc):
    """Extract dc:subject values from *marc* into per-subject graphs.

    Returns a list of graphs: anonymous value-graphs for literal subjects
    and identifier-rooted graphs for non-person URI subjects.
    """
    result = []
    i = 0
    for s, p, o in marc.triples((marc.identifier, DC["subject"], None)):
        if isinstance(o, Literal):
            # Plain literal subject: wrap it in an anonymous graph.
            subject = Graph()
            subject.add((subject.identifier, RDF["value"], o))
            result.append(subject)
        elif marc.exists((o, RDF["type"], FOAF["Person"])):
            # Person subject: split into its own /subject/N context.
            proc = self.process()
            proc.use(marc.identifier)
            identifier = URIRef(marc.identifier + "/subject/%d" % i)
            subject = Graph(identifier=identifier)
            subject += marc.bnc((o, None, None)).replace(
                (o, None, None), (identifier, None, None))
            proc.result(subject)
            self.context.add(subject)
            i += 1
            # NOTE(review): person subjects are added to the context but NOT
            # appended to `result` -- looks deliberate, but confirm callers
            # never expect them in the return value.
        else:
            # Other URI subject: keep its own identifier and bnode closure.
            subject = Graph(identifier=o)
            subject += marc.bnc((o, None, None))
            result.append(subject)
    return result
def rdf_data():
    """Yield the CC schema graph, the licenses lens graph, and one graph
    per license known to the licenses service.
    """
    s = LicensesService2()
    g = Graph(identifier=CC[""])
    g.parse("http://creativecommons.org/schema.rdf")
    yield g
    fp = pkg_resources.resource_stream("licenses",
                                       os.path.join("n3", "license.n3"))
    g = Graph(identifier=LICENSES["lens"])
    g.parse(fp, format="n3")
    fp.close()
    yield g
    for ld in s.get_licenses():
        ident = LICENSES[ld["id"]]
        g = Graph(identifier=ident)
        l = License(ident, graph=g)
        l.label = Literal(ld["title"])
        l.prefLabel = Literal(ld["title"])
        l.notation = Literal(ld["id"])
        l.lens = LICENSES.lens
        if ld.get("url"):
            url = URIRef(ld["url"])
            sa = Graph()
            # Best effort: the license page may be unreachable or
            # unparseable; carry on with whatever was fetched.
            # (was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt -- narrowed to Exception)
            try:
                sa.parse(url)
            except Exception:
                pass
            try:
                sa.parse(url, format="rdfa")
            except Exception:
                pass
            # Strip presentational XHTML noise from the scrape.
            sa.remove((url, XHV.icon, None))
            sa.remove((url, XHV.alternate, None))
            sa.remove((url, XHV.stylesheet, None))
            for ll in sa.distinct_objects(url, XHV.license):
                l.license = ll
            sa.remove((url, XHV.license, None))
            if sa.bnc((url, None, None)):
                # Re-root the page's description onto our identifier.
                # (was a side-effect list comprehension)
                for _s, p, o in sa.bnc((url, None, None)):
                    g.add((ident, p, o))
                l.sameAs = url
            else:
                l.seeAlso = URIRef(ld["url"])
        yield g
def purge(self, uri):
    """Remove the graph identified by *uri* from the store."""
    handler.remove(Graph(identifier=u(uri)))
def get(self, identifier):
    """Return a fresh graph bound to *identifier*."""
    graph = Graph(identifier=identifier)
    return graph
def test_02_n3(self):
    """GET the test graph as N3 and check that the body parses."""
    response = self.app.get(
        url("/graph", uri=test_graph, format="text/n3"))
    graph = Graph()
    graph.parse(StringIO(response.body), format="n3")
def test_03_rdfxml(self):
    """GET the test graph as RDF/XML and check that the body parses."""
    response = self.app.get(
        url("/graph", uri=test_graph, format="application/rdf+xml"))
    graph = Graph()
    graph.parse(StringIO(response.body), format="xml")
def test_04_autoneg(self):
    """Content-negotiate N3 via the Accept header and check it parses."""
    response = self.app.get(
        url("/graph", uri=test_graph + '.n3'),
        headers={"Accept": "text/n3"})
    graph = Graph()
    graph.parse(StringIO(response.body), format="n3")
def rdf_data():
    """Yield the CC schema graph, the licenses lens graph, and one graph
    per license known to the licenses service.
    """
    s = LicensesService2()
    g = Graph(identifier=CC[""])
    g.parse("http://creativecommons.org/schema.rdf")
    yield g
    fp = pkg_resources.resource_stream("licenses",
                                       os.path.join("n3", "license.n3"))
    g = Graph(identifier=LICENSES["lens"])
    g.parse(fp, format="n3")
    fp.close()
    yield g
    for ld in s.get_licenses():
        ident = LICENSES[ld["id"]]
        g = Graph(identifier=ident)
        l = License(ident, graph=g)
        l.label = Literal(ld["title"])
        l.prefLabel = Literal(ld["title"])
        l.notation = Literal(ld["id"])
        l.lens = LICENSES.lens
        if ld.get("url"):
            url = URIRef(ld["url"])
            sa = Graph()
            # Best effort: the license page may be unreachable or
            # unparseable; carry on with whatever was fetched.
            # (was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt -- narrowed to Exception)
            try:
                sa.parse(url)
            except Exception:
                pass
            try:
                sa.parse(url, format="rdfa")
            except Exception:
                pass
            # Strip presentational XHTML noise from the scrape.
            sa.remove((url, XHV.icon, None))
            sa.remove((url, XHV.alternate, None))
            sa.remove((url, XHV.stylesheet, None))
            for ll in sa.distinct_objects(url, XHV.license):
                l.license = ll
            sa.remove((url, XHV.license, None))
            if sa.bnc((url, None, None)):
                # Re-root the page's description onto our identifier.
                # (was a side-effect list comprehension)
                for _s, p, o in sa.bnc((url, None, None)):
                    g.add((ident, p, o))
                l.sameAs = url
            else:
                l.seeAlso = URIRef(ld["url"])
        yield g
def rdf(self, *av, **kw):
    """Render this MARC record as an RDF graph.

    Record keys are "ns:term" strings resolved through `namespaces`;
    nested dicts become bnodes.  After the raw merge, derived statements
    (seeAlso links, isbn10/13, nbn/scn/call-number fixups) are layered on.
    Positional/keyword arguments are passed through to the Graph constructor.
    """
    g = Graph(*av, **kw)
    g.add((g.identifier, RDF["type"], OBP["MarcRecord"]))

    def merge(d, s):
        # Recursively copy mapping `d`, rooted at subject `s`, into g.
        for k, v in d.items():
            ns, term = k.split(":")
            p = namespaces[ns][term]
            for o in v:
                if isinstance(o, dict):
                    b = BNode()
                    g.add((s, p, b))
                    merge(o, b)
                else:
                    g.add((s, p, o))

    ident = g.identifier
    merge(self, ident)
    # ISBN cross-links to common linked-data book services, plus a
    # length-based split into isbn10 / isbn13.
    for s, p, o in g.triples((ident, BIBO["isbn"], None)):
        g.add((ident, RDFS["seeAlso"], URIRef("urn:isbn:%s" % o)))
        g.add((ident, RDFS["seeAlso"],
               URIRef("http://purl.org/NET/book/isbn/%s#book" % o)))
        g.add(
            (ident, RDFS["seeAlso"],
             URIRef("http://www4.wiwiss.fu-berlin.de/bookmashup/books/%s" % o)))
        if len(o) == 10:
            g.add((ident, BIBO["isbn10"], o))
        elif len(o) == 13:
            g.add((ident, BIBO["isbn13"], o))
    for s, p, o in g.triples((ident, BIBO["issn"], None)):
        g.add((ident, RDFS["seeAlso"], URIRef("urn:issn:%s" % o)))
    for s, p, o in g.triples((ident, BIBO["lccn"], None)):
        g.add(
            (ident, RDFS["seeAlso"], URIRef(u"http://lccn.loc.gov/" + o)))
    # Normalization passes for the various national/library numbers.
    self.nbn(g)
    self.scn(g)
    self.lccall(g)
    self.lccopy(g)
    self.isPartOf(g)
    return g
def __init__(self, field):
    """Initialise the fragment graph for *field*.

    Renames the receiver from the unconventional `frag` to the standard
    `self`; the positional interface is unchanged.
    """
    Graph.__init__(self)
    self.field = field
    # NOTE(review): the instance is invoked immediately -- presumably the
    # __call__ implementation populates the graph from `field`; confirm.
    self()
def data(cls):
    """Yield the fixture graph parsed from fixtures.rdf."""
    ident = URIRef("http://bibliographica.org/test")
    fixtures = Graph(identifier=ident)
    fixtures.parse(os.path.join(cls.testdata, "fixtures.rdf"))
    yield fixtures