def parse_handle_to_model(self, rooted=False, storage=None, parse_format='turtle', context=None, **kwargs):
    '''Parse self.handle into RDF model self.model.

    Arguments:
     - rooted - stored on ``self.rooted`` before parsing.
     - storage - Redland storage used when a new model has to be created;
       ``new_storage()`` supplies one when omitted.  Unused if
       ``self.model`` already exists.
     - parse_format - name handed to ``RDF.Parser`` (default ``'turtle'``).
     - context - forwarded to ``self.parse_model``.
     - base_uri (keyword) - base URI for parsing; defaults to a ``file://``
       URI built from the absolute path of ``self.handle.name``.

    Returns the result of ``self.parse_model(model, context=context)``.

    Raises CDAOError if the model cannot be created and Exception if the
    parser cannot be created.
    '''
    if storage is None:
        # store RDF model in memory for now
        storage = new_storage()

    if self.model is None:
        self.model = RDF.Model(storage)
        if self.model is None:
            raise CDAOError("new RDF.model failed")
    model = self.model

    self.rooted = rooted

    parser = RDF.Parser(name=parse_format)
    if parser is None:
        # The message used to interpolate an undefined name (mime_type),
        # which turned this error path into a NameError.
        raise Exception('Failed to create RDF.Parser for MIME type %s' % parse_format)

    if 'base_uri' in kwargs:
        base_uri = kwargs['base_uri']
    else:
        base_uri = RDF.Uri(string="file://" + os.path.abspath(self.handle.name))

    # Copy every parsed statement into the (possibly pre-existing) model.
    for statement in parser.parse_string_as_stream(self.handle.read(), base_uri):
        model.append(statement)

    return self.parse_model(model, context=context)
def posts(self,*args):
    """Fetch the RSS posts filed under this tag.

    Positional arguments are inspected by type: every ``Tag`` is collected
    as an additional tag, and a ``User`` (the last one given wins) restricts
    the feed to that user.  Arguments of any other type are ignored.

    Returns a list of ``RSSTagPost`` objects.  Fetching and parsing are
    best-effort: any ordinary error yields ``[]``.
    """
    alltags = Set()
    user = None
    for arg in args:
        if isinstance(arg, Tag):
            alltags.add(arg)
        if isinstance(arg, User):
            user = arg

    extratags = ""
    if len(alltags) > 0:
        extratags = "+" + "+".join([str(tag) for tag in alltags])

    if user is not None:
        url = "http://del.icio.us/rss/" + str(user) + "/" + self.name + extratags
    else:
        # NOTE(review): the additional tags are not part of the tag-only
        # feed URL; this mirrors the existing behaviour -- confirm intended.
        url = "http://del.icio.us/rss/tag/" + self.name

    model = RDF.Model()
    parser = RDF.Parser()
    try:
        parser.parse_string_into_model(model, get_url_contents(url), RDF.Uri("http://foo"))
        item_pattern = RDF.Statement(
            None,
            RDF.Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            RDF.Uri("http://purl.org/rss/1.0/item"))
        posts = [RSSTagPost(model, p.subject, self)
                 for p in model.find_statements(item_pattern)]
        if user is not None:
            for post in posts:
                post.user = user
        return posts
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.  Ordinary failures still degrade to an empty result.
        return []
def voidify(self):
    """Return the statistics serialized as a VoID description.

    VoID is specified at http://www.w3.org/TR/void/.  The description
    contains the dataset typing statement, the triple count, and whatever
    each entry of ``lodstats.stats.stats_to_do`` adds through its own
    ``voidify`` method.  Serialization uses ``self.get_serializer()``.
    """
    serializer = self.get_serializer()

    # ---- VoID dataset definition ----
    void_model = RDF.Model()
    ls_void = self.namespaces.get_namespace('ls_void')
    # TODO: URI encode ?
    dataset = RDF.Uri(ls_void + "?source=" + self.rdf_stats.uri)

    rdf_type = self.namespaces.get_rdf_namespace("rdf").type
    dataset_class = self.namespaces.get_rdf_namespace("void").Dataset
    void_model.append(RDF.Statement(dataset, rdf_type, dataset_class))
    #self.generate_general_void_metadata(void_model, void_dataset_entity)

    # ---- number of triples, typed as xsd:integer ----
    triple_count = str(self.rdf_stats.get_no_of_triples())
    xsd_integer = self.namespaces.get_rdf_namespace("xsd").integer.uri
    count_node = RDF.Node(literal=triple_count, datatype=xsd_integer)
    triples_property = self.namespaces.get_rdf_namespace("void").triples
    void_model.append(RDF.Statement(dataset, triples_property, count_node))

    # ---- contributions of the custom statistics ----
    for stat in lodstats.stats.stats_to_do:
        stat.voidify(void_model, dataset)

    return serializer.serialize_model_to_string(void_model)
def main(specloc="file:index.rdf"):
    """Render the vocabulary specification at *specloc* and print it.

    The RDF at ``specloc`` is parsed into a Redland model, the class and
    property lists are turned into HTML fragments, and these are substituted
    (together with the raw RDF text) into ``../0.1/template.html``.  The
    filled-in template is written to standard output.
    """
    m = RDF.Model()
    p = RDF.Parser()
    p.parse_into_model(m, specloc)

    classlist, proplist = specInformation(m)

    # Build HTML list of terms.
    azlist = buildazlist(classlist, proplist)

    # Generate Term HTML
    termlist = "<h3>Classes and Properties (full detail)</h3>"
    termlist += "<div class='termdetails'>"
    termlist += docTerms('Class', classlist, m)
    termlist += docTerms('Property', proplist, m)
    termlist += "</div>"

    # Generate RDF from original namespace.
    u = urllib.urlopen(specloc)
    try:
        rdfdata = u.read()
    finally:
        u.close()
    # str.replace returns a new string; the result used to be thrown away,
    # so the XML declaration was never actually removed.
    rdfdata = rdfdata.replace("""<?xml version="1.0"?>""", "")

    # The template uses python-style % escapes for the replaced sections.
    with open("../0.1/template.html", "r") as f:
        template = f.read()

    print(template % (azlist.encode("utf-8"), termlist.encode("utf-8"), rdfdata))
def merge_two_streams(self, stream_1, stream_2):
    """Merge two statement streams and return them as one serialized string.

    Both streams are added to a fresh ``RDF.Model`` (``stream_1`` first),
    which is then serialized with ``self.get_serializer()`` -- Turtle,
    according to the original comment.
    """
    merged = RDF.Model()
    for stream in (stream_1, stream_2):
        merged.add_statements(stream)
    return self.get_serializer().serialize_model_to_string(merged)
def posts(self, *args):
    """Fetch this user's RSS posts, optionally restricted to some tags.

    Every positional argument that is a ``Tag`` narrows the feed; arguments
    of any other type are ignored.

    Returns a list of ``RSSTagPost`` objects whose ``user`` attribute is set
    to this object.  Fetching and parsing are best-effort: any ordinary
    error yields ``[]``.
    """
    alltags = Set()
    for arg in args:
        if isinstance(arg, Tag):
            alltags.add(arg)

    url = "http://del.icio.us/rss/" + self.user
    if len(alltags) > 0:
        url += "/" + "+".join([str(tag) for tag in alltags])

    model = RDF.Model()
    parser = RDF.Parser()
    try:
        parser.parse_string_into_model(model, get_url_contents(url),
                                       RDF.Uri("http://foo"))
        item_pattern = RDF.Statement(
            None,
            RDF.Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            RDF.Uri("http://purl.org/rss/1.0/item"))
        posts = [RSSTagPost(model, p.subject)
                 for p in model.find_statements(item_pattern)]
        for post in posts:
            post.user = self
        return posts
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.  Ordinary failures still degrade to an empty result.
        return []
def localEvaluation(modelSourceUri, queryString):
    """Run a SPARQL query against RDF loaded from a URI and print the result.

    ``modelSourceUri`` is loaded into a fresh ``RDF.Model``; ``queryString``
    is executed on it and the result is handed to ``printQueryResults``.
    """
    local_model = RDF.Model()
    # TODO add possibility to explicitly set the content type and register
    # things like namespaces = ttlParser.namespaces_seen()
    local_model.load(modelSourceUri)
    sparql = RDF.SPARQLQuery(queryString)
    printQueryResults(sparql.execute(local_model))
def posts():
    """Return the items of the site-wide RSS feed as ``RSSTagPost`` objects.

    The feed at ``http://del.icio.us/rss/`` is downloaded with
    ``get_url_contents`` and parsed into a fresh model; one ``RSSTagPost`` is
    built for the subject of every statement typing a resource as an RSS
    1.0 item.  Errors are not caught here.
    """
    feed_url = "http://del.icio.us/rss/"
    model = RDF.Model()
    parser = RDF.Parser()
    parser.parse_string_into_model(model, get_url_contents(feed_url), RDF.Uri("http://foo"))

    item_pattern = RDF.Statement(
        None,
        RDF.Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
        RDF.Uri("http://purl.org/rss/1.0/item"))
    found = []
    for match in model.find_statements(item_pattern):
        found.append(RSSTagPost(model, match.subject))
    return found
def __init__(self, name):
    """Open the Redland storage called *name* and wrap it in a model.

    Storage type and option string come from ``self.get_store_options``.
    The storage is kept on ``self._store`` and the model on ``self._model``.
    """
    storage_type, options = self.get_store_options(name)

    store_name = name
    if storage_type == 'sqlite':
        # workaround: sqlite doesn't support 'dir', so the data directory is
        # made part of the (absolute) store name instead
        store_name = os.path.abspath(
            os.path.join(config.CATALOG_DATA_DIR, name))

    self._store = RDF.Storage(storage_name=storage_type,
                              name=store_name,
                              options_string=options)
    self._model = RDF.Model(self._store)
def make_query(rdf, query):
    """Parse *rdf* into a fresh model and run a SPARQL query against it.

    The module-level ``PREFIX`` text is placed in front of *query* before
    execution.  Returns whatever ``RDF.Query.execute`` returns.
    """
    target = RDF.Model()
    RDF.Parser().parse_into_model(target, rdf)
    full_query = """ %s %s""" % (PREFIX, query)
    return RDF.Query(full_query, query_language="sparql").execute(target)
def model_from_uri(uri=None, **opts):
    """Return a Redland model backed by the store that *uri* describes.

    Recognised forms of *uri*:

    * ``None`` or ``'memory'`` -- in-memory hash store.
    * ``sqlite://<file>`` -- SQLite store.
    * ``bdb://<file>`` -- Berkeley DB hash store; ``new='yes'`` is added
      when ``<file>-sp2o.db`` does not exist and no ``new=`` option was
      supplied.
    * ``mysql://user:password@host[:port]/database[#model]`` -- MySQL store;
      the port defaults to ``3306`` and the model name to ``main``.

    Additional keyword arguments are passed to the storage as
    ``key='value'`` options.

    Raises ConnectionError when Redland is unavailable, the scheme is
    unknown, or the SQLite/MySQL storage cannot be opened.
    """
    if RDF is None:
        raise ConnectionError(
            'Redland support is not available, install from librdf.org')

    options = ','.join(["%s='%s'" % (key, val) for key, val in opts.items()])

    if uri is None or uri == 'memory':
        options += ",hash-type='memory'"
        store = RDF.Storage(storage_name='hashes', name='memory_store',
                            options_string=options)
    elif uri.startswith('sqlite://'):
        path = uri[9:]
        try:
            store = RDF.Storage(storage_name='sqlite', name=path,
                                options_string=options)
        except RDF.RedlandError:
            raise ConnectionError("Can't connect to %s" % uri)
    elif uri.startswith('bdb://'):
        path = uri[6:]
        if not os.path.isfile(path + '-sp2o.db') and 'new=' not in options:
            options += ",new='yes'"
        options += ",hash-type='bdb'"
        store = RDF.Storage(storage_name='hashes', name=path,
                            options_string=options)
    elif uri.startswith('mysql://'):
        uri = uri[8:]
        # Split on the last '@' and the first ':' so that a password
        # containing either character does not break the unpacking.
        userpass, hostdb = uri.rsplit('@', 1)
        user, pwd = userpass.split(':', 1)
        host, db = hostdb.split('/')
        if ':' in host:
            host, port = host.split(':')
        else:
            port = '3306'
        if '#' in db:
            db, name = db.split('#')
        else:
            name = 'main'
        # The format string previously had only three placeholders for five
        # values (user and password were literal asterisks), so building the
        # options raised TypeError.
        options += (",host='%s',port='%s',database='%s',user='%s',"
                    "password='%s'" % (host, port, db, user, pwd))
        try:
            store = RDF.Storage(storage_name='mysql', name=name,
                                options_string=options)
        except RDF.RedlandError:
            raise ConnectionError("Can't connect to %s" % uri)
    else:
        raise ConnectionError('Unknown dburi: %s' % uri)
    return RDF.Model(store)
def __init__(self, source='dummy', defaultStatements=(), **kw):
    """Create a fresh in-memory hash store and seed it with statements.

    *source* names the Redland hash storage.  Each entry of
    *defaultStatements* is added through ``self.addStatement`` after the
    base class has been initialised with the new model; the model is then
    synced.  Extra keyword arguments are accepted and ignored here.
    """
    # A new, context-aware hash store that lives in memory only
    memory_options = "new='yes',hash-type='memory',contexts='yes'"
    redland_model = RDF.Model(RDF.HashStorage(source, options=memory_options))

    super(RedlandHashMemModel, self).__init__(redland_model)

    for default in defaultStatements:
        self.addStatement(default)
    redland_model.sync()
def modeltest(): from IPython import embed # this hardlocks ms = RDF.MemoryStorage('test') m = RDF.Model(ms) p1 = Path('~/git/NIF-Ontology/ttl/NIF-Molecule.ttl').expanduser() p = RDF.Parser(name='turtle') p.parse_into_model(m, p1.as_uri()) embed()
def load():
    """Load every ``.nt`` and ``.ttl`` file below the working directory.

    The directory tree rooted at ``os.getcwd()`` is walked and each matching
    file is loaded into a single ``RDF.Model``, which is returned.  For every
    file one progress line is written to standard error:
    ``Loading <path> ... <n> triples`` where ``<n>`` is ``len()`` of the
    model after that file was loaded.
    """
    graph = RDF.Model()
    for dirpath, dirnames, filenames in os.walk(os.getcwd()):
        for filename in filenames:
            if not filename.endswith(('.nt', '.ttl')):
                continue
            # First half of the progress line goes out before loading ...
            sys.stderr.write('Loading %s ...' % os.path.join(dirpath, filename))
            graph.load('file:' + os.path.join(dirpath, filename))
            # ... and is completed once the model size is known.
            sys.stderr.write(' %d triples\n' % len(graph))
    return graph
def specgen(self, mode="spec"):
    """Parse the ontology at ``self.specloc`` into a new Redland model.

    An ``IOError`` raised while parsing is re-raised as ``Usage`` with the
    message ``Error reading from ontology: <error>``.  *mode* is not used
    by the code visible here.
    """
    model = RDF.Model()
    parser = RDF.Parser()
    try:
        parser.parse_into_model(model, self.specloc)
    except IOError as err:
        raise Usage("Error reading from ontology: %s" % str(err))
def _initOntology(self):
    """Parse the DBpedia 3.9 ontology into an in-memory model and return it.

    The file ``dbpedia_3.9.owl`` is looked up in the directory reported by
    ``self._getCurrentDir()`` and parsed as RDF/XML with the base URI
    ``http://example.org/``.
    """
    # Alternative storage kept from the original source:
    #storage = RDF.HashStorage('dbpedia', options="hash-type='bdb'")
    ontology = RDF.Model(RDF.MemoryStorage())

    owl_file = os.path.join(self._getCurrentDir(), 'dbpedia_3.9.owl')
    xml_parser = RDF.Parser(name="rdfxml")
    xml_parser.parse_into_model(ontology, 'file://' + owl_file, "http://example.org/")
    return ontology
def __init__(self, source='', defaultStatements=(), **kw):
    """Open the BDB-backed hash store *source*, creating it if needed.

    The store counts as existing when ``<source>-sp2o.db`` is present.  A
    newly created store is seeded with *defaultStatements* (each added in the
    context derived from its ``scope``, if any) and synced.  The resulting
    model is passed to the base class initialiser.  Extra keyword arguments
    are accepted and ignored here.
    """
    store_exists = os.path.exists(source + '-sp2o.db')
    if store_exists:
        bdb_options = "hash-type='bdb',contexts='yes'"
    else:
        # Create a new BDB store
        bdb_options = "new='yes',hash-type='bdb',contexts='yes'"
    model = RDF.Model(RDF.HashStorage(source, options=bdb_options))

    if not store_exists:
        for stmt in defaultStatements:
            context = URI2node(stmt.scope) if stmt.scope else None
            model.add_statement(statement2Redland(stmt), context=context)
        model.sync()

    super(RedlandHashBdbModel, self).__init__(model)
def _turtle_to_ntriples(self, data):
    """Convert Turtle read from *data* into N-Triples.

    *data* is a file-like object; its whole content is parsed with Redland's
    Turtle parser (base URI ``'-'``) and re-serialized as N-Triples.

    Returns a ``StringIO`` holding the N-Triples text.  Previously the
    parsed model was discarded and ``None`` was returned.

    Raises TripleStoreError if Redland reports a parse error.
    """
    # Turtle syntax is not supported by allegro graph
    # HACK workaround using redland
    import RDF
    model = RDF.Model()
    parser = RDF.TurtleParser()
    try:
        parser.parse_string_into_model(model, data.read(), '-')
    except RDF.RedlandError as err:
        raise TripleStoreError(err)
    serializer = RDF.Serializer(name='ntriples')
    return StringIO(serializer.serialize_model_to_string(model))
def make_query(self, rdf, query):
    """Run a SPARQL query over the RDF found at *rdf*.

    *rdf* is parsed into a fresh model with the default Redland parser and
    ``self.rdf_prefix`` is placed in front of *query* before execution.
    Returns whatever ``RDF.Query.execute`` returns.
    """
    target = RDF.Model()
    RDF.Parser().parse_into_model(target, rdf)
    full_query = """ %s %s""" % (self.rdf_prefix, query)
    return RDF.Query(full_query, query_language="sparql").execute(target)
def _rdfxml_to_ntriples(self, data):
    """Convert RDF read from *data* into N-Triples.

    *data* is a file-like object; its whole content is parsed with Redland's
    default parser (presumably RDF/XML, going by the function name) using
    the base URI ``'-'`` and re-serialized as N-Triples.

    Returns a ``StringIO`` holding the N-Triples text.  Previously the
    parsed model was discarded and ``None`` was returned.

    Raises TripleStoreError if Redland reports a parse error.
    """
    # Ntriples syntax is not supported by allegro graph
    # as a result format for SPARQL Construct Queries
    # HACK workaround using redland
    import RDF
    model = RDF.Model()
    parser = RDF.Parser()
    try:
        parser.parse_string_into_model(model, data.read(), '-')
    except RDF.RedlandError as err:
        raise TripleStoreError(err)
    serializer = RDF.Serializer(name='ntriples')
    return StringIO(serializer.serialize_model_to_string(model))
def get_complete_metadata(self, user_uri, work_uri, format='json'):
    """Return the metadata of a work and of its sources as serialized RDF.

    The work is loaded with ``Work.from_model`` and read access is checked
    first; only then is *format* validated.  A SPARQL CONSTRUCT query then
    gathers the statements of the work's metadata graph and of its sources'
    metadata graphs, plus one ``dc:source`` statement per source work.

    Returns the result of ``to_string(name=format, base_uri=None)`` on a
    temporary in-memory model holding the query results.

    Raises EntryAccessError when read access is denied and ParamError when
    *format* is not one of ``ntriples``, ``rdfxml`` or ``json``.
    """
    work = Work.from_model(self._model, work_uri, user_uri)

    if not self._can_access('read', work):
        raise EntryAccessError("Can't access work {0}".format(work_uri))

    allowed_formats = ('ntriples', 'rdfxml', 'json')
    if format not in allowed_formats:
        raise ParamError('invalid RDF format: {0}'.format(format))

    # NOTE(review): both URIs are interpolated into the query text without
    # escaping -- confirm callers only pass validated URIs.
    construct_template = (
        ' PREFIX dc: <http://purl.org/dc/elements/1.1/> '
        'PREFIX catalog: <http://catalog.commonsmachinery.se/ns#> '
        'PREFIX rem3: <http://scam.sf.net/schema#> '
        'CONSTRUCT { ?s ?p ?o . ?work dc:source ?sourceWork . } '
        'WHERE { '
        'BIND (<%s> AS ?work) '
        'BIND (<%s> AS ?user) '
        '?work catalog:creator ?creator . '
        '?work catalog:visible ?visible . '
        '?work rem3:metadata ?workMetadata . '
        '?work catalog:source ?sourceRef . '
        '?sourceRef rem3:resource ?sourceWork . '
        '{ ?sourceWork rem3:metadata ?sourceMetadata . } '
        'UNION '
        '{ ?sourceRef rem3:cachedExternalMetadata ?sourceMetadata . } '
        'GRAPH ?g { ?s ?p ?o . } '
        'FILTER((?g = ?workMetadata || ?g = ?sourceMetadata) && '
        '((?visible = "public") || '
        '(?visible = "private") && (?creator = ?user))) '
        '} '
    )
    construct = RDF.Query(construct_template % (work_uri, user_uri))
    matches = construct.execute(self._model)

    # TODO: use results.to_string() with proper format URIs
    scratch = RDF.Model(RDF.MemoryStorage())
    for statement in matches.as_stream():
        scratch.append(statement)

    return scratch.to_string(name=format, base_uri=None)
def _ntriples_to_turtle(self, data):
    """Convert N-Triples read from *data* into Turtle.

    *data* is a file-like object.  Its content is stripped, terminated with
    exactly one newline, parsed with Redland's ``ntriples`` parser (base URI
    ``'-'``) and re-serialized as Turtle with the prefixes from
    ``self._nsmap`` registered.

    Returns a ``StringIO`` holding the Turtle text.  Previously the parsed
    model was discarded and ``None`` was returned.

    Raises TripleStoreError if Redland reports a parse error.
    """
    # Turtle syntax is not supported by allegro graph
    # HACK workaround using redland
    import RDF
    model = RDF.Model()
    parser = RDF.Parser('ntriples')
    data = data.read()
    data = (data.strip() + '\n')
    try:
        parser.parse_string_into_model(model, data, '-')
    except RDF.RedlandError as err:
        raise TripleStoreError(err)
    serializer = RDF.Serializer(name='turtle')
    for prefix, ns in self._nsmap.items():
        serializer.set_namespace(prefix, ns)
    return StringIO(serializer.serialize_model_to_string(model))
def specgen(specloc, template, instances=False, mode="spec"):
    """Parse the ontology at *specloc* into a new Redland model.

    If parsing raises ``IOError`` the message
    ``Error reading from ontology: <error>`` is printed and ``usage()`` is
    called.  The globals ``spec_url``, ``spec_ns`` and ``ns_list`` are
    declared; *template*, *instances* and *mode* are not used by the code
    visible here.
    """
    global spec_url, spec_ns, ns_list

    model = RDF.Model()
    parser = RDF.Parser()
    try:
        parser.parse_into_model(model, specloc)
    except IOError as err:
        print("Error reading from ontology: %s" % str(err))
        usage()
def create_results(self):
    """Compute frequency measures per vocabulary into ``self.results``.

    ``self.results`` is reset to an empty ``RDF.Model``.  For every entry of
    ``self._get_vocabulary_counts()`` the following are passed to
    ``self._append``:

    * absolute frequency -- the entry's ``dataset_count``;
    * relative frequency -- absolute frequency / total dataset count;
    * complementary frequency -- ``0.5 * cos(pi * relative) + 0.5``;
    * inverse frequency -- ``log1p(1 / relative)``.
    """
    dataset_count = self._get_dataset_count()
    vocabulary_counts = self._get_vocabulary_counts()
    self.results = RDF.Model()
    for vocabulary_count in vocabulary_counts:
        absolute_frequency = vocabulary_count['dataset_count']
        # Force true division: with integer counts, Python 2's ``/`` floors
        # the ratio to 0 and the reciprocal below then divides by zero.
        relative_frequency = float(absolute_frequency) / dataset_count
        self._append(
            vocabulary_uri=vocabulary_count['vocabulary'],
            absolute_frequency=absolute_frequency,
            relative_frequency=relative_frequency,
            complementary_frequency=0.5 * math.cos(math.pi * relative_frequency) + 0.5,
            inverse_frequency=math.log1p(1.0 / relative_frequency))
def voidify(self, serialize_as="ntriples"):
    """Return this object's VoID statements serialized as a string.

    ``self.update_model`` is called with an empty ``RDF.Model`` and the
    model it returns is serialized with the Redland serializer named
    *serialize_as*.  For every format except ``ntriples`` the ``void``,
    ``rdf``, ``qb`` and ``xstats`` prefixes are registered first.
    """
    # The model used to be bound to ``model`` while the serializer was given
    # the undefined name ``void_model``, so every call raised NameError.
    void_model = self.update_model(RDF.Model())

    # serialize to string and return
    serializer = RDF.Serializer(name=serialize_as)
    if serialize_as != "ntriples":
        serializer.set_namespace("void", "http://rdfs.org/ns/void#")
        serializer.set_namespace(
            "rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
        serializer.set_namespace("qb", "http://purl.org/linked-data/cube#")
        serializer.set_namespace("xstats", "http://example.org/XStats#")
    return serializer.serialize_model_to_string(void_model)
def createModel():
    """Create and return a model on a new in-memory ``geolink`` hash store.

    Raises Exception if the storage or the model comes back as ``None``.
    """
    store_options = ",".join(["new='yes'", "hash-type='memory'", "dir='.'"])
    store = RDF.Storage(storage_name="hashes",
                        name="geolink",
                        options_string=store_options)
    if store is None:
        raise Exception("new RDF.Storage failed")

    geolink_model = RDF.Model(store)
    if geolink_model is None:
        raise Exception("new RDF.model failed")
    return geolink_model
def parse(self, filename):
    """Parse the RDF file *filename* and extract its root and nodes.

    The file is streamed through ``self.parser`` (base URI
    ``const.base_uri``) into a fresh in-memory model, which is then handed
    to ``self.get_root`` and ``self.get_nodes``.  ``parsed <filename>`` is
    printed when done.

    Raises Exception if the model comes back as ``None``.
    """
    # in-memory model that receives all triples of the file
    triples = RDF.Model()
    if triples is None:
        raise Exception("new RDF.model failed")

    file_uri = RDF.Uri(string="file:" + filename)
    for statement in self.parser.parse_as_stream(file_uri, const.base_uri):
        triples.add_statement(statement)

    self.get_root(triples)
    self.get_nodes(triples)
    print("parsed %s" % (filename,))
def createVoIDModel(to):
    """Build an in-memory model describing a VoID Dataset.

    Arguments:
        to: non-empty ``str`` that becomes the object of the
            ``dcterms:modified`` statement.

    Returns:
        An ``RDF.Model`` with four statements about the blank node
        ``d1lod`` (type, feature, modified, dataDump), or ``None`` -- after
        logging an error -- when *to* is not a string or is empty.
    """
    # Validate the 'to' argument before touching Redland
    if not isinstance(to, str):
        logging.error(
            "Value of 'to' parameter not a string. Failed to update VoID file. Value=%s.",
            to)
        return None

    if not to:
        logging.error(
            "Value of 'to' parameter is zero-length. Failed to update VoID file. Value=%s.",
            to)
        return None

    # Namespaces used by the description
    rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
    void = "http://rdfs.org/ns/void#"
    d1lod = "http://dataone.org/"
    dcterms = "http://purl.org/dc/terms/"

    model = RDF.Model(RDF.MemoryStorage())
    dataset = RDF.Node(blank="d1lod")

    # (predicate, object) pairs, added in this order
    description = [
        (RDF.Uri(rdf + 'type'), RDF.Uri(void + 'Dataset')),
        (RDF.Uri(void + 'feature'), RDF.Uri(d1lod + 'fulldump')),
        (RDF.Uri(dcterms + 'modified'), RDF.Node(to)),
        (RDF.Uri(void + 'dataDump'), RDF.Uri(d1lod + DUMP_FILENAME)),
    ]
    for predicate, value in description:
        model.append(RDF.Statement(dataset, predicate, value))

    return model
def load_rdf(self):
    """(Re)load ``self.filename`` into ``self.model`` when it has changed.

    Nothing happens if a model is already loaded and the file's mtime is not
    newer than ``self.modelMtime``.  Otherwise the mtime is recorded and the
    file is parsed as Turtle into a fresh in-memory model.

    Raises ValueError when Redland reports a parse error; any other
    exception propagates unchanged.
    """
    mtime = os.path.getmtime(self.filename)
    needs_load = self.model is None or mtime > self.modelMtime
    if not needs_load:
        return

    self.modelMtime = mtime
    log.info("loading rdf from %r" % self.filename)
    self.model = RDF.Model(RDF.MemoryStorage())
    u = RDF.Uri("file:%s" % self.filename)
    try:
        for statement in RDF.Parser('turtle').parse_as_stream(u):
            self.model.add_statement(statement)
    except Exception as e:
        # The class is matched by name because e.__class__.__module__ is
        # "RDF", not the real module!
        if e.__class__.__name__ == "RedlandError":
            raise ValueError("Error parsing %s: %s" % (u, e))
        raise
def _ntriples_to_turtle(self, data):
    """Convert N-Triples read from *data* into Turtle.

    *data* is a file-like object.  Its content is stripped, terminated with
    exactly one newline, parsed with Redland's ``ntriples`` parser (base URI
    ``'-'``) and re-serialized as Turtle with the prefixes from
    ``self._nsmap`` registered.

    Returns a ``StringIO`` holding the Turtle text.

    Raises TripleStoreError if Redland reports a parse error.
    """
    # Turtle syntax is not supported by allegro graph
    # HACK workaround using redland
    import RDF

    graph = RDF.Model()
    nt_parser = RDF.Parser('ntriples')
    ntriples = data.read().strip() + '\n'
    try:
        nt_parser.parse_string_into_model(graph, ntriples, '-')
    except RDF.RedlandError as err:
        raise TripleStoreError(err)

    turtle = RDF.Serializer(name='turtle')
    for prefix, ns in self._nsmap.items():
        turtle.set_namespace(prefix, ns)
    return StringIO(turtle.serialize_model_to_string(graph))