from pathlib import Path
import urllib.parse

from rdflib import Graph


def get_degrees(directory_in_str):
    stats = {}
    graph = Graph()
    pathlist = Path(directory_in_str).glob('**/*.rdf')
    nodes = set()
    edges = []
    for path in pathlist:
        path_in_str = str(path)
        graph.parse(path_in_str)
    stats['length'] = len(graph)
    stats['num_of_nodes'] = len(graph.all_nodes())
    # print('graph length: %d, nodes: %d' % (len(graph), len(graph.all_nodes())))
    # print('all node out-degrees:')
    in_degrees = {}
    out_degrees = {}
    for node in graph.all_nodes():
        out_triple = graph.triples([node, None, None])
        out_degree = len(list(out_triple))
        # quote_plus lives in urllib.parse on Python 3
        node_id = urllib.parse.quote_plus(node.encode('utf-8'))
        out_degrees[node_id] = out_degree
        in_triple = graph.triples([None, None, node])
        in_degree = len(list(in_triple))
        in_degrees[node_id] = in_degree
    stats['in_degree'] = in_degrees
    stats['out_degree'] = out_degrees
    return stats
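# Hedged usage sketch (not part of the snippet above): it assumes the imports
# above and a directory "data/" containing .rdf files; the directory name and
# the printed summary are illustrative only.
stats = get_degrees('data')
print('triples: %d, nodes: %d' % (stats['length'], stats['num_of_nodes']))
# five nodes with the highest out-degree, taken from the dict built above
top_out = sorted(stats['out_degree'].items(), key=lambda kv: kv[1], reverse=True)[:5]
for node_id, degree in top_out:
    print('%s  out-degree: %d' % (node_id, degree))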
from rdflib import Graph, BNode, URIRef
from rdflib.namespace import RDF
from rdflib.compare import to_canonical_graph


def test_issue494_collapsing_bnodes():
    """Test for https://github.com/RDFLib/rdflib/issues/494 collapsing BNodes"""
    g = Graph()
    g += [
        (BNode('Na1a8fbcf755f41c1b5728f326be50994'), RDF['object'], URIRef(u'source')),
        (BNode('Na1a8fbcf755f41c1b5728f326be50994'), RDF['predicate'], BNode('vcb3')),
        (BNode('Na1a8fbcf755f41c1b5728f326be50994'), RDF['subject'], BNode('vcb2')),
        (BNode('Na1a8fbcf755f41c1b5728f326be50994'), RDF['type'], RDF['Statement']),
        (BNode('Na713b02f320d409c806ff0190db324f4'), RDF['object'], URIRef(u'target')),
        (BNode('Na713b02f320d409c806ff0190db324f4'), RDF['predicate'], BNode('vcb0')),
        (BNode('Na713b02f320d409c806ff0190db324f4'), RDF['subject'], URIRef(u'source')),
        (BNode('Na713b02f320d409c806ff0190db324f4'), RDF['type'], RDF['Statement']),
        (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'), RDF['object'], BNode('vr0KcS4')),
        (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'), RDF['predicate'], BNode('vrby3JV')),
        (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'), RDF['subject'], URIRef(u'source')),
        (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'), RDF['type'], RDF['Statement']),
        (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'), RDF['object'], URIRef(u'source')),
        (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'), RDF['predicate'], BNode('vcb5')),
        (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'), RDF['subject'], URIRef(u'target')),
        (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'), RDF['type'], RDF['Statement']),
        (BNode('Nec6864ef180843838aa9805bac835c98'), RDF['object'], URIRef(u'source')),
        (BNode('Nec6864ef180843838aa9805bac835c98'), RDF['predicate'], BNode('vcb4')),
        (BNode('Nec6864ef180843838aa9805bac835c98'), RDF['subject'], URIRef(u'source')),
        (BNode('Nec6864ef180843838aa9805bac835c98'), RDF['type'], RDF['Statement']),
    ]

    print('graph length: %d, nodes: %d' % (len(g), len(g.all_nodes())))
    print('triple_bnode degrees:')
    for triple_bnode in g.subjects(RDF['type'], RDF['Statement']):
        print(len(list(g.triples([triple_bnode, None, None]))))
    print('all node degrees:')
    g_node_degs = sorted([
        len(list(g.triples([node, None, None]))) for node in g.all_nodes()
    ], reverse=True)
    print(g_node_degs)

    cg = to_canonical_graph(g)
    print('graph length: %d, nodes: %d' % (len(cg), len(cg.all_nodes())))
    print('triple_bnode degrees:')
    for triple_bnode in cg.subjects(RDF['type'], RDF['Statement']):
        print(len(list(cg.triples([triple_bnode, None, None]))))
    print('all node degrees:')
    cg_node_degs = sorted([
        len(list(cg.triples([node, None, None]))) for node in cg.all_nodes()
    ], reverse=True)
    print(cg_node_degs)

    assert len(g) == len(cg), \
        'canonicalization changed number of triples in graph'
    assert len(g.all_nodes()) == len(cg.all_nodes()), \
        'canonicalization changed number of nodes in graph'
    assert len(list(g.subjects(RDF['type'], RDF['Statement']))) == \
        len(list(cg.subjects(RDF['type'], RDF['Statement']))), \
        'canonicalization changed number of statements'
    assert g_node_degs == cg_node_degs, \
        'canonicalization changed node degrees'
import os
import re
import urllib.parse

from rdflib import Literal, URIRef
from rdflib.namespace import FOAF, XSD

# The graph `g`, the `year` being processed and `predicate1` (the rating
# predicate) are defined earlier in the original script and are not shown here.

predicate2 = URIRef("http://xmlns.com/foaf/0.1/votes")
predicate3 = URIRef("http://xmlns.com/foaf/0.1/episode")

with open('ratings.list', encoding='latin-1') as f:
    for line_number, line in enumerate(f):
        info = re.match(
            r'^ +[0-9.]+ +([0-9]+) +([0-9.]+) +"?([^"\n]+)"? \(([0-9]+)[^\)]*\)( {(.+)})?',
            line)
        if info:
            # print(info.group(1), info.group(2), info.group(3))
            year_string = info.group(4).strip()
            if year_string == str(year):
                movie_string = info.group(3).strip()
                movie_name = Literal(movie_string, datatype=XSD.string)
                movie = URIRef(
                    f"http://imdb.org/movie/{urllib.parse.quote(movie_string)}"
                )
                g.add((movie, FOAF.name, movie_name))
                # group(2) is the rating (a decimal), group(1) is the integer
                # vote count
                rating_string = info.group(2).strip()
                rating_name = Literal(rating_string, datatype=XSD.float)
                g.add((movie, predicate1, rating_name))
                votes_string = info.group(1).strip()
                votes_name = Literal(votes_string, datatype=XSD.integer)
                g.add((movie, predicate2, votes_name))
                if info.group(6):
                    episode_string = info.group(6).strip()
                    episode_name = Literal(episode_string)
                    g.add((movie, predicate3, episode_name))

print(len(g.all_nodes()))
os.makedirs("ratings-data/", exist_ok=True)
g.serialize(destination=f"ratings-data/ratings-{year}.nt", format='nt')
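# Hedged illustration (not part of the original script) of the ratings.list
# line shape the regex above expects, following IMDb's
# "New  Distribution  Votes  Rank  Title" layout. The sample line is made up
# for demonstration.
sample = '      0000000125  1784979   9.2  The Shawshank Redemption (1994)'
demo = re.match(
    r'^ +[0-9.]+ +([0-9]+) +([0-9.]+) +"?([^"\n]+)"? \(([0-9]+)[^\)]*\)( {(.+)})?',
    sample)
if demo:
    # votes, rating, title, year
    print(demo.group(1), demo.group(2), demo.group(3), demo.group(4))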
from collections import Counter

import rdflib.compare
from rdflib import Graph, BNode, URIRef
from rdflib.namespace import RDF
from rdflib.compare import to_canonical_graph


def test_issue725_collapsing_bnodes_2():
    g = Graph()
    g += [
        (BNode('N0a76d42406b84fe4b8029d0a7fa04244'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#object'),
         BNode('v2')),
        (BNode('N0a76d42406b84fe4b8029d0a7fa04244'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate'),
         BNode('v0')),
        (BNode('N0a76d42406b84fe4b8029d0a7fa04244'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject'),
         URIRef(u'urn:gp_learner:fixed_var:target')),
        (BNode('N0a76d42406b84fe4b8029d0a7fa04244'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement')),
        (BNode('N2f62af5936b94a8eb4b1e4bfa8e11d95'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#object'),
         BNode('v1')),
        (BNode('N2f62af5936b94a8eb4b1e4bfa8e11d95'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate'),
         BNode('v0')),
        (BNode('N2f62af5936b94a8eb4b1e4bfa8e11d95'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject'),
         URIRef(u'urn:gp_learner:fixed_var:target')),
        (BNode('N2f62af5936b94a8eb4b1e4bfa8e11d95'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement')),
        (BNode('N5ae541f93e1d4e5880450b1bdceb6404'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#object'),
         BNode('v5')),
        (BNode('N5ae541f93e1d4e5880450b1bdceb6404'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate'),
         BNode('v4')),
        (BNode('N5ae541f93e1d4e5880450b1bdceb6404'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject'),
         URIRef(u'urn:gp_learner:fixed_var:target')),
        (BNode('N5ae541f93e1d4e5880450b1bdceb6404'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement')),
        (BNode('N86ac7ca781f546ae939b8963895f672e'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#object'),
         URIRef(u'urn:gp_learner:fixed_var:source')),
        (BNode('N86ac7ca781f546ae939b8963895f672e'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate'),
         BNode('v0')),
        (BNode('N86ac7ca781f546ae939b8963895f672e'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject'),
         URIRef(u'urn:gp_learner:fixed_var:target')),
        (BNode('N86ac7ca781f546ae939b8963895f672e'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement')),
        (BNode('Nac82b883ca3849b5ab6820b7ac15e490'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#object'),
         BNode('v1')),
        (BNode('Nac82b883ca3849b5ab6820b7ac15e490'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate'),
         BNode('v3')),
        (BNode('Nac82b883ca3849b5ab6820b7ac15e490'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject'),
         URIRef(u'urn:gp_learner:fixed_var:target')),
        (BNode('Nac82b883ca3849b5ab6820b7ac15e490'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement')),
    ]

    turtle = '''
    @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
    @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
    @prefix xml: <http://www.w3.org/XML/1998/namespace> .
    @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

    [] a rdf:Statement ;
        rdf:object [ ] ;
        rdf:predicate _:v0 ;
        rdf:subject <urn:gp_learner:fixed_var:target> .

    [] a rdf:Statement ;
        rdf:object _:v1 ;
        rdf:predicate _:v0 ;
        rdf:subject <urn:gp_learner:fixed_var:target> .

    [] a rdf:Statement ;
        rdf:object [ ] ;
        rdf:predicate [ ] ;
        rdf:subject <urn:gp_learner:fixed_var:target> .
    [] a rdf:Statement ;
        rdf:object <urn:gp_learner:fixed_var:source> ;
        rdf:predicate _:v0 ;
        rdf:subject <urn:gp_learner:fixed_var:target> .

    [] a rdf:Statement ;
        rdf:object _:v1 ;
        rdf:predicate [ ] ;
        rdf:subject <urn:gp_learner:fixed_var:target> .'''

    # g = Graph()
    # g.parse(data=turtle, format='turtle')

    stats = {}
    cg = rdflib.compare.to_canonical_graph(g, stats=stats)

    # print('graph g length: %d, nodes: %d' % (len(g), len(g.all_nodes())))
    # print('triple_bnode degrees:')
    # for triple_bnode in g.subjects(rdflib.RDF['type'], rdflib.RDF['Statement']):
    #     print(len(list(g.triples([triple_bnode, None, None]))))
    # print('all node out-degrees:')
    # print(sorted(
    #     [len(list(g.triples([node, None, None]))) for node in g.all_nodes()]))
    # print('all node in-degrees:')
    # print(sorted(
    #     [len(list(g.triples([None, None, node]))) for node in g.all_nodes()]))
    # print(g.serialize(format='n3'))
    #
    # print('graph cg length: %d, nodes: %d' % (len(cg), len(cg.all_nodes())))
    # print('triple_bnode degrees:')
    # for triple_bnode in cg.subjects(rdflib.RDF['type'],
    #                                 rdflib.RDF['Statement']):
    #     print(len(list(cg.triples([triple_bnode, None, None]))))
    # print('all node out-degrees:')
    # print(sorted(
    #     [len(list(cg.triples([node, None, None]))) for node in cg.all_nodes()]))
    # print('all node in-degrees:')
    # print(sorted(
    #     [len(list(cg.triples([None, None, node]))) for node in cg.all_nodes()]))
    # print(cg.serialize(format='n3'))

    assert len(g.all_nodes()) == len(cg.all_nodes())

    cg = to_canonical_graph(g)
    assert len(g) == len(cg), \
        'canonicalization changed number of triples in graph'
    assert len(g.all_nodes()) == len(cg.all_nodes()), \
        'canonicalization changed number of nodes in graph'
    assert len(list(g.subjects(RDF['type'], RDF['Statement']))) == \
        len(list(cg.subjects(RDF['type'], RDF['Statement']))), \
        'canonicalization changed number of statements'

    # counters for subject, predicate and object nodes
    g_pos_counts = Counter(), Counter(), Counter()
    for t in g:
        for i, node in enumerate(t):
            g_pos_counts[i][t] += 1
    g_count_signature = [sorted(c.values()) for c in g_pos_counts]

    cg_pos_counts = Counter(), Counter(), Counter()
    for t in cg:
        for i, node in enumerate(t):
            cg_pos_counts[i][t] += 1
    cg_count_signature = [sorted(c.values()) for c in cg_pos_counts]

    assert g_count_signature == cg_count_signature, \
        'canonicalization changed node position counts'
from collections import Counter

from rdflib import Graph, BNode, URIRef
from rdflib.namespace import RDF
from rdflib.compare import to_canonical_graph


def test_issue494_collapsing_bnodes():
    """Test for https://github.com/RDFLib/rdflib/issues/494 collapsing BNodes"""
    g = Graph()
    g += [
        (BNode("Na1a8fbcf755f41c1b5728f326be50994"), RDF["object"], URIRef("source")),
        (BNode("Na1a8fbcf755f41c1b5728f326be50994"), RDF["predicate"], BNode("vcb3")),
        (BNode("Na1a8fbcf755f41c1b5728f326be50994"), RDF["subject"], BNode("vcb2")),
        (BNode("Na1a8fbcf755f41c1b5728f326be50994"), RDF["type"], RDF["Statement"]),
        (BNode("Na713b02f320d409c806ff0190db324f4"), RDF["object"], URIRef("target")),
        (BNode("Na713b02f320d409c806ff0190db324f4"), RDF["predicate"], BNode("vcb0")),
        (BNode("Na713b02f320d409c806ff0190db324f4"), RDF["subject"], URIRef("source")),
        (BNode("Na713b02f320d409c806ff0190db324f4"), RDF["type"], RDF["Statement"]),
        (BNode("Ndb804ba690a64b3dbb9063c68d5e3550"), RDF["object"], BNode("vr0KcS4")),
        (
            BNode("Ndb804ba690a64b3dbb9063c68d5e3550"),
            RDF["predicate"],
            BNode("vrby3JV"),
        ),
        (BNode("Ndb804ba690a64b3dbb9063c68d5e3550"), RDF["subject"], URIRef("source")),
        (BNode("Ndb804ba690a64b3dbb9063c68d5e3550"), RDF["type"], RDF["Statement"]),
        (BNode("Ndfc47fb1cd2d4382bcb8d5eb7835a636"), RDF["object"], URIRef("source")),
        (BNode("Ndfc47fb1cd2d4382bcb8d5eb7835a636"), RDF["predicate"], BNode("vcb5")),
        (BNode("Ndfc47fb1cd2d4382bcb8d5eb7835a636"), RDF["subject"], URIRef("target")),
        (BNode("Ndfc47fb1cd2d4382bcb8d5eb7835a636"), RDF["type"], RDF["Statement"]),
        (BNode("Nec6864ef180843838aa9805bac835c98"), RDF["object"], URIRef("source")),
        (BNode("Nec6864ef180843838aa9805bac835c98"), RDF["predicate"], BNode("vcb4")),
        (BNode("Nec6864ef180843838aa9805bac835c98"), RDF["subject"], URIRef("source")),
        (BNode("Nec6864ef180843838aa9805bac835c98"), RDF["type"], RDF["Statement"]),
    ]

    # print('graph length: %d, nodes: %d' % (len(g), len(g.all_nodes())))
    # print('triple_bnode degrees:')
    # for triple_bnode in g.subjects(RDF['type'], RDF['Statement']):
    #     print(len(list(g.triples([triple_bnode, None, None]))))
    # print('all node degrees:')
    g_node_degs = sorted(
        [len(list(g.triples([node, None, None]))) for node in g.all_nodes()],
        reverse=True,
    )
    # print(g_node_degs)

    cg = to_canonical_graph(g)
    # print('graph length: %d, nodes: %d' % (len(cg), len(cg.all_nodes())))
    # print('triple_bnode degrees:')
    # for triple_bnode in cg.subjects(RDF['type'], RDF['Statement']):
    #     print(len(list(cg.triples([triple_bnode, None, None]))))
    # print('all node degrees:')
    cg_node_degs = sorted(
        [len(list(cg.triples([node, None, None]))) for node in cg.all_nodes()],
        reverse=True,
    )
    # print(cg_node_degs)

    assert len(g) == len(cg), "canonicalization changed number of triples in graph"
    assert len(g.all_nodes()) == len(
        cg.all_nodes()
    ), "canonicalization changed number of nodes in graph"
    assert len(list(g.subjects(RDF["type"], RDF["Statement"]))) == len(
        list(cg.subjects(RDF["type"], RDF["Statement"]))
    ), "canonicalization changed number of statements"
    assert g_node_degs == cg_node_degs, "canonicalization changed node degrees"

    # counters for subject, predicate and object nodes
    g_pos_counts = Counter(), Counter(), Counter()
    for t in g:
        for i, node in enumerate(t):
            g_pos_counts[i][t] += 1
    g_count_signature = [sorted(c.values()) for c in g_pos_counts]

    cg = to_canonical_graph(g)
    cg_pos_counts = Counter(), Counter(), Counter()
    for t in cg:
        for i, node in enumerate(t):
            cg_pos_counts[i][t] += 1
    cg_count_signature = [sorted(c.values()) for c in cg_pos_counts]

    assert (
        g_count_signature == cg_count_signature
    ), "canonicalization changed node position counts"
from rdflib import Graph, Namespace, OWL, RDF, RDFS, URIRef
from glob import glob
import os

PATH = "/Users/hoekstra/Dropbox/projects/designpatterns_stats/*.owl"
DESTINATION = "/Users/hoekstra/projects/designpatterns/stripped"

for f in glob(PATH):
    (_, target_name) = os.path.split(f)

    g = Graph()
    g.parse(f, format='turtle')

    print g.serialize(format='turtle')

    g.remove((None, None, OWL['AnnotationProperty']))

    print g.serialize(format='turtle')

    print list(g.all_nodes())

    annotations = [
        uri for uri in list(g.all_nodes())
        if isinstance(uri, URIRef) and
        'http://www.ontologydesignpatterns.org/schemas/cpannotationschema.owl#'
        in str(uri)
    ]

    print annotations

    break
# This function comes from a larger Python 2 grading script: Graph, csv, glob,
# os, md5, codecs, traceback, URIRef, RDF, DeductiveClosure, RDFS_Semantics and
# the helpers update(), sparql() and grade() are imported or defined elsewhere
# in that script.
def check(path):
    rdfs_graph = Graph()
    rdfs_graph.load('rdf-schema.ttl', format='turtle')
    rdfs_nodes = list(rdfs_graph.all_nodes())

    with open('{}/grading.csv'.format(path), 'w') as out:
        fieldnames = [
            'Username', 'Assignment 2b |846597', 'Assignment 2c |846599',
            'query', 'syntax', 'asserted', 'inferred', 'baseline',
            'subjects_objects', 'predicates', 'inferred through schema', 'hash'
        ]
        fieldnames += [
            os.path.basename(fn) for fn in glob('../constraints/*.rq')
        ]

        writer = csv.DictWriter(out, fieldnames)
        writer.writeheader()

        for f in glob("{}/*.ttl".format(path)):
            (basename, ext) = os.path.splitext(os.path.basename(f))
            basename = basename.split('_')[-1]
            line = {'Username': basename}

            print "==========\n{}\n==========".format(basename)

            try:
                with open(f, 'r') as fi:
                    contents = fi.readlines()
                h = md5.new()
                h.update(''.join(contents))
                hexdigest = h.hexdigest()
            except:
                traceback.print_exc()
                print "Could not create hash of {}".format(f)

            g = Graph()
            try:
                g.load(f, format='turtle')
                line['syntax'] = 1
            except Exception:
                print "Could not parse {}".format(f)
                line['syntax'] = 0
                print(traceback.format_exc())

            # Baseline is:
            # 1. for every *new* subject, predicate or object that is a URIRef,
            #    a new triple is generated by inference rule 1
            # 2. for every *new* predicate, one additional triple is produced
            #    (subproperty of itself)
            # 3. for rdf:Property, 2 more triples that define it
            # 4. for rdf:type, 2 more triples
            subjects_objects = len(
                set([
                    so for so in [so for so in g.all_nodes() if type(so) == URIRef]
                    if so not in rdfs_nodes
                ]))
            predicates = len(set([p for p in g.predicates()]))
            baseline = subjects_objects + 2 * predicates + 2 + 2

            # Only count the asserted triples that do not define any RDFS or RDF
            # terms, or specify that some subject is of type RDFS Class or
            # Property.
            asserted_triples = [(s, p, o)
                                for (s, p, o) in g.triples((None, None, None))
                                if s not in rdfs_nodes and o not in rdfs_nodes]

            # for (s, p, o) in asserted_triples:
            #     if type(o) == URIRef:
            #         print g.qname(s), g.qname(p), g.qname(o)
            #     else:
            #         print g.qname(s), g.qname(p), o

            asserted = len(asserted_triples)

            for constraint_file in glob('../constraints/*.rq'):
                with open(constraint_file) as cf:
                    query = cf.read()
                constraint = os.path.basename(constraint_file)
                result = g.query(query)
                count = 0
                for r in result:
                    count += 1
                line[constraint] = count
                # print "{}: {}".format(constraint, count)

            try:
                DeductiveClosure(RDFS_Semantics,
                                 rdfs_closure=True,
                                 axiomatic_triples=False,
                                 datatype_axioms=False).expand(g)
            except:
                traceback.print_exc()

            inferred = len([(s, p, o)
                            for (s, p, o) in g.triples((None, None, None))])

            class_use = len(
                set([
                    s for (s, p, o) in g.triples((None, RDF.type, None))
                    if o not in rdfs_nodes
                ]))
            property_use = len(set([p for (s, p, o) in asserted_triples]))

            try:
                with codecs.open('{}/{}.rq'.format(path, basename),
                                 "r",
                                 encoding='utf-8-sig',
                                 errors='ignore') as qf:
                    query = qf.read()

                # Remove CR
                query = query.replace('\r\n', '\n')
                # try:
                #     query = query.decode("utf-8-sig").encode('utf-8')
                # except:
                #     print "..."

                # This kills the RDFLib parser
                query = query.replace('prefix', 'PREFIX')

                try:
                    # adding triples to the database
                    all_triples = g.serialize(format='turtle')
                except:
                    # but if something's wrong with the original graph, just
                    # use an empty string
                    all_triples = ""

                update(all_triples)

                # running the query
                qresults = sparql(query)

                # If stardog gives an error, we set the qcount to -1, and make
                # sure that if rdflib does better, we overwrite that;
                # otherwise, we fall back to the -1 qcount of stardog.
                if qresults != -1:
                    # counting results
                    stardog_qcount = 0
                    for r in qresults:
                        stardog_qcount += 1
                else:
                    stardog_qcount = -1

                # clearing the database
                update(all_triples, action='clear')

                try:
                    qresults = g.query(query)
                    qcount = 0
                    for r in qresults:
                        qcount += 1
                    # Use whichever is higher
                    if stardog_qcount > qcount:
                        qcount = stardog_qcount
                except:
                    qcount = stardog_qcount
            except IOError:
                print "Could not find query"
                qcount = -2
            except:
                print "Query failed"
                try:
                    print query
                except:
                    "..."
                print(traceback.format_exc())
                qcount = -1

            line['query'] = qcount
            # asserted triples that could not be inferred
            line['asserted'] = asserted
            # total triples after inference
            line['inferred'] = inferred
            # minimal expected number of new triples (baseline) for a file
            # without schema
            line['baseline'] = baseline
            line['subjects_objects'] = subjects_objects
            line['predicates'] = predicates
            # triples inferred through the schema
            line['inferred through schema'] = inferred - baseline - asserted
            line['hash'] = hexdigest

            line = grade(line)

            writer.writerow(line)

            del g
import re

from rdflib import Graph, URIRef
from rdflib.namespace import RDF


class Model:
    def __init__(self):
        self.graph = Graph()
        self.loaded = set()

    def load(self, source, format=None):
        if source not in self.loaded:
            self.loaded.add(source)
            try:
                self.graph.parse(source, format=format)
            except Exception as e:
                print(e)
                return False
        return True

    def size(self):
        return len(self.graph)

    def pred(self, subj):
        return list(set(self.graph.predicates(subj)))

    def types(self):
        return set(self.graph.objects(predicate=RDF.type))

    def contains_resource(self, ref):
        resources = filter(lambda x: type(x) == URIRef, self.graph.all_nodes())
        return ref in resources

    def get_resource_objects(self, subj, pred):
        return filter(lambda x: type(x) == URIRef,
                      self.graph.objects(subj, pred))

    def get_objects(self, subj, pred):
        return list(self.graph.objects(subj, pred))

    def get_subjects(self, pred, obj):
        return list(self.graph.subjects(pred, obj))

    def get_properties(self, subj):
        properties = {}
        for pred, obj in self.graph.predicate_objects(subj):
            if pred in properties:
                properties[pred].append(obj)
            else:
                properties[pred] = [obj]
        return properties

    def get_reverse_properties(self, obj):
        properties = {}
        for subj, pred in self.graph.subject_predicates(obj):
            if pred in properties:
                properties[pred].append(subj)
            else:
                properties[pred] = [subj]
        return properties

    def norm(self, ref):
        return self.graph.namespace_manager.normalizeUri(ref) if ref else None

    def to_uriref(self, string):
        """Expand QName to URIRef based on existing namespaces."""
        if not string:
            return None
        elif re.match('[^:/]*:[^:/]+', string):
            prefix, name = string.split(':')
            try:
                namespace = dict(self.graph.namespaces())[prefix]
                # wrap in URIRef so the result matches the docstring and the
                # URIRef comparisons used elsewhere in this class
                return URIRef(namespace + name)
            except:
                return None
        else:
            return URIRef(string)
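# Hedged usage sketch for the Model wrapper above (not from the original
# source). The FOAF spec URL is only an example document, and to_uriref()
# relies on the 'foaf' prefix being bound in the parsed graph.
m = Model()
if m.load('http://xmlns.com/foaf/spec/index.rdf', format='xml'):
    print('%d triples loaded' % m.size())
    person = m.to_uriref('foaf:Person')
    if person is not None and m.contains_resource(person):
        for prop, values in m.get_properties(person).items():
            print('%s: %d value(s)' % (m.norm(prop), len(values)))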
# From a larger Python 2 module: urllib and the helpers readToolBoxes(),
# readSoft(), getWebsite() and normalizeArcpyToolString() are imported or
# defined elsewhere in that module.
def generateRDF(outf,
                softuri,
                tooldictf,
                softdictf=None,
                tooluris=True,
                normalize=normalizeArcpyToolString,
                toolwebsites='ResourceCatalogue\\ArcGIStoolwebsites.json'):
    from rdflib import URIRef, BNode, Literal, Namespace, Graph
    from rdflib.namespace import RDF, FOAF, RDFS

    dbo = Namespace("http://dbpedia.org/ontology/")
    dbp = Namespace("http://dbpedia.org/resource/")
    dc = Namespace("http://purl.org/dc/elements/1.1/")
    dct = Namespace("http://purl.org/dc/terms/")
    wf = Namespace("http://geographicknowledge.de/vocab/Workflow.rdf#")
    gis = Namespace("http://geographicknowledge.de/vocab/GISConcepts.rdf#")
    tools = Namespace("http://geographicknowledge.de/vocab/GISTools.rdf#")

    tdict = readToolBoxes(tooldictf)

    g = Graph()
    if softdictf != None:
        softdict = readSoft(softdictf)
        softwarelist = []
        for software, v in softdict.items():
            softwarelist.append(software)
            g.add((URIRef(software), RDF.type, dbo.Software))
            if 'name' in v.keys():
                g.add((URIRef(software), FOAF['name'], Literal(v['name'])))
            if 'website' in v.keys():
                g.add((URIRef(software), FOAF['isPrimaryTopicOf'],
                       URIRef(v['website'])))
            if v['companies'] != []:
                g.add((URIRef(software), dbo.developer,
                       URIRef(v['companies'][0])))
        w = getWebsite(softwarelist)
        if w != []:
            for ww in w:
                g.add((URIRef(ww[1]), FOAF['homepage'], URIRef(ww[0])))
    else:
        # there are already some software tools, so load them
        g.parse(outf, format='turtle')

    toolws = readSoft(toolwebsites)

    # Now add the tools of the software softuri
    if URIRef(softuri) in g.all_nodes():
        for toolbox, toollist in tdict.items():
            tb = urllib.pathname2url(toolbox)
            g.add((tools[tb], RDF.type, gis.Toolbox))
            g.add((tools[tb], dct.isPartOf, URIRef(softuri)))
            g.add((tools[tb], FOAF.name, Literal(toolbox)))
            for t in toollist:
                toolst = (URIRef(t) if tooluris else tools[t])
                if t in toolws:
                    g.add((toolst, FOAF['homepage'],
                           URIRef(toolws[t]['website'])))
                g.add((toolst, RDF.type, gis.Tool))
                g.add((toolst, dct.isPartOf, tools[tb]))
                g.add((toolst, FOAF.name, Literal(normalize(t))))

    g.bind('dbo', URIRef("http://dbpedia.org/ontology/"))
    g.bind('dbp', URIRef("http://dbpedia.org/resource/"))
    g.bind('dc', URIRef("http://purl.org/dc/elements/1.1/"))
    g.bind('dct', URIRef("http://purl.org/dc/terms/"))
    g.bind('wf', URIRef("http://geographicknowledge.de/vocab/Workflow.rdf#"))
    g.bind('gis', URIRef("http://geographicknowledge.de/vocab/GISConcepts.rdf#"))
    g.bind('tools', URIRef("http://geographicknowledge.de/vocab/GISTools.rdf#"))
    g.bind('foaf', FOAF)
    g.bind('rdf', RDF)
    g.bind('rdfs', RDFS)

    print 'number of triples generated: ' + str(len(g))
    g.serialize(destination=outf, format='turtle')
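# Hedged example invocation (not in the original module). All file names and
# the software URI below are placeholders; with softdictf left as None,
# generateRDF() appends tools to an existing Turtle file, so the output file
# must already exist from an earlier run that added the software resources.
generateRDF(outf='ToolRepository.ttl',
            softuri='http://dbpedia.org/resource/ArcGIS',
            tooldictf='ResourceCatalogue/ArcGIStoolboxes.json',
            tooluris=False)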