def enterNewAnnotationMenu(self):
    ''' Interactive input for a new annotation '''
    self.printNewAnnotationMenu()
    i = 1
    for year in self.yearsAnnotations:
        print '{}) {}'.format(i, year["year"])
        i += 1
    print
    year = raw_input('Table to annotate: ')
    cell = raw_input('Cell to annotate: ')
    author = raw_input('Author: ')
    corrected = raw_input('Corrected value (leave blank if none): ')
    flag = raw_input('Flag: ')
    graphURI = URIRef(self.yearsAnnotations[int(year) - 1]["uri"])
    d2sGraphURI = graphURI.replace("cedar-project.nl", "www.data2semantics.org")
    annoURI = URIRef(d2sGraphURI + '/NOORDBRABANT/' + cell)
    cellURI = annoURI.replace("annotations", "data")

    # Create the new annotation
    query = """
    PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    INSERT INTO GRAPH <""" + graphURI + """> {
        <""" + annoURI + """> a oa:Annotation;
        oa:annotated \"""" + str(datetime.datetime.now().strftime("%Y-%m-%d")) + """\"^^xsd:date;
        oa:annotator \"""" + author + """\";
        oa:generated \"""" + str(datetime.datetime.now().strftime("%Y-%m-%d")) + """\"^^xsd:date;
        oa:generator <https://cedar-project.nl/tools/cedar-demo.py>;
        oa:hasBody [ rdf:value \"""" + corrected + ' ' + flag + """\" ];
        oa:hasTarget <""" + cellURI + """>;
        oa:modelVersion <http://www.openannotation.org/spec/core/20120509.html> .
    }
    """
    # query = "INSERT INTO GRAPH <http://cedar-project.nl/annotations/VT_1859_01_H1> {<http://a> rdf:type <http:b>}"
    print query

    self.sparql.setQuery(query)
    self.sparql.setReturnFormat(JSON)
    self.results = self.sparql.query().convert()
def changeUrls(graph, urlFrom, urlTo):
    for s, p, o in graph.triples((None, None, None)):
        if urlFrom in s:
            graph.remove((s, p, o))
            s = URIRef(s.replace(urlFrom, urlTo))
            graph.add((s, p, o))
        if urlFrom in p:
            graph.remove((s, p, o))
            p = URIRef(p.replace(urlFrom, urlTo))
            graph.add((s, p, o))
        if urlFrom in o:
            graph.remove((s, p, o))
            o = URIRef(o.replace(urlFrom, urlTo))
            graph.add((s, p, o))
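A minimal usage sketch for changeUrls above, assuming rdflib is available; the example graph and host names are illustrative only.

from rdflib import Graph, URIRef

g = Graph()
g.add((URIRef("http://old.example.org/doc/1"),
       URIRef("http://old.example.org/vocab/related"),
       URIRef("http://old.example.org/doc/2")))

# Rewrite the old host wherever it appears in subjects, predicates or objects.
changeUrls(g, "http://old.example.org", "http://new.example.org")

for triple in g:
    print(triple)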
def change_urls_in_result(self, graph):
    for s, p, o in graph.triples((None, None, None)):
        if self._remote_tests_location in s:
            graph.remove((s, p, o))
            s = URIRef(s.replace(self._remote_tests_location, self._tests_location))
            graph.add((s, p, o))
        if self._remote_tests_location in p:
            graph.remove((s, p, o))
            p = URIRef(p.replace(self._remote_tests_location, self._tests_location))
            graph.add((s, p, o))
        if self._remote_tests_location in o:
            graph.remove((s, p, o))
            o = URIRef(o.replace(self._remote_tests_location, self._tests_location))
            graph.add((s, p, o))
def get_gw_resource(rid):
    r_uri = URIRef(proxy.base + '/' + rid)
    if cache is not None:
        g, ttl = cache.create(gid=r_uri, loader=proxy.load, format='text/turtle')
        headers = {'Cache-Control': 'max-age={}'.format(ttl)}
    else:
        g, headers = proxy.load(r_uri, **request.args)
    return serialize(g, uri=URIRef(r_uri.replace('=', '%3D'))), headers
def get_service():
    g = new_graph()
    me = URIRef(url_for('get_service', _external=True))
    g.add((me, RDF.type, SERVICE_TYPE))
    for db_resource in service_graph.subjects(RDF.type, PARTITION.Root):
        db_resource = URIRef(
            url_for('get_resource', rid=db_resource.replace(URI_PREFIX, ""), _external=True))
        g.add((me, CONTAINMENT_LINK, db_resource))
    response = make_response(g.serialize(format='turtle'))
    response.headers['Content-Type'] = 'text/turtle'
    return response
def relativize(self, uri):
    base = URIRef(self.base)
    basedir = URIRef(self.base if base.endswith('/') else base.rsplit('/', 1)[0])
    if base is not None:
        if uri == base:
            uri = URIRef('')
        elif uri == basedir:
            uri = URIRef('.')
        elif uri.startswith(basedir + '/'):
            uri = URIRef(uri.replace(basedir + '/', "", 1))
    return uri
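A small usage sketch for the relativize method above; the holder class and base URI are hypothetical, invented only to exercise the function.

from rdflib import URIRef

class BaseHolder:
    # Hypothetical carrier of a base URI, so relativize() can be called as a method.
    def __init__(self, base):
        self.base = base

BaseHolder.relativize = relativize  # attach the function defined above

s = BaseHolder("http://example.org/data/doc.ttl")
print(s.relativize(URIRef("http://example.org/data/doc.ttl")))   # -> '' (the base itself)
print(s.relativize(URIRef("http://example.org/data/other.ttl"))) # -> 'other.ttl'
print(s.relativize(URIRef("http://example.org/data")))           # -> '.'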
def _formatNodeURIRef(uriref, anno_uri, body_uri):
    '''
    Rewrite a URIRef according to the node configuration

    * uriref: rdflib.URIRef
    * anno_uri: String as hexadecimal
    * body_uri: String as hexadecimal
    '''
    if isinstance(uriref, URIRef) and NODE_URI in uriref:
        uriref = URIRef(uriref.replace(NODE_URI, getattr(settings, 'NODE_URI', NODE_URI) + '/'))
    if isinstance(uriref, URIRef) and CH_NODE in uriref:
        uriref = URIRef(uriref.replace(CH_NODE + ':', getattr(settings, 'NODE_URI', NODE_URI) + '/'))
    if isinstance(uriref, URIRef) and ANNO_URI in uriref:
        uriref = URIRef(uriref.replace(ANNO_URI, "resource/" + anno_uri))
    if isinstance(uriref, URIRef) and BODY_URI in uriref:
        uriref = URIRef(uriref.replace(BODY_URI, "resource/" + body_uri))
    return uriref
def _update_namespace(self):
    # updating the namespace
    OLD = "file://" + os.path.join(os.path.dirname(__file__), 'data', WIT_FILENAME)
    NEW = "http://www.w3.org/2005/Incubator/webid/earl/%s#" % WIT
    for cid, _, source in self.g.triples((None, None, None)):
        if source:
            try:
                context = self.g.get_context(cid)
                for s, p, o in context:
                    context.remove((s, p, o))
                    if isinstance(s, URIRef) and OLD in s:
                        s = URIRef(s.replace(OLD, NEW))
                    if isinstance(p, URIRef) and OLD in p:
                        p = URIRef(p.replace(OLD, NEW))
                    if isinstance(o, URIRef) and OLD in o:
                        o = URIRef(o.replace(OLD, NEW))
                    context.add((s, p, o))
            except Exception as e:
                # print e
                raise e
def tableView_objectValueForTableColumn_row_(self, tableView, tableColumn, row):
    id = tableColumn.identifier()
    uri = self.resources[row]
    if id == "uri":
        base = self.context
        base = base.split("#", 1)[0]
        uri = URIRef(uri.replace(base, "", 1))  # relativize
        return uri
    elif id == "label":
        return self.redfoot.label(uri, "")
    elif id == "comment":
        return self.redfoot.comment(uri, "")
    else:
        return ""
def clean_actor_uris(self, uris) -> Graph or None:
    cleaned_graph = Graph()
    self.__sparql.setQuery(Queries.lmdb_actors_to_dbpedia_movie_uri_pairs())
    response = self.__sparql.query().convert()
    pairs = list(map(Stardog.__to_movie_clean_uri_pair, response["results"]["bindings"]))
    if len(pairs) == 0:
        return None
    for lmdb_resource, dbp_uri in pairs:
        lmdb_resource = URIRef(lmdb_resource)
        dbp_uri = URIRef(dbp_uri.replace('"', "").replace("'", "").replace("`", ""))
        cleaned_graph.add((lmdb_resource, namespace.OWL.sameAs, dbp_uri))
    cleaned_graph.serialize('Data/CleanedLMDBActorSameAs.ttl', format='turtle')
    return cleaned_graph  # It is now safe to import the cleaned data using stardog.
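A minimal sketch of the quote-stripping owl:sameAs pattern used above, with rdflib only; the LMDB and DBpedia URIs are illustrative and the Stardog/SPARQL plumbing is omitted.

from rdflib import Graph, URIRef
from rdflib.namespace import OWL

# Illustrative pair; in the method above these come from the SPARQL result bindings.
lmdb_resource = "http://data.linkedmdb.org/resource/actor/29704"
dbp_uri = "http://dbpedia.org/resource/Some_'Actor'"

cleaned_graph = Graph()
cleaned_graph.add((URIRef(lmdb_resource),
                   OWL.sameAs,
                   URIRef(dbp_uri.replace('"', "").replace("'", "").replace("`", ""))))
print(cleaned_graph.serialize(format="turtle"))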
class Cluster: def __init__(self, model, uri): self.model = model self.uri = URIRef(uri) self.__prototype = None self.__type = None self.__members = [] self.__forward = None self.__backward = None self.__targets = None self.__selected_targets = None self.__target_wiki = None self.__freebases = None self.__qids = Counter() self.__selected_qnodes = None self.__q_urls = {} self.__groundtruth = None self.__debug_info = None self.__all_labels = None if model.graph: self.__open_clause = 'GRAPH <%s> {' % self.model.graph self.__close_clause = '}' else: self.__open_clause = self.__close_clause = '' @property def href(self): res = self.uri.replace('http://www.isi.edu/gaia', '/cluster').replace('http://www.columbia.edu', '/cluster') res = res.replace('/entities/', '/entities/' + self.model.repo + '/') res = res.replace('/events/', '/events/' + self.model.repo + '/') if self.model.graph: res = res + '?g=' + self.model.graph return res @property def label(self): if self.uri in self.model.pickled and 'label' in self.model.pickled[ self.uri]: return self.model.pickled[self.uri]['label'] return self.prototype.label @property def all_labels(self): if not self.__all_labels: self.__all_labels = Counter() for m in self.members: for l, c in m.all_labels: if l in self.__all_labels: self.__all_labels[l] += c else: self.__all_labels[l] = c return self.__all_labels.most_common() @property def prototype(self): if not self.__prototype: self._init_cluster_prototype() return self.__prototype @property def type(self): if self.uri in self.model.pickled and 'type' in self.model.pickled[ self.uri]: return self.model.pickled[self.uri]['type'] if not self.__type: self._init_cluster_prototype() return self.__type @property def members(self): if not self.__members: self._init_cluster_members() return self.__members @property def targets(self): if self.__targets is None: self._init_cluster_members() return self.__targets.most_common() @property def selected_targets(self): if self.__selected_targets is None: self.__selected_targets = self.debug_info.selected_targets return self.__selected_targets def get_target_stats(self, target): return self.debug_info.target_statistics[target] @property def target_wiki(self): if self.__target_wiki is None: self._init_cluster_members() return self.__target_wiki @property def freebases(self): if self.__freebases is None: self._init_cluster_members() return self.__freebases.most_common() @property def targetsSize(self): return len(self.targets) @property def qids(self): if not self.__qids: self._init_qnodes() return self.__qids.most_common() @property def selected_qnodes(self): if not self.__selected_qnodes: self.__selected_qnodes = self.debug_info.selected_qnodes return self.__selected_qnodes def get_qnode_stats(self, qurl): if qurl in self.debug_info.qnode_statistics: return self.debug_info.qnode_statistics[qurl] else: return None @property def q_urls(self): if not self.__q_urls: self._init_qnodes() return self.__q_urls @property def size(self): if self.__members: return len(self.__members) return self._query_for_size() @property def forward(self): if self.__forward is None: self.__forward = set() self._init_forward_clusters() return self.__forward @property def backward(self): if self.__backward is None: self.__backward = set() self._init_backward_clusters() return self.__backward @property def neighbors(self): return self.forward | self.backward def neighborhood(self, hop=1): if hop == 1 and self.prototype.type != AIDA.Relation: hood = self.neighbors # for neighbor in [x for x in 
self.neighbors if x.subject.proto] for neighbor in self.neighbors: if neighbor.subject.prototype.type == AIDA.Relation: hood |= neighbor.subject.neighbors return hood if hop <= 1: return self.neighbors hood = set() for neighbor in self.neighbors: hood |= neighbor.subject.neighborhood(hop - 1) hood |= neighbor.object.neighborhood(hop - 1) return hood @property def img(self): import os.path _, name = split_uri(self.uri) svgpath = 'static/img/' + name + '.svg' if os.path.isfile(svgpath): return name from graph import SuperEdgeBasedGraph graph = SuperEdgeBasedGraph(self.model, self.neighborhood(), self, self.uri) path = graph.dot() return graph.name @classmethod def ask(cls, sparql, graph, uri): if graph: open_clause = 'GRAPH <%s> {' % graph close_clause = '}' else: open_clause = close_clause = '' query = "ASK { %s ?cluster a aida:SameAsCluster %s}" % (open_clause, close_clause) for ans in sparql.query(query, namespaces, {'cluster': URIRef(uri)}): return ans return False @property def groundtruth(self): if self.__groundtruth is None: self._init_groundtruth() return self.__groundtruth @property def has_debug(self): return debug.has_debug(self.model.repo, self.model.graph) @property def debug_info(self): if self.__debug_info is None: if debug.has_debug(self.model.repo, self.model.graph): self._init_debug_info() else: self.__debug_info = False return self.__debug_info def _init_cluster_prototype(self): query = """ SELECT ?prototype (MIN(?label) AS ?mlabel) ?type ?category WHERE { %s ?cluster aida:prototype ?prototype . ?prototype a ?type . OPTIONAL { ?prototype aida:hasName ?label } . OPTIONAL { ?statement a rdf:Statement ; rdf:subject ?prototype ; rdf:predicate rdf:type ; rdf:object ?category ; } %s } GROUP BY ?prototype ?type ?category """ % (self.__open_clause, self.__close_clause) for prototype, label, type_, cate in self.model.sparql.query( query, namespaces, {'cluster': self.uri}): if not label and cate: _, label = split_uri(cate) self.__prototype = ClusterMember(self.model, prototype, label, type_) self.__type = cate def _init_cluster_members(self): self.__targets = Counter() self.__target_wiki = {} self.__freebases = Counter() query = """ SELECT ?member (MIN(?label) AS ?mlabel) ?type WHERE { %s ?membership aida:cluster ?cluster ; aida:clusterMember ?member . MINUS {?cluster aida:prototype ?member} %s OPTIONAL { ?member aida:hasName ?label } . OPTIONAL {?statement a rdf:Statement ; rdf:subject ?member ; rdf:predicate rdf:type ; rdf:object ?type }. } GROUP BY ?member ?type """ % (self.__open_clause, self.__close_clause) for member, label, type_ in self.model.sparql.query( query, namespaces, {'cluster': self.uri}): m = ClusterMember( model=self.model, uri=str(member), label=label, type_=type_, debug_info=self.debug_info.members[str(member)]['raw_object']) self.__members.append(m) for target in m.targets.keys(): self.__targets[target] += 1 for freebase in m.freebases.keys(): self.__freebases[freebase] += 1 query = ''' SELECT ?qnode ?qnodeLabel WHERE { ?qnode wdt:P1566 ?target . SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} } ''' for target in self.__targets.keys(): target_t = target[target.index(':') + 1:] for qnode, qnodeLabel in wikidata_sparql.query( query, namespaces, {'target': Literal(target_t)}): url = str(qnode) qnode = url[url.rfind('/') + 1:] self.__target_wiki[target] = {} self.__target_wiki[target]['qnode'] = qnode self.__target_wiki[target]['url'] = url self.__target_wiki[target]['label'] = str(qnodeLabel) def _init_qnodes(self): for fbid, count in self.freebases: if ":NIL" not in fbid: fbid = '/' + fbid.replace('.', '/') query = """ SELECT ?qid ?label WHERE { ?qid wdt:P646 ?freebase . ?qid rdfs:label ?label filter (lang(?label) = "en") . } LIMIT 1 """ for qid, label in wikidata_sparql.query( query, namespaces, {'freebase': Literal(fbid)}): qnodeURL = str(qid) qid = qnodeURL.rsplit('/', 1)[1] self.__qids[qid] = count if qid not in self.__q_urls: self.__q_urls[qid] = qnodeURL def _init_groundtruth(self): # query to find cluster of the missing member query = ''' SELECT ?cluster WHERE { %s ?membership aida:cluster ?cluster ; aida:clusterMember ?member . %s } ''' % (self.__open_clause, self.__close_clause) member_set = set([str(m.uri) for m in self.members]) gt_set = set() for m in member_set: if self.model.graph: res = requests.get(groundtruth_url + '/' + self.model.repo + '?g=' + self.model.graph + '&e=' + m) else: res = requests.get(groundtruth_url + '/' + self.model.repo + '?e=' + m) if res.status_code == 404: self.__groundtruth = False return if len(res.json()) > 0: gt_set = set(res.json()) break if len(gt_set) > 0: hit = member_set.intersection(gt_set) miss = member_set.difference(gt_set) missing = gt_set.difference(member_set) missing_dict = {} if missing: for m in missing: for c, in self.model.sparql.query(query, namespaces, {'member': URIRef(m)}): missing_dict[m] = str(c).replace( 'http://www.isi.edu/gaia/entities/', '') self.__groundtruth = Groundtruth(gt_set, hit, miss, missing_dict) else: self.__groundtruth = False def _init_debug_info(self): info = debug.get_debug_for_cluster(self.model.repo, self.model.graph, str(self.uri)) if info: self.__debug_info = DebugInfo(info) else: self.__debug_info = False def _init_forward_clusters(self): query = """ SELECT ?p ?o ?cnt WHERE { %s ?s aida:prototype ?proto1 . ?o aida:prototype ?proto2 . ?se rdf:subject ?proto1 ; rdf:predicate ?p ; rdf:object ?proto2 ; aida:confidence/aida:confidenceValue ?conf . BIND(ROUND(1/(2*(1-?conf))) as ?cnt) %s } """ % (self.__open_clause, self.__close_clause) for p, o, cnt in self.model.sparql.query(query, namespaces, {'s': self.uri}): self.__forward.add( SuperEdge(self, Cluster(self.model, o), p, int(float(str(cnt))))) def _init_backward_clusters(self): query = """ SELECT ?s ?p ?cnt WHERE { %s ?s aida:prototype ?proto1 . ?o aida:prototype ?proto2 . ?se rdf:subject ?proto1 ; rdf:predicate ?p ; rdf:object ?proto2 ; aida:confidence/aida:confidenceValue ?conf . BIND(ROUND(1/(2*(1-?conf))) as ?cnt) %s } """ % (self.__open_clause, self.__close_clause) for s, p, cnt in self.model.sparql.query(query, namespaces, {'o': self.uri}): self.__backward.add( SuperEdge(Cluster(self.model, s), self, p, int(float(str(cnt))))) def _query_for_size(self): if self.uri in self.model.pickled and 'size' in self.model.pickled[ self.uri]: return self.model.pickled[self.uri]['size'] query = """ SELECT (COUNT(?member) AS ?size) WHERE { %s ?membership aida:cluster ?cluster ; aida:clusterMember ?member . 
MINUS {?cluster aida:prototype ?member} %s } """ % (self.__open_clause, self.__close_clause) for size, in self.model.sparql.query(query, namespaces, {'cluster': self.uri}): return int(size) return 0 def __hash__(self): return self.uri.__hash__() def __eq__(self, other): return isinstance(other, Cluster) and str(self.uri) == str(other.uri)
def main(args):
    # load graph
    g = rdflib.Graph()
    g.parse(args.input, publicID=URI_TMP, format="xml")

    # Create a graph from the header file (it contains no triples yet) and add triples to it
    g2 = rdflib.Graph()
    g2.parse(args.header, publicID=URI_TMP, format="xml")

    # bifd.owl
    g3 = rdflib.Graph()
    g3.parse(args.bifd, publicID=URI_TMP, format="xml")

    convert_uris = load_dict(args.subject)
    convert_ps = load_dict(args.predicate)
    convert_ps["https://wba-initiative.org/bifd/label"] = str(RDFS.label)

    # Extract the classes to be processed; only those listed in s.tsv are kept in the final output
    query_class = g.query(
        """SELECT ?class WHERE {
             ?class rdf:type owl:Class.
           }""")
    keep_s = set()
    for c in query_class:
        keep_s.add(c[0])

    query_references = g.query(
        """SELECT ?uri ?p ?v WHERE {
             ?uri rdf:type swivt:Subject.
             ?uri ?p ?v.
             filter (?p in (property:BibTex-3Ahas_doi, URI("https://wba-initiative.org/noprefix/URLhas"), rdfs:label))
             filter (strstarts(str(?uri), "http://183.181.89.140/mediawiki/index.php/Special:URIResolver/-2A"))
           }""")
    references = {}
    references_val = {}
    references_s_o = {}
    for x in query_references:
        p = str(x[1])
        if x[0] not in references:
            references[x[0]] = [p]
        else:
            references[x[0]].append(p)
        references_val["{}\t{}".format(str(x[0]), str(x[1]))] = str(x[2])
    for k in references.keys():
        predicates = []
        for p in references[k]:
            predicates.append(p)
        if "http://183.181.89.140/mediawiki/index.php/Special:URIResolver/Property-3ABibTex-3Ahas_doi" in predicates:
            o = references_val["{}\t{}".format(str(k), "http://183.181.89.140/mediawiki/index.php/Special:URIResolver/Property-3ABibTex-3Ahas_doi")]
            references_s_o[str(k.split("/")[-1])] = o
            continue
        if "https://wba-initiative.org/noprefix/URLhas" in predicates:
            o = references_val["{}\t{}".format(str(k), "https://wba-initiative.org/noprefix/URLhas")]
            references_s_o[str(k.split("/")[-1])] = o
            continue
        if str(RDFS.label) in predicates:
            o = references_val["{}\t{}".format(str(k), str(RDFS.label))]
            references_s_o[str(k.split("/")[-1])] = o
            continue
        if True:
            print("Error: no info for references provided.")
            exit(1)

    obo_id_dict = {}
    for s, p, o in g:
        if s not in keep_s:
            continue
        if str(s) in convert_uris.keys():
            s = URIRef(convert_uris[str(s)])
        if str(o) in convert_uris.keys():
            o = URIRef(convert_uris[str(o)])
        if str(p) in convert_ps.keys():
            p = URIRef(convert_ps[str(p)])
        if str(p) == "http://183.181.89.140/mediawiki/index.php/Special:URIResolver/Property-3AOBO_ID":
            obo_id_dict[str(s)] = str(o)
        if str(s) in convert_uris.values() and (p == RDFS.subClassOf or p == RDFS.label or str(p).startswith(BIFD_PREFIX) or o == OWL.Class):
            g2.add((s, p, o))
    for s, p, o in g:
        if s not in keep_s:
            continue
        if str(s) in convert_uris.keys():
            s = URIRef(convert_uris[str(s)])
        if str(p) == "http://183.181.89.140/mediawiki/index.php/Special:URIResolver/Property-3AOBO_ID":
            if str(s) in obo_id_dict.keys():
                reg = re.compile(r'^[a-zA-Z_][\w.-]*$')
                if reg.match(obo_id_dict[str(s)]):  # check if it results in a valid uri
                    if str(s) in convert_uris.values():
                        g2.add((s, OWL.sameAs, URIRef("http://purl.obolibrary.org/obo/{}".format(obo_id_dict[str(s)]))))

    query_object_property = g3.query(
        """SELECT ?op WHERE {
             ?op rdf:type owl:ObjectProperty.
           }""")
    object_properties = set()
    for res in query_object_property:
        p = str(res[0]).strip("/")
        object_properties.add(p)

    for s, p, o in g2:
        if str(p) == 'https://wba-initiative.org/bifd/reference':
            k = o.replace("http://183.181.89.140/mediawiki/index.php/Special:URIResolver/", '')
            if k in references_s_o.keys():
                g2.add((s, p, Literal(references_s_o[k], datatype=XSD.string)))
                g2.remove((s, p, o))
        if str(p) == 'https://wba-initiative.org/bifd/taxon':
            g2.add((s, p, Literal("http://purl.obolibrary.org/obo/{}".format(obo_id_dict[str(o)]), datatype=XSD.string)))
        if str(p) in convert_ps.values() and p != RDFS.label and str(p) in object_properties:
            # Convert property restrictions
            if str(p) == "https://wba-initiative.org/bifd/transmitter" or str(p) == "https://wba-initiative.org/bifd/modType":
                continue
            g2.remove((s, p, o))
            blank_node = BNode()
            g2.add((s, RDFS.subClassOf, blank_node))
            g2.add((blank_node, RDF.type, OWL.Restriction))
            g2.add((blank_node, OWL.onProperty, p))
            g2.add((blank_node, OWL.someValuesFrom, o))

    for s, p, o in g2:
        if o.startswith("http://183.181.89.140/mediawiki/index.php/Special:URIResolver"):
            g2.remove((s, p, o))
            # For object URIs that are not conversion targets listed in s.tsv,
            # rewrite them with plain string replacement
            o = URIRef(o.replace("http://183.181.89.140/mediawiki/index.php/Special:URIResolver/Category-3ABIF-3A", "https://wba-initiative.org/bifd/")
                       .replace("http://183.181.89.140/mediawiki/index.php/Special:URIResolver/Category-3A", "http://wba-initiative.org/wbra/")
                       # Glutamate is handled as a special case
                       .replace("http://183.181.89.140/mediawiki/index.php/Special:URIResolver/Glutamate", "https://wba-initiative.org/bifd/Glutamate"))
            g2.add((s, p, o))

    g2.serialize(args.output, publicID=URI_TMP, format="pretty-xml")
def relativize(self, uri):
    base = self.base
    if base is not None and uri.startswith(base):
        uri = URIRef(uri.replace(base, "", 1))
    return uri
def convert(teifile, namespace): #graph_uri = "http://contextus.net/resource/blue_velvet/" ns = Namespace(namespace) graph = ConjunctiveGraph() graph.load(teifile, format="rdfa") graph.bind("default", ns) to_update = "" for prefix, nsuri in graph.namespaces(): #print("prefix: " + str(prefix) + " - " + str(nsuri)) if nsuri in ns: to_update = nsuri for s, p, o in graph: # print s, p, o if to_update != "" and to_update in s: graph.remove((s, p, o)) s = URIRef(s.replace(to_update, ns)) graph.add((s, p, o)) act = "" scene = "" line = "" char = 0 loc = 0 #timeline = ns['timeline/narrative'] #graph.add((timeline, RDF.type, ome['Timeline'])) tree = ET.parse(teifile) cast = dict() titleNode = tree.find('//title') castItems = tree.findall('/text/body/div1/castList//castItem') for castItem in castItems: actorNode = castItem.find('actor') roleNode = castItem.find('role') if roleNode != None: id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id") #print("Found castItem!") actor = None role = None # Check to see if we already have an entry if (roleNode != None and roleNode.get("about")): charname = roleNode.get("about") if (charname.find(":") > -1): nmsp, nom = charname.split(":", 1) charcode = "character/" + str(char) charref = nmsp + ":" + charcode + "]" role = extractCURIEorURI(graph, charref, nom[0:-1]) char += 1 #print("1:" + charname + ": adding id " + id + " to " + role) else: role = extractCURIEorURI(graph, charname) #print("2:" + charname + ": adding id " + id + " to " + role) cast[id] = role graph.add((role, RDF.type, omb['Character'])) #print(charname + ": adding id " + id + " to " + role) if (actorNode != None and actorNode.get("about")): actor = extractCURIEorURI(graph, actorNode.get("about")) graph.add((actor, RDF.type, omb['Being'])) if actor != None and role != None: graph.add((actor, omb['portrays'], role)) graph.add((role, omb['portrayed-by'], actor)) eventCount = 1 groupCount = 1 prior_event = None actItems = tree.findall('/text/body/div1') ref = "" for actItem in actItems: if actItem.get("type") == "act": act = actItem.get("n") sceneItems = actItem.findall('div2') for sceneItem in sceneItems: #print("Found sceneItems!") if sceneItem.get("type") == "scene": scene = sceneItem.get("n") # Work out the location of this scene location = None stageItems = sceneItem.findall("stage") #internalnum = 1 stagenum = 0 speechnum = 1 for stageItem in stageItems: if stageItem.get("type") == "location": # The RDFa parser doesn't handle the type - so we can grab that here. if stageItem.get("about") != None: locname = stageItem.get("about") # Adding location type/oml:space for location if stageItem.get("typeof") and stageItem.get("about"): type = extractCURIEorURI(graph, stageItem.get("typeof")) #print "1. Location: " + str(location) + " Type: " + str(type) elif stageItem.get("about"): #print "2. 
Location: " + str(locname) type = extractCURIEorURI(graph, oml['Space']) # Get location value and add rdfs:label is location is not using the TEI value if (locname.find(":") > -1): nmsp, nom = locname.split(":", 1) loccode = "location/" + str(loc) locref = nmsp + ":" + loccode + "]" location = extractCURIEorURI( graph, locref, nom[0:-1]) loc += 1 graph.add(( location, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(nom[0:-1]))) else: location = extractCURIEorURI( graph, stageItem.get("about")) # Add location to graph graph.add((location, RDF.type, type)) else: location = "" #print("Adding location type: " + type + " (" + location + ")") if cast: # Work out a list of all cast in a given section currentCast = list() speakers = list() # Iterate through elements within stageItem # Find speaker events and add to list of current cast for inclusion in social event # Find reference events and add to ongoing social event ? # Find stage events # If event is an entrance then # create social event for people talking before entrance # create travel event i.e. entrance # add new arrival to current cast list # If event is exit event then # create social event for people talking before exit # create travel event i.e. exit # if leavers are not named directly the calculate who is leaving # remove leavers from current cast list # If reach end of scene then create social event with current cast list #Also need to check if social event before exit has same composition as social event after exit since then they should be merged event = ns['event/' + str(eventCount)] group = ns['group/' + str(groupCount)] refersTo = list() #parent = None speakerNodes = list() speakerRef = list() xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str( perseusid) + ":act=" + str(act) + ":scene=" + str(scene) stagecount = 0 stage_array = list() for node in sceneItem.getiterator(): #print("Node: " + node.tag) """ if node.tag == "lb": if node.get("ed") == "F1": line = node.get("n") if titleNode != None: ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line) else: ref = str(act) + "." + str(scene) + "." 
+ str(line) #xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "'])" xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) #print("Ref: " + xpointer) """ if node.tag == "sp": id = node.get("who") if id and cast: speakers.append(cast[id[1:]]) speakerNodes.append(node) if perseusid == None: speakerRef.append(ref) else: #speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)" speechRef = xpointer + "#xpointer(//div2/sp[" + str( speechnum) + "])" speakerRef.append(speechRef) #print("Line ref: " + ref) if cast[id[1:]] not in currentCast: currentCast.append(cast[id[1:]]) #internalnum = 1 speechnum += 1 stagecount = 0 previousl = 0 for subnode in node.getiterator(): if subnode.tag == "l": previousl += 1 if subnode.tag == "stage": #print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n") stage_array.append(previousl) stagecount += 1 elif node.tag == "stage": if stagecount > 0: s_max = len(stage_array) diff = s_max - stagecount #if diff == 0: # stagenum += 1 entRef = xpointer + "#xpointer(//div2/sp[" + str( speechnum - 1) + "]/l[" + str( stage_array[diff]) + "]/stage)" #internalnum += 1 stagecount -= 1 else: stagenum += 1 entRef = xpointer + "#xpointer(//div2/stage[" + str( stagenum) + "])" if node.get("type") == "entrance": # Add Social Events for all the people who spoke since the last break (if there were any) update = list() update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location) eventCount = update[0] prior_event = update[1] event = ns['event/' + str(eventCount)] speakers = list() speakerNodes = list() speakerRef = list() # Add Travel Event graph.add((event, RDF.type, omj['Travel'])) if perseusid == None: graph.add(( event, rdflib.URIRef( "http://www.w3.org/2000/01/rdf-schema#seeAlso" ), Literal(ref))) else: #entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)" graph.add(( event, rdflib.URIRef( "http://www.w3.org/2000/01/rdf-schema#seeAlso" ), URIRef(entRef))) #print("Entrance event. 
GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) #print("Found entrence event!") if location: graph.add((event, ome['to'], location)) involved = node.get("about") if (len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"): involved = involved[1:-1] chunks = involved.split() chunk_count = len(chunks) if chunk_count > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for chunk in chunks: striped = chunk.strip() if (len(striped) > 0 and striped[0] == "[" and striped[-1] == "]"): striped = striped[1:-1] currentCast.append(cast[striped]) if chunk_count > 1: graph.add( (group, ome['contains'], cast[striped])) if en == chunk_count: event_label = event_label[ 0:-2] + " and " + striped graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(event_label + " arrive"))) elif en < chunk_count: event_label += striped + ", " else: #print("Adding person as subject-entity to entry event " + str(eventCount)) graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(striped + " arrives"))) graph.add((event, ome['has-subject-entity'], cast[striped])) en += 1 if chunk_count > 1: graph.add( (event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to entry event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/' + str(groupCount)] if (prior_event): graph.add((event, ome['follows'], prior_event)) graph.add((prior_event, ome['precedes'], event)) prior_event = event eventCount = eventCount + 1 event = ns['event/' + str(eventCount)] if node.get("type") == "exit": # Add Social Events for all the people who spoke since the last break (if there were any) update = list() update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location) eventCount = update[0] prior_event = update[1] event = ns['event/' + str(eventCount)] speakers = list() speakerNodes = list() speakerRef = list() # Add Travel Event graph.add((event, RDF.type, omj['Travel'])) if perseusid == None: graph.add(( event, rdflib.URIRef( "http://www.w3.org/2000/01/rdf-schema#seeAlso" ), Literal(ref))) else: #exitRef = xpointer #graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(exitRef))) graph.add(( event, rdflib.URIRef( "http://www.w3.org/2000/01/rdf-schema#seeAlso" ), URIRef(entRef))) #print("Found entrence event!") if location != None: graph.add((event, ome['from'], location)) involved = node.get("about") if involved.strip() == "" or "-all" in involved: # Remove everyone #print("Exit all. 
GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) #for peep in currentCast: # print(peep) if len(currentCast) > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for peep in currentCast: short_ref = "" for key, value in cast.iteritems(): if peep == value: short_ref = key if len(currentCast) > 1: graph.add((group, ome['contains'], peep)) if en == len(currentCast): event_label = event_label[ 0:-2] + " and " + short_ref graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(event_label + " leave"))) elif en < len(currentCast): event_label += short_ref + ", " else: #print("Adding person as subject-entity to exuant event " + str(eventCount)) graph.add( (event, ome['has-subject-entity'], peep)) graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(short_ref + " leaves"))) en += 1 if len(currentCast) > 1: graph.add( (event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to exuant event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/' + str(groupCount)] currentCast = list() elif "!" in involved: #print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) #print("Event: " + involved); if (len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"): involved = involved[1:-1] involved = involved.strip() if (len(involved) > 0 and involved[0] == "!" and involved[1] == "(" and involved[-1] == ")"): involved = involved[2:-1] #print("involved: " + involved) striped = involved.strip() c_ids = striped.split() chunks = list() for stay in c_ids: #print("Staying: " + cast[stay]) chunks.append(cast[stay]) staying = list() going = list() for player in currentCast: #print("Player: " + player) if player in chunks: staying.append(player) else: going.append(player) going_count = len(going) if going_count > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for ghost in going: #print("ghost: " + ghost) short_ref = "" for key, value in cast.iteritems(): if ghost == value: short_ref = key if ghost in currentCast: currentCast.remove(ghost) #print("Current cast count: " + str(len(currentCast))) if going_count > 1: graph.add((group, ome['contains'], ghost)) if en == len(going): event_label = event_label[ 0:-2] + " and " + short_ref graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(event_label + " leave"))) elif en < len(going): event_label += short_ref + ", " else: #print("Adding person as subject-entity to exit event " + str(eventCount)) graph.add( (event, ome['has-subject-entity'], ghost)) graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(short_ref + " leaves"))) en += 1 if going_count > 1: graph.add( (event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to exit event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/' + str(groupCount)] else: #print("Exit some. 
GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) if (len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"): involved = involved[1:-1] striped = involved.strip() chunks = striped.split() #print("striped: " + striped) chunk_count = len(chunks) if chunk_count > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for chunk in chunks: #print("chunk: " + chunk) ghost = cast[chunk] #print("ghost: " + ghost) if ghost in currentCast: currentCast.remove(ghost) #print("Current cast count: " + str(len(currentCast))) if chunk_count > 1: graph.add((group, ome['contains'], ghost)) if en == len(currentCast): event_label = event_label[ 0:-2] + " and " + chunk graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(event_label + " leave"))) elif en < len(currentCast): event_label += chunk + ", " else: #print("Adding person as subject-entity to exit event " + str(eventCount)) graph.add( (event, ome['has-subject-entity'], ghost)) graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(chunk + " leaves"))) en += 1 if chunk_count > 1: graph.add( (event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to exit event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/' + str(groupCount)] if (prior_event): graph.add((event, ome['follows'], prior_event)) graph.add((prior_event, ome['precedes'], event)) prior_event = event eventCount = eventCount + 1 event = ns['event/' + str(eventCount)] #elif node.tag == "rs": # #print("Found rs node") # if parent: # #print("Parent type is " + parent.tag) # if parent.tag == "p" or parent.tag == "l": # refersTo.append(node.get("about")) #parent = node # Add Social Events for all the people who spoke since the last break (if there were any) #print("Final section of scene, currentCast:" + str(len(currentCast)) + " sperkers: " + str(len(speakers))) update = list() update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location) eventCount = update[0] prior_event = update[1] event = ns['event/' + str(eventCount)] group = ns['group/' + str(groupCount)] speakers = list() speakerNodes = list() currentCast = list() speakerRef = list() print graph.serialize(format='xml')
def createimage2graph(inputfile, entity_map, ontology, filesubject, lowerlevel, consolidatedGraph, triple_dir, generateEmbTriples): #filesubject is the publication URI, which has to be linked to the image components # g = Graph() # g.parse(ontology,format="n3") # len(g) imagetriples = [] block_dict = { "Figure":"Figure", "conv": "ConvBlock", "deconv":"DeconvBlock", "dense":"DenseBlock", "flatten":"FlattenBlock", "dropout":"DropoutBlock", "pooling":"PoolingBlock", "unpooling":"UnpoolingBlock", "concat":"ConcatBlock", "rnn":"RnnBlock", "rnnseq": "RnnSeqBlock", "lstm":"LSTMBlock", "lstmseq":"LSTMSeqBlock", "norm":"NormBlock", "embed":"EmbedBlock", "activation":"ActivationBlock", "loss":"LossBlock", "output":"OutputBlock", "input":"InputBlock", "upsample":"UpsamplingBlock" } # Namespaces dcc_namespace = "https://github.com/deepcurator/DCC/" # Classes Figure = URIRef(dcc_namespace + "Figure") # ActivationBlock = URIRef(dcc_namespace + "ActivationBlock") # EmbedBlock = URIRef(dcc_namespace + "EmbedBlock") # NormBlock = URIRef(dcc_namespace + "NormBlock") # LSTMSeqBlock = URIRef(dcc_namespace + "LSTMSeqBlock") # LSTMBlock = URIRef(dcc_namespace + "LSTMBlock") # RNNSeqBlock = URIRef(dcc_namespace + "RNNSeqBlock") # RNNBlock = URIRef(dcc_namespace + "RNNBlock") # ConcatBlock = URIRef(dcc_namespace + "ConcatBlock") # UnpoolingBlock = URIRef(dcc_namespace + "UnpoolingBlock") # PoolingBlock = URIRef(dcc_namespace + "PoolingBlock") # DropoutBlock = URIRef(dcc_namespace + "DropoutBlock") # FlattenBlock = URIRef(dcc_namespace + "FlattenBlock") # DenseBlock = URIRef(dcc_namespace + "DenseBlock") # DeconvBlock = URIRef(dcc_namespace + "DeconvBlock") # ConvBlock = URIRef(dcc_namespace + "ConvBlock") # LossBlock = URIRef(dcc_namespace + "LossBlock") # Properties partOf = URIRef(dcc_namespace + "partOf") followedBy = URIRef(dcc_namespace + "followedBy") # Open the image2graph with open(inputfile,encoding="ISO-8859-1") as f: lines = f.readlines() lines = [x.strip() for x in lines] # Each line in the image2graph is a triple # Split the triple into s,p,o # Create the URIRefs for RDF based on the ontology # URIRefs require the namespace and the class term from ontology for line in lines: triple = line.split(" ") subject = triple[0] predicate = triple[1] obj = triple[2] filename = inputfile.split(os.path.sep)[-1] filename = filename.split('.txt')[0] if (subject.startswith(":")): subject = subject[1:] if (obj.startswith(":")): obj = obj[1:] # print(line + "\n") if(predicate == "partOf"): ## Subject is a component ## Create a unique URI for that filename = inputfile.split('/')[-1] filename = filename.split('.txt')[0] # print(subject + "\tpart of\t" + obj[4:]) imagetriples.append(subject.replace("\\", "/") + "\tpart of\t" + obj[4:].replace("\\", "/")) subject = URIRef(dcc_namespace + filename[4:].replace("\\", "/") + "_" + subject.replace("\\", "/")) obj = URIRef(dcc_namespace + obj[4:].replace("\\", "/")) # g.add((subject,partOf,obj)) consolidatedGraph.add((subject,partOf,obj)) elif(predicate == "hasCaption"): triplesubj = subject subject = URIRef(dcc_namespace + subject) literaltext = Literal(obj) consolidatedGraph.add((subject,URIRef(dcc_namespace + "hasCaptionText"),literaltext)) elif(predicate == "isA"): triplesubj = subject subject = URIRef(dcc_namespace + subject) # if(obj in entity_map): # print("found obj in entity map") # print("Found " + obj + " in cso") # csovalue = entity_map[obj] # str_value = str(csovalue) # print("CSO value is then " + str_value) # g.add((subject,RDF.type, URIRef(dcc_namespace + 
block_dict.get(obj)))) if(obj == "Figure"): # print(filesubject + "\thas Figure\t" + obj) imagetriples.append(filesubject + "\thas Figure\t" + obj) consolidatedGraph.add((URIRef(dcc_namespace + filesubject),URIRef(dcc_namespace + "hasFigure"),subject)) # print(triplesubj +"\tisA\t" + block_dict.get(obj)) imagetriples.append(triplesubj +"\tisA\t" + block_dict.get(obj)) consolidatedGraph.add((subject,RDF.type, URIRef(dcc_namespace + block_dict.get(obj)))) elif(predicate == "isType"): filename = inputfile.split(os.path.sep)[-1] # print("FILENAME: " + filename) filename = filename.split('.txt')[0] # print(subject + "\tisA\t" + block_dict.get(obj)) # print(obj) imagetriples.append(subject + "\tisA\t" + block_dict.get(obj)) subject = URIRef(dcc_namespace + filename[4:] + "_" + subject) # print("Subject is " + subject) # g.add((subject, RDF.type, URIRef(dcc_namespace + block_dict.get(obj)))) consolidatedGraph.add((subject, RDF.type, URIRef(dcc_namespace + block_dict.get(obj)))) # Link CSO if(obj in entity_map): # print("found obj in entity map") # print("Found " + obj + " in cso") csovalue = entity_map[obj] str_value = str(csovalue) # print("CSO value is then " + str_value) if("cso" in str_value): consolidatedGraph.add((subject,URIRef(dcc_namespace + "hasCSOEquivalent"),csovalue)) if generateEmbTriples: save_image_triple_file(imagetriples,filesubject,lowerlevel, triple_dir)
stmt = (subject2, URIRef("http://schema.org/geo"), subject)
g.add(stmt)
stmt = (subject2, URIRef("http://www.w3.org/2000/01/rdf-schema#label"), Literal(label))
g.add(stmt)
stmt = (subject2, URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(obj["@id"]))
g.add(stmt)
stmt = (subject2, URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), aaa)
g.add(stmt)

path = subject2.replace("https://w3id.org/hi", "../../docs") + ".json"
print(path)
dirname = os.path.dirname(path)
os.makedirs(dirname, exist_ok=True)
g.serialize(destination=path, format='json-ld')
all += g

path = "data/all.rdf"
all.serialize(destination=path, format='pretty-xml')

with open('data/p.csv', 'w') as f:
    writer = csv.writer(f, lineterminator='\n')  # set the line terminator (\n) explicitly
    writer.writerows(rows)
class Cluster: def __init__(self, uri): self.uri = URIRef(uri) self.__prototype = None self.__type = None self.__members = [] self.__forward = None self.__backward = None self.__targets = Counter() self.__qnodes = Counter() self.__qnodesURL = {} @property def href(self): return self.uri.replace('http://www.isi.edu/gaia', '/cluster').replace('http://www.columbia.edu', '/cluster') @property def label(self): if self.uri in pickled and 'label' in pickled[self.uri]: return pickled[self.uri]['label'] return self.prototype.label @property def prototype(self): if not self.__prototype: self._init_cluster_prototype() return self.__prototype @property def type(self): if self.uri in pickled and 'type' in pickled[self.uri]: return pickled[self.uri]['type'] if not self.__type: self._init_cluster_prototype() return self.__type @property def members(self): if not self.__members: self._init_cluster_members() return self.__members @property def targets(self): if not self.__targets: self._init_cluster_members() return self.__targets.most_common() @property def targetsSize(self): return len(self.targets) @property def qnodes(self): if not self.__qnodes: self._init_qnodes() return self.__qnodes.most_common() @property def qnodesURL(self): if not self.__qnodesURL: self._init_qnodes() return self.__qnodesURL @property def size(self): if self.__members: return len(self.__members) return self._query_for_size() @property def forward(self): if self.__forward is None: self.__forward = set() self._init_forward_clusters() return self.__forward @property def backward(self): if self.__backward is None: self.__backward = set() self._init_backward_clusters() return self.__backward @property def neighbors(self): return self.forward | self.backward def neighborhood(self, hop=1): if hop == 1 and self.prototype.type != AIDA.Relation: hood = self.neighbors # for neighbor in [x for x in self.neighbors if x.subject.proto] for neighbor in self.neighbors: if neighbor.subject.prototype.type == AIDA.Relation: hood |= neighbor.subject.neighbors return hood if hop <= 1: return self.neighbors hood = set() for neighbor in self.neighbors: hood |= neighbor.subject.neighborhood(hop-1) hood |= neighbor.object.neighborhood(hop-1) return hood @property def img(self): import os.path _, name = split_uri(self.uri) svgpath = 'static/img/' + name + '.svg' if os.path.isfile(svgpath): return name from graph import SuperEdgeBasedGraph graph = SuperEdgeBasedGraph(self.neighborhood(), self, self.uri) path = graph.dot() return graph.name @classmethod def ask(cls, uri): query = "ASK { ?cluster a aida:SameAsCluster }" for ans in sparql.query(query, namespaces, {'cluster': URIRef(uri)}): return ans return False def _init_cluster_prototype(self): query = """ SELECT ?prototype (MIN(?label) AS ?mlabel) ?type ?category WHERE { ?cluster aida:prototype ?prototype . ?prototype a ?type . OPTIONAL { ?prototype aida:hasName ?label } . ?statement a rdf:Statement ; rdf:subject ?prototype ; rdf:predicate rdf:type ; rdf:object ?category ; } GROUP BY ?prototype ?type ?category """ for prototype, label, type_, cate in sparql.query(query, namespaces, {'cluster': self.uri}): if not label: _, label = split_uri(cate) self.__prototype = ClusterMember(prototype, label, type_) self.__type = cate def _init_cluster_members(self): query = """ SELECT ?member (MIN(?label) AS ?mlabel) ?type ?target WHERE { ?membership aida:cluster ?cluster ; aida:clusterMember ?member . OPTIONAL { ?member aida:hasName ?label } . OPTIONAL { ?member aida:link/aida:linkTarget ?target } . 
?statement a rdf:Statement ; rdf:subject ?member ; rdf:predicate rdf:type ; rdf:object ?type . } GROUP BY ?member ?type ?target """ for member, label, type_, target in sparql.query(query, namespaces, {'cluster': self.uri}): self.__members.append(ClusterMember(member, label, type_, target)) if target: self.__targets[str(target)] += 1 def _init_qnodes(self): for target, count in self.targets: if ":NIL" not in target: fbid = '/' + target[target.find(':')+1:].replace('.', '/') query = """ SELECT ?qid ?label WHERE { ?qid wdt:P646 ?freebase . ?qid rdfs:label ?label filter (lang(?label) = "en") . } LIMIT 1 """ for qid, label in wikidata_sparql.query(query, namespaces, {'freebase': Literal(fbid)}): qnodeURL = str(qid) qid = qnodeURL.rsplit('/', 1)[1] self.__qnodes[qid] = count if qid not in self.__qnodesURL: self.__qnodesURL[qid] = qnodeURL def _init_forward_clusters(self): query = """ SELECT ?p ?o ?cnt WHERE { ?s aida:prototype ?proto1 . ?o aida:prototype ?proto2 . ?se rdf:subject ?proto1 ; rdf:predicate ?p ; rdf:object ?proto2 ; aida:confidence/aida:confidenceValue ?conf . BIND(ROUND(1/(2*(1-?conf))) as ?cnt) } """ for p, o, cnt in sparql.query(query, namespaces, {'s': self.uri}): self.__forward.add(SuperEdge(self, Cluster(o), p, int(cnt))) def _init_backward_clusters(self): query = """ SELECT ?s ?p ?cnt WHERE { ?s aida:prototype ?proto1 . ?o aida:prototype ?proto2 . ?se rdf:subject ?proto1 ; rdf:predicate ?p ; rdf:object ?proto2 ; aida:confidence/aida:confidenceValue ?conf . BIND(ROUND(1/(2*(1-?conf))) as ?cnt) } """ for s, p, cnt in sparql.query(query, namespaces, {'o': self.uri}): self.__backward.add(SuperEdge(Cluster(s), self, p, int(cnt))) def _query_for_size(self): if self.uri in pickled and 'size' in pickled[self.uri]: return pickled[self.uri]['size'] query = """ SELECT (COUNT(?member) AS ?size) WHERE { ?membership aida:cluster ?cluster ; aida:clusterMember ?member . } """ for size, in sparql.query(query, namespaces, {'cluster': self.uri}): return int(size) return 0 def __hash__(self): return self.uri.__hash__() def __eq__(self, other): return isinstance(other, Cluster) and str(self.uri) == str(other.uri)
def add_data(data, category): if category == 'Manufacturers': for man in data[category]: name = URIRef(ns + man['Name']) url = Literal(man['Site']) country = Literal(man['Country']) info = Literal(man['Description']) g.add((name, RDF.type, OWL.NamedIndividual)) g.add((name, RDF.type, ns.Manufacturer)) if (name, RDFS.comment, None) not in g: g.add((name, RDFS.comment, info)) if (name, RDFS.seeAlso, None) not in g: g.add((name, RDFS.seeAlso, url)) # if (name, RDFS.comment, country) not in g: # g.add((name, RDFS.comment, country)) if category == 'Guitars': for key in data[category].keys(): strings_num = 6 if key == 'Acoustic guitars': cls = ns.AcousticGuitar elif key == 'Classical guitars': cls = ns.ClassicalGuitar elif key == 'Electric guitars': cls = ns.ElectricGuitar elif key == 'Bass guitars': cls = ns.BassGuitar strings_num = 4 for guitar in data[category][key]: name = guitar['Name'].replace(' ', '_') for char in ['®', '#', ',', '`']: name = name.replace(char, '') name = ns + URIRef(name) price = Literal(guitar['Price']) brand = Literal(guitar['Brand']) color = Literal(guitar['Color']) pickup = Literal(guitar['Pickup']) strings = Literal(guitar['Strings']) if 'Strings number' in guitar.keys(): strings_num = Literal(guitar['Strings number']) strings_num = Literal(strings_num) g.add((name, RDF.type, OWL.NamedIndividual)) g.add((name, RDF.type, cls)) g.add((name, ns.hasManufacturer, URIRef(ns + brand))) g.add((name, ns.hasColor, color)) g.add((name, ns.hasPickup, pickup)) if (name, ns.hasPrice, None) in g: g.set((name, ns.hasPrice, price)) else: g.add((name, ns.hasPrice, price)) g.add((name, ns.hasStrings, strings)) g.add((name, ns.numStrings, strings_num)) if category == 'Amplifiers' or category == 'Bass amplifiers': for key in data[category].keys(): if key == 'Combo': cls = ns.ComboAmplifier speakers_num = Literal(1) elif key == 'Heads': cls = ns.Head elif key == 'Preamplifiers': cls = ns.Preamplifier for amp in data[category][key]: name = amp['Name'].replace(' ', '_') for char in ['®', '#', ',', '`']: name = name.replace(char, '') name = ns + URIRef(name) price = Literal(amp['Price']) brand = Literal(amp['Brand'].replace(' ', '_')) type = amp['Type'] if type == 'transistor': type = ns.Digital elif type == 'tube': type = ns.Tube elif type == 'hybrid': type = ns.Hybrid if 'Power' in amp.keys(): power = Literal(amp['Power']) if 'Speakers' in amp.keys(): speakers_num = Literal(amp['Speakers']) if key == 'Combo': g.add((name, ns.numSpeakers, speakers_num)) g.add((name, RDF.type, OWL.NamedIndividual)) g.add((name, RDF.type, cls)) g.add((name, ns.hasManufacturer, URIRef(ns + brand))) g.add((name, RDF.type, type)) if 'Power' in amp.keys(): g.add((name, ns.hasPower, power)) if (name, ns.hasPrice, None) in g: g.set((name, ns.hasPrice, price)) else: g.add((name, ns.hasPrice, price)) if category == 'Bass amplifiers': g.add((name, RDF.type, ns.BassAmplifier)) if category == 'Pickups': for pickup in data[category]: name = pickup['Name'].replace(' ', '_') for char in ['®', '#', ',', '`']: name = name.replace(char, '') name = ns + URIRef(name) price = Literal(pickup['Price']) brand = Literal(pickup['Brand'].replace(' ', '_')) type = Literal(pickup['Type']) active = pickup['Active'] use = pickup['Use'] g.add((name, RDF.type, OWL.NamedIndividual)) if active: g.add((name, RDF.type, ns.Active)) else: g.add((name, RDF.type, ns.Passive)) g.add((name, ns.hasManufacturer, URIRef(ns + brand))) g.add((name, ns.hasType, type)) if use == 'electric': g.add((name, ns.isSuitableFor, ns.ElectricGuitar)) elif use == 
'bass': g.add((name, ns.isSuitableFor, ns.BassGuitar)) if (name, ns.hasPrice, None) in g: g.set((name, ns.hasPrice, price)) else: g.add((name, ns.hasPrice, price)) if category == 'Strings': for strings in data[category]: name = strings['Name'].replace(' ', '_') for char in ['®', '#', ',', '`']: name = name.replace(char, '') name = ns + URIRef(name) price = Literal(strings['Price']) brand = Literal(strings['Brand'].replace(' ', '_')) material = Literal(strings['Material']) gauge = Literal(strings['Gauge']) use = strings['Use'] number = Literal(strings['Number']) g.add((name, RDF.type, OWL.NamedIndividual)) g.add((name, ns.hasManufacturer, URIRef(ns + brand))) g.add((name, ns.hasMaterial, material)) g.add((name, ns.hasGauge, gauge)) if use == 'electric': g.add((name, ns.isSuitableFor, ns.ElectricGuitar)) elif use == 'acoustic': g.add((name, ns.isSuitableFor, ns.AcousticGuitar)) elif use == 'classical': g.add((name, ns.isSuitableFor, ns.ClassicalGuitar)) elif use == 'bass': g.add((name, ns.isSuitableFor, ns.BassGuitar)) if (name, ns.hasPrice, None) in g: g.set((name, ns.hasPrice, price)) else: g.add((name, ns.hasPrice, price))
root = xmlschema_doc.getroot()
for complexType in root.findall(".//{http://www.w3.org/2001/XMLSchema}complexType"):
    print(complexType.attrib["name"])
    URIRef(complexType.attrib["name"])
    description = complexType.find(".//{http://www.w3.org/2001/XMLSchema}documentation")
    # print(description.__dict__)
    # print(description.text)
    name = URIRef(complexType.attrib["name"])
    print(dir(description))
    if "text" in dir(description):
        ZinGraph.add((name, DCTERMS.description, Literal(description.text, lang="nl")))
    ZinGraph.add((name, RDFS.label, Literal(name.replace("CDT_", ""), lang="nl")))
    for element in complexType.findall(".//{http://www.w3.org/2001/XMLSchema}element"):
        element_name = URIRef(element.attrib["name"])
        ZinGraph.add((element_name, RDFS.label, Literal(element_name, lang="nl")))
        if "type" in element.attrib.keys():
            element_type = URIRef(element.attrib["type"].replace(
                "iwlz:", "http://www.istandaarden.nl/iwlz/1_2/basisschema/schema/1_2/"))
            ZinGraph.add((element_name, RDF.type, element_type))
        ZinGraph.add((element_name, DCTERMS.isPartOf, name))
        # description2 = element.find(".//{http://www.w3.org/2001/XMLSchema}documentation")
        # print(description2.text)
# for grandchild in child:
#     print(grandchild.tag, grandchild.attrib)
def insert_data(data, category):
    if category == 'Manufacturers':
        for man in data[category]:
            name = URIRef(namespace + man['Name'])
            url = Literal(man['Website'])
            country = Literal(man['Country'])
            info = Literal(man['Description'])
            graph.add((name, RDF.type, OWL.NamedIndividual))
            graph.add((name, RDF.type, namespace.Manufacturers))
            if (name, RDFS.comment, None) not in graph:
                graph.add((name, RDFS.comment, info))
                graph.add((name, RDFS.comment, country))
            if (name, RDFS.seeAlso, None) not in graph:
                graph.add((name, RDFS.seeAlso, url))

    if category == 'Body':
        for key in data[category].keys():
            if key == 'Bumpers':
                cls = namespace.Bumpers
            elif key == 'Body_kits':
                cls = namespace.Body_kits
            elif key == 'Side_skirts':
                cls = namespace.Side_skirts
            elif key == 'Hoods':
                cls = namespace.Hoods
            elif key == 'Wings':
                cls = namespace.Wings
            # Iterating through keys and assigning required vars
            for body in data[category][key]:
                name = body['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = URIRef(namespace + name)
                price = Literal(body['Price'])
                manufacturer = Literal(body['Manufacturer'])
                if 'CompatibleWithCar' in body.keys():
                    cwc = Literal(body['CompatibleWithCar'])
                if 'Material' in body.keys():
                    material = Literal(body['Material'])
                # Add found data to ontology
                graph.add((name, RDF.type, OWL.NamedIndividual))
                graph.add((name, RDF.type, cls))
                graph.add((name, namespace.hasManufacturer, URIRef(namespace + manufacturer)))
                if 'CompatibleWithCar' in body.keys():
                    graph.add((name, namespace.compatibleWithCar, cwc))
                if 'Material' in body.keys():
                    graph.add((name, namespace.hasMaterial, material))
                if (name, namespace.hasPrice, None) in graph:
                    graph.set((name, namespace.hasPrice, price))
                else:
                    graph.add((name, namespace.hasPrice, price))

    if category == 'Brake_system':
        for key in data[category].keys():
            if key == 'Brake_disks':
                cls = namespace.Brake_disks
            elif key == 'Brake_pads':
                cls = namespace.Brake_pads
            elif key == 'Calipers':
                cls = namespace.Calipers
            for bs in data[category][key]:
                name = bs['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = URIRef(namespace + name)
                price = Literal(bs['Price'])
                manufacturer = Literal(bs['Manufacturer'].replace(' ', '_'))
                if 'Material' in bs.keys():
                    material = Literal(bs['Material'])
                if 'ForPublicRoad' in bs.keys():
                    fpr = Literal(bs['ForPublicRoad'])
                graph.add((name, RDF.type, OWL.NamedIndividual))
                graph.add((name, RDF.type, cls))
                graph.add((name, namespace.hasManufacturer, URIRef(namespace + manufacturer)))
                if 'Material' in bs.keys():
                    graph.add((name, namespace.hasMaterial, material))
                if 'ForPublicRoad' in bs.keys():
                    graph.add((name, namespace.forPublicRoad, fpr))
                if (name, namespace.hasPrice, None) in graph:
                    graph.set((name, namespace.hasPrice, price))
                else:
                    graph.add((name, namespace.hasPrice, price))

    if category == 'Engine':
        for key in data[category].keys():
            if key == 'ECU':
                cls = namespace.ECU
            elif key == 'Exhaust_systems':
                cls = namespace.Exhaust_systems
            elif key == 'Fuel_systems':
                cls = namespace.Fuel_systems
            elif key == 'Intake_systems':
                cls = namespace.Intake_systems
            elif key == 'Stroker_kits':
                cls = namespace.Stroker_kits
            elif key == 'Turbochargers':
                cls = namespace.Turbochargers
            for engine in data[category][key]:
                name = engine['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = URIRef(namespace + name)
                price = Literal(engine['Price'])
                manufacturer = Literal(engine['Manufacturer'].replace(' ', '_'))
                if 'CompatibleWithEngine' in engine.keys():
                    cwe = Literal(engine['CompatibleWithEngine'])
                if 'CalculatedPotential' in engine.keys():
                    potential = Literal(engine['CalculatedPotential'])
                if 'Material' in engine.keys():
                    material = Literal(engine['Material'])
                graph.add((name, RDF.type, OWL.NamedIndividual))
                print(name, RDF.type, cls)
                graph.add((name, RDF.type, cls))
                print(name, namespace.hasManufacturer, URIRef(namespace + manufacturer))
                graph.add((name, namespace.hasManufacturer, URIRef(namespace + manufacturer)))
                if 'Material' in engine.keys():
                    graph.add((name, namespace.hasMaterial, material))
                if 'CompatibleWithEngine' in engine.keys():
                    graph.add((name, namespace.compatibleWithEngine, cwe))
                if 'CalculatedPotential' in engine.keys():
                    graph.add((name, namespace.hasCalculatedPotential, potential))
                if (name, namespace.hasPrice, None) in graph:
                    graph.set((name, namespace.hasPrice, price))
                else:
                    graph.add((name, namespace.hasPrice, price))

    if category == 'Interior':
        for key in data[category].keys():
            if key == 'Roll_cages':
                cls = namespace.Roll_cages
            elif key == 'Seats':
                cls = namespace.Seats
            elif key == 'Steering_wheels':
                cls = namespace.Steering_wheels
            for seat in data[category][key]:
                name = seat['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = URIRef(namespace + name)
                price = Literal(seat['Price'])
                manufacturer = Literal(seat['Manufacturer'].replace(' ', '_'))
                if 'CompatibleWithCar' in seat.keys():
                    cwc = Literal(seat['CompatibleWithCar'])
                if 'ForPublicRoad' in seat.keys():
                    fpr = Literal(seat['ForPublicRoad'])
                graph.add((name, RDF.type, OWL.NamedIndividual))
                graph.add((name, RDF.type, cls))
                graph.add((name, namespace.hasManufacturer, URIRef(namespace + manufacturer)))
                if 'CompatibleWithCar' in seat.keys():
                    graph.add((name, namespace.compatibleWithCar, cwc))
                if 'ForPublicRoad' in seat.keys():
                    graph.add((name, namespace.forPublicRoad, fpr))
                if (name, namespace.hasPrice, None) in graph:
                    graph.set((name, namespace.hasPrice, price))
                else:
                    graph.add((name, namespace.hasPrice, price))

    if category == 'Suspension':
        for key in data[category].keys():
            if key == 'Anti_roll_bars':
                cls = namespace.Anti_roll_bars
            elif key == 'Body_stiffness':
                cls = namespace.Body_stiffness
            elif key == 'Springs_and_shock_absorbers':
                cls = namespace.Springs_and_shock_absorbers
            for spring in data[category][key]:
                name = spring['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = URIRef(namespace + name)
                price = Literal(spring['Price'])
                manufacturer = Literal(spring['Manufacturer'].replace(' ', '_'))
                graph.add((name, RDF.type, OWL.NamedIndividual))
                graph.add((name, RDF.type, cls))
                graph.add((name, namespace.hasManufacturer, URIRef(namespace + manufacturer)))
                if (name, namespace.hasPrice, None) in graph:
                    graph.set((name, namespace.hasPrice, price))
                else:
                    graph.add((name, namespace.hasPrice, price))

    if category == 'Transmission':
        for key in data[category].keys():
            if key == 'Clutches':
                cls = namespace.Clutches
            elif key == 'Driveshafts':
                cls = namespace.Driveshafts
            for clutch in data[category][key]:
                name = clutch['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = URIRef(namespace + name)
                price = Literal(clutch['Price'])
                manufacturer = Literal(clutch['Manufacturer'].replace(' ', '_'))
                material = Literal(clutch['Material'])
                graph.add((name, RDF.type, OWL.NamedIndividual))
                graph.add((name, RDF.type, cls))
                graph.add((name, namespace.hasManufacturer, URIRef(namespace + manufacturer)))
                graph.add((name, namespace.hasMaterial, material))
                if (name, namespace.hasPrice, None) in graph:
                    graph.set((name, namespace.hasPrice, price))
                else:
                    graph.add((name, namespace.hasPrice, price))

    if category == 'Wheels':
        for key in data[category].keys():
            if key == 'Rims':
                cls = namespace.Rims
            elif key == 'Tires':
                cls = namespace.Tires
            for wheel in data[category][key]:
                name = wheel['Name'].replace(' ', '_')
                for char in ['®', '#', ',', '`']:
                    name = name.replace(char, '')
                name = URIRef(namespace + name)
                price = Literal(wheel['Price'])
                manufacturer = Literal(wheel['Manufacturer'].replace(' ', '_'))
                if 'Type' in wheel.keys():
                    wheelType = Literal(wheel['Type'])
                if 'Size' in wheel.keys():
                    size = Literal(wheel['Size'])
                if 'MaxSpeed' in wheel.keys():
                    maxSpeed = Literal(wheel['MaxSpeed'])
                if 'ForPublicRoad' in wheel.keys():
                    fpr = Literal(wheel['ForPublicRoad'])
                graph.add((name, RDF.type, OWL.NamedIndividual))
                graph.add((name, RDF.type, cls))
                graph.add((name, namespace.hasManufacturer, URIRef(namespace + manufacturer)))
                if 'Type' in wheel.keys():
                    graph.add((name, namespace.hasType, wheelType))
                if 'Size' in wheel.keys():
                    graph.add((name, namespace.hasSize, size))
                if 'MaxSpeed' in wheel.keys():
                    graph.add((name, namespace.hasMaxSpeed, maxSpeed))
                if 'ForPublicRoad' in wheel.keys():
                    graph.add((name, namespace.forPublicRoad, fpr))
                if (name, namespace.hasPrice, None) in graph:
                    graph.set((name, namespace.hasPrice, price))
                else:
                    graph.add((name, namespace.hasPrice, price))
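One possible simplification of the recurring hasPrice guard in insert_data(): rdflib's Graph.set() already removes any existing triples with the same subject and predicate before adding the new one, so the membership test is redundant. A sketch, using the names from the function above:

# Equivalent to the "if ... in graph: set / else: add" pattern above:
# Graph.set() deletes existing (name, hasPrice, *) triples, then adds the new one.
graph.set((name, namespace.hasPrice, price))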
class FairMetricData():
    def __init__(self, id):
        self.base = 'https://purl.org/fair-metrics/'
        self.id = URIRef(id)
        self.assertion = URIRef(id + '#assertion')
        # id = id.replace(self.base, '')  # HACK -- remove this line before merging commit
        self.g = ConjunctiveGraph()
        self.g.parse(id, format='trig')

    def getID(self):
        return self.id

    def getShortID(self):
        return self.id.replace(self.base, '')

    def getAuthors(self):
        authors = [o.toPython() for o in self.g.objects(subject=self.assertion, predicate=DCTERMS.author)]
        authors.sort()
        return ' \\\\ '.join(authors)

    def getTitle(self):
        return ', '.join([o.toPython() for o in self.g.objects(subject=self.assertion, predicate=RDFS.comment)])

    def getShortTitle(self):
        return ', '.join([o.toPython() for o in self.g.objects(subject=self.assertion, predicate=DCTERMS.title)])

    def getTopicDescription(self):
        descs = []
        for o in self.g.objects(subject=self.id, predicate=FOAF.primaryTopic):
            # o should be fair:A1.1
            for o2 in fairGraph.objects(subject=o, predicate=DCTERMS.description):
                descs.append(o2.toPython())
        return ' '.join(descs)

    def getTopicTitle(self):
        descs = []
        for o in self.g.objects(subject=self.id, predicate=FOAF.primaryTopic):
            # o should be fair:A1.1
            for o2 in fairGraph.objects(subject=o, predicate=DCTERMS.title):
                descs.append(o2.toPython())
        return ' '.join(descs)

    def getMeasuring(self):
        # return fm:measuring
        return self.getFMPropertyValue(FM.measuring)

    def getRationale(self):
        # return fm:rationale
        return self.getFMPropertyValue(FM.rationale)

    def getRequirements(self):
        # return fm:requirements
        return self.getFMPropertyValue(FM.requirements)

    def getProcedure(self):
        # return fm:procedure
        return self.getFMPropertyValue(FM.procedure)

    def getValidation(self):
        # return fm:validation
        return self.getFMPropertyValue(FM.validation)

    def getRelevance(self):
        # return fm:relevance
        return self.getFMPropertyValue(FM.relevance)

    def getExamples(self):
        # return fm:examples
        return self.getFMPropertyValue(FM.examples)

    def getComments(self):
        # return fm:comments
        return self.getFMPropertyValue(FM.comments)

    def getFMPropertyLabel(self, property):
        return ', '.join([o.toPython() for o in fairTermGraph.objects(subject=FM[property], predicate=RDFS['label'])])

    def getFMPropertyValue(self, property):
        return ', '.join([o.toPython() for o in self.g.objects(subject=self.assertion, predicate=property)])
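A brief, illustrative use of FairMetricData follows. The class depends on module-level names (FM, FOAF, DCTERMS, fairGraph, fairTermGraph) that are defined elsewhere in the original module, and the metric identifier below is just an example of the purl scheme used in self.base.

# Illustrative only: assumes the module-level globals used by the class
# (FM, FOAF, DCTERMS, fairGraph, fairTermGraph) are already set up.
metric = FairMetricData('https://purl.org/fair-metrics/FM_F1A')  # example metric purl
print(metric.getShortID())      # e.g. 'FM_F1A'
print(metric.getShortTitle())
print(metric.getAuthors())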
def convert(teifile, namespace): #graph_uri = "http://contextus.net/resource/blue_velvet/" ns = Namespace(namespace) graph = ConjunctiveGraph() graph.load(teifile, format="rdfa") graph.bind("default", ns) to_update = "" for prefix, nsuri in graph.namespaces(): #print("prefix: " + str(prefix) + " - " + str(nsuri)) if nsuri in ns: to_update = nsuri for s, p, o in graph: # print s, p, o if to_update != "" and to_update in s: graph.remove((s, p, o)) s = URIRef(s.replace(to_update, ns)) graph.add((s, p, o)) act = "" scene = "" line = "" char = 0 loc = 0 #timeline = ns['timeline/narrative'] #graph.add((timeline, RDF.type, ome['Timeline'])) tree = ET.parse(teifile) cast = dict() titleNode = tree.find('//title') castItems = tree.findall('/text/body/div1/castList//castItem') for castItem in castItems: actorNode = castItem.find('actor') roleNode = castItem.find('role') if roleNode != None: id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id") #print("Found castItem!") actor = None role = None # Check to see if we already have an entry if(roleNode != None and roleNode.get("about")): charname = roleNode.get("about") if(charname.find(":") > -1): nmsp,nom = charname.split(":", 1) charcode = "character/" + str(char) charref = nmsp + ":" + charcode + "]" role = extractCURIEorURI(graph, charref,nom[0:-1]) char += 1 #print("1:" + charname + ": adding id " + id + " to " + role) else: role = extractCURIEorURI(graph, charname) #print("2:" + charname + ": adding id " + id + " to " + role) cast[id] = role graph.add((role, RDF.type, omb['Character'])) #print(charname + ": adding id " + id + " to " + role) if(actorNode != None and actorNode.get("about")): actor = extractCURIEorURI(graph, actorNode.get("about")) graph.add((actor, RDF.type, omb['Being'])) if actor != None and role != None: graph.add((actor, omb['portrays'], role)) graph.add((role, omb['portrayed-by'], actor)) eventCount = 1 groupCount = 1 prior_event = None actItems = tree.findall('/text/body/div1') ref = "" for actItem in actItems: if actItem.get("type") == "act": act = actItem.get("n") sceneItems = actItem.findall('div2') for sceneItem in sceneItems: #print("Found sceneItems!") if sceneItem.get("type") == "scene": scene = sceneItem.get("n") # Work out the location of this scene location = None stageItems = sceneItem.findall("stage") #internalnum = 1 stagenum = 0 speechnum = 1 for stageItem in stageItems: if stageItem.get("type") == "location": # The RDFa parser doesn't handle the type - so we can grab that here. if stageItem.get("about") != None: locname = stageItem.get("about") # Adding location type/oml:space for location if stageItem.get("typeof") and stageItem.get("about"): type = extractCURIEorURI(graph, stageItem.get("typeof")) #print "1. Location: " + str(location) + " Type: " + str(type) elif stageItem.get("about"): #print "2. 
Location: " + str(locname) type = extractCURIEorURI(graph, oml['Space']) # Get location value and add rdfs:label is location is not using the TEI value if(locname.find(":") > -1): nmsp,nom = locname.split(":", 1) loccode = "location/" + str(loc) locref = nmsp + ":" + loccode + "]" location = extractCURIEorURI(graph, locref, nom[0:-1]) loc += 1 graph.add((location, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(nom[0:-1]))) else: location = extractCURIEorURI(graph, stageItem.get("about")) # Add location to graph graph.add((location, RDF.type, type)) else: location = "" #print("Adding location type: " + type + " (" + location + ")") if cast: # Work out a list of all cast in a given section currentCast = list() speakers = list() # Iterate through elements within stageItem # Find speaker events and add to list of current cast for inclusion in social event # Find reference events and add to ongoing social event ? # Find stage events # If event is an entrance then # create social event for people talking before entrance # create travel event i.e. entrance # add new arrival to current cast list # If event is exit event then # create social event for people talking before exit # create travel event i.e. exit # if leavers are not named directly the calculate who is leaving # remove leavers from current cast list # If reach end of scene then create social event with current cast list #Also need to check if social event before exit has same composition as social event after exit since then they should be merged event = ns['event/'+str(eventCount)] group = ns['group/'+str(groupCount)] refersTo = list() #parent = None speakerNodes = list() speakerRef = list() xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) stagecount = 0 stage_array = list() for node in sceneItem.getiterator(): #print("Node: " + node.tag) """ if node.tag == "lb": if node.get("ed") == "F1": line = node.get("n") if titleNode != None: ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line) else: ref = str(act) + "." + str(scene) + "." 
+ str(line) #xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "'])" xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) #print("Ref: " + xpointer) """ if node.tag == "sp": id = node.get("who") if id and cast: speakers.append(cast[id[1:]]) speakerNodes.append(node) if perseusid == None: speakerRef.append(ref) else: #speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)" speechRef = xpointer + "#xpointer(//div2/sp[" + str(speechnum) + "])"; speakerRef.append(speechRef) #print("Line ref: " + ref) if cast[id[1:]] not in currentCast: currentCast.append(cast[id[1:]]) #internalnum = 1 speechnum += 1 stagecount = 0 previousl = 0 for subnode in node.getiterator(): if subnode.tag == "l": previousl += 1 if subnode.tag == "stage": #print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n") stage_array.append(previousl) stagecount += 1 elif node.tag == "stage": if stagecount > 0: s_max = len(stage_array) diff = s_max - stagecount #if diff == 0: # stagenum += 1 entRef = xpointer + "#xpointer(//div2/sp[" + str(speechnum - 1) + "]/l[" + str(stage_array[diff]) +"]/stage)"; #internalnum += 1 stagecount -= 1 else: stagenum += 1 entRef = xpointer + "#xpointer(//div2/stage[" + str(stagenum) +"])"; if node.get("type") == "entrance": # Add Social Events for all the people who spoke since the last break (if there were any) update = list() update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location) eventCount = update[0] prior_event = update[1] event = ns['event/'+str(eventCount)] speakers = list() speakerNodes = list() speakerRef = list() # Add Travel Event graph.add((event, RDF.type, omj['Travel'])) if perseusid == None: graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref))) else: #entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)" graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef))) #print("Entrance event. 
GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) #print("Found entrence event!") if location: graph.add((event, ome['to'], location)) involved = node.get("about") if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"): involved = involved[1:-1] chunks = involved.split() chunk_count = len(chunks) if chunk_count > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for chunk in chunks: striped = chunk.strip() if(len(striped) > 0 and striped[0] == "[" and striped[-1] == "]"): striped = striped[1:-1] currentCast.append(cast[striped]) if chunk_count > 1: graph.add((group, ome['contains'], cast[striped])) if en == chunk_count: event_label = event_label[0:-2] + " and " + striped graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " arrive"))) elif en < chunk_count: event_label += striped + ", " else: #print("Adding person as subject-entity to entry event " + str(eventCount)) graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(striped + " arrives"))) graph.add((event, ome['has-subject-entity'], cast[striped])) en += 1 if chunk_count > 1: graph.add((event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to entry event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/'+str(groupCount)] if(prior_event): graph.add((event, ome['follows'], prior_event)) graph.add((prior_event, ome['precedes'], event)) prior_event = event eventCount = eventCount + 1 event = ns['event/'+str(eventCount)] if node.get("type") == "exit": # Add Social Events for all the people who spoke since the last break (if there were any) update = list() update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location) eventCount = update[0] prior_event = update[1] event = ns['event/'+str(eventCount)] speakers = list() speakerNodes = list() speakerRef = list() # Add Travel Event graph.add((event, RDF.type, omj['Travel'])) if perseusid == None: graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref))) else: #exitRef = xpointer #graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(exitRef))) graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef))) #print("Found entrence event!") if location != None: graph.add((event, ome['from'], location)) involved = node.get("about") if involved.strip() == "" or "-all" in involved: # Remove everyone #print("Exit all. 
GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) #for peep in currentCast: # print(peep) if len(currentCast) > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for peep in currentCast: short_ref = "" for key, value in cast.iteritems(): if peep == value: short_ref = key if len(currentCast) > 1: graph.add((group, ome['contains'], peep)) if en == len(currentCast): event_label = event_label[0:-2] + " and " + short_ref graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave"))) elif en < len(currentCast): event_label += short_ref + ", " else: #print("Adding person as subject-entity to exuant event " + str(eventCount)) graph.add((event, ome['has-subject-entity'], peep)) graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves"))) en += 1 if len(currentCast) > 1: graph.add((event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to exuant event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/'+str(groupCount)] currentCast = list() elif "!" in involved: #print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) #print("Event: " + involved); if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"): involved = involved[1:-1] involved = involved.strip() if(len(involved) > 0 and involved[0] == "!" and involved[1] == "(" and involved[-1] == ")"): involved = involved[2:-1] #print("involved: " + involved) striped = involved.strip() c_ids = striped.split() chunks = list() for stay in c_ids: #print("Staying: " + cast[stay]) chunks.append(cast[stay]) staying = list() going = list() for player in currentCast: #print("Player: " + player) if player in chunks: staying.append(player) else: going.append(player) going_count = len(going) if going_count > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for ghost in going: #print("ghost: " + ghost) short_ref = "" for key, value in cast.iteritems(): if ghost == value: short_ref = key if ghost in currentCast: currentCast.remove(ghost) #print("Current cast count: " + str(len(currentCast))) if going_count > 1: graph.add((group, ome['contains'], ghost)) if en == len(going): event_label = event_label[0:-2] + " and " + short_ref graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave"))) elif en < len(going): event_label += short_ref + ", " else: #print("Adding person as subject-entity to exit event " + str(eventCount)) graph.add((event, ome['has-subject-entity'], ghost)) graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves"))) en += 1 if going_count > 1: graph.add((event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to exit event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/'+str(groupCount)] else: #print("Exit some. 
GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"): involved = involved[1:-1] striped = involved.strip() chunks = striped.split() #print("striped: " + striped) chunk_count = len(chunks) if chunk_count > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for chunk in chunks: #print("chunk: " + chunk) ghost = cast[chunk] #print("ghost: " + ghost) if ghost in currentCast: currentCast.remove(ghost) #print("Current cast count: " + str(len(currentCast))) if chunk_count > 1: graph.add((group, ome['contains'], ghost)) if en == len(currentCast): event_label = event_label[0:-2] + " and " + chunk graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave"))) elif en < len(currentCast): event_label += chunk + ", " else: #print("Adding person as subject-entity to exit event " + str(eventCount)) graph.add((event, ome['has-subject-entity'], ghost)) graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(chunk + " leaves"))) en += 1 if chunk_count > 1: graph.add((event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to exit event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/'+str(groupCount)] if(prior_event): graph.add((event, ome['follows'], prior_event)) graph.add((prior_event, ome['precedes'], event)) prior_event = event eventCount = eventCount + 1 event = ns['event/'+str(eventCount)] #elif node.tag == "rs": # #print("Found rs node") # if parent: # #print("Parent type is " + parent.tag) # if parent.tag == "p" or parent.tag == "l": # refersTo.append(node.get("about")) #parent = node # Add Social Events for all the people who spoke since the last break (if there were any) #print("Final section of scene, currentCast:" + str(len(currentCast)) + " sperkers: " + str(len(speakers))) update = list() update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location) eventCount = update[0] prior_event = update[1] event = ns['event/'+str(eventCount)] group = ns['group/'+str(groupCount)] speakers = list() speakerNodes = list() currentCast = list() speakerRef = list() print graph.serialize(format='xml')
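The convert() routine above leans on several module-level names defined elsewhere in the original script (perseusid, the ome/omb/oml/omj namespaces, extractCURIEorURI, getSocial), so it cannot run standalone. A minimal, purely illustrative entry point, assuming those globals are in scope, might look like this:

# Illustrative only: convert() depends on module-level globals (perseusid,
# ome, omb, oml, omj, extractCURIEorURI, getSocial) defined elsewhere.
import sys

if __name__ == '__main__':
    teifile = sys.argv[1]        # path to a TEI XML play
    namespace = sys.argv[2]      # base namespace for the generated resources
    convert(teifile, namespace)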
class ClusterMember:
    def __init__(self, model, uri, label=None, type_=None, debug_info=None):
        self.model = model
        self.uri = URIRef(uri)
        self.__id = None
        self.__label = label
        self.__all_labels = None
        self.__type = type_
        self.__targets = None
        self.__freebases = None
        self.__qids = None
        self.__q_labels = None
        self.__q_aliases = None
        self.__q_urls = None
        self.__source = None
        self.__context_pos = []
        self.__context_extractor = None
        self.__cluster: Cluster = None
        self.__debug_info = debug_info
        if model.graph:
            self.__open_clause = 'GRAPH <%s> {' % self.model.graph
            self.__close_clause = '}'
        else:
            self.__open_clause = self.__close_clause = ''

    @property
    def id(self):
        if not self.__id:
            self.__id = self.uri.replace('http://www.isi.edu/gaia/entities/', '').replace('http://www.columbia.edu/entities/', '')
        return self.__id

    @property
    def label(self):
        if not self.__label:
            self._init_member()
        return self.__label

    @property
    def all_labels(self):
        if not self.__all_labels:
            self.__all_labels = Counter()
            query = """
            SELECT ?label (COUNT(?label) AS ?n)
            WHERE {
                ?member aida:justifiedBy/skos:prefLabel ?label .
            }
            GROUP BY ?label ORDER BY DESC(?n) """
            for label, n in self.model.sparql.query(query, namespaces, {'member': self.uri}):
                if label:
                    label = " ".join(label.split())  # remove double spaces
                    self.__all_labels[label] = int(n)
            query = """
            SELECT ?label (COUNT(?label) AS ?n)
            WHERE {
                ?member aida:hasName ?label .
            }
            GROUP BY ?label ORDER BY DESC(?n) """
            for label, n in self.model.sparql.query(query, namespaces, {'member': self.uri}):
                if label:
                    label = " ".join(label.split())  # remove double spaces
                    if label in self.__all_labels:
                        self.__all_labels[label] += int(n)
                    else:
                        self.__all_labels[label] = int(n)
        return self.__all_labels.most_common()

    @property
    def type(self):
        if not self.__type:
            self._init_member()
        return self.__type

    @property
    def type_text(self):
        _, text = split_uri(self.type)
        return text

    @property
    def targets(self):
        if self.__targets is None:
            self._init_member()
        return self.__targets

    @property
    def freebases(self):
        if self.__freebases is None:
            self._init_member()
        return self.__freebases

    @property
    def qids(self):
        if self.__qids is None and self.freebases:
            self._init_qnode()
        return self.__qids

    @property
    def q_urls(self):
        if self.__qids is None and self.freebases:
            self._init_qnode()
        return self.__q_urls

    @property
    def q_labels(self):
        if self.__q_labels is None and self.freebases:
            self._init_qnode()
        return self.__q_labels

    @property
    def q_aliases(self):
        if self.__q_aliases is None and self.freebases:
            self._init_qnode()
        return self.__q_aliases

    def _init_qnode(self):
        self.__qids = {}  # qid to score
        self.__q_urls = {}
        self.__q_labels = {}
        self.__q_aliases = {}
        for fbid, score in self.freebases.items():
            if ":NIL" not in fbid:
                fbid = '/' + fbid[fbid.find(':') + 1:].replace('.', '/')
                query = """
                SELECT ?qid ?label
                WHERE {
                    ?qid wdt:P646 ?freebase .
                    ?qid rdfs:label ?label filter (lang(?label) = "en") .
                }
                LIMIT 1 """
                for q_url, label in wikidata_sparql.query(query, namespaces, {'freebase': Literal(fbid)}):
                    qid = str(q_url).rsplit('/', 1)[1]
                    self.__qids[qid] = score
                    self.__q_urls[qid] = str(q_url)
                    self.__q_labels[qid] = str(label)
                query = """
                SELECT ?qid ?alias
                WHERE {
                    ?qid wdt:P646 ?freebase .
                    ?qid skos:altLabel ?alias filter (lang(?alias) = "en") .
                } """
                aliases = []
                qid = None
                for q_url, alias in wikidata_sparql.query(query, namespaces, {'freebase': Literal(fbid)}):
                    qid = str(q_url).rsplit('/', 1)[1]
                    aliases.append(str(alias))
                self.__q_aliases[qid] = ', '.join(aliases)

    @property
    def context_extractor(self):
        if self.__context_extractor is None:
            self.__context_extractor = LTFSourceContext(self.source)
        return self.__context_extractor

    @property
    def roles(self):
        query = """
        SELECT ?pred ?obj ?objtype (MIN(?objlbl) AS ?objlabel)
        WHERE {
            ?statement rdf:subject ?event ;
                       rdf:predicate ?pred ;
                       rdf:object ?obj .
            ?objstate rdf:subject ?obj ;
                      rdf:predicate rdf:type ;
                      rdf:object ?objtype .
            OPTIONAL { ?obj aida:hasName ?objlbl }
        }
        GROUP BY ?pred ?obj ?objtype """
        for pred, obj, obj_type, obj_lbl in self.model.sparql.query(query, namespaces, {'event': self.uri}):
            if not obj_lbl:
                _, obj_lbl = split_uri(obj_type)
            # _, pred = split_uri(pred)
            ind = pred.find('_')
            pred = pred[ind + 1:]
            yield pred, ClusterMember(self.model, obj, obj_lbl, obj_type)

    @property
    def events_by_role(self):
        query = """
        SELECT ?pred ?event ?event_type (MIN(?lbl) AS ?label)
        WHERE {
            ?event a aida:Event .
            ?statement rdf:subject ?event ;
                       rdf:predicate ?pred ;
                       rdf:object ?obj .
            ?event_state rdf:subject ?event ;
                         rdf:predicate rdf:type ;
                         rdf:object ?event_type .
            OPTIONAL { ?event aida:justifiedBy/skos:prefLabel ?lbl }
        }
        GROUP BY ?pred ?event ?event_type """
        for pred, event, event_type, event_lbl in self.model.sparql.query(query, namespaces, {'obj': self.uri}):
            if not event_lbl:
                _, event_lbl = split_uri(event_type)
            ind = pred.find('_')
            pred = pred[ind + 1:]
            yield pred, ClusterMember(self.model, event, event_lbl, event_type)

    @property
    def entity_relations(self):
        query = """
        SELECT ?relation ?pred2 ?obj2 ?relation_type (min(?lbl) as ?label)
        WHERE {
            ?relation a aida:Relation .
            ?s1 rdf:subject ?relation ;
                rdf:predicate ?pred ;
                rdf:object ?obj .
            ?s2 rdf:subject ?relation ;
                rdf:predicate rdf:type ;
                rdf:object ?relation_type .
            ?s3 rdf:subject ?relation ;
                rdf:predicate ?pred2 ;
                rdf:object ?obj2 .
            OPTIONAL {?obj2 aida:hasName ?lbl}
            filter(?s3 != ?s2 && ?s3 != ?s1)
        }
        groupby ?relation ?pred2 ?obj2 ?relation_type """
        for relation, pred, obj, relation_type, label in self.model.sparql.query(query, namespaces, {'obj': self.uri}):
            _, relation_type = split_uri(relation_type)
            ind = pred.find('_')
            pred = pred[ind + 1:]
            yield relation_type, obj, label

    @property
    def cluster(self):
        if self.__cluster is None:
            query = "SELECT ?cluster WHERE { %s ?membership aida:cluster ?cluster ; aida:clusterMember ?member . MINUS {?cluster aida:prototype ?member} %s}" % (
                self.__open_clause, self.__close_clause)
            for cluster, in self.model.sparql.query(query, namespaces, {'member': self.uri}):
                self.__cluster = self.model.get_cluster(cluster)
        return self.__cluster

    def _init_member(self):
        query = """
        SELECT ?label ?type
        WHERE {
            OPTIONAL { ?member aida:hasName ?label }
            OPTIONAL { ?member aida:justifiedBy ?justification .
                       ?justification skos:prefLabel ?label }
            ?statement rdf:subject ?member ;
                       rdf:predicate rdf:type ;
                       rdf:object ?type .
        }
        LIMIT 1 """
        for label, type_ in self.model.sparql.query(query, namespaces, {'member': self.uri}):
            if not label:
                _, label = split_uri(type_)
            self.__label = label
            self.__type = type_
        self.__targets = {}
        if self.__debug_info:
            if self.__debug_info['targets']:
                for i in range(0, len(self.__debug_info['targets'])):
                    target = self.__debug_info['targets'][i]
                    score = self.__debug_info['target_scores'][i]
                    self.__targets[target] = score
        else:
            query = """
            SELECT ?target
            WHERE {
                ?member aida:link/aida:linkTarget ?target
            } """
            for target, in self.model.sparql.query(query, namespaces, {'member': self.uri}):
                self.__targets[str(target)] = 0
        self.__freebases = {}
        if self.__debug_info:
            if self.__debug_info['fbid']:
                for i in range(0, len(self.__debug_info['fbid'])):
                    fbid = self.__debug_info['fbid'][i]
                    score = self.__debug_info['fbid_score_avg'][i]
                    self.__freebases[fbid] = score
        else:
            query = """
            SELECT DISTINCT ?fbid {
                ?member aida:privateData [
                    aida:jsonContent ?fbid ;
                    aida:system <http://www.rpi.edu/EDL_Freebase>
                ]
            } """
            for j_fbid, in self.model.sparql.query(query, namespaces, {'member': self.uri}):
                fbids = json.loads(j_fbid).get('freebase_link').keys()
                for fbid in fbids:
                    self.__freebases[fbid] = 0

    def _init_source(self):
        query = """
        SELECT DISTINCT ?source ?start ?end
        WHERE {
            ?member aida:justifiedBy ?justification .
            ?justification aida:source ?source ;
                           aida:startOffset ?start ;
                           aida:endOffsetInclusive ?end .
        }
        ORDER BY ?start """
        for source, start, end in self.model.sparql.query(query, namespaces, {'member': self.uri}):
            self.__source = str(source)
            self.__context_pos.append((int(start), int(end)))

    @property
    def source(self):
        if not self.__source:
            self._init_source()
        return self.__source

    @property
    def mention(self):
        if self.context_extractor.doc_exists():
            for start, end in self.__context_pos:
                res = self.context_extractor.query_context(start, end)
                if not res:
                    continue
                yield res

    def __hash__(self):
        return self.uri.__hash__()
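ClusterMember.type_text and the label fall-backs above rely on rdflib's split_uri helper, which splits a URI into its namespace part and local name. A quick illustration with a made-up URI:

# split_uri() returns a (namespace, local name) tuple; the URI here is an example.
from rdflib import URIRef
from rdflib.namespace import split_uri

ns_part, local = split_uri(URIRef('http://example.org/onto#Person'))
print(local)   # -> 'Person'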