from pathlib import Path

from rdflib import URIRef


def get_uri_root(uri: URIRef) -> str:
    """Gets the root of a URI (everything but the fragment, or name).

    TODO should this be a universal util?
    """
    pathlike_uri = Path(uri)
    if uri[-1] == "/":
        return uri
    if "#" in pathlike_uri.name:
        return "#".join(uri.split("#")[0:-1]) + "#"
    else:
        return "/".join(uri.split("/")[0:-1]) + "/"
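# A quick check of get_uri_root on the three URI shapes it handles; the
# example URIs below are invented for illustration.
assert get_uri_root(URIRef("http://example.org/onto#Person")) == "http://example.org/onto#"
assert get_uri_root(URIRef("http://example.org/id/person/42")) == "http://example.org/id/person/"
assert get_uri_root(URIRef("http://example.org/vocab/")) == "http://example.org/vocab/"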
import sys

from rdflib import Literal, URIRef
from rdflib.namespace import DCTERMS, RDF

# majority_vote, sluggify, FABIO and JOURNAL_DATA_PROPERTIES are defined
# elsewhere in the source module.


def process_journal(records, writer, mappings):
    """Merge journal records by majority vote and emit RDF quads for one journal."""
    record, fields = majority_vote(records, ('Journal',), mappings)
    if record.get('issn'):
        uri = URIRef('urn:issn:%s' % record['issn'])
        graph_uri = URIRef('/graph/issn/%s' % record['issn'])
    elif record.get('x-nlm-ta'):
        uri = URIRef('/id/journal/%s' % sluggify(record['x-nlm-ta']))
        graph_uri = URIRef('/graph/journal/%s' % sluggify(record['x-nlm-ta']))
    elif record.get('name'):
        uri = URIRef('/id/journal/%s' % sluggify(record['name']))
        graph_uri = URIRef('/graph/journal/%s' % sluggify(record['name']))
    else:
        sys.stderr.write("Unidentifiable: %s" % record)
        return
    for id_, _ in fields['id']:
        mappings['id'][id_] = uri
    mappings['journal'][uri] = graph_uri.split('/', 3)[-1]
    writer.send((uri, RDF.type, FABIO.Journal, graph_uri))
    for key, predicate in JOURNAL_DATA_PROPERTIES:
        if key in record:
            writer.send((uri, predicate, Literal(record[key]), graph_uri))
    if isinstance(record.get('publisher'), URIRef):
        writer.send((uri, DCTERMS.publisher, record['publisher'], graph_uri))
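# Hedged sketch of a writer that process_journal could drive. The coroutine
# protocol (consuming (s, p, o, graph) quads via send()) is inferred from the
# calls above; quad_writer and the ConjunctiveGraph store are illustrative
# stand-ins, not the original pipeline's writer.
from rdflib import ConjunctiveGraph


def quad_writer(store):
    """Coroutine: add each received (s, p, o, context) quad to the store."""
    while True:
        s, p, o, ctx = yield
        store.get_context(ctx).add((s, p, o))


store = ConjunctiveGraph()
writer = quad_writer(store)
next(writer)  # prime the coroutine before the first send()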
from rdflib import URIRef


def parse_answer(db_answer, prop_ans):
    answers = []
    print(db_answer['results']['bindings'])
    for binding in db_answer['results']['bindings']:
        if binding[prop_ans]['type'] == 'uri':
            resource = URIRef(binding[prop_ans]['value'])
            string_value = resource.split('/')[-1].replace('_', ' ')
            answers.append(string_value)
        else:
            answers.append(binding[prop_ans]['value'])
    # answer = db_answer['results']['bindings'][0][prop_ans]['value']
    answer = ",".join(str(x) for x in answers)
    return answer


# recs_ent, prop_ent = parse_nlp(text)
# name = resource_generator(recs_ent)
# prop = property_generator(prop_ent)
# answer = ask_DBpedia(name, prop)
# db_answer = parse_answer(answer, prop)
# print(db_answer)
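# Minimal check of parse_answer against a hand-made result in the standard
# SPARQL 1.1 JSON shape that DBpedia returns; the binding values are invented.
sample = {
    'results': {
        'bindings': [
            {'mother': {'type': 'uri',
                        'value': 'http://dbpedia.org/resource/Ann_Dunham'}},
            {'mother': {'type': 'literal', 'value': 'Stanley Ann Dunham'}},
        ]
    }
}
print(parse_answer(sample, 'mother'))  # -> Ann Dunham,Stanley Ann Dunham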
def prefix_this(item):
    # DEBUG(f'item: {item} type: {type(item)}')
    if type(item) is RDFS_Resource:
        item = item.iri
    elif type(item) is URIRef:
        item = str(item)
    if type(item) is str and item.startswith('http'):
        # Compact the full IRI to a prefixed name using the graph's bound namespaces
        iri = URIRef(item).n3(G.graph.namespace_manager)
    else:
        iri = item
    if iri.count('_') > 0:
        # Keep only the part after the first underscore
        iri = iri.split('_', 1)[1]
    # DEBUG(f'prefixed {item} to: {iri}')
    return iri
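# Hedged usage sketch: prefix_this expects a module-level G whose .graph is an
# rdflib Graph with bound prefixes, plus an RDFS_Resource type exposing .iri.
# The stand-in G below is invented purely for illustration.
from types import SimpleNamespace

from rdflib import Graph, Namespace, URIRef

g = Graph()
g.bind('dbr', Namespace('http://dbpedia.org/resource/'))
G = SimpleNamespace(graph=g)

print(prefix_this(URIRef('http://dbpedia.org/resource/Pizza')))  # -> dbr:Pizza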
from typing import Tuple

from rdflib import URIRef


def get_base_uri_and_local_name(uri: URIRef) -> Tuple[str, str]:
    local_name = uri.split("#")[-1].split("/")[-1]
    # Slice rather than split on local_name, which would truncate the base URI
    # if the local name also occurs earlier in the URI (and would crash on an
    # empty local name).
    base_uri = uri[: len(uri) - len(local_name)]
    return base_uri, local_name
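# Quick check on hash- and slash-style URIs (the example URIs are invented):
print(get_base_uri_and_local_name(URIRef("http://example.org/onto#Person")))
# -> ('http://example.org/onto#', 'Person')
print(get_base_uri_and_local_name(URIRef("http://example.org/id/person/42")))
# -> ('http://example.org/id/person/', '42')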
from rdflib import URIRef

# URIRef subclasses str, so plain string methods like split() work directly.
resource = URIRef('http://dbpedia.org/resource/Ann_Dunham')
print(resource.split('/')[-1])  # -> Ann_Dunham
from collections import OrderedDict

from rdflib import BNode, Graph, URIRef
from rdflib.namespace import DC, RDF, RDFS, SKOS
from SPARQLWrapper import SPARQLWrapper, TURTLE

# ENDPOINT, SCHEMA, RDAU, uri_to_url and the Work class are defined elsewhere
# in the source module.


class Resource:
    prefixes = """
        PREFIX schema: <http://schema.org/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
    """

    query = """
        %(prefixes)s
        CONSTRUCT {
          <%(uri)s> ?p ?o .
          ?o schema:name ?oname ;
             skos:prefLabel ?olabel .
          ?wab schema:about <%(uri)s> ;
               schema:name ?wabname .
        }
        WHERE {
          {
            <%(uri)s> ?p ?o .
            OPTIONAL {
              { ?o schema:name ?oname }
              UNION
              { ?o skos:prefLabel ?olabel }
            }
          }
          UNION
          {
            # works about
            ?wab schema:about <%(uri)s> .
            ?wab schema:name ?wabname .
          }
        }
    """

    def __init__(self, uri, graph=None):
        if isinstance(uri, (URIRef, BNode)):
            self.uri = uri
        else:
            self.uri = URIRef(uri)
        if graph is not None:
            self.graph = graph
        else:
            self.graph = self.query_for_graph()

    def query_for_graph(self):
        sparql = SPARQLWrapper(ENDPOINT, returnFormat=TURTLE)
        sparql.setOnlyConneg(True)
        sparql.setQuery(self.query % {'uri': self.uri, 'prefixes': self.prefixes})
        graph = Graph()
        graph.parse(sparql.query().response, format='turtle')
        return graph

    def exists(self):
        return len(self.graph) > 0

    def typename(self):
        return self.__class__.__name__

    def name(self):
        # Note: Graph.preferredLabel exists only in older rdflib releases.
        props = (SCHEMA.name, SKOS.prefLabel, DC.title, RDFS.label)
        labels = self.graph.preferredLabel(self.uri, lang='en', labelProperties=props)
        if len(labels) > 0:
            return labels[0][1]
        labels = self.graph.preferredLabel(self.uri, labelProperties=props)
        if len(labels) > 0:
            return labels[0][1]
        return "<%s>" % self.uri

    def __str__(self):
        return self.name()

    def sort_key(self):
        return self.name().lower()

    def url(self):
        if isinstance(self.uri, BNode):
            return None
        return uri_to_url(self.uri)

    def localname(self):
        ln = self.uri.split(':')[-1].split('/')[-1]
        if ln == '':
            return 'index'
        return ln

    def property_name(self, prop):
        return prop.split('/')[-1].split('#')[-1]  # local name

    def properties(self, uri=None):
        if uri is None:
            uri = self.uri
        propvals = OrderedDict()  # key: property local name, value: list of values
        props = set(prop for prop in self.graph.predicates(uri, None)
                    if prop not in (RDF.type, SCHEMA.workExample, SCHEMA.exampleOfWork))
        for prop in sorted(props, key=lambda prop: self.property_name(prop).lower()):
            propname = self.property_name(prop)
            propvals[propname] = []
            for obj in self.graph.objects(uri, prop):
                if isinstance(obj, URIRef) or \
                        self.graph.value(obj, SCHEMA.name, None, any=True) is not None:
                    val = Resource(obj, self.graph)
                elif isinstance(obj, BNode):
                    val = self.properties(obj)
                else:
                    val = obj
                propvals[propname].append(val)
            propvals[propname].sort(key=lambda val: str(val).lower())
        return propvals

    def has_instances(self):
        return False

    def has_works_about(self):
        return self.graph.value(None, SCHEMA.about, self.uri, any=True) is not None

    def works_about(self):
        works = [Work(work, self.graph)
                 for work in self.graph.subjects(SCHEMA.about, self.uri)]
        works.sort(key=lambda w: w.sort_key())
        return works

    def has_authored_works(self):
        return False

    def has_contributed_works(self):
        return False

    def is_agent(self):
        return False

    def serialize(self, fmt):
        if fmt == 'json-ld':
            context = {"@vocab": SCHEMA,
                       "rdau": RDAU,
                       "skos": SKOS,
                       "skos:prefLabel": {"@container": "@language"}}
            return self.graph.serialize(format='json-ld', context=context)
        return self.graph.serialize(format=fmt)
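# Hedged usage sketch: Resource relies on the module-level ENDPOINT, SCHEMA,
# RDAU, uri_to_url and Work named above, so this only illustrates the intended
# call pattern rather than a runnable standalone example; the entity URI is
# invented.
res = Resource('http://example.org/entity/123')  # runs the CONSTRUCT query against ENDPOINT
if res.exists():
    print(res.name())
    for prop, values in res.properties().items():
        print(prop, [str(v) for v in values])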
import re

import numpy as np
import owlready2 as rdy  # assumed import alias, based on the rdy.get_ontology() calls
import rdflib
from rdflib import Namespace, URIRef
from rdflib.namespace import OWL, RDF

# get_class_name, get_obj_prop, get_data_prop and get_class_individual are
# helper functions defined elsewhere in the source module.


def main():
    """TASK 2.5 SUBTASK OA.1"""
    np.random.seed(0)
    print('TASK 2.5 SUBTASK OA.1\n')

    # Load in reference and local (from task 2.4) ontologies
    ref_file_path = 'pizza.owl'
    loc_file_path = '2.2_protege_pizza_ontology.owl.xml'
    ref_onto = rdy.get_ontology(ref_file_path)
    ref_onto.load()
    loc_onto = rdy.get_ontology(loc_file_path)
    loc_onto.load()

    # Get classes
    ref_cl_ls = get_class_name(ref_onto)
    loc_cl_ls = get_class_name(loc_onto)

    # Get object properties
    ref_obj_prop_ls = get_obj_prop(ref_onto)
    loc_obj_prop_ls = get_obj_prop(loc_onto)

    # # Get data properties
    # ref_data_prop_ls = get_data_prop(ref_onto)
    # loc_data_prop_ls = get_data_prop(loc_onto)

    # Get individuals for all classes
    ref_cl_ind_ls = get_class_individual(ref_onto, ref_cl_ls)
    loc_cl_ind_ls = get_class_individual(loc_onto, loc_cl_ls)

    # Combine lists of classes, object and data properties and individuals by class
    # ref_onto_ls = ref_cl_ls + ref_obj_prop_ls + ref_data_prop_ls + ref_cl_ind_ls
    # loc_onto_ls = loc_cl_ls + loc_obj_prop_ls + loc_data_prop_ls + loc_cl_ind_ls
    ref_onto_ls = ref_cl_ls + ref_cl_ind_ls
    loc_onto_ls = loc_cl_ls + loc_cl_ind_ls

    # String-based matching of labels of the same type (e.g. class v class)
    # and across different types (e.g. class v individual)
    print('Performing string based matching of labels with same type and '
          'across different types...', end='\r')

    # Thresholds to control the regex subset string match
    max_len_diff = 8  # max length difference between two strings to recognise a subset match
    min_len = 5  # min length to perform a regex match

    # Approach to determine TP, FP and FN adapted from:
    # E. Jiménez-Ruiz, "INM713 Semantic Web Technologies and Knowledge Graphs,
    # Laboratory 8: Ontology Alignment," 2021.
    tp_exact_match_ls = []
    tp_subset_match_ls = []
    fp_match_ls = []
    fn_match_ls = []

    print('No. of elements in local graph: {}'.format(len(loc_onto_ls)))
    print('No. of elements in reference graph: {}'.format(len(ref_onto_ls)))
    iter_nr = len(loc_onto_ls) * len(ref_onto_ls)
    print('So iterate {} rounds to compare both sets of elements...'.format(iter_nr))

    # Compare every element in the local ontology for an exact or subset match
    # (to within a tolerance)
    for l_tup in loc_onto_ls:
        __exact_match_ls = []
        __subset_match_ls = []
        for r_tup in ref_onto_ls:
            # Difference in length between local and reference labels
            len_diff = len(l_tup[0]) - len(r_tup[0])
            # Exact string match
            if l_tup[0] == r_tup[0]:
                is_same_type = l_tup[1] == r_tup[1]
                __exact_match_ls += [(len_diff, is_same_type, l_tup, r_tup)]
            # Local label is a substring of the reference label, both labels
            # exceed the minimum length threshold, and the absolute length
            # difference is within the threshold
            elif re.search(l_tup[0], r_tup[0]) is not None \
                    and len(l_tup[0]) >= min_len and len(r_tup[0]) >= min_len \
                    and abs(len_diff) <= max_len_diff:
                is_same_type = l_tup[1] == r_tup[1]
                __subset_match_ls += [(len_diff, is_same_type, l_tup, r_tup)]
        # Exact/subset match of a local element in the reference: TRUE POSITIVE
        if len(__exact_match_ls) > 0:
            tp_exact_match_ls += __exact_match_ls
        if len(__subset_match_ls) > 0:
            tp_subset_match_ls += __subset_match_ls
        # No match found: FALSE POSITIVE (in local but not in reference ontology)
        if len(__exact_match_ls) + len(__subset_match_ls) == 0:
            fp_match_ls += [l_tup]

    # Reverse: compare every element in the reference ontology for a match
    for r_tup in ref_onto_ls:
        __match_ls = []
        for l_tup in loc_onto_ls:
            if re.search(r_tup[0], l_tup[0]) is not None:
                __match_ls += [r_tup]
        # In reference ontology but not in local ontology: FALSE NEGATIVE
        if len(__match_ls) == 0:
            fn_match_ls += [r_tup]

    # Calculate True +ve
    tp = len(tp_exact_match_ls) + len(tp_subset_match_ls)

    # True +ve based on exact string matches
    tp_exact = len(tp_exact_match_ls)
    tp_exact_diff_type = len([r for r in tp_exact_match_ls if not r[1]])  # different types
    tp_exact_same_type = len([r for r in tp_exact_match_ls if r[1]])  # same type

    # True +ve based on subset string matches
    tp_subset = len(tp_subset_match_ls)
    tp_subset_diff_type = len([r for r in tp_subset_match_ls if not r[1]])  # different types
    tp_subset_same_type = len([r for r in tp_subset_match_ls if r[1]])  # same type

    # False +ve and False -ve
    fp = len(fp_match_ls)
    fn = len(fn_match_ls)

    print('No. of True +ve: {}'.format(tp))
    print('\tBy exact string match: {}'.format(tp_exact))
    print('\t\tOf which both local and reference ontologies have the same type: {}'
          .format(tp_exact_same_type))
    print('\t\tOf which both local and reference ontologies have different types: {}'
          .format(tp_exact_diff_type))
    print('\tBy subset Regex match: {}'.format(tp_subset))
    print('\tbased on thresholds - min length {} and max char length difference {}'
          .format(min_len, max_len_diff))
    print('\t\tOf which both local and reference ontologies have the same type: {}'
          .format(tp_subset_same_type))
    print('\t\tOf which both local and reference ontologies have different types: {}'
          .format(tp_subset_diff_type))
    print('No. of False +ve: {}'.format(fp))
    print('No. of False -ve: {}'.format(fn))

    # Calculate precision, recall and F1 score while handling division by zero.
    # Precision is penalised by false positives, recall by false negatives.
    precision = tp / (tp + fp) if (tp + fp) != 0 else .0
    recall = tp / (tp + fn) if (tp + fn) != 0 else .0
    f1 = tp / (tp + .5 * (fp + fn)) if (tp + .5 * (fp + fn)) != 0 else .0
    print('Precision: {}'.format(precision))
    print('Recall: {}'.format(recall))
    print('F1 score: {}'.format(f1))

    # Analyse the types where a match is found to determine the appropriate
    # equivalence to define
    # Extract local and reference onto types for each exact match result
    print('Exact match - different types exist:')
    print(list(set([(tup[2][1], tup[3][1]) for tup in tp_exact_match_ls])))
    # Extract local and reference onto types for each subset match result
    print('Subset match - different types exist:')
    print(list(set([(tup[2][1], tup[3][1]) for tup in tp_subset_match_ls])))

    print('TRUE +VE exact match listings:')
    for tup in tp_exact_match_ls:
        print('\t', tup)
    print('TRUE +VE subset match listings:')
    for tup in tp_subset_match_ls:
        print('\t', tup)

    # BUILD EQUIVALENCE BETWEEN THE TWO ONTOLOGIES
    print('Read in the reference and local files as RDF graphs...', end='\r')

    # Load the local and reference ontologies as RDFLib graphs
    ref_g = rdflib.Graph().parse(ref_file_path)
    loc_g = rdflib.Graph().parse(loc_file_path)

    # Union the two graphs
    print('Union the two RDF graphs...', end='\r')
    uni_g = ref_g + loc_g

    # Bind a prefix for the local namespace
    aa = Namespace('http://www.city.ac.uk/ds/inm713/aaron_altrock#')
    uni_g.bind('aa', aa)

    # Equivalence-only graph
    print('Create a new RDF graph for the equivalent class and property triples...',
          end='\r')
    eqi_g = rdflib.Graph()
    eqi_g.bind('aa', aa)

    # Construct equivalence between classes and individuals
    for len_diff, is_same_type, loc_tup, ref_tup in tp_exact_match_ls + tp_subset_match_ls:
        print('Constructing equivalence between {}/{} and {}/{}...'.format(
            loc_tup[2], loc_tup[1], ref_tup[2], ref_tup[1]), end='\r')
        loc_type = loc_tup[1]
        ref_type = ref_tup[1]

        # Class v Class: local class owl:equivalentClass reference class
        if loc_type == 'class' and ref_type == 'class':
            s = URIRef(loc_tup[2].iri)
            p = OWL.equivalentClass
            o = URIRef(ref_tup[2].iri)
            uni_g.add((s, p, o))
            eqi_g.add((s, p, o))

        # Class Individual v Class: local individual rdf:type reference class
        if loc_type == 'class_ind' and ref_type == 'class':
            s = URIRef(loc_tup[2].iri)
            p = RDF.type
            o = URIRef(ref_tup[2].iri)
            uni_g.add((s, p, o))
            eqi_g.add((s, p, o))

        # Class Individual v Class Individual: local individual owl:sameAs
        # reference individual
        if loc_type == 'class_ind' and ref_type == 'class_ind':
            s = URIRef(loc_tup[2].iri)
            p = OWL.sameAs
            o = URIRef(ref_tup[2].iri)
            uni_g.add((s, p, o))
            eqi_g.add((s, p, o))

        # Object Property v Object Property: local property
        # owl:equivalentProperty reference property
        if loc_type == 'obj_prop' and ref_type == 'obj_prop':
            s = URIRef(loc_tup[2].iri)
            p = OWL.equivalentProperty
            o = URIRef(ref_tup[2].iri)
            uni_g.add((s, p, o))
            eqi_g.add((s, p, o))

    # Construct equivalence for object properties manually
    obj_prop_ls = []
    for s, p, o in uni_g:
        if p == RDF.type and o == OWL.ObjectProperty:
            __tup = (s.split('#')[-1], s)
            print(__tup)
            obj_prop_ls += [__tup]

    equi_obj_prop_ls = [
        (URIRef('http://www.city.ac.uk/ds/inm713/aaron_altrock#has_topping'),
         URIRef('http://www.co-ode.org/ontologies/pizza/pizza.owl#hasTopping'))
    ]

    # Add manually identified equivalent object properties
    for s, o in equi_obj_prop_ls:
        uni_g.add((s, OWL.equivalentProperty, o))
        eqi_g.add((s, OWL.equivalentProperty, o))

    # Save extended graphs to Turtle format
    uni_g.serialize(destination='2.5_oa1_union_g.ttl', format='ttl')
    print('Saved the unioned graph to 2.5_oa1_union_g.ttl.')
    eqi_g.serialize(destination='2.5_oa1_equivalence_g.ttl', format='ttl')
    print('Saved the equivalence triples to 2.5_oa1_equivalence_g.ttl.')

    # Save extended graphs to OWL (RDF/XML) format
    uni_g.serialize(destination='2.5_oa1_union_g.owl.xml', format='xml')
    print('Saved the unioned graph to 2.5_oa1_union_g.owl.xml.')
    eqi_g.serialize(destination='2.5_oa1_equivalence_g.owl.xml', format='xml')
    print('Saved the equivalence triples to 2.5_oa1_equivalence_g.owl.xml.')

    print('END')
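# Entry point: run the alignment task when the module is executed directly.
if __name__ == '__main__':
    main()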
def post(self, public=False):
    """
    post=<parent post>
    content=<html content>

    we get the user from the x-foaf-agent header
    """
    parent = self.get_argument('post', default=None) or self.get_argument("uri")
    assert parent is not None
    # maybe a legacy problem here with http/https, but blaster is still sending http
    parent = URIRef(parent)

    # this might be failing on ariblog, but that one is already safe
    ip = self.request.headers.get("X-Forwarded-For")
    if ip is not None:
        HoneypotChecker(open("priv-honeypotkey").read().strip()).check(ip)

    contentArg = self.get_argument("content", default="")
    if not contentArg.strip():
        raise ValueError("no text")

    if contentArg.strip() == 'test':
        return "not adding test comment"

    spamCheck(parent, contentArg)

    content = Literal(contentArg, datatype=RDF.XMLLiteral)

    stmts = []  # gathered in one list for an atomic add

    foafHeader = self.request.headers.get('X-Foaf-Agent')
    if not public:
        assert foafHeader
        user = URIRef(foafHeader)
        # make bnode-ish users for anonymous ones. need to get that username
        # passed in here
    else:
        if foafHeader:
            user = URIRef(foafHeader)
        else:
            user, moreStmts = newPublicUser(
                self.request.headers.get("X-Forwarded-For"),
                self.get_argument("name", ""),
                self.get_argument("email", ""))
            stmts.extend(moreStmts)

    secs = time.time()
    comment = newCommentUri(secs)
    now = literalFromUnix(secs)

    ctx = URIRef(parent + "/comments")

    stmts.extend([(parent, SIOC.has_reply, comment),
                  (comment, DCTERMS.created, now),
                  (comment, SIOC.has_creator, user),
                  ])
    stmts.extend(commentStatements(user, comment, content))

    db.writeFile(stmts, ctx, fileWords=[parent.split('/')[-1], now])

    try:
        self.sendAlerts(parent, user)
    except Exception as e:
        import traceback
        log.error(e)
        traceback.print_exc()