Example #1
from pathlib import Path

from rdflib import URIRef


def get_uri_root(uri: URIRef) -> str:
    """Gets the root of a URI (everything but the fragment, or name).

    TODO should this be a universal util?
    """
    pathlike_uri = Path(uri)
    # A URI that already ends in a slash is its own root
    if uri[-1] == "/":
        return str(uri)

    # Hash URI: keep everything up to and including the '#'
    if "#" in pathlike_uri.name:
        return "#".join(uri.split("#")[0:-1]) + "#"
    # Slash URI: keep everything up to and including the last '/'
    else:
        return "/".join(uri.split("/")[0:-1]) + "/"
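A quick usage sketch with illustrative URIs (assuming the function is used standalone, as above):

print(get_uri_root(URIRef("http://example.org/onto#Person")))
# -> http://example.org/onto#
print(get_uri_root(URIRef("http://example.org/id/journal/nature")))
# -> http://example.org/id/journal/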
Example #3
def process_journal(records, writer, mappings):
    record, fields = majority_vote(records, ('Journal',), mappings)

    if record.get('issn'):
        uri = URIRef('urn:issn:%s' % record['issn'])
        graph_uri = URIRef('/graph/issn/%s' % record['issn'])
    elif record.get('x-nlm-ta'):
        uri = URIRef('/id/journal/%s' % sluggify(record['x-nlm-ta']))
        graph_uri = URIRef('/graph/journal/%s' % sluggify(record['x-nlm-ta']))
    elif record.get('name'):
        uri = URIRef('/id/journal/%s' % sluggify(record['name']))
        graph_uri = URIRef('/graph/journal/%s' % sluggify(record['name']))
    else:
        sys.stderr.write("Unidentifiable: %s\n" % record)
        return

    for id, _ in fields['id']:
        mappings['id'][id] = uri
        mappings['journal'][uri] = graph_uri.split('/', 3)[-1]

    writer.send((uri, RDF.type, FABIO.Journal, graph_uri))

    for key, predicate in JOURNAL_DATA_PROPERTIES:
        if key in record:
            writer.send((uri, predicate, Literal(record[key]), graph_uri))

    if isinstance(record.get('publisher'), URIRef):
        writer.send((uri, DCTERMS.publisher, record['publisher'], graph_uri))
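process_journal depends on surrounding machinery (majority_vote, the FABIO namespace, JOURNAL_DATA_PROPERTIES, a coroutine-style writer). A hypothetical harness sketch, where quad_writer and the mappings layout are illustrative stand-ins rather than the original code:

from collections import defaultdict

def quad_writer():
    # Coroutine that receives (subject, predicate, object, graph) quads via .send()
    while True:
        quad = yield
        print(quad)

writer = quad_writer()
next(writer)  # prime the coroutine so it can accept .send()
mappings = defaultdict(dict)  # e.g. mappings['id'], mappings['journal']
# process_journal(records, writer, mappings)  # records come from upstream parsing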
Example #4
from rdflib import URIRef


def parse_answer(db_answer, prop_ans):
    answers = []
    for binding in db_answer['results']['bindings']:
        value = binding[prop_ans]['value']
        if binding[prop_ans]['type'] == 'uri':
            # For URI results keep only the last path segment and turn
            # underscores back into spaces (DBpedia-style resource names)
            resource = URIRef(value)
            answers.append(resource.split('/')[-1].replace('_', ' '))
        else:
            answers.append(value)
    return ",".join(str(x) for x in answers)


# recs_ent, prop_ent = parse_nlp(text)
# name = resource_generator(recs_ent)
# prop = property_generator(prop_ent)

# answer = ask_DBpedia(name, prop)
# db_answer = parse_answer(answer, prop)
# print(db_answer)
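For reference, a minimal illustration of the result shape parse_answer expects, following the W3C SPARQL 1.1 JSON results format ('person' is a made-up variable name):

sample = {
    'results': {
        'bindings': [
            {'person': {'type': 'uri',
                        'value': 'http://dbpedia.org/resource/Ann_Dunham'}},
            {'person': {'type': 'literal', 'value': 'Ann Dunham'}},
        ]
    }
}
print(parse_answer(sample, 'person'))  # -> Ann Dunham,Ann Dunham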
Example #5
def prefix_this(item):
    # DEBUG(f'item: {item} type: {type(item)}')
    if isinstance(item, RDFS_Resource):
        item = item.iri
    elif isinstance(item, URIRef):
        item = str(item)
    if isinstance(item, str) and item.startswith('http'):
        # Compact the full IRI using the prefixes bound on the graph
        iri = URIRef(item).n3(G.graph.namespace_manager)
    else:
        iri = item
    if iri.count('_') > 0:
        iri = iri.split('_', 1)[1]
    # DEBUG(f'prefixed {item} to: {iri}')
    return iri
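A hypothetical setup for prefix_this; G and RDFS_Resource come from this snippet's surrounding module, so the stand-ins below are illustrative only:

from rdflib import Graph, Namespace, URIRef

class RDFS_Resource:  # stand-in for the resource class referenced above
    def __init__(self, iri):
        self.iri = iri

class G:  # stand-in exposing a graph whose namespace_manager has bound prefixes
    graph = Graph()

G.graph.bind('foaf', Namespace('http://xmlns.com/foaf/0.1/'))
print(prefix_this(URIRef('http://xmlns.com/foaf/0.1/Person')))  # -> foaf:Person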
Example #6
from typing import Tuple
from rdflib import URIRef

def get_base_uri_and_local_name(uri: URIRef) -> Tuple[str, str]:
    local_name = uri.split("#")[-1].split("/")[-1]
    # Slice rather than split so an earlier occurrence of the local
    # name inside the URI cannot truncate the base prematurely
    base_uri = uri[:len(uri) - len(local_name)]
    return base_uri, local_name
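A quick check with an illustrative URI:

print(get_base_uri_and_local_name(URIRef('http://example.org/onto#Person')))
# -> ('http://example.org/onto#', 'Person')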
Example #7
from rdflib import URIRef
resource = URIRef('http://dbpedia.org/resource/Ann_Dunham')
print(resource.split('/')[-1])
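This works because rdflib's URIRef is a subclass of str, so ordinary string methods such as split apply directly; the line above prints Ann_Dunham.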
Example #8
class Resource:
    prefixes = """
      PREFIX schema: <http://schema.org/>
      PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
    """

    query = """
      %(prefixes)s
      
      CONSTRUCT {
        <%(uri)s> ?p ?o .
        ?o schema:name ?oname ;
           skos:prefLabel ?olabel .
        ?wab schema:about <%(uri)s> ;
             schema:name ?wabname .
      }
      WHERE {
        {
          <%(uri)s> ?p ?o .
          OPTIONAL {
            { ?o schema:name ?oname }
            UNION
            { ?o skos:prefLabel ?olabel }
          }
        }
        UNION
        { # works about
          ?wab schema:about <%(uri)s> .
          ?wab schema:name ?wabname .
        }
      }
    """

    def __init__(self, uri, graph=None):
        if isinstance(uri, (URIRef, BNode)):
            self.uri = uri
        else:
            self.uri = URIRef(uri)

        if graph is not None:
            self.graph = graph
        else:
            self.graph = self.query_for_graph()
    
    def query_for_graph(self):
        sparql = SPARQLWrapper(ENDPOINT, returnFormat=TURTLE)
        sparql.setOnlyConneg(True)
        sparql.setQuery(self.query % {'uri': self.uri, 'prefixes': self.prefixes})
        graph = Graph()
        graph.parse(sparql.query().response, format='turtle')
        return graph
    
    def exists(self):
        return len(self.graph) > 0
    
    def typename(self):
        return self.__class__.__name__

    def name(self):
        props = (SCHEMA.name, SKOS.prefLabel, DC.title, RDFS.label)
        labels = self.graph.preferredLabel(self.uri, lang='en', labelProperties=props)
        if len(labels) > 0:
            return labels[0][1]

        labels = self.graph.preferredLabel(self.uri, labelProperties=props)
        if len(labels) > 0:
            return labels[0][1]
        
        return "<%s>" % self.uri
    
    def __str__(self):
        return self.name()
      
    def sort_key(self):
        return self.name().lower()
    
    def url(self):
        if isinstance(self.uri, BNode):
            return None
        return uri_to_url(self.uri)
    
    def localname(self):
        ln = self.uri.split(':')[-1].split('/')[-1]
        if ln == '':
            return 'index'
        return ln
    
    def property_name(self, prop):
        return prop.split('/')[-1].split('#')[-1] # local name
    
    def properties(self, uri=None):
        if uri is None:
            uri = self.uri
        propvals = OrderedDict() # key: property URIRef, value: list of values
        props = set([prop for prop in self.graph.predicates(uri, None)
                     if prop not in (RDF.type, SCHEMA.workExample, SCHEMA.exampleOfWork)])
        for prop in sorted(props, key=lambda prop: self.property_name(prop).lower()):
            propname = self.property_name(prop)
            propvals[propname] = []
            for obj in self.graph.objects(uri, prop):
                if isinstance(obj, URIRef) or self.graph.value(obj, SCHEMA.name, None, any=True) is not None:
                    val = Resource(obj, self.graph)
                elif isinstance(obj, BNode):
                    val = self.properties(obj)
                else:
                    val = obj
                propvals[propname].append(val)
            propvals[propname].sort(key=lambda val: str(val).lower())
        return propvals
    
    def has_instances(self):
        return False
    
    def has_works_about(self):
        return self.graph.value(None, SCHEMA.about, self.uri, any=True) is not None
    
    def works_about(self):
        works = [Work(work, self.graph) for work in self.graph.subjects(SCHEMA.about, self.uri)]
        works.sort(key=lambda w: w.sort_key())
        return works

    def has_authored_works(self):
        return False

    def has_contributed_works(self):
        return False
      
    def is_agent(self):
        return False
    
    def serialize(self, fmt):
        if fmt == 'json-ld':
            context = {"@vocab": SCHEMA, "rdau": RDAU, "skos": SKOS, "skos:prefLabel": {"@container": "@language"} }
            return self.graph.serialize(format='json-ld', context=context)
        return self.graph.serialize(format=fmt)
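A hypothetical usage sketch, assuming ENDPOINT and the namespace constants (SCHEMA, SKOS, RDAU, etc.) are defined elsewhere in the module and the endpoint serves schema.org-shaped data:

res = Resource('http://example.org/work/123')  # illustrative URI
if res.exists():
    print(res.name(), res.localname())
    for prop, vals in res.properties().items():
        print(prop, [str(v) for v in vals])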
Example #9
def main():
    """TASK 2.5 SUBTASK OA.1"""
    np.random.seed(0)

    print('TASK 2.5 SUBTASK OA.1\n')
    # Load in reference and local (from task 2.4) ontologies
    ref_file_path = 'pizza.owl'
    loc_file_path = '2.2_protege_pizza_ontology.owl.xml'

    ref_onto = rdy.get_ontology(ref_file_path)
    ref_onto.load()

    loc_onto = rdy.get_ontology(loc_file_path)
    loc_onto.load()

    # Get classes
    ref_cl_ls = get_class_name(ref_onto)
    loc_cl_ls = get_class_name(loc_onto)

    # Get object properties
    ref_obj_prop_ls = get_obj_prop(ref_onto)
    loc_obj_prop_ls = get_obj_prop(loc_onto)

    # # Get data properties
    # ref_data_prop_ls = get_data_prop(ref_onto)
    # loc_data_prop_ls = get_data_prop(loc_onto)

    # Get individuals for all classes
    ref_cl_ind_ls = get_class_individual(ref_onto, ref_cl_ls)
    loc_cl_ind_ls = get_class_individual(loc_onto, loc_cl_ls)

    # Combine lists of classes, object and data properties and individuals by class
    # ref_onto_ls = ref_cl_ls + ref_obj_prop_ls + ref_data_prop_ls + ref_cl_ind_ls
    # loc_onto_ls = loc_cl_ls + loc_obj_prop_ls + loc_data_prop_ls + loc_cl_ind_ls
    ref_onto_ls = ref_cl_ls + ref_cl_ind_ls
    loc_onto_ls = loc_cl_ls + loc_cl_ind_ls
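    # NOTE (inferred from usage below): each element tuple appears to be
    # (label, type, entity), e.g. ('MargheritaPizza', 'class', <owlready2 class>),
    # given the accesses l_tup[0], l_tup[1] and loc_tup[2].iri later on.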

    # String-based matching of labels between elements of the same type (e.g. class vs class) and of different types (e.g. class vs individual)
    print(
        'Performing string based matching of labels with same type and across different types...',
        end='\r')

    # Set thresholds to control Regex subset string match
    max_len_diff = 8  # Max length difference between two strings to recognise Regex subset match
    min_len = 5  # Min length to perform Regex match

    # Approach to determine TP, FP and FN adapted based on
    # Source: E. Jiménez-Ruiz, “INM713 Semantic Web Technologies and Knowledge Graphs
    # Laboratory 8: Ontology Alignment,” 2021.

    tp_exact_match_ls = []
    tp_subset_match_ls = []
    fp_match_ls = []
    fn_match_ls = []

    print('No. of elements in local graph: {}'.format(len(loc_onto_ls)))
    print('No. of elements in reference graph: {}'.format(len(ref_onto_ls)))
    iter_nr = len(loc_onto_ls) * len(ref_onto_ls)
    print('So iterate {} rounds to compare both sets of elements...'.format(
        iter_nr))

    # Compare every element in local ontology for exact or subset match (to within a tolerance)
    for l_tup in loc_onto_ls:
        __exact_match_ls = []
        __subset_match_ls = []
        for r_tup in ref_onto_ls:
            # Difference in length between ref and local labels
            len_diff = len(l_tup[0]) - len(r_tup[0])

            # If exact string match
            if l_tup[0] == r_tup[0]:
                is_same_type = l_tup[1] == r_tup[1]
                __exact_match_ls += [(len_diff, is_same_type, l_tup, r_tup)]

            # If local label string is a subset of reference label,
            # and lengths of both exceed min length thresholds set
            # and length difference (positive polarity) within threshold
            elif re.search(l_tup[0], r_tup[0]) is not None and \
                    len(l_tup[0]) >= min_len and len(r_tup[0]) >= min_len \
                    and abs(len_diff) <= max_len_diff:
                is_same_type = l_tup[1] == r_tup[1]
                __subset_match_ls += [(len_diff, is_same_type, l_tup, r_tup)]

        # If an exact/subset match of the local element exists in the reference, count as TRUE POSITIVE
        if len(__exact_match_ls) > 0:
            tp_exact_match_ls += __exact_match_ls
        if len(__subset_match_ls) > 0:
            tp_subset_match_ls += __subset_match_ls

        # If no match at all was found, FALSE POSITIVE: in local but not in reference ontology
        if len(__exact_match_ls) + len(__subset_match_ls) == 0:
            fp_match_ls += [l_tup]

    # Reverse to compare every element in the reference ontology for exact or subset match
    for r_tup in ref_onto_ls:
        __match_ls = []
        for l_tup in loc_onto_ls:
            if re.search(r_tup[0], l_tup[0]) is not None:
                __match_ls += [r_tup]
        # In reference ontology but not in local ontology: FALSE NEGATIVE
        if len(__match_ls) == 0:
            fn_match_ls += [r_tup]
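    # Note the asymmetry: this reverse pass uses a bare re.search with no
    # length thresholds, so false negatives are not judged by exactly the
    # same criteria as the TP/FP pass above.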

    # Calculate True +ve
    tp = len(tp_exact_match_ls) + len(tp_subset_match_ls)

    # Calculate True +ve based on exact string matches
    tp_exact = len(tp_exact_match_ls)
    tp_exact_diff_type = len([r for r in tp_exact_match_ls
                              if not r[1]])  # Where different types
    tp_exact_same_type = len([r for r in tp_exact_match_ls
                              if r[1]])  # Where same type

    # Calculate True +ve based on subset string matches
    tp_subset = len(tp_subset_match_ls)
    tp_subset_diff_type = len([r for r in tp_subset_match_ls
                               if not r[1]])  # Where different types
    tp_subset_same_type = len([r for r in tp_subset_match_ls
                               if r[1]])  # Where same type

    # Calculate False +ve and -ve
    fp = len(fp_match_ls)
    fn = len(fn_match_ls)

    print('No. of True +ve: {}'.format(tp))
    print('\tBy exact string match: {}'.format(tp_exact))
    print(
        '\t\tOf which both local and reference ontologies have the same type: {}'
        .format(tp_exact_same_type))
    print(
        '\t\tOf which both local and reference ontologies have different types: {}'
        .format(tp_exact_diff_type))
    print('\tBy subset Regex match: {}'.format(tp_subset))
    print(
        '\tbased on thresholds - min length {} and max char length difference {}'
        .format(min_len, max_len_diff))
    print(
        '\t\tOf which both local and reference ontologies have the same type: {}'
        .format(tp_subset_same_type))
    print(
        '\t\tOf which both local and reference ontologies have different types: {}'
        .format(tp_subset_diff_type))
    print('No. of False +ve: {}'.format(fp))
    print('No. of False -ve: {}'.format(fn))

    # Calculate precision (TP / (TP + FP)), recall (TP / (TP + FN)) and
    # F1 score while handling div-by-0 errors
    precision = tp / (tp + fp) if (tp + fp) != 0 else .0
    recall = tp / (tp + fn) if (tp + fn) != 0 else .0
    f1 = tp / (tp + .5 * (fp + fn)) if (tp + .5 * (fp + fn)) != 0 else .0

    print('Precision: {}'.format(precision))
    print('Recall: {}'.format(recall))
    print('F1 score: {}'.format(f1))

    # Analyse by types where a match is found to determine the appropriate equivalence to define

    # Extract local and reference onto types for each exact match result
    print('Exact match - different types exist:')
    print(list(set([(tup[2][1], tup[3][1]) for tup in tp_exact_match_ls])))

    # Extract local and reference onto types for each subset match result
    print('Subset match - different types exist:')
    print(list(set([(tup[2][1], tup[3][1]) for tup in tp_subset_match_ls])))

    print('TRUE +VE exact match listings:')
    for tup in tp_exact_match_ls:
        print('\t', tup)
    print('TRUE +VE subset match listings:')
    for tup in tp_subset_match_ls:
        print('\t', tup)

    # BUILD EQUIVALENCE TO THE TWO ONTOLOGIES
    print('Read in the reference and local files as RDF graphs...', end='\r')
    # Load the local and reference ontologies as RDFLib graph
    ref_g = rdflib.Graph().parse(ref_file_path)
    loc_g = rdflib.Graph().parse(loc_file_path)

    # Union the two graphs
    print('Union the two RDF graphs...', end='\r')
    uni_g = ref_g + loc_g

    # Prefix
    aa = Namespace('http://www.city.ac.uk/ds/inm713/aaron_altrock#')
    uni_g.bind('aa', aa)

    # Equivalence only graph
    print(
        'Create a new RDF graph for the equivalent class and property triples...',
        end='\r')
    eqi_g = rdflib.Graph()
    eqi_g.bind('aa', aa)

    # Construct equivalence between class and individuals
    for len_diff, is_same_type, loc_tup, ref_tup in tp_exact_match_ls + tp_subset_match_ls:
        print('Constructing equivalence between {}/{} and {}/{}...'.format(
            loc_tup[2], loc_tup[1], ref_tup[2], ref_tup[1]),
              end='\r')
        loc_type = loc_tup[1]
        ref_type = ref_tup[1]

        # Construct an equivalence
        # Class v Class
        if loc_type == 'class' and ref_type == 'class':
            # class in loc owl:equivalentClass in ref
            s = URIRef(loc_tup[2].iri)
            p = OWL.equivalentClass
            o = URIRef(ref_tup[2].iri)
            uni_g.add((s, p, o))
            eqi_g.add((s, p, o))

        # Class Individual v Class
        if loc_type == 'class_ind' and ref_type == 'class':
            # individual in loc rdf:type class in ref
            s = URIRef(loc_tup[2].iri)
            p = RDF.type
            o = URIRef(ref_tup[2].iri)
            uni_g.add((s, p, o))
            eqi_g.add((s, p, o))

        # Class Individual v Class Individual
        if loc_type == 'class_ind' and ref_type == 'class_ind':
            # individual in loc owl:sameAs individual in ref
            s = URIRef(loc_tup[2].iri)
            p = OWL.sameAs
            o = URIRef(ref_tup[2].iri)
            uni_g.add((s, p, o))
            eqi_g.add((s, p, o))

        # Object Property v Object Property
        if loc_type == 'obj_prop' and ref_type == 'obj_prop':
            # local object property in loc owl:equivalentProperty in ref object property
            s = URIRef(loc_tup[2].iri)
            p = OWL.equivalentProperty
            o = URIRef(ref_tup[2].iri)
            uni_g.add((s, p, o))
            eqi_g.add((s, p, o))

    # Enumerate object properties in the union graph to identify equivalences manually
    obj_prop_ls = []
    for s, p, o in uni_g:
        if p == RDF.type and o == OWL.ObjectProperty:
            __tup = (s.split('#')[-1], s)
            print(__tup)
            obj_prop_ls += [__tup]

    equi_obj_prop_ls = [
        (URIRef('http://www.city.ac.uk/ds/inm713/aaron_altrock#has_topping'),
         URIRef('http://www.co-ode.org/ontologies/pizza/pizza.owl#hasTopping'))
    ]

    # Add manually identified equivalent object properties
    for s, o in equi_obj_prop_ls:
        uni_g.add((s, OWL.equivalentProperty, o))
        eqi_g.add((s, OWL.equivalentProperty, o))

    # Save extended graph to Turtle format
    uni_g.serialize(destination='2.5_oa1_union_g.ttl', format='ttl')
    print('Saved the union-ed graph to 2.5_oa1_union_g.ttl.')
    eqi_g.serialize(destination='2.5_oa1_equivalence_g.ttl', format='ttl')
    print('Saved the equivalence triples to 2.5_oa1_equivalence_g.ttl.')

    # Save extended graph to OWL format
    uni_g.serialize(destination='2.5_oa1_union_g.owl.xml', format='xml')
    print('Saved the union-ed graph to 2.5_oa1_union_g.owl.xml.')
    eqi_g.serialize(destination='2.5_oa1_equivalence_g.owl.xml', format='xml')
    print('Saved the equivalence triples to 2.5_oa1_equivalence_g.owl.xml.')

    print('END')
Example #10
    def post(self, public=False):
        """
        post=<parent post>
        content=<html content>

        we get the user from the x-foaf-agent header
        """
        parent = self.get_argument('post', default=None) or self.get_argument("uri")
        assert parent is not None
        # maybe a legacy problem here with http/https, but blaster is still sending http
        parent = URIRef(parent)

        # this might be failing on ariblog, but that one is already safe
        ip = self.request.headers.get("X-Forwarded-For")
        if ip is not None:
            HoneypotChecker(open("priv-honeypotkey").read().strip()).check(ip)

        contentArg = self.get_argument("content", default="")
        if not contentArg.strip():
            raise ValueError("no text")

        if contentArg.strip() == 'test':
            return "not adding test comment"

        spamCheck(parent, contentArg)
            
        content = Literal(contentArg, datatype=RDF.XMLLiteral)

        stmts = [] # gathered in one list for an atomic add

        foafHeader = self.request.headers.get('X-Foaf-Agent')
        if not public:
            assert foafHeader
            user = URIRef(foafHeader)
            # make bnode-ish users for anonymous ones. need to get that username passed in here
        else:
            if foafHeader:
                user = URIRef(foafHeader)
            else:
                user, moreStmts = newPublicUser(
                    self.request.headers.get("X-Forwarded-For"),
                    self.get_argument("name", ""),
                    self.get_argument("email", ""))
                stmts.extend(moreStmts)
                
        secs = time.time()
        comment = newCommentUri(secs)

        now = literalFromUnix(secs)

        ctx = URIRef(parent + "/comments")

        stmts.extend([(parent, SIOC.has_reply, comment),
                      (comment, DCTERMS.created, now),
                      (comment, SIOC.has_creator, user),
                      ])
        stmts.extend(commentStatements(user, comment, content))

        db.writeFile(stmts, ctx, fileWords=[parent.split('/')[-1], now])

        try:
            self.sendAlerts(parent, user)
        except Exception as e:
            import traceback
            log.error(e)
            traceback.print_exc()
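The handler leans on several helpers from its own codebase (newCommentUri, literalFromUnix, commentStatements, spamCheck). As a rough illustration only, literalFromUnix plausibly renders a Unix timestamp as a typed literal, along these lines (a guess, not the original implementation):

import datetime

from rdflib import Literal, XSD

def literalFromUnix(secs):
    # Plausible stand-in: Unix seconds as an xsd:dateTime Literal
    dt = datetime.datetime.fromtimestamp(secs, datetime.timezone.utc)
    return Literal(dt.isoformat(), datatype=XSD.dateTime)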