Ejemplo n.º 1
0
def longTerm(g_onto, liste_map, g_map, raw_data, g_link):
    """Score how many subject URIs of the mapping embed a year in their path.

    A subject URI earns exactly one point when at least one of its
    '/'-separated segments parses as an integer strictly between 1990 and
    2050. Counting at most one point per URI keeps the score within [0, 1].

    Args:
        g_onto: unused here; kept so every metric shares one signature.
        liste_map: unused here.
        g_map: graph of the mapping; only its URIRef subjects are inspected.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` (defined elsewhere in the file)
        with 'name' and 'score' set, and one entry appended to 'feedbacks'
        for every subject URI that contains no date.
    """
    def has_year(uri):
        # A segment qualifies only if it is a plain integer in the year range.
        for segment in uri.split('/'):
            try:
                year = int(segment)
            except ValueError:
                continue
            if 1990 < year < 2050:
                return True
        return False

    result = metric()
    result['name'] = "Long term subject"
    subject_uris = {
        s for s, _, _ in g_map.triples((None, None, None))
        if isinstance(s, rdflib.term.URIRef)
    }
    points = 0
    for uri in subject_uris:
        if has_year(uri):
            points += 1
        else:
            result['feedbacks'].append(f"{str(uri)} should contain a date")
    if not subject_uris:
        # Nothing to evaluate: the metric is considered satisfied.
        result['score'] = 1
    else:
        result['score'] = points / len(subject_uris)
    return result
Ejemplo n.º 2
0
def disjointWith(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of mapping triples that do not misuse owl:disjointWith.

    For a mapping triple (s, p, o), a conflict is reported when the ontology
    declares ``s owl:disjointWith C`` and either ``o`` or a subclass of ``o``
    is typed as ``C`` in the ontology; the same test is applied with the roles
    of ``s`` and ``o`` swapped.

    Fixes compared to the previous version:
      * the existence test used ``triples(...) is not None``, which is always
        true because ``triples`` returns a generator;
      * the owl:disjointWith URI used the ``https`` scheme, unlike the other
        OWL URIs of this file, so it could never match;
      * a triple is now counted at most once, so the score stays in [0, 1].

    Args:
        g_onto: ontology graph queried for disjointWith, subClassOf and types.
        liste_map: unused here.
        g_map: graph of the mapping whose triples are checked.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with 'name' and 'score' set and a
        feedback entry per conflicting triple.
    """
    disjoint_with = rdflib.term.URIRef(
        'http://www.w3.org/2002/07/owl#disjointWith')
    sub_class_of = rdflib.term.URIRef(
        'http://www.w3.org/2000/01/rdf-schema#subClassOf')
    # The shorthand 'a' is accepted alongside rdf:type, as in the original.
    type_predicates = (
        rdflib.term.URIRef('a'),
        rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
    )

    def has_type(node, cls):
        # True when the ontology states that `node` is an instance of `cls`.
        return any((node, pred, cls) in g_onto for pred in type_predicates)

    def conflicts(first, second):
        # True when `second` (or one of its subclasses) is typed with a class
        # declared disjoint with `first`.
        for _, _, disjoint_cls in g_onto.triples((first, disjoint_with, None)):
            if has_type(second, disjoint_cls):
                return True
            for sub, _, _ in g_onto.triples((None, sub_class_of, second)):
                if has_type(sub, disjoint_cls):
                    return True
        return False

    result = metric()
    result['name'] = "Misuse of disjointWith"
    points = 0
    nbPossible = 0
    for s, _, o in g_map.triples((None, None, None)):
        nbPossible += 1
        if conflicts(s, o) or conflicts(o, s):
            points += 1
            result['feedbacks'].append(
                f"{str(o)} is disjoint with {str(s)}")
    if nbPossible == 0:
        result['score'] = 1
    else:
        result['score'] = 1 - points / nbPossible
    return result
Ejemplo n.º 3
0
def existingVocab(g_onto, liste_map, g_map, raw_data, g_link):
    """Score how many constant URIs of the mapping are known to LOV.

    Every URIRef found in subject, predicate or object position that does not
    contain a '$' (i.e. is not a template) is searched through the LOV term
    search API. The shorthand predicate 'a' is expanded to rdf:type first.

    The search term is passed through ``params`` so that it is URL-encoded;
    concatenating the raw URI into the query string broke every URI holding a
    '#' or '&'. Lookups that fail (network error, HTTP error, unexpected
    payload) are reported as feedback and earn no point instead of aborting
    the whole metric.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: graph of the mapping whose terms are looked up.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with 'name' and 'score' set
        (found terms / distinct terms, or 1 when there is no term).
    """
    lov_search = 'https://lov.linkeddata.es/dataset/lov/api/v2/term/search'
    rdf_type = rdflib.term.URIRef(
        'http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
    shorthand_type = rdflib.term.URIRef('a')
    timeout = 10  # seconds; avoids hanging forever on an unresponsive server

    result = metric()
    result['name'] = "Use of existing vocabulary"
    terms = set()
    for s, p, o in g_map.triples((None, None, None)):
        if p == shorthand_type:
            p = rdf_type
        for term in (s, p, o):
            # Template URIs (containing '$') are not vocabulary terms.
            if isinstance(term, rdflib.term.URIRef) and '$' not in term:
                terms.add(term)

    points = 0
    for term in terms:
        try:
            response = requests.get(
                lov_search,
                params={'q': str(term), 'type': 'class'},
                timeout=timeout)
            response.raise_for_status()
            total_results = response.json()["total_results"]
        except (requests.RequestException, ValueError, KeyError,
                TypeError) as exc:
            result['feedbacks'].append(
                f"{str(term)} could not be checked against LOV: {exc}")
            continue
        if total_results != 0:
            points += 1
        else:
            result['feedbacks'].append(str(term) + " is not referenced in LOV.")
    if not terms:
        result['score'] = 1
    else:
        result['score'] = points / len(terms)
    return result
Ejemplo n.º 4
0
def localLink(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the availability of the links generated from subject templates.

    Each subject of the mapping holding a ``$(field)`` reference is expanded
    with the value of that field in every record of ``raw_data``; each
    distinct resulting link is then requested over HTTP.

    Fixes compared to the previous version:
      * subjects without a ``$(...)`` reference raised IndexError;
      * the template test compared a URIRef with a str, which rdflib never
        considers equal, so it was always true;
      * connection failures escaped the bare ``except`` because the request
        itself was issued outside the ``try``;
      * records lacking the referenced field raised KeyError.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: graph of the mapping; its subjects provide the templates.
        raw_data: iterable of records exposing a 'fields' mapping.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with 'name' and 'score' set
        (1 - unavailable links / distinct links, or 1 without any link).
    """
    timeout = 10  # seconds; avoids hanging forever on an unresponsive server
    result = metric()
    result['name'] = "Use of local links"

    subjects = {s for s, _, _ in g_map.triples((None, None, None))}
    links = set()
    for subject in subjects:
        text = str(subject)
        if '$(' not in text:
            # Constant URI or blank node: nothing to expand.
            continue
        prefix = text.split('$')[0]
        field = text.split('$(')[1].split(')')[0]
        for record in raw_data or ():
            try:
                value = record['fields'][field]
            except KeyError:
                # This record has no value for the referenced field.
                continue
            links.add(prefix + str(value))

    points = 0
    for link in links:
        try:
            response = requests.get(link, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException:
            points += 1
    if not links:
        result['score'] = 1
    else:
        result['score'] = 1 - points / len(links)
    return result
Ejemplo n.º 5
0
def localLinks(g_onto, liste_map, g_map, raw_data, g_link):
    """Score how many subject URIs of the mapping are linked to one another.

    A subject URI is considered linked when it has at least one triple whose
    object is a subject URI of the mapping, or at least one incoming triple
    whose subject is one. The score is only lowered from 1 when the mapping
    holds more than one subject URI.

    Returns:
        The object returned by ``metric()`` with 'name' and 'score' set and a
        feedback entry per isolated resource.
    """
    result = metric()
    result['name'] = "Use of one global mapping"
    result['score'] = 1

    subjects = {
        node for node, _, _ in g_map.triples((None, None, None))
        if isinstance(node, URIRef)
    }

    def is_linked(node):
        # Outgoing edge towards another subject of the mapping?
        if any(isinstance(target, URIRef) and target in subjects
               for _, _, target in g_map.triples((node, None, None))):
            return True
        # Otherwise, incoming edge from another subject of the mapping?
        return any(isinstance(source, URIRef) and source in subjects
                   for source, _, _ in g_map.triples((None, None, node)))

    isolated_count = 0
    for node in subjects:
        if is_linked(node):
            continue
        result['feedbacks'].append(
            f"resource {str(node)} is not linked to another resource")
        isolated_count += 1

    total = len(subjects)
    if total > 1:
        result['score'] = (total - isolated_count) / total
    return result
Ejemplo n.º 6
0
def humanDesc(g_onto, liste_map, g_map, raw_data, g_link):
    """Score how many subject URIs carry an rdfs:label or rdfs:comment.

    The label/comment triples are looked up in ``g_link``; the subjects come
    from ``g_map``. The score stays at 0 when the mapping has no subject URI.

    Returns:
        The object returned by ``metric()`` with 'score' and 'name' set and a
        feedback entry per subject lacking a description.
    """
    description_predicates = (
        rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#label'),
        rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#comment'),
    )
    result = metric()
    result['score'] = 0
    result['name'] = "Usage of description or label"

    subjects = {
        node for node, _, _ in g_map.triples((None, None, None))
        if isinstance(node, rdflib.term.URIRef)
    }
    described = 0
    for uri in subjects:
        has_description = any(
            True
            for predicate in description_predicates
            for _ in g_link.triples((uri, predicate, None)))
        if has_description:
            described += 1
            continue
        result['feedbacks'].append(
            f"Human readable description of the ressource {str(uri)} is missing. Please add an rdfs:comment or rdfs:label"
        )
    if subjects:
        result['score'] = described / len(subjects)
    return result
Ejemplo n.º 7
0
def Error(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the availability of every URI used in the mapping.

    Each URIRef in subject, predicate (except the shorthand 'a') or object
    position is normalised with ``_process_URI`` (defined elsewhere in the
    file) and requested over HTTP.

    The request itself is now inside the ``try``: previously an unreachable
    host raised out of the function instead of being reported as
    unavailable, and the bare ``except`` hid unrelated errors.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: graph of the mapping whose URIs are dereferenced.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with 'name' and 'score' set
        (1 - unavailable URIs / distinct URIs, or 1 without any URI).
    """
    timeout = 10  # seconds; avoids hanging forever on an unresponsive server
    shorthand_type = rdflib.term.URIRef('a')
    result = metric()
    result['name'] = "Availability error"

    set_URIs = set()
    for s, p, o in g_map.triples((None, None, None)):
        if isinstance(s, rdflib.term.URIRef):
            set_URIs.add(_process_URI(s))
        if isinstance(p, rdflib.term.URIRef) and p != shorthand_type:
            set_URIs.add(_process_URI(p))
        if isinstance(o, rdflib.term.URIRef):
            set_URIs.add(_process_URI(o))

    points = 0
    for elt in set_URIs:
        try:
            response = requests.get(elt, timeout=timeout)
        except requests.RequestException as exc:
            # No HTTP response at all (DNS failure, refused connection, ...).
            result['feedbacks'].append(
                f"It seems that {str(elt)} is not available: {exc}")
            points += 1
            continue
        try:
            response.raise_for_status()
        except requests.HTTPError:
            result['feedbacks'].append(
                f"It seems that {str(elt)} is not available: returns HTTP code {response.status_code}"
            )
            points += 1
    nbPossible = len(set_URIs)
    if nbPossible == 0:
        result['score'] = 1
    else:
        result['score'] = 1 - points / nbPossible
    return result
Ejemplo n.º 8
0
def externalLink(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the availability of links generated from predicate/object templates.

    Each predicate or object URI holding a ``$(field)`` reference is expanded
    with the value of that field in every record of ``raw_data`` and the
    resulting link is requested over HTTP. Every (template, record) pair
    counts as one possibility, as before.

    Fixes compared to the previous version:
      * a URI containing '$' without a ``$(...)`` reference led to a lookup
        of the empty field name and a KeyError;
      * records lacking the referenced field raised KeyError;
      * connection failures escaped the bare ``except`` because the request
        itself was issued outside the ``try``.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: graph of the mapping providing the templates.
        raw_data: iterable of records exposing a 'fields' mapping.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with 'name' and 'score' set
        (1 - failed requests / requests, or 1 when nothing was requested).
    """
    timeout = 10  # seconds; avoids hanging forever on an unresponsive server
    result = metric()
    result['name'] = "Use of external link"

    templates = set()
    for _, p, o in g_map.triples((None, None, None)):
        for term in (p, o):
            if isinstance(term, rdflib.term.URIRef):
                templates.add(term)

    points = 0
    nbPossible = 0
    for template in templates:
        text = str(template)
        if '$(' not in text:
            # Constant URI: there is no field to substitute.
            continue
        prefix = text.split('$')[0]
        field = text.split('$(')[1].split(')')[0]
        for record in raw_data or ():
            try:
                value = record['fields'][field]
            except KeyError:
                # This record has no value for the referenced field.
                continue
            nbPossible += 1
            try:
                response = requests.get(prefix + str(value), timeout=timeout)
                response.raise_for_status()
            except requests.RequestException:
                points += 1
    if nbPossible == 0:
        result['score'] = 1
    else:
        result['score'] = 1 - points / nbPossible
    return result
Ejemplo n.º 9
0
def duplicatedRules(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the absence of duplicated rules in the mapping.

    ``liste_map`` is compared in size with ``g_map``: when the list is longer
    than the graph, the difference is reported as duplicated rules and the
    score becomes the ratio of the two sizes.

    Returns:
        The object returned by ``metric()`` with 'name' and 'score' set.
    """
    result = metric()
    result['name'] = "Duplicated rules"
    result['score'] = 1

    listed = len(liste_map)
    stored = len(g_map)
    duplicates = listed - stored
    if duplicates > 0:
        # Relies on a property of RDF graphs (per the original comment).
        result['score'] = stored / listed
        result['feedbacks'].append(f"{duplicates} rules are duplicated")
    return result
def equivalentClassesProperties(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the usage of equivalent classes and properties in the mapping.

    For every subject/object of the mapping, each named class the ontology
    declares owl:equivalentClass to it is expected to appear in the mapping
    (as a subject or an object). For every predicate of the mapping, each
    named property declared owl:equivalentProperty to it is expected to be
    used as a predicate. As stated by the original author, an equivalent term
    that is present in the mapping is assumed to be used correctly.

    Fixes compared to the previous version:
      * owl:equivalentProperty used the ``https`` scheme, unlike
        owl:equivalentClass, so it could never match;
      * blank-node equivalents are skipped entirely: they used to produce a
        "missing" feedback (and, for properties, to lower the score);
      * equivalent properties are searched in predicate position, consistent
        with ``subClassesProperties``, instead of subject/object position.

    Args:
        g_onto: ontology graph holding the equivalence declarations.
        liste_map: unused here.
        g_map: graph of the mapping being evaluated.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with 'name' and 'score' set
        (terms found / terms expected, or 1 when nothing is expected).
    """
    equivalent_class = rdflib.term.URIRef(
        'http://www.w3.org/2002/07/owl#equivalentClass')
    equivalent_property = rdflib.term.URIRef(
        'http://www.w3.org/2002/07/owl#equivalentProperty')

    set_SO = set()
    set_P = set()
    points = 0
    nbPossible = 0
    result = metric()
    result['name'] = "Usage of equivalent classes and properties"
    for s, p, o in g_map.triples((None, None, None)):
        set_SO.add(s)
        set_SO.add(o)
        set_P.add(p)

    for subobj in set_SO:
        for _, _, o2 in g_onto.triples((subobj, equivalent_class, None)):
            if isinstance(o2, rdflib.term.BNode):
                # Anonymous class expressions cannot be referenced by name.
                continue
            nbPossible += 1
            if (None, None, o2) in g_map or (o2, None, None) in g_map:
                points += 1
            else:
                result['feedbacks'].append(
                    f"{str(o2)} equivalent class of {str(subobj)} is missing.")

    for pred in set_P:
        for _, _, o3 in g_onto.triples((pred, equivalent_property, None)):
            if isinstance(o3, rdflib.term.BNode):
                continue
            nbPossible += 1
            if (None, o3, None) in g_map:
                points += 1
            else:
                result['feedbacks'].append(
                    f"{str(o3)} equivalent property of {str(pred)} is missing")

    if nbPossible == 0:
        result['score'] = 1
    else:
        result['score'] = points / nbPossible
    return result
Ejemplo n.º 11
0
def subClassesProperties(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the presence of super classes and super properties in the mapping.

    For every subject/object of the mapping, each named rdfs:subClassOf
    target found in the ontology is expected to appear in the mapping (as a
    subject or an object). For every predicate, each named
    rdfs:subPropertyOf target is expected to be used as a predicate.

    Blank-node targets are now skipped entirely: they were already excluded
    from the score but still produced a spurious "missing" feedback.

    Args:
        g_onto: ontology graph holding the hierarchy declarations.
        liste_map: unused here.
        g_map: graph of the mapping being evaluated.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with 'name' and 'score' set
        (terms found / terms expected, or 1 when nothing is expected).
    """
    sub_class_of = rdflib.term.URIRef(
        'http://www.w3.org/2000/01/rdf-schema#subClassOf')
    sub_property_of = rdflib.term.URIRef(
        'http://www.w3.org/2000/01/rdf-schema#subPropertyOf')

    result = metric()
    result['name'] = "Correct use of subclasses and properties"
    set_SO = set()
    set_P = set()
    points = 0
    nbPossible = 0
    for s, p, o in g_map.triples((None, None, None)):
        set_SO.add(s)
        set_SO.add(o)
        set_P.add(p)

    for subobj in set_SO:
        for _, _, o2 in g_onto.triples((subobj, sub_class_of, None)):
            if isinstance(o2, rdflib.term.BNode):
                # Anonymous super classes cannot be referenced by name.
                continue
            nbPossible += 1
            if (None, None, o2) in g_map or (o2, None, None) in g_map:
                points += 1
            else:
                result['feedbacks'].append(
                    f"Super class {str(o2)} of {subobj} is missing.")

    for pred in set_P:
        for _, _, o3 in g_onto.triples((pred, sub_property_of, None)):
            if isinstance(o3, rdflib.term.BNode):
                continue
            nbPossible += 1
            if (None, o3, None) in g_map:
                points += 1
            else:
                result['feedbacks'].append(
                    f"Super property {str(o3)} of {pred} is missing.")

    if nbPossible == 0:
        result['score'] = 1
    else:
        result['score'] = points / nbPossible
    return result
Ejemplo n.º 12
0
def verticalCoverage(g_onto, liste_map, g_map, raw_data, g_link):
    """Score how many fields of the dataset are referenced by the mapping.

    Every parenthesised reference ``(name`` ... ``)`` found in the subjects
    and objects of the mapping is collected; the score is the number of
    distinct references divided by the number of fields of the first record.

    Fixes compared to the previous version:
      * the pattern is a raw string (the plain literal contained the invalid
        escape sequence ``\\(``);
      * all references of a term are collected, not only the first one, so a
        template such as ``.../$(a)/$(b)`` counts both fields;
      * the compiled pattern is reused instead of searching twice per term.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: graph of the mapping whose subjects/objects are scanned.
        raw_data: sequence of records; only ``raw_data[0]['fields']`` is read.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with 'name' and 'score' set; the
        score stays at 1 when there is no data or no field.
    """
    # NOTE(review): any parenthesised text matches, not only '$(...)'
    # references, so the score may exceed 1 - confirm the intended syntax.
    reference = re.compile(r'\(([^)]+)')

    result = metric()
    result['name'] = "Data coverage"
    result['score'] = 1
    set_dollarVal = set()
    for s, _, o in g_map.triples((None, None, None)):
        for term in (s, o):
            set_dollarVal.update(reference.findall(str(term)))

    if raw_data and len(raw_data[0]['fields']) > 0:
        nb_fields = len(raw_data[0]['fields'])
        result['score'] = len(set_dollarVal) / nb_fields
        if result['score'] < 1:
            result['feedbacks'].append(
                f"only {len(set_dollarVal)}/{nb_fields} fields of the dataset are mapped"
            )
    return result
Ejemplo n.º 13
0
def externalURIs(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of URI-to-URI links of the mapping absent from the ontology.

    Only triples whose subject and object are both URIRefs are considered. A
    link earns a point when no triple connects the same subject and object in
    ``g_onto``, i.e. when the mapping created a new link.

    Returns:
        The object returned by ``metric()`` with 'name' and 'score' set
        (new links / URI-to-URI links, or 1 when there is no such link).
    """
    uri_type = rdflib.term.URIRef
    result = metric()
    result['name'] = "Use of external URIs"

    candidates = 0
    new_links = 0
    for subject, _, obj in g_map.triples((None, None, None)):
        if not (isinstance(subject, uri_type) and isinstance(obj, uri_type)):
            continue
        # A link between two URIs.
        candidates += 1
        if (subject, None, obj) not in g_onto:
            # Unknown to the ontology: the mapping introduced it.
            new_links += 1

    result['score'] = new_links / candidates if candidates != 0 else 1
    return result
Ejemplo n.º 14
0
def longURI(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of subject URIs shorter than 80 characters.

    Returns:
        The object returned by ``metric()`` with 'name' and 'score' set
        (1 - long URIs / distinct subject URIs, or 1 without any URI) and a
        feedback entry per URI of 80 characters or more.
    """
    result = metric()
    result['name'] = "Long URIs"

    subject_uris = {
        subject for subject, _, _ in g_map.triples((None, None, None))
        if isinstance(subject, rdflib.term.URIRef)
    }
    too_long = 0
    for uri in subject_uris:
        if len(uri) < 80:
            continue
        too_long += 1
        result['feedbacks'].append(f"{str(uri)} is more than 79 characters")

    total = len(subject_uris)
    if total == 0:
        result['score'] = 1
    else:
        result['score'] = 1 - too_long / total
    return result
Ejemplo n.º 15
0
def sameAs(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of subject URIs that declare an owl:sameAs link.

    Each subject URI earns at most one point, whatever its number of sameAs
    triples. Previously one point was granted per sameAs triple, so a subject
    with several links could push the score above 1.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: graph of the mapping searched for owl:sameAs triples.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with 'name' and 'score' set; the
        score is 0, with a feedback entry, when no sameAs is defined.
    """
    same_as = rdflib.term.URIRef('http://www.w3.org/2002/07/owl#sameAs')
    result = metric()
    result['name'] = "Use of sameAs properties"

    subject_uris = {
        s for s, _, _ in g_map.triples((None, None, None))
        if isinstance(s, rdflib.term.URIRef)
    }
    points = sum(1 for uri in subject_uris if (uri, same_as, None) in g_map)
    if points < 1:
        result['score'] = 0
        result['feedbacks'].append("No sameAs defined")
    else:
        # points >= 1 guarantees that subject_uris is not empty.
        result['score'] = points / len(subject_uris)
    return result
Ejemplo n.º 16
0
def humanReadableURIs(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of subject URIs judged human readable.

    Only the part of each subject URI located before the first '$' is passed
    to ``test_HumanReadable`` (defined elsewhere in the file).

    Returns:
        The object returned by ``metric()`` with 'name' and 'score' set
        (readable URIs / distinct subject URIs, or 1 without any URI) and a
        feedback entry per URI that fails the test.
    """
    result = metric()
    result['name'] = "Human readable URIs"

    subject_uris = {
        subject for subject, _, _ in g_map.triples((None, None, None))
        if isinstance(subject, rdflib.term.URIRef)
    }
    readable = 0
    for subject in subject_uris:
        # Drop the template part, if any.
        uri = str(subject).partition('$')[0]
        if not test_HumanReadable(uri):
            result['feedbacks'].append(
                f"It seems that {uri} is not a Human Readable URI")
            continue
        readable += 1

    total = len(subject_uris)
    result['score'] = 1 if total == 0 else readable / total
    return result
Ejemplo n.º 17
0
def domainRange(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the respect of property domains and ranges in the mapping.

    Every triple of the mapping is worth two possibilities: one for the
    domain check and one for the range check. Domain and range declarations,
    types and class hierarchy are all read from ``g_link``.

    The only behavioural change is in the range check: the lookup of the
    object's rdf:type passed a 2-tuple plus ``None`` to ``triples()`` because
    of a misplaced parenthesis, which raised as soon as a property with a
    declared range had a URI object. The rest of the logic is unchanged.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: graph of the mapping being evaluated.
        raw_data: unused here.
        g_link: graph queried for rdfs:domain, rdfs:range, types and classes.

    Returns:
        The object returned by ``metric()`` with 'name' and 'score' set and a
        feedback entry per failed check.
    """
    shorthand_type = rdflib.term.URIRef('a')
    rdf_type = rdflib.term.URIRef(
        'http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
    # Property path matching either the shorthand 'a' or rdf:type.
    any_type = shorthand_type | rdf_type
    rdfs_domain = rdflib.term.URIRef(
        'http://www.w3.org/2000/01/rdf-schema#domain')
    rdfs_range = rdflib.term.URIRef(
        'http://www.w3.org/2000/01/rdf-schema#range')
    rdfs_sub_class_of = rdflib.term.URIRef(
        'http://www.w3.org/2000/01/rdf-schema#subClassOf')
    rdfs_resource = rdflib.term.URIRef(
        'http://www.w3.org/2000/01/rdf-schema#Resource')
    # NOTE(review): 'label' lives in the rdfs namespace and usually holds a
    # literal; URI kept as-is - confirm the intent.
    rdf_label = rdflib.term.URIRef(
        'http://www.w3.org/1999/02/22-rdf-syntax-ns#label')
    # NOTE(review): https scheme, unlike the other OWL URIs of this file, so
    # this presumably never matches; kept as-is - confirm before changing.
    owl_equivalent_class = rdflib.term.URIRef(
        'https://www.w3.org/2002/07/owl#equivalentClass')

    nbPossible = 0
    points = 0
    liste_O = []
    result = metric()
    result['name'] = "Domain and range of properties"
    for s, p, o in g_map.triples((None, None, None)):
        expected = ""
        nbPossible = nbPossible + 2

        # ---- Domain check ----
        boolean = True
        if p == shorthand_type or p == rdf_type or p == rdf_label:
            boolean = isinstance(o, rdflib.URIRef)
            expected = "URI"
        else:
            # NOTE(review): `boolean` is never set to False in this branch,
            # so the domain check cannot fail for ordinary properties.
            for _, _, o2 in g_link.triples((p, rdfs_domain, None)):  # every domain of p
                expected = str(o2)
                for _, _, o3 in g_link.triples((s, any_type, None)):  # types of the subject
                    # The domain differs from the subject type and is not
                    # the generic rdfs:Resource.
                    if o2 != o3 and o2 != rdfs_resource:
                        liste_O.append(o3)  # remember the subject type
                        for _, _, o4 in g_link.triples((s, owl_equivalent_class, None)):  # equivalents of the subject
                            for _ in g_link.triples((s, any_type, None)):
                                liste_O.append(o4)  # remember them too
                        for O in liste_O:  # every collected type
                            # Accept when the domain is a super class of one
                            # of the collected types.
                            for _, _, o5 in g_link.triples((O, rdfs_sub_class_of, None)):
                                if o2 == o5:
                                    boolean = True
                    else:
                        boolean = True
        if boolean:
            points = points + 1
        else:
            result['feedbacks'].append(f"{str(p)} has the wrong domain. Expected a {expected}")

        # ---- Range check ----
        liste_O = []
        boolean = True
        for _, _, o2 in g_link.triples((p, rdfs_range, None)):
            boolean = False
            expected = str(o2)
            o3 = None
            if isinstance(o, rdflib.term.Literal):
                if o.datatype is not None:
                    o3 = o.datatype
                    if o2 == o3:
                        boolean = True
            if o3 is None:
                if isinstance(o, rdflib.term.URIRef):
                    # Types of the object (parenthesis fixed: the whole
                    # pattern is now a single 3-tuple).
                    for _, _, o3 in g_link.triples((o, rdf_type, None)):
                        if o2 != o3:
                            liste_O.append(o3)  # remember the object type
                            for _, _, o4 in g_link.triples((s, owl_equivalent_class, None)):  # equivalents of the subject
                                for _ in g_link.triples((s, any_type, None)):
                                    liste_O.append(o4)  # remember them too
                            for O in liste_O:  # every collected type
                                # Accept when the range is a super class of
                                # one of the collected types.
                                for _, _, o5 in g_link.triples((O, rdfs_sub_class_of, None)):
                                    if o2 == o5:
                                        boolean = True
                        else:
                            boolean = True
        if boolean:
            points = points + 1
        else:
            result['feedbacks'].append(f"{str(p)} has the wrong range. Expected a {expected}")

    if nbPossible == 0:
        result['score'] = 1
    else:
        result['score'] = 1-(nbPossible - points)/nbPossible
    return result