예제 #1
0
    def _add_owl_thing(cls, voc_builder: VocabularyBuilder):
        """Register the predefined owl:Thing root class in the vocabulary

        Ontology files never mention owl:Thing explicitly, although by
        definition every class is a subclass of it and it can be the target
        of a relation. Every class that has no parent yet gets owl:Thing as
        its parent, afterwards owl:Thing itself is added to the vocabulary
        (if it is not contained already) and marked with the source id
        "PREDEFINED".

        Args:
            voc_builder: Builder object for Vocabulary
        Returns:
            None
        """
        owl_thing = Class(iri="http://www.w3.org/2002/07/owl#Thing",
                          comment="Predefined root_class",
                          label="Thing",
                          predefined=True)
        vocabulary = voc_builder.vocabulary

        # owl:Thing is the root object, so only the classes that are still
        # parentless become its direct children
        for candidate in vocabulary.get_classes():
            parent_iris = candidate.parent_class_iris
            if parent_iris == []:
                parent_iris.insert(0, owl_thing.iri)

        # nothing more to do if the root class was registered before
        if owl_thing.iri in vocabulary.classes:
            return

        voc_builder.add_class(owl_thing)
        owl_thing.source_ids.add("PREDEFINED")
예제 #2
0
    def _parse_subclass_term(self, graph: rdflib.Graph,
                             voc_builder: VocabularyBuilder, node: rdflib.term,
                             class_iri: str):
        """Parse one subclass term of the class with the given class_iri

        The node can be one of three things:
            - a parent-class statement (no BNode): its iri is appended to
              the parent_class_iris of the class, unless it is rdf:nil or
              owl:Thing
            - a relation statement (BNode): a Relation with a random id is
              added to the class and its remaining statements are handed to
              _parse_relation_type
            - an intersection of parent classes, relations and
              intersections (BNode): each member is parsed recursively

        Unions (owl:unionOf) and enumerations (owl:oneOf) are not parsed,
        they are only reported as critical log entries.

        Args:
            graph (rdflib.Graph): Graph describing the ontology
            voc_builder (VocabularyBuilder): Builder of the vocabulary to
                parse into
            node (rdflib.term): Subclass term to parse
            class_iri (str): Iri of the class the term belongs to

        Returns:
            None
        """
        uri_ref = rdflib.term.URIRef

        # class could have been only defined in other source, than no class
        # is defined, but as we have found a relation for a class, the class
        # needs to exist
        if class_iri not in voc_builder.vocabulary.classes:
            voc_builder.add_class(class_=Class(iri=class_iri))

        # parent-class statement or empty list element
        if not isinstance(node, rdflib.term.BNode):
            parent_iri = get_iri_from_uriref(node)
            # rdf:nil marks an empty list and is ignored.
            # owl:Thing is the root object, but it is not declared as a class
            # in the file; to prevent a None pointer when looking up parents,
            # a class that has the parent owl:Thing simply has no parents
            ignored_iris = (
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil",
                "http://www.w3.org/2002/07/owl#Thing",
            )
            if parent_iri not in ignored_iris:
                parent_iris = voc_builder.vocabulary.get_class_by_iri(
                    class_iri).parent_class_iris
                parent_iris.append(parent_iri)
            return

        # node has no IRI and is therefore a relation or a combination.
        #
        # if node is a relation there should be 3-4 statements in total:
        #      rdf:type pointing to owl:Restriction
        #      owl:onProperty pointing to a data or object property
        #      1-2 statements saying which values are accepted, these can
        #      point to an URIRef or BNode
        #
        # if node is an intersection:
        #      it has the predicate owl:intersectionOf and a set of objects
        #
        # The statements are fetched as (predicate, object) pairs in one
        # pass; collecting predicates and objects in two separate iterations
        # and pairing them by index would rely on both iterators yielding
        # in the same order.
        statements = list(graph.predicate_objects(subject=node))
        predicates = [predicate for predicate, _ in statements]

        # Combination of statements
        if uri_ref(Tags.owl_intersection.value) in predicates:
            members = self._extract_objects_out_of_single_combination(
                graph, node, True, False)
            for member in members:
                self._parse_subclass_term(graph=graph,
                                          voc_builder=voc_builder,
                                          node=member,
                                          class_iri=class_iri)
            return

        if uri_ref(Tags.owl_union.value) in predicates:
            self._add_logging_information(
                LogLevel.CRITICAL, IdType.class_, class_iri,
                "Relation statements combined with or")
            return

        if uri_ref(Tags.owl_one_of.value) in predicates:
            self._add_logging_information(
                LogLevel.CRITICAL, IdType.class_, class_iri,
                "Relation statements combined with oneOf")
            return

        # Relation statement
        rdf_type_ref = uri_ref(Tags.rdf_type.value)
        on_property_ref = uri_ref("http://www.w3.org/2002/07/owl#onProperty")

        additional_statements = {}
        rdf_type = ""
        owl_on_property = ""
        for predicate, target in statements:
            if predicate == rdf_type_ref:
                rdf_type = get_iri_from_uriref(target)
            elif predicate == on_property_ref:
                owl_on_property = get_iri_from_uriref(target)
            else:
                additional_statements[get_iri_from_uriref(predicate)] = target

        # both checks are always executed so that every problem gets logged
        relation_is_ok = True
        if rdf_type != "http://www.w3.org/2002/07/owl#Restriction":
            self._add_logging_information(
                LogLevel.CRITICAL, IdType.class_, class_iri,
                "Class has an unknown subClass statement")
            relation_is_ok = False

        if owl_on_property == "":
            self._add_logging_information(
                LogLevel.CRITICAL, IdType.class_, class_iri,
                "Class has a relation without a property")
            relation_is_ok = False

        if not relation_is_ok:
            return

        # this id can and should be random. a class_iri can have a
        # property_iri multiple times, to assign always the same id
        # for the same relation is not worth the trouble
        relation = Relation(property_iri=owl_on_property,
                            id=uuid.uuid4().hex)
        voc_builder.add_relation_for_class(class_iri, relation)

        # go through the additional statements to figure out the
        # targetIRI and the restrictionType/cardinality
        self._parse_relation_type(graph, relation, additional_statements)
예제 #3
0
    def _parse_to_vocabulary(self, graph: rdflib.Graph,
                             voc_builder: VocabularyBuilder):
        """Parse a graph that was extracted from a TTL file into the vocabulary

        The graph is processed in the following order:
            1. owl:Class declarations
            2. rdfs:subClassOf statements (parent classes and relations)
            3. owl:ObjectProperty declarations incl. their inverse properties
            4. owl:DatatypeProperty declarations
            5. rdfs:Datatype declarations incl. their enum values
            6. individuals declared with the individual tag
            7. individuals that are only declared by ":ind rdf:type :Class"

        Entities for which _is_object_defined_by_other_source returns True
        are skipped in steps 1 and 3-6.

        Args:
            graph (rdflib.Graph)
            voc_builder (VocabularyBuilder): Builder object to manipulate a
                vocabulary

        Returns:
            None
        """
        uri_ref = rdflib.term.URIRef
        b_node = rdflib.term.BNode

        # terms that are needed by more than one section
        rdf_type = uri_ref(Tags.rdf_type.value)
        owl_individual = uri_ref(Tags.owl_individual.value)
        sub_class_of = uri_ref(
            "http://www.w3.org/2000/01/rdf-schema#subClassOf")

        # OWLClasses
        for node in graph.subjects(
                object=uri_ref("http://www.w3.org/2002/07/owl#Class"),
                predicate=rdf_type):
            # owl:Class can also occur in complex target statements of
            # relations as BNode, ignore it here
            if isinstance(node, b_node):
                continue
            # defined in other source -> ignore
            if self._is_object_defined_by_other_source(node, graph=graph):
                continue

            iri, label, comment = self._extract_annotations(graph, node)
            voc_builder.add_class(
                class_=Class(iri=iri, label=label, comment=comment))

        # Class properties
        # a set is used as a class shows up once per subClassOf statement
        found_class_iris = {
            get_iri_from_uriref(class_node)
            for class_node in graph.subjects(predicate=sub_class_of)
        }

        for class_iri in found_class_iris:
            # parent class / relation parsing
            for term in graph.objects(subject=uri_ref(class_iri),
                                      predicate=sub_class_of):
                self.current_class_iri = class_iri  # used only for logging
                self._parse_subclass_term(graph=graph,
                                          voc_builder=voc_builder,
                                          node=term,
                                          class_iri=class_iri)

        # OWlObjectProperties
        inverse_of = uri_ref("http://www.w3.org/2002/07/owl#inverseOf")
        for node in graph.subjects(
                object=uri_ref(
                    "http://www.w3.org/2002/07/owl#ObjectProperty"),
                predicate=rdf_type):
            if isinstance(node, b_node):
                self._add_logging_information(LogLevel.WARNING,
                                              IdType.object_property,
                                              "unknown",
                                              "Found unparseable statement")
                continue
            # defined in other source -> ignore
            if self._is_object_defined_by_other_source(node, graph):
                continue

            iri, label, comment = self._extract_annotations(graph, node)
            obj_prop = ObjectProperty(iri=iri, label=label, comment=comment)
            voc_builder.add_object_property(obj_prop)

            # extract inverse properties, it can be multiple but only
            # URIRefs allowed no union/intersection
            for inverse_node in graph.objects(subject=node,
                                              predicate=inverse_of):
                if isinstance(inverse_node, b_node):
                    self._add_logging_information(
                        LogLevel.CRITICAL, IdType.object_property, iri,
                        "Complex inverseProperty statements aren't allowed"
                    )
                else:
                    obj_prop.add_inverse_property_iri(
                        get_iri_from_uriref(inverse_node))

        # OWlDataProperties
        for node in graph.subjects(
                object=uri_ref(
                    "http://www.w3.org/2002/07/owl#DatatypeProperty"),
                predicate=rdf_type):
            if isinstance(node, b_node):
                self._add_logging_information(LogLevel.WARNING,
                                              IdType.data_property, "unknown",
                                              "Found unparseable statement")
                continue
            # defined in other source -> ignore
            if self._is_object_defined_by_other_source(node, graph):
                continue

            iri, label, comment = self._extract_annotations(graph, node)
            voc_builder.add_data_property(
                DataProperty(iri=iri, label=label, comment=comment))

        # OWLDataTypes
        # only the custom created datatype_catalogue are listed in the file,
        # the predefined are automatically added at the start
        # of post processing
        equivalent_class_ref = uri_ref(
            "http://www.w3.org/2002/07/owl#equivalentClass")
        for node in graph.subjects(
                object=uri_ref(
                    "http://www.w3.org/2000/01/rdf-schema#Datatype"),
                predicate=rdf_type):
            # BNodes are silently ignored here, e.g:
            #   :customDataType4 rdf:type rdfs:Datatype ;
            #   owl:equivalentClass [ rdf:type rdfs:Datatype ;....
            # the second Datatype is a BNode that belongs to the
            # declaration of the first one
            if isinstance(node, b_node):
                continue
            # defined in other source -> ignore
            if self._is_object_defined_by_other_source(node, graph):
                continue

            iri, label, comment = self._extract_annotations(graph, node)
            datatype = Datatype(iri=iri, label=label, comment=comment)
            voc_builder.add_datatype(datatype=datatype)

            # a datatype can be empty -> use string
            # a datatype can have multiple equivalent classes
            # (predefined types) -> ignore for now
            # a datatype can contain an enum of possible values ->
            # most interesting
            # under the predicate owl:equivalentClass is than a
            # list(first, rest, nil) under the pred.
            # oneOf with the values
            enum_values = []
            for equivalent_class in graph.objects(
                    subject=node, predicate=equivalent_class_ref):
                if isinstance(equivalent_class, rdflib.term.URIRef):
                    # points to an other defined datatype, ignore
                    continue
                # is a bNode and points to owl:oneOf
                enum_literals = \
                    self._extract_objects_out_of_single_combination(
                        graph, equivalent_class, accept_and=False,
                        accept_or=False, accept_one_of=True)
                enum_values.extend(str(literal) for literal in enum_literals)

            datatype.enum_values = enum_values
            if enum_values:
                datatype.type = DatatypeType.enum
            else:
                datatype.type = DatatypeType.string

        # OWLIndividuals
        for node in graph.subjects(object=owl_individual,
                                   predicate=rdf_type):
            if isinstance(node, b_node):
                self._add_logging_information(LogLevel.WARNING,
                                              IdType.individual, "unknown",
                                              "Found unparseable statement")
                continue
            # defined in other source -> ignore
            if self._is_object_defined_by_other_source(node, graph):
                continue

            iri, label, comment = self._extract_annotations(graph, node)

            # superclasses = types; the individual tag itself is no class
            parent_nodes = []
            for type_node in graph.objects(subject=node, predicate=rdf_type):
                if type_node != owl_individual:
                    parent_nodes.extend(
                        self._extract_objects_out_of_layered_combination(
                            graph, type_node, True, False))

            individual = Individual(iri=iri, label=label, comment=comment)
            for parent_node in parent_nodes:
                individual.parent_class_iris.append(
                    get_iri_from_uriref(parent_node))
            voc_builder.add_individual(individual=individual)

        # As seen for example in the bricks ontology an individual can be
        # declared with :individual1 rdf:type :Class1
        # this type of declaration is hard to completely detect
        # we need to see that the object is a class iri and not a specifier
        # iri. as we may not have loaded all dependencies we can not simply
        # look it up in vocabulary
        # -> get base uri of statement and filter all known specifier uris
        for subject in graph.subjects(predicate=rdf_type):
            for type_node in graph.objects(subject=subject,
                                           predicate=rdf_type):
                if isinstance(type_node, b_node):
                    continue

                type_iri = get_iri_from_uriref(type_node)
                if get_base_out_of_iri(iri=type_iri) in specifier_base_iris:
                    continue

                # the annotations are extracted only once and reused for
                # both the lookup and the creation of the individual
                iri, label, comment = \
                    self._extract_annotations(graph, subject)
                if voc_builder.entity_is_known(iri):
                    continue

                individual = Individual(iri=iri,
                                        label=label,
                                        comment=comment)
                individual.parent_class_iris.append(type_iri)
                voc_builder.add_individual(individual)