def _add_owl_thing(cls, voc_builder: VocabularyBuilder):
    """Add the predefined owl:Thing root class to the vocabulary.

    By definition each class is a subclass of owl:Thing, and owl:Thing can
    be the target of a relation, but owl:Thing is never mentioned explicitly
    in ontology files.

    Every class that has no parent yet receives owl:Thing as its parent.
    The root class itself is never given a parent, so that an owl:Thing
    entry which is already present in the vocabulary (e.g. from an earlier
    run) does not end up being its own parent.

    Args:
        voc_builder: Builder object for Vocabulary

    Returns:
        None
    """
    root_class = Class(iri="http://www.w3.org/2002/07/owl#Thing",
                       comment="Predefined root_class",
                       label="Thing",
                       predefined=True)

    # As it is the root object, it is only the parent of classes which have
    # no parents yet.
    for class_ in voc_builder.vocabulary.get_classes():
        if class_.iri == root_class.iri:
            # The root must stay parentless; otherwise the class hierarchy
            # would contain a cycle (owl:Thing -> owl:Thing).
            continue
        if not class_.parent_class_iris:
            class_.parent_class_iris.append(root_class.iri)

    # Register the root class only once.
    if root_class.iri not in voc_builder.vocabulary.classes:
        voc_builder.add_class(root_class)
        root_class.source_ids.add("PREDEFINED")
def _parse_subclass_term(self, graph: rdflib.Graph,
                         voc_builder: VocabularyBuilder,
                         node: rdflib.term, class_iri: str):
    """Parse one rdfs:subClassOf term of the class with the given IRI.

    The node can be one of three things:
        - a parent-class statement -> URIRef
        - a relation statement (owl:Restriction) -> BNode
        - an intersection of parent classes, relations and
          intersections -> BNode (handled recursively)

    Unions and oneOf combinations are not supported and are reported via
    the parser's logging information.

    Args:
        graph (rdflib.Graph): Graph describing the ontology
        voc_builder (VocabularyBuilder): Builder object of the vocabulary
            to parse into
        node (rdflib.term): The subclass term to parse
        class_iri (str): IRI of the class the term belongs to

    Returns:
        None
    """
    # The class could have been defined only in another source, so no class
    # exists yet. As we have found a statement for it, it needs to exist.
    if class_iri not in voc_builder.vocabulary.classes:
        voc_builder.add_class(class_=Class(iri=class_iri))

    if not isinstance(node, rdflib.term.BNode):
        # Parent-class statement or empty list element.
        # owl:Thing is the root object, but it is not declared as a class
        # in the file. To prevent failing lookups of parents, a class whose
        # parent is owl:Thing simply gets no parent here. rdf:nil marks an
        # empty list and is ignored as well.
        ignored_iris = (
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil",
            "http://www.w3.org/2002/07/owl#Thing",
        )
        node_iri = get_iri_from_uriref(node)
        if node_iri not in ignored_iris:
            voc_builder.vocabulary.get_class_by_iri(class_iri) \
                .parent_class_iris.append(node_iri)
        return

    # The node has no IRI and is therefore a relation or a combination.
    # If the node is a relation there should be 3-4 statements in total:
    #   rdf:type pointing to owl:Restriction
    #   owl:onProperty pointing to a data or object property
    #   1-2 statements describing which values are accepted; these can
    #   point to a URIRef or a BNode
    # If the node is an intersection it has the predicate
    # owl:intersectionOf and a set of objects.
    #
    # Each (predicate, object) pair is taken from a single triple, so the
    # pairing does not depend on two separate graph scans returning their
    # results in the same order.
    statements = list(graph.predicate_objects(subject=node))
    predicates = [predicate for predicate, _ in statements]

    # Combination of statements
    if rdflib.term.URIRef(Tags.owl_intersection.value) in predicates:
        members = self._extract_objects_out_of_single_combination(
            graph, node, True, False)
        for member in members:
            self._parse_subclass_term(graph=graph,
                                      voc_builder=voc_builder,
                                      node=member,
                                      class_iri=class_iri)
        return

    if rdflib.term.URIRef(Tags.owl_union.value) in predicates:
        self._add_logging_information(
            LogLevel.CRITICAL, IdType.class_, class_iri,
            "Relation statements combined with or")
        return

    if rdflib.term.URIRef(Tags.owl_one_of.value) in predicates:
        self._add_logging_information(
            LogLevel.CRITICAL, IdType.class_, class_iri,
            "Relation statements combined with oneOf")
        return

    # Relation statement
    rdf_type_ref = rdflib.term.URIRef(Tags.rdf_type.value)
    on_property_ref = rdflib.term.URIRef(
        "http://www.w3.org/2002/07/owl#onProperty")

    additional_statements = {}
    rdf_type = ""
    owl_on_property = ""
    for predicate, statement_object in statements:
        if predicate == rdf_type_ref:
            rdf_type = get_iri_from_uriref(statement_object)
        elif predicate == on_property_ref:
            owl_on_property = get_iri_from_uriref(statement_object)
        else:
            additional_statements[get_iri_from_uriref(predicate)] = \
                statement_object

    relation_is_ok = True
    if rdf_type != "http://www.w3.org/2002/07/owl#Restriction":
        self._add_logging_information(
            LogLevel.CRITICAL, IdType.class_, class_iri,
            "Class has an unknown subClass statement")
        relation_is_ok = False

    if owl_on_property == "":
        self._add_logging_information(
            LogLevel.CRITICAL, IdType.class_, class_iri,
            "Class has a relation without a property")
        relation_is_ok = False

    if not relation_is_ok:
        return

    # This id can and should be random. A class_iri can have a property_iri
    # multiple times; always assigning the same id to the same relation is
    # not worth the trouble.
    relation_id = uuid.uuid4().hex
    relation = Relation(property_iri=owl_on_property, id=relation_id)
    voc_builder.add_relation_for_class(class_iri, relation)

    # Go through the additional statements to figure out the target IRI and
    # the restriction type / cardinality.
    self._parse_relation_type(graph, relation, additional_statements)
def _parse_to_vocabulary(self, graph: rdflib.Graph,
                         voc_builder: VocabularyBuilder):
    """Parse a graph that was extracted from a TTL file into the vocabulary.

    The graph is scanned section by section for classes, subclass terms
    (parents and relations), object properties, data properties, custom
    datatypes and individuals. Every entity found is handed to the
    vocabulary builder. Entities for which
    ``_is_object_defined_by_other_source`` returns a truthy value are
    skipped.

    Args:
        graph (rdflib.Graph): Graph describing the ontology
        voc_builder (VocabularyBuilder): Builder object to manipulate a
            vocabulary

    Returns:
        None
    """
    uri_ref = rdflib.term.URIRef
    rdf_type = uri_ref(Tags.rdf_type.value)

    # OWLClasses
    for node in graph.subjects(
            object=uri_ref("http://www.w3.org/2002/07/owl#Class"),
            predicate=rdf_type):
        if isinstance(node, rdflib.term.BNode):
            # owl:Class can also occur in complex target statements of
            # relations as BNode, ignore it here
            continue
        # defined in other source -> ignore
        if self._is_object_defined_by_other_source(node, graph=graph):
            continue

        iri, label, comment = self._extract_annotations(graph, node)
        voc_builder.add_class(
            class_=Class(iri=iri, label=label, comment=comment))

    # Class properties
    sub_class_of = uri_ref(
        "http://www.w3.org/2000/01/rdf-schema#subClassOf")
    # A class can have several subClassOf statements; collect each class
    # IRI only once.
    found_class_iris = {
        get_iri_from_uriref(class_node)
        for class_node in graph.subjects(predicate=sub_class_of)
    }

    for class_iri in found_class_iris:
        # parent class / relation parsing
        for sub in graph.objects(subject=uri_ref(class_iri),
                                 predicate=sub_class_of):
            self.current_class_iri = class_iri  # used only for logging
            self._parse_subclass_term(graph=graph,
                                      voc_builder=voc_builder,
                                      node=sub,
                                      class_iri=class_iri)

    # OWLObjectProperties
    inverse_of = uri_ref("http://www.w3.org/2002/07/owl#inverseOf")
    for node in graph.subjects(
            object=uri_ref("http://www.w3.org/2002/07/owl#ObjectProperty"),
            predicate=rdf_type):
        if isinstance(node, rdflib.term.BNode):
            self._add_logging_information(LogLevel.WARNING,
                                          IdType.object_property,
                                          "unknown",
                                          "Found unparseable statement")
            continue
        # defined in other source -> ignore
        if self._is_object_defined_by_other_source(node, graph):
            continue

        iri, label, comment = self._extract_annotations(graph, node)
        obj_prop = ObjectProperty(iri=iri, label=label, comment=comment)
        voc_builder.add_object_property(obj_prop)

        # Extract inverse properties. There can be multiple, but only
        # URIRefs are allowed, no union/intersection.
        for inverse_iri_node in graph.objects(subject=node,
                                              predicate=inverse_of):
            if isinstance(inverse_iri_node, rdflib.term.BNode):
                self._add_logging_information(
                    LogLevel.CRITICAL, IdType.object_property, iri,
                    "Complex inverseProperty statements aren't allowed"
                )
            else:
                obj_prop.add_inverse_property_iri(
                    get_iri_from_uriref(inverse_iri_node))

    # OWLDataProperties
    for node in graph.subjects(
            object=uri_ref(
                "http://www.w3.org/2002/07/owl#DatatypeProperty"),
            predicate=rdf_type):
        if isinstance(node, rdflib.term.BNode):
            self._add_logging_information(LogLevel.WARNING,
                                          IdType.data_property,
                                          "unknown",
                                          "Found unparseable statement")
            continue
        # defined in other source -> ignore
        if self._is_object_defined_by_other_source(node, graph):
            continue

        iri, label, comment = self._extract_annotations(graph, node)
        voc_builder.add_data_property(
            DataProperty(iri=iri, label=label, comment=comment))

    # OWLDataTypes
    # Only the custom created datatypes are listed in the file, the
    # predefined ones are automatically added at the start of post
    # processing.
    equivalent_class_ref = uri_ref(
        "http://www.w3.org/2002/07/owl#equivalentClass")
    for node in graph.subjects(
            object=uri_ref("http://www.w3.org/2000/01/rdf-schema#Datatype"),
            predicate=rdf_type):
        if isinstance(node, rdflib.term.BNode):
            # e.g.:
            #   :customDataType4 rdf:type rdfs:Datatype ;
            #       owl:equivalentClass [ rdf:type rdfs:Datatype ; ...
            # The second Datatype triggers this condition, but we can
            # ignore this statement (deliberately no warning is logged).
            continue
        # defined in other source -> ignore
        if self._is_object_defined_by_other_source(node, graph):
            continue

        iri, label, comment = self._extract_annotations(graph, node)
        datatype = Datatype(iri=iri, label=label, comment=comment)
        voc_builder.add_datatype(datatype=datatype)

        # A datatype can be empty -> use string.
        # A datatype can have multiple equivalent classes
        # (predefined types) -> ignored for now.
        # A datatype can contain an enum of possible values -> most
        # interesting. Under the predicate owl:equivalentClass there is
        # then a list (first, rest, nil) under the predicate oneOf holding
        # the values.
        enum_values = []
        for equivalent_class in graph.objects(
                subject=node, predicate=equivalent_class_ref):
            if isinstance(equivalent_class, rdflib.term.URIRef):
                # points to another defined datatype, ignore
                continue
            # is a BNode and points to owl:oneOf
            enum_literals = \
                self._extract_objects_out_of_single_combination(
                    graph, equivalent_class, accept_and=False,
                    accept_or=False, accept_one_of=True)
            enum_values.extend(str(literal) for literal in enum_literals)

        datatype.enum_values = enum_values
        if enum_values:
            datatype.type = DatatypeType.enum
        else:
            datatype.type = DatatypeType.string

    # OWLIndividuals
    owl_individual = uri_ref(Tags.owl_individual.value)
    for node in graph.subjects(object=owl_individual, predicate=rdf_type):
        if isinstance(node, rdflib.term.BNode):
            self._add_logging_information(LogLevel.WARNING,
                                          IdType.individual,
                                          "unknown",
                                          "Found unparseable statement")
            continue
        # defined in other source -> ignore
        if self._is_object_defined_by_other_source(node, graph):
            continue

        iri, label, comment = self._extract_annotations(graph, node)

        # superclasses = types; the individual marker itself is no class
        type_nodes = []
        for type_node in graph.objects(subject=node, predicate=rdf_type):
            if type_node == owl_individual:
                continue
            type_nodes.extend(
                self._extract_objects_out_of_layered_combination(
                    graph, type_node, True, False))

        individual = Individual(iri=iri, label=label, comment=comment)
        individual.parent_class_iris.extend(
            get_iri_from_uriref(type_node) for type_node in type_nodes)
        voc_builder.add_individual(individual=individual)

    # As seen for example in the bricks ontology, an individual can be
    # declared with
    #   :individual1 rdf:type :Class1
    # This type of declaration is hard to detect completely: we need to see
    # that the object is a class IRI and not a specifier IRI. As we may not
    # have loaded all dependencies, we cannot simply look it up in the
    # vocabulary.
    # -> get the base IRI of the statement and filter out all known
    #    specifier IRIs
    for sub in graph.subjects(predicate=rdf_type):
        for obj in graph.objects(subject=sub, predicate=rdf_type):
            if isinstance(obj, rdflib.term.BNode):
                continue

            obj_iri = get_iri_from_uriref(obj)
            if get_base_out_of_iri(iri=obj_iri) in specifier_base_iris:
                continue

            iri, label, comment = self._extract_annotations(graph, sub)
            # NOTE(review): once an individual was added for `sub`,
            # entity_is_known presumably reports it as known, so further
            # rdf:type objects of the same subject are not recorded as
            # parents - confirm that this is intended.
            if voc_builder.entity_is_known(iri):
                continue

            individual = Individual(iri=iri, label=label, comment=comment)
            individual.parent_class_iris.append(obj_iri)
            voc_builder.add_individual(individual)