def parse(f, on_prepare_triple=None, new_blank=None, new_literal=None):
    """Parse an OWL/XML document and emit its content as RDF triples.

    The document is read with an expat parser created with an empty
    namespace separator, so every element name is seen as the namespace IRI
    directly followed by the local name.  Entities, literals and open
    groups are pushed on a stack (``objs``) by the start/end handlers; when
    an axiom element closes, its operands are taken from the stack and the
    corresponding triples are passed to ``on_prepare_triple``.

    The handlers rely on names defined elsewhere in this module and not
    visible here: ``types``, ``sub_ofs``, ``equivs``, ``prop_types``,
    ``disjoints``, ``restrs``, ``card_restrs``, ``qual_card_restrs``,
    ``rdf_type`` and ``_rindex`` (presumably the index of the last ``"("``
    marker in the stack -- confirm against its definition).

    Args:
        f: Either a path (``str``), opened here in binary mode, or an object
            that ``parser.ParseFile`` accepts.
        on_prepare_triple: Callback ``(s, p, o)`` called for each triple.
            When omitted, triples are printed in an N-Triples-like form and
            counted.
        new_blank: Callable returning a new blank-node identifier.  The
            default yields ``"_:1"``, ``"_:2"``, ...
        new_literal: Callable ``(value, attrs)`` returning the encoded
            literal.  The default escapes double quotes and newlines and
            appends ``@lang`` or ``^^<datatype>`` when present in ``attrs``.

    Returns:
        The triple counter.  It is only incremented by the default
        ``on_prepare_triple``; with a custom callback it stays at 0.

    Raises:
        OwlReadyOntologyParsingError: Any exception raised while opening or
            parsing is re-raised as this error, with the file name, line
            and column, and the original exception as its cause.
    """
    owl = "http://www.w3.org/2002/07/owl#"
    rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    rdfs = "http://www.w3.org/2000/01/rdf-schema#"
    xsd_prefix = "http://www.w3.org/2001/XMLSchema"

    def owl_tags(*names):
        # Expanded element names for a group of OWL/XML local names.
        return frozenset(owl + name for name in names)

    rdf_first = rdf + "first"
    rdf_rest = rdf + "rest"
    rdf_nil = rdf + "nil"

    tag_prefix = owl + "Prefix"
    tag_declaration = owl + "Declaration"
    tag_datatype = owl + "Datatype"
    tag_literal = owl + "Literal"
    tag_anonymous = owl + "AnonymousIndividual"
    tag_sub_object_property = owl + "SubObjectPropertyOf"
    tag_property_chain = owl + "ObjectPropertyChain"
    tag_datatype_restriction = owl + "DatatypeRestriction"
    tag_facet_restriction = owl + "FacetRestriction"
    tag_ontology = owl + "Ontology"
    tag_class_assertion = owl + "ClassAssertion"
    tag_one_of = owl + "ObjectOneOf"
    tag_import = owl + "Import"
    tag_iri = owl + "IRI"
    tag_abbreviated_iri = owl + "AbbreviatedIRI"
    tag_annotation_assertion = owl + "AnnotationAssertion"
    tag_annotation = owl + "Annotation"

    # Elements whose children are collected as a group: a "(" marker is
    # pushed when they open.
    group_tags = owl_tags(
        "ObjectIntersectionOf", "ObjectUnionOf", "ObjectOneOf",
        "DataIntersectionOf", "DataUnionOf", "DisjointClasses",
        "DisjointObjectProperties", "DisjointDataProperties",
        "DifferentIndividuals", "ObjectPropertyChain", "DatatypeRestriction",
    )
    cardinality_tags = owl_tags(
        "ObjectExactCardinality", "ObjectMinCardinality",
        "ObjectMaxCardinality", "DataExactCardinality",
        "DataMinCardinality", "DataMaxCardinality",
    )
    inverse_of_tags = owl_tags("ObjectInverseOf", "DataInverseOf", "inverseOf")
    sub_of_tags = owl_tags(
        "SubClassOf", "SubObjectPropertyOf", "SubDataPropertyOf",
        "SubAnnotationPropertyOf",
    )
    equivalent_tags = owl_tags(
        "EquivalentClasses", "EquivalentObjectProperties",
        "EquivalentDataProperties",
    )
    # Domain / range axioms differ only by the predicate they produce.
    domain_range_predicates = {}
    for kind in ("ObjectProperty", "DataProperty", "AnnotationProperty"):
        domain_range_predicates[owl + kind + "Domain"] = rdfs + "domain"
        domain_range_predicates[owl + kind + "Range"] = rdfs + "range"
    inverse_properties_tags = owl_tags(
        "InverseObjectProperties", "InverseDataProperties")
    property_assertion_tags = owl_tags(
        "ObjectPropertyAssertion", "DataPropertyAssertion")
    complement_tags = owl_tags("ObjectComplementOf", "DataComplementOf")
    intersection_tags = owl_tags("ObjectIntersectionOf", "DataIntersectionOf")
    union_tags = owl_tags("ObjectUnionOf", "DataUnionOf")
    # Root names of an RDF/XML document.  With namespace processing enabled
    # the root is reported under its expanded name, so that form is checked
    # in addition to the two raw spellings.
    rdf_root_tags = frozenset(("RDF", "rdf:RDF", rdf + "RDF"))

    parser = xml.parsers.expat.ParserCreate(None, "")
    try:
        parser.buffer_text = True
        parser.specified_attributes = True
    except Exception:
        # Best effort: these are optional parser settings; parsing proceeds
        # without them if they cannot be set.
        pass

    ontology_iri = ""
    objs = []
    annots = []
    prefixes = {}
    current_content = ""
    current_attrs = None
    current_blank = 0
    in_declaration = False
    in_prop_chain = False
    before_declaration = True
    last_cardinality = "0"
    nb_triple = 0

    if not on_prepare_triple:
        def on_prepare_triple(s, p, o):
            nonlocal nb_triple
            nb_triple += 1
            if not s.startswith("_"):
                s = "<%s>" % s
            if not (o.startswith("_") or o.startswith('"')):
                o = "<%s>" % o
            print("%s %s %s ." % (s, "<%s>" % p, o))

    if not new_blank:
        def new_blank():
            nonlocal current_blank
            current_blank += 1
            return "_:%s" % current_blank

    if not new_literal:
        def new_literal(value, attrs):
            value = value.replace('"', '\\"').replace("\n", "\\n")
            lang = attrs.get("http://www.w3.org/XML/1998/namespacelang")
            if lang:
                return '"%s"@%s' % (value, lang)
            datatype = attrs.get("datatypeIRI")
            if datatype:
                return '"%s"^^<%s>' % (value, datatype)
            return '"%s"' % (value)

    def new_list(l):
        # Emit an rdf:first / rdf:rest chain for ``l``; return its head node.
        bn = bn0 = new_blank()
        if l:
            for item in l[:-1]:
                on_prepare_triple(bn, rdf_first, item)
                bn_next = new_blank()
                on_prepare_triple(bn, rdf_rest, bn_next)
                bn = bn_next
            on_prepare_triple(bn, rdf_first, l[-1])
            on_prepare_triple(bn, rdf_rest, rdf_nil)
        else:
            on_prepare_triple(bn, rdf_first, rdf_nil)
            on_prepare_triple(bn, rdf_rest, rdf_nil)
        return bn0

    def unabbreviate_IRI(abbreviated_iri):
        prefix, name = abbreviated_iri.split(":", 1)
        return prefixes[prefix] + name

    def get_IRI(attrs):
        # Full IRI of an entity element, from "IRI" or "abbreviatedIRI".
        if "IRI" in attrs:
            iri = attrs["IRI"]
            if not iri:
                return ontology_iri
            if iri.startswith(("#", "/")):
                iri = ontology_iri + iri
            return iri
        return unabbreviate_IRI(attrs["abbreviatedIRI"])

    def purge_annotations(on_iri):
        # Attach the pending annotations to ``on_iri``.  A (s, p, o) tuple
        # is first reified as an owl:Axiom blank node.
        nonlocal annots
        if isinstance(on_iri, tuple):
            s, p, o = on_iri
            on_iri = new_blank()
            on_prepare_triple(on_iri, rdf_type, owl + "Axiom")
            on_prepare_triple(on_iri, owl + "annotatedSource", s)
            on_prepare_triple(on_iri, owl + "annotatedProperty", p)
            on_prepare_triple(on_iri, owl + "annotatedTarget", o)
        for prop_iri, value in annots:
            on_prepare_triple(on_iri, prop_iri, value)
        annots = []

    def startElement(tag, attrs):
        nonlocal current_content, current_attrs, in_declaration
        nonlocal before_declaration, last_cardinality, in_prop_chain
        nonlocal ontology_iri

        current_content = ""
        if tag == tag_prefix:
            prefixes[attrs["name"]] = attrs["IRI"]

        elif tag == tag_declaration:
            in_declaration = True
            before_declaration = False

        elif tag in types:
            iri = get_IRI(attrs)
            if in_declaration:
                on_prepare_triple(iri, rdf_type, types[tag])
            objs.append(iri)

        elif tag == tag_datatype:
            objs.append(get_IRI(attrs))

        elif tag == tag_literal:
            current_attrs = attrs

        elif tag in group_tags:
            objs.append("(")

        elif tag in cardinality_tags:
            objs.append("(")
            last_cardinality = attrs["cardinality"]

        elif tag == tag_anonymous:
            objs.append(new_blank())

        elif tag == tag_sub_object_property:
            in_prop_chain = False

        elif tag in inverse_of_tags:
            # Placeholder blank node, completed when the element closes.
            objs.append(new_blank())

        elif tag == tag_facet_restriction:
            objs.append(attrs["facet"])

        elif tag == tag_ontology:
            ontology_iri = attrs["ontologyIRI"]
            on_prepare_triple(ontology_iri, rdf_type, owl + "Ontology")
            version_iri = attrs.get("versionIRI")
            if version_iri:
                on_prepare_triple(
                    ontology_iri, owl + "versionIRI", version_iri)

        elif tag in rdf_root_tags:
            raise ValueError(
                "Not an OWL/XML file! (It seems to be an OWL/RDF file)")

    def endElement(tag):
        nonlocal in_declaration, objs, in_prop_chain

        if tag == tag_declaration:
            in_declaration = False
            objs = []  # Purge stack

        elif tag == tag_literal:
            objs.append(new_literal(current_content, current_attrs))

        elif tag in sub_of_tags:
            parent = objs.pop()
            child = objs.pop()
            if (tag == tag_sub_object_property) and in_prop_chain:
                relation = owl + "propertyChainAxiom"
                parent, child = child, parent
            else:
                relation = sub_ofs[tag]
            on_prepare_triple(child, relation, parent)
            if annots:
                purge_annotations((child, relation, parent))

        elif tag == tag_class_assertion:
            child = objs.pop()  # Order is reversed compared to SubClassOf!
            parent = objs.pop()
            on_prepare_triple(child, rdf_type, parent)
            if annots:
                purge_annotations((child, rdf_type, parent))

        elif tag in equivalent_tags:
            o1 = objs.pop()
            o2 = objs.pop()
            if o1.startswith("_"):
                # Swap in order to have blank node at third position --
                # rapper seems to do that
                o1, o2 = o2, o1
            on_prepare_triple(o1, equivs[tag], o2)
            if annots:
                purge_annotations((o1, equivs[tag], o2))

        elif tag in domain_range_predicates:
            predicate = domain_range_predicates[tag]
            val = objs.pop()
            obj = objs.pop()
            on_prepare_triple(obj, predicate, val)
            if annots:
                purge_annotations((obj, predicate, val))

        elif tag in prop_types:
            obj = objs.pop()
            on_prepare_triple(obj, rdf_type, prop_types[tag])

        elif tag in inverse_properties_tags:
            a, b = objs.pop(), objs.pop()
            on_prepare_triple(b, owl + "inverseOf", a)

        elif tag == tag_property_chain:
            start = _rindex(objs)
            list_iri = new_list(objs[start + 1:])
            in_prop_chain = True
            objs[start:] = [list_iri]

        elif tag in disjoints:
            start = _rindex(objs)
            list_obj = objs[start + 1:]
            all_disjoint_type, rel, member = disjoints[tag]
            if rel and (len(list_obj) == 2):
                # Exactly two members: a single direct triple is enough.
                on_prepare_triple(list_obj[0], rel, list_obj[1])
                if annots:
                    purge_annotations((list_obj[0], rel, list_obj[1]))
            else:
                list_iri = new_list(list_obj)
                iri = new_blank()
                on_prepare_triple(iri, rdf_type, all_disjoint_type)
                on_prepare_triple(iri, member, list_iri)
                if annots:
                    purge_annotations((iri, rdf_type, all_disjoint_type))
            del objs[start:]

        elif tag in property_assertion_tags:
            p, s, o = objs[-3:]
            on_prepare_triple(s, p, o)
            if annots:
                purge_annotations((s, p, o))
            del objs[-3:]

        elif tag in complement_tags:
            iri = new_blank()
            on_prepare_triple(iri, rdf_type, owl + "Class")
            on_prepare_triple(iri, owl + "complementOf", objs[-1])
            objs[-1] = iri

        elif tag in restrs:
            iri = new_blank()
            on_prepare_triple(iri, rdf_type, owl + "Restriction")
            on_prepare_triple(iri, owl + "onProperty", objs.pop(-2))
            on_prepare_triple(iri, restrs[tag], objs[-1])
            objs[-1] = iri

        elif tag in card_restrs:
            iri = new_blank()
            on_prepare_triple(iri, rdf_type, owl + "Restriction")
            start = _rindex(objs)
            values = objs[start + 1:]
            del objs[start:]

            if len(values) == 2:  # Qualified
                card_predicate = qual_card_restrs[tag]
                on_prepare_triple(iri, owl + "onProperty", values[-2])
                # The filler is values[-1].  The group has already been
                # removed from the stack, so objs[-1] would be an unrelated
                # entry (or raise IndexError on an empty stack).
                if values[-1].startswith(xsd_prefix):
                    on_prepare_triple(iri, owl + "onDataRange", values[-1])
                else:
                    on_prepare_triple(iri, owl + "onClass", values[-1])
            else:  # Non qualified
                card_predicate = card_restrs[tag]
                on_prepare_triple(iri, owl + "onProperty", values[-1])
            on_prepare_triple(
                iri, card_predicate,
                new_literal(
                    last_cardinality,
                    {"datatypeIRI":
                     "http://www.w3.org/2001/XMLSchema#nonNegativeInteger"}))
            objs.append(iri)

        elif tag == tag_one_of:
            start = _rindex(objs)
            list_iri = new_list(objs[start + 1:])
            iri = new_blank()
            on_prepare_triple(iri, rdf_type, owl + "Class")
            on_prepare_triple(iri, owl + "oneOf", list_iri)
            objs[start:] = [iri]

        elif (tag in intersection_tags) or (tag in union_tags):
            start = _rindex(objs)
            members = objs[start + 1:]
            list_iri = new_list(members)
            iri = new_blank()
            if members[0].startswith(xsd_prefix):
                on_prepare_triple(iri, rdf_type, rdfs + "Datatype")
            else:
                on_prepare_triple(iri, rdf_type, owl + "Class")
            if tag in intersection_tags:
                on_prepare_triple(iri, owl + "intersectionOf", list_iri)
            else:
                on_prepare_triple(iri, owl + "unionOf", list_iri)
            objs[start:] = [iri]

        elif tag == tag_import:
            on_prepare_triple(ontology_iri, owl + "imports", current_content)

        elif tag == tag_iri:
            iri = current_content
            if not iri:
                iri = ontology_iri
            elif iri.startswith(("#", "/")):
                iri = ontology_iri + iri
            objs.append(iri)

        elif tag == tag_abbreviated_iri:
            objs.append(unabbreviate_IRI(current_content))

        elif tag == tag_annotation_assertion:
            on_prepare_triple(objs[-2], objs[-3], objs[-1])
            if annots:
                purge_annotations((objs[-2], objs[-3], objs[-1]))

        elif tag == tag_annotation:
            if before_declaration:  # On ontology
                on_prepare_triple(ontology_iri, objs[-2], objs[-1])
            else:
                # Kept until the enclosing axiom closes.
                annots.append((objs[-2], objs[-1]))
            del objs[-2:]

        elif tag == tag_datatype_restriction:
            start = _rindex(objs)
            datatype, *facet_bns = objs[start + 1:]
            list_bns = new_list(facet_bns)
            bn = new_blank()
            objs[start:] = [bn]
            on_prepare_triple(bn, rdf_type, rdfs + "Datatype")
            on_prepare_triple(bn, owl + "onDatatype", datatype)
            on_prepare_triple(bn, owl + "withRestrictions", list_bns)

        elif tag == tag_facet_restriction:
            facet, literal = objs[-2:]
            bn = new_blank()
            on_prepare_triple(bn, facet, literal)
            objs[-2:] = [bn]

        elif tag in inverse_of_tags:
            bn, prop = objs[-2:]
            on_prepare_triple(bn, owl + "inverseOf", prop)
            objs[-2:] = [bn]

    def characters(content):
        nonlocal current_content
        current_content += content

    parser.StartElementHandler = startElement
    parser.EndElementHandler = endElement
    parser.CharacterDataHandler = characters

    try:
        if isinstance(f, str):
            # Rebind ``f`` so the error message below can report its name;
            # the ``with`` block closes it even when parsing fails.
            f = open(f, "rb")
            with f:
                parser.ParseFile(f)
        else:
            parser.ParseFile(f)
    except Exception as e:
        raise OwlReadyOntologyParsingError(
            "OWL/XML parsing error in file %s, line %s, column %s."
            % (getattr(f, "name", "???"), parser.CurrentLineNumber,
               parser.CurrentColumnNumber)) from e

    return nb_triple
def parse(f, on_prepare_triple=None, new_blank=None, new_literal=None, default_base=""):
    """Parse an RDF/XML document and emit its triples through a callback.

    The document is read with an expat parser created with an empty
    namespace separator, so element and attribute names are seen as the
    namespace IRI directly followed by the local name.  Elements are
    treated alternately as property elements and node elements
    (``tag_is_predicate`` is toggled on every start and end tag).

    Anonymous nodes met inside ``owl:annotatedSource`` /
    ``owl:annotatedTarget`` get a temporary "fake" identifier (``"_ N"``)
    and produce no triple while parsing.  After parsing, each one is
    matched by content against the blank nodes already emitted, or rebuilt
    as a new blank node when no match exists, and the pending annotation
    triple is then emitted.

    Args:
        f: Either a path (``str``), opened here in binary mode, or an object
            that ``parser.ParseFile`` accepts.
        on_prepare_triple: Callback ``(s, p, o)`` called for each triple.
            When omitted, triples are printed in an N-Triples-like form and
            counted.
        new_blank: Callable returning a new blank-node identifier.  The
            default yields ``"_:1"``, ``"_:2"``, ...
        new_literal: Callable ``(value, attrs)`` returning the encoded
            literal.  The default escapes double quotes and newlines and
            appends ``@lang`` or ``^^<datatype>`` when present in ``attrs``.
        default_base: Base IRI used for relative references until an
            ``xml:base`` attribute on ``rdf:RDF`` replaces it.  One trailing
            ``#`` or ``/`` is removed.

    Returns:
        The triple counter.  It is only incremented by the default
        ``on_prepare_triple``; with a custom callback it stays at 0.

    Raises:
        OwlReadyOntologyParsingError: Any exception raised while opening,
            parsing or resolving the pending annotation nodes is re-raised
            as this error, with the file name, line and column, and the
            original exception as its cause.
    """
    rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    owl = "http://www.w3.org/2002/07/owl#"
    rdf_type_iri = rdf + "type"
    rdf_first = rdf + "first"
    rdf_rest = rdf + "rest"
    rdf_nil = rdf + "nil"
    rdf_root = rdf + "RDF"
    rdf_description = rdf + "Description"
    attr_parse_type = rdf + "parseType"
    attr_resource = rdf + "resource"
    attr_node_id = rdf + "nodeID"
    attr_about = rdf + "about"
    attr_id = rdf + "ID"
    attr_xml_base = "http://www.w3.org/XML/1998/namespacebase"
    annotated_source = owl + "annotatedSource"
    annotated_property = owl + "annotatedProperty"
    annotated_target = owl + "annotatedTarget"

    parser = xml.parsers.expat.ParserCreate(None, "")
    try:
        parser.buffer_text = True
        parser.specified_attributes = True
    except Exception:
        # Best effort: these are optional parser settings; parsing proceeds
        # without them if they cannot be set.
        pass

    stack = [["", ""]]  # List of [parse type, value] pairs
    prefixes = {}
    prefixess = [prefixes]
    tag_is_predicate = False
    current_blank = 0
    current_fake_blank = 0
    current_content = ""
    current_attrs = None
    nb_triple = 0
    # Content recorded for every blank node, used to match the temporary
    # annotation nodes after parsing.
    bns = defaultdict(set)
    dont_create_unnamed_bn = False
    axiom_annotation_sources = {}
    axiom_annotation_props = {}
    axiom_annotation_targets = {}
    triples_with_unnamed_bn = defaultdict(list)

    if default_base:
        xml_base = default_base
        if xml_base.endswith(("#", "/")):
            xml_base = xml_base[:-1]
        xml_dir = xml_base.rsplit("/", 1)[0] + "/"
    else:
        xml_base = ""
        xml_dir = ""

    if not on_prepare_triple:
        def on_prepare_triple(s, p, o):
            nonlocal nb_triple
            nb_triple += 1
            if not s.startswith("_"):
                s = "<%s>" % s
            if not (o.startswith("_") or o.startswith('"')):
                o = "<%s>" % o
            print("%s %s %s ." % (s, "<%s>" % p, o))

    if not new_blank:
        def new_blank():
            nonlocal current_blank
            current_blank += 1
            return "_:%s" % current_blank

    def new_fake_blank():
        # Temporary identifier ("_ " followed by a number), told apart from
        # real blank nodes by its "_ " prefix.
        nonlocal current_fake_blank
        current_fake_blank += 1
        return "_ %s" % current_fake_blank

    node_2_blanks = defaultdict(new_blank)  # rdf:nodeID -> blank node
    known_nodes = set()  # blank nodes that came from an rdf:nodeID

    if not new_literal:
        def new_literal(value, attrs):
            value = value.replace('"', '\\"').replace("\n", "\\n")
            lang = attrs.get("http://www.w3.org/XML/1998/namespacelang")
            if lang:
                return '"%s"@%s' % (value, lang)
            datatype = attrs.get(
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#datatype")
            if datatype:
                return '"%s"^^<%s>' % (value, datatype)
            return '"%s"' % (value)

    def new_list(l):
        # Emit an rdf:first / rdf:rest chain for ``l``; return its head node.
        bn = bn0 = new_blank()
        if l:
            for item in l[:-1]:
                on_prepare_triple(bn, rdf_first, item)
                bn_next = new_blank()
                on_prepare_triple(bn, rdf_rest, bn_next)
                bn = bn_next
            on_prepare_triple(bn, rdf_first, l[-1])
            on_prepare_triple(bn, rdf_rest, rdf_nil)
        else:
            on_prepare_triple(bn, rdf_first, rdf_nil)
            on_prepare_triple(bn, rdf_rest, rdf_nil)
        return bn0

    def add_to_bn(bn, kind, rel, value):
        # Record one statement in the content of blank node ``bn``.  Blank
        # nodes that did not come from an rdf:nodeID are replaced by a
        # snapshot of their own content.
        if kind == "COL":
            value = tuple(
                frozenset(bns[v])
                if v.startswith("_") and (v not in known_nodes) else v
                for v in value)
            bns[bn].add((kind, rel) + value)
        else:
            if value.startswith("_") and (value not in known_nodes):
                value = frozenset(bns[value])
            bns[bn].add((kind, rel, value))

    def resolve_iri(iri):
        # Resolve a reference without ":" against the current base.
        if ":" in iri:
            return iri
        if not iri:
            return xml_base
        if iri.startswith("#"):
            return xml_base + iri
        if iri.startswith("/"):
            return xml_dir + iri[1:]
        return urllib.parse.urljoin(xml_dir, iri)

    def startNamespace(prefix, uri):
        nonlocal prefixes
        prefixes = prefixes.copy()
        prefixess.append(prefixes)
        prefixes[prefix or ""] = uri

    def endNamespace(prefix):
        nonlocal prefixes
        prefixess.pop()
        prefixes = prefixess[-1]

    def startElement(tag, attrs):
        nonlocal tag_is_predicate, current_content, current_attrs
        nonlocal dont_create_unnamed_bn, xml_base, xml_dir

        tag_is_predicate = not tag_is_predicate
        if tag_is_predicate:
            if attrs.get(attr_parse_type) == "Collection":
                stack.append(["Collection", []])

            elif tag == rdf_root:
                stack.append(["RDF", ""])
                namespace_base = attrs.get(attr_xml_base)
                if namespace_base:
                    xml_base = namespace_base
                    xml_dir = namespace_base.rsplit("/", 1)[0] + "/"

            else:
                iri = attrs.get(attr_resource)
                # ``is not None`` so that rdf:resource="" resolves to the
                # base IRI instead of being handled as a literal.
                if iri is not None:
                    stack.append(["Resource", resolve_iri(iri)])
                else:
                    node_id = attrs.get(attr_node_id)
                    if node_id:
                        bn = node_2_blanks[node_id]
                        known_nodes.add(bn)
                        stack.append(["Resource", bn])
                    else:
                        # Assumed to be a literal until a child node
                        # element turns it into a resource.
                        stack.append(["Literal", ""])
                        current_content = ""
                        current_attrs = attrs

                if tag in (annotated_source, annotated_target):
                    dont_create_unnamed_bn = True

        else:
            iri = attrs.get(attr_about, None)
            if iri is None:
                iri = attrs.get(attr_id, None)
                if iri:
                    iri = "#%s" % iri

            if iri is None:
                node_id = attrs.get(attr_node_id)
                if node_id:
                    iri = node_2_blanks[node_id]
                    known_nodes.add(iri)
                elif dont_create_unnamed_bn:
                    iri = new_fake_blank()
                else:
                    iri = new_blank()
            else:
                iri = resolve_iri(iri)

            if tag != rdf_description:
                # Typed node element: the tag is the node's rdf:type.
                if not iri.startswith("_ "):
                    on_prepare_triple(iri, rdf_type_iri, tag)
                if iri.startswith("_"):
                    add_to_bn(iri, "REL", rdf_type_iri, tag)

            if stack[-1][0] == "Collection":
                stack[-1][1].append(iri)
            else:
                if stack[-1][0] == "Literal":
                    stack[-1][0] = "Resource"
                stack[-1][1] = iri

    def endElement(tag):
        nonlocal tag_is_predicate, dont_create_unnamed_bn

        if tag_is_predicate:
            parse_type, value = stack.pop()

            # Subject of the property element that is closing.
            if stack[-1][0] == "Collection":
                iri = stack[-1][1][-1]
            else:
                iri = stack[-1][1]

            if tag == annotated_source:
                dont_create_unnamed_bn = False
                axiom_annotation_sources[iri] = value
                if isinstance(value, str) and value.startswith("_ "):
                    # Deferred: resolved after parsing.  Sources go first
                    # so they are handled before the target.
                    triples_with_unnamed_bn[iri].insert(
                        0, (tag, value, parser.CurrentLineNumber,
                            parser.CurrentColumnNumber))
                    tag_is_predicate = not tag_is_predicate
                    return

            elif tag == annotated_property:
                axiom_annotation_props[iri] = value

            elif tag == annotated_target:
                dont_create_unnamed_bn = False
                axiom_annotation_targets[iri] = value
                if isinstance(value, str) and value.startswith("_ "):
                    triples_with_unnamed_bn[iri].append(
                        (tag, value, parser.CurrentLineNumber,
                         parser.CurrentColumnNumber))
                    tag_is_predicate = not tag_is_predicate
                    return

            if parse_type == "Resource":
                if not iri.startswith("_ "):
                    on_prepare_triple(iri, tag, value)
                if iri.startswith("_"):
                    add_to_bn(iri, "REL", tag, value)
                if value.startswith("_"):
                    add_to_bn(value, "INV", tag, iri)

            elif parse_type == "Literal":
                value = new_literal(current_content, current_attrs)
                if not iri.startswith("_ "):
                    on_prepare_triple(iri, tag, value)
                if iri.startswith("_"):
                    add_to_bn(iri, "REL", tag, value)

            elif parse_type == "Collection":
                if not iri.startswith("_ "):
                    on_prepare_triple(iri, tag, new_list(value))
                if iri.startswith("_"):
                    add_to_bn(iri, "COL", tag, value)

        tag_is_predicate = not tag_is_predicate

    def characters(content):
        nonlocal current_content
        if stack[-1][0] == "Literal":
            current_content += content

    parser.StartNamespaceDeclHandler = startNamespace
    parser.EndNamespaceDeclHandler = endNamespace
    parser.StartElementHandler = startElement
    parser.EndElementHandler = endElement
    parser.CharacterDataHandler = characters

    try:
        if isinstance(f, str):
            # Rebind ``f`` so the error messages below can report its name;
            # the ``with`` block closes it even when parsing fails.
            f = open(f, "rb")
            with f:
                parser.ParseFile(f)
        else:
            parser.ParseFile(f)
    except Exception as e:
        raise OwlReadyOntologyParsingError(
            "RDF/XML parsing error in file %s, line %s, column %s."
            % (getattr(f, "name", "???"), parser.CurrentLineNumber,
               parser.CurrentColumnNumber)) from e

    if triples_with_unnamed_bn:
        # Index the blank nodes already emitted by their content.
        content_2_bns = defaultdict(list)
        for bn, content in bns.items():
            if not bn.startswith("_ "):
                content_2_bns[frozenset(content)].append(bn)

        def rebuild_bn(content):
            # Create a new blank node and emit the triples for ``content``.
            bn = new_blank()
            content_2_bns[frozenset(content)].append(bn)
            for i in content:
                if i[0] == "REL":
                    _, p, o = i
                    if not isinstance(o, str):
                        o = rebuild_bn(o)
                    on_prepare_triple(bn, p, o)
                elif i[0] == "INV":
                    _, p, o = i
                    if not isinstance(o, str):
                        o = rebuild_bn(o)
                    on_prepare_triple(o, p, bn)
                elif i[0] == "COL":
                    _, p, *members = i
                    members = [
                        x if isinstance(x, str) else rebuild_bn(x)
                        for x in members]
                    o = new_list(members)
                    on_prepare_triple(bn, p, o)
                else:
                    print(i)
                    raise ValueError
            return bn

        for axiom_iri, triples in triples_with_unnamed_bn.items():
            for p, o, line, column in triples:
                try:
                    content = bns[o]
                    if p == annotated_source:
                        target = axiom_annotation_targets[axiom_iri]
                        if target.startswith("_"):
                            target = frozenset(bns[target])
                        candidates_bn = content_2_bns[frozenset(
                            content
                            | {("REL", axiom_annotation_props[axiom_iri],
                                target)})]
                    else:
                        # Here ``o`` is the temporary target node itself,
                        # so the target's content is ``content``.  Computing
                        # it locally avoids reading a name that is unbound,
                        # or left over from another axiom, when no source
                        # entry was processed for this axiom.
                        target = frozenset(content)
                        source = axiom_annotation_sources[axiom_iri]
                        if source.startswith("_"):
                            source = frozenset(
                                bns[source]
                                | {("REL", axiom_annotation_props[axiom_iri],
                                    target)})
                        candidates_bn = (
                            content_2_bns[frozenset(
                                content
                                | {("INV", axiom_annotation_props[axiom_iri],
                                    source)})]
                            or content_2_bns[frozenset(content)])

                    if candidates_bn:
                        o = candidates_bn[-1]
                    else:
                        o = rebuild_bn(content)
                    on_prepare_triple(axiom_iri, p, o)
                except Exception as e:
                    raise OwlReadyOntologyParsingError(
                        "RDF/XML parsing error in file %s, line %s, column %s."
                        % (getattr(f, "name", "???"), line, column)) from e

    return nb_triple