Example #1
0
    def canonicalTerm(self, term):
        """Return a compact textual form of an RDF term.

        A ``URIRef`` is abbreviated to ``prefix:localName`` when a prolog is
        available and the computed prefix is one of the prolog's prefix
        bindings; otherwise the ``URIRef`` is returned unchanged.  ``Literal``,
        ``BNode`` and ``Variable`` terms are rendered with their ``n3()``
        method.

        :param term: a ``URIRef``, ``Literal``, ``BNode`` or ``Variable``.
        :returns: the abbreviated qname string, the original ``URIRef``, or
            the N3 serialisation of the term.
        :raises AssertionError: if ``term`` is none of the supported types.
        """
        if isinstance(term, URIRef):
            if self.prolog is None:
                return term

            # Scratch namespace manager seeded with the prolog's bindings;
            # override=False keeps any binding that is already present.
            namespace_manager = NamespaceManager(Graph())
            for prefix, uri in self.prolog.prefixBindings.items():
                namespace_manager.bind(prefix, uri, override=False)

            try:
                prefix, uri, localName = namespace_manager.compute_qname(term)
            except Exception:
                # Best effort: a URI that cannot be turned into a qname is
                # returned as-is.  ``Exception`` (rather than a bare except)
                # lets KeyboardInterrupt/SystemExit propagate.
                return term

            # Only abbreviate with prefixes the prolog itself declares; the
            # manager may report a prefix that did not come from the prolog
            # (presumably a default or generated one -- verify).
            if prefix not in self.prolog.prefixBindings:
                return term
            return ':'.join([prefix, localName])

        if isinstance(term, (Literal, BNode)):
            return term.n3()

        assert isinstance(term, Variable)
        return term.n3()
Example #2
0
    def canonicalTerm(self, term):
        """Return a compact textual form of an RDF term.

        A ``URIRef`` is abbreviated to ``prefix:localName`` when a prolog is
        available and the computed prefix is one of the prolog's prefix
        bindings; otherwise the ``URIRef`` is returned unchanged.  ``Literal``,
        ``BNode`` and ``Variable`` terms are rendered with their ``n3()``
        method.

        :param term: a ``URIRef``, ``Literal``, ``BNode`` or ``Variable``.
        :returns: the abbreviated qname string, the original ``URIRef``, or
            the N3 serialisation of the term.
        :raises AssertionError: if ``term`` is none of the supported types.
        """
        if isinstance(term, URIRef):
            if self.prolog is None:
                return term

            # Scratch namespace manager seeded with the prolog's bindings;
            # override=False keeps any binding that is already present.
            namespace_manager = NamespaceManager(Graph())
            for prefix, uri in self.prolog.prefixBindings.items():
                namespace_manager.bind(prefix, uri, override=False)

            try:
                prefix, uri, localName = namespace_manager.compute_qname(term)
            except Exception:
                # Best effort: a URI that cannot be turned into a qname is
                # returned as-is.  ``Exception`` (rather than a bare except)
                # lets KeyboardInterrupt/SystemExit propagate.
                return term

            # Only abbreviate with prefixes the prolog itself declares; the
            # manager may report a prefix that did not come from the prolog
            # (presumably a default or generated one -- verify).
            if prefix not in self.prolog.prefixBindings:
                return term
            return u':'.join([prefix, localName])

        if isinstance(term, (Literal, BNode)):
            return term.n3()

        assert isinstance(term, Variable)
        return term.n3()
Example #3
0
class data_graph:
    """Derive SHACL node and property shapes from the instance data of a graph.

    The classes and properties used in the wrapped graph are collected by
    the ``extract_*`` methods and turned into a shapes graph by
    :meth:`gen_graph`.
    """

    def __init__(self, graph: Graph, prefixes=None):
        """Wrap *graph* and set up the namespace bindings used for labels.

        :param graph: the data graph to analyse.
        :param prefixes: optional path to a JSON file holding a
            ``{prefix: namespace}`` object whose entries are bound in
            addition to the packaged ``prefixes/namespaces.json``.
        """
        self.G = graph

        # class URI -> {"label": ...}
        self.CLASSES = collections.OrderedDict()
        # property URI -> dict of collected facts (label, classes, nodekind...)
        self.PROPS = collections.OrderedDict()
        self.OUT = []

        path = "prefixes/namespaces.json"
        filepath = pkg_resources.resource_filename(__name__, path)

        self.namespaces = NamespaceManager(graph=Graph())
        self.namespaces.bind("sh", SH)

        with open(filepath, "r", encoding="utf-8") as fin:
            for prefix, namespace in json.load(fin).items():
                self.namespaces.bind(prefix, namespace)

        if prefixes:
            with open(prefixes, "r", encoding="utf-8") as fin:
                for prefix, namespace in json.load(fin).items():
                    self.namespaces.bind(prefix, namespace)

    def sh_label_gen(self, uri):
        """Return ``<prefix>_<local name>`` for *uri* as a shape label."""
        prefix, _namespace, name = self.namespaces.compute_qname(uri)
        return prefix + "_" + name

    def uri_validator(self, x):
        """Return True when *x* parses with both a scheme and a netloc."""
        try:
            result = urlparse(x)
            return all([result.scheme, result.netloc])
        except Exception:
            return False

    def extract_classes(self):
        """Record every object of an ``rdf:type`` triple in ``self.CLASSES``."""
        types_query = "select distinct ?class_ { ?s rdf:type ?class_ }"
        for row in self.G.query(types_query, initNs={"rdf": RDF}):
            self.CLASSES[row.class_] = {"label": self.sh_label_gen(row.class_)}

    def extract_props(self):
        """Record every predicate except ``rdf:type`` in ``self.PROPS``.

        For each property the labels of the classes of its subjects are
        collected; ``"type"`` is ``"unique"`` when exactly one such class
        was found and ``"repeat"`` otherwise.  The class labels are looked
        up in ``self.CLASSES``, so :meth:`extract_classes` has to run first.
        """
        prop_query = "select distinct ?prop { ?s ?prop ?o . filter(?prop != rdf:type)}"
        prop_subj_classes = "select distinct ?class_ {{ ?sub {prop} ?o ; a ?class_ . }}"
        for property_row in self.G.query(prop_query, initNs={"rdf": RDF}):
            prop = property_row.prop
            info = {
                "nodekind": None,
                "cardinality": None,
                "classes": [],
                "label": self.sh_label_gen(prop),
            }
            self.PROPS[prop] = info

            subject_classes = self.G.query(
                prop_subj_classes.format(prop=prop.n3()))
            for class_row in subject_classes:
                info["classes"].append(self.CLASSES[class_row.class_]["label"])

            info["type"] = "unique" if len(info["classes"]) == 1 else "repeat"

    def extract_constraints(self):
        """Infer node kind, object classes and datatype for each property.

        A node kind is only recorded when all objects of the property share
        one Python node type (``URIRef``, ``BNode`` or ``Literal``).  For
        IRI-valued properties the ``rdf:type`` values of the objects are
        stored under ``"objectclasses"``; for literal-valued properties a
        ``"datatype"`` is stored when all literals agree on it.
        """
        for prop, info in self.PROPS.items():
            types = []
            classes = []
            datatypes = []
            for _s, _p, o in self.G.triples((None, prop, None)):
                nodeType = type(o)
                if not types:
                    types.append(nodeType)
                elif nodeType not in types:
                    # Currently only one type is handled per property.
                    # Record the conflicting type so the single-type check
                    # below fails and no misleading node kind is emitted.
                    types.append(nodeType)
                    break
                if nodeType == URIRef:
                    for _, _, objectClass in self.G.triples(
                            (o, RDF.type, None)):
                        classes.append(objectClass)
                elif nodeType == Literal:
                    # An explicit datatype wins; only untyped literals fall
                    # back on the language tag.  The parentheses matter:
                    # without them the conditional expression binds looser
                    # than ``or`` and typed literals end up as xsd:string.
                    # NOTE(review): language-tagged literals are rdf:langString
                    # in RDF 1.1 -- confirm XSD.langString is intended.
                    datatypes.append(
                        o.datatype
                        or (XSD.langString if o.language else XSD.string))

            if len(set(types)) == 1:
                if types[0] == URIRef:
                    info["nodekind"] = "IRI"
                    info["objectclasses"] = classes
                elif types[0] == BNode:
                    info["nodekind"] = "BNode"
                elif types[0] == Literal:
                    info["nodekind"] = "Literal"
                    if len(set(datatypes)) == 1:
                        info["datatype"] = datatypes[0]

    def gen_graph(self, namespace=None, implicit_class_target=False):
        """Run the extraction steps and build the shapes graph.

        :param namespace: optional ``(uri, prefix)`` pair for the generated
            shapes.  A ``#`` is appended to the URI unless it already ends
            in ``#``, ``/`` or a backslash.  When the URI does not validate,
            or no namespace is given, ``http://www.example.org/`` with
            prefix ``ex`` is used.
        :param implicit_class_target: accepted but not used by this method.
        :returns: a new graph holding one ``sh:NodeShape`` per class and one
            ``sh:PropertyShape`` per property.
        """
        self.extract_classes()
        self.extract_props()
        self.extract_constraints()
        ng = rdflib.Graph(namespace_manager=self.namespaces)

        if namespace is not None:
            if self.uri_validator(namespace[0]):
                uri = namespace[0]
                if namespace[0][-1] not in ["#", "/", "\\"]:
                    uri = namespace[0] + "#"
                EX = Namespace(uri)
                ng.bind(namespace[1], EX)
            else:
                print("##malformed URI, using http://example.org/ instead...")
                EX = Namespace("http://www.example.org/")
                ng.bind("ex", EX)
        else:
            EX = Namespace("http://www.example.org/")
            ng.bind("ex", EX)

        for c, class_info in self.CLASSES.items():
            node_shape = EX[class_info["label"]]
            ng.add((node_shape, RDF.type, SH.NodeShape))
            ng.add((node_shape, SH.targetClass, c))
            ng.add((node_shape, SH.nodeKind, SH.BlankNodeOrIRI))

        for p, info in self.PROPS.items():
            label = info["label"]
            shape = EX[label]

            ng.add((shape, RDF.type, SH.PropertyShape))
            ng.add((shape, SH.path, p))

            # Attach the property shape to the node shape of every class
            # whose instances use the property.
            for class_prop in info["classes"]:
                ng.add((EX[class_prop], SH.property, shape))

            if info["nodekind"] == "IRI":
                ng.add((shape, SH.nodeKind, SH.IRI))
            elif info["nodekind"] == "BNode":
                ng.add((shape, SH.nodeKind, SH.BlankNode))
            elif info["nodekind"] == "Literal":
                ng.add((shape, SH.nodeKind, SH.Literal))

            objectclasses = info.get("objectclasses")
            if objectclasses:
                distinct = sorted(set(objectclasses))
                if len(distinct) == 1:
                    ng.add((shape, SH["class"], distinct[0]))
                else:
                    # Several candidate classes: emit an sh:or whose value
                    # is an RDF list of named nodes, one sh:class each.
                    list_nodes = [
                        EX[label + "-classlist-" + str(i)]
                        for i in range(len(distinct))
                    ]
                    ng.add((shape, SH["or"], list_nodes[0]))
                    last = len(distinct) - 1
                    for i, objectclass in enumerate(distinct):
                        alternativenode = EX[label + "-class-" + str(i)]
                        ng.add((list_nodes[i], RDF.first, alternativenode))
                        ng.add((alternativenode, SH["class"], objectclass))
                        rest = list_nodes[i + 1] if i < last else RDF.nil
                        ng.add((list_nodes[i], RDF.rest, rest))

            if "datatype" in info:
                ng.add((shape, SH.datatype, info["datatype"]))

        return ng