Beispiel #1
0
    def __init__(self, nsmap: PrefixMap, shape: Shape, shape_interpreter):
        """ Compile a shape into a predicate index and a regular expression over constraint numbers
        :param nsmap: prefix map used to turn predicates and extra references into URIs
        :param shape: shape definition to compile
        :param shape_interpreter: stored as ``containing_schema``
        """
        self._shape = shape
        self.containing_schema = shape_interpreter
        wrapped_shape = PyxbWrapper(shape)
        self.tripleconstraints = self.depth_first_triples(wrapped_shape)

        # predmap: predicate URI -> list of "<triple constraint number> " tokens
        self.predmap = {}
        for tc_num, tc in enumerate(self.tripleconstraints):
            uri = nsmap.uri_for(tc.predicate)
            self.predmap.setdefault(uri, []).append(str(tc_num) + ' ')
        # Extras are numbered starting one past the number of triple constraints
        first_extra_num = len(self.tripleconstraints) + 1

        self._triple_number = 0
        self._pattern = r''.join(self.depth_first_pattern(wrapped_shape))

        # Every extra predicate gets its own optional, repeatable token.
        # Extra tokens are told apart from constraint tokens by their leading '0'.
        for extra_num, extra in enumerate(self._shape.extra, first_extra_num):
            token = '0' + str(extra_num) + ' '
            uri = nsmap.uri_for(extra.ref)
            self.predmap.setdefault(uri, []).append(token)
            self._pattern += "(" + token + ")*"

        self.matchpattern = re.compile(self._pattern)
Beispiel #2
0
    def __init__(self, nsmap: PrefixMap, shape: Shape, shape_interpreter):
        """ Build the lookup tables and match pattern for a single shape
        :param nsmap: prefix map that resolves predicates and extra references to URIs
        :param shape: the shape being compiled
        :param shape_interpreter: kept on the instance as ``containing_schema``
        """
        self._shape = shape
        self.containing_schema = shape_interpreter
        w_shape = PyxbWrapper(shape)
        self.tripleconstraints = self.depth_first_triples(w_shape)

        # Index every triple constraint by the URI of its predicate; the entry is the
        # constraint's position followed by a blank, which is the token the pattern matches
        self.predmap = {}
        for position, constraint in enumerate(self.tripleconstraints):
            key = nsmap.uri_for(constraint.predicate)
            self.predmap.setdefault(key, []).append(str(position) + ' ')
        next_num = len(self.tripleconstraints)

        self._triple_number = 0
        self._pattern = r''.join(self.depth_first_pattern(w_shape))

        # One additional zero-or-more group per extra predicate; the leading '0'
        # is what marks a token as belonging to an extra
        for extra in self._shape.extra:
            next_num += 1
            extra_token = '0' + str(next_num) + ' '
            self.predmap.setdefault(nsmap.uri_for(extra.ref), []).append(extra_token)
            self._pattern += "(" + extra_token + ")*"

        self.matchpattern = re.compile(self._pattern)
Beispiel #3
0
 def __init__(self, schema: Schema, schema_dom, g: Graph = None):
     """ Set up an interpreter for a schema
     :param schema: schema definition
     :param schema_dom: DOM form of the schema, passed to PrefixMap to build the namespace map
     :param g: graph to interpret (defaults to None)
     """
     self.schema = schema
     self._nsmap = PrefixMap(schema, schema_dom)
     self._default_namespace = schema.default_namespace or ''

     # Shapes keyed by the URI of their label
     shapes_by_uri = {}
     for sh in schema.shape:
         shapes_by_uri[self._nsmap.uri_for(sh.label)] = sh
     self._shapes = shapes_by_uri

     # Caches, both empty to start with
     self._compiled_shapes = {}
     self._triple_results = {}
     self._graph = g
Beispiel #4
0
    def __init__(self, dom_schema):
        """ Parse the supplied schema and render it as JSON
        :param dom_schema: DOM document holding the schema to convert
        """
        self.schema = CreateFromDOM(dom_schema)
        self.json = {"type": "schema"}

        self._prefixmap = PrefixMap(self.schema, dom_schema)
        # The schema's own space-separated exclusions plus the two XML prefixes
        excluded = self.schema.exclude_prefixes.split(' ')
        excluded.extend(('xml', 'xmlns'))
        self._exclude_prefixes = excluded
        self.shex_schema()
Beispiel #5
0
    def __init__(self, dom_schema):
        """ Constructor: load the schema from its DOM and convert it to JSON
        :param dom_schema: DOM document to convert
        """
        parsed = CreateFromDOM(dom_schema)
        self.schema = parsed
        self.json = {"type": "schema"}

        self._prefixmap = PrefixMap(parsed, dom_schema)
        # Excluded prefixes: whatever the schema lists (space separated) followed by 'xml' and 'xmlns'
        self._exclude_prefixes = [*parsed.exclude_prefixes.split(' '), 'xml', 'xmlns']
        self.shex_schema()
Beispiel #6
0
class ShapeInterpreter:
    """ Interpreter that tests the nodes of an RDF graph against the shapes of a schema.

    Shapes are compiled on first use and kept in ``_compiled_shapes``.  The outcome of every
    (triple, triple constraint) evaluation is kept in ``_triple_results`` until a new graph is assigned.
    """
    def __init__(self, schema: Schema, schema_dom, g: Graph = None):
        """ Schema interpreter
        :param schema: definition
        :param schema_dom: equivalent in dom to get the namespace map
        :param g: graph to interpret.  May also be assigned later through the ``graph`` property
        """
        self.schema = schema
        self._nsmap = PrefixMap(schema, schema_dom)
        self._default_namespace = schema.default_namespace if schema.default_namespace else ''
        # Shapes keyed by the URI of their label
        self._shapes = {self._nsmap.uri_for(sh.label): sh for sh in schema.shape}
        self._compiled_shapes = {}
        self._triple_results = {}
        self._graph = g

    @property
    def graph(self):
        """ The graph currently being interpreted """
        return self._graph

    @graph.setter
    def graph(self, g: Graph):
        """ Replace the graph and drop the cached triple results, which were computed against the old one """
        self._graph = g
        self._triple_results = {}

    @staticmethod
    def _is_extra_index(idx: str) -> bool:
        """ True if idx is the token of an "extra" predicate rather than of a triple constraint.
        Extra tokens carry a leading '0'; the token of triple constraint number 0 is the one exception.
        """
        return int(idx) != 0 and idx.startswith('0')

    def _i_shape_subject(self, subj, cs: CompiledShape) -> bool:
        """ Evaluate the shape against the set of triples with subject subj.
        :param subj: subject to evaluate
        :param cs: compiled shape to use in interpretation
        :return: True if some ordering of the predicate/objects of subj passes the shape
        """

        # Everything passes an empty shape
        if not cs.tripleconstraints:
            return True

        # Split the predicate/objects of subj into those whose predicate is known to the shape and those
        # whose predicate isn't.  Unknown predicates fail a closed shape and are ignored by an open one.
        predicate_objects = list(self.graph.predicate_objects(subject=subj))
        target_predicate_objects = [po for po in predicate_objects if str(po[0]) in cs.predmap]
        if len(target_predicate_objects) != len(predicate_objects) and cs.closed:
            return False

        # Small inputs are de-duplicated up front; larger ones are left as a lazy iterator
        if len(target_predicate_objects) > 4:
            pred_obj_permutations = permutations(target_predicate_objects)
        else:
            pred_obj_permutations = list(set(permutations(target_predicate_objects)))

        # For each permutation, create a cross product of all candidate tripleconstraints with the same
        # predicate as that in the permutation. The string equivalent of each element of the cross product
        # is matched against the compiled shape match pattern.
        #
        # If it matches, it is added to the candidates.  The key is the particular permutation and
        # the value is the list of all token tuples that the permutation could satisfy.
        candidates = {}
        for pred_obj_permutation in pred_obj_permutations:
            tc_idx_choices = [cs.predmap[str(pred)] for (pred, _) in pred_obj_permutation
                              if str(pred) in cs.predmap]
            for tc_idx_list in product(*tc_idx_choices):
                if cs.matchpattern.fullmatch(''.join(tc_idx_list)):
                    candidates.setdefault(pred_obj_permutation, []).append(tc_idx_list)

        # Determine which, if any, predicate/object orderings pass the corresponding triple constraints
        for pred_obj_permutation, tc_idx_lists in candidates.items():
            for tc_idx_list in tc_idx_lists:
                if self.i_tc_idx_entry(subj, cs, tc_idx_list, pred_obj_permutation):
                    return True
        return False

    def i_tc_idx_entry(self, subj, cs: CompiledShape, tc_idx_list: list, pred_obj_permutation: list) -> bool:
        """ Interpret the set of triple constraints in the particular predicate/object list and triple_constraint indices
        :param subj: Subject being tested
        :param cs: compiled shape
        :param tc_idx_list: tokens (constraint or extra numbers), one per entry of pred_obj_permutation
        :param pred_obj_permutation: ordered predicate/object pairs of subj
        :return: True if every pair passes the entry it is lined up with
        """
        for idx, (pred, obj) in zip(tc_idx_list, pred_obj_permutation):
            triple = Triple(subj, pred, obj)
            if self._is_extra_index(idx):
                # Extra: the triple that matched this can't pass any real triple constraint on the predicate
                for tripc_num in cs.predmap[str(pred)]:
                    if not self._is_extra_index(tripc_num) and \
                            self.i_tripleconstraint(triple, cs.tripleconstraints[int(tripc_num)]):
                        return False
            elif not self.i_tripleconstraint(triple, cs.tripleconstraints[int(idx)]):
                return False
        return True

    def i_shape(self, subj: URIRef, shape: ShapeLabel) -> bool:
        """ Interpret subject subj and shape in the graph
        :param subj: subject of interpretation.  If false, every subject in the graph is tried
        :param shape: name of shape
        :return: success indicator
        :raises SchemaException: if shape is not defined in the schema
        """
        assert self.graph is not None, "Graph must be supplied"
        if str(shape) not in self._shapes:
            raise SchemaException("Unresolved shape reference: %s" % str(shape))
        # Compile on first use
        if shape not in self._compiled_shapes:
            self._compiled_shapes[shape] = CompiledShape(self._nsmap, self._shapes[str(shape)], self)
        cs = self._compiled_shapes[shape]

        if subj:
            return self._i_shape_subject(subj, cs)
        return any(self._i_shape_subject(s, cs) for s in self.graph.subjects())

    def i_tripleconstraint(self, t: Triple, c: TripleConstraint) -> bool:
        """ Evaluate triple t against triple constraint c, caching the outcome
        :param t: triple to test
        :param c: constraint to test it against
        :return: result of the test, inverted if the constraint is negated
        """
        k = (t, c)
        if k in self._triple_results:
            return self._triple_results[k]
        if URIRef(self._nsmap.uri_for(c.predicate)) == t.p:
            # Every constraint component that is present has to pass
            rslt = ((not c.objectConstraint or self.i_so_constraint(c.objectConstraint, t.o)) and
                    (not c.subjectConstraint or self.i_so_constraint(c.subjectConstraint, t.s)) and
                    (not c.object or self.i_so(c.object, t.o)) and
                    (not c.subject or self.i_so(c.subject, t.s)) and
                    (not c.objectShape or self.i_so_shape(c.objectShape, t.o)) and
                    (not c.subjectShape or self.i_so_shape(c.subjectShape, t.s)) and
                    (not c.objectType or self.i_so_type(c.objectType, t.o)) and
                    (not c.subjectType or self.i_so_type(c.subjectType, t.s)) and
                    (not c.datatype or self.i_datatype(c.datatype, t.o)) and
                    (not c.valueClass or self.i_value_class(c.valueClass, t.o)))
        else:
            rslt = False
        rval = (not rslt) if c.negated else rslt
        self._triple_results[k] = rval
        return rval

    def i_so_constraint(self, c: TripleConstraintValueClass, o: RDFTerm) -> bool:
        """ Test o against the facets and value set of c; whichever of the two is present has to pass """
        return (not c.facet or self.i_facet(c.facet, o)) and \
               (not c.valueSet or self.i_value_set(c.valueSet, o))

    def i_so(self, c: IRI, o: RDFTerm) -> bool:
        """ True if o is an IRI whose string form equals that of c """
        return o.is_iri and str(o.iri) == str(c)

    def i_so_shape(self, c: ShapeLabel, o: RDFTerm) -> bool:
        """ Test the node o against shape c.  Literals never pass """
        return False if o.is_literal else self.i_shape(o.val, c)

    @staticmethod
    def i_so_type(c: NodeType, o: RDFTerm) -> bool:
        """ True if the kind of node o is the one that c asks for """
        return (c == NodeType.LITERAL and o.is_literal) or (c == NodeType.IRI and o.is_iri) or \
               (c == NodeType.BNODE and o.is_bnode) or (c == NodeType.NONLITERAL and not o.is_literal)

    def i_datatype(self, c: IRI, o: RDFTerm) -> bool:
        """ True if o is a literal of datatype c; an untyped literal is meant to pass for a string datatype """
        # TODO: Flesh this out
        # NOTE(review): uri_for is wrapped in URIRef elsewhere in this class, which suggests it returns a
        #   full URI -- in which case the comparison with 'xsd:string' below can never succeed.  Confirm.
        return o.is_literal and \
               ((str(o.literal.datatype) == self._nsmap.uri_for(c)) or
                (not(o.literal.datatype) and self._nsmap.uri_for(c) == 'xsd:string'))

    def i_value_class(self, c: ValueClassLabel, o) -> bool:
        """ Value class references are not evaluated: everything passes """
        return True

    def i_object(self, c: IRI, o: RDFTerm) -> bool:
        """ True if o is a URIRef equal to the URI of c """
        return isinstance(o, URIRef) and URIRef(self._nsmap.uri_for(c)) == o

    @staticmethod
    def i_facet(fct: XSFacet, o: RDFTerm) -> bool:
        """ Test o against a list of facets
        :param fct: facets to test, each carrying exactly one constraint
        :param o: term to test
        :return: True if o passes every facet in fct
        """
        for f in fct:
            if f.pattern:
                rslt = bool(re.fullmatch(f.pattern, str(o.val)))
            elif f.not_:
                # NOTE(review): this passes when the value MATCHES not_, exactly like pattern.  The name
                #   suggests the match should be inverted -- left as is until the intent is confirmed.
                rslt = bool(re.fullmatch(f.not_, str(o.val)))
            # The integer facets are compared with None so that an explicit 0 is not mistaken for "absent"
            elif f.minLength is not None:
                rslt = len(str(o.val)) >= f.minLength
            elif f.maxLength is not None:
                rslt = len(str(o.val)) <= f.maxLength
            elif f.length is not None:
                rslt = len(str(o.val)) == f.length
            elif f.minValue:
                rslt = ShapeInterpreter._i_bound(o, f.minValue, lower=True)
            elif f.maxValue:
                rslt = ShapeInterpreter._i_bound(o, f.maxValue, lower=False)
            elif f.totalDigits is not None:
                rslt = bool(test_numeric_facet(o.literal.value, total_digits=f.totalDigits))
            elif f.fractionDigits is not None:
                rslt = bool(test_numeric_facet(o.literal.value, fraction_digits=f.fractionDigits))
            else:
                assert False, "Unhandled facet"
            if not rslt:
                return False
        return True

    @staticmethod
    def _i_bound(o: RDFTerm, endpoint, lower: bool) -> bool:
        """ Test the numeric value of o against a minValue / maxValue endpoint
        :param o: term to test
        :param endpoint: the minValue or maxValue of a facet.  ``endpoint.open`` makes the bound exclusive
        :param lower: True for a minimum, False for a maximum
        :return: True if the value is on the right side of the bound.  False if either can't be coerced
        """
        # NOTE(review): the endpoint is coerced through str(); confirm that its string form is the number
        value = ShapeInterpreter._coerce_numtype(o, o.literal.value)
        bound = ShapeInterpreter._coerce_numtype(o, endpoint)
        if value is None or bound is None:
            return False
        if lower:
            return bound < value if endpoint.open else bound <= value
        return bound > value if endpoint.open else bound >= value

    # Python type used to compare the values of each numeric datatype
    dtlist = {XSD.integer: int, XSD.decimal: float, XSD.double: float}

    @staticmethod
    def _coerce_numtype(o: RDFTerm, v):
        """ Convert v to the python number type that goes with the datatype of o
        :param o: term whose datatype selects the conversion
        :param v: value to convert.  It is converted via its string form
        :return: int or float, or None if o isn't a literal with a datatype listed in dtlist
        """
        if not o.is_literal or o.literal.datatype not in ShapeInterpreter.dtlist:
            return None
        return ShapeInterpreter.dtlist[o.literal.datatype](str(v))

    def i_value_set(self, vs: ValueSet, o: RDFTerm) -> bool:
        """ Test o against the entries of a value set
        :param vs: value set
        :param o: term to test
        :return: True if o passes every entry of vs
        """
        # NOTE(review): o has to pass EVERY entry, where membership in a set would normally mean ANY.
        #   Also, entries are recognized by truthiness, so an integer 0 or a boolean False entry
        #   ends up in the assert.  Both left as is until the intent is confirmed.
        for vse in PyxbChoice(vs).elements:
            if vse.iriRange:
                rslt = self.i_iri_range(vse.iriRange, o)
            elif vse.rdfLiteral:
                rslt = self.i_rdf_literal(vse.rdfLiteral, o)
            elif vse.integer:
                rslt = self.i_integer(vse.integer, o)
            elif vse.decimal:
                rslt = self.i_decimal(vse.decimal, o)
            elif vse.double:
                rslt = self.i_double(vse.double, o)
            elif vse.boolean:
                rslt = self.i_boolean(vse.boolean, o)
            else:
                assert False, "Unknown vse type"
            if not rslt:
                return False
        return True

    def i_iri_range(self, ir: IRIRange, o: RDFTerm):
        """ True if o passes the stem of ir and passes none of its exclusions """
        return self.i_iri_stem(ir, o) and not any(self.i_iri_stem(ex, o) for ex in ir.exclusion)

    @staticmethod
    def i_iri_stem(ist: IRIStem, o: RDFTerm):
        """ True if the IRI o equals the base of ist or, when ist is a stem, starts with it """
        # NOTE(review): anything that is not an IRI passes -- confirm that this is intended
        return not o.is_iri or (str(ist.base) == str(o.iri) if not ist.stem else str(o.iri).startswith(ist.base))

    def i_rdf_literal(self, rdfl: RDFLiteral, o):
        """ True if o is a literal equal to rdfl, language tag and datatype included """
        if not o.is_literal:
            return False
        lit = Literal(rdfl.value(),
                      lang=rdfl.langtag,
                      datatype=URIRef(self._nsmap.uri_for(rdfl.datatype)) if rdfl.datatype else None)
        return lit == o.literal

    @staticmethod
    def _i_typed_literal(value, datatype, o):
        """ True if o is a literal equal to value typed as datatype """
        if not o.is_literal:
            return False
        return Literal(value, datatype=datatype) == o.literal

    @staticmethod
    def i_integer(intv, o):
        """ True if o is the xsd:integer literal intv """
        return ShapeInterpreter._i_typed_literal(intv, XSD.integer, o)

    @staticmethod
    def i_decimal(decv, o):
        """ True if o is the xsd:decimal literal decv """
        return ShapeInterpreter._i_typed_literal(decv, XSD.decimal, o)

    @staticmethod
    def i_double(doubv, o):
        """ True if o is the xsd:double literal doubv """
        return ShapeInterpreter._i_typed_literal(doubv, XSD.double, o)

    @staticmethod
    def i_boolean(boolv, o):
        """ True if o is the xsd:boolean literal boolv """
        return ShapeInterpreter._i_typed_literal(boolv, XSD.boolean, o)
Beispiel #7
0
class ShExSchema:

    """ ShEx XML Schema to JSON wrapper

    The conversion runs in the constructor; the result is available as the ``json`` attribute.
    """
    def __init__(self, dom_schema):
        """ Constructor - convert the supplied schema to json
        :param dom_schema: DOM document to convert
        """
        self.schema = CreateFromDOM(dom_schema)
        self.json = dict(type="schema")

        self._prefixmap = PrefixMap(self.schema, dom_schema)
        # Prefixes that are left out of the "prefixes" entry of the output
        self._exclude_prefixes = self.schema.exclude_prefixes.split(' ') + ['xml', 'xmlns']
        self.shex_schema()

    def shex_schema(self):
        """ <code>xs:Element name="Schema" type="shex:Schema</code>
        Fill ``self.json`` with the prefixes, start actions, shapes, value classes and start of the schema
        """
        self.json["prefixes"] = {prefix: url for prefix, url in self._prefixmap.namespaces().items()
                                 if prefix is not None and url and prefix not in self._exclude_prefixes}
        if self.schema.startActions:
            self.json["startActs"] = self.shex_semantic_actions(self.schema.startActions)
        if self.schema.shape:
            self.json["shapes"] = {self._uri(s.label): self.shex_shape(s) for s in self.schema.shape}
        if self.schema.valueClass:
            # A value class is keyed by its own label if it is defined inline and by its reference if external
            self.json["valueClasses"] = \
                {self.shex_iri(vc.definition.valueClassLabel if vc.definition else vc.external.ref):
                     self.shex_value_class_definition(vc) for vc in self.schema.valueClass}
        if self.schema.start:
            self.json["start"] = self._uri(self.schema.start)

    def shex_shape(self, shape: Shape) -> dict:
        """ <code>xs:complexType name="shape"</code>
        :param shape: XML Shape
        :return: S-JSON Shape Entry
        """
        rval = dict(type="shape")
        w_shape = PyxbWrapper(shape)
        self.shex_annotations_and_actions(rval, w_shape)
        for e in w_shape.elements:
            self.shex_expression_choice(rval, e)
        for e in w_shape.elements:
            if e.type == "import_":
                rval.setdefault("inherit", []).append(self.shex_shape_ref(e.value.node))
            elif e.type == "extra":
                rval.setdefault(e.type, []).append(self._uri(e.value.node.ref))

        # shape.label is the dictionary key in the Schema container
        if shape.virtual:
            rval["virtual"] = shape.virtual
        if shape.closed:
            rval["closed"] = shape.closed
        return rval

    @staticmethod
    def _typed_expression(typ: str, val: dict) -> dict:
        """ Set the "type" entry of val to typ and return val """
        val["type"] = typ
        return val

    def shex_expression_choice(self, target: dict, e: PyxbWrapper.PyxbElement) -> dict:
        """ <code>xs:group name="ExpressionChoice"</code>
        :param target: target type with ExpressionChoice mixin
        :param e: Wrapper for ExpressionChoice element
        :return: target, with its "expression" entry set if e is an expression
        """
        if e.type in ["someOf", "group"]:
            expr = self.shex_shape_constraint(e.value.node)
        elif e.type == "tripleConstraint":
            expr = self.shex_triple_constraint(e.value.node)
        elif e.type == "include":
            expr = dict(include=self._uri(e.value.node.ref))
        else:
            # Not an expression - nothing to record
            expr = None
        if expr:
            target["expression"] = self._typed_expression(e.type, expr)
        return target

    def shex_annotations_and_actions(self, target: dict, ew: PyxbWrapper):
        """ <code>xs:group name="AnnotationsAndActions</code>
        :param target: dictionary using the group
        :param ew: xml element that contains the group
        """
        for e in ew.elements:
            if e.type == "actions":
                target["semActs"] = self.shex_semantic_actions(e.value.node)
            elif e.type == "annotation":
                target.setdefault("annotations", []).append(self.shex_annotation(e.value.node))

    def shex_shape_constraint(self, sc: ShapeConstraint) -> dict:
        """ <code>xs:complexType name="ShapeConstraint"</code>
        :param sc: A complete shape constraint
        :return: S-JSON expression
        """
        rval = dict()
        sc_wrapper = PyxbWrapper(sc)
        for e in sc_wrapper.elements:
            # Convert into a scratch dictionary and keep the expression, if there is one
            entry = self.shex_expression_choice({}, e)
            if "expression" in entry:
                rval.setdefault("expressions", []).append(entry["expression"])
        self.shex_annotations_and_actions(rval, sc_wrapper)
        self.shex_cardinality(rval, sc_wrapper)
        return rval

    def shex_triple_constraint(self, tc: TripleConstraint) -> dict:
        """ <code>xs:complexType name="TripleConstraint"</code>
        :param tc: TripleConstraint to process
        :return: SJson equivalent
        """
        assert not ((tc.objectConstraint or tc.object or tc.objectShape or tc.objectType) and
               (tc.subjectConstraint or tc.subject or tc.subjectShape or tc.subjectType)), \
            "Cannot mix subject and object constraints"

        tc_dict = dict(type="tripleConstraint", predicate=self.shex_iri(tc.predicate))
        if tc.valueClass:
            # NOTE(review): negation, annotations, actions and cardinality are not emitted on this path.
            #   Confirm that this is intended.
            tc_dict["valueClassRef"] = self.shex_value_class_label(tc.valueClass)
        else:
            vc_dict = dict(type="valueClass")
            if tc.objectConstraint:
                self.shex_triple_constraint_value_class(vc_dict, tc.objectConstraint)
            if tc.object:
                vc_dict["values"] = [self.shex_iri(tc.object)]
            if tc.objectShape:
                vc_dict["reference"] = self.shex_shape_label(tc.objectShape)
            if tc.objectType:
                vc_dict["nodeKind"] = self.shex_node_type(tc.objectType)

            # Any constraint on the subject makes the triple constraint an inverse one
            if tc.subjectConstraint or tc.subject or tc.subjectShape or tc.subjectType or tc.inverse:
                tc_dict["inverse"] = True
            if tc.subjectConstraint:
                self.shex_triple_constraint_value_class(vc_dict, tc.subjectConstraint)
            if tc.subject:
                vc_dict["values"] = [self.shex_iri(tc.subject)]
            if tc.subjectShape:
                vc_dict["reference"] = self.shex_shape_label(tc.subjectShape)
            if tc.subjectType:
                vc_dict["nodeKind"] = self.shex_node_type(tc.subjectType)

            if tc.datatype:
                vc_dict["datatype"] = self._uri(tc.datatype)
            if tc.negated:
                tc_dict["negated"] = tc.negated
            tc_wrapper = PyxbWrapper(tc)
            self.shex_annotations_and_actions(tc_dict, tc_wrapper)
            self.shex_cardinality(tc_dict, tc_wrapper)
            tc_dict["value"] = vc_dict
        return tc_dict

    @staticmethod
    def shex_node_type(nt: NodeType):
        """ Lower case string form of the node type """
        return str(nt).lower()

    def shex_annotation(self, annot: Annotation) -> list:
        """ <code>xs:complexType name="Annotation"</code>
        :param annot: Annotation
        :return: S-JSON equivalent: the iri of the annotation, if any, followed by its literal or iri reference
        """
        rval = [self._uri(annot.iri)] if annot.iri else []
        if annot.literal:
            rval.append(self.shex_rdf_literal(annot.literal))
        else:
            rval.append(self.shex_iri_ref(annot.iriref))
        return rval

    def shex_semantic_actions(self, acts: SemanticActions) -> list:
        """ <code>xs:complexType name="SemanticActions"</code>
        :param acts: actions
        :return: list of actions
        """
        return [self.shex_semantic_action(a) for a in acts.action]

    def shex_semantic_action(self, act: SemanticAction) -> dict:
        """ <code>xs:complexType name="SemanticAction"</code>
        :param act: action
        :return: S-JSON representation
        """
        # TODO: validating
        rval = {}
        if act.productionName:
            rval['name'] = self._uri(act.productionName.ref)
        if act.codeDecl:
            rval['contents'] = self.shex_code_decl(act.codeDecl)
        return rval

    @staticmethod
    def shex_code_decl(cd: CodeDecl):
        """ <code>xs:complexType name="CodeDecl" mixed="true"</code>
        :param cd: code declaration
        :return: mixed content of cd as produced by PyxbWrapper.mixed_content
        """
        return PyxbWrapper.mixed_content(cd)

    def shex_value_class_definition(self, vcd: ValueClassDefinition) -> dict:
        """ <code>xs:complexType name="ValueClassDefinition"</code>
        :param vcd: value class definition, either external or inline
        :return: S-JSON value class
        """
        rval = dict(type="valueClass")
        if vcd.external:
            rval["external"] = self.shex_value_class_ref(vcd.external)
        else:
            self.shex_inline_value_class_definition(rval, vcd.definition)
            if vcd.definition.actions:
                rval["semActs"] = self.shex_semantic_actions(vcd.definition.actions)
        return rval

    def shex_inline_value_class_definition(self, vc: dict, ivcd: InlineValueClassDefinition) -> None:
        """ <code>xs:complexType name="InlineValueClassDefinition"</code>
        :param vc: dictionary to record the actual elements
        :param ivcd: definition whose elements are to be recorded
        """
        # valueClassLabel becomes the identity
        vcd_wrapper = PyxbWrapper(ivcd)
        for e in vcd_wrapper.elements:
            if e.type == "nodetype":
                vc["nodeKind"] = self.shex_node_type(e.value.node)
            elif e.type == "datatype":
                vc[e.type] = self._uri(e.value.node)
            elif e.type == "facet":
                self.shex_xs_facet(vc, e.value.node)
            elif e.type == "or_":
                vc["reference"] = self.shex_group_shape_constr(e.value.node)
            elif e.type == "valueSet":
                vc["values"] = self.shex_value_set(e.value.node)
            else:
                assert False, "Unknown ValueClassExpression choice entry: %s" % e.type

    def shex_group_shape_constr(self, gsc: GroupShapeConstr) -> dict:
        """ <code>xs:complexType name="GroupShapeConstr"</code>
        :param gsc: group of shape references with optional string facets
        :return: S-JSON "or" of the referenced shapes
        """
        rval = dict(type="or", disjuncts=[self.shex_shape_ref(d) for d in gsc.disjunct])
        if gsc.stringFacet:
            for e in gsc.stringFacet:
                self.shex_xs_facet(rval, e)
        return rval

    # noinspection PyTypeChecker
    def shex_triple_constraint_value_class(self, vc: dict, tcvc: TripleConstraintValueClass) -> None:
        """ Record the elements of tcvc in vc.  Processed exactly like an inline value class definition """
        return self.shex_inline_value_class_definition(vc, tcvc)

    def shex_value_class_label(self, l: ValueClassLabel) -> str:
        """ <code>xs:simpleType name="ValueClassLabel"</code>
        :param l: label
        :return: URI of the label
        """
        return self.shex_iri(l)

    def shex_value_class_ref(self, lr: ValueClassRef) -> str:
        """ <code>xs:complexType name="ValueClassRef"</code>
        :param lr: reference
        :return: URI of the referenced label
        """
        return self.shex_value_class_label(lr.ref)

    def shex_shape_label(self, sl: ShapeLabel) -> str:
        """ <code>xs:simpleType name="ShapeLabel"</code>
        :param sl: label
        :return: URI of the label
        """
        return self.shex_iri(sl)

    def shex_shape_ref(self, sr: ShapeRef) -> str:
        """ <code>xs:complexType name="ShapeRef"</code>
        :param sr: reference
        :return: URI of the referenced label
        """
        return self.shex_shape_label(sr.ref)

    @staticmethod
    def shex_code_label(cl: ProductionName) -> str:
        """ <code>xs:complexType name="CodeLabel"</code>
        :param cl: production name
        :return: value of its reference
        """
        return cl.ref.value()

    @staticmethod
    def _normalize_value(v):
        """ Python number for an endpoint: int for an integer, float for a double or a decimal """
        return int(v.integer) if v.integer is not None else \
            float(v.double) if v.double is not None else float(v.decimal)

    @staticmethod
    def shex_xs_facet(target: dict, f: XSFacet):
        """ <code>xs:complexType name="XSFacet"</code>
        :param target: target dictionary (ValueClass)
        :param f: facet to transform
        """
        if f.pattern:
            target["pattern"] = f.pattern
        elif f.not_:
            target["negated"] = True
        # The integer facets are compared with None so that an explicit 0 is not mistaken for "absent"
        elif f.minLength is not None:
            target["minlength"] = f.minLength
        elif f.maxLength is not None:
            target["maxlength"] = f.maxLength
        elif f.length is not None:
            target["length"] = f.length
        elif f.minValue:
            if f.minValue.open:
                target["minexclusive"] = ShExSchema._normalize_value(f.minValue)
            else:
                target["mininclusive"] = ShExSchema._normalize_value(f.minValue)
        elif f.maxValue:
            if f.maxValue.open:
                target["maxexclusive"] = ShExSchema._normalize_value(f.maxValue)
            else:
                target["maxinclusive"] = ShExSchema._normalize_value(f.maxValue)
        elif f.totalDigits is not None:
            target["totaldigits"] = f.totalDigits
        elif f.fractionDigits is not None:
            target["fractiondigits"] = f.fractionDigits
        else:
            assert False, "Unknown facet %s" % f

    # shex_endpoint  is covered in the xs_facet logic above

    # noinspection PyTypeChecker
    @staticmethod
    def shex_string_facet(target: dict, sf: StringFacet):
        """ Record string facet sf in target """
        ShExSchema.shex_xs_facet(target, sf)

    # noinspection PyTypeChecker
    @staticmethod
    def shex_numeric_facet(target: dict, nf: NumericFacet):
        """ Record numeric facet nf in target """
        ShExSchema.shex_xs_facet(target, nf)

    def shex_value_set(self, vs: ValueSet) -> list:
        """ Convert a value set to a list of S-JSON values
        :param vs: value set
        :return: converted entries of the first non-empty kind, tried in the order iriRange, rdfLiteral,
                 integer, decimal, double, boolean
        """
        if vs.iriRange:
            return [self.shex_iri_range(e) for e in vs.iriRange]
        elif vs.rdfLiteral:
            return [self.shex_rdf_literal(e) for e in vs.rdfLiteral]
        elif vs.integer:
            return ['"%i"^^%s' % (e, XSD.integer) for e in vs.integer]
        elif vs.decimal:
            # %s rather than %d: %d would drop the fractional part
            return ['"%s"^^%s' % (e, XSD.decimal) for e in vs.decimal]
        elif vs.double:
            return ['"%e"^^%s' % (e, XSD.double) for e in vs.double]
        elif vs.boolean:
            return ['"%s"^^%s' % (e, XSD.boolean) for e in vs.boolean]
        else:
            assert False, "Unknown ValueSet type"

    def shex_iri_stem(self, ist: IRIStem) -> dict:
        """ Convert an IRI stem
        :param ist: stem
        :return: the plain URI (a string, despite the annotation) for a base that isn't a stem, otherwise
                 a dictionary with a "stem" entry.  A stem without a base becomes a wildcard
        """
        if ist.base and not ist.stem:
            return self.shex_iri(ist.base)
        else:
            return dict(stem=self.shex_iri(ist.base)) if ist.base else dict(stem=dict(type="wildcard"))

    def shex_iri_range(self, irir: IRIRange) -> object:
        """ Convert an IRI range
        :param irir: range
        :return: the plain URI if the range is just a base, otherwise a "stemRange" dictionary
        """
        def add_stem_type(d: dict, v: IRIStem):
            # Exclusions that are stems are tagged as such
            if v.stem:
                d["type"] = "stem"
            return d

        # If just a base, return the IRI
        if irir.base and not irir.stem and not irir.exclusion:
            return self.shex_iri(irir.base)
        rval = dict(type="stemRange")
        rval.update(self.shex_iri_stem(irir))
        if irir.exclusion:
            rval["exclusions"] = [add_stem_type(self.shex_iri_stem(e), e) for e in irir.exclusion]
        return rval

    def shex_rdf_literal(self, lit: RDFLiteral) -> str:
        """ Quoted value of lit followed by its ^^datatype and @langtag where present """
        rval = '"' + lit.value() + '"'
        if lit.datatype:
            rval += '^^' + self.shex_iri(lit.datatype)
        if lit.langtag:
            rval += '@' + lit.langtag
        return rval

    def shex_iri(self, iri: IRI) -> str:
        """ URI for the string form of iri """
        return self._uri(str(iri))

    def shex_iri_ref(self, ref: IRIRef) -> str:
        """ URI for the iri that ref refers to """
        return self.shex_iri(ref.ref)

    def shex_prefixed_name(self, pn: PrefixedName) -> str:
        """ URI for the string form of pn """
        return self._uri(str(pn))

    @staticmethod
    def shex_cardinality(target: dict, card: PyxbWrapper):
        """ Record the cardinality of card.node in target
        :param target: dictionary that receives "min" and "max"
        :param card: wrapper of a node with min and max.  Either defaults to 1 if None
        """
        minv = card.node.min if card.node.min is not None else 1
        maxv = card.node.max if card.node.max is not None else 1
        if minv == maxv:
            # Exactly one is the default and isn't recorded
            if minv != 1:
                # TODO: Fix comparison tests so we can substitute length here
                # target["length"] = minv
                target["min"] = minv
                target["max"] = maxv
        else:
            target["min"] = minv
            target["max"] = '*' if maxv == "unbounded" else maxv

    def _uri(self, element):
        """ Map element into a complete URI
        :param element: URI or QNAME
        :return: URI
        """
        return self._prefixmap.uri_for(PyxbWrapper.proc_unicode(element))
Beispiel #8
0
class ShExSchema:
    """ ShEx XML Schema to JSON wrapper
    """
    def __init__(self, dom_schema):
        """ Constructor - convert the supplied schema to json

        The converted result is available in ``self.json`` once construction completes.

        :param dom_schema: DOM document to convert
        """
        schema = CreateFromDOM(dom_schema)
        self.schema = schema
        self.json = {"type": "schema"}

        self._prefixmap = PrefixMap(schema, dom_schema)
        # Prefixes named by the schema plus the two built-in XML ones are left out of the output
        declared_exclusions = schema.exclude_prefixes.split(' ')
        self._exclude_prefixes = declared_exclusions + ['xml', 'xmlns']
        self.shex_schema()

    def shex_schema(self):
        """ <code>xs:Element name="Schema" type="shex:Schema</code>
        """
        self.json["prefixes"] = {
            prefix: url
            for prefix, url in self._prefixmap.namespaces().items() if
            prefix is not None and url and prefix not in self._exclude_prefixes
        }
        if self.schema.startActions:
            self.json["startActs"] = self.shex_semantic_actions(
                self.schema.startActions)
        if self.schema.shape:
            self.json["shapes"] = {
                self._uri(s.label): self.shex_shape(s)
                for s in self.schema.shape
            }
        if self.schema.valueClass:
            self.json["valueClasses"] = \
                {self.shex_iri(vc.definition.valueClassLabel if vc.definition else vc.external.ref):
                     self.shex_value_class_definition(vc) for vc in self.schema.valueClass}
        if self.schema.start:
            self.json["start"] = self._uri(self.schema.start)

    def shex_shape(self, shape: Shape) -> dict:
        """ <code>xs:complexType name="shape"</code>

        :param shape: XML Shape
        :return: S-JSON Shape Entry
        """
        wrapped = PyxbWrapper(shape)
        entry = {"type": "shape"}
        self.shex_annotations_and_actions(entry, wrapped)

        # First pass: expression elements
        for element in wrapped.elements:
            self.shex_expression_choice(entry, element)

        # Second pass: imports (emitted as "inherit") and extra predicates
        for element in wrapped.elements:
            kind = element.type
            if kind == "import_":
                entry.setdefault("inherit", []).append(self.shex_shape_ref(element.value.node))
            elif kind == "extra":
                entry.setdefault(kind, []).append(self._uri(element.value.node.ref))

        # shape.label is the dictionary key in the Schema container
        if shape.virtual:
            entry["virtual"] = shape.virtual
        if shape.closed:
            entry["closed"] = shape.closed
        return entry

    @staticmethod
    def _typed_expression(typ: str, val: dict) -> dict:
        val["type"] = typ
        return val

    def shex_expression_choice(self, target: dict,
                               e: PyxbWrapper.PyxbElement) -> dict:
        """ <code>xs:group name="ExpressionChoice"</code>
        :param target: target type with ExpressionChoice mixin
        :param e: Wrapper for ExpressionChoice element
        :return: target
        """
        if e.type in ["someOf", "group"]:
            expr = self.shex_shape_constraint(e.value.node)
        elif e.type == "tripleConstraint":
            expr = self.shex_triple_constraint(e.value.node)
        elif e.type == "include":
            expr = dict(include=self._uri(e.value.node.ref))
        else:
            expr = None
        if expr:
            target["expression"] = self._typed_expression(e.type, expr)
        return target

    def shex_annotations_and_actions(self, target: dict, ew: PyxbWrapper):
        """ <code>xs:group name="AnnotationsAndActions</code>
        :param target: dictionary using the group
        :param ew: xml element that contains the group
        """
        for e in ew.elements:
            if e.type == "actions":
                target["semActs"] = self.shex_semantic_actions(e.value.node)
            elif e.type == "annotation":
                target.setdefault("annotations", []).append(
                    self.shex_annotation(e.value.node))

    def shex_shape_constraint(self, sc: ShapeConstraint) -> dict:
        """ <code>xs:complexType name="ShapeConstraint"</code>

        :param sc: A complete shape constraint
        :return: S-JSON expression holding the nested expressions (when there are any),
                 followed by annotations, semantic actions and cardinality
        """
        wrapped = PyxbWrapper(sc)
        constraint = {}
        for element in wrapped.elements:
            choice = self.shex_expression_choice({}, element)
            if "expression" not in choice:
                # Not an expression element, or it converted to nothing
                continue
            constraint.setdefault("expressions", []).append(choice["expression"])
        self.shex_annotations_and_actions(constraint, wrapped)
        self.shex_cardinality(constraint, wrapped)
        return constraint

    def shex_triple_constraint(self, tc: TripleConstraint) -> dict:
        """ <code>xs:complexType name="TripleConstraint"</code>
        :param tc: TripleConstraint to process
        :return: SJson equivalent
        """
        assert not ((tc.objectConstraint or tc.object or tc.objectShape or tc.objectType) and
               (tc.subjectConstraint or tc.subject or tc.subjectShape or tc.subjectType)), \
            "Cannot mix subject and object constraints"

        tc_dict = dict(type="tripleConstraint",
                       predicate=self.shex_iri(tc.predicate))
        if tc.valueClass:
            tc_dict["valueClassRef"] = self.shex_value_class_label(
                tc.valueClass)
        else:
            vc_dict = dict(type="valueClass")
            if tc.objectConstraint:
                self.shex_triple_constraint_value_class(
                    vc_dict, tc.objectConstraint)
            if tc.object:
                vc_dict["values"] = [self.shex_iri(tc.object)]
            if tc.objectShape:
                vc_dict["reference"] = self.shex_shape_label(tc.objectShape)
            if tc.objectType:
                vc_dict["nodeKind"] = self.shex_node_type(tc.objectType)

            if tc.subjectConstraint or tc.subject or tc.subjectShape or tc.subjectType or tc.inverse:
                tc_dict["inverse"] = True
                if tc.subjectConstraint:
                    self.shex_triple_constraint_value_class(
                        vc_dict, tc.subjectConstraint)
            if tc.subject:
                vc_dict["values"] = [self.shex_iri(tc.subject)]
            if tc.subjectShape:
                vc_dict["reference"] = self.shex_shape_label(tc.subjectShape)
            if tc.subjectType:
                vc_dict["nodeKind"] = self.shex_node_type(tc.subjectType)

            if tc.datatype:
                vc_dict["datatype"] = self._uri(tc.datatype)
            if tc.negated:
                tc_dict["negated"] = tc.negated
            tc_wrapper = PyxbWrapper(tc)
            self.shex_annotations_and_actions(tc_dict, tc_wrapper)
            self.shex_cardinality(tc_dict, tc_wrapper)
            tc_dict["value"] = vc_dict
        return tc_dict

    @staticmethod
    def shex_node_type(nt: NodeType):
        return str(nt).lower()

    def shex_annotation(self, annot: Annotation) -> list:
        """ <code>xs:complexType name="Annotation"</code>
        :param annot: Annotation
        :return: S-JSON equivalent
        """
        rval = [self._uri(annot.iri)] if annot.iri else []
        if annot.literal:
            rval.append(self.shex_rdf_literal(annot.literal))
        else:
            rval.append(self.shex_iri_ref(annot.iriref))
        return rval

    def shex_semantic_actions(self, acts: SemanticActions) -> list:
        """ <code>xs:complexType name="SemanticActions"</code>
        :param acts: actions
        :return: list of actions
        """
        return [self.shex_semantic_action(a) for a in acts.action]

    def shex_semantic_action(self, act: SemanticAction) -> dict:
        """ <code>xs:complexType name="SemanticAction"</code>
        :param act: action
        :return: S-JSON representation
        """
        # TODO: validating
        rval = {}
        if act.productionName:
            rval['name'] = self._uri(act.productionName.ref)
        if act.codeDecl:
            rval['contents'] = self.shex_code_decl(act.codeDecl)
        return rval

    @staticmethod
    def shex_code_decl(cd: CodeDecl):
        """ <code>xs:complexType name="CodeDecl" mixed="true"</code>

        :param cd: code declaration
        :return: whatever ``PyxbWrapper.mixed_content`` yields for cd
        """
        content = PyxbWrapper.mixed_content(cd)
        return content

    def shex_value_class_definition(self, vcd: ValueClassDefinition) -> dict:
        """ <code>xs:complexType name="ValueClassDefinition"</code>
        :param vcd:
        :return:
        """
        rval = dict(type="valueClass")
        if vcd.external:
            rval["external"] = self.shex_value_class_ref(vcd.external)
        else:
            self.shex_inline_value_class_definition(rval, vcd.definition)
            if vcd.definition.actions:
                rval["semActs"] = self.shex_semantic_actions(
                    vcd.definition.actions)
        return rval

    def shex_inline_value_class_definition(
            self, vc: dict, ivcd: InlineValueClassDefinition) -> None:
        """ <code>xs:complexType name="InlineValueClassDefinition"</code>

        The converted entries are written into vc in place; nothing is returned.

        :param vc: dictionary to record the actual elements
        :param ivcd: definition whose wrapped child elements are converted one by one
        :raises AssertionError: on a child element type that is not handled here
        """
        # valueClassLabel becomes the identity
        for e in PyxbWrapper(ivcd).elements:
            kind = e.type
            if kind == "nodetype":
                vc["nodeKind"] = self.shex_node_type(e.value.node)
            elif kind == "datatype":
                vc[kind] = self._uri(e.value.node)
            elif kind == "facet":
                self.shex_xs_facet(vc, e.value.node)
            elif kind == "or_":
                vc["reference"] = self.shex_group_shape_constr(e.value.node)
            elif kind == "valueSet":
                vc["values"] = self.shex_value_set(e.value.node)
            else:
                # Raised explicitly: ``assert False`` is stripped under -O, which would
                # let unknown entries pass silently
                raise AssertionError(
                    "Unknown ValueClassExpression choice entry: %s" % kind)

    def shex_group_shape_constr(self, gsc: GroupShapeConstr) -> dict:
        """ <code>xs:complexType name="GroupShapeConstr"</code>
        :param gsc:
        :return:
        """
        rval = dict(type="or",
                    disjuncts=[self.shex_shape_ref(d) for d in gsc.disjunct])
        if gsc.stringFacet:
            [self.shex_xs_facet(rval, e) for e in gsc.stringFacet]
        return rval

    # noinspection PyTypeChecker
    def shex_triple_constraint_value_class(
            self, vc: dict, tcvc: TripleConstraintValueClass) -> (dict, dict):
        return self.shex_inline_value_class_definition(vc, tcvc)

    def shex_value_class_label(self, l: ValueClassLabel) -> str:
        """ <code>xs:simpleType name="ValueClassLabel"</code>
        :param l:
        :return:
        """
        return self.shex_iri(l)

    def shex_value_class_ref(self, lr: ValueClassRef) -> str:
        """ <code>xs:complexType name="ValueClassRef"</code>
        :param lr:
        :return:
        """
        return self.shex_value_class_label(lr.ref)

    def shex_shape_label(self, sl: ShapeLabel) -> str:
        """ <code>xs:simpleType name="ShapeLabel"</code>
        :param sl:
        :return:
        """
        return self.shex_iri(sl)

    def shex_shape_ref(self, sr: ShapeRef) -> str:
        """ <code>xs:complexType name="ShapeRef"</code>
        :param sr:
        :return:
        """
        return self.shex_shape_label(sr.ref)

    @staticmethod
    def shex_code_label(cl: ProductionName) -> str:
        """ <code>xs:complexType name="CodeLabel"</code>
        :param cl:
        :return:
        """
        return cl.ref.value()

    @staticmethod
    def _normalize_value(v):
        return int(v.integer) if v.integer is not None else \
            float(v.double) if v.double is not None else float(v.decimal)

    @staticmethod
    def shex_xs_facet(target: dict, f: XSFacet):
        """ <code>xs:complexType name="XSFacet"</code>
        :param target: target dictionary (ValueClass)
        :param f: facet to transform
        """
        if f.pattern:
            target["pattern"] = f.pattern
        elif f.not_:
            target["negated"] = True
        elif f.minLength:
            target["minlength"] = f.minLength
        elif f.maxLength:
            target["maxlength"] = f.maxLength
        elif f.length:
            target["length"] = f.length
        elif f.minValue:
            if f.minValue.open:
                target["minexclusive"] = ShExSchema._normalize_value(
                    f.minValue)
            else:
                target["mininclusive"] = ShExSchema._normalize_value(
                    f.minValue)
        elif f.maxValue:
            if f.maxValue.open:
                target["maxexclusive"] = ShExSchema._normalize_value(
                    f.maxValue)
            else:
                target["maxinclusive"] = ShExSchema._normalize_value(
                    f.maxValue)
        elif f.totalDigits:
            target["totaldigits"] = f.totalDigits
        elif f.fractionDigits:
            target["fractiondigits"] = f.fractionDigits
        else:
            assert False, "Unknown facet %s" % f

    # shex_endpoint  is covered in the xs_facet logic above

    # noinspection PyTypeChecker
    @staticmethod
    def shex_string_facet(target: dict, sf: StringFacet):
        ShExSchema.shex_xs_facet(target, sf)

    # noinspection PyTypeChecker
    @staticmethod
    def shex_numeric_facet(target: dict, nf: NumericFacet):
        ShExSchema.shex_xs_facet(target, nf)

    def shex_value_set(self, vs: ValueSet) -> list:
        if vs.iriRange:
            return [self.shex_iri_range(e) for e in vs.iriRange]
        elif vs.rdfLiteral:
            return [self.shex_rdf_literal(e) for e in vs.rdfLiteral]
        elif vs.integer:
            return ['"%i"^^%s' % (e, XSD.integer) for e in vs.integer]
        elif vs.decimal:
            return ['"%d"^^%s' % (e, XSD.decimal) for e in vs.decimal]
        elif vs.double:
            return ['"%e"^^%s' % (e, XSD.double) for e in vs.double]
        elif vs.boolean:
            return ['"%s"^^%s' % (e, XSD.boolean) for e in vs.boolean]
        else:
            assert False, "Unknown ValueSet type"

    def shex_iri_stem(self, ist: IRIStem) -> dict:
        if ist.base and not ist.stem:
            return self.shex_iri(ist.base)
        else:
            return dict(stem=self.shex_iri(ist.base)) if ist.base else dict(
                stem=dict(type="wildcard"))

    def shex_iri_range(self, irir: IRIRange) -> object:
        """
        :param irir:
        :return:
        """
        def add_stem_type(d: dict, v: IRIStem):
            if v.stem:
                d["type"] = "stem"
            return d

        # If just a base, return the IRI
        if irir.base and not irir.stem and not irir.exclusion:
            return self.shex_iri(irir.base)
        rval = dict(type="stemRange")
        rval.update(self.shex_iri_stem(irir))
        if irir.exclusion:
            rval["exclusions"] = [
                add_stem_type(self.shex_iri_stem(e), e) for e in irir.exclusion
            ]
        return rval

    def shex_rdf_literal(self, lit: RDFLiteral) -> str:
        rval = '"' + lit.value() + '"'
        if lit.datatype:
            rval += '^^' + self.shex_iri(lit.datatype)
        if lit.langtag:
            rval += '@' + lit.langtag
        return rval

    def shex_iri(self, iri: IRI) -> str:
        return self._uri(str(iri))

    def shex_iri_ref(self, ref: IRIRef) -> str:
        return self.shex_iri(ref.ref)

    def shex_prefixed_name(self, pn: PrefixedName) -> str:
        return self._uri(str(pn))

    @staticmethod
    def shex_cardinality(target: dict, card: PyxbWrapper):
        minv = card.node.min if card.node.min is not None else 1
        maxv = card.node.max if card.node.max is not None else 1
        if minv == maxv:
            if minv != 1:
                # TODO: Fix comparison tests so we can substitute length here
                # target["length"] = minv
                target["min"] = minv
                target["max"] = maxv
        else:
            target["min"] = minv
            target["max"] = '*' if maxv == "unbounded" else maxv

    def _uri(self, element):
        """ Map element into a complete URI

        :param element: URI or QNAME
        :return: URI produced by the prefix map for the unicode-processed element
        """
        processed = PyxbWrapper.proc_unicode(element)
        return self._prefixmap.uri_for(processed)