Beispiel #1
0
def evaluate(g: Graph,
             schema: Union[str, ShExJ.Schema],
             focus: Optional[Union[str, URIRef, IRIREF]],
             start: Optional[Union[str, URIRef, IRIREF, START,
                                   START_TYPE]] = None,
             debug_trace: bool = False) -> Tuple[bool, Optional[str]]:
    """ Validate one focus node of graph `g` against a starting shape of ShEx schema `schema`

    :param g: Graph containing RDF
    :param schema: ShEx Schema -- a str is parsed with SchemaLoader first
    :param focus: focus node in g.  Anything that is not already a URIRef is converted with
        URIRef(str(focus))
    :param start: Starting shape.  If omitted, the Schema start shape is used
    :param debug_trace: Turn on debug tracing
    :return: (success, text) pair.  text is the newline-joined list of reasons returned by the
        validator, or a fixed message when the schema could not be parsed or no starting shape
        could be determined
    """
    parsed_schema = SchemaLoader().loads(schema) if isinstance(schema, str) else schema
    if parsed_schema is None:
        return False, "Error parsing schema"

    # NOTE(review): a None focus is not special-cased here -- it becomes URIRef("None")
    focus_node = focus if isinstance(focus, URIRef) else URIRef(str(focus))

    # Fall back to the schema's own start shape when the caller did not name one
    start_shape = start
    if start_shape is None and parsed_schema.start:
        start_shape = str(parsed_schema.start)
    if start_shape is None:
        return False, "No starting shape"

    # IRIREF instances and the START / START_TYPE markers are passed through untouched;
    # everything else is wrapped as an IRIREF
    already_usable = (isinstance(start_shape, IRIREF)
                      or start_shape is START
                      or start_shape is START_TYPE)
    if not already_usable:
        start_shape = IRIREF(str(start_shape))

    context = Context(g, parsed_schema)
    context.debug_context.debug = debug_trace

    shape_map = FixedShapeMap()
    shape_map.add(ShapeAssociation(focus_node, start_shape))

    passed, reasons = isValid(context, shape_map)
    return passed, '\n'.join(reasons)
Beispiel #2
0
 def test_example_2(self):
     """Visit the first shape of the ``shex_2`` schema and check which shapes get recorded.

     ``visit_shape`` is handed the ``shapes_visited`` list; the test expects it to end up
     holding the S1 and S2 shape identifiers, in that order.
     """
     schema, _ = setup_test(shex_2, None)
     # No RDF graph is supplied -- only the schema is handed to the context
     cntxt = Context(None, schema)
     shapes_visited = []
     cntxt.visit_shapes(schema.shapes[0], visit_shape, shapes_visited)
     self.assertEqual(["http://schema.example/S1", "http://schema.example/S2"], shapes_visited)
Beispiel #3
0
 def extern_shape_for(self, ref: ShExJ.IRIREF) -> Optional[ShExJ.Shape]:
     """Search the external schemas, in order, for an entry identified by ``ref``.

     Each item of ``self.externs`` is loaded through the owner's schema loader; items whose
     load result is falsy are passed over.

     :param ref: identifier to look up in each external schema's id map
     :return: the id-map entry from the first external schema that contains ``ref``,
         or None when no external schema contains it
     """
     for extern_location in self.externs:
         loaded_schema = self.owner.schema_loader.load(extern_location)
         if not loaded_schema:
             continue
         # A throw-away context (no graph) is built just to obtain the schema's id map
         id_map = Context(None, loaded_schema).schema_id_map
         if ref in id_map:
             return id_map[ref]
     return None
Beispiel #4
0
    def evaluate(
        self,
        rdf: Optional[Union[str, Graph]] = None,
        shex: Optional[Union[str, ShExJ.Schema]] = None,
        focus: Optional[URIPARM] = None,
        start: STARTPARM = None,
        rdf_format: Optional[str] = None,
        debug: Optional[bool] = None,
        debug_slurps: Optional[bool] = None,
        over_slurp: Optional[bool] = None,
        output_sink: Optional[Callable[[EvaluationResult], bool]] = None
    ) -> List[EvaluationResult]:
        """Validate every focus node against every applicable start shape.

        When any of ``rdf``, ``shex``, ``focus`` or ``start`` is supplied, a temporary
        ShExEvaluator is built from the supplied values, falling back to this instance's
        settings for the ones that were omitted.  Otherwise this instance itself is used.
        ``rdf_format`` and ``output_sink`` are only applied when such a temporary evaluator
        is built.

        ``self.eval_result``, ``self.nerrors`` and ``self.nnodes`` are reset on every call.

        :param rdf: RDF source overriding ``self.g``
        :param shex: schema overriding ``self._schema``
        :param focus: focus node(s) overriding ``self.focus``
        :param start: start shape(s) overriding ``self.start`` (START if neither is set)
        :param rdf_format: RDF format overriding ``self.rdf_format``
        :param debug: debug flag for this run; ``self.debug`` if None
        :param debug_slurps: slurp tracing flag for this run; ``self.debug_slurps`` if None
        :param over_slurp: over-slurp setting for this run; ``self.over_slurp`` if None
        :param output_sink: callback that receives each EvaluationResult.  Returning False
            stops the evaluation.  If the evaluator has no sink, results are appended to
            ``self.eval_result``
        :return: ``self.eval_result`` -- empty when results were delivered to an explicit sink
        """
        if rdf is not None or shex is not None or focus is not None or start is not None:
            evaluator = ShExEvaluator(
                rdf=rdf if rdf is not None else self.g,
                schema=shex if shex is not None else self._schema,
                focus=focus if focus is not None else self.focus,
                start=start
                if start is not None else self.start if self.start else START,
                rdf_format=rdf_format
                if rdf_format is not None else self.rdf_format,
                output_sink=output_sink
                if output_sink is not None else self.output_sink)
        else:
            evaluator = self

        self.eval_result = []
        if evaluator.output_sink is None:

            # Default sink: accumulate results on this instance and never stop early
            def sink(e: EvaluationResult) -> bool:
                self.eval_result.append(e)
                return True

            evaluator.output_sink = sink

        processing = True
        self.nerrors = 0
        self.nnodes = 0
        # START was requested but the schema does not declare a start shape
        if START in evaluator.start and evaluator._schema.start is None:
            self.nerrors += 1
            evaluator.output_sink(
                EvaluationResult(False, None, None,
                                 'START node is not specified'))
            return self.eval_result

        # Experimental -- xfer all ShEx namespaces to g
        if self.pfx and evaluator.g is not None:
            self.pfx.add_bindings(evaluator.g)

        cntxt = Context(evaluator.g, evaluator._schema)
        cntxt.debug_context.debug = debug if debug is not None else self.debug
        cntxt.debug_context.trace_slurps = debug_slurps if debug_slurps is not None else self.debug_slurps
        # Honour the per-call override; previously both branches used self.over_slurp,
        # so the argument was ignored
        cntxt.over_slurp = over_slurp if over_slurp is not None else self.over_slurp

        for focus_node in evaluator.foci:
            self.nnodes += 1
            # Resolve the start specifications into the concrete shapes to test this node against
            start_list: List[Union[URIRef, START]] = []
            for start_spec in evaluator.start:
                if start_spec is START:
                    start_list.append(evaluator._schema.start)
                elif isinstance(start_spec, START_TYPE):
                    # Start shapes are the objects of (focus node, start predicate) in the graph
                    start_list += list(
                        evaluator.g.objects(focus_node, start_spec.start_predicate))
                else:
                    start_list.append(start_spec)
            if start_list:
                for start_node in start_list:
                    map_ = FixedShapeMap()
                    map_.add(ShapeAssociation(focus_node, start_node))
                    cntxt.reset()
                    success, fail_reasons = isValid(cntxt, map_)
                    if not success:
                        self.nerrors += 1
                    if not evaluator.output_sink(
                            EvaluationResult(
                                success, focus_node, start_node,
                                '\n'.join(fail_reasons)
                                if not success else '')):
                        # The sink asked to stop: leave this loop and then the outer one
                        processing = False
                        break
            else:
                self.nerrors += 1
                evaluator.output_sink(
                    EvaluationResult(False, focus_node, None,
                                     "No start node located"))
            if not processing:
                break
        return self.eval_result
Beispiel #5
0
def setup_context(shex_str: str, rdf_str: Optional[str]) -> Context:
    """Build a Context from a ShEx schema string and an optional RDF string.

    The schema and graph come from ``setup_test``.  When it yields no graph, a new Graph
    populated from ``rdf_header`` is used instead.

    :param shex_str: schema text handed to ``setup_test``
    :param rdf_str: RDF text handed to ``setup_test``; may be None
    :return: Context over the resulting graph and schema
    """
    schema, graph = setup_test(shex_str, rdf_str)
    if graph is not None:
        return Context(graph, schema)

    graph = Graph()
    # NOTE(review): rdf_header is passed positionally, i.e. as the parse *source* --
    # confirm this is intended rather than ``data=rdf_header``
    graph.parse(rdf_header)
    return Context(graph, schema)
Beispiel #6
0
    def eval_entry(self, entry_name: str) -> bool:
        """Run the manifest entry registered under ``entry_name`` and record the outcome.

        The entry is counted via ``self.skip``, ``self.pass_`` or ``self.fail``.

        NOTE(review): every path through the loop body returns, so only the first entry for
        a name is ever processed; with no entries the method falls through and returns None.

        :param entry_name: key into ``self.mfst.entries``
        :return: True when the entry is passed over (before the START_AFTER marker, skipped,
            or its data file could not be loaded), False when the schema could not be loaded
            or the focus could not be resolved, otherwise the test result
        """
        for entry in self.mfst.entries[entry_name]:  # There can be more than one entry per name...
            # Nothing is run until an entry whose name starts with START_AFTER is reached
            if not self.started:
                if not entry.name.startswith(START_AFTER):
                    self.start_skipped += 1
                    return True
                self.started = True
                if VERBOSE:
                    print(
                        f"STARTED - Skipped {self.start_skipped} entries")

            # Decide whether the entry is skipped, and under which reason it is tallied
            should_skip = False
            skip_key = None
            matched_traits = list(entry.traits.intersection(skip_traits))
            if matched_traits:
                # Skipped because of one of its traits
                if VERBOSE:
                    print(
                        f"Skipping {entry.name} ({', '.join(self.URIname(t) for t in entry.traits)}) - Skipped trait"
                    )
                skip_key = str(matched_traits[0]).replace(str(SHT), 'sht:')
                should_skip = True
            elif entry.name in self.expected_failures:
                # Skipped because it is a known failure
                if VERBOSE:
                    print(
                        f"Skipping {entry.name} ({', '.join(self.URIname(t) for t in entry.traits)})"
                        f" - {self.expected_failures[entry.name]}")
                skip_key = self.expected_failures[entry.name]
                should_skip = True
            if should_skip:
                self.skip_reasons[skip_key] = self.skip_reasons.get(skip_key, 0) + 1
                self.skip(entry.name)

            # Normal mode runs only the non-skipped entries; TEST_SKIPS_ONLY mode runs only
            # the skipped ones
            if should_skip != bool(TEST_SKIPS_ONLY):
                return True

            # Validate the entry
            if VERBOSE:
                shex_uri = self.mfst.schema_loader.location_rewrite(
                    entry.schema_uri)
                if self.mfst.data_redirector:
                    data_uri = self.mfst.data_redirector.uri_for(entry.data_uri)
                else:
                    data_uri = entry.data_uri
                print(
                    f"Testing {entry.name} ({'P' if entry.should_pass else 'F'}): {shex_uri} - {data_uri}"
                )
            graph = entry.data_graph()
            schema = entry.shex_schema()
            if graph is None and entry.data_uri:
                print("\t ERROR: Unable to load data file")
                print(f"\t TRAITS: ({','.join(entry.traits)})")
                self.skip(entry.name)
                return True
            if not schema:
                print(f"\t ERROR: Unable to load schema {entry.schema_uri}")
                print(f"\t TRAITS: ({','.join(entry.traits)})")
                self.nskipped += 1
                self.skip(entry.name)
                return False

            context = Context(graph,
                              schema,
                              entry.extern_shape_for,
                              base_namespace=BASE_FILE_LOC)
            context.debug_context.debug = DEBUG
            shape_map = FixedShapeMap()
            focus = self.mfst.data_uri(entry.focus)
            if not focus:
                print("\t***** FAIL *****")
                print(f"\tFocus: {entry.focus} not in schema")
                print(f"\t TRAITS: ({','.join(entry.traits)})")
                self.fail(entry.name)
                return False
            # The entry's own shape is used when it names one, otherwise the START marker
            target_shape = ShExJ.IRIREF(entry.shape) if entry.shape else START
            shape_map.add(ShapeAssociation(focus, target_shape))

            #################################
            #  Actual validation occurs here
            #################################
            valid, reasons = isValid(context, shape_map)

            # NOTE(review): an entry that is expected to fail counts as passing whatever the
            # validation outcome -- confirm this leniency is intended
            test_result = valid or not entry.should_pass

            # Report and record the result
            if test_result:
                self.pass_(entry.name)
            else:
                if VERBOSE:
                    print("\t**** FAIL *****")
                    print(f"\t TRAITS: ({','.join(entry.traits)})")
                    for reason in reasons:
                        print(f"\t{reason}")
                else:
                    print(
                        f"Failed {entry.name} ({'P' if entry.should_pass else 'F'}): {entry.schema_uri} - {entry.data_uri}"
                    )
                    print(f"\t TRAITS: ({','.join(entry.traits)})")
                self.fail(entry.name)
            return test_result