Ejemplo n.º 1
0
    def run_test(self, manifest_uri: str, num_entries: Optional[int]=None, verbose: bool=True, debug: bool=False,
                 stop_on_fail: bool=False, debug_slurps: bool=False, save_graph_dir: Optional[str]=None) \
            -> List[EvaluationResult]:
        """ Run the test identified by manifest_uri

        For every manifest entry, the entry's schema is fetched, the entry's SPARQL query is run against
        the entry's endpoint, and each item the query returns is evaluated as a focus node against the schema.

        :param manifest_uri: uri of manifest
        :param num_entries: maximum number of query results to evaluate per manifest entry (None means all)
        :param verbose: True means print each manifest entry before processing it
        :param debug: debug setting for shex evaluator
        :param stop_on_fail: True means stop the whole run as soon as one focus node fails
        :param debug_slurps: True means emit SPARQL_slurper statistics
        :param save_graph_dir: If present, save the final graph of each focus node in this directory
        :return: The evaluation results accumulated up to the point where the run ended
        """
        manifest = loads(self.fetch_uri(manifest_uri))
        rval: List[EvaluationResult] = []
        for case in manifest:
            if verbose:
                print(case._as_json_dumps())
            # The manifest wraps the endpoint and the query in textual markers; strip them off
            sparql_endpoint = case.data.replace("Endpoint: ", "")
            shex = self.fetch_uri(case.schemaURL)
            evaluator = ShExEvaluator(schema=shex, debug=debug)
            prefixes = PrefixLibrary(shex, SKOS=SKOS)
            sparql_query = case.queryMap.replace("SPARQL '''", "").replace("'''@START", "")
            dfs: List[str] = self.get_sparql_dataframe(sparql_endpoint, sparql_query)
            dfs_slice = dfs[:num_entries] if num_entries is not None else dfs
            for df in dfs_slice:
                # A fresh slurper per focus node, so each saved graph is built for that node alone
                slurper = SlurpyGraphWithAgent(sparql_endpoint)
                prefixes.add_bindings(slurper)
                print(f"Evaluating: {df}")
                results = evaluator.evaluate(rdf=slurper,
                                             focus=df,
                                             debug=debug,
                                             debug_slurps=debug_slurps,
                                             over_slurp=False)
                rval += results
                if save_graph_dir:
                    # [-1] rather than [1]: a focus without any '/' is used whole instead of raising IndexError
                    element_name = df.rsplit('/', 1)[-1]
                    file_name = os.path.join(save_graph_dir, element_name + '.ttl')
                    print(f"Writing: {file_name}")
                    slurper.serialize(file_name, format="turtle")
                if stop_on_fail and not all(r.result for r in results):
                    # Leave the whole run, not just this manifest entry: a plain ``break`` here would
                    # carry on with the next manifest entry despite the failure
                    return rval
        return rval
Ejemplo n.º 2
0
class ShExEvaluator:
    """ Shape Expressions Evaluator

    Holds an RDF graph, a ShEx schema, optional focus node(s) and start node(s), and checks every
    focus/start pairing with :meth:`evaluate`.
    """
    def __init__(
        self,
        rdf: Optional[Union[str, Graph]] = None,
        schema: Optional[Union[str, ShExJ.Schema]] = None,
        focus: Optional[URIPARM] = None,
        start: STARTPARM = None,
        rdf_format: str = "turtle",
        debug: bool = False,
        debug_slurps: bool = False,
        over_slurp: Optional[bool] = None,
        output_sink: Optional[Callable[[EvaluationResult],
                                       bool]] = None) -> None:
        """ Evaluator constructor.  All of the parameters below can be set in the constructor or at runtime

        :param rdf: RDF string, file name, URL or Graph for evaluation.
        :param schema: ShEx Schema to evaluate. Can be ShExC, ShExJ or a pre-parsed schema
        :param focus: focus node(s).  If absent, all non-BNode subjects in the graph are evaluated
        :param start: start node(s). If absent, the START node in the schema is used
        :param rdf_format: format for RDF. Default: "Turtle"
        :param debug: emit semi-helpful debug information
        :param debug_slurps: debug graph fetch calls
        :param over_slurp: Controls whether SPARQL slurper does exact or over slurps
        :param output_sink: Function for accepting evaluation results and returns whether to keep evaluating
        """
        self.pfx: Optional[PrefixLibrary] = None
        # rdf_format has to be in place before ``rdf`` is assigned: the rdf setter parses with it
        self.rdf_format = rdf_format
        self.g = None
        self.rdf = rdf
        self._schema = None
        self.schema = schema
        self._focus = None
        self.focus = focus
        self.start = start
        self.debug = debug
        self.debug_slurps = debug_slurps
        self.over_slurp = over_slurp
        self.output_sink = output_sink
        self.nerrors = 0            # number of failed evaluations in the most recent evaluate() call
        self.nnodes = 0             # number of focus nodes visited in the most recent evaluate() call
        self.eval_result = []       # results gathered by the default sink in the most recent evaluate() call

    @property
    def rdf(self) -> str:
        """

        :return: The rendering of whatever RDF is currently being evaluated
        """
        rendered = self.g.serialize(format=self.rdf_format)
        # Older rdflib releases return bytes from serialize(), newer ones return str -- accept both
        return rendered.decode() if isinstance(rendered, bytes) else rendered

    @rdf.setter
    def rdf(self, rdf: Optional[Union[str, Graph]]) -> None:
        """ Set the RDF DataSet to be evaluated.  If ``rdf`` is a string, the presence of a return is the
        indicator that it is text instead of a location.

        :param rdf: File name, URL, representation of rdflib Graph
        """
        if isinstance(rdf, Graph):
            self.g = rdf
        else:
            # Anything that is not a Graph starts from an empty graph; None leaves it empty
            self.g = Graph()
            if isinstance(rdf, str):
                if '\n' in rdf or '\r' in rdf:
                    # Multi-line text is taken to be the RDF itself
                    self.g.parse(data=rdf, format=self.rdf_format)
                elif ':' in rdf:
                    # A single line containing ':' is handed to the parser as a location
                    self.g.parse(location=rdf, format=self.rdf_format)
                else:
                    self.g.parse(source=rdf, format=self.rdf_format)

    @property
    def schema(self) -> Optional[str]:
        """

        :return: The ShExC representation of the schema if one is supplied
        """
        return str(ShExC(self._schema)) if self._schema else None

    @schema.setter
    def schema(self, shex: Optional[Union[str, ShExJ.Schema]]) -> None:
        """ Set the schema to be used.  Schema can either be a ShExC or ShExJ string or a pre-parsed schema.

        :param shex:  Schema
        :raises ValueError: if the schema loader returns nothing for ``shex``
        """
        # The prefix library is only available when the schema arrives as text or a location
        self.pfx = None
        if shex is not None:
            if isinstance(shex, ShExJ.Schema):
                self._schema = shex
            else:
                shext = shex.strip()
                loader = SchemaLoader()
                # Multi-line text, or text opening with one of these characters, is treated as the
                # schema itself; anything else is passed to the loader as a location
                if ('\n' in shex or '\r' in shex) or shext[0] in '#<_: ':
                    self._schema = loader.loads(shex)
                else:
                    self._schema = loader.load(shex) if isinstance(
                        shex, str) else shex
                if self._schema is None:
                    raise ValueError("Unable to parse shex file")
                self.pfx = PrefixLibrary(loader.schema_text)

    @property
    def focus(self) -> Optional[List[URIRef]]:
        """
        :return: The list of focus nodes (if any)
        """
        return self._focus

    @property
    def foci(self) -> List[URIRef]:
        """

        :return: The current set of focus nodes -- the explicit focus list if there is one, otherwise
                 every URIRef subject in the graph in sorted order
        """
        return self._focus if self._focus else sorted(
            [s for s in set(self.g.subjects()) if isinstance(s, URIRef)])

    @focus.setter
    def focus(self, focus: Optional[URIPARM]) -> None:
        """ Set the focus node(s).  If no focus node is specified, the evaluation will occur for all non-BNode
        graph subjects.  Otherwise it can be a string, a URIRef or a list of string/URIRef combinations

        :param focus: None if focus should be all URIRefs in the graph otherwise a URI or list of URI's
        """
        self._focus = normalize_uriparm(focus) if focus else None

    @property
    def start(self) -> STARTPARM:
        """

        :return: The schema start node(s)
        """
        return self._start

    @start.setter
    def start(self, start: STARTPARM) -> None:
        """ Set the start node(s).  An empty or missing value selects the schema's own START

        :param start: start node(s) or None
        """
        self._start = normalize_startparm(start) if start else [START]

    def evaluate(
        self,
        rdf: Optional[Union[str, Graph]] = None,
        shex: Optional[Union[str, ShExJ.Schema]] = None,
        focus: Optional[URIPARM] = None,
        start: STARTPARM = None,
        rdf_format: Optional[str] = None,
        debug: Optional[bool] = None,
        debug_slurps: Optional[bool] = None,
        over_slurp: Optional[bool] = None,
        output_sink: Optional[Callable[[EvaluationResult], bool]] = None
    ) -> List[EvaluationResult]:
        """ Evaluate every focus node against every start node.

        Any argument left as None falls back to the value held by this evaluator.  When ``rdf``, ``shex``,
        ``focus`` or ``start`` is supplied, a temporary evaluator is built from the supplied values with
        this evaluator's settings as fallbacks.  Counters and collected results are always recorded on
        this evaluator.

        :param rdf: RDF string, file name, URL or Graph for evaluation
        :param shex: ShEx schema to evaluate
        :param focus: focus node(s)
        :param start: start node(s)
        :param rdf_format: format for RDF
        :param debug: emit debug information for this call
        :param debug_slurps: debug graph fetch calls for this call
        :param over_slurp: Controls whether SPARQL slurper does exact or over slurps for this call
        :param output_sink: Function that accepts each result and returns whether to keep evaluating
        :return: The results gathered by the default sink.  Empty when results go to a caller supplied sink
        """
        if rdf is not None or shex is not None or focus is not None or start is not None:
            evaluator = ShExEvaluator(
                rdf=rdf if rdf is not None else self.g,
                schema=shex if shex is not None else self._schema,
                focus=focus if focus is not None else self.focus,
                start=start
                if start is not None else self.start if self.start else START,
                rdf_format=rdf_format
                if rdf_format is not None else self.rdf_format,
                output_sink=output_sink
                if output_sink is not None else self.output_sink)
        else:
            evaluator = self

        self.eval_result = []
        if output_sink is not None:
            # Honour an explicitly supplied sink even when no temporary evaluator was needed.
            # Kept local so that this evaluator's configured sink is not overwritten.
            emit = output_sink
        else:
            if evaluator.output_sink is None:

                def sink(e: EvaluationResult) -> bool:
                    self.eval_result.append(e)
                    return True

                evaluator.output_sink = sink
            emit = evaluator.output_sink

        processing = True
        self.nerrors = 0
        self.nnodes = 0
        if START in evaluator.start and evaluator._schema.start is None:
            # The schema's START was requested but the schema does not declare one
            self.nerrors += 1
            emit(
                EvaluationResult(False, None, None,
                                 'START node is not specified'))
            return self.eval_result

        # Experimental -- xfer all ShEx namespaces to g
        if self.pfx and evaluator.g is not None:
            self.pfx.add_bindings(evaluator.g)

        cntxt = Context(evaluator.g, evaluator._schema)
        cntxt.debug_context.debug = debug if debug is not None else self.debug
        cntxt.debug_context.trace_slurps = debug_slurps if debug_slurps is not None else self.debug_slurps
        # The per-call argument wins; the evaluator's own setting is only the fallback
        cntxt.over_slurp = over_slurp if over_slurp is not None else self.over_slurp

        for focus_node in evaluator.foci:
            self.nnodes += 1
            start_list: List[Union[URIRef, START]] = []
            for start_spec in evaluator.start:
                if start_spec is START:
                    start_list.append(evaluator._schema.start)
                elif isinstance(start_spec, START_TYPE):
                    # The start shapes are the objects of start_predicate on the focus node itself
                    start_list += list(
                        evaluator.g.objects(focus_node, start_spec.start_predicate))
                else:
                    start_list.append(start_spec)
            if start_list:
                for start_node in start_list:
                    map_ = FixedShapeMap()
                    map_.add(ShapeAssociation(focus_node, start_node))
                    cntxt.reset()
                    success, fail_reasons = isValid(cntxt, map_)
                    if not success:
                        self.nerrors += 1
                    # A falsy return from the sink asks for the evaluation to stop
                    if not emit(
                            EvaluationResult(
                                success, focus_node, start_node,
                                '\n'.join(fail_reasons)
                                if not success else '')):
                        processing = False
                        break
            else:
                self.nerrors += 1
                emit(
                    EvaluationResult(False, focus_node, None,
                                     "No start node located"))
            if not processing:
                break
        return self.eval_result
Ejemplo n.º 3
0
    def test_basics(self):
        """ Test basic functions """
        pl = PrefixLibrary()
        print(str(pl))
        g = Graph()
        pl.add_bindings(g)

        self.assertEqual("""@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .""", g.serialize(format="turtle").decode().strip())
        pl = PrefixLibrary("""@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix wikibase: <http://wikiba.se/ontology-beta#> .
@prefix wds: <http://www.wikidata.org/entity/statement/> .
@prefix wdata: <https://www.wikidata.org/wiki/Special:EntityData/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix schema: <http://schema.org/> .
@prefix cc: <http://creativecommons.org/ns#> .
@prefix geo: <http://www.opengis.net/ont/geosparql#> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix wdref: <http://www.wikidata.org/reference/> .
@prefix wdv: <http://www.wikidata.org/value/> .
@prefix wd: <http://www.wikidata.org/entity/> .
@prefix wdt: <http://www.wikidata.org/prop/direct/> .
@prefix wdtn: <http://www.wikidata.org/prop/direct-normalized/> .
@prefix p: <http://www.wikidata.org/prop/> .
@prefix ps: <http://www.wikidata.org/prop/statement/> .
@prefix psv: <http://www.wikidata.org/prop/statement/value/> .
@prefix psn: <http://www.wikidata.org/prop/statement/value-normalized/> .
@prefix pq: <http://www.wikidata.org/prop/qualifier/> .
@prefix pqv: <http://www.wikidata.org/prop/qualifier/value/> .
@prefix pqn: <http://www.wikidata.org/prop/qualifier/value-normalized/> .
@prefix pr: <http://www.wikidata.org/prop/reference/> .
@prefix prv: <http://www.wikidata.org/prop/reference/value/> .
@prefix prn: <http://www.wikidata.org/prop/reference/value-normalized/> .
@prefix wdno: <http://www.wikidata.org/prop/novalue/> .

and some junk""")

        self.assertEqual(
            [('OWL', Namespace('http://www.w3.org/2002/07/owl#')),
             ('WIKIBASE', Namespace('http://wikiba.se/ontology-beta#')),
             ('WDS', Namespace('http://www.wikidata.org/entity/statement/')),
             ('WDATA', Namespace('https://www.wikidata.org/wiki/Special:EntityData/')),
             ('SKOS', Namespace('http://www.w3.org/2004/02/skos/core#')),
             ('SCHEMA', Namespace('http://schema.org/')),
             ('CC', Namespace('http://creativecommons.org/ns#')),
             ('GEO', Namespace('http://www.opengis.net/ont/geosparql#')),
             ('PROV', Namespace('http://www.w3.org/ns/prov#')),
             ('WDREF', Namespace('http://www.wikidata.org/reference/')),
             ('WDV', Namespace('http://www.wikidata.org/value/')),
             ('WD', Namespace('http://www.wikidata.org/entity/')),
             ('WDT', Namespace('http://www.wikidata.org/prop/direct/')),
             ('WDTN', Namespace('http://www.wikidata.org/prop/direct-normalized/')),
             ('P', Namespace('http://www.wikidata.org/prop/')),
             ('PS', Namespace('http://www.wikidata.org/prop/statement/')),
             ('PSV', Namespace('http://www.wikidata.org/prop/statement/value/')),
             ('PSN', Namespace('http://www.wikidata.org/prop/statement/value-normalized/')),
             ('PQ', Namespace('http://www.wikidata.org/prop/qualifier/')),
             ('PQV', Namespace('http://www.wikidata.org/prop/qualifier/value/')),
             ('PQN', Namespace('http://www.wikidata.org/prop/qualifier/value-normalized/')),
             ('PR', Namespace('http://www.wikidata.org/prop/reference/')),
             ('PRV', Namespace('http://www.wikidata.org/prop/reference/value/')),
             ('PRN', Namespace('http://www.wikidata.org/prop/reference/value-normalized/')),
             ('WDNO', Namespace('http://www.wikidata.org/prop/novalue/'))], [e for e in pl]
        )
        
        pl = PrefixLibrary("""
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX prov: <http://www.w3.org/ns/prov#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX pr: <http://www.wikidata.org/prop/reference/>
PREFIX prv: <http://www.wikidata.org/prop/reference/value/>
PREFIX pv: <http://www.wikidata.org/prop/value/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX gw: <http://genewiki.shape/>


start = @gw:cancer
gw:cancer {
  p:P1748 {
    prov:wasDerivedFrom @<reference>
  }+
}

<reference> {
  pr:P248  IRI ;
  pr:P813  xsd:dateTime ;
  pr:P699  LITERAL
}""", foaf=known_prefixes.FOAF, owl=known_prefixes.OWL, rdfs=standard_prefixes.RDFS)
        self.assertEqual(
            [('XSD', Namespace('http://www.w3.org/2001/XMLSchema#')),
             ('PROV', Namespace('http://www.w3.org/ns/prov#')),
             ('P', Namespace('http://www.wikidata.org/prop/')),
             ('PR', Namespace('http://www.wikidata.org/prop/reference/')),
             ('PRV', Namespace('http://www.wikidata.org/prop/reference/value/')),
             ('PV', Namespace('http://www.wikidata.org/prop/value/')),
             ('PS', Namespace('http://www.wikidata.org/prop/statement/')),
             ('GW', Namespace('http://genewiki.shape/')),
             ('FOAF', Namespace('http://xmlns.com/foaf/0.1/')),
             ('OWL', Namespace('http://www.w3.org/2002/07/owl#')),
             ('RDFS', Namespace('http://www.w3.org/2000/01/rdf-schema#'))], [e for e in pl])

        pl = PrefixLibrary(None, ex="http://example.org/")
        self.assertEqual("http://example.org/", str(pl.EX))

        known_prefixes.add_bindings(g)
        self.assertEqual("""@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix doap: <http://usefulinc.com/ns/doap#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xmlns: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .""", g.serialize(format="turtle").decode().strip())