Esempio n. 1
0
    def test_multiple_evaluate(self):
        """Check that one evaluator instance can be evaluated repeatedly.

        The evaluator is built once with ``EX.s`` as its focus and is then run
        ``NUM_ITERS`` times with that focus (each run must conform) followed by
        ``NUM_ITERS`` runs with ``EX.a`` supplied per call (each must fail).
        """
        prefixes = PrefixLibrary(shex)
        evaluator = ShExEvaluator(rdf=rdf, schema=shex, focus=prefixes.EX.s)

        # Constructor-supplied focus: expected to conform on every pass
        for _ in range(NUM_ITERS):
            conforming = evaluator.evaluate()[0]
            self.assertTrue(conforming.result)

        # Per-call focus override: expected to be rejected on every pass
        for _ in range(NUM_ITERS):
            rejected = evaluator.evaluate(focus=prefixes.EX.a)[0]
            self.assertFalse(rejected.result)
Esempio n. 2
0
    def test_closed(self):
        """Evaluate ``EXC['42']`` against ``EXE.Person`` through both APIs.

        The ``ShExEvaluator`` result is printed and must be non-conformant; the
        functional ``evaluate`` entry point is then run on a freshly parsed
        graph and its outcome is only printed, not asserted.
        """
        focus_node = EXC['42']
        start_shape = EXE.Person

        evaluator = ShExEvaluator(rdf=rdf, schema=shex, focus=focus_node, start=start_shape)
        pprint(evaluator.evaluate())
        first_result = evaluator.evaluate()[0]
        self.assertFalse(first_result.result)

        from pyshex.evaluate import evaluate
        graph = Graph()
        graph.parse(data=rdf, format="turtle")
        functional_outcome = evaluate(graph, shex, focus=focus_node, start=start_shape)
        pprint(functional_outcome)
def test_andras_loop():
    """Validate the items selected by each entry of the WikiPathways manifest.

    For every manifest entry whose ``data`` starts with ``"Endpoint:"`` the
    schema is downloaded and compiled, the entry's SPARQL query is run to get
    the item URIs, and each item's ``.ttl`` document is fetched and evaluated.
    One line is printed per evaluation result.
    """
    manifest_url = \
        "https://raw.githubusercontent.com/SuLab/Genewiki-ShEx/master/pathways/wikipathways/manifest_all.json"
    manifest = jsonasobj.loads(requests.get(manifest_url).text)

    for case in manifest:
        print(case._as_json_dumps())
        # Only entries that name a SPARQL endpoint are processed
        if not case.data.startswith("Endpoint:"):
            continue

        endpoint = case.data.replace("Endpoint: ", "")
        schema_text = requests.get(case.schemaURL).text
        compiled_schema = ShExC(schema_text).schema
        print("==== Schema =====")

        evaluator = ShExEvaluator(schema=compiled_schema, debug=True)
        # Strip the query-map wrapper so only the SPARQL text remains
        without_prefix = case.queryMap.replace("SPARQL '''", "")
        query = without_prefix.replace("'''@START", "")

        frame = get_sparql_dataframe(endpoint, query)
        for wdid in frame.item:
            response = requests.get(wdid + ".ttl")
            outcomes = evaluator.evaluate(rdf=response.text,
                                          focus=wdid,
                                          debug=False)
            for outcome in outcomes:
                if not outcome.result:
                    print("item with issue: " + str(outcome.focus) + " - " +
                          "shape applied: " + str(outcome.start))
                else:
                    print(str(outcome.focus) + ": CONFORMS")
Esempio n. 4
0
 def test_no_start(self):
     """Evaluating without any start shape must fail with a START-node reason."""
     graph = Graph()
     self_loop = (EX.x, EX.p, EX.x)
     graph.add(self_loop)

     # No ``start`` argument is supplied on purpose
     evaluator = ShExEvaluator(rdf=graph, schema=shex, focus=EX.x)
     outcome = evaluator.evaluate()[0]

     self.assertFalse(outcome.result)
     reported = outcome.reason.strip()
     self.assertEqual('START node is not specified', reported)
Esempio n. 5
0
    def test_range_construct(self):
        """Check slot ranges against the ``SlotDefinition`` shape.

        Three slot URIs from the metamodel RDF must pass ``self.eval_results``;
        re-running the same evaluator on ``fail_rdf_1`` with the
        ``definitional`` focus must yield no conforming result.
        """
        slot_foci = [
            "http://bioentity.io/vocab/abstract",
            "http://bioentity.io/vocab/class_definition_is_a",
            "http://bioentity.io/vocab/defining_slots"
        ]
        with open(self.meta_rdf_path) as rdf_file:
            evaluator = ShExEvaluator(
                rdf_file.read(),
                shex3,
                focus=slot_foci,
                start="http://bioentity.io/vocab/SlotDefinition")
        passing = evaluator.evaluate()
        self.assertTrue(self.eval_results(passing))

        # Same evaluator, different data and focus: nothing may conform
        failing = evaluator.evaluate(
            rdf=fail_rdf_1, focus="http://bioentity.io/vocab/definitional")
        self.assertFalse(any(outcome.result for outcome in failing))
Esempio n. 6
0
 def test_bad_start(self):
     """A start shape (``EX.c``) absent from the schema must be reported."""
     graph = Graph()
     self_loop = (EX.x, EX.p, EX.x)
     graph.add(self_loop)

     evaluator = ShExEvaluator(rdf=graph, schema=shex, start=EX.c, focus=EX.x)
     outcome = evaluator.evaluate()[0]

     self.assertFalse(outcome.result)
     expected_reason = 'Shape: http://a.example/c not found in Schema'
     self.assertEqual(expected_reason, outcome.reason.strip())
Esempio n. 7
0
 def test_lists(self):
     """Evaluate the metamodel RDF with ``shex2`` from ``SchemaDefinition``.

     The outcome is judged by ``self.eval_results``, which must be truthy.
     """
     ontology_uri = "https://biolink.github.io/metamodel/ontology/meta.ttl"
     start_shape = "http://bioentity.io/vocab/SchemaDefinition"
     with open(self.meta_rdf_path) as rdf_file:
         evaluator = ShExEvaluator(rdf_file.read(),
                                   shex2,
                                   focus=ontology_uri,
                                   start=start_shape)
     outcomes = evaluator.evaluate()
     self.assertTrue(self.eval_results(outcomes))
Esempio n. 8
0
 def test_infinite_loop(self):
     """A one-triple Observation graph must conform to ``ObservationShape``."""
     graph = Graph()
     status_triple = (EX.Obs1, FHIR.status, Literal("final"))
     graph.add(status_triple)

     evaluator = ShExEvaluator(rdf=graph, schema=shex, focus=EX.Obs1,
                               start=FHIR.ObservationShape, debug=False)
     outcomes = evaluator.evaluate()
     self.assertTrue(outcomes[0].result)
Esempio n. 9
0
 def test_inconsistent(self):
     """A shape that negates a reference to itself must be rejected.

     The node ``EX.x`` points to itself via ``EX.p``; the test pins both the
     failing result and the exact reason text produced by the evaluator.
     """
     schema_text = """<http://a.example/S> {<http://a.example/p> not @<http://a.example/S>}"""
     graph = Graph()
     graph.add((EX.x, EX.p, EX.x))

     evaluator = ShExEvaluator(rdf=graph, schema=schema_text, focus=EX.x, start=EX.S, debug=False)
     outcomes = evaluator.evaluate()
     self.assertFalse(outcomes[0].result)

     # Leading whitespace inside this literal is part of the expected message
     expected_reason = """Testing <http://a.example/x> against shape http://a.example/S
 Testing <http://a.example/x> against shape http://a.example/S
   http://a.example/S: Inconsistent recursive shape reference"""
     self.assertEqual(expected_reason, outcomes[0].reason.strip())
Esempio n. 10
0
    def test_probe(self):
        """Evaluate ``probe.ttl`` against ``probe.shex`` for DrugBank DB00005.

        Kept as a probe for a performance problem; the results must satisfy
        ``self._evaluate_shex_results``.
        """
        schema_path = os.path.join(self.source_path, 'probe.shex')
        rdf_path = os.path.join(os.path.join(self.cwd, 'data'), 'probe.ttl')

        # The RDF is supplied at evaluate() time, so the constructor gets None
        evaluator = ShExEvaluator(None,
                                  schema_path,
                                  "http://identifiers.org/drugbank:DB00005",
                                  BIOLINK_NS.Drug)
        outcomes = evaluator.evaluate(rdf_path, debug=False)
        self.assertTrue(self._evaluate_shex_results(outcomes))
Esempio n. 11
0
    def test_biolink_correct_rdf(self):
        """Evaluate ``probe.ttl`` against the generated biolink ShExJ schema.

        The focus is DrugBank DB00005 and the start shape is
        ``BIOLINK_NS.Drug``; the results must satisfy
        ``self._evaluate_shex_results``.
        """
        # Regenerate the ShExJ first so the schema under test is current
        self.single_file_generator('shexj', ShExGenerator, format='json')

        schema_path = env.expected_path('biolink-model.shexj')
        evaluator = ShExEvaluator(None,
                                  schema_path,
                                  "http://identifiers.org/drugbank:DB00005",
                                  BIOLINK_NS.Drug)

        rdf_path = env.input_path('probe.ttl')
        outcomes = evaluator.evaluate(rdf_path, debug=False)
        self.assertTrue(self._evaluate_shex_results(outcomes))
Esempio n. 12
0
    def test_biolink_correct_rdf(self):
        """Evaluate ``correct.ttl`` against the generated biolink ShEx schema.

        The focus is DrugBank DB00005 and the start shape is
        ``BIOLINK_NS.Drug``; the results must satisfy
        ``self._evaluate_shex_results``.
        """
        self.single_file_generator('shex', ShExGenerator)
        schema_path = os.path.join(self.source_path, 'biolink-model.shex')
        rdf_path = os.path.join(os.path.join(self.cwd, 'data'), 'correct.ttl')

        # The RDF is supplied at evaluate() time, so the constructor gets None
        evaluator = ShExEvaluator(None,
                                  schema_path,
                                  "http://identifiers.org/drugbank:DB00005",
                                  BIOLINK_NS.Drug)

        outcomes = evaluator.evaluate(rdf_path, debug=False)
        self.assertTrue(self._evaluate_shex_results(outcomes))
Esempio n. 13
0
 def test_full_meta(self):
     """Evaluate the metamodel RDF against the metamodel ShEx file.

     The first result is expected to be non-conformant; the trace in the
     comment below records why.
     """
     ontology_uri = "https://biolink.github.io/metamodel/ontology/meta.ttl"
     with open(self.meta_rdf_path) as rdf_file, open(self.meta_shex_path) as shex_file:
         evaluator = ShExEvaluator(
             rdf_file.read(),
             shex_file.read(),
             focus=ontology_uri,
             start="http://bioentity.io/vocab/SchemaDefinition")
     # Fails because
     # ---> Testing http://bioentity.io/vocab/local_name against (inner shape)
     #   ---> Testing http://www.w3.org/2001/XMLSchema#string against http://bioentity.io/vocab/Element
     #       No matching triples found for predicate http://www.w3.org/1999/02/22-rdf-syntax-ns#label
     first_outcome = evaluator.evaluate()[0]
     self.assertFalse(first_outcome.result)
Esempio n. 14
0
 def test_biolink_shexeval(self) -> None:
     """Validate the biolink model RDF against the metamodel ShEx schema.

     The Turtle file is read into a ``CFGraph`` and evaluated from the
     ``SchemaDefinition`` shape. The reason for every failing result is
     printed before the final assertion so the cause is visible in the test
     output.
     """
     base_dir = os.path.abspath(
         os.path.join(os.path.dirname(__file__), '..', 'data'))
     g = CFGraph()
     # Graph.load() is deprecated and missing from newer rdflib releases;
     # parse() is the supported call and reads the file the same way.
     g.parse(os.path.join(base_dir, 'validation', 'biolink-model.ttl'),
             format="turtle")
     evaluator = ShExEvaluator(
         g, os.path.join(base_dir, 'schemas', 'meta.shex'),
         "https://biolink.github.io/biolink-model/ontology/biolink.ttl",
         "http://bioentity.io/vocab/SchemaDefinition")
     result = evaluator.evaluate(debug=False)
     for rslt in result:
         if not rslt.result:
             print(f"Error: {rslt.reason}")
     self.assertTrue(all(r.result for r in result))
Esempio n. 15
0
    def run_test(self, manifest_uri: str, num_entries: Optional[int]=None, verbose: bool=True, debug: bool=False,
                 stop_on_fail: bool=False, debug_slurps: bool=False, save_graph_dir: Optional[str]=None) \
            -> List[EvaluationResult]:
        """ Run the test identified by manifest_uri

        For every manifest entry the schema is fetched, the entry's SPARQL query is run to obtain the focus
        identifiers, and each identifier is evaluated against a fresh graph bound to the SPARQL endpoint.

        :param manifest_uri: uri of manifest
        :param num_entries: number of focus identifiers to test per manifest entry (None means all of them)
        :param verbose: True means print each manifest entry as it is processed
        :param debug: debug setting for shex evaluator
        :param stop_on_fail: True means stop the whole run at the first evaluation that does not conform
        :param debug_slurps: passed to the evaluator as its ``debug_slurps`` argument
        :param save_graph_dir: If present, save the final graph of each identifier in this directory
        :return: the evaluation results accumulated up to the point the run ended
        """
        manifest = loads(self.fetch_uri(manifest_uri))
        rval: List[EvaluationResult] = []
        for case in manifest:
            if verbose:
                print(case._as_json_dumps())
            sparql_endpoint = case.data.replace("Endpoint: ", "")
            shex = self.fetch_uri(case.schemaURL)
            evaluator = ShExEvaluator(schema=shex, debug=debug)
            prefixes = PrefixLibrary(shex, SKOS=SKOS)
            # Strip the query-map wrapper so only the SPARQL text remains
            sparql_query = case.queryMap.replace("SPARQL '''",
                                                 "").replace("'''@START", "")
            dfs: List[str] = self.get_sparql_dataframe(sparql_endpoint,
                                                       sparql_query)
            dfs_slice = dfs[:num_entries] if num_entries is not None else dfs
            for df in dfs_slice:
                slurper = SlurpyGraphWithAgent(sparql_endpoint)
                prefixes.add_bindings(slurper)
                print(f"Evaluating: {df}")
                results = evaluator.evaluate(rdf=slurper,
                                             focus=df,
                                             debug=debug,
                                             debug_slurps=debug_slurps,
                                             over_slurp=False)
                rval += results
                if save_graph_dir:
                    # [-1] also copes with an identifier that contains no '/'
                    element_name = df.rsplit('/', 1)[-1]
                    file_name = os.path.join(save_graph_dir,
                                             element_name + '.ttl')
                    print(f"Writing: {file_name}")
                    slurper.serialize(file_name, format="turtle")
                if stop_on_fail and not all(r.result for r in results):
                    # A bare ``break`` would only leave this inner loop and the
                    # run would carry on with the next manifest entry.
                    return rval
        return rval
Esempio n. 16
0
 def shextest(self,
              rdf_file: str,
              shex_file: str,
              focus: str,
              cfgraph: bool = False) -> None:
     """Validate ``rdf_file`` against ``shex_file`` and assert that it conforms.

     :param rdf_file: Turtle file name, resolved under the sibling ``rdf`` directory
     :param shex_file: ShEx file name, resolved under the sibling ``shex`` directory
     :param focus: focus node passed to the evaluator
     :param cfgraph: True means read the RDF into a ``CFGraph`` instead of a ``Graph``
     """
     base_dir = os.path.abspath(
         os.path.join(os.path.dirname(__file__), '..'))
     g = CFGraph() if cfgraph else Graph()
     # Graph.load() is deprecated and missing from newer rdflib releases;
     # parse() is the supported call and reads the file the same way.
     g.parse(os.path.join(base_dir, 'rdf', rdf_file), format="turtle")
     evaluator = ShExEvaluator(
         g, os.path.join(base_dir, 'shex', shex_file), focus,
         "http://w3id.org/biolink/vocab/SchemaDefinition")
     result = evaluator.evaluate(debug=False)
     # Print every failure reason before asserting so the cause is visible
     for rslt in result:
         if not rslt.result:
             print(f"Error: {rslt.reason}")
     self.assertTrue(all(r.result for r in result))
Esempio n. 17
0
def run_shex_manifest():
    """Validate the items of the manifest named by ``SHEX_MANIFEST``.

    The manifest URL is read from the ``SHEX_MANIFEST`` environment variable.
    For each entry whose ``data`` starts with ``"Endpoint:"`` the schema is
    downloaded and compiled, the entry's SPARQL query is executed, and every
    returned item is evaluated against a ``SlurpyGraph`` on that endpoint.
    Conforming items are logged at INFO, failing ones at ERROR together with
    the evaluator's reason.

    Evaluator debugging is enabled only when the ``debug`` environment
    variable is exactly ``"True"``; any other value, or an unset variable,
    leaves it off.
    """
    print(os.environ["SHEX_MANIFEST"])
    manifest = jsonasobj.loads(requests.get(os.environ["SHEX_MANIFEST"]).text)

    # Resolved once up front: the previous per-item if/elif left ``debug``
    # unbound for any value other than "True"/"False" and raised KeyError
    # when the variable was not set at all.
    debug = os.environ.get("debug", "False") == "True"

    for case in manifest:
        if case.data.startswith("Endpoint:"):
            sparql_endpoint = case.data.replace("Endpoint: ", "")
            schema = requests.get(case.schemaURL).text
            shex = ShExC(schema).schema
            evaluator = ShExEvaluator(schema=shex, debug=True)
            # Strip the query-map wrapper so only the SPARQL text remains
            sparql_query = case.queryMap.replace("SPARQL '''",
                                                 "").replace("'''@START", "")

            df = wdi_core.WDItemEngine.execute_sparql_query(sparql_query)
            for row in df["results"]["bindings"]:
                wdid = row["item"]["value"]
                slurpeddata = SlurpyGraph(sparql_endpoint)
                try:
                    results = evaluator.evaluate(rdf=slurpeddata,
                                                 focus=wdid,
                                                 debug=debug)
                    for result in results:
                        if result.result:
                            print(str(result.focus) + ": INFO")
                            msg = wdi_helpers.format_msg(
                                wdid, wdid, None, 'CONFORMS', '')

                            wdi_core.WDItemEngine.log("INFO", msg)
                        else:
                            msg = wdi_helpers.format_msg(
                                wdid, wdid, None, '', result.reason)
                            wdi_core.WDItemEngine.log("ERROR", msg)

                except RuntimeError:
                    # Report the item and move on to the next one
                    print(
                        "Continue after 1 minute, no validation happened on" +
                        wdid)
                    continue
Esempio n. 18
0
    def test_biolink_shex_incorrect_rdf(self):
        """Evaluate ``incorrect.ttl`` and pin the shape of the failure report.

        Exactly one result is expected; its reason must mention unmatched
        triples in a CLOSED shape and contain 13 newline-tab separated
        entries. The rendered result is written to ``incorrect.errs`` when that
        file does not exist yet.
        """
        self.single_file_generator('shexj', ShExGenerator, format='json')
        schema_path = env.expected_path('biolink-model.shexj')

        evaluator = ShExEvaluator(None,
                                  schema_path,
                                  "http://identifiers.org/drugbank:DB00005",
                                  BIOLINK_NS.Drug)

        # incorrect.ttl has 16 error lines (more or less).
        bad_rdf_path = env.temp_file_path('incorrect.ttl')
        errs_path = env.temp_file_path('incorrect.errs')
        outcomes = evaluator.evaluate(bad_rdf_path)

        conforms = self._evaluate_shex_results(outcomes, printit=False)
        self.assertFalse(conforms)
        self.assertEqual(1, len(outcomes))

        reason = outcomes[0].reason
        self.assertTrue('Unmatched triples in CLOSED shape' in reason)
        self.assertEqual(13, reason.count('\n\t'))

        # An existing errors file is left untouched
        if os.path.exists(errs_path):
            return
        with open(errs_path, 'w') as errs_out:
            errs_out.write(shex_results_as_string(outcomes[0]))
Esempio n. 19
0
def run_shex_manifest():
    """Validate the items selected by each entry of the diseases manifest.

    For every manifest entry whose ``data`` starts with ``"Endpoint:"`` the
    schema is downloaded and compiled, the entry's SPARQL query is run, and
    each returned item is evaluated against a ``SlurpyGraph`` on the endpoint.
    Conforming items are printed as such; failing items are printed with the
    applied shape unless they are in the hard-coded skip list below.
    """
    # Items whose failures are not reported
    silenced_items = [
        "http://www.wikidata.org/entity/Q33525",
        "http://www.wikidata.org/entity/Q62736",
        "http://www.wikidata.org/entity/Q112670"
    ]
    manifest_loc = "https://raw.githubusercontent.com/SuLab/Genewiki-ShEx/master/diseases/manifest_all.json"
    manifest = jsonasobj.loads(requests.get(manifest_loc).text)

    for case in manifest:
        print(case._as_json_dumps())
        # Only entries that name a SPARQL endpoint are processed
        if not case.data.startswith("Endpoint:"):
            continue

        endpoint = case.data.replace("Endpoint: ", "")
        schema_text = requests.get(case.schemaURL).text
        compiled_schema = ShExC(schema_text).schema

        evaluator = ShExEvaluator(schema=compiled_schema, debug=False)
        # Strip the query-map wrapper so only the SPARQL text remains
        without_prefix = case.queryMap.replace("SPARQL '''", "")
        query = without_prefix.replace("'''@START", "")

        frame = get_sparql_dataframe(endpoint, query)
        for wdid in frame.item:
            slurped = SlurpyGraph(endpoint)
            outcomes = evaluator.evaluate(rdf=slurped, focus=wdid, debug=False, debug_slurps=True)
            for outcome in outcomes:
                if outcome.result:
                    print(str(outcome.focus) + ": CONFORMS")
                    continue
                if str(outcome.focus) in silenced_items:
                    continue
                print(
                    "item with issue: " + str(outcome.focus) + " - " + "shape applied: " + str(outcome.start))


# run_shex_manifest()
Esempio n. 20
0
def run_shex_manifest():
    """Validate the items selected by each entry of the Reactome manifest.

    For every manifest entry whose ``data`` starts with ``"Endpoint:"`` the
    schema is downloaded and compiled, the entry's SPARQL query is executed,
    and each returned item is evaluated against a ``SlurpyGraph`` on that
    endpoint. Conforming items are logged at INFO, failing ones at ERROR
    together with the evaluator's reason. A ``RuntimeError`` raised while
    handling an item is reported on stdout and the item is skipped.
    """
    manifest = jsonasobj.loads(
        requests.get(
            "https://raw.githubusercontent.com/SuLab/Genewiki-ShEx/master/pathways/reactome/manifest.json"
        ).text)
    for case in manifest:
        if case.data.startswith("Endpoint:"):
            sparql_endpoint = case.data.replace("Endpoint: ", "")
            schema = requests.get(case.schemaURL).text
            shex = ShExC(schema).schema
            evaluator = ShExEvaluator(schema=shex, debug=True)
            # Strip the query-map wrapper so only the SPARQL text remains
            sparql_query = case.queryMap.replace("SPARQL '''",
                                                 "").replace("'''@START", "")

            df = wdi_core.WDItemEngine.execute_sparql_query(sparql_query)
            for row in df["results"]["bindings"]:
                wdid = row["item"]["value"]
                slurpeddata = SlurpyGraph(sparql_endpoint)
                try:
                    results = evaluator.evaluate(rdf=slurpeddata,
                                                 focus=wdid,
                                                 debug=False)
                    for result in results:
                        if result.result:
                            print(str(result.focus) + ": INFO")
                            msg = wdi_helpers.format_msg(
                                wdid, wdid, None, 'CONFORMS', '')

                            wdi_core.WDItemEngine.log("INFO", msg)
                        else:
                            # Include the failure reason so the ERROR entry is
                            # actionable, and log ``msg``: the name previously
                            # passed here was never defined, so the first
                            # failing item aborted the run with a NameError.
                            msg = wdi_helpers.format_msg(
                                wdid, wdid, None, '', result.reason)
                            wdi_core.WDItemEngine.log("ERROR", msg)

                except RuntimeError:
                    print(
                        "Continue after 1 minute, no validation happened on" +
                        wdid)
                    continue
Esempio n. 21
0
 def test_complete_constructor(self):
     """Build an evaluator from positional arguments and pin both results.

     Two foci are supplied: the bare WIKIDATA namespace, which must be
     reported as not in the graph, and ``Q18557112``, which must be reported
     as referencing a start shape that the schema does not contain.
     """
     here = os.path.dirname(os.path.abspath(__file__))
     test_rdf = os.path.join(here, '..', 'test_issues', 'data', 'Q18557122.ttl')
     wikidata = loc_prefixes.WIKIDATA
     evaluator = ShExEvaluator(test_rdf,
                               shex_schema,
                               [wikidata, wikidata.Q18557112],
                               wikidata.cancer)
     results = evaluator.evaluate()
     expected_start = URIRef('http://www.wikidata.org/entity/cancer')

     # First focus: the namespace URI itself
     namespace_outcome = results[0]
     self.assertFalse(namespace_outcome.result)
     self.assertEqual(URIRef('http://www.wikidata.org/entity/'),
                      namespace_outcome.focus)
     self.assertEqual(expected_start, namespace_outcome.start)
     self.assertEqual('Focus: http://www.wikidata.org/entity/ not in graph',
                      namespace_outcome.reason)

     # Second focus: the item, evaluated against a shape missing from the schema
     item_outcome = results[1]
     self.assertEqual(URIRef('http://www.wikidata.org/entity/Q18557112'),
                      item_outcome.focus)
     self.assertEqual(expected_start, item_outcome.start)
     self.assertEqual(
         '  Shape: http://www.wikidata.org/entity/cancer not found in Schema',
         item_outcome.reason)
Esempio n. 22
0
 def test_infinite_loop(self):
     """Evaluating ``FHIR.d`` must return a failing result; its reason is printed."""
     evaluator = ShExEvaluator(rdf=data, schema=shex, focus=FHIR.d, debug=False)
     outcomes = evaluator.evaluate()
     first_outcome = outcomes[0]
     # The exact reason text is not pinned here, only the failing result
     self.assertFalse(first_outcome.result)
     print(first_outcome.reason)
Esempio n. 23
0
 def test_infinite_loop(self):
     """Evaluating ``EX.d`` must return a conforming first result."""
     evaluator = ShExEvaluator(rdf=data, schema=shex, focus=EX.d)
     outcomes = evaluator.evaluate(debug=False)
     first_outcome = outcomes[0]
     self.assertTrue(first_outcome.result)
Esempio n. 24
0
      },
      "distribution":[
         {
            "@type":"DataDownload",
            "encodingFormat":"CSV",
            "contentUrl":"http://www.ncdc.noaa.gov/stormevents/ftp.jsp"
         },
         {
            "@type":"DataDownload",
            "encodingFormat":"XML",
            "contentUrl":"http://gis.ncdc.noaa.gov/all-records/catalog/search/resource/details.page?id=gov.noaa.ncdc:C00510"
         }
      ],
      "temporalCoverage":"1950-01-01/2013-12-18",
      "spatialCoverage":{
         "@type":"Place",
         "geo":{
            "@type":"GeoShape",
            "box":"18.0 -65.0 72.0 172.0"
         }
      }
    }
"""

# Evaluate the JSON-LD sample and print the reason for each failing result
rval = evaluator.evaluate(
    good_eg_1,
    focus="http://example.org/good_",
    rdf_format="json-ld",
)
for r in rval:
    if r.result:
        continue
    print(r.reason)
Esempio n. 25
0
 def test_false_positive_minimum(self):
     """A self-referencing shape must accept a node that points to itself."""
     schema_text = """<http://a.example/S> {<http://a.example/p> @<http://a.example/S>}"""
     graph = Graph()
     self_loop = (EX.x, EX.p, EX.x)
     graph.add(self_loop)

     evaluator = ShExEvaluator(rdf=graph, schema=schema_text, focus=EX.x, start=EX.S, debug=False)
     outcomes = evaluator.evaluate()
     self.assertTrue(outcomes[0].result)
Esempio n. 26
0
 def test_wikidata_2(self):
     """Evaluate ``Q18557112`` from the ``GW.cancer`` shape and print the outcome.

     Nothing is asserted; the evaluation result list is only printed.
     """
     prefixes = PrefixLibrary(shex_schema,
                              wikidata="http://www.wikidata.org/entity/")
     focus_node = prefixes.WIKIDATA.Q18557112
     evaluator = ShExEvaluator(self.test_path, shex_schema, focus_node)
     outcomes = evaluator.evaluate(start=prefixes.GW.cancer, debug=False)
     print(outcomes)
Esempio n. 27
0
 def test_false_positive(self):
     """``Q12214.ttl`` must not conform to the ``disease.shex`` schema."""
     schema_path = os.path.join(self.test_data, 'shex', 'disease.shex')
     with open(schema_path) as schema_file:
         schema_text = schema_file.read()

     rdf_path = os.path.join(self.test_data, 'Q12214.ttl')
     evaluator = ShExEvaluator(rdf_path, schema_text, WIKIDATA.Q12214, debug=False)
     first_outcome = evaluator.evaluate()[0]
     self.assertFalse(first_outcome.result)
Esempio n. 28
0
from pyshex import ShExEvaluator
from rdflib import Graph, plugin
from rdflib.serializer import Serializer
import json

if __name__ == '__main__':
    # Read the inputs with context managers so each file handle is closed as
    # soon as its content has been loaded.
    with open('data/recipe_shex.shex') as shex_file:
        shex = shex_file.read()
    with open('data/recipe_jsonld.json') as jsonld_file:
        jsonld = jsonld_file.read()
    with open('data/context.json') as context_file:
        context = json.load(context_file)

    g = Graph().parse(data=jsonld, context=context, format='json-ld')
    serialized = g.serialize(format='json-ld', indent=4)
    # Older rdflib returns bytes from serialize(), newer releases return str;
    # decode only when needed so the script runs on both.
    if isinstance(serialized, bytes):
        serialized = serialized.decode('utf-8')
    print(serialized)

    evaluator = ShExEvaluator(schema=shex,
                              start="http://schema.org/shex#Recipe")
    results = evaluator.evaluate(g,
                                 focus="http://example.org/recipe",
                                 rdf_format="json-ld")
    # Report the first failure only; the else branch runs when none failed
    for r in results:
        if not r.result:
            print(r.reason)
            break
    else:
        print("Success")