def test_catalog_schema(instance_file, json_schema_file, shex_file, shex_subject, shex_rule):
    """Validate an instance against a JSON schema, then against a ShEx shape.

    The instance is first validated with a Draft 7 JSON-schema validator.
    It is then given the ``ejp_vocabulary_context.json`` context, parsed as
    JSON-LD into a graph, and every subject typed ``shex_subject`` is
    evaluated against the ShEx schema using ``shex_rule`` as the start shape.

    Args:
        instance_file: name passed to ``get_file`` to load the instance.
        json_schema_file: name passed to ``get_file`` to load the JSON schema.
        shex_file: path of the ShEx schema file.
        shex_subject: RDF type used to select the focus nodes.
        shex_rule: start shape handed to ``evaluate``.

    Raises:
        ValidationError: re-raised after being reported on stdout.
    """
    try:
        instance = get_file(instance_file)
        abs_path = 'file:' + os.getcwd() + os.sep + PATH_TO_SCHEMAS
        resolver = RefResolver(base_uri=abs_path, referrer=None)
        schema = get_file(json_schema_file)
        validator = Draft7Validator(schema=schema, resolver=resolver)
        validator.validate(instance)

        context = get_file('ejp_vocabulary_context.json')
        instance["@context"] = context
        graph = ConjunctiveGraph()
        graph.parse(data=json.dumps(instance), format='json-ld')

        # Read the schema text once: calling read() inside the loop returns an
        # empty string from the second focus node onwards.
        with open(shex_file, 'r') as shex_schema:
            shex_text = shex_schema.read()

        for s, _, _ in graph.triples((None, RDF.type, shex_subject)):
            print("checking ShEX valdidation for {}".format(s))
            rslt, reason = evaluate(graph, shex_text, start=shex_rule, focus=s)
            if not rslt:
                # Non-conformance is only reported; it does not raise.
                print(f"{reason if reason else 'DOES NOT CONFORM'}")
    except ValidationError as e:
        print("testing catalog schema failed")
        print("error validating file : {}".format(e))
        raise
    print("testing catalog schema passed")
def qc_metadata(metadatafile):
    """Check a metadata file against the packaged schema and ShEx shape.

    The schema is loaded from the packaged ``schema.yml`` (served to
    schema_salad through a cache keyed by its GitHub URL), the document is
    validated, converted to RDF and evaluated against ``submissionShape``
    from the packaged ``shex.rdf``.

    Args:
        metadatafile: passed to ``schema_salad.schema.load_and_validate``.

    Returns:
        The ShEx evaluation result, or ``False`` when the schema did not
        load as Avro names or when any exception was raised while checking.
    """
    schema_url = "https://raw.githubusercontent.com/bio-ontology-research-group/mrsaweb/master/mrsaweb/apps/uploader/schema.yml"

    # Close the packaged resources deterministically instead of leaking them.
    with pkg_resources.resource_stream(__name__, "schema.yml") as schema_resource:
        cache = {schema_url: schema_resource.read().decode("utf-8")}
    (document_loader,
     avsc_names,
     schema_metadata,
     metaschema_loader) = schema_salad.schema.load_schema(schema_url, cache=cache)

    with pkg_resources.resource_stream(__name__, "shex.rdf") as shex_resource:
        shex = shex_resource.read().decode("utf-8")

    if not isinstance(avsc_names, schema_salad.avro.schema.Names):
        print(avsc_names)
        return False

    try:
        doc, metadata = schema_salad.schema.load_and_validate(
            document_loader, avsc_names, metadatafile, True)
        # A fresh URN becomes the document id, which is the ShEx focus below.
        doc["id"] = uuid.uuid4().urn
        g = schema_salad.jsonld_context.makerdf("workflow", doc, document_loader.ctx)
        rslt, reason = evaluate(
            g, shex, doc["id"],
            "https://raw.githubusercontent.com/bio-ontology-research-group/mrsaweb/master/mrsaweb/apps/uploader/shex.rdf#submissionShape")

        if not rslt:
            print(reason)

        return rslt
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and return False.
        traceback.print_exc()
        logging.warning(e)  # logging.warn is a deprecated alias
    return False
def qc_shex(rdf):
    """Evaluate Turtle data against the ``validation_shape.rdf`` ShEx schema.

    Args:
        rdf: Turtle text parsed into a fresh graph.

    Returns:
        The first element of the tuple returned by ``evaluate``.
    """
    shape_schema = rdf_strings("validation_shape.rdf")

    start_ns = Namespace("http://whatever/")
    focus_ns = Namespace("file:///peters_repo/bh20-seq-resource/example/")

    graph = Graph()
    graph.parse(data=rdf, format="turtle")

    conforms, _reason = evaluate(
        graph,
        shape_schema,
        focus_ns.placeholder,
        start_ns.submissionShape,
    )
    return conforms
def test_closed(self):
    """ Test closed definition """
    e = ShExEvaluator(rdf=rdf, schema=shex, focus=EXC['42'], start=EXE.Person)
    # Evaluate once and reuse the result for both the printout and the
    # assertion instead of running the evaluation twice.
    results = e.evaluate()
    pprint(results)
    self.assertFalse(results[0].result)

    # Repeat the check through the functional API and print its outcome.
    from pyshex.evaluate import evaluate
    g = Graph()
    g.parse(data=rdf, format="turtle")
    pprint(evaluate(g, shex, focus=EXC['42'], start=EXE.Person))
def qc_metadata(metadatafile):
    """Validate a metadata file and return its sample id.

    The schema is loaded once from the packaged ``bh20seq-schema.yml`` and
    kept in the module-level ``metadata_schema``. The document is validated,
    converted to RDF, evaluated against ``submissionShape`` from the packaged
    ``bh20seq-shex.rdf`` and serialized as N-Triples.

    Args:
        metadatafile: passed to ``schema_salad.schema.load_and_validate``.

    Returns:
        ``metadata['sample']['sample_id']``.

    Raises:
        Exception: if the schema did not load as Avro names (the argument is
            whatever was returned in their place) or if the ShEx evaluation
            fails (the argument is the reported reason). Errors raised by
            validation or serialization propagate unchanged.
    """
    global metadata_schema

    schema_url = "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml"

    if metadata_schema is None:
        # Close the packaged resource deterministically instead of leaking it.
        with pkg_resources.resource_stream(
                __name__, "bh20seq-schema.yml") as schema_resource:
            cache = {schema_url: schema_resource.read().decode("utf-8")}
        metadata_schema = schema_salad.schema.load_schema(schema_url, cache=cache)

    (document_loader, avsc_names, schema_metadata,
     metaschema_loader) = metadata_schema

    with pkg_resources.resource_stream(
            __name__, "bh20seq-shex.rdf") as shex_resource:
        shex = shex_resource.read().decode("utf-8")

    if not isinstance(avsc_names, schema_salad.avro.schema.Names):
        raise Exception(avsc_names)

    doc, metadata = schema_salad.schema.load_and_validate(
        document_loader, avsc_names, metadatafile, True)
    g = schema_salad.jsonld_context.makerdf("workflow", doc,
                                            document_loader.ctx)
    rslt, reason = evaluate(
        g, shex, doc["id"],
        "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-shex.rdf#submissionShape"
    )

    # As part of QC make sure serialization works too, this will raise
    # an exception if there are invalid URIs.
    g.serialize(format="ntriples")

    if not rslt:
        raise Exception(reason)

    return metadata['sample']['sample_id']
def qc_metadata(metadatafile):
    """Check a metadata file against the packaged schema and ShEx shape.

    The schema is loaded from the packaged ``bh20seq-schema.yml`` (served to
    schema_salad through a cache keyed by its GitHub URL), the document is
    validated, converted to RDF and evaluated against ``submissionShape``
    from the packaged ``bh20seq-shex.rdf``.

    Args:
        metadatafile: passed to ``schema_salad.schema.load_and_validate``.

    Returns:
        The ShEx evaluation result, or ``False`` when the schema did not
        load as Avro names or when any exception was raised while checking.
    """
    log = logging.getLogger(__name__)
    schema_url = "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml"

    # Close the packaged resources deterministically instead of leaking them.
    with pkg_resources.resource_stream(
            __name__, "bh20seq-schema.yml") as schema_resource:
        cache = {schema_url: schema_resource.read().decode("utf-8")}
    (document_loader,
     avsc_names,
     schema_metadata,
     metaschema_loader) = schema_salad.schema.load_schema(schema_url, cache=cache)

    with pkg_resources.resource_stream(
            __name__, "bh20seq-shex.rdf") as shex_resource:
        shex = shex_resource.read().decode("utf-8")

    if not isinstance(avsc_names, schema_salad.avro.schema.Names):
        print(avsc_names)
        return False

    try:
        doc, metadata = schema_salad.schema.load_and_validate(
            document_loader, avsc_names, metadatafile, True)
        g = schema_salad.jsonld_context.makerdf("workflow", doc,
                                                document_loader.ctx)
        rslt, reason = evaluate(
            g, shex, doc["id"],
            "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-shex.rdf#submissionShape"
        )

        if not rslt:
            log.debug(reason)
            print(reason)

        return rslt
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and return False.
        traceback.print_exc()
        log.warning(e)  # Logger.warn is a deprecated alias
    return False
def main(args):
    """Validate every metadata entry against the schema and the ShEx file.

    Args:
        args: object providing ``schema``, ``metadata`` and ``shex``; the
            first is handed to ``schema_salad.schema.load_schema`` and the
            other two are opened as files.

    Each entry loaded from ``args.metadata`` is given the id
    ``http://example.org/id``, validated against the schema, converted to
    RDF and passed to ``evaluate`` with ``doc["id"]`` and ``"sample_name"``
    as the third and fourth arguments. The reason is printed for every
    entry that does not validate.
    """
    (document_loader,
     avsc_names,
     schema_metadata,
     metaschema_loader) = schema_salad.schema.load_schema(args.schema)

    with open(args.metadata) as metadata_file:
        metadata_contents = ruamel.yaml.round_trip_load(metadata_file)

    # The ShEx text does not depend on the entry, so read it once up front
    # rather than reopening the file on every iteration.
    with open(args.shex) as shex_file:
        shex = shex_file.read()

    for metadata_content in metadata_contents:
        metadata_content["id"] = "http://example.org/id"
        add_lc_filename(metadata_content, metadata_content["id"])
        doc, metadata = schema_salad.schema.load_and_validate(
            document_loader, avsc_names, metadata_content, True)

        g = schema_salad.jsonld_context.makerdf("workflow", doc, document_loader.ctx)
        validation_result, reason = evaluate(g, shex, doc["id"], "sample_name")

        if not validation_result:
            print(reason)
def test_schema(self):
    """Validate TN_UT2.yaml against the schema and ShEx shape.

    Also writes the N-Triples serialization of the resulting graph to
    ``test/data/regression/TN_UT2.rdf``.

    Raises:
        Exception: with the reported reason when the ShEx evaluation fails.
    """
    # load_schema takes the path itself; no separate file handle is needed.
    metadata_schema = schema_salad.schema.load_schema("bh20sequploader/bh20seq-schema.yml")
    (document_loader, avsc_names, schema_metadata, metaschema_loader) = metadata_schema
    self.assertIsInstance(avsc_names, schema_salad.avro.schema.Names)

    metadatafile = "test/data/input/TN_UT2.yaml"
    doc, metadata = schema_salad.schema.load_and_validate(
        document_loader, avsc_names, metadatafile, True)
    print(doc)
    g = schema_salad.jsonld_context.makerdf("workflow", doc, document_loader.ctx)

    with open("bh20sequploader/bh20seq-shex.rdf") as f:
        shex = f.read()

    # Note the https link simply acts as a URI descriptor (it does not fetch)
    rslt, reason = evaluate(
        g, shex, doc["id"],
        "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-shex.rdf#submissionShape")

    # serialize() returns bytes on older rdflib releases and str on newer
    # ones; accept both so the regression file is always written as text.
    ntriples = g.serialize(format="ntriples")
    if isinstance(ntriples, bytes):
        ntriples = ntriples.decode("utf-8")
    with open("test/data/regression/TN_UT2.rdf", "w") as f:
        f.write(ntriples)

    if not rslt:
        raise Exception(reason)
# Render the yUML diagram once and reuse the result for both printouts.
yuml_diagram = YumlGenerator(yaml).serialize()
print(f'<img src="{yuml_diagram}"/>')
print(f'\n-----\n{yuml_diagram}\n')

cntxt = loads(
    ContextGenerator(yaml).serialize(base="http://example.org/context/"))
print(as_json(cntxt))

shex = ShExGenerator(yaml).serialize(collections=False)
print(shex)

# Generate a person
joe_smith = module.Person(id="42", last_name="smith",
                          first_name=['Joe', 'Bob'], age=43)
print(joe_smith)

# Add the context and turn it into RDF
jsonld = as_json(yaml_to_json(joe_smith, cntxt))
print(jsonld)
g = Graph()
g.parse(data=jsonld, format="json-ld")
# serialize() returns bytes on older rdflib releases and str on newer ones.
turtle = g.serialize(format="turtle")
print(turtle.decode() if isinstance(turtle, bytes) else turtle)

# Check the generated person against the generated ShEx schema.
from pyshex.evaluate import evaluate
r = evaluate(g, shex,
             start="http://example.org/sample/example1/Person",
             focus="http://example.org/context/42")
print("Conforms" if r[0] else r[1])
def test_first_example(self):
    """Evaluate ``EX.obs1`` against ``SCHOOL.Enrollee`` on the ``rdf1`` graph."""
    graph = Graph()
    graph.parse(data=rdf1, format="turtle")
    rslt, reason = evaluate(graph, shexc, EX.obs1, SCHOOL.Enrollee)
    # NOTE(review): this assertion fails unconditionally and ignores
    # rslt/reason; it looks like a placeholder. Confirm the expected
    # evaluation outcome and assert on rslt instead.
    self.assertEqual(True, False)
def test_wikidata_1(self):
    """Check that ``WIKIDATA.Q18557112`` conforms to ``shex_schema``."""
    g = Graph()
    # Graph.load is a deprecated alias of parse and is absent from newer
    # rdflib releases; parse takes the same source and format.
    g.parse(self.test_path, format="turtle")
    rslt, reason = evaluate(g, shex_schema, WIKIDATA.Q18557112)
    # Surface the evaluator's reason when the assertion fails.
    self.assertTrue(rslt, reason)
# Generate a person
joe_smith = module.Person(id="42", last_name="smith",
                          first_name=['Joe', 'Bob'], age=43)
print(joe_smith)

# Add the context and turn it into RDF
jsonld = dumps(joe_smith, cntxt)
print(jsonld)
g = Graph()
g.parse(data=jsonld, format="json-ld")
# serialize() returns bytes on older rdflib releases and str on newer ones.
turtle = g.serialize(format="turtle")
print(turtle.decode() if isinstance(turtle, bytes) else turtle)

shex = ShExGenerator(yaml).serialize(collections=False)
print(shex)

# Check the same focus node against both start shapes in turn.
from pyshex.evaluate import evaluate
for start_shape in ("http://example.org/model/Person",
                    "http://example.org/model/FriendlyPerson"):
    r = evaluate(g, shex,
                 start=start_shape,
                 focus="http://example.org/people/42")
    print("Conforms" if r[0] else r[1])