예제 #1
0
def test_catalog_schema(instance_file, json_schema_file, shex_file,
                        shex_subject, shex_rule):
    """Validate a catalog instance against a JSON schema and a ShEx schema.

    The instance is first validated with a Draft 7 JSON-schema validator.
    It is then given the ``ejp_vocabulary_context.json`` context, parsed as
    JSON-LD into a graph, and every subject typed as ``shex_subject`` is
    evaluated against the ShEx schema using ``shex_rule`` as start shape.

    Args:
        instance_file: File passed to ``get_file`` to load the instance.
        json_schema_file: File passed to ``get_file`` to load the JSON schema.
        shex_file: Path of the ShEx schema file to read.
        shex_subject: RDF type whose instances are used as focus nodes.
        shex_rule: Start shape handed to ``evaluate``.

    Raises:
        ValidationError: Re-raised after reporting when JSON-schema
            validation fails.

    Note:
        ShEx non-conformance is only printed; it does not raise, and the
        final "passed" message is still emitted.
    """
    try:
        instance = get_file(instance_file)

        abs_path = 'file:' + os.getcwd() + os.sep + PATH_TO_SCHEMAS
        resolver = RefResolver(base_uri=abs_path, referrer=None)
        schema = get_file(json_schema_file)
        validator = Draft7Validator(schema=schema, resolver=resolver)
        validator.validate(instance)

        context = get_file('ejp_vocabulary_context.json')
        instance["@context"] = context

        graph = ConjunctiveGraph()
        graph.parse(data=json.dumps(instance), format='json-ld')

        # Read the schema text exactly once: calling read() on the open
        # handle inside the loop returns '' from the second subject onwards,
        # so later subjects would be checked against an empty schema.
        with open(shex_file, 'r') as shex_schema:
            shex_text = shex_schema.read()

        for s, _, _ in graph.triples((None, RDF.type, shex_subject)):
            print("checking ShEX valdidation for {}".format(s))
            rslt, reason = evaluate(graph,
                                    shex_text,
                                    start=shex_rule,
                                    focus=s)
            if not rslt:
                print(f"{reason if reason else 'DOES NOT CONFORM'}")

    except ValidationError as e:
        print("testing catalog schema failed")
        print("error validating file : {}".format(e))
        raise
    print("testing catalog schema passed")
예제 #2
0
def qc_metadata(metadatafile):
    """Check a metadata file against the packaged schema and ShEx shape.

    The schema-salad schema is loaded from the package resource
    ``schema.yml`` (served through a cache keyed by its GitHub URL), the
    document is validated, assigned a fresh UUID URN as ``id``, converted
    to RDF and evaluated against the ``submissionShape`` of ``shex.rdf``.

    Args:
        metadatafile: Metadata document handed to
            ``schema_salad.schema.load_and_validate``.

    Returns:
        The conformance result from ``evaluate``; ``False`` if the schema
        could not be loaded or any exception occurred while validating.
    """
    schema_url = "https://raw.githubusercontent.com/bio-ontology-research-group/mrsaweb/master/mrsaweb/apps/uploader/schema.yml"

    # Close the resource streams deterministically instead of leaking them.
    with pkg_resources.resource_stream(__name__, "schema.yml") as schema_resource:
        cache = {schema_url: schema_resource.read().decode("utf-8")}
    (document_loader,
     avsc_names,
     schema_metadata,
     metaschema_loader) = schema_salad.schema.load_schema(schema_url, cache=cache)

    with pkg_resources.resource_stream(__name__, "shex.rdf") as shex_resource:
        shex = shex_resource.read().decode("utf-8")

    # On failure load_schema hands back something other than a Names object.
    if not isinstance(avsc_names, schema_salad.avro.schema.Names):
        print(avsc_names)
        return False

    try:
        doc, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, metadatafile, True)
        doc["id"] = uuid.uuid4().urn
        g = schema_salad.jsonld_context.makerdf("workflow", doc, document_loader.ctx)
        rslt, reason = evaluate(g, shex, doc["id"], "https://raw.githubusercontent.com/bio-ontology-research-group/mrsaweb/master/mrsaweb/apps/uploader/shex.rdf#submissionShape")

        if not rslt:
            print(reason)

        return rslt
    except Exception as e:
        # Best-effort QC: report the problem and signal failure to the caller.
        traceback.print_exc()
        # logging.warn is a deprecated alias; warning() is the supported name.
        logging.warning(e)
    return False
예제 #3
0
def qc_shex(rdf):
    """Evaluate Turtle data against the ``validation_shape.rdf`` ShEx schema.

    Args:
        rdf: Turtle-serialized RDF text to validate.

    Returns:
        The conformance result (first element) returned by ``evaluate``;
        the accompanying reason is discarded.
    """
    shape_schema = rdf_strings("validation_shape.rdf")

    start_ns = Namespace("http://whatever/")
    focus_ns = Namespace("file:///peters_repo/bh20-seq-resource/example/")

    graph = Graph()
    graph.parse(data=rdf, format="turtle")

    conforms, _reason = evaluate(
        graph,
        shape_schema,
        focus_ns.placeholder,
        start_ns.submissionShape,
    )
    return conforms
예제 #4
0
    def test_closed(self):
        """Test closed definition.

        Evaluates focus node ``EXC['42']`` against the ``EXE.Person`` shape
        through ``ShExEvaluator`` and asserts that the first result does not
        conform. The same check is then run through the functional
        ``evaluate`` API and its result is printed.
        """
        e = ShExEvaluator(rdf=rdf, schema=shex, focus=EXC['42'], start=EXE.Person)

        # Evaluate once and reuse the results for both printing and the
        # assertion instead of running the full validation twice.
        results = e.evaluate()
        pprint(results)
        self.assertFalse(results[0].result)

        from pyshex.evaluate import evaluate
        g = Graph()
        g.parse(data=rdf, format="turtle")
        pprint(evaluate(g, shex, focus=EXC['42'], start=EXE.Person))
예제 #5
0
def qc_metadata(metadatafile):
    """Validate a metadata file and return its sample id.

    The schema-salad schema is loaded from the packaged
    ``bh20seq-schema.yml`` on first use and kept in the module-level
    ``metadata_schema`` for later calls. The document is validated,
    converted to RDF, evaluated against the ``submissionShape`` of the
    packaged ``bh20seq-shex.rdf`` and serialized once as a sanity check.

    Args:
        metadatafile: Metadata document handed to
            ``schema_salad.schema.load_and_validate``.

    Returns:
        ``metadata['sample']['sample_id']`` of the validated document.

    Raises:
        Exception: If the schema did not load into a ``Names`` object, or
            if the ShEx evaluation reports non-conformance (the reason is
            the exception argument). Errors from schema-salad validation
            and RDF serialization propagate unchanged.
    """
    global metadata_schema
    schema_url = "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml"

    if metadata_schema is None:
        # Close the resource stream deterministically instead of leaking it.
        with pkg_resources.resource_stream(
                __name__, "bh20seq-schema.yml") as schema_resource:
            cache = {schema_url: schema_resource.read().decode("utf-8")}
        metadata_schema = schema_salad.schema.load_schema(schema_url,
                                                          cache=cache)

    (document_loader, avsc_names, schema_metadata,
     metaschema_loader) = metadata_schema

    with pkg_resources.resource_stream(
            __name__, "bh20seq-shex.rdf") as shex_resource:
        shex = shex_resource.read().decode("utf-8")

    if not isinstance(avsc_names, schema_salad.avro.schema.Names):
        raise Exception(avsc_names)

    doc, metadata = schema_salad.schema.load_and_validate(
        document_loader, avsc_names, metadatafile, True)
    g = schema_salad.jsonld_context.makerdf("workflow", doc,
                                            document_loader.ctx)
    rslt, reason = evaluate(
        g, shex, doc["id"],
        "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-shex.rdf#submissionShape"
    )

    # As part of QC make sure serialization works too, this will raise
    # an exception if there are invalid URIs.
    g.serialize(format="ntriples")

    if not rslt:
        raise Exception(reason)

    return metadata['sample']['sample_id']
예제 #6
0
def qc_metadata(metadatafile):
    """Check a metadata file against the packaged bh20seq schema and shape.

    Loads the schema-salad schema from the packaged ``bh20seq-schema.yml``
    (served through a cache keyed by its GitHub URL), validates the
    document, converts it to RDF and evaluates it against the
    ``submissionShape`` of the packaged ``bh20seq-shex.rdf``.

    Args:
        metadatafile: Metadata document handed to
            ``schema_salad.schema.load_and_validate``.

    Returns:
        The conformance result from ``evaluate``; ``False`` if the schema
        could not be loaded or any exception occurred while validating.
    """
    log = logging.getLogger(__name__)
    schema_url = "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml"

    # Close the resource streams deterministically instead of leaking them.
    with pkg_resources.resource_stream(
            __name__, "bh20seq-schema.yml") as schema_resource:
        cache = {schema_url: schema_resource.read().decode("utf-8")}
    (
        document_loader, avsc_names, schema_metadata, metaschema_loader
    ) = schema_salad.schema.load_schema(schema_url, cache=cache)

    with pkg_resources.resource_stream(
            __name__, "bh20seq-shex.rdf") as shex_resource:
        shex = shex_resource.read().decode("utf-8")

    # On failure load_schema hands back something other than a Names object.
    if not isinstance(avsc_names, schema_salad.avro.schema.Names):
        print(avsc_names)
        return False

    try:
        doc, metadata = schema_salad.schema.load_and_validate(
            document_loader, avsc_names, metadatafile, True)
        g = schema_salad.jsonld_context.makerdf("workflow", doc,
                                                document_loader.ctx)
        rslt, reason = evaluate(
            g, shex, doc["id"],
            "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-shex.rdf#submissionShape"
        )

        if not rslt:
            log.debug(reason)
            print(reason)

        return rslt
    except Exception as e:
        # Best-effort QC: report the problem and signal failure to the caller.
        traceback.print_exc()
        # Logger.warn is a deprecated alias; warning() is the supported name.
        log.warning(e)
    return False
예제 #7
0
def main(args):
    """Validate every metadata entry against a schema and a ShEx shape.

    Each entry of the YAML file ``args.metadata`` is given the fixed id
    ``http://example.org/id``, passed through ``add_lc_filename``,
    validated with schema-salad, converted to RDF and evaluated against
    the ShEx schema in ``args.shex`` with start shape ``sample_name``.
    The reason is printed for each entry that does not conform.

    Args:
        args: Object providing ``schema``, ``metadata`` and ``shex`` path
            attributes.
    """
    (document_loader, avsc_names, schema_metadata,
     metaschema_loader) = schema_salad.schema.load_schema(args.schema)

    with open(args.metadata) as f:
        metadata_contents = ruamel.yaml.round_trip_load(f)

    with open(args.shex) as f:
        shex = f.read()

    # Run the ShEx check per entry. Doing it after the loop on the leaked
    # loop variable only checked the last entry and raised NameError when
    # the metadata file contained no entries at all.
    for metadata_content in metadata_contents:
        metadata_content["id"] = "http://example.org/id"
        add_lc_filename(metadata_content, metadata_content["id"])
        doc, metadata = schema_salad.schema.load_and_validate(
            document_loader, avsc_names, metadata_content, True)

        g = schema_salad.jsonld_context.makerdf("workflow", doc,
                                                document_loader.ctx)
        validation_result, reason = evaluate(g, shex, doc["id"],
                                             "sample_name")

        if not validation_result:
            print(reason)
예제 #8
0
    def test_schema(self):
        """Validate ``TN_UT2.yaml`` against the bh20seq schema and shape.

        Loads the schema-salad schema, validates the sample metadata file,
        converts it to RDF, evaluates it against the ``submissionShape`` of
        ``bh20seq-shex.rdf`` and writes the N-Triples serialization to
        ``test/data/regression/TN_UT2.rdf``.
        """
        # load_schema reads the file by path itself, so no separate file
        # handle is opened here.
        metadata_schema = schema_salad.schema.load_schema("bh20sequploader/bh20seq-schema.yml")
        (document_loader,
         avsc_names,
         schema_metadata,
         metaschema_loader) = metadata_schema
        self.assertIsInstance(avsc_names, schema_salad.avro.schema.Names)
        metadatafile = "test/data/input/TN_UT2.yaml"
        doc, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, metadatafile, True)
        print(doc)
        g = schema_salad.jsonld_context.makerdf("workflow", doc, document_loader.ctx)
        with open("bh20sequploader/bh20seq-shex.rdf") as f:
            shex = f.read()
        # Note the https link simply acts as a URI descriptor (it does not fetch)
        rslt, reason = evaluate(g, shex, doc["id"], "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-shex.rdf#submissionShape")

        # Graph.serialize returns bytes on older rdflib releases and str on
        # newer ones; accept both instead of decoding unconditionally.
        serialized = g.serialize(format="ntriples")
        if isinstance(serialized, bytes):
            serialized = serialized.decode("utf-8")
        with open("test/data/regression/TN_UT2.rdf", "w") as f:
            f.write(serialized)

        # Report non-conformance as a test failure carrying the ShEx reason.
        self.assertTrue(rslt, reason)
예제 #9
0
# Print the yUML serialization of the model twice: once embedded in an
# <img> tag and once as raw text between separator lines.
print(f'<img src="{YumlGenerator(yaml).serialize()}"/>')
print(f'\n-----\n{YumlGenerator(yaml).serialize()}\n')

# Generate the context with the given base and load it into an object.
cntxt = loads(
    ContextGenerator(yaml).serialize(base="http://example.org/context/"))
print(as_json(cntxt))

# Generate the ShEx schema that the instance is validated against below.
shex = ShExGenerator(yaml).serialize(collections=False)
print(shex)

# Generate a person
joe_smith = module.Person(id="42",
                          last_name="smith",
                          first_name=['Joe', 'Bob'],
                          age=43)
print(joe_smith)

# Add the context and turn it into RDF
jsonld = as_json(yaml_to_json(joe_smith, cntxt))
print(jsonld)
g = Graph()
g.parse(data=jsonld, format="json-ld")
# NOTE(review): .decode() assumes serialize() returns bytes; confirm against
# the rdflib version in use.
print(g.serialize(format="turtle").decode())

# Evaluate the graph against the generated ShEx; r[0] is the conformance
# flag and r[1] is printed in its place when the node does not conform.
from pyshex.evaluate import evaluate
r = evaluate(g,
             shex,
             start="http://example.org/sample/example1/Person",
             focus="http://example.org/context/42")
print("Conforms" if r[0] else r[1])
예제 #10
0
 def test_first_example(self):
     """Evaluate ``EX.obs1`` in ``rdf1`` against the ``SCHOOL.Enrollee`` shape.

     NOTE(review): the evaluation result is never inspected and the final
     assertion compares two constants, so this test always fails as
     written. Presumably a placeholder; confirm the intended expectation.
     """
     graph = Graph()
     graph.parse(data=rdf1, format="turtle")
     _conforms, _reason = evaluate(graph, shexc, EX.obs1, SCHOOL.Enrollee)
     self.assertEqual(True, False)
예제 #11
0
 def test_wikidata_1(self):
     """Check that ``WIKIDATA.Q18557112`` conforms to ``shex_schema``.

     The Turtle data at ``self.test_path`` is parsed into a graph and the
     node is evaluated without an explicit start shape.
     """
     g = Graph()
     # Graph.load was only a deprecated alias for parse and has been removed
     # from newer rdflib releases; parse works across versions.
     g.parse(self.test_path, format="turtle")
     rslt, _ = evaluate(g, shex_schema, WIKIDATA.Q18557112)
     self.assertTrue(rslt)
예제 #12
0
# Generate a person
joe_smith = module.Person(id="42",
                          last_name="smith",
                          first_name=['Joe', 'Bob'],
                          age=43)
print(joe_smith)

# Add the context and turn it into RDF
jsonld = dumps(joe_smith, cntxt)
print(jsonld)
g = Graph()
g.parse(data=jsonld, format="json-ld")
# NOTE(review): .decode() assumes serialize() returns bytes; confirm against
# the rdflib version in use.
print(g.serialize(format="turtle").decode())

# Generate the ShEx schema that the instance is validated against below.
shex = ShExGenerator(yaml).serialize(collections=False)
print(shex)

# Evaluate the same focus node against two start shapes in turn; r[0] is
# the conformance flag and r[1] is printed in its place on failure.
from pyshex.evaluate import evaluate
r = evaluate(g,
             shex,
             start="http://example.org/model/Person",
             focus="http://example.org/people/42")
print("Conforms" if r[0] else r[1])

r = evaluate(g,
             shex,
             start="http://example.org/model/FriendlyPerson",
             focus="http://example.org/people/42")
print("Conforms" if r[0] else r[1])