def test_shex(self):
     """Smoke test: the sample schema in the ``data`` directory parses without raising."""
     data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data'))
     schema_path = os.path.join(data_dir, '1refbnode_with_spanning_PN_CHARS_BASE1.shex')
     # Read raw bytes and decode explicitly rather than opening in text mode
     with open(schema_path, 'rb') as schema_file:
         raw_schema = schema_file.read()
     parse(raw_schema.decode())
     # Reaching this line means parse() did not raise
     self.assertTrue(True, "Parsing was valid")
 def test_explicit_base(self):
     """Check that identifiers resolve against the schema's own BASE (FOO) even though BASE is passed to parse()."""
     schema_text = f'BASE <{str(FOO)}>\n<S1> {{<p1> [<o1>]}}'
     schema: ShExJ.Schema = parse(schema_text, str(BASE))
     first_shape = schema.shapes[0]
     self.assertEqual(str(FOO.S1), str(first_shape.id))
     constraint = first_shape.expression
     self.assertEqual(str(FOO.p1), str(constraint.predicate))
     self.assertEqual(str(FOO.o1), str(constraint.valueExpr.values[0]))
Beispiel #3
0
def validate_shexc(shexc_str: str, input_fname: str) -> bool:
    """
    Parse a ShExC string and validate the resulting ShExJ
    :param shexc_str: String to validate
    :param input_fname: Name of source file for error reporting
    :return: True if pass
    """
    if has_invalid_chars(shexc_str):
        print("ANTLR does not support unicode literals > 4 hex digits.")
        return False
    log = MemLogger('\t')
    logger = Logger(log)
    shexj = parse(shexc_str)
    # The None guard must run before the result is subscripted; otherwise a
    # None result raises TypeError and the guard can never be reached.
    if shexj is None:
        return False
    shexj['@context'] = "http://www.w3.org/ns/shex.jsonld"
    shex_obj = jsg_loads(shexj._as_json, ShExJ)
    # Short-circuit: compare_json only runs when the object itself is valid
    if not shex_obj._is_valid(logger) or not compare_json(input_fname, shex_obj._as_json, log):
        print("File: {} - ".format(input_fname))
        print(log.log)
        return False
    return True
Beispiel #4
0
 def test_default_base(self):
     """Check that relative identifiers resolve against the base passed to parse() when the schema declares none."""
     schema_text = '<S1> {<p1> [<o1>]}'
     schema: ShExJ.Schema = parse(schema_text, str(BASE))
     first_shape = schema.shapes[0]
     self.assertEqual(str(BASE.S1), str(first_shape.id))
     constraint = first_shape.expression
     self.assertEqual(str(BASE.p1), str(constraint.predicate))
     self.assertEqual(str(BASE.o1), str(constraint.valueExpr.values[0]))
Beispiel #5
0
 def test_explicit_base(self):
     """Check that identifiers resolve against the schema's own BASE (FOO) even though BASE is passed to parse()."""
     schema_text = f'BASE <{str(FOO)}>\n<S1> {{<p1> [<o1>]}}'
     schema: ShExJ.Schema = parse(schema_text, str(BASE))
     first_shape = schema.shapes[0]
     self.assertEqual(str(FOO.S1), str(first_shape.id))
     constraint = first_shape.expression
     self.assertEqual(str(FOO.p1), str(constraint.predicate))
     self.assertEqual(str(FOO.o1), str(constraint.valueExpr.values[0]))
 def test_default_base(self):
     """Check that relative identifiers resolve against the base passed to parse() when the schema declares none."""
     schema_text = '<S1> {<p1> [<o1>]}'
     schema: ShExJ.Schema = parse(schema_text, str(BASE))
     first_shape = schema.shapes[0]
     self.assertEqual(str(BASE.S1), str(first_shape.id))
     constraint = first_shape.expression
     self.assertEqual(str(BASE.p1), str(constraint.predicate))
     self.assertEqual(str(BASE.o1), str(constraint.valueExpr.values[0]))
Beispiel #7
0
    def test_explicit_uris(self):
        shex_str = f"""
BASE <{str(FOO)}>
PREFIX ex: <{EX}>

ex:S1 {{ex:p1 [ex:o1]}}"""
        shex: ShExJ.Schema = parse(shex_str, str(BASE))
        self.assertEqual(str(EX.S1),
                         str(shex.shapes[0].id))
        self.assertEqual(str(EX.p1), str(shex.shapes[0].expression.predicate))
        self.assertEqual(str(EX.o1), str(shex.shapes[0].expression.valueExpr.values[0]))
    def test_explicit_uris(self):
        shex_str = f"""
BASE <{str(FOO)}>
PREFIX ex: <{EX}>

ex:S1 {{ex:p1 [ex:o1]}}"""
        shex: ShExJ.Schema = parse(shex_str, str(BASE))
        self.assertEqual(str(EX.S1),
                         str(shex.shapes[0].id))
        self.assertEqual(str(EX.p1), str(shex.shapes[0].expression.predicate))
        self.assertEqual(str(EX.o1), str(shex.shapes[0].expression.valueExpr.values[0]))
Beispiel #9
0
    def loads(self, schema_txt: str) -> ShExJ.Schema:
        """ Parse and return schema as a ShExJ Schema

        The text is also stored on the instance as ``self.schema_text``.

        :param schema_txt: ShExC or ShExJ representation of a ShEx Schema
        :return: ShEx Schema representation of schema
        """
        self.schema_text = schema_txt
        # Text whose first non-blank character is '{' is treated as ShExJ (JSON).
        # startswith() is used instead of indexing [0] so that empty or
        # whitespace-only text falls through to the ShExC parser rather than
        # raising IndexError here.
        if schema_txt.lstrip().startswith('{'):
            # TODO: figure out how to propagate self.base_location into this parse
            return cast(ShExJ.Schema, loads(schema_txt, ShExJ))
        return generate_shexj.parse(schema_txt, self.base_location)
Beispiel #10
0
    def __init__(self,
                 schema: Union[ShExJ.Schema, str],
                 base: Optional[str] = None) -> None:
        """ Construct a converter

        :param schema: schema string or instance to parse
        :param base: stored unchanged on the instance as ``self.base``
        """
        self.base = base
        # A ready-made Schema instance is kept as is; anything else goes through the parser
        already_parsed = isinstance(schema, ShExJ.Schema)
        self.schema = schema if already_parsed else generate_shexj.parse(schema)
Beispiel #11
0
    def __init__(self, schema: Union[ShExJ.Schema, str], base: Optional[str] = None,
                 namespaces: Optional[Union[NamespaceManager, Graph]] = None) -> None:
        """ Construct a converter

        :param schema: schema string or instance to parse
        :param base: module base
        :param namespaces: Used for namespace maps; a Graph contributes its namespace_manager
        """
        self.base = base
        # A ready-made Schema instance is kept as is; anything else goes through the parser
        already_parsed = isinstance(schema, ShExJ.Schema)
        self.schema = schema if already_parsed else generate_shexj.parse(schema)
        if isinstance(namespaces, Graph):
            self.namespaces = namespaces.namespace_manager
        else:
            self.namespaces = namespaces
        self.referenced_prefixes = set()
Beispiel #12
0
    def load_shapes(self):
        """Download the go-cam-shapes ShEx schema and index selected shapes into ``self.shapes``.

        After the call ``self.shapes`` maps each loaded shape name to a dict of
        ``predicate -> [value names]`` built from the shape's TripleConstraints.
        The ``rdf:type`` predicate is removed from every loaded shape.

        :raises requests.exceptions.HTTPError: the download returned an error status
        :raises requests.exceptions.Timeout: the server did not answer in time
        """
        self.shapes = {}
        shapes_to_load = {
            "ProteinContainingComplex", "MolecularFunction",
            "CellularComponent", "BiologicalProcess", "AnatomicalEntity",
            "InformationBiomacromolecule"
        }

        shex_url = "https://raw.githubusercontent.com/geneontology/go-shapes/master/shapes/go-cam-shapes.shex"
        # Bound the wait and fail on HTTP errors here, instead of hanging
        # forever or feeding an error page to the ShEx parser.
        shex_response = requests.get(shex_url, timeout=30)
        shex_response.raise_for_status()
        shex_raw = shex_response.text
        # Round-trip through JSON text so the schema can be walked as plain dicts/lists
        shex_json_str = generate_shexj.parse(shex_raw)._as_json_dumps()
        full_shex_ds = json.loads(shex_json_str)
        for shape in full_shex_ds["shapes"]:
            shape_name = path.basename(shape["id"])
            if shape_name not in shapes_to_load:
                continue
            shape_predicates = self.shapes[shape_name] = {}
            # Use the entries under 'shapeExprs' when present, else the shape itself
            shexps = shape.get('shapeExprs')
            if shexps is None:
                shexps = [shape]
            for shexp in shexps:
                if not (isinstance(shexp, dict) and 'expression' in shexp):
                    continue
                for exp in shexp['expression']['expressions']:
                    if exp['type'] != 'TripleConstraint':
                        continue
                    predicate = contract_uri_wrapper(exp['predicate'])[0]
                    shape_predicates[predicate] = []
                    values = exp['valueExpr']
                    if isinstance(values, dict):
                        values = values['shapeExprs']
                    else:
                        values = [values]
                    for v in values:
                        # path.basename(v) - Gets the Shape name minus the URL prefix
                        shape_predicates[predicate].append(path.basename(v))
            # pop() rather than del: a loaded shape that yielded no rdf:type
            # predicate must not abort the whole load with KeyError.
            shape_predicates.pop("rdf:type", None)
        # TODO: Get this into ShEx spec or delete this when we decide against it
        self.shapes["CellularComponent"]["BFO:0000050"].append(
            "CellularComponent")  # CC-part_of->CC
Beispiel #13
0
def validate_shexc(shexc_str: str, input_fname: str) -> bool:
    """
    Parse a ShExC string, then check the resulting object with _is_valid() and compare_json()
    :param shexc_str: String to validate
    :param input_fname: Name of source file for error reporting
    :return: True if pass
    """
    parsed = parse(shexc_str)
    if parsed is None:
        return False
    parsed['@context'] = "http://www.w3.org/ns/shex.jsonld"
    schema_obj = jsg_loads(as_json(parsed), ShExJ)
    captured = StringIO()
    # Anything the checks print is captured and only shown on failure
    with redirect_stdout(captured):
        # Short-circuit: compare_json only runs when the object itself is valid
        passed = bool(schema_obj._is_valid()
                      and compare_json(input_fname, as_json(schema_obj), captured))
    if passed:
        return True
    print("File: {} - ".format(input_fname))
    print(captured.getvalue())
    return False
Beispiel #14
0
    def generate_shexj_from_shexstament(shexstatement):
        """
        Parse a ShEx statement and return it as pretty-printed ShExJ.

        Parameters
        ----------
          shexstatement : str
            shex text handed to the parser

        Returns
        -------
          shexj
            shape expression in JSON (ShExJ), indented by 4 spaces; an empty
            string if any step raised (the error is printed, not re-raised)

        """
        try:
            raw_json = parse(shexstatement)._as_json
            # Re-serialise through json to get the indented layout
            return json.dumps(json.loads(raw_json), indent=4, sort_keys=False)
        except Exception as err:
            print("Unable to parse. Error: " + str(err))
            return ""
Beispiel #15
0
            E = m.group(0)
            with open(dir + E, 'r') as f:
                Edict[E] = f.read()
        else:
            print("{} does not match: {}".format(l, m))

# make sure BASE is set to http://www.wikidata.org/wiki/EntitySchema:Exyz in each Exyz file
for E, schema in Edict.items():
    base_line = 'BASE <' + WIKIBASE + E + '>'
    base_is_set = False
    out_lines = []
    for line in schema.split('\n'):
        # every existing BASE line is replaced by the canonical one
        if line.startswith('BASE '):
            line = base_line
            base_is_set = True
        out_lines.append(line + '\n')
    newschema = ''.join(out_lines)
    if not base_is_set:
        # no BASE line found: put the canonical one in front
        newschema = base_line + '\n' + newschema
    Edict[E] = newschema

# convert each schema to ShExJ, load it as JSON-LD and write it out as N3
for E, schema in Edict.items():
    print('-----processing {}:{}'.format(E, schema), file=sys.stderr)
    shexj = parse(InputStream(schema))
    shexj['@context'] = "http://www.w3.org/ns/shex.jsonld"
    shexj['@id'] = WIKIBASE + E
    g = Graph().parse(data=as_json(shexj, indent=None), format="json-ld")

    # put original ShExC code as rdfs:comment into schema
    g.add((URIRef(WIKIBASE + E), RDFS.comment, Literal(schema)))
    # write to N3; the with-block closes (and flushes) the output file after
    # each schema instead of leaving one open handle per iteration
    with open(dir + E + '.n3', "wb") as n3_file:
        g.serialize(n3_file, format='n3')
Beispiel #16
0
 def test_no_base(self):
     """Check that relative identifiers stay relative when no base is given to parse()."""
     schema_text = '<S1> {<p1> [<o1>]}'
     schema: ShExJ.Schema = parse(schema_text)
     first_shape = schema.shapes[0]
     self.assertEqual("S1", str(first_shape.id))
     constraint = first_shape.expression
     self.assertEqual("p1", str(constraint.predicate))
     self.assertEqual("o1", str(constraint.valueExpr.values[0]))
 def shexc_to_shexj(self, shexc, base=None) -> ShExJ.Schema:
     """Parse ShExC text and tag the result with the ShEx JSON-LD context.

     :param shexc: ShExC text handed to parse()
     :param base: passed to parse() as ``default_base``
     :return: the parsed schema with its '@context' entry set
     """
     schema: ShExJ.Schema = parse(shexc, default_base=base)
     # Fail the calling test if parse() produced nothing
     self.assertIsNotNone(schema, "Compile error")
     schema['@context'] = "http://www.w3.org/ns/shex.jsonld"
     return schema
 def test_shex(self):
     """Check that parsing ``shex`` and serialising the result with as_json() gives ``shexj``."""
     actual_json = as_json(parse(shex))
     self.assertEqual(shexj, actual_json)
Beispiel #19
0
 def test_1(self):
     """Smoke test: parse() accepts ``shex_schema`` without raising."""
     parse(shex_schema)
     # Reaching this line means the parser did not raise
     self.assertTrue(True, msg="Parser didn't die")
 def test_no_base(self):
     """Check that relative identifiers stay relative when no base is given to parse()."""
     schema_text = '<S1> {<p1> [<o1>]}'
     schema: ShExJ.Schema = parse(schema_text)
     first_shape = schema.shapes[0]
     self.assertEqual("S1", str(first_shape.id))
     constraint = first_shape.expression
     self.assertEqual("p1", str(constraint.predicate))
     self.assertEqual("o1", str(constraint.valueExpr.values[0]))