Beispiel #1
0
def test_time():
    """Check the time fixture converts to the expected XSD.time literals.

    Each expected (predicate, lexical form) pair must occur exactly once on
    the ``event/1`` subject of the generated Turtle.
    """
    with CSVW(csv_path="tests/datatypes.time.csv",
              metadata_path="tests/datatypes.time.csv-metadata.json") as csvw:
        turtle = csvw.to_rdf()

    graph = ConjunctiveGraph()
    graph.parse(data=turtle, format="turtle")

    ns = Namespace('https://www.example.org/')

    # (predicate local name, expected lexical form), checked in this order.
    expected_times = (
        ('time1', "19:30:00"),
        ('time2', "09:30:10.5"),
        ('time3', "10:30:10Z"),
        ('time4', "11:30:10-06:00"),
        ('time5', "04:30:10+04:00"),
    )
    for column, lexical in expected_times:
        literal = Literal(lexical, datatype=XSD.time)
        matches = list(graph.triples((ns['event/1'], ns[column], literal)))
        assert len(matches) == 1
Beispiel #2
0
def test_literals_with_new_lines():
    """Check that quoted CSV cells spanning several lines survive conversion.

    The ``taxi`` description must have 2 lines, the ``multi-hop flight``
    description 4 lines, and the ``dinner`` description must contain a
    right single quotation mark (U+2019) and the text ``('')``.
    """
    csv_path = "tests/parsing.quoted_newlines.csv"
    metadata_path = "tests/parsing.quoted_newlines.csv-metadata.json"
    # CSVW is usable as a context manager; using it here makes sure whatever
    # cleanup it performs on exit also runs when the conversion raises.
    with CSVW(csv_path=csv_path, metadata_path=metadata_path) as csvw:
        rdf_contents = csvw.to_rdf()

    g = ConjunctiveGraph()
    g.parse(data=rdf_contents, format="turtle")

    ns = Namespace("http://example.org/expense/")
    desc = URIRef("http://example.org/desc")

    taxi_triples = list(g.triples((ns['taxi'], desc, None)))
    assert len(taxi_triples) == 1
    taxi_desc = taxi_triples[0][2]
    assert isinstance(taxi_desc, Literal)
    assert len(taxi_desc.value.splitlines()) == 2

    # The subject contains a space, so it is spelled out percent-encoded.
    flight = URIRef("http://example.org/expense/multi-hop%20flight")
    flight_triples = list(g.triples((flight, desc, None)))
    assert len(flight_triples) == 1
    flight_desc = flight_triples[0][2]
    assert isinstance(flight_desc, Literal)
    assert len(flight_desc.value.splitlines()) == 4

    dinner_triples = list(g.triples((ns['dinner'], desc, None)))
    assert len(dinner_triples) == 1
    dinner_desc = dinner_triples[0][2]
    assert isinstance(dinner_desc, Literal)
    assert u'\u2019' in dinner_desc, "Expected to read unicode characters"
    assert u"('')" in dinner_desc, "Expected to read apostrophes"
Beispiel #3
0
def test_default_with_datatype():
    """Check typed values produced with the virtual1.default.datatype metadata.

    For subjects ``sub-1`` and ``sub-2`` the test expects exactly one
    ``active`` value typed ``xsd:boolean`` and truthy, exactly one
    ``stringprop1`` equal to ``"some string"``, and exactly one
    ``stringprop2`` whose value contains no ``%20`` escape.
    """
    # CSVW is usable as a context manager; using it here makes sure whatever
    # cleanup it performs on exit also runs when the conversion raises.
    with CSVW(
            csv_path='tests/virtual1.csv',
            metadata_path='tests/virtual1.default.datatype.csv-metadata.json'
    ) as csvw:
        rdf_output = csvw.to_rdf()

    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    ns = Namespace("http://example.org/")

    for x in [1, 2]:
        subject = ns['sub-{}'.format(x)]

        active_vals = list(g.triples((subject, ns['active'], None)))
        assert len(active_vals) == 1
        active_val = active_vals[0][2]
        assert isinstance(active_val, Literal)
        assert active_val.datatype == XSD.boolean
        assert active_val.value

        string_vals = list(g.triples((subject, ns['stringprop1'], None)))
        assert len(string_vals) == 1
        string_val = string_vals[0][2]
        assert isinstance(string_val, Literal)
        assert string_val.value == "some string"

        string_vals = list(g.triples((subject, ns['stringprop2'], None)))
        assert len(string_vals) == 1
        string_val = string_vals[0][2]
        assert isinstance(string_val, Literal)
        # A plain string value must not have been URL-escaped.
        assert "%20" not in string_val.value
def test_null_values_with_single_string():
    """Check that cells matching the configured null strings yield no RDF.

    Relies on the module-level names ``subj_ns``, ``id_uri`` and
    ``sect_uri`` defined elsewhere in this file.
    """
    # CSVW is usable as a context manager; using it here makes sure whatever
    # cleanup it performs on exit also runs when the conversion raises.
    with CSVW(csv_path="tests/null1.csv",
              metadata_path="tests/null1.single.csv-metadata.json") as csvw:
        rdf_contents = csvw.to_rdf()

    g = ConjunctiveGraph()
    g.parse(data=rdf_contents, format="turtle")

    # The row keyed by the null marker must not produce a subject.
    all_subjects = set(g.subjects())
    assert subj_ns['null_key'] not in all_subjects
    assert subj_ns['1'] in all_subjects
    assert len(all_subjects) == 4

    # Null valued objects should not be created
    all_objects = set(g.objects())
    assert Literal('null_key', datatype=XSD.token) not in all_objects
    assert Literal('null_sector') not in all_objects
    assert Literal('null_id', datatype=XSD.token) not in all_objects
    assert Literal('PUBLIC') in all_objects
    assert Literal('12', datatype=XSD.token) in all_objects

    # Spot check: within one row the null cell is dropped while the other
    # cell of the same row is still emitted.
    null_key_lit = Literal('null_id', datatype=XSD.token)
    assert len(list(g.triples((subj_ns['2'], id_uri, null_key_lit)))) == 0

    priv_lit = Literal('PRIVATE')
    assert len(list(g.triples((subj_ns['2'], sect_uri, priv_lit)))) == 1

    null_sector_lit = Literal('null_sector')
    assert len(list(g.triples((subj_ns['3'], sect_uri, null_sector_lit)))) == 0

    twelve_lit = Literal('12', datatype=XSD.token)
    assert len(list(g.triples((subj_ns['3'], id_uri, twelve_lit)))) == 1
Beispiel #5
0
def ConvertToRDFN3(filename, destinationFileName):
    """Convert the N-Triples file *filename* into N3 at *destinationFileName*.

    Args:
        filename: source to parse with the ``nt`` parser.
        destinationFileName: path the N3 serialization is written to.
    """
    graph = ConjunctiveGraph()
    graph.parse(filename, format="nt")
    # Let rdflib write the destination itself. This avoids a manually managed
    # file handle (previously leaked if serialization raised) and does not
    # depend on whether serialize() returns bytes or str.
    graph.serialize(destination=destinationFileName, format="n3")
Beispiel #6
0
def ConvertToSQLLITE(filename, destinationFileName):
    """Copy every triple of the N-Triples file *filename* into an SQLite store.

    Args:
        filename: source to parse with the ``nt`` parser.
        destinationFileName: location handed to the ``SQLite`` store's
            ``open`` call (created if needed).
    """
    source_graph = ConjunctiveGraph()
    source_graph.parse(filename, format="nt")

    sql = ConjunctiveGraph('SQLite')
    sql.open(destinationFileName, create=True)
    try:
        for triple in source_graph.triples((None, None, None)):
            sql.add(triple)
        sql.commit()
    finally:
        # Always release the store, even when an add or the commit fails.
        sql.close()
Beispiel #7
0
def run(input_file, input_format_hint, output_format):
    """Parse *input_file*, re-add its triples sorted by subject, print the result.

    Args:
        input_file: source handed to ``ConjunctiveGraph.parse``.
        input_format_hint: parser format name for *input_file*.
        output_format: serializer format name used for the printed output.
    """
    g = ConjunctiveGraph(store=OrderedAndIndexedStore())
    g.parse(input_file, format=input_format_hint)

    # Snapshot the triples before emptying the graph below.
    triples = sorted(g.triples((None, None, None)), key=lambda t: t[0])

    # Clear the graph and re-insert in subject order.
    g.remove((None, None, None))
    for triple in triples:
        g.add(triple)

    serialized = g.serialize(format=output_format)
    if isinstance(serialized, bytes):
        # Older rdflib returns bytes: split first, then decode line by line.
        lines = (raw.decode() for raw in serialized.splitlines())
    else:
        # Newer rdflib returns str, which has no decode().
        lines = serialized.splitlines()
    for line in lines:
        print(line)
Beispiel #8
0
def catalyst_graph_for(file):
    """Load a JSON-LD document and return it merged with its inferred triples.

    Args:
        file: location of the JSON-LD document; a value starting with ``/``
            is turned into a ``file://`` URL first.

    Returns:
        A new ``rdflib.ConjunctiveGraph`` holding the triples of the parsed
        graph followed by those of the graph returned by
        ``FuXiInferenceStore.get_inference``.
    """
    if file.startswith("/"):
        file = "file://" + file
    logging.info("InferenceStore catalyst_graph_for started")

    logging.info("InferenceStore JSON-LD loaded")

    g = ConjunctiveGraph()
    g.namespace_manager = namespace_manager
    # Graph.load() was only a thin alias for parse() and no longer exists in
    # rdflib 6+; calling parse() directly behaves the same on older versions.
    g.parse(file, format="json-ld")
    logging.info("InferenceStore base graph loaded")

    f = FuXiInferenceStore.get_instance()

    # get the inference engine
    cl = f.get_inference(g)
    logging.info("InferenceStore inference graph loaded")

    union_g = rdflib.ConjunctiveGraph()

    for s, p, o in g.triples((None, None, None)):
        union_g.add((s, p, o))

    for s, p, o in cl.triples((None, None, None)):
        union_g.add((s, p, o))

    logging.info("InferenceStore union graph prepared")

    return union_g
Beispiel #9
0
def instance_view_jsonld(request):
    """Return the stored triples about the requested instance as JSON-LD.

    Returns an ``HTTPUnauthorized`` response when the user may not read the
    instance, and raises ``HTTPNotFound`` when the context has no discussion.
    Triples whose object contains ``_with_no_name_entry`` are left out.
    """
    from assembl.semantic.virtuoso_mapping import AssemblQuadStorageManager
    from rdflib import URIRef, ConjunctiveGraph
    context = request.context
    user_id = authenticated_userid(request) or Everyone
    permissions = get_permissions(user_id, context.get_discussion_id())
    instance = context._instance
    can_read = instance.user_can(user_id, CrudPermissions.READ, permissions)
    if not can_read:
        return HTTPUnauthorized()
    discussion = context.get_instance_of_class(Discussion)
    if not discussion:
        raise HTTPNotFound()
    manager = AssemblQuadStorageManager()
    # The first six characters of the instance URI are replaced by the
    # manager's local URI prefix.
    subject = URIRef(manager.local_uri() + instance.uri()[6:])
    storage_name = manager.discussion_storage_name(discussion.id)
    store = get_virtuoso(instance.db, storage_name)
    graph = ConjunctiveGraph(store, storage_name)
    # NOTE(review): the original author flagged this lookup with
    # "Something is wrong here." and had tried an explicit SPARQL query
    # (select ?p ?o ?g where {graph ?g {<uri> ?p ?o}}) instead.
    statements = []
    for _, predicate, obj in graph.triples((subject, None, None)):
        if '_with_no_name_entry' in obj:
            continue
        statements.append(
            '%s %s %s.' % (subject.n3(), predicate.n3(), obj.n3()))
    return manager.quads_to_jsonld('\n'.join(statements))
Beispiel #10
0
    def generate(cls, utensils):
        """Copy the triples describing each utensil's actions into ``rdfSubject.db``.

        The actions file named by ``STORE['actions']`` is loaded into a
        scratch graph; for every action of every utensil, all triples whose
        subject is ``action.resUri`` are added to ``rdfSubject.db``.
        """
        graph = ConjunctiveGraph()
        load_rdf_file(STORE['actions'], graph)

        for utensil in utensils:
            for action in utensil.actions:
                # Explicit loop: map() is lazy on Python 3, so using it only
                # for its side effect would never add anything.
                for triple in graph.triples((action.resUri, None, None)):
                    rdfSubject.db.add(triple)
Beispiel #11
0
    def _store_in_file(self, cur_g: ConjunctiveGraph, cur_file_path: str, context_path: str) -> None:
        """Serialize *cur_g* to *cur_file_path* in ``self.output_format``.

        For ``json-ld`` output the document is written as indented JSON; when
        *context_path* is a key of ``self.context_map`` the mapped context is
        used for serialization and every ``@context`` entry is then replaced
        by *context_path* itself. A sentence is added to ``self.repok`` at
        the end.
        """
        # Hack kept from the original implementation: re-add every triple to
        # a fresh graph (with the first context found for it) because some
        # triples, in particular the provenance ones, were otherwise missing
        # from the stored file.
        copied_g: ConjunctiveGraph = ConjunctiveGraph()
        for s, p, o in cur_g.triples((None, None, None)):
            g_iri: Optional[URIRef] = next(
                (ctx.identifier for ctx in cur_g.contexts((s, p, o))), None)
            copied_g.addN([(s, p, o, g_iri)])

        if self.output_format != "json-ld":
            copied_g.serialize(cur_file_path, format=self.output_format, encoding="utf-8")
        else:
            has_mapped_context = context_path is not None and context_path in self.context_map
            if has_mapped_context:
                raw = copied_g.serialize(format="json-ld", context=self.context_map[context_path])
                cur_json_ld: Any = json.loads(raw.decode("utf-8"))

                # Point at the context by path instead of embedding it.
                if isinstance(cur_json_ld, dict):
                    cur_json_ld["@context"] = context_path
                else:  # it is a list
                    for entry in cur_json_ld:
                        entry["@context"] = context_path
            else:
                raw = copied_g.serialize(format="json-ld")
                cur_json_ld = json.loads(raw.decode("utf-8"))

            with open(cur_file_path, "wt", encoding='utf-8') as out_file:
                json.dump(cur_json_ld, out_file, indent=4, ensure_ascii=False)

        self.repok.add_sentence(f"File '{cur_file_path}' added.")
def test_catalog_schema(instance_file, json_schema_file, shex_file,
                        shex_subject, shex_rule):
    """Validate an instance against a JSON schema, then check it with ShEx.

    Args:
        instance_file: name passed to ``get_file`` to load the instance.
        json_schema_file: name passed to ``get_file`` to load the JSON schema.
        shex_file: path of the ShEx schema text file.
        shex_subject: RDF type whose instances are used as ShEx focus nodes.
        shex_rule: start shape handed to ``evaluate``.

    Raises:
        ValidationError: re-raised when JSON schema validation fails.

    ShEx non-conformance is only printed; it does not fail the test.
    """
    try:
        instance = get_file(instance_file)

        abs_path = 'file:' + os.getcwd() + os.sep + PATH_TO_SCHEMAS
        resolver = RefResolver(base_uri=abs_path, referrer=None)
        schema = get_file(json_schema_file)
        validator = Draft7Validator(schema=schema, resolver=resolver)
        validator.validate(instance)

        context = get_file('ejp_vocabulary_context.json')
        instance["@context"] = context

        graph = ConjunctiveGraph()
        graph.parse(data=json.dumps(instance), format='json-ld')

        # Read the schema once. Calling read() inside the loop returned an
        # empty string for every focus node after the first.
        with open(shex_file, 'r') as shex_schema:
            shex_text = shex_schema.read()

        for s, _, _ in graph.triples((None, RDF.type, shex_subject)):
            print("checking ShEX valdidation for {}".format(s))
            rslt, reason = evaluate(graph,
                                    shex_text,
                                    start=shex_rule,
                                    focus=s)
            if not rslt:
                print(f"{reason if reason else 'DOES NOT CONFORM'}")

    except ValidationError as e:
        print("testing catalog schema failed")
        print("error validating file : {}".format(e))
        raise
    print("testing catalog schema passed")
Beispiel #13
0
def instance_view_jsonld(request):
    """Render the triples having the requested instance as subject as JSON-LD.

    An ``HTTPUnauthorized`` response is returned when the user lacks read
    permission; ``HTTPNotFound`` is raised when no discussion is found.
    Objects containing ``_with_no_name_entry`` are skipped.
    """
    from assembl.semantic.virtuoso_mapping import AssemblQuadStorageManager
    from rdflib import URIRef, ConjunctiveGraph
    view_context = request.context
    user_id = authenticated_userid(request) or Everyone
    discussion_id = view_context.get_discussion_id()
    permissions = get_permissions(user_id, discussion_id)
    instance = view_context._instance
    if not instance.user_can(user_id, CrudPermissions.READ, permissions):
        return HTTPUnauthorized()
    discussion = view_context.get_instance_of_class(Discussion)
    if not discussion:
        raise HTTPNotFound()
    quad_manager = AssemblQuadStorageManager()
    # Swap the first six characters of the instance URI for the local prefix.
    instance_uri = URIRef(quad_manager.local_uri() + instance.uri()[6:])
    storage = quad_manager.discussion_storage_name(discussion.id)
    virtuoso = get_virtuoso(instance.db, storage)
    conjunctive = ConjunctiveGraph(virtuoso, storage)
    matches = conjunctive.triples((instance_uri, None, None))
    # NOTE(review): flagged "Something is wrong here." by the original
    # author, who had also tried a direct SPARQL select over named graphs.
    rendered = (
        '%s %s %s.' % (instance_uri.n3(), pred.n3(), obj.n3())
        for (_, pred, obj) in matches
        if not '_with_no_name_entry' in obj
    )
    return quad_manager.quads_to_jsonld('\n'.join(rendered))
Beispiel #14
0
def catalyst_graph_for(file):
    """Build a graph from a JSON-LD document plus its inferred triples.

    A *file* value starting with ``/`` is prefixed with ``file://``. The
    document is converted to N-Quads with ``jsonld.to_rdf``, parsed, passed
    to ``FuXiInferenceStore.get_inference``, and the triples of both graphs
    are added to a new ``rdflib.ConjunctiveGraph`` which is returned.
    """
    source = 'file://' + file if file.startswith('/') else file
    logging.info("InferenceStore catalyst_graph_for started")

    nquads = jsonld.to_rdf(source, {'format': 'application/nquads'})
    logging.info("InferenceStore JSON-LD loaded")

    base_graph = ConjunctiveGraph()
    apply_catalyst_napespace_manager(base_graph)
    base_graph.parse(data=nquads, format='nquads')
    logging.info("InferenceStore base graph loaded")

    inference_store = FuXiInferenceStore.get_instance()

    # Ask the inference engine for the graph derived from the base graph.
    inferred = inference_store.get_inference(base_graph)
    logging.info("InferenceStore inference graph loaded")

    union_g = rdflib.ConjunctiveGraph()

    # Base triples first, then the inferred ones.
    for part in (base_graph, inferred):
        for s, p, o in part.triples((None, None, None)):
            union_g.add((s, p, o))

    logging.info("InferenceStore union graph prepared")

    return union_g
def get_ref_vocabs():
    """Return a dict mapping each subject to its dcterms:isVersionOf object.

    The triples are read from the file at ``ag.vocabulariesref``; an empty
    dict is returned when that file does not exist. Keys and values are
    plain strings.
    """
    if not os.path.isfile(ag.vocabulariesref):
        return {}
    graph = Graph()
    graph.parse(ag.vocabulariesref)
    version_of = namespaces['dcterms']['isVersionOf']
    return {
        str(subject): str(target)
        for subject, _, target in graph.triples((None, version_of, None))
    }
Beispiel #16
0
def labelsforpath(pFilePath):
    """Return the skos:prefLabel values of the resource named after a TriG file.

    The subject looked up is ``BDR[<file stem>]``. The result maps each
    label's language tag to its value; a later label with the same language
    replaces an earlier one.
    """
    subject = BDR[Path(pFilePath).stem]
    model = ConjunctiveGraph()
    model.parse(str(pFilePath), format="trig")
    return {
        label.language: label.value
        for _, _, label in model.triples((subject, SKOS.prefLabel, None))
    }
Beispiel #17
0
def test_others(metadata_file):
    """Check the remaining datatypes convert to correctly typed literals.

    Args:
        metadata_file: path of the CSVW metadata to use with
            ``tests/datatypes.others.csv`` (supplied by the caller).

    Every (predicate, value, datatype) row below must occur exactly once on
    the ``event/1`` subject.
    """
    with CSVW(csv_path="tests/datatypes.others.csv",
              metadata_path=metadata_file) as csvw:
        turtle = csvw.to_rdf()

    graph = ConjunctiveGraph()
    graph.parse(data=turtle, format="turtle")

    expected_rows = [
        (NS['custom_pred'], "someformatteddata",
         URIRef("https://www.datatypes.org#mycustomdatatypedefinition")),
        (NS["anyURI"], "https://www.sampleuri.org", XSD.anyURI),
        (NS["base64Binary"], "0FB8", XSD.base64Binary),
        # Booleans
        (NS['boolean1'], True, XSD.boolean),
        (NS['boolean2'], False, XSD.boolean),
        (NS['boolean3'], True, XSD.boolean),
        (NS['boolean4'], False, XSD.boolean),
        # Integer family
        (NS['integer'], -3, XSD.integer),
        (NS['long'], -1231235555, XSD.long),
        (NS['int'], 3, XSD.int),
        (NS['short'], -1231, XSD.short),
        (NS['byte'], 45, XSD.byte),
        (NS['nonNegativeInteger'], 111, XSD.nonNegativeInteger),
        (NS['positiveInteger'], 123456, XSD.positiveInteger),
        (NS['unsignedLong'], 3456, XSD.unsignedLong),
        (NS['unsignedInt'], 7890000, XSD.unsignedInt),
        (NS['unsignedShort'], 65000, XSD.unsignedShort),
        (NS['unsignedByte'], 254, XSD.unsignedByte),
        (NS['nonPositiveInteger'], -123, XSD.nonPositiveInteger),
        (NS['negativeInteger'], -34500000, XSD.negativeInteger),
        # Decimal and floating point, compared by lexical form
        (NS['decimal'], "+3.5", XSD.decimal),
        (NS['double'], "4268.22752E11", XSD.double),
        (NS['float'], "+24.3e-3", XSD.float),
        # Durations
        (NS['duration'], "P2Y6M5DT12H35M30S", XSD.duration),
        (NS['dayTimeDuration'], "P1DT2H", XSD.dayTimeDuration),
        (NS['yearMonthDuration'], "P0Y20M", XSD.yearMonthDuration),
        # Gregorian fragments
        (NS['gDay'], "---02", XSD.gDay),
        (NS['gMonth'], "--04", XSD.gMonth),
        (NS['gMonthDay'], "--04-12", XSD.gMonthDay),
        (NS['gYear'], "2004", XSD.gYear),
        (NS['gYearMonth'], "2004-04", XSD.gYearMonth),
        # Binary and string-derived types
        (NS['hexBinary'], "0FB8", XSD.hexBinary),
        (NS['QName'], "myElement", XSD.QName),
        (NS['normalizedString'], "This is a normalized string!",
         XSD.normalizedString),
        (NS['token'], "token", XSD.token),
        (NS['language'], "en", XSD.language),
        (NS['Name'], "_my.Element", XSD.Name),
        (NS['NMTOKEN'], "123_456", XSD.NMTOKEN),
        # Embedded markup / JSON
        (NS['xml'], "<a>bla</a>", RDF.XMLLiteral),
        (NS['html'], "<div><p>xyz</p></div>", RDF.HTML),
        (NS['json'], "{}", CSVW_NS.JSON),
    ]
    for pred, value, datatype in expected_rows:
        literal = Literal(value, datatype=datatype)
        matches = list(graph.triples((NS['event/1'], pred, literal)))
        assert len(matches) == 1, "Failed for {}".format(pred)
Beispiel #18
0
def test_bool_with_format():
    """Check boolean columns convert to xsd:boolean literals.

    ``bool1``..``bool3`` must each be true on ``event/1`` and false on
    ``event/2`` and ``event/3``, each triple occurring exactly once.
    """
    # CSVW is usable as a context manager; using it here makes sure whatever
    # cleanup it performs on exit also runs when the conversion raises.
    with CSVW(csv_path="tests/datatypes.bool.csv",
              metadata_path="tests/datatypes.bool.csv-metadata.json") as csvw:
        rdf_output = csvw.to_rdf()

    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    true_lit = Literal(True, datatype=XSD.boolean)
    false_lit = Literal(False, datatype=XSD.boolean)

    expected_by_event = (
        ('event/1', true_lit),
        ('event/2', false_lit),
        ('event/3', false_lit),
    )
    for event, expected_lit in expected_by_event:
        for column in ('bool1', 'bool2', 'bool3'):
            matches = list(g.triples((NS[event], NS[column], expected_lit)))
            assert len(matches) == 1, \
                "Failed for {} {}".format(event, column)
Beispiel #19
0
def verify_rdf_contents(contents, fmt):
    """Assert that serialized RDF *contents* describe the expected four books.

    Args:
        contents: RDF document as a string.
        fmt: rdflib parser format name for *contents*.

    Checks the total triple count against
    ``NUM_SUBJECTS * NUM_TRIPLES_PER_SUBJ``, the set of ISBN subjects, and
    that each subject has exactly one literal of the expected datatype for
    ``isbnnumber``, ``pagecount``, ``hardcover`` and ``price``.
    """
    graph = ConjunctiveGraph()
    graph.parse(data=contents, format=fmt)

    books = Namespace('http://www.books.org/')
    isbn = Namespace("http://www.books.org/isbn/")

    # Total number of triples.
    triple_count = 0
    for _ in graph.triples((None, None, None)):
        triple_count += 1
    assert triple_count == NUM_SUBJECTS * NUM_TRIPLES_PER_SUBJ

    # Number of distinct subjects.
    subjects = set(graph.subjects())
    expected_ids = ["0062316095", "0374532508", "1610391845", "0374275637"]
    assert len(subjects) == len(expected_ids)

    # Property local name -> datatype its single value must carry.
    typed_properties = (
        ('isbnnumber', XSD.positiveInteger),
        ('pagecount', XSD.unsignedShort),
        ('hardcover', XSD.boolean),
        ('price', XSD.decimal),
    )
    for book_id in expected_ids:
        assert isbn[book_id] in subjects

        for prop, datatype in typed_properties:
            found = list(graph.triples((isbn[book_id], books[prop], None)))
            assert len(found) == 1
            value = found[0][2]
            assert isinstance(value, Literal)
            assert value.datatype == datatype
Beispiel #20
0
 def __isIncluded(self, targetURI, returnedDocURI):
     """Report whether *targetURI* occurs anywhere in the returned document.

     The document at *returnedDocURI* is parsed using
     ``self.result['format']``. When the URI appears as subject, predicate
     or object, the matching triples (in that order) are stored in
     ``self.result['relatedTriple']``.

     Returns:
         1 if at least one triple mentions *targetURI*, otherwise 0. Any
         exception is recorded in ``self.result`` and also yields 0.
     """
     try:
         g = ConjunctiveGraph()
         g.parse(returnedDocURI, format=self.result['format'])
         target = rdflib.URIRef(targetURI)
         related = list(g.triples((target, None, None)))
         related.extend(g.triples((None, target, None)))
         related.extend(g.triples((None, None, target)))
         if not related:
             return 0
         self.result['relatedTriple'] = related
         return 1
     except Exception as e:
         # Deliberately broad: any load/parse failure is reported through
         # self.result instead of propagating to the caller.
         print(e)
         self.result['isException'] = 1
         self.result['exceptionType'] = 'RDF Graph Load Exception'
         self.result['exceptionMsg'] = repr(e)
         return 0
Beispiel #21
0
 def from_string(self, uri, text, format="xml", encoding="utf-8"):
     """Reset this object and populate it from RDF *text* describing *uri*.

     Args:
         uri: URI of the described resource, also used as the source id.
         text: serialized RDF.
         format: rdflib parser format name for *text*.
         encoding: encoding set on the input source.
     """
     self.reset()
     self.set_uri(uri)
     t = TextInputSource(text, system_id=uri)
     t.setEncoding(encoding)
     g = ConjunctiveGraph(identifier=self.uri)
     # Pass format by keyword: the second positional parameter of parse()
     # is publicID, so the requested format used to be silently ignored.
     g = g.parse(t, format=format)
     for prefix, ns in g.namespaces():
         self.add_namespace(prefix, ns)
     for _, p, o in g.triples((self.uri, None, None)):
         self.add_triple(p, o)
Beispiel #22
0
 def __isIncluded(self, targetURI, returnedDocURI):
     """Report whether *targetURI* occurs anywhere in the returned document.

     The document at *returnedDocURI* is parsed using
     ``self.result['format']``. When the URI appears as subject, predicate
     or object, the matching triples (in that order) are stored in
     ``self.result['relatedTriple']``.

     Returns:
         1 if at least one triple mentions *targetURI*, otherwise 0. Any
         exception is recorded in ``self.result`` and also yields 0.
     """
     try:
         g = ConjunctiveGraph()
         g.parse(returnedDocURI, format=self.result['format'])
         target = rdflib.URIRef(targetURI)
         related = list(g.triples((target, None, None)))
         related.extend(g.triples((None, target, None)))
         related.extend(g.triples((None, None, target)))
         if not related:
             return 0
         self.result['relatedTriple'] = related
         return 1
     except Exception as e:
         # Deliberately broad: any load/parse failure is reported through
         # self.result instead of propagating to the caller.
         print(e)
         self.result['isException'] = 1
         self.result['exceptionType'] = 'RDF Graph Load Exception'
         self.result['exceptionMsg'] = repr(e)
         return 0
Beispiel #23
0
 def from_url(self, url, uri=None, format="xml",  encoding="utf-8"):
     """Reset this object and populate it from the RDF document at *url*.

     Args:
         url: location of the RDF document to parse.
         uri: URI of the described resource; *url* is used when falsy.
         format: rdflib parser format name for the document.
         encoding: accepted for signature compatibility; not used here.
     """
     self.reset()
     self.set_uri(uri if uri else url)
     g = ConjunctiveGraph(identifier=self.uri)
     # Pass format by keyword: the second positional parameter of parse()
     # is publicID, so the requested format used to be silently ignored.
     g = g.parse(url, format=format)
     for prefix, ns in g.namespaces():
         self.add_namespace(prefix, ns)
     for _, p, o in g.triples((self.uri, None, None)):
         self.add_triple(p, o)
Beispiel #24
0
def test_datetime():
    """Check the datetime fixture converts to the expected typed literals.

    Each (predicate, lexical form, datatype) row must occur exactly once on
    the ``event/1`` subject.
    """
    with CSVW(csv_path="tests/datatypes.datetime.csv",
              metadata_path="tests/datatypes.datetime.csv-metadata.json"
              ) as csvw:
        turtle = csvw.to_rdf()

    graph = ConjunctiveGraph()
    graph.parse(data=turtle, format="turtle")

    expected_rows = (
        ('datetime1', "2002-05-30T09:00:00", XSD.dateTime),
        ('datetime2', "2002-05-30T09:30:10.5", XSD.dateTime),
        ('datetime3', "2002-05-30T09:30:10Z", XSD.dateTime),
        ('datetime4', "2002-05-30T09:30:10-06:00", XSD.dateTime),
        ('datetime5', "2002-05-30T09:30:10+04:00", XSD.dateTime),
        ('datetimestamp', "2004-04-12T13:20:00-05:00", XSD.dateTimeStamp),
    )
    for column, lexical, datatype in expected_rows:
        literal = Literal(lexical, datatype=datatype)
        matches = list(graph.triples((NS['event/1'], NS[column], literal)))
        assert len(matches) == 1
Beispiel #25
0
 def get(self):
     """Write a JSON object listing every distanceToHomeM reading in the graph.

     Each entry of ``pts`` carries the subject (``who``), the distance as a
     fraction of ``maxMeters``, the raw distance value, and a display
     string in miles.
     """
     g = ConjunctiveGraph()
     addTrig(g, "http://bang:9099/graph")
     maxMeters = 65000
     pts = []
     # Single pre-formatted argument: prints the same on Python 2 and 3.
     print("loaded %s" % len(g))
     for s, _, o in g.triples((None, MAP['distanceToHomeM'], None)):
         meters = float(o)
         pts.append(dict(who=s,
                         frac=meters / maxMeters,
                         distanceToHomeM=o,
                         displayMilesDistance="%.1f miles" %
                         (meters * 0.000621371)))
     self.write(json.dumps({'pts': pts}))
Beispiel #26
0
def ConvertToSQLLITE(filename, destinationFileName):
    """Copy every triple of the N-Triples file *filename* into an SQLite store.

    Args:
        filename: source to parse with the ``nt`` parser.
        destinationFileName: location handed to the ``SQLite`` store's
            ``open`` call (created if needed).
    """
    source_graph = ConjunctiveGraph()
    source_graph.parse(filename, format="nt")

    sql = ConjunctiveGraph('SQLite')
    sql.open(destinationFileName, create=True)
    try:
        for triple in source_graph.triples((None, None, None)):
            sql.add(triple)
        sql.commit()
    finally:
        # Always release the store, even when an add or the commit fails.
        sql.close()
Beispiel #27
0
def test_date():
    """Check the date fixture converts to the expected XSD.date literals.

    Each (predicate, lexical form) pair must occur exactly once on the
    ``event/1`` subject.
    """
    with CSVW(csv_path="tests/datatypes.date.csv",
              metadata_path="tests/datatypes.date.csv-metadata.json") as csvw:
        turtle = csvw.to_rdf()

    graph = ConjunctiveGraph()
    graph.parse(data=turtle, format="turtle")

    expected_dates = (
        ('date1', "2017-01-09"),
        ('date2', "2017-01-10Z"),
        ('date3', "2017-01-11"),
        ('date4', "2002-09-24-06:00"),
        ('date5', "2002-09-24+04:00"),
    )
    for column, lexical in expected_dates:
        literal = Literal(lexical, datatype=XSD.date)
        matches = list(graph.triples((NS['event/1'], NS[column], literal)))
        assert len(matches) == 1
Beispiel #28
0
def test_literals_with_escaped_quotes():
    """Check that quotes and backslashes in CSV cells survive RDF conversion.

    Each of the ``taxi``, ``quoted expense``, ``flight`` and ``car``
    subjects must have exactly one description literal with the exact
    expected text.
    """
    csv_path = "tests/parsing.escaped_quotes.csv"
    metadata_path = "tests/parsing.escaped_quotes.csv-metadata.json"
    # CSVW is usable as a context manager; using it here makes sure whatever
    # cleanup it performs on exit also runs when the conversion raises.
    with CSVW(csv_path=csv_path, metadata_path=metadata_path) as csvw:
        rdf_contents = csvw.to_rdf()

    g = ConjunctiveGraph()
    g.parse(data=rdf_contents, format="turtle")

    ns = Namespace("http://example.org/expense/")
    desc = URIRef("http://example.org/desc")

    taxi_triples = list(g.triples((ns['taxi'], desc, None)))
    assert len(taxi_triples) == 1
    taxi_desc = taxi_triples[0][2]
    assert isinstance(taxi_desc, Literal)
    assert taxi_desc.value == "go from x to y"

    quoted_expense_triples = list(
        g.triples((URIRef("http://example.org/expense/quoted%20expense"), desc,
                   None)))
    assert len(quoted_expense_triples) == 1
    quoted_expense_desc = quoted_expense_triples[0][2]
    assert isinstance(quoted_expense_desc, Literal)
    assert quoted_expense_desc.value == "for some reason it came with quotes in it"

    flight_triples = list(g.triples((ns['flight'], desc, None)))
    assert len(flight_triples) == 1
    flight_desc = flight_triples[0][2]
    assert isinstance(flight_desc, Literal)
    assert flight_desc.value == "had to fly \"escaped quotes business\" for this trip"

    car_triples = list(g.triples((ns['car'], desc, None)))
    assert len(car_triples) == 1
    car_desc = car_triples[0][2]
    assert isinstance(car_desc, Literal)
    # The backslash is written as "\\": a bare "\ " is an invalid escape
    # sequence that newer Python versions warn about. Same string value.
    assert car_desc.value == " some \\ in it to be escaped"
Beispiel #29
0
    def from_string(self, rdf_manifest_string, format="xml"):
        """Populate the manifest from a serialized RDF string.

        Every subject is appended to ``self.items`` once; ``rdf:type``
        triples are recorded with ``add_type`` and all others with
        ``add_triple`` on the subject's entry in ``self.items_rdfobjects``.
        Namespace bindings of the parsed graph are registered afterwards.

        Args:
            rdf_manifest_string: serialized RDF.
            format: rdflib parser format name for the string.
        """
        t = TextInputSource(rdf_manifest_string)
        g = ConjunctiveGraph()
        # Pass format by keyword: the second positional parameter of parse()
        # is publicID, so the requested format used to be silently ignored.
        g = g.parse(t, format=format)

        for s, p, o in g.triples((None, None, None)):
            if s not in self.items:
                self.items.append(s)
            if p == NAMESPACES['rdf']['type']:
                self.items_rdfobjects.setdefault(s, RDFobject(uri=s)).add_type(o)
            else:
                self.items_rdfobjects.setdefault(s, RDFobject(uri=s)).add_triple(p, o)

        for prefix, ns in g.namespaces():
            self.add_namespace(prefix, ns)
def get_vocab_editorial_note(vocabprefix):
    """Return the editorial notes recorded in a vocabulary's ``status.rdf``.

    Args:
        vocabprefix: directory name of the vocabulary under
            ``ag.vocabulariesdir``.

    Returns:
        A list of ``(note, file_name)`` tuples, one per skos:editorialNote
        triple. ``file_name`` is the subject's nfo:fileName (the last one
        found) or ``None``. The list is empty when ``status.rdf`` is missing.
    """
    vocabdir = os.path.join(ag.vocabulariesdir, vocabprefix)
    vocabstatusfile = os.path.join(vocabdir, "status.rdf")
    msgs = []
    if not os.path.isfile(vocabstatusfile):
        return msgs
    graph = Graph()
    graph.parse(vocabstatusfile)
    for s, _, o in graph.triples((None, namespaces['skos']['editorialNote'], None)):
        nm = None
        # Look up with the subject node itself: re-wrapping it in URIRef
        # turned blank-node subjects into URIs that match nothing.
        for n in graph.objects(s, namespaces['nfo']['fileName']):
            nm = str(n)
        msgs.append((str(o), nm))
    return msgs
Beispiel #31
0
def test_default():
    """Check subjects and values produced with the virtual1.default metadata.

    Expects four distinct subjects including ``sub-1`` and ``sub-2``, and
    exactly one ``myvalue`` triple for ``sub-1``/``obj-1`` and for
    ``sub-2``/``obj-2``.
    """
    # CSVW is usable as a context manager; using it here makes sure whatever
    # cleanup it performs on exit also runs when the conversion raises.
    with CSVW(csv_path='tests/virtual1.csv',
              metadata_path='tests/virtual1.default.csv-metadata.json') as csvw:
        rdf_output = csvw.to_rdf()

    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    all_subjects = set(g.subjects())
    assert len(all_subjects) == 4

    ns = Namespace("http://example.org/")
    assert ns['sub-1'] in all_subjects
    assert ns['sub-2'] in all_subjects
    # list(...) consumes the generator. Wrapping it in a list literal gave a
    # one-element list, so the old assertions could never fail.
    assert len(list(g.triples((ns['sub-1'], ns['obj-1'], ns['myvalue'])))) == 1
    assert len(list(g.triples((ns['sub-2'], ns['obj-2'], ns['myvalue'])))) == 1
Beispiel #32
0
def test_jsonld():
    """Generate the Shared Canvas JSON-LD for a TEI file and sanity check it.

    The JSON-LD is also written to ``test.jsonld`` in the working directory
    and finally parsed as RDF to check the manifest type and the number of
    line annotations.
    """
    # generate shared canvas json-ld
    tei_file = "sga/data/tei/ox/ox-frankenstein_notebook_c1.xml"
    manifest_uri = 'http://example.com/frankenstein.json'
    m = Manifest(tei_file, manifest_uri)
    jsonld = m.jsonld()
    # Context manager so the handle is closed (and flushed) deterministically.
    with open('test.jsonld', 'w') as out:
        out.write(json.dumps(jsonld, indent=2))

    # find the manifest (the last matching resource wins)
    manifest = None
    for r in jsonld['@graph']:
        if '@type' in r and r['@type'] == 'sc:Manifest':
            manifest = r
    assert manifest

    # check for images
    assert 'images' in manifest

    # check for canvases
    assert 'canvases' in manifest

    # get the sequence
    assert 'sequences' in manifest
    seq = get(jsonld, manifest['sequences'][0])

    # first canvas
    assert 'first' in seq
    canvas = get(jsonld, seq['first'])
    assert canvas['label'] == '1r'

    # check the content annotations
    assert count_type(jsonld, 'sc:ContentAnnotation') == 90

    # css should be there
    assert count_type(jsonld, 'cnt:ContentAsText') == 61

    # parse the json-ld as rdf
    register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser')
    g = ConjunctiveGraph()
    jsonld_str = json.dumps(jsonld)
    g.parse(data=jsonld_str, format='json-ld')

    # quick sanity check the graph
    assert g.value(
        URIRef('http://example.com/frankenstein.json'),
        RDF.type) == URIRef('http://www.shared-canvas.org/ns/Manifest')
    line_anns = list(g.triples((None, RDF.type, SGA.LineAnnotation)))
    assert len(line_anns) == 638
Beispiel #33
0
def test_jsonld():
    """Generate the Shared Canvas JSON-LD for a TEI file and sanity check it.

    The JSON-LD is also written to ``test.jsonld`` in the working directory
    and finally parsed as RDF to check the manifest type and the number of
    line annotations.
    """
    # generate shared canvas json-ld
    tei_file = "sga/data/tei/ox/ox-frankenstein_notebook_c1.xml"
    manifest_uri = 'http://example.com/frankenstein.json'
    m = Manifest(tei_file, manifest_uri)
    jsonld = m.jsonld()
    # Context manager so the handle is closed (and flushed) deterministically.
    with open('test.jsonld', 'w') as out:
        out.write(json.dumps(jsonld, indent=2))

    # find the manifest (the last matching resource wins)
    manifest = None
    for r in jsonld['@graph']:
        if '@type' in r and r['@type'] == 'sc:Manifest':
            manifest = r
    assert manifest

    # check for images
    assert 'images' in manifest

    # check for canvases
    assert 'canvases' in manifest

    # get the sequence
    assert 'sequences' in manifest
    seq = get(jsonld, manifest['sequences'][0])

    # first canvas
    assert 'first' in seq
    canvas = get(jsonld, seq['first'])
    assert canvas['label'] == '1r'

    # check the content annotations
    assert count_type(jsonld, 'sc:ContentAnnotation') == 90

    # css should be there
    assert count_type(jsonld, 'cnt:ContentAsText') == 61

    # parse the json-ld as rdf
    register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser')
    g = ConjunctiveGraph()
    jsonld_str = json.dumps(jsonld)
    g.parse(data=jsonld_str, format='json-ld')

    # quick sanity check the graph
    manifest_type = g.value(
        URIRef('http://example.com/frankenstein.json'), RDF.type)
    assert manifest_type == URIRef('http://www.shared-canvas.org/ns/Manifest')
    line_anns = list(g.triples((None, RDF.type, SGA.LineAnnotation)))
    assert len(line_anns) == 638
Beispiel #34
0
class GraphCache(object):
    """Load vocabulary graphs into one ConjunctiveGraph, with an on-disk cache.

    Every loaded source is kept as a context of ``self.graph``. Remote
    sources are additionally serialised as Turtle into ``cachedir`` so later
    loads can read the local copy instead of fetching again.
    """

    def __init__(self, cachedir):
        """Create an empty cache, creating ``cachedir`` if it does not exist."""
        self.graph = ConjunctiveGraph()
        # path of a local file -> st_mtime seen when it was last parsed
        self.mtime_map = {}
        self.cachedir = cachedir
        if not os.path.isdir(cachedir):
            os.makedirs(cachedir)

    def load(self, url):
        """Return a graph holding the data for ``url``.

        ``url`` may be a path to a local file or an identifier of a remote
        source. ``VOCAB_SOURCE_MAP`` may redirect the identifier to another
        location to read the data from.

        Note that the returned object differs by branch: a freshly parsed
        local file yields the standalone graph it was parsed into, an already
        loaded context yields ``self.graph.get_context(...)``, and the cache /
        fetch branches yield whatever ``self.graph.parse(...)`` returns.
        """
        src = VOCAB_SOURCE_MAP.get(str(url), url)
        if os.path.isfile(url):
            context_id = create_input_source(url).getPublicId()
            last_vocab_mtime = self.mtime_map.get(url)
            vocab_mtime = os.stat(url).st_mtime
            # (re)parse when the file was never seen or changed on disk
            if not last_vocab_mtime or last_vocab_mtime < vocab_mtime:
                logger.debug("Parse file: '%s'", url)
                self.mtime_map[url] = vocab_mtime
                # use CG as workaround for json-ld always loading as dataset
                graph = ConjunctiveGraph()
                graph.parse(src, format=guess_format(src))
                # replace any stale copy of this context with the new triples
                self.graph.remove_context(context_id)
                for s, p, o in graph:
                    self.graph.add((s, p, o, context_id))
                return graph
        else:
            context_id = url

        # already in memory?
        if any(self.graph.triples((None, None, None), context=context_id)):
            # lazy %-args, consistent with the other log calls in this class
            logger.debug("Using context <%s>", context_id)
            return self.graph.get_context(context_id)

        cache_path = self.get_fs_path(url)
        if os.path.exists(cache_path):
            logger.debug("Load local copy of <%s> from '%s'", context_id, cache_path)
            return self.graph.parse(cache_path, format='turtle', publicID=context_id)
        else:
            logger.debug("Fetching <%s> to '%s'", context_id, cache_path)
            # HTML sources are read as RDFa; otherwise the parser's default
            # format handling applies (format=None)
            graph = self.graph.parse(src,
                    format='rdfa' if url.endswith('html') else None)
            with open(cache_path, 'w') as f:
                graph.serialize(f, format='turtle')
            return graph

    def get_fs_path(self, url):
        """Return the cache file path for ``url``.

        The URL is percent-encoded with no safe characters, so it maps to a
        single file name inside ``cachedir``, with a ``.ttl`` suffix.
        """
        return os.path.join(self.cachedir, quote(url, safe="")) + '.ttl'
Beispiel #35
0
    def run(self):
        """Parse the two ontologies and publish their contents to the GUI.

        Reads ``self.ontologies[0]`` and ``self.ontologies[1]``, emits one
        ``addListItem`` signal per triple of the first ontology, then for each
        ontology in turn appends its file name and triple count to the
        module-level ``listNames`` / ``listSizes`` and emits ``update_table``
        with the module-level ``tab`` mapping.
        """
        global listNames, listSizes

        ontologies = self.ontologies

        # Parse both inputs before emitting anything, so a parse error leaves
        # the GUI untouched.
        g1 = ConjunctiveGraph()
        g2 = ConjunctiveGraph()
        g1.parse(ontologies[0], format=guess_format(ontologies[0]))
        g2.parse(ontologies[1], format=guess_format(ontologies[1]))

        # Only the first ontology's triples are listed.
        for s, p, o in g1.triples((None, None, None)):
            item = " || ".join((str(s), str(p), str(o)))
            self.emit(SIGNAL('addListItem(QString)'), item)

        # One table refresh per ontology, in input order.
        for source, graph in ((ontologies[0], g1), (ontologies[1], g2)):
            # last path component is used as the display name
            listNames.append(source.split('/')[-1])
            listSizes.append(str(len(graph)))
            tab["Ontology"] = listNames
            tab["Size"] = listSizes

            self.emit(SIGNAL('update_table(PyQt_PyObject)'), tab)
def test_empty_boolean():
    """Convert ``tests/empty.csv`` with two metadata files and check the output.

    With the boolean metadata the result holds exactly two triples, both with
    the literal ``False`` as object; with the invalid-base metadata the result
    is an empty graph.
    """
    def convert(metadata_path):
        # Run the CSVW conversion and load the Turtle output into a new graph.
        converter = CSVW(csv_path="tests/empty.csv",
                         metadata_path=metadata_path)
        turtle = converter.to_rdf()
        parsed = ConjunctiveGraph()
        parsed.parse(data=turtle, format="turtle")
        return parsed

    bool_graph = convert("tests/empty.bool.csv-metadata.json")
    assert len(bool_graph) == 2
    false_triples = list(bool_graph.triples((None, None, Literal(False))))
    assert len(false_triples) == 2

    invalid_base_graph = convert("tests/empty.invalid_base.csv-metadata.json")
    assert len(invalid_base_graph) == 0
	def run(self):
		"""Parse the two ontologies and publish their contents to the GUI.

		Reads ``self.ontologies[0]`` and ``self.ontologies[1]``, emits one
		``addListItem`` signal per triple of the first ontology, then for each
		ontology in turn appends its file name and triple count to the
		module-level ``listNames`` / ``listSizes`` and emits ``update_table``
		with the module-level ``tab`` mapping.
		"""
		global listNames, listSizes

		ontologies = self.ontologies

		# Parse both inputs before emitting anything, so a parse error leaves
		# the GUI untouched.
		g1 = ConjunctiveGraph()
		g2 = ConjunctiveGraph()
		g1.parse(ontologies[0], format=guess_format(ontologies[0]))
		g2.parse(ontologies[1], format=guess_format(ontologies[1]))

		# Only the first ontology's triples are listed.
		for s, p, o in g1.triples((None, None, None)):
			item = " || ".join((str(s), str(p), str(o)))
			self.emit(SIGNAL('addListItem(QString)'), item)

		# One table refresh per ontology, in input order.
		for source, graph in ((ontologies[0], g1), (ontologies[1], g2)):
			# last path component is used as the display name
			listNames.append(source.split('/')[-1])
			listSizes.append(str(len(graph)))
			tab["Ontology"] = listNames
			tab["Size"] = listSizes

			self.emit(SIGNAL('update_table(PyQt_PyObject)'), tab)
def _replace_in_utf8_file(path, replacements):
    """Rewrite the UTF-8 file at ``path``, applying each (old, new) pair in order."""
    with codecs.open(path, 'r', 'utf-8') as f:
        text = f.read()
    for old, new in replacements:
        text = text.replace(old, new)
    with codecs.open(path, 'w', 'utf-8') as f:
        f.write(text)


def update_vocab_uri_in_statusfile(userid, oldprefix, newprefix, oldvocabdir, newvocabdir):
    """Point a user's mediator file and a vocab's status file at a new prefix.

    Replaces the old vocabulary URI and directory with the new ones (plain
    text substitution) in ``<mediatorsdir>/<userid>.rdf`` and in
    ``<newvocabdir>/status.rdf``, then removes the first editorial note
    (``vocab_editorial_descriptions[0]``) from the status file's graph.

    Returns False without touching anything if either file is missing,
    True otherwise.
    """
    olduri = "http://vocab.ox.ac.uk/%s" % oldprefix
    newuri = "http://vocab.ox.ac.uk/%s" % newprefix

    mediatorfile = os.path.join(ag.mediatorsdir, '%s.rdf' % userid)
    vocabstatusfile = os.path.join(newvocabdir, 'status.rdf')
    if not os.path.isfile(mediatorfile) or not os.path.isfile(vocabstatusfile):
        return False

    # URI is substituted before the directory, same order for both files
    replacements = [(olduri, newuri), (oldvocabdir, newvocabdir)]

    # update uri in mediator file
    _replace_in_utf8_file(mediatorfile, replacements)

    # update uri in vocab status file
    _replace_in_utf8_file(vocabstatusfile, replacements)

    # Remove editorial note 0
    graph = Graph()
    graph.parse(vocabstatusfile)
    note_pattern = (URIRef(newuri),
                    namespaces['skos']['editorialNote'],
                    Literal(vocab_editorial_descriptions[0]))
    # Snapshot the matches first: removing while the triples() generator is
    # still live mutates the store underneath the iteration.
    for triple in list(graph.triples(note_pattern)):
        graph.remove(triple)
    # Serialise before opening for writing so a serialisation error cannot
    # leave a truncated status file behind.
    rdf_str = graph.serialize()
    with codecs.open(vocabstatusfile, 'w', 'utf-8') as f:
        f.write(rdf_str)
    return True
Beispiel #39
0
def verify_rdf(rdf_output):
    """Assert that ``rdf_output`` (Turtle) describes exactly Bob and Joe.

    The graph must contain two subjects, ``People-IDs/1`` and ``People-IDs/2``,
    and each must carry exactly one matching id, name and age triple.
    """
    ids_ns = Namespace("http://foo.example.org/CSV/People-IDs/")
    ages_ns = Namespace("http://foo.example.org/CSV/People-Ages/")
    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    def match_count(pattern):
        # g.triples() returns a generator; it has to be materialised to be
        # counted. Wrapping it in a list literal ([g.triples(...)]) always
        # has length 1 and would make the assertions below vacuous.
        return len(list(g.triples(pattern)))

    all_subjects = set(g.subjects())
    assert len(all_subjects) == 2

    bob_subj = ids_ns['1']
    joe_subj = ids_ns['2']
    assert bob_subj in all_subjects
    assert joe_subj in all_subjects

    # Bob's details
    assert match_count((bob_subj, ids_ns.id, Literal(1))) == 1
    assert match_count((bob_subj, ids_ns.name, Literal("Bob"))) == 1
    assert match_count((bob_subj, ages_ns.age, Literal(34))) == 1

    # Joe's details
    assert match_count((joe_subj, ids_ns.id, Literal(2))) == 1
    assert match_count((joe_subj, ids_ns.name, Literal("Joe"))) == 1
    assert match_count((joe_subj, ages_ns.age, Literal(54))) == 1
# Collect the concept URIs referenced by the annotations into `bioportal_uris`.

# If LOAD_FROM_RDF is set, two N3 files containing Open Annotation annotations are
# parsed; objects of the hasSemanticTag relation that look like BioPortal or
# OBO-converted-to-OWL URIs are kept, together with the subject and object of
# every skos:exactMatch triple.
# Otherwise, if LOAD_FROM_SPARQL is set, the branch following this one is used.
# NOTE(review): an earlier version of this comment said the fallback reads URIs
# from a plain text file (one per line) -- confirm which behaviour is current.
LOAD_FROM_RDF = False
LOAD_FROM_SPARQL = True
bioportal_uris = set()

if LOAD_FROM_RDF :

    
    # Both annotation lists are merged into a single graph.
    cg = ConjunctiveGraph()
    cg.parse("/Users/hoekstra/projects/data2semantics/MockupEntityRecognizer/annotations-first-list.n3",format="n3")
    cg.parse("/Users/hoekstra/projects/data2semantics/MockupEntityRecognizer/annotations-second-list.n3",format="n3")
    
    for s,p,o in cg.triples((None, URIRef("http://www.w3.org/ns/openannotation/extension/hasSemanticTag"), None)) :
        uo = unicode(o)
        
        # BioPortal-style URI: .../ontology/<first segment>/<rest>
        m = re.search(r'http://purl.bioontology.org/ontology/(.+)/(.+)$', uo)
        
        if not m:
            # Find abbreviations and concept ids for OBO ontologies converted to OWL
            m = re.search(r'http://purl.org/obo/owl/(.+)#(.+)$', uo)
        
        # Only whether one of the patterns matched is used here; the captured
        # groups themselves are not read.
        if m :
            bioportal_uris.add(uo)
        
    # Both ends of every exactMatch mapping are kept, without any URI filtering.
    for s,p,o in cg.triples((None, URIRef("http://www.w3.org/2004/02/skos/core#exactMatch"), None)) :
        bioportal_uris.add(unicode(s))
        bioportal_uris.add(unicode(o))
elif LOAD_FROM_SPARQL :
class ContextTestCase(unittest.TestCase):
    """Tests for named-graph (context) handling of a store behind a ConjunctiveGraph.

    ``setUp`` resolves the store through ``plugin.get(storename, Store)``.
    NOTE(review): the default ``storename=None`` does not name a plugin, so
    concrete subclasses presumably override ``setUp`` with a real store name
    -- confirm against the subclasses.

    The deprecated ``assertEquals`` / ``assert_`` aliases (removed from
    unittest in Python 3.12) are not used; the assertions go through
    ``assertEqual`` / ``assertIn`` / ``assertNotIn``.
    """
    storetest = True
    identifier = URIRef("rdflib_test")

    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')
    c1 = URIRef(u'context-1')
    c2 = URIRef(u'context-2')

    def setUp(self, uri='sqlite://', storename=None):
        # Build the store from the plugin registry and open a fresh graph on it.
        store = plugin.get(storename, Store)(identifier=self.identifier)
        self.graph = ConjunctiveGraph(store, identifier=self.identifier)
        self.graph.open(uri, create=True)

    def tearDown(self, uri='sqlite://'):
        self.graph.destroy(uri)
        try:
            self.graph.close()
        except Exception:
            # Best effort: closing after destroy() may fail, which must not
            # fail the test. Unlike a bare ``except:``, this still lets
            # KeyboardInterrupt / SystemExit through.
            pass

    def get_context(self, identifier):
        """Return a Graph view of the context ``identifier`` on the shared store."""
        assert isinstance(identifier, (URIRef, BNode)), type(identifier)
        # NOTE(review): the test case itself is passed as namespace_manager;
        # kept as-is, confirm this is intentional.
        return Graph(store=self.graph.store, identifier=identifier,
                     namespace_manager=self)

    def addStuff(self):
        """Add the seven sample triples to context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))  # gasp!

    def removeStuff(self):
        """Remove the seven sample triples from context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))  # gasp!

    def addStuffInMultipleContexts(self):
        """Add one and the same triple to the default context, c1 and c2."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        # both graphs share one store, so the store sizes must agree
        self.assertEqual(len(self.graph.store), len(graph.store))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):

        c1 = self.c1
        # make sure context is empty

        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for _ in range(10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph.store)
        print("Original", oldLen, self.graph.store)
        self.addStuffInMultipleContexts()
        newLen = len(self.graph.store)
        print("MultipleContexts", newLen, self.graph.store)
        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        print("No context", len(list(self.graph.triples((None, None, None)))))
        print("Context context-1", len(
            list(self.graph.triples((None, None, None), context=self.c1))))
        print("Context context-2", len(
            list(self.graph.triples((None, None, None), context=self.c2))))
        self.assertEqual(len(self.graph.store), oldLen + 1,
                         [self.graph.store, oldLen + 1])

        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph.store), oldLen + 1,
                         [graph.store, oldLen + 1])

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertNotIn(triple, self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertNotIn(triple, self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        def cid(c):
            # Map a context to its identifier; string-like values are
            # returned unchanged.
            if (PY3 and not isinstance(c,(str, bytes))) or not isinstance(c, basestring):
                return c.identifier
            return c
        self.assertIn(self.c1, list(map(cid, self.graph.contexts())))
        self.assertIn(self.c2, list(map(cid, self.graph.contexts())))

        contextList = list(map(cid, list(self.graph.contexts(triple))))
        self.assertIn(self.c1, contextList)
        self.assertIn(self.c2, contextList)

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.get_context(c1)), 1)

        self.graph.remove_context(self.get_context(c1))
        self.assertNotIn(self.c1, self.graph.contexts())

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        for c in [graph, self.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob,)))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))

            asserte(set(
                c.subject_objects(hates)), set([(bob, pizza), (bob, michel)]))
            asserte(set(c.subject_objects(likes)),
                    set([(tarek, cheese), (michel, cheese),
                         (michel, pizza), (bob, cheese), (tarek, pizza)]))

            asserte(set(c.predicate_objects(
                michel)), set([(likes, cheese), (likes, pizza)]))
            asserte(set(c.predicate_objects(bob)), set([(likes,
                    cheese), (hates, pizza), (hates, michel)]))
            asserte(set(c.predicate_objects(
                tarek)), set([(likes, cheese), (likes, pizza)]))

            asserte(set(c.subject_predicates(
                pizza)), set([(bob, hates), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(cheese)), set([(
                bob, likes), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))

            asserte(set(c), set([
                    (bob, hates, michel), (bob, likes, cheese),
                    (tarek, likes, pizza), (michel, likes, pizza),
                    (michel, likes, cheese), (bob, hates, pizza),
                    (tarek, likes, cheese)]))

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
Beispiel #42
0
class TestSparql11(unittest.TestCase):
    """Tests for a ConjunctiveGraph backed by the 'SPARQLUpdateStore' store.

    The tests talk to the endpoint at ``HOST + DB`` (query at ``.../sparql``,
    update at ``.../update``) and use module-level terms (``tarek``,
    ``michel``, ``bob``, ``likes``, ``hates``, ``pizza``, ``cheese``) and
    graph identifiers (``graphuri``, ``othergraphuri``) defined outside this
    class.
    """

    def setUp(self):
        self.longMessage = True
        self.graph = ConjunctiveGraph('SPARQLUpdateStore')

        # the store is opened with a (query endpoint, update endpoint) pair
        root = HOST + DB
        self.graph.open((root + "sparql", root + "update"))

        # clean out the store
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0

    def tearDown(self):
        self.graph.close()

    def testSimpleGraph(self):
        # Add triples to two named graphs and check that counts, queries and
        # triple patterns on one graph are not affected by the other.
        g = self.graph.get_context(graphuri)
        g.add((tarek, likes, pizza))
        g.add((bob, likes, pizza))
        g.add((bob, likes, cheese))

        g2 = self.graph.get_context(othergraphuri)
        g2.add((michel, likes, pizza))

        self.assertEqual(3, len(g), 'graph contains 3 triples')
        self.assertEqual(1, len(g2), 'other graph contains 1 triple')

        r = g.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEqual(2, len(list(r)), "two people like pizza")

        r = g.triples((None, likes, pizza))
        self.assertEqual(2, len(list(r)), "two people like pizza")

        # Test initBindings
        r = g.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }",
                    initBindings={'s': tarek})
        self.assertEqual(1, len(list(r)), "i was asking only about tarek")

        r = g.triples((tarek, likes, pizza))
        self.assertEqual(1, len(list(r)), "i was asking only about tarek")

        r = g.triples((tarek, likes, cheese))
        self.assertEqual(0, len(list(r)), "tarek doesn't like cheese")

        # moving tarek's triple to the other graph must leave only bob in g
        g2.add((tarek, likes, pizza))
        g.remove((tarek, likes, pizza))
        r = g.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEqual(1, len(list(r)), "only bob likes pizza")

    def testConjunctiveDefault(self):
        # Query the conjunctive graph itself after filling two named graphs.
        g = self.graph.get_context(graphuri)
        g.add((tarek, likes, pizza))
        g2 = self.graph.get_context(othergraphuri)
        g2.add((bob, likes, pizza))
        g.add((tarek, hates, cheese))

        self.assertEqual(2, len(g), 'graph contains 2 triples')

        # the following are actually bad tests as they depend on your endpoint,
        # as pointed out in the sparqlstore.py code:
        #
        ## For ConjunctiveGraphs, reading is done from the "default graph" Exactly
        ## what this means depends on your endpoint, because SPARQL does not offer a
        ## simple way to query the union of all graphs as it would be expected for a
        ## ConjuntiveGraph.
        ##
        ## Fuseki/TDB has a flag for specifying that the default graph
        ## is the union of all graphs (tdb:unionDefaultGraph in the Fuseki config).
        self.assertEqual(3, len(self.graph),
            'default union graph should contain three triples but contains:\n'
            '%s' % list(self.graph))

        r = self.graph.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEqual(2, len(list(r)), "two people like pizza")

        r = self.graph.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }",
                             initBindings={'s': tarek})
        self.assertEqual(1, len(list(r)), "i was asking only about tarek")

        r = self.graph.triples((tarek, likes, pizza))
        self.assertEqual(1, len(list(r)), "i was asking only about tarek")

        r = self.graph.triples((tarek, likes, cheese))
        self.assertEqual(0, len(list(r)), "tarek doesn't like cheese")

        g2.remove((bob, likes, pizza))

        r = self.graph.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEqual(1, len(list(r)), "only tarek likes pizza")

    def testUpdate(self):
        # An INSERT DATA with an explicit GRAPH clause, sent through the
        # conjunctive graph, must be visible in the graphuri context.
        self.graph.update("INSERT DATA { GRAPH <urn:graph> { <urn:michel> <urn:likes> <urn:pizza> . } }")

        g = self.graph.get_context(graphuri)
        self.assertEqual(1, len(g), 'graph contains 1 triples')

    def testUpdateWithInitNs(self):
        # Same insert as testUpdate, written with the 'ns:' prefix supplied
        # through initNs.
        self.graph.update(
            "INSERT DATA { GRAPH ns:graph { ns:michel ns:likes ns:pizza . } }",
            initNs={'ns': URIRef('urn:')}
        )

        g = self.graph.get_context(graphuri)
        self.assertEqual(
            set(g.triples((None,None,None))),
            set([(michel,likes,pizza)]),
            'only michel likes pizza'
        )

    def testUpdateWithInitBindings(self):
        # The inserted triple is given entirely through initBindings.
        self.graph.update(
            "INSERT { GRAPH <urn:graph> { ?a ?b ?c . } } WherE { }",
            initBindings={
                'a': URIRef('urn:michel'),
                'b': URIRef('urn:likes'),
                'c': URIRef('urn:pizza'),
            }
        )

        g = self.graph.get_context(graphuri)
        self.assertEqual(
            set(g.triples((None,None,None))),
            set([(michel,likes,pizza)]),
            'only michel likes pizza'
        )

    def testMultipleUpdateWithInitBindings(self):
        # Two ';'-separated operations in one request share the same bindings.
        self.graph.update(
            "INSERT { GRAPH <urn:graph> { ?a ?b ?c . } } WHERE { };"
            "INSERT { GRAPH <urn:graph> { ?d ?b ?c . } } WHERE { }",
            initBindings={
                'a': URIRef('urn:michel'),
                'b': URIRef('urn:likes'),
                'c': URIRef('urn:pizza'),
                'd': URIRef('urn:bob'),
            }
        )

        g = self.graph.get_context(graphuri)
        self.assertEqual(
            set(g.triples((None,None,None))),
            set([(michel,likes,pizza), (bob,likes,pizza)]),
            'michel and bob like pizza'
        )

    def testNamedGraphUpdate(self):
        # Updates issued on a named graph carry no GRAPH clause; the asserts
        # check that their effect shows up in that same graph. The later
        # sections use update texts containing braces, quotes, '#' and
        # comments.
        g = self.graph.get_context(graphuri)
        r1 = "INSERT DATA { <urn:michel> <urn:likes> <urn:pizza> }"
        g.update(r1)
        self.assertEqual(
            set(g.triples((None,None,None))),
            set([(michel,likes,pizza)]),
            'only michel likes pizza'
        )

        r2 = "DELETE { <urn:michel> <urn:likes> <urn:pizza> } " + \
             "INSERT { <urn:bob> <urn:likes> <urn:pizza> } WHERE {}"
        g.update(r2)
        self.assertEqual(
            set(g.triples((None, None, None))),
            set([(bob, likes, pizza)]),
            'only bob likes pizza'
        )
        says = URIRef("urn:says")

        # Strings with unbalanced curly braces
        tricky_strs = ["With an unbalanced curly brace %s " % brace
                       for brace in ["{", "}"]]
        for tricky_str in tricky_strs:
            r3 = """INSERT { ?b <urn:says> "%s" }
            WHERE { ?b <urn:likes> <urn:pizza>} """ % tricky_str
            g.update(r3)

        values = set()
        for v in g.objects(bob, says):
            values.add(str(v))
        self.assertEqual(values, set(tricky_strs))

        # Complicated Strings
        # (one group per SPARQL quoting style: "...", '...', """...""", '''...''')
        r4strings = []
        r4strings.append(r'''"1: adfk { ' \\\" \" { "''')
        r4strings.append(r'''"2: adfk } <foo> #éï \\"''')

        r4strings.append(r"""'3: adfk { " \\\' \' { '""")
        r4strings.append(r"""'4: adfk } <foo> #éï \\'""")

        r4strings.append(r'''"""5: adfk { ' \\\" \" { """''')
        r4strings.append(r'''"""6: adfk } <foo> #éï \\"""''')
        r4strings.append('"""7: ad adsfj \n { \n sadfj"""')

        r4strings.append(r"""'''8: adfk { " \\\' \' { '''""")
        r4strings.append(r"""'''9: adfk } <foo> #éï \\'''""")
        r4strings.append("'''10: ad adsfj \n { \n sadfj'''")

        r4 = "\n".join([
            u'INSERT DATA { <urn:michel> <urn:says> %s } ;' % s
            for s in r4strings
        ])
        g.update(r4)
        values = set()
        for v in g.objects(michel, says):
            values.add(text_type(v))
        # expected value: each literal with its surrounding quotes stripped
        # and every backslash escape replaced by the escaped character
        self.assertEqual(values, set([re.sub(r"\\(.)", r"\1", re.sub(r"^'''|'''$|^'|'$|" + r'^"""|"""$|^"|"$', r"", s)) for s in r4strings]))

        # IRI Containing ' or #
        # The fragment identifier must not be misinterpreted as a comment
        # (commenting out the end of the block).
        # The ' must not be interpreted as the start of a string, causing the }
        # in the literal to be identified as the end of the block.
        r5 = """INSERT DATA { <urn:michel> <urn:hates> <urn:foo'bar?baz;a=1&b=2#fragment>, "'}" }"""

        g.update(r5)
        values = set()
        for v in g.objects(michel, hates):
            values.add(text_type(v))
        self.assertEqual(values, set([u"urn:foo'bar?baz;a=1&b=2#fragment", u"'}"]))

        # Comments
        r6 = u"""
            INSERT DATA {
                <urn:bob> <urn:hates> <urn:bob> . # No closing brace: }
                <urn:bob> <urn:hates> <urn:michel>.
            }
        #Final { } comment"""

        g.update(r6)
        values = set()
        for v in g.objects(bob, hates):
            values.add(v)
        self.assertEqual(values, set([bob, michel]))

    def testNamedGraphUpdateWithInitBindings(self):
        # Named-graph update whose triple comes entirely from initBindings.
        g = self.graph.get_context(graphuri)
        r = "INSERT { ?a ?b ?c } WHERE {}"
        g.update(r, initBindings={
                'a': michel,
                'b': likes,
                'c': pizza
            })
        self.assertEqual(
            set(g.triples((None, None, None))),
            set([(michel, likes, pizza)]),
            'only michel likes pizza'
        )

    def testEmptyNamedGraph(self):
        # If the endpoint reports the freshly created empty graph, contexts()
        # must list it as well.
        empty_graph_iri = "urn:empty-graph-1"
        self.graph.update("CREATE GRAPH <%s>" % empty_graph_iri)
        named_graphs = [text_type(r[0]) for r in self.graph.query(
            "SELECT ?name WHERE { GRAPH ?name {} }")]
        # Some SPARQL endpoint backends (like TDB) are not able to find empty named graphs
        # (at least with this query)
        if empty_graph_iri in named_graphs:
            self.assertTrue(empty_graph_iri in [text_type(g.identifier)
                                                for g in self.graph.contexts()])

    def testEmptyLiteral(self):
        # test for https://github.com/RDFLib/rdflib/issues/457
        # also see test_issue457.py which is sparql store independent!
        g = self.graph.get_context(graphuri)
        g.add((
            URIRef('http://example.com/s'),
            URIRef('http://example.com/p'),
            Literal('')))

        # the object of the only triple must come back as an empty literal
        o = tuple(g)[0][2]
        self.assertEqual(o, Literal(''), repr(o))
Beispiel #43
0
# Usage: <script> <first RDF file> <second RDF file>
# sys.argv[0] is the script name, so exactly two file arguments are required.
if len(sys.argv) != 3:
    # Report on stderr so the message does not mix with regular output.
    sys.stderr.write("expected 2 arguments: <file1> <file2>\n")
    sys.exit(2)

g1 = ConjunctiveGraph()
g1.parse(sys.argv[1])
g2 = ConjunctiveGraph()
g2.parse(sys.argv[2])

# g1_g2 collects the triples found only in g1, g2_g1 those found only in g2.
g1_g2 = ConjunctiveGraph()
g2_g1 = ConjunctiveGraph()

for t in g1.triples((None, None, None)):
    if t not in g2:
        g1_g2.add(t)

for t in g2.triples((None, None, None)):
    if t not in g1:
        g2_g1.add(t)

prefixes = [ 
    ["http://RDVocab.info/Elements/", "rda:"] , 
    ["http://www.w3.org/2004/02/skos/core#", "skos:"] ,
    ["http://www.w3.org/2000/01/rdf-schema#", "rdfs:" ] ,
    ["http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf:" ] ,
    ["http://metadataregistry.org/uri/profile/RegAp/", "regap:" ] ,
    ["http://www.w3.org/2001/XMLSchema#", "xsd:" ] ,
    ["http://metadataregistry.org/uri/RegStatus/", "regstatus:" ]
Beispiel #44
0
    done = set()
    if path.exists(args.done):
        with open(args.done) as f:
            for line in f.readlines():
                stripped_line = line.strip()
                if stripped_line != "":
                    done.add(stripped_line)

    print("Loading the graph")
    g = ConjunctiveGraph()
    g.parse(args.input, format=args.format)

    print("Convert DOIs in lowercase form")
    doi_to_remove = []
    doi_to_add = []
    for s, p, o in g.triples((None, LITERAL.hasLiteralValue, None)):
        o_str = str(o)
        lower_o_str = o_str.lower()
        if o_str != lower_o_str:
            doi_to_remove.append((s, p, o))
            doi_to_add.append((s, p, Literal(lower_o_str)))
    for s, p, o in doi_to_remove:
        g.remove((s, p, o))
    for s, p, o in doi_to_add:
        g.add((s, p, o))

    if not args.avoid:
        print("Check additional mapping in the oc/ccc triplestore")
        rf = ResourceFinder(ts_url=triplestore_url, default_dir=default_dir)
        with open(args.table, "a") as f:
            for s, p, o in g.triples((None, DATACITE.hasIdentifier, None)):
class ManifestHelper(object):
    """Convenience wrapper around a ``ConjunctiveGraph``.

    Subjects, predicates and objects may be passed either as rdflib terms or
    as plain values; plain values are converted through the ``URIHelper``
    instance stored in ``self.urihelper``.  In the query-style methods
    (``triple_exists``, ``list_objects``, ``del_triple``) both ``None`` and
    the string ``'*'`` act as wildcards.
    """

    def __init__(self, uri=None):
        """Create the helper; a truthy *uri* is used as the graph identifier."""
        self.uri = uri if uri else None
        self.reset()

    def reset(self):
        """Replace the graph with a fresh one and re-register the default namespaces."""
        self.g = None
        if self.uri:
            self.g = ConjunctiveGraph(identifier=self.uri)
        else:
            self.g = ConjunctiveGraph()
        self.namespaces = {}
        self.urihelper = URIHelper(self.namespaces)
        # add defaults
        for prefix, ns in NAMESPACES.iteritems():
            self.add_namespace(prefix, ns)

    def from_string(self, textfile, format="xml", encoding="utf-8"):
        """Reset the helper and parse *textfile* into the new graph.

        *textfile* is handed to the graph's ``parse`` method as its source.
        *encoding* is accepted for interface compatibility but is not used.
        """
        self.reset()
        # format must be passed by keyword: the second positional parameter
        # of Graph.parse is publicID, so a positional format was never applied.
        self.g.parse(textfile, format=format)
        return

    # -- internal helpers ---------------------------------------------------

    def _has_graph(self):
        """Return True when ``self.g`` is a class named ConjunctiveGraph or Graph."""
        return type(self.g).__name__ in ['ConjunctiveGraph', 'Graph']

    def _subject_term(self, s):
        """Return *s* unchanged if it is a URIRef/BNode, else convert it via the URI helper."""
        if isinstance(s, (URIRef, BNode)):
            return s
        return self.urihelper.get_uriref(s)

    def _predicate_term(self, p):
        """Return *p* unchanged if it is a URIRef, else parse it via the URI helper."""
        if isinstance(p, URIRef):
            return p
        return self.urihelper.parse_uri(p)

    def _object_term(self, o):
        """Return *o* unchanged if it is an rdflib term, else parse its text form."""
        if isinstance(o, (URIRef, Literal, BNode)):
            return o
        if not isinstance(o, basestring):
            o = unicode(o)
        return self.urihelper.parse_uri(o, return_Literal_not_Exception=True)

    @staticmethod
    def _pattern_term(term, convert):
        """Map the wildcards ``None`` / ``'*'`` to None; otherwise apply *convert*."""
        if term is None or term == '*':
            return None
        return convert(term)

    # -- public API ---------------------------------------------------------

    def triple_exists(self, s, p, o):
        """Return True if at least one triple matches the (s, p, o) pattern.

        Returns False when no graph is available.
        """
        if not self._has_graph():
            return False
        s = self._pattern_term(s, self._subject_term)
        p = self._pattern_term(p, self._predicate_term)
        o = self._pattern_term(o, self._object_term)
        # Stop at the first match rather than counting every matching triple.
        for _ in self.g.triples((s, p, o)):
            return True
        return False

    def list_objects(self, s, p):
        """Return a list of the objects of all triples matching (s, p, *).

        Returns an empty list when no graph is available.
        """
        if not self._has_graph():
            return []
        s = self._pattern_term(s, self._subject_term)
        p = self._pattern_term(p, self._predicate_term)
        return list(self.g.objects(s, p))

    def add_triple(self, s, p, o):
        """Convert the three values to terms, add the triple and commit the graph.

        Unlike the query methods, no wildcard handling is applied here.
        """
        s = self._subject_term(s)
        p = self._predicate_term(p)
        o = self._object_term(o)
        self.g.add((s, p, o))
        self.g.commit()
        return

    def add_namespace(self, prefix, uri):
        """Register *prefix* for *uri* on this helper, the URI helper and the graph.

        Raises:
            TypeError: if *prefix* is not a string, or *uri* is neither a
                string nor a URIRef/Namespace.
        """
        if not isinstance(prefix, basestring):
            raise TypeError('Add namespace: prefix is not of type string or unicode')

        if not isinstance(uri, (URIRef, Namespace, basestring)):
            raise TypeError('Add namespace: namespace is not of type string or unicode')

        if not isinstance(prefix, unicode):
            prefix = unicode(prefix)

        if isinstance(uri, basestring) and not isinstance(uri, unicode):
            uri = unicode(uri)

        self.namespaces[prefix] = self.urihelper.get_namespace(uri)
        if prefix not in self.urihelper.namespaces:
            self.urihelper.namespaces[prefix] = self.urihelper.get_namespace(uri)
        self.g.bind(prefix, self.namespaces[prefix])
        return

    def del_namespace(self, prefix, ns):
        """Forget *prefix* in ``self.namespaces``; *ns* is not used."""
        self.namespaces.pop(prefix, None)
        return

    def del_triple(self, s, p, o=None):
        """Remove every triple matching the (s, p, o) pattern; no-op without a graph."""
        if not self._has_graph():
            return
        s = self._pattern_term(s, self._subject_term)
        p = self._pattern_term(p, self._predicate_term)
        o = self._pattern_term(o, self._object_term)
        self.g.remove((s, p, o))
        return

    def get_graph(self):
        """Return the underlying graph object."""
        return self.g

    def to_string(self, format="xml"):
        """Serialize the graph as UTF-8 in *format*, followed by a newline.

        When there is no graph or the graph is empty, only an XML
        declaration line is returned.
        """
        if self._has_graph() and len(self.g) > 0:
            self.g.commit()
            return self.g.serialize(format=format, encoding="utf-8") + "\n"
        return u'<?xml version="1.0" encoding="UTF-8"?>\n'
Beispiel #46
0
class Inspector(object):

    """ Class that includes methods for querying an RDFS/OWL ontology """

    def __init__(self, uri, language=""):
        """Load the ontology at *uri* and index its classes.

        Parsing is attempted as RDF/XML first and as N3 second.
        *language* is accepted but not used by the code visible here.

        Raises:
            exceptions.Error: if the source cannot be parsed in either format.
        """
        super(Inspector, self).__init__()
        self.rdfGraph = ConjunctiveGraph()
        try:
            self.rdfGraph.parse(uri, format="application/rdf+xml")
        except Exception:
            try:
                self.rdfGraph.parse(uri, format="n3")
            except Exception:
                raise exceptions.Error("Could not parse the file! Is it a valid RDF/OWL ontology?")
        # Indexing happens only after a successful parse.  It used to sit in a
        # ``finally`` clause, which also ran it on the failure path where it
        # could mask the parse error.
        self.baseURI = self.get_OntologyURI() or uri
        self.allclasses = self.__getAllClasses(includeDomainRange=True, includeImplicit=True, removeBlankNodes=False, excludeRDF_OWL=False)

    def get_OntologyURI(self, return_as_string=True):
        """Return the first subject typed as ``Ontology``, or None if there is none.

        The subject is returned as ``str`` unless *return_as_string* is false.
        """
        for subject, _, _ in self.rdfGraph.triples((None, RDF.type, Ontology)):
            return str(subject) if return_as_string else subject
        return None

    def __getAllClasses(self, classPredicate="", includeDomainRange=False, includeImplicit=False, removeBlankNodes=True, addOWLThing=True, excludeRDF_OWL=True):
        """Collect the class terms found in the graph and return them sorted by name.

        classPredicate: "rdfs", "owl" or "" (both) - which ``rdf:type``
            declarations are scanned.
        includeDomainRange: also collect objects of rdfs:domain / rdfs:range.
        includeImplicit: also collect both ends of rdfs:subClassOf and every
            object of rdf:type.
        removeBlankNodes: drop entries for which ``isBlankNode`` is true.
        addOWLThing: seed the result with ``Thing``.
        excludeRDF_OWL: skip terms in the OWL, RDF and RDFS namespaces.
        """
        rdfGraph = self.rdfGraph
        found = {}
        reserved_prefixes = (
            'http://www.w3.org/2002/07/owl#',
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            "http://www.w3.org/2000/01/rdf-schema#",
        )

        def remember(term):
            # The dict is used as a set: the first sighting of a term wins.
            if excludeRDF_OWL and term.startswith(reserved_prefixes):
                return
            if term not in found:
                found[term] = None

        if addOWLThing:
            remember(Thing)

        if classPredicate == "rdfs" or classPredicate == "":
            for s in rdfGraph.subjects(RDF.type, RDFS.Class):
                remember(s)

        if classPredicate == "owl" or classPredicate == "":
            for s in rdfGraph.subjects(RDF.type, Class):
                remember(s)

        if includeDomainRange:
            for o in rdfGraph.objects(None, RDFS.domain):
                remember(o)
            for o in rdfGraph.objects(None, RDFS.range):
                remember(o)

        if includeImplicit:
            for s, v, o in rdfGraph.triples((None, RDFS.subClassOf, None)):
                remember(s)
                remember(o)
            for o in rdfGraph.objects(None, RDF.type):
                remember(o)

        # get a list
        classes = list(found)
        if removeBlankNodes:
            classes = [x for x in classes if not isBlankNode(x)]
        return sort_uri_list_by_name(classes)

    def __getTopclasses(self, classPredicate=''):
        """Return, sorted by name, the classes that have no direct superclass."""
        returnlist = [eachclass
                      for eachclass in self.__getAllClasses(classPredicate)
                      if not self.get_classDirectSupers(eachclass)]
        return sort_uri_list_by_name(returnlist)

    def __getTree(self, father=None, out=None):
        """Build a ``{class: direct subclasses}`` dict starting from ``self.toplayer``.

        Called without *father* it creates and returns the dict, whose key
        ``0`` holds the top layer.  Called with *father* it fills *out* in
        place and returns None.
        """
        # NOTE(review): self.toplayer is not assigned anywhere in the visible
        # part of this class - presumably set elsewhere; confirm.
        if father:
            children = self.get_classDirectSubs(father)
            out[father] = children
            for ch in children:
                self.__getTree(ch, out)
            return None

        out = {}
        topclasses = self.toplayer
        out[0] = topclasses

        for top in topclasses:
            children = self.get_classDirectSubs(top)
            out[top] = children
            for potentialfather in children:
                self.__getTree(potentialfather, out)

        return out

    def __buildClassTree(self, father=None, out=None):
        """Like ``__getTree`` but rooted at ``Thing`` and with sorted child lists.

        Key ``0`` maps to ``[Thing]`` and ``Thing`` maps to the sorted top layer.
        """
        # NOTE(review): self.toplayer is not assigned anywhere in the visible
        # part of this class - presumably set elsewhere; confirm.
        if father:
            children = self.get_classDirectSubs(father)
            out[father] = sort_uri_list_by_name(children)
            for ch in children:
                self.__buildClassTree(ch, out)
            return None

        out = {}
        topclasses = self.toplayer
        out[0] = [Thing]
        out[Thing] = sort_uri_list_by_name(topclasses)
        for top in topclasses:
            children = self.get_classDirectSubs(top)
            out[top] = sort_uri_list_by_name(children)
            for potentialfather in children:
                self.__buildClassTree(potentialfather, out)
        return out

    # methods for getting ancestors and descendants of classes: by default, we do not include blank nodes
    def get_classDirectSupers(self, aClass, excludeBnodes=True, sortUriName=False):
        """Return the de-duplicated direct superclasses of *aClass*, never ``Thing``.

        Blank nodes are skipped when *excludeBnodes* is true; the result is
        sorted by name only when *sortUriName* is true.
        """
        returnlist = []
        for o in self.rdfGraph.objects(aClass, RDFS.subClassOf):
            if o == Thing:
                continue
            if excludeBnodes and isBlankNode(o):
                continue
            returnlist.append(o)
        unique = remove_duplicates(returnlist)
        if sortUriName:
            return sort_uri_list_by_name(unique)
        return unique

    def get_classDirectSubs(self, aClass, excludeBnodes=True):
        """Return the direct subclasses of *aClass*, de-duplicated and sorted by name."""
        returnlist = [
            s for s, v, o in self.rdfGraph.triples((None, RDFS.subClassOf, aClass))
            if not (excludeBnodes and isBlankNode(s))
        ]
        return sort_uri_list_by_name(remove_duplicates(returnlist))

    def get_classSiblings(self, aClass, excludeBnodes=True):
        """Return the other direct subclasses of *aClass*'s direct superclasses."""
        returnlist = []
        for father in self.get_classDirectSupers(aClass, excludeBnodes):
            for child in self.get_classDirectSubs(father, excludeBnodes):
                if child != aClass:
                    returnlist.append(child)

        return sort_uri_list_by_name(remove_duplicates(returnlist))

    def entitySynonyms(self, anEntity, language=DEFAULT_LANGUAGE, getall=True):
        """Return synonyms of *anEntity*.

        With *getall* true, return a list of all objects of the ``Synonym``,
        ``EFO_Synonym`` and ``OBO_Synonym`` predicates (*language* is ignored).
        Otherwise return the first ``Synonym`` object whose ``language``
        equals *language*, or ``""`` when none matches.
        """
        if getall:
            temp = []
            # Uberon, EFO and OBI synonyms, in that order
            for predicate in (Synonym, EFO_Synonym, OBO_Synonym):
                temp.extend(self.rdfGraph.objects(anEntity, predicate))
            return temp

        for o in self.rdfGraph.objects(anEntity, Synonym):
            # Objects without a ``language`` attribute simply do not match,
            # instead of raising AttributeError.
            lang = getattr(o, 'language', None)
            if lang and lang == language:
                return o
        return ""

    def classFind(self, name, exact=False):
        """Search ``self.allclasses`` case-insensitively for *name*.

        With *exact* true, return a one-element list holding the first class
        whose string form equals *name*; otherwise return every class whose
        string form contains *name*.  An empty list means no match, or a
        falsy *name*.
        """
        temp = []
        if name:
            needle = str(name).lower()
            for x in self.allclasses:
                candidate = str(x).lower()
                if exact:
                    if candidate == needle:
                        return [x]
                elif needle in candidate:
                    temp.append(x)
        return temp
    def test_get_history(self):
        """Submit and merge a patch, then check the provenance graph served at /h."""
        with open(filepath('test-patch-adds-items.json')) as f:
            patch = f.read()

        submitter_auth = {
            'Authorization': 'Bearer '
            + 'NTAwNWViMTgtYmU2Yi00YWMwLWIwODQtMDQ0MzI4OWIzMzc4'}
        merger_auth = {
            'Authorization': 'Bearer '
            + 'ZjdjNjQ1ODQtMDc1MC00Y2I2LThjODEtMjkzMmY1ZGFhYmI4'}

        with self.client as client:
            res1 = client.patch(
                '/d/',
                data=patch,
                content_type='application/json',
                headers=submitter_auth)
            patch_url = urlparse(res1.headers['Location']).path
            client.post(patch_url + 'merge', headers=merger_auth)
            res2 = client.get('/h')
            self.assertEqual(res2.status_code, http.client.OK)
            self.assertEqual(
                res2.headers['Content-Type'], 'application/ld+json')
            jsonld = res2.get_data(as_text=True)

        g = ConjunctiveGraph()
        g.parse(format='json-ld', data=jsonld)

        change_1 = PERIODO['p0h#change-1']
        change_2 = PERIODO['p0h#change-2']
        submitter = URIRef('http://orcid.org/1234-5678-9101-112X')
        merging_user = URIRef('http://orcid.org/1211-1098-7654-321X')

        # Initial data load (None in a pattern means "any")
        initial_load = (
            (change_1, PROV.endedAtTime, None),
            (change_1, PROV.used, PERIODO['p0d?version=0']),
            (PERIODO['p0d?version=0'],
             PROV.specializationOf, PERIODO['p0d']),
            (change_1, PROV.used, PERIODO['p0h#patch-1']),
            (PERIODO['p0h#patch-1'],
             FOAF.page, PERIODO['p0patches/1/patch.jsonpatch']),
            (change_1, PROV.generated, PERIODO['p0d?version=1']),
            (PERIODO['p0d?version=1'],
             PROV.specializationOf, PERIODO['p0d']),
            (change_1, PROV.generated, PERIODO['p0trgkv?version=1']),
            (PERIODO['p0trgkv?version=1'],
             PROV.specializationOf, PERIODO['p0trgkv']),
            (change_1, PROV.generated, PERIODO['p0trgkvwbjd?version=1']),
            (PERIODO['p0trgkvwbjd?version=1'],
             PROV.specializationOf, PERIODO['p0trgkvwbjd']),
        )
        for pattern in initial_load:
            self.assertIn(pattern, g)

        # Change from first submitted patch: both timestamps exist and are
        # xsd:dateTime values whose ISO form matches W3CDTF.
        for time_predicate in (PROV.startedAtTime, PROV.endedAtTime):
            self.assertIn((change_2, time_predicate, None), g)
        for time_predicate in (PROV.startedAtTime, PROV.endedAtTime):
            timestamp = g.value(subject=change_2, predicate=time_predicate)
            self.assertEqual(timestamp.datatype, XSD.dateTime)
            self.assertRegex(timestamp.value.isoformat(), W3CDTF)

        for agent in (submitter, merging_user):
            self.assertIn((change_2, PROV.wasAssociatedWith, agent), g)

        allowed_roles = (PERIODO['p0v#submitted'], PERIODO['p0v#updated'])
        for association in g.subjects(predicate=PROV.agent, object=submitter):
            role = g.value(subject=association, predicate=PROV.hadRole)
            self.assertIn(role, allowed_roles)

        merger = g.value(predicate=PROV.agent, object=merging_user)
        second_change = (
            (change_2, PROV.qualifiedAssociation, merger),
            (merger, PROV.hadRole, PERIODO['p0v#merged']),
            (change_2, PROV.used, PERIODO['p0d?version=1']),
            (PERIODO['p0d?version=1'],
             PROV.specializationOf, PERIODO['p0d']),
            (change_2, PROV.used, PERIODO['p0h#patch-2']),
            (PERIODO['p0h#patch-2'],
             FOAF.page, PERIODO['p0patches/2/patch.jsonpatch']),
            (change_2, PROV.generated, PERIODO['p0d?version=2']),
            (PERIODO['p0d?version=2'],
             PROV.specializationOf, PERIODO['p0d']),
            (change_2, PROV.generated, PERIODO['p0trgkv?version=2']),
            (PERIODO['p0trgkv?version=2'],
             PROV.specializationOf, PERIODO['p0trgkv']),
            (PERIODO['p0trgkv?version=2'],
             PROV.wasRevisionOf, PERIODO['p0trgkv?version=1']),
        )
        for pattern in second_change:
            self.assertIn(pattern, g)

        # Every entity generated by change 2 is version 2 of what it specializes.
        generated = [version for _, _, version in g.triples(
            (change_2, PROV.generated, None))]
        for version in generated:
            entity = g.value(subject=version, predicate=PROV.specializationOf)
            self.assertEqual(str(entity) + '?version=2', str(version))
        self.assertEqual(len(generated), 5)
class ContextTestCase(unittest.TestCase):
    """Exercise adding, removing and querying triples across named contexts.

    The class attributes below select the store and its location.
    """
    store_name = 'default'
    path = None
    storetest = True
    create = True
    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')
    c1 = URIRef(u'context-1')
    c2 = URIRef(u'context-2')

    def setUp(self):
        """Create the conjunctive graph on a freshly destroyed and reopened store."""
        self.graph = ConjunctiveGraph(store=self.store_name)
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy the store, then close the graph on a best-effort basis."""
        self.graph.destroy(self.path)
        try:
            self.graph.close()
        except Exception:
            # Closing is best-effort; a failure here must not fail the test.
            # Unlike a bare ``except``, KeyboardInterrupt and SystemExit
            # still propagate.
            pass

    def get_context(self, identifier):
        """Return a ``Graph`` for *identifier* backed by the shared store."""
        assert isinstance(identifier, URIRef) or \
               isinstance(identifier, BNode), type(identifier)
        # NOTE(review): this passes the test case itself as namespace_manager;
        # presumably unintended - confirm before relying on namespace handling.
        return Graph(store=self.graph.store, identifier=identifier,
                         namespace_manager=self)

    def addStuff(self):
        """Add the seven fixture triples to context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))  # gasp!

    def removeStuff(self):
        """Remove the seven fixture triples from context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))  # gasp!

    def addStuffInMultipleContexts(self):
        """Add one triple to the default context and to contexts c1 and c2."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        """The conjunctive graph yields as many triples as context c1 holds."""
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        # self.assertEqual(len(self.graph), len(graph))
        self.assertEqual(
            len(list(self.graph.triples((None, None, None)))),
            len(list(graph.triples((None, None, None)))))

    def testAdd(self):
        """Adding the fixture triples does not raise."""
        self.addStuff()

    def testRemove(self):
        """Adding and then removing the fixture triples does not raise."""
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        """``len`` tracks additions to, and removal of, a single context."""
        c1 = self.c1
        # make sure context is empty

        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for i in range(0, 10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        """The same triple in several contexts counts once."""
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEqual(len(self.graph), oldLen + 1)
        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        """A triple stays visible until it is removed from every context."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertTrue(triple not in self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertTrue(triple not in self.graph)

    def testContexts(self):
        """``contexts()`` reports c1 and c2, both overall and for the shared triple."""
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        def cid(c):
            # contexts() may yield graphs or plain identifiers; normalise
            # both to the identifier.
            if not isinstance(c, basestring):
                return c.identifier
            return c
        self.assertTrue(self.c1 in map(cid, self.graph.contexts()))
        self.assertTrue(self.c2 in map(cid, self.graph.contexts()))

        # Materialise the identifiers: a lazy ``map`` object would be
        # partially consumed by the first membership test.
        contextList = [cid(c) for c in self.graph.contexts(triple)]
        self.assertTrue(self.c1 in contextList)
        self.assertTrue(self.c2 in contextList)

    def testRemoveContext(self):
        """Removing context c1 makes it disappear from ``contexts()``."""
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.get_context(c1)), 1)

        self.graph.remove_context(self.get_context(c1))
        self.assertTrue(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        """Removing the all-wildcard pattern empties the graph."""
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        """Check every bound/unbound pattern, with and without a context."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        for c in [graph, self.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob,)))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))

            asserte(set(c.subject_objects(hates)),
                    set([(bob, pizza), (bob, michel)]))
            asserte(set(c.subject_objects(likes)),
                    set([(tarek, cheese), (michel, cheese),
                         (michel, pizza), (bob, cheese), (tarek, pizza)]))

            asserte(set(c.predicate_objects(michel)),
                    set([(likes, cheese), (likes, pizza)]))
            asserte(set(c.predicate_objects(bob)),
                    set([(likes, cheese), (hates, pizza), (hates, michel)]))
            asserte(set(c.predicate_objects(tarek)),
                    set([(likes, cheese), (likes, pizza)]))

            asserte(set(c.subject_predicates(pizza)),
                    set([(bob, hates), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(cheese)),
                    set([(bob, likes), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))

            asserte(set(c), set([
                    (bob, hates, michel), (bob, likes, cheese),
                    (tarek, likes, pizza), (michel, likes, pizza),
                    (michel, likes, cheese), (bob, hates, pizza),
                    (tarek, likes, cheese)]))

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
Beispiel #49
0
def propositionalize_rdf(rdf_files, output_train, output_test, pos_graphs, k_fold, type="r", n_features=200):
	"""Turn RDF graph files into binary feature rows, one CSV pair per fold.

	Every distinct (subject, predicate, object) key seen in any file is given
	a feature column; a graph's row holds 1 in the columns of the keys it
	contains and 0 elsewhere.  The key-to-column mapping is shared across all
	folds and across the train and test phases.

	rdf_files    -- sequence of file paths; the label lookup parses the
	                integer found between the first and second "_" of each path
	output_train -- path prefix; fold i is written to output_train + "<i>.txt"
	output_test  -- path prefix; fold i is written to output_test + "<i>.txt"
	pos_graphs   -- container of graph numbers labelled 1 (all others get 0)
	k_fold       -- number of folds handed to cross_validation.KFold
	type         -- "o" replaces every predicate by the constant "relation";
	                any other value keeps the predicate
	n_features   -- minimum row width; rows are widened automatically once
	                more distinct keys than this have been seen

	Returns (graph_labels_train, graph_labels_test); each is a list holding
	one list of 0/1 labels per fold, in file order.
	"""
	graph_labels_train = []
	graph_labels_test = []
	# Column index of every key seen so far.  Membership is tested with "in"
	# so that the key stored in column 0 is not mistaken for an unseen one.
	triple_index = {}

	rdf_files = np.array(rdf_files)
	folds = cross_validation.KFold(len(rdf_files), n_folds=k_fold)
	for i_fold, (train_index, test_index) in enumerate(folds):
		# Train is processed first, then test.
		phases = (
			(rdf_files[train_index], output_train + str(i_fold) + ".txt", graph_labels_train),
			(rdf_files[test_index], output_test + str(i_fold) + ".txt", graph_labels_test),
		)

		# Delete stale train and test output files before doing any work.
		for _, stale_path, _ in phases:
			try:
				os.remove(stale_path)
			except OSError:
				pass

		for filelist, output_path, fold_labels in phases:
			labels = []
			feature_sets = []
			for rdf_path in filelist:
				num = int(rdf_path.split("_")[1])
				labels.append(1 if num in pos_graphs else 0)

				g = ConjunctiveGraph()
				with open(rdf_path, 'rb') as rdf_file:
					g.load(rdf_file)

				features = set()
				for s, p, o in g.triples((None, None, None)):
					# NOTE(review): terms starting with "N" are collapsed to
					# "Blank" -- presumably to catch generated blank-node ids;
					# any other term starting with "N" is collapsed as well.
					if s.startswith("N"):
						s = "Blank"
					if o.startswith("N"):
						o = "Blank"
					if type == "o":
						p = "relation"
					key = (s, p, o)
					if key not in triple_index:
						triple_index[key] = len(triple_index)
					features.add(triple_index[key])
				feature_sets.append(features)
			fold_labels.append(labels)

			# Never narrower than n_features, but wide enough for every column
			# assigned so far, so indexing below cannot run out of bounds.
			width = max(n_features, len(triple_index))
			with open(output_path, 'wt') as out:
				writer = csv.writer(out)
				for features in feature_sets:
					row = np.zeros(width)
					for pos in features:
						row[pos] = 1
					writer.writerow(list(row))
	return graph_labels_train, graph_labels_test
Beispiel #50
0
class TestSparql11(unittest.TestCase):

    def setUp(self):
        self.longMessage = True
        self.graph = ConjunctiveGraph('SPARQLUpdateStore')

        root = "http://localhost:3030/ukpp/"
        self.graph.open((root + "sparql", root + "update"))

        # clean out the store
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0

    def tearDown(self):
        self.graph.close()

    def testSimpleGraph(self):
        g = self.graph.get_context(graphuri)
        g.add((tarek, likes, pizza))
        g.add((bob, likes, pizza))
        g.add((bob, likes, cheese))

        g2 = self.graph.get_context(othergraphuri)
        g2.add((michel, likes, pizza))

        self.assertEquals(3, len(g), 'graph contains 3 triples')
        self.assertEquals(1, len(g2), 'other graph contains 1 triple')

        r = g.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEquals(2, len(list(r)), "two people like pizza")

        r = g.triples((None, likes, pizza))
        self.assertEquals(2, len(list(r)), "two people like pizza")

        # Test initBindings
        r = g.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }",
                    initBindings={'s': tarek})
        self.assertEquals(1, len(list(r)), "i was asking only about tarek")

        r = g.triples((tarek, likes, pizza))
        self.assertEquals(1, len(list(r)), "i was asking only about tarek")

        r = g.triples((tarek, likes, cheese))
        self.assertEquals(0, len(list(r)), "tarek doesn't like cheese")

        g2.add((tarek, likes, pizza))
        g.remove((tarek, likes, pizza))
        r = g.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEquals(1, len(list(r)), "only bob likes pizza")

    def testConjunctiveDefault(self):
        g = self.graph.get_context(graphuri)
        g.add((tarek, likes, pizza))
        g2 = self.graph.get_context(othergraphuri)
        g2.add((bob, likes, pizza))
        g.add((tarek, hates, cheese))

        self.assertEquals(2, len(g), 'graph contains 2 triples')
        self.assertEquals(3, len(self.graph),
                          'default union graph contains three triples')

        r = self.graph.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEquals(2, len(list(r)), "two people like pizza")

        r = self.graph.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }",
                             initBindings={'s': tarek})
        self.assertEquals(1, len(list(r)), "i was asking only about tarek")

        r = self.graph.triples((tarek, likes, pizza))
        self.assertEquals(1, len(list(r)), "i was asking only about tarek")

        r = self.graph.triples((tarek, likes, cheese))
        self.assertEquals(0, len(list(r)), "tarek doesn't like cheese")

        g2.remove((bob, likes, pizza))

        r = self.graph.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEquals(1, len(list(r)), "only tarek likes pizza")

    def testUpdate(self):
        self.graph.update("INSERT DATA { GRAPH <urn:graph> { <urn:michel> <urn:likes> <urn:pizza> . } }")
        
        g = self.graph.get_context(graphuri)
        self.assertEquals(1, len(g), 'graph contains 1 triples')
        
    def testUpdateWithInitNs(self):
        self.graph.update(
            "INSERT DATA { GRAPH ns:graph { ns:michel ns:likes ns:pizza . } }",
            initNs={'ns': URIRef('urn:')}
        )
        
        g = self.graph.get_context(graphuri)
        self.assertEquals(
            set(g.triples((None,None,None))),
            set([(michel,likes,pizza)]),
            'only michel likes pizza'
        )
        
    def testUpdateWithInitBindings(self):
        self.graph.update(
            "INSERT { GRAPH <urn:graph> { ?a ?b ?c . } } WherE { }",
            initBindings={
                'a': URIRef('urn:michel'),
                'b': URIRef('urn:likes'),
                'c': URIRef('urn:pizza'),
            }
        )
        
        g = self.graph.get_context(graphuri)
        self.assertEquals(
            set(g.triples((None,None,None))),
            set([(michel,likes,pizza)]),
            'only michel likes pizza'
        )

    def testMultipleUpdateWithInitBindings(self):
        self.graph.update(
            "INSERT { GRAPH <urn:graph> { ?a ?b ?c . } } WHERE { };"
            "INSERT { GRAPH <urn:graph> { ?d ?b ?c . } } WHERE { }",
            initBindings={
                'a': URIRef('urn:michel'),
                'b': URIRef('urn:likes'),
                'c': URIRef('urn:pizza'),
                'd': URIRef('urn:bob'),
            }
        )
        
        g = self.graph.get_context(graphuri)
        self.assertEquals(
            set(g.triples((None,None,None))),
            set([(michel,likes,pizza), (bob,likes,pizza)]),
            'michel and bob like pizza'
        )

    def testNamedGraphUpdate(self):
        g = self.graph.get_context(graphuri)
        r1 = "INSERT DATA { <urn:michel> <urn:likes> <urn:pizza> }"
        g.update(r1)
        self.assertEquals(
            set(g.triples((None,None,None))),
            set([(michel,likes,pizza)]),
            'only michel likes pizza'
        )

        r2 = "DELETE { <urn:michel> <urn:likes> <urn:pizza> } " + \
             "INSERT { <urn:bob> <urn:likes> <urn:pizza> } WHERE {}"
        g.update(r2)
        self.assertEquals(
            set(g.triples((None, None, None))),
            set([(bob, likes, pizza)]),
            'only bob likes pizza'
        )
        says = URIRef("urn:says")

        # Strings with unbalanced curly braces
        tricky_strs = ["With an unbalanced curly brace %s " % brace
                       for brace in ["{", "}"]]
        for tricky_str in tricky_strs:
            r3 = """INSERT { ?b <urn:says> "%s" }
            WHERE { ?b <urn:likes> <urn:pizza>} """ % tricky_str
            g.update(r3)

        values = set()
        for v in g.objects(bob, says):
            values.add(str(v))
        self.assertEquals(values, set(tricky_strs))

        # Complicated Strings
        r4strings = []
        r4strings.append(ur'''"1: adfk { ' \\\" \" { "''')
        r4strings.append(ur'''"2: adfk } <foo> #éï \\"''')

        r4strings.append(ur"""'3: adfk { " \\\' \' { '""")
        r4strings.append(ur"""'4: adfk } <foo> #éï \\'""")

        r4strings.append(ur'''"""5: adfk { ' \\\" \" { """''')
        r4strings.append(ur'''"""6: adfk } <foo> #éï \\"""''')
        r4strings.append(u'"""7: ad adsfj \n { \n sadfj"""')

        r4strings.append(ur"""'''8: adfk { " \\\' \' { '''""")
        r4strings.append(ur"""'''9: adfk } <foo> #éï \\'''""")
        r4strings.append(u"'''10: ad adsfj \n { \n sadfj'''")

        r4 = "\n".join([
            u'INSERT DATA { <urn:michel> <urn:says> %s } ;' % s
            for s in r4strings
        ])
        g.update(r4)
        values = set()
        for v in g.objects(michel, says):
            values.add(unicode(v))
        self.assertEquals(values, set([re.sub(ur"\\(.)", ur"\1", re.sub(ur"^'''|'''$|^'|'$|" + ur'^"""|"""$|^"|"$', ur"", s)) for s in r4strings]))
class TestSparql11(unittest.TestCase):
    """Tests for a ConjunctiveGraph backed by the 'SPARQLUpdateStore' store.

    The tests talk to the query/update endpoints opened in setUp, so a
    server must be listening there.  The URIs used (graphuri, othergraphuri,
    tarek, michel, bob, likes, hates, pizza, cheese) are module-level names
    defined elsewhere in the file.
    """

    def setUp(self):
        """Open the store and empty every context it reports."""
        self.longMessage = True
        self.graph = ConjunctiveGraph('SPARQLUpdateStore')

        root = "http://localhost:3030/ukpp/"
        self.graph.open((root + "sparql", root + "update"))

        # clean out the store
        for c in self.graph.contexts():
            c.remove((None, None, None))

    def tearDown(self):
        """Close the store connection."""
        self.graph.close()

    def testSimpleGraph(self):
        """Add, query and remove triples in two separate named graphs."""
        g = self.graph.get_context(graphuri)
        g.add((tarek, likes, pizza))
        g.add((bob, likes, pizza))
        g.add((bob, likes, cheese))

        g2 = self.graph.get_context(othergraphuri)
        g2.add((michel, likes, pizza))

        self.assertEqual(3, len(g), 'graph contains 3 triples')
        self.assertEqual(1, len(g2), 'other graph contains 1 triple')

        r = g.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEqual(2, len(list(r)), "two people like pizza")

        r = g.triples((None, likes, pizza))
        self.assertEqual(2, len(list(r)), "two people like pizza")

        # Test initBindings
        r = g.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }",
                    initBindings={'s': tarek})
        self.assertEqual(1, len(list(r)), "i was asking only about tarek")

        r = g.triples((tarek, likes, pizza))
        self.assertEqual(1, len(list(r)), "i was asking only about tarek")

        r = g.triples((tarek, likes, cheese))
        self.assertEqual(0, len(list(r)), "tarek doesn't like cheese")

        # Moving the triple to the other graph must hide it from g.
        g2.add((tarek, likes, pizza))
        g.remove((tarek, likes, pizza))
        r = g.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEqual(1, len(list(r)), "only bob likes pizza")

    def testConjunctiveDefault(self):
        """Query the conjunctive graph itself rather than one context."""
        g = self.graph.get_context(graphuri)
        g.add((tarek, likes, pizza))
        g2 = self.graph.get_context(othergraphuri)
        g2.add((bob, likes, pizza))
        g.add((tarek, hates, cheese))

        self.assertEqual(2, len(g), 'graph contains 2 triples')
        self.assertEqual(3, len(self.graph),
                         'default union graph contains three triples')

        r = self.graph.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEqual(2, len(list(r)), "two people like pizza")

        r = self.graph.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }",
                             initBindings={'s': tarek})
        self.assertEqual(1, len(list(r)), "i was asking only about tarek")

        r = self.graph.triples((tarek, likes, pizza))
        self.assertEqual(1, len(list(r)), "i was asking only about tarek")

        r = self.graph.triples((tarek, likes, cheese))
        self.assertEqual(0, len(list(r)), "tarek doesn't like cheese")

        g2.remove((bob, likes, pizza))

        r = self.graph.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEqual(1, len(list(r)), "only tarek likes pizza")
Beispiel #52
0
class Trace(object):
    """
    Records provenance of executed shell commands in an RDF graph.

    Each call to execute() adds an activity with start/end instants, its
    literal parameters, the expressions it used and the expressions it
    generated.  self.trail maps a work URI to the list of expression URIs
    minted for it, and self.PROV["Activity"] to the list of activity URIs,
    both in the order they were added.
    """

    def __init__(self, provns="http://www.example.com/prov/", trailFile=None, logLevel=logging.DEBUG):
        """
        Constructor
            @param provns - Namespace in which activities, works and expressions are minted
            @param trailFile - Optional n3 file holding an earlier trail to load into the graph
            @param logLevel - Level set on the module logger
        """

        self.trail = {}

        # Initialise logger

        self.log = logging.getLogger(__name__)
        self.log.setLevel(logLevel)

        # getLogger returns the same logger for every instance; attaching a
        # handler per instance would repeat each message once per Trace created.
        if not self.log.handlers:
            logHandler = logging.StreamHandler()
            logHandler.setLevel(logging.DEBUG)

            logFormatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
            logHandler.setFormatter(logFormatter)

            self.log.addHandler(logHandler)

        # Initialise graph
        self.log.debug("Initialising graph")
        self.g = ConjunctiveGraph()

        self.log.debug("Initialising namespaces")
        self.PROV = Namespace("http://www.w3.org/ns/prov#")
        self.D2S = Namespace("http://aers.data2semantics.org/vocab/provenance/")
        self.FRBR = Namespace("http://purl.org/vocab/frbr/core#")
        self.TIME = Namespace("http://www.w3.org/2006/time#")
        self.PROVNS = Namespace(provns)

        self.log.debug("Binding namespace prefixes")
        self.g.bind("prov", self.PROV)
        self.g.bind("d2sprov", self.D2S)
        self.g.bind("frbr", self.FRBR)
        self.g.bind("time", self.TIME)
        self.g.bind("provns", self.PROVNS)

        if trailFile:
            # Best effort: a missing or unreadable trail must not prevent
            # starting a fresh one, but the actual reason is kept in the log.
            try:
                self.log.debug("Loading provenance trail file")
                self.g.parse(trailFile, format="n3")
                self.buildProvenanceTrail()
            except Exception as e:
                self.log.warning("Trailfile does not exist yet...")
                self.log.debug("Could not load provenance trail: {0}".format(e))

        self.log.info("Initialised")

    def buildProvenanceTrail(self):
        """
        Rebuilds self.trail from the triples currently in the graph.

        The queries use the same namespaces the class writes with
        (self.PROV, self.FRBR, self.TIME), so a trail produced by execute()
        and serialize() is found again after it has been parsed back in.
        """
        self.log.debug("Loading provenance trail")
        plugin.register("sparql", query.Processor, "rdfextras.sparql.processor", "Processor")
        plugin.register("sparql", query.Result, "rdfextras.sparql.query", "SPARQLQueryResult")

        # The 'provo' prefix is kept in the query text but bound to self.PROV.
        namespaces = dict(frbr=self.FRBR, provo=self.PROV, time=self.TIME)

        expressions_works = self.g.query(
            """SELECT DISTINCT ?w ?e
       WHERE {
          ?w rdf:type frbr:Work .
          ?e frbr:realizationOf ?w .
          ?e provo:wasGeneratedAt ?t .
          ?t time:inXSDDateTime ?dt .
       } ORDER BY ?w, ?dt """,
            initNs=namespaces,
        )

        for row in expressions_works.result:
            (work, expression) = row

            self.trail.setdefault(work, []).append(expression)

            self.log.debug("Work: %s\nExpression: %s" % (work, expression))

        activities = self.g.query(
            """SELECT DISTINCT ?a
       WHERE {
          ?a rdf:type provo:Activity .
          ?a provo:endedAt ?t .
          ?t time:inXSDDateTime ?dt .
       } ORDER BY ?dt """,
            initNs=namespaces,
        )
        self.log.debug(activities.result)

        for row in activities.result:

            self.trail.setdefault(self.PROV["Activity"], []).append(row)

            self.log.debug("Activity: %s" % (row))

        self.log.debug(self.trail)

    def execute(self, params=[], inputs=[], outputs=[], replace=None, logOutput=True, sandbox=False):
        """
        Calls a commandline script using subprocess.check_output, and captures relevant provenance information
            @param params - A list of strings used as arguments to check_output; params[0] names the command
            @param inputs - A list of strings (QNames) for all input resources
            @param outputs - A list of strings (QNames) for all output resources
            @param replace - A string that should not be reported in in the provenance trail (e.g. a password)
            @param logOutput - A boolean option for capturing the output of the shell script in an rdfs:comment field
            @param sandbox - When true the command is not run; a placeholder text stands in for its output

        The list defaults are only read, never modified.
        """

        commandURI = self.mintActivity(params[0])

        # Get & set the starting time
        start = self.mintTime()
        self.g.add((commandURI, self.PROV["startedAt"], start))

        # Execute the command specified in params
        self.log.debug("Executing {0}".format(params))

        if not sandbox:
            output = check_output(params)
        else:
            self.log.debug("Sandbox mode: command not executed, no actual output generated")
            output = "Sandbox mode: command not executed, no actual output generated"

        # Optionally store the command stdout to a literal value
        if logOutput:
            self.g.add((commandURI, RDFS.comment, Literal(output, datatype=XSD.string)))

        # Get & set the end time
        end = self.mintTime()
        self.g.add((commandURI, self.PROV["endedAt"], end))

        # Store all parameters that are neither inputs nor outputs as literals
        for p in params[1:]:
            if not (p in inputs or p in outputs):
                pclean = self._hide(p, replace)

                self.log.debug("Adding literal parameter value: {0}".format(pclean))
                self.g.add((commandURI, self.D2S["parameter"], Literal(pclean)))

        for p in inputs:
            pclean = self._hide(p, replace)
            # p is an input to the process, and thus a resource by itself
            # p is a frbr:Expression (version) of a work (e.g. we could generate multiple versions of the same file)

            # If a work & expression for 'p' has already been specified, use the latest one.
            # The work URI is derived exactly as mintExpression derives it,
            # otherwise names such as "./file" would never be found again.
            p_work = self.PROVNS[self._workName(pclean)]
            if p_work in self.trail:
                pExpressionURI = self.trail[p_work][-1]
                self.log.debug("Found previous expression: {0}".format(pExpressionURI))
                # And this means that the current Activity 'wasInformedBy' the process that generated the expression
                for (subj, pred, activity) in self.g.triples((pExpressionURI, self.PROV["wasGeneratedBy"], None)):
                    self.log.debug(
                        "Adding provo:wasInformedBy dependency between {0} and {1}".format(commandURI, activity)
                    )
                    self.g.add((commandURI, self.PROV["wasInformedBy"], activity))

            # Otherwise create a new expression
            else:
                pExpressionURI = self.mintExpression(pclean)
                self.log.debug("Minted new input expression: {0}".format(pExpressionURI))

            self.g.add((commandURI, self.PROV["used"], pExpressionURI))

        for p in outputs:
            pclean = self._hide(p, replace)

            pExpressionURI = self.mintExpression(pclean)
            self.log.debug("Minted new output expression: {0}".format(pExpressionURI))

            self.g.add((pExpressionURI, self.PROV["wasGeneratedBy"], commandURI))
            self.g.add((pExpressionURI, self.PROV["wasGeneratedAt"], end))

    def _hide(self, p, replace):
        """Returns p with every occurrence of 'replace' turned into 'HIDDENVALUE' (useful for passwords)."""
        if replace:
            return p.replace(replace, "HIDDENVALUE")
        return p

    def _workName(self, p):
        """
        Returns the name under which the work for resource 'p' is stored in
        self.PROVNS: 'p' itself for http:// URIs, otherwise the quoted name
        with any leading '.' and '/' characters removed.
        """
        if p.startswith("http://"):
            return p
        return quote(p, safe="~/").lstrip("./")

    def mintActivity(self, p):
        """
        Adds a new activity for shell command 'p' to the graph and the trail.

        The activity is linked to its plan, the literal command, the current
        user and, if there is one, the previously recorded activity.
        Returns the URI of the new activity.
        """
        porig = p
        p = quote(p, safe="~/")
        p = p.lstrip("./")

        commandURI = self.PROVNS["{0}_{1}".format(p, datetime.now().isoformat())]
        commandTypeURI = self.D2S[p.capitalize()]

        if self.PROV["Activity"] in self.trail:
            lastActivity = self.trail[self.PROV["Activity"]][-1]
            self.log.debug(
                "Adding provo:wasScheduledAfter dependency between {0} and {1}".format(commandURI, lastActivity)
            )
            self.g.add((commandURI, self.PROV["wasScheduledAfter"], lastActivity))

        self.g.add((commandTypeURI, RDF.type, self.PROV["Plan"]))
        self.g.add((commandURI, self.PROV["hadPlan"], commandTypeURI))
        self.g.add((commandURI, RDF.type, self.PROV["Activity"]))
        self.g.add((commandURI, self.D2S["shellCommand"], Literal(porig)))

        # NOTE(review): os.getlogin() raises OSError when the process has no
        # controlling terminal (cron, services) -- confirm this is acceptable.
        userURI = URIRef("http://{0}/{1}".format(socket.gethostname(), os.getlogin()))
        self.g.add((commandURI, self.PROV["wasControlledBy"], userURI))

        # Add the activity to the list of activities in the provenance trail
        self.trail.setdefault(self.PROV["Activity"], []).append(commandURI)

        return commandURI

    def mintTime(self):
        """Adds a time:Instant blank node carrying the current time and returns it."""
        time = BNode()
        now = datetime.now().isoformat()
        self.g.add((time, RDF.type, self.TIME["Instant"]))
        self.g.add((time, self.TIME["inXSDDateTime"], Literal(now, datatype=XSD.dateTime)))

        return time

    def mintExpression(self, p):
        """
        Adds a new, timestamped expression (version) of the work named by
        'p' to the graph and the trail, and returns the expression URI.
        """
        stamp = datetime.now().isoformat()
        name = self._workName(p)

        # If the parameter is a URI, just use it, but add a timestamp
        if p.startswith("http://"):
            pExpressionURI = URIRef("{0}_{1}".format(p, stamp))
        # Else mint a new URI within our own namespace
        else:
            pExpressionURI = self.PROVNS["{0}_{1}".format(name, stamp)]

        workURI = self.PROVNS[name]

        # Add the Expression to the trail for its Work
        self.trail.setdefault(workURI, []).append(pExpressionURI)

        self.g.add((workURI, RDF.type, self.FRBR["Work"]))
        self.g.add((pExpressionURI, RDF.type, self.FRBR["Expression"]))
        self.g.add((pExpressionURI, RDF.type, self.PROV["Entity"]))

        self.g.add((pExpressionURI, self.FRBR["realizationOf"], workURI))

        return pExpressionURI

    def serialize(self, trailFile="out.ttl"):
        """
        Writes the graph as turtle to 'trailFile' and returns whatever the
        graph's serialize call returned.  The file is closed before returning.
        """
        with open(trailFile, "w") as f:
            print("Serializing to {}".format(trailFile))
            result = self.g.serialize(f, format="turtle")
        print("Done")
        return result
class TestSparql11(unittest.TestCase):

    def setUp(self):
        self.longMessage = True
        self.graph = ConjunctiveGraph('SPARQLUpdateStore')

        root = HOST + DB
        self.graph.open((root + "sparql", root + "update"))

        # clean out the store
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0

    def tearDown(self):
        self.graph.close()

    def testSimpleGraph(self):
        g = self.graph.get_context(graphuri)
        g.add((tarek, likes, pizza))
        g.add((bob, likes, pizza))
        g.add((bob, likes, cheese))

        g2 = self.graph.get_context(othergraphuri)
        g2.add((michel, likes, pizza))

        self.assertEquals(3, len(g), 'graph contains 3 triples')
        self.assertEquals(1, len(g2), 'other graph contains 1 triple')

        r = g.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEquals(2, len(list(r)), "two people like pizza")

        r = g.triples((None, likes, pizza))
        self.assertEquals(2, len(list(r)), "two people like pizza")

        # Test initBindings
        r = g.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }",
                    initBindings={'s': tarek})
        self.assertEquals(1, len(list(r)), "i was asking only about tarek")

        r = g.triples((tarek, likes, pizza))
        self.assertEquals(1, len(list(r)), "i was asking only about tarek")

        r = g.triples((tarek, likes, cheese))
        self.assertEquals(0, len(list(r)), "tarek doesn't like cheese")

        g2.add((tarek, likes, pizza))
        g.remove((tarek, likes, pizza))
        r = g.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEquals(1, len(list(r)), "only bob likes pizza")

    def testConjunctiveDefault(self):
        g = self.graph.get_context(graphuri)
        g.add((tarek, likes, pizza))
        g2 = self.graph.get_context(othergraphuri)
        g2.add((bob, likes, pizza))
        g.add((tarek, hates, cheese))

        self.assertEquals(2, len(g), 'graph contains 2 triples')

        # the following are actually bad tests as they depend on your endpoint,
        # as pointed out in the sparqlstore.py code:
        #
        ## For ConjunctiveGraphs, reading is done from the "default graph" Exactly
        ## what this means depends on your endpoint, because SPARQL does not offer a
        ## simple way to query the union of all graphs as it would be expected for a
        ## ConjuntiveGraph.
        ##
        ## Fuseki/TDB has a flag for specifying that the default graph
        ## is the union of all graphs (tdb:unionDefaultGraph in the Fuseki config).
        self.assertEquals(3, len(self.graph),
            'default union graph should contain three triples but contains:\n'
            '%s' % list(self.graph))

        r = self.graph.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEquals(2, len(list(r)), "two people like pizza")

        r = self.graph.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }",
                             initBindings={'s': tarek})
        self.assertEquals(1, len(list(r)), "i was asking only about tarek")

        r = self.graph.triples((tarek, likes, pizza))
        self.assertEquals(1, len(list(r)), "i was asking only about tarek")

        r = self.graph.triples((tarek, likes, cheese))
        self.assertEquals(0, len(list(r)), "tarek doesn't like cheese")

        g2.remove((bob, likes, pizza))

        r = self.graph.query("SELECT * WHERE { ?s <urn:likes> <urn:pizza> . }")
        self.assertEquals(1, len(list(r)), "only tarek likes pizza")

    def testUpdate(self):
        self.graph.update("INSERT DATA { GRAPH <urn:graph> { <urn:michel> <urn:likes> <urn:pizza> . } }")

        g = self.graph.get_context(graphuri)
        self.assertEquals(1, len(g), 'graph contains 1 triples')

    def testUpdateWithInitNs(self):
        self.graph.update(
            "INSERT DATA { GRAPH ns:graph { ns:michel ns:likes ns:pizza . } }",
            initNs={'ns': URIRef('urn:')}
        )

        g = self.graph.get_context(graphuri)
        self.assertEquals(
            set(g.triples((None,None,None))),
            set([(michel,likes,pizza)]),
            'only michel likes pizza'
        )

    def testUpdateWithInitBindings(self):
        self.graph.update(
            "INSERT { GRAPH <urn:graph> { ?a ?b ?c . } } WherE { }",
            initBindings={
                'a': URIRef('urn:michel'),
                'b': URIRef('urn:likes'),
                'c': URIRef('urn:pizza'),
            }
        )

        g = self.graph.get_context(graphuri)
        self.assertEquals(
            set(g.triples((None,None,None))),
            set([(michel,likes,pizza)]),
            'only michel likes pizza'
        )

    def testMultipleUpdateWithInitBindings(self):
        self.graph.update(
            "INSERT { GRAPH <urn:graph> { ?a ?b ?c . } } WHERE { };"
            "INSERT { GRAPH <urn:graph> { ?d ?b ?c . } } WHERE { }",
            initBindings={
                'a': URIRef('urn:michel'),
                'b': URIRef('urn:likes'),
                'c': URIRef('urn:pizza'),
                'd': URIRef('urn:bob'),
            }
        )

        g = self.graph.get_context(graphuri)
        self.assertEquals(
            set(g.triples((None,None,None))),
            set([(michel,likes,pizza), (bob,likes,pizza)]),
            'michel and bob like pizza'
        )

    def testNamedGraphUpdate(self):
        g = self.graph.get_context(graphuri)
        r1 = "INSERT DATA { <urn:michel> <urn:likes> <urn:pizza> }"
        g.update(r1)
        self.assertEquals(
            set(g.triples((None,None,None))),
            set([(michel,likes,pizza)]),
            'only michel likes pizza'
        )

        r2 = "DELETE { <urn:michel> <urn:likes> <urn:pizza> } " + \
             "INSERT { <urn:bob> <urn:likes> <urn:pizza> } WHERE {}"
        g.update(r2)
        self.assertEquals(
            set(g.triples((None, None, None))),
            set([(bob, likes, pizza)]),
            'only bob likes pizza'
        )
        says = URIRef("urn:says")

        # Strings with unbalanced curly braces
        tricky_strs = ["With an unbalanced curly brace %s " % brace
                       for brace in ["{", "}"]]
        for tricky_str in tricky_strs:
            r3 = """INSERT { ?b <urn:says> "%s" }
            WHERE { ?b <urn:likes> <urn:pizza>} """ % tricky_str
            g.update(r3)

        values = set()
        for v in g.objects(bob, says):
            values.add(str(v))
        self.assertEquals(values, set(tricky_strs))

        # Complicated Strings
        r4strings = []
        r4strings.append(ur'''"1: adfk { ' \\\" \" { "''')
        r4strings.append(ur'''"2: adfk } <foo> #éï \\"''')

        r4strings.append(ur"""'3: adfk { " \\\' \' { '""")
        r4strings.append(ur"""'4: adfk } <foo> #éï \\'""")

        r4strings.append(ur'''"""5: adfk { ' \\\" \" { """''')
        r4strings.append(ur'''"""6: adfk } <foo> #éï \\"""''')
        r4strings.append(u'"""7: ad adsfj \n { \n sadfj"""')

        r4strings.append(ur"""'''8: adfk { " \\\' \' { '''""")
        r4strings.append(ur"""'''9: adfk } <foo> #éï \\'''""")
        r4strings.append(u"'''10: ad adsfj \n { \n sadfj'''")

        r4 = "\n".join([
            u'INSERT DATA { <urn:michel> <urn:says> %s } ;' % s
            for s in r4strings
        ])
        g.update(r4)
        values = set()
        for v in g.objects(michel, says):
            values.add(unicode(v))
        self.assertEquals(values, set([re.sub(ur"\\(.)", ur"\1", re.sub(ur"^'''|'''$|^'|'$|" + ur'^"""|"""$|^"|"$', ur"", s)) for s in r4strings]))
Beispiel #54
0
class Inspector(object):
    """Query helper for an RDFS/OWL ontology held in an rdflib graph.

    After construction the instance exposes:

    * ``rdfGraph``   -- the ``ConjunctiveGraph`` the ontology was parsed into
    * ``baseURI``    -- the first ``Ontology``-typed subject found, else ``uri``
    * ``allclasses`` -- the nodes collected by ``__getAllClasses`` with domain,
      range and implicit classes included and nothing filtered out
    """

    def __init__(self, uri, language=""):
        """Parse the ontology at ``uri`` and index its classes.

        The source is tried as RDF/XML first and as N3 second.

        :param uri: source handed to ``ConjunctiveGraph.parse``.
        :param language: kept for interface compatibility; not used here.
        :raises exceptions.Error: when neither format can be parsed.
        """
        super(Inspector, self).__init__()
        self.rdfGraph = ConjunctiveGraph()
        try:
            self.rdfGraph.parse(uri, format="application/rdf+xml")
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed by the fallback.
            try:
                self.rdfGraph.parse(uri, format="n3")
            except Exception:
                raise exceptions.Error(
                    "Could not parse the file! Is it a valid RDF/OWL ontology?"
                )

        # Only index once parsing succeeded. This used to live in a
        # ``finally`` clause, which also ran on the failure path and could
        # replace the parse error with an unrelated one.
        self.baseURI = self.get_OntologyURI() or uri
        self.allclasses = self.__getAllClasses(includeDomainRange=True,
                                               includeImplicit=True,
                                               removeBlankNodes=False,
                                               excludeRDF_OWL=False)

    def get_OntologyURI(self, return_as_string=True):
        """Return the first subject typed as ``Ontology``, or ``None``.

        :param return_as_string: when true the subject is converted with
            ``str``; otherwise the graph node itself is returned.
        """
        ontologies = [
            s for s, p, o in self.rdfGraph.triples((None, RDF.type, Ontology))
        ]
        if not ontologies:
            return None
        first = ontologies[0]
        return str(first) if return_as_string else first

    def __getAllClasses(self,
                        classPredicate="",
                        includeDomainRange=False,
                        includeImplicit=False,
                        removeBlankNodes=True,
                        addOWLThing=True,
                        excludeRDF_OWL=True):
        """Collect the class-like nodes of the graph.

        :param classPredicate: ``"rdfs"`` collects subjects typed
            ``RDFS.Class``, ``"owl"`` subjects typed ``Class``, ``""`` both.
        :param includeDomainRange: also collect every object of
            ``RDFS.domain`` and ``RDFS.range``.
        :param includeImplicit: also collect both ends of every
            ``RDFS.subClassOf`` triple and every object of ``RDF.type``.
        :param removeBlankNodes: drop nodes for which ``isBlankNode`` is true.
        :param addOWLThing: seed the result with ``Thing`` (still subject to
            the ``excludeRDF_OWL`` filter).
        :param excludeRDF_OWL: skip nodes whose URI starts with the OWL, RDF
            or RDFS namespace.
        :return: the collected nodes passed through ``sort_uri_list_by_name``.
        """
        rdfGraph = self.rdfGraph
        # A dict is used as a duplicate-free container, as in the original.
        classes = {}
        core_namespaces = (
            'http://www.w3.org/2002/07/owl#',
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            "http://www.w3.org/2000/01/rdf-schema#",
        )

        def addIfYouCan(x):
            if excludeRDF_OWL and x.startswith(core_namespaces):
                return
            if x not in classes:
                classes[x] = None

        if addOWLThing:
            addIfYouCan(Thing)

        if classPredicate in ("rdfs", ""):
            for s in rdfGraph.subjects(RDF.type, RDFS.Class):
                addIfYouCan(s)

        if classPredicate in ("owl", ""):
            for s in rdfGraph.subjects(RDF.type, Class):
                addIfYouCan(s)

        if includeDomainRange:
            for o in rdfGraph.objects(None, RDFS.domain):
                addIfYouCan(o)
            for o in rdfGraph.objects(None, RDFS.range):
                addIfYouCan(o)

        if includeImplicit:
            for s, v, o in rdfGraph.triples((None, RDFS.subClassOf, None)):
                addIfYouCan(s)
                addIfYouCan(o)
            for o in rdfGraph.objects(None, RDF.type):
                addIfYouCan(o)

        # ``list(...)`` instead of ``.keys()``: on Python 3 the latter is a
        # view, not a list, which later list-style handling may not accept.
        result = list(classes)
        if removeBlankNodes:
            result = [x for x in result if not isBlankNode(x)]
        return sort_uri_list_by_name(result)

    def __getTopclasses(self, classPredicate=''):
        """Return the classes for which ``get_classDirectSupers`` is empty."""
        returnlist = [
            eachclass
            for eachclass in self.__getAllClasses(classPredicate)
            if not self.get_classDirectSupers(eachclass)
        ]
        return sort_uri_list_by_name(returnlist)

    def __getTopLayer(self):
        """Return ``self.toplayer``, computing it on first use.

        This class never assigns ``toplayer`` itself, so the tree builders
        used to fail with ``AttributeError`` unless something else had set it.
        """
        try:
            return self.toplayer
        except AttributeError:
            self.toplayer = self.__getTopclasses()
            return self.toplayer

    def __getTree(self, father=None, out=None):
        """Build a ``{class: direct subclasses}`` mapping.

        Called without ``father`` it starts from the top layer, stores that
        layer under key ``0`` and returns the mapping. Called with ``father``
        it fills ``out`` in place and returns ``None``.
        """
        if not father:
            out = {}
            topclasses = self.__getTopLayer()
            out[0] = topclasses

            for top in topclasses:
                children = self.get_classDirectSubs(top)
                out[top] = children
                for potentialfather in children:
                    self.__getTree(potentialfather, out)

            return out

        # Already expanded: skip, so cyclic subClassOf chains terminate.
        if father in out:
            return None
        children = self.get_classDirectSubs(father)
        out[father] = children
        for ch in children:
            self.__getTree(ch, out)

    def __buildClassTree(self, father=None, out=None):
        """Like ``__getTree`` but rooted at ``Thing`` with sorted children.

        Key ``0`` maps to ``[Thing]`` and ``Thing`` maps to the sorted top
        layer; every other key maps to its sorted direct subclasses.
        """
        if not father:
            out = {}
            topclasses = self.__getTopLayer()
            out[0] = [Thing]
            out[Thing] = sort_uri_list_by_name(topclasses)
            for top in topclasses:
                children = self.get_classDirectSubs(top)
                out[top] = sort_uri_list_by_name(children)
                for potentialfather in children:
                    self.__buildClassTree(potentialfather, out)
            return out

        # Already expanded: skip, so cyclic subClassOf chains terminate.
        if father in out:
            return None
        children = self.get_classDirectSubs(father)
        out[father] = sort_uri_list_by_name(children)
        for ch in children:
            self.__buildClassTree(ch, out)

    # Methods for getting ancestors and descendants of classes: by default,
    # blank nodes are not included.
    def get_classDirectSupers(self,
                              aClass,
                              excludeBnodes=True,
                              sortUriName=False):
        """Return the direct ``RDFS.subClassOf`` objects of ``aClass``.

        ``Thing`` is always left out.

        :param excludeBnodes: drop objects for which ``isBlankNode`` is true.
        :param sortUriName: pass the result through
            ``sort_uri_list_by_name`` before returning it.
        """
        returnlist = []
        for o in self.rdfGraph.objects(aClass, RDFS.subClassOf):
            if o == Thing:
                continue
            if excludeBnodes and isBlankNode(o):
                continue
            returnlist.append(o)

        unique = remove_duplicates(returnlist)
        if sortUriName:
            return sort_uri_list_by_name(unique)
        return unique

    def get_classDirectSubs(self, aClass, excludeBnodes=True):
        """Return the subjects that are ``RDFS.subClassOf`` ``aClass``.

        :param excludeBnodes: drop subjects for which ``isBlankNode`` is true.
        """
        returnlist = []
        for s, v, o in self.rdfGraph.triples((None, RDFS.subClassOf, aClass)):
            if excludeBnodes and isBlankNode(s):
                continue
            returnlist.append(s)
        return sort_uri_list_by_name(remove_duplicates(returnlist))

    def get_classSiblings(self, aClass, excludeBnodes=True):
        """Return the other direct subclasses of ``aClass``'s direct supers."""
        returnlist = []
        for father in self.get_classDirectSupers(aClass, excludeBnodes):
            for child in self.get_classDirectSubs(father, excludeBnodes):
                if child != aClass:
                    returnlist.append(child)

        return sort_uri_list_by_name(remove_duplicates(returnlist))

    def entitySynonyms(self, anEntity, language=DEFAULT_LANGUAGE, getall=True):
        """Return synonyms attached to ``anEntity``.

        :param getall: when true, return a list with every object of the
            ``Synonym`` and ``OBO_Synonym`` predicates. When false, return the
            first ``Synonym`` object whose ``language`` equals ``language``,
            or ``""`` if there is none.
        """
        if getall:
            temp = []
            # NOTE(review): the original queried ``Synonym`` twice (labelled
            # "Uberon" and "EFO"), so every such synonym came back twice.
            # Presumably the EFO lookup was meant to use its own predicate;
            # none is visible here, so the duplicate query is dropped.
            for predicate in (Synonym, OBO_Synonym):
                temp.extend(self.rdfGraph.objects(anEntity, predicate))
            return temp

        for o in self.rdfGraph.objects(anEntity, Synonym):
            # Default of None: nodes without a ``language`` attribute are
            # skipped instead of raising AttributeError.
            o_language = getattr(o, 'language', None)
            if o_language and o_language == language:
                return o
        return ""

    def classFind(self, name, exact=False):
        """Look ``name`` up case-insensitively in ``self.allclasses``.

        :param exact: when true, return ``[x]`` for the first class whose
            string form equals ``name``; otherwise return every class whose
            string form contains ``name``.
        :return: a list of matches; empty when ``name`` is falsy or nothing
            matches.
        """
        temp = []
        if not name:
            return temp

        wanted = str(name).lower()  # hoisted: invariant across the loop
        for x in self.allclasses:
            candidate = str(x).lower()
            if exact:
                if candidate == wanted:
                    return [x]
            elif wanted in candidate:
                temp.append(x)
        return temp
Beispiel #55
0
# Scratch script: load an RSS file into an rdflib graph, retype every
# RSS.item as BIBO.Article, then read a few attributes back via rdfalchemy.
from rdflib import ConjunctiveGraph
g = ConjunctiveGraph()
g.parse('../data/hm_17_1.rss')
#len(g)
import sys
# The parent directory must be on the path before ``model`` can be imported.
sys.path.append('../')
from model.namespaces import *
from model.bibo import Article

from rdfalchemy import rdfSubject

nsm = g._get_namespace_manager()
# NOTE(review): 'http:prism.com' does not look like a usable namespace URI;
# the commented-out PRISM2 line below may be what was intended -- confirm.
nsm.bind('prism', 'http:prism.com')
print(g.serialize())
#PRISM2 = Namespace('http://prismstandard.org/namespaces/basic/2.0/')

# Snapshot the matches first: the loop body adds and removes triples, and
# mutating the graph while its triples() generator is still being consumed
# can skip items or fail, depending on the store.
for s, p, o in list(g.triples((None, RDF.type, RSS.item))):
    g.add((s, p, BIBO.Article))
    g.remove((s, p, o))

rdfSubject.db = g

l = list(Article.ClassInstances())
# Takes the second instance; raises IndexError if fewer than two exist.
a = l[1]
print(a.title)
print(a.creators)
print(a.sPg)
Beispiel #56
0
class TestInfer(TestCase):
    """Exercise the ``Infer`` rules and validators on a small FOAF model."""

    def setUp(self):
        """Create a graph holding the default schemas and the mini FOAF ontology."""
        self.model = graph = ConjunctiveGraph()
        add_default_schemas(graph)
        graph.parse(data=MINI_FOAF_ONTOLOGY, format='turtle')

    def _count_matches(self, pattern):
        """Return how many triples in the model match ``pattern``."""
        return len(list(self.model.triples(pattern)))

    def _check_message(self, message, prefix, suffix):
        """Assert ``message`` has the given prefix and suffix and names me.jpg."""
        self.assertEqual(message[:len(prefix)], prefix)
        self.assertTrue('http://example.org/me.jpg' in message)
        self.assertEqual(message[-len(suffix):], suffix)

    def test_class(self):
        """``_rule_class`` adds the rdfs:Class typing for me.jpg."""
        example = Namespace('http://example.org/')
        self.model.parse(data=FOAF_DATA, format='turtle')
        infer = Infer(self.model)

        pattern = [example['me.jpg'], RDF['type'], RDFS['Class']]
        self.assertEqual(self._count_matches(pattern), 0)

        infer._rule_class()

        pattern = [example['me.jpg'], RDF['type'], RDFS['Class']]
        self.assertEqual(self._count_matches(pattern), 1)

    def test_inverse_of(self):
        """``_rule_inverse_of`` adds exactly one statement, and only once."""
        example = Namespace('http://example.org/')
        self.model.parse(data=FOAF_DATA, format='turtle')
        infer = Infer(self.model)
        depiction = (None, FOAF['depiction'], example['me.jpg'])

        before = len(self.model)
        self.assertEqual(self._count_matches(depiction), 0)
        infer._rule_inverse_of()
        self.assertEqual(self._count_matches(depiction), 1)

        # exactly one statement should have been added
        self.assertEqual(len(self.model), before + 1)

        before = len(self.model)
        infer._rule_inverse_of()
        # both directions are already present, so the size must not change
        self.assertEqual(len(self.model), before)

    def test_validate_types(self):
        """``_validate_types`` reports one error once an extra statement is added."""
        example = Namespace('http://example.org/')
        self.model.parse(data=FOAF_DATA, format='turtle')
        infer = Infer(self.model)

        self.assertEqual(len(list(infer._validate_types())), 0)

        statement = (example['document'], DC['title'], Literal("bleem"))
        self.model.add(statement)
        self.assertEqual(len(list(infer._validate_types())), 1)

    def test_validate_undefined_properties_in_schemas(self):
        """The schemas loaded in ``setUp`` yield no undefined-property errors."""
        example = Namespace('http://example.org/')
        infer = Infer(self.model)

        problems = list(infer._validate_undefined_properties())
        self.assertEqual(len(problems), 0)

    def test_validate_undefined_properties_in_inference(self):
        """Undefined-property and domain/range validation over the FOAF data."""
        example = Namespace('http://example.org/')
        foaf = Namespace('http://xmlns.com/foaf/0.1/')

        self.model.parse(data=FOAF_DATA, format='turtle')

        infer = Infer(self.model)
        problems = list(infer._validate_undefined_properties())
        self.assertEqual(len(problems), 2)

        infer = Infer(self.model)
        problems = list(infer._validate_property_types())
        self.assertEqual(len(problems), 0)

        # A statement expected to produce a single "Domain of" error.
        statement = (example['me.jpg'], FOAF['firstName'], Literal("name"))
        self.model.add(statement)
        problems = list(infer._validate_property_types())
        self.assertEqual(len(problems), 1)
        self._check_message(problems[0],
                            'Domain of ',
                            'http://xmlns.com/foaf/0.1/Person')
        self.model.remove(statement)

        problems = list(infer._validate_property_types())
        self.assertEqual(len(problems), 0)
        statement = (example['foo.txt'], RDF['type'], FOAF['Document'])
        self.model.add(statement)
        statement = (example['me.jpg'], FOAF['depicts'], FOAF['foo.txt'])
        self.model.add(statement)

        # The statements above are expected to produce a single "Range of" error.
        problems = list(infer._validate_property_types())
        self.assertEqual(len(problems), 1)
        self._check_message(problems[0],
                            'Range of ',
                            'http://www.w3.org/2002/07/owl#Thing')
        self.model.remove(statement)

    def test_property_multiple_domain_types(self):
        """A property declared with several domain types validates cleanly."""
        document = """
        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
        @prefix foo: <http://example.org/> .
        @prefix bar: <http://example.com/> .

        foo:AClass a rdfs:Class .
        foo:BClass a rdfs:Class .
        bar:ABarClass a rdfs:Class .

        foo:aprop a rdf:Property ;
            rdfs:domain foo:AClass ;
            rdfs:domain bar:ABarClass ;
            rdfs:range foo:BClass .

        foo:object a foo:BClass .
        foo:subject a foo:AClass ;
           foo:aprop foo:object .
        bar:subject a bar:ABarClass ;
           foo:aprop foo:object .
        """
        self.model.parse(data=document, format='turtle')
        infer = Infer(self.model)

        messages = list(infer._validate_property_types())
        self.assertEqual(len(messages), 0)