コード例 #1
0
ファイル: models.py プロジェクト: delving/nave
    def get_graph_from_sparql_results(sparql_json, named_graph=None):
        """Build a ``ConjunctiveGraph`` from a SPARQL JSON result document.

        :param sparql_json: parsed SPARQL JSON results; ``head.vars`` and
            ``results.bindings`` are read. The document is not modified.
        :param named_graph: identifier for the resulting graph. When falsy and
            the result head declares a ``g`` variable, the ``g`` value of the
            first binding is used instead.
        :return: ``(graph, nr_levels)`` where ``nr_levels`` is the number of
            entries returned by ``RDFModel.get_context_triples``;
            ``(ConjunctiveGraph(), 0)`` when there are no bindings.
        """
        bindings = sparql_json['results']['bindings']
        if not bindings:
            return ConjunctiveGraph(), 0
        head_vars = sparql_json['head']['vars']
        if 'g' in head_vars and not named_graph:
            named_graph = bindings[0]['g']['value']
        # Work on a filtered copy: removing 'g' from the caller's list in place
        # would make a second call on the same document lose the graph name.
        sparql_vars = [var for var in head_vars if var != 'g']
        triple_levels = RDFModel.get_context_triples(sparql_vars)
        nr_levels = len(triple_levels)
        if named_graph:
            named_graph = URIRef(named_graph)
        graph = ConjunctiveGraph(identifier=named_graph)

        graph.namespace_manager = namespace_manager
        for binding in bindings:
            binding_levels = RDFModel.get_context_levels(len(binding))
            for s, p, o in triple_levels[:binding_levels]:
                subject_cls = BNode if binding[s]['type'] == 'bnode' else URIRef
                subject = subject_cls(binding[s]['value'])
                predicate = URIRef(binding[p]['value'])
                obj = RDFModel.get_object_from_sparql_result(binding[o])
                graph.add((subject, predicate, obj))
        # materialize inferences: rewrite every ore:isAggregatedBy statement as
        # the inverse ore:aggregates statement.
        is_aggregated_by = URIRef("http://www.openarchives.org/ore/terms/isAggregatedBy")
        aggregates = URIRef("http://www.openarchives.org/ore/terms/aggregates")
        # Snapshot the matches first; the loop body adds and removes triples and
        # must not do so while the graph is still being iterated.
        for subject, obj in list(graph.subject_objects(predicate=is_aggregated_by)):
            graph.add((obj, aggregates, subject))
            graph.remove((subject, is_aggregated_by, obj))
        return graph, nr_levels
コード例 #2
0
ファイル: rdf_cleanup2.py プロジェクト: koo5/hackery2
def run(input_file, input_format_hint, output_format):
    """Parse an RDF file, re-insert its triples ordered by subject, and print it.

    :param input_file: source passed to ``ConjunctiveGraph.parse``.
    :param input_format_hint: parser format name passed to ``parse``.
    :param output_format: serializer format name passed to ``serialize``.

    The serialized graph is written to stdout line by line; nothing is
    returned.
    """
    g = ConjunctiveGraph(store=OrderedAndIndexedStore())
    g.parse(input_file, format=input_format_hint)

    # Snapshot all triples (sorted by subject) before the graph is cleared.
    triples = sorted(g.triples((None, None, None)), key=lambda t: t[0])

    g.remove((None, None, None))
    for t in triples:
        g.add(t)

    serialized = g.serialize(format=output_format)
    # Older rdflib returns bytes here, rdflib >= 6 returns str; calling
    # .decode() unconditionally fails on the latter.
    if isinstance(serialized, bytes):
        lines = [raw.decode() for raw in serialized.splitlines()]
    else:
        lines = serialized.splitlines()
    for line in lines:
        print(line)
コード例 #3
0
def update_mediator(params):
    """Write user metadata into the mediator's RDF file and save it.

    :param params: mapping of submitted values. ``username`` is required;
        ``firstname``, ``lastname``, ``email``, ``title`` and ``department``
        are applied only when present and non-empty. ``department`` may hold
        several values separated by ``;``.
    :return: ``False`` when no non-empty ``username`` is given, else ``True``
        after the file has been rewritten.
    """
    if not ('username' in params and params['username']):
        return False
    det = get_mediator_details(params['username'])
    mediator_file = os.path.join(ag.mediatorsdir, '%s.rdf' % params['username'])
    graph = Graph()
    graph.parse(mediator_file)
    for prefix, url in namespaces.iteritems():
        graph.bind(prefix, URIRef(url))
    uri = URIRef(det['uri'])

    # (params key, namespace prefix, term) for properties holding one literal;
    # the existing value(s) are replaced by the submitted one.
    single_valued = (
        ('firstname', 'foaf', 'firstName'),
        ('lastname', 'foaf', 'lastName'),
        ('email', 'foaf', 'mbox'),
        ('title', 'foaf', 'title'),
    )
    for key, ns, term in single_valued:
        if key in params and params[key]:
            graph.remove((uri, namespaces[ns][term], None))
            graph.add((uri, namespaces[ns][term], Literal(params[key])))

    if 'department' in params and params['department']:
        graph.remove((uri, namespaces['dcterms']['isPartOf'], None))
        for d in params['department'].split(';'):
            graph.add((uri, namespaces['dcterms']['isPartOf'], Literal(d.strip())))

    # Serialize before opening the target so a serializer failure cannot
    # truncate the existing file; the context manager closes the handle even
    # if the write fails.
    rdf_str = graph.serialize()
    with codecs.open(mediator_file, 'w', 'utf-8') as f:
        f.write(rdf_str)
    return True
コード例 #4
0
def change_status(vocabprefix, uri, predicate, message, action):
    """Add or remove one statement in a vocabulary's ``status.rdf`` file.

    :param vocabprefix: directory name of the vocabulary under
        ``ag.vocabulariesdir``.
    :param uri: subject URI of the statement.
    :param predicate: ``prefix:term`` string; ``prefix`` is looked up in
        ``namespaces``.
    :param message: object of the statement. Values starting with
        ``http://`` or ``file://`` become a ``URIRef``, other non-empty values
        a ``Literal``; falsy values are passed through unchanged.
    :param action: ``'add'`` or ``'remove'``.
    :return: ``False`` for an unknown action or a missing status file, else
        ``True`` after the file has been rewritten.
    """
    if action not in ('add', 'remove'):
        return False
    vocab_uri = URIRef(uri)
    vocabdir = os.path.join(ag.vocabulariesdir, vocabprefix)
    vocabstatusfile = os.path.join(vocabdir, "status.rdf")
    if not os.path.isfile(vocabstatusfile):
        return False
    graph = Graph()
    graph.parse(vocabstatusfile)
    predicate = predicate.split(':')
    ns = predicate[0]
    term = predicate[1]
    if message and message.startswith(('http://', 'file://')):
        message = URIRef(message)
    elif message:
        message = Literal(message)
    if action == 'add':
        for prefix, url in namespaces.iteritems():
            graph.bind(prefix, URIRef(url))
        graph.add((vocab_uri, namespaces[ns][term], message))
    else:
        graph.remove((vocab_uri, namespaces[ns][term], message))

    # Serialize first, then write; the context manager closes the handle even
    # if the write fails.
    rdf_str = graph.serialize()
    with codecs.open(vocabstatusfile, 'w', 'utf-8') as f:
        f.write(rdf_str)
    return True
コード例 #5
0
def _replace_in_text_file(path, replacements):
    """Rewrite the UTF-8 file at *path*, applying each (old, new) replacement in order."""
    with codecs.open(path, 'r', 'utf-8') as f:
        text = f.read()
    for old, new in replacements:
        text = text.replace(old, new)
    with codecs.open(path, 'w', 'utf-8') as f:
        f.write(text)


def update_vocab_uri_in_statusfile(userid, oldprefix, newprefix, oldvocabdir, newvocabdir):
    """Point a mediator file and a vocabulary status file at a renamed vocabulary.

    Every occurrence of the old vocabulary URI and of ``oldvocabdir`` is
    textually replaced by the new URI / ``newvocabdir`` in both files, then the
    ``skos:editorialNote`` whose value is ``vocab_editorial_descriptions[0]``
    is removed from the status file.

    :param userid: mediator id; selects ``<ag.mediatorsdir>/<userid>.rdf``.
    :param oldprefix: previous vocabulary prefix (last segment of the old URI).
    :param newprefix: new vocabulary prefix (last segment of the new URI).
    :param oldvocabdir: previous vocabulary directory path.
    :param newvocabdir: new vocabulary directory path; ``status.rdf`` is
        expected inside it.
    :return: ``False`` when either file is missing, else ``True``.
    """
    olduri = "http://vocab.ox.ac.uk/%s" % oldprefix
    newuri = "http://vocab.ox.ac.uk/%s" % newprefix

    mediatorfile = os.path.join(ag.mediatorsdir, '%s.rdf' % userid)
    vocabstatusfile = os.path.join(newvocabdir, 'status.rdf')
    if not os.path.isfile(mediatorfile) or not os.path.isfile(vocabstatusfile):
        return False

    replacements = ((olduri, newuri), (oldvocabdir, newvocabdir))
    # update uri in mediator file
    _replace_in_text_file(mediatorfile, replacements)
    # update uri in vocab status file
    _replace_in_text_file(vocabstatusfile, replacements)

    # Remove editorial note 0. The triple is fully specified, so it is removed
    # directly instead of deleting from the graph while iterating over it.
    graph = Graph()
    graph.parse(vocabstatusfile)
    graph.remove((URIRef(newuri),
                  namespaces['skos']['editorialNote'],
                  Literal(vocab_editorial_descriptions[0])))
    rdf_str = graph.serialize()
    with codecs.open(vocabstatusfile, 'w', 'utf-8') as f:
        f.write(rdf_str)
    return True
コード例 #6
0
# Add a new asana: an instance of `asana` that is also typed with an anonymous
# node carrying owl:onProperty / owl:someValuesFrom statements.
bnode = BNode()  # class relations
rdfGraph.add((newAsana, RDF.type, asana))
rdfGraph.add((newAsana, RDF.type, bnode))
rdfGraph.add((newAsana, RDFS.label, Literal("Новая асана", lang="ru")))
rdfGraph.add((newAsana, description,
              Literal("Описание процесса выполнения", datatype=XSD.string)))
rdfGraph.add((bnode, OWL.onProperty, URIRef(negativeAffect)))
# The OWL term is someValuesFrom ("someValuesOf" does not exist); the query
# further down matches owl:someValuesFrom, so the old spelling never matched.
rdfGraph.add((bnode, OWL.someValuesFrom, URIRef(backbone)))

print("\nGetting with new element")
printElements(rdfGraph)

# Remove sukhasana (every triple that has it as subject)
rdfGraph.remove((sukhasana, None, None))

print("\nGetting with deleted element")
printElements(rdfGraph)

# Search for everything that affects the spine
ds = URIRef(description).n3(
    rdfGraph.namespace_manager)  # convert to NS:suffix format
print(f"{description} -> {ds}")

pq = prepareQuery(f"""SELECT ?asana ?label ?description WHERE {{
  ?asana rdf:type ?o .
  ?asana {ds} ?description .
  ?asana rdfs:label ?label .
  ?o owl:onProperty ?affect .
  ?o owl:someValuesFrom ?affectTo .
コード例 #7
0
class ContextTestCase(unittest.TestCase):
    """Context (named-graph) tests for a store-backed ``ConjunctiveGraph``.

    ``setUp`` obtains the store class via ``plugin.get(storename, Store)`` and
    opens it at ``uri`` (``"sqlite://"`` by default); ``tearDown`` destroys and
    closes it.
    """

    storetest = True
    identifier = URIRef("rdflib_test")

    michel = URIRef(u"michel")
    tarek = URIRef(u"tarek")
    bob = URIRef(u"bob")
    likes = URIRef(u"likes")
    hates = URIRef(u"hates")
    pizza = URIRef(u"pizza")
    cheese = URIRef(u"cheese")
    c1 = URIRef(u"context-1")
    c2 = URIRef(u"context-2")

    def setUp(self, uri="sqlite://", storename=None):
        """Create the store, wrap it in a ConjunctiveGraph and open it at *uri*."""
        store = plugin.get(storename, Store)(identifier=self.identifier)
        self.graph = ConjunctiveGraph(store, identifier=self.identifier)
        self.graph.open(uri, create=True)

    def tearDown(self, uri="sqlite://"):
        """Destroy the store at *uri*, then close the graph."""
        self.graph.destroy(uri)
        self.graph.close()

    def get_context(self, identifier):
        """Return a ``Graph`` view on the shared store for context *identifier*."""
        assert isinstance(identifier, (URIRef, BNode)), type(identifier)
        # No namespace_manager is passed: the test case itself is not a
        # namespace manager, and Graph creates its own when one is needed.
        return Graph(store=self.graph.store, identifier=identifier)

    def _context_ids(self, triple=None):
        """Return identifiers of the contexts (optionally only those holding *triple*).

        Items yielded by ``contexts()`` that are already strings are kept
        as-is; anything else is replaced by its ``identifier``.
        """
        return [
            ctx if isinstance(ctx, string_types) else ctx.identifier
            for ctx in self.graph.contexts(triple)
        ]

    def _c1_stuff(self):
        """Return the seven triples that addStuff/removeStuff manage in context c1."""
        return [
            (self.tarek, self.likes, self.pizza),
            (self.tarek, self.likes, self.cheese),
            (self.michel, self.likes, self.pizza),
            (self.michel, self.likes, self.cheese),
            (self.bob, self.likes, self.cheese),
            (self.bob, self.hates, self.pizza),
            (self.bob, self.hates, self.michel),  # gasp!
        ]

    def addStuff(self):
        """Add the seven sample triples to context c1."""
        graph = Graph(self.graph.store, self.c1)
        for triple in self._c1_stuff():
            graph.add(triple)

    def removeStuff(self):
        """Remove the seven sample triples from context c1."""
        graph = Graph(self.graph.store, self.c1)
        for triple in self._c1_stuff():
            graph.remove(triple)

    def addStuffInMultipleContexts(self):
        """Add one triple to the default context, to c1 and to c2."""
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1 and context 2
        for context_id in (self.c1, self.c2):
            Graph(self.graph.store, context_id).add(triple)

    def testConjunction(self):
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        # NOTE(review): both sides measure the store that was passed to both
        # graphs, so this looks like it cannot fail -- confirm the intent.
        self.assertEqual(len(self.graph.store), len(graph.store))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty
        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for _ in range(10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph.store)
        self.addStuffInMultipleContexts()
        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEqual(len(self.graph.store), oldLen + 1,
                         [self.graph.store, oldLen + 1])

        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph.store), oldLen + 1,
                         [graph.store, oldLen + 1])

    def testRemoveInMultipleContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertIn(triple, self.graph)
        for context_id in (self.c1, self.c2):
            Graph(self.graph.store, context_id).remove(triple)
            self.assertIn(triple, self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertNotIn(triple, self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertNotIn(triple, self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        all_ids = self._context_ids()
        self.assertIn(self.c1, all_ids)
        self.assertIn(self.c2, all_ids)

        contextList = self._context_ids(triple)
        self.assertIn(self.c1, contextList)
        self.assertIn(self.c2, contextList)

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.get_context(c1)), 1)

        self.graph.remove_context(self.get_context(c1))
        # Compare identifiers, as testContexts does; checking the URIRef
        # against the raw contexts() items would not detect a leftover context.
        self.assertNotIn(self.c1, self._context_ids())

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # (pattern, expected number of matches); every pattern must give the
        # same count with the c1 context and without a context.
        expected_counts = [
            # unbound subjects
            ((Any, likes, pizza), 2),
            ((Any, hates, pizza), 1),
            ((Any, likes, cheese), 3),
            ((Any, hates, cheese), 0),
            # unbound objects
            ((michel, likes, Any), 2),
            ((tarek, likes, Any), 2),
            ((bob, hates, Any), 2),
            ((bob, likes, Any), 1),
            # unbound predicates
            ((michel, Any, cheese), 1),
            ((tarek, Any, cheese), 1),
            ((bob, Any, pizza), 1),
            ((bob, Any, michel), 1),
            # unbound subject, objects
            ((Any, hates, Any), 2),
            ((Any, likes, Any), 5),
            # unbound predicates, objects
            ((michel, Any, Any), 2),
            ((bob, Any, Any), 3),
            ((tarek, Any, Any), 2),
            # unbound subjects, predicates
            ((Any, Any, pizza), 3),
            ((Any, Any, cheese), 3),
            ((Any, Any, michel), 1),
            # all unbound
            ((Any, Any, Any), 7),
        ]
        for pattern, count in expected_counts:
            asserte(len(list(c1triples(pattern))), count,
                    ("with context", pattern))
            asserte(len(list(triples(pattern))), count,
                    ("without context", pattern))

        for c in [graph, self.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), {michel, tarek})
            asserte(set(c.subjects(hates, pizza)), {bob})
            asserte(set(c.subjects(likes, cheese)), {tarek, bob, michel})
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), {cheese, pizza})
            asserte(set(c.objects(tarek, likes)), {cheese, pizza})
            asserte(set(c.objects(bob, hates)), {michel, pizza})
            asserte(set(c.objects(bob, likes)), {cheese})

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), {likes})
            asserte(set(c.predicates(tarek, cheese)), {likes})
            asserte(set(c.predicates(bob, pizza)), {hates})
            asserte(set(c.predicates(bob, michel)), {hates})

            asserte(set(c.subject_objects(hates)),
                    {(bob, pizza), (bob, michel)})
            asserte(set(c.subject_objects(likes)),
                    {(tarek, cheese), (michel, cheese),
                     (michel, pizza), (bob, cheese), (tarek, pizza)})

            asserte(set(c.predicate_objects(michel)),
                    {(likes, cheese), (likes, pizza)})
            asserte(set(c.predicate_objects(bob)),
                    {(likes, cheese), (hates, pizza), (hates, michel)})
            asserte(set(c.predicate_objects(tarek)),
                    {(likes, cheese), (likes, pizza)})

            asserte(set(c.subject_predicates(pizza)),
                    {(bob, hates), (tarek, likes), (michel, likes)})
            asserte(set(c.subject_predicates(cheese)),
                    {(bob, likes), (tarek, likes), (michel, likes)})
            asserte(set(c.subject_predicates(michel)), {(bob, hates)})

            asserte(set(c), {
                (bob, hates, michel), (bob, likes, cheese),
                (tarek, likes, pizza), (michel, likes, pizza),
                (michel, likes, cheese), (bob, hates, pizza),
                (tarek, likes, cheese)})

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
コード例 #8
0
ファイル: test_graph_context.py プロジェクト: drewp/rdflib
class ContextTestCase(unittest.TestCase):
    """Context (named-graph) tests for a ``ConjunctiveGraph`` on ``self.store``.

    ``setUp`` opens the store on a fresh temporary path (a file for
    ``"SQLite"``, a directory otherwise) and ``tearDown`` deletes that path.
    """

    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        """Open a graph on a temporary path and create the sample terms."""
        try:
            self.graph = ConjunctiveGraph(store=self.store)
        except ImportError:
            raise SkipTest(
                "Dependencies for store '%s' not available!" % self.store)
        if self.store == "SQLite":
            fd, self.tmppath = mkstemp(
                prefix='test', dir='/tmp', suffix='.sqlite')
            # mkstemp hands back an open descriptor; only the path is needed,
            # so close it instead of leaking one descriptor per test.
            os.close(fd)
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

        self.c1 = URIRef(u'context-1')
        self.c2 = URIRef(u'context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))

    def tearDown(self):
        """Close the graph and delete the temporary file or directory."""
        self.graph.close()
        if os.path.isdir(self.tmppath):
            shutil.rmtree(self.tmppath)
        else:
            os.remove(self.tmppath)

    def _c1_stuff(self):
        """Return the seven triples that addStuff/removeStuff manage in context c1."""
        return [
            (self.tarek, self.likes, self.pizza),
            (self.tarek, self.likes, self.cheese),
            (self.michel, self.likes, self.pizza),
            (self.michel, self.likes, self.cheese),
            (self.bob, self.likes, self.cheese),
            (self.bob, self.hates, self.pizza),
            (self.bob, self.hates, self.michel),  # gasp!
        ]

    def addStuff(self):
        """Add the seven sample triples to context c1."""
        graph = Graph(self.graph.store, self.c1)
        for triple in self._c1_stuff():
            graph.add(triple)

    def removeStuff(self):
        """Remove the seven sample triples from context c1."""
        graph = Graph(self.graph.store, self.c1)
        for triple in self._c1_stuff():
            graph.remove(triple)

    def addStuffInMultipleContexts(self):
        """Add one triple to the default context, to c1 and to c2."""
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1 and context 2
        for context_id in (self.c1, self.c2):
            Graph(self.graph.store, context_id).add(triple)

    def testConjunction(self):
        if self.store == "SQLite":
            raise SkipTest("Skipping known issue with __len__")
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        self.assertEqual(len(self.graph), len(graph))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty
        self.graph.remove_context(self.graph.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for _ in range(10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.graph.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.graph.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        if self.store == "SQLite":
            raise SkipTest("Skipping known issue with __len__")
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEqual(len(self.graph), oldLen + 1)

        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertTrue(triple in self.graph)
        for context_id in (self.c1, self.c2):
            Graph(self.graph.store, context_id).remove(triple)
            self.assertTrue(triple in self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertTrue(triple not in self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertTrue(triple not in self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        all_ids = [c.identifier for c in self.graph.contexts()]
        self.assertTrue(self.c1 in all_ids)
        self.assertTrue(self.c2 in all_ids)

        contextList = [c.identifier for c in self.graph.contexts(triple)]
        self.assertTrue(self.c1 in contextList, (self.c1, contextList))
        self.assertTrue(self.c2 in contextList, (self.c2, contextList))

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.graph.get_context(c1)), 1)

        self.graph.remove_context(self.graph.get_context(c1))
        # Compare identifiers, as testContexts does; checking the URIRef
        # against the raw contexts() items would not detect a leftover context.
        remaining = [c.identifier for c in self.graph.contexts()]
        self.assertTrue(self.c1 not in remaining, (self.c1, remaining))

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # (pattern, expected number of matches); every pattern must give the
        # same count with the c1 context and without a context.
        expected_counts = [
            # unbound subjects
            ((Any, likes, pizza), 2),
            ((Any, hates, pizza), 1),
            ((Any, likes, cheese), 3),
            ((Any, hates, cheese), 0),
            # unbound objects
            ((michel, likes, Any), 2),
            ((tarek, likes, Any), 2),
            ((bob, hates, Any), 2),
            ((bob, likes, Any), 1),
            # unbound predicates
            ((michel, Any, cheese), 1),
            ((tarek, Any, cheese), 1),
            ((bob, Any, pizza), 1),
            ((bob, Any, michel), 1),
            # unbound subject, objects
            ((Any, hates, Any), 2),
            ((Any, likes, Any), 5),
            # unbound predicates, objects
            ((michel, Any, Any), 2),
            ((bob, Any, Any), 3),
            ((tarek, Any, Any), 2),
            # unbound subjects, predicates
            ((Any, Any, pizza), 3),
            ((Any, Any, cheese), 3),
            ((Any, Any, michel), 1),
            # all unbound
            ((Any, Any, Any), 7),
        ]
        for pattern, count in expected_counts:
            asserte(len(list(c1triples(pattern))), count,
                    ("with context", pattern))
            asserte(len(list(triples(pattern))), count,
                    ("without context", pattern))

        for c in [graph, self.graph.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), {michel, tarek})
            asserte(set(c.subjects(hates, pizza)), {bob})
            asserte(set(c.subjects(likes, cheese)), {tarek, bob, michel})
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), {cheese, pizza})
            asserte(set(c.objects(tarek, likes)), {cheese, pizza})
            asserte(set(c.objects(bob, hates)), {michel, pizza})
            asserte(set(c.objects(bob, likes)), {cheese})

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), {likes})
            asserte(set(c.predicates(tarek, cheese)), {likes})
            asserte(set(c.predicates(bob, pizza)), {hates})
            asserte(set(c.predicates(bob, michel)), {hates})

            asserte(set(c.subject_objects(hates)),
                    {(bob, pizza), (bob, michel)})
            asserte(set(c.subject_objects(likes)),
                    {(tarek, cheese), (michel, cheese),
                     (michel, pizza), (bob, cheese), (tarek, pizza)})

            asserte(set(c.predicate_objects(michel)),
                    {(likes, cheese), (likes, pizza)})
            asserte(set(c.predicate_objects(bob)),
                    {(likes, cheese), (hates, pizza), (hates, michel)})
            asserte(set(c.predicate_objects(tarek)),
                    {(likes, cheese), (likes, pizza)})

            asserte(set(c.subject_predicates(pizza)),
                    {(bob, hates), (tarek, likes), (michel, likes)})
            asserte(set(c.subject_predicates(cheese)),
                    {(bob, likes), (tarek, likes), (michel, likes)})
            asserte(set(c.subject_predicates(michel)), {(bob, hates)})

            asserte(set(c), {
                (bob, hates, michel), (bob, likes, cheese),
                (tarek, likes, pizza), (michel, likes, pizza),
                (michel, likes, cheese), (bob, hates, pizza),
                (tarek, likes, cheese)})

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
コード例 #9
0
class ContextTestCase(unittest.TestCase):
    """Context (named graph) tests for a plugin store behind a ConjunctiveGraph.

    ``setUp`` opens a fresh store and ``tearDown`` destroys it again, so each
    test starts from whatever the newly opened store contains.
    """
    storetest = True
    identifier = URIRef("rdflib_test")

    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')
    c1 = URIRef(u'context-1')
    c2 = URIRef(u'context-2')

    def setUp(self, uri='sqlite://', storename=None):
        """Create the store plugin named *storename* and open it at *uri*.

        NOTE(review): the default ``storename=None`` is handed to
        ``plugin.get`` unchanged; presumably callers or subclasses supply a
        real plugin name -- confirm.
        """
        store = plugin.get(storename, Store)(identifier=self.identifier)
        self.graph = ConjunctiveGraph(store, identifier=self.identifier)
        self.graph.open(uri, create=True)

    def tearDown(self, uri='sqlite://'):
        """Destroy the store at *uri* and close the graph (best effort)."""
        self.graph.destroy(uri)
        try:
            self.graph.close()
        except Exception:
            # Closing is best effort, but KeyboardInterrupt/SystemExit must
            # not be swallowed the way a bare ``except:`` would.
            pass

    @staticmethod
    def _context_id(context):
        """Return the identifier of *context*.

        Items that are already strings are returned unchanged; anything else
        is expected to expose ``.identifier``.
        """
        try:
            string_types = basestring  # Python 2
        except NameError:
            string_types = (str, bytes)  # Python 3
        if isinstance(context, string_types):
            return context
        return context.identifier

    def _stuff_triples(self):
        """Return the seven sample triples in the order they are added."""
        return [
            (self.tarek, self.likes, self.pizza),
            (self.tarek, self.likes, self.cheese),
            (self.michel, self.likes, self.pizza),
            (self.michel, self.likes, self.cheese),
            (self.bob, self.likes, self.cheese),
            (self.bob, self.hates, self.pizza),
            (self.bob, self.hates, self.michel),  # gasp!
        ]

    def get_context(self, identifier):
        """Return a Graph over the shared store for context *identifier*."""
        assert isinstance(identifier, (URIRef, BNode)), type(identifier)
        # The test case is not a namespace manager, so it is no longer passed
        # as one; the Graph falls back to its own default.
        return Graph(store=self.graph.store, identifier=identifier)

    def addStuff(self):
        """Add the sample triples to context ``c1``."""
        graph = Graph(self.graph.store, self.c1)
        for triple in self._stuff_triples():
            graph.add(triple)

    def removeStuff(self):
        """Remove the sample triples from context ``c1``."""
        graph = Graph(self.graph.store, self.c1)
        for triple in self._stuff_triples():
            graph.remove(triple)

    def addStuffInMultipleContexts(self):
        """Add one triple to the default context and to ``c1`` and ``c2``."""
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1, then context 2
        for context_id in (self.c1, self.c2):
            Graph(self.graph.store, context_id).add(triple)

    def testConjunction(self):
        """Compare the store length seen via the conjunctive graph and c1."""
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        # NOTE(review): ``graph`` was built on ``self.graph.store``, so both
        # sides look like they measure the same store object -- confirm this
        # is the intended comparison.
        self.assertEqual(len(self.graph.store), len(graph.store))

    def testAdd(self):
        """Adding the sample triples must not raise."""
        self.addStuff()

    def testRemove(self):
        """Adding and then removing the sample triples must not raise."""
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        """Length of a single context tracks additions and remove_context."""
        c1 = self.c1
        # make sure context is empty
        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for _ in range(10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        """The same triple in three contexts grows the store length by one."""
        oldLen = len(self.graph.store)
        print("Original", oldLen, self.graph.store)
        self.addStuffInMultipleContexts()
        newLen = len(self.graph.store)
        print("MultipleContexts", newLen, self.graph.store)
        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        print("No context", len(list(self.graph.triples((None, None, None)))))
        print("Context context-1", len(
            list(self.graph.triples((None, None, None), context=self.c1))))
        print("Context context-2", len(
            list(self.graph.triples((None, None, None), context=self.c2))))
        self.assertEqual(len(self.graph.store), oldLen + 1,
                         [self.graph.store, oldLen + 1])

        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph.store), oldLen + 1,
                         [graph.store, oldLen + 1])

    def testRemoveInMultipleContexts(self):
        """A triple stays visible until removed from every context."""
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertTrue(triple in self.graph)
        for context_id in (self.c1, self.c2):
            Graph(self.graph.store, context_id).remove(triple)
            self.assertTrue(triple in self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertTrue(triple not in self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertTrue(triple not in self.graph)

    def testContexts(self):
        """Both named contexts are listed, overall and for the triple."""
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        cid = self._context_id
        all_contexts = [cid(c) for c in self.graph.contexts()]
        self.assertTrue(self.c1 in all_contexts)
        self.assertTrue(self.c2 in all_contexts)

        contextList = [cid(c) for c in self.graph.contexts(triple)]
        self.assertTrue(self.c1 in contextList)
        self.assertTrue(self.c2 in contextList)

    def testRemoveContext(self):
        """remove_context drops the context from the listed contexts."""
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.get_context(c1)), 1)

        self.graph.remove_context(self.get_context(c1))
        # Compare identifiers: contexts() may yield Graph objects, and a
        # URIRef never equals a Graph, which would make the check vacuous.
        remaining = [self._context_id(c) for c in self.graph.contexts()]
        self.assertTrue(self.c1 not in remaining)

    def testRemoveAny(self):
        """Removing the all-wildcard pattern empties the graph."""
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        """Pattern queries give the same answers with and without context."""
        tarek, michel, bob = self.tarek, self.michel, self.bob
        likes, hates = self.likes, self.hates
        pizza, cheese = self.pizza, self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        graph = self.graph
        triples = graph.triples
        c1triples = Graph(graph.store, c1).triples
        Any = None

        self.addStuff()

        # (pattern, number of matches); each pattern is checked against
        # context c1 and against the conjunctive graph, same results!
        expected_counts = [
            # unbound subjects
            ((Any, likes, pizza), 2),
            ((Any, hates, pizza), 1),
            ((Any, likes, cheese), 3),
            ((Any, hates, cheese), 0),
            # unbound objects
            ((michel, likes, Any), 2),
            ((tarek, likes, Any), 2),
            ((bob, hates, Any), 2),
            ((bob, likes, Any), 1),
            # unbound predicates
            ((michel, Any, cheese), 1),
            ((tarek, Any, cheese), 1),
            ((bob, Any, pizza), 1),
            ((bob, Any, michel), 1),
            # unbound subject, objects
            ((Any, hates, Any), 2),
            ((Any, likes, Any), 5),
            # unbound predicates, objects
            ((michel, Any, Any), 2),
            ((bob, Any, Any), 3),
            ((tarek, Any, Any), 2),
            # unbound subjects, predicates
            ((Any, Any, pizza), 3),
            ((Any, Any, cheese), 3),
            ((Any, Any, michel), 1),
            # all unbound
            ((Any, Any, Any), 7),
        ]
        for pattern, count in expected_counts:
            asserte(len(list(c1triples(pattern))), count,
                    "with context: %r" % (pattern,))
            asserte(len(list(triples(pattern))), count,
                    "without context: %r" % (pattern,))

        everything = set(self._stuff_triples())
        for c in [graph, self.get_context(c1)]:
            # (query result, expected members)
            checks = [
                # unbound subjects
                (c.subjects(likes, pizza), [michel, tarek]),
                (c.subjects(hates, pizza), [bob]),
                (c.subjects(likes, cheese), [tarek, bob, michel]),
                (c.subjects(hates, cheese), []),
                # unbound objects
                (c.objects(michel, likes), [cheese, pizza]),
                (c.objects(tarek, likes), [cheese, pizza]),
                (c.objects(bob, hates), [michel, pizza]),
                (c.objects(bob, likes), [cheese]),
                # unbound predicates
                (c.predicates(michel, cheese), [likes]),
                (c.predicates(tarek, cheese), [likes]),
                (c.predicates(bob, pizza), [hates]),
                (c.predicates(bob, michel), [hates]),
                # pair queries
                (c.subject_objects(hates), [(bob, pizza), (bob, michel)]),
                (c.subject_objects(likes),
                 [(tarek, cheese), (michel, cheese),
                  (michel, pizza), (bob, cheese), (tarek, pizza)]),
                (c.predicate_objects(michel),
                 [(likes, cheese), (likes, pizza)]),
                (c.predicate_objects(bob),
                 [(likes, cheese), (hates, pizza), (hates, michel)]),
                (c.predicate_objects(tarek),
                 [(likes, cheese), (likes, pizza)]),
                (c.subject_predicates(pizza),
                 [(bob, hates), (tarek, likes), (michel, likes)]),
                (c.subject_predicates(cheese),
                 [(bob, likes), (tarek, likes), (michel, likes)]),
                (c.subject_predicates(michel), [(bob, hates)]),
            ]
            for index, (found, wanted) in enumerate(checks):
                asserte(set(found), set(wanted),
                        "check #%d on %r" % (index, c))

            asserte(set(c), everything)

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
コード例 #10
0
ファイル: test_context.py プロジェクト: RDFLib/rdfextras
class ContextTestCase(unittest.TestCase):
    """Context (named graph) tests for the store selected by ``store_name``.

    ``setUp`` opens the store at ``path`` (creating a temporary location when
    ``path`` is None) and ``tearDown`` destroys it and removes that location.
    """
    store_name = 'default'
    path = None
    storetest = True
    create = True
    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')
    c1 = URIRef(u'context-1')
    c2 = URIRef(u'context-2')

    def setUp(self):
        """Open the graph, allocating a temporary path when none is set."""
        self.graph = ConjunctiveGraph(store=self.store_name)
        self.graph.destroy(self.path)
        if self.path is None:
            if self.store_name == "SQLite":
                import os
                # mkstemp returns (fd, name): keep only the name as the path
                # and close the descriptor so it is not leaked.
                handle, self.path = mkstemp(prefix='test', dir='/tmp')
                os.close(handle)
            else:
                self.path = mkdtemp(prefix='test', dir='/tmp')
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy the store, close the graph and delete its file/directory."""
        self.graph.destroy(self.path)
        try:
            self.graph.close()
        except Exception:
            # Closing is best effort, but KeyboardInterrupt/SystemExit must
            # not be swallowed the way a bare ``except:`` would.
            pass
        import os
        path = getattr(self, 'path', None)
        if path is None or not os.path.exists(path):
            return
        if os.path.isdir(path):
            # Only direct children are removed; rmdir then needs it empty.
            for name in os.listdir(path):
                os.unlink(os.path.join(path, name))
            os.rmdir(path)
        else:
            os.remove(path)

    @staticmethod
    def _context_id(context):
        """Return the identifier of *context*.

        Items that are already strings are returned unchanged; anything else
        is expected to expose ``.identifier``.
        """
        try:
            string_types = basestring  # Python 2
        except NameError:
            string_types = (str, bytes)  # Python 3
        if isinstance(context, string_types):
            return context
        return context.identifier

    def _stuff_triples(self):
        """Return the seven sample triples in the order they are added."""
        return [
            (self.tarek, self.likes, self.pizza),
            (self.tarek, self.likes, self.cheese),
            (self.michel, self.likes, self.pizza),
            (self.michel, self.likes, self.cheese),
            (self.bob, self.likes, self.cheese),
            (self.bob, self.hates, self.pizza),
            (self.bob, self.hates, self.michel),  # gasp!
        ]

    def get_context(self, identifier):
        """Return a Graph over the shared store for context *identifier*."""
        assert isinstance(identifier, (URIRef, BNode)), type(identifier)
        # The test case is not a namespace manager, so it is no longer passed
        # as one; the Graph falls back to its own default.
        return Graph(store=self.graph.store, identifier=identifier)

    def addStuff(self):
        """Add the sample triples to context ``c1``."""
        graph = Graph(self.graph.store, self.c1)
        for triple in self._stuff_triples():
            graph.add(triple)

    def removeStuff(self):
        """Remove the sample triples from context ``c1``."""
        graph = Graph(self.graph.store, self.c1)
        for triple in self._stuff_triples():
            graph.remove(triple)

    def addStuffInMultipleContexts(self):
        """Add one triple to the default context and to ``c1`` and ``c2``."""
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1, then context 2
        for context_id in (self.c1, self.c2):
            Graph(self.graph.store, context_id).add(triple)

    def testConjunction(self):
        """The conjunctive graph and context c1 report the same length."""
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        self.assertEqual(len(self.graph), len(graph))

    def testAdd(self):
        """Adding the sample triples must not raise."""
        self.addStuff()

    def testRemove(self):
        """Adding and then removing the sample triples must not raise."""
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        """Length of a single context tracks additions and remove_context."""
        c1 = self.c1
        # make sure context is empty
        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for _ in range(10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        """The same triple in three contexts grows the length by one."""
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEqual(len(self.graph), oldLen + 1)

        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        """A triple stays visible until removed from every context."""
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertTrue(triple in self.graph)
        for context_id in (self.c1, self.c2):
            Graph(self.graph.store, context_id).remove(triple)
            self.assertTrue(triple in self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertTrue(triple not in self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertTrue(triple not in self.graph)

    def testContexts(self):
        """Both named contexts are listed, overall and for the triple."""
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        cid = self._context_id
        all_contexts = [cid(c) for c in self.graph.contexts()]
        self.assertTrue(self.c1 in all_contexts)
        self.assertTrue(self.c2 in all_contexts)

        # A real list, because it is searched twice; a Python 3 ``map``
        # object would be partly consumed by the first membership test.
        contextList = [cid(c) for c in self.graph.contexts(triple)]
        self.assertTrue(self.c1 in contextList)
        self.assertTrue(self.c2 in contextList)

    def testRemoveContext(self):
        """remove_context drops the context from the listed contexts."""
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.get_context(c1)), 1)

        self.graph.remove_context(self.get_context(c1))
        # Compare identifiers: contexts() may yield Graph objects, and a
        # URIRef never equals a Graph, which would make the check vacuous.
        remaining = [self._context_id(c) for c in self.graph.contexts()]
        self.assertTrue(self.c1 not in remaining)

    def testRemoveAny(self):
        """Removing the all-wildcard pattern empties the graph."""
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        """Pattern queries give the same answers with and without context."""
        tarek, michel, bob = self.tarek, self.michel, self.bob
        likes, hates = self.likes, self.hates
        pizza, cheese = self.pizza, self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        graph = self.graph
        triples = graph.triples
        c1triples = Graph(graph.store, c1).triples
        Any = None

        self.addStuff()

        # (pattern, number of matches); each pattern is checked against
        # context c1 and against the conjunctive graph, same results!
        expected_counts = [
            # unbound subjects
            ((Any, likes, pizza), 2),
            ((Any, hates, pizza), 1),
            ((Any, likes, cheese), 3),
            ((Any, hates, cheese), 0),
            # unbound objects
            ((michel, likes, Any), 2),
            ((tarek, likes, Any), 2),
            ((bob, hates, Any), 2),
            ((bob, likes, Any), 1),
            # unbound predicates
            ((michel, Any, cheese), 1),
            ((tarek, Any, cheese), 1),
            ((bob, Any, pizza), 1),
            ((bob, Any, michel), 1),
            # unbound subject, objects
            ((Any, hates, Any), 2),
            ((Any, likes, Any), 5),
            # unbound predicates, objects
            ((michel, Any, Any), 2),
            ((bob, Any, Any), 3),
            ((tarek, Any, Any), 2),
            # unbound subjects, predicates
            ((Any, Any, pizza), 3),
            ((Any, Any, cheese), 3),
            ((Any, Any, michel), 1),
            # all unbound
            ((Any, Any, Any), 7),
        ]
        for pattern, count in expected_counts:
            asserte(len(list(c1triples(pattern))), count,
                    "with context: %r" % (pattern,))
            asserte(len(list(triples(pattern))), count,
                    "without context: %r" % (pattern,))

        everything = set(self._stuff_triples())
        for c in [graph, self.get_context(c1)]:
            # (query result, expected members)
            checks = [
                # unbound subjects
                (c.subjects(likes, pizza), [michel, tarek]),
                (c.subjects(hates, pizza), [bob]),
                (c.subjects(likes, cheese), [tarek, bob, michel]),
                (c.subjects(hates, cheese), []),
                # unbound objects
                (c.objects(michel, likes), [cheese, pizza]),
                (c.objects(tarek, likes), [cheese, pizza]),
                (c.objects(bob, hates), [michel, pizza]),
                (c.objects(bob, likes), [cheese]),
                # unbound predicates
                (c.predicates(michel, cheese), [likes]),
                (c.predicates(tarek, cheese), [likes]),
                (c.predicates(bob, pizza), [hates]),
                (c.predicates(bob, michel), [hates]),
                # pair queries
                (c.subject_objects(hates), [(bob, pizza), (bob, michel)]),
                (c.subject_objects(likes),
                 [(tarek, cheese), (michel, cheese),
                  (michel, pizza), (bob, cheese), (tarek, pizza)]),
                (c.predicate_objects(michel),
                 [(likes, cheese), (likes, pizza)]),
                (c.predicate_objects(bob),
                 [(likes, cheese), (hates, pizza), (hates, michel)]),
                (c.predicate_objects(tarek),
                 [(likes, cheese), (likes, pizza)]),
                (c.subject_predicates(pizza),
                 [(bob, hates), (tarek, likes), (michel, likes)]),
                (c.subject_predicates(cheese),
                 [(bob, likes), (tarek, likes), (michel, likes)]),
                (c.subject_predicates(michel), [(bob, hates)]),
            ]
            for index, (found, wanted) in enumerate(checks):
                asserte(set(found), set(wanted),
                        "check #%d on %r" % (index, c))

            asserte(set(c), everything)

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
コード例 #11
0
class ManifestHelper(object):
    """Wrapper around a ConjunctiveGraph that accepts loosely typed terms.

    Subjects, predicates and objects may be given as rdflib terms or as
    plain values; plain values are converted through ``self.urihelper``.
    In the query/delete methods the string ``'*'`` acts as a wildcard.
    """

    # Type names for which ``self.g`` is treated as a usable graph.
    _GRAPH_TYPE_NAMES = ['ConjunctiveGraph', 'Graph']

    def __init__(self, uri=None):
        """Create an empty manifest graph, identified by *uri* if truthy."""
        self.uri = uri if uri else None
        self.reset()

    def reset(self):
        """Replace the graph and namespace table with fresh defaults."""
        if self.uri:
            self.g = ConjunctiveGraph(identifier=self.uri)
        else:
            self.g = ConjunctiveGraph()
        self.namespaces = {}
        self.urihelper = URIHelper(self.namespaces)
        # add defaults
        for prefix, ns in NAMESPACES.iteritems():
            self.add_namespace(prefix, ns)

    def from_string(self, textfile, format="xml", encoding="utf-8"):
        """Reset the helper and parse *textfile* into the new graph.

        *format* is passed by keyword: positionally, the second argument of
        ``Graph.parse`` is ``publicID``, so the requested format used to be
        ignored. *encoding* is accepted for compatibility but not used.
        """
        self.reset()
        self.g.parse(textfile, format=format)

    # -- private helpers ---------------------------------------------------

    def _has_graph(self):
        """Tell whether ``self.g`` is a Graph/ConjunctiveGraph by type name."""
        return type(self.g).__name__ in self._GRAPH_TYPE_NAMES

    def _subject(self, s):
        """Return *s* as a subject term, converting non-URIRef/BNode values."""
        if isinstance(s, (URIRef, BNode)):
            return s
        return self.urihelper.get_uriref(s)

    def _predicate(self, p):
        """Return *p* as a predicate term, converting non-URIRef values."""
        if isinstance(p, URIRef):
            return p
        return self.urihelper.parse_uri(p)

    def _object(self, o):
        """Return *o* as an object term, converting non-rdflib values."""
        if isinstance(o, (URIRef, Literal, BNode)):
            return o
        if not isinstance(o, basestring):
            o = unicode(o)
        return self.urihelper.parse_uri(o, return_Literal_not_Exception=True)

    @staticmethod
    def _pattern_term(term, convert):
        """Map ``'*'``/None to the None wildcard, else apply *convert*."""
        if term == '*' or term is None:
            return None
        return convert(term)

    # -- public API --------------------------------------------------------

    def triple_exists(self, s, p, o):
        """Return True if at least one triple matches ``(s, p, o)``.

        ``'*'`` or None in any position matches anything. Returns False
        when ``self.g`` is not a graph.
        """
        if not self._has_graph():
            return False
        pattern = (self._pattern_term(s, self._subject),
                   self._pattern_term(p, self._predicate),
                   self._pattern_term(o, self._object))
        # Stop at the first match instead of counting every result.
        for _ in self.g.triples(pattern):
            return True
        return False

    def list_objects(self, s, p):
        """Return a list of the objects of triples matching ``(s, p, *)``.

        ``'*'`` or None for *s* or *p* matches anything. Returns an empty
        list when ``self.g`` is not a graph.
        """
        if not self._has_graph():
            return []
        s = self._pattern_term(s, self._subject)
        p = self._pattern_term(p, self._predicate)
        return list(self.g.objects(s, p))

    def add_triple(self, s, p, o):
        """Convert the three terms as needed, add the triple and commit.

        No wildcard handling applies here: every value is converted.
        """
        self.g.add((self._subject(s), self._predicate(p), self._object(o)))
        self.g.commit()

    def add_namespace(self, prefix, uri):
        """Register *prefix* for namespace *uri* and bind it on the graph.

        Raises:
            TypeError: if *prefix* is not a string, or *uri* is neither a
                string nor a URIRef/Namespace.
        """
        if not isinstance(prefix, basestring):
            raise TypeError('Add namespace: prefix is not of type string or unicode') 

        if not isinstance(uri, (URIRef, Namespace, basestring)):
            raise TypeError('Add namespace: namespace is not of type string or unicode') 

        if not isinstance(prefix, unicode):
            prefix = unicode(prefix)

        if isinstance(uri, basestring) and not isinstance(uri, unicode):
            uri = unicode(uri)

        self.namespaces[prefix] = self.urihelper.get_namespace(uri)
        if prefix not in self.urihelper.namespaces:
            self.urihelper.namespaces[prefix] = self.urihelper.get_namespace(uri)
        self.g.bind(prefix, self.namespaces[prefix])

    def del_namespace(self, prefix, ns):
        """Forget *prefix* in ``self.namespaces``; *ns* is not used.

        The binding on the graph itself is not touched.
        """
        if prefix in self.namespaces:
            del self.namespaces[prefix]

    def del_triple(self, s, p, o=None):
        """Remove every triple matching ``(s, p, o)`` from the graph.

        ``'*'`` or None in any position matches anything. Does nothing when
        ``self.g`` is not a graph.
        """
        if not self._has_graph():
            return
        self.g.remove((self._pattern_term(s, self._subject),
                       self._pattern_term(p, self._predicate),
                       self._pattern_term(o, self._object)))

    def get_graph(self):
        """Return the underlying graph object."""
        return self.g

    def to_string(self, format="xml"):
        """Serialize the graph as UTF-8 in *format*, followed by a newline.

        When the graph is empty (or ``self.g`` is not a graph) only an XML
        declaration is returned, whatever *format* was requested.
        """
        if self._has_graph() and len(self.g) > 0:
            self.g.commit()
            return self.g.serialize(format=format, encoding="utf-8") + "\n"
        return u'<?xml version="1.0" encoding="UTF-8"?>\n'
コード例 #12
0
ファイル: skosify.py プロジェクト: edsu/lcco
            parts = re.split(r' +', label)
            label = ' '.join(l.lower().capitalize() for l in parts).strip()
            position = 0
        else:
            parts = line.split("\t")
            label = parts.pop().strip()
            range = parts.pop(0).strip()
            position = len(parts) + 1

        # if there's no range then we've got a chunk of text that needs 
        # to be added to the last concept we added to the graph 
        if not range:
            uri = range_uri(lc_class[-1][0])
            old_label = list(g.objects(uri, SKOS.prefLabel))[0]
            new_label = "%s %s" % (old_label, label)
            g.remove((uri, SKOS.prefLabel, old_label))
            g.add((uri, SKOS.prefLabel, Literal(new_label, 'en')))
            continue

        lc_class = lc_class[0:position]
        lc_class.insert(position, (range, label))

        label = '--'.join([c[1] for c in lc_class])
        uri = range_uri(range)

        g.add((uri, RDF.type, SKOS.Concept))
        g.add((uri, SKOS.prefLabel, Literal(label, 'en')))
        g.add((uri, SKOS.notation, Literal(range, datatype=LCC)))

        if position == 0:
            g.add((LCCO, SKOS.hasTopConcept, uri))
コード例 #13
0
class ContextTestCase(unittest.TestCase):
    """Tests for context (named graph) handling of a ``ConjunctiveGraph``.

    ``setUp`` opens a graph on the store named by ``store_name`` at ``path``;
    the test methods add and remove triples in the contexts ``c1`` / ``c2``
    and in the graph itself, then check lengths, membership and pattern
    queries.
    """
    store_name = 'default'
    path = None
    storetest = True
    create = True
    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')
    c1 = URIRef(u'context-1')
    c2 = URIRef(u'context-2')

    def setUp(self):
        """Create the graph under test on a freshly destroyed store."""
        self.graph = ConjunctiveGraph(store=self.store_name)
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy the store and close the graph, ignoring close errors."""
        self.graph.destroy(self.path)
        try:
            self.graph.close()
        except Exception:
            # Closing is best-effort here; only ordinary errors are ignored so
            # that KeyboardInterrupt / SystemExit still propagate.
            pass

    def get_context(self, identifier):
        """Return a ``Graph`` view on the shared store for ``identifier``."""
        assert isinstance(identifier, (URIRef, BNode)), type(identifier)
        # The original passed ``namespace_manager=self`` (the TestCase), which
        # is not a namespace manager; the Graph default is used instead.
        return Graph(store=self.graph.store, identifier=identifier)

    def _stuff_triples(self):
        """Return the seven triples used by addStuff() and removeStuff()."""
        return [
            (self.tarek, self.likes, self.pizza),
            (self.tarek, self.likes, self.cheese),
            (self.michel, self.likes, self.pizza),
            (self.michel, self.likes, self.cheese),
            (self.bob, self.likes, self.cheese),
            (self.bob, self.hates, self.pizza),
            (self.bob, self.hates, self.michel),  # gasp!
        ]

    def addStuff(self):
        """Add the fixture triples to context ``c1``."""
        graph = Graph(self.graph.store, self.c1)
        for triple in self._stuff_triples():
            graph.add(triple)

    def removeStuff(self):
        """Remove the fixture triples from context ``c1``."""
        graph = Graph(self.graph.store, self.c1)
        for triple in self._stuff_triples():
            graph.remove(triple)

    def addStuffInMultipleContexts(self):
        """Add one triple to the conjunctive graph and to ``c1`` and ``c2``."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        # self.assertEqual(len(self.graph), len(graph))
        self.assertEqual(
            len(list(self.graph.triples((None, None, None)))),
            len(list(graph.triples((None, None, None)))))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty

        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for _ in range(10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEqual(len(self.graph), oldLen + 1)
        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertNotIn(triple, self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertNotIn(triple, self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        def cid(c):
            # Graph objects expose ``identifier``; anything without that
            # attribute is taken to be the identifier itself.
            return getattr(c, 'identifier', c)

        # Build real lists: a map() result is a one-shot iterator on
        # Python 3 and cannot be searched twice.
        all_ids = [cid(c) for c in self.graph.contexts()]
        self.assertIn(self.c1, all_ids)
        self.assertIn(self.c2, all_ids)

        contextList = [cid(c) for c in self.graph.contexts(triple)]
        self.assertIn(self.c1, contextList)
        self.assertIn(self.c2, contextList)

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.get_context(c1)), 1)

        self.graph.remove_context(self.get_context(c1))
        # NOTE(review): this compares an identifier against whatever
        # contexts() yields; if that is Graph objects the check may be
        # trivially true -- confirm before tightening it.
        self.assertTrue(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        for c in [graph, self.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob,)))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))

            asserte(set(c.subject_objects(hates)),
                    set([(bob, pizza), (bob, michel)]))
            asserte(set(c.subject_objects(likes)),
                    set([(tarek, cheese), (michel, cheese),
                         (michel, pizza), (bob, cheese), (tarek, pizza)]))

            asserte(set(c.predicate_objects(michel)),
                    set([(likes, cheese), (likes, pizza)]))
            asserte(set(c.predicate_objects(bob)),
                    set([(likes, cheese), (hates, pizza), (hates, michel)]))
            asserte(set(c.predicate_objects(tarek)),
                    set([(likes, cheese), (likes, pizza)]))

            asserte(set(c.subject_predicates(pizza)),
                    set([(bob, hates), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(cheese)),
                    set([(bob, likes), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))

            asserte(set(c), set([
                    (bob, hates, michel), (bob, likes, cheese),
                    (tarek, likes, pizza), (michel, likes, pizza),
                    (michel, likes, cheese), (bob, hates, pizza),
                    (tarek, likes, cheese)]))

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
コード例 #14
0
class MemoryStore:
    """A class that combines and synchronizes n-quad files and an in-memory quad store.

    This class contains information about all graphs, their corresponding URIs and
    paths in the file system. For every graph (context of the quad store) there exists
    a FileReference object (n-quad) that enables versioning (with git) and persistence.
    """
    def __init__(self):
        """Initialize a new MemoryStore instance."""
        logger = logging.getLogger('quit.core.MemoryStore')
        logger.debug('Create an instance of MemoryStore')
        self.store = ConjunctiveGraph(identifier='default')

    def getgraphuris(self):
        """Method to get all available named graphs.

        Contexts identified by a blank node, and the context whose
        identifier is 'default', are left out.

        Returns:
            A list containing all graph uris found in store.
        """
        graphs = []
        for graph in self.store.contexts():
            identifier = graph.identifier
            # The blank-node test belongs on the identifier: the original
            # applied it to the context object itself.
            if isinstance(identifier, BNode) or str(identifier) == 'default':
                continue
            graphs.append(identifier)

        return graphs

    def getgraphcontent(self, graphuri):
        """Get the serialized content of a named graph.

        Args:
            graphuri: The URI of a named graph.
        Returns:
            content: A list of strings where each string is a quad.
        """
        context = self.store.get_context(URIRef(graphuri))
        triplestring = context.serialize(format='nt').decode('UTF-8')

        # Since we have triples here, we transform them to quads by adding the graphuri
        # TODO This might cause problems if ' .\n' will be part of a literal.
        #   Maybe a regex would be a better solution
        triplestring = triplestring.replace(' .\n', ' <' + graphuri + '> .\n')

        # Drop empty lines by filtering; ``list.remove('')`` raised ValueError
        # whenever the serialization contained no empty line.
        return [line for line in triplestring.splitlines() if line]

    def getstoreobject(self):
        """Get the conjunctive graph object.

        Returns:
            graph: The ConjunctiveGraph instance backing this store.
        """
        return self.store

    def graphexists(self, graphuri):
        """Ask whether the store yields a context for a named graph URI.

        NOTE(review): this only checks that ``get_context`` does not return
        None; whether it ever does depends on the store implementation --
        confirm that this is a meaningful existence test.

        Args:
            graphuri: A string containing the URI of a named graph

        Returns:
            True or False
        """
        return self.store.get_context(URIRef(graphuri)) is not None

    def addfile(self, filename, serialization):
        """Add a file to the store.

        Parse errors are not raised; they are logged at debug level.

        Args:
            filename: A String for the path to the file.
            serialization: A String containing the RDF format
        """
        try:
            self.store.parse(source=filename, format=serialization)
        except Exception as e:
            # Fetch the logger here: the one created in __init__ is a local
            # variable and is not visible from this method.
            logger = logging.getLogger('quit.core.MemoryStore')
            logger.debug(e)
            # Both placeholders are filled now; previously ``.format`` bound
            # only to the second string fragment.
            logger.debug(
                "Could not import file: %s. "
                "Make sure the file exists and contains data in  %s",
                filename, serialization)

    def addquads(self, quads):
        """Add quads to the MemoryStore.

        Args:
            quads: Rdflib.quads that should be added to the MemoryStore.
        """
        self.store.addN(quads)
        self.store.commit()

    def query(self, querystring):
        """Execute a SPARQL select query.

        Args:
            querystring: A string containing a SPARQL ask or select query.
        Returns:
            The SPARQL result set
        """
        return self.store.query(querystring)

    def update(self, querystring, versioning=True):
        """Execute a SPARQL update query and update the store.

        Args:
            querystring: A string containing a SPARQL update query.
            versioning: When true, ``evalUpdate`` is run against the store
                before the update is applied and its result is returned.
        Returns:
            The result of ``evalUpdate`` when versioning is enabled,
            otherwise None.
        """
        # methods of rdflib ConjunctiveGraph
        if versioning:
            actions = evalUpdate(self.store, querystring)
            self.store.update(querystring)
            return actions

        self.store.update(querystring)
        return None

    def removequads(self, quads):
        """Remove quads from the MemoryStore.

        Args:
            quads: Passed unchanged to the store's ``remove`` method.
        """
        self.store.remove(quads)
        self.store.commit()

    def exit(self):
        """Execute actions on API shutdown."""
        return
コード例 #15
0
ファイル: serialize.py プロジェクト: fserena/agora-gw
def _ted_as_json_ld(sg):
    """Return a JSON-LD string built from the graph ``sg``.

    The triples of ``sg`` are copied into a new ConjunctiveGraph, the generic
    ``WOT.interactionName`` property is replaced by a type-specific name
    property, and the result is skolemized, converted to JSON-LD, framed once
    for ``CORE.ThingDescription`` and once for
    ``CORE.ThingEcosystemDescription``, and merged under a single ``@graph``.

    Args:
        sg: source graph; it is only read, never modified.
    Returns:
        The merged document serialized with ``json.dumps`` (indent 3,
        sorted keys).
    """
    g = ConjunctiveGraph()
    g += sg

    # (interaction type, type-specific name property replacing interactionName)
    renames = (
        (WOT.Property, WOT.propertyName),
        (WOT.Action, WOT.actionName),
        (WOT.Event, WOT.eventName),
    )
    for interaction_type, specific_name in renames:
        # Materialize the rows before touching the graph: the loop below
        # mutates ``g``, which must not happen while results are still being
        # produced from it.
        rows = list(g.query(
            """SELECT ?p ?name WHERE { ?p a <%s> ; <%s> ?name}""" %
            (interaction_type, WOT.interactionName)))
        for res in rows:
            g.remove((res.p, WOT.interactionName, res.name))
            g.add((res.p, specific_name, res.name))

    context = build_context(g)

    # Point these context terms, when present, back at interactionName.
    for term in ('pid', 'aid', 'eid'):
        if term in context:
            context[term] = str(WOT.interactionName)

    cg = skolemize(g)
    ted_nquads = cg.serialize(format='nquads')
    ld = jsonld.from_rdf(ted_nquads)

    td_frame = jsonld.compact(
        jsonld.frame(ld, {
            'context': context,
            '@type': str(CORE.ThingDescription)
        }), context)

    td_context = td_frame['@context']
    del td_frame['@context']
    ted_frame = jsonld.compact(
        jsonld.frame(ld, {
            'context': context,
            '@type': str(CORE.ThingEcosystemDescription)
        }), context)
    ted_context = ted_frame['@context']
    del ted_frame['@context']

    component_ids = []
    ted_components = ted_frame.get('describes', {}).get('components', [])
    # A single component is wrapped so the loop below always sees a list.
    if isinstance(ted_components, (dict, str)):
        ted_components = [ted_components]
    for component in ted_components:
        # if it does not contain 'describedBy' it is a resource
        if isinstance(component, dict) and 'describedBy' in component:
            cid = component['@id']
        else:
            cid = component
        component_ids.append(cid)
    if component_ids:
        ted_frame['describes']['components'] = component_ids
    if '@graph' not in td_frame:
        source_td_frame = copy.deepcopy(td_frame)
        td_frame = {'@graph': []}
        if source_td_frame:
            td_frame['@graph'].append(source_td_frame)

    td_frame['@graph'].append(ted_frame)
    td_frame['@context'] = merge_two_dicts(td_context, ted_context)
    try:
        for pdata in path_data("$..interactions", td_frame['@graph']):
            if isinstance(pdata, list):
                for int_dict in pdata:
                    replace_interaction_name(int_dict)
            else:
                replace_interaction_name(pdata)
    except TypeError:
        # NOTE(review): TypeError is deliberately ignored here (presumably
        # raised when nothing matches) -- confirm against path_data.
        pass

    return json.dumps(td_frame, indent=3, sort_keys=True)
コード例 #16
0
ファイル: tablinker.py プロジェクト: CEDAR-project/TabLinker
class TabLinker(object):
    """Holds the RDF graphs, namespaces and Excel workbook handles for one workbook."""

    # URI prefixes under which per-workbook "scope" namespaces are created.
    defaultNamespacePrefix = "http://lod.cedar-project.nl/resource/"
    annotationsNamespacePrefix = "http://lod.cedar-project.nl/annotations/"
    # Prefix -> namespace bindings for the data graph.
    # NOTE: this dict lives on the class and setScope() writes a "scope" key
    # into it, so that entry is shared by all instances.
    namespaces = {
        "dcterms": Namespace("http://purl.org/dc/terms/"),
        "skos": Namespace("http://www.w3.org/2004/02/skos/core#"),
        "d2s": Namespace("http://lod.cedar-project.nl/core/"),
        "qb": Namespace("http://purl.org/linked-data/cube#"),
        "owl": Namespace("http://www.w3.org/2002/07/owl#"),
    }
    # Prefix -> namespace bindings for the annotation graph (also class-level
    # and also given a "scope" key by setScope()).
    annotationNamespaces = {
        "np": Namespace("http://www.nanopub.org/nschema#"),
        "oa": Namespace("http://www.w3.org/ns/openannotation/core/"),
        "xsd": Namespace("http://www.w3.org/2001/XMLSchema#"),
        "dct": Namespace("http://purl.org/dc/terms/"),
    }

    def __init__(self, filename, config, level=logging.DEBUG):
        """TabLinker constructor

        Sets up logging and the graphs, derives the scope from the file
        name, opens the workbook, reads its styles and makes a writable copy.

        Keyword arguments:
        filename -- String containing the name of the current Excel file being examined
        config -- Configuration object, loaded from .ini file
        level -- A logging level as defined in the logging module
        """
        self.config = config
        self.filename = filename

        self.log = logging.getLogger("TabLinker")
        self.log.setLevel(level)

        self.log.debug("Initializing Graphs")
        self.initGraphs()

        self.log.debug("Setting Scope")
        basename = os.path.basename(filename)
        # Scope name = everything before the last ".xls" in the file name.
        match = re.search(r"(.*)\.xls", basename)
        if match:
            basename = match.group(1)
        else:
            # No ".xls" in the name: fall back to stripping the extension
            # instead of failing on ``None.group``.
            basename = os.path.splitext(basename)[0]
        self.setScope(basename)

        self.log.debug("Loading Excel file {0}.".format(filename))
        self.rb = open_workbook(filename, formatting_info=True)

        self.log.debug("Reading styles")
        self.styles = Styles(self.rb)

        self.wb = copy(self.rb)
        # Logged after the copy so the message matches what has happened.
        self.log.debug("Copied Workbook to writable copy")

    def initGraphs(self):
        """Create the data and annotation graphs, bind their namespaces and add schema triples."""

        # One graph for the data, a separate one for annotations.
        self.graph = ConjunctiveGraph()
        self.annotationGraph = ConjunctiveGraph()

        self.log.debug("Adding namespaces to graphs")
        for prefix, ns in self.namespaces.items():
            self.graph.namespace_manager.bind(prefix, ns)
        for prefix, ns in self.annotationNamespaces.items():
            self.annotationGraph.namespace_manager.bind(prefix, ns)

        self.log.debug("Adding some schema information (dimension and measure properties) ")
        self.addDataCellProperty()

        d2s = self.namespaces["d2s"]
        dimension_triple = (d2s["dimension"], RDF.type, self.namespaces["qb"]["DimensionProperty"])
        label_triple = (d2s["label"], RDF.type, RDF["Property"])
        self.graph.add(dimension_triple)
        self.graph.add(label_triple)

    def addDataCellProperty(self):
        """Add definition of data cell resource to graph

        Reads the ``dataCell`` config section:
        propertyName -- local name of the property ("hasValue" when empty)
        labels -- entries separated by ":::"; each entry is split on "-->"
                  and its first part is passed as the language argument of
                  the Literal, its second part as the label text
        literalType -- URI added as the ``RDFS.range`` of the property
        """
        configured_name = self.config.get("dataCell", "propertyName")
        if len(configured_name) > 0:
            self.dataCellPropertyName = configured_name
        else:
            self.dataCellPropertyName = "hasValue"

        data_cell_property = self.namespaces["d2s"][self.dataCellPropertyName]
        self.graph.add((data_cell_property, RDF.type, self.namespaces["qb"]["MeasureProperty"]))

        # Take labels from config
        labels = self.config.get("dataCell", "labels")
        if len(labels) > 0:
            for label in labels.split(":::"):
                labelProperties = label.split("-->")
                if len(labelProperties) < 2:
                    # Entry without "-->": skip it rather than fail with an
                    # IndexError on the missing second part.
                    continue
                if len(labelProperties[0]) > 0 and len(labelProperties[1]) > 0:
                    self.graph.add(
                        (data_cell_property, RDFS.label, Literal(labelProperties[1], labelProperties[0]))
                    )

        literal_type = self.config.get("dataCell", "literalType")
        if len(literal_type) > 0:
            self.graph.add((data_cell_property, RDFS.range, URIRef(literal_type)))

    def setScope(self, fileBasename):
        """Create the "scope" namespaces for this workbook and bind them as the default prefix.

        fileBasename -- name appended to the data and annotation namespace prefixes
        """
        self.fileBasename = fileBasename

        # Data URIs and annotation URIs live under different prefixes.
        scope_uri = self.defaultNamespacePrefix + fileBasename + "/"
        annotation_scope_uri = self.annotationsNamespacePrefix + fileBasename + "/"

        self.log.debug("Adding namespace for {0}: {1}".format(fileBasename, scope_uri))

        data_scope = Namespace(scope_uri)
        annotation_scope = Namespace(annotation_scope_uri)

        self.namespaces["scope"] = data_scope
        self.annotationNamespaces["scope"] = annotation_scope
        self.graph.namespace_manager.bind("", data_scope)
        self.annotationGraph.namespace_manager.bind("", annotation_scope)

    def doLink(self):
        """Run the sheet parser once for every sheet of the loaded workbook."""
        self.log.info("Starting TabLinker for all sheets in workbook")

        for sheet_index in range(self.rb.nsheets):
            self.log.debug("Starting with sheet {0}".format(sheet_index))
            # Readable sheet and its counterpart in the writable copy.
            self.r_sheet = self.rb.sheet_by_index(sheet_index)
            self.w_sheet = self.wb.get_sheet(sheet_index)

            valid_rows, valid_cols = self.getValidRowsCols()
            self.rowns = valid_rows
            self.colns = valid_cols

            # Sheet name with whitespace replaced by underscores, URL-quoted.
            underscored_name = re.sub("\s", "_", self.r_sheet.name)
            self.sheet_qname = urllib.quote(underscored_name)
            self.log.debug("Base for QName generator set to: {0}".format(self.sheet_qname))

            self.log.debug("Starting parser")
            self.parseSheet()

    ###
    #    Utility Functions
    ###

    def insideMergeBox(self, i, j):
        """
        Check if the specified cell is inside a merge box

        Arguments:
        i -- row
        j -- column

        Returns:
        True/False -- depending on whether the cell is inside a merge box
        """
        self.merged_cells = self.r_sheet.merged_cells
        for crange in self.merged_cells:
            rlo, rhi, clo, chi = crange
            if i <= rhi - 1 and i >= rlo and j <= chi - 1 and j >= clo:
                return True
        return False

    def getMergeBoxCoord(self, i, j):
        """
        Get the top-left corner cell of the merge box containing the specified cell

        Arguments:
        i -- row
        j -- column

        Returns:
        (k, l) -- Coordinates of the top-left corner of the merge box
        """
        if not self.insideMergeBox(i, j):
            return (-1, -1)

        self.merged_cells = self.r_sheet.merged_cells
        for crange in self.merged_cells:
            rlo, rhi, clo, chi = crange
            if i <= rhi - 1 and i >= rlo and j <= chi - 1 and j >= clo:
                return (rlo, clo)

    def getType(self, style):
        """Get type for a given excel style.

        The type is whatever follows the first occurrence of 'TL' plus one
        whitespace character in the style name; the marker is searched for
        anywhere in the name, not only at its start.

        Arguments:
        style -- Style (string) to check type for

        Returns:
        String -- The type of this field. In case none is found, 'Unknown'
        """
        typematch = re.search(r"TL\s(.*)", style)
        return typematch.group(1) if typematch else "Unknown"

    def isEmpty(self, i, j):
        """Report whether the cell at (i, j) of the current sheet holds no content.

        A cell counts as empty when its type is XL_CELL_EMPTY or
        XL_CELL_BLANK, or when its value equals the empty string.

        Arguments:
        i -- row
        j -- column

        Returns:
        True/False -- depending on whether the cell is empty
        """
        cell = self.r_sheet.cell(i, j)
        if cell.ctype == XL_CELL_EMPTY or cell.ctype == XL_CELL_BLANK:
            return True
        if cell.value == "":
            return True
        return False

    def isEmptyRow(self, i, colns):
        """
        Check every cell in the first 'colns' columns of row 'i' for content

        Arguments:
        i     -- The index of the row to be checked.
        colns -- The number of columns to be checked

        Returns:
        true  -- if all checked cells are empty (also when colns is 0)
        false -- as soon as one non-empty cell is found
        """
        return all(self.isEmpty(i, col) for col in range(0, colns))

    def isEmptyColumn(self, j, rowns):
        """
        Check every cell in the first 'rowns' rows of column 'j' for content

        Arguments:
        j     -- The index of the column to be checked.
        rowns -- The number of rows to be checked

        Returns:
        true  -- if all checked cells are empty (also when rowns is 0)
        false -- as soon as one non-empty cell is found
        """
        return all(self.isEmpty(row, j) for row in range(0, rowns))

    def getValidRowsCols(self):
        """
        Determine the number of non-empty rows and columns in the Excel sheet

        Starts from the counts reported by number_of_good_rows /
        number_of_good_cols and trims trailing rows, then trailing columns,
        that contain only empty cells.

        Returns:
        rowns -- number of rows
        colns -- number of columns
        """
        colns = number_of_good_cols(self.r_sheet)
        rowns = number_of_good_rows(self.r_sheet)

        # Check whether the number of good columns and rows are correct.
        # The "> 0" guards stop the trimming at an entirely empty sheet;
        # without them the loops continued with negative indices and, when
        # colns was 0, never terminated (an empty range always looks empty).
        while rowns > 0 and self.isEmptyRow(rowns - 1, colns):
            rowns = rowns - 1
        while colns > 0 and self.isEmptyColumn(colns - 1, rowns):
            colns = colns - 1

        self.log.debug("Number of rows with content:    {0}".format(rowns))
        self.log.debug("Number of columns with content: {0}".format(colns))
        return rowns, colns

    def getQName(self, names):
        """
        Build a QName of the form <sheet_qname>/<name>[/<name>...]

        Every name is passed through processString first. For a dictionary
        the values are appended in the dictionary's iteration order.

        Arguments:
        names -- Either dictionary of names or string of a name.

        Returns:
        qname -- a valid QName for the dictionary or string
        """

        if type(names) == dict:
            segments = [self.processString(names[key]) for key in names]
        else:
            segments = [self.processString(names)]

        qname = self.sheet_qname
        for segment in segments:
            qname = qname + "/" + segment

        self.log.debug("Minted new QName: {}".format(qname))
        return qname

    def processString(self, string):
        """
        Make a cell value usable inside a URI.

        The value is converted to unicode and stripped; whitespace,
        parentheses, commas and periods are replaced by underscores; the
        result is UTF-8 encoded and URL-quoted.

        Arguments:
        string -- The string representing the value of the source cell

        Returns:
        processedString -- The processed string
        """

        stripped = unicode(string).strip()
        underscored = re.sub("\s|\(|\)|,|\.", "_", stripped)
        encoded = underscored.encode("utf-8", "ignore")
        return urllib.quote(encoded)

    def addValue(self, source_cell_value, altLabel=None):
        """
        Add a "value" + optional label to the graph for a cell in the source Excel sheet. The value is typically the value stored in the source cell itself, but may also be a copy of another cell (e.g. in the case of 'idem.').

        Arguments:
        source_cell_value -- The string representing the value of the source cell,
                             or a dictionary of names (e.g. a row hierarchy)
        altLabel -- Optional alternative label; added as skos:altLabel when it
                    differs from the preferred label text

        Returns:
        source_cell_value_qname -- a valid QName for the value of the source cell
        """
        scope = self.namespaces["scope"]
        skos = self.namespaces["skos"]

        source_cell_value_qname = self.getQName(source_cell_value)
        value_uri = scope[source_cell_value_qname]

        self.graph.add((value_uri, self.namespaces["qb"]["dataSet"], scope[self.sheet_qname]))
        self.graph.add((scope[self.source_cell_qname], self.namespaces["d2s"]["value"], value_uri))

        # If the source_cell_value is actually a dictionary (e.g. in the case of HierarchicalRowHeaders), then use the last element of the row hierarchy as prefLabel
        # Otherwise just use the source_cell_value as prefLabel
        if type(source_cell_value) == dict:
            # list() keeps the indexing valid where values() is a view.
            label_text = list(source_cell_value.values())[-1]
            pref_label = Literal(label_text, "nl")
        else:
            label_text = source_cell_value
            # Try to parse a date to add the appropriate datatype to the literal.
            # Only the parse call is guarded, so errors raised while building
            # or adding the literal are no longer mistaken for "not a date".
            try:
                isodate.parse_datetime(source_cell_value)
            except (ValueError, isodate.isoerror.ISO8601Error, AttributeError):
                self.log.debug("No datetime on this cell")
                pref_label = Literal(source_cell_value, "nl")
            else:
                self.log.debug("Datetime on this cell: %s" % source_cell_value)
                # The XML Schema datatype is spelled "dateTime"; "datetime"
                # does not name an XSD datatype.
                pref_label = Literal(source_cell_value, datatype=XSD.dateTime)

        self.graph.add((value_uri, skos.prefLabel, pref_label))

        if altLabel and altLabel != label_text:
            # If altLabel has a value (typically for HierarchicalRowHeaders) different from the preferred label text, we add it as alternative label.
            self.graph.add((value_uri, skos.altLabel, Literal(altLabel, "nl")))

        return source_cell_value_qname

    def parseSheet(self):
        """
        Parses the currently selected sheet in the workbook, takes no arguments. Iterates over all cells in the Excel sheet and produces relevant RDF Triples.

        The dimension dictionaries and the row hierarchy are reset first; cells are then visited row by row, column by column.
        """
        self.log.info("Parsing {0} rows and {1} columns.".format(self.rowns, self.colns))

        self.column_dimensions = {}
        self.property_dimensions = {}
        self.row_dimensions = {}
        self.rowhierarchy = {}

        # Get dictionary of annotations
        self.annotations = self.r_sheet.cell_note_map

        # The setting does not change while parsing, so look it up once instead of once per cell
        annotations_enabled = self.config.get("annotations", "enabled") == "1"

        # Handlers that run for every cell of the given type, even when the cell is empty
        always_handlers = {
            "HRowHeader": self.updateRowHierarchy,
            "Data": self.parseData,
            "ColHeader": self.parseColHeader,
            "RowProperty": self.parseRowProperty,
        }
        # Handlers that only run for non-empty cells of the given type
        non_empty_handlers = {
            "Title": self.parseTitle,
            "RowHeader": self.parseRowHeader,
            "HRowHeader": self.parseHierarchicalRowHeader,
            "RowLabel": self.parseRowLabel,
        }

        for i in range(self.rowns):
            self.rowhierarchy[i] = {}

            for j in range(self.colns):
                # Parse cell data
                self.source_cell = self.r_sheet.cell(i, j)
                self.source_cell_name = cellname(i, j)
                self.style = self.styles[self.source_cell].name
                self.cellType = self.getType(self.style)
                self.source_cell_qname = self.getQName(self.source_cell_name)

                # Unicode template: on Python 2 a byte-string template raises UnicodeEncodeError
                # as soon as the cell value contains non-ASCII text
                self.log.debug(
                    u'({},{}) {}/{}: "{}"'.format(i, j, self.cellType, self.source_cell_name, self.source_cell.value)
                )

                # Try to parse ints to avoid ugly _0 URIs
                try:
                    int_value = int(self.source_cell.value)
                    if int_value == self.source_cell.value:
                        self.source_cell.value = int_value
                except ValueError:
                    self.log.debug("(%s.%s) No parseable int" % (i, j))

                # Parse annotation (if any)
                if annotations_enabled and (i, j) in self.annotations:
                    self.parseAnnotation(i, j)

                # Parse even if empty
                handler = always_handlers.get(self.cellType)
                if handler is not None:
                    handler(i, j)

                if self.isEmpty(i, j):
                    continue

                cell_uri = self.namespaces["scope"][self.source_cell_qname]
                self.graph.add((cell_uri, RDF.type, self.namespaces["d2s"][self.cellType]))
                self.graph.add((cell_uri, self.namespaces["d2s"]["cell"], Literal(self.source_cell_name)))

                handler = non_empty_handlers.get(self.cellType)
                if handler is not None:
                    handler(i, j)

        self.log.info("Done parsing...")

    def updateRowHierarchy(self, i, j):
        """
        Build up lists for hierarchical row headers. Cells marked as hierarchical row header are often empty meaning that their intended value is stored somewhere else in the Excel sheet.

        Keyword arguments:
        int i -- row number
        int j -- col number

        Returns:
        New row hierarchy dictionary
        """
        cell_value = self.source_cell.value
        # u"{}".format() rather than str(): on Python 2, str() raises UnicodeEncodeError for non-ASCII cell text
        lowered = u"{}".format(cell_value).lower()

        if self.isEmpty(i, j) or lowered.strip() == "id.":
            # If the cell is empty, and a HierarchicalRowHeader, add the value of the row header above it.
            # If the cell above is not in the rowhierarchy, don't do anything.
            # If the cell is exactly 'id.', add the value of the row header above it.
            try:
                self.rowhierarchy[i][j] = self.rowhierarchy[i - 1][j]
            except KeyError:
                # The cell's own value is deliberately not stored here (it caused double slashes in URIs)
                self.log.debug("({},{}) Top row, added nothing\nRow hierarchy: {}".format(i, j, self.rowhierarchy[i]))
            else:
                self.log.debug("({},{}) Copied from above\nRow hierarchy: {}".format(i, j, self.rowhierarchy[i]))
        elif lowered.startswith(("id.", "id ")):
            # If the cell starts with 'id.', add the value of the row  above it, and append the rest of the cell's value.
            suffix = cell_value[3:]
            try:
                self.rowhierarchy[i][j] = self.rowhierarchy[i - 1][j] + suffix
            except (KeyError, TypeError):
                # KeyError: nothing above to copy from. TypeError: the value above cannot be
                # concatenated with the suffix (e.g. a number). Either way keep the cell's own value.
                self.rowhierarchy[i][j] = cell_value
                self.log.debug("({},{}) Top row, added value\nRow hierarchy {}".format(i, j, self.rowhierarchy[i]))
            else:
                self.log.debug("({},{}) Copied from above+suffix\nRow hierarchy {}".format(i, j, self.rowhierarchy[i]))
        else:
            # The cell is not empty here: empty cells are handled by the first branch
            self.rowhierarchy[i][j] = cell_value
            self.log.debug("({},{}) Added value\nRow hierarchy {}".format(i, j, self.rowhierarchy[i]))
        return self.rowhierarchy

    def parseHierarchicalRowHeader(self, i, j):
        """
        Create relevant triples for the cell marked as HierarchicalRowHeader (i, j are row and column)

        Adds the value for the row hierarchy of row i, links the cell to it, links the value
        to its parent in the hierarchy via skos:broader, and records the value in
        self.row_dimensions[i] together with the properties of column j.
        """

        # Use the rowhierarchy to create a unique qname for the cell's contents, give the source_cell's original value as extra argument
        self.log.debug("Parsing HierarchicalRowHeader")

        self.source_cell_value_qname = self.addValue(self.rowhierarchy[i], altLabel=self.source_cell.value)

        scope = self.namespaces["scope"]
        cell_uri = scope[self.source_cell_qname]
        value_uri = scope[self.source_cell_value_qname]

        # Now that we know the source cell's value qname, add a d2s:isDimension link and the skos:Concept type
        self.graph.add((cell_uri, self.namespaces["d2s"]["isDimension"], value_uri))
        self.graph.add((cell_uri, RDF.type, self.namespaces["skos"].Concept))

        # list(...) so the slice also works where dict.items() is a view rather than a list
        hierarchy_items = list(self.rowhierarchy[i].items())
        try:
            parent_values = dict(hierarchy_items[:-1])
            # The logger takes a single message; passing i and j as separate positional arguments breaks its formatting
            self.log.debug("({},{}) Parent value: {}".format(i, j, parent_values))
            parent_value_qname = self.getQName(parent_values)
            self.graph.add((value_uri, self.namespaces["skos"]["broader"], scope[parent_value_qname]))
        except Exception:
            # NOTE(review): presumably getQName fails for the empty parent of a top-level header - confirm which exception is expected and narrow this
            self.log.debug("({},{}) Top of hierarchy".format(i, j))

        # Get the properties to use for the row headers
        try:
            properties = list(self.property_dimensions[j])
        except KeyError:
            properties = []
            self.log.debug("({}.{}) No row dimension for cell".format(i, j))

        # Store the value as a URI, like parseRowHeader does: parseData uses the first element of
        # each row_dimensions entry directly as the object of a triple, where a bare qname string is not an RDF term
        self.row_dimensions.setdefault(i, []).append((value_uri, properties))

    def parseRowLabel(self, i, j):
        """
        Create relevant triples for the cell marked as Label (i, j are row and column)

        The value of the label cell becomes the skos:prefLabel of the HierarchicalRowHeader
        value for row i; any skos:prefLabel that value already had is turned into a skos:altLabel.
        """

        self.log.debug("Parsing Row Label")

        # Get the QName of the HierarchicalRowHeader cell that this label belongs to, based on the rowhierarchy for this row (i)
        hierarchicalRowHeader_value_qname = self.getQName(self.rowhierarchy[i])
        header_uri = self.namespaces["scope"][hierarchicalRowHeader_value_qname]
        skos = self.namespaces["skos"]

        # Take a snapshot of the current labels first: the loop below removes and adds triples,
        # and mutating the graph while its objects() generator is still being consumed is not safe on every store
        for label in list(self.graph.objects(header_uri, skos.prefLabel)):
            # If the hierarchicalRowHeader QName already has a preferred label, turn it into a skos:altLabel
            self.graph.remove((header_uri, skos.prefLabel, label))
            self.graph.add((header_uri, skos.altLabel, label))
            # Unicode template: on Python 2 a byte-string template raises UnicodeEncodeError for non-ASCII labels
            self.log.debug(
                u"Turned skos:prefLabel {} for {} into a skos:altLabel".format(label, hierarchicalRowHeader_value_qname)
            )

        # Add the value of the label cell as skos:prefLabel to the header cell
        self.graph.add((header_uri, skos.prefLabel, Literal(self.source_cell.value, "nl")))

        # Record that this source_cell_qname is the label for the HierarchicalRowHeader cell
        self.graph.add(
            (self.namespaces["scope"][self.source_cell_qname], self.namespaces["d2s"]["isLabel"], header_uri)
        )

    def parseRowHeader(self, i, j):
        """
        Create relevant triples for the cell marked as RowHeader (i, j are row and column)

        The header value is used as-is when the cell already contains a URI (scheme and
        network location present); otherwise a value is added for it in the scope namespace.
        The header value is recorded in self.row_dimensions[i] together with the properties of column j.
        """
        cell_value = self.source_cell.value

        # Don't attach the cell value to the namespace if it's already a URI
        # u"{}".format() rather than str(): on Python 2, str() raises UnicodeEncodeError for non-ASCII cell text
        isURI = urlparse(u"{}".format(cell_value))
        if isURI.scheme and isURI.netloc:
            rowHeaderValue = URIRef(cell_value)
        else:
            self.source_cell_value_qname = self.addValue(cell_value)
            rowHeaderValue = self.namespaces["scope"][self.source_cell_value_qname]

        self.graph.add(
            (self.namespaces["scope"][self.source_cell_qname], self.namespaces["d2s"]["isDimension"], rowHeaderValue)
        )
        self.graph.add((rowHeaderValue, RDF.type, self.namespaces["d2s"]["Dimension"]))
        self.graph.add((rowHeaderValue, RDF.type, self.namespaces["skos"].Concept))

        # Get the properties to use for the row headers
        try:
            properties = list(self.property_dimensions[j])
        except KeyError:
            properties = []
            self.log.debug("({}.{}) No properties for cell".format(i, j))
        self.row_dimensions.setdefault(i, []).append((rowHeaderValue, properties))

        # Use the column dimensions dictionary to find the objects of the d2s:dimension property
        try:
            for dim_qname in self.column_dimensions[j]:
                self.graph.add(
                    (rowHeaderValue, self.namespaces["d2s"]["dimension"], self.namespaces["scope"][dim_qname])
                )
        except KeyError:
            self.log.debug("({}.{}) No column dimension for cell".format(i, j))

    def parseColHeader(self, i, j):
        """
        Emit the triples for a column header cell at row i, column j.

        An empty cell takes its value from the cell that getMergeBoxCoord reports for it
        when it lies inside a merge box; an empty cell outside any merge box is ignored.
        The resulting value qname is appended to self.column_dimensions[j].
        """
        if not self.isEmpty(i, j):
            header_value = self.source_cell.value
        elif self.insideMergeBox(i, j):
            merge_row, merge_col = self.getMergeBoxCoord(i, j)
            header_value = self.r_sheet.cell(merge_row, merge_col).value
        else:
            # Empty and not part of a merged area: nothing to record
            return

        self.source_cell_value_qname = self.addValue(header_value)

        scope = self.namespaces["scope"]
        cell_uri = scope[self.source_cell_qname]
        value_uri = scope[self.source_cell_value_qname]

        self.graph.add((cell_uri, self.namespaces["d2s"]["isDimension"], value_uri))
        self.graph.add((value_uri, RDF.type, self.namespaces["d2s"]["Dimension"]))
        self.graph.add((cell_uri, RDF.type, self.namespaces["skos"].Concept))

        # Remember the value qname as one of the dimensions of this column
        self.column_dimensions.setdefault(j, []).append(self.source_cell_value_qname)

    def parseRowProperty(self, i, j):
        """
        Emit the triples for a row property cell at row i, column j.

        An empty cell takes its value from the cell that getMergeBoxCoord reports for it
        when it lies inside a merge box; an empty cell outside any merge box is ignored.
        The resulting value qname is appended to self.property_dimensions[j].
        """
        if not self.isEmpty(i, j):
            property_value = self.source_cell.value
        elif self.insideMergeBox(i, j):
            merge_row, merge_col = self.getMergeBoxCoord(i, j)
            property_value = self.r_sheet.cell(merge_row, merge_col).value
        else:
            # Empty and not part of a merged area: nothing to record
            return

        self.source_cell_value_qname = self.addValue(property_value)

        scope = self.namespaces["scope"]
        value_uri = scope[self.source_cell_value_qname]

        self.graph.add((scope[self.source_cell_qname], self.namespaces["d2s"]["isDimensionProperty"], value_uri))
        self.graph.add((value_uri, RDF.type, self.namespaces["qb"]["DimensionProperty"]))
        self.graph.add((value_uri, RDF.type, RDF["Property"]))

        # Remember the value qname as one of the properties of this column
        self.property_dimensions.setdefault(j, []).append(self.source_cell_value_qname)

    def parseTitle(self, i, j):
        """
        Emit the triples for a Title cell (i, j are row and column).

        The cell's value is added to the graph, attached to the sheet through d2s:title
        and typed as a d2s:Dimension.
        """
        self.source_cell_value_qname = self.addValue(self.source_cell.value)

        scope = self.namespaces["scope"]
        d2s = self.namespaces["d2s"]
        title_uri = scope[self.source_cell_value_qname]

        self.graph.add((scope[self.sheet_qname], d2s["title"], title_uri))
        self.graph.add((title_uri, RDF.type, d2s["Dimension"]))

    def parseData(self, i, j):
        """
        Emit a qb:Observation for a Data cell (i, j are row and column).

        Empty cells are skipped when the ("dataCell", "implicitZeros") setting is "0" and
        recorded as the value 0 when it is "1". The observation is linked to the row
        dimensions of row i and the column dimensions of column j.
        """
        if self.isEmpty(i, j) and self.config.get("dataCell", "implicitZeros") == "0":
            return

        obs = BNode()
        d2s = self.namespaces["d2s"]
        scope = self.namespaces["scope"]

        self.graph.add((scope[self.source_cell_qname], d2s["isObservation"], obs))
        self.graph.add((obs, RDF.type, self.namespaces["qb"]["Observation"]))
        self.graph.add((obs, self.namespaces["qb"]["dataSet"], scope[self.sheet_qname]))

        if self.isEmpty(i, j) and self.config.get("dataCell", "implicitZeros") == "1":
            measured = Literal(0)
        else:
            measured = Literal(self.source_cell.value)
        self.graph.add((obs, d2s[self.dataCellPropertyName], measured))

        # Link the observation to the row headers through the properties registered for them
        try:
            row_dims = self.row_dimensions[i]
        except KeyError:
            self.log.debug("({}.{}) No row dimension for cell".format(i, j))
        else:
            for header_value, header_properties in row_dims:
                for prop in header_properties:
                    self.graph.add((obs, d2s[prop], header_value))

        # Link the observation to the column headers through d2s:dimension
        try:
            col_dims = self.column_dimensions[j]
        except KeyError:
            self.log.debug("({}.{}) No column dimension for cell".format(i, j))
        else:
            for col_qname in col_dims:
                self.graph.add((obs, d2s["dimension"], scope[col_qname]))

    def parseAnnotation(self, i, j):
        """
        Create relevant triples for the annotation attached to cell (i, j)

        Only the Open Annotation model (config option ("annotations", "model") set to "oa")
        is implemented; for any other setting a notice is printed and nothing is added.
        """
        if self.config.get("annotations", "model") != "oa":
            # Nanopublications model
            # Parenthesised form prints the same text on Python 2 and is also valid Python 3
            print("Nanopublications not implemented yet!")
            return

        # Create triples according to Open Annotation model
        note = self.annotations[(i, j)]
        oa = self.annotationNamespaces["oa"]
        xsd_date = self.annotationNamespaces["xsd"]["date"]
        annotation_uri = self.annotationNamespaces["scope"][self.source_cell_qname]
        body = BNode()

        # Flatten line breaks to spaces. "\r\n" has to go first: once "\n" and "\r" have been
        # replaced separately it can no longer match, and a Windows line break would become two spaces
        body_text = note.text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")

        self.annotationGraph.add((annotation_uri, RDF.type, oa["Annotation"]))
        self.annotationGraph.add((annotation_uri, oa["hasBody"], body))
        self.annotationGraph.add((body, RDF.value, Literal(body_text.encode("utf-8"))))
        self.annotationGraph.add((annotation_uri, oa["hasTarget"], self.namespaces["scope"][self.source_cell_qname]))
        self.annotationGraph.add((annotation_uri, oa["annotator"], Literal(note.author.encode("utf-8"))))
        # The annotation date is taken from the modification time of the source file
        self.annotationGraph.add(
            (
                annotation_uri,
                oa["annotated"],
                Literal(
                    datetime.datetime.fromtimestamp(os.path.getmtime(self.filename)).strftime("%Y-%m-%d"),
                    datatype=xsd_date,
                ),
            )
        )
        self.annotationGraph.add(
            (annotation_uri, oa["generator"], URIRef("https://github.com/Data2Semantics/TabLinker"))
        )
        self.annotationGraph.add(
            (annotation_uri, oa["generated"], Literal(datetime.datetime.now().strftime("%Y-%m-%d"), datatype=xsd_date))
        )
        self.annotationGraph.add(
            (annotation_uri, oa["modelVersion"], URIRef("http://www.openannotation.org/spec/core/20120509.html"))
        )
コード例 #17
0
ファイル: test_parser_hext.py プロジェクト: edmondchuc/rdflib
def _strip_xsd_string_datatype(graph):
    """Rewrite every literal object typed xsd:string in ``graph`` as an untyped literal."""
    # Snapshot the triples first: the loop removes and adds triples, and mutating a
    # graph while its triples() generator is still being consumed is not safe on every store
    for s, p, o in list(graph.triples((None, None, None))):
        if isinstance(o, Literal) and o.datatype == XSD.string:
            graph.remove((s, p, o))
            graph.add((s, p, Literal(str(o))))


def test_roundtrip():
    """Round-trip every N-Triples file under ./nt through the HexT serializer and parser.

    Each file is parsed as N-Triples, serialized to HexT, parsed back, and the result
    must be isomorphic to the original. Files named in ``files_to_skip`` and files that
    cannot be parsed as N-Triples are skipped.
    """
    # these are some RDF files that HexT can round-trip since the have no
    # literals with no datatype declared:
    TEST_DIR = Path(__file__).parent.absolute() / "nt"
    files_to_skip = {
        "paths-04.nt": "subject literal",
        "even_more_literals.nt": "JSON decoding error",
        "literals-02.nt": "JSON decoding error",
        "more_literals.nt": "JSON decoding error",
        "test.ntriples": "JSON decoding error",
        "literals-05.nt": "JSON decoding error",
        "i18n-01.nt": "JSON decoding error",
        "literals-04.nt": "JSON decoding error",
        "rdflibtest01.nt": "JSON decoding error",
        "rdflibtest05.nt": "JSON decoding error",
    }
    tests = 0
    skipped = 0
    print()
    for f in (x for x in TEST_DIR.glob("**/*") if x.is_file()):
        tests += 1
        print(f"Test {tests}: {f}")

        if f.name in files_to_skip:
            print(f"Skipping: {files_to_skip[f.name]}")
            # Count known-bad files too, so the summary matches the "Skipping" lines printed
            skipped += 1
            continue

        try:
            cg = ConjunctiveGraph().parse(f, format="nt")
        except Exception:
            print("Skipping: could not NT parse")
            skipped += 1
            continue

        cg2 = ConjunctiveGraph()
        cg2.parse(
            data=cg.serialize(format="hext"),
            format="hext",
            publicID=cg2.default_context.identifier,
        )

        # Literals come back from HexT typed xsd:string; turn them back into plain
        # literals before comparing with the original graph
        if cg2.context_aware:
            for context in list(cg2.contexts()):
                _strip_xsd_string_datatype(context)
        else:
            _strip_xsd_string_datatype(cg2)

        assert cg.isomorphic(cg2)

    print(f"No. tests: {tests}")
    print(f"No. tests skipped: {skipped}")
コード例 #18
0
ファイル: clone_factory.py プロジェクト: yanlirock/event-kge
# Properties used when cloning the device entities below
hasPart = URIRef('http://www.loa-cnr.it/ontologies/DUL.owl#hasPart')
follows = URIRef('http://www.loa-cnr.it/ontologies/DUL.owl#follows')
involvedEquipment = base_ns['involvedEquipment']

global_msgs_dict = dict()
global_variants_dict = dict()
global_modules_dict = dict()
global_fes_dict = dict()
entities_dict = dict()

remove_original = False
# Graph that receives the cloned triples
clone_g = ConjunctiveGraph()

# TODO: remove NamedIndividual for all entities
# Snapshot the matching triples before removing them: deleting from the graph while
# its triples() generator is still being consumed is not safe on every store
for s, p, o in list(original_g.triples((None, RDF.type, OWL.NamedIndividual))):
    original_g.remove((s, p, o))

for i, clone in enumerate(clones):
    if i == len(clones) - 1:
        remove_original = True
    # copy all device entities
    for dev in original_g.subjects(RDF.type, device):
        # their associated triples
        for s, p, o in original_g.triples((dev, None, None)):
            new_s = clone + '-' + unicode(s).split('#')[1]
            new_s = amberg_ns[new_s]
            if p in (RDF.type, amberg_ns['hasSkill']):
                if remove_original:
                    original_g.remove((s, p, o))
                clone_g.add((new_s, p, o))
            elif p in (hasPart, amberg_ns['connectsTo']):
コード例 #19
0
def renderermain(graphinput, user):
    # Record rdf
    time.sleep(5)

    print(" ################ Intent Renderer ###################")
    print("\n")

    rendergraph = ConjunctiveGraph()
    rendergraph = graphinput
    hasService = URIRef('ex:hasService')
    hasArguments = URIRef('ex:hasArguments')
    hasCondition = URIRef('ex:hasCondition')
    hasBandwidth = URIRef('ex:hasBandwidth')
    hasDate = URIRef('ex:hasDate')
    hasTime = URIRef('ex:hasTime')
    hasZone = URIRef('ex:hasZone')

    #for subj, pred, obj in rendergraph:
    #   print subj, pred, obj

    posbw = renderbw(user, rendergraph)

    print("Rendering bandwidth permissions.... " + posbw)
    for subj, pred, obj in rendergraph:
        if str(obj) == "bwnolimit":  # MK Update this
            rendergraph.remove((subj, pred, obj))
            rendergraph.add((subj, hasBandwidth, Literal(posbw)))

    timefns = rendertime(user, rendergraph)
    for t in timefns:
        for subj, pred, obj in rendergraph:
            # print subj,pred, obj
            if t.service.upper() == str(subj).upper():
                rendergraph.remove((subj, pred, obj))
                fulltime = t.args
                # print fulltime
                tdate = fulltime.split()[0]
                ttime = fulltime.split()[1]
                ttime = ttime.replace(':', ".")
                ztime = fulltime.split()[2]
                rendergraph.add((subj, hasDate, Literal(tdate)))
                rendergraph.add((subj, hasTime, Literal(ttime)))
                rendergraph.add((subj, hasZone, Literal(ztime)))

    endpointdata = rendertopology(user, rendergraph)
    # for a in endpointdata:
    #	print a.service, a.args
    # update graph
    for subj, pred, obj in rendergraph:
        for a in endpointdata:
            if str(obj).upper() == a.service.upper():
                rendergraph.remove((subj, pred, obj))
                dotcheck = a.args
                dotcheck = a.args.replace(':', ".")
                rendergraph.add((subj, pred, Literal(dotcheck)))

    # if unfriendly asked

    # if isolated asked
    # renderprovision() call nsi
    print("Final rendering graph created and saved....")

    rdot = Digraph(comment='Rendered Intent')
    for subj, pred, obj in rendergraph:
        #print "new"
        #print subj, pred, obj
        rdot.node(subj, subj)
        rdot.node(obj, obj)
        rdot.edge(subj, obj, pred, constraint='false')

        # print(dot.source)
    rdot.format = 'png'
    rdot.render('../static/renderintent.dot', view=False)
    # call an exe file....

    # extracting data from RDF graph constructed
    print("Creating rendered graph in html output....")
    try:
        fname = '../templates/renderedinput.html'
        file = open(fname, 'w')
        firstpart = """<!DOCTYPE html>
		<html>
		<head>
		<title>Rendered Intent</title>
		<meta name="description" content="A concept map diagram ." />
		<meta charset="UTF-8">
		<script src="go.js"></script>
		<script src="https://cdnjs.cloudflare.com/ajax/libs/gojs/1.6.7/go-debug.js"></script>

		<link href="../assets/css/goSamples.css" rel="stylesheet" type="text/css" />  
		<script src="goSamples.js"></script>  
		<script id="code">
		function init() {
		if (window.goSamples) goSamples();  // init for these samples -- you don't need to call this
		var $ = go.GraphObject.make;  // for conciseness in defining templates
		myDiagram =
		$(go.Diagram, "myDiagramDiv",  // must name or refer to the DIV HTML element
			{\n
			initialAutoScale: go.Diagram.Uniform,  // an initial automatic zoom-to-fit\n
			contentAlignment: go.Spot.Center,  // align document to the center of the viewport\n
			layout:\n
			$(go.ForceDirectedLayout,  // automatically spread nodes apart\n
				{ defaultSpringLength: 30, defaultElectricalCharge: 100 })\n
			});\n
		// define each Node's appearance\n
		myDiagram.nodeTemplate =
		$(go.Node, "Auto",  // the whole node panel
			{ locationSpot: go.Spot.Center },
			// define the node's outer shape, which will surround the TextBlock\n
			$(go.Shape, "Rectangle",
				{ fill: $(go.Brush, "Linear", { 0: "rgb(254, 201, 0)", 1: "rgb(254, 162, 0)" }), stroke: "black" }),
				$(go.TextBlock, 
				{ font: "bold 10pt helvetica, bold arial, sans-serif", margin: 4 },
				new go.Binding("text", "text"))
			);
			// replace the default Link template in the linkTemplateMap
			myDiagram.linkTemplate =
			$(go.Link,  // the whole link panel
				$(go.Shape,  // the link shape
					{ stroke: "black" }),
					$(go.Shape,  // the arrowhead
						{ toArrow: "standard", stroke: null }),
						$(go.Panel, "Auto",
							$(go.Shape,  // the label background, which becomes transparent around the edges
								{ fill: $(go.Brush, "Radial", { 0: "rgb(240, 240, 240)", 0.3: "rgb(240, 240, 240)", 1: "rgba(240, 240, 240, 0)" }),
								stroke: null }),
						$(go.TextBlock,  // the label text
							{ textAlign: "center",
							font: "10pt helvetica, arial, sans-serif",
							stroke: "#555555", margin: 4 },
							new go.Binding("text", "text"))
								));
						// create the model for the concept map\n"""
        file.write(firstpart)
        cnode = 0

        # create a list of ids for plotting the js graph
        uniqueidlist = []
        for subj, pred, obj in rendergraph:
            flagnodefound = 0
            for j in uniqueidlist:
                if Literal(subj) == j:
                    flagnodefound = 1
            if flagnodefound == 0:
                uniqueidlist.append(Literal(subj))

        for subj, pred, obj in rendergraph:
            flagnodefound = 0
            for j in uniqueidlist:
                if Literal(obj) == j:
                    flagnodefound = 1
                    # print "found name " + j
            if flagnodefound == 0:
                # print obj
                uniqueidlist.append(Literal(obj))
        # adding links

        nodeDAstring = ''
        linkDAstring = ''
        tempstr = ""
        for j in uniqueidlist:
            # print nodeDAstring
            # print uniqueidlist.index(j)
            checkcommas = j.replace("'", "")

            tempstr = "{ key:" + str(uniqueidlist.index(j)) + \
                ", text: '" + checkcommas + "' },"
            # print tempstr
            nodeDAstring += tempstr

        tempstr = ""
        for subj, pred, obj in rendergraph:
            # print uniqueidlist.index(Literal(subj)),
            # uniqueidlist.index(Literal(obj)), pred
            tempstr = "{ from:" + str(uniqueidlist.index(
                Literal(subj))) + ", to:" + str(
                    uniqueidlist.index(
                        Literal(obj))) + ", text: '" + Literal(pred) + "'},"
            linkDAstring += tempstr

        file.write("""   var nodeDataArray = [""")

        file.write(nodeDAstring)

        file.write("{} ];")
        file.write("    var linkDataArray = [")
        file.write(linkDAstring)
        file.write("{} ];")
        secondpart = """    
				myDiagram.model = new go.GraphLinksModel(nodeDataArray, linkDataArray);
				}
				</script>
				</head>
				<body onload="init()">
				<div id="sample">
				<h3>Rendered Input</h3>
				<div id="myDiagramDiv" style="background-color: whitesmoke; border: solid 1px black; width: 100%; height: 700px"></div>
				<p>
				The Rendered intent created by INDIRA to call NSI. 
				</p>
				</div>
				</body>
				</html>"""
        file.write(secondpart)
        file.close()
    except:
        print("file writing error occured")
        sys.exit(0)

    #############################
    print("Calling NSI......")
    # check if file exists
    try:
        os.remove('./nsibash.sh')
    except OSError:
        pass

    #cmd ='./test'
    # os.system(cmd)

    #test=subprocess.Popen(["..\..\opennsa\./onsa --help"],stdout=subprocess.PIPE)
    # output=test.communicate()[0]

    # need to extract data from graph
    locallysave_eps = []
    localsrcname = ""
    localdestname = ""

    for subj, pred, obj in rendergraph:
        #print subj, pred, obj
        if Literal(subj).lower() == 'connect':
            locallysave_eps.append(Literal(obj))
            #print obj
        if Literal(subj).lower() == 'disconnect':
            locallysave_eps.append(Literal(obj))
            #print obj
        if Literal(subj).lower() == 'transfer':
            locallysave_eps.append(Literal(obj))

        if pred == hasBandwidth:
            if Literal(obj).lower() == 'unlimited':
                localbwvalue = 100
            else:
                numberextracted = Literal(obj)
                #print numberextracted
                localbwvalue = int(numberextracted)
            #print localbwvalue
        if Literal(subj).upper() == 'SCHEDULESTART':
            year = 2016
            month = 11
            day = 13
            hr = 12
            minu = 00
            secs = 00
            localzone = 'GMT'
            # convertime=''

            # print subj
            if pred == hasDate:
                datestring = Literal(obj)
                year, month, day = datestring.split("-")
            if pred == hasTime:
                timestring_local = Literal(obj)
                hr, minu, secs = timestring_local.split(".")
            if pred == hasZone:
                localzone = Literal(obj).split("+")[0]

            convertzone = timezone(localzone)
            converttime = convertzone.localize(
                datetime(int(year), int(month), int(day), int(hr), int(minu),
                         int(secs)))
            # print converttime
            converttime2 = converttime.astimezone(timezone('GMT'))
            # print converttime2

            converttime2 = str(converttime2)
            newdatensi = converttime2.split(" ")[0]
            timehalf = converttime2.split(" ")[1]
            newtimensi = timehalf.split("+")[0]
            newstarttime = newdatensi + "T" + newtimensi

            # print newstarttime

        if Literal(subj).upper() == 'SCHEDULESTOP':
            #print "stop"
            year = 2016
            month = 11
            day = 13
            hr = 17
            minu = 00
            secs = 00
            localzone = 'GMT'
            # convertime=''

            #print subj
            if pred == hasDate:
                datestring = Literal(obj)
                year, month, day = datestring.split("-")
            if pred == hasTime:
                timestring_local = Literal(obj)
                hr, minu, secs = timestring_local.split(".")
            if pred == hasZone:
                localzone = Literal(obj).split("+")[0]

            convertzone = timezone(localzone)
            converttime = convertzone.localize(
                datetime(int(year), int(month), int(day), int(hr), int(minu),
                         int(secs)))
            #print converttime
            converttime2 = converttime.astimezone(timezone('GMT'))
            #print converttime2

            converttime2 = str(converttime2)
            newdatensi = converttime2.split(" ")[0]
            timehalf = converttime2.split(" ")[1]
            newtimensi = timehalf.split("+")[0]
            newstoptime = newdatensi + "T" + newtimensi

            #print newstoptime

            # remove after testing


#	localsrcname=locallysave_eps[0]
#	localdestname=locallysave_eps[1]
#print "Connection points: " + len(locallysave_eps)
    if len(locallysave_eps) >= 2:
        localsrcname = locallysave_eps[0]
        localdestname = locallysave_eps[1]
    else:
        print("NSI called")

        #only takes two site names as arguments. Please start again!"

    time.sleep(2)
    globalidnsi = "urn:uuid:6e1f288a-5a26-4ad8-a9bc-eb91785cee15"
    #print localsrcname
    #print localdestname

    # HARD CODED VALUES

    hardsource = "es.net:2013::lbl-mr2:xe-9_3_0:+#1000"
    harddestination = "es.net:2013::bnl-mr2:xe-1_2_0:+#1000"

    ##########################

    #print "Creating bash file...."
    try:
        fname = './nsibash.sh'
        # print "h"
        file = open(fname, 'w')
        file.write("#!/bin/bash")
        file.write("\n")

        file.write("cd ../../opennsa")
        file.write("\n")
        # print "g"
        #print "constructing nsi commands...."
        params = "./onsa reserveprovision"
        # print params
        params = params + " -g " + globalidnsi
        # print params
        params = params + " -d " + harddestination  # localdestname
        # print params
        params = params + " -s " + hardsource  # localsrcname
        # print params
        params = params + " -b " + str(localbwvalue)
        # print params
        # print newstarttime
        # print newstoptime
        params = params + " -a " + newstarttime
        # print params
        params = params + " -e " + newstoptime
        # print params
        params = params + " -u https://nsi-aggr-west.es.net:443/nsi-v2/ConnectionServiceProvider"
        params = params + " -p es.net:2013:nsa:nsi-aggr-west"
        params = params + " -r canada.eh:2016:nsa:requester"
        params = params + " -h 198.128.151.17 -o 8443"
        params = params + " -l /etc/hostcert/muclient.crt -k /etc/hostcert/muclient.key"
        params = params + " -i /etc/ssl/certs/ -y -x -z -v -q;"
        # print params

        file.write(params)
        file.write("\n")
        file.write("exit;")
        file.close()
    except:
        pass
        #####print("file writing error occured")
    ### #sys.exit(0)

    #print "Running the bash file...."

    time.sleep(2)
    print("\n\n")
    print("OOPS! Something has gone horribly wrong!")
    return 1
コード例 #20
0
class RDFCrawler:
    """Crawl RDF documents reachable from a root URI into a persistent graph.

    Parsed data is accumulated in a ``ConjunctiveGraph`` opened on the
    ``Sleepycat`` store. After each round of parsing, URI objects found in
    the graph whose text contains one of the permitted domains are queued
    for the next round.
    """

    logger = logging.getLogger(__name__)

    def __init__(self, uri, domains=None):
        """

        :param uri: root URI to start crawling.
        :param domains: optional iterable of permitted domains to crawl.
            The root URI is always added to the permitted set.
        """
        self.root = uri
        # NOTE(review): hash() of a str is salted per process on Python 3, so
        # this store path may differ between runs -- confirm that is intended.
        self.graph_route = 'graph_store_%s' % hash(self.root)
        self.graph = ConjunctiveGraph('Sleepycat')
        self.graph.open(self.graph_route, create=True)
        # Build a private set: a shared default (or the caller's own
        # collection) must not be mutated by the add() below.
        self._filter_domains = set(domains) if domains is not None else set()
        self._filter_domains.add(uri)
        self.last_process_time = 0.0
        self.lock = RLock()

    def filter_uris(self, uri_list):
        """
        :param uri_list: iterable of URIs to be filtered.
        :return: list of the URIs whose string form contains at least one
            permitted domain; each input URI appears at most once.
        """
        return [
            uri for uri in uri_list
            if any(match in str(uri) for match in self._filter_domains)
        ]

    def _has_context(self, graph, subject):
        """

        :param graph: graph whose contexts are inspected.
        :param subject: the URIRef or URI to check if it has current context.
        :return: True if the context of subject holds more than one triple.
        """
        return len(graph.get_context(self._get_context_id(subject))) > 1

    @staticmethod
    def _get_context_id(subject):
        """

        :param subject: URIRef or URI from which the get context id.
        :return: context id of the resource.
        Example:
            subject -> http://www.example.org/#fragment
            context_id -> http://www.example.org/
        """
        return str(subject).split('#')[0]

    def start(self):
        """
            start method for crawling.

            Empties the graph, crawls from the root URI and records the
            elapsed time in ``last_process_time``. The lock is held for the
            whole run and released even if crawling raises.
        """
        with self.lock:
            # Erase old graph (snapshot first: the graph is mutated while
            # we walk over its quads).
            for q in list(self.graph.quads()):
                self.graph.remove(q)

            # Crawl for data
            logging.info('Start crawling: %s' % self.root)
            start_time = time.time()
            self._crawl([self.root])
            end_time = time.time()

            self.last_process_time = end_time - start_time
            logging.info(
                'Crawling complete after: %s seconds with %s predicates.' %
                (self.last_process_time, len(self.graph)))

    def _crawl(self, uri_list):
        """
        Crawl RDF objects round by round until no new URI is left.
        :param uri_list: list of URIs to crawl

        Each URI is attempted once per call: a URI that fails to parse (or
        yields too few triples to count as a context) is not retried, which
        previously caused unbounded recursion.
        """
        pending = list(uri_list)
        attempted = set()

        while pending:
            for uri in pending:
                attempted.add(uri)
                try:

                    # A few considerations about parsing params.
                    #   publicID = uri due to redirection issues
                    #   Format = None due to default params use 'XML'
                    self.graph.parse(uri, publicID=uri, format=None)
                    logging.info('[OK]: %s' % uri)
                except Exception as e:
                    logging.info('[Error]: %s: %s' % (uri, e))

            # Check that there are context that remains without parsing
            candidates = set(
                self._get_context_id(o)
                for o in set(self.graph.objects(None, None))
                if isinstance(o, URIRef)
                and not self._has_context(self.graph, o)
            )

            pending = [
                uri for uri in self.filter_uris(candidates)
                if uri not in attempted
            ]
コード例 #21
0
class ContextTestCase(unittest.TestCase):
    """Context (named graph) behaviour of a ConjunctiveGraph on a FastStore."""

    def setUp(self):
        """Create a fresh store/graph pair and the URIRef fixtures."""
        self.store = FastStore()
        self.graph = ConjunctiveGraph(self.store)

        # people
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        # predicates
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        # things
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

        # context identifiers
        self.c1 = URIRef(u'context-1')
        self.c2 = URIRef(u'context-2')

    def _c1_statements(self):
        """Return the seven fixture triples for context 1, in insertion order."""
        return [
            (self.tarek, self.likes, self.pizza),
            (self.tarek, self.likes, self.cheese),
            (self.michel, self.likes, self.pizza),
            (self.michel, self.likes, self.cheese),
            (self.bob, self.likes, self.cheese),
            (self.bob, self.hates, self.pizza),
            (self.bob, self.hates, self.michel),  # gasp!
        ]

    def addStuff(self):
        """Add the fixture triples to context 1."""
        statements = self._c1_statements()
        c1_graph = Graph(self.graph.store, self.c1)
        for statement in statements:
            c1_graph.add(statement)

    def removeStuff(self):
        """Remove the fixture triples from context 1."""
        statements = self._c1_statements()
        c1_graph = Graph(self.graph.store, self.c1)
        for statement in statements:
            c1_graph.remove(statement)

    def addStuffInMultipleContexts(self):
        """Add one triple to the default context, context 1 and context 2."""
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # default context first ...
        self.graph.add(triple)

        # ... then context 1 followed by context 2
        for context_id in (self.c1, self.c2):
            Graph(self.graph.store, context_id).add(triple)

    def testConjunction(self):
        """The conjunctive graph and context 1 report the same length."""
        self.addStuffInMultipleContexts()
        # extra statement that only lives in context 1
        c1_graph = Graph(self.graph.store, self.c1)
        c1_graph.add((self.pizza, self.likes, self.pizza))

        print(self.store.statements())

        self.assertEqual(len(self.graph), len(c1_graph))

    def testAdd(self):
        """Adding the fixture triples must not raise."""
        self.addStuff()

    def testRemove(self):
        """Adding then removing the fixture triples must not raise."""
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        """Lengths track ten triples added to, then dropped with, context 1."""
        context_id = self.c1

        # start from an empty context
        self.graph.remove_context(self.graph.get_context(context_id))

        c1_graph = Graph(self.graph.store, context_id)
        baseline = len(self.graph)

        for _ in range(0, 10):
            c1_graph.add((BNode(), self.hates, self.hates))

        grown = baseline + 10
        self.assertEqual(len(c1_graph), grown)
        self.assertEqual(len(self.graph.get_context(context_id)), grown)

        self.graph.remove_context(self.graph.get_context(context_id))

        self.assertEqual(len(self.graph), baseline)
        self.assertEqual(len(c1_graph), 0)

    def testLenInMultipleContexts(self):
        """A triple present in three contexts counts once."""
        before = len(self.graph)
        self.addStuffInMultipleContexts()

        # the same triple went into three contexts, so the total grows by 1
        expected = before + 1
        self.assertEqual(len(self.graph), expected)

        c1_graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(c1_graph), expected)

    def testRemoveInMultipleContexts(self):
        """Removal per context keeps the triple; removal on the graph drops it."""
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()
        self.assertTrue(triple in self.graph)

        # dropping it from c1 and then c2 must leave it in the store
        for context_id in (self.c1, self.c2):
            Graph(self.graph.store, context_id).remove(triple)
            self.assertTrue(triple in self.graph)

        # removing through the conjunctive graph removes it completely
        self.graph.remove(triple)
        self.assertTrue(triple not in self.graph)

        # and a context-less remove clears every copy in one go
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertTrue(triple not in self.graph)

    def testContexts(self):
        """Both context identifiers are listed, globally and for the triple."""
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        def identifiers(contexts):
            return [context.identifier for context in contexts]

        self.assertTrue(self.c1 in identifiers(self.graph.contexts()))
        self.assertTrue(self.c2 in identifiers(self.graph.contexts()))

        contextList = identifiers(list(self.graph.contexts(triple)))
        for context_id in (self.c1, self.c2):
            self.assertTrue(context_id in contextList,
                            (context_id, contextList))

    def testRemoveContext(self):
        """Context 1 holds one triple and can be removed."""
        context_id = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, context_id)), 1)
        self.assertEqual(len(self.graph.get_context(context_id)), 1)

        self.graph.remove_context(self.graph.get_context(context_id))
        # NOTE(review): testContexts reads .identifier from the items of
        # contexts(), so comparing them with a URIRef here may never match --
        # confirm whether identifiers were meant to be compared.
        self.assertTrue(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        """An all-wildcard remove empties the graph."""
        self.addStuffInMultipleContexts()
        self.graph.remove((None, None, None))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        """Pattern queries agree between context 1 and the whole graph."""
        tarek, michel, bob = self.tarek, self.michel, self.bob
        likes, hates = self.likes, self.hates
        pizza, cheese = self.pizza, self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # Each group is checked against context 1 first and then against
        # the conjunctive graph; both must give the same counts.
        pattern_groups = [
            # unbound subjects
            [((Any, likes, pizza), 2),
             ((Any, hates, pizza), 1),
             ((Any, likes, cheese), 3),
             ((Any, hates, cheese), 0)],
            # unbound objects
            [((michel, likes, Any), 2),
             ((tarek, likes, Any), 2),
             ((bob, hates, Any), 2),
             ((bob, likes, Any), 1)],
            # unbound predicates
            [((michel, Any, cheese), 1),
             ((tarek, Any, cheese), 1),
             ((bob, Any, pizza), 1),
             ((bob, Any, michel), 1)],
            # unbound subject and object
            [((Any, hates, Any), 2),
             ((Any, likes, Any), 5)],
            # unbound predicate and object
            [((michel, Any, Any), 2),
             ((bob, Any, Any), 3),
             ((tarek, Any, Any), 2)],
            # unbound subject and predicate
            [((Any, Any, pizza), 3),
             ((Any, Any, cheese), 3),
             ((Any, Any, michel), 1)],
            # everything unbound
            [((Any, Any, Any), 7)],
        ]
        for group in pattern_groups:
            for lookup in (c1triples, triples):
                for pattern, expected_count in group:
                    asserte(len(list(lookup(pattern))), expected_count)

        for c in [graph, self.graph.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), {michel, tarek})
            asserte(set(c.subjects(hates, pizza)), {bob})
            asserte(set(c.subjects(likes, cheese)), {tarek, bob, michel})
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), {cheese, pizza})
            asserte(set(c.objects(tarek, likes)), {cheese, pizza})
            asserte(set(c.objects(bob, hates)), {michel, pizza})
            asserte(set(c.objects(bob, likes)), {cheese})

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), {likes})
            asserte(set(c.predicates(tarek, cheese)), {likes})
            asserte(set(c.predicates(bob, pizza)), {hates})
            asserte(set(c.predicates(bob, michel)), {hates})

            # pair iterators
            asserte(set(c.subject_objects(hates)),
                    {(bob, pizza), (bob, michel)})

            asserte(
                set(c.subject_objects(likes)),
                {(tarek, cheese), (michel, cheese), (michel, pizza),
                 (bob, cheese), (tarek, pizza)})

            asserte(set(c.predicate_objects(michel)),
                    {(likes, cheese), (likes, pizza)})
            asserte(set(c.predicate_objects(bob)),
                    {(likes, cheese), (hates, pizza), (hates, michel)})
            asserte(set(c.predicate_objects(tarek)),
                    {(likes, cheese), (likes, pizza)})

            asserte(set(c.subject_predicates(pizza)),
                    {(bob, hates), (tarek, likes), (michel, likes)})
            asserte(set(c.subject_predicates(cheese)),
                    {(bob, likes), (tarek, likes), (michel, likes)})
            asserte(set(c.subject_predicates(michel)), {(bob, hates)})

            # full iteration
            asserte(
                set(c),
                {(bob, hates, michel), (bob, likes, cheese),
                 (tarek, likes, pizza), (michel, likes, pizza),
                 (michel, likes, cheese), (bob, hates, pizza),
                 (tarek, likes, cheese)})

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
コード例 #22
0
ファイル: parseHM.py プロジェクト: mhermans/serialservice
# Script: load an RSS file into an rdflib graph, retype every RSS item as a
# BIBO article and print a few fields of one article through rdfalchemy.
from rdflib import ConjunctiveGraph
g = ConjunctiveGraph()
g.parse('../data/hm_17_1.rss')
#len(g)
import sys
# The model package is imported from the parent directory, so the path
# must be extended before the imports below.
sys.path.append('../')
from model.namespaces import *
from model.bibo import Article

from rdfalchemy import rdfSubject

# Use the public namespace_manager property rather than the private getter.
nsm = g.namespace_manager
# NOTE(review): 'http:prism.com' does not look like a well-formed namespace
# URI (compare the commented-out PRISM2 namespace below) -- confirm.
nsm.bind('prism', 'http:prism.com')
print(g.serialize())
#PRISM2 = Namespace('http://prismstandard.org/namespaces/basic/2.0/')
# Snapshot the matches first: the loop body adds to and removes from the
# graph, which must not happen while its triples are still being iterated.
for s, p, o in list(g.triples((None, RDF.type, RSS.item))):
    g.add((s, p, BIBO.Article))
    g.remove((s, p, o))



rdfSubject.db = g

l = list(Article.ClassInstances())
# NOTE(review): picks the second instance and raises IndexError when fewer
# than two articles exist -- confirm index 1 is intended.
a = l[1]
print(a.title)
print(a.creators)
print(a.sPg)
コード例 #23
0
ファイル: graph.py プロジェクト: oeg-upm/agora-py
def graph_plan(plan, fountain, agp):
    """
    Build an RDF description of a search plan as a ConjunctiveGraph.

    :param plan: mapping read with ``.get``; supplies 'prefixes' and 'plan'.
    :param fountain: handed to ``_type_subtree`` and ``_inform_on_inverses``
        and queried through ``get_type``.
    :param agp: handed to ``_extract_roots`` to obtain the set of roots.
    :return: the populated plan graph.

    The nested helpers are closures: they read and update locals of this
    function (``plan_graph``, ``tree_graph``, ``described_cycles``, ...) that
    are assigned further down, before any helper is first called. Several of
    them also read loop variables of the main body (``pattern``, ``cycles``,
    ``res``, ``expected_types``), so they are only valid inside those loops.

    NOTE(review): ``m.digest().encode('base64')`` and taking ``len()`` of a
    ``filter()`` result below look like Python 2 idioms -- confirm the target
    interpreter before porting.
    """
    def extract_cycle_roots():
        """Map each described cycle id to the expanded set of its expected types."""
        c_roots = {}
        for c_id, c_node in described_cycles.items():
            c_root_types = set({})
            for crt in plan_graph.objects(c_node, AGORA.expectedType):
                crt_qname = plan_graph.qname(crt)
                # Expand every expected type through _type_subtree.
                c_root_types.update(_type_subtree(fountain, crt_qname))
            c_roots[c_id] = c_root_types
        return c_roots

    def inc_tree_length(tree, l):
        """Add l to the length accumulated so far for tree (starting at 0)."""
        if tree not in tree_lengths:
            tree_lengths[tree] = 0
        tree_lengths[tree] += l

    def add_variable(p_node, vid, subject=True):
        """
        Attach a variable node to pattern node p_node.

        The node is a BNode named after vid with '?' replaced by 'var_'; it
        is linked as AGORA.subject when subject is true, else AGORA.object,
        then typed as AGORA.Variable and labelled with str(vid).
        """
        sub_node = BNode(str(vid).replace('?', 'var_'))
        if subject:
            plan_graph.add((p_node, AGORA.subject, sub_node))
        else:
            plan_graph.add((p_node, AGORA.object, sub_node))
        plan_graph.set((sub_node, RDF.type, AGORA.Variable))
        plan_graph.set((sub_node, RDFS.label, Literal(str(vid), datatype=XSD.string)))

    def describe_cycle(cycle_id, cg):
        """
        Describe cycle cycle_id inside its own context of cg and return its node.

        The steps come from the enclosing ``cycles`` mapping (not from a
        parameter) and are chained from the cycle node with AGORA.next.
        """
        c_node = BNode('cycle{}'.format(cycle_id))
        # From here on cg is the context identified by the cycle node itself.
        cg = cg.get_context(c_node)
        cg.add((c_node, RDF.type, AGORA.Cycle))
        previous_node = c_node
        c_steps = cycles[cycle_id]
        # The type of the first step, expanded, gives the cycle's expected types.
        cycle_type = c_steps[0].get('type')
        for et in _type_subtree(fountain, cycle_type):
            cg.add((c_node, AGORA.expectedType, __extend_uri(prefixes, et)))
        for j, step in enumerate(c_steps):
            prop = step.get('property')
            # Step nodes are named after the previous node plus the property.
            b_node = BNode(previous_node.n3() + '/' + prop)
            cg.add((b_node, AGORA.onProperty, __extend_uri(prefixes, prop)))
            c_expected_type = step.get('type')
            cg.add((b_node, AGORA.expectedType, __extend_uri(prefixes, c_expected_type)))
            cg.add((previous_node, AGORA.next, b_node))
            previous_node = b_node
        return c_node

    def is_extensible(node, node_patterns):
        """
        Return False when a nearby pattern has a subject that is not a root.

        "Nearby" means node_patterns plus the patterns of the sibling nodes
        reached through AGORA.next from the predecessors of node. Reads
        ``res`` and ``roots`` from the enclosing scope; siblings are compared
        with ``res.n``, which is also what the only call site passes as node.
        """
        extensible = True
        near_patterns = node_patterns.copy()
        for prev in tree_graph.subjects(AGORA.next, node):
            for sib_node in tree_graph.objects(prev, AGORA.next):
                if sib_node != res.n:
                    near_patterns.update(set(tree_graph.objects(sib_node, AGORA.byPattern)))

        subjects = set()
        for p_node in near_patterns:
            p_subject = list(plan_graph.objects(p_node, AGORA.subject)).pop()
            if not isinstance(p_subject, URIRef):
                # Non-URI subjects are resolved through their RDFS.label.
                subject_str = list(plan_graph.objects(p_subject, RDFS.label)).pop().toPython()
            else:
                subject_str = str(p_subject)
            subjects.add(subject_str)

        # Any subject outside the roots makes the node non-extensible.
        if subjects and set.difference(subjects, roots):
            extensible = False

        return extensible

    def enrich_type_patterns(node_patterns):
        """
        Add expanded expected types to ``res.n`` for each pattern node.

        For an rdf:type pattern whose object is a URIRef only the expected
        types equal to that object are expanded; for any other predicate all
        of ``expected_types`` are. Reads ``res`` and ``expected_types`` from
        the enclosing loop.
        """
        for p_node in node_patterns:
            p_pred = list(plan_graph.objects(p_node, AGORA.predicate)).pop()
            if p_pred == RDF.type:
                p_type = list(plan_graph.objects(p_node, AGORA.object)).pop()
                if isinstance(p_type, URIRef):
                    for et in [et for et in expected_types if et == p_type]:
                        q_expected_types = _type_subtree(fountain, tree_graph.qname(et))
                        for et_q in q_expected_types:
                            tree_graph.add((res.n, AGORA.expectedType, __extend_uri(prefixes, et_q)))
            else:
                for et in expected_types:
                    q_expected_types = _type_subtree(fountain, tree_graph.qname(et))
                    for et_q in q_expected_types:
                        tree_graph.add((res.n, AGORA.expectedType, __extend_uri(prefixes, et_q)))

    def apply_cycle_extensions(c_roots, node_types):
        """
        Mark nodes as cycle starts, dropping cycles that no node can start.

        A node starts a cycle when its expected types intersect the cycle's
        root types; a cycle without any such node has its context removed
        from plan_graph.
        """
        for c_id, root_types in c_roots.items():
            found_extension = False
            for n, expected in node_types.items():
                if set.intersection(set(root_types), set(expected)):
                    tree_graph.add((n, AGORA.isCycleStartOf, described_cycles[c_id]))
                    found_extension = True

            if not found_extension:
                plan_graph.remove_context(plan_graph.get_context(described_cycles[c_id]))

    def include_path(elm, p_seeds, p_steps, cycles, check):
        """
        Add one search path (tree root, seeds, steps, pattern link) to tree_graph.

        :param elm: type the tree starts from.
        :param p_seeds: seeds of the path; also hashed to name the tree node.
        :param p_steps: steps of the path, each read via 'property', 'type'
            and optional 'root'.
        :param cycles: cycle ids to describe if not already described.
        :param check: when truthy, stored as AGORA.checkType on the final
            node of an rdf:type pattern.

        Reads ``pattern`` from the enclosing loop.
        """
        # The tree node is named after the base64 form of the MD5 of all seeds.
        m = hashlib.md5()
        for s in p_seeds:
            m.update(s)
        elm_uri = __extend_uri(prefixes, elm)
        b_tree = BNode(m.digest().encode('base64').strip())
        s_trees.add(b_tree)
        tree_graph.set((b_tree, RDF.type, AGORA.SearchTree))
        tree_graph.add((b_tree, AGORA.fromType, elm_uri))

        for seed in p_seeds:
            tree_graph.add((b_tree, AGORA.hasSeed, URIRef(seed)))

        # Describe each not-yet-described cycle once and link the tree to it.
        for cycle_id in filter(lambda x: x not in described_cycles.keys(), cycles):
            c_node = describe_cycle(cycle_id, plan_graph)
            described_cycles[cycle_id] = c_node
            plan_graph.get_context(c_node).add((b_tree, AGORA.goesThroughCycle, c_node))

        previous_node = b_tree
        inc_tree_length(b_tree, len(p_steps))

        root_index = -1
        pp = []
        for j, step in enumerate(p_steps):
            prop = step.get('property')
            pp.append(prop)
            path_root = step.get('root', None)
            # Remember the index of the first step that declares a root.
            if path_root and root_index < 0:
                root_index = j
            # Node names are prefixed by the step's root if any, else the tree.
            base_id = path_root or b_tree
            base_id += '/'

            # Both branches build the same node; only the first one also
            # records the property (non-final steps, or rdf:type patterns
            # with a URIRef object).
            if j < len(p_steps) - 1 or (pattern[1] == RDF.type and isinstance(pattern[2], URIRef)):
                b_node = BNode(base_id + '/'.join(pp))
                tree_graph.add((b_node, AGORA.onProperty, __extend_uri(prefixes, prop)))
            else:
                b_node = BNode(base_id + '/'.join(pp))

            tree_graph.add((b_node, AGORA.expectedType, __extend_uri(prefixes, step.get('type'))))
            tree_graph.add((previous_node, AGORA.next, b_node))
            previous_node = b_node

        p_node = _get_pattern_node(pattern, patterns)
        if pattern[1] == RDF.type and isinstance(pattern[2], URIRef):
            # rdf:type patterns get an extra terminal node named after the
            # n3 form of the whole pattern.
            b_id = '{}_{}_{}'.format(pattern[0].n3(plan_graph.namespace_manager),
                                     pattern[1].n3(plan_graph.namespace_manager),
                                     pattern[2].n3(plan_graph.namespace_manager))

            b_node = BNode(b_id)
            tree_graph.add((b_node, AGORA.expectedType, pattern[2]))
            tree_graph.add((previous_node, AGORA.next, b_node))
            tree_graph.add((b_node, AGORA.byPattern, p_node))
            if check:
                tree_graph.add((b_node, AGORA.checkType, Literal(check)))
        else:
            # Otherwise the last step node itself is linked to the pattern.
            tree_graph.add((previous_node, AGORA.byPattern, p_node))

    # Shared state used by the closures above.
    plan_graph = ConjunctiveGraph()
    plan_graph.bind('agora', AGORA)
    prefixes = plan.get('prefixes')
    ef_plan = plan.get('plan')
    tree_lengths = {}
    s_trees = set([])
    patterns = {}
    described_cycles = {}

    for (prefix, u) in prefixes.items():
        plan_graph.bind(prefix, u)

    # All search-tree statements go into the 'trees' context.
    tree_graph = plan_graph.get_context('trees')

    for i, tp_plan in enumerate(ef_plan):
        paths = tp_plan.get('paths')
        pattern = tp_plan.get('pattern')
        hints = tp_plan.get('hints')
        cycles = {}
        for c in tp_plan.get('cycles'):
            cid = str(c['cycle'])
            c_steps = c['steps']
            cycles[cid] = c_steps
            # Cycles with more than one step are also registered reversed,
            # under the id suffixed with 'r'.
            if len(c_steps) > 1:
                cycles[cid + 'r'] = list(reversed(c_steps))
        context = BNode('space_{}'.format(tp_plan.get('context')))

        for path in paths:
            steps = path.get('steps')
            seeds = path.get('seeds')
            check = path.get('check', None)
            # Starting type: the pattern object when there are seeds but no
            # steps, otherwise the type of the first step; paths with
            # neither are skipped.
            ty = None
            if not len(steps) and len(seeds):
                ty = pattern[2]
            elif len(steps):
                ty = steps[0].get('type')
            if ty:
                include_path(ty, seeds, steps, cycles, check)

        # Record the length accumulated so far for every known tree.
        for t in s_trees:
            tree_graph.set((t, AGORA.length, Literal(tree_lengths.get(t, 0), datatype=XSD.integer)))

        # Describe the triple pattern and the search space it defines.
        pattern_node = _get_pattern_node(pattern, patterns)
        plan_graph.add((context, AGORA.definedBy, pattern_node))
        plan_graph.set((context, RDF.type, AGORA.SearchSpace))
        plan_graph.add((pattern_node, RDF.type, AGORA.TriplePattern))
        plan_graph.add((pattern_node, RDFS.label, Literal(pattern_node.toPython())))
        (sub, pred, obj) = pattern

        # BNode terms are recorded as variables, URIRef subjects directly.
        if isinstance(sub, BNode):
            add_variable(pattern_node, str(sub))
        elif isinstance(sub, URIRef):
            plan_graph.add((pattern_node, AGORA.subject, sub))

        if isinstance(obj, BNode):
            add_variable(pattern_node, str(obj), subject=False)
        elif isinstance(obj, Literal):
            # Literal objects are wrapped in a node named after the literal
            # text with spaces and colons stripped.
            node = BNode(str(obj).replace(' ', '').replace(':', ''))
            plan_graph.add((pattern_node, AGORA.object, node))
            plan_graph.set((node, RDF.type, AGORA.Literal))
            plan_graph.set((node, AGORA.value, obj))
        else:
            plan_graph.add((pattern_node, AGORA.object, obj))

        plan_graph.add((pattern_node, AGORA.predicate, pred))
        if pred == RDF.type:
            if 'check' in hints:
                plan_graph.add((pattern_node, AGORA.checkType, Literal(hints['check'], datatype=XSD.boolean)))

    # Every node of the trees context that carries an expected type.
    expected_res = tree_graph.query("""SELECT DISTINCT ?n WHERE {
                                          ?n agora:expectedType ?type
                                       }""")
    node_types = {}
    roots = set(_extract_roots(agp))

    # NOTE: `res` and `expected_types` below are read by is_extensible and
    # enrich_type_patterns through their closures.
    for res in expected_res:
        expected_types = list(tree_graph.objects(res.n, AGORA.expectedType))

        q_expected_types = set(map(lambda x: tree_graph.qname(x), expected_types))
        # Keep only the types none of whose 'super' types is in the set.
        q_expected_types = filter(
            lambda x: not set.intersection(set(fountain.get_type(x)['super']), q_expected_types), q_expected_types)
        # True when exactly one such type remains.
        type_hierarchy = len(q_expected_types) == 1
        tree_graph.add((res.n, AGORA.typeHierarchy, Literal(type_hierarchy)))

        direct_patterns = set(tree_graph.objects(res.n, AGORA.byPattern))
        enrich_type_patterns(direct_patterns)
        if is_extensible(res.n, direct_patterns):
            node_types[res.n] = q_expected_types

    c_roots = extract_cycle_roots()
    apply_cycle_extensions(c_roots, node_types)

    for t in s_trees:
        tree_graph.set((t, AGORA.length, Literal(tree_lengths.get(t, 0), datatype=XSD.integer)))
        from_types = set([plan_graph.qname(x) for x in plan_graph.objects(t, AGORA.fromType)])
        # Keep only the from-types none of whose 'sub' types is in the set.
        def_from_types = filter(lambda x: not set.intersection(set(fountain.get_type(x)['sub']), from_types),
                                from_types)
        for dft in def_from_types:
            tree_graph.set((t, AGORA.fromType, __extend_uri(prefixes, dft)))

    # Trees whose first node follows a pattern with a URIRef subject use that
    # subject as seed, and that first node stops being a cycle start.
    for res in plan_graph.query("""SELECT ?tree ?sub ?nxt WHERE {
                           ?tree a agora:SearchTree ;                              
                                 agora:next ?nxt .
                           ?nxt agora:byPattern [
                                   agora:subject ?sub 
                                ]                    
                        }"""):
        if isinstance(res.sub, URIRef):
            plan_graph.set((res.tree, AGORA.hasSeed, res.sub))
            plan_graph.remove((res.nxt, AGORA.isCycleStartOf, None))

    _inform_on_inverses(plan_graph, fountain, prefixes)

    return plan_graph
コード例 #24
0
def graph_plan(plan, fountain):
    """Build a ConjunctiveGraph describing a search plan with AGORA terms.

    :param plan: mapping read through ``.get``. ``'prefixes'`` must support
        ``.items()`` (prefix -> namespace, each bound on the graph) and
        ``'plan'`` must be iterable; every entry is read for ``'paths'``,
        ``'pattern'``, ``'hints'`` and ``'context'``.
    :param fountain: object whose ``get_type(qname)`` result exposes a
        ``'super'`` entry through ``.get``.
    :return: the populated graph.
    """
    plan_graph = ConjunctiveGraph()
    plan_graph.bind('agora', AGORA)
    prefixes = plan.get('prefixes')
    ef_plan = plan.get('plan')
    # Accumulated number of steps per search-tree node.
    tree_lengths = {}
    # Every search-tree node created by include_path.
    s_trees = set([])
    # pattern -> blank node, so a pattern is always represented by the same node.
    patterns = {}

    for (prefix, u) in prefixes.items():
        plan_graph.bind(prefix, u)

    def __get_pattern_node(p):
        """Return the blank node for pattern ``p``, creating ``tp_<n>`` on first use."""
        if p not in patterns:
            patterns[p] = BNode('tp_{}'.format(len(patterns)))
        return patterns[p]

    def __inc_tree_length(tree, l):
        """Add ``l`` to the length recorded for ``tree`` (starting from 0)."""
        if tree not in tree_lengths:
            tree_lengths[tree] = 0
        tree_lengths[tree] += l

    def __add_variable(p_node, vid, subject=True):
        """Link a variable node to ``p_node`` as its subject or its object.

        The node id is ``vid`` with every ``'?'`` replaced by ``'var_'``; the
        untouched ``vid`` is stored as the node's label.
        """
        sub_node = BNode(str(vid).replace('?', 'var_'))
        if subject:
            plan_graph.add((p_node, AGORA.subject, sub_node))
        else:
            plan_graph.add((p_node, AGORA.object, sub_node))
        plan_graph.set((sub_node, RDF.type, AGORA.Variable))
        plan_graph.set((sub_node, RDFS.label, Literal(str(vid), datatype=XSD.string)))

    def include_path(elm, p_seeds, p_steps):
        """Add one search tree rooted at ``elm`` with its seeds and chained steps.

        Triples are written to the graph context identified by the extended
        URI of ``elm``. NOTE: ``pattern`` is not a parameter; it is the
        variable assigned in the ``for i, tp_plan`` loop below and is read
        when this function runs.
        """
        elm_uri = __extend_uri(prefixes, elm)
        path_g = plan_graph.get_context(elm_uri)
        b_tree = BNode(elm_uri)
        s_trees.add(b_tree)
        path_g.set((b_tree, RDF.type, AGORA.SearchTree))
        path_g.set((b_tree, AGORA.fromType, elm_uri))

        for seed in p_seeds:
            path_g.add((b_tree, AGORA.hasSeed, URIRef(seed)))

        previous_node = b_tree
        __inc_tree_length(b_tree, len(p_steps))
        for j, step in enumerate(p_steps):
            prop = step.get('property')
            # Node ids are derived from the previous node, so equal paths map
            # to equal node ids.
            b_node = BNode(previous_node.n3() + prop)
            # onProperty is recorded for every step except the last one; the
            # last step gets it too when the pattern's predicate is rdf:type.
            if j < len(p_steps) - 1 or pattern[1] == RDF.type:
                path_g.add((b_node, AGORA.onProperty, __extend_uri(prefixes, prop)))
            path_g.add((b_node, AGORA.expectedType, __extend_uri(prefixes, step.get('type'))))
            path_g.add((previous_node, AGORA.next, b_node))
            previous_node = b_node

        # The end of the chain is where the triple pattern applies.
        p_node = __get_pattern_node(pattern)
        path_g.add((previous_node, AGORA.byPattern, p_node))

    for i, tp_plan in enumerate(ef_plan):
        paths = tp_plan.get('paths')
        pattern = tp_plan.get('pattern')
        hints = tp_plan.get('hints')
        context = BNode('space_{}'.format(tp_plan.get('context')))
        for path in paths:
            steps = path.get('steps')
            seeds = path.get('seeds')
            # A path without steps but with seeds is rooted at the pattern's
            # object; a path with steps is rooted at the type of its first
            # step. A path with neither is ignored.
            if not len(steps) and len(seeds):
                include_path(pattern[2], seeds, steps)
            elif len(steps):
                ty = steps[0].get('type')
                include_path(ty, seeds, steps)

        for t in s_trees:
            plan_graph.set((t, AGORA.length, Literal(tree_lengths.get(t, 0), datatype=XSD.integer)))

        pattern_node = __get_pattern_node(pattern)
        plan_graph.add((context, AGORA.definedBy, pattern_node))
        plan_graph.set((context, RDF.type, AGORA.SearchSpace))
        plan_graph.add((pattern_node, RDF.type, AGORA.TriplePattern))
        (sub, pred, obj) = pattern

        # Blank-node subjects are treated as variables; URI subjects are
        # linked directly. Any other subject type is not recorded.
        if isinstance(sub, BNode):
            __add_variable(pattern_node, str(sub))
        elif isinstance(sub, URIRef):
            plan_graph.add((pattern_node, AGORA.subject, sub))

        if isinstance(obj, BNode):
            __add_variable(pattern_node, str(obj), subject=False)
        elif isinstance(obj, Literal):
            # Literal objects get their own node (id = text without spaces)
            # that carries the original text as agora:value.
            node = BNode(str(obj).replace(' ', ''))
            plan_graph.add((pattern_node, AGORA.object, node))
            plan_graph.set((node, RDF.type, AGORA.Literal))
            plan_graph.set((node, AGORA.value, Literal(str(obj), datatype=XSD.string)))
        else:
            plan_graph.add((pattern_node, AGORA.object, obj))

        plan_graph.add((pattern_node, AGORA.predicate, pred))
        if pred == RDF.type:
            if 'check' in hints:
                plan_graph.add((pattern_node, AGORA.checkType, Literal(hints['check'], datatype=XSD.boolean)))

        # For every subject with expected types: remove them all, then re-add
        # only those types none of whose 'super' entries is itself one of the
        # subject's expected types. This runs once per plan entry.
        sub_expected = plan_graph.subjects(predicate=AGORA.expectedType)
        for s in sub_expected:
            expected_types = list(plan_graph.objects(s, AGORA.expectedType))
            for et in expected_types:
                plan_graph.remove((s, AGORA.expectedType, et))
            q_expected_types = [plan_graph.qname(t) for t in expected_types]
            expected_types = [d for d in expected_types if
                              not set.intersection(set(fountain.get_type(plan_graph.qname(d)).get('super')),
                                                   set(q_expected_types))]
            for et in expected_types:
                plan_graph.add((s, AGORA.expectedType, et))

    return plan_graph
コード例 #25
0
class WineRDFDatabase(object):
    """RDF graph populated from the ``Wine`` and ``WineProducer`` models.

    The class is indented with spaces only and calls ``print`` with a single
    parenthesised argument, so it parses on both Python 2 and Python 3.
    """

    def __init__(self):
        """Create the graph, bind the namespace prefixes and add every model row."""
        print('Initialize RDF graph, set namespace mappings')
        self.classes = self.valid_classes()
        self.graph = ConjunctiveGraph()
        self.graph.bind('base', BASE)
        self.graph.bind('rdf', RDF)
        self.graph.bind('rdfs', RDFS)
        self.graph.bind('vocab', VOCAB)
        self.graph.bind('wine', WINE)
        self.graph.bind('wine_prod', WINE_PROD)
        self.graph.bind('whisky', WHISKY)

        for wine in Wine.objects.all():
            self.add_wine(wine)
        for wine_producer in WineProducer.objects.all():
            self.add_wine_producer(wine_producer)
        print('Added %i triples ' % len(self.graph))

    def valid_classes(self):
        """Return the set of ``ID`` attribute values (in the RDF namespace)
        found on the elements of ``backend/wine.rdf``."""
        id_attr = '{%s}ID' % RDF
        root = etree.parse(os.path.join(PROJECT_ROOT, 'backend', 'wine.rdf'))
        return set(elem.attrib[id_attr] for elem in root.iter()
                   if id_attr in elem.attrib)

    def add_wine_producer(self, wine_producer):
        """Add a WineProducer model to the graph."""
        producer = URIRef(WINE_PROD[str(wine_producer.id)])
        self.graph.add((producer, URIRef(RDF['type']), URIRef(BASE['Winery'])))
        # NOTE(review): 'organzation' looks like a misspelling of
        # 'organization'; kept as-is because stored data and queries may
        # depend on the current term - confirm against the VOCAB namespace.
        self.graph.add((producer, URIRef(RDF['type']),
                        URIRef(VOCAB['organzation'])))
        # NOTE(review): add_wine labels with RDFS['label'] while this uses
        # RDF['label'] - confirm which one is intended.
        self.graph.add((producer, URIRef(RDF['label']),
                        Literal(wine_producer.name)))
        self.graph.add((producer, URIRef(VOCAB['address']),
                        Literal(wine_producer.address)))

    def add_wine(self, wine):
        """Add a Wine model to the graph.

        The region and style are linked as classes only when they appear in
        ``self.classes``; an unknown region is stored as a literal and an
        unknown style is not stored at all.
        """
        subject = URIRef(WINE[str(wine.id)])
        self.graph.add((subject, URIRef(BASE['hasMaker']),
                        URIRef(WINE_PROD[str(wine.wine_producer.id)])))
        self.graph.add((subject, URIRef(RDFS['label']), Literal(wine.name)))

        region = wine.region
        if region == 'California' or region.startswith('Santa Barbara'):
            region = 'CaliforniaRegion'
        # NOTE(review): 'locatedIn' is taken from the RDF namespace here,
        # unlike the other wine properties which come from BASE - confirm.
        if region in self.classes:
            self.graph.add((subject, URIRef(RDF['locatedIn']),
                            URIRef(BASE[region])))
        else:
            self.graph.add((subject, URIRef(RDF['locatedIn']),
                            Literal(region)))

        style = wine.style.replace(' ', '')
        if style.endswith('Port'):
            style = 'Port'
        if style in self.classes:
            self.graph.add((subject, URIRef(RDF['type']),
                            URIRef(BASE[style])))

        if wine.color:
            self.graph.add((subject, URIRef(BASE['hasColor']),
                            URIRef(BASE[wine.color.replace(' ', '')])))
        if wine.percentage_alcohol:
            self.graph.add((subject, URIRef(WHISKY['abv']),
                            Literal(wine.percentage_alcohol)))
        if wine.vintage:
            self.graph.add((subject, URIRef(BASE['hasVintageYear']),
                            Literal(wine.vintage)))

    def remove_resource(self, resource):
        """Remove from the graph all triples that have `resource` as the
        subject or object."""
        # query_graph returns a separate graph, so removing from self.graph
        # while iterating the result is safe.
        for triple in self.query_graph(subj=resource, obj=resource):
            self.graph.remove(triple)

    def query_graph(self, subj=None, pred=None, obj=None, exhaustive=False):
        """Return a graph of all triples with subject `subj`, predicate `pred`
        OR object `obj`.

        `subj` may be a single value or a list of values. If `exhaustive`,
        every term is cut at its last '/' before comparing, so passing
        http://127.0.0.1/api/v1/wine matches http://127.0.0.1/api/v1/wine/{s}
        for all s. Otherwise only a single trailing '/' is ignored.
        Arguments may be URIRef, Literal or plain strings.
        """
        def as_text(term):
            # Graph terms are compared through their str() form below, and
            # rdflib terms do not compare equal to plain str values, so the
            # arguments are brought to the same form first.
            return str(term) if term else term

        def normalise(text):
            if exhaustive:
                return text.rpartition('/')[0]
            return text[:-1] if text.endswith('/') else text

        g = ConjunctiveGraph()
        if not isinstance(subj, list):
            subj = [subj]
        pred = as_text(pred)
        obj = as_text(obj)
        for sub in subj:
            sub = as_text(sub)
            for uri_s, uri_p, uri_o in sorted(self.graph):
                s = normalise(str(uri_s))
                p = normalise(str(uri_p))
                o = normalise(str(uri_o))
                if (sub and sub == s) or (pred and pred == p) or (obj and obj == o):
                    g.add((uri_s, uri_p, uri_o))
        return g

    def write_graph(self, format='pretty-xml'):
        """Serialize the graph to the backend directory.

        Raises ValueError when `format` is not a key of VALID_FORMATS; the
        mapped value is appended to the ``wine_graph_store`` file name.
        """
        if format not in VALID_FORMATS:
            raise ValueError('`format` must be xml, pretty-xml, turtle or n3')
        abspath = '%s%s' % (
            os.path.join(PROJECT_ROOT, 'backend', 'wine_graph_store'),
            VALID_FORMATS[format])
        with open(abspath, 'w') as f:
            f.write(self.graph.serialize(format=format))

    def read_graph(self, filename):
        """Replace the graph with the data from this file in the backend dir.

        Raises ValueError when `filename` does not end with 'rdf'.
        """
        if not filename.endswith('rdf'):
            raise ValueError('`filename` must be an rdf file')
        self.graph = ConjunctiveGraph()
        abspath = os.path.join(PROJECT_ROOT, 'backend', filename)
        self.graph.parse(abspath, format='xml')
コード例 #26
0
ファイル: context_case.py プロジェクト: RDFLib/rdflib-sqlite
class ContextTestCase(unittest.TestCase):
    """Checks how a ConjunctiveGraph store handles contexts (named graphs).

    The class reads ``self.identifier`` and ``self.tmppath`` but never assigns
    them, so a subclass (or mixin) has to provide both.

    Only the non-deprecated ``unittest`` assertion names are used; the
    ``assertEquals`` / ``assert_`` aliases are no longer available in recent
    Python releases.
    """
    storetest = True
    store_name = "default"
    create = True

    michel = URIRef(u"michel")
    tarek = URIRef(u"tarek")
    bob = URIRef(u"bob")
    likes = URIRef(u"likes")
    hates = URIRef(u"hates")
    pizza = URIRef(u"pizza")
    cheese = URIRef(u"cheese")
    c1 = URIRef(u"context-1")
    c2 = URIRef(u"context-2")

    def setUp(self):
        """Open the store under test at ``self.tmppath``."""
        self.graph = ConjunctiveGraph(self.store_name, self.identifier)
        self.graph.open(self.tmppath, create=self.create)

    def tearDown(self):
        """Close the store and delete its file, if any."""
        try:
            self.graph.close()
        except Exception:
            # Best effort: a store that fails to close must not hide the
            # result of the test itself. KeyboardInterrupt/SystemExit still
            # propagate.
            pass
        if os.path.exists(self.tmppath):
            os.unlink(self.tmppath)

    def get_context(self, identifier):
        """Return a Graph over the shared store for the given context id."""
        assert isinstance(identifier, URIRef) or isinstance(identifier, BNode), type(identifier)
        # NOTE(review): the test case itself is passed as namespace_manager;
        # kept unchanged, confirm whether that is intentional.
        return Graph(store=self.graph.store, identifier=identifier, namespace_manager=self)

    def _stuff(self):
        """Return the seven triples that addStuff/removeStuff handle, in order."""
        return [
            (self.tarek, self.likes, self.pizza),
            (self.tarek, self.likes, self.cheese),
            (self.michel, self.likes, self.pizza),
            (self.michel, self.likes, self.cheese),
            (self.bob, self.likes, self.cheese),
            (self.bob, self.hates, self.pizza),
            (self.bob, self.hates, self.michel),  # gasp!
        ]

    def addStuff(self):
        """Add the sample triples to context c1."""
        graph = Graph(self.graph.store, self.c1)
        for triple in self._stuff():
            graph.add(triple)

    def removeStuff(self):
        """Remove the sample triples from context c1."""
        graph = Graph(self.graph.store, self.c1)
        for triple in self._stuff():
            graph.remove(triple)

    def addStuffInMultipleContexts(self):
        """Add one triple to the default context, to c1 and to c2."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        self.assertEqual(len(self.graph), len(graph))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty

        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for _ in range(10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEqual(len(self.graph), oldLen + 1)

        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertNotIn(triple, self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertNotIn(triple, self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        try:
            string_types = basestring  # Python 2
        except NameError:
            string_types = str  # Python 3 has no basestring

        def cid(c):
            # contexts() may yield either graphs or bare identifiers.
            if not isinstance(c, string_types):
                return c.identifier
            return c

        all_contexts = [cid(c) for c in self.graph.contexts()]
        self.assertIn(self.c1, all_contexts)
        self.assertIn(self.c2, all_contexts)

        contextList = [cid(c) for c in self.graph.contexts(triple)]
        self.assertIn(self.c1, contextList)
        self.assertIn(self.c2, contextList)

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.get_context(c1)), 1)

        self.graph.remove_context(self.get_context(c1))
        # NOTE(review): testContexts maps contexts() through cid() because the
        # items may be graphs rather than identifiers; if so this comparison
        # against a bare identifier can never fail. Kept as in the original.
        self.assertNotIn(self.c1, list(self.graph.contexts()))

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        for c in [graph, self.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob,)))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))

            asserte(set(c.subject_objects(hates)), set([(bob, pizza), (bob, michel)]))
            asserte(
                set(c.subject_objects(likes)),
                set([(tarek, cheese), (michel, cheese), (michel, pizza), (bob, cheese), (tarek, pizza)]),
            )

            asserte(set(c.predicate_objects(michel)), set([(likes, cheese), (likes, pizza)]))
            asserte(set(c.predicate_objects(bob)), set([(likes, cheese), (hates, pizza), (hates, michel)]))
            asserte(set(c.predicate_objects(tarek)), set([(likes, cheese), (likes, pizza)]))

            asserte(set(c.subject_predicates(pizza)), set([(bob, hates), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(cheese)), set([(bob, likes), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))

            asserte(
                set(c),
                set(
                    [
                        (bob, hates, michel),
                        (bob, likes, cheese),
                        (tarek, likes, pizza),
                        (michel, likes, pizza),
                        (michel, likes, cheese),
                        (bob, hates, pizza),
                        (tarek, likes, cheese),
                    ]
                ),
            )

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
コード例 #27
0
ファイル: test_rdfinfer.py プロジェクト: detrout/htsworkflow
class TestInfer(TestCase):
    """Tests for the rule and validation methods of ``Infer``."""

    def setUp(self):
        """Give each test a fresh graph with the default schemas and MINI_FOAF_ONTOLOGY loaded."""
        graph = ConjunctiveGraph()
        self.model = graph
        add_default_schemas(graph)
        graph.parse(data=MINI_FOAF_ONTOLOGY, format='turtle')

    def _count(self, statement):
        """Return how many triples of the model match ``statement``."""
        return len(list(self.model.triples(statement)))

    def _check_message(self, message, prefix, fragment, suffix):
        """Assert that ``message`` starts with ``prefix``, contains ``fragment`` and ends with ``suffix``."""
        self.assertEqual(message[:len(prefix)], prefix)
        self.assertTrue(fragment in message)
        self.assertEqual(message[-len(suffix):], suffix)

    def test_class(self):
        """_rule_class adds the rdfs:Class typing statement for me.jpg."""
        example = Namespace('http://example.org/')
        self.model.parse(data=FOAF_DATA, format='turtle')
        engine = Infer(self.model)

        class_statement = [example['me.jpg'], RDF['type'], RDFS['Class']]
        self.assertEqual(self._count(class_statement), 0)
        engine._rule_class()
        self.assertEqual(self._count(class_statement), 1)

    def test_inverse_of(self):
        """_rule_inverse_of adds exactly one statement, and nothing on a second run."""
        example = Namespace('http://example.org/')
        self.model.parse(data=FOAF_DATA, format='turtle')
        engine = Infer(self.model)
        depiction_pattern = (None, FOAF['depiction'], example['me.jpg'])
        before = len(self.model)
        self.assertEqual(self._count(depiction_pattern), 0)
        engine._rule_inverse_of()
        self.assertEqual(self._count(depiction_pattern), 1)

        # we should've added one statement.
        self.assertEqual(len(self.model), before + 1)

        before = len(self.model)
        engine._rule_inverse_of()
        # we should already have both versions in our model
        self.assertEqual(len(self.model), before)

    def test_validate_types(self):
        """_validate_types reports one error after a dc:title is added to fooNS:document."""
        example = Namespace('http://example.org/')
        self.model.parse(data=FOAF_DATA, format='turtle')
        engine = Infer(self.model)

        self.assertEqual(len(list(engine._validate_types())), 0)

        untyped = (example['document'], DC['title'], Literal("bleem"))
        self.model.add(untyped)
        self.assertEqual(len(list(engine._validate_types())), 1)

    def test_validate_undefined_properties_in_schemas(self):
        """The schemas loaded in setUp produce no undefined-property errors."""
        example = Namespace('http://example.org/')
        engine = Infer(self.model)

        problems = list(engine._validate_undefined_properties())
        self.assertEqual(len(problems), 0)

    def test_validate_undefined_properties_in_inference(self):
        """Undefined properties and domain/range violations are reported."""
        example = Namespace('http://example.org/')
        foaf_ns = Namespace('http://xmlns.com/foaf/0.1/')

        self.model.parse(data=FOAF_DATA, format='turtle')

        engine = Infer(self.model)
        self.assertEqual(len(list(engine._validate_undefined_properties())), 2)

        engine = Infer(self.model)
        self.assertEqual(len(list(engine._validate_property_types())), 0)

        # a statement expected to produce a 'Domain of ...' error
        domain_case = (example['me.jpg'], FOAF['firstName'], Literal("name"))
        self.model.add(domain_case)
        problems = list(engine._validate_property_types())
        self.assertEqual(len(problems), 1)
        self._check_message(problems[0],
                            'Domain of ',
                            'http://example.org/me.jpg',
                            'http://xmlns.com/foaf/0.1/Person')
        self.model.remove(domain_case)

        self.assertEqual(len(list(engine._validate_property_types())), 0)
        self.model.add((example['foo.txt'], RDF['type'], FOAF['Document']))
        # a statement expected to produce a 'Range of ...' error
        range_case = (example['me.jpg'], FOAF['depicts'], FOAF['foo.txt'])
        self.model.add(range_case)

        problems = list(engine._validate_property_types())
        self.assertEqual(len(problems), 1)
        self._check_message(problems[0],
                            'Range of ',
                            'http://example.org/me.jpg',
                            'http://www.w3.org/2002/07/owl#Thing')
        self.model.remove(range_case)

    def test_property_multiple_domain_types(self):
        """Can we process a property with multiple domain types?
        """
        turtle = """
        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
        @prefix foo: <http://example.org/> .
        @prefix bar: <http://example.com/> .

        foo:AClass a rdfs:Class .
        foo:BClass a rdfs:Class .
        bar:ABarClass a rdfs:Class .

        foo:aprop a rdf:Property ;
            rdfs:domain foo:AClass ;
            rdfs:domain bar:ABarClass ;
            rdfs:range foo:BClass .

        foo:object a foo:BClass .
        foo:subject a foo:AClass ;
           foo:aprop foo:object .
        bar:subject a bar:ABarClass ;
           foo:aprop foo:object .
        """
        self.model.parse(data=turtle, format='turtle')
        engine = Infer(self.model)

        self.assertEqual(len(list(engine._validate_property_types())), 0)
コード例 #28
0
def main(argv):
    """Assign XSD datatypes to the literals of an RDF file under /data/.

    Options (``argv`` excludes the program name):
      -i, --inputfile <name>  file inside /data/; its extension selects the
                              format: ``nq`` (N-Quads), ``nt`` (N-Triples) or
                              ``ttl`` (parsed as N3, written as Turtle)
      -o <name>               output file inside /data/ (default output.nq)
      -h                      print the usage line and exit

    Exits with status 2 on bad options, a missing input file name, a name
    without an extension, or an unsupported extension.

    Only Literal objects are considered. A literal that starts with a word
    character, is at least two characters long and whose predicate does not
    contain "symbol" is re-added with a datatype chosen in this order:
    predicate contains "name" -> xsd:string; date -> xsd:date;
    true/false -> xsd:boolean; number -> xsd:float; letters only ->
    xsd:string. Anything else is left unchanged.
    """
    workdir = "/data/"
    inputfile = ''
    outputfile = "output.nq"
    usage = 'test.py --inputfile <inputfile>'
    # extension -> (format used for parsing, format used for serializing)
    formats = {
        "nq": ("nquads", "nquads"),
        "nt": ("nt", "nt"),
        "ttl": ("n3", "turtle"),
    }

    try:
        opts, _ = getopt.getopt(argv, "hi:o:", ["inputfile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-h":
            print(usage)
            sys.exit(0)
        elif opt in ("-i", "--inputfile"):
            inputfile = arg
        elif opt == "-o":
            outputfile = arg

    # Split on the last dot so names such as "my.data.nq" keep their real
    # extension; a name without any dot has no usable type.
    data, dot, datatype = inputfile.rpartition('.')
    if not dot:
        print(usage)
        sys.exit(2)
    if datatype not in formats:
        print('Unsupported input type "%s" (expected nq, nt or ttl)' % datatype)
        sys.exit(2)
    parse_format, output_format = formats[datatype]

    # TODO: change all this to take the absolute full path as arg (e.g.: /data/input.nq)
    input_full_path = workdir + inputfile

    if datatype == "nq":
        g = ConjunctiveGraph(identifier="http://kraken/graph/data/" + data)
        g.default_context.parse(input_full_path, format=parse_format)
    else:
        # A plain Graph has no default_context; parse into it directly.
        g = Graph()
        g.parse(input_full_path, format=parse_format)

    # Equivalent to the former "^([A-Z]|[a-z]+)+$" without nested
    # quantifiers, which could backtrack exponentially on long inputs.
    patternstring1 = re.compile(r"^[A-Za-z]+$")
    patternstring = re.compile(r"\w+")
    patterndatey = re.compile(r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$")
    patterndatem = re.compile(r"^(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.](19|20)\d\d$")
    patterndated = re.compile(r"^(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.](19|20)\d\d$")
    patternfloat = re.compile(r"^[-+]?[0-9]*\.?[0-9]+$")
    # Whole-value match: a word that merely contains "true" is not a boolean.
    patternbool = re.compile(r"^(true|false)$", re.IGNORECASE)

    # Iterate over a snapshot because the loop removes and adds triples.
    for s, p, o in list(g):
        # URIs and blank nodes must stay what they are.
        if not isinstance(o, Literal):
            continue
        predicate = str(p)
        # gene symbols are detected as lang, so "symbol" predicates are skipped
        if patternstring.match(o) is None or len(o) < 2 or "symbol" in predicate:
            continue

        if "name" in predicate:
            new_type = XSD.string
        elif patterndatey.match(o) or patterndated.match(o) or patterndatem.match(o):
            new_type = XSD.date
        elif patternbool.match(o):
            new_type = XSD.boolean
        elif patternfloat.match(o):
            new_type = XSD.float
        elif patternstring1.match(o):
            new_type = XSD.string
        else:
            continue

        g.remove((s, p, o))
        g.add((s, p, Literal(o, datatype=new_type)))

    g.serialize(destination=workdir + outputfile, format=output_format)
コード例 #29
0
    print("Loading the graph")
    g = ConjunctiveGraph()
    g.parse(args.input, format=args.format)

    print("Convert DOIs in lowercase form")
    doi_to_remove = []
    doi_to_add = []
    for s, p, o in g.triples((None, LITERAL.hasLiteralValue, None)):
        o_str = str(o)
        lower_o_str = o_str.lower()
        if o_str != lower_o_str:
            doi_to_remove.append((s, p, o))
            doi_to_add.append((s, p, Literal(lower_o_str)))
    for s, p, o in doi_to_remove:
        g.remove((s, p, o))
    for s, p, o in doi_to_add:
        g.add((s, p, o))

    if not args.avoid:
        print("Check additional mapping in the oc/ccc triplestore")
        rf = ResourceFinder(ts_url=triplestore_url, default_dir=default_dir)
        with open(args.table, "a") as f:
            for s, p, o in g.triples((None, DATACITE.hasIdentifier, None)):
                if str(s) not in mapping_table:
                    is_doi = False
                    is_isbn = False
                    id_string = None
                    for s1, p2, o2 in g.triples((o, None, None)):
                        if p2 == DATACITE.usesIdentifierScheme:
                            if o2 == DATACITE.doi:
コード例 #30
0
class Store:
    """File-backed RDF store for track, artist and album metadata.

    Wraps a ``ConjunctiveGraph`` that is read from and written to a single
    N3 file.  Tracks and artists are identified by MusicBrainz URIs built
    from their mbids.

    NOTE(review): several methods call ``.encode('utf-8')`` on values before
    wrapping them in ``URIRef``/``Literal``.  On Python 3 that yields
    ``bytes`` rather than text -- confirm the target interpreter before
    changing it.
    """

    def __init__(self, tripleFile):
        """Create the store, loading *tripleFile* when it already exists.

        Args:
            tripleFile: Path of the N3 file backing the store; it is made
                absolute and turned into a ``file://`` URI.
        """
        self.graph = ConjunctiveGraph()
        self.storefn = abspath(tripleFile)
        self.storeuri = 'file://' + self.storefn
        if exists(self.storefn):
            # Graph.load() was only an alias that delegated to parse() and
            # is gone from newer rdflib releases, so call parse() directly.
            self.graph.parse(self.storeuri, format='n3')

        self.graph.bind('mo', MusicOntology)
        self.graph.bind('ourvocab', OurVocab)
        self.graph.bind('dc', DC)
        self.graph.bind('foaf', foaf)
        self.graph.bind('geo', geo)
        self.graph.bind('dbpediaowl', dbpediaowl)
        self.graph.bind('rev', 'http://purl.org/stuff/rev#')

    def save(self):
        """Serialize the graph as N3 to the store URI."""
        self.graph.serialize(self.storeuri, format='n3')

    def addTrack(self, mbid, track):
        """Add a track and, when identified, its artist and tags.

        Args:
            mbid: MusicBrainz recording id used to build the track URI.
            track: Mapping that must provide ``name``, ``playcount``,
                ``listeners``, ``artist`` (with ``mbid`` and ``name``) and
                ``toptags``.
        """
        trackuri = URIRef('http://musicbrainz.org/recording/%s#_' % mbid)
        self.graph.add((trackuri, RDF.type, MusicOntology.Track))
        self.graph.add((trackuri, DC.title, Literal(track['name'])))
        self.graph.add(
            (trackuri, OurVocab.has_playcount, Literal(track['playcount'])))
        self.graph.add((trackuri, OurVocab.has_listener_count,
                        Literal(track['listeners'])))

        # Artists without an mbid are left for addArtist() to describe.
        if track['artist']['mbid'] != '':
            artisturi = URIRef('http://musicbrainz.org/artist/%s#_' %
                               track['artist']['mbid'])
            self.graph.add((artisturi, RDF.type, MusicOntology.MusicArtist))
            self.graph.add((trackuri, MusicOntology.performer, artisturi))
            self.graph.add(
                (artisturi, foaf.name, Literal(track['artist']['name'])))

        toptags = track['toptags']
        if isinstance(toptags, dict) and 'tag' in toptags:
            for tag in toptags['tag']:
                # Entries that are not dicts carry no 'name' and are skipped.
                if isinstance(tag, dict):
                    self.graph.add(
                        (trackuri, OurVocab.has_tag, Literal(tag['name'])))

    def addArtist(self, trackMBID, artistData, trackData):
        """Add or refine the artist of a track, plus hometown data.

        Args:
            trackMBID: MusicBrainz recording id of the track.
            artistData: Mapping with an ``artist`` entry (``value`` and
                ``type``) and optional ``hometown``, ``hometownName`` and
                ``coordinates`` entries, each holding a ``value``.
            trackData: Track mapping providing ``artist`` with ``mbid`` and
                ``name``.
        """
        trackuri = URIRef('http://musicbrainz.org/recording/%s#_' % trackMBID)

        # If there is no mbid, there is no earlier artist entry in the
        # triple store, so create one from the external artist data.
        if trackData['artist']['mbid'] == '':
            artisturi = URIRef(artistData['artist']['value'].encode('utf-8'))
            if artistData['artist']['type'] == 'artist':
                self.graph.add(
                    (artisturi, RDF.type, MusicOntology.MusicArtist))
            else:
                self.graph.add((artisturi, RDF.type, MusicOntology.MusicGroup))
            self.graph.add((trackuri, MusicOntology.performer, artisturi))
            self.graph.add(
                (artisturi, foaf.name,
                 Literal(trackData['artist']['name'].encode('utf-8'))))

        # If there is an artist entry, make sure the artist/band
        # association is appropriate.
        else:
            artisturi = URIRef('http://musicbrainz.org/artist/%s#_' %
                               trackData['artist']['mbid'])
            if artistData['artist']['type'] == "band" and\
                    (artisturi, RDF.type, MusicOntology.MusicArtist) in self.graph:
                self.graph.remove(
                    (artisturi, RDF.type, MusicOntology.MusicArtist))
                self.graph.add((artisturi, RDF.type, MusicOntology.MusicGroup))

        # Now the location data.
        if 'hometown' not in artistData:
            return

        if "http" in artistData['hometown']['value']:
            townuri = URIRef(artistData['hometown']['value'].encode('utf-8'))
            # Describe the place only the first time it is seen.
            if (townuri, RDF.type, dbpediaowl.Place) not in self.graph:
                self.graph.add((townuri, RDF.type, dbpediaowl.Place))
                if "hometownName" in artistData:
                    self.graph.add((townuri, foaf.name,
                                    Literal(artistData['hometownName']
                                            ['value'].encode('utf-8'))))
                if "coordinates" in artistData:
                    self.graph.add((townuri, geo.geometry,
                                    Literal(artistData['coordinates']
                                            ['value'].encode('utf-8'))))
            self.graph.add((artisturi, dbpediaowl.hometown, townuri))
        else:
            # Hometown values without "http" are stored as plain literals.
            self.graph.add((artisturi, dbpediaowl.hometown,
                            Literal(artistData['hometown']['value'])))

    def _matchAlbum(self, trackInfo, albumFiles):
        """Return the album file matching a track by album and artist name.

        Deprecated for most cases, where the match is done using mbids.
        Use only when there is no mbid link between album and track.

        Args:
            trackInfo: Track mapping with ``album.name`` and ``artist.name``.
            albumFiles: Iterable of paths to JSON files, each holding an
                ``album`` object with ``name`` and ``artist``.

        Returns:
            The first matching path, or ``None`` when the track lacks the
            needed fields or no album file matches.
        """
        try:
            albumName = trackInfo['album']['name']
            artistName = trackInfo['artist']['name']
        except (KeyError, TypeError):
            return None

        for af in albumFiles:
            # open() in a with-block closes the handle promptly; the old
            # file() builtin does not exist on Python 3.
            with open(af) as handle:
                albumInfo = json.load(handle)
            albumInfo = albumInfo['album']
            if albumName == albumInfo['name'] and artistName == albumInfo[
                    'artist']:
                return af
        return None

    def addAlbum(self, trackMBID, albumInfo):
        """Add album data for a track to the triple store.

        At the moment only the release date is taken from the album data.

        Args:
            trackMBID: MusicBrainz recording id of the track.
            albumInfo: Mapping with an ``album`` entry; nothing is added
                when that entry or its ``releasedate`` is missing.
        """
        try:
            albumInfo = albumInfo['album']
        except (KeyError, TypeError):
            return

        if 'releasedate' not in albumInfo:
            return

        trackuri = URIRef('http://musicbrainz.org/recording/%s#_' % trackMBID)
        self.graph.add(
            (trackuri, OurVocab.has_releasedate,
             Literal(albumInfo['releasedate'].strip().encode('utf-8'))))
コード例 #31
0
class ContextTestCase(unittest.TestCase):
    """Exercise context (named graph) handling of a ``ConjunctiveGraph``.

    The store plugin under test is selected by the ``store`` class
    attribute.  Each test runs against a freshly opened, emptied graph
    backed by a temporary path that ``tearDown`` deletes.
    """

    # Name of the store plugin handed to ConjunctiveGraph.
    store = "default"
    slow = True
    # Temporary file or directory backing the store; assigned in setUp.
    tmppath = None

    def setUp(self):
        """Open an empty graph on a temporary path and build the fixtures."""
        try:
            self.graph = ConjunctiveGraph(store=self.store)
        except ImportError:
            raise SkipTest("Dependencies for store '%s' not available!" %
                           self.store)
        if self.store == "SQLite":
            fd, self.tmppath = mkstemp(prefix="test",
                                       dir="/tmp",
                                       suffix=".sqlite")
            # mkstemp hands back an open OS-level descriptor; only the path
            # is needed here, so close it rather than leak one per test.
            os.close(fd)
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u"michel")
        self.tarek = URIRef(u"tarek")
        self.bob = URIRef(u"bob")
        self.likes = URIRef(u"likes")
        self.hates = URIRef(u"hates")
        self.pizza = URIRef(u"pizza")
        self.cheese = URIRef(u"cheese")

        self.c1 = URIRef(u"context-1")
        self.c2 = URIRef(u"context-2")

        # delete the graph for each test!
        self.graph.remove((None, None, None))

    def tearDown(self):
        """Close the graph and delete its temporary file or directory."""
        self.graph.close()
        if os.path.isdir(self.tmppath):
            shutil.rmtree(self.tmppath)
        else:
            os.remove(self.tmppath)

    def addStuff(self):
        """Add the seven fixture triples to context ``c1``."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))  # gasp!

    def removeStuff(self):
        """Remove the seven fixture triples from context ``c1``."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))  # gasp!

    def addStuffInMultipleContexts(self):
        """Add one triple to the default context, ``c1`` and ``c2``."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        """The conjunctive graph and ``c1`` report the same length."""
        if self.store == "SQLite":
            raise SkipTest("Skipping known issue with __len__")
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        self.assertEqual(len(self.graph), len(graph))

    def testAdd(self):
        """Adding the fixture triples does not raise."""
        self.addStuff()

    def testRemove(self):
        """Adding and then removing the fixture triples does not raise."""
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        """Lengths track additions to, and removal of, a single context."""
        c1 = self.c1
        # make sure context is empty

        self.graph.remove_context(self.graph.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for _ in range(10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.graph.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.graph.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        """A triple present in several contexts is counted once."""
        if self.store == "SQLite":
            raise SkipTest("Skipping known issue with __len__")
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEqual(len(self.graph), oldLen + 1)

        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        """A triple stays visible until removed from every context."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertTrue(triple not in self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertTrue(triple not in self.graph)

    def testContexts(self):
        """``contexts()`` lists ``c1`` and ``c2``, also for a given triple."""
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        def cid(c):
            return c.identifier

        self.assertTrue(self.c1 in map(cid, self.graph.contexts()))
        self.assertTrue(self.c2 in map(cid, self.graph.contexts()))

        contextList = list(map(cid, list(self.graph.contexts(triple))))
        self.assertTrue(self.c1 in contextList, (self.c1, contextList))
        self.assertTrue(self.c2 in contextList, (self.c2, contextList))

    def testRemoveContext(self):
        """Context ``c1`` holds one triple and can then be removed."""
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.graph.get_context(c1)), 1)

        self.graph.remove_context(self.graph.get_context(c1))
        # NOTE(review): testContexts maps contexts() to identifiers before
        # comparing; here the URIRef is compared against the yielded
        # objects directly, so this check may be vacuous -- confirm.
        self.assertTrue(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        """Removing the all-wildcard pattern empties the graph."""
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        """Pattern queries agree between context ``c1`` and the whole graph."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None  # wildcard position in a triple pattern

        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        # The convenience accessors must agree on both views of the data.
        for c in [graph, self.graph.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), {michel, tarek})
            asserte(set(c.subjects(hates, pizza)), {bob})
            asserte(set(c.subjects(likes, cheese)), {tarek, bob, michel})
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), {cheese, pizza})
            asserte(set(c.objects(tarek, likes)), {cheese, pizza})
            asserte(set(c.objects(bob, hates)), {michel, pizza})
            asserte(set(c.objects(bob, likes)), {cheese})

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), {likes})
            asserte(set(c.predicates(tarek, cheese)), {likes})
            asserte(set(c.predicates(bob, pizza)), {hates})
            asserte(set(c.predicates(bob, michel)), {hates})

            asserte(set(c.subject_objects(hates)),
                    {(bob, pizza), (bob, michel)})
            asserte(
                set(c.subject_objects(likes)),
                {
                    (tarek, cheese),
                    (michel, cheese),
                    (michel, pizza),
                    (bob, cheese),
                    (tarek, pizza),
                },
            )

            asserte(set(c.predicate_objects(michel)),
                    {(likes, cheese), (likes, pizza)})
            asserte(
                set(c.predicate_objects(bob)),
                {(likes, cheese), (hates, pizza), (hates, michel)},
            )
            asserte(set(c.predicate_objects(tarek)),
                    {(likes, cheese), (likes, pizza)})

            asserte(
                set(c.subject_predicates(pizza)),
                {(bob, hates), (tarek, likes), (michel, likes)},
            )
            asserte(
                set(c.subject_predicates(cheese)),
                {(bob, likes), (tarek, likes), (michel, likes)},
            )
            asserte(set(c.subject_predicates(michel)), {(bob, hates)})

            asserte(
                set(c),
                {
                    (bob, hates, michel),
                    (bob, likes, cheese),
                    (tarek, likes, pizza),
                    (michel, likes, pizza),
                    (michel, likes, cheese),
                    (bob, hates, pizza),
                    (tarek, likes, cheese),
                },
            )

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
コード例 #32
0
def convert(teifile, namespace):
    #graph_uri = "http://contextus.net/resource/blue_velvet/"

    ns = Namespace(namespace)

    graph = ConjunctiveGraph()
    graph.load(teifile, format="rdfa")

    graph.bind("default", ns)

    to_update = ""

    for prefix, nsuri in graph.namespaces():
        #print("prefix: " + str(prefix) + " - " + str(nsuri))
        if nsuri in ns:
            to_update = nsuri

    for s, p, o in graph:
        #    		print s, p, o
        if to_update != "" and to_update in s:
            graph.remove((s, p, o))
            s = URIRef(s.replace(to_update, ns))
            graph.add((s, p, o))

    act = ""
    scene = ""
    line = ""
    char = 0
    loc = 0

    #timeline = ns['timeline/narrative']
    #graph.add((timeline, RDF.type, ome['Timeline']))

    tree = ET.parse(teifile)
    cast = dict()

    titleNode = tree.find('//title')

    castItems = tree.findall('/text/body/div1/castList//castItem')
    for castItem in castItems:
        actorNode = castItem.find('actor')
        roleNode = castItem.find('role')

        if roleNode != None:
            id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id")

        #print("Found castItem!")

        actor = None
        role = None

        # Check to see if we already have an entry
        if (roleNode != None and roleNode.get("about")):

            charname = roleNode.get("about")

            if (charname.find(":") > -1):
                nmsp, nom = charname.split(":", 1)
                charcode = "character/" + str(char)
                charref = nmsp + ":" + charcode + "]"
                role = extractCURIEorURI(graph, charref, nom[0:-1])
                char += 1
                #print("1:" + charname + ": adding id " + id + " to " + role)
            else:
                role = extractCURIEorURI(graph, charname)
                #print("2:" + charname + ": adding id " + id + " to " + role)

            cast[id] = role
            graph.add((role, RDF.type, omb['Character']))
            #print(charname + ": adding id " + id + " to " + role)

        if (actorNode != None and actorNode.get("about")):
            actor = extractCURIEorURI(graph, actorNode.get("about"))
            graph.add((actor, RDF.type, omb['Being']))

        if actor != None and role != None:
            graph.add((actor, omb['portrays'], role))
            graph.add((role, omb['portrayed-by'], actor))

    eventCount = 1
    groupCount = 1
    prior_event = None

    actItems = tree.findall('/text/body/div1')
    ref = ""

    for actItem in actItems:

        if actItem.get("type") == "act":
            act = actItem.get("n")

        sceneItems = actItem.findall('div2')

        for sceneItem in sceneItems:

            #print("Found sceneItems!")

            if sceneItem.get("type") == "scene":
                scene = sceneItem.get("n")

            # Work out the location of this scene
            location = None
            stageItems = sceneItem.findall("stage")

            #internalnum = 1
            stagenum = 0
            speechnum = 1

            for stageItem in stageItems:
                if stageItem.get("type") == "location":
                    # The RDFa parser doesn't handle the type - so we can grab that here.

                    if stageItem.get("about") != None:
                        locname = stageItem.get("about")

                        # Adding location type/oml:space for location
                        if stageItem.get("typeof") and stageItem.get("about"):
                            type = extractCURIEorURI(graph,
                                                     stageItem.get("typeof"))
                            #print "1. Location: " + str(location) + " Type: " + str(type)
                        elif stageItem.get("about"):
                            #print "2. Location: " + str(locname)
                            type = extractCURIEorURI(graph, oml['Space'])

                        # Get location value and add rdfs:label is location is not using the TEI value
                        if (locname.find(":") > -1):
                            nmsp, nom = locname.split(":", 1)
                            loccode = "location/" + str(loc)
                            locref = nmsp + ":" + loccode + "]"
                            location = extractCURIEorURI(
                                graph, locref, nom[0:-1])
                            loc += 1
                            graph.add((
                                location,
                                rdflib.URIRef(
                                    'http://www.w3.org/2000/01/rdf-schema#label'
                                ), Literal(nom[0:-1])))
                        else:
                            location = extractCURIEorURI(
                                graph, stageItem.get("about"))

                        # Add location to graph
                        graph.add((location, RDF.type, type))
                    else:
                        location = ""

                    #print("Adding location type: " + type + " (" + location + ")")

            if cast:
                # Work out a list of all cast in a given section
                currentCast = list()
                speakers = list()

            # Iterate through elements within stageItem
            # Find speaker events and add to list of current cast for inclusion in social event
            # Find reference events and add to ongoing social event ?
            # Find stage events
            # If event is an entrance then
            # create social event for people talking before entrance
            # create travel event i.e. entrance
            # add new arrival to current cast list
            # If event is exit event then
            # create social event for people talking before exit
            # create travel event i.e. exit
            # if leavers are not named directly the calculate who is leaving
            # remove leavers from current cast list
            # If reach end of scene then create social event with current cast list

            #Also need to check if social event before exit has same composition as social event after exit since then they should be merged

            event = ns['event/' + str(eventCount)]
            group = ns['group/' + str(groupCount)]

            refersTo = list()
            #parent = None
            speakerNodes = list()
            speakerRef = list()

            xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(
                perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
            stagecount = 0
            stage_array = list()

            for node in sceneItem.getiterator():
                #print("Node: " + node.tag)
                """
				if node.tag == "lb":
					if node.get("ed") == "F1":
						line = node.get("n")	
						if titleNode != None:
							ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line)	
						else:
							ref = str(act) + "." + str(scene) + "." + str(line)
							
						#xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:"  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line)	 + "'])"
						xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:"  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
						#print("Ref: " + xpointer)
				"""

                if node.tag == "sp":
                    id = node.get("who")

                    if id and cast:
                        speakers.append(cast[id[1:]])
                        speakerNodes.append(node)

                        if perseusid == None:
                            speakerRef.append(ref)
                        else:
                            #speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)"
                            speechRef = xpointer + "#xpointer(//div2/sp[" + str(
                                speechnum) + "])"
                            speakerRef.append(speechRef)
                        #print("Line ref: " + ref)

                        if cast[id[1:]] not in currentCast:
                            currentCast.append(cast[id[1:]])

                    #internalnum = 1
                    speechnum += 1
                    stagecount = 0

                    previousl = 0

                    for subnode in node.getiterator():
                        if subnode.tag == "l":
                            previousl += 1

                        if subnode.tag == "stage":
                            #print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n")
                            stage_array.append(previousl)
                            stagecount += 1

                elif node.tag == "stage":

                    if stagecount > 0:
                        s_max = len(stage_array)
                        diff = s_max - stagecount

                        #if diff == 0:
                        #	stagenum += 1

                        entRef = xpointer + "#xpointer(//div2/sp[" + str(
                            speechnum - 1) + "]/l[" + str(
                                stage_array[diff]) + "]/stage)"
                        #internalnum += 1
                        stagecount -= 1
                    else:
                        stagenum += 1
                        entRef = xpointer + "#xpointer(//div2/stage[" + str(
                            stagenum) + "])"

                    if node.get("type") == "entrance":

                        # Add Social Events for all the people who spoke since the last break (if there were any)

                        update = list()
                        update = getSocial(graph, ns, speakers, speakerNodes,
                                           speakerRef, cast, currentCast,
                                           eventCount, event, prior_event,
                                           location)
                        eventCount = update[0]
                        prior_event = update[1]

                        event = ns['event/' + str(eventCount)]

                        speakers = list()
                        speakerNodes = list()
                        speakerRef = list()

                        # Add Travel Event

                        graph.add((event, RDF.type, omj['Travel']))

                        if perseusid == None:
                            graph.add((
                                event,
                                rdflib.URIRef(
                                    "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                ), Literal(ref)))
                        else:
                            #entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)"
                            graph.add((
                                event,
                                rdflib.URIRef(
                                    "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                ), URIRef(entRef)))

                        #print("Entrance event. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                        #print("Found entrence event!")
                        if location:
                            graph.add((event, ome['to'], location))

                        involved = node.get("about")

                        if (len(involved) > 0 and involved[0] == "["
                                and involved[-1] == "]"):
                            involved = involved[1:-1]

                        chunks = involved.split()

                        chunk_count = len(chunks)

                        if chunk_count > 1:
                            #type = extractCURIEorURI(graph, "[omb:Group]")
                            #graph.add((group, RDF.type, type))
                            graph.add((group, RDF.type, omb['Group']))

                        event_label = ""
                        en = 1

                        for chunk in chunks:
                            striped = chunk.strip()

                            if (len(striped) > 0 and striped[0] == "["
                                    and striped[-1] == "]"):
                                striped = striped[1:-1]
                                currentCast.append(cast[striped])

                            if chunk_count > 1:
                                graph.add(
                                    (group, ome['contains'], cast[striped]))

                                if en == chunk_count:
                                    event_label = event_label[
                                        0:-2] + " and " + striped
                                    graph.add((
                                        event,
                                        rdflib.URIRef(
                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                        ), Literal(event_label + " arrive")))
                                elif en < chunk_count:
                                    event_label += striped + ", "

                            else:
                                #print("Adding person as subject-entity to entry event "   + str(eventCount))
                                graph.add((
                                    event,
                                    rdflib.URIRef(
                                        'http://www.w3.org/2000/01/rdf-schema#label'
                                    ), Literal(striped + " arrives")))
                                graph.add((event, ome['has-subject-entity'],
                                           cast[striped]))

                            en += 1

                        if chunk_count > 1:
                            graph.add(
                                (event, ome['has-subject-entity'], group))
                            #print("Adding group as subject-entity to entry event "   + str(eventCount))
                            groupCount = groupCount + 1
                            group = ns['group/' + str(groupCount)]

                        if (prior_event):
                            graph.add((event, ome['follows'], prior_event))
                            graph.add((prior_event, ome['precedes'], event))

                        prior_event = event

                        eventCount = eventCount + 1
                        event = ns['event/' + str(eventCount)]

                    if node.get("type") == "exit":

                        # Add Social Events for all the people who spoke since the last break (if there were any)
                        update = list()
                        update = getSocial(graph, ns, speakers, speakerNodes,
                                           speakerRef, cast, currentCast,
                                           eventCount, event, prior_event,
                                           location)
                        eventCount = update[0]
                        prior_event = update[1]

                        event = ns['event/' + str(eventCount)]

                        speakers = list()
                        speakerNodes = list()
                        speakerRef = list()

                        # Add Travel Event

                        graph.add((event, RDF.type, omj['Travel']))

                        if perseusid == None:
                            graph.add((
                                event,
                                rdflib.URIRef(
                                    "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                ), Literal(ref)))
                        else:
                            #exitRef = xpointer
                            #graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(exitRef)))
                            graph.add((
                                event,
                                rdflib.URIRef(
                                    "http://www.w3.org/2000/01/rdf-schema#seeAlso"
                                ), URIRef(entRef)))

                        #print("Found entrence event!")
                        if location != None:
                            graph.add((event, ome['from'], location))

                        involved = node.get("about")

                        if involved.strip() == "" or "-all" in involved:
                            # Remove everyone

                            #print("Exit all. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                            #for peep in currentCast:
                            #	print(peep)

                            if len(currentCast) > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1

                            for peep in currentCast:
                                short_ref = ""
                                for key, value in cast.iteritems():
                                    if peep == value:
                                        short_ref = key

                                if len(currentCast) > 1:
                                    graph.add((group, ome['contains'], peep))

                                    if en == len(currentCast):
                                        event_label = event_label[
                                            0:-2] + " and " + short_ref
                                        graph.add((
                                            event,
                                            rdflib.URIRef(
                                                'http://www.w3.org/2000/01/rdf-schema#label'
                                            ),
                                            Literal(event_label + " leave")))
                                    elif en < len(currentCast):
                                        event_label += short_ref + ", "

                                else:
                                    #print("Adding person as subject-entity to exuant event "   + str(eventCount))
                                    graph.add(
                                        (event, ome['has-subject-entity'],
                                         peep))
                                    graph.add((
                                        event,
                                        rdflib.URIRef(
                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                        ), Literal(short_ref + " leaves")))

                                en += 1

                            if len(currentCast) > 1:
                                graph.add(
                                    (event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to exuant event "   + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                            currentCast = list()

                        elif "!" in involved:
                            #print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                            #print("Event: " + involved);

                            if (len(involved) > 0 and involved[0] == "["
                                    and involved[-1] == "]"):
                                involved = involved[1:-1]

                            involved = involved.strip()

                            if (len(involved) > 0 and involved[0] == "!"
                                    and involved[1] == "("
                                    and involved[-1] == ")"):
                                involved = involved[2:-1]

                            #print("involved: " + involved)

                            striped = involved.strip()

                            c_ids = striped.split()

                            chunks = list()

                            for stay in c_ids:
                                #print("Staying: " + cast[stay])
                                chunks.append(cast[stay])

                            staying = list()
                            going = list()

                            for player in currentCast:
                                #print("Player: " + player)
                                if player in chunks:
                                    staying.append(player)
                                else:
                                    going.append(player)

                            going_count = len(going)

                            if going_count > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1

                            for ghost in going:
                                #print("ghost: " + ghost)

                                short_ref = ""
                                for key, value in cast.iteritems():
                                    if ghost == value:
                                        short_ref = key

                                if ghost in currentCast:
                                    currentCast.remove(ghost)
                                    #print("Current cast count: "  + str(len(currentCast)))

                                if going_count > 1:
                                    graph.add((group, ome['contains'], ghost))

                                    if en == len(going):
                                        event_label = event_label[
                                            0:-2] + " and " + short_ref
                                        graph.add((
                                            event,
                                            rdflib.URIRef(
                                                'http://www.w3.org/2000/01/rdf-schema#label'
                                            ),
                                            Literal(event_label + " leave")))
                                    elif en < len(going):
                                        event_label += short_ref + ", "

                                else:
                                    #print("Adding person as subject-entity to exit event "   + str(eventCount))
                                    graph.add(
                                        (event, ome['has-subject-entity'],
                                         ghost))
                                    graph.add((
                                        event,
                                        rdflib.URIRef(
                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                        ), Literal(short_ref + " leaves")))

                                en += 1

                            if going_count > 1:
                                graph.add(
                                    (event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to exit event "   + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                        else:
                            #print("Exit some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))

                            if (len(involved) > 0 and involved[0] == "["
                                    and involved[-1] == "]"):
                                involved = involved[1:-1]

                            striped = involved.strip()
                            chunks = striped.split()

                            #print("striped: " + striped)

                            chunk_count = len(chunks)

                            if chunk_count > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1

                            for chunk in chunks:
                                #print("chunk: " + chunk)

                                ghost = cast[chunk]

                                #print("ghost: " + ghost)

                                if ghost in currentCast:
                                    currentCast.remove(ghost)
                                    #print("Current cast count: "  + str(len(currentCast)))

                                if chunk_count > 1:
                                    graph.add((group, ome['contains'], ghost))

                                    if en == len(currentCast):
                                        event_label = event_label[
                                            0:-2] + " and " + chunk
                                        graph.add((
                                            event,
                                            rdflib.URIRef(
                                                'http://www.w3.org/2000/01/rdf-schema#label'
                                            ),
                                            Literal(event_label + " leave")))
                                    elif en < len(currentCast):
                                        event_label += chunk + ", "

                                else:
                                    #print("Adding person as subject-entity to exit event "   + str(eventCount))
                                    graph.add(
                                        (event, ome['has-subject-entity'],
                                         ghost))
                                    graph.add((
                                        event,
                                        rdflib.URIRef(
                                            'http://www.w3.org/2000/01/rdf-schema#label'
                                        ), Literal(chunk + " leaves")))

                                en += 1

                            if chunk_count > 1:
                                graph.add(
                                    (event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to exit event "   + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                        if (prior_event):
                            graph.add((event, ome['follows'], prior_event))
                            graph.add((prior_event, ome['precedes'], event))

                        prior_event = event

                        eventCount = eventCount + 1
                        event = ns['event/' + str(eventCount)]

                #elif node.tag == "rs":
                #	#print("Found rs node")
                #	if parent:
                #		#print("Parent type is " + parent.tag)
                #		if parent.tag == "p" or  parent.tag == "l":
                #			refersTo.append(node.get("about"))

                #parent = node

            # Add Social Events for all the people who spoke since the last break (if there were any)
            #print("Final section of scene, currentCast:" + str(len(currentCast)) + " sperkers: " + str(len(speakers)))
            update = list()
            update = getSocial(graph, ns, speakers, speakerNodes, speakerRef,
                               cast, currentCast, eventCount, event,
                               prior_event, location)
            eventCount = update[0]
            prior_event = update[1]

            event = ns['event/' + str(eventCount)]
            group = ns['group/' + str(groupCount)]

            speakers = list()
            speakerNodes = list()
            currentCast = list()
            speakerRef = list()

    print graph.serialize(format='xml')
コード例 #33
0
ファイル: tablinker.py プロジェクト: cgueret/DataDump
class TabLinker(object):
    """Walks the styled cells of an Excel workbook and records RDF triples about them.

    Two graphs are maintained (see initGraphs): one for the data found in the
    sheets and a separate one for annotations. The dictionaries below map
    namespace prefixes to the Namespace objects bound to those graphs.
    """
    # Base prefixes; setScope() appends '<file basename>/' to each of them to
    # build the per-workbook 'scope' namespaces.
    defaultNamespacePrefix = 'http://example.org/resource/'
    annotationsNamespacePrefix = 'http://example.org/annotation/'
    # Prefixes bound to the data graph in initGraphs(). setScope() adds a
    # 'scope' entry to this (class-level) dictionary.
    namespaces = {
      'dcterms':Namespace('http://purl.org/dc/terms/'), 
      'skos':Namespace('http://www.w3.org/2004/02/skos/core#'), 
      'tablink':Namespace('http://example.org/ns#'), 
      'qb':Namespace('http://purl.org/linked-data/cube#'), 
      'owl':Namespace('http://www.w3.org/2002/07/owl#')
    }
    # Prefixes bound to the annotation graph in initGraphs(). setScope() adds
    # a 'scope' entry here as well.
    annotationNamespaces = {
      'np':Namespace('http://www.nanopub.org/nschema#'),
      'oa':Namespace('http://www.w3.org/ns/openannotation/core/'),
      'xsd':Namespace('http://www.w3.org/2001/XMLSchema#'),
      'dct':Namespace('http://purl.org/dc/terms/')
    }

    def __init__(self, filename, config, level = logging.DEBUG):
        """TabLinker constructor

        Sets up logging, initializes the graphs, derives the scope namespaces
        from the file name, then loads the workbook, its styles and a
        writable copy of it.

        Keyword arguments:
        filename -- String containing the name of the current Excel file being examined
        config -- Configuration object, loaded from .ini file
        level -- A logging level as defined in the logging module
        """
        self.config = config
        self.filename = filename

        self.log = logging.getLogger("TabLinker")
        self.log.setLevel(level)

        self.log.debug('Initializing Graphs')
        self.initGraphs()

        self.log.debug('Setting Scope')
        basename = os.path.basename(filename)
        # Keep the historical behaviour (everything before the last '.xls')
        # whenever the pattern matches. File names without a lower-case
        # '.xls' (e.g. 'REPORT.XLS') used to crash here with an
        # AttributeError on None; fall back to stripping the extension.
        match = re.search(r'(.*)\.xls', basename)
        if match:
            basename = match.group(1)
        else:
            basename = os.path.splitext(basename)[0]
        self.setScope(basename)

        self.log.debug('Loading Excel file {0}.'.format(filename))
        self.rb = open_workbook(filename, formatting_info=True)

        self.log.debug('Reading styles')
        self.styles = Styles(self.rb)

        self.log.debug('Copied Workbook to writable copy')
        self.wb = copy(self.rb)
        
        
    def initGraphs(self):
        """
        Create the data graph and the annotation graph, bind the namespace
        prefixes to each of them and add the initial schema triples
        """
        # The annotations are kept in a graph of their own
        self.graph = ConjunctiveGraph()
        self.annotationGraph = ConjunctiveGraph()

        self.log.debug('Adding namespaces to graphs')
        for prefix, namespace in self.namespaces.items():
            self.graph.namespace_manager.bind(prefix, namespace)

        for prefix, namespace in self.annotationNamespaces.items():
            self.annotationGraph.namespace_manager.bind(prefix, namespace)

        # Measure property first, then the dimension property
        self.log.debug('Adding some schema information (dimension and measure properties) ')
        self.addDataCellProperty()

        tablink = self.namespaces['tablink']
        qb = self.namespaces['qb']
        self.graph.add((tablink['dimension'], RDF.type, qb['DimensionProperty']))

        #self.graph.add((self.namespaces['tablink']['label'], RDF.type, RDF['Property']))
    
    def addDataCellProperty(self):
        """
        Add definition of data cell resource to graph

        Reads the 'dataCell' section of the configuration:
        propertyName -- local name of the measure property; 'hasValue' is used when it is empty
        labels       -- ':::'-separated entries of the form '<first>--><second>'; each complete
                        entry becomes an rdfs:label Literal(<second>, <first>)
        literalType  -- when non-empty, added as the rdfs:range of the property
        """
        propertyName = self.config.get('dataCell', 'propertyName')
        if len(propertyName) > 0 :
            self.dataCellPropertyName = propertyName
        else :
            self.dataCellPropertyName = 'hasValue'

        dataCellProperty = self.namespaces['tablink'][self.dataCellPropertyName]
        self.graph.add((dataCellProperty, RDF.type, self.namespaces['qb']['MeasureProperty']))

        #Take labels from config
        labels = self.config.get('dataCell', 'labels')
        if len(labels) > 0 :
            for label in labels.split(':::') :
                labelProperties = label.split('-->')
                if len(labelProperties) < 2 :
                    # An entry without the '-->' separator used to raise an
                    # IndexError below; skip it and report it instead.
                    if len(label) > 0 :
                        self.log.warning('Ignoring malformed dataCell label: {0}'.format(label))
                    continue
                if len(labelProperties[0]) > 0 and len(labelProperties[1]) > 0 :
                    self.graph.add((dataCellProperty, RDFS.label, Literal(labelProperties[1],labelProperties[0])))

        literalType = self.config.get('dataCell', 'literalType')
        if len(literalType) > 0 :
            self.graph.add((dataCellProperty, RDFS.range, URIRef(literalType)))
            
    def setScope(self, fileBasename):
        """Register the per-workbook 'scope' namespaces and bind them as the default prefix of both graphs

        Note that 'namespaces' and 'annotationNamespaces' are defined on the
        class, so the 'scope' entries stored here are shared by all instances.

        Arguments:
        fileBasename -- Name of the workbook (without extension) used as the last path segment of the namespaces
        """
        self.fileBasename = fileBasename

        # Data and annotations each get their own base URI
        dataScope = self.defaultNamespacePrefix + fileBasename + '/'
        annotationScope = self.annotationsNamespacePrefix + fileBasename + '/'

        self.log.debug('Adding namespace for {0}: {1}'.format(fileBasename, dataScope))

        self.namespaces['scope'] = Namespace(dataScope)
        self.annotationNamespaces['scope'] = Namespace(annotationScope)

        # The empty prefix makes the scope namespace the default one
        self.graph.namespace_manager.bind('', self.namespaces['scope'])
        self.annotationGraph.namespace_manager.bind('', self.annotationNamespaces['scope'])
        
    def doLink(self):
        """Start tablinker for all sheets in workbook"""
        self.log.info('Starting TabLinker for all sheets in workbook')
        
        for n in range(self.rb.nsheets) :
            self.log.info('Starting with sheet {0}'.format(n))
            self.r_sheet = self.rb.sheet_by_index(n)
            self.w_sheet = self.wb.get_sheet(n)
            
            self.rowns, self.colns = self.getValidRowsCols()
                 
            self.sheet_qname = urllib.quote(re.sub('\s','_',self.r_sheet.name))
            self.log.info('Base for QName generator set to: {0}'.format(self.sheet_qname))
            
            self.log.debug('Starting parser')
            self.parseSheet()
    
    ###
    #    Utility Functions
    ### 
    
    def insideMergeBox(self, i, j):
        """
        Check if the specified cell is inside a merge box

        Arguments:
        i -- row
        j -- column

        Returns:
        True/False -- depending on whether the cell is inside a merge box
        """
        self.merged_cells = self.r_sheet.merged_cells
        for crange in self.merged_cells:
            rlo, rhi, clo, chi = crange
            if i <=  rhi - 1 and i >= rlo and j <= chi - 1 and j >= clo:
                return True
        return False
        

    def getMergeBoxCoord(self, i, j):
        """
        Get the top-left corner cell of the merge box containing the specified cell

        Arguments:
        i -- row
        j -- column

        Returns:
        (k, l) -- Coordinates of the top-left corner of the merge box
        """
        if not self.insideMergeBox(i,j):
            return (-1, -1)

        self.merged_cells = self.r_sheet.merged_cells
        for crange in self.merged_cells:
            rlo, rhi, clo, chi = crange
            if i <=  rhi - 1 and i >= rlo and j <= chi - 1 and j >= clo:
                return (rlo, clo)            
         
    def getType(self, style):
        """Extract the cell type from an Excel style name of the form 'TL <type>'

        Arguments:
        style -- Style name (string) to inspect

        Returns:
        String -- Everything following 'TL' and one whitespace character, or 'Unknown' when the name does not contain that marker
        """
        typematch = re.search(r'TL\s(.*)', style)
        return typematch.group(1) if typematch else 'Unknown'
    
    def isEmpty(self, i,j):
        """Tell whether the cell at (i, j) holds no content.

        A cell counts as empty when its type is XL_CELL_EMPTY or
        XL_CELL_BLANK, or when its value is the empty string.

        Arguments:
        i -- row
        j -- column

        Returns:
        True/False -- depending on whether the cell is empty
        """
        cell = self.r_sheet.cell(i, j)
        if cell.ctype == XL_CELL_EMPTY:
            return True
        if cell.ctype == XL_CELL_BLANK:
            return True
        if cell.value == '':
            return True
        return False
        
    def isEmptyRow(self, i, colns):
        """
        Determine whether the row 'i' is empty by iterating over all its cells
        
        Arguments:
        i     -- The index of the row to be checked.
        colns -- The number of columns to be checked
        
        Returns:
        true  -- if the row is empty
        false -- if the row is not empty
        """
        for j in range(0,colns) :
            if not self.isEmpty(i,j):
                return False
        return True
    
    def isEmptyColumn(self, j, rowns ):
        """
        Determine whether the column 'j' is empty by iterating over all its cells
        
        Arguments:
        j     -- The index of the column to be checked.
        rowns -- The number of rows to be checked
        
        Returns:
        true  -- if the column is empty
        false -- if the column is not empty
        """
        for i in range(0,rowns) :
            if not self.isEmpty(i,j):
                return False
        return True
    
    def getValidRowsCols(self) :
        """
        Determine the number of non-empty rows and columns in the Excel sheet

        Starts from the counts reported by number_of_good_rows/number_of_good_cols
        and then drops trailing rows and columns whose cells are all empty.
        A sheet without any content yields (0, 0).

        Returns:
        rowns -- number of rows
        colns -- number of columns
        """
        colns = number_of_good_cols(self.r_sheet)
        rowns = number_of_good_rows(self.r_sheet)

        # Check whether the number of good columns and rows are correct.
        # The '> 0' guards stop the trimming at the sheet boundary: without
        # them an all-empty sheet (or colns == 0, for which isEmptyRow is
        # trivially True) keeps decrementing past zero.
        while rowns > 0 and self.isEmptyRow(rowns-1, colns) :
            rowns = rowns - 1
        while colns > 0 and self.isEmptyColumn(colns-1, rowns) :
            colns = colns - 1

        self.log.debug('Number of rows with content:    {0}'.format(rowns))
        self.log.debug('Number of columns with content: {0}'.format(colns))
        return rowns, colns
    
    def getQName(self, names):
        """
        Create a valid QName from a string or dictionary of names
        
        Arguments:
        names -- Either dictionary of names or string of a name.
        
        Returns:
        qname -- a valid QName for the dictionary or string
        """
        
        if type(names) == dict :
            qname = self.sheet_qname
            for k in names :
                qname = qname + '_' + self.processString(names[k])
        else :
            qname = self.sheet_qname + '_' + self.processString(names)
        
        self.log.debug('Minted new QName: {}'.format(qname))
        return qname

    def getColHeaderLabel(self, colheaders):
        """Join the given column header strings into a single label, separated by underscores"""
        return '_'.join(colheaders)
        
    def getColHeaderValueURI(self, colheaders):
        label = self.getColHeaderLabel(colheaders)
        uri = self.namespaces['scope'][self.processString(label)]
        return uri
        
    def getColHeaderPropertyURI(self, index):
        uri = self.namespaces['scope']['HColHeader' + str(index)]
        return uri
    
    def processString(self, string):
        """
        Turn a cell value into a string that can be used inside a URI

        The value is converted to unicode and stripped, every '/' becomes '-',
        every whitespace character, parenthesis, comma and period becomes '_',
        and the UTF-8 encoded result is URL-quoted.

        Arguments:
        string -- The string representing the value of the source cell

        Returns:
        processedString -- The processed string
        """
        # TODO accents too
        text = unicode(string).strip()
        text = text.replace('/', '-')
        text = re.sub(r'\s|\(|\)|,|\.', '_', text)
        encoded = text.encode('utf-8', 'ignore')
        return urllib.quote(encoded)

            
    def addValue(self, source_cell_value, altLabel=None):
        """
        Register a value resource for a source cell, labelled with skos:prefLabel and optionally skos:altLabel (language 'nl').

        When source_cell_value is a dictionary (e.g. in the case of HierarchicalRowHeaders) the last entry of its values() is used as preferred label; otherwise the value itself is used.

        Arguments:
        source_cell_value -- The string (or dictionary of strings) representing the value of the source cell
        altLabel -- Optional alternative label; only added when it differs from the preferred label

        Returns:
        source_cell_value_qname -- a valid QName for the value of the source cell
        """
        source_cell_value_qname = self.getQName(source_cell_value)
        valueResource = self.namespaces['scope'][source_cell_value_qname]
        skos = self.namespaces['skos']

        # Pick the text that serves as preferred label
        if type(source_cell_value) == dict :
            preferred = source_cell_value.values()[-1]
        else :
            preferred = source_cell_value

        self.graph.add((valueResource, skos.prefLabel, Literal(preferred,'nl')))

        # An alternative label is only worth recording when it says something new
        if altLabel and altLabel != preferred:
            self.graph.add((valueResource, skos.altLabel, Literal(altLabel,'nl')))

        return source_cell_value_qname
    
    def parseSheet(self):
        """
        Parses the currently selected sheet in the workbook, takes no arguments. Iterates over all cells in the Excel sheet and produces relevant RDF Triples.

        Expects self.r_sheet, self.rowns, self.colns and self.sheet_qname to be set
        (doLink does this before calling). For every cell the method stores the cell,
        its name, style, type and QName on self and then hands (i, j) to the handler
        that matches the cell type; the handlers read that state from self.
        Afterwards the hierarchy of the collected column headers is added to the graph.
        """
        self.log.info("Parsing {0} rows and {1} columns.".format(self.rowns,self.colns))
        
        # Per-sheet state, reset on every call. Within this method only
        # column_dimensions (the final loop) and rowhierarchy (one empty entry
        # per row) are touched; presumably the parse* handlers fill the rest.
        self.column_dimensions = {}
        self.property_dimensions = {}
        self.row_dimensions = {}
        self.rowhierarchy = {}

        # Get dictionary of annotations
        self.annotations = self.r_sheet.cell_note_map
        
        for i in range(0,self.rowns):
            # Every row starts with an empty hierarchy
            self.rowhierarchy[i] = {}
            
            for j in range(0, self.colns):
                # Parse cell data
                self.source_cell = self.r_sheet.cell(i,j)
                self.source_cell_name = cellname(i,j)
                self.style = self.styles[self.source_cell].name
                self.cellType = self.getType(self.style)
                self.source_cell_qname = self.getQName(self.source_cell_name)
                
                self.log.debug("({},{}) {}/{}: \"{}\"". format(i,j,self.cellType, self.source_cell_name, self.source_cell.value))

                # Try to parse ints to avoid ugly _0 URIs
                # The value is replaced in place, and only when the int
                # conversion compares equal to the original value.
                try:
                    if int(self.source_cell.value) == self.source_cell.value:
                        self.source_cell.value = int(self.source_cell.value)
                except ValueError:
                    self.log.debug("(%s.%s) No parseable int" % (i,j))

                                            
                # Parse annotation (if any)
                # Only when the 'annotations' config section has enabled == "1"
                if self.config.get('annotations', 'enabled') == "1":
                    if (i,j) in self.annotations:
                        self.parseAnnotation(i, j)

                # Parse cell even if empty
                if self.cellType == 'Data':
                    self.parseData(i, j)
                elif (self.cellType == 'HRowHeader') :
                    self.updateRowHierarchy(i, j)
                elif self.cellType == 'ColHeader' :
                    self.parseColHeader(i, j)
                elif self.cellType == 'RowProperty' :
                    self.parseRowProperty(i, j)
                
                # If cell not empty, check for more types
                # Note that a non-empty 'HRowHeader' cell is handled twice:
                # by updateRowHierarchy above and by parseHierarchicalRowHeader below.
                if not self.isEmpty(i,j) :
                    #self.graph.add((self.namespaces['scope'][self.source_cell_qname],RDF.type,self.namespaces['tablink'][self.cellType]))
                    #self.graph.add((self.namespaces['scope'][self.source_cell_qname],self.namespaces['tablink']['cell'],Literal(self.source_cell_name)))
                    #self.graph.add((self.namespaces['scope'][self.source_cell_qname],self.namespaces['tablink']['col'],Literal(colname(j))))
                    #self.graph.add((self.namespaces['scope'][self.source_cell_qname],self.namespaces['tablink']['row'],Literal(i+1)))
                    #self.graph.add((self.namespaces['scope'][self.source_cell_qname] isrow row
                    if self.cellType == 'Title' :
                        self.parseTitle(i, j)
    
                    elif self.cellType == 'RowHeader' :
                        self.parseRowHeader(i, j)
                    
                    elif self.cellType == 'HRowHeader' :
                        self.parseHierarchicalRowHeader(i, j)
                         
                    elif self.cellType == 'RowLabel' :
                        self.parseRowLabel(i, j)
        
        # Add additional information about the hierarchy of column headers
        # Each value is sliced and joined as a sequence of header labels
        # (presumably one list per column, ordered top to bottom -- confirm in
        # parseColHeader). Every prefix of length index+1 is linked to the
        # prefix of length index, and both get a 'depth' triple.
        for value in self.column_dimensions.values():
            for index in range(1, len(value)):
                uri_sub = self.getColHeaderValueURI(value[:index+1])
                uri_top = self.getColHeaderValueURI(value[:index])
                self.graph.add((uri_sub, self.namespaces['tablink']['subColHeaderOf'], uri_top))
                self.graph.add((uri_sub, self.namespaces['tablink']['depth'], Literal(index)))
                self.graph.add((uri_top, self.namespaces['tablink']['depth'], Literal(index-1)))
        
        self.log.info("Done parsing...")

    def updateRowHierarchy(self, i, j) :
        """
        Record the hierarchical row header value of cell (i, j) in the row hierarchy.

        Cells marked as hierarchical row header are often empty, meaning that
        their intended value is stored in the row above. Three cases are handled:

        * the cell is empty, or is exactly 'id.' (case-insensitive, surrounding
          whitespace ignored): copy the value stored for (i-1, j), if there is one;
        * the cell starts with 'id.' or 'id ': copy the value stored for (i-1, j)
          and append whatever follows the three-character prefix; if nothing can
          be copied (or the two cannot be concatenated) the cell's own value is
          stored instead;
        * any other cell: store the cell's own value.

        Keyword arguments:
        int i -- row number
        int j -- col number

        Returns:
        The (updated) row hierarchy dictionary
        """
        cell_value = self.source_cell.value
        empty = self.isEmpty(i, j)
        # The textual form is only needed (and only computed) for non-empty cells.
        lowered = '' if empty else str(cell_value).lower()

        if empty or lowered.strip() == 'id.' :
            try :
                inherited = self.rowhierarchy[i-1][j]
            except (KeyError, IndexError, TypeError) :
                # Nothing stored above this cell. The cell's own value is
                # deliberately NOT stored: it caused double slashes in URIs.
                self.log.debug("({},{}) Top row, added nothing\nRow hierarchy: {}".format(i,j,self.rowhierarchy[i]))
            else :
                self.rowhierarchy[i][j] = inherited
                self.log.debug("({},{}) Copied from above\nRow hierarchy: {}".format(i,j,self.rowhierarchy[i]))
        elif lowered.startswith(('id.', 'id ')) :
            # Drop the three-character 'id.'/'id ' prefix, keep the remainder.
            suffix = cell_value[3:]
            try :
                combined = self.rowhierarchy[i-1][j] + suffix
            except (KeyError, IndexError, TypeError) :
                # No value above, or it is not a string (e.g. a parsed int):
                # fall back to the cell's own, unmodified value.
                self.rowhierarchy[i][j] = cell_value
                self.log.debug("({},{}) Top row, added value\nRow hierarchy {}".format(i,j,self.rowhierarchy[i]))
            else :
                self.rowhierarchy[i][j] = combined
                self.log.debug("({},{}) Copied from above+suffix\nRow hierarchy {}".format(i,j,self.rowhierarchy[i]))
        else :
            self.rowhierarchy[i][j] = cell_value
            self.log.debug("({},{}) Added value\nRow hierarchy {}".format(i,j,self.rowhierarchy[i]))
        return self.rowhierarchy
    
    def parseHierarchicalRowHeader(self, i, j) :
        """
        Create relevant triples for the cell marked as HierarchicalRowHeader (i, j are row and column)

        Every value recorded in the row hierarchy of row i is stored in
        self.row_dimensions[i], keyed by the property of its column. The
        properties of neighbouring hierarchy columns are then linked in the
        graph with tablink:subPropertyOf, the right-hand column being the
        sub-property of the column to its left.
        """
        self.log.debug("Parsing HierarchicalRowHeader")

        hierarchy = self.rowhierarchy[i]
        scope = self.namespaces['scope']

        # Add all the values
        for (column, value) in hierarchy.items():
            prop = self.property_dimensions[column]
            self.row_dimensions.setdefault(i,{})[scope[prop]] = Literal(value)

        # Relate the hierarchical headers. The columns are walked in sorted
        # order: plain dict ordering is not a reliable left-to-right order, and
        # a keys() view cannot be indexed on Python 3.
        columns = sorted(hierarchy)
        for (top_column, sub_column) in zip(columns, columns[1:]):
            prop_top = scope[self.property_dimensions[top_column]]
            prop_sub = scope[self.property_dimensions[sub_column]]
            self.graph.add((prop_sub, self.namespaces['tablink']['subPropertyOf'], prop_top))
        

    def parseRowLabel(self, i, j):
        """
        Create relevant triples for the cell marked as Label (i, j are row and column)

        The resource is looked up through the QName derived from the row
        hierarchy of row i. Every skos:prefLabel that resource currently has in
        the graph is demoted to a skos:altLabel.

        NOTE: the statements that would add this cell's own value as the new
        skos:prefLabel (and mark the cell as a label) are currently disabled,
        so this method only demotes existing labels.
        """
        self.log.debug("Parsing Row Label")

        # Get the QName of the HierarchicalRowHeader cell that this label belongs to, based on the rowhierarchy for this row (i)
        hierarchicalRowHeader_value_qname = self.getQName(self.rowhierarchy[i])

        header = self.namespaces['scope'][hierarchicalRowHeader_value_qname]
        skos = self.namespaces['skos']

        # Take a snapshot first: graph.objects() is lazy, and the loop below
        # removes and adds triples in the very graph it would be iterating.
        prefLabels = list(self.graph.objects(header, skos.prefLabel))
        for label in prefLabels :
            # If the hierarchicalRowHeader QName already has a preferred label, turn it into a skos:altLabel
            self.graph.remove((header, skos.prefLabel, label))
            self.graph.add((header, skos.altLabel, label))
            self.log.debug("Turned skos:prefLabel {} for {} into a skos:altLabel".format(label, hierarchicalRowHeader_value_qname))
    
    def parseRowHeader(self, i, j) :
        """
        Create relevant triples for the cell marked as RowHeader (i, j are row and column)

        The header value is stored in self.row_dimensions[i] under the scoped
        property of column j: as a URIRef when the cell text parses as a URI
        with both a scheme and a network location, otherwise as a Literal (in
        which case the raw value is also kept in self.source_cell_value_qname).
        """
        raw_value = self.source_cell.value

        # Don't attach the cell value to the namespace if it's already a URI
        parsed = urlparse(str(raw_value))
        looks_like_uri = parsed.scheme and parsed.netloc
        if not looks_like_uri:
            self.source_cell_value_qname = raw_value
            header_term = Literal(self.source_cell_value_qname)
        else:
            header_term = URIRef(raw_value)

        # Get the property to use for this row header column
        column_property = self.property_dimensions[j]
        row_entry = self.row_dimensions.setdefault(i, {})
        row_entry[self.namespaces['scope'][column_property]] = header_term

        return
    
    def parseColHeader(self, i, j) :
        """
        Create relevant triples for the cell marked as Header (i, j are row and column)

        An empty cell only contributes when it lies inside a merge box whose
        anchor is in a different column; the anchor cell's content is then
        used. The content is appended to the header path of column j and the
        resulting header resource is typed and labelled in the graph.
        """
        content = self.processString(self.source_cell.value)

        if self.isEmpty(i,j):
            if not self.insideMergeBox(i,j):
                return
            anchor_row, anchor_col = self.getMergeBoxCoord(i,j)
            # If we are in a vertical merge box, skip adding the dimension
            if anchor_col == j:
                return
            # Take the content from the merge box anchor instead
            content = self.processString(self.r_sheet.cell(anchor_row, anchor_col).value)

        # Extend the header path of this column (it starts at the sheet QName)
        header_path = self.column_dimensions.setdefault(j, [self.sheet_qname])
        header_path.append(content)

        # Add the data to the graph
        add_triple = self.graph.add
        header = self.getColHeaderValueURI(self.column_dimensions[j])
        add_triple((header, RDF.type, self.namespaces['tablink']['ColumnHeader']))
        add_triple((header, self.namespaces['skos']['prefLabel'], Literal(content)))
        add_triple((header, self.namespaces['tablink']['cell'], Literal(self.source_cell_name)))
        return
    
    def parseRowProperty(self, i, j) :
        """
        Create relevant triples for the cell marked as Property (i, j are row and column)

        The property name comes from the cell itself or, for an empty cell
        inside a merge box, from the merge box anchor; an empty cell outside
        any merge box is ignored. The name is registered through addValue(),
        remembered as the property of column j and typed as tablink:RowProperty.
        """
        if not self.isEmpty(i,j):
            raw_name = self.source_cell.value
        elif self.insideMergeBox(i,j):
            anchor_row, anchor_col = self.getMergeBoxCoord(i,j)
            raw_name = self.r_sheet.cell(anchor_row, anchor_col).value
        else:
            return

        self.source_cell_value_qname = self.addValue(raw_name)

        # One property per column: a later property cell replaces an earlier one
        self.property_dimensions[j] = self.source_cell_value_qname

        # Add to graph
        property_uri = self.namespaces['scope'][self.property_dimensions[j]]
        row_property_class = self.namespaces['tablink']['RowProperty']
        self.graph.add((property_uri, RDF.type, row_property_class))

        return
    
    def parseTitle(self, i, j) :
        """
        Create relevant triples for the cell marked as Title (i, j are row and column)

        The cell value is attached to the sheet resource as its tablink:title.
        """
        sheet_resource = self.namespaces['scope'][self.sheet_qname]
        title_property = self.namespaces['tablink']['title']
        title_value = Literal(self.source_cell.value)
        self.graph.add((sheet_resource, title_property, title_value))
        return
        
        
    def parseData(self, i,j) :
        """
        Create relevant triples for the cell marked as Data (i, j are row and column)

        The cell becomes a qb:Observation in the data set of the current sheet.
        Its value is stored under the data cell property, it receives every
        (property, value) pair recorded for row i in self.row_dimensions, and
        it is linked through tablink:dimension to the column header of column j.

        Empty cells are governed by the 'implicitZeros' option of the
        'dataCell' configuration section: '0' skips the cell entirely, '1'
        records the value 0, any other setting records the (empty) cell value.
        """
        empty = self.isEmpty(i,j)
        # The option only matters for (and is only read for) empty cells.
        implicit_zeros = self.config.get('dataCell', 'implicitZeros') if empty else None
        if implicit_zeros == '0':
            return

        # Use the fully qualified name of the cell for the resource name
        observation = self.namespaces['scope'][self.source_cell_qname]

        # It's an observation
        self.graph.add((observation,
                        RDF.type,
                        self.namespaces['qb']['Observation']))

        # It's in the data set defined by the current sheet
        self.graph.add((observation,
                        self.namespaces['qb']['dataSet'],
                        self.namespaces['scope'][self.sheet_qname]))

        # Add its value
        # TODO type the value
        if implicit_zeros == '1':
            cell_literal = Literal(0)
        else:
            cell_literal = Literal(self.source_cell.value)
        self.graph.add((observation,
                        self.namespaces['scope'][self.dataCellPropertyName],
                        cell_literal))

        # Use the row dimensions dictionary to find the properties that link
        # data values to row headers. Only the lookup is guarded, so that a
        # KeyError raised while adding triples is not mistaken for a row
        # without dimensions.
        try :
            row_dimensions = self.row_dimensions[i]
        except KeyError :
            self.log.debug("({}.{}) No row dimension for cell".format(i,j))
        else :
            # items() rather than iteritems(): works on Python 2 and 3
            for (prop, value) in row_dimensions.items() :
                self.graph.add((observation, prop, value))

        # Use the column dimensions dictionary to find the objects of the
        # d2s:dimension property
        self.graph.add((observation,
                        self.namespaces['tablink']['dimension'],
                        self.getColHeaderValueURI(self.column_dimensions[j])))

    def parseAnnotation(self, i, j) :
        """
        Create relevant triples for the annotation attached to cell (i, j)

        Only the Open Annotation model ('oa' in the 'model' option of the
        'annotations' configuration section) is implemented; for any other
        setting a message is printed and nothing is added.

        For 'oa', an oa:Annotation resource named after the cell is added to
        self.annotationGraph with a blank-node body holding the annotation
        text (line breaks replaced by spaces), the cell as target, the
        annotation author, the modification date of the source file, today's
        date and fixed generator / model version URIs.
        """
        if self.config.get('annotations', 'model') != 'oa':
            # Create triples according to Nanopublications model
            print("Nanopublications not implemented yet!")
            return

        # Create triples according to Open Annotation model
        oa = self.annotationNamespaces['oa']
        xsd_date = self.annotationNamespaces['xsd']['date']
        annotation = self.annotationNamespaces['scope'][self.source_cell_qname]
        note = self.annotations[(i,j)]
        body = BNode()

        # "\r\n" has to be handled before the single characters, otherwise a
        # Windows line break would end up as two spaces.
        text = note.text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")

        date_format = "%Y-%m-%d"
        modified_on = datetime.datetime.fromtimestamp(os.path.getmtime(self.filename)).strftime(date_format)
        generated_on = datetime.datetime.now().strftime(date_format)

        triples = (
            (annotation, RDF.type, oa['Annotation']),
            (annotation, oa['hasBody'], body),
            (body, RDF.value, Literal(text.encode('utf-8'))),
            (annotation, oa['hasTarget'], self.namespaces['scope'][self.source_cell_qname]),
            (annotation, oa['annotator'], Literal(note.author.encode('utf-8'))),
            (annotation, oa['annotated'], Literal(modified_on, datatype=xsd_date)),
            (annotation, oa['generator'], URIRef("https://github.com/Data2Semantics/TabLinker")),
            (annotation, oa['generated'], Literal(generated_on, datatype=xsd_date)),
            (annotation, oa['modelVersion'], URIRef("http://www.openannotation.org/spec/core/20120509.html")),
        )
        for triple in triples:
            self.annotationGraph.add(triple)
コード例 #34
0
class ContextTestCase(unittest.TestCase):
    """Exercise context (named graph) handling of a ConjunctiveGraph.

    The store plugin under test is named by the ``store`` class attribute.
    Each test starts from an emptied graph opened on a fresh location.
    """
    # Name of the store plugin handed to ConjunctiveGraph
    store = 'default'
    slow = True
    # Temporary directory backing the store; stays None for the MySQL store
    tmppath = None

    def setUp(self):
        """Open an empty graph on the configured store and create test terms."""
        print(self.store)
        self.graph = ConjunctiveGraph(store=self.store)
        if self.store == "MySQL":
            from mysql import configString
            from rdflib.store.MySQL import MySQL
            path = configString
            MySQL().destroy(path)
        else:
            path = self.tmppath = mkdtemp()
        # Open the location chosen above (the MySQL configuration string or
        # the temporary directory), not unconditionally self.tmppath.
        self.graph.open(path, create=True)
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

        self.c1 = URIRef(u'context-1')
        self.c2 = URIRef(u'context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))

    def tearDown(self):
        """Close the graph and remove the temporary directory, if one was made."""
        self.graph.close()
        if self.tmppath is not None:
            shutil.rmtree(self.tmppath)

    def get_context(self, identifier):
        """Return a Graph for ``identifier`` that shares this test's store."""
        assert isinstance(identifier, (URIRef, BNode)), type(identifier)
        # NOTE(review): the test case itself is passed as namespace_manager;
        # this looks unintended -- confirm before relying on it.
        return Graph(store=self.graph.store, identifier=identifier,
                         namespace_manager=self)

    def addStuff(self):
        """Add seven like/hate triples to context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel)) # gasp!

    def removeStuff(self):
        """Remove from context c1 the seven triples added by addStuff."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel)) # gasp!

    def addStuffInMultipleContexts(self):
        """Add one and the same triple to the default context, c1 and c2."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek) # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        self.assertEqual(len(self.graph), len(graph))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty

        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for _ in range(10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEqual(len(self.graph), oldLen + 1)

        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek) # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertTrue(triple not in self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertTrue(triple not in self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek) # revenge!

        self.addStuffInMultipleContexts()
        all_ids = [c.identifier for c in self.graph.contexts()]
        self.assertTrue(self.c1 in all_ids)
        self.assertTrue(self.c2 in all_ids)

        # A real list, so that both membership tests see every identifier
        # (a map object would be partly consumed by the first test on Python 3).
        contextList = [c.identifier for c in self.graph.contexts(triple)]
        self.assertTrue(self.c1 in contextList)
        self.assertTrue(self.c2 in contextList)

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.get_context(c1)), 1)

        self.graph.remove_context(self.get_context(c1))
        # NOTE(review): testContexts reads .identifier from what contexts()
        # yields, so comparing the bare URIRef here may never match and this
        # check may be vacuous -- confirm.
        self.assertTrue(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        """Check triple pattern matching with and without a context."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        for c in [graph, self.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob,)))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))

            asserte(set(c.subject_objects(hates)), set([(bob, pizza), (bob, michel)]))
            asserte(set(c.subject_objects(likes)), set([(tarek, cheese), (michel, cheese), (michel, pizza), (bob, cheese), (tarek, pizza)]))

            asserte(set(c.predicate_objects(michel)), set([(likes, cheese), (likes, pizza)]))
            asserte(set(c.predicate_objects(bob)), set([(likes, cheese), (hates, pizza), (hates, michel)]))
            asserte(set(c.predicate_objects(tarek)), set([(likes, cheese), (likes, pizza)]))

            asserte(set(c.subject_predicates(pizza)), set([(bob, hates), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(cheese)), set([(bob, likes), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))

            asserte(set(c), set([(bob, hates, michel), (bob, likes, cheese), (tarek, likes, pizza), (michel, likes, pizza), (michel, likes, cheese), (bob, hates, pizza), (tarek, likes, cheese)]))

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
コード例 #35
0
ファイル: tei2onto2.py プロジェクト: klaffenboeck/contextus
def convert(teifile, namespace):
	"""Convert an RDFa-annotated TEI play into an RDF event graph and print it as RDF/XML.

	The file is read twice: once as RDFa into a ConjunctiveGraph and once as
	plain XML with ElementTree. The XML pass walks the cast list, the acts
	(div1), the scenes (div2), the speeches (sp) and the stage directions, and
	adds to the graph:

	* a Character for every role and a Being for every actor, linked with
	  portrays / portrayed-by;
	* a typed location for scenes that declare one;
	* a Travel event for every entrance and exit stage direction, with the
	  people (or a freshly minted Group) involved as subject entity;
	* whatever getSocial() adds for the speakers collected between two
	  entrance/exit events (social events, per the existing comments).

	Consecutive events are chained with follows / precedes.

	:param teifile: TEI source handed both to graph.load() and ET.parse().
	:param namespace: base namespace URI used to mint event/ and group/ resources.
	:return: None; the serialized graph is written to stdout.

	NOTE(review): the function relies on names that are not defined inside it
	(perseusid, getSocial, extractCURIEorURI, omb, ome, omj, oml, ET, rdflib) --
	presumably module-level globals; confirm against the rest of the file.
	"""
	#graph_uri = "http://contextus.net/resource/blue_velvet/"
	
	ns = Namespace(namespace)

	graph = ConjunctiveGraph()
	graph.load(teifile, format="rdfa")
	
	graph.bind("default", ns)
	
	to_update = ""

	# Remember a namespace URI already bound in the graph that matches ns; the
	# last match wins.
	# NOTE(review): `nsuri in ns` is a containment test on the Namespace object;
	# its exact semantics depend on the rdflib version -- confirm.
	for prefix, nsuri in graph.namespaces(): 
		#print("prefix: " + str(prefix) + " - " + str(nsuri))
		if nsuri in ns:
			to_update = nsuri
			
	# Re-home every subject that contains the remembered namespace onto ns.
	# NOTE(review): triples are removed and added while the graph is being
	# iterated -- confirm the store tolerates mutation during iteration.
	for s, p, o in graph:
#    		print s, p, o
    		if to_update != "" and to_update in s:
    			graph.remove((s, p, o))
			s = URIRef(s.replace(to_update, ns))			
			graph.add((s, p, o))
	
	# Running state shared across acts and scenes.
	act = ""
	scene = ""
	line = ""
	char = 0	# counter used to mint character/<n> identifiers
	loc = 0	# counter used to mint location/<n> identifiers
	
	
	#timeline = ns['timeline/narrative']
	#graph.add((timeline, RDF.type, ome['Timeline']))

	tree = ET.parse(teifile)
	# Maps a role's xml:id to the graph node created for that character.
	cast = dict()
	
	titleNode = tree.find('//title')
	
	# --- Cast list: create Character / Being nodes and link them -------------
	castItems = tree.findall('/text/body/div1/castList//castItem')
	for castItem in castItems:
		actorNode = castItem.find('actor')
		roleNode = castItem.find('role')

		if roleNode != None:
			id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id")
		
		#print("Found castItem!")

		actor = None
		role = None

		# Check to see if we already have an entry
		if(roleNode != None and roleNode.get("about")):		

			charname = roleNode.get("about")
			
			if(charname.find(":") > -1):
				# Prefixed value: mint "<prefix>:character/<n>]" and pass the
				# original local name minus its last character along
				# (presumably the closing "]" of a safe CURIE -- confirm).
				nmsp,nom = charname.split(":", 1)		
				charcode =  "character/" + str(char)
				charref = nmsp + ":" + charcode + "]"
				role = extractCURIEorURI(graph, charref,nom[0:-1])
				char += 1		
				#print("1:" + charname + ": adding id " + id + " to " + role)
			else:
				role = extractCURIEorURI(graph, charname)
				#print("2:" + charname + ": adding id " + id + " to " + role)

			cast[id] = role
			graph.add((role, RDF.type, omb['Character']))
			#print(charname + ": adding id " + id + " to " + role)
		
		if(actorNode != None and actorNode.get("about")):
			actor = extractCURIEorURI(graph, actorNode.get("about"))
			graph.add((actor, RDF.type, omb['Being']))

		if actor != None and role != None:
			graph.add((actor, omb['portrays'], role))
			graph.add((role, omb['portrayed-by'], actor))

	# Event and group numbering is global across the whole play.
	eventCount = 1
	groupCount = 1
	prior_event = None
	
	# --- Acts (div1) and scenes (div2) ----------------------------------------
	actItems = tree.findall('/text/body/div1')
	ref = ""
	
	for actItem in actItems:
	
		if actItem.get("type") == "act":
			act = actItem.get("n")
		
		sceneItems = actItem.findall('div2')
		
		for sceneItem in sceneItems:
			
			#print("Found sceneItems!")
			
			if sceneItem.get("type") == "scene":
				scene = sceneItem.get("n")		
			
			# Work out the location of this scene
			location = None
			stageItems = sceneItem.findall("stage")
			
			#internalnum = 1
			stagenum = 0	# index of the current scene-level <stage> element
			speechnum = 1	# index of the next <sp> element within the scene
			
			for stageItem in stageItems:
				if stageItem.get("type") == "location":
					# The RDFa parser doesn't handle the type - so we can grab that here.
					
					if stageItem.get("about") != None:
						locname = stageItem.get("about")
					
						# Adding location type/oml:space for location
						# NOTE(review): if "about" is an empty string neither
						# branch runs and `type` keeps its previous value (or
						# is unbound) -- confirm this cannot happen.
						if stageItem.get("typeof") and stageItem.get("about"):
							type = extractCURIEorURI(graph, stageItem.get("typeof"))
							#print "1. Location: " + str(location) + " Type: " + str(type)
						elif stageItem.get("about"):	
							#print "2. Location: " + str(locname)											
							type = extractCURIEorURI(graph, oml['Space'])						
						
						
						# Get the location value and add an rdfs:label if the location is not using the TEI value
						if(locname.find(":") > -1):
							nmsp,nom = locname.split(":", 1)		
							loccode =  "location/" + str(loc)
							locref = nmsp + ":" + loccode + "]"
							location = extractCURIEorURI(graph, locref, nom[0:-1])
							loc += 1
							graph.add((location, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(nom[0:-1])))
						else:
							location = extractCURIEorURI(graph, stageItem.get("about"))
						
						# Add location to graph
						graph.add((location, RDF.type, type))	
					else:
						location = ""
					
						
					#print("Adding location type: " + type + " (" + location + ")")
	
	
			# NOTE(review): currentCast and speakers are only initialised here
			# when the cast is non-empty, but the code below (including the
			# getSocial calls) uses them unconditionally.
			if cast:
				# Work out a list of all cast in a given section
				currentCast = list()
				speakers = list()
			
	
			# Iterate through elements within stageItem
				# Find speaker events and add to list of current cast for inclusion in social event
				# Find reference events and add to ongoing social event ?
				# Find stage events
					# If event is an entrance then
						# create social event for people talking before entrance
						# create travel event i.e. entrance
						# add new arrival to current cast list
					# If event is exit event then
						# create social event for people talking before exit
						# create travel event i.e. exit
							# if leavers are not named directly the calculate who is leaving
						# remove leavers from current cast list
				# If reach end of scene then create social event with current cast list
				
				#Also need to check if social event before exit has same composition as social event after exit since then they should be merged
				
			event = ns['event/'+str(eventCount)]
			group = ns['group/'+str(groupCount)]	
			
			refersTo = list()
			#parent = None
			speakerNodes = list()
			speakerRef = list()
			
			# Base URL for references into this scene.
			# NOTE(review): perseusid is not defined in this function.
			xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:"  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
			# stagecount: <stage> elements nested in the most recent speech
			# that the walk below has not reached yet.
			stagecount = 0
			# stage_array: for every nested <stage> seen so far in this scene,
			# the number of <l> elements that preceded it inside its speech.
			stage_array = list()
						
			for node in sceneItem.getiterator():
				#print("Node: " + node.tag)	
				
				
				"""
				if node.tag == "lb":
					if node.get("ed") == "F1":
						line = node.get("n")	
						if titleNode != None:
							ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line)	
						else:
							ref = str(act) + "." + str(scene) + "." + str(line)
							
						#xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:"  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line)	 + "'])"
						xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:"  + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
						#print("Ref: " + xpointer)
				"""		
						
				if node.tag == "sp":
					# A speech: record the speaker (the "who" value without
					# its first character is the cast key) and a reference
					# to the speech.
					id = node.get("who")
					
					if id and cast:
						speakers.append(cast[id[1:]])	
						speakerNodes.append(node)
						
						if perseusid == None:
							speakerRef.append(ref)
						else:
							#speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)"
							speechRef  = xpointer + "#xpointer(//div2/sp[" + str(speechnum) + "])";
							speakerRef.append(speechRef)
						#print("Line ref: " + ref)
						
						if cast[id[1:]] not in currentCast:
							currentCast.append(cast[id[1:]])
							
					#internalnum = 1
					speechnum += 1
					stagecount = 0
					
					
					previousl = 0
					
					# Pre-scan the speech so that the nested <stage> elements,
					# which the outer walk visits later, can be addressed by
					# the line they follow.
					for subnode in node.getiterator():
						if subnode.tag == "l":
							previousl += 1
						
						if subnode.tag == "stage":
							#print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n")
							stage_array.append(previousl)
							stagecount += 1
							
					
						
				elif node.tag == "stage":
					
					if stagecount > 0:
						# Stage direction nested in the previous speech:
						# stage_array is never reset within the scene, so the
						# entry is located by counting back from its end.
						s_max = len(stage_array)
						diff = s_max - stagecount
						
						#if diff == 0:
						#	stagenum += 1
					
						entRef = xpointer + "#xpointer(//div2/sp[" + str(speechnum - 1) + "]/l[" + str(stage_array[diff]) +"]/stage)";
						#internalnum += 1
						stagecount -= 1
					else:
						# Stage direction that is a direct child of the scene.
						stagenum += 1
						entRef = xpointer + "#xpointer(//div2/stage[" + str(stagenum) +"])";				
					
					if node.get("type") == "entrance":		
					
						# Add Social Events for all the people who spoke since the last break (if there were any)
						
						update = list()
						update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
						# getSocial returns the (possibly advanced) event counter
						# and the new prior event as a two-item sequence.
						eventCount = update[0]
						prior_event = update[1]
						
						event = ns['event/'+str(eventCount)]
						
						speakers = list()
						speakerNodes = list()
						speakerRef = list()
					
						# Add Travel Event
						
						graph.add((event, RDF.type, omj['Travel']))
						
						if perseusid == None:
							graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref)))
						else:
							#entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)"
							graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef)))
						
						#print("Entrance event. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
	
						#print("Found entrence event!")
						if location:
							graph.add((event, ome['to'], location))		
							
						# NOTE(review): node.get("about") returns None when the
						# attribute is missing, which would make len() raise.
						involved = node.get("about")
						
						if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
							involved = involved[1:-1]
							
						chunks = involved.split()
						
						chunk_count = len(chunks)
						
						# Several arrivals are wrapped in a Group resource.
						if chunk_count > 1:
							#type = extractCURIEorURI(graph, "[omb:Group]")
							#graph.add((group, RDF.type, type))
							graph.add((group, RDF.type, omb['Group']))
							
						event_label = ""	
						en = 1	# 1-based position of the current chunk
						
						for chunk in chunks:
							striped = chunk.strip()
							
							# Only bracketed ids are added to the current cast here.
							if(len(striped) > 0 and striped[0] == "[" and striped[-1] == "]"):
								striped = striped[1:-1]
								currentCast.append(cast[striped])								
							
							if chunk_count > 1:
								graph.add((group, ome['contains'], cast[striped]))
								
								# Build "a, b and c arrive": the label is emitted
								# once the last chunk has been reached.
								if en == chunk_count:
									event_label = event_label[0:-2] + " and " + striped
									graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " arrive")))
								elif en < chunk_count:
									event_label += striped + ", "									
									
							else:
								#print("Adding person as subject-entity to entry event "   + str(eventCount))
								graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(striped + " arrives")))
								graph.add((event, ome['has-subject-entity'], cast[striped]))
								
							en += 1
									
							
						if chunk_count > 1:
							graph.add((event, ome['has-subject-entity'], group))	
							#print("Adding group as subject-entity to entry event "   + str(eventCount))
							groupCount = groupCount + 1
							group = ns['group/'+str(groupCount)]	
		
						# Chain this event to the previous one.
						if(prior_event):
							graph.add((event, ome['follows'], prior_event))
							graph.add((prior_event, ome['precedes'], event))
		
						prior_event = event					
	
						eventCount = eventCount + 1
						event = ns['event/'+str(eventCount)]
									
					if node.get("type") == "exit":		
						
						# Add Social Events for all the people who spoke since the last break (if there were any)
						update = list()
						update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
						eventCount = update[0]
						prior_event = update[1]
						
						event = ns['event/'+str(eventCount)]
						
						speakers = list()
						speakerNodes = list()
						speakerRef = list()
						
						# Add Travel Event
					
						graph.add((event, RDF.type, omj['Travel']))		
						
						if perseusid == None:
							graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref)))
						else:
							#exitRef = xpointer
							#graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(exitRef)))
							graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef)))
	
						#print("Found entrence event!")
						if location != None:
							graph.add((event, ome['from'], location))		
							
						# NOTE(review): as for entrances, a missing "about"
						# attribute yields None and .strip() would raise.
						involved = node.get("about")	
						
						# Three exit flavours follow: everybody leaves, everybody
						# except the listed ids leaves ("!(...)"), or only the
						# listed ids leave.
						if involved.strip() == "" or "-all" in involved:
							# Remove everyone
													
							#print("Exit all. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
							
							#for peep in currentCast:	
							#	print(peep)
							
							if len(currentCast) > 1:							
								#type = extractCURIEorURI(graph, "[omb:Group]")
								#graph.add((group, RDF.type, type))
								graph.add((group, RDF.type, omb['Group']))
															
							event_label = ""
							en = 1
							
							for peep in currentCast:	
								# Reverse lookup: find the cast id that maps to
								# this node so it can be used in the label.
								short_ref = ""
								for key, value in cast.iteritems():
									if peep == value:	
										short_ref = key
							
								if len(currentCast) > 1:
									graph.add((group, ome['contains'], peep))
									
									if en == len(currentCast):
										event_label = event_label[0:-2] + " and " + short_ref
										graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))	
									elif en < len(currentCast):
										event_label += short_ref + ", "
																	
								else:
									#print("Adding person as subject-entity to exuant event "   + str(eventCount))
									graph.add((event, ome['has-subject-entity'], peep))
									graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves")))
									
								en += 1
	
							if len(currentCast) > 1:
								graph.add((event, ome['has-subject-entity'], group))	
								#print("Adding group as subject-entity to exuant event "   + str(eventCount))
								groupCount = groupCount + 1
								group = ns['group/'+str(groupCount)]	
							
							currentCast = list()
						
						elif "!" in involved:
							#print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
							
							#print("Event: " + involved);
							
							# Unwrap "[!(a b c)]" down to the ids of those who stay.
							if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
								involved = involved[1:-1]	
								
							involved = involved.strip()	
							
							if(len(involved) > 0 and involved[0] == "!" and involved[1] == "(" and involved[-1] == ")"):
								involved = involved[2:-1]	
							
							#print("involved: " + involved)
							
							striped = involved.strip()	
							
							c_ids = striped.split()
							
							chunks = list()
							
							for stay in c_ids:
								#print("Staying: " + cast[stay])
								chunks.append(cast[stay])							
							
							# Partition the current cast into those who stay and
							# those who leave.
							staying = list()
							going = list()
							
							for player in currentCast:
								#print("Player: " + player)							
								if player in chunks:
									staying.append(player)
								else:
									going.append(player)
									
							going_count = len(going)	
							
							if going_count > 1:
								#type = extractCURIEorURI(graph, "[omb:Group]")
								#graph.add((group, RDF.type, type))	
								graph.add((group, RDF.type, omb['Group']))
								

							event_label = ""
							en = 1
								
							for ghost in going:							
								#print("ghost: " + ghost)
								
								short_ref = ""
								for key, value in cast.iteritems():
									if ghost == value:	
										short_ref = key
										
										
								if ghost in currentCast:
									currentCast.remove(ghost)
									#print("Current cast count: "  + str(len(currentCast)))	
								
								if going_count > 1:
									graph.add((group, ome['contains'], ghost))
									
									if en == len(going):
										event_label = event_label[0:-2] + " and " + short_ref
										graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))	
									elif en < len(going):
										event_label += short_ref + ", "	
										
								else:
									#print("Adding person as subject-entity to exit event "   + str(eventCount))
									graph.add((event, ome['has-subject-entity'], ghost))
									graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves")))
									
								en += 1
								
								
							if going_count > 1:
								graph.add((event, ome['has-subject-entity'], group))	
								#print("Adding group as subject-entity to exit event "   + str(eventCount))
								groupCount = groupCount + 1
								group = ns['group/'+str(groupCount)]	
		
										
						else:
							#print("Exit some. GroupCount: " + str(groupCount) + ", EventCount: "  + str(eventCount) + ", current cast count: "  + str(len(currentCast)))	
							
							if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
								involved = involved[1:-1]	
								
							striped = involved.strip()							
							chunks = striped.split()
							
							#print("striped: " + striped)
					
							chunk_count = len(chunks)
						
							if chunk_count > 1:
								#type = extractCURIEorURI(graph, "[omb:Group]")
								#graph.add((group, RDF.type, type))
								graph.add((group, RDF.type, omb['Group']))
								
								
							event_label = ""
							en = 1								
							
							for chunk in chunks:							
								#print("chunk: " + chunk)			
									
								ghost = cast[chunk]
								
								#print("ghost: " + ghost)
								
								if ghost in currentCast:
									currentCast.remove(ghost)
									#print("Current cast count: "  + str(len(currentCast)))	
								
								if chunk_count > 1:
									graph.add((group, ome['contains'], ghost))
									
									# NOTE(review): en is compared with
									# len(currentCast), which shrinks as leavers
									# are removed above; the other branches compare
									# with the size of the leaving list -- confirm
									# that chunk_count was not intended here.
									if en == len(currentCast):
										event_label = event_label[0:-2] + " and " + chunk
										graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))	
									elif en < len(currentCast):
										event_label += chunk + ", "										
									
								else:
									#print("Adding person as subject-entity to exit event "   + str(eventCount))
									graph.add((event, ome['has-subject-entity'], ghost))
									graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(chunk + " leaves")))
									
								en += 1	
								
							if chunk_count > 1:
								graph.add((event, ome['has-subject-entity'], group))	
								#print("Adding group as subject-entity to exit event "   + str(eventCount))
								groupCount = groupCount + 1
								group = ns['group/'+str(groupCount)]	
	
		
							
							
						# Chain this event to the previous one.
						if(prior_event):
							graph.add((event, ome['follows'], prior_event))
							graph.add((prior_event, ome['precedes'], event))
		
						prior_event = event					
	
						eventCount = eventCount + 1
						event = ns['event/'+str(eventCount)]
						
				#elif node.tag == "rs":	
				#	#print("Found rs node")
				#	if parent:
				#		#print("Parent type is " + parent.tag)
				#		if parent.tag == "p" or  parent.tag == "l":
				#			refersTo.append(node.get("about"))
							
				#parent = node
					
	
			# Add Social Events for all the people who spoke since the last break (if there were any)
			#print("Final section of scene, currentCast:" + str(len(currentCast)) + " sperkers: " + str(len(speakers)))
			update = list()
			update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
			eventCount = update[0]
			prior_event = update[1]
			
			event = ns['event/'+str(eventCount)]
			group = ns['group/'+str(groupCount)]
				
			# Reset the per-scene collections before the next scene.
			speakers = list()
			speakerNodes = list()
			currentCast = list()
			speakerRef = list()
		
		
		
	# Python 2 print statement: dump the whole graph as RDF/XML to stdout.
	print graph.serialize(format='xml')		
コード例 #36
0
class RDFCrawler:
    """Crawl linked RDF documents, starting at a root URI, into a persistent graph.

    Every crawled document is parsed into ``self.graph`` (a ConjunctiveGraph
    opened on a 'Sleepycat' store). URI objects found in the graph are followed
    when they match one of the permitted domains.
    """

    logger = logging.getLogger(__name__)

    def __init__(self, uri, domains=None):
        """

        :param uri: root URI to start crawling .
        :param domains: iterable of permitted domains to crawl (optional).
        """
        self.root = uri
        self.graph_route = 'graph_store_%s' % hash(self.root)
        self.graph = ConjunctiveGraph('Sleepycat')
        self.graph.open(self.graph_route, create=True)
        # Copy the caller's collection: a shared default set (or the caller's
        # own set) must not be mutated by adding the root URI below.
        self._filter_domains = set(domains) if domains is not None else set()
        self._filter_domains.add(uri)
        self.last_process_time = 0.0
        self.lock = RLock()

    def filter_uris(self, uri_list):

        """
        :param uri_list: list of URIs to be filtered.
        :return: list of the URIs that contain at least one permitted domain,
                 each URI at most once per occurrence in ``uri_list``.
        """
        # any() keeps a URI once even when several domains match it.
        return [uri for uri in uri_list
                if any(match in str(uri) for match in self._filter_domains)]

    def _has_context(self, graph, subject):
        """

        :param subject: the URIRef or URI to check if it has current context.
        :return: True if the subject's context holds more than one triple.
        """
        return len(graph.get_context(self._get_context_id(subject))) > 1

    @staticmethod
    def _get_context_id(subject):
        """

        :param subject: URIRef or URI from which the get context id.
        :return: context id of the resource.
        Example:
            subject -> http://www.example.org/#fragment
            context_id -> http://www.example.org/
        """
        return str(subject).split('#')[0]

    def start(self):
        """
            start method for crawling.

            Empties the graph, crawls from the root URI and records the
            elapsed time in ``self.last_process_time``. The lock is held for
            the whole run and released even if crawling raises.
        """
        with self.lock:

            # Erase old graph (snapshot first: the graph is modified while
            # its quads are being walked).
            for q in list(self.graph.quads()):
                self.graph.remove(q)

            # Crawl for data
            logging.info('Start crawling: %s', self.root)
            start_time = time.time()
            self._crawl([self.root])
            end_time = time.time()

            self.last_process_time = end_time - start_time
            logging.info('Crawling complete after: %s seconds with %s predicates.',
                         self.last_process_time, len(self.graph))

    def _crawl(self, uri_list):
        """
        Crawl RDF objects breadth-first, one generation of URIs at a time.

        A URI is attempted at most once per call: URIs that fail to parse (or
        yield too few triples to count as a context) are not retried, so the
        crawl always terminates.

        :param uri_list: list of URIs to crawl
        """
        visited = set()
        pending = list(uri_list)

        while pending:

            for uri in pending:
                visited.add(str(uri))
                try:

                    # A few considerations about parsing params.
                    #   publicID = uri due to redirection issues
                    #   Format = None due to default params use 'XML'
                    self.graph.parse(uri, publicID=uri, format=None)
                    logging.info('[OK]: %s', uri)
                except Exception as e:
                    # Best effort: a broken document must not stop the crawl.
                    logging.info('[Error]: %s: %s', uri, e)

            # Check that there are context that remains without parsing
            objects = set([self._get_context_id(o)
                           for o in set(self.graph.objects(None, None))
                           if isinstance(o, URIRef) and
                           not self._has_context(self.graph, o)])

            pending = [uri for uri in self.filter_uris(objects)
                       if str(uri) not in visited]