def del_vocab_from_creator(userid, vocab):
    if not os.path.isfile(os.path.join(ag.creatorsdir, '%s.rdf'%userid)):
        return False
    graph = Graph()
    graph.parse(os.path.join(ag.creatorsdir, '%s.rdf'%userid))
    vocab_uri = URIRef("http://vocab.ox.ac.uk/%s"%vocabprefix)
    for s, p, o in graph.triples((URIRef(vocab_uri), namespaces['dcterms']['mediator'], None)):
        graph.remove((s, p, o))
    rdf_str = None
    rdf_str = graph.serialize()
    f = codecs.open(creatorfile, 'w', 'utf-8')
    f.write(rdf_str)
    f.close()
    return True 
Beispiel #2
0
def del_vocab_from_creator(userid, vocab):
    if not os.path.isfile(os.path.join(ag.creatorsdir, '%s.rdf' % userid)):
        return False
    graph = Graph()
    graph.parse(os.path.join(ag.creatorsdir, '%s.rdf' % userid))
    vocab_uri = URIRef("http://vocab.ox.ac.uk/%s" % vocabprefix)
    for s, p, o in graph.triples(
        (URIRef(vocab_uri), namespaces['dcterms']['mediator'], None)):
        graph.remove((s, p, o))
    rdf_str = None
    rdf_str = graph.serialize()
    f = codecs.open(creatorfile, 'w', 'utf-8')
    f.write(rdf_str)
    f.close()
    return True
Beispiel #3
0
class TestSPARQLToldBNodes(unittest.TestCase):
    def setUp(self):
        NS = u"http://example.org/"
        self.graph = ConjunctiveGraph()
        self.graph.parse(StringInputSource("""
           @prefix    : <http://example.org/> .
           @prefix rdf: <%s> .
           @prefix rdfs: <%s> .
           [ :prop :val ].
           [ a rdfs:Class ]."""%(RDF.RDFNS,RDFS.RDFSNS)), format="n3")
    def testToldBNode(self):
        for s,p,o in self.graph.triples((None,RDF.type,None)):
            pass
        query = """SELECT ?obj WHERE { %s ?prop ?obj }"""%s.n3()
        print query
        rt = self.graph.query(query)
        self.failUnless(len(rt) == 1,"BGP should only match the 'told' BNode by name (result set size: %s)"%len(rt))
Beispiel #4
0
class sparql_funcs():
    def __init__(self):
        self.g = Graph('IOMemory')
        #self.endpoint = "http://www.opencorrespondence.org/data/endpoint/rdf"
        #self.g.bind('geo', geo)

    def find_places(self):
        '''
            Function to get the distinct locations mentioned in the headers of the letters. 
            These are the locations from which Dickens wrote. 
            TODO: Parsing the letters to get the places mentioned in them
        '''
        row = set()
        o = OFS()

        for b in o.list_buckets():
            endpoint = o.get_stream(b, "endpoint")

        self.g.parse(endpoint)

        for s, _, n in self.g.triples((None, dublin_core['title'], None)):
            loc_key = urllib.unquote(
                n.replace("http://www.opencorrespondence.org/place/resource/",
                          "").replace("/rdf", ""))
            row.add(self.tidy_location(loc_key))

        return row

    def tidy_location(self, location):
        '''
           Function to tidy up some of the places where they refer to the same place
           TODO: prob need some language processing to make this scalable
        '''
        ret_location = ''
        if location == 'Office Of "household Words,':
            ret_location = "Household Words"
        elif location == '"household Words" Office':
            ret_location = "Household Words"
        elif location == '"household Words"':
            ret_location = "Household Words"
        elif location == 'H. W. Office':
            ret_location = "Household Words"
        elif location == '"household Words,':
            ret_location = "Household Words"
        elif location == '"all The Year Round" Office':
            ret_location = "All The Year Round"
        elif location == 'Office Of "all The Year Round,':
            ret_location = "All The Year Round"
        elif location == "Gad's Hill Place":
            ret_location = "Gads Hill"
        elif location == "Gad's Hill":
            ret_location = "Gads Hill"
        elif location == "Gad's Hill Place, Higham":
            ret_location = "Gads Hill"
        elif location == "Tavistock House, Tavistock Square":
            ret_location = "Tavistock House"
        elif location == "London, Tavistock House":
            ret_location = "Tavistock House"
        elif location == "Tavistock House, London":
            ret_location = "Tavistock House"
        else:
            if "U.s." in location:
                location = str(location).replace("U.s", "")
            ret_location = str(location).replace(".", "")

        return ret_location

    def find_correspondents(self):
        '''
            Function to get the distinct locations mentioned in the headers of the letters. 
            These are the locations from which Dickens wrote. 
            TODO: Parsing the letters to get the places mentioned in them
        '''
        row = set()
        self.g.parse(self.endpoint)

        for s, _, n in self.g.triples((None, letter['correspondent'], None)):
            loc_key = urllib.unquote(
                n.replace(
                    "http://www.opencorrespondence.org/correspondent/resource/",
                    "").replace("/rdf", ""))
            row.add(loc_key)

        return row

    def get_abstract(self, resource_id):

        self.g.parse('http://dbpedia.org/resource/'.resource_id)
        q = '''
          SELECT *
                WHERE 
                {
                ?x dbpedia:abstract ?abstract .
                FILTER (lang(?abstract) = 'en')
                }
        '''
        for row in self.g.query(
                q,
                initNs=dict(dbpedia=Namespace("http://dbpedia.org/ontology/")),
                initBindings={}):
            return row[1]

    def query_dates(self, author):
        '''query to identify individual dates to a correspondent'''
        q = '''
        SELECT ?date
        FROM <http://localhost:5000/data/endpoint/rdf>
        WHERE {
            ?r dc:subject  \'''' + author + '''\' .  
            ?r dc:date  ?date.  
        }
        '''
        dates = []
        for row in self.g.query(
                q,
                initNs=dict(letter=Namespace(
                    "http://www.opencorrespondence.org/schema#"),
                            dc=Namespace("http://purl.org/dc/elements/1.1/")),
                initBindings={}):

            date = str(row[0]).split('-')

            if date[0][1:].isdigit():
                dates.append(date[0])
        print dates
        dic = {}

        for dt in dates:
            dic[dt] = dates.count(dt)

        return dic
Beispiel #5
0
def testN3Store(store="default", configString=None):
    g = ConjunctiveGraph(store=store)
    if configString:
        g.destroy(configString)
        g.open(configString)
    g.parse(StringInputSource(testN3), format="n3")
    print g.store
    try:
        for s, p, o in g.triples((None, implies, None)):
            formulaA = s
            formulaB = o

        assert type(formulaA) == QuotedGraph and type(formulaB) == QuotedGraph
        a = URIRef('http://test/a')
        b = URIRef('http://test/b')
        c = URIRef('http://test/c')
        d = URIRef('http://test/d')
        v = Variable('y')

        universe = ConjunctiveGraph(g.store)

        #test formula as terms
        assert len(list(universe.triples((formulaA, implies, formulaB)))) == 1

        #test variable as term and variable roundtrip
        assert len(list(formulaB.triples((None, None, v)))) == 1
        for s, p, o in formulaB.triples((None, d, None)):
            if o != c:
                assert isinstance(o, Variable)
                assert o == v
        s = list(universe.subjects(RDF.type, RDFS.Class))[0]
        assert isinstance(s, BNode)
        assert len(list(universe.triples((None, implies, None)))) == 1
        assert len(list(universe.triples((None, RDF.type, None)))) == 1
        assert len(list(formulaA.triples((None, RDF.type, None)))) == 1
        assert len(list(formulaA.triples((None, None, None)))) == 2
        assert len(list(formulaB.triples((None, None, None)))) == 2
        assert len(list(universe.triples((None, None, None)))) == 3
        assert len(
            list(formulaB.triples((None, URIRef('http://test/d'), None)))) == 2
        assert len(
            list(universe.triples((None, URIRef('http://test/d'), None)))) == 1

        #context tests
        #test contexts with triple argument
        assert len(list(universe.contexts((a, d, c)))) == 1

        #Remove test cases
        universe.remove((None, implies, None))
        assert len(list(universe.triples((None, implies, None)))) == 0
        assert len(list(formulaA.triples((None, None, None)))) == 2
        assert len(list(formulaB.triples((None, None, None)))) == 2

        formulaA.remove((None, b, None))
        assert len(list(formulaA.triples((None, None, None)))) == 1
        formulaA.remove((None, RDF.type, None))
        assert len(list(formulaA.triples((None, None, None)))) == 0

        universe.remove((None, RDF.type, RDFS.Class))

        #remove_context tests
        universe.remove_context(formulaB)
        assert len(list(universe.triples((None, RDF.type, None)))) == 0
        assert len(universe) == 1
        assert len(formulaB) == 0

        universe.remove((None, None, None))
        assert len(universe) == 0

        g.store.destroy(configString)
    except:
        g.store.destroy(configString)
        raise
Beispiel #6
0
def testN3Store(store="default", configString=None):
    g = ConjunctiveGraph(store=store)
    if configString:
        g.destroy(configString)
        g.open(configString)
    g.parse(StringInputSource(testN3), format="n3")
    print g.store
    try:
        for s,p,o in g.triples((None,implies,None)):
            formulaA = s
            formulaB = o

        assert type(formulaA)==QuotedGraph and type(formulaB)==QuotedGraph
        a = URIRef('http://test/a')
        b = URIRef('http://test/b')
        c = URIRef('http://test/c')
        d = URIRef('http://test/d')
        v = Variable('y')

        universe = ConjunctiveGraph(g.store)

        #test formula as terms
        assert len(list(universe.triples((formulaA,implies,formulaB))))==1

        #test variable as term and variable roundtrip
        assert len(list(formulaB.triples((None,None,v))))==1
        for s,p,o in formulaB.triples((None,d,None)):
            if o != c:
                assert isinstance(o,Variable)
                assert o == v
        s = list(universe.subjects(RDF.type, RDFS.Class))[0]
        assert isinstance(s,BNode)
        assert len(list(universe.triples((None,implies,None)))) == 1
        assert len(list(universe.triples((None,RDF.type,None)))) ==1
        assert len(list(formulaA.triples((None,RDF.type,None))))==1
        assert len(list(formulaA.triples((None,None,None))))==2
        assert len(list(formulaB.triples((None,None,None))))==2
        assert len(list(universe.triples((None,None,None))))==3
        assert len(list(formulaB.triples((None,URIRef('http://test/d'),None))))==2
        assert len(list(universe.triples((None,URIRef('http://test/d'),None))))==1

        #context tests
        #test contexts with triple argument
        assert len(list(universe.contexts((a,d,c))))==1

        #Remove test cases
        universe.remove((None,implies,None))
        assert len(list(universe.triples((None,implies,None))))==0
        assert len(list(formulaA.triples((None,None,None))))==2
        assert len(list(formulaB.triples((None,None,None))))==2

        formulaA.remove((None,b,None))
        assert len(list(formulaA.triples((None,None,None))))==1
        formulaA.remove((None,RDF.type,None))
        assert len(list(formulaA.triples((None,None,None))))==0

        universe.remove((None,RDF.type,RDFS.Class))


        #remove_context tests
        universe.remove_context(formulaB)
        assert len(list(universe.triples((None,RDF.type,None))))==0
        assert len(universe)==1
        assert len(formulaB)==0

        universe.remove((None,None,None))
        assert len(universe)==0

        g.store.destroy(configString)
    except:
        g.store.destroy(configString)
        raise
Beispiel #7
0
from __future__ import division
import sys, time
sys.path.append('pytc-0.7/build/lib.linux-x86_64-2.5')
sys.path.insert(0, 'rdflib/build/lib.linux-x86_64-2.5')

from rdflib.Graph import ConjunctiveGraph
from rdflib import Namespace, Literal

EX = Namespace("http://example.com/")
graph = ConjunctiveGraph('TokyoCabinet')
graph.open("tcgraph", create=True)
graph.add((EX['hello'], EX['world'], Literal("lit")))
print list(graph.triples((None, None, None)))
graph.close()

Beispiel #8
0
class sparql_funcs():
    
    def __init__(self):
        self.g = Graph('IOMemory')
        #self.endpoint = "http://www.opencorrespondence.org/data/endpoint/rdf"
        #self.g.bind('geo', geo)


    def find_places(self):
        '''
            Function to get the distinct locations mentioned in the headers of the letters. 
            These are the locations from which Dickens wrote. 
            TODO: Parsing the letters to get the places mentioned in them
        '''
        row = set()
        o = OFS()
        
        for b in o.list_buckets():
            endpoint = o.get_stream(b, "endpoint")

        self.g.parse(endpoint)

        for s,_,n in self.g.triples((None, dublin_core['title'], None)):
            loc_key = urllib.unquote(n.replace("http://www.opencorrespondence.org/place/resource/", "").replace("/rdf",""))
            row.add(self.tidy_location(loc_key))

        return row
    
    def tidy_location (self, location):
        '''
           Function to tidy up some of the places where they refer to the same place
           TODO: prob need some language processing to make this scalable
        '''
        ret_location = '';
        if location == 'Office Of "household Words,':
            ret_location = "Household Words"
        elif location== '"household Words" Office':
            ret_location = "Household Words"
        elif location== '"household Words"':
            ret_location = "Household Words"
        elif location== 'H. W. Office':
            ret_location = "Household Words"
        elif location == '"household Words,':
            ret_location = "Household Words"
        elif location == '"all The Year Round" Office':
            ret_location = "All The Year Round"
        elif location == 'Office Of "all The Year Round,':
            ret_location = "All The Year Round"
        elif location == "Gad's Hill Place":
            ret_location = "Gads Hill"
        elif location == "Gad's Hill":
            ret_location = "Gads Hill"
        elif location == "Gad's Hill Place, Higham":
            ret_location = "Gads Hill"
        elif location == "Tavistock House, Tavistock Square":
            ret_location = "Tavistock House"
        elif location == "London, Tavistock House":
            ret_location = "Tavistock House"
        elif location == "Tavistock House, London":
            ret_location = "Tavistock House"
        else:
            if "U.s." in location:
                location = str(location).replace("U.s", "")
            ret_location = str(location).replace(".", "")    
            
        return ret_location
    
    def find_correspondents(self):
        '''
            Function to get the distinct locations mentioned in the headers of the letters. 
            These are the locations from which Dickens wrote. 
            TODO: Parsing the letters to get the places mentioned in them
        '''
        row = set()
        self.g.parse(self.endpoint)

        for s,_,n in self.g.triples((None, letter['correspondent'], None)):
            loc_key = urllib.unquote(n.replace("http://www.opencorrespondence.org/correspondent/resource/", "").replace("/rdf", ""))
            row.add(loc_key)

        return row
    
    def get_abstract (self, resource_id):
        
        self.g.parse('http://dbpedia.org/resource/'.resource_id)
        q = '''
          SELECT *
                WHERE 
                {
                ?x dbpedia:abstract ?abstract .
                FILTER (lang(?abstract) = 'en')
                }
        '''
        for row in self.g.query(q,
                   initNs=dict(dbpedia=Namespace("http://dbpedia.org/ontology/")),
                   initBindings={}):
            return row[1]

    
    def query_dates(self, author):
        '''query to identify individual dates to a correspondent'''
        q = '''
        SELECT ?date
        FROM <http://localhost:5000/data/endpoint/rdf>
        WHERE {
            ?r dc:subject  \'''' + author + '''\' .  
            ?r dc:date  ?date.  
        }
        '''
        dates = []
        for row in self.g.query(q,
                       initNs=dict(letter=Namespace("http://www.opencorrespondence.org/schema#"), dc=Namespace("http://purl.org/dc/elements/1.1/")),
                       initBindings={}):
            
            date = str(row[0]).split('-')
        
            if date[0][1:].isdigit():
                dates.append(date[0])
        print dates
        dic = {}

        for dt in dates:
            dic[dt] = dates.count(dt)
    
        return dic