Example #1
 def _sparql_construct(self, q, cursor):
     log.debug("_sparql_construct")
     g = Graph()
     results = cursor.execute(q.encode("utf-8"))
     for result in results:
         g.add(tuple(resolve(cursor, x) for x in result))  # add() expects one (s, p, o) tuple
     return g
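Note: `resolve` and the cursor come from the surrounding store module and are not shown here. Purely as an illustration of what the helper is assumed to do, a hypothetical sketch:

from rdflib.term import URIRef, Literal

def resolve(cursor, value):
    # Hypothetical stand-in: turn a raw driver value into an rdflib term.
    # The real helper inspects driver-specific type metadata on the cursor.
    if isinstance(value, str) and value.startswith('http'):
        return URIRef(value)
    return Literal(value)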
Example #2
class BaseParser(object):
    """ Abstract base class that parsers for specific file formats must subclass. """
    def __init__(self, file, format, db_config_path):
        """ Sole constructor.
        :param file: Path to input file location.
        :param format: String defining file format (e.g. 'nt')
        :param db_config_path: Path to redis database configuration file (see geodb.GeoDB class).
        """
        self.graph = Graph()
        self.file = file
        self.format = format
        self.db = GeoDB(db_config_path)

    def parse(self, feed_database=False):
        """ Reads and parses the input file.
        See RDFLib documentation (http://rdflib.readthedocs.org/en/latest/)
        for further information.
        """
        log.debug('Parsing input file...')
        self.graph.parse(self.file, format=self.format)
        log.debug('Number of parsed entries: %d' % len(self.graph))
        if feed_database:
            self.feed_database()

    def feed_database(self):
        """ Abstract method. It fills the database depending on the data model and the file format. """
        raise NotImplementedError('This is an abstract method!')

    def get_db(self):
        """ Retrieves the redis database instance. """
        return self.db
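A minimal concrete subclass might look like the sketch below; `feed_database` is the only method a subclass must provide, and `add_entry` is a hypothetical stand-in for whatever write API the real GeoDB class exposes.

class NTriplesParser(BaseParser):
    """ Illustrative subclass: pushes every parsed triple into the redis database. """

    def feed_database(self):
        for subject, predicate, obj in self.graph:
            # add_entry is assumed here for illustration; use the real GeoDB API
            self.db.add_entry(str(subject), str(predicate), str(obj))

parser = NTriplesParser('data.nt', 'nt', 'geodb.cfg')
parser.parse(feed_database=True)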
Example #3
    def request(self, environ, start_response, method, negotiated):
        from rdflib.graph import Graph
        from rdflib.term import URIRef

        negotiated = list(negotiated)

        # invert the autoneg dictionary first so we can check whether
        # the request path ends with one of the file extensions that
        # it was asked for directly
        extmap = {}
        for content_type, extlist in negotiated:
            for ext in extlist:
                extmap.setdefault(ext, content_type)

        # get the graph uri that has been requested
        path = self.get_path(environ)

        # if it ends with an extension, use that
        content_type = None
        for ext, ext_content_type in extmap.items():
            if path.endswith("." + ext):
                path = path[: -len(ext) - 1]
                content_type = ext_content_type
                break

        # otherwise use the content-negotiation
        if content_type is None:
            content_type = negotiated[0][0]

        # initialise the graph over the store
        g = Graph(self.store, identifier=URIRef(path))

        # send the serialised graph
        start_response("200 OK", [("Content-type", content_type), ("Vary", "Accept")])
        yield g.serialize(format=self.serialisations.get(content_type, "pretty-xml"))
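The extension-map inversion at the top of the handler can be checked in isolation; the `negotiated` shape below mirrors the (content_type, extension_list) pairs the method receives:

negotiated = [('text/turtle', ['ttl']), ('application/rdf+xml', ['rdf', 'xml'])]
extmap = {}
for content_type, extlist in negotiated:
    for ext in extlist:
        extmap.setdefault(ext, content_type)
assert extmap == {'ttl': 'text/turtle', 'rdf': 'application/rdf+xml', 'xml': 'application/rdf+xml'}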
Example #4
def test_e():
    """Test reading N3 from a BytesIO over the string object"""
    g = Graph()
    g.parse(source=BytesIO(rdf_utf8), format='n3')
    v = g.value(subject=URIRef("http://www.test.org/#CI"),
                predicate=URIRef("http://www.w3.org/2004/02/skos/core#prefLabel"))
    assert v == Literal(u"C\u00f4te d'Ivoire", lang='fr')
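Examples #4 through #7 (and #11 below) assume fixtures defined elsewhere in the original test module. A plausible minimal reconstruction, containing just the skos:prefLabel triple the assertions check:

import codecs
from io import BytesIO

rdf = u"""@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
<http://www.test.org/#CI> skos:prefLabel "C\u00f4te d'Ivoire"@fr ."""
rdf_utf8 = rdf.encode('utf-8')
rdf_reader = codecs.getreader('utf-8')(BytesIO(rdf_utf8))

rdfxml = u"""<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:skos="http://www.w3.org/2004/02/skos/core#">
  <rdf:Description rdf:about="http://www.test.org/#CI">
    <skos:prefLabel xml:lang="fr">C\u00f4te d'Ivoire</skos:prefLabel>
  </rdf:Description>
</rdf:RDF>"""
rdfxml_utf8 = rdfxml.encode('utf-8')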
Example #5
def test_xml_a():
    """Test reading XML from a unicode object as data"""
    g = Graph()
    g.parse(data=rdfxml, format='xml')
    v = g.value(subject=URIRef("http://www.test.org/#CI"),
                predicate=URIRef("http://www.w3.org/2004/02/skos/core#prefLabel"))
    assert v == Literal(u"C\u00f4te d'Ivoire", lang='fr')
Example #6
def test_xml_e():
    """Test reading XML from a BytesIO created from utf8 encoded string"""
    g = Graph()
    g.parse(source=BytesIO(rdfxml_utf8), format='xml')
    v = g.value(subject=URIRef("http://www.test.org/#CI"),
                predicate=URIRef("http://www.w3.org/2004/02/skos/core#prefLabel"))
    assert v == Literal(u"C\u00f4te d'Ivoire", lang='fr')
Example #7
def test_b():
    """Test reading N3 from a utf8 encoded string as data"""
    g = Graph()
    g.parse(data=rdf_utf8, format='n3')
    v = g.value(subject=URIRef("http://www.test.org/#CI"),
                predicate=URIRef("http://www.w3.org/2004/02/skos/core#prefLabel"))
    assert v == Literal(u"C\u00f4te d'Ivoire", lang='fr')
Example #8
def read_project(request, project_uri):
    """Returns an HttpResponse of the cached project metadata graph"""
    project_uri = URIRef(project_uri)

    if request.user.is_authenticated():
        if permissions.has_permission_over(project_uri, user=request.user, permission=NS.perm.mayRead):
            identifier = uris.uri('semantic_store_projects', uri=project_uri)
            store_metadata_graph = get_project_metadata_graph(project_uri)
            ret_graph = Graph()
            ret_graph += store_metadata_graph

            add_is_described_bys(request, project_uri, ret_graph)

            for permission in ProjectPermission.objects.filter(identifier=project_uri):
                user = permission.user
                user_uri = uris.uri('semantic_store_users', username=user.username)
                perm_uri = permissions.PERMISSION_URIS_BY_MODEL_VALUE[permission.permission]

                ret_graph += user_metadata_graph(user=user)
                ret_graph.add((user_uri, NS.perm.hasPermissionOver, project_uri))
                ret_graph.add((user_uri, perm_uri, project_uri))
            
            if len(ret_graph) > 0:
                return NegotiatedGraphResponse(request, ret_graph)
            else:
                return HttpResponseNotFound()
        else:
            return HttpResponseForbidden('User "%s" does not have read permissions over project "%s"' % (request.user.username, project_uri))
    else:
        return HttpResponse(status=401)
Example #9
def delete_triples_from_project(request, uri):
    """Deletes the triples provided in the request body from the project graph.
    Returns an HttpResponse of all the triples which were successfully removed from the graph."""
    if request.user.is_authenticated():
        if permissions.has_permission_over(uri, user=request.user, permission=NS.perm.mayUpdate):
            removed = Graph()
            bind_namespaces(removed)

            try:
                g = parse_request_into_graph(request)
            except (ParserError, SyntaxError) as e:
                return HttpResponse(status=400, content="Unable to parse serialization.\n%s" % e)

            project_g = get_project_graph(uri)
            project_metadata_g = get_project_metadata_graph(uri)

            for t in g:
                if t in project_g:
                    project_g.remove(t)
                    removed.add(t)
                project_metadata_g.remove(t)

            return NegotiatedGraphResponse(request, removed)
        else:
            return HttpResponseForbidden('User "%s" does not have update permissions over project "%s"' % (request.user.username, uri))
    else:
        return HttpResponse(status=401)
Example #10
class Processor(object):
    def __init__(self, stream_urls):
        self.client = client.Client(stream_urls,
                                    event_callback=self._handle_event,
                                    error_callback=self._handle_error,
                                    separate_events=False)
        self.triple_store = Graph('Sleepycat',
                                  'http://www.it.uc3m.es/jaf/ns/slog/db')
        self.db_dir = 'dbdir'

    def start(self, loop=False):
        self.triple_store.open(self.db_dir)
        self.client.start(loop=loop)

    def stop(self):
        self.triple_store.close()
        self.client.stop()

    def _handle_error(self, message, http_error=None):
        pass

    def _handle_event(self, evs):
        print('Received {} events.'.format(len(evs)))
        for event in evs:
            self.triple_store += event.body
        print(len(self.triple_store))
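A hedged usage sketch: the `client` module is project-specific, the stream URL is a placeholder, and the 'Sleepycat' store additionally requires the Berkeley DB bindings to be installed.

processor = Processor(['http://example.org/events/stream'])
processor.start(loop=True)   # opens dbdir and blocks, folding each event body into the store
processor.stop()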
Example #11
def test_c():
    """Test reading N3 from a codecs.StreamReader, outputting unicode"""
    g = Graph()
#    rdf_reader.seek(0)
    g.parse(source=rdf_reader, format='n3')
    v = g.value(subject=URIRef("http://www.test.org/#CI"), predicate=URIRef("http://www.w3.org/2004/02/skos/core#prefLabel"))
    assert v==Literal(u"C\u00f4te d'Ivoire", lang='fr')
Example #12
class NegationOfAtomicConcept(unittest.TestCase):
    def setUp(self):
        self.ontGraph = Graph()
        self.ontGraph.bind('ex', EX_NS)
        self.ontGraph.bind('owl', OWL_NS)
        Individual.factoryGraph = self.ontGraph

    def testAtomicNegation(self):
        bar=EX.Bar
        baz=~bar
        baz.identifier = EX_NS.Baz
        ruleStore,ruleGraph,network=SetupRuleStore(makeNetwork=True)
        individual=BNode()
        individual2=BNode()
        (EX.OtherClass).extent = [individual]
        bar.extent = [individual2]
        NormalFormReduction(self.ontGraph)
        self.assertEqual(repr(baz),
                         "Class: ex:Baz DisjointWith ex:Bar\n")
        posRules,negRules=CalculateStratifiedModel(network,self.ontGraph,[EX_NS.Foo])
        self.failUnless(not posRules,"There should be no rules in the 0 strata!")
        self.failUnless(len(negRules)==1,"There should only be one negative rule in a higher strata")
        self.assertEqual(repr(negRules[0]),
                         "Forall ?X ( ex:Baz(?X) :- not ex:Bar(?X) )")
        baz.graph = network.inferredFacts
        self.failUnless(individual in baz.extent,
                        "%s should be a member of ex:Baz"%individual)
        self.failUnless(individual2 not in baz.extent,
                        "%s should *not* be a member of ex:Baz"%individual2)
Example #13
class NegatedDisjunctTest(unittest.TestCase):
    def setUp(self):
        self.ontGraph = Graph()
        self.ontGraph.bind('ex', EX_NS)
        self.ontGraph.bind('owl', OWL_NS)
        Individual.factoryGraph = self.ontGraph

    def testStratified(self):
        bar=EX.Bar
        baz=EX.Baz
        noBarOrBaz = ~(bar|baz)
        omega = EX.Omega
        foo = omega & noBarOrBaz
        foo.identifier = EX_NS.Foo
        ruleStore,ruleGraph,network=SetupRuleStore(makeNetwork=True)
        individual=BNode()
        omega.extent = [individual]
        NormalFormReduction(self.ontGraph)
        self.assertEqual(repr(foo),
                         "ex:Omega that ( not ex:Bar ) and ( not ex:Baz )")
        posRules,negRules=CalculateStratifiedModel(network,self.ontGraph,[EX_NS.Foo])
        foo.graph = network.inferredFacts
        self.failUnless(not posRules,"There should be no rules in the 0 strata!")
        self.assertEqual(repr(negRules[0]),"Forall ?X ( ex:Foo(?X) :- And( ex:Omega(?X) not ex:Bar(?X) not ex:Baz(?X) ) )")
        self.failUnless(len(negRules)==1,"There should only be one negative rule in a higher strata")
        self.failUnless(individual in foo.extent,
                        "%s should be a member of ex:Foo"%individual)
Example #14
    def setUp(self):
        self.graph1 = Graph(store=self.store_name)
        self.graph2 = Graph(store=self.store_name)

        self.graph1.open(self.settings1, True)
        self.graph2.open(self.settings2, True)

        self.oNS = Namespace("http://www.example.org/rdf/things#")
        self.sNS = Namespace("http://www.example.org/rdf/people#")
        self.pNS = Namespace("http://www.example.org/rdf/relations/")

        self.graph1.bind('people',self.sNS)
        self.graph1.bind('relations',self.pNS)
        self.graph1.bind('things',self.oNS)
        self.graph2.bind('people',self.sNS)
        self.graph2.bind('relations',self.pNS)
        self.graph2.bind('things',self.oNS)

        self.michel = self.sNS.michel
        self.tarek = self.sNS.tarek
        self.alice = self.sNS.alice
        self.bob = self.sNS.bob
        self.likes = self.pNS.likes
        self.hates = self.pNS.hates
        self.named = self.pNS.named
        self.pizza = self.oNS.pizza
        self.cheese = self.oNS.cheese
Example #15
 def open(self, configuration, create=False):
     """Raise a ModificationException if create, as this graph is read-only.
     """
     if create:
         raise ModificationException() #ReadOnlyGraph does not support this
     else:
         Graph.open(self, configuration, create)
Example #16
    def enhance(self, content, input=Format.TEXT, output=Format.JSON):
        analysis = self.status["analyses"][0]
        params = {
            self.param_in: input.name,
            self.param_out: output.name
        }
        resource = self._build_url("/%s/%s/%s" % (self.path, analysis, self.enhance_path), params)
        logging.debug("Making request to %s" % resource)

        response = self._post(resource, content, input.mimetype, output.mimetype)

        if response.status_code != 200:
            logging.error("Enhance request returned %d: %s" % (response.status_code, response.reason))
            return response.text
        else:
            contentType = from_mimetype(response.headers["Content-Type"])
            if contentType == Format.JSON or contentType == Format.REDLINKJSON:
                return json.loads(response.text)
            elif contentType == Format.XML or contentType == Format.REDLINKXML:
                return minidom.parse(response.text)
            elif contentType.rdflibMapping:
                g = Graph()
                g.parse(data=response.text, format=contentType.rdflibMapping)
                return g
            else:
                logging.warn("Handler not found for %s, so returning raw text response..." % contentType.mimetype)
                return response.text
Example #17
class Test(unittest.TestCase):


    def setUp(self):
        self.store = CharmeMiddleware.get_store(debug = True)
        
        
        self.graph = 'submitted'
        self.identifier = '%s/%s' % (getattr(settings, 'SPARQL_DATA'), 
                                     self.graph)
        self.g = Graph(store=self.store, identifier=self.identifier)               
        self.factory = RequestFactory()       

    def tearDown(self):   
        for res in self.g:
            self.g.remove(res)


    def test_PUT(self): 
        #self.test_insert_anotation()

        graph = format_graphIRI('submitted')
        request = self.factory.put('/endpoint?graph=%s' % graph, 
                                   data = turtle_usecase1,
                                   content_type = 'text/turtle')
        response = endpoint(request)
        self.assert_(response.status_code in [200, 204], "HTTPResponse has status_code: %s" 
                     % response.status_code)
Example #18
    def __init__(self,ruleStore,name = None,
                 initialWorkingMemory = None,
                 inferredTarget = None,
                 nsMap = {},
                 graphVizOutFile=None,
                 dontFinalize=False,
                 goal=None):
        self.leanCheck = {}
        self.goal = goal
        self.nsMap = nsMap
        self.name = name and name or BNode()
        self.nodes = {}
        self.alphaPatternHash = {}
        self.ruleSet = set()
        for alphaPattern in xcombine(('1','0'),('1','0'),('1','0')):
            self.alphaPatternHash[tuple(alphaPattern)] = {}
        if inferredTarget is None:
            self.inferredFacts = Graph()
            namespace_manager = NamespaceManager(self.inferredFacts)
            for k,v in nsMap.items():
                namespace_manager.bind(k, v)
            self.inferredFacts.namespace_manager = namespace_manager
        else:
            self.inferredFacts = inferredTarget
        self.workingMemory = initialWorkingMemory and initialWorkingMemory or set()
        self.proofTracers = {}
        self.terminalNodes  = set()
        self.instantiations = {}
        start = time.time()
        self.ruleStore=ruleStore
        self.justifications = {}
        self.dischargedBindings = {}
        if not dontFinalize:
            self.ruleStore._finalize()
        self.filteredFacts = Graph()

        #'Universal truths' for a rule set are rules where the LHS is empty.
        # Rather than automatically adding them to the working set, alpha nodes are 'notified'
        # of them, so they can be checked for while performing inter element tests.
        self.universalTruths = []
        from FuXi.Horn.HornRules import Ruleset
        self.rules=set()
        self.negRules = set()
        for rule in Ruleset(n3Rules=self.ruleStore.rules,nsMapping=self.nsMap):
            import warnings
            warnings.warn(
          "Rules in a network should be built *after* construction via "+
          " self.buildNetworkClause(HornFromN3(n3graph)) for instance",
                          DeprecationWarning,2)
            self.buildNetworkFromClause(rule)
        self.alphaNodes = [node for node in self.nodes.values() if isinstance(node,AlphaNode)]
        self.alphaBuiltInNodes = [node for node in self.nodes.values() if isinstance(node,BuiltInAlphaNode)]
        self._setupDefaultRules()
        if initialWorkingMemory:
            start = time.time()
            self.feedFactsToAdd(initialWorkingMemory)
            print("Time to calculate closure on working memory: %s milliseconds" % ((time.time() - start) * 1000), file=sys.stderr)
        if graphVizOutFile:
            print("Writing out RETE network to", graphVizOutFile, file=sys.stderr)
            renderNetwork(self,nsMap=nsMap).write(graphVizOutFile)
Example #19
 def test_16_bnode(self):
     g = Graph(store, identifier=TEST_GRAPH)
     b = BNode()
     g.add((b, RDF.type, RDFS.Resource))
     # get a new graph just to be sure
     g = Graph(store, identifier=TEST_GRAPH)
     assert (b, RDF.type, RDFS.Resource) in g
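`store` and `TEST_GRAPH` are module-level fixtures the test assumes; a plausible reconstruction:

from rdflib import plugin, URIRef
from rdflib.store import Store

TEST_GRAPH = URIRef('http://example.org/test-graph')
store = plugin.get('Memory', Store)()   # the plugin is named 'IOMemory' in rdflib < 6

Because both Graph instances wrap the same store object, the blank node added through the first graph is visible through the second.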
Example #20
    def testIssue29(self):
        input = """@prefix foo-bar: <http://example.org/> .

foo-bar:Ex foo-bar:name "Test" . """

        g = Graph()
        g.parse(data=input, format="n3")
Example #21
class NonEqualityPredicatesTestSuite(unittest.TestCase):

    def setUp(self):
        from FuXi.Rete.RuleStore import N3RuleStore
        from FuXi.Rete import ReteNetwork
        from FuXi.Rete.Util import generateTokenSet
        self.testGraph = Graph()
        self.ruleStore = N3RuleStore()
        self.ruleGraph = Graph(self.ruleStore)
        self.ruleGraph.parse(StringIO(testN3), format='n3')
        self.testGraph.parse(StringIO(testN3), format='n3')
        self.closureDeltaGraph = Graph()
        self.network = ReteNetwork(
            self.ruleStore,
            initialWorkingMemory=generateTokenSet(self.testGraph),
            inferredTarget=self.closureDeltaGraph,
            nsMap={})

    def testParseBuiltIns(self):
        # from FuXi.Rete.RuleStore import N3Builtin
        from FuXi.Rete.AlphaNode import BuiltInAlphaNode
        self.failUnless(len(self.ruleStore.rules) > 0,
                        "No rules parsed out from N3.")
        for alphaNode in self.network.alphaNodes:
            if isinstance(alphaNode, BuiltInAlphaNode):
                self.failUnless(alphaNode.n3builtin.uri == MATH_NS.greaterThan,
                                "Unable to find math:greaterThan func")

    def testEvaluateBuiltIns(self):
        # from FuXi.Rete.RuleStore import N3Builtin
        # from FuXi.Rete.AlphaNode import BuiltInAlphaNode
        self.failUnless(first(
            self.closureDeltaGraph.triples(
                (None, URIRef('http://test/pred1'), Literal(3)))),
            "Missing inferred :pred1 assertions")
Example #22
class GraphAggregates2(unittest.TestCase):
    # known_issue = True

    def setUp(self):
        memStore = plugin.get("SQLAlchemy", Store)(identifier="rdflib_test", configuration=Literal("sqlite://"))
        self.graph1 = Graph(memStore, URIRef("http://example.com/graph1"))
        self.graph2 = Graph(memStore, URIRef("http://example.com/graph2"))
        self.graph3 = Graph(memStore, URIRef("http://example.com/graph3"))

        for n3Str, graph in [(testGraph1N3, self.graph1), (testGraph2N3, self.graph2), (testGraph3N3, self.graph3)]:
            graph.parse(StringIO(n3Str), format="n3")
        self.graph4 = Graph(memStore, RDFS.uri)
        self.graph4.parse(RDFS.uri)
        self.G = ConjunctiveGraph(memStore)

    def testAggregateSPARQL(self):
        raise SkipTest("known_issue with SELECT from NAMED")
        rt = self.G.query(sparqlQ)
        assert len(rt) > 1
        rt = self.G.query(sparqlQ2, initBindings={u"?graph": URIRef(u"http://example.com/graph3")})
        try:
            import json

            assert json
        except ImportError:
            import simplejson as json
        res = json.loads(rt.serialize(format="json").decode("utf-8"))
        assert len(res["results"]["bindings"]) == 20, len(res["results"]["bindings"])
Example #23
 def _find_graph(self):
     if self._is_dbpedia_uri():
         return SPARQLGraph('http://dbpedia.org/sparql')
     else:
         graph = Graph()
         graph.parse(self.uri)
         return graph
Example #24
class ParserTestCase(unittest.TestCase):
    backend = 'default'
    path = 'store'

    def setUp(self):
        self.graph = Graph(store=self.backend)
        self.graph.open(self.path)

    def tearDown(self):
        self.graph.close()

    def testNoPathWithHash(self):
        g = self.graph
        g.parse(data="""\
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<rdf:RDF
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
>

<rdfs:Class rdf:about="http://example.org#">
  <rdfs:label>testing</rdfs:label>
</rdfs:Class>

</rdf:RDF>
""", publicID="http://example.org")

        subject = URIRef("http://example.org#")
        label = g.value(subject, RDFS.label)
        self.assertEqual(label, Literal("testing"))
        type = g.value(subject, RDF.type)
        self.assertEqual(type, RDFS.Class)
Example #25
def test_rdf(mfile):
    g = Graph()
    try:
        g.parse(mfile, format='xml')
        return True
    except Exception:
        return False
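For example, to keep only the files in a directory that parse as RDF/XML (the directory name is a placeholder):

import os

rdf_files = [f for f in os.listdir('data') if test_rdf(os.path.join('data', f))]

Note that the broad except also swallows I/O errors, so a missing file is reported the same way as invalid RDF.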
Example #26
    def testBaseCumulative(self):
        """
        Test that the n3 parser supports base declarations
        This is issue #22
        """

        input = """
@prefix : <http://example.com/> .
# default base
<foo> :name "Foo" .
# change it
@base <http://example.com/doc/> .
<bar> :name "Bar" .
# and change it more - they are cumulative
@base <doc2/> .
<bing> :name "Bing" .
# unless absolute
@base <http://test.com/> .
<bong> :name "Bong" .

"""
        g = Graph()
        g.parse(data=input, format="n3")
        print(list(g))
        self.assertTrue((None, None, Literal('Foo')) in g)
        self.assertTrue(
            (URIRef('http://example.com/doc/bar'), None, None) in g)
        self.assertTrue(
            (URIRef('http://example.com/doc/doc2/bing'), None, None) in g)
        self.assertTrue((URIRef('http://test.com/bong'), None, None) in g)
Example #27
    def fromuri2(self, uri):

        self.uri = uri

        if not uri.startswith('http://rdf.freebase.com'):
            self.checkuri()
            try:
                g = Graph()
                g.parse(self.uri)

                if g:
                    logger.info("INFO process_rdf.py - returning graph for " + self.uri)
                    return g

                else:
                    raise Exception('Nothing was returned, probably caused by the URL serving no RDF, or bad RDF (e.g. Freebase): '
                                    '"No handlers could be found for logger process_rdf.py" -- uri was ' + self.uri)

            except URLError as e:
                logger.error("URLError process_rdf.py - " + e.message)
                raise Exception('URLError, caused by either a bad URL or no internet connection - ' + e.message + ' (uri was ' + self.uri + ')')
            except SAXParseException as e:
                logger.error("SAXParseException process_rdf.py - " + e.message + ' (uri was ' + self.uri + ')')
                raise Exception('SAXParseException')
            except AttributeError as e:
                logger.error("AttributeError process_rdf.py - " + e.message + ' (uri was ' + self.uri + ')')
                raise Exception('AttributeError')
        else:
            self.fromfreebaseuri()
Example #28
    def testGraphIntersection(self):
        g1 = Graph()
        g2 = Graph()

        g1.add((self.tarek, self.likes, self.pizza))
        g1.add((self.michel, self.likes, self.cheese))

        g2.add((self.bob, self.likes, self.cheese))
        g2.add((self.michel, self.likes, self.cheese))

        g3 = g1 * g2

        self.assertEquals(len(g3), 1)
        self.assertEquals((self.tarek, self.likes, self.pizza) in g3, False)
        self.assertEquals((self.tarek, self.likes, self.cheese) in g3, False)

        self.assertEquals((self.bob, self.likes, self.cheese) in g3, False)

        self.assertEquals((self.michel, self.likes, self.cheese) in g3, True)

        g1 *= g2

        self.assertEquals(len(g1), 1)

        self.assertEquals((self.tarek, self.likes, self.pizza) in g1, False)
        self.assertEquals((self.tarek, self.likes, self.cheese) in g1, False)

        self.assertEquals((self.bob, self.likes, self.cheese) in g1, False)

        self.assertEquals((self.michel, self.likes, self.cheese) in g1, True)
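The same operators work on standalone in-memory graphs; a self-contained check of intersection (*) next to union (+):

from rdflib import Graph, URIRef

EX = 'http://example.org/'
tarek, michel, bob = (URIRef(EX + n) for n in ('tarek', 'michel', 'bob'))
likes, pizza, cheese = (URIRef(EX + n) for n in ('likes', 'pizza', 'cheese'))

g1, g2 = Graph(), Graph()
g1.add((tarek, likes, pizza))
g1.add((michel, likes, cheese))
g2.add((bob, likes, cheese))
g2.add((michel, likes, cheese))

assert set(g1 * g2) == {(michel, likes, cheese)}   # intersection
assert len(g1 + g2) == 3                           # union of distinct triples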
Example #29
    def _new_sioc_post(self, topic_uri, recipient_uri):
        """ Create a new rdf/xml sioc:Post based on a topic, and for a given recipient.
            Timestamp is set to now, and sender is taken from WebID authenticated person (TBC). """

        uri = self.message_uri_prefix + uuid.uuid1().hex

        graph = Graph()
        graph.add(
            (rdflib.URIRef(uri),
             rdflib.URIRef("http://xmlns.com/foaf/0.1/primaryTopic"),
             rdflib.URIRef(topic_uri)))

        graph.add(
            (rdflib.URIRef(uri),
             rdflib.URIRef("http://rdfs.org/sioc/ns#addressed_to"),
             rdflib.URIRef(recipient_uri)))

        graph.add(
            (rdflib.URIRef(uri),
             rdflib.URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
             rdflib.URIRef(self.sioc_ns+"Post")))

        graph.add(
            (rdflib.URIRef(uri),
             rdflib.URIRef("http://purl.org/dc/terms/created"),
             rdflib.Literal(strftime("%Y-%m-%dT%H:%M:%SZ")))) # FIXME forces zulu time, which may be technically incorrect
        
        rdf = graph.serialize(format="xml") # rdf/xml
        return {"rdf": rdf, "uri": uri}
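A usage sketch of the return value; `handler` stands in for an instance of the enclosing class, and the URIs are placeholders:

post = handler._new_sioc_post(
    'http://example.org/topics/42',            # topic_uri
    'http://example.org/people/alice#me')      # recipient_uri
print(post['uri'])   # the newly minted message URI
print(post['rdf'])   # RDF/XML serialization of the four triples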
Example #30
def harvest_collection(col_url, col_uri, store_host, manifest_file=None):
    store_host = clean_store_host(store_host)
    with transaction.commit_on_success():        
        col_g = Graph(store=rdfstore(), identifier=URIRef(col_uri))
        collection.fetch_and_parse(col_url, col_g, manifest_file=manifest_file)
        localize_describes(store_host, col_uri, col_url, col_g)

        res_uris_urls = collection.aggregated_uris_urls(col_uri, col_g)
        for res_uri, res_url in res_uris_urls:
            res_g = Graph(store=rdfstore(), identifier=URIRef(res_uri))
            collection.fetch_and_parse(res_url, res_g)
            for pred in col_res_attributes:
                for t in res_g.triples((res_uri, pred, None)):
                    col_g.add(t)

            aggr_uris_urls = collection.aggregated_uris_urls(res_uri, res_g)
            for aggr_uri, aggr_url in aggr_uris_urls:
                if aggr_url:
                    collection.fetch_and_parse(aggr_url, res_g)
                    localize_describes(store_host, aggr_uri, aggr_url, res_g)
                    localize_describes(store_host, aggr_uri, aggr_url, col_g)

            seq_uris_urls = collection.aggregated_seq_uris_urls(res_uri, res_g)
            for seq_uri, seq_url in seq_uris_urls:
                page_uris_urls = collection.aggregated_uris_urls(seq_uri, res_g)
                for page_uri, page_url in page_uris_urls:
                    localize_describes(store_host, page_uri, page_url, res_g)
            localize_describes(store_host, res_uri, res_url, res_g)
            localize_describes(store_host, res_uri, res_url, col_g)
Example #31
 def parse(self, source, cType):
     g = Graph()
     return g.parse(data=source, format=self.fmt[cType])
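`self.fmt` (defined elsewhere on the class) maps a content type to an rdflib parser name; a plausible mapping, shown only for illustration:

fmt = {
    'text/turtle': 'turtle',
    'application/rdf+xml': 'xml',
    'text/n3': 'n3',
    'text/plain': 'nt',
}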
Example #32
def import_cidoc() -> None:  # pragma: no cover

    start = time.time()
    classes = {}
    properties: Dict[str, Item] = {}
    properties_inverse: Dict[str, Item] = {}
    graph = Graph()
    graph.parse(FILENAME, format='application/rdf+xml')

    # Get classes and properties
    for subject, predicate, object_ in graph:
        try:
            code, name = subject.replace(CRM_URL, '').split('_', 1)
        except Exception:
            print(f'Not able to parse subject: {subject}')
            continue
        item = Item(code, name.replace('_', ' '), graph.comment(subject))

        # Translations
        for language in ['de', 'en', 'fr', 'ru', 'el', 'pt', 'zh']:
            translation = graph.preferredLabel(subject, lang=language)
            if translation and translation[0][1]:
                item.label[language] = translation[0][1]

        if code[0] == 'E':
            classes[code] = item
        elif code[0] == 'P':
            if code in EXCLUDE_PROPERTIES:
                pass
            elif code[-1] == 'i':
                properties_inverse[code[:-1]] = item
            else:
                properties[code] = item

    for code, property_inverse in properties_inverse.items():
        if code in properties:
            properties[code].name_inverse = property_inverse.name
        else:
            print(f'Missing property code: {code}')

    # Get subClassOf
    subs = graph.triples(
        (None, URIRef('http://www.w3.org/2000/01/rdf-schema#subClassOf'),
         None))
    for subject__, predicate__, object__ in subs:
        class_ = subject__.replace(CRM_URL, '').split('_', 1)[0]
        sub_class_of = object__.replace(CRM_URL, '').split('_', 1)[0]
        classes[class_].sub_class_of.append(sub_class_of)

    # Get subPropertyOf
    subs = graph.triples(
        (None, URIRef('http://www.w3.org/2000/01/rdf-schema#subPropertyOf'),
         None))
    for subject__, predicate__, object__ in subs:
        property_ = subject__.replace(CRM_URL, '').split('_', 1)[0]
        if property_[-1] == 'i' or property_ in EXCLUDE_PROPERTIES:
            continue
        sub_property_of = object__.replace(CRM_URL, '').split('_', 1)[0]
        sub_property_of = sub_property_of.replace('i', '')  # P10i, P130i, P59i
        properties[property_].sub_property_of.append(sub_property_of)

    # Get domain for properties
    domains = graph.triples(
        (None, URIRef('http://www.w3.org/2000/01/rdf-schema#domain'), None))
    for subject__, predicate__, object__ in domains:
        property_ = subject__.replace(CRM_URL, '').split('_', 1)[0]
        if property_[-1] == 'i' or property_ in EXCLUDE_PROPERTIES:
            continue
        properties[property_].domain_code = \
            object__.replace(CRM_URL, '').split('_', 1)[0]

    # Get range for properties
    ranges = graph.triples(
        (None, URIRef('http://www.w3.org/2000/01/rdf-schema#range'), None))
    for subject__, predicate__, object__ in ranges:
        property_ = subject__.replace(CRM_URL, '').split('_', 1)[0]
        if property_[-1] == 'i' or property_ in EXCLUDE_PROPERTIES:
            continue
        properties[property_].range_code = \
            object__.replace(CRM_URL, '').split('_', 1)[0]

    # OpenAtlas shortcuts
    properties['OA7'] = Item(
        'OA7', 'has relationship to',
        'OA7 is used to link two Actors (E39) via a certain relationship E39 Actor linked with E39 Actor: E39 (Actor) - P11i (participated in) - E5 (Event) - P11 (had participant) - E39 (Actor) Example: [ Stefan (E21)] participated in [ Relationship from Stefan to Joachim (E5)] had participant [Joachim (E21)] The connecting event is defined by an entity of class E55 (Type): [Relationship from Stefan to Joachim (E5)] has type [Son to Father (E55)]'
    )
    properties['OA7'].domain_code = 'E39'
    properties['OA7'].range_code = 'E39'
    properties['OA7'].label = {
        'en': 'has relationship to',
        'de': 'hat Beziehung zu'
    }

    properties['OA8'] = Item(
        'OA8', 'begins in',
        "OA8 is used to link the beginning of a persistent item's (E77) life span (or time of usage) with a certain place. E.g. to document the birthplace of a person. E77 Persistent Item linked with a E53 Place: E77 (Persistent Item) - P92i (was brought into existence by) - E63 (Beginning of Existence) - P7 (took place at) - E53 (Place) Example: [Albert Einstein (E21)] was brought into existence by [Birth of Albert Einstein (E12)] took place at [Ulm (E53)]"
    )
    properties['OA8'].domain_code = 'E77'
    properties['OA8'].range_code = 'E53'
    properties['OA8'].label = {'en': 'begins in', 'de': 'beginnt in'}

    properties['OA9'] = Item(
        'OA9', 'ends in',
        "OA9 is used to link the end of a persistent item's (E77) life span (or time of usage) with a certain place. E.g. to document a person's place of death. E77 Persistent Item linked with a E53 Place: E77 (Persistent Item) - P93i (was taken out of existence by) - E64 (End of Existence) - P7 (took place at) - E53 (Place) Example: [Albert Einstein (E21)] was taken out of existence by [Death of Albert Einstein (E12)] took place at [Princeton (E53)]"
    )
    properties['OA9'].domain_code = 'E77'
    properties['OA9'].range_code = 'E53'
    properties['OA9'].label = {'en': 'ends in', 'de': 'endet in'}

    connection = connect()
    cursor = connection.cursor(cursor_factory=psycopg2.extras.NamedTupleCursor)
    cursor.execute("""
        BEGIN;

        UPDATE model.entity SET (cidoc_class_code, openatlas_class_name) = ('E41', 'appellation') WHERE cidoc_class_code = 'E82';
        UPDATE model.link SET property_code = 'P1' WHERE property_code = 'P131';
        DELETE FROM model.openatlas_class WHERE cidoc_class_code = 'E82';
        ALTER TABLE model.cidoc_class DROP COLUMN IF EXISTS created, DROP COLUMN IF EXISTS modified;
        ALTER TABLE model.cidoc_class_inheritance DROP COLUMN IF EXISTS created, DROP COLUMN IF EXISTS modified;
        ALTER TABLE model.cidoc_class_i18n DROP COLUMN IF EXISTS created, DROP COLUMN IF EXISTS modified;
        ALTER TABLE model.property DROP COLUMN IF EXISTS created, DROP COLUMN IF EXISTS modified;
        ALTER TABLE model.property_inheritance DROP COLUMN IF EXISTS created, DROP COLUMN IF EXISTS modified;
        ALTER TABLE model.property_i18n DROP COLUMN IF EXISTS created, DROP COLUMN IF EXISTS modified;
        DROP TRIGGER IF EXISTS update_modified ON model.cidoc_class;
        DROP TRIGGER IF EXISTS update_modified ON model.cidoc_class_inheritance;
        DROP TRIGGER IF EXISTS update_modified ON model.cidoc_class_i18n;
        DROP TRIGGER IF EXISTS update_modified ON model.property;
        DROP TRIGGER IF EXISTS update_modified ON model.property_inheritance;
        DROP TRIGGER IF EXISTS update_modified ON model.property_i18n;

        ALTER TABLE model.entity DROP CONSTRAINT IF EXISTS entity_class_code_fkey;
        ALTER TABLE model.entity DROP CONSTRAINT IF EXISTS entity_openatlas_class_name_fkey;
        ALTER TABLE model.link DROP CONSTRAINT IF EXISTS link_property_code_fkey;
        ALTER TABLE model.cidoc_class_inheritance DROP CONSTRAINT IF EXISTS class_inheritance_super_code_fkey;
        ALTER TABLE model.cidoc_class_inheritance DROP CONSTRAINT IF EXISTS class_inheritance_sub_code_fkey;
        ALTER TABLE model.cidoc_class_i18n DROP CONSTRAINT IF EXISTS class_i18n_class_code_fkey;
        ALTER TABLE model.property DROP CONSTRAINT IF EXISTS property_domain_class_code_fkey;
        ALTER TABLE model.property DROP CONSTRAINT IF EXISTS property_range_class_code_fkey;
        ALTER TABLE model.property_inheritance DROP CONSTRAINT IF EXISTS property_inheritance_super_code_fkey;
        ALTER TABLE model.property_inheritance DROP CONSTRAINT IF EXISTS property_inheritance_sub_code_fkey;
        ALTER TABLE model.property_i18n DROP CONSTRAINT IF EXISTS property_i18n_property_code_fkey;
        ALTER TABLE model.openatlas_class DROP CONSTRAINT IF EXISTS openatlas_class_cidoc_class_code_fkey;
        ALTER TABLE web.reference_system_openatlas_class DROP CONSTRAINT IF EXISTS reference_system_openatlas_class_openatlas_class_name_fkey;

        TRUNCATE model.cidoc_class_inheritance, model.cidoc_class_i18n, model.cidoc_class, model.property_inheritance, model.property_i18n, model.property;

        ALTER SEQUENCE model.cidoc_class_id_seq RESTART;
        ALTER SEQUENCE model.cidoc_class_inheritance_id_seq RESTART;
        ALTER SEQUENCE model.cidoc_class_i18n_id_seq RESTART;
        ALTER SEQUENCE model.property_id_seq RESTART;
        ALTER SEQUENCE model.property_inheritance_id_seq RESTART;
        ALTER SEQUENCE model.property_i18n_id_seq RESTART;""")

    # Classes
    for code, class_ in classes.items():
        sql = """
            INSERT INTO model.cidoc_class (code, name, comment)
            VALUES (%(code)s, %(name)s, %(comment)s);"""
        cursor.execute(sql, {
            'code': class_.code,
            'name': class_.name,
            'comment': class_.comment
        })
    for code, class_ in classes.items():
        for sub_code_of in class_.sub_class_of:
            sql = """
                INSERT INTO model.cidoc_class_inheritance (super_code, sub_code)
                VALUES (%(super_code)s, %(sub_code)s);"""
            cursor.execute(sql, {
                'super_code': sub_code_of,
                'sub_code': class_.code
            })
        for language, label in class_.label.items():
            sql = """
                INSERT INTO model.cidoc_class_i18n
                    (class_code, language_code, text)
                VALUES (%(class)s, %(language)s, %(text)s);"""
            cursor.execute(sql, {
                'class': class_.code,
                'language': language,
                'text': label
            })

    # Properties
    for code, property_ in properties.items():
        sql = """
            INSERT INTO model.property (
                code, name, name_inverse, comment, domain_class_code,
                range_class_code)
            VALUES (
                %(code)s, %(name)s, %(name_inverse)s, %(comment)s,
                %(domain_code)s, %(range_code)s);"""
        cursor.execute(
            sql, {
                'code': property_.code,
                'name': property_.name,
                'name_inverse': property_.name_inverse,
                'comment': property_.comment,
                'domain_code': property_.domain_code,
                'range_code': property_.range_code
            })
    for code, property_ in properties.items():
        for sub_property_of in property_.sub_property_of:
            sql = """
                INSERT INTO model.property_inheritance (super_code, sub_code)
                VALUES (%(super_code)s, %(sub_code)s);"""
            cursor.execute(sql, {
                'super_code': sub_property_of,
                'sub_code': property_.code
            })

        for language, label in property_.label.items():
            text_inverse = None
            if property_.code in properties_inverse \
                    and language in properties_inverse[property_.code].label:
                text_inverse = properties_inverse[
                    property_.code].label[language]
            sql = """
                INSERT INTO model.property_i18n
                    (property_code, language_code, text, text_inverse)
                VALUES
                    (%(property)s, %(language)s, %(text)s, %(text_inverse)s);"""
            cursor.execute(
                sql, {
                    'property': property_.code,
                    'language': language,
                    'text': label,
                    'text_inverse': text_inverse
                })
    cursor.execute("""
        ALTER TABLE ONLY model.entity ADD CONSTRAINT entity_class_code_fkey FOREIGN KEY (cidoc_class_code) REFERENCES model.cidoc_class(code) ON UPDATE CASCADE ON DELETE CASCADE;
        ALTER TABLE ONLY model.link ADD CONSTRAINT link_property_code_fkey FOREIGN KEY (property_code) REFERENCES model.property(code) ON UPDATE CASCADE ON DELETE CASCADE;
        ALTER TABLE ONLY model.cidoc_class_inheritance ADD CONSTRAINT class_inheritance_super_code_fkey FOREIGN KEY (super_code) REFERENCES model.cidoc_class(code) ON UPDATE CASCADE ON DELETE CASCADE;
        ALTER TABLE ONLY model.cidoc_class_inheritance ADD CONSTRAINT class_inheritance_sub_code_fkey FOREIGN KEY (sub_code) REFERENCES model.cidoc_class(code) ON UPDATE CASCADE ON DELETE CASCADE;
        ALTER TABLE ONLY model.cidoc_class_i18n ADD CONSTRAINT class_i18n_class_code_fkey FOREIGN KEY (class_code) REFERENCES model.cidoc_class(code) ON UPDATE CASCADE ON DELETE CASCADE;
        ALTER TABLE ONLY model.property ADD CONSTRAINT property_domain_class_code_fkey FOREIGN KEY (domain_class_code) REFERENCES model.cidoc_class(code) ON UPDATE CASCADE ON DELETE CASCADE;
        ALTER TABLE ONLY model.property ADD CONSTRAINT property_range_class_code_fkey FOREIGN KEY (range_class_code) REFERENCES model.cidoc_class(code) ON UPDATE CASCADE ON DELETE CASCADE;
        ALTER TABLE ONLY model.property_inheritance ADD CONSTRAINT property_inheritance_super_code_fkey FOREIGN KEY (super_code) REFERENCES model.property(code) ON UPDATE CASCADE ON DELETE CASCADE;
        ALTER TABLE ONLY model.property_inheritance ADD CONSTRAINT property_inheritance_sub_code_fkey FOREIGN KEY (sub_code) REFERENCES model.property(code) ON UPDATE CASCADE ON DELETE CASCADE;
        ALTER TABLE ONLY model.property_i18n ADD CONSTRAINT property_i18n_property_code_fkey FOREIGN KEY (property_code) REFERENCES model.property(code) ON UPDATE CASCADE ON DELETE CASCADE;
        ALTER TABLE ONLY model.entity ADD CONSTRAINT entity_openatlas_class_name_fkey FOREIGN KEY (openatlas_class_name) REFERENCES model.openatlas_class(name) ON UPDATE CASCADE ON DELETE CASCADE;
        ALTER TABLE ONLY model.openatlas_class ADD CONSTRAINT openatlas_class_cidoc_class_code_fkey FOREIGN KEY (cidoc_class_code) REFERENCES model.cidoc_class(code) ON UPDATE CASCADE ON DELETE CASCADE;
        ALTER TABLE ONLY web.reference_system_openatlas_class ADD CONSTRAINT reference_system_openatlas_class_openatlas_class_name_fkey FOREIGN KEY (openatlas_class_name) REFERENCES model.openatlas_class(name) ON UPDATE CASCADE ON DELETE CASCADE;
        """)
    cursor.execute("COMMIT")
    print('Execution time: ' + str(int(time.time() - start)) + ' seconds')
Example #33
 def setUp(self):
     self.testGraph = Graph()
     Individual.factoryGraph = self.testGraph
Example #34
    unittest.main()
    sys.exit(1)
    from optparse import OptionParser
    parser = OptionParser()

    parser.add_option('--verbose',
                      action="store_true",
                      default=False,
                      help='Output debug print statements or not')
    parser.add_option('--format',
                      default="xml",
                      help='The RDF serialization syntax to parse with')

    (options, args) = parser.parse_args()

    owlGraph = Graph()
    for input in args[0:]:
        if options.verbose:
            print("Parsing ", input, " as ", options.format)
        owlGraph.parse(input, format=options.format)

    Individual.factoryGraph = owlGraph

    def topList(node, g):
        for s in g.subjects(RDF.rest, node):
            yield s

    for negativeClass in owlGraph.subjects(predicate=OWL_NS.complementOf):
        containingList = first(owlGraph.subjects(RDF.first, negativeClass))
        prevLink = None
        while containingList:
Example #35
 def main(argument_namespace_callback, **kwargs):
     from rdflib.graph import Graph
     argument_namespace_callback.output_mode = 'json'
     return Mock(name='graph', spec=Graph())
Example #36
class StoreTestCase(unittest.TestCase):
    """
    Test case for testing store performance... probably should be
    something other than a unit test... but for now we'll add it as a
    unit test.
    """

    store = "default"
    tmppath = None
    configString = os.environ.get("DBURI", "dburi")

    def setUp(self):
        self.gcold = gc.isenabled()
        gc.collect()
        gc.disable()
        self.graph = Graph(store=self.store)
        if self.store == "MySQL":
            # from test.mysql import configString
            from rdflib.store.MySQL import MySQL

            path = self.configString
            MySQL().destroy(path)
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)
        self.input = input = Graph()
        input.parse("http://eikeon.com")

    def tearDown(self):
        self.graph.close()
        if self.gcold:
            gc.enable()
        del self.graph
        shutil.rmtree(self.tmppath)

    def testTime(self):
        number = 1
        print(self.store)
        print("input:", end=" ")
        for i in itertools.repeat(None, number):
            self._testInput()
        print("random:", end=" ")
        for i in itertools.repeat(None, number):
            self._testRandom()
        print(".")

    def _testRandom(self):
        number = len(self.input)
        store = self.graph

        def add_random():
            s = random_uri()
            p = random_uri()
            o = random_uri()
            store.add((s, p, o))

        it = itertools.repeat(None, number)
        t0 = time()
        for _i in it:
            add_random()
        t1 = time()
        print("%.3g" % (t1 - t0), end=" ")

    def _testInput(self):
        number = 1
        store = self.graph

        def add_from_input():
            for t in self.input:
                store.add(t)

        it = itertools.repeat(None, number)
        t0 = time()
        for _i in it:
            add_from_input()
        t1 = time()
        print("%.3g" % (t1 - t0), end=" ")
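`random_uri` is defined elsewhere in the original benchmark module; a plausible stand-in that mints a fresh URI per call:

import random
from rdflib import URIRef

def random_uri():
    # hypothetical helper: a unique-ish URI per call
    return URIRef('http://example.org/%030x' % random.randrange(16 ** 30))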
Example #37
 def testI(self):
     g = Graph()
     g.add((self.uriref, RDF.value, self.literal))
     g.add((self.uriref, RDF.value, self.uriref))
     self.assertEqual(len(g), 2)
Example #38
def main():
    from optparse import OptionParser
    op = OptionParser(
        'usage: %prog [options] factFile1 factFile2 ... factFileN')

    op.add_option(
        '--why',
        default=None,
        help='Specifies the goals to solve for using the non-naive methods; ' +
        'see --method')

    op.add_option(
        '--closure',
        action='store_true',
        default=False,
        help='Whether or not to serialize the inferred triples' +
        ' along with the original triples.  Otherwise ' +
        '(the default behavior), serialize only the inferred triples')

    op.add_option(
        '--imports',
        action='store_true',
        default=False,
        help='Whether or not to follow owl:imports in the fact graph')

    op.add_option(
        '--output',
        default='n3',
        metavar='RDF_FORMAT',
        choices=[
            'xml', 'TriX', 'n3', 'pml', 'proof-graph', 'nt', 'rif', 'rif-xml',
            'conflict', 'man-owl'
        ],
        help=
        "Serialize the inferred triples and/or original RDF triples to STDOUT "
        +
        "using the specified RDF syntax ('xml', 'pretty-xml', 'nt', 'turtle', "
        +
        "or 'n3') or to print a summary of the conflict set (from the RETE " +
        "network) if the value of this option is 'conflict'.  If the " +
        "value is 'rif' or 'rif-xml', then the rules used for inference " +
        "will be serialized as RIF.  If the value is 'pml' and --why is used, "
        + " then the PML RDF statements are serialized.  If output is " +
        "'proof-graph then a graphviz .dot file of the proof graph is printed. "
        +
        "Finally if the value is 'man-owl', then the RDF facts are assumed " +
        "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default"
    )

    op.add_option(
        '--class',
        dest='classes',
        action='append',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl to determine which ' +
        'classes within the entire OWL/RDF are targeted for serialization' +
        '.  Can be used more than once')

    op.add_option(
        '--hybrid',
        action='store_true',
        default=False,
        help='Used with --method=bfp to determine whether or not to ' +
        'peek into the fact graph to identify predicates that are both ' +
        'derived and base.  This is expensive for large fact graphs ' +
        'and is explicitly not used against SPARQL endpoints')

    op.add_option(
        '--property',
        action='append',
        dest='properties',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl or --extract to determine which ' +
        'properties are serialized / extracted.  Can be used more than once')

    op.add_option(
        '--normalize',
        action='store_true',
        default=False,
        help=
        "Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]. "
        + "The default is %default")

    op.add_option(
        '--ddlGraph',
        default=False,
        help=
        "The location of a N3 Data Description document describing the IDB predicates"
    )

    op.add_option(
        '--input-format',
        default='xml',
        dest='inputFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help=
        "The format of the RDF document(s) which serve as the initial facts " +
        " for the RETE network. One of 'xml', 'n3', 'trix', 'nt', " +
        "or 'rdfa'.  The default is %default")

    op.add_option(
        '--safety',
        default='none',
        metavar='RULE_SAFETY',
        choices=['loose', 'strict', 'none'],
        help="Determines how to handle RIF Core safety.  A value of 'loose' " +
        " means that unsafe rules will be ignored.  A value of 'strict' " +
        " will cause a syntax exception upon any unsafe rule.  A value of " +
        "'none' (the default) does nothing")

    op.add_option(
        '--pDSemantics',
        action='store_true',
        default=False,
        help=
        'Used with --dlp to add the pD semantics ruleset for semantics not covered '
        + 'by DLP but expressible in definite Datalog Logic Programming.' +
        ' The default is %default')

    op.add_option(
        '--stdin',
        action='store_true',
        default=False,
        help=
        'Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default '
    )

    op.add_option(
        '--ns',
        action='append',
        default=[],
        metavar="PREFIX=URI",
        help='Register a namespace binding (QName prefix to a base URI).  This '
        + 'can be used more than once')

    op.add_option(
        '--rules',
        default=[],
        action='append',
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as rulesets for the RETE network'
        + '.  Can be specified more than once')

    op.add_option('-d',
                  '--debug',
                  action='store_true',
                  default=True,
                  help='Include debugging output')

    op.add_option(
        '--strictness',
        default='defaultBase',
        metavar='DDL_STRICTNESS',
        choices=['loose', 'defaultBase', 'defaultDerived', 'harsh'],
        help=
        'Used with --why to specify whether to: *not* check if predicates are '
        +
        ' both derived and base (loose), if they are, mark as derived (defaultDerived) '
        +
        'or as base (defaultBase) predicates, else raise an exception (harsh)')

    op.add_option(
        '--method',
        default='naive',
        metavar='reasoning algorithm',
        choices=['gms', 'bfp', 'naive'],
        help='Used with --why to specify how to evaluate answers for the query.  '
        + 'One of: gms, bfp, naive')

    op.add_option(
        '--firstAnswer',
        default=False,
        action='store_true',
        help=
        'Used with --why to determine whether to fetch all answers or just ' +
        'the first')

    op.add_option(
        '--edb',
        default=[],
        action='append',
        metavar='EXTENSIONAL_DB_PREDICATE_QNAME',
        help=
        'Used with --why/--strictness=defaultDerived to specify which clashing '
        + 'predicate will be designated as a base predicate')

    op.add_option(
        '--idb',
        default=[],
        action='append',
        metavar='INTENSIONAL_DB_PREDICATE_QNAME',
        help=
        'Used with --why/--strictness=defaultBase to specify which clashing ' +
        'predicate will be designated as a derived predicate')

    op.add_option(
        '--hybridPredicate',
        default=[],
        action='append',
        metavar='PREDICATE_QNAME',
        help=
        'Used with --why to explicitly specify a hybrid predicate (in both ' +
        ' IDB and EDB) ')

    op.add_option(
        '--noMagic',
        default=[],
        action='append',
        metavar='DB_PREDICATE_QNAME',
        help='Used with --why to specify that the predicate should not have its '
        + 'magic sets calculated')

    op.add_option(
        '--filter',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help=
        'The Notation 3 documents to use as a filter (entailments do not participate in network)'
    )

    op.add_option(
        '--ruleFacts',
        action='store_true',
        default=False,
        help="Determines whether or not to attempt to parse initial facts from "
        + "the rule graph.  The default is %default")

    op.add_option(
        '--builtins',
        default=False,
        metavar='PATH_TO_PYTHON_MODULE',
        help="The path to a python module with function definitions (and a " +
        "dictionary called ADDITIONAL_FILTERS) to use for builtins implementations"
    )

    op.add_option(
        '--dlp',
        action='store_true',
        default=False,
        help=
        'Use Description Logic Programming (DLP) to extract rules from OWL/RDF.  The default is %default'
    )

    op.add_option(
        '--sparqlEndpoint',
        action='store_true',
        default=False,
        help=
        'Indicates that the sole argument is the URI of a SPARQL endpoint to query'
    )

    op.add_option(
        '--ontology',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help=
        'The path to an OWL RDF/XML graph to use DLP to extract rules from ' +
        '(otherwise, fact graph(s) are used)')

    op.add_option(
        '--ontologyFormat',
        default='xml',
        dest='ontologyFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help=
        "The format of the OWL RDF/XML graph specified via --ontology.  The default is %default"
    )

    op.add_option(
        '--builtinTemplates',
        default=None,
        metavar='N3_DOC_PATH_OR_URI',
        help=
        'The path to an N3 document associating SPARQL FILTER templates to ' +
        'rule builtins')

    op.add_option('--negation',
                  action='store_true',
                  default=False,
                  help='Extract negative rules?')

    op.add_option(
        '--normalForm',
        action='store_true',
        default=False,
        help='Whether or not to reduce DL axioms & LP rules to a normal form')
    (options, facts) = op.parse_args()

    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        pref, nsUri = nsBind.split('=')
        nsBinds[pref] = nsUri

    namespace_manager = NamespaceManager(Graph())
    if options.sparqlEndpoint:
        factGraph = Graph(plugin.get('SPARQLStore', Store)(facts[0]))
        options.hybrid = False
    else:
        factGraph = Graph()
    ruleSet = Ruleset()

    for fileN in options.rules:
        if options.ruleFacts and not options.sparqlEndpoint:
            factGraph.parse(fileN, format='n3')
            print("Parsing RDF facts from ", fileN)
        if options.builtins:
            import imp
            userFuncs = imp.load_source('builtins', options.builtins)
            rs = HornFromN3(fileN,
                            additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
        else:
            rs = HornFromN3(fileN)
        nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)
        #ruleGraph.parse(fileN, format='n3')

    ruleSet.nsMapping = nsBinds

    for prefix, uri in list(nsBinds.items()):
        namespace_manager.bind(prefix, uri, override=False)
    closureDeltaGraph = Graph()
    closureDeltaGraph.namespace_manager = namespace_manager
    factGraph.namespace_manager = namespace_manager

    if not options.sparqlEndpoint:
        for fileN in facts:
            factGraph.parse(fileN, format=options.inputFormat)
            if options.imports:
                for owlImport in factGraph.objects(predicate=OWL_NS.imports):
                    factGraph.parse(owlImport)
                    print("Parsed Semantic Web Graph.. ", owlImport)

    if not options.sparqlEndpoint and facts:
        for pref, uri in factGraph.namespaces():
            nsBinds[pref] = uri

    if options.stdin:
        assert not options.sparqlEndpoint, "Cannot use --stdin with --sparqlEndpoint"
        factGraph.parse(sys.stdin, format=options.inputFormat)

    # Normalize namespace mappings:
    # prune redundant, rdflib-allocated namespace prefix mappings
    newNsMgr = NamespaceManager(factGraph)
    from FuXi.Rete.Util import CollapseDictionary
    for k, v in list(
            CollapseDictionary(
                dict(factGraph.namespaces())).items()):
        newNsMgr.bind(k, v)
    factGraph.namespace_manager = newNsMgr

    if options.normalForm:
        NormalFormReduction(factGraph)

    if not options.sparqlEndpoint:
        workingMemory = generateTokenSet(factGraph)
    if options.builtins:
        import imp
        userFuncs = imp.load_source('builtins', options.builtins)
        rule_store, rule_graph, network = SetupRuleStore(
            makeNetwork=True, additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
    else:
        rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    network.inferredFacts = closureDeltaGraph
    network.nsMap = nsBinds

    if options.dlp:
        from FuXi.DLP.DLNormalization import NormalFormReduction
        if options.ontology:
            ontGraph = Graph()
            for fileN in options.ontology:
                ontGraph.parse(fileN, format=options.ontologyFormat)
                for prefix, uri in ontGraph.namespaces():
                    nsBinds[prefix] = uri
                    namespace_manager.bind(prefix, uri, override=False)
                    if options.sparqlEndpoint:
                        factGraph.store.bind(prefix, uri)
        else:
            ontGraph = factGraph
        NormalFormReduction(ontGraph)
        dlp = network.setupDescriptionLogicProgramming(
            ontGraph,
            addPDSemantics=options.pDSemantics,
            constructNetwork=False,
            ignoreNegativeStratus=options.negation,
            safety=safetyNameMap[options.safety])
        ruleSet.formulae.extend(dlp)
    if options.output == 'rif' and not options.why:
        for rule in ruleSet:
            print(rule)
        if options.negation:
            for nRule in network.negRules:
                print(nRule)

    elif options.output == 'man-owl':
        cGraph = network.closureGraph(factGraph, readOnly=False)
        cGraph.namespace_manager = namespace_manager
        Individual.factoryGraph = cGraph
        if options.classes:
            mapping = dict(namespace_manager.namespaces())
            for c in options.classes:
                pref, uri = c.split(':')
                print(Class(URIRef(mapping[pref] + uri)).__repr__(True))
        elif options.properties:
            mapping = dict(namespace_manager.namespaces())
            for p in options.properties:
                pref, uri = p.split(':')
                print(Property(URIRef(mapping[pref] + uri)))
        else:
            for p in AllProperties(cGraph):
                print(p.identifier, first(p.label))
                print(repr(p))
            for c in AllClasses(cGraph):
                if options.normalize:
                    if c.isPrimitive():
                        primAnc = [
                            sc for sc in c.subClassOf if sc.isPrimitive()
                        ]
                        if len(primAnc) > 1:
                            warnings.warn(
                                "Branches of primitive skeleton taxonomy" +
                                " should form trees: %s has %s primitive parents: %s"
                                % (c.qname, len(primAnc), primAnc),
                                UserWarning, 1)
                        children = [desc for desc in c.subSumpteeIds()]
                        for child in children:
                            for otherChild in [
                                    o for o in children if o is not child
                            ]:
                                if otherChild not in [
                                        c.identifier
                                        for c in Class(child).disjointWith
                                ]:
                                    warnings.warn(
                                        "Primitive children (of %s) " % (c.qname) +
                                        "must be mutually disjoint: %s and %s" % (
                                            Class(child).qname,
                                            Class(otherChild).qname),
                                        UserWarning, 1)
                # if not isinstance(c.identifier, BNode):
                print(c.__repr__(True))

    if not options.why:
        # Naive construction of graph
        for rule in ruleSet:
            network.buildNetworkFromClause(rule)

    magicSeeds = []
    if options.why:
        builtinTemplateGraph = Graph()
        if options.builtinTemplates:
            builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                 format='n3')
        factGraph.templateMap = \
            dict([(pred, template)
                      for pred, _ignore, template in
                            builtinTemplateGraph.triples(
                                (None,
                                 TEMPLATES.filterTemplate,
                                 None))])
        goals = []
        query = ParseSPARQL(options.why)
        network.nsMap['pml'] = PML
        network.nsMap['gmp'] = GMP_NS
        network.nsMap['owl'] = OWL_NS
        nsBinds.update(network.nsMap)
        network.nsMap = nsBinds
        if not query.prologue:
            query.prologue = Prologue(None, [])
            query.prologue.prefixBindings.update(nsBinds)
        else:
            for prefix, nsInst in list(nsBinds.items()):
                if prefix not in query.prologue.prefixBindings:
                    query.prologue.prefixBindings[prefix] = nsInst
        print("query.prologue", query.prologue)
        print("query.query", query.query)
        print("query.query.whereClause", query.query.whereClause)
        print("query.query.whereClause.parsedGraphPattern",
              query.query.whereClause.parsedGraphPattern)
        goals.extend([(s, p, o) for s, p, o, c in ReduceGraphPattern(
            query.query.whereClause.parsedGraphPattern,
            query.prologue).patterns])
        # dPreds=[]# p for s, p, o in goals ]
        # print("goals", goals)
        magicRuleNo = 0
        bottomUpDerivedPreds = []
        # topDownDerivedPreds  = []
        defaultBasePreds = []
        defaultDerivedPreds = set()
        hybridPredicates = []
        mapping = dict(newNsMgr.namespaces())
        for edb in options.edb:
            pref, uri = edb.split(':')
            defaultBasePreds.append(URIRef(mapping[pref] + uri))
        noMagic = []
        for pred in options.noMagic:
            pref, uri = pred.split(':')
            noMagic.append(URIRef(mapping[pref] + uri))
        if options.ddlGraph:
            ddlGraph = Graph().parse(options.ddlGraph, format='n3')
            # @TODO: should also get hybrid predicates from DDL graph
            defaultDerivedPreds = IdentifyDerivedPredicates(
                ddlGraph, Graph(), ruleSet)
        else:
            for idb in options.idb:
                pref, uri = idb.split(':')
                defaultDerivedPreds.add(URIRef(mapping[pref] + uri))
            defaultDerivedPreds.update(
                set([o if p == RDF.type else p for s, p, o in goals]))
            for hybrid in options.hybridPredicate:
                pref, uri = hybrid.split(':')
                hybridPredicates.append(URIRef(mapping[pref] + uri))

        if options.method == 'gms':
            for goal in goals:
                goalSeed = AdornLiteral(goal).makeMagicPred()
                print("Magic seed fact (used in bottom-up evaluation)",
                      goalSeed)
                magicSeeds.append(goalSeed.toRDFTuple())
            if noMagic:
                print("Predicates whose magic sets will not be calculated")
                for p in noMagic:
                    print("\t", factGraph.qname(p))
            for rule in MagicSetTransformation(
                    factGraph,
                    ruleSet,
                    goals,
                    derivedPreds=bottomUpDerivedPreds,
                    strictCheck=nameMap[options.strictness],
                    defaultPredicates=(defaultBasePreds, defaultDerivedPreds),
                    noMagic=noMagic):
                magicRuleNo += 1
                network.buildNetworkFromClause(rule)
            if len(list(ruleSet)):
                print("reduction in size of program: %s (%s -> %s clauses)" %
                      (100 -
                       (float(magicRuleNo) / float(len(list(ruleSet)))) * 100,
                       len(list(ruleSet)), magicRuleNo))
            start = time.time()
            network.feedFactsToAdd(generateTokenSet(magicSeeds))
            if not [
                    rule for rule in factGraph.adornedProgram if len(rule.sip)
            ]:
                warnings.warn(
                    "Using GMS sideways information strategy with no " +
                    "information to pass from query.  Falling back to " +
                    "naive method over given facts and rules")
                network.feedFactsToAdd(workingMemory)
            sTime = time.time() - start
            if sTime > 1:
                sTimeStr = "%s seconds" % sTime
            else:
                sTime = sTime * 1000
                sTimeStr = "%s milli seconds" % sTime
            print("Time to calculate closure on working memory: ", sTimeStr)

            if options.output == 'rif':
                print("Rules used for bottom-up evaluation")
                if network.rules:
                    for clause in network.rules:
                        print(clause)
                else:
                    for clause in factGraph.adornedProgram:
                        print(clause)
            if options.output == 'conflict':
                network.reportConflictSet()

        elif options.method == 'bfp':
            topDownDPreds = defaultDerivedPreds
            if options.builtinTemplates:
                builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                     format='n3')
                builtinDict = dict([
                    (pred, template) for pred, _ignore, template in
                    builtinTemplateGraph.triples((None,
                                                  TEMPLATES.filterTemplate,
                                                  None))
                ])
            else:
                builtinDict = None
            topDownStore = TopDownSPARQLEntailingStore(
                factGraph.store,
                factGraph,
                idb=ruleSet,
                DEBUG=options.debug,
                derivedPredicates=topDownDPreds,
                templateMap=builtinDict,
                nsBindings=network.nsMap,
                identifyHybridPredicates=options.hybrid
                if options.method == 'bfp' else False,
                hybridPredicates=hybridPredicates)
            targetGraph = Graph(topDownStore)
            for pref, nsUri in list(network.nsMap.items()):
                targetGraph.bind(pref, nsUri)
            start = time.time()
            # queryLiteral = EDBQuery([BuildUnitermFromTuple(goal) for goal in goals],
            #                         targetGraph)
            # query = queryLiteral.asSPARQL()
            # print("Goal to solve ", query)
            sTime = time.time() - start
            result = targetGraph.query(options.why, initNs=network.nsMap)
            if result.askAnswer:
                sTime = time.time() - start
                if sTime > 1:
                    sTimeStr = "%s seconds" % sTime
                else:
                    sTime = sTime * 1000
                    sTimeStr = "%s milli seconds" % sTime
                print("Time to reach answer ground goal answer of %s: %s" %
                      (result.askAnswer[0], sTimeStr))
            else:
                for rt in result:
                    sTime = time.time() - start
                    if sTime > 1:
                        sTimeStr = "%s seconds" % sTime
                    else:
                        sTime = sTime * 1000
                        sTimeStr = "%s milli seconds" % sTime
                    if options.firstAnswer:
                        break
                    print(
                        "Time to reach answer %s via top-down SPARQL sip strategy: %s"
                        % (rt, sTimeStr))
            if options.output == 'conflict' and options.method == 'bfp':
                for _network, _goal in topDownStore.queryNetworks:
                    print(_network, _goal)
                    _network.reportConflictSet(options.debug)
                for query in topDownStore.edbQueries:
                    print(query.asSPARQL())

    elif options.method == 'naive':
        start = time.time()
        network.feedFactsToAdd(workingMemory)
        sTime = time.time() - start
        if sTime > 1:
            sTimeStr = "%s seconds" % sTime
        else:
            sTime = sTime * 1000
            sTimeStr = "%s milli seconds" % sTime
        print("Time to calculate closure on working memory: ", sTimeStr)
        print(network)
        if options.output == 'conflict':
            network.reportConflictSet()

    for fileN in options.filter:
        for rule in HornFromN3(fileN):
            network.buildFilterNetworkFromClause(rule)

    if options.negation and network.negRules and options.method in [
            'both', 'bottomUp'
    ]:
        now = time.time()
        rt = network.calculateStratifiedModel(factGraph)
        print(
            "Time to calculate stratified, stable model (inferred %s facts): %s"
            % (rt, time.time() - now))
    if options.filter:
        print("Applying filter to entailed facts")
        network.inferredFacts = network.filteredFacts

    if options.closure and options.output in RDF_SERIALIZATION_FORMATS:
        cGraph = network.closureGraph(factGraph)
        cGraph.namespace_manager = namespace_manager
        print(
            cGraph.serialize(destination=None,
                             format=options.output,
                             base=None))
    elif options.output and options.output in RDF_SERIALIZATION_FORMATS:
        print(
            network.inferredFacts.serialize(destination=None,
                                            format=options.output,
                                            base=None))
Exemple #39
0
def read_manifest(item, manifest_file):
    triples = []
    namespaces = {}
    seeAlsoFiles = []
    oxdsClasses = [
        'http://vocab.ox.ac.uk/dataset/schema#Grouping',
        'http://vocab.ox.ac.uk/dataset/schema#DataSet'
    ]

    aggregates = item.list_rdf_objects(item.uri, "ore:aggregates")

    g = Graph()
    gparsed = g.parse(manifest_file, format='xml')
    namespaces = dict(g.namespaces())
    #Get the subjects
    subjects = {}
    for s in gparsed.subjects():
        if s in subjects:
            continue
        if type(s).__name__ == 'URIRef':
            if str(s).startswith('file://'):
                ss = str(s).replace('file://', '')
                if manifest_file in ss:
                    subjects[s] = URIRef(item.uri)
                else:
                    manifest_file_path, manifest_file_name = os.path.split(
                        manifest_file)
                    ss = ss.replace(manifest_file_path, '').strip('/')
                    for file_uri in aggregates:
                        if ss in str(file_uri):
                            subjects[s] = URIRef(file_uri)
                            break
                    if s not in subjects:
                        subjects[s] = URIRef(item.uri)
            else:
                subjects[s] = URIRef(s)
        elif type(s).__name__ == 'BNode':
            replace_subject = True
            for o in gparsed.objects():
                if o == s:
                    replace_subject = False
            if replace_subject:
                subjects[s] = URIRef(item.uri)
            else:
                subjects[s] = s
    #Get the dataset type
    #set the subject uri to item uri if it is of type as defined in oxdsClasses
    datasetType = False
    for s, p, o in gparsed.triples((None, RDF.type, None)):
        if str(o) in oxdsClasses:
            if type(s).__name__ == 'URIRef' and len(s) > 0 and str(s) != str(
                    item.uri) and str(subjects[s]) != str(item.uri):
                namespaces['owl'] = URIRef("http://www.w3.org/2002/07/owl#")
                triples.append((item.uri, 'owl:sameAs', s))
                triples.append((item.uri, RDF.type, o))
            elif type(s).__name__ == 'BNode' or len(s) == 0 or str(s) == str(
                    item.uri) or str(subjects[s]) == str(item.uri):
                gparsed.remove((s, p, o))
            subjects[s] = item.uri

    #Get the uri for the see also files
    for s, p, o in gparsed.triples(
        (None, URIRef('http://www.w3.org/2000/01/rdf-schema#seeAlso'), None)):
        if type(o).__name__ == 'URIRef' and len(o) > 0:
            obj = str(o)
            if obj.startswith('file://'):
                obj_path, obj_name = os.path.split(obj)
                obj = obj.replace(obj_path, '').strip('/')
            for file_uri in aggregates:
                if obj in str(file_uri):
                    seeAlsoFiles.append(file_uri)
        gparsed.remove((s, p, o))

    #Add remaining triples
    for s, p, o in gparsed.triples((None, None, None)):
        triples.append((subjects[s], p, o))
    return namespaces, triples, seeAlsoFiles
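A hedged usage sketch for read_manifest above; the stand-in item class and the manifest path are assumptions that mimic just the two members the function touches (item.uri and item.list_rdf_objects):

class StubItem:
    """Hypothetical stand-in for the repository item the function expects."""
    uri = 'http://example.org/datasets/1'

    def list_rdf_objects(self, uri, predicate):
        return []  # no ore:aggregates in this sketch

namespaces, triples, see_also_files = read_manifest(StubItem(), 'manifest.rdf')
print(len(triples), 'triples,', len(see_also_files), 'seeAlso files')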
Exemple #40
0
    def testGraphIntersection(self):
        g1=Graph()
        g2=Graph()

        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        
        g1.add((tarek, likes, pizza))
        g1.add((michel, likes, cheese))

        g2.add((bob, likes, cheese))
        g2.add((michel, likes, cheese))

        g3=g1*g2

        self.assertEquals(len(g3), 1)
        self.assertEquals((tarek, likes, pizza) in g3, False)
        self.assertEquals((tarek, likes, cheese) in g3, False)

        self.assertEquals((bob, likes, cheese) in g3, False)

        self.assertEquals((michel, likes, cheese) in g3, True)

        g1*=g2

        self.assertEquals(len(g1), 1)

        self.assertEquals((tarek, likes, pizza) in g1, False)
        self.assertEquals((tarek, likes, cheese) in g1, False)

        self.assertEquals((bob, likes, cheese) in g1, False)

        self.assertEquals((michel, likes, cheese) in g1, True)
Exemple #41
0
class GraphTestCase(unittest.TestCase):
    store_name = 'default'
    tmppath = None

    def setUp(self):
        self.graph = Graph(store=self.store_name)
        self.tmppath = mkdtemp()
        self.graph.open(self.tmppath)

        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

    def tearDown(self):
        self.graph.close()
        shutil.rmtree(self.tmppath)

    def addStuff(self):
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese

        self.graph.add((tarek, likes, pizza))
        self.graph.add((tarek, likes, cheese))
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.add((bob, likes, cheese))
        self.graph.add((bob, hates, pizza))
        self.graph.add((bob, hates, michel)) # gasp!

    def removeStuff(self):
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese

        self.graph.remove((tarek, likes, pizza))
        self.graph.remove((tarek, likes, cheese))
        self.graph.remove((michel, likes, pizza))
        self.graph.remove((michel, likes, cheese))
        self.graph.remove((bob, likes, cheese))
        self.graph.remove((bob, hates, pizza))
        self.graph.remove((bob, hates, michel)) # gasp!

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testTriples(self):
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        asserte = self.assertEquals
        triples = self.graph.triples
        Any = None

        self.addStuff()

        # unbound subjects
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound
        asserte(len(list(triples((Any, Any, Any)))), 7)
        self.removeStuff()
        asserte(len(list(triples((Any, Any, Any)))), 0)


    def testStatementNode(self):
        graph = self.graph

        from rdflib.term import Statement
        c = URIRef("http://example.org/foo#c")
        r = URIRef("http://example.org/foo#r")
        s = Statement((self.michel, self.likes, self.pizza), c)
        graph.add((s, RDF.value, r))
        self.assertEquals(r, graph.value(s, RDF.value))
        self.assertEquals(s, graph.value(predicate=RDF.value, object=r))

    def testGraphValue(self):
        from rdflib.graph import GraphValue

        graph = self.graph

        alice = URIRef("alice")
        bob = URIRef("bob")
        pizza = URIRef("pizza")
        cheese = URIRef("cheese")

        g1 = Graph()
        g1.add((alice, RDF.value, pizza))
        g1.add((bob, RDF.value, cheese))
        g1.add((bob, RDF.value, pizza))

        g2 = Graph()
        g2.add((bob, RDF.value, pizza))
        g2.add((bob, RDF.value, cheese))
        g2.add((alice, RDF.value, pizza))

        gv1 = GraphValue(store=graph.store, graph=g1)
        gv2 = GraphValue(store=graph.store, graph=g2)
        graph.add((gv1, RDF.value, gv2))
        v = graph.value(gv1)
        #print type(v)
        self.assertEquals(gv2, v)
        #print list(gv2)
        #print gv2.identifier
        graph.remove((gv1, RDF.value, gv2))

    def testConnected(self):
        graph = self.graph
        self.addStuff()
        self.assertEquals(True, graph.connected())

        jeroen = URIRef("jeroen")
        unconnected = URIRef("unconnected")

        graph.add((jeroen,self.likes,unconnected))

        self.assertEquals(False, graph.connected())

    def testSub(self):
        g1=Graph()
        g2=Graph()

        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        
        g1.add((tarek, likes, pizza))
        g1.add((bob, likes, cheese))

        g2.add((bob, likes, cheese))

        g3=g1-g2

        self.assertEquals(len(g3), 1)
        self.assertEquals((tarek, likes, pizza) in g3, True)
        self.assertEquals((tarek, likes, cheese) in g3, False)

        self.assertEquals((bob, likes, cheese) in g3, False)

        g1-=g2

        self.assertEquals(len(g1), 1)
        self.assertEquals((tarek, likes, pizza) in g1, True)
        self.assertEquals((tarek, likes, cheese) in g1, False)

        self.assertEquals((bob, likes, cheese) in g1, False)

    def testGraphAdd(self):
        g1=Graph()
        g2=Graph()

        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        
        g1.add((tarek, likes, pizza))

        g2.add((bob, likes, cheese))

        g3=g1+g2

        self.assertEquals(len(g3), 2)
        self.assertEquals((tarek, likes, pizza) in g3, True)
        self.assertEquals((tarek, likes, cheese) in g3, False)

        self.assertEquals((bob, likes, cheese) in g3, True)

        g1+=g2

        self.assertEquals(len(g1), 2)
        self.assertEquals((tarek, likes, pizza) in g1, True)
        self.assertEquals((tarek, likes, cheese) in g1, False)

        self.assertEquals((bob, likes, cheese) in g1, True)

    def testGraphIntersection(self):
        g1=Graph()
        g2=Graph()

        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        
        g1.add((tarek, likes, pizza))
        g1.add((michel, likes, cheese))

        g2.add((bob, likes, cheese))
        g2.add((michel, likes, cheese))

        g3=g1*g2

        self.assertEquals(len(g3), 1)
        self.assertEquals((tarek, likes, pizza) in g3, False)
        self.assertEquals((tarek, likes, cheese) in g3, False)

        self.assertEquals((bob, likes, cheese) in g3, False)

        self.assertEquals((michel, likes, cheese) in g3, True)

        g1*=g2

        self.assertEquals(len(g1), 1)

        self.assertEquals((tarek, likes, pizza) in g1, False)
        self.assertEquals((tarek, likes, cheese) in g1, False)

        self.assertEquals((bob, likes, cheese) in g1, False)

        self.assertEquals((michel, likes, cheese) in g1, True)

    def testFinalNewline(self):
        """
        http://code.google.com/p/rdflib/issues/detail?id=5
        """
        import sys
        import platform
        if getattr(sys, 'pypy_version_info', None) or platform.system() == 'Java':
            from nose import SkipTest
            raise SkipTest(
                'Testing under pypy and Jython2.5 fails to detect that ' + \
                'IOMemory is a context_aware store')

        failed = set()
        for p in rdflib.plugin.plugins(None, rdflib.plugin.Serializer):
            if p.name != 'nquads':
                v = self.graph.serialize(format=p.name)
                lines = v.split(b("\n"))
                if b("\n") not in v or (lines[-1]!=b('')):
                    failed.add(p.name)
        self.assertEqual(len(failed), 0, "No final newline for formats: '%s'" % failed)

    def testConNeg(self): 
        thread.start_new_thread(runHttpServer, tuple())
        # hang on a second while server starts
        time.sleep(1)
        self.graph.parse("http://localhost:12345/foo", format="xml")
        self.graph.parse("http://localhost:12345/foo", format="n3")
        self.graph.parse("http://localhost:12345/foo", format="nt")
Exemple #42
0
    def testGraphValue(self):
        from rdflib.graph import GraphValue

        graph = self.graph

        alice = URIRef("alice")
        bob = URIRef("bob")
        pizza = URIRef("pizza")
        cheese = URIRef("cheese")

        g1 = Graph()
        g1.add((alice, RDF.value, pizza))
        g1.add((bob, RDF.value, cheese))
        g1.add((bob, RDF.value, pizza))

        g2 = Graph()
        g2.add((bob, RDF.value, pizza))
        g2.add((bob, RDF.value, cheese))
        g2.add((alice, RDF.value, pizza))

        gv1 = GraphValue(store=graph.store, graph=g1)
        gv2 = GraphValue(store=graph.store, graph=g2)
        graph.add((gv1, RDF.value, gv2))
        v = graph.value(gv1)
        #print type(v)
        self.assertEquals(gv2, v)
        #print list(gv2)
        #print gv2.identifier
        graph.remove((gv1, RDF.value, gv2))
Exemple #43
0
 def setUp(self):
     self.graph = Graph(store=self.store_name)
     self.graph.destroy(self.path)
     self.graph.open(self.path, create=self.create)
Exemple #44
0
    def testIssue68(self):
        input = """@prefix : <http://some.url/pome#>.\n\n:Brecon a :Place;\n\t:hasLord\n\t\t:Bernard_of_Neufmarch\xc3\xa9 .\n """

        g = Graph()
        g.parse(data=input, format="n3")
Exemple #45
0
class QueryGraph:
    def __init__(self, sparql_query, prefix_bindings):
        """sparql_query is query string, which will be converted into sparqlAlgebra
        during construction.
        """

        ## using direct algebra
        query_algebra = prepareQuery(sparql_query, initNs=prefix_bindings)
        # pprintAlgebra(query_algebra)

        self.algebra = query_algebra.algebra

        self.query_type = query_algebra.algebra.name
        # print(query_type)

        self.parameter_variables = query_algebra.algebra['PV']
        # print(*parameter_variables, sep=', ')

        bgp_expression = self.find_p_name_list('BGP')
        # self.bgp_variables = query_algebra.algebra['p']['p']['p']['_vars']

        try:
            # Create queryGraph
            # self.graph = Graph()
            # for triple in triples_list:
            #     self.graph.add(triple)

            # Create processed queryGraph
            self.processed_graph = Graph()

            self.bgp_variables = bgp_expression['_vars']
            self.triples_list = bgp_expression['triples']
            self.processed_triples = []

            bgp_variables_dict = {}
            var_count = 0
            param_variables_dict = {}
            param_var_count = 0

            for variable in self.bgp_variables:
                if variable not in self.parameter_variables:
                    var_count += 1
                    bgp_variables_dict[variable] = "var%d" % var_count

            for pv in self.parameter_variables:
                param_var_count += 1
                param_variables_dict[pv] = "param_var%d" % param_var_count

            # anonymizing URIs
            uri_seen_dict = {}
            uri_seen_count = 0
            for tuple_spo in self.triples_list:
                processed_tuple_spo = []
                for item in tuple_spo:
                    if item in param_variables_dict.keys():
                        processed_tuple_spo.append(
                            URIRef(param_variables_dict[item]))

                    elif item in bgp_variables_dict.keys():
                        processed_tuple_spo.append(
                            URIRef(bgp_variables_dict[item]))

                    elif isinstance(item, URIRef):
                        # processed_tuple_spo.append(item)
                        if item not in uri_seen_dict.keys():
                            uri_seen_count += 1
                            uri_seen_dict[item] = "uri_ref_%d" % uri_seen_count
                            processed_tuple_spo.append(
                                URIRef(uri_seen_dict[item]))
                        elif item in uri_seen_dict.keys():
                            processed_tuple_spo.append(
                                URIRef(uri_seen_dict[item]))

                processed_tuple_spo = tuple(processed_tuple_spo)
                self.processed_triples.append(processed_tuple_spo)
                self.processed_graph.add(processed_tuple_spo)

        except Exception:
            # Queries without a plain BGP block are silently skipped.
            pass

    def find_p_name_list(self, p_name):
        def find_p_name(algebra_expression, p_name):
            if not isinstance(algebra_expression, CompValue):
                raise Exception('query algebra format not found')
            elif isinstance(algebra_expression, CompValue):
                if algebra_expression.name == p_name:
                    return algebra_expression
                else:
                    return find_p_name(algebra_expression['p'], p_name)

        algebra_expression = self.algebra

        try:
            return find_p_name(algebra_expression, p_name)

        except Exception:
            pass

    def get_graph(self):
        # self.graph is never populated (its construction is commented out
        # above), so return the processed graph instead.
        return self.processed_graph

    def compare_for_isomorphism(self,
                                template_graph,
                                ignore_labels_of=["BNode"]):
        # BNode labels are ignored when the IsomorphicTestableGraph
        # comparison method is used.

        if "BNode" in ignore_labels_of:
            # As in get_graph above, self.graph is never populated, so
            # compare the processed graph instead.
            return self.processed_graph == template_graph

        # elif "URIRef" in ignore_labels_of:
        #     return

    # def compare_for_equality(self, template_graph):
    #     # Uses an algorithm to compute unique hashes which takes bnodes into account.
    #     return isomorphic(self.graph, template_graph.graph)
    def compare_for_equality(self, template_graph):
        # Uses an algorithm to compute unique hashes which takes bnodes into account.
        return isomorphic(self.processed_graph, template_graph.processed_graph)

    # def compare_for_similarity(self, template_graph):
    #     # Checks if the two graphs are “similar”, by comparing sorted triples where all bnodes have been replaced
    #     # by a singular mock bnode (the _MOCK_BNODE).
    #     return similar(self.graph, template_graph.graph)
    def compare_for_similarity(self, template_graph):
        # Checks if the two graphs are “similar”, by comparing sorted triples where all bnodes have been replaced
        # by a singular mock bnode (the _MOCK_BNODE).
        return similar(self.processed_graph, template_graph.processed_graph)

    def write_image(self, display=False):
        """ TODO """

        # The output file name is illustrative; the original interpolated the
        # graph's repr into it, which is unlikely to be intended.
        with open("query_graph.dot", 'w') as f:
            rdf2dot(self.processed_graph, f)
            if display:
                Source.main(f)
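A hedged usage sketch for QueryGraph: two queries that differ only in variable names should compare as equal, because variables and URIs are both anonymized before the hash-based isomorphism check (the queries and empty prefix bindings are illustrative):

q1 = "SELECT ?s WHERE { ?s <http://example.org/p> ?o }"
q2 = "SELECT ?x WHERE { ?x <http://example.org/p> ?y }"

qg1 = QueryGraph(q1, prefix_bindings={})
qg2 = QueryGraph(q2, prefix_bindings={})
# Both queries normalize to the same processed graph, so this should print True.
print(qg1.compare_for_equality(qg2))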
Exemple #46
0
    def load_dataset(self, file_path):
        g = Graph()
        g.parse(
            file_path,
            format="nt")  # file to parse data from (should be in nt format)

        # initializing adjacency matrix - a 3D matrix whose first and second
        # dimensions are entity indices and whose third dimension is the
        # relation index; an entry is one when the triple holds, else zero

        # loop in g for each triplet
        index_e, index_r = 0, 0  # each entity and relation is mapped to an integer
        triplet_dict_entity = {}  # mapping dictionary from entities to integers
        triplet_dict_relation = {}  # mapping dictionary from relations to integers
        counter_e = []
        counter_r = []
        dataset = []
        for stmt in g:
            subject = stmt[0]  # subject entity
            relation = stmt[1]  # predicate relation
            object = stmt[2]  # object entity
            if subject in triplet_dict_entity.keys():
                index = triplet_dict_entity[subject]
                counter_e[index] += 1
            else:
                triplet_dict_entity[subject] = index_e
                counter_e.append(1)
                index_e += 1

            if object in triplet_dict_entity.keys():
                index = triplet_dict_entity[object]
                counter_e[index] += 1
            else:
                triplet_dict_entity[object] = index_e
                counter_e.append(1)
                index_e += 1

            if relation in triplet_dict_relation.keys():
                index = triplet_dict_relation[relation]
                counter_r[index] += 1
            else:
                triplet_dict_relation[relation] = index_r
                counter_r.append(1)
                index_r += 1

            int_s = triplet_dict_entity[subject]
            int_o = triplet_dict_entity[object]
            int_r = triplet_dict_relation[relation]
            dataset.append([int_s, int_o, int_r])

        self.entity_to_integer_mapping = triplet_dict_entity
        self.relation_to_integer_mapping = triplet_dict_relation
        num_entities = len(triplet_dict_entity)
        num_relations = len(triplet_dict_relation)
        labels = np.ones((len(dataset), 1))  # labels

        # initializing adjacency matrix - a 3D matrix whose first and second
        # dimensions are entity indices and whose third dimension is the
        # relation index; an entry is one when the triple holds, else zero
        self.adj_matrix = np.zeros((num_entities, num_entities, num_relations))
        for row in dataset:
            idx_0 = int(row[0])  # entity s mapped integer
            idx_1 = int(row[1])  # entity o mapped integer
            idx_2 = int(row[2])  # relation mapped integer
            self.adj_matrix[idx_0][idx_1][
                idx_2] = 1  # make the triplet entry 1

        # randomly synthesizing false facts: generate as many false facts as
        # there are true triples in the dataset
        critical_size = len(dataset)
        count = 0
        while count < critical_size:
            # randrange(n) yields 0..n-1, so every index can be drawn
            idx_0 = random.randrange(num_entities)
            idx_1 = random.randrange(num_entities)
            idx_2 = random.randrange(num_relations)
            if self.adj_matrix[idx_0][idx_1][idx_2] == 0:
                count += 1
                dataset.append([idx_0, idx_1, idx_2])

        X = np.array(dataset)  # triplets of true and false facts
        y = np.concatenate((labels, np.zeros((count, 1))), axis=0)
        X, y = shuffle(X, y)  # randomly shuffle the dataset

        return X, y, num_entities, num_relations
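A brief usage sketch for the loader above; the enclosing class is not shown, so KGDataset and facts.nt are illustrative placeholders:

# Hypothetical usage: 'KGDataset' stands in for the unshown enclosing class
# and 'facts.nt' for an N-Triples file on disk.
dataset = KGDataset()
X, y, num_entities, num_relations = dataset.load_dataset("facts.nt")
print(X.shape, y.shape, num_entities, num_relations)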
Exemple #47
0
def triples_list_to_graph(triples_list):
    q_graph = Graph()
    for tuple in triples_list:
        q_graph.add(tuple)
    return q_graph
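A short usage sketch for the helper above (the triples are illustrative):

from rdflib import Literal, URIRef

triples = [
    (URIRef("http://example.org/alice"),
     URIRef("http://example.org/knows"),
     URIRef("http://example.org/bob")),
    (URIRef("http://example.org/alice"),
     URIRef("http://example.org/name"),
     Literal("Alice")),
]
g = triples_list_to_graph(triples)
print(len(g))  # 2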
Exemple #48
0
class MetadataCache(with_metaclass(abc.ABCMeta, object)):
    """Super-class for all metadata cache implementations.

    """
    def __init__(self, store, cache_uri):
        self.store = store
        self.cache_uri = cache_uri
        self.graph = Graph(store=self.store, identifier=_DB_IDENTIFIER)
        self.is_open = False
        self.catalog_source = _GUTENBERG_CATALOG_URL

    @property
    def exists(self):
        """Detect if the cache exists.

        """
        return os.path.exists(self._local_storage_path)

    def open(self):
        """Opens an existing cache.

        """
        try:
            self.graph.open(self.cache_uri, create=False)
            self._add_namespaces(self.graph)
            self.is_open = True
        except Exception:
            raise InvalidCacheException('The cache is invalid or not created')

    def close(self):
        """Closes an opened cache.

        """
        self.graph.close()
        self.is_open = False

    def delete(self):
        """Delete the cache.

        """
        self.close()
        remove(self._local_storage_path)

    def populate(self):
        """Populates a new cache.

        """
        if self.exists:
            raise CacheAlreadyExistsException('location: %s' % self.cache_uri)

        self._populate_setup()

        with closing(self.graph):
            with self._download_metadata_archive() as metadata_archive:
                for fact in self._iter_metadata_triples(metadata_archive):
                    self._add_to_graph(fact)

    def _add_to_graph(self, fact):
        """Adds a (subject, predicate, object) RDF triple to the graph.

        """
        self.graph.add(fact)

    def _populate_setup(self):
        """Executes operations necessary before the cache can be populated.

        """
        pass

    def refresh(self):
        """Refresh the cache by deleting the old one and creating a new one.

        """
        if self.exists:
            self.delete()
        self.populate()
        self.open()

    @property
    def _local_storage_path(self):
        """Returns a path to the on-disk structure of the cache.

        """
        return self.cache_uri

    @staticmethod
    def _add_namespaces(graph):
        """Function to ensure that the graph always has some specific namespace
        aliases set.

        """
        graph.bind('pgterms', PGTERMS)
        graph.bind('dcterms', DCTERMS)

    @contextmanager
    def _download_metadata_archive(self):
        """Makes a remote call to the Project Gutenberg servers and downloads
        the entire Project Gutenberg meta-data catalog. The catalog describes
        the texts on Project Gutenberg in RDF. The function returns a
        file-pointer to the catalog.

        """
        with tempfile.NamedTemporaryFile(delete=False) as metadata_archive:
            shutil.copyfileobj(urlopen(self.catalog_source), metadata_archive)
        yield metadata_archive.name
        remove(metadata_archive.name)

    @classmethod
    def _metadata_is_invalid(cls, fact):
        """Determines if the fact is not well formed.

        """
        return any(
            isinstance(token, URIRef) and ' ' in token for token in fact)

    @classmethod
    def _iter_metadata_triples(cls, metadata_archive_path):
        """Yields all meta-data of Project Gutenberg texts contained in the
        catalog dump.

        """
        pg_rdf_regex = re.compile(r'pg\d+.rdf$')
        with closing(tarfile.open(metadata_archive_path)) as metadata_archive:
            for item in metadata_archive:
                if pg_rdf_regex.search(item.name):
                    with disable_logging():
                        extracted = metadata_archive.extractfile(item)
                        graph = Graph().parse(extracted)
                    for fact in graph:
                        if cls._metadata_is_invalid(fact):
                            logging.info('skipping invalid triple %s', fact)
                        else:
                            yield fact
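A hedged usage sketch for the cache above; the store name and cache path are illustrative, and populate() downloads the full catalog dump:

# Hypothetical usage; 'Sleepycat' is one rdflib store plugin name and the
# cache path is illustrative.
cache = MetadataCache(store='Sleepycat', cache_uri='/tmp/gutenberg-cache')
if not cache.exists:
    cache.populate()  # downloads and parses the whole catalog
cache.open()
print(len(cache.graph))  # number of cached metadata triples
cache.close()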
Exemple #49
0
    def __init__(self, sparql_query, prefix_bindings):
        """sparql_query is query string, which will be converted into sparqlAlgebra
        during construction.
        """

        ## using direct algebra
        query_algebra = prepareQuery(sparql_query, initNs=prefix_bindings)
        # pprintAlgebra(query_algebra)

        self.algebra = query_algebra.algebra

        self.query_type = query_algebra.algebra.name
        # print(query_type)

        self.parameter_variables = query_algebra.algebra['PV']
        # print(*parameter_variables, sep=', ')

        bgp_expression = self.find_p_name_list('BGP')
        # self.bgp_variables = query_algebra.algebra['p']['p']['p']['_vars']

        try:
            # Create queryGraph
            # self.graph = Graph()
            # for triple in triples_list:
            #     self.graph.add(triple)

            # Create processed queryGraph
            self.processed_graph = Graph()

            self.bgp_variables = bgp_expression['_vars']
            self.triples_list = bgp_expression['triples']
            self.processed_triples = []

            bgp_variables_dict = {}
            var_count = 0
            param_variables_dict = {}
            param_var_count = 0

            for variable in self.bgp_variables:
                if variable not in self.parameter_variables:
                    var_count += 1
                    bgp_variables_dict[variable] = "var%d" % var_count

            for pv in self.parameter_variables:
                param_var_count += 1
                param_variables_dict[pv] = "param_var%d" % param_var_count

            # anonymizing URIs
            uri_seen_dict = {}
            uri_seen_count = 0
            for tuple_spo in self.triples_list:
                processed_tuple_spo = []
                for item in tuple_spo:
                    if item in param_variables_dict.keys():
                        processed_tuple_spo.append(
                            URIRef(param_variables_dict[item]))

                    elif item in bgp_variables_dict.keys():
                        processed_tuple_spo.append(
                            URIRef(bgp_variables_dict[item]))

                    elif isinstance(item, URIRef):
                        # processed_tuple_spo.append(item)
                        if item not in uri_seen_dict.keys():
                            uri_seen_count += 1
                            uri_seen_dict[item] = "uri_ref_%d" % uri_seen_count
                            processed_tuple_spo.append(
                                URIRef(uri_seen_dict[item]))
                        elif item in uri_seen_dict.keys():
                            processed_tuple_spo.append(
                                URIRef(uri_seen_dict[item]))

                processed_tuple_spo = tuple(processed_tuple_spo)
                self.processed_triples.append(processed_tuple_spo)
                self.processed_graph.add(processed_tuple_spo)

        except Exception:
            # Queries without a plain BGP block are silently skipped.
            pass
Exemple #50
0
    ns = Namespace("http://love.com#")

    mary = URIRef("http://love.com/lovers/mary")
    john = URIRef("http://love.com/lovers/john")

    cmary = URIRef("http://love.com/lovers/mary")
    cjohn = URIRef("http://love.com/lovers/john")

    store = Memory()

    g = ConjunctiveGraph(store=store)
    g.bind("love", ns)

    # add a graph for Mary's facts to the Conjunctive Graph
    gmary = Graph(store=store, identifier=cmary)
    # Mary's graph only contains the URI of the person she loves, not his cute name
    gmary.add((mary, ns["hasName"], Literal("Mary")))
    gmary.add((mary, ns["loves"], john))

    # add a graph for John's facts to the Conjunctive Graph
    gjohn = Graph(store=store, identifier=cjohn)
    # John's graph contains his cute name
    gjohn.add((john, ns["hasCuteName"], Literal("Johnny Boy")))

    # enumerate contexts
    for c in g.contexts():
        print("-- %s " % c)

    # separate graphs
    print(gjohn.serialize(format="n3"))
Exemple #51
0
from io import StringIO

rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)

rules = u"""
@prefix owl: <http://www.w3.org/2002/07/owl#> .
{ ?x owl:sameAs ?y } => { ?y owl:sameAs ?x } .
{ ?x owl:sameAs ?y . ?x ?p ?o } => { ?y ?p ?o } .
"""

for rule in HornFromN3(StringIO(rules)):
    network.buildNetworkFromClause(rule)

facts = """
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix ex: <http://example.org/> .
@prefix exterms: <http://example.org/terms/> .
ex:foo
        a exterms:Something ;
        exterms:hasX "blah blah" ;
        owl:sameAs ex:bar .
ex:bar
        exterms:hasY "yyyy" .
"""
g = Graph()
g.parse(data=facts, format="n3")

network.feedFactsToAdd(generateTokenSet(g))

print(network.inferredFacts.serialize(format="n3"))
Exemple #52
0
from tardis.tardis_portal.auth import decorators as authz
from tardis.tardis_portal.models import \
    Experiment, Schema
from tardis.tardis_portal.shortcuts import render_response_index, \
    return_response_error, return_response_not_found, \
    RestfulExperimentParameterSet

from .forms import FoRCodeForm

SCHEMA_URI = 'http://purl.org/asc/1297.0/2008/for/'
PARAMETER_NAMES = FoRCodeForm().fields.keys()

plugin.register('application/octet-stream', Parser,
                'rdflib.plugins.parsers.notation3', 'N3Parser')

for_graph = Graph()
for_graph.parse(SCHEMA_URI)


def _get_schema_func(schema_uri):
    def get_schema():
        try:
            return Schema.objects.get(namespace=schema_uri)
        except Schema.DoesNotExist:
            from django.core.management import call_command
            call_command('loaddata', 'anzsrc_for_schema')
            return get_schema()

    return get_schema

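A small follow-up sketch for the vocabulary graph loaded above; it only prints a handful of statements, since the exact shape of the FoR RDF is not shown here:

# Peek at the first few statements of the FoR vocabulary graph.
for i, (s, p, o) in enumerate(for_graph):
    print(s, p, o)
    if i >= 4:
        break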
Exemple #53
0
    def startElementNS(self, name, qname, attrs):

        if name[0] != str(TRIXNS):
            self.error(
                "Only elements in the TriX namespace are allowed. %s!=%s"
                % (name[0], TRIXNS))

        if name[1] == "TriX":
            if self.state == 0:
                self.state = 1
            else:
                self.error("Unexpected TriX element")

        elif name[1] == "graph":
            if self.state == 1:
                self.state = 2
            else:
                self.error("Unexpected graph element")

        elif name[1] == "uri":
            if self.state == 2:
                # the context uri
                self.state = 3
            elif self.state == 4:
                # part of a triple
                pass
            else:
                self.error("Unexpected uri element")

        elif name[1] == "triple":
            if self.state == 2:
                if self.graph is None:
                    # anonymous graph, create one with random bnode id
                    self.graph = Graph(store=self.store.store)
                # start of a triple
                self.triple = []
                self.state = 4
            else:
                self.error("Unexpected triple element")

        elif name[1] == "typedLiteral":
            if self.state == 4:
                # part of triple
                self.lang = None
                self.datatype = None

                try:
                    self.lang = attrs.getValue((unicode(XMLNS), u"lang"))
                except KeyError:
                    # language is not required - ignore
                    pass
                try:
                    self.datatype = attrs.getValueByQName(u"datatype")
                except KeyError:
                    self.error("No required attribute 'datatype'")
            else:
                self.error("Unexpected typedLiteral element")

        elif name[1] == "plainLiteral":
            if self.state == 4:
                # part of triple
                self.lang = None
                self.datatype = None
                try:
                    self.lang = attrs.getValue((unicode(XMLNS), u"lang"))
                except KeyError:
                    # language is not required - ignore
                    pass

            else:
                self.error("Unexpected plainLiteral element")

        elif name[1] == "id":
            if self.state == 2:
                # the context uri
                self.state = 3

            elif self.state == 4:
                # part of triple
                pass
            else:
                self.error("Unexpected id element")

        else:
            self.error("Unknown element %s in TriX namespace" % name[1])

        self.chars = ""
Exemple #54
0
from rdflib.graph import Graph
from location.KGraph import read_RDF_Graph_and_store_Locations
import time

################################
#   reads the OS_extended and OS_new files, gets the locations
#   and stores them in the datasets/locations_csv/locations.csv and .json
################################
start = time.time()

extended_part1_file_RDF_graph = Graph()
extended_part1_file_RDF_graph.parse(
    "C:/Users/panai/Desktop/yago2geo_uk/os/OS_extended_p1.ttl", format="n3")
print("len", len(extended_part1_file_RDF_graph))
#
extended_part2_file_RDF_graph = Graph()
extended_part2_file_RDF_graph.parse(
    "C:/Users/panai/Desktop/yago2geo_uk/os/OS_extended_p2.ttl", format="n3")
print("len", len(extended_part2_file_RDF_graph))
# #
extended_part3_file_RDF_graph = Graph()
extended_part3_file_RDF_graph.parse(
    "C:/Users/panai/Desktop/yago2geo_uk/os/OS_extended_p3.ttl", format="n3")
print("len", len(extended_part3_file_RDF_graph))
# # #
extended_part4_file_RDF_graph = Graph()
extended_part4_file_RDF_graph.parse(
    "C:/Users/panai/Desktop/yago2geo_uk/os/OS_extended_p4.ttl", format="n3")
print("len", len(extended_part4_file_RDF_graph))
# # # #
new_file_RDF_graph = Graph()
Exemple #55
0
    def endElementNS(self, name, qname):
        if name[0] != str(TRIXNS):
            self.error(
                "Only elements in the TriX namespace are allowed. %s!=%s"
                % (name[0], TRIXNS))

        if name[1] == "uri":
            if self.state == 3:
                self.graph = Graph(store=self.store.store,
                                   identifier=URIRef(self.chars.strip()))
                self.state = 2
            elif self.state == 4:
                self.triple += [URIRef(self.chars.strip())]
            else:
                self.error(
                    "Illegal internal self.state - This should never " +
                    "happen if the SAX parser ensures XML syntax correctness")

        elif name[1] == "id":
            if self.state == 3:
                self.graph = Graph(self.store.store, identifier=self.get_bnode(
                    self.chars.strip()))
                self.state = 2
            elif self.state == 4:
                self.triple += [self.get_bnode(self.chars.strip())]
            else:
                self.error(
                    "Illegal internal self.state - This should never " +
                    "happen if the SAX parser ensures XML syntax correctness")

        elif name[1] == "plainLiteral" or name[1] == "typedLiteral":
            if self.state == 4:
                self.triple += [Literal(
                    self.chars, lang=self.lang, datatype=self.datatype)]
            else:
                self.error(
                    "This should never happen if the SAX parser " +
                    "ensures XML syntax correctness")

        elif name[1] == "triple":
            if self.state == 4:
                if len(self.triple) != 3:
                    self.error("Triple has wrong length, got %d elements: %s" %
                               (len(self.triple), self.triple))

                self.graph.add(self.triple)
                #self.store.store.add(self.triple,context=self.graph)
                #self.store.addN([self.triple+[self.graph]])
                self.state = 2
            else:
                self.error(
                    "This should never happen if the SAX parser " +
                    "ensures XML syntax correctness")

        elif name[1] == "graph":
            self.graph = None
            self.state = 1

        elif name[1] == "TriX":
            self.state = 0

        else:
            self.error("Unexpected close element")
Exemple #56
0
    def create(self, conjunctive=False, gid=None, loader=None, format=None):
        try:
            if conjunctive:
                uuid = shortuuid.uuid()
                g = get_triple_store(self.__persist_mode,
                                     base=self.__base_path,
                                     path=uuid)
                return g
            else:
                p = self._r.pipeline(transaction=True)
                p.multi()

                g = Graph(identifier=gid)

                lock = self.uri_lock(gid)
                with lock:
                    uuid = self._r.hget('{}:gids'.format(self.__cache_key),
                                        gid)
                    if not uuid:
                        uuid = shortuuid.uuid()
                        p.hset('{}:gids'.format(self.__cache_key), gid, uuid)

                    gid_key = '{}:{}'.format(self.__cache_key, uuid)

                    ttl_ts = self._r.hget(gid_key, 'ttl')
                    if ttl_ts is not None:
                        ttl_dt = dt.utcfromtimestamp(int(ttl_ts))
                        now = dt.utcnow()
                        if ttl_dt > now:
                            try:
                                g = self.__recall(gid)
                            except KeyError:
                                source_z = self._r.hget(gid_key, 'data')
                                source = zlib.decompress(source_z)
                                g.parse(StringIO(source), format=format)
                                self.__memoize(gid, g)

                            ttl = math.ceil(
                                (ttl_dt - dt.utcnow()).total_seconds())
                            return g, int(ttl)

                    log.debug('Caching {}'.format(gid))
                    response = loader(gid, format)
                    if response is None and loader != http_get:
                        response = http_get(gid, format)

                    if isinstance(response, bool):
                        return response

                    ttl = self.__min_cache_time
                    source, headers = response
                    if not isinstance(source, Graph) and not isinstance(
                            source, ConjunctiveGraph):
                        parse_rdf(g, source, format, headers)
                        data = g.serialize(format='turtle')
                    else:
                        data = source.serialize(format='turtle')
                        for prefix, ns in source.namespaces():
                            g.bind(prefix, ns)
                        g += source

                    self.__memoize(gid, g)

                    if not self.__force_cache_time:
                        ttl = extract_ttl(headers) or ttl

                    p.hset(gid_key, 'data', zlib.compress(data))
                    ttl_ts = calendar.timegm(
                        (dt.utcnow() + delta(seconds=ttl)).timetuple())
                    p.hset(gid_key, 'ttl', ttl_ts)
                    p.expire(gid_key, ttl)
                    p.execute()
                return g, int(ttl)
        except ConnectionError as e:
            raise EnvironmentError(str(e))
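
The TTL bookkeeping above can be isolated into a small, self-contained sketch: the expiry instant is stored as a UTC epoch timestamp, and the remaining TTL is recomputed from it on every cache hit. The function names here are illustrative, not part of the example:

import calendar
import math
from datetime import datetime as dt, timedelta as delta

def expiry_timestamp(ttl_seconds):
    # Store the expiry instant as a UTC epoch timestamp, as the cache does.
    return calendar.timegm(
        (dt.utcnow() + delta(seconds=ttl_seconds)).timetuple())

def remaining_ttl(ttl_ts):
    # On a cache hit, recompute the remaining TTL from the stored timestamp.
    ttl_dt = dt.utcfromtimestamp(int(ttl_ts))
    return int(math.ceil((ttl_dt - dt.utcnow()).total_seconds()))

ts = expiry_timestamp(300)
assert 0 < remaining_ttl(ts) <= 300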
Exemple #57
0
    def __init__(self, store, cache_uri):
        self.store = store
        self.cache_uri = cache_uri
        self.graph = Graph(store=self.store, identifier=_DB_IDENTIFIER)
        self.is_open = False
        self.catalog_source = _GUTENBERG_CATALOG_URL
Exemple #58
0
class TriXHandler(handler.ContentHandler):
    """An Sax Handler for TriX. See http://sw.nokia.com/trix/"""

    def __init__(self, store):
        self.store = store
        self.preserve_bnode_ids = False
        self.reset()

    def reset(self):
        self.bnode = {}
        self.graph = self.store
        self.triple = None
        self.state = 0
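        # State machine used by the SAX callbacks below:
        #   0 = outside the document, 1 = inside <TriX>,
        #   2 = inside <graph>, 3 = reading the graph identifier,
        #   4 = inside <triple>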
        self.lang = None
        self.datatype = None

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self.locator = locator

    def startDocument(self):
        pass

    def startPrefixMapping(self, prefix, namespace):
        pass

    def endPrefixMapping(self, prefix):
        pass

    def startElementNS(self, name, qname, attrs):

        if name[0] != str(TRIXNS):
            self.error(
                "Only elements in the TriX namespace are allowed. %s!=%s"
                % (name[0], TRIXNS))

        if name[1] == "TriX":
            if self.state == 0:
                self.state = 1
            else:
                self.error("Unexpected TriX element")

        elif name[1] == "graph":
            if self.state == 1:
                self.state = 2
            else:
                self.error("Unexpected graph element")

        elif name[1] == "uri":
            if self.state == 2:
                # the context uri
                self.state = 3
            elif self.state == 4:
                # part of a triple
                pass
            else:
                self.error("Unexpected uri element")

        elif name[1] == "triple":
            if self.state == 2:
                if self.graph is None:
                    # anonymous graph, create one with random bnode id
                    self.graph = Graph(store=self.store.store)
                # start of a triple
                self.triple = []
                self.state = 4
            else:
                self.error("Unexpected triple element")

        elif name[1] == "typedLiteral":
            if self.state == 4:
                # part of triple
                self.lang = None
                self.datatype = None

                try:
                    self.lang = attrs.getValue((unicode(XMLNS), u"lang"))
                except KeyError:
                    # language is optional - ignore if absent
                    pass
                try:
                    self.datatype = attrs.getValueByQName(u"datatype")
                except KeyError:
                    self.error("Missing required attribute 'datatype'")
            else:
                self.error("Unexpected typedLiteral element")

        elif name[1] == "plainLiteral":
            if self.state == 4:
                # part of triple
                self.lang = None
                self.datatype = None
                try:
                    self.lang = attrs.getValue((unicode(XMLNS), u"lang"))
                except KeyError:
                    # language is optional - ignore if absent
                    pass

            else:
                self.error("Unexpected plainLiteral element")

        elif name[1] == "id":
            if self.state == 2:
                # the context uri
                self.state = 3

            elif self.state == 4:
                # part of triple
                pass
            else:
                self.error("Unexpected id element")

        else:
            self.error("Unknown element %s in TriX namespace" % name[1])

        self.chars = ""

    def endElementNS(self, name, qname):
        if name[0] != str(TRIXNS):
            self.error(
                "Only elements in the TriX namespace are allowed. %s!=%s"
                % (name[0], TRIXNS))

        if name[1] == "uri":
            if self.state == 3:
                self.graph = Graph(store=self.store.store,
                                   identifier=URIRef(self.chars.strip()))
                self.state = 2
            elif self.state == 4:
                self.triple += [URIRef(self.chars.strip())]
            else:
                self.error(
                    "Illegal internal self.state - This should never " +
                    "happen if the SAX parser ensures XML syntax correctness")

        elif name[1] == "id":
            if self.state == 3:
                self.graph = Graph(self.store.store, identifier=self.get_bnode(
                    self.chars.strip()))
                self.state = 2
            elif self.state == 4:
                self.triple += [self.get_bnode(self.chars.strip())]
            else:
                self.error(
                    "Illegal internal self.state - This should never " +
                    "happen if the SAX parser ensures XML syntax correctness")

        elif name[1] == "plainLiteral" or name[1] == "typedLiteral":
            if self.state == 4:
                self.triple += [Literal(
                    self.chars, lang=self.lang, datatype=self.datatype)]
            else:
                self.error(
                    "This should never happen if the SAX parser " +
                    "ensures XML syntax correctness")

        elif name[1] == "triple":
            if self.state == 4:
                if len(self.triple) != 3:
                    self.error("Triple has wrong length, got %d elements: %s" %
                               (len(self.triple), self.triple))

                self.graph.add(self.triple)
                #self.store.store.add(self.triple,context=self.graph)
                #self.store.addN([self.triple+[self.graph]])
                self.state = 2
            else:
                self.error(
                    "This should never happen if the SAX parser " +
                    "ensures XML syntax correctness")

        elif name[1] == "graph":
            self.graph = None
            self.state = 1

        elif name[1] == "TriX":
            self.state = 0

        else:
            self.error("Unexpected close element")

    def get_bnode(self, label):
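        # When preserve_bnode_ids is set, reuse the TriX label verbatim;
        # otherwise mint one fresh BNode per distinct label and reuse it,
        # so repeated references within the document map to the same node.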
        if self.preserve_bnode_ids:
            bn = BNode(label)
        else:
            if label in self.bnode:
                bn = self.bnode[label]
            else:
                bn = BNode(label)
                self.bnode[label] = bn
        return bn

    def characters(self, content):
        self.chars += content

    def ignorableWhitespace(self, content):
        pass

    def processingInstruction(self, target, data):
        pass

    def error(self, message):
        locator = self.locator
        info = "%s:%s:%s: " % (
            locator.getSystemId(),
            locator.getLineNumber(),
            locator.getColumnNumber())
        raise ParserError(info + message)
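
For completeness, a minimal sketch of how a SAX handler like this is typically driven; this mirrors the way rdflib wires up TriX parsing, with `store` standing in for the destination graph store:

from xml.sax import make_parser
from xml.sax.handler import feature_namespaces

# Build a namespace-aware SAX parser and attach the TriX handler.
parser = make_parser()
parser.setFeature(feature_namespaces, 1)
handler = TriXHandler(store)
parser.setContentHandler(handler)

# Parsing the source fires startElementNS/endElementNS above,
# which populate the graphs in the store.
parser.parse("example.trix")  # illustrative file name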
Exemple #59
0
    def __init__(
        self,
        ctx: ContextType,
        schemagraph: Optional[Graph] = None,
        foreign_properties: Optional[Set[str]] = None,
        idx: Optional[IdxType] = None,
        cache: Optional[CacheType] = None,
        session: Optional[requests.sessions.Session] = None,
        fetcher_constructor: Optional[FetcherCallableType] = None,
        skip_schemas: Optional[bool] = None,
        url_fields: Optional[Set[str]] = None,
        allow_attachments: Optional[AttachmentsType] = None,
        doc_cache: Union[str, bool] = True,
    ) -> None:

        self.idx = (NormDict(lambda url: urllib.parse.urlsplit(url).geturl())
                    if idx is None else idx)  # type: IdxType

        self.ctx = {}  # type: ContextType
        self.graph = schemagraph if schemagraph is not None else Graph()
        self.foreign_properties = (set(foreign_properties) if
                                   foreign_properties is not None else set())
        self.cache = cache if cache is not None else {}
        self.skip_schemas = skip_schemas if skip_schemas is not None else False

        if session is None:
            if doc_cache is False:
                self.session = requests.Session()
            elif doc_cache is True:
                if "HOME" in os.environ:
                    self.session = CacheControl(
                        requests.Session(),
                        cache=FileCache(
                            os.path.join(os.environ["HOME"], ".cache",
                                         "salad")),
                    )
                elif "TMP" in os.environ:
                    self.session = CacheControl(
                        requests.Session(),
                        cache=FileCache(
                            os.path.join(os.environ["TMP"], ".cache",
                                         "salad")),
                    )
                else:
                    self.session = CacheControl(
                        requests.Session(),
                        cache=FileCache(os.path.join("/tmp", ".cache",
                                                     "salad")),
                    )
            elif isinstance(doc_cache, str):
                self.session = CacheControl(requests.Session(),
                                            cache=FileCache(doc_cache))
        else:
            self.session = session

        self.fetcher_constructor = (fetcher_constructor if fetcher_constructor
                                    is not None else DefaultFetcher)
        self.fetcher = self.fetcher_constructor(self.cache, self.session)
        self.fetch_text = self.fetcher.fetch_text
        self.check_exists = self.fetcher.check_exists
        self.url_fields = (set() if url_fields is None else set(url_fields)
                           )  # type: Set[str]
        self.scoped_ref_fields = {}  # type: Dict[str, int]
        self.vocab_fields = set()  # type: Set[str]
        self.identifiers = []  # type: List[str]
        self.identity_links = set()  # type: Set[str]
        self.standalone = None  # type: Optional[Set[str]]
        self.nolinkcheck = set()  # type: Set[str]
        self.vocab = {}  # type: Dict[str, str]
        self.rvocab = {}  # type: Dict[str, str]
        self.idmap = {}  # type: Dict[str, str]
        self.mapPredicate = {}  # type: Dict[str, str]
        self.type_dsl_fields = set()  # type: Set[str]
        self.subscopes = {}  # type:  Dict[str, str]
        self.secondaryFile_dsl_fields = set()  # type: Set[str]
        self.allow_attachments = allow_attachments

        self.add_context(ctx)
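
The session setup above reduces to a single pattern from the cachecontrol library; a minimal sketch (the cache directory and URL are illustrative):

import requests
from cachecontrol import CacheControl
from cachecontrol.caches import FileCache

# Wrap a requests session so that HTTP responses are cached on disk.
session = CacheControl(requests.Session(),
                       cache=FileCache("/tmp/.cache/salad"))
response = session.get("https://example.org/schema.yml")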
Exemple #60
0
class ProxyStore(Store):
    """
    A Proxy store implemention.

   :param configuration: Can be a string or a dictionary. May be 
        passed to __init__() or to open(). Specified as a
        configuration string (store database connection string). For
        KTBS, it is preferably a dictionary which may contain
        credentials for HTTP requests, the URI of the graph and an
        httpresponse supplied by the client (contains an RDF
        serialized graph already posted with HTTPLIB2 and the header
        of the response). If the parameters are in a string, the
        format should be "key1:value1;key2:value2".  May be passed to
        __init__() or to open().  Optionnal.

    :param identifier:
        URIRef identifying the graph to cache in the store.

    See http://www.rdflib.net/store/ for the detail of a store.
    Take store.py for the squeletton.

    The real store is on a server accessed with a REST protocol.
    """

    # Already defined in the Store class
    context_aware = False
    formula_aware = False
    transaction_aware = False

    def __init__(self, configuration=None, identifier=None):
        """ ProxyStore initialization.

            Creates an empty Graph, intializes the HTTP client.
            Use the defaut for internal graph storage, i.e IOMemory.
            The URIref of the graph must be supplied either in identifier or
            in configuration parameter. It will be checked by open().
            The cache file path could be given in the configuration dictionary
            (__init__ only). We have to search about the memory cache.
        """

        LOG.debug("-- ProxyStore.init(configuration=%s, identifer=%s) --\n",
                  configuration, identifier)

        self._identifier = identifier
        self._format = None
        self._etags = None
        self._req_headers = {}

        self.configuration = None
        configuration = self._configuration_extraction(configuration)

        self._graph = Graph()

        # Most important parameters: identifier and graph address.
        # If not given, we cannot go further.
        if (identifier is not None) and len(identifier) > 0:
            if len(configuration) == 0:
                configuration = {PS_CONFIG_URI: identifier}

        # Show the network activity
        if PS_CONFIG_DEBUG_HTTP in configuration.keys():
            httplib2.debuglevel = 1

        # Use provided Http connection if any
        http_cx = configuration.get(PS_CONFIG_HTTP_CX)
        if http_cx is None:
            http_cx = httplib2.Http()
        else:
            assert isinstance(http_cx, httplib2.Http)
        self.httpserver = http_cx

        # Store will call open() if configuration is not None
        Store.__init__(self, configuration)

    @property
    def prefered_format(self):
        """The format that the remote server seems to prefer.

        Return a tuple (content_type, rdflib_format)
        """
        return _CONTENT_TYPE_SERIALIZERS.get(self._format, "text/turtle"), \
               (self._format or "turtle")

    def open(self, configuration, create=False):
        """ Opens the store specified by the configuration string. 
            For the ProxyStore, the identifier is the graph address.

            :param configuration: Usually a configuration string of the store 
                (for database connection). May contain credentials for HTTP 
                requests. Can be a string or a dictionary. May be passed to 
                __init__() or to open(). 
            :param create: True to create a store. This not meaningfull for the
                ProxyStore. Optionnal.


            :returns: * VALID_STORE on success
                      * UNKNOWN No identifier or wrong identifier
                      * NO_STORE
        """
        LOG.debug(
            "-- ProxyStore.open(configuration=%s, create=%s), "
            "identifier: %s --\n", configuration, create, self._identifier)

        self.configuration = self._configuration_extraction(configuration)

        if (self._identifier is None) or len(self._identifier) == 0:
            if PS_CONFIG_URI in self.configuration.keys():
                self._identifier = self.configuration[PS_CONFIG_URI]
            else:
                raise StoreIdentifierError(identifier=self._identifier)
        else:
            if (PS_CONFIG_URI in self.configuration.keys()) and \
               (self._identifier != self.configuration[PS_CONFIG_URI]):
                raise StoreIdentifierError(identifier=self._identifier)

        if PS_CONFIG_HTTP_RESPONSE in self.configuration.keys():
            # Serialized graph already sent by the client to the server.
            # Populate the graph with the server response; no need to pull
            # the data from the server again.
            if len(self.configuration[PS_CONFIG_HTTP_RESPONSE]) == 2:
                self._parse_header(\
                        self.configuration[PS_CONFIG_HTTP_RESPONSE][0])
                self._parse_content(\
                        self.configuration[PS_CONFIG_HTTP_RESPONSE][1])

        return VALID_STORE

    @staticmethod
    def _configuration_extraction(configuration):
        """ Extract configuration data passed to ProxyStore.

            What do we do if configuration is passed twice (once in __init__
            and again in open) ? For the moment, overwrite.

            For the moment, ignore invalid configuration parameters (no
            StoreInvalidConfigurationError exception).

            :param configuration: Usually a configuration string of the store
                (for database connection). May contain credentials for HTTP
                requests. Can be a string or a dictionary. May be passed to
                __init__() or to open(). Optional.

            :returns: A dictionary with the extracted configuration.
        """

        extracted_configuration = {}

        # TODO LATER ? if self.configuration is not None:
        if isinstance(configuration, types.DictType):
            extracted_configuration = configuration

        elif isinstance(configuration, types.StringTypes):

            if len(configuration) > 0:

                # Expect to get a "key1:value1;key2:value2;..." string.
                # If not formatted like this, nothing should be extracted.
                for item in configuration.split(";"):
                    # Split on the first ":" only, so that values which
                    # themselves contain colons (e.g. URIs) are preserved.
                    elems = item.split(":", 1)

                    if len(elems) == 2:
                        extracted_configuration[elems[0]] = elems[1]

        return extracted_configuration
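
    # For illustration (hypothetical keys and values), the configuration
    # string "credentials:user-pass;debug-http:1" is extracted above to
    # {"credentials": "user-pass", "debug-http": "1"}.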

    def _parse_header(self, header):
        """ Parses the header of the HTTP request or response.
            TODO LATER Analyse Content-Type HTTP header to determine
                 the serialization used
            TODO LATER The serialization must be stored

            :param header: Header of the HTTP request or response.
        """
        ctype = header.get("content-type", "text/turtle").split(";", 1)[0]
        self._format = _CONTENT_TYPE_PARSERS[ctype]

        LOG.debug(
            "-- ProxyStore._parse_header(), "
            "content-type=%s, self._format=%s --", ctype, self._format)

        self._etags = header.get('etag')

    def _parse_content(self, content):
        """ Parses the data in the content parameter to build the graph to 
            cache.

            :param content: HTTP data either received by ProxyStore or
                passed by the RDFREST client.
        """
        # Creates the graph
        LOG.debug("-- ProxyStore._parse_content() using %s format",
                  self._format)

        parse_format = self._format
        if parse_format == "nt":
            parse_format = "n3"  # seems to be more efficient!...
        self.remove((None, None, None), None)  # efficiently empties the graph
        # (much faster than removing the triples one by one via the graph)
        self._graph.parse(StringIO(content),
                          format=parse_format,
                          publicID=self._identifier)

    def _pull(self):
        """Update cache before an operation.
           This method must be called before each get-type request.
        """
        LOG.debug("-- _pull ... start ...")

        assert self._identifier is not None, "The store must be open."

        # TODO SOON - If there is a problem getting the graph (wrong
        # address, ...), set an indication to notify it
        req_headers = {
            "accept": ACCEPT,
        }

        req_headers.update(self._req_headers)

        self._req_headers.clear()

        header, content = self.httpserver.request(self._identifier,
                                                  headers=req_headers)
        LOG.debug("[received header]\n%s", header)

        # TODO SOON Refine, test and define use-cases
        # httplib2 raises a httplib2.ServerNotFoundError exception when ...
        # Throw a ResourceAccessError exception in case of HTTP 404, as we
        # have no better means at the moment
        if header.status == httplib.NOT_FOUND:
            raise ResourceAccessError(header.status, self._identifier,
                                      self.configuration)

        if not header.fromcache or self._format is None:
            LOG.debug("[received content]\n%s", content)

            if self._format is None:
                LOG.debug("Creating proxy graph  ....")
            else:
                LOG.debug("Updating proxy graph  ....")

            self._parse_header(header)
            self._parse_content(content)

        else:
            LOG.debug("Proxy graph is up to date ...")

        LOG.debug("-- _pull() ... stop ...")

    def force_refresh(self, clear_cache=False):
        """Forces the cache to be updated with HTTP specific headers.

        If `clear_cache` is False (default),
        etags will still be used, so the server may reply with a 304 Not Modified.
        If `clear_cache` is True,
        the cache will be cleared, so the content will have to be resent by the server.
        """
        LOG.debug("-- force_refresh called ()")

        if clear_cache:
            self._req_headers = {
                "Cache-Control": "no-cache",
            }
        else:
            self._req_headers = {
                "Cache-Control": "max-age=0",
            }

    def _push(self):
        """ Send data to server.
            Apply the modifications on the cache, trigger an exception if data
            has already been modified on the server.
        """
        LOG.debug("-- _push() ... start ... --")

        assert self._identifier is not None, "The store must be open."

        # TODO SOON : How to build the "PUT" request ?
        # Which data in the header ?
        # Which serialization ? The same as we received, but does rdflib
        # supply all kinds of parsing / serialization ?
        headers = {
            'Content-Type':
            '%s; charset=UTF-8' % _CONTENT_TYPE_SERIALIZERS[self._format],
            'Accept':
            ACCEPT,
        }
        if self._etags:
            headers['If-Match'] = self._etags
        data = self._graph.serialize(format=self._format)

        LOG.debug("[sent headers]\n%s", headers)
        LOG.debug("[sent data]\n%s", data)

        # TODO SOON : Analyze the server response
        #        The server will tell if the graph has changed
        #        The server will supply new ETags ... update the data with the
        # response
        rheader, rcontent = self.httpserver.request(self._identifier,
                                                    'PUT',
                                                    data,
                                                    headers=headers)

        LOG.debug("[response header]\n%s", rheader)
        LOG.debug("[response content]\n%s", rcontent)

        if rheader.status in (httplib.OK, ):
            self._parse_header(rheader)
        elif rheader.status in (httplib.PRECONDITION_FAILED, ):
            raise GraphChangedError(url=self._identifier, msg=rheader.status)
        elif str(rheader.status)[0] == "5":
            raise ServerError(url=self._identifier, msg=rheader.status)
        else:
            raise RuntimeError(
                "%s: %s %s\n%s" %
                (self._identifier, rheader.status, rheader.reason, rcontent))

        LOG.debug("-- _push() ... stop ... --")

    def add(self, triple, context=None, quoted=False):
        """ Add a triple to the store.
            Apply the modifications on the cache, trigger an exception if data
            has already been modified on the server.
            
            :param triple: Triple (subject, predicate, object) to add.
            :param context: 
            :param quoted: The quoted argument is interpreted by formula-aware
                stores to indicate this statement is quoted/hypothetical. It
                should be an error to not specify a context and have the
                quoted argument be True. It should also be an error for the
                quoted argument to be True when the store is not
                formula-aware.

            :returns: 
        """

        LOG.debug("-- ProxyStore.add(triple=%s, context=%s, quoted=%s) --",
                  triple, context, quoted)

        assert self._identifier is not None, "The store must be open."

        # TODO LATER : wrong, asserts are meant for catching bugs
        assert self._format is not None, "The store must be open."
        assert not quoted, "ProxyStore is not formula-aware"

        Store.add(self, triple, context, quoted)

        # The following line is adapted from the Sleepycat plugin:
        # Store.add(self, (subject, predicate, object), context, quoted)
        self._graph.add(triple)

    def remove(self, triple, context):
        """Remove the set of triples matching the pattern from the store

        :param triple: Triple (subject, predicate, object) to remove.
        :param context: 

        :returns: 
        """
        # pylint: disable-msg=W0222
        # Signature differs from the overridden method
        LOG.debug("-- ProxyStore.remove(triple=%s, context=%s) --", triple,
                  context)

        Store.remove(self, triple, context)

        if triple == (None, None, None):
            self._graph = Graph()
            # the default implementation of Graph is not efficient in doing
            # this, so better create a new empty one
        else:
            self._graph.store.remove(triple)

    def triples(self, triple, context=None):
        """ Returns an iterator over all the triples (within the conjunctive
        graph or just the given context) matching the given pattern.

        :param triple: Triple (subject, predicate, object) to remove.
        :param context: ProxyStore is not context aware but it's internal
            cache IOMemory store is. Avoid context parameter.

        :returns: An iterator over the triples.
        """
        LOG.debug("-- ProxyStore.triples(triple=%s, context=%s) --", triple,
                  context)

        Store.triples(self, triple)  #, context=None)

        self._pull()

        return self._graph.store.triples(triple)  #, context=None)

    def __len__(self, context=None):
        """ Number of statements in the store.

            :returns: The number of statements in the store.
        """
        self._pull()
        ret = len(self._graph)
        LOG.debug("******** __len__ : ProxyStore, nb statements %d", ret)
        return ret

    # ---------- Formula / Context Interfaces ----------
    #def contexts(self, triple=None):
    # Generator over all contexts in the graph. If triple is specified, a
    # generator over all contexts the triple is in.
    #def remove_context(self, identifier)
    # ---------- Formula / Context Interfaces ----------

    # ---------- Optional Transactional methods ----------
    def commit(self):
        """ Sends the modifications to the server.
        """
        self._push()

    def rollback(self):
        """ Cancel the modifications. Get the graph from the server.
        """
        self._pull()

    # ---------- Optional Transactional methods ----------

    def close(self, commit_pending_transaction=False):
        """ This closes the database connection. 

            :param commit_pending_transaction: Specifies whether to commit all
                pending transactions before closing (if the store is
                transactional). 
        """
        LOG.debug("******** close (%s) ", commit_pending_transaction)

        self._identifier = None
        self._etags = None
        self.configuration = None

        self._format = None
        self._graph.close()

        self.httpserver.clear_credentials()

    def destroy(self, configuration):
        """ This destroys the instance of the store identified by the
        configuration string.

        :param configuration: Configuration string identifying the store
        """
        LOG.debug("******** destroy (%s) ", configuration)

    def query(self,
              query,
              initNs=None,
              initBindings=None,
              queryGraph=None,
              **kw):
        """ I provide SPARQL query processing as a store.

        I simply pass through the query to the underlying graph. This prevents
        an external SPARQL engine to make multiple accesses to that store,
        which can generate HTTP traffic.
        """
        # initNs and initBindings are invalid names for pylint (C0103), but
        # method `query` is specified by rdflib, so #pylint: disable=C0103
        if initNs is None:
            initNs = {}
        if initBindings is None:
            initBindings = {}
        self._pull()
        return self._graph.query(query,
                                 initNs=initNs,
                                 initBindings=initBindings,
                                 **kw)
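
Finally, a minimal usage sketch for a store like this one, assuming the remote server speaks the REST protocol the store expects; the graph address is illustrative. The store is wrapped in an rdflib Graph and all reads and writes go through it:

from rdflib import Graph, URIRef

uri = URIRef("http://example.org/graphs/g1")  # illustrative graph address
store = ProxyStore(identifier=uri)
graph = Graph(store=store, identifier=uri)

# Reading iterates the cached graph; triples() calls _pull(), i.e. an
# HTTP GET revalidated through httplib2's cache and ETags.
for s, p, o in graph:
    pass

# A read must happen before a push so the serialization format is known;
# commit() then calls _push(), i.e. an HTTP PUT guarded by If-Match.
graph.add((URIRef(uri + "#s"), URIRef(uri + "#p"), URIRef(uri + "#o")))
store.commit()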