Esempio n. 1
0
    def __get_by_subquery(self, params):
        """
        Load subjects and their direct attributes using a single query.

        A subquery selects the subjects; ``params`` is handed (as a copy) to
        ``__apply_limit_offset_order_get_by_filter`` to configure it. The
        outer query then fetches every ``?s ?p ?v`` triple of those subjects
        together with the optional type ``?c`` of each value.

        The caller's ``params`` dict is not modified.

        :param dict params: query parameters; ``"context"`` and ``"order"``
            are read directly by this method.
        :return: list of ``(subject, instance_data)`` tuples, in order of
            first appearance in the result table, where ``instance_data`` is
            ``{"direct": {predicate: {value: [types of value]}}}``.
        """
        context = params.get("context", None)

        # The subquery gets the complete parameter set (including "order").
        # A copy is passed so nothing done to it can leak back to the caller.
        inner_query = select("?s")
        self.__apply_limit_offset_order_get_by_filter(params.copy(), inner_query)

        query = select("?s", "?p", "?v", "?c").distinct()
        query.group(('?s', '?p', '?v'), optional_group(('?v', a, '?c')))
        query.where(inner_query)
        if context is not None:
            query.from_(context)

        # Need ordering in outer query as well. Previously "order" was
        # deleted from ``params`` before this check, so the branch below
        # could never run.
        if "order" in params:
            order = params["order"]
            if order == True:  # noqa: E712 -- keep the original loose comparison
                # Order by subject URI
                query.order_by("?s")
            else:
                # Match another variable, order by it
                query.optional_group(("?s", order, "?order"))
                query.order_by("?order")

        table = self._to_table(self._execute(query))
        subjects = {}
        results = []
        for match in table:
            # Make sure subject and predicate are URIs (they have to be!),
            # this works around bug in Virtuoso -- it sometimes returns
            # URIs as Literals.
            subject = URIRef(match["s"])
            predicate = URIRef(match["p"])
            value = match["v"]

            # Register the subject the first time it is seen; ``results``
            # keeps the order of first appearance.
            if subject not in subjects:
                instance_data = {"direct": {}}
                subjects[subject] = instance_data
                results.append((subject, instance_data))

            # subject -> predicate -> value -> [types], creating the
            # intermediate levels on demand.
            direct_attributes = subjects[subject]["direct"]
            value_types = direct_attributes.setdefault(predicate, {}) \
                                           .setdefault(value, [])

            # Add RDF type of the value to subject->predicate->value list
            if "c" in match:
                value_types.append(match["c"])

        return results
Esempio n. 2
0
def get_schema_label(uri,prefix,format):
    """
    Load a vocabulary and return its label, its kind and its own terms.

    The triples are first read from the local file
    ``<settings.PROJECT_PATH>/d2rq/vocab/<prefix>.<format>``; if that fails
    for any reason they are loaded from ``uri`` instead.

    :param uri: URI of the vocabulary; also used to recognise the
        vocabulary's own terms
    :param prefix: file name stem of the local copy
    :param format: file extension of the local copy
    :return: tuple ``(label, vocab_type, cp)``; ``vocab_type`` is ``'owl'``
        or ``'rdfs'`` and ``cp`` maps ``'classes'`` and ``'proprietes'`` to
        lists of ``(term_uri, term_label)`` tuples.
    """
    store = surf.Store(**{"reader": "librdf", "writer" : "librdf", })
    session = surf.Session(store)
    # try/finally so the store and session are released even when loading
    # or querying fails.
    try:
        try:
            store.load_triples(source='file://'+settings.PROJECT_PATH+'/d2rq/vocab/'+prefix+'.'+format)
            print("Loading from file : "+prefix+"."+format)
        except Exception:
            # Deliberate best effort: whatever went wrong with the local
            # copy, fall back to fetching the vocabulary from its URI.
            store.load_triples(source=uri)
            print("Loading from Internet : "+uri)

        ontology = session.get_class(ns.OWL.Ontology)
        # Must exist before the title lookup below: an ontology node without
        # any of the title properties used to hit an unbound ``label``.
        label = None
        if ontology.all().first(): # the graph contains an owl:Ontology node
            print("Le schema est une ontologie OWL")
            vocab_type = 'owl'
            titles = ['rdfs_comment','rdfs_label','dc_title','dcterms_title']
            for o in ontology.all():
                for title in titles:
                    values = o.__getattr__(title)
                    if len(values) > 0:
                        lookup = literal_lang_select(values)
                        if lookup is not None:
                            label = lookup
                if not label:
                    label = unicode(uri)
        else:
            # This is the regular path. The OWL branch above only serves
            # ontologies whose URL differs from the subject URI, so it is
            # not strictly required.
            print("Le schema est un vocabulaire RDFS")
            vocab_type = 'rdfs'
            label = get_label(rdflib.term.URIRef(uri),uri,store)

        lookup_args = { 'owl':  (ns.OWL.Class,ns.OWL.ObjectProperty),
                        'rdfs': (ns.RDFS.Class,ns.RDF.Property)}
        class_type, property_type = lookup_args[vocab_type]
        # Restrict to the vocabulary's own terms.
        # NOTE(review): ``uri`` is spliced unescaped into a SPARQL regex.
        own_terms_filter = '(regex(str(?s),"'+uri+'","i"))'
        vocab_classes = list(store.reader._to_table(store.reader._execute(
                            select("?s").where(("?s", a, class_type)).filter(own_terms_filter)
                            )))
        vocab_proprietes = list(store.reader._to_table(store.reader._execute(
                            select("?s").where(("?s", a, property_type)).filter(own_terms_filter)
                            )))

        cp = {'classes':[],'proprietes':[]}
        for rows, key in ((vocab_classes,'classes'),(vocab_proprietes,'proprietes')):
            for triple in rows:
                subject = triple['s']
                # Client-side guard (kept until the SPARQL filter can be
                # trusted): only URIs that start with the vocabulary URI.
                # A literal prefix test replaces the former unescaped
                # ``re.compile(uri).match``, in which "." and other URI
                # characters acted as regex metacharacters.
                if isinstance(subject,rdflib.term.URIRef) and unicode(subject).startswith(uri):
                    clabel = get_label(subject,uri,store) # the subject identifies the term
                    cp[key].append((unicode(subject),clabel))
                    print(key+':'+clabel)
    finally:
        store.clear()
        store.close()
        session.close()
    return((label,vocab_type,cp))
Esempio n. 3
0
    def __get_by_n_queries(self, params):
        """
        Load subjects, then fetch the details of each with separate queries.

        One query selects the subjects; for every subject a second query
        loads its direct attributes and, unless ``params["only_direct"]`` is
        truthy, a third loads its inverse attributes.

        :param dict params: query parameters
        :return: list of ``(subject, instance_data)`` tuples in result order
        """
        context = params.get("context", None)

        subjects_query = select("?s")
        if context is not None:
            subjects_query.from_(context)
        self.__apply_limit_offset_order_get_by_filter(params, subjects_query)

        # _to_table (rather than convert) keeps the subjects in the order
        # the store returned them.
        results = []
        for row in self._to_table(self._execute(subjects_query)):
            subject = row["s"]

            direct_raw = self._execute(query_S(subject, True, context))
            instance_data = {"direct": self.convert(direct_raw, 'p', 'v', 'c')}

            if not params.get("only_direct"):
                inverse_raw = self._execute(query_S(subject, False, context))
                instance_data["inverse"] = self.convert(inverse_raw,
                                                        'p', 'v', 'c')

            results.append((subject, instance_data))

        return results
Esempio n. 4
0
   def process(self, input, output):
      """
      Type ``output`` according to the linksets of ``input.subject``.

      Queries ``input.session.default_store`` for void:Linksets that are
      subsets of ``input.subject`` and target both it and another dataset.
      ``output`` gets the type datafaqs:Satisfactory (added once) if any such
      linkset reports an overlap of at least 50, datafaqs:Unsatisfactory
      otherwise, and is then saved.
      """

      print('processing ' + input.subject)

      # prefix void: <http://rdfs.org/ns/void#>
      #
      # select ?from ?to ?overlap
      # where {
      #    ?from void:subset [ 
      #       a void:Linkset; 
      #       void:target  ?from, 
      #                    ?to; 
      #       void:triples ?overlap;
      #    ] .
      #    filter( ?from != ?to )
      # }
      query = select('?to ?overlap').where((input.subject, ns.VOID['subset'],  '?linkset'),
                                           ('?linkset',    a,                  ns.VOID['Linkset']),
                                           ('?linkset',    ns.VOID['target'],  input.subject),
                                           ('?linkset',    ns.VOID['target'],  '?to'),
                                           ('?linkset',    ns.VOID['triples'], '?overlap')).filter('(?to != <'+input.subject+'>)')
      results = input.session.default_store.execute(query)

      # One qualifying linkset is enough. Stopping at the first one keeps
      # rdf_type free of repeated Satisfactory entries.
      for binding in results:
         overlap = binding[1]  # bindings follow the select order: ?to, ?overlap
         if overlap >= 50: # "We arbitrarily require at least 50 links." -http://richard.cyganiak.de/2007/10/lod/
            output.rdf_type.append(ns.DATAFAQS['Satisfactory'])
            break

      if ns.DATAFAQS['Satisfactory'] not in output.rdf_type:
         output.rdf_type.append(ns.DATAFAQS['Unsatisfactory'])

      output.save()
Esempio n. 5
0
    def test_same_as_inference_works(self):
        """
        Test owl:sameAs inferencing.

        After declaring John owl:sameAs Jonathan, asking for Jonathan's
        foaf:name must return John's name.
        """

        store, session = self._get_store_session()
        self._create_persons(session)

        # Let's say Jonathan is the same Person as John
        Person = session.get_class(surf.ns.FOAF["Person"])
        john = session.get_resource("http://John", Person)
        john.load()

        jonathan = session.get_resource("http://Jonathan", Person)
        jonathan.foaf_homepage = 'http://example.com'

        john[surf.ns.OWL['sameAs']] = jonathan
        session.commit()

        # NOTE(review): presumably a store-specific pragma that switches on
        # sameAs inference for subsequent reads -- confirm.
        store.reader.define = 'input:same-as "yes"'

        query = select("?s").from_(self.CONTEXT)\
                            .where((jonathan.subject, surf.ns.FOAF['name'], '?s'))
        r = store.execute_sparql(unicode(query))

        found_names = set(entry['s']['value']
                          for entry in r["results"]["bindings"])
        # assertEqual instead of the deprecated assertEquals alias.
        self.assertEqual(found_names, set([john.foaf_name[0]]))
Esempio n. 6
0
def _classification_search(query, start, limit, **kw):
    """
    Return one page of resources classified under a collection node.

    The node is the last ``|``-separated element of ``kw['node']``. All
    matching subjects are fetched, ``total`` reports how many there are, and
    ``rows`` holds the slice ``[start, start + limit)``.

    :param query: search string; only echoed back in ``query_info`` and in
        the paging links
    :param int start: zero-based offset of the first row to return
    :param int limit: maximum number of rows to return
    :param kw: ``node`` selects the collection to search
    :return: dict with ``rows``, ``total``, ``query_info`` and, when
        applicable, ``prev`` / ``next`` URLs
    """
    crdf = CignoRDF()
    rootNode = kw.get('node','').split('|')[-1]
    rootResource = crdf.Collections(rootNode)
    where = crdf.get_where_tree(rootResource)
    query_obj = select("?s").where(*where).distinct()
    resources = crdf.session.default_store.execute_sparql("%s" % query_obj)

    bindings = resources['results']['bindings']
    total = len(bindings)

    # Only the requested page is turned into resources; this honours
    # start/limit and avoids loading details that will not be shown.
    results = []
    for resource in bindings[start:start + limit]:
        res = crdf.CignoResources(resource['s']['value'])
        results.append({
            'title': res.rdfs_label.first.format(),
            'uuid': res.cigno_uuid.first.format(),
            'detail': res.subject.format(),
        })

    result = {'rows': results,
              'total': total}

    result['query_info'] = {
        'start': start,
        'limit': limit,
        'q': query
    }
    if start > 0:
        prev = max(start - limit, 0)
        params = urlencode({'q': query, 'start': prev, 'limit': limit})
        result['prev'] = reverse('geonode.maps.views.metadata_search') + '?' + params

    # Offer a "next" link only when rows remain beyond this page. This
    # replaces a hard-coded placeholder that always linked to start=2.
    next_start = start + limit
    if limit > 0 and next_start < total:
        params = urlencode({'q': query, 'start': next_start, 'limit': limit})
        result['next'] = reverse('geonode.maps.views.metadata_search') + '?' + params

    return result
Esempio n. 7
0
def query_p_s(c, p, direct, context):
    """
    Build a :class:`surf.query.Query` whose unknowns are `?s` and `?c`.

    Every element of `p` that is exactly a ``URIRef`` adds one triple
    pattern linking `?s` to a numbered variable `?v<index>`.

    :param c: the `class` (not used by this function)
    :param p: sequence of `predicates`
    :param bool direct: if true `?s` is the subject of each pattern,
        otherwise it is the object
    :param context: the context; added as FROM when truthy
    :return: the query
    :rtype: :class:`surf.query.Query`
    """

    query = select('?s', '?c').distinct()
    if context:
        query.from_(context)

    for index, predicate in enumerate(p):
        if type(predicate) is not URIRef:
            continue
        placeholder = '?v{0:d}'.format(index)
        if direct:
            pattern = ('?s', predicate, placeholder)
        else:
            pattern = (placeholder, predicate, '?s')
        query.where(pattern)

    query.optional_group(('?s', a, '?c'))

    return query
Esempio n. 8
0
    def _get_by_n_queries(self, params):
        """
        Load subjects first, then the details of each one separately.

        Direct attributes are always loaded; inverse attributes are loaded
        unless ``params["direct_only"]`` is truthy.

        :param dict params: query parameters
        :return: list of ``(subject, instance_data)`` tuples in result order
        """
        context = params.get("context", None)

        subjects_query = select("?s")
        if context is not None:
            subjects_query.from_(context)

        _apply_solution_modifiers(params, subjects_query)

        def load(subject, direct):
            # Details of one subject in the requested direction.
            raw = self._execute(query_s(subject, direct, context))
            return self.convert(raw, 'p', 'v', 'c')

        # Simplest approach for now: N extra queries. _to_table is used
        # instead of convert so the subject order is preserved.
        results = []
        for row in self._to_table(self._execute(subjects_query)):
            subject = row["s"]
            instance_data = {}
            instance_data["direct"] = load(subject, True)
            if not params.get("direct_only"):
                instance_data["inverse"] = load(subject, False)
            results.append((subject, instance_data))

        return results
Esempio n. 9
0
    def __get_by_n_queries(self, params):
        """
        Load subjects, then the details of each one with further queries.

        When ``params["contexts"]`` is non-empty the subject query is
        restricted to those graphs, both as FROM and as FROM NAMED.
        Inverse attributes are skipped if ``params["only_direct"]`` is
        truthy.

        :param dict params: query parameters
        :return: list of ``(subject, instance_data)`` tuples in result order
        """
        contexts = params.get("contexts", None)

        subjects_query = select("?s")
        if contexts:
            subjects_query.from_(*contexts)
            subjects_query.from_named(*contexts)

        self.__apply_limit_offset_order_get_by_filter(params, subjects_query)

        # One or two extra queries per subject; _to_table keeps the order
        # of the subjects, which convert would not.
        results = []
        for row in self._to_table(self._execute(subjects_query)):
            subject = row["s"]

            outgoing = self._execute(query_S(subject, True, contexts))
            instance_data = {
                "direct": self.convert(outgoing, 'p', 'v', 'g', 'c'),
            }

            if not params.get("only_direct"):
                incoming = self._execute(query_S(subject, False, contexts))
                instance_data["inverse"] = self.convert(incoming,
                                                        'p', 'v', 'g', 'c')

            results.append((subject, instance_data))

        return results
Esempio n. 10
0
def test_from_none():
    """
    ``from_`` must reject ``None`` with a ``ValueError``.
    """

    subject_query = select("?s")
    with pytest.raises(ValueError):
        subject_query.from_(None)
Esempio n. 11
0
    def test_subquery(self):
        """ A query passed to where() must be rendered as a nested SELECT. """

        inner = select("?s").where(("?s", "?a", "?b")).limit(3)
        outer = select("?s", "?p", "?o").where(("?s", "?p", "?o"), inner)
        actual = canonical(SparqlTranslator(outer).translate())

        expected = canonical(u"""
            SELECT ?s ?p ?o 
            WHERE { 
                ?s ?p ?o. 
                { SELECT ?s WHERE { ?s ?a ?b } LIMIT 3 }
            }
        """)

        self.assertEqual(expected, actual)
Esempio n. 12
0
 def test_subquery(self):
     """ Check the translation of a query nested inside a WHERE clause. """

     limited_subjects = select("?s").where(("?s", "?a", "?b")).limit(3)
     query = select("?s", "?p", "?o")
     query = query.where(("?s", "?p", "?o"), limited_subjects)

     expected = canonical(u"""
         SELECT ?s ?p ?o 
         WHERE { 
             ?s ?p ?o. 
             { SELECT ?s WHERE { ?s ?a ?b } LIMIT 3 }
         }
     """)
     translated = SparqlTranslator(query).translate()

     self.assertEqual(expected, canonical(translated))
Esempio n. 13
0
def test_from_none():
    """
    A ``ValueError`` is expected when ``None`` is given to ``from_``.
    """

    q = select("?s")
    with pytest.raises(ValueError):
        q.from_(None)
   def process(self, input, output):
      """
      Type ``output`` by whether a named graph mentions LOGD "us" ids.

      Sends a query to the http://logd.tw.rpi.edu/sparql endpoint for objects
      in the graph named ``input.subject`` that match
      ``http://logd.tw.rpi.edu/id/us.*``. Each matching value is printed.
      ``output`` is typed datafaqs:Satisfactory (added once) when there is at
      least one match, datafaqs:Unsatisfactory otherwise, and is then saved.
      """

      print('processing ' + input.subject)

      # Fails:
      #
      #PREFIX owl:        <http://www.w3.org/2002/07/owl#>
      #PREFIX dcterms:    <http://purl.org/dc/terms/>
      #PREFIX conversion: <http://purl.org/twc/vocab/conversion/>
      #SELECT count(distinct ?o) as ?count
      #WHERE {
      #  GRAPH <http://logd.tw.rpi.edu/source/nci-nih-gov/dataset/tobacco-law-coverage/version/2010-Aug-25/conversion/enhancement/1/subset/sample> {
      #   ?s ?p ?o .
      #   filter(regex(str(?o),'http://logd.tw.rpi.edu/id/us.*'))
      #  }
      #}

      # Passes:
      #
      #PREFIX owl:        <http://www.w3.org/2002/07/owl#>
      #PREFIX dcterms:    <http://purl.org/dc/terms/>
      #PREFIX conversion: <http://purl.org/twc/vocab/conversion/>
      #SELECT count(distinct ?o) as ?count
      #WHERE {
      #  GRAPH <http://logd.tw.rpi.edu/source/data-gov/dataset/1000/version/2010-Aug-30/conversion/enhancement/1/subset/sample> {
      #   ?s ?p ?o .
      #   filter(regex(str(?o),'http://logd.tw.rpi.edu/id/us.*'))
      #  }
      #}

      ####
      # Query a SPARQL endpoint
      #store = Store(reader = 'sparql_protocol', endpoint = 'http://dbpedia.org/sparql')
      #session = Session(store)
      #session.enable_logging = False
      #result = session.default_store.execute_sparql('select distinct ?type where {[] a ?type} limit 2')
      #if result:
      #   for binding in result['results']['bindings']:
      #      type  = binding['type']['value']
      #      print type
      ####

      store = Store(reader = 'sparql_protocol', endpoint = 'http://logd.tw.rpi.edu/sparql')
      session = Session(store)
      session.enable_logging = False
      query = select("?o").named_group(input.subject,('?s', '?p', '?o')).filter('regex(str(?o),"http://logd.tw.rpi.edu/id/us.*")')
      print(unicode(query))
      result = session.default_store.execute(query)
      if result:
         for binding in result['results']['bindings']:
            print(binding['o']['value'])
            # Record the verdict once; every match is still printed above.
            if ns.DATAFAQS['Satisfactory'] not in output.rdf_type:
               output.rdf_type.append(ns.DATAFAQS['Satisfactory'])

      if ns.DATAFAQS['Satisfactory'] not in output.rdf_type:
         output.rdf_type.append(ns.DATAFAQS['Unsatisfactory'])

      output.save()
Esempio n. 15
0
def query_SP(s, p, direct, context):
    """ Build a :class:`surf.query.Query` whose unknowns are `?v` and `?c`.

    `s` is the subject of the pattern when `direct` is true and its object
    otherwise; `?c` is optionally matched as the type of `?v`. """

    if direct:
        pattern = (s, p, '?v')
    else:
        pattern = ('?v', p, s)

    query = select('?v', '?c').distinct()
    query.where(pattern)
    query.optional_group(('?v', a, '?c'))
    if context:
        query.from_(context)

    return query
Esempio n. 16
0
def query_concept(s):
    """
    Build a :class:`surf.query.Query` that has `?c` as its only unknown,
    matched as the type of `s`.

    :param s: the `subject`
    :return: the query
    :rtype: :class:`surf.query.Query`
    """

    distinct_concepts = select('?c').distinct()
    return distinct_concepts.where((s, a, '?c'))
Esempio n. 17
0
def query_SP(s, p, direct, context):
    """ Return a :class:`surf.query.Query` with `?v` and `?c` unknown.

    The triple pattern is ``(s, p, ?v)`` for a direct predicate and
    ``(?v, p, s)`` for an inverse one. """

    subject_term, value_term = (s, '?v') if direct else ('?v', s)

    query = select('?v', '?c').distinct()
    query.where((subject_term, p, value_term)) \
         .optional_group(('?v', a, '?c'))
    if context:
        query.from_(context)

    return query
Esempio n. 18
0
def query_concept(s):
    """
    Return a distinct :class:`surf.query.Query` over `?c`, where `?c` is
    matched by the pattern ``(s, a, ?c)``.

    :param s: the `subject`
    :return: the query
    :rtype: :class:`surf.query.Query`
    """

    type_pattern = (s, a, '?c')
    query = select('?c')
    query = query.distinct()
    return query.where(type_pattern)
Esempio n. 19
0
    def test_simple(self):
        """ Translate a plain "SELECT ... WHERE ..." query. """

        query = select("?s", "?p", "?o").where(("?s", "?p", "?o"))
        translated = SparqlTranslator(query).translate()

        # The translator has to hand back a unicode object.
        self.assertTrue(isinstance(translated, unicode))

        expected = canonical(u"SELECT ?s ?p ?o WHERE { ?s ?p ?o }")
        self.assertEqual(expected, canonical(translated))
Esempio n. 20
0
def get_label(subject,baseuri,store):
    """
    Return a human-readable label for ``subject``.

    rdfs:comment, rdfs:label, dc:title and dcterms:title are tried in that
    order; a later property that yields a usable value overrides an earlier
    one. If none does, the label is ``subject`` with ``baseuri`` removed.

    :param subject: the resource to label
    :param baseuri: URI stripped from ``subject`` for the fallback label
    :param store: store whose reader runs the lookups
    :return: the label
    """
    label = None
    ns_titles = [ns.RDFS["comment"],ns.RDFS["label"],ns.DC["title"],ns.DCTERMS["title"]]
    for title in ns_titles:
        query = select("?o").where((subject,title,"?o"))
        found_title = list(store.reader._to_table(store.reader._execute(query)))
        if found_title:
            lookup = literal_lang_select([row['o'] for row in found_title])
            # Keep the label found so far when the language selection
            # comes back empty. Assigning unconditionally used to discard a
            # valid label from an earlier property.
            if lookup is not None:
                label = lookup
    if label is None: # nothing usable was found at all
        label = unicode(subject).replace(baseuri,'')
    return label
Esempio n. 21
0
    def test_simple(self):
        """ Check the translation of a minimal "SELECT ... WHERE ..." query. """

        triple = ("?s", "?p", "?o")
        query = select(*triple).where(triple)
        sparql = SparqlTranslator(query).translate()

        # translate() is expected to produce unicode, not a byte string.
        self.assertTrue(isinstance(sparql, unicode))

        self.assertEqual(canonical(u"SELECT ?s ?p ?o WHERE { ?s ?p ?o }"),
                         canonical(sparql))
Esempio n. 22
0
def test_simple():
    """
    Translate a plain "SELECT ... WHERE ..." query and compare it with the
    expected SPARQL text.
    """

    query = select("?s", "?p", "?o").where(("?s", "?p", "?o"))
    translated = SparqlTranslator(query).translate()

    # The translation has to be a string object (``basestring``).
    assert isinstance(translated, basestring)

    expected = canonical(u"SELECT ?s ?p ?o WHERE { ?s ?p ?o }")
    assert expected == canonical(translated)
Esempio n. 23
0
    def test_union(self):
        """ Two patterns given to union() must be joined with UNION. """

        first_branch = ("?s", "?v1", "?v2")
        second_branch = ("?s", "?v3", "?v4")
        query = select("?s").union(first_branch, second_branch)
        actual = canonical(SparqlTranslator(query).translate())

        expected = canonical(u"""
            SELECT ?s
            WHERE {
                { ?s ?v1 ?v2} UNION { ?s ?v3  ?v4 }
            }
        """)

        self.assertEqual(expected, actual)
Esempio n. 24
0
    def test_union(self):
        """ Check the SPARQL produced for a query built with union(). """

        expected = canonical(u"""
            SELECT ?s
            WHERE {
                { ?s ?v1 ?v2} UNION { ?s ?v3  ?v4 }
            }
        """)

        query = select("?s")
        query = query.union(("?s", "?v1", "?v2"), ("?s", "?v3", "?v4"))
        translated = SparqlTranslator(query).translate()

        self.assertEqual(expected, canonical(translated))
Esempio n. 25
0
def query_P_S(c, p, direct, context):
    """ Build a :class:`surf.query.Query` whose unknowns are `?s` and `?c`.

    Each element of `p` that is exactly a ``URIRef`` contributes one triple
    pattern between `?s` and a numbered variable; `c` is not used. """

    query = select('?s', '?c').distinct()
    if context:
        query.from_(context)

    for index, predicate in enumerate(p):
        if type(predicate) is not URIRef:
            continue
        numbered = '?v%d' % index
        if direct:
            query.where(('?s', predicate, numbered))
        else:
            query.where((numbered, predicate, '?s'))

    query.optional_group(('?s', a, '?c'))

    return query
Esempio n. 26
0
def query_P_S(c, p, direct, context):
    """ Return a :class:`surf.query.Query` with `?s` and `?c` unknown.

    `?s` is the subject of every predicate pattern when `direct` is true
    and the object otherwise. Entries of `p` whose type is not exactly
    ``URIRef`` are ignored. """

    query = select('?s', '?c').distinct()
    if context:
        query.from_(context)

    position = 0
    for predicate in p:
        value_var = '?v%d' % position
        position += 1
        if type(predicate) is URIRef:
            pattern = ('?s', predicate, value_var) if direct \
                else (value_var, predicate, '?s')
            query.where(pattern)

    query.optional_group(('?s', a, '?c'))

    return query
Esempio n. 27
0
 def test_str(self):
     """ Both str(query) and unicode(query) must give the SPARQL text. """

     query = select("?s", "?p", "?o").where(("?s", "?p", "?o"))

     expected = canonical(u"""
         SELECT ?s ?p ?o
         WHERE { 
             ?s ?p ?o 
         }
     """)

     # str() first, then unicode()
     via_str = unicode(str(query))
     self.assertEqual(expected, canonical(via_str))
     via_unicode = unicode(query)
     self.assertEqual(expected, canonical(via_unicode))
Esempio n. 28
0
    def test_str(self):
        """ Render the query through str() and through unicode(). """

        expected = canonical(u"""
            SELECT ?s ?p ?o
            WHERE { 
                ?s ?p ?o 
            }
        """)

        triple = ("?s", "?p", "?o")
        query = select(*triple).where(triple)

        # str() is checked first, unicode() second.
        renderers = (lambda q: unicode(str(q)), unicode)
        for render in renderers:
            self.assertEqual(expected, canonical(render(query)))
Esempio n. 29
0
def query_s(s, direct, context):
    """
    Build a :class:`surf.query.Query` whose unknowns are `?p`, `?v` and `?c`.

    :param s: the `subject`
    :param bool direct: if true `s` is the subject of the pattern,
        otherwise it is the object
    :param context: the context; added as FROM when truthy
    :return: the query
    :rtype: :class:`surf.query.Query`
    """
    if direct:
        pattern = (s, '?p', '?v')
    else:
        pattern = ('?v', '?p', s)

    query = select('?p', '?v', '?c').distinct()
    query.where(pattern)
    query.optional_group(('?v', a, '?c'))
    if context:
        query.from_(context)

    return query
Esempio n. 30
0
 def test_from(self):
     """ Graphs given to from_() must appear as FROM clauses. """

     query = select("?s", "?p", "?o").where(("?s", "?p", "?o"))
     # A plain string and a URIRef are both passed to from_().
     query.from_("http://uri1", URIRef("http://uri2"))
     translated = SparqlTranslator(query).translate()

     expected = canonical(u"""
         SELECT ?s ?p ?o
         FROM <http://uri1>
         FROM <http://uri2> 
         WHERE { 
             ?s ?p ?o 
         }
     """)

     self.assertEqual(expected, canonical(translated))
Esempio n. 31
0
def query_S(s, direct, contexts):
    """ Build a :class:`surf.query.Query` whose unknowns are `?p`, `?v`,
    `?c` and `?g`.

    `s` is the subject of the main pattern when `direct` is true and its
    object otherwise. """
    if direct:
        subject_term, value_term = s, '?v'
    else:
        subject_term, value_term = '?v', s

    query = select('?p', '?v', '?c', '?g').distinct()
    # Predicate and object, plus -- optionally -- the rdf:type of the
    # object and the named graph of the subject's and object's rdf:type.
    # TODO fails under Virtuoso as V. doesn't allow ?g to be bound to two
    # optional matches
    query.where((subject_term, '?p', value_term))
    query.optional_group(('?v', a, '?c'))
    query.optional_group(named_group('?g', (subject_term, a, value_term)))
    query.optional_group(named_group('?g', ('?v', a, '?c')))

    if contexts:
        query.from_(*contexts)
        query.from_named(*contexts)

    return query
Esempio n. 32
0
def query_s(s, direct, context):
    """
    Return a :class:`surf.query.Query` with `?p`, `?v` and `?c` unknown.

    The main pattern is ``(s, ?p, ?v)`` when `direct` is true and
    ``(?v, ?p, s)`` otherwise; `?c` is optionally matched as the type of
    `?v`.

    :param s: the `subject`
    :param bool direct: whether the predicate is direct or inverse
    :param context: the context
    :return: the query
    :rtype: :class:`surf.query.Query`
    """
    left, right = ('?v', s) if not direct else (s, '?v')

    query = select('?p', '?v', '?c').distinct()
    query.where((left, '?p', right)) \
         .optional_group(('?v', a, '?c'))
    if context:
        query.from_(context)

    return query
Esempio n. 33
0
    def test_exceptions(self):
        """ Reader and writer must raise their own exception types when the
        store is configured with an invalid endpoint. """

        store = surf.Store(reader="sparql_protocol",
                           writer="sparql_protocol",
                           endpoint="invalid")

        # Reading: executing any query has to fail.
        query = select("?a")
        self.assertRaises(SparqlReaderException, store.execute, query)

        # Writing: adding a triple has to fail.
        self.assertRaises(SparqlWriterException,
                          store.add_triple, "?s", "?p", "?o")
Esempio n. 34
0
    def test_from(self):
        """ Check the FROM clauses produced by from_(). """

        expected = canonical(u"""
            SELECT ?s ?p ?o
            FROM <http://uri1>
            FROM <http://uri2> 
            WHERE { 
                ?s ?p ?o 
            }
        """)

        triple = ("?s", "?p", "?o")
        query = select(*triple).where(triple)
        graphs = ["http://uri1", URIRef("http://uri2")]
        query.from_(*graphs)
        actual = canonical(SparqlTranslator(query).translate())

        self.assertEqual(expected, actual)
Esempio n. 35
0
    def test_exceptions(self):
        """ With an invalid endpoint, reads and writes must raise. """

        broken_store = surf.Store(reader = "sparql_protocol",
                                  writer = "sparql_protocol",
                                  endpoint = "invalid")
        query = select("?a")

        self.assertRaises(SparqlReaderException,
                          lambda: broken_store.execute(query))
        self.assertRaises(SparqlWriterException,
                          lambda: broken_store.add_triple("?s", "?p", "?o"))
Esempio n. 36
0
   def process(self, input, output):
      """
      Type ``output`` by whether ``input.subject`` asserts its own size.

      Loads the triples found at ``input.subject`` into an in-memory store
      and records their number on ``output``. Every void:triples value
      asserted for ``input.subject`` is appended to ``output.void_triples``.
      ``output`` is typed datafaqs:Satisfactory (added once) if at least one
      value exists, datafaqs:Unsatisfactory otherwise, and is then saved.
      """

      store = surf.Store(reader = 'rdflib', writer = 'rdflib', rdflib_store = 'IOMemory')
      session = surf.Session(store)
      store.load_triples(source = input.subject)
      output.datafaqs_resolved_triples = store.size()

      query = select('?triples').where((input.subject, ns.VOID['triples'], '?triples'))
      for count in store.execute(query):
         output.void_triples.append(count)
         # Several void:triples assertions must not repeat the type.
         if ns.DATAFAQS['Satisfactory'] not in output.rdf_type:
            output.rdf_type.append(ns.DATAFAQS['Satisfactory'])
         print(str(store.size()) + ' dereferenced RDF triples asserted that ' + input.subject + ' has ' + str(count) + ' triples.')

      if ns.DATAFAQS['Satisfactory'] not in output.rdf_type:
         output.rdf_type.append(ns.DATAFAQS['Unsatisfactory'])
         print(str(store.size()) + ' dereferenced RDF triples, but no void:triples asserted for ' + input.subject)

      output.save()
Esempio n. 37
0
    def _get_by(self, params):
        """
        Load the subjects matching ``params``.

        If ``"full"`` is a key of ``params`` the work is delegated to the
        subquery or the N-queries strategy, depending on
        ``self.use_subqueries``. Otherwise only subjects, their types and
        the named graph of each type statement are loaded.

        :param dict params: query parameters
        :return: list of ``(subject, instance_data)`` tuples in result
            order; in the "no details" case ``instance_data`` is
            ``{"direct": {a: {concept: {context: []}}}}``, with one
            ``context`` key per graph in which the type was found (``None``
            when no graph was matched).
        """
        # Decide which loading strategy to use
        if "full" in params:
            if self.use_subqueries:
                return self.__get_by_subquery(params)
            else:
                return self.__get_by_n_queries(params)

        # No details, just subjects and classes
        query = select("?s", "?c", "?g")
        self.__apply_limit_offset_order_get_by_filter(params, query)
        query.optional_group(("?s", a, "?c"))
        # Query for the same tuple to get the named graph if obtainable
        query.optional_group(named_group("?g", ("?s", a, "?c")))

        contexts = params.get("contexts", None)
        if contexts:
            query.from_(*contexts)
            query.from_named(*contexts)

        # Load just subjects and their types
        table = self._to_table(self._execute(query))

        # Create response structure, preserve order, don't include
        # duplicate subjects if some subject has multiple types
        subjects = {}
        results = []
        for match in table:
            subject = match["s"]
            if subject not in subjects:
                instance_data = {"direct" : {a : {}}}
                subjects[subject] = instance_data
                results.append((subject, instance_data))

            # "context" comes from an optional group and is missing if the
            # triple is stored in the unnamed graph
            context = match.get("g")

            if "c" in match:
                concept = match["c"]
                # Merge rather than replace: the same (subject, concept)
                # pair can come back once per named graph, and assigning a
                # fresh dict dropped the graphs recorded by earlier rows.
                concepts = subjects[subject]["direct"][a]
                concepts.setdefault(concept, {})[context] = []

        return results
Esempio n. 38
0
def test_from_named():
    """
    FROM and FROM NAMED clauses must both appear in the translation.
    """

    query = select("?s", "?p", "?o").where(("?s", "?p", "?o"))
    query.from_("http://uri1")
    query.from_named("http://uri1", URIRef("http://uri2"))
    actual = canonical(SparqlTranslator(query).translate())

    expected = canonical(u"""
        SELECT ?s ?p ?o
        FROM <http://uri1>
        FROM NAMED <http://uri1>
        FROM NAMED <http://uri2>
        WHERE {
            ?s ?p ?o
        }
    """)

    assert expected == actual
Esempio n. 39
0
def test_from_named():
    """
    Check the translation of a query that uses from_() and from_named().
    """

    expected = canonical(u"""
        SELECT ?s ?p ?o
        FROM <http://uri1>
        FROM NAMED <http://uri1>
        FROM NAMED <http://uri2>
        WHERE {
            ?s ?p ?o
        }
    """)

    triple = ("?s", "?p", "?o")
    query = select(*triple).where(triple)
    query.from_("http://uri1")
    named_graphs = ("http://uri1", URIRef("http://uri2"))
    query.from_named(*named_graphs)
    translated = SparqlTranslator(query).translate()

    assert expected == canonical(translated)
Esempio n. 40
0
    def test_json_datatypes(self):
        """
        Test that proper datatypes are returned.

        An integer foaf:age is stored, queried back through SPARQL and the
        JSON binding is converted with ``json_to_rdflib``; the Python value
        must again be the integer.
        """
        # Tests for a bug wrt datatype uri with AllegroGraph

        store, session = self._get_store_session(use_default_context=False)
        Person = session.get_class(surf.ns.FOAF + "Person")

        # Store datatype
        jake = session.get_resource("http://Jake", Person)
        jake.foaf_name = "Jake"
        jake.foaf_age = 62
        jake.save()

        # Get the age back
        query = select('?age').where(('?s', a, Person.uri),
                                     ('?s', ns.FOAF.age, '?age'))
        result = store.execute_sparql(unicode(query))
        bindings = result['results']['bindings']
        # TestCase assertion instead of a bare assert (stripped under -O).
        self.assertEqual(len(bindings), 1)
        entry = bindings[0]

        # Test that rdflib type is properly constructed
        age = json_to_rdflib(entry['age'])
        # assertEqual instead of the deprecated assertEquals alias.
        self.assertEqual(age.toPython(), 62)
Esempio n. 41
0
    def test_json_datatypes(self):
        """
        Test that proper datatypes are returned.

        Stores a person with an integer foaf:age, reads the age back with a
        SPARQL query and checks that ``json_to_rdflib`` turns the JSON
        binding into a value whose ``toPython()`` is that integer.
        """
        # Tests for a bug wrt datatype uri with AllegroGraph

        store, session = self._get_store_session(use_default_context=False)
        Person = session.get_class(surf.ns.FOAF + "Person")

        # Store datatype
        jake = session.get_resource("http://Jake", Person)
        jake.foaf_name = "Jake"
        jake.foaf_age = 62
        jake.save()

        # Query the stored age
        query = select('?age').where(('?s', a, Person.uri),
                                     ('?s', ns.FOAF.age, '?age'))
        result = store.execute_sparql(unicode(query))
        bindings = result['results']['bindings']
        # Use the TestCase API; a bare assert disappears under -O.
        self.assertEqual(len(bindings), 1)
        entry = bindings[0]

        # Test that rdflib type is properly constructed
        age = json_to_rdflib(entry['age'])
        # assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(age.toPython(), 62)
Esempio n. 42
0
    def test_same_as_inference_works(self):
        """
        Test owl:sameAs inferencing.

        John is declared owl:sameAs Jonathan; a query for Jonathan's
        foaf:name must then return John's name.
        """

        store, session = self._get_store_session()
        # Let's say Jonathan is the same Person as John
        Person = session.get_class(surf.ns.FOAF["Person"])
        john = session.get_resource("http://John", Person)
        john.load()

        jonathan = session.get_resource("http://Jonathan", Person)
        jonathan.foaf_homepage = 'http://example.com'

        john[surf.ns.OWL['sameAs']] = jonathan
        session.commit()

        # NOTE(review): presumably a store-specific pragma enabling sameAs
        # inference for subsequent reads -- confirm.
        store.reader.define = 'input:same-as "yes"'

        query = select("?s").from_(self.CONTEXT)\
                            .where((jonathan.subject, surf.ns.FOAF['name'], '?s'))
        r = store.execute_sparql(unicode(query))

        found_names = set(entry['s'] for entry in r["results"]["bindings"])
        # assertEqual instead of the deprecated assertEquals alias.
        self.assertEqual(found_names, set([john.foaf_name[0]]))
Esempio n. 43
0
def query_p_s(c, p, direct, context):
    """
    Build a :class:`surf.query.Query` selecting distinct `?s` and `?c`.

    One triple pattern is added per entry of `p` whose type is exactly
    :class:`URIRef`; other entries are skipped. Each pattern gets its own
    numbered variable (``?v0``, ``?v1``, ...) based on the entry's position.

    :param c: the `class` (not used when building the query)
    :param p: the sequence of `predicates`
    :param bool direct: if true `?s` is the subject of each pattern,
        otherwise it is the object
    :param context: the context; added as FROM clause when truthy
    :return: the query
    :rtype: :class:`surf.query.Query`
    """

    query = select('?s', '?c').distinct()
    if context:
        query.from_(context)

    for index, predicate in enumerate(p):
        if type(predicate) is not URIRef:
            continue

        placeholder = '?v{0:d}'.format(index)
        if direct:
            pattern = ('?s', predicate, placeholder)
        else:
            pattern = (placeholder, predicate, '?s')
        query.where(pattern)

    # The class of each subject is optional
    query.optional_group(('?s', a, '?c'))

    return query
Esempio n. 44
0
    def _get_by(self, params):
        """
        Return matching subjects as a list of ``(subject, data)`` tuples.

        When ``"full"`` is in `params` the work is handed to one of the
        full loaders, chosen by ``self.use_subqueries``. Otherwise only the
        subjects and their classes are queried, and ``data`` has the shape
        ``{"direct": {a: {concept: []}}}``.
        """
        # Full details requested: pick the loading strategy
        if "full" in params:
            if not self.use_subqueries:
                return self.__get_by_n_queries(params)
            return self.__get_by_subquery(params)

        # Subjects and classes only
        query = select("?s", "?c")
        self.__apply_limit_offset_order_get_by_filter(params, query)
        query.optional_group(("?s", a, "?c"))

        context = params.get("context")
        if context is not None:
            query.from_(context)

        rows = self._to_table(self._execute(query))

        # Keep first-seen order and fold rows of a subject that has
        # several types into a single entry
        seen = {}
        ordered = []
        for row in rows:
            subject = row["s"]
            data = seen.get(subject)
            if data is None:
                data = {"direct": {a: {}}}
                seen[subject] = data
                ordered.append((subject, data))

            if "c" in row:
                data["direct"][a][row["c"]] = []

        return ordered
Esempio n. 45
0
def _classification_search(query, start, limit, **kw):
    """
    Search resources classified under a collection node.

    The node comes from ``kw['node']``; only the part after the last ``|``
    is used. A SPARQL query built from the node's "where tree" is run
    against the default store and each bound ``?s`` becomes a row with
    ``title``, ``uuid`` and ``detail``.

    :param query: the original search string; only echoed back in
        ``query_info`` and in the prev/next links
    :param start: zero-based offset of the requested page
    :param limit: page size
    :return: dict with ``rows``, ``total``, ``query_info`` and, when
        applicable, ``prev`` / ``next`` URLs
    """
    crdf = CignoRDF()
    root_node = kw.get('node', '').split('|')[-1]
    root_resource = crdf.Collections(root_node)
    where = crdf.get_where_tree(root_resource)
    query_obj = select("?s").where(*where).distinct()
    sparql = sanitize_sparql("%s" % query_obj)
    response = crdf.session.default_store.execute_sparql(sparql)

    # NOTE(review): every match is returned; rows are not sliced by
    # start/limit here -- confirm whether the caller expects a single page.
    rows = []
    for binding in response['results']['bindings']:
        res = crdf.CignoResources(binding['s']['value'])
        rows.append({
            'title': res.rdfs_label.first.format(),
            'uuid': res.cigno_uuid.first.format(),
            'detail': res.subject.format(),
        })

    total = len(rows)
    result = {
        'rows': rows,
        'total': total,
        'query_info': {
            'start': start,
            'limit': limit,
            'q': query,
        },
    }

    def page_url(offset):
        # Link to the metadata search view for the page starting at offset
        params = urlencode({'q': query, 'start': offset, 'limit': limit})
        return reverse('geonode.maps.views.metadata_search') + '?' + params

    if start > 0:
        result['prev'] = page_url(max(start - limit, 0))

    # The next offset used to be a hard-coded placeholder, so the link was
    # always emitted and always pointed at start=2. Derive it from the
    # request and only emit it when there are results past this page.
    next_start = start + limit
    if limit > 0 and next_start < total:
        result['next'] = page_url(next_start)

    return result
Esempio n. 46
0
    def _get_by(self, params):
        """
        Return matching subjects as a list of ``(subject, data)`` tuples.

        With ``"full"`` in `params`, loading is delegated to the subquery
        or the n-queries loader depending on ``self.use_subqueries``.
        Without it, only subjects and their classes are fetched and
        ``data`` looks like ``{"direct": {a: {concept: []}}}``.
        """
        # Full details requested: pick the loading strategy
        if "full" in params:
            if not self.use_subqueries:
                return self._get_by_n_queries(params)
            return self._get_by_subquery(params)

        # Subjects and classes only
        query = select("?s", "?c")
        _apply_solution_modifiers(params, query)
        query.optional_group(("?s", a, "?c"))

        context = params.get("context")
        if context is not None:
            query.from_(context)

        rows = self._to_table(self._execute(query))

        # Preserve first-seen order; a subject with several types
        # appears once, with every type collected under ``a``
        seen = {}
        ordered = []
        for row in rows:
            subject = row["s"]
            data = seen.get(subject)
            if data is None:
                data = {"direct": {a: {}}}
                seen[subject] = data
                ordered.append((subject, data))

            if "c" in row:
                data["direct"][a][row["c"]] = []

        return ordered
Esempio n. 47
0
    def __get_by_subquery(self, params):
        """
        Load subjects and their direct attributes with one nested query.

        The inner query selects the subjects (limit, offset, order and
        filters are applied to it); the outer query fetches each subject's
        predicates and values, the RDF type of each value, and the named
        graph of the subject's type triples.

        :param params: query parameters; ``"contexts"`` and ``"order"``
            are read here. Note that ``"order"`` is removed from the
            passed-in mapping.
        :return: list of ``(subject, data)`` tuples in first-seen order,
            where ``data`` is
            ``{"direct": {predicate: {value: {context: [types]}}}}``
        """
        contexts = params.get("contexts", None)

        inner_query = select("?s")
        inner_params = params.copy()
        if "order" in params:
            # "order" needs to stay in subquery,
            # but doesn't do anything useful in main query
            del params["order"]
        self.__apply_limit_offset_order_get_by_filter(inner_params, inner_query)

        query = select("?s", "?p", "?v", "?c", "?g").distinct()
        # Get values with object type & context
        # TODO we need to query both contexts, from ?s -> rdf_type and
        # ?v -> rdf_type (i.e. also a named group over ("?v", a, "?c")),
        # but Virtuoso does not bind ?g twice. Bug or feature?
        query.group(('?s', '?p', '?v'),
                    optional_group(('?v', a, '?c')),
                    optional_group(named_group("?g", ("?s", a, "?v"))))
        query.where(inner_query)
        if contexts:
            query.from_(*contexts)
            query.from_named(*contexts)

        # Need ordering in outer query
        # NOTE(review): "order" has already been deleted from ``params``
        # above, so this branch cannot run as written. Left untouched
        # because enabling it would change the generated query.
        if "order" in params:
            if params["order"] == True:
                # Order by subject URI
                query.order_by("?s")
            else:
                # Match another variable, order by it
                query.optional_group(("?s", params["order"], "?order"))
                query.order_by("?order")

        table = self._to_table(self._execute(query))
        subjects = {}
        results = []
        for match in table:
            # Make sure subject and predicate are URIs (they have to be!),
            # this works around bug in Virtuoso -- it sometimes returns
            # URIs as Literals.
            subject = URIRef(match["s"])
            predicate = URIRef(match["p"])
            value = match["v"]

            # Add subject to result list if it's not there
            if subject not in subjects:
                instance_data = {"direct": {}}
                subjects[subject] = instance_data
                results.append((subject, instance_data))

            # Add predicate to subject's direct predicates if it's not there
            direct_attributes = subjects[subject]["direct"]
            if predicate not in direct_attributes:
                direct_attributes[predicate] = {}

            # "context" comes from an optional group and is missing if the
            # triple is stored in the unnamed graph
            context = match.get("g")

            # Add value to subject->predicate on first sight
            predicate_values = direct_attributes[predicate]
            if value not in predicate_values:
                predicate_values[value] = {context: []}

            # Add RDF type of the value to subject->predicate->value list.
            # The same value can come back under a different graph than the
            # one seen first; create that context's list on demand instead
            # of failing with KeyError.
            if "c" in match:
                contexts_of_value = predicate_values[value]
                contexts_of_value.setdefault(context, []).append(match["c"])

        return results
Esempio n. 48
0
    def _get_by_subquery(self, params):
        """
        Load subjects with their attributes through one nested query.

        The inner query selects the subjects, with solution modifiers
        applied to it. The outer query fetches direct attributes and,
        unless ``params['direct_only']`` is truthy, also inverse
        attributes; the two are told apart by a constant ``?i`` column.

        :param params: query parameters; ``"context"``, ``"order"`` and
            ``"direct_only"`` are read here, and ``"order"`` is removed
            from the passed-in mapping
        :return: list of ``(subject, data)`` tuples in first-seen order,
            ``data`` being
            ``{"direct": {predicate: {value: [types]}}, "inverse": {...}}``
        """
        context = params.get("context", None)

        inner_params = params.copy()
        inner_query = select("?s")
        if "order" in params:
            # The copy handed to the subquery keeps "order";
            # the outer parameters lose it
            del params["order"]
        _apply_solution_modifiers(inner_params, inner_query)

        if not params.get('direct_only'):
            # ?s as subject, flagged "0"
            direct_query = select("?s", "?p", "?v", "?c", '("0" AS ?i)')
            direct_query.distinct()
            direct_query.group(('?s', '?p', '?v'),
                               optional_group(('?v', a, '?c')))

            # ?s as object, flagged "1"
            indirect_query = select("?s", "?p", "?v", "?c", '("1" AS ?i)')
            indirect_query.distinct()
            indirect_query.group(('?v', '?p', '?s'),
                                 optional_group(('?v', a, '?c')))

            query = select("?s", "?p", "?v", "?c", "?i")
            query.union(direct_query, indirect_query)
        else:
            query = select("?s", "?p", "?v", "?c").distinct()
            query.group(('?s', '?p', '?v'), optional_group(('?v', a, '?c')))

        query.where(inner_query)
        if context is not None:
            query.from_(context)

        # Ordering for the outer query.
        # NOTE(review): "order" was deleted from ``params`` above, so this
        # branch is not reached as written.
        if "order" in params:
            if params["order"]:
                # Order by subject URI
                query.order_by("?s")
            else:
                # Match another variable, order by it
                query.optional_group(("?s", params["order"], "?order"))
                query.order_by("?order")

        rows = self._to_table(self._execute(query))
        seen = {}
        ordered = []
        for row in rows:
            # Force subject and predicate to URIs: Virtuoso sometimes
            # hands them back as Literals
            subject = URIRef(row["s"])
            predicate = URIRef(row["p"])
            value = row["v"]
            # The "i" flag is only present when inverse attributes
            # were queried
            side = "inverse" if row.get("i") == "1" else "direct"

            # Register the subject the first time it shows up
            data = seen.get(subject)
            if data is None:
                data = {"direct": {}, "inverse": {}}
                seen[subject] = data
                ordered.append((subject, data))

            # subject -> predicate -> value -> list of the value's types
            value_types = data[side].setdefault(predicate, {})\
                                    .setdefault(value, [])
            if "c" in row:
                value_types.append(row["c"])

        return ordered
Esempio n. 49
0
 def get_not_empty_members(self, res):
   """
   Return the raw ``?c`` bindings for the where-tree of `res`.

   A distinct SELECT over ``?c`` is built from ``self.get_where_tree(res)``,
   sanitized and run against the session's default store.
   """
   patterns = self.get_where_tree(res)
   sparql = sanitize_sparql("%s" % select("?c").where(*patterns).distinct())
   response = self.session.default_store.execute_sparql(sparql)
   return response['results']['bindings']
Esempio n. 50
0
   def process(self, input, output):
      """
      Count usages of known predicates and classes at a dataset's endpoint.

      The SPARQL endpoint is taken from the input's void:sparqlEndpoint
      or, failing that, from the dcat:accessURL of a distribution whose
      format has rdf:value "api/sparql". If none is found the output is
      typed datafaqs:Unsatisfactory, saved, and the method returns.

      Otherwise one count query is sent per entry of ``self.predicates``
      and ``self.classes``. Counts that come back as an XML DOM document
      are stored as ``sio_count`` on a resource in the output session and
      the output is typed datafaqs:Satisfactory; counts that come back in
      the JSON-style structure are only printed.

      :param input: resource describing the dataset to evaluate
      :param output: resource that receives the evaluation result
      """

      print 'processing ' + input.subject

      # False means "no endpoint found yet"
      endpoint = False

      # TODO: add    dcat:distribution [ a sd:NamedGraph; sd:name ; prov:hadLocation
      # like https://github.com/timrdf/DataFAQs/blob/master/services/sadi/ckan/add-metadata-materials/sample-inputs/arrayexpress-e-afmx-1.ttl#L49

      if len(input.void_sparqlEndpoint) > 0:
         # <http://datahub.io/dataset/dbpedia>
         #    void:sparqlEndpoint <http://dbpedia.org/sparql> .
         endpoint = self.surfSubject(input.void_sparqlEndpoint.first)
         print 'void:sparqlEndpoint: ' + endpoint
      else:
         # <http://datahub.io/dataset/dbpedia>
         #    dcat:distribution [
         #       dct:format [
         #          a dct:IMT;
         #          rdf:value  "api/sparql";
         #          rdfs:label "api/sparql"
         #       ];
         #      a dcat:Distribution ;
         #      dcat:accessURL <http://dbpedia.org/sparql>
         #   ]; .

         query = select("?url").where((input.subject, ns.DCAT['distribution'], "?distro"),
                                      ("?distro",     ns.DCTERMS['format'],    "?format"),
                                      ("?format",     ns.RDF['value'],         rdflib.Literal('api/sparql')),
                                      ("?distro",     ns.DCAT['accessURL'],    "?url"))
         # If several distributions match, the last one wins
         for bindings in input.session.default_store.execute(query):
            #print 'creator: ' + bindings[0] + ' ' + bindings[1]
            endpoint = self.surfSubject(bindings[0])
            print 'dcat:distribution dcat:accessURL: ' + endpoint

      if endpoint is False:
         print 'WARNING: could not find SPARQL endpoint to query; skipping ' + input.subject
         output.rdf_type.append(ns.DATAFAQS['Unsatisfactory'])
         output.save()
         return

      # NOTE(review): ng is only mentioned in the commented-out "graph"
      # lines below; it is not used by any live code in this method.
      ng='http://purl.org/twc/health/source/healthdata-tw-rpi-edu/dataset/cr-full-dump/version/latest'

      Class     = output.session.get_class(ns.RDFS['Class'])
      Predicate = output.session.get_class(ns.RDF['Property'])

      ####
      # Query a SPARQL endpoint
      store = Store(reader = 'sparql_protocol', endpoint = endpoint)
      session = Session(store)
      session.enable_logging = False
      # Count the triples using each known predicate
      for predicate in self.predicates:
         # NOTE(review): the predicate URI is concatenated into the query
         # text unescaped -- presumably self.predicates is trusted; confirm.
         results = session.default_store.execute_sparql( # TODO: handle optional named graph.
            '''
            select (count(*) as ?count)
            where {
                [] <'''+predicate+'''> []
            }'''
         )
              #graph <'''+ng+'''> {
         count = False
         if isinstance(results, xml.dom.minidom.Document):
            for result in results.getElementsByTagName('result'):
               for binding in result.getElementsByTagName('binding'):
                  # NOTE(review): this searches ``result`` rather than
                  # ``binding``, so every literal of the result is visited
                  # once per binding -- confirm that is intended.
                  for value in result.getElementsByTagName('literal'):
                     count = int(value.firstChild.data)
                     predR = output.session.get_resource(predicate, Predicate)
                     predR.sio_count = count
                     predR.save()
                     output.rdf_type.append(ns.DATAFAQS['Satisfactory'])
                     print str(count) + ' ' + predicate
         elif results:
            # Non-XML results: the count is printed but not stored
            print results
            for binding in results['results']['bindings']:
               count = binding['count']['value']
               print count
      # Count the instances of each known class
      for classU in self.classes:
         results = session.default_store.execute_sparql( # TODO: handle optional named graph.
            '''
            select (count(*) as ?count)
            where {
                [] a <'''+classU+'''>
            }'''
         )
              #graph <'''+ng+'''> {
         count = False
         if isinstance(results, xml.dom.minidom.Document):
            for result in results.getElementsByTagName('result'):
               for binding in result.getElementsByTagName('binding'):
                  # NOTE(review): same pattern as the predicate loop --
                  # literals are looked up on ``result``, not ``binding``.
                  for value in result.getElementsByTagName('literal'):
                     count = int(value.firstChild.data)
                     classR = output.session.get_resource(classU, Class)
                     classR.sio_count = count
                     classR.save()
                     output.rdf_type.append(ns.DATAFAQS['Satisfactory'])
                     print str(count) + ' ' + classU
         elif results:
            # Non-XML results: the count is printed but not stored
            print results
            for binding in results['results']['bindings']:
               count = binding['count']['value']
               print count
      ####

      # Query the RDF graph POSTed: input.session.default_store.execute

      # Walk through all Things in the input graph (using SuRF):
      # Thing = input.session.get_class(ns.OWL['Thing'])
      # for person in Thing.all():

      # Create a class in the output graph:
      # Document = output.session.get_class(ns.FOAF['Document'])

      # No count was stored above: the evaluation is unsatisfactory.
      # Otherwise also record the PROV ontology as a vocabulary.
      if ns.DATAFAQS['Satisfactory'] not in output.rdf_type:
         output.rdf_type.append(ns.DATAFAQS['Unsatisfactory'])
      else:
         Ontology = output.session.get_class(ns.OWL['Ontology'])
         output.void_vocabulary.append(Ontology('http://www.w3.org/ns/prov#'))

      output.save()
Esempio n. 51
0
    def test_from_none(self):
        """ A ``None`` graph passed to .from_() must raise ValueError. """

        query = select("?s")
        # Hand assertRaises the bound method and its argument directly
        self.assertRaises(ValueError, query.from_, None)
Esempio n. 52
0
    def test_from_none(self):
        """ .from_(None) is rejected with ValueError. """

        query = select("?s")

        def call_with_none():
            # The call that is expected to fail
            query.from_(None)

        self.assertRaises(ValueError, call_with_none)
Esempio n. 53
0
def query_Concept(subject):
    """
    Build the :class:`surf.query.Query` that asks for the distinct
    classes (`?c`) of `subject`.
    """

    type_pattern = (subject, a, '?c')
    return select('?c').distinct().where(type_pattern)
Esempio n. 54
0
def query_Concept(subject):
    """
    Return a distinct SELECT over `?c`, where `?c` is bound to each
    RDF type of `subject`.
    """

    distinct_classes = select('?c').distinct()
    return distinct_classes.where((subject, a, '?c'))