def object():
    """
    RESTful endpoint that renders one object from within a vocabulary.

    The class of the requested object is obtained from the vocabulary's Source
    handler. SKOS Concepts and SKOS Collections are handed to their renderers;
    any other class gets the invalid-object-class response.

    HTTP query string arguments:

    * 'vocab_id' (required) - the ID of the vocab this object is within
    * 'uri' (required) - the URI of the object being requested
    * 'lang' (optional) - language, defaults to config.DEFAULT_LANGUAGE

    :return: the rendered object, or a plain-text HTTP 400 Response when
        'vocab_id' is unknown or 'uri' is missing
    """
    args = request.values
    language = args.get('lang') or config.DEFAULT_LANGUAGE
    vocab_id = args.get('vocab_id')
    uri = args.get('uri')

    # an unknown vocab ID is answered with the list of known ones
    if vocab_id not in g.VOCABS.keys():
        known_ids = '\n'.join(g.VOCABS.keys())
        return Response(
            'The vocabulary ID you\'ve supplied is not known. Must be one of:\n ' + known_ids,
            status=400,
            mimetype='text/plain'
        )

    if uri is None:
        return Response(
            'A Query String Argument \'uri\' must be supplied for this endpoint, '
            'indicating an object within a vocabulary',
            status=400,
            mimetype='text/plain'
        )

    vocab_source = Source(vocab_id, request, language)

    try:
        # TODO reuse object within if, rather than re-loading graph
        object_class = vocab_source.get_object_class()

        if object_class == 'http://www.w3.org/2004/02/skos/core#Concept':
            return ConceptRenderer(request, vocab_source.get_concept()).render()

        if object_class == 'http://www.w3.org/2004/02/skos/core#Collection':
            return CollectionRenderer(request, vocab_source.get_collection(uri)).render()

        return render_invalid_object_class_response(vocab_id, uri, object_class)
    except VbException as e:
        return render_vb_exception_response(e)
def build_concept_hierarchy(vocab_id):
    """
    Build the flattened concept hierarchy of a pickled vocabulary graph.

    The graph comes from FILE.load_pickle_graph(). The subject typed as
    skos:ConceptScheme is used as the scheme (the last one yielded if there are
    several) and its skos:hasTopConcept objects, sorted by URI, seed the list.

    :param vocab_id: ID of the vocabulary whose pickled graph is loaded
    :return: list holding, for each top concept in turn, a
        (1, <top concept URI>, <prefLabel>) tuple followed by whatever
        Source.get_narrowers() returns for that top concept
    :raises Exception: if no top concept is found
    """
    graph = FILE.load_pickle_graph(vocab_id)

    # the last subject typed as a ConceptScheme wins; stays None when there is none
    scheme_uri = None
    for scheme, _, _ in graph.triples((None, RDF.type, SKOS.ConceptScheme)):
        scheme_uri = str(scheme)

    top_concepts = sorted(
        str(top_concept)
        for _, _, top_concept in graph.triples((URIRef(scheme_uri), SKOS.hasTopConcept, None))
    )

    if not top_concepts:
        raise Exception('topConcept not found')

    hierarchy = []
    for top_concept in top_concepts:
        hierarchy.append((1, top_concept, Source.get_prefLabel_from_uri(top_concept)))
        hierarchy.extend(Source.get_narrowers(top_concept, 1))
    return hierarchy
def vocabulary(vocab_id):
    """
    Render a single vocabulary.

    The language is taken from the 'lang' request argument, falling back to
    config.DEFAULT_LANGUAGE.

    :param vocab_id: ID of the vocabulary; must be a key of g.VOCABS
    :return: result of VocabularyRenderer.render(), the invalid-vocab-ID response
        for an unknown ID, or the VbException response if the source handler fails
    """
    language = request.values.get('lang') or config.DEFAULT_LANGUAGE

    known_ids = g.VOCABS.keys()
    if vocab_id not in known_ids:
        return render_invalid_vocab_id_response()

    # get vocab details using appropriate source handler
    source = Source(vocab_id, request, language)
    try:
        vocab = source.get_vocabulary()
    except VbException as e:
        return render_vb_exception_response(e)
    else:
        return VocabularyRenderer(request, vocab).render()
def vocabulary_list(vocab_id):
    """
    Render a paged, optionally searched, register of a vocabulary's concepts.

    Request arguments read:

    * 'lang' - language, defaults to config.DEFAULT_LANGUAGE
    * 'search' - if given, only the concepts returned by match() for it are listed
    * 'page' - 1-based page number, defaults to 1
    * 'per_page' - page size, defaults to 20

    A missing, non-integer or non-positive 'page' / 'per_page' falls back to its
    default instead of raising.

    :param vocab_id: ID of the vocabulary; must be a key of g.VOCABS
    :return: result of SkosRegisterRenderer.render(), or the invalid-vocab-ID
        response for an unknown ID
    """
    def positive_int(name, default):
        # query string values are untrusted: anything that is not a positive
        # integer (missing, 'abc', 0, -3) yields the default
        try:
            value = int(request.values.get(name))
        except (TypeError, ValueError):
            return default
        return value if value > 0 else default

    def by_title(concept):
        return concept['title']

    language = request.values.get('lang') or config.DEFAULT_LANGUAGE

    if vocab_id not in g.VOCABS:
        return render_invalid_vocab_id_response()

    vocab_source = Source(vocab_id, request, language)
    # sorted() builds a new list, so the list handed out by the source is never
    # reordered or filtered in place
    concepts = sorted(vocab_source.list_concepts(), key=by_title)

    # Search
    query = request.values.get('search')
    if query:
        concepts = sorted(match(concepts, query), key=by_title)
    total = len(concepts)

    page = positive_int('page', 1)
    per_page = positive_int('per_page', 20)
    start = (page - 1) * per_page
    page_of_concepts = concepts[start:start + per_page]

    renderer = SkosRegisterRenderer(
        request,
        [],
        page_of_concepts,
        g.VOCABS[vocab_id].title + ' concepts',
        total,
        search_query=query,
        search_enabled=True,
        vocabulary_url=[request.url_root + 'vocabulary/' + vocab_id],
        vocab_id=vocab_id
    )
    return renderer.render()
def get_concept_hierarchy(self):
    """
    Build and draw the concept hierarchy of this vocabulary from its pickled graph.

    A SPARQL query over the graph loaded by FILE.load_pickle_graph() returns, for
    the concept scheme self.uri, each concept with its prefLabel, its parent
    (skos:broader concept, or the scheme for top concepts) and a ?length count
    used as the hierarchy level. Rows arrive ordered by level, parent, prefLabel
    and are arranged so that every concept follows its parent.

    :return: result of Source.draw_concept_hierarchy() for the ordered list of
        (level, concept URI, prefLabel, parent URI or None) tuples
    """
    graph = FILE.load_pickle_graph(self.vocab_id)
    # NOTE(review): self.uri is interpolated into the query unescaped - confirm it
    # can never carry user-supplied text
    rows = graph.query(
        """
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

        SELECT (COUNT(?mid) AS ?length) ?c ?pl ?parent
        WHERE {{
            ?c a skos:Concept .
            ?cs (skos:hasTopConcept | skos:narrower)* ?mid .
            ?mid (skos:hasTopConcept | skos:narrower)+ ?c .
            ?c skos:prefLabel ?pl .
            ?c (skos:topConceptOf | skos:broader) ?parent .
            FILTER (?cs = <{}>)
        }}
        GROUP BY ?c ?pl ?parent
        ORDER BY ?length ?parent ?pl
        """.format(self.uri)
    )

    hierarchy = []
    previous_parent_uri = None
    last_index = 0

    for row in rows:
        level = int(row['length'])
        concept_uri = row['c']
        pref_label = row['pl']
        parent_uri = row['parent']

        # top concepts (their parent is the concept scheme) are appended directly
        if str(parent_uri) == self.uri:
            hierarchy.append((level, concept_uri, pref_label, None))
            continue

        if parent_uri == previous_parent_uri:
            # same parent as the previously inserted concept: goes right after it
            last_index += 1
        else:
            # different parent: goes right after the parent's (last) entry.
            # Compared for equality - a substring test would also match any
            # longer URI that merely contains the parent's URI
            parent_index = 0
            for index, entry in enumerate(hierarchy):
                if entry[1] == parent_uri:
                    parent_index = index
            last_index = parent_index + 1
            previous_parent_uri = parent_uri

        hierarchy.insert(last_index, (level, concept_uri, pref_label, parent_uri))

    return Source.draw_concept_hierarchy(hierarchy, self.request, self.vocab_id)
def get_concept_hierarchy(self):
    """
    Function to draw concept hierarchy for vocabulary

    Same as parent query, only:

    * running against rdflib in-memory graph (self.gr), not SPARQL endpoint
    * a single graph, not a multi-graph (since it's an RDF/XML or Turtle file)

    The hierarchy is listed depth first, top concepts at level 1, siblings ordered
    by prefLabel. A concept that is its own ancestor through skos:broader is not
    listed again beneath itself, so cyclic data cannot recurse without end.

    :return: result of Source.draw_concept_hierarchy() for the list of tuples
        (<level>, <concept>, <concept_preflabel>, <broader_concept>)
    """
    vocab = g.VOCABS[self.vocab_id]
    # NOTE(review): vocab_uri and language are interpolated unescaped - confirm
    # self.language can never carry unvalidated request text
    q = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX dct: <http://purl.org/dc/terms/>

        SELECT DISTINCT ?concept ?concept_preflabel ?broader_concept
        WHERE {{
            {{ ?concept skos:inScheme <{vocab_uri}> . }}
            UNION
            {{ ?concept skos:topConceptOf <{vocab_uri}> . }}
            UNION
            {{ <{vocab_uri}> skos:hasTopConcept ?concept . }}
            ?concept skos:prefLabel ?concept_preflabel .
            OPTIONAL {{
                ?concept skos:broader ?broader_concept .
                ?broader_concept skos:inScheme <{vocab_uri}> .
            }}
            FILTER(lang(?concept_preflabel) = "{language}" || lang(?concept_preflabel) = "")
        }}
        ORDER BY ?concept_preflabel""".format(
        vocab_uri=vocab.concept_scheme_uri, language=self.language)

    # Group the bindings by broader concept once (key None = top concepts), so the
    # recursion looks its children up instead of re-scanning every binding
    narrower_by_broader = {}
    for concept, concept_preflabel, broader_concept in self.gr.query(q):
        narrower_by_broader.setdefault(broader_concept, []).append(
            (concept, concept_preflabel, broader_concept))
    for siblings in narrower_by_broader.values():
        siblings.sort(key=lambda sibling: sibling[1])

    def build_hierarchy(broader_concept=None, level=0, ancestors=frozenset()):
        """
        Recursive helper function to build hierarchy list below broader_concept
        Returns list of tuples: (<level>, <concept>, <concept_preflabel>, <broader_concept>)
        """
        level += 1  # Start with level 1 for top concepts
        hierarchy = []
        for concept, concept_preflabel, broader in narrower_by_broader.get(broader_concept, ()):
            if concept in ancestors:
                # cyclic skos:broader chain: already listed higher up this branch
                continue
            hierarchy.append((level, concept, concept_preflabel, broader or None))
            hierarchy += build_hierarchy(concept, level, ancestors | {concept})
        return hierarchy

    return Source.draw_concept_hierarchy(build_hierarchy(), self.request, self.vocab_id)
def get_top_concepts(self):
    """
    List the top concepts of this vocabulary's concept scheme.

    A first query (skos:hasTopConcept / skos:topConceptOf) is sent through
    Source.sparql_query() to the vocabulary's SPARQL endpoint. If it yields no
    top concepts, a second query (skos:hasTopConcept / skos:inScheme) is run
    against the rdflib graph self.gr. Rows repeating an already seen prefLabel
    are dropped.

    :return: list of (top concept, prefLabel) tuples, or None if the first query
        returned None
    """
    # NOTE(review): the original comment here read "same as parent query, only
    # running against rdflib in-memory graph, not SPARQL endpoint", yet the first
    # query below goes to vocab.sparql_endpoint - confirm which is intended
    vocab = g.VOCABS[self.vocab_id]
    q = """
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        SELECT DISTINCT ?tc ?pl
        WHERE {{
            {{ GRAPH ?g
                {{
                    {{
                        <{concept_scheme_uri}> skos:hasTopConcept ?tc .
                    }}
                    UNION
                    {{
                        ?tc skos:topConceptOf <{concept_scheme_uri}> .
                    }}
                    {{ ?tc skos:prefLabel ?pl .
                        FILTER(lang(?pl) = "{language}" || lang(?pl) = "")
                    }}
                }}
            }}
            UNION
            {{
                {{
                    <{concept_scheme_uri}> skos:hasTopConcept ?tc .
                }}
                UNION
                {{
                    ?tc skos:topConceptOf <{concept_scheme_uri}> .
                }}
                {{ ?tc skos:prefLabel ?pl .
                    FILTER(lang(?pl) = "{language}" || lang(?pl) = "")
                }}
            }}
        }}
        ORDER BY ?pl
        """.format(concept_scheme_uri=vocab.concept_scheme_uri, language=self.language)
    top_concepts = Source.sparql_query(
        vocab.sparql_endpoint, q, vocab.sparql_username, vocab.sparql_password)

    if top_concepts is None:
        return None

    tcs = []
    pl_cache = []

    def keep_unseen(rows):
        # cache prefLabels and do not add duplicates. This prevents Concepts with
        # sameAs properties appearing twice
        # NOTE(review): rows are indexed positionally - confirm Source.sparql_query
        # returns rows in that shape
        for row in rows:
            label = row[1]
            if label in pl_cache:
                continue
            tcs.append((row[0], label))
            pl_cache.append(label)

    keep_unseen(top_concepts)

    if not tcs:
        # nothing declared as a top concept: fall back to anything in the scheme
        q = """
            PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            SELECT DISTINCT ?tc ?pl
            WHERE {{
                {{ GRAPH ?g
                    {{
                        {{
                            <{concept_scheme_uri}> skos:hasTopConcept ?tc .
                        }}
                        UNION
                        {{
                            ?tc skos:inScheme <{concept_scheme_uri}> .
                        }}
                        {{ ?tc skos:prefLabel ?pl .
                            FILTER(lang(?pl) = "{language}" || lang(?pl) = "")
                        }}
                    }}
                }}
                UNION
                {{
                    {{
                        <{concept_scheme_uri}> skos:hasTopConcept ?tc .
                    }}
                    UNION
                    {{
                        ?tc skos:inScheme <{concept_scheme_uri}> .
                    }}
                    {{ ?tc skos:prefLabel ?pl .
                        FILTER(lang(?pl) = "{language}" || lang(?pl) = "")
                    }}
                }}
            }}
            ORDER BY ?pl
            """.format(concept_scheme_uri=vocab.concept_scheme_uri, language=self.language)
        keep_unseen(self.gr.query(q))

    return tcs
def get_concept_hierarchy(self, concept_scheme_uri):
    """
    Build and draw the concept hierarchy of a concept scheme held in VocBench.

    The query is posted to VocBench's /SPARQL/evaluateQuery service for the
    project self.vocab_id. It returns each concept with its prefLabel, its parent
    (skos:broader concept, or the scheme for top concepts) and a ?length count
    used as the hierarchy level, ordered by level, parent, prefLabel. Rows are
    arranged so that every concept follows its parent; rows without a 'parent'
    binding are skipped.

    :param concept_scheme_uri: URI of the concept scheme whose hierarchy is built
    :return: result of Source.draw_concept_hierarchy() for the ordered list of
        (level, concept URI, prefLabel, parent URI or None) tuples
    :raises VbException: if VocBench does not answer with HTTP 200, or answers
        200 with a body that is not the expected JSON result
    """
    session = VOCBENCH('x', self.request)._authed_request_object()
    response = session.post(
        config.VB_ENDPOINT + '/SPARQL/evaluateQuery',
        data={
            'query': '''
                PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

                SELECT (COUNT(?mid) AS ?length) ?c ?pl ?parent
                WHERE {{
                    ?c a skos:Concept .
                    ?cs (skos:hasTopConcept | skos:narrower)* ?mid .
                    ?mid (skos:hasTopConcept | skos:narrower)+ ?c .
                    ?c skos:prefLabel ?pl .
                    ?c (skos:topConceptOf | skos:broader) ?parent .
                    FILTER (?cs = <{}>)
                }}
                GROUP BY ?c ?pl ?parent
                ORDER BY ?length ?parent ?pl'''.format(concept_scheme_uri),
            'ctx_project': self.vocab_id
        }
    )
    body = response.content.decode('utf-8')

    if response.status_code != 200:
        raise VbException('There was an error: ' + body)

    # VocBench can answer HTTP 200 with an XML error document instead of JSON, e.g.
    #   <?xml version="1.0" encoding="UTF-8"?><stresponse request="evaluateQuery" type="error">
    #   <msg>org.eclipse.rdf4j.repository.http.HTTPQueryEvaluationException: Query evaluation error:
    #   com.ontotext.trree.util.NotEnoughMemoryForDistinctGroupBy: Insufficient free Heap Memory 172Mb
    #   for group by and distinct, threshold:250Mb, reached 0Mb (HTTP status 500)</msg>
    #   </stresponse>
    # Such a body is reported as a VbException, which the endpoints handle,
    # rather than escaping as a JSON decoding error.
    try:
        bindings = json.loads(body)['result']['sparql']['results']['bindings']
    except (ValueError, KeyError, TypeError) as err:
        raise VbException('There was an error: ' + body) from err

    hierarchy = []
    previous_parent_uri = None
    last_index = 0

    for binding in bindings:
        if 'parent' not in binding:
            continue

        level = int(binding['length']['value'])
        concept_uri = binding['c']['value']
        pref_label = binding['pl']['value']
        parent_uri = binding['parent']['value']

        # top concepts (their parent is the concept scheme) are appended directly
        if str(parent_uri) == concept_scheme_uri:
            hierarchy.append((level, concept_uri, pref_label, None))
            continue

        if parent_uri == previous_parent_uri:
            # same parent as the previously inserted concept: goes right after it
            last_index += 1
        else:
            # different parent: goes right after the parent's (last) entry.
            # Compared for equality - a substring test would also match any
            # longer URI that merely contains the parent's URI
            parent_index = 0
            for index, entry in enumerate(hierarchy):
                if entry[1] == parent_uri:
                    parent_index = index
            last_index = parent_index + 1
            previous_parent_uri = parent_uri

        hierarchy.insert(last_index, (level, concept_uri, pref_label, parent_uri))

    return Source.draw_concept_hierarchy(hierarchy, self.request, self.vocab_id)
def collect(details):
    """
    For this source, one SPARQL endpoint is given for a series of vocabs which are all separate ConceptSchemes

    'ga-jena-fuseki': {
        'source': VocabSource.SPARQL,
        'sparql_endpoint': 'http://dev2.nextgen.vocabs.ga.gov.au/fuseki/vocabs',
        'sparql_username': '******', # Optional username for SPARQL endpoint
        'sparql_password': '******', # Optional password for SPARQL endpoint
        #'uri_filter_regex': '.*', # Regular expression to filter vocabulary URIs - Everything
        #'uri_filter_regex': '^http(s?)://pid.geoscience.gov.au/def/voc/ga/', # Regular expression to filter vocabulary URIs - GA
        #'uri_filter_regex': '^https://gcmdservices.gsfc.nasa.gov', # Regular expression to filter vocabulary URIs - GCMD
        'uri_filter_regex': '^http(s?)://resource.geosciml.org/', # Regular expression to filter vocabulary URIs - CGI
    },

    Every skos:ConceptScheme returned by the endpoint whose URI passes the optional
    'uri_filter_regex' is added to g.VOCABS as a Vocabulary, keyed by an ID taken
    from the last segment of its URI.

    :param details: source configuration as in the example above; only
        'sparql_endpoint' is required
    :raises AssertionError: if the concept scheme query returns None
    """
    logging.debug("SPARQL collect()...")

    # Get all the ConceptSchemes from the SPARQL endpoint
    # Interpret each CS as a Vocab
    q = """PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT *
WHERE {{
    {{ GRAPH ?g {{
        ?cs a skos:ConceptScheme .
        OPTIONAL {{ ?cs skos:prefLabel ?title .
            FILTER(lang(?title) = "{language}" || lang(?title) = "") }}
        OPTIONAL {{ ?cs dcterms:created ?created }}
        OPTIONAL {{ ?cs dcterms:issued ?issued }}
        OPTIONAL {{ ?cs dcterms:modified ?modified }}
        OPTIONAL {{ ?cs owl:versionInfo ?version }}
        OPTIONAL {{ ?cs skos:definition ?description .
            FILTER(lang(?description) = "{language}" || lang(?description) = "") }}
    }} }}
    UNION
    {{
        ?cs a skos:ConceptScheme .
        OPTIONAL {{ ?cs skos:prefLabel ?title .
            FILTER(lang(?title) = "{language}" || lang(?title) = "") }}
        OPTIONAL {{ ?cs dcterms:created ?created }}
        OPTIONAL {{ ?cs dcterms:issued ?issued }}
        OPTIONAL {{ ?cs dcterms:modified ?modified }}
        OPTIONAL {{ ?cs owl:versionInfo ?version }}
        OPTIONAL {{ ?cs skos:definition ?description .
            FILTER(lang(?description) = "{language}" || lang(?description) = "") }}
    }}
}}
ORDER BY ?title""".format(language=DEFAULT_LANGUAGE)

    # the credentials are optional, so they are read with .get() everywhere
    sparql_username = details.get("sparql_username")
    sparql_password = details.get("sparql_password")

    # record just the IDs & title for the VocPrez in-memory vocabs list
    concept_schemes = Source.sparql_query(
        details["sparql_endpoint"],
        q,
        sparql_username=sparql_username,
        sparql_password=sparql_password,
    )
    if concept_schemes is None:
        # raised explicitly so the check is not stripped under python -O;
        # the exception type is the one the former assert raised
        raise AssertionError("Unable to query conceptSchemes")

    def binding_value(binding, name):
        # value of an OPTIONAL SPARQL variable, None when it is unbound
        term = binding.get(name)
        return term.get("value") if term is not None else None

    def binding_date(binding, name):
        value = binding_value(binding, name)
        return dateutil.parser.parse(value) if value is not None else None

    uri_filter_regex = details.get("uri_filter_regex")
    sparql_vocabs = {}
    for cs in concept_schemes:
        cs_uri = cs["cs"]["value"]
        # handling CS URIs that end with '/'
        vocab_id = cs_uri.replace("/conceptScheme", "").split("/")[-1]

        # TODO: Investigate putting regex into SPARQL query
        if uri_filter_regex and not re.search(uri_filter_regex, cs_uri):
            logging.debug("Skipping vocabulary {}".format(vocab_id))
            continue

        if len(vocab_id) < 2:
            vocab_id = cs_uri.split("/")[-2]

        sparql_vocabs[vocab_id] = Vocabulary(
            vocab_id,
            cs_uri,
            binding_value(cs, "title") or vocab_id,  # Need string value for sorting, not None
            binding_value(cs, "description"),
            None,  # none of these SPARQL vocabs have creator info yet
            # TODO: add creator info to GSQ vocabs
            binding_date(cs, "created"),
            # dct:issued not in Vocabulary
            binding_date(cs, "modified"),
            binding_value(cs, "version"),  # versionInfo
            config.VocabSource.SPARQL,
            cs_uri,
            sparql_endpoint=details["sparql_endpoint"],
            sparql_username=sparql_username,
            sparql_password=sparql_password,
        )
    g.VOCABS = {**g.VOCABS, **sparql_vocabs}
    logging.debug("SPARQL collect() complete.")
def collect(details):
    """
    For this source, one SPARQL endpoint is given for a series of vocabs which are all separate ConceptSchemes

    'gsq-graphdb': {
        'source': VocabSource.SPARQL,
        'sparql_endpoint': 'http://graphdb.gsq.digital:7200/repositories/GSQ_Vocabularies_core'
    },

    Every skos:ConceptScheme returned by the endpoint whose URI passes the optional
    'uri_filter_regex' is added to g.VOCABS as a Vocabulary, keyed by an ID taken
    from the last segment of its URI. If the query returns nothing, g.VOCABS is
    left with its existing entries.

    :param details: source configuration as in the example above; only
        'sparql_endpoint' is required, 'sparql_username', 'sparql_password' and
        'uri_filter_regex' are optional
    """
    logging.debug('SPARQL collect()...')

    # Get all the ConceptSchemes from the SPARQL endpoint
    # Interpret each CS as a Vocab
    # Ordered by ?title: the former "ORDER BY ?l" named a variable the query never binds
    q = '''
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        SELECT *
        WHERE {{
            GRAPH ?g {{
                ?cs a skos:ConceptScheme .
                OPTIONAL {{ ?cs skos:prefLabel ?title .
                    FILTER(lang(?title) = "{language}" || lang(?title) = "") }}
                OPTIONAL {{ ?cs dcterms:created ?created }}
                OPTIONAL {{ ?cs dcterms:issued ?issued }}
                OPTIONAL {{ ?cs dcterms:modified ?modified }}
                OPTIONAL {{ ?cs skos:definition ?description .
                    FILTER(lang(?description) = "{language}" || lang(?description) = "") }}
            }}
        }}
        ORDER BY ?title
        '''.format(language=DEFAULT_LANGUAGE)

    # the credentials are optional (the example configuration above has none),
    # so they are read with .get() everywhere
    sparql_username = details.get('sparql_username')
    sparql_password = details.get('sparql_password')

    # record just the IDs & title for the VocPrez in-memory vocabs list
    concept_schemes = Source.sparql_query(
        details['sparql_endpoint'],
        q,
        sparql_username=sparql_username,
        sparql_password=sparql_password) or []

    def binding_value(binding, name):
        # value of an OPTIONAL SPARQL variable, None when it is unbound
        term = binding.get(name)
        return term.get('value') if term is not None else None

    def binding_date(binding, name):
        value = binding_value(binding, name)
        return dateutil.parser.parse(value) if value is not None else None

    uri_filter_regex = details.get('uri_filter_regex')
    sparql_vocabs = {}
    for cs in concept_schemes:
        cs_uri = cs['cs']['value']
        # handling CS URIs that end with '/'
        vocab_id = cs_uri.replace('/conceptScheme', '').split('/')[-1]

        if uri_filter_regex and not re.search(uri_filter_regex, cs_uri):
            logging.debug('Skipping vocabulary {}'.format(vocab_id))
            continue

        if len(vocab_id) < 2:
            vocab_id = cs_uri.split('/')[-2]

        sparql_vocabs[vocab_id] = Vocabulary(
            vocab_id,
            cs_uri.replace('/conceptScheme', ''),
            binding_value(cs, 'title') or vocab_id,  # Need string value for sorting, not None
            binding_value(cs, 'description'),
            None,  # none of these SPARQL vocabs have creator info yet
            # TODO: add creator info to GSQ vocabs
            binding_date(cs, 'created'),
            # dct:issued not in Vocabulary
            binding_date(cs, 'modified'),
            None,  # versionInfo
            config.VocabSource.SPARQL,
            cs_uri,
            sparql_endpoint=details['sparql_endpoint'],
            sparql_username=sparql_username,
            sparql_password=sparql_password)
    g.VOCABS = {**g.VOCABS, **sparql_vocabs}
    logging.debug('SPARQL collect() complete.')