コード例 #1
0
def object():
    """
    RESTful endpoint that renders one object taken from a vocabulary.

    Three request values are read: 'lang' (falling back to
    ``config.DEFAULT_LANGUAGE``), 'vocab_id' and 'uri'. The class of the
    object is obtained from the vocabulary's ``Source``; SKOS Concepts and
    SKOS Collections are handed to their renderers, any other class yields the
    invalid-object-class response.

    A plain-text 400 response is returned when 'vocab_id' is not a key of
    ``g.VOCABS`` or when 'uri' is absent. A ``VbException`` raised while the
    object is being loaded or rendered is converted by
    ``render_vb_exception_response``.

    :return: A Flask Response object
    :rtype: :class:`flask.Response`
    """
    skos_concept = 'http://www.w3.org/2004/02/skos/core#Concept'
    skos_collection = 'http://www.w3.org/2004/02/skos/core#Collection'

    values = request.values
    language = values.get('lang') or config.DEFAULT_LANGUAGE
    vocab_id = values.get('vocab_id')
    uri = values.get('uri')

    # Guard: the vocabulary ID has to be one of the registered ones
    if vocab_id not in g.VOCABS:
        known_ids = '\n'.join(g.VOCABS)
        return Response(
            "The vocabulary ID you've supplied is not known. Must be one of:\n "
            + known_ids,
            status=400,
            mimetype='text/plain')

    # Guard: without an object URI there is nothing to look up
    if uri is None:
        return Response(
            "A Query String Argument 'uri' must be supplied for this endpoint, "
            "indicating an object within a vocabulary",
            status=400,
            mimetype='text/plain')

    source = Source(vocab_id, request, language)

    try:
        # TODO reuse object within if, rather than re-loading graph
        object_class = source.get_object_class()

        if object_class == skos_concept:
            return ConceptRenderer(request, source.get_concept()).render()

        if object_class == skos_collection:
            return CollectionRenderer(
                request, source.get_collection(uri)).render()

        return render_invalid_object_class_response(
            vocab_id, uri, object_class)
    except VbException as e:
        return render_vb_exception_response(e)
コード例 #2
0
ファイル: FILE.py プロジェクト: internetofwater/VocPrez
    def build_concept_hierarchy(vocab_id):
        """
        Build the flat concept hierarchy list for a pickled vocabulary graph.

        The graph is loaded with ``FILE.load_pickle_graph``. The subject typed
        ``skos:ConceptScheme`` is taken as the scheme URI (if several are
        present, the last one iterated wins), and the objects of its
        ``skos:hasTopConcept`` triples are the top concepts.

        :param vocab_id: ID of the vocabulary whose pickled graph is loaded
        :return: list starting, for each top concept in sorted URI order, with
            the tuple ``(1, <top concept URI>, <prefLabel>)`` followed by
            whatever ``Source.get_narrowers(<top concept URI>, 1)`` returns
        :raises Exception: if the graph has no ConceptScheme or the scheme has
            no top concepts
        """
        graph = FILE.load_pickle_graph(vocab_id)

        # get uri
        scheme_uri = None
        for s, _, _ in graph.triples((None, RDF.type, SKOS.ConceptScheme)):
            scheme_uri = str(s)

        # Fail with a clear message instead of passing None on to URIRef()
        if scheme_uri is None:
            raise Exception('ConceptScheme not found')

        # get TopConcept
        top_concepts = sorted(
            str(o) for _, _, o in graph.triples(
                (URIRef(scheme_uri), SKOS.hasTopConcept, None)))

        if not top_concepts:
            raise Exception('topConcept not found')

        hierarchy = []
        for tc in top_concepts:
            hierarchy.append((1, tc, Source.get_prefLabel_from_uri(tc)))
            hierarchy += Source.get_narrowers(tc, 1)
        return hierarchy
コード例 #3
0
def vocabulary(vocab_id):
    """
    Render the vocabulary identified by ``vocab_id``.

    The language comes from the 'lang' request value, falling back to
    ``config.DEFAULT_LANGUAGE``. An ID that is not a key of ``g.VOCABS``
    produces the invalid-vocab-id response; a ``VbException`` raised while
    the vocabulary is fetched produces the VbException response.

    :param vocab_id: ID of the vocabulary to render
    :return: the result of ``VocabularyRenderer.render()`` or one of the error
        responses described above
    """
    language = request.values.get('lang') or config.DEFAULT_LANGUAGE

    if vocab_id not in g.VOCABS:
        return render_invalid_vocab_id_response()

    # get vocab details using appropriate source handler
    try:
        source = Source(vocab_id, request, language)
        vocab = source.get_vocabulary()
    except VbException as e:
        return render_vb_exception_response(e)
    else:
        # Rendering stays outside the try body, so its errors are not caught
        return VocabularyRenderer(request, vocab).render()
コード例 #4
0
def vocabulary_list(vocab_id):
    """
    Render a paged, optionally filtered register of a vocabulary's concepts.

    Request values used:
      - 'lang': language, falling back to ``config.DEFAULT_LANGUAGE``
      - 'search': optional query; when given, the concepts are filtered
        through ``match(concepts, query)``
      - 'page': 1-based page number, default 1
      - 'per_page': page size, default 20

    Non-numeric 'page' / 'per_page' values fall back to their defaults instead
    of raising ``ValueError``, and values below 1 are clamped to 1 so that the
    slice offsets can never become negative.

    :param vocab_id: ID of the vocabulary whose concepts are listed
    :return: the result of ``SkosRegisterRenderer.render()``, or the
        invalid-vocab-id response when ``vocab_id`` is not in ``g.VOCABS``
    """
    language = request.values.get('lang') or config.DEFAULT_LANGUAGE

    if vocab_id not in g.VOCABS:
        return render_invalid_vocab_id_response()

    def by_title(concept):
        return concept['title']

    # sorted() builds a new list, leaving the one returned by list_concepts()
    # untouched
    vocab_source = Source(vocab_id, request, language)
    concepts = sorted(vocab_source.list_concepts(), key=by_title)

    # Search
    query = request.values.get('search')
    if query:
        concepts = sorted(match(concepts, query), key=by_title)
    total = len(concepts)

    # The typed get() returns the default when the key is missing or when
    # int() raises ValueError on the supplied text
    page = max(1, request.values.get('page', default=1, type=int))
    per_page = max(1, request.values.get('per_page', default=20, type=int))
    start = (page - 1) * per_page
    concepts = concepts[start:start + per_page]

    renderer = SkosRegisterRenderer(
        request, [],
        concepts,
        g.VOCABS[vocab_id].title + ' concepts',
        total,
        search_query=query,
        search_enabled=True,
        vocabulary_url=[request.url_root + 'vocabulary/' + vocab_id],
        vocab_id=vocab_id)
    return renderer.render()
コード例 #5
0
ファイル: FILE.py プロジェクト: internetofwater/VocPrez
    def get_concept_hierarchy(self):
        """
        Build and draw the concept hierarchy of this vocabulary.

        The pickled graph for ``self.vocab_id`` is queried for every
        skos:Concept below the scheme ``self.uri``, together with its
        prefLabel, its parent (skos:topConceptOf / skos:broader) and the number
        of intermediate nodes (``?length``). Rows arrive ordered by length,
        parent and prefLabel and are arranged into a flat list of tuples
        ``(<length>, <concept>, <prefLabel>, <parent or None>)``:

          - a concept whose parent is the scheme itself is appended at the end
          - a concept with the same parent as the previously inserted
            non-top concept goes right after that previous insert
          - any other concept goes right after the last list entry whose
            concept URI equals its parent (after index 0 if none is found)

        :return: whatever ``Source.draw_concept_hierarchy`` returns for the
            list built here
        """
        graph = FILE.load_pickle_graph(self.vocab_id)
        result = graph.query(
            """
            PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

            SELECT (COUNT(?mid) AS ?length) ?c ?pl ?parent
            WHERE {{ 
                ?c      a                                       skos:Concept .   
                ?cs     (skos:hasTopConcept | skos:narrower)*   ?mid .
                ?mid    (skos:hasTopConcept | skos:narrower)+   ?c .                      
                ?c      skos:prefLabel                          ?pl .
                ?c		(skos:topConceptOf | skos:broader)		?parent .
                FILTER (?cs = <{}>)
            }}
            GROUP BY ?c ?pl ?parent
            ORDER BY ?length ?parent ?pl
            """.format(self.uri)
        )

        hierarchy = []
        previous_parent_uri = None
        last_index = 0

        for row in result:
            level = int(row['length'])
            concept, pref_label, parent = row['c'], row['pl'], row['parent']

            # insert all topConceptOf directly
            if str(parent) == self.uri:
                hierarchy.append((level, concept, pref_label, None))
                continue

            if parent == previous_parent_uri:
                # Same parent as the previous insert: go right after it
                insert_at = last_index + 1
            else:
                # Different parent: go right after the last entry for that
                # parent. The URIs are compared for equality; a substring test
                # would also match longer URIs that merely contain the
                # parent's URI.
                parent_index = 0
                for i, entry in enumerate(hierarchy):
                    if entry[1] == parent:
                        parent_index = i
                insert_at = parent_index + 1

            hierarchy.insert(insert_at, (level, concept, pref_label, parent))
            last_index = insert_at
            previous_parent_uri = parent
        return Source.draw_concept_hierarchy(hierarchy, self.request, self.vocab_id)
コード例 #6
0
    def get_concept_hierarchy(self):
        """
        Function to draw concept hierarchy for vocabulary

        Same as the parent query, only:
          - running against the rdflib in-memory graph (``self.gr``), not a
            SPARQL endpoint
          - a single graph, not a multi-graph (since it's an RDF/XML or
            Turtle file)

        The query returns one row per (concept, prefLabel, broader concept)
        for the concept scheme of ``g.VOCABS[self.vocab_id]``; the broader
        concept is unbound for concepts without an in-scheme broader concept.
        The rows are turned into a depth-first list of tuples
        ``(<level>, <concept>, <concept_preflabel>, <broader_concept>)`` with
        siblings ordered by prefLabel and top concepts at level 1.

        :return: whatever ``Source.draw_concept_hierarchy`` returns for that
            list
        """
        vocab = g.VOCABS[self.vocab_id]

        q = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            PREFIX dct: <http://purl.org/dc/terms/>
            SELECT DISTINCT ?concept ?concept_preflabel ?broader_concept
            WHERE {{
                {{ ?concept skos:inScheme <{vocab_uri}> . }}
                UNION
                {{ ?concept skos:topConceptOf <{vocab_uri}> . }}
                UNION
                {{ <{vocab_uri}> skos:hasTopConcept ?concept . }}                
                ?concept skos:prefLabel ?concept_preflabel .
                OPTIONAL {{ ?concept skos:broader ?broader_concept .
                    ?broader_concept skos:inScheme <{vocab_uri}> .
                    }}
                FILTER(lang(?concept_preflabel) = "{language}" || lang(?concept_preflabel) = "")
            }}
            ORDER BY ?concept_preflabel""".format(
            vocab_uri=vocab.concept_scheme_uri, language=self.language)

        # Group the rows by broader concept once (key None = top concepts), so
        # the recursion below does not rescan every row for every concept
        narrower_by_broader = {}
        for r in self.gr.query(q):
            # ?concept ?concept_preflabel ?broader_concept
            concept, concept_preflabel, broader_concept = r[0], r[1], r[2]
            narrower_by_broader.setdefault(broader_concept, []).append(
                (concept, concept_preflabel, broader_concept))

        def build_hierarchy(broader_concept=None, level=1, path=()):
            """
            Recursive helper function to build the hierarchy list
            Returns list of tuples: (<level>, <concept>, <concept_preflabel>, <broader_concept>)

            ``path`` holds the concepts on the current branch; a concept that
            is already on it is skipped so that cyclic skos:broader data
            cannot cause unbounded recursion.
            """
            hierarchy = []
            narrower_list = sorted(
                narrower_by_broader.get(broader_concept, ()),
                key=lambda row: row[1],
            )
            for concept, concept_preflabel, broader in narrower_list:
                if concept in path:
                    continue
                hierarchy.append((
                    level,
                    concept,
                    concept_preflabel,
                    broader if broader else None,
                ))
                hierarchy += build_hierarchy(
                    concept, level + 1, path + (concept,))
            return hierarchy

        hierarchy = build_hierarchy()

        return Source.draw_concept_hierarchy(hierarchy, self.request,
                                             self.vocab_id)
コード例 #7
0
    def get_top_concepts(self):
        """
        Return the top concepts of this vocabulary's concept scheme.

        A first query (skos:hasTopConcept / skos:topConceptOf, in named graphs
        and in the default graph) is sent through ``Source.sparql_query`` to
        ``vocab.sparql_endpoint``. If it yields no usable rows, a second query
        that also accepts skos:inScheme members is run against ``self.gr``.

        NOTE(review): the original leading comment said this runs "against
        rdflib in-memory graph, not SPARQL endpoint", yet the first query goes
        to the endpoint - confirm which one is intended.

        :return: list of ``(row[0], row[1])`` tuples (the ?tc and ?pl columns)
            without repeated ?pl values, or None when the first query returns
            None
        """
        vocab = g.VOCABS[self.vocab_id]
        q = """
            PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            SELECT DISTINCT ?tc ?pl
            WHERE {{
                {{ GRAPH ?g 
                    {{
                        {{
                            <{concept_scheme_uri}> skos:hasTopConcept ?tc .                
                        }}
                        UNION 
                        {{
                            ?tc skos:topConceptOf <{concept_scheme_uri}> .
                        }}
                        {{ ?tc skos:prefLabel ?pl .
                            FILTER(lang(?pl) = "{language}" || lang(?pl) = "") 
                        }}
                    }}
                }}
                UNION
                {{
                    {{
                        <{concept_scheme_uri}> skos:hasTopConcept ?tc .                
                    }}
                    UNION 
                    {{
                        ?tc skos:topConceptOf <{concept_scheme_uri}> .
                    }}
                    {{ ?tc skos:prefLabel ?pl .
                        FILTER(lang(?pl) = "{language}" || lang(?pl) = "")
                    }}
                }}
            }}
            ORDER BY ?pl
            """.format(concept_scheme_uri=vocab.concept_scheme_uri,
                       language=self.language)
        endpoint_rows = Source.sparql_query(vocab.sparql_endpoint, q,
                                            vocab.sparql_username,
                                            vocab.sparql_password)

        if endpoint_rows is None:
            return None

        # cache prefLabels and do not add duplicates. This prevents Concepts with sameAs properties appearing twice
        seen_labels = []
        tcs = []

        def add_unique(rows):
            """Append (row[0], row[1]) for every row whose label is new."""
            for row in rows:
                label = row[1]
                if label in seen_labels:
                    continue
                tcs.append((row[0], label))
                seen_labels.append(label)

        add_unique(endpoint_rows)

        if not tcs:
            # Nothing found: retry on the in-memory graph, also accepting
            # concepts that are merely skos:inScheme
            q = """
                    PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
                    SELECT DISTINCT ?tc ?pl
                    WHERE {{
                        {{ GRAPH ?g {{
                            {{
                                <{concept_scheme_uri}> skos:hasTopConcept ?tc .                
                            }}
                            UNION 
                            {{
                                ?tc skos:inScheme <{concept_scheme_uri}> .
                            }}
                            {{ ?tc skos:prefLabel ?pl .
                                FILTER(lang(?pl) = "{language}" || lang(?pl) = "") 
                            }}
                        }} }}
                        UNION
                        {{
                            {{
                                <{concept_scheme_uri}> skos:hasTopConcept ?tc .                
                            }}
                            UNION 
                            {{
                                ?tc skos:inScheme <{concept_scheme_uri}> .
                            }}
                            {{ ?tc skos:prefLabel ?pl .
                                FILTER(lang(?pl) = "{language}" || lang(?pl) = "")
                            }}
                        }}
                    }}
                    ORDER BY ?pl
                    """.format(concept_scheme_uri=vocab.concept_scheme_uri,
                               language=self.language)
            add_unique(self.gr.query(q))

        return tcs
コード例 #8
0
    def get_concept_hierarchy(self, concept_scheme_uri):
        """
        Build and draw the concept hierarchy of a VocBench concept scheme.

        The SPARQL query is posted to ``config.VB_ENDPOINT +
        '/SPARQL/evaluateQuery'`` for project ``self.vocab_id`` and the JSON
        bindings are arranged into an ordered list of tuples
        ``(hierarchy level, Concept URI, Concept prefLabel, parent URI or None)``:

          - bindings without a 'parent' are skipped
          - a concept whose parent is the scheme itself is appended at the end
          - a concept with the same parent as the previously inserted
            non-top concept goes right after that previous insert
          - any other concept goes right after the last list entry whose
            concept URI equals its parent (after index 0 if none is found)

        :param concept_scheme_uri: URI of the concept scheme to walk
        :return: whatever ``Source.draw_concept_hierarchy`` returns for the
            list built here
        :raises VbException: if the response status is not 200, or the body
            is not JSON with the expected result structure
        """
        query = '''
                    PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

                    SELECT (COUNT(?mid) AS ?length) ?c ?pl ?parent
                    WHERE {{ 
                        ?c      a                                       skos:Concept .   
                        ?cs     (skos:hasTopConcept | skos:narrower)*   ?mid .
                        ?mid    (skos:hasTopConcept | skos:narrower)+   ?c .                      
                        ?c      skos:prefLabel                          ?pl .
                        ?c		(skos:topConceptOf | skos:broader)		?parent .
                        FILTER (?cs = <{}>)
                    }}
                    GROUP BY ?c ?pl ?parent
                    ORDER BY ?length ?parent ?pl'''.format(concept_scheme_uri)

        session = VOCBENCH('x', self.request)._authed_request_object()
        response = session.post(
            config.VB_ENDPOINT + '/SPARQL/evaluateQuery',
            data={'query': query, 'ctx_project': self.vocab_id})

        body = response.content.decode('utf-8')
        if response.status_code != 200:
            raise VbException('There was an error: ' + body)

        # A 200 response is not necessarily JSON: the earlier revision of this
        # method kept a sample XML <stresponse type="error"> body (a query
        # evaluation failure) in this branch. Report such bodies as
        # VbException rather than leaking a JSON/Key error.
        try:
            cs = json.loads(body)['result']['sparql']['results']['bindings']
        except (ValueError, KeyError, TypeError) as err:
            raise VbException('There was an error: ' + body) from err

        hierarchy = []
        previous_parent_uri = None
        last_index = 0

        for c in cs:
            if 'parent' not in c:
                continue

            level = int(c['length']['value'])
            concept = c['c']['value']
            pref_label = c['pl']['value']
            parent = c['parent']['value']

            # insert all topConceptOf directly
            if str(parent) == concept_scheme_uri:
                hierarchy.append((level, concept, pref_label, None))
                continue

            if parent == previous_parent_uri:
                # Same parent as the previous insert: go right after it
                insert_at = last_index + 1
            else:
                # Different parent: go right after the last entry for that
                # parent. The URIs are compared for equality; a substring test
                # would also match longer URIs that merely contain the
                # parent's URI.
                parent_index = 0
                for i, entry in enumerate(hierarchy):
                    if entry[1] == parent:
                        parent_index = i
                insert_at = parent_index + 1

            hierarchy.insert(insert_at, (level, concept, pref_label, parent))
            last_index = insert_at
            previous_parent_uri = parent

        return Source.draw_concept_hierarchy(hierarchy, self.request,
                                             self.vocab_id)
コード例 #9
0
ファイル: SPARQL.py プロジェクト: andrewmackie/VocPrez
    def collect(details):
        """
        For this source, one SPARQL endpoint is given for a series of vocabs which are all separate ConceptSchemes

        'ga-jena-fuseki': {
            'source': VocabSource.SPARQL,
            'sparql_endpoint': 'http://dev2.nextgen.vocabs.ga.gov.au/fuseki/vocabs',
            'sparql_username': '******', # Optional username for SPARQL endpoint
            'sparql_password': '******', # Optional password for SPARQL endpoint
            #'uri_filter_regex': '.*', # Regular expression to filter vocabulary URIs - Everything
            #'uri_filter_regex': '^http(s?)://pid.geoscience.gov.au/def/voc/ga/', # Regular expression to filter vocabulary URIs - GA
            #'uri_filter_regex': '^https://gcmdservices.gsfc.nasa.gov', # Regular expression to filter vocabulary URIs - GCMD
            'uri_filter_regex': '^http(s?)://resource.geosciml.org/', # Regular expression to filter vocabulary URIs - CGI

        },
        """
        logging.debug("SPARQL collect()...")

        # Get all the ConceptSchemes from the SPARQL endpoint
        # Interpret each CS as a Vocab
        q = """PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT * WHERE {{
    {{ GRAPH ?g {{
        ?cs a skos:ConceptScheme .
        OPTIONAL {{ ?cs skos:prefLabel ?title .
            FILTER(lang(?title) = "{language}" || lang(?title) = "") }}
        OPTIONAL {{ ?cs dcterms:created ?created }}
        OPTIONAL {{ ?cs dcterms:issued ?issued }}
        OPTIONAL {{ ?cs dcterms:modified ?modified }}
        OPTIONAL {{ ?cs owl:versionInfo ?version }}
        OPTIONAL {{ ?cs skos:definition ?description .
            FILTER(lang(?description) = "{language}" || lang(?description) = "") }}
    }} }}
    UNION
    {{
        ?cs a skos:ConceptScheme .
        OPTIONAL {{ ?cs skos:prefLabel ?title .
            FILTER(lang(?title) = "{language}" || lang(?title) = "") }}
        OPTIONAL {{ ?cs dcterms:created ?created }}
        OPTIONAL {{ ?cs dcterms:issued ?issued }}
        OPTIONAL {{ ?cs dcterms:modified ?modified }}
        OPTIONAL {{ ?cs owl:versionInfo ?version }}
        OPTIONAL {{ ?cs skos:definition ?description .
            FILTER(lang(?description) = "{language}" || lang(?description) = "") }}
    }}
}} 
ORDER BY ?title""".format(language=DEFAULT_LANGUAGE)
        # record just the IDs & title for the VocPrez in-memory vocabs list
        concept_schemes = Source.sparql_query(
            details["sparql_endpoint"],
            q,
            sparql_username=details.get("sparql_username"),
            sparql_password=details.get("sparql_password"),
        )
        assert concept_schemes is not None, "Unable to query conceptSchemes"

        sparql_vocabs = {}
        for cs in concept_schemes:
            # handling CS URIs that end with '/'
            vocab_id = cs["cs"]["value"].replace("/conceptScheme",
                                                 "").split("/")[-1]

            # TODO: Investigate putting regex into SPARQL query
            # print("re.search('{}', '{}')".format(details.get('uri_filter_regex'), cs['cs']['value']))
            if details.get("uri_filter_regex") and not re.search(
                    details["uri_filter_regex"], cs["cs"]["value"]):
                logging.debug("Skipping vocabulary {}".format(vocab_id))
                continue

            if len(vocab_id) < 2:
                vocab_id = cs["cs"]["value"].split("/")[-2]

            sparql_vocabs[vocab_id] = Vocabulary(
                vocab_id,
                cs["cs"]["value"],
                cs["title"].get("value") or vocab_id if cs.get("title") else
                vocab_id,  # Need string value for sorting, not None
                cs["description"].get("value")
                if cs.get("description") is not None else None,
                None,  # none of these SPARQL vocabs have creator info yet # TODO: add creator info to GSQ vocabs
                dateutil.parser.parse(cs.get("created").get("value"))
                if cs.get("created") is not None else None,
                # dct:issued not in Vocabulary
                # dateutil.parser.parse(cs.get('issued').get('value')) if cs.get('issued') is not None else None,
                dateutil.parser.parse(cs.get("modified").get("value"))
                if cs.get("modified") is not None else None,
                cs["version"].get("value")
                if cs.get("version") is not None else None,  # versionInfo
                config.VocabSource.SPARQL,
                cs["cs"]["value"],
                sparql_endpoint=details["sparql_endpoint"],
                sparql_username=details["sparql_username"],
                sparql_password=details["sparql_password"],
            )
        g.VOCABS = {**g.VOCABS, **sparql_vocabs}
        logging.debug("SPARQL collect() complete.")
コード例 #10
0
    def collect(details):
        """
        For this source, one SPARQL endpoint is given for a series of vocabs which are all separate ConceptSchemes

        'gsq-graphdb': {
            'source': VocabSource.SPARQL,
            'sparql_endpoint': 'http://graphdb.gsq.digital:7200/repositories/GSQ_Vocabularies_core'
        },

        Every skos:ConceptScheme found in a named graph of the endpoint (and
        matching 'uri_filter_regex', when one is configured) becomes a
        ``Vocabulary`` that is merged into ``g.VOCABS`` under an ID derived
        from the last path segment of its URI. If the query returns nothing,
        no vocabularies are added.

        :param details: config dict for this source; 'sparql_endpoint' is
            required, 'sparql_username', 'sparql_password' and
            'uri_filter_regex' are optional
        :return: None
        """
        logging.debug('SPARQL collect()...')

        # Get all the ConceptSchemes from the SPARQL endpoint
        # Interpret each CS as a Vocab
        # (results are ordered by ?title; the variable previously named in
        # ORDER BY was never bound by the query)
        q = '''
            PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX dcterms: <http://purl.org/dc/terms/>
            SELECT * WHERE {{
                GRAPH ?g {{
                    ?cs a skos:ConceptScheme .
                    OPTIONAL {{ ?cs skos:prefLabel ?title .
                        FILTER(lang(?title) = "{language}" || lang(?title) = "") }}
                    OPTIONAL {{ ?cs dcterms:created ?created }}
                    OPTIONAL {{ ?cs dcterms:issued ?issued }}
                    OPTIONAL {{ ?cs dcterms:modified ?modified }}
                    OPTIONAL {{ ?cs skos:definition ?description .
                        FILTER(lang(?description) = "{language}" || lang(?description) = "") }}
                }}
            }} 
            ORDER BY ?title
        '''.format(language=DEFAULT_LANGUAGE)

        # The credentials are optional (the example config above has none), so
        # they are always read with .get()
        sparql_endpoint = details['sparql_endpoint']
        sparql_username = details.get('sparql_username')
        sparql_password = details.get('sparql_password')
        uri_filter_regex = details.get('uri_filter_regex')

        # record just the IDs & title for the VocPrez in-memory vocabs list
        concept_schemes = Source.sparql_query(
            sparql_endpoint,
            q,
            sparql_username=sparql_username,
            sparql_password=sparql_password) or []

        def binding_value(binding, name):
            """Return the 'value' of an optional SPARQL binding, or None."""
            term = binding.get(name)
            return term.get('value') if term is not None else None

        def binding_date(binding, name):
            """Return an optional SPARQL binding parsed as a date, or None."""
            value = binding_value(binding, name)
            return dateutil.parser.parse(value) if value is not None else None

        sparql_vocabs = {}
        for cs in concept_schemes:
            cs_uri = cs['cs']['value']
            vocab_id = cs_uri.replace('/conceptScheme', '').split('/')[-1]

            if uri_filter_regex and not re.search(uri_filter_regex, cs_uri):
                logging.debug('Skipping vocabulary {}'.format(vocab_id))
                continue

            # handling CS URIs that end with '/'
            if len(vocab_id) < 2:
                vocab_id = cs_uri.split('/')[-2]

            sparql_vocabs[vocab_id] = Vocabulary(
                vocab_id,
                cs_uri.replace('/conceptScheme', ''),
                # Need string value for sorting, not None
                binding_value(cs, 'title') or vocab_id,
                binding_value(cs, 'description'),
                None,  # none of these SPARQL vocabs have creator info yet # TODO: add creator info to GSQ vocabs
                binding_date(cs, 'created'),
                # dct:issued not in Vocabulary
                binding_date(cs, 'modified'),
                None,  # versionInfo
                config.VocabSource.SPARQL,
                cs_uri,
                sparql_endpoint=sparql_endpoint,
                sparql_username=sparql_username,
                sparql_password=sparql_password)
        g.VOCABS = {**g.VOCABS, **sparql_vocabs}
        logging.debug('SPARQL collect() complete.')