예제 #1
0
def query_complete_classes_of_entity(ent):
    """Collect the ontology classes (with superclasses) of a DBpedia entity.

    Two queries are sent to the DBpedia endpoint: one for the types of the
    entity itself and one for the types of the resource it points to through
    ``dbo:wikiPageRedirects``.

    :param ent: entity name; a leading ``dbp_prefix`` is removed if present.
    :return: set of class names with ``dbo_prefix`` stripped.  When a
        ``UnicodeDecodeError`` is raised a message is printed and the classes
        gathered up to that point are returned.
    """
    if ent.startswith(dbp_prefix):
        ent = ent.split(dbp_prefix)[1]

    templates = (
        'select distinct ?superclass where { <%s%s> rdf:type ?e. '
        '?e rdfs:subClassOf* ?superclass. FILTER(strstarts(str(?superclass), "%s"))}',
        'select distinct ?ss where {<%s%s> dbo:wikiPageRedirects ?e. ?e rdf:type ?s. '
        '?s rdfs:subClassOf* ?ss. FILTER(strstarts(str(?ss), "%s"))}',
    )

    found = set()
    try:
        endpoint = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
        for template in templates:
            # Formatting stays inside the try block, as in the original flow.
            answer = endpoint.query(template % (dbp_prefix, ent, dbo_prefix))
            for row in answer.fetchone():
                found.add(str(row[0]).split(dbo_prefix)[1])
    except UnicodeDecodeError:
        print('     %s: UnicodeDecodeError' % ent)
    return found
예제 #2
0
def get_text_info_for_entity(entity_uri):
    """Fetch the text fields of an entity from the DBpedia endpoint.

    The query text is produced by ``_gen_sparql_for_abstract_and_comment``.
    Only the first result row is used.

    :param entity_uri: passed unchanged to the query generator.
    :return: dict mapping every result variable name to the ``.value`` of the
        corresponding term in the first row; ``'Nothing'`` is used when there
        is no row or the term is unbound (``None``).
    :raises: any error raised by the SPARQL client is propagated to the
        caller (the previous version dropped into an ``ipdb`` debugger and
        then failed with ``NameError``).
    """
    handle = sparql.Service('http://dbpedia.org/sparql', "utf-8")
    statement = _gen_sparql_for_abstract_and_comment(entity_uri)
    result = handle.query(statement)

    rows = result.fetchall()
    keys = result.variables
    if not rows:
        return {key: 'Nothing' for key in keys}

    first_row = rows[0]
    return {
        key: (first_row[idx].value if first_row[idx] is not None else 'Nothing')
        for idx, key in enumerate(keys)
    }
예제 #3
0
def query_ent_num(cls):
    """Count DBpedia entities typed with a class or one of its subclasses.

    :param cls: class name; ``dbo_prefix`` is prepended to build the class IRI.
    :return: first column of the first result row converted to ``int``, or
        ``None`` when the query produces no rows.
    """
    endpoint = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
    count_query = 'select count(?ent) where {?ent rdf:type ?t. ?t rdfs:subClassOf* <%s%s>}' % (
        dbo_prefix, cls)
    first_row = next(iter(endpoint.query(count_query).fetchone()), None)
    if first_row is None:
        return None
    return int(str(first_row[0]))
예제 #4
0
def test_single_query():
    """Run a fixed abstract/comment query 100 times and print timings.

    On every iteration the time elapsed since the previous measurement is
    printed right after ``handle.query`` returns, followed by a dict that
    maps each result variable to the first 10 characters of its value in the
    first row.  ``'Nothing'`` is used when there is no row or the term is
    unbound; the previous version crashed in that case because it called
    ``.value`` on a plain string.
    """
    handle = sparql.Service('http://dbpedia.org/sparql', "utf-8")
    statement = \
        'PREFIX foaf:  <http://xmlns.com/foaf/0.1/> ' \
        'PREFIX dbo:   <http://dbpedia.org/ontology/> ' \
        'PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#> ' \
        'PREFIX dbr:   <http://dbpedia.org/resource/> ' \
        \
        'SELECT ?abs, ?comment ' \
        'WHERE {' \
        'dbr:Apple_II dbo:abstract ?abs . ' \
        'dbr:Apple_II rdfs:comment ?comment . ' \
        'FILTER (langMatches(lang(?abs),"en")) ' \
        'FILTER (langMatches(lang(?comment),"en")) ' \
        '} ' \
        'limit 10'
    t = time.time()
    for _ in range(100):
        result = handle.query(statement)
        print(time.time() - t)
        t = time.time()
        res_all = result.fetchall()
        keys = result.variables
        # Use None placeholders so the missing-row and unbound-term cases
        # share one code path below.
        first_row = res_all[0] if res_all else [None] * len(keys)
        r = {
            k: (first_row[idx].value[:10] if first_row[idx] is not None else 'Nothing')
            for idx, k in enumerate(keys)
        }
        print(r)
    return
예제 #5
0
파일: manage.py 프로젝트: eea/eea.seris
def update_country_names():
    """Refresh ``refdata/countries.json`` from the EEA semantic endpoint.

    The query selects ``?code`` and ``?countryname`` (plus membership flags
    that are not used here).  The output file is a JSON object mapping each
    lower-cased country code to its country name.
    """
    SPARQL_QUERY = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX eea: <http://rdfdata.eionet.europa.eu/eea/ontology/>

            SELECT DISTINCT ?code ?countryname ?publishingCode
              IF(bound(?eumember),'Yes','') AS ?eu
              IF(bound(?eeamember),'Yes','') AS ?eea
              IF(bound(?eionetmember),'Yes','') AS ?eionet
              IF(bound(?eun22member),'Yes','') AS ?eun22
            WHERE {
              ?ucountry a eea:Country ;
                        eea:code ?code;
                        eea:publishingCode ?publishingCode;
                        rdfs:label ?countryname
             OPTIONAL { <http://rdfdata.eionet.europa.eu/eea/countries/EU> skos:member ?ucountry, ?eumember }
             OPTIONAL { <http://rdfdata.eionet.europa.eu/eea/countries/EUN22> skos:member ?ucountry, ?eun22member }
             OPTIONAL { <http://rdfdata.eionet.europa.eu/eea/countries/EEA> skos:member ?ucountry, ?eeamember }
             OPTIONAL { <http://rdfdata.eionet.europa.eu/eea/countries/EIONET> skos:member ?ucountry, ?eionetmember }
            }"""
    SPARQL_ENDPOINT = 'http://semantic.eea.europa.eu/sparql'

    s = sparql.Service(SPARQL_ENDPOINT)
    # Column 0 is ?code and column 1 is ?countryname (SELECT order).
    countries = {}
    for item in s.query(SPARQL_QUERY).fetchone():
        countries[item[0].value.lower()] = item[1].value

    # The context manager closes the file even if json.dump raises.
    with open("refdata/countries.json", "w") as f:
        json.dump(countries, f)
예제 #6
0
def get_envelope_release_date(file_url):
    """Return the release date of the envelope a reported file is part of.

    Queries the Eionet content registry for the ``schema:released`` value of
    the parent of ``file_url`` and converts it with ``_to_datetime``.

    :param file_url: URL interpolated into the query's FILTER clause.
    :return: whatever ``_to_datetime`` returns for the released value.
    :raises: any error from the query or from an empty result is logged with
        ``logger.exception`` and re-raised.
    """
    query = """
PREFIX cr: <http://cr.eionet.europa.eu/ontologies/contreg.rdf#>
PREFIX terms: <http://purl.org/dc/terms/>
PREFIX schema: <http://rod.eionet.europa.eu/schema.rdf#>

SELECT ?released
WHERE {
?file terms:date ?date .
?file cr:mediaType 'text/xml'.
?file terms:isPartOf ?part .
?part schema:released ?released .
FILTER (str(?file) = '%s')
}
ORDER BY DESC(?date)
LIMIT 1
""" % file_url

    endpoint = sparql.Service('https://cr.eionet.europa.eu/sparql')

    try:
        # First column of the first row; fails when no row comes back.
        released = endpoint.query(query).fetchall()[0][0].value
    except:
        logger.exception(
            'Got an error in querying SPARQL endpoint for '
            'file_url: %s', file_url)

        raise

    return _to_datetime(released)
예제 #7
0
def wikidata_nuts_to_geonames(client, args):
    """Link NUTS codes to GeoNames and Wikidata ids using a Wikidata query.

    For every row (subject, ``wdt:P605`` value, ``wdt:P1566`` value) the
    ``nuts`` collection of ``client.geostore`` is updated.  Existing NUTS
    entries get ``geonames`` and ``wikidata`` fields set; missing ones are
    inserted only if the GeoNames URL is present in the ``geonames``
    collection, copying its ``name`` and, when available, ``country``.

    :param client: object exposing the ``geostore`` database.
    :param args: unused.
    """
    db = client.geostore
    nuts_coll = db.nuts

    statement = '''
    SELECT ?s ?o ?g WHERE {
      ?s wdt:P605 ?o.
      ?s wdt:P1566 ?g
    }
    '''
    endpoint = sparql.Service(WIKIDATA_ENDPOINT, "utf-8", "GET")
    for row in endpoint.query(statement).fetchone():
        wikidata_id = row[0].value.strip()
        nuts_code = row[1].value.strip()
        geon = 'http://sws.geonames.org/' + row[2].value + '/'

        if nuts_coll.find_one({'_id': nuts_code}):
            nuts_coll.update_one({'_id': nuts_code}, {'$set': {'geonames': geon, 'wikidata': wikidata_id}})
            continue

        logging.debug('NUTS entry not in DB: ' + str(nuts_code))
        geon_entry = db.geonames.find_one({'_id': geon})
        if not geon_entry:
            continue

        document = {'_id': nuts_code, 'geonames': geon, 'wikidata': wikidata_id, 'name': geon_entry['name']}
        if 'country' in geon_entry:
            document['country'] = geon_entry['country']
        nuts_coll.insert_one(document)
예제 #8
0
def queryLabelByClass(class_text):
    """Return one English ``rdfs:label`` of an ontology class.

    :param class_text: class name appended to ``ONTOLOGY_NS``.
    :return: first column of the first result row, or ``None`` if the query
        yields no rows.
    """
    label_query = 'select distinct str(?l) where {<%s%s> rdfs:label ?l. FILTER(langMatches(lang(?l), ' \
                  '"en"))}' % (ONTOLOGY_NS, class_text)
    endpoint = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    first_row = next(iter(endpoint.query(label_query).fetchone()), None)
    if first_row is None:
        return None
    return first_row[0]
예제 #9
0
def queryEngLabelByEntity(e):
    """Return the set of English ``rdfs:label`` values of an entity.

    :param e: full IRI of the entity (placed between ``<`` and ``>``).
    :return: set holding the first column of every result row.
    """
    endpoint = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    label_query = 'select distinct str(?l) where {<%s> rdfs:label ?l. FILTER( langMatches(lang(?l), "en"))}' % e
    rows = endpoint.query(label_query).fetchone()
    return {row[0] for row in rows}
예제 #10
0
파일: data.py 프로젝트: eea/wise.msfd
def _get_report_fileurl_art11_2014(country, region, article, descriptor):
    """Return the URLs of the Art11 (obligation 611) XML files for a country.

    Example of URLs this kind of lookup yields (from an earlier hard-coded
    stub):
    https://cdr.eionet.europa.eu/de/eu/msfd_mp/balde/envvfjbwg/BALDE_MSFD11Mon_20141105.xml

    :param country: country code; upper-cased before it is put in the query.
    :param region: region code; its lower-cased form is interpolated only
        into a commented-out FILTER line of the query, so it currently does
        not restrict the results.
    :param article: unused.
    :param descriptor: unused.
    :return: list of file URLs ordered as returned by the endpoint
        (``ORDER BY DESC(?date)``).
    :raises: any error during the query is logged with
        ``logger.exception`` and re-raised.
    """
    q = """
PREFIX cr: <http://cr.eionet.europa.eu/ontologies/contreg.rdf#>
PREFIX terms: <http://purl.org/dc/terms/>
PREFIX schema: <http://rod.eionet.europa.eu/schema.rdf#>
PREFIX core: <http://www.w3.org/2004/02/skos/core#>

SELECT distinct ?file
WHERE {
?file terms:date ?date .
?file cr:mediaType 'text/xml' .
?file terms:isPartOf ?isPartOf .
?file cr:xmlSchema ?schema .
?file schema:restricted ?restricted.
?isPartOf schema:locality ?locality .
?isPartOf schema:obligation ?obligation .
?obligation core:notation ?obligationNr .
?locality core:notation ?notation .
FILTER (?notation = '%s')
FILTER (?obligationNr = '611')
FILTER (str(?schema) = 'http://dd.eionet.europa.eu/schemas/MSFD11Mon/MSFD11MonSub_1p0.xsd'
|| str(?schema) = 'http://dd.eionet.europa.eu/schemas/MSFD11Mon/MSFD11Mon_1p1.xsd')
#FILTER regex(str(?file), '/%s')
#FILTER (?restricted = 0)
}
ORDER BY DESC(?date)
""" % (country.upper(), region.lower())

    service = sparql.Service('https://cr.eionet.europa.eu/sparql')

    logger.info("Getting fileurl Art11 with SPARQL: %s - %s", country, region)
    try:
        rows = service.query(q).fetchall()
        # The first (only) selected column is ?file.
        urls = [row[0].value for row in rows]
    except:
        logger.exception(
            'Got an error in querying SPARQL endpoint for '
            'Art11: %s - %s', country, region)

        raise

    return urls
예제 #11
0
def queryDescendantByClass(c):
    """Return the classes that are (transitive) subclasses of ``c``.

    Only classes whose IRI starts with ``ONTOLOGY_NS`` are selected.

    :param c: full IRI of the class.
    :return: set holding the first column of every result row.
    """
    descendant_query = 'select distinct str(?d) where {?d rdfs:subClassOf* <%s>. ' \
                       'FILTER(strstarts(str(?d), "%s"))}' % (c, ONTOLOGY_NS)
    endpoint = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    rows = endpoint.query(descendant_query).fetchone()
    return {row[0] for row in rows}
예제 #12
0
def queryAncestorByClass(c):
    """Return the classes that ``c`` is a (transitive) subclass of.

    Only classes whose IRI starts with ``ONTOLOGY_NS`` are selected.

    :param c: full IRI of the class.
    :return: set holding the first column of every result row.
    """
    ancestor_query = 'select distinct str(?a) where {<%s> rdfs:subClassOf* ?a. ' \
                     'FILTER(strstarts(str(?a), "%s"))}' % (c, ONTOLOGY_NS)
    endpoint = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    rows = endpoint.query(ancestor_query).fetchone()
    return {row[0] for row in rows}
예제 #13
0
def querySiblingByClass(c):
    """Return the classes sharing a direct ``rdfs:subClassOf`` parent with ``c``.

    ``c`` itself is excluded by the query, and only classes whose IRI starts
    with ``ONTOLOGY_NS`` are selected.

    :param c: full IRI of the class.
    :return: set holding the first column of every result row.
    """
    sibling_query = 'select distinct str(?s) where {<%s> rdfs:subClassOf ?p. ?s rdfs:subClassOf ?p. ' \
                    'FILTER(?s != <%s> && strstarts(str(?s), "%s"))}' % (c, c, ONTOLOGY_NS)
    endpoint = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    rows = endpoint.query(sibling_query).fetchone()
    return {row[0] for row in rows}
예제 #14
0
def queryTripleByClass(top_k, c):
    """Sample triples whose object is an instance of class ``c``.

    The query also selects the English label of the object and orders the
    results with ``RAND()``.

    :param top_k: value used for the query's ``limit``.
    :param c: full IRI of the class.
    :return: list of ``[subject, predicate, object, label]`` lists, one per
        result row.
    """
    triple_query = 'select distinct str(?s), str(?p), str(?o), str(?l) where {?s ?p ?o. ?o rdf:type <%s>. ' \
                   '?o rdfs:label ?l. FILTER( langMatches(lang(?l), "en"))} ORDER BY RAND() limit %d' % (c, top_k)
    endpoint = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    rows = endpoint.query(triple_query).fetchone()
    return [[row[col] for col in range(4)] for row in rows]
예제 #15
0
def queryObjEntitiesByProperty(top_k, p):
    """Sample objects of property ``p`` whose IRI starts with ``RESOURCE_NS``.

    :param top_k: value used for the query's ``limit``.
    :param p: full IRI of the property.
    :return: set holding the first column of every result row.
    """
    object_query = 'select distinct str(?o) where {?s <%s> ?o. FILTER (strstarts(str(?o), "%s"))} ' \
                   'ORDER BY RAND() limit %d' % (p, RESOURCE_NS, top_k)
    endpoint = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    rows = endpoint.query(object_query).fetchone()
    return {row[0] for row in rows}
예제 #16
0
def querySubByPropertyAndObject(p, o, top_k):
    """Return subjects that have property ``p`` with object ``o``.

    :param p: full IRI of the property.
    :param o: full IRI of the object.
    :param top_k: value used for the query's ``limit``.
    :return: set holding the first column of every result row.
    """
    subject_query = 'select distinct str(?s) where {?s <%s> <%s> } limit %d' % (
        p, o, top_k)
    endpoint = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    rows = endpoint.query(subject_query).fetchone()
    return {row[0] for row in rows}
예제 #17
0
    def __init__(self):
        """Create the SPARQL service from the endpoint URL in ``db.conf``.

        The URL is read from option ``url`` of section ``endpoint``.
        ``subjects`` starts as an empty dict and ``invertedalso`` as False.
        """
        parser = ConfigParser.SafeConfigParser()
        parser.read('db.conf')
        # Database user/password options were previously read here as well;
        # that code was already disabled.

        self.server = sparql.Service(parser.get('endpoint', 'url'))
        self.subjects = {}
        self.invertedalso = False
0
def querySubObjLabByProperty(top_k, p):
    """Sample (subject, object, English object label) rows for property ``p``.

    Only objects whose IRI starts with ``RESOURCE_NS`` are selected and the
    results are ordered with ``RAND()``.

    :param top_k: value used for the query's ``LIMIT``.
    :param p: full IRI of the property.
    :return: list of ``[subject, object, label]`` lists, one per result row.
    """
    sample_query = 'select distinct str(?s),str(?o),str(?l) where {?s <%s> ?o. ?o rdfs:label ?l. ' \
                   'FILTER (strstarts(str(?o), "%s") ' \
                   'and langMatches(lang(?l), "en"))} ORDER BY RAND() LIMIT %d' % (p, RESOURCE_NS, top_k)
    endpoint = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    rows = endpoint.query(sample_query).fetchone()
    return [[row[col] for col in range(3)] for row in rows]
예제 #19
0
def equivalent_classes(cls):
    """Return the DBpedia ontology classes declared equivalent to ``cls``.

    :param cls: class name; ``dbo_prefix`` is prepended to build the IRI.
    :return: set of class names (IRI with ``dbo_prefix`` stripped) found via
        ``owl:equivalentClass``.
    """
    equivalence_query = 'SELECT distinct ?eqclass WHERE { <%s%s> owl:equivalentClass ' \
                        '?eqclass. FILTER ( strstarts(str(?eqclass), "%s"))}' % (dbo_prefix, cls, dbo_prefix)
    endpoint = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
    rows = endpoint.query(equivalence_query).fetchone()
    return {str(row[0]).split(dbo_prefix)[1] for row in rows}
예제 #20
0
def querySubLiteralsByProperty(top_k, p, max_str_len):
    """Sample (subject, string literal) pairs for property ``p``.

    The query keeps objects whose datatype matches ``langString`` or
    ``string`` and whose string length is below ``max_str_len``.

    :param top_k: value used for the query's ``LIMIT``.
    :param p: full IRI of the property.
    :param max_str_len: exclusive upper bound on the literal length.
    :return: list of ``[subject, literal]`` lists, one per result row.
    """
    literal_query = 'select str(?sub), str(?obj) where {?sub <%s> ?obj.  ' \
                    'FILTER ( (regex(datatype(?obj), "langString") || regex(datatype(?obj), "string")) ' \
                    '&& strlen(str(?obj))<%d )} ' \
                    'ORDER BY RAND() LIMIT %d' % (p, max_str_len, top_k)
    endpoint = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    rows = endpoint.query(literal_query).fetchone()
    return [[row[0], row[1]] for row in rows]
예제 #21
0
def query_property_ent_num(cls):
    """Count, per property, the distinct entities of a class that use it.

    Entities are those typed with ``cls`` or one of its subclasses; only
    properties whose IRI starts with ``dbo_prefix`` are counted.

    :param cls: class name; ``dbo_prefix`` is prepended to build the IRI.
    :return: dict mapping property IRI (``str``) to entity count (``int``).
    """
    count_query = 'select ?pro (count(distinct ?ent) as ?ent_num) where {?ent rdf:type ?t. ?t rdfs:subClassOf* <%s%s>. ' \
                  '?ent ?pro ?obj. FILTER(strstarts(str(?pro), "%s"))} group by ?pro' \
                  % (dbo_prefix, cls, dbo_prefix)
    endpoint = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
    rows = endpoint.query(count_query).fetchone()
    return {str(row[0]): int(str(row[1])) for row in rows}
예제 #22
0
def query_super_classes(cls):
    """Return the names of the DBpedia ontology superclasses of ``cls``.

    The query follows ``rdfs:subClassOf*`` and keeps classes whose IRI starts
    with ``dbo_prefix``.

    :param cls: class name; ``dbo_prefix`` is prepended to build the IRI.
    :return: set of class names with ``dbo_prefix`` stripped.
    """
    superclass_query = 'SELECT distinct ?superclass WHERE { <%s%s> rdfs:subClassOf* ?superclass. ' \
                       'FILTER ( strstarts(str(?superclass), "%s"))}' % (dbo_prefix, cls, dbo_prefix)
    endpoint = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
    rows = endpoint.query(superclass_query).fetchone()
    return {str(row[0]).split(dbo_prefix)[1] for row in rows}
예제 #23
0
파일: data.py 프로젝트: eea/wise.msfd
def get_text_reports_2018(country_code):
    """List the files reported under obligation 761 for a country.

    ``EL`` and ``UK`` are translated to ``GR`` and ``GB`` before querying.

    :param country_code: country code matched against the locality notation.
    :return: list of ``(file_url, release_date)`` tuples where
        ``release_date`` is the result of ``_to_datetime``.
    :raises: any error is logged with ``logger.exception`` and re-raised.
    """
    country_code = {'EL': 'GR', 'UK': 'GB'}.get(country_code, country_code)

    query = """
PREFIX cr: <http://cr.eionet.europa.eu/ontologies/contreg.rdf#>
PREFIX terms: <http://purl.org/dc/terms/>
PREFIX schema: <http://rod.eionet.europa.eu/schema.rdf#>
PREFIX core: <http://www.w3.org/2004/02/skos/core#>

SELECT distinct ?file, ?released
WHERE {
?file terms:date ?date .
#?file cr:mediaType 'text/xml' .
?file terms:isPartOf ?isPartOf .
?isPartOf schema:released ?released .
?isPartOf schema:locality ?locality .
?isPartOf schema:obligation ?obligation .
?obligation core:notation ?obligationNr .
?locality core:notation ?notation .
FILTER (?notation = '%s')
FILTER (?obligationNr = '761')
}
ORDER BY DESC(?date)
""" % country_code

    endpoint = sparql.Service('https://cr.eionet.europa.eu/sparql')
    reports = []

    try:
        for row in endpoint.query(query, timeout=30).fetchall():
            # Columns follow the SELECT order: ?file, ?released.
            reports.append((row[0].value, _to_datetime(row[1].value)))

    except:
        logger.exception(
            'Got an error in querying SPARQL endpoint when '
            'getting text reports for: %s', country_code)

        raise

    return reports
예제 #24
0
def querySubLiteralsByPropertyStr(top_k, p, l_str, max_str_len):
    """Sample (subject, string literal) pairs whose literal contains ``l_str``.

    The query keeps objects of property ``p`` whose datatype matches
    ``langString`` or ``string``, whose text contains ``l_str`` and whose
    length is below ``max_str_len``.

    :param top_k: value used for the query's ``LIMIT``.
    :param p: full IRI of the property.
    :param l_str: substring to look for; escaped before being embedded in a
        single-quoted SPARQL literal.
    :param max_str_len: exclusive upper bound on the literal length.
    :return: list of ``[subject, literal]`` lists; rows collected before a
        ``UnicodeDecodeError`` are still returned.
    """
    subs_lits = list()
    s = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    # Backslashes must be escaped before quotes; otherwise a trailing
    # backslash would escape the closing quote of the literal.  Raw line
    # breaks are not allowed inside a single-quoted literal either.
    l_str = (l_str.replace("\\", "\\\\")
                  .replace("'", "\\'")
                  .replace("\n", "\\n")
                  .replace("\r", "\\r"))
    statement = '''select str(?s),str(?o) where {?s <%s> ?o. FILTER ( contains(str(?o), '%s') 
                && (regex(datatype(?o), "langString") || regex(datatype(?o), "string")) && strlen(str(?o))<%d )} 
                ORDER BY RAND() LIMIT %d''' % (p, l_str, max_str_len, top_k)
    try:
        result = s.query(statement)
        for row in result.fetchone():
            subs_lits.append([row[0], row[1]])
    except UnicodeDecodeError:
        # Deliberate best effort: keep whatever was read so far.
        pass
    return subs_lits
예제 #25
0
def __get_report_filename_art3_4(country, region, schema, obligation):
    """Retrieve the latest Article 3/4 report filename from the registry.

    :param country: country code; upper-cased for the notation filter.
    :param region: region code; upper-cased and used as a regex on the file
        URL.
    :param schema: XML schema URL the file must declare.
    :param obligation: obligation number matched as a string.
    :return: last path segment of the newest matching file URL, or ``''``
        (after logging a warning) when nothing matches.
    :raises: any error is logged with ``logger.exception`` and re-raised.
    """

    query = """
PREFIX cr: <http://cr.eionet.europa.eu/ontologies/contreg.rdf#>
PREFIX terms: <http://purl.org/dc/terms/>
PREFIX schema: <http://rod.eionet.europa.eu/schema.rdf#>
PREFIX core: <http://www.w3.org/2004/02/skos/core#>

SELECT ?file
WHERE {
?file terms:date ?date .
?file cr:mediaType 'text/xml' .
?file terms:isPartOf ?isPartOf .
?file cr:xmlSchema ?schema .
?isPartOf schema:locality ?locality .
?isPartOf schema:obligation ?obligation .
?obligation core:notation ?obligationNr .
?locality core:notation ?notation .
FILTER (?notation = '%s')
FILTER (?obligationNr = '%s')
FILTER (str(?schema) = '%s')
FILTER regex(str(?file), '%s')
}
ORDER BY DESC(?date)
LIMIT 1
""" % (country.upper(), obligation, schema, region.upper())

    endpoint = sparql.Service('https://cr.eionet.europa.eu/sparql')
    try:
        rows = endpoint.query(query).fetchall()
        if rows:
            # Keep only the part after the last slash of the URL.
            return rows[0][0].value.rsplit('/', 1)[-1]

        logger.warning("Filename not found for query: %s", query)
        return ''
    except:
        logger.exception(
            'Got an error in querying SPARQL endpoint for '
            'Article 3/4 country: %s', country)

        raise
예제 #26
0
def super_classes(col_classes):
    """Extend each column's class list with the superclasses of its first class.

    ``col_classes`` is modified in place: for every key, superclass names of
    the first list element that are not yet in the list are appended.  A
    progress line is printed for every tenth column, starting with the first.

    :param col_classes: mapping from column to a list of class names.
    :return: the same ``col_classes`` object.
    """
    dbo_prefix = 'http://dbpedia.org/ontology/'
    endpoint = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
    for position, column in enumerate(col_classes.keys()):
        known = col_classes[column]
        superclass_query = 'SELECT distinct ?superclass WHERE { dbo:%s rdfs:subClassOf* ?superclass. ' \
                           'FILTER ( strstarts(str(?superclass), "%s"))}' % (known[0], dbo_prefix)
        for row in endpoint.query(superclass_query).fetchone():
            name = str(row[0]).split(dbo_prefix)[1]
            if name not in known:
                known.append(name)
        if position % 10 == 0:
            print('%d columns done' % (position + 1))
    return col_classes
예제 #27
0
def queryClassByEntity(e):
    """Return the classes (with superclasses) of an entity.

    Two queries are issued: one for the types of ``e`` itself and one for the
    types of the resource ``e`` points to via ``dbo:wikiPageRedirects``.
    Only classes whose IRI starts with ``ONTOLOGY_NS`` are selected.

    :param e: full IRI of the entity.
    :return: set holding the first column of every row of both queries.
    """
    endpoint = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    templates = (
        'select distinct str(?s) where { <%s> rdf:type ?e. ?e rdfs:subClassOf* ?s. '
        'FILTER(strstarts(str(?s), "%s"))}',
        'select distinct str(?ss) where { <%s>  dbo:wikiPageRedirects ?e. ?e rdf:type ?s. '
        '?s rdfs:subClassOf* ?ss. FILTER(strstarts(str(?ss), "%s"))}',
    )

    found = set()
    for template in templates:
        answer = endpoint.query(template % (e, ONTOLOGY_NS))
        for row in answer.fetchone():
            found.add(row[0])

    return found
예제 #28
0
def query_general_entities(cls_entities):
    """Sample, per class, DBpedia entities not already listed for that class.

    For each class up to 1000 randomly ordered instances are requested; those
    whose name is not in the class's existing collection are kept.  A summary
    line is printed per class.

    :param cls_entities: mapping from class name to a collection of entity
        names to exclude.
    :return: dict mapping each class name to the list of newly found entity
        names (IRI with the DBpedia resource prefix stripped).
    """
    dbp_prefix = 'http://dbpedia.org/resource/'
    endpoint = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
    sampled = dict()
    for cls in cls_entities.keys():
        excluded = cls_entities[cls]
        sample_query = 'select distinct ?e where {?e a dbo:%s} ORDER BY RAND() limit 1000' % cls
        names = (str(row[0]).split(dbp_prefix)[1]
                 for row in endpoint.query(sample_query).fetchone())
        fresh = [name for name in names if name not in excluded]
        sampled[cls] = fresh
        print('%s done, %d entities' % (cls, len(fresh)))
    return sampled
예제 #29
0
def super_classes_of_classes(classes):
    """Map every given class to the set of its DBpedia ontology superclasses.

    :param classes: iterable of class names used as ``dbo:<name>`` in the
        query.
    :return: dict mapping each class name to a set of superclass names
        (IRI with the ontology prefix stripped).
    """
    dbo_prefix = 'http://dbpedia.org/ontology/'
    endpoint = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
    by_class = dict()
    for cls in classes:
        superclass_query = 'SELECT distinct ?superclass WHERE { dbo:%s rdfs:subClassOf* ?superclass. ' \
                           'FILTER ( strstarts(str(?superclass), "%s"))}' % (cls, dbo_prefix)
        rows = endpoint.query(superclass_query).fetchone()
        by_class[cls] = {str(row[0]).split(dbo_prefix)[1] for row in rows}

    return by_class
예제 #30
0
def wikidata_osmrelations_to_geonames(client, args):
    """Attach OSM relation and Wikidata ids to known GeoNames entries.

    For every row (subject, ``wdt:P402`` value, ``wdt:P1566`` value) the
    GeoNames URL is looked up in the ``geonames`` collection of
    ``client.geostore``; when it exists, its ``osm_relation`` and
    ``wikidata`` fields are set.

    :param client: object exposing the ``geostore`` database.
    :param args: unused.
    """
    geonames_coll = client.geostore.geonames

    statement = '''
    SELECT ?s ?o ?g WHERE {
          ?s wdt:P402 ?o.
          ?s wdt:P1566 ?g.
        }
    '''
    endpoint = sparql.Service(WIKIDATA_ENDPOINT, "utf-8", "GET")
    for row in endpoint.query(statement).fetchone():
        wikidata_id = row[0].value.strip()
        osm_relation = row[1].value.strip()
        geon = 'http://sws.geonames.org/' + row[2].value + '/'

        # Entries that are not in the collection are skipped.
        if not geonames_coll.find_one({'_id': geon}):
            continue
        geonames_coll.update_one({'_id': geon}, {'$set': {'osm_relation': osm_relation, 'wikidata': wikidata_id}})