Code example #1
    def prepare(self, records):
        sparql = Sparql(SPARQL_ENDPOINT)

        # Get BCP 47 language code map
        self.language_code_map = {}
        raw_language_code_map = sparql.request(LANGUAGE_QUERY)

        for line in raw_language_code_map:
            self.language_code_map[sparql.format_value(
                line, "item")] = sparql.format_value(line, "code")

        # Extract all different locations
        locations = set()
        for record in records:
            if record["speaker"]["residence"] is not None:
                locations.add(record["speaker"]["residence"])

        self.location_map = {}
        raw_location_map = sparql.request(
            LOCATION_QUERY.replace("$1", " wd:".join(locations)))
        for line in raw_location_map:
            country = sparql.format_value(line, "countryLabel")
            location = sparql.format_value(line, "locationLabel")
            self.location_map[sparql.format_value(line, "location")] = country
            if country != location:
                self.location_map[sparql.format_value(
                    line, "location")] += (" (" + location + ")")

        return records
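As a hedged illustration of what the location lookup above builds, this standalone sketch stubs out the SPARQL layer (FakeSparql and the sample row are invented for demonstration; only the mapping logic mirrors the code):

    # Hypothetical stand-in for the Sparql wrapper used above.
    class FakeSparql:
        def format_value(self, line, key):
            return line.get(key)

    sparql = FakeSparql()
    raw_location_map = [{"location": "Q90", "locationLabel": "Paris", "countryLabel": "France"}]

    location_map = {}
    for line in raw_location_map:
        country = sparql.format_value(line, "countryLabel")
        location = sparql.format_value(line, "locationLabel")
        location_map[sparql.format_value(line, "location")] = country
        if country != location:
            location_map[sparql.format_value(line, "location")] += " (" + location + ")"

    print(location_map)  # {'Q90': 'France (Paris)'}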
Code example #2
    def feature_generator(self, data):

        data_name = (data.split('/')[-1]).split('.')[0]

        with codecs.open('features/ceccarelli/%s.svm' % data_name,
                         'w',
                         encoding='utf-8') as data_write:

            with codecs.open(data, 'r', encoding='utf-8') as data_read:

                for i, line in enumerate(data_read):

                    wiki_id_query, qid, wiki_id_candidate, relevance, doc_id = self.parse_ceccarelli_line(
                        line)

                    # print(wiki_id_query)

                    uri_query = Sparql.get_uri_from_wiki_id(wiki_id_query)

                    uri_candidate = Sparql.get_uri_from_wiki_id(
                        wiki_id_candidate)

                    self.write_line(uri_query, qid, uri_candidate, relevance,
                                    data_write, doc_id)

        print('finished writing features')

        print("--- %s seconds ---" % (time.time() - start_time))
Code example #3
File: entity2vec.py Project: Loricanal/entity2rec
    def run(self):

        if self.sparql:
            sparql_query = Sparql(self.entities, self.config_file, self.dataset, self.sparql, self.default_graph,
                                  self.entity_class)

            sparql_query.get_property_graphs()

        self.e2v_walks_learn()  # run node2vec for each property-specific graph
Code example #4
File: main.py Project: dfilipeloja/MoviesOntology
def menu_add_consumer():
    getMoviesObject()

    for index, item in enumerate(moviesList):
        moviePrefix = item['movieprefix'].split('#')[1]
        print(f'{index+1} - {moviePrefix} ({item["moviename"]})')

    movieIndexId = int(input('Seleccione o index do filme: ')) - 1
    selectedMovie = moviesList[movieIndexId]['movieprefix'].split('#')[1]

    consumerName = input('Nome do cliente: ')
    consumerNameId = 'mov:consumer_' + remove_characters_except_number_letter(
        consumerName)

    consumerInsert = Sparql()
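    # ASK returns a boolean, used here to avoid inserting a duplicate consumer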
    consumerExists = consumerInsert.ask(
        f'ASK {{ {consumerNameId} rdf:type mov:Consumer }}')

    if not consumerExists:
        consumerInsert.insertTriple(f'{consumerNameId} rdf:type mov:Consumer')
        consumerInsert.insertTriple(
            f'{consumerNameId} foaf:name "{consumerName}"')

    consumerInsert.insertTriple(
        f'mov:{selectedMovie} mov:alugadoPor {consumerNameId}')
Code example #5
def game():
    logged_in = session.get('logged_in')
    if logged_in:

        visited = dbHandler.getUserVisitedList(session.get("userid"))
        if len(visited) < 5:
            flash("You have to learn first!")
            return redirect(url_for('home'))

        question = {}
        question["question"] = "start"
        question["correctAnswer"] = ""
        question["answers"] = []

        scores = dbHandler.getUserScores(session.get("userid"))

        if request.method == "POST":
            answer = request.form['ans']
            if answer == session["currentQuestion"]["correctAnswer"]:
                session["correctAnswers"] = session["correctAnswers"] + 1
            session["numberOfQuestions"] = session["numberOfQuestions"] + 1
            if session["numberOfQuestions"] < 10:
                s = Sparql()
                question = s.generate_question(session.get("userid"))
                session["currentQuestion"] = question
                return render_template("quiz.html",
                                       username=session.get("username"),
                                       question=question,
                                       scores=scores)
            else:
                flash("Your score is " + str(session["correctAnswers"]) +
                      " correct answers out of 10!")
                dbHandler.addScore(session.get("userid"),
                                   session["correctAnswers"])
                s = Sparql()
                question = s.generate_question(session.get("userid"))
                session["currentQuestion"] = question
                session["numberOfQuestions"] = 0
                session["correctAnswers"] = 0
                return render_template("quiz.html",
                                       username=session.get("username"),
                                       question=question,
                                       scores=scores)

        if request.method == "GET":
            session["currentQuestion"] = question
            session["numberOfQuestions"] = 0
            session["correctAnswers"] = 0
            s = Sparql()
            question = s.generate_question(session.get("userid"))
            session["currentQuestion"] = question
            return render_template("quiz.html",
                                   username=session.get("username"),
                                   question=question,
                                   scores=scores)
    else:
        return render_template("index.html")
Code example #6
File: main.py Project: dfilipeloja/MoviesOntology
def getMoviesObject():
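    # moviesList is assumed to be a module-level list defined elsewhere in main.py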
    sparql = Sparql()
    moviesSelect = sparql.select("""SELECT ?movie ?name WHERE {
            ?movie rdf:type mov:Movie .
            ?movie foaf:name ?name
            }
    """)

    movies = moviesSelect['results']['bindings']

    for movie in movies:
        idMovie = movie['movie']['value']
        name = movie['name']['value']
        moviesList.append({'movieprefix': idMovie, 'moviename': name})
Code example #7
File: main.py Project: dfilipeloja/MoviesOntology
def getFilmesAlugados():
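    # filmesAlugados is assumed to be a module-level list defined elsewhere in main.py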
    sparql = Sparql()
    moviesAlugados = sparql.select(
        """SELECT ?movieid ?moviename (GROUP_CONCAT(?cid;SEPARATOR=', ') AS ?consumerid) (GROUP_CONCAT(?cname; SEPARATOR=', ') as ?consumername) WHERE {
            ?movieid rdf:type mov:Movie .
            ?movieid foaf:name ?moviename .
            ?movieid mov:alugadoPor ?cid .
            ?cid foaf:name ?cname
        } GROUP BY ?movieid ?moviename
    """)

    movies = moviesAlugados['results']['bindings']
    for movie in movies:
        moviename = movie['moviename']['value']
        consumers = (movie['consumername']['value']).split(', ')
        filmesAlugados.append({'moviename': moviename, 'consumers': consumers})
Code example #8
def main():
    sparql = Sparql()
    moviesSelect = sparql.select("""SELECT ?movie ?name WHERE {
            ?movie rdf:type mov:Movie .
            ?movie foaf:name ?name
            }
    """)

    movies = moviesSelect['results']['bindings']

    for movie in movies:
        # print(movie)
        idMovie = movie['movie']['value']
        name = movie['name']['value']
        moviesList.append({'id': idMovie, 'name': name})

    print(moviesList)
Code example #9
File: entity2rel.py Project: Loricanal/entity2rec
    def feature_generator(self, data):

        data_name = (data.split('/')[-1]).split('.')[0]

        with codecs.open('features/ceccarelli/%s.svm' % data_name, 'w',
                         encoding='utf-8') as data_write:

            with codecs.open(data, 'r', encoding='utf-8') as data_read:

                for i, line in enumerate(data_read):

                    wiki_id_query, qid, wiki_id_candidate, relevance, doc_id = self.parse_ceccarelli_line(line)

                    print(wiki_id_query)

                    uri_query = Sparql.get_uri_from_wiki_id(wiki_id_query)

                    uri_candidate = Sparql.get_uri_from_wiki_id(wiki_id_candidate)

                    self.write_line(uri_query, qid, uri_candidate, relevance, data_write, doc_id)

        print('finished writing features')

        print("--- %s seconds ---" % (time.time() - start_time))
Code example #10
    def _define_properties(self):

        with codecs.open(self.config_file, 'r',
                         encoding='utf-8') as config_read:

            property_file = json.loads(config_read.read())

        try:

            self.properties = list(property_file[self.dataset])
            self.properties.append('feedback')

        except KeyError:  # if no list of properties is specified, take them all

            if self.sparql:  # get all the properties from the sparql endpoint

                sparql_query = Sparql(self.entities, self.config_file,
                                      self.dataset, self.sparql,
                                      self.default_graph, self.entity_class)

                self.properties = sparql_query.properties

                # add the feedback property that is not defined in the graph
                self.properties.append('feedback')

            else:  # get everything you have in the folder

                path_to_graphs = 'datasets/%s/graphs' % self.dataset

                onlyfiles = [
                    f for f in listdir(path_to_graphs)
                    if isfile(join(path_to_graphs, f))
                ]

                self.properties = [
                    file.replace('.edgelist', '') for file in onlyfiles
                ]

                if 'feedback' in self.properties:  # feedback property always the last one of the list
                    self.properties.remove('feedback')
                    self.properties.append('feedback')
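The config file read above is evidently a JSON object keyed by dataset name, each value listing the properties to use; a minimal sketch of the structure json.loads is expected to return here (dataset and property names invented for illustration):

    property_file = {
        "movielens_1m": ["dbo:director", "dbo:starring"]  # hypothetical dataset and properties
    }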
Code example #11
class SparqlMaintainer(object):
    def __init__(self):
        self.log = logging.getLogger('osm2rdf')
        self.log.setLevel(logging.INFO)

        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        ch.setFormatter(
            logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
        self.log.addHandler(ch)

        # create the top-level parser
        parser = argparse.ArgumentParser(
            description='Download and update stats',
            usage='python3 %(prog)s [options]')

        parser.add_argument(
            '--host',
            action='store',
            dest='rdf_url',
            default='http://localhost:9999/bigdata/namespace/wdq/sparql',
            help='Host URL to upload data. Default: %(default)s')
        parser.add_argument(
            '-d',
            '--queries-dir',
            action='store',
            dest='queries_dir',
            default=str(Path(os.path.dirname(__file__)) / 'maintenance'),
            help='Directory with *.sparql queries to run. Default: %(default)s')
        parser.add_argument('-n',
                            '--dry-run',
                            action='store_true',
                            dest='dry_run',
                            default=False,
                            help='Do not modify RDF database.')
        opts = parser.parse_args()

        self.options = opts
        self.rdf_server = Sparql(opts.rdf_url, opts.dry_run)

    def run(self):
        dir = Path(self.options.queries_dir)
        self.log.info(f'Running scripts from {dir}')
        while True:
            queries = {}
            for file in dir.glob('*.sparql'):
                with file.open() as f:
                    queries[file.stem] = f.read()

            suffix = '-test'
            for filename in sorted(queries.keys()):
                if filename.endswith(suffix):
                    if filename[:-len(suffix)] not in queries:
                        self.log.warning(
                            f'File {filename} has no matching query (without the "{suffix}" suffix)'
                        )
                    continue
                testfile = filename + suffix
                if testfile in queries:
                    if not self.rdf_server.run('query', queries[testfile]):
                        self.log.info(
                            f'Skipping {filename} (test is negative)')
                        continue
                self.log.info(f'Executing {filename}')
                self.rdf_server.run('update', queries[filename])
                self.log.info(f'Done running {filename}')

            time.sleep(600)
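The run loop above pairs each maintenance query with an optional gating test: for a query X.sparql, a sibling X-test.sparql is executed first, and X runs only when the test returns a truthy result. A hypothetical layout of the queries directory (file names invented):

    maintenance/
        drop-orphans.sparql         # update to execute
        drop-orphans-test.sparql    # optional gate; the update runs only if this query returns results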
Code example #12
    def on_search_released(self):  # Auto connected
        # ----------------------------
        query = Sparql.create_from_XML(self.as_XML())
        results = query.execute(self.config.rdfstore)
Code example #13
    def execute(self, record):
        # Normalize the record using ocwiktionary's titles conventions
        transcription = self.normalize(record["transcription"])

        # Fetch the content of the page having the transcription for title
        (is_already_present, wikicode,
         basetimestamp) = self.get_entry(transcription, record["file"])

        # Stop here if there is no entry for this record on ocwiktionary
        if wikicode is False:
            return False

        # Stop here if the record is already inside the entry
        if is_already_present:
            print(record["id"] + ": already on ocwiktionary")
            return False

        # Stop here if the record's language has no BCP 47 code
        if record["language"]["qid"] not in self.language_code_map:
            print(record["id"] + ": language code not found")
            return False

        lang = self.language_code_map[record["language"]["qid"]]

        motvar = re.search(r"^oc\-([^\-]*?)(\-|$)", lang)

        labelvar = False

        if motvar:
            codevar = motvar.group(1)
            if record["language"]["qid"] in self.language_label_map:
                labelvar = self.language_label_map[record["language"]["qid"]]
            lang = "oc"

        # Stop here if there is no section for the current language
        if "{=" + lang + "=}" not in wikicode:
            print(record["id"] + ": language section not found")
            return False

        motif = ""
        stringlg = "{=" + lang + "=}"
        for i in range(0, len(stringlg)):
            lettre = stringlg[i]
            if i > 0:
                motif = motif + "|"
            motif = motif + stringlg[0:i].replace("{", r"\{")
            motif = motif + "[^" + stringlg[i].replace("{", r"\{") + "]"

        motif = re.search(
            r"{{=" + lang +
            r"=}}(([^{]|{[^{]|{{[^\-=]|{{-[^p]|{{-p[^r]|{{-pr[^o]|{{-pro[^n]|{{-pron[^-]|{{-pron-[^}]|{{-pron-}[^}])*?)({{=([^\=]*?)=}}|$)",
            wikicode,
        )

        if motif:
            wikicode = re.sub(
                r"{{=" + lang +
                r"=}}(([^{]|{[^{]|{{[^\-=]|{{-[^p]|{{-p[^r]|{{-pr[^o]|{{-pro[^n]|{{-pron[^-]|{{-pron-[^}]|{{-pron-}[^}])*?)({{=([^\=]*?)=}}|{{-sil-}}|{{-([^\-]*?)\-\|([a-z]+)}}|$)",
                r"{{=" + lang + r"=}}\g<1>{{-pron-}}\g<3>",
                wikicode,
            )

        loccode = ""
        if record["speaker"]["residence"]:

            sparql = Sparql(SPARQL_ENDPOINT)

            self.location_map = {}
            raw_location_map = sparql.request(
                LOCATION_QUERY.replace("$1", " wd:" +
                                       record["speaker"]["residence"]))
            if len(raw_location_map) > 0:
                country = sparql.format_value(raw_location_map[0],
                                              "countryLabel")
                location = sparql.format_value(raw_location_map[0],
                                               "locationLabel")

                if country:
                    loccode = country

                    if location and location != country:
                        loccode = loccode + " (" + location + ")"
                elif location:
                    loccode = location
                else:
                    loccode = ""

                if labelvar:
                    loccode = loccode + " - " + labelvar

                if loccode != "":
                    loccode = loccode + " : "

        codefichier = (loccode + "escotar « " + record["transcription"] +
                       " » [[Fichièr:" + record["file"] + "]]")

        wikicode = re.sub(
            r"\{=" + lang +
            r"=\}(([^\{]|\{[^=])*?)\{\{-pron-\}\}(([^\{]|\{[^\{]|\{\{[^\-])*?)(\{\{-|\{\{=|$)",
            "{=" + lang + r"=}\g<1>{{-pron-}}\g<3>" + codefichier + "\n" + r"\g<5>",
            wikicode,
        )

        # Save the result
        try:
            result = self.do_edit(transcription, wtp.parse(wikicode),
                                  basetimestamp)
        except Exception as e:
            # If we got an editconflict, just restart from the beginning
            if str(e).find("editconflict") > -1:
                return self.execute(record)
            else:
                raise e

        if result is True:
            print(record["id"] +
                  ": added to ocwiktionary - https://oc.wiktionary.org/wiki/" +
                  transcription)

        return result
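The wtp module used in the save step is presumably wikitextparser; a plausible import header for this snippet (inferred from usage, not shown in the source):

    import re

    import wikitextparser as wtp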
Code example #14
class RdfUpdateHandler(RdfHandler):
    def __init__(self, options):
        super(RdfUpdateHandler, self).__init__(options)
        self.pending = {}
        self.pendingCounter = 0
        self.rdf_server = Sparql(self.options.rdf_url, self.options.dry_run)

    def finalize_object(self, obj, statements, obj_type):
        super(RdfUpdateHandler, self).finalize_object(obj, statements,
                                                      obj_type)

        prefixed_id = osmutils.types[obj_type] + str(obj.id)

        if prefixed_id in self.pending:
            # Not very efficient, but if the same object is updated more than once within
            # the same batch, it does not get deleted because all deletes happen first
            self.flush()

        if statements:
            self.pending[prefixed_id] = [
                prefixed_id + ' ' + s + '.'
                for s in osmutils.toStrings(statements)
            ]
            self.pendingCounter += len(statements)
        else:
            self.pending[prefixed_id] = False
            self.pendingCounter += 1

        if self.pendingCounter > 5000:
            self.flush()

    def flush(self, seqid=0):
        sparql = ''

        if self.pending:
            # Remove all statements with these subjects
            sparql += f'''
DELETE {{ ?s ?p ?o . }}
WHERE {{
  VALUES ?s {{ {' '.join(self.pending.keys())} }}
  ?s ?p ?o .
  FILTER (osmm:task != ?p)
}};'''
            # flatten list of lists, and if sublist is truthy, use it
            insertSparql = '\n'.join([
                v for sublist in self.pending.values() if sublist
                for v in sublist
            ])
            if insertSparql:
                sparql += f'INSERT {{ {insertSparql} }} WHERE {{}};\n'

        if seqid > 0:
            if self.last_timestamp.year < 2000:  # Something majorly wrong
                raise Exception('last_timestamp was not updated')
            sparql += set_status_query('osmroot:', self.last_timestamp,
                                       'version', seqid)

        if sparql:
            sparql = '\n'.join(osmutils.prefixes) + '\n\n' + sparql
            self.rdf_server.run('update', sparql)
            self.pendingCounter = 0
            self.pending = {}
        elif self.pendingCounter != 0:
            # Safety check
            raise Exception(f'pendingCounter={self.pendingCounter}')

    def get_osm_schema_ver(self, repserv):
        result = query_status(self.rdf_server,
                              '<https://www.openstreetmap.org>', 'version')

        ver = result['version']
        if ver is not None:
            log.info(f'schema:version={ver}')
            return int(ver)

        mod_date = result['dateModified']
        if mod_date is not None:
            log.info(
                f'schema:dateModified={mod_date}, shifting back and getting sequence ID'
            )
            mod_date -= dt.timedelta(minutes=60)
            return repserv.timestamp_to_sequence(mod_date)

        log.error(
            'Neither schema:version nor schema:dateModified are set for <https://www.openstreetmap.org>'
        )
        return None

    def run(self):
        repserv = ReplicationServer(self.options.osm_updater_url)
        last_time = datetime.utcnow()
        if self.options.seqid:
            seqid = self.options.seqid
        else:
            seqid = self.get_osm_schema_ver(repserv)
            if seqid is None:
                raise Exception('Unable to determine sequence ID')

        log.info(f'Initial sequence id: {seqid}')
        state = None
        last_seqid = seqid

        while True:

            # must not read data newer than the published sequence id
            # or we might end up reading partial data

            sleep = True
            if state is None:
                state = repserv.get_state_info()
                if state is not None and seqid + 2 < state.sequence:
                    log.info(
                        f'Replication server has data up to #{state.sequence}')

            if state is not None and seqid <= state.sequence:
                try:
                    diffdata = repserv.get_diff_block(seqid)
                except Exception:
                    diffdata = ''

                # We assume there are no empty diff files
                if len(diffdata) > 0:
                    log.debug("Downloaded change %d. (size=%d)" %
                              (seqid, len(diffdata)))

                    if self.options.addWayLoc:
                        self.apply_buffer(diffdata,
                                          repserv.diff_type,
                                          locations=True,
                                          idx=self.get_index_string())
                    else:
                        self.apply_buffer(diffdata, repserv.diff_type)

                    self.flush(seqid)

                    seqid += 1
                    sleep = False

            seconds_since_last = (datetime.utcnow() -
                                  last_time).total_seconds()
            if seconds_since_last > 60:
                log.info(
                    f'Processed {seqid - last_seqid - 1}, ' +
                    f'todo {(state.sequence - seqid + 1 if state else "???")};  {self.format_stats()}'
                )
                last_seqid = seqid - 1
                last_time = datetime.utcnow()

            if state is not None and seqid > state.sequence:
                state = None  # Refresh state

            if sleep:
                time.sleep(60)
Code example #15
class UpdateRelLoc(object):
    def __init__(self):

        self.log = logging.getLogger('osm2rdf')
        self.log.setLevel(logging.INFO)

        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        ch.setFormatter(
            logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
        self.log.addHandler(ch)

        # create the top-level parser
        parser = argparse.ArgumentParser(
            description='Updates centroids of OSM relations',
            usage='python3 %(prog)s [options]')

        parser.add_argument(
            '--host',
            action='store',
            dest='rdf_url',
            default='http://localhost:9999/bigdata/namespace/wdq/sparql',
            help='Host URL to upload data. Default: %(default)s')
        parser.add_argument(
            '-s',
            '--cache-strategy',
            action='store',
            dest='cacheType',
            choices=['sparse', 'dense'],
            default='dense',
            help='Which node strategy to use (default: %(default)s)')
        parser.add_argument('-c',
                            '--nodes-file',
                            action='store',
                            dest='cacheFile',
                            default=None,
                            help='File to store node cache.')
        parser.add_argument('-n',
                            '--dry-run',
                            action='store_true',
                            dest='dry_run',
                            default=False,
                            help='Do not modify RDF database.')

        opts = parser.parse_args()

        self.options = opts
        self.rdf_server = Sparql(opts.rdf_url, opts.dry_run)
        self.skipped = []

        if self.options.cacheFile:
            if self.options.cacheType == 'sparse':
                idx = 'sparse_file_array,' + self.options.cacheFile
            else:
                idx = 'dense_file_array,' + self.options.cacheFile
            self.nodeCache = osmium.index.create_map(idx)
        else:
            self.nodeCache = None

    def run(self):
        while True:
            self.run_once()
            time.sleep(600)  # every 10 minutes

    def run_once(self):
        query = '''# Get relations without osmm:loc
SELECT ?rel WHERE {
  ?rel osmm:type 'r' .
  FILTER NOT EXISTS { ?rel osmm:loc ?relLoc . }
}'''  # LIMIT 100000
        result = self.rdf_server.run('query', query)
        self.skipped = [
            'osmrel:' +
            i['rel']['value'][len('https://www.openstreetmap.org/relation/'):]
            for i in result
        ]

        while True:
            rel_ids = self.skipped
            self.skipped = []
            count = len(rel_ids)
            self.log.info(f'** Processing {count} relations')
            self.run_list(rel_ids)
            if len(self.skipped) >= count:
                self.log.info(
                    f'** Unable to process {len(self.skipped)} relations, exiting'
                )
                break
            else:
                self.log.info(
                    f'** Processed {count - len(self.skipped)} out of {count} relations'
                )

        self.log.info('done')

    def run_list(self, rel_ids):
        for chunk in chunks(rel_ids, 2000):
            self.fix_relations(chunk)

    def fix_relations(self, rel_ids):
        pairs = self.get_relation_members(rel_ids)

        insert_statements = []
        for group in self.group_by_values(pairs):
            insert_statements.append(self.process_single_rel(*group))

        if len(insert_statements) > 0:
            sparql = '\n'.join(osmutils.prefixes) + '\n\n'
            sparql += 'INSERT {\n'
            sparql += '\n'.join(insert_statements)
            sparql += '\n} WHERE {};'

            self.rdf_server.run('update', sparql)
            self.log.info(f'Updated {len(insert_statements)} relations')

    def get_relation_members(self, rel_ids):
        query = f'''# Get relation member's locations
SELECT
  ?rel ?member ?loc
WHERE {{
  VALUES ?rel {{ {' '.join(rel_ids)} }}
  ?rel osmm:has ?member .
  OPTIONAL {{ ?member osmm:loc ?loc . }}
}}'''
        result = self.rdf_server.run('query', query)

        return [(
            'osmrel:' +
            i['rel']['value'][len('https://www.openstreetmap.org/relation/'):],
            i['member']['value'], i['loc']['value'] if 'loc' in i else '')
                for i in result]

    @staticmethod
    def process_single_rel(rel_id, member_points):
        points = MultiPoint([loads(p) for p in member_points])
        return rel_id + ' ' + osmutils.formatPoint('osmm:loc',
                                                   points.centroid) + '.'

    def group_by_values(self, tuples):
        """Yield a tuple (rid, [list of ids])"""
        points = None
        last_id = None
        skip = False
        for rid, ref, value in sorted(tuples):
            if last_id != rid:
                if last_id is not None and not skip:
                    if not points:
                        self.skipped.append(last_id)
                    else:
                        yield (last_id, points)
                skip = False
                points = []
                last_id = rid
            if not skip:
                if value == '':
                    if ref.startswith('https://www.openstreetmap.org/node/'):
                        if self.nodeCache:
                            node_id = ref[
                                len('https://www.openstreetmap.org/node/'):]
                            try:
                                point = self.nodeCache.get(int(node_id))
                                points.append(
                                    f'Point({point.lon} {point.lat})')
                            except osmium._osmium.NotFoundError:
                                pass
                    elif ref.startswith('https://www.openstreetmap.org/way/'):
                        pass  # not much we can do about missing way's location
                    elif ref.startswith(
                            'https://www.openstreetmap.org/relation/'):
                        skip = True
                        self.skipped.append(rid)
                    else:
                        raise ValueError('Unknown ref ' + ref)
                else:
                    points.append(value)
        if last_id is not None and not skip:
            if not points:
                self.skipped.append(last_id)
            else:
                yield (last_id, points)
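This class evidently relies on osmium for the node cache and on shapely for the centroid math (osmutils and chunks appear to be project-local helpers); a plausible import header, inferred from MultiPoint, loads, and osmium.index above and not shown in the source:

    import argparse
    import logging
    import time

    import osmium
    from shapely.geometry import MultiPoint
    from shapely.wkt import loads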
Code example #16
def get_records(query):
    sparql = Sparql(ENDPOINT)
    raw_records = sparql.request(query)
    records = []
    for record in raw_records:
        records.append({
            "id": sparql.format_value(record, "record"),
            "file": sparql.format_value(record, "file"),
            "date": sparql.format_value(record, "date"),
            "transcription": sparql.format_value(record, "transcription"),
            "qualifier": sparql.format_value(record, "qualifier"),
            "user": sparql.format_value(record, "linkeduser"),
            "speaker": {
                "id": sparql.format_value(record, "speaker"),
                "name": sparql.format_value(record, "speakerLabel"),
                "gender": sparql.format_value(record, "gender"),
                "residence": sparql.format_value(record, "residence"),
            },
            "links": {
                "wikidata": sparql.format_value(record, "wikidataId"),
                "lexeme": sparql.format_value(record, "lexemeId"),
                "wikipedia": sparql.format_value(record, "wikipediaTitle"),
                "wiktionary": sparql.format_value(record, "wiktionaryEntry"),
            },
            "language": {
                "iso": sparql.format_value(record, "languageIso"),
                "qid": sparql.format_value(record, "languageQid"),
                "wm": sparql.format_value(record, "languageWMCode"),
            },
        })

    return records
Code example #17
def home():
    logged_in = session.get('logged_in')
    if logged_in:
        info = {}
        diseaseInfo = ""
        diseaseName = ""
        diseaseTreatment = ""

        name = []
        symptoms = []
        treatment = []
        hasCauses = []
        causeOf = []
        image = ""

        info["symptoms"] = symptoms
        info["treatment"] = treatment
        info["hasCauses"] = hasCauses
        info["causeOf"] = causeOf
        info["info"] = diseaseInfo
        info["name"] = name
        info["image"] = image

        diseaseInfo = False

        userList = []
        userListDb = dbHandler.getUserList(session.get("userid"))
        for l in userListDb:
            el = dbHandler.getElementByID(l[1])
            userList.append(el[0][1])

        visited = dbHandler.getElements("disease")
        # Take the (up to) five most visited elements
        mostVisited = [v[1] for v in visited[:5]]

        suggestions = {}
        s = Sparql()
        suggestions = s.getSuggestions(session.get("userid"))

        if request.method == "GET":
            diseaseName = request.args.get("diseaseName")
            diseaseSymptoms = request.args.get("symptoms")
            diseaseTreatment = request.args.get("treatment")
            diseaseCauses = request.args.get("causes")
            diseaseType = request.args.get("type")
            diseaseCountry = request.args.get("country")
            diseaseClimate = request.args.get("climate")
            diseaseFood = request.args.get("food")
            first = True

            if (type(diseaseName) == str and len(diseaseName) != 0):
                s = Sparql()
                info = s.search_name(diseaseName)
                dbHandler.addElement(diseaseName, "disease")
                session["diseaseName"] = diseaseName
                diseaseInfo = True
                dbHandler.addVisitedDisease(session.get("userid"), diseaseName,
                                            "disease")

            if (type(diseaseSymptoms) == str and len(diseaseSymptoms) != 0):
                s = Sparql()
                if first:
                    info = s.search_symptoms(diseaseSymptoms)
                    first = False
                else:
                    infoTemp = s.search_symptoms(diseaseSymptoms)
                    info["name"] = list(
                        set(info["name"]).intersection(infoTemp["name"]))
                symptoms = diseaseSymptoms.split(", ")
                for symptom in symptoms:
                    dbHandler.addElement(symptom, "symptom")

            if (type(diseaseTreatment) == str and len(diseaseTreatment) != 0):
                s = Sparql()

                if first:
                    info = s.search_treatment(diseaseTreatment)
                    first = False
                else:
                    infoTemp = s.search_treatment(diseaseTreatment)
                    info["name"] = list(
                        set(info["name"]).intersection(infoTemp["name"]))

                treatments = diseaseTreatment.split(", ")
                for t in treatments:
                    dbHandler.addElement(t, "treatment")

            if (type(diseaseCauses) == str and len(diseaseCauses) != 0):
                s = Sparql()

                if first:
                    info = s.search_bycauses(diseaseCauses)
                    first = False
                else:
                    infoTemp = s.search_bycauses(diseaseCauses)
                    info["name"] = list(
                        set(info["name"]).intersection(infoTemp["name"]))

                causes = diseaseCauses.split(", ")
                for c in causes:
                    dbHandler.addElement(c, "causes")

            if (type(diseaseType) == str and len(diseaseType) != 0):
                s = Sparql()
                if first:
                    info = s.search_bytype(diseaseType)
                    first = False
                else:
                    infoTemp = s.search_bytype(diseaseType)
                    info["name"] = list(
                        set(info["name"]).intersection(infoTemp["name"]))
                types = diseaseType.split(", ")
                for t in types:
                    dbHandler.addElement(t, "types")

            if (type(diseaseCountry) == str and len(diseaseCountry) != 0):
                s = Sparql()
                if first:
                    info["name"] = s.search_diseaseByCountry(diseaseCountry)
                    first = False
                else:
                    infoTemp = s.search_diseaseByCountry(diseaseCountry)
                    info["name"] = list(
                        set(info["name"]).intersection(infoTemp))

                dbHandler.addElement(diseaseCountry, "country")

            if (type(diseaseClimate) == str and len(diseaseClimate) != 0):
                s = Sparql()

                if first:
                    info["name"] = s.search_diseaseByClimate(diseaseClimate)
                    first = False
                else:
                    infoTemp = s.search_diseaseByClimate(diseaseClimate)
                    info["name"] = list(
                        set(info["name"]).intersection(infoTemp))

                dbHandler.addElement(diseaseClimate, "climate")

            if (type(diseaseFood) == str and len(diseaseFood) != 0):
                s = Sparql()

                if first:
                    info["name"] = s.search_diseaseByFood(diseaseFood)
                    first = False
                else:
                    infoTemp = s.search_diseaseByFood(diseaseFood)
                    info["name"] = list(
                        set(info["name"]).intersection(infoTemp))

                dbHandler.addElement(diseaseFood, "food")

            return render_template("home.html",
                                   username=session.get("username"),
                                   disease=info["name"],
                                   symptoms=info["symptoms"],
                                   treatment=info["treatment"],
                                   hasCauses=info["hasCauses"],
                                   causeOf=info["causeOf"],
                                   info=info["info"],
                                   image=info["image"],
                                   userList=userList,
                                   visited=mostVisited,
                                   suggestions=suggestions,
                                   diseaseInfo=diseaseInfo)

        if request.method == "POST":
            dbHandler.addElementInList(session.get("userid"),
                                       session.get("diseaseName"), "disease")
            userList.append(session.get("diseaseName"))
            return render_template("home.html",
                                   userList=userList,
                                   visited=mostVisited,
                                   suggestions=suggestions)
    else:
        return redirect(url_for('index'))
Code example #18
async def sparql(ctx, *text):
    await ctx.send(
        _("Your results are as follows.\n"
          "```\n"
          "{result}"
          "```\n").format(result=Sparql(text[0], text[1:])))
Code example #19
class UpdatePageViewStats(object):
    def __init__(self):

        self.log = logging.getLogger('osm2rdf')
        self.log.setLevel(logging.INFO)

        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        ch.setFormatter(
            logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
        self.log.addHandler(ch)

        # create the top-level parser
        parser = argparse.ArgumentParser(
            description='Download and update stats',
            usage='python3 %(prog)s [options]')

        parser.add_argument(
            '--host',
            action='store',
            dest='rdf_url',
            default='http://localhost:9999/bigdata/namespace/wdq/sparql',
            help='Host URL to upload data. Default: %(default)s')
        parser.add_argument('-n',
                            '--dry-run',
                            action='store_true',
                            dest='dry_run',
                            default=False,
                            help='Do not modify RDF database.')
        parser.add_argument('-b',
                            '--go-backwards',
                            action='store_true',
                            dest='go_backwards',
                            default=False,
                            help='Go back up to (maxfiles) and exit')
        parser.add_argument(
            '-m',
            '--maxfiles',
            action='store',
            dest='max_files',
            default=1,
            type=int,
            help='Maximum number of pageview stat files to process at once')
        opts = parser.parse_args()

        self.options = opts
        self.rdf_server = Sparql(opts.rdf_url, opts.dry_run)
        self.pvstat = '<https://dumps.wikimedia.org/other/pageviews/>'
        self.stats_url = 'https://dumps.wikimedia.org/other/pageviews/{0:%Y}/{0:%Y-%m}/pageviews-{0:%Y%m%d-%H}0000.gz'

        # oldest file is https://dumps.wikimedia.org/other/pageviews/2015/2015-05/pageviews-20150501-010000.gz
        self.minimum_data_ts = datetime(2015, 5, 1, tzinfo=dt.timezone.utc)

    async def run(self):
        backwards = self.options.go_backwards
        while True:
            ver = query_status(self.rdf_server, self.pvstat)
            if ver is None:
                self.log.info(
                    f'schema:dateModified is not set for {self.pvstat}')
                # Calculate last valid file. Assume current data will not be available for at least a few hours
                ver = datetime.utcnow() + dt.timedelta(
                    minutes=50) - dt.timedelta(hours=5)
                ver = datetime(ver.year,
                               ver.month,
                               ver.day,
                               ver.hour,
                               tzinfo=dt.timezone.utc)
            self.log.info(
                f'Processing {"backwards" if backwards else "forward"} from {ver}'
            )
            stats, timestamp = await self.process_files(ver, backwards)
            if timestamp is not None and len(stats) > 0:
                self.log.info(f'Updating {len(stats)} stats')
                self.save_stats(stats, timestamp)
            if backwards:
                # Do a single iteration only
                return
            self.log.info('Pausing...')
            time.sleep(1000)

    async def process_files(self, last_processed, backwards):
        stats = defaultdict(int)
        new_last = None

        conn = aiohttp.TCPConnector(limit=3)
        timeout = aiohttp.ClientTimeout(total=None,
                                        connect=None,
                                        sock_read=60,
                                        sock_connect=60)
        async with aiohttp.ClientSession(connector=conn,
                                         timeout=timeout) as session:
            futures = []
            for date in self.iterate_hours(last_processed,
                                           self.options.max_files, backwards):
                futures.append(self.process_file(session, date, stats))
            if futures:
                done, _ = await asyncio.wait(futures)
                for fut in done:
                    date, ok = fut.result()
                    # always find the latest possible timestamp even if going backwards
                    if ok and (new_last is None or date > new_last):
                        new_last = date

        return stats, new_last

    def iterate_hours(self, last_processed, max_count, backwards=True):
        delta = dt.timedelta(hours=(-1 if backwards else 1))
        done = 0
        current = last_processed
        if not backwards:
            # Inclusive when going backwards, exclusive when going forward
            current += delta
        while (current > self.minimum_data_ts if backwards
               else current < datetime.now(dt.timezone.utc)):
            if done >= max_count:
                break
            yield current
            done += 1
            current += delta

    async def process_file(self, session, date, stats):
        url = self.stats_url.format(date)
        try:
            async with session.get(url) as response:
                start = datetime.utcnow()
                if response.status != 200:
                    self.log.warning(f'Url {url} returned {response.status}')
                    return date, False
                for line in gzip.decompress(await response.read()).splitlines():
                    try:
                        parts = line.decode('utf-8', 'strict').split(' ')
                        page_url = self.page_url(parts[0], parts[1])
                        if page_url:
                            stats[page_url] += int(parts[2])
                    except Exception:
                        self.log.error(f'Error parsing {url} line "{line}"')
                self.log.info(
                    f'Finished processing {url} in {(datetime.utcnow() - start).total_seconds():.1f} seconds'
                )
            return date, True
        except Exception:
            self.log.warning(f'Failed to process {url}')
            return date, False

    def page_url(self, prefix, title):
        parts = prefix.split('.', 1)

        if len(parts) == 1:
            site = '.wikipedia.org/wiki/'
        # elif parts[1] == 'b':
        #     site = '.wikibooks.org/wiki/'
        # elif parts[1] == 'd':
        #     site = '.wiktionary.org/wiki/'
        # elif parts[1] == 'n':
        #     site = '.wikinews.org/wiki/'
        # elif parts[1] == 'q':
        #     site = '.wikiquote.org/wiki/'
        # elif parts[1] == 's':
        #     site = '.wikisource.org/wiki/'
        # elif parts[1] == 'v':
        #     site = '.wikiversity.org/wiki/'
        # elif parts[1] == 'voy':
        #     site = '.wikivoyage.org/wiki/'
        else:
            return None

        if not reWikiLanguage.match(parts[0]):
            if parts[0] != 'test2':  # This is the only number-containing prefix so far
                self.log.error(
                    f'Skipping unexpected language prefix "{parts[0]}"')
            return None

        return make_wiki_url(parts[0], site, title)

    def save_stats(self, stats, timestamp):

        # From https://stackoverflow.com/questions/46030514/update-or-create-numeric-counters-in-sparql-upsert/46042692

        done = 0
        last_print = datetime.utcnow()
        for keys in chunks(stats.keys(), 1000):
            # (<...> 10) (<...> 15) ...
            values = ' '.join(
                ['(' + k + ' ' + str(stats[k]) + ')' for k in keys])
            sparql = f'''
PREFIX pvstat: {self.pvstat}
DELETE {{ ?sitelink pvstat: ?outdated }}
INSERT {{ ?sitelink pvstat: ?updated }}
WHERE {{
    VALUES (?sitelink ?increment) {{ {values} }}
    OPTIONAL {{?sitelink pvstat: ?outdated}}
    BIND ((IF(BOUND(?outdated), ?outdated + ?increment, ?increment)) AS ?updated)
}}'''
            self.rdf_server.run('update', sparql)
            done += len(keys)
            if (datetime.utcnow() - last_print).total_seconds() > 60:
                self.log.info(
                    f'Imported {done} pageview stats, pausing for a few seconds...'
                )
                time.sleep(5)
                last_print = datetime.utcnow()

        self.rdf_server.run('update', set_status_query(self.pvstat, timestamp))
        self.log.info(f'Finished importing {done} pageview stats')
Code example #20
class UpdateUsageStats(object):
    ids: Dict[str, str]

    def __init__(self):

        self.log = logging.getLogger('osm2rdf')
        self.log.setLevel(logging.INFO)

        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        ch.setFormatter(
            logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
        self.log.addHandler(ch)

        # create the top-level parser
        parser = argparse.ArgumentParser(
            description='Update key and tag usage stats',
            usage='python3 %(prog)s [options]')

        parser.add_argument(
            '--host',
            action='store',
            dest='rdf_url',
            default='http://localhost:9999/bigdata/namespace/wdq/sparql',
            help='Host URL to upload data. Default: %(default)s')
        parser.add_argument('-n',
                            '--dry-run',
                            action='store_true',
                            dest='dry_run',
                            default=False,
                            help='Do not modify RDF database.')
        opts = parser.parse_args()

        self.options = opts
        self.rdf_server = Sparql(opts.rdf_url,
                                 'query' if opts.dry_run else False)
        self.date_subject = '<https://taginfo.openstreetmap.org>'
        self.url_stats = 'https://taginfo.openstreetmap.org/api/4/key/stats'
        self.url_keys = 'https://taginfo.openstreetmap.org/api/4/keys/all'
        self.ids = {}

    def run(self):

        while True:
            self.run_once()
            time.sleep(1000)

    def run_once(self):
        ts_taginfo = self.get_current_ts()
        ts_db = query_status(
            self.rdf_server,
            self.date_subject) if not self.options.dry_run else None

        if ts_taginfo is not None and ts_taginfo == ts_db:
            self.log.info(f'Data is up to date {ts_taginfo}, sleeping...')
            return

        if ts_db is None:
            self.log.info(
                f'schema:dateModified is not set for {self.date_subject}, performing first import'
            )
        else:
            self.log.info(f'Loading taginfo data, last updated {ts_db}')
        stats, ts = self.get_stats()
        if stats:
            self.log.info(f'Updating {len(stats)} stats')
            self.save_stats(stats, ts)

        self.log.info('Import is done, waiting for new data...')

    def get_stats(self):
        if self.options.dry_run:
            with open('/home/yurik/dev/sophox/all.keys.json', 'r') as f:
                data = json.load(f)
        else:
            data = requests.get(self.url_keys).json()

        ts = parse_utc(data['data_until'])
        stats = {}
        for row in data['data']:
            stats[row['key']] = tuple([row[k] for k in info_keys])

        return stats, ts

    def save_stats(self, stats, timestamp):
        # Resolve keys to IDs
        for keys in chunks([k for k in stats.keys() if k not in self.ids],
                           5000):
            sparql = f'''
SELECT ?key ?id WHERE {{
  VALUES ?key {{{' '.join([stringify(k) for k in keys])}}}
  ?id osmdt:P16 ?key.
}}'''
            res = self.rdf_server.run('query', sparql)
            # http://wiki.openstreetmap.org/entity/Q103
            self.ids.update({
                v['key']['value']:
                v['id']['value'][len('http://wiki.openstreetmap.org/entity/'):]
                for v in res
            })

        self.log.info(f'Total resolved keys is {len(self.ids)}, updating...')

        # Delete all usage counters
        sparql = f'''
DELETE {{ ?s ?p ?o }} WHERE {{
  VALUES ?p {{ {' '.join([f'osmm:{k}' for k in info_keys])} }}
         ?s ?p ?o .
}}'''

        self.rdf_server.run('update', sparql)
        self.log.info('Existing counts deleted, importing...')

        done = 0
        last_print = datetime.utcnow()
        for keys in chunks(stats.keys(), 5000):
            sparql = (
                'INSERT {\n' +
                '\n'.join([f'?id osmm:{k} ?{k}.'
                           for k in info_keys]) + '\n} WHERE {\n' +
                f"VALUES (?id {' '.join([f'?{k}' for k in info_keys])}) {{\n" +
                '\n'.join([
                    f"(osmd:{self.ids[k]} {' '.join([str(stats[k][i]) for i in range(len(info_keys))])})"
                    for k in keys if k in self.ids
                ]) + '\n} }')

            self.rdf_server.run('update', sparql)
            done += len(keys)
            if (datetime.utcnow() - last_print).total_seconds() > 60:
                self.log.info(
                    f'Imported {done} usage stats, pausing for a few seconds...'
                )
                time.sleep(60)
                last_print = datetime.utcnow()

        self.rdf_server.run('update',
                            set_status_query(self.date_subject, timestamp))
        self.log.info(f'Finished importing {done} usage stats')

    def get_current_ts(self):
        ts_str = requests.get(self.url_stats).json()['data_until']
        return parse_utc(ts_str)
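Several snippets above rely on small module-level helpers (chunks, stringify, parse_utc, info_keys) that are not shown. Minimal sketches consistent with how they are called above (assumptions, not the original implementations):

    import datetime as dt

    # Hypothetical list of taginfo count fields consumed by UpdateUsageStats.
    info_keys = ['count_all', 'count_nodes', 'count_ways', 'count_relations']

    def chunks(iterable, size):
        # Yield successive lists of at most `size` items.
        items = list(iterable)
        for i in range(0, len(items), size):
            yield items[i:i + size]

    def stringify(value):
        # Quote a plain string for use inside a SPARQL VALUES block.
        return '"' + value.replace('\\', '\\\\').replace('"', '\\"') + '"'

    def parse_utc(ts):
        # taginfo timestamps look like "2020-01-01T00:00:00Z".
        return dt.datetime.strptime(ts, '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=dt.timezone.utc)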