Example #1
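This excerpt is a static method of the UpdateReplication command class (note the relative imports and the call to UpdateReplication._report_update further down). It drives the replication update loop: fetch and apply a diff batch, reindex, then either stop (args.once) or sleep and repeat. A minimal sketch of the module-level names the excerpt assumes; the import paths follow Nominatim's usual layout, but treat them as assumptions rather than the exact file header:

    # Assumed module header (sketch, not verbatim from the project):
    import datetime as dt
    import logging
    import time

    from nominatim.db import status
    from nominatim.db.connection import connect
    from nominatim.errors import UsageError

    LOG = logging.getLogger()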
    def _update(args):
        from ..tools import replication
        from ..indexer.indexer import Indexer

        params = args.osm2pgsql_options(default_cache=2000, default_threads=1)
        params.update(base_url=args.config.REPLICATION_URL,
                      update_interval=args.config.get_int('REPLICATION_UPDATE_INTERVAL'),
                      import_file=args.project_dir / 'osmosischange.osc',
                      max_diff_size=args.config.get_int('REPLICATION_MAX_DIFF'),
                      indexed_only=not args.once)

        # Sanity check to not overwhelm the Geofabrik servers: require an
        # update interval of at least 86400 s (one day).
        if 'download.geofabrik.de' in params['base_url'] \
           and params['update_interval'] < 86400:
            LOG.fatal("Update interval too low for download.geofabrik.de.\n"
                      "Please check install documentation "
                      "(https://nominatim.org/release-docs/latest/admin/Import-and-Update#"
                      "setting-up-the-update-process).")
            raise UsageError("Invalid replication update interval setting.")

        if not args.once:
            if not args.do_index:
                LOG.fatal("Indexing cannot be disabled when running updates continuously.")
                raise UsageError("Bad argument '--no-index'.")
            recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')

        while True:
            with connect(args.config.get_libpq_dsn()) as conn:
                start = dt.datetime.now(dt.timezone.utc)
                state = replication.update(conn, params)
                if state is not replication.UpdateState.NO_CHANGES:
                    status.log_status(conn, start, 'import')
                batchdate, _, _ = status.get_status(conn)

            if state is not replication.UpdateState.NO_CHANGES and args.do_index:
                index_start = dt.datetime.now(dt.timezone.utc)
                indexer = Indexer(args.config.get_libpq_dsn(),
                                  args.threads or 1)
                indexer.index_boundaries(0, 30)
                indexer.index_by_rank(0, 30)

                with connect(args.config.get_libpq_dsn()) as conn:
                    status.set_indexed(conn, True)
                    status.log_status(conn, index_start, 'index')
            else:
                index_start = None

            if LOG.isEnabledFor(logging.WARNING):
                UpdateReplication._report_update(batchdate, start, index_start)

            if args.once:
                break

            if state is replication.UpdateState.NO_CHANGES:
                LOG.warning("No new changes. Sleeping for %d sec.", recheck_interval)
                time.sleep(recheck_interval)
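A hedged note on invocation: the error messages in this example and the next ("nominatim replication --init", "Bad argument '--no-index'") imply the CLI surface, so usage likely looks like the following; the flag names are inferred from args.once and those messages, not confirmed by the excerpt:

    nominatim replication --once    # run a single update cycle, then exit
    nominatim replication           # update continuously (indexing required)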
Example #2
def update(conn, options):
    """ Update database from the next batch of data. Returns the state of
        updates according to `UpdateState`.
    """
    startdate, startseq, indexed = status.get_status(conn)

    if startseq is None:
        LOG.error("Replication not set up. "
                  "Please run 'nominatim replication --init' first.")
        raise UsageError("Replication not set up.")

    if not indexed and options['indexed_only']:
        LOG.info("Skipping update. There is data that needs indexing.")
        return UpdateState.MORE_PENDING

    last_since_update = dt.datetime.now(dt.timezone.utc) - startdate
    update_interval = dt.timedelta(seconds=options['update_interval'])
    if last_since_update < update_interval:
        # Use total_seconds() rather than .seconds: the remaining wait can
        # exceed one day for large update intervals, and .seconds drops days.
        duration = int((update_interval - last_since_update).total_seconds())
        LOG.warning("Sleeping for %s sec before next update.", duration)
        time.sleep(duration)

    if options['import_file'].exists():
        options['import_file'].unlink()

    # Read updates into file.
    repl = ReplicationServer(options['base_url'])

    outhandler = WriteHandler(str(options['import_file']))
    endseq = repl.apply_diffs(outhandler,
                              startseq + 1,
                              max_size=options['max_diff_size'] * 1024)
    outhandler.close()

    if endseq is None:
        return UpdateState.NO_CHANGES

    # Consume updates with osm2pgsql.
    options['append'] = True
    options['disable_jit'] = conn.server_version_tuple() >= (11, 0)
    run_osm2pgsql(options)

    # Write the current status to the database.
    endstate = repl.get_state_info(endseq)
    status.set_status(conn,
                      endstate.timestamp if endstate else None,
                      seq=endseq,
                      indexed=False)

    return UpdateState.UP_TO_DATE
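The docstring refers to an UpdateState enum that the excerpt does not show; the code also relies on pyosmium (osmium.replication.server.ReplicationServer, osmium.WriteHandler) and a run_osm2pgsql helper whose imports are omitted. A minimal sketch of the enum, consistent with the three members used above (the numeric values are an assumption):

    from enum import Enum

    class UpdateState(Enum):
        """ Possible outcomes of a replication update run. """
        UP_TO_DATE = 0     # a diff batch was downloaded and applied
        MORE_PENDING = 2   # skipped: earlier data still needs indexing
        NO_CHANGES = 3     # the replication server had nothing new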
Example #3
def check_for_updates(conn, base_url):
    """ Check if new data is available from the replication service at the
        given base URL.
    """
    _, seq, _ = status.get_status(conn)

    if seq is None:
        LOG.error("Replication not set up. "
                  "Please run 'nominatim replication --init' first.")
        return 254

    state = ReplicationServer(base_url).get_state_info()

    if state is None:
        LOG.error("Cannot get state for URL %s.", base_url)
        return 253

    if state.sequence <= seq:
        LOG.warning("Database is up to date.")
        return 2

    LOG.warning("New data available (%i => %i).", seq, state.sequence)
    return 0
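Because the function reports results as small integers instead of raising, it maps directly onto a process exit code, which makes it easy to drive from cron or a shell script. A hedged usage sketch; connect and the DSN are placeholders assumed to match the other examples, and the URL is just an illustrative replication endpoint:

    import sys

    # dsn: a libpq connection string placeholder.
    with connect(dsn) as conn:
        # exit code 0 = new data available, 2 = up to date, 253/254 = errors
        sys.exit(check_for_updates(
            conn, 'https://planet.openstreetmap.org/replication/minute'))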
Example #4
    def _update(args):
        from ..tools import replication
        from ..indexer.indexer import Indexer
        from ..tokenizer import factory as tokenizer_factory

        update_interval = UpdateReplication._compute_update_interval(args)

        params = args.osm2pgsql_options(default_cache=2000, default_threads=1)
        params.update(
            base_url=args.config.REPLICATION_URL,
            update_interval=update_interval,
            import_file=args.project_dir / 'osmosischange.osc',
            max_diff_size=args.config.get_int('REPLICATION_MAX_DIFF'),
            indexed_only=not args.once)

        if not args.once:
            if not args.do_index:
                LOG.fatal(
                    "Indexing cannot be disabled when running updates continuously."
                )
                raise UsageError("Bad argument '--no-index'.")
            recheck_interval = args.config.get_int(
                'REPLICATION_RECHECK_INTERVAL')

        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
        # Create the indexer once, before the loop: the catch-up branch below
        # may need it even when the first batch brings no new changes.
        indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                          args.threads or 1)

        while True:
            with connect(args.config.get_libpq_dsn()) as conn:
                start = dt.datetime.now(dt.timezone.utc)
                state = replication.update(conn, params)
                if state is not replication.UpdateState.NO_CHANGES:
                    status.log_status(conn, start, 'import')
                batchdate, _, _ = status.get_status(conn)
                conn.commit()

            if state is not replication.UpdateState.NO_CHANGES and args.do_index:
                index_start = dt.datetime.now(dt.timezone.utc)
                indexer.index_full(analyse=False)

                with connect(args.config.get_libpq_dsn()) as conn:
                    status.set_indexed(conn, True)
                    status.log_status(conn, index_start, 'index')
                    conn.commit()
            else:
                index_start = None

            # Python groups this as (NO_CHANGES and catch_up) or (interval > 40 min);
            # parentheses make the intended precedence explicit.
            if (state is replication.UpdateState.NO_CHANGES and args.catch_up) \
               or update_interval > 40*60:
                while indexer.has_pending():
                    indexer.index_full(analyse=False)

            if LOG.isEnabledFor(logging.WARNING):
                UpdateReplication._report_update(batchdate, start, index_start)

            if args.once or (args.catch_up
                             and state is replication.UpdateState.NO_CHANGES):
                break

            if state is replication.UpdateState.NO_CHANGES:
                LOG.warning("No new changes. Sleeping for %d sec.",
                            recheck_interval)
                time.sleep(recheck_interval)
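Example #4 is a later revision of the loop in Example #1: the Indexer now requires a tokenizer (obtained once via tokenizer_factory.get_tokenizer_for_db), the per-rank index_boundaries/index_by_rank calls are folded into a single indexer.index_full(), both database connections commit explicitly, and a catch-up mode (args.catch_up) keeps indexing until nothing is pending before exiting.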