Ejemplo n.º 1
0
def cmd_sysrefresh(bot, trigger, db=None):
    """
    Refreshes the starsystem database if you have halfop or better.  Reports the last refresh time otherwise.

    -f: Force refresh even if data is stale.  Requires op.

    :param bot: Bot instance
    :param trigger: Sopel trigger; group(2) holds the optional "-f" argument.
    :param db: Database handle
    """
    access = ratlib.sopel.best_channel_mode(bot, trigger.nick)
    privileged = access & (HALFOP | OP)
    msg = ""

    if privileged:
        try:
            # Force only when an op explicitly passed "-f".  The comparison with
            # '-f' already handles a missing argument (None == '-f' is False), so
            # the old redundant truthiness check is dropped; bool() keeps the
            # keyword a proper boolean rather than a leftover bitmask int.
            force = bool(access & OP) and trigger.group(2) == '-f'
            refreshed = refresh_database(
                bot,
                force=force,
                callback=lambda: bot.say("Starting starsystem refresh...")
            )
            if refreshed:
                bot.say(refresh_time_stats(bot))
                return
            msg = "Not yet.  "
        except ConcurrentOperationError:
            bot.say("A starsystem refresh operation is already in progress.")
            return

    # Unprivileged users (or a skipped refresh) get a status report instead.
    when = get_status(db).starsystem_refreshed
    if not when:
        msg += "The starsystem database appears to have never been initialized."
    else:
        when = when.astimezone(datetime.timezone.utc)
        msg += "The starsystem database was refreshed at {} ({}) or an update is still in progress. It is only allowed every {} seconds.".format(
            ratlib.format_timestamp(when), ratlib.format_timedelta(when), bot.config.ratbot.edsm_maxage or '<unknown>'
        )
    bot.say(msg)
Ejemplo n.º 2
0
def cmd_sysrefresh(bot, trigger, db=None):
    """
    Refreshes the starsystem database if you have halfop or better.  Reports the last refresh time otherwise.

    -f: Force refresh even if data is stale.  Requires op.
    """
    access = ratlib.sopel.best_channel_mode(bot, trigger.nick)
    privileged = access & (HALFOP | OP)
    msg = ""

    if privileged:
        # Collect single-letter flags from a leading "-" argument (e.g. "-fp").
        argument = trigger.group(2)
        if argument and argument[0] == '-':
            options = argument[1:]
        else:
            options = ""
        is_op = access & OP
        force = 'f' in options and is_op
        prune = not ('p' in options and is_op)

        try:
            refreshed = refresh_database(
                bot,
                force=force,
                prune=prune,
                callback=lambda: bot.say("Starting starsystem refresh..."))
            if refreshed:
                bot.say(refresh_time_stats(bot))
                return
            msg = "Not yet.  "
        except ConcurrentOperationError:
            bot.say("A starsystem refresh operation is already in progress.")
            return

    # Fall through to a status report for unprivileged users or skipped refreshes.
    when = get_status(db).starsystem_refreshed
    if when:
        when = when.astimezone(datetime.timezone.utc)
        msg += "The starsystem database was refreshed at {} ({}) or an update is still in progress. It is only allowed every {} seconds.".format(
            timeutil.format_timestamp(when), timeutil.format_timedelta(when),
            bot.config.ratbot.edsm_maxage or '<unknown>')
    else:
        msg += "The starsystem database appears to have never been initialized."
    bot.say(msg)
Ejemplo n.º 3
0
def _refresh_database(bot, force=False, prune=True, callback=None, background=False, db=None):
    """
    Actual implementation of refresh_database.

    Refreshes the database of starsystems.  Also rebuilds the bloom filter.
    :param bot: Bot instance
    :param force: True to force refresh
    :param prune: True to prune non-updated systems.  Keep True unless performance testing.
    :param callback: Optional function that is called as soon as the system determines a refresh is needed.
    :param background: If True and a refresh is needed, it is submitted as a background task rather than running
        immediately.
    :param db: Database handle
    :returns: False if no refresh was needed, a Future if the refresh was scheduled in the background,
        True once a foreground refresh completes.

    Note that this function executes some raw SQL queries (among other voodoo).  This is for performance reasons
    concerning the insanely large dataset being handled, and should NOT serve as an example for implementation
    elsewhere.
    """
    eddb_url = bot.config.ratbot.edsm_url or "https://eddb.io/archive/v5/systems.csv"
    chunked = bot.config.ratbot.chunked_systems

    # Should really implement this, but until then
    if chunked:
        raise NotImplementedError("Chunked system loading is not implemented yet.")

    status = get_status(db)
    eddb_maxage = float(bot.config.ratbot.edsm_maxage or (7*86400))  # Once per week = 604800 seconds
    if not (
        force or
        not status.starsystem_refreshed or
        (datetime.datetime.now(tz=datetime.timezone.utc) - status.starsystem_refreshed).total_seconds() > eddb_maxage
    ):
        # No refresh needed.
        return False

    if callback:
        callback()

    if background:
        print('Scheduling background refresh of starsystem data')
        # force=True: the staleness check above already passed, so the background
        # task must not re-evaluate (and possibly skip) it.
        return bot.memory['ratbot']['executor'].submit(
            _refresh_database, bot, force=True, callback=None, background=False
        )

    conn = db.connection()
    # Now in actual implementation beyond background scheduling

    # Counters for stats
    # All times in seconds
    stats = {
        'load': 0,      # Time spent retrieving the CSV file(s) and dumping it into a temptable in the db.
        'index': 0,     # Time spent retrieving the chunked-systems index.  (FIX: key was missing; the chunked branch increments it.)
        'prune': 0,     # Time spent removing non-update updates.
        'systems': 0,   # Time spent merging starsystems into the db.
        'prefixes': 0,  # Time spent merging starsystem prefixes into the db.
        'stats': 0,     # Time spent (re)computing system statistics
        'bloom': 0,     # Time spent (re)building the system prefix bloom filter.
        'optimize': 0,  # Time spent optimizing/analyzing tables.
        'misc': 0,      # Miscellaneous tasks (total time - all other stats)
        'total': 0,     # Total time spent.
    }

    def log(fmt, *args, **kwargs):
        # Timestamped progress logging to stdout.
        print("[{}] ".format(datetime.datetime.now()) + fmt.format(*args, **kwargs))

    overall_timer = TimedResult()
    log("Starsystem refresh started")
    if chunked:
        # FIXME: Needs to be reimplemented.  (Currently unreachable: `chunked` raises NotImplementedError above.)
        log("Retrieving starsystem index at {}", eddb_url)
        with timed() as t:
            response = requests.get(eddb_url)
            response.raise_for_status()
            urls = list(urljoin(eddb_url, chunk["SectorName"]) for chunk in response.json())
        stats['index'] += t.seconds
        # NOTE(review): format_timestamp() is handed a timedelta here -- format_timedelta may be intended; confirm.
        log("{} file(s) queued for starsystem refresh.  (Took {})", len(urls), format_timestamp(t.delta))
    else:
        urls = [eddb_url]

    # Temporary staging table the CSV rows are COPYed into before merging.
    temptable = sa.Table(
        '_temp_new_starsystem', sa.MetaData(),
        sa.Column('id', sa.Integer, primary_key=True, autoincrement=True),
        sa.Column('eddb_id', sa.Integer),
        sa.Column('name_lower', sa.Text(collation="C")),
        sa.Column('name', sa.Text(collation="C")),
        sa.Column('first_word', sa.Text(collation="C")),
        sa.Column('word_ct', sa.Integer),
        sa.Column('xz', SQLPoint),
        sa.Column('y', sa.Numeric),
        # sa.Index('_temp_id_ix', 'eddb_id'),
        prefixes=['TEMPORARY'], postgresql_on_commit='DROP'
    )
    temptable.create(conn)

    # Table-name substitutions for the raw SQL statements below.
    sql_args = {
        'sp': StarsystemPrefix.__tablename__,
        's': Starsystem.__tablename__,
        'ts': temptable.name,
        'tsp': '_temp_new_prefixes'
    }

    buffer = io.StringIO()  # Temporary IO buffer for COPY FROM
    columns = ['eddb_id', 'name_lower', 'name', 'first_word', 'word_ct', 'xz', 'y']  # Columns to copy to temptable
    getter = operator.itemgetter(*columns)
    total_flushed = 0  # Total number of flushed items so far
    pending_flush = 0  # Number of items waiting to flush

    def run_sql(statement, *args, **kwargs):
        # Format table names into `statement`, execute it, and log-and-reraise on failure.
        # (Renamed from `exec` to avoid shadowing the builtin.)
        try:
            conn.execute(statement.format(*args, **kwargs, **sql_args))
        except Exception:
            log("Query failed.")
            import traceback
            traceback.print_exc()
            raise

    def flush():
        # COPY the buffered rows into the temptable and reset the buffer/counters.
        nonlocal buffer, total_flushed, pending_flush
        if not pending_flush:
            return
        log("Flushing system(s) {}-{}", total_flushed + 1, total_flushed + pending_flush)
        buffer.seek(0)
        cursor = conn.connection.cursor()
        cursor.copy_from(buffer, temptable.name, sep='\t', null='', columns=columns)
        buffer = io.StringIO()
        total_flushed += pending_flush
        pending_flush = 0


    with timed() as t:
        for url in urls:
            log("Retrieving starsystem data at {}", url)
            try:
                response = requests.get(url, stream=True)
                reader = csv.DictReader(io.TextIOWrapper(response.raw))

                for row in reader:
                    # Parse and reformat system info from CSV
                    name, word_ct = re.subn(r'\s+', ' ', row['name'].strip())
                    name_lower = name.lower()
                    first_word, *unused = name_lower.split(" ", 1)
                    word_ct += 1
                    # Only record coordinates when all three axes are present.
                    if all((row['x'], row['y'], row['z'])):
                        xz = "({x},{z})".format(**row)
                        y = row['y']
                    else:
                        xz = y = ''
                    system_raw = {
                        'eddb_id': str(row['id']),
                        'name_lower': name_lower,
                        'name': name,
                        'first_word': first_word,
                        'xz': xz,
                        'y': y,
                        'word_ct': str(word_ct)
                    }
                    pending_flush += 1
                    buffer.write("\t".join(getter(system_raw)))
                    buffer.write("\n")

                    if pending_flush >= FLUSH_THRESHOLD:
                        flush()
            except ValueError:
                # Malformed data; the refresh is best-effort, so skip this URL silently.
                pass
            except Exception:
                log("Failed to retrieve data")
                import traceback
                traceback.print_exc()
            flush()
        log("Creating index")
        run_sql("CREATE INDEX ON {ts}(eddb_id)")
    stats['load'] += t.seconds

    with timed() as t:
        log("Removing possible duplicates")
        # Keep only the last-loaded row (highest temp id) per eddb_id.
        # FIX: the original filtered on eddb_id against MAX(id), which compared
        # unrelated columns and could delete nearly every row; filter on id.
        run_sql("DELETE FROM {ts} WHERE id NOT IN(SELECT MAX(id) AS id FROM {ts} GROUP BY eddb_id)")

        # No need for the temporary 'id' column at this point.
        run_sql("ALTER TABLE {ts} DROP id CASCADE")
        # Making this a primary key (or even just a unique key) apparently affects query planner performance vs the
        # non-existing unique key.
        run_sql("ALTER TABLE {ts} ADD PRIMARY KEY(eddb_id)")

        if prune:
            log("Removing non-updates to existing systems")
            # If a starsystem has been updated, at least one of 'name', 'xz' or 'y' are guaranteed to have changed.
            # (A change that affects word_ct would affect name as well, for instance.)
            # Delete any temporary systems that exist in the real table with matching attributes.
            run_sql("""
                DELETE FROM {ts} AS t USING {s} AS s
                WHERE s.eddb_id=t.eddb_id
                AND ROW(s.name, s.y) IS NOT DISTINCT FROM ROW(t.name, t.y)
                AND ((s.xz IS NULL)=(t.xz IS NULL)) AND (s.xz~=t.xz OR s.xz IS NULL)
            """)
        else:
            log("Skipping non-update removal phase")
    stats['prune'] += t.seconds

    with timed() as t:
        log("Building list of distinct prefixes")
        # Create list of unique prefixes in this batch
        run_sql("""
            CREATE TEMPORARY TABLE {tsp} ON COMMIT DROP
            AS SELECT DISTINCT first_word, word_ct FROM {ts}
        """)

        # Insert new prefixes
        run_sql("""
            INSERT INTO {sp} (first_word, word_ct)
            SELECT t.first_word, t.word_ct
            FROM
                {tsp} AS t
                LEFT JOIN {sp} AS sp ON sp.first_word=t.first_word AND sp.word_ct=t.word_ct
            WHERE sp.first_word IS NULL
        """)
    stats['prefixes'] += t.seconds

    with timed() as t:
        log("Updating existing systems.")
        run_sql("""
            UPDATE {s} AS s
            SET name_lower=t.name_lower, name=t.name, first_word=t.first_word, word_ct=t.word_ct, xz=t.xz, y=t.y
            FROM {ts} AS t
            WHERE s.eddb_id=t.eddb_id
        """)

        log("Inserting new systems.")
        run_sql("""
            INSERT INTO {s} (eddb_id, name_lower, name, first_word, word_ct, xz, y)
            SELECT t.eddb_id, t.name_lower, t.name, t.first_word, t.word_ct, t.xz, t.y
            FROM {ts} AS t
            LEFT JOIN {s} AS s ON s.eddb_id=t.eddb_id
            WHERE s.eddb_id IS NULL
        """)

    stats['systems'] += t.seconds

    with timed() as t:
        log('Computing prefix statistics')
        # Recompute ratio/cume_ratio via window functions, restricted to prefixes
        # whose first_word appeared in this batch.
        run_sql("""
            UPDATE {sp} SET ratio=t.ratio, cume_ratio=t.cume_ratio
            FROM (
                SELECT
                    t.first_word, t.word_ct, ct/(SUM(ct) OVER w) AS ratio,
                    (SUM(ct) OVER p)/(SUM(ct) OVER w) AS cume_ratio
                FROM (
                    SELECT sp.*, COUNT(s.eddb_id) AS ct
                    FROM
                        {sp} AS sp
                        LEFT JOIN {s} AS s USING (first_word, word_ct)
                    WHERE sp.first_word IN(SELECT first_word FROM {tsp})
                    GROUP BY sp.first_word, sp.word_ct
                    HAVING COUNT(*) > 0
                ) AS t
                WINDOW
                w AS (PARTITION BY t.first_word ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
                p AS (PARTITION BY t.first_word ORDER BY t.word_ct ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
            ) AS t
            WHERE {sp}.first_word=t.first_word AND {sp}.word_ct=t.word_ct
        """)
    stats['stats'] += t.seconds
    with timed() as t:
        log("Analyzing tables")
        run_sql("ANALYZE {sp}")
        run_sql("ANALYZE {s}")
    stats['optimize'] += t.seconds

    log("Starsystem database update complete")
    # Update refresh time
    try:
        status = get_status(db)
        status.starsystem_refreshed = sql.func.clock_timestamp()
        db.add(status)
        db.commit()
    except Exception:
        import traceback
        traceback.print_exc()
        raise
    log("Starsystem database update committed")

    with timed() as t:
        log("Rebuilding bloom filter")
        refresh_bloom(bot)
    stats['bloom'] += t.seconds

    overall_timer.stop()
    stats['misc'] = overall_timer.seconds - sum(stats.values())
    stats['total'] = overall_timer.seconds
    bot.memory['ratbot']['stats']['starsystem_refresh'] = stats
    log("Starsystem refresh finished")
    return True
Ejemplo n.º 4
0
def _refresh_database(bot, force=False, callback=None, background=False, db=None):
    """
    Actual implementation of refresh_database.

    Refreshes the database of starsystems.  Also rebuilds the bloom filter.
    :param bot: Bot instance
    :param force: True to force refresh
    :param callback: Optional function that is called as soon as the system determines a refresh is needed.
    :param background: If True and a refresh is needed, it is submitted as a background task rather than running
        immediately.
    :param db: Database handle
    :returns: False if no refresh was needed, a Future if scheduled in the background, True on completion.
    """
    start = time()  # Overall wall-clock timer for the stats reported at the end.
    edsm_url = bot.config.ratbot.edsm_url or "http://edsm.net/api-v1/systems?coords=1"
    status = get_status(db)

    # Maximum data age before a refresh is allowed.
    # NOTE(review): 60*12*12 is 8640 seconds (~2.4 hours) -- confirm this default is intended.
    edsm_maxage = bot.config.ratbot.maxage or 60*12*12
    if not (
        force or
        not status.starsystem_refreshed or
        (datetime.datetime.now(tz=datetime.timezone.utc) - status.starsystem_refreshed).total_seconds() > edsm_maxage
    ):
        # No refresh needed.
        return False

    if callback:
        callback()

    if background:
        # force=True: the staleness check above already passed, so the background
        # task should not re-evaluate (and possibly skip) the refresh.
        return bot.memory['ratbot']['executor'].submit(
            _refresh_database, bot, force=True, callback=None, background=False
        )

    fetch_start = time()
    data = requests.get(edsm_url).json()
    fetch_end = time()
    # with open('run/systems.json') as f:
    #     import json
    #     data = json.load(f)

    # Full rebuild: the existing tables are wiped and repopulated from scratch.
    db.query(Starsystem).delete()  # Wipe all old data
    db.query(StarsystemPrefix).delete()  # Wipe all old data
    # Pass 1: Load JSON data into stats table.
    systems = []
    ct = 0

    def _format_system(s):
        # Normalize one EDSM record: collapse runs of whitespace in the name;
        # re.subn's substitution count yields the word count (substitutions + 1).
        nonlocal ct
        ct += 1
        name, word_ct = re.subn(r'\s+', ' ', s['name'].strip())
        word_ct += 1
        return {
            'name_lower': name.lower(),
            'name': name,
            'x': s.get('x'), 'y': s.get('y'), 'z': s.get('z'),
            'word_ct': word_ct
        }
    load_start = time()
    for chunk in chunkify(data, 5000):
        db.bulk_insert_mappings(Starsystem, [_format_system(s) for s in chunk])
        # print(ct)
    del data  # Release the (large) parsed JSON as soon as it is loaded.
    db.connection().execute("ANALYZE " + Starsystem.__tablename__)
    load_end = time()

    stats_start = time()
    # Pass 2: Calculate statistics.
    # 2A: Quick insert of prefixes for single-name systems
    db.connection().execute(
        sql.insert(StarsystemPrefix).from_select(
            (StarsystemPrefix.first_word, StarsystemPrefix.word_ct),
            db.query(Starsystem.name_lower, Starsystem.word_ct).filter(Starsystem.word_ct == 1).distinct()
        )
    )

    def _gen():
        # Yield ((first_word, word_ct), remaining_words, system) for all multi-word
        # systems, ordered so the itertools.groupby below groups correctly.
        for s in (
            db.query(Starsystem)
            .order_by(Starsystem.word_ct, Starsystem.name_lower)
            .filter(Starsystem.word_ct > 1)
        ):
            first_word, *words = s.name_lower.split(" ")
            yield (first_word, s.word_ct), words, s

    ct = 0
    for chunk in chunkify(itertools.groupby(_gen(), operator.itemgetter(0)), 100):
        for (first_word, word_ct), group in chunk:
            ct += 1
            const_words = None
            for _, words, system in group:
                if const_words is None:
                    const_words = words.copy()
                else:
                    # Shrink const_words to the longest prefix shared with this
                    # system's words.  (The unpacked (common, word) pair is unused;
                    # the comparison goes through indices.)
                    for ix, (common, word) in enumerate(zip(const_words, words)):
                        if const_words[ix] != words[ix]:
                            const_words = const_words[:ix]
                            break
            prefix = StarsystemPrefix(
                first_word=first_word, word_ct=word_ct, const_words=" ".join(const_words)
            )
            db.add(prefix)
        # print(ct)
        db.flush()
    # Point each starsystem at its prefix row (match on first word + word count).
    db.connection().execute(
        sql.update(
            Starsystem, values={
                Starsystem.prefix_id: db.query(StarsystemPrefix.id).filter(
                    StarsystemPrefix.first_word == sql.func.split_part(Starsystem.name_lower, ' ', 1),
                    StarsystemPrefix.word_ct == Starsystem.word_ct
                ).as_scalar()
            }
        )
    )
    # Recompute per-prefix ratio and cumulative ratio with window functions.
    # NOTE(review): the final WHERE hardcodes 'starsystem_prefix' instead of using
    # the {sp} substitution -- only correct while StarsystemPrefix.__tablename__ matches.
    db.connection().execute(
        """
        UPDATE {sp} SET ratio=t.ratio, cume_ratio=t.cume_ratio
        FROM (
            SELECT t.id, ct/SUM(ct) OVER w AS ratio, SUM(ct) OVER p/SUM(ct) OVER w AS cume_ratio
            FROM (
                SELECT sp.*, COUNT(*) AS ct
                FROM
                    {sp} AS sp
                    INNER JOIN {s} AS s ON s.prefix_id=sp.id
                GROUP BY sp.id
                HAVING COUNT(*) > 0
            ) AS t
            WINDOW
            w AS (PARTITION BY t.first_word ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
            p AS (PARTITION BY t.first_word ORDER BY t.word_ct ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
        ) AS t
        WHERE t.id=starsystem_prefix.id
        """.format(sp=StarsystemPrefix.__tablename__, s=Starsystem.__tablename__)
    )
    stats_end = time()

    # Update refresh time
    status = get_status(db)
    status.starsystem_refreshed = sql.func.clock_timestamp()
    db.add(status)
    db.commit()
    bloom_start = time()
    refresh_bloom(bot)
    bloom_end = time()
    end = time()
    # Per-phase timing stats, stored on the bot for reporting.
    stats = {
        'stats': stats_end - stats_start, 'load': load_end - load_start, 'fetch': fetch_end - fetch_start,
        'bloom': bloom_end - bloom_start
    }
    stats['misc'] = (end - start) - sum(stats.values())
    stats['all'] = end - start
    bot.memory['ratbot']['stats']['starsystem_refresh'] = stats
    return True
Ejemplo n.º 5
0
def _refresh_database(bot, force=False, callback=None, background=False, db=None):
    """
    Actual implementation of refresh_database.

    Refreshes the database of starsystems.  Also rebuilds the bloom filter.
    :param bot: Bot instance
    :param force: True to force refresh
    :param callback: Optional function that is called as soon as the system determines a refresh is needed.
    :param background: If True and a refresh is needed, it is submitted as a background task rather than running
        immediately.
    :param db: Database handle
    :returns: False if no refresh was needed, a Future if scheduled in the background, True on completion.

    Note that this function executes some raw SQL queries (among other voodoo).  This is for performance reasons
    concerning the insanely large dataset being handled, and should NOT serve as an example for implementation
    elsewhere.
    """
    eddb_url = bot.config.ratbot.edsm_url or "https://eddb.io/archive/v5/systems.csv"
    chunked = bot.config.ratbot.chunked_systems

    # Should really implement this, but until then
    if chunked:
        raise NotImplementedError("Chunked system loading is not implemented yet.")

    status = get_status(db)
    eddb_maxage = float(bot.config.ratbot.edsm_maxage or (7*86400))  # Once per week = 604800 seconds
    if not (
        force or
        not status.starsystem_refreshed or
        (datetime.datetime.now(tz=datetime.timezone.utc) - status.starsystem_refreshed).total_seconds() > eddb_maxage
    ):
        # No refresh needed.
        # print('not force and no refresh needed')
        return False

    if callback:
        callback()

    if background:
        print('Scheduling background refresh of starsystem data')
        # force=True: the staleness check above already passed, so the background
        # task should not re-evaluate (and possibly skip) the refresh.
        return bot.memory['ratbot']['executor'].submit(
            _refresh_database, bot, force=True, callback=None, background=False
        )

    conn = db.connection()
    # Now in actual implementation beyond background scheduling

    # Counters for stats
    # All times in seconds
    # NOTE(review): the chunked branch below increments stats['index'], which is
    # missing from this dict and would raise KeyError (that branch is currently
    # unreachable because `chunked` raises NotImplementedError above).
    stats = {
        'load': 0,      # Time spent retrieving the CSV file(s) and dumping it into a temptable in the db.
        'prune': 0,     # Time spent removing non-update updates.
        'systems': 0,   # Time spent merging starsystems into the db.
        'prefixes': 0,  # Time spent merging starsystem prefixes into the db.
        'stats': 0,     # Time spent (re)computing system statistics
        'bloom': 0,     # Time spent (re)building the system prefix bloom filter.
        'optimize': 0,  # Time spent optimizing/analyzing tables.
        'misc': 0,      # Miscellaneous tasks (total time - all other stats)
        'total': 0,     # Total time spent.
    }

    def log(fmt, *args, **kwargs):
        # Timestamped progress logging to stdout.
        print("[{}] ".format(datetime.datetime.now()) + fmt.format(*args, **kwargs))

    overall_timer = TimedResult()
    log("Starsystem refresh started")
    if chunked:
        # FIXME: Needs to be reimplemented.
        log("Retrieving starsystem index at {}", eddb_url)
        with timed() as t:
            response = requests.get(eddb_url)
            response.raise_for_status()
            urls = list(urljoin(eddb_url, chunk["SectorName"]) for chunk in response.json())
        stats['index'] += t.seconds
        # NOTE(review): the format string has a stray brace ('(Took {}}') and hands a
        # timedelta to format_timestamp -- format_timedelta is likely intended; confirm.
        log("{} file(s) queued for starsystem refresh.  (Took {}}", len(urls), format_timestamp(t.delta))
    else:
        urls = [eddb_url]

    # Temporary staging table the CSV rows are COPYed into before merging.
    temptable = sa.Table(
        '_temp_new_starsystem', sa.MetaData(),
        sa.Column('id', sa.Integer, primary_key=True, autoincrement=True),
        sa.Column('eddb_id', sa.Integer),
        sa.Column('name_lower', sa.Text(collation="C")),
        sa.Column('name', sa.Text(collation="C")),
        sa.Column('first_word', sa.Text(collation="C")),
        sa.Column('word_ct', sa.Integer),
        sa.Column('xz', SQLPoint),
        sa.Column('y', sa.Numeric),
        # sa.Index('_temp_id_ix', 'eddb_id'),
        prefixes=['TEMPORARY'], postgresql_on_commit='DROP'
    )
    temptable.create(conn)

    # Table-name substitutions for the raw SQL statements below.
    tablenames = {
        'sp': StarsystemPrefix.__tablename__,
        's': Starsystem.__tablename__,
        'ts': temptable.name,
        'tsp': '_temp_new_prefixes'
    }

    buffer = io.StringIO()  # Temporary IO buffer for COPY FROM
    columns = ['eddb_id', 'name_lower', 'name', 'first_word', 'word_ct', 'xz', 'y']  # Columns to copy to temptable
    getter = operator.itemgetter(*columns)
    total_flushed = 0  # Total number of flushed items so far
    pending_flush = 0  # Number of items waiting to flush

    # NOTE(review): this local deliberately shadows the builtin exec(); consider renaming.
    def exec(sql, *args, **kwargs):
        # Format table names into the statement, execute it, log-and-reraise on failure.
        try:
            conn.execute(sql.format(*args, **kwargs, **tablenames))
        except Exception as ex:
            log("Query failed.")
            import traceback
            traceback.print_exc()
            raise

    def flush():
        # COPY the buffered rows into the temptable and reset the buffer/counters.
        nonlocal buffer, total_flushed, pending_flush
        if not pending_flush:
            return
        log("Flushing system(s) {}-{}", total_flushed + 1, total_flushed + pending_flush)
        buffer.seek(0)
        cursor = conn.connection.cursor()
        cursor.copy_from(buffer, temptable.name, sep='\t', null='', columns=columns)
        buffer = io.StringIO()
        # systems = []
        total_flushed += pending_flush
        pending_flush = 0


    with timed() as t:
        for url in urls:
            log("Retrieving starsystem data at {}", url)
            try:
                response = requests.get(url, stream=True)
                reader = csv.DictReader(io.TextIOWrapper(response.raw))

                for row in reader:
                    # Parse and reformat system info from CSV
                    name, word_ct = re.subn(r'\s+', ' ', row['name'].strip())
                    name_lower = name.lower()
                    first_word, *unused = name_lower.split(" ", 1)
                    word_ct += 1
                    # Only record coordinates when all three axes are present.
                    if all((row['x'], row['y'], row['z'])):
                        xz = "({x},{z})".format(**row)
                        y = row['y']
                    else:
                        xz = y = ''
                    system_raw = {
                        'eddb_id': str(row['id']),
                        'name_lower': name_lower,
                        'name': name,
                        'first_word': first_word,
                        'xz': xz,
                        'y': y,
                        'word_ct': str(word_ct)
                    }
                    pending_flush += 1
                    buffer.write("\t".join(getter(system_raw)))
                    buffer.write("\n")

                    if pending_flush >= FLUSH_THRESHOLD:
                        flush()
            except ValueError:
                # Malformed data; the refresh is best-effort, so skip this URL silently.
                pass
            except Exception as ex:
                log("Failed to retrieve data")
                import traceback
                traceback.print_exc()
            flush()
        log("Creating index")
        exec("CREATE INDEX ON {ts}(eddb_id)")
    stats['load'] += t.seconds

    with timed() as t:
        log("Removing possible duplicates")
        # Not the most elegant, but it'll do
        # Keeps only the last-loaded row (highest temp id) per eddb_id.
        exec("""
            WITH latest AS (
                SELECT MAX(id) AS id, eddb_id
                FROM {ts}
                GROUP BY eddb_id
            ) DELETE FROM {ts} AS t USING latest WHERE latest.eddb_id=t.eddb_id AND latest.id<>t.id
        """)
        log("Removing non-updates to existing systems")
        # If a starsystem has been updated, at least one of 'name', 'xz' or 'y' are guaranteed to have changed.
        # (A change that affects word_ct would affect name as well, for instance.)
        # Delete any temporary systems that exist in the real table with matching attributes.
        exec("""
            DELETE FROM {ts} AS t USING {s} AS s
            WHERE s.eddb_id=t.eddb_id
            AND ROW(s.name, s.y) IS NOT DISTINCT FROM ROW(t.name, t.y)
            AND ((s.xz IS NULL)=(t.xz IS NULL)) AND (s.xz~=t.xz OR s.xz IS NULL)
        """)
    stats['prune'] += t.seconds

    with timed() as t:
        log("Building list of distinct prefixes")
        # Create list of unique prefixes in this batch
        exec("""
            CREATE TEMPORARY TABLE {tsp} ON COMMIT DROP
            AS SELECT DISTINCT first_word, word_ct FROM {ts}
        """)

        # Outdate stats on listed prefixes
        # This is now implemented differently.
        # exec("""
        #     UPDATE {sp} AS sp
        #     SET ratio=NULL, cume_ratio=NULL
        #     FROM {tsp} AS t
        #     WHERE sp.first_word=t.first_word AND sp.word_ct=t.word_ct
        # """)

        # Insert new prefixes
        # NOTE(review): the join hardcodes 'starsystem_prefix' rather than using the
        # {sp} substitution -- only correct while StarsystemPrefix.__tablename__ matches.
        exec("""
            INSERT INTO {sp} (first_word, word_ct)
            SELECT t.first_word, t.word_ct
            FROM
                {tsp} AS t
                LEFT JOIN starsystem_prefix AS sp ON sp.first_word=t.first_word AND sp.word_ct=t.word_ct
            WHERE sp.first_word IS NULL
        """)
    stats['prefixes'] += t.seconds

    with timed() as t:
        # Update existing systems
        exec("""
            UPDATE {s} AS s
            SET name_lower=t.name_lower, name=t.name, first_word=t.first_word, word_ct=t.word_ct, xz=t.xz, y=t.y
            FROM {ts} AS t
            WHERE s.eddb_id=t.eddb_id
        """)

        # Insert new systems
        exec("""
            INSERT INTO {s} (eddb_id, name_lower, name, first_word, word_ct, xz, y)
            SELECT t.eddb_id, t.name_lower, t.name, t.first_word, t.word_ct, t.xz, t.y
            FROM {ts} AS t
            LEFT JOIN {s} AS s ON s.eddb_id=t.eddb_id
            WHERE s.eddb_id IS NULL
        """)
    stats['systems'] += t.seconds

    with timed() as t:
        log('Computing prefix statistics')
        # Recompute ratio/cume_ratio via window functions, restricted to prefixes
        # whose first_word appeared in this batch.
        exec("""
            UPDATE {sp} SET ratio=t.ratio, cume_ratio=t.cume_ratio
            FROM (
                SELECT
                    t.first_word, t.word_ct, ct/(SUM(ct) OVER w) AS ratio,
                    (SUM(ct) OVER p)/(SUM(ct) OVER w) AS cume_ratio
                FROM (
                    SELECT sp.*, COUNT(s.eddb_id) AS ct
                    FROM
                        {sp} AS sp
                        LEFT JOIN {s} AS s USING (first_word, word_ct)
                    WHERE sp.first_word IN(SELECT first_word FROM {tsp})
                    GROUP BY sp.first_word, sp.word_ct
                    HAVING COUNT(*) > 0
                ) AS t
                WINDOW
                w AS (PARTITION BY t.first_word ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
                p AS (PARTITION BY t.first_word ORDER BY t.word_ct ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
            ) AS t
            WHERE {sp}.first_word=t.first_word AND {sp}.word_ct=t.word_ct
        """)
    stats['stats'] += t.seconds
    with timed() as t:
        log("Analyzing tables")
        exec("ANALYZE {sp}")
        exec("ANALYZE {s}")
    stats['optimize'] += t.seconds

    log("Starsystem database update complete")
    # Update refresh time
    try:
        status = get_status(db)
        status.starsystem_refreshed = sql.func.clock_timestamp()
        db.add(status)
        db.commit()
    except Exception as ex:
        import traceback
        traceback.print_exc()
        raise
    log("Starsystem database update committed")

    with timed() as t:
        log("Rebuilding bloom filter")
        refresh_bloom(bot)
    stats['bloom'] += t.seconds

    overall_timer.stop()
    stats['misc'] = overall_timer.seconds - sum(stats.values())
    stats['total'] = overall_timer.seconds
    bot.memory['ratbot']['stats']['starsystem_refresh'] = stats
    log("Starsystem refresh finished")
    return True