def cmd_sysrefresh(bot, trigger, db=None):
    """
    Refreshes the starsystem database if you have halfop or better. Reports the last refresh time otherwise.

    -f: Force refresh even if data is not yet stale. Requires op.
    -p: Skip pruning of unchanged systems. Requires op.
    """
    access = ratlib.sopel.best_channel_mode(bot, trigger.nick)
    privileged = access & (HALFOP | OP)
    msg = ""

    if privileged:
        options = "" if not trigger.group(2) or trigger.group(2)[0] != '-' else trigger.group(2)[1:]
        force = 'f' in options and (access & OP)
        prune = not ('p' in options and (access & OP))
        try:
            refreshed = refresh_database(
                bot, force=force, prune=prune,
                callback=lambda: bot.say("Starting starsystem refresh...")
            )
            if refreshed:
                bot.say(refresh_time_stats(bot))
                return
            msg = "Not yet. "
        except ConcurrentOperationError:
            bot.say("A starsystem refresh operation is already in progress.")
            return

    when = get_status(db).starsystem_refreshed
    if not when:
        msg += "The starsystem database appears to have never been initialized."
    else:
        when = when.astimezone(datetime.timezone.utc)
        msg += (
            "The starsystem database was refreshed at {} ({}) or an update is still in progress."
            " It is only allowed every {} seconds."
        ).format(
            timeutil.format_timestamp(when),
            timeutil.format_timedelta(when),
            bot.config.ratbot.edsm_maxage or '<unknown>'
        )
    bot.say(msg)
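
# The refresh_database() wrapper called above is not shown in this excerpt.  Judging from the
# ConcurrentOperationError handling, it is assumed to guard _refresh_database() with a
# non-blocking lock, roughly like this sketch (lock name and structure hypothetical):
#
#     _refresh_lock = threading.Lock()
#
#     def refresh_database(bot, **kwargs):
#         if not _refresh_lock.acquire(blocking=False):
#             raise ConcurrentOperationError()
#         try:
#             return _refresh_database(bot, **kwargs)
#         finally:
#             _refresh_lock.release()
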
def _refresh_database(bot, force=False, prune=True, callback=None, background=False, db=None):
    """
    Actual implementation of refresh_database.

    Refreshes the database of starsystems. Also rebuilds the bloom filter.

    :param bot: Bot instance
    :param force: True to force refresh
    :param prune: True to prune non-updated systems. Keep True unless performance testing.
    :param callback: Optional function that is called as soon as the system determines a refresh is needed.
    :param background: If True and a refresh is needed, it is submitted as a background task rather than
        running immediately.
    :param db: Database handle

    Note that this function executes some raw SQL queries (among other voodoo). This is for performance
    reasons concerning the insanely large dataset being handled, and should NOT serve as an example for
    implementation elsewhere.
    """
    eddb_url = bot.config.ratbot.edsm_url or "https://eddb.io/archive/v5/systems.csv"
    chunked = bot.config.ratbot.chunked_systems
    # Should really implement this, but until then:
    if chunked:
        raise NotImplementedError("Chunked system loading is not implemented yet.")
    status = get_status(db)
    eddb_maxage = float(bot.config.ratbot.edsm_maxage or (7 * 86400))  # Once per week = 604800 seconds
    if not (
        force
        or not status.starsystem_refreshed
        or (datetime.datetime.now(tz=datetime.timezone.utc) - status.starsystem_refreshed).total_seconds()
            > eddb_maxage
    ):
        # No refresh needed.
        return False
    if callback:
        callback()
    if background:
        print('Scheduling background refresh of starsystem data')
        return bot.memory['ratbot']['executor'].submit(
            _refresh_database, bot, force=True, callback=None, background=False
        )
    conn = db.connection()
    # Now in the actual implementation, beyond background scheduling.

    # Counters for stats.  All times are in seconds.
    stats = {
        'load': 0,      # Time spent retrieving the CSV file(s) and dumping them into a temptable in the db.
        'index': 0,     # Time spent retrieving the chunk index (chunked mode only).
        'prune': 0,     # Time spent removing non-update updates.
        'systems': 0,   # Time spent merging starsystems into the db.
        'prefixes': 0,  # Time spent merging starsystem prefixes into the db.
        'stats': 0,     # Time spent (re)computing system statistics.
        'bloom': 0,     # Time spent (re)building the system prefix bloom filter.
        'optimize': 0,  # Time spent optimizing/analyzing tables.
        'misc': 0,      # Miscellaneous tasks (total time - all other stats).
        'total': 0,     # Total time spent.
    }

    def log(fmt, *args, **kwargs):
        print("[{}] ".format(datetime.datetime.now()) + fmt.format(*args, **kwargs))

    overall_timer = TimedResult()
    log("Starsystem refresh started")

    if chunked:
        # FIXME: Needs to be reimplemented.
        log("Retrieving starsystem index at {}", eddb_url)
        with timed() as t:
            response = requests.get(eddb_url)
            response.raise_for_status()
            urls = list(urljoin(eddb_url, chunk["SectorName"]) for chunk in response.json())
        stats['index'] += t.seconds
        log("{} file(s) queued for starsystem refresh. (Took {})", len(urls), format_timedelta(t.delta))
    else:
        urls = [eddb_url]
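
    # NOTE: timed() / TimedResult come from elsewhere in the codebase (not shown in this
    # excerpt).  Based on how they are used here, they are assumed to behave roughly like:
    #
    #     @contextlib.contextmanager
    #     def timed():
    #         result = TimedResult()   # records the start time
    #         try:
    #             yield result
    #         finally:
    #             result.stop()        # fixes .seconds (float) and .delta (timedelta)
    #
    # This is an illustrative sketch, not the actual implementation.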
    temptable = sa.Table(
        '_temp_new_starsystem', sa.MetaData(),
        sa.Column('id', sa.Integer, primary_key=True, autoincrement=True),
        sa.Column('eddb_id', sa.Integer),
        sa.Column('name_lower', sa.Text(collation="C")),
        sa.Column('name', sa.Text(collation="C")),
        sa.Column('first_word', sa.Text(collation="C")),
        sa.Column('word_ct', sa.Integer),
        sa.Column('xz', SQLPoint),
        sa.Column('y', sa.Numeric),
        # sa.Index('_temp_id_ix', 'eddb_id'),
        prefixes=['TEMPORARY'],
        postgresql_on_commit='DROP'
    )
    temptable.create(conn)
    sql_args = {
        'sp': StarsystemPrefix.__tablename__,
        's': Starsystem.__tablename__,
        'ts': temptable.name,
        'tsp': '_temp_new_prefixes'
    }
    buffer = io.StringIO()  # Temporary IO buffer for COPY FROM
    columns = ['eddb_id', 'name_lower', 'name', 'first_word', 'word_ct', 'xz', 'y']  # Columns copied to temptable
    getter = operator.itemgetter(*columns)
    total_flushed = 0  # Total number of flushed items so far
    pending_flush = 0  # Number of items waiting to flush

    def exec(sql, *args, **kwargs):
        try:
            conn.execute(sql.format(*args, **kwargs, **sql_args))
        except Exception:
            log("Query failed.")
            import traceback
            traceback.print_exc()
            raise

    def flush():
        nonlocal buffer, total_flushed, pending_flush
        if not pending_flush:
            return
        log("Flushing system(s) {}-{}", total_flushed + 1, total_flushed + pending_flush)
        buffer.seek(0)
        cursor = conn.connection.cursor()
        cursor.copy_from(buffer, temptable.name, sep='\t', null='', columns=columns)
        buffer = io.StringIO()
        total_flushed += pending_flush
        pending_flush = 0

    with timed() as t:
        for url in urls:
            log("Retrieving starsystem data at {}", url)
            try:
                response = requests.get(url, stream=True)
                reader = csv.DictReader(io.TextIOWrapper(response.raw))
                for row in reader:
                    # Parse and reformat system info from CSV.  re.subn() collapses whitespace runs
                    # and counts them; runs + 1 = number of words in the stripped name.
                    name, word_ct = re.subn(r'\s+', ' ', row['name'].strip())
                    name_lower = name.lower()
                    first_word, *unused = name_lower.split(" ", 1)
                    word_ct += 1
                    if all((row['x'], row['y'], row['z'])):
                        xz = "({x},{z})".format(**row)
                        y = row['y']
                    else:
                        xz = y = ''
                    system_raw = {
                        'eddb_id': str(row['id']),
                        'name_lower': name_lower,
                        'name': name,
                        'first_word': first_word,
                        'xz': xz,
                        'y': y,
                        'word_ct': str(word_ct)
                    }
                    pending_flush += 1
                    buffer.write("\t".join(getter(system_raw)))
                    buffer.write("\n")
                    if pending_flush >= FLUSH_THRESHOLD:
                        flush()
            except ValueError:
                pass
            except Exception:
                log("Failed to retrieve data")
                import traceback
                traceback.print_exc()
        flush()
        log("Creating index")
        exec("CREATE INDEX ON {ts}(eddb_id)")
    stats['load'] += t.seconds
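
    # Note on the non-update prune query in the block below: ROW(a, b) IS NOT DISTINCT FROM
    # ROW(c, d) treats NULLs as equal (NULL IS NOT DISTINCT FROM NULL yields TRUE, whereas
    # NULL = NULL yields NULL), so unchanged rows match even when y is NULL.  The point-typed
    # xz column is compared separately with PostgreSQL's '~=' ("same as") operator, since the
    # point type has no standard '=' operator.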
    with timed() as t:
        log("Removing possible duplicates")
        exec("DELETE FROM {ts} WHERE id NOT IN(SELECT MAX(id) FROM {ts} GROUP BY eddb_id)")
        # No need for the temporary 'id' column at this point.
        exec("ALTER TABLE {ts} DROP id CASCADE")
        # Making this a primary key (or even just a unique key) apparently affects query planner
        # performance vs the non-existing unique key.
        exec("ALTER TABLE {ts} ADD PRIMARY KEY(eddb_id)")
        if prune:
            log("Removing non-updates to existing systems")
            # If a starsystem has been updated, at least one of 'name', 'xz' or 'y' is guaranteed
            # to have changed.  (A change that affects word_ct would affect name as well, for
            # instance.)  Delete any temporary systems that exist in the real table with matching
            # attributes.
            exec("""
                DELETE FROM {ts} AS t
                USING {s} AS s
                WHERE
                    s.eddb_id=t.eddb_id
                    AND ROW(s.name, s.y) IS NOT DISTINCT FROM ROW(t.name, t.y)
                    AND ((s.xz IS NULL)=(t.xz IS NULL))
                    AND (s.xz~=t.xz OR s.xz IS NULL)
            """)
        else:
            log("Skipping non-update removal phase")
    stats['prune'] += t.seconds

    with timed() as t:
        log("Building list of distinct prefixes")
        # Create list of unique prefixes in this batch.
        exec("""
            CREATE TEMPORARY TABLE {tsp} ON COMMIT DROP AS
            SELECT DISTINCT first_word, word_ct FROM {ts}
        """)
        # Insert new prefixes.
        exec("""
            INSERT INTO {sp} (first_word, word_ct)
            SELECT t.first_word, t.word_ct
            FROM {tsp} AS t
            LEFT JOIN {sp} AS sp ON sp.first_word=t.first_word AND sp.word_ct=t.word_ct
            WHERE sp.first_word IS NULL
        """)
    stats['prefixes'] += t.seconds

    with timed() as t:
        log("Updating existing systems.")
        exec("""
            UPDATE {s} AS s
            SET name_lower=t.name_lower, name=t.name, first_word=t.first_word,
                word_ct=t.word_ct, xz=t.xz, y=t.y
            FROM {ts} AS t
            WHERE s.eddb_id=t.eddb_id
        """)
        log("Inserting new systems.")
        exec("""
            INSERT INTO {s} (eddb_id, name_lower, name, first_word, word_ct, xz, y)
            SELECT t.eddb_id, t.name_lower, t.name, t.first_word, t.word_ct, t.xz, t.y
            FROM {ts} AS t
            LEFT JOIN {s} AS s ON s.eddb_id=t.eddb_id
            WHERE s.eddb_id IS NULL
        """)
    stats['systems'] += t.seconds

    with timed() as t:
        log('Computing prefix statistics')
        exec("""
            UPDATE {sp} SET ratio=t.ratio, cume_ratio=t.cume_ratio
            FROM (
                SELECT
                    t.first_word, t.word_ct,
                    ct/(SUM(ct) OVER w) AS ratio,
                    (SUM(ct) OVER p)/(SUM(ct) OVER w) AS cume_ratio
                FROM (
                    SELECT sp.*, COUNT(s.eddb_id) AS ct
                    FROM {sp} AS sp
                    LEFT JOIN {s} AS s USING (first_word, word_ct)
                    WHERE sp.first_word IN(SELECT first_word FROM {tsp})
                    GROUP BY sp.first_word, sp.word_ct
                    HAVING COUNT(*) > 0
                ) AS t
                WINDOW
                    w AS (PARTITION BY t.first_word
                          ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
                    p AS (PARTITION BY t.first_word ORDER BY t.word_ct
                          ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
            ) AS t
            WHERE {sp}.first_word=t.first_word AND {sp}.word_ct=t.word_ct
        """)
    stats['stats'] += t.seconds

    with timed() as t:
        log("Analyzing tables")
        exec("ANALYZE {sp}")
        exec("ANALYZE {s}")
    stats['optimize'] += t.seconds

    log("Starsystem database update complete")
    # Update refresh time.
    try:
        status = get_status(db)
        status.starsystem_refreshed = sql.func.clock_timestamp()
        db.add(status)
        db.commit()
    except Exception:
        import traceback
        traceback.print_exc()
        raise
    log("Starsystem database update committed")

    with timed() as t:
        log("Rebuilding bloom filter")
        refresh_bloom(bot)
    stats['bloom'] += t.seconds

    overall_timer.stop()
    stats['misc'] = overall_timer.seconds - sum(stats.values())
    stats['total'] = overall_timer.seconds
    bot.memory['ratbot']['stats']['starsystem_refresh'] = stats
    log("Starsystem refresh finished")
    return True
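
# Illustrative example of the prefix statistics computed above (numbers invented): if the
# prefix table holds ("col", word_ct=1) covering 10 systems and ("col", word_ct=4) covering
# 90 systems, then for word_ct=1 ratio = 10/100 = 0.10 and cume_ratio = 0.10, while for
# word_ct=4 ratio = 90/100 = 0.90 and cume_ratio = (10+90)/100 = 1.00.  That is, ratio is
# the share of a first_word's systems having that word count, and cume_ratio accumulates
# those shares in word_ct order.
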
def _refresh_database(bot, force=False, callback=None, background=False, db=None):
    """
    Actual implementation of refresh_database.

    Refreshes the database of starsystems. Also rebuilds the bloom filter.

    :param bot: Bot instance
    :param force: True to force refresh
    :param callback: Optional function that is called as soon as the system determines a refresh is needed.
    :param background: If True and a refresh is needed, it is submitted as a background task rather than
        running immediately.
    :param db: Database handle
    """
    start = time()
    edsm_url = bot.config.ratbot.edsm_url or "http://edsm.net/api-v1/systems?coords=1"
    status = get_status(db)
    edsm_maxage = bot.config.ratbot.maxage or 60*12*12  # 8640 seconds = 2.4 hours
    if not (
        force
        or not status.starsystem_refreshed
        or (datetime.datetime.now(tz=datetime.timezone.utc) - status.starsystem_refreshed).total_seconds()
            > edsm_maxage
    ):
        # No refresh needed.
        return False
    if callback:
        callback()
    if background:
        return bot.memory['ratbot']['executor'].submit(
            _refresh_database, bot, force=True, callback=None, background=False
        )

    fetch_start = time()
    data = requests.get(edsm_url).json()
    fetch_end = time()
    # with open('run/systems.json') as f:
    #     import json
    #     data = json.load(f)

    db.query(Starsystem).delete()        # Wipe all old data
    db.query(StarsystemPrefix).delete()  # Wipe all old data

    # Pass 1: Load JSON data into the starsystem table.
    systems = []
    ct = 0

    def _format_system(s):
        nonlocal ct
        ct += 1
        name, word_ct = re.subn(r'\s+', ' ', s['name'].strip())
        word_ct += 1
        return {
            'name_lower': name.lower(),
            'name': name,
            'x': s.get('x'), 'y': s.get('y'), 'z': s.get('z'),
            'word_ct': word_ct
        }

    load_start = time()
    for chunk in chunkify(data, 5000):
        db.bulk_insert_mappings(Starsystem, [_format_system(s) for s in chunk])
        # print(ct)
    del data
    db.connection().execute("ANALYZE " + Starsystem.__tablename__)
    load_end = time()
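
    # chunkify() is provided elsewhere in ratlib (not shown in this excerpt).  From its use
    # here it is assumed to yield successive fixed-size chunks of an iterable, roughly:
    #
    #     def chunkify(iterable, size):
    #         it = iter(iterable)
    #         while True:
    #             chunk = list(itertools.islice(it, size))
    #             if not chunk:
    #                 return
    #             yield chunk
    #
    # (Illustrative sketch only; the real helper may differ.)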
    stats_start = time()
    # Pass 2: Calculate statistics.
    # 2A: Quick insert of prefixes for single-word system names.
    db.connection().execute(
        sql.insert(StarsystemPrefix).from_select(
            (StarsystemPrefix.first_word, StarsystemPrefix.word_ct),
            db.query(Starsystem.name_lower, Starsystem.word_ct).filter(Starsystem.word_ct == 1).distinct()
        )
    )

    # 2B: Compute prefixes for multi-word system names, including the longest run of words
    # shared by all systems under each (first_word, word_ct) prefix.
    def _gen():
        for s in (
            db.query(Starsystem)
            .order_by(Starsystem.word_ct, Starsystem.name_lower)
            .filter(Starsystem.word_ct > 1)
        ):
            first_word, *words = s.name_lower.split(" ")
            yield (first_word, s.word_ct), words, s

    ct = 0
    for chunk in chunkify(itertools.groupby(_gen(), operator.itemgetter(0)), 100):
        for (first_word, word_ct), group in chunk:
            ct += 1
            const_words = None
            for _, words, system in group:
                if const_words is None:
                    const_words = words.copy()
                else:
                    for ix, (common, word) in enumerate(zip(const_words, words)):
                        if common != word:
                            const_words = const_words[:ix]
                            break
            prefix = StarsystemPrefix(
                first_word=first_word, word_ct=word_ct, const_words=" ".join(const_words)
            )
            db.add(prefix)
        # print(ct)
        db.flush()

    # Link each system to its prefix.
    db.connection().execute(
        sql.update(
            Starsystem,
            values={
                Starsystem.prefix_id: db.query(StarsystemPrefix.id).filter(
                    StarsystemPrefix.first_word == sql.func.split_part(Starsystem.name_lower, ' ', 1),
                    StarsystemPrefix.word_ct == Starsystem.word_ct
                ).as_scalar()
            }
        )
    )

    db.connection().execute(
        """
        UPDATE {sp} SET ratio=t.ratio, cume_ratio=t.cume_ratio
        FROM (
            SELECT t.id, ct/SUM(ct) OVER w AS ratio, SUM(ct) OVER p/SUM(ct) OVER w AS cume_ratio
            FROM (
                SELECT sp.*, COUNT(*) AS ct
                FROM {sp} AS sp
                INNER JOIN {s} AS s ON s.prefix_id=sp.id
                GROUP BY sp.id
                HAVING COUNT(*) > 0
            ) AS t
            WINDOW
                w AS (PARTITION BY t.first_word
                      ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
                p AS (PARTITION BY t.first_word ORDER BY t.word_ct
                      ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
        ) AS t
        WHERE t.id={sp}.id
        """.format(sp=StarsystemPrefix.__tablename__, s=Starsystem.__tablename__)
    )
    stats_end = time()

    # Update refresh time.
    status = get_status(db)
    status.starsystem_refreshed = sql.func.clock_timestamp()
    db.add(status)
    db.commit()

    bloom_start = time()
    refresh_bloom(bot)
    bloom_end = time()
    end = time()

    stats = {
        'fetch': fetch_end - fetch_start,
        'load': load_end - load_start,
        'stats': stats_end - stats_start,
        'bloom': bloom_end - bloom_start,
    }
    stats['misc'] = (end - start) - sum(stats.values())
    stats['all'] = end - start
    bot.memory['ratbot']['stats']['starsystem_refresh'] = stats
    return True
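
# Illustrative example of the const_words computation above (system names hypothetical): for
# "lhs 1234" and "lhs 3447", _gen() yields key ("lhs", 2) with words ["1234"] and ["3447"];
# the first mismatch is at index 0, so const_words is truncated to [] and the stored prefix
# is ("lhs", 2, const_words="").  If every member shared more words, e.g. "col 285 sector
# xy-1" and "col 285 sector ab-2" under key ("col", 4), const_words would be "285 sector".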