def _clean(self, n):
    """Evict the n% oldest entries from the traj_info db and its LRU dbs.

    Age is determined by the ``last_read`` timestamps stored in the
    per-database LRU ("usage") tables. Entries are deleted first from the
    main traj_info db and afterwards from the associated LRU dbs.

    :param n: delete n% of the entries in the traj_info db
              [and the associated LRU (usage) dbs].
    """
    import sqlite3
    # number of entries corresponding to n percent of the current total
    num_delete = int(self.num_entries / 100.0 * n)
    logger.debug("removing %i entries from db" % num_delete)

    lru_dbs = self._database.execute(
        "select hash, lru_db from traj_info").fetchall()
    # groupby requires its input sorted by the same key
    lru_dbs.sort(key=itemgetter(1))
    hashs_by_db = {}
    age_by_hash = []
    for k, v in itertools.groupby(lru_dbs, key=itemgetter(1)):
        hashs_by_db[k] = list(x[0] for x in v)

    # debug: distribution of entries over the LRU databases
    len_by_db = {
        os.path.basename(db): len(hashs_by_db[db])
        for db in hashs_by_db.keys()
    }
    logger.debug("distribution of lru: %s", str(len_by_db))
    ### end dbg

    # collect (hash, timestamp, source-db) triples from all LRU databases.
    # NOTE(review): sqlite3.connect used as a context manager commits but
    # does not close the connection — consider contextlib.closing here.
    for db in hashs_by_db.keys():
        with sqlite3.connect(db, timeout=self.lru_timeout) as conn:
            rows = conn.execute(
                "select hash, last_read from usage").fetchall()
            for r in rows:
                age_by_hash.append((r[0], float(r[1]), db))

    # sort ascending by timestamp, so the oldest entries come first
    age_by_hash.sort(key=itemgetter(1))
    if len(age_by_hash) >= 2:
        # sanity check: timestamps must be non-decreasing after the sort.
        # The original wrapped the comparison in a list literal
        # (`assert [..]`), which is always truthy and could never fire;
        # it also compared whole tuples rather than timestamps.
        assert age_by_hash[-1][1] >= age_by_hash[-2][1], age_by_hash[-2:]

    # stringified hashes of the num_delete oldest entries
    ids = tuple(str(i) for i in map(itemgetter(0), age_by_hash[:num_delete]))
    sql_compatible_ids = SqliteDB._format_tuple_for_sql(ids)

    with self._database as c:
        c.execute("DELETE FROM traj_info WHERE hash in (%s)"
                  % sql_compatible_ids)

    # iterate over all LRU databases and delete those ids, we've just deleted
    # from the main db. Do this within the same execution block of the main
    # database, because we do not want the entry to be deleted, in case of a
    # subsequent failure.
    age_by_hash.sort(key=itemgetter(2))  # groupby needs input sorted by db
    for db, values in itertools.groupby(age_by_hash, key=itemgetter(2)):
        values = tuple(v[0] for v in values)
        with sqlite3.connect(db, timeout=self.lru_timeout) as conn:
            stmnt = "DELETE FROM usage WHERE hash IN (%s)" \
                    % SqliteDB._format_tuple_for_sql(values)
            curr = conn.execute(stmnt)
            # every hash collected from this db must have been removed
            assert curr.rowcount == len(values), curr.rowcount
def set(self, traj_info):
    """Insert a TrajInfo record into the traj_info database.

    On a duplicate hash (IntegrityError) the insert is skipped silently
    (debug-logged). After a successful insert the LRU timestamp is
    refreshed and, if configured limits on entry count or file size are
    exceeded, the database is cleaned.

    :param traj_info: trajectory info record providing hash_value, length,
                      ndim, offsets and abs_path.
    """
    import sqlite3
    values = (
        traj_info.hash_value,
        traj_info.length,
        traj_info.ndim,
        np.array(traj_info.offsets),
        traj_info.abs_path,
        TrajectoryInfoCache.DB_VERSION,
        # lru db assigned to this hash
        self._database_from_key(traj_info.hash_value))
    statement = (
        "INSERT INTO traj_info (hash, length, ndim, offsets, abs_path, version, lru_db)"
        " VALUES (?, ?, ?, ?, ?, ?, ?)", values)
    try:
        with self._database as c:
            c.execute(*statement)
    except sqlite3.IntegrityError as ie:
        # duplicate hash: record already present, nothing to do
        logger.debug("insert failed: %s", ie, exc_info=True)
        return

    self._update_time_stamp(hash_value=traj_info.hash_value)

    if self.filename is not None:
        current_size = os.stat(self.filename).st_size
        if (self.num_entries >= config.traj_info_max_entries or
                # current_size is in bytes, while traj_info_max_size is in MB
                1. * current_size / 1024**2 >= config.traj_info_max_size):
            logger.info(
                "Cleaning database because it has too many entries or is too large.\n"
                "Entries: %s. Size: %.2fMB. Configured max_entries: %s. Max_size: %sMB"
                % (self.num_entries, (current_size * 1.0 / 1024**2),
                   config.traj_info_max_entries, config.traj_info_max_size))
            self._clean(n=self.clean_n_entries)
def _update():
    """Record a read access for ``hash_value`` in the LRU db ``db_name``.

    Closure over ``db_name``, ``self`` and ``hash_value`` from the
    enclosing scope. Creates the usage table on demand, then inserts or
    refreshes the ``last_read`` timestamp for the hash. Failures due to
    a locked database (OperationalError) are logged and ignored —
    timestamp updates are best-effort.
    """
    import sqlite3
    try:
        with sqlite3.connect(db_name, timeout=self.lru_timeout) as conn:
            # last_read is a result of time.time()
            # (was a stray no-op string statement in the original)
            conn.execute('CREATE TABLE IF NOT EXISTS usage '
                         '(hash VARCHAR(32), last_read FLOAT)')
            conn.commit()
            # NOTE(review): check-then-write is racy under concurrent
            # writers; acceptable here since a lost update only skews
            # the LRU timestamp slightly.
            cur = conn.execute('select * from usage where hash=?',
                               (hash_value, ))
            row = cur.fetchone()
            if not row:
                conn.execute(
                    "insert into usage(hash, last_read) values(?, ?)",
                    (hash_value, time.time()))
            else:
                conn.execute(
                    "update usage set last_read=? where hash=?",
                    (time.time(), hash_value))
            conn.commit()
    except sqlite3.OperationalError:
        # if there are many jobs to write to same database at same time,
        # the timeout could be hit
        logger.debug('could not update LRU info for db %s', db_name)