예제 #1
0
    def _clean(self, n):
        """
        obtain n% oldest entries by looking into the usage databases. Then these entries
        are deleted first from the traj_info db and afterwards from the associated LRU dbs.

        :param n: delete n% entries in traj_info db [and associated LRU (usage) dbs].
        """
        # delete the n % oldest entries in the database
        import sqlite3
        num_delete = int(self.num_entries / 100.0 * n)
        logger.debug("removing %i entries from db" % num_delete)
        lru_dbs = self._database.execute(
            "select hash, lru_db from traj_info").fetchall()
        lru_dbs.sort(key=itemgetter(1))
        hashs_by_db = {}
        age_by_hash = []
        for k, v in itertools.groupby(lru_dbs, key=itemgetter(1)):
            hashs_by_db[k] = list(x[0] for x in v)

        # debug: distribution
        len_by_db = {
            os.path.basename(db): len(hashs_by_db[db])
            for db in hashs_by_db.keys()
        }
        logger.debug("distribution of lru: %s", str(len_by_db))
        ### end dbg

        # collect timestamps from databases
        for db in hashs_by_db.keys():
            with sqlite3.connect(db, timeout=self.lru_timeout) as conn:
                rows = conn.execute(
                    "select hash, last_read from usage").fetchall()
                for r in rows:
                    age_by_hash.append((r[0], float(r[1]), db))

        # sort by age
        age_by_hash.sort(key=itemgetter(1))
        if len(age_by_hash) >= 2:
            assert [age_by_hash[-1] > age_by_hash[-2]]
        ids = map(itemgetter(0), age_by_hash[:num_delete])
        ids = tuple(map(str, ids))

        sql_compatible_ids = SqliteDB._format_tuple_for_sql(ids)

        with self._database as c:
            c.execute("DELETE FROM traj_info WHERE hash in (%s)" %
                      sql_compatible_ids)

            # iterate over all LRU databases and delete those ids, we've just deleted from the main db.
            # Do this within the same execution block of the main database, because we do not want the entry to be deleted,
            # in case of a subsequent failure.
            age_by_hash.sort(key=itemgetter(2))
            for db, values in itertools.groupby(age_by_hash,
                                                key=itemgetter(2)):
                values = tuple(v[0] for v in values)
                with sqlite3.connect(db, timeout=self.lru_timeout) as conn:
                    stmnt = "DELETE FROM usage WHERE hash IN (%s)" \
                            % SqliteDB._format_tuple_for_sql(values)
                    curr = conn.execute(stmnt)
                    assert curr.rowcount == len(values), curr.rowcount
예제 #2
0
    def set(self, traj_info):
        import sqlite3
        values = (
            traj_info.hash_value,
            traj_info.length,
            traj_info.ndim,
            np.array(traj_info.offsets),
            traj_info.abs_path,
            TrajectoryInfoCache.DB_VERSION,
            # lru db
            self._database_from_key(traj_info.hash_value))
        statement = (
            "INSERT INTO traj_info (hash, length, ndim, offsets, abs_path, version, lru_db)"
            "VALUES (?, ?, ?, ?, ?, ?, ?)", values)
        try:
            with self._database as c:
                c.execute(*statement)
        except sqlite3.IntegrityError as ie:
            logger.debug("insert failed: %s", ie, exc_info=True)
            return

        self._update_time_stamp(hash_value=traj_info.hash_value)

        if self.filename is not None:
            current_size = os.stat(self.filename).st_size
            if (self.num_entries >= config.traj_info_max_entries or
                    # current_size is in bytes, while traj_info_max_size is in MB
                    1. * current_size / 1024**2 >= config.traj_info_max_size):
                logger.info(
                    "Cleaning database because it has too much entries or is too large.\n"
                    "Entries: %s. Size: %.2fMB. Configured max_entires: %s. Max_size: %sMB"
                    %
                    (self.num_entries, (current_size * 1.0 / 1024**2),
                     config.traj_info_max_entries, config.traj_info_max_size))
                self._clean(n=self.clean_n_entries)
예제 #3
0
 def _update():
     import sqlite3
     try:
         with sqlite3.connect(db_name,
                              timeout=self.lru_timeout) as conn:
             """ last_read is a result of time.time()"""
             conn.execute('CREATE TABLE IF NOT EXISTS usage '
                          '(hash VARCHAR(32), last_read FLOAT)')
             conn.commit()
             cur = conn.execute('select * from usage where hash=?',
                                (hash_value, ))
             row = cur.fetchone()
             if not row:
                 conn.execute(
                     "insert into usage(hash, last_read) values(?, ?)",
                     (hash_value, time.time()))
             else:
                 conn.execute(
                     "update usage set last_read=? where hash=?",
                     (time.time(), hash_value))
             conn.commit()
     except sqlite3.OperationalError:
         # if there are many jobs to write to same database at same time, the timeout could be hit
         logger.debug('could not update LRU info for db %s', db_name)