def __init__(self, root_dir: Path, save_threshold: int = 10):
    """Open the ``images.db`` file storage located in the data directory.

    The data directory is obtained from ``self._get_data_dir(root_dir)``.
    When ``images.db`` is not present before the storage is opened, the
    ``by_path`` and ``by_hash`` trees are created on the database root.

    :param root_dir: directory handed to ``_get_data_dir``
    :param save_threshold: stored as-is on the instance; not used here
    """
    self.root_dir = root_dir
    self.save_threshold = save_threshold
    self.data_dir = self._get_data_dir(self.root_dir)

    db_file = os.path.join(self.data_dir, "images.db")
    self.storage_path = Path(db_file)
    # The existence check has to run before FileStorage is constructed,
    # otherwise a brand-new database could not be told apart from an
    # existing one.
    is_new_database = not self.storage_path.exists()

    self.storage = FileStorage(db_file)
    self.db = DB(self.storage)
    self.connection = self.db.open()
    self.root = self.connection.root

    if is_new_database:
        # One empty tree per index, created in the same order as before.
        for index_name in ("by_path", "by_hash"):
            setattr(self.root, index_name, OOBTree.BTree())

    self.mod_count = 0
def register(self, *instances):
    """Store each instance under ``self._root[namespace][key]``.

    A namespace tree is created on first use.  An instance whose key is
    already present in its namespace is skipped, so the existing entry is
    never overwritten.
    """
    root = self._root
    for obj in instances:
        ns = obj.namespace
        if ns not in root:
            root[ns] = OOBTree.BTree()
        bucket = root[ns]
        key = obj.key
        if key in bucket:
            # Already registered: keep the first entry.
            continue
        bucket[key] = obj
def init_attractor_db_structure(db_conn):
    """
    Create the containers and counters used for aggregated attractors.

    Every attribute is (re)assigned on ``db_conn.root`` with a fresh object.

    :param db_conn: database connection
    :return: None
    """
    root = db_conn.root

    # Containers for the aggregated attractors themselves.
    for tree_name in (
        "aggregated_attractors",
        "aggregated_attractor_keys_by_batch_index",
        "sorted_aggregated_attractors",
    ):
        setattr(root, tree_name, OOBTree.BTree())

    # Summary statistics of attractors.
    for counter_name in (
        "n_aggregated_attractors",
        "total_frequency",
        "n_aggregated_attractor_batches",
    ):
        setattr(root, counter_name, Length.Length())

    # Least common multiple of the sizes of aggregated attractor batches;
    # initialised with 1.
    root.aggregated_attractor_batch_sizes_lcm = Length.Length(1)
def __size_blob_dir(self, is_cache_dir_name=re.compile(r'\d+$').match):
    """Measure the blob files stored under ``self.blob_dir``.

    :param is_cache_dir_name: predicate applied to each sub-directory name;
        only directories for which it returns a true value are descended
        into.
    :return: ``(size, files_by_atime)`` where ``size`` is the sum of
        ``st_size`` over all blob files found and ``files_by_atime`` is a
        BTree ``{atime: [full path to blob file]}``.
    """
    # TODO: nti.zodb.containers has support for mapping time.time() values
    # into integers for use with the (smaller, faster) IOBTree. Use that
    # if we can prove that we can pop the min atime successfully (the
    # transformation is lossless and reversible, but it is not yet proven
    # that it also maintains order).
    #
    # Other possible optimizations: don't use a list until more than one
    # file shares an atime, and/or use tuples instead of lists, since
    # tuples aren't tracked by the GC like lists are (after they survive
    # one collection, anyway).
    suffix = ZODB.blob.BLOB_SUFFIX
    paths_by_atime = OOBTree.BTree()
    total_bytes = 0

    # os.walk() rather than os.listdir(): on 3.5+ it is much faster thanks
    # to os.scandir(). On Python 3.5+ *only*, calling os.scandir directly
    # might save some stat calls.
    for parent, subdirs, names in os.walk(self.blob_dir):
        # Top-down walk: prune in place so that only directories matching
        # the OID components (there should be one level of them) are
        # visited.
        subdirs[:] = [name for name in subdirs if is_cache_dir_name(name)]

        for name in names:
            if not name.endswith(suffix):
                continue
            full_path = os.path.join(parent, name)
            info = os.stat(full_path)
            total_bytes += info.st_size
            atime = info.st_atime
            # The ZEO version records os.path.join(dirname, file_name) and
            # later has to re-join it with blob_dir to get a usable path;
            # it is unclear why (temporary memory savings, perhaps?).
            # Here the full path is stored directly.
            if atime in paths_by_atime:
                paths_by_atime[atime].append(full_path)
            else:
                paths_by_atime[atime] = [full_path]

    logger.debug(
        "Blob cache size for %s: %s",
        self.blob_dir,
        byte_display(total_bytes),
    )
    return total_bytes, paths_by_atime
def init_simulation_db_structure(db_conn):
    """
    Create the container and counters used for storing simulations.

    Every attribute is (re)assigned on ``db_conn.root`` with a fresh object.

    :param db_conn: DB connection
    :return: None
    """
    root = db_conn.root

    root.simulations = OOBTree.BTree()

    # Summary statistics of simulations.
    for counter_name in ("n_simulations", "n_simulation_batches"):
        setattr(root, counter_name, Length.Length())

    # Least common multiple of the sizes of simulation batches;
    # initialised with 1.
    root.simulation_batch_sizes_lcm = Length.Length(1)
def __init__(self, title):
    """Remember ``title`` and start with a new, empty ``categories`` tree."""
    self.title = title
    # Nothing in this constructor populates the tree.
    empty_tree = OOBTree.BTree()
    self.categories = empty_tree