Example #1
0
 def __init__(self, root_dir: Path, save_threshold: int = 10):
     """
     Open the ``images.db`` file storage under the data directory and make
     sure the root object exposes the ``by_path`` and ``by_hash`` trees.

     :param root_dir: directory handed to ``_get_data_dir`` to locate the
         data directory
     :param save_threshold: stored unchanged on the instance
     """
     self.root_dir = root_dir
     self.save_threshold = save_threshold
     self.data_dir = self._get_data_dir(self.root_dir)
     # Build the database file name once and reuse it for both the public
     # ``storage_path`` attribute and the storage itself.
     db_file = os.path.join(self.data_dir, "images.db")
     self.storage_path = Path(db_file)
     self.storage = FileStorage(db_file)
     self.db = DB(self.storage)
     self.connection = self.db.open()
     self.root = self.connection.root
     # Decide per index rather than by "did the file exist before opening":
     # a file can exist without the trees ever having been stored in it, and
     # keying on file existence would leave such a database without its
     # indexes. Existing trees are never replaced.
     for index_name in ("by_path", "by_hash"):
         if not hasattr(self.root, index_name):
             setattr(self.root, index_name, OOBTree.BTree())
     self.mod_count = 0
Example #2
0
 def register(self, *instances):
     """
     Store each instance under ``self._root[instance.namespace][instance.key]``.

     A namespace that is not yet present gets a fresh, empty BTree. A key
     that is already registered in its namespace is left untouched, so the
     first instance registered under a key wins.
     """
     for item in instances:
         ns = item.namespace
         if ns not in self._root:
             self._root[ns] = OOBTree.BTree()
         bucket = self._root[ns]
         if item.key in bucket:
             # Already registered: keep the existing entry.
             continue
         bucket[item.key] = item
Example #3
0
def init_attractor_db_structure(db_conn):
    """
    Create the empty containers and counters on the database root that are
    used for storing aggregated attractors.

    :param db_conn: database connection
    :return: None
    """
    root = db_conn.root

    # Tree containers, all starting out empty.
    for tree_name in (
            "aggregated_attractors",
            "aggregated_attractor_keys_by_batch_index",
            "sorted_aggregated_attractors"):
        setattr(root, tree_name, OOBTree.BTree())

    # Summary statistics of attractors, all starting from the default
    # Length value.
    for counter_name in (
            "n_aggregated_attractors",
            "total_frequency",
            "n_aggregated_attractor_batches"):
        setattr(root, counter_name, Length.Length())

    # Least common multiple of sizes of aggregated attractor batches;
    # initialised to 1.
    root.aggregated_attractor_batch_sizes_lcm = Length.Length(1)
Example #4
0
    def __size_blob_dir(self, is_cache_dir_name=re.compile(r'\d+$').match):
        """
        Measure the blob files stored beneath ``self.blob_dir``.

        Returns a pair ``(size, files_by_atime)``: the summed ``st_size`` of
        every blob file found, and a BTree mapping each ``st_atime`` to the
        list of full paths of the blob files having that access time.

        *is_cache_dir_name* decides which subdirectories are descended
        into; by default, names ending in digits.
        """
        # TODO: nti.zodb.containers can map time.time() values to integers
        # for use with the (smaller, faster) IOBTree. Use that only if the
        # transformation is shown to preserve ordering (it is lossless and
        # reversible, but order preservation is unproven), since the
        # minimum atime must be poppable.
        #
        # Other possible optimizations: avoid the list until a second file
        # shares an atime, and/or use tuples, which the GC stops tracking
        # once they survive a collection.
        suffix = ZODB.blob.BLOB_SUFFIX
        paths_by_atime = OOBTree.BTree()
        total_bytes = 0

        # os.walk() rather than os.listdir(): on 3.5+ it is much faster
        # because it uses os.scandir(). Calling os.scandir() directly might
        # save some stat calls once only 3.5+ is supported.
        for parent, subdirs, names in os.walk(self.blob_dir):
            # Top-down walk: prune in place so that only directories
            # matching the OID components (expected to be a single level)
            # are visited.
            subdirs[:] = list(filter(is_cache_dir_name, subdirs))

            for name in names:
                if not name.endswith(suffix):
                    # Not a blob file.
                    continue

                # Unlike the ZEO version, which returns
                # os.path.join(dirname, file_name) and must later
                # re-combine it with blob_dir, record the full path that is
                # already at hand.
                blob_path = os.path.join(parent, name)
                info = os.stat(blob_path)
                total_bytes += info.st_size

                atime = info.st_atime
                same_atime = paths_by_atime.get(atime)
                if same_atime is None:
                    same_atime = paths_by_atime[atime] = []
                same_atime.append(blob_path)

        logger.debug("Blob cache size for %s: %s", self.blob_dir,
                     byte_display(total_bytes))
        return total_bytes, paths_by_atime
Example #5
0
def init_simulation_db_structure(db_conn):
    """
    Create the empty container and counters on the database root that are
    used for storing simulations.

    :param db_conn: DB connection
    :return: None
    """
    root = db_conn.root

    # Container for the simulations themselves, starting out empty.
    root.simulations = OOBTree.BTree()

    # Summary statistics of simulations, starting from the default Length
    # value.
    for counter_name in ("n_simulations", "n_simulation_batches"):
        setattr(root, counter_name, Length.Length())

    # Least common multiple of sizes of simulation batches; initialised
    # to 1.
    root.simulation_batch_sizes_lcm = Length.Length(1)
Example #6
0
 def __init__(self, title):
     """Remember *title* and start with an empty ``categories`` tree."""
     empty_tree = OOBTree.BTree()
     self.title = title
     self.categories = empty_tree