Ejemplo n.º 1
0
def retrieve_queue_data(env: lmdb.Environment) -> QueueData:
    """
    Load the queue settings and subscriber ids persisted in LMDB.

    Within a single read-only transaction this reads the message timeout,
    maximum message count, high water mark database size and pruning
    strategy from the queue database, then collects every key of the
    subscriber database.

    :param env: environment that stores queue data
    :return: data stored in LMDB related to the queue
    """
    with env.begin(write=False) as txn:
        queue_db = env.open_db(key=QUEUE_DB, txn=txn, create=False)

        def read_setting(key):
            # Raw value stored under ``key`` in the queue database.
            return txn.get(key=key, db=queue_db)

        message_timeout = bytes_to_int(read_setting(MESSAGE_TIMEOUT_KEY))
        max_messages = bytes_to_int(read_setting(MAX_MESSAGES_KEY))
        hwm_db_size = bytes_to_int(read_setting(HWM_DB_SIZE_KEY))
        strategy = bytes_to_str(read_setting(STRATEGY_KEY))

        subscriber_db = env.open_db(key=SUBSCRIBER_DB, txn=txn, create=False)
        # Only the keys are needed: they are the subscriber identifiers.
        key_iterator = txn.cursor(db=subscriber_db).iternext(values=False)
        subscriber_ids = {bytes_to_str(raw_id) for raw_id in key_iterator}

    return QueueData(
        message_timeout=message_timeout,
        max_messages=max_messages,
        hwm_db_size=hwm_db_size,
        strategy=strategy,
        subscriber_ids=subscriber_ids,
    )
Ejemplo n.º 2
0
    def __init__(self, model: s_datamodel.Model,
                 dbenv: lmdb.Environment) -> None:
        '''
        Creates metadata for all the indices.

        Loads the 'indices' and 'progress' records from the b'meta' table.  If
        neither record exists the table is treated as new, empty defaults are
        used and they are persisted before returning.

        Args:
            model (s_datamodel.Model): the data model passed to every _MetaEntry.
            dbenv (lmdb.Environment): the lmdb instance in which to store the metadata.

        Returns:
            None

        Raises:
            s_exc.CorruptDatabase: if only one of the two meta records is
                present, or if a 'deleting' or 'progress' entry names an index
                that is not among the present indices.
        '''
        self._dbenv = dbenv
        self.model = model

        # The table in the database file (N.B. in LMDB speak, this is called a database)
        self._metatbl = dbenv.open_db(b'meta')

        # Read plain bytes rather than buffers: the values are decoded after
        # the transaction has ended, and py-lmdb buffers must be discarded once
        # the transaction that produced them has completed.
        with dbenv.begin(db=self._metatbl) as txn:
            indices_enc = txn.get(b'indices')
            progress_enc = txn.get(b'progress')

        # The two records are written together, so exactly one missing means
        # the table is damaged.
        if (indices_enc is None) != (progress_enc is None):
            raise s_exc.CorruptDatabase(
                'missing meta information in index meta'
            )  # pragma: no cover

        is_new_db = indices_enc is None
        if is_new_db:
            # Round-trip the defaults through msgpack so that a fresh table is
            # decoded exactly like a stored one.
            indices_enc = s_msgpack.en({'present': {}, 'deleting': []})
            progress_enc = s_msgpack.en({})

        indices = s_msgpack.un(indices_enc)

        # The details about what the indices are actually indexing: the datapath and type.
        self.indices = {
            k: _MetaEntry(model, **s_msgpack.un(v))
            for k, v in indices.get('present', {}).items()
        }
        self.deleting = list(indices.get('deleting', ()))
        # Keeps track (non-persistently) of which indices have been paused
        self.asleep = defaultdict(bool)  # type: ignore

        # How far each index has progressed as well as statistics
        self.progresses = s_msgpack.un(progress_enc)
        if any(p not in self.indices for p in self.deleting):
            raise s_exc.CorruptDatabase(
                'index meta table: deleting entry with unrecognized property name'
            )  # pragma: no cover
        if any(p not in self.indices for p in self.progresses):
            raise s_exc.CorruptDatabase(
                'index meta table: progress entry with unrecognized property name'
            )  # pragma: no cover
        if is_new_db:
            self.persist()
Ejemplo n.º 3
0
class Database(object):
    """
    Representation of a Database, this is the main API class

    :param name: The name of the database to open
    :type name: str
    :param conf: Any additional or custom options for this environment; only
        the mapping stored under its ``'env'`` key is used, merged over the
        class defaults
    :type conf: dict
    :param binlog: Passed as ``create`` when opening the ``__binlog__``
        database; if that database is not found, binary logging stays disabled
    :type binlog: bool
    :param size: When truthy, overrides the ``map_size`` environment option
    :type size: int
    """
    _debug = False
    # Default keyword arguments for Environment().  Shared by all instances,
    # so it must never be modified in place; __init__ works on a copy.
    _conf = {
        'map_size': 1024 * 1024 * 1024 * 2,
        'subdir': True,
        'metasync': False,
        'sync': True,
        'lock': True,
        'max_dbs': 64,
        'writemap': True,
        'map_async': True
    }

    def __init__(self, name, conf=None, binlog=True, size=None):
        # Always start from a private copy: writing map_size into the class
        # level dict would leak one instance's size into every later instance.
        env_conf = dict(self._conf)
        if conf:
            env_conf.update(conf.get('env', {}))
        if size:
            env_conf['map_size'] = size

        self._tables = {}
        self._env = Environment(name, **env_conf)
        self._db = self._env.open_db()
        self._transaction = None
        try:
            self._binlog = self.env.open_db(b'__binlog__', create=binlog)
        except NotFoundError:
            self._binlog = None
        try:
            self._metadata = self.env.open_db(b'__metadata__', create=False)
        except NotFoundError:
            # No metadata database yet: create it and move the legacy records.
            self.migrate_metadata()

    def __del__(self):
        self.close()

    def migrate_metadata(self):
        """
        Create the ``__metadata__`` database and move legacy records into it

        Every record of the default database whose key starts with ``'@'`` is
        copied into ``__metadata__`` under the key without the ``'@'`` prefix
        and then deleted from the default database.
        """
        self._metadata = self.env.open_db(b'__metadata__', create=True)
        with self.env.begin(write=True) as txn:
            with Cursor(self._db, txn) as cursor:
                # First step positions on the first record, every later step
                # advances to the next one.
                move_next = cursor.first
                while move_next():
                    move_next = cursor.next
                    name = cursor.key().decode()
                    if name[0] == '@':
                        record = txn.get(cursor.key(), db=self._db)
                        # NOTE(review): the two failure paths differ - a failed
                        # put aborts the transaction, a failed delete returns
                        # and lets the ``with`` block finish normally.  Confirm
                        # this asymmetry is intended.
                        if not txn.put(
                                name[1:].encode(), record, db=self._metadata):
                            txn.abort()
                            print("Failed to create new record")
                            return
                        if not txn.delete(cursor.key(), db=self._db):
                            print("Failed to delete old record")
                            return

    @property
    def env(self):
        """
        Return a reference to the current database environment

        :return: A Database Environment (None once the database is closed)
        :rtype: Environment
        """
        return self._env

    @property
    def transaction(self):
        """
        Return a reference to the current transaction

        :return: The current transaction (or None)
        :rtype: DBTransaction
        """
        return self._transaction

    def binlog(self, enable=True):
        """
        Enable or disable binary logging, disabling also deletes the transaction history

        :param enable: Whether to enable or disable logging
        :type enable: bool
        """
        if enable:
            if not self._binlog:
                self._binlog = self.env.open_db(b'__binlog__')
            return

        if self._binlog:
            # Drop inside the active transaction when there is one, otherwise
            # in a dedicated write transaction.
            if self.transaction:
                self.transaction.txn.drop(self._binlog, True)
            else:
                with self.env.begin(write=True) as txn:
                    txn.drop(self._binlog, True)
        self._binlog = None

    def begin(self):
        """
        Begin a new transaction returning a transaction reference (use with "with")

        :return: Reference to the new transaction
        :rtype: DBTransaction
        """
        self._transaction = DBTransaction(self)
        return self._transaction

    def end(self):
        """
        End an existing transaction by clearing the current transaction reference
        """
        self._transaction = None

    def close(self):
        """
        Close the current database

        Safe to call more than once, and safe on an instance whose
        construction failed before the environment was opened (this method is
        invoked from ``__del__``).
        """
        env = getattr(self, '_env', None)
        if env:
            env.close()
            self._env = None

    def sync(self, force=False):
        """
        Call ``sync`` on the underlying environment

        :param force: Forwarded unchanged to the environment's ``sync``
        :type force: bool
        """
        self.env.sync(force)

    def exists(self, name):
        """
        Test whether a table with a given name already exists

        :param name: Table name
        :type name: str
        :return: True if table exists
        :rtype: bool
        """
        return name in self.tables

    @property
    def tables(self):
        """
        Names of the tables in this database, excluding names starting with '_' or '~'

        :return: A list of table names
        :rtype: list
        """
        return self._return_tables(False)

    @property
    def tables_all(self):
        """
        Names of every table in this database, without filtering

        :return: A list of table names
        :rtype: list
        """
        return self._return_tables(True)

    def _return_tables(self, all):
        """
        Generate a list of the key names found in the default database

        :param all: When true include every name, otherwise skip names whose
            first character is '_' or '~'
        :type all: bool
        :return: A list of table names
        :rtype: list
        """
        # Reuse the caller's transaction when one is active; otherwise open a
        # temporary one that is aborted once the names have been collected.
        if self.transaction:
            txn = self.transaction.txn
            abort = False
        else:
            txn = self.env.begin()
            abort = True

        try:
            result = []
            with Cursor(self._db, txn) as cursor:
                if cursor.first():
                    while True:
                        name = cursor.key().decode()
                        if all or name[0] not in ('_', '~'):
                            result.append(name)
                        if not cursor.next():
                            break
            return result
        finally:
            if abort:
                txn.abort()

    def drop(self, name):
        """
        Drop a database table

        :param name: Name of table to drop
        :type name: str
        :raises xTableMissing: If no table with this name exists
        """
        if name not in self.tables_all:
            raise xTableMissing

        # table() opens and caches the table when it is not cached yet.
        self.table(name)._drop()
        del self._tables[name]

    def restructure(self, name):
        """
        Restructure a table, copy to a temporary table, then copy back. This will recreate the table
        and all its IDs but will retain the original indexes (which it will regenerate).

        Reads the current transaction's ``txn``, so it must be called while a
        transaction is active.

        :param name: Name of the table to restructure
        :type name: str
        :raises xTableMissing: If the table does not exist
        :raises xTableExists: If the temporary table '~<name>' already exists
        """
        txn = self.transaction.txn
        if name not in self.tables:
            raise xTableMissing
        # Go through table() rather than the cache directly: the table may
        # exist in the database without having been opened by this instance.
        src = self.table(name)
        dst_name = '~' + name
        if dst_name in self.tables:
            raise xTableExists
        dst = self.table(dst_name)
        for doc in src.find():
            dst.append(doc, txn=txn)

        src.empty(txn=txn)
        for doc in dst.find(txn=txn):
            src.append(doc, txn=txn)
        dst._drop(txn=txn)
        del self._tables[dst_name]

    def table(self, name):
        """
        Return a reference to a table with a given name, creating first if it doesn't exist

        :param name: Name of table
        :type name: str
        :return: Reference to table
        :rtype: Table
        """
        if name not in self._tables:
            self._tables[name] = Table(self, name)
        return self._tables[name]
Ejemplo n.º 4
0
class LmdbDataset:
    """
    Store of serialized protocol messages, one LMDB named database per message type.

    Each message type is stored in the database named after its
    ``DESCRIPTOR.name``; records are keyed by a 4-byte little-endian integer.
    Instances can be pickled: only the path and the read-only flag are
    saved, and the environment is reopened on unpickling.
    """

    def __init__(self, databasePath, readonly=True):
        """
        Open the dataset.

        :param databasePath: path handed to the LMDB environment
        :param readonly: open read-only; when False, missing databases are created
        """
        self.databasePath = databasePath
        self.readonly = readonly
        self.__init()

    def __init(self):
        """Open the environment and the database of every known message type."""
        create = not self.readonly

        self.env = Environment(self.databasePath,
                               # map_size is a byte count and must be an
                               # integer; the literal 3e9 is a float.
                               map_size=int(3e9),
                               subdir=False,
                               max_dbs=64,
                               mode=0,
                               create=create,
                               readonly=self.readonly)
        self.descriptorToDb = {}
        self.nextIds = {}

        for protocol in (SceneSetup, ScatterSample, DisneyDescriptor,
                         BakedInterpolationSet, Result):
            self.__addDb(protocol, create=create)

    def __addDb(self, protocol, create):
        """
        Open the database for ``protocol`` and register it.

        :param protocol: message class exposing ``DESCRIPTOR.name``
        :param create: whether the database may be created if missing
        :return: the opened database handle
        """
        name = protocol.DESCRIPTOR.name
        db = self.env.open_db(name.encode('ascii'),
                              integerkey=True,
                              create=create)
        self.descriptorToDb[name] = db
        # NOTE(review): the id counter restarts at 0 on every open, so
        # appending to a database that already holds records would reuse
        # their keys - confirm whether that is intended.
        self.nextIds[name] = 0
        return db

    def append(self, value):
        """
        Store ``value`` under the next id of its message type.

        :param value: message instance; serialized with ``SerializeToString``
        """
        name, db = self.__getNameAndDb(value)

        with self.env.begin(write=True) as transaction:
            transaction.put(self.nextIds[name].to_bytes(4, 'little'),
                            value.SerializeToString(),
                            db=db)
            self.nextIds[name] += 1

    def getCountOf(self, protocolType):
        """
        Return the number of entries stored for ``protocolType``.

        :param protocolType: message class (or instance) exposing ``DESCRIPTOR``
        :return: the ``entries`` value of the database statistics
        """
        _, db = self.__getNameAndDb(protocolType)

        with self.env.begin() as transaction:
            return transaction.stat(db)['entries']

    def get(self, protocolType, id, buffers=False):
        """
        Load and parse the record stored under ``id``.

        :param protocolType: message class to instantiate and parse into
        :param id: integer key of the record
        :param buffers: forwarded to the transaction; parsing happens while
            the transaction is still open
        :return: a new ``protocolType`` instance parsed from the stored bytes
        """
        _, db = self.__getNameAndDb(protocolType)

        with self.env.begin(db=db, buffers=buffers) as transaction:
            serialized = transaction.get(id.to_bytes(4, 'little'), db=db)

            protocol = protocolType()
            protocol.ParseFromString(serialized)
            return protocol

    def getCountBeforeLastFlatCloud(self):
        """
        Return the offset of the first scene whose cloud path contains "RoundClouds".

        Scenes are visited in key order and the offset grows by ``BATCH_SIZE``
        for every scene skipped.

        :return: the accumulated offset, or None when no scene matches
        """
        # Resolve the handle before opening the read transaction so that a
        # lazy open never happens inside it.
        _, db = self.__getNameAndDb(SceneSetup)

        with self.env.begin() as transaction:
            cursor = transaction.cursor(db)
            cursor.first()
            offset = 0
            for _, value in cursor:
                scene = SceneSetup()
                scene.ParseFromString(value)
                if "RoundClouds" in scene.cloud_path:
                    return offset
                offset += BATCH_SIZE
        return None

    def __getNameAndDb(self, protocolType):
        """
        Resolve the database name and handle for a message type.

        A type whose database has not been registered yet is opened on demand
        (without creating it).

        :param protocolType: message class or instance exposing ``DESCRIPTOR.name``
        :return: ``(name, db)`` tuple
        """
        name = protocolType.DESCRIPTOR.name
        # .get() so that an unregistered name reaches the lazy-open fallback
        # instead of raising KeyError.
        db = self.descriptorToDb.get(name)
        if db is None:
            db = self.__addDb(protocolType, create=False)
        return (name, db)

    def __getstate__(self):
        """Pickle only what is needed to reopen the dataset."""
        return {"databasePath": self.databasePath, "readonly": self.readonly}

    def __setstate__(self, state):
        """Restore the saved attributes and reopen the environment."""
        self.__dict__.update(state)
        self.__init()