Ejemplo n.º 1
0
def retrieve_queue_data(env: lmdb.Environment) -> QueueData:
    """
    Load the queue settings and the subscriber ids persisted in LMDB.

    The message timeout, the maximum number of messages, the high water mark
    database size and the pruning strategy are read from the queue database;
    the subscriber ids are the keys of the subscriber database. Everything is
    read inside one read-only transaction.

    :param env: environment that stores queue data
    :return: data stored in LMDB related to the queue
    """
    with env.begin(write=False) as txn:
        queue_db = env.open_db(key=QUEUE_DB, txn=txn, create=False)

        def read(key):
            # Raw bytes stored under ``key`` in the queue database.
            return txn.get(key=key, db=queue_db)

        timeout = bytes_to_int(read(MESSAGE_TIMEOUT_KEY))
        message_limit = bytes_to_int(read(MAX_MESSAGES_KEY))
        hwm_size = bytes_to_int(read(HWM_DB_SIZE_KEY))
        pruning = bytes_to_str(read(STRATEGY_KEY))

        subscriber_db = env.open_db(key=SUBSCRIBER_DB, txn=txn, create=False)
        # Only the keys are needed, so the cursor skips the values.
        key_iter = txn.cursor(db=subscriber_db).iternext(values=False)
        subscribers = {bytes_to_str(raw_id) for raw_id in key_iter}

    return QueueData(message_timeout=timeout,
                     max_messages=message_limit,
                     hwm_db_size=hwm_size,
                     strategy=pruning,
                     subscriber_ids=subscribers)
Ejemplo n.º 2
0
    def push_find_missing_labels(self, commit, tmpDB: lmdb.Environment = None):
        """Send the metadata hashes of ``commit`` and return the ones reported back.

        Parameters
        ----------
        commit
            commit reference handed to ``unpack_commit_ref`` and to the
            request iterator.
        tmpDB : lmdb.Environment, optional
            environment that already holds the unpacked commit references.
            When omitted, a throw-away environment is created inside a
            temporary directory, filled from ``self.env.refenv`` and closed
            again before the request is sent.

        Returns
        -------
        list
            digests decoded from the ``PushFindMissingLabels`` response
            (presumably the labels the server is missing, going by the RPC
            name).
        """
        if tmpDB is None:
            with tempfile.TemporaryDirectory() as tempD:
                tmpDF = os.path.join(tempD, 'test.lmdb')
                tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
                try:
                    commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
                    c_hashes = list(
                        set(queries.RecordQuery(tmpDB).metadata_hashes()))
                finally:
                    # Close even when unpacking or querying fails, so the
                    # environment is never left open while the temporary
                    # directory is being removed.
                    tmpDB.close()
        else:
            c_hashes = list(set(queries.RecordQuery(tmpDB).metadata_hashes()))

        c_hashs_raw = [
            chunks.serialize_ident(digest, '') for digest in c_hashes
        ]
        raw_pack = chunks.serialize_record_pack(c_hashs_raw)
        pb2_func = hangar_service_pb2.FindMissingLabelsRequest
        cIter = chunks.missingHashRequestIterator(commit, raw_pack, pb2_func)
        responses = self.stub.PushFindMissingLabels(cIter)
        # NOTE: ``hBytes`` is only bound once the first message arrives; an
        # empty response stream is not handled here.
        for idx, response in enumerate(responses):
            if idx == 0:
                # The first message carries the total payload size, which
                # lets the receive buffer be allocated once up front.
                hBytes, offset = bytearray(response.total_byte_size), 0
            size = len(response.hashs)
            hBytes[offset:offset + size] = response.hashs
            offset += size

        uncompBytes = blosc.decompress(hBytes)
        s_missing_raw = chunks.deserialize_record_pack(uncompBytes)
        return [chunks.deserialize_ident(raw).digest for raw in s_missing_raw]
Ejemplo n.º 3
0
    def push_find_missing_hash_records(self, commit, tmpDB: lmdb.Environment = None):
        """Send the data hashes of ``commit`` and pair the reported ones with their schema.

        Parameters
        ----------
        commit
            commit reference handed to ``unpack_commit_ref`` and to the
            request iterator.
        tmpDB : lmdb.Environment, optional
            environment that already holds the unpacked commit references.
            When omitted, a throw-away environment is created inside a
            temporary directory, filled from ``self.env.refenv`` and closed
            again before the request is sent.

        Returns
        -------
        list
            ``(data_hash, schema_hash)`` tuples, one per hash contained in the
            ``PushFindMissingHashRecords`` response (presumably the hashes
            the server is missing, going by the RPC name).
        """
        if tmpDB is None:
            with tempfile.TemporaryDirectory() as tempD:
                tmpDF = os.path.join(tempD, 'test.lmdb')
                tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
                try:
                    commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
                    c_hashs_schemas = queries.RecordQuery(tmpDB).data_hash_to_schema_hash()
                    c_hashes = list(set(c_hashs_schemas.keys()))
                finally:
                    # Close even when unpacking or querying fails, so the
                    # environment is never left open while the temporary
                    # directory is being removed.
                    tmpDB.close()
        else:
            c_hashs_schemas = queries.RecordQuery(tmpDB).data_hash_to_schema_hash()
            c_hashes = list(set(c_hashs_schemas.keys()))

        pb2_func = hangar_service_pb2.FindMissingHashRecordsRequest
        cIter = chunks.missingHashRequestIterator(commit, c_hashes, pb2_func)
        responses = self.stub.PushFindMissingHashRecords(cIter)
        # NOTE: ``hBytes`` is only bound once the first message arrives; an
        # empty response stream is not handled here.
        for idx, response in enumerate(responses):
            if idx == 0:
                # The first message carries the total payload size, which
                # lets the receive buffer be allocated once up front.
                hBytes, offset = bytearray(response.total_byte_size), 0
            size = len(response.hashs)
            hBytes[offset: offset + size] = response.hashs
            offset += size

        uncompBytes = blosc.decompress(hBytes)
        s_missing_hashs = msgpack.unpackb(uncompBytes, raw=False, use_list=False)
        return [(s_hsh, c_hashs_schemas[s_hsh]) for s_hsh in s_missing_hashs]
Ejemplo n.º 4
0
    def __init__(self, model: s_datamodel.Model,
                 dbenv: lmdb.Environment) -> None:
        '''
        Creates metadata for all the indices.

        Args:
            model (s_datamodel.Model): the data model handed to every _MetaEntry.
            dbenv (lmdb.Environment): the lmdb instance in which to store the metadata.

        Returns:
            None

        Raises:
            s_exc.CorruptDatabase: if exactly one of the 'indices'/'progress' records is
                missing, or if a deleting/progress entry names an index that is not present.
        '''
        self._dbenv = dbenv
        self.model = model

        # The table in the database file (N.B. in LMDB speak, this is called a database)
        self._metatbl = dbenv.open_db(b'meta')
        is_new_db = False
        with dbenv.begin(db=self._metatbl, buffers=True) as txn:
            indices_enc = txn.get(b'indices')
            progress_enc = txn.get(b'progress')
            if indices_enc is None or progress_enc is None:
                if indices_enc is None and progress_enc is None:
                    is_new_db = True
                    indices_enc = s_msgpack.en({'present': {}, 'deleting': []})
                    progress_enc = s_msgpack.en({})
                else:
                    raise s_exc.CorruptDatabase(
                        'missing meta information in index meta'
                    )  # pragma: no cover

            # With buffers=True the values returned by get() are only valid while the
            # transaction is alive, so they are decoded before the block exits.
            indices = s_msgpack.un(indices_enc)
            progresses = s_msgpack.un(progress_enc)

        # The details about what the indices are actually indexing: the datapath and type.

        self.indices = {
            k: _MetaEntry(model, **s_msgpack.un(v))
            for k, v in indices.get('present', {}).items()
        }
        self.deleting = list(indices.get('deleting', ()))
        # Keeps track (non-persistently) of which indices have been paused
        self.asleep = defaultdict(bool)  # type: ignore

        # How far each index has progressed as well as statistics
        self.progresses = progresses
        if not all(p in self.indices for p in self.deleting):
            raise s_exc.CorruptDatabase(
                'index meta table: deleting entry with unrecognized property name'
            )  # pragma: no cover
        if not all(p in self.indices for p in self.progresses):
            raise s_exc.CorruptDatabase(
                'index meta table: progress entry with unrecognized property name'
            )  # pragma: no cover
        if is_new_db:
            # Nothing was stored yet: write the freshly built defaults.
            self.persist()
Ejemplo n.º 5
0
    def begin_reader_txn(self,
                         lmdbenv: lmdb.Environment,
                         buffer: bool = False) -> lmdb.Transaction:
        """Hand out the shared read-only transaction of an environment.

        A transaction is only opened when no reader is currently registered
        for ``lmdbenv``; every later call gets the handle that is already
        stored. Each call increments the reader count kept for the
        environment (the matching release logic is not part of this method).

        Parameters
        ----------
        lmdbenv : lmdb.Environment
            the environment to start the transaction in.
        buffer : bool, optional
            whether a buffer transaction should be used (the default is
            False, which means no buffers are returned). Only takes effect on
            the call that actually opens the transaction.

        Returns
        -------
        lmdb.Transaction
            handle to the lmdb transaction.
        """
        ancestors = self.ReaderAncestors
        if ancestors[lmdbenv] == 0:
            # First holder for this environment: open the transaction.
            self.ReaderTxn[lmdbenv] = lmdbenv.begin(buffers=buffer,
                                                    write=False)
        ancestors[lmdbenv] += 1
        return self.ReaderTxn[lmdbenv]
Ejemplo n.º 6
0
    def begin_writer_txn(self,
                         lmdbenv: lmdb.Environment,
                         buffer: bool = False) -> lmdb.Transaction:
        """Hand out the shared write-enabled transaction of an environment.

        A transaction is only opened when no writer is currently registered
        for ``lmdbenv``; every later call gets the handle that is already
        stored. Each call increments the writer count kept for the
        environment (the matching release logic is not part of this method).

        Parameters
        ----------
        lmdbenv : lmdb.Environment
            the environment to open the transaction on
        buffer : bool, optional
            if buffer objects should be used (the default is False, which
            does not use buffers). Only takes effect on the call that
            actually opens the transaction.

        Returns
        -------
        lmdb.Transaction
            transaction handle to perform operations on
        """
        ancestors = self.WriterAncestors
        if ancestors[lmdbenv] == 0:
            # First holder for this environment: open the transaction.
            self.WriterTxn[lmdbenv] = lmdbenv.begin(buffers=buffer, write=True)
        ancestors[lmdbenv] += 1
        return self.WriterTxn[lmdbenv]
Ejemplo n.º 7
0
 def __init__(self, name, conf=None, binlog=True, size=None):
     """
     Open the environment called ``name`` together with its bookkeeping tables.

     :param name: passed to ``Environment`` as the database to open
     :param conf: optional dict; the items under its ``'env'`` key override
                  the defaults held in ``self._conf``
     :param binlog: used as the ``create`` flag when opening ``__binlog__``
     :param size: when truthy, overrides the ``map_size`` option
     """
     # Always build a private copy: writing ``map_size`` into ``self._conf``
     # itself would change the defaults seen by every other user of that dict.
     overrides = conf.get('env', {}) if conf else {}
     conf = dict(self._conf, **overrides)
     if size:
         conf['map_size'] = size
     self._tables = {}
     self._env = Environment(name, **conf)
     self._db = self._env.open_db()
     self._transaction = None
     try:
         self._binlog = self.env.open_db('__binlog__'.encode(),
                                         create=binlog)
     except NotFoundError:
         # No binlog table and we were asked not to create one.
         self._binlog = None
     try:
         self._metadata = self.env.open_db('__metadata__'.encode(),
                                           create=False)
     except NotFoundError:
         # No metadata table yet: let migrate_metadata() set it up.
         self.migrate_metadata()
Ejemplo n.º 8
0
    def __init(self):
        """Open the LMDB environment and register one database per protocol type.

        The environment is created when the dataset is writable and opened
        read-only otherwise. ``descriptorToDb`` and ``nextIds`` are reset
        before the databases are registered through ``__addDb``.
        """
        create = not self.readonly

        self.env = Environment(self.databasePath,
                               # An integer byte count (3e9 is a float, which
                               # is not a valid size on every Python version).
                               map_size=3 * 10 ** 9,
                               subdir=False,
                               max_dbs=64,
                               mode=0,
                               create=create,
                               readonly=self.readonly)
        self.descriptorToDb = {}
        self.nextIds = {}

        for protocol in (SceneSetup, ScatterSample, DisneyDescriptor,
                         BakedInterpolationSet, Result):
            self.__addDb(protocol, create=create)
Ejemplo n.º 9
0
    def push_find_missing_schemas(self, commit, tmpDB: lmdb.Environment = None):
        """Send the schema hashes of ``commit`` in a ``PushFindMissingSchemas`` request.

        Parameters
        ----------
        commit
            commit reference handed to ``unpack_commit_ref`` and stored on the
            request.
        tmpDB : lmdb.Environment, optional
            environment that already holds the unpacked commit references.
            When omitted, a throw-away environment is created inside a
            temporary directory, filled from ``self.env.refenv`` and closed
            again before the request is sent.

        Returns
        -------
        object
            the response returned by ``self.stub.PushFindMissingSchemas``,
            unmodified.
        """
        if tmpDB is None:
            with tempfile.TemporaryDirectory() as tempD:
                tmpDF = os.path.join(tempD, 'test.lmdb')
                tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
                try:
                    commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
                    c_schemas = list(
                        set(queries.RecordQuery(tmpDB).schema_hashes()))
                finally:
                    # Close even when unpacking or querying fails, so the
                    # environment is never left open while the temporary
                    # directory is being removed.
                    tmpDB.close()
        else:
            c_schemas = list(set(queries.RecordQuery(tmpDB).schema_hashes()))

        request = hangar_service_pb2.FindMissingSchemasRequest()
        request.commit = commit
        request.schema_digests.extend(c_schemas)

        return self.stub.PushFindMissingSchemas(request)
Ejemplo n.º 10
0
 def push(self,
          env: lmdb.Environment,
          write: bool = False,
          iterator: bool = False):
     """
     Begin a transaction on ``env`` and push it onto that environment's stack.

     The new transaction becomes a child of the transaction currently on top
     of the stack when that one is a write transaction; otherwise it has no
     parent. Any ``lmdb.Error`` raised on the way is re-raised as
     ``TransactionError``.
     """
     try:
         stack = self.stacks[env]
         if stack and stack[-1].write:
             parent_txn = stack[-1].transaction
         else:
             parent_txn = None
         txn = env.begin(write=write, buffers=True, parent=parent_txn)
         stack.append(ExplicitContext(txn, write, iterator))
     except lmdb.Error as exc:
         raise TransactionError() from exc
Ejemplo n.º 11
0
def initialize_environment(env: lmdb.Environment, namespace: str) -> str:
    """
    Return the UUID of ``env``, generating and storing one if none exists.

    For the root namespace, every key of the default database that is not one
    of the reserved names is additionally decoded with ``struct`` format
    ``'@N'`` and recorded in ``mapsize``.

    The whole operation runs in one write transaction, which is committed on
    success and aborted on any failure.

    :param env: environment to initialise
    :param namespace: namespace being opened; only ``constants.ROOT_NAMESPACE``
                      triggers the ``mapsize`` scan
    :return: the environment UUID as a string
    :raises TransactionError: if LMDB reports an error or the UUID cannot be
                              stored
    """
    txn = None
    try:
        txn = env.begin(write=True, buffers=False)
        uuid_key = constants.ENVIRONMENT_UUID_KEY.encode('utf-8')
        env_uuid = txn.get(key=uuid_key)
        if env_uuid is None:
            env_uuid = str(uuid.uuid4())
            # The write must not live inside an ``assert``: asserts are
            # removed under ``python -O`` and the UUID would never be stored.
            if not txn.put(key=uuid_key, value=env_uuid.encode('utf-8')):
                raise TransactionError()
        else:
            env_uuid = env_uuid.decode('utf-8')
        if namespace == constants.ROOT_NAMESPACE:
            reserved = (
                constants.ENVIRONMENT_UUID_KEY,
                constants.ATTRIBUTE_DATABASE,
                constants.VERSION_DATABASE,
                constants.DESCRIPTOR_DATABASE,
                constants.NAME_DATABASE,
            )
            cursor = txn.cursor()
            if cursor.first():
                while True:
                    name = cursor.key().decode('utf-8')
                    if name not in reserved:
                        mapsize[name] = struct.unpack('@N', cursor.value())[0]
                    if not cursor.next():
                        break
        # Commit for every namespace: otherwise a freshly written UUID is
        # discarded and the write transaction is left open.
        txn.commit()
        return env_uuid
    except BaseException as exc:
        if txn:
            try:
                txn.abort()
            except lmdb.Error:
                # Best effort: the original failure is the one to report.
                pass
        if isinstance(exc, lmdb.Error):
            raise TransactionError() from exc
        raise
Ejemplo n.º 12
0
class Database(object):
    """
    Representation of a Database, this is the main API class

    :param name: The name of the database to open
    :type name: str
    :param conf: Any additional or custom options for this environment; the
                 items under its ``'env'`` key override the defaults
    :type conf: dict
    :param binlog: Used as the ``create`` flag when opening ``__binlog__``
    :type binlog: bool
    :param size: When truthy, overrides the ``map_size`` option
    :type size: int
    """
    _debug = False
    # Default Environment options. Shared by all instances, so it is copied
    # in __init__ and never modified in place.
    _conf = {
        'map_size': 1024 * 1024 * 1024 * 2,
        'subdir': True,
        'metasync': False,
        'sync': True,
        'lock': True,
        'max_dbs': 64,
        'writemap': True,
        'map_async': True
    }

    def __init__(self, name, conf=None, binlog=True, size=None):
        # Always build a private copy: writing ``map_size`` into the class
        # level defaults would leak the value into every later instance.
        overrides = conf.get('env', {}) if conf else {}
        settings = dict(self._conf, **overrides)
        if size:
            settings['map_size'] = size
        self._tables = {}
        self._env = Environment(name, **settings)
        self._db = self._env.open_db()
        self._transaction = None
        try:
            self._binlog = self.env.open_db('__binlog__'.encode(),
                                            create=binlog)
        except NotFoundError:
            # No binlog table and we were asked not to create one.
            self._binlog = None
        try:
            self._metadata = self.env.open_db('__metadata__'.encode(),
                                              create=False)
        except NotFoundError:
            # No metadata table yet: create it and move old records over.
            self.migrate_metadata()

    def __del__(self):
        self.close()

    def migrate_metadata(self):
        """
        Create the ``__metadata__`` table and move every record of the default
        table whose key starts with ``@`` into it (without the ``@``).

        The move happens in one write transaction. If a record cannot be
        written or removed a message is printed and the transaction is
        aborted, so a failed migration leaves the database unchanged.
        """
        self._metadata = self.env.open_db('__metadata__'.encode(), create=True)
        # The transaction is driven by hand instead of ``with``: aborting
        # inside a ``with`` block makes the context exit operate on an
        # already finished transaction.
        txn = self.env.begin(write=True)
        try:
            failure = self._move_legacy_metadata(txn)
        except BaseException:
            txn.abort()
            raise
        if failure:
            txn.abort()
            print(failure)
            return
        txn.commit()

    def _move_legacy_metadata(self, txn):
        # Move the '@'-prefixed records; returns an error message or None.
        with Cursor(self._db, txn) as cursor:
            move_next = cursor.first
            while move_next():
                move_next = cursor.next
                key = cursor.key()
                name = key.decode()
                if name[0] != '@':
                    continue
                record = txn.get(key, db=self._db)
                if not txn.put(name[1:].encode(), record, db=self._metadata):
                    return "Failed to create new record"
                if not txn.delete(key, db=self._db):
                    return "Failed to delete old record"
        return None

    @property
    def env(self):
        """
        Return a reference to the current database environment

        :return: A Database Environment
        :rtype: Environment
        """
        return self._env

    @property
    def transaction(self):
        """
        Return a reference to the current transaction

        :return: The current transaction (or None)
        :rtype: DBTransaction
        """
        return self._transaction

    def binlog(self, enable=True):
        """
        Enable or disable binary logging; disabling also drops the
        ``__binlog__`` table, i.e. the transaction history.

        :param enable: Whether to enable or disable logging
        """
        if enable:
            if not self._binlog:
                self._binlog = self.env.open_db('__binlog__'.encode())
            return

        if self._binlog:
            if self.transaction:
                # Reuse the transaction that is already in progress.
                self.transaction.txn.drop(self._binlog, True)
            else:
                with self.env.begin(write=True) as txn:
                    txn.drop(self._binlog, True)
        self._binlog = None

    def begin(self):
        """
        Begin a new transaction returning a transaction reference (use with "with")

        :return: Reference to the new transaction
        :rtype: DBTransaction
        """
        self._transaction = DBTransaction(self)
        return self._transaction

    def end(self):
        """
        Forget the current transaction reference
        """
        self._transaction = None

    def close(self):
        """
        Close the current database. Safe to call more than once, and safe on
        an instance whose construction failed before the environment existed
        (``__del__`` calls this unconditionally).
        """
        env = getattr(self, '_env', None)
        if env is not None:
            env.close()
        self._env = None

    def sync(self, force=False):
        """
        Flush the environment to disk

        :param force: Passed through to ``Environment.sync``
        """
        self.env.sync(force)

    def exists(self, name):
        """
        Test whether a table with a given name already exists

        :param name: Table name
        :type name: str
        :return: True if table exists
        :rtype: bool
        """
        return name in self.tables

    @property
    def tables(self):
        """
        PROPERTY - Names of the tables in this database, leaving out those
        whose name starts with ``_`` or ``~``

        :getter: Returns a list of table names
        :type: list
        """
        return self._return_tables(False)

    @property
    def tables_all(self):
        """
        PROPERTY - Names of all tables in this database

        :getter: Returns a list of table names
        :type: list
        """
        return self._return_tables(True)

    def _return_tables(self, all):
        """
        Generate a list of names of the tables associated with this database

        :param all: When false, names starting with ``_`` or ``~`` are skipped
        :return: List of table names
        :rtype: list
        """
        if self.transaction:
            txn = self.transaction.txn
            abort = False
        else:
            # No transaction in progress: use a private one and release it
            # again once the names have been collected.
            txn = self.env.begin()
            abort = True

        try:
            result = []
            with Cursor(self._db, txn) as cursor:
                if cursor.first():
                    while True:
                        name = cursor.key().decode()
                        if all or name[0] not in ('_', '~'):
                            result.append(name)
                        if not cursor.next():
                            break
            return result
        finally:
            if abort:
                txn.abort()

    def drop(self, name):
        """
        Drop a database table

        :param name: Name of table to drop
        :type name: str
        :raises xTableMissing: if no table with that name exists
        """
        if name not in self.tables_all:
            raise xTableMissing

        # table() opens (and caches) the table if this instance has not
        # touched it yet, so it can always be dropped and forgotten.
        self.table(name)._drop()
        del self._tables[name]

    def restructure(self, name):
        """
        Restructure a table, copy to a temporary table, then copy back. This will recreate the table
        and all its IDs but will retain the original indexes (which it will regenerate).
        Requires a transaction started with :meth:`begin`.

        :param name: Name of the table to restructure
        :type name: str
        :raises xTableMissing: if the table does not exist
        :raises xTableExists: if the temporary table ``~<name>`` already exists
        """
        txn = self.transaction.txn
        if name not in self.tables:
            raise xTableMissing
        # table() also covers tables that exist on disk but have not been
        # opened through this instance yet.
        src = self.table(name)
        dst_name = '~' + name
        # ``tables`` hides names starting with '~', so the temporary table
        # can only be detected through ``tables_all``.
        if dst_name in self.tables_all:
            raise xTableExists
        dst = self.table(dst_name)
        # NOTE(review): this read does not pass ``txn`` while every other
        # call here does -- confirm whether that is intentional.
        for doc in src.find():
            dst.append(doc, txn=txn)

        src.empty(txn=txn)
        for doc in dst.find(txn=txn):
            src.append(doc, txn=txn)
        dst._drop(txn=txn)
        del self._tables[dst_name]

    def table(self, name):
        """
        Return a reference to a table with a given name, creating first if it doesn't exist

        :param name: Name of table
        :type name: str
        :return: Reference to table
        :rtype: Table
        """
        if name not in self._tables:
            self._tables[name] = Table(self, name)
        return self._tables[name]
Ejemplo n.º 13
0
class LmdbDataset:
    """LMDB-backed store with one integer-keyed database per protobuf type.

    Records are serialized protobuf messages stored under 4-byte
    little-endian ids. Only the path and the read-only flag are pickled; the
    environment is reopened when the object is unpickled.
    """

    def __init__(self, databasePath, readonly=True):
        self.databasePath = databasePath
        self.readonly = readonly
        self.__init()

    def __init(self):
        # Open the environment and register a database for every known
        # protocol type. Databases are only created when writable.
        create = not self.readonly

        self.env = Environment(self.databasePath,
                               # An integer byte count (3e9 is a float, which
                               # is not a valid size on every Python version).
                               map_size=3 * 10 ** 9,
                               subdir=False,
                               max_dbs=64,
                               mode=0,
                               create=create,
                               readonly=self.readonly)
        self.descriptorToDb = {}
        self.nextIds = {}

        for protocol in (SceneSetup, ScatterSample, DisneyDescriptor,
                         BakedInterpolationSet, Result):
            self.__addDb(protocol, create=create)

    def __addDb(self, protocol, create):
        # Open the database named after the protocol's descriptor and
        # remember it together with a fresh id counter.
        name = protocol.DESCRIPTOR.name
        db = self.env.open_db(name.encode('ascii'),
                              integerkey=True,
                              create=create)
        self.descriptorToDb[name] = db
        # NOTE(review): the counter always starts at 0, so appending to a
        # dataset that already holds records reuses ids from 0 -- confirm
        # that this is intended.
        self.nextIds[name] = 0
        return db

    def append(self, value):
        """Store ``value`` under the next free id of its type's database."""
        name, db = self.__getNameAndDb(value)

        with self.env.begin(write=True) as transaction:
            transaction.put((self.nextIds[name]).to_bytes(4, 'little'),
                            value.SerializeToString(),
                            db=db)
            self.nextIds[name] += 1

    def getCountOf(self, protocolType):
        """Return the number of entries in the database of ``protocolType``."""
        _, db = self.__getNameAndDb(protocolType)

        with self.env.begin() as transaction:
            return transaction.stat(db)['entries']

    def get(self, protocolType, id, buffers=False):
        """Read record ``id`` and return it parsed into a new ``protocolType``.

        Parsing happens while the transaction is still open, so it also works
        with ``buffers=True``.
        """
        _, db = self.__getNameAndDb(protocolType)

        with self.env.begin(db=db, buffers=buffers) as transaction:
            serialized = transaction.get(id.to_bytes(4, 'little'), db=db)

            protocol = protocolType()

            protocol.ParseFromString(serialized)
            return protocol

    def getCountBeforeLastFlatCloud(self):
        """Return ``BATCH_SIZE`` times the number of ``SceneSetup`` records
        that precede the first one whose ``cloud_path`` contains
        ``"RoundClouds"``; ``None`` when there is no such record.
        """
        with self.env.begin() as transaction:
            _, db = self.__getNameAndDb(SceneSetup)
            cursor = transaction.cursor(db)
            cursor.first()
            count = 0
            for _key, value in cursor:
                scene = SceneSetup()
                scene.ParseFromString(value)
                if "RoundClouds" in scene.cloud_path:
                    return count
                count += BATCH_SIZE
        return None

    def __getNameAndDb(self, protocolType):
        # Resolve the descriptor name and its database handle, opening the
        # database (without creating it) when it is not registered yet.
        name = protocolType.DESCRIPTOR.name
        # .get() rather than indexing: a missing entry has to reach the
        # lazy-open branch below instead of raising KeyError.
        db = self.descriptorToDb.get(name)
        if db is None:
            db = self.__addDb(protocolType, False)
        return (name, db)

    def __getstate__(self):
        # The open environment cannot be pickled; keep only what is needed
        # to reopen it.
        return {"databasePath": self.databasePath, "readonly": self.readonly}

    def __setstate__(self, state):
        # Restore instance attributes, then reopen the environment.
        self.__dict__.update(state)
        self.__init()