def retrieve_queue_data(env: lmdb.Environment) -> QueueData:
    """Retrieve the values set in the queue database.

    The values stored in the queue database are the high water mark
    settings and the pruning strategy.

    :param env: environment that stores queue data
    :return: data stored in LMDB related to the queue
    """
    with env.begin(write=False) as txn:
        queue_db = env.open_db(key=QUEUE_DB, txn=txn, create=False)
        message_timeout = bytes_to_int(
            txn.get(key=MESSAGE_TIMEOUT_KEY, db=queue_db))
        max_messages = bytes_to_int(txn.get(key=MAX_MESSAGES_KEY, db=queue_db))
        hwm_db_size = bytes_to_int(txn.get(key=HWM_DB_SIZE_KEY, db=queue_db))
        strategy = bytes_to_str(txn.get(key=STRATEGY_KEY, db=queue_db))

        subscriber_db = env.open_db(key=SUBSCRIBER_DB, txn=txn, create=False)
        sub_cursor = txn.cursor(db=subscriber_db)
        subscriber_ids = set(
            bytes_to_str(subscriber_id)
            for subscriber_id in sub_cursor.iternext(values=False))

    queue_data = QueueData(message_timeout=message_timeout,
                           max_messages=max_messages,
                           hwm_db_size=hwm_db_size,
                           strategy=strategy,
                           subscriber_ids=subscriber_ids)
    return queue_data
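# A minimal usage sketch for retrieve_queue_data. The path and max_dbs value
# are illustrative assumptions; the environment must already contain the
# QUEUE_DB and SUBSCRIBER_DB named databases, since they are opened with
# create=False.
import lmdb

env = lmdb.open('/path/to/queue.lmdb', max_dbs=4, readonly=True)
queue_data = retrieve_queue_data(env)
print(queue_data.strategy, sorted(queue_data.subscriber_ids))
env.close()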
def push_find_missing_labels(self, commit, tmpDB: lmdb.Environment = None):
    if tmpDB is None:
        with tempfile.TemporaryDirectory() as tempD:
            tmpDF = os.path.join(tempD, 'test.lmdb')
            tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
            commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
            c_hashset = set(queries.RecordQuery(tmpDB).metadata_hashes())
            c_hashes = list(c_hashset)
            tmpDB.close()
    else:
        c_hashset = set(queries.RecordQuery(tmpDB).metadata_hashes())
        c_hashes = list(c_hashset)

    c_hashs_raw = [chunks.serialize_ident(digest, '') for digest in c_hashes]
    raw_pack = chunks.serialize_record_pack(c_hashs_raw)
    pb2_func = hangar_service_pb2.FindMissingLabelsRequest
    cIter = chunks.missingHashRequestIterator(commit, raw_pack, pb2_func)

    responses = self.stub.PushFindMissingLabels(cIter)
    for idx, response in enumerate(responses):
        if idx == 0:
            commit = response.commit
            hBytes, offset = bytearray(response.total_byte_size), 0
        size = len(response.hashs)
        hBytes[offset:offset + size] = response.hashs
        offset += size

    uncompBytes = blosc.decompress(hBytes)
    s_missing_raw = chunks.deserialize_record_pack(uncompBytes)
    s_mis_hsh = [chunks.deserialize_ident(raw).digest for raw in s_missing_raw]
    return s_mis_hsh
def push_find_missing_hash_records(self, commit, tmpDB: lmdb.Environment = None):
    if tmpDB is None:
        with tempfile.TemporaryDirectory() as tempD:
            tmpDF = os.path.join(tempD, 'test.lmdb')
            tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
            commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
            c_hashs_schemas = queries.RecordQuery(tmpDB).data_hash_to_schema_hash()
            c_hashes = list(set(c_hashs_schemas.keys()))
            tmpDB.close()
    else:
        c_hashs_schemas = queries.RecordQuery(tmpDB).data_hash_to_schema_hash()
        c_hashes = list(set(c_hashs_schemas.keys()))

    pb2_func = hangar_service_pb2.FindMissingHashRecordsRequest
    cIter = chunks.missingHashRequestIterator(commit, c_hashes, pb2_func)

    responses = self.stub.PushFindMissingHashRecords(cIter)
    for idx, response in enumerate(responses):
        if idx == 0:
            commit = response.commit
            hBytes, offset = bytearray(response.total_byte_size), 0
        size = len(response.hashs)
        hBytes[offset:offset + size] = response.hashs
        offset += size

    uncompBytes = blosc.decompress(hBytes)
    s_missing_hashs = msgpack.unpackb(uncompBytes, raw=False, use_list=False)
    s_mis_hsh_sch = [(s_hsh, c_hashs_schemas[s_hsh]) for s_hsh in s_missing_hashs]
    return s_mis_hsh_sch
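# A standalone sketch of the chunk-reassembly pattern shared by the
# push_find_missing_* methods above: the first streamed response carries the
# total payload size, every response carries a slice of the compressed bytes,
# and the buffer is blosc-decompressed once at the end. `responses` stands in
# for the gRPC response stream; this is an illustration, not the project's API.
import blosc

def reassemble(responses) -> bytes:
    hBytes, offset = bytearray(), 0
    for idx, response in enumerate(responses):
        if idx == 0:
            # preallocate the full buffer once the size is known
            hBytes = bytearray(response.total_byte_size)
        size = len(response.hashs)
        hBytes[offset:offset + size] = response.hashs
        offset += size
    return blosc.decompress(bytes(hBytes))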
def __init__(self, model: s_datamodel.Model, dbenv: lmdb.Environment) -> None:
    '''
    Creates metadata for all the indices.

    Args:
        dbenv (lmdb.Environment): the lmdb instance in which to store the metadata.

    Returns:
        None
    '''
    self._dbenv = dbenv
    self.model = model
    # The table in the database file (N.B. in LMDB speak, this is called a database)
    self._metatbl = dbenv.open_db(b'meta')
    is_new_db = False
    with dbenv.begin(db=self._metatbl, buffers=True) as txn:
        indices_enc = txn.get(b'indices')
        progress_enc = txn.get(b'progress')
        if indices_enc is None or progress_enc is None:
            if indices_enc is None and progress_enc is None:
                is_new_db = True
                indices_enc = s_msgpack.en({'present': {}, 'deleting': []})
                progress_enc = s_msgpack.en({})
            else:
                raise s_exc.CorruptDatabase(
                    'missing meta information in index meta')  # pragma: no cover

        indices = s_msgpack.un(indices_enc)

        # The details about what the indices are actually indexing: the datapath and type.
        self.indices = {k: _MetaEntry(model, **s_msgpack.un(v))
                        for k, v in indices.get('present', {}).items()}
        self.deleting = list(indices.get('deleting', ()))

        # Keeps track (non-persistently) of which indices have been paused
        self.asleep = defaultdict(bool)  # type: ignore

        # How far each index has progressed as well as statistics
        self.progresses = s_msgpack.un(progress_enc)

        if not all(p in self.indices for p in self.deleting):
            raise s_exc.CorruptDatabase(
                'index meta table: deleting entry with unrecognized property name')  # pragma: no cover
        if not all(p in self.indices for p in self.progresses):
            raise s_exc.CorruptDatabase(
                'index meta table: progress entry with unrecognized property name')  # pragma: no cover

    if is_new_db:
        self.persist()
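# A minimal sketch (not the project's API) of the on-disk layout the
# constructor above expects: the b'meta' named database holds two msgpack
# blobs under the keys b'indices' and b'progress'. Plain msgpack is used here
# in place of s_msgpack purely for illustration; the path is an assumption.
import lmdb
import msgpack

env = lmdb.open('/tmp/indexmeta.lmdb', max_dbs=4)
metatbl = env.open_db(b'meta')
with env.begin(db=metatbl, write=True) as txn:
    txn.put(b'indices', msgpack.packb({'present': {}, 'deleting': []}))
    txn.put(b'progress', msgpack.packb({}))
env.close()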
def begin_reader_txn(self,
                     lmdbenv: lmdb.Environment,
                     buffer: bool = False) -> lmdb.Transaction:
    """Start a read-only txn for the given environment

    If a read-only transaction for the same environment already exists,
    the same reader txn handle will be returned, and it will not close
    until all operations on that handle have said they are finished.

    Parameters
    ----------
    lmdbenv : lmdb.Environment
        the environment to start the transaction in.
    buffer : bool, optional
        whether a buffer transaction should be used (the default is False,
        which means no buffers are returned)

    Returns
    -------
    lmdb.Transaction
        handle to the lmdb transaction.
    """
    if self.ReaderAncestors[lmdbenv] == 0:
        self.ReaderTxn[lmdbenv] = lmdbenv.begin(write=False, buffers=buffer)
    self.ReaderAncestors[lmdbenv] += 1
    return self.ReaderTxn[lmdbenv]
def begin_writer_txn(self,
                     lmdbenv: lmdb.Environment,
                     buffer: bool = False) -> lmdb.Transaction:
    """Start a write-enabled transaction on the given environment

    If multiple write transactions are requested for the same handle,
    only one instance of the transaction handle will be returned, and
    it will not close until all operations on that handle have
    requested to close.

    Parameters
    ----------
    lmdbenv : lmdb.Environment
        the environment to open the transaction on
    buffer : bool, optional
        if buffer objects should be used (the default is False, which
        does not use buffers)

    Returns
    -------
    lmdb.Transaction
        transaction handle to perform operations on
    """
    if self.WriterAncestors[lmdbenv] == 0:
        self.WriterTxn[lmdbenv] = lmdbenv.begin(write=True, buffers=buffer)
    self.WriterAncestors[lmdbenv] += 1
    return self.WriterTxn[lmdbenv]
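# A hedged sketch of the reference-counting discipline behind the two methods
# above: repeated begin_* calls against the same environment return one shared
# handle, which stays open until the matching number of close requests brings
# the count back to zero. `TxnRegister` is an assumed name for the class that
# owns these methods, not confirmed API; the path is illustrative.
import lmdb

env = lmdb.open('/tmp/demo.lmdb')
registry = TxnRegister()                 # assumed owner of the methods above
txn_a = registry.begin_writer_txn(env)   # count 0 -> 1: real txn opened
txn_b = registry.begin_writer_txn(env)   # count 1 -> 2: same handle returned
assert txn_a is txn_b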
def push_find_missing_schemas(self, commit, tmpDB: lmdb.Environment = None):
    if tmpDB is None:
        with tempfile.TemporaryDirectory() as tempD:
            tmpDF = os.path.join(tempD, 'test.lmdb')
            tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
            commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
            c_schemaset = set(queries.RecordQuery(tmpDB).schema_hashes())
            c_schemas = list(c_schemaset)
            tmpDB.close()
    else:
        c_schemaset = set(queries.RecordQuery(tmpDB).schema_hashes())
        c_schemas = list(c_schemaset)

    request = hangar_service_pb2.FindMissingSchemasRequest()
    request.commit = commit
    request.schema_digests.extend(c_schemas)

    response = self.stub.PushFindMissingSchemas(request)
    return response
def push(self, env: lmdb.Environment, write: bool = False, iterator: bool = False):
    try:
        txn = env.begin(
            write=write,
            buffers=True,
            parent=self.stacks[env][-1].transaction
            if self.stacks[env] and self.stacks[env][-1].write else None
        )
        self.stacks[env].append(ExplicitContext(txn, write, iterator))
    except lmdb.Error as exc:
        raise TransactionError() from exc
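# A hypothetical pop counterpart to push above, sketched to show the stack
# discipline: the newest ExplicitContext is removed and its transaction is
# committed or aborted. This is an illustration of the intended pairing, not
# the module's real code.
def pop(self, env: lmdb.Environment, abort: bool = False):
    context = self.stacks[env].pop()
    try:
        if abort:
            context.transaction.abort()
        else:
            context.transaction.commit()
    except lmdb.Error as exc:
        raise TransactionError() from exc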
def initialize_environment(env: lmdb.Environment, namespace: str) -> str:
    try:
        txn = None
        txn = env.begin(write=True, buffers=False)
        env_uuid = txn.get(key=constants.ENVIRONMENT_UUID_KEY.encode('utf-8'))
        if env_uuid is None:
            env_uuid = str(uuid.uuid4())
            assert txn.put(key=constants.ENVIRONMENT_UUID_KEY.encode('utf-8'),
                           value=env_uuid.encode('utf-8'))
        else:
            env_uuid = env_uuid.decode('utf-8')
        if namespace == constants.ROOT_NAMESPACE:
            cursor = txn.cursor()
            cursor.first()
            while True:
                # `mapsize` is module-level state recording the size of each
                # named database other than the bookkeeping keys listed below.
                if cursor.key().decode('utf-8') not in [
                        constants.ENVIRONMENT_UUID_KEY,
                        constants.ATTRIBUTE_DATABASE,
                        constants.VERSION_DATABASE,
                        constants.DESCRIPTOR_DATABASE,
                        constants.NAME_DATABASE]:
                    mapsize[cursor.key().decode('utf-8')] = \
                        struct.unpack('@N', cursor.value())[0]
                if not cursor.next():
                    break
        txn.commit()
        return env_uuid
    except BaseException as exc:
        if txn:
            try:
                txn.abort()
            except lmdb.Error:
                pass
        if isinstance(exc, lmdb.Error):
            raise TransactionError() from exc
        raise exc
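# A minimal usage sketch for initialize_environment; the path is an
# assumption, and `constants`/`mapsize` come from the module that defines
# the function above.
import lmdb

env = lmdb.open('/path/to/store.lmdb', max_dbs=8)
env_uuid = initialize_environment(env, constants.ROOT_NAMESPACE)
print('environment uuid:', env_uuid)
env.close()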
class Database(object):
    """
    Representation of a Database, this is the main API class

    :param name: The name of the database to open
    :type name: str
    :param conf: Any additional or custom options for this environment
    :type conf: dict
    """
    _debug = False
    _conf = {
        'map_size': 1024 * 1024 * 1024 * 2,
        'subdir': True,
        'metasync': False,
        'sync': True,
        'lock': True,
        'max_dbs': 64,
        'writemap': True,
        'map_async': True
    }

    def __init__(self, name, conf=None, binlog=True, size=None):
        conf = dict(self._conf, **conf.get('env', {})) if conf else self._conf
        if size:
            conf['map_size'] = size
        self._tables = {}
        self._env = Environment(name, **conf)
        self._db = self._env.open_db()
        self._transaction = None
        try:
            self._binlog = self.env.open_db('__binlog__'.encode(), create=binlog)
        except NotFoundError:
            self._binlog = None
        try:
            self._metadata = self.env.open_db('__metadata__'.encode(), create=False)
        except NotFoundError:
            self.migrate_metadata()

    def __del__(self):
        self.close()

    def migrate_metadata(self):
        self._metadata = self.env.open_db('__metadata__'.encode(), create=True)
        with self.env.begin(write=True) as txn:
            with Cursor(self._db, txn) as cursor:
                move_next = cursor.first
                while move_next():
                    move_next = cursor.next
                    name = cursor.key().decode()
                    if name[0] == '@':
                        record = txn.get(cursor.key(), db=self._db)
                        if not txn.put(name[1:].encode(), record, db=self._metadata):
                            txn.abort()
                            print("Failed to create new record")
                            return
                        if not txn.delete(cursor.key(), db=self._db):
                            print("Failed to delete old record")
                            return

    @property
    def env(self):
        """
        Return a reference to the current database environment

        :return: A Database Environment
        :rtype: Environment
        """
        return self._env

    @property
    def transaction(self):
        """
        Return a reference to the current transaction

        :return: The current transaction (or None)
        :rtype: DBTransaction
        """
        return self._transaction

    def binlog(self, enable=True):
        """
        Enable or disable binary logging; disabling will delete the
        transaction history too ...

        :param enable: Whether to enable or disable logging
        """
        if enable:
            if not self._binlog:
                self._binlog = self.env.open_db('__binlog__'.encode())
        else:
            if self._binlog:
                if self.transaction:
                    self.transaction.txn.drop(self._binlog, True)
                else:
                    with self.env.begin(write=True) as txn:
                        txn.drop(self._binlog, True)
                self._binlog = None

    def begin(self):
        """
        Begin a new transaction returning a transaction reference
        (use with "with")

        :return: Reference to the new transaction
        :rtype: DBTransaction
        """
        self._transaction = DBTransaction(self)
        return self._transaction

    def end(self):
        """
        End an existing transaction committing all replication changes
        """
        self._transaction = None

    def close(self):
        """
        Close the current database
        """
        if self._env:
            self._env.close()
            self._env = None

    def sync(self, force=False):
        self.env.sync(force)

    def exists(self, name):
        """
        Test whether a table with a given name already exists

        :param name: Table name
        :type name: str
        :return: True if table exists
        :rtype: bool
        """
        return name in self.tables

    @property
    def tables(self):
        return self._return_tables(False)

    @property
    def tables_all(self):
        return self._return_tables(True)

    def _return_tables(self, all):
        """
        Generate a list of names of the tables associated with this database

        :return: A list of table names
        :rtype: list
        """
        if self.transaction:
            txn = self.transaction.txn
            abort = False
        else:
            txn = self.env.begin()
            abort = True
        try:
            result = []
            with Cursor(self._db, txn) as cursor:
                if cursor.first():
                    while True:
                        name = cursor.key().decode()
                        if all or name[0] not in ['_', '~']:
                            result.append(name)
                        if not cursor.next():
                            break
            return result
        finally:
            if abort:
                txn.abort()

    def drop(self, name):
        """
        Drop a database table

        :param name: Name of table to drop
        :type name: str
        """
        if name not in self.tables_all:
            raise xTableMissing
        if name not in self._tables:
            self.table(name)
        if name in self._tables:
            self._tables[name]._drop()
            del self._tables[name]

    def restructure(self, name):
        """
        Restructure a table, copy to a temporary table, then copy back.
        This will recreate the table and all its IDs but will retain the
        original indexes (which it will regenerate).

        :param name: Name of the table to restructure
        :type name: str
        """
        txn = self.transaction.txn
        if name not in self.tables:
            raise xTableMissing
        src = self._tables[name]
        dst_name = '~' + name
        if dst_name in self.tables:
            raise xTableExists
        dst = self.table(dst_name)
        for doc in src.find():
            dst.append(doc, txn=txn)
        src.empty(txn=txn)
        for doc in dst.find(txn=txn):
            src.append(doc, txn=txn)
        dst._drop(txn=txn)
        del self._tables[dst_name]

    def table(self, name):
        """
        Return a reference to a table with a given name, creating it first
        if it doesn't exist

        :param name: Name of table
        :type name: str
        :return: Reference to table
        :rtype: Table
        """
        if name not in self._tables:
            self._tables[name] = Table(self, name)
        return self._tables[name]
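# A hedged usage sketch for the Database class above. The Table append/find
# calls mirror the methods referenced inside restructure(); the path, size,
# and record contents are illustrative assumptions.
db = Database('/path/to/demo-db', size=1024 * 1024 * 64)
people = db.table('people')
people.append({'name': 'ada', 'role': 'engineer'})
for doc in people.find():
    print(doc)
db.close()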
class LmdbDataset:
    def __init__(self, databasePath, readonly=True):
        self.databasePath = databasePath
        self.readonly = readonly
        self.__init()

    def __init(self):
        create = not self.readonly
        self.env = Environment(self.databasePath,
                               map_size=int(3e9),  # map_size must be an int
                               subdir=False,
                               max_dbs=64,
                               mode=0,
                               create=create,
                               readonly=self.readonly)
        self.descriptorToDb = {}
        self.nextIds = {}
        self.__addDb(SceneSetup, create=create)
        self.__addDb(ScatterSample, create=create)
        self.__addDb(DisneyDescriptor, create=create)
        self.__addDb(BakedInterpolationSet, create=create)
        self.__addDb(Result, create=create)

    def __addDb(self, protocol, create):
        name = protocol.DESCRIPTOR.name
        db = self.env.open_db(name.encode('ascii'), integerkey=True, create=create)
        self.descriptorToDb[name] = db
        self.nextIds[name] = 0
        return db

    def append(self, value):
        name, db = self.__getNameAndDb(value)
        with self.env.begin(write=True) as transaction:
            transaction.put((self.nextIds[name]).to_bytes(4, 'little'),
                            value.SerializeToString(),
                            db=db)
        self.nextIds[name] += 1

    def getCountOf(self, protocolType):
        name, db = self.__getNameAndDb(protocolType)
        with self.env.begin() as transaction:
            return transaction.stat(db)['entries']

    def get(self, protocolType, id, buffers=False):
        _, db = self.__getNameAndDb(protocolType)
        with self.env.begin(db=db, buffers=buffers) as transaction:
            serialized = transaction.get(id.to_bytes(4, 'little'), db=db)
            protocol = protocolType()
            protocol.ParseFromString(serialized)
            return protocol

    def getCountBeforeLastFlatCloud(self):
        with self.env.begin() as transaction:
            _, db = self.__getNameAndDb(SceneSetup)
            cursor = transaction.cursor(db)
            cursor.first()
            id = 0
            for key, value in cursor:
                scene = SceneSetup()
                scene.ParseFromString(value)
                if "RoundClouds" in scene.cloud_path:
                    return id
                id += BATCH_SIZE

    def __getNameAndDb(self, protocolType):
        name = protocolType.DESCRIPTOR.name
        # use .get so an unregistered protocol is added lazily instead of
        # raising KeyError before the None check
        db = self.descriptorToDb.get(name)
        if db is None:
            db = self.__addDb(protocolType, False)
        return (name, db)

    def __getstate__(self):
        state = {"databasePath": self.databasePath, "readonly": self.readonly}
        return state

    def __setstate__(self, state):
        # Restore instance attributes
        self.__dict__.update(state)
        self.__init()
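# A minimal usage sketch for LmdbDataset, assuming SceneSetup is one of the
# generated protobuf types registered in __init; the path is an illustrative
# assumption.
dataset = LmdbDataset('/path/to/dataset.lmdb', readonly=False)
scene = SceneSetup()
dataset.append(scene)
print(dataset.getCountOf(SceneSetup))
first = dataset.get(SceneSetup, 0)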