def push_find_missing_labels(self, commit, tmpDB: lmdb.Environment = None):
    if tmpDB is None:
        # No unpacked ref db provided; unpack the commit refs into a
        # throwaway LMDB environment just long enough to query it.
        with tempfile.TemporaryDirectory() as tempD:
            tmpDF = os.path.join(tempD, 'test.lmdb')
            tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
            commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
            c_hashset = set(queries.RecordQuery(tmpDB).metadata_hashes())
            c_hashes = list(c_hashset)
            tmpDB.close()
    else:
        c_hashset = set(queries.RecordQuery(tmpDB).metadata_hashes())
        c_hashes = list(c_hashset)

    # Pack the local label digests and stream them to the server in chunks.
    c_hashs_raw = [chunks.serialize_ident(digest, '') for digest in c_hashes]
    raw_pack = chunks.serialize_record_pack(c_hashs_raw)
    pb2_func = hangar_service_pb2.FindMissingLabelsRequest
    cIter = chunks.missingHashRequestIterator(commit, raw_pack, pb2_func)

    # Reassemble the chunked response stream into a single buffer; the first
    # message carries the total payload size so the buffer can be preallocated.
    responses = self.stub.PushFindMissingLabels(cIter)
    for idx, response in enumerate(responses):
        if idx == 0:
            commit = response.commit
            hBytes, offset = bytearray(response.total_byte_size), 0
        size = len(response.hashs)
        hBytes[offset: offset + size] = response.hashs
        offset += size

    # Decompress and deserialize the digests the server does not yet have.
    uncompBytes = blosc.decompress(hBytes)
    s_missing_raw = chunks.deserialize_record_pack(uncompBytes)
    s_mis_hsh = [chunks.deserialize_ident(raw).digest for raw in s_missing_raw]
    return s_mis_hsh
def push_find_missing_hash_records(self, commit, tmpDB: lmdb.Environment = None):
    if tmpDB is None:
        # Unpack the commit refs into a temporary LMDB environment when the
        # caller has not already done so.
        with tempfile.TemporaryDirectory() as tempD:
            tmpDF = os.path.join(tempD, 'test.lmdb')
            tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
            commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
            c_hashs_schemas = queries.RecordQuery(tmpDB).data_hash_to_schema_hash()
            c_hashes = list(set(c_hashs_schemas.keys()))
            tmpDB.close()
    else:
        c_hashs_schemas = queries.RecordQuery(tmpDB).data_hash_to_schema_hash()
        c_hashes = list(set(c_hashs_schemas.keys()))

    # Stream the local data-hash digests to the server in chunks.
    pb2_func = hangar_service_pb2.FindMissingHashRecordsRequest
    cIter = chunks.missingHashRequestIterator(commit, c_hashes, pb2_func)
    responses = self.stub.PushFindMissingHashRecords(cIter)

    # Reassemble the chunked response into one preallocated buffer; the first
    # message carries the total payload size.
    for idx, response in enumerate(responses):
        if idx == 0:
            commit = response.commit
            hBytes, offset = bytearray(response.total_byte_size), 0
        size = len(response.hashs)
        hBytes[offset: offset + size] = response.hashs
        offset += size

    # Pair every missing data hash with the schema hash it was written under.
    uncompBytes = blosc.decompress(hBytes)
    s_missing_hashs = msgpack.unpackb(uncompBytes, raw=False, use_list=False)
    s_mis_hsh_sch = [(s_hsh, c_hashs_schemas[s_hsh]) for s_hsh in s_missing_hashs]
    return s_mis_hsh_sch
def push_find_missing_schemas(self, commit, tmpDB: lmdb.Environment = None):
    if tmpDB is None:
        with tempfile.TemporaryDirectory() as tempD:
            tmpDF = os.path.join(tempD, 'test.lmdb')
            tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
            commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
            c_schemaset = set(queries.RecordQuery(tmpDB).schema_hashes())
            c_schemas = list(c_schemaset)
            tmpDB.close()
    else:
        c_schemaset = set(queries.RecordQuery(tmpDB).schema_hashes())
        c_schemas = list(c_schemaset)

    # Schema digests are few enough to fit in a single unary request, so no
    # chunked streaming is needed here.
    request = hangar_service_pb2.FindMissingSchemasRequest()
    request.commit = commit
    request.schema_digests.extend(c_schemas)
    response = self.stub.PushFindMissingSchemas(request)
    return response
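# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the upstream API): the three
# push_find_missing_* negotiations above are normally driven in sequence by a
# higher-level push operation. negotiate_push below is a hypothetical helper,
# assuming `client` is an instance of this class; it unpacks the commit refs
# into a single temporary LMDB environment once and hands it to all three
# calls, instead of letting each call pay the unpack cost itself.

def negotiate_push(client, commit):
    """Sketch: ask the server which pieces of ``commit`` it is missing."""
    with tempfile.TemporaryDirectory() as tempD:
        tmpDF = os.path.join(tempD, 'negotiate.lmdb')
        tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
        try:
            commiting.unpack_commit_ref(client.env.refenv, tmpDB, commit)
            # Schemas first: data records cannot be pushed without them.
            m_schemas = client.push_find_missing_schemas(commit, tmpDB=tmpDB)
            # Then the (data hash, schema hash) pairs the server lacks ...
            m_data = client.push_find_missing_hash_records(commit, tmpDB=tmpDB)
            # ... and finally any missing metadata / label digests.
            m_labels = client.push_find_missing_labels(commit, tmpDB=tmpDB)
        finally:
            tmpDB.close()
    return m_schemas, m_data, m_labels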
class Database(object):
    """
    Representation of a Database, this is the main API class

    :param name: The name of the database to open
    :type name: str
    :param conf: Any additional or custom options for this environment
    :type conf: dict
    """
    _debug = False
    _conf = {
        'map_size': 1024 * 1024 * 1024 * 2,
        'subdir': True,
        'metasync': False,
        'sync': True,
        'lock': True,
        'max_dbs': 64,
        'writemap': True,
        'map_async': True,
    }

    def __init__(self, name, conf=None, binlog=True, size=None):
        # Merge any caller-supplied environment options over the defaults.
        conf = dict(self._conf, **conf.get('env', {})) if conf else self._conf
        if size:
            conf['map_size'] = size
        self._tables = {}
        self._env = Environment(name, **conf)
        self._db = self._env.open_db()
        self._transaction = None
        try:
            self._binlog = self.env.open_db('__binlog__'.encode(), create=binlog)
        except NotFoundError:
            self._binlog = None
        try:
            self._metadata = self.env.open_db('__metadata__'.encode(), create=False)
        except NotFoundError:
            # Older databases kept metadata in the main db under '@' keys;
            # move those records into a dedicated '__metadata__' sub-db.
            self.migrate_metadata()

    def __del__(self):
        self.close()

    def migrate_metadata(self):
        self._metadata = self.env.open_db('__metadata__'.encode(), create=True)
        with self.env.begin(write=True) as txn:
            with Cursor(self._db, txn) as cursor:
                move_next = cursor.first
                while move_next():
                    move_next = cursor.next
                    name = cursor.key().decode()
                    if name[0] == '@':
                        record = txn.get(cursor.key(), db=self._db)
                        if not txn.put(name[1:].encode(), record, db=self._metadata):
                            txn.abort()
                            print("Failed to create new record")
                            return
                        if not txn.delete(cursor.key(), db=self._db):
                            print("Failed to delete old record")
                            return

    @property
    def env(self):
        """
        Return a reference to the current database environment

        :return: A Database Environment
        :rtype: Environment
        """
        return self._env

    @property
    def transaction(self):
        """
        Return a reference to the current transaction

        :return: The current transaction (or None)
        :rtype: DBTransaction
        """
        return self._transaction

    def binlog(self, enable=True):
        """
        Enable or disable binary logging; disabling will delete the transaction
        history too ...
        :param enable: Whether to enable or disable logging
        """
        if enable:
            if not self._binlog:
                self._binlog = self.env.open_db('__binlog__'.encode())
        else:
            if self._binlog:
                # Drop the binlog sub-db inside the active transaction if one
                # is open, otherwise in a fresh write transaction.
                if self.transaction:
                    self.transaction.txn.drop(self._binlog, True)
                else:
                    with self.env.begin(write=True) as txn:
                        txn.drop(self._binlog, True)
                self._binlog = None

    def begin(self):
        """
        Begin a new transaction returning a transaction reference (use with "with")

        :return: Reference to the new transaction
        :rtype: DBTransaction
        """
        self._transaction = DBTransaction(self)
        return self._transaction

    def end(self):
        """
        End an existing transaction committing all replication changes
        """
        self._transaction = None

    def close(self):
        """
        Close the current database
        """
        if self._env:
            self._env.close()
            self._env = None

    def sync(self, force=False):
        self.env.sync(force)

    def exists(self, name):
        """
        Test whether a table with a given name already exists

        :param name: Table name
        :type name: str
        :return: True if table exists
        :rtype: bool
        """
        return name in self.tables

    @property
    def tables(self):
        return self._return_tables(False)

    @property
    def tables_all(self):
        return self._return_tables(True)

    def _return_tables(self, all):
        """
        Generate a list of names of the tables associated with this database;
        hidden ('_') and temporary ('~') tables are skipped unless *all* is set

        :return: A list of table names
        :rtype: list
        """
        if self.transaction:
            txn = self.transaction.txn
            abort = False
        else:
            txn = self.env.begin()
            abort = True
        try:
            result = []
            with Cursor(self._db, txn) as cursor:
                if cursor.first():
                    while True:
                        name = cursor.key().decode()
                        if all or name[0] not in ['_', '~']:
                            result.append(name)
                        if not cursor.next():
                            break
            return result
        finally:
            if abort:
                txn.abort()

    def drop(self, name):
        """
        Drop a database table

        :param name: Name of table to drop
        :type name: str
        """
        if name not in self.tables_all:
            raise xTableMissing
        if name not in self._tables:
            # Open the table first so it is registered in the cache below.
            table = self.table(name)
        if name in self._tables:
            self._tables[name]._drop()
            del self._tables[name]

    def restructure(self, name):
        """
        Restructure a table: copy to a temporary table, then copy back. This
        will recreate the table and all its IDs, but will retain the original
        indexes (which it will regenerate).

        :param name: Name of the table to restructure
        :type name: str
        """
        txn = self.transaction.txn
        if name not in self.tables:
            raise xTableMissing
        src = self._tables[name]
        dst_name = '~' + name
        if dst_name in self.tables:
            raise xTableExists
        dst = self.table(dst_name)
        for doc in src.find():
            dst.append(doc, txn=txn)
        src.empty(txn=txn)
        for doc in dst.find(txn=txn):
            src.append(doc, txn=txn)
        dst._drop(txn=txn)
        del self._tables[dst_name]

    def table(self, name):
        """
        Return a reference to a table with a given name, creating it first if
        it doesn't already exist

        :param name: Name of table
        :type name: str
        :return: Reference to table
        :rtype: Table
        """
        if name not in self._tables:
            self._tables[name] = Table(self, name)
        return self._tables[name]
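# ---------------------------------------------------------------------------
# Hedged usage sketch (illustrative only, not from the upstream docs): a
# minimal round trip through the API above. The '/tmp/demo-db' path and the
# record contents are invented, and Table.append is assumed to accept a plain
# document dict, as it is used by restructure() above.

if __name__ == '__main__':
    db = Database('/tmp/demo-db', size=1024 * 1024 * 64)
    people = db.table('people')            # created on first access
    people.append({'name': 'ada'})         # assumed Table API
    print(db.exists('people'))             # True
    print(db.tables)                       # ['people']
    db.drop('people')
    db.close()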