def OnAddHeader(self, header):
    hHash = header.Hash.ToBytes()

    if hHash not in self._header_index:
        self._header_index.append(hHash)

    while header.Index - 2000 >= self._stored_header_count:
        ms = StreamManager.GetStream()
        w = BinaryWriter(ms)
        headers_to_write = self._header_index[self._stored_header_count:self._stored_header_count + 2000]
        w.Write2000256List(headers_to_write)
        out = ms.ToArray()
        StreamManager.ReleaseStream(ms)

        # with self._db.write_batch() as wb:
        wb = rocksdb.WriteBatch()
        wb.put(DBPrefix.IX_HeaderHashList + self._stored_header_count.to_bytes(4, 'little'), out)
        self._db.write(wb)

        self._stored_header_count += 2000
        logger.debug("Trimming stored header index %s" % self._stored_header_count)

    # with self._db.write_batch() as wb:
    wb = rocksdb.WriteBatch()
    wb.put(DBPrefix.DATA_Block + hHash, bytes(8) + header.ToArray())
    wb.put(DBPrefix.SYS_CurrentHeader, hHash + header.Index.to_bytes(4, 'little'))
    self._db.write(wb)
def append_block(self, block):
    hdr = block.header()
    batch = rocksdb.WriteBatch()

    # check prev_hash
    tip = self.tip()
    if tip:
        assert hdr.prev_header() == tip.hash(), 'invalid block.'

    h = hdr.hash()
    batch.put(b'b/' + h, hdr.raw())
    batch.put(b'e/fl/' + hdr.prev_header(), h)

    undos = None
    if not block.is_genesis():
        undos = self._get_block_undos(block)
    self.utxo_apply_block(block, batch)

    for tx in block.transactions():
        for out in tx.outputs():
            batch.put(b'a/' + out.addr, b'')

    self.set_tip(hdr, batch)
    self.db.write(batch)

    # write body
    epoch, _ = hdr.slot()
    db = self.open_epoch_db(epoch, readonly=False)
    batch = rocksdb.WriteBatch()
    if hdr.is_genesis():
        assert not db.get(b'g')
        batch.put(b'g', h)
    else:
        batch.put(b'u/' + h, cbor.dumps(undos))
    batch.put(b'b/' + h, block.raw())
    db.write(batch)
def Rollback(self, keys, startTS):
    '''
    @type keys: list(str)
    @type startTS: uint64
    '''
    self.mu.Lock()
    batch = rocksdb.WriteBatch()
    err = None
    for k in keys:
        err = rollbackKey(self.db, batch, k, startTS)
        logger.info("rollbackKey, key=%s, startTS=%s, err=%s" % (k, startTS, err))
        if err is not None:
            break
    if err is None:
        try:
            self.db.write(batch)
        except Exception as e:
            print(e)
            err = ErrWriteBatch()
    self.mu.UnLock()
    return err
def Commit(self, keys, startTS, commitTS):
    '''
    @type keys: list(str)
    @type startTS: uint64
    @type commitTS: uint64
    @rtype: ErrRetryable
    '''
    self.mu.Lock()
    batch = rocksdb.WriteBatch()
    err = None
    for k in keys:
        err = commitKey(self.db, batch, k, startTS, commitTS)
        logger.debug("commitKey, key=%s,startTS=%d,commitTS=%d, err=%s"
                     % (k, startTS, commitTS, err))
        if err is not None:
            break
    if err is None:
        try:
            self.db.write(batch)
        except Exception as e:
            print(e)
            err = ErrWriteBatch()
    self.mu.UnLock()
    return err
def update(self, _data=None, **kwargs):
    batch = rocksdb.WriteBatch()
    if _data:
        kwargs.update(_data)
    for key, value in kwargs.items():
        batch.put(key, value)
    self.db.write(batch)
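# A hedged usage sketch of the batched-update pattern above: every put lands
# in a single WriteBatch, so all keys become visible together or not at all.
# Assumes a `db` opened elsewhere via rocksdb.DB(); python-rocksdb expects
# bytes keys and values.
def batched_update_example(db):
    batch = rocksdb.WriteBatch()
    batch.put(b'user/1', b'alice')
    batch.put(b'user/2', b'bob')
    db.write(batch)  # both keys land atomically, or neither does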
def rocksdb_write_thread(queue):
    """
    Worker thread to write ngrams to rocksdb, spawned by rocksdb_writer.
    """
    while True:
        try:
            # fetch items until 'None' is added to queue
            item = queue.get()
            if item is None:
                break
            totals, merge_value_prefix, merges = item

            # skip storing jurisdiction-year combinations that already have ngrams
            if ngram_kv_store.get(totals[0]):
                continue

            # write in a batch so writes succeed or fail as a group
            batch = rocksdb.WriteBatch()

            # write each ngram, in the form (b'<n><gram>', pack(<jurisdiction_id>, <year>, <instance_count>, <document_count>))
            # see ngram_kv_store.NgramMergeOperator for how this value is merged into the existing b'<n><gram>' key
            for k, v in tqdm(merges, desc="Current write job", mininterval=.5):
                ngram_kv_store.merge(k, merge_value_prefix + v, packed=True, batch=batch)

            # write totals value
            ngram_kv_store.put(totals[0], totals[1], packed=True, batch=batch)

            # write batch
            ngram_kv_store.db.write(batch)
        finally:
            # let internal_queue.join() know not to wait for this job to complete
            queue.task_done()
def __init__(self, dbfile=None):
    dbfile = dbfile or f"/tmp/{os.urandom(8).hex()}"
    self.db = rocksdb.DB(dbfile, self.config_rocksdb())
    self.meta = set()
    self.cache = defaultdict(lambda: None)
    self.batch = rocksdb.WriteBatch()
    self.batch_mode = False
def __init__(self, adapter, chunk_size=100000):
    self.chunk_size = chunk_size
    self.batch = rocksdb.WriteBatch()
    self.adapter = adapter
    self.key = None
    self.value = None
    self.serde = None
def queue_delete(
    self,
):
    database_keys_iterator = self.database_obj.iterkeys()

    while True:
        database_write_batch = rocksdb.WriteBatch()

        num_of_keys = 0
        num_of_keys_per_chunk = 5000
        for key in database_keys_iterator:
            database_write_batch.delete(key)
            num_of_keys += 1
            if num_of_keys == num_of_keys_per_chunk:
                break

        self.database_obj.write(
            batch=database_write_batch,
            sync=True,
        )

        if num_of_keys != num_of_keys_per_chunk:
            break

    self.database_obj.compact_range(
        begin=None,
        end=None,
    )

    self.first_key = b''
    self.last_key = b''
    self.db_was_changed_recently = True

    return True
def bulk_delete(self, to_delete):
    batch = rocksdb.WriteBatch()
    for item in to_delete:
        batch.delete(item.encode())
    self.database.write(batch)
    return len(to_delete)
def setBatch(self, batch: Iterable[Tuple]):
    b = rocksdb.WriteBatch()
    for key, value in batch:
        key = self.to_byte_repr(key)
        value = self.to_byte_repr(value)
        b.put(key, value)
    self._db.write(b, sync=False)
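# Hedged usage sketch for setBatch() above: any iterable of (key, value)
# pairs works, and to_byte_repr() is assumed to normalize both to bytes.
# `kv` is a hypothetical instance of the owning class.
# kv.setBatch([('height', '42'), ('tip', 'abc123')])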
def create_write_batch(items):
    batch = rocksdb.WriteBatch()
    for item in items:
        if item.type.name == "put":
            batch.put(bytes(item.key, 'utf-8'), bytes(item.value, 'utf-8'))
        elif item.type.name == "delete":
            batch.delete(bytes(item.key, 'utf-8'))
    return batch
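# Hedged usage sketch for create_write_batch(): Item and OpType are
# hypothetical stand-ins for the caller's item type; only the
# .type.name / .key / .value shape used above is assumed.
from collections import namedtuple
from enum import Enum

OpType = Enum('OpType', ['put', 'delete'])
Item = namedtuple('Item', ['type', 'key', 'value'])

batch = create_write_batch([
    Item(OpType.put, 'k1', 'v1'),
    Item(OpType.delete, 'k2', None),
])
# db.write(batch)  # apply atomically against an open rocksdb.DB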
def __init__(self, adapter: RocksdbAdapter):
    self.batch_size = RollPairConfKeys.EGGROLL_ROLLPAIR_ROCKSDB_WRITEBATCH_SIZE.get()
    self.batch = rocksdb.WriteBatch()
    self.adapter = adapter
    self.write_count = 0
    self.manual_merger = dict()
    self.has_write_op = False
    L.trace(f"created writeBatch={self.adapter.path} batch_size={self.batch_size}")
def _del_from_height(self, height: int) -> None:
    """Delete all entries starting from the given height up."""
    import rocksdb
    batch = rocksdb.WriteBatch()
    it = self._db.iterkeys(self._cf)
    it.seek(self._to_key(height))
    for _, key in it:
        batch.delete((self._cf, key))
    self._db.write(batch)
def add(self, journal: Journal, batch: rocksdb.WriteBatch = None) -> JournalID:
    batch, new_batch = (rocksdb.WriteBatch(), True) if batch is None else (batch, False)
    id = self._jdb._gen_journal_id(batch)
    batch.put((self._jdb._journals_cf, _pack(id)), _pack(journal, _encode))
    for name in journal.names:
        batch.put((self._jdb._journal_names_index_cf, _pack(name)), _pack(id))
    if new_batch:
        self._jdb._db.write(batch)
    return id
def BatchResolveLock(self, startKey, endKey, txnInfos):
    '''
    @type txnInfos: dict[int, int], dict[startTS]=commitTS
    '''
    self.mu.Lock()
    iterator = Iterator(self.db, mvccEncode(startKey, lockVer), mvccEncode(endKey, lockVer))
    err = None if iterator.Valid() else ErrIterator()
    if err is not None:
        self.mu.UnLock()
        return err

    currKey, _, _ = mvccDecode(iterator.Key())
    batch = rocksdb.WriteBatch()
    while iterator.Valid():
        dec = lockDecoder(expectKey=currKey)
        ok, err = dec.Decode(iterator)
        if err is not None:
            self.mu.UnLock()
            return err

        if ok:
            if dec.lock.startTS in txnInfos:
                commitTS = txnInfos[dec.lock.startTS]
                if commitTS > 0:
                    err = commitLock(batch, dec.lock, currKey, dec.lock.startTS, commitTS)
                    logger.debug("commitLock. key=%s,startTS=%d,commitTS=%d,err=%s",
                                 currKey, dec.lock.startTS, commitTS, err)
                else:
                    err = rollbackLock(batch, dec.lock, currKey, dec.lock.startTS)
                    logger.debug("rollbackLock. key=%s,startTS=%d,err=%s",
                                 currKey, dec.lock.startTS, err)
                if err is not None:
                    self.mu.UnLock()
                    return err

        skip = skipDecoder(currKey=currKey)
        _, err = skip.Decode(iterator)
        if err is not None:
            self.mu.UnLock()
            return err
        currKey = skip.currKey

    try:
        self.db.write(batch)
    except Exception as e:
        print(e)
        err = ErrWriteBatch()
    finally:
        self.mu.UnLock()
    return err
def handle_index(index):
    db_start_key = (index + '/').encode('utf-8')

    if request.method == 'GET':
        it = db.iterkeys()
        it.seek(db_start_key)
        count = 0
        if request.args.get('list', False):
            keys = []
            for db_key in it:
                if not db_key.startswith(db_start_key):
                    break
                count += 1
                _index, key = db_key.decode('utf-8').split('/')
                keys.append(key)
            return jsonify({'count': count, 'keys': keys})
        else:
            for db_key in it:
                if not db_key.startswith(db_start_key):
                    break
                count += 1
            return jsonify({'count': count})
    elif request.method == 'POST':
        try:
            results = []
            vec = np.array([int(x) for x in list(request.json['vec'])])
            rank = len(vec)
            max_dist = int(request.json.get('max', 10))
            it = db.iteritems()
            it.seek(db_start_key)
            for db_key, db_val in it:
                if not db_key.startswith(db_start_key):
                    break
                other_vec = np.array(msgpack.unpackb(db_val))
                if rank != len(other_vec):
                    continue
                dist = np.count_nonzero(vec != other_vec)
                if dist <= max_dist:
                    _index, key = db_key.decode('utf-8').split('/')
                    results.append({'key': key, 'dist': dist})
            return jsonify({'data': results})
        except KeyError as err:
            return jsonify({'errors': [{'message': 'missing arg: ' + str(err)}]}), 400
        except (TypeError, ValueError) as err:
            return jsonify({'errors': [{'message': 'invalid value: ' + str(err)}]}), 400
    else:
        it = db.iterkeys()
        it.seek(db_start_key)
        count = 0
        batch = rocksdb.WriteBatch()
        for db_key in it:
            if not db_key.startswith(db_start_key):
                break
            batch.delete(db_key)
            count += 1
        db.write(batch)
        return jsonify({'data': count})
def remove_vector(dataset, key):
    db_key = ('k/' + key).encode('utf-8')
    batch = rocksdb.WriteBatch()
    old_vec = get_vector_by_key(dataset, key)
    if old_vec:
        old_db_hash = ('h/' + hash_vec(old_vec) + '/' + key).encode('utf-8')
        batch.delete(old_db_hash)
    dataset.delete(db_key)
    dataset.write(batch)
    return True
@contextmanager  # requires: from contextlib import contextmanager
def in_transaction(self, *args, **kwargs):
    """
    This is not really a RocksDB transaction, which python-rocksdb doesn't
    seem to support, but a WriteBatch, which is effectively the same for
    write-only transactions that fit in RAM.
    """
    self.batch = rocksdb.WriteBatch(*args, **kwargs)
    try:
        yield self.batch
        self.db.write(self.batch)
    finally:
        self.batch = None
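# Hedged usage sketch for in_transaction() above (hypothetical `store`
# object): the batch is written only on a clean exit from the with-block,
# and self.batch is reset either way.
# with store.in_transaction() as batch:
#     batch.put(b'k1', b'v1')
#     batch.put(b'k2', b'v2')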
def handle_recache_utxo(args):
    store = Storage(args.root)
    print('Removing all cached utxo')
    remove_prefix(store.db, b'ut/t/')
    print('Iterating blocks')
    count = 0
    for block in store.blocks():
        batch = rocksdb.WriteBatch()
        store.utxo_apply_block(block, batch)
        store.db.write(batch)
        count += 1
        print('%d' % count, end='\r')
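# handle_recache_utxo() calls remove_prefix(), which is defined elsewhere.
# A plausible sketch of it, following the seek-and-scan prefix-deletion
# pattern used in the other snippets here (an assumption, not the project's
# actual implementation):
def remove_prefix(db, prefix):
    batch = rocksdb.WriteBatch()
    it = db.iterkeys()
    it.seek(prefix)
    for key in it:
        if not key.startswith(prefix):
            break  # left the prefix range; keys are sorted
        batch.delete(key)
    db.write(batch)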
def test_write_batch(self): batch = rocksdb.WriteBatch() batch.put(b"key", b"v1") batch.delete(b"key") batch.put(b"key", b"v2") batch.put(b"key", b"v3") batch.put(b"a", b"b") self.db.write(batch) ref = {b'a': b'b', b'key': b'v3'} ret = self.db.multi_get([b'key', b'a']) self.assertEqual(ref, ret)
def clean_old_versions(dataset, latest_version):
    print('cleaning versions before', latest_version)
    start_k = b'i/'
    end_k = ('i/' + latest_version).encode('utf-8')
    batch = rocksdb.WriteBatch()
    it = dataset.iterkeys()
    it.seek(b'i/')
    for db_i in it:
        if not db_i.startswith(start_k) or db_i.startswith(end_k):
            break
        batch.delete(db_i)
    dataset.write(batch)
def update(self, id: JournalID, journal_new: Journal,
           journal_old: Optional[Journal] = None,
           batch: rocksdb.WriteBatch = None):
    if journal_old is None:
        journal_old = self.get(id)
    batch, new_batch = (rocksdb.WriteBatch(), True) if batch is None else (batch, False)
    batch.put((self._jdb._journals_cf, _pack(id)), _pack(journal_new, _encode))
    for name in journal_old.names - journal_new.names:
        batch.delete((self._jdb._journal_names_index_cf, _pack(name)))
    for name in journal_new.names - journal_old.names:
        batch.put((self._jdb._journal_names_index_cf, _pack(name)), _pack(id))
    if new_batch:
        self._jdb._db.write(batch)
def add_vector(dataset, key, vec):
    batch = rocksdb.WriteBatch()
    db_key = ('k/' + key).encode('utf-8')
    old_vec = get_vector_by_key(dataset, key)
    if old_vec:
        old_db_hash = ('h/' + hash_vec(old_vec) + '/' + key).encode('utf-8')
        batch.delete(old_db_hash)
    db_vec = msgpack.packb(vec, use_bin_type=True)
    db_hash = ('h/' + hash_vec(vec) + '/' + key).encode('utf-8')
    batch.put(db_key, db_vec)
    batch.put(db_hash, db_key)
    dataset.write(batch)
def test_write_batch(self): batch = rocksdb.WriteBatch() batch.put(b"key", b"v1", column_family=self.cf_a) batch.delete(b"key", column_family=self.cf_a) batch.put(b"key", b"v2", column_family=self.cf_a) batch.put(b"key", b"v3", column_family=self.cf_a) batch.put(b"a", b"1", column_family=self.cf_a) batch.put(b"b", b"2", column_family=self.cf_b) self.db.write(batch) self.assertEqual(b"v3", self.db.get(b"key", column_family=self.cf_a)) self.assertEqual(b"1", self.db.get(b"a", column_family=self.cf_a)) self.assertEqual(b"2", self.db.get(b"b", column_family=self.cf_b))
def queue_pop(
    self,
    number_of_items,
):
    items = []
    keys = []

    if self.db_was_changed_recently:
        self.db_was_changed_recently = False
        self.database_iterator = self.database_obj.iteritems()
        self.database_iterator.seek(self.first_key)

    items_fetched = 0
    for key, value in self.database_iterator:
        items.append(value)
        keys.append(key)
        items_fetched += 1
        if items_fetched == number_of_items:
            break

    if keys:
        database_write_batch = rocksdb.WriteBatch()
        for key in keys:
            database_write_batch.delete(key)
        self.database_obj.write(
            batch=database_write_batch,
            disable_wal=True,
        )

        try:
            key, value = next(self.database_iterator)
            self.first_key = key
            self.database_iterator.seek(key)
        except StopIteration:
            self.first_key = b''
            self.last_key = b''
            self.database_obj.compact_range(
                begin=None,
                end=None,
            )
    else:
        self.first_key = b''
        self.last_key = b''
        self.database_obj.compact_range(
            begin=None,
            end=None,
        )

    return items
def Prewrite(self, mutations, primary, startTS, ttl):
    '''
    prewrite mutations
    @type mutations: []*kvrpcpb.Mutation
    @type primary: []byte
    @type startTS: uint64
    @type ttl: uint64
    @rtype: list(BaseError)
        one of these errs:
        ErrKeyAlreadyExist: when op is Insert and the key already exists.
        ErrLocked: wait to resolve lock
        ErrRetryable: restart txn
        None: success
    @attention: only when anyError is False are the mutations applied to db.
    '''
    self.mu.Lock()
    anyError = False
    batch = rocksdb.WriteBatch()
    errs = list()
    for m in mutations:
        # If the operation is Insert, first check whether the key already exists.
        err = None
        if m.op == kvrpcpb.Insert:
            v, err = self.getValue(m.key, startTS, kvrpcpb.SI)
            if err is not None:
                errs.append(err)
                anyError = True
                continue
            if v is not None:
                err = ErrKeyAlreadyExist(Key=m.key)
                errs.append(err)
                anyError = True
                continue
        err = prewriteMutation(self.db, batch, m, startTS, primary, ttl)
        logger.debug("prewriteMutation, key=%s, value=%s, startTS=%s, err=%s"
                     % (m.key, m.value, startTS, err))
        errs.append(err)
        if err is not None:
            anyError = True
    if not anyError:
        try:
            self.db.write(batch)
        except Exception as e:
            print(e)
    self.mu.UnLock()
    return errs
def queue_push(
    self,
    items,
    priority,
):
    if priority == 'NORMAL':
        if self.last_key != b'':
            next_item_number = int(self.last_key.decode('utf-8')) + 1
        else:
            next_item_number = int((10**16) / 2)
        factor = 1
    elif priority == 'HIGH':
        if self.first_key != b'':
            next_item_number = int(self.first_key.decode('utf-8')) - 1
        else:
            next_item_number = int((10**16) / 2) - 1
        factor = -1
    else:
        raise Exception('unknown priority level: {priority}'.format(
            priority=priority,
        ))

    database_write_batch = rocksdb.WriteBatch()
    for item in items:
        next_item_key = str(next_item_number).rjust(20, '0').encode('utf-8')
        database_write_batch.put(
            next_item_key,
            item,
        )
        next_item_number += factor

    self.database_obj.write(
        batch=database_write_batch,
        disable_wal=True,
    )

    if factor == 1:
        self.last_key = next_item_key
        if self.first_key == b'':
            self.first_key = str(int((10**16) / 2)).rjust(20, '0').encode('utf-8')
    else:
        self.first_key = next_item_key
        if self.last_key == b'':
            self.last_key = str(int((10**16) / 2) - 1).rjust(20, '0').encode('utf-8')

    self.db_was_changed_recently = True

    return True
def append_block(self, block):
    hdr = block.header()
    batch = rocksdb.WriteBatch()

    # check prev_hash
    tip = self.tip()
    if tip:
        assert hdr.prev_header() == tip, 'invalid block.'

    hash = hdr.hash()
    batch.put(b'b/' + hash, hdr.raw())
    batch.put(b'e/fl/' + hdr.prev_header(), hash)
    self.utxo_apply_block(block, batch)
    self.set_tip(hash, batch)
    self.db.write(batch)
def add(self, keys: List[int], docs: List['gnes_pb2.Document'], *args, **kwargs):
    import rocksdb
    write_batch = rocksdb.WriteBatch()
    for k, d in zip(keys, docs):
        key_bytes = pickle.dumps(k)
        if self.drop_raw_data:
            d.ClearField('raw_data')
        if self.drop_chunk_blob:
            for c in d.chunks:
                c.ClearField('blob')
        value_bytes = d.SerializeToString()
        write_batch.put(key_bytes, value_bytes)
    self._db.write(write_batch, sync=True)
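# A minimal end-to-end sketch of the WriteBatch pattern shared by all the
# snippets above, using the python-rocksdb calls they rely on. The path is
# arbitrary, chosen only for illustration.
import rocksdb

db = rocksdb.DB('/tmp/example.db', rocksdb.Options(create_if_missing=True))
batch = rocksdb.WriteBatch()
batch.put(b'alpha', b'1')
batch.put(b'beta', b'2')
batch.delete(b'alpha')
db.write(batch)  # atomic: either all three ops apply or none do
assert db.get(b'alpha') is None and db.get(b'beta') == b'2'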