def _delRowsByProp(self, prop, valu=None, mintime=None, maxtime=None):
    indx = self.index_pt if valu is None else self.index_pvt
    first_key, last_key, v_is_hashed, do_fast_compare = _calcFirstLastKeys(prop, valu,
                                                                           mintime, maxtime)

    with self._getTxn(write=True) as txn, txn.cursor(indx) as cursor:
        if not cursor.set_range(first_key):
            raise s_common.BadCoreStore(store='lmdb', mesg='Missing sentinel')
        while True:
            key, pk_enc = cursor.item()
            if do_fast_compare:
                if key[:len(first_key)] != first_key:
                    break
            else:
                if key.tobytes() >= last_key:
                    break

            if self._delRowAndIndices(txn, pk_enc,
                                      delete_pt=(valu is not None),
                                      delete_pvt=(valu is None),
                                      only_if_val=(valu if v_is_hashed else None)):
                # Delete did go through: delete entry at cursor
                if not cursor.delete():
                    raise s_common.BadCoreStore(store='lmdb', mesg='Delete failure')
            else:
                # Delete didn't go through: advance to next
                if not cursor.next():
                    raise s_common.BadCoreStore(store='lmdb', mesg='Missing sentinel')
def _delRowsById(self, iden):
    i_enc = _encIden(iden)

    with self._getTxn(write=True) as txn, txn.cursor(self.index_ip) as cursor:
        # Get the first record >= i_enc
        if not cursor.set_range(i_enc):
            raise s_common.BadCoreStore(store='lmdb', mesg='Missing sentinel')
        while True:
            # We don't use an iterator here because the delete already advances to the
            # next record
            key, value = cursor.item()
            if key[:len(i_enc)] != i_enc:
                return
            p_enc = key[len(i_enc):].tobytes()
            # Need to copy out with tobytes because we're deleting
            pk_enc = value.tobytes()

            if not cursor.delete():
                raise s_common.BadCoreStore(store='lmdb', mesg='Delete failure')
            self._delRowAndIndices(txn, pk_enc, i_enc=i_enc, p_enc=p_enc,
                                   delete_ip=False)
def getRowsByProp(self, prop, valu=None, limit=None, mintime=None, maxtime=None,
                  do_count_only=False):
    indx = self.index_pt if valu is None else self.index_pvt
    first_key, last_key, v_is_hashed, do_fast_compare = _calcFirstLastKeys(prop, valu,
                                                                           mintime, maxtime)

    count = 0
    rows = []

    with self._getTxn() as txn, txn.cursor(indx) as cursor:
        if not cursor.set_range(first_key):
            raise s_common.BadCoreStore(store='lmdb', mesg='Missing sentinel')
        while True:
            if limit is not None and count >= limit:
                break
            key, pk_enc = cursor.item()
            if do_fast_compare:
                if key[:len(first_key)] != first_key:
                    break
            else:
                if key.tobytes() >= last_key:
                    break

            if v_is_hashed or not do_count_only:
                row = self._getRowByPkValEnc(txn, pk_enc)
                if v_is_hashed and row[2] != valu:
                    # Hash collision: the index entry matches the hashed key but not the
                    # actual value.  Advance the cursor before skipping, otherwise the
                    # loop would spin on the same record.
                    if not cursor.next():
                        raise s_common.BadCoreStore(store='lmdb', mesg='Missing sentinel')
                    continue
                if not do_count_only:
                    rows.append(row)

            count += 1
            if not cursor.next():
                raise s_common.BadCoreStore(store='lmdb', mesg='Missing sentinel')

    return count if do_count_only else rows
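# Illustrative only (not part of the storage class): a hedged sketch of how the query
# method above might be called.  The property name 'foo:bar', the value 10, and the
# variable `store` (an instance of this LMDB storage class) are hypothetical.
#
#     # Rows for a property regardless of value: walks the P-T index.
#     rows = store.getRowsByProp('foo:bar')
#
#     # Rows for a specific value: walks the P-V-T index instead.
#     rows = store.getRowsByProp('foo:bar', valu=10, limit=100)
#
#     # Count only, without materializing full result rows.
#     n = store.getRowsByProp('foo:bar', valu=10, do_count_only=True)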
def _delRowsByIdProp(self, iden, prop, valu=None):
    i_enc = _encIden(iden)
    p_enc = _encProp(prop)
    first_key = i_enc + p_enc

    with self._getTxn(write=True) as txn, txn.cursor(self.index_ip) as cursor:
        # Retrieve and delete I-P index
        if not cursor.set_range(first_key):
            raise s_common.BadCoreStore(store='lmdb', mesg='Missing sentinel')
        while True:
            # We don't use an iterator here because the delete already advances to the
            # next record
            key, value = cursor.item()
            if key[:len(first_key)] != first_key:
                return
            # Need to copy out with tobytes because we're deleting
            pk_enc = value.tobytes()

            # Delete the row and the other indices
            if not self._delRowAndIndices(txn, pk_enc, i_enc=i_enc, p_enc=p_enc,
                                          delete_ip=False, only_if_val=valu):
                if not cursor.next():
                    raise s_common.BadCoreStore(store='lmdb', mesg='Missing sentinel')
            else:
                if not cursor.delete():
                    raise s_common.BadCoreStore(store='lmdb', mesg='Delete failure')
def _delRowAndIndices(self, txn, pk_enc, i_enc=None, p_enc=None, v_key_enc=None, t_enc=None,
                      delete_ip=True, delete_pvt=True, delete_pt=True, only_if_val=None):
    '''
    Deletes the row corresponding to pk_enc and the indices pointing to it
    '''
    with txn.cursor(self.rows) as cursor:
        if not cursor.set_key(pk_enc):
            raise s_common.BadCoreStore(store='lmdb', mesg='Missing PK')
        i, p, v, t = s_msgpack.un(cursor.value())

        if only_if_val is not None and only_if_val != v:
            return False
        cursor.delete()

    if delete_ip and i_enc is None:
        i_enc = _encIden(i)

    if p_enc is None:
        p_enc = _encProp(p)

    if delete_pvt and v_key_enc is None:
        v_key_enc = _encValKey(v)

    if (delete_pvt or delete_pt) and t_enc is None:
        t_enc = s_msgpack.en(t)

    if delete_ip:
        # Delete I-P index entry
        if not txn.delete(i_enc + p_enc, value=pk_enc, db=self.index_ip):
            raise s_common.BadCoreStore(store='lmdb', mesg='Missing I-P index')

    if delete_pvt:
        # Delete P-V-T index entry
        if not txn.delete(p_enc + v_key_enc + t_enc, value=pk_enc, db=self.index_pvt):
            raise s_common.BadCoreStore(store='lmdb', mesg='Missing P-V-T index')

    if delete_pt:
        # Delete P-T index entry
        if not txn.delete(p_enc + t_enc, value=pk_enc, db=self.index_pt):
            raise s_common.BadCoreStore(store='lmdb', mesg='Missing P-T index')

    return True
def _addRows(self, rows):
    '''
    Adds a bunch of rows to the database

    Take care: this was written this way for performance, in particular when len(rows)
    is large.
    '''
    encs = []

    with self._getTxn(write=True) as txn:
        next_pk = self.next_pk

        # First, we encode all the i, p, v, t for all rows
        for i, p, v, t in rows:
            if next_pk > MAX_PK:
                raise s_common.HitCoreLimit(name='MAX_PK', size=MAX_PK,
                                            mesg='Out of primary key values')
            if len(p) > MAX_PROP_LEN:
                raise s_common.HitCoreLimit(name='MAX_PROP_LEN', size=MAX_PROP_LEN,
                                            mesg='Property length too large')
            i_enc = _encIden(i)
            p_enc = _encProp(p)
            v_key_enc = _encValKey(v)
            t_enc = s_msgpack.en(t)
            pk_enc = _encPk(next_pk)
            row_enc = s_msgpack.en((i, p, v, t))

            # idx          0      1       2        3       4          5
            encs.append((i_enc, p_enc, row_enc, t_enc, v_key_enc, pk_enc))
            next_pk += 1

        # An iterator of what goes into the main table: key=pk_enc, val=encoded(i, p, v, t)
        kvs = ((x[5], x[2]) for x in encs)

        # Shove it all in at once
        consumed, added = txn.cursor(self.rows).putmulti(kvs, overwrite=False, append=True)
        if consumed != added or consumed != len(encs):
            # Will only fail if record already exists, which should never happen
            raise s_common.BadCoreStore(store='lmdb', mesg='unexpected pk in DB')

        # Update the indices for all rows
        kvs = ((x[0] + x[1], x[5]) for x in encs)
        txn.cursor(self.index_ip).putmulti(kvs, dupdata=True)
        kvs = ((x[1] + x[4] + x[3], x[5]) for x in encs)
        txn.cursor(self.index_pvt).putmulti(kvs, dupdata=True)
        kvs = ((x[1] + x[3], x[5]) for x in encs)
        txn.cursor(self.index_pt).putmulti(kvs, dupdata=True)

        # self.next_pk should be protected from multiple writers.  Luckily the lmdb write
        # lock does that for us.
        self.next_pk = next_pk
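# A summary of the on-disk layout implied by _addRows above, derived from how the encoded
# tuple fields are concatenated (the arrow notation is illustrative, not code).  The three
# index tables are presumably opened with dupsort enabled, since putmulti(dupdata=True)
# and value-specific txn.delete(..., value=pk_enc) calls are used against them:
#
#     rows       table:  pk_enc                     -> msgpack((i, p, v, t))
#     index_ip   table:  i_enc + p_enc              -> pk_enc
#     index_pvt  table:  p_enc + v_key_enc + t_enc  -> pk_enc
#     index_pt   table:  p_enc + t_enc              -> pk_enc
#
# Every index entry stores only the primary key; _getRowByPkValEnc below resolves it back
# to the full row from the main table.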
def _hasBlobValu(self, key):
    rows = self._getBlobValuRows(key)

    if len(rows) > 1:  # pragma: no cover
        raise s_common.BadCoreStore(store=self.getCoreType(),
                                    mesg='Too many blob rows received.')

    if not rows:
        return False
    return True
def getRowsByIdProp(self, iden, prop, valu=None):
    # For now not making an i-p-v index because multiple v for a given i, p are probably rare
    iden_enc = _encIden(iden)
    prop_enc = _encProp(prop)

    first_key = iden_enc + prop_enc

    ret = []
    with self._getTxn() as txn, txn.cursor(self.index_ip) as cursor:
        if not cursor.set_range(first_key):
            raise s_common.BadCoreStore(store='lmdb', mesg='Missing sentinel')
        for key, value in cursor:
            if key.tobytes() != first_key:
                return ret
            row = self._getRowByPkValEnc(txn, value)
            if valu is not None and row[2] != valu:
                continue
            ret.append(row)
        raise s_common.BadCoreStore(store='lmdb', mesg='Missing sentinel')
def _subrangeRows(self, p_enc, first_val, last_val, limit, right_closed, do_count_only):
    '''
    Performs part of a range query, either completely negative or non-negative
    '''
    first_key = p_enc + _encValKey(first_val)

    am_going_backwards = (first_val < 0)

    last_key = p_enc + _encValKey(last_val)

    ret = []
    count = 0

    # Figure out the terminating condition of the loop
    if am_going_backwards:
        term_cmp = bytes.__lt__ if right_closed else bytes.__le__
    else:
        term_cmp = bytes.__gt__ if right_closed else bytes.__ge__

    with self._getTxn() as txn, txn.cursor(self.index_pvt) as cursor:
        if not cursor.set_range(first_key):
            raise s_common.BadCoreStore(store='lmdb', mesg='Missing sentinel')
        if am_going_backwards:
            # set_range sets the cursor at the first key >= first_key; if we're going
            # backwards we actually want the first key <= first_key
            if cursor.key()[:len(first_key)].tobytes() > first_key:
                if not cursor.prev():
                    raise s_common.BadCoreStore(store='lmdb', mesg='Missing sentinel')
            it = cursor.iterprev(keys=True, values=True)
        else:
            it = cursor.iternext(keys=True, values=True)

        for key, value in it:
            if term_cmp(key[:len(last_key)].tobytes(), last_key):
                break
            if limit is not None and count >= limit:
                break
            if not do_count_only:
                ret.append(self._getRowByPkValEnc(txn, value))
            count += 1

    return count if do_count_only else ret
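# Note (assumption; the caller is not shown in this section): per the docstring above,
# _subrangeRows only ever handles a sub-range that is entirely negative or entirely
# non-negative.  A range query whose bounds straddle zero is presumably split by the
# caller into two passes, e.g. for a hypothetical range [-5, 10]:
#
#     neg = self._subrangeRows(p_enc, -5, -1, limit, right_closed=True, do_count_only=False)
#     pos = self._subrangeRows(p_enc, 0, 10, limit, right_closed=True, do_count_only=False)
#
# The backwards iteration for negative first_val suggests that negative values sort in
# reverse order under _encValKey, which is why the two halves cannot be walked in one pass.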
def getRowsById(self, iden):
    iden_enc = _encIden(iden)
    rows = []
    with self._getTxn() as txn, txn.cursor(self.index_ip) as cursor:
        if not cursor.set_range(iden_enc):
            raise s_common.BadCoreStore(store='lmdb', mesg='Missing sentinel')
        for key, pk_enc in cursor:
            if key[:len(iden_enc)] != iden_enc:
                break
            rows.append(self._getRowByPkValEnc(txn, pk_enc))

    return rows
def _getRowByPkValEnc(self, txn, pk_enc):
    row = txn.get(pk_enc, db=self.rows)
    if row is None:
        raise s_common.BadCoreStore(store='lmdb', mesg='Index val has no corresponding row')
    return s_msgpack.un(row)
def _initDbConn(self):
    dbinfo = self._initDbInfo()
    dbname = dbinfo.get('name')

    # Initial DB size.  Must be < 2 GiB for 32-bit.  Can be big for 64-bit systems.  Will
    # create a file of that size.  On Windows, will actually immediately take up that much
    # disk space.
    DEFAULT_MAP_SIZE = 512 * 1024 * 1024

    # _write_lock exists solely to hold off other threads' write transactions long enough
    # to potentially increase the map size.
    self._write_lock = Lock()

    map_size = self._link[1].get('lmdb:mapsize', DEFAULT_MAP_SIZE)
    self._map_size, _ = s_datamodel.getTypeNorm('int', map_size)
    self._max_map_size = 2**46 if sys.maxsize > 2**32 else 2**30

    map_slack = self._link[1].get('lmdb:mapslack', 2 ** 30)
    self._map_slack, _ = s_datamodel.getTypeNorm('int', map_slack)

    # Maximum number of 'databases', really tables.  We use 5 different tables (1 main plus
    # 3 indices and a blob store), + 10 tables for possible migration use cases.
    MAX_DBS = 5 + 10

    # Flush system buffers to disk only once per transaction.  Setting this to False can
    # lead to loss of the last transaction, but not corruption.
    metasync_val = self._link[1].get('lmdb:metasync', False)
    metasync, _ = s_datamodel.getTypeNorm('bool', metasync_val)
    metasync = (metasync == 1)

    # If sync is False, could lead to database corruption on power loss
    sync_val = self._link[1].get('lmdb:sync', True)
    sync, _ = s_datamodel.getTypeNorm('bool', sync_val)
    sync = (sync == 1)

    # Write data directly to mapped memory
    WRITEMAP = True

    # Doesn't create a subdirectory for storage files
    SUBDIR = False

    # We can disable locking, but bad things might happen if we have multiple threads
    DEFAULT_LOCK = True
    lock_val = self._link[1].get('lmdb:lock', DEFAULT_LOCK)
    lock, _ = s_datamodel.getTypeNorm('bool', lock_val)
    lock = (lock == 1)

    # Maximum simultaneous readers.
    MAX_READERS = 4
    max_readers = self._link[1].get('lmdb:maxreaders', MAX_READERS)
    max_readers, _ = s_datamodel.getTypeNorm('int', max_readers)
    if max_readers == 1:
        lock = False

    self.dbenv = lmdb.Environment(dbname,
                                  map_size=self._map_size,
                                  subdir=SUBDIR,
                                  metasync=metasync,
                                  writemap=WRITEMAP,
                                  max_readers=max_readers,
                                  max_dbs=MAX_DBS,
                                  sync=sync,
                                  lock=lock)

    # Check we're not running a weird version of LMDB
    if self.dbenv.stat()['psize'] != 4096:
        raise s_common.BadCoreStore(store='lmdb', mesg='Unknown version of lmdb configured')

    # Ensure we have enough room in the map for expansion
    self._ensure_map_slack()

    def onfini():
        self.dbenv.close()
    self.onfini(onfini)
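# Illustrative only: the lmdb:* link options read in _initDbConn above are pulled from the
# link tuple's info dict (self._link[1]).  How that link is constructed is outside this
# section; the dict below is an assumption showing the option names and value types the
# code accepts.
#
#     link_info = {
#         'lmdb:mapsize': 1024 * 1024 * 1024,  # initial map size in bytes
#         'lmdb:mapslack': 2 ** 30,            # slack kept ahead of the data, in bytes
#         'lmdb:metasync': 0,                  # 0/1, normalized via getTypeNorm('bool', ...)
#         'lmdb:sync': 1,                      # 0 risks corruption on power loss
#         'lmdb:lock': 1,                      # disable only for single-threaded use
#         'lmdb:maxreaders': 4,
#     }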