def main():
    kvTotalSize, dbName = sys.argv[1], sys.argv[2] + ".db"
    # kvcntleft = int(sys.argv[3])
    m = re.match(r'(\d+)G', kvTotalSize)
    if not m:
        print("invalid format for kv size")
        return
    kvTotalSize = int(m.group(1)) << 30  # gigabytes -> bytes

    if os.path.exists(dbName):
        print("db already exists")
        return

    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 10000
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(8 << 20))
    db = rocksdb.DB(dbName, opts)

    curkvSize = 0
    # resume the key sequence from the last key already in the database
    it = db.iterkeys()
    it.seek_to_last()
    last_key = it.get()
    seqi = int.from_bytes(last_key, "big")
    print(seqi)

    while curkvSize < kvTotalSize:  # and kvcntleft:
        seqi += 1
        key = seqi.to_bytes(40, "big")
        value = generateValue()  # generateValue() is assumed to be defined elsewhere
        db.put(key, value)
        curkvSize += len(key) + len(value)
        # kvcntleft -= 1

    print("total number of keys: {}, total size in bytes: {}".format(
        seqi, curkvSize))
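# The snippet above calls generateValue() without defining it. A minimal sketch of
# what such a helper could look like, assuming the goal is simply to produce
# random payloads of a few kilobytes (the size range is an assumption, not taken
# from the original code):
import os
import random

def generateValue(min_size=1024, max_size=4096):
    # random-length, random-content byte string used to pad the database
    return os.urandom(random.randint(min_size, max_size))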
def __init__(
    self,
    database_path,
    database_name,
):
    database_path = '{database_path}/keys/{database_name}'.format(
        database_path=database_path,
        database_name=database_name,
    )
    os.makedirs(
        name=database_path,
        exist_ok=True,
    )

    rocksdb_options = rocksdb.Options()
    rocksdb_options.create_if_missing = True
    rocksdb_options.max_open_files = 300000
    rocksdb_options.write_buffer_size = 67108864
    rocksdb_options.max_write_buffer_number = 3
    rocksdb_options.target_file_size_base = 67108864
    rocksdb_options.compression = rocksdb.CompressionType.no_compression
    rocksdb_options.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(
            bits_per_key=10,
        ),
    )

    self.database_obj = rocksdb.DB(
        db_name=database_path,
        opts=rocksdb_options,
    )
    self.database_obj.compact_range(
        begin=None,
        end=None,
    )
def open(self):
    # initial "production ready" settings via
    # https://python-rocksdb.readthedocs.io/en/latest/tutorial/index.html
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 300000
    opts.write_buffer_size = 64 * 2**20  # 64MB
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 64 * 2**20  # 64MB
    opts.merge_operator = self.NgramMergeOperator()
    opts.compression = rocksdb.CompressionType.lz4_compression

    # fast ingest stuff
    # via https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ -- "Q: What's the fastest way to load data into RocksDB?"
    # these settings require manual compaction after ingest
    opts.max_background_flushes = 8
    opts.level0_file_num_compaction_trigger = -1
    opts.level0_slowdown_writes_trigger = -1
    opts.level0_stop_writes_trigger = 2**16  # default is 24 -- we want to avoid hitting this until it's done
    opts.write_buffer_size = 32 * 2**20  # default is 4 * 2**20
    opts.max_write_buffer_number = 8  # default is 2

    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * 2**30),  # 2GB
        block_cache_compressed=rocksdb.LRUCache(500 * 2**20))  # 500MB

    self._db = rocksdb.DB(self.db_path(), opts, read_only=self.read_only)
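# A minimal sketch of the ingest-then-compact flow that the fast-ingest comments
# above describe. bulk_load() and its records argument are hypothetical names used
# only for illustration; WriteBatch and compact_range() are standard python-rocksdb
# calls.
def bulk_load(db, records):
    # write everything first; compaction is deferred by the level0_* settings above
    batch = rocksdb.WriteBatch()
    for key, value in records:
        batch.put(key, value)
    db.write(batch)
    # trigger the deferred compaction manually once the ingest is finished
    db.compact_range(begin=None, end=None)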
def __init__(self, db_name, q=None):
    import rocksdb
    self._db_name = db_name
    self._q = q
    if self._q is None:
        opts = rocksdb.Options()
        opts.create_if_missing = True
        opts.max_open_files = 300000
        opts.max_open_files = -1  # Dangerous
        opts.write_buffer_size = 2 * 512 * 1024**2
        opts.max_write_buffer_number = 3
        opts.target_file_size_base = 512 * 1024**2  # MB
        # opts.compression = rocksdb.CompressionType.zlib_compression
        opts.table_factory = rocksdb.BlockBasedTableFactory(
            filter_policy=rocksdb.BloomFilterPolicy(10),
            # cache_index_and_filter_blocks=True,
            # optimize_filters_for_hits=True,
            block_cache=rocksdb.LRUCache(5 * (1024**3)),
            block_size=64 * 1024,
            block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
        self._db = rocksdb.DB(self._db_name, opts)

    A, mA, C, mC, G, T = 0, 1, 2, 3, 4, 5
def rocks_opts(**kwargs):
    # pylint: disable=no-member
    opts = rocksdb.Options(**kwargs)
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)),
    )
    return opts
def default_opts():
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)),
    )
    return opts
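# Hedged usage sketch for the option builders above: open (or create) a database
# with default_opts() and do a basic round trip. The 'example.db' path and the
# example_usage() name are arbitrary placeholders, not part of the original code.
def example_usage():
    db = rocksdb.DB('example.db', default_opts())
    db.put(b'key', b'value')
    assert db.get(b'key') == b'value'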
def setup(self):
    if self.db is None:
        self.opts.create_if_missing = True
        self.opts.max_open_files = 300000
        self.opts.write_buffer_size = 67108864
        self.opts.max_write_buffer_number = 3
        self.opts.target_file_size_base = 67108864
        self.opts.merge_operator = UpdateData()
        self.opts.table_factory = rocksdb.BlockBasedTableFactory(
            filter_policy=rocksdb.BloomFilterPolicy(10),
            block_cache=rocksdb.LRUCache(2 * (1024**3)),
            block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
        self.db = rocksdb.DB(f"{self.dbname}.db", self.opts)
def __init__(
    self,
    database_path,
    database_name,
):
    database_path = '{database_path}/queues/{database_name}'.format(
        database_path=database_path,
        database_name=database_name,
    )
    os.makedirs(
        name=database_path,
        exist_ok=True,
    )

    rocksdb_options = rocksdb.Options()
    rocksdb_options.create_if_missing = True
    rocksdb_options.max_open_files = 300000
    rocksdb_options.write_buffer_size = 67108864
    rocksdb_options.max_write_buffer_number = 3
    rocksdb_options.target_file_size_base = 67108864
    rocksdb_options.compression = rocksdb.CompressionType.no_compression
    rocksdb_options.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(
            bits_per_key=10,
        ),
    )

    self.database_obj = rocksdb.DB(
        db_name=database_path,
        opts=rocksdb_options,
    )
    self.database_obj.compact_range(
        begin=None,
        end=None,
    )

    self.database_iterator = self.database_obj.iteritems()

    self.database_iterator.seek_to_last()
    try:
        key, value = next(self.database_iterator)
        self.last_key = key
    except StopIteration:
        self.last_key = b''

    self.database_iterator.seek_to_first()
    try:
        key, value = next(self.database_iterator)
        self.first_key = key
        self.database_iterator.seek(self.first_key)
    except StopIteration:
        self.first_key = b''

    self.db_was_changed_recently = False
def default_db_options(uid_length=parms.uid_length):
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.prefix_extractor = StaticPrefix(uid_length)
    opts.max_open_files = 300000
    opts.write_buffer_size = 67108864
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 67108864
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))
    return opts
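# StaticPrefix is referenced above but not defined in this snippet. A sketch of a
# fixed-length prefix extractor following the SliceTransform interface shown in the
# python-rocksdb tutorial; the constructor argument mirrors the StaticPrefix(uid_length)
# call above.
class StaticPrefix(rocksdb.interfaces.SliceTransform):
    def __init__(self, prefix_len):
        self.prefix_len = prefix_len

    def name(self):
        return b'static'

    def transform(self, src):
        # keep only the first prefix_len bytes of the key
        return (0, self.prefix_len)

    def in_domain(self, src):
        return len(src) >= self.prefix_len

    def in_range(self, dst):
        return len(dst) == self.prefix_len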
def as_options(self) -> Options:
    return rocksdb.Options(
        create_if_missing=True,
        max_open_files=self.max_open_files,
        write_buffer_size=self.write_buffer_size,
        max_write_buffer_number=self.max_write_buffer_number,
        target_file_size_base=self.target_file_size_base,
        table_factory=rocksdb.BlockBasedTableFactory(
            filter_policy=rocksdb.BloomFilterPolicy(
                self.bloom_filter_size),
            block_cache=rocksdb.LRUCache(self.block_cache_size),
            block_cache_compressed=rocksdb.LRUCache(
                self.block_cache_compressed_size),
        ),
        **self.extra_options)
def get_rocks_db():
    size = 128 * 1024 * 1024
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.write_buffer_size = size
    opts.max_write_buffer_number = 2
    opts.target_file_size_base = size
    opts.compression = rocksdb.CompressionType.no_compression
    opts.disable_auto_compactions = True
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        block_size=512,
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))
    db = rocksdb.DB('rocks.db', opts)
    return db
def initRocks(self, ind):
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 300000
    opts.write_buffer_size = 1073741824
    opts.max_write_buffer_number = 20
    opts.target_file_size_base = 67108864
    opts.max_background_compactions = 8
    opts.max_background_flushes = 4
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))
    db = rocksdb.DB(self.dirs + '/' + ind + '.db', opts)
    return db
def init_store(config):
    """ Only called if using rocksdb for now. """
    global hashes_db
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 10000
    opts.write_buffer_size = 67108864
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 67108864
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))
    hashes_db = rocksdb.DB(os.path.join(config.storage.folder.value, HASHES_STORAGE), opts)
def post_init(self):
    import rocksdb
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 300000
    opts.write_buffer_size = 67108864
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 67108864
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    for key, value in self.kwargs.items():
        setattr(opts, key, value)
    self._db = rocksdb.DB(self.data_path, opts, read_only=self.read_only)
def get_rocksdb_options():
    rocks_options = rocksdb.Options()
    rocks_options.create_if_missing = True
    rocks_options.merge_operator = StringAddOperator()
    rocks_options.compression = CompressionType.zstd_compression
    rocks_options.max_open_files = 300000
    rocks_options.write_buffer_size = 67 * 1024**2
    rocks_options.max_write_buffer_number = 3
    rocks_options.target_file_size_base = 256 * 1024**2
    rocks_options.max_log_file_size = 4 * 1024**2
    rocks_options.keep_log_file_num = 100
    # we want to set this option, but it's not included in the python client
    # rocks_options.optimize_filters_for_hits = True
    rocks_options.table_factory = rocksdb.BlockBasedTableFactory(
        block_cache=rocksdb.LRUCache(1 * 1024**3),
        block_size=16 * 1024,
        filter_policy=rocksdb.BloomFilterPolicy(10),
    )
    return rocks_options
def init():
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = int(config['rocksdb']['max_open_files'])
    opts.write_buffer_size = int(config['rocksdb']['write_buffer_size'])
    opts.max_write_buffer_number = int(
        config['rocksdb']['max_write_buffer_number'])
    opts.target_file_size_base = int(
        config['rocksdb']['target_file_size_base'])
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(
            int(config['rocksdb']['filter_policy'])),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    # TODO find a way to add block_cache* in config, only this format is allowed for rocksdb
    # https://python-rocksdb.readthedocs.io/en/latest/tutorial/#prefixextractor
    opts.prefix_extractor = StaticPrefix()
    db = rocksdb.DB(config['rocksdb']['db_file'], opts)
    return db
def __init__(self, read_only=False, filepath=None, max_open_files=300000,
             write_buffer_size=67108864, max_write_buffer_number=3,
             target_file_size_base=67108864, **kwargs):
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = max_open_files
    opts.write_buffer_size = write_buffer_size
    opts.max_write_buffer_number = max_write_buffer_number
    opts.target_file_size_base = target_file_size_base
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    opts.merge_operator = AssocCounter()
    self._db = rocksdb.DB(filepath, opts)
    super().__init__(read_only)
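# AssocCounter is used as the merge operator above but not defined in this snippet.
# A sketch of an associative counter merge operator, essentially the example from
# the python-rocksdb tutorial: values are ASCII-encoded integers that are summed
# on merge.
class AssocCounter(rocksdb.interfaces.AssociativeMergeOperator):
    def merge(self, key, existing_value, value):
        if existing_value:
            return (True, str(int(existing_value) + int(value)).encode('ascii'))
        return (True, value)

    def name(self):
        return b'AssocCounter'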
def initdb(fn):
    """Initialize a database with a given name

    Arguments:
    fn - name of the database
    """
    # create database
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 300000
    opts.write_buffer_size = 67108864
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 67108864
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    db = rocksdb.DB(fn + '.db', opts)
    print(
        "\x1b[6;30;42m[+]\x1b[0m setting up database {}.db in current directory..."
        .format(fn))
    return db
                    required=False)
global args
args = parser.parse_args()

if not args.memory:
    import rocksdb
    # create database for nodes
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 300000
    opts.write_buffer_size = 67108864
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 67108864
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    db = rocksdb.DB('nodes.db', opts)
    if args.verbose:
        print(
            "\x1b[6;30;42m[+]\x1b[0m setting up database nodes.db in current directory..."
        )
else:
    db = dict()

if args.size is None:
    # auto set base graph size in # of edges
    total = 0
    with open(args.input) as f: