def open(self):
    # initial "production ready" settings via
    # https://python-rocksdb.readthedocs.io/en/latest/tutorial/index.html
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 300000
    opts.write_buffer_size = 64 * 2**20  # 64MB
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 64 * 2**20  # 64MB
    opts.merge_operator = self.NgramMergeOperator()
    opts.compression = rocksdb.CompressionType.lz4_compression

    # fast ingest stuff
    # via https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ -- "Q: What's
    # the fastest way to load data into RocksDB?"
    # these settings require manual compaction after ingest
    opts.max_background_flushes = 8
    opts.level0_file_num_compaction_trigger = -1
    opts.level0_slowdown_writes_trigger = -1
    opts.level0_stop_writes_trigger = 2**16  # default is 24 -- we want to avoid hitting this until it's done
    opts.write_buffer_size = 32 * 2**20  # overrides the 64MB above; default is 4 * 2**20
    opts.max_write_buffer_number = 8  # overrides the 3 above; default is 2

    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * 2**30),  # 2GB
        block_cache_compressed=rocksdb.LRUCache(500 * 2**20))  # 500MB

    self._db = rocksdb.DB(self.db_path(), opts, read_only=self.read_only)
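# The "fast ingest" comment above notes that these settings require a manual
# compaction once loading is done. A minimal sketch of that follow-up step
# using python-rocksdb's compact_range() -- the finish_ingest() name is
# hypothetical, not from the original code:
def finish_ingest(self):
    # with no bounds, compact_range() compacts the entire key space
    self._db.compact_range()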
def __init__(self, db_name, q=None):
    import rocksdb
    self._db_name = db_name
    self._q = q
    if self._q is None:
        opts = rocksdb.Options()
        opts.create_if_missing = True
        opts.max_open_files = -1  # Dangerous: disables the open-file limit entirely
        opts.write_buffer_size = 2 * 512 * 1024**2  # 1GB
        opts.max_write_buffer_number = 3
        opts.target_file_size_base = 512 * 1024**2  # 512MB
        # opts.compression = rocksdb.CompressionType.zlib_compression
        opts.table_factory = rocksdb.BlockBasedTableFactory(
            filter_policy=rocksdb.BloomFilterPolicy(10),
            # cache_index_and_filter_blocks=True,
            # optimize_filters_for_hits=True,
            block_cache=rocksdb.LRUCache(5 * (1024**3)),
            block_size=64 * 1024,
            block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
        self._db = rocksdb.DB(self._db_name, opts)

A, mA, C, mC, G, T = 0, 1, 2, 3, 4, 5
def rocks_opts(**kwargs):  # pylint: disable=no-member
    opts = rocksdb.Options(**kwargs)
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)),
    )
    return opts
def default_opts():
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)),
    )
    return opts
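# A minimal usage sketch for the option builders above (the 'example.db'
# path and the key/value pair are illustrative, not from the original code):
db = rocksdb.DB('example.db', default_opts())
db.put(b'key', b'value')
assert db.get(b'key') == b'value'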
def setup(self):
    if self.db is None:
        self.opts.create_if_missing = True
        self.opts.max_open_files = 300000
        self.opts.write_buffer_size = 67108864
        self.opts.max_write_buffer_number = 3
        self.opts.target_file_size_base = 67108864
        self.opts.merge_operator = UpdateData()
        self.opts.table_factory = rocksdb.BlockBasedTableFactory(
            filter_policy=rocksdb.BloomFilterPolicy(10),
            block_cache=rocksdb.LRUCache(2 * (1024**3)),
            block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
        self.db = rocksdb.DB(f"{self.dbname}.db", self.opts)
def __apply_db_config_opts(self, opts):
    if self._db_config is None:
        return
    _db_config = self._db_config
    if _db_config['max_open_files'] is not None:
        opts.max_open_files = _db_config['max_open_files']
    if _db_config['max_log_file_size'] is not None:
        opts.max_log_file_size = _db_config['max_log_file_size']
    if _db_config['keep_log_file_num'] is not None:
        opts.keep_log_file_num = _db_config['keep_log_file_num']
    if _db_config['db_log_dir'] is not None:
        opts.db_log_dir = _db_config['db_log_dir']

    # Compaction related options
    if _db_config['target_file_size_base'] is not None:
        opts.target_file_size_base = _db_config['target_file_size_base']

    # Memtable related options
    if _db_config['write_buffer_size'] is not None:
        opts.write_buffer_size = _db_config['write_buffer_size']
    if _db_config['max_write_buffer_number'] is not None:
        opts.max_write_buffer_number = _db_config['max_write_buffer_number']

    if _db_config['block_size'] is not None \
            or _db_config['block_cache_size'] is not None \
            or _db_config['block_cache_compressed_size'] is not None \
            or _db_config['no_block_cache'] is not None:
        block_size = _db_config['block_size']
        block_cache_size = _db_config['block_cache_size']
        block_cache_compressed_size = _db_config['block_cache_compressed_size']
        no_block_cache = _db_config['no_block_cache']
        block_cache = None
        block_cache_compressed = None
        if block_cache_size is not None:
            block_cache = rocksdb.LRUCache(block_cache_size)
        if block_cache_compressed_size is not None:
            block_cache_compressed = rocksdb.LRUCache(block_cache_compressed_size)
        opts.table_factory = rocksdb.BlockBasedTableFactory(
            block_size=block_size,
            block_cache=block_cache,
            block_cache_compressed=block_cache_compressed,
            no_block_cache=no_block_cache)
def default_db_options(uid_length=parms.uid_length):
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.prefix_extractor = StaticPrefix(uid_length)
    opts.max_open_files = 300000
    opts.write_buffer_size = 67108864
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 67108864
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))
    return opts
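# StaticPrefix is not defined above. A sketch of what a fixed-length prefix
# extractor typically looks like, modeled on python-rocksdb's SliceTransform
# interface and parameterized by uid_length; the class body is an assumption,
# not the original implementation:
from rocksdb.interfaces import SliceTransform

class StaticPrefix(SliceTransform):
    def __init__(self, prefix_len):
        self._prefix_len = prefix_len

    def name(self):
        return b'static'

    def transform(self, src):
        # use the first prefix_len bytes of the key as the prefix
        return (0, self._prefix_len)

    def in_domain(self, src):
        return len(src) >= self._prefix_len

    def in_range(self, dst):
        return len(dst) == self._prefix_len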
def as_options(self) -> Options:
    return rocksdb.Options(
        create_if_missing=True,
        max_open_files=self.max_open_files,
        write_buffer_size=self.write_buffer_size,
        max_write_buffer_number=self.max_write_buffer_number,
        target_file_size_base=self.target_file_size_base,
        table_factory=rocksdb.BlockBasedTableFactory(
            filter_policy=rocksdb.BloomFilterPolicy(self.bloom_filter_size),
            block_cache=rocksdb.LRUCache(self.block_cache_size),
            block_cache_compressed=rocksdb.LRUCache(self.block_cache_compressed_size),
        ),
        **self.extra_options)
def get_rocks_db():
    size = 128 * 1024 * 1024
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.write_buffer_size = size
    opts.max_write_buffer_number = 2
    opts.target_file_size_base = size
    opts.compression = rocksdb.CompressionType.no_compression
    opts.disable_auto_compactions = True
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        block_size=512,
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))
    db = rocksdb.DB('rocks.db', opts)
    return db
def main():
    kvTotalSize, dbName = sys.argv[1], sys.argv[2] + ".db"
    # kvcntleft = int(sys.argv[3])
    m = re.match(r'(\d+)G', kvTotalSize)
    if not m:
        print("invalid format for kv size")
        return
    kvTotalSize = int(m.group(1)) << 30
    if os.path.exists(dbName):
        print("db already exists")
        return
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 10000
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(8 << 20))
    db = rocksdb.DB(dbName, opts)
    curkvSize = 0
    it = db.iterkeys()
    it.seek_to_last()
    last_key = it.get()
    # the database was just created, so guard against an empty key space
    seqi = int.from_bytes(last_key, "big") if last_key else 0
    print(seqi)
    while curkvSize < kvTotalSize:  # and kvcntleft:
        seqi += 1
        key = seqi.to_bytes(40, "big")
        value = generateValue()
        db.put(key, value)
        curkvSize += len(key) + len(value)
        # kvcntleft -= 1
    print("total number of keys: {}, total size in bytes: {}".format(
        seqi, curkvSize))
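# generateValue() is not defined above. A minimal stand-in that returns a
# random payload might look like this (the 1 KiB size is an assumption, not
# taken from the original script):
import os

def generateValue(size=1024):
    return os.urandom(size)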
def initRocks(self, ind):
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 300000
    opts.write_buffer_size = 1073741824
    opts.max_write_buffer_number = 20
    opts.target_file_size_base = 67108864
    opts.max_background_compactions = 8
    opts.max_background_flushes = 4
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))
    db = rocksdb.DB(self.dirs + '/' + ind + '.db', opts)
    return db
def init_store(config):
    """ Only called if using rocksdb for now. """
    global hashes_db
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 10000
    opts.write_buffer_size = 67108864
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 67108864
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))
    hashes_db = rocksdb.DB(
        os.path.join(config.storage.folder.value, HASHES_STORAGE), opts)
def post_init(self):
    import rocksdb
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 300000
    opts.write_buffer_size = 67108864
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 67108864
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    for key, value in self.kwargs.items():
        setattr(opts, key, value)
    self._db = rocksdb.DB(self.data_path, opts, read_only=self.read_only)
def init():
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = int(config['rocksdb']['max_open_files'])
    opts.write_buffer_size = int(config['rocksdb']['write_buffer_size'])
    opts.max_write_buffer_number = int(
        config['rocksdb']['max_write_buffer_number'])
    opts.target_file_size_base = int(
        config['rocksdb']['target_file_size_base'])
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(
            int(config['rocksdb']['filter_policy'])),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    # TODO find a way to add block_cache* in config; only this format is
    # allowed for rocksdb
    # https://python-rocksdb.readthedocs.io/en/latest/tutorial/#prefixextractor
    opts.prefix_extractor = StaticPrefix()
    db = rocksdb.DB(config['rocksdb']['db_file'], opts)
    return db
def __init__(self, read_only=False, filepath=None, max_open_files=300000,
             write_buffer_size=67108864, max_write_buffer_number=3,
             target_file_size_base=67108864, **kwargs):
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = max_open_files
    opts.write_buffer_size = write_buffer_size
    opts.max_write_buffer_number = max_write_buffer_number
    opts.target_file_size_base = target_file_size_base
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    opts.merge_operator = AssocCounter()
    self._db = rocksdb.DB(filepath, opts)
    super().__init__(read_only)
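# AssocCounter is not shown above. A sketch modeled on the AssocCounter
# example in the python-rocksdb tutorial, which merges values by integer
# addition; treat this as an assumption about the original class's behavior:
class AssocCounter(rocksdb.interfaces.AssociativeMergeOperator):
    def merge(self, key, existing_value, value):
        if existing_value:
            s = int(existing_value) + int(value)
            return (True, str(s).encode('ascii'))
        return (True, value)

    def name(self):
        return b'AssocCounter'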
def __init__(self, path: str = './', with_index: bool = True,
             cache_capacity: Optional[int] = None,
             use_memory_indexes: bool = False):
    import rocksdb
    self.log = logger.new()
    self._path = path
    self._use_memory_indexes = use_memory_indexes
    tx_dir = os.path.join(path, _DB_NAME)
    lru_cache = cache_capacity and rocksdb.LRUCache(cache_capacity)
    table_factory = rocksdb.BlockBasedTableFactory(block_cache=lru_cache)
    options = rocksdb.Options(
        table_factory=table_factory,
        write_buffer_size=83886080,  # 80MB (default is 4MB)
        compression=rocksdb.CompressionType.no_compression,
        allow_mmap_writes=True,  # default is False
        allow_mmap_reads=True,  # default is already True
    )
    cf_names: List[bytes]
    try:
        # get the list of existing column families
        cf_names = rocksdb.list_column_families(tx_dir, options)
    except rocksdb.errors.RocksIOError:
        # this means the db doesn't exist, a repair will create one
        rocksdb.repair_db(tx_dir, options)
        cf_names = []
    # we need to open all column families
    column_families = {cf: rocksdb.ColumnFamilyOptions() for cf in cf_names}
    # finally, open the database
    self._db = rocksdb.DB(tx_dir, options, column_families=column_families)
    self.log.debug('open db', cf_list=[
        cf.name.decode('ascii') for cf in self._db.column_families])
    self._cf_tx = self._get_or_create_column_family(_CF_NAME_TX)
    self._cf_meta = self._get_or_create_column_family(_CF_NAME_META)
    self._cf_attr = self._get_or_create_column_family(_CF_NAME_ATTR)
    super().__init__(with_index=with_index)
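# _get_or_create_column_family() is called above but not shown. A sketch of
# what it might look like, assuming the python-rocksdb fork's column-family
# methods get_column_family()/create_column_family(); treat the exact calls
# as an assumption rather than a confirmed API:
def _get_or_create_column_family(self, name: bytes):
    import rocksdb
    cf = self._db.get_column_family(name)
    if cf is None:
        cf = self._db.create_column_family(name, rocksdb.ColumnFamilyOptions())
    return cf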
def initdb(fn):
    """Initialize a database with a given name

    Arguments:
    fn - name of the database
    """
    # create database
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 300000
    opts.write_buffer_size = 67108864
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 67108864
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    db = rocksdb.DB(fn + '.db', opts)
    print("\x1b[6;30;42m[+]\x1b[0m setting up database {}.db in current directory..."
          .format(fn))
    return db
def get_rocksdb_options():
    rocks_options = rocksdb.Options()
    rocks_options.create_if_missing = True
    rocks_options.merge_operator = StringAddOperator()
    rocks_options.compression = CompressionType.zstd_compression
    rocks_options.max_open_files = 300000
    rocks_options.write_buffer_size = 67 * 1024**2
    rocks_options.max_write_buffer_number = 3
    rocks_options.target_file_size_base = 256 * 1024**2
    rocks_options.max_log_file_size = 4 * 1024**2
    rocks_options.keep_log_file_num = 100
    # we want to set this option, but it's not included in the python client
    # rocks_options.optimize_filters_for_hits = True
    rocks_options.table_factory = rocksdb.BlockBasedTableFactory(
        block_cache=rocksdb.LRUCache(1 * 1024**3),
        block_size=16 * 1024,
        filter_policy=rocksdb.BloomFilterPolicy(10),
    )
    return rocks_options
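# StringAddOperator is referenced above but not shown. A sketch of a
# string-concatenating associative merge operator using python-rocksdb's
# AssociativeMergeOperator interface; the plain byte concatenation is an
# assumption about the original class's behavior:
class StringAddOperator(rocksdb.interfaces.AssociativeMergeOperator):
    def merge(self, key, existing_value, value):
        if existing_value:
            return (True, existing_value + value)
        return (True, value)

    def name(self):
        return b'StringAddOperator'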
def test_row_cache(self):
    opts = rocksdb.Options()
    self.assertIsNone(opts.row_cache)
    opts.row_cache = cache = rocksdb.LRUCache(2 * 1024 * 1024)
    self.assertEqual(cache, opts.row_cache)
def test_block_options(self):
    rocksdb.BlockBasedTableFactory(
        block_size=4096,
        filter_policy=TestFilterPolicy(),
        block_cache=rocksdb.LRUCache(100))
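# TestFilterPolicy is not shown above. A sketch modeled on the custom
# FilterPolicy example in the python-rocksdb test suite (the trivial filter
# body is an assumption, not the original class):
class TestFilterPolicy(rocksdb.interfaces.FilterPolicy):
    def name(self):
        return b'testfilter'

    def create_filter(self, keys):
        # a real policy would build a filter over the keys here
        return b'nix'

    def key_may_match(self, key, filter_):
        # always report a possible match, so reads fall through to the data
        return True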
global args
args = parser.parse_args()

if not args.memory:
    import rocksdb

    # create database for nodes
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 300000
    opts.write_buffer_size = 67108864
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 67108864
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    db = rocksdb.DB('nodes.db', opts)
    if args.verbose:
        print("\x1b[6;30;42m[+]\x1b[0m setting up database nodes.db in current directory...")
else:
    db = dict()

if args.size is None:
    # auto set base graph size in # of edges
    total = 0
    with open(args.input) as f:
        for line in f:
sizes = (64 * 2**i * (j + 2) for i in range(4) for j in range(2))
sizes = [s for s in sizes if s <= 1024]

parser.add_argument('--out', type=str, default="out.lmdb")
parser.add_argument('--size', type=str, default=','.join(map(str, sizes)))
parser.add_argument('--n_worker', type=int, default=12)
parser.add_argument('path', type=str)
args = parser.parse_args()

sizes = [int(s.strip()) for s in args.size.split(',')]
print('Make dataset of image sizes:', ', '.join(str(s) for s in sizes))

imgset = datasets.ImageFolder(args.path)

opts = rocksdb.Options()
opts.create_if_missing = True
opts.write_buffer_size = 2**34
opts.max_write_buffer_number = 2**14
opts.max_open_files = -1
opts.compression = rocksdb.CompressionType.no_compression
opts.target_file_size_base = 2**26
opts.target_file_size_multiplier = 4
opts.max_bytes_for_level_base = 2**28
opts.manifest_preallocation_size = 2**36
opts.table_factory = rocksdb.BlockBasedTableFactory(
    filter_policy=rocksdb.BloomFilterPolicy(32),
    block_cache=rocksdb.LRUCache(2**34, 32),
)
# note: the hard-coded "out.rdb" path ignores the --out argument above
db = rocksdb.DB("out.rdb", opts)
prepare(db, imgset, args.n_worker, sizes=sizes)
print("Found " + str(cpus) + " CPU cores on your system. " + str(max_jobs) + " cores will be used.") opts = rocksdb.Options() # Create new instance if not already present opts.create_if_missing = True # We have A LOT of BTC-Transactions, so file open limit should be increased opts.max_open_files = 1000000 # Increase buffer size since I/O is the bottleneck, not RAM opts.write_buffer_size = db_memory * 0.3 opts.max_write_buffer_number = 3 opts.target_file_size_base = 67108864 # Bloom filters for faster lookup opts.table_factory = rocksdb.BlockBasedTableFactory( filter_policy=rocksdb.BloomFilterPolicy(12), block_cache=rocksdb.LRUCache(60 * (1024 ** 3)), block_cache_compressed=rocksdb.LRUCache(20 * (1024 ** 3))) block_cache=rocksdb.LRUCache(db_memory * 0.3), block_cache_compressed=rocksdb.LRUCache(db_memory * 0.4)) # Load RocksDB Database db = rocksdb.DB(DB_PATH, opts) # Load Blockchain blockchain = Blockchain(BLOCK_PATH) # Initialize iterator with respect to user specifications if END_BLOCK < 1: blockchain = blockchain.get_ordered_blocks(INDEX_PATH, start=START_BLOCK) TOTAL_BLOCKS = len(blockchain.blockIndexes) print("Processing the entire blockchain.") print("INFO: Depending on your system, this process may take up to a week. You can interrupt the process " +
# print("Found " + str(cpus) + " CPU cores on your system. " + str(max_jobs) + " cores will be used.") opts = rocksdb.Options() # Create new instance if not already present opts.create_if_missing = True # We have A LOT of BTC-Transactions, so file open limit should be increased opts.max_open_files = 1000000 # Increase buffer size since I/O is the bottleneck, not RAM opts.write_buffer_size = db_memory * 0.3 opts.max_write_buffer_number = 3 opts.target_file_size_base = 67108864 # Bloom filters for faster lookup opts.table_factory = rocksdb.BlockBasedTableFactory( filter_policy=rocksdb.BloomFilterPolicy(12), block_cache=rocksdb.LRUCache(db_memory * 0.3), block_cache_compressed=rocksdb.LRUCache(db_memory * 0.4)) # Load RocksDB Database db = rocksdb.DB(DB_PATH, opts) # Load Blockchain blockchain = Blockchain(BLOCK_PATH) # Initialize iterator with respect to user specifications if END_BLOCK < 1: blockchain = blockchain.get_ordered_blocks(INDEX_PATH, start=START_BLOCK) TOTAL_BLOCKS = len(blockchain.blockIndexes) print("Processing the entire blockchain.") print("INFO: Depending on your system, this process may take up to a week. You can interrupt the process " + "at any time by pressing CTRL+C.") iterator = blockchain