Example no. 1
import os
import re
import sys

import rocksdb


def main():
    kvTotalSize, dbName = sys.argv[1], sys.argv[2] + ".db"
    # kvcntleft = int(sys.argv[3])
    m = re.match(r'(\d+)G', kvTotalSize)
    if not m:
        print("invalid format for kv size")
        return
    kvTotalSize = int(m.group(1)) << 30
    if os.path.exists(dbName):
        print("db already exists")
        return
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 10000
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(8 << 20))
    db = rocksdb.DB(dbName, opts)
    curkvSize = 0
    # Resume the key sequence from the last stored key if data exists;
    # a freshly created database starts at zero.
    it = db.iterkeys()
    it.seek_to_last()
    try:
        seqi = int.from_bytes(next(it), "big")
    except StopIteration:
        seqi = 0
    print(seqi)
    while curkvSize < kvTotalSize:  # and kvcntleft:
        seqi += 1
        key = seqi.to_bytes(40, "big")
        value = generateValue()
        db.put(key, value)

        curkvSize += len(key) + len(value)
        #kvcntleft -= 1
    print("total number of keys: {}, total size in bytes: {}".format(
        seqi, curkvSize))
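The script above relies on a generateValue() helper that is not part of this excerpt. A minimal stand-in (hypothetical; the real helper may produce differently sized or structured payloads) could be:

def generateValue(size=1024):
    # Hypothetical stand-in: a fixed-size random payload.
    # Uses the os module already imported at the top of the script.
    return os.urandom(size)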
Example no. 2
    def __init__(
        self,
        database_path,
        database_name,
    ):
        database_path = '{database_path}/keys/{database_name}'.format(
            database_path=database_path,
            database_name=database_name,
        )
        os.makedirs(
            name=database_path,
            exist_ok=True,
        )

        rocksdb_options = rocksdb.Options()
        rocksdb_options.create_if_missing = True
        rocksdb_options.max_open_files = 300000
        rocksdb_options.write_buffer_size = 67108864
        rocksdb_options.max_write_buffer_number = 3
        rocksdb_options.target_file_size_base = 67108864
        rocksdb_options.compression = rocksdb.CompressionType.no_compression
        rocksdb_options.table_factory = rocksdb.BlockBasedTableFactory(
            filter_policy=rocksdb.BloomFilterPolicy(bits_per_key=10))

        self.database_obj = rocksdb.DB(
            db_name=database_path,
            opts=rocksdb_options,
        )

        self.database_obj.compact_range(
            begin=None,
            end=None,
        )
Example no. 3
    def open(self):
        # initial "production ready" settings via https://python-rocksdb.readthedocs.io/en/latest/tutorial/index.html
        opts = rocksdb.Options()
        opts.create_if_missing = True
        opts.max_open_files = 300000
        opts.write_buffer_size = 64 * 2**20  # 64MB
        opts.max_write_buffer_number = 3
        opts.target_file_size_base = 64 * 2**20  # 64MB
        opts.merge_operator = self.NgramMergeOperator()
        opts.compression = rocksdb.CompressionType.lz4_compression

        # fast ingest stuff
        # via https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ -- "Q: What's the fastest way to load data into RocksDB?"
        # these settings require manual compaction after ingest
        opts.max_background_flushes = 8
        opts.level0_file_num_compaction_trigger = -1
        opts.level0_slowdown_writes_trigger = -1
        opts.level0_stop_writes_trigger = 2**16  # default is 24 -- we want to avoid hitting this until it's done
        opts.write_buffer_size = 32 * 2**20  # default is 4 * 2 ** 20
        opts.max_write_buffer_number = 8  # default is 2

        opts.table_factory = rocksdb.BlockBasedTableFactory(
            filter_policy=rocksdb.BloomFilterPolicy(10),
            block_cache=rocksdb.LRUCache(2 * 2**30),  # 2GB
            block_cache_compressed=rocksdb.LRUCache(500 * 2**20))  # 500MB

        self._db = rocksdb.DB(self.db_path(), opts, read_only=self.read_only)
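The "fast ingest" comments above note that these settings require a manual compaction once loading is finished. With python-rocksdb that is a single compact_range() call over the full key range, the same call Examples no. 2 and no. 8 issue at startup. A sketch for this class (finish_ingest is a hypothetical method name):

    def finish_ingest(self):
        # Run once after bulk loading completes: compacts the entire key
        # range, since automatic compactions were effectively disabled.
        self._db.compact_range(begin=None, end=None)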
Example no. 4
    def __init__(self, db_name, q=None):
        import rocksdb
        self._db_name = db_name
        self._q = q

        if self._q is None:
            opts = rocksdb.Options()
            opts.create_if_missing = True
            opts.max_open_files = -1  # no limit on open files; can exhaust file descriptors on large databases
            opts.write_buffer_size = 2 * 512 * 1024**2
            opts.max_write_buffer_number = 3
            opts.target_file_size_base = 512 * 1024**2  # 512 MB

            #opts.compression = rocksdb.CompressionType.zlib_compression

            opts.table_factory = rocksdb.BlockBasedTableFactory(
                filter_policy=rocksdb.BloomFilterPolicy(10),
                #cache_index_and_filter_blocks=True,
                #optimize_filters_for_hits=True,
                block_cache=rocksdb.LRUCache(5 * (1024**3)),
                block_size=64 * 1024,
                block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))

            self._db = rocksdb.DB(self._db_name, opts)
        A, mA, C, mC, G, T = 0, 1, 2, 3, 4, 5
Example no. 5
def rocks_opts(**kwargs):
    # pylint: disable=no-member
    opts = rocksdb.Options(**kwargs)
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)),
    )
    return opts
Example no. 6
def default_opts():
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)),
    )
    return opts
Example no. 7
    def setup(self):
        if self.db is None:
            self.opts.create_if_missing = True
            self.opts.max_open_files = 300000
            self.opts.write_buffer_size = 67108864
            self.opts.max_write_buffer_number = 3
            self.opts.target_file_size_base = 67108864
            self.opts.merge_operator = UpdateData()
            self.opts.table_factory = rocksdb.BlockBasedTableFactory(
                filter_policy=rocksdb.BloomFilterPolicy(10),
                block_cache=rocksdb.LRUCache(2 * (1024**3)),
                block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
            # open only when not already open; reopening would hit the DB lock
            self.db = rocksdb.DB(f"{self.dbname}.db", self.opts)
Example no. 8
    def __init__(
        self,
        database_path,
        database_name,
    ):
        database_path = '{database_path}/queues/{database_name}'.format(
            database_path=database_path,
            database_name=database_name,
        )
        os.makedirs(
            name=database_path,
            exist_ok=True,
        )

        rocksdb_options = rocksdb.Options()
        rocksdb_options.create_if_missing = True
        rocksdb_options.max_open_files = 300000
        rocksdb_options.write_buffer_size = 67108864
        rocksdb_options.max_write_buffer_number = 3
        rocksdb_options.target_file_size_base = 67108864
        rocksdb_options.compression = rocksdb.CompressionType.no_compression
        rocksdb_options.table_factory = rocksdb.BlockBasedTableFactory(
            filter_policy=rocksdb.BloomFilterPolicy(bits_per_key=10))

        self.database_obj = rocksdb.DB(
            db_name=database_path,
            opts=rocksdb_options,
        )

        self.database_obj.compact_range(
            begin=None,
            end=None,
        )
        self.database_iterator = self.database_obj.iteritems()

        self.database_iterator.seek_to_last()
        try:
            key, value = next(self.database_iterator)
            self.last_key = key
        except StopIteration:
            self.last_key = b''

        self.database_iterator.seek_to_first()
        try:
            key, value = next(self.database_iterator)
            self.first_key = key
            self.database_iterator.seek(self.first_key)
        except StopIteration:
            self.first_key = b''

        self.db_was_changed_recently = False
Example no. 9
def default_db_options(uid_length=parms.uid_length):
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.prefix_extractor = StaticPrefix(uid_length)
    opts.max_open_files = 300000
    opts.write_buffer_size = 67108864
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 67108864

    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))
    return opts
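The StaticPrefix class assigned to opts.prefix_extractor is not included in this excerpt. Following the SliceTransform interface from the python-rocksdb tutorial (also linked in Example no. 16), a fixed-length extractor parametrized by uid_length might look like this sketch:

class StaticPrefix(rocksdb.interfaces.SliceTransform):
    # Hypothetical reconstruction: extracts a fixed-length key prefix.
    def __init__(self, length):
        self._length = length

    def name(self):
        return b'static'

    def transform(self, src):
        # (offset, length) of the prefix slice within the key
        return (0, self._length)

    def in_domain(self, src):
        return len(src) >= self._length

    def in_range(self, dst):
        return len(dst) == self._length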
Example no. 10
    def as_options(self) -> Options:
        return rocksdb.Options(
            create_if_missing=True,
            max_open_files=self.max_open_files,
            write_buffer_size=self.write_buffer_size,
            max_write_buffer_number=self.max_write_buffer_number,
            target_file_size_base=self.target_file_size_base,
            table_factory=rocksdb.BlockBasedTableFactory(
                filter_policy=rocksdb.BloomFilterPolicy(
                    self.bloom_filter_size),
                block_cache=rocksdb.LRUCache(self.block_cache_size),
                block_cache_compressed=rocksdb.LRUCache(
                    self.block_cache_compressed_size),
            ),
            **self.extra_options)
Example no. 11
def get_rocks_db():
    size = 128*1024*1024
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.write_buffer_size = size
    opts.max_write_buffer_number = 2
    opts.target_file_size_base = size
    opts.compression = rocksdb.CompressionType.no_compression
    opts.disable_auto_compactions = True
    opts.table_factory = rocksdb.BlockBasedTableFactory(
        block_size=512,
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))
    db = rocksdb.DB('rocks.db', opts)
    return db
Example no. 12
    def initRocks(self, ind):
        opts = rocksdb.Options()
        opts.create_if_missing = True
        opts.max_open_files = 300000
        opts.write_buffer_size = 1073741824
        opts.max_write_buffer_number = 20
        opts.target_file_size_base = 67108864
        opts.max_background_compactions = 8
        opts.max_background_flushes = 4

        opts.table_factory = rocksdb.BlockBasedTableFactory(
            filter_policy=rocksdb.BloomFilterPolicy(10),
            block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
            block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))

        db = rocksdb.DB(self.dirs + '/' + ind + '.db', opts)
        return db
Example no. 13
def init_store(config):
    """ Only called if using rocksdb for now.
    """
    global hashes_db
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 10000
    opts.write_buffer_size = 67108864
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 67108864

    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024 ** 3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024 ** 2)))

    hashes_db = rocksdb.DB(os.path.join(config.storage.folder.value, HASHES_STORAGE), opts)
Example no. 14
    def post_init(self):
        import rocksdb

        opts = rocksdb.Options()
        opts.create_if_missing = True
        opts.max_open_files = 300000
        opts.write_buffer_size = 67108864
        opts.max_write_buffer_number = 3
        opts.target_file_size_base = 67108864

        opts.table_factory = rocksdb.BlockBasedTableFactory(
            filter_policy=rocksdb.BloomFilterPolicy(10),
            block_cache=rocksdb.LRUCache(2 * (1024**3)),
            block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))

        for key, value in self.kwargs.items():
            setattr(opts, key, value)

        self._db = rocksdb.DB(self.data_path, opts, read_only=self.read_only)
Example no. 15
def get_rocksdb_options():
    rocks_options = rocksdb.Options()
    rocks_options.create_if_missing = True
    rocks_options.merge_operator = StringAddOperator()
    rocks_options.compression = CompressionType.zstd_compression
    rocks_options.max_open_files = 300000
    rocks_options.write_buffer_size = 67 * 1024**2
    rocks_options.max_write_buffer_number = 3
    rocks_options.target_file_size_base = 256 * 1024**2
    rocks_options.max_log_file_size = 4 * 1024**2
    rocks_options.keep_log_file_num = 100

    # we want to set this option, but it's not included in the python client
    # rocks_options.optimize_filters_for_hits = True

    rocks_options.table_factory = rocksdb.BlockBasedTableFactory(
        block_cache=rocksdb.LRUCache(1 * 1024**3),
        block_size=16 * 1024,
        filter_policy=rocksdb.BloomFilterPolicy(10),
    )
    return rocks_options
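The StringAddOperator merge operator used above is not shown in this excerpt. A hypothetical associative merge operator that concatenates values, following the python-rocksdb AssociativeMergeOperator interface, could look like:

class StringAddOperator(rocksdb.interfaces.AssociativeMergeOperator):
    # Hypothetical sketch: joins merged values with a comma.
    def merge(self, key, existing_value, value):
        if existing_value:
            return (True, existing_value + b',' + value)
        return (True, value)

    def name(self):
        return b'StringAddOperator'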
Example no. 16
def init():
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = int(config['rocksdb']['max_open_files'])
    opts.write_buffer_size = int(config['rocksdb']['write_buffer_size'])
    opts.max_write_buffer_number = int(
        config['rocksdb']['max_write_buffer_number'])
    opts.target_file_size_base = int(
        config['rocksdb']['target_file_size_base'])

    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(
            int(config['rocksdb']['filter_policy'])),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
    # TODO: find a way to expose the block_cache* settings in the config; rocksdb only accepts them in this constructor form

    # https://python-rocksdb.readthedocs.io/en/latest/tutorial/#prefixextractor
    opts.prefix_extractor = StaticPrefix()

    db = rocksdb.DB(config['rocksdb']['db_file'], opts)
    return db
Example no. 17
    def __init__(self,
                 read_only=False,
                 filepath=None,
                 max_open_files=300000,
                 write_buffer_size=67108864,
                 max_write_buffer_number=3,
                 target_file_size_base=67108864,
                 **kwargs):
        opts = rocksdb.Options()
        opts.create_if_missing = True
        opts.max_open_files = max_open_files
        opts.write_buffer_size = write_buffer_size
        opts.max_write_buffer_number = max_write_buffer_number
        opts.target_file_size_base = target_file_size_base

        opts.table_factory = rocksdb.BlockBasedTableFactory(
            filter_policy=rocksdb.BloomFilterPolicy(10),
            block_cache=rocksdb.LRUCache(2 * (1024**3)),
            block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))
        opts.merge_operator = AssocCounter()

        self._db = rocksdb.DB(filepath, opts)
        super().__init__(read_only)
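The AssocCounter merge operator assigned above is not part of the excerpt; the python-rocksdb tutorial defines an associative counter along these lines:

class AssocCounter(rocksdb.interfaces.AssociativeMergeOperator):
    def merge(self, key, existing_value, value):
        # Treat values as ASCII integers and add them together.
        if existing_value:
            return (True, str(int(existing_value) + int(value)).encode('ascii'))
        return (True, value)

    def name(self):
        return b'AssocCounter'

With this operator installed, db.merge(b'key', b'1') increments the stored counter instead of overwriting it.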
Example no. 18
def initdb(fn):
    """Initialize a database with a given name

	Arguments:
	fn - name of the database
	"""
    # create database
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 300000
    opts.write_buffer_size = 67108864
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 67108864

    opts.table_factory = rocksdb.BlockBasedTableFactory(
        filter_policy=rocksdb.BloomFilterPolicy(10),
        block_cache=rocksdb.LRUCache(2 * (1024**3)),
        block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))

    db = rocksdb.DB(fn + '.db', opts)
    print(
        "\x1b[6;30;42m[+]\x1b[0m setting up database {}.db in current directory..."
        .format(fn))
    return db
Example no. 19
                        required=False)
    global args
    args = parser.parse_args()

    if not args.memory:
        import rocksdb
        # create database for nodes
        opts = rocksdb.Options()
        opts.create_if_missing = True
        opts.max_open_files = 300000
        opts.write_buffer_size = 67108864
        opts.max_write_buffer_number = 3
        opts.target_file_size_base = 67108864

        opts.table_factory = rocksdb.BlockBasedTableFactory(
            filter_policy=rocksdb.BloomFilterPolicy(10),
            block_cache=rocksdb.LRUCache(2 * (1024**3)),
            block_cache_compressed=rocksdb.LRUCache(500 * (1024**2)))

        db = rocksdb.DB('nodes.db', opts)
        if args.verbose:
            print(
                "\x1b[6;30;42m[+]\x1b[0m setting up database nodes.db in current directory..."
            )
    else:
        db = dict()

    if args.size is None:
        # auto set base graph size in # of edges
        total = 0
        with open(args.input) as f: