# cr8 benchmark spec (flattened onto one line by extraction): bulk upserts into
# articles_bulk_insert. BulkArgsGenerator is a stateful callable: each call
# returns the next window of `bulk_size` rows [id, "crate", 1] with ids
# continuing where the previous call stopped, so successive iterations insert
# disjoint id ranges (the ON CONFLICT clauses only fire on data pre-loaded by
# the setup COPY, or across repeated runs).
# NOTE(review): the trailing queries entry is truncated here (its dict and the
# Spec(...) call are never closed) — the remainder of the file is not visible.
class BulkArgsGenerator: def __init__(self, bulk_size): self.count = 0 self.bulk_size = bulk_size def __call__(self): start = self.count * self.bulk_size end = start + self.bulk_size self.count += 1 return [[x, "crate", 1] for x in range(start, end)] spec = Spec( setup=Instructions( statement_files=["sql/articles_bulk_insert.sql"], statements=[ "copy articles_bulk_insert from 'https://cdn.crate.io/downloads/datasets/benchmarks/articles_0.json.gz' with (compression = 'gzip')", "refresh table articles_bulk_insert" ]), teardown=Instructions( statements=["drop table if exists articles_bulk_insert"]), queries=[{ 'statement': """insert into articles_bulk_insert (id, name, price) values ($1, $2, $3) on conflict (id) do update set name = $2, price = $3""", 'bulk_args': BulkArgsGenerator(1000), 'iterations': 1000, }, { 'statement': """insert into articles_bulk_insert (id, name, price) values (?, ?, ?) on conflict (id) do update set price = excluded.price + price""", 'bulk_args': BulkArgsGenerator(1000),
# cr8 benchmark spec: high-concurrency single-row inserts into a table that is
# partitioned on its integer primary-key column `p`.
CREATE_TABLE = '''
CREATE TABLE tp (
    id STRING PRIMARY KEY,
    p INTEGER PRIMARY KEY
) CLUSTERED INTO 10 SHARDS PARTITIONED BY (p)
'''


def _queries():
    """Yield the insert workload: 1M rows spread over 400 partition keys."""
    partition_count = 400
    partition_keys = cycle(range(partition_count))
    yield {
        'statement': 'insert into tp (id, p) values (?, ?)',
        # Called once per execution: a fresh random id plus the next
        # partition key in round-robin order.
        'args': lambda: [str(uuid4()), next(partition_keys)],
        'concurrency': 25,
        'iterations': int(1e6),
    }


spec = Spec(
    setup=Instructions(statements=[CREATE_TABLE]),
    teardown=Instructions(statements=["drop table if exists tp"]),
    queries=_queries(),
)


if __name__ == "__main__":
    # Smoke-test the args generator without needing a cluster.
    for query in _queries():
        for _ in range(5):
            print(query['args']())
# Interior of a bulk-args generator (the enclosing `def` and the bindings for
# `f`, `BULK_SIZE` and `args` are outside this view): it streams a JSON-lines
# file and yields column-oriented batches (ids, values) for an unnest() insert.
# NOTE(review): `idx % BULK_SIZE == 0` is true at idx == 0, so the first batch
# carries a single record while later batches carry BULK_SIZE — presumably
# `(idx + 1) % BULK_SIZE == 0` was intended; confirm against the consumer.
# NOTE(review): the same list objects are yielded and then `.clear()`ed, so a
# consumer must copy each batch before the next iteration or it will observe
# emptied lists — verify cr8 consumes batches eagerly.
ids = [] values = [] for idx, line in enumerate(f): record = json.loads(line) ids.append(record['id']) values.append(record['value']) if idx % BULK_SIZE == 0: yield (ids, values) ids.clear() values.clear() if ids: yield (ids, values) spec = Spec( setup=Instructions(statement_files=["sql/id_int_value_str.sql"]), teardown=Instructions(statements=["drop table if exists id_int_value_str"]), queries=[ { "statement": "insert into id_int_value_str (id, value) (select col1, col2 from unnest(?, ?))", "args": args(), "iterations": 1000 }, { "statement": "insert into id_int_value_str (id, value) (select col1, col2 from unnest(?, ?))", "args": args(), "iterations": 1000, "concurrency": 15 } ] )
from itertools import count

from cr8.bench_spec import Spec, Instructions


def queries():
    """Yield insert workloads for table t.

    First five single-row inserts with literal args 0..4, then one bulk
    insert whose ``bulk_args`` callable emits 10 one-column rows per call,
    drawing consecutive values from a shared counter starting at 100.
    """
    for i in range(5):
        yield {'statement': 'insert into t (x) values (?)', 'args': [i]}
    c = count(100)
    yield {
        'statement': 'insert into t (x) values (?)',
        # Underscore: the comprehension variable is unused (the original `i`
        # also shadowed the loop variable above).
        'bulk_args': lambda: [[next(c)] for _ in range(10)]
    }


spec = Spec(
    setup=Instructions(statements=["create table t (x int)"]),
    teardown=Instructions(statements=["drop table t"]),
    queries=queries(),
)
# Tail of a long-running-test spec (flattened by extraction): the closing
# entries of a queries sequence built by a `get_queries` defined outside this
# view. The visible query runs a GROUP BY top-500 aggregation at concurrency
# 25 for one hour. Setup creates lrt.t1 (partitioned hourly via the generated
# `hour` column) and lrt.t2; teardown drops both.
# NOTE(review): the opening of the surrounding function/list is not visible
# here — the leading "}, {" closes an earlier query dict defined off-screen.
}, { 'statement': ('SELECT name, count(*) FROM lrt.t1 ' 'GROUP BY name ORDER BY 2 DESC LIMIT 500'), 'concurrency': 25, 'duration': 1 * 60 * 60 }) spec = Spec(setup=Instructions(statements=[ """CREATE TABLE lrt.t1 ( id STRING PRIMARY KEY, name STRING, hour AS date_format('%Y-%m-%d_%H', ts) PRIMARY KEY, ts TIMESTAMP, dev AS CAST(random() * 127 AS BYTE) ) CLUSTERED INTO 5 SHARDS PARTITIONED BY (hour)""", """CREATE TABLE lrt.t2 ( id STRING PRIMARY KEY, name STRING, ts TIMESTAMP, dev AS CAST(random() * 127 AS BYTE) ) CLUSTERED INTO 5 SHARDS""", ]), teardown=Instructions(statements=[ "DROP TABLE lrt.t1", "DROP TABLE lrt.t2", ]), queries=get_queries())
# cr8 benchmark spec: high-concurrency single-row inserts into a table that is
# partitioned on its integer primary-key column `p`.
CREATE_TABLE = '''
CREATE TABLE tp (
    id STRING PRIMARY KEY,
    p INTEGER PRIMARY KEY
) CLUSTERED INTO 10 SHARDS PARTITIONED BY (p)
'''


def _queries():
    """Yield the insert workload: 1M rows spread over 400 partition keys."""
    num = 400
    c = cycle(list(range(num)))
    yield {
        'statement': 'insert into tp (id, p) values (?, ?)',
        # Called once per execution: a fresh random id plus the next
        # partition key in round-robin order.
        'args': lambda: [str(uuid4()), next(c)],
        'concurrency': 25,
        'iterations': int(1e6)
    }


spec = Spec(
    setup=Instructions(statements=[CREATE_TABLE]),
    # IF EXISTS keeps teardown from failing when setup aborted before the
    # table was created; also matches the sibling tp spec's teardown.
    teardown=Instructions(statements=["DROP TABLE IF EXISTS tp"]),
    queries=_queries(),
)


if __name__ == "__main__":
    # Smoke-test the args generator without needing a cluster.
    for q in _queries():
        for _ in range(5):
            print(q['args']())
# Query-tests spec (flattened by extraction): `main` is a standalone CLI path
# that connects to the given hosts, introspects the target table's columns and
# prints generated queries for the requested duration; the spec itself loads
# two JSON datasets into benchmarks.query_tests and builds queries via
# `queries_for_spec` (defined outside this view) from a column-name → type map.
# NOTE(review): the file is truncated here — the `columns` dict, the
# queries_for_spec(...) call and the Spec(...) call are never closed, and
# parse_args/parse_table/connect/get_columns/DataFaker/generate_queries/
# CREATE_TABLE are all defined off-screen.
def main(): args = parse_args() schema, table = parse_table(args.table) with connect(args.hosts) as conn: cursor = conn.cursor() columns = get_columns(cursor, schema, table) data_faker = DataFaker() for query in generate_queries(data_faker, columns, schema, table, args.duration): print(query) spec = Spec(setup=Instructions(statements=[ CREATE_TABLE, "COPY benchmarks.query_tests FROM ['https://cdn.crate.io/downloads/datasets/query-tests/query_tests_0_.json', 'https://cdn.crate.io/downloads/datasets/query-tests/query_tests_1_.json']", "refresh table benchmarks.query_tests", "optimize table benchmarks.query_tests" ]), teardown=Instructions(statements=[ "DROP TABLE benchmarks.query_tests", ]), queries=queries_for_spec( columns={ 'id': 'string', 'sboolean': 'boolean', 'sbyte': 'byte', 'sshort': 'short', 'sinteger': 'integer', 'slong': 'long', 'sfloat': 'float', 'sdouble': 'double',