def exec_instructions(self, instructions):
    """Run the setup instructions against the client.

    Executes, in order: the SQL statements read from
    ``instructions.statement_files`` (resolved relative to ``self.spec_dir``),
    the inline ``instructions.statements``, the bulk inserts for each entry in
    ``instructions.data_files``, and finally the bulk inserts fed by the
    external commands in ``instructions.data_cmds``.
    """
    filenames = instructions.statement_files
    filenames = (os.path.join(self.spec_dir, i) for i in filenames)
    lines = (line for fn in filenames for line in get_lines(fn))
    statements = itertools.chain(as_statements(lines), instructions.statements)
    for stmt in statements:
        aio.run(self.client.execute, stmt)

    for data_file in instructions.data_files:
        inserts = as_bulk_queries(self._to_inserts(data_file),
                                  data_file.get('bulk_size', 5000))
        concurrency = data_file.get('concurrency', 25)
        aio.run_many(self.client.execute_many, inserts, concurrency=concurrency)
        # "refresh table" is CrateDB-specific; skip it for other servers.
        if self.client.is_cratedb:
            aio.run(self.client.execute, f"refresh table {data_file['target']}")

    for data_cmd in instructions.data_cmds:
        # Context manager closes the stdout pipe and reaps the child even if
        # an insert raises — the original leaked the fd and left a zombie.
        with subprocess.Popen(data_cmd['cmd'],
                              stdout=subprocess.PIPE,
                              universal_newlines=True) as process:
            target = data_cmd['target']
            dicts = dicts_from_lines(process.stdout)
            inserts = as_bulk_queries(
                (to_insert(target, d) for d in dicts),
                data_cmd.get('bulk_size', 5000))
            concurrency = data_cmd.get('concurrency', 25)
            aio.run_many(self.client.execute_many,
                         inserts,
                         concurrency=concurrency)
        if self.client.is_cratedb:
            aio.run(self.client.execute, f"refresh table {target}")
def test_as_bulk_queries(self):
    """Rows sharing a statement are grouped into batches of at most 2 args."""
    rows = [
        ('x', (1, 2)),
        ('x', (3, 4)),
        ('x', (5, 6)),
        ('y', (1, 2)),
    ]
    expected = [
        ('x', [(1, 2), (3, 4)]),
        ('x', [(5, 6)]),
        ('y', [(1, 2)]),
    ]
    actual = sorted(misc.as_bulk_queries(rows, 2))
    self.assertEqual(actual, expected)
def exec_instructions(self, instructions):
    """Run the setup instructions against the client.

    Executes the SQL statements from ``instructions.statement_files``
    (resolved relative to ``self.spec_dir``) and the inline
    ``instructions.statements``, then performs the bulk inserts described by
    ``instructions.data_files``.
    """
    filenames = instructions.statement_files
    filenames = (os.path.join(self.spec_dir, i) for i in filenames)
    lines = (line for fn in filenames for line in get_lines(fn))
    statements = itertools.chain(as_statements(lines), instructions.statements)
    for stmt in statements:
        aio.run(self.client.execute, stmt)
    for data_file in instructions.data_files:
        inserts = as_bulk_queries(self._to_inserts(data_file),
                                  data_file.get('bulk_size', 5000))
        concurrency = data_file.get('concurrency', 25)
        aio.run_many(self.client.execute_many, inserts, concurrency=concurrency)
        # Guard the refresh as the sibling exec_instructions variant does:
        # "refresh table" is CrateDB-specific and would fail on other servers.
        if self.client.is_cratedb:
            aio.run(self.client.execute,
                    f"refresh table {data_file['target']}")
def run_load_data(self, data_spec, meta=None):
    """Benchmark the bulk inserts described by *data_spec* and record the result."""
    # Peek at the first insert only to obtain the statement text for reporting;
    # the insert stream itself is regenerated below.
    statement = next(iter(self._to_inserts(data_spec)))[0]
    bulk_size = data_spec.get('bulk_size', 5000)
    concurrency = data_spec.get('concurrency', 25)
    inserts = as_bulk_queries(self._to_inserts(data_spec), bulk_size)
    num_records = data_spec.get('num_records')
    if num_records:
        # num_records counts rows; convert it into the number of bulk batches.
        num_records = max(1, int(num_records / bulk_size))
    timed_stats = run_and_measure(
        self.client.execute_many, inserts, concurrency, num_records)
    result = self.create_result(
        statement=statement,
        meta=meta,
        timed_stats=timed_stats,
        concurrency=concurrency,
        bulk_size=bulk_size,
    )
    self.process_result(result)
def test_as_bulk_queries(self):
    """Batches of size 2 are built per statement; a trailing partial batch remains."""
    input_queries = [('x', (1, 2)),
                     ('x', (3, 4)),
                     ('x', (5, 6)),
                     ('y', (1, 2))]
    result = sorted(misc.as_bulk_queries(input_queries, 2))
    self.assertEqual(
        result,
        [('x', [(1, 2), (3, 4)]),
         ('x', [(5, 6)]),
         ('y', [(1, 2)])])