def test_tf_thread_pool_run(synthetic_dataset):
    """Benchmark reading two explicit fields via the TF read path on a single-loader thread pool."""
    bench_settings = dict(
        warmup_cycles_count=5,
        measure_cycles_count=5,
        pool_type=WorkerPoolType.THREAD,
        loaders_count=1,
        read_method=ReadMethod.TF,
    )
    reader_throughput(synthetic_dataset.url, ['id', 'id2'], **bench_settings)
def test_all_fields(synthetic_dataset):
    """Benchmark the pure-Python read path with no field filter (``None`` selects every field)."""
    bench_settings = dict(
        warmup_cycles_count=5,
        measure_cycles_count=5,
        pool_type=WorkerPoolType.THREAD,
        loaders_count=1,
        read_method=ReadMethod.PYTHON,
    )
    reader_throughput(synthetic_dataset.url, None, **bench_settings)
def test_pure_python_dummy_pool_run(synthetic_dataset):
    """Benchmark the pure-Python read path with no worker pool, selecting fields by regex."""
    # Use a regex to match field name ('i.' instead of 'id')
    field_patterns = ['i.']
    bench_settings = dict(
        warmup_cycles_count=5,
        measure_cycles_count=5,
        pool_type=WorkerPoolType.NONE,
        loaders_count=1,
        read_method=ReadMethod.PYTHON,
    )
    reader_throughput(synthetic_dataset.url, field_patterns, **bench_settings)
def test_pure_python_process_pool_run(synthetic_dataset):
    """Benchmark the pure-Python read path on a process pool (forked, not spawned)."""
    bench_settings = dict(
        warmup_cycles_count=5,
        measure_cycles_count=5,
        pool_type=WorkerPoolType.PROCESS,
        loaders_count=1,
        read_method=ReadMethod.PYTHON,
        spawn_new_process=False,
    )
    reader_throughput(synthetic_dataset.url, ['id'], **bench_settings)
def _main(args):
    """CLI entry point: parse arguments, configure logging, run the benchmark, print a summary.

    ``args`` is the raw argument list (e.g. ``sys.argv[1:]``); it is re-bound to
    the parsed namespace returned by ``_parse_args``.
    """
    logging.basicConfig()
    args = _parse_args(args)

    # -vv (DEBUG) takes precedence over -v (INFO); neither flag leaves the
    # root logger level untouched.
    if args.vv:
        logging.getLogger().setLevel(logging.DEBUG)
    elif args.v:
        logging.getLogger().setLevel(logging.INFO)

    results = reader_throughput(args.dataset_path, args.field_regex,
                                warmup_cycles_count=args.warmup_cycles,
                                measure_cycles_count=args.measure_cycles,
                                pool_type=args.pool_type,
                                loaders_count=args.workers_count,
                                profile_threads=args.profile_threads,
                                read_method=args.read_method,
                                shuffling_queue_size=args.shuffling_queue_size,
                                min_after_dequeue=args.min_after_dequeue)
    logger.info('Done')

    rss_megabytes = results.memory_info.rss / 2 ** 20
    print('Average sample read rate: {:1.2f} samples/sec; RAM {:1.2f} MB (rss); '
          'CPU {:1.2f}%'.format(results.samples_per_second, rss_megabytes, results.cpu))