import logging
import sys

# Module path below is an assumption based on the names used in this file.
from petastorm.benchmark.throughput import (ReadMethod, WorkerPoolType,
                                            reader_throughput, reader_v2_throughput)

logger = logging.getLogger(__name__)


def test_experimental_reader(synthetic_dataset):
    reader_v2_throughput(synthetic_dataset.url, None, warmup_cycles_count=5, measure_cycles_count=5,
                         pool_type=WorkerPoolType.THREAD, loaders_count=1, read_method=ReadMethod.PYTHON)
def test_tf_thread_pool_run_experimental(synthetic_dataset):
    reader_v2_throughput(synthetic_dataset.url, field_regex=[r'\bid\b', r'\bmatrix\b'],
                         warmup_cycles_count=5, measure_cycles_count=5,
                         pool_type=WorkerPoolType.THREAD, loaders_count=1, read_method=ReadMethod.TF)
def _main(args):
    logging.basicConfig()

    args = _parse_args(args)
    if args.v:
        logging.getLogger().setLevel(logging.INFO)
    if args.vv:
        logging.getLogger().setLevel(logging.DEBUG)

    if args.experimental_reader:
        results = reader_v2_throughput(args.dataset_path, args.field_regex,
                                       warmup_cycles_count=args.warmup_cycles,
                                       measure_cycles_count=args.measure_cycles,
                                       pool_type=args.pool_type, loaders_count=args.workers_count,
                                       decoders_count=args.experimental_decoders_count,
                                       read_method=args.read_method,
                                       shuffling_queue_size=args.shuffling_queue_size,
                                       min_after_dequeue=args.min_after_dequeue)
    else:
        results = reader_throughput(args.dataset_path, args.field_regex,
                                    warmup_cycles_count=args.warmup_cycles,
                                    measure_cycles_count=args.measure_cycles,
                                    pool_type=args.pool_type, loaders_count=args.workers_count,
                                    profile_threads=args.profile_threads,
                                    read_method=args.read_method,
                                    shuffling_queue_size=args.shuffling_queue_size,
                                    min_after_dequeue=args.min_after_dequeue)

    logger.info('Done')

    print('Average sample read rate: {:1.2f} samples/sec; RAM {:1.2f} MB (rss); '
          'CPU {:1.2f}%'.format(results.samples_per_second, results.memory_info.rss / 2 ** 20,
                                results.cpu))
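# Illustrative CLI invocation (a sketch only: the flag spellings are
# assumptions inferred from the attribute names read off `args` above, and
# `_parse_args` is defined elsewhere in this module and is the source of
# truth for the actual argument names):
#
#   python -m petastorm.benchmark.cli file:///tmp/some_dataset \
#       --warmup-cycles 5 --measure-cycles 10 -v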
def test_tf_process_pool_run_experimental_with_pyarrow_serialize(synthetic_dataset):
    # Named for the process pool it exercises: pyarrow_serialize applies to
    # inter-process transfer, so it is paired with WorkerPoolType.PROCESS.
    reader_v2_throughput(synthetic_dataset.url, field_regex=[r'\bid\b', r'\bmatrix\b'],
                         warmup_cycles_count=5, measure_cycles_count=5,
                         pool_type=WorkerPoolType.PROCESS, loaders_count=1, read_method=ReadMethod.TF,
                         pyarrow_serialize=True)
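# A minimal sketch of the usual script entry point, assuming this module is
# meant to be runnable directly; the argv slicing is inferred from _main's
# signature rather than stated anywhere in this file.
if __name__ == '__main__':
    _main(sys.argv[1:])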