def prof(dtype, op, nl, hidden_size_max):
    """Fuzz-benchmark the forward pass of a recurrent ``torch.nn`` module on CUDA.

    Args:
        dtype: key into the module-level ``d_dtype`` mapping (e.g. a string
            naming a torch dtype) — TODO confirm against the rest of the file.
        op: name of the recurrent class under ``torch.nn`` (e.g. ``'RNN'``).
        nl: ``nonlinearity`` argument forwarded to the module, or ``None``
            to construct the module without one (LSTM/GRU take none).
        hidden_size_max: upper bound for the fuzzed hidden size.

    Side effects: pickles the list of measurements to
    ``<git-ver>-<op>-<nl>-<dtype>.pkl`` and prints a comparison table.
    """
    fuzzer = benchmark.Fuzzer(
        parameters=[
            benchmark.FuzzedParameter('s', minval=1000, maxval=6000, distribution='uniform'),  # seq_length
            benchmark.FuzzedParameter('b', minval=1, maxval=64, distribution='uniform'),       # batch_size
            benchmark.FuzzedParameter('i', minval=16, maxval=512, distribution='uniform'),     # input_size
            benchmark.FuzzedParameter('h', minval=16, maxval=hidden_size_max, distribution='uniform'),  # hidden_size
            benchmark.FuzzedParameter('n', minval=1, maxval=4, distribution='uniform'),        # num_layer
        ],
        tensors=[
            benchmark.FuzzedTensor('x', size='sbi', min_elements=12,
                                   max_elements=10000000, cuda=True,
                                   dtype=d_dtype[dtype],
                                   max_allocation_bytes=1_000_000_000)
        ],
        seed=42,
        constraints=[
            # cuDNN-friendly sizes: keep input/hidden sizes multiples of 8.
            lambda params: params['i'] % 8 == 0,
            lambda params: params['h'] % 8 == 0,
        ])
    res = []
    for tensors, tensor_params, params in fuzzer.take(20):
        s = params['s']
        b = params['b']
        i = params['i']
        h = params['h']
        n = params['n']
        sub_label = f'x=({s}, {b}, {i}),'.ljust(20) + f'op=({i}, {h}, {n})'
        # The module is built inside the Timer's setup string so construction
        # cost is excluded from the measurement.
        if nl is None:
            setup = f'rnn=torch.nn.{op}({i}, {h}, {n})'
        else:
            setup = f'rnn=torch.nn.{op}({i}, {h}, {n}, nonlinearity="{nl}")'
        setup += f'.to(device="cuda", dtype={d_dtype[dtype]})'
        res.append(
            benchmark.Timer(stmt='rnn(x)',  # plain string; f-string was a no-op
                            setup=setup,
                            globals=tensors,
                            label=f"{op=}, nonlinearity='{nl}', {dtype=}",
                            sub_label=sub_label,
                            description=f'{torch.__version__}')
            .blocked_autorange(min_run_time=0.1))
    torch_ver = str(torch.__version__)
    # Release wheels carry no '+<git-sha>' suffix; the previous
    # `torch_ver.index('+')` raised ValueError there.  Fall back to the
    # full version string when no '+' is present.
    torch_git_ver = torch_ver.partition('+')[2] or torch_ver
    with open(f'{torch_git_ver}-{op}-{nl}-{dtype}.pkl', 'wb') as f:
        pickle.dump(res, f)
    compare = benchmark.Compare(res)
    # compare.colorize()
    compare.print()
def prof(dtype, op):
    """Fuzz-benchmark a functional pooling op (``torch.nn.functional.<op>``) on CUDA.

    Args:
        dtype: key into the module-level ``d_dtype`` mapping — TODO confirm
            against the rest of the file.
        op: name of the functional op, e.g. ``'max_pool2d'``; it is called as
            ``torch.nn.functional.<op>(x, kernel_size)``.

    Side effects: pickles the measurements to ``<git-ver>-<op>-<dtype>.pkl``
    and prints a comparison table.
    """
    fuzzer = benchmark.Fuzzer(
        parameters=[
            benchmark.FuzzedParameter('n', minval=4, maxval=16, distribution='uniform'),
            benchmark.FuzzedParameter('c', minval=4, maxval=256, distribution='uniform'),
            benchmark.FuzzedParameter('h', minval=8, maxval=256, distribution='uniform'),
            benchmark.FuzzedParameter('w', minval=8, maxval=256, distribution='uniform'),
        ],
        tensors=[
            benchmark.FuzzedTensor(
                'x', size='nchw', min_elements=12, max_elements=10000000,
                cuda=True, dtype=d_dtype[dtype],
                max_allocation_bytes=1_000_000_000)
        ],
        seed=42)
    res = []
    for kernel_size in [2, 3, 5]:
        for tensors, tensor_params, params in fuzzer.take(20):
            sub_label = str(tensors['x'].size())
            res.append(
                benchmark.Timer(
                    stmt=f'torch.nn.functional.{op}(x, {kernel_size})',
                    setup='',
                    globals=tensors,
                    label=f'{op}, {dtype=}, {kernel_size=}',
                    sub_label=sub_label,
                    description=f'{torch.__version__}').blocked_autorange(
                        min_run_time=0.1))
    torch_ver = str(torch.__version__)
    # Release wheels carry no '+<git-sha>' suffix; the previous
    # `torch_ver.index('+')` raised ValueError there.  Fall back to the
    # full version string when no '+' is present.
    torch_git_ver = torch_ver.partition('+')[2] or torch_ver
    with open(f'{torch_git_ver}-{op}-{dtype}.pkl', 'wb') as f:
        pickle.dump(res, f)
    compare = benchmark.Compare(res)
    # compare.colorize()
    compare.print()
def test_fuzzer(self):
    """A Fuzzer with a fixed seed must reproduce a known tensor sequence."""
    fuzzer = benchmark_utils.Fuzzer(
        parameters=[
            benchmark_utils.FuzzedParameter(
                "n", minval=1, maxval=16, distribution="loguniform")],
        tensors=[benchmark_utils.FuzzedTensor("x", size=("n",))],
        seed=0,
    )
    expected_results = [
        (0.7821, 0.0536, 0.9888, 0.1949, 0.5242, 0.1987, 0.5094),
        (0.7166, 0.5961, 0.8303, 0.005),
    ]
    # Walk the first two draws in lockstep with the golden values.
    for (tensors, _, _), expected in zip(fuzzer.take(2), expected_results):
        actual = tensors["x"]
        self.assertEqual(
            actual, torch.Tensor(expected), rtol=1e-3, atol=1e-3)
# NOTE(review): this chunk is TRUNCATED — the benchmark.Timer(...) call at the
# end stops after `globals=tensors,` with no closing parenthesis, and the rest
# of the script (label/description, pickling, Compare) is missing from this
# view.  It appears to be the max_pool3d (5-D, half-precision, CUDA) variant of
# the pooling benchmark; recover the remainder from the original file before
# editing the code itself.
import torch import torch.utils.benchmark as benchmark import pickle fuzzer = benchmark.Fuzzer(parameters=[ benchmark.FuzzedParameter('n', minval=4, maxval=16, distribution='uniform'), benchmark.FuzzedParameter('c', minval=4, maxval=256, distribution='uniform'), benchmark.FuzzedParameter('d', minval=8, maxval=256, distribution='uniform'), benchmark.FuzzedParameter('h', minval=8, maxval=256, distribution='uniform'), benchmark.FuzzedParameter('w', minval=8, maxval=256, distribution='uniform'), ], tensors=[ benchmark.FuzzedTensor('x', size='ncdhw', min_elements=12, max_elements=10000000, cuda=True, dtype=torch.half, max_allocation_bytes=1_000_000_000) ], seed=42) res = [] for kernel_size in [2, 3, 5]: for tensors, tensor_params, params in fuzzer.take(20): sub_label = str(tensors['x'].size()) res.append( benchmark.Timer(stmt=f'torch.nn.functional.max_pool3d(x, {kernel_size})', setup='', globals=tensors,
def main():
    """Fuzz-benchmark ``torch.sparse.sum`` over random sparse tensor shapes.

    Draws 100 sparse-tensor configurations (shape, sparsity, sparse_dim,
    hybrid-ness, coalesced-ness), times ``torch.sparse.sum(x) +
    torch.sparse.sum(y)`` for each, then prints the 10 best and 10 worst
    configurations by time per nonzero element.
    """
    add_fuzzer = benchmark_utils.Fuzzer(
        parameters=[
            [
                benchmark_utils.FuzzedParameter(
                    name=f"k{i}",
                    minval=16,
                    maxval=16 * 1024,
                    distribution="loguniform",
                ) for i in range(3)
            ],
            benchmark_utils.FuzzedParameter(
                name="dim_parameter",
                distribution={2: 0.6, 3: 0.4},
            ),
            benchmark_utils.FuzzedParameter(
                name="sparse_dim",
                distribution={1: 0.3, 2: 0.4, 3: 0.3},
            ),
            benchmark_utils.FuzzedParameter(
                name="density",
                distribution={0.1: 0.4, 0.05: 0.3, 0.01: 0.3},
            ),
            benchmark_utils.FuzzedParameter(
                name="coalesced",
                distribution={True: 0.7, False: 0.3},
            ),
        ],
        tensors=[
            [
                benchmark_utils.FuzzedSparseTensor(
                    name=name,
                    size=tuple([f"k{i}" for i in range(3)]),
                    min_elements=64 * 1024,
                    max_elements=128 * 1024,
                    sparse_dim="sparse_dim",
                    density="density",
                    dim_parameter="dim_parameter",
                    coalesced="coalesced") for name in ("x", "y")
            ],
        ],
        seed=0,
    )

    n = 100
    measurements = []
    for i, (tensors, tensor_properties, _) in enumerate(add_fuzzer.take(n=n)):
        x = tensors["x"]
        y = tensors["y"]
        shape = ", ".join(tuple(f'{i:>4}' for i in x.shape))
        x_tensor_properties = tensor_properties["x"]
        # Fixed-width columns so descriptions line up under the header.
        description = "".join([
            f"| {shape:<20} | ",
            f"{x_tensor_properties['sparsity']:>9.2f} | ",
            f"{x_tensor_properties['sparse_dim']:>9d} | ",
            f"{x_tensor_properties['dense_dim']:>9d} | ",
            f"{('True' if x_tensor_properties['is_hybrid'] else 'False'):>9} | ",
            f"{('True' if x.is_coalesced() else 'False'):>9} | "
        ])
        timer = benchmark_utils.Timer(
            stmt="torch.sparse.sum(x) + torch.sparse.sum(y)",
            globals=tensors,
            description=description,
        )
        measurements.append(timer.blocked_autorange(min_run_time=0.1))
        # Stash nnz so results can be normalized to time-per-element below.
        measurements[-1].metadata = {"nnz": x._nnz()}
        print(f"\r{i + 1} / {n}", end="")
        sys.stdout.flush()
    print()

    # More string munging to make pretty output.
    # (Typo fix: "attemts" -> "attempts".)
    print(
        f"Average attempts per valid config: "
        f"{1. / (1. - add_fuzzer.rejection_rate):.1f}"
    )

    def time_fn(m):
        # Mean time normalized per nonzero element.
        return m.mean / m.metadata["nnz"]

    measurements.sort(key=time_fn)

    template = f"{{:>6}}{' ' * 16} Shape{' ' * 17}\
sparsity{' ' * 4}sparse_dim{' ' * 4}dense_dim{' ' * 4}hybrid{' ' * 4}coalesced\n{'-' * 108}"
    print(template.format("Best:"))
    for m in measurements[:10]:
        print(f"{time_fn(m) * 1e9:>5.2f} ns / element     {m.description}")
    print("\n" + template.format("Worst:"))
    for m in measurements[-10:]:
        print(f"{time_fn(m) * 1e9:>5.2f} ns / element     {m.description}")
def main():
    """Fuzz-benchmark dense elementwise add over random shapes and layouts.

    Draws 250 (x, y) tensor pairs of matching shape but varying memory
    order / contiguity, times ``x + y`` for each, then prints the 15 best
    and 15 worst configurations by time per element.
    """
    add_fuzzer = benchmark_utils.Fuzzer(
        parameters=[
            [
                benchmark_utils.FuzzedParameter(
                    name=f"k{i}",
                    minval=16,
                    maxval=16 * 1024,
                    distribution="loguniform",
                ) for i in range(3)
            ],
            benchmark_utils.FuzzedParameter(
                name="d",
                distribution={2: 0.6, 3: 0.4},
            ),
        ],
        tensors=[
            [
                benchmark_utils.FuzzedTensor(
                    name=name,
                    size=("k0", "k1", "k2"),
                    dim_parameter="d",
                    probability_contiguous=0.75,
                    min_elements=64 * 1024,
                    max_elements=128 * 1024,
                ) for name in ("x", "y")
            ],
        ],
        seed=0,
    )

    n = 250
    measurements = []
    for i, (tensors, tensor_properties, _) in enumerate(add_fuzzer.take(n=n)):
        x, x_order = tensors["x"], str(tensor_properties["x"]["order"])
        y, y_order = tensors["y"], str(tensor_properties["y"]["order"])
        shape = ", ".join(tuple(f'{i:>4}' for i in x.shape))
        # Fixed-width columns so descriptions line up under the header.
        description = "".join([
            f"{x.numel():>7} | {shape:<16} | ",
            f"{'contiguous' if x.is_contiguous() else x_order:<12} | ",
            f"{'contiguous' if y.is_contiguous() else y_order:<12} | ",
        ])
        timer = benchmark_utils.Timer(
            stmt="x + y",
            globals=tensors,
            description=description,
        )
        measurements.append(timer.blocked_autorange(min_run_time=0.1))
        # Stash numel so results can be normalized to time-per-element below.
        measurements[-1].metadata = {"numel": x.numel()}
        print(f"\r{i + 1} / {n}", end="")
        sys.stdout.flush()
    print()

    # More string munging to make pretty output.
    # (Typo fix: "attemts" -> "attempts".)
    print(
        f"Average attempts per valid config: "
        f"{1. / (1. - add_fuzzer.rejection_rate):.1f}"
    )

    def time_fn(m):
        # Median time normalized per element.
        return m.median / m.metadata["numel"]

    measurements.sort(key=time_fn)

    template = f"{{:>6}}{' ' * 19}Size    Shape{' ' * 13}X order        Y order\n{'-' * 80}"
    print(template.format("Best:"))
    for m in measurements[:15]:
        print(f"{time_fn(m) * 1e9:>4.1f} ns / element     {m.description}")
    print("\n" + template.format("Worst:"))
    for m in measurements[-15:]:
        print(f"{time_fn(m) * 1e9:>4.1f} ns / element     {m.description}")