Beispiel #1
0
def prof(dtype, op, nl, hidden_size_max):
    """Time a ``torch.nn`` recurrent module on fuzzed CUDA inputs.

    Draws 20 fuzzed configurations, times ``rnn(x)`` for each of them,
    pickles the list of measurements to
    ``<version tag>-<op>-<nl>-<dtype>.pkl`` in the current directory and
    prints a comparison table.

    ``<version tag>`` is the part of ``torch.__version__`` after the first
    ``'+'``; when the version string contains no ``'+'`` the full version
    string is used instead.

    Args:
        dtype: Key into the module-level ``d_dtype`` mapping (defined outside
            this block; presumably maps a name to a torch dtype -- confirm).
        op: Name of the ``torch.nn`` class to build; it is interpolated into
            the timer setup as ``torch.nn.{op}(...)``.
        nl: Value passed as ``nonlinearity="..."``; ``None`` omits the
            keyword entirely.
        hidden_size_max: Upper bound of the fuzzed hidden size ``h``.
    """
    fuzzer = benchmark.Fuzzer(
        parameters=[
            benchmark.FuzzedParameter('s', minval=1000, maxval=6000, distribution='uniform'),    # seq_length
            benchmark.FuzzedParameter('b', minval=1, maxval=64, distribution='uniform'),   # batch_size
            benchmark.FuzzedParameter('i', minval=16, maxval=512, distribution='uniform'),   # input_size
            benchmark.FuzzedParameter('h', minval=16, maxval=hidden_size_max, distribution='uniform'),   # hidden_size
            benchmark.FuzzedParameter('n', minval=1, maxval=4, distribution='uniform'),   # num_layer
        ],
        tensors=[
            benchmark.FuzzedTensor('x',
                                   size='sbi',
                                   min_elements=12,
                                   max_elements=10000000,
                                   cuda=True,
                                   dtype=d_dtype[dtype],
                                   max_allocation_bytes=1_000_000_000)
        ],
        seed=42,
        constraints=[
            # Only keep configurations whose input and hidden sizes are
            # multiples of 8.
            lambda params: params['i'] % 8 == 0,
            lambda params: params['h'] % 8 == 0
        ])

    res = []

    for tensors, _, params in fuzzer.take(20):
        s = params['s']
        b = params['b']
        i = params['i']
        h = params['h']
        n = params['n']
        sub_label = f'x=({s}, {b}, {i}),'.ljust(20) + f'op=({i}, {h}, {n})'

        # The module is built inside the timer's setup code so construction
        # is not part of the timed statement.
        if nl is None:
            setup = f'rnn=torch.nn.{op}({i}, {h}, {n})'
        else:
            setup = f'rnn=torch.nn.{op}({i}, {h}, {n}, nonlinearity="{nl}")'
        setup += f'.to(device="cuda", dtype={d_dtype[dtype]})'

        res.append(
            benchmark.Timer(stmt='rnn(x)',
                            setup=setup,
                            globals=tensors,
                            label=f"{op=}, nonlinearity='{nl}', {dtype=}",
                            sub_label=sub_label,
                            description=f'{torch.__version__}')
                        .blocked_autorange(min_run_time=0.1))

    torch_ver = str(torch.__version__)
    # Versions without a '+<local>' suffix used to raise ValueError here,
    # after all timings had been collected and before they were saved.
    # Fall back to the full version string in that case.
    torch_git_ver = torch_ver.partition('+')[2] or torch_ver

    with open(f'{torch_git_ver}-{op}-{nl}-{dtype}.pkl', 'wb') as f:
        pickle.dump(res, f)

    compare = benchmark.Compare(res)
    compare.print()
Beispiel #2
0
def prof(dtype, op):
    """Time a ``torch.nn.functional`` op on fuzzed 4-D CUDA inputs.

    For each kernel size in ``[2, 3, 5]`` the fuzzer is asked for 20
    configurations and ``torch.nn.functional.{op}(x, kernel_size)`` is timed
    on each. All measurements are pickled to
    ``<version tag>-<op>-<dtype>.pkl`` in the current directory and a
    comparison table is printed.

    ``<version tag>`` is the part of ``torch.__version__`` after the first
    ``'+'``; when the version string contains no ``'+'`` the full version
    string is used instead.

    Args:
        dtype: Key into the module-level ``d_dtype`` mapping (defined outside
            this block; presumably maps a name to a torch dtype -- confirm).
        op: Name of the function in ``torch.nn.functional`` to time; it is
            called as ``op(x, kernel_size)``.
    """
    fuzzer = benchmark.Fuzzer(
        parameters=[
            benchmark.FuzzedParameter('n', minval=4, maxval=16, distribution='uniform'),
            benchmark.FuzzedParameter('c', minval=4, maxval=256, distribution='uniform'),
            benchmark.FuzzedParameter('h', minval=8, maxval=256, distribution='uniform'),
            benchmark.FuzzedParameter('w', minval=8, maxval=256, distribution='uniform'),
        ],
        tensors=[
            benchmark.FuzzedTensor(
                'x',
                size='nchw',
                min_elements=12,
                max_elements=10000000,
                cuda=True,
                dtype=d_dtype[dtype],
                max_allocation_bytes=1_000_000_000)
        ],
        seed=42)

    res = []

    for kernel_size in [2, 3, 5]:
        for tensors, _, _ in fuzzer.take(20):
            sub_label = str(tensors['x'].size())
            res.append(
                benchmark.Timer(
                    stmt=f'torch.nn.functional.{op}(x, {kernel_size})',
                    setup='',
                    globals=tensors,
                    label=f'{op}, {dtype=}, {kernel_size=}',
                    sub_label=sub_label,
                    description=f'{torch.__version__}').blocked_autorange(
                        min_run_time=0.1))

    torch_ver = str(torch.__version__)
    # Versions without a '+<local>' suffix used to raise ValueError here,
    # after all timings had been collected and before they were saved.
    # Fall back to the full version string in that case.
    torch_git_ver = torch_ver.partition('+')[2] or torch_ver

    with open(f'{torch_git_ver}-{op}-{dtype}.pkl', 'wb') as f:
        pickle.dump(res, f)

    compare = benchmark.Compare(res)
    compare.print()
Beispiel #3
0
    def test_fuzzer(self):
        """Check that a Fuzzer seeded with 0 yields the recorded ``x`` values."""
        length = benchmark_utils.FuzzedParameter(
            "n", minval=1, maxval=16, distribution="loguniform")
        vector = benchmark_utils.FuzzedTensor("x", size=("n",))
        fuzzer = benchmark_utils.Fuzzer(
            parameters=[length],
            tensors=[vector],
            seed=0,
        )

        # Reference values for the first two draws, compared with a loose
        # tolerance below.
        expected_results = [
            (0.7821, 0.0536, 0.9888, 0.1949, 0.5242, 0.1987, 0.5094),
            (0.7166, 0.5961, 0.8303, 0.005),
        ]

        # The fuzzer iterator comes first in zip() so that it is driven to
        # exhaustion, exactly as a plain for-loop over it would do.
        for (tensors, _, _), expected in zip(fuzzer.take(2), expected_results):
            self.assertEqual(
                tensors["x"], torch.Tensor(expected), rtol=1e-3, atol=1e-3)
Beispiel #4
0
import torch
import torch.utils.benchmark as benchmark
import pickle

# Fuzzer for 5-D half-precision CUDA inputs laid out as (n, c, d, h, w).
# Every dimension is drawn uniformly from its own [minval, maxval] range.
fuzzer = benchmark.Fuzzer(
    parameters=[
        benchmark.FuzzedParameter(dim, minval=lo, maxval=hi, distribution='uniform')
        for dim, lo, hi in (
            ('n', 4, 16),
            ('c', 4, 256),
            ('d', 8, 256),
            ('h', 8, 256),
            ('w', 8, 256),
        )
    ],
    tensors=[
        benchmark.FuzzedTensor(
            'x',
            size='ncdhw',
            min_elements=12,
            max_elements=10000000,
            cuda=True,
            dtype=torch.half,
            max_allocation_bytes=1_000_000_000,
        )
    ],
    seed=42,
)

# Collected measurements, one entry per timed configuration.
res = []

for kernel_size in [2, 3, 5]:
    for tensors, tensor_params, params in fuzzer.take(20):
        sub_label = str(tensors['x'].size())
        res.append(
            benchmark.Timer(stmt=f'torch.nn.functional.max_pool3d(x, {kernel_size})',
                            setup='',
                            globals=tensors,
Beispiel #5
0
def main():
    """Benchmark ``torch.sparse.sum`` on fuzzed sparse tensors and report.

    Times ``torch.sparse.sum(x) + torch.sparse.sum(y)`` on 100 fuzzed
    configurations, printing a progress counter while running. Afterwards it
    prints the fuzzer's average attempts per valid configuration, then the
    10 best and 10 worst configurations ranked by mean time per stored
    (non-zero) element of ``x``.
    """
    dim_names = tuple(f"k{axis}" for axis in range(3))

    size_parameters = [
        benchmark_utils.FuzzedParameter(
            name=dim_name,
            minval=16,
            maxval=16 * 1024,
            distribution="loguniform",
        ) for dim_name in dim_names
    ]
    # Discrete parameters: each mapping is value -> probability.
    dim_parameter = benchmark_utils.FuzzedParameter(
        name="dim_parameter",
        distribution={2: 0.6, 3: 0.4},
    )
    sparse_dim = benchmark_utils.FuzzedParameter(
        name="sparse_dim",
        distribution={1: 0.3, 2: 0.4, 3: 0.3},
    )
    density = benchmark_utils.FuzzedParameter(
        name="density",
        distribution={0.1: 0.4, 0.05: 0.3, 0.01: 0.3},
    )
    coalesced = benchmark_utils.FuzzedParameter(
        name="coalesced",
        distribution={True: 0.7, False: 0.3},
    )

    sparse_operands = [
        benchmark_utils.FuzzedSparseTensor(
            name=operand,
            size=dim_names,
            min_elements=64 * 1024,
            max_elements=128 * 1024,
            sparse_dim="sparse_dim",
            density="density",
            dim_parameter="dim_parameter",
            coalesced="coalesced") for operand in ("x", "y")
    ]

    add_fuzzer = benchmark_utils.Fuzzer(
        parameters=[size_parameters, dim_parameter, sparse_dim, density, coalesced],
        tensors=[sparse_operands],
        seed=0,
    )

    n = 100
    measurements = []

    for done, (tensors, tensor_properties, _) in enumerate(add_fuzzer.take(n=n), start=1):
        x = tensors["x"]
        props = tensor_properties["x"]

        # Pre-render the table row for this configuration; it is carried on
        # the measurement as its description and printed verbatim later.
        shape = ", ".join(f'{extent:>4}' for extent in x.shape)
        hybrid = 'True' if props['is_hybrid'] else 'False'
        is_coalesced = 'True' if x.is_coalesced() else 'False'
        description = (
            f"| {shape:<20} | "
            f"{props['sparsity']:>9.2f} | "
            f"{props['sparse_dim']:>9d} | "
            f"{props['dense_dim']:>9d} | "
            f"{hybrid:>9} | "
            f"{is_coalesced:>9} | "
        )

        measurement = benchmark_utils.Timer(
            stmt="torch.sparse.sum(x) + torch.sparse.sum(y)",
            globals=tensors,
            description=description,
        ).blocked_autorange(min_run_time=0.1)
        # Remember the stored-element count so times can be normalised.
        measurement.metadata = {"nnz": x._nnz()}
        measurements.append(measurement)

        print(f"\r{done} / {n}", end="", flush=True)
    print()

    # More string munging to make pretty output.
    attempts_per_config = 1. / (1. - add_fuzzer.rejection_rate)
    print(f"Average attemts per valid config: {attempts_per_config:.1f}")

    def seconds_per_nnz(m):
        """Mean run time divided by the number of stored elements."""
        return m.mean / m.metadata["nnz"]

    measurements.sort(key=seconds_per_nnz)

    gap = " " * 4
    header = (
        "{:>6}" + " " * 16 + " Shape" + " " * 17
        + "    sparsity" + gap + "sparse_dim" + gap + "dense_dim"
        + gap + "hybrid" + gap + "coalesced"
        + "\n" + "-" * 108
    )

    sections = (
        ("", "Best:", measurements[:10]),
        ("\n", "Worst:", measurements[-10:]),
    )
    for lead, title, rows in sections:
        print(lead + header.format(title))
        for m in rows:
            print(f"{seconds_per_nnz(m) * 1e9:>5.2f} ns / element     {m.description}")
Beispiel #6
0
def main():
    """Benchmark elementwise ``x + y`` on fuzzed dense tensors and report.

    Times ``x + y`` on 250 fuzzed configurations, printing a progress counter
    while running. Afterwards it prints the fuzzer's average attempts per
    valid configuration, then the 15 best and 15 worst configurations ranked
    by median time per element of ``x``.
    """
    size_parameters = [
        benchmark_utils.FuzzedParameter(
            name=f"k{axis}",
            minval=16,
            maxval=16 * 1024,
            distribution="loguniform",
        ) for axis in range(3)
    ]
    # Discrete parameter: mapping is value -> probability.
    rank = benchmark_utils.FuzzedParameter(
        name="d",
        distribution={2: 0.6, 3: 0.4},
    )
    operands = [
        benchmark_utils.FuzzedTensor(
            name=operand,
            size=("k0", "k1", "k2"),
            dim_parameter="d",
            probability_contiguous=0.75,
            min_elements=64 * 1024,
            max_elements=128 * 1024,
        ) for operand in ("x", "y")
    ]

    add_fuzzer = benchmark_utils.Fuzzer(
        parameters=[size_parameters, rank],
        tensors=[operands],
        seed=0,
    )

    def layout(tensor, order):
        """Return 'contiguous', or the stringified order otherwise."""
        order_text = str(order)
        return 'contiguous' if tensor.is_contiguous() else order_text

    n = 250
    measurements = []
    for done, (tensors, tensor_properties, _) in enumerate(add_fuzzer.take(n=n), start=1):
        x = tensors["x"]
        y = tensors["y"]
        x_layout = layout(x, tensor_properties["x"]["order"])
        y_layout = layout(y, tensor_properties["y"]["order"])
        shape = ", ".join(f'{extent:>4}' for extent in x.shape)

        # Pre-render the table row for this configuration; it is carried on
        # the measurement as its description and printed verbatim later.
        description = (
            f"{x.numel():>7} | {shape:<16} | "
            f"{x_layout:<12} | "
            f"{y_layout:<12} | "
        )

        measurement = benchmark_utils.Timer(
            stmt="x + y",
            globals=tensors,
            description=description,
        ).blocked_autorange(min_run_time=0.1)
        # Remember the element count so times can be normalised.
        measurement.metadata = {"numel": x.numel()}
        measurements.append(measurement)

        print(f"\r{done} / {n}", end="", flush=True)
    print()

    # More string munging to make pretty output.
    attempts_per_config = 1. / (1. - add_fuzzer.rejection_rate)
    print(f"Average attemts per valid config: {attempts_per_config:.1f}")

    def seconds_per_element(m):
        """Median run time divided by the number of elements."""
        return m.median / m.metadata["numel"]

    measurements.sort(key=seconds_per_element)

    header = (
        "{:>6}" + " " * 19 + "Size    Shape" + " " * 13
        + "X order        Y order"
        + "\n" + "-" * 80
    )

    sections = (
        ("", "Best:", measurements[:15]),
        ("\n", "Worst:", measurements[-15:]),
    )
    for lead, title, rows in sections:
        print(lead + header.format(title))
        for m in rows:
            print(f"{seconds_per_element(m) * 1e9:>4.1f} ns / element     {m.description}")