Example #1
        [512, 512, 2, 1],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=['short'],
)

cat_configs_long = op_bench.cross_product_configs(M=[128],
                                                  N=[128, 1024],
                                                  K=[1, 2],
                                                  dim=[0, 1, 2],
                                                  device=['cpu', 'cuda'],
                                                  tags=['long'])


class CatBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K, dim, device):
        self.input_one = torch.rand(M, N, K, device=device)
        self.dim = dim
        self.set_module_name('cat')

    def forward(self):
        return torch.cat((self.input_one, self.input_one), dim=self.dim)


op_bench.generate_pt_test(cat_configs_short + cat_configs_long, CatBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
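
For intuition, `op_bench.cross_product_configs` expands its keyword lists into the Cartesian product of all attribute values, one benchmark config per combination. A minimal sketch of that expansion, using a hypothetical `make_cross_product` helper rather than the library's real implementation:

import itertools

def make_cross_product(**params):
    # Hypothetical stand-in for op_bench.cross_product_configs.
    tags = params.pop('tags')
    keys = list(params)
    return [dict(zip(keys, values), tags=tags)
            for values in itertools.product(*params.values())]

# 1 * 2 * 2 * 3 * 2 = 24 configs, e.g.
# {'M': 128, 'N': 1024, 'K': 2, 'dim': 1, 'device': 'cuda', 'tags': ['long']}
configs = make_cross_product(M=[128], N=[128, 1024], K=[1, 2],
                             dim=[0, 1, 2], device=['cpu', 'cuda'],
                             tags=['long'])
assert len(configs) == 24
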
Example #2
import operator_benchmark as op_bench
import torch
import torch.nn.functional as F

"""Microbenchmarks for batchnorm operator."""

configs = op_bench.config_list(attrs=[
    [1, 256, 3136],
    [1, 2**16, 1],
    [128, 2048, 1],
],
                               attr_names=["M", "N", "K"],
                               tags=["short"])


class BatchNormBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K):
        self.input_one = torch.rand(M, N, K)
        self.mean = torch.rand(N)
        self.var = torch.rand(N)
        self.weight = torch.rand(N)
        self.bias = torch.rand(N)
        self.set_module_name("batchnorm")

    def forward(self):
        return F.batch_norm(self.input_one, self.mean, self.var, self.weight,
                            self.bias)


op_bench.generate_pt_test(configs, BatchNormBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
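
In eval mode (`training=False`, the default used above), `F.batch_norm` normalizes with the supplied statistics and applies the affine transform, broadcasting the per-channel vectors over dim 1. A minimal sketch of the math, assuming the default `eps=1e-5`:

import torch
import torch.nn.functional as F

x = torch.rand(2, 3, 4)                  # (M, N, K), N channels
mean, var = torch.rand(3), torch.rand(3)
weight, bias = torch.rand(3), torch.rand(3)

def c(t):                                # broadcast (N,) over dim 1
    return t[None, :, None]

expected = (x - c(mean)) / torch.sqrt(c(var) + 1e-5) * c(weight) + c(bias)
assert torch.allclose(F.batch_norm(x, mean, var, weight, bias), expected,
                      atol=1e-5)
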
Example #3
class FloatToHalfTensorConversionBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, device):
        self.input = torch.rand(M,
                                N,
                                device=device,
                                requires_grad=False,
                                dtype=torch.float)

    def forward(self):
        return self.input.to(torch.half)


class HalfToFloatTensorConversionBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, device):
        self.input = torch.rand(M,
                                N,
                                device=device,
                                requires_grad=False,
                                dtype=torch.half)

    def forward(self):
        return self.input.to(torch.float)


op_bench.generate_pt_test(tensor_conversion_short_configs,
                          FloatToHalfTensorConversionBenchmark)
op_bench.generate_pt_test(tensor_conversion_long_configs,
                          FloatToHalfTensorConversionBenchmark)
op_bench.generate_pt_test(tensor_conversion_short_configs,
                          HalfToFloatTensorConversionBenchmark)
op_bench.generate_pt_test(tensor_conversion_long_configs,
                          HalfToFloatTensorConversionBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
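
`Tensor.to(dtype)` allocates a new tensor and copies with conversion, which is the cost these benchmarks isolate. A small sketch of the round trip (note that float32 -> float16 is lossy):

import torch

x = torch.rand(128, 256)
h = x.to(torch.half)                 # float32 -> float16, new tensor
f = h.to(torch.float)                # float16 -> float32, exact widening
assert h.dtype == torch.float16 and f.dtype == torch.float32
assert torch.allclose(f, x, atol=1e-3)   # half keeps ~3 decimal digits
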
Example #4
    def forward(self, input):
        # Assumes that `self.inputs` is populated by the child class
        return self.qlinear(input)


class QLinearBenchmark(_QLinearBenchmarkBase):
    def init(self, N, IN, OUT, device):
        super(QLinearBenchmark, self).init(N, IN, OUT, nnq.Linear(IN, OUT))
        self.inputs = {"input": self.qX}
        self.set_module_name("QLinear")


class QDynamicLinearBenchmark(_QLinearBenchmarkBase):
    def init(self, N, IN, OUT, device):
        super(QDynamicLinearBenchmark, self).init(N, IN, OUT,
                                                  nnqd.Linear(IN, OUT))
        self.inputs = {"input": self.X}
        self.set_module_name("QDynamicLinear")


op_bench.generate_pt_test(
    configs.remove_cuda(configs.linear_configs_short +
                        configs.linear_configs_long), QLinearBenchmark)
op_bench.generate_pt_test(
    configs.remove_cuda(configs.linear_configs_short +
                        configs.linear_configs_long), QDynamicLinearBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Example #5
)

as_strided_configs_long = op_bench.cross_product_configs(
    M=[128, 1024],
    N=[128, 1024],
    size=[(16, 16), (128, 128)],
    stride=[(1, 1), (2, 2)],
    storage_offset=[0, 1],
    device=['cpu', 'cuda'],
    tags=['long'])


class As_stridedBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, size, stride, storage_offset, device):
        self.input_one = torch.rand(M, N, device=device)
        self.size = size
        self.stride = stride
        self.storage_offset = storage_offset
        self.set_module_name('as_strided')

    def forward(self):
        return torch.as_strided(self.input_one, self.size, self.stride,
                                self.storage_offset)


op_bench.generate_pt_test(as_strided_configs_short + as_strided_configs_long,
                          As_stridedBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
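
`torch.as_strided` builds a view whose element `[i][j]` reads `storage[storage_offset + i*stride[0] + j*stride[1]]`, so the benchmark measures pure view construction with no data copy. A quick check of the indexing rule:

import torch

base = torch.arange(16.0)
view = torch.as_strided(base, size=(2, 3), stride=(4, 1), storage_offset=1)
assert view[1, 2].item() == base[1 + 1 * 4 + 2 * 1].item()  # storage[7] == 7.0
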
Example #6
        'device': ['cpu'],
    },
    tags=["short"],
)

mm_long_configs = op_bench.cross_product_configs(M=[64, 128, 256],
                                                 N=range(2, 10, 3),
                                                 K=[128, 512, 1024],
                                                 trans_a=[True, False],
                                                 trans_b=[True, False],
                                                 device=['cpu'],
                                                 tags=["long"])


class MatMulBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K, trans_a, trans_b, device):
        self.input_one = torch.rand(M, N, device=device) if trans_a \
            else torch.rand(N, M, device=device).t()
        self.input_two = torch.rand(N, K, device=device) if trans_b \
            else torch.rand(K, N, device=device).t()
        self.set_module_name("matmul")

    def forward(self):
        return torch.matmul(self.input_one, self.input_two)


op_bench.generate_pt_test(mm_long_configs + mm_short_configs, MatMulBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
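
The `trans_a`/`trans_b` branches above produce operands of the same logical shape either way; the `.t()` path simply yields a transposed, non-contiguous layout, so both memory layouts get covered. A quick check:

import torch

M, N = 4, 8
a_contig = torch.rand(M, N)
a_strided = torch.rand(N, M).t()      # same shape, transposed layout
assert a_contig.shape == a_strided.shape == (M, N)
assert a_contig.is_contiguous() and not a_strided.is_contiguous()
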
Example #7
        [1, 1, 1],
        [64, 64, 64],
        [64, 64, 128],
    ],
    cross_product_configs={
        'device': ['cpu'],
    },
    tags=["short"],
)

class LinearUnpackFP16Benchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K, device):
        # The input to the unpack operator must be the output of the prepack operator.
        self.inputs = {
            "input_one": torch.ops.quantized.linear_prepack_fp16(torch.rand(M, N, K, device=device,
                                                                            requires_grad=False,
                                                                            dtype=torch.float32))
        }
        self.set_module_name("linear_unpack_fp16")

    def forward(self, input_one):
        return torch.ops.quantized.linear_unpack_fp16(input_one)

# The generated test names based on linear_unpack_fp16_short_configs will be in the following pattern:
# linear_unpack_fp16_M8_N16_K32_devicecpu

op_bench.generate_pt_test(linear_unpack_fp16_long_configs + linear_unpack_fp16_short_configs, LinearUnpackFP16Benchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Example #8
        [512, 512, 2],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=["short"],
)

chunks_long_configs = op_bench.cross_product_configs(M=[128, 1024],
                                                     N=[128, 1024],
                                                     chunks=[2, 4],
                                                     device=['cpu', 'cuda'],
                                                     tags=['long'])


class ChunkBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, chunks, device):
        self.input_one = torch.rand(M, N, device=device)
        self.chunks = chunks
        self.set_module_name('chunk')

    def forward(self):
        return torch.chunk(self.input_one, self.chunks)


op_bench.generate_pt_test(chunk_short_configs + chunks_long_configs,
                          ChunkBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Example #9
        self.input = qX
        self.qconv2d = nnq.Conv2d(IC, OC, kernel, stride=stride, padding=pad, groups=G)
        self.qconv2d.weight = qW
        self.qconv2d.scale = torch.tensor([scale], dtype=torch.double)
        self.qconv2d.zero_point = torch.tensor([zero_point], dtype=torch.int)

        W2 = torch.randn(OC, OC // G, kernel, kernel, dtype=torch.float32)
        qW2 = torch.quantize_per_tensor(W2, scale=scale, zero_point=0, dtype=torch.qint8)
        self.qconv2d2 = nnq.Conv2d(OC, OC, kernel, stride=stride, padding=pad, groups=G)
        self.qconv2d2.weight = qW2
        self.qconv2d2.scale = torch.tensor([scale], dtype=torch.double)
        self.qconv2d2.zero_point = torch.tensor([zero_point], dtype=torch.int)
        self.set_module_name("QConv2dChained")

    def forward(self):
        # test that layout propagation works fine
        x = self.qconv2d(self.input)
        x = x.relu()
        return self.qconv2d2(x)


op_bench.generate_pt_test(qconv_1d_configs, QConv1dBenchmark)
op_bench.generate_pt_test(qconv_2d_configs, QConv2dBenchmark)
op_bench.generate_pt_test(resnext_32_4d_shape_configs, QConv2dBenchmark)
op_bench.generate_pt_test(qconv_2d_configs, QConv2dChainedBenchmark)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Example #10
import operator_benchmark as op_bench
import torch


add_configs = op_bench.cross_product_configs(
    M=[8],
    N=[8],
    K=[8],
    device=["cuda", "cpu"],
    tags=["short"]
)


class AddBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K, device): 
        self.input_one = torch.rand(M, N, K, device=device, requires_grad=True)
        self.input_two = torch.rand(M, N, K, device=device, requires_grad=True)
        self.set_module_name("add")

    def forward(self):
        return torch.add(self.input_one, self.input_two)


op_bench.generate_pt_test(add_configs, AddBenchmark)
op_bench.generate_pt_gradient_test(add_configs, AddBenchmark)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()
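
`generate_pt_gradient_test` additionally times the backward pass, which is why both inputs are created with `requires_grad=True`. Roughly what the timed region does, as a sketch rather than the framework's actual code:

import torch

x = torch.rand(8, 8, 8, requires_grad=True)
y = torch.rand(8, 8, 8, requires_grad=True)
out = torch.add(x, y)
out.backward(torch.ones_like(out))    # backward pass under measurement
assert x.grad is not None and y.grad is not None
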
Example #11
    def init(self, sizes, N, dim, device):
        random.seed(42)
        inputs = []
        gen_sizes = []
        if type(sizes) == list and N == -1:
            gen_sizes = sizes
        else:
            for i in range(N):
                gen_sizes.append([
                    old_size() if callable(old_size) else old_size
                    for old_size in sizes
                ])

        for s in gen_sizes:
            inputs.append(torch.rand(s, device=device))
        result = torch.empty(0, device=device)
        self.inputs = {"result": result, "inputs": inputs, "dim": dim}
        self.set_module_name('cat')

    def forward(self, result: torch.Tensor, inputs: List[torch.Tensor],
                dim: int):
        return torch.cat(inputs, dim=dim, out=result)


op_bench.generate_pt_test(
    cat_configs_short + cat_configs_long + cat_configs_multidim +
    cat_configs_manyinputs + cat_configs_static_runtime, CatBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
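
Passing `out=` lets `torch.cat` write into (and resize) a preallocated tensor, so steady-state iterations skip a fresh output allocation. The reuse pattern in isolation:

import torch

result = torch.empty(0)
pieces = [torch.rand(2, 3), torch.rand(4, 3)]
torch.cat(pieces, dim=0, out=result)   # result is resized to (6, 3)
assert result.shape == (6, 3)
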
Example #12
            bidirectional=D,
        )
        cell_temp = nn.Sequential(cell_nn)
        self.cell = torch.quantization.quantize_dynamic(cell_temp,
                                                        {nn.LSTM, nn.Linear},
                                                        dtype=dtype)[0]

        self.x = torch.randn(
            sequence_len,  # sequence length
            batch_size,  # batch size
            I)  # number of features in X
        self.h = torch.randn(
            NL * (D + 1),  # layer_num * dir_num
            batch_size,  # batch size
            H)  # hidden size
        self.c = torch.randn(
            NL * (D + 1),  # layer_num * dir_num
            batch_size,  # batch size
            H)  # hidden size

        self.set_module_name("QLSTM")

    def forward(self):
        return self.cell(self.x, (self.h, self.c))


op_bench.generate_pt_test(qrnn_configs, LSTMBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Example #13
    device=['cpu', 'cuda'],
    tags=['long']
)


class SumBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, R, V, dim, contiguous, device):
        shape = (R, V) if dim == 0 else (V, R)
        tensor = torch.rand(shape, device=device)

        if not contiguous:
            storage = torch.empty([s * 2 for s in shape], device=device)
            storage[::2, ::2] = tensor
            self.input_tensor = storage[::2, ::2]
        else:
            self.input_tensor = tensor

        self.inputs = {
            "input_tensor": self.input_tensor,
            "dim": dim
        }
        self.set_module_name("sum")

    def forward(self, input_tensor, dim: int):
        return input_tensor.sum(dim=dim)

op_bench.generate_pt_test(sum_configs, SumBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
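
The strided-slice trick in `init` yields a tensor with the requested shape whose elements are spaced out in memory, exercising the non-contiguous reduction path. The idea in isolation:

import torch

shape = (4, 6)
storage = torch.empty([s * 2 for s in shape])
view = storage[::2, ::2]               # every other element in each dim
assert view.shape == torch.Size(shape) and not view.is_contiguous()
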
Example #14
    tags=["short"],
)


@torch.jit.script
def torch_sumall(a, iterations):
    # type: (Tensor, int) -> float
    result = 0.0
    for _ in range(iterations):
        result += float(torch.sum(a))
        a[0][0] += 0.01
    return result


class TorchSumBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N):
        self.input_one = torch.rand(M, N)
        self.set_module_name("sum")

    # This is a very temporary method and will be removed soon, so
    # don't use this method in your benchmark
    # TODO(mingzhe): use one forward method for both JIT and Eager
    def jit_forward(self, iters):
        return torch_sumall(self.input_one, iters)


op_bench.generate_pt_test(intraop_bench_configs, TorchSumBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Example #15
                                    N,
                                    K,
                                    1,
                                    device=device,
                                    requires_grad=self.auto_set())
        x_scale = 0.1
        x_zero_point = 0
        self.q_input_one = torch.quantize_per_tensor(self.input_one,
                                                     scale=x_scale,
                                                     zero_point=x_zero_point,
                                                     dtype=dtype)
        self.mean = torch.rand(N)
        self.var = torch.rand(N)
        self.weight = torch.rand(N)
        self.bias = torch.rand(N)
        self.eps = 1e-5
        self.Y_scale = 0.1
        self.Y_zero_point = 0

    def forward(self):
        return torch.ops.quantized.batch_norm2d(self.q_input_one, self.weight,
                                                self.bias, self.mean, self.var,
                                                self.eps, self.Y_scale,
                                                self.Y_zero_point)


op_bench.generate_pt_test(batchnorm_configs_short, QBatchNormBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Example #16
        [8, 1, 2, 3, torch.int32],
        [9, 1, 2, 4, torch.int32],
        [10, 1, 2, 5, torch.int32],
    ],
    attr_names=["LENGTH", "M", "N", "MAX_LENGTH", "dtype"],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=["short"],
)


class ClipRangesBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, LENGTH, M, N, MAX_LENGTH, device, dtype):
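        # Note: torch.rand samples from [0, 1), so casting to an integer
        # dtype truncates every element to zero.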
        self.input = torch.rand(LENGTH, M, N, device=device).type(dtype)
        self.max_length = MAX_LENGTH
        self.set_module_name("clip_ranges")

    def forward(self):
        output = torch.ops.fb.clip_ranges(self.input, self.max_length)
        return output


op_bench.generate_pt_test(
    clip_ranges_long_configs + clip_ranges_short_configs, ClipRangesBenchmark
)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Example #17
    SBS=((1, 4), (1, 8), (4, 1), (8, 1)),  # Sparse block shape
    ZPB=(0, 1, 2, 3, 4, None),  # Zeros per block
    tags=("long",)
)

class WeightNormSparsifierBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, SL, SBS, ZPB):
        weight = torch.ones(M)
        model = nn.Module()
        model.register_buffer("weight", weight)

        sparse_config = [{"tensor_fqn": "weight"}]
        self.sparsifier = sparsity.WeightNormSparsifier(
            sparsity_level=SL,
            sparse_block_shape=SBS,
            zeros_per_block=ZPB,
        )
        self.sparsifier.prepare(model, config=sparse_config)
        self.inputs = {}  # All benchmarks need inputs :)
        self.set_module_name("weight_norm_sparsifier_step")

    def forward(self):
        self.sparsifier.step()

all_tests = sparse_configs_short + sparse_configs_long
op_bench.generate_pt_test(all_tests, WeightNormSparsifierBenchmark)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Example #18
# An example input from this configuration is M=4, N=4, dim=0.
configs = op_bench.config_list(
    attrs=[
        [4, 4, 0],
        [256, 256, 1],
    ],
    attr_names=["M", "N", "dim"],
    tags=["short"]
)


class GatherBenchmark(op_bench.TorchBenchmarkBase):
    # TODO (mingzhe0908): should we have a global seed for all ops?
    def init(self, M, N, dim):
        self.input_one = torch.rand(M, N)
        self.dim = dim
        min_val = M if dim == 0 else N
        numpy.random.seed((1 << 32) - 1)
        self.index = torch.tensor(numpy.random.randint(0, min_val, (M, N)))
        self.set_module_name("gather")

    def forward(self):
        return torch.gather(self.input_one, self.dim, self.index)


op_bench.generate_pt_test(configs, GatherBenchmark)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()
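
`torch.gather` picks one source element per output position: along `dim=0`, `out[i][j] = input[index[i][j]][j]`; along `dim=1`, `out[i][j] = input[i][index[i][j]]` (hence the `min_val` bound above). A quick check:

import torch

inp = torch.rand(4, 4)
idx = torch.randint(0, 4, (4, 4))
out = torch.gather(inp, 0, idx)
i, j = 2, 3
assert out[i, j] == inp[idx[i, j], j]
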
Example #19
"""


class QEmbeddingBagBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, embeddingbags, dim, mode, input_size, offset, sparse,
             include_last_offset, device):
        self.embedding = nnq.EmbeddingBag(
            num_embeddings=embeddingbags,
            embedding_dim=dim,
            mode=mode,
            include_last_offset=include_last_offset).to(device=device)
        numpy.random.seed((1 << 32) - 1)
        self.input = torch.tensor(numpy.random.randint(0, embeddingbags,
                                                       input_size),
                                  device=device).long()
        offset = torch.tensor([offset], dtype=torch.long, device=device)
        self.offset = torch.cat(
            (offset,
             torch.tensor([self.input.size(0)], dtype=torch.long,
                          device=device)), 0)
        self.inputs = {"input": self.input, "offset": self.offset}
        self.set_module_name('qEmbeddingBag')

    def forward(self, input, offset):
        return self.embedding(input, offset)


op_bench.generate_pt_test(configs.embeddingbag_short_configs,
                          QEmbeddingBagBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Example #20
diag_configs_short = op_bench.config_list(
    attr_names=['dim', 'M', 'N', 'diagonal', 'out'],
    attrs=[
        [1, 64, 64, 0, True],
        [2, 128, 128, -10, False],
        [1, 256, 256, 20, True],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=['short'],
)


class DiagBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, dim, M, N, diagonal, out, device):
        self.input = torch.rand(
            M, N, device=device) if dim == 2 else torch.rand(M, device=device)
        self.diagonal = diagonal
        self.out = torch.tensor((), device=device) if out else None
        self.set_module_name('diag')

    def forward(self):
        return torch.diag(self.input, diagonal=self.diagonal, out=self.out)


op_bench.generate_pt_test(diag_configs_short, DiagBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
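
`torch.diag` is shape-dependent, which is why the configs carry a `dim` attribute: a 1-D input of length M is embedded on the chosen diagonal of an (M + |diagonal|)-square matrix, while a 2-D input has that diagonal extracted as a vector:

import torch

v = torch.rand(4)
assert torch.diag(v).shape == (4, 4)              # 1-D -> square matrix
assert torch.diag(v, diagonal=1).shape == (5, 5)
m = torch.rand(4, 6)
assert torch.diag(m).shape == (4,)                # 2-D -> its main diagonal
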
Example #21
        self.input = torch.rand(C, M, N)
        self.dtype = dtype
        self.op = nnq.Quantize(scale=1.0, zero_point=0, dtype=dtype)
        self.set_module_name('QuantizePerTensor')

        if mode == 'D':
            self.input = self.op(self.input)
            self.op = nnq.DeQuantize()
            self.set_module_name('DequantizePerTensor')

    def forward(self):
        return self.op(self.input)


op_bench.generate_pt_test(
    quantize_per_tensor_configs_short + quantize_per_tensor_configs_long,
    QuantizePerTensorBenchmark)

# === Per Channel quantization ===

quantize_per_channel_configs_short = op_bench.config_list(
    cross_product_configs={
        'axis': (0,)
    },
    **quantize_configs_short_dict
)

quantize_per_channel_configs_long = op_bench.cross_product_configs(
    axis=(0, 1, 2),
    **quantize_configs_long_dict
)
Example #22
    def forward(self):
        return self.conv1d(self.input)


class ConvTranspose1dBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, IC, OC, kernel, stride, N, L, device):
        self.input = torch.rand(N, IC, L, device=device)
        self.convtranspose1d = nn.ConvTranspose1d(
            IC, OC, kernel, stride=stride).to(device=device)
        self.set_module_name('ConvTranspose1d')

    def forward(self):
        return self.convtranspose1d(self.input)


op_bench.generate_pt_test(conv_1d_configs_short + conv_1d_configs_long,
                          Conv1dBenchmark)
op_bench.generate_pt_test(conv_1d_configs_short + conv_1d_configs_long,
                          ConvTranspose1dBenchmark)
"""
Microbenchmarks for Conv2d and ConvTranspose2d operators.
"""

# Configs for Conv2d and ConvTranspose2d
conv_2d_configs_short = op_bench.config_list(
    attr_names=[
        'IC',
        'OC',
        'kernel',
        'stride',
        'N',
        'H',
Example #23
        inputs = []
        gen_sizes = []
        if type(sizes) == list and N == -1:
            gen_sizes = sizes
        else:
            for i in range(N):
                gen_sizes.append([old_size() if callable(old_size) else old_size for old_size in sizes])

        for s in gen_sizes:
            inputs.append(torch.rand(s, device=device))
        result = torch.rand(gen_sizes[0], device=device)
        self.inputs = {
            "result": result,
            "inputs": inputs,
            "dim": dim
        }
        self.set_module_name('stack')

    def forward(self, result: torch.Tensor, inputs: List[torch.Tensor], dim: int):
        return torch.stack(inputs, dim=dim, out=result)


op_bench.generate_pt_test(stack_configs_static_runtime +
                          stack_configs_short +
                          stack_configs_long +
                          stack_configs_multidim,
                          StackBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Example #24
from __future__ import unicode_literals

import operator_benchmark as op_bench
import torch
import torch.nn as nn
"""Microbenchmarks for Linear operator."""

configs = op_bench.config_list(attrs=[
    [1, 32, 10],
    [4, 256, 100],
    [16, 1024, 256],
],
                               attr_names=["N", "IN", "OUT"],
                               tags=["short"])


class LinearBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, N, IN, OUT):
        self.input_one = torch.rand(N, IN)
        self.linear = nn.Linear(IN, OUT)
        self.set_module_name("linear")

    def forward(self):
        return self.linear(self.input_one)


op_bench.generate_pt_test(configs, LinearBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Example #25
class BmmBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, B, M, N, K, device):
        self.inputs = {
            "batch1":
            torch.rand((B, M, K), device=device,
                       requires_grad=self.auto_set()),
            "batch2":
            torch.rand((
                B,
                K,
                N,
            ),
                       device=device,
                       requires_grad=self.auto_set())
        }
        self.set_module_name("bmm")

    def forward(self, batch1, batch2):
        return torch.bmm(batch1, batch2)


bmm_configs = op_bench.cross_product_configs(
    B=[2, 100],
    M=[8, 256],
    N=[256, 16],
    K=[16, 32],
    device=['cpu'],
    tags=["short"],
)

op_bench.generate_pt_test(bmm_configs, BmmBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
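
`torch.bmm` performs one matrix multiply per batch element: `(B, M, K) @ (B, K, N) -> (B, M, N)`. A shape check with the smallest config above:

import torch

B, M, N, K = 2, 8, 256, 16
out = torch.bmm(torch.rand(B, M, K), torch.rand(B, K, N))
assert out.shape == (B, M, N)
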
Example #26
class QAvgPool2dBenchmark(_QPool2dBenchmarkBase):
    def init(self, N, C, H, W, k, s, p, contig, dtype):
        self.pool_op = torch.nn.AvgPool2d(kernel_size=k,
                                          stride=s,
                                          padding=p,
                                          ceil_mode=False)
        super(QAvgPool2dBenchmark, self).setup(N, C, H, W, dtype, contig)


class QAdaptiveAvgPool2dBenchmark(_QPool2dBenchmarkBase):
    def init(self, N, C, input_size, output_size, contig, dtype):
        self.pool_op = torch.nn.AdaptiveAvgPool2d(output_size=output_size)
        super(QAdaptiveAvgPool2dBenchmark, self).setup(N,
                                                       C,
                                                       *input_size,
                                                       dtype=dtype,
                                                       contig=contig)


op_bench.generate_pt_test(
    qadaptive_avgpool2d_short_configs + qadaptive_avgpool2d_long_configs,
    QAdaptiveAvgPool2dBenchmark)
op_bench.generate_pt_test(qpool2d_short_configs + qpool2d_long_configs,
                          QAvgPool2dBenchmark)
op_bench.generate_pt_test(qpool2d_short_configs + qpool2d_long_configs,
                          QMaxPool2dBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Example #27

class QInterpolateBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K, dtype, mode, scale, contig):
        f_input = (torch.rand(1, M, N, K) - 0.5) * 256
        q_scale = 0.1  # quantization scale; distinct from the interpolate scale factor
        zero_point = 42
        self.q_input = torch.quantize_per_tensor(f_input,
                                                 scale=q_scale,
                                                 zero_point=zero_point,
                                                 dtype=dtype)
        if not contig:
            permute_dims = list(range(self.q_input.ndim))[::-1]
            self.q_input = self.q_input.permute(permute_dims)

        self.mode = mode
        self.scale_factor = scale
        self.set_module_name('q_interpolate')

    def forward(self):
        return torch.nn.quantized.functional.interpolate(
            self.q_input, scale_factor=self.scale_factor, mode=self.mode)


op_bench.generate_pt_test(
    qinterpolate_short_configs + qinterpolate_long_configs,
    QInterpolateBenchmark)

if __name__ == '__main__':
    op_bench.benchmark_runner.main()
Example #28
    # no channels_last for 3D tensors
    attr_names=["input_size", "output_size"],
    attrs=[
        [(4, 512, 320), (256, )],
        [(4, 512, 320), (512, )],
    ],
    cross_product_configs={
        'mode': ["nearest", "linear"],
    },
    tags=["long"],
)

config_5d = op_bench.config_list(
    attr_names=["input_size", "output_size"],
    attrs=[
        [(1, 3, 16, 320, 320), (8, 256, 256)],
        [(1, 3, 16, 320, 320), (32, 512, 512)],
    ],
    cross_product_configs={
        'channels_last': [True, False],
        'mode': ["nearest", "linear"],
    },
    tags=["long"],
)

for config in (config_short, config_long, config_3d, config_5d):
    op_bench.generate_pt_test(config, InterpolateBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Example #29
    K=[2 ** x for x in range(0, 3)], 
    tags=["long"]
)


add_short_configs = op_bench.config_list(
    attrs=[
        [8, 16, 32],
        [16, 32, 64],
    ],
    attr_names=["M", "N", "K"], 
    tags=["short"], 
)


class AddBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K): 
        self.input_one = torch.rand(M, N, K)
        self.input_two = torch.rand(M, N, K)
        self.set_module_name("add")

    def forward(self):
        return torch.add(self.input_one, self.input_two)


op_bench.generate_pt_test(add_long_configs + add_short_configs, AddBenchmark)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Example #30
)


class QInstanceNormBenchmark(op_bench.TorchBenchmarkBase):

    def init(self, dims, dtype):
        X = (torch.rand(*dims) - 0.5) * 256
        num_channels = dims[1]
        scale = 1.0
        zero_point = 0
        self.qX = torch.quantize_per_tensor(
            X, scale=scale, zero_point=zero_point, dtype=dtype)
        self.weight = torch.rand(num_channels, dtype=torch.float)
        self.bias = torch.rand(num_channels, dtype=torch.float)
        self.eps = 1e-5
        self.Y_scale = 0.1
        self.Y_zero_point = 0

    def forward(self):
        return torch.ops.quantized.instance_norm(
            self.qX, weight=self.weight, bias=self.bias,
            eps=self.eps, output_scale=self.Y_scale,
            output_zero_point=self.Y_zero_point)


op_bench.generate_pt_test(instancenorm_configs_short, QInstanceNormBenchmark)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()