    ],
    attr_names=['N', 'C', 'H', 'W'],
    tags=['short'],
)

softmax_configs_long = op_bench.config_list(
    attrs=[
        [8, 3, 128, 128],
        [16, 512, 14, 14],
        [16, 256, 28, 28],
    ],
    attr_names=['N', 'C', 'H', 'W'],
    tags=['long'],
)

softmax_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['Softmax', nn.Softmax],
        ['Softmax2d', nn.Softmax2d],
        ['LogSoftmax', nn.LogSoftmax],
    ],
)

class SoftmaxBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, N, C, H, W, op_func):
        self.input_one = torch.rand(N, C, H, W)
        self.op_func = op_func()

    def forward(self):
        return self.op_func(self.input_one)

op_bench.generate_pt_tests_from_op_list(
    softmax_ops_list,
    # assumes the short config list whose tail appears above is named
    # `softmax_configs_short`
    softmax_configs_short + softmax_configs_long,
    SoftmaxBenchmark,
)
# Configs for softmax ops
softmax_configs = op_bench.config_list(
    attrs=[
        [1, 3, 32, 32],
        [2, 3, 64, 64],
        [8, 3, 128, 128],
        [16, 512, 14, 14],
        [16, 256, 28, 28],
    ],
    attr_names=["N", "C", "H", "W"],
    tags=["short"],
)

softmax_ops_list = op_bench.op_list(
    attr_names=["op_name", "op_func"],
    attrs=[
        ["Softmax", nn.Softmax],
        ["Softmax2d", nn.Softmax2d],
        ["LogSoftmax", nn.LogSoftmax],
    ],
)

class SoftmaxBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, N, C, H, W, op_func):
        self.input_one = torch.rand(N, C, H, W)
        self.op_func = op_func()

    def forward(self):
        return self.op_func(self.input_one)

op_bench.generate_pt_tests_from_op_list(
    softmax_ops_list, softmax_configs, SoftmaxBenchmark
)
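# Hedged usage sketch: each generated test amounts to driving the benchmark
# class by hand with one op and one config from the lists above (assuming a
# TorchBenchmarkBase instance can be constructed and stepped directly):
bench = SoftmaxBenchmark()
bench.init(N=1, C=3, H=32, W=32, op_func=nn.Softmax2d)
out = bench.forward()  # softmax over the channel dim, shape (1, 3, 32, 32)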
)

qactivation_short_configs = op_bench.cross_product_configs(
    dims=(
        (3, 4, 5),     # Rank=3
        (2, 3, 4, 5),  # Rank=4
    ),
    contig=(False,),
    inplace=(False,),
    dtype=(torch.quint8, torch.qint8, torch.qint32),
    tags=('short',),
)

qactivation_ops = op_bench.op_list(
    attrs=(
        ('relu', nnq.ReLU),
        ('relu6', nnq.ReLU6),
        ('functional.hardtanh', nnq.functional.hardtanh),
        ('functional.elu', nnq.functional.elu),
        ('functional.hardsigmoid', nnq.functional.hardsigmoid),
    ),
    attr_names=('op_name', 'op_func'),
)

class QActivationBenchmarkBase(op_bench.TorchBenchmarkBase):
    r"""Base class for all the activations."""
    def _setup(self, dims, contig, dtype):
        # Input
        f_input = (torch.rand(*dims) - 0.5) * 256
        scale = 1.0
        zero_point = 0

        # Quantize the tensor
qcomparators_configs = op_bench.cross_product_configs(
    N=(8, 64),
    dtype=(torch.quint8, torch.qint8, torch.qint32),
    contig=(False, True),
    other_scalar=(False, True),
    out_variant=(False, True),
    tags=('short',),
)

qcomparators_ops = op_bench.op_list(
    attrs=(
        ('eq', torch.eq),
        ('ne', torch.ne),
        ('lt', torch.lt),
        ('gt', torch.gt),
        ('le', torch.le),
        ('ge', torch.ge),
    ),
    attr_names=('op_name', 'op_func'),
)

class QComparatorBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, N, dtype, contig, other_scalar, out_variant, op_func):
        # TODO: Consider more diverse shapes
        f_input = (torch.rand(N, N) - 0.5) * 256
        scale = 1.0
        zero_point = 0
        q_input_a = torch.quantize_per_tensor(f_input, scale=scale,
                                              zero_point=zero_point,
                                              dtype=dtype)
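# Hedged sketch of the two config axes above: `other_scalar` compares the
# quantized tensor against a Python scalar rather than a second tensor, and
# `out_variant` writes into a preallocated bool tensor through the `out=`
# kwarg (both assumed supported by the quantized comparator overloads):
q = torch.quantize_per_tensor(torch.rand(4, 4), scale=1.0, zero_point=0,
                              dtype=torch.quint8)
result = torch.empty(4, 4, dtype=torch.bool)
torch.eq(q, 0.5, out=result)  # scalar "other", out variant
mask = torch.eq(q, q)         # tensor "other", allocates a new bool tensor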
import numpy as np

embeddingbag_conversion_short_configs = op_bench.cross_product_configs(
    num_embeddings=(80,),
    embedding_dim=(128, 256, 512),
    tags=('short',),
)

embeddingbag_conversion_long_configs = op_bench.cross_product_configs(
    num_embeddings=(100, 120, 1000),
    embedding_dim=(16, 64, 128, 256, 512, 1024, 2048),
    tags=('long',),
)

conversion_ops = op_bench.op_list(
    attrs=(
        ('qembeddingbag_byte_prepack',
         torch.ops.quantized.embedding_bag_byte_prepack),
        ('qembeddingbag_4bit_prepack',
         torch.ops.quantized.embedding_bag_4bit_prepack),
        ('qembeddingbag_2bit_prepack',
         torch.ops.quantized.embedding_bag_2bit_prepack),
    ),
    attr_names=('op_name', 'op_func'),
)

unpack_ops = op_bench.op_list(
    attrs=(
        ('qembeddingbag_byte_unpack',
         torch.ops.quantized.embedding_bag_byte_unpack),
        ('qembeddingbag_4bit_unpack',
         torch.ops.quantized.embedding_bag_4bit_unpack),
        ('qembeddingbag_2bit_unpack',
         torch.ops.quantized.embedding_bag_2bit_unpack),
    ),
    attr_names=('op_name', 'op_func'),
)
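# Hedged round-trip sketch: the byte prepack op packs a float weight matrix
# into a uint8 tensor with per-row quantization parameters appended, and the
# matching unpack op recovers an approximation of the original weights.
weights = torch.rand(80, 128, dtype=torch.float32)
packed = torch.ops.quantized.embedding_bag_byte_prepack(weights)
recovered = torch.ops.quantized.embedding_bag_byte_unpack(packed)
assert recovered.shape == weights.shape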
    ],
    cross_product_configs={
        'device': ['cpu'],
    },
    tags=['short'],
)

hardswish_configs_long = op_bench.cross_product_configs(
    N=[8, 16],
    C=[3],
    H=[256, 512],
    W=[256, 512],
    device=['cpu'],
    tags=['long'],
)

hardswish_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['Hardswish', nn.Hardswish],
    ],
)

class HardswishBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, N, C, H, W, device, op_func):
        self.input_one = torch.rand(N, C, H, W, device=device)
        self.op_func = op_func()

    def forward(self):
        return self.op_func(self.input_one)

op_bench.generate_pt_tests_from_op_list(
    hardswish_ops_list,
    hardswish_configs_short + hardswish_configs_long,
    HardswishBenchmark,
)
unary_ops_list = op_bench.op_list(
    attr_names=["op_name", "op_function"],
    attrs=[
        ["abs", torch.abs],
        ["abs_", torch.abs_],
        ["acos", torch.acos],
        ["acos_", torch.acos_],
        ["argsort", torch.argsort],
        ["asin", torch.asin],
        ["asin_", torch.asin_],
        ["atan", torch.atan],
        ["atan_", torch.atan_],
        ["ceil", torch.ceil],
        ["ceil_", torch.ceil_],
        ["clone", torch.clone],
        ["cos", torch.cos],
        ["cos_", torch.cos_],
        ["cosh", torch.cosh],
        ["cosh_", torch.cosh_],
        ["digamma", torch.digamma],
        ["erf", torch.erf],
        ["erf_", torch.erf_],
        ["erfc", torch.erfc],
        ["erfc_", torch.erfc_],
        ["erfinv", torch.erfinv],
        ["exp", torch.exp],
        ["exp_", torch.exp_],
        ["expm1", torch.expm1],
        ["expm1_", torch.expm1_],
        ["floor", torch.floor],
        ["floor_", torch.floor_],
        ["frac", torch.frac],
        ["frac_", torch.frac_],
        ["hardshrink", torch.hardshrink],
        ["lgamma", torch.lgamma],
        ["log", torch.log],
        ["log10", torch.log10],
        ["log10_", torch.log10_],
        ["log1p", torch.log1p],
        ["log1p_", torch.log1p_],
        ["log2", torch.log2],
        ["log2_", torch.log2_],
        ["log_", torch.log_],
        ["neg", torch.neg],
        ["neg_", torch.neg_],
        ["reciprocal", torch.reciprocal],
        ["reciprocal_", torch.reciprocal_],
        ["relu", torch.relu],
        ["relu_", torch.relu_],
        ["round", torch.round],
        ["round_", torch.round_],
        ["rsqrt", torch.rsqrt],
        ["rsqrt_", torch.rsqrt_],
        ["sigmoid", torch.sigmoid],
        ["sigmoid_", torch.sigmoid_],
        ["sign", torch.sign],
        ["sin", torch.sin],
        ["sin_", torch.sin_],
        ["sinh", torch.sinh],
        ["sinh_", torch.sinh_],
        ["sqrt", torch.sqrt],
        ["sqrt_", torch.sqrt_],
        ["tan", torch.tan],
        ["tan_", torch.tan_],
        ["tanh", torch.tanh],
        ["tanh_", torch.tanh_],
        ["trunc", torch.trunc],
        ["trunc_", torch.trunc_],
        ["unique", torch.unique],
        ["zero_", torch.zero_],
        ["bernoulli_", lambda t: t.bernoulli_()],
        ["cauchy_", lambda t: t.cauchy_()],
        ["contiguous", lambda t: t.contiguous()],
        ["digamma_", lambda t: t.digamma_()],
        ["erfinv_", lambda t: t.erfinv_()],
        ["exponential_", lambda t: t.exponential_()],
        ["lgamma_", lambda t: t.lgamma_()],
        ["normal_", lambda t: t.normal_()],
        ["random_", lambda t: t.random_()],
        ["sign_", lambda t: t.sign_()],
        ["uniform_", lambda t: t.uniform_()],
        ["half", lambda t: t.half()],
        ["long", lambda t: t.long()],
    ],
)
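# Naming note: entries ending in "_" are the in-place variants, which mutate
# their input instead of allocating a new tensor. For example:
x = torch.tensor([-1.5, 2.0])
y = torch.abs(x)  # x is unchanged; y is a new tensor
x.abs_()          # x itself becomes [1.5, 2.0]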
attr_names=[ "kernel", "stride", "N", "C", "L" ], attrs=[ [3, 1, 1, 3, 32], [3, 2, 8, 3, 128], [3, 2, 16, 3, 256], ], tags=["short"] ) pool_1d_ops_list = op_bench.op_list( attr_names=["op_name", "op_func"], attrs=[ ["MaxPool1d", nn.MaxPool1d], ["AvgPool1d", nn.AvgPool1d], ], ) class Pool1dBenchmark(op_bench.TorchBenchmarkBase): def init(self, kernel, stride, N, C, L, op_func): self.input = torch.rand(N, C, L) self.kernel = kernel self.stride = stride self.op_func = op_func(self.kernel, stride=self.stride) def forward(self): return self.op_func(self.input)
qobserver_per_channel_configs_short = op_bench.config_list(
    cross_product_configs={
        'qscheme': (torch.per_channel_affine, torch.per_channel_symmetric)
    },
    **qobserver_short_configs_dict,
)

qobserver_per_channel_configs_long = op_bench.cross_product_configs(
    qscheme=(torch.per_channel_affine, torch.per_channel_symmetric),
    **qobserver_long_configs_dict,
)

qobserver_per_tensor_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['MinMaxObserver', obs.MinMaxObserver],
        ['MovingAverageMinMaxObserver', obs.MovingAverageMinMaxObserver],
        ['HistogramObserver', obs.HistogramObserver],
    ],
)

qobserver_per_channel_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['PerChannelMinMaxObserver', obs.PerChannelMinMaxObserver],
        ['MovingAveragePerChannelMinMaxObserver',
         obs.MovingAveragePerChannelMinMaxObserver],
    ],
)
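# Hedged usage sketch (assuming `obs` is the torch.quantization observer
# module): an observer is called on float tensors to record statistics, then
# asked for the quantization parameters those statistics imply.
observer = obs.MinMaxObserver()
observer(torch.rand(32, 32))  # record the batch's min/max
scale, zero_point = observer.calculate_qparams()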
import operator_benchmark as op_bench
import torch

"""Microbenchmarks for binary operators."""

# Benchmark ops performance with broadcast
binary_ops_bcast_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['add', torch.add],
    ],
)

# Configs with broadcast
binary_configs_broadcast = op_bench.config_list(
    attr_names=['in_one', 'in_two'],
    attrs=[
        [[64, 1, 64], [1, 64, 1]],
    ],
    cross_product_configs={
        'device': ['cpu'],
        'dtype': [torch.float],
    },
    tags=["short"],
)

class BinaryOpBcastBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, in_one, in_two, dtype, device, op_func):
        self.inputs = {
            "in_one": torch.randn(in_one, device=device).to(dtype=dtype),
            "in_two": torch.randn(in_two, device=device).to(dtype=dtype)
        }
        self.op_func = op_func

    def forward(self, in_one, in_two):
        return self.op_func(in_one, in_two)
        'device': ['cpu'],
    },
    tags=['short'],
)

pool_1d_configs_long = op_bench.cross_product_configs(
    kernel=[3],
    stride=[1, 2],
    N=[8, 16],
    C=[3],
    L=[128, 256],
    device=['cpu'],
    tags=['long'],
)

pool_1d_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['MaxPool1d', nn.MaxPool1d],
        ['AvgPool1d', nn.AvgPool1d],
    ],
)

class Pool1dBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, kernel, stride, N, C, L, device, op_func):
        self.input = torch.rand(N, C, L, device=device)
        self.kernel = kernel
        self.stride = stride
        self.op_func = op_func(self.kernel, stride=self.stride)

    def forward(self):
        return self.op_func(self.input)
import torch

import operator_benchmark as op_bench

qarithmetic_binary_configs = op_bench.cross_product_configs(
    N=(2, 8, 64, 512),
    dtype=(torch.quint8, torch.qint8, torch.qint32),
    # contig=(False, True),  # TODO: Reenable this after #29435
    contig=(True,),
    tags=('short',),
)

qarithmetic_binary_ops = op_bench.op_list(
    attrs=(
        ('add', 'add'),
        ('add_scalar', 'add_scalar'),
        ('add_relu', 'add_relu'),
        ('mul', 'mul'),
        ('mul_scalar', 'mul_scalar'),
    ),
    attr_names=('op_name', 'op_func'),
)

r"""Base class to use QFunctional.

Children will need to set `self.qop` to the qfunctional op under test.
I.e. `self.qop = 'add'`
"""
class _QFunctionalBinaryArithmeticBenchmarkBase(op_bench.TorchBenchmarkBase):
    def setup(self, N, dtype, contig):
        self.qfunctional = torch.nn.quantized.QFunctional()
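# Hedged sketch of a concrete child class, following the docstring above;
# the class name is illustrative, not part of the benchmark suite:
class QFunctionalAddBenchmark(_QFunctionalBinaryArithmeticBenchmarkBase):
    def init(self, N, dtype, contig):
        self.setup(N, dtype, contig)
        self.qop = 'add'  # dispatched as self.qfunctional.add(...)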
import torch
from torch._ops import ops

import operator_benchmark as op_bench

qarithmetic_binary_configs = op_bench.cross_product_configs(
    N=(2, 8, 64, 512),
    dtype=(torch.quint8, torch.qint8, torch.qint32),
    contig=(False, True),
    tags=('short',),
)

qarithmetic_binary_ops = op_bench.op_list(
    attrs=(
        ('add', ops.quantized.add),
        ('add_relu', ops.quantized.add_relu),
        ('mul', ops.quantized.mul),
    ),
    attr_names=('op_name', 'op_func'),
)

qarithmetic_binary_scalar_ops = op_bench.op_list(
    attrs=(
        ('add_scalar', ops.quantized.add_scalar),
        ('mul_scalar', ops.quantized.mul_scalar),
    ),
    attr_names=('op_name', 'op_func'),
)

class _QFunctionalBinaryArithmeticBenchmarkBase(op_bench.TorchBenchmarkBase):
    def setup(self, N, dtype, contig):
        self.qfunctional = torch.nn.quantized.QFunctional()
    dtype=(torch.quint8,),
    tags=('long',),
)

qactivation_short_configs = op_bench.cross_product_configs(
    dims=(
        (3, 4, 5),     # Rank=3
        (2, 3, 4, 5),  # Rank=4
    ),
    contig=(False,),
    inplace=(False,),
    dtype=(torch.quint8, torch.qint8, torch.qint32),
    tags=('short',),
)

qactivation_ops = op_bench.op_list(
    attrs=(
        ('relu', nnq.ReLU),
        ('relu6', nnq.ReLU6),
    ),
    attr_names=('op_name', 'op_func'),
)

class QActivationBenchmarkBase(op_bench.TorchBenchmarkBase):
    r"""Base class for all the activations."""
    def _setup(self, dims, contig, dtype):
        # Input
        f_input = (torch.rand(*dims) - 0.5) * 256
        scale = 1.0
        zero_point = 0

        # Quantize the tensor
        self.q_input = torch.quantize_per_tensor(f_input, scale=scale,
                                                 zero_point=zero_point,
                                                 dtype=dtype)
import operator_benchmark as op_bench
import torch

"""Microbenchmarks for remainder operators."""

# Benchmark remainder ops (fmod, remainder)
remainder_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['fmod', torch.fmod],
        ['remainder', torch.remainder],
    ],
)

remainder_short_configs = op_bench.config_list(
    attr_names=['M', 'N', 'K'],
    attrs=[
        [1, 1, 1],
        [64, 64, 64],
        [64, 64, 128],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
        'dtype': [torch.int32, torch.float, torch.double],
    },
    tags=['short'],
)

remainder_long_configs = op_bench.cross_product_configs(
    M=[8, 128],
    N=[32, 64],
    K=[256, 512],
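# Why both ops are worth benchmarking separately: they disagree for negative
# operands. fmod takes the sign of the dividend (C semantics); remainder
# takes the sign of the divisor (Python `%` semantics).
torch.fmod(torch.tensor([-3.0]), 2.0)       # tensor([-1.])
torch.remainder(torch.tensor([-3.0]), 2.0)  # tensor([1.])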
import operator_benchmark as op_bench
import torch

"""Microbenchmarks for binary operators."""

# Benchmark ops performance with broadcast
binary_ops_bcast_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['add', torch.add],
    ],
)

# Configs with broadcast
binary_configs_broadcast = op_bench.config_list(
    attr_names=['in_one', 'in_two'],
    attrs=[
        [[64, 1, 64], [1, 64, 1]],
    ],
    cross_product_configs={
        'device': ['cpu'],
        'dtype': [torch.float],
    },
    tags=["short"],
)

class BinaryOpBcastBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, in_one, in_two, dtype, device, op_func):
        self.in_one = torch.randn(in_one, device=device).to(dtype=dtype)
        self.in_two = torch.randn(in_two, device=device).to(dtype=dtype)
        self.op_func = op_func
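# Broadcasting note: the (64, 1, 64) and (1, 64, 1) inputs above broadcast to
# a common (64, 64, 64) shape, every size-1 dimension expanding to match the
# other operand (torch >= 1.8 can confirm this directly):
torch.broadcast_shapes((64, 1, 64), (1, 64, 1))  # torch.Size([64, 64, 64])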
def fakeQuantizePerTensorLearnableKernel(input, scale, zero_point,
                                         quant_min: int, quant_max: int):
    return torch._fake_quantize_learnable_per_tensor_affine(
        input, scale, zero_point, quant_min, quant_max)

def fakeQuantizePerTensorOriginalKernel(input, scale, zero_point,
                                        quant_min: int, quant_max: int):
    # Note: the original kernel is benchmarked with fixed qparams
    # (scale=1.0, zero_point=0); the scale/zero_point arguments are unused.
    return torch.fake_quantize_per_tensor_affine(input, 1.0, 0,
                                                 quant_min, quant_max)

fake_quantize_per_tensor_ops = op_bench.op_list(
    attrs=(
        ('learnable_kernel', fakeQuantizePerTensorLearnableKernel),
        ('original_kernel', fakeQuantizePerTensorOriginalKernel),
    ),
    attr_names=('op_name', 'op_func'),
)

fake_quantize_operator_configs_short = op_bench.config_list(
    cross_product_configs={
        'nbits': (4, 8),
        'device': ('cpu', 'cuda'),
    },
    **fake_quantize_configs_short_dict,
)

fake_quantize_operator_configs_long = op_bench.cross_product_configs(
    nbits=(4, 8),
    device=('cpu', 'cuda'),
    **fake_quantize_configs_long_dict,
)

class FakeQuantizePerTensorBaseOpBenchmark(op_bench.TorchBenchmarkBase):
    embedding_dim=(16, 64, 128, 256),
    num_offsets=range(10, 20),
    enable_per_sample_weights=(True, False),
    include_last_offset=(True, False),
    is_pruned_weights=(True, False),
    use_32bit_indices=(True, False),
    use_32bit_offsets=(True, False),
    tags=['long'],
)

full_configs = (embedding_bag_rowwise_offsets_short_configs +
                embedding_bag_rowwise_offsets_long_configs)

four_bit_rowwise_ops = op_bench.op_list(
    attrs=(
        ('qembeddingbag_4bit_rowwise_offsets',
         torch.ops.quantized.embedding_bag_4bit_rowwise_offsets),
    ),
    attr_names=('op_name', 'op_func'),
)

byte_rowwise_ops = op_bench.op_list(
    attrs=(
        ('qembeddingbag_byte_rowwise_offsets',
         torch.ops.quantized.embedding_bag_byte_rowwise_offsets),
    ),
    attr_names=('op_name', 'op_func'),
)

def get_pruned_weights_and_mapping(q_weights):
    indicator = torch.from_numpy(np.random.uniform(
        low=-1.0, high=1.0, size=[q_weights.shape[0]]).astype(np.float32))
        'device': ['cpu', 'cuda'],
    },
    tags=['short'],
)

pool_1d_configs_long = op_bench.cross_product_configs(
    kernel=[3],
    stride=[1, 2],
    N=[8, 16],
    C=[3],
    L=[128, 256],
    device=['cpu', 'cuda'],
    tags=['long'],
)

pool_1d_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['MaxPool1d', nn.MaxPool1d],
        ['AvgPool1d', nn.AvgPool1d],
    ],
)

class Pool1dBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, kernel, stride, N, C, L, device, op_func):
        self.input = torch.rand(N, C, L, device=device)
        self.kernel = kernel
        self.stride = stride
        self.op_func = op_func(self.kernel, stride=self.stride)

    def forward(self):
        return self.op_func(self.input)
)

hardsigmoid_configs_long = op_bench.cross_product_configs(
    N=[8, 16],
    C=[3],
    H=[256, 512],
    W=[256, 512],
    device=['cpu'],
    tags=['long'],
)

hardsigmoid_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['Hardsigmoid', nn.Hardsigmoid],
    ],
)

class HardsigmoidBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, N, C, H, W, device, op_func):
        self.input_one = torch.rand(N, C, H, W, device=device)
        self.op_func = op_func()

    def forward(self):
        return self.op_func(self.input_one)

op_bench.generate_pt_tests_from_op_list(
    hardsigmoid_ops_list,
    hardsigmoid_configs_short + hardsigmoid_configs_long,
    HardsigmoidBenchmark,
)
        (2, 3, 4, 5),  # Rank=4
        # Dimensions from the floating point benchmarks
        (512, 512),
        (256, 1024),
    ),
    contig=(False,),
    inplace=(False,),
    dtype=(torch.quint8, torch.qint8, torch.qint32),
    tags=('short',),
)

qactivation_ops = op_bench.op_list(
    attrs=(
        ('relu', torch.nn.ReLU()),
        ('relu6', torch.ops.quantized.relu6),
        ('functional.hardtanh', nnq.functional.hardtanh),
        ('functional.hardsigmoid', nnq.functional.hardsigmoid),
        ('functional.leaky_relu', nnq.functional.leaky_relu),
        ('functional.sigmoid', torch.nn.functional.sigmoid),
        ('functional.tanh', torch.nn.functional.tanh),
    ),
    attr_names=('op_name', 'op_func'),
)

class QActivationBenchmarkBase(op_bench.TorchBenchmarkBase):
    r"""Base class for all the activations."""
    def _setup(self, dims, contig, dtype):
        # Input
        f_input = (torch.rand(*dims) - 0.5) * 256
        self.scale = 1.0
        self.zero_point = 0
batch_mm_configs_long = op_bench.config_list(
    attr_names=["B", "M", "N", "K"],
    attrs=[
        [128, 256, 128, 256],
        [512, 1024, 1024, 512],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=["long"],
)

batch_mm_op_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['einsum_bmm', torch.einsum],
        ['bmm', torch.bmm],
    ],
)

class BatchMatrixMultBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, B, M, N, K, device, op_func):
        self.inputs = {
            "input_one": torch.rand(B, M, N, device=device),
            "input_two": torch.rand(B, N, K, device=device)
        }
        self.op_func = op_func

    def forward(self, input_one, input_two):
        if self.op_func.__name__ == "einsum":
            return torch.einsum('bij,bjk->bik', input_one, input_two)
        else:
            return torch.bmm(input_one, input_two)
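# The einsum spec 'bij,bjk->bik' expresses the same batched matrix product as
# torch.bmm; a quick equivalence check:
a, b = torch.rand(2, 3, 4), torch.rand(2, 4, 5)
assert torch.allclose(torch.einsum('bij,bjk->bik', a, b), torch.bmm(a, b))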
    cross_product_configs={
        'dtype': [torch.quint8],
        'contig': [False, True],
    },
    tags=['short'],
)

qmethods_configs_long = op_bench.cross_product_configs(
    M=[256, 1024],
    N=[256, 1024],
    dtype=[torch.qint8, torch.qint32],
    contig=[False, True],
    tags=['long'],
)

qmethods_tensor_input_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['q_copy', 'copy_'],
    ],
)

class _QMethodBenchmarkBase(op_bench.TorchBenchmarkBase):
    def init(self, M, N, dtype, contig, op_func):
        f_input = torch.rand(M, N)
        scale = 1.0
        zero_point = 0
        self.q_input = torch.quantize_per_tensor(f_input, scale=scale,
                                                 zero_point=zero_point,
                                                 dtype=dtype)
        if not contig:
            permute_dims = list(range(self.q_input.ndim))[::-1]
# Configs for pointwise unary ops
unary_ops_configs = op_bench.config_list(
    attrs=[
        [128, 128],
    ],
    attr_names=["M", "N"],
    tags=["short"],
)

unary_ops_list = op_bench.op_list(
    attr_names=["op_name", "op_func"],
    attrs=[
        ["abs", torch.abs],
        ["acos", torch.acos],
    ],
)

class UnaryOpBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, op_func):
        self.input_one = torch.rand(M, N)
        self.op_func = op_func

    def forward(self):
        return self.op_func(self.input_one)

op_bench.generate_pt_tests_from_op_list(
    unary_ops_list, unary_ops_configs, UnaryOpBenchmark
)
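# Entry point sketch, assuming the standard operator_benchmark runner: these
# files are executed as scripts and the runner picks up the generated tests.
if __name__ == "__main__":
    op_bench.benchmark_runner.main()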
unary_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['abs', torch.abs],
        ['abs_', torch.abs_],
        ['acos', torch.acos],
        ['acos_', torch.acos_],
        ['argsort', torch.argsort],
        ['asin', torch.asin],
        ['asin_', torch.asin_],
        ['atan', torch.atan],
        ['atan_', torch.atan_],
        ['ceil', torch.ceil],
        ['ceil_', torch.ceil_],
        ['clone', torch.clone],
        ['cos', torch.cos],
        ['cos_', torch.cos_],
        ['cosh', torch.cosh],
        ['cosh_', torch.cosh_],
        ['digamma', torch.digamma],
        ['erf', torch.erf],
        ['erf_', torch.erf_],
        ['erfc', torch.erfc],
        ['erfc_', torch.erfc_],
        ['erfinv', torch.erfinv],
        ['exp', torch.exp],
        ['exp_', torch.exp_],
        ['expm1', torch.expm1],
        ['expm1_', torch.expm1_],
        ['floor', torch.floor],
        ['floor_', torch.floor_],
        ['frac', torch.frac],
        ['frac_', torch.frac_],
        ['hardshrink', torch.hardshrink],
        ['lgamma', torch.lgamma],
        ['log', torch.log],
        ['log10', torch.log10],
        ['log10_', torch.log10_],
        ['log1p', torch.log1p],
        ['log1p_', torch.log1p_],
        ['log2', torch.log2],
        ['log2_', torch.log2_],
        ['log_', torch.log_],
        ['neg', torch.neg],
        ['neg_', torch.neg_],
        ['reciprocal', torch.reciprocal],
        ['reciprocal_', torch.reciprocal_],
        ['relu', torch.relu],
        ['relu_', torch.relu_],
        ['round', torch.round],
        ['round_', torch.round_],
        ['rsqrt', torch.rsqrt],
        ['rsqrt_', torch.rsqrt_],
        ['sigmoid', torch.sigmoid],
        ['sigmoid_', torch.sigmoid_],
        ['sign', torch.sign],
        ['sin', torch.sin],
        ['sin_', torch.sin_],
        ['sinh', torch.sinh],
        ['sqrt', torch.sqrt],
        ['sqrt_', torch.sqrt_],
        ['tan', torch.tan],
        ['tan_', torch.tan_],
        ['tanh', torch.tanh],
        ['tanh_', torch.tanh_],
        ['trunc', torch.trunc],
        ['trunc_', torch.trunc_],
        ['unique', torch.unique],
        ['zero_', torch.zero_],
        ['bernoulli_', lambda t: t.bernoulli_()],
        ['cauchy_', lambda t: t.cauchy_()],
        ['digamma_', lambda t: t.digamma_()],
        ['exponential_', lambda t: t.exponential_()],
        ['normal_', lambda t: t.normal_()],
        ['random_', lambda t: t.random_()],
        ['sign_', lambda t: t.sign_()],
        ['uniform_', lambda t: t.uniform_()],
        ['half', lambda t: t.half()],
        ['long', lambda t: t.long()],
    ],
)
import operator_benchmark as op_bench
import torch
import math

"""Microbenchmarks for torch.nan_to_num / nan_to_num_ operators"""

# Configs for PT torch.nan_to_num / nan_to_num_ operators
nan_to_num_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['nan_to_num', torch.nan_to_num],
        ['nan_to_num_', torch.nan_to_num_],
    ],
)

nan_to_num_long_configs = op_bench.cross_product_configs(
    M=[32, 64, 128],
    N=range(32, 128, 32),
    dtype=[torch.float, torch.double],
    replace_inf=[True, False],
    tags=["long"],
)

nan_to_num_short_configs = op_bench.cross_product_configs(
    M=[16, 64],
    N=[64, 64],
    dtype=[torch.float, torch.double],
    replace_inf=[True, False],
    tags=["short"],
)
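# Behavior sketch: by default NaN maps to 0.0 and +/-inf map to the largest
# and smallest finite values of the dtype; explicit posinf/neginf override
# that (the `replace_inf` axis above presumably toggles those arguments).
x = torch.tensor([float('nan'), float('inf'), -float('inf')])
torch.nan_to_num(x)                           # tensor([0.0000e+00, 3.4028e+38, -3.4028e+38])
torch.nan_to_num(x, posinf=1.0, neginf=-1.0)  # tensor([0., 1., -1.])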