self.set_module_name("add") def forward(self): return torch.add(self.input_one, self.input_two) # The generated test names based on add_short_configs will be in the following pattern: # add_M8_N16_K32_devicecpu # add_M8_N16_K32_devicecpu_bwdall # add_M8_N16_K32_devicecpu_bwd1 # add_M8_N16_K32_devicecpu_bwd2 # ... # Those names can be used to filter tests. op_bench.generate_pt_test(add_long_configs + add_short_configs, AddBenchmark) op_bench.generate_pt_gradient_test(add_long_configs + add_short_configs, AddBenchmark) """Microbenchmark for addmm operator.""" class AddmmBenchmark(op_bench.TorchBenchmarkBase): def init(self, M, N, K, device): self.input_one = torch.rand(M, K, device=device, requires_grad=self.auto_set()) self.mat1 = torch.rand(M, N, device=device, requires_grad=self.auto_set()) self.mat2 = torch.rand(N, K,
else: # Replace tensors with float and long types for original per tensor # fake quantize kernel. self.args[1], self.args[2] = 1., 0 self.op = torch.fake_quantize_per_tensor_affine def forward(self): return self.op(*self.args) op_bench.generate_pt_test( fake_quantize_operator_configs_short + fake_quantize_operator_configs_long, FakeQuantizePerTensorOpBenchmark ) op_bench.generate_pt_gradient_test( fake_quantize_operator_configs_short + fake_quantize_operator_configs_long, FakeQuantizePerTensorOpBenchmark ) class FakeQuantizePerChannelOpBenchmark(op_bench.TorchBenchmarkBase): r"""Benchmarks 3 different fake quantize per channel operators.""" def init(self, N, C, H, W, nbits, device, op_type): self.quant_min = 0 self.quant_max = 2 ** nbits - 1 self.quant_range = 2 ** nbits # Axis is chosen with respect to the number of channels: C. self.axis = 1 self.input = torch.rand(N, C, H, W, dtype=torch.float, device=device) self.scale = torch.tensor([1.] * C).to(device) self.zero_point = torch.tensor([0.] * C).to(device) self.input.requires_grad_() self.args = [
import operator_benchmark as op_bench
import torch


# One 8 x 8 x 8 problem size, listed for both the "cuda" and "cpu" devices and
# tagged "short".
add_configs = op_bench.cross_product_configs(
    M=[8],
    N=[8],
    K=[8],
    device=["cuda", "cpu"],
    tags=["short"],
)


class AddBenchmark(op_bench.TorchBenchmarkBase):
    """Benchmark ``torch.add`` on two random tensors of shape (M, N, K)."""

    def init(self, M, N, K, device):
        """Create both operands on ``device`` with ``requires_grad=True``.

        Args:
            M: Size of the first tensor dimension.
            N: Size of the second tensor dimension.
            K: Size of the third tensor dimension.
            device: Device on which both operands are allocated.
        """
        shape = (M, N, K)
        self.input_one = torch.rand(shape, device=device, requires_grad=True)
        self.input_two = torch.rand(shape, device=device, requires_grad=True)
        self.set_module_name("add")

    def forward(self):
        """Return the element-wise sum of the two operands."""
        total = torch.add(self.input_one, self.input_two)
        return total


# Register the forward-only tests and the gradient tests for every config.
op_bench.generate_pt_test(add_configs, AddBenchmark)
op_bench.generate_pt_gradient_test(add_configs, AddBenchmark)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()
input_size=[8, 16, 64], offset=[0], sparse=[True], tags=['short'] ) class EmbeddingBagBenchmark(op_bench.TorchBenchmarkBase): def init(self, embeddingbags, dim, mode, input_size, offset, sparse): self.embegging = torch.nn.EmbeddingBag( num_embeddings=embeddingbags, embedding_dim=dim, mode=mode, sparse=sparse) numpy.random.seed((1 << 32) - 1) self.input = torch.tensor(numpy.random.randint(0, embeddingbags, input_size)).long() self.offset = torch.LongTensor([offset]) self.set_module_name('embeddingbag') def forward(self): return self.embegging(self.input, self.offset) op_bench.generate_pt_test(embeddingbag_short_configs, EmbeddingBagBenchmark) op_bench.generate_pt_gradient_test(embeddingbag_short_configs, EmbeddingBagBenchmark) if __name__ == "__main__": op_bench.benchmark_runner.main()
) batchnorm_configs_long = op_bench.cross_product_configs( M=[1, 128], N=[8192, 2048], K=[1], device=['cpu', 'cuda'], tags=["long"] ) class BatchNormBenchmark(op_bench.TorchBenchmarkBase): def init(self, M, N, K, device): self.input_one = torch.rand(M, N, K, device=device, requires_grad=self.auto_set()) self.mean = torch.rand(N, device=device) self.var = torch.rand(N, device=device) self.weight = torch.rand(N, device=device) self.bias = torch.rand(N, device=device) self.set_module_name("batchnorm") def forward(self): return F.batch_norm(self.input_one, self.mean, self.var, self.weight, self.bias) op_bench.generate_pt_test(batchnorm_configs_short + batchnorm_configs_long, BatchNormBenchmark) op_bench.generate_pt_gradient_test(batchnorm_configs_short + batchnorm_configs_long, BatchNormBenchmark) if __name__ == "__main__": op_bench.benchmark_runner.main()
input = torch.tensor(numpy.random.randint(0, embeddingbags, input_size), device=device).long() self.inputs = { "input": input, "offset": torch.cat((offsets, torch.tensor([input.size(0)], dtype=torch.long)), 0) } self.set_module_name('qatEmbeddingBag') def forward(self, input, offset): return self.embedding(input, offset) # Currently, EmbeddingBag QAT does not support sparse embeddings. embeddingbag_short_dense_configs = [config for config in configs.embeddingbag_short_configs if {'sparse': True} not in config] op_bench.generate_pt_test(embeddingbag_short_dense_configs, QATEmbeddingBagBenchmark) op_bench.generate_pt_gradient_test(embeddingbag_short_dense_configs, QATEmbeddingBagBenchmark) class QATEmbeddingBenchmark(op_bench.TorchBenchmarkBase): def init(self, num_embeddings, embedding_dim, input_size, device): qconfig = default_embedding_qat_qconfig self.embedding = nnqat.Embedding( num_embeddings=num_embeddings, embedding_dim=embedding_dim, qconfig=qconfig, device=device) self.embedding.qconfig = default_embedding_qat_qconfig numpy.random.seed((1 << 32) - 1) self.input = torch.tensor(numpy.random.randint(0, num_embeddings, input_size), device=device).long() self.inputs = {"input": self.input} self.set_module_name('qatEmbedding')
self.input = torch.rand(N, C, H, W, dtype=torch.float) self.scale = torch.tensor([1.]) self.zero_point = torch.tensor([0.]) self.input.requires_grad_() self.scale.requires_grad_() self.zero_point.requires_grad_() def forward(self): return torch._fake_quantize_learnable_per_tensor_affine( self.input, self.scale, self.zero_point, self.quant_min, self.quant_max) op_bench.generate_pt_test(fake_quantize_learnable_configs, FakeQuantizeLearnablePerTensorBenchmark) op_bench.generate_pt_gradient_test(fake_quantize_learnable_configs, FakeQuantizeLearnablePerTensorBenchmark) class FakeQuantizeLearnablePerChannelBenchmark(op_bench.TorchBenchmarkBase): r"""Benchmarks learnable fake quantize per channel.""" def init(self, N, C, H, W, nbits): torch.manual_seed(TORCH_RANDOM_SEED) self.quant_min = 0 self.quant_max = 2**nbits - 1 self.quant_range = 2**nbits # Axis is chosen with respect to the number of channels: C. self.axis = 1 self.input = torch.rand(N, C, H, W, dtype=torch.float) self.scale = torch.tensor([1.] * C) self.zero_point = torch.tensor([0.] * C) self.input.requires_grad_()