[16, 16, "double"], [64, 64, "float"], [64, 64, "double"], ], attr_names=["M", "N", "dtype"], tags=["short"], ) class BatchBoxCoxBenchmark(op_bench_c2.Caffe2BenchmarkBase): def init(self, M, N, dtype): self.data = self.tensor([M, N], dtype) self.lambda1 = self.tensor([N], dtype) self.lambda2 = self.tensor([N], dtype) self.output = self.tensor([1, 1], dtype) self.set_module_name("batch_box_cox") def forward(self): op = core.CreateOperator("BatchBoxCox", [self.data, self.lambda1, self.lambda2], self.output) return op op_bench_c2.generate_c2_test( batch_box_cox_long_configs + batch_box_cox_short_configs, BatchBoxCoxBenchmark) if __name__ == "__main__": op_bench.benchmark_runner.main()
def init(self, sizes, N, axis, dtype, device):
    """Create N input blobs; entries of ``sizes`` may be callables
    that produce a randomized dimension when invoked."""
    # Fixed seed so any callable (randomized) dimensions are reproducible.
    random.seed(42)
    self.args = {'axis': axis}
    gen_sizes = [
        [dim() if callable(dim) else dim for dim in sizes]
        for _ in range(N)
    ]
    self.inputs = [
        self.tensor(shape, dtype, device=device) for shape in gen_sizes
    ]
    # Two outputs: the concatenated blob and an integer blob that the
    # operator fills (split sizes, judging by the name — not shown here).
    self.output = self.tensor(gen_sizes[0], dtype, device=device)
    self.split_info = self.tensor(gen_sizes[0], "int")
    self.set_module_name("concat")


def forward(self):
    # Build (not run) the Concat operator; the harness executes it.
    return core.CreateOperator(
        "Concat",
        self.inputs,
        [self.output, self.split_info],
        **self.args,
    )


op_bench_c2.generate_c2_test(
    cat_configs_short
    + cat_configs_long
    + cat_configs_multidim
    + cat_configs_manyinputs,
    ConcatBenchmark,
)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()
N=[128, 1024], K=[1, 2], device=['cpu', 'cuda'], tags=["long"]) class BatchGatherBenchmark(op_bench_c2.Caffe2BenchmarkBase): def init(self, M, N, K, device): self.input_one = self.tensor([M, N, K], device=device) max_val = N numpy.random.seed((1 << 32) - 1) index_dim = numpy.random.randint(0, N) self.index = self.feed_tensor(numpy.random.randint( 0, max_val, index_dim), device=device) self.output = self.tensor([M, index_dim, K], device=device) self.set_module_name("batch_gather") def forward(self): op = core.CreateOperator("BatchGather", [self.input_one, self.index], self.output) return op op_bench_c2.generate_c2_test( batch_gather_configs_long + batch_gather_configs_short, BatchGatherBenchmark) if __name__ == "__main__": op_bench.benchmark_runner.main()
add_short_configs = op_bench.config_list(
    attrs=[
        [8, 16, 32, "int"],
        [16, 16, 64, "float"],
        [64, 64, 128, "int"],
    ],
    attr_names=["M", "N", "K", "dtype"],
    tags=["short"],
)


class AddBenchmark(op_bench_c2.Caffe2BenchmarkBase):
    """Microbenchmark for the Caffe2 ``Add`` operator."""

    def init(self, M, N, K, dtype):
        # Elementwise add: both inputs and the output share one shape.
        self.input_one = self.tensor([M, N, K], dtype)
        self.input_two = self.tensor([M, N, K], dtype)
        self.output = self.tensor([M, N, K], dtype)
        self.set_module_name("add")

    def forward(self):
        # NOTE(review): self.args is never assigned in init(); presumably
        # Caffe2BenchmarkBase initializes it (likely to {}) — confirm.
        return core.CreateOperator(
            "Add", [self.input_one, self.input_two], self.output, **self.args
        )


op_bench_c2.generate_c2_test(add_long_configs + add_short_configs, AddBenchmark)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()
[128, 128, 128, False, True], [1024, 1024, 256, True, False], [8192, 8192, 1024, True, False], ], attr_names=["M", "N", "K", "trans_a", "trans_b"], tags=["short"], ) class MatMulBenchmark(op_bench_c2.Caffe2BenchmarkBase): def init(self, M, N, K, trans_a, trans_b): self.input_one = self.tensor([N, M]) if trans_a else self.tensor( [M, N]) self.input_two = self.tensor([K, N]) if trans_b else self.tensor( [N, K]) self.args = {'trans_a': trans_a, 'trans_b': trans_b} self.output = self.tensor([M, K]) self.set_module_name("matmul") def forward(self): op = core.CreateOperator("MatMul", [self.input_one, self.input_two], self.output, **self.args) return op op_bench_c2.generate_c2_test(mm_long_configs + mm_short_configs, MatMulBenchmark) if __name__ == "__main__": op_bench.benchmark_runner.main()
replace_nan_short_configs = op_bench.config_list(
    attrs=[
        [16, 16, "float"],
        [16, 16, "double"],
        [64, 64, "float"],
        [64, 64, "double"],
    ],
    attr_names=["M", "N", "dtype"],
    tags=["short"],
)


class ReplaceNaNBenchmark(op_bench_c2.Caffe2BenchmarkBase):
    """Microbenchmark for the Caffe2 ``ReplaceNaN`` operator."""

    def init(self, M, N, dtype):
        self.input = self.tensor([M, N], dtype)
        self.set_module_name("replace_nan")

    def forward(self):
        # In-place: the same blob is both input and output; NaNs are
        # replaced with the constant 1.0.
        return core.CreateOperator(
            "ReplaceNaN", self.input, self.input, value=1.0
        )


op_bench_c2.generate_c2_test(
    replace_nan_long_configs + replace_nan_short_configs,
    ReplaceNaNBenchmark,
)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()
[6, 1, 2, 1, "int32"], [7, 1, 2, 2, "int32"], [8, 1, 2, 3, "int32"], [9, 1, 2, 4, "int32"], [10, 1, 2, 5, "int32"], ], attr_names=["LENGTH", "M", "N", "MAX_LENGTH", "dtype"], tags=["short"], ) class ClipRangesBenchmark(op_bench_c2.Caffe2BenchmarkBase): def init(self, LENGTH, M, N, MAX_LENGTH, dtype): self.input = self.tensor([LENGTH, M, N], dtype) self.max_length = MAX_LENGTH self.set_module_name("clip_ranges") def forward(self): op = core.CreateOperator("ClipRanges", self.input, self.input, max_length=self.max_length) return op op_bench_c2.generate_c2_test( clip_ranges_long_configs + clip_ranges_short_configs, ClipRangesBenchmark) if __name__ == "__main__": op_bench.benchmark_runner.main()
[16, 16, "float"], [16, 16, "double"], [64, 64, "float"], [64, 64, "double"], ], attr_names=["M", "N", "dtype"], tags=["short"], ) class QuantileOpBenchmark(op_bench_c2.Caffe2BenchmarkBase): def init(self, M, N, dtype): self.data = [self.tensor([N], dtype) for _ in range(M)] self.quantile = 0.3 self.output = self.tensor([1], dtype) self.set_module_name("quantile_op") def forward(self): op = core.CreateOperator("Quantile", inputs=self.data, outputs=self.output, quantile=self.quantile) return op op_bench_c2.generate_c2_test( quantile_op_long_configs + quantile_op_short_configs, QuantileOpBenchmark) if __name__ == "__main__": op_bench.benchmark_runner.main()