def _register_element_variants(op_list, arity):
    """Create and register one ``ElementBench`` subclass per op and input mode.

    Args:
        op_list: entries of the form ``[name, pt_func]`` or
            ``[name, pt_func, np_func]``. With two items the same callable is
            stored under both the ``pt`` and the ``np`` attribute.
        arity: ``"binary"`` or ``"unary"``; selects whether the callables are
            stored as ``binary_op_*_func`` or ``unary_op_*_func``.

    Raises:
        ValueError: if an entry has neither two nor three items.
    """
    for split_input, op in itertools.product([True, False], op_list):
        if len(op) == 2:
            op_name, op_pt_func = op
            op_np_func = op_pt_func
        elif len(op) == 3:
            op_name, op_pt_func, op_np_func = op
        else:
            # Fail loudly instead of reusing the previous iteration's values.
            raise ValueError(
                "op entry must be [name, pt_func] or [name, pt_func, np_func], "
                "got %d items" % len(op)
            )

        split_str = 'split' if split_input else 'shared'
        op_str = split_str + '_' + op_name

        # Make a copy of ElementBench: a fresh subclass carrying this op.
        bm_cls = type('ElementBench_' + op_str, (ElementBench, ), {})
        bm_cls.op_str = op_str
        setattr(bm_cls, arity + '_op_pt_func', op_pt_func)
        setattr(bm_cls, arity + '_op_np_func', op_np_func)
        bm_cls.split_input = split_input
        framework.register_benchmark_class(bm_cls)


def register_element_ops():
    """Register ``ElementBench`` variants for every listed binary and unary op.

    Each op is registered twice: first with ``split_input=True`` (class and
    ``op_str`` prefixed with ``split_``), then with ``split_input=False``
    (prefixed with ``shared_``). All binary variants are registered before the
    unary ones.
    """
    binary_op_list = [
        ["mul", lambda a, b: a * b],
        ["add", lambda a, b: a + b],
        ["sub", lambda a, b: a - b],
        # NOTE(review): the 1e-4 offset presumably guards against division by
        # zero -- confirm.
        ["div", lambda a, b: a / (b + 1e-4)],
        ["pow", lambda a, b: torch.pow(a, b), lambda a, b: np.power(a, b)],  # no fusion triggered
        ["max", lambda a, b: torch.max(a, b), lambda a, b: np.maximum(a, b)],
        ["min", lambda a, b: torch.min(a, b), lambda a, b: np.minimum(a, b)],
    ]

    unary_op_list = [
        ["exp", lambda x: torch.exp(x), lambda x: np.exp(x)],
        ["sin", lambda x: torch.sin(x), lambda x: np.sin(x)],
        ["cos", lambda x: torch.cos(x), lambda x: np.cos(x)],
        [
            "rand_like",
            lambda x: torch.rand_like(x),
            lambda x: np.random.rand(*x.shape),
        ],
    ]

    _register_element_variants(binary_op_list, 'binary')
    _register_element_variants(unary_op_list, 'unary')
# Tail of a benchmark class (presumably MatMulBench, which is registered at the
# end of this line); the class header and the `def` of the first method are
# outside this excerpt.
# - Leading fragment: returns a dict with keys 'sol' and 'algorithmic'. Each is
#   buffer_size times a pass count: 1 for both in 'fwd' mode, otherwise 2 for
#   'sol' and 3 for 'algorithmic'. buffer_size is
#   (B*M*N + B*M*N + B*N*K) * 4.
# - compute_workload: returns 2*B*M*N*K, multiplied by 3 unless mode is 'fwd'.
# - default_configs: returns the single configuration [128, 64, 128, 256].
# NOTE(review): the factor 4 is presumably bytes per element -- confirm.
# NOTE(review): buffer_size counts B*M*N twice and has no B*M*K term; verify
# against the tensor shapes created earlier in the class.
if self.mode == 'fwd': sol_count = 1 algorithmic_count = 1 else: sol_count = 1 + 1 algorithmic_count = 1 + (1 + 1) buffer_size = self.B * self.M * self.N + self.B * self.M * self.N + self.B * self.N * self.K buffer_size *= 4 return { 'sol': buffer_size * sol_count, 'algorithmic': buffer_size * algorithmic_count } def compute_workload(self): if self.mode == 'fwd': count = 1 else: count = 1 + (1 + 1) op_count = 2 * self.B * self.M * self.N * self.K return op_count * count @staticmethod def default_configs(): return [[128, 64, 128, 256]] framework.register_benchmark_class(MatMulBench)
# Tail of a benchmark class (presumably SoftmaxBench, which is registered at
# the end of this line); the class header and the `def` of the first method are
# outside this excerpt.
# - Leading fragment: returns self.softmax(self.data, dim=1).
# - reference: scipy.special.softmax over self.numpy(self.data) along axis 1.
# - config: returns [M, N].
# - module: returns the string 'softmax'.
# - memory_workload: returns {'sol', 'algorithmic'} as M*N*4 times a pass
#   count: 2 and 4 in 'fwd' mode, 4 and 8 otherwise.
# - default_configs: returns [[128, 1<<16]], i.e. [[128, 65536]].
# NOTE(review): the factor 4 is presumably bytes per element -- confirm.
y = self.softmax(self.data, dim=1) return y def reference(self): return scipy.special.softmax(self.numpy(self.data), axis=1) def config(self): return [self.M, self.N] @staticmethod def module(): return 'softmax' def memory_workload(self): if self.mode == 'fwd': sol_count = 1 + 1 algorithmic_count = 3 + 1 else: sol_count = (1 + 1) + (1 + 1) algorithmic_count = (3 + 1) + (3 + 1) buffer_size = self.M * self.N * 4 return {'sol': buffer_size * sol_count, 'algorithmic': buffer_size * algorithmic_count} @staticmethod def default_configs(): return [[128, 1<<16]] framework.register_benchmark_class(SoftmaxBench)
class ReduceRowBench(ReduceBench):
    """ReduceBench specialization that passes 'row' as the third base argument."""

    def __init__(self, mode, device, M, N, K):
        super().__init__(mode, device, 'row', M, N, K)

    @staticmethod
    def module():
        """Return the module name of this benchmark."""
        return 'reduce_row'


class ReduceMidBench(ReduceBench):
    """ReduceBench specialization that passes 'mid' as the third base argument."""

    def __init__(self, mode, device, M, N, K):
        super().__init__(mode, device, 'mid', M, N, K)

    @staticmethod
    def module():
        """Return the module name of this benchmark."""
        return 'reduce_mid'


class ReduceColBench(ReduceBench):
    """ReduceBench specialization that passes 'col' as the third base argument."""

    def __init__(self, mode, device, M, N, K):
        super().__init__(mode, device, 'col', M, N, K)

    @staticmethod
    def module():
        """Return the module name of this benchmark."""
        return 'reduce_col'


# Registration order is row, mid, col.
framework.register_benchmark_class(ReduceRowBench)
framework.register_benchmark_class(ReduceMidBench)
framework.register_benchmark_class(ReduceColBench)
# Methods of a benchmark class (presumably SwishBench, which is registered at
# the end of this line); the class header is outside this excerpt.
# - reference: returns self.numpy(self.forward(self.data)), so the reference
#   value is produced by the same forward() that is being benchmarked.
# - config: returns [M, N].
# - module: returns the string "swish".
# - memory_workload: returns {"sol", "algorithmic"} as M*N*4 times a pass
#   count: 2 and 4 in "fwd" mode, 4 and 8 otherwise.
# - default_configs: returns [[128, 1 << 16]], i.e. [[128, 65536]].
# NOTE(review): the factor 4 is presumably bytes per element -- confirm.
def reference(self): return self.numpy(self.forward(self.data)) def config(self): return [self.M, self.N] @staticmethod def module(): return "swish" def memory_workload(self): if self.mode == "fwd": sol_count = 1 + 1 algorithmic_count = 3 + 1 else: sol_count = (1 + 1) + (1 + 1) algorithmic_count = (3 + 1) + (3 + 1) buffer_size = self.M * self.N * 4 return { "sol": buffer_size * sol_count, "algorithmic": buffer_size * algorithmic_count, } @staticmethod def default_configs(): return [[128, 1 << 16]] framework.register_benchmark_class(SwishBench)
# Tail of a pooling benchmark base class (presumably PoolingBench, the base of
# the two classes below) followed by its two concrete variants; the base class
# header and the `def` of the first method are outside this excerpt.
# - Leading fragment: returns {'sol', 'algorithmic'} as buffer_size times the
#   respective count; both operands are computed before this excerpt.
# - default_configs: returns the single configuration [3, 16, 32, 256, 256].
# - MaxPoolBench: forwards 'maxpool' plus all positional arguments to the base
#   constructor; module() returns 'maxpool'.
# - AvgPoolBench: forwards 'avgpool' plus all positional arguments to the base
#   constructor; module() returns 'avgpool'.
# Only MaxPoolBench and AvgPoolBench are registered here, in that order.
return { 'sol': buffer_size * sol_count, 'algorithmic': buffer_size * algorithmic_count } @staticmethod def default_configs(): return [[3, 16, 32, 256, 256]] class MaxPoolBench(PoolingBench): def __init__(self, *args): super().__init__('maxpool', *args) @staticmethod def module(): return 'maxpool' class AvgPoolBench(PoolingBench): def __init__(self, *args): super().__init__('avgpool', *args) @staticmethod def module(): return 'avgpool' framework.register_benchmark_class(MaxPoolBench) framework.register_benchmark_class(AvgPoolBench)
# Tail of one normalization benchmark class followed by two complete ones; the
# header (and any decorator) of the first module() is outside this excerpt.
# - Leading module(): returns 'batchnorm' (presumably part of BatchNormBench,
#   which is registered at the end of this line).
# - InstanceNormBench: forward() returns self.instance_norm(self.data);
#   module() returns 'instance_norm'; is_supported() delegates to
#   tensor_engine.is_supported(self.instance_norm).
# - LayerNormBench: forward() returns
#   self.layer_norm(self.data, [self.H, self.W]); module() returns 'layernorm'.
# Registration order is BatchNormBench, InstanceNormBench, LayerNormBench.
def module(): return 'batchnorm' class InstanceNormBench(NormalizationBench): def forward(self): y = self.instance_norm(self.data) return y @staticmethod def module(): return 'instance_norm' def is_supported(self): return tensor_engine.is_supported(self.instance_norm) class LayerNormBench(NormalizationBench): def forward(self): y = self.layer_norm(self.data, [self.H, self.W]) return y @staticmethod def module(): return 'layernorm' framework.register_benchmark_class(BatchNormBench) framework.register_benchmark_class(InstanceNormBench) framework.register_benchmark_class(LayerNormBench)