def test_best_block_config(
    test_id,
    op_type,
    activation,
    kernel,
    stride,
    dilation,
    padding,
    in_shape,
    out_shape,
    layouts,
    acc_config,
    expected_block_configs,
):
    """Check that the block config picked for a full-tensor stripe is an expected one.

    An EthosuPart is built from the valid block configs reported by the device
    config, a single stripe covering the whole output tensor is requested, and
    the output shape of the block config returned for that stripe must appear
    in ``expected_block_configs[test_id]``.

    ``layouts`` is indexed as ``(ifm_layout, ofm_layout)``; shapes are given in
    NHWC form and converted here when the corresponding layout is "NHCWB16".
    """
    # Affine map in homogeneous coordinates: (N, H, W, C, 1) -> (N, H, C/16, W, 16, 1).
    nhwc_to_nhcwb16 = [
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 1 / 16, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 0, 16],
        [0, 0, 0, 0, 1],
    ]

    def _to_nhcwb16(shape):
        # Append the homogeneous 1, apply the map, drop the trailing 1 and
        # round fractional channel blocks up to whole integers.
        transformed = np.matmul(nhwc_to_nhcwb16, shape + (1,)).tolist()[:-1]
        return [int(math.ceil(n)) for n in transformed]

    ifm_matrix, ifm_offset, weight_matrix, weight_offset, _, _ = make_matrices(
        op_type,
        kernel,
        stride,
        padding,
        layouts[0],
        layouts[1],
        dilation,
        in_shape[3],
    )

    # Channel counts must be read before the shapes are re-laid-out below.
    ofm_channels = out_shape[3]
    ifm_channels = in_shape[3]

    if layouts[0] == "NHCWB16":
        # The converted in_shape is not read again in this test; the conversion
        # is kept so the original behaviour is preserved exactly.
        in_shape = _to_nhcwb16(in_shape)
    if layouts[1] == "NHCWB16":
        out_shape = _to_nhcwb16(out_shape)

    propagator = cs.Propagator(ifm_matrix, ifm_offset)
    weight_propagator = cs.Propagator(weight_matrix, weight_offset)

    # One subkernel per 8x8 tile of the kernel (ceil division in each dimension).
    subkernels = ((kernel[0] + 7) // 8) * ((kernel[1] + 7) // 8)

    op_attrs = {
        "op": op_type,
        "activation": activation,
        "stride_h": stride[0],
        "stride_w": stride[1],
        "dilation_h": dilation[0],
        "dilation_w": dilation[1],
    }

    device_config = cs.EthosuDeviceConfig(acc_config)
    block_configs = device_config.get_valid_block_configs(
        propagator,
        op_attrs,
        out_shape,
        ofm_channels,
        ifm_channels,
        layouts[1],
        layouts[0],
        "int8",
        "int8",
        kernel[0],
        kernel[1],
    )

    output_quantum = [1, 1, 2, 8]
    if layouts[1] == "NHCWB16":
        output_quantum = [1, 1, 1, 2, 8]

    # Create EthosUPart
    te_subgraph = cs.TESubgraph([], None)
    part = cs.EthosuPart(
        te_subgraph,
        [propagator, weight_propagator],
        output_quantum,
        subkernels,
        block_configs,
        1,
    )

    # NOTE(review): the 4-entry order is selected for the 5-D NHCWB16 layout and
    # the 5-entry order for 4-D NHWC, which looks inverted relative to
    # output_quantum's length. Left unchanged; confirm against StripeConfig.
    order = [1, 2, 3, 4] if layouts[1] == "NHCWB16" else [1, 2, 4, 3, 0]
    stripes = [1] * len(output_quantum)
    offset = [0] * len(output_quantum)

    # A single stripe whose shape, extent and strides all equal the full output.
    stripe_config = cs.StripeConfig(out_shape, out_shape, out_shape, order, stripes, offset)

    block = part.get_block_config(stripe_config)
    block_shape = tuple(int(a) for a in block.output_shape)

    assert block_shape in expected_block_configs[test_id]
def test_conv_performance(
    accelerator,
    op_type,
    activation,
    kernel,
    stride,
    dilation,
    padding,
    in_shape,
    out_shape,
    block_shape,
    input_block_shape,
    expected,
):
    """Check the compute cycles reported for a single full-tensor stripe.

    A single BlockConfig is assembled from ``block_shape`` and
    ``input_block_shape`` with cycle estimates taken from the device config.
    It is attached to an EthosuPart with NHWC input, weight and output
    tensors, and the part's performance info is queried for one stripe
    covering the whole output in ROLLING buffer mode. The resulting
    ``compute_cycles`` must lie within 10% of ``expected``.
    """
    kernel_h, kernel_w = kernel[0], kernel[1]
    ifm_channels = in_shape[3]

    ifm_matrix, ifm_offset, weight_matrix, weight_offset, _, _ = make_matrices(
        op_type,
        kernel,
        stride,
        padding,
        "NHWC",
        "NHWC",
        dilation,
        ifm_channels,
    )
    propagators = [
        cs.Propagator(ifm_matrix, ifm_offset),
        cs.Propagator(weight_matrix, weight_offset),
    ]

    # One subkernel per 8x8 tile of the kernel (ceil division in each dimension).
    subkernels = ((kernel_h + 7) // 8) * ((kernel_w + 7) // 8)

    device_config = cs.EthosuDeviceConfig(accelerator)

    # Scale the device's output-cycle figure by the number of block elements.
    output_cycles = device_config._get_output_cycles(op_type, "", "int8", "int8", activation)
    block_elements = reduce(lambda acc, dim: acc * dim, block_shape, 1)
    output_cycles *= block_elements

    is_partkernel = device_config.is_partkernel(
        op_type, ifm_channels, "int8", kernel_h * kernel_w
    )
    block_compute_cycles = device_config._estimate_compute_cycles_per_block(
        op_type,
        _Shape(block_shape),
        _Shape(input_block_shape),
        kernel_h,
        kernel_w,
        ifm_channels,
        "int8",
        is_partkernel,
    )
    block_configs = [
        cs.BlockConfig(
            input_block_shape, block_shape, block_compute_cycles, int(output_cycles)
        )
    ]

    output_quantum = [1, 1, 2, 8]
    part = cs.EthosuPart(
        cs.TESubgraph([], None),
        propagators,
        output_quantum,
        subkernels,
        block_configs,
        1,
    )

    weight_shape = [ifm_channels, kernel_h, kernel_w, out_shape[-1]]
    part.set_input(0, cs.Tensor(in_shape, "int8"))
    part.set_input(1, cs.Tensor(weight_shape, "int8"))
    part.set_output(cs.Tensor(out_shape, "int8"))

    # A single stripe whose shape, extent and strides all equal the full output.
    rank = len(output_quantum)
    stripe_config = cs.StripeConfig(
        out_shape, out_shape, out_shape, [1, 2, 3, 4], [1] * rank, [0] * rank
    )

    performance = part.get_performance_info(stripe_config, cs.BufferMode.ROLLING)
    compute_cycles = performance.compute_cycles

    tolerance = expected * 0.1
    lower_bound = expected - tolerance
    upper_bound = expected + tolerance
    assert lower_bound <= compute_cycles <= upper_bound