def test_ethosu_identity_matcher():
    """Match an identity compute and check the resulting part's single propagator."""
    channels = 21
    input_shape = (1, 12, 15, channels)
    ifm = te.placeholder(input_shape, dtype="int8")
    lut = te.placeholder((), dtype="uint8")
    out = identity_compute(
        ifm=ifm,
        lut=lut,
        ifm_scale=1,
        ifm_zero_point=0,
        ofm_scale=1,
        ofm_zero_point=0,
        activation="NONE",
    )

    # The expected propagator is an identity transform (one extra row/column
    # compared to the tensor rank) with an all-zero offset.
    rank = len(ifm.shape)
    expected_transform = np.identity(rank + 1).tolist()
    expected_offset = np.zeros(rank, dtype="int64").tolist()

    part = match_ethosu_identity(out, cs.EthosuDeviceConfig("ethos-u55-256"))

    assert isinstance(part, cs.EthosuPart)
    assert len(part.propagators) == 1
    only_propagator = part.propagators[0]
    assert only_propagator.transform == expected_transform
    assert only_propagator.offset == expected_offset
def test_compute_cycles_annotation(SRAM, FLASH, TwoConv2DTE):
    """Run the cascader and check the compute-cycle pragma on selected schedule stages."""
    device_config = cs.EthosuDeviceConfig("ethos-u55-256")
    options = infra.make_options(
        cascade_region=SRAM,
        max_proposals=64,
        stripe_factors=4,
        max_plan_size=10,
        max_open_plans=8,
        max_closed_plans=32,
        always_copy_size=1024,
        disable_pareto_plans=False,
        disable_pareto_proposals=False,
        enable_striping=False,
    )
    sch, te_graph, const_dict = TwoConv2DTE
    cs.cascade(sch, te_graph, const_dict, options, SRAM, FLASH, [SRAM], device_config)

    # Stage index -> expected hint, for the stages that should be annotated:
    # [copy, copy, conv2d, copy, conv2d]
    expected_hint_by_stage = {
        6: 4096,
        8: 5120,
        9: 1440,
        18: 2560,
        19: 3072,
    }

    for stage_index, expected_hint in expected_hint_by_stage.items():
        stage = sch.stages[stage_index]
        outer_iter_var = stage.leaf_iter_vars[0]
        var_attrs = stage.iter_var_attrs[outer_iter_var]
        assert var_attrs.pragma_keys[0] == "compute_cycles_hint"
        assert var_attrs.pragma_values[0] == expected_hint
def test_device_config_cycles(acc_config, expected):
    """Compare `_get_output_cycles` against `expected` for five operator setups.

    `expected[i]` is the value expected for the i-th entry of the table below.
    """
    device_config = cs.EthosuDeviceConfig(acc_config)

    # (op type, operator string, ifm dtype, ofm dtype, activation)
    queries = (
        ("ethosu_conv2d", None, "int8", "int8", "LUT"),
        ("ethosu_pooling", "MAX", "int8", "int8", "NONE"),
        ("ethosu_binary_elementwise", "ADD", "int8", "int8", "NONE"),
        ("ethosu_binary_elementwise", "MUL", "int8", "int8", "NONE"),
        ("ethosu_binary_elementwise", "MUL", "int8", "int32", "NONE"),
    )

    for index, (op_type, op_str, ifm_dtype, ofm_dtype, activation) in enumerate(queries):
        cycles = device_config._get_output_cycles(
            op_type, op_str, ifm_dtype, ofm_dtype, activation
        )
        assert cycles == expected[index]
def test_force_block_config_kernelwise(ofm_layout, block_config_str, expected_block_shape):
    """With `dev_force_block_config` set, a pooling op must get exactly one block config."""
    op_type = "ethosu_pooling"
    kernel_hw = (2, 2)
    stride_hw = (2, 2)
    padding_hw = (0, 0)
    dilation_hw = (1, 1)
    ifm_channels = 32
    out_shape = (1, 8, 10, 16)
    ofm_channels = out_shape[3]

    # Only the IFM transform and offset are needed here.
    ifm_matrix, ifm_offset, _, _, _, _ = make_matrices(
        op_type,
        kernel_hw,
        stride_hw,
        padding_hw,
        "NHWC",
        ofm_layout,
        dilation_hw,
        ifm_channels,
    )
    propagator = cs.Propagator(ifm_matrix, ifm_offset)

    stride_h, stride_w = stride_hw
    dilation_h, dilation_w = dilation_hw
    op_attrs = {
        "op": op_type,
        "activation": "NONE",
        "stride_h": stride_h,
        "stride_w": stride_w,
        "dilation_h": dilation_h,
        "dilation_w": dilation_w,
    }

    ethosu_options = {
        "enable_cascader": True,
        "dev_force_block_config": block_config_str,
    }
    pass_context = tvm.transform.PassContext(config={"relay.ext.ethos-u.options": ethosu_options})
    with pass_context:
        device_config = cs.EthosuDeviceConfig("ethos-u55-128")
        block_configs = device_config.get_valid_block_configs(
            propagator,
            op_attrs,
            out_shape,
            ofm_channels,
            ifm_channels,
            ofm_layout,
            "NHWC",
            "int8",
            "int8",
            kernel_hw[0],
            kernel_hw[1],
        )

    assert len(block_configs) == 1
    assert block_configs[0].output_shape == expected_block_shape
def test_create_cascader_graph(TwoConv2DWithSliceTE):
    """Build a cascader graph and walk it from the output back to the first conv."""

    def check_tensor(tensor, shape, producer_count, constant):
        # Shape, number of producing parts and constness, checked in that order.
        assert tensor.shape == shape
        assert len(tensor.producers) == producer_count
        if constant:
            assert tensor.is_constant
        else:
            assert not tensor.is_constant

    _, te_graph, const_dict = TwoConv2DWithSliceTE
    device_config = cs.EthosuDeviceConfig("ethos-u55-256")
    graph = cs.create_cascader_graph(te_graph, const_dict, device_config)

    output_tensor = graph.output_tensors[0]
    check_tensor(output_tensor, [1, 6, 1, 6, 16], 1, False)

    # Second convolution: produces the graph output.
    conv2_part = output_tensor.producers[0]
    assert isinstance(conv2_part, cs.EthosuPart)
    assert len(conv2_part.input_tensors) == 3
    check_tensor(conv2_part.input_tensors[0], [1, 6, 6, 64], 1, False)
    check_tensor(conv2_part.input_tensors[1], [16, 3, 3, 64], 0, True)
    check_tensor(conv2_part.input_tensors[2], [16, 10], 0, True)

    # Slice sitting between the two convolutions.
    slice_part = conv2_part.input_tensors[0].producers[0]
    assert isinstance(slice_part, cs.InlinePart)
    assert len(slice_part.input_tensors) == 1
    check_tensor(slice_part.input_tensors[0], [1, 12, 12, 64], 1, False)

    # First convolution: its IFM has no producer and is not constant.
    conv1_part = slice_part.input_tensors[0].producers[0]
    assert isinstance(conv1_part, cs.EthosuPart)
    assert len(conv1_part.input_tensors) == 3
    check_tensor(conv1_part.input_tensors[0], [1, 12, 12, 8], 0, False)
    check_tensor(conv1_part.input_tensors[1], [64, 1, 1, 8], 0, True)
    check_tensor(conv1_part.input_tensors[2], [64, 10], 0, True)
def test_force_block_config_elementwise(ofm_layout, block_config_str, expected_block_shape):
    """With `dev_force_block_config` set, a unary elementwise op gets one block config.

    Parameters
    ----------
    ofm_layout : str
        Layout string passed through as the output layout.
    block_config_str : str
        Value placed under "dev_force_block_config" in the Ethos-U options.
    expected_block_shape
        Value the single returned block config's `output_shape` must equal.
    """
    op_type = "ethosu_elementwise_unary"
    op_str = "ABS"
    activation = "NONE"
    ofm_shape = (1, 8, 10, 16)

    # Identity transform with zero offset.
    ifm_matrix = [
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1],
    ]
    ifm_offset = [0, 0, 0, 0]
    propagator = cs.Propagator(ifm_matrix, ifm_offset)

    op_attrs = {
        "op": op_type,
        "operator_type": op_str,
        "activation": activation,
        "clip_min": 0,
        "clip_max": 0,
        "rounding_mode": "TFL",
    }

    config = {
        "enable_cascader": True,
        "dev_force_block_config": block_config_str,
    }
    with tvm.transform.PassContext(config={"relay.ext.ethos-u.options": config}):
        device_config = cs.EthosuDeviceConfig("ethos-u55-128")
        block_configs = device_config.get_elementwise_block_config(
            propagator,
            None,
            op_attrs,
            ofm_shape,
            ofm_layout,
            # Was misspelled "NWHC"; the other tests in this file use "NHWC".
            "NHWC",
            None,
            "int8",
            "int8",
        )

    assert len(block_configs) == 1
    assert block_configs[0].output_shape == expected_block_shape
def test_ethosu_pooling_matcher(pool_shape, stride, padding, ifm_layout, ofm_layout):
    """Match a MAX pooling compute and compare its propagator with `make_matrices`."""
    ofm_channels = 21
    # Any layout other than NHWC takes the 5-D shape with channels in bricks of 16.
    ifm_shape = (
        (1, 12, 15, ofm_channels)
        if ifm_layout == "NHWC"
        else (1, 12, 1 + ((ofm_channels - 1) // 16), 15, 16)
    )
    ifm = te.placeholder(ifm_shape, dtype="int8")
    lut = te.placeholder((), dtype="uint8")

    pooling_kwargs = dict(
        ifm=ifm,
        lut=lut,
        pooling_type="MAX",
        ifm_scale=1,
        ifm_zero_point=0,
        ofm_scale=1,
        ofm_zero_point=0,
        pool_shape=pool_shape,
        ofm_channels=ofm_channels,
        strides=stride,
        padding=padding,
        activation="NONE",
        clip_min=0,
        clip_max=0,
        rounding_mode="TFL",
        upscale="NONE",
        ifm_layout=ifm_layout,
        ofm_layout=ofm_layout,
    )
    out = pooling_compute(**pooling_kwargs)

    expected_transform, expected_offset, _, _, _, _ = make_matrices(
        "ethosu_pooling",
        pool_shape,
        stride,
        padding,
        ifm_layout,
        ofm_layout,
        ofm_channels=ofm_channels,
    )

    part = match_ethosu_pooling(out, cs.EthosuDeviceConfig("ethos-u55-256"))

    assert isinstance(part, cs.EthosuPart)
    assert len(part.propagators) == 1
    ifm_propagator = part.propagators[0]
    assert ifm_propagator.transform == expected_transform
    assert ifm_propagator.offset == expected_offset
def test_cascade(
    SRAM, FLASH, TwoConv2DWithSliceTE, TwoConv2DTE, MobileNetv1StartTE, MobileNetv1TE
):
    """Run `cs.cascade` over each TE fixture; the test passes if nothing raises."""
    device_config = cs.EthosuDeviceConfig("ethos-u55-256")
    te_fixtures = (
        TwoConv2DTE,
        TwoConv2DWithSliceTE,
        MobileNetv1StartTE,
        MobileNetv1TE,
    )
    for schedule, graph, constants in te_fixtures:
        # A fresh options object is built for every fixture.
        cascader_options = cs.CascaderOptions(
            cascade_region=SRAM,
            max_proposals=64,
            stripe_factors=4,
            max_plan_size=10,
            always_copy_size=1024,
        )
        cs.cascade(
            schedule, graph, constants, cascader_options, SRAM, FLASH, [SRAM], device_config
        )
def test_ethosu_inline_matcher():
    """Match a reshape and check it becomes an InlinePart with the expected propagator."""
    ifm_shape = (2, 5, 6)
    new_shape = (2, 30)
    ifm = te.placeholder(ifm_shape, dtype="int8")
    out = reshape(ifm, new_shape)

    # One row per input dimension carrying its full extent in the last column,
    # followed by the [0, 0, 1] row.
    expected_transform = [[0, 0, extent] for extent in ifm_shape]
    expected_transform.append([0, 0, 1])
    expected_offset = [0] * len(ifm_shape)

    part = match_ethosu_inline(out, cs.EthosuDeviceConfig("ethos-u55-256"))

    assert isinstance(part, cs.InlinePart)
    assert len(part.propagators) == 1
    only_propagator = part.propagators[0]
    assert only_propagator.transform == expected_transform
    assert only_propagator.offset == expected_offset
def test_cascade(SRAM, FLASH, TwoConv2DWithSliceTE, TwoConv2DTE, MobileNetv1StartTE, MobileNetv1TE):
    """Run `cs.cascade` over each TE fixture; the test passes if nothing raises."""
    device_config = cs.EthosuDeviceConfig("ethos-u55-256")
    te_fixtures = (
        TwoConv2DTE,
        TwoConv2DWithSliceTE,
        MobileNetv1StartTE,
        MobileNetv1TE,
    )
    for schedule, graph, constants in te_fixtures:
        # A fresh options object is built for every fixture.
        cascader_options = infra.make_options(
            cascade_region=SRAM,
            max_proposals=64,
            stripe_factors=4,
            max_plan_size=10,
            max_open_plans=8,
            max_closed_plans=32,
            always_copy_size=1024,
            disable_pareto_plans=False,
            disable_pareto_proposals=False,
        )
        cs.cascade(
            schedule, graph, constants, cascader_options, SRAM, FLASH, [SRAM], device_config
        )
def test_create_diamond_graph(MobileNetv2DiamondTE):
    """Build a cascader graph and check the producer of its first output tensor.

    The producing part must be an `EthosuPart` with part id 0 and two inputs.
    Each input must have shape [1, 56, 56, 24], exactly one producer, and be
    non-constant.
    """
    _, te_graph, const_dict = MobileNetv2DiamondTE
    device_config = cs.EthosuDeviceConfig("ethos-u55-256")
    graph = cs.create_cascader_graph(te_graph, const_dict, device_config)

    output_tensor = graph.output_tensors[0]
    assert output_tensor.shape == [1, 56, 56, 24]
    assert len(output_tensor.producers) == 1
    assert not output_tensor.is_constant

    add1_part = output_tensor.producers[0]
    assert isinstance(add1_part, cs.EthosuPart)
    assert len(add1_part.input_tensors) == 2
    assert graph.get_part_id(add1_part) == 0

    # Check both inputs. Previously the producer/constness checks for the
    # second input re-tested input 0, so input 1 was only checked for shape.
    for input_tensor in add1_part.input_tensors:
        assert input_tensor.shape == [1, 56, 56, 24]
        assert len(input_tensor.producers) == 1
        assert not input_tensor.is_constant
def test_ethosu_depthwise2d_matcher(kernel, stride, dilation, padding, ifm_layout, ofm_layout):
    """Match a depthwise conv2d compute and compare all three propagators."""
    ofm_channels = 57
    # Any layout other than NHWC takes the 5-D shape with channels in bricks of 16.
    ifm_shape = (
        (1, 12, 15, ofm_channels)
        if ifm_layout == "NHWC"
        else (1, 12, 1 + ((ofm_channels - 1) // 16), 15, 16)
    )
    kernel_h, kernel_w = kernel

    ifm = te.placeholder(ifm_shape, dtype="int8")
    weight = te.placeholder((ofm_channels, kernel_h, kernel_w, 1), dtype="int8")
    scale_bias = te.placeholder((ofm_channels, 10), dtype="uint8")
    lut = te.placeholder((), dtype="uint8")

    compute_kwargs = dict(
        ifm=ifm,
        weight=weight,
        scale_bias=scale_bias,
        lut=lut,
        ifm_scale=1,
        ifm_zero_point=0,
        ofm_scale=1,
        ofm_zero_point=0,
        weight_zero_point=0,
        strides=stride,
        padding=padding,
        dilation=dilation,
        activation="NONE",
        clip_min=0,
        clip_max=0,
        rounding_mode="TFL",
        upscale="NONE",
        ifm_layout=ifm_layout,
        ofm_layout=ofm_layout,
        ofm_dtype=ifm.dtype,
    )
    out = depthwise_conv2d_compute(**compute_kwargs)

    matrices = make_matrices(
        "ethosu_depthwise_conv2d",
        kernel,
        stride,
        padding,
        ifm_layout,
        ofm_layout,
        dilation,
    )
    (
        ifm_transform,
        ifm_offset,
        weight_transform,
        weight_offset,
        scale_bias_transform,
        scale_bias_offset,
    ) = matrices

    part = match_ethosu_depthwise_conv2d(out, cs.EthosuDeviceConfig("ethos-u55-256"))

    assert isinstance(part, cs.EthosuPart)
    assert len(part.propagators) == 3

    # Propagator order: IFM, weight, scale/bias.
    expected_pairs = (
        (ifm_transform, ifm_offset),
        (weight_transform, weight_offset),
        (scale_bias_transform, scale_bias_offset),
    )
    for index, (transform, offset) in enumerate(expected_pairs):
        assert part.propagators[index].transform == transform
        assert part.propagators[index].offset == offset
def test_ethosu_unary_elementwise_matcher(ofm_shape, ifm_layout, ofm_layout, op_type):
    """Match a unary elementwise compute and check its propagator and propagated shape."""
    ofm_channels = ofm_shape[3]
    nhwc_to_nhcwb16, _ = get_layout_transform_matrices(ofm_channels)

    def to_nhcwb16(nhwc_shape):
        # Apply the layout matrix to the shape (with a trailing 1 appended),
        # drop the last entry and round every entry up to an integer.
        transformed = np.matmul(nhwc_to_nhcwb16, nhwc_shape + [1]).tolist()
        return [int(math.ceil(extent)) for extent in transformed[:-1]]

    ifm_shape = ofm_shape.copy()
    if ifm_layout == "NHCWB16":
        ifm_shape = to_nhcwb16(ifm_shape)

    order = [1, 2, 3, 4]
    if ofm_layout == "NHCWB16":
        ofm_shape = to_nhcwb16(ofm_shape)
        order = [1, 2, 4, 3, 0]

    ifm = te.placeholder(ifm_shape, dtype="int8")
    lut = te.placeholder((), dtype="uint8")
    out = unary_elementwise_compute(
        ifm=ifm,
        lut=lut,
        operator_type=op_type,
        ifm_scale=1,
        ifm_zero_point=0,
        ofm_scale=1,
        ofm_zero_point=0,
        ofm_channels=ofm_channels,
        activation="NONE",
        clip_min=0,
        clip_max=0,
        rounding_mode="TFL",
        ifm_layout=ifm_layout,
        ofm_layout=ofm_layout,
    )
    ifm_propagator = out.op.attrs["ifm_propagator"]

    rank = len(ofm_shape)
    zero_offset = [0] * rank
    zero_stripes = [0] * rank
    output_stripe_config = cs.StripeConfig(
        ofm_shape, ofm_shape, ofm_shape, order, zero_stripes, zero_offset
    )

    expected_transform = _make_matrices(ifm_layout, ofm_layout, ofm_channels)

    part = match_ethosu_unary_elementwise(out, cs.EthosuDeviceConfig("ethos-u55-256"))

    assert isinstance(part, cs.EthosuPart)
    assert len(part.propagators) == 1
    assert part.propagators[0].transform == expected_transform

    propagated_ifm = ifm_propagator.propagate(output_stripe_config).shape

    # The layout transforms that have the exact number of output channels in
    # them will lose no information about the number of channels.
    assert ifm_shape == propagated_ifm
def test_best_block_config(
    test_id,
    op_type,
    activation,
    kernel,
    stride,
    dilation,
    padding,
    in_shape,
    out_shape,
    layouts,
    acc_config,
    expected_block_configs,
):
    """Check that the block config chosen for a full-tensor stripe is an expected one.

    Parameters
    ----------
    test_id
        Index into `expected_block_configs` selecting the acceptable shapes.
    op_type, activation, kernel, stride, dilation, padding
        Operator description; forwarded to `make_matrices` and the op attrs.
    in_shape, out_shape : tuple
        4-element shapes; index 3 is read as the channel count.
    layouts
        `layouts[0]` is the IFM layout and `layouts[1]` the OFM layout.
    acc_config
        Passed to `cs.EthosuDeviceConfig`.
    expected_block_configs
        Indexable collection; entry `test_id` holds the acceptable block shapes.
    """
    nhwc_to_nhcwb16 = [
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 1 / 16, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 0, 16],
        [0, 0, 0, 0, 1],
    ]

    ifm_matrix, ifm_offset, weight_matrix, weight_offset, _, _ = make_matrices(
        op_type, kernel, stride, padding, layouts[0], layouts[1], dilation, in_shape[3]
    )

    # Channel counts must be read before the shapes are converted to 5-D below.
    ofm_channels = out_shape[3]
    ifm_channels = in_shape[3]

    if layouts[0] == "NHCWB16":
        in_shape = [
            int(math.ceil(n)) for n in np.matmul(nhwc_to_nhcwb16, in_shape + (1,)).tolist()[:-1]
        ]
    if layouts[1] == "NHCWB16":
        out_shape = [
            int(math.ceil(n)) for n in np.matmul(nhwc_to_nhcwb16, out_shape + (1,)).tolist()[:-1]
        ]

    propagator = cs.Propagator(ifm_matrix, ifm_offset)
    weight_propagator = cs.Propagator(weight_matrix, weight_offset)

    subkernels = ((kernel[0] + 7) // 8) * ((kernel[1] + 7) // 8)

    op_attrs = {
        "op": op_type,
        "activation": activation,
        "stride_h": stride[0],
        "stride_w": stride[1],
        "dilation_h": dilation[0],
        "dilation_w": dilation[1],
    }

    device_config = cs.EthosuDeviceConfig(acc_config)
    block_configs = device_config.get_valid_block_configs(
        propagator,
        op_attrs,
        out_shape,
        ofm_channels,
        ifm_channels,
        layouts[1],
        layouts[0],
        "int8",
        "int8",
        kernel[0],
        kernel[1],
    )

    output_quantum = [1, 1, 2, 8]
    if layouts[1] == "NHCWB16":
        output_quantum = [1, 1, 1, 2, 8]

    # Create EthosUPart
    te_subgraph = cs.TESubgraph([], None)
    part = cs.EthosuPart(
        te_subgraph,
        [propagator, weight_propagator],
        output_quantum,
        subkernels,
        block_configs,
        1,
    )

    # The order needs one entry per output dimension: five for NHCWB16, four
    # for NHWC. The condition was previously inverted, giving an order whose
    # length did not match the shape, stripes and offset.
    order = [1, 2, 4, 3, 0] if layouts[1] == "NHCWB16" else [1, 2, 3, 4]
    stripes = [1] * len(output_quantum)
    offset = [0] * len(output_quantum)

    stripe_config = cs.StripeConfig(out_shape, out_shape, out_shape, order, stripes, offset)

    block = part.get_block_config(stripe_config)
    block_shape = tuple(int(a) for a in block.output_shape)

    assert block_shape in expected_block_configs[test_id]
def test_conv_performance(
    accelerator,
    op_type,
    activation,
    kernel,
    stride,
    dilation,
    padding,
    in_shape,
    out_shape,
    block_shape,
    input_block_shape,
    expected,
):
    """Check the compute cycles reported for a full-tensor stripe are within 10% of `expected`."""
    kernel_h, kernel_w = kernel[0], kernel[1]
    ifm_channels = in_shape[3]

    ifm_matrix, ifm_offset, weight_matrix, weight_offset, _, _ = make_matrices(
        op_type,
        kernel,
        stride,
        padding,
        "NHWC",
        "NHWC",
        dilation,
        ifm_channels,
    )
    propagator = cs.Propagator(ifm_matrix, ifm_offset)
    weight_propagator = cs.Propagator(weight_matrix, weight_offset)

    subkernels = ((kernel_h + 7) // 8) * ((kernel_w + 7) // 8)

    device_config = cs.EthosuDeviceConfig(accelerator)

    # Scale the value returned by _get_output_cycles by the product of the
    # block dimensions.
    output_cycles = device_config._get_output_cycles(op_type, "", "int8", "int8", activation)
    output_cycles *= reduce(lambda acc, extent: acc * extent, block_shape, 1)

    is_partkernel = device_config.is_partkernel(
        op_type, ifm_channels, "int8", kernel_h * kernel_w
    )
    block_compute_cycles = device_config._estimate_compute_cycles_per_block(
        op_type,
        _Shape(block_shape),
        _Shape(input_block_shape),
        kernel_h,
        kernel_w,
        ifm_channels,
        "int8",
        is_partkernel,
    )
    block_configs = [
        cs.BlockConfig(
            input_block_shape, block_shape, block_compute_cycles, int(output_cycles)
        )
    ]

    output_quantum = [1, 1, 2, 8]
    te_subgraph = cs.TESubgraph([], None)
    part = cs.EthosuPart(
        te_subgraph,
        [propagator, weight_propagator],
        output_quantum,
        subkernels,
        block_configs,
        1,
    )
    part.set_input(0, cs.Tensor(in_shape, "int8"))
    part.set_input(1, cs.Tensor([ifm_channels, kernel_h, kernel_w, out_shape[-1]], "int8"))
    part.set_output(cs.Tensor(out_shape, "int8"))

    # A single stripe covering the whole output tensor.
    rank = len(output_quantum)
    stripes = [1] * rank
    offset = [0] * rank
    order = [1, 2, 3, 4]
    stripe_config = cs.StripeConfig(out_shape, out_shape, out_shape, order, stripes, offset)

    performance = part.get_performance_info(stripe_config, cs.BufferMode.ROLLING)
    compute_cycles = performance.compute_cycles

    tolerance = expected * 0.1
    lower_bound = expected - tolerance
    upper_bound = expected + tolerance
    assert lower_bound <= compute_cycles <= upper_bound
def test_ethosu_conv2d_block_config_from_matcher(
    ifm_layout, ofm_layout, ifm_channels, expected_cycles
):
    """Match a conv2d, wire up its tensors and check the chosen block's compute cycles."""
    ofm_channels = 10
    ifm_height = 123
    ifm_width = 155

    if ifm_layout == "NHWC":
        ifm_shape = (1, ifm_height, ifm_width, ifm_channels)
    else:
        ifm_shape = (1, ifm_height, 1 + ((ifm_channels - 1) // 16), ifm_width, 16)
    weight_shape = (ofm_channels, 3, 3, ifm_channels)
    scale_bias_shape = (ofm_channels, 10)

    ifm = te.placeholder(ifm_shape, dtype="int8")
    weight = te.placeholder(weight_shape, dtype="int8")
    scale_bias = te.placeholder(scale_bias_shape, dtype="uint8")
    lut = te.placeholder((), dtype="uint8")

    conv_kwargs = dict(
        ifm=ifm,
        weight=weight,
        scale_bias=scale_bias,
        lut=lut,
        ifm_scale=1,
        ifm_zero_point=0,
        ofm_scale=1,
        ofm_zero_point=0,
        weight_zero_point=0,
        strides=(1, 1),
        padding=(0, 0, 0, 0),
        dilation=(1, 1),
        activation="NONE",
        clip_min=0,
        clip_max=0,
        upscale="NONE",
        rounding_mode="TFL",
        ifm_layout=ifm_layout,
        ofm_layout=ofm_layout,
    )
    out = conv2d_compute(**conv_kwargs)

    device_config = cs.EthosuDeviceConfig("ethos-u55-256")
    part = match_ethosu_conv2d(out, device_config)

    ofm_shape = [int(extent) for extent in part.subgraph.output_tensor.shape]

    # Attach the inputs (IFM, weight, scale/bias) and the output to the part.
    input_shapes = (ifm_shape, weight_shape, scale_bias_shape)
    for input_index, shape in enumerate(input_shapes):
        part.set_input(input_index, cs.Tensor(shape, "int8"))
    part.set_output(cs.Tensor(ofm_shape, "int8"))

    # Create a stripe the size of the whole output tensor.
    if ofm_layout == "NHWC":
        order = [1, 2, 3, 4]
    else:
        order = [1, 2, 4, 3, 0]
    rank = len(order)
    stripes = [1] * rank
    offset = [0] * rank
    stripe_config = cs.StripeConfig(ofm_shape, ofm_shape, ofm_shape, order, stripes, offset)

    block = part.get_block_config(stripe_config)

    # The arguments the matcher passed to get_valid_block_configs are not
    # visible from here, so the best available check is the compute cycle
    # count, on which the channel count has a significant effect.
    assert block.compute_cycles == expected_cycles
def TwoConv2DGraph():
    """Return the cascader graph created from the output of `make_TwoConv2DTE`."""
    # The first element of the triple is not needed to build the graph.
    _unused, graph_te, constants = make_TwoConv2DTE()
    u55_256 = cs.EthosuDeviceConfig("ethos-u55-256")
    cascader_graph = cs.create_cascader_graph(graph_te, constants, u55_256)
    return cascader_graph
def MobileNetv1Graph():
    """Return the cascader graph created from the output of `make_MobileNetv1TE`."""
    # The first element of the triple is not needed to build the graph.
    _unused, graph_te, constants = make_MobileNetv1TE()
    u55_256 = cs.EthosuDeviceConfig("ethos-u55-256")
    cascader_graph = cs.create_cascader_graph(graph_te, constants, u55_256)
    return cascader_graph
def test_ethosu_unary_elementwise_matcher(ofm_shape, ifm_layout, ofm_layout, op_type):
    """Match a unary elementwise compute and check its propagator and propagated shape."""
    nhwc_to_nhcwb16 = [
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 1 / 16, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 0, 16],
        [0, 0, 0, 0, 1],
    ]

    def to_nhcwb16(nhwc_shape):
        # Apply the layout matrix to the shape (with a trailing 1 appended),
        # drop the last entry and round every entry up to an integer.
        transformed = np.matmul(nhwc_to_nhcwb16, nhwc_shape + [1]).tolist()
        return [int(math.ceil(extent)) for extent in transformed[:-1]]

    ofm_channels = ofm_shape[3]
    ifm_shape = ofm_shape.copy()
    if ifm_layout == "NHCWB16":
        ifm_shape = to_nhcwb16(ifm_shape)

    order = [1, 2, 3, 4]
    if ofm_layout == "NHCWB16":
        ofm_shape = to_nhcwb16(ofm_shape)
        order = [1, 2, 4, 3, 0]

    ifm = te.placeholder(ifm_shape, dtype="int8")
    lut = te.placeholder((), dtype="uint8")
    out = unary_elementwise_compute(
        ifm=ifm,
        lut=lut,
        operator_type=op_type,
        ifm_scale=1,
        ifm_zero_point=0,
        ofm_scale=1,
        ofm_zero_point=0,
        ofm_channels=ofm_channels,
        activation="NONE",
        clip_min=0,
        clip_max=0,
        rounding_mode="TFL",
        ifm_layout=ifm_layout,
        ofm_layout=ofm_layout,
    )
    ifm_propagator = out.op.attrs["ifm_propagator"]

    rank = len(ofm_shape)
    zero_offset = [0] * rank
    zero_stripes = [0] * rank
    output_stripe_config = cs.StripeConfig(
        ofm_shape, ofm_shape, ofm_shape, order, zero_stripes, zero_offset
    )

    expected_transform = _make_matrices(ifm_layout, ofm_layout)

    part = match_ethosu_unary_elementwise(out, cs.EthosuDeviceConfig("ethos-u55-256"))

    assert isinstance(part, cs.EthosuPart)
    assert len(part.propagators) == 1
    assert part.propagators[0].transform == expected_transform

    propagated_ifm = ifm_propagator.propagate(output_stripe_config).shape

    if ifm_layout == ofm_layout:
        assert ifm_shape == propagated_ifm
    else:
        # Layout conversions align the propagated IFM to the brick size (16),
        # so the last expected dimension is rounded up to a multiple of 16.
        assert ifm_shape[:-1] == propagated_ifm[:-1]
        brick_aligned_last = ((ifm_shape[-1] + 15) // 16) * 16
        assert brick_aligned_last == propagated_ifm[-1]