Esempio n. 1
0
def test_small_graph():
    """Check tensor/part wiring and CascaderGraph ordering on a two-part graph.

    Topology built here: two graph inputs feed the first part, the first part's
    output feeds the second part, and the second part writes the graph output.
    """
    identity = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
    swap_axes = [[0, 1, 0], [1, 0, 0], [0, 0, 1]]

    te_subgraph = cs.TESubgraph([], None)
    first_part = cs.InlinePart(
        te_subgraph,
        [cs.Propagator(identity, [0, 0]), cs.Propagator(swap_axes, [-1, -1])],
    )
    second_part = cs.InlinePart(te_subgraph, [cs.Propagator(identity, [0, 0])])

    in_a = cs.Tensor([10, 10], "uint8")
    in_b = cs.Tensor([9, 9], "uint8")
    mid = cs.Tensor([10, 10], "uint8")
    out = cs.Tensor([10, 10], "uint8")

    # Links are registered explicitly in both directions (part -> tensor and
    # tensor -> part).
    first_part.set_input(0, in_a)
    first_part.set_input(1, in_b)
    first_part.set_output(mid)
    in_a.add_consumer(first_part)
    in_b.add_consumer(first_part)
    mid.add_producer(first_part)
    second_part.set_input(0, mid)
    second_part.set_output(out)
    mid.add_consumer(second_part)
    out.add_producer(second_part)

    # Part-side view of the connections.
    for part, expected_inputs, expected_output in (
        (first_part, [in_a, in_b], mid),
        (second_part, [mid], out),
    ):
        assert part.input_tensors == expected_inputs
        assert part.output_tensor == expected_output

    # Tensor-side view of the connections.
    for tensor, expected_producers, expected_consumers in (
        (in_a, [], [first_part]),
        (in_b, [], [first_part]),
        (mid, [first_part], [second_part]),
        (out, [second_part], []),
    ):
        assert tensor.producers == expected_producers
        assert tensor.consumers == expected_consumers

    graph = cs.CascaderGraph([in_a, in_b], [out])
    assert graph.input_tensors == [in_a, in_b]
    assert graph.output_tensors == [out]
    # The graph reports the second part ahead of the first one.
    assert graph.part_order == [second_part, first_part]
    # A part's ID must equal its position in part_order.
    assert all(
        graph.get_part_id(part) == position
        for position, part in enumerate(graph.part_order)
    )
Esempio n. 2
0
def test_generate_graph_plans(SRAM, DRAM):
    """Generate plans for a two-part graph and check how many closed plans come back.

    SRAM and DRAM are supplied by the caller (test fixtures) and are used both
    as candidate home regions for the tensors and as the cascade region.
    """
    identity = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
    swap_axes = [[0, 1, 0], [1, 0, 0], [0, 0, 1]]

    te_subgraph = cs.TESubgraph([], None)
    first_part = cs.InlinePart(
        te_subgraph,
        [cs.Propagator(identity, [0, 0]), cs.Propagator(swap_axes, [-1, -1])],
    )
    second_part = cs.InlinePart(te_subgraph, [cs.Propagator(identity, [0, 0])])

    in_a = cs.Tensor([10, 10], "int8")
    in_b = cs.Tensor([9, 9], "int8")
    mid = cs.Tensor([10, 10], "int8")
    out = cs.Tensor([10, 10], "int8")

    # in_a, in_b -> first_part -> mid -> second_part -> out
    first_part.set_input(0, in_a)
    first_part.set_input(1, in_b)
    first_part.set_output(mid)
    in_a.add_consumer(first_part)
    in_b.add_consumer(first_part)
    mid.add_producer(first_part)
    second_part.set_input(0, mid)
    second_part.set_output(out)
    mid.add_consumer(second_part)
    out.add_producer(second_part)

    graph = cs.CascaderGraph([in_a, in_b], [out])

    # Candidate home regions for each tensor.
    home_map = {
        in_a: [SRAM, DRAM],
        in_b: [SRAM],
        mid: [SRAM],
        out: [SRAM, DRAM],
    }
    options = make_options(cascade_region=SRAM, stripe_factors=4, max_plan_size=10)

    closed_plans = _generate_graph_plans(graph, home_map, options)

    expected_part_groups = 3
    assert len(closed_plans) == expected_part_groups
Esempio n. 3
0
def test_generate_output_stripe_configs_disable_striping(stripe_factors):
    """With striping disabled, exactly one output stripe config must be generated.

    The result count is expected to be 1 for whatever ``stripe_factors`` value
    the caller supplies.
    """
    te_subgraph = cs.TESubgraph([], None)
    propagator = cs.Propagator([[2, 0, 0], [0, 2, 0], [0, 0, 1]], [0, 0])
    part_1 = cs.InlinePart(te_subgraph, [propagator])

    ifm = cs.Tensor([800, 800], "uint8")
    ofm = cs.Tensor([400, 400], "uint8")

    part_1.set_input(0, ifm)
    part_1.set_output(ofm)
    ifm.add_consumer(part_1)
    ofm.add_producer(part_1)

    configs = _generate_output_stripe_configs(
        part_1, stripe_factors, enable_striping=False, multi_dimensional=False
    )
    assert len(configs) == 1
Esempio n. 4
0
def test_generate_output_stripe_configs_single_dimension():
    """Compare single-dimension stripe configs against an explicit expected set.

    Striping is enabled but ``multi_dimensional`` is off; the generated configs
    must match the five listed below, both in count and as a set.
    """
    te_subgraph = cs.TESubgraph([], None)
    propagator = cs.Propagator([[2, 0, 0], [0, 2, 0], [0, 0, 1]], [0, 0])
    part_1 = cs.InlinePart(te_subgraph, [propagator])

    ifm = cs.Tensor([800, 800], "uint8")
    ofm = cs.Tensor([400, 400], "uint8")

    part_1.set_input(0, ifm)
    part_1.set_output(ofm)
    ifm.add_consumer(part_1)
    ofm.add_producer(part_1)

    # Each row holds the six positional arguments of one cs.StripeConfig.
    expected_args = [
        ([400, 1], [400, 400], [400, 1], [2, 1], [1, 400], [0, 0]),
        ([400, 200], [400, 400], [400, 200], [2, 1], [1, 2], [0, 0]),
        ([1, 400], [400, 400], [1, 400], [1, 2], [400, 1], [0, 0]),
        ([200, 400], [400, 400], [200, 400], [1, 2], [2, 1], [0, 0]),
        ([400, 400], [400, 400], [400, 400], [1, 2], [1, 1], [0, 0]),
    ]
    expected_stripe_configs = {cs.StripeConfig(*args) for args in expected_args}

    generated = _generate_output_stripe_configs(
        part=part_1, stripe_factors=3, enable_striping=True, multi_dimensional=False
    )

    # The length check guards against duplicates that the set comparison would hide.
    assert len(generated) == len(expected_stripe_configs)
    assert set(generated) == expected_stripe_configs
Esempio n. 5
0
def test_ethosu_part():
    """Build an EthosuPart and smoke-test its align hint and performance model."""
    quantum = [1, 2, 2, 8]
    ifm_propagator = cs.Propagator(
        [
            [1, 0, 0, 0, 2],
            [0, 1, 0, 0, 2],
            [0, 0, 1, 0, 0],
            [0, 0, 0, 1, 0],
            [0, 0, 0, 0, 1],
        ],
        [0, 0, 0, 0],
    )
    out_stripes = cs.StripeConfig(
        [1, 4, 4, 16],
        [1, 64, 72, 96],
        [1, 4, 4, 16],
        [1, 2, 3, 4],
        [1, 16, 13, 6],
        [0, 0, 0, 0],
    )

    part = EthosuPart(
        cs.TESubgraph([], None),
        [ifm_propagator],
        quantum,
        3,  # subkernels
        [cs.BlockConfig([1, 2, 4, 16], 15000, 7500)],  # valid block configs
        1,
    )
    part.set_input(0, cs.Tensor(shape=[1, 66, 74, 16], dtype="int8"))

    # The align hint is expected to equal the output quantum passed in above.
    assert part.get_stripe_align_hint() == quantum
    # Only check that the performance model runs; its output is not verified.
    for mode in (BufferMode.ROLLING, BufferMode.RECOMPUTE):
        part.get_performance_info(out_stripes, mode)
Esempio n. 6
0
def test_inline_part():
    """Check the accessors and stripe propagation of an unconnected InlinePart.

    The part is built with a single propagator whose matrix swaps the two
    axes, so the expected input stripe config is the output stripe config
    with its per-axis values exchanged.
    """
    subgraph = cs.TESubgraph([], None)
    part = cs.InlinePart(
        subgraph,
        [
            cs.Propagator(
                [[0, 1, 0], [1, 0, 0], [0, 0, 1]],
                [0, 0],
            ),
        ],
    )
    output_stripe_config = cs.StripeConfig([2, 4], [8, 8], [2, 4], [1, 2], [4, 2], [0, 0])
    input_stripe_config = cs.StripeConfig([4, 2], [8, 8], [4, 2], [2, 1], [2, 4], [0, 0])

    # No tensors were attached: one empty input slot and no output.
    assert part.input_tensors == [None]
    # Singletons are compared by identity rather than equality.
    assert part.output_tensor is None
    assert len(part.propagators) == 1
    assert part.in_line
    assert part.get_stripe_align_hint() == [1, 1]

    # The performance info of this inline part is expected to be all zeros.
    performance_info = part.get_performance_info(output_stripe_config, is_rolling=False)
    assert performance_info.compute_cycles == 0
    assert performance_info.read_bytes == [0]
    assert performance_info.write_bytes == 0

    input_stripe_configs = part.calculate_input_stripe_configs(output_stripe_config)
    assert len(input_stripe_configs) == 1
    assert input_stripe_configs[0] == input_stripe_config
Esempio n. 7
0
def test_generate_single_plans(SRAM, DRAM):
    """Generate single-part plans and validate the properties of every plan.

    SRAM and DRAM are supplied by the caller (test fixtures). Each generated
    plan must use SRAM as its interior region, contain only the one part,
    cover both tensors, and have all open configs in the INTERIOR state.
    """
    te_subgraph = cs.TESubgraph([], None)
    propagator = cs.Propagator([[2, 0, 0], [0, 2, 0], [0, 0, 1]], [0, 0])
    part_1 = cs.InlinePart(te_subgraph, [propagator])

    ifm = cs.Tensor([800, 800], "int8")
    ofm = cs.Tensor([400, 400], "int8")

    part_1.set_input(0, ifm)
    part_1.set_output(ofm)
    ifm.add_consumer(part_1)
    ofm.add_producer(part_1)

    home_map = {ifm: [SRAM, DRAM], ofm: [SRAM]}
    options = make_options(cascade_region=SRAM, stripe_factors=1)

    stripe_configs = _generate_output_stripe_configs(part_1, options.stripe_factors)
    plans = _generate_single_plans(part_1, stripe_configs, home_map, options)

    expected_group = frozenset([part_1])
    expected_tensors = {ifm, ofm}
    for plan in plans:
        assert plan.interior_region == SRAM
        assert plan.part_group == expected_group
        assert set(plan.tensor_configs.keys()) == expected_tensors
        assert all(
            open_config.state == cs.TensorConfigState.INTERIOR
            for open_config in plan.open_configs
        )
Esempio n. 8
0
def test_force_block_config_kernelwise(ofm_layout, block_config_str,
                                       expected_block_shape):
    """Force a block config for a pooling op and check it is the only one returned.

    ``block_config_str`` is passed through the ``dev_force_block_config`` option;
    the single resulting block config must have ``expected_block_shape`` as its
    output shape.
    """
    op_type = "ethosu_pooling"
    kernel_h, kernel_w = 2, 2
    stride_h, stride_w = 2, 2
    dilation_h, dilation_w = 1, 1
    ifm_channels = 32
    out_shape = (1, 8, 10, 16)

    # Only the IFM matrix and offset are needed; the other four results are discarded.
    ifm_matrix, ifm_offset, _, _, _, _ = make_matrices(
        op_type,
        (kernel_h, kernel_w),
        (stride_h, stride_w),
        (0, 0),  # padding
        "NHWC",
        ofm_layout,
        (dilation_h, dilation_w),
        ifm_channels,
    )
    ofm_channels = out_shape[3]
    propagator = cs.Propagator(ifm_matrix, ifm_offset)

    op_attrs = {
        "op": op_type,
        "activation": "NONE",
        "stride_h": stride_h,
        "stride_w": stride_w,
        "dilation_h": dilation_h,
        "dilation_w": dilation_w,
    }
    ethosu_options = {
        "enable_cascader": True,
        "dev_force_block_config": block_config_str,
    }

    # The device config is created and queried while the pass context is active.
    with tvm.transform.PassContext(config={"relay.ext.ethos-u.options": ethosu_options}):
        device_config = cs.EthosuDeviceConfig("ethos-u55-128")
        block_configs = device_config.get_valid_block_configs(
            propagator,
            op_attrs,
            out_shape,
            ofm_channels,
            ifm_channels,
            ofm_layout,
            "NHWC",
            "int8",
            "int8",
            kernel_h,
            kernel_w,
        )

    assert len(block_configs) == 1
    forced = block_configs[0]
    assert forced.output_shape == expected_block_shape
Esempio n. 9
0
def test_force_block_config_elementwise(ofm_layout, block_config_str,
                                        expected_block_shape):
    """Force a block config for an elementwise unary op and check the result.

    ``block_config_str`` is passed through the ``dev_force_block_config`` option;
    the single resulting block config must have ``expected_block_shape`` as its
    output shape.
    """
    op_type = "ethosu_elementwise_unary"
    op_str = "ABS"
    activation = "NONE"
    ofm_shape = (1, 8, 10, 16)
    # 5x5 identity transform with a zero offset.
    ifm_matrix = [
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1],
    ]
    ifm_offset = [0, 0, 0, 0]

    propagator = cs.Propagator(ifm_matrix, ifm_offset)

    op_attrs = {
        "op": op_type,
        "operator_type": op_str,
        "activation": activation,
        "clip_min": 0,
        "clip_max": 0,
        "rounding_mode": "TFL",
    }

    config = {
        "enable_cascader": True,
        "dev_force_block_config": block_config_str,
    }
    # The device config is created and queried while the pass context is active.
    with tvm.transform.PassContext(
            config={"relay.ext.ethos-u.options": config}):
        device_config = cs.EthosuDeviceConfig("ethos-u55-128")
        block_configs = device_config.get_elementwise_block_config(
            propagator,
            None,
            op_attrs,
            ofm_shape,
            ofm_layout,
            "NHWC",  # presumably the input feature-map layout - TODO confirm
            None,
            "int8",
            "int8",
        )

    assert len(block_configs) == 1
    assert block_configs[0].output_shape == expected_block_shape
Esempio n. 10
0
def test_ethosu_part():
    """Build an EthosuPart from ``pl`` objects and smoke-test its basic queries."""
    quantum = [1, 2, 2, 8]
    cycles_per_quantum = 32
    ifm_propagator = pl.Propagator(
        [
            [1, 0, 0, 0, 2],
            [0, 1, 0, 0, 2],
            [0, 0, 1, 0, 0],
            [0, 0, 0, 1, 0],
            [0, 0, 0, 0, 1],
        ],
        [0, 0, 0, 0],
    )
    out_stripes = pl.StripeConfig(
        [1, 4, 4, 16],
        [1, 64, 72, 96],
        [1, 4, 4, 16],
        [1, 2, 3, 4],
        [1, 16, 13, 6],
        [0, 0, 0, 0],
    )

    part = EthosuPart(
        pl.TESubgraph([], None), [ifm_propagator], quantum, cycles_per_quantum
    )

    # The align hint is expected to equal the output quantum passed in above.
    assert part.get_stripe_align_hint() == quantum
    # Only check that the performance model runs for both flag values; its
    # output is not verified.
    for flag in (False, True):
        part.get_performance_info(out_stripes, flag)
Esempio n. 11
0
def test_generate_output_stripe_configs():
    """Check how many output stripe configs are generated with default options.

    With a stripe factor of 3 and no further keyword arguments, 13 configs
    are expected for the 400x400 output tensor.
    """
    te_subgraph = cs.TESubgraph([], None)
    propagator = cs.Propagator([[2, 0, 0], [0, 2, 0], [0, 0, 1]], [0, 0])
    part_1 = cs.InlinePart(te_subgraph, [propagator])

    ifm = cs.Tensor([800, 800], "uint8")
    ofm = cs.Tensor([400, 400], "uint8")

    part_1.set_input(0, ifm)
    part_1.set_output(ofm)
    ifm.add_consumer(part_1)
    ofm.add_producer(part_1)

    generated = _generate_output_stripe_configs(part_1, 3)
    assert len(generated) == 13
Esempio n. 12
0
def test_best_block_config(
    test_id,
    op_type,
    activation,
    kernel,
    stride,
    dilation,
    padding,
    in_shape,
    out_shape,
    layouts,
    acc_config,
    expected_block_configs,
):
    """Check that the block config picked for a whole-tensor stripe is an expected one.

    ``layouts`` is an (input layout, output layout) pair. Shapes arrive as
    4-element tuples and are converted with the NHWC -> NHCWB16 matrix below
    when the corresponding layout is "NHCWB16". The chosen block's output
    shape must be a member of ``expected_block_configs[test_id]``.
    """
    ifm_layout, ofm_layout = layouts[0], layouts[1]

    nhwc_to_nhcwb16 = [
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 1 / 16, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 0, 16],
        [0, 0, 0, 0, 1],
    ]
    # Inverse layout transform; not referenced anywhere below.
    nhcwb16_to_nhwc = [
        [1, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0],
        [0, 0, 16, 0, 1, -16],
        [0, 0, 0, 0, 0, 1],
    ]

    def to_nhcwb16(shape):
        # Append a 1 to the shape, apply the matrix, drop the trailing element
        # and round every dimension up to an integer.
        transformed = np.matmul(nhwc_to_nhcwb16, shape + (1, )).tolist()[:-1]
        return [int(math.ceil(dim)) for dim in transformed]

    # Channel counts are read from the incoming shapes before any layout conversion.
    ifm_matrix, ifm_offset, weight_matrix, weight_offset, _, _ = make_matrices(
        op_type, kernel, stride, padding, ifm_layout, ofm_layout, dilation, in_shape[3]
    )
    ofm_channels = out_shape[3]
    ifm_channels = in_shape[3]

    if ifm_layout == "NHCWB16":
        in_shape = to_nhcwb16(in_shape)
    if ofm_layout == "NHCWB16":
        out_shape = to_nhcwb16(out_shape)

    propagator = cs.Propagator(ifm_matrix, ifm_offset)
    weight_propagator = cs.Propagator(weight_matrix, weight_offset)

    kernel_h, kernel_w = kernel[0], kernel[1]
    # Number of 8x8 tiles needed to cover the kernel (ceil-division per axis).
    subkernels = ((kernel_h + 7) // 8) * ((kernel_w + 7) // 8)

    op_attrs = {
        "op": op_type,
        "activation": activation,
        "stride_h": stride[0],
        "stride_w": stride[1],
        "dilation_h": dilation[0],
        "dilation_w": dilation[1],
    }

    device_config = cs.EthosuDeviceConfig(acc_config)
    block_configs = device_config.get_valid_block_configs(
        propagator,
        op_attrs,
        out_shape,
        ofm_channels,
        ifm_channels,
        ofm_layout,
        ifm_layout,
        "int8",
        "int8",
        kernel_h,
        kernel_w,
    )

    output_quantum = [1, 1, 1, 2, 8] if ofm_layout == "NHCWB16" else [1, 1, 2, 8]

    # Create the EthosuPart under test.
    part = cs.EthosuPart(
        cs.TESubgraph([], None),
        [propagator, weight_propagator],
        output_quantum,
        subkernels,
        block_configs,
        1,
    )

    # NOTE(review): the 4-element order is paired with the 5-element NHCWB16
    # quantum and vice versa; this looks swapped - confirm before changing.
    if ofm_layout == "NHCWB16":
        order = [1, 2, 3, 4]
    else:
        order = [1, 2, 4, 3, 0]
    rank = len(output_quantum)
    stripes = [1] * rank
    offset = [0] * rank

    # A single stripe spanning the whole output tensor.
    stripe_config = cs.StripeConfig(out_shape, out_shape, out_shape, order,
                                    stripes, offset)

    chosen = part.get_block_config(stripe_config)
    chosen_shape = tuple(map(int, chosen.output_shape))

    assert chosen_shape in expected_block_configs[test_id]
Esempio n. 13
0
def test_conv_performance(
    accelerator,
    op_type,
    activation,
    kernel,
    stride,
    dilation,
    padding,
    in_shape,
    out_shape,
    block_shape,
    input_block_shape,
    expected,
):
    """Check an EthosuPart's reported compute cycles against an expected value.

    A single block config is assembled from the device config's cycle
    estimates, and the part's compute cycles for a whole-tensor stripe must
    lie within 10% of ``expected``.
    """
    kernel_h, kernel_w = kernel[0], kernel[1]
    ifm_channels = in_shape[3]

    ifm_matrix, ifm_offset, weight_matrix, weight_offset, _, _ = make_matrices(
        op_type, kernel, stride, padding, "NHWC", "NHWC", dilation, ifm_channels
    )
    propagator = cs.Propagator(ifm_matrix, ifm_offset)
    weight_propagator = cs.Propagator(weight_matrix, weight_offset)

    # Number of 8x8 tiles needed to cover the kernel (ceil-division per axis).
    subkernels = ((kernel_h + 7) // 8) * ((kernel_w + 7) // 8)

    device_config = cs.EthosuDeviceConfig(accelerator)

    # Scale the output-cycle figure by the number of elements in the block.
    block_elements = reduce(lambda acc, dim: acc * dim, block_shape, 1)
    output_cycles = device_config._get_output_cycles(op_type, "", "int8", "int8", activation)
    output_cycles *= block_elements

    is_partkernel = device_config.is_partkernel(
        op_type, ifm_channels, "int8", kernel_h * kernel_w
    )
    block_compute_cycles = device_config._estimate_compute_cycles_per_block(
        op_type,
        _Shape(block_shape),
        _Shape(input_block_shape),
        kernel_h,
        kernel_w,
        ifm_channels,
        "int8",
        is_partkernel,
    )
    block_configs = [
        cs.BlockConfig(
            input_block_shape, block_shape, block_compute_cycles, int(output_cycles)
        )
    ]

    output_quantum = [1, 1, 2, 8]
    part = cs.EthosuPart(
        cs.TESubgraph([], None),
        [propagator, weight_propagator],
        output_quantum,
        subkernels,
        block_configs,
        1,
    )
    part.set_input(0, cs.Tensor(in_shape, "int8"))
    weight_shape = [ifm_channels, kernel_h, kernel_w, out_shape[-1]]
    part.set_input(1, cs.Tensor(weight_shape, "int8"))
    part.set_output(cs.Tensor(out_shape, "int8"))

    rank = len(output_quantum)
    stripes = [1] * rank
    offset = [0] * rank
    order = [1, 2, 3, 4]

    # A single stripe spanning the whole output tensor.
    stripe_config = cs.StripeConfig(out_shape, out_shape, out_shape, order, stripes, offset)

    performance = part.get_performance_info(stripe_config, cs.BufferMode.ROLLING)
    measured_cycles = performance.compute_cycles

    tolerance = expected * 0.1
    lower_bound = expected - tolerance
    upper_bound = expected + tolerance
    assert lower_bound <= measured_cycles <= upper_bound