Example #1
def test_copy_grad():
    data = relay.var("data", relay.TensorType((10, 4), "float64"))
    fwd_func = relay.Function([data], relay.copy(data))
    check_grad(fwd_func)
Example #2
def test_wildcard_match_solo():
    x = relay.Var("x", nat())
    copy = relay.Function([x], relay.Match(x, [relay.Clause(relay.PatternWildcard(), x)]), nat())

    res = intrp.evaluate(copy(s(s(s(z())))))
    assert count(res) == 3
Example #3
def test_iterate():
    expr = relay.Call(iterate(double, relay.const(2)), [make_nat_expr(3)])
    res = intrp.evaluate(relay.Function([], expr)())
    assert count(res) == 12
Example #4
def test_autotune_conv2d(temp_dir, board, west_cmd, tvm_debug):
    """Test AutoTune for microTVM Zephyr"""
    if board != "qemu_x86":
        pytest.xfail(f"Autotune fails on {board}.")

    runtime = Runtime("crt", {"system-lib": True})
    model = test_utils.ZEPHYR_BOARDS[board]
    build_config = {"debug": tvm_debug}

    # Create a Relay model
    data_shape = (1, 3, 16, 16)
    weight_shape = (8, 3, 5, 5)
    data = relay.var("data", relay.TensorType(data_shape, "float32"))
    weight = relay.var("weight", relay.TensorType(weight_shape, "float32"))
    y = relay.nn.conv2d(
        data,
        weight,
        padding=(2, 2),
        kernel_size=(5, 5),
        kernel_layout="OIHW",
        out_dtype="float32",
    )
    f = relay.Function([data, weight], y)
    mod = tvm.IRModule.from_expr(f)
    mod = relay.transform.InferType()(mod)

    data_sample = np.random.rand(data_shape[0], data_shape[1], data_shape[2],
                                 data_shape[3]).astype("float32")
    weight_sample = np.random.rand(weight_shape[0], weight_shape[1],
                                   weight_shape[2],
                                   weight_shape[3]).astype("float32")
    params = {mod["main"].params[1].name_hint: weight_sample}

    target = tvm.target.target.micro(model)
    pass_context = tvm.transform.PassContext(
        opt_level=3, config={"tir.disable_vectorize": True})
    with pass_context:
        tasks = tvm.autotvm.task.extract_from_program(mod["main"], {}, target)
    assert len(tasks) > 0

    config_main_stack_size = None
    if test_utils.qemu_boards(board):
        config_main_stack_size = 1536

    project_options = {
        "zephyr_board": board,
        "west_cmd": west_cmd,
        "verbose": 1,
        "project_type": "host_driven",
    }
    if config_main_stack_size is not None:
        project_options["config_main_stack_size"] = config_main_stack_size

    module_loader = tvm.micro.AutoTvmModuleLoader(
        template_project_dir=test_utils.TEMPLATE_PROJECT_DIR,
        project_options=project_options,
    )

    timeout = 200
    builder = tvm.autotvm.LocalBuilder(
        timeout=timeout,
        n_parallel=1,
        build_kwargs={"build_option": {
            "tir.disable_vectorize": True
        }},
        do_fork=True,
        build_func=tvm.micro.autotvm_build_func,
        runtime=runtime,
    )
    runner = tvm.autotvm.LocalRunner(number=1,
                                     repeat=1,
                                     timeout=timeout,
                                     module_loader=module_loader)

    measure_option = tvm.autotvm.measure_option(builder=builder, runner=runner)

    log_path = pathlib.Path("zephyr_autotune.log")
    if log_path.exists():
        log_path.unlink()

    n_trial = 10
    for task in tasks:
        tuner = tvm.autotvm.tuner.GATuner(task)
        tuner.tune(
            n_trial=n_trial,
            measure_option=measure_option,
            callbacks=[
                tvm.autotvm.callback.log_to_file(str(log_path)),
                tvm.autotvm.callback.progress_bar(n_trial, si_prefix="M"),
            ],
            si_prefix="M",
        )
        assert tuner.best_flops > 0

    check_tune_log(log_path)

    # Build without tuning
    with pass_context:
        lowered = tvm.relay.build(mod,
                                  target=target,
                                  runtime=runtime,
                                  params=params)

    temp_dir = utils.tempdir()
    with _make_session(temp_dir, board, west_cmd, lowered,
                       build_config) as session:
        graph_mod = tvm.micro.create_local_graph_executor(
            lowered.get_graph_json(), session.get_system_lib(), session.device)
        graph_mod.set_input(**lowered.get_params())
        graph_mod.run(data=data_sample)
        expected_output = graph_mod.get_output(0).numpy()
        del graph_mod

    # Build using autotune logs
    with tvm.autotvm.apply_history_best(str(log_path)):
        with pass_context:
            lowered_tuned = tvm.relay.build(mod,
                                            target=target,
                                            runtime=runtime,
                                            params=params)

    temp_dir = utils.tempdir()
    with _make_session(temp_dir, board, west_cmd, lowered_tuned,
                       build_config) as session:
        graph_mod = tvm.micro.create_local_graph_executor(
            lowered_tuned.get_graph_json(), session.get_system_lib(),
            session.device)
        graph_mod.set_input(**lowered_tuned.get_params())
        graph_mod.run(data=data_sample)
        output = graph_mod.get_output(0).numpy()
        del graph_mod

    tvm.testing.assert_allclose(output, expected_output, rtol=1e-4, atol=1e-5)
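For reference, the same AutoTVM flow can be exercised on the host with no Zephyr board attached. The sketch below is a hypothetical variant that reuses `mod` and `params` from the test above; the "llvm" target, the trial count, and the "host_tune.log" file name are illustrative assumptions, not part of the original test.

import tvm
from tvm import autotvm, relay

host_target = "llvm"
host_tasks = autotvm.task.extract_from_program(mod["main"], {}, host_target)
host_measure = autotvm.measure_option(
    builder=autotvm.LocalBuilder(),                   # compile candidates locally
    runner=autotvm.LocalRunner(number=1, repeat=1),   # time them on the host CPU
)
for task in host_tasks:
    tuner = autotvm.tuner.GATuner(task)
    tuner.tune(
        n_trial=4,
        measure_option=host_measure,
        callbacks=[autotvm.callback.log_to_file("host_tune.log")],
    )
# Apply the best records found during tuning when building the module.
with autotvm.apply_history_best("host_tune.log"):
    with tvm.transform.PassContext(opt_level=3):
        host_lib = relay.build(mod, target=host_target, params=params)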
Example #5
 def expected():
     x = relay.var("x", shape=(1, 16, 16, 16), dtype="float32")
     w = relay.var("w", shape=(32, 16, 3, 3), dtype="float32")
     y = relay.nn.conv2d(x, w, padding=(1, 1))
     y = relay.reshape(y, newshape=(32, 16, 16))
     return relay.Function([x, w], y)
Example #6
def fuse_partitions(pre_mod, mid_mod, post_mod):
    """Combine prefix, middle, and suffix modules into a single module.

    The combined module includes an additional `main` that fuses all three
    partitions together.

    Parameters
    ----------
    pre_mod : tvm.IRModule
        Module containing an input quantization function

    mid_mod : tvm.IRModule
        Module containing the core of a quantized inference function

    post_mod : tvm.IRModule
        Module containing an output dequantization function

    Returns
    -------
    fused_mod : tvm.IRModule
        Module containing the input quantization, core quantized inference,
        output dequantization, and full quantized inference functions
    """
    pre_func = pre_mod["main"]
    mid_func = mid_mod["main"]
    post_func = post_mod["main"]
    # create a module containing the prefix, middle, and suffix partitions
    fused_mod = tvm.IRModule(
        functions={
            relay.GlobalVar("quantize_inputs"): pre_func,
            relay.GlobalVar("quantized_main"): mid_func,
            relay.GlobalVar("dequantize_outputs"): post_func,
        })
    # construct a `main` that strings together the partitions, such that its
    # behaviour is equivalent to `main` in an *unpartitioned* module
    scope_builder = relay.ScopeBuilder()
    fused_mod_main_params = [
        relay.Var(param.name_hint) for param in pre_func.params
    ]
    quantized_inputs = scope_builder.let(
        "quantized_inputs",
        relay.Call(fused_mod.get_global_var("quantize_inputs"),
                   fused_mod_main_params),
    )
    quantized_outputs = scope_builder.let(
        "quantized_outputs",
        relay.Call(
            fused_mod.get_global_var("quantized_main"),
            [
                relay.TupleGetItem(quantized_inputs, i)
                for i in range(len(pre_func.ret_type.fields))
            ],
        ),
    )
    dequantized_outputs = scope_builder.let(
        "dequantized_outputs",
        relay.Call(fused_mod.get_global_var("dequantize_outputs"),
                   [quantized_outputs]),
    )
    scope_builder.ret(dequantized_outputs)
    fused_mod["main"] = relay.Function(fused_mod_main_params,
                                       scope_builder.get())
    return fused_mod
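A minimal, self-contained sketch of how `fuse_partitions` can be exercised with three toy partitions. The identity-style "quantization" below is purely illustrative (not from the original pass); only the prefix module is type-inferred up front because `fuse_partitions` reads `pre_func.ret_type.fields`.

import tvm
from tvm import relay

# Toy prefix partition: "quantize" one float32 input into a 1-tuple of int8 tensors.
x = relay.var("x", shape=(4,), dtype="float32")
pre_mod = tvm.IRModule.from_expr(relay.Function([x], relay.Tuple([relay.cast(x, "int8")])))
pre_mod = relay.transform.InferType()(pre_mod)  # populate pre_func.ret_type

# Toy middle partition: identity over the quantized tensor.
q = relay.var("q", shape=(4,), dtype="int8")
mid_mod = tvm.IRModule.from_expr(relay.Function([q], q))

# Toy suffix partition: "dequantize" back to float32.
qo = relay.var("qo", shape=(4,), dtype="int8")
post_mod = tvm.IRModule.from_expr(relay.Function([qo], relay.cast(qo, "float32")))

fused = fuse_partitions(pre_mod, mid_mod, post_mod)
print(fused["main"])  # main(x) chains quantize_inputs -> quantized_main -> dequantize_outputs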
Example #7
def get_network(name, batch_size, dtype='float32'):
    """Get the symbol definition and random weight of a network

    Parameters
    ----------
    name: str
        The name of the network, can be 'resnet-18', 'resnet-50', 'vgg-16', 'inception_v3', 'mobilenet', ...
    batch_size: int
        batch size
    dtype: str
        Data type

    Returns
    -------
    net: tvm.IRModule
        The Relay module containing the network definition
    params: dict
        The random parameters for benchmark
    input_shape: tuple
        The shape of input tensor
    output_shape: tuple
        The shape of output tensor
    """
    input_shape = (batch_size, 3, 224, 224)
    output_shape = (batch_size, 1000)

    if name == 'mobilenet':
        net, params = testing.mobilenet.get_workload(batch_size=batch_size,
                                                     dtype=dtype)
    elif name == 'inception_v3':
        input_shape = (batch_size, 3, 299, 299)
        net, params = testing.inception_v3.get_workload(batch_size=batch_size,
                                                        dtype=dtype)
    elif "resnet" in name:
        n_layer = int(name.split('-')[1])
        net, params = testing.resnet.get_workload(num_layers=n_layer,
                                                  batch_size=batch_size,
                                                  dtype=dtype)
    elif "vgg" in name:
        n_layer = int(name.split('-')[1])
        net, params = testing.vgg.get_workload(num_layers=n_layer,
                                               batch_size=batch_size,
                                               dtype=dtype)
    elif "densenet" in name:
        n_layer = int(name.split('-')[1])
        net, params = testing.densenet.get_workload(densenet_size=n_layer,
                                                    batch_size=batch_size,
                                                    dtype=dtype)
    elif "squeezenet" in name:
        version = name.split("_v")[1]
        net, params = testing.squeezenet.get_workload(batch_size=batch_size,
                                                      version=version,
                                                      dtype=dtype)
    elif name == 'mxnet':
        # an example for mxnet model
        from mxnet.gluon.model_zoo.vision import get_model
        block = get_model('resnet18_v1', pretrained=True)
        net, params = relay.frontend.from_mxnet(block,
                                                shape={'data': input_shape},
                                                dtype=dtype)
        net = net["main"]
        net = relay.Function(net.params, relay.nn.softmax(net.body), None,
                             net.type_params, net.attrs)
        net = tvm.IRModule.from_expr(net)
    else:
        raise ValueError("Unsupported network: " + name)

    return net, params, input_shape, output_shape
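A minimal usage sketch, assuming `testing` refers to `tvm.relay.testing` (as in the branches above) and an LLVM-enabled TVM build:

mod, params, input_shape, output_shape = get_network("resnet-18", batch_size=1)
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target="llvm", params=params)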
Example #8
def test_match_func_attr():
    pattern = wildcard().has_attr({"Composite": "add"})
    x = relay.var("x")
    y = relay.var("y")
    f = relay.Function([x, y], x + y).with_attr("Composite", "add")
    assert pattern.match(f)
Example #9
def verify_sum_grad(d_shape, axis=None, keepdims=False, exclude=False):
    data = relay.var("data", relay.TensorType(d_shape, "float32"))
    fwd_func = relay.Function([data], relay.sum(data, axis=axis, keepdims=keepdims, exclude=exclude))
    check_grad(fwd_func)
Example #10
def test_quadruple_partition_dominator():
    # Pattern
    is_conv2d = is_op("nn.conv2d")(wildcard(), wildcard())
    is_unary_elemwise = (wildcard().has_attr({"TOpPattern": K_ELEMWISE}))(
        wildcard()) | is_op("add")(wildcard(), wildcard())
    reduction = is_op("add")(wildcard(), wildcard())
    diamond = dominates(is_conv2d, is_unary_elemwise, reduction)

    inp = relay.var("input")
    weight = relay.var("weight")

    # Classic Diamond
    def classic_diamond(inp, weight):
        conv2d = relay.op.nn.conv2d(inp, weight)
        relu = relay.op.nn.relu(conv2d)
        relu = relay.op.nn.relu(relu)
        leaky_relu = relay.op.nn.leaky_relu(conv2d, alpha=0)
        return relu + leaky_relu

    # Deeper Branch
    def deeper_diamond(inp, weight):
        conv2d = relay.op.nn.conv2d(inp, weight)
        relu = relay.op.nn.relu(conv2d)
        relu = relay.op.nn.relu(relu)
        relu = relay.op.tanh(relu)
        leaky_relu = relay.op.nn.leaky_relu(conv2d, alpha=0)
        return relu + leaky_relu

    # Single Branch
    def single_branch(inp, weight):
        conv2d = relay.op.nn.conv2d(inp, weight)
        relu = relay.op.nn.relu(conv2d)
        relu = relay.op.nn.relu(relu)
        tanh = relay.op.tanh(relu)
        return relu + tanh

    # Fuzzy path/nested Diamond
    def nested_diamond(inp, weight):
        conv2d = relay.op.nn.conv2d(inp, weight)
        relu = relay.op.nn.relu(conv2d)
        relu = relu + relu
        tanh = relay.op.tanh(relu)
        leaky_relu = relay.op.nn.leaky_relu(conv2d, alpha=0)
        return tanh + leaky_relu

    partitioned = diamond.partition(
        nested_diamond(
            single_branch(deeper_diamond(classic_diamond(inp, weight), weight),
                          weight), weight))

    functions = []
    partition_names = [
        "nn.conv2d_nn.relu_nn.relu_nn.leaky_relu_add_",
        "nn.conv2d_nn.relu_nn.relu_tanh_nn.leaky_relu_add_",
        "nn.conv2d_nn.relu_nn.relu_tanh_add_",
        "nn.conv2d_nn.relu_add_tanh_nn.leaky_relu_add_",
    ]
    for i, f in enumerate(
        [classic_diamond, deeper_diamond, single_branch, nested_diamond]):
        inpf = relay.var("input")
        weightf = relay.var("weight")
        functions.append(
            relay.Function([inpf, weightf],
                           f(inpf,
                             weightf)).with_attr("PartitionedFromPattern",
                                                 partition_names[i]))

    reference = functions[3](functions[2](functions[1](functions[0](inp,
                                                                    weight),
                                                       weight), weight),
                             weight)
    assert tvm.ir.structural_equal(partitioned, reference)
Example #11
def test_partition_constant_embedding():
    x = relay.var("x")
    w = relay.var("w")
    wc = relay.const(1)
    b = relay.var("b")

    xf = relay.var("x")
    wf = relay.var("w")
    bf = relay.var("b")
    embeded_func = relay.Function([xf, bf],
                                  conv_bias_relu(xf, wc, bf)).with_attr(
                                      "PartitionedFromPattern",
                                      "nn.conv2d_nn.bias_add_nn.relu_")
    xf = relay.var("x")
    wf = relay.var("w")
    bf = relay.var("b")
    lifted_func = relay.Function([xf, wf, bf],
                                 conv_bias_relu(xf, wf, bf)).with_attr(
                                     "PartitionedFromPattern",
                                     "nn.conv2d_nn.bias_add_nn.relu_")
    relu = conv_bias_relu(x, w, b)
    reluc = conv_bias_relu(x, wc, b)

    # Check lifting of wildcard matches
    pattern = is_op("nn.relu")(is_op("nn.bias_add")(is_op("nn.conv2d")(
        wildcard(), wildcard()), wildcard()))
    assert tvm.ir.structural_equal(lifted_func(x, w, b),
                                   pattern.partition(relu))
    assert tvm.ir.structural_equal(lifted_func(x, wc, b),
                                   pattern.partition(reluc))

    # Check lifting of input matches
    pattern = is_op("nn.relu")(is_op("nn.bias_add")(is_op("nn.conv2d")(
        wildcard(), is_var()), wildcard()))
    assert tvm.ir.structural_equal(lifted_func(x, w, b),
                                   pattern.partition(relu))
    assert tvm.ir.structural_equal(
        reluc, pattern.partition(reluc))  # Constants are not Inputs

    # Check embedding of constant matches
    pattern = is_op("nn.relu")(is_op("nn.bias_add")(is_op("nn.conv2d")(
        wildcard(), is_constant()), wildcard()))
    assert tvm.ir.structural_equal(relu, pattern.partition(relu))
    assert tvm.ir.structural_equal(embeded_func(x, b),
                                   pattern.partition(reluc))

    # Check embedding of constant ExprPatterns
    pattern = is_op("nn.relu")(is_op("nn.bias_add")(is_op("nn.conv2d")(
        wildcard(), is_expr(wc)), wildcard()))
    assert tvm.ir.structural_equal(relu, pattern.partition(relu))
    assert tvm.ir.structural_equal(embeded_func(x, b),
                                   pattern.partition(reluc))

    # Check lifting/embedding of Alt matches
    pattern = is_op("nn.relu")(is_op("nn.bias_add")(is_op("nn.conv2d")(
        wildcard(), is_var() | is_constant()), wildcard()))
    assert tvm.ir.structural_equal(lifted_func(x, w, b),
                                   pattern.partition(relu))
    assert tvm.ir.structural_equal(embeded_func(x, b),
                                   pattern.partition(reluc))

    # Check lifting/embedding of Alt matches with the other ordering
    pattern = is_op("nn.relu")(is_op("nn.bias_add")(is_op("nn.conv2d")(
        wildcard(), is_constant() | is_var()), wildcard()))
    assert tvm.ir.structural_equal(lifted_func(x, w, b),
                                   pattern.partition(relu))
    assert tvm.ir.structural_equal(embeded_func(x, b),
                                   pattern.partition(reluc))
Example #12
 def get_net(include_bn=True, include_sigmoid=False):
     data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32"))
     block1 = get_blocks("block1_", data, 3, 8, include_bn, include_sigmoid)
     # The second block is always conv + relu, to make it more interesting
     block2 = get_blocks("block2_", block1, 8, 8, False, include_sigmoid)
     return relay.Function(relay.analysis.free_vars(block2), block2)
Example #13
def make_ethosn_composite(ethosn_expr, name):
    vars = relay.analysis.free_vars(ethosn_expr)
    func = relay.Function([relay.Var("a")], ethosn_expr)
    func = func.with_attr("Composite", name)
    call = relay.Call(func, vars)
    return call
Example #14
def make_module(func, params):
    func = relay.Function(relay.analysis.free_vars(func), func)
    if params:
        func = relay.build_module.bind_params_by_name(func, params)
    return tvm.IRModule.from_expr(func)
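A minimal usage sketch for `make_module` (hypothetical names and shapes):

import numpy as np

a = relay.var("a", shape=(1, 4), dtype="float32")
b = relay.var("b", shape=(1, 4), dtype="float32")
# Binding "b" as a constant parameter leaves "a" as the module's only input.
mod = make_module(relay.add(a, b), {"b": np.ones((1, 4), dtype="float32")})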
Example #15
 def create_model():
     ifm = relay.var("ifm", shape=ifm_shape, dtype="int32")
     clz = infra.make_ethosu_unary_elementwise(ifm, 4, "CLZ")
     return tvm.IRModule.from_expr(relay.Function([ifm], clz))
Example #16
 def before():
     x = relay.var("x", shape=(1, 32, 56, 56))
     w = relay.var("w", shape=(32, 1, 3, 3))
     y = relay.nn.conv2d(x, w, padding=(1, 1), channels=32, kernel_size=(3, 3), groups=32)
     y = relay.Function(analysis.free_vars(y), y)
     return y
Example #17
                             channels=1,
                             padding=(0, 0))
simple_net = relay.nn.relu(simple_net)
simple_net = relay.nn.avg_pool2d(simple_net,
                                 pool_size=(2, 2),
                                 strides=(2, 2),
                                 padding=(0, 0))
simple_net = relay.nn.relu(simple_net)
simple_net = relay.nn.conv2d(simple_net,
                             weight=conv2_weight,
                             kernel_size=(2, 2),
                             channels=1,
                             padding=(0, 0))
print("----------TEST2----------")
node = relay.analysis.free_vars(simple_net)
simple_net = relay.Function(node, simple_net)
print("----------TEST3----------")
net, params = testing.create_workload(simple_net)
print("-----NET.ASTEXT----------")
print(net.astext(show_meta_data=False))

print("----------TEST4----------")
opt_level = 0
target = tvm.target.cuda()
with relay.build_config(opt_level=opt_level):
    graph, lib, params = relay.build_module.build(net, target, params=params)

print("----------TEST5----------")
ctx = tvm.gpu()
#data = np.array([[[(1,2,3,4),(2,3,4,1),(3,4,1,2),(4,1,2,3)]]]).astype("float32")
data = np.array([[[(1, 2, 3, 4, 5, 6, 7, 8), (2, 3, 4, 5, 6, 7, 8, 9),
Example #18
 def before():
     x = relay.var("x", shape=(1, 64, 56, 56))
     y = relay.nn.global_max_pool2d(x)
     y = relay.Function([x], y)
     return y
Example #19
def test_byoc_microtvm(board, arduino_cli_cmd, tvm_debug, workspace_dir):
    """This is a simple test case to check BYOC capabilities of microTVM"""
    model = test_utils.ARDUINO_BOARDS[board]
    build_config = {"debug": tvm_debug}

    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))
    w2 = relay.var("w2", shape=(10, 10))
    w3 = relay.var("w3", shape=(10, 10))
    w4 = relay.var("w4", shape=(10, 10))
    w5 = relay.var("w5", shape=(10, 10))
    w6 = relay.var("w6", shape=(10, 10))
    w7 = relay.var("w7", shape=(10, 10))

    # C compiler
    z0 = relay.add(x, w0)
    p0 = relay.subtract(z0, w1)
    q0 = relay.multiply(p0, w2)

    z1 = relay.add(x, w3)
    p1 = relay.subtract(z1, w4)
    q1 = relay.multiply(p1, w5)

    # Other parts on TVM
    z2 = relay.add(x, w6)
    q2 = relay.subtract(z2, w7)

    r = relay.concatenate((q0, q1, q2), axis=0)
    f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r)
    mod = tvm.IRModule()
    ann = byoc.CcompilerAnnotator()
    mod["main"] = ann.visit(f)
    mod = tvm.relay.transform.PartitionGraph()(mod)
    mod = tvm.relay.transform.InferType()(mod)

    x_data = np.random.rand(10, 10).astype("float32")
    w_data = []
    for _ in range(8):
        w_data.append(np.random.rand(10, 10).astype("float32"))

    map_inputs = {"w{}".format(i): w_data[i] for i in range(8)}
    map_inputs["x"] = x_data
    check_result(
        relay_mod=mod,
        map_inputs=map_inputs,
        out_shape=(30, 10),
        result=np.concatenate(
            (
                ((x_data + w_data[0]) - w_data[1]) * w_data[2],
                ((x_data + w_data[3]) - w_data[4]) * w_data[5],
                x_data + w_data[6] - w_data[7],
            ),
            axis=0,
        ),
        model=model,
        build_config=build_config,
        arduino_board=board,
        arduino_cli_cmd=arduino_cli_cmd,
        workspace_dir=workspace_dir,
    )
Example #20
 def expected():
     add = relay.add(x, y)
     copy_add_sub = relay.device_copy(add, ctx2, ctx1)
     sub = relay.subtract(copy_add_sub, z)
     func = relay.Function([x, y, z], sub)
     return func
Example #21
def to_relay(graph, shape_dict, dtype_dict, params):
    """Convert an NNVM graph into the corresponding Relay expression.

    Parameters
    ----------
    graph : Graph
       The input graph.

    shape_dict : dict of str to shape
       The input shape.

    dtype_dict : dict of str to str/dtype
       The input dtypes.

    params : dict of str to array
        The parameters.

    Returns
    -------
    (expr, params) : Tuple[relay.Expr, dict of str to array]
        The corresponding Relay expression and parameters.
    """
    if isinstance(graph, Symbol):
        graph = graph_create(graph)

    param_shapes = dict((k, params[k].shape) for k in params)
    shape_dict = shape_dict.copy()
    shape_dict.update(param_shapes)
    graph = graph_attr.set_shape_inputs(graph, shape_dict)
    graph = graph_attr.set_dtype_inputs(graph, dtype_dict)
    graph = graph.apply(["InferShape", "InferType"])
    shape = graph.json_attr("shape")
    dtype = [graph_attr.TCODE_TO_DTYPE[di] for di in graph.json_attr("dtype")]

    gidx = graph.index
    relay_map = {}
    fn_params = []

    for nid, node in enumerate(gidx.nodes):
        children = []
        for i in node['inputs']:
            child = relay_map[i[0]]
            if isinstance(child, expr.TupleWrapper):
                children.append(child[i[1]])
            else:
                children.append(child)

        oshape = shape[gidx.entry_id(nid, 0)]
        odtype = dtype[gidx.entry_id(nid, 0)]
        attrs = node.get("attrs", {})
        node_name = node["name"]
        op_name = node["op"]

        if op_name == "null":
            v = var(node_name, shape=oshape, dtype=odtype)
            fn_params.append(v)
            relay_map[nid] = v
        else:
            if op_name in NNVM_OP_2_RELAY_OP:
                str_attrs = StrAttrsDict(attrs)
                call = NNVM_OP_2_RELAY_OP[op_name](children, str_attrs, odtype)
                relay_map[nid] = call
            else:
                raise Exception(
                    "nnvm.to_relay: unsupported operator: {0}".format(op_name))

    outputs = []
    for nid, idx, _ in gidx.output_entries:
        output = relay_map[nid]
        if isinstance(output, expr.TupleWrapper):
            outputs.append(output[idx])
        else:
            outputs.append(output)

    if len(outputs) == 1:
        body = outputs[0]
    else:
        body = expr.Tuple(outputs)

    func = relay.Function(fn_params, body)
    return func, params
Example #22
 def expected():
     add = relay.add(x, y)
     sub = relay.subtract(add, z)
     func = relay.Function([x, y, z], sub)
     return func
Example #23
def resnet(
    units,
    num_stages,
    filter_list,
    num_classes,
    data_shape,
    bottle_neck=True,
    layout="NCHW",
    dtype="float32",
):
    """Return ResNet Program.

    Parameters
    ----------
    units : list
        Number of units in each stage

    num_stages : int
        Number of stages

    filter_list : list
        Channel size of each stage

    num_classes : int
        Output size of the symbol

    data_shape : tuple of int.
        The shape of input data.

    bottle_neck : bool
        Whether to apply the bottleneck transformation.

    layout: str
        The data layout for conv2d

    dtype : str
        The global data type.
    """

    data_layout = layout
    kernel_layout = "OIHW" if layout == "NCHW" else "HWIO"
    bn_axis = data_layout.index("C")

    num_unit = len(units)
    assert num_unit == num_stages
    data = relay.var("data", shape=data_shape, dtype=dtype)
    data = layers.batch_norm_infer(data=data,
                                   epsilon=2e-5,
                                   axis=bn_axis,
                                   scale=False,
                                   name="bn_data")
    (_, _, height, _) = data_shape
    if layout == "NHWC":
        (_, height, _, _) = data_shape
    if height <= 32:  # such as cifar10
        body = layers.conv2d(
            data=data,
            channels=filter_list[0],
            kernel_size=(3, 3),
            strides=(1, 1),
            padding=(1, 1),
            name="conv0",
            data_layout=data_layout,
            kernel_layout=kernel_layout,
        )
    else:  # often expected to be 224 such as imagenet
        body = layers.conv2d(
            data=data,
            channels=filter_list[0],
            kernel_size=(7, 7),
            strides=(2, 2),
            padding=(3, 3),
            name="conv0",
            data_layout=data_layout,
            kernel_layout=kernel_layout,
        )
        body = layers.batch_norm_infer(data=body,
                                       epsilon=2e-5,
                                       axis=bn_axis,
                                       name="bn0")
        body = relay.nn.relu(data=body)
        body = relay.nn.max_pool2d(data=body,
                                   pool_size=(3, 3),
                                   strides=(2, 2),
                                   padding=(1, 1),
                                   layout=data_layout)

    for i in range(num_stages):
        body = residual_unit(
            body,
            filter_list[i + 1],
            (1 if i == 0 else 2, 1 if i == 0 else 2),
            False,
            name="stage%d_unit%d" % (i + 1, 1),
            bottle_neck=bottle_neck,
            data_layout=data_layout,
            kernel_layout=kernel_layout,
        )
        for j in range(units[i] - 1):
            body = residual_unit(
                body,
                filter_list[i + 1],
                (1, 1),
                True,
                name="stage%d_unit%d" % (i + 1, j + 2),
                bottle_neck=bottle_neck,
                data_layout=data_layout,
                kernel_layout=kernel_layout,
            )
    bn1 = layers.batch_norm_infer(data=body,
                                  epsilon=2e-5,
                                  axis=bn_axis,
                                  name="bn1")
    relu1 = relay.nn.relu(data=bn1)
    # Global average pooling needs no explicit kernel size; it covers the whole spatial extent.
    pool1 = relay.nn.global_avg_pool2d(data=relu1, layout=data_layout)
    flat = relay.nn.batch_flatten(data=pool1)
    fc1 = layers.dense_add_bias(data=flat, units=num_classes, name="fc1")
    net = relay.nn.softmax(data=fc1)
    return relay.Function(relay.analysis.free_vars(net), net)
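A usage sketch with a ResNet-18-style configuration (the values below are illustrative; `layers` and `residual_unit` are assumed to come from the surrounding `tvm.relay.testing.resnet` module):

net = resnet(
    units=[2, 2, 2, 2],
    num_stages=4,
    filter_list=[64, 64, 128, 256, 512],
    num_classes=1000,
    data_shape=(1, 3, 224, 224),
    bottle_neck=False,
)
mod = tvm.IRModule.from_expr(net)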
Example #24
def test_saturation():
    # Same params
    data_dtype = "uint8"
    x = relay.var("x", shape=(1, 4), dtype=data_dtype)
    y = relay.var("y", shape=(1, 4), dtype=data_dtype)
    z = relay.qnn.op.add(
        lhs=x,
        rhs=y,
        lhs_scale=relay.const(0.125, "float32"),
        lhs_zero_point=relay.const(0, "int32"),
        rhs_scale=relay.const(0.125, "float32"),
        rhs_zero_point=relay.const(0, "int32"),
        output_scale=relay.const(0.125, "float32"),
        output_zero_point=relay.const(0, "int32"),
    )

    func = relay.Function([x, y], z)
    mod = tvm.IRModule.from_expr(func)
    mod = relay.transform.InferType()(mod)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    func = mod["main"]
    mod = relay.transform.InferType()(mod)

    x_data = np.array((255, 1, 1, 0)).reshape((1, 4))
    y_data = np.array((255, 255, 128, 0)).reshape((1, 4))
    golden_output = np.array((255, 255, 129, 0)).reshape((1, 4))

    op_res = relay.create_executor("graph", device=tvm.cpu(0), target="llvm").evaluate(func)(
        x_data, y_data
    )
    np.testing.assert_equal(op_res.numpy(), golden_output)

    # Same params, different scale
    z = relay.qnn.op.add(
        lhs=x,
        rhs=y,
        lhs_scale=relay.const(0.125, "float32"),
        lhs_zero_point=relay.const(0, "int32"),
        rhs_scale=relay.const(0.125, "float32"),
        rhs_zero_point=relay.const(0, "int32"),
        output_scale=relay.const(0.25, "float32"),
        output_zero_point=relay.const(0, "int32"),
    )

    func = relay.Function([x, y], z)
    mod = tvm.IRModule.from_expr(func)
    mod = relay.transform.InferType()(mod)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    func = mod["main"]

    x_data = np.array((255, 1, 1, 0)).reshape((1, 4))
    y_data = np.array((255, 255, 127, 0)).reshape((1, 4))
    golden_output = np.array((255, 129, 65, 0)).reshape((1, 4))

    op_res = relay.create_executor("graph", device=tvm.cpu(0), target="llvm").evaluate(func)(
        x_data, y_data
    )
    np.testing.assert_equal(op_res.numpy(), golden_output)

    # Same io params, different output scale
    z = relay.qnn.op.add(
        lhs=x,
        rhs=y,
        lhs_scale=relay.const(0.125, "float32"),
        lhs_zero_point=relay.const(0, "int32"),
        rhs_scale=relay.const(0.125, "float32"),
        rhs_zero_point=relay.const(0, "int32"),
        output_scale=relay.const(0.25, "float32"),
        output_zero_point=relay.const(0, "int32"),
    )

    func = relay.Function([x, y], z)
    mod = tvm.IRModule.from_expr(func)
    mod = relay.transform.InferType()(mod)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    func = mod["main"]

    x_data = np.array((255, 1, 1, 0)).reshape((1, 4))
    y_data = np.array((255, 255, 127, 0)).reshape((1, 4))
    golden_output = np.array((255, 129, 65, 0)).reshape((1, 4))

    op_res = relay.create_executor("graph", device=tvm.cpu(0), target="llvm").evaluate(func)(
        x_data, y_data
    )
    np.testing.assert_equal(op_res.numpy(), golden_output)

    # All params different
    z = relay.qnn.op.add(
        lhs=x,
        rhs=y,
        lhs_scale=relay.const(0.5, "float32"),
        lhs_zero_point=relay.const(0, "int32"),
        rhs_scale=relay.const(0.25, "float32"),
        rhs_zero_point=relay.const(0, "int32"),
        output_scale=relay.const(0.125, "float32"),
        output_zero_point=relay.const(0, "int32"),
    )

    func = relay.Function([x, y], z)
    mod = tvm.IRModule.from_expr(func)
    mod = relay.transform.InferType()(mod)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    func = mod["main"]

    x_data = np.array((255, 0, 1, 0)).reshape((1, 4))
    y_data = np.array((0, 128, 64, 0)).reshape((1, 4))
    golden_output = np.array((255, 255, 132, 0)).reshape((1, 4))

    op_res = relay.create_executor("graph", device=tvm.cpu(0), target="llvm").evaluate(func)(
        x_data, y_data
    )
    np.testing.assert_equal(op_res.numpy(), golden_output)
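The golden outputs above (all zero points are 0) can be reproduced with a small NumPy reference. This is a sketch of the arithmetic rather than the actual QNN lowering: each operand is requantized into the output scale with round-half-up, the two are added in the integer domain, and the sum is saturated to the uint8 range.

import numpy as np

def qnn_add_ref(x, y, lhs_scale, rhs_scale, output_scale):
    # Requantize each operand into the output scale (round half up), add, saturate to uint8.
    requant = lambda v, s: np.floor(v.astype("float64") * s / output_scale + 0.5)
    return np.clip(requant(x, lhs_scale) + requant(y, rhs_scale), 0, 255).astype("uint8")

# The "All params different" case: reproduces the golden output (255, 255, 132, 0).
print(qnn_add_ref(np.array([255, 0, 1, 0]), np.array([0, 128, 64, 0]), 0.5, 0.25, 0.125))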
Example #25
def test_list_constructor():
    test_consz = relay.GlobalVar("test_consz")
    func = relay.Function([], cons(z(), nil()))
    mod[test_consz] = func
    assert mod[test_consz].body.checked_type == l(nat())
Example #26
 def create_model():
     ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
     ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
     c1 = relay.left_shift(ifm, ifm2)
     return tvm.IRModule.from_expr(relay.Function([ifm, ifm2], c1))
Example #27
def test_compose():
    n = relay.Var("n")
    inc = relay.Function([n], s(n))
    x = relay.Var("x")
    res = intrp.evaluate(relay.Call(compose(inc, double), [s(s(z()))]))
    assert count(res) == 5
Example #28
 def create_model():
     ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
     reshape = relay.op.reshape(ifm, newshape=new_shape)
     return tvm.IRModule.from_expr(relay.Function([ifm], reshape))
Example #29
def tune_cutlass_function(
    func,
    use_3xtf32,
    split_k_slices,
    profile_all_alignments,
    find_first_valid,
    use_multiprocessing,
    gemm_profiler,
    conv2d_profiler,
):
    """Given a function intended to be offloaded to CUTLASS,  profile each workload to select which
    kernels to emit.

    Parameters
    ----------
    func : relay.Function
        The Relay function to tune.

    use_3xtf32 : bool
        Whether or not to use the slower but more accurate (compared to tf32) 3xtf32 mode for
        fp32 inputs on Tensor Cores.

    split_k_slices : list of int
        Split factor candidates for split-K GEMM. If split-K > 1, the GEMM K-loop is computed in
        parallel across split-K blocks, and a separate global reduction kernel is launched to
        accumulate partial reductions. The profiler will pick the best split-K factor from the
        given candidate list. Note that a larger split-K factor requires a larger workspace.
        Currently, parallel split-K has been tested only for wgrad. For GEMM and other conv2d
        kinds, split_k_slices is ignored.

    profile_all_alignments : bool
        When True, profile all kernel variants with smaller alignments than the largest possible.

    find_first_valid : bool
        Whether to profile all candidate kernels, or stop profiling after
        the first applicable kernel is found.

    use_multiprocessing : bool
        Whether or not to compile profiler executables for different kernels in parallel.

    gemm_profiler : CutlassGemmProfiler
        Profiler for dense operators. May cache results between tuned functions.

    conv2d_profiler : CutlassConv2DProfiler
        Profiler for conv2d operators. May cache results between tuned functions.

    Returns
    -------
    annot_func : Function
        The input function with attributes capturing the best CUTLASS kernel found by tuning.
    """
    annotator = OpAnnotator()
    annotator.visit(func)
    out_shape = annotator.signature["ret_shape"]
    out_dtype = annotator.signature["ret_dtype"]
    op_type = annotator.signature["op_type"]

    new_attrs = {"op_type": op_type}
    new_attrs.update(annotator.signature)
    new_attrs.update(func.attrs)
    arg0_shape = new_attrs["arg0_shape"]
    arg1_shape = new_attrs["arg1_shape"]
    arg0_dtype = new_attrs["arg0_dtype"]
    arg1_dtype = new_attrs["arg1_dtype"]

    if "conv2d" in op_type:
        new_attrs["padding"] = annotator.op_attrs.padding
        new_attrs["strides"] = annotator.op_attrs.strides
        new_attrs["dilation"] = annotator.op_attrs.dilation

        if "conv2d_transpose" in op_type:
            d_shape = out_shape
            w_shape = arg1_shape
        elif "conv2d_backward_weight" in op_type:
            d_shape = arg1_shape
            w_shape = out_shape
        else:
            d_shape = arg0_shape
            w_shape = arg1_shape

        new_attrs.update(
            handle_conv2d(
                conv2d_profiler,
                op_type,
                d_shape,
                w_shape,
                annotator.op_attrs.padding,
                annotator.op_attrs.strides,
                annotator.op_attrs.dilation,
                out_dtype,
                arg0_dtype,
                arg1_dtype,
                use_3xtf32,
                split_k_slices,
                profile_all_alignments,
                find_first_valid,
                use_multiprocessing,
            ))
    elif "batch_matmul" in op_type:
        new_attrs.update(
            handle_batch_matmul(
                gemm_profiler,
                op_type,
                arg0_shape,
                arg1_shape,
                out_dtype,
                arg0_dtype,
                arg1_dtype,
                use_3xtf32,
                find_first_valid,
                use_multiprocessing,
            ))
    elif "dense" in op_type:
        new_attrs.update(
            handle_dense(
                gemm_profiler,
                op_type,
                arg0_shape,
                arg1_shape,
                out_dtype,
                arg0_dtype,
                arg1_dtype,
                use_3xtf32,
                find_first_valid,
                use_multiprocessing,
            ))
    else:
        raise ValueError("%s unsupported composite" % op_type)

    new_attrs = tvm.ir.make_node("DictAttrs", **new_attrs)
    return relay.Function(
        func.params,
        func.body,
        ret_type=func.ret_type,
        type_params=func.type_params,
        attrs=new_attrs,
    )
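A hedged usage sketch for `tune_cutlass_function`. The profiler constructor arguments and paths are illustrative assumptions (an sm_80 GPU, a CUTLASS checkout, and a scratch directory), and `func` is expected to be a pattern-partitioned Relay function carrying a "Composite" attribute:

from tvm.contrib.cutlass.gen_gemm import CutlassGemmProfiler
from tvm.contrib.cutlass.gen_conv2d import CutlassConv2DProfiler

sm = 80  # compute capability of the target GPU (assumption)
gemm_profiler = CutlassGemmProfiler(sm, "/path/to/cutlass", "/tmp/cutlass_profile")
conv2d_profiler = CutlassConv2DProfiler(sm, "/path/to/cutlass", "/tmp/cutlass_profile")
annotated = tune_cutlass_function(
    func,
    use_3xtf32=True,
    split_k_slices=[1],
    profile_all_alignments=False,
    find_first_valid=True,          # stop at the first applicable kernel
    use_multiprocessing=False,
    gemm_profiler=gemm_profiler,
    conv2d_profiler=conv2d_profiler,
)
print(annotated.attrs["op_type"])  # the composite op type recorded alongside the tuned kernel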
Example #30
def test_negative_grad():
    data = relay.var("data", relay.TensorType((10, 4), "float32"))
    fwd_func = relay.Function([data], relay.negative(data))
    check_grad(fwd_func)