Example #1
def _lower(mod, target, params):
    """Helper to lower VTA properly."""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_executor_codegen

    if hasattr(target, "device_name") and target.device_name == "vta":
        import vta

        with vta.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            mod, _ = relay.optimize(mod, target, params)
            grc = graph_executor_codegen.GraphExecutorCodegen(None, target)
            grc.codegen(mod["main"])
            return

    # default case
    # Try graph codegen first to extract autotvm tasks.
    # If failed to compile, then fallback to use VM compiler.
    # TODO: Currently VM compiler is likely to stack overflow for large models.
    try:
        # TODO(jwfromm) Remove this once AlterOpLayout bug that mutates
        # source module is fixed. Until then, create a clone.
        mod_clone = deepcopy(mod)
        opt_mod, _ = relay.optimize(mod_clone, target, params)
        grc = graph_executor_codegen.GraphExecutorCodegen(None, target)
        grc.codegen(opt_mod["main"])
    except tvm.TVMError as e:
        print("Get errors with GraphExecutorCodegen for task extraction. "
              "Fallback to VMCompiler. Error details:\n%s" % str(e))
        mod_clone = deepcopy(mod)
        compiler = relay.vm.VMCompiler()
        if params:
            compiler.set_params(params)
        compiler.lower(mod_clone, target=target)
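
A minimal sketch of driving this helper, assuming the module-level context the
snippet relies on (import tvm and from copy import deepcopy) is present; the
tiny ReLU workload and the "llvm" target are illustrative, not from the
original:

import tvm
from tvm import relay

x = relay.var("x", shape=(1, 3, 224, 224), dtype="float32")
mod = tvm.IRModule.from_expr(relay.Function([x], relay.nn.relu(x)))
_lower(mod, tvm.target.Target("llvm"), {})  # lowers; tasks are recorded as a side effect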
Example #2
def _lower(mod, target, params):
    """Helper to lower VTA properly."""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    if hasattr(target, 'device_name') and target.device_name == "vta":
        import vta
        with vta.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            mod, _ = relay.optimize(mod, target, params)
            grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
            grc.codegen(mod["main"])
            return

    # default case
    # Try graph codegen first to extract autotvm tasks.
    # If failed to compile, then fallback to use VM compiler.
    # TODO: Currently VM compiler is likely to stack overflow for large models.
    try:
        opt_mod, _ = relay.optimize(mod, target, params)
        grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
        grc.codegen(opt_mod["main"])
    except tvm.TVMError:
        compiler = relay.vm.VMCompiler()
        if params:
            compiler.set_params(params)
        compiler.lower(mod, target=target)
Example #3
def call_all_topi_funcs(mod, params, target):
    """Call all TOPI compute to extract auto_scheduler tasks in a Relay program"""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    # Turn off AutoTVM config not found warnings
    old_autotvm_silent = autotvm.GLOBAL_SCOPE.silent
    autotvm.GLOBAL_SCOPE.silent = True

    with transform.PassContext(
            opt_level=3,
            config={
                "relay.backend.use_auto_scheduler": True,
                "relay.backend.disable_compile_engine_cache": True,
            },
            disabled_pass={"AutoSchedulerLayoutRewrite"},
    ):
        try:
            opt_mod, _ = relay.optimize(mod, target, params)
            grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
            grc.codegen(opt_mod["main"])
        except tvm.TVMError:
            print("Get errors with GraphRuntimeCodegen for task extraction. "
                  "Fallback to VMCompiler.")
            compiler = relay.vm.VMCompiler()
            if params:
                compiler.set_params(params)
            mod = tvm.IRModule.from_expr(mod) if isinstance(
                mod, relay.Function) else mod
            compiler.lower(mod, target)

    autotvm.GLOBAL_SCOPE.silent = old_autotvm_silent
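
For context, this function is the worker that task extraction runs; a hedged
sketch of the public entry point that ends up invoking it, assuming the
tvm.auto_scheduler API of the same era and a (mod, params) pair from a Relay
frontend:

from tvm import auto_scheduler

tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)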
Example #4
def test_annotate_spans_compatibility():
    data = relay.var("data", relay.TensorType((1, 3, 64, 64), "float32"))
    weight = relay.var("weight")

    bn_gamma = relay.var("bn_gamma")
    bn_beta = relay.var("bn_beta")
    bn_mmean = relay.var("bn_mean")
    bn_mvar = relay.var("bn_var")

    simple_net = relay.nn.conv2d(data=data,
                                 weight=weight,
                                 kernel_size=(3, 3),
                                 channels=3,
                                 padding=(1, 1))
    simple_net = relay.nn.batch_norm(simple_net, bn_gamma, bn_beta, bn_mmean,
                                     bn_mvar)[0]
    simple_net = relay.Function(relay.analysis.free_vars(simple_net),
                                simple_net)

    module, params = testing.create_workload(simple_net)

    # Apply some simple passes to legalize the IR.
    with tvm.transform.PassContext(opt_level=0):
        module, params = relay.optimize(module,
                                        tvm.testing.enabled_targets()[0][0],
                                        params)

    seq = tvm.transform.Sequential(
        [relay.transform.AnnotateSpans(),
         relay.transform.DefuseOps()])
    with tvm.transform.PassContext(opt_level=3):
        module = seq(module)
Example #5
def test_alter_layout_conv2d():
    """Additional layout transformations should occour on the graph.
    """
    def convnet():
        """Alternating layout of simple convnet (from image super-resolution).
        """
        bias1 = relay.var('bias1', shape=(64, ))
        bias2 = relay.var('bias2', shape=(64, ))
        bias3 = relay.var('bias3', shape=(64, ))
        bias4 = relay.var('bias4', shape=(64, ))
        weight1 = relay.var('weight1', shape=(64, 1, 5, 5))
        weight2 = relay.var('weight2', shape=(64, 64, 3, 3))
        weight3 = relay.var('weight3', shape=(64, 64, 3, 3))
        weight4 = relay.var('weight4', shape=(64, 64, 3, 3))
        data = relay.var("x", shape=(1, 1, 224, 224))
        n00 = relay.nn.conv2d(data,
                              weight1,
                              padding=[2, 2],
                              kernel_size=[5, 5])
        n01 = relay.expand_dims(bias1, axis=1, num_newaxis=2)
        n02 = relay.add(n00, n01)
        n03 = relay.nn.relu(n02)
        n04 = relay.nn.conv2d(n03, weight2, padding=[1, 1], kernel_size=[3, 3])
        n05 = relay.expand_dims(bias2, axis=1, num_newaxis=2)
        n06 = relay.add(n04, n05)
        n07 = relay.nn.relu(n06)
        n08 = relay.nn.conv2d(n07, weight3, padding=[1, 1], kernel_size=[3, 3])
        n09 = relay.expand_dims(bias3, axis=1, num_newaxis=2)
        n10 = relay.add(n08, n09)
        n11 = relay.nn.relu(n10)
        n12 = relay.nn.conv2d(n11, weight4, padding=[1, 1], kernel_size=[3, 3])
        n13 = relay.expand_dims(bias4, axis=1, num_newaxis=2)
        n14 = relay.add(n12, n13)
        n15 = relay.reshape(n14, newshape=[1, 1, 3, 3, 224, 224])
        n16 = relay.transpose(n15, axes=[0, 1, 4, 2, 5, 3])
        net = relay.reshape(n16, newshape=[1, 1, 672, 672])
        args = relay.ir_pass.free_vars(net)
        return relay.Function(args, net)

    # orig net
    N = convnet()
    N = infer_type(N)

    # trigger a test
    # for each known alter_conv2d
    targets = [
        'cuda', 'opencl -device=mali', 'opencl -device=intel_graphics',
        'llvm -device=arm_cpu', 'llvm -device=core-avx-ii'
    ]

    for tgt in targets:
        with tvm.target.create(tgt) as target:
            with relay.build_config(opt_level=-1, add_pass='******'):
                with autotvm.tophub.context(target):
                    O = relay.optimize(N, target, params=None)
                    O = relay.ir_pass.infer_type(O)

                    # graph should differ
                    assert not relay.ir_pass.alpha_equal(N, O)
Example #6
def build_graph(mod, target):
    target = relay.build_module.build_target_by_device_type_map(target)
    target, target_host = tvm.target.Target.check_and_update_host_consist(target)
    mod, _ = relay.optimize(mod, target, None)
    grc = graph_executor_codegen.GraphExecutorCodegen(None, target)
    _, lowered_funcs, _ = grc.codegen(mod, mod["main"])
    _ = relay.backend._backend.build(lowered_funcs, target, target_host)
Example #7
def test_alter_layout_conv2d():
    """Additional layout transformations should occour on the graph.
    """

    def convnet():
        """Alternating layout of simple convnet (from image super-resolution).
        """
        bias1 = relay.var('bias1', shape=(64,))
        bias2 = relay.var('bias2', shape=(64,))
        bias3 = relay.var('bias3', shape=(64,))
        bias4 = relay.var('bias4', shape=(64,))
        weight1 = relay.var('weight1', shape=(64, 1, 5, 5))
        weight2 = relay.var('weight2', shape=(64, 64, 3, 3))
        weight3 = relay.var('weight3', shape=(64, 64, 3, 3))
        weight4 = relay.var('weight4', shape=(64, 64, 3, 3))
        data = relay.var("x", shape=(1, 1, 224, 224))
        n00 = relay.nn.conv2d(data, weight1, padding=[2, 2], kernel_size=[5, 5])
        n01 = relay.expand_dims(bias1, axis=1, num_newaxis=2)
        n02 = relay.add(n00, n01)
        n03 = relay.nn.relu(n02)
        n04 = relay.nn.conv2d(n03, weight2, padding=[1, 1], kernel_size=[3, 3])
        n05 = relay.expand_dims(bias2, axis=1, num_newaxis=2)
        n06 = relay.add(n04, n05)
        n07 = relay.nn.relu(n06)
        n08 = relay.nn.conv2d(n07, weight3, padding=[1, 1], kernel_size=[3, 3])
        n09 = relay.expand_dims(bias3, axis=1, num_newaxis=2)
        n10 = relay.add(n08, n09)
        n11 = relay.nn.relu(n10)
        n12 = relay.nn.conv2d(n11, weight4, padding=[1, 1], kernel_size=[3, 3])
        n13 = relay.expand_dims(bias4, axis=1, num_newaxis=2)
        n14 = relay.add(n12, n13)
        n15 = relay.reshape(n14, newshape=[1, 1, 3, 3, 224, 224])
        n16 = relay.transpose(n15, axes=[0, 1, 4, 2, 5, 3])
        net = relay.reshape(n16, newshape=[1, 1, 672, 672])
        args = relay.ir_pass.free_vars(net)
        return relay.Function(args, net)

    # orig net
    N = convnet()
    N = infer_type(N)

    # trigger a test
    # for each known alter_conv2d
    targets = ['cuda',
               'opencl -device=mali',
               'opencl -device=intel_graphics',
               'llvm -device=arm_cpu',
               'llvm -device=core-avx-ii']

    for tgt in targets:
        with tvm.target.create(tgt) as target:
            with relay.build_config(opt_level=-1, add_pass='******'):
                with autotvm.tophub.context(target):
                    O = relay.optimize(N, target, params=None)
                    O = relay.ir_pass.infer_type(O)

                    # graph should differ
                    assert not relay.ir_pass.alpha_equal(N, O)
Example #8
def test_softmax():
    x = relay.var("x", shape=(1, 16), dtype="float32")
    y = relay.nn.softmax(x)
    func = relay.Function([x], y)
    mod = tvm.IRModule.from_expr(func)

    with tvm.transform.PassContext(opt_level=3, required_pass=["FastMath"]):
        fast_mod = relay.optimize(mod, target="llvm")
    assert "nn.fast_softmax" in fast_mod[0].astext()
Example #9
def call_all_topi_funcs(mod, params, target):
    """Call all TOPI compute + schedule to extract tasks in a relay program"""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    with transform.PassContext(opt_level=3):
        opt_mod, _ = relay.optimize(mod, target, params)
        grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
        grc.codegen(opt_mod["main"])
Example #10
def _lower(func, target, params):
    """Helper to lower VTA properly."""

    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    if hasattr(target, 'device_name') and target.device_name == "vta":
        with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            import vta
            with vta.build_config():
                mod, _ = relay.optimize(func, target, params)
                grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
                return grc.codegen(mod["main"])
    # default case
    mod, _ = relay.optimize(func, target, params)
    grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
    return grc.codegen(mod["main"])
Example #11
def build_model(args, gluon_model):
    """Build with relay."""
    import tvm
    from tvm import relay
    from tvm.relay import quantize as qtz
    img_size = 299 if args.model == 'inceptionv3' else 224
    data_shape = (args.batch_size, 3, img_size, img_size)
    net, params = relay.frontend.from_mxnet(gluon_model, {"data": data_shape})
    target = args.target

    if args.original:
        # run original model
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build(net, target, params=params)
        ctx = tvm.nd.context(target, 0)
        return graph, lib, params, ctx

    # constant folding and scale folding.
    print('original')
    print(net.astext(show_meta_data=False))
    with relay.build_config(opt_level=3):
        qgraph = relay.optimize(net, target, params)
        # qgraph = relay.optimize(qgraph)
    print('after optimize')
    print(qgraph.astext(show_meta_data=False))

    with qtz.qconfig(skip_k_conv=0,
                     nbit_input=args.nbit_input,
                     nbit_weight=args.nbit_input,
                     global_scale=args.global_scale,
                     dtype_input=args.dtype_input,
                     dtype_weight=args.dtype_input,
                     dtype_activation=args.dtype_output,
                     store_lowbit_output=False,
                     debug_enabled_ops=None):
        print(qtz.current_qconfig())
        qgraph = qtz.annotate(qgraph)
        print('after annotate')
        print(qgraph.astext(show_meta_data=False))
        qgraph = qtz.calibrate(qgraph)
        print('after calibrate\n')
        print(qgraph.astext(show_meta_data=False))
        if not args.simulated:
            qgraph = qtz.realize(qgraph)
            qgraph = relay.ir_pass.infer_type(qgraph)
            print('after realize\n')
            print(qgraph.astext(show_meta_data=False))

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(qgraph, target)
    ctx = tvm.nd.context(target, 0)
    return graph, lib, params, ctx
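
A minimal sketch of consuming the returned tuple, assuming the older
tvm.contrib.graph_runtime API that matches this snippet's tvm.nd.context
usage; input_array is a placeholder for an NDArray of shape data_shape:

from tvm.contrib import graph_runtime

graph, lib, params, ctx = build_model(args, gluon_model)
m = graph_runtime.create(graph, lib, ctx)
m.set_input(**params)
m.set_input("data", input_array)  # "data" is the input name used in the frontend call above
m.run()
out = m.get_output(0)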
Example #12
def test_erf():
    x = relay.var("x", shape=(1, 16, 16, 16), dtype="float32")
    y = relay.erf(x)
    func = relay.Function([x], y)
    mod = tvm.IRModule.from_expr(func)

    fast_mod = FastMath()(mod)
    assert "fast_erf" in fast_mod.astext()

    # Check that FastMath option works for relay.build.
    with tvm.transform.PassContext(opt_level=3, required_pass=["FastMath"]):
        fast_mod = relay.optimize(mod, target="llvm", params=None)
    assert "fast_erf" in fast_mod[0].astext()
Example #13
def test_tanh():
    x = relay.var("x", shape=(1, 16, 16, 16), dtype="float32")
    y = relay.tanh(x)
    func = relay.Function([x], y)
    mod = tvm.IRModule.from_expr(func)

    fast_mod = FastMath()(mod)
    assert "fast_tanh" in fast_mod.astext()

    # Check that FastMath option works for relay.build.
    with relay.build_config(opt_level=3, required_pass=['FastMath']):
        fast_mod = relay.optimize(mod, target='llvm', params=None)
    assert "fast_tanh" in fast_mod[0].astext()
Example #14
def call_all_topi_funcs(mod, params, target):
    """Call all TOPI compute to extract auto_scheduler tasks in a Relay program"""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    # Turn off AutoTVM config not found warnings
    old_autotvm_silent = autotvm.GLOBAL_SCOPE.silent
    autotvm.GLOBAL_SCOPE.silent = True

    with transform.PassContext(opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
        opt_mod, _ = relay.optimize(mod, target, params)
        grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
        grc.codegen(opt_mod["main"])

    autotvm.GLOBAL_SCOPE.silent = old_autotvm_silent
Example #15
def _lower(mod, target, params):
    """Helper to lower VTA properly."""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    if hasattr(target, 'device_name') and target.device_name == "vta":
        with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            import vta
            with vta.build_config():
                mod, _ = relay.optimize(mod, target, params)
                grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
                grc.codegen(mod["main"])
    # default case
    compiler = relay.vm.VMCompiler()
    if params:
        compiler.set_params(params)
    compiler.lower(mod, target=target)
Example #16
def call_all_topi_funcs(mod, params, target, opt_level=3):
    """Call all TOPI compute to extract auto_scheduler tasks in a Relay program"""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_executor_codegen

    # Turn off AutoTVM config not found warnings
    old_autotvm_silent = autotvm.GLOBAL_SCOPE.silent
    autotvm.GLOBAL_SCOPE.silent = True

    with transform.PassContext(
        opt_level=opt_level,
        config={
            "relay.backend.use_auto_scheduler": True,
            "relay.backend.disable_compile_engine_cache": True,
        },
        disabled_pass={"AutoSchedulerLayoutRewrite"},
    ):
        try:
            # TODO(jwfromm) Remove this once AlterOpLayout bug that mutates
            # source module is fixed. Until then, create a clone.
            mod_clone = deepcopy(mod)
            opt_mod, _ = relay.optimize(mod_clone, target, params)
            grc = graph_executor_codegen.GraphExecutorCodegen(None, target)
            grc.codegen(opt_mod["main"])
        except tvm.TVMError:
            print(
                "Got errors with GraphExecutorCodegen for task extraction. "
                "Falling back to VMCompiler."
            )
            mod_clone = deepcopy(mod)
            compiler = relay.vm.VMCompiler()
            if params:
                compiler.set_params(params)
            mod_clone = (
                tvm.IRModule.from_expr(mod_clone)
                if isinstance(mod_clone, relay.Function)
                else mod_clone
            )
            compiler.lower(mod_clone, target)

    autotvm.GLOBAL_SCOPE.silent = old_autotvm_silent
Example #17
def _run_tvm(data, proto_file, blob_file):
    """ Run caffe model by TVM according to .caffemodel and .prototxt"""
    init_net = pb.NetParameter()
    predict_net = pb.NetParameter()

    # load model
    with open(proto_file, "r") as f:
        text_format.Merge(f.read(), predict_net)
    # load blob
    with open(blob_file, "rb") as f:
        init_net.ParseFromString(f.read())

    shape_dict = dict()
    dtype_dict = dict()
    if isinstance(data, (tuple, list)):
        for idx, d in enumerate(data):
            shape_dict["data" + str(idx)] = d.shape
            dtype_dict["data" + str(idx)] = "float32"
    else:
        shape_dict = {"data": data.shape}
        dtype_dict = {"data": "float32"}

    #print("++++++++++++++++++++++++++++++++++++++++")
    net, params = relay.frontend.from_caffe(init_net, predict_net, shape_dict, dtype_dict)

    tg = "dpu"
    print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$")
    
    #mod, _ = relay.optimize(net, tg, params)
    with relay.build_config(opt_level=0):
        mod, _ = relay.optimize(net, tg, params)
        #lib = relay.build(mod, target=target, target_host=target_host, params=params)
        #graph, lib, params = relay.build_module.build(mod, target=tg, params=params)
        graph0, func0, params0 = graph_runtime_codegen.GraphRuntimeCodegen(None, tg).codegen(mod["main"])
        dtype = "float32"
        func = tvm.build(func0, tg, name="default_function")
    with open('/home/wangjj/wujq/test/alexnetCode.c', 'w') as f:
        print(func.get_source(), file=f)
    print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$")
Example #18
def _lower(mod, target, params, opt_level=3):
    """Helper to lower VTA properly."""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_executor_codegen

    if hasattr(target, "device_name") and target.device_name == "vta":
        import vta

        with vta.build_config(opt_level=opt_level, disabled_pass={"AlterOpLayout"}):
            mod, _ = relay.optimize(mod, target, params)
            grc = graph_executor_codegen.GraphExecutorCodegen(None, target)
            grc.codegen(mod, mod["main"])
            return

    # Alter op layout code has been written expecting that tuning is applied
    # without it, so we disable AlterOpLayout to maintain that behavior.
    with tvm.transform.PassContext(opt_level=opt_level, disabled_pass={"AlterOpLayout"}):
        compiler = relay.vm.VMCompiler()
        if params:
            compiler.set_params(params)
        compiler.lower(mod, target=target)
Example #19
simple_net = relay.nn.bias_add(data=simple_net, bias=bias6)
simple_net = relay.nn.relu(data=simple_net)

simple_net = relay.nn.dense(data=simple_net, weight=conv7_weight, units=1000)
simple_net = relay.nn.bias_add(data=simple_net, bias=bias7)
simple_net = relay.nn.relu(data=simple_net)

simple_net = relay.nn.dense(data=simple_net, weight=conv8_weight, units=1000)
simple_net = relay.nn.bias_add(data=simple_net, bias=bias8)

simple_net = relay.reshape(simple_net, (batch_size, 1000))
simple_net = relay.nn.softmax(data=simple_net)


tic = timer()
node = relay.analysis.free_vars(simple_net)
simple_net = relay.Function(node, simple_net)
net, params = testing.create_workload(simple_net)
tg = "c"

with relay.build_config(opt_level=3):
    mod, _ = relay.optimize(net, tg, params)
    graph0, func0, params0 = graph_runtime_codegen.GraphRuntimeCodegen(None, tg).codegen(mod["main"])
    func = tvm.build(func0, tg, name="default_function")
toc = timer()

print("AlexNet compile on TVM time is : ", (toc-tic))
Example #20
data = relay.var("data", relay.TensorType((batch_size,3,4,4), "float32"))
conv1_weight = relay.var("conv1_weight")
conv2_weight = relay.var("conv2_weight")
dense1_weight = relay.var("dense1_weight")
dense2_weight = relay.var("dense2_weight")

simple_net = relay.nn.conv2d(data=data, weight=conv1_weight, kernel_size=(2, 2), channels=2, strides=(2, 2), padding=(1, 1))
#simple_net = relay.nn.max_pool2d(simple_net,pool_size=(2, 2),strides=(2, 2),padding=(1, 1))
#simple_net = relay.nn.batch_flatten(simple_net)
#simple_net = relay.nn.dense(simple_net, dense1_weight,units=10)
#simple_net = relay.nn.relu(simple_net)
#simple_net = relay.nn.softmax(simple_net,1)

node = relay.analysis.free_vars(simple_net)
print("**************test1*************")
simple_net = relay.Function(node, simple_net)
print("**************test2*************")
net, params_tmp = testing.create_workload(simple_net)
target="cuda"
print("**************test3*************")
mod, _ = relay.optimize(net, target, params_tmp)
print("**************test4*************")
grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
print("**************test5*************")
graph0, func0, params0 = grc.codegen(mod["main"])
print(func0)
print("**************test6*************")
func = tvm.build(func0, target, name="default_function")
print("**************test7*************")
print(func.get_source())
Example #21
# ---------------------------------------------
if use_gpu:
    backend_target = 'cuda'  # or 'llvm'
    target = tvm.target.create('%s -model=%s' % (backend_target, gpu_model))
    ctx = tvm.gpu(0)
else:
    # cpu settings
    if deploy_remote:
        target = 'llvm -target=aarch64-linux-gnu'
    else:
        target = 'llvm'
    ctx = tvm.cpu(0)

with relay.build_config(opt_level=3):
    ssd_module, ssd_params = relay.optimize(ssd_module,
                                            target=target,
                                            params=ssd_params)
    graph, lib, params = relay.build(
        ssd_module,
        target=target,
        # target_host='llvm',
        params=ssd_params)

# export weights
if export_weight:
    # store IR representation.
    export_ssd_module = os.path.join(DEPLOY_WEIGHT_DIR, "ssd_module.json")
    export_lib = os.path.join(DEPLOY_WEIGHT_DIR, "ssd_lib.so")
    export_graph = os.path.join(DEPLOY_WEIGHT_DIR, "ssd_graph.json")
    export_params = os.path.join(DEPLOY_WEIGHT_DIR, "ssd_param.params")
    lib.export_library(export_lib)
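    # The paths for the graph and parameters are defined above but never
    # written; a hedged completion using the same relay.save_param_dict API
    # seen in Example #22 (the module-IR JSON is left out, since its
    # serialization is not shown in the original):
    with open(export_graph, "w") as f:
        f.write(graph)
    with open(export_params, "wb") as f:
        f.write(relay.save_param_dict(params))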
Example #22
def build_graph(mod, target):
    target, target_host = tvm.target.Target.canon_target_and_host(target)
    mod, _ = relay.optimize(mod, target)
    grc = graph_executor_codegen.GraphExecutorCodegen(None, target)
    _, lowered_funcs, _ = grc.codegen(mod, mod["main"])
    _ = relay.backend._backend.build(lowered_funcs, target)
def quantize_model(args):
    """Build with relay."""
    import tvm
    from tvm import relay
    from tvm.relay import quantize as qtz
    img_size = 224
    data_shape = (args.batch_size, 3, img_size, img_size)
    mx_sym, mx_args, mx_auxs = mx.model.load_checkpoint(args.model, 0)
    net, params = relay.frontend.from_mxnet(mx_sym, {"data": data_shape},
                                            arg_params=mx_args,
                                            aux_params=mx_auxs)
    target = args.target

    if args.original:
        # run original model
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build(net, target, params=params)
        ctx = tvm.nd.context(target, 0)
        return graph, lib, params, ctx

    # constant folding and scale folding.
    # print('original')
    # print(net.astext(show_meta_data=False))
    with relay.build_config(opt_level=3):
        qgraph = relay.optimize(net, target, params)
    # print('after optimize')
    # print(qgraph.astext(show_meta_data=False))

    with qtz.qconfig(skip_k_conv=0,
                     nbit_input=args.nbit_input,
                     nbit_weight=args.nbit_input,
                     global_scale=args.global_scale,
                     dtype_input=args.dtype_input,
                     dtype_weight=args.dtype_input,
                     dtype_activation=args.dtype_output,
                     store_lowbit_output=False,
                     debug_enabled_ops=None):
        print(qtz.current_qconfig())
        qgraph = qtz.annotate(qgraph)
        # print('after annotate')
        # print(qgraph.astext(show_meta_data=False))
        qgraph = qtz.calibrate(qgraph)
        # print('after calibrate\n')
        # print(qgraph.astext(show_meta_data=False))
        if not args.simulated:
            qgraph = qtz.realize(qgraph)
            qgraph = relay.ir_pass.infer_type(qgraph)
            # print('after realize\n')
            # print(qgraph.astext(show_meta_data=False))

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(qgraph, target)

    ### save/load the graph, lib and params into separate files
    # save
    lib.export_library(os.path.join(thisdir, "deploy_lib.so"))
    with open(os.path.join(thisdir, "deploy_graph.json"), "w") as fo:
        fo.write(graph)
    with open(os.path.join(thisdir, "deploy_param.params"), "wb") as fo:
        fo.write(relay.save_param_dict(params))
    # load
    graph = open(os.path.join(thisdir, "deploy_graph.json")).read()
    lib = tvm.module.load(os.path.join(thisdir, "deploy_lib.so"))
    params = bytearray(
        open(os.path.join(thisdir, "deploy_param.params"), "rb").read())

    ctx = tvm.nd.context(target, 0)
    return graph, lib, params, ctx