Code Example #1
File: tune_resnet.py Project: r25willi/tvm
def compile_network(opt, env, target):

    # Populate the shape and data type dictionary
    dtype_dict = {"data": 'float32'}
    shape_dict = {"data": (env.BATCH, 3, 224, 224)}

    # Get off the shelf gluon model, and convert to relay
    gluon_model = vision.get_model(opt.model, pretrained=True)
    mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)

    # Update shape and type dictionary
    shape_dict.update({k: v.shape for k, v in params.items()})
    dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

    # Perform quantization in Relay
    with relay.quantize.qconfig(global_scale=8.0,
                                skip_conv_layers=[0]):
        relay_prog = relay.quantize.quantize(mod[mod.entry_func], params=params)

    # Perform graph packing and constant folding for VTA target
    if target.device_name == "vta":
        assert env.BLOCK_IN == env.BLOCK_OUT
        relay_prog = graph_pack(
            relay_prog,
            env.BATCH,
            env.BLOCK_OUT,
            env.WGT_WIDTH,
            start_name=opt.start_name,
            stop_name=opt.stop_name)

    return relay_prog, params
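The snippets on this page share a common preamble that the aggregator has trimmed. A minimal sketch of the imports they assume, matching the TVM 0.6/0.7-era module layout (paths may differ in newer releases):

# Hypothetical preamble; exact module paths depend on the TVM version in use.
import time
import numpy as np
import tvm
from tvm import autotvm, relay, rpc
from tvm.contrib import graph_runtime, util
from mxnet.gluon.model_zoo import vision   # Gluon model zoo
import vta
from vta.top import graph_pack              # VTA graph-packing pass
from vta.testing import simulator           # used by the sim/tsim targets

With those in scope, compile_network above would be called with an argparse namespace opt (supplying opt.model, opt.start_name, opt.stop_name), the VTA environment from vta.get_env(), and env.target as the target.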
Code Example #2
def compile_network(env, target, model, start_pack, stop_pack):

    # Populate the shape and data type dictionary
    dtype_dict = {"data": 'float32'}
    shape_dict = {"data": (env.BATCH, 3, 224, 224)}

    # Get off the shelf gluon model, and convert to relay
    gluon_model = vision.get_model(model, pretrained=True)
    mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)

    # Update shape and type dictionary
    shape_dict.update({k: v.shape for k, v in params.items()})
    dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

    # Perform quantization in Relay
    # Note: We set opt_level to 3 in order to fold batch norm
    with relay.build_config(opt_level=3):
        with relay.quantize.qconfig(global_scale=8.0, skip_conv_layers=[0]):
            mod = relay.quantize.quantize(mod, params=params)

    # Perform graph packing and constant folding for VTA target
    if target.device_name == "vta":
        assert env.BLOCK_IN == env.BLOCK_OUT
        relay_prog = graph_pack(mod["main"],
                                env.BATCH,
                                env.BLOCK_OUT,
                                env.WGT_WIDTH,
                                start_name=start_pack,
                                stop_name=stop_pack)

    return relay_prog, params
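Relative to Example #1, this variant wraps quantization in relay.build_config(opt_level=3) so batch-norm folding happens before quantization, and it quantizes the whole module instead of a single function. A hedged usage sketch; the boundary names shown are the ResNet-18 packing boundaries used in TVM's VTA tutorials:

# Illustrative invocation; start/stop names mark where VTA offloading begins and ends.
env = vta.get_env()
relay_prog, params = compile_network(
    env, env.target, "resnet18_v1",
    start_pack="nn.max_pool2d", stop_pack="nn.global_avg_pool2d")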
Code Example #3
File: run.py Project: uwsampl/relay-bench
def build_model(model_name, remote, target, ctx, vta_env):
    """Build the inference graph runtime."""
    # Load pre-configured AutoTVM schedules.
    with autotvm.tophub.context(target):
        # Populate the shape and data type dictionary for ResNet input.
        dtype_dict = {'data': 'float32'}
        shape_dict = {'data': (vta_env.BATCH, 3, 224, 224)}

        # Get off-the-shelf gluon model and convert to Relay.
        gluon_model = vision.get_model(model_name, pretrained=True)

        # Start frontend compilation.
        mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)

        # Update shape and type dictionary.
        shape_dict.update({k: v.shape for k, v in params.items()})
        dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

        # Perform quantization in Relay.
        with relay.quantize.qconfig(global_scale=8.0, skip_conv_layers=[0]):
            relay_prog = relay.quantize.quantize(mod['main'], params=params)

        # Perform graph packing and constant folding for VTA target.
        if target.device_name == 'vta':
            assert vta_env.BLOCK_IN == vta_env.BLOCK_OUT
            relay_prog = graph_pack(relay_prog,
                                    vta_env.BATCH,
                                    vta_env.BLOCK_OUT,
                                    vta_env.WGT_WIDTH,
                                    start_name=START_PACK,
                                    stop_name=STOP_PACK)

        # Compile Relay program with AlterOpLayout disabled.
        with relay.build_config(opt_level=3, disabled_pass={'AlterOpLayout'}):
            if target.device_name == 'vta':
                with vta.build_config():
                    graph, lib, params = relay.build(
                        relay_prog,
                        target=vta_env.target,
                        params=params,
                        target_host=vta_env.target_host)
            else:
                graph, lib, params = relay.build(
                    relay_prog,
                    target=target,
                    params=params,
                    target_host=vta_env.target_host)

        # Send the inference library over to the remote RPC server
        temp = util.tempdir()
        lib.save(temp.relpath('graphlib.o'))
        remote.upload(temp.relpath('graphlib.o'))
        lib = remote.load_module('graphlib.o')

        graph_module = graph_runtime.create(graph, lib, ctx)
        graph_module.set_input(**params)
        return graph_module
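Once build_model returns, inference follows the usual graph-runtime pattern. A minimal sketch, assuming vta_env is the same VTA environment that was passed to build_model and the input shape matches the dictionary above:

data = np.random.uniform(size=(vta_env.BATCH, 3, 224, 224)).astype('float32')
graph_module.set_input('data', data)    # bind the input tensor
graph_module.run()                      # execute one forward pass
top1 = graph_module.get_output(0).asnumpy().argmax(axis=1)  # predicted classes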
Code Example #4
File: resnet_export.py Project: aiblackmaner/tvm
def compile_mxnet_gulon_resnet(_env, _model):
    """ Compile Model """
    # Generate tvm IR from mxnet gluon model
    # Populate the shape and data type dictionary for ImageNet classifier input
    dtype_dict = {"data": 'float32'}
    shape_dict = {"data": (_env.BATCH, 3, 224, 224)}
    # Get off the shelf gluon model, and convert to relay
    gluon_model = vision.get_model(_model, pretrained=True)
    # Start front end compilation
    mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)
    mod = merge_transform_to_mxnet_model(mod)
    # Update shape and type dictionary
    shape_dict.update({k: v.shape for k, v in params.items()})
    dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

    # Load pre-configured AutoTVM schedules
    with autotvm.tophub.context(_env.target):
        # Perform quantization in Relay
        # Note: We set opt_level to 3 in order to fold batch norm
        with relay.build_config(opt_level=3):
            with relay.quantize.qconfig(global_scale=8.0,
                                        skip_conv_layers=[0]):
                mod = relay.quantize.quantize(mod, params=params)
            # Perform graph packing and constant folding for VTA target
            assert _env.BLOCK_IN == _env.BLOCK_OUT
            relay_prog = graph_pack(mod["main"],
                                    _env.BATCH,
                                    _env.BLOCK_OUT,
                                    _env.WGT_WIDTH,
                                    start_name=PACK_DICT[_model][0],
                                    stop_name=PACK_DICT[_model][1])

    # Compile Relay program with AlterOpLayout disabled
    with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
        with vta.build_config(debug_flag=0):
            graph, lib, params = relay.build(relay_prog,
                                             target=_env.target,
                                             params=params,
                                             target_host=_env.target_host)

    return graph, lib, params
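Because this helper returns the raw (graph, lib, params) triple, resnet_export.py would typically serialize the artifacts to disk afterwards. A hedged sketch using TVM's standard serialization helpers; the file names are arbitrary:

# Illustrative export step.
graph, lib, params = compile_mxnet_gulon_resnet(env, 'resnet18_v1')
lib.save('resnet18_vta.o')                     # compiled kernels
with open('resnet18_vta.json', 'w') as f:      # execution graph (JSON)
    f.write(graph)
with open('resnet18_vta.params', 'wb') as f:   # serialized weights
    f.write(relay.save_param_dict(params))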
Code Example #5
File: tvmtorchyolo.py Project: huajsj/PyTorch-YOLOv3
def convert_to_vta(model_path, image_channel, image_size):
    device = torch.device('cpu')
    model = torch.load(model_path, map_location=device)
    model = model.eval()

    input_shape = [1, image_channel, image_size, image_size]
    input_data = torch.randn(input_shape)
    scripted_model = torch.jit.trace(model, input_data).eval()

    # Input name for the Relay frontend; "input0" is the conventional name
    # for a traced PyTorch model's first input (undefined in the original snippet).
    input_name = "input0"
    shape_list = [(input_name, input_shape)]
    mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
    print(mod["main"])

    remote = rpc.LocalSession()
    ctx = remote.ext_dev(0)

    target = 'vta'
    target_host = 'vta'
    env = vta.get_env()
    pack_dict = {
        "yolov3-tiny": ["nn.max_pool2d", "cast", 8, 237],
    }
    MODEL_NAME = 'yolov3-tiny'
    with tvm.transform.PassContext(opt_level=2):
        with relay.quantize.qconfig(global_scale=33.0,
                                    skip_conv_layers=[0],
                                    store_lowbit_output=True,
                                    round_for_shift=True):
            mod = relay.quantize.quantize(mod, params=params)
        print(mod["main"])
        mod = graph_pack(mod["main"],
                         env.BATCH,
                         env.BLOCK_OUT,
                         env.WGT_WIDTH,
                         start_name=pack_dict[MODEL_NAME][0],
                         stop_name=pack_dict[MODEL_NAME][1],
                         start_name_idx=pack_dict[MODEL_NAME][2],
                         stop_name_idx=pack_dict[MODEL_NAME][3])
    return mod
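Unlike the ResNet examples, YOLOv3-tiny contains many nn.max_pool2d and cast nodes, so graph_pack additionally takes start_name_idx/stop_name_idx to pick which occurrence marks each packing boundary; the indices (8 and 237 here) are node positions in the printed Relay program and are model- and version-specific. An illustrative call, assuming a traced model on disk:

# 'yolov3-tiny.pt' is a hypothetical path; 3x416x416 is the usual YOLOv3-tiny input.
packed_mod = convert_to_vta('yolov3-tiny.pt', image_channel=3, image_size=416)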
Code Example #6
    def compile_model(self):
        if device == 'vta':
            self.remote = rpc.connect(self.pynq_addr, 9091)
            vta.reconfig_runtime(self.remote)
            vta.program_fpga(self.remote, bitstream=None)
        else:
            self.remote = rpc.LocalSession()

        self.ctx = self.remote.ext_dev(0) if device == 'vta' else self.remote.cpu(0)

        # Load pre-configured AutoTVM schedules
        with autotvm.tophub.context(target):

            # Populate the shape and data type dictionary for ResNet input
            dtype_dict = {'data': 'float32'}
            shape_dict = {'data': (env.BATCH, 3, 224, 224)}

            gluon_model = vision.resnet18_v1(
                pretrained=True, ctx=ctx
            ).features if args.nonsplit else splitnet.resnet18_v1_split(
                self.id + 1)

            # Measure build start time
            build_start = time.time()

            # Start front end compilation
            mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)

            # Update shape and type dictionary
            shape_dict.update({k: v.shape for k, v in params.items()})
            dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

            # Perform quantization in Relay
            with relay.quantize.qconfig(global_scale=8.0,
                                        skip_conv_layers=[0]):
                relay_prog = relay.quantize.quantize(mod['main'],
                                                     params=params)

            # Perform graph packing and constant folding for VTA target
            if target.device_name == 'vta':
                assert env.BLOCK_IN == env.BLOCK_OUT
                relay_prog = graph_pack(relay_prog,
                                        env.BATCH,
                                        env.BLOCK_OUT,
                                        env.WGT_WIDTH,
                                        start_name=start_pack,
                                        stop_name=stop_pack)

            # Compile Relay program with AlterOpLayout disabled
            with relay.build_config(opt_level=3,
                                    disabled_pass={'AlterOpLayout'}):
                if target.device_name != 'vta':
                    graph, lib, params = relay.build(
                        relay_prog,
                        target=target,
                        params=params,
                        target_host=env.target_host)
                else:
                    with vta.build_config():
                        graph, lib, params = relay.build(
                            relay_prog,
                            target=target,
                            params=params,
                            target_host=env.target_host)

            self.params = params

            # Measure Relay build time
            build_time = time.time() - build_start
            print(f'inference graph for thread {self.id} built in {build_time:.4f}s!')

            # Send the inference library over to the remote RPC server
            temp = util.tempdir()
            lib.save(temp.relpath('graphlib.o'))
            self.remote.upload(temp.relpath('graphlib.o'))
            lib = self.remote.load_module('graphlib.o')

            # Graph runtime
            self.m = graph_runtime.create(graph, lib, self.ctx)
Code Example #7
File: deploy_detection.py Project: Manikant92/tvm
    mod, params = relay.frontend.from_darknet(net, dtype=dtype, shape=dshape)

    if target.device_name == "vta":
        # Perform quantization in Relay
        # Note: We set opt_level to 3 in order to fold batch norm
        with relay.build_config(opt_level=3):
            with relay.quantize.qconfig(global_scale=33.0,
                                        skip_conv_layers=[0],
                                        store_lowbit_output=True,
                                        round_for_shift=True):
                mod = relay.quantize.quantize(mod, params=params)
            # Perform graph packing and constant folding for VTA target
            mod = graph_pack(mod["main"],
                             env.BATCH,
                             env.BLOCK_OUT,
                             env.WGT_WIDTH,
                             start_name=pack_dict[MODEL_NAME][0],
                             stop_name=pack_dict[MODEL_NAME][1],
                             start_name_idx=pack_dict[MODEL_NAME][2],
                             stop_name_idx=pack_dict[MODEL_NAME][3])
    else:
        mod = mod["main"]

    # Compile Relay program with AlterOpLayout disabled
    with vta.build_config(disabled_pass={"AlterOpLayout"}):
        graph, lib, params = relay.build(mod,
                                         target=target,
                                         params=params,
                                         target_host=env.target_host)

    # Measure Relay build time
    build_time = time.time() - build_start
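This snippet references MODEL_NAME and pack_dict defined earlier in deploy_detection.py; the upstream tutorial's entry looks roughly like the following (the two indices are Relay node positions and shift between TVM versions, so treat them as illustrative):

# Illustrative packing table; indices are model- and version-dependent.
MODEL_NAME = "yolov3-tiny"
pack_dict = {
    "yolov3-tiny": ["nn.max_pool2d", "cast", 4, 186],
}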
Code Example #8
    # Update shape and type dictionary
    shape_dict.update({k: v.shape for k, v in params.items()})
    dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

    if target.device_name == "vta":
        # Perform quantization in Relay
        # Note: We set opt_level to 3 in order to fold batch norm
        with relay.build_config(opt_level=3):
            with relay.quantize.qconfig(global_scale=8.0,
                                        skip_conv_layers=[0]):
                mod = relay.quantize.quantize(mod, params=params)
            # Perform graph packing and constant folding for VTA target
            assert env.BLOCK_IN == env.BLOCK_OUT
            relay_prog = graph_pack(mod["main"],
                                    env.BATCH,
                                    env.BLOCK_OUT,
                                    env.WGT_WIDTH,
                                    start_name=pack_dict[model][0],
                                    stop_name=pack_dict[model][1])
    else:
        relay_prog = mod["main"]

    # Compile Relay program with AlterOpLayout disabled
    with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
        if target.device_name != "vta":
            graph, lib, params = relay.build(relay_prog,
                                             target=target,
                                             params=params,
                                             target_host=env.target_host)
        else:
            with vta.build_config():
                graph, lib, params = relay.build(relay_prog,
                                                 target=target,
                                                 params=params,
                                                 target_host=env.target_host)
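Here pack_dict maps each model name to its packing boundaries; TVM's deploy_classification.py tutorial defines entries of the following shape (illustrative excerpt):

# ResNet variants all pack between the first max-pool and the global average pool.
pack_dict = {
    "resnet18_v1": ["nn.max_pool2d", "nn.global_avg_pool2d"],
    "resnet34_v1": ["nn.max_pool2d", "nn.global_avg_pool2d"],
    "resnet50_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
}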
Code Example #9
    # Update shape and type dictionary
    shape_dict.update({k: v.shape for k, v in params.items()})
    dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

    # Perform quantization in Relay
    with relay.quantize.qconfig(global_scale=8.0,
                                skip_conv_layers=[0]):
        relay_prog = relay.quantize.quantize(mod["main"], params=params)

    # Perform graph packing and constant folding for VTA target
    if target.device_name == "vta":
        assert env.BLOCK_IN == env.BLOCK_OUT
        relay_prog = graph_pack(
            relay_prog,
            env.BATCH,
            env.BLOCK_OUT,
            env.WGT_WIDTH,
            start_name=start_pack,
            stop_name=stop_pack)

    # Compile Relay program with AlterOpLayout disabled
    with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
        if target.device_name != "vta":
            graph, lib, params = relay.build(
                relay_prog, target=target,
                params=params, target_host=env.target_host)
        else:
            with vta.build_config():
                graph, lib, params = relay.build(
                    relay_prog, target=target,
                    params=params, target_host=env.target_host)
Code Example #10
    if target.device_name == "vta":
        # Perform quantization in Relay
        # Note: We set opt_level to 3 in order to fold batch norm
        with tvm.transform.PassContext(opt_level=3):
            with relay.quantize.qconfig(global_scale=8.0,
                                        skip_conv_layers=[0]):
                mod = relay.quantize.quantize(mod, params=params)
            # Perform graph packing and constant folding for VTA target
            assert env.BLOCK_IN == env.BLOCK_OUT
            # do device annotation if target is intelfocl or sim
            relay_prog = graph_pack(
                mod["main"],
                env.BATCH,
                env.BLOCK_OUT,
                env.WGT_WIDTH,
                start_name=pack_dict[model][0],
                stop_name=pack_dict[model][1],
                device_annot=(env.TARGET == "intelfocl"),
            )
    else:
        relay_prog = mod["main"]

    # Compile Relay program with AlterOpLayout disabled
    if target.device_name != "vta":
        with tvm.transform.PassContext(opt_level=3,
                                       disabled_pass={"AlterOpLayout"}):
            graph, lib, params = relay.build(relay_prog,
                                             target=target,
                                             params=params,
                                             target_host=env.target_host)
    else:
        with vta.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            graph, lib, params = relay.build(relay_prog,
                                             target=target,
                                             params=params,
                                             target_host=env.target_host)
Code Example #11
File: run_vta.py Project: he-actlab/cdstack
def main(model,
         start_pack,
         stop_pack,
         data_shape=(1, 3, 224, 224),
         dtype='float32'):
    # Make sure that TVM was compiled with RPC=1
    assert tvm.module.enabled("rpc")

    ######################################################################
    # Define the platform and model targets
    # -------------------------------------
    # Execute on CPU vs. VTA, and define the model.

    # Load VTA parameters from the vta/config/vta_config.json file
    env = vta.get_env()

    # Set ``device=arm_cpu`` to run inference on the CPU
    # or ``device=vta`` to run inference on the FPGA.
    device = "vta"
    target = env.target if device == "vta" else env.target_vta_cpu

    # Name of Gluon model to compile
    # The ``start_pack`` and ``stop_pack`` labels indicate where
    # to start and end the graph packing relay pass: in other words
    # where to start and finish offloading to VTA.

    ######################################################################
    # Obtain an execution remote
    # ---------------------------------
    # When target is 'pynq', reconfigure FPGA and runtime.
    # Otherwise, if target is 'sim', execute locally.
    print(f"Target is {env.TARGET}")
    if env.TARGET in ["sim", "tsim"]:
        remote = rpc.LocalSession()
    else:
        # Fail fast: falling through would leave `remote` undefined below
        raise RuntimeError(f"Error, incorrect target for benchmarking: {env.TARGET}")

    # Get execution context from remote
    ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)

    ######################################################################
    # Build the inference graph runtime
    # ---------------------------------
    # Grab ResNet-18 model from Gluon model zoo and compile with Relay.
    # The compilation steps are:
    #    1) Front end translation from MxNet into Relay module.
    #    2) Apply 8-bit quantization: here we skip the first conv layer,
    #       and dense layer which will both be executed in fp32 on the CPU.
    #    3) Perform graph packing to alter the data layout for tensorization.
    #    4) Perform constant folding to reduce number of operators (e.g. eliminate
    #       batch norm multiply).
    #    5) Perform relay build to object file.
    #    6) Load the object file onto remote (FPGA device).
    #    7) Generate graph runtime, `m`.

    # Load pre-configured AutoTVM schedules
    with autotvm.tophub.context(target):

        # Populate the shape and data type dictionary for ResNet input
        dtype_dict = {"data": 'float32'}
        shape_dict = {"data": data_shape}

        # Measure build start time
        build_start = time.time()

        # Start front end compilation
        if model == 'resnet':
            mod, params = test_resnet_mxnet(env)
        elif model == 'yolo':
            mod, params = test_yolo_darknet()
        elif model == 'lenet':
            mod, params = lenet()
        elif model == 'mobilenet':
            mod, params = mobilenet()
        else:
            # Fail fast: falling through would leave `mod`/`params` undefined
            raise ValueError(f"Error, incorrect model name: {model}")

        ### Need to bind params

        # Update shape and type dictionary
        shape_dict.update({k: v.shape for k, v in params.items()})
        dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
        with relay.quantize.qconfig(global_scale=8.0, skip_conv_layers=[0]):
            relay_prog = relay.quantize.quantize(mod['main'], params=params)

        print(f"Finishing quantizing graph")
        # Perform graph packing and constant folding for VTA target
        if target.device_name == "vta":
            assert env.BLOCK_IN == env.BLOCK_OUT
            relay_prog = graph_pack(relay_prog,
                                    env.BATCH,
                                    env.BLOCK_OUT,
                                    env.WGT_WIDTH,
                                    start_name=start_pack,
                                    stop_name=stop_pack)

        print(f"Finishing packing graph")

        # Compile Relay program with AlterOpLayout disabled
        with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            if target.device_name != "vta":
                graph, lib, params = relay.build(relay_prog,
                                                 target=target,
                                                 params=params,
                                                 target_host=env.target_host)
            else:
                with vta.build_config():
                    graph, lib, params = relay.build(
                        relay_prog,
                        target=target,
                        params=params,
                        target_host=env.target_host)

        # Measure Relay build time
        build_time = time.time() - build_start
        print(model + " inference graph built in {0:.2f}s!".format(build_time))

        # Send the inference library over to the remote RPC server
        temp = util.tempdir()
        lib.save(temp.relpath("graphlib.o"))
        remote.upload(temp.relpath("graphlib.o"))
        lib = remote.load_module("graphlib.o")

        # Graph runtime
        m = graph_runtime.create(graph, lib, ctx)
    # Set the network parameters and inputs
    data = np.random.uniform(size=data_shape).astype(dtype)

    m.set_input(**params)
    m.set_input('data', tvm.nd.array(data.astype(dtype)))

    # Perform inference and gather execution statistics
    # More on: https://docs.tvm.ai/api/python/module.html#tvm.module.Module.time_evaluator
    num = 1  # number of times we run module for a single measurement
    rep = 1  # number of measurements (we derive std dev from this)
    timer = m.module.time_evaluator("run", ctx, number=num, repeat=rep)

    if env.TARGET in ["sim", "tsim"]:
        simulator.clear_stats()
        timer()
        sim_stats = simulator.stats()
        print("\nExecution statistics:")
        for k, v in sim_stats.items():
            # Since we execute the workload many times, we need to normalize stats
            # Note that there is always one warm up run
            # Therefore we divide the overall stats by (num * rep + 1)
            print("\t{:<16}: {:>16}".format(k, v // (num * rep + 1)))
    else:
        tcost = timer()
        std = np.std(tcost.results) * 1000
        mean = tcost.mean * 1000
        print("\nPerformed inference in %.2fms (std = %.2f) for %d samples" %
              (mean, std, env.BATCH))
        print("Average per sample inference time: %.2fms" % (mean / env.BATCH))