예제 #1
0
def change_batch_and_dump(inp_file, oup_file):
    """Re-dump a serialized graph with the first input dimension set to 1.

    Every ``Host2DeviceCopy`` variable that the first output depends on is
    replaced by a new host-to-device input with the same dtype and name whose
    shape has dimension 0 forced to 1. The rewired outputs are then dumped
    (with ``keep_var_name=2``) and written to ``oup_file``.

    Args:
        inp_file: path of the dumped graph to load.
        oup_file: path the new dump is written to, opened in binary mode.
    """
    graph, _, outputs = G.load_graph(inp_file)

    def _unit_batch_copy(src):
        # Same dtype and name as ``src``; only dimension 0 of the shape differs.
        shape = list(src.shape)
        shape[0] = 1
        return make_h2d(graph, "xpux", src.dtype, shape, src.name)

    substitutions = {
        src: _unit_batch_copy(src)
        for src in cgtools.get_dep_vars(outputs[0], "Host2DeviceCopy")
    }
    rewired = cgtools.replace_vars(outputs, substitutions)

    serialized, _ = G.dump_graph(map(G.VarNode, rewired), keep_var_name=2)
    with open(oup_file, "wb") as sink:
        sink.write(serialized)
예제 #2
0
def main():
    """Command-line entry point: load a dumped network and run it.

    Parses the command line, applies the logging and device options, loads
    the graph given by the positional ``net`` argument, optionally narrows the
    outputs to the names passed via ``--output-name``, builds the input data
    with ``make_data_given_desc`` and hands everything to ``run_model``.

    ``--dump-cpp-model`` forces ``--embed-input`` on, as its help text states.
    """
    parser = argparse.ArgumentParser(
        description="load a network and run inference on random data",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("net")
    parser.add_argument(
        "--device", "-d", help="set default device, like 'gpux' or 'cpux'"
    )
    parser.add_argument(
        "--calc-output-rms",
        action="store_true",
        help="compute RMS of outputs; useful for comparing computing results",
    )
    # nargs="*" collects every name that follows a single occurrence of the
    # flag; repeating the flag would overwrite the earlier names, so the help
    # text must not advertise repetition.
    parser.add_argument(
        "--output-name",
        nargs="*",
        help="Specify output name(s); several names can follow this option."
        " We will look for opr/var in the graph",
    )
    parser.add_argument(
        "--load-input-data",
        help="load input data from pickle file; it should be"
        " a numpy array or a dict of numpy array",
    )
    parser.add_argument("--profile", help="profiler output file")
    parser.add_argument(
        "--fast-run",
        action="store_true",
        help="enable fast running by profiling conv algorithms during compiling.",
    )
    parser.add_argument(
        "--reproducible", action="store_true", help="use reproducible kernels"
    )
    # The example separates the two tensors with ";" to match the documented
    # format (commas only separate the dimensions of one shape).
    parser.add_argument(
        "--input-desc",
        help="specify input names and shapes manually in"
        " format: <name>:<shape>[;<name>:<shape>, ...], where"
        " name is a string and shape is a comma separated"
        ' string. e.g., "data:128,1,28,28;label:128".'
        " different input tensors are separated by semicolon.",
    )
    parser.add_argument(
        "--batchsize",
        type=int,
        help="change batchsize; the first dimension of each"
        " input is assumed to be batch size",
    )
    parser.add_argument(
        "--warm-up",
        action="store_true",
        help="warm up model before doing timing for better estimation",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="verbose output, logging in debug mode",
    )
    parser.add_argument(
        "--iter", type=int, default=1, help="number of iters to run the model"
    )
    parser.add_argument("--log", help="give a file path to duplicate log to")
    parser.add_argument(
        "--seed",
        type=int,
        default=0,
        help="seed for random number generator for input data",
    )
    parser.add_argument(
        "--rng",
        help="special RNG options to generate input data in"
        " format: <name>:func[;<name>:func, ...] where name is"
        " a string and func is a python expression containing"
        ' "{}" for the size param, e.g. '
        ' "label:randint(low=0,high=1000,size={})"',
    )
    parser.add_argument(
        "--focused-nvprof",
        action="store_true",
        help="only profile last iter for `nvprof --profile-from-start off`",
    )
    parser.add_argument(
        "--optimize-for-inference",
        action="store_true",
        help="optimize model for inference",
    )
    parser.add_argument(
        "--enable-io16xc32",
        action="store_true",
        help="transform the model to float16 io float32 compute",
    )
    parser.add_argument(
        "--enable-ioc16",
        action="store_true",
        help="transform the dtype of the model to float16 io and compute",
    )
    parser.add_argument(
        "--enable-hwcd4",
        action="store_true",
        help="transform the model format from NCHW to NHWCD4 for inference",
    )
    parser.add_argument(
        "--enable-nchw4",
        action="store_true",
        help="transform the model format from NCHW to NCHW4 for inference",
    )
    parser.add_argument(
        "--enable-nchw88",
        action="store_true",
        help="transform the model format from NCHW to NCHW88 for inference",
    )
    parser.add_argument(
        "--enable-nchw44",
        action="store_true",
        help="transform the model format from NCHW to NCHW44 for inference",
    )
    parser.add_argument(
        "--enable-nchw44-dot",
        action="store_true",
        help="transform the model format from NCHW to NCHW44_DOT "
        "for optimizing armv8.2 dot in inference",
    )
    parser.add_argument(
        "--enable-chwn4",
        action="store_true",
        help="transform the model format to CHWN4 "
        "for inference, mainly used for nvidia tensorcore",
    )
    parser.add_argument(
        "--enable-nchw32",
        action="store_true",
        help="transform the model format from NCHW4 to NCHW32 "
        "for inference on nvidia TensorCore",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-nonlinearity",
        action="store_true",
        help="fuse convolution bias and nonlinearity opr to a "
        "conv_bias opr and compute",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-with-z",
        action="store_true",
        help="fuse conv_bias with z input for inference on "
        "nvidia GPU (this optimization pass will result in mismatch "
        "of the precision of output of training and inference)",
    )
    parser.add_argument(
        "--dump-cpp-model",
        help="write a C++ model that can be loaded by "
        "megbrain/sdk/load-and-run; "
        "this implies --embed-input",
    )
    parser.add_argument(
        "--embed-input",
        action="store_true",
        help="embed input data as SharedDeviceTensor in model, "
        "to remove memory copy for inputs",
    )
    args = parser.parse_args()

    if args.verbose:
        enable_debug_log()
    if args.log:
        set_log_file(args.log)

    if args.device:
        set_default_device(args.device)

    # Dumping a C++ model implies embedding the inputs.
    if args.dump_cpp_model:
        args.embed_input = True

    logger.info("loading model ...")
    graph, _, output_vars = G.load_graph(args.net)
    input_vars = tools.get_dep_vars(output_vars, "Host2DeviceCopy")

    # Only narrow the outputs when the option was actually given.
    if args.output_name is not None:
        output_vars = tools.find_vars_by_name(output_vars, args.output_name)

    data = make_data_given_desc(args, input_vars)

    run_model(args, graph, input_vars, output_vars, data)
예제 #3
0
def main():
    """Command-line entry point: pack a graph and its test cases into one file.

    Parses the command line, obtains the outputs and test cases from
    ``make_feeds``, passes the outputs through ``optimize_for_inference`` and
    writes ``args.output`` as follows:

    * the magic bytes ``b"mgbtest0"``;
    * the number of test cases, packed with ``struct`` format ``"I"``;
    * the dump of the (wrapped) output variables;
    * for every test case, the dump of a graph holding one constant per
      input, in the order of the sorted ``(name, dtype)`` pairs.

    When ``--output-strip-info`` is given, ``<output>.json`` is passed to
    ``G.dump_graph`` as ``strip_info_file``.

    Raises:
        AssertionError: if ``make_feeds`` returns no test cases, or a test
            case contains entries that do not match any input.
    """
    parser = argparse.ArgumentParser(
        description="Pack computing graph, input values and expected output "
        "values into one file for checking correctness. README.md gives more "
        "details on the usage",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("input", help="MegEngine dumped model file")
    parser.add_argument("-o", "--output", help="output file", required=True)
    parser.add_argument(
        "-d",
        "--data",
        default=[],
        action="append",
        required=True,
        help="Given input test data when input file is a network, "
        "and current network output would be used as groundtruth. "
        "The format is var0:file0;var1:file1... to specify data files for "
        "input vars. It can also be #rand(min,max,shape...) for generating "
        "random input data, for example, #rand(0,255), "
        "#rand(0,255,1,3,224,224) or #rand(0, 255, 1, ...) where `...` means "
        "the remaining part of the original shape. "
        "If the shape is not specified, the shape of "
        "corresponding input tensors in the network will be used. "
        "If there is only one input var, its name can be omitted. "
        "Each data file can either be an image which can be loaded by opencv, "
        "or a pickled numpy.ndarray. "
        "This option can be given multiple times to add multiple testcases. "
        " *NOTE* "
        "If you start the data with the letter @, the rest should be a "
        "filename, and each line in the file should be a single datum in "
        "the format described above. ",
    )
    parser.add_argument(
        "--repeat",
        type=int,
        default=1,
        help="Specify how many times the input image is repeated. "
        "Useful when running benchmark for batch size other than one. "
        "Have no effect on randomly generated input data.",
    )
    parser.add_argument(
        "--silent",
        action="store_true",
        help="set verbose to False in assert_equal opr",
    )
    # store_true so that passing the flag turns the optimization ON, which is
    # what both the flag name and its help text promise.
    parser.add_argument(
        "--optimize-for-inference",
        action="store_true",
        help="enable optimization for inference",
    )
    parser.add_argument(
        "--no-assert",
        action="store_true",
        help="do not insert assert_equal opr to check result; "
        "this option is useful for benchmarking",
    )
    parser.add_argument(
        "--maxerr",
        type=float,
        default=1e-4,
        help="max error for assert_equal check during runtime",
    )
    parser.add_argument(
        "--resize-input",
        action="store_true",
        help="resize input image to fit input var shape",
    )
    parser.add_argument(
        "--input-transform",
        help="a python expression to transform the input data. "
        "Example: data / np.std(data)",
    )
    parser.add_argument(
        "--discard-var-name",
        action="store_true",
        help="discard variable and param names in the "
        "generated output",
    )
    parser.add_argument(
        "--output-strip-info",
        action="store_true",
        help="output code strip information",
    )
    parser.add_argument(
        "--enable-io16xc32",
        action="store_true",
        help="transform the model to float16 io float32 compute",
    )
    parser.add_argument(
        "--enable-ioc16",
        action="store_true",
        help="transform the dtype of the model to float16 io "
        "and compute",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-nonlinearity",
        action="store_true",
        help="fuse convolution bias and nonlinearity opr to a "
        "conv_bias opr and compute",
    )
    parser.add_argument(
        "--enable-hwcd4",
        action="store_true",
        help="transform the model format from NCHW to NHWCD4 "
        "for inference; you may need to disable CUDA and set "
        "MGB_USE_MEGDNN_DBG=2",
    )
    parser.add_argument(
        "--enable-nchw4",
        action="store_true",
        help="transform the model format from NCHW to NCHW4 "
        "for inference",
    )
    parser.add_argument(
        "--enable-nchw88",
        action="store_true",
        help="transform the model format from NCHW to NCHW88 "
        "for inference",
    )
    parser.add_argument(
        "--enable-nchw44",
        action="store_true",
        help="transform the model format from NCHW to NCHW44 "
        "for inference",
    )
    parser.add_argument(
        "--enable-nchw44-dot",
        action="store_true",
        help="transform the model format from NCHW to NCHW44_DOT "
        "for optimizing armv8.2 dot in inference",
    )
    parser.add_argument(
        "--enable-nchw32",
        action="store_true",
        help="transform the model format from NCHW4 to NCHW32 "
        "for inference on nvidia TensorCore",
    )
    parser.add_argument(
        "--enable-chwn4",
        action="store_true",
        help="transform the model format to CHWN4 "
        "for inference, mainly used for nvidia tensorcore",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-with-z",
        action="store_true",
        help="fuse conv_bias with z input for inference on "
        "nvidia GPU (this optimization pass will result in mismatch "
        "of the precision of output of training and inference)",
    )
    args = parser.parse_args()

    feeds = make_feeds(args)

    assert isinstance(
        feeds, dict) and feeds["testcases"], "testcases can not be empty"

    output_mgbvars = feeds["outputs"]
    output_mgbvars = optimize_for_inference(args, output_mgbvars)

    # Sorted (name, dtype) pairs fix the order in which the per-testcase
    # constants are emitted below.
    inputs = cgtools.get_dep_vars(output_mgbvars, "Host2DeviceCopy")
    inputs = sorted((i.name, i.dtype) for i in inputs)

    if args.discard_var_name:
        sereg_kwargs = dict(keep_var_name=0, keep_param_name=False)
    else:
        sereg_kwargs = dict(keep_var_name=2, keep_param_name=True)

    strip_info_file = args.output + ".json" if args.output_strip_info else None

    with open(args.output, "wb") as fout:
        fout.write(b"mgbtest0")
        fout.write(struct.pack("I", len(feeds["testcases"])))
        if isinstance(output_mgbvars, dict):
            # Iterating a dict directly yields only its keys, so the
            # key/value pairs have to come from .items().
            wrap_output_vars = {
                key: VarNode(var) for key, var in output_mgbvars.items()
            }
        else:
            wrap_output_vars = [VarNode(i) for i in output_mgbvars]
        dump_content, stat = G.dump_graph(wrap_output_vars,
                                          append_json=True,
                                          strip_info_file=strip_info_file,
                                          **sereg_kwargs)
        fout.write(dump_content)

        logger.info(
            "graph dump sizes: tot_size={:.3f}KiB overhead={:.3f}KiB".format(
                stat.tot_bytes / 1024,
                (stat.tot_bytes - stat.tensor_value_bytes) / 1024))

    def make_dev_tensor(value, dtype=None, device=None):
        return tensor(value, dtype=dtype, device=device)._dev_tensor()

    for testcase in feeds["testcases"]:
        assert isinstance(testcase, dict)
        cg = G.Graph()
        output_mgbvars = []
        for name, dtype in inputs:
            # pop() consumes the entry so that leftovers can be detected.
            output_mgbvars.append(
                cg.make_const(
                    make_dev_tensor(testcase.pop(name),
                                    dtype=dtype,
                                    device="cpux")))
        assert not testcase, "extra inputs provided in testcase: {}".format(
            testcase.keys())
        # Append mode: the header and model dump were written above.
        with open(args.output, "ab") as fout:
            dump_content, _ = G.dump_graph(output_mgbvars,
                                           strip_info_file=strip_info_file,
                                           append_json=True)
            fout.write(dump_content)
예제 #4
0
def make_feeds(args):
    """Load the model and build the test cases used for packing.

    The graph in ``args.input`` is loaded and its ``Host2DeviceCopy``
    dependencies are collected by name. Every entry of ``args.data`` (entries
    starting with ``@`` name a file holding one spec per non-empty line) is
    turned into a test case by ``gen_one_testcase``.

    Unless ``args.no_assert`` is set, the graph is additionally compiled with
    its inputs replaced by ``InputNode`` objects, each test case is executed
    to obtain the current outputs, those values are stored in the test case
    under ``"<var name>:expect"``, and every output is wrapped in an
    ``AssertEqual`` operator comparing it with a host-to-device input of that
    name (using ``args.maxerr`` and ``not args.silent`` as verbosity).

    Note: this function changes numpy's global print options.

    Args:
        args: parsed command line; reads ``input``, ``data``, ``no_assert``,
            ``silent`` and ``maxerr`` (and is forwarded to
            ``gen_one_testcase``).

    Returns:
        dict with ``"outputs"`` (the loaded outputs, or the assert-wrapped
        ones when assertions are enabled) and ``"testcases"`` (list of the
        generated test-case dicts).

    Raises:
        AssertionError: if a test case does not provide exactly one entry per
            input, or an input value is missing when computing ground truth.
    """
    cg_rt, _, outputs = G.load_graph(args.input)
    inputs = cgtools.get_dep_vars(outputs, "Host2DeviceCopy")

    inputs = {i.name: i for i in inputs}
    if not args.no_assert:

        replace_varmap = {}
        inp_map = {}
        # Replace every input var with an InputNode so values can be set
        # directly before executing the compiled function.
        for name, var in inputs.items():
            inp = G.InputNode(device="xpux",
                              dtype=var.dtype,
                              shape=var.shape,
                              graph=cg_rt)
            replace_varmap[var] = inp.outputs[0]
            inp_map[name] = inp

        new = cgtools.replace_vars(outputs, replace_varmap)
        if isinstance(new, rt.VarNode):
            # A single node has to be wrapped in a list; list(node) would try
            # to iterate over the node itself.
            new = [new]

        output_nodes = [G.OutputNode(var) for var in new]
        func = cg_rt.compile([node.outputs[0] for node in output_nodes])

        def make_dev_tensor(value, dtype=None, device=None):
            return tensor(value, dtype=dtype, device=device)._dev_tensor()

        def calculate(**kwargs):
            """Run the compiled graph on one test case; return numpy outputs."""
            # Feed every input by name.
            for name, var in inputs.items():
                val = kwargs.pop(name, None)
                assert val is not None, "miss input name{}".format(name)
                dev_tensor = make_dev_tensor(val,
                                             dtype=var.dtype,
                                             device="xpux")
                inp_map[name].set_value(dev_tensor)

            func.execute()

            return [res.get_value().numpy() for res in output_nodes]

        def expect_name(var):
            return "{}:expect".format(var.name)

    testcases = []

    np.set_printoptions(precision=2, threshold=4, suppress=True)

    # Expand "@file" entries into one spec per non-empty line.
    data_list = []
    for item in args.data:
        if item.startswith("@"):
            with open(item[1:], "r") as f:
                stripped = (line.rstrip() for line in f)
                data_list.extend(spec for spec in stripped if spec != "")
        else:
            data_list.append(item)

    for inp_spec in data_list:
        cur_testcase = gen_one_testcase(args, inputs, inp_spec)
        assert len(cur_testcase) == len(
            inputs), "required inputs: {}; given data: {}".format(
                inputs.keys(), cur_testcase.keys())

        if not args.no_assert:
            # **cur_testcase hands calculate() its own kwargs dict, so the
            # pops inside it do not alter cur_testcase.
            outputs_get = calculate(**cur_testcase)
            for var, val in zip(outputs, outputs_get):
                cur_testcase[expect_name(var)] = val
                logger.info(
                    "generate test groundtruth: var={} shape={} range=({}, {})"
                    " mean={} var={}".format(var, val.shape, val.min(),
                                             val.max(), np.mean(val),
                                             np.var(val)))
        testcases.append(cur_testcase)
        logger.info("add testcase: \n {}".format("\n ".join(
            "{}: shape={} dtype={} range=({:.2f},{:.2f}) "
            "mean={:.2f} sd={:.2f}".format(k, v.shape, v.dtype, v.min(),
                                           v.max(), np.mean(v), np.std(v))
            for k, v in sorted(cur_testcase.items()))))

    if not args.no_assert:

        def expect_shp(var):
            # Fall back to the shape of the first computed ground truth when
            # the var itself reports an empty/falsy shape.
            ret = var.shape
            if ret:
                return ret
            return testcases[0][expect_name(var)].shape

        def assert_equal(expect, real, **kwargs):
            op = builtin.AssertEqual(**kwargs)
            (res, ) = G.apply_normal_varnode(op, expect, real)
            return G.VarNode(res)

        verbose = not args.silent
        device = rt.CompNode("xpux")

        outputs_new = []
        for i in outputs:
            dtype = i.dtype
            name = expect_name(i)
            shape = expect_shp(i)
            # Make the expected output one more input of the model.
            expect_get = rt.make_h2d(cg_rt, device, dtype, shape, name)
            # Insert the assert opr that compares expected and real values.
            outputs_new.append(
                assert_equal(
                    G.VarNode(expect_get),
                    G.VarNode(i),
                    verbose=verbose,
                    maxerr=args.maxerr,
                ))
            # Recorded next to the real inputs; not read again in this
            # function.
            inputs[name] = expect_get
        outputs = outputs_new

    return {"outputs": outputs, "testcases": testcases}