Esempio n. 1
0
def test_get_opr_seq():
    """Check the operator counts reported by ``cgtools.get_oprs_seq``.

    A small module is traced, dumped to an in-memory buffer and loaded back.
    The loaded outputs must yield 5 operators when the second positional
    argument of ``get_oprs_seq`` is ``True`` and 6 when it is ``False``.
    """

    class Net(M.Module):
        """Adds the input to a stored tensor reshaped to ``(A, A)``."""

        def __init__(self):
            super().__init__()
            values = np.random.random((1, 1, 4, 4))
            self.data = megengine.tensor(values, dtype=np.float32)

        def forward(self, input):
            # The target shape is derived from the input's leading dimension,
            # so it is computed as a tensor rather than a Python constant.
            leading = input.shape[0]
            target_shape = astensor1d(
                (leading, leading),
                self.data,
                dtype="int32",
                device=input.device,
            )
            return input + F.reshape(self.data, target_shape)

    net = Net()
    sample = megengine.tensor(np.random.random((4, 4)), dtype=np.float32)

    @trace(symbolic=True, capture_as_const=True)
    def func(inp, *, net=None):
        return net(inp)

    # Run once so the trace is recorded before dumping.
    func(sample, net=net)

    buffer = io.BytesIO()
    func.dump(buffer, optimize_for_inference=False)
    buffer.seek(0)
    *_, outputs = mgb_graph.load_graph(buffer)

    for flag, expected_count in ((True, 5), (False, 6)):
        assert len(cgtools.get_oprs_seq(outputs, flag)) == expected_count
Esempio n. 2
0
def test_load_refcnt():
    """Access a loaded var after the local reference to its graph is deleted.

    A constant var is dumped and loaded back; the test passes as long as
    reading ``varnode.owner`` after ``del graph`` does not raise.
    NOTE(review): judging by the name this guards reference counting between
    a loaded graph and its vars -- confirm against the runtime bindings.
    """
    graph = mgb_graph.Graph()
    varnode = graph.make_const(0)
    buf, _ = mgb_graph.dump_graph([varnode])
    # Rebinds both names: from here on they refer to the *loaded* graph/var.
    graph, _, (varnode, ) = mgb_graph.load_graph(io.BytesIO(buf))
    # Drop this function's only named reference to the loaded graph ...
    del graph
    # ... and then touch the var; the bare attribute access is the check.
    varnode.owner
Esempio n. 3
0
def load_comp_graph_from_file(path):
    """Load a serialized computing graph with the loader matching ``mge_version``.

    Versions up to and including 0.6.0 go through
    ``mgb.load_comp_graph_from_file``; later versions go through
    ``G.load_graph``.

    Args:
        path: model location, forwarded unchanged to the selected loader.

    Returns:
        A ``(cg, outputs)`` pair: the loaded graph and its output vars.
    """

    def release_tuple(version):
        # Compare versions numerically: as plain strings "0.10.0" sorts
        # before "0.6.0", which would select the wrong loader.
        numbers = []
        for piece in str(version).split("."):
            digits = ""
            for ch in piece:
                if ch not in "0123456789":
                    break
                digits += ch
            if not digits:
                # Stop at the first component without a leading number
                # (e.g. a "dev" suffix).
                break
            numbers.append(int(digits))
        return tuple(numbers)

    if release_tuple(mge_version) <= (0, 6, 0):
        cg, _, outputs = mgb.load_comp_graph_from_file(path)
    else:
        ret = G.load_graph(path)
        cg = ret.graph
        outputs = ret.output_vars_list
    return cg, outputs
Esempio n. 4
0
def test_load_refcnt():
    """Access a loaded var after the load result and its graph are deleted.

    A constant var is dumped and loaded back; the test passes as long as
    reading ``varnode.owner`` after both ``del`` statements does not raise.
    NOTE(review): judging by the name this guards reference counting between
    a loaded graph and its vars -- confirm against the runtime bindings.
    """
    graph = mgb_graph.Graph()
    varnode = graph.make_const(0)
    buf, _ = mgb_graph.dump_graph([varnode])
    ret = mgb_graph.load_graph(io.BytesIO(buf))
    # Rebinds both names: from here on they refer to the *loaded* graph/var.
    graph, (varnode, ) = ret.graph, ret.output_vars_list
    # ``ret`` also references the graph, so it has to go before ``graph`` for
    # the local references to the graph to be gone.
    del ret
    del graph
    # The bare attribute access is the check.
    varnode.owner
Esempio n. 5
0
def test_optimize_for_inference():
    """Dump a traced ``exp`` with ``enable_io16xc32`` and check the input dtype.

    After loading the dump back, the first input of the operator that owns
    the first output var must be ``float16``.
    """
    import os

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return exp(x)

    # mkstemp hands back an *open* descriptor plus the path. Only the path is
    # used, so close the descriptor right away instead of leaking it.
    fd, out = mkstemp()
    os.close(fd)
    try:
        f(tensor(5.0))
        f.dump(out, enable_io16xc32=True)

        res = G.load_graph(out)
        computing_input = res.output_vars_list[0].owner.inputs[0]
        assert computing_input.dtype == np.float16
    finally:
        # mkstemp never deletes the file itself; clean up even on failure.
        os.remove(out)
Esempio n. 6
0
def test_goptions_log_exp():
    """Compare operator counts of ``log(exp(x))`` traced at two opt levels.

    The same expression is traced with ``opt_level=0`` and ``opt_level=1``,
    dumped and loaded back; the level-0 graph must contain exactly two more
    operators than the level-1 graph.
    """
    import os

    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x):
        return log(exp(x))

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x):
        return log(exp(x))

    f(tensor(1.0))
    # mkstemp hands back an *open* descriptor plus the path. Only the path is
    # used, so close the descriptor right away instead of leaking it.
    fd, out = mkstemp()
    os.close(fd)
    try:
        f.dump(out, optimize_for_inference=False)
        *_, outputs = G.load_graph(out)
        oprs_1 = cgtools.get_oprs_seq(outputs)

        # The same path is reused for the second dump.
        g(tensor(1.0))
        g.dump(out, optimize_for_inference=False)
        *_, outputs = G.load_graph(out)
        oprs_2 = cgtools.get_oprs_seq(outputs)
    finally:
        # mkstemp never deletes the file itself; clean up even on failure.
        os.remove(out)

    assert len(oprs_1) - len(oprs_2) == 2
Esempio n. 7
0
def test_catch_input_name(tensor_name, var_name):
    """Check the input var name that survives a dump/load round trip.

    Args:
        tensor_name: name given to the input ``Tensor`` before tracing.
        var_name: name expected on the first input of the last operator in
            the loaded graph.
    """

    def f(x):
        return 2 * x

    traced = trace(f, symbolic=True, capture_as_const=True)
    sample = Tensor(np.ones(shape=(2, 3)), name=tensor_name)
    traced(sample).numpy()

    buffer = io.BytesIO()
    traced.dump(
        buffer,
        optimize_for_inference=False,
        keep_opr_name=True,
        keep_var_name=2,
    )
    buffer.seek(0)

    *_, outputs = G.load_graph(buffer)
    opr_seq = cgtools.get_oprs_seq(outputs)
    last_opr = opr_seq[-1]
    assert last_opr.inputs[0].name == var_name
Esempio n. 8
0
def change_batch_and_dump(inp_file, oup_file):
    """Re-dump a model with the first dimension of every input set to 1.

    Args:
        inp_file: model to load, passed to ``G.load_graph``.
        oup_file: path the re-dumped model is written to (binary mode).
    """
    cg, _, outputs = G.load_graph(inp_file)

    def with_unit_batch(var):
        # Same dtype and name as the original input; only dim 0 changes.
        shape = list(var.shape)
        shape[0] = 1
        return make_h2d(cg, "xpux", var.dtype, shape, var.name)

    h2d_inputs = cgtools.get_dep_vars(outputs[0], "Host2DeviceCopy")
    replace_dict = {var: with_unit_batch(var) for var in h2d_inputs}

    new_outputs = cgtools.replace_vars(outputs, replace_dict)
    dump_content, _ = G.dump_graph(map(G.VarNode, new_outputs), keep_var_name=2)
    with open(oup_file, "wb") as sink:
        sink.write(dump_content)
Esempio n. 9
0
def _dump_and_load(func, symbolic, keep_opr_name=True):
    """Trace ``func``, dump it to memory, and return the loaded operators.

    ``AutoNaming.clear()`` is called first. ``func`` is then traced, run once
    on a ``(2, 3)`` tensor of ones and dumped with the single argument named
    ``"x"``.

    Args:
        func: callable taking one tensor.
        symbolic: forwarded to ``trace``.
        keep_opr_name: forwarded to ``dump``.

    Returns:
        The result of ``cgtools.get_oprs_seq`` on the loaded output vars.
    """
    AutoNaming.clear()
    traced = trace(func, symbolic=symbolic, capture_as_const=True)
    sample = Tensor(np.ones(shape=(2, 3)))
    traced(sample).numpy()

    dump_options = dict(
        optimize_for_inference=False,
        arg_names=("x", ),
        keep_opr_name=keep_opr_name,
        keep_var_name=2,
    )
    buffer = io.BytesIO()
    traced.dump(buffer, **dump_options)
    buffer.seek(0)

    loaded = G.load_graph(buffer)
    return cgtools.get_oprs_seq(loaded.output_vars_list)
Esempio n. 10
0
def test_optimize_for_inference():
    """Re-dump a model through ``Net`` with ``enable_io16xc32`` and check dtype.

    A traced ``F.exp`` is dumped without inference optimization, loaded with
    ``Net.load`` and dumped again with ``enable_io16xc32=True``. In the final
    graph, the first input of the operator owning the first output var must
    be ``float16``.
    """

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return F.exp(x)

    plain_dump = io.BytesIO()
    f(Tensor(5.0))
    f.dump(plain_dump, optimize_for_inference=False)
    plain_dump.seek(0)

    converted_dump = io.BytesIO()
    network = Net.load(plain_dump)
    network.dump(converted_dump, enable_io16xc32=True)
    converted_dump.seek(0)

    loaded = G.load_graph(converted_dump)
    first_output = loaded.output_vars_list[0]
    computing_input = first_output.owner.inputs[0]
    assert computing_input.dtype == np.float16
Esempio n. 11
0
def test_dump_volatile():
    """Check that a captured tensor is dumped as an ``ImmutableTensor``.

    ``f`` multiplies its argument by a tensor taken from the enclosing
    scope. After a dump/load round trip, the second input of the output's
    owner operator must itself be owned by an ``ImmutableTensor`` operator.
    """
    p = tensor([2])

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return x * p

    x = tensor([3])
    expected = f(x).numpy()

    # Repeated calls must keep producing the same value.
    for _ in range(3):
        np.testing.assert_equal(f(x).numpy(), expected)

    buffer = io.BytesIO()
    f.dump(buffer, optimize_for_inference=False)
    buffer.seek(0)

    cg, _, outputs = G.load_graph(buffer)
    (out, ) = outputs
    second_input = cgtools.get_owner_opr_inputs(out)[1]
    owner_type = cgtools.get_owner_opr_type(second_input)
    assert owner_type == "ImmutableTensor"
Esempio n. 12
0
def test_dump_volatile():
    """Check that a captured raw tensor is dumped as an ``ImmutableTensor``.

    ``f`` applies an element-wise multiply between its argument and a raw
    tensor taken from the enclosing scope. After a dump/load round trip, the
    second input of the output's owner operator must itself be owned by an
    ``ImmutableTensor`` operator.
    """
    p = as_raw_tensor([2])

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        op = ops.Elemwise(Elemwise.Mode.MUL)
        (y, ) = apply(op, x, p)
        return y

    x = as_raw_tensor([3]).numpy()
    # Reference value comes from the undecorated function.
    expected = f.__wrapped__(as_raw_tensor(x)).numpy()

    for _ in range(3):
        actual = f(as_raw_tensor(x)).numpy()
        np.testing.assert_equal(actual, expected)

    buffer = io.BytesIO()
    f.dump(buffer, optimize_for_inference=False)
    buffer.seek(0)

    cg, _, outputs = G.load_graph(buffer)
    (out, ) = outputs
    second_input = cgtools.get_owner_opr_inputs(out)[1]
    owner_type = cgtools.get_owner_opr_type(second_input)
    assert owner_type == "ImmutableTensor"
Esempio n. 13
0
def test_graph_traversal():
    """Check the var-to-operator mapping from ``cgtools.graph_traversal``.

    A traced ``Conv2d`` is dumped and loaded back. For the var stored under
    key ``1`` of the traversal's var map, the first ``var2oprs`` entry must
    report index 0.
    """
    net = M.Conv2d(3, 32, 3)

    @trace(symbolic=True, capture_as_const=True)
    def fun(data):
        return net(data)

    data = np.random.random([1, 3, 224, 224]).astype(np.float32)
    for _ in range(3):
        fun(megengine.tensor(data))

    buffer = io.BytesIO()
    fun.dump(buffer, optimize_for_inference=False)
    buffer.seek(0)
    cg, _, outputs = mgb_graph.load_graph(buffer)

    _, map_vars, var2oprs, *_ = cgtools.graph_traversal(outputs)
    input_var = map_vars[1]
    first_use = var2oprs[input_var.id][0]
    _, var_idx = first_use

    assert var_idx == 0
Esempio n. 14
0
def main():
    """Command-line entry point: load a network and run it on generated data.

    Parses the command line, applies the logging and device options, loads
    the graph named by ``net``, collects its ``Host2DeviceCopy`` input vars,
    optionally narrows the outputs by ``--output-name``, builds the input
    data and hands everything to ``run_model``.
    """
    args = _build_arg_parser().parse_args()

    if args.verbose:
        enable_debug_log()
    if args.log:
        set_log_file(args.log)

    if args.device:
        set_default_device(args.device)

    # Dumping a C++ model implies embedding the inputs (see --dump-cpp-model).
    if args.dump_cpp_model:
        args.embed_input = True

    logger.info("loading model ...")
    graph, _, output_vars = G.load_graph(args.net)
    input_vars = tools.get_dep_vars(output_vars, "Host2DeviceCopy")

    if args.output_name is not None:
        output_vars = tools.find_vars_by_name(output_vars, args.output_name)

    data = make_data_given_desc(args, input_vars)

    run_model(args, graph, input_vars, output_vars, data)


def _build_arg_parser():
    """Return the argument parser describing every option ``main`` accepts."""
    parser = argparse.ArgumentParser(
        description="load a network and run inference on random data",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("net")
    parser.add_argument(
        "--device", "-d", help="set default device, like 'gpux' or 'cpux'"
    )
    parser.add_argument(
        "--calc-output-rms",
        action="store_true",
        help="compute RMS of outputs; useful for comparing computing results",
    )
    parser.add_argument(
        "--output-name",
        nargs="*",
        help="Specify output name. This option can be"
        " specified multiple times. We will look for opr/var"
        " in the graph",
    )
    parser.add_argument(
        "--load-input-data",
        help="load input data from pickle file; it should be"
        " a numpy array or a dict of numpy array",
    )
    parser.add_argument("--profile", help="profiler output file")
    parser.add_argument(
        "--fast-run",
        action="store_true",
        help="enable fast running by profiling conv algorithms during compiling.",
    )
    parser.add_argument(
        "--reproducible", action="store_true", help="use reproducible kernels"
    )
    # The example separates inputs with ";" so that it agrees with the format
    # stated in the same help text.
    parser.add_argument(
        "--input-desc",
        help="specify input names and shapes manually in"
        " format: <name>:<shape>[;<name>:<shape>, ...], where"
        " name is a string and shape is a comma separated"
        ' string. e.g., "data:128,1,28,28;label:128".'
        " different input tensor are separated by semicolon.",
    )
    parser.add_argument(
        "--batchsize",
        type=int,
        help="change batchsize; the first dimension of each"
        " input is assumed to be batch size",
    )
    parser.add_argument(
        "--warm-up",
        action="store_true",
        help="warm up model before timing for better estimation",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="verbose output, logging in debug mode",
    )
    parser.add_argument(
        "--iter", type=int, default=1, help="number of iters to run the model"
    )
    parser.add_argument("--log", help="give a file path to duplicate log to")
    parser.add_argument(
        "--seed",
        type=int,
        default=0,
        help="seed for random number generator for input data",
    )
    parser.add_argument(
        "--rng",
        help="special RNG options to generate input data in"
        " format: <name>:func[;<name>:func, ...] where name is"
        " a string and func is a python expression containing"
        ' "{}" for the size param, e.g. '
        ' "label:randint(low=0,high=1000,size={})"',
    )
    parser.add_argument(
        "--focused-nvprof",
        action="store_true",
        help="only profile last iter for `nvprof --profile-from-start off`",
    )
    parser.add_argument(
        "--optimize-for-inference",
        action="store_true",
        help="optimize model for inference",
    )
    parser.add_argument(
        "--enable-io16xc32",
        action="store_true",
        help="transform the model to float16 io float32 compute",
    )
    parser.add_argument(
        "--enable-ioc16",
        action="store_true",
        help="transform the dtype of the model to float16 io and compute",
    )
    parser.add_argument(
        "--enable-hwcd4",
        action="store_true",
        help="transform the model format from NCHW to NHWCD4 for inference",
    )
    parser.add_argument(
        "--enable-nchw4",
        action="store_true",
        help="transform the model format from NCHW to NCHW4 for inference",
    )
    parser.add_argument(
        "--enable-nchw88",
        action="store_true",
        help="transform the model format from NCHW to NCHW88 for inference",
    )
    parser.add_argument(
        "--enable-nchw44",
        action="store_true",
        help="transform the model format from NCHW to NCHW44 for inference",
    )
    parser.add_argument(
        "--enable-nchw44-dot",
        action="store_true",
        help="transform the model format from NCHW to NCHW44_DOT "
        "for optimizing armv8.2 dot in inference",
    )
    parser.add_argument(
        "--enable-chwn4",
        action="store_true",
        help="transform the model format to CHWN4 "
        "for inference, mainly used for nvidia tensorcore",
    )
    parser.add_argument(
        "--enable-nchw32",
        action="store_true",
        help="transform the model format from NCHW4 to NCHW32 "
        "for inference on nvidia TensorCore",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-nonlinearity",
        action="store_true",
        help="fuse convolution bias and nonlinearity opr to a "
        "conv_bias opr and compute",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-with-z",
        action="store_true",
        help="fuse conv_bias with z input for inference on "
        "nvidia GPU (this optimization pass will result in mismatch "
        "of the precision of output of training and inference)",
    )
    parser.add_argument(
        "--dump-cpp-model",
        help="write a C++ model that can be loaded by "
        "megbrain/sdk/load-and-run; "
        "this implies --embed-input",
    )
    parser.add_argument(
        "--embed-input",
        action="store_true",
        help="embed input data as SharedDeviceTensor in model, "
        "to remove memory copy for inputs",
    )
    return parser
def make_feeds(args):
    """Load a model and build its test cases, optionally with expected outputs.

    Unless ``args.no_assert`` is set, the loaded graph is first run on each
    test case to record the outputs as ground truth, and the returned outputs
    are replaced by assert operators comparing an "expect" input with the
    real output.

    Args:
        args: parsed options. This function reads ``input``, ``no_assert``,
            ``data``, ``silent`` and ``maxerr``, and forwards ``args`` to
            ``gen_one_testcase``.

    Returns:
        ``(cg_rt, {"outputs": outputs, "testcases": testcases})`` where
        ``testcases`` is a list of dicts mapping input names (plus
        ``"<var name>:expect"`` keys when asserting) to values.
    """
    cg_rt, _, outputs = G.load_graph(args.input)
    inputs = cgtools.get_dep_vars(outputs, "Host2DeviceCopy")

    inputs = {i.name: i for i in inputs}
    if not args.no_assert:

        replace_varmap = {}
        inp_map = {}
        # replace var use InputNode
        for name, var in inputs.items():
            inp = G.InputNode(device="xpux",
                              dtype=var.dtype,
                              shape=var.shape,
                              graph=cg_rt)
            replace_varmap[var] = inp.outputs[0]
            inp_map[name] = inp

        new = cgtools.replace_vars(outputs, replace_varmap)
        if isinstance(new, rt.VarNode):
            # A single var must be wrapped, not iterated: ``list(var)`` would
            # try to iterate the var itself.
            new = [new]

        output_nodes = [G.OutputNode(var) for var in new]
        func = cg_rt.compile([node.outputs[0] for node in output_nodes])

        def make_dev_tensor(value, dtype=None, device=None):
            return as_raw_tensor(value, dtype=dtype,
                                 device=device)._dev_tensor()

        def calculate(*args, **kwargs):
            """Feed the named input values, run the graph, return outputs."""
            output_val = []
            # set inputs value
            for name, var in inputs.items():
                val = kwargs.pop(name, None)
                assert val is not None, "miss input name{}".format(name)
                dev_tensor = make_dev_tensor(val,
                                             dtype=var.dtype,
                                             device="xpux")
                inp_map[name].set_value(dev_tensor)

            func.execute()

            for res in output_nodes:
                output_val.append(res.get_value().numpy())
            return output_val

        def expect_name(var):
            return "{}:expect".format(var.name)

    testcases = []

    np.set_printoptions(precision=2, threshold=4, suppress=True)

    # An item starting with "@" names a file holding one input spec per
    # non-empty line; any other item is a spec itself.
    data_list = []
    for item in args.data:
        if item.startswith("@"):
            with open(item[1:], "r") as f:
                data_list.extend(
                    [line.rstrip() for line in f if line.rstrip() != ""])
        else:
            data_list.append(item)

    for inp_spec in data_list:
        cur_testcase = gen_one_testcase(args, inputs, inp_spec)
        assert len(cur_testcase) == len(
            inputs), "required inputs: {}; given data: {}".format(
                inputs.keys(), cur_testcase.keys())

        if not args.no_assert:
            # Record the model's own outputs as the expected values.
            outputs_get = calculate(**cur_testcase)
            for var, val in zip(outputs, outputs_get):
                cur_testcase[expect_name(var)] = val
                logger.info(
                    "generate test groundtruth: var={} shape={} range=({}, {})"
                    " mean={} var={}".format(var, val.shape, val.min(),
                                             val.max(), np.mean(val),
                                             np.var(val)))
        testcases.append(cur_testcase)
        logger.info("add testcase: \n {}".format("\n ".join(
            "{}: shape={} dtype={} range=({:.2f},{:.2f}) "
            "mean={:.2f} sd={:.2f}".format(k, v.shape, v.dtype, v.min(),
                                           v.max(), np.mean(v), np.std(v))
            for k, v in sorted(cur_testcase.items()))))

    if not args.no_assert:

        def expect_shp(var):
            # Fall back to the recorded value's shape when the var reports
            # an empty shape.
            ret = var.shape
            if ret:
                return ret
            return testcases[0][expect_name(var)].shape

        def assert_equal(expect, real, **kwargs):
            op = builtin.AssertEqual(**kwargs)
            (res, ) = apply(op, expect, real)
            return res

        verbose = not args.silent

        outputs_new = []
        for i in outputs:
            device = rt.CompNode("xpux")
            dtype = i.dtype
            name = expect_name(i)
            shape = expect_shp(i)
            # make expect output as one input of model.
            expect_get = rt.make_h2d(cg_rt, device, dtype, shape, name)
            # insert assert opr to check expect and real.
            outputs_new.append(
                assert_equal(
                    G.VarNode(expect_get),
                    G.VarNode(i),
                    verbose=verbose,
                    maxerr=args.maxerr,
                ))
            inputs[expect_name(i)] = expect_get
        outputs = outputs_new

    return cg_rt, {"outputs": outputs, "testcases": testcases}
Esempio n. 16
0
def load_comp_graph_from_file(path):
    """Load a serialized computing graph with the loader matching ``mge_version``.

    Versions up to and including 0.6.0 go through
    ``mgb.load_comp_graph_from_file``; later versions go through
    ``G.load_graph``.

    Args:
        path: model location, forwarded unchanged to the selected loader.

    Returns:
        A ``(cg, outputs)`` pair: the loaded graph and its output vars.
    """

    def release_tuple(version):
        # Compare versions numerically: as plain strings "0.10.0" sorts
        # before "0.6.0", which would select the wrong loader.
        numbers = []
        for piece in str(version).split("."):
            digits = ""
            for ch in piece:
                if ch not in "0123456789":
                    break
                digits += ch
            if not digits:
                # Stop at the first component without a leading number
                # (e.g. a "dev" suffix).
                break
            numbers.append(int(digits))
        return tuple(numbers)

    if release_tuple(mge_version) <= (0, 6, 0):
        cg, _, outputs = mgb.load_comp_graph_from_file(path)
    else:
        cg, _, outputs = G.load_graph(path)
    return cg, outputs