def test_get_opr_seq():
    """Dump a traced reshape-and-add network and check ``get_oprs_seq`` lengths.

    The graph is traced symbolically, dumped without inference optimization,
    reloaded, and the operator sequence is requested twice: once with the
    second positional argument ``True`` (5 oprs expected) and once with
    ``False`` (6 oprs expected).
    """

    class Net(M.Module):
        """Module that adds a reshaped constant buffer to its input."""

        def __init__(self):
            super().__init__()
            self.data = megengine.tensor(
                np.random.random((1, 1, 4, 4)), dtype=np.float32
            )

        def forward(self, input):
            # The target shape is built from the input's leading dimension.
            side = input.shape[0]
            target_shape = astensor1d(
                (side, side), self.data, dtype="int32", device=input.device
            )
            reshaped = F.reshape(self.data, target_shape)
            return input + reshaped

    model = Net()
    sample = megengine.tensor(np.random.random((4, 4)), dtype=np.float32)

    @trace(symbolic=True, capture_as_const=True)
    def func(inp, *, net=None):
        return net(inp)

    func(sample, net=model)

    stream = io.BytesIO()
    func.dump(stream, optimize_for_inference=False)
    stream.seek(0)
    *_, outputs = mgb_graph.load_graph(stream)

    with_flag = cgtools.get_oprs_seq(outputs, True)
    assert len(with_flag) == 5

    without_flag = cgtools.get_oprs_seq(outputs, False)
    assert len(without_flag) == 6
def test_load_refcnt():
    """Access a loaded var's owner after the loaded graph name is deleted.

    A constant var is dumped and reloaded; the local reference to the loaded
    graph is then dropped and ``owner`` is read from the loaded var.
    """
    cg = mgb_graph.Graph()
    var = cg.make_const(0)
    serialized, _ = mgb_graph.dump_graph([var])

    # Rebind the same names straight from the call result so that no extra
    # reference to the returned tuple is kept alive.
    cg, _, (var,) = mgb_graph.load_graph(io.BytesIO(serialized))
    del cg

    # Attribute access only; it must not fail once ``cg`` is gone.
    var.owner
def load_comp_graph_from_file(path):
    """Load a computing graph from ``path`` using the API matching ``mge_version``.

    For versions up to and including 0.6.0 the legacy
    ``mgb.load_comp_graph_from_file`` is used; otherwise ``G.load_graph`` is
    called and the graph and output list are read from its result object.

    The version check compares numeric release components instead of raw
    strings, because a lexicographic comparison orders e.g. ``"0.10.0"``
    before ``"0.6.0"``.

    Args:
        path: model location passed through unchanged to the loader.

    Returns:
        A ``(cg, outputs)`` pair: the loaded graph and its output vars.
    """
    import re

    def _release(version):
        # Leading dotted-number prefix as an int tuple, or None if absent.
        found = re.match(r"\d+(?:\.\d+)*", str(version))
        if found is None:
            return None
        return tuple(int(part) for part in found.group(0).split("."))

    release = _release(mge_version)
    if release is None:
        # Unparsable version: keep the historical string comparison.
        use_legacy = mge_version <= "0.6.0"
    else:
        use_legacy = release <= (0, 6, 0)

    if use_legacy:
        cg, _, outputs = mgb.load_comp_graph_from_file(path)
    else:
        ret = G.load_graph(path)
        cg = ret.graph
        outputs = ret.output_vars_list
    return cg, outputs
def test_load_refcnt():
    """Access a loaded var's owner after the load result and graph are deleted.

    A constant var is dumped and reloaded through the result-object form of
    ``load_graph``; both the result object and the graph name are deleted
    before ``owner`` is read from the loaded var.
    """
    cg = mgb_graph.Graph()
    var = cg.make_const(0)
    serialized, _ = mgb_graph.dump_graph([var])

    loaded = mgb_graph.load_graph(io.BytesIO(serialized))
    cg, (var,) = loaded.graph, loaded.output_vars_list

    # Drop every local handle except the var itself.
    del loaded, cg

    # Attribute access only; it must not fail once the handles are gone.
    var.owner
def test_optimize_for_inference():
    """Dump ``exp(x)`` with ``enable_io16xc32`` and check the reloaded dtype.

    The traced function is dumped to a temporary file path, reloaded with
    ``G.load_graph``, and the first input of the output's owner opr is
    expected to be ``float16``.
    """
    import contextlib
    import os

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return exp(x)

    fd, out = mkstemp()
    # mkstemp hands back an open OS-level descriptor; only the path is used
    # here, so close it right away instead of leaking it.
    os.close(fd)
    try:
        f(tensor(5.0))
        f.dump(out, enable_io16xc32=True)

        res = G.load_graph(out)
        computing_input = res.output_vars_list[0].owner.inputs[0]
        assert computing_input.dtype == np.float16
    finally:
        # Best-effort cleanup of the temporary model file.
        with contextlib.suppress(OSError):
            os.remove(out)
def test_goptions_log_exp():
    """Compare dumped opr counts of ``log(exp(x))`` at ``opt_level`` 0 and 1.

    Both traced variants are dumped to the same temporary file (one after
    the other) without inference optimization; the ``opt_level=0`` graph is
    expected to contain exactly two more oprs than the ``opt_level=1`` one.
    """
    import contextlib
    import os

    @trace(symbolic=True, opt_level=0, capture_as_const=True)
    def f(x):
        return log(exp(x))

    @trace(symbolic=True, opt_level=1, capture_as_const=True)
    def g(x):
        return log(exp(x))

    f(tensor(1.0))
    fd, out = mkstemp()
    # mkstemp hands back an open OS-level descriptor; only the path is used
    # here, so close it right away instead of leaking it.
    os.close(fd)
    try:
        f.dump(out, optimize_for_inference=False)
        *_, outputs = G.load_graph(out)
        oprs_1 = cgtools.get_oprs_seq(outputs)

        g(tensor(1.0))
        g.dump(out, optimize_for_inference=False)
        *_, outputs = G.load_graph(out)
        oprs_2 = cgtools.get_oprs_seq(outputs)

        assert len(oprs_1) - len(oprs_2) == 2
    finally:
        # Best-effort cleanup of the temporary model file.
        with contextlib.suppress(OSError):
            os.remove(out)
def test_catch_input_name(tensor_name, var_name):
    """Check the input var name seen by the last opr of a dumped graph.

    Args:
        tensor_name: name given to the input ``Tensor``.
        var_name: name expected on the first input of the last opr after
            dumping with ``keep_opr_name=True`` and ``keep_var_name=2``.
    """

    def f(x):
        return 2 * x

    traced = trace(f, symbolic=True, capture_as_const=True)
    named_input = Tensor(np.ones(shape=(2, 3)), name=tensor_name)
    traced(named_input).numpy()

    stream = io.BytesIO()
    traced.dump(
        stream,
        optimize_for_inference=False,
        keep_opr_name=True,
        keep_var_name=2,
    )
    stream.seek(0)

    *_, outputs = G.load_graph(stream)
    last_opr = cgtools.get_oprs_seq(outputs)[-1]
    assert last_opr.inputs[0].name == var_name
def change_batch_and_dump(inp_file, oup_file):
    """Rewrite a dumped graph so every ``Host2DeviceCopy`` input has batch 1.

    Args:
        inp_file: model to load with ``G.load_graph``.
        oup_file: path the re-dumped graph bytes are written to.
    """
    cg, _, outputs = G.load_graph(inp_file)
    h2d_vars = cgtools.get_dep_vars(outputs[0], "Host2DeviceCopy")

    def _batch_one_copy(var):
        # Same dtype and name as ``var``, with the first dimension set to 1.
        shape = list(var.shape)
        shape[0] = 1
        return make_h2d(cg, "xpux", var.dtype, shape, var.name)

    replacements = {var: _batch_one_copy(var) for var in h2d_vars}
    rewired = cgtools.replace_vars(outputs, replacements)

    payload, _ = G.dump_graph(map(G.VarNode, rewired), keep_var_name=2)
    with open(oup_file, "wb") as sink:
        sink.write(payload)
def _dump_and_load(func, symbolic, keep_opr_name=True):
    """Trace ``func``, dump it to memory, reload it and return its opr sequence.

    Args:
        func: callable taking one tensor; traced with ``capture_as_const=True``.
        symbolic: forwarded to ``trace``.
        keep_opr_name: forwarded to ``dump``.

    Returns:
        The result of ``cgtools.get_oprs_seq`` on the reloaded output vars.
    """
    AutoNaming.clear()
    traced = trace(func, symbolic=symbolic, capture_as_const=True)

    ones = Tensor(np.ones(shape=(2, 3)))
    traced(ones).numpy()

    stream = io.BytesIO()
    dump_options = dict(
        optimize_for_inference=False,
        arg_names=("x",),
        keep_opr_name=keep_opr_name,
        keep_var_name=2,
    )
    traced.dump(stream, **dump_options)
    stream.seek(0)

    reloaded_outputs = G.load_graph(stream).output_vars_list
    return cgtools.get_oprs_seq(reloaded_outputs)
def test_optimize_for_inference():
    """Re-dump a traced ``exp`` model via ``Net`` with ``enable_io16xc32``.

    The unoptimized dump is loaded with ``Net.load``, dumped again with
    ``enable_io16xc32=True``, and the first input of the reloaded output's
    owner opr is expected to be ``float16``.
    """

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return F.exp(x)

    plain_dump = io.BytesIO()
    f(Tensor(5.0))
    f.dump(plain_dump, optimize_for_inference=False)
    plain_dump.seek(0)

    converted_dump = io.BytesIO()
    network = Net.load(plain_dump)
    network.dump(converted_dump, enable_io16xc32=True)
    converted_dump.seek(0)

    loaded = G.load_graph(converted_dump)
    first_output = loaded.output_vars_list[0]
    computing_input = first_output.owner.inputs[0]
    assert computing_input.dtype == np.float16
def test_dump_volatile():
    """Check that a captured tensor is dumped as an ``ImmutableTensor`` opr.

    ``x * p`` is traced with ``p`` captured from the enclosing scope; after
    a few consistent runs the graph is dumped and reloaded, and the owner
    opr type of the output's second input is inspected.
    """
    p = tensor([2])

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return x * p

    x = tensor([3])
    expected = f(x).numpy()

    # Repeated calls must keep producing the first call's result.
    for _ in range(3):
        np.testing.assert_equal(f(x).numpy(), expected)

    stream = io.BytesIO()
    f.dump(stream, optimize_for_inference=False)
    stream.seek(0)

    cg, _, outputs = G.load_graph(stream)
    (out,) = outputs

    second_input = cgtools.get_owner_opr_inputs(out)[1]
    assert cgtools.get_owner_opr_type(second_input) == "ImmutableTensor"
def test_dump_volatile():
    """Check that a captured raw tensor is dumped as an ``ImmutableTensor`` opr.

    An elementwise MUL of the input with a captured raw tensor is traced;
    traced results are compared against the untraced ``__wrapped__`` call,
    then the graph is dumped, reloaded, and the owner opr type of the
    output's second input is inspected.
    """
    p = as_raw_tensor([2])

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        op = ops.Elemwise(Elemwise.Mode.MUL)
        (y,) = apply(op, x, p)
        return y

    x = as_raw_tensor([3]).numpy()
    # Reference value computed by the undecorated function.
    expected = f.__wrapped__(as_raw_tensor(x)).numpy()

    for _ in range(3):
        np.testing.assert_equal(f(as_raw_tensor(x)).numpy(), expected)

    stream = io.BytesIO()
    f.dump(stream, optimize_for_inference=False)
    stream.seek(0)

    cg, _, outputs = G.load_graph(stream)
    (out,) = outputs

    second_input = cgtools.get_owner_opr_inputs(out)[1]
    assert cgtools.get_owner_opr_type(second_input) == "ImmutableTensor"
def test_graph_traversal():
    """Run ``cgtools.graph_traversal`` on a dumped ``Conv2d`` graph.

    After tracing and dumping a single convolution, the traversal result is
    used to look up the first ``var2oprs`` entry for ``map_vars[1]``, whose
    var index is expected to be 0.
    """
    conv = M.Conv2d(3, 32, 3)

    @trace(symbolic=True, capture_as_const=True)
    def fun(data):
        return conv(data)

    batch = np.random.random([1, 3, 224, 224]).astype(np.float32)
    for _ in range(3):
        fun(megengine.tensor(batch))

    stream = io.BytesIO()
    fun.dump(stream, optimize_for_inference=False)
    stream.seek(0)

    cg, _, outputs = mgb_graph.load_graph(stream)
    _, map_vars, var2oprs, *_ = cgtools.graph_traversal(outputs)

    input_var = map_vars[1]
    first_use = var2oprs[input_var.id][0]
    _, var_idx = first_use
    assert var_idx == 0
def _build_arg_parser():
    """Create the command-line parser used by ``main``."""
    parser = argparse.ArgumentParser(
        description="load a network and run inference on random data",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("net")
    parser.add_argument(
        "--device", "-d", help="set default device, like 'gpux' or 'cpux'"
    )
    parser.add_argument(
        "--calc-output-rms",
        action="store_true",
        help="compute RMS of outputs; useful for comparing computing results",
    )
    parser.add_argument(
        "--output-name",
        nargs="*",
        help="Specify output name. This option can be"
        " specified multiple times. We will look for opr/var"
        " in the graph",
    )
    parser.add_argument(
        "--load-input-data",
        help="load input data from pickle file; it should be"
        " a numpy array or a dict of numpy array",
    )
    parser.add_argument("--profile", help="profiler output file")
    parser.add_argument(
        "--fast-run",
        action="store_true",
        help="enable fast running by profiling conv algorithms during compiling.",
    )
    parser.add_argument(
        "--reproducible", action="store_true", help="use reproducible kernels"
    )
    parser.add_argument(
        "--input-desc",
        # The example separates inputs with ';' to match the stated format.
        help="specify input names and shapes manually in"
        " format: <name>:<shape>[;<name>:<shape>, ...], where"
        " name is a string and shape is a comma separated"
        ' string. e.g., "data:128,1,28,28;label:128".'
        " different input tensor are separated by semicolon.",
    )
    parser.add_argument(
        "--batchsize",
        type=int,
        help="change batchsize; the first dimension of each"
        " input is assumed to be batch size",
    )
    parser.add_argument(
        "--warm-up",
        action="store_true",
        help="warm up model before do timing " " for better estimation",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="verbose output, logging in debug mode",
    )
    parser.add_argument(
        "--iter", type=int, default=1, help="number of iters to run the model"
    )
    parser.add_argument("--log", help="give a file path to duplicate log to")
    parser.add_argument(
        "--seed",
        type=int,
        default=0,
        help="seed for random number generator for input data",
    )
    parser.add_argument(
        "--rng",
        help="special RNG options to generate input data in"
        " format: <name>:func[;<name>:func, ...] where name is"
        " a string and func is a python expression containing"
        ' "{}" for the size param, e.g. '
        ' "label:randint(low=0,high=1000,size={})"',
    )
    parser.add_argument(
        "--focused-nvprof",
        action="store_true",
        help="only profile last iter for `nvprof --profile-from-start off`",
    )
    parser.add_argument(
        "--optimize-for-inference",
        action="store_true",
        help="optimize model for inference",
    )
    parser.add_argument(
        "--enable-io16xc32",
        action="store_true",
        help="transform the model to float16 io float32 compute",
    )
    parser.add_argument(
        "--enable-ioc16",
        action="store_true",
        help="transform the dtype of the model to float16 io and compute",
    )
    parser.add_argument(
        "--enable-hwcd4",
        action="store_true",
        help="transform the model format from NCHW to NHWCD4 for inference",
    )
    parser.add_argument(
        "--enable-nchw4",
        action="store_true",
        help="transform the model format from NCHW to NCHW4 for inference",
    )
    parser.add_argument(
        "--enable-nchw88",
        action="store_true",
        help="transform the model format from NCHW to NCHW88 for inference",
    )
    parser.add_argument(
        "--enable-nchw44",
        action="store_true",
        help="transform the model format from NCHW to NCHW44 for inference",
    )
    parser.add_argument(
        "--enable-nchw44-dot",
        action="store_true",
        help="transform the model format from NCHW to NCHW44_DOT "
        "for optimizing armv8.2 dot in inference",
    )
    parser.add_argument(
        "--enable-chwn4",
        action="store_true",
        help="transform the model format to CHWN4 "
        "for inference, mainly used for nvidia tensorcore",
    )
    parser.add_argument(
        "--enable-nchw32",
        action="store_true",
        help="transform the model format from NCHW4 to NCHW32 "
        "for inference on nvidia TensorCore",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-nonlinearity",
        action="store_true",
        help="fuse convolution bias and nonlinearity opr to a "
        "conv_bias opr and compute",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-with-z",
        action="store_true",
        help="fuse conv_bias with z input for inference on "
        "nvidia GPU (this optimization pass will result in mismatch "
        "of the precision of output of training and inference)",
    )
    parser.add_argument(
        "--dump-cpp-model",
        help="write a C++ model that can be loaded by "
        "megbrain/sdk/load-and-run; "
        "this implies --embed-input",
    )
    parser.add_argument(
        "--embed-input",
        action="store_true",
        help="embed input data as SharedDeviceTensor in model, "
        "to remove memory copy for inputs",
    )
    return parser


def main():
    """Parse command-line options, load the network and run it.

    Logging, log file and default device are configured from the parsed
    options before the model is loaded. Input vars are the
    ``Host2DeviceCopy`` dependencies of the loaded outputs; when
    ``--output-name`` is given, the outputs are replaced by the vars found
    under those names. Input data comes from ``make_data_given_desc`` and
    execution is delegated to ``run_model``.
    """
    args = _build_arg_parser().parse_args()

    if args.verbose:
        enable_debug_log()
    if args.log:
        set_log_file(args.log)
    if args.device:
        set_default_device(args.device)
    if args.dump_cpp_model:
        # Dumping a C++ model implies embedding the input data.
        args.embed_input = True

    logger.info("loading model ...")
    graph, _, output_vars = G.load_graph(args.net)
    input_vars = tools.get_dep_vars(output_vars, "Host2DeviceCopy")

    if args.output_name is not None:
        output_vars = tools.find_vars_by_name(output_vars, args.output_name)

    data = make_data_given_desc(args, input_vars)
    run_model(args, graph, input_vars, output_vars, data)
def make_feeds(args):
    """Load a model, generate testcases and optionally append assert oprs.

    Reads from ``args``: ``input`` (model to load), ``data`` (input specs; an
    item starting with ``@`` names a text file with one spec per non-empty
    line), ``no_assert``, ``silent`` and ``maxerr``.

    Unless ``args.no_assert`` is set, the loaded graph is compiled and run
    on every testcase to produce expected outputs (stored in the testcase
    under ``"<var name>:expect"``), and every output var is replaced by an
    ``AssertEqual`` opr comparing it with a new host-to-device input of that
    name.

    Returns:
        ``(cg_rt, {"outputs": outputs, "testcases": testcases})``.
    """
    cg_rt, _, outputs = G.load_graph(args.input)
    inputs = cgtools.get_dep_vars(outputs, "Host2DeviceCopy")
    # Index the input vars by name.
    inputs = {i.name: i for i in inputs}
    if not args.no_assert:
        replace_varmap = {}
        inp_map = {}
        # Replace every input var with the output of a fresh InputNode so
        # values can be fed in before execution.
        for name, var in inputs.items():
            inp = G.InputNode(device="xpux", dtype=var.dtype, shape=var.shape, graph=cg_rt)
            replace_varmap[var] = inp.outputs[0]
            inp_map[name] = inp
        new = cgtools.replace_vars(outputs, replace_varmap)
        # NOTE(review): list(new) iterates a single VarNode rather than
        # wrapping it; `[new]` may have been intended -- confirm.
        if isinstance(new, rt.VarNode):
            new = list(new)
        output_nodes = [G.OutputNode(var) for var in new]
        func = cg_rt.compile([node.outputs[0] for node in output_nodes])

        def make_dev_tensor(value, dtype=None, device=None):
            # Convert a host value into a device tensor.
            return as_raw_tensor(value, dtype=dtype, device=device)._dev_tensor()

        def calculate(*args, **kwargs):
            # Run the compiled graph on inputs given by keyword (one per
            # input name) and return the outputs as numpy arrays.
            output_val = []
            # Feed each named input; every input name must be supplied.
            for name, var in inputs.items():
                val = kwargs.pop(name, None)
                assert val is not None, "miss input name{}".format(name)
                dev_tensor = make_dev_tensor(val, dtype=var.dtype, device="xpux")
                inp_map[name].set_value(dev_tensor)
            func.execute()
            for res in output_nodes:
                output_val.append(res.get_value().numpy())
            return output_val

    def expect_name(var):
        # Key / input name under which the expected value of `var` is stored.
        return "{}:expect".format(var.name)

    testcases = []
    np.set_printoptions(precision=2, threshold=4, suppress=True)
    # Expand "@file" items into the non-empty lines of that file.
    data_list = []
    for item in args.data:
        if item.startswith("@"):
            with open(item[1:], "r") as f:
                data_list.extend(
                    [line.rstrip() for line in f if line.rstrip() != ""])
        else:
            data_list.append(item)
    for inp_spec in data_list:
        cur_testcase = gen_one_testcase(args, inputs, inp_spec)
        assert len(cur_testcase) == len(
            inputs), "required inputs: {}; given data: {}".format(
                inputs.keys(), cur_testcase.keys())
        if not args.no_assert:
            # Compute the ground truth and store it alongside the inputs.
            outputs_get = calculate(**cur_testcase)
            for var, val in zip(outputs, outputs_get):
                cur_testcase[expect_name(var)] = val
                logger.info(
                    "generate test groundtruth: var={} shape={} range=({}, {})"
                    " mean={} var={}".format(var, val.shape, val.min(), val.max(), np.mean(val), np.var(val)))
        testcases.append(cur_testcase)
        logger.info("add testcase: \n {}".format("\n ".join(
            "{}: shape={} dtype={} range=({:.2f},{:.2f}) "
            "mean={:.2f} sd={:.2f}".format(k, v.shape, v.dtype, v.min(), v.max(), np.mean(v), np.std(v))
            for k, v in sorted(cur_testcase.items()))))
    if not args.no_assert:

        def expect_shp(var):
            # Use the var's own shape when it is truthy, otherwise fall back
            # to the shape of the first testcase's expected value.
            ret = var.shape
            if ret:
                return ret
            return testcases[0][expect_name(var)].shape

        def assert_equal(expect, real, **kwargs):
            # Apply an AssertEqual opr to (expect, real); return its result.
            op = builtin.AssertEqual(**kwargs)
            (res, ) = apply(op, expect, real)
            return res

        verbose = not args.silent
        outputs_new = []
        for i in outputs:
            device = rt.CompNode("xpux")
            dtype = i.dtype
            name = expect_name(i)
            shape = expect_shp(i)
            # Add the expected output as an extra input of the model.
            expect_get = rt.make_h2d(cg_rt, device, dtype, shape, name)
            # Insert an assert opr comparing the expected and real outputs.
            outputs_new.append(
                assert_equal(
                    G.VarNode(expect_get),
                    G.VarNode(i),
                    verbose=verbose,
                    maxerr=args.maxerr,
                ))
            inputs[expect_name(i)] = expect_get
        outputs = outputs_new
    return cg_rt, {"outputs": outputs, "testcases": testcases}
def load_comp_graph_from_file(path):
    """Load a computing graph from ``path`` using the API matching ``mge_version``.

    For versions up to and including 0.6.0 the legacy
    ``mgb.load_comp_graph_from_file`` is used; otherwise ``G.load_graph`` is
    called. Both calls are unpacked as a 3-tuple whose middle element is
    discarded.

    The version check compares numeric release components instead of raw
    strings, because a lexicographic comparison orders e.g. ``"0.10.0"``
    before ``"0.6.0"``.

    Args:
        path: model location passed through unchanged to the loader.

    Returns:
        A ``(cg, outputs)`` pair: the loaded graph and its output vars.
    """
    import re

    def _release(version):
        # Leading dotted-number prefix as an int tuple, or None if absent.
        found = re.match(r"\d+(?:\.\d+)*", str(version))
        if found is None:
            return None
        return tuple(int(part) for part in found.group(0).split("."))

    release = _release(mge_version)
    if release is None:
        # Unparsable version: keep the historical string comparison.
        use_legacy = mge_version <= "0.6.0"
    else:
        use_legacy = release <= (0, 6, 0)

    if use_legacy:
        cg, _, outputs = mgb.load_comp_graph_from_file(path)
    else:
        cg, _, outputs = G.load_graph(path)
    return cg, outputs