def test_load_refcnt():
    """Touch a loaded var node after the local graph reference is deleted.

    A constant var is dumped to bytes and loaded back; the name ``graph`` is
    then deleted and ``varnode.owner`` is accessed.
    """
    graph = mgb_graph.Graph()
    varnode = graph.make_const(0)
    buf, _ = mgb_graph.dump_graph([varnode])
    # Rebinding ``graph`` and ``varnode`` also drops this function's references
    # to the original graph and var node created above.
    graph, _, (varnode, ) = mgb_graph.load_graph(io.BytesIO(buf))
    del graph
    # Bare attribute access with no assertion: presumably the test only checks
    # that this does not fail once ``graph`` is gone -- TODO confirm.
    varnode.owner
def test_load_refcnt():
    """Touch a loaded var node after the load result and graph names are deleted.

    A constant var is dumped to bytes and loaded back; the graph and the single
    output var are taken from the load result, then ``ret`` and ``graph`` are
    deleted before ``varnode.owner`` is accessed.
    """
    graph = mgb_graph.Graph()
    varnode = graph.make_const(0)
    buf, _ = mgb_graph.dump_graph([varnode])
    ret = mgb_graph.load_graph(io.BytesIO(buf))
    # Rebinding ``graph`` and ``varnode`` also drops this function's references
    # to the original graph and var node created above.
    graph, (varnode, ) = ret.graph, ret.output_vars_list
    del ret
    del graph
    # Bare attribute access with no assertion: presumably the test only checks
    # that this does not fail once ``ret`` and ``graph`` are gone -- TODO confirm.
    varnode.owner
def _get_compiled_result(inp, dtype, shape, device, calc_func=None):
    """Run ``calc_func`` inside a freshly compiled graph and return a numpy result.

    An ``InputNode`` is created from ``device``/``dtype``/``shape``, its first
    output is passed to ``calc_func``, and the returned var is wrapped in an
    ``OutputNode`` that is compiled and executed once with ``inp`` as input.

    Note: ``calc_func`` defaults to ``None`` but is called unconditionally, so
    callers have to supply a callable.
    """
    cg = G.Graph()
    source = G.InputNode(device=device, dtype=dtype, shape=shape, graph=cg)
    sink = G.OutputNode(calc_func(source.outputs[0]))
    executable = cg.compile(sink.outputs[0])

    dev_value = as_raw_tensor(inp, dtype=dtype, device=device)._dev_tensor()
    source.set_value(dev_value)
    executable.execute()

    fetched = sink.get_value()
    return fetched.numpy()
def test_io():
    """Feed a device tensor via an input callback and read it back via an output callback."""
    graph = mgb_graph.Graph()
    host_data = np.random.randn(3).astype("float32")
    sent = Tensor(host_data, device="xpux")._dev_tensor()

    def provide_input():
        return sent

    in_var, _ = mgb_graph.input_callback(
        provide_input, device=sent.comp_node, dtype=sent.dtype, graph=graph
    )

    received = Future()
    out_var = mgb_graph.output_callback(received.set_result, in_var)

    run = graph.compile(out_var)
    run()

    # The value delivered to the output callback must equal what was fed in.
    np.testing.assert_equal(sent.numpy(), received.result().numpy())
def test_op():
    """Apply an ``Elemwise`` NEGATE op in a graph and compare against the input."""
    graph = mgb_graph.Graph()
    host_data = np.random.randn(10).astype("float32")
    sent = Tensor(host_data, device="xpux")._dev_tensor()

    def provide_input():
        return sent

    in_var, _ = mgb_graph.input_callback(
        provide_input, device=sent.comp_node, dtype=sent.dtype, graph=graph
    )

    negate = Elemwise(Elemwise.Mode.NEGATE)
    negated_var = mgb_graph.apply_normal_varnode(negate, in_var)[0]

    received = Future()
    out_var = mgb_graph.output_callback(received.set_result, negated_var)

    run = graph.compile(out_var)
    run()

    # Negating the graph output again must give back the original input.
    np.testing.assert_equal(sent.numpy(), -received.result().numpy())
def test_io2():
    """Pass three random tensors through an ``InputNode`` -> ``OutputNode`` graph."""
    graph = mgb_graph.Graph()
    graph.options.async_exec_level = 0b100
    dtype = "float32"
    device = "xpux"

    src = mgb_graph.InputNode(device=device, dtype=dtype, graph=graph)
    dst = mgb_graph.OutputNode(src.outputs[0])
    run = graph.compile(dst.outputs[0])

    rounds = 3
    for _ in range(rounds):
        # Execution is started before the input value is supplied.
        run.execute()
        host_data = np.random.randn(10).astype(dtype)
        sent = Tensor(host_data, device=device)._dev_tensor()
        src.set_value(sent)
        got = dst.get_value()
        np.testing.assert_equal(sent.numpy(), got.numpy())
        run.wait()
def test_exception():
    """Check that an error raised in an input callback surfaces to the caller.

    The input callback raises ``RuntimeError(err_msg)``. Executing the compiled
    graph and fetching the output must raise an exception whose text contains
    ``err_msg``. The test fails explicitly when nothing is raised, instead of
    passing without having checked anything.
    """
    err_msg = "QwQ"

    def throw_exc():
        raise RuntimeError(err_msg)

    g = mgb_graph.Graph()
    x, _ = mgb_graph.input_callback(throw_exc, device="xpux", dtype="float32", graph=g)
    y = mgb_graph.OutputNode(F.neg(x))
    f = g.compile(y.outputs[0])
    try:
        f.execute()
        y.get_value()
    except Exception as exc:
        # The exception type is not pinned; only the message has to carry over.
        assert err_msg in str(exc)
    else:
        # ``else`` is outside the ``except`` clause, so this is not swallowed.
        raise AssertionError(
            "expected the exception raised by the input callback to propagate"
        )
def test_exception():
    """Check that an error raised in an input callback surfaces to the caller.

    The input callback raises ``RuntimeError(err_msg)``; its var is negated with
    an ``Elemwise`` NEGATE op and wrapped in an ``OutputNode``. Executing the
    compiled graph and fetching the output must raise an exception whose text
    contains ``err_msg``. The test fails explicitly when nothing is raised,
    instead of passing without having checked anything.
    """
    err_msg = "QwQ"

    def throw_exc():
        raise RuntimeError(err_msg)

    g = mgb_graph.Graph()
    x, _ = mgb_graph.input_callback(throw_exc, device="xpux", dtype="float32", graph=g)
    neg = Elemwise(Elemwise.Mode.NEGATE)
    y = mgb_graph.OutputNode(mgb_graph.apply_normal_varnode(neg, x)[0])
    f = g.compile(y.outputs[0])
    try:
        f.execute()
        y.get_value()
    except Exception as exc:
        # The exception type is not pinned; only the message has to carry over.
        assert err_msg in str(exc)
    else:
        # ``else`` is outside the ``except`` clause, so this is not swallowed.
        raise AssertionError(
            "expected the exception raised by the input callback to propagate"
        )
def test_attr_output():
    """Check shape, dtype and device reported by an ``AttrOutputNode``.

    The same compiled graph is executed once per input shape, and the value
    obtained from the attribute output node is compared with the tensor fed in.
    """
    graph = mgb_graph.Graph()
    graph.options.async_exec_level = 0b100
    dtype = "float32"
    device = "xpux"

    src = mgb_graph.InputNode(device=device, dtype=dtype, graph=graph)
    attr_out = mgb_graph.AttrOutputNode(src.outputs[0])
    run = graph.compile(attr_out.outputs[0])

    shapes = ((2,), (3,), (5,))
    for shape in shapes:
        # Execution is started before the input value is supplied.
        run.execute()
        host_data = np.random.randn(*shape).astype(dtype)
        sent = Tensor(host_data, device=device)._dev_tensor()
        src.set_value(sent)
        attrs = attr_out.get_value()
        assert attrs.shape == shape
        assert attrs.dtype == np.dtype(dtype)
        assert attrs.device == device
        run.wait()
def test_assert_equal():
    """Dump an ``AssertEqual`` graph, reload it through ``Net`` and run it.

    The graph compares two host-to-device inputs with ``maxerr=1e-5``. It is
    serialised, loaded with ``Net.load``, dumped again and finally executed via
    ``GraphInference`` on two identical arrays.
    """
    graph = G.Graph()
    lhs = graph.make_h2d(dtype=np.float32, device="xpux")
    rhs = graph.make_h2d(dtype=np.float32, device="xpux")
    assert_op = builtin.AssertEqual(maxerr=1e-5)
    checked = G.apply_normal_varnode(assert_op, lhs._node, rhs._node)[0]
    graph.compile(checked)

    # First serialisation: raw graph dump into an in-memory stream.
    serialized = G.dump_graph([checked])[0]
    model_stream = io.BytesIO(serialized)

    # Second serialisation: round-trip through Net.
    network = Net.load(model_stream)
    redumped = io.BytesIO()
    network.dump(redumped)
    redumped.seek(0)

    inference = GraphInference(redumped)
    inference.run(np.array([1.0, 2.0]), np.array([1.0, 2.0]))
def test_replace_vars():
    """Replace a constant var with ``a + a`` and check the recomputed result.

    The original expression is ``(a + a) * const + a``. After substituting
    ``const`` with ``a + a`` and feeding ``a = 5`` the expected value is
    ``10 * 10 + 5 = 105``.
    """
    graph = mgb_graph.Graph()
    graph.options.async_exec_level = 0b100
    device = "xpux"
    dtype = np.float32

    inp = mgb_graph.InputNode(device=device, dtype=dtype, graph=graph)
    scale = graph.make_const(1.234)

    doubled = F.add(inp.outputs[0], inp.outputs[0])
    scaled = F.mul(doubled, scale)
    total = F.add(scaled, inp.outputs[0])

    substitutions = {scale._node: doubled._node}
    [replaced] = cgtools.replace_vars([total._node], substitutions)

    sink = mgb_graph.OutputNode(mgb_graph.VarNode(replaced))
    run = graph.compile(sink.outputs[0])
    run.execute()

    inp.set_value(make_dev_tensor(5.0, device=device))
    result = sink.get_value().numpy()
    np.testing.assert_equal(result, np.array([105.0]))
def test_replace_oprs():
    """Replace the ``a + a`` operator with ``a * a`` and check the result.

    The original expression is ``(a + a) * const`` with ``const = 1.25``. After
    swapping the ADD operator for the MUL operator and feeding ``a = 5`` the
    expected value is ``5 * 5 * 1.25``.
    """
    graph = mgb_graph.Graph()
    graph.options.async_exec_level = 0b100
    device = "xpux"
    dtype = np.float32

    inp = mgb_graph.InputNode(device=device, dtype=dtype, graph=graph)
    scale = graph.make_const(1.25, device=device)
    add = Elemwise(Elemwise.Mode.ADD)
    mul = Elemwise(Elemwise.Mode.MUL)

    doubled = apply_normal_varnode(add, inp.outputs[0], inp.outputs[0])[0]
    add_opr = doubled.op
    scaled = apply_normal_varnode(mul, doubled, scale)[0]
    squared = apply_normal_varnode(mul, inp.outputs[0], inp.outputs[0])[0]
    mul_opr = squared.op

    opr_map = {add_opr._node: mul_opr._node}
    [replaced] = cgtools.replace_oprs([scaled._node], opr_map)

    sink = mgb_graph.OutputNode(mgb_graph.VarNode(replaced))
    run = graph.compile(sink.outputs[0])
    run.execute()

    inp.set_value(make_dev_tensor(5.0, device=device))
    result = sink.get_value().numpy()
    np.testing.assert_equal(result, np.array([5.0 * 5.0 * 1.25]))
def main():
    """Pack a dumped graph and its testcase inputs into one output file.

    Steps, as performed below:

    1. Parse the command line.
    2. Build the feeds with ``make_feeds(args)`` and run
       ``optimize_for_inference`` on the output vars.
    3. Write the header ``b"mgbtest0"``, the number of testcases (packed with
       ``struct`` format ``"I"``) and the dumped graph to ``args.output``.
    4. For every testcase, append a dumped graph made of one constant per
       ``Host2DeviceCopy`` input (sorted by name) to the same file.

    Raises:
        AssertionError: if the feeds contain no testcase, if a testcase is not
            a dict, or if a testcase carries inputs the graph does not use.
    """
    parser = argparse.ArgumentParser(
        description="Pack computing graph, input values and expected output "
        "values into one file for checking correctness. README.md gives more "
        "details on the usage",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("input", help="MegEngine dumped model file")
    parser.add_argument("-o", "--output", help="output file", required=True)
    parser.add_argument(
        "-d",
        "--data",
        default=[],
        action="append",
        required=True,
        help="Given input test data when input file is a network, "
        "and current network output would be used as groundtruth. "
        "The format is var0:file0;var1:file1... to specify data files for "
        "input vars. It can also be #rand(min,max,shape...) for generating "
        "random input data, for example, #rand(0,255), "
        "#rand(0,255,1,3,224,224) or #rand(0, 255, 1, ...) where `...` means "
        "the remaining part of the original shape. "
        "If the shape is not specified, the shape of "
        "corresponding input tensors in the network will be used. "
        "If there is only one input var, its name can be omitted. "
        "Each data file can either be an image which can be loaded by opencv, "
        "or a pickled numpy.ndarray. "
        "This option can be given multiple times to add multiple testcases. "
        " *NOTE* "
        "If you start the data with the letter @, the rest should be a "
        "filename, and each line in the file should be a single datum in "
        "the format described above. ",
    )
    parser.add_argument(
        "--repeat",
        type=int,
        default=1,
        help="Specify how many times the input image is repeated. "
        "Useful when running benchmark for batch size other than one. "
        "Have no effect on randomly generated input data.",
    )
    parser.add_argument(
        "--silent",
        action="store_true",
        help="set verbose to False in asserti_equal opr",
    )
    parser.add_argument(
        "--optimize-for-inference",
        action="store_true",
        help="enbale optimization for inference",
    )
    parser.add_argument(
        "--no-assert",
        action="store_true",
        help="do not insert assert_equal opr to check result; "
        "this option is useful for benchmarking",
    )
    parser.add_argument(
        "--maxerr",
        type=float,
        default=1e-4,
        help="max error for assert_equal check during runtime",
    )
    parser.add_argument(
        "--resize-input",
        action="store_true",
        help="resize input image to fit input var shape",
    )
    parser.add_argument(
        "--input-transform",
        help="a python expression to transform the input data. "
        "Example: data / np.std(data)",
    )
    parser.add_argument(
        "--discard-var-name",
        action="store_true",
        help="discard variable and param names in the "
        "generated output",
    )
    parser.add_argument(
        "--output-strip-info",
        action="store_true",
        help="output code strip information",
    )
    parser.add_argument(
        "--enable-io16xc32",
        action="store_true",
        help="transform the mode to float16 io float32 compute",
    )
    parser.add_argument(
        "--enable-ioc16",
        action="store_true",
        help="transform the dtype of the model to float16 io "
        "and compute",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-nonlinearity",
        action="store_true",
        help="fuse convolution bias and nonlinearity opr to a "
        "conv_bias opr and compute",
    )
    parser.add_argument(
        "--enable-hwcd4",
        action="store_true",
        help="transform the model format from NCHW to NHWCD4 "
        "for inference; you may need to disable CUDA and set "
        "MGB_USE_MEGDNN_DBG=2",
    )
    parser.add_argument(
        "--enable-nchw4",
        action="store_true",
        help="transform the model format from NCHW to NCHW4 "
        "for inference",
    )
    parser.add_argument(
        "--enable-nchw88",
        action="store_true",
        help="transform the model format from NCHW to NCHW88 "
        "for inference",
    )
    parser.add_argument(
        "--enable-nchw44",
        action="store_true",
        help="transform the model format from NCHW to NCHW44 "
        "for inference",
    )
    parser.add_argument(
        "--enable-nchw44-dot",
        action="store_true",
        help="transform the model format from NCHW to NCHW44_DOT "
        "for optimizing armv8.2 dot in inference",
    )
    parser.add_argument(
        "--enable-nchw32",
        action="store_true",
        help="transform the model format from NCHW4 to NCHW32 "
        "for inference on nvidia TensoCore",
    )
    parser.add_argument(
        "--enable-chwn4",
        action="store_true",
        help="transform the model format to CHWN4 "
        "for inference, mainly used for nvidia tensorcore",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-with-z",
        action="store_true",
        help="fuse conv_bias with z input for inference on "
        "nvidia GPU (this optimization pass will result in mismatch "
        "of the precision of output of training and inference)",
    )
    args = parser.parse_args()

    _, feeds = make_feeds(args)
    assert isinstance(
        feeds, dict) and feeds["testcases"], "testcases can not be empty"

    output_mgbvars = feeds["outputs"]
    output_mgbvars = optimize_for_inference(args, output_mgbvars)

    # Graph inputs are identified by their Host2DeviceCopy operators and put in
    # a stable (name-sorted) order that the per-testcase dumps below follow.
    inputs = cgtools.get_dep_vars(output_mgbvars, "Host2DeviceCopy")
    inputs = sorted((i.name, i.dtype) for i in inputs)

    if args.discard_var_name:
        sereg_kwargs = dict(keep_var_name=0, keep_param_name=False)
    else:
        sereg_kwargs = dict(keep_var_name=2, keep_param_name=True)

    strip_info_file = args.output + '.json' if args.output_strip_info else None

    with open(args.output, "wb") as fout:
        fout.write(b"mgbtest0")
        fout.write(struct.pack("I", len(feeds["testcases"])))
        if isinstance(output_mgbvars, dict):
            # Iterate over (name, var) pairs; iterating the dict itself would
            # only yield its keys and break the two-name unpacking.
            wrap_output_vars = {
                name: VarNode(var) for name, var in output_mgbvars.items()
            }
        else:
            wrap_output_vars = [VarNode(i) for i in output_mgbvars]
        dump_content, stat = G.dump_graph(wrap_output_vars,
                                          append_json=True,
                                          strip_info_file=strip_info_file,
                                          **sereg_kwargs)
        fout.write(dump_content)
        logger.info(
            'graph dump sizes: tot_size={:.3f}KiB overhead={:.3f}KiB'.format(
                stat.tot_bytes / 1024,
                (stat.tot_bytes - stat.tensor_value_bytes) / 1024))

    def make_dev_tensor(value, dtype=None, device=None):
        """Convert ``value`` to a raw tensor and return its device tensor."""
        return as_raw_tensor(value, dtype=dtype, device=device)._dev_tensor()

    for testcase in feeds["testcases"]:
        assert isinstance(testcase, dict)
        cg = G.Graph()
        testcase_vars = []
        for name, dtype in inputs:
            # pop() consumes the entry so that leftovers can be detected below.
            testcase_vars.append(
                cg.make_const(
                    make_dev_tensor(testcase.pop(name),
                                    dtype=dtype,
                                    device="cpux")))
        assert not testcase, "extra inputs provided in testcase: {}".format(
            testcase.keys())
        # Each testcase is appended after the header and model written above.
        with open(args.output, "ab") as fout:
            dump_content, _ = G.dump_graph(testcase_vars,
                                           strip_info_file=strip_info_file,
                                           append_json=True)
            fout.write(dump_content)