def test_load_refcnt():
    """Read ``owner`` from a loaded var after the loaded graph name is deleted.

    A constant var is dumped, loaded back, and the only local name bound to
    the loaded graph is removed before the var's ``owner`` is accessed.
    """
    g = mgb_graph.Graph()
    node = g.make_const(0)
    serialized, _ = mgb_graph.dump_graph([node])
    # Unpack straight into the existing names: the returned tuple is a
    # temporary, so no local container keeps referencing the loaded graph.
    g, _, (node,) = mgb_graph.load_graph(io.BytesIO(serialized))
    del g
    # Attribute access only; the value itself is not inspected.
    node.owner
def test_load_refcnt():
    """Read ``owner`` from a loaded var after result and graph names are deleted.

    A constant var is dumped and loaded back; the load result object and the
    loaded graph are both unbound locally before the var's ``owner`` is
    accessed.
    """
    g = mgb_graph.Graph()
    node = g.make_const(0)
    serialized, _ = mgb_graph.dump_graph([node])
    loaded = mgb_graph.load_graph(io.BytesIO(serialized))
    # Both attributes are read before either name is rebound.
    g, (node,) = loaded.graph, loaded.output_vars_list
    # Drop the result object first, then the graph, leaving only the var.
    del loaded, g
    # Attribute access only; the value itself is not inspected.
    node.owner
def change_batch_and_dump(inp_file, oup_file):
    """Rewrite a dumped graph so its inputs have a leading dimension of 1.

    The graph is loaded from ``inp_file``; every ``Host2DeviceCopy`` var that
    the first output depends on is replaced by a new ``make_h2d`` var on
    device ``"xpux"`` with the same dtype and name but with ``shape[0]`` set
    to 1. The rewired outputs are dumped with ``keep_var_name=2`` and the
    resulting bytes are written to ``oup_file``.

    Args:
        inp_file: source passed to ``G.load_graph``.
        oup_file: path opened in binary write mode for the new dump.
    """
    graph, _, out_vars = G.load_graph(inp_file)

    def _with_unit_batch(src):
        # Copy the shape, then overwrite only the first dimension.
        dims = list(src.shape)
        dims[0] = 1
        return make_h2d(graph, "xpux", src.dtype, dims, src.name)

    h2d_vars = cgtools.get_dep_vars(out_vars[0], "Host2DeviceCopy")
    substitutions = {src: _with_unit_batch(src) for src in h2d_vars}

    rewired = cgtools.replace_vars(out_vars, substitutions)
    payload, _ = G.dump_graph(map(G.VarNode, rewired), keep_var_name=2)
    with open(oup_file, "wb") as sink:
        sink.write(payload)
def test_assert_equal():
    """Round-trip an ``AssertEqual`` graph through dump/``Net`` and run it.

    Two float32 host-to-device inputs feed an ``AssertEqual`` op
    (``maxerr=1e-5``). The compiled output is dumped, loaded with
    ``Net.load``, dumped again through ``Net.dump`` and finally executed via
    ``GraphInference`` with two identical arrays.
    """
    graph = G.Graph()
    lhs = graph.make_h2d(dtype=np.float32, device="xpux")
    rhs = graph.make_h2d(dtype=np.float32, device="xpux")
    assert_op = builtin.AssertEqual(maxerr=1e-5)
    checked = G.apply_normal_varnode(assert_op, lhs._node, rhs._node)[0]
    graph.compile(checked)

    # First element of the dump result is the serialized model.
    serialized = G.dump_graph([checked])[0]
    model_stream = io.BytesIO(serialized)
    net = Net.load(model_stream)

    redumped = io.BytesIO()
    net.dump(redumped)
    # Rewind so the inference wrapper reads from the start of the buffer.
    redumped.seek(0)

    # Rebinding the name drops this function's reference to the build graph.
    graph = GraphInference(redumped)
    graph.run(np.array([1.0, 2.0]), np.array([1.0, 2.0]))
def run_model(args, graph, inputs, outputs, data):
    """Prepare a graph, execute it ``args.iter`` times, and report timing.

    Steps, in order: force graph optimization level 0, apply the execution
    strategy to ``outputs``, optionally optimize for inference, embed or
    convert the inputs, optionally dump the model to ``args.dump_cpp_model``,
    convert the outputs, compile, optionally warm up, then run the timed
    iterations. Afterwards an optional profiled run and a profile dump are
    performed.

    Args:
        args: parsed options. Attributes read here are
            ``optimize_for_inference``, ``embed_input``, ``dump_cpp_model``,
            ``profile``, ``warm_up``, ``iter``, ``calc_output_rms`` and
            ``focused_nvprof``; ``get_execution_strategy`` and
            ``get_opt_kwargs`` also receive ``args``.
        graph: graph whose options are adjusted and which is compiled.
        inputs: forwarded as ``inputs=`` to ``tools.embed_inputs`` or
            ``tools.convert_inputs``.
        outputs: output vars to optimize, dump and compile.
        data: mapping from input name to the array fed for that input; each
            value must expose ``.shape``.

    Returns:
        The running average speed after the last timed iteration: iterations
        per second, multiplied by ``data["data"].shape[0]`` when ``data`` has
        a ``"data"`` key. ``None`` when no timed iteration ran (``args.iter``
        is 0), instead of failing on an unbound local.
    """
    # must use level0 to avoid unintended opr modification
    graph.options.graph_opt_level = 0

    logger.info("input tensors: ")
    for k, v in data.items():
        logger.info(" {}: {}".format(k, v.shape))

    G.modify_opr_algo_strategy_inplace(outputs, get_execution_strategy(args))

    if args.optimize_for_inference:
        opt_kwargs = get_opt_kwargs(args)
        outputs = G.optimize_for_inference(outputs, **opt_kwargs)

    # embed inputs must be on the last, to avoid const fold
    if args.embed_input:
        outputs, inp_dict = tools.embed_inputs(outputs, data.values(), inputs=inputs)
    else:
        outputs, inp_dict = tools.convert_inputs(outputs, inputs=inputs)

    if args.dump_cpp_model:
        dump_content, _ = G.dump_graph(outputs, keep_var_name=2)
        with open(args.dump_cpp_model, "wb") as file:
            file.write(dump_content)
        logger.info("C++ model written to {}".format(args.dump_cpp_model))

    outputs, output_dict = tools.convert_outputs(outputs)

    # The profiler is created before the graph is compiled.
    if args.profile:
        profiler = tools.GraphProfiler(graph)

    func = graph.compile(outputs)

    def run():
        # Feed inputs unless they were embedded into the graph, execute once,
        # and collect every output as a numpy array.
        if not args.embed_input:
            for key in inp_dict:
                inp_dict[key].set_value(mge.Tensor(data[key])._dev_tensor())
        func.execute()
        func.wait()
        return [oup_node.get_value().numpy() for oup_node in output_dict.values()]

    if args.warm_up:
        logger.info("warming up")
        run()

    # Bound up front so the final ``return`` is valid even when the loop
    # below does not execute a single iteration.
    avg_speed = None
    total_time = 0

    for i in range(args.iter):
        logger.info("iter {}".format(i))
        start_time = time.time()
        retval = run()
        cur_time = time.time() - start_time
        total_time += cur_time

        avg_speed = (i + 1) / total_time
        if "data" in data:
            # Scale by the first dimension of the "data" input.
            avg_speed *= data["data"].shape[0]
            avg_speed_txt = "{:.3f}sample/s".format(avg_speed)
        else:
            avg_speed_txt = "{:.3f}batch/s".format(avg_speed)

        msg = (
            "iter {}: duration={:.4f}({:.4f})s average={:.4f}s "
            "avg_speed={} time={:.4f}s"
        ).format(
            i,
            cur_time,
            func.get_prev_exec_time(),
            total_time / (i + 1),
            avg_speed_txt,
            total_time,
        )
        if args.calc_output_rms:
            rms = [
                "{:.3g}".format(float(((v ** 2).mean()) ** 0.5)) for v in retval
            ]
            msg += " output_rms=[{}]".format(", ".join(rms))
        # Print directly when the logger level is above INFO, so the
        # per-iteration line is still shown.
        if logger.level > logging.INFO:
            print(msg)
        else:
            logger.info(msg)

    if args.focused_nvprof:
        if get_device_count("gpu") < 1:
            logger.warning(
                "No cuda device detected. ``focused_nvprof`` will be ignored."
            )
        else:
            try:
                import pycuda.driver as D

                D.start_profiler()
                func.execute()
                func.wait()
                D.stop_profiler()
            except ImportError:
                logger.error("`focused_nvprof need pycuda`", exc_info=True)

    if args.profile:
        with open(args.profile, "w") as fout:
            fout.write(profiler.get())

    return avg_speed
def _build_arg_parser():
    """Create the command-line parser used by ``main``."""
    parser = argparse.ArgumentParser(
        description="Pack computing graph, input values and expected output "
        "values into one file for checking correctness. README.md gives more "
        "details on the usage",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("input", help="MegEngine dumped model file")
    parser.add_argument("-o", "--output", help="output file", required=True)
    parser.add_argument(
        "-d",
        "--data",
        default=[],
        action="append",
        required=True,
        help="Given input test data when input file is a network, "
        "and current network output would be used as groundtruth. "
        "The format is var0:file0;var1:file1... to specify data files for "
        "input vars. It can also be #rand(min,max,shape...) for generating "
        "random input data, for example, #rand(0,255), "
        "#rand(0,255,1,3,224,224) or #rand(0, 255, 1, ...) where `...` means "
        "the remaining part of the original shape. "
        "If the shape is not specified, the shape of "
        "corresponding input tensors in the network will be used. "
        "If there is only one input var, its name can be omitted. "
        "Each data file can either be an image which can be loaded by opencv, "
        "or a pickled numpy.ndarray. "
        "This option can be given multiple times to add multiple testcases. "
        " *NOTE* "
        "If you start the data with the letter @, the rest should be a "
        "filename, and each line in the file should be a single datum in "
        "the format described above. ",
    )
    parser.add_argument(
        "--repeat",
        type=int,
        default=1,
        help="Specify how many times the input image is repeated. "
        "Useful when running benchmark for batch size other than one. "
        "Have no effect on randomly generated input data.",
    )
    parser.add_argument(
        "--silent",
        action="store_true",
        help="set verbose to False in asserti_equal opr",
    )
    parser.add_argument(
        "--optimize-for-inference",
        action="store_true",
        help="enbale optimization for inference",
    )
    parser.add_argument(
        "--no-assert",
        action="store_true",
        help="do not insert assert_equal opr to check result; "
        "this option is useful for benchmarking",
    )
    parser.add_argument(
        "--maxerr",
        type=float,
        default=1e-4,
        help="max error for assert_equal check during runtime",
    )
    parser.add_argument(
        "--resize-input",
        action="store_true",
        help="resize input image to fit input var shape",
    )
    parser.add_argument(
        "--input-transform",
        help="a python expression to transform the input data. "
        "Example: data / np.std(data)",
    )
    parser.add_argument(
        "--discard-var-name",
        action="store_true",
        help="discard variable and param names in the "
        "generated output",
    )
    parser.add_argument(
        "--output-strip-info",
        action="store_true",
        help="output code strip information",
    )
    parser.add_argument(
        "--enable-io16xc32",
        action="store_true",
        help="transform the mode to float16 io float32 compute",
    )
    parser.add_argument(
        "--enable-ioc16",
        action="store_true",
        help="transform the dtype of the model to float16 io "
        "and compute",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-nonlinearity",
        action="store_true",
        help="fuse convolution bias and nonlinearity opr to a "
        "conv_bias opr and compute",
    )
    parser.add_argument(
        "--enable-hwcd4",
        action="store_true",
        help="transform the model format from NCHW to NHWCD4 "
        "for inference; you may need to disable CUDA and set "
        "MGB_USE_MEGDNN_DBG=2",
    )
    parser.add_argument(
        "--enable-nchw4",
        action="store_true",
        help="transform the model format from NCHW to NCHW4 "
        "for inference",
    )
    parser.add_argument(
        "--enable-nchw88",
        action="store_true",
        help="transform the model format from NCHW to NCHW88 "
        "for inference",
    )
    parser.add_argument(
        "--enable-nchw44",
        action="store_true",
        help="transform the model format from NCHW to NCHW44 "
        "for inference",
    )
    parser.add_argument(
        "--enable-nchw44-dot",
        action="store_true",
        help="transform the model format from NCHW to NCHW44_DOT "
        "for optimizing armv8.2 dot in inference",
    )
    parser.add_argument(
        "--enable-nchw32",
        action="store_true",
        help="transform the model format from NCHW4 to NCHW32 "
        "for inference on nvidia TensoCore",
    )
    parser.add_argument(
        "--enable-chwn4",
        action="store_true",
        help="transform the model format to CHWN4 "
        "for inference, mainly used for nvidia tensorcore",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-with-z",
        action="store_true",
        help="fuse conv_bias with z input for inference on "
        "nvidia GPU (this optimization pass will result in mismatch "
        "of the precision of output of training and inference)",
    )
    return parser


def main():
    """Pack a graph and its testcases into the file named by ``--output``.

    The output file is written as: the magic bytes ``b"mgbtest0"``, the
    number of testcases packed with ``struct`` format ``"I"``, the dumped
    network graph, and then one dumped graph per testcase whose outputs are
    constants holding that testcase's input values (ordered by sorted input
    name and dtype).

    Raises:
        AssertionError: if ``make_feeds`` yields no testcases, if a testcase
            is not a dict, or if a testcase carries inputs the network does
            not consume.
    """
    args = _build_arg_parser().parse_args()

    _, feeds = make_feeds(args)
    assert isinstance(
        feeds, dict) and feeds["testcases"], "testcases can not be empty"

    output_mgbvars = feeds["outputs"]
    output_mgbvars = optimize_for_inference(args, output_mgbvars)

    inputs = cgtools.get_dep_vars(output_mgbvars, "Host2DeviceCopy")
    inputs = sorted((i.name, i.dtype) for i in inputs)

    if args.discard_var_name:
        sereg_kwargs = dict(keep_var_name=0, keep_param_name=False)
    else:
        sereg_kwargs = dict(keep_var_name=2, keep_param_name=True)

    strip_info_file = args.output + '.json' if args.output_strip_info else None

    with open(args.output, "wb") as fout:
        fout.write(b"mgbtest0")
        fout.write(struct.pack("I", len(feeds["testcases"])))
        if isinstance(output_mgbvars, dict):
            # Iterate over items(): iterating the dict itself yields only
            # keys, which cannot be unpacked into (name, var) pairs.
            wrap_output_vars = {
                name: VarNode(var) for name, var in output_mgbvars.items()
            }
        else:
            wrap_output_vars = [VarNode(var) for var in output_mgbvars]
        dump_content, stat = G.dump_graph(
            wrap_output_vars,
            append_json=True,
            strip_info_file=strip_info_file,
            **sereg_kwargs
        )
        fout.write(dump_content)

        logger.info(
            'graph dump sizes: tot_size={:.3f}KiB overhead={:.3f}KiB'.format(
                stat.tot_bytes / 1024,
                (stat.tot_bytes - stat.tensor_value_bytes) / 1024))

    def make_dev_tensor(value, dtype=None, device=None):
        return as_raw_tensor(value, dtype=dtype, device=device)._dev_tensor()

    # The first file handle is closed by now; every testcase is appended by
    # reopening the output file in append mode.
    for testcase in feeds["testcases"]:
        assert isinstance(testcase, dict)
        cg = G.Graph()
        # pop() consumes each expected input so leftovers can be detected.
        testcase_vars = [
            cg.make_const(
                make_dev_tensor(testcase.pop(name), dtype=dtype, device="cpux"))
            for name, dtype in inputs
        ]
        assert not testcase, "extra inputs provided in testcase: {}".format(
            testcase.keys())
        with open(args.output, "ab") as fout:
            dump_content, _ = G.dump_graph(
                testcase_vars,
                strip_info_file=strip_info_file,
                append_json=True)
            fout.write(dump_content)