def change_batch_and_dump(inp_file, oup_file):
    cg, _, outputs = G.load_graph(inp_file)
    inputs = cgtools.get_dep_vars(outputs[0], "Host2DeviceCopy")
    replace_dict = {}
    for var in inputs:
        n_shape = list(var.shape)
        n_shape[0] = 1
        new_input = make_h2d(cg, "xpux", var.dtype, n_shape, var.name)
        replace_dict[var] = new_input

    new_outputs = cgtools.replace_vars(outputs, replace_dict)
    dump_content, _ = G.dump_graph(map(G.VarNode, new_outputs), keep_var_name=2)
    with open(oup_file, "wb") as file:
        file.write(dump_content)
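# A hedged verification sketch (not part of the original tool): reload the
# rewritten model and print every Host2DeviceCopy input shape to confirm the
# leading (batch) dimension is now 1. It only uses APIs already used above.
def _check_batch_size(mge_file):
    _, _, outs = G.load_graph(mge_file)
    for var in cgtools.get_dep_vars(outs[0], "Host2DeviceCopy"):
        # var.shape is the static shape recorded in the dumped graph
        print(var.name, list(var.shape))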
def main():
    parser = argparse.ArgumentParser(
        description="load a network and run inference on random data",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("net")
    parser.add_argument(
        "--device", "-d", help="set default device, like 'gpux' or 'cpux'"
    )
    parser.add_argument(
        "--calc-output-rms",
        action="store_true",
        help="compute RMS of outputs; useful for comparing computing results",
    )
    parser.add_argument(
        "--output-name",
        nargs="*",
        help="Specify output name. This option can be"
        " specified multiple times. We will look for opr/var"
        " in the graph",
    )
    parser.add_argument(
        "--load-input-data",
        help="load input data from pickle file; it should be"
        " a numpy array or a dict of numpy arrays",
    )
    parser.add_argument("--profile", help="profiler output file")
    parser.add_argument(
        "--fast-run",
        action="store_true",
        help="enable fast running by profiling conv algorithms during compiling",
    )
    parser.add_argument(
        "--reproducible", action="store_true", help="use reproducible kernels"
    )
    parser.add_argument(
        "--input-desc",
        help="specify input names and shapes manually in"
        " format: <name>:<shape>[;<name>:<shape>, ...], where"
        " name is a string and shape is a comma separated"
        ' string, e.g. "data:128,1,28,28;label:128".'
        " different input tensors are separated by semicolons.",
    )
    parser.add_argument(
        "--batchsize",
        type=int,
        help="change batchsize; the first dimension of each"
        " input is assumed to be batch size",
    )
    parser.add_argument(
        "--warm-up",
        action="store_true",
        help="warm up model before timing for better estimation",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="verbose output, logging in debug mode",
    )
    parser.add_argument(
        "--iter", type=int, default=1, help="number of iters to run the model"
    )
    parser.add_argument("--log", help="give a file path to duplicate log to")
    parser.add_argument(
        "--seed",
        type=int,
        default=0,
        help="seed for random number generator for input data",
    )
    parser.add_argument(
        "--rng",
        help="special RNG options to generate input data in"
        " format: <name>:func[;<name>:func, ...] where name is"
        " a string and func is a python expression containing"
        ' "{}" for the size param, e.g.'
        ' "label:randint(low=0,high=1000,size={})"',
    )
    parser.add_argument(
        "--focused-nvprof",
        action="store_true",
        help="only profile last iter for `nvprof --profile-from-start off`",
    )
    parser.add_argument(
        "--optimize-for-inference",
        action="store_true",
        help="optimize model for inference",
    )
    parser.add_argument(
        "--enable-io16xc32",
        action="store_true",
        help="transform the model to float16 io and float32 compute",
    )
    parser.add_argument(
        "--enable-ioc16",
        action="store_true",
        help="transform the dtype of the model to float16 io and compute",
    )
    parser.add_argument(
        "--enable-hwcd4",
        action="store_true",
        help="transform the model format from NCHW to NHWCD4 for inference",
    )
    parser.add_argument(
        "--enable-nchw4",
        action="store_true",
        help="transform the model format from NCHW to NCHW4 for inference",
    )
    parser.add_argument(
        "--enable-nchw88",
        action="store_true",
        help="transform the model format from NCHW to NCHW88 for inference",
    )
    parser.add_argument(
        "--enable-nchw44",
        action="store_true",
        help="transform the model format from NCHW to NCHW44 for inference",
    )
    parser.add_argument(
        "--enable-nchw44-dot",
        action="store_true",
        help="transform the model format from NCHW to NCHW44_DOT "
        "for optimizing armv8.2 dot in inference",
    )
    parser.add_argument(
        "--enable-chwn4",
        action="store_true",
        help="transform the model format to CHWN4 "
        "for inference, mainly used for nvidia tensorcore",
    )
    parser.add_argument(
        "--enable-nchw32",
        action="store_true",
        help="transform the model format from NCHW4 to NCHW32 "
        "for inference on nvidia TensorCore",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-nonlinearity",
        action="store_true",
        help="fuse convolution, bias and nonlinearity oprs into a "
        "conv_bias opr and compute",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-with-z",
        action="store_true",
        help="fuse conv_bias with z input for inference on "
        "nvidia GPU (this optimization pass will result in mismatch "
        "of the precision of output of training and inference)",
    )
    parser.add_argument(
        "--dump-cpp-model",
        help="write a C++ model that can be loaded by "
        "megbrain/sdk/load-and-run; "
        "this implies --embed-input",
    )
    parser.add_argument(
        "--embed-input",
        action="store_true",
        help="embed input data as SharedDeviceTensor in model, "
        "to remove memory copy for inputs",
    )
    args = parser.parse_args()

    if args.verbose:
        enable_debug_log()
    if args.log:
        set_log_file(args.log)

    if args.device:
        set_default_device(args.device)

    if args.dump_cpp_model:
        args.embed_input = True

    logger.info("loading model ...")
    graph, _, output_vars = G.load_graph(args.net)
    input_vars = tools.get_dep_vars(output_vars, "Host2DeviceCopy")

    if args.output_name is not None:
        output_vars = tools.find_vars_by_name(output_vars, args.output_name)

    data = make_data_given_desc(args, input_vars)

    run_model(args, graph, input_vars, output_vars, data)
def main():
    parser = argparse.ArgumentParser(
        description="Pack computing graph, input values and expected output "
        "values into one file for checking correctness. README.md gives more "
        "details on the usage",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("input", help="MegEngine dumped model file")
    parser.add_argument("-o", "--output", help="output file", required=True)
    parser.add_argument(
        "-d",
        "--data",
        default=[],
        action="append",
        required=True,
        help="Given input test data when the input file is a network, "
        "the current network output is used as groundtruth. "
        "The format is var0:file0;var1:file1... to specify data files for "
        "input vars. It can also be #rand(min,max,shape...) for generating "
        "random input data, for example, #rand(0,255), "
        "#rand(0,255,1,3,224,224) or #rand(0, 255, 1, ...) where `...` means "
        "the remaining part of the original shape. "
        "If the shape is not specified, the shape of "
        "corresponding input tensors in the network will be used. "
        "If there is only one input var, its name can be omitted. "
        "Each data file can either be an image which can be loaded by opencv, "
        "or a pickled numpy.ndarray. "
        "This option can be given multiple times to add multiple testcases. "
        " *NOTE* "
        "If you start the data with the letter @, the rest should be a "
        "filename, and each line in the file should be a single datum in "
        "the format described above.",
    )
    parser.add_argument(
        "--repeat",
        type=int,
        default=1,
        help="Specify how many times the input image is repeated. "
        "Useful when running benchmark for batch size other than one. "
        "Has no effect on randomly generated input data.",
    )
    parser.add_argument(
        "--silent",
        action="store_true",
        help="set verbose to False in assert_equal opr",
    )
    parser.add_argument(
        "--optimize-for-inference",
        action="store_false",
        help="enable optimization for inference",
    )
    parser.add_argument(
        "--no-assert",
        action="store_true",
        help="do not insert assert_equal opr to check result; "
        "this option is useful for benchmarking",
    )
    parser.add_argument(
        "--maxerr",
        type=float,
        default=1e-4,
        help="max error for assert_equal check during runtime",
    )
    parser.add_argument(
        "--resize-input",
        action="store_true",
        help="resize input image to fit input var shape",
    )
    parser.add_argument(
        "--input-transform",
        help="a python expression to transform the input data. "
        "Example: data / np.std(data)",
    )
    parser.add_argument(
        "--discard-var-name",
        action="store_true",
        help="discard variable and param names in the generated output",
    )
    parser.add_argument(
        "--output-strip-info",
        action="store_true",
        help="output code strip information",
    )
    parser.add_argument(
        "--enable-io16xc32",
        action="store_true",
        help="transform the model to float16 io and float32 compute",
    )
    parser.add_argument(
        "--enable-ioc16",
        action="store_true",
        help="transform the dtype of the model to float16 io and compute",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-nonlinearity",
        action="store_true",
        help="fuse convolution, bias and nonlinearity oprs into a "
        "conv_bias opr and compute",
    )
    parser.add_argument(
        "--enable-hwcd4",
        action="store_true",
        help="transform the model format from NCHW to NHWCD4 "
        "for inference; you may need to disable CUDA and set "
        "MGB_USE_MEGDNN_DBG=2",
    )
    parser.add_argument(
        "--enable-nchw4",
        action="store_true",
        help="transform the model format from NCHW to NCHW4 for inference",
    )
    parser.add_argument(
        "--enable-nchw88",
        action="store_true",
        help="transform the model format from NCHW to NCHW88 for inference",
    )
    parser.add_argument(
        "--enable-nchw44",
        action="store_true",
        help="transform the model format from NCHW to NCHW44 for inference",
    )
    parser.add_argument(
        "--enable-nchw44-dot",
        action="store_true",
        help="transform the model format from NCHW to NCHW44_DOT "
        "for optimizing armv8.2 dot in inference",
    )
    parser.add_argument(
        "--enable-nchw32",
        action="store_true",
        help="transform the model format from NCHW4 to NCHW32 "
        "for inference on nvidia TensorCore",
    )
    parser.add_argument(
        "--enable-chwn4",
        action="store_true",
        help="transform the model format to CHWN4 "
        "for inference, mainly used for nvidia tensorcore",
    )
    parser.add_argument(
        "--enable-fuse-conv-bias-with-z",
        action="store_true",
        help="fuse conv_bias with z input for inference on "
        "nvidia GPU (this optimization pass will result in mismatch "
        "of the precision of output of training and inference)",
    )
    args = parser.parse_args()

    feeds = make_feeds(args)
    assert isinstance(feeds, dict) and feeds["testcases"], "testcases cannot be empty"

    output_mgbvars = feeds["outputs"]
    output_mgbvars = optimize_for_inference(args, output_mgbvars)

    inputs = cgtools.get_dep_vars(output_mgbvars, "Host2DeviceCopy")
    inputs = sorted((i.name, i.dtype) for i in inputs)

    if args.discard_var_name:
        sereg_kwargs = dict(keep_var_name=0, keep_param_name=False)
    else:
        sereg_kwargs = dict(keep_var_name=2, keep_param_name=True)

    strip_info_file = args.output + ".json" if args.output_strip_info else None

    with open(args.output, "wb") as fout:
        fout.write(b"mgbtest0")
        fout.write(struct.pack("I", len(feeds["testcases"])))
        if isinstance(output_mgbvars, dict):
            wrap_output_vars = dict([(i, VarNode(j)) for i, j in output_mgbvars])
        else:
            wrap_output_vars = [VarNode(i) for i in output_mgbvars]
        dump_content, stat = G.dump_graph(
            wrap_output_vars,
            append_json=True,
            strip_info_file=strip_info_file,
            **sereg_kwargs
        )
        fout.write(dump_content)

    logger.info(
        "graph dump sizes: tot_size={:.3f}KiB overhead={:.3f}KiB".format(
            stat.tot_bytes / 1024,
            (stat.tot_bytes - stat.tensor_value_bytes) / 1024,
        )
    )

    def make_dev_tensor(value, dtype=None, device=None):
        return tensor(value, dtype=dtype, device=device)._dev_tensor()

    for testcase in feeds["testcases"]:
        assert isinstance(testcase, dict)
        cg = G.Graph()
        output_mgbvars = []
        for name, dtype in inputs:
            output_mgbvars.append(
                cg.make_const(
                    make_dev_tensor(testcase.pop(name), dtype=dtype, device="cpux")
                )
            )
        assert not testcase, "extra inputs provided in testcase: {}".format(
            testcase.keys()
        )
        with open(args.output, "ab") as fout:
            dump_content, _ = G.dump_graph(
                output_mgbvars,
                strip_info_file=strip_info_file,
                append_json=True,
            )
            fout.write(dump_content)
def make_feeds(args):
    cg_rt, _, outputs = G.load_graph(args.input)
    inputs = cgtools.get_dep_vars(outputs, "Host2DeviceCopy")

    inputs = {i.name: i for i in inputs}
    if not args.no_assert:

        replace_varmap = {}
        inp_map = {}
        # replace each input var with an InputNode
        for name, var in inputs.items():
            inp = G.InputNode(
                device="xpux", dtype=var.dtype, shape=var.shape, graph=cg_rt
            )
            replace_varmap[var] = inp.outputs[0]
            inp_map[name] = inp

        new = cgtools.replace_vars(outputs, replace_varmap)
        if isinstance(new, rt.VarNode):
            new = [new]
        output_nodes = [G.OutputNode(var) for var in new]
        func = cg_rt.compile([node.outputs[0] for node in output_nodes])

        def make_dev_tensor(value, dtype=None, device=None):
            return tensor(value, dtype=dtype, device=device)._dev_tensor()

        def calculate(*args, **kwargs):
            output_val = []
            # set input values
            for name, var in inputs.items():
                val = kwargs.pop(name, None)
                assert val is not None, "missing input name: {}".format(name)
                dev_tensor = make_dev_tensor(val, dtype=var.dtype, device="xpux")
                inp_map[name].set_value(dev_tensor)

            func.execute()

            for res in output_nodes:
                output_val.append(res.get_value().numpy())
            return output_val

        def expect_name(var):
            return "{}:expect".format(var.name)

    testcases = []

    np.set_printoptions(precision=2, threshold=4, suppress=True)

    data_list = []
    for item in args.data:
        if item.startswith("@"):
            with open(item[1:], "r") as f:
                data_list.extend(
                    [line.rstrip() for line in f if line.rstrip() != ""]
                )
        else:
            data_list.append(item)

    for inp_spec in data_list:
        cur_testcase = gen_one_testcase(args, inputs, inp_spec)
        assert len(cur_testcase) == len(
            inputs
        ), "required inputs: {}; given data: {}".format(
            inputs.keys(), cur_testcase.keys()
        )

        if not args.no_assert:
            outputs_get = calculate(**cur_testcase)
            for var, val in zip(outputs, outputs_get):
                cur_testcase[expect_name(var)] = val
                logger.info(
                    "generate test groundtruth: var={} shape={} range=({}, {})"
                    " mean={} var={}".format(
                        var, val.shape, val.min(), val.max(), np.mean(val), np.var(val)
                    )
                )
        testcases.append(cur_testcase)
        logger.info(
            "add testcase: \n {}".format(
                "\n ".join(
                    "{}: shape={} dtype={} range=({:.2f},{:.2f}) "
                    "mean={:.2f} sd={:.2f}".format(
                        k, v.shape, v.dtype, v.min(), v.max(), np.mean(v), np.std(v)
                    )
                    for k, v in sorted(cur_testcase.items())
                )
            )
        )

    if not args.no_assert:

        def expect_shp(var):
            ret = var.shape
            if ret:
                return ret
            return testcases[0][expect_name(var)].shape

        def assert_equal(expect, real, **kwargs):
            op = builtin.AssertEqual(**kwargs)
            (res,) = G.apply_normal_varnode(op, expect, real)
            return G.VarNode(res)

        verbose = not args.silent
        outputs_new = []
        for i in outputs:
            device = rt.CompNode("xpux")
            dtype = i.dtype
            name = expect_name(i)
            shape = expect_shp(i)
            # make the expected output an extra input of the model
            expect_get = rt.make_h2d(cg_rt, device, dtype, shape, name)
            # insert an assert opr to compare the expected and real outputs
            outputs_new.append(
                assert_equal(
                    G.VarNode(expect_get),
                    G.VarNode(i),
                    verbose=verbose,
                    maxerr=args.maxerr,
                )
            )
            inputs[expect_name(i)] = expect_get
        outputs = outputs_new

    return {"outputs": outputs, "testcases": testcases}
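# A hedged sketch (hypothetical helper) of how a caller such as main() can
# inspect make_feeds() output: "outputs" is the (possibly assert-wrapped)
# output var list, and "testcases" is a list of dicts mapping input names
# (plus "<output>:expect" keys when asserts are enabled) to numpy arrays.
def _summarize_feeds(feeds):
    for idx, case in enumerate(feeds["testcases"]):
        for name, value in sorted(case.items()):
            print(
                "testcase {}: {} shape={} dtype={}".format(
                    idx, name, value.shape, value.dtype
                )
            )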