def __init__(self,
             xgraph,
             arch,
             work_dir=os.path.join(os.getcwd(), 'work'),
             build_dir=os.getcwd(),
             mode='debug'):
    """Set up the Vitis-AI compiler wrapper.

    Validates the DPU arch file, retrieves the quantizer output of the
    given xgraph and makes sure both the work and build directories
    exist on disk.
    """
    super(VAICompiler, self).__init__(xgraph)

    if not os.path.isfile(arch):
        raise ValueError("Arch file: {} does not exist".format(arch))
    self.arch = arch

    quantizer_output = self.xgraph.get_quantizer_output()
    self.netcfgs = {key: quantizer_output.get_q_file(key)
                    for key in quantizer_output.keys()}
    # Only a single network configuration is supported at the moment
    assert len(self.netcfgs) == 1

    self.work_dir = work_dir
    if not os.path.exists(self.work_dir):
        os.makedirs(self.work_dir)

    self.build_dir = build_dir if build_dir is not None else work_dir
    if not os.path.exists(self.build_dir):
        os.makedirs(self.build_dir)

    self.mode = mode
    self.c_output = CompilerOutput(name=xgraph.get_name())
def __init__(self,
             xgraph,
             target,
             arch,
             work_dir=os.path.join(os.getcwd(), 'work'),
             build_dir=None,
             mode='debug'):
    """Set up the DPU compiler wrapper.

    Validates the DPU arch file and pulls the original pb file plus the
    quantization info file out of the quantizer output of the xgraph.
    """
    super(DPUCompiler, self).__init__(xgraph)

    if not os.path.isfile(arch):
        raise ValueError("Arch file: {} does not exist".format(arch))

    warnings.warn("This compilation only works with one network"
                  " configuration at the moment!!")

    quantizer_output = self.xgraph.get_quantizer_output()
    self.netcfgs = {key: quantizer_output.get_orig_pb(key)
                    for key in quantizer_output.keys()}
    self.quant_info = {key: quantizer_output.get_q_info(key)
                       for key in quantizer_output.keys()}
    # Only a single network configuration is supported at the moment
    assert len(self.netcfgs) == 1

    self.work_dir = work_dir
    self.build_dir = build_dir if build_dir is not None else work_dir
    self.target = target
    self.arch = arch
    self.mode = mode
    self.c_output = CompilerOutput(name=xgraph.get_name())
def __init__(self,
             xgraph,
             arch,
             meta,
             cpu_arch='arm64',
             work_dir=os.path.join(os.getcwd(), 'work'),
             build_dir=os.getcwd(),
             mode='debug'):
    """Set up the Vitis-AI compiler wrapper.

    Validates the DPU arch file and the CPU architecture, retrieves the
    quantizer output of the given xgraph and makes sure the work and
    build directories exist.

    Raises
    ------
    ValueError
        If the arch file does not exist or cpu_arch is unsupported.
    """
    super(VAICompiler, self).__init__(xgraph)

    if not os.path.isfile(arch):
        raise ValueError("Arch file: {} does not exist".format(arch))
    if cpu_arch != 'arm64':
        # Fix: the original message contained a '{}' placeholder but
        # never called .format(cpu_arch), so the offending value was
        # missing from the error text.
        raise ValueError("Unsupported CPU architecture: {}. Supported"
                         " architectures are: 'arm64'".format(cpu_arch))

    warnings.warn("This compilation only works with one network"
                  " configuration at the moment!!")

    q_output = self.xgraph.get_quantizer_output()
    self.netcfgs = {q_key: q_output.get_q_file(q_key)
                    for q_key in q_output.keys()}
    # Only a single network configuration is supported at the moment
    assert len(self.netcfgs) == 1

    self.arch = arch
    self.meta = meta
    self.cpu_arch = cpu_arch

    self.work_dir = work_dir
    if not os.path.exists(self.work_dir):
        os.makedirs(self.work_dir)
    self.build_dir = build_dir if build_dir is not None else work_dir
    if not os.path.exists(self.build_dir):
        os.makedirs(self.build_dir)

    self.mode = mode
    self.c_output = CompilerOutput(name=xgraph.get_name())
class VAICompiler(XGraphBaseCompiler):
    """Vitis-AI compiler wrapper for DPUCAHX8H"""

    xgraph_partitioner = XGraphPartitioner()
    xgraph_factory = XGraphFactory()

    def __init__(self,
                 xgraph,
                 arch,
                 work_dir=os.path.join(os.getcwd(), 'work'),
                 build_dir=os.getcwd(),
                 mode='debug'):
        """Set up the compiler: validate the arch file, pick up the
        quantizer output and create the work/build directories."""
        super(VAICompiler, self).__init__(xgraph)

        if not os.path.isfile(arch):
            raise ValueError("Arch file: {} does not exist".format(arch))
        self.arch = arch

        q_output = self.xgraph.get_quantizer_output()
        self.netcfgs = {q_key: q_output.get_q_file(q_key)
                        for q_key in q_output.keys()}
        # Only a single network configuration is supported at the moment
        assert len(self.netcfgs) == 1

        self.work_dir = work_dir
        if not os.path.exists(self.work_dir):
            os.makedirs(self.work_dir)
        self.build_dir = build_dir if build_dir is not None else work_dir
        if not os.path.exists(self.build_dir):
            os.makedirs(self.build_dir)

        self.mode = mode
        self.c_output = CompilerOutput(name=xgraph.get_name())

    def compile(self) -> None:
        """Start DPUCAHX8H compilation by invoking vai_c_tensorflow on
        the quantized model and recording the compiler output maps."""
        net_name = list(self.netcfgs.keys())[0]
        netcfg = list(self.netcfgs.values())[0]

        # We only handle one partition at the moment
        Xp = VAICompiler.xgraph_partitioner\
            .get_subgraphs(self.xgraph)[0]
        subxg_layers = Xp.subgraph_data
        xgraph = VAICompiler.xgraph_factory.build_from_xlayer(subxg_layers)

        input_names = xgraph.get_input_names()
        output_names = list(Xp.attrs['__top_tensors'].keys())

        if len(input_names) > 1:
            raise NotImplementedError("VAICompiler only handles models with"
                                      " one input at the moment but found: {}"
                                      .format(len(input_names)))

        # The compiler needs the quantized evaluation model, not the
        # deploy model produced next to it.
        netcfg = netcfg.replace('deploy_model.pb', 'quantize_eval_model.pb')

        command = """
        vai_c_tensorflow \
            --frozen_pb {} \
            --arch {} \
            --output_dir {} \
            --net_name {} \
            --options "{}"
        """.format(netcfg, self.arch, self.build_dir, net_name, str(dict()))

        logger.info("Command: {}".format(command))

        process = subprocess.Popen(command,
                                   shell=True,
                                   cwd=FILE_PATH,
                                   stdout=subprocess.PIPE)
        output, error = process.communicate()
        logger.debug("{} {}".format(output, error))

        if error is not None:
            error = error.decode('utf-8')
            raise ValueError(error)
        # Robustness fix: stderr is not piped, so `error` above is
        # always None and compiler failures went undetected. Check the
        # exit code explicitly instead.
        if process.returncode != 0:
            raise ValueError("vai_c_tensorflow failed with exit code {}"
                             .format(process.returncode))

        in_map = {in_name: in_name for in_name in input_names}
        out_map = {out_name: out_name for out_name in output_names}

        self.c_output.add(net_name, ['libvart-runner.so'], in_map, out_map)
        self.xgraph.set_compiler_output(self.c_output)
        # TODO: move these meta attributes into CompilerOutput handling
        self.xgraph.meta_attrs['compiled'] = True
        self.xgraph.meta_attrs['compiler_libs'] = ['libvart-runner.so']
        self.xgraph.meta_attrs['compiler_in_map'] = in_map
        self.xgraph.meta_attrs['compiler_out_map'] = out_map

        return self.xgraph
class VAICompiler(XGraphBaseCompiler):
    """Vitis-AI compiler wrapper for DPUCZDX8G"""

    xgraph_partitioner = XGraphPartitioner()
    xgraph_factory = XGraphFactory()

    def __init__(self,
                 xgraph,
                 arch,
                 meta,
                 dcf,
                 cpu_arch='arm64',
                 work_dir=os.path.join(os.getcwd(), 'work'),
                 build_dir=os.getcwd(),
                 mode='debug'):
        """Set up the compiler: validate the arch file and CPU
        architecture, pick up the quantizer output and create the
        work/build directories.

        Raises
        ------
        ValueError
            If the arch file does not exist or cpu_arch is unsupported.
        """
        super(VAICompiler, self).__init__(xgraph)

        if not os.path.isfile(arch):
            raise ValueError("Arch file: {} does not exist".format(arch))
        if cpu_arch != 'arm64':
            # Fix: the original message had a '{}' placeholder but no
            # .format(cpu_arch) call, so the bad value never appeared.
            raise ValueError("Unsupported CPU architecture: {}. Supported"
                             " architectures are: 'arm64'".format(cpu_arch))

        q_output = self.xgraph.get_quantizer_output()
        self.netcfgs = {q_key: q_output.get_q_file(q_key)
                        for q_key in q_output.keys()}
        # Only a single network configuration is supported at the moment
        assert len(self.netcfgs) == 1

        self.arch = arch
        self.meta = meta
        self.dcf = dcf
        self.cpu_arch = cpu_arch

        self.work_dir = work_dir
        if not os.path.exists(self.work_dir):
            os.makedirs(self.work_dir)
        self.build_dir = build_dir if build_dir is not None else work_dir
        if not os.path.exists(self.build_dir):
            os.makedirs(self.build_dir)

        self.mode = mode
        self.c_output = CompilerOutput(name=xgraph.get_name())

    def compile(self) -> None:
        """Start DPUv2 compilation: run dnnc-dpuv2 on the quantized
        model, parse the DNNC output for tensor name mappings, cross
        compile the resulting ELF into a shared library and write the
        meta.json next to it."""
        net_name = list(self.netcfgs.keys())[0]
        netcfg = list(self.netcfgs.values())[0]

        # We only handle one partition at the moment
        Xp = VAICompiler.xgraph_partitioner\
            .get_subgraphs(self.xgraph)[0]
        subxg_layers = Xp.subgraph_data
        xgraph = VAICompiler.xgraph_factory.build_from_xlayer(subxg_layers)

        input_names = xgraph.get_input_names()
        input_shapes = [xgraph.get(in_name).shapes[:]
                        for in_name in input_names]
        output_names = list(Xp.attrs['__top_tensors'].keys())
        output_shapes = [xgraph.get(out_name).shapes[:]
                         for out_name in output_names]

        if len(input_names) > 1:
            raise NotImplementedError("VAICompiler only handles models with"
                                      " one input at the moment but found: {}"
                                      .format(len(input_names)))

        command = """
        dnnc-dpuv2 --parser tensorflow\
            --frozen_pb {} \
            --cpu_arch {} \
            --output_dir {} \
            --net_name {} \
            --dcf {}
        """.format(netcfg, self.cpu_arch, self.work_dir, net_name, self.dcf)

        logger.info("Command: {}".format(command))

        process = subprocess.Popen(command,
                                   shell=True,
                                   cwd=FILE_PATH,
                                   stdout=subprocess.PIPE)
        output, error = process.communicate()
        logger.debug("{} {}".format(output, error))

        # NOTE(review): in_map/out_map are only bound inside this branch;
        # stdout is piped so `output` is never None in practice.
        if output is not None:
            output = output.decode('utf-8')

            logger.info("Output: {}".format(output))
            logger.info("Output names: {}".format(output_names))

            # DNNC renames tensors; parse its output to map our tensor
            # names to the DPU kernel tensor names.
            do = DNNCOutput(str(repr(output)))
            dpu_output_nodes = do.get_output_nodes()
            dpu_output_nodes_on_shapes = do.get_output_nodes_on_shapes()

            # 'os' was used as the loop variable here originally,
            # shadowing the os module; renamed to oshape.
            out_shapes_log = ["{}*{}*{}".format(oshape[1], oshape[2], oshape[3])
                              for oshape in output_shapes]

            in_map = {in_name: in_name + ':0' for in_name in input_names}
            out_map = {}
            for out_name, out_shape_str in zip(output_names, out_shapes_log):
                # DNNC changes naming
                dnnc_out_name = do.get_dnnc_str(out_name)
                if dnnc_out_name in dpu_output_nodes:
                    out_map[out_name] = dpu_output_nodes[dnnc_out_name]
                else:
                    # Fall back to matching outputs by their shapes
                    assert len(dpu_output_nodes_on_shapes) == len(output_names),\
                        "Can't retrieve right out tensor names from DNNC compiler output"
                    out_map[out_name] = dpu_output_nodes_on_shapes[out_shape_str]

            logger.info("DPU kernel in_map: {}".format(in_map))
            logger.info("DPU kernel out_map: {}".format(out_map))

        if error is not None:
            error = error.decode('utf-8')
            raise ValueError(error)

        logger.info("VAI_C Output: {}".format(output))
        logger.info("VAI_C Error: {}".format(error))

        # Cross compile the DPU ELF into a shared library for the board
        logger.debug("CROSS COMPILATION")
        command = """
        aarch64-linux-gnu-gcc -fPIC -shared {}/dpu_{}.elf -o {}/libdpumodel{}.so
        """.format(self.work_dir, net_name, self.work_dir, net_name)

        logger.debug("Command: {}".format(command))

        process = subprocess.Popen(command.split(),
                                   cwd=FILE_PATH,
                                   stdout=subprocess.PIPE)
        output, error = process.communicate()

        if output is not None:
            output = output.decode('utf-8')
        if error is not None:
            error = error.decode('utf-8')
            raise ValueError(error)

        logger.debug("Output: {}".format(output))
        logger.debug("Error: {}".format(error))

        lib_file = "{}/libdpumodel{}.so".format(self.work_dir, net_name)
        to_lib_file = "{}/libdpumodel{}.so".format(self.build_dir, net_name)
        shutil.move(lib_file, to_lib_file)

        self.meta["vitis_dpu_kernel"] = net_name
        to_meta_file = "{}/meta.json".format(self.build_dir)
        with open(to_meta_file, 'w') as f:
            json.dump(self.meta, f)

        self.c_output.add(net_name, [to_lib_file], in_map, out_map)
        self.xgraph.set_compiler_output(self.c_output)

        return self.xgraph
class DPUCompiler(XGraphBaseCompiler):
    """Compiler wrapper that runs vai_c_tensorflow on a DPU subgraph."""

    xgraph_partitioner = XGraphPartitioner()
    xgraph_factory = XGraphFactory()
    tf_generator = TfGenerator()

    def __init__(self,
                 xgraph,
                 target,
                 arch,
                 work_dir=os.path.join(os.getcwd(), 'work'),
                 build_dir=None,
                 mode='debug'):
        """Set up the compiler: validate the arch file and pull the
        original pb and quantization info files from the quantizer
        output of the xgraph."""
        super(DPUCompiler, self).__init__(xgraph)

        if not os.path.isfile(arch):
            raise ValueError("Arch file: {} does not exist".format(arch))

        warnings.warn("This compilation only works with one network"
                      " configuration at the moment!!")

        q_output = self.xgraph.get_quantizer_output()
        self.netcfgs = {q_key: q_output.get_orig_pb(q_key)
                        for q_key in q_output.keys()}
        self.quant_info = {q_key: q_output.get_q_info(q_key)
                           for q_key in q_output.keys()}
        # Only a single network configuration is supported at the moment
        assert len(self.netcfgs) == 1

        self.work_dir = work_dir
        self.build_dir = build_dir if build_dir is not None else work_dir
        self.target = target
        self.arch = arch
        self.mode = mode
        self.c_output = CompilerOutput(name=xgraph.get_name())

    def Getopts(self, input_shapes):
        """Return the default vai_c compiler options for the given
        placeholder input shapes.

        NOTE: the original dict literal repeated the keys
        "maximumasrelu" and "pipelineconvmaxpool"; Python keeps only the
        last occurrence, so the effective values (True for both) are
        spelled out once here.
        """
        return {
            "maximumasrelu": True,
            "pipelineconvmaxpool": True,
            "bytesperpixels": 1,
            "dsp": 96,
            "memory": 9,
            "ddr": "256",
            "cpulayermustgo": True,
            "forceweightsfullyconnected": True,
            "mixmemorystrategy": True,
            'bridges': ['bytype', 'Concat'],
            "usedeephi": True,
            'placeholdershape': input_shapes
        }

    def compile(self):
        # type: () -> None
        """Compile the DPU subgraph: transform the layout to NHWC,
        generate a frozen TF graph, run vai_c_tensorflow and retrieve
        the input/output tensor mappings from compiler.json."""
        layout_transform_pass = \
            XGraphLayoutTransformationPass('NHWC', target=self.target)
        self.xgraph = layout_transform_pass.execute(self.xgraph,
                                                    subgraphs_only=False)

        quant_info_file = list(self.quant_info.values())[0]  # quant info file
        subxg_layers = DPUCompiler.xgraph_partitioner\
            .get_subgraphs(self.xgraph)[0].subgraph_data
        xgraph = DPUCompiler.xgraph_factory.build_from_xlayer(subxg_layers)
        net_name = list(self.netcfgs.keys())[0]
        # Generate the frozen pb file for the subgraph
        fs = DPUCompiler.tf_generator.generate(xgraph,
                                               'graph',
                                               subgraphs_only=True,
                                               layout='NHWC',
                                               batch_size=1,
                                               placeholder=True,
                                               out_dir=self.work_dir)
        netcfg = list(fs.values())[0]

        input_names = xgraph.get_input_names()
        input_shapes = [
            xgraph.get(in_name).shapes.tolist()[:]
            for in_name in input_names
        ]
        output_names = xgraph.get_output_names()

        if len(input_names) > 1:
            raise NotImplementedError(
                "DPUCompiler only handles models with"
                " one input at the moment but found: {}".format(
                    len(input_names)))

        # Replace dynamic (-1) dimensions by 1 for the placeholder shapes
        opt_input_shapes = {
            in_name: [e if e != -1 else 1 for e in input_shape]
            for in_name, input_shape in zip(input_names, input_shapes)
        }
        opts = self.Getopts(opt_input_shapes)
        if not os.path.isfile(quant_info_file):
            raise ValueError(
                "quant file: {} does not exist".format(quant_info_file))
        opts['quant_cfgfile'] = quant_info_file
        opts = str(opts)

        command = """
        vai_c_tensorflow \
            --frozen_pb {} \
            --arch {} \
            --output_dir {} \
            --net_name {}\
            --options "{}"
        """.format(netcfg, self.arch, self.build_dir, 'compiler', opts)
        logger.info("command: {}".format(command))

        process = subprocess.Popen(command,
                                   shell=True,
                                   cwd=FILE_PATH,
                                   stdout=subprocess.PIPE)
        output, error = process.communicate()

        if output is not None:
            output = output.decode('utf-8')
            if 'SUCCESSFUL COMPILATION' not in output:
                logger.info(output)
                raise ValueError('compiler is failed. Please see the log for'
                                 ' more details')
        if error is not None:
            error = error.decode('utf-8')

        logger.debug("Output: {}".format(output))
        logger.debug("Error: {}".format(error))

        # Retrieve the DPU tensor names from the generated compiler.json
        compiler_json_file = self.build_dir + '/compiler.json'
        with open(compiler_json_file) as json_file:
            json_graph = json.load(json_file)
        graph_inputs = json_graph["inputs"]
        graph_outputs = json_graph["outputs"]
        logger.debug("{} {}".format(input_names, graph_inputs))
        logger.debug("{} {}".format(output_names, graph_outputs))

        out_nodes = [
            graph_output['previous_layers'][0]
            for graph_output in graph_outputs
        ]
        # Cleanup: removed a duplicate identical in_map assignment and
        # the unused out_node_merged computation from the original.
        in_map = {in_name: in_name for in_name in input_names}
        out_map = {out_name: t for out_name, t in zip(output_names, out_nodes)}

        self.c_output.add(net_name, ['dpuv1lib.so'], in_map, out_map)
        self.xgraph.set_compiler_output(self.c_output)
        # TODO: move these meta attributes into CompilerOutput handling
        self.xgraph.meta_attrs['compiled'] = True
        self.xgraph.meta_attrs['compiler_libs'] = ['dpuv1lib.so']
        self.xgraph.meta_attrs['compiler_in_map'] = in_map
        self.xgraph.meta_attrs['compiler_out_map'] = out_map

        return self.xgraph