def _infer_node_datatype(model, node):
    """Infer output datatype(s) for a particular node.

    FINN custom ops delegate to their own ``infer_node_datatype``. For
    standard ops a small set of rules is applied: ``Sign`` produces BIPOLAR,
    ``MatMul``/``Conv`` produce FLOAT32 if any input is FLOAT32 and otherwise
    a 32-bit (signed or unsigned) integer, ``Reshape``/``Transpose`` forward
    the datatype of their first input, and everything else is assumed to
    produce FLOAT32 unless a different annotation is already present.

    Returns True if any output datatype annotation changed.

    Raises Exception if the node is a FINN custom op whose op_type is not
    found in the custom op registry.
    """
    dt_identity_optypes = ["Reshape", "Transpose"]
    idtypes = [model.get_tensor_datatype(x) for x in node.input]
    odtypes = [model.get_tensor_datatype(x) for x in node.output]
    op_type = node.op_type
    if is_finn_op(node.domain):
        # handle DataType inference for CustomOp
        # only the registry lookup is guarded: a KeyError raised inside the
        # op's own inference must not be reported as "unsupported op_type"
        try:
            inst = registry.getCustomOp(node)
        except KeyError as err:
            raise Exception(
                "Custom op_type %s is currently not supported." % op_type
            ) from err
        inst.infer_node_datatype(model)
    elif op_type == "Sign":
        # always produces bipolar outputs
        model.set_tensor_datatype(node.output[0], DataType.BIPOLAR)
    elif op_type in ["MatMul", "Conv"]:
        if any(x == DataType.FLOAT32 for x in idtypes):
            # node has at least one float input, output is also float
            model.set_tensor_datatype(node.output[0], DataType.FLOAT32)
        else:
            # TODO compute minimum / maximum result to minimize bitwidth
            # use (u)int32 accumulators for now
            has_signed_inp = any(x.signed() for x in idtypes)
            odtype = DataType.INT32 if has_signed_inp else DataType.UINT32
            model.set_tensor_datatype(node.output[0], odtype)
    elif op_type in dt_identity_optypes:
        # set output dtype = input dtype
        idtype = model.get_tensor_datatype(node.input[0])
        model.set_tensor_datatype(node.output[0], idtype)
    else:
        # unknown, assume node produces float32 outputs
        for o in node.output:
            # check if output datatype is already set to a value != FLOAT32
            odtype = model.get_tensor_datatype(o)
            if odtype is not None and odtype != DataType.FLOAT32:
                # don't change data type
                model.set_tensor_datatype(o, odtype)
            else:
                model.set_tensor_datatype(o, DataType.FLOAT32)
    # compare old and new output dtypes to see if anything changed
    new_odtypes = [model.get_tensor_datatype(x) for x in node.output]
    graph_modified = new_odtypes != odtypes
    return graph_modified
def is_fpgadataflow_node(node):
    """Tell whether ``node`` is an fpgadataflow node.

    The answer is True only when the node is not None, its domain is a FINN
    domain, and it carries a "backend" attribute whose UTF-8 decoded value is
    "fpgadataflow". In every other case the answer is False.
    """
    if node is None:
        return False
    if not is_finn_op(node.domain):
        return False
    backend_attr = get_by_name(node.attribute, "backend")
    if backend_attr is None:
        return False
    return backend_attr.s.decode("UTF-8") == "fpgadataflow"
def op_and_param_counts(model):
    """Collect op and parameter counts per inference, per node and in total.

    Every FINN custom op node whose instance offers
    ``get_op_and_param_counts`` contributes one entry keyed by the node name.
    The extra "total" entry is the result of ``aggregate_dict_keys`` applied
    to those per-node entries.
    """
    counts = {}
    for node in model.graph.node:
        if not is_finn_op(node.domain):
            continue
        custom_op = registry.getCustomOp(node)
        if hasattr(custom_op, "get_op_and_param_counts"):
            counts[node.name] = custom_op.get_op_and_param_counts()
    counts["total"] = aggregate_dict_keys(counts)
    return counts
def _is_fpgadataflow_node(node): if node is not None: if is_finn_op(node.domain): n_backend = get_by_name(node.attribute, "backend") if n_backend is None: return False backend_value = n_backend.s.decode("UTF-8") if backend_value == "fpgadataflow": return True else: return False else: return False
def _make_shape_compatible_op(node, model):
    """Return a shape-compatible non-FINN op for a given FINN op.

    Used for shape inference with custom ops. The node is looked up in the
    custom op registry and the resulting instance builds the replacement op.

    Raises AssertionError if the node domain is not a FINN domain, and
    Exception if the op_type is not found in the custom op registry.
    """
    assert is_finn_op(node.domain), "Node domain is not set to finn.*"
    op_type = node.op_type
    # only the registry lookup is guarded: a KeyError raised while building
    # the shape-compatible op must not be reported as "unsupported op_type"
    try:
        inst = registry.getCustomOp(node)
    except KeyError as err:
        raise Exception(
            "Custom op_type %s is currently not supported." % op_type
        ) from err
    return inst.make_shape_compatible_op(model)
def _hide_finn_ops(model): """Replace any FINN ops by shape-compatible ones, and return a dict that can be used to map the string representations of the new (shape-compatible) ops back to the old ops.""" hidden_ops = {} node_ind = 0 for node in model.graph.node: node_ind += 1 if is_finn_op(node.domain): new_node = _make_shape_compatible_op(node, model) hidden_ops[str(new_node)] = node model.graph.node.insert(node_ind, new_node) model.graph.node.remove(node) return hidden_ops
def verify_nodes(model):
    """Run the verification of every FINN custom op in the graph.

    Returns {node op_type : info_messages}

    * info_messages: the value returned by the custom op's ``verify_node``,
      a list of strings about the result of the verification.

    Nodes sharing an op_type share one dict key, so the entry of a later node
    replaces that of an earlier one.
    """
    return {
        node.op_type: registry.getCustomOp(node).verify_node()
        for node in model.graph.node
        if is_finn_op(node.domain)
    }
def verify_nodes(model):
    """Check that the custom ops in the graph are correctly built, with all
    attributes and inputs.

    Please note that many FINN CustomOps don't yet implement the verify_node
    function required for this analysis pass to work correctly.

    Returns {node op_type : info_messages}

    * info_messages: is list of strings about the result of the verification.
    """
    results = {}
    for node in model.graph.node:
        if not is_finn_op(node.domain):
            continue
        kind = node.op_type
        custom_op = registry.getCustomOp(node)
        # keyed by op_type: a later node of the same type overwrites the entry
        results[kind] = custom_op.verify_node()
    return results
def _infer_node_data_layout(model, node):
    """Infer output data layout annotation(s) for a particular node.

    Transpose, Unsqueeze and Squeeze derive the output layout from the layout
    of their first input. FINN custom ops and all remaining standard ops get
    a layout guessed from the number of output dimensions. If anything goes
    wrong during inference, every output is annotated as UNKNOWN instead.

    Returns True if any changes were made.
    """
    layouts_before = [model.get_tensor_layout(t) for t in node.output]

    def _guess_from_output_rank():
        # guess a layout for each output based on its number of dimensions
        for out_name in node.output:
            rank = len(model.get_tensor_shape(out_name))
            guessed = _dims_to_layout(model, node, rank)
            model.set_tensor_layout(out_name, guessed)

    try:
        if is_finn_op(node.domain):
            _guess_from_output_rank()
        elif node.op_type == "Transpose":
            # grab input annotation and switch it around using perm
            perm = get_by_name(node.attribute, "perm").ints
            src_layout = model.get_tensor_layout(node.input[0])
            permuted = [src_layout[axis] for axis in perm]
            model.set_tensor_layout(node.output[0], permuted)
        elif node.op_type == "Unsqueeze":
            src_layout = model.get_tensor_layout(node.input[0])
            # add dummy dimension at the output
            model.set_tensor_layout(node.output[0], src_layout + ["x"])
        elif node.op_type == "Squeeze":
            src_layout = model.get_tensor_layout(node.input[0])
            assert src_layout[-1] == "x"
            # remove dummy dimension
            model.set_tensor_layout(node.output[0], src_layout[:-1])
        else:
            _guess_from_output_rank()
    except Exception:
        # best effort: any failure above downgrades all outputs to UNKNOWN
        for out_name in node.output:
            model.set_tensor_layout(out_name, DataLayout.UNKNOWN)
    # compare old and new output layouts to see if anything changed
    layouts_after = [model.get_tensor_layout(t) for t in node.output]
    return layouts_after != layouts_before
def _dims_to_layout(model, node, ndims):
    """Guess the data layout of a node output from its number of dimensions.

    Two dimensions always map to NC. For standard (non-FINN) ops the layout
    of the node's first input is propagated. For MultiThreshold and
    QuantAvgPool2d the "data_layout" node attribute decides between NHWC and
    NCHW for 4 dimensions. Other FINN ops are taken to be NHWC for 4
    dimensions. Everything else is UNKNOWN.
    """
    if ndims == 2:
        return DataLayout.NC
    if not is_finn_op(node.domain):
        # propagate input layout to output
        # TODO this won't work for concat, squeeze/unsqueeze/reshape...
        return model.get_tensor_layout(node.input[0])
    is_4d = ndims == 4
    if node.op_type in ("MultiThreshold", "QuantAvgPool2d"):
        # these ops carry an explicit layout attribute
        declared = registry.getCustomOp(node).get_nodeattr("data_layout")
        if is_4d and declared == "NHWC":
            return DataLayout.NHWC
        if is_4d and declared == "NCHW":
            return DataLayout.NCHW
        return DataLayout.UNKNOWN
    return DataLayout.NHWC if is_4d else DataLayout.UNKNOWN
def execute_node(node, context, graph, return_full_exec_context=False):
    """Execute a single node and write its outputs into ``context``.

    Depending on the node, execution happens by running the sub-model of a
    partition node (GenericPartition / StreamingDataflowPartition) through
    ``execute_onnx``, by the FINN custom op execution function, or by
    onnxruntime on a temporary single-node model.

    node: the node to execute.
    context: dict mapping tensor names to values; inputs are read from it
        and outputs are written back into it.
    graph: the graph that contains ``node``; its input/output/value_info
        entries are used to build the single-node model for onnxruntime.
    return_full_exec_context: if True, the intermediate tensors of a
        partition's sub-model are also inserted into ``context``, prefixed
        with the node name and an underscore.

    Raises Exception if onnxruntime produces an output whose shape disagrees
    with the one already in ``context``, or if a node output has no matching
    ValueInfo in the graph.
    """
    if node.op_type == "GenericPartition":
        partition_node = getCustomOp(node)
        model = ModelWrapper(partition_node.get_nodeattr("model"))
        inp_ctx = {k: v for k, v in context.items() if k in node.input}
        # inputs may have been renamed in partition
        for i, old_iname in enumerate(node.input):
            new_iname = model.graph.input[i].name
            if old_iname != new_iname:
                inp_ctx[new_iname] = inp_ctx[old_iname]
                del inp_ctx[old_iname]
        ret = execute_onnx(model, inp_ctx, return_full_exec_context)
        # outputs may have been renamed in partition
        for i, node_oname in enumerate(node.output):
            model_oname = model.graph.output[i].name
            context[node_oname] = ret[model_oname]
        # prefix and insert exec context entries
        if return_full_exec_context:
            model_onames = [x.name for x in model.graph.output]
            for tname in ret.keys():
                if tname not in model_onames:
                    context[node.name + "_" + tname] = ret[tname]
    elif node.op_type == "StreamingDataflowPartition":
        sdp_node = getCustomOp(node)
        model = ModelWrapper(sdp_node.get_nodeattr("model"))
        inp_ctx = {k: v for k, v in context.items() if k in node.input}
        # input may have been renamed in partition
        assert len(inp_ctx) == 1
        old_iname = node.input[0]
        new_iname = model.graph.input[0].name
        if old_iname != new_iname:
            inp_ctx[new_iname] = inp_ctx[old_iname]
            del inp_ctx[old_iname]
        ret = execute_onnx(model, inp_ctx, return_full_exec_context)
        # if the model was in ip-stitched rtlsim mode, may get annotation
        # for number of elapsed cycles, save again
        if model.get_metadata_prop("exec_mode") == "rtlsim":
            model.save(sdp_node.get_nodeattr("model"))
        # output may have been renamed in partition
        assert len(model.graph.output) == 1
        node_oname = node.output[0]
        model_oname = model.graph.output[0].name
        context[node_oname] = ret[model_oname]
        # prefix and insert exec context entries
        if return_full_exec_context:
            for tname in ret.keys():
                if tname != model_oname:
                    context[node.name + "_" + tname] = ret[tname]
    elif is_finn_op(node.domain):
        ex_cu_node.execute_custom_node(node, context, graph)
    else:
        # onnxruntime unfortunately does not implement run_node as defined by
        # ONNX, it can only execute entire models -- so we create a model
        # which solely consists of our current node.
        # note: ensure that the same ValueInfo does not appear both in
        # graph.value_info as well as graph.output or graph.input
        # nodes with multiple outputs that are a mix of value_info and
        # input/outputs may get them reordered below
        node_inputs = [x for x in graph.input if x.name in node.input]
        node_inputs += [x for x in graph.value_info if x.name in node.input]
        node_outputs = [x for x in graph.output if x.name in node.output]
        node_outputs += [x for x in graph.value_info if x.name in node.output]
        node_graph = helper.make_graph(
            nodes=[node],
            name="single-node-exec",
            inputs=node_inputs,
            outputs=node_outputs,
        )
        node_model = helper.make_model(node_graph)
        input_dict = {inp: context[inp] for inp in node.input}
        sess = rt.InferenceSession(node_model.SerializeToString())
        output_list = sess.run(None, input_dict)
        # node_outputs may be ordered differently from node.output, so map
        # each output name to its position in output_list (if a name occurs
        # more than once, the last position wins)
        output_index = {x.name: i for i, x in enumerate(node_outputs)}
        for outp in node.output:
            if outp not in output_index:
                # without this check a stale index from a previous output
                # would silently be reused
                raise Exception(
                    "Output %s of node %s was not found among the outputs "
                    "of the executed single-node model" % (outp, node.name)
                )
            result = output_list[output_index[outp]]
            if result.shape != context[outp].shape:
                raise Exception(
                    "Output shapes disagree after node execution: "
                    "found %s vs expected %s"
                    % (str(result.shape), str(context[outp].shape))
                )
            context[outp] = result
def get_non_finn_nodes(self):
    """Return the graph nodes whose domain is not a FINN domain
    (domain != 'finn.*'), as a list in graph order."""
    return [node for node in self.graph.node if not util.is_finn_op(node.domain)]
def apply(self, model):
    """Stitch the generated IP of all nodes in the model into one Vivado
    block design and package it as an IP core.

    Every node must be a FINN custom op with backend "fpgadataflow" and an
    existing "ip_path" directory. For each node the instantiation and
    connection TCL commands are collected; afterwards a TCL project script
    and a shell script are written to a fresh build directory and Vivado is
    invoked in batch mode. When ``self.vitis`` is set, the design is also
    synthesized out-of-context and the packaged IP is switched over to the
    resulting checkpoint.

    The following metadata properties are set on the model:
    "vivado_stitch_proj", "clk_ns", "wrapper_filename", "vivado_stitch_vlnv"
    and "vivado_stitch_ifnames".

    Returns (model, False).

    Raises AssertionError if a node violates one of the requirements above.
    """
    # ensure non-relative readmemh .dat files
    model = model.transform(ReplaceVerilogRelPaths())
    # first element is the TCL "list" command, the IP directories follow
    ip_dirs = ["list"]
    # add RTL streamer IP
    ip_dirs.append("/workspace/finn/finn-rtllib/memstream")
    # ensure that all nodes are fpgadataflow, and that IPs are generated
    for node in model.graph.node:
        assert is_finn_op(node.domain), "Found non-FINN node"
        backend_attribute = get_by_name(node.attribute, "backend")
        assert backend_attribute is not None, "Backend node attribute is not set."
        backend_value = backend_attribute.s.decode("UTF-8")
        assert (
            backend_value == "fpgadataflow"
        ), """Backend node attribute is not set to "fpgadataflow"."""
        node_inst = getCustomOp(node)
        ip_dir_value = node_inst.get_nodeattr("ip_path")
        assert os.path.isdir(ip_dir_value), "IP generation directory doesn't exist."
        ip_dirs += [ip_dir_value]
        self.create_cmds += node_inst.code_generation_ipi()
        my_producer = model.find_producer(node.input[0])
        self.connect_clk_rst(node)
        self.connect_axi(node)
        if my_producer is None:
            # first node in graph
            self.connect_s_axis_external(node)
            if node.op_type == "TLastMarker":
                assert (
                    node_inst.get_nodeattr("Direction") == "in"
                ), """Input TLastMarker incorrect direction"""
            elif node.op_type == "IODMA" and len(model.graph.node) != 1:
                # don't apply this check for a 1-node partition
                assert (
                    node_inst.get_nodeattr("direction") == "in"
                ), """Input DMA incorrect direction"""
        else:
            # intermediate node
            # wire up input(s) to previous node output(s)
            # foreach input
            #     find producer
            #     find index of producer output connected to our target input
            #     get names of hdl interfaces for input and producer output
            #     issue a TCL directive to connect input to output
            #     if FC layer with mode "decoupled", add a streamer on input 1
            for i in range(len(node.input)):
                producer = model.find_producer(node.input[i])
                if producer is None:
                    continue
                j = list(producer.output).index(node.input[i])
                src_intf_name = getCustomOp(
                    producer
                ).get_verilog_top_module_intf_names()["m_axis"][j]
                dst_intf_name = node_inst.get_verilog_top_module_intf_names()[
                    "s_axis"
                ][i]
                self.connect_cmds.append(
                    "connect_bd_intf_net [get_bd_intf_pins %s/%s] "
                    "[get_bd_intf_pins %s/%s]"
                    % (producer.name, src_intf_name, node.name, dst_intf_name)
                )
        if model.find_consumers(node.output[0]) is None:
            # last node in graph
            self.connect_m_axis_external(node)
            if node.op_type == "TLastMarker":
                assert (
                    node_inst.get_nodeattr("Direction") == "out"
                ), """Output TLastMarker incorrect direction"""
            elif node.op_type == "IODMA" and len(model.graph.node) != 1:
                assert (
                    node_inst.get_nodeattr("direction") == "out"
                ), """Output DMA incorrect direction"""

    # create a temporary folder for the project
    prjname = "finn_vivado_stitch_proj"
    vivado_stitch_proj_dir = make_build_dir(prefix="vivado_stitch_proj_")
    model.set_metadata_prop("vivado_stitch_proj", vivado_stitch_proj_dir)
    # start building the tcl script
    tcl = []
    # create vivado project
    tcl.append(
        "create_project %s %s -part %s"
        % (prjname, vivado_stitch_proj_dir, self.fpgapart)
    )
    # add all the generated IP dirs to ip_repo_paths
    ip_dirs_str = " ".join(ip_dirs)
    tcl.append("set_property ip_repo_paths [%s] [current_project]" % ip_dirs_str)
    tcl.append("update_ip_catalog")
    # create block design and instantiate all layers
    block_name = self.ip_name
    tcl.append('create_bd_design "%s"' % block_name)
    tcl.extend(self.create_cmds)
    tcl.extend(self.connect_cmds)
    # clock period in ns -> frequency in MHz -> frequency in Hz
    fclk_mhz = 1 / (self.clk_ns * 0.001)
    fclk_hz = fclk_mhz * 1000000
    model.set_metadata_prop("clk_ns", str(self.clk_ns))
    tcl.append("set_property CONFIG.FREQ_HZ %f [get_bd_ports /ap_clk]" % fclk_hz)
    tcl.append("regenerate_bd_layout")
    tcl.append("validate_bd_design")
    tcl.append("save_bd_design")
    # create wrapper hdl (for rtlsim later on)
    bd_base = "%s/%s.srcs/sources_1/bd/%s" % (
        vivado_stitch_proj_dir,
        prjname,
        block_name,
    )
    bd_filename = "%s/%s.bd" % (bd_base, block_name)
    tcl.append("make_wrapper -files [get_files %s] -top" % bd_filename)
    wrapper_filename = "%s/hdl/%s_wrapper.v" % (bd_base, block_name)
    tcl.append("add_files -norecurse %s" % wrapper_filename)
    model.set_metadata_prop("wrapper_filename", wrapper_filename)
    # synthesize to DCP and export stub, DCP and constraints
    if self.vitis:
        tcl.append(
            "set_property SYNTH_CHECKPOINT_MODE Hierarchical [ get_files %s ]"
            % bd_filename
        )
        tcl.append(
            "set_property -name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} "
            "-value {-mode out_of_context} -objects [get_runs synth_1]"
        )
        num_workers = get_num_default_workers()
        assert num_workers >= 0, "Number of workers must be nonnegative."
        if num_workers == 0:
            # zero workers configured: fall back to one job per CPU
            num_workers = mp.cpu_count()
        tcl.append("launch_runs synth_1 -jobs %s" % str(num_workers))
        tcl.append("wait_on_run [get_runs synth_1]")
        tcl.append("open_run synth_1 -name synth_1")
        tcl.append("write_verilog -force -mode synth_stub %s.v" % block_name)
        tcl.append("write_checkpoint %s.dcp" % block_name)
        tcl.append("write_xdc %s.xdc" % block_name)
        tcl.append("report_utilization -file %s_partition_util.rpt" % block_name)
    # export block design itself as an IP core
    block_vendor = "xilinx_finn"
    block_library = "finn"
    block_vlnv = "%s:%s:%s:1.0" % (block_vendor, block_library, block_name)
    model.set_metadata_prop("vivado_stitch_vlnv", block_vlnv)
    model.set_metadata_prop("vivado_stitch_ifnames", str(self.intf_names))
    tcl.append(
        (
            "ipx::package_project -root_dir %s/ip -vendor %s "
            "-library %s -taxonomy /UserIP -module %s -import_files"
        )
        % (vivado_stitch_proj_dir, block_vendor, block_library, block_name)
    )
    tcl.append("set_property core_revision 2 [ipx::find_open_core %s]" % block_vlnv)
    tcl.append("ipx::create_xgui_files [ipx::find_open_core %s]" % block_vlnv)
    # if targeting Vitis, add some properties to the IP
    if self.vitis:
        tcl.append(
            "ipx::remove_bus_parameter FREQ_HZ "
            "[ipx::get_bus_interfaces CLK.AP_CLK -of_objects [ipx::current_core]]"
        )
        # replace source code with dcp
        tcl.append(
            "set_property sdx_kernel true [ipx::find_open_core %s]" % block_vlnv
        )
        tcl.append(
            "set_property sdx_kernel_type rtl [ipx::find_open_core %s]" % block_vlnv
        )
        tcl.append(
            "set_property supported_families { } [ipx::find_open_core %s]"
            % block_vlnv
        )
        tcl.append(
            "set_property xpm_libraries {XPM_CDC XPM_MEMORY XPM_FIFO} "
            "[ipx::find_open_core %s]" % block_vlnv
        )
        tcl.append(
            "set_property auto_family_support_level level_2 "
            "[ipx::find_open_core %s]" % block_vlnv
        )
        # remove all files from synthesis and sim groups
        # we'll replace with DCP, stub, and xdc
        tcl.append(
            "ipx::remove_all_file "
            "[ipx::get_file_groups xilinx_anylanguagebehavioralsimulation]"
        )
        tcl.append(
            "ipx::remove_all_file "
            "[ipx::get_file_groups xilinx_anylanguagesynthesis]"
        )
        tcl.append(
            "ipx::remove_file_group "
            "xilinx_anylanguagebehavioralsimulation [ipx::current_core]"
        )
        tcl.append(
            "ipx::remove_file_group "
            "xilinx_anylanguagesynthesis [ipx::current_core]"
        )
        # remove sim and src folders
        tcl.append("file delete -force %s/ip/sim" % vivado_stitch_proj_dir)
        tcl.append("file delete -force %s/ip/src" % vivado_stitch_proj_dir)
        # copy and add DCP, stub, and xdc
        tcl.append("file mkdir %s/ip/dcp" % vivado_stitch_proj_dir)
        tcl.append("file mkdir %s/ip/impl" % vivado_stitch_proj_dir)
        tcl.append(
            "file copy -force %s.dcp %s/ip/dcp"
            % (block_name, vivado_stitch_proj_dir)
        )
        tcl.append(
            "file copy -force %s.xdc %s/ip/impl"
            % (block_name, vivado_stitch_proj_dir)
        )
        tcl.append("ipx::add_file_group xilinx_implementation [ipx::current_core]")
        tcl.append(
            "ipx::add_file impl/%s.xdc [ipx::get_file_groups xilinx_implementation]"
            % block_name
        )
        tcl.append(
            "set_property used_in [list implementation] "
            "[ipx::get_files impl/%s.xdc "
            "-of_objects [ipx::get_file_groups xilinx_implementation]]" % block_name
        )
        tcl.append(
            "ipx::add_file_group " "xilinx_synthesischeckpoint [ipx::current_core]"
        )
        tcl.append(
            "ipx::add_file dcp/%s.dcp "
            "[ipx::get_file_groups xilinx_synthesischeckpoint]" % block_name
        )
        tcl.append(
            "ipx::add_file_group xilinx_simulationcheckpoint [ipx::current_core]"
        )
        tcl.append(
            "ipx::add_file dcp/%s.dcp "
            "[ipx::get_file_groups xilinx_simulationcheckpoint]" % block_name
        )
    tcl.append("ipx::update_checksums [ipx::find_open_core %s]" % block_vlnv)
    tcl.append("ipx::save_core [ipx::find_open_core %s]" % block_vlnv)
    # export list of used Verilog files (for rtlsim later on)
    tcl.append(
        "set all_v_files [get_files -filter {FILE_TYPE == Verilog "
        + "&& USED_IN_SYNTHESIS == 1} ]"
    )
    v_file_list = "%s/all_verilog_srcs.txt" % vivado_stitch_proj_dir
    tcl.append("set fp [open %s w]" % v_file_list)
    # write each verilog filename to all_verilog_srcs.txt
    tcl.append("foreach vf $all_v_files {puts $fp $vf}")
    tcl.append("close $fp")
    # write the project creator tcl script
    tcl_string = "\n".join(tcl) + "\n"
    with open(vivado_stitch_proj_dir + "/make_project.tcl", "w") as f:
        f.write(tcl_string)
    # create a shell script and call Vivado
    make_project_sh = vivado_stitch_proj_dir + "/make_project.sh"
    working_dir = os.environ["PWD"]
    with open(make_project_sh, "w") as f:
        f.write("#!/bin/bash \n")
        f.write("cd {}\n".format(vivado_stitch_proj_dir))
        f.write("vivado -mode batch -source make_project.tcl\n")
        f.write("cd {}\n".format(working_dir))
    bash_command = ["bash", make_project_sh]
    # NOTE(review): the exit status of the Vivado run is not checked here
    process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
    process_compile.communicate()
    return (model, False)
def _infer_node_datatype(model, node):
    """Infer output datatype(s) for a particular node.

    FINN custom ops delegate to their own ``infer_node_datatype``. For
    standard ops a small set of rules is applied: ``Sign`` produces BIPOLAR,
    ``MatMul``/``Conv`` produce FLOAT32 if any input is FLOAT32 and otherwise
    a 32-bit (signed or unsigned) integer, ``Resize``/``Upsample`` in
    "nearest" mode and the datatype-preserving ops listed below forward the
    datatype of their first input, and everything else is assumed to produce
    FLOAT32 unless a different annotation is already present.

    Returns True if any output datatype annotation changed.

    Raises Exception if the node is a FINN custom op whose op_type is not
    found in the custom op registry.
    """
    dt_identity_optypes = [
        "Reshape",
        "Transpose",
        "Flatten",
        "Slice",
        "Gather",
        "GatherElements",
        "GatherND",
        "Identity",
        "Expand",
        "MaxPool",
        "GlobalMaxPool",
        "Scatter",
        "ScatterElements",
        "ScatterND",
        "Squeeze",
        "Unsqueeze",
        "Tile",
    ]
    idtypes = [model.get_tensor_datatype(x) for x in node.input]
    odtypes = [model.get_tensor_datatype(x) for x in node.output]
    op_type = node.op_type
    if is_finn_op(node.domain):
        # handle DataType inference for CustomOp
        # only the registry lookup is guarded: a KeyError raised inside the
        # op's own inference must not be reported as "unsupported op_type"
        try:
            inst = registry.getCustomOp(node)
        except KeyError as err:
            raise Exception(
                "Custom op_type %s is currently not supported." % op_type
            ) from err
        inst.infer_node_datatype(model)
    elif op_type == "Sign":
        # always produces bipolar outputs
        model.set_tensor_datatype(node.output[0], DataType["BIPOLAR"])
    elif op_type in ["MatMul", "Conv"]:
        if any(x == DataType["FLOAT32"] for x in idtypes):
            # node has at least one float input, output is also float
            model.set_tensor_datatype(node.output[0], DataType["FLOAT32"])
        else:
            # TODO compute minimum / maximum result to minimize bitwidth
            # use (u)int32 accumulators for now
            has_signed_inp = any(x.signed() for x in idtypes)
            odtype = DataType["INT32"] if has_signed_inp else DataType["UINT32"]
            model.set_tensor_datatype(node.output[0], odtype)
    elif op_type in ["Resize", "Upsample"]:
        # the attribute itself may be absent, so test it before reading .s;
        # a missing mode attribute means the default mode "nearest"
        mode_attr = get_by_name(node.attribute, "mode")
        if mode_attr is None:
            mode = "nearest"
        else:
            mode = mode_attr.s.decode("UTF-8")
        if mode == "nearest":
            # set output dtype = input dtype
            idtype = model.get_tensor_datatype(node.input[0])
            model.set_tensor_datatype(node.output[0], idtype)
        # other modes leave the existing output annotation untouched
    elif op_type in dt_identity_optypes:
        # set output dtype = input dtype
        idtype = model.get_tensor_datatype(node.input[0])
        model.set_tensor_datatype(node.output[0], idtype)
    else:
        # unknown, assume node produces float32 outputs
        for o in node.output:
            # check if output datatype is already set to a value != FLOAT32
            odtype = model.get_tensor_datatype(o)
            if odtype is not None and odtype != DataType["FLOAT32"]:
                # don't change data type
                model.set_tensor_datatype(o, odtype)
            else:
                model.set_tensor_datatype(o, DataType["FLOAT32"])
    # compare old and new output dtypes to see if anything changed
    new_odtypes = [model.get_tensor_datatype(x) for x in node.output]
    graph_modified = new_odtypes != odtypes
    return graph_modified