def step_hls_codegen(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Generate Vivado HLS code to prepare HLSCustomOp nodes for IP generation.

    :param model: model with HLSCustomOp nodes to prepare
    :param cfg: build configuration supplying FPGA part and HLS clock period
    :return: transformed model
    """
    prepared_model = model.transform(
        PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())
    )
    return prepared_model
def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Create stitched IP for a graph after all HLS IP blocks have been generated.
    Depends on the DataflowOutputType.STITCHED_IP output product.

    NOTE(review): this function name is defined again later in this file; the
    later definition shadows this one at import time — confirm intentional.
    """
    if DataflowOutputType.STITCHED_IP not in cfg.generate_outputs:
        return model
    stitched_ip_dir = cfg.output_dir + "/stitched_ip"
    model = model.transform(
        CreateStitchedIP(cfg._resolve_fpga_part(), cfg.synth_clk_period_ns)
    )
    # TODO copy all ip sources into output dir? as zip?
    copytree(model.get_metadata_prop("vivado_stitch_proj"), stitched_ip_dir)
    print("Vivado stitched IP written into " + stitched_ip_dir)
    if VerificationStepType.STITCHED_IP_RTLSIM in cfg._resolve_verification_steps():
        # prepare ip-stitched rtlsim on a copy so the returned model is untouched
        rtlsim_model = deepcopy(model)
        # rtlsim only supports impl_style=rtl for StreamingFIFO, ensure that
        for fifo_node in rtlsim_model.get_nodes_by_op_type("StreamingFIFO"):
            getCustomOp(fifo_node).set_nodeattr("impl_style", "rtl")
        # similarly force impl_style=hls for StreamingDataWidthConverter_Batch
        for dwc_node in rtlsim_model.get_nodes_by_op_type(
            "StreamingDataWidthConverter_Batch"
        ):
            getCustomOp(dwc_node).set_nodeattr("impl_style", "hls")
        rtlsim_model = rtlsim_model.transform(PrepareRTLSim())
        rtlsim_model.set_metadata_prop("exec_mode", "rtlsim")
        verify_step(rtlsim_model, cfg, "stitched_ip_rtlsim", need_parent=True)
    return model
def step_resnet50_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):
    """
    Depending on the auto_fifo_depths setting, do one of the following:

    * if auto_fifo_depths=True: Run the `InsertAndSetFIFODepths` transformation
      to attempt to determine the FIFO sizes that provide full throughput.
      Involves running stitched-IP rtlsim and may take a long time.

    * if auto_fifo_depths=False: Assume the folding config file contains FIFO
      sizes as well. Runs the `InsertFIFO` transformation, then
      `ApplyConfig(cfg.folding_config_file)`, and finally `RemoveShallowFIFOs`.
      Coherency with config file node naming is ensured by calling
      `GiveUniqueNodeNames`.
    """
    if cfg.auto_fifo_depths:
        model = model.transform(
            InsertAndSetFIFODepths(
                cfg._resolve_fpga_part(),
                cfg._resolve_hls_clk_period(),
                vivado_ram_style=cfg.large_fifo_mem_style.value,
            )
        )
    else:
        # assume folding cfg json contains FIFO sizes too
        # insert DWCs, FIFOs and run ApplyConfig once more
        model = model.transform(InsertDWC())
        # need to make sure all FIFOs are created so that their depth can be
        # set by ApplyConfig, so create_shallow_fifos=True
        model = model.transform(InsertFIFO(create_shallow_fifos=True))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(GiveReadableTensorNames())
        if cfg.folding_config_file is not None:
            model = model.transform(ApplyConfig(cfg.folding_config_file))
        # remove any shallow FIFOs
        model = model.transform(RemoveShallowFIFOs())
    # extract the final configuration and save it as json
    hw_attrs = [
        "PE",
        "SIMD",
        "ram_style",
        "depth",
        "impl_style",
        "resType",
        "mem_mode",
        "runtime_writeable_weights",
    ]
    extract_model_config_to_json(
        model, cfg.output_dir + "/final_hw_config.json", hw_attrs
    )
    # after FIFOs are ready to go, call PrepareIP and HLSSynthIP again;
    # this will only run for the new nodes (e.g. FIFOs and DWCs)
    model = model.transform(
        PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())
    )
    model = model.transform(HLSSynthIP())
    model = model.transform(ReplaceVerilogRelPaths())
    return model
def step_synthesize_bitfile(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Synthesize a bitfile using the specified shell flow, using either
    Vivado or Vitis, to target the specified board.

    :raises Exception: if ``cfg.shell_flow_type`` is not a recognized flow
    """
    if DataflowOutputType.BITFILE not in cfg.generate_outputs:
        return model
    bitfile_dir = cfg.output_dir + "/bitfile"
    os.makedirs(bitfile_dir, exist_ok=True)
    report_dir = cfg.output_dir + "/report"
    os.makedirs(report_dir, exist_ok=True)
    partition_model_dir = cfg.output_dir + "/intermediate_models/kernel_partitions"
    if cfg.shell_flow_type == ShellFlowType.VIVADO_ZYNQ:
        model = model.transform(
            ZynqBuild(
                cfg.board,
                cfg.synth_clk_period_ns,
                cfg.enable_hw_debug,
                partition_model_dir=partition_model_dir,
            )
        )
        # collect bitfile, hardware handoff and synthesis/timing reports
        copy(model.get_metadata_prop("bitfile"), bitfile_dir + "/finn-accel.bit")
        copy(model.get_metadata_prop("hw_handoff"), bitfile_dir + "/finn-accel.hwh")
        copy(
            model.get_metadata_prop("vivado_synth_rpt"),
            report_dir + "/post_synth_resources.xml",
        )
        vivado_pynq_proj_dir = model.get_metadata_prop("vivado_pynq_proj")
        timing_rpt = (
            "%s/finn_zynq_link.runs/impl_1/top_wrapper_timing_summary_routed.rpt"
            % vivado_pynq_proj_dir
        )
        copy(timing_rpt, report_dir + "/post_route_timing.rpt")
    elif cfg.shell_flow_type == ShellFlowType.VITIS_ALVEO:
        model = model.transform(
            VitisBuild(
                cfg._resolve_fpga_part(),
                cfg.synth_clk_period_ns,
                cfg.vitis_platform,
                strategy=cfg._resolve_vitis_opt_strategy(),
                enable_debug=cfg.enable_hw_debug,
                floorplan_file=cfg.vitis_floorplan_file,
                partition_model_dir=partition_model_dir,
            )
        )
        copy(model.get_metadata_prop("bitfile"), bitfile_dir + "/finn-accel.xclbin")
        copy(
            model.get_metadata_prop("vivado_synth_rpt"),
            report_dir + "/post_synth_resources.xml",
        )
    else:
        raise Exception("Unrecognized shell_flow_type: " + str(cfg.shell_flow_type))
    print("Bitfile written into " + bitfile_dir)
    return model
def step_hls_ipgen(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run Vivado HLS synthesis on any HLSCustomOp nodes to generate IP blocks.

    Also writes per-layer HLS resource estimates to the report directory.
    """
    for xfm in (
        PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period()),
        HLSSynthIP(),
        ReplaceVerilogRelPaths(),
    ):
        model = model.transform(xfm)
    report_dir = cfg.output_dir + "/report"
    os.makedirs(report_dir, exist_ok=True)
    # save per-layer HLS synthesis resource estimates as a JSON report
    hls_resources = model.analysis(hls_synth_res_estimation)
    with open(report_dir + "/estimate_layer_resources_hls.json", "w") as f:
        json.dump(hls_resources, f, indent=2)
    return model
def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run streamlining on given model. Streamlining involves moving floating point
    scale/shift parameters around, collapsing adjacent ones into a single parameter,
    then absorbing the scale/shift into the following `MultiThreshold` node.
    Streamlining requires careful topology design and cannot be applied to all
    topologies.
    """
    model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold())
    model = model.transform(Streamline())
    # convolutions must be lowered to MatMul before further streamlining
    if model.get_nodes_by_op_type("Conv"):
        model = model.transform(LowerConvsToMatMul())
        model = model.transform(MakeMaxPoolNHWC())
        model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
        model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    # absorb final add-mul nodes into TopK
    model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferDataLayouts())
    model = model.transform(RemoveUnusedTensors())
    if VerificationStepType.STREAMLINED_PYTHON in cfg._resolve_verification_steps():
        verify_step(model, cfg, "streamlined_python", need_parent=False)
    return model
def verify_step(
    model: ModelWrapper, cfg: DataflowBuildConfig, step_name: str, need_parent: bool
):
    """Run a numerical verification of ``model`` against the configured
    input/expected-output pair and save the produced output as .npy.

    :param step_name: label used in printed messages and output filenames
    :param need_parent: if True, execute the model inside the saved dataflow
        parent model (requires save_intermediate_models)
    """
    print("Running verification for " + step_name)
    verify_out_dir = cfg.output_dir + "/verification_output"
    intermediate_models_dir = cfg.output_dir + "/intermediate_models"
    os.makedirs(verify_out_dir, exist_ok=True)
    (in_npy, exp_out_npy) = cfg._resolve_verification_io_pair()
    if need_parent:
        assert (
            cfg.save_intermediate_models
        ), "Enable save_intermediate_models for verification"
        parent_model_fn = intermediate_models_dir + "/dataflow_parent.onnx"
        child_model_fn = intermediate_models_dir + "/verify_%s.onnx" % step_name
        model.save(child_model_fn)
        out_npy = execute_parent(parent_model_fn, child_model_fn, in_npy)
    else:
        inp_name = model.graph.input[0].name
        out_name = model.graph.output[0].name
        out_npy = execute_onnx(model, {inp_name: in_npy})[out_name]
    # elementwise comparison with absolute tolerance; all entries must match
    passed = np.isclose(exp_out_npy, out_npy, atol=1e-3).all()
    res_str = "SUCCESS" if passed else "FAIL"
    # the result string is embedded in the filename for quick inspection
    verification_output_fn = verify_out_dir + "/verify_%s_%s.npy" % (step_name, res_str)
    np.save(verification_output_fn, out_npy)
    print("Verification for %s : %s" % (step_name, res_str))
def build_dataflow_directory(path_to_cfg_dir: str):
    """Best-effort build a dataflow accelerator from the specified directory.

    :param path_to_cfg_dir: Directory containing the model and build config

    The specified directory path_to_cfg_dir must contain the following files:

    * model.onnx : ONNX model to be converted to dataflow accelerator
    * dataflow_build_config.json : JSON file with build configuration
    """
    # get absolute path
    path_to_cfg_dir = os.path.abspath(path_to_cfg_dir)
    assert os.path.isdir(
        path_to_cfg_dir), "Directory not found: " + path_to_cfg_dir
    onnx_filename = path_to_cfg_dir + "/model.onnx"
    json_filename = path_to_cfg_dir + "/dataflow_build_config.json"
    assert os.path.isfile(onnx_filename), "ONNX not found: " + onnx_filename
    assert os.path.isfile(
        json_filename), "Build config not found: " + json_filename
    with open(json_filename, "r") as f:
        json_str = f.read()
    build_cfg = DataflowBuildConfig.from_json(json_str)
    old_wd = os.getcwd()
    # change into build dir to resolve relative paths; restore the previous
    # working directory even if the build raises (fix: previously a failing
    # build left the process chdir'd into the config directory)
    os.chdir(path_to_cfg_dir)
    try:
        ret = build_dataflow_cfg(onnx_filename, build_cfg)
    finally:
        os.chdir(old_wd)
    return ret
def step_out_of_context_synthesis(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run out-of-context synthesis and generate reports.
    Depends on the DataflowOutputType.STITCHED_IP output product.
    """
    if DataflowOutputType.OOC_SYNTH in cfg.generate_outputs:
        assert (
            DataflowOutputType.STITCHED_IP in cfg.generate_outputs
        ), "OOC needs stitched IP"
        model = model.transform(
            SynthOutOfContext(
                part=cfg._resolve_fpga_part(), clk_period_ns=cfg.synth_clk_period_ns
            )
        )
        report_dir = cfg.output_dir + "/report"
        os.makedirs(report_dir, exist_ok=True)
        ooc_res_dict = model.get_metadata_prop("res_total_ooc_synth")
        # the metadata prop holds a repr'd dict literal; parse it with
        # ast.literal_eval instead of eval() so no arbitrary code can execute
        # (local import since the file's import block is not editable here)
        import ast

        ooc_res_dict = ast.literal_eval(ooc_res_dict)
        estimate_network_performance = model.analysis(dataflow_performance)
        # add some more metrics to estimated performance:
        # throughput at the achieved fmax, bounded by the slowest layer
        n_clock_cycles_per_sec = float(ooc_res_dict["fmax_mhz"]) * (10 ** 6)
        est_fps = n_clock_cycles_per_sec / estimate_network_performance["max_cycles"]
        ooc_res_dict["estimated_throughput_fps"] = est_fps
        with open(report_dir + "/ooc_synth_and_timing.json", "w") as f:
            json.dump(ooc_res_dict, f, indent=2)
    return model
def step_target_fps_parallelization(model: ModelWrapper, cfg: DataflowBuildConfig):
    """If target_fps was specified, use the SetFolding transformation to determine
    parallelization attributes. The auto-generated config will be saved under
    auto_folding_config.json under the outputs, which can serve as a basis for
    customizing the folding factors further.

    NOTE(review): a later definition in this file reuses this function name and
    shadows this version at import time — confirm the duplication is intentional.
    """
    target_cycles_per_frame = cfg._resolve_cycles_per_frame()
    if target_cycles_per_frame is None:
        return model
    model = model.transform(
        SetFolding(
            target_cycles_per_frame,
            mvau_wwidth_max=cfg.mvau_wwidth_max,
            two_pass_relaxation=cfg.folding_two_pass_relaxation,
        )
    )
    # extract the suggested configuration and save it as json
    hw_attrs = [
        "PE",
        "SIMD",
        "ram_style",
        "resType",
        "mem_mode",
        "runtime_writeable_weights",
    ]
    extract_model_config_to_json(
        model, cfg.output_dir + "/auto_folding_config.json", hw_attrs
    )
    return model
def step_qonnx_to_finn(model: ModelWrapper, cfg: DataflowBuildConfig):
    """
    This step will only execute if QONNX nodes are found. These include the
    following op_types: "Quant" , "Trunc" and "BinaryQuant". If such nodes are
    found the step will run the tidy-up step from QONNX and then convert the
    QONNX model to the FINN-ONNX dialect.
    """
    # count QONNX-dialect nodes; skip the whole step if none are present
    qonnx_node_count = sum(
        len(model.get_nodes_by_op_type(op_type))
        for op_type in ("BinaryQuant", "Quant", "Trunc")
    )
    if qonnx_node_count == 0:
        return model
    # QONNX cleanup
    model = cleanup_model(model)
    # QONNX to FINN-ONNX
    model = model.transform(
        ConvertQONNXtoFINN(
            filter_function=default_filter_function_generator(
                max_multithreshold_bit_width=cfg.max_multithreshold_bit_width
            )
        )
    )
    if VerificationStepType.QONNX_TO_FINN_PYTHON in cfg._resolve_verification_steps():
        verify_step(model, cfg, "qonnx_to_finn_python", need_parent=False)
    return model
def step_make_pynq_driver(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Create a PYNQ Python driver that can be used to interface the generated
    accelerator."""
    if DataflowOutputType.PYNQ_DRIVER not in cfg.generate_outputs:
        return model
    driver_dir = cfg.output_dir + "/driver"
    model = model.transform(MakePYNQDriver(cfg._resolve_driver_platform()))
    # the transform stages the driver in a temp dir; copy it to the outputs
    copytree(model.get_metadata_prop("pynq_driver_dir"), driver_dir)
    print("PYNQ Python driver written into " + driver_dir)
    return model
def step_target_fps_parallelization(model: ModelWrapper, cfg: DataflowBuildConfig):
    """If target_fps was specified, use the SetFolding transformation to determine
    parallelization attributes.

    NOTE(review): this redefines step_target_fps_parallelization declared earlier
    in this file; this simpler variant (no two_pass_relaxation argument, no JSON
    config export) is the one that wins at import time. Confirm the duplication
    is intentional.
    """
    cycles_target = cfg._resolve_cycles_per_frame()
    if cycles_target is not None:
        model = model.transform(
            SetFolding(cycles_target, mvau_wwidth_max=cfg.mvau_wwidth_max)
        )
    return model
def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Create stitched IP for a graph after all HLS IP blocks have been generated.
    Depends on the DataflowOutputType.STITCHED_IP output product.
    """
    if DataflowOutputType.STITCHED_IP in cfg.generate_outputs:
        stitched_ip_dir = cfg.output_dir + "/stitched_ip"
        model = model.transform(
            CreateStitchedIP(
                cfg._resolve_fpga_part(),
                cfg.synth_clk_period_ns,
                vitis=cfg.stitched_ip_gen_dcp,
            )
        )
        # TODO copy all ip sources into output dir? as zip?
        # NOTE(review): copy_tree comes from distutils, which was removed in
        # Python 3.12 — consider shutil.copytree(..., dirs_exist_ok=True)
        copy_tree(model.get_metadata_prop("vivado_stitch_proj"), stitched_ip_dir)
        print("Vivado stitched IP written into " + stitched_ip_dir)
        if VerificationStepType.STITCHED_IP_RTLSIM in cfg._resolve_verification_steps():
            # prepare ip-stitched rtlsim
            verify_model = deepcopy(model)
            verify_model = prepare_for_stitched_ip_rtlsim(verify_model, cfg)
            # use critical path estimate to set rtlsim liveness threshold
            # (very conservative)
            verify_model = verify_model.transform(AnnotateCycles())
            estimate_network_performance = verify_model.analysis(dataflow_performance)
            prev_liveness = pyverilate_get_liveness_threshold_cycles()
            os.environ["LIVENESS_THRESHOLD"] = str(
                int(estimate_network_performance["critical_path_cycles"])
            )
            # fix: restore the previous liveness threshold even if rtlsim
            # verification raises, so later steps see the original setting
            try:
                if cfg.verify_save_rtlsim_waveforms:
                    report_dir = cfg.output_dir + "/report"
                    os.makedirs(report_dir, exist_ok=True)
                    verify_model.set_metadata_prop(
                        "rtlsim_trace", "%s/verify_rtlsim.vcd" % (report_dir)
                    )
                verify_step(verify_model, cfg, "stitched_ip_rtlsim", need_parent=True)
            finally:
                os.environ["LIVENESS_THRESHOLD"] = str(prev_liveness)
    return model
def step_apply_folding_config(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Apply the folding configuration file onto the model to set folding
    (parallelization) and other attributes, if config file is specified."""
    if cfg.folding_config_file is not None:
        # GiveUniqueNodeNames keeps node naming coherent with the config file
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(ApplyConfig(cfg.folding_config_file))
    if VerificationStepType.FOLDED_HLS_CPPSIM in cfg._resolve_verification_steps():
        # prepare cppsim
        for xfm in (PrepareCppSim(), CompileCppSim(), SetExecMode("cppsim")):
            model = model.transform(xfm)
        verify_step(model, cfg, "folded_hls_cppsim", need_parent=True)
    return model
def step_tidy_up(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run the tidy-up step on given model. This includes shape and datatype
    inference, constant folding, and giving nodes and tensors better names.
    """
    tidy_transforms = [
        InferShapes(),
        FoldConstants(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        InferDataTypes(),
        RemoveStaticGraphInputs(),
    ]
    for xfm in tidy_transforms:
        model = model.transform(xfm)
    if VerificationStepType.TIDY_UP_PYTHON in cfg._resolve_verification_steps():
        verify_step(model, cfg, "initial_python", need_parent=False)
    return model