예제 #1
0
def step_hls_codegen(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Generate Vivado HLS code so HLSCustomOp nodes are ready for IP generation.

    Applies the ``PrepareIP`` transformation, parameterized with the FPGA part
    and the HLS clock period resolved from ``cfg``, and returns the
    transformed model.
    """
    codegen = PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())
    return model.transform(codegen)
예제 #2
0
def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Stitch the generated HLS IP blocks of the graph into a single IP.

    The stitching itself only runs when DataflowOutputType.STITCHED_IP is one
    of the requested outputs; the resulting Vivado project is then copied to
    ``<output_dir>/stitched_ip``. Independently of that, if the
    STITCHED_IP_RTLSIM verification step is enabled, a deep copy of the model
    is prepared for rtlsim and handed to ``verify_step``.

    Returns the (possibly transformed) model; the rtlsim copy is discarded.
    """
    if DataflowOutputType.STITCHED_IP in cfg.generate_outputs:
        out_dir = cfg.output_dir + "/stitched_ip"
        stitcher = CreateStitchedIP(cfg._resolve_fpga_part(), cfg.synth_clk_period_ns)
        model = model.transform(stitcher)
        # TODO copy all ip sources into output dir? as zip?
        copytree(model.get_metadata_prop("vivado_stitch_proj"), out_dir)
        print("Vivado stitched IP written into " + out_dir)

    enabled_checks = cfg._resolve_verification_steps()
    if VerificationStepType.STITCHED_IP_RTLSIM not in enabled_checks:
        return model

    # Work on a copy so the rtlsim-specific tweaks never leak into the result.
    sim_model = deepcopy(model)
    # rtlsim only supports impl_style=rtl for StreamingFIFO, and likewise
    # impl_style=hls for StreamingDataWidthConverter, so force both.
    forced_impl_styles = (
        ("StreamingFIFO", "rtl"),
        ("StreamingDataWidthConverter_Batch", "hls"),
    )
    for op_type, impl_style in forced_impl_styles:
        for node in sim_model.get_nodes_by_op_type(op_type):
            getCustomOp(node).set_nodeattr("impl_style", impl_style)
    sim_model = sim_model.transform(PrepareRTLSim())
    sim_model.set_metadata_prop("exec_mode", "rtlsim")
    verify_step(sim_model, cfg, "stitched_ip_rtlsim", need_parent=True)
    return model
예제 #3
0
def step_resnet50_set_fifo_depths(model: ModelWrapper,
                                  cfg: DataflowBuildConfig):
    """Insert FIFOs and fix their depths, then regenerate IP for new nodes.

    The behavior is selected by ``cfg.auto_fifo_depths``:

    * True: run `InsertAndSetFIFODepths` to try to find FIFO sizes that give
      full throughput. This involves stitched-IP rtlsim and may take a long
      time.
    * False: assume the folding config file also carries the FIFO sizes.
      Runs `InsertDWC` and `InsertFIFO`, renames nodes and tensors
      (`GiveUniqueNodeNames` keeps node names coherent with the config file),
      applies `ApplyConfig(cfg.folding_config_file)` if a file is set, and
      finally runs `RemoveShallowFIFOs`.

    In both cases the final hardware attributes are written to
    ``<output_dir>/final_hw_config.json`` and `PrepareIP`, `HLSSynthIP` and
    `ReplaceVerilogRelPaths` are applied before the model is returned.
    """
    if not cfg.auto_fifo_depths:
        # The folding config json is expected to contain FIFO sizes too:
        # insert DWCs and FIFOs, then run ApplyConfig once more.
        model = model.transform(InsertDWC())
        # Every FIFO must exist for ApplyConfig to be able to set its depth,
        # hence create_shallow_fifos=True.
        model = model.transform(InsertFIFO(create_shallow_fifos=True))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(GiveReadableTensorNames())
        folding_cfg = cfg.folding_config_file
        if folding_cfg is not None:
            model = model.transform(ApplyConfig(folding_cfg))
        # Drop the shallow FIFOs again.
        model = model.transform(RemoveShallowFIFOs())
    else:
        fifo_sizing = InsertAndSetFIFODepths(
            cfg._resolve_fpga_part(),
            cfg._resolve_hls_clk_period(),
            vivado_ram_style=cfg.large_fifo_mem_style.value,
        )
        model = model.transform(fifo_sizing)

    # Persist the final configuration as json.
    exported_attrs = [
        "PE",
        "SIMD",
        "ram_style",
        "depth",
        "impl_style",
        "resType",
        "mem_mode",
        "runtime_writeable_weights",
    ]
    config_json = cfg.output_dir + "/final_hw_config.json"
    extract_model_config_to_json(model, config_json, exported_attrs)

    # With the FIFOs in place, PrepareIP and HLSSynthIP are called again;
    # they will only run for the new nodes (e.g. FIFOs and DWCs).
    ip_prep = PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())
    model = model.transform(ip_prep)
    model = model.transform(HLSSynthIP())
    return model.transform(ReplaceVerilogRelPaths())
예제 #4
0
def step_synthesize_bitfile(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Synthesize a bitfile for the configured board using the selected shell
    flow (Vivado for Zynq or Vitis for Alveo).

    Nothing happens unless DataflowOutputType.BITFILE is among
    ``cfg.generate_outputs``. Otherwise the build products are copied into
    ``<output_dir>/bitfile`` and the reports into ``<output_dir>/report``.

    Raises:
        Exception: if ``cfg.shell_flow_type`` is neither VIVADO_ZYNQ nor
            VITIS_ALVEO.
    """
    if DataflowOutputType.BITFILE not in cfg.generate_outputs:
        return model

    bitfile_dir = cfg.output_dir + "/bitfile"
    os.makedirs(bitfile_dir, exist_ok=True)
    report_dir = cfg.output_dir + "/report"
    os.makedirs(report_dir, exist_ok=True)
    partition_model_dir = cfg.output_dir + "/intermediate_models/kernel_partitions"
    synth_rpt_dst = report_dir + "/post_synth_resources.xml"

    if cfg.shell_flow_type == ShellFlowType.VIVADO_ZYNQ:
        zynq_build = ZynqBuild(
            cfg.board,
            cfg.synth_clk_period_ns,
            cfg.enable_hw_debug,
            partition_model_dir=partition_model_dir,
        )
        model = model.transform(zynq_build)
        copy(model.get_metadata_prop("bitfile"), bitfile_dir + "/finn-accel.bit")
        copy(model.get_metadata_prop("hw_handoff"), bitfile_dir + "/finn-accel.hwh")
        copy(model.get_metadata_prop("vivado_synth_rpt"), synth_rpt_dst)
        # The routed timing summary lives inside the Vivado project tree.
        vivado_pynq_proj_dir = model.get_metadata_prop("vivado_pynq_proj")
        timing_rpt_src = (
            "%s/finn_zynq_link.runs/impl_1/top_wrapper_timing_summary_routed.rpt"
            % vivado_pynq_proj_dir
        )
        copy(timing_rpt_src, report_dir + "/post_route_timing.rpt")
    elif cfg.shell_flow_type == ShellFlowType.VITIS_ALVEO:
        vitis_build = VitisBuild(
            cfg._resolve_fpga_part(),
            cfg.synth_clk_period_ns,
            cfg.vitis_platform,
            strategy=cfg._resolve_vitis_opt_strategy(),
            enable_debug=cfg.enable_hw_debug,
            floorplan_file=cfg.vitis_floorplan_file,
            partition_model_dir=partition_model_dir,
        )
        model = model.transform(vitis_build)
        copy(model.get_metadata_prop("bitfile"), bitfile_dir + "/finn-accel.xclbin")
        copy(model.get_metadata_prop("vivado_synth_rpt"), synth_rpt_dst)
    else:
        raise Exception("Unrecognized shell_flow_type: " + str(cfg.shell_flow_type))

    print("Bitfile written into " + bitfile_dir)
    return model
예제 #5
0
def step_hls_ipgen(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run Vivado HLS synthesis on any HLSCustomOp nodes to generate IP blocks.

    Applies `PrepareIP`, `HLSSynthIP` and `ReplaceVerilogRelPaths` in that
    order, then dumps the result of the `hls_synth_res_estimation` analysis
    to ``<output_dir>/report/estimate_layer_resources_hls.json``.
    Returns the transformed model.
    """
    model = (
        model.transform(
            PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())
        )
        .transform(HLSSynthIP())
        .transform(ReplaceVerilogRelPaths())
    )

    report_dir = cfg.output_dir + "/report"
    os.makedirs(report_dir, exist_ok=True)
    hls_estimates = model.analysis(hls_synth_res_estimation)
    report_path = report_dir + "/estimate_layer_resources_hls.json"
    with open(report_path, "w") as report_file:
        json.dump(hls_estimates, report_file, indent=2)
    return model
예제 #6
0
def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Streamline the given model.

    Streamlining moves floating point scale/shift parameters around, collapses
    adjacent ones into a single parameter and then absorbs the scale/shift
    into the following `MultiThreshold` node. It requires careful topology
    design and cannot be applied to all topologies.

    If the STREAMLINED_PYTHON verification step is enabled, the streamlined
    model is verified before being returned.
    """
    model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold()).transform(
        Streamline()
    )

    # Convolutions still present after streamlining need to be lowered.
    if len(model.get_nodes_by_op_type("Conv")) > 0:
        model = (
            model.transform(LowerConvsToMatMul())
            .transform(MakeMaxPoolNHWC())
            .transform(absorb.AbsorbTransposeIntoMultiThreshold())
            .transform(MakeMaxPoolNHWC())
        )

    model = (
        model.transform(ConvertBipolarMatMulToXnorPopcount())
        .transform(Streamline())
        # absorb final add-mul nodes into TopK
        .transform(absorb.AbsorbScalarMulAddIntoTopK())
        .transform(InferDataLayouts())
        .transform(RemoveUnusedTensors())
    )

    enabled_checks = cfg._resolve_verification_steps()
    if VerificationStepType.STREAMLINED_PYTHON in enabled_checks:
        verify_step(model, cfg, "streamlined_python", need_parent=False)

    return model
예제 #7
0
def verify_step(model: ModelWrapper, cfg: DataflowBuildConfig, step_name: str,
                need_parent: bool):
    """Execute the model on the configured verification input and compare the
    output against the expected output.

    :param model: model to execute
    :param cfg: build configuration providing the output directory and the
        verification input/expected-output pair
    :param step_name: label used in log messages and output file names
    :param need_parent: if True, the model is saved under
        ``<output_dir>/intermediate_models`` and executed through
        ``dataflow_parent.onnx`` from that directory; otherwise it is executed
        directly

    The produced output is saved as
    ``<output_dir>/verification_output/verify_<step_name>_<SUCCESS|FAIL>.npy``.
    A mismatch is only reported, it does not raise.
    """
    print("Running verification for " + step_name)
    verify_out_dir = cfg.output_dir + "/verification_output"
    intermediate_models_dir = cfg.output_dir + "/intermediate_models"
    os.makedirs(verify_out_dir, exist_ok=True)
    in_npy, exp_out_npy = cfg._resolve_verification_io_pair()

    if not need_parent:
        # Standalone execution: feed the first graph input, read the first
        # graph output.
        graph = model.graph
        inp_tensor_name = graph.input[0].name
        out_tensor_name = graph.output[0].name
        out_npy = execute_onnx(model, {inp_tensor_name: in_npy})[out_tensor_name]
    else:
        assert (cfg.save_intermediate_models
                ), "Enable save_intermediate_models for verification"
        parent_model_fn = intermediate_models_dir + "/dataflow_parent.onnx"
        child_model_fn = intermediate_models_dir + "/verify_%s.onnx" % step_name
        model.save(child_model_fn)
        out_npy = execute_parent(parent_model_fn, child_model_fn, in_npy)

    # Outputs match when every element agrees within an absolute tolerance.
    matches = np.isclose(exp_out_npy, out_npy, atol=1e-3).all()
    res_str = "SUCCESS" if matches else "FAIL"
    np.save(verify_out_dir + "/verify_%s_%s.npy" % (step_name, res_str), out_npy)
    print("Verification for %s : %s" % (step_name, res_str))
예제 #8
0
def build_dataflow_directory(path_to_cfg_dir: str):
    """Best-effort build a dataflow accelerator from the specified directory.

    :param path_to_cfg_dir: Directory containing the model and build config
    :return: whatever ``build_dataflow_cfg`` returns for the build
    :raises AssertionError: if the directory or one of the required files
        does not exist

    The specified directory path_to_cfg_dir must contain the following files:

    * model.onnx : ONNX model to be converted to dataflow accelerator
    * dataflow_build_config.json : JSON file with build configuration

    The build runs with the working directory changed to path_to_cfg_dir; the
    previous working directory is restored afterwards, even if the build
    raises.
    """
    # get absolute path
    path_to_cfg_dir = os.path.abspath(path_to_cfg_dir)
    assert os.path.isdir(
        path_to_cfg_dir), "Directory not found: " + path_to_cfg_dir
    onnx_filename = path_to_cfg_dir + "/model.onnx"
    json_filename = path_to_cfg_dir + "/dataflow_build_config.json"
    assert os.path.isfile(onnx_filename), "ONNX not found: " + onnx_filename
    assert os.path.isfile(
        json_filename), "Build config not found: " + json_filename
    with open(json_filename, "r") as f:
        json_str = f.read()
    build_cfg = DataflowBuildConfig.from_json(json_str)
    old_wd = os.getcwd()
    # change into build dir to resolve relative paths
    os.chdir(path_to_cfg_dir)
    try:
        return build_dataflow_cfg(onnx_filename, build_cfg)
    finally:
        # always go back, otherwise a failed build leaves the caller's
        # process stranded in the build directory
        os.chdir(old_wd)
예제 #9
0
def step_out_of_context_synthesis(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run out-of-context synthesis and write a combined resource/timing report.

    Only runs when DataflowOutputType.OOC_SYNTH is requested, and in that case
    asserts that DataflowOutputType.STITCHED_IP is requested as well. The
    synthesis results, extended with an estimated throughput, are written to
    ``<output_dir>/report/ooc_synth_and_timing.json``.
    """
    requested = cfg.generate_outputs
    if DataflowOutputType.OOC_SYNTH not in requested:
        return model

    assert (
        DataflowOutputType.STITCHED_IP in cfg.generate_outputs
    ), "OOC needs stitched IP"
    ooc_synth = SynthOutOfContext(
        part=cfg._resolve_fpga_part(), clk_period_ns=cfg.synth_clk_period_ns
    )
    model = model.transform(ooc_synth)
    report_dir = cfg.output_dir + "/report"
    os.makedirs(report_dir, exist_ok=True)
    # The synthesis results are stored in the model metadata as a string.
    # NOTE(review): eval() executes arbitrary code from that string; consider
    # ast.literal_eval if the stored value is always a plain literal - confirm
    # against the producer of "res_total_ooc_synth".
    ooc_results = eval(model.get_metadata_prop("res_total_ooc_synth"))

    perf_estimate = model.analysis(dataflow_performance)
    # Extend the report with a throughput estimate:
    # clock cycles per second divided by cycles needed per frame.
    cycles_per_sec = float(ooc_results["fmax_mhz"]) * (10 ** 6)
    ooc_results["estimated_throughput_fps"] = (
        cycles_per_sec / perf_estimate["max_cycles"]
    )
    report_path = report_dir + "/ooc_synth_and_timing.json"
    with open(report_path, "w") as report_file:
        json.dump(ooc_results, report_file, indent=2)
    return model
예제 #10
0
def step_target_fps_parallelization(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Derive parallelization attributes from the target cycles per frame.

    When ``cfg._resolve_cycles_per_frame()`` yields a value (i.e. target_fps
    was specified), the SetFolding transformation is applied and the resulting
    configuration is saved as ``auto_folding_config.json`` in the output
    directory, where it can serve as a basis for customizing the folding
    factors further. Otherwise the model is returned untouched.
    """
    cycles_budget = cfg._resolve_cycles_per_frame()
    if cycles_budget is None:
        return model

    folding = SetFolding(
        cycles_budget,
        mvau_wwidth_max=cfg.mvau_wwidth_max,
        two_pass_relaxation=cfg.folding_two_pass_relaxation,
    )
    model = model.transform(folding)

    # Save the suggested configuration as json.
    exported_attrs = [
        "PE",
        "SIMD",
        "ram_style",
        "resType",
        "mem_mode",
        "runtime_writeable_weights",
    ]
    config_json = cfg.output_dir + "/auto_folding_config.json"
    extract_model_config_to_json(model, config_json, exported_attrs)
    return model
예제 #11
0
def step_qonnx_to_finn(model: ModelWrapper, cfg: DataflowBuildConfig):
    """
    Convert a QONNX model to the FINN-ONNX dialect.

    The step is a no-op unless the model contains at least one node with
    op_type "BinaryQuant", "Quant" or "Trunc". If it does, the QONNX tidy-up
    (`cleanup_model`) runs first, followed by `ConvertQONNXtoFINN`. When the
    QONNX_TO_FINN_PYTHON verification step is enabled, the converted model is
    verified before it is returned.
    """
    # Count the QONNX nodes; without any there is nothing to convert.
    qonnx_op_types = ("BinaryQuant", "Quant", "Trunc")
    qonnx_node_count = sum(
        len(model.get_nodes_by_op_type(op_type)) for op_type in qonnx_op_types
    )
    if qonnx_node_count == 0:
        return model

    # QONNX cleanup
    model = cleanup_model(model)

    # QONNX to FINN-ONNX
    node_filter = default_filter_function_generator(
        max_multithreshold_bit_width=cfg.max_multithreshold_bit_width
    )
    model = model.transform(ConvertQONNXtoFINN(filter_function=node_filter))

    enabled_checks = cfg._resolve_verification_steps()
    if VerificationStepType.QONNX_TO_FINN_PYTHON in enabled_checks:
        verify_step(model, cfg, "qonnx_to_finn_python", need_parent=False)

    return model
예제 #12
0
def step_make_pynq_driver(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Generate a PYNQ Python driver for interfacing the generated accelerator.

    Only runs when DataflowOutputType.PYNQ_DRIVER is requested; the generated
    driver directory is then copied to ``<output_dir>/driver``.
    """
    if DataflowOutputType.PYNQ_DRIVER not in cfg.generate_outputs:
        return model

    driver_dir = cfg.output_dir + "/driver"
    driver_gen = MakePYNQDriver(cfg._resolve_driver_platform())
    model = model.transform(driver_gen)
    generated_dir = model.get_metadata_prop("pynq_driver_dir")
    copytree(generated_dir, driver_dir)
    print("PYNQ Python driver written into " + driver_dir)
    return model
예제 #13
0
def step_target_fps_parallelization(model: ModelWrapper,
                                    cfg: DataflowBuildConfig):
    """Set parallelization attributes via SetFolding if target_fps was given.

    The model is returned unchanged when ``cfg._resolve_cycles_per_frame()``
    yields None.
    """
    cycles_budget = cfg._resolve_cycles_per_frame()
    if cycles_budget is None:
        return model
    folding = SetFolding(cycles_budget, mvau_wwidth_max=cfg.mvau_wwidth_max)
    return model.transform(folding)
예제 #14
0
def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Create stitched IP for a graph after all HLS IP blocks have been generated.
    Depends on the DataflowOutputType.STITCHED_IP output product.

    If the STITCHED_IP_RTLSIM verification step is enabled, a deep copy of the
    model is prepared for stitched-IP rtlsim and verified. During that
    verification the LIVENESS_THRESHOLD environment variable is temporarily
    overridden; it is reset afterwards even if the verification raises.

    Returns the (possibly transformed) model; the rtlsim copy is discarded.
    """

    if DataflowOutputType.STITCHED_IP in cfg.generate_outputs:
        stitched_ip_dir = cfg.output_dir + "/stitched_ip"
        model = model.transform(
            CreateStitchedIP(
                cfg._resolve_fpga_part(),
                cfg.synth_clk_period_ns,
                vitis=cfg.stitched_ip_gen_dcp,
            )
        )
        # TODO copy all ip sources into output dir? as zip?
        copy_tree(model.get_metadata_prop("vivado_stitch_proj"), stitched_ip_dir)
        print("Vivado stitched IP written into " + stitched_ip_dir)
    if VerificationStepType.STITCHED_IP_RTLSIM in cfg._resolve_verification_steps():
        # prepare ip-stitched rtlsim on a copy, so the returned model is
        # unaffected by the simulation-specific changes
        verify_model = deepcopy(model)
        verify_model = prepare_for_stitched_ip_rtlsim(verify_model, cfg)
        # use critical path estimate to set rtlsim liveness threshold
        # (very conservative)
        verify_model = verify_model.transform(AnnotateCycles())
        estimate_network_performance = verify_model.analysis(dataflow_performance)
        # remembered so the override below can be undone; presumably the
        # threshold currently in effect - confirm against the helper
        prev_liveness = pyverilate_get_liveness_threshold_cycles()
        os.environ["LIVENESS_THRESHOLD"] = str(
            int(estimate_network_performance["critical_path_cycles"])
        )
        try:
            if cfg.verify_save_rtlsim_waveforms:
                report_dir = cfg.output_dir + "/report"
                os.makedirs(report_dir, exist_ok=True)
                verify_model.set_metadata_prop(
                    "rtlsim_trace", "%s/verify_rtlsim.vcd" % (report_dir)
                )
            verify_step(verify_model, cfg, "stitched_ip_rtlsim", need_parent=True)
        finally:
            # undo the override even when verification fails, so later steps
            # in the same process do not inherit this step's threshold
            os.environ["LIVENESS_THRESHOLD"] = str(prev_liveness)
    return model
예제 #15
0
def step_apply_folding_config(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Set folding (parallelization) and other attributes from the folding
    configuration file, if one is specified.

    When the FOLDED_HLS_CPPSIM verification step is enabled, the model is
    additionally prepared and compiled for cppsim, switched to the "cppsim"
    exec mode and verified. Those cppsim transformations are part of the
    returned model.
    """
    folding_cfg = cfg.folding_config_file
    if folding_cfg is not None:
        model = model.transform(GiveUniqueNodeNames()).transform(
            ApplyConfig(folding_cfg)
        )

    enabled_checks = cfg._resolve_verification_steps()
    if VerificationStepType.FOLDED_HLS_CPPSIM not in enabled_checks:
        return model

    # prepare cppsim
    model = (
        model.transform(PrepareCppSim())
        .transform(CompileCppSim())
        .transform(SetExecMode("cppsim"))
    )
    verify_step(model, cfg, "folded_hls_cppsim", need_parent=True)
    return model
예제 #16
0
def step_tidy_up(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Tidy up the given model.

    Covers shape and datatype inference, constant folding, and giving nodes
    and tensors better names; static graph inputs are removed last. If the
    TIDY_UP_PYTHON verification step is enabled, the result is verified under
    the step name "initial_python".
    """
    model = (
        model.transform(InferShapes())
        .transform(FoldConstants())
        .transform(GiveUniqueNodeNames())
        .transform(GiveReadableTensorNames())
        .transform(InferDataTypes())
        .transform(RemoveStaticGraphInputs())
    )

    enabled_checks = cfg._resolve_verification_steps()
    if VerificationStepType.TIDY_UP_PYTHON in enabled_checks:
        verify_step(model, cfg, "initial_python", need_parent=False)

    return model