Esempio n. 1
0
def test_fpgadataflow_packed_dsp(ich, och, idim, k, s, pad, wdt, idt, tdt, odt, mode):
    """Check that the double-packed conv conversion matches the reference model.

    A model is built with ``make_model`` and converted with
    ``InferDoublePackedConv``. The converted graph must consist of exactly
    three nodes: Transpose, ConvDoublePacked_Batch, Transpose. Both models are
    then executed on the same random input and their flattened outputs are
    compared element-wise.

    Args:
        ich, och, idim, k, s, pad, wdt, idt, tdt, odt: forwarded unchanged to
            ``make_model``; ``idt``, ``ich`` and ``idim`` also determine the
            datatype and shape of the generated input tensor.
        mode: execution mode for the converted model, "cppsim" or "rtlsim".

    Raises:
        Exception: if ``mode`` is neither "cppsim" nor "rtlsim".
    """
    model = make_model(ich, och, idim, k, s, pad, wdt, idt, tdt, odt)
    cdp_model = model.transform(InferDoublePackedConv())
    assert (len(cdp_model.graph.node) == 3 and
            cdp_model.graph.node[1].op_type == "ConvDoublePacked_Batch" and
            cdp_model.graph.node[0].op_type == "Transpose" and
            cdp_model.graph.node[-1].op_type == "Transpose"), "Incorrect model"
    # execute models and compare
    x = gen_finn_dt_tensor(idt, (1, ich, idim, idim))
    input_dict = {"inp": x}
    y_expected = oxe.execute_onnx(model, input_dict)["outp"]

    if mode == "cppsim":
        cdp_model = cdp_model.transform(SetExecMode("cppsim"))
        cdp_model = cdp_model.transform(PrepareCppSim())
        cdp_model = cdp_model.transform(CompileCppSim())
        y_produced = oxe.execute_onnx(cdp_model, input_dict)["outp"]
    elif mode == "rtlsim":
        cdp_model = cdp_model.transform(SetExecMode("rtlsim"))
        cdp_model = cdp_model.transform(GiveUniqueNodeNames())
        cdp_model = cdp_model.transform(GiveReadableTensorNames())
        cdp_model = cdp_model.transform(PrepareIP("xc7z020clg400-1", 5))
        cdp_model = cdp_model.transform(HLSSynthIP())
        cdp_model = cdp_model.transform(PrepareRTLSim())
        # the tensors were renamed above, so the graph is fed and read through
        # "global_in" / "global_out" instead of "inp" / "outp"
        input_dict = {"global_in": x}
        y_produced = oxe.execute_onnx(cdp_model, input_dict)["global_out"]
    else:
        # without this branch an unsupported mode would only surface later as
        # a NameError on y_produced
        raise Exception("Unknown mode in test_fpgadataflow_packed_dsp")

    # report the mode that actually ran instead of a hard-coded "cppsim"
    assert (y_produced.flatten() == y_expected.flatten()).all(), mode + " failed"
Esempio n. 2
0
def test_depthwise_conv_hls_rtlsim(act, pe, k, stride, padding):
    """Compare a depthwise-conv reference model with its converted rtlsim form.

    The reference model comes from ``set_up_reference_model`` (INT4 inputs and
    weights, 6x6 feature map, 4 channels). It is converted with
    ``InferConvInpGen`` and ``InferVVAU``, folded according to ``pe``, prepared
    for rtlsim, and finally checked against the reference with
    ``oxe.compare_execution`` on one random input.
    """
    wdt = DataType.INT4
    idt = wdt
    ifm_ch = 4
    ifm_dim = 6

    # reference model consisting of Im2Col + MatMul (+ MultiThreshold)
    reference = set_up_reference_model(
        act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding
    )

    feed = {"inp": gen_finn_dt_tensor(idt, [1, ifm_dim, ifm_dim, ifm_ch])}

    converted = reference.transform(InferConvInpGen())
    converted = converted.transform(InferVVAU())

    # the same folding factor is applied as SIMD on the ConvInputGen node and
    # as PE on the VVAU node
    folding_attr_by_op = {
        "ConvolutionInputGenerator": "SIMD",
        "Vector_Vector_Activate_Batch": "PE",
    }
    for graph_node in converted.graph.node:
        attr_name = folding_attr_by_op.get(graph_node.op_type)
        if attr_name is not None:
            getCustomOp(graph_node).set_nodeattr(attr_name, pe)

    converted = converted.transform(SetExecMode("rtlsim"))
    converted = converted.transform(GiveUniqueNodeNames())
    converted = converted.transform(PrepareIP("xc7z020clg400-1", 5))
    converted = converted.transform(HLSSynthIP())
    converted = converted.transform(PrepareRTLSim())

    assert oxe.compare_execution(reference, converted, feed)
Esempio n. 3
0
def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch,
                                       exec_mode):
    """Check the streaming max-pool model against a golden NHWC max-pool model.

    Args:
        idt: datatype of the generated input tensor and of both models.
        dim_1d: when truthy, the width of both image and kernel is forced to 1;
            otherwise width equals height.
        k: pooling kernel height (also the stride).
        ifm_dim: input feature map height.
        ifm_ch: number of channels.
        exec_mode: "cppsim" or "rtlsim".

    The test is skipped for BIPOLAR input in the 1d case and whenever the image
    dimensions are not a multiple of the kernel dimensions.

    Raises:
        Exception: if ``exec_mode`` is neither "cppsim" nor "rtlsim".
    """
    ifm_dim_h = ifm_dim
    k_h = k
    if dim_1d:
        ifm_dim_w = 1
        k_w = 1
    else:
        ifm_dim_w = ifm_dim_h
        k_w = k_h
    ifm_dim = (ifm_dim_h, ifm_dim_w)
    k = (k_h, k_w)

    # non-overlapping pooling: the stride equals the kernel size
    stride_h = k_h
    stride_w = k_w
    ofm_dim_h = int(((ifm_dim_h - k_h) / stride_h) + 1)
    ofm_dim_w = int(((ifm_dim_w - k_w) / stride_w) + 1)
    ofm_dim = (ofm_dim_h, ofm_dim_w)
    if idt == DataType["BIPOLAR"] and dim_1d:
        pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)")
    if ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")

    x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
    # prepare input data
    input_dict = prepare_inputs(x)

    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim,
                                                  idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim,
                                                      ofm_dim, idt)

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        # name the test that is actually running so the failure is traceable
        raise Exception(
            "Unknown exec_mode in test_fpgadataflow_streamingmaxpool")

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()

    if exec_mode == "rtlsim":
        # compare the cycle count recorded on the node with the analytical
        # estimate, allowing a slack of 15 cycles
        node = model.get_nodes_by_op_type("StreamingMaxPool_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
        assert exp_cycles != 0
def test_fpgadataflow_labelselect(idt, labels, fold, k, exec_mode):
    """Check the label-select model's output with ``soft_verify_topk``.

    Args:
        idt: datatype of the generated input tensor.
        labels: number of labels (length of the input vector).
        fold: folding factor; -1 selects ``pe = 1``.
        k: number of top entries to select; -1 selects all ``labels``.
        exec_mode: "cppsim" or "rtlsim".

    Raises:
        Exception: if ``exec_mode`` is neither "cppsim" nor "rtlsim".
    """
    np.random.seed(0)
    if fold == -1:
        pe = 1
    else:
        # clamp to 1 so that fold > labels cannot produce pe == 0 and a
        # ZeroDivisionError in the divisibility check below
        pe = max(1, labels // fold)
    assert labels % pe == 0

    if k == -1:
        k = labels

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, labels))

    model = make_labelselect_modelwrapper(labels, pe, k, idt)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data and execute
    input_dict = prepare_inputs(x, idt)
    y = oxe.execute_onnx(model, input_dict)["outp"]

    assert soft_verify_topk(x, y, k), exec_mode + " failed"
def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
    """Check the streaming max-pool model against a golden NHWC max-pool model.

    Args:
        idt: datatype of the generated input tensor and of both models.
        k: pooling kernel size (also the stride).
        ifm_dim: input feature map size (square image).
        ifm_ch: number of channels.
        exec_mode: "cppsim" or "rtlsim".

    The test is skipped when ``ifm_dim`` is not a multiple of ``k``.

    Raises:
        Exception: if ``exec_mode`` is neither "cppsim" nor "rtlsim".
    """
    # non-overlapping pooling: the stride equals the kernel size
    stride = k
    ofm_dim = int(((ifm_dim - k) / stride) + 1)
    if ifm_dim % k != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")

    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    # prepare input data
    input_dict = prepare_inputs(x)

    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim,
                                                  idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim,
                                                      ofm_dim, idt)

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        # the message previously named the sliding-window test by mistake
        raise Exception(
            "Unknown exec_mode in test_fpgadataflow_streamingmaxpool")

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()
def test_fpgadataflow_channelwise_ops(idt, act, pdt, nf, ich, func, vecs,
                                      exec_mode):
    """Check the channelwise-op model against a NumPy reference.

    Args:
        idt: datatype of the generated input tensor.
        act: output datatype passed to ``make_modelwrapper``.
        pdt: datatype of the generated per-channel parameter vector.
        nf: folding factor; -1 selects ``nf = ich``.
        ich: number of channels.
        func: "add" or "mul"; selects the reference operation.
        vecs: list of leading dimensions; the input shape is ``vecs + [ich]``.
        exec_mode: "cppsim" or "rtlsim".

    Raises:
        Exception: if ``exec_mode`` or ``func`` has an unsupported value.
    """
    if nf == -1:
        nf = ich
    pe = ich // nf
    assert ich % pe == 0

    # generate input and param data
    x = gen_finn_dt_tensor(idt, tuple(vecs + [ich]))
    # pass an explicit 1-tuple; "(ich)" is just the int ich
    C = gen_finn_dt_tensor(pdt, (ich,))

    odt = act

    model = make_modelwrapper(C, pe, idt, odt, pdt, func, vecs)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # package input data as dictionary
    input_dict = {"inp": x}

    oshape = model.get_tensor_shape("outp")

    # broadcast the per-channel parameters over the input's leading dimensions
    C_reshaped = np.broadcast_to(C.flatten(), x.shape)
    if func == "add":
        y = x + C_reshaped
    elif func == "mul":
        y = x * C_reshaped
    else:
        # without this branch an unsupported func would only surface below as
        # a NameError on y
        raise Exception("Unknown func")

    y_expected = y.reshape(oshape)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]

    y_produced = y_produced.reshape(y_expected.shape)

    # report the mode that actually ran instead of a hard-coded "cppsim"
    assert (y_produced == y_expected).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        hls_synt_res_est = model.analysis(hls_synth_res_estimation)
        assert "ChannelwiseOp_Batch_0" in hls_synt_res_est

        # compare the cycle count recorded on the node with the analytical
        # estimate, allowing a slack of 10 cycles
        node = model.get_nodes_by_op_type("ChannelwiseOp_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Esempio n. 7
0
def step_resnet50_set_fifo_depths(model: ModelWrapper,
                                  cfg: DataflowBuildConfig):
    """
    Insert and size the FIFOs, dump the final hardware config, regenerate IP.

    The behaviour depends on ``cfg.auto_fifo_depths``:
    * auto_fifo_depths=True:  run the `InsertAndSetFIFODepths` transformation
    to attempt to determine the FIFO sizes that provide full throughput. This
    involves running stitched-IP rtlsim and may take a long time.
    * auto_fifo_depths=False:  assume the folding config file contains FIFO
    sizes as well. Runs `InsertDWC` and `InsertFIFO`, then
    `ApplyConfig(cfg.folding_config_file)` (if a file is set), and finally
    `RemoveShallowFIFOs`. Coherency with config file node naming is ensured by
    calling `GiveUniqueNodeNames`.

    Afterwards the selected node attributes are written to
    ``final_hw_config.json`` in ``cfg.output_dir`` and `PrepareIP`,
    `HLSSynthIP` and `ReplaceVerilogRelPaths` are applied. Returns the
    transformed model.
    """

    if not cfg.auto_fifo_depths:
        # the folding cfg json is assumed to carry the FIFO sizes too:
        # insert DWCs and FIFOs, then run ApplyConfig once more
        model = model.transform(InsertDWC())
        # every FIFO has to exist so ApplyConfig can set its depth,
        # hence create_shallow_fifos=True
        model = model.transform(InsertFIFO(create_shallow_fifos=True))
        for naming_transform in (GiveUniqueNodeNames, GiveReadableTensorNames):
            model = model.transform(naming_transform())
        folding_file = cfg.folding_config_file
        if folding_file is not None:
            model = model.transform(ApplyConfig(folding_file))
        # drop any shallow FIFOs again
        model = model.transform(RemoveShallowFIFOs())
    else:
        fifo_sizing = InsertAndSetFIFODepths(
            cfg._resolve_fpga_part(),
            cfg._resolve_hls_clk_period(),
            vivado_ram_style=cfg.large_fifo_mem_style.value,
        )
        model = model.transform(fifo_sizing)

    # extract the final configuration and save it as json
    hw_attrs = [
        "PE",
        "SIMD",
        "ram_style",
        "depth",
        "impl_style",
        "resType",
        "mem_mode",
        "runtime_writeable_weights",
    ]
    final_config_path = cfg.output_dir + "/final_hw_config.json"
    extract_model_config_to_json(model, final_config_path, hw_attrs)

    # with the FIFOs in place, call PrepareIP and HLSSynthIP again;
    # this will only run for the new nodes (e.g. FIFOs and DWCs)
    ip_prep = PrepareIP(cfg._resolve_fpga_part(),
                        cfg._resolve_hls_clk_period())
    model = model.transform(ip_prep)
    model = model.transform(HLSSynthIP())
    model = model.transform(ReplaceVerilogRelPaths())
    return model
Esempio n. 8
0
 def test_ipgen(self, topology, wbits, abits, kind):
     """Generate HLS IP for the "fold" checkpoint and save an "ipgen_<kind>" one.

     Skipped for kind "alveo" when the VITIS_PATH environment variable is
     missing.
     """
     vitis_missing = "VITIS_PATH" not in os.environ
     if kind == "alveo" and vitis_missing:
         pytest.skip("VITIS_PATH not set")
     fold_chkpt = get_checkpoint_name(topology, wbits, abits, "fold")
     model = load_test_checkpoint_or_skip(fold_chkpt)
     # the FPGA part is taken from the build environment for this kind
     build_env = get_build_env(kind, target_clk_ns)
     test_fpga_part = build_env["part"]
     model = model.transform(GiveUniqueNodeNames())
     model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
     model = model.transform(HLSSynthIP())
     ipgen_chkpt = get_checkpoint_name(topology, wbits, abits, "ipgen_" + kind)
     model.save(ipgen_chkpt)
Esempio n. 9
0
    def apply(self, model):
        """Run the Vitis build flow on ``model`` and return ``(model, False)``.

        Steps, in order: check the Vitis environment variables, infer data
        layouts, apply the global preparation transforms, floorplan, partition
        the graph, build every StreamingDataflowPartition as its own kernel
        (saving it back to the partition's model file), and link the kernels
        with ``VitisLink``.
        """
        _check_vitis_envvars()
        # layouts have to be inferred first
        model = model.transform(InferDataLayouts())
        # global-level preparation, done before the graph is split into kernels
        global_prep = [
            MakePYNQDriver(platform="alveo"),
            InsertIODMA(512),
            InsertDWC(),
        ]
        for prep_step in global_prep:
            model = model.transform(prep_step)
            model = model.transform(GiveUniqueNodeNames())
            model = model.transform(GiveReadableTensorNames())

        model = model.transform(Floorplan(floorplan=self.floorplan_file))

        model = model.transform(CreateDataflowPartition())
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(GiveReadableTensorNames())

        # build each kernel on its own
        partition_nodes = model.get_nodes_by_op_type(
            "StreamingDataflowPartition")
        for partition_node in partition_nodes:
            partition = getCustomOp(partition_node)
            kernel_file = partition.get_nodeattr("model")
            kernel = ModelWrapper(kernel_file)
            kernel = kernel.transform(InsertFIFO())
            tlast_marker = InsertTLastMarker(
                both=True, external=False, dynamic=False)
            kernel = kernel.transform(tlast_marker)
            kernel = kernel.transform(GiveUniqueNodeNames())
            # intermediate save before IP generation
            kernel.save(kernel_file)
            kernel = kernel.transform(
                PrepareIP(self.fpga_part, self.period_ns))
            kernel = kernel.transform(HLSSynthIP())
            stitch = CreateStitchedIP(
                self.fpga_part, self.period_ns, partition.onnx_node.name, True)
            kernel = kernel.transform(stitch)
            kernel = kernel.transform(
                CreateVitisXO(partition.onnx_node.name))
            kernel.set_metadata_prop("platform", "alveo")
            kernel.save(kernel_file)
        # assemble the design from the kernels
        link_step = VitisLink(
            self.platform,
            round(1000 / self.period_ns),
            strategy=self.strategy,
            enable_debug=self.enable_debug,
        )
        model = model.transform(link_step)
        # set platform attribute for correct remote execution
        model.set_metadata_prop("platform", "alveo")

        return (model, False)
Esempio n. 10
0
def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode):
    """Check the feature-map padding model against ``np.pad``.

    The model is built with ``make_single_fmpadding_modelwrapper``, executed
    in ``mode`` ("cppsim" or "rtlsim"), and its output is compared with a
    constant-padded copy of the input. The test is skipped when ``num_ch`` is
    not a multiple of ``simd``. In rtlsim the recorded cycle count is also
    compared with the analytical estimate.
    """
    if num_ch % simd != 0:
        pytest.skip(" num_ch % simd != 0, skipping")
    # generate input data
    x = gen_finn_dt_tensor(idt, [1, idim, idim, num_ch])
    feed = {"inp": x}
    odim = idim + pad

    model = make_single_fmpadding_modelwrapper(idim, pad, num_ch, simd, idt,
                                               pad_style)
    model = model.transform(InferShapes())
    model = model.transform(SetExecMode(mode))
    model = model.transform(GiveUniqueNodeNames())
    if mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif mode == "rtlsim":
        model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    y_produced = oxe.execute_onnx(model, feed)["outp"]
    assert y_produced.shape == (1, odim, odim, num_ch)

    # reference padding: with pad_style 2 an odd pad puts the extra row/column
    # at the top/left, in every other case the top/left side gets pad // 2
    half_pad = pad // 2
    if pad_style == 2 and pad % 2 != 0:
        pad_up = pad_left = half_pad + 1
    else:
        pad_up = pad_left = half_pad

    pad_down = pad - pad_up
    pad_right = pad - pad_left

    pad_widths = ((0, 0), (pad_up, pad_down), (pad_left, pad_right), (0, 0))
    y_expected = np.pad(x, pad_widths, "constant")

    assert (y_produced == y_expected).all()

    if mode == "rtlsim":
        # measured cycles must be within 10 of the analytical estimate
        padding_node = model.get_nodes_by_op_type("FMPadding_Batch")[0]
        cycles_rtlsim = getCustomOp(padding_node).get_nodeattr("cycles_rtlsim")
        exp_cycles = model.analysis(exp_cycles_per_layer)[padding_node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Esempio n. 11
0
def step_hls_ipgen(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run Vivado HLS synthesis on generated code for HLSCustomOp nodes,
    in order to generate IP blocks.

    The result of the ``hls_synth_res_estimation`` analysis is written to
    ``estimate_layer_resources_hls.json`` in the ``report`` subdirectory of
    ``cfg.output_dir`` (created if missing). Returns the transformed model."""

    for transform_cls in (HLSSynthIP, ReplaceVerilogRelPaths):
        model = model.transform(transform_cls())

    report_dir = cfg.output_dir + "/report"
    os.makedirs(report_dir, exist_ok=True)

    hls_resources = model.analysis(hls_synth_res_estimation)
    report_path = report_dir + "/estimate_layer_resources_hls.json"
    with open(report_path, "w") as report_file:
        json.dump(hls_resources, report_file, indent=2)
    return model
Esempio n. 12
0
def test_convert_to_hls_channelwise_layer(pdt, idt, onnx_op_name, scalar_param,
                                          exec_mode):
    """Check that ``InferChannelwiseLinearLayer`` preserves the model output.

    A single-op model (16 channels, 5x5 feature map) is executed before and
    after conversion on the same input; the outputs must be identical and the
    second node of the converted graph must be a ChannelwiseOp_Batch.

    Raises:
        Exception: if ``exec_mode`` is neither "cppsim" nor "rtlsim".
    """
    ifm_ch = 16
    ifm_dim = 5
    ishape = (1, ifm_ch, ifm_dim, ifm_dim)
    pshape = (1, ) if scalar_param else (1, ifm_ch, 1, 1)

    np.random.seed(0)
    model = make_single_maxpool_modelwrapper(onnx_op_name, ishape, idt, pdt,
                                             pshape)

    # Since there aren't data types with a bit width that is not a power of 2,
    # there are cases where the input won't use its full range: 32-bit inputs
    # are filled with 16-bit values.
    if idt == DataType["INT32"]:
        gen_dt = DataType["INT16"]
    elif idt == DataType["UINT32"]:
        gen_dt = DataType["UINT16"]
    else:
        gen_dt = idt
    x = gen_finn_dt_tensor(gen_dt, ishape)

    feed = prepare_inputs(x)
    y_expected = oxe.execute_onnx(model, feed)["outp"]

    hls_model = model.transform(to_hls.InferChannelwiseLinearLayer())
    hls_model = hls_model.transform(GiveUniqueNodeNames())

    if exec_mode == "cppsim":
        hls_model = hls_model.transform(PrepareCppSim())
        hls_model = hls_model.transform(CompileCppSim())
        hls_model = hls_model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        hls_model = hls_model.transform(SetExecMode("rtlsim"))
        hls_model = hls_model.transform(GiveUniqueNodeNames())
        hls_model = hls_model.transform(PrepareIP("xc7z020clg400-1", 5))
        hls_model = hls_model.transform(HLSSynthIP())
        hls_model = hls_model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # request the full execution context and pick the output tensor from it
    full_context = oxe.execute_onnx(hls_model,
                                    feed,
                                    return_full_exec_context=True)
    y_produced = full_context["outp"]

    assert (y_produced == y_expected).all()
    assert hls_model.graph.node[1].op_type == "ChannelwiseOp_Batch"
def test_fpgadataflow_slidingwindow(
    idt, k, ifm_dim, ifm_ch, stride, dilation, exec_mode, simd, dw
):
    """Check the sliding-window model against a golden Im2Col model.

    For ``dw == 0`` the two outputs are compared directly. Otherwise the
    golden output is first rearranged (the kernel-position axis and the
    channel-fold axis are swapped) before the comparison. In rtlsim the
    recorded cycle count is also compared with the analytical estimate.

    Raises:
        Exception: if ``exec_mode`` is neither "cppsim" nor "rtlsim".
    """
    ofm_dim = int(((ifm_dim - k) / stride) + 1)

    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    model = make_single_slidingwindow_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw
    )

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")

    # run the model under test, then the golden model, on the same input
    feed = prepare_inputs(x)
    y_produced = oxe.execute_onnx(model, feed)["outp"]
    reference = make_single_im2col_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
    )
    y_expected = oxe.execute_onnx(reference, feed)["outp"]

    if dw != 0:
        # rearrange the golden output: split the last axis into
        # (k * k, ifm_ch // simd, simd), swap the first two, flatten again
        split_shape = (1, ofm_dim, ofm_dim, k * k, ifm_ch // simd, simd)
        y_expected = y_expected.reshape(*split_shape)
        y_expected = y_expected.transpose(0, 1, 2, 4, 3, 5)
        y_expected = y_expected.reshape(1, ofm_dim, ofm_dim, ifm_ch * k * k)
    assert (y_produced == y_expected).all()

    if exec_mode == "rtlsim":
        # measured cycles must be within 10 of the analytical estimate
        swg_node = model.get_nodes_by_op_type("ConvolutionInputGenerator")[0]
        cycles_rtlsim = getCustomOp(swg_node).get_nodeattr("cycles_rtlsim")
        exp_cycles = model.analysis(exp_cycles_per_layer)[swg_node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Esempio n. 14
0
def test_end2end_mobilenet_gen_hls_ip():
    """Generate HLS IP for the MobileNet dataflow model and record the time.

    Loads ``end2end_mobilenet_dataflow_model.onnx`` from ``build_dir`` (or
    skips), applies ``PrepareIP``, ``HLSSynthIP`` and
    ``ReplaceVerilogRelPaths``, writes the elapsed wall-clock time to
    ``end2end_mobilenet_ipgen_time.txt``, annotates HLS resources and saves
    the result as ``end2end_mobilenet_ipgen.onnx``.
    """
    model = load_test_checkpoint_or_skip(
        build_dir + "/end2end_mobilenet_dataflow_model.onnx")
    start = time.time()
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    model = model.transform(HLSSynthIP())
    model = model.transform(ReplaceVerilogRelPaths())
    end = time.time()
    elapsed_time = end - start
    # a context manager closes the file even if the write raises
    with open(build_dir + "/end2end_mobilenet_ipgen_time.txt", "w+") as f:
        f.write("Execution time in seconds: " + str(elapsed_time))

    model = model.transform(AnnotateResources("hls"))
    model.save(build_dir + "/end2end_mobilenet_ipgen.onnx")
Esempio n. 15
0
def test_fpgadataflow_ipstitch_gen_model(mem_mode):
    """Generate and save the model used for IP stitching for ``mem_mode``.

    If the model from ``create_one_fc_model`` starts with a
    StreamingDataflowPartition, the partition's child model is loaded and used
    instead. After TLastMarker insertion and IP generation the graph must
    start with a StreamingFCLayer_Batch and end with a TLastMarker.
    """
    model = create_one_fc_model(mem_mode)
    head = model.graph.node[0]
    if head.op_type == "StreamingDataflowPartition":
        partition = getCustomOp(head)
        assert partition.__class__.__name__ == "StreamingDataflowPartition"
        child_model_path = partition.get_nodeattr("model")
        assert os.path.isfile(child_model_path)
        model = load_test_checkpoint_or_skip(child_model_path)
        model.set_metadata_prop("exec_mode", "remote_pynq")
    model = model.transform(InsertTLastMarker())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP(test_fpga_part, 5))
    model = model.transform(HLSSynthIP())
    nodes = model.graph.node
    assert nodes[0].op_type == "StreamingFCLayer_Batch"
    assert nodes[-1].op_type == "TLastMarker"
    out_name = "/test_fpgadataflow_ipstitch_gen_model_%s.onnx" % mem_mode
    model.save(ip_stitch_model_dir + out_name)
Esempio n. 16
0
def test_fpgadataflow_addstreams(idt, ch, fold, exec_mode):
    """Check the add-streams model against the element-wise sum of its inputs.

    ``fold == -1`` selects ``pe = 1``; otherwise ``pe`` is ``ch // fold``,
    clamped to at least 1. In rtlsim the recorded cycle count is also compared
    with the analytical estimate.

    Raises:
        Exception: if ``exec_mode`` is neither "cppsim" nor "rtlsim".
    """
    pe = 1 if fold == -1 else max(1, ch // fold)
    assert ch % pe == 0

    # two independent random operands of the same shape
    x1 = gen_finn_dt_tensor(idt, (1, ch))
    x2 = gen_finn_dt_tensor(idt, (1, ch))

    model = make_addstreams_modelwrapper(ch, pe, idt)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    feed = prepare_inputs(x1, x2)

    # reference result, shaped like the model's output tensor
    oshape = model.get_tensor_shape("outp")
    y_expected = (x1 + x2).reshape(oshape)

    y_produced = oxe.execute_onnx(model, feed)["outp"]
    y_produced = y_produced.reshape(y_expected.shape)

    assert (y_produced == y_expected).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        # measured cycles must be within 10 of the analytical estimate
        add_node = model.get_nodes_by_op_type("AddStreams_Batch")[0]
        cycles_rtlsim = getCustomOp(add_node).get_nodeattr("cycles_rtlsim")
        exp_cycles = model.analysis(exp_cycles_per_layer)[add_node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Esempio n. 17
0
def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):
    """Check a single-FIFO model in rtlsim, then deploy it and run throughput.

    The first half runs the FIFO model in rtlsim and requires the output to
    equal the input in value and shape. The second half stitches the IP,
    builds and synthesizes the PYNQ project, deploys it to the board described
    by the PYNQ_* environment variables and checks that ``throughput_test``
    reports all expected metrics.

    Raises:
        KeyError: if the PYNQ_IP environment variable is not set.
    """

    # generate input data
    x = gen_finn_dt_tensor(finn_dtype, Shape)
    input_dict = prepare_inputs(x, finn_dtype)

    model = make_single_fifo_modelwrapper(Shape, depth, folded_shape, finn_dtype)

    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(InsertTLastMarker())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    model = model.transform(HLSSynthIP())
    model = model.transform(PrepareRTLSim())
    y = oxe.execute_onnx(model, input_dict)["outp"]
    assert (
        y == x
    ).all(), """The output values are not the same as the
       input values anymore."""
    assert y.shape == tuple(Shape), """The output shape is incorrect."""

    model = model.transform(ReplaceVerilogRelPaths())
    model = model.transform(CreateStitchedIP(test_fpga_part))
    model = model.transform(MakePYNQProject(test_pynq_board))
    model = model.transform(SynthPYNQProject())
    model = model.transform(MakePYNQDriver())
    ip = os.environ["PYNQ_IP"]
    username = os.getenv("PYNQ_USERNAME", "xilinx")
    password = os.getenv("PYNQ_PASSWORD", "xilinx")
    # os.getenv returns a str when the variable is set but the default here is
    # an int; normalise so the port always has the same type
    port = int(os.getenv("PYNQ_PORT", 22))
    target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
    model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))

    res = throughput_test(model)
    # only the presence of these metrics is checked, not their values
    expected_keys = (
        "runtime[ms]",
        "throughput[images/s]",
        "DRAM_in_bandwidth[Mb/s]",
        "DRAM_out_bandwidth[Mb/s]",
    )
    for key in expected_keys:
        assert (
            key in res
        ), """Throughput test not successful, no value for {}
        in result dictionary""".format(
            key
        )
Esempio n. 18
0
def test_fpgadataflow_globalaccpool(idt, ch, fold, imdim, exec_mode):
    """Check the global accumulate-pool model against a NumPy sum.

    Args:
        idt: datatype of the generated input tensor.
        ch: number of channels.
        fold: folding factor; -1 selects ``pe = 1``.
        imdim: image dimension; the input shape is (1, imdim, imdim, ch).
        exec_mode: "cppsim" or "rtlsim".

    The expected output is the input summed over axes 1 and 2, flattened.

    Raises:
        Exception: if ``exec_mode`` is neither "cppsim" nor "rtlsim".
    """
    if fold == -1:
        pe = 1
    else:
        # clamp to 1 so that fold > ch cannot produce pe == 0 and a
        # ZeroDivisionError in the divisibility check below
        pe = max(1, ch // fold)
    assert ch % pe == 0

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, imdim, imdim, ch))

    model = make_accpool_modelwrapper(ch, pe, imdim, idt)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data and execute
    input_dict = prepare_inputs(x, idt)
    y = oxe.execute_onnx(model, input_dict)["outp"]
    expected_y = np.sum(x, axis=(1, 2)).flatten()

    assert (y == expected_y).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("GlobalAccPool_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        # commented out, needs performance debug:
        # test_fpgadataflow_globalaccpool[rtlsim-7-1-64-DataType.UINT4]
        # assert False where False =
        # <function isclose at 0x7eff26d5ca60>(50, 103, atol=(0.1 * 103))
        # assert np.isclose(exp_cycles, cycles_rtlsim, atol=0.1 * cycles_rtlsim)
        assert exp_cycles != 0
        assert cycles_rtlsim != 0
Esempio n. 19
0
def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):
    """Run a single-FIFO model in rtlsim and require output == input.

    Both the values and the shape of the output tensor are checked against the
    randomly generated input.
    """
    # random stimulus and the matching input dictionary
    x = gen_finn_dt_tensor(finn_dtype, Shape)
    feed = prepare_inputs(x, finn_dtype)

    fifo_model = make_single_fifo_modelwrapper(
        Shape, depth, folded_shape, finn_dtype
    )

    fifo_model = fifo_model.transform(SetExecMode("rtlsim"))
    fifo_model = fifo_model.transform(GiveUniqueNodeNames())
    fifo_model = fifo_model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    fifo_model = fifo_model.transform(HLSSynthIP())
    fifo_model = fifo_model.transform(PrepareRTLSim())

    y = oxe.execute_onnx(fifo_model, feed)["outp"]
    values_unchanged = (y == x).all()
    assert values_unchanged, """The output values are not the same as the
       input values anymore."""
    assert y.shape == tuple(Shape), """The output shape is incorrect."""
Esempio n. 20
0
def test_fpgadataflow_dwc_rtlsim(Shape, INWidth, OUTWidth, finn_dtype):
    """Run a single-DWC model in rtlsim and require output == input.

    The model is built by ``make_single_dwc_modelwrapper`` from ``Shape``,
    ``INWidth``, ``OUTWidth`` and ``finn_dtype``. Both the values and the
    shape of the output tensor are checked against the randomly generated
    input.
    """
    # random stimulus and the matching input dictionary
    x = gen_finn_dt_tensor(finn_dtype, Shape)
    feed = prepare_inputs(x, finn_dtype)

    dwc_model = make_single_dwc_modelwrapper(Shape, INWidth, OUTWidth,
                                             finn_dtype)

    dwc_model = dwc_model.transform(SetExecMode("rtlsim"))
    dwc_model = dwc_model.transform(GiveUniqueNodeNames())
    dwc_model = dwc_model.transform(PrepareIP("xc7z020clg400-1", 5))
    dwc_model = dwc_model.transform(HLSSynthIP())
    dwc_model = dwc_model.transform(PrepareRTLSim())

    y = oxe.execute_onnx(dwc_model, feed)["outp"]

    values_unchanged = (y == x).all()
    assert values_unchanged, """The output values are not the same as the
        input values anymore."""
    assert y.shape == tuple(Shape), """The output shape is incorrect."""
Esempio n. 21
0
def test_fpgadataflow_duplicatestreams(idt, ch, fold, imdim, exec_mode):
    """Check that DuplicateStreams_Batch emits two exact copies of its input.

    ``fold == -1`` selects ``pe = 1``; otherwise ``pe = ch // fold``. The model
    is executed in ``exec_mode`` ("cppsim" or "rtlsim"); for rtlsim the
    measured cycle count is additionally compared with the analytical
    estimate.
    """
    pe = 1 if fold == -1 else ch // fold
    assert ch % pe == 0

    # random stimulus
    stimulus = gen_finn_dt_tensor(idt, (1, imdim, imdim, ch))

    model = make_dupstreams_modelwrapper(ch, pe, imdim, idt)

    if exec_mode == "cppsim":
        model = (
            model.transform(PrepareCppSim())
            .transform(CompileCppSim())
            .transform(SetExecMode("cppsim"))
        )
    elif exec_mode == "rtlsim":
        model = (
            model.transform(SetExecMode("rtlsim"))
            .transform(GiveUniqueNodeNames())
            .transform(PrepareIP("xc7z020clg400-1", 5))
            .transform(HLSSynthIP())
            .transform(PrepareRTLSim())
        )
    else:
        raise Exception("Unknown exec_mode")

    # run the model and pick up both output streams
    outputs = oxe.execute_onnx(model, prepare_inputs(stimulus, idt))
    first_copy, second_copy = outputs["outp0"], outputs["outp1"]
    expected_y = stimulus

    assert (first_copy == expected_y).all(), exec_mode + " failed"
    assert (second_copy == expected_y).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        # compare simulated cycle count against the per-layer estimate
        dup_node = model.get_nodes_by_op_type("DuplicateStreams_Batch")[0]
        cycles_rtlsim = getCustomOp(dup_node).get_nodeattr("cycles_rtlsim")
        exp_cycles = model.analysis(exp_cycles_per_layer)[dup_node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Esempio n. 22
0
 def test_ipstitch_rtlsim(self, topology, wbits, abits, kind):
     """Stitch the FIFO-sized checkpoint into one IP and verify it in rtlsim.

     Loads the "fifodepth_<kind>" checkpoint (or skips), stitches the design,
     saves an "ipstitch_rtlsim_<kind>" checkpoint, executes it through the
     dataflow parent model and compares the result with the golden output.
     The measured rtlsim cycle count is pushed to the dashboard data.
     """
     fifodepth_chkpt = get_checkpoint_name(
         topology, wbits, abits, "fifodepth_" + kind
     )
     model = load_test_checkpoint_or_skip(fifodepth_chkpt)
     test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
     model = (
         model.transform(InsertDWC())
         .transform(GiveUniqueNodeNames())
         .transform(AnnotateCycles())
     )
     perf = model.analysis(dataflow_performance)
     latency = perf["critical_path_cycles"]
     # rtlsim only supports impl_style=rtl for StreamingFIFO, so force it
     for fifo_node in model.get_nodes_by_op_type("StreamingFIFO"):
         fifo_inst = getCustomOp(fifo_node)
         fifo_inst.set_nodeattr("impl_style", "rtl")
     model = (
         model.transform(PrepareIP(test_fpga_part, target_clk_ns))
         .transform(HLSSynthIP())
         .transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
         .transform(PrepareRTLSim())
     )
     model.set_metadata_prop("exec_mode", "rtlsim")
     # liveness threshold: estimated critical-path latency plus 10% margin
     os.environ["LIVENESS_THRESHOLD"] = str(int(latency * 1.1))
     if rtlsim_trace:
         trace_file = "%s_w%da%d.vcd" % (topology, wbits, abits)
         model.set_metadata_prop("rtlsim_trace", trace_file)
         os.environ["RTLSIM_TRACE_DEPTH"] = "3"
     rtlsim_chkpt = get_checkpoint_name(
         topology, wbits, abits, "ipstitch_rtlsim_" + kind
     )
     model.save(rtlsim_chkpt)
     parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
     golden_in, golden_out = get_golden_io_pair(
         topology, wbits, abits, return_topk=1
     )
     y = execute_parent(parent_chkpt, rtlsim_chkpt, golden_in)
     # reload the checkpoint to read back the cycle count from its metadata
     model = ModelWrapper(rtlsim_chkpt)
     perf["cycles_rtlsim"] = model.get_metadata_prop("cycles_rtlsim")
     # warnings.warn("Estimated & rtlsim performance: " + str(perf))
     # for (k, v) in perf.items():
     #    update_dashboard_data(topology, wbits, abits, k, v)
     update_dashboard_data(
         topology, wbits, abits, "cycles_rtlsim", perf["cycles_rtlsim"]
     )
     assert np.isclose(y, golden_out).all()
Esempio n. 23
0
    def apply(self, model):
        """Run the Zynq build flow on ``model`` and return ``(model, False)``.

        Steps: infer data layouts, apply the global preparation transforms
        (IODMA, DWC, floorplan, dataflow partitioning), build every
        StreamingDataflowPartition into a stitched IP, assemble the Zynq
        project and finally generate the PYNQ driver.
        """
        # layouts first
        model = model.transform(InferDataLayouts())
        # global preparation; node and tensor names are refreshed after
        # every step
        global_steps = [
            InsertIODMA(64),
            InsertDWC(),
            Floorplan(),
            CreateDataflowPartition(),
        ]
        for step in global_steps:
            model = (
                model.transform(step)
                .transform(GiveUniqueNodeNames())
                .transform(GiveReadableTensorNames())
            )
        # each dataflow partition is built as its own kernel
        for partition_node in model.get_nodes_by_op_type(
                "StreamingDataflowPartition"):
            name_prefix = partition_node.name + "_"
            partition_inst = getCustomOp(partition_node)
            kernel_file = partition_inst.get_nodeattr("model")
            kernel_model = (
                ModelWrapper(kernel_file)
                .transform(InsertFIFO())
                .transform(GiveUniqueNodeNames(name_prefix))
            )
            # persist the FIFO-augmented kernel before IP generation
            kernel_model.save(kernel_file)
            kernel_model = (
                kernel_model
                .transform(PrepareIP(self.fpga_part, self.period_ns))
                .transform(HLSSynthIP())
                .transform(
                    CreateStitchedIP(self.fpga_part, self.period_ns,
                                     partition_inst.onnx_node.name, True))
            )
            kernel_model.set_metadata_prop("platform", "zynq-iodma")
            kernel_model.save(kernel_file)
        # assemble the full design from the generated IPs
        model = model.transform(
            MakeZYNQProject(self.platform, enable_debug=self.enable_debug))

        # platform attribute is needed for correct remote execution
        model.set_metadata_prop("platform", "zynq-iodma")

        # driver generation
        model = model.transform(MakePYNQDriver(platform="zynq-iodma"))
        return (model, False)
def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
    """Compare StreamingMaxPool_Batch against a reference NHWC max-pool model.

    The pooling window ``k`` is also used as the stride. Configurations where
    ``ifm_dim`` is not a multiple of ``k`` are skipped. The hardware model is
    executed in ``exec_mode`` ("cppsim" or "rtlsim") and must reproduce the
    golden output exactly; for rtlsim the measured cycle count is additionally
    compared with the analytical estimate.

    Raises:
        Exception: if ``exec_mode`` is neither "cppsim" nor "rtlsim".
    """
    stride = k
    ofm_dim = int(((ifm_dim - k) / stride) + 1)
    if ifm_dim % k != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")

    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    # prepare input data
    input_dict = prepare_inputs(x)

    # reference result from the plain max-pool model
    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim,
                                                  idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim,
                                                      ofm_dim, idt)

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        # message previously named an unrelated test (copy-paste slip)
        raise Exception(
            "Unknown exec_mode in test_fpgadataflow_streamingmaxpool")

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()

    if exec_mode == "rtlsim":
        # compare simulated cycle count against the per-layer estimate
        node = model.get_nodes_by_op_type("StreamingMaxPool_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
        assert exp_cycles != 0
Esempio n. 25
0
def test_fpgadataflow_lookup(edt, embedding_cfg, exec_mode):
    """Check Gather -> Lookup conversion and simulate the resulting layer.

    ``embedding_cfg`` is ``(num_embeddings, idt, embedding_dim)`` and ``edt``
    is the embedding datatype. The test first validates the plain Gather
    model against ``np.take``, then converts it with ``InferLookupLayer`` and
    checks that the simulated output (in ``exec_mode``) still matches.

    Raises:
        Exception: if ``exec_mode`` is neither "cppsim" nor "rtlsim".
    """
    ishape = (1, 10)
    num_embeddings, idt, embedding_dim = embedding_cfg
    eshape = (num_embeddings, embedding_dim)
    exp_oshape = tuple(list(ishape) + [embedding_dim])
    embeddings = gen_finn_dt_tensor(edt, eshape)
    model = make_lookup_model(embeddings, ishape, idt, edt)
    # sanity-check the reference model before conversion
    assert len(model.graph.node) == 1
    assert model.graph.node[0].op_type == "Gather"
    iname = model.graph.input[0].name
    ename = model.graph.node[0].input[0]
    oname = model.graph.output[0].name
    assert model.get_tensor_datatype(iname) == idt
    assert model.get_tensor_datatype(ename) == edt
    assert model.get_tensor_datatype(oname) == edt
    assert tuple(model.get_tensor_shape(ename)) == eshape
    assert tuple(model.get_tensor_shape(oname)) == exp_oshape
    assert (model.get_initializer(ename) == embeddings).all()
    itensor = gen_finn_dt_tensor(idt, ishape).astype(np.int64)
    # keep the indices inside the embedding table
    itensor = np.clip(itensor, 0, num_embeddings - 1)
    ret = execute_onnx(model, {iname: itensor})
    exp_out = np.take(embeddings, itensor, axis=0)
    assert (exp_out == ret[oname]).all()
    # call transformation to convert to HLS and verify conversion
    model = model.transform(InferLookupLayer())
    assert model.graph.node[0].op_type == "Lookup"
    assert model.graph.node[0].input[0] == iname
    assert model.graph.node[0].input[1] == ename
    assert model.graph.node[0].output[0] == oname
    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 10))
        model = model.transform(HLSSynthIP())
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(PrepareRTLSim())
    else:
        # fail loudly rather than executing a model with no backend prepared
        raise Exception("Unknown exec_mode")
    ret_sim = execute_onnx(model, {iname: itensor})
    assert (exp_out == ret_sim[oname]).all()
Esempio n. 26
0
def prepare_for_stitched_ip_rtlsim(verify_model, cfg):
    """Make ``verify_model`` ready for stitched-IP rtlsim and return it.

    Nodes whose ``impl_style`` is not usable in rtlsim are switched to the
    required style and their generated IP is invalidated. If anything was
    changed, IP generation, HLS synthesis and stitching are re-run using the
    settings resolved from ``cfg``. In every case the ``exec_mode`` metadata
    property is set to "rtlsim".
    """
    # (op_type, impl_style that rtlsim requires for it)
    required_styles = (
        ("StreamingFIFO", "rtl"),
        ("StreamingDataWidthConverter_Batch", "hls"),
    )
    need_restitch = False
    for op_type, wanted_style in required_styles:
        for layer in verify_model.get_nodes_by_op_type(op_type):
            inst = getCustomOp(layer)
            if inst.get_nodeattr("impl_style") == wanted_style:
                continue
            inst.set_nodeattr("impl_style", wanted_style)
            # drop stale generated IP so it is regenerated below
            inst.set_nodeattr("code_gen_dir_ipgen", "")
            inst.set_nodeattr("ipgen_path", "")
            need_restitch = True
    # altered nodes need their IP regenerated and the design re-stitched
    if need_restitch:
        print("Need to regen/re-stitch some IP for STITCHED_IP_RTLSIM")
        verify_model = (
            verify_model.transform(
                PrepareIP(cfg._resolve_fpga_part(),
                          cfg._resolve_hls_clk_period())
            )
            .transform(HLSSynthIP())
            .transform(
                CreateStitchedIP(
                    cfg._resolve_fpga_part(),
                    cfg.synth_clk_period_ns,
                    vitis=False,
                )
            )
        )
    # set top-level prop for stitched-ip rtlsim and launch
    verify_model.set_metadata_prop("exec_mode", "rtlsim")
    # TODO make configurable
    # verify_model.set_metadata_prop("rtlsim_trace", "trace.vcd")
    return verify_model
Esempio n. 27
0
def test_runtime_thresholds_single_layer():
    """Exercise runtime-writeable thresholds of a single thresholding layer.

    The layer is built as a stitched IP with ``runtime_writeable_weights``
    enabled. The test first reads the thresholds back over AXI-lite and
    checks them against the generated weight file, then writes a freshly
    drawn set of thresholds over AXI-lite and checks that the layer output
    follows the new values.
    """
    mem_mode = "decoupled"
    act = DataType["INT4"]
    idt = DataType["INT16"]
    nf = 8
    ich = 16
    pe = ich // nf
    assert ich % pe == 0

    # generate input data
    in_tensor = gen_finn_dt_tensor(idt, (1, ich))

    odt = act
    n_steps = act.get_num_possible_values() - 1
    T = np.random.randint(idt.min(),
                          idt.max() + 1, (ich, n_steps)).astype(np.float32)
    # provide non-decreasing thresholds
    T = np.sort(T, axis=1)

    if odt == DataType["BIPOLAR"]:
        actval = 0
    else:
        actval = odt.min()

    model = make_single_thresholding_modelwrapper(T, pe, idt, odt, actval,
                                                  mem_mode)
    op_inst = getCustomOp(model.graph.node[0])
    op_inst.set_nodeattr("runtime_writeable_weights", 1)
    # dump the initial thresholds as the word stream expected on AXI-lite
    op_inst.make_weight_file(T, "decoupled_runtime", "old_weights.dat")
    with open("old_weights.dat", "r") as f:
        old_weight_stream = f.read().strip()
    os.remove("old_weights.dat")
    old_weight_stream = [int(word, 16)
                         for word in old_weight_stream.split("\n")]
    # need to create stitched IP for runtime weight testing
    model = model.transform(InsertFIFO(True))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    model = model.transform(HLSSynthIP())
    model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
    model = model.transform(PrepareRTLSim())
    model.set_metadata_prop("exec_mode", "rtlsim")
    # add two copies of the input tensor as the first one is just used to
    # "flush out" the pipeline (as the layer already starts receiving old
    # weights while we read/write new ones and reads seem to cause a
    # disturbance too)
    in_tensor = np.tile(in_tensor, (2, 1))
    exec_ctx = {"inp": in_tensor}
    extracted_weight_stream = []

    def read_weights(sim):
        # one 32-bit AXI-lite word per entry of the weight stream
        addr = 0
        for _ in old_weight_stream:
            extracted_weight_stream.append(
                axilite_read(sim, addr, basename="s_axilite_0_"))
            addr += 4

    rtlsim_exec(model, exec_ctx, pre_hook=read_weights)
    assert extracted_weight_stream == old_weight_stream
    # only use second batch element in output; first will be invalid due to
    # old weights (see above)
    y = exec_ctx["outp"][1]
    expected = multithreshold(in_tensor, T)[1]
    if act == DataType["BIPOLAR"]:
        # binary to bipolar
        expected = 2 * expected - 1
    else:
        # signed offset
        expected += act.min()
    assert (y == expected).all()

    new_weights = np.random.randint(idt.min(),
                                    idt.max() + 1,
                                    (ich, n_steps)).astype(np.float32)
    # provide non-decreasing thresholds; sort the NEW values (sorting T here
    # would silently reuse the old thresholds and make the write a no-op)
    new_weights = np.sort(new_weights, axis=1)
    op_inst.make_weight_file(new_weights, "decoupled_runtime",
                             "new_weights.dat")
    with open("new_weights.dat", "r") as f:
        new_weight_stream = f.read().strip()
    os.remove("new_weights.dat")
    new_weight_stream = [int(word, 16)
                         for word in new_weight_stream.split("\n")]

    def write_weights(sim):
        # one 32-bit AXI-lite word per entry of the weight stream
        addr = 0
        for nw in new_weight_stream:
            axilite_write(sim, addr, nw, basename="s_axilite_0_")
            addr += 4

    rtlsim_exec(model, exec_ctx, pre_hook=write_weights)
    y = exec_ctx["outp"][1]
    expected = multithreshold(in_tensor, new_weights)[1]
    if act == DataType["BIPOLAR"]:
        # binary to bipolar
        expected = 2 * expected - 1
    else:
        # signed offset
        expected += act.min()
    assert (y == expected).all()
def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode):
    """Convert a single Conv node to HLS layers and compare with the original.

    ``conv_config`` is ``(kernel_size, stride, pad)``. A one-node ONNX Conv
    model (depthwise when ``depthwise is True``) is lowered and converted to
    HLS layers, executed in ``exec_mode`` ("cppsim" or "rtlsim") and compared
    with the original model. For rtlsim, cycle estimates of the DownSampler
    and Vector_Vector_Activate_Batch nodes are checked where those nodes are
    expected.
    """
    kernel_size, stride, pad = conv_config
    np.random.seed(0)
    idt = DataType.UINT4

    in_feature_dim = 7
    in_chn = 16

    is_depthwise = depthwise is True
    if is_depthwise:
        out_chn = in_chn
        group = in_chn
        weight_shape = [out_chn, 1, kernel_size, kernel_size]
    else:
        out_chn = 20
        group = 1
        weight_shape = [out_chn, in_chn, kernel_size, kernel_size]

    out_feature_dim = compute_conv_output_dim(in_feature_dim, kernel_size, stride, pad)

    input_shape = [1, in_chn, in_feature_dim, in_feature_dim]
    output_shape = [1, out_chn, out_feature_dim, out_feature_dim]

    conv_weight_dt = DataType.UINT4

    # attributes of the ONNX Conv node
    conv_attrs = {
        "dilations": [1, 1],
        "group": group,
        "kernel_shape": [kernel_size, kernel_size],
        "pads": [pad, pad, pad, pad],
        "strides": [stride, stride],
    }

    top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
    top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape)
    weight_info = helper.make_tensor_value_info("p1", TensorProto.FLOAT, weight_shape)
    conv_node = helper.make_node("Conv", ["top_in", "p1"], ["top_out"], **conv_attrs)
    graph = helper.make_graph(
        name="conv_test",
        inputs=[top_in],
        outputs=[top_out],
        value_info=[weight_info],
        nodes=[conv_node],
    )

    model = ModelWrapper(helper.make_model(graph))
    model.set_tensor_datatype("top_in", idt)
    model.set_tensor_datatype("top_out", idt)
    model.set_tensor_datatype("p1", conv_weight_dt)
    model.set_initializer("p1", gen_finn_dt_tensor(conv_weight_dt, weight_shape))

    model = model.transform(InferShapes()).transform(InferDataTypes())

    new_model = model.transform(LowerConvsToMatMul()).transform(
        to_hls.InferConvInpGen()
    )
    if is_depthwise:
        new_model = new_model.transform(to_hls.InferVVAU())
    else:
        new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer())
        fc_inst = getCustomOp(
            new_model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0]
        )
        mw = fc_inst.get_nodeattr("MW")
        mh = fc_inst.get_nodeattr("MH")
        # smallest divisor >= 2 of MH / MW is used as PE / SIMD
        pe_cands = [cand for cand in range(2, mh + 1) if mh % cand == 0]
        simd_cands = [cand for cand in range(2, mw + 1) if mw % cand == 0]
        fc_inst.set_nodeattr("PE", pe_cands[0])
        fc_inst.set_nodeattr("SIMD", simd_cands[0])

    new_model = (
        new_model.transform(GiveUniqueNodeNames())
        .transform(InferShapes())
        .transform(InferDataTypes())
    )

    if exec_mode == "cppsim":
        new_model = (
            new_model.transform(PrepareCppSim())
            .transform(CompileCppSim())
            .transform(SetExecMode("cppsim"))
        )
    elif exec_mode == "rtlsim":
        new_model = (
            new_model.transform(SetExecMode("rtlsim"))
            .transform(GiveUniqueNodeNames())
            .transform(PrepareIP("xc7z020clg400-1", 5))
            .transform(HLSSynthIP())
            .transform(PrepareRTLSim())
        )
    else:
        raise Exception("Unknown exec_mode")

    def check_cycle_estimate(op_type):
        # compare rtlsim cycles of the first `op_type` node with its estimate
        node = new_model.get_nodes_by_op_type(op_type)[0]
        cycles_rtlsim = getCustomOp(node).get_nodeattr("cycles_rtlsim")
        exp_cycles = new_model.analysis(exp_cycles_per_layer)[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=11)
        assert exp_cycles != 0

    x = gen_finn_dt_tensor(idt, input_shape)
    inp_dict = {model.graph.input[0].name: x}
    assert oxe.compare_execution(model, new_model, inp_dict)
    if kernel_size == 1 and stride > 1 and pad == 0:
        assert new_model.graph.node[1].op_type == "DownSampler"
        if exec_mode == "rtlsim":
            check_cycle_estimate("DownSampler")

    if pad == 1:
        padding_node = new_model.get_nodes_by_op_type("FMPadding_Batch")[0]
        assert getCustomOp(padding_node).get_nodeattr("SIMD") == in_chn

    if is_depthwise and exec_mode == "rtlsim":
        check_cycle_estimate("Vector_Vector_Activate_Batch")
Esempio n. 29
0
def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode, mem_mode):
    """Compare Thresholding_Batch against the ``multithreshold`` reference.

    ``nf == -1`` means fully folded (``nf = ich``); ``pe = ich // nf``.
    Random non-decreasing thresholds are generated, the layer is executed in
    ``exec_mode`` ("cppsim" or "rtlsim") and its output must match the
    reference. For rtlsim the HLS resource estimate and the cycle estimate
    are checked as well.

    Raises:
        Exception: if ``exec_mode`` is neither "cppsim" nor "rtlsim".
    """
    if nf == -1:
        nf = ich
    pe = ich // nf
    assert ich % pe == 0

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, ich))

    odt = act
    n_steps = act.get_num_possible_values() - 1
    T = np.random.randint(idt.min(),
                          idt.max() + 1, (ich, n_steps)).astype(np.float32)
    # make the vivado_hls threshold bug appear (incorrect rtlsim result when first
    # threshold of first channel is zero, while using BIPOLAR output)
    if act == DataType["BIPOLAR"]:
        T[0][0] = 0
    # provide non-decreasing thresholds
    T = np.sort(T, axis=1)

    if odt == DataType["BIPOLAR"]:
        actval = 0
    else:
        actval = odt.min()

    model = make_single_thresholding_modelwrapper(T, pe, idt, odt, actval,
                                                  mem_mode)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # package input data as dictionary
    input_dict = {"inp": x}

    # reference result
    y = multithreshold(x, T)
    if act == DataType["BIPOLAR"]:
        # binary to bipolar
        y = 2 * y - 1
    else:
        # signed offset
        y += act.min()

    oshape = model.get_tensor_shape("outp")
    y_expected = y.reshape(oshape)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]

    y_produced = y_produced.reshape(y_expected.shape)

    # report the mode that actually ran instead of always blaming cppsim
    assert (y_produced == y_expected).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        hls_synt_res_est = model.analysis(hls_synth_res_estimation)
        assert "Thresholding_Batch_0" in hls_synt_res_est

        # compare simulated cycle count against the per-layer estimate
        node = model.get_nodes_by_op_type("Thresholding_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Esempio n. 30
0
    def apply(self, model):
        """Size all StreamingFIFO nodes by simulating the stitched design.

        Every node gets in/out FIFO depths of ``self.max_depth``, DWCs and
        FIFOs are inserted, and the stitched IP is simulated cycle by cycle
        while the largest count seen for each FIFO is recorded. The recorded
        counts (passed through ``optimize_depth``) are then written back as
        the FIFO depths, non-FIFO nodes get in/outFIFODepth 0, and shallow
        FIFOs are removed.

        Returns ``(model, False)``.
        """
        # change external to decoupled and warn user
        # this way we are sure we have exactly one input/output
        modified_fc_nodes = []
        for node in model.graph.node:
            # verify assumptions
            assert is_fpgadataflow_node(
                node), "Found non-fpgadataflow node: " + str(node)
            assert node.op_type != "StreamingFIFO", "Found existing StreamingFIFO node"
            node = getCustomOp(node)
            node.set_nodeattr("inFIFODepth", self.max_depth)
            node.set_nodeattr("outFIFODepth", self.max_depth)
            if node.onnx_node.op_type == "StreamingFCLayer_Batch":
                mmode = node.get_nodeattr("mem_mode")
                if mmode == "external":
                    # remembered by name so the mode can be restored later
                    modified_fc_nodes.append(node.onnx_node.name)
                    node.set_nodeattr("mem_mode", "decoupled")
                    reset_implementation(node)
                    warnings.warn(
                        "Changed mem_mode from external to decoupled for " +
                        node.onnx_node.name)

        # insert stream infrastructure (DWC/FIFO)
        model = model.transform(InsertDWC())
        model = model.transform(InsertFIFO())
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(GiveReadableTensorNames())

        # gather FIFO names, check they are of expected depth
        # fifos maps FIFO node name -> largest count observed so far
        fifos = {}
        for node in model.graph.node:
            if node.op_type == "StreamingFIFO":
                fifos[node.name] = 0
                node = getCustomOp(node)
                # check depths and fix as necessary
                if node.get_nodeattr("depth") != self.max_depth:
                    node.set_nodeattr("depth", self.max_depth)

        # insert FIFOs and do all transformations for RTLsim
        model = model.transform(AnnotateCycles())
        perf = model.analysis(dataflow_performance)
        latency = perf["critical_path_cycles"]
        max_cycles = perf["max_cycles"]
        model = model.transform(PrepareIP(self.fpgapart, self.clk_ns))
        model = model.transform(HLSSynthIP())
        model = model.transform(CreateStitchedIP(self.fpgapart, self.clk_ns))
        model.set_metadata_prop("exec_mode", "rtlsim")

        # calculate input frequency (number of cycles for each input word)
        # i.e. max_cycles divided by the product of all folded input dims
        # except the last one, rounded up and clamped to at least 1
        first_node = getCustomOp(model.graph.node[0])
        ncycles_per_input = max(
            1,
            int(
                math.ceil(perf["max_cycles"] /
                          (np.prod(first_node.get_folded_input_shape()) /
                           first_node.get_folded_input_shape()[-1]))),
        )

        # set sufficiently large threshold for 1 image to fully execute and exit
        ncycles = int(latency + max_cycles)

        # prepare pyverilator model
        sim = pyverilate_stitched_ip(model)

        reset_rtlsim(sim)
        toggle_clk(sim)

        # set all input valids to 0 and output readies to 1
        # set input data to some constant
        set_signal(sim, "tvalid", 0)
        set_signal(sim, "tready", 1)
        set_signal(sim, "tdata", 0)

        output_detected = False
        # ncycles counts down; it is reloaded once when output first appears
        while ncycles > 0:
            toggle_clk(sim)
            # set/unset valids
            if ncycles % ncycles_per_input == 0:
                set_signal(sim, "tvalid", 1)
            else:
                set_signal(sim, "tvalid", 0)

            # check/update all fifo counts
            for key in fifos:
                current_state = sim.internals["finn_design_i"][key]["inst"][
                    key + "_" + key]["state"]
                current_addr = sim.internals["finn_design_i"][key]["inst"][
                    key + "_" + key]["addr"]
                # NOTE(review): presumably state 2 means the FIFO holds two
                # or more words with `addr` counting the extra ones, while
                # states 0/1 equal the count directly; confirm against the
                # FIFO RTL
                if current_state == 2:
                    current_count = current_addr + 2
                else:
                    current_count = current_state
                if current_count > fifos[key]:
                    fifos[key] = current_count

            # since latency estimation is very pessimistic, detect first output
            # and fast-forward the sim
            if get_signal(sim, "tvalid") != 0 and not output_detected:
                ncycles = max_cycles
                output_detected = True
            else:
                ncycles = ncycles - 1

        if not output_detected:
            warnings.warn(
                "No output detected, calculated FIFO depths may not be correct"
            )

        # Apply depths back into the model;
        # also set in/outFIFODepth to zero for non-FIFO
        # nodes, preventing further FIFO insertion
        for node in model.graph.node:
            # set FIFO depth, reset FIFO implementation,
            # and set implementation/ram styles
            if node.op_type == "StreamingFIFO":
                assert node.name in fifos, "FIFO node not found in size dictionary"
                # set depth of FIFO
                depth = optimize_depth(fifos[node.name])
                node_inst = getCustomOp(node)
                node_inst.set_nodeattr("depth", depth)
                # Set FIFO implementation/ram styles
                if depth > self.max_qsrl_depth:
                    node_inst.set_nodeattr("impl_style", "vivado")
                    node_inst.set_nodeattr("ram_style", self.vivado_ram_style)
                else:
                    node_inst.set_nodeattr("impl_style", "rtl")
                # reset implementation
                reset_implementation(node_inst)
                # handled entries are removed so leftovers can be detected
                del fifos[node.name]
            else:
                getCustomOp(node).set_nodeattr("inFIFODepth", 0)
                getCustomOp(node).set_nodeattr("outFIFODepth", 0)
                # for every FC node we changed from external to decoupled,
                # change back and reset implementation
                if node.op_type == "StreamingFCLayer_Batch":
                    if node.name in modified_fc_nodes:
                        node_inst = getCustomOp(node)
                        node_inst.set_nodeattr("mem_mode", "external")
                        reset_implementation(node_inst)
                        modified_fc_nodes.remove(node.name)

        # both bookkeeping containers must be empty by now
        assert (len(modified_fc_nodes) == 0 and len(fifos.keys()) == 0
                ), "FIFO/FC nodes left untouched after model reconfiguration"

        # handle custom sizing for SWG FIFOs if desired
        if self.swg_exception:
            model = model.transform(
                CapConvolutionFIFODepths(max_qsrl_depth=self.max_qsrl_depth))
        # remove shallow FIFOs
        model = model.transform(RemoveShallowFIFOs())

        return (model, False)