Esempio n. 1
0
def fold_cnv_small(model):
    """Write a fixed folding configuration onto the layers of ``model``.

    Every ``StreamingFCLayer_Batch`` node is paired, in order, with one row of
    the folding table and receives its PE, SIMD, inFIFODepth and ram_style
    attributes. Every ``ConvolutionInputGenerator`` node gets the SIMD value
    of the folding row with the same index plus its own inFIFODepth.

    Returns the same ``model`` object that was passed in.
    """
    # one row per StreamingFCLayer_Batch node:
    # (PE, SIMD, inFIFODepth, ram_style)
    folding = [
        (8, 3, 256, "auto"),
        (16, 16, 256, "auto"),
        (8, 16, 256, "auto"),
        (8, 16, 256, "block"),
        (4, 8, 214, "auto"),
        (1, 8, 2, "auto"),
        (1, 2, 126, "distributed"),
        (2, 2, 62, "block"),
        (5, 1, 6, "distributed"),
    ]
    fc_attr_names = ("PE", "SIMD", "inFIFODepth", "ram_style")
    fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    for fc_node, fc_cfg in zip(fc_nodes, folding):
        fc_op = getCustomOp(fc_node)
        for attr_name, attr_value in zip(fc_attr_names, fc_cfg):
            fc_op.set_nodeattr(attr_name, attr_value)

    # input FIFO depth for each sliding window generator, by position
    swg_idepth = [2, 51, 9, 106, 2, 2]
    swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    for idx, swg_node in enumerate(swg_nodes):
        swg_op = getCustomOp(swg_node)
        # the generator reuses the SIMD of the folding row at the same index
        swg_op.set_nodeattr("SIMD", folding[idx][1])
        swg_op.set_nodeattr("inFIFODepth", swg_idepth[idx])
    return model
Esempio n. 2
0
def fold_cnv_large(model):
    """Write a fixed folding configuration onto the layers of ``model``.

    ``StreamingFCLayer_Batch`` nodes are matched in order against the folding
    table and get PE, SIMD and inFIFODepth. ``ConvolutionInputGenerator``
    nodes take the SIMD of the folding row with the same index and an
    inFIFODepth from a separate per-position list.

    Returns the same ``model`` object that was passed in.
    """
    # one row per StreamingFCLayer_Batch node: (PE, SIMD, inFIFODepth)
    folding = [
        (16, 3, 256),
        (32, 32, 256),
        (16, 32, 256),
        (16, 32, 256),
        (4, 32, 214),
        (1, 32, 2),
        (1, 4, 126),
        (1, 8, 62),
        (5, 1, 6),
    ]
    fc_attr_names = ("PE", "SIMD", "inFIFODepth")
    fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    for fc_node, fc_cfg in zip(fc_nodes, folding):
        fc_op = getCustomOp(fc_node)
        for attr_name, attr_value in zip(fc_attr_names, fc_cfg):
            fc_op.set_nodeattr(attr_name, attr_value)

    # input FIFO depth for each sliding window generator, by position
    swg_idepth = [2, 51, 9, 106, 2, 2]
    swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    for idx, swg_node in enumerate(swg_nodes):
        swg_op = getCustomOp(swg_node)
        swg_op.set_nodeattr("SIMD", folding[idx][1])
        swg_op.set_nodeattr("inFIFODepth", swg_idepth[idx])
    return model
Esempio n. 3
0
def fold_cnv_small(model):
    """Write a fixed folding configuration onto the layers of ``model``.

    ``StreamingFCLayer_Batch`` nodes are matched in order against the folding
    table and get PE, SIMD and ram_style. Each ``ConvolutionInputGenerator``
    node takes the SIMD of the folding row with the same index.

    Returns the same ``model`` object that was passed in.
    """
    # one row per StreamingFCLayer_Batch node: (PE, SIMD, ram_style)
    folding = [
        (8, 3, "auto"),
        (16, 16, "auto"),
        (8, 16, "auto"),
        (8, 16, "block"),
        (4, 8, "auto"),
        (1, 8, "auto"),
        (1, 2, "distributed"),
        (2, 2, "block"),
        (5, 1, "distributed"),
    ]
    fc_attr_names = ("PE", "SIMD", "ram_style")
    fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    for fc_node, fc_cfg in zip(fc_nodes, folding):
        fc_op = getCustomOp(fc_node)
        for attr_name, attr_value in zip(fc_attr_names, fc_cfg):
            fc_op.set_nodeattr(attr_name, attr_value)

    swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    for idx, swg_node in enumerate(swg_nodes):
        # the generator reuses the SIMD of the folding row at the same index
        getCustomOp(swg_node).set_nodeattr("SIMD", folding[idx][1])
    return model
Esempio n. 4
0
def fold_cnv_large(model):
    """Write a fixed folding configuration onto the layers of ``model``.

    ``StreamingFCLayer_Batch`` nodes are matched in order against the folding
    table and get PE and SIMD. Each ``ConvolutionInputGenerator`` node takes
    the SIMD of the folding row with the same index.

    Returns the same ``model`` object that was passed in.
    """
    # one row per StreamingFCLayer_Batch node: (PE, SIMD)
    folding = [
        (16, 3),
        (32, 32),
        (16, 32),
        (16, 32),
        (4, 32),
        (1, 32),
        (1, 4),
        (1, 8),
        (5, 1),
    ]
    fc_attr_names = ("PE", "SIMD")
    fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    for fc_node, fc_cfg in zip(fc_nodes, folding):
        fc_op = getCustomOp(fc_node)
        for attr_name, attr_value in zip(fc_attr_names, fc_cfg):
            fc_op.set_nodeattr(attr_name, attr_value)

    swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    for idx, swg_node in enumerate(swg_nodes):
        getCustomOp(swg_node).set_nodeattr("SIMD", folding[idx][1])
    return model
def attach_child_models_to_parent_model(parent_model,
                                        ordered_list_of_child_model_paths):
    """Point each StreamingDataflowPartition node at its child model file.

    The i-th path in ``ordered_list_of_child_model_paths`` is assigned to the
    i-th ``StreamingDataflowPartition`` node of ``parent_model`` (the list is
    assumed to be in access order). Each child model file is also loaded, has
    its first graph input/output renamed to the partition node's first
    input/output tensor name, and is saved back to the same path.

    Raises:
        ValueError: if the number of paths differs from the number of
            partition nodes.

    Returns the ``parent_model`` object that was passed in.
    """
    partition_nodes = parent_model.get_nodes_by_op_type(
        "StreamingDataflowPartition")
    num_sdpn = len(partition_nodes)
    num_child_models = len(ordered_list_of_child_model_paths)
    if num_child_models != num_sdpn:
        raise ValueError(
            f"Number of child models supplied ({num_child_models}) does not match number of StreamingDataflowPartition Nodes ({num_sdpn})"
        )
    for idx, partition_node in enumerate(partition_nodes):
        child_path = ordered_list_of_child_model_paths[idx]
        getCustomOp(partition_node).set_nodeattr("model", child_path)
        # the child's boundary tensors take over the names used on the
        # partition node in the parent graph
        parent_in_name = partition_node.input[0]
        parent_out_name = partition_node.output[0]
        child = ModelWrapper(child_path)
        child.rename_tensor(child.graph.input[0].name, parent_in_name)
        child.rename_tensor(child.graph.output[0].name, parent_out_name)
        # overwrite the child model file in place
        child.save(child_path)
    return parent_model
Esempio n. 6
0
def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Create stitched IP for a graph after all HLS IP blocks have been generated.
    Depends on the DataflowOutputType.STITCHED_IP output product.

    When STITCHED_IP_RTLSIM is among the resolved verification steps, a deep
    copy of the model is additionally prepared for rtlsim and handed to
    ``verify_step``; the returned model is not affected by that copy."""

    if DataflowOutputType.STITCHED_IP in cfg.generate_outputs:
        stitched_ip_dir = cfg.output_dir + "/stitched_ip"
        stitch_transform = CreateStitchedIP(cfg._resolve_fpga_part(),
                                            cfg.synth_clk_period_ns)
        model = model.transform(stitch_transform)
        # TODO copy all ip sources into output dir? as zip?
        vivado_proj_dir = model.get_metadata_prop("vivado_stitch_proj")
        copytree(vivado_proj_dir, stitched_ip_dir)
        print("Vivado stitched IP written into " + stitched_ip_dir)

    requested_steps = cfg._resolve_verification_steps()
    if VerificationStepType.STITCHED_IP_RTLSIM not in requested_steps:
        return model

    # prepare ip-stitched rtlsim on a copy so the build model stays untouched
    verify_model = deepcopy(model)
    # rtlsim only supports impl_style=rtl for StreamingFIFO; the
    # StreamingDataWidthConverter is similarly forced to impl_style=hls
    forced_impl_styles = (
        ("StreamingFIFO", "rtl"),
        ("StreamingDataWidthConverter_Batch", "hls"),
    )
    for op_type, impl_style in forced_impl_styles:
        for node in verify_model.get_nodes_by_op_type(op_type):
            getCustomOp(node).set_nodeattr("impl_style", impl_style)
    verify_model = verify_model.transform(PrepareRTLSim())
    verify_model.set_metadata_prop("exec_mode", "rtlsim")
    verify_step(verify_model, cfg, "stitched_ip_rtlsim", need_parent=True)
    return model
Esempio n. 7
0
def test_end2end_cnv_w1a1_fold_and_tlastmarker():
    """Fold the CNV-w1a1 dataflow model, insert DWC/FIFO/TLastMarker nodes,
    annotate estimated resources and save the result to the build directory."""
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_model.onnx")
    # one row per StreamingFCLayer_Batch node: (PE, SIMD, inFIFODepth)
    folding = [
        (16, 3, 128),
        (32, 32, 128),
        (16, 32, 128),
        (16, 32, 128),
        (4, 32, 81),
        (1, 32, 2),
        (1, 4, 2),
        (1, 8, 128),
        (5, 1, 3),
    ]
    fc_attr_names = ("PE", "SIMD", "inFIFODepth")
    fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    for fc_node, fc_cfg in zip(fc_nodes, folding):
        fc_op = getCustomOp(fc_node)
        for attr_name, attr_value in zip(fc_attr_names, fc_cfg):
            fc_op.set_nodeattr(attr_name, attr_value)

    swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    for idx, swg_node in enumerate(swg_nodes):
        # the generator reuses the SIMD of the folding row at the same index
        getCustomOp(swg_node).set_nodeattr("SIMD", folding[idx][1])

    # each transformation is constructed and applied in turn, left to right
    model = (model.transform(InsertDWC())
             .transform(InsertFIFO())
             .transform(InsertTLastMarker())
             .transform(GiveUniqueNodeNames())
             .transform(AnnotateResources("estimate")))
    model.save(build_dir + "/end2end_cnv_w1a1_folded.onnx")
Esempio n. 8
0
def step_measure_rtlsim_performance(model: ModelWrapper,
                                    cfg: DataflowBuildConfig):
    """Measure performance + latency of stitched-IP model in rtlsim (pyverilator).
    Depends on the DataflowOutputType.STITCHED_IP output product.

    The measurements are written to ``<output_dir>/report/rtlsim_performance.json``.
    The input ``model`` is returned unchanged; simulation runs on a deep copy.
    """

    if DataflowOutputType.RTLSIM_PERFORMANCE not in cfg.generate_outputs:
        return model

    assert (DataflowOutputType.STITCHED_IP
            in cfg.generate_outputs), "rtlsim_perf needs stitched IP"
    # prepare ip-stitched rtlsim
    sim_model = deepcopy(model)
    # rtlsim only supports impl_style=rtl for StreamingFIFO; the
    # StreamingDataWidthConverter is similarly forced to impl_style=hls
    forced_impl_styles = (
        ("StreamingFIFO", "rtl"),
        ("StreamingDataWidthConverter_Batch", "hls"),
    )
    for op_type, impl_style in forced_impl_styles:
        for node in sim_model.get_nodes_by_op_type(op_type):
            getCustomOp(node).set_nodeattr("impl_style", impl_style)
    sim_model = sim_model.transform(PrepareRTLSim())
    sim_model.set_metadata_prop("exec_mode", "rtlsim")

    # a single-input run yields the latency
    latency_cycles = throughput_test_rtlsim(sim_model, 1)["cycles"]
    # a batch as large as the node count fills the whole pipeline and
    # yields the steady-state throughput
    pipeline_batch = len(sim_model.graph.node)
    perf_report = throughput_test_rtlsim(sim_model, pipeline_batch)
    perf_report["latency_cycles"] = latency_cycles

    report_dir = cfg.output_dir + "/report"
    os.makedirs(report_dir, exist_ok=True)
    with open(report_dir + "/rtlsim_performance.json", "w") as f:
        json.dump(perf_report, f, indent=2)

    return model
Esempio n. 9
0
def test_depthwise_conv_hls_rtlsim(act, pe, k, stride, padding):
    """Check that the HLS/rtlsim version of a depthwise conv matches the
    reference model built by ``set_up_reference_model``."""
    ifm_dim = 6
    ifm_ch = 4
    idt = DataType.INT4
    wdt = idt

    # set up reference model consisting of Im2Col + MatMul (+ MultiThreshold)
    model = set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride,
                                   padding)

    input_dict = {
        "inp": gen_finn_dt_tensor(idt, [1, ifm_dim, ifm_dim, ifm_ch])
    }

    new_model = model.transform(InferConvInpGen()).transform(InferVVAU())

    # the same folding factor `pe` is used as SIMD in the ConvInputGen node
    # and as PE in the VVAU node
    folded_attr_by_op = {
        "ConvolutionInputGenerator": "SIMD",
        "Vector_Vector_Activate_Batch": "PE",
    }
    for node in new_model.graph.node:
        attr_name = folded_attr_by_op.get(node.op_type)
        if attr_name is not None:
            getCustomOp(node).set_nodeattr(attr_name, pe)

    new_model = (new_model.transform(SetExecMode("rtlsim"))
                 .transform(GiveUniqueNodeNames())
                 .transform(PrepareIP("xc7z020clg400-1", 5))
                 .transform(HLSSynthIP())
                 .transform(PrepareRTLSim()))

    assert oxe.compare_execution(model, new_model, input_dict)
Esempio n. 10
0
def test_end2end_tfc_w1a2_fold_and_tlastmarker():
    """Set folding/FIFO attributes on the first four nodes of the TFC-w1a2
    dataflow model, insert a TLastMarker and save the folded model."""
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_dataflow_model.onnx")
    first_four_nodes = [model.graph.node[idx] for idx in range(4)]
    layer_ops = [getCustomOp(node) for node in first_four_nodes]
    # attribute assignments per layer, applied in the listed order
    layer_settings = [
        [("inFIFODepth", 50), ("SIMD", 8), ("PE", 16), ("outFIFODepth", 4)],
        [("SIMD", 16), ("PE", 16), ("outFIFODepth", 4)],
        [("SIMD", 16), ("PE", 16), ("outFIFODepth", 4)],
        [("SIMD", 16), ("PE", 10), ("outFIFODepth", 50)],
    ]
    for layer_op, settings in zip(layer_ops, layer_settings):
        for attr_name, attr_value in settings:
            layer_op.set_nodeattr(attr_name, attr_value)
    model = model.transform(InsertTLastMarker())
    model.save(build_dir + "/end2end_tfc_w1a2_folded.onnx")
Esempio n. 11
0
 def apply(self, model):
     """Annotate per-node and total resource figures on ``model``.

     The analysis function is chosen from ``self.mode`` ("estimate", "hls"
     or "synth"). If ``self.res_dict`` is None it is filled by running that
     analysis on the model. Each fpgadataflow node whose name appears in the
     resource dict gets a ``res_<mode>`` node attribute. Each
     ``StreamingDataflowPartition`` node has its child model loaded,
     annotated with the same mode and resource dict, saved back to its file,
     and the child's ``res_total_<mode>`` copied onto the partition node.

     The summed resources are stored as the ``res_total_<mode>`` metadata
     property; keys containing "efficiency" are divided by the number of
     graph nodes. If the resource dict has a "(top)" entry it is stored as
     ``res_total_top_<mode>``.

     Side effects: ``self.res_dict`` is updated in place and child model
     files are overwritten.

     Returns:
         ``(model, False)``.

     Raises:
         Exception: if ``self.mode`` is not one of the recognized modes.
     """
     # local import keeps this method self-contained
     import ast

     graph = model.graph
     if self.mode == "estimate":
         res_fxn = res_estimation
     elif self.mode == "hls":
         res_fxn = hls_synth_res_estimation
     elif self.mode == "synth":
         res_fxn = post_synth_res
     else:
         raise Exception("Unrecognized mode for AnnotateResources")
     if self.res_dict is None:
         self.res_dict = model.analysis(res_fxn)
     res_attr_name = "res_" + self.mode
     total_prop_name = "res_total_" + self.mode
     children_dict = {}
     # annotate node resources
     for node in graph.node:
         if _is_fpgadataflow_node(node) and node.name in self.res_dict:
             op_inst = registry.getCustomOp(node)
             op_inst.set_nodeattr(res_attr_name,
                                  str(self.res_dict[node.name]))
             children_dict[node.name] = self.res_dict[node.name]
         elif node.op_type == "StreamingDataflowPartition":
             # recurse into model to manually annotate per-layer resources
             sdp_model_filename = getCustomOp(node).get_nodeattr("model")
             sdp_model = ModelWrapper(sdp_model_filename)
             sdp_model = sdp_model.transform(
                 AnnotateResources(self.mode, self.res_dict))
             sdp_dict = sdp_model.get_metadata_prop(total_prop_name)
             # the property is the str() of a plain dict; parse it as a
             # literal instead of eval() so that text stored in a model
             # file can never execute code
             sdp_dict = ast.literal_eval(sdp_dict)
             # save transformed model
             sdp_model.save(sdp_model_filename)
             # set res attribute for sdp node
             getCustomOp(node).set_nodeattr(res_attr_name, str(sdp_dict))
             children_dict[node.name] = sdp_dict
     self.res_dict.update(children_dict)
     # sum every resource type over all annotated children
     total_dict = {}
     for lname in children_dict:
         layer_res_dict = self.res_dict[lname]
         for r_type in layer_res_dict:
             r_amount = float(layer_res_dict[r_type])
             if r_type in total_dict:
                 total_dict[r_type] += r_amount
             else:
                 total_dict[r_type] = r_amount
     # efficiency figures are divided by the node count rather than summed
     for k in total_dict:
         if "efficiency" in k:
             total_dict[k] = total_dict[k] / len(graph.node)
     model.set_metadata_prop(total_prop_name, str(total_dict))
     if "(top)" in self.res_dict:
         top_dict = self.res_dict["(top)"]
         model.set_metadata_prop("res_total_top_" + self.mode,
                                 str(top_dict))
     return (model, False)
def hw_accelerate_parent_model_setup(parent_onnx_model_dir,
                                     remote_exec_model_dir):
    """Load the parent model and point its partition node at a child model.

    Args:
        parent_onnx_model_dir: path handed to ``ModelWrapper`` to load the
            parent model.
        remote_exec_model_dir: value written to the ``model`` attribute of
            the partition node.

    The modified parent model is saved under ``BASE_DIR`` and also returned.
    """
    parent_model = ModelWrapper(parent_onnx_model_dir)
    # NOTE: index 1 is specific to the parent graph in use; inspect the
    # parent model and adjust if the partition node sits elsewhere
    sdp_node = parent_model.graph.node[1]
    # use the caller-supplied path; the original ignored this parameter and
    # read the module-level REMOTE_EXEC_MODEL_DIR instead
    getCustomOp(sdp_node).set_nodeattr("model", remote_exec_model_dir)
    parent_model.save(
        BASE_DIR +
        "/qnn_harnn_model_dataflow_parent_with_remote_bitfile_exec.onnx")
    return parent_model
Esempio n. 13
0
    def apply(self, model):
        """Insert StreamingDataWidthConverter_Batch nodes where widths differ.

        For every suitable node whose consumer is also suitable, the last
        dimension of the producer's folded output shape is compared with the
        last dimension of the consumer's folded input shape. When they
        differ, a converter node is inserted right after the producer and
        the consumer's first input is rewired to the converter output.

        Returns ``(model, graph_modified)``.
        """
        graph = model.graph
        graph_modified = False
        for node_ind, producer in enumerate(graph.node):
            if not _suitable_node(producer):
                continue
            producer_out = producer.output[0]
            consumer = model.find_consumer(producer_out)
            if _suitable_node(consumer) is not True:
                continue

            producer_op = getCustomOp(producer)
            consumer_op = getCustomOp(consumer)
            folded_out_shape = producer_op.get_folded_output_shape()
            folded_in_shape = consumer_op.get_folded_input_shape()
            if folded_out_shape[-1] != folded_in_shape[-1]:
                graph_modified = True
                # stream widths on either side of the converter
                dwc_in_width = producer_op.get_outstream_width()
                dwc_out_width = consumer_op.get_instream_width()
                # the converter carries the producer's unfolded shape
                # and output datatype
                dwc_shape = producer_op.get_normal_output_shape()
                dtype = producer_op.get_output_datatype()

                dwc_output_tensor = oh.make_tensor_value_info(
                    model.make_new_valueinfo_name(),
                    TensorProto.FLOAT,
                    dwc_shape,
                )
                graph.value_info.append(dwc_output_tensor)

                dwc_node = oh.make_node(
                    "StreamingDataWidthConverter_Batch",
                    [producer_out],
                    [dwc_output_tensor.name],
                    domain="finn",
                    backend="fpgadataflow",
                    shape=dwc_shape,
                    inWidth=dwc_in_width,
                    outWidth=dwc_out_width,
                    dataType=str(dtype.name),
                )
                # place the converter directly behind the producer
                graph.node.insert(node_ind + 1, dwc_node)

                # the consumer now reads from the converter output
                consumer.input[0] = dwc_output_tensor.name

        return (model, graph_modified)
Esempio n. 14
0
def rtlsim_exec(model, execution_context):
    """Use PyVerilator to execute given model with stitched IP. The execution
    context contains the input values.

    The result for the first graph output is written back into
    ``execution_context``; the cycle count reported by the simulation is
    stored in the ``sim_cycles`` metadata property. Only the first graph
    input and output are handled."""

    if PyVerilator is None:
        raise ImportError("Installation of PyVerilator is required.")
    # ensure stitched ip project already exists
    assert os.path.isfile(model.get_metadata_prop("wrapper_filename")), """The
    file name from metadata property "wrapper_filename" doesn't exist."""
    assert os.path.isdir(model.get_metadata_prop("vivado_stitch_proj")), """The
    directory from metadata property "vivado_stitch_proj" doesn't exist"""
    vcd_trace = model.get_metadata_prop("rtlsim_trace")

    # input side
    # TODO extend for multiple inputs
    in_name = model.graph.input[0].name
    in_tensor = execution_context[in_name]
    in_dtype = model.get_tensor_datatype(in_name)
    head_op = getCustomOp(model.find_consumer(in_name))
    in_stream_width = head_op.get_instream_width()
    # convert input into time multiplexed shape
    # TODO any other layout transformations need to happen here!
    in_tensor = in_tensor.reshape(head_op.get_folded_input_shape())

    # output side
    out_name = model.graph.output[0].name
    out_shape = model.get_tensor_shape(out_name)
    out_dtype = model.get_tensor_datatype(out_name)
    tail_op = getCustomOp(model.find_producer(out_name))
    out_folded_shape = tail_op.get_folded_output_shape()
    out_stream_width = tail_op.get_outstream_width()
    out_elem_bits = out_dtype.bitwidth()

    # pack input
    stimulus = npy_to_rtlsim_input(in_tensor, in_dtype, in_stream_width)
    expected_out_count = tail_op.get_number_output_values()

    # reuse an already built simulation library when its file exists,
    # otherwise build one and remember its path in the metadata
    sim_lib_path = model.get_metadata_prop("rtlsim_so")
    if sim_lib_path is not None and os.path.isfile(sim_lib_path):
        sim = PyVerilator(sim_lib_path)
    else:
        sim = pyverilate_stitched_ip(model)
        model.set_metadata_prop("rtlsim_so", sim.lib._name)

    _reset_rtlsim(sim)
    _toggle_clk(sim)
    sim_result = _run_rtlsim(sim, stimulus, expected_out_count, vcd_trace)
    packed_output = sim_result[0]
    model.set_metadata_prop("sim_cycles", str(sim_result[1]))

    # unpack output and put into context
    folded_output = rtlsim_output_to_npy(packed_output, None, out_dtype,
                                         out_folded_shape, out_stream_width,
                                         out_elem_bits)
    execution_context[out_name] = folded_output.reshape(out_shape)
Esempio n. 15
0
def test_dataflow_partition_tlastmarker():
    """Insert a TLastMarker into the child model of the dataflow partition
    and check the attributes of the appended node."""
    parent = ModelWrapper(build_dir + "/test_dataflow_partition_create.onnx")
    # the child model path is stored on the third node of the parent graph
    child_path = getCustomOp(parent.graph.node[2]).get_nodeattr("model")
    model = ModelWrapper(child_path).transform(InsertTLastMarker())

    assert model.graph.node[-1].op_type == "TLastMarker"
    assert model.graph.node[-1].domain == "finn"
    marker_op = getCustomOp(model.graph.node[-1])
    expected_attrs = (
        ("NumIters", 1),
        ("StreamWidth", 320),
        ("ElemWidth", 32),
    )
    for attr_name, expected_value in expected_attrs:
        assert marker_op.get_nodeattr(attr_name) == expected_value
    model.save(build_dir + "/test_dataflow_partition_tlastmarker.onnx")

    # run the transformation a second time on the result and save that too
    model = model.transform(InsertTLastMarker())
    model.save(build_dir + "/test_dataflow_partition_tlastmarker2.onnx")
Esempio n. 16
0
def test_end2end_tfc_w1a2_verify_dataflow_part():
    """Run the ip-stitched TFC-w1a2 model in npysim, node-by-node rtlsim and
    whole-network rtlsim on an all-zero input and compare the outputs."""
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_ipstitch.onnx")
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: np.zeros((1, 784), dtype=np.float32)}

    # npysim
    model = (model.transform(CodeGen_npysim())
             .transform(Compile())
             .transform(SetExecMode("npysim")))
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_npysim.onnx")
    res_npysim = execute_onnx(model, inp_dict, True)[out_name]

    # node-by-node rtlsim, with tracing enabled on the first four nodes
    model = model.transform(SetExecMode("rtlsim"))
    for node_idx in range(4):
        traced_op = getCustomOp(model.graph.node[node_idx])
        traced_op.set_nodeattr("rtlsim_trace", "default")
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_nodebynode_rtlsim.onnx")
    res_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)[out_name]

    # whole-network (ip-stitched) rtlsim
    model.set_metadata_prop("exec_mode", "rtlsim")
    model.set_metadata_prop("rtlsim_trace", "whole_trace.vcd")
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_whole_rtlsim.onnx")
    res_rtlsim_whole = execute_onnx(model, inp_dict, True)[out_name]

    assert np.isclose(res_npysim, res_rtlsim_nodebynode).all()
    assert np.isclose(res_npysim, res_rtlsim_whole).all()
Esempio n. 17
0
def test_end2end_mobilenet_folding():
    """Apply the folding configuration to the MobileNet HLS-layers model and
    save the folded model.

    Reads ``extra_fold`` and ``first_layer_res_type`` from the enclosing
    module scope.
    """
    model = load_test_checkpoint_or_skip(build_dir +
                                         "/end2end_mobilenet_hls_layers.onnx")
    # optional extra folding to use fewer resources
    # applied while setting the attributes on each node
    assert extra_fold in [1, 2, 4]

    # folding for the StreamingFCLayer_Batch nodes
    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    # each tuple is (PE, SIMD, ram_style) for a layer
    folding = [
        (32, 3, "block"),
        (16, 16, "block"),
        (16, 16, "block"),
        (32, 16, "block"),
        (16, 16, "block"),
        (32, 16, "block"),
        (16, 16, "block"),
        (32, 16, "block"),
        (32, 16, "block"),
        (32, 16, "block"),
        (32, 16, "block"),
        (32, 16, "block"),
        (16, 16, "block"),
        (32, 16, "block"),
        (4, 4, "block"),
    ]
    for fc_node, fc_cfg in zip(fc_layers, folding):
        fc_pe, fc_simd, fc_ram_style = fc_cfg
        fc_op = getCustomOp(fc_node)
        # only PE is reduced by the extra folding factor
        fc_op.set_nodeattr("PE", fc_pe // extra_fold)
        fc_op.set_nodeattr("SIMD", fc_simd)
        fc_op.set_nodeattr("ram_style", fc_ram_style)
    # first layer uses 8-bit weights & activations
    # control its compute resource type explicitly
    getCustomOp(fc_layers[0]).set_nodeattr("resType", first_layer_res_type)

    # set up folding for the depthwise conv layers impl'd by VVAUs
    # each value is PE for a layer
    vvau_layers = model.get_nodes_by_op_type("Vector_Vector_Activate_Batch")
    folding = [32, 32, 64, 16, 32, 8, 16, 16, 16, 16, 16, 4, 8]
    for vvau_node, vvau_pe in zip(vvau_layers, folding):
        folded_pe = vvau_pe // extra_fold
        getCustomOp(vvau_node).set_nodeattr("PE", folded_pe)
        # set SIMD in preceding ConvInputGen to same value
        inpgen_node = model.find_direct_predecessors(vvau_node)[0]
        getCustomOp(inpgen_node).set_nodeattr("SIMD", folded_pe)
        # set SIMD in preceding FMPadding to same value
        pad_node = model.find_direct_predecessors(inpgen_node)[0]
        if pad_node.op_type == "FMPadding_Batch":
            getCustomOp(pad_node).set_nodeattr("SIMD", folded_pe)

    # adjust final pooling layer + its inpgen
    pool_folding = 4 // extra_fold
    pool_node = model.get_nodes_by_op_type("Pool_Batch")[0]
    getCustomOp(pool_node).set_nodeattr("PE", pool_folding)
    pool_inpgen_node = model.find_direct_predecessors(pool_node)[0]
    getCustomOp(pool_inpgen_node).set_nodeattr("SIMD", pool_folding)

    model = model.transform(InferDataLayouts())
    model.save(build_dir + "/end2end_mobilenet_folded.onnx")
Esempio n. 18
0
 def apply(self, model):
     """Absorb a Transpose in front of a MultiThreshold into that node.

     Looks for a non-fork ``Transpose`` with ``perm=[0, 3, 1, 2]`` feeding a
     non-fork ``MultiThreshold``. The MultiThreshold is switched to
     ``data_layout="NHWC"`` and the leading Transpose is removed. Two cases
     are handled, depending on the MultiThreshold's consumer:

     * a ``Transpose`` with ``perm=[0, 2, 3, 1]``: it is removed as well and
       the MultiThreshold is wired straight through;
     * any other node: a new ``Transpose`` with ``perm=[0, 3, 1, 2]`` is
       inserted behind the MultiThreshold and the consumer reads from it.

     Candidates whose Transpose or MultiThreshold output has no consumer
     are skipped. ``InferDataTypes`` is run when the graph was changed.

     Returns ``(model, graph_modified)``.
     """
     graph = model.graph
     node_ind = 0
     graph_modified = False
     for n in graph.node:
         node_ind += 1
         if n.op_type == "Transpose" and not model.is_fork_node(n):
             perms = list(get_by_name(n.attribute, "perm").ints)
             if perms == [0, 3, 1, 2]:
                 mt_cand = model.find_consumer(n.output[0])
                 # find_consumer yields None when nothing consumes the
                 # tensor, so check before touching op_type
                 if (mt_cand is not None
                         and mt_cand.op_type == "MultiThreshold"
                         and not model.is_fork_node(mt_cand)):
                     final_t_cand = model.find_consumer(mt_cand.output[0])
                     if final_t_cand is None:
                         # the MultiThreshold output has no consumer to
                         # rewire, so this pattern does not apply
                         continue
                     if final_t_cand.op_type == "Transpose":
                         perms = list(
                             get_by_name(final_t_cand.attribute, "perm").ints
                         )
                         if perms == [0, 2, 3, 1]:
                             mt = getCustomOp(mt_cand)
                             mt.set_nodeattr("data_layout", "NHWC")
                             # get rid of transpose nodes, wire MT directly
                             mt_cand.input[0] = n.input[0]
                             mt_cand.output[0] = final_t_cand.output[0]
                             graph.node.remove(n)
                             graph.node.remove(final_t_cand)
                             graph_modified = True
                     else:
                         mt = getCustomOp(mt_cand)
                         mt.set_nodeattr("data_layout", "NHWC")
                         # get rid of first transpose node
                         mt_cand.input[0] = n.input[0]
                         graph.node.remove(n)
                         # fix output shape for MultiThreshold
                         mt_ishape = model.get_tensor_shape(mt_cand.input[0])
                         model.set_tensor_shape(mt_cand.output[0], mt_ishape)
                         # re-insert Transpose behind MultiThreshold
                         transpose_output = model.make_new_valueinfo_name()
                         new_transpose = oh.make_node(
                             "Transpose",
                             [mt_cand.output[0]],
                             [transpose_output],
                             perm=[0, 3, 1, 2],
                         )
                         graph.node.insert(node_ind + 1, new_transpose)
                         final_t_cand.input[0] = transpose_output
                         graph_modified = True
     if graph_modified:
         model = model.transform(InferDataTypes())
     return (model, graph_modified)
Esempio n. 19
0
 def apply(self, model):
     """Absorb a Transpose in front of a MultiThreshold into that node.

     Looks for a non-fork ``Transpose`` with ``perm=[0, 3, 1, 2]`` feeding a
     non-fork ``MultiThreshold``. Two cases are handled, depending on the
     MultiThreshold's consumer:

     * a ``Transpose`` with ``perm=[0, 2, 3, 1]``: both Transposes are
       removed and the MultiThreshold is switched to
       ``data_layout="NHWC"``;
     * a ``Reshape`` whose output is 2-dimensional: the leading Transpose
       is removed, the MultiThreshold is switched to NHWC and its output
       shape is set to its input shape. The input shape must have four
       dimensions with both middle ones equal to 1.

     Candidates whose Transpose or MultiThreshold output has no consumer
     are skipped. ``InferDataTypes`` is run when the graph was changed.

     Returns ``(model, graph_modified)``.
     """
     graph = model.graph
     node_ind = 0
     graph_modified = False
     for n in graph.node:
         node_ind += 1
         if n.op_type == "Transpose" and not model.is_fork_node(n):
             perms = list(get_by_name(n.attribute, "perm").ints)
             if perms == [0, 3, 1, 2]:
                 mt_cand = model.find_consumer(n.output[0])
                 # find_consumer yields None when nothing consumes the
                 # tensor, so check before touching op_type
                 if (mt_cand is not None
                         and mt_cand.op_type == "MultiThreshold"
                         and not model.is_fork_node(mt_cand)):
                     final_t_cand = model.find_consumer(mt_cand.output[0])
                     if final_t_cand is None:
                         # the MultiThreshold output has no consumer, so
                         # neither handled pattern can match
                         continue
                     if final_t_cand.op_type == "Transpose":
                         perms = list(
                             get_by_name(final_t_cand.attribute,
                                         "perm").ints)
                         if perms == [0, 2, 3, 1]:
                             mt = getCustomOp(mt_cand)
                             mt.set_nodeattr("data_layout", "NHWC")
                             # get rid of transpose nodes, wire MT directly
                             mt_cand.input[0] = n.input[0]
                             mt_cand.output[0] = final_t_cand.output[0]
                             graph.node.remove(n)
                             graph.node.remove(final_t_cand)
                             graph_modified = True
                     elif final_t_cand.op_type == "Reshape":
                         oshape = model.get_tensor_shape(
                             final_t_cand.output[0])
                         if len(oshape) == 2:
                             # transition to FC part, can still use NHWC
                             mt = getCustomOp(mt_cand)
                             mt.set_nodeattr("data_layout", "NHWC")
                             # get rid of first transpose node
                             mt_cand.input[0] = n.input[0]
                             # fix output shape for MultiThreshold
                             mt_ishape = model.get_tensor_shape(
                                 mt_cand.input[0])
                             (b, h, w, c) = mt_ishape
                             assert (h == 1
                                     and w == 1), """Untested spatial dim
                             in conv->fc transition, proceed with caution!"""
                             model.set_tensor_shape(mt_cand.output[0],
                                                    mt_ishape)
                             graph.node.remove(n)
                             graph_modified = True
     if graph_modified:
         model = model.transform(InferDataTypes())
     return (model, graph_modified)
Esempio n. 20
0
def test_fpgadataflow_ipstitch_iodma_floorplan():
    """Check the partition ids assigned by Floorplan after IODMA insertion.

    The model comes from create_one_fc_model(). If its first node is a
    StreamingDataflowPartition, the model file named by that node's "model"
    attribute is loaded through load_test_checkpoint_or_skip and used instead.
    The transformed model is saved into ip_stitch_model_dir.
    """
    model = create_one_fc_model()
    first_node = model.graph.node[0]
    if first_node.op_type == "StreamingDataflowPartition":
        sdp_node = getCustomOp(first_node)
        assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
        assert os.path.isfile(sdp_node.get_nodeattr("model"))
        model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model"))

    # apply the transformations one after the other, in this order
    for transformation_cls in (InferDataLayouts, InsertIODMA, Floorplan):
        model = model.transform(transformation_cls())

    # expected "partition_id" of graph nodes 0, 1 and 2, in that order
    expected_partition_ids = [0, 2, 1]
    for node_idx, expected_id in enumerate(expected_partition_ids):
        node_inst = getCustomOp(model.graph.node[node_idx])
        assert node_inst.get_nodeattr("partition_id") == expected_id

    save_path = (ip_stitch_model_dir +
                 "/test_fpgadataflow_ipstitch_iodma_floorplan.onnx")
    model.save(save_path)
Esempio n. 21
0
    def apply(self, model):
        """Set node attributes from the configuration held in ``self.config``.

        ``self.config`` is either a dict or the path of a JSON file holding
        one. Keys are node names mapping to ``{attribute: value}`` dicts. The
        optional "Defaults" entry maps an attribute name to a sequence whose
        first item is the value and whose second item is either "all" or a
        collection of op types the default applies to. Defaults are applied
        first, so a node's own configuration overrides them.

        Nodes for which getCustomOp fails are skipped. Warnings are issued
        for nodes without a configuration entry and for configuration
        entries that were not used by any node.

        Returns:
            ``(model, False)``; one pass is sufficient.
        """
        # function-scope import, resolved once instead of once per node
        from finn.custom_op.registry import getCustomOp

        if isinstance(self.config, dict):
            model_config = self.config
        else:
            with open(self.config, "r") as f:
                model_config = json.load(f)

        # a set keeps the "unused configuration" check below cheap
        used_configurations = {"Defaults"}
        missing_configurations = []

        # The defaults filter does not depend on the node, so build it once.
        # A configuration without a "Defaults" entry simply has no defaults.
        global_defaults = {
            attr: spec
            for attr, spec in model_config.get("Defaults", {}).items()
            if attr not in model_config
        }

        # Configure network
        for node in model.graph.node:
            try:
                node_config = model_config[node.name]
            except KeyError:
                missing_configurations.append(node.name)
                node_config = {}

            try:
                inst = getCustomOp(node)
            except Exception:
                # no custom-op wrapper could be created for this node, so
                # there is nothing to configure on it
                continue
            used_configurations.add(node.name)

            # set specified defaults that apply to this op type
            node_defaults = {
                attr: spec[0]
                for attr, spec in global_defaults.items()
                if spec[1] == "all" or node.op_type in spec[1]
            }
            for attr, value in node_defaults.items():
                inst.set_nodeattr(attr, value)

            # set node attributes from specified configuration
            for attr, value in node_config.items():
                inst.set_nodeattr(attr, value)

        # Configuration verification
        if missing_configurations:
            warnings.warn("\nNo HW configuration for nodes: " +
                          ", ".join(missing_configurations))

        unused_configs = [
            name for name in model_config if name not in used_configurations
        ]
        if unused_configs:
            warnings.warn("\nUnused HW configurations: " +
                          ", ".join(unused_configs))

        # one iteration is enough
        return (model, False)
Esempio n. 22
0
 def apply(self, model):
     """Replace relative paths in generated Verilog files by absolute ones.

     For each fpgadataflow node whose "ipgen_path" attribute names an
     existing directory, every ``.v`` file found below that directory is
     rewritten in place: the leading ``.`` of a ``$readmemh`` argument and
     of any string starting with ``./`` is replaced by the directory that
     contains the file. A KeyError raised while handling a node is ignored.

     Returns:
         ``(model, False)``.
     """
     for node in model.graph.node:
         if is_fpgadataflow_node(node) is not True:
             continue
         try:
             # lookup op_type in registry of CustomOps
             inst = registry.getCustomOp(node)
             # find the IP gen dir
             ipgen_path = inst.get_nodeattr("ipgen_path")
             if ipgen_path is None or not os.path.isdir(ipgen_path):
                 continue
             for dname, _subdirs, files in os.walk(ipgen_path):
                 verilog_names = [fn for fn in files if fn.endswith(".v")]
                 for fname in verilog_names:
                     fpath = os.path.join(dname, fname)
                     with open(fpath, "r") as f:
                         contents = f.read()
                     # (pattern, replacement) pairs, applied in this order
                     substitutions = (
                         ('$readmemh(".', '$readmemh("%s' % dname),
                         ('"./', '"%s/' % dname),
                     )
                     for old, new in substitutions:
                         contents = contents.replace(old, new)
                     with open(fpath, "w") as f:
                         f.write(contents)
         except KeyError:
             pass
     return (model, False)
Esempio n. 23
0
 def applyNodeLocal(self, node):
     """Compile the generated cppsim code of a single fpgadataflow node.

     Nodes for which is_fpgadataflow_node is not True are returned untouched.

     Raises:
         Exception: if the registry lookup for the node raises KeyError,
             i.e. the op_type is not supported. The KeyError is chained as
             the cause.
         AssertionError: if "code_gen_dir_cppsim" is empty before the
             compilation, or "executable_path" is empty after it.

     Returns:
         ``(node, False)``.
     """
     op_type = node.op_type
     if is_fpgadataflow_node(node) is True:
         # Only the registry lookup is guarded: a KeyError raised later
         # (e.g. during compilation) must not be reported as an unsupported
         # op_type.
         try:
             # lookup op_type in registry of CustomOps
             inst = registry.getCustomOp(node)
         except KeyError as err:
             # exception if op_type is not supported
             raise Exception(
                 "Custom op_type %s is currently not supported." % op_type
             ) from err
         else:
             # ensure that code is generated
             assert (
                 inst.get_nodeattr("code_gen_dir_cppsim") != ""
             ), """Node
             attribute "code_gen_dir_cppsim" is not set. Please run
             Transformation PrepareCppSim first."""
             # call the compilation function for this node
             inst.compile_singlenode_code()
             # ensure that executable path is now set
             assert (
                 inst.get_nodeattr("executable_path") != ""
             ), """Transformation
             compile was not successful, there is no path to executables set
             in node attribute "executable_path"."""
     return (node, False)
Esempio n. 24
0
 def connect_clk_rst(self, node):
     """Queue the block-design commands wiring up clock and reset of ``node``.

     While ``self.clock_reset_are_external`` is false, the node's clock and
     reset pins are made external and renamed to ap_clk / ap_rst_n, the flag
     is set and the names are recorded in ``self.intf_names``. Afterwards
     the node's pins are connected to those existing ports instead.
     Commands are appended to ``self.connect_cmds``.
     """
     inst_name = node.name
     node_inst = getCustomOp(node)
     clk_pin = node_inst.get_verilog_top_module_intf_names()["clk"][0]
     rst_pin = node_inst.get_verilog_top_module_intf_names()["rst"][0]

     if self.clock_reset_are_external:
         # external ports exist already: connect reset, then clock
         for port, pin in (("ap_rst_n", rst_pin), ("ap_clk", clk_pin)):
             if port == "ap_rst_n":
                 cmd = ("connect_bd_net [get_bd_ports ap_rst_n] "
                        "[get_bd_pins %s/%s]" % (inst_name, pin))
             else:
                 cmd = ("connect_bd_net [get_bd_ports ap_clk] "
                        "[get_bd_pins %s/%s]" % (inst_name, pin))
             self.connect_cmds.append(cmd)
         return

     # make clock and reset external, since they aren't already
     external_cmds = (
         "make_bd_pins_external [get_bd_pins %s/%s]" % (inst_name, clk_pin),
         "set_property name ap_clk [get_bd_ports ap_clk_0]",
         "make_bd_pins_external [get_bd_pins %s/%s]" % (inst_name, rst_pin),
         "set_property name ap_rst_n [get_bd_ports ap_rst_n_0]",
     )
     for cmd in external_cmds:
         self.connect_cmds.append(cmd)
     self.clock_reset_are_external = True
     self.intf_names["clk"] = ["ap_clk"]
     self.intf_names["rst"] = ["ap_rst_n"]
Esempio n. 25
0
 def connect_axi(self, node):
     """Queue the commands making the AXI interfaces of ``node`` external.

     An AXI-lite interface, if present, is made external and recorded in
     ``self.intf_names["axilite"]`` as ``<name>_<index>``, where the index
     is the number of AXI-lite interfaces recorded so far. An AXI-MM
     interface, if present, is made external, renamed to m_axi_gmem0 and
     recorded in ``self.intf_names["aximm"]``.

     Raises:
         AssertionError: if the node has an AXI-MM interface and one was
             already connected. The check runs before anything is queued,
             so a failure leaves ``self`` unmodified.
     """
     inst_name = node.name
     node_inst = getCustomOp(node)
     # one lookup serves both interface kinds
     top_intf_names = node_inst.get_verilog_top_module_intf_names()
     axilite_intf_name = top_intf_names["axilite"]
     aximm_intf_name = top_intf_names["aximm"]
     has_aximm_intf = len(aximm_intf_name) != 0

     if has_aximm_intf:
         # validate before queueing any command or touching intf_names
         assert self.has_aximm is False, "Currently limited to one AXI-MM interface"

     if len(axilite_intf_name) != 0:
         self.connect_cmds.append("make_bd_intf_pins_external "
                                  "[get_bd_intf_pins %s/%s]" %
                                  (inst_name, axilite_intf_name[0]))
         ext_if_name = "%s_%d" % (
             axilite_intf_name[0],
             len(self.intf_names["axilite"]),
         )
         self.intf_names["axilite"].append(ext_if_name)

     if has_aximm_intf:
         self.connect_cmds.append(
             "make_bd_intf_pins_external [get_bd_intf_pins %s/%s]" %
             (inst_name, aximm_intf_name[0]))
         self.connect_cmds.append(
             "set_property name m_axi_gmem0 [get_bd_intf_ports m_axi_gmem_0]"
         )
         self.intf_names["aximm"] = ["m_axi_gmem0"]
         self.has_aximm = True
Esempio n. 26
0
    def apply(self, model):
        """Bypass and remove shallow StreamingFIFO nodes.

        A StreamingFIFO is shallow when its "depth" attribute is at most
        ``self.shallow_threshold``. If the FIFO has a consumer, the
        consumer's input is rewired to the FIFO's input tensor; otherwise
        the producer's output is rewired to the FIFO's output tensor. The
        FIFO nodes are removed from the graph afterwards.

        Raises:
            AssertionError: if a shallow FIFO output has more than one
                consumer.

        Returns:
            ``(model, False)``.
        """
        shallow_fifos = []
        for node in model.graph.node:
            is_shallow_fifo = (
                node.op_type == "StreamingFIFO"
                and getCustomOp(node).get_nodeattr("depth") <=
                self.shallow_threshold)
            if not is_shallow_fifo:
                continue
            # bypass shallow fifos
            shallow_fifos.append(node)
            consumers = model.find_consumers(node.output[0])
            if not consumers:
                # "no consumers" may be reported as None or as an empty
                # list; either way the producer takes over the FIFO's output
                # NOTE(review): assumes the FIFO has a producer node here
                producer = model.find_producer(node.input[0])
                for idx, out_name in enumerate(producer.output):
                    if out_name == node.input[0]:
                        producer.output[idx] = node.output[0]
            else:
                assert len(
                    consumers) == 1, "Fanout detected from FIFO output"
                consumer = consumers[0]
                # set fifo input tensor as new input tensor of second node
                for idx, inp in enumerate(consumer.input):
                    if inp == node.output[0]:
                        consumer.input[idx] = node.input[0]
        # now filter out
        for node_to_remove in shallow_fifos:
            model.graph.node.remove(node_to_remove)

        return (model, False)
Esempio n. 27
0
    def apply(self, model):
        """Cap the depth of FIFOs between an input generator and an FC layer.

        Only StreamingFIFO nodes whose producer is a
        ConvolutionInputGenerator and whose consumer is a
        StreamingFCLayer_Batch are touched. Their "depth" becomes the
        smaller of the current depth and ``optimize_depth(w * ifold)``, with
        ``w`` and ``ifold`` taken from the FIFO's folded input shape.
        "impl_style" (and "ram_style" for the vivado case) is then chosen by
        comparing the new depth against ``self.max_qsrl_depth``.

        Returns:
            ``(model, False)``.
        """
        # TODO move this to own transformation
        for fifo in model.graph.node:
            # look for following pattern:
            # ConvolutionInputGenerator -> StreamingFIFO -> StreamingFCLayer
            if fifo.op_type != "StreamingFIFO":
                continue
            fifo_prod = model.find_producer(fifo.input[0])
            fifo_cons = model.find_consumer(fifo.output[0])
            fed_by_swg = (fifo_prod is not None
                          and fifo_prod.op_type == "ConvolutionInputGenerator")
            feeds_fc = (fifo_cons is not None
                        and fifo_cons.op_type == "StreamingFCLayer_Batch")
            if not (fed_by_swg and feeds_fc):
                continue

            fifo_inst = getCustomOp(fifo)
            current_depth = fifo_inst.get_nodeattr("depth")
            # SWG has an internal buffer of 1 row, so we use this as a
            # rule of thumb to set FIFO depth to be no larger than 1 row
            (_bs, _h, width, ifold, _simd) = fifo_inst.get_folded_input_shape()
            capped_depth = min(optimize_depth(width * ifold), current_depth)
            fifo_inst.set_nodeattr("depth", capped_depth)

            # Set FIFO implementation/ram styles
            needs_vivado_fifo = capped_depth > self.max_qsrl_depth
            if needs_vivado_fifo:
                fifo_inst.set_nodeattr("impl_style", "vivado")
                fifo_inst.set_nodeattr("ram_style", "auto")
            else:
                fifo_inst.set_nodeattr("impl_style", "rtl")

        return (model, False)
Esempio n. 28
0
def test_end2end_tfc_w1a2_run_on_pynq():
    """Compare the PYNQ deployment of TFC-w1a2 against the streamlined model.

    The streamlined model is executed to obtain the expected output. The
    dataflow parent model is then executed with its
    StreamingDataflowPartition pointing at the PYNQ deployment model, and
    both outputs must be close. The test is skipped when the PYNQ_IP
    environment variable is unset or empty; any other error fails the test.
    """
    # use the streamlined model as the "golden" model for right answers
    golden = ModelWrapper(build_dir + "/end2end_tfc_w1a2_streamlined.onnx")
    iname = golden.graph.input[0].name
    oname = golden.graph.output[0].name
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    x = nph.to_array(input_tensor)
    # run using FINN-based execution
    ret_golden = execute_onnx(golden, {iname: x}, True)
    y_golden = ret_golden[oname]
    # set up parent+child graph to test
    # we'll use models from the previous step as the child model
    parent_model = ModelWrapper(build_dir +
                                "/end2end_tfc_w1a2_dataflow_parent.onnx")
    iname = parent_model.graph.input[0].name
    oname = parent_model.graph.output[0].name

    # Read the variable without raising, so that a KeyError raised by the
    # execution below is reported as a failure instead of becoming a skip.
    ip = os.environ.get("PYNQ_IP", "")
    if ip == "":
        pytest.skip("PYNQ board IP address not specified")

    # point the dataflow partition at the PYNQ deployment model and execute
    sdp_node = parent_model.get_nodes_by_op_type(
        "StreamingDataflowPartition")[0]
    sdp_node = getCustomOp(sdp_node)
    sdp_node.set_nodeattr("model",
                          build_dir + "/end2end_tfc_w1a2_pynq_deploy.onnx")
    ret = execute_onnx(parent_model, {iname: x}, True)
    y = ret[oname]
    assert np.isclose(y, y_golden).all()
Esempio n. 29
0
 def applyNodeLocal(self, node):
     """Run IP generation for a single fpgadataflow node.

     Nodes for which is_fpgadataflow_node is not True are returned
     untouched. If the directory named by the node's "ipgen_path" attribute
     already exists, IP generation is not run and a warning is issued
     instead.

     Raises:
         Exception: if the registry lookup for the node raises KeyError,
             i.e. the op_type is not supported. The KeyError is chained as
             the cause.
         AssertionError: if "code_gen_dir_ipgen" is empty beforehand, or
             "ipgen_path" is empty afterwards.

     Returns:
         ``(node, False)``.
     """
     op_type = node.op_type
     if is_fpgadataflow_node(node) is True:
         # Only the registry lookup is guarded: a KeyError raised later
         # (e.g. during IP generation) must not be reported as an
         # unsupported op_type.
         try:
             # lookup op_type in registry of CustomOps
             inst = registry.getCustomOp(node)
         except KeyError as err:
             # exception if op_type is not supported
             raise Exception(
                 "Custom op_type %s is currently not supported." % op_type
             ) from err
         else:
             # ensure that code is generated
             assert (inst.get_nodeattr("code_gen_dir_ipgen") != ""), """Node
             attribute "code_gen_dir_ipgen" is empty. Please run
             transformation PrepareIP first."""
             if not os.path.isdir(inst.get_nodeattr("ipgen_path")):
                 # call the compilation function for this node
                 inst.ipgen_singlenode_code()
             else:
                 warnings.warn("Using pre-existing IP for %s" % node.name)
             # ensure that the IP path is now set
             assert (inst.get_nodeattr("ipgen_path") !=
                     ""), """Transformation
             HLSSynthIP was not successful. Node attribute "ipgen_path"
             is empty."""
     return (node, False)
Esempio n. 30
0
 def apply(self, model):
     """Annotate fpgadataflow nodes with their expected cycle count.

     For each node accepted by _is_fpgadataflow_node, the value of
     ``get_exp_cycles()`` is stored in the "cycles_estimate" attribute. For
     a StreamingDataflowPartition node, the model file named by its "model"
     attribute is loaded, transformed with AnnotateCycles and saved back to
     the same file.

     Returns:
         ``(model, False)``.
     """
     # annotate node cycles
     for node in model.graph.node:
         if _is_fpgadataflow_node(node):
             node_inst = registry.getCustomOp(node)
             node_inst.set_nodeattr("cycles_estimate",
                                    node_inst.get_exp_cycles())
             continue
         if node.op_type != "StreamingDataflowPartition":
             continue
         # recurse into model to manually annotate per-layer cycles
         child_filename = getCustomOp(node).get_nodeattr("model")
         child_model = ModelWrapper(child_filename).transform(
             AnnotateCycles())
         # save transformed model
         child_model.save(child_filename)
     return (model, False)