Example 1
def test_end2end_cnv_w1a1_fold_and_tlastmarker():
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_model.onnx")
    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    # each tuple is (PE, SIMD, in_fifo_depth) for a layer
    folding = [
        (16, 3, 128),
        (32, 32, 128),
        (16, 32, 128),
        (16, 32, 128),
        (4, 32, 81),
        (1, 32, 2),
        (1, 4, 2),
        (1, 8, 128),
        (5, 1, 3),
    ]
    for fcl, (pe, simd, ififodepth) in zip(fc_layers, folding):
        fcl_inst = getCustomOp(fcl)
        fcl_inst.set_nodeattr("PE", pe)
        fcl_inst.set_nodeattr("SIMD", simd)
        fcl_inst.set_nodeattr("inFIFODepth", ififodepth)

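    # give each ConvolutionInputGenerator the same SIMD as its corresponding conv layer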
    swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    for i in range(len(swg_layers)):
        swg_inst = getCustomOp(swg_layers[i])
        simd = folding[i][1]
        swg_inst.set_nodeattr("SIMD", simd)

    model = model.transform(InsertDWC())
    model = model.transform(InsertFIFO())
    model = model.transform(InsertTLastMarker())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(AnnotateResources("estimate"))
    model.save(build_dir + "/end2end_cnv_w1a1_folded.onnx")
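A hypothetical readback check (not in the original test) to confirm the folding was applied; the attribute values assumed below come from the folding list above:

model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_folded.onnx")
first_fc = getCustomOp(model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0])
assert first_fc.get_nodeattr("PE") == 16 and first_fc.get_nodeattr("SIMD") == 3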
Example 2
def test_end2end_cnv_w1a1_run_on_pynq():
    # use the streamlined model as the "golden" model for right answers
    golden = ModelWrapper(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
    iname = golden.graph.input[0].name
    oname = golden.graph.output[0].name
    # load one of the test vectors
    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    x = input_tensor
    # run using FINN-based execution
    ret_golden = execute_onnx(golden, {iname: x}, True)
    y_golden = ret_golden[oname]
    # set up parent+child graph to test
    # we'll use models from the previous step as the child model
    parent_model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_parent.onnx")
    iname = parent_model.graph.input[0].name
    oname = parent_model.graph.output[0].name
    try:
        ip = os.environ["PYNQ_IP"]  # NOQA
        if ip == "":
            pytest.skip("PYNQ board IP address not specified")
        # produce results by executing the deployed model on the PYNQ board
        sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
        sdp_node = getCustomOp(sdp_node)
        sdp_node.set_nodeattr("model", build_dir + "/end2end_cnv_w1a1_pynq_deploy.onnx")
        ret = execute_onnx(parent_model, {iname: x}, True)
        y = ret[oname]
        assert np.isclose(y, y_golden).all()
        assert np.argmax(y) == 3

    except KeyError:
        pytest.skip("PYNQ board IP address not specified")
Example 3
def step_qonnx_to_finn(model: ModelWrapper, cfg: DataflowBuildConfig):
    """
    This step will only execute if QONNX nodes are found.
    These include the following op_types: "Quant", "Trunc" and "BinaryQuant".
    If such nodes are found the step will run the tidy-up step from QONNX
    and then convert the QONNX model to the FINN-ONNX dialect.
    """
    # Check if any QONNX nodes exist, i.e. BinaryQuant, Quant or Trunc
    q_count = 0
    for op_type in ["BinaryQuant", "Quant", "Trunc"]:
        q_count += len(model.get_nodes_by_op_type(op_type))
    if q_count == 0:
        return model

    # QONNX cleanup
    model = cleanup_model(model)
    # QONNX to FINN-ONNX
    model = model.transform(
        ConvertQONNXtoFINN(
            filter_function=default_filter_function_generator(
                max_multithreshold_bit_width=cfg.max_multithreshold_bit_width
            )
        )
    )

    if VerificationStepType.QONNX_TO_FINN_PYTHON in cfg._resolve_verification_steps():
        verify_step(model, cfg, "qonnx_to_finn_python", need_parent=False)

    return model
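A minimal standalone sketch of the QONNX-node detection used above; the model path is a placeholder, not from the original:

model = ModelWrapper("model.onnx")
q_count = sum(
    len(model.get_nodes_by_op_type(t)) for t in ["BinaryQuant", "Quant", "Trunc"]
)
print("QONNX quantization nodes found:", q_count)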
Example 4
def test_end2end_tfc_w1a2_run_on_pynq():
    # use the streamlined model as the "golden" model for right answers
    golden = ModelWrapper(build_dir + "/end2end_tfc_w1a2_streamlined.onnx")
    iname = golden.graph.input[0].name
    oname = golden.graph.output[0].name
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    x = nph.to_array(input_tensor)
    # x = np.zeros(ishape, dtype=np.float32)
    # run using FINN-based execution
    ret_golden = execute_onnx(golden, {iname: x}, True)
    y_golden = ret_golden[oname]
    # set up parent+child graph to test
    # we'll use models from the previous step as the child model
    parent_model = ModelWrapper(build_dir +
                                "/end2end_tfc_w1a2_dataflow_parent.onnx")
    iname = parent_model.graph.input[0].name
    oname = parent_model.graph.output[0].name
    try:
        ip = os.environ["PYNQ_IP"]  # NOQA
        if ip == "":
            pytest.skip("PYNQ board IP address not specified")
        # produce results by executing the deployed model on the PYNQ board
        sdp_node = parent_model.get_nodes_by_op_type(
            "StreamingDataflowPartition")[0]
        sdp_node = getCustomOp(sdp_node)
        sdp_node.set_nodeattr("model",
                              build_dir + "/end2end_tfc_w1a2_pynq_deploy.onnx")
        ret = execute_onnx(parent_model, {iname: x}, True)
        y = ret[oname]
        assert np.isclose(y, y_golden).all()

    except KeyError:
        pytest.skip("PYNQ board IP address not specified")
Example 5
def step_convert_to_hls(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Convert eligible nodes to `HLSCustomOp` subclasses that represent HLS
    layers. Which nodes and particular configurations can be converted to HLS
    is limited, see the source code of the `convert_to_hls` module for more."""

    mem_mode = cfg.default_mem_mode.value
    if cfg.standalone_thresholds:
        # doing this first causes all threshold layers to be standalone
        model = model.transform(to_hls.InferThresholdingLayer())
    # needed for bipolar MatMul layers
    model = model.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode))
    # needed for non-bipolar MatMul layers
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
    # TopK to LabelSelect
    model = model.transform(to_hls.InferLabelSelectLayer())
    # input quantization (if any) as standalone threshold
    model = model.transform(to_hls.InferThresholdingLayer())
    # needed for convolutions -- TODO always exec?
    need_conv = len(model.get_nodes_by_op_type("Im2Col")) > 0
    if need_conv:
        model = model.transform(to_hls.InferConvInpGen())
        model = model.transform(to_hls.InferStreamingMaxPool())
        model = model.transform(RemoveCNVtoFCFlatten())
    # get rid of Transpose -> Transpose identity seq
    model = model.transform(absorb.AbsorbConsecutiveTransposes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(InferDataLayouts())
    return model
Example 6
def copy_onnx_model(parent_model_path,
                    new_path,
                    ip_src_path="/tmp/finn_dev_justin"):
    """Copy all IP into new_path, update all paths in the child models, then
    save a copy of each child model to new_path; update the DataflowPartitions
    in the parent model, then save a copy of the new parent model to new_path.

    IMPORTANT: all Verilog paths must be relative for this to work."""
    parent_model = ModelWrapper(parent_model_path)
    streaming_dataflow_partition_nodes = parent_model.get_nodes_by_op_type(
        "StreamingDataflowPartition")
    num_child_models = len(streaming_dataflow_partition_nodes)
    list_of_new_child_model_paths = []
    for i in range(0, num_child_models):
        child_model_path = getCustomOp(
            streaming_dataflow_partition_nodes[i]).get_nodeattr("model")
        child_model = ModelWrapper(child_model_path)
        # Copy the IP into new_path and update child model paths
        new_child_model = copy_ip(child_model, new_path, ip_src_path)
        # Save the new child_model
        new_child_model_path = new_path + f"/child_{i}.onnx"
        new_child_model.save(new_child_model_path)
        list_of_new_child_model_paths.append(new_child_model_path)
    # Update the parent model by attaching the new child model paths, then save
    new_parent_model = attach_child_models_to_parent_model(
        parent_model, list_of_new_child_model_paths)
    new_parent_model_path = new_path + "/parent.onnx"
    new_parent_model.save(new_parent_model_path)
    return new_parent_model_path
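A hypothetical invocation of the helper above; both paths are placeholders:

relocated_parent = copy_onnx_model(
    "/tmp/finn_dev_justin/parent.onnx", "/tmp/finn_build_copy"
)
print("relocated parent model saved to", relocated_parent)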
Example 7
def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run streamlining on given model. Streamlining involves moving floating point
    scale/shift parameters around, collapsing adjacent ones into a single parameter,
    then absorbing the scale/shift into the following `MultiThreshold` node.
    Streamlining requires careful topology design and cannot be applied to all
    topologies.
    """

    model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold())
    model = model.transform(Streamline())
    need_lowering = len(model.get_nodes_by_op_type("Conv")) > 0
    if need_lowering:
        model = model.transform(LowerConvsToMatMul())
        model = model.transform(MakeMaxPoolNHWC())
        model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
        model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    # absorb final add-mul nodes into TopK
    model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferDataLayouts())
    model = model.transform(RemoveUnusedTensors())

    if VerificationStepType.STREAMLINED_PYTHON in cfg._resolve_verification_steps():
        verify_step(model, cfg, "streamlined_python", need_parent=False)

    return model
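A sketch of running this step standalone, assuming cfg is an existing DataflowBuildConfig and the file names are placeholders:

model = ModelWrapper("tidy.onnx")
model = step_streamline(model, cfg)
model.save("streamlined.onnx")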
Example 8
def test_const_folding_shapes():
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    model = model.transform(InferShapes())
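    # before folding, the MatMul weight is produced by a Reshape and has no initializer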
    mm_node_w_in = model.get_nodes_by_op_type("MatMul")[0].input[1]
    assert model.find_producer(mm_node_w_in) is not None
    assert model.find_producer(mm_node_w_in).op_type == "Reshape"
    assert model.get_initializer(mm_node_w_in) is None
    model = model.transform(FoldConstants())
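    # after folding, the Reshape producer is gone and the weight is an initializer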
    assert model.find_producer(mm_node_w_in) is None
    assert model.get_initializer(mm_node_w_in) is not None
Example 9
def test_modelwrapper():
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    assert model.check_all_tensor_shapes_specified() is True
    inp_name = model.graph.input[0].name
    inp_shape = model.get_tensor_shape(inp_name)
    assert inp_shape == [1, 1, 28, 28]
    conv_nodes = model.get_nodes_by_op_type("Conv")
    matmul_nodes = model.get_nodes_by_op_type("MatMul")
    assert len(conv_nodes) == 2
    assert len(matmul_nodes) == 1
    first_conv = conv_nodes[0]
    first_conv_iname = first_conv.input[0]
    first_conv_wname = first_conv.input[1]
    first_conv_oname = first_conv.output[0]
    assert first_conv_iname != "" and (first_conv_iname is not None)
    assert first_conv_wname != "" and (first_conv_wname is not None)
    assert first_conv_oname != "" and (first_conv_oname is not None)
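    # read the Conv weight initializer, then overwrite it with random values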
    first_conv_weights = model.get_initializer(first_conv_wname)
    assert first_conv_weights.shape == (8, 1, 5, 5)
    first_conv_weights_rand = np.random.randn(8, 1, 5, 5)
    model.set_initializer(first_conv_wname, first_conv_weights_rand)
    assert (model.get_initializer(first_conv_wname) == first_conv_weights_rand
            ).all()
    inp_cons = model.find_consumer(first_conv_iname)
    assert inp_cons == first_conv
    out_prod = model.find_producer(first_conv_oname)
    assert out_prod == first_conv
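    # layout and sparsity annotations are unset (None) until explicitly set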
    inp_layout = model.get_tensor_layout(first_conv_iname)
    assert inp_layout is None
    inp_layout = DataLayout.NCHW
    model.set_tensor_layout(first_conv_iname, inp_layout)
    assert model.get_tensor_layout(first_conv_iname) == inp_layout
    inp_sparsity = model.get_tensor_sparsity(first_conv_iname)
    assert inp_sparsity is None
    inp_sparsity = {"dw": {"kernel_shape": [3, 3]}}
    model.set_tensor_sparsity(first_conv_iname, inp_sparsity)
    assert model.get_tensor_sparsity(first_conv_iname) == inp_sparsity
Example 10
def test_linear_past_eltwise_add(ch, ifmdim):
    # generate test vectors of correct shape
    if ifmdim == -1:
        input_tensor_shape = (1, ch)
    else:
        input_tensor_shape = (1, ch, ifmdim, ifmdim)

    model = make_model(input_tensor_shape)
    model.save(export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())

    x1 = np.random.randn(*input_tensor_shape).astype(np.float32)
    x2 = np.random.randn(*input_tensor_shape).astype(np.float32)

    # generate reference values from the net before the transformation
    input_dict = {model.graph.input[0].name: x1, model.graph.input[1].name: x2}

    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_sum = output_dict[model.graph.output[0].name]
    expected_sum = 3.0 * ((x1 + x2) + 15.0)
    assert np.isclose(expected_sum, produced_sum, atol=1e-3).all()
    assert len(model.get_nodes_by_op_type("Add")) == 3
    assert len(model.get_nodes_by_op_type("Mul")) == 2

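    # moving the identical scalar Mul/Add ops past the eltwise Add merges the two branches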
    model = model.transform(MoveLinearPastEltwiseAdd())

    # verify again, to check we didn't break anything
    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_sum = output_dict[model.graph.output[0].name]
    assert np.isclose(expected_sum, produced_sum, atol=1e-3).all()
    assert len(model.get_nodes_by_op_type("Add")) == 2
    assert len(model.get_nodes_by_op_type("Mul")) == 1

    os.remove(export_onnx_path)
Example 11
def inference_cost(model_filename,
                   *,
                   output_json=None,
                   output_onnx=None,
                   preprocess=True,
                   discount_sparsity=True):
    """Print the inference cost estimate metric for given ONNX model.
    Supports the Quant op for weight/activation quantization.

    :param model_filename: Filename for ONNX model
    :param output_json: Optional JSON filename to save the inference cost dict
    :param output_onnx: Optional ONNX filename to save the final model after any
        preprocessing
    :param preprocess: If set, run preprocessing steps such as shape inference,
        datatype inference and constant folding. Strongly recommended.
    :param discount_sparsity: If set, will discount op cost of MAC ops with a
        constant zero weight, and the mem cost of constant zero weights.
    """
    print("Inference cost for " + model_filename)
    model = ModelWrapper(model_filename)
    if preprocess:
        qnt_nodes = model.get_nodes_by_op_type("Quant")
        for qnt_node in qnt_nodes:
            qnt_node.domain = "finn.custom_op.general"
        model = model.transform(InferShapes())
        model = model.transform(GiveUniqueParameterTensors())
        model = model.transform(InferDataTypes())
        model = model.transform(FoldConstants())
        model = model.transform(RemoveUnusedTensors())
        model = model.transform(RemoveStaticGraphInputs())
        model = model.transform(InferDataTypes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    if output_onnx is not None:
        model.save(output_onnx)
    ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity))
    bops = compute_bops(ret)
    mem_w_bits = compute_mem_bits(ret, "mem_w")
    mem_o_bits = compute_mem_bits(ret, "mem_o")
    ret["total_bops"] = bops
    ret["total_mem_w_bits"] = mem_w_bits
    ret["total_mem_o_bits"] = mem_o_bits

    if "unsupported" in ret:
        ret["unsupported"] = str(ret["unsupported"])
    print(json.dumps(ret, sort_keys=True, indent=2))

    if output_json is not None:
        with open(output_json, "w") as f:
            json.dump(ret, f, sort_keys=True, indent=2)
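A hypothetical call; the file names are placeholders, not from the original:

inference_cost(
    "model.onnx",
    output_json="inference_cost.json",
    output_onnx="model_preprocessed.onnx",
)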
Example 12
def test_end2end_cnv_w1a1_verify_all():
    # use the streamlined model as the "golden" model for right answers
    golden = ModelWrapper(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
    iname = golden.graph.input[0].name
    oname = golden.graph.output[0].name
    # load one of the test vectors
    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    x = input_tensor
    # x = np.zeros(ishape, dtype=np.float32)
    ret_golden = execute_onnx(golden, {iname: x}, True)
    y_golden = ret_golden[oname]
    # set up parent+child graph to test
    # we'll use models from the previous step as the child model
    parent_model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_parent.onnx")
    iname = parent_model.graph.input[0].name
    oname = parent_model.graph.output[0].name
    # produce results with cppsim
    sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
    sdp_node = getCustomOp(sdp_node)
    sdp_node.set_nodeattr("model", build_dir + "/end2end_cnv_w1a1_ipgen_cppsim.onnx")
    ret_cppsim = execute_onnx(parent_model, {iname: x}, True)
    y_cppsim = ret_cppsim[oname]
    # produce results with node-by-node rtlsim
    sdp_node.set_nodeattr(
        "model", build_dir + "/end2end_cnv_w1a1_ipgen_nodebynode_rtlsim.onnx"
    )
    ret_nodebynode_rtlsim = execute_onnx(parent_model, {iname: x}, True)
    y_nodebynode_rtlsim = ret_nodebynode_rtlsim[oname]
    # produce results with whole-network (stitched ip) rtlsim
    sdp_node.set_nodeattr(
        "model", build_dir + "/end2end_cnv_w1a1_ipstitch_whole_rtlsim.onnx"
    )
    # this is a particularly long-running test, set liveness thr. to unlimited
    os.environ["LIVENESS_THRESHOLD"] = "-1"
    ret_whole_rtlsim = execute_onnx(parent_model, {iname: x}, True)
    y_whole_rtlsim = ret_whole_rtlsim[oname]
    assert np.isclose(y_golden, y_cppsim).all()
    assert np.isclose(y_golden, y_nodebynode_rtlsim).all()
    assert np.isclose(y_golden, y_whole_rtlsim).all()
    assert np.argmax(y_golden) == 3
Example 13
def test_end2end_tfc_w1a2_verify_all():
    # use the streamlined model as the "golden" model for right answers
    golden = ModelWrapper(build_dir + "/end2end_tfc_w1a2_streamlined.onnx")
    iname = golden.graph.input[0].name
    oname = golden.graph.output[0].name
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    x = nph.to_array(input_tensor)
    # x = np.zeros(ishape, dtype=np.float32)
    ret_golden = execute_onnx(golden, {iname: x}, True)
    y_golden = ret_golden[oname]
    # set up parent+child graph to test
    # we'll use models from the previous step as the child model
    parent_model = ModelWrapper(build_dir +
                                "/end2end_tfc_w1a2_dataflow_parent.onnx")
    iname = parent_model.graph.input[0].name
    oname = parent_model.graph.output[0].name
    # produce results with cppsim
    sdp_node = parent_model.get_nodes_by_op_type(
        "StreamingDataflowPartition")[0]
    sdp_node = getCustomOp(sdp_node)
    sdp_node.set_nodeattr("model",
                          build_dir + "/end2end_tfc_w1a2_ipstitch_cppsim.onnx")
    ret_cppsim = execute_onnx(parent_model, {iname: x}, True)
    y_cppsim = ret_cppsim[oname]
    # produce results with node-by-node rtlsim
    sdp_node.set_nodeattr(
        "model",
        build_dir + "/end2end_tfc_w1a2_ipstitch_nodebynode_rtlsim.onnx")
    ret_nodebynode_rtlsim = execute_onnx(parent_model, {iname: x}, True)
    y_nodebynode_rtlsim = ret_nodebynode_rtlsim[oname]
    # produce results with whole-network (stitched ip) rtlsim
    sdp_node.set_nodeattr(
        "model", build_dir + "/end2end_tfc_w1a2_ipstitch_whole_rtlsim.onnx")
    ret_whole_rtlsim = execute_onnx(parent_model, {iname: x}, True)
    y_whole_rtlsim = ret_whole_rtlsim[oname]
    assert np.isclose(y_golden, y_cppsim).all()
    assert np.isclose(y_golden, y_nodebynode_rtlsim).all()
    assert np.isclose(y_golden, y_whole_rtlsim).all()
Example 14
def test_end2end_tfc_w1a2_fold_and_tlastmarker():
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_dataflow_model.onnx")
    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    # (PE, SIMD, in_fifo_depth, out_fifo_depth, ramstyle) for each layer
    config = [
        (16, 49, 16, 64, "block"),
        (8, 8, 64, 64, "auto"),
        (8, 8, 64, 64, "auto"),
        (10, 8, 64, 10, "distributed"),
    ]
    for fcl, (pe, simd, ififo, ofifo, ramstyle) in zip(fc_layers, config):
        fcl_inst = getCustomOp(fcl)
        fcl_inst.set_nodeattr("PE", pe)
        fcl_inst.set_nodeattr("SIMD", simd)
        fcl_inst.set_nodeattr("inFIFODepth", ififo)
        fcl_inst.set_nodeattr("outFIFODepth", ofifo)
        fcl_inst.set_nodeattr("ram_style", ramstyle)
    model = model.transform(InsertDWC())
    model = model.transform(InsertFIFO())
    model = model.transform(InsertTLastMarker())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(AnnotateResources("estimate"))
    model.save(build_dir + "/end2end_tfc_w1a2_folded.onnx")
Example 15
def test_brevitas_compare_exported_mobilenet():
    if "IMAGENET_VAL_PATH" not in os.environ.keys():
        pytest.skip("Can't do validation without IMAGENET_VAL_PATH")
    n_images = 10
    debug_mode = False
    export_onnx_path = make_build_dir("test_brevitas_mobilenet-v1_")
    # export preprocessing
    preproc_onnx = export_onnx_path + "/quant_mobilenet_v1_4b_preproc.onnx"
    preproc = NormalizePreProc(mean, std, ch)
    bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
    preproc_model = ModelWrapper(preproc_onnx)
    preproc_model = preproc_model.transform(InferShapes())
    preproc_model = preproc_model.transform(GiveUniqueNodeNames())
    preproc_model = preproc_model.transform(GiveUniqueParameterTensors())
    preproc_model = preproc_model.transform(GiveReadableTensorNames())
    # export the actual MobileNet-v1
    finn_onnx = export_onnx_path + "/quant_mobilenet_v1_4b.onnx"
    mobilenet = get_test_model_trained("mobilenet", 4, 4)
    if debug_mode:
        dbg_hook = bo.enable_debug(mobilenet)
    bo.export_finn_onnx(mobilenet, (1, 3, 224, 224), finn_onnx)
    model = ModelWrapper(finn_onnx)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(RemoveStaticGraphInputs())
    model = model.transform(InsertTopK())
    # get initializer from Mul that will be absorbed into topk

    a0 = model.get_initializer(model.get_nodes_by_op_type("Mul")[-1].input[1])
    model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveUniqueParameterTensors())
    model = model.transform(GiveReadableTensorNames())
    model.save(export_onnx_path + "/quant_mobilenet_v1_4b_wo_preproc.onnx")
    # create merged preprocessing + MobileNet-v1 model
    model = model.transform(MergeONNXModels(preproc_model))
    model.save(export_onnx_path + "/quant_mobilenet_v1_4b.onnx")

    with open(
        export_onnx_path + "/mobilenet_validation.csv", "w", newline=""
    ) as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(
            [
                "goldenID",
                "brevitasTop5",
                "brevitasTop5[%]",
                "finnTop5",
                "finnTop5[%]",
                "top5equal",
                "top5%equal",
            ]
        )
        csvfile.flush()
        workload = imagenet_util.get_val_images(n_images, interleave_classes=True)
        all_inds_ok = True
        all_probs_ok = True
        for (img_path, target_id) in workload:
            img_np = imagenet_util.load_resize_crop(img_path)
            img_torch = torch.from_numpy(img_np).float()
            # do forward pass in PyTorch/Brevitas
            input_tensor = preproc.forward(img_torch)
            expected = mobilenet.forward(input_tensor).detach().numpy()
            expected_topk = expected.flatten()
            expected_top5 = np.argsort(expected_topk)[-5:]
            expected_top5 = np.flip(expected_top5)
            expected_top5_prob = []
            for index in expected_top5:
                expected_top5_prob.append(expected_topk[index])
            idict = {model.graph.input[0].name: img_np}
            odict = oxe.execute_onnx(model, idict, return_full_exec_context=True)
            produced = odict[model.graph.output[0].name]
            produced_prob = odict["TopK_0_out0"] * a0
            inds_ok = (produced.flatten() == expected_top5).all()
            probs_ok = np.isclose(produced_prob.flatten(), expected_top5_prob).all()
            all_inds_ok = all_inds_ok and inds_ok
            all_probs_ok = all_probs_ok and probs_ok
            writer.writerow(
                [
                    str(target_id),
                    str(expected_top5),
                    str(expected_top5_prob),
                    str(produced.flatten()),
                    str(produced_prob.flatten()),
                    str(inds_ok),
                    str(probs_ok),
                ]
            )
            csvfile.flush()
            if ((not inds_ok) or (not probs_ok)) and debug_mode:
                print("Results differ for %s" % img_path)
                # check all tensors at debug markers
                names_brevitas = set(dbg_hook.values.keys())
                names_finn = set(odict.keys())
                names_common = names_brevitas.intersection(names_finn)
                for dbg_name in names_common:
                    if not np.isclose(
                        dbg_hook.values[dbg_name].detach().numpy(),
                        odict[dbg_name],
                        atol=1e-3,
                    ).all():
                        print("Tensor %s differs between Brevitas and FINN" % dbg_name)
        assert all_inds_ok and all_probs_ok
Example 16
def test_end2end_cybsec_mlp_export(QONNX_export):
    assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
    # load up trained net in Brevitas
    input_size = 593
    hidden1 = 64
    hidden2 = 64
    hidden3 = 64
    weight_bit_width = 2
    act_bit_width = 2
    num_classes = 1
    model = nn.Sequential(
        QuantLinear(input_size,
                    hidden1,
                    bias=True,
                    weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden1),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden1,
                    hidden2,
                    bias=True,
                    weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden2),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden2,
                    hidden3,
                    bias=True,
                    weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden3),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden3,
                    num_classes,
                    bias=True,
                    weight_bit_width=weight_bit_width),
    )
    trained_state_dict = torch.load(assets_dir +
                                    "/state_dict.pth")["models_state_dict"][0]
    model.load_state_dict(trained_state_dict, strict=False)
    W_orig = model[0].weight.data.detach().numpy()
    # pad the second (593-sized) dimension with 7 zeroes at the end
    W_new = np.pad(W_orig, [(0, 0), (0, 7)])
    model[0].weight.data = torch.from_numpy(W_new)
    model_for_export = CybSecMLPForExport(model)
    export_onnx_path = get_checkpoint_name("export", QONNX_export)
    input_shape = (1, 600)
    # create a QuantTensor instance to mark the input as bipolar during export
    # (randint's upper bound is exclusive, so (0, 2) yields values in {0, 1})
    input_a = np.random.randint(0, 2, size=input_shape).astype(np.float32)
    input_a = 2 * input_a - 1
    scale = 1.0
    input_t = torch.from_numpy(input_a * scale)
    input_qt = QuantTensor(input_t,
                           scale=torch.tensor(scale),
                           bit_width=torch.tensor(1.0),
                           signed=True)

    if QONNX_export:
        # With the BrevitasONNXManager we need to manually set
        # the FINN DataType at the input
        BrevitasONNXManager.export(model_for_export,
                                   input_shape,
                                   export_path=export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model.set_tensor_datatype(model.graph.input[0].name,
                                  DataType["BIPOLAR"])
        model.save(export_onnx_path)
        qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model = model.transform(ConvertQONNXtoFINN())
        model.save(export_onnx_path)
    else:
        bo.export_finn_onnx(model_for_export,
                            export_path=export_onnx_path,
                            input_t=input_qt)
    assert os.path.isfile(export_onnx_path)
    # fix input datatype
    finn_model = ModelWrapper(export_onnx_path)
    finnonnx_in_tensor_name = finn_model.graph.input[0].name
    assert tuple(finn_model.get_tensor_shape(finnonnx_in_tensor_name)) == (1, 600)
    # verify a few exported ops
    if QONNX_export:
        # The first "Mul" node doesn't exist in the QONNX export,
        # because the QuantTensor scale is not exported.
        # However, this node would have been unity scale anyways and
        # the models are still equivalent.
        assert finn_model.graph.node[0].op_type == "Add"
        assert finn_model.graph.node[1].op_type == "Div"
        assert finn_model.graph.node[2].op_type == "MatMul"
        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
    else:
        assert finn_model.graph.node[0].op_type == "Mul"
        assert finn_model.get_initializer(
            finn_model.graph.node[0].input[1]) == 1.0
        assert finn_model.graph.node[1].op_type == "Add"
        assert finn_model.graph.node[2].op_type == "Div"
        assert finn_model.graph.node[3].op_type == "MatMul"
        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
    # verify datatypes on some tensors
    assert (finn_model.get_tensor_datatype(finnonnx_in_tensor_name) ==
            DataType["BIPOLAR"])
    first_matmul_w_name = finn_model.get_nodes_by_op_type("MatMul")[0].input[1]
    assert finn_model.get_tensor_datatype(
        first_matmul_w_name) == DataType["INT2"]
Example 17
def test_brevitas_debug(QONNX_export, QONNX_FINN_conversion):
    if (not QONNX_export) and QONNX_FINN_conversion:
        pytest.skip(
            "This test configuration is not valid and is thus skipped.")
    finn_onnx = "test_brevitas_debug.onnx"
    fc = get_test_model_trained("TFC", 2, 2)
    ishape = (1, 1, 28, 28)
    if QONNX_export:
        dbg_hook = bo.enable_debug(fc, proxy_level=True)
        BrevitasONNXManager.export(fc, ishape, finn_onnx)
        # DebugMarkers have the brevitas.onnx domain, so that needs adjusting
        model = ModelWrapper(finn_onnx)
        dbg_nodes = model.get_nodes_by_op_type("DebugMarker")
        for dbg_node in dbg_nodes:
            dbg_node.domain = "finn.custom_op.general"
        model.save(finn_onnx)
        qonnx_cleanup(finn_onnx, out_file=finn_onnx)
        if QONNX_FINN_conversion:
            model = ModelWrapper(finn_onnx)
            model = model.transform(ConvertQONNXtoFINN())
            model.save(finn_onnx)
    else:
        dbg_hook = bo.enable_debug(fc)
        bo.export_finn_onnx(fc, ishape, finn_onnx)
        model = ModelWrapper(finn_onnx)
        # DebugMarkers have the brevitas.onnx domain, so that needs adjusting
        # TODO: we should probably have a transformation pass that does this
        #  domain conversion for us
        dbg_nodes = model.get_nodes_by_op_type("DebugMarker")
        for dbg_node in dbg_nodes:
            dbg_node.domain = "finn.custom_op.general"
        model = model.transform(InferShapes())
        model = model.transform(FoldConstants())
        model = model.transform(RemoveStaticGraphInputs())
        model.save(finn_onnx)
    model = ModelWrapper(finn_onnx)
    assert len(model.graph.input) == 1
    assert len(model.graph.output) == 1
    # load one of the test vectors
    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    # run using FINN-based execution
    input_dict = {model.graph.input[0].name: nph.to_array(input_tensor)}
    output_dict = oxe.execute_onnx(model,
                                   input_dict,
                                   return_full_exec_context=True)
    produced = output_dict[model.graph.output[0].name]
    # run using PyTorch/Brevitas
    input_tensor = torch.from_numpy(nph.to_array(input_tensor)).float()
    assert input_tensor.shape == (1, 1, 28, 28)
    # do forward pass in PyTorch/Brevitas
    expected = fc.forward(input_tensor).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
    # check all tensors at debug markers
    names_brevitas = set(dbg_hook.values.keys())
    names_finn = set(output_dict.keys())
    names_common = names_brevitas.intersection(names_finn)
    # The different exports return debug markers in different numbers and places
    print(len(names_common))
    if QONNX_export and not QONNX_FINN_conversion:
        assert len(names_common) == 12
    elif QONNX_export and QONNX_FINN_conversion:
        assert len(names_common) == 8
    else:
        assert len(names_common) == 16
    for dbg_name in names_common:
        if QONNX_export:
            tensor_pytorch = dbg_hook.values[dbg_name].value.detach().numpy()
        else:
            tensor_pytorch = dbg_hook.values[dbg_name].detach().numpy()
        tensor_finn = output_dict[dbg_name]
        assert np.isclose(tensor_finn, tensor_pytorch, atol=1e-5).all()
    os.remove(finn_onnx)
Example 18
def test_convert_to_hls_layers_cnv_w1a1(fused_activation):
    cnv = get_test_model_trained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv)
    model = ModelWrapper(export_onnx_path_cnv)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(Streamline())
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # load one of the test vectors
    fn = pk.resource_filename("finn.qnn-data",
                              "cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    # generate expected value from streamlined net
    input_dict = {"global_in": input_tensor}
    expected_ctx = oxe.execute_onnx(model, input_dict, True)
    expected = expected_ctx[model.graph.output[0].name]

    # if we infer thresholding first, all MultiThresholds get converted to HLS
    # subsequently, the FC inference will generate passthrough MVAUs
    if not fused_activation:
        model = model.transform(to_hls.InferThresholdingLayer())
    model = model.transform(to_hls.InferBinaryStreamingFCLayer())
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer())
    for node in model.graph.node:
        if node.op_type == "StreamingFCLayer_Batch":
            inst = getCustomOp(node)
            inst.set_nodeattr("mem_mode", "decoupled")
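            # choose folding factors that divide the layer dims: PE = MH/4 and
            # SIMD = MW/16 where divisible, else fall back to full parallelism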
            mw = inst.get_nodeattr("MW")
            mh = inst.get_nodeattr("MH")
            if mh % 4 == 0:
                pe = mh // 4
            else:
                pe = mh
            inst.set_nodeattr("PE", pe)
            if mw % 16 == 0:
                simd = mw // 16
            else:
                simd = mw
            inst.set_nodeattr("SIMD", simd)
    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferStreamingMaxPool())
    # check topology status
    finn_nodes = model.get_finn_nodes()
    if fused_activation:
        assert len(finn_nodes) == 18
    else:
        assert len(finn_nodes) == 26
        thr_nodes = model.get_nodes_by_op_type("Thresholding_Batch")
        assert len(thr_nodes) == 8
    non_finn_nodes = model.get_non_finn_nodes()
    assert len(non_finn_nodes) == 4
    exp_non_finn_nodes = ["Transpose", "Reshape", "Mul", "Add"]
    assert [x.op_type for x in non_finn_nodes] == exp_non_finn_nodes
    fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    assert len(fc_nodes) == 9
    swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    assert len(swg_nodes) == 6
    mp_nodes = model.get_nodes_by_op_type("StreamingMaxPool_Batch")
    assert len(mp_nodes) == 2
    # model.save("cnv-pre-compile.onnx")
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    # model.save("cnv-post-compile.onnx")
    produced_ctx = oxe.execute_onnx(model, input_dict, True)
    produced = produced_ctx[model.graph.output[0].name]
    assert np.isclose(expected, produced, atol=1e-3).all()
    assert np.argmax(produced) == 3
    os.remove(export_onnx_path_cnv)
Example 19
def test_convert_to_hls_layers_synthetic(ch, ifmdim, idt):
    model = make_model(ch, ifmdim)
    model.save(export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # generate test vectors of correct shape
    if ifmdim == -1:
        input_tensor_shape = (1, ch)
    else:
        input_tensor_shape = (1, ch, ifmdim, ifmdim)

    x = gen_finn_dt_tensor(idt, input_tensor_shape)

    # generate expected value from streamlined net
    input_dict = {model.graph.input[0].name: x}

    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_sum = output_dict[model.graph.output[0].name]
    # the scalar Mul initializer at the graph output is not needed here;
    # the test uses a unit scale for the expected value
    chw_mul = 1
    expected_sum = chw_mul * np.sum(2 * (2 * x + 15.0),
                                    axis=(2, 3)) / (ifmdim * ifmdim)
    assert (produced_sum.flatten() == expected_sum.flatten()).all()

    model = model.transform(InferDataLayouts())

    # convert to hls
    model.set_tensor_datatype(model.graph.input[0].name, idt)
    # extra streamlining
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(MoveAddPastMul())
    model = model.transform(CollapseRepeatedMul())
    model = model.transform(CollapseRepeatedAdd())
    # insert top-k node, which should absorb linear ops before it

    model = model.transform(InferShapes())
    model = model.transform(InferDataLayouts())
    model = model.transform(InferDataTypes())

    model = model.transform(to_hls.InferChannelwiseLinearLayer())
    model = model.transform(to_hls.InferAddStreamsLayer())
    model = model.transform(to_hls.InferGlobalAccPoolLayer())
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(InsertTopK())
    model = model.transform(AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(AbsorbConsecutiveTransposes())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(to_hls.InferDuplicateStreamsLayer())

    model = model.transform(SortGraph())

    # model.save("golden_hls.onnx")
    # check topology status

    finn_nodes = model.get_finn_nodes()
    assert len(finn_nodes) == 9
    add_nodes = model.get_nodes_by_op_type("AddStreams_Batch")
    assert len(add_nodes) == 1
    pool_nodes = model.get_nodes_by_op_type("GlobalAccPool_Batch")
    assert len(pool_nodes) == 1
    label_nodes = model.get_nodes_by_op_type("LabelSelect_Batch")
    assert len(label_nodes) == 1
    channelwise_nodes = model.get_nodes_by_op_type("ChannelwiseOp_Batch")
    assert len(channelwise_nodes) == 5
    dup_nodes = model.get_nodes_by_op_type("DuplicateStreams_Batch")
    assert len(dup_nodes) == 1

    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))

    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_topk_hls = output_dict[model.graph.output[0].name]
    topk_input = output_dict[model.graph.node[-1].input[0]]
    assert soft_verify_topk(topk_input, produced_topk_hls, 5)

    os.remove(export_onnx_path)