def test_convert_to_hls_layers_synthetic(ch, ifmdim, idt):
    model = make_model(ch, ifmdim)
    model.save(export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # generate test vectors of correct shape
    if ifmdim == -1:
        input_tensor_shape = (1, ch)
    else:
        input_tensor_shape = (1, ch, ifmdim, ifmdim)

    x = gen_finn_dt_tensor(idt, input_tensor_shape)

    # generate expected value from streamlined net
    input_dict = {model.graph.input[0].name: x}

    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_sum = output_dict[model.graph.output[0].name]
    chw_mul = model.get_initializer(model.graph.node[-1].input[1])
    chw_mul = 1
    expected_sum = chw_mul * np.sum(2 * (2 * x + 15.0),
                                    axis=(2, 3)) / (ifmdim * ifmdim)
    assert (produced_sum.flatten() == expected_sum.flatten()).all()

    model = model.transform(InferDataLayouts())

    # convert to hls
    model.set_tensor_datatype(model.graph.input[0].name, idt)
    # extra streamlining
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(MoveAddPastMul())
    model = model.transform(CollapseRepeatedMul())
    model = model.transform(CollapseRepeatedAdd())
    # insert top-k node, which should absorb linear ops before it

    model = model.transform(InferShapes())
    model = model.transform(InferDataLayouts())
    model = model.transform(InferDataTypes())

    model = model.transform(to_hls.InferChannelwiseLinearLayer())
    model = model.transform(to_hls.InferAddStreamsLayer())
    model = model.transform(to_hls.InferGlobalAccPoolLayer())
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(InsertTopK())
    model = model.transform(AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(AbsorbConsecutiveTransposes())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(to_hls.InferDuplicateStreamsLayer())

    model = model.transform(SortGraph())

    # model.save("golden_hls.onnx")
    # check topology status

    finn_nodes = model.get_finn_nodes()
    assert len(finn_nodes) == 9
    add_nodes = model.get_nodes_by_op_type("AddStreams_Batch")
    assert len(add_nodes) == 1
    pool_nodes = model.get_nodes_by_op_type("GlobalAccPool_Batch")
    assert len(pool_nodes) == 1
    label_nodes = model.get_nodes_by_op_type("LabelSelect_Batch")
    assert len(label_nodes) == 1
    channelwise_nodes = model.get_nodes_by_op_type("ChannelwiseOp_Batch")
    assert len(channelwise_nodes) == 5
    dup_nodes = model.get_nodes_by_op_type("DuplicateStreams_Batch")
    assert len(dup_nodes) == 1

    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))

    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_topk_hls = output_dict[model.graph.output[0].name]
    topk_input = output_dict[model.graph.node[-1].input[0]]
    assert soft_verify_topk(topk_input, produced_topk_hls, 5)

    os.remove(export_onnx_path)
def test_convert_to_hls_layers_cnv_w1a1(fused_activation):
    cnv = get_test_model_trained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv)
    model = ModelWrapper(export_onnx_path_cnv)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(Streamline())
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # load one of the test vectors
    fn = pk.resource_filename("finn.qnn-data",
                              "cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    # generate expected value from streamlined net
    input_dict = {"global_in": input_tensor}
    expected_ctx = oxe.execute_onnx(model, input_dict, True)
    expected = expected_ctx[model.graph.output[0].name]

    # if we infer thresholding first, all MultiThresholds get converted to HLS
    # subsequently, the FC inference will generate passthrough MVAUs
    if not fused_activation:
        model = model.transform(to_hls.InferThresholdingLayer())
    model = model.transform(to_hls.InferBinaryStreamingFCLayer())
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer())
    for node in model.graph.node:
        if node.op_type == "StreamingFCLayer_Batch":
            inst = getCustomOp(node)
            inst.set_nodeattr("mem_mode", "decoupled")
            mw = inst.get_nodeattr("MW")
            mh = inst.get_nodeattr("MH")
            if mh % 4 == 0:
                pe = mh // 4
            else:
                pe = mh
            inst.set_nodeattr("PE", pe)
            if mw % 16 == 0:
                simd = mw // 16
            else:
                simd = mw
            inst.set_nodeattr("SIMD", simd)
    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferStreamingMaxPool())
    # check topology status
    finn_nodes = model.get_finn_nodes()
    if fused_activation:
        assert len(finn_nodes) == 18
    else:
        assert len(finn_nodes) == 26
        thr_nodes = model.get_nodes_by_op_type("Thresholding_Batch")
        assert len(thr_nodes) == 8
    non_finn_nodes = model.get_non_finn_nodes()
    assert len(non_finn_nodes) == 4
    exp_non_finn_nodes = ["Transpose", "Reshape", "Mul", "Add"]
    assert [x.op_type for x in non_finn_nodes] == exp_non_finn_nodes
    fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    assert len(fc_nodes) == 9
    swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    assert len(swg_nodes) == 6
    mp_nodes = model.get_nodes_by_op_type("StreamingMaxPool_Batch")
    assert len(mp_nodes) == 2
    # model.save("cnv-pre-compile.onnx")
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    # model.save("cnv-post-compile.onnx")
    produced_ctx = oxe.execute_onnx(model, input_dict, True)
    produced = produced_ctx[model.graph.output[0].name]
    assert np.isclose(expected, produced, atol=1e-3).all()
    assert np.argmax(produced) == 3
    os.remove(export_onnx_path_cnv)
Esempio n. 3
0
def test_fpgadataflow_upsampler(dt, IFMDim, scale, NumChannels, exec_mode):
    atol = 1e-3
    # Create the test model and inputs for it
    torch_model = PyTorchTestModel(upscale_factor=scale)
    input_shape = (1, NumChannels, IFMDim, IFMDim)
    test_in = torch.arange(0, np.prod(np.asarray(input_shape)))
    # Limit the input to values valid for the given datatype
    test_in %= dt.max() - dt.min() + 1
    test_in += dt.min()
    # Additionally make sure we always start with 0, for convenience purposes.
    test_in = torch.roll(test_in, dt.min())
    test_in = test_in.view(*input_shape).type(torch.float32)

    # Get golden PyTorch and ONNX inputs
    golden_torch_float = torch_model(test_in)
    export_path = f"{tmpdir}/Upsample_exported.onnx"
    FINNManager.export(torch_model,
                       input_shape=input_shape,
                       export_path=export_path,
                       opset_version=11)
    model = ModelWrapper(export_path)
    input_dict = {model.graph.input[0].name: test_in.numpy().astype(np.int32)}
    input_dict = {model.graph.input[0].name: test_in.numpy()}
    golden_output_dict = oxe.execute_onnx(model, input_dict, True)
    golden_result = golden_output_dict[model.graph.output[0].name]

    # Make sure PyTorch and ONNX match
    pyTorch_onnx_match = np.isclose(golden_result, golden_torch_float).all()
    assert pyTorch_onnx_match, "ONNX and PyTorch upsampling output don't match."

    # Prep model for execution
    model = ModelWrapper(export_path)
    # model = model.transform(TransposeUpsampleIO())
    model = model.transform(MakeInputChannelsLast())
    model = model.transform(InferDataLayouts())
    model = model.transform(absorb.AbsorbTransposeIntoResize())
    model = model.transform(InferShapes())
    model = model.transform(ForceDataTypeForTensors(dType=dt))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(InferUpsample())
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())

    # Check that all nodes are UpsampleNearestNeighbour_Batch nodes
    for n in model.get_finn_nodes():
        node_check = n.op_type == "UpsampleNearestNeighbour_Batch"
        assert node_check, "All nodes should be UpsampleNearestNeighbour_Batch nodes."

    # Prep sim
    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 10))
        model = model.transform(HLSSynthIP())
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # Run sim
    test_in_transposed = test_in.numpy().transpose(_to_chan_last_args)
    input_dict = {model.graph.input[0].name: test_in_transposed}
    output_dict = oxe.execute_onnx(model, input_dict, True)
    test_result = output_dict[model.graph.output[0].name]
    output_matches = np.isclose(golden_result, test_result, atol=atol).all()

    if exec_mode == "cppsim":
        assert output_matches, "Cppsim output doesn't match ONNX/PyTorch."
    elif exec_mode == "rtlsim":
        assert output_matches, "Rtlsim output doesn't match ONNX/PyTorch."