Example #1
def test_modelwrapper():
    lfc = get_test_model_trained("LFC", 1, 1)
    bo.export_finn_onnx(lfc, (1, 1, 28, 28), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    assert model.check_all_tensor_shapes_specified() is False
    inp_name = model.graph.input[0].name
    inp_shape = model.get_tensor_shape(inp_name)
    assert inp_shape == [1, 1, 28, 28]
    # find first matmul node
    l0_mat_tensor_name = ""
    l0_inp_tensor_name = ""
    for node in model.graph.node:
        if node.op_type == "MatMul":
            l0_inp_tensor_name = node.input[0]
            l0_mat_tensor_name = node.input[1]
            break
    assert l0_mat_tensor_name != ""
    l0_weights = model.get_initializer(l0_mat_tensor_name)
    assert l0_weights.shape == (784, 1024)
    l0_weights_hist = Counter(l0_weights.flatten())
    assert (l0_weights_hist[1.0] + l0_weights_hist[-1.0]) == 784 * 1024
    l0_weights_rand = np.random.randn(784, 1024)
    model.set_initializer(l0_mat_tensor_name, l0_weights_rand)
    assert (model.get_initializer(l0_mat_tensor_name) == l0_weights_rand).all()
    assert l0_inp_tensor_name != ""
    inp_cons = model.find_consumer(l0_inp_tensor_name)
    assert inp_cons.op_type == "MatMul"
    out_prod = model.find_producer(l0_inp_tensor_name)
    assert out_prod.op_type == "Sign"
    os.remove(export_onnx_path)
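The test above exercises ModelWrapper's basic query API. As a quick illustration outside the test suite, here is a minimal sketch of the same calls on a hand-built single-MatMul graph; the finn.core.modelwrapper import path is an assumption based on the finn-base layout these examples come from.

import numpy as np
from onnx import TensorProto, helper
from finn.core.modelwrapper import ModelWrapper  # assumed finn-base import path

inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 4])
outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, 2])
w = helper.make_tensor_value_info("w", TensorProto.FLOAT, [4, 2])
modelproto = helper.make_model(
    helper.make_graph(
        name="tiny",
        inputs=[inp],
        outputs=[outp],
        value_info=[w],
        nodes=[helper.make_node("MatMul", ["inp", "w"], ["outp"])],
    ))
model = ModelWrapper(modelproto)
# initializers are set and read back through the wrapper
model.set_initializer("w", np.random.randn(4, 2).astype(np.float32))
assert model.get_initializer("w").shape == (4, 2)
# shape and topology queries mirror the test above
assert model.get_tensor_shape("inp") == [1, 4]
assert model.find_consumer("inp").op_type == "MatMul"
assert model.find_producer("outp").op_type == "MatMul"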
Example #2
def test_const_folding_shapes():
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    model = model.transform(InferShapes())
    mm_node_w_in = model.get_nodes_by_op_type("MatMul")[0].input[1]
    assert model.find_producer(mm_node_w_in) is not None
    assert model.find_producer(mm_node_w_in).op_type == "Reshape"
    assert model.get_initializer(mm_node_w_in) is None
    model = model.transform(FoldConstants())
    assert model.find_producer(mm_node_w_in) is None
    assert model.get_initializer(mm_node_w_in) is not None
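What FoldConstants does here, in isolation: any node whose inputs are all initializers (like the Reshape producing the MatMul weight above) is executed offline and its output becomes an initializer, so that tensor no longer has a producer node. Below is a minimal sketch of the same behaviour on a synthetic graph; import paths are assumed from the finn-base layout these examples use.

import numpy as np
from onnx import TensorProto, helper
from finn.core.modelwrapper import ModelWrapper          # assumed import paths
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.fold_constants import FoldConstants

inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 4])
outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, 2])
w = helper.make_tensor_value_info("w", TensorProto.FLOAT, [4, 2])
modelproto = helper.make_model(
    helper.make_graph(
        name="fold_demo",
        inputs=[inp],
        outputs=[outp],
        value_info=[w],
        nodes=[
            # Reshape has only initializer inputs, so it is foldable
            helper.make_node("Reshape", ["w_raw", "w_shape"], ["w"]),
            helper.make_node("MatMul", ["inp", "w"], ["outp"]),
        ],
    ))
model = ModelWrapper(modelproto)
model.set_initializer("w_raw", np.random.randn(8).astype(np.float32))
model.set_initializer("w_shape", np.asarray([4, 2], dtype=np.int64))
model = model.transform(InferShapes())
assert model.find_producer("w").op_type == "Reshape"
model = model.transform(FoldConstants())
assert model.find_producer("w") is None
assert model.get_initializer("w") is not None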
Example #3
def test_brevitas_cnv_w1a1_export():
    cnv = get_test_model_untrained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    assert model.graph.node[2].op_type == "Sign"
    assert model.graph.node[3].op_type == "Conv"
    conv0_wname = model.graph.node[3].input[1]
    assert list(model.get_initializer(conv0_wname).shape) == [64, 3, 3, 3]
    assert model.graph.node[4].op_type == "Mul"
    os.remove(export_onnx_path)
Example #4
def test_modelwrapper():
    lfc = get_test_model_trained("LFC", 1, 1)
    bo.export_finn_onnx(lfc, (1, 1, 28, 28), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    assert model.check_all_tensor_shapes_specified() is False
    inp_shape = model.get_tensor_shape("0")
    assert inp_shape == [1, 1, 28, 28]
    l0_mat_tensor_name = "33"
    l0_weights = model.get_initializer(l0_mat_tensor_name)
    assert l0_weights.shape == (784, 1024)
    l0_weights_hist = Counter(l0_weights.flatten())
    assert l0_weights_hist[1.0] == 401311 and l0_weights_hist[-1.0] == 401505
    l0_weights_rand = np.random.randn(784, 1024)
    model.set_initializer(l0_mat_tensor_name, l0_weights_rand)
    assert (model.get_initializer(l0_mat_tensor_name) == l0_weights_rand).all()
    l0_inp_tensor_name = "32"
    inp_cons = model.find_consumer(l0_inp_tensor_name)
    assert inp_cons.op_type == "MatMul"
    out_prod = model.find_producer(l0_inp_tensor_name)
    assert out_prod.op_type == "Sign"
    os.remove(export_onnx_path)
Example #5
def test_modelwrapper():
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    assert model.check_all_tensor_shapes_specified() is True
    inp_name = model.graph.input[0].name
    inp_shape = model.get_tensor_shape(inp_name)
    assert inp_shape == [1, 1, 28, 28]
    conv_nodes = model.get_nodes_by_op_type("Conv")
    matmul_nodes = model.get_nodes_by_op_type("MatMul")
    assert len(conv_nodes) == 2
    assert len(matmul_nodes) == 1
    first_conv = conv_nodes[0]
    first_conv_iname = first_conv.input[0]
    first_conv_wname = first_conv.input[1]
    first_conv_oname = first_conv.output[0]
    assert first_conv_iname != "" and (first_conv_iname is not None)
    assert first_conv_wname != "" and (first_conv_wname is not None)
    assert first_conv_oname != "" and (first_conv_oname is not None)
    first_conv_weights = model.get_initializer(first_conv_wname)
    assert first_conv_weights.shape == (8, 1, 5, 5)
    first_conv_weights_rand = np.random.randn(8, 1, 5, 5)
    model.set_initializer(first_conv_wname, first_conv_weights_rand)
    assert (model.get_initializer(first_conv_wname) == first_conv_weights_rand
            ).all()
    inp_cons = model.find_consumer(first_conv_iname)
    assert inp_cons == first_conv
    out_prod = model.find_producer(first_conv_oname)
    assert out_prod == first_conv
    inp_layout = model.get_tensor_layout(first_conv_iname)
    assert inp_layout is None
    inp_layout = DataLayout.NCHW
    model.set_tensor_layout(first_conv_iname, inp_layout)
    assert model.get_tensor_layout(first_conv_iname) == inp_layout
    inp_sparsity = model.get_tensor_sparsity(first_conv_iname)
    assert inp_sparsity is None
    inp_sparsity = {"dw": {"kernel_shape": [3, 3]}}
    model.set_tensor_sparsity(first_conv_iname, inp_sparsity)
    assert model.get_tensor_sparsity(first_conv_iname) == inp_sparsity
Example #6
    def apply(self, model):

        # create a temporary folder for the generated driver
        pynq_driver_dir = make_build_dir(prefix="pynq_driver_")
        model.set_metadata_prop("pynq_driver_dir", pynq_driver_dir)

        # create the base FINN driver -- same for all accels
        driver_base_template = pk.resource_filename(
            "finn.qnn-data", "templates/driver/driver_base.py"
        )
        driver_base_py = pynq_driver_dir + "/driver_base.py"
        shutil.copy(driver_base_template, driver_base_py)
        # extract input-output shapes from the graph
        # TODO convert this to an analysis pass?
        idt = []
        idma_names = []
        ishape_normal = []
        ishape_folded = []
        ishape_packed = []
        for idma_ind, graph_in in enumerate(model.graph.input):
            i_tensor_name = graph_in.name
            # get inp tensor properties
            i_tensor_dt = model.get_tensor_datatype(i_tensor_name)
            i_tensor_shape_normal = tuple(model.get_tensor_shape(i_tensor_name))
            # go down into dataflow partition to get folded shape info etc
            # TODO consider setting these as attributes during dataflow partitioning
            i_consumer = model.find_consumer(i_tensor_name)
            assert (
                i_consumer.op_type == "StreamingDataflowPartition"
            ), """
                Ensure CreateDataflowPartition called before driver creation."""
            first_df_model = ModelWrapper(getCustomOp(i_consumer).get_nodeattr("model"))
            assert (
                first_df_model.graph.node[0].op_type == "IODMA"
            ), "First partition must hold input IODMA"
            successors = model.find_direct_successors(i_consumer)
            successor_input_num = list(successors[0].input).index(i_consumer.output[0])
            successor_sdp = getCustomOp(successors[0])
            successor_df_model = ModelWrapper(successor_sdp.get_nodeattr("model"))
            first_node = successor_df_model.find_consumer(
                successor_df_model.graph.input[successor_input_num].name
            )
            i_tensor_shape_folded = tuple(
                getCustomOp(first_node).get_folded_input_shape()
            )
            # generate dummy folded i/o tensors and their packed versions
            i_tensor_dummy_folded = gen_finn_dt_tensor(
                i_tensor_dt, i_tensor_shape_folded
            )
            i_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
                i_tensor_dummy_folded, i_tensor_dt
            )
            i_tensor_shape_packed = i_tensor_dummy_packed.shape
            # append all input tensor info to relevant lists
            idt.append("DataType['%s']" % i_tensor_dt.name)
            ishape_normal.append(i_tensor_shape_normal)
            ishape_folded.append(i_tensor_shape_folded)
            ishape_packed.append(i_tensor_shape_packed)
            idma_names.append(getCustomOp(i_consumer).get_nodeattr("instance_name"))

        odt = []
        odma_names = []
        oshape_normal = []
        oshape_folded = []
        oshape_packed = []
        for odma_ind, graph_out in enumerate(model.graph.output):
            o_tensor_name = graph_out.name
            # get output tensor properties
            o_tensor_dt = model.get_tensor_datatype(o_tensor_name)
            o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name))
            # go down into IODMA partition to get folded shape info etc
            # TODO consider setting these as attributes during dataflow partitioning
            o_producer = model.find_producer(o_tensor_name)
            assert (
                o_producer.op_type == "StreamingDataflowPartition"
            ), """
                Ensure CreateDataflowPartition called before driver creation."""
            df_model = ModelWrapper(getCustomOp(o_producer).get_nodeattr("model"))
            assert (
                df_model.graph.node[-1].op_type == "IODMA"
            ), "Partition must hold output IODMA"
            predecessors = model.find_direct_predecessors(o_producer)
            predecessor_output_num = list(predecessors[0].output).index(
                o_producer.input[0]
            )
            predecessor_sdp = getCustomOp(predecessors[0])
            predecessor_df_model = ModelWrapper(predecessor_sdp.get_nodeattr("model"))
            last_node = predecessor_df_model.find_producer(
                predecessor_df_model.graph.output[predecessor_output_num].name
            )
            o_tensor_shape_folded = tuple(
                getCustomOp(last_node).get_folded_output_shape()
            )
            o_tensor_dummy_folded = gen_finn_dt_tensor(
                o_tensor_dt, o_tensor_shape_folded
            )
            o_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
                o_tensor_dummy_folded, o_tensor_dt
            )
            o_tensor_shape_packed = o_tensor_dummy_packed.shape
            # append all output tensor info to relevant lists
            odt.append("DataType['%s']" % o_tensor_dt.name)
            oshape_normal.append(o_tensor_shape_normal)
            oshape_folded.append(o_tensor_shape_folded)
            oshape_packed.append(o_tensor_shape_packed)
            odma_names.append(getCustomOp(o_producer).get_nodeattr("instance_name"))

        # generate external weights npy files
        weights_dir = pynq_driver_dir + "/runtime_weights"

        os.makedirs(weights_dir)
        idma_idx = 0
        ext_weight_dma_cnt = 0

        for node in model.graph.node:
            assert (
                node.op_type == "StreamingDataflowPartition"
            ), "CreateDataflowPartition needs to be applied before driver generation"

            if len(node.input) > 0:
                producer = model.find_producer(node.input[0])
                init_tensor = model.get_initializer(node.input[0])
            else:
                producer = None
                init_tensor = None

            if producer is None:  # input dma?
                sdp_inst = getCustomOp(node)
                idma_name = sdp_inst.get_nodeattr("instance_name")
                df_model = ModelWrapper(sdp_inst.get_nodeattr("model"))
                assert df_model.graph.node[0].op_type == "IODMA"
                iodma_node = getCustomOp(df_model.graph.node[0])
                if iodma_node.get_nodeattr("burstMode") == "wrap":  # input weights dma?
                    init_tensor = df_model.get_initializer(
                        iodma_node.onnx_node.input[0]
                    )
                    ext_weight_dma_cnt += 1
                    w_dtype = df_model.get_tensor_datatype(
                        iodma_node.onnx_node.input[0]
                    )
                    init_external_tensor = to_external_tensor(init_tensor, w_dtype)
                    np.save(
                        weights_dir + "/" + idma_name + ".npy", init_external_tensor
                    )
                idma_idx += 1

        # fill in the driver template
        driver_py = pynq_driver_dir + "/driver.py"
        driver = template_driver.pynq_driver_template

        driver = driver.replace("$PLATFORM$", self.platform)
        driver = driver.replace("$INPUT_FINN_DATATYPE$", str(idt).replace('"', ""))
        driver = driver.replace("$INPUT_SHAPE_NORMAL$", str(ishape_normal))
        driver = driver.replace("$INPUT_SHAPE_FOLDED$", str(ishape_folded))
        driver = driver.replace("$INPUT_SHAPE_PACKED$", str(ishape_packed))
        driver = driver.replace("$OUTPUT_FINN_DATATYPE$", str(odt).replace('"', ""))
        driver = driver.replace("$OUTPUT_SHAPE_NORMAL$", str(oshape_normal))
        driver = driver.replace("$OUTPUT_SHAPE_FOLDED$", str(oshape_folded))
        driver = driver.replace("$OUTPUT_SHAPE_PACKED$", str(oshape_packed))
        driver = driver.replace("$INPUT_DMA_NAME$", "%s" % str(idma_names))
        driver = driver.replace("$OUTPUT_DMA_NAME$", "%s" % str(odma_names))
        driver = driver.replace("$NUM_INPUTS$", str(len(idma_names)))
        driver = driver.replace("$NUM_OUTPUTS$", str(len(odma_names)))
        driver = driver.replace("$EXT_WEIGHT_NUM$", str(ext_weight_dma_cnt))

        with open(driver_py, "w") as f:
            f.write(driver)

        # add validate.py to run full top-1 test (only for suitable networks)
        validate_py = pynq_driver_dir + "/validate.py"
        validate_template = pk.resource_filename(
            "finn.qnn-data", "templates/driver/validate.py"
        )
        shutil.copy(validate_template, validate_py)

        # copy all the dependencies into the driver folder
        # driver imports utils/data_packing and core/datatype
        # both of which are in finn-base
        # e.g. /workspace/finn-base/src/finn/util/data_packing.py
        dpk_root = dpk.__file__
        # e.g. /workspace/finn-base/src/finn/util
        dpk_root = dpk_root.replace("data_packing.py", "")
        # e.g. /workspace/finn-base/src/finn/core/datatype.py
        dtp_root = dtp.__file__
        # e.g. /workspace/finn-base/src/finn/core
        dtp_root = dtp_root.replace("datatype.py", "")
        shutil.copytree(dpk_root, pynq_driver_dir + "/finn/util")
        shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core")

        # generate weight files for runtime-writable layers

        for sdp_ind, sdp_node in enumerate(model.graph.node):
            assert sdp_node.op_type == "StreamingDataflowPartition"
            # get dataflow model
            sdp_node = getCustomOp(sdp_node)
            dataflow_model_filename = sdp_node.get_nodeattr("model")
            dataflow_model = ModelWrapper(dataflow_model_filename)
            rt_layer_ind = 0
            for node in dataflow_model.graph.node:
                if node.op_type in ["StreamingFCLayer_Batch", "Thresholding_Batch"]:
                    node_inst = getCustomOp(node)
                    is_rt_weights = node_inst.get_nodeattr("runtime_writeable_weights")
                    if is_rt_weights == 1:
                        fcl_w = dataflow_model.get_initializer(node.input[1])
                        w_filename = weights_dir + "/%d_%d_%s.dat" % (
                            sdp_ind,
                            rt_layer_ind,
                            node.name,
                        )
                        node_inst.make_weight_file(
                            fcl_w, "decoupled_runtime", w_filename
                        )
                        rt_layer_ind += 1
                elif node.op_type == "StreamingDataflowPartition":
                    warnings.warn(
                        "Nested StreamingDataflowPartitions are not supported"
                    )
                else:
                    continue

        return (model, False)
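The apply() above is the body of a FINN Transformation, so it is normally reached through ModelWrapper.transform() on a model that has already been through CreateDataflowPartition. A hedged usage sketch follows; the MakePYNQDriver class name, its import path, the platform string and the checkpoint filename are assumptions for illustration, not taken from this snippet.

from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver  # assumed import path

model = ModelWrapper("dataflow_partitioned.onnx")  # hypothetical checkpoint after CreateDataflowPartition
model = model.transform(MakePYNQDriver(platform="zynq-iodma"))  # platform value is an assumption
print(model.get_metadata_prop("pynq_driver_dir"))  # folder with driver.py, driver_base.py, runtime_weights/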
Example #7
def test_linear_past_eltwise_add_multiple_forks(ch, ifmdim):
    # generate test vectors of correct shape
    if ifmdim == -1:
        input_shape = (1, ch)
    else:
        input_shape = (1, ch, ifmdim, ifmdim)

    top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT,
                                           input_shape)
    top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT,
                                            input_shape)

    num_of_params = 6
    value_info = []
    for i in range(num_of_params):
        value_info += [
            helper.make_tensor_value_info("p" + str(i), TensorProto.FLOAT,
                                          input_shape)
        ]

    modelproto = helper.make_model(
        helper.make_graph(
            name="test",
            inputs=[top_in],
            outputs=[top_out],
            value_info=value_info,
            nodes=[
                helper.make_node("Add", ["top_in", "p0"], ["fork1"]),
                helper.make_node("Mul", ["fork1", "p1"], ["t2"]),
                helper.make_node("Mul", ["fork1", "p2"], ["t3"]),
                helper.make_node("Add", ["t2", "t3"], ["t4"]),
                helper.make_node("Mul", ["t4", "p3"], ["fork2"]),
                helper.make_node("Add", ["fork2", "p4"], ["t5"]),
                helper.make_node("Add", ["fork2", "p5"], ["t6"]),
                helper.make_node("Add", ["t5", "t6"], ["top_out"]),
            ],
        ))
    model = ModelWrapper(modelproto)
    model = model.transform(InferShapes())

    np.random.seed(0)
    for i in range(num_of_params):
        model.set_initializer("p" + str(i),
                              np.random.rand(*input_shape).astype(np.float32))

    # need equal mults:
    model.set_initializer("p2", model.get_initializer("p1"))

    # Transform
    new_model = model.transform(MoveLinearPastEltwiseAdd())
    inp_dict = {"top_in": np.random.rand(*input_shape).astype(np.float32)}

    # Test
    assert oxe.compare_execution(model, new_model, inp_dict)
    assert new_model.graph.node[0].op_type == "Add"
    assert new_model.graph.node[1].op_type == "Add"
    assert new_model.graph.node[2].op_type == "Mul"
    assert new_model.graph.node[3].op_type == "Mul"
    assert new_model.graph.node[4].op_type == "Add"
    assert new_model.graph.node[5].op_type == "Add"
    assert len(new_model.graph.node) == 6
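The "need equal mults" constraint above is what makes the transform legal: it relies on the identity a*m + b*m == (a + b)*m, so the two Mul nodes feeding the Add can only be moved past it when they share the same parameter (hence p2 is set equal to p1). A standalone numpy check of that identity, for illustration only:

import numpy as np

a, b, m = (np.random.rand(1, 4).astype(np.float32) for _ in range(3))
assert np.allclose(a * m + b * m, (a + b) * m)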
Example #8
def test_brevitas_compare_exported_mobilenet():
    if "IMAGENET_VAL_PATH" not in os.environ.keys():
        pytest.skip("Can't do validation without IMAGENET_VAL_PATH")
    n_images = 10
    debug_mode = False
    export_onnx_path = make_build_dir("test_brevitas_mobilenet-v1_")
    # export preprocessing
    preproc_onnx = export_onnx_path + "/quant_mobilenet_v1_4b_preproc.onnx"
    preproc = NormalizePreProc(mean, std, ch)
    bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
    preproc_model = ModelWrapper(preproc_onnx)
    preproc_model = preproc_model.transform(InferShapes())
    preproc_model = preproc_model.transform(GiveUniqueNodeNames())
    preproc_model = preproc_model.transform(GiveUniqueParameterTensors())
    preproc_model = preproc_model.transform(GiveReadableTensorNames())
    # export the actual MobileNet-v1
    finn_onnx = export_onnx_path + "/quant_mobilenet_v1_4b.onnx"
    mobilenet = get_test_model_trained("mobilenet", 4, 4)
    if debug_mode:
        dbg_hook = bo.enable_debug(mobilenet)
    bo.export_finn_onnx(mobilenet, (1, 3, 224, 224), finn_onnx)
    model = ModelWrapper(finn_onnx)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(RemoveStaticGraphInputs())
    model = model.transform(InsertTopK())
    # get initializer from Mul that will be absorbed into topk

    a0 = model.get_initializer(model.get_nodes_by_op_type("Mul")[-1].input[1])
    model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveUniqueParameterTensors())
    model = model.transform(GiveReadableTensorNames())
    model.save(export_onnx_path + "/quant_mobilenet_v1_4b_wo_preproc.onnx")
    # create merged preprocessing + MobileNet-v1 model
    model = model.transform(MergeONNXModels(preproc_model))
    model.save(export_onnx_path + "/quant_mobilenet_v1_4b.onnx")

    with open(
        export_onnx_path + "/mobilenet_validation.csv", "w", newline=""
    ) as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(
            [
                "goldenID",
                "brevitasTop5",
                "brevitasTop5[%]",
                "finnTop5",
                "finnTop5[%]",
                "top5equal",
                "top5%equal",
            ]
        )
        csvfile.flush()
        workload = imagenet_util.get_val_images(n_images, interleave_classes=True)
        all_inds_ok = True
        all_probs_ok = True
        for (img_path, target_id) in workload:
            img_np = imagenet_util.load_resize_crop(img_path)
            img_torch = torch.from_numpy(img_np).float()
            # do forward pass in PyTorch/Brevitas
            input_tensor = preproc.forward(img_torch)
            expected = mobilenet.forward(input_tensor).detach().numpy()
            expected_topk = expected.flatten()
            expected_top5 = np.argsort(expected_topk)[-5:]
            expected_top5 = np.flip(expected_top5)
            expected_top5_prob = []
            for index in expected_top5:
                expected_top5_prob.append(expected_topk[index])
            idict = {model.graph.input[0].name: img_np}
            odict = oxe.execute_onnx(model, idict, return_full_exec_context=True)
            produced = odict[model.graph.output[0].name]
            produced_prob = odict["TopK_0_out0"] * a0
            inds_ok = (produced.flatten() == expected_top5).all()
            probs_ok = np.isclose(produced_prob.flatten(), expected_top5_prob).all()
            all_inds_ok = all_inds_ok and inds_ok
            all_probs_ok = all_probs_ok and probs_ok
            writer.writerow(
                [
                    str(target_id),
                    str(expected_top5),
                    str(expected_top5_prob),
                    str(produced.flatten()),
                    str(produced_prob.flatten()),
                    str(inds_ok),
                    str(probs_ok),
                ]
            )
            csvfile.flush()
            if ((not inds_ok) or (not probs_ok)) and debug_mode:
                print("Results differ for %s" % img_path)
                # check all tensors at debug markers
                names_brevitas = set(dbg_hook.values.keys())
                names_finn = set(odict.keys())
                names_common = names_brevitas.intersection(names_finn)
                for dbg_name in names_common:
                    if not np.isclose(
                        dbg_hook.values[dbg_name].detach().numpy(),
                        odict[dbg_name],
                        atol=1e-3,
                    ).all():
                        print("Tensor %s differs between Brevitas and FINN" % dbg_name)
        assert all_inds_ok and all_probs_ok
Example #9
def test_convert_to_hls_layers_synthetic(ch, ifmdim, idt):
    model = make_model(ch, ifmdim)
    model.save(export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # generate test vectors of correct shape
    if ifmdim == -1:
        input_tensor_shape = (1, ch)
    else:
        input_tensor_shape = (1, ch, ifmdim, ifmdim)

    x = gen_finn_dt_tensor(idt, input_tensor_shape)

    # generate expected value from streamlined net
    input_dict = {model.graph.input[0].name: x}

    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_sum = output_dict[model.graph.output[0].name]
    chw_mul = model.get_initializer(model.graph.node[-1].input[1])
    chw_mul = 1
    expected_sum = chw_mul * np.sum(2 * (2 * x + 15.0),
                                    axis=(2, 3)) / (ifmdim * ifmdim)
    assert (produced_sum.flatten() == expected_sum.flatten()).all()

    model = model.transform(InferDataLayouts())

    # convert to hls
    model.set_tensor_datatype(model.graph.input[0].name, idt)
    # extra streamlining
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(MoveAddPastMul())
    model = model.transform(CollapseRepeatedMul())
    model = model.transform(CollapseRepeatedAdd())
    # insert top-k node, which should absorb linear ops before it

    model = model.transform(InferShapes())
    model = model.transform(InferDataLayouts())
    model = model.transform(InferDataTypes())

    model = model.transform(to_hls.InferChannelwiseLinearLayer())
    model = model.transform(to_hls.InferAddStreamsLayer())
    model = model.transform(to_hls.InferGlobalAccPoolLayer())
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(InsertTopK())
    model = model.transform(AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(AbsorbConsecutiveTransposes())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(to_hls.InferDuplicateStreamsLayer())

    model = model.transform(SortGraph())

    # model.save("golden_hls.onnx")
    # check topology status

    finn_nodes = model.get_finn_nodes()
    assert len(finn_nodes) == 9
    add_nodes = model.get_nodes_by_op_type("AddStreams_Batch")
    assert len(add_nodes) == 1
    pool_nodes = model.get_nodes_by_op_type("GlobalAccPool_Batch")
    assert len(pool_nodes) == 1
    label_nodes = model.get_nodes_by_op_type("LabelSelect_Batch")
    assert len(label_nodes) == 1
    channelwise_nodes = model.get_nodes_by_op_type("ChannelwiseOp_Batch")
    assert len(channelwise_nodes) == 5
    dup_nodes = model.get_nodes_by_op_type("DuplicateStreams_Batch")
    assert len(dup_nodes) == 1

    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))

    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_topk_hls = output_dict[model.graph.output[0].name]
    topk_input = output_dict[model.graph.node[-1].input[0]]
    assert soft_verify_topk(topk_input, produced_topk_hls, 5)

    os.remove(export_onnx_path)
Example #10
def test_brevitas_mobilenet():
    # get single image as input and prepare image
    img = Image.open("/workspace/finn/tests/brevitas/king_charles.jpg")
    # resize smallest side of the image to 256 pixels and resize larger side
    # with same ratio
    img = resize_smaller_side(256, img)
    # crop central 224*224 window
    img = crop_center(224, img)
    # save image as numpy array and as torch tensor to enable testing in
    # brevitas/pytorch and finn and transpose from (H, W, C) to (C, H, W)
    img_np = np.asarray(img).copy().astype(np.float32).transpose(2, 0, 1)
    img_np = img_np.reshape(1, 3, 224, 224)
    img_torch = torch.from_numpy(img_np).float()

    # export preprocess
    export_onnx_path = make_build_dir("test_brevitas_mobilenet-v1_")
    preproc_onnx = export_onnx_path + "/quant_mobilenet_v1_4b_preproc.onnx"
    mean = [0.485, 0.456, 0.406]
    std = 0.226
    ch = 3
    preproc = NormalizePreProc(mean, std, ch)
    bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
    preproc_model = ModelWrapper(preproc_onnx)
    # set input finn datatype to UINT8
    preproc_model.set_tensor_datatype(preproc_model.graph.input[0].name, DataType.UINT8)
    preproc_model = preproc_model.transform(InferShapes())
    preproc_model = preproc_model.transform(GiveUniqueNodeNames())
    preproc_model = preproc_model.transform(GiveUniqueParameterTensors())
    preproc_model = preproc_model.transform(GiveReadableTensorNames())

    finn_onnx = export_onnx_path + "/quant_mobilenet_v1_4b_exported.onnx"
    mobilenet = get_test_model_trained("mobilenet", 4, 4)
    bo.export_finn_onnx(mobilenet, (1, 3, 224, 224), finn_onnx)

    # do forward pass in PyTorch/Brevitas
    input_tensor = preproc.forward(img_torch)
    expected = mobilenet.forward(input_tensor).detach().numpy()
    expected_topk = expected.flatten()
    expected_top5 = np.argsort(expected_topk)[-5:]
    expected_top5 = np.flip(expected_top5)
    expected_top5_prob = []
    for index in expected_top5:
        expected_top5_prob.append(expected_topk[index])
    model = ModelWrapper(finn_onnx)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(InsertTopK())
    # get initializer from Mul that will be absorbed into topk
    a0 = model.get_initializer(model.graph.node[-2].input[1])
    model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveUniqueParameterTensors())
    model = model.transform(GiveReadableTensorNames())
    model.save(export_onnx_path + "/quant_mobilenet_v1_4b_wo_preproc.onnx")
    model = model.transform(MergeONNXModels(preproc_model))
    model.save(export_onnx_path + "/quant_mobilenet_v1_4b.onnx")
    idict = {model.graph.input[0].name: img_np}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    produced_prob = odict["TopK_0_out0"] * a0
    assert (produced.flatten() == expected_top5).all()
    assert np.isclose(produced_prob.flatten(), expected_top5_prob).all()
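Capturing a0 before AbsorbScalarMulAddIntoTopK works because scaling every class score by a positive scalar leaves their ordering, and hence the TopK indices, unchanged; the scale only has to be re-applied to the reported values, which is what produced_prob = odict["TopK_0_out0"] * a0 does. A small standalone illustration of that invariance (a0 here is an arbitrary positive stand-in for the absorbed Mul parameter):

import numpy as np

scores = np.random.rand(1000).astype(np.float32)
a0 = 0.05
assert (np.argsort(scores)[-5:] == np.argsort(a0 * scores)[-5:]).all()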
Example #11
def test_end2end_cybsec_mlp_export(QONNX_export):
    assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
    # load up trained net in Brevitas
    input_size = 593
    hidden1 = 64
    hidden2 = 64
    hidden3 = 64
    weight_bit_width = 2
    act_bit_width = 2
    num_classes = 1
    model = nn.Sequential(
        QuantLinear(input_size,
                    hidden1,
                    bias=True,
                    weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden1),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden1,
                    hidden2,
                    bias=True,
                    weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden2),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden2,
                    hidden3,
                    bias=True,
                    weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden3),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden3,
                    num_classes,
                    bias=True,
                    weight_bit_width=weight_bit_width),
    )
    trained_state_dict = torch.load(assets_dir +
                                    "/state_dict.pth")["models_state_dict"][0]
    model.load_state_dict(trained_state_dict, strict=False)
    W_orig = model[0].weight.data.detach().numpy()
    # pad the second (593-sized) dimension with 7 zeros at the end
    W_new = np.pad(W_orig, [(0, 0), (0, 7)])
    model[0].weight.data = torch.from_numpy(W_new)
    model_for_export = CybSecMLPForExport(model)
    export_onnx_path = get_checkpoint_name("export", QONNX_export)
    input_shape = (1, 600)
    # create a QuantTensor instance to mark the input as bipolar during export
    input_a = np.random.randint(0, 1, size=input_shape).astype(np.float32)
    input_a = 2 * input_a - 1
    scale = 1.0
    input_t = torch.from_numpy(input_a * scale)
    input_qt = QuantTensor(input_t,
                           scale=torch.tensor(scale),
                           bit_width=torch.tensor(1.0),
                           signed=True)

    if QONNX_export:
        # With the BrevitasONNXManager we need to manually set
        # the FINN DataType at the input
        BrevitasONNXManager.export(model_for_export,
                                   input_shape,
                                   export_path=export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model.set_tensor_datatype(model.graph.input[0].name,
                                  DataType["BIPOLAR"])
        model.save(export_onnx_path)
        qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model = model.transform(ConvertQONNXtoFINN())
        model.save(export_onnx_path)
    else:
        bo.export_finn_onnx(model_for_export,
                            export_path=export_onnx_path,
                            input_t=input_qt)
    assert os.path.isfile(export_onnx_path)
    # fix input datatype
    finn_model = ModelWrapper(export_onnx_path)
    finnonnx_in_tensor_name = finn_model.graph.input[0].name
    assert tuple(finn_model.get_tensor_shape(finnonnx_in_tensor_name)) == (1,
                                                                           600)
    # verify a few exported ops
    if QONNX_export:
        # The first "Mul" node doesn't exist in the QONNX export,
        # because the QuantTensor scale is not exported.
        # However, this node would have been unity scale anyway and
        # the models are still equivalent.
        assert finn_model.graph.node[0].op_type == "Add"
        assert finn_model.graph.node[1].op_type == "Div"
        assert finn_model.graph.node[2].op_type == "MatMul"
        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
    else:
        assert finn_model.graph.node[0].op_type == "Mul"
        assert finn_model.get_initializer(
            finn_model.graph.node[0].input[1]) == 1.0
        assert finn_model.graph.node[1].op_type == "Add"
        assert finn_model.graph.node[2].op_type == "Div"
        assert finn_model.graph.node[3].op_type == "MatMul"
        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
    # verify datatypes on some tensors
    assert (finn_model.get_tensor_datatype(finnonnx_in_tensor_name) ==
            DataType["BIPOLAR"])
    first_matmul_w_name = finn_model.get_nodes_by_op_type("MatMul")[0].input[1]
    assert finn_model.get_tensor_datatype(
        first_matmul_w_name) == DataType["INT2"]
Example #12
    def apply(self, model):
        # create a temporary folder for the generated driver
        pynq_driver_dir = make_build_dir(prefix="pynq_driver_")
        model.set_metadata_prop("pynq_driver_dir", pynq_driver_dir)

        # create the base FINN driver -- same for all accels
        driver_base_template = pk.resource_filename(
            "finn.qnn-data", "templates/driver/driver_base.py")
        driver_base_py = pynq_driver_dir + "/driver_base.py"
        shutil.copy(driver_base_template, driver_base_py)

        # extract input-output shapes from the graph
        # TODO convert this to an analysis pass?
        i_tensor_name = model.graph.input[0].name
        o_tensor_name = model.graph.output[0].name
        i_tensor_shape_normal = tuple(model.get_tensor_shape(i_tensor_name))
        o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name))
        i_tensor_dt = model.get_tensor_datatype(i_tensor_name)
        o_tensor_dt = model.get_tensor_datatype(o_tensor_name)

        first_node = model.find_consumer(i_tensor_name)
        last_node = model.find_producer(o_tensor_name)
        if first_node.op_type == "StreamingDataflowPartition":
            # IODMAs and dataflow partitions have already been created
            # extract folded i/o shapes from IODMA consumer/producer
            first_df_model = ModelWrapper(
                getCustomOp(first_node).get_nodeattr("model"))
            assert (first_df_model.graph.node[0].op_type == "IODMA"
                    ), "First partition must hold input IODMA"
            successors = model.find_direct_successors(first_node)
            successor_sdp = getCustomOp(successors[0])
            successor_df_model = ModelWrapper(
                successor_sdp.get_nodeattr("model"))
            first_node = successor_df_model.find_consumer(
                successor_df_model.graph.input[0].name)

            last_df_model = ModelWrapper(
                getCustomOp(last_node).get_nodeattr("model"))
            assert (last_df_model.graph.node[0].op_type == "IODMA"
                    ), "Last partition must hold output IODMA"
            predecessors = model.find_direct_predecessors(last_node)
            predecessor_sdp = getCustomOp(predecessors[0])
            predecessor_df_model = ModelWrapper(
                predecessor_sdp.get_nodeattr("model"))
            last_node = predecessor_df_model.find_producer(
                predecessor_df_model.graph.output[0].name)

        # else: transformation called before IODMA/SDP creation (legacy flow)
        # can access folded i/o shapes directly
        i_tensor_shape_folded = tuple(
            getCustomOp(first_node).get_folded_input_shape())
        o_tensor_shape_folded = tuple(
            getCustomOp(last_node).get_folded_output_shape())

        # generate dummy folded i/o tensors and their packed versions
        i_tensor_dummy_folded = gen_finn_dt_tensor(i_tensor_dt,
                                                   i_tensor_shape_folded)
        o_tensor_dummy_folded = gen_finn_dt_tensor(o_tensor_dt,
                                                   o_tensor_shape_folded)
        i_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
            i_tensor_dummy_folded, i_tensor_dt)
        o_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
            o_tensor_dummy_folded, o_tensor_dt)
        i_tensor_shape_packed = i_tensor_dummy_packed.shape
        o_tensor_shape_packed = o_tensor_dummy_packed.shape

        # generate external weights npy files
        weights_dir = pynq_driver_dir + "/runtime_weights"

        os.makedirs(weights_dir)
        idma_idx = 0
        ext_weight_dma_cnt = 0

        for node in model.graph.node:
            assert (
                node.op_type == "StreamingDataflowPartition"
            ), "CreateDataflowPartition needs to be applied before driver generation"

            producer = model.find_producer(node.input[0])
            init_tensor = model.get_initializer(node.input[0])

            if producer is None:  # input dma?
                idma_name = "idma" + str(idma_idx)
                if init_tensor is not None:  # input weights dma?
                    ext_weight_dma_cnt += 1
                    w_dtype = model.get_tensor_datatype(node.input[0])
                    init_external_tensor = to_external_tensor(
                        init_tensor, w_dtype)
                    np.save(weights_dir + "/" + idma_name + ".npy",
                            init_external_tensor)
                else:
                    net_input_name = idma_name

                idma_idx += 1

        # fill in the driver template
        driver_py = pynq_driver_dir + "/driver.py"
        driver = template_driver.pynq_driver_template

        def mss(x, batch_var_name="1"):
            # "make shape string"
            # for a shape like (1, ...) emit a string (N, ...)
            # where N is the default value for batch_var_name
            # this lets the driver work with a batch of samples at once
            ret = str(x)
            ret = ret.replace("(1,", "(%s," % batch_var_name)
            ret = ret.replace("[1,", "[%s," % batch_var_name)
            return ret

        driver = driver.replace("$PLATFORM$", self.platform)
        driver = driver.replace("$INPUT_FINN_DATATYPE$", str(i_tensor_dt))
        driver = driver.replace("$INPUT_SHAPE_NORMAL$",
                                mss(i_tensor_shape_normal))
        driver = driver.replace("$INPUT_SHAPE_FOLDED$",
                                mss(i_tensor_shape_folded))
        driver = driver.replace("$INPUT_SHAPE_PACKED$",
                                mss(i_tensor_shape_packed))
        driver = driver.replace("$OUTPUT_FINN_DATATYPE$", str(o_tensor_dt))
        driver = driver.replace("$OUTPUT_SHAPE_NORMAL$",
                                mss(o_tensor_shape_normal))
        driver = driver.replace("$OUTPUT_SHAPE_FOLDED$",
                                mss(o_tensor_shape_folded))
        driver = driver.replace("$OUTPUT_SHAPE_PACKED$",
                                mss(o_tensor_shape_packed))
        driver = driver.replace("$INPUT_DMA_NAME$", "'%s'" % net_input_name)
        driver = driver.replace("$EXT_WEIGHT_NUM$", str(ext_weight_dma_cnt))

        with open(driver_py, "w") as f:
            f.write(driver)

        # add validate.py to run full top-1 test (only for suitable networks)
        validate_py = pynq_driver_dir + "/validate.py"
        validate_template = pk.resource_filename(
            "finn.qnn-data", "templates/driver/validate.py")
        shutil.copy(validate_template, validate_py)

        # copy all the dependencies into the driver folder
        # driver imports utils/data_packing and core/datatype
        # both of which are in finn-base
        # e.g. /workspace/finn-base/src/finn/util/data_packing.py
        dpk_root = dpk.__file__
        # e.g. /workspace/finn-base/src/finn/util
        dpk_root = dpk_root.replace("data_packing.py", "")
        # e.g. /workspace/finn-base/src/finn/core/datatype.py
        dtp_root = dtp.__file__
        # e.g. /workspace/finn-base/src/finn/core
        dtp_root = dtp_root.replace("datatype.py", "")
        shutil.copytree(dpk_root, pynq_driver_dir + "/finn/util")
        shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core")

        # generate weight files for runtime-writable layers

        for sdp_ind, sdp_node in enumerate(model.graph.node):
            assert sdp_node.op_type == "StreamingDataflowPartition"
            # get dataflow model
            sdp_node = getCustomOp(sdp_node)
            dataflow_model_filename = sdp_node.get_nodeattr("model")
            dataflow_model = ModelWrapper(dataflow_model_filename)
            rt_layer_ind = 0
            for node in dataflow_model.graph.node:
                if node.op_type in [
                        "StreamingFCLayer_Batch", "Thresholding_Batch"
                ]:
                    node_inst = getCustomOp(node)
                    is_rt_weights = node_inst.get_nodeattr(
                        "runtime_writeable_weights")
                    if is_rt_weights == 1:
                        fcl_w = dataflow_model.get_initializer(node.input[1])
                        w_filename = weights_dir + "/%d_%d_%s.dat" % (
                            sdp_ind,
                            rt_layer_ind,
                            node.name,
                        )
                        node_inst.make_weight_file(fcl_w, "decoupled_runtime",
                                                   w_filename)
                        rt_layer_ind += 1
                elif node.op_type == "StreamingDataflowPartition":
                    warnings.warn(
                        "Nested StreamingDataflowPartitions are not supported"
                    )
                else:
                    continue

        return (model, False)