Example #1
def test_sort_linear_graph(num_of_nodes):
    model = make_randomly_sorted_linear_model(num_of_nodes, seed=0)
    new_model = model.transform(SortGraph())

    # Test
    ret = new_model.analysis(ta.nodes_topologically_sorted)
    assert ret["nodes_topologically_sorted"], "Nodes are not topologically sorted."
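
make_randomly_sorted_linear_model is a test utility that is not shown on this page. A plausible sketch, under the assumption that it builds a linear chain of Add nodes and then shuffles the node list out of topological order (the real FINN helper may differ):

import numpy as np
from onnx import TensorProto, helper
from finn.core.modelwrapper import ModelWrapper  # import path may differ by FINN version

def make_randomly_sorted_linear_model(num_of_nodes, seed=None):
    # hypothetical reimplementation: chain t0 -> t1 -> ... -> t<n>, with the
    # node list shuffled so it is no longer topologically sorted
    if seed is not None:
        np.random.seed(seed)
    shape = (1, 2, 16, 16)
    top_in = helper.make_tensor_value_info("t0", TensorProto.FLOAT, shape)
    top_out = helper.make_tensor_value_info(
        "t%d" % num_of_nodes, TensorProto.FLOAT, shape)
    nodes = [
        helper.make_node("Add", ["t%d" % i, "t%d" % i], ["t%d" % (i + 1)])
        for i in range(num_of_nodes)
    ]
    np.random.shuffle(nodes)  # destroy the topological order
    return ModelWrapper(
        helper.make_model(helper.make_graph(nodes, "linear", [top_in], [top_out])))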
Example #2
    def apply(self, model):
        graph = model.graph
        graph_modified = False
        for n in graph.node:
            if n.op_type in self.join_node_op and model.is_join_node(n):
                in0 = n.input[0]
                in1 = n.input[1]
                if in0 is None or in1 is None:
                    continue

                prod0 = model.find_producer(in0)
                prod1 = model.find_producer(in1)
                # the join node must be preceded by two different producer
                # nodes performing an identical operation; skip inputs with
                # no producer and inputs sharing the same producer
                if prod0 is None or prod1 is None or prod0 == prod1:
                    continue

                identical_op = prod0.op_type == prod1.op_type

                if identical_op and prod0.op_type in self.ops_to_move:
                    self.move_node(model, n, prod0, prod1)
                    graph_modified = True

        if graph_modified:
            model = model.transform(SortGraph(),
                                    make_deepcopy=False,
                                    cleanup=False)

        return (model, graph_modified)
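
For context, ModelWrapper.transform drives an apply() like the one above in a loop until it reports no further change; that is why apply() returns a (model, graph_modified) pair. A simplified sketch of that driver, assuming it mirrors FINN's handling of the make_deepcopy and cleanup flags:

import copy

def transform(model, transformation, make_deepcopy=True, cleanup=True):
    # simplified sketch, not the actual FINN implementation
    if make_deepcopy:
        model = copy.deepcopy(model)
    model_was_changed = True
    while model_was_changed:
        model, model_was_changed = transformation.apply(model)
    if cleanup:
        model = model.cleanup()  # see Example #3
    return model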
Example #3
    def cleanup(self):
        """Run cleanup transformations on the model."""
        transformed_model = self
        cleanup_transforms = [
            RemoveUnusedTensors(),
            RemoveStaticGraphInputs(),
            SortGraph(),
        ]
        for trn in cleanup_transforms:
            transformed_model = transformed_model.transform(
                trn, cleanup=False, make_deepcopy=False)
        return transformed_model
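
Note that each cleanup transformation is applied with cleanup=False: transform() itself triggers cleanup() by default (as sketched under Example #2), so this flag prevents unbounded recursion. For example:

model = model.transform(SortGraph())                 # cleanup() runs afterwards by default
model = model.transform(SortGraph(), cleanup=False)  # skip the extra cleanup pass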
Example #4
def test_sort_nonlinear_graph():
    ch = 2
    ifmdim = 16
    input_shape = (1, ch, ifmdim, ifmdim)

    top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT,
                                           input_shape)
    top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT,
                                            input_shape)

    num_of_params = 8
    value_info = []
    for i in range(num_of_params):
        value_info += [
            helper.make_tensor_value_info("p" + str(i), TensorProto.FLOAT,
                                          input_shape)
        ]

    modelproto = helper.make_model(
        helper.make_graph(
            name="test",
            inputs=[top_in],
            outputs=[top_out],
            value_info=value_info,
            nodes=[
                # nodes deliberately listed out of topological order
                helper.make_node("Mul", ["fork1", "p2"], ["t3"]),
                helper.make_node("Add", ["t4", "p3"], ["t5"]),
                helper.make_node("Add", ["t2", "t3"], ["t4"]),
                helper.make_node("Add", ["t6", "t7"], ["t8"]),
                helper.make_node("Add", ["fork3", "fork3"], ["top_out"]),
                helper.make_node("Mul", ["t5", "p4"], ["fork2"]),
                helper.make_node("Add", ["top_in", "p0"], ["fork1"]),
                helper.make_node("Mul", ["fork1", "p1"], ["t2"]),
                helper.make_node("Add", ["fork2", "p5"], ["t6"]),
                helper.make_node("Add", ["fork2", "p6"], ["t7"]),
                helper.make_node("Mul", ["t8", "p7"], ["fork3"]),
            ],
        ))
    model = ModelWrapper(modelproto)
    model = model.transform(InferShapes())

    np.random.seed(0)
    for i in range(num_of_params):
        model.set_initializer("p" + str(i),
                              np.random.rand(*input_shape).astype(np.float32))

    new_model = model.transform(SortGraph())

    # Test
    ret = new_model.analysis(ta.nodes_topologically_sorted)
    assert ret["nodes_topologically_sorted"], "Nodes are not topologically sorted."
Example #5
def step_resnet50_convert_to_hls(model: ModelWrapper,
                                 cfg: DataflowBuildConfig):
    model.set_tensor_datatype(model.graph.input[0].name, DataType["UINT8"])
    model = model.transform(InferDataLayouts())

    try:
        from finn.transformation.fpgadataflow.infer_doublepacked_dsp import InferDoublePackedConv
        model = model.transform(InferDoublePackedConv([1]))
    except ImportError:
        print("FINN Experimental not available. Using non-packed convolution.")

    model = model.transform(DoubleToSingleFloat())
    model = model.transform(InferDataTypes())
    model = model.transform(SortGraph())

    to_hls_transformations = [
        to_hls.InferAddStreamsLayer, LowerConvsToMatMul,
        to_hls.InferChannelwiseLinearLayer, to_hls.InferPool_Batch,
        AbsorbTransposeIntoMultiThreshold, RoundAndClipThresholds,
        to_hls.InferQuantizedStreamingFCLayer, to_hls.InferThresholdingLayer,
        AbsorbConsecutiveTransposes, to_hls.InferConvInpGen,
        to_hls.InferDuplicateStreamsLayer, to_hls.InferLabelSelectLayer
    ]
    for trn in to_hls_transformations:
        model = model.transform(trn())
        model = model.transform(InferDataLayouts())
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(InferDataTypes())

    model = model.transform(RemoveCNVtoFCFlatten())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(RemoveUnusedTensors())
    model = model.transform(SortGraph())

    return model
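
Build steps like this one are plain functions of (model, cfg), so they can also be run standalone for debugging; a minimal hypothetical invocation (the filename is illustrative, and cfg may be None here because this particular step never reads it):

model = ModelWrapper("resnet50.onnx")  # hypothetical input file
model = step_resnet50_convert_to_hls(model, cfg=None)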
Example #6
def step_resnet50_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):

    for iter_id in range(4):
        model = step_resnet50_streamline_linear(model, cfg)
        model = step_resnet50_streamline_nonlinear(model, cfg)

        # big loop tidy up
        model = model.transform(RemoveUnusedTensors())
        model = model.transform(GiveReadableTensorNames())
        model = model.transform(InferDataTypes())
        model = model.transform(SortGraph())

    model = model.transform(DoubleToSingleFloat())

    return model
Example #7
    def apply(self, model):
        # only makes sense for a pure fpgadataflow graph -- so we check!
        all_nodes = list(model.graph.node)
        assert all(
            get_by_name(x.attribute, "backend").s.decode("UTF-8") == "fpgadataflow"
            for x in all_nodes
        )
        # parse streamingfclayers looking for external weights with no attached IODMA
        fc_extw_nodes = list(
            filter(
                lambda x: x.op_type == "StreamingFCLayer_Batch"
                and getCustomOp(x).get_nodeattr("mem_mode") == "external"
                and model.find_producer(x.input[1]) is None,
                all_nodes,
            )
        )
        graph_in_name = model.graph.input[0].name
        first_node = model.find_consumer(graph_in_name)
        graph_out_name = model.graph.output[0].name
        final_node = model.find_producer(graph_out_name)
        if (
            final_node.op_type == "IODMA"
            and first_node.op_type == "IODMA"
            and len(fc_extw_nodes) == 0
        ):
            # TODO maybe check the correctness of properties
            return (model, False)
        else:
            if final_node.op_type != "IODMA":
                out_shape = model.get_tensor_shape(graph_out_name)
                out_dtype = model.get_tensor_datatype(graph_out_name)
                final_node_inst = getCustomOp(final_node)
                out_folded_shape = final_node_inst.get_folded_output_shape()
                # take advantage of AXI stream width padding for DMA alignment
                # (AXI streams are always padded to 8 bits)
                # this is the width of stream input to DMA
                padded_outstream_width = final_node_inst.get_outstream_width_padded()
                padded_outstream_bytes = padded_outstream_width // 8
                # determine the feasible interface width
                transfer_bits = padded_outstream_width * np.prod(out_folded_shape[:-1])
                intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
                assert (
                    intfwidth % 8 == 0
                ), "No feasible interface width for transfer size"
                # make new buffer
                final_node_out = oh.make_tensor_value_info(
                    model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape
                )
                model.graph.value_info.append(final_node_out)
                model.set_tensor_datatype(final_node_out.name, out_dtype)
                # reroute final node output to final_node_out_name
                final_node.output[0] = final_node_out.name
                # FIXME: currently always using 8-bit dtypes to work around the
                # padding problems for i/o DMA
                dma_node = oh.make_node(
                    "IODMA",
                    [final_node_out.name],
                    [graph_out_name],
                    numInputVectors=out_folded_shape[:-1],
                    NumChannels=padded_outstream_bytes,
                    dataType="UINT8",
                    intfWidth=intfwidth,
                    streamWidth=padded_outstream_width,
                    direction="out",
                    domain="finn.custom_op.fpgadataflow",
                    backend="fpgadataflow",
                )
                model.graph.node.append(dma_node)
            if first_node.op_type != "IODMA":
                in_shape = model.get_tensor_shape(graph_in_name)
                in_dtype = model.get_tensor_datatype(graph_in_name)
                first_node_inst = getCustomOp(first_node)
                in_folded_shape = first_node_inst.get_folded_input_shape()
                # take advantage of AXI stream width padding for DMA alignment
                # (AXI streams are always padded to 8 bits)
                # this is the width of stream output expected from the DMA
                padded_instream_width = first_node_inst.get_instream_width_padded()
                padded_instream_bytes = padded_instream_width // 8
                # determine the feasible interface width
                transfer_bits = padded_instream_width * np.prod(in_folded_shape[:-1])
                intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
                assert (
                    intfwidth % 8 == 0
                ), "No feasible interface width for transfer size"
                # make new buffer
                first_node_in = oh.make_tensor_value_info(
                    model.make_new_valueinfo_name(), TensorProto.FLOAT, in_shape
                )
                model.graph.value_info.append(first_node_in)
                model.set_tensor_datatype(first_node_in.name, in_dtype)
                # reroute first node input
                # FIXME: currently always using 8-bit dtypes to work around the
                # padding problems for i/o DMA
                first_node.input[0] = first_node_in.name
                dma_node = oh.make_node(
                    "IODMA",
                    [graph_in_name],
                    [first_node_in.name],
                    numInputVectors=in_folded_shape[:-1],
                    NumChannels=padded_instream_bytes,
                    dataType="UINT8",
                    intfWidth=intfwidth,
                    streamWidth=padded_instream_width,
                    direction="in",
                    domain="finn.custom_op.fpgadataflow",
                    backend="fpgadataflow",
                )
                model.graph.node.insert(0, dma_node)
            for fc_node in fc_extw_nodes:
                fc_inst = getCustomOp(fc_node)
                fc_w_name = fc_node.input[1]
                w_shape = model.get_tensor_shape(fc_w_name)
                w_dtype = model.get_tensor_datatype(fc_w_name)
                # determine the feasible interface width
                transfer_bits = np.prod(w_shape) * w_dtype.bitwidth()
                intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
                assert (
                    intfwidth % 8 == 0
                ), "No feasible interface width for transfer size"
                # calculate width of stream output from DMA
                pe = get_by_name(fc_node.attribute, "PE").i
                simd = get_by_name(fc_node.attribute, "SIMD").i
                streamWidth = fc_inst.get_weightstream_width_padded()
                # make new buffer
                W = model.get_initializer(fc_w_name)
                iodma_mem = self.get_mem_init(W, pe, simd)
                model.set_initializer(fc_w_name, iodma_mem)

                fc_node_in = oh.make_tensor_value_info(
                    model.make_new_valueinfo_name(), TensorProto.FLOAT, iodma_mem.shape
                )
                model.graph.value_info.append(fc_node_in)
                model.set_tensor_datatype(fc_node_in.name, w_dtype)
                model.set_initializer(fc_node_in.name, W)
                dma_node = oh.make_node(
                    "IODMA",
                    [fc_w_name],
                    [fc_node_in.name],
                    numInputVectors=[iodma_mem.shape[0]],
                    NumChannels=pe * simd,
                    dataType=str(w_dtype.name),
                    intfWidth=intfwidth,
                    streamWidth=streamWidth,
                    direction="in",
                    burstMode="wrap",
                    domain="finn.custom_op.fpgadataflow",
                    backend="fpgadataflow",
                )
                fc_node.input[1] = fc_node_in.name
                model.graph.node.insert(0, dma_node)
            model = model.transform(SortGraph())
            return (model, True)
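
The interface-width computation above picks the widest AXI width that evenly divides the total transfer, via a GCD against max_intfwidth. A worked example with assumed numbers (max_intfwidth = 128 is illustrative, not taken from this code):

import math

padded_stream_width = 24   # bits, already padded to a multiple of 8
num_transfers = 4          # product of the folded shape dims, minus the last
max_intfwidth = 128        # assumed cap on the DMA interface width

transfer_bits = padded_stream_width * num_transfers  # 96
intfwidth = math.gcd(transfer_bits, max_intfwidth)   # gcd(96, 128) = 32
assert intfwidth % 8 == 0, "No feasible interface width for transfer size"
print(intfwidth)  # 32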
Example #8
    sizes = [10, 50, 100, 500, 1000]
    times = []
    reps = 10

    print("SortGraph performance test:")
    print("Test sizes", sizes)
    print("Repetitions per size:", reps)
    for sz in sizes:
        acc_time = 0
        print(" Testing size ", sz)
        for i in range(reps):
            # sorting an already-sorted model should take roughly the same
            # time, but building a fresh model per repetition is the more
            # general approach
            model = make_randomly_sorted_linear_model(sz)  # seed is None, so a new random model each time
            bef = time.time()
            new_model = model.transform(SortGraph(), make_deepcopy=False)
            acc_time += time.time() - bef

        times += [acc_time / reps]

    # print csv
    print("\nnum_of_nodes,  seconds")
    for sz, tm in zip(sizes, times):
        print("{:12d}, {:6.4e}".format(sz, tm))

    # plot
    # import matplotlib.pyplot as plt
    # plt.plot(sizes,times,"--o")
    # plt.grid(True)
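
What SortGraph must restore is a topological order over model.graph.node. A minimal repeated-scan sketch of the idea (the actual FINN implementation may work differently, but it restores the same invariant):

def topo_sort(nodes, available_tensors):
    # available_tensors: names of graph inputs and initializers
    available = set(available_tensors)
    pending = list(nodes)
    ordered = []
    while pending:
        progress = False
        for n in list(pending):
            if all(i == "" or i in available for i in n.input):
                ordered.append(n)
                available.update(n.output)
                pending.remove(n)
                progress = True
        if not progress:
            raise RuntimeError("graph contains a cycle")
    return ordered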
Example #9
def test_convert_to_hls_layers_synthetic(ch, ifmdim, idt):
    model = make_model(ch, ifmdim)
    model.save(export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # generate test vectors of correct shape
    if ifmdim == -1:
        input_tensor_shape = (1, ch)
    else:
        input_tensor_shape = (1, ch, ifmdim, ifmdim)

    x = gen_finn_dt_tensor(idt, input_tensor_shape)

    # generate expected value from streamlined net
    input_dict = {model.graph.input[0].name: x}

    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_sum = output_dict[model.graph.output[0].name]
    chw_mul = 1  # the channelwise scaling is accounted for explicitly below
    expected_sum = chw_mul * np.sum(2 * (2 * x + 15.0),
                                    axis=(2, 3)) / (ifmdim * ifmdim)
    assert (produced_sum.flatten() == expected_sum.flatten()).all()

    model = model.transform(InferDataLayouts())

    # convert to hls
    model.set_tensor_datatype(model.graph.input[0].name, idt)
    # extra streamlining
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(MoveAddPastMul())
    model = model.transform(CollapseRepeatedMul())
    model = model.transform(CollapseRepeatedAdd())
    # a top-k node is inserted below (InsertTopK), which should absorb the linear ops before it

    model = model.transform(InferShapes())
    model = model.transform(InferDataLayouts())
    model = model.transform(InferDataTypes())

    model = model.transform(to_hls.InferChannelwiseLinearLayer())
    model = model.transform(to_hls.InferAddStreamsLayer())
    model = model.transform(to_hls.InferGlobalAccPoolLayer())
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(InsertTopK())
    model = model.transform(AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(AbsorbConsecutiveTransposes())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(to_hls.InferDuplicateStreamsLayer())

    model = model.transform(SortGraph())

    # model.save("golden_hls.onnx")
    # check topology status

    finn_nodes = model.get_finn_nodes()
    assert len(finn_nodes) == 9
    add_nodes = model.get_nodes_by_op_type("AddStreams_Batch")
    assert len(add_nodes) == 1
    pool_nodes = model.get_nodes_by_op_type("GlobalAccPool_Batch")
    assert len(pool_nodes) == 1
    label_nodes = model.get_nodes_by_op_type("LabelSelect_Batch")
    assert len(label_nodes) == 1
    channelwise_nodes = model.get_nodes_by_op_type("ChannelwiseOp_Batch")
    assert len(channelwise_nodes) == 5
    dup_nodes = model.get_nodes_by_op_type("DuplicateStreams_Batch")
    assert len(dup_nodes) == 1

    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))

    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_topk_hls = output_dict[model.graph.output[0].name]
    topk_input = output_dict[model.graph.node[-1].input[0]]
    assert soft_verify_topk(topk_input, produced_topk_hls, 5)

    os.remove(export_onnx_path)
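
soft_verify_topk tolerates ties in the top-k values instead of demanding identical indices; a plausible sketch of such a check (an assumption, since the FINN test utility is not shown here):

import numpy as np

def soft_verify_topk(invec, idxvec, k):
    # the produced indices must select values equal to the true top-k
    # values; comparing sorted value sets tolerates ties among entries
    flat = invec.flatten()
    expected_vals = np.sort(flat)[-k:]
    produced_vals = np.sort(flat[idxvec.astype(np.int64).flatten()])
    return (expected_vals == produced_vals).all()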
Example #10
 def apply(self, model):
     # only makes sense for a pure fpgadataflow graph -- so we check!
     all_nodes = list(model.graph.node)
     assert all(
         get_by_name(x.attribute, "backend").s.decode("UTF-8") == "fpgadataflow"
         for x in all_nodes)
     # parse streamingfclayers looking for external weights with no attached IODMA
     fc_extw_nodes = list(
         filter(
             lambda x: x.op_type == "StreamingFCLayer_Batch" and
             get_by_name(x.attribute, "mem_mode") is not None and
             get_by_name(x.attribute, "mem_mode").s.decode("UTF-8") ==
             "external" and model.find_producer(x.input[1]) is None,
             all_nodes,
         ))
     graph_in_name = model.graph.input[0].name
     first_node = model.find_consumer(graph_in_name)
     graph_out_name = model.graph.output[0].name
     final_node = model.find_producer(graph_out_name)
     if (final_node.op_type == "IODMA" and first_node.op_type == "IODMA"
             and len(fc_extw_nodes) == 0):
         # TODO maybe check the correctness of properties
         return (model, False)
     else:
         if final_node.op_type != "IODMA":
             # check that the output tensor layout is NHWC or NC
             assert (
                 model.get_tensor_layout(graph_out_name) == DataLayout.NHWC
                 or model.get_tensor_layout(graph_out_name) == DataLayout.NC
             ), "Data layout of output tensor must be NHWC or NC"
             out_shape = model.get_tensor_shape(graph_out_name)
             out_dtype = model.get_tensor_datatype(graph_out_name)
             # determine the feasible interface width
             transfer_bits = np.prod(out_shape) * out_dtype.bitwidth()
             intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
             assert (
                 intfwidth % 8 == 0
             ), "No feasible interface width for transfer size"
             # get width of stream input to DMA
             streamWidth = getCustomOp(final_node).get_outstream_width()
             # make new buffer
             final_node_out = oh.make_tensor_value_info(
                 model.make_new_valueinfo_name(), TensorProto.FLOAT,
                 out_shape)
             model.graph.value_info.append(final_node_out)
             model.set_tensor_datatype(final_node_out.name, out_dtype)
             # reroute final node output to final_node_out_name
             final_node.output[0] = final_node_out.name
             dma_node = oh.make_node(
                 "IODMA",
                 [final_node_out.name],
                 [graph_out_name],
                 numInputVectors=out_shape[:-1],
                 NumChannels=out_shape[-1],
                 dataType=str(out_dtype.name),
                 intfWidth=intfwidth,
                 streamWidth=streamWidth,
                 direction="out",
                 domain="finn",
                 backend="fpgadataflow",
             )
             model.graph.node.append(dma_node)
         if first_node.op_type != "IODMA":
             # check that the input tensor layout is NHWC or NC
             assert (
                 model.get_tensor_layout(graph_in_name) == DataLayout.NHWC
                 or model.get_tensor_layout(graph_in_name) == DataLayout.NC
             ), "Data layout of input tensor must be NHWC or NC"
             in_shape = model.get_tensor_shape(graph_in_name)
             in_dtype = model.get_tensor_datatype(graph_in_name)
             # determine the feasible interface width
             transfer_bits = np.prod(in_shape) * in_dtype.bitwidth()
             intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
             assert (
                 intfwidth % 8 == 0
             ), "No feasible interface width for transfer size"
             # get width of stream output from DMA
             streamWidth = getCustomOp(first_node).get_instream_width()
             # make new buffer
             first_node_in = oh.make_tensor_value_info(
                 model.make_new_valueinfo_name(), TensorProto.FLOAT,
                 in_shape)
             model.graph.value_info.append(first_node_in)
             model.set_tensor_datatype(first_node_in.name, in_dtype)
             # reroute first node input to first_node_in
             first_node.input[0] = first_node_in.name
             dma_node = oh.make_node(
                 "IODMA",
                 [graph_in_name],
                 [first_node_in.name],
                 numInputVectors=in_shape[:-1],
                 NumChannels=in_shape[-1],
                 dataType=str(in_dtype.name),
                 intfWidth=intfwidth,
                 streamWidth=streamWidth,
                 direction="in",
                 domain="finn",
                 backend="fpgadataflow",
             )
             model.graph.node.insert(0, dma_node)
         for fc_node in fc_extw_nodes:
             # check that the weight tensor layout is NHWC or NC
             assert (
                 model.get_tensor_layout(fc_node.input[1]) == DataLayout.NHWC
                 or model.get_tensor_layout(fc_node.input[1]) == DataLayout.NC
             ), "Data layout of weight tensor must be NHWC or NC"
             fc_w_name = fc_node.input[1]
             w_shape = model.get_tensor_shape(fc_w_name)
             w_dtype = model.get_tensor_datatype(fc_w_name)
             # determine the feasible interface width
             transfer_bits = np.prod(w_shape) * w_dtype.bitwidth()
             intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
             assert (
                 intfwidth % 8 == 0
             ), "No feasible interface width for transfer size"
             # calculate width of stream output from DMA
             pe = get_by_name(fc_node.attribute, "PE").i
             simd = get_by_name(fc_node.attribute, "SIMD").i
             assert pe * simd == w_shape[0], "Malformed weight matrix"
             streamWidth = simd * pe * w_dtype.bitwidth()
             # make new buffer
             fc_node_in = oh.make_tensor_value_info(
                 model.make_new_valueinfo_name(), TensorProto.FLOAT,
                 w_shape)
             model.graph.value_info.append(fc_node_in)
             model.set_tensor_datatype(fc_node_in.name, w_dtype)
             model.set_initializer(fc_node_in.name,
                                   model.get_initializer(fc_w_name))
             dma_node = oh.make_node(
                 "IODMA",
                 [fc_w_name],
                 [fc_node_in.name],
                 numInputVectors=[w_shape[1]],
                 NumChannels=w_shape[0],
                 dataType=str(w_dtype.name),
                 intfWidth=intfwidth,
                 streamWidth=streamWidth,
                 direction="in",
                 burstMode="wrap",
                 domain="finn",
                 backend="fpgadataflow",
             )
             fc_node.input[1] = fc_node_in.name
             model.graph.node.insert(0, dma_node)
         model = model.transform(SortGraph())
         return (model, True)
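
As a sanity check on the weight-stream sizing above, the stream width is just the number of weights delivered per cycle times their bit width; a worked example with assumed folding factors:

# assumed values; the assert above requires pe * simd == w_shape[0]
pe, simd = 16, 32
w_dtype_bits = 4                        # e.g. 4-bit weights
streamWidth = simd * pe * w_dtype_bits  # 2048 bits per stream beat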