def test_add_pre_and_postproc(self, topology, wbits, abits):
    """Add ToTensor preprocessing and a Top-1 node to the tidied checkpoint.

    Loads the "import_and_tidy" checkpoint through
    ``load_test_checkpoint_or_skip``, exports a ``ToTensor`` module to the
    "preproc" checkpoint, joins it with the core model, annotates the global
    input as UINT8, inserts a TopK node with k=1, tidies the graph and saves
    the result as the "pre_post" checkpoint.
    """
    tidy_chkpt = get_checkpoint_name(topology, wbits, abits, "import_and_tidy")
    model = load_test_checkpoint_or_skip(tidy_chkpt)
    in_shape = model.get_tensor_shape(model.graph.input[0].name)

    # preprocessing: torchvision's ToTensor divides uint8 inputs by 255
    to_tensor = ToTensor()
    preproc_chkpt = get_checkpoint_name(topology, wbits, abits, "preproc")
    bo.export_finn_onnx(to_tensor, in_shape, preproc_chkpt)
    assert os.path.isfile(preproc_chkpt)

    # join preprocessing and core model
    pre_model = (
        ModelWrapper(preproc_chkpt)
        .transform(InferShapes())
        .transform(FoldConstants())
    )
    model = model.transform(MergeONNXModels(pre_model))

    # add input quantization annotation: UINT8 for all BNN-PYNQ models
    # (the input name is looked up again after the merge)
    model.set_tensor_datatype(model.graph.input[0].name, DataType.UINT8)

    # postprocessing: insert Top-1 node at the end
    model = model.transform(InsertTopK(k=1))
    out_chkpt = get_checkpoint_name(topology, wbits, abits, "pre_post")

    # tidy-up again; each transformation is instantiated right before use
    tidy_steps = (
        InferShapes,
        FoldConstants,
        GiveUniqueNodeNames,
        GiveReadableTensorNames,
        InferDataTypes,
        RemoveStaticGraphInputs,
    )
    for make_step in tidy_steps:
        model = model.transform(make_step())

    model.save(out_chkpt)
    assert os.path.isfile(out_chkpt)
def test_topk_insert(k):
    """Check that InsertTopK makes the TFC-w1a1 model emit its top-k indices.

    The golden result is computed by running the Brevitas model on a stored
    MNIST test vector and taking the indices of the k largest outputs. The
    exported model, with a TopK node inserted, must produce the same indices.

    Args:
        k: number of top entries to select.
    """
    tfc = get_test_model_trained("TFC", 1, 1)
    bo.export_finn_onnx(tfc, (1, 1, 28, 28), export_onnx_path)
    model = ModelWrapper(export_onnx_path)

    # do transformations (no topk)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())

    # verification: load a stored test vector, run it through the Brevitas
    # network and derive the expected top-k indices (descending by value)
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    input_brevitas = torch.from_numpy(nph.to_array(input_tensor)).float()
    output_golden = tfc.forward(input_brevitas).detach().numpy()
    output_golden_topk = np.flip(output_golden.flatten().argsort())[:k]
    output_golden_topk = output_golden_topk.flatten()

    input_dict = {"global_in": nph.to_array(input_tensor)}

    # insert top-k
    model = model.transform(InsertTopK(k))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferShapes())

    # verify output of top-k
    output_dict_topk = oxe.execute_onnx(model, input_dict)
    output_pysim_topk = output_dict_topk[list(output_dict_topk.keys())[0]]
    # ``np.int`` was removed in NumPy 1.24; builtin ``int`` is the type it
    # aliased, so the resulting dtype is unchanged on older NumPy versions
    output_pysim_topk = output_pysim_topk.astype(int).flatten()

    assert np.array_equal(output_golden_topk, output_pysim_topk)
def test_change_datalayout_quantavgpool(s, k, ibits, obits, signed, c, idim):
    """Check ChangeDataLayoutQuantAvgPool2d on a single QuantAvgPool2d node.

    Builds a one-node NCHW model, applies the transformation and verifies
    that both models execute identically, that a Transpose is placed at each
    end of the transformed graph, and that the pool node and its input and
    output tensors are annotated as NHWC.
    """
    batch = 1
    odim = compute_pool_output_dim(idim, k, s)
    in_shape = [batch, c, idim, idim]
    out_shape = [batch, c, odim, odim]

    # determine input FINN datatype
    sign_prefix = "INT" if signed is True else "UINT"
    dtype = DataType[sign_prefix + str(ibits)]

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, in_shape)
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_shape)
    pool_node = helper.make_node(
        "QuantAvgPool2d",
        ["inp"],
        ["outp"],
        domain="finn",
        stride=s,
        kernel=k,
        ibits=ibits,
        obits=obits,
        signed=signed,
        data_layout="NCHW",
    )
    graph = helper.make_graph(
        nodes=[pool_node],
        name="single-quantavgpool",
        inputs=[inp],
        outputs=[outp],
    )

    def annotate_and_name(wrapped):
        # shape/datatype/layout inference followed by renaming
        for make_step in (
            InferShapes,
            InferDataTypes,
            InferDataLayouts,
            GiveUniqueNodeNames,
            GiveReadableTensorNames,
        ):
            wrapped = wrapped.transform(make_step())
        return wrapped

    model = annotate_and_name(ModelWrapper(helper.make_model(graph)))
    model_transformed = annotate_and_name(
        model.transform(ChangeDataLayoutQuantAvgPool2d())
    )

    idict = {"inp": gen_finn_dt_tensor(dtype, [batch, c, idim, idim])}
    assert oxe.compare_execution(model, model_transformed, idict)

    # exactly two nodes are added: a Transpose at the start and at the end
    new_nodes = model_transformed.graph.node
    assert len(model.graph.node) + 2 == len(new_nodes)
    assert new_nodes[-1].op_type == "Transpose"
    assert new_nodes[0].op_type == "Transpose"

    # check if QuantAvgPool2d node has datalayout set correctly
    node = new_nodes[1]
    d_layout = get_by_name(node.attribute, "data_layout").s.decode("UTF-8")
    assert d_layout == "NHWC"
    assert model_transformed.get_tensor_layout(node.input[0]) == DataLayout.NHWC
    assert model_transformed.get_tensor_layout(node.output[0]) == DataLayout.NHWC
def test_topk_insert(k):
    """Check that InsertTopK makes the mnist-conv model emit its top-k indices.

    The golden result is obtained by executing the model without TopK on a
    stored test vector and taking the indices of the k largest outputs. After
    inserting a TopK node, the executed model must return the same indices.

    Args:
        k: number of top entries to select.
    """
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    model.model.opset_import[0].version = 11

    # do transformations (no topk)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())

    # verification: load a stored test vector, run it through the net and
    # derive the expected top-k indices (descending by value)
    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    input_tensor = nph.to_array(input_tensor)
    input_dict = {"global_in": input_tensor}
    output_golden = oxe.execute_onnx(model, input_dict)["global_out"]
    output_golden_topk = np.flip(output_golden.flatten().argsort())[:k]
    output_golden_topk = output_golden_topk.flatten()

    # insert top-k
    model = model.transform(InsertTopK(k))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferShapes())

    # verify output of top-k
    output_dict_topk = oxe.execute_onnx(model, input_dict)
    output_pysim_topk = output_dict_topk[list(output_dict_topk.keys())[0]]
    # ``np.int`` was removed in NumPy 1.24; builtin ``int`` is the type it
    # aliased, so the resulting dtype is unchanged on older NumPy versions
    output_pysim_topk = output_pysim_topk.astype(int).flatten()

    assert np.array_equal(output_golden_topk, output_pysim_topk)
def step_resnet50_tidy(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Tidy up the model and append a TopK node.

    Applies a fixed sequence of transformations: parameter de-duplication,
    shape inference, constant folding, removal of static graph inputs,
    renaming and datatype inference; then inserts a TopK node (default
    arguments) and repeats shape inference, renaming and datatype inference.

    ``cfg`` is accepted for the build-step signature but is not read here.
    Returns the transformed model.
    """
    pipeline = (
        GiveUniqueParameterTensors,
        InferShapes,
        FoldConstants,
        RemoveStaticGraphInputs,
        GiveUniqueNodeNames,
        GiveReadableTensorNames,
        InferDataTypes,
        InsertTopK,
        # re-annotate after the TopK insertion
        InferShapes,
        GiveUniqueNodeNames,
        GiveReadableTensorNames,
        InferDataTypes,
    )
    for make_step in pipeline:
        # instantiate each transformation right before applying it
        model = model.transform(make_step())
    return model
def test_brevitas_debug():
    """Compare FINN execution with Brevitas at the output and at debug markers.

    Exports the TFC-w2a2 model with the Brevitas debug hook enabled, executes
    it on a stored MNIST test vector with the full execution context
    returned, and checks the final output plus every tensor whose name
    appears in both the debug hook and the execution context.
    """
    finn_onnx = "test_brevitas_debug.onnx"
    fc = get_test_model_trained("TFC", 2, 2)
    dbg_hook = bo.enable_debug(fc)
    bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)

    model = ModelWrapper(finn_onnx)
    for make_step in (InferShapes, FoldConstants, RemoveStaticGraphInputs):
        model = model.transform(make_step())
    assert len(model.graph.input) == 1
    assert len(model.graph.output) == 1

    # load one of the test vectors
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_proto = onnx.load_tensor_from_string(raw_i)

    # run using FINN-based execution
    exec_ctx = oxe.execute_onnx(
        model,
        {"0": nph.to_array(input_proto)},
        return_full_exec_context=True,
    )
    produced = exec_ctx[model.graph.output[0].name]

    # run using PyTorch/Brevitas
    torch_input = torch.from_numpy(nph.to_array(input_proto)).float()
    assert torch_input.shape == (1, 1, 28, 28)
    expected = fc.forward(torch_input).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()

    # check all tensors at debug markers
    names_brevitas = set(dbg_hook.values.keys())
    names_finn = set(exec_ctx.keys())
    names_common = names_brevitas & names_finn
    assert len(names_common) == 16
    for dbg_name in names_common:
        tensor_pytorch = dbg_hook.values[dbg_name].detach().numpy()
        assert np.isclose(exec_ctx[dbg_name], tensor_pytorch, atol=1e-5).all()

    os.remove(finn_onnx)
def test_brevitas_cnv_export_exec(wbits, abits):
    """Export a trained CNV model and compare FINN execution with Brevitas.

    Skipped when ``wbits > abits``. The input is the packaged CIFAR-10
    class-3 sample scaled by 1/255; the FINN output must match the Brevitas
    forward pass within 1e-3 and its argmax must be 3.
    """
    if wbits > abits:
        pytest.skip("No wbits > abits cases at the moment")

    cnv = get_test_model_trained("CNV", wbits, abits)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path)

    model = ModelWrapper(export_onnx_path)
    for make_step in (
        GiveUniqueNodeNames,
        InferShapes,
        FoldConstants,
        RemoveStaticGraphInputs,
    ):
        model = model.transform(make_step())
    assert len(model.graph.input) == 1
    assert len(model.graph.output) == 1

    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    np_input = np.load(fn)["arr_0"].astype(np.float32)
    np_input = np_input / 255
    assert np_input.shape == (1, 3, 32, 32)

    # run using FINN-based execution
    in_name = model.graph.input[0].name
    exec_ctx = oxe.execute_onnx(model, {in_name: np_input}, True)
    produced = exec_ctx[model.graph.output[0].name]

    # do forward pass in PyTorch/Brevitas
    torch_input = torch.from_numpy(np_input).float()
    expected = cnv.forward(torch_input).detach().numpy()

    assert np.isclose(produced, expected, atol=1e-3).all()
    assert np.argmax(produced) == 3
    os.remove(export_onnx_path)
def test_quartznet_asr_4b(pretrained):
    """Export the 4-bit QuartzNet and compare FINN execution with Brevitas.

    A uniformly random input in [MIN_INP_VAL, MAX_INP_VAL) is run through the
    exported model and through the PyTorch module; the outputs must agree
    within ``ATOL``.
    """
    finn_onnx = "quant_quartznet_perchannelscaling_4b.onnx"
    quartznet = quant_quartznet_perchannelscaling_4b(pretrained, export_mode=True)
    quartznet.eval()
    FINNManager.export(quartznet, QUARTZNET_POSTPROCESSED_INPUT_SIZE, finn_onnx)

    model = ModelWrapper(finn_onnx)
    for make_step in (
        GiveUniqueNodeNames,
        DoubleToSingleFloat,
        InferShapes,
        FoldConstants,
        RemoveStaticGraphInputs,
    ):
        model = model.transform(make_step())

    # generate a random test vector
    np_input = np.random.uniform(
        MIN_INP_VAL, MAX_INP_VAL, size=QUARTZNET_POSTPROCESSED_INPUT_SIZE
    ).astype(np.float32)

    # run using FINN-based execution
    output_dict = oxe.execute_onnx(model, {"0": np_input})
    first_output_name = list(output_dict.keys())[0]
    produced = output_dict[first_output_name]

    # do forward pass in PyTorch/Brevitas
    torch_input = torch.from_numpy(np_input).float()
    expected = quartznet(torch_input).detach().numpy()

    assert np.isclose(produced, expected, atol=ATOL).all()
def test_brevitas_act_export_qhardtanh_nonscaled(abits, narrow_range, max_val):
    """Export a constant-scaled QuantHardTanh and compare FINN with Brevitas.

    The quantization type follows ``abits``: FP for None, BINARY for 1 and
    INT otherwise. A uniformly random input in [-1.0, max_val) is executed
    by both implementations and the results must match within 1e-3.
    """
    if abits is None:
        act_quant_type = QuantType.FP
    elif abits == 1:
        act_quant_type = QuantType.BINARY
    else:
        act_quant_type = QuantType.INT

    min_val = -1.0
    ishape = (1, 10)
    b_act = QuantHardTanh(
        bit_width=abits,
        quant_type=act_quant_type,
        max_val=max_val,
        min_val=min_val,
        restrict_scaling_type=RestrictValueType.LOG_FP,
        scaling_impl_type=ScalingImplType.CONST,
        narrow_range=narrow_range,
    )
    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
    model = ModelWrapper(export_onnx_path).transform(InferShapes())

    np_input = np.random.uniform(low=min_val, high=max_val, size=ishape)
    np_input = np_input.astype(np.float32)

    # FINN execution
    exec_ctx = oxe.execute_onnx(model, {model.graph.input[0].name: np_input}, True)
    produced = exec_ctx[model.graph.output[0].name]

    # Brevitas forward pass
    torch_input = torch.from_numpy(np_input).float()
    expected = b_act.forward(torch_input).detach().numpy()

    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
def test_brevitas_qlinear(bias, out_features, in_features, w_bits, i_dtype):
    """Export a QuantLinear layer and compare FINN execution with Brevitas.

    The layer's weights are overwritten with uniformly random values in
    [-1, 1); the input is generated by ``gen_finn_dt_tensor`` for
    ``i_dtype``. The two outputs must agree within 1e-3.
    """
    i_shape = (1, in_features)
    w_shape = (out_features, in_features)
    b_linear = QuantLinear(
        out_features=out_features,
        in_features=in_features,
        bias=bias,
        bias_quant_type=QuantType.FP,
        weight_bit_width=w_bits,
        weight_quant_type=QuantType.INT,
        weight_scaling_per_output_channel=True,
    )

    random_weights = np.random.uniform(low=-1.0, high=1.0, size=w_shape)
    b_linear.weight.data = torch.from_numpy(random_weights.astype(np.float32))
    b_linear.eval()

    bo.export_finn_onnx(b_linear, i_shape, export_onnx_path)
    model = ModelWrapper(export_onnx_path).transform(InferShapes())

    np_input = gen_finn_dt_tensor(i_dtype, i_shape)

    # FINN execution
    exec_ctx = oxe.execute_onnx(model, {model.graph.input[0].name: np_input}, True)
    produced = exec_ctx[model.graph.output[0].name]

    # Brevitas forward pass
    torch_input = torch.from_numpy(np_input).float()
    expected = b_linear.forward(torch_input).detach().numpy()

    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
def test_streamline_cnv(size, wbits, abits):
    """Check that Streamline keeps the CNV output and yields the expected graph.

    Skipped when ``wbits > abits``. Executes the tidied model on the packaged
    CIFAR-10 class-3 sample, streamlines it, checks the initializer,
    value_info and node counts, then checks that the output is unchanged
    within 1e-3, that the first node is a MultiThreshold and that the argmax
    is 3.
    """
    if wbits > abits:
        pytest.skip("No wbits > abits cases at the moment")

    nname = "%s_%dW%dA" % (size, wbits, abits)
    finn_onnx = export_onnx_path + "/%s.onnx" % nname
    fc = get_test_model_trained(size, wbits, abits)
    bo.export_finn_onnx(fc, (1, 3, 32, 32), finn_onnx)

    model = ModelWrapper(finn_onnx)
    for make_step in (
        InferShapes,
        FoldConstants,
        GiveUniqueNodeNames,
        GiveReadableTensorNames,
        RemoveStaticGraphInputs,
    ):
        model = model.transform(make_step())

    # load one of the test vectors
    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)

    # reference run before streamlining
    input_dict = {"global_in": input_tensor}
    expected_ctx = oxe.execute_onnx(model, input_dict, True)
    expected = expected_ctx[model.graph.output[0].name]

    model = model.transform(Streamline())
    model = model.transform(RemoveUnusedTensors())

    # structural expectations for the streamlined graph
    streamlined = model.graph
    assert len(streamlined.initializer) == 21
    assert len(streamlined.value_info) == 43
    assert len(streamlined.node) == 23

    produced_ctx = oxe.execute_onnx(model, input_dict, True)
    produced = produced_ctx[model.graph.output[0].name]
    assert np.isclose(expected, produced, atol=1e-3).all()
    assert model.graph.node[0].op_type == "MultiThreshold"
    assert np.argmax(produced) == 3
def apply(self, model):
    """Remove identity operations from the graph.

    A node is removed when it is an Add/Sub whose second input is an
    all-zero initializer, or a Mul/Div whose second input is an all-one
    initializer, and it is neither a fork nor a join node. The producer of
    the node's first input is rewired to write directly to the removed
    node's output tensor.

    Nodes whose first input has no producer node are left untouched, since
    there is nothing to rewire.

    Args:
        model: the model wrapper to transform.

    Returns:
        Tuple ``(model, graph_modified)``; ``graph_modified`` is True when at
        least one node was removed, so the caller can re-apply the pass.
    """
    graph = model.graph
    graph_modified = False
    # Iterate over a snapshot: removing from graph.node while iterating it
    # directly would skip the node that follows each removed one.
    for n in list(graph.node):
        if n.op_type in ["Add", "Sub"]:
            identity_like = np.zeros_like
        elif n.op_type in ["Mul", "Div"]:
            identity_like = np.ones_like
        else:
            continue
        if model.is_fork_node(n) or model.is_join_node(n):
            continue
        A = model.get_initializer(n.input[1])
        if A is None or not (A == identity_like(A)).all():
            continue
        producer = model.find_producer(n.input[0])
        if producer is None:
            # no upstream node to rewire; keep the node in place
            continue
        # remove node and wire output tensor to output of producer node
        producer.output[0] = n.output[0]
        graph.node.remove(n)
        graph_modified = True
    model = model.transform(InferShapes())
    return (model, graph_modified)
def apply(self, model):
    """Fold statically computable nodes into initializers.

    A node is folded when every one of its inputs has an initializer, or
    when it is a ``Shape`` node whose input tensor shape is known. The node
    is executed, its first output is stored as an initializer, and the node
    is removed from the graph.

    Returns a tuple ``(model, graph_modified)``; shape inference is re-run
    only when at least one node was folded.
    """
    graph = model.graph
    graph_modified = False
    execution_context = model.make_empty_exec_context()
    for n in graph.node:
        input_inits = [model.get_initializer(name) for name in n.input]
        has_only_const_inputs = all(init is not None for init in input_inits)
        node_out = n.output[0]
        ishape = model.get_tensor_shape(n.input[0])
        is_const_shape = (n.op_type == "Shape") and (ishape is not None)
        if not (has_only_const_inputs or is_const_shape):
            continue
        # no dynamic inputs (or a Shape of a known shape): execute the node
        # and keep its result as an initializer in place of the node
        oxe.execute_node(n, execution_context, graph)
        model.set_initializer(node_out, execution_context[node_out])
        graph.node.remove(n)
        graph_modified = True
    if graph_modified:
        model = model.transform(InferShapes())
    return (model, graph_modified)
def test_onnx_exec_internal_rounding():
    """Check that ONNX execution yields exact integer results for INT2 inputs.

    A single Mul node multiplies an INT2-annotated tensor by -1. The tensor
    fed to execution is the integer tensor scaled and then divided by
    ``scale + 1e-7``, so it is close to but not exactly the integer values.
    The produced output must nevertheless equal the exact integer product.
    """
    inp0 = onnx.helper.make_tensor_value_info("inp0", onnx.TensorProto.FLOAT, [2, 2])
    inp1 = onnx.helper.make_tensor_value_info("inp1", onnx.TensorProto.FLOAT, [1])
    outp = onnx.helper.make_tensor_value_info("outp", onnx.TensorProto.FLOAT, [2, 2])
    mul_node = onnx.helper.make_node("Mul", inputs=["inp0", "inp1"], outputs=["outp"])
    graph = onnx.helper.make_graph(
        nodes=[mul_node],
        name="mul_graph",
        inputs=[inp0, inp1],
        outputs=[outp],
    )
    model = onnx.helper.make_model(graph, producer_name="mul-model")
    model = ModelWrapper(model)
    idt = DataType.INT2
    model.set_tensor_datatype("inp0", idt)
    model.set_tensor_datatype("inp1", idt)
    # keep the transformed model: the result was previously discarded,
    # unlike every other transform call in these tests
    model = model.transform(InferShapes())

    mul_value = np.asarray([-1], dtype=np.float32)
    inp_int = gen_finn_dt_tensor(idt, [2, 2])
    scale = np.random.uniform(low=0, high=1, size=(2, 2)).astype(np.float32)
    # introduce small floating-point deviations from the integer values
    inp_rounded = (inp_int * scale) / (scale + 1e-7)
    input_dict = {"inp0": inp_rounded, "inp1": mul_value}
    output_dict = oxe.execute_onnx(model, input_dict)
    produced = output_dict["outp"]
    expected = np.multiply(inp_int, mul_value)
    assert (produced == expected).all()
def test_move_scalar_add_past_matmul():
    """Check that MoveScalarAddPastMatMul swaps Add and MatMul correctly.

    Builds Add(scalar) -> MatMul, applies the transformation and verifies
    that both models execute identically and that the new graph is
    MatMul -> Add with the two nodes connected.
    """

    def float_info(name, shape):
        return oh.make_tensor_value_info(name, TensorProto.FLOAT, shape)

    top_in = float_info("top_in", [1, 2])
    add_param = float_info("add_param", [1, 1])
    matmul_param = float_info("matmul_param", [2, 2])
    top_out = float_info("top_out", [1, 2])

    add_node = oh.make_node("Add", ["top_in", "add_param"], ["middle"])
    matmul_node = oh.make_node("MatMul", ["middle", "matmul_param"], ["top_out"])
    graph = oh.make_graph(
        name="test",
        inputs=[top_in],
        outputs=[top_out],
        value_info=[add_param, matmul_param],
        nodes=[add_node, matmul_node],
    )

    model = ModelWrapper(oh.make_model(graph)).transform(InferShapes())
    model.set_initializer("add_param", np.asarray([[3]], dtype=np.float32))
    model.set_initializer(
        "matmul_param", np.asarray([[2, 4], [-1, 1]], dtype=np.float32)
    )

    new_model = model.transform(MoveScalarAddPastMatMul())
    inp_dict = {"top_in": np.asarray([[-1.0, 1.0]], dtype=np.float32)}
    assert ox.compare_execution(model, new_model, inp_dict)

    first, second = new_model.graph.node[0], new_model.graph.node[1]
    assert first.op_type == "MatMul"
    assert second.op_type == "Add"
    assert first.output[0] == second.input[0]
def test_mnist_onnx_download_extract_run():
    """Execute the mnist-conv model fully and on a subgraph.

    The full-graph output is compared against the stored reference output;
    then nodes 1..3 are executed as a subgraph, fed from the full execution
    context, and the result is compared against the same context.
    """
    # load the onnx model
    raw_m = get_data("finn", "data/onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m).transform(InferShapes())

    # load one of the test vectors
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    raw_o = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/output_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    output_tensor = onnx.load_tensor_from_string(raw_o)

    # run using FINN-based execution (full graph)
    full_ctx = oxe.execute_onnx(
        model,
        {"Input3": np_helper.to_array(input_tensor)},
        return_full_exec_context=True,
    )
    reference = np_helper.to_array(output_tensor)
    assert np.isclose(reference, full_ctx["Plus214_Output_0"], atol=1e-3).all()

    # test subgraph execution
    start_node = model.graph.node[1]
    end_node = model.graph.node[3]
    sub_in_name = start_node.input[0]
    sub_out_name = end_node.output[0]
    sub_ctx = oxe.execute_onnx(
        model,
        {sub_in_name: full_ctx[sub_in_name]},
        return_full_exec_context=True,
        start_node=start_node,
        end_node=end_node,
    )
    assert np.isclose(sub_ctx[sub_out_name], full_ctx[sub_out_name], atol=1e-3).all()
def make_single_quantavpool_modelwrapper(k, stride, ifm_ch, ifm_dim, ofm_dim, idt, odt):
    """Build a ModelWrapper containing a single NCHW QuantAvgPool2d node.

    The input tensor "inp" has shape [1, ifm_ch, ifm_dim, ifm_dim] and the
    output tensor "outp" has shape [1, ifm_ch, ofm_dim, ofm_dim]. The node's
    ibits/obits/signed attributes are derived from ``idt`` and ``odt``, which
    are also set as the tensor datatypes. Shape inference is applied before
    the model is returned.
    """
    in_shape = [1, ifm_ch, ifm_dim, ifm_dim]
    out_shape = [1, ifm_ch, ofm_dim, ofm_dim]
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, in_shape)
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_shape)

    if idt.signed():
        signed_attr = 1
    else:
        signed_attr = 0

    pool_node = helper.make_node(
        "QuantAvgPool2d",
        ["inp"],
        ["outp"],
        domain="finn.custom_op.general",
        stride=stride,
        kernel=k,
        ibits=idt.bitwidth(),
        obits=odt.bitwidth(),
        signed=signed_attr,
        data_layout="NCHW",
    )
    graph = helper.make_graph(
        nodes=[pool_node],
        name="mp_graph",
        inputs=[inp],
        outputs=[outp],
    )

    wrapped = ModelWrapper(helper.make_model(graph, producer_name="mp-model"))
    wrapped.set_tensor_datatype("inp", idt)
    wrapped.set_tensor_datatype("outp", odt)
    return wrapped.transform(InferShapes())
def make_single_maxpool_modelwrapper(k, stride, pad, ifm_ch, ifm_dim, ofm_dim, idt):
    """Build a ModelWrapper containing a single MaxPool node.

    The input tensor "inp" has shape [1, ifm_ch, ifm_dim, ifm_dim] and the
    output tensor "outp" has shape [1, ifm_ch, ofm_dim, ofm_dim]. Kernel,
    stride and padding are square/symmetric, and both tensors are annotated
    with ``idt``. Shape inference is applied before the model is returned.
    """
    in_shape = [1, ifm_ch, ifm_dim, ifm_dim]
    out_shape = [1, ifm_ch, ofm_dim, ofm_dim]
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, in_shape)
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_shape)

    pool_node = helper.make_node(
        "MaxPool",
        ["inp"],
        ["outp"],
        kernel_shape=[k, k],
        pads=[pad, pad, pad, pad],
        strides=[stride, stride],
    )
    graph = helper.make_graph(
        nodes=[pool_node],
        name="mp_graph",
        inputs=[inp],
        outputs=[outp],
    )

    wrapped = ModelWrapper(helper.make_model(graph, producer_name="mp-model"))
    # the output datatype equals the input datatype
    for tensor_name in ("inp", "outp"):
        wrapped.set_tensor_datatype(tensor_name, idt)
    return wrapped.transform(InferShapes())
def test_move_chw_add_past_conv(idim, k, s, ich, och):
    """Check that MoveAddPastConv preserves the output of Add -> Conv.

    Builds an Add with a [1, ich, 1, 1] parameter followed by an unpadded
    Conv, initializes both parameters and the input with uniform random
    values, and verifies that the output is unchanged and that the node
    order becomes Conv -> Add.
    """
    odim = compute_conv_output_dim(idim, k, s)
    ishape = [1, ich, idim, idim]
    oshape = [1, och, odim, odim]
    add_param_shape = [1, ich, 1, 1]
    conv_param_shape = [och, ich, k, k]

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape)
    a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, add_param_shape)
    a1 = helper.make_tensor_value_info("a1", TensorProto.FLOAT, conv_param_shape)

    conv_config = {
        "dilations": [1, 1],
        "group": 1,
        "kernel_shape": [k, k],
        "pads": [0, 0, 0, 0],
        "strides": [s, s],
    }
    add_node = helper.make_node("Add", ["inp", "a0"], ["add_out"])
    conv_node = helper.make_node("Conv", ["add_out", "a1"], ["outp"], **conv_config)
    graph = helper.make_graph(
        nodes=[add_node, conv_node],
        name="move-add-graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[a0, a1],
    )
    model = ModelWrapper(helper.make_model(graph))

    def random_unit_tensor(shape):
        return np.random.uniform(low=0, high=1, size=tuple(shape)).astype(np.float32)

    # initialize model (random draws kept in the order a0, a1, input)
    model.set_initializer("a0", random_unit_tensor(add_param_shape))
    model.set_initializer("a1", random_unit_tensor(conv_param_shape))
    model = model.transform(InferShapes())

    # execution before transformation
    idict = {model.graph.input[0].name: random_unit_tensor(ishape)}
    y_before = oxe.execute_onnx(model, idict)[model.graph.output[0].name]

    # execution after transformation
    model = model.transform(MoveAddPastConv())
    y_after = oxe.execute_onnx(model, idict)[model.graph.output[0].name]

    assert np.isclose(y_before, y_after).all()
    assert model.graph.node[0].op_type == "Conv"
    assert model.graph.node[1].op_type == "Add"
def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits):
    """Export a trained FC model and compare FINN execution with Brevitas.

    Skipped for LFC-w2a2 and whenever ``wbits > abits``. A stored MNIST test
    vector is run through both implementations; the outputs must agree
    within 1e-3.
    """
    if size == "LFC" and wbits == 2 and abits == 2:
        pytest.skip("No LFC-w2a2 present at the moment")
    if wbits > abits:
        pytest.skip("No wbits > abits cases at the moment")

    nname = "%s_%dW%dA" % (size, wbits, abits)
    finn_onnx = export_onnx_path + "/%s.onnx" % nname
    fc = get_test_model_trained(size, wbits, abits)
    bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)

    model = ModelWrapper(finn_onnx)
    for make_step in (InferShapes, FoldConstants):
        model = model.transform(make_step())

    # load one of the test vectors
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_proto = onnx.load_tensor_from_string(raw_i)

    # run using FINN-based execution
    output_dict = oxe.execute_onnx(model, {"0": nph.to_array(input_proto)})
    first_output_name = list(output_dict.keys())[0]
    produced = output_dict[first_output_name]

    # run using PyTorch/Brevitas
    torch_input = torch.from_numpy(nph.to_array(input_proto)).float()
    assert torch_input.shape == (1, 1, 28, 28)
    expected = fc.forward(torch_input).detach().numpy()

    assert np.isclose(produced, expected, atol=1e-3).all()
def apply(self, model):
    """Remove a scalar Mul/Add that directly feeds a TopK node.

    For each TopK node whose producer is a Mul or Add with a constant
    parameter whose shape consists only of ones, the producer is removed
    when it is an Add, or a Mul with a positive parameter. The TopK node is
    then rewired to the producer's first input, whose datatype is set to
    FLOAT32. A warning is issued when the parameter has no initializer.

    Returns a tuple ``(model, graph_modified)``; shapes and datatypes are
    re-inferred only when the graph was modified.
    """
    graph = model.graph
    graph_modified = False
    for n in graph.node:
        if n.op_type != "TopK":
            continue
        prod = model.find_producer(n.input[0])
        if prod is None or prod.op_type not in ["Mul", "Add"]:
            continue
        prod_input = prod.input[0]
        param_name = prod.input[1]
        A = model.get_initializer(param_name)
        if A is None:
            warnings.warn("Param is not constant, skipping")
            continue
        if not all(x == 1 for x in A.shape):
            # only scalar parameters can be absorbed
            continue
        if prod.op_type == "Mul":
            # a scalar Mul is only absorbed when the scale is positive
            absorbable = A > 0
        else:
            absorbable = True
        if absorbable:
            # TopK depends only on how the values compare to each other,
            # which a positive scalar scale or a scalar offset leaves
            # unchanged, so the producer can be dropped
            graph.node.remove(prod)
            n.input[0] = prod_input
            # to avoid errors the datatype is set to float32
            model.set_tensor_datatype(n.input[0], DataType.FLOAT32)
            graph_modified = True
    if graph_modified:
        model = model.transform(InferShapes())
        model = model.transform(InferDataTypes())
    return (model, graph_modified)
def test_collapse_repeated_op():
    """Check that collapsing repeated Add and Mul nodes preserves execution.

    Builds Add -> Add -> Mul -> Mul with constant length-2 parameters,
    applies CollapseRepeatedAdd and CollapseRepeatedMul, and compares
    execution of the original and transformed models.
    """

    def float_vec2(name):
        return oh.make_tensor_value_info(name, TensorProto.FLOAT, [2])

    top_in = float_vec2("top_in")
    add_param_0 = float_vec2("add_param_0")
    mul_param_0 = float_vec2("mul_param_0")
    add_param_1 = float_vec2("add_param_1")
    mul_param_1 = float_vec2("mul_param_1")
    top_out = float_vec2("top_out")

    chain = [
        oh.make_node("Add", ["top_in", "add_param_0"], ["middle_0"]),
        oh.make_node("Add", ["middle_0", "add_param_1"], ["middle_1"]),
        oh.make_node("Mul", ["middle_1", "mul_param_0"], ["middle_2"]),
        oh.make_node("Mul", ["middle_2", "mul_param_1"], ["top_out"]),
    ]
    graph = oh.make_graph(
        name="test",
        inputs=[top_in],
        outputs=[top_out],
        value_info=[add_param_0, mul_param_0, add_param_1, mul_param_1],
        nodes=chain,
    )
    model = ModelWrapper(oh.make_model(graph)).transform(InferShapes())

    param_values = (
        ("add_param_0", [1, 3]),
        ("add_param_1", [-1, 3]),
        ("mul_param_0", [2, 4]),
        ("mul_param_1", [2, -4]),
    )
    for param_name, values in param_values:
        model.set_initializer(param_name, np.asarray(values, dtype=np.float32))

    new_model = model.transform(CollapseRepeatedAdd())
    new_model = new_model.transform(CollapseRepeatedMul())

    inp_dict = {"top_in": np.asarray([-1.0, 1.0], dtype=np.float32)}
    assert ox.compare_execution(model, new_model, inp_dict)
def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits, pretrained):
    """Export an FC model via FINNManager and compare FINN with Brevitas.

    Skipped for LFC-w2a2 and whenever ``wbits > abits``. A uniformly random
    input in [MIN_INP_VAL, MAX_INP_VAL) is run through the exported model
    and the PyTorch module; the outputs must agree within ``ATOL``.
    """
    if size == "LFC" and wbits == 2 and abits == 2:
        pytest.skip(f"No LFC_{MAX_WBITS}W{MAX_ABITS}A present.")
    if wbits > abits:
        pytest.skip("No wbits > abits cases.")

    nname = f"{size}_{wbits}W{abits}A"
    finn_onnx = nname + ".onnx"
    fc, _ = model_with_cfg(nname.lower(), pretrained=pretrained)
    FINNManager.export_onnx(fc, FC_INPUT_SIZE, finn_onnx)

    model = ModelWrapper(finn_onnx)
    for make_step in (
        GiveUniqueNodeNames,
        DoubleToSingleFloat,
        InferShapes,
        FoldConstants,
        RemoveStaticGraphInputs,
    ):
        model = model.transform(make_step())

    # generate a random test vector
    np_input = np.random.uniform(MIN_INP_VAL, MAX_INP_VAL, size=FC_INPUT_SIZE)
    np_input = np_input.astype(np.float32)

    # run using FINN-based execution
    output_dict = oxe.execute_onnx(model, {"0": np_input})
    first_output_name = list(output_dict.keys())[0]
    produced = output_dict[first_output_name]

    # do forward pass in PyTorch/Brevitas
    torch_input = torch.from_numpy(np_input).float()
    expected = fc.forward(torch_input).detach().numpy()

    assert np.isclose(produced, expected, atol=ATOL).all()
def make_lookup_model(embeddings, ishape, idt, edt):
    """Build a FINN model wrapping a single ``nn.Embedding`` lookup.

    A small torch module with an embedding table shaped like ``embeddings``
    is exported (opset 11) using an all-zero int64 input of shape
    ``ishape``. The exported table is replaced by ``embeddings``, the input
    and table are annotated with ``idt`` and ``edt``, and shape and datatype
    inference are applied before the model is returned.
    """
    num_embeddings, embedding_dim = embeddings.shape

    class LookupModel(nn.Module):
        """Torch module that forwards its input through one embedding table."""

        def __init__(self, num_embeddings, embedding_dim):
            super().__init__()
            self.lookup = nn.Embedding(
                num_embeddings=num_embeddings, embedding_dim=embedding_dim
            )

        def forward(self, x):
            return self.lookup(x)

    torch_model = LookupModel(num_embeddings, embedding_dim)
    dummy_indices = torch.zeros(ishape, dtype=torch.int64)
    exported = FINNManager.export(torch_model, input_t=dummy_indices, opset_version=11)
    model = ModelWrapper(exported)

    input_name = model.graph.input[0].name
    # the first input of the first node holds the embedding table
    table_name = model.graph.node[0].input[0]
    model.set_tensor_datatype(input_name, idt)

    assert tuple(model.get_tensor_shape(table_name)) == embeddings.shape
    model.set_initializer(table_name, embeddings)
    model.set_tensor_datatype(table_name, edt)

    for make_step in (InferShapes, InferDataTypes):
        model = model.transform(make_step())
    return model
def test_infer_data_layouts():
    """Check InferDataLayouts on mnist-conv before and after conv lowering.

    Tensor layouts are verified once on the tidied model and once more after
    LowerConvsToMatMul has been applied and the graph renamed.
    """

    def assert_layouts(wrapped, expected_layouts):
        for tensor_name, layout in expected_layouts:
            assert wrapped.get_tensor_layout(tensor_name) == layout

    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    for make_step in (
        InferShapes,
        FoldConstants,
        GiveUniqueNodeNames,
        GiveReadableTensorNames,
        InferDataLayouts,
    ):
        model = model.transform(make_step())

    assert_layouts(
        model,
        (
            ("global_in", DataLayout.NCHW),
            ("Conv_0_out0", DataLayout.NCHW),
            ("MaxPool_0_out0", DataLayout.NCHW),
            ("Reshape_0_out0", DataLayout.NC),
            ("MatMul_0_out0", DataLayout.NC),
            ("global_out", DataLayout.NC),
        ),
    )

    for make_step in (
        LowerConvsToMatMul,
        GiveUniqueNodeNames,
        GiveReadableTensorNames,
        InferDataLayouts,
    ):
        model = model.transform(make_step())

    assert_layouts(
        model,
        (
            ("global_in", DataLayout.NCHW),
            ("Transpose_0_out0", DataLayout.NHWC),
            ("Im2Col_0_out0", DataLayout.NHWC),
            ("MatMul_0_out0", DataLayout.NHWC),
            ("MaxPool_0_out0", DataLayout.NCHW),
            ("Reshape_0_out0", DataLayout.NC),
            ("MatMul_2_out0", DataLayout.NC),
            ("global_out", DataLayout.NC),
        ),
    )
def test_quant_conv2d(dw, bias, bias_quant, in_features, in_channels,
                      out_channels, w_bits, channel_scaling, kernel_size,
                      padding, stride, i_bits):
    """Export a QuantConv2d layer and compare FINN execution with Brevitas.

    When ``dw`` is set, the layer is built with ``groups == in_channels``
    and as many output channels as input channels. The quantized input is
    passed to FINN in integer form and the outputs must agree within 1e-3.
    """
    # required to generate quantized inputs, not part of the exported model
    quant_inp = QuantIdentity(bit_width=i_bits, return_quant_tensor=True)
    inp_tensor = quant_inp(torch.randn(1, in_channels, in_features, in_features))

    if dw:
        conv_out_channels = in_channels
        conv_groups = in_channels
    else:
        conv_out_channels = out_channels
        conv_groups = 1

    conv = QuantConv2d(
        in_channels=in_channels,
        out_channels=conv_out_channels,
        groups=conv_groups,
        kernel_size=kernel_size,
        padding=padding,
        stride=stride,
        bias=bias,
        bias_quant=bias_quant,
        weight_bit_width=w_bits,
        weight_scaling_per_output_channel=channel_scaling,
    )
    conv.eval()

    exported = bo.export_finn_onnx(conv, input_t=inp_tensor)
    model = ModelWrapper(exported).transform(InferShapes())

    # the quantized input tensor passed to FINN should be in integer form
    int_inp_array = inp_tensor.int(float_datatype=True).numpy()
    exec_ctx = oxe.execute_onnx(
        model, {model.graph.input[0].name: int_inp_array}, True
    )
    produced = exec_ctx[model.graph.output[0].name]
    expected = conv(inp_tensor).detach().numpy()

    assert np.isclose(produced, expected, atol=1e-3).all()
def test_is_linear_forked_node_output():
    """Check that the is_linear analysis reports False for a forked graph.

    The graph feeds the output of one Add into two Mul nodes whose results
    are joined by a final Add.
    """

    def float_vec2(name):
        return oh.make_tensor_value_info(name, TensorProto.FLOAT, [2])

    top_in = float_vec2("top_in")
    add_param = float_vec2("add_param")
    mul0_param = float_vec2("mul0_param")
    mul1_param = float_vec2("mul1_param")
    mul0_res = float_vec2("mul0_res")
    mul1_res = float_vec2("mul1_res")
    top_out = float_vec2("top_out")

    forked_nodes = [
        oh.make_node("Add", ["top_in", "add_param"], ["middle"]),
        # "middle" is consumed by both Mul nodes
        oh.make_node("Mul", ["middle", "mul0_param"], ["mul0_res"]),
        oh.make_node("Mul", ["middle", "mul1_param"], ["mul1_res"]),
        oh.make_node("Add", ["mul0_res", "mul1_res"], ["top_out"]),
    ]
    graph = oh.make_graph(
        name="test",
        inputs=[top_in],
        outputs=[top_out],
        value_info=[add_param, mul0_param, mul1_param, mul0_res, mul1_res],
        nodes=forked_nodes,
    )
    model = ModelWrapper(oh.make_model(graph)).transform(InferShapes())

    analysis_result = model.analysis(ta.is_linear)
    assert analysis_result["is_linear"] is False
def test_renaming():
    """Check the names produced by the node/tensor renaming transformations.

    Loads the mnist-conv ONNX model, applies ``GiveUniqueNodeNames`` and
    ``GiveReadableTensorNames``, verifies a few expected names, repeats the
    renaming to confirm the names stay the same, and finally executes the
    renamed model against the reference input/output pair.
    """

    def rename(wrapped):
        # the two renaming passes are always applied in this order
        wrapped = wrapped.transform(GiveUniqueNodeNames())
        return wrapped.transform(GiveReadableTensorNames())

    def check_conv_and_add_names(wrapped):
        # spot-check one Conv and one Add node together with the name of
        # their second input
        conv_node = wrapped.graph.node[1]
        assert conv_node.op_type == "Conv"
        assert conv_node.name == "Conv_0"
        assert conv_node.input[1] == "Conv_0_param0"
        add_node = wrapped.graph.node[6]
        assert add_node.op_type == "Add"
        assert add_node.name == "Add_1"
        assert add_node.input[1] == "Add_1_param0"

    # load the onnx model
    model_bytes = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(model_bytes)
    model = model.transform(InferShapes())
    model = rename(model)

    # do some basic checks
    assert model.graph.input[0].name == "global_in"
    assert model.graph.output[0].name == "global_out"
    check_conv_and_add_names(model)

    # ensure running renaming twice still yields the same names
    model = rename(model)
    check_conv_and_add_names(model)

    # run renamed model to make sure we did not mess up the topology
    input_bytes = get_data(
        "finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb"
    )
    output_bytes = get_data(
        "finn.data", "onnx/mnist-conv/test_data_set_0/output_0.pb"
    )
    reference_input = onnx.load_tensor_from_string(input_bytes)
    reference_output = onnx.load_tensor_from_string(output_bytes)

    exec_inputs = {"global_in": np_helper.to_array(reference_input)}
    exec_outputs = oxe.execute_onnx(model, exec_inputs)

    expected = np_helper.to_array(reference_output)
    produced = exec_outputs["global_out"]
    assert np.isclose(expected, produced, atol=1e-3).all()
def test_xnorpopcountmatmul():
    """Exercise shape inference, datatype inference and execution of a
    single XnorPopcountMatMul node.

    The execution check maps the {0, 1} operands to {-1, +1}, multiplies
    them with ``np.matmul`` and asserts that this equals
    ``2 * out - K`` for the node output ``out``.
    """
    rows, inner, cols = 1, 3, 3

    x_info = helper.make_tensor_value_info("x", TensorProto.FLOAT, [rows, inner])
    w_info = helper.make_tensor_value_info("W", TensorProto.FLOAT, [inner, cols])
    # output shape is left symbolic; shape inference is expected to fill it in
    out_info = helper.make_tensor_value_info(
        "out", TensorProto.FLOAT, ["x", "y"]
    )

    xnor_node = helper.make_node(
        "XnorPopcountMatMul",
        ["x", "W"],
        ["out"],
        domain="finn.custom_op.general",
    )
    graph = helper.make_graph(
        [xnor_node], "test_model", [x_info], [out_info], value_info=[w_info]
    )
    model = ModelWrapper(helper.make_model(graph))

    for binary_tensor in ("x", "W"):
        model.set_tensor_datatype(binary_tensor, DataType.BINARY)

    weights = np.asarray(
        [[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32
    )
    model.set_initializer("W", weights)

    # test shape inference
    model = model.transform(InferShapes())
    assert model.get_tensor_shape("out") == [rows, cols]

    # test datatype inference
    assert model.get_tensor_datatype("out") is DataType.FLOAT32
    model = model.transform(InferDataTypes())
    assert model.get_tensor_datatype("out") is DataType.UINT32

    # test execution
    activations = np.asarray([[1, 0, 0]], dtype=np.float32)
    results = oxe.execute_onnx(model, {"x": activations})

    bipolar_weights = 2 * weights - 1
    bipolar_activations = 2 * activations - 1
    expected_bipolar = np.matmul(bipolar_activations, bipolar_weights)

    produced_bipolar = 2 * results["out"] - inner
    assert (produced_bipolar == expected_bipolar).all()
def make_dupstreams_modelwrapper(ch, pe, idim, idt):
    """Build a ModelWrapper holding a single DuplicateStreams_Batch node.

    The node reads tensor "inp" and writes "outp0" and "outp1"; all three
    are declared as FLOAT tensors of shape ``[1, idim, idim, ch]``.

    Args:
        ch: value for the node's ``NumChannels`` attribute and the size of
            the last tensor dimension.
        pe: value for the node's ``PE`` attribute.
        idim: size of the two middle tensor dimensions.
        idt: datatype object; ``idt.name`` is stored in ``inputDataType``
            and ``idt`` itself is set as the datatype of "inp".

    Returns:
        The ModelWrapper after the InferShapes and InferDataTypes
        transformations have been applied.
    """
    tensor_shape = [1, idim, idim, ch]

    def float_info(tensor_name):
        return helper.make_tensor_value_info(
            tensor_name, TensorProto.FLOAT, tensor_shape
        )

    in_info = float_info("inp")
    out0_info = float_info("outp0")
    out1_info = float_info("outp1")

    node_attributes = {
        "domain": "finn.custom_op.fpgadataflow",
        "backend": "fpgadataflow",
        "NumChannels": ch,
        "PE": pe,
        "inputDataType": idt.name,
        "numInputVectors": [1, idim, idim],
    }
    duplicate_node = helper.make_node(
        "DuplicateStreams_Batch",
        ["inp"],
        ["outp0", "outp1"],
        **node_attributes,
    )

    graph = helper.make_graph(
        nodes=[duplicate_node],
        name="graph",
        inputs=[in_info],
        outputs=[out0_info, out1_info],
    )
    # NOTE(review): producer name says "addstreams" although this builds a
    # duplicate-streams model -- presumably a copy-paste leftover; confirm
    # before changing since it is stored in the model.
    onnx_model = helper.make_model(graph, producer_name="addstreams-model")

    wrapped = ModelWrapper(onnx_model)
    wrapped.set_tensor_datatype("inp", idt)
    return wrapped.transform(InferShapes()).transform(InferDataTypes())