def test_change_datalayout_quantavgpool(s, k, ibits, obits, signed, c, idim):
    n = 1
    odim = compute_pool_output_dim(idim, k, s)
    # determine input FINN datatype
    if signed is True:
        prefix = "INT"
    else:
        prefix = "UINT"
    dt_name = prefix + str(ibits)
    dtype = DataType[dt_name]

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [n, c, idim, idim])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [n, c, odim, odim])

    node = helper.make_node(
        "QuantAvgPool2d",
        ["inp"],
        ["outp"],
        domain="finn",
        stride=s,
        kernel=k,
        ibits=ibits,
        obits=obits,
        signed=signed,
        data_layout="NCHW",
    )
    graph = helper.make_graph(
        nodes=[node], name="single-quantavgpool", inputs=[inp], outputs=[outp]
    )

    model = helper.make_model(graph)
    model = ModelWrapper(model)
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model_transformed = model.transform(ChangeDataLayoutQuantAvgPool2d())
    model_transformed = model_transformed.transform(InferShapes())
    model_transformed = model_transformed.transform(InferDataTypes())
    model_transformed = model_transformed.transform(InferDataLayouts())
    model_transformed = model_transformed.transform(GiveUniqueNodeNames())
    model_transformed = model_transformed.transform(GiveReadableTensorNames())

    inp_values = gen_finn_dt_tensor(dtype, [n, c, idim, idim])
    idict = {"inp": inp_values}
    assert oxe.compare_execution(model, model_transformed, idict)
    assert len(model.graph.node) + 2 == len(model_transformed.graph.node)
    assert model_transformed.graph.node[-1].op_type == "Transpose"
    assert model_transformed.graph.node[0].op_type == "Transpose"
    # check if QuantAvgPool2d node has data_layout set correctly
    node = model_transformed.graph.node[1]
    d_layout = get_by_name(node.attribute, "data_layout").s.decode("UTF-8")
    assert d_layout == "NHWC"
    assert model_transformed.get_tensor_layout(node.input[0]) == DataLayout.NHWC
    assert model_transformed.get_tensor_layout(node.output[0]) == DataLayout.NHWC
def inference_cost(
    model_filename,
    *,
    output_json=None,
    output_onnx=None,
    preprocess=True,
    discount_sparsity=True,
):
    """Print the inference cost estimate metric for given ONNX model.
    Supports the Quant op for weight/activation quantization.

    :param model_filename: Filename for ONNX model
    :param output_json: Optional JSON filename to save the inference cost dict
    :param output_onnx: Optional ONNX filename to save the final model after any
        preprocessing
    :param preprocess: If set, run preprocessing steps such as shape inference,
        datatype inference and constant folding. Strongly recommended.
    :param discount_sparsity: If set, will discount op cost of MAC ops with a
        constant zero weight, and the mem cost of constant zero weights.
    """
    print("Inference cost for " + model_filename)
    model = ModelWrapper(model_filename)
    if preprocess:
        qnt_nodes = model.get_nodes_by_op_type("Quant")
        for qnt_node in qnt_nodes:
            qnt_node.domain = "finn.custom_op.general"
        model = model.transform(InferShapes())
        model = model.transform(GiveUniqueParameterTensors())
        model = model.transform(InferDataTypes())
        model = model.transform(FoldConstants())
        model = model.transform(RemoveUnusedTensors())
        model = model.transform(RemoveStaticGraphInputs())
        model = model.transform(InferDataTypes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    if output_onnx is not None:
        model.save(output_onnx)
    ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity))
    bops = compute_bops(ret)
    mem_w_bits = compute_mem_bits(ret, "mem_w")
    mem_o_bits = compute_mem_bits(ret, "mem_o")
    ret["total_bops"] = bops
    ret["total_mem_w_bits"] = mem_w_bits
    ret["total_mem_o_bits"] = mem_o_bits

    if "unsupported" in ret:
        ret["unsupported"] = str(ret["unsupported"])
    print(json.dumps(ret, sort_keys=True, indent=2))

    if output_json is not None:
        with open(output_json, "w") as f:
            json.dump(ret, f, sort_keys=True, indent=2)
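# A minimal usage sketch for the inference_cost helper above; the filenames are
# placeholders (not from the original source) and the call simply exercises the
# keyword arguments documented in its docstring.
if __name__ == "__main__":
    inference_cost(
        "my_model.onnx",                      # hypothetical exported ONNX model
        output_json="my_model_cost.json",     # save the cost dict as JSON
        output_onnx="my_model_preproc.onnx",  # save the preprocessed model
        preprocess=True,                      # shape/datatype inference + constant folding
        discount_sparsity=True,               # skip MACs/weights that are constant zero
    )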
def step_resnet50_tidy(model: ModelWrapper, cfg: DataflowBuildConfig):
    model = model.transform(GiveUniqueParameterTensors())
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(RemoveStaticGraphInputs())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(InsertTopK())
    model = model.transform(InferShapes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    return model
def test_topk_insert(k):
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    model.model.opset_import[0].version = 11

    # do transformations (no topk)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())

    # verification: generate random input, run through net, streamline,
    # run again, check that output is top-k
    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    input_tensor = nph.to_array(input_tensor)
    input_dict = {"global_in": input_tensor}
    output_golden = oxe.execute_onnx(model, input_dict)["global_out"]
    output_golden_topk = np.flip(output_golden.flatten().argsort())[:k]
    output_golden_topk = output_golden_topk.flatten()

    # insert top-k
    model = model.transform(InsertTopK(k))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferShapes())

    # verify output of top-k
    output_dict_topk = oxe.execute_onnx(model, input_dict)
    output_pysim_topk = output_dict_topk[list(output_dict_topk.keys())[0]]
    output_pysim_topk = output_pysim_topk.astype(np.int).flatten()

    assert np.array_equal(output_golden_topk, output_pysim_topk)
def test_xnorpopcountmatmul():
    M = 1
    K = 3
    N = 3
    x = helper.make_tensor_value_info("x", TensorProto.FLOAT, [M, K])
    W = helper.make_tensor_value_info("W", TensorProto.FLOAT, [K, N])
    out = helper.make_tensor_value_info("out", TensorProto.FLOAT, ["x", "y"])
    node_def = helper.make_node(
        "XnorPopcountMatMul", ["x", "W"], ["out"], domain="finn.custom_op.general"
    )
    modelproto = helper.make_model(
        helper.make_graph([node_def], "test_model", [x], [out], value_info=[W])
    )
    model = ModelWrapper(modelproto)
    model.set_tensor_datatype("x", DataType.BINARY)
    model.set_tensor_datatype("W", DataType.BINARY)
    W_data = np.asarray([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
    model.set_initializer("W", W_data)
    # test shape inference
    model = model.transform(InferShapes())
    assert model.get_tensor_shape("out") == [M, N]
    # test datatype inference
    assert model.get_tensor_datatype("out") is DataType.FLOAT32
    model = model.transform(InferDataTypes())
    assert model.get_tensor_datatype("out") is DataType.UINT32
    # test execution
    x_data = np.asarray([[1, 0, 0]], dtype=np.float32)
    inp_dict = {"x": x_data}
    out_dict = oxe.execute_onnx(model, inp_dict)
    Wb = 2 * W_data - 1
    xb = 2 * x_data - 1
    rb = np.matmul(xb, Wb)
    assert (2 * out_dict["out"] - K == rb).all()
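# The final assertion in the test above relies on the XNOR-popcount identity:
# for length-K binary vectors x, w in {0, 1} mapped to bipolar form (2 * v - 1),
# the bipolar dot product equals 2 * popcount(XNOR(x, w)) - K. The helper below
# is an illustrative standalone check of that identity (arbitrary values, not
# part of the original test suite).
def _check_xnorpopcount_identity():
    K = 3
    x = np.array([1, 0, 0])            # binary activation row
    w = np.array([1, 0, 1])            # binary weight column
    matches = np.sum(x == w)           # popcount(XNOR(x, w)), i.e. what XnorPopcountMatMul returns
    bipolar_dot = np.dot(2 * x - 1, 2 * w - 1)
    assert 2 * matches - K == bipolar_dot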
def make_lookup_model(embeddings, ishape, idt, edt):
    num_embeddings, embedding_dim = embeddings.shape

    class LookupModel(nn.Module):
        def __init__(self, num_embeddings, embedding_dim):
            super().__init__()
            self.lookup = nn.Embedding(
                num_embeddings=num_embeddings, embedding_dim=embedding_dim
            )

        def forward(self, x):
            x = self.lookup(x)
            return x

    torch_model = LookupModel(num_embeddings, embedding_dim)
    input_t = torch.zeros(ishape, dtype=torch.int64)
    ret = FINNManager.export(torch_model, input_t=input_t, opset_version=11)
    model = ModelWrapper(ret)
    iname = model.graph.input[0].name
    ename = model.graph.node[0].input[0]
    model.set_tensor_datatype(iname, idt)
    eshape = model.get_tensor_shape(ename)
    assert tuple(eshape) == embeddings.shape
    model.set_initializer(ename, embeddings)
    model.set_tensor_datatype(ename, edt)
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    return model
def test_end2end_mobilenet_streamline():
    model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_tidy.onnx")
    model = model.transform(Streamline())
    additional_streamline_transformations = [
        DoubleToSingleFloat(),
        reorder.MoveMulPastDWConv(),
        absorb.AbsorbMulIntoMultiThreshold(),
        ChangeDataLayoutQuantAvgPool2d(),
        InferDataLayouts(),
        reorder.MoveTransposePastScalarMul(),
        absorb.AbsorbTransposeIntoFlatten(),
        reorder.MoveFlattenPastAffine(),
        reorder.MoveFlattenPastTopK(),
        reorder.MoveScalarMulPastMatMul(),
        CollapseRepeatedMul(),
        RemoveIdentityOps(),
        RoundAndClipThresholds(),
    ]
    for trn in additional_streamline_transformations:
        model = model.transform(trn)
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model.save(build_dir + "/end2end_mobilenet_streamlined.onnx")
    assert (
        len(model.get_nodes_by_op_type("Add")) == 1
    )  # only final quantized bias Add op remains
    assert len(model.get_nodes_by_op_type("Mul")) == 0  # no Mul ops remain
def test_topk_insert(k):
    tfc = get_test_model_trained("TFC", 1, 1)
    bo.export_finn_onnx(tfc, (1, 1, 28, 28), export_onnx_path)
    model = ModelWrapper(export_onnx_path)

    # do transformations (no topk)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())

    # verification: generate random input, run through net, streamline,
    # run again, check that output is top-k
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    input_brevitas = torch.from_numpy(nph.to_array(input_tensor)).float()
    output_golden = tfc.forward(input_brevitas).detach().numpy()
    output_golden_topk = np.flip(output_golden.flatten().argsort())[:k]
    output_golden_topk = output_golden_topk.flatten()

    input_dict = {"global_in": nph.to_array(input_tensor)}

    # insert top-k
    model = model.transform(InsertTopK(k))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferShapes())

    # verify output of top-k
    output_dict_topk = oxe.execute_onnx(model, input_dict)
    output_pysim_topk = output_dict_topk[list(output_dict_topk.keys())[0]]
    output_pysim_topk = output_pysim_topk.astype(np.int).flatten()

    assert np.array_equal(output_golden_topk, output_pysim_topk)
def apply(self, model): graph = model.graph node_ind = 0 graph_modified = False for n in graph.node: node_ind += 1 if n.op_type == "TopK": prod = model.find_producer(n.input[0]) if prod is not None and (prod.op_type in ["Mul", "Add"]): prod_input = prod.input[0] param_name = prod.input[1] A = model.get_initializer(param_name) if A is None: warnings.warn("Param is not constant, skipping") continue is_scalar = all(x == 1 for x in A.shape) is_scalar_pos_mul = is_scalar and (prod.op_type == "Mul") and A > 0 is_scalar_add = is_scalar and (prod.op_type == "Add") if is_scalar_pos_mul or is_scalar_add: # if the mul is scalar and positive, we can just delete the # mul node and rewire the top k node. Because the top k node # works with probabilities and their relation to each other # the relation doesn't change if every value is multiplied # with a scalar graph.node.remove(prod) n.input[0] = prod_input # to avoid error the dataype is set to float32 model.set_tensor_datatype(n.input[0], DataType.FLOAT32) graph_modified = True if graph_modified: model = model.transform(InferShapes()) model = model.transform(InferDataTypes()) return (model, graph_modified)
def apply(self, model):
    streamline_transformations = [
        ConvertSubToAdd(),
        ConvertDivToMul(),
        BatchNormToAffine(),
        ConvertSignToThres(),
        MoveMulPastMaxPool(),
        MoveScalarLinearPastInvariants(),
        AbsorbSignBiasIntoMultiThreshold(),
        MoveAddPastMul(),
        MoveScalarAddPastMatMul(),
        MoveAddPastConv(),
        MoveScalarMulPastMatMul(),
        MoveScalarMulPastConv(),
        MoveAddPastMul(),
        CollapseRepeatedAdd(),
        CollapseRepeatedMul(),
        MoveMulPastMaxPool(),
        AbsorbAddIntoMultiThreshold(),
        FactorOutMulSignMagnitude(),
        AbsorbMulIntoMultiThreshold(),
        Absorb1BitMulIntoMatMul(),
        Absorb1BitMulIntoConv(),
        RoundAndClipThresholds(),
    ]
    for trn in streamline_transformations:
        model = model.transform(trn)
        model = model.transform(RemoveIdentityOps())
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(GiveReadableTensorNames())
        model = model.transform(InferDataTypes())
    return (model, False)
def make_dupstreams_modelwrapper(ch, pe, idim, idt):
    shape = [1, idim, idim, ch]
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, shape)
    outp0 = helper.make_tensor_value_info("outp0", TensorProto.FLOAT, shape)
    outp1 = helper.make_tensor_value_info("outp1", TensorProto.FLOAT, shape)

    dupstrm_node = helper.make_node(
        "DuplicateStreams_Batch",
        ["inp"],
        ["outp0", "outp1"],
        domain="finn.custom_op.fpgadataflow",
        backend="fpgadataflow",
        NumChannels=ch,
        PE=pe,
        inputDataType=idt.name,
        numInputVectors=[1, idim, idim],
    )
    graph = helper.make_graph(
        nodes=[dupstrm_node], name="graph", inputs=[inp], outputs=[outp0, outp1]
    )

    model = helper.make_model(graph, producer_name="addstreams-model")
    model = ModelWrapper(model)

    model.set_tensor_datatype("inp", idt)

    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())

    return model
def test_add_pre_and_postproc(self, topology, wbits, abits):
    prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "import_and_tidy")
    model = load_test_checkpoint_or_skip(prev_chkpt_name)
    global_inp_name = model.graph.input[0].name
    ishape = model.get_tensor_shape(global_inp_name)
    # preprocessing: torchvision's ToTensor divides uint8 inputs by 255
    totensor_pyt = ToTensor()
    chkpt_preproc_name = get_checkpoint_name(topology, wbits, abits, "preproc")
    bo.export_finn_onnx(totensor_pyt, ishape, chkpt_preproc_name)
    assert os.path.isfile(chkpt_preproc_name)
    # join preprocessing and core model
    pre_model = ModelWrapper(chkpt_preproc_name)
    model = model.transform(MergeONNXModels(pre_model))
    # add input quantization annotation: UINT8 for all BNN-PYNQ models
    global_inp_name = model.graph.input[0].name
    model.set_tensor_datatype(global_inp_name, DataType.UINT8)
    # postprocessing: insert Top-1 node at the end
    model = model.transform(InsertTopK(k=1))
    chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post")
    # tidy-up again
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RemoveStaticGraphInputs())
    model.save(chkpt_name)
    assert os.path.isfile(chkpt_name)
def test_move_flatten_past_affine(data_layout, batch_size):
    if data_layout == DataLayout.NHWC:
        ishape = [batch_size, 1, 1, 1024]
        oshape = [batch_size, 1000]
    else:
        ishape = [batch_size, 1024, 1, 1]
        oshape = [batch_size, 1000]

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
    # value_info names match the tensor names used by the nodes and initializers
    a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, [1024, 1000])
    a1 = helper.make_tensor_value_info("a1", TensorProto.FLOAT, [])
    a2 = helper.make_tensor_value_info("a2", TensorProto.FLOAT, [1000])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape)

    flatten_node = helper.make_node("Flatten", ["inp"], ["flatten_out"])
    matmul_node = helper.make_node("MatMul", ["flatten_out", "a0"], ["matmul_out"])
    mul_node = helper.make_node("Mul", ["matmul_out", "a1"], ["mul_out"])
    add_node = helper.make_node("Add", ["mul_out", "a2"], ["outp"])

    graph = helper.make_graph(
        nodes=[flatten_node, matmul_node, mul_node, add_node],
        name="move-reshape-graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[a0, a1, a2],
    )

    model = helper.make_model(graph, producer_name="move_reshape_model")
    model = ModelWrapper(model)

    # initialize values
    a0_values = gen_finn_dt_tensor(DataType["TERNARY"], [1024, 1000])
    model.set_initializer("a0", a0_values)
    a1_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
    model.set_initializer("a1", a1_values)
    a2_values = np.random.uniform(low=-1, high=1, size=(1000)).astype(np.float32)
    model.set_initializer("a2", a2_values)

    model.set_tensor_datatype("inp", DataType["INT2"])
    model.set_tensor_layout("inp", data_layout)
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())

    # compare execution before and after transformation
    inp_values = gen_finn_dt_tensor(DataType["INT2"], ishape)
    idict = {model.graph.input[0].name: inp_values}
    model_transformed = model.transform(MoveFlattenPastAffine())
    assert oxe.compare_execution(model, model_transformed, idict)

    # depending on data layout check if graph is transformed or not
    if data_layout == DataLayout.NHWC:
        # check if nodes have new order in transformed graph
        assert model.graph != model_transformed.graph
        assert model_transformed.graph.node[-1].op_type == "Flatten"
    else:
        assert model.graph == model_transformed.graph
def test_end2end_tfc_w1a2_import_and_tidy():
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_export.onnx")
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model.save(build_dir + "/end2end_tfc_w1a2_tidy.onnx")
def apply(self, model):
    graph = model.graph
    node_ind = 0
    graph_modified = False
    for n in graph.node:
        # search for (MultiThreshold, Add) pair
        node_ind += 1
        if (
            n.op_type == "MultiThreshold"
            and not model.is_fork_node(n)
            and not model.is_join_node(n)
        ):
            consumer = model.find_consumer(n.output[0])
            if consumer is not None and consumer.op_type == "Add":
                mt_node = n
                add_node = consumer
                threshold_name = mt_node.input[1]
                add_weight_name = add_node.input[1]
                T = model.get_initializer(threshold_name)
                A = model.get_initializer(add_weight_name)
                if (A is None) or (T is None):
                    warnings.warn("Threshold or add bias not constant, skipping")
                    continue
                end_name = add_node.output[0]
                # we can only absorb scalar adds
                is_scalar = A.ndim == 0 or all(x == 1 for x in A.shape)
                if not is_scalar:
                    continue
                bias = A.flatten()[0]
                # set MultiThreshold bias property
                mt_inst = getCustomOp(mt_node)
                bias += mt_inst.get_nodeattr("out_bias")
                mt_inst.set_nodeattr("out_bias", bias)
                graph_modified = True
                # compute new DataType for MultiThreshold output
                steps = T.shape[-1]
                new_min = bias
                new_max = steps + bias
                odt = DataType.get_smallest_possible(steps).name.replace(
                    "UINT", "INT"
                )
                odt = DataType[odt]
                assert odt.allowed(new_max) and odt.allowed(
                    new_min
                ), """Could not compute new MultiThreshold DataType
                (min = %d max = %d)""" % (
                    new_min,
                    new_max,
                )
                mt_inst.set_nodeattr("out_dtype", odt.name)
                # remove Add node, rewire MultiThreshold
                graph.node.remove(add_node)
                mt_node.output[0] = end_name
                # set datatype
                model.set_tensor_datatype(end_name, odt)
    if graph_modified:
        model = model.transform(InferDataTypes())
    return (model, graph_modified)
def step_mobilenet_lower_convs(model: ModelWrapper, cfg: DataflowBuildConfig):
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RoundAndClipThresholds())
    model = model.transform(InferDataLayouts())
    return model
def apply(self, model):
    graph = model.graph
    graph_modified = False
    node_ind = 0
    for n in graph.node:
        node_ind += 1
        if (
            n.op_type == "Reshape"
            and (model.get_initializer(n.input[1]) == [1, -1]).all()
        ) or n.op_type == "Flatten":
            prod = model.find_producer(n.input[0])
            if (
                prod is not None
                and prod.op_type == "Transpose"
                # we ensure that the first dimension is not changed from the
                # transpose operation
                and get_by_name(prod.attribute, "perm").ints[0] == 0
            ):
                data_layout = model.get_tensor_layout(prod.input[0])
                # check for the data layout to interpret input shape correctly
                if data_layout is None:
                    warnings.warn(
                        """Data layout for input tensor of Transpose node is not set.
                        To use AbsorbTransposeIntoFlatten transformation
                        please set tensor data layout."""
                    )
                    continue
                elif data_layout == DataLayout.NCHW:
                    (b, c, h, w) = model.get_tensor_shape(prod.input[0])
                    # if h=w=1 the transposition can be absorbed, otherwise
                    # the absorption would lead to an error in the behavior
                    if h != 1 or w != 1:
                        continue
                    # the flatten node from onnx keeps by default the first
                    # dim and flattens the rest, that is why this transformation
                    # can only work with b != 1 if the model contains already a
                    # flatten node and not a reshape node with shape = [1, -1].
                    # If the first dim of the input tensor is not 1, flatten and
                    # reshape (with shape = [1, -1]) would lead to different results
                    if n.op_type == "Reshape" and b != 1:
                        continue
                elif data_layout == DataLayout.NHWC:
                    (b, h, w, c) = model.get_tensor_shape(prod.input[0])
                    if h != 1 or w != 1:
                        continue
                    if n.op_type == "Reshape" and b != 1:
                        continue
                # create single flatten node and remove obsolete nodes
                node = oh.make_node("Flatten", [prod.input[0]], [n.output[0]])
                graph.node.remove(n)
                graph.node.remove(prod)
                graph.node.insert(node_ind, node)
                graph_modified = True
    if graph_modified:
        model = model.transform(InferDataTypes())
    return (model, graph_modified)
def test_end2end_mobilenet_lowering():
    model = load_test_checkpoint_or_skip(
        build_dir + "/end2end_mobilenet_streamlined.onnx"
    )
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RoundAndClipThresholds())
    model.save(build_dir + "/end2end_mobilenet_lowered.onnx")
def apply(self, model): graph = model.graph node_ind = 0 graph_modified = False nodes = [n for n in graph.node] for n in nodes: node_ind += 1 if ( n.op_type == "GlobalAveragePool" or n.op_type == "Reshape" or n.op_type == "Transpose" or n.op_type == "Flatten" ): in0 = n.input[0] if in0 is None: continue # find and check producer on our input prod0 = model.find_producer(in0) if prod0 is None: continue if prod0.op_type in ["Mul", "Add", "Div"]: # check if second input of producer is an initializer init0 = model.get_initializer(prod0.input[1]) # if either initializer is None, skip if init0 is None: continue # if initializer is not scalar, skip if np.prod(init0.shape) != 1: continue # move prod0 from input to output, old_prod0_in = prod0.input[0] old_prod0_out = prod0.output[0] scalar_op_odt = model.get_tensor_datatype(old_prod0_out) old_n_out = n.output[0] in_shape = model.get_tensor_shape(n.input[0]) out_shape = model.get_tensor_shape(n.output[0]) n.input[0] = old_prod0_in n.output[0] = old_prod0_out prod0.input[0] = old_prod0_out prod0.output[0] = old_n_out model.set_tensor_shape(n.input[0], in_shape) model.set_tensor_shape(n.output[0], out_shape) model.set_tensor_shape(prod0.output[0], out_shape) model.set_tensor_datatype(prod0.output[0], scalar_op_odt) model.set_tensor_datatype(n.output[0], DataType.FLOAT32) graph.node.remove(prod0) graph.node.insert(node_ind - 1, prod0) graph_modified = True else: continue if graph_modified: model = model.transform(InferShapes()) model = model.transform(InferDataTypes()) return (model, graph_modified)
def test_import_and_tidy(self, topology, wbits, abits):
    prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "export")
    model = load_test_checkpoint_or_skip(prev_chkpt_name)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RemoveStaticGraphInputs())
    chkpt = get_checkpoint_name(topology, wbits, abits, "import_and_tidy")
    model.save(chkpt)
def tidy_up(model):
    log("Basic transformations launched")
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RemoveStaticGraphInputs())
    log("Basic transformations completed")
    save(model, "0_tidy")
    return model
def apply(self, model):
    graph = model.graph
    node_ind = 0
    graph_modified = False
    nodes = [n for n in model.graph.node]
    for n in nodes:
        node_ind += 1
        if n.op_type == "Transpose" and not model.is_fork_node(n):
            perms = list(get_by_name(n.attribute, "perm").ints)
            if perms == [0, 3, 1, 2]:
                mt_cand = model.find_consumer(n.output[0])
                if (
                    mt_cand is not None
                    and mt_cand.op_type == "MultiThreshold"
                    # and not model.is_fork_node(mt_cand)
                ):
                    mt_cand_orig_output = mt_cand.output[0]
                    mt = getCustomOp(mt_cand)
                    mt.set_nodeattr("data_layout", "NHWC")
                    # Rewire input of MultiThreshold node
                    mt_cand.input[0] = n.input[0]
                    # Make new intermediate tensor
                    intermediate_tensor_name = model.make_new_valueinfo_name()
                    intermediate_tensor_shape = model.get_tensor_shape(n.input[0])
                    intermediate_tensor_finn_dtype = model.get_tensor_datatype(
                        mt_cand.output[0]
                    )
                    # Create a new ValueInfoProto and set the shape
                    model.set_tensor_shape(
                        intermediate_tensor_name, intermediate_tensor_shape
                    )
                    # Set the tensor layout
                    model.set_tensor_layout(
                        intermediate_tensor_name, DataLayout.NHWC
                    )
                    # Set the tensor FINN datatype
                    model.set_tensor_datatype(
                        intermediate_tensor_name, intermediate_tensor_finn_dtype
                    )
                    # Rewire output of MT node
                    mt_cand.output[0] = intermediate_tensor_name
                    # Get rid of first transpose node
                    graph.node.remove(n)
                    # Create new Transpose node
                    new_transpose = oh.make_node(
                        "Transpose",
                        [intermediate_tensor_name],
                        [mt_cand_orig_output],
                        perm=[0, 3, 1, 2],
                    )
                    graph.node.insert(node_ind + 1, new_transpose)
                    graph_modified = True
    if graph_modified:
        model = model.transform(InferDataTypes())
    return (model, graph_modified)
def apply(self, model):
    # Extract the bias from Conv node
    model = model.transform(ExtractBiasFromConv())
    # Gemm operations are not supported by FINN, so we convert them to MatMul
    model = model.transform(GemmToMatMul())
    model = model.transform(FoldTransposeIntoQuantInit())
    # Make sure the datatypes exist, these are required for folding the weights
    model = model.transform(InferDataTypes())
    # Fold weights
    model = model.transform(FoldQuantWeights())
    # Convert activations
    model = model.transform(
        ConvertQuantActToMultiThreshold(
            filter_function=self._filter_function,
        )
    )
    # Recompute datatypes
    model = model.transform(InferDataTypes())
    # Convert AvgPool -> Mul -> Trunc structure to QuantAvgPool2d
    model = model.transform(AvgPoolAndTruncToQuantAvgPool())
    # Remove empty padding if it exists
    model = model.transform(RemoveIdentityOps())
    return model, False
def post_processing(model):
    log("Starting Post Processing")
    # Insert Top-1 node at the end
    model = model.transform(InsertTopK(k=1))
    # Tidy-up again
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RemoveStaticGraphInputs())
    log("Finished Post Processing!")
    save(model, "2_with_pre_post")
    return model
def apply(self, model): graph = model.graph node_ind = 0 graph_modified = False for n in graph.node: node_ind += 1 if n.op_type == "Transpose" and not model.is_fork_node(n): perms = list(get_by_name(n.attribute, "perm").ints) if perms == [0, 3, 1, 2]: mt_cand = model.find_consumer(n.output[0]) if mt_cand.op_type == "MultiThreshold" and not model.is_fork_node( mt_cand ): final_t_cand = model.find_consumer(mt_cand.output[0]) if final_t_cand.op_type == "Transpose": perms = list( get_by_name(final_t_cand.attribute, "perm").ints ) if perms == [0, 2, 3, 1]: mt = getCustomOp(mt_cand) mt.set_nodeattr("data_layout", "NHWC") # get rid of tranpose nodes, wire MT directly mt_cand.input[0] = n.input[0] mt_cand.output[0] = final_t_cand.output[0] graph.node.remove(n) graph.node.remove(final_t_cand) graph_modified = True else: mt = getCustomOp(mt_cand) mt.set_nodeattr("data_layout", "NHWC") # get rid of first tranpose node mt_cand.input[0] = n.input[0] graph.node.remove(n) # fix output shape for MultiThreshold mt_ishape = model.get_tensor_shape(mt_cand.input[0]) model.set_tensor_shape(mt_cand.output[0], mt_ishape) # re-insert Transpose behind MultiThreshold transpose_output = model.make_new_valueinfo_name() new_transpose = oh.make_node( "Transpose", [mt_cand.output[0]], [transpose_output], perm=[0, 3, 1, 2], ) graph.node.insert(node_ind + 1, new_transpose) final_t_cand.input[0] = transpose_output graph_modified = True if graph_modified: model = model.transform(InferDataTypes()) return (model, graph_modified)
def test_remove_identity_ops(op, as_first_node, approx):
    # set up onnx model
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 4, 1, 1])
    mul = helper.make_tensor_value_info("mul", TensorProto.FLOAT, [])
    shape = helper.make_tensor_value_info("shape", TensorProto.FLOAT, [2])
    div = helper.make_tensor_value_info("div", TensorProto.FLOAT, [])
    matmul = helper.make_tensor_value_info("matmul", TensorProto.FLOAT, [4, 2])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, 2])

    mul_node = helper.make_node("Mul", ["inp", "mul"], ["mul_out"])
    reshape_node = helper.make_node("Reshape", ["mul_out", "shape"], ["reshape_out"])
    div_node = helper.make_node("Div", ["reshape_out", "div"], ["div_out"])
    matmul_node = helper.make_node("MatMul", ["div_out", "matmul"], ["outp"])

    graph = helper.make_graph(
        nodes=[mul_node, reshape_node, div_node, matmul_node],
        name="identity-graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[mul, shape, div, matmul],
    )

    model = helper.make_model(graph, producer_name="mulpastconv-model")
    model = ModelWrapper(model)
    inp_values = gen_finn_dt_tensor(DataType["INT2"], [1, 4, 1, 1])
    mul_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
    shape_values = np.asarray([1, -1], dtype=np.int64)
    div_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
    matmul_values = gen_finn_dt_tensor(DataType["INT2"], [4, 2])
    model.set_initializer("mul", mul_values)
    model.set_initializer("shape", shape_values)
    model.set_initializer("div", div_values)
    model.set_initializer("matmul", matmul_values)
    insert_identity_op(model, op, as_first_node, approx)
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    idict = {"inp": inp_values}
    odict = oxe.execute_onnx(model, idict)
    out_before = odict["outp"]
    num_of_nodes_before = len(model.graph.node)

    model = model.transform(RemoveIdentityOps())
    num_of_nodes_after = len(model.graph.node)
    assert num_of_nodes_before - 1 == num_of_nodes_after

    odict = oxe.execute_onnx(model, idict)
    out_after = odict["outp"]
    assert np.isclose(out_before, out_after, atol=1e-3).all()
def apply(self, model): graph = model.graph node_ind = 0 graph_modified = False for n in graph.node: node_ind += 1 if n.op_type == "Im2Col": i2c_input = n.input[0] i2c_output = n.output[0] i2c_in_shape = model.get_tensor_shape(i2c_input) i2c_out_shape = model.get_tensor_shape(i2c_output) dt = model.get_tensor_datatype(i2c_input) i2c_inst = getCustomOp(n) stride = i2c_inst.get_nodeattr("stride") k = i2c_inst.get_nodeattr("kernel_size") pad = i2c_inst.get_nodeattr("pad_amount") pad_val = i2c_inst.get_nodeattr("pad_value") ifm_ch = i2c_in_shape[-1] ifm_dim = i2c_in_shape[1] ofm_dim = i2c_out_shape[1] # if padding enabled, ensure pad_val supported by DataType if pad > 0: assert dt.allowed( pad_val), "Im2Col DataType must support pad_val" # create equivalent ConvolutionInputGenerator node # TODO support padding new_node = helper.make_node( "ConvolutionInputGenerator", [i2c_input], [i2c_output], domain="finn", backend="fpgadataflow", ConvKernelDim=k, IFMChannels=ifm_ch, IFMDim=ifm_dim, OFMDim=ofm_dim, SIMD=ifm_ch, Stride=stride, inputDataType=dt.name, outputDataType=dt.name, ) graph.node.insert(node_ind, new_node) # remove old nodes graph.node.remove(n) graph_modified = True if graph_modified: model = model.transform(InferShapes()) model = model.transform(InferDataTypes()) return (model, graph_modified)
def apply(self, model): graph = model.graph node_ind = 0 graph_modified = False for n in graph.node: node_ind += 1 if n.op_type == "Transpose" and not model.is_fork_node(n): perms = list(get_by_name(n.attribute, "perm").ints) if perms == [0, 3, 1, 2]: mt_cand = model.find_consumer(n.output[0]) if mt_cand.op_type == "MultiThreshold" and not model.is_fork_node( mt_cand): final_t_cand = model.find_consumer(mt_cand.output[0]) if final_t_cand.op_type == "Transpose": perms = list( get_by_name(final_t_cand.attribute, "perm").ints) if perms == [0, 2, 3, 1]: mt = getCustomOp(mt_cand) mt.set_nodeattr("data_layout", "NHWC") # get rid of tranpose nodes, wire MT directly mt_cand.input[0] = n.input[0] mt_cand.output[0] = final_t_cand.output[0] graph.node.remove(n) graph.node.remove(final_t_cand) graph_modified = True elif final_t_cand.op_type == "Reshape": oshape = model.get_tensor_shape( final_t_cand.output[0]) if len(oshape) == 2: # transition to FC part, can still use NHWC mt = getCustomOp(mt_cand) mt.set_nodeattr("data_layout", "NHWC") # get rid of first tranpose node mt_cand.input[0] = n.input[0] # fix output shape for MultiThreshold mt_ishape = model.get_tensor_shape( mt_cand.input[0]) (b, h, w, c) = mt_ishape assert (h == 1 and w == 1), """Untested spatial dim in conv->fc transition, proceed with caution!""" model.set_tensor_shape(mt_cand.output[0], mt_ishape) graph.node.remove(n) graph_modified = True if graph_modified: model = model.transform(InferDataTypes()) return (model, graph_modified)
def step_resnet50_convert_to_hls(model: ModelWrapper, cfg: DataflowBuildConfig):
    model.set_tensor_datatype(model.graph.input[0].name, DataType["UINT8"])
    model = model.transform(InferDataLayouts())

    try:
        from finn.transformation.fpgadataflow.infer_doublepacked_dsp import (
            InferDoublePackedConv,
        )

        model = model.transform(InferDoublePackedConv([1]))
    except:
        print(" FINN Experimental not available. Using non-packed convolution ")

    model = model.transform(DoubleToSingleFloat())
    model = model.transform(InferDataTypes())
    model = model.transform(SortGraph())

    to_hls_transformations = [
        to_hls.InferAddStreamsLayer,
        LowerConvsToMatMul,
        to_hls.InferChannelwiseLinearLayer,
        to_hls.InferPool_Batch,
        AbsorbTransposeIntoMultiThreshold,
        RoundAndClipThresholds,
        to_hls.InferQuantizedStreamingFCLayer,
        to_hls.InferThresholdingLayer,
        AbsorbConsecutiveTransposes,
        to_hls.InferConvInpGen,
        to_hls.InferDuplicateStreamsLayer,
        to_hls.InferLabelSelectLayer,
    ]
    for trn in to_hls_transformations:
        model = model.transform(trn())
        model = model.transform(InferDataLayouts())
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(InferDataTypes())

    model = model.transform(RemoveCNVtoFCFlatten())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(RemoveUnusedTensors())
    model = model.transform(SortGraph())

    return model
def step_resnet50_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
    for iter_id in range(4):
        model = step_resnet50_streamline_linear(model, cfg)
        model = step_resnet50_streamline_nonlinear(model, cfg)

        # big loop tidy up
        model = model.transform(RemoveUnusedTensors())
        model = model.transform(GiveReadableTensorNames())
        model = model.transform(InferDataTypes())
        model = model.transform(SortGraph())

    model = model.transform(DoubleToSingleFloat())

    return model