def test_modelwrapper():
    lfc = get_test_model_trained("LFC", 1, 1)
    bo.export_finn_onnx(lfc, (1, 1, 28, 28), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    assert model.check_all_tensor_shapes_specified() is False
    inp_name = model.graph.input[0].name
    inp_shape = model.get_tensor_shape(inp_name)
    assert inp_shape == [1, 1, 28, 28]
    # find first matmul node
    l0_mat_tensor_name = ""
    l0_inp_tensor_name = ""
    for node in model.graph.node:
        if node.op_type == "MatMul":
            l0_inp_tensor_name = node.input[0]
            l0_mat_tensor_name = node.input[1]
            break
    assert l0_mat_tensor_name != ""
    l0_weights = model.get_initializer(l0_mat_tensor_name)
    assert l0_weights.shape == (784, 1024)
    l0_weights_hist = Counter(l0_weights.flatten())
    assert (l0_weights_hist[1.0] + l0_weights_hist[-1.0]) == 784 * 1024
    l0_weights_rand = np.random.randn(784, 1024)
    model.set_initializer(l0_mat_tensor_name, l0_weights_rand)
    assert (model.get_initializer(l0_mat_tensor_name) == l0_weights_rand).all()
    assert l0_inp_tensor_name != ""
    inp_cons = model.find_consumer(l0_inp_tensor_name)
    assert inp_cons.op_type == "MatMul"
    out_prod = model.find_producer(l0_inp_tensor_name)
    assert out_prod.op_type == "Sign"
    os.remove(export_onnx_path)
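
# The Counter-based histogram check in test_modelwrapper works because a
# (perfectly) binarized weight tensor contains only +1/-1 entries. A tiny
# standalone sketch of that check, assuming only numpy and
# collections.Counter as already imported for the tests here:

def _binarized_hist_demo():
    w = np.sign(np.random.randn(4, 4)).astype(np.float32)
    w[w == 0] = 1.0  # guard the (measure-zero) chance of an exact zero
    hist = Counter(w.flatten())
    # every element is accounted for by the two bins
    assert hist[1.0] + hist[-1.0] == w.size
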
def test_const_folding_shapes():
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    model = model.transform(InferShapes())
    mm_node_w_in = model.get_nodes_by_op_type("MatMul")[0].input[1]
    assert model.find_producer(mm_node_w_in) is not None
    assert model.find_producer(mm_node_w_in).op_type == "Reshape"
    assert model.get_initializer(mm_node_w_in) is None
    model = model.transform(FoldConstants())
    assert model.find_producer(mm_node_w_in) is None
    assert model.get_initializer(mm_node_w_in) is not None
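
# A minimal sketch of what the test above checks, assuming the same FINN
# imports (ModelWrapper, InferShapes, FoldConstants) plus onnx.helper and
# numpy; all tensor names below are made up for illustration. A node whose
# inputs are all initializers (the Reshape here) gets executed at transform
# time and replaced by an initializer on its output tensor:
from onnx import TensorProto, helper

def _fold_constants_sketch():
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 784])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, 1024])
    w = helper.make_tensor_value_info("w", TensorProto.FLOAT, [784, 1024])
    graph = helper.make_graph(
        nodes=[
            helper.make_node("Reshape", ["w_flat", "w_shape"], ["w"]),
            helper.make_node("MatMul", ["inp", "w"], ["outp"]),
        ],
        name="fold_sketch",
        inputs=[inp],
        outputs=[outp],
        value_info=[w],
    )
    model = ModelWrapper(helper.make_model(graph))
    model.set_initializer("w_flat", np.random.rand(784 * 1024).astype(np.float32))
    model.set_initializer("w_shape", np.asarray([784, 1024], dtype=np.int64))
    model = model.transform(InferShapes())
    assert model.get_initializer("w") is None  # "w" still produced by Reshape
    model = model.transform(FoldConstants())
    assert model.get_initializer("w") is not None  # Reshape folded away
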
def test_brevitas_cnv_w1a1_export():
    cnv = get_test_model_untrained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    assert model.graph.node[2].op_type == "Sign"
    assert model.graph.node[3].op_type == "Conv"
    conv0_wname = model.graph.node[3].input[1]
    assert list(model.get_initializer(conv0_wname).shape) == [64, 3, 3, 3]
    assert model.graph.node[4].op_type == "Mul"
    os.remove(export_onnx_path)
def test_modelwrapper():
    lfc = get_test_model_trained("LFC", 1, 1)
    bo.export_finn_onnx(lfc, (1, 1, 28, 28), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    assert model.check_all_tensor_shapes_specified() is False
    inp_shape = model.get_tensor_shape("0")
    assert inp_shape == [1, 1, 28, 28]
    l0_mat_tensor_name = "33"
    l0_weights = model.get_initializer(l0_mat_tensor_name)
    assert l0_weights.shape == (784, 1024)
    l0_weights_hist = Counter(l0_weights.flatten())
    assert l0_weights_hist[1.0] == 401311 and l0_weights_hist[-1.0] == 401505
    l0_weights_rand = np.random.randn(784, 1024)
    model.set_initializer(l0_mat_tensor_name, l0_weights_rand)
    assert (model.get_initializer(l0_mat_tensor_name) == l0_weights_rand).all()
    l0_inp_tensor_name = "32"
    inp_cons = model.find_consumer(l0_inp_tensor_name)
    assert inp_cons.op_type == "MatMul"
    out_prod = model.find_producer(l0_inp_tensor_name)
    assert out_prod.op_type == "Sign"
    os.remove(export_onnx_path)
def test_modelwrapper():
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    assert model.check_all_tensor_shapes_specified() is True
    inp_name = model.graph.input[0].name
    inp_shape = model.get_tensor_shape(inp_name)
    assert inp_shape == [1, 1, 28, 28]
    conv_nodes = model.get_nodes_by_op_type("Conv")
    matmul_nodes = model.get_nodes_by_op_type("MatMul")
    assert len(conv_nodes) == 2
    assert len(matmul_nodes) == 1
    first_conv = conv_nodes[0]
    first_conv_iname = first_conv.input[0]
    first_conv_wname = first_conv.input[1]
    first_conv_oname = first_conv.output[0]
    assert first_conv_iname != "" and (first_conv_iname is not None)
    assert first_conv_wname != "" and (first_conv_wname is not None)
    assert first_conv_oname != "" and (first_conv_oname is not None)
    first_conv_weights = model.get_initializer(first_conv_wname)
    assert first_conv_weights.shape == (8, 1, 5, 5)
    first_conv_weights_rand = np.random.randn(8, 1, 5, 5)
    model.set_initializer(first_conv_wname, first_conv_weights_rand)
    assert (
        model.get_initializer(first_conv_wname) == first_conv_weights_rand
    ).all()
    inp_cons = model.find_consumer(first_conv_iname)
    assert inp_cons == first_conv
    out_prod = model.find_producer(first_conv_oname)
    assert out_prod == first_conv
    inp_layout = model.get_tensor_layout(first_conv_iname)
    assert inp_layout is None
    inp_layout = DataLayout.NCHW
    model.set_tensor_layout(first_conv_iname, inp_layout)
    assert model.get_tensor_layout(first_conv_iname) == inp_layout
    inp_sparsity = model.get_tensor_sparsity(first_conv_iname)
    assert inp_sparsity is None
    inp_sparsity = {"dw": {"kernel_shape": [3, 3]}}
    model.set_tensor_sparsity(first_conv_iname, inp_sparsity)
    assert model.get_tensor_sparsity(first_conv_iname) == inp_sparsity
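
# The layout/sparsity annotations set above live on the model itself (FINN
# keeps them as ONNX quantization annotations), so they should survive a
# save/load round-trip. A quick sketch under that assumption; the file name
# below is made up for illustration:

def _annotation_roundtrip_sketch(model, tensor_name, tmp_onnx="anno_test.onnx"):
    model.set_tensor_layout(tensor_name, DataLayout.NCHW)
    model.save(tmp_onnx)
    reloaded = ModelWrapper(tmp_onnx)
    assert reloaded.get_tensor_layout(tensor_name) == DataLayout.NCHW
    os.remove(tmp_onnx)
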
def apply(self, model):
    # create a temporary folder for the generated driver
    pynq_driver_dir = make_build_dir(prefix="pynq_driver_")
    model.set_metadata_prop("pynq_driver_dir", pynq_driver_dir)
    # create the base FINN driver -- same for all accels
    driver_base_template = pk.resource_filename(
        "finn.qnn-data", "templates/driver/driver_base.py"
    )
    driver_base_py = pynq_driver_dir + "/driver_base.py"
    shutil.copy(driver_base_template, driver_base_py)
    # extract input-output shapes from the graph
    # TODO convert this to an analysis pass?
    idt = []
    idma_names = []
    ishape_normal = []
    ishape_folded = []
    ishape_packed = []
    for idma_ind, graph_in in enumerate(model.graph.input):
        i_tensor_name = graph_in.name
        # get inp tensor properties
        i_tensor_dt = model.get_tensor_datatype(i_tensor_name)
        i_tensor_shape_normal = tuple(model.get_tensor_shape(i_tensor_name))
        # go down into dataflow partition to get folded shape info etc
        # TODO consider setting these as attributes during dataflow partitioning
        i_consumer = model.find_consumer(i_tensor_name)
        assert (
            i_consumer.op_type == "StreamingDataflowPartition"
        ), "Ensure CreateDataflowPartition called before driver creation."
        first_df_model = ModelWrapper(getCustomOp(i_consumer).get_nodeattr("model"))
        assert (
            first_df_model.graph.node[0].op_type == "IODMA"
        ), "First partition must hold input IODMA"
        successors = model.find_direct_successors(i_consumer)
        successor_input_num = list(successors[0].input).index(i_consumer.output[0])
        successor_sdp = getCustomOp(successors[0])
        successor_df_model = ModelWrapper(successor_sdp.get_nodeattr("model"))
        first_node = successor_df_model.find_consumer(
            successor_df_model.graph.input[successor_input_num].name
        )
        i_tensor_shape_folded = tuple(getCustomOp(first_node).get_folded_input_shape())
        # generate dummy folded i/o tensors and their packed versions
        i_tensor_dummy_folded = gen_finn_dt_tensor(i_tensor_dt, i_tensor_shape_folded)
        i_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
            i_tensor_dummy_folded, i_tensor_dt
        )
        i_tensor_shape_packed = i_tensor_dummy_packed.shape
        # append all input tensor info to relevant lists
        idt.append("DataType['%s']" % i_tensor_dt.name)
        ishape_normal.append(i_tensor_shape_normal)
        ishape_folded.append(i_tensor_shape_folded)
        ishape_packed.append(i_tensor_shape_packed)
        idma_names.append(getCustomOp(i_consumer).get_nodeattr("instance_name"))

    odt = []
    odma_names = []
    oshape_normal = []
    oshape_folded = []
    oshape_packed = []
    for odma_ind, graph_out in enumerate(model.graph.output):
        o_tensor_name = graph_out.name
        # get out tensor properties
        o_tensor_dt = model.get_tensor_datatype(o_tensor_name)
        o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name))
        # go down into IODMA partition to get folded shape info etc
        # TODO consider setting these as attributes during dataflow partitioning
        o_producer = model.find_producer(o_tensor_name)
        assert (
            o_producer.op_type == "StreamingDataflowPartition"
        ), "Ensure CreateDataflowPartition called before driver creation."
        df_model = ModelWrapper(getCustomOp(o_producer).get_nodeattr("model"))
        assert (
            df_model.graph.node[-1].op_type == "IODMA"
        ), "Partition must hold output IODMA"
        predecessors = model.find_direct_predecessors(o_producer)
        predecessor_output_num = list(predecessors[0].output).index(
            o_producer.input[0]
        )
        predecessor_sdp = getCustomOp(predecessors[0])
        predecessor_df_model = ModelWrapper(predecessor_sdp.get_nodeattr("model"))
        last_node = predecessor_df_model.find_producer(
            predecessor_df_model.graph.output[predecessor_output_num].name
        )
        o_tensor_shape_folded = tuple(
            getCustomOp(last_node).get_folded_output_shape()
        )
        o_tensor_dummy_folded = gen_finn_dt_tensor(o_tensor_dt, o_tensor_shape_folded)
        o_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
            o_tensor_dummy_folded, o_tensor_dt
        )
        o_tensor_shape_packed = o_tensor_dummy_packed.shape
        # append all output tensor info to relevant lists
        odt.append("DataType['%s']" % o_tensor_dt.name)
        oshape_normal.append(o_tensor_shape_normal)
        oshape_folded.append(o_tensor_shape_folded)
        oshape_packed.append(o_tensor_shape_packed)
        odma_names.append(getCustomOp(o_producer).get_nodeattr("instance_name"))

    # generate external weights npy files
    weights_dir = pynq_driver_dir + "/runtime_weights"
    os.makedirs(weights_dir)
    idma_idx = 0
    ext_weight_dma_cnt = 0
    for node in model.graph.node:
        assert (
            node.op_type == "StreamingDataflowPartition"
        ), "CreateDataflowPartition needs to be applied before driver generation"
        if len(node.input) > 0:
            producer = model.find_producer(node.input[0])
            init_tensor = model.get_initializer(node.input[0])
        else:
            producer = None
            init_tensor = None
        if producer is None:
            # input dma?
            sdp_inst = getCustomOp(node)
            idma_name = sdp_inst.get_nodeattr("instance_name")
            df_model = ModelWrapper(sdp_inst.get_nodeattr("model"))
            assert df_model.graph.node[0].op_type == "IODMA"
            iodma_node = getCustomOp(df_model.graph.node[0])
            if iodma_node.get_nodeattr("burstMode") == "wrap":
                # input weights dma?
                init_tensor = df_model.get_initializer(
                    iodma_node.onnx_node.input[0]
                )
                ext_weight_dma_cnt += 1
                w_dtype = df_model.get_tensor_datatype(
                    iodma_node.onnx_node.input[0]
                )
                init_external_tensor = to_external_tensor(init_tensor, w_dtype)
                np.save(
                    weights_dir + "/" + idma_name + ".npy", init_external_tensor
                )
            idma_idx += 1

    # fill in the driver template
    driver_py = pynq_driver_dir + "/driver.py"
    driver = template_driver.pynq_driver_template
    driver = driver.replace("$PLATFORM$", self.platform)
    driver = driver.replace("$INPUT_FINN_DATATYPE$", str(idt).replace('"', ""))
    driver = driver.replace("$INPUT_SHAPE_NORMAL$", str(ishape_normal))
    driver = driver.replace("$INPUT_SHAPE_FOLDED$", str(ishape_folded))
    driver = driver.replace("$INPUT_SHAPE_PACKED$", str(ishape_packed))
    driver = driver.replace("$OUTPUT_FINN_DATATYPE$", str(odt).replace('"', ""))
    driver = driver.replace("$OUTPUT_SHAPE_NORMAL$", str(oshape_normal))
    driver = driver.replace("$OUTPUT_SHAPE_FOLDED$", str(oshape_folded))
    driver = driver.replace("$OUTPUT_SHAPE_PACKED$", str(oshape_packed))
    driver = driver.replace("$INPUT_DMA_NAME$", "%s" % str(idma_names))
    driver = driver.replace("$OUTPUT_DMA_NAME$", "%s" % str(odma_names))
    driver = driver.replace("$NUM_INPUTS$", str(len(idma_names)))
    driver = driver.replace("$NUM_OUTPUTS$", str(len(odma_names)))
    driver = driver.replace("$EXT_WEIGHT_NUM$", str(ext_weight_dma_cnt))

    with open(driver_py, "w") as f:
        f.write(driver)

    # add validate.py to run full top-1 test (only for suitable networks)
    validate_py = pynq_driver_dir + "/validate.py"
    validate_template = pk.resource_filename(
        "finn.qnn-data", "templates/driver/validate.py"
    )
    shutil.copy(validate_template, validate_py)

    # copy all the dependencies into the driver folder
    # driver imports utils/data_packing and core/datatype
    # both of which are in finn-base
    # e.g. /workspace/finn-base/src/finn/util/data_packing.py
    dpk_root = dpk.__file__
    # e.g. /workspace/finn-base/src/finn/util
    dpk_root = dpk_root.replace("data_packing.py", "")
    # e.g. /workspace/finn-base/src/finn/core/datatype.py
    dtp_root = dtp.__file__
    # e.g. /workspace/finn-base/src/finn/core
    dtp_root = dtp_root.replace("datatype.py", "")
    shutil.copytree(dpk_root, pynq_driver_dir + "/finn/util")
    shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core")

    # generate weight files for runtime-writable layers
    for sdp_ind, sdp_node in enumerate(model.graph.node):
        assert sdp_node.op_type == "StreamingDataflowPartition"
        # get dataflow model
        sdp_node = getCustomOp(sdp_node)
        dataflow_model_filename = sdp_node.get_nodeattr("model")
        dataflow_model = ModelWrapper(dataflow_model_filename)
        rt_layer_ind = 0
        for node in dataflow_model.graph.node:
            if node.op_type in ["StreamingFCLayer_Batch", "Thresholding_Batch"]:
                node_inst = getCustomOp(node)
                is_rt_weights = node_inst.get_nodeattr("runtime_writeable_weights")
                if is_rt_weights == 1:
                    fcl_w = dataflow_model.get_initializer(node.input[1])
                    w_filename = weights_dir + "/%d_%d_%s.dat" % (
                        sdp_ind,
                        rt_layer_ind,
                        node.name,
                    )
                    node_inst.make_weight_file(
                        fcl_w, "decoupled_runtime", w_filename
                    )
                    rt_layer_ind += 1
            elif node.op_type == "StreamingDataflowPartition":
                warnings.warn("Nested StreamingDataflowPartition are not supported")
            else:
                continue

    return (model, False)
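
# A small illustration (not part of the transform) of why the template
# fill-in above strips double quotes from str(idt): the driver template is
# plain Python source, and the datatype list must land in it as code, not as
# a list of string literals:
idt_demo = ["DataType['UINT8']"]
# str(...) renders as ["DataType['UINT8']"]; stripping the double quotes
# leaves [DataType['UINT8']], a valid Python expression inside the driver
assert str(idt_demo).replace('"', "") == "[DataType['UINT8']]"
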
def test_linear_past_eltwise_add_multiple_forks(ch, ifmdim):
    # generate test vectors of correct shape
    if ifmdim == -1:
        input_shape = (1, ch)
    else:
        input_shape = (1, ch, ifmdim, ifmdim)
    top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
    top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, input_shape)
    num_of_params = 6
    value_info = []
    for i in range(num_of_params):
        value_info += [
            helper.make_tensor_value_info("p" + str(i), TensorProto.FLOAT, input_shape)
        ]
    modelproto = helper.make_model(
        helper.make_graph(
            name="test",
            inputs=[top_in],
            outputs=[top_out],
            value_info=value_info,
            nodes=[
                helper.make_node("Add", ["top_in", "p0"], ["fork1"]),
                helper.make_node("Mul", ["fork1", "p1"], ["t2"]),
                helper.make_node("Mul", ["fork1", "p2"], ["t3"]),
                helper.make_node("Add", ["t2", "t3"], ["t4"]),
                helper.make_node("Mul", ["t4", "p3"], ["fork2"]),
                helper.make_node("Add", ["fork2", "p4"], ["t5"]),
                helper.make_node("Add", ["fork2", "p5"], ["t6"]),
                helper.make_node("Add", ["t5", "t6"], ["top_out"]),
            ],
        )
    )
    model = ModelWrapper(modelproto)
    model = model.transform(InferShapes())
    np.random.seed(0)
    for i in range(num_of_params):
        model.set_initializer(
            "p" + str(i), np.random.rand(*input_shape).astype(np.float32)
        )
    # need equal mults:
    model.set_initializer("p2", model.get_initializer("p1"))
    # Transform
    new_model = model.transform(MoveLinearPastEltwiseAdd())
    inp_dict = {"top_in": np.random.rand(*input_shape).astype(np.float32)}
    # Test
    assert oxe.compare_execution(model, new_model, inp_dict)
    assert new_model.graph.node[0].op_type == "Add"
    assert new_model.graph.node[1].op_type == "Add"
    assert new_model.graph.node[2].op_type == "Mul"
    assert new_model.graph.node[3].op_type == "Mul"
    assert new_model.graph.node[4].op_type == "Add"
    assert new_model.graph.node[5].op_type == "Add"
    assert len(new_model.graph.node) == 6
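
# A quick numpy sanity check (illustrative only) of the algebraic identity
# MoveLinearPastEltwiseAdd relies on, and why the test pins p1 == p2 above:
# a*x + a*y == a*(x + y) only holds when both Mul branches share the same
# scale, which is what lets the Mul be moved past the eltwise Add:

def _check_mul_past_add_identity():
    rng = np.random.default_rng(0)
    x, y, a = (rng.random((1, 64)).astype(np.float32) for _ in range(3))
    lhs = a * x + a * y  # Mul on both fork branches, then Add
    rhs = a * (x + y)  # Add first, a single Mul afterwards
    assert np.allclose(lhs, rhs, atol=1e-6)
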
def test_brevitas_compare_exported_mobilenet():
    if "IMAGENET_VAL_PATH" not in os.environ.keys():
        pytest.skip("Can't do validation without IMAGENET_VAL_PATH")
    n_images = 10
    debug_mode = False
    export_onnx_path = make_build_dir("test_brevitas_mobilenet-v1_")
    # export preprocessing
    preproc_onnx = export_onnx_path + "/quant_mobilenet_v1_4b_preproc.onnx"
    preproc = NormalizePreProc(mean, std, ch)
    bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
    preproc_model = ModelWrapper(preproc_onnx)
    preproc_model = preproc_model.transform(InferShapes())
    preproc_model = preproc_model.transform(GiveUniqueNodeNames())
    preproc_model = preproc_model.transform(GiveUniqueParameterTensors())
    preproc_model = preproc_model.transform(GiveReadableTensorNames())
    # export the actual MobileNet-v1
    finn_onnx = export_onnx_path + "/quant_mobilenet_v1_4b.onnx"
    mobilenet = get_test_model_trained("mobilenet", 4, 4)
    if debug_mode:
        dbg_hook = bo.enable_debug(mobilenet)
    bo.export_finn_onnx(mobilenet, (1, 3, 224, 224), finn_onnx)
    model = ModelWrapper(finn_onnx)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(RemoveStaticGraphInputs())
    model = model.transform(InsertTopK())
    # get initializer from Mul that will be absorbed into topk
    a0 = model.get_initializer(model.get_nodes_by_op_type("Mul")[-1].input[1])
    model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveUniqueParameterTensors())
    model = model.transform(GiveReadableTensorNames())
    model.save(export_onnx_path + "/quant_mobilenet_v1_4b_wo_preproc.onnx")
    # create merged preprocessing + MobileNet-v1 model
    model = model.transform(MergeONNXModels(preproc_model))
    model.save(export_onnx_path + "/quant_mobilenet_v1_4b.onnx")

    with open(
        export_onnx_path + "/mobilenet_validation.csv", "w", newline=""
    ) as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(
            [
                "goldenID",
                "brevitasTop5",
                "brevitasTop5[%]",
                "finnTop5",
                "finnTop5[%]",
                "top5equal",
                "top5%equal",
            ]
        )
        csvfile.flush()
        workload = imagenet_util.get_val_images(n_images, interleave_classes=True)
        all_inds_ok = True
        all_probs_ok = True
        for (img_path, target_id) in workload:
            img_np = imagenet_util.load_resize_crop(img_path)
            img_torch = torch.from_numpy(img_np).float()
            # do forward pass in PyTorch/Brevitas
            input_tensor = preproc.forward(img_torch)
            expected = mobilenet.forward(input_tensor).detach().numpy()
            expected_topk = expected.flatten()
            expected_top5 = np.argsort(expected_topk)[-5:]
            expected_top5 = np.flip(expected_top5)
            expected_top5_prob = []
            for index in expected_top5:
                expected_top5_prob.append(expected_topk[index])
            idict = {model.graph.input[0].name: img_np}
            odict = oxe.execute_onnx(model, idict, return_full_exec_context=True)
            produced = odict[model.graph.output[0].name]
            produced_prob = odict["TopK_0_out0"] * a0
            inds_ok = (produced.flatten() == expected_top5).all()
            probs_ok = np.isclose(produced_prob.flatten(), expected_top5_prob).all()
            all_inds_ok = all_inds_ok and inds_ok
            all_probs_ok = all_probs_ok and probs_ok
            writer.writerow(
                [
                    str(target_id),
                    str(expected_top5),
                    str(expected_top5_prob),
                    str(produced.flatten()),
                    str(produced_prob.flatten()),
                    str(inds_ok),
                    str(probs_ok),
                ]
            )
            csvfile.flush()
            if ((not inds_ok) or (not probs_ok)) and debug_mode:
                print("Results differ for %s" % img_path)
                # check all tensors at debug markers
                names_brevitas = set(dbg_hook.values.keys())
                names_finn = set(odict.keys())
                names_common = names_brevitas.intersection(names_finn)
                for dbg_name in names_common:
                    if not np.isclose(
                        dbg_hook.values[dbg_name].detach().numpy(),
                        odict[dbg_name],
                        atol=1e-3,
                    ).all():
                        print(
                            "Tensor %s differs between Brevitas and FINN" % dbg_name
                        )
        assert all_inds_ok and all_probs_ok
def test_convert_to_hls_layers_synthetic(ch, ifmdim, idt):
    model = make_model(ch, ifmdim)
    model.save(export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # generate test vectors of correct shape
    if ifmdim == -1:
        input_tensor_shape = (1, ch)
    else:
        input_tensor_shape = (1, ch, ifmdim, ifmdim)
    x = gen_finn_dt_tensor(idt, input_tensor_shape)
    # generate expected value from streamlined net
    input_dict = {model.graph.input[0].name: x}
    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_sum = output_dict[model.graph.output[0].name]
    chw_mul = model.get_initializer(model.graph.node[-1].input[1])
    # override the looked-up scalar: the expected-value formula below treats
    # the final Mul as unity
    chw_mul = 1
    expected_sum = (
        chw_mul * np.sum(2 * (2 * x + 15.0), axis=(2, 3)) / (ifmdim * ifmdim)
    )
    assert (produced_sum.flatten() == expected_sum.flatten()).all()
    model = model.transform(InferDataLayouts())
    # convert to hls
    model.set_tensor_datatype(model.graph.input[0].name, idt)
    # extra streamlining
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(MoveAddPastMul())
    model = model.transform(CollapseRepeatedMul())
    model = model.transform(CollapseRepeatedAdd())
    # insert top-k node, which should absorb linear ops before it
    model = model.transform(InferShapes())
    model = model.transform(InferDataLayouts())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferChannelwiseLinearLayer())
    model = model.transform(to_hls.InferAddStreamsLayer())
    model = model.transform(to_hls.InferGlobalAccPoolLayer())
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(InsertTopK())
    model = model.transform(AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(AbsorbConsecutiveTransposes())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(to_hls.InferDuplicateStreamsLayer())
    model = model.transform(SortGraph())
    # model.save("golden_hls.onnx")
    # check topology status
    finn_nodes = model.get_finn_nodes()
    assert len(finn_nodes) == 9
    add_nodes = model.get_nodes_by_op_type("AddStreams_Batch")
    assert len(add_nodes) == 1
    pool_nodes = model.get_nodes_by_op_type("GlobalAccPool_Batch")
    assert len(pool_nodes) == 1
    label_nodes = model.get_nodes_by_op_type("LabelSelect_Batch")
    assert len(label_nodes) == 1
    channelwise_nodes = model.get_nodes_by_op_type("ChannelwiseOp_Batch")
    assert len(channelwise_nodes) == 5
    dup_nodes = model.get_nodes_by_op_type("DuplicateStreams_Batch")
    assert len(dup_nodes) == 1
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_topk_hls = output_dict[model.graph.output[0].name]
    topk_input = output_dict[model.graph.node[-1].input[0]]
    assert soft_verify_topk(topk_input, produced_topk_hls, 5)
    os.remove(export_onnx_path)
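
# soft_verify_topk comes from finn.util.test; the hypothetical re-sketch
# below (not the actual implementation) documents the idea: compare the
# *values* at the produced indices against the golden top-k values, so ties
# between equal scores do not fail the check the way an index-wise
# comparison would:

def _soft_topk_check(inp, idx, k):
    inp = np.asarray(inp).flatten()
    idx = np.asarray(idx).flatten().astype(int)
    golden_vals = np.sort(inp)[-k:]
    produced_vals = np.sort(inp[idx])
    return np.allclose(golden_vals, produced_vals)
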
def test_brevitas_mobilenet():
    # get single image as input and prepare image
    img = Image.open("/workspace/finn/tests/brevitas/king_charles.jpg")
    # resize smallest side of the image to 256 pixels and resize larger side
    # with same ratio
    img = resize_smaller_side(256, img)
    # crop central 224*224 window
    img = crop_center(224, img)
    # save image as numpy array and as torch tensor to enable testing in
    # brevitas/pytorch and finn and transpose from (H, W, C) to (C, H, W)
    img_np = np.asarray(img).copy().astype(np.float32).transpose(2, 0, 1)
    img_np = img_np.reshape(1, 3, 224, 224)
    img_torch = torch.from_numpy(img_np).float()

    # export preprocess
    export_onnx_path = make_build_dir("test_brevitas_mobilenet-v1_")
    preproc_onnx = export_onnx_path + "/quant_mobilenet_v1_4b_preproc.onnx"
    mean = [0.485, 0.456, 0.406]
    std = 0.226
    ch = 3
    preproc = NormalizePreProc(mean, std, ch)
    bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
    preproc_model = ModelWrapper(preproc_onnx)
    # set input finn datatype to UINT8
    preproc_model.set_tensor_datatype(
        preproc_model.graph.input[0].name, DataType.UINT8
    )
    preproc_model = preproc_model.transform(InferShapes())
    preproc_model = preproc_model.transform(GiveUniqueNodeNames())
    preproc_model = preproc_model.transform(GiveUniqueParameterTensors())
    preproc_model = preproc_model.transform(GiveReadableTensorNames())

    finn_onnx = export_onnx_path + "/quant_mobilenet_v1_4b_exported.onnx"
    mobilenet = get_test_model_trained("mobilenet", 4, 4)
    bo.export_finn_onnx(mobilenet, (1, 3, 224, 224), finn_onnx)

    # do forward pass in PyTorch/Brevitas
    input_tensor = preproc.forward(img_torch)
    expected = mobilenet.forward(input_tensor).detach().numpy()
    expected_topk = expected.flatten()
    expected_top5 = np.argsort(expected_topk)[-5:]
    expected_top5 = np.flip(expected_top5)
    expected_top5_prob = []
    for index in expected_top5:
        expected_top5_prob.append(expected_topk[index])

    model = ModelWrapper(finn_onnx)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(InsertTopK())
    # get initializer from Mul that will be absorbed into topk
    a0 = model.get_initializer(model.graph.node[-2].input[1])
    model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveUniqueParameterTensors())
    model = model.transform(GiveReadableTensorNames())
    model.save(export_onnx_path + "/quant_mobilenet_v1_4b_wo_preproc.onnx")
    model = model.transform(MergeONNXModels(preproc_model))
    model.save(export_onnx_path + "/quant_mobilenet_v1_4b.onnx")

    idict = {model.graph.input[0].name: img_np}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    produced_prob = odict["TopK_0_out0"] * a0
    assert (produced.flatten() == expected_top5).all()
    assert np.isclose(produced_prob.flatten(), expected_top5_prob).all()
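
# resize_smaller_side and crop_center are FINN test utilities; hypothetical
# re-sketches (not the FINN implementations) to document the intended
# preprocessing: scale the shortest side to a target size while keeping the
# aspect ratio, then take a centered square crop:
from PIL import Image

def _resize_smaller_side_sketch(target, img):
    w, h = img.size
    scale = target / min(w, h)
    return img.resize((round(w * scale), round(h * scale)), Image.BILINEAR)

def _crop_center_sketch(size, img):
    w, h = img.size
    left, top = (w - size) // 2, (h - size) // 2
    return img.crop((left, top, left + size, top + size))
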
def test_end2end_cybsec_mlp_export(QONNX_export):
    assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
    # load up trained net in Brevitas
    input_size = 593
    hidden1 = 64
    hidden2 = 64
    hidden3 = 64
    weight_bit_width = 2
    act_bit_width = 2
    num_classes = 1
    model = nn.Sequential(
        QuantLinear(input_size, hidden1, bias=True, weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden1),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden1, hidden2, bias=True, weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden2),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden2, hidden3, bias=True, weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden3),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden3, num_classes, bias=True, weight_bit_width=weight_bit_width),
    )
    trained_state_dict = torch.load(assets_dir + "/state_dict.pth")[
        "models_state_dict"
    ][0]
    model.load_state_dict(trained_state_dict, strict=False)
    W_orig = model[0].weight.data.detach().numpy()
    # pad the second (593-sized) dimension with 7 zeroes at the end
    W_new = np.pad(W_orig, [(0, 0), (0, 7)])
    model[0].weight.data = torch.from_numpy(W_new)
    model_for_export = CybSecMLPForExport(model)
    export_onnx_path = get_checkpoint_name("export", QONNX_export)
    input_shape = (1, 600)
    # create a QuantTensor instance to mark the input as bipolar during export
    # (note: randint's upper bound is exclusive, so (0, 2) draws from {0, 1})
    input_a = np.random.randint(0, 2, size=input_shape).astype(np.float32)
    input_a = 2 * input_a - 1
    scale = 1.0
    input_t = torch.from_numpy(input_a * scale)
    input_qt = QuantTensor(
        input_t, scale=torch.tensor(scale), bit_width=torch.tensor(1.0), signed=True
    )

    if QONNX_export:
        # With the BrevitasONNXManager we need to manually set
        # the FINN DataType at the input
        BrevitasONNXManager.export(
            model_for_export, input_shape, export_path=export_onnx_path
        )
        model = ModelWrapper(export_onnx_path)
        model.set_tensor_datatype(model.graph.input[0].name, DataType["BIPOLAR"])
        model.save(export_onnx_path)
        qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model = model.transform(ConvertQONNXtoFINN())
        model.save(export_onnx_path)
    else:
        bo.export_finn_onnx(
            model_for_export, export_path=export_onnx_path, input_t=input_qt
        )
    assert os.path.isfile(export_onnx_path)
    # fix input datatype
    finn_model = ModelWrapper(export_onnx_path)
    finnonnx_in_tensor_name = finn_model.graph.input[0].name
    assert tuple(finn_model.get_tensor_shape(finnonnx_in_tensor_name)) == (1, 600)
    # verify a few exported ops
    if QONNX_export:
        # The first "Mul" node doesn't exist in the QONNX export,
        # because the QuantTensor scale is not exported.
        # However, this node would have been unity scale anyways and
        # the models are still equivalent.
        assert finn_model.graph.node[0].op_type == "Add"
        assert finn_model.graph.node[1].op_type == "Div"
        assert finn_model.graph.node[2].op_type == "MatMul"
        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
    else:
        assert finn_model.graph.node[0].op_type == "Mul"
        assert finn_model.get_initializer(finn_model.graph.node[0].input[1]) == 1.0
        assert finn_model.graph.node[1].op_type == "Add"
        assert finn_model.graph.node[2].op_type == "Div"
        assert finn_model.graph.node[3].op_type == "MatMul"
        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
    # verify datatypes on some tensors
    assert (
        finn_model.get_tensor_datatype(finnonnx_in_tensor_name) == DataType["BIPOLAR"]
    )
    first_matmul_w_name = finn_model.get_nodes_by_op_type("MatMul")[0].input[1]
    assert finn_model.get_tensor_datatype(first_matmul_w_name) == DataType["INT2"]
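
# A toy illustration of the 593 -> 600 input padding used above (assumption:
# the pad keeps the bipolar input byte-aligned when packed one element per
# bit, since 600 is divisible by 8 and 593 is not); zero columns in the
# weight matrix make the extra inputs contribute nothing:

def _pad_input_dim_sketch():
    W_orig = np.ones((64, 593), dtype=np.float32)
    W_new = np.pad(W_orig, [(0, 0), (0, 7)])  # zero-pad last dim by 7
    assert W_new.shape == (64, 600)
    assert (W_new[:, 593:] == 0).all()
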
def apply(self, model):
    # create a temporary folder for the generated driver
    pynq_driver_dir = make_build_dir(prefix="pynq_driver_")
    model.set_metadata_prop("pynq_driver_dir", pynq_driver_dir)
    # create the base FINN driver -- same for all accels
    driver_base_template = pk.resource_filename(
        "finn.qnn-data", "templates/driver/driver_base.py"
    )
    driver_base_py = pynq_driver_dir + "/driver_base.py"
    shutil.copy(driver_base_template, driver_base_py)
    # extract input-output shapes from the graph
    # TODO convert this to an analysis pass?
    i_tensor_name = model.graph.input[0].name
    o_tensor_name = model.graph.output[0].name
    i_tensor_shape_normal = tuple(model.get_tensor_shape(i_tensor_name))
    o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name))
    i_tensor_dt = model.get_tensor_datatype(i_tensor_name)
    o_tensor_dt = model.get_tensor_datatype(o_tensor_name)
    first_node = model.find_consumer(i_tensor_name)
    last_node = model.find_producer(o_tensor_name)
    if first_node.op_type == "StreamingDataflowPartition":
        # IODMAs and dataflow partitions have already been created
        # extract folded i/o shapes from IODMA consumer/producer
        first_df_model = ModelWrapper(getCustomOp(first_node).get_nodeattr("model"))
        assert (
            first_df_model.graph.node[0].op_type == "IODMA"
        ), "First partition must hold input IODMA"
        successors = model.find_direct_successors(first_node)
        successor_sdp = getCustomOp(successors[0])
        successor_df_model = ModelWrapper(successor_sdp.get_nodeattr("model"))
        first_node = successor_df_model.find_consumer(
            successor_df_model.graph.input[0].name
        )
        last_df_model = ModelWrapper(getCustomOp(last_node).get_nodeattr("model"))
        assert (
            last_df_model.graph.node[0].op_type == "IODMA"
        ), "Last partition must hold output IODMA"
        predecessors = model.find_direct_predecessors(last_node)
        predecessor_sdp = getCustomOp(predecessors[0])
        predecessor_df_model = ModelWrapper(predecessor_sdp.get_nodeattr("model"))
        last_node = predecessor_df_model.find_producer(
            predecessor_df_model.graph.output[0].name
        )
    # else: transformation called before IODMA/SDP creation (legacy flow)
    # can access folded i/o shapes directly
    i_tensor_shape_folded = tuple(getCustomOp(first_node).get_folded_input_shape())
    o_tensor_shape_folded = tuple(getCustomOp(last_node).get_folded_output_shape())
    # generate dummy folded i/o tensors and their packed versions
    i_tensor_dummy_folded = gen_finn_dt_tensor(i_tensor_dt, i_tensor_shape_folded)
    o_tensor_dummy_folded = gen_finn_dt_tensor(o_tensor_dt, o_tensor_shape_folded)
    i_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
        i_tensor_dummy_folded, i_tensor_dt
    )
    o_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
        o_tensor_dummy_folded, o_tensor_dt
    )
    i_tensor_shape_packed = i_tensor_dummy_packed.shape
    o_tensor_shape_packed = o_tensor_dummy_packed.shape

    # generate external weights npy files
    weights_dir = pynq_driver_dir + "/runtime_weights"
    os.makedirs(weights_dir)
    idma_idx = 0
    ext_weight_dma_cnt = 0
    for node in model.graph.node:
        assert (
            node.op_type == "StreamingDataflowPartition"
        ), "CreateDataflowPartition needs to be applied before driver generation"
        producer = model.find_producer(node.input[0])
        init_tensor = model.get_initializer(node.input[0])
        if producer is None:
            # input dma?
            idma_name = "idma" + str(idma_idx)
            if init_tensor is not None:
                # input weights dma?
                ext_weight_dma_cnt += 1
                w_dtype = model.get_tensor_datatype(node.input[0])
                init_external_tensor = to_external_tensor(init_tensor, w_dtype)
                np.save(
                    weights_dir + "/" + idma_name + ".npy", init_external_tensor
                )
            else:
                net_input_name = idma_name
            idma_idx += 1

    # fill in the driver template
    driver_py = pynq_driver_dir + "/driver.py"
    driver = template_driver.pynq_driver_template

    def mss(x, batch_var_name="1"):
        # "make shape string"
        # for a shape like (1, ...) emit a string (N, ...)
        # where N is the default value for batch_var_name
        # this lets the driver work with a batch of samples at once
        ret = str(x)
        ret = ret.replace("(1,", "(%s," % batch_var_name)
        ret = ret.replace("[1,", "[%s," % batch_var_name)
        return ret

    driver = driver.replace("$PLATFORM$", self.platform)
    driver = driver.replace("$INPUT_FINN_DATATYPE$", str(i_tensor_dt))
    driver = driver.replace("$INPUT_SHAPE_NORMAL$", mss(i_tensor_shape_normal))
    driver = driver.replace("$INPUT_SHAPE_FOLDED$", mss(i_tensor_shape_folded))
    driver = driver.replace("$INPUT_SHAPE_PACKED$", mss(i_tensor_shape_packed))
    driver = driver.replace("$OUTPUT_FINN_DATATYPE$", str(o_tensor_dt))
    driver = driver.replace("$OUTPUT_SHAPE_NORMAL$", mss(o_tensor_shape_normal))
    driver = driver.replace("$OUTPUT_SHAPE_FOLDED$", mss(o_tensor_shape_folded))
    driver = driver.replace("$OUTPUT_SHAPE_PACKED$", mss(o_tensor_shape_packed))
    driver = driver.replace("$INPUT_DMA_NAME$", "'%s'" % net_input_name)
    driver = driver.replace("$EXT_WEIGHT_NUM$", str(ext_weight_dma_cnt))

    with open(driver_py, "w") as f:
        f.write(driver)

    # add validate.py to run full top-1 test (only for suitable networks)
    validate_py = pynq_driver_dir + "/validate.py"
    validate_template = pk.resource_filename(
        "finn.qnn-data", "templates/driver/validate.py"
    )
    shutil.copy(validate_template, validate_py)

    # copy all the dependencies into the driver folder
    # driver imports utils/data_packing and core/datatype
    # both of which are in finn-base
    # e.g. /workspace/finn-base/src/finn/util/data_packing.py
    dpk_root = dpk.__file__
    # e.g. /workspace/finn-base/src/finn/util
    dpk_root = dpk_root.replace("data_packing.py", "")
    # e.g. /workspace/finn-base/src/finn/core/datatype.py
    dtp_root = dtp.__file__
    # e.g. /workspace/finn-base/src/finn/core
    dtp_root = dtp_root.replace("datatype.py", "")
    shutil.copytree(dpk_root, pynq_driver_dir + "/finn/util")
    shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core")

    # generate weight files for runtime-writable layers
    for sdp_ind, sdp_node in enumerate(model.graph.node):
        assert sdp_node.op_type == "StreamingDataflowPartition"
        # get dataflow model
        sdp_node = getCustomOp(sdp_node)
        dataflow_model_filename = sdp_node.get_nodeattr("model")
        dataflow_model = ModelWrapper(dataflow_model_filename)
        rt_layer_ind = 0
        for node in dataflow_model.graph.node:
            if node.op_type in ["StreamingFCLayer_Batch", "Thresholding_Batch"]:
                node_inst = getCustomOp(node)
                is_rt_weights = node_inst.get_nodeattr("runtime_writeable_weights")
                if is_rt_weights == 1:
                    fcl_w = dataflow_model.get_initializer(node.input[1])
                    w_filename = weights_dir + "/%d_%d_%s.dat" % (
                        sdp_ind,
                        rt_layer_ind,
                        node.name,
                    )
                    node_inst.make_weight_file(
                        fcl_w, "decoupled_runtime", w_filename
                    )
                    rt_layer_ind += 1
            elif node.op_type == "StreamingDataflowPartition":
                warnings.warn("Nested StreamingDataflowPartition are not supported")
            else:
                continue

    return (model, False)
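
# Usage illustration for the mss ("make shape string") helper defined inside
# apply above, copied here verbatim so the demo is self-contained: the
# leading 1 in a shape string is swapped for a symbolic batch variable, so
# the generated driver can substitute its own batch size at runtime.

def _mss_demo():
    def mss(x, batch_var_name="1"):
        ret = str(x)
        ret = ret.replace("(1,", "(%s," % batch_var_name)
        ret = ret.replace("[1,", "[%s," % batch_var_name)
        return ret

    assert mss((1, 3, 224, 224), "N") == "(N, 3, 224, 224)"
    assert mss((1, 3, 224, 224)) == "(1, 3, 224, 224)"
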