def execute_node(self, context, graph):
    """Execute this node in the mode given by the exec_mode nodeattr.

    In "cppsim" mode the node acts as a pure pass-through: the input tensor
    is forwarded to the output (reshaped to the normal input shape), so this
    is presumably a DMA/marker-style op — TODO confirm against the class.
    In "rtlsim" mode the input is folded, saved to npy, packed, streamed
    through the PyVerilator simulation, and the packed result is unpacked
    back into the ONNX execution context.

    Parameters:
        context: dict mapping tensor names to numpy arrays (mutated in place).
        graph: the ONNX graph (unused here).

    Raises:
        Exception: if exec_mode is neither "cppsim" nor "rtlsim".
    """
    mode = self.get_nodeattr("exec_mode")
    node = self.onnx_node
    inp = context[node.input[0]]
    exp_shape = self.get_normal_input_shape()

    if mode == "cppsim":
        # pass-through: output is the (reshaped) input
        output = inp
        output = np.asarray([output], dtype=np.float32).reshape(*exp_shape)
        context[node.output[0]] = output
    elif mode == "rtlsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
        # create a npy file for the input of the node
        assert (
            str(inp.dtype) == "float32"
        ), """Input datatype is not float32 as expected."""
        expected_inp_shape = self.get_folded_input_shape()
        reshaped_input = inp.reshape(expected_inp_shape)
        if DataType[self.get_nodeattr("dataType")] == DataType.BIPOLAR:
            # store bipolar activations as binary (0/1) for stream packing
            reshaped_input = (reshaped_input + 1) / 2
            export_idt = DataType.BINARY
        else:
            export_idt = DataType[self.get_nodeattr("dataType")]
        # make copy before saving the array
        reshaped_input = reshaped_input.copy()
        np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)
        sim = self.get_rtlsim()
        nbits = self.get_instream_width()
        # pack the npy file contents into the simulator's input stream format
        inp = npy_to_rtlsim_input(
            "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
        )
        super().reset_rtlsim(sim)
        super().toggle_clk(sim)
        output = self.rtlsim(sim, inp)
        odt = DataType[self.get_nodeattr("dataType")]
        target_bits = odt.bitwidth()
        packed_bits = self.get_outstream_width()
        out_npy_path = "{}/output.npy".format(code_gen_dir)
        out_shape = self.get_folded_output_shape()
        # unpack simulator output words into an npy file
        rtlsim_output_to_npy(
            output, out_npy_path, odt, out_shape, packed_bits, target_bits
        )
        # load and reshape output
        output = np.load(out_npy_path)
        oshape = self.get_normal_output_shape()
        output = np.asarray([output], dtype=np.float32).reshape(*oshape)
        context[node.output[0]] = output
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("cppsim", "rtlsim")""".format(
                mode
            )
        )
def rtlsim_exec(model, execution_context):
    """Use PyVerilator to execute given model with stitched IP. The execution
    context contains the input values.

    Assumes a single graph input and a single graph output; folding/packing
    parameters are taken from the first consumer / last producer node.
    Mutates execution_context in place with the unpacked output tensor and
    records the simulated cycle count in the "sim_cycles" metadata prop.
    """
    if PyVerilator is None:
        raise ImportError("Installation of PyVerilator is required.")
    # ensure stitched ip project already exists
    assert os.path.isfile(
        model.get_metadata_prop("wrapper_filename")
    ), """The
    file name from metadata property "wrapper_filename" doesn't exist."""
    assert os.path.isdir(
        model.get_metadata_prop("vivado_stitch_proj")
    ), """The
    directory from metadata property "vivado_stitch_proj" doesn't exist"""
    trace_file = model.get_metadata_prop("rtlsim_trace")
    # extract input shape
    # TODO extend for multiple inputs
    i_name = model.graph.input[0].name
    i_tensor = execution_context[i_name]
    i_dt = model.get_tensor_datatype(i_name)
    first_node = getCustomOp(model.find_consumer(i_name))
    i_stream_w = first_node.get_instream_width()
    # convert input into time multiplexed shape
    i_folded_shape = first_node.get_folded_input_shape()
    # TODO any other layout transformations need to happen here!
    i_tensor = i_tensor.reshape(i_folded_shape)
    # extract output shape
    o_name = model.graph.output[0].name
    o_shape = model.get_tensor_shape(o_name)
    o_dt = model.get_tensor_datatype(o_name)
    last_node = getCustomOp(model.find_producer(o_name))
    o_folded_shape = last_node.get_folded_output_shape()
    o_stream_w = last_node.get_outstream_width()
    packedBits = o_stream_w
    targetBits = o_dt.bitwidth()
    # pack input
    packed_input = npy_to_rtlsim_input(i_tensor, i_dt, i_stream_w)
    num_out_values = last_node.get_number_output_values()
    # prepare pyverilator model: rebuild only if no cached .so is available
    rtlsim_so = model.get_metadata_prop("rtlsim_so")
    if (rtlsim_so is None) or (not os.path.isfile(rtlsim_so)):
        sim = pyverilate_stitched_ip(model)
        model.set_metadata_prop("rtlsim_so", sim.lib._name)
    else:
        sim = PyVerilator(rtlsim_so)
    _reset_rtlsim(sim)
    _toggle_clk(sim)
    ret = _run_rtlsim(sim, packed_input, num_out_values, trace_file)
    packed_output = ret[0]
    model.set_metadata_prop("sim_cycles", str(ret[1]))
    # unpack output and put into context
    o_folded_tensor = rtlsim_output_to_npy(
        packed_output, None, o_dt, o_folded_shape, packedBits, targetBits
    )
    execution_context[o_name] = o_folded_tensor.reshape(o_shape)
def execute_node(self, context, graph):
    """Execute this StreamingFCLayer node via compiled C++ sim or RTL sim.

    Input 0 is the data input (dumped to input_0.npy in folded layout);
    inputs 1/2 are weights/thresholds and are not dumped here. In rtlsim
    with "external"/"decoupled" mem_mode, the weight stream is packed from
    weights.npy and driven alongside the data stream.

    Raises:
        Exception: on invalid exec_mode or more than three node inputs.
    """
    mode = self.get_nodeattr("exec_mode")
    mem_mode = self.get_nodeattr("mem_mode")
    node = self.onnx_node

    # TODO ensure codegen dir exists
    if mode == "cppsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
    elif mode == "rtlsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("cppsim", "rtlsim")""".format(
                mode
            )
        )

    # create a npy file for each input of the node (in_ind is input index)
    in_ind = 0
    for inputs in node.input:
        # it is assumed that the first input of the node is the data input
        # the second input are the weights
        # the third input are the thresholds
        if in_ind == 0:
            assert (
                str(context[inputs].dtype) == "float32"
            ), """Input datatype is not float32 as expected."""
            expected_inp_shape = self.get_folded_input_shape()
            reshaped_input = context[inputs].reshape(expected_inp_shape)
            if self.get_input_datatype() == DataType.BIPOLAR:
                # store bipolar activations as binary
                reshaped_input = (reshaped_input + 1) / 2
                export_idt = DataType.BINARY
            else:
                export_idt = self.get_input_datatype()
            # make copy before saving the array
            reshaped_input = reshaped_input.copy()
            np.save(
                os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)),
                reshaped_input,
            )
        elif in_ind > 2:
            raise Exception("Unexpected input found for StreamingFCLayer")
        in_ind += 1

    if mode == "cppsim":
        # execute the precompiled model
        super().exec_precompiled_singlenode_model()
        # load output npy file
        super().npy_to_dynamic_output(context)
        # reinterpret binary output as bipolar where needed
        if self.get_output_datatype() == DataType.BIPOLAR:
            out = context[node.output[0]]
            out = 2 * out - 1
            context[node.output[0]] = out
        assert (
            context[node.output[0]].shape == self.get_folded_output_shape()
        ), """Output shape is not as expected"""
        # reshape output to have expected shape
        oshape = self.get_normal_output_shape()
        context[node.output[0]] = context[node.output[0]].reshape(*oshape)
    elif mode == "rtlsim":
        sim = self.get_rtlsim()
        nbits = self.get_instream_width()
        inp = npy_to_rtlsim_input(
            "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
        )
        super().reset_rtlsim(sim)
        super().toggle_clk(sim)
        if mem_mode == "external" or mem_mode == "decoupled":
            wnbits = self.get_weightstream_width()
            export_wdt = self.get_weight_datatype()
            # we have converted bipolar weights to binary for export,
            # so use it as such for weight generation
            if self.get_weight_datatype() == DataType.BIPOLAR:
                export_wdt = DataType.BINARY
            wei = npy_to_rtlsim_input(
                "{}/weights.npy".format(code_gen_dir), export_wdt, wnbits
            )
            # weight stream must be replayed once per input vector
            num_w_reps = np.prod(self.get_nodeattr("numInputVectors"))
            io_dict = {
                "inputs": {"in0": inp, "weights": wei * num_w_reps},
                "outputs": {"out": []},
            }
            self.rtlsim_multi_io(sim, io_dict)
            output = io_dict["outputs"]["out"]
        else:
            output = self.rtlsim(sim, inp)
        odt = self.get_output_datatype()
        target_bits = odt.bitwidth()
        packed_bits = self.get_outstream_width()
        out_npy_path = "{}/output.npy".format(code_gen_dir)
        out_shape = self.get_folded_output_shape()
        rtlsim_output_to_npy(
            output, out_npy_path, odt, out_shape, packed_bits, target_bits
        )
        # load and reshape output
        output = np.load(out_npy_path)
        oshape = self.get_normal_output_shape()
        output = np.asarray([output], dtype=np.float32).reshape(*oshape)
        context[node.output[0]] = output
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("cppsim", "rtlsim")""".format(
                mode
            )
        )
def execute_node(self, context, graph):
    """Execute this Vector_Vector_Activate_Unit node.

    Dispatches on the exec_mode nodeattr: "cppsim" runs the precompiled
    C++ model, "rtlsim" streams the packed input through the PyVerilator
    simulation. The data input (input 0) is written out as input_0.npy in
    folded layout before either simulation runs.

    Parameters:
        context: dict of tensor name -> numpy array, mutated in place.
        graph: the ONNX graph (unused).

    Raises:
        Exception: on an invalid exec_mode or an unexpected fourth input.
    """
    mode = self.get_nodeattr("exec_mode")
    node = self.onnx_node

    # TODO ensure codegen dir exists
    if mode == "cppsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
    elif mode == "rtlsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("cppsim", "rtlsim")""".format(
                mode
            )
        )

    # dump the data input (index 0) to an npy file; indices 1/2 are the
    # weights and thresholds and need no file here
    for in_ind, tensor_name in enumerate(node.input):
        if in_ind == 0:
            assert (
                str(context[tensor_name].dtype) == "float32"
            ), """Input datatype is not float32 as expected."""
            folded_shape = self.get_folded_input_shape()
            folded_inp = context[tensor_name].reshape(folded_shape)
            # copy so the file contents are decoupled from the context array
            folded_inp = folded_inp.copy()
            np.save(
                os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)),
                folded_inp,
            )
        elif in_ind > 2:
            raise Exception(
                "Unexpected input found for Vector_Vector_Activate_Unit"
            )

    if mode == "cppsim":
        # run the precompiled model and pull its output into the context
        super().exec_precompiled_singlenode_model()
        super().npy_to_dynamic_output(context)
        assert (
            context[node.output[0]].shape == self.get_folded_output_shape()
        ), """Output shape is not as expected"""
        # unfold output back to its normal shape
        normal_oshape = self.get_normal_output_shape()
        context[node.output[0]] = context[node.output[0]].reshape(*normal_oshape)
    elif mode == "rtlsim":
        sim = self.get_rtlsim()
        in_stream_bits = self.get_instream_width()
        in_dt = self.get_input_datatype()
        packed_in = npy_to_rtlsim_input(
            "{}/input_0.npy".format(code_gen_dir), in_dt, in_stream_bits
        )
        super().reset_rtlsim(sim)
        super().toggle_clk(sim)
        packed_out = self.rtlsim(sim, packed_in)
        out_dt = self.get_output_datatype()
        out_npy_path = "{}/output.npy".format(code_gen_dir)
        # unpack the simulated output stream into an npy file
        rtlsim_output_to_npy(
            packed_out,
            out_npy_path,
            out_dt,
            self.get_folded_output_shape(),
            self.get_outstream_width(),
            out_dt.bitwidth(),
        )
        # load and reshape output
        result = np.load(out_npy_path)
        normal_oshape = self.get_normal_output_shape()
        context[node.output[0]] = np.asarray(
            [result], dtype=np.float32
        ).reshape(*normal_oshape)
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("cppsim", "rtlsim")""".format(
                mode
            )
        )
def execute_node(self, context, graph):
    """Execute this node via cppsim or rtlsim.

    Input is expected unfolded (no input folding is applied before saving);
    bipolar inputs are exported as binary and the output is converted back
    binary -> bipolar at the end when the output datatype is BIPOLAR.

    NOTE(review): the cppsim assert message below contains a garbled typo
    ("expected ofolded utput shape"); left untouched here as it is a runtime
    string. Also, this chunk appears truncated at the end (the final assert
    message is cut off mid-string) — restore the tail from upstream.
    """
    mode = self.get_nodeattr("exec_mode")
    node = self.onnx_node
    exp_ishape = self.get_normal_input_shape()
    exp_oshape = self.get_normal_output_shape()
    folded_oshape = self.get_folded_output_shape()

    # TODO ensure codegen dir exists
    if mode == "cppsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
    elif mode == "rtlsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("cppsim", "rtlsim")"""
            .format(mode))

    inp = context[node.input[0]]
    assert str(inp.dtype) == "float32", "Input datatype is not float32"
    assert (inp.shape == exp_ishape), """Input shape doesn't
    match expected shape (1, ifm_dim, ifm_dim, ifm_ch)."""
    if self.get_input_datatype() == DataType["BIPOLAR"]:
        # store bipolar activations as binary
        inp = (inp + 1) / 2
        export_idt = DataType["BINARY"]
    else:
        export_idt = self.get_input_datatype()
    # no reshaping for input since assuming no folding on input
    # make copy before saving array
    reshaped_input = inp.copy()
    np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)

    if mode == "cppsim":
        # execute the precompiled model
        super().exec_precompiled_singlenode_model()
        # load output npy file
        super().npy_to_dynamic_output(context)
        assert (context[node.output[0]].shape == folded_oshape), "cppsim \
        did not produce expected ofolded utput shape"
        context[node.output[0]] = context[node.output[0]].reshape(
            *exp_oshape)
    elif mode == "rtlsim":
        sim = self.get_rtlsim()
        nbits = self.get_instream_width()
        rtlsim_inp = npy_to_rtlsim_input(
            "{}/input_0.npy".format(code_gen_dir), export_idt, nbits)
        super().reset_rtlsim(sim)
        super().toggle_clk(sim)
        rtlsim_output = self.rtlsim(sim, rtlsim_inp)
        # output is unpacked with the export datatype (binary for bipolar)
        odt = export_idt
        target_bits = odt.bitwidth()
        packed_bits = self.get_outstream_width()
        out_npy_path = "{}/output.npy".format(code_gen_dir)
        out_shape = self.get_folded_output_shape()
        rtlsim_output_to_npy(rtlsim_output, out_npy_path, odt, out_shape,
                             packed_bits, target_bits)
        # load and reshape output
        output = np.load(out_npy_path)
        output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape)
        context[node.output[0]] = output
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("cppsim", "rtlsim")"""
            .format(mode))
    # binary -> bipolar if needed
    if self.get_output_datatype() == DataType["BIPOLAR"]:
        out = context[node.output[0]]
        out = 2 * out - 1
        context[node.output[0]] = out
    # NOTE(review): source truncated below — assert message cut off
    assert (context[node.output[0]].shape == exp_oshape), """Output
def execute_node(self, context, graph):
    """Execute this node (sliding-window/im2col-style output layout, per the
    input-shape assert) via cppsim or rtlsim.

    NOTE(review): this chunk appears truncated at the end (the final assert
    message is cut off mid-string) — restore the tail from upstream. The
    cppsim assert message is intact here ("expected folded output shape").
    """
    mode = self.get_nodeattr("exec_mode")
    node = self.onnx_node
    exp_ishape = self.get_normal_input_shape()
    folded_ishape = self.get_folded_input_shape()
    exp_oshape = self.get_normal_output_shape()
    folded_oshape = self.get_folded_output_shape()

    # TODO ensure codegen dir exists
    if mode == "cppsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
    elif mode == "rtlsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("cppsim", "rtlsim")"""
            .format(mode))

    inp = context[node.input[0]]
    assert str(inp.dtype) == "float32", "Input datatype is not float32"
    assert (inp.shape == exp_ishape), """Input shape doesn't
    match expected shape (batch_size,odim,odim,k*k*ifm_ch)."""
    export_idt = self.get_input_datatype()
    # fold input before dumping it for the simulators
    reshaped_input = inp.reshape(folded_ishape)
    np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)

    if mode == "cppsim":
        # execute the precompiled model
        super().exec_precompiled_singlenode_model()
        # load output npy file
        super().npy_to_dynamic_output(context)
        assert (context[node.output[0]].shape == folded_oshape
                ), "cppsim did not produce expected folded output shape"
        context[node.output[0]] = context[node.output[0]].reshape(
            *exp_oshape)
    elif mode == "rtlsim":
        sim = self.get_rtlsim()
        nbits = self.get_instream_width()
        rtlsim_inp = npy_to_rtlsim_input(
            "{}/input_0.npy".format(code_gen_dir), export_idt, nbits)
        super().reset_rtlsim(sim)
        super().toggle_clk(sim)
        rtlsim_output = self.rtlsim(sim, rtlsim_inp)
        odt = self.get_output_datatype()
        target_bits = odt.bitwidth()
        packed_bits = self.get_outstream_width()
        out_npy_path = "{}/output.npy".format(code_gen_dir)
        out_shape = self.get_folded_output_shape()
        rtlsim_output_to_npy(rtlsim_output, out_npy_path, odt, out_shape,
                             packed_bits, target_bits)
        # load and reshape output
        output = np.load(out_npy_path)
        output = np.asarray([output],
                            dtype=np.float32).reshape(*exp_oshape)
        context[node.output[0]] = output
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("cppsim", "rtlsim")"""
            .format(mode))
    # NOTE(review): source truncated below — assert message cut off
    assert (context[node.output[0]].shape == exp_oshape), """Output
def execute_node(self, context, graph):
    """Execute this two-output node (stream duplication, judging by the two
    identically-shaped outputs — TODO confirm) via cppsim or rtlsim.

    The folded input is dumped to input_0.npy, then either the precompiled
    C++ model produces output0.npy/output1.npy, or the PyVerilator sim is
    driven with one input stream and two output streams ("out0"/"out1").
    Both outputs are reshaped to the normal output shape in the context.

    Fix: the cppsim shape-assert messages previously read "expected
    ofolded utput shape" (garbled); corrected to "expected folded output
    shape".

    Parameters:
        context: dict of tensor name -> numpy array, mutated in place.
        graph: the ONNX graph (unused).

    Raises:
        Exception: if exec_mode is neither "cppsim" nor "rtlsim".
    """
    mode = self.get_nodeattr("exec_mode")
    node = self.onnx_node
    exp_ishape = self.get_normal_input_shape()
    exp_oshape = self.get_normal_output_shape()
    folded_ishape = self.get_folded_input_shape()
    folded_oshape = self.get_folded_output_shape()

    if mode == "cppsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
    elif mode == "rtlsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("cppsim", "rtlsim")"""
            .format(mode))

    inp = context[node.input[0]]
    assert str(inp.dtype) == "float32", "Input datatype is not float32"
    assert inp.shape == exp_ishape, """Input shape doesn't
    match expected shape ."""
    export_idt = self.get_input_datatype()
    # reshape input into folded form
    inp = inp.reshape(folded_ishape)
    # make copy before saving array
    reshaped_input = inp.copy()
    np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)

    if mode == "cppsim":
        # execute the precompiled model
        super().exec_precompiled_singlenode_model()
        # load both output npy files
        super().npy_to_dynamic_outputs(context, ["output0.npy", "output1.npy"])
        assert (context[node.output[0]].shape == folded_oshape), \
            "cppsim did not produce expected folded output shape"
        assert (context[node.output[1]].shape == folded_oshape), \
            "cppsim did not produce expected folded output shape"
        context[node.output[0]] = context[node.output[0]].reshape(
            *exp_oshape)
        context[node.output[1]] = context[node.output[1]].reshape(
            *exp_oshape)
    elif mode == "rtlsim":
        sim = self.get_rtlsim()
        nbits = self.get_instream_width()
        rtlsim_inp = npy_to_rtlsim_input(
            "{}/input_0.npy".format(code_gen_dir), export_idt, nbits)
        super().reset_rtlsim(sim)
        super().toggle_clk(sim)
        # one input stream, two output streams
        rtlsim_dict = {
            "inputs": {
                "in0": rtlsim_inp
            },
            "outputs": {
                "out0": [],
                "out1": []
            },
        }
        self.rtlsim_multi_io(sim, rtlsim_dict)
        odt = self.get_output_datatype()
        target_bits = odt.bitwidth()
        packed_bits = self.get_outstream_width()
        out_shape = self.get_folded_output_shape()
        # unpack, load and reshape output 0
        out_npy_path = "{}/output0.npy".format(code_gen_dir)
        rtlsim_output_to_npy(
            rtlsim_dict["outputs"]["out0"],
            out_npy_path,
            odt,
            out_shape,
            packed_bits,
            target_bits,
        )
        output = np.load(out_npy_path)
        output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape)
        context[node.output[0]] = output
        # unpack, load and reshape output 1
        out_npy_path = "{}/output1.npy".format(code_gen_dir)
        rtlsim_output_to_npy(
            rtlsim_dict["outputs"]["out1"],
            out_npy_path,
            odt,
            out_shape,
            packed_bits,
            target_bits,
        )
        output = np.load(out_npy_path)
        output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape)
        context[node.output[1]] = output
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("cppsim", "rtlsim")"""
            .format(mode))

    assert (context[node.output[0]].shape == exp_oshape
            ), """Output0 shape doesn't match expected shape."""
    assert (context[node.output[1]].shape == exp_oshape
            ), """Output1 shape doesn't match expected shape."""
def execute_node(self, context, graph):
    """Execute this node via cppsim or rtlsim.

    cppsim mode simply forwards the input to the output (pass-through,
    FIFO-style — TODO confirm against the class). The folded input is still
    dumped to input_0.npy in both modes; rtlsim streams it through the
    simulator and converts binary back to bipolar at the end if needed.

    NOTE(review): this chunk appears truncated at the end (the final assert
    message is cut off mid-string), and the final exec_mode error message
    only mentions "rtlsim" while "cppsim" is also accepted — both left as-is
    (runtime strings); restore/fix from upstream.
    """
    mode = self.get_nodeattr("exec_mode")
    node = self.onnx_node
    exp_shape = self.get_normal_input_shape()
    folded_ishape = self.get_folded_input_shape()

    # TODO ensure codegen dir exists
    if mode == "cppsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
    elif mode == "rtlsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("cppsim", "rtlsim")""".format(
                mode
            )
        )

    inp = context[node.input[0]]
    assert str(inp.dtype) == "float32", "Input datatype is not float32"
    assert inp.shape == tuple(
        exp_shape
    ), "Input shape does not match expected shape."

    if self.get_input_datatype() == DataType.BIPOLAR:
        # store bipolar activations as binary
        inp = (inp + 1) / 2
        export_idt = DataType.BINARY
    else:
        export_idt = self.get_input_datatype()
    # reshape input into folded shape
    reshaped_input = inp.reshape(folded_ishape)
    # make copy before saving array
    reshaped_input = reshaped_input.copy()
    np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)

    if mode == "cppsim":
        # pass-through: forward the (binary-converted) input
        output = inp
        output = np.asarray([output], dtype=np.float32).reshape(*exp_shape)
        context[node.output[0]] = output
    elif mode == "rtlsim":
        sim = self.get_rtlsim()
        nbits = self.get_instream_width()
        rtlsim_inp = npy_to_rtlsim_input(
            "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
        )
        super().reset_rtlsim(sim)
        super().toggle_clk(sim)
        rtlsim_output = self.rtlsim(sim, rtlsim_inp)
        # output unpacked with the export datatype (binary for bipolar)
        odt = export_idt
        target_bits = odt.bitwidth()
        packed_bits = self.get_outstream_width()
        out_npy_path = "{}/output.npy".format(code_gen_dir)
        out_shape = self.get_folded_output_shape()
        rtlsim_output_to_npy(
            rtlsim_output, out_npy_path, odt, out_shape, packed_bits, target_bits
        )
        # load and reshape output
        output = np.load(out_npy_path)
        output = np.asarray([output], dtype=np.float32).reshape(exp_shape)
        context[node.output[0]] = output
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to "rtlsim" """.format(
                mode
            )
        )
    # binary -> bipolar if needed
    if self.get_output_datatype() == DataType.BIPOLAR:
        out = context[node.output[0]]
        out = 2 * out - 1
        context[node.output[0]] = out
    # NOTE(review): source truncated below — assert message cut off
    assert context[node.output[0]].shape == tuple(
        exp_shape
    ), """Output
def execute_node(self, context, graph):
    """Execute this node (TopK-index producer, per the int64 cast at the
    end — presumably LabelSelect) via cppsim or rtlsim.

    The folded input is dumped to input_0.npy, the selected simulation is
    run, and the output is reshaped to its normal shape. The final result
    is cast to int64 because TopK index outputs normally use
    TensorProto.INT64, while FINN's node-by-node simulation assumes float
    containers.

    Fix: the cppsim shape-assert message previously read "expected
    ofolded utput shape" (garbled); corrected to "expected folded output
    shape".

    Parameters:
        context: dict of tensor name -> numpy array, mutated in place.
        graph: the ONNX graph (unused).

    Raises:
        Exception: if exec_mode is neither "cppsim" nor "rtlsim".
    """
    mode = self.get_nodeattr("exec_mode")
    node = self.onnx_node
    exp_ishape = self.get_normal_input_shape()
    exp_oshape = self.get_normal_output_shape()
    folded_ishape = self.get_folded_input_shape()
    folded_oshape = self.get_folded_output_shape()

    if mode == "cppsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
    elif mode == "rtlsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("cppsim", "rtlsim")"""
            .format(mode))

    inp = context[node.input[0]]
    assert str(inp.dtype) == "float32", "Input datatype is not float32"
    assert inp.shape == exp_ishape, """Input shape doesn't
    match expected shape ."""
    export_idt = self.get_input_datatype()
    # reshape input into folded form
    inp = inp.reshape(folded_ishape)
    # make copy before saving array
    reshaped_input = inp.copy()
    np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)

    if mode == "cppsim":
        # execute the precompiled model
        super().exec_precompiled_singlenode_model()
        # load output npy file
        super().npy_to_dynamic_output(context)
        assert (context[node.output[0]].shape == folded_oshape), \
            "cppsim did not produce expected folded output shape"
        context[node.output[0]] = context[node.output[0]].reshape(
            *exp_oshape)
    elif mode == "rtlsim":
        sim = self.get_rtlsim()
        nbits = self.get_instream_width()
        rtlsim_inp = npy_to_rtlsim_input(
            "{}/input_0.npy".format(code_gen_dir), export_idt, nbits)
        super().reset_rtlsim(sim)
        super().toggle_clk(sim)
        rtlsim_output = self.rtlsim(sim, rtlsim_inp)
        odt = self.get_output_datatype()
        target_bits = odt.bitwidth()
        packed_bits = self.get_outstream_width()
        out_npy_path = "{}/output.npy".format(code_gen_dir)
        out_shape = self.get_folded_output_shape()
        rtlsim_output_to_npy(rtlsim_output, out_npy_path, odt, out_shape,
                             packed_bits, target_bits)
        # load and reshape output
        output = np.load(out_npy_path)
        output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape)
        context[node.output[0]] = output
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("cppsim", "rtlsim")"""
            .format(mode))

    assert (context[node.output[0]].shape == exp_oshape
            ), """Output shape doesn't match expected shape."""
    # TopK ind output normally uses TensorProto.INT64, which
    # can cause issues for the node-by-node simulation in FINN
    # (as the custom DataType system always assumes float containers)
    # so cast the output to int64
    ret = context[node.output[0]]
    context[node.output[0]] = ret.astype(np.int64)
def rtlsim_exec(model, execution_context, pre_hook=None, post_hook=None):
    """Use PyVerilator to execute given model with stitched IP. The execution
    context contains the input values. Hook functions can be optionally
    specified to observe/alter the state of the circuit, receiving the
    PyVerilator sim object as their first argument:
    - pre_hook : hook function to be called before sim start (after reset)
    - post_hook : hook function to be called after sim end

    Single-input/single-output variant; the batch size is taken from the
    actual input tensor and overrides the first dimension of the folded
    input shape, the output shapes, and the expected output value count.
    Records the cycle count in the "cycles_rtlsim" metadata prop.

    NOTE(review): unlike the non-hook variant, no explicit reset/clock
    toggle is issued here before _run_rtlsim — presumably handled inside
    _run_rtlsim; confirm against its definition.
    """
    if PyVerilator is None:
        raise ImportError("Installation of PyVerilator is required.")
    # ensure stitched ip project already exists
    assert os.path.isfile(
        model.get_metadata_prop("wrapper_filename")
    ), """The
    file name from metadata property "wrapper_filename" doesn't exist."""
    assert os.path.isdir(
        model.get_metadata_prop("vivado_stitch_proj")
    ), """The
    directory from metadata property "vivado_stitch_proj" doesn't exist"""
    trace_file = model.get_metadata_prop("rtlsim_trace")
    # extract input shape
    # TODO extend for multiple inputs
    i_name = model.graph.input[0].name
    i_tensor = execution_context[i_name]
    i_dt = model.get_tensor_datatype(i_name)
    first_node = getCustomOp(model.find_consumer(i_name))
    i_stream_w = first_node.get_instream_width()
    # convert input into time multiplexed shape
    i_folded_shape = first_node.get_folded_input_shape()
    batchsize = i_tensor.shape[0]
    # override batch size for input
    i_folded_shape = list(i_folded_shape)
    i_folded_shape[0] = batchsize
    i_folded_shape = tuple(i_folded_shape)
    # TODO any other layout transformations need to happen here!
    i_tensor = i_tensor.reshape(i_folded_shape)
    # extract output shape
    o_name = model.graph.output[0].name
    o_shape = model.get_tensor_shape(o_name)
    o_dt = model.get_tensor_datatype(o_name)
    last_node = getCustomOp(model.find_producer(o_name))
    o_folded_shape = last_node.get_folded_output_shape()
    # override batch size from actual input
    o_shape = list(o_shape)
    o_shape[0] = batchsize
    o_shape = tuple(o_shape)
    o_folded_shape = list(o_folded_shape)
    o_folded_shape[0] = batchsize
    o_folded_shape = tuple(o_folded_shape)
    o_stream_w = last_node.get_outstream_width()
    packedBits = o_stream_w
    targetBits = o_dt.bitwidth()
    # pack input
    packed_input = npy_to_rtlsim_input(i_tensor, i_dt, i_stream_w)
    num_out_values = last_node.get_number_output_values()
    num_out_values *= batchsize
    # prepare pyverilator model: rebuild only if no cached .so is available
    rtlsim_so = model.get_metadata_prop("rtlsim_so")
    if (rtlsim_so is None) or (not os.path.isfile(rtlsim_so)):
        sim = pyverilate_stitched_ip(model)
        model.set_metadata_prop("rtlsim_so", sim.lib._name)
    else:
        sim = PyVerilator(rtlsim_so, auto_eval=False)
    ret = _run_rtlsim(
        sim,
        packed_input,
        num_out_values,
        trace_file,
        pre_hook=pre_hook,
        post_hook=post_hook,
    )
    packed_output = ret[0]
    model.set_metadata_prop("cycles_rtlsim", str(ret[1]))
    # unpack output and put into context
    o_folded_tensor = rtlsim_output_to_npy(
        packed_output, None, o_dt, o_folded_shape, packedBits, targetBits
    )
    execution_context[o_name] = o_folded_tensor.reshape(o_shape)
def rtlsim_exec(model, execution_context, pre_hook=None, post_hook=None):
    """Use PyVerilator to execute given model with stitched IP. The execution
    context contains the input values. Hook functions can be optionally
    specified to observe/alter the state of the circuit, receiving the
    PyVerilator sim object as their first argument:
    - pre_hook : hook function to be called before sim start (after reset)
    - post_hook : hook function to be called after sim end

    Multi-input/multi-output variant: every graph input/output is packed
    into/unpacked from an io_dict keyed by the stitched-IP AXI stream
    interface names from the "vivado_stitch_ifnames" metadata prop.
    Records the cycle count in the "cycles_rtlsim" metadata prop.
    """
    if PyVerilator is None:
        raise ImportError("Installation of PyVerilator is required.")
    # ensure stitched ip project already exists
    assert os.path.isfile(
        model.get_metadata_prop("wrapper_filename")
    ), """The
    file name from metadata property "wrapper_filename" doesn't exist."""
    assert os.path.isdir(
        model.get_metadata_prop("vivado_stitch_proj")
    ), """The
    directory from metadata property "vivado_stitch_proj" doesn't exist"""
    trace_file = model.get_metadata_prop("rtlsim_trace")
    if trace_file is None:
        trace_file = ""
    extra_verilator_args = model.get_metadata_prop("extra_verilator_args")
    if extra_verilator_args is None:
        extra_verilator_args = []
    else:
        # NOTE(review): eval on a metadata string — safe only if model
        # metadata is trusted; consider ast.literal_eval
        extra_verilator_args = eval(extra_verilator_args)
    # extract i/o info to prepare io_dict
    io_dict = {"inputs": {}, "outputs": {}}
    # NOTE(review): eval on a metadata string (see above)
    if_dict = eval(model.get_metadata_prop("vivado_stitch_ifnames"))
    # go over and prepare inputs
    for i, i_vi in enumerate(model.graph.input):
        i_name = i_vi.name
        i_tensor = execution_context[i_name]
        i_dt = model.get_tensor_datatype(i_name)
        first_node_onnx = model.find_consumer(i_name)
        first_node = getCustomOp(first_node_onnx)
        node_inp_ind = list(first_node_onnx.input).index(i_name)
        if node_inp_ind == 0:
            # default node input (input 0)
            i_stream_w = first_node.get_instream_width()
            i_folded_shape = first_node.get_folded_input_shape()
        else:
            # not input 0; node must support specifying inp index
            # for these functions
            i_stream_w = first_node.get_instream_width(node_inp_ind)
            i_folded_shape = first_node.get_folded_input_shape(node_inp_ind)
        # batch size taken from the last input processed; assumes all
        # inputs share the same batch dimension — TODO confirm
        batchsize = i_tensor.shape[0]
        # override batch size for input
        i_folded_shape = list(i_folded_shape)
        i_folded_shape[0] = batchsize
        i_folded_shape = tuple(i_folded_shape)
        # TODO any other layout transformations need to happen here!
        i_tensor = i_tensor.reshape(i_folded_shape)
        # pack input for rtlsim
        packed_input = npy_to_rtlsim_input(i_tensor, i_dt, i_stream_w)
        # add to io_dict
        if_name = if_dict["s_axis"][i][0]
        io_dict["inputs"][if_name] = packed_input
    # go over outputs to determine how many values will be produced
    num_out_values = 0
    o_tensor_info = []
    for o, o_vi in enumerate(model.graph.output):
        # output in io_dict just needs an empty list
        if_name = if_dict["m_axis"][o][0]
        io_dict["outputs"][if_name] = []
        # extract output shape
        o_name = o_vi.name
        o_shape = model.get_tensor_shape(o_name)
        o_dt = model.get_tensor_datatype(o_name)
        last_node = getCustomOp(model.find_producer(o_name))
        o_folded_shape = last_node.get_folded_output_shape()
        # override batch size from actual input
        o_shape = list(o_shape)
        o_shape[0] = batchsize
        o_shape = tuple(o_shape)
        o_folded_shape = list(o_folded_shape)
        o_folded_shape[0] = batchsize
        o_folded_shape = tuple(o_folded_shape)
        o_stream_w = last_node.get_outstream_width()
        o_tensor_info.append((o_stream_w, o_dt, o_folded_shape, o_shape))
        num_out_values += batchsize * last_node.get_number_output_values()
    # prepare pyverilator model: rebuild only if no cached .so is available
    rtlsim_so = model.get_metadata_prop("rtlsim_so")
    if (rtlsim_so is None) or (not os.path.isfile(rtlsim_so)):
        sim = pyverilate_stitched_ip(model, extra_verilator_args=extra_verilator_args)
        model.set_metadata_prop("rtlsim_so", sim.lib._name)
    else:
        sim = PyVerilator(rtlsim_so, auto_eval=False)
    # reset and call rtlsim, including any pre/post hooks
    reset_rtlsim(sim)
    if pre_hook is not None:
        pre_hook(sim)
    n_cycles = rtlsim_multi_io(sim, io_dict, num_out_values, trace_file, sname="_")
    if post_hook is not None:
        post_hook(sim)
    # unpack outputs and put back into execution context
    for o, o_vi in enumerate(model.graph.output):
        o_name = o_vi.name
        if_name = if_dict["m_axis"][o][0]
        o_stream_w, o_dt, o_folded_shape, o_shape = o_tensor_info[o]
        packed_output = io_dict["outputs"][if_name]
        o_folded_tensor = rtlsim_output_to_npy(
            packed_output, None, o_dt, o_folded_shape, o_stream_w, o_dt.bitwidth()
        )
        execution_context[o_name] = o_folded_tensor.reshape(o_shape)
    model.set_metadata_prop("cycles_rtlsim", str(n_cycles))
def execute_node(self, context, graph):
    """Execute this StreamingFCLayer node via npysim or rtlsim (older,
    npysim-era variant: folded shapes are computed locally from MW/MH and
    SIMD/PE, and the rtlsim branch builds the PyVerilator model directly
    from the HLS-generated Verilog instead of using a cached simulation).

    NOTE(review): unlike newer variants, the folded input is saved without
    an explicit .copy() before np.save — confirm whether the copy matters
    here as it does elsewhere in this file.

    Raises:
        Exception: on invalid exec_mode, more than three node inputs, or
        missing generated Verilog in rtlsim mode.
    """
    mode = self.get_nodeattr("exec_mode")
    node = self.onnx_node
    mw = self.get_nodeattr("MW")
    mh = self.get_nodeattr("MH")
    simd = self.get_nodeattr("SIMD")
    pe = self.get_nodeattr("PE")
    # synapse fold and neuron fold factors
    sf = mw // simd
    nf = mh // pe

    # TODO ensure codegen dir exists
    if mode == "npysim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
    elif mode == "rtlsim":
        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("npysim", "rtlsim")"""
            .format(mode))

    # create a npy file for each input of the node (in_ind is input index)
    in_ind = 0
    for inputs in node.input:
        # it is assumed that the first input of the node is the data input
        # the second input are the weights
        # the third input are the thresholds
        if in_ind == 0:
            assert (str(
                context[inputs].dtype) == "float32"), """Input datatype is
            not float32 as expected."""
            expected_inp_shape = (1, sf, simd)
            reshaped_input = context[inputs].reshape(expected_inp_shape)
            if self.get_input_datatype() == DataType.BIPOLAR:
                # store bipolar activations as binary
                reshaped_input = (reshaped_input + 1) / 2
                export_idt = DataType.BINARY
            else:
                export_idt = self.get_input_datatype()
            np.save(
                os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)),
                reshaped_input,
            )
        elif in_ind > 2:
            raise Exception("Unexpected input found for StreamingFCLayer")
        in_ind += 1

    if mode == "npysim":
        # execute the precompiled model
        super().exec_precompiled_singlenode_model()
        # load output npy file
        super().npy_to_dynamic_output(context)
        # reinterpret binary output as bipolar where needed
        if self.get_output_datatype() == DataType.BIPOLAR:
            out = context[node.output[0]]
            out = 2 * out - 1
            context[node.output[0]] = out
        assert context[node.output[0]].shape == (
            1,
            nf,
            pe,
        ), """Output shape is not as expected (1, nf, pe)"""
        # reshape output to have expected shape
        context[node.output[0]] = context[node.output[0]].reshape(1, mh)
    elif mode == "rtlsim":
        if PyVerilator is None:
            raise ImportError("Installation of PyVerilator is required.")
        # HLS emits the top module as <name>_<name>.v
        prefixed_top_name = "%s_%s" % (node.name, node.name)
        # check if needed file exists
        verilog_file = "{}/project_{}/sol1/impl/verilog/{}.v".format(
            code_gen_dir, node.name, prefixed_top_name)
        if os.path.isfile(verilog_file):
            nbits = self.get_instream_width()
            inp = npy_to_rtlsim_input(
                "{}/input_0.npy".format(code_gen_dir), export_idt, nbits)
            # build the simulation model straight from the generated Verilog
            sim = PyVerilator.build(
                verilog_file,
                verilog_path=[
                    "{}/project_{}/sol1/impl/verilog/".format(
                        code_gen_dir, node.name)
                ],
            )
            super().reset_rtlsim(sim)
            super().toggle_clk(sim)
            output = self.rtlsim(sim, inp)
            odt = self.get_output_datatype()
            target_bits = odt.bitwidth()
            packed_bits = self.get_outstream_width()
            out_npy_path = "{}/output.npy".format(code_gen_dir)
            rtlsim_output_to_npy(output, out_npy_path, odt, (1, nf, pe),
                                 packed_bits, target_bits)
            # load and reshape output
            output = np.load(out_npy_path)
            output = np.asarray([output], dtype=np.float32).reshape(1, mh)
            context[node.output[0]] = output
        else:
            raise Exception("""Found no verilog files for this node,
            did you run the codegen_ipgen transformation?""")
    else:
        raise Exception(
            """Invalid value for attribute exec_mode! Is currently set to: {}
        has to be set to one of the following value ("npysim", "rtlsim")"""
            .format(mode))