def __init__(self, edge_params, from_mem, to_mem, from_inst, to_inst):
    """Build the "lake_edge" generator: a write ForLoop feeding a write AddrGen.

    Args:
        edge_params: mapping read for the keys "from_signal", "to_signal",
            "dim", "max_range" and "max_stride"; each value is stored on
            ``self`` under the same name.
        from_mem: accepted but not referenced in this constructor.
        to_mem: accepted but not referenced in this constructor.
        from_inst: accepted but not referenced in this constructor.
        to_inst: accepted but not referenced in this constructor.
    """
    super().__init__("lake_edge", debug=True)

    # PARAMETERS
    # "from_signal" is the data_out side of the edge, "to_signal" the data_in
    # side.  Keys are read in this order, one attribute per key.
    for key in ("from_signal", "to_signal", "dim", "max_range", "max_stride"):
        setattr(self, key, edge_params[key])

    # NOTE(review): self._write is *called* here rather than assigned, and
    # nothing in this constructor defines it -- presumably this was meant to
    # declare a 1-bit write-enable signal.  Confirm before relying on it.
    self._write(f"write_{self.to_signal}", width=1)

    write_loop = ForLoop(iterator_support=self.dim,
                         config_width=clog2(self.max_range))

    # get memory params from top Lake or make a wrapper func for user
    # with just these params and then pass in mem for this signal

    # NOTE(review): self._clk / self._rst_n are not created in this
    # constructor; they must come from elsewhere for the connections below.
    edge_tag = f"{self.from_signal}_{self.to_signal}"
    self.add_child(f"loops_{edge_tag}",
                   write_loop,
                   clk=self._clk,
                   rst_n=self._rst_n,
                   step=self._write)

    # NOTE(review): addr_gen_dim and addr_gen_max_range are not defined in
    # this constructor -- presumably the memory parameters mentioned above;
    # verify where they are supposed to come from.
    write_addr_gen = AddrGen(iterator_support=addr_gen_dim,
                             config_width=clog2(addr_gen_max_range))
    self.add_child(f"AG_write_{edge_tag}",
                   write_addr_gen,
                   clk=self._clk,
                   rst_n=self._rst_n,
                   step=self._write,
                   mux_sel=write_loop.ports.mux_sel_out)

    # NOTE(review): self._write_addr is never declared here (its declaration
    # was left commented out in the original); confirm before use.
    safe_wire(self, write_addr_gen.ports.addr_out, self._write_addr)
def __init__(self,
             data_width=16,  # CGRA Params
             mem_depth=32,
             default_iterator_support=3,
             interconnect_input_ports=2,  # Connection to int
             interconnect_output_ports=2,
             mem_input_ports=1,
             mem_output_ports=1,
             config_data_width=32,
             config_addr_width=8,
             cycle_count_width=16,
             add_clk_enable=True,
             add_flush=True):
    """Build the "pond" generator: a register file with per-port addressing
    and scheduling children plus a configuration read/write path.

    Args:
        data_width: bit width of every data word.
        mem_depth: number of words in the register file; address signals
            are ``kts.clog2(mem_depth)`` bits wide.
        default_iterator_support: ``iterator_support`` handed to every
            ForLoop / AddrGen / SchedGen child.
        interconnect_input_ports: number of data inputs (one write
            iterator/addressor/scheduler triple each).
        interconnect_output_ports: number of data outputs (one read
            iterator/addressor/scheduler triple each).
        mem_input_ports: write ports of the register file.
        mem_output_ports: read ports of the register file.
        config_data_width: width of ``config_data_in`` / ``config_data_out``.
        config_addr_width: width of ``config_addr_in``.
        cycle_count_width: width of the cycle counter and of the ForLoop /
            SchedGen configuration.
        add_clk_enable: run kratos' auto clock-enable insertion pass.
        add_flush: add a "flush" synchronous reset via kratos' pass.
    """
    super().__init__("pond", debug=True)

    # ---- captured parameters -------------------------------------------
    self.data_width = data_width
    self.mem_depth = mem_depth
    self.default_iterator_support = default_iterator_support
    self.interconnect_input_ports = interconnect_input_ports
    self.interconnect_output_ports = interconnect_output_ports
    self.mem_input_ports = mem_input_ports
    self.mem_output_ports = mem_output_ports
    self.config_data_width = config_data_width
    self.config_addr_width = config_addr_width
    self.cycle_count_width = cycle_count_width
    self.add_clk_enable = add_clk_enable
    self.add_flush = add_flush

    # Plain-Python shorthands used by the declarations below.
    addr_bits = kts.clog2(self.mem_depth)
    packed_array = dict(explicit_array=True, packed=True)
    self.default_config_width = addr_bits

    def mark_sequence_data(port):
        # Formal SEQUENCE constraint first, then "not a control signal".
        port.add_attribute(
            FormalAttr(f"{port.name}", FormalSignalConstraint.SEQUENCE))
        port.add_attribute(ControlSignalAttr(is_control=False))

    # ---- clocking / reset ----------------------------------------------
    self._clk = self.clock("clk")
    self._clk.add_attribute(
        FormalAttr(f"{self._clk.name}", FormalSignalConstraint.CLK))
    self._rst_n = self.reset("rst_n")
    self._rst_n.add_attribute(
        FormalAttr(f"{self._rst_n.name}", FormalSignalConstraint.RSTN))
    self._clk_en = self.clock_en("clk_en", 1)

    # Enable/Disable tile: the gated clock is clk AND tile_en.
    self._tile_en = self.input("tile_en", 1)
    self._tile_en.add_attribute(
        ConfigRegAttr("Tile logic enable manifested as clock gate"))
    gated_clk = self.var("gclk", 1)
    self._gclk = kts.util.clock(gated_clk)
    self.wire(gated_clk, kts.util.clock(self._clk & self._tile_en))

    self._cycle_count = add_counter(self, "cycle_count",
                                    self.cycle_count_width)

    # ---- write enable + addr, same for read ----------------------------
    self._write = self.var("write", self.mem_input_ports)
    self._write_addr = self.var("write_addr", addr_bits,
                                size=self.interconnect_input_ports,
                                **packed_array)

    # Add "_pond" suffix to avoid error during garnet RTL generation
    self._data_in = self.input("data_in_pond", self.data_width,
                               size=self.interconnect_input_ports,
                               **packed_array)
    mark_sequence_data(self._data_in)

    self._read = self.var("read", self.mem_output_ports)
    self._t_write = self.var("t_write", self.interconnect_input_ports)
    self._t_read = self.var("t_read", self.interconnect_output_ports)

    self._read_addr = self.var("read_addr", addr_bits,
                               size=self.interconnect_output_ports,
                               **packed_array)
    self._s_read_addr = self.var("s_read_addr", addr_bits,
                                 size=self.interconnect_output_ports,
                                 **packed_array)

    self._data_out = self.output("data_out_pond", self.data_width,
                                 size=self.interconnect_output_ports,
                                 **packed_array)
    mark_sequence_data(self._data_out)

    self._valid_out = self.output("valid_out_pond",
                                  self.interconnect_output_ports)
    mark_sequence_data(self._valid_out)

    # ---- memory-side buses ("s_" = per interconnect port, pre-decode) ---
    self._mem_data_out = self.var("mem_data_out", self.data_width,
                                  size=self.mem_output_ports,
                                  **packed_array)
    self._s_mem_data_in = self.var("s_mem_data_in", self.data_width,
                                   size=self.interconnect_input_ports,
                                   **packed_array)
    self._mem_data_in = self.var("mem_data_in", self.data_width,
                                 size=self.mem_input_ports,
                                 **packed_array)
    self._s_mem_write_addr = self.var("s_mem_write_addr", addr_bits,
                                      size=self.interconnect_input_ports,
                                      **packed_array)
    self._s_mem_read_addr = self.var("s_mem_read_addr", addr_bits,
                                     size=self.interconnect_output_ports,
                                     **packed_array)
    self._mem_write_addr = self.var("mem_write_addr", addr_bits,
                                    size=self.mem_input_ports,
                                    **packed_array)
    self._mem_read_addr = self.var("mem_read_addr", addr_bits,
                                   size=self.mem_output_ports,
                                   **packed_array)

    # Every interconnect output mirrors memory read port 0.
    for out_port in range(self.interconnect_output_ports):
        self.wire(self._data_out[out_port], self._mem_data_out[0])

    # Valid out is simply passing the read signal through...
    self.wire(self._valid_out, self._t_read)

    # ---- addressors: all write ports first, then all read ports --------
    # Each port gets a ForLoop (iteration), an AddrGen (address) and a
    # SchedGen whose valid_output drives the port's t_write / t_read bit.
    port_groups = (
        ("write", self.interconnect_input_ports,
         self._t_write, self._write_addr),
        ("read", self.interconnect_output_ports,
         self._t_read, self._read_addr),
    )
    for kind, num_ports, fire, addr_bus in port_groups:
        for port in range(num_ports):
            loop_ctr = ForLoop(
                iterator_support=self.default_iterator_support,
                config_width=self.cycle_count_width)
            addr_gen = AddrGen(
                iterator_support=self.default_iterator_support,
                config_width=self.default_config_width)
            sched_gen = SchedGen(
                iterator_support=self.default_iterator_support,
                config_width=self.cycle_count_width,
                use_enable=True)

            self.add_child(f"rf_{kind}_iter_{port}",
                           loop_ctr,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           step=fire[port])

            # Whatever comes through here should hopefully just pipe
            # through seamlessly
            self.add_child(f"rf_{kind}_addr_{port}",
                           addr_gen,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           step=fire[port],
                           mux_sel=loop_ctr.ports.mux_sel_out,
                           restart=loop_ctr.ports.restart)
            safe_wire(self, addr_bus[port], addr_gen.ports.addr_out)

            self.add_child(f"rf_{kind}_sched_{port}",
                           sched_gen,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           mux_sel=loop_ctr.ports.mux_sel_out,
                           finished=loop_ctr.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=fire[port])

    # Memory port 0 is enabled when any interconnect port fires; its
    # address/data are selected with decode() from the per-port buses.
    self.wire(self._write, self._t_write.r_or())
    self.wire(self._mem_write_addr[0],
              decode(self, self._t_write, self._s_mem_write_addr))
    self.wire(self._mem_data_in[0],
              decode(self, self._t_write, self._s_mem_data_in))
    self.wire(self._read, self._t_read.r_or())
    self.wire(self._mem_read_addr[0],
              decode(self, self._t_read, self._s_mem_read_addr))

    # ===================================
    # Instantiate config hooks...
    # ===================================
    self.fw_int = 1
    self.data_words_per_set = 2**self.config_addr_width
    self.sets = int(
        (self.fw_int * self.mem_depth) / self.data_words_per_set)
    self.sets_per_macro = max(
        1, int(self.mem_depth / self.data_words_per_set))
    self.total_sets = max(1, 1 * self.sets_per_macro)

    self._config_data_in = self.input("config_data_in",
                                      self.config_data_width)
    self._config_data_in.add_attribute(ControlSignalAttr(is_control=False))

    # Only the low data_width bits of the config word reach the memory.
    self._config_data_in_shrt = self.var("config_data_in_shrt",
                                         self.data_width)
    self.wire(self._config_data_in_shrt,
              self._config_data_in[self.data_width - 1, 0])

    self._config_addr_in = self.input("config_addr_in",
                                      self.config_addr_width)
    self._config_addr_in.add_attribute(ControlSignalAttr(is_control=False))

    self._config_data_out_shrt = self.var("config_data_out_shrt",
                                          self.data_width,
                                          size=self.total_sets,
                                          **packed_array)
    self._config_data_out = self.output("config_data_out",
                                        self.config_data_width,
                                        size=self.total_sets,
                                        **packed_array)
    self._config_data_out.add_attribute(
        ControlSignalAttr(is_control=False))

    # Widen each per-set read word back to the config data width.
    for set_idx in range(self.total_sets):
        self.wire(
            self._config_data_out[set_idx],
            self._config_data_out_shrt[set_idx].extend(
                self.config_data_width))

    self._config_read = self.input("config_read", 1)
    self._config_read.add_attribute(ControlSignalAttr(is_control=False))

    self._config_write = self.input("config_write", 1)
    self._config_write.add_attribute(ControlSignalAttr(is_control=False))

    self._config_en = self.input("config_en", self.total_sets)
    self._config_en.add_attribute(ControlSignalAttr(is_control=False))

    self._mem_data_cfg = self.var("mem_data_cfg", self.data_width,
                                  **packed_array)
    self._mem_addr_cfg = self.var("mem_addr_cfg", addr_bits)

    # Add config...
    stg_cfg_seq = StorageConfigSeq(data_width=self.data_width,
                                   config_addr_width=self.config_addr_width,
                                   addr_width=addr_bits,
                                   fetch_width=self.data_width,
                                   total_sets=self.total_sets,
                                   sets_per_macro=self.sets_per_macro)

    # The clock to config sequencer needs to be the normal clock or
    # if the tile is off, we bring the clock back in based on config_en
    # NOTE(review): as written the sequencer clock is derived from the
    # gated clock (gclk); confirm this matches the intent stated above.
    seq_clk = self.var("cfg_seq_clk", 1)
    self._cfg_seq_clk = kts.util.clock(seq_clk)
    self.wire(seq_clk, kts.util.clock(self._gclk))

    self.add_child("config_seq",
                   stg_cfg_seq,
                   clk=self._cfg_seq_clk,
                   rst_n=self._rst_n,
                   clk_en=self._clk_en | self._config_en.r_or(),
                   config_data_in=self._config_data_in_shrt,
                   config_addr_in=self._config_addr_in,
                   config_wr=self._config_write,
                   config_rd=self._config_read,
                   config_en=self._config_en,
                   wr_data=self._mem_data_cfg,
                   rd_data_out=self._config_data_out_shrt,
                   addr_out=self._mem_addr_cfg)

    # With a single interconnect output the whole bus is connected,
    # otherwise only element 0.
    if self.interconnect_output_ports == 1:
        self.wire(stg_cfg_seq.ports.rd_data_stg, self._mem_data_out)
    else:
        self.wire(stg_cfg_seq.ports.rd_data_stg[0], self._mem_data_out[0])

    self.RF_GEN = RegisterFile(data_width=self.data_width,
                               write_ports=self.mem_input_ports,
                               read_ports=self.mem_output_ports,
                               width_mult=1,
                               depth=self.mem_depth,
                               read_delay=0)

    # Now we can instantiate and wire up the register file
    self.add_child("rf",
                   self.RF_GEN,
                   clk=self._gclk,
                   rst_n=self._rst_n,
                   data_out=self._mem_data_out)

    # Opt in for config_write: while any config_en bit is set, port 0
    # follows config_write and every other write port is forced to 0.
    self._write_rf = self.var("write_rf", self.mem_input_ports)
    self.wire(
        self._write_rf[0],
        kts.ternary(self._config_en.r_or(), self._config_write,
                    self._write[0]))
    for wr_idx in range(1, self.mem_input_ports):
        self.wire(
            self._write_rf[wr_idx],
            kts.ternary(self._config_en.r_or(), kts.const(0, 1),
                        self._write[wr_idx]))
    self.wire(self.RF_GEN.ports.wen, self._write_rf)

    # Opt in for config_data_in
    for in_port in range(self.interconnect_input_ports):
        self.wire(
            self._s_mem_data_in[in_port],
            kts.ternary(self._config_en.r_or(), self._mem_data_cfg,
                        self._data_in[in_port]))
    self.wire(self.RF_GEN.ports.data_in, self._mem_data_in)

    # Opt in for config_addr
    for in_port in range(self.interconnect_input_ports):
        self.wire(
            self._s_mem_write_addr[in_port],
            kts.ternary(self._config_en.r_or(), self._mem_addr_cfg,
                        self._write_addr[in_port]))
    self.wire(self.RF_GEN.ports.wr_addr, self._mem_write_addr[0])

    for out_port in range(self.interconnect_output_ports):
        self.wire(
            self._s_mem_read_addr[out_port],
            kts.ternary(self._config_en.r_or(), self._mem_addr_cfg,
                        self._read_addr[out_port]))
    self.wire(self.RF_GEN.ports.rd_addr, self._mem_read_addr[0])

    if self.add_clk_enable:
        kts.passes.auto_insert_clock_enable(self.internal_generator)
        clk_en_port = self.internal_generator.get_port("clk_en")
        clk_en_port.add_attribute(ControlSignalAttr(False))

    if self.add_flush:
        self.add_attribute("sync-reset=flush")
        kts.passes.auto_insert_sync_reset(self.internal_generator)
        flush_port = self.internal_generator.get_port("flush")
        flush_port.add_attribute(ControlSignalAttr(True))

    # Finally, lift the config regs...
    lift_config_reg(self.internal_generator)
def __init__(self,
             data_width=16,  # CGRA Params
             mem_width=64,
             mem_depth=512,
             banks=1,
             input_addr_iterator_support=6,
             output_addr_iterator_support=6,
             input_sched_iterator_support=6,
             output_sched_iterator_support=6,
             config_width=16,
             # output_config_width=16,
             interconnect_input_ports=1,  # Connection to int
             interconnect_output_ports=1,
             mem_input_ports=1,
             mem_output_ports=1,
             read_delay=1,  # Cycle delay in read (SRAM vs Register File)
             rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
             agg_height=4):
    """Build the "tb_formal" generator: a per-output-port ``tb`` buffer with
    write/read address generators and schedulers, annotated for formal use.

    Args:
        data_width: bit width of one data word.
        mem_width: memory word width; ``fetch_width`` is
            ``mem_width // data_width``.
        mem_depth, banks, config_width, interconnect_input_ports,
        agg_height, input_addr_iterator_support,
        output_addr_iterator_support, input_sched_iterator_support,
        output_sched_iterator_support: stored on ``self``; not otherwise
            used in this constructor.
        interconnect_output_ports: number of output ports; one write
            AddrGen plus one read ForLoop/AddrGen/SchedGen per port.
        mem_input_ports, mem_output_ports, read_delay, rw_same_cycle:
            accepted but not referenced in this constructor.

    Note:
        Iterator support (6), config width (16), ``tb_height`` (4), the
        cycle counter width (16) and the tb address width (6) are fixed
        constants here rather than derived from the arguments.
    """
    super().__init__("tb_formal", debug=True)

    self.fetch_width = mem_width // data_width
    self.interconnect_input_ports = interconnect_input_ports
    self.interconnect_output_ports = interconnect_output_ports
    self.agg_height = agg_height
    self.mem_depth = mem_depth
    self.banks = banks
    self.data_width = data_width
    self.config_width = config_width
    self.input_addr_iterator_support = input_addr_iterator_support
    self.output_addr_iterator_support = output_addr_iterator_support
    self.input_sched_iterator_support = input_sched_iterator_support
    self.output_sched_iterator_support = output_sched_iterator_support

    self.default_iterator_support = 6
    self.default_config_width = 16

    # inputs
    self._clk = self.clock("clk")
    self._clk.add_attribute(
        FormalAttr(f"{self._clk.name}", FormalSignalConstraint.CLK))
    self._rst_n = self.reset("rst_n")
    self._rst_n.add_attribute(
        FormalAttr(f"{self._rst_n.name}", FormalSignalConstraint.RSTN))

    self._cycle_count = self.var("cycle_count", 16)
    self.add_code(self.increment_cycle_count)

    # The internal read strobe is exposed as the "valid_in" output.
    self._read = self.var("read", 1)
    self._valid_in = self.output("valid_in", 1)
    self.wire(self._read, self._valid_in)
    self._valid_in.add_attribute(
        FormalAttr(f"{self._valid_in.name}",
                   FormalSignalConstraint.SEQUENCE))

    self._data_in = self.input("data_in",
                               data_width,
                               size=self.fetch_width,
                               packed=True,
                               explicit_array=True)
    self._data_in.add_attribute(
        FormalAttr(f"{self._data_in.name}",
                   FormalSignalConstraint.SEQUENCE))

    # outputs
    self._data_out = self.output("data_out",
                                 self.data_width,
                                 size=self.interconnect_output_ports,
                                 packed=True,
                                 explicit_array=True)
    self._data_out.add_attribute(
        FormalAttr(f"{self._data_out.name}",
                   FormalSignalConstraint.SEQUENCE))

    self._tb_read = self.var("tb_read", self.interconnect_output_ports)

    # Break out valids for formal!
    self._valid_out = self.output("valid_out",
                                  self.interconnect_output_ports)
    self._valid_out.add_attribute(
        FormalAttr(f"{self._valid_out.name}",
                   FormalSignalConstraint.SEQUENCE))
    self.wire(self._valid_out, self._tb_read)

    self.tb_height = 4

    self._tb_write_addr = self.var("tb_write_addr", 6,
                                   size=self.interconnect_output_ports,
                                   packed=True,
                                   explicit_array=True)
    self._tb_read_addr = self.var("tb_read_addr", 6,
                                  size=self.interconnect_output_ports,
                                  packed=True,
                                  explicit_array=True)

    self._tb = self.var("tb",
                        width=data_width,
                        size=(self.interconnect_output_ports,
                              self.tb_height,
                              self.fetch_width),
                        packed=True,
                        explicit_array=True)

    # At least one bit wide even with a single output port.
    self._output_port_sel_addr = self.var(
        "tb_bank_sel_addr",
        max(1, clog2(self.interconnect_output_ports)))

    # -------------------------------- Delineate new group -------------------------------
    # One ForLoop, stepped by the read strobe, is shared by the write
    # scheduler, every per-port write AddrGen and the port-select AddrGen.
    fl_ctr_sram_rd = ForLoop(
        iterator_support=self.default_iterator_support,
        config_width=self.default_config_width)

    self.add_child("tb_write_loops",
                   fl_ctr_sram_rd,
                   clk=self._clk,
                   rst_n=self._rst_n,
                   step=self._read)

    self.add_child("tb_write_sched_gen",
                   SchedGen(iterator_support=self.default_iterator_support,
                            config_width=self.default_config_width),
                   clk=self._clk,
                   rst_n=self._rst_n,
                   cycle_count=self._cycle_count,
                   mux_sel=fl_ctr_sram_rd.ports.mux_sel_out,
                   finished=fl_ctr_sram_rd.ports.restart,
                   valid_output=self._read)

    for i in range(self.interconnect_output_ports):
        # Write side: steps only when the read strobe fires AND the
        # port-select address equals this port's index.
        write_addr_gen = AddrGen(
            iterator_support=self.default_iterator_support,
            config_width=self.default_config_width)
        self.add_child(
            f"tb_write_addr_gen_{i}",
            write_addr_gen,
            clk=self._clk,
            rst_n=self._rst_n,
            step=self._read & (self._output_port_sel_addr == const(
                i, self._output_port_sel_addr.width)),
            mux_sel=fl_ctr_sram_rd.ports.mux_sel_out,
            restart=fl_ctr_sram_rd.ports.restart)
        safe_wire(self, self._tb_write_addr[i],
                  write_addr_gen.ports.addr_out)

        # Read side: each port has its own loop, address generator and
        # scheduler; the scheduler's valid_output is this port's tb_read.
        fl_ctr_tb_rd = ForLoop(
            iterator_support=self.default_iterator_support,
            config_width=self.default_config_width)
        self.add_child(f"tb_read_loops_{i}",
                       fl_ctr_tb_rd,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._tb_read[i])

        read_addr_gen = AddrGen(
            iterator_support=self.default_iterator_support,
            config_width=self.default_config_width)
        self.add_child(f"tb_read_addr_gen_{i}",
                       read_addr_gen,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._tb_read[i],
                       mux_sel=fl_ctr_tb_rd.ports.mux_sel_out,
                       restart=fl_ctr_tb_rd.ports.restart)
        safe_wire(self, self._tb_read_addr[i],
                  read_addr_gen.ports.addr_out)

        self.add_child(f"tb_read_sched_gen_{i}",
                       SchedGen(
                           iterator_support=self.default_iterator_support,
                           config_width=self.default_config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       cycle_count=self._cycle_count,
                       mux_sel=fl_ctr_tb_rd.ports.mux_sel_out,
                       finished=fl_ctr_tb_rd.ports.restart,
                       valid_output=self._tb_read[i])

    if self.interconnect_output_ports > 1:
        # Addr for port select should be driven on agg to sram write sched
        # NOTE(review): unlike the other AddrGens here, this one is not
        # given a `restart` connection -- confirm that is intentional.
        self.add_child("out_port_sel_addr",
                       AddrGen(
                           iterator_support=self.default_iterator_support,
                           config_width=self.default_config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._read,
                       mux_sel=fl_ctr_sram_rd.ports.mux_sel_out,
                       addr_out=self._output_port_sel_addr)
    else:
        # Single output port: the select address is tied to zero.
        self.wire(self._output_port_sel_addr[0],
                  const(0, self._output_port_sel_addr.width))

    self.add_code(self.tb_ctrl)
    for idx in range(self.interconnect_output_ports):
        self.add_code(self.tb_to_out, idx=idx)
def __init__(self, mem_params, word_width):
    """Build the "lake_mem" generator from a memory description.

    Args:
        mem_params: mapping read for the keys "name", "capacity",
            "rw_same_cycle", "use_macro", "macro_name",
            "num_read_write_ports", "num_read_ports", "num_write_ports",
            "write_info", "read_info", "read_write_info", "chaining",
            "num_chain", and either "write_port_width"/"read_port_width"
            (no read/write ports) or "read_write_port_width".
        word_width: bit width of one memory word.

    Raises:
        AssertionError: if the capacity is not a multiple of the read or
            write port width, or (separate read and write ports only) if
            the first write port's latency is not positive.
        ValueError: if the widest-port size matches neither port width
            (cannot happen while ``mem_size`` is the max of the two).
    """
    super().__init__("lake_mem", debug=True)

    ################################################################
    # PARAMETERS
    ################################################################
    # basic parameters
    self.word_width = word_width

    # general memory parameters
    self.mem_name = mem_params["name"]
    self.capacity = mem_params["capacity"]
    self.rw_same_cycle = mem_params["rw_same_cycle"]
    self.use_macro = mem_params["use_macro"]
    self.macro_name = mem_params["macro_name"]

    # number of port types
    self.num_read_write_ports = mem_params["num_read_write_ports"]
    self.num_read_only_ports = mem_params["num_read_ports"]
    self.num_write_only_ports = mem_params["num_write_ports"]
    self.num_read_ports = self.num_read_only_ports + self.num_read_write_ports
    self.num_write_ports = self.num_write_only_ports + self.num_read_write_ports

    # info for port types
    self.write_info = mem_params["write_info"]
    self.read_info = mem_params["read_info"]
    self.read_write_info = mem_params["read_write_info"]

    # TODO change - for now, we assume you cannot have read/write and read or write ports
    # should be the max of write vs read_write and need to handle more general case
    if self.num_read_write_ports == 0:
        self.write_width = mem_params["write_port_width"]
        self.read_width = mem_params["read_port_width"]
    else:
        self.write_width = mem_params["read_write_port_width"]
        self.read_width = mem_params["read_write_port_width"]

    assert self.capacity % self.write_width == 0, \
        "Memory capacity is not a multiple of the port width for writes"
    assert self.capacity % self.read_width == 0, \
        "Memory capacity is not a multiple of the port width for reads"

    # innermost dimension for size of memory is the size of whichever port
    # type has a wider width between reads and writes
    self.mem_size = max(self.read_width, self.write_width)
    # this assert has to be true if previous two asserts are true
    assert self.capacity % self.mem_size == 0

    # this is the last dimension for size of memory - equal to the number
    # of the port type with wider width addresses can fit in the memory
    self.mem_last_dim = int(self.capacity / self.mem_size)

    self.mem_size_bits = max(1, clog2(self.mem_size))
    self.mem_last_dim_bits = max(1, clog2(self.mem_last_dim))

    # chaining parameters and config regs
    self.chaining = mem_params["chaining"]
    self.num_chain = mem_params["num_chain"]
    self.num_chain_bits = clog2(self.num_chain)
    if self.chaining:
        self.chain_index = self.var("chain_index",
                                    width=self.num_chain_bits)
        self.chain_index.add_attribute(
            ConfigRegAttr("Chain index for chaining"))
        self.chain_index.add_attribute(
            FormalAttr(self.chain_index.name, FormalSignalConstraint.SET0))

    # minimum required widths for address signals: the port whose width
    # equals mem_size addresses whole rows; a narrower port additionally
    # needs mem_size_bits to address within a row.
    row_addr_bits = self.mem_last_dim_bits + self.num_chain_bits
    if self.mem_size == self.write_width and self.mem_size == self.read_width:
        self.write_addr_width = row_addr_bits
        self.read_addr_width = row_addr_bits
    elif self.mem_size == self.write_width:
        self.write_addr_width = row_addr_bits
        self.read_addr_width = self.mem_size_bits + row_addr_bits
    elif self.mem_size == self.read_width:
        self.write_addr_width = self.mem_size_bits + row_addr_bits
        self.read_addr_width = row_addr_bits
    else:
        # Previously this only printed and carried on with both address
        # widths unset; fail at the point of the inconsistency instead.
        raise ValueError("Error occurred! Memory size does not make sense.")

    ################################################################
    # I/O INTERFACE (WITHOUT ADDRESSING) + MEMORY
    ################################################################
    self.clk = self.clock("clk")
    # active low asynchronous reset
    self.rst_n = self.reset("rst_n", 1)

    self.data_in = self.input("data_in",
                              width=self.word_width,
                              size=(self.num_write_ports,
                                    self.write_width),
                              explicit_array=True,
                              packed=True)

    self.chain_en = self.input("chain_en", 1)

    # write enable (high: write, low: read when rw_same_cycle = False, else
    # only indicates write)
    self.write = self.input("write",
                            width=1,
                            size=self.num_write_ports)

    self.data_out = self.output("data_out",
                                width=self.word_width,
                                size=(self.num_read_ports,
                                      self.read_width),
                                explicit_array=True,
                                packed=True)

    self.write_chain = self.var("write_chain",
                                width=1,
                                size=self.num_write_ports)

    if self.use_macro:
        # NOTE(review): self.addr_width is not assigned anywhere in this
        # constructor, and "read_write_addr" is declared again further
        # down when num_read_write_ports != 0 -- the macro path looks
        # unfinished; confirm before enabling use_macro.
        self.read_write_addr = self.input("read_write_addr",
                                          width=self.addr_width,
                                          size=self.num_read_write_ports,
                                          explicit_array=True)

        sram = SRAM(not self.use_macro,
                    self.macro_name,
                    word_width,
                    mem_params["read_write_port_width"],
                    mem_params["capacity"],
                    mem_params["num_read_write_ports"],
                    mem_params["num_read_write_ports"],
                    clog2(mem_params["capacity"]),
                    0,
                    1)

        self.add_child("SRAM_" + mem_params["name"],
                       sram,
                       clk=self.clk,
                       clk_en=1,
                       mem_data_in_bank=self.data_in,
                       mem_data_out_bank=self.data_out,
                       mem_addr_in_bank=self.read_write_addr,  # TODO adjust
                       mem_cen_in_bank=1,
                       mem_wen_in_bank=self.write_chain,
                       wtsel=0,
                       rtsel=1)
    else:
        # memory variable (not I/O)
        self.memory = self.var("memory",
                               width=self.word_width,
                               size=(self.mem_last_dim,
                                     self.mem_size),
                               explicit_array=True,
                               packed=True)

    ################################################################
    # ADDRESSING I/O AND SIGNALS
    ################################################################
    # I/O is different depending on whether we have read and write ports or
    # read/write ports

    # we keep address width at 16 to avoid unpacked
    # safe_wire errors for addr in hw_top_lake - can change by changing
    # default_config_width for those addr gens while accounting for muxing
    # bits, but the extra bits are unused anyway

    if self.rw_same_cycle:
        self.read = self.input("read",
                               width=1,
                               size=self.num_read_ports)
    else:
        # Without same-cycle read/write, "read" is an internal signal
        # with every bit tied to 1.
        self.read = self.var("read",
                             width=1,
                             size=self.num_read_ports)
        for i in range(self.num_read_ports):
            self.wire(self.read[i], 1)

    # TODO change later - same read/write or read and write assumption as above
    if self.num_write_only_ports != 0 and self.num_read_only_ports != 0:
        # writes
        self.write_addr = self.input("write_addr",
                                     width=16,  # self.write_addr_width,
                                     size=self.num_write_ports,
                                     explicit_array=True)

        # The check is "> 0"; the message used to claim "greater than 1".
        assert self.write_info[0]["latency"] > 0, \
            "Latency for write ports must be at least 1 clock cycle."

        # reads
        self.read_addr = self.input("read_addr",
                                    width=16,  # self.read_addr_width,
                                    size=self.num_read_ports,
                                    explicit_array=True)

        # TODO for now assuming all read ports have same latency
        # TODO also should add support for other latencies

    # rw_same_cycle is not valid here because read/write share the same port
    elif self.num_read_write_ports != 0:
        self.read_write_addr = self.input(
            "read_write_addr",
            width=16,  # max(self.read_addr_width, self.write_addr_width),
            size=self.num_read_write_ports,
            explicit_array=True)

        # writes
        self.write_addr = self.var("write_addr",
                                   width=16,  # self.write_addr_width,
                                   size=self.num_read_write_ports,
                                   explicit_array=True)
        for p in range(self.num_read_write_ports):
            safe_wire(gen=self,
                      w_to=self.write_addr[p],
                      w_from=self.read_write_addr[p])

        # reads
        self.read_addr = self.var("read_addr",
                                  width=self.read_addr_width,
                                  size=self.num_read_write_ports,
                                  explicit_array=True)
        for p in range(self.num_read_write_ports):
            safe_wire(gen=self,
                      w_to=self.read_addr[p],
                      w_from=self.read_write_addr[p])

        # TODO in self.read_write_info we should allow for different read
        # and write latencies?
        self.read_info = self.read_write_info

    # TODO just doing chaining for SRAM
    if self.chaining and self.num_read_write_ports > 0:
        # write_chain = write AND (chaining disabled OR the selected
        # address bits equal this memory's chain_index).
        # NOTE(review): the slice spans num_chain_bits + 1 bits while
        # chain_index is num_chain_bits wide -- confirm the bounds.
        self.wire(
            self.write_chain,
            # chaining not enabled
            (~self.chain_en |
             # chaining enabled
             (self.chain_en &
              (self.chain_index ==
               self.read_write_addr[
                   self.write_addr_width + self.num_chain_bits,
                   self.write_addr_width]))) & self.write)
    # chaining not supported
    else:
        self.wire(self.write_chain, self.write)

    if self.use_macro:
        self.wire(sram.ports.mem_wen_in_bank, self.write_chain)

    self.add_write_data_block()
    self.add_read_data_block()
def __init__(self,
             data_width=16,  # CGRA Params
             mem_width=64,
             mem_depth=512,
             banks=1,
             input_addr_iterator_support=6,
             output_addr_iterator_support=6,
             input_sched_iterator_support=6,
             output_sched_iterator_support=6,
             config_width=16,
             # output_config_width=16,
             interconnect_input_ports=2,  # Connection to int
             interconnect_output_ports=2,
             mem_input_ports=1,
             mem_output_ports=1,
             read_delay=1,  # Cycle delay in read (SRAM vs Register File)
             rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
             agg_height=4,
             tb_height=2):
    """Build the "strg_ub_agg_sram_shared" generator: per input port, one
    ForLoop and one SchedGen whose outputs are exported.

    Args:
        data_width: bit width of one data word.
        mem_width: memory word width; ``fetch_width`` is
            ``mem_width // data_width``.
        mem_depth, config_width, agg_height, tb_height,
        input_addr_iterator_support, input_sched_iterator_support,
        interconnect_output_ports: stored on ``self``; not otherwise used
            in this constructor.
        interconnect_input_ports: number of input ports; sizes the
            outputs and the per-port children.
        banks, output_addr_iterator_support,
        output_sched_iterator_support, mem_input_ports, mem_output_ports,
        read_delay, rw_same_cycle: accepted but not referenced in this
            constructor.
    """
    super().__init__("strg_ub_agg_sram_shared")

    ##################################################################################
    # Capture constructor parameter...
    ##################################################################################
    self.fetch_width = mem_width // data_width
    self.interconnect_input_ports = interconnect_input_ports
    self.interconnect_output_ports = interconnect_output_ports
    self.agg_height = agg_height
    self.tb_height = tb_height
    self.mem_width = mem_width
    self.mem_depth = mem_depth
    self.config_width = config_width
    self.data_width = data_width
    self.input_addr_iterator_support = input_addr_iterator_support
    self.input_sched_iterator_support = input_sched_iterator_support

    self.default_iterator_support = 6
    self.default_config_width = 16
    self.sram_iterator_support = 6
    self.agg_rd_addr_gen_width = 8

    # These constants used to be re-assigned on every pass of the per-port
    # loop below; they do not depend on the port index, so set them once
    # (which also defines them when there are zero input ports).
    self.agg_iter_support = 6
    self.agg_addr_width = 4
    self.agg_range_width = 16

    ##################################################################################
    # IO
    ##################################################################################
    self._clk = self.clock("clk")
    self._rst_n = self.reset("rst_n")

    self._cycle_count = self.input("cycle_count", 16)

    # Per-port copies of each ForLoop's mux select and restart outputs.
    self._floop_mux_sel = self.output(
        "floop_mux_sel",
        width=max(clog2(self.default_iterator_support), 1),
        size=self.interconnect_input_ports,
        explicit_array=True,
        packed=True)

    self._floop_restart = self.output("floop_restart",
                                      width=1,
                                      size=self.interconnect_input_ports,
                                      explicit_array=True,
                                      packed=True)

    # The SRAM write is just the OR reduction of the aggregator reads
    self._agg_read_out = self.output("agg_read_out",
                                     self.interconnect_input_ports)
    self._agg_read = self.var("agg_read", self.interconnect_input_ports)
    self.wire(self._agg_read_out, self._agg_read)

    ##################################################################################
    # AGG PATHS
    ##################################################################################
    for i in range(self.interconnect_input_ports):
        # Create for loop counters that can be shared across the input
        # port selection and SRAM write
        fl_ctr_sram_wr = ForLoop(
            iterator_support=self.default_iterator_support,
            config_width=self.default_config_width)

        self.add_child(f"loops_in2buf_autovec_write_{i}",
                       fl_ctr_sram_wr,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._agg_read[i])

        safe_wire(gen=self,
                  w_to=self._floop_mux_sel[i],
                  w_from=fl_ctr_sram_wr.ports.mux_sel_out)
        self.wire(self._floop_restart[i], fl_ctr_sram_wr.ports.restart)

        # scheduler modules: the scheduler's valid_output is this port's
        # agg_read bit, which in turn steps the loop above.
        self.add_child(
            f"agg_read_sched_gen_{i}",
            SchedGen(iterator_support=self.default_iterator_support,
                     config_width=16),
            clk=self._clk,
            rst_n=self._rst_n,
            cycle_count=self._cycle_count,
            mux_sel=fl_ctr_sram_wr.ports.mux_sel_out,
            finished=fl_ctr_sram_wr.ports.restart,
            valid_output=self._agg_read[i])
def __init__(self,
             data_width=16,  # CGRA Params
             mem_width=64,
             mem_depth=512,
             banks=1,
             input_addr_iterator_support=6,
             output_addr_iterator_support=6,
             input_sched_iterator_support=6,
             output_sched_iterator_support=6,
             config_width=16,
             interconnect_input_ports=1,  # Connection to int
             interconnect_output_ports=1,
             mem_input_ports=1,
             mem_output_ports=1,
             read_delay=1,  # Cycle delay in read (SRAM vs Register File)
             rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
             agg_height=4):
    """Construct the "agg_formal" generator.

    Declares the clock/reset, an internal 16-bit ``cycle_count`` variable,
    the ``data_in`` / ``valid_in`` / ``valid_out`` / ``data_out`` ports
    (each tagged with a FormalAttr), and the aggregator storage ``agg``.
    For every interconnect input port it adds a write loop nest, write
    address generator and write scheduler, plus a read loop nest and read
    address generator stepped by the shared ``write`` signal.  With more
    than one input port an extra loop nest and address generator select the
    port; otherwise the select address is tied to zero.  A final scheduler
    drives ``write``.

    Only ``data_width``, ``mem_width``, ``mem_depth``,
    ``interconnect_input_ports``, ``interconnect_output_ports`` and
    ``agg_height`` are read; the other keyword arguments are accepted but
    unused by this constructor.
    """
    super().__init__("agg_formal", debug=True)

    def tag_formal(signal, constraint):
        # Attach a formal constraint named after the signal itself.
        signal.add_attribute(FormalAttr(f"{signal.name}", constraint))

    self.fetch_width = mem_width // data_width
    self.interconnect_input_ports = interconnect_input_ports
    self.interconnect_output_ports = interconnect_output_ports
    self.agg_height = agg_height
    self.mem_depth = mem_depth

    self.default_iterator_support = 6
    self.default_config_width = 16

    num_ports = self.interconnect_input_ports

    # ---- clock / reset ----
    self._clk = self.clock("clk")
    tag_formal(self._clk, FormalSignalConstraint.CLK)
    self._rst_n = self.reset("rst_n")
    tag_formal(self._rst_n, FormalSignalConstraint.RSTN)

    # Cycle counter is internal here; its update logic is the
    # increment_cycle_count code block registered below.
    self._cycle_count = self.var("cycle_count", 16)
    self.add_code(self.increment_cycle_count)

    # ---- data / valid ports ----
    self._data_in = self.input("data_in",
                               data_width,
                               size=num_ports,
                               packed=True,
                               explicit_array=True)
    tag_formal(self._data_in, FormalSignalConstraint.SEQUENCE)

    self._agg_write = self.var("agg_write", num_ports)

    self._valid_in = self.output("valid_in", num_ports)
    tag_formal(self._valid_in, FormalSignalConstraint.SEQUENCE)
    self.wire(self._valid_in, self._agg_write)

    self._write = self.var("write", 1)
    self._valid_out = self.output("valid_out", 1)
    tag_formal(self._valid_out, FormalSignalConstraint.SEQUENCE)
    self.wire(self._write, self._valid_out)

    self._data_out = self.output("data_out",
                                 data_width,
                                 size=self.fetch_width,
                                 packed=True)
    tag_formal(self._data_out, FormalSignalConstraint.SEQUENCE)

    # ---- aggregator addressing signals ----
    # Make this based on the size
    self._agg_write_addr = self.var("agg_write_addr",
                                    2 + clog2(self.agg_height),
                                    size=num_ports,
                                    packed=True,
                                    explicit_array=True)
    self._agg_read_addr = self.var("agg_read_addr",
                                   max(1, clog2(self.agg_height)),
                                   size=num_ports,
                                   packed=True,
                                   explicit_array=True)

    self.agg_rd_addr_gen_width = 8
    self._agg_read_addr_gen_out = self.var("agg_read_addr_gen_out",
                                           self.agg_rd_addr_gen_width,
                                           size=num_ports,
                                           packed=True,
                                           explicit_array=True)

    self._input_port_sel_addr = self.var("input_port_sel_addr",
                                         max(1, clog2(num_ports)))

    # Aggregator storage: one (agg_height x fetch_width) buffer per port.
    self._agg = self.var("agg",
                         width=data_width,
                         size=(num_ports,
                               self.agg_height,
                               self.fetch_width),
                         packed=True,
                         explicit_array=True)

    # Loop nest whose mux_sel/restart feed the final output scheduler.
    output_loops = None

    for i in range(num_ports):
        # -- write side: loops, address generator, scheduler --
        wr_loops = ForLoop(iterator_support=self.default_iterator_support,
                           config_width=self.default_config_width)
        loop_itr = wr_loops.get_iter()
        loop_wth = wr_loops.get_cfg_width()
        self.add_child(f"agg_write_loops_{i}",
                       wr_loops,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._agg_write[i])

        wr_addr_gen = AddrGen(iterator_support=self.default_iterator_support,
                              config_width=self.default_config_width)
        self.add_child(f"agg_write_addr_gen_{i}",
                       wr_addr_gen,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._agg_write[i],
                       mux_sel=wr_loops.ports.mux_sel_out,
                       restart=wr_loops.ports.restart)
        safe_wire(self, self._agg_write_addr[i], wr_addr_gen.ports.addr_out)

        wr_sched = SchedGen(iterator_support=self.default_iterator_support,
                            config_width=self.default_config_width)
        self.add_child(f"agg_write_sched_gen_{i}",
                       wr_sched,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       mux_sel=wr_loops.ports.mux_sel_out,
                       finished=wr_loops.ports.restart,
                       cycle_count=self._cycle_count,
                       valid_output=self._agg_write[i])

        # -- read side: loops and address generator, stepped by `write` --
        rd_loops = ForLoop(iterator_support=self.default_iterator_support,
                           config_width=self.default_config_width)
        loop_itr = rd_loops.get_iter()
        loop_wth = rd_loops.get_cfg_width()
        self.add_child(f"agg_read_loops_{i}",
                       rd_loops,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._write)
        output_loops = rd_loops

        rd_addr_gen = AddrGen(iterator_support=self.default_iterator_support,
                              config_width=self.default_config_width)
        self.add_child(f"agg_read_addr_gen_{i}",
                       rd_addr_gen,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._write,
                       mux_sel=rd_loops.ports.mux_sel_out,
                       restart=rd_loops.ports.restart)
        safe_wire(self, self._agg_read_addr_gen_out[i], rd_addr_gen.ports.addr_out)
        # Only the low bits of the generated address index the aggregator.
        self.wire(self._agg_read_addr[i],
                  self._agg_read_addr_gen_out[i][self._agg_read_addr.width - 1, 0])

    # Decide which input port's data goes through to the SRAM.  With several
    # ports a dedicated loop nest + address generator produce the selector
    # (and that loop nest becomes the one feeding the output scheduler).
    if num_ports > 1:
        select_loops = ForLoop(iterator_support=self.default_iterator_support,
                               config_width=self.default_config_width)
        loop_itr = select_loops.get_iter()
        loop_wth = select_loops.get_cfg_width()
        output_loops = select_loops
        self.add_child("agg_select_loops",
                       select_loops,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._write)

        port_sel_gen = AddrGen(iterator_support=self.default_iterator_support,
                               config_width=self.default_config_width)
        self.add_child("port_sel_addr",
                       port_sel_gen,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._write,
                       mux_sel=select_loops.ports.mux_sel_out)
        safe_wire(self, self._input_port_sel_addr, port_sel_gen.ports.addr_out)
    else:
        # Single port: the selector is constant zero.
        self.wire(self._input_port_sel_addr[0],
                  const(0, self._input_port_sel_addr.width))

    # Scheduler for the agg -> SRAM write; its valid output is `write`.
    out_sched = SchedGen(iterator_support=self.default_iterator_support,
                         config_width=self.default_config_width)
    self.add_child("agg_read_output_sched_gen",
                   out_sched,
                   clk=self._clk,
                   rst_n=self._rst_n,
                   cycle_count=self._cycle_count,
                   mux_sel=output_loops.ports.mux_sel_out,
                   finished=output_loops.ports.restart,
                   valid_output=self._write)

    # Register the per-port aggregator control blocks, then the shared
    # agg-to-SRAM block.
    for idx in range(num_ports):
        self.add_code(self.agg_ctrl, idx=idx)

    self.add_code(self.agg_to_sram)
def __init__(self, word_width, input_ports, output_ports, memories, edges):
    """Construct the "LakeTop" generator from memory and edge descriptions.

    Args:
        word_width: bit width of each data word (used for ``data_in``,
            ``data_out`` and the per-memory data-out variables).
        input_ports: number of entries in the ``data_in`` port array.
        output_ports: number of entries in the ``data_out`` port array.
        memories: dict mapping memory name -> parameter dict.  Keys read
            here: ``chaining``, ``read_port_width`` (or
            ``read_write_port_width``), ``is_input``, ``is_output``,
            ``num_read_write_ports``, ``capacity``, ``rw_same_cycle``,
            ``input_edge_params`` / ``input_port`` (input memories),
            ``output_edge_params`` / ``output_port`` (output memories) and
            ``read_info`` / ``read_write_info`` (first entry's ``latency``).
        edges: iterable of edge dicts with ``from_signal`` and ``to_signal``
            (lists of memory names) and ``dim``.

    For each input memory, output memory and edge, a ForLoop, AddrGen(s)
    and a SchedGen are instantiated and wired to the memory instances.
    Memories with shared read/write ports get their address selected
    between the registered write and read addresses based on the
    registered write signal.  Finally the clock-enable and sync-reset
    passes are run and config registers are lifted to the top level.
    """
    super().__init__("LakeTop", debug=True)

    # parameters
    self.word_width = word_width
    self.input_ports = input_ports
    self.output_ports = output_ports

    self.default_config_width = 16
    self.cycle_count_width = 16

    self.stencil_valid = False

    # objects
    self.memories = memories
    self.edges = edges

    # tile enable and clock
    self.tile_en = self.input("tile_en", 1)
    self.tile_en.add_attribute(ConfigRegAttr("Tile logic enable manifested as clock gate"))
    self.tile_en.add_attribute(FormalAttr(self.tile_en.name, FormalSignalConstraint.SET1))

    self.clk_mem = self.clock("clk")
    self.clk_mem.add_attribute(FormalAttr(self.clk_mem.name, FormalSignalConstraint.CLK))

    # chaining: expose chain_en as a config input only if some memory
    # supports it, otherwise tie it low
    chain_supported = any(mem["chaining"] for mem in self.memories.values())

    if chain_supported:
        self.chain_en = self.input("chain_en", 1)
        self.chain_en.add_attribute(ConfigRegAttr("Chaining enable"))
        self.chain_en.add_attribute(FormalAttr(self.chain_en.name, FormalSignalConstraint.SET0))
    else:
        self.chain_en = self.var("chain_en", 1)
        self.wire(self.chain_en, 0)

    # gate clock with tile_en
    gclk = self.var("gclk", 1)
    self.gclk = kts.util.clock(gclk)
    self.wire(gclk, self.clk_mem & self.tile_en)

    self.clk_en = self.clock_en("clk_en", 1)

    # active low asynchronous reset
    self.rst_n = self.reset("rst_n", 1)
    self.rst_n.add_attribute(FormalAttr(self.rst_n.name, FormalSignalConstraint.RSTN))

    # data in and out of top level Lake memory object
    self.data_in = self.input("data_in",
                              width=self.word_width,
                              size=self.input_ports,
                              explicit_array=True,
                              packed=True)
    self.data_in.add_attribute(FormalAttr(self.data_in.name, FormalSignalConstraint.SEQUENCE))

    self.data_out = self.output("data_out",
                                width=self.word_width,
                                size=self.output_ports,
                                explicit_array=True,
                                packed=True)
    self.data_out.add_attribute(FormalAttr(self.data_out.name, FormalSignalConstraint.SEQUENCE))

    # global cycle count for accessor comparison; same width that the
    # schedule generators below are configured with
    self._cycle_count = self.var("cycle_count", self.cycle_count_width)

    @always_ff((posedge, self.gclk), (negedge, "rst_n"))
    def increment_cycle_count(self):
        if ~self.rst_n:
            self._cycle_count = 0
        else:
            self._cycle_count = self._cycle_count + 1

    self.add_always(increment_cycle_count)

    # one data-out variable per memory, sized by its read (or shared
    # read/write) port width
    self.mem_data_outs = [
        self.var(f"mem_data_out_{mem_name}",
                 width=self.word_width,
                 size=mem["read_port_width" if "read_port_width" in mem
                          else "read_write_port_width"],
                 explicit_array=True,
                 packed=True)
        for mem_name, mem in self.memories.items()
    ]

    # For memories with shared read/write ports: memory name ->
    # {"write": ..., "write_addr": ..., "read_addr": ...}.  Entries are
    # filled in piecewise below (always merged, never replaced) and
    # consumed at the end to pick the address for the shared port.
    self.mem_read_write_addrs = {}

    # create memory instance for each memory
    self.mem_insts = {}
    for (mem_name, mem_params), mem_data_out in zip(self.memories.items(),
                                                    self.mem_data_outs):
        m = mem_inst(mem_params, self.word_width)
        self.mem_insts[mem_name] = m

        self.add_child(mem_name,
                       m,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       data_out=mem_data_out,
                       chain_en=self.chain_en)

    # get input and output memories
    is_input = [name for name, mem in self.memories.items() if mem["is_input"]]
    is_output = [name for name, mem in self.memories.items() if mem["is_output"]]

    # TODO direct connection to write doesn't work (?), so have to do this...
    self.low = self.var("low", 1)
    self.wire(self.low, 0)

    # TODO adding multiple ports to 1 memory after talking about mux with compiler team

    # set up input memories
    for in_mem in is_input:
        in_params = self.memories[in_mem]

        # input addressor / accessor parameters
        input_dim = in_params["input_edge_params"]["dim"]
        input_range = in_params["input_edge_params"]["max_range"]
        input_stride = in_params["input_edge_params"]["max_stride"]

        # input port associated with memory
        input_port_index = in_params["input_port"]

        # self.valid is (re)bound per accessor; it is the write strobe here
        self.valid = self.var(f"input_port{input_port_index}_2{in_mem}_accessor_valid", 1)
        self.wire(self.mem_insts[in_mem].ports.write, self.valid)

        # hook up data from the specified input port to the memory
        safe_wire(self, self.mem_insts[in_mem].ports.data_in[0], self.data_in[input_port_index])

        if in_params["num_read_write_ports"] > 0:
            self.mem_read_write_addrs[in_mem] = {"write": self.valid}

        # create IteratorDomain, AddressGenerator, and ScheduleGenerator
        # for writes to this input memory
        forloop = ForLoop(iterator_support=input_dim,
                          config_width=max(1, clog2(input_range)))
        # NOTE(review): the two results below are never used; the calls are
        # kept because their purity cannot be confirmed from this block.
        loop_itr = forloop.get_iter()
        loop_wth = forloop.get_cfg_width()

        self.add_child(f"input_port{input_port_index}_2{in_mem}_forloop",
                       forloop,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       step=self.valid)

        newAG = AddrGen(iterator_support=input_dim,
                        config_width=max(1, clog2(input_stride)))
        self.add_child(f"input_port{input_port_index}_2{in_mem}_write_addr_gen",
                       newAG,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       step=self.valid,
                       mux_sel=forloop.ports.mux_sel_out,
                       restart=forloop.ports.restart)

        if in_params["num_read_write_ports"] == 0:
            safe_wire(self, self.mem_insts[in_mem].ports.write_addr[0], newAG.ports.addr_out)
        else:
            self.mem_read_write_addrs[in_mem]["write_addr"] = newAG.ports.addr_out

        newSG = SchedGen(iterator_support=input_dim,
                         config_width=self.cycle_count_width)
        self.add_child(f"input_port{input_port_index}_2{in_mem}_write_sched_gen",
                       newSG,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       mux_sel=forloop.ports.mux_sel_out,
                       finished=forloop.ports.restart,
                       cycle_count=self._cycle_count,
                       valid_output=self.valid)

    # set up output memories
    for out_mem in is_output:
        out_params = self.memories[out_mem]

        # output addressor / accessor parameters
        output_dim = out_params["output_edge_params"]["dim"]
        output_range = out_params["output_edge_params"]["max_range"]
        output_stride = out_params["output_edge_params"]["max_stride"]

        # output port associated with memory
        output_port_index = out_params["output_port"]

        # hook up data from the memory to the specified output port
        self.wire(self.data_out[output_port_index],
                  self.mem_insts[out_mem].ports.data_out[0][0])

        self.valid = self.var(f"{out_mem}2output_port{output_port_index}_accessor_valid", 1)
        if out_params["rw_same_cycle"]:
            self.wire(self.mem_insts[out_mem].ports.read, self.valid)

        # create IteratorDomain, AddressGenerator, and ScheduleGenerator
        # for reads from this output memory
        forloop = ForLoop(iterator_support=output_dim,
                          config_width=max(1, clog2(output_range)))
        loop_itr = forloop.get_iter()
        loop_wth = forloop.get_cfg_width()

        self.add_child(f"{out_mem}2output_port{output_port_index}_forloop",
                       forloop,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       step=self.valid)

        newAG = AddrGen(iterator_support=output_dim,
                        config_width=max(1, clog2(output_stride)))
        self.add_child(f"{out_mem}2output_port{output_port_index}_read_addr_gen",
                       newAG,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       step=self.valid,
                       mux_sel=forloop.ports.mux_sel_out,
                       restart=forloop.ports.restart)

        if out_params["num_read_write_ports"] == 0:
            safe_wire(self, self.mem_insts[out_mem].ports.read_addr[0], newAG.ports.addr_out)
        else:
            # The read address belongs to THIS output memory.  (It used to
            # be stored under `in_mem`, a leftover name from the input
            # loop, which attached it to the wrong memory or raised.)
            self.mem_read_write_addrs.setdefault(out_mem, {})["read_addr"] = newAG.ports.addr_out

        newSG = SchedGen(iterator_support=output_dim,
                         config_width=self.cycle_count_width)
        self.add_child(f"{out_mem}2output_port{output_port_index}_read_sched_gen",
                       newSG,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       mux_sel=forloop.ports.mux_sel_out,
                       finished=forloop.ports.restart,
                       cycle_count=self._cycle_count,
                       valid_output=self.valid)

    # create shared IteratorDomains and accessors as well as
    # read/write addressors for memories connected by each edge
    for edge in self.edges:
        from_mems = edge["from_signal"]
        to_mems = edge["to_signal"]

        # see how many signals need to be selected between for
        # from and to signals for edge
        num_mux_from = len(from_mems)
        num_mux_to = len(to_mems)

        # get unique edge_name identifier for hardware modules
        edge_name = get_edge_name(edge)

        # create forloop and accessor valid output signal.  These are kept
        # on self because the delay always-block below refers to them.
        self.valid = self.var(edge_name + "_accessor_valid", 1)

        forloop = ForLoop(iterator_support=edge["dim"])
        self.forloop = forloop
        loop_itr = forloop.get_iter()
        loop_wth = forloop.get_cfg_width()

        self.add_child(edge_name + "_forloop",
                       forloop,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       step=self.valid)

        # create input addressor
        readAG = AddrGen(iterator_support=edge["dim"],
                         config_width=self.default_config_width)
        self.add_child(f"{edge_name}_read_addr_gen",
                       readAG,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       step=self.valid,
                       mux_sel=forloop.ports.mux_sel_out,
                       restart=forloop.ports.restart)

        # assign read address to all from memories (the first source
        # memory decides which port style is used for all of them)
        if self.memories[from_mems[0]]["num_read_write_ports"] == 0:
            # can assign same read addrs to all the memories
            for from_mem in from_mems:
                safe_wire(self, self.mem_insts[from_mem].ports.read_addr[0], readAG.ports.addr_out)
        else:
            for from_mem in from_mems:
                self.mem_read_write_addrs.setdefault(from_mem, {})["read_addr"] = readAG.ports.addr_out

        # if needing to mux, choose which from memory we get data
        # from for to memory data in
        if num_mux_from > 1:
            num_mux_bits = clog2(num_mux_from)
            self.mux_sel = self.var(f"{edge_name}_mux_sel",
                                    width=num_mux_bits)

            read_addr_width = max(1, clog2(self.memories[from_mems[0]]["capacity"]))
            # decide which memory to get data from for to memory's data in
            # NOTE(review): the slice is num_mux_from bits wide while
            # mux_sel is clog2(num_mux_from) bits; this relies on safe_wire
            # to reconcile the widths - confirm that is intended.
            safe_wire(self, self.mux_sel,
                      readAG.ports.addr_out[read_addr_width + num_mux_from - 1, read_addr_width])

            comb_mux_from = self.combinational()
            # TODO want to use a switch statement here, but get add_fn_ln issue
            # Only the first two source memories are selectable here.
            if_mux_sel = IfStmt(self.mux_sel == 0)
            for to_mem in to_mems:
                if_mux_sel.then_(self.mem_insts[to_mem].ports.data_in.assign(
                    self.mem_insts[from_mems[0]].ports.data_out))
                if_mux_sel.else_(self.mem_insts[to_mem].ports.data_in.assign(
                    self.mem_insts[from_mems[1]].ports.data_out))
            comb_mux_from.add_stmt(if_mux_sel)

        # no muxing from, data_out from the one and only memory
        # goes to all to memories (valid determines whether it is
        # actually written)
        else:
            for to_mem in to_mems:
                safe_wire(self,
                          self.mem_insts[to_mem].ports.data_in,
                          # only one memory to read from
                          self.mem_insts[from_mems[0]].ports.data_out)

        # create output addressor
        writeAG = AddrGen(iterator_support=edge["dim"],
                          config_width=self.default_config_width)
        # step, mux_sel, restart may need delayed signals (assigned later)
        self.add_child(f"{edge_name}_write_addr_gen",
                       writeAG,
                       clk=self.gclk,
                       rst_n=self.rst_n)

        # set write addr for to memories
        if self.memories[to_mems[0]]["num_read_write_ports"] == 0:
            for to_mem in to_mems:
                safe_wire(self, self.mem_insts[to_mem].ports.write_addr[0], writeAG.ports.addr_out)
        else:
            for to_mem in to_mems:
                # Merge rather than replace, so a read address already
                # registered for this memory (e.g. because it is also an
                # output memory) is not discarded.
                self.mem_read_write_addrs.setdefault(to_mem, {}).update(
                    {"write": self.valid, "write_addr": writeAG.ports.addr_out})

        # calculate necessary delay between from_signal to to_signal
        # TODO this may need to be more sophisticated and based on II as well
        # TODO just need to add for loops for all the ports
        if self.memories[from_mems[0]]["num_read_write_ports"] == 0:
            self.delay = self.memories[from_mems[0]]["read_info"][0]["latency"]
        else:
            self.delay = self.memories[from_mems[0]]["read_write_info"][0]["latency"]

        if self.delay > 0:
            # signals that need to be delayed due to edge latency
            self.delayed_writes = self.var(f"{edge_name}_delayed_writes",
                                           width=self.delay)
            self.delayed_mux_sels = self.var(f"{edge_name}_delayed_mux_sels",
                                             width=self.forloop.ports.mux_sel_out.width,
                                             size=self.delay,
                                             explicit_array=True,
                                             packed=True)
            self.delayed_restarts = self.var(f"{edge_name}_delayed_restarts",
                                             width=self.delay)

            # delay in valid between read from memory and write to next memory
            @always_ff((posedge, self.gclk), (negedge, "rst_n"))
            def get_delayed_write(self):
                if ~self.rst_n:
                    self.delayed_writes = 0
                    self.delayed_mux_sels = 0
                    self.delayed_restarts = 0
                else:
                    for i in range(self.delay - 1):
                        self.delayed_writes[i + 1] = self.delayed_writes[i]
                        self.delayed_mux_sels[i + 1] = self.delayed_mux_sels[i]
                        self.delayed_restarts[i + 1] = self.delayed_restarts[i]
                    self.delayed_writes[0] = self.valid
                    self.delayed_mux_sels[0] = self.forloop.ports.mux_sel_out
                    self.delayed_restarts[0] = self.forloop.ports.restart

            self.add_always(get_delayed_write)

        # if we have a mux for the destination memories,
        # choose which mux to write to
        if num_mux_to > 1:
            num_mux_bits = clog2(num_mux_to)
            self.mux_sel_to = self.var(f"{edge_name}_mux_sel_to",
                                       width=num_mux_bits)

            write_addr_width = max(1, clog2(self.memories[to_mems[0]]["capacity"]))
            # decide which destination memory gets written to
            safe_wire(self, self.mux_sel_to,
                      writeAG.ports.addr_out[write_addr_width + num_mux_to - 1, write_addr_width])

            # wire the write (or if needed, delayed write) signal to the selected destination memory
            # and set write enable low for all other destination memories
            comb_mux_to = self.combinational()
            for i in range(num_mux_to):
                if_mux_sel_to = IfStmt(self.mux_sel_to == i)
                if self.delay == 0:
                    if_mux_sel_to.then_(self.mem_insts[to_mems[i]].ports.write.assign(self.valid))
                else:
                    if_mux_sel_to.then_(self.mem_insts[to_mems[i]].ports.write.assign(
                        self.delayed_writes[self.delay - 1]))
                if_mux_sel_to.else_(self.mem_insts[to_mems[i]].ports.write.assign(self.low))
                comb_mux_to.add_stmt(if_mux_sel_to)

        # no muxing to, just write to the one destination memory
        else:
            if self.delay == 0:
                self.wire(self.mem_insts[to_mems[0]].ports.write, self.valid)
            else:
                self.wire(self.mem_insts[to_mems[0]].ports.write,
                          self.delayed_writes[self.delay - 1])

        # assign delayed signals for write addressor if needed
        if self.delay == 0:
            self.wire(writeAG.ports.step, self.valid)
            self.wire(writeAG.ports.mux_sel, self.forloop.ports.mux_sel_out)
            self.wire(writeAG.ports.restart, self.forloop.ports.restart)
        else:
            self.wire(writeAG.ports.step, self.delayed_writes[self.delay - 1])
            self.wire(writeAG.ports.mux_sel, self.delayed_mux_sels[self.delay - 1])
            self.wire(writeAG.ports.restart, self.delayed_restarts[self.delay - 1])

        # create accessor for edge
        newSG = SchedGen(iterator_support=edge["dim"],
                         config_width=self.cycle_count_width)
        self.add_child(edge_name + "_sched_gen",
                       newSG,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       mux_sel=forloop.ports.mux_sel_out,
                       finished=forloop.ports.restart,
                       cycle_count=self._cycle_count,
                       valid_output=self.valid)

    # for read write memories, choose either read or write address based on whether
    # we are writing to the memory (whether write enable is high)
    read_write_addr_comb = self.combinational()
    for mem_name in self.memories:
        if mem_name in self.mem_read_write_addrs:
            mem_info = self.mem_read_write_addrs[mem_name]
            shared_addr = self.mem_insts[mem_name].ports.read_write_addr[0]
            addr_width = shared_addr.width
            if_write = IfStmt(mem_info["write"] == 1)
            # generated addresses are truncated to the shared port's width
            if_write.then_(shared_addr.assign(mem_info["write_addr"][addr_width - 1, 0]))
            if_write.else_(shared_addr.assign(mem_info["read_addr"][addr_width - 1, 0]))
            read_write_addr_comb.add_stmt(if_write)

    # clock enable and flush passes
    kts.passes.auto_insert_clock_enable(self.internal_generator)
    clk_en_port = self.internal_generator.get_port("clk_en")
    clk_en_port.add_attribute(FormalAttr(clk_en_port.name, FormalSignalConstraint.SET1))

    self.add_attribute("sync-reset=flush")
    kts.passes.auto_insert_sync_reset(self.internal_generator)
    flush_port = self.internal_generator.get_port("flush")

    # bring config registers up to top level
    lift_config_reg(self.internal_generator)
def __init__(
        self,
        data_width=16,  # CGRA Params
        mem_width=64,
        mem_depth=512,
        banks=1,
        input_addr_iterator_support=6,
        output_addr_iterator_support=6,
        input_sched_iterator_support=6,
        output_sched_iterator_support=6,
        config_width=16,
        interconnect_input_ports=2,  # Connection to int
        interconnect_output_ports=2,
        mem_input_ports=1,
        mem_output_ports=1,
        read_delay=1,  # Cycle delay in read (SRAM vs Register File)
        rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
        agg_height=4,
        tb_height=2):
    """Construct the "strg_ub_sram_tb_shared" generator.

    For every interconnect output port this instantiates one ForLoop child
    stepped by that port's ``t_read`` bit and one SchedGen child that
    produces the bit.  The loop's ``mux_sel_out`` and ``restart`` outputs
    are exported per port as ``loops_sram2tb_mux_sel`` /
    ``loops_sram2tb_restart``, and the ``t_read`` bits are exported as
    ``t_read_out``.

    Only ``data_width``, ``mem_width``, ``mem_depth``, ``config_width``,
    ``interconnect_input_ports``, ``interconnect_output_ports``,
    ``agg_height``, ``tb_height``, ``input_addr_iterator_support`` and
    ``input_sched_iterator_support`` are read here (they are stored on the
    instance); the remaining keyword arguments are accepted but not used by
    this constructor.
    """
    super().__init__("strg_ub_sram_tb_shared")

    # ---- constructor parameters kept on the instance ----
    self.fetch_width = mem_width // data_width
    self.interconnect_input_ports = interconnect_input_ports
    self.interconnect_output_ports = interconnect_output_ports
    self.agg_height = agg_height
    self.tb_height = tb_height
    self.mem_width = mem_width
    self.mem_depth = mem_depth
    self.config_width = config_width
    self.data_width = data_width
    self.input_addr_iterator_support = input_addr_iterator_support
    self.input_sched_iterator_support = input_sched_iterator_support

    # ---- fixed sizing constants ----
    self.default_iterator_support = 6
    self.default_config_width = 16
    self.sram_iterator_support = 6
    self.agg_rd_addr_gen_width = 8

    # ---- IO ----
    self._clk = self.clock("clk")
    self._rst_n = self.reset("rst_n")
    self._cycle_count = self.input("cycle_count", 16)

    num_ports = self.interconnect_output_ports
    # At least one select bit, even for a single iterator.
    mux_sel_width = max(clog2(self.default_iterator_support), 1)

    self._loops_sram2tb_mux_sel = self.output("loops_sram2tb_mux_sel",
                                              width=mux_sel_width,
                                              size=num_ports,
                                              explicit_array=True,
                                              packed=True)
    self._loops_sram2tb_restart = self.output("loops_sram2tb_restart",
                                              width=1,
                                              size=num_ports,
                                              explicit_array=True,
                                              packed=True)

    # Per-port SRAM read strobes, exported unchanged.
    self._t_read_out = self.output("t_read_out", num_ports)
    self._t_read = self.var("t_read", num_ports)
    self.wire(self._t_read_out, self._t_read)

    # ---- TB paths: one loop nest + one scheduler per output port ----
    for i in range(num_ports):
        # Loop nest for SRAM reads / TB writes.
        read_loops = ForLoop(iterator_support=self.default_iterator_support,
                             config_width=self.default_config_width)
        self.add_child(f"loops_buf2out_autovec_read_{i}",
                       read_loops,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._t_read[i])

        safe_wire(gen=self,
                  w_to=self._loops_sram2tb_mux_sel[i],
                  w_from=read_loops.ports.mux_sel_out)
        self.wire(self._loops_sram2tb_restart[i], read_loops.ports.restart)

        # SRAM read schedule.  (Original note: the TB write schedule is
        # this one delayed by one clock cycle, done in tb_only.)
        read_sched = SchedGen(iterator_support=self.default_iterator_support,
                              config_width=16)
        self.add_child(f"output_sched_gen_{i}",
                       read_sched,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       cycle_count=self._cycle_count,
                       mux_sel=read_loops.ports.mux_sel_out,
                       finished=read_loops.ports.restart,
                       valid_output=self._t_read[i])
def __init__(
        self,
        data_width=16,  # CGRA Params
        mem_width=64,
        mem_depth=512,
        banks=1,
        input_addr_iterator_support=6,
        output_addr_iterator_support=6,
        input_sched_iterator_support=6,
        output_sched_iterator_support=6,
        config_width=16,
        interconnect_input_ports=2,  # Connection to int
        interconnect_output_ports=2,
        mem_input_ports=1,
        mem_output_ports=1,
        read_delay=1,  # Cycle delay in read (SRAM vs Register File)
        rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
        agg_height=4,
        tb_height=2):
    """Construct the "strg_ub_tb_only" generator.

    Inputs are the SRAM read data, the per-port SRAM read strobes
    (``t_read``) and the per-port ``loops_sram2tb_mux_sel`` /
    ``loops_sram2tb_restart`` signals.  Each of the three control inputs is
    registered once (``*_d1``); the registered versions step a per-port
    write address generator and enable the write of ``sram_read_data`` into
    the ``tb`` storage.  A separate per-port ForLoop / AddrGen / SchedGen
    trio produces the read address and the ``tb_read`` strobe, which is
    exported as ``accessor_output``; ``data_out`` is selected from ``tb``
    by the read address.

    Only ``data_width``, ``mem_width``, ``mem_depth``, ``config_width``,
    ``interconnect_input_ports``, ``interconnect_output_ports``,
    ``agg_height``, ``tb_height``, ``input_addr_iterator_support`` and
    ``input_sched_iterator_support`` are read here; the remaining keyword
    arguments are accepted but not used by this constructor.
    """
    super().__init__("strg_ub_tb_only")

    ##################################################################################
    # Capture constructor parameter...
    ##################################################################################
    self.fetch_width = mem_width // data_width
    self.interconnect_input_ports = interconnect_input_ports
    self.interconnect_output_ports = interconnect_output_ports
    self.agg_height = agg_height
    self.tb_height = tb_height
    self.mem_width = mem_width
    self.mem_depth = mem_depth
    self.config_width = config_width
    self.data_width = data_width
    self.input_addr_iterator_support = input_addr_iterator_support
    self.input_sched_iterator_support = input_sched_iterator_support

    self.default_iterator_support = 6
    self.default_config_width = 16
    self.sram_iterator_support = 6
    self.agg_rd_addr_gen_width = 8

    # TB sizing constants.  They do not depend on the port index, so they
    # are set once here instead of on every pass through the port loop.
    self.tb_iter_support = 6
    self.tb_addr_width = 4
    self.tb_range_width = 16

    # Width shared by the incoming mux_sel port and its registered copy.
    mux_sel_width = max(clog2(self.default_iterator_support), 1)

    ##################################################################################
    # IO
    ##################################################################################
    self._clk = self.clock("clk")
    self._rst_n = self.reset("rst_n")

    self._cycle_count = self.input("cycle_count", 16)

    # data from SRAM
    self._sram_read_data = self.input("sram_read_data",
                                      self.data_width,
                                      size=self.fetch_width,
                                      packed=True,
                                      explicit_array=True)
    # read enable from SRAM
    self._t_read = self.input("t_read", self.interconnect_output_ports)

    # sram to tb for loop
    self._loops_sram2tb_mux_sel = self.input("loops_sram2tb_mux_sel",
                                             width=mux_sel_width,
                                             size=self.interconnect_output_ports,
                                             explicit_array=True,
                                             packed=True)
    self._loops_sram2tb_restart = self.input("loops_sram2tb_restart",
                                             width=1,
                                             size=self.interconnect_output_ports,
                                             explicit_array=True,
                                             packed=True)

    self._valid_out = self.output("accessor_output", self.interconnect_output_ports)
    self._data_out = self.output("data_out",
                                 self.data_width,
                                 size=self.interconnect_output_ports,
                                 packed=True,
                                 explicit_array=True)

    ##################################################################################
    # TB RELEVANT SIGNALS
    ##################################################################################
    self._tb = self.var("tb",
                        width=self.data_width,
                        size=(self.interconnect_output_ports,
                              self.tb_height,
                              self.fetch_width),
                        packed=True,
                        explicit_array=True)

    self._tb_write_addr = self.var("tb_write_addr",
                                   2 + max(1, clog2(self.tb_height)),
                                   size=self.interconnect_output_ports,
                                   packed=True,
                                   explicit_array=True)
    self._tb_read_addr = self.var("tb_read_addr",
                                  2 + max(1, clog2(self.tb_height)),
                                  size=self.interconnect_output_ports,
                                  packed=True,
                                  explicit_array=True)

    # write enable to tb, delayed 1 cycle from SRAM reads
    self._t_read_d1 = self.var("t_read_d1", self.interconnect_output_ports)
    # read enable for reads from tb
    self._tb_read = self.var("tb_read", self.interconnect_output_ports)

    # Break out valids...
    self.wire(self._valid_out, self._tb_read)

    # delayed input mux_sel and restart signals from sram read/tb write
    # for loop and scheduling.  The registered mux_sel uses exactly the
    # width of the port it is loaded from (previously computed without the
    # max(..., 1) floor, which only agreed by coincidence).
    self._mux_sel_d1 = self.var("mux_sel_d1",
                                mux_sel_width,
                                size=self.interconnect_output_ports,
                                packed=True,
                                explicit_array=True)
    self._restart_d1 = self.var("restart_d1",
                                width=1,
                                size=self.interconnect_output_ports,
                                explicit_array=True,
                                packed=True)

    for i in range(self.interconnect_output_ports):
        # signals delayed by 1 cycle from SRAM
        @always_ff((posedge, "clk"), (negedge, "rst_n"))
        def delay_read():
            if ~self._rst_n:
                self._t_read_d1[i] = 0
                self._mux_sel_d1[i] = 0
                self._restart_d1[i] = 0
            else:
                self._t_read_d1[i] = self._t_read[i]
                self._mux_sel_d1[i] = self._loops_sram2tb_mux_sel[i]
                self._restart_d1[i] = self._loops_sram2tb_restart[i]

        self.add_code(delay_read)

    ##################################################################################
    # TB PATHS
    ##################################################################################
    for i in range(self.interconnect_output_ports):

        # WRITE TO TB: address generator driven by the registered SRAM-side
        # step / mux_sel / restart signals
        _AG = AddrGen(iterator_support=self.default_iterator_support,
                      config_width=self.tb_addr_width)
        self.add_child(f"tb_write_addr_gen_{i}",
                       _AG,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._t_read_d1[i],
                       mux_sel=self._mux_sel_d1[i],
                       restart=self._restart_d1[i])
        safe_wire(gen=self, w_to=self._tb_write_addr[i], w_from=_AG.ports.addr_out)

        # NOTE(review): only bit 0 of the write address selects the tb row,
        # which matches the default tb_height of 2 - confirm for other
        # heights.
        @always_ff((posedge, "clk"))
        def tb_ctrl():
            if self._t_read_d1[i]:
                self._tb[i][self._tb_write_addr[i][0]] = \
                    self._sram_read_data

        self.add_code(tb_ctrl)

        # READ FROM TB
        fl_ctr_tb_rd = ForLoop(iterator_support=self.tb_iter_support,
                               config_width=self.tb_range_width)
        self.add_child(f"loops_buf2out_read_{i}",
                       fl_ctr_tb_rd,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._tb_read[i])

        _AG = AddrGen(iterator_support=self.tb_iter_support,
                      config_width=self.tb_addr_width)
        self.add_child(f"tb_read_addr_gen_{i}",
                       _AG,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._tb_read[i],
                       mux_sel=fl_ctr_tb_rd.ports.mux_sel_out,
                       restart=fl_ctr_tb_rd.ports.restart)
        safe_wire(gen=self, w_to=self._tb_read_addr[i], w_from=_AG.ports.addr_out)

        self.add_child(f"tb_read_sched_gen_{i}",
                       SchedGen(iterator_support=self.tb_iter_support,
                                config_width=16),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       cycle_count=self._cycle_count,
                       mux_sel=fl_ctr_tb_rd.ports.mux_sel_out,
                       finished=fl_ctr_tb_rd.ports.restart,
                       valid_output=self._tb_read[i])

        # Upper slice of the read address picks the tb row, lower
        # clog2(fetch_width) bits pick the word within the row.
        @always_comb
        def tb_to_out():
            self._data_out[i] = self._tb[i][self._tb_read_addr[i][
                clog2(self.tb_height) + clog2(self.fetch_width) - 1,
                clog2(self.fetch_width)]][self._tb_read_addr[i][
                    clog2(self.fetch_width) - 1, 0]]

        self.add_code(tb_to_out)
def __init__(self,
             data_width=16,  # CGRA Params
             mem_width=64,
             mem_depth=512,
             banks=1,
             input_addr_iterator_support=6,
             output_addr_iterator_support=6,
             input_sched_iterator_support=6,
             output_sched_iterator_support=6,
             config_width=16,
             interconnect_input_ports=2,  # Connection to int
             interconnect_output_ports=2,
             mem_input_ports=1,
             mem_output_ports=1,
             read_delay=1,  # Cycle delay in read (SRAM vs Register File)
             rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
             agg_height=4,
             tb_height=2):
    """Build the ``strg_ub_agg_only`` generator.

    Declares the clock/reset, ``cycle_count``, ``data_in``, ``agg_read``
    and ``floop_*`` inputs, the ``agg_data_out`` output, the per-port
    ``agg`` storage array, and - for every interconnect input port - a
    ForLoop / AddrGen / SchedGen trio on the write side plus an AddrGen
    on the read side.

    Args:
        data_width: Width of one data word.
        mem_width: Width of one memory row; ``mem_width // data_width``
            words are stored as ``fetch_width``.
        mem_depth: Stored on the instance; not otherwise read here.
        config_width: Stored on the instance; not otherwise read here.
        interconnect_input_ports: Number of input ports; one aggregator
            path is generated per port.
        interconnect_output_ports: Stored on the instance only.
        input_addr_iterator_support: Stored on the instance only.
        input_sched_iterator_support: Stored on the instance only.
        agg_height: Number of rows in each port's ``agg`` array.
        tb_height: Stored on the instance only.
        banks, output_addr_iterator_support,
        output_sched_iterator_support, mem_input_ports, mem_output_ports,
        read_delay, rw_same_cycle: Accepted for signature compatibility;
            this constructor does not read them.
    """
    super().__init__("strg_ub_agg_only")

    ##################################################################################
    # Capture constructor parameter...
    ##################################################################################
    self.fetch_width = mem_width // data_width
    self.interconnect_input_ports = interconnect_input_ports
    self.interconnect_output_ports = interconnect_output_ports
    self.agg_height = agg_height
    self.tb_height = tb_height
    self.mem_width = mem_width
    self.mem_depth = mem_depth
    self.config_width = config_width
    self.data_width = data_width
    self.input_addr_iterator_support = input_addr_iterator_support
    self.input_sched_iterator_support = input_sched_iterator_support

    # Fixed sizing constants (not derived from the constructor arguments).
    self.default_iterator_support = 6
    self.default_config_width = 16
    self.sram_iterator_support = 6
    self.agg_rd_addr_gen_width = 8

    # Sizing of the write-side loop/address generators. These are the same
    # for every port, so they are set once instead of on each loop pass.
    self.agg_iter_support = 6
    self.agg_addr_width = 4
    self.agg_range_width = 16

    ##################################################################################
    # IO
    ##################################################################################
    self._clk = self.clock("clk")
    self._rst_n = self.reset("rst_n")

    self._cycle_count = self.input("cycle_count", 16)

    self._data_in = self.input("data_in", self.data_width,
                               size=self.interconnect_input_ports,
                               packed=True,
                               explicit_array=True)

    self._agg_read = self.input("agg_read", self.interconnect_input_ports)

    self._floop_mux_sel = self.input("floop_mux_sel",
                                     width=max(clog2(self.default_iterator_support), 1),
                                     size=self.interconnect_input_ports,
                                     explicit_array=True,
                                     packed=True)

    self._floop_restart = self.input("floop_restart",
                                     width=1,
                                     size=self.interconnect_input_ports,
                                     explicit_array=True,
                                     packed=True)

    self._agg_data_out = self.output("agg_data_out", self.data_width,
                                     size=(self.interconnect_input_ports,
                                           self.fetch_width),
                                     packed=True,
                                     explicit_array=True)
    self._agg_data_out.add_attribute(
        FormalAttr(self._agg_data_out.name, FormalSignalConstraint.SEQUENCE, "sram"))

    ##################################################################################
    # AGG RELEVANT SIGNALS
    ##################################################################################
    # Create an input to agg write scheduler + addressor for each input
    # Also need an addressor for the mux in addition to the read addr
    self._agg = self.var("agg",
                         width=self.data_width,
                         size=(self.interconnect_input_ports,
                               self.agg_height,
                               self.fetch_width),
                         packed=True,
                         explicit_array=True)

    self._agg_write = self.var("agg_write", self.interconnect_input_ports)

    # The write address is {row select, word select}. The word-select part
    # is sliced below as clog2(fetch_width) bits, so size it from
    # fetch_width rather than a literal 2 (which only matched
    # fetch_width == 4, i.e. the default 64/16 geometry).
    self._agg_write_addr = self.var("agg_write_addr",
                                    clog2(self.fetch_width) + clog2(self.agg_height),
                                    size=self.interconnect_input_ports,
                                    packed=True,
                                    explicit_array=True)
    self._agg_read_addr = self.var("agg_read_addr", max(1, clog2(self.agg_height)),
                                   size=self.interconnect_input_ports,
                                   packed=True,
                                   explicit_array=True)

    self._agg_read_addr_gen_out = self.var("agg_read_addr_gen_out",
                                           self.agg_rd_addr_gen_width,
                                           size=self.interconnect_input_ports,
                                           packed=True,
                                           explicit_array=True)

    ##################################################################################
    # AGG PATHS
    ##################################################################################
    for i in range(self.interconnect_input_ports):

        # Write side: loop counter stepped by this port's agg_write strobe.
        forloop_ctr = ForLoop(iterator_support=self.agg_iter_support,
                              config_width=self.agg_range_width)

        self.add_child(f"loops_in2buf_{i}",
                       forloop_ctr,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._agg_write[i])

        newAG = AddrGen(iterator_support=self.agg_iter_support,
                        config_width=self.agg_addr_width)
        self.add_child(f"agg_write_addr_gen_{i}",
                       newAG,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._agg_write[i],
                       mux_sel=forloop_ctr.ports.mux_sel_out,
                       restart=forloop_ctr.ports.restart)
        # safe_wire is used because the generator's addr_out width and
        # agg_write_addr's width are sized independently.
        safe_wire(gen=self, w_to=self._agg_write_addr[i], w_from=newAG.ports.addr_out)

        # The schedule generator drives agg_write[i] from cycle_count.
        newSG = SchedGen(iterator_support=self.agg_iter_support,
                         config_width=16)
        self.add_child(f"agg_write_sched_gen_{i}",
                       newSG,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       mux_sel=forloop_ctr.ports.mux_sel_out,
                       finished=forloop_ctr.ports.restart,
                       cycle_count=self._cycle_count,
                       valid_output=self._agg_write[i])

        # Clocked write of data_in[i] into this port's agg storage. The low
        # clog2(fetch_width) address bits pick the word; the remaining
        # upper bits pick the row (row 0 when there is only one row).
        @always_ff((posedge, "clk"))
        def agg_ctrl():
            if self._agg_write[i]:
                if self.agg_height == 1:
                    self._agg[i][0][self._agg_write_addr[i][clog2(self.fetch_width) - 1, 0]] = self._data_in[i]
                else:
                    self._agg[i][self._agg_write_addr[i]
                                 [self._agg_write_addr[0].width - 1, clog2(self.fetch_width)]]\
                                [self._agg_write_addr[i][clog2(self.fetch_width) - 1, 0]] = self._data_in[i]

        self.add_code(agg_ctrl)

        # Read side: address generator stepped by agg_read[i]; its loop
        # select/restart come from the floop_* inputs rather than a local
        # ForLoop.
        newAG = AddrGen(iterator_support=self.default_iterator_support,
                        config_width=self.agg_addr_width)
        self.add_child(f"agg_read_addr_gen_{i}",
                       newAG,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._agg_read[i],
                       restart=self._floop_restart[i])
        safe_wire(gen=self, w_to=newAG.ports.mux_sel, w_from=self._floop_mux_sel[i])
        safe_wire(gen=self, w_to=self._agg_read_addr_gen_out[i], w_from=newAG.ports.addr_out)
        # Only the low agg_read_addr.width bits of the generated address
        # are used to select a row.
        self.wire(self._agg_read_addr[i],
                  self._agg_read_addr_gen_out[i][self._agg_read_addr.width - 1, 0])

        # Now pick out the data from the agg...
        @always_comb
        def get_agg_data():
            self._agg_data_out[i] = self._agg[i][self._agg_read_addr[i]]

        self.add_code(get_agg_data)
def __init__(
        self,
        data_width=16,  # CGRA Params
        mem_width=64,
        mem_depth=512,
        banks=1,
        input_addr_iterator_support=6,
        output_addr_iterator_support=6,
        input_sched_iterator_support=6,
        output_sched_iterator_support=6,
        config_width=16,
        interconnect_input_ports=2,  # Connection to int
        interconnect_output_ports=2,
        mem_input_ports=1,
        mem_output_ports=1,
        read_delay=1,  # Cycle delay in read (SRAM vs Register File)
        rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
        agg_height=4,
        tb_height=2):
    """Build the ``strg_ub_sram_only`` generator.

    Declares the control/data inputs coming from the aggregator and
    transpose-buffer sides, the ``*_to_sram`` outputs, one AddrGen child
    per input port (write addresses) and per output port (read
    addresses), and wires the decoded address/data onto the SRAM
    interface. Finishes by registering ``self.set_sram_addr``.

    Args:
        data_width: Width of one data word.
        mem_width: Width of one memory row; ``mem_width // data_width``
            is stored as ``fetch_width``.
        mem_depth: Number of rows; ``clog2(mem_depth)`` sizes the SRAM
            address signals.
        config_width: Width of the per-port ``s_write_addr`` /
            ``s_read_addr`` variables.
        interconnect_input_ports: Number of write-side ports.
        interconnect_output_ports: Number of read-side ports.
        input_addr_iterator_support, input_sched_iterator_support,
        agg_height, tb_height: Stored on the instance only.
        banks, output_addr_iterator_support,
        output_sched_iterator_support, mem_input_ports, mem_output_ports,
        read_delay, rw_same_cycle: Accepted but not read by this
            constructor.
    """
    super().__init__("strg_ub_sram_only")

    # ------------------------------------------------------------------
    # Keep the constructor arguments on the instance
    # ------------------------------------------------------------------
    self.fetch_width = mem_width // data_width
    self.interconnect_input_ports = interconnect_input_ports
    self.interconnect_output_ports = interconnect_output_ports
    self.agg_height = agg_height
    self.mem_width = mem_width
    self.tb_height = tb_height
    self.mem_depth = mem_depth
    self.config_width = config_width
    self.data_width = data_width
    self.input_addr_iterator_support = input_addr_iterator_support
    self.input_sched_iterator_support = input_sched_iterator_support

    # Fixed sizing constants (independent of the arguments above).
    self.default_iterator_support = 6
    self.default_config_width = 16
    self.sram_iterator_support = 6
    self.agg_rd_addr_gen_width = 8

    # Shorthands for values that are used repeatedly below.
    num_in = self.interconnect_input_ports
    num_out = self.interconnect_output_ports
    sel_bits = max(clog2(self.default_iterator_support), 1)
    in_port_array = dict(size=num_in, explicit_array=True, packed=True)
    out_port_array = dict(size=num_out, explicit_array=True, packed=True)

    # ------------------------------------------------------------------
    # Ports
    # ------------------------------------------------------------------
    self._clk = self.clock("clk")
    self._rst_n = self.reset("rst_n")

    self._cycle_count = self.input("cycle_count", 16)

    # Loop select / restart for the agg -> sram direction.
    self._floop_mux_sel = self.input("floop_mux_sel", width=sel_bits, **in_port_array)
    self._floop_restart = self.input("floop_restart", width=1, **in_port_array)

    # Loop select / restart for the sram -> tb direction.
    self._loops_sram2tb_mux_sel = self.input("loops_sram2tb_mux_sel",
                                             width=sel_bits,
                                             **out_port_array)
    self._loops_sram2tb_restart = self.input("loops_sram2tb_restart",
                                             width=1,
                                             **out_port_array)

    # One strobe bit per port.
    self._agg_read = self.input("agg_read", num_in)
    self._t_read = self.input("t_read", num_out)

    # Aggregator rows; decoded further down into sram_write_data, which
    # in turn drives data_to_sram.
    self._agg_data_out = self.input(f"agg_data_out",
                                    self.data_width,
                                    size=(num_in, self.fetch_width),
                                    packed=True,
                                    explicit_array=True)
    agg_seq_attr = FormalAttr(self._agg_data_out.name,
                              FormalSignalConstraint.SEQUENCE,
                              "agg")
    self._agg_data_out.add_attribute(agg_seq_attr)

    # SRAM-facing outputs.
    self._wen_to_sram = self.output("wen_to_sram", 1, packed=True)
    self._cen_to_sram = self.output("cen_to_sram", 1, packed=True)
    self._addr_to_sram = self.output("addr_to_sram",
                                     clog2(self.mem_depth),
                                     packed=True)
    self._data_to_sram = self.output("data_to_sram",
                                     self.data_width,
                                     size=self.fetch_width,
                                     packed=True)

    # ------------------------------------------------------------------
    # Internal variables
    # ------------------------------------------------------------------
    self._s_write_addr = self.var("s_write_addr",
                                  self.config_width,
                                  size=num_in,
                                  packed=True,
                                  explicit_array=True)
    self._s_read_addr = self.var("s_read_addr",
                                 self.config_width,
                                 size=num_out,
                                 packed=True,
                                 explicit_array=True)

    self._write = self.var("write", 1)
    self._read = self.var("read", 1)
    self._addr = self.var("addr", clog2(self.mem_depth))

    self._sram_write_data = self.var("sram_write_data",
                                     data_width,
                                     size=self.fetch_width,
                                     packed=True)

    self.mem_addr_width = clog2(self.mem_depth)

    def attach_addr_gen(inst_name, idx, step_bus, restart_bus, mux_sel_bus, addr_bus):
        # Instantiate one address generator for port ``idx``: step/restart
        # are bound through add_child, while mux_sel and addr_out go
        # through safe_wire because their widths are sized independently
        # of the signals they connect to.
        addr_gen = AddrGen(iterator_support=self.default_iterator_support,
                           config_width=self.mem_addr_width)
        self.add_child(inst_name,
                       addr_gen,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=step_bus[idx],
                       restart=restart_bus[idx])
        safe_wire(gen=self, w_to=addr_gen.ports.mux_sel, w_from=mux_sel_bus[idx])
        safe_wire(gen=self, w_to=addr_bus[idx], w_from=addr_gen.ports.addr_out)

    # ------------------------------------------------------------------
    # Write-address generators (one per input port)
    # ------------------------------------------------------------------
    for i in range(num_in):
        attach_addr_gen(f"input_addr_gen_{i}", i,
                        self._agg_read,
                        self._floop_restart,
                        self._floop_mux_sel,
                        self._s_write_addr)

    # ------------------------------------------------------------------
    # Read-address generators (one per output port)
    # ------------------------------------------------------------------
    for i in range(num_out):
        attach_addr_gen(f"output_addr_gen_{i}", i,
                        self._t_read,
                        self._loops_sram2tb_restart,
                        self._loops_sram2tb_mux_sel,
                        self._s_read_addr)

    # ------------------------------------------------------------------
    # SRAM interface wiring
    # ------------------------------------------------------------------
    self.wire(self._addr_to_sram, self._addr)
    self.wire(self._data_to_sram, self._sram_write_data)
    self.wire(self._wen_to_sram, self._write)
    # Chip enable is asserted for either kind of access.
    chip_enable = self._write | self._read
    self.wire(self._cen_to_sram, chip_enable)

    # A write (read) is requested when any agg_read (t_read) bit is set.
    self.wire(self._write, self._agg_read.r_or())
    self.wire(self._read, self._t_read.r_or())

    # Pick the active port's data row via the agg_read strobes.
    selected_row = decode(self, self._agg_read, self._agg_data_out)
    self.wire(self._sram_write_data, selected_row)

    # Likewise select the active port's write / read address.
    self._write_addr = decode(self, self._agg_read, self._s_write_addr)
    self._read_addr = decode(self, self._t_read, self._s_read_addr)

    self.add_code(self.set_sram_addr)