def mem_signal_logic(self):
    # Drive the memory port signals (mem_wr_en, mem_rd_en_w, mem_addr,
    # mem_data_in, mem_data_in_bit_sel) from one of four requesters, checked
    # in fixed priority order: SRAM-config write, SRAM-config read, packet
    # write, packet read. When none is active every output is driven to 0.
    # NOTE(review): presumably registered as a kratos combinational block, so
    # the statements are kept exactly as written -- confirm before
    # restructuring.
    if self.if_sram_cfg_s.wr_en:
        # Address bit (bank_byte_offset - 1) selects which part of the
        # bank_data_width-wide word the config write lands in.
        if self.if_sram_cfg_s.wr_addr[self._params.bank_byte_offset - 1] == 0:
            # Lower part: wr_data sits in the low bits under a zero pad of
            # (bank_data_width - axi_data_width) bits; the bit-select mask is
            # all ones over the low axi_data_width bits only.
            self.mem_wr_en = 1
            self.mem_rd_en_w = 0
            self.mem_addr = self.if_sram_cfg_s.wr_addr
            self.mem_data_in = concat(
                const(
                    0, self._params.bank_data_width - self._params.axi_data_width),
                self.if_sram_cfg_s.wr_data)
            self.mem_data_in_bit_sel = concat(
                const(
                    0, self._params.bank_data_width - self._params.axi_data_width),
                const(2**self._params.axi_data_width - 1,
                      self._params.axi_data_width))
        else:
            # Upper part: the low (bank_data_width - axi_data_width) bits of
            # wr_data are placed above axi_data_width zero bits; the
            # bit-select mask is all ones over that upper field only.
            self.mem_wr_en = 1
            self.mem_rd_en_w = 0
            self.mem_addr = self.if_sram_cfg_s.wr_addr
            self.mem_data_in = concat(
                self.if_sram_cfg_s.wr_data[self._params.bank_data_width
                                           - self._params.axi_data_width - 1, 0],
                const(0, self._params.axi_data_width))
            self.mem_data_in_bit_sel = concat(
                const(
                    2**(self._params.bank_data_width - self._params.axi_data_width) - 1,
                    self._params.bank_data_width - self._params.axi_data_width),
                const(0, self._params.axi_data_width))
    elif self.if_sram_cfg_s.rd_en:
        # SRAM-config read: read enable plus address, no write data.
        self.mem_wr_en = 0
        self.mem_rd_en_w = 1
        self.mem_addr = self.if_sram_cfg_s.rd_addr
        self.mem_data_in = 0
        self.mem_data_in_bit_sel = 0
    elif self.packet_wr_en:
        # Packet write: address, data and bit-select are forwarded unchanged.
        self.mem_wr_en = 1
        self.mem_rd_en_w = 0
        self.mem_addr = self.packet_wr_addr
        self.mem_data_in = self.packet_wr_data
        self.mem_data_in_bit_sel = self.packet_wr_data_bit_sel
    elif self.packet_rd_en:
        # Packet read: read enable plus address, no write data.
        self.mem_wr_en = 0
        self.mem_rd_en_w = 1
        self.mem_addr = self.packet_rd_addr
        self.mem_data_in = 0
        self.mem_data_in_bit_sel = 0
    else:
        # Idle: no access requested.
        self.mem_wr_en = 0
        self.mem_rd_en_w = 0
        self.mem_addr = 0
        self.mem_data_in = 0
        self.mem_data_in_bit_sel = 0
def zext(gen, wire, size):
    """Return ``wire`` zero-extended to ``size`` bits.

    Args:
        gen: Generator that owns the new variable and the connecting wire.
        wire: Signal to extend; its ``width`` and ``name`` are read.
        size: Requested width in bits.

    Returns:
        ``wire`` itself when it is already at least ``size`` bits wide,
        otherwise a new ``<wire.name>_zext`` variable of ``size`` bits driven
        by ``wire`` with zeros concatenated in front of it.
    """
    pad_bits = size - wire.width
    if pad_bits <= 0:
        # Already wide enough: nothing to create.
        return wire

    extended = gen.var(f"{wire.name}_zext", size)
    zero_pad = kts.const(0, pad_bits)
    gen.wire(extended, kts.concat(zero_pad, wire))
    return extended
def add_pipeline(self):
    """Instantiate the three ``Pipeline`` children around the SRAM signals.

    * ``sram_signals_reset_high_pipeline`` carries WEB, CEB, web_demux,
      ceb_demux and BWEB to their ``_d`` copies and is built with
      ``reset_high=True``.
    * ``sram_signals_pipeline`` carries a_sram, sram_sel and D to their
      ``_d`` copies.
    * ``sram_signals_output_pipeline`` carries ``Q_w`` to ``Q``.

    Each child is stored on ``self`` under its instance name and is clocked
    by ``CLK``, reset by ``RESET`` and has its clock enable tied to 1.
    """
    input_depth = self._params.sram_gen_pipeline_depth

    def attach(inst_name, stage, src, dst):
        # Keep the child reachable as an attribute named like the instance.
        setattr(self, inst_name, stage)
        self.add_child(inst_name,
                       stage,
                       clk=self.CLK,
                       clk_en=const(1, 1),
                       reset=self.RESET,
                       in_=src,
                       out_=dst)

    # Signals pipelined with reset_high=True
    ctrl_in = concat(self.WEB, self.CEB, self.web_demux,
                     self.ceb_demux, self.BWEB)
    ctrl_out = concat(self.WEB_d, self.CEB_d, self.web_demux_d,
                      self.ceb_demux_d, self.BWEB_d)
    attach("sram_signals_reset_high_pipeline",
           Pipeline(width=ctrl_in.width, depth=input_depth, reset_high=True),
           ctrl_in,
           ctrl_out)

    # Address, macro select and write data
    payload_in = concat(self.a_sram, self.sram_sel, self.D)
    payload_out = concat(self.a_sram_d, self.sram_sel_d, self.D_d)
    attach("sram_signals_pipeline",
           Pipeline(width=payload_in.width, depth=input_depth),
           payload_in,
           payload_out)

    # Read data on its way out
    attach("sram_signals_output_pipeline",
           Pipeline(width=self.sram_macro_width,
                    depth=self._params.sram_gen_output_pipeline_depth),
           self.Q_w,
           self.Q)
def mem_ff(self):
    # Memory access, performed only while CEB is 0.
    # NOTE(review): presumably registered as a kratos sequential block, so
    # the statements are kept exactly as written. The write path is assumed
    # to be nested under the CEB check -- confirm against the original
    # layout.
    if self.CEB == 0:
        # Read: Q_w is built from the four mem entries at (A << 2) + 0..3,
        # with entry +3 as the first (leftmost) concat operand.
        self.Q_w = concat(
            self.mem[resize((self.A << 2) + 3, self.addr_width + 2)],
            self.mem[resize((self.A << 2) + 2, self.addr_width + 2)],
            self.mem[resize((self.A << 2) + 1, self.addr_width + 2)],
            self.mem[resize(
                (self.A << 2), self.addr_width + 2)])
        if self.WEB == 0:
            # Write: each bit i of D is stored only when BWEB[i] is 0. It
            # goes to entry (A << 2) + i // 16, bit position i % 16.
            for i in range(self.data_width):
                if self.BWEB[i] == 0:
                    self.mem[resize(
                        (self.A << 2) + i // 16, self.addr_width + 2)][resize(
                            i % 16, clog2(
                                self._params.cgra_data_width))] = self.D[i]
def test_regression_concat():
    """Check that a concat of two child outputs is emitted as one assign.

    Two pass-through children are added to a parent, both fed from the
    parent's input. The parent's 2-bit output is the concatenation of the
    two child outputs, and the generated Verilog must contain it as
    ``{child0_out, child1_out}``.
    """
    from kratos import concat

    child_names = ["child{0}".format(idx) for idx in range(2)]

    top = Generator("parent")
    for inst_name in child_names:
        leaf = Generator("child")
        leaf_in = leaf.input("in", 1)
        leaf_out = leaf.output("out", 1)
        leaf.wire(leaf_out, leaf_in)
        top.add_child(inst_name, leaf)

    top_in = top.input("in", 1)
    top_out = top.output("out2", 2)
    for inst_name in child_names:
        top.wire(top_in, top[inst_name].ports["in"])

    first_out = top["child0"].ports.out
    second_out = top["child1"].ports.out
    top.wire(top_out, concat(first_out, second_out))

    generated = verilog(top, optimize_passthrough=False)
    assert "assign out2 = {child0_out, child1_out};" in generated["parent"]
def __init__(self,
             agg_height,
             data_width,
             mem_width,
             max_agg_schedule):
    """Build the ``aggregation_buffer`` generator.

    Args:
        agg_height: Number of ``Aggregator`` children to instantiate.
        data_width: Bit width of ``data_in`` and of every chopped output.
        mem_width: Bit width of ``data_out`` and of each aggregator word.
        max_agg_schedule: Number of entries in the input and output
            schedule arrays (the maximum schedule length).
    """
    super().__init__("aggregation_buffer")

    self.agg_height = agg_height
    self.data_width = data_width
    self.mem_width = mem_width
    # Maximum length of the schedule
    self.max_agg_schedule = max_agg_schedule
    # Number of data words per memory word
    self.fw_int = int(self.mem_width / self.data_width)

    # Widths shared by several ports below
    sched_entry_bits = max(1, clog2(self.agg_height))
    sched_ptr_bits = clog2(self.max_agg_schedule)
    array_opts = {"explicit_array": True, "packed": True}

    # Clock and reset
    self._clk = self.clock("clk")
    self._rst_n = self.reset("rst_n")

    # Inputs: one element at a time, with valid signaling and align
    self._data_in = self.input("data_in", self.data_width)
    self._valid_in = self.input("valid_in", 1)
    self._align = self.input("align", 1)

    # Outputs
    self._data_out = self.output("data_out", self.mem_width)
    self._valid_out = self.output("valid_out", 1)

    # Every data_width slice of data_out also gets its own output port
    self._data_out_chop = []
    for word in range(self.fw_int):
        chop_port = self.output(f"data_out_chop_{word}", self.data_width)
        self._data_out_chop.append(chop_port)
        low_bit = self.data_width * word
        high_bit = (self.data_width * (word + 1)) - 1
        self.add_stmt(chop_port.assign(self._data_out[high_bit, low_bit]))

    # Configuration: the input schedule says where successive elements are
    # written...
    self._in_schedule = self.input("in_sched",
                                   sched_entry_bits,
                                   size=self.max_agg_schedule,
                                   **array_opts)
    in_sched_doc = ("Input schedule for aggregation buffer. Enumerate which"
                    f" of {self.agg_height} buffers to write to.")
    self._in_schedule.add_attribute(ConfigRegAttr(in_sched_doc))

    self._in_period = self.input("in_period", sched_ptr_bits)
    in_period_doc = ("Input period for aggregation buffer. 1 is a reasonable"
                     " setting for most applications")
    self._in_period.add_attribute(ConfigRegAttr(in_period_doc))

    # ...and the output schedule gives the order the blocks are emitted in
    self._out_schedule = self.input("out_sched",
                                    sched_entry_bits,
                                    size=self.max_agg_schedule,
                                    **array_opts)
    out_sched_doc = ("Output schedule for aggregation buffer. Enumerate which"
                     f" of {self.agg_height} buffers to write to SRAM from.")
    self._out_schedule.add_attribute(ConfigRegAttr(out_sched_doc))

    self._out_period = self.input("out_period", sched_ptr_bits)
    self._out_period.add_attribute(
        ConfigRegAttr("Output period for aggregation buffer"))

    self._in_sched_ptr = self.var("in_sched_ptr", sched_ptr_bits)
    self._out_sched_ptr = self.var("out_sched_ptr", sched_ptr_bits)

    # Local signals
    self._aggs_out = self.var("aggs_out",
                              self.mem_width,
                              size=self.agg_height,
                              packed=True,
                              explicit_array=True)
    self._aggs_sep = [
        self.var(f"aggs_sep_{agg}",
                 self.data_width,
                 size=self.fw_int,
                 packed=True)
        for agg in range(self.agg_height)
    ]
    self._valid_demux = self.var("valid_demux", self.agg_height)
    self._align_demux = self.var("align_demux", self.agg_height)
    self._next_full = self.var("next_full", self.agg_height)
    self._valid_out_mux = self.var("valid_out_mux", self.agg_height)

    # One Aggregator child per buffer, each feeding one aggs_out entry
    for agg in range(self.agg_height):
        words = self._aggs_sep[agg]
        self.add_child(f"agg_{agg}",
                       Aggregator(self.data_width,
                                  mem_word_width=self.fw_int),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       in_pixels=self._data_in,
                       valid_in=self._valid_demux[agg],
                       agg_out=words,
                       valid_out=self._valid_out_mux[agg],
                       next_full=self._next_full[agg],
                       align=self._align_demux[agg])
        if self.fw_int == 1:
            self.wire(self._aggs_out[agg], words)
        else:
            # Highest-indexed word becomes the first concat operand
            ordered = [words[word]
                       for word in reversed(range(self.fw_int))]
            self.wire(self._aggs_out[agg], kts.concat(*ordered))

    # Sequential code blocks
    self.add_code(self.update_in_sched_ptr)
    self.add_code(self.update_out_sched_ptr)

    # Combinational code blocks
    self.add_code(self.valid_demux_comb)
    self.add_code(self.align_demux_comb)
    self.add_code(self.valid_out_comb)
    self.add_code(self.output_data_comb)
def test_single_concat():
    """Concatenating a single variable must return that same object."""
    from kratos import concat

    gen = Generator("mod")
    only_var = gen.var("a", 1)
    merged = concat([only_var])
    assert merged is only_var
def __init__(self,
             fetch_width=16,
             data_width=16,
             memory_depth=32,
             num_tiles=1,
             int_in_ports=2,
             int_out_ports=2,
             strg_wr_ports=2,
             strg_rd_ports=2,
             read_delay=0,
             rw_same_cycle=True,
             separate_addresses=True):
    """Build the ``rw_arbiter`` generator.

    Args:
        fetch_width: Memory word width in bits.
        data_width: Data element width in bits; ``fetch_width / data_width``
            elements make up one memory word.
        memory_depth: Depth of one memory; must be a power of 2.
        num_tiles: Multiplied with ``memory_depth`` to size the address.
        int_in_ports: Stored on the instance; not used for any port here.
        int_out_ports: Number of read requesters (width of ``ren_in``,
            ``ren_en`` and ``out_ack``).
        strg_wr_ports: Number of storage write ports.
        strg_rd_ports: Number of storage read ports.
        read_delay: ``1`` registers ``next_read_valid`` per read port; any
            other value registers ``zero_delay_read``.
        rw_same_cycle: When true the read acknowledge is not gated by the
            write enables.
        separate_addresses: When true, separate write and read address
            outputs are created; otherwise one shared ``addr_to_mem``.

    Raises:
        AssertionError: If ``memory_depth`` is not a power of 2, or if a
            shared address is requested with mismatched port counts or with
            ``rw_same_cycle`` set.
    """
    assert not (memory_depth & (memory_depth - 1)), "Memory depth needs to be a power of 2"

    super().__init__("rw_arbiter")

    # Absorb inputs
    self.fetch_width = fetch_width
    self.data_width = data_width
    self.fw_int = int(self.fetch_width / self.data_width)
    self.int_in_ports = int_in_ports
    self.int_out_ports = int_out_ports
    self.strg_wr_ports = strg_wr_ports
    self.strg_rd_ports = strg_rd_ports
    self.memory_depth = memory_depth
    self.num_tiles = num_tiles
    self.mem_addr_width = clog2(self.num_tiles * self.memory_depth)
    self.read_delay = read_delay
    self.rw_same_cycle = rw_same_cycle
    self.separate_addresses = separate_addresses

    # Clock and reset
    self._clk = self.clock("clk")
    self._rst_n = self.reset("rst_n")

    # Generate the packed struct of the right size
    port_pkt_struct = create_port_pkt(self.fetch_width, self.int_out_ports)

    # Options and shapes shared by the array-typed signals below
    array_opts = {"explicit_array": True, "packed": True}
    wr_word_shape = (self.strg_wr_ports, self.fw_int)
    rd_word_shape = (self.strg_rd_ports, self.fw_int)

    # Inputs
    self._wen_in = self.input("wen_in", self.strg_wr_ports)
    self._w_data = self.input("w_data",
                              self.data_width,
                              size=wr_word_shape,
                              **array_opts)
    self._w_addr = self.input("w_addr",
                              self.mem_addr_width,
                              size=self.strg_wr_ports,
                              **array_opts)
    self._data_from_mem = self.input("data_from_mem",
                                     self.data_width,
                                     size=rd_word_shape,
                                     **array_opts)
    self._mem_valid_data = self.input("mem_valid_data", self.strg_rd_ports)
    self._out_mem_valid_data = self.output("out_mem_valid_data",
                                           self.strg_rd_ports)
    self._ren_in = self.input("ren_in", self.int_out_ports)
    self._ren_en = self.input("ren_en", self.int_out_ports)
    self._rd_addr = self.input("rd_addr",
                               self.mem_addr_width,
                               size=self.int_out_ports,
                               **array_opts)
    self._rd_addr_sel = self.var("rd_addr_sel",
                                 self.mem_addr_width,
                                 size=self.strg_rd_ports,
                                 **array_opts)

    # Outputs
    self._out_data = self.output("out_data",
                                 self.data_width,
                                 size=rd_word_shape,
                                 **array_opts)
    self._out_port = self.output("out_port",
                                 self.int_out_ports,
                                 size=self.strg_rd_ports,
                                 **array_opts)
    self._out_valid = self.output("out_valid", self.strg_rd_ports)
    self._cen_mem = self.output("cen_mem", self.strg_rd_ports)
    self._wen_mem = self.output("wen_mem", self.strg_wr_ports)
    self._data_to_mem = self.output("data_to_mem",
                                    self.data_width,
                                    size=wr_word_shape,
                                    **array_opts)

    if self.separate_addresses:
        # Independent write and read addresses
        self._wr_addr_to_mem = self.output("wr_addr_to_mem",
                                           self.mem_addr_width,
                                           size=self.strg_wr_ports,
                                           **array_opts)
        self._rd_addr_to_mem = self.output("rd_addr_to_mem",
                                           self.mem_addr_width,
                                           size=self.strg_rd_ports,
                                           **array_opts)
    else:
        # A combined address requires matching port counts and no
        # same-cycle read/write
        assert self.strg_rd_ports == self.strg_wr_ports, \
            "Cannot have coalesced address with mismatch port count"
        assert not self.rw_same_cycle, \
            "Cannot read and write with a shared address...set rw_same_cycle to false"
        self._addr_to_mem = self.output("addr_to_mem",
                                        self.mem_addr_width,
                                        size=self.strg_rd_ports,
                                        **array_opts)

    self._out_ack = self.output("out_ack", self.int_out_ports)

    # Local signals
    self._wen_int = self.var("wen_int", self.strg_wr_ports)
    self._ren_int = self.var("ren_int", self.int_out_ports)
    self.wire(self._ren_int, self._ren_in & self._ren_en)
    self.wire(self._wen_int, self._wen_in)

    self._rd_valid = self.var("rd_valid", self.strg_rd_ports)
    self._rd_port = self.var("rd_port",
                             self.int_out_ports,
                             size=self.strg_rd_ports,
                             **array_opts)
    self._next_rd_port = self.var("next_rd_port",
                                  self.int_out_ports,
                                  size=self.strg_rd_ports,
                                  **array_opts)

    # For demuxing the read ports
    self._done = self.var("done", self.strg_rd_ports)
    self.add_code(self.set_next_read_port_lowest)

    # Read ports beyond the first are set by a separate code block each
    extra_rd_ports = self.strg_rd_ports - 1
    if extra_rd_ports > 0:
        self._idx_cnt = self.var("idx_cnt",
                                 5,
                                 size=extra_rd_ports,
                                 **array_opts)
        for rd_port in range(1, self.strg_rd_ports):
            self.add_code(self.set_next_read_port_alt, index=rd_port)

    # OR-reduce, per requester, its bit across all storage read ports
    self._next_rd_port_red = self.var("next_rd_port_red", self.int_out_ports)
    for requester in range(self.int_out_ports):
        gathered = self._next_rd_port[0][requester]
        for rd_port in range(1, self.strg_rd_ports):
            gathered = kts.concat(gathered,
                                  self._next_rd_port[rd_port][requester])
        self.wire(self._next_rd_port_red[requester], gathered.r_or())

    # The next read port acknowledges reads; the ack only needs gating by
    # the write enables when reads and writes cannot share a cycle
    ack_source = self._next_rd_port_red
    if not self.rw_same_cycle:
        no_write = kts.concat(*([~self._wen_int] * self._out_ack.width))
        ack_source = ack_source & no_write
    self.wire(self._out_ack, ack_source)

    # Memory control blocks, one per storage port
    if self.separate_addresses:
        for wr_port in range(self.strg_wr_ports):
            self.add_code(self.mem_controls_wr, idx=wr_port)
        for rd_port in range(self.strg_rd_ports):
            self.add_code(self.mem_controls_rd, idx=rd_port)
    else:
        for rd_port in range(self.strg_rd_ports):
            self.add_code(self.mem_controls_combined, idx=rd_port)

    # Read-valid generation depends on the configured read delay
    if self.read_delay == 1:
        for rd_port in range(self.strg_rd_ports):
            self.add_code(self.next_read_valid, idx=rd_port)
    else:
        for rd_port in range(self.strg_rd_ports):
            self.add_code(self.zero_delay_read, idx=rd_port)

    self.add_code(self.output_stage)
def __init__(self,
             data_width,
             config_addr_width,
             addr_width,
             fetch_width,
             total_sets,
             sets_per_macro):
    """Build the ``storage_config_seq`` generator.

    Args:
        data_width: Width of the config data and of each data element.
        config_addr_width: Width of ``config_addr_in``.
        addr_width: Width of ``addr_out``.
        fetch_width: Memory word width; ``fetch_width / data_width``
            elements make up one word.
        total_sets: Width of ``config_en`` and number of ``rd_data_out``
            entries.
        sets_per_macro: Number of sets per bank; ``total_sets /
            sets_per_macro`` gives the number of banks.
    """
    super().__init__("storage_config_seq")

    self.data_width = data_width
    self.config_addr_width = config_addr_width
    self.addr_width = addr_width
    self.fetch_width = fetch_width
    self.fw_int = int(self.fetch_width / self.data_width)
    self.total_sets = total_sets
    self.sets_per_macro = sets_per_macro
    self.banks = int(self.total_sets / self.sets_per_macro)
    self.set_addr_width = clog2(total_sets)

    array_opts = {"explicit_array": True, "packed": True}

    # Clock and reset
    self._clk = self.clock("clk")
    self._rst_n = self.reset("rst_n")

    # Configuration interface
    self._config_data_in = self.input("config_data_in", self.data_width)
    self._config_addr_in = self.input("config_addr_in",
                                      self.config_addr_width)
    self._config_wr = self.input("config_wr", 1)
    self._config_rd = self.input("config_rd", 1)
    self._config_en = self.input("config_en", self.total_sets)
    self._clk_en = self.input("clk_en", 1)

    # Storage-side data
    self._rd_data_stg = self.input("rd_data_stg",
                                   self.data_width,
                                   size=(self.banks, self.fw_int),
                                   **array_opts)
    self._wr_data = self.output("wr_data",
                                self.data_width,
                                size=self.fw_int,
                                **array_opts)
    self._rd_data_out = self.output("rd_data_out",
                                    self.data_width,
                                    size=self.total_sets,
                                    **array_opts)
    self._addr_out = self.output("addr_out", self.addr_width)

    # Address generation
    if self.sets_per_macro == 1:
        # One set per macro: the config address goes straight through,
        # zero-padded or truncated to addr_width
        pad_bits = self.addr_width - self.config_addr_width
        if pad_bits > 0:
            addr_source = kts.concat(kts.const(0, pad_bits),
                                     self._config_addr_in)
        else:
            addr_source = self._config_addr_in[self.addr_width - 1, 0]
        self.wire(self._addr_out, addr_source)
    else:
        set_bits = clog2(self.sets_per_macro)
        pad_bits = self.addr_width - self.config_addr_width - set_bits
        self._set_to_addr = self.var("set_to_addr", set_bits)
        self._reduce_en = self.var("reduce_en", self.sets_per_macro)
        # reduce_en[s] ORs the enable of set position s across all banks
        for set_pos in range(self.sets_per_macro):
            gathered = self._config_en[set_pos]
            for bank in range(1, self.banks):
                gathered = kts.concat(
                    gathered,
                    self._config_en[set_pos + (self.sets_per_macro * bank)])
            self.wire(self._reduce_en[set_pos], gathered.r_or())
        self.add_code(self.demux_set_addr)
        if pad_bits > 0:
            addr_source = kts.concat(kts.const(0, pad_bits),
                                     self._set_to_addr,
                                     self._config_addr_in)
        else:
            addr_source = kts.concat(self._set_to_addr,
                                     self._config_addr_in)
        self.wire(self._addr_out, addr_source)

    self._wen_out = self.output("wen_out", self.banks)
    self._ren_out = self.output("ren_out", self.banks)

    # Data passing
    if self.fw_int == 1:
        # Word width equals data width: pass everything straight through
        self.wire(self._wr_data[0], self._config_data_in)
        for bank in range(self.banks):
            for offset in range(self.sets_per_macro):
                set_idx = bank * self.sets_per_macro + offset
                self.wire(self._rd_data_out[set_idx],
                          self._rd_data_stg[bank])
    else:
        self._data_wr_reg = self.var("data_wr_reg",
                                     self.data_width,
                                     size=self.fw_int - 1,
                                     packed=True,
                                     explicit_array=True)
        # Word counters for repeated reads/writes
        cnt_bits = clog2(self.fw_int)
        self._cnt = self.var("cnt", cnt_bits)
        self._rd_cnt = self.var("rd_cnt", cnt_bits)
        self.add_code(self.update_cnt)
        self.add_code(self.update_rd_cnt)

        # Every set of a bank reads the element selected by rd_cnt
        for bank in range(self.banks):
            for offset in range(self.sets_per_macro):
                set_idx = bank * self.sets_per_macro + offset
                self.wire(self._rd_data_out[set_idx],
                          self._rd_data_stg[bank][self._rd_cnt])

        # Deal with writing to the data buffer
        self.add_code(self.write_buffer)

        # Buffered elements fill the low slots, the incoming element the
        # last one
        last_slot = self.fw_int - 1
        for slot in range(last_slot):
            self.wire(self._wr_data[slot], self._data_wr_reg[slot])
        self.wire(self._wr_data[last_slot], self._config_data_in)

    # Write/read enables
    if self.banks == 1:
        # A single bank is always the target
        wen_source = self._config_wr
        if self.fw_int != 1:
            # Only write once the last element of the word arrives
            wen_source = wen_source & (self._cnt == (self.fw_int - 1))
        self.wire(self._wen_out, wen_source)
        self.wire(self._ren_out, self._config_rd)
    else:
        # A bank is addressed when any of its sets is enabled
        sets = self.sets_per_macro
        for bank in range(self.banks):
            high, low = (bank + 1) * sets - 1, bank * sets
            wen_source = self._config_wr & self._config_en[high, low].r_or()
            if self.fw_int != 1:
                wen_source = wen_source & (self._cnt == (self.fw_int - 1))
            self.wire(self._wen_out[bank], wen_source)
        for bank in range(self.banks):
            high, low = (bank + 1) * sets - 1, bank * sets
            self.wire(self._ren_out[bank],
                      self._config_rd & self._config_en[high, low].r_or())
def __init__(self,
             interconnect_input_ports,
             interconnect_output_ports,
             depth_width=16,
             sprt_stcl_valid=False,
             stcl_cnt_width=16,
             stcl_iter_support=4):
    """Build the ``app_ctrl`` generator.

    Args:
        interconnect_input_ports: Number of input (write) ports.
        interconnect_output_ports: Number of output (read) ports.
        depth_width: Bit width of the depth configuration registers and of
            the read/write counters.
        sprt_stcl_valid: When true, ``valid_out_stencil`` comes from
            dimension counters compared against thresholds; otherwise it is
            wired to ``tb_valid``.
        stcl_cnt_width: Bit width of each stencil counter, range and
            threshold.
        stcl_iter_support: Number of stencil counter dimensions.
    """
    super().__init__("app_ctrl", debug=True)

    self.int_in_ports = interconnect_input_ports
    self.int_out_ports = interconnect_output_ports
    self.depth_width = depth_width
    self.sprt_stcl_valid = sprt_stcl_valid
    self.stcl_cnt_width = stcl_cnt_width
    self.stcl_iter_support = stcl_iter_support

    # Clock and reset
    self._clk = self.clock("clk")
    self._rst_n = self.reset("rst_n")

    # IO
    self._wen_in = self.input("wen_in", self.int_in_ports)
    self._ren_in = self.input("ren_in", self.int_out_ports)
    self._ren_update = self.input("ren_update", self.int_out_ports)
    self._tb_valid = self.input("tb_valid", self.int_out_ports)
    self._valid_out_data = self.output("valid_out_data", self.int_out_ports)
    self._valid_out_stencil = self.output("valid_out_stencil",
                                          self.int_out_ports)

    if self.sprt_stcl_valid:
        stencil_opts = {"size": self.stcl_iter_support,
                        "packed": True,
                        "explicit_array": True}

        # Config registers to watch
        self._ranges = self.input("ranges",
                                  self.stcl_cnt_width,
                                  **stencil_opts)
        self._ranges.add_attribute(
            ConfigRegAttr("Ranges of stencil valid generator"))
        self._threshold = self.input("threshold",
                                     self.stcl_cnt_width,
                                     **stencil_opts)
        self._threshold.add_attribute(
            ConfigRegAttr("Threshold of stencil valid generator"))
        self._dim_counter = self.var("dim_counter",
                                     self.stcl_cnt_width,
                                     **stencil_opts)

        # update[0] is constant 1; each later bit needs the previous
        # dimension at its last value and the previous update bit set
        self._update = self.var("update", self.stcl_iter_support)
        self.wire(self._update[0], const(1, 1))
        for dim in range(self.stcl_iter_support - 1):
            at_last_value = self._dim_counter[dim] == (self._ranges[dim] - 1)
            self.wire(self._update[dim + 1], at_last_value & self._update[dim])

        for dim in range(self.stcl_iter_support):
            self.add_code(self.dim_counter_update, idx=dim)

        # Stencil valid: every counter has reached its threshold. Each
        # output port gets the same reduction.
        threshold_comps = [self._dim_counter[dim] >= self._threshold[dim]
                           for dim in range(self.stcl_iter_support)]
        self.wire(self._valid_out_stencil[0],
                  kts.concat(*threshold_comps).r_and())
        for port in range(1, self.int_out_ports):
            self.wire(self._valid_out_stencil[port],
                      kts.concat(*threshold_comps).r_and())
    else:
        # Send tb valid to the stencil valid output
        self.wire(self._valid_out_stencil, self._tb_valid)

    # Gate the data valid with the stencil valid
    self.wire(self._valid_out_data, self._tb_valid & self._valid_out_stencil)

    self._wr_delay_state_n = self.var("wr_delay_state_n", self.int_out_ports)

    self._wen_out = self.output("wen_out", self.int_in_ports)
    self._ren_out = self.output("ren_out", self.int_out_ports)

    in_array_opts = {"size": self.int_in_ports,
                     "explicit_array": True,
                     "packed": True}
    out_array_opts = {"size": self.int_out_ports,
                      "explicit_array": True,
                      "packed": True}

    # Two configurable write depths; wr_delay_state_n picks one per port
    self._write_depth_wo = self.input("write_depth_wo",
                                      self.depth_width,
                                      **in_array_opts)
    self._write_depth_wo.add_attribute(ConfigRegAttr("Depth of writes"))
    self._write_depth_ss = self.input("write_depth_ss",
                                      self.depth_width,
                                      **in_array_opts)
    self._write_depth_ss.add_attribute(ConfigRegAttr("Depth of writes"))
    self._write_depth = self.var("write_depth",
                                 self.depth_width,
                                 **in_array_opts)
    for in_port in range(self.int_in_ports):
        chosen_depth = kts.ternary(self._wr_delay_state_n[in_port],
                                   self._write_depth_ss[in_port],
                                   self._write_depth_wo[in_port])
        self.wire(self._write_depth[in_port], chosen_depth)

    self._read_depth = self.input("read_depth",
                                  self.depth_width,
                                  **out_array_opts)
    self._read_depth.add_attribute(ConfigRegAttr("Depth of reads"))

    # Counters and done flags
    self._write_count = self.var("write_count",
                                 self.depth_width,
                                 **in_array_opts)
    self._read_count = self.var("read_count",
                                self.depth_width,
                                **out_array_opts)
    self._write_done = self.var("write_done", self.int_in_ports)
    self._write_done_ff = self.var("write_done_ff", self.int_in_ports)
    self._read_done = self.var("read_done", self.int_out_ports)
    self._read_done_ff = self.var("read_done_ff", self.int_out_ports)

    # Port association config registers
    self.in_port_bits = max(1, kts.clog2(self.int_in_ports))
    self._input_port = self.input("input_port",
                                  self.in_port_bits,
                                  **out_array_opts)
    self._input_port.add_attribute(
        ConfigRegAttr("Relative input port for an output port"))

    self.out_port_bits = max(1, kts.clog2(self.int_out_ports))
    self._output_port = self.input("output_port",
                                   self.out_port_bits,
                                   **in_array_opts)
    self._output_port.add_attribute(
        ConfigRegAttr("Relative output port for an input port"))

    self._prefill = self.input("prefill", self.int_out_ports)
    self._prefill.add_attribute(
        ConfigRegAttr("Is the input stream prewritten?"))

    # A single input port uses the dedicated "_one_wr" code blocks
    if self.int_in_ports == 1:
        read_done_ff_block = self.set_read_done_ff_one_wr
        read_cnt_block = self.set_read_cnt_one_wr
        wr_delay_block = self.set_wr_delay_state_one_wr
    else:
        read_done_ff_block = self.set_read_done_ff
        read_cnt_block = self.set_read_cnt
        wr_delay_block = self.set_wr_delay_state

    for out_port in range(self.int_out_ports):
        self.add_code(self.set_read_done, idx=out_port)
        self.add_code(read_done_ff_block, idx=out_port)

    for in_port in range(self.int_in_ports):
        self.add_code(self.set_write_done, idx=in_port)
        self.add_code(self.set_write_done_ff, idx=in_port)

    for in_port in range(self.int_in_ports):
        self.add_code(self.set_write_cnt, idx=in_port)

    for out_port in range(self.int_out_ports):
        self.add_code(read_cnt_block, idx=out_port)

    for out_port in range(self.int_out_ports):
        self.add_code(wr_delay_block, idx=out_port)

    # A port reads only when its read depth is non-zero
    self._read_on = self.var("read_on", self.int_out_ports)
    for out_port in range(self.int_out_ports):
        self.wire(self._read_on[out_port], self._read_depth[out_port].r_or())

    # With prefill enabled the initial delay step is skipped
    ren_source = self._wr_delay_state_n | self._prefill
    ren_source = ren_source & ~self._read_done_ff
    ren_source = ren_source & self._ren_in
    ren_source = ren_source & self._read_on
    self.wire(self._ren_out, ren_source)

    self.wire(self._wen_out, ~self._write_done_ff & self._wen_in)
def __init__(self, _params: GlobalBufferParams): super().__init__("global_buffer") self._params = _params self.header = GlbHeader(self._params) self.clk = self.clock("clk") self.stall = self.input("stall", self._params.num_glb_tiles) self.reset = self.reset("reset") # TODO: Why cgra_stall has same width as num_glb_tiles self.cgra_stall_in = self.input("cgra_stall_in", self._params.num_glb_tiles) self.cgra_stall = self.output( "cgra_stall", 1, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.proc_wr_en = self.input("proc_wr_en", 1) self.proc_wr_strb = self.input("proc_wr_strb", self._params.bank_data_width // 8) self.proc_wr_addr = self.input("proc_wr_addr", self._params.glb_addr_width) self.proc_wr_data = self.input("proc_wr_data", self._params.bank_data_width) self.proc_rd_en = self.input("proc_rd_en", 1) self.proc_rd_addr = self.input("proc_rd_addr", self._params.glb_addr_width) self.proc_rd_data = self.output("proc_rd_data", self._params.bank_data_width) self.proc_rd_data_valid = self.output("proc_rd_data_valid", 1) self.if_cfg_wr_en = self.input("if_cfg_wr_en", 1) self.if_cfg_wr_addr = self.input("if_cfg_wr_addr", self._params.axi_addr_width) self.if_cfg_wr_data = self.input("if_cfg_wr_data", self._params.axi_data_width) self.if_cfg_rd_en = self.input("if_cfg_rd_en", 1) self.if_cfg_rd_addr = self.input("if_cfg_rd_addr", self._params.axi_addr_width) self.if_cfg_rd_data = self.output("if_cfg_rd_data", self._params.axi_data_width) self.if_cfg_rd_data_valid = self.output("if_cfg_rd_data_valid", 1) self.if_sram_cfg_wr_en = self.input("if_sram_cfg_wr_en", 1) self.if_sram_cfg_wr_addr = self.input("if_sram_cfg_wr_addr", self._params.glb_addr_width) self.if_sram_cfg_wr_data = self.input("if_sram_cfg_wr_data", self._params.axi_data_width) self.if_sram_cfg_rd_en = self.input("if_sram_cfg_rd_en", 1) self.if_sram_cfg_rd_addr = self.input("if_sram_cfg_rd_addr", self._params.glb_addr_width) self.if_sram_cfg_rd_data = 
self.output("if_sram_cfg_rd_data", self._params.axi_data_width) self.if_sram_cfg_rd_data_valid = self.output( "if_sram_cfg_rd_data_valid", 1) self.cgra_cfg_jtag_gc2glb_wr_en = self.input( "cgra_cfg_jtag_gc2glb_wr_en", 1) self.cgra_cfg_jtag_gc2glb_rd_en = self.input( "cgra_cfg_jtag_gc2glb_rd_en", 1) self.cgra_cfg_jtag_gc2glb_addr = self.input( "cgra_cfg_jtag_gc2glb_addr", self._params.cgra_cfg_addr_width) self.cgra_cfg_jtag_gc2glb_data = self.input( "cgra_cfg_jtag_gc2glb_data", self._params.cgra_cfg_data_width) self.stream_data_f2g = self.input( "stream_data_f2g", self._params.cgra_data_width, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.stream_data_valid_f2g = self.input( "stream_data_valid_f2g", 1, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.stream_data_g2f = self.output( "stream_data_g2f", self._params.cgra_data_width, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.stream_data_valid_g2f = self.output( "stream_data_valid_g2f", 1, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.cgra_cfg_g2f_cfg_wr_en = self.output( "cgra_cfg_g2f_cfg_wr_en", 1, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.cgra_cfg_g2f_cfg_rd_en = self.output( "cgra_cfg_g2f_cfg_rd_en", 1, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.cgra_cfg_g2f_cfg_addr = self.output( "cgra_cfg_g2f_cfg_addr", self._params.cgra_cfg_addr_width, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.cgra_cfg_g2f_cfg_data = self.output( "cgra_cfg_g2f_cfg_data", self._params.cgra_cfg_data_width, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.strm_g2f_start_pulse = self.input("strm_g2f_start_pulse", self._params.num_glb_tiles) self.strm_f2g_start_pulse = self.input("strm_f2g_start_pulse", self._params.num_glb_tiles) self.pcfg_start_pulse = 
self.input("pcfg_start_pulse", self._params.num_glb_tiles) self.strm_f2g_interrupt_pulse = self.output("strm_f2g_interrupt_pulse", self._params.num_glb_tiles) self.strm_g2f_interrupt_pulse = self.output("strm_g2f_interrupt_pulse", self._params.num_glb_tiles) self.pcfg_g2f_interrupt_pulse = self.output("pcfg_g2f_interrupt_pulse", self._params.num_glb_tiles) # local variables self.cgra_cfg_jtag_gc2glb_wr_en_d = self.var( "cgra_cfg_jtag_gc2glb_wr_en_d", 1) self.cgra_cfg_jtag_gc2glb_rd_en_d = self.var( "cgra_cfg_jtag_gc2glb_rd_en_d", 1) self.cgra_cfg_jtag_gc2glb_addr_d = self.var( "cgra_cfg_jtag_gc2glb_addr_d", self._params.cgra_cfg_addr_width) self.cgra_cfg_jtag_gc2glb_data_d = self.var( "cgra_cfg_jtag_gc2glb_data_d", self._params.cgra_cfg_data_width) self.proc_packet_d = self.var("proc_packet_d", self.header.packet_t) self.proc_packet_e2w_esti = self.var("proc_packet_e2w_esti", self.header.packet_t, size=self._params.num_glb_tiles, packed=True) self.proc_packet_w2e_wsti = self.var("proc_packet_w2e_wsti", self.header.packet_t, size=self._params.num_glb_tiles, packed=True) self.proc_packet_e2w_wsto = self.var("proc_packet_e2w_wsto", self.header.packet_t, size=self._params.num_glb_tiles, packed=True) self.proc_packet_w2e_esto = self.var("proc_packet_w2e_esto", self.header.packet_t, size=self._params.num_glb_tiles, packed=True) self.strm_packet_e2w_esti = self.var("strm_packet_e2w_esti", self.header.packet_t, size=self._params.num_glb_tiles, packed=True) self.strm_packet_w2e_wsti = self.var("strm_packet_w2e_wsti", self.header.packet_t, size=self._params.num_glb_tiles, packed=True) self.strm_packet_e2w_wsto = self.var("strm_packet_e2w_wsto", self.header.packet_t, size=self._params.num_glb_tiles, packed=True) self.strm_packet_w2e_esto = self.var("strm_packet_w2e_esto", self.header.packet_t, size=self._params.num_glb_tiles, packed=True) self.pcfg_packet_e2w_esti = self.var("pcfg_packet_e2w_esti", self.header.rd_packet_t, size=self._params.num_glb_tiles, packed=True) 
self.pcfg_packet_w2e_wsti = self.var("pcfg_packet_w2e_wsti", self.header.rd_packet_t, size=self._params.num_glb_tiles, packed=True) self.pcfg_packet_e2w_wsto = self.var("pcfg_packet_e2w_wsto", self.header.rd_packet_t, size=self._params.num_glb_tiles, packed=True) self.pcfg_packet_w2e_esto = self.var("pcfg_packet_w2e_esto", self.header.rd_packet_t, size=self._params.num_glb_tiles, packed=True) self.cfg_tile_connected = self.var("cfg_tile_connected", self._params.num_glb_tiles + 1) self.cfg_pcfg_tile_connected = self.var("cfg_pcfg_tile_connected", self._params.num_glb_tiles + 1) self.wire(self.cfg_tile_connected[0], 0) self.wire(self.cfg_pcfg_tile_connected[0], 0) self.cgra_cfg_jtag_wsti_wr_en = self.var( "cgra_cfg_jtag_wsti_wr_en", 1, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_jtag_wsti_rd_en = self.var( "cgra_cfg_jtag_wsti_rd_en", 1, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_jtag_wsti_addr = self.var( "cgra_cfg_jtag_wsti_addr", self._params.cgra_cfg_addr_width, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_jtag_wsti_data = self.var( "cgra_cfg_jtag_wsti_data", self._params.cgra_cfg_data_width, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_jtag_esto_wr_en = self.var( "cgra_cfg_jtag_esto_wr_en", 1, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_jtag_esto_rd_en = self.var( "cgra_cfg_jtag_esto_rd_en", 1, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_jtag_esto_addr = self.var( "cgra_cfg_jtag_esto_addr", self._params.cgra_cfg_addr_width, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_jtag_esto_data = self.var( "cgra_cfg_jtag_esto_data", self._params.cgra_cfg_data_width, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_jtag_wsti_rd_en_bypass = self.var( "cgra_cfg_jtag_wsti_rd_en_bypass", 1, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_jtag_wsti_addr_bypass = self.var( "cgra_cfg_jtag_wsti_addr_bypass", self._params.cgra_cfg_addr_width, 
size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_jtag_esto_rd_en_bypass = self.var( "cgra_cfg_jtag_esto_rd_en_bypass", 1, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_jtag_esto_addr_bypass = self.var( "cgra_cfg_jtag_esto_addr_bypass", self._params.cgra_cfg_addr_width, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_pcfg_wsti_wr_en = self.var( "cgra_cfg_pcfg_wsti_wr_en", 1, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_pcfg_wsti_rd_en = self.var( "cgra_cfg_pcfg_wsti_rd_en", 1, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_pcfg_wsti_addr = self.var( "cgra_cfg_pcfg_wsti_addr", self._params.cgra_cfg_addr_width, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_pcfg_wsti_data = self.var( "cgra_cfg_pcfg_wsti_data", self._params.cgra_cfg_data_width, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_pcfg_esto_wr_en = self.var( "cgra_cfg_pcfg_esto_wr_en", 1, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_pcfg_esto_rd_en = self.var( "cgra_cfg_pcfg_esto_rd_en", 1, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_pcfg_esto_addr = self.var( "cgra_cfg_pcfg_esto_addr", self._params.cgra_cfg_addr_width, size=self._params.num_glb_tiles, packed=True) self.cgra_cfg_pcfg_esto_data = self.var( "cgra_cfg_pcfg_esto_data", self._params.cgra_cfg_data_width, size=self._params.num_glb_tiles, packed=True) self.stall_w = self.var("stall_w", self._params.num_glb_tiles) self.stall_d = self.var("stall_d", self._params.num_glb_tiles) self.wire(self.stall_w, self.stall) self.cgra_stall_in_w = self.var("cgra_stall_in_w", self._params.num_glb_tiles) self.cgra_stall_in_d = self.var("cgra_stall_in_d", self._params.num_glb_tiles) self.wire(self.cgra_stall_in_w, self.cgra_stall_in) for i in range(self._params.num_glb_tiles): self.wire( self.cgra_stall[i], concat(*[self.cgra_stall_in_d[i]] * self._params.cgra_per_glb)) self.strm_g2f_start_pulse_w = self.var("strm_g2f_start_pulse_w", 
self._params.num_glb_tiles) self.strm_g2f_start_pulse_d = self.var("strm_g2f_start_pulse_d", self._params.num_glb_tiles) self.wire(self.strm_g2f_start_pulse, self.strm_g2f_start_pulse_w) self.strm_f2g_start_pulse_w = self.var("strm_f2g_start_pulse_w", self._params.num_glb_tiles) self.strm_f2g_start_pulse_d = self.var("strm_f2g_start_pulse_d", self._params.num_glb_tiles) self.wire(self.strm_f2g_start_pulse, self.strm_f2g_start_pulse_w) self.pcfg_start_pulse_w = self.var("pcfg_start_pulse_w", self._params.num_glb_tiles) self.pcfg_start_pulse_d = self.var("pcfg_start_pulse_d", self._params.num_glb_tiles) self.wire(self.pcfg_start_pulse, self.pcfg_start_pulse_w) self.strm_f2g_interrupt_pulse_w = self.var( "strm_f2g_interrupt_pulse_w", self._params.num_glb_tiles) self.strm_f2g_interrupt_pulse_d = self.var( "strm_f2g_interrupt_pulse_d", self._params.num_glb_tiles) self.wire(self.strm_f2g_interrupt_pulse_d, self.strm_f2g_interrupt_pulse) self.strm_g2f_interrupt_pulse_w = self.var( "strm_g2f_interrupt_pulse_w", self._params.num_glb_tiles) self.strm_g2f_interrupt_pulse_d = self.var( "strm_g2f_interrupt_pulse_d", self._params.num_glb_tiles) self.wire(self.strm_g2f_interrupt_pulse_d, self.strm_g2f_interrupt_pulse) self.pcfg_g2f_interrupt_pulse_w = self.var( "pcfg_g2f_interrupt_pulse_w", self._params.num_glb_tiles) self.pcfg_g2f_interrupt_pulse_d = self.var( "pcfg_g2f_interrupt_pulse_d", self._params.num_glb_tiles) self.wire(self.pcfg_g2f_interrupt_pulse_d, self.pcfg_g2f_interrupt_pulse) self.cgra_cfg_g2f_cfg_wr_en_w = self.var( "cgra_cfg_g2f_cfg_wr_en_w", 1, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.cgra_cfg_g2f_cfg_wr_en_d = self.var( "cgra_cfg_g2f_cfg_wr_en_d", 1, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.wire(self.cgra_cfg_g2f_cfg_wr_en_d, self.cgra_cfg_g2f_cfg_wr_en) self.cgra_cfg_g2f_cfg_rd_en_w = self.var( "cgra_cfg_g2f_cfg_rd_en_w", 1, size=[self._params.num_glb_tiles, 
self._params.cgra_per_glb], packed=True) self.cgra_cfg_g2f_cfg_rd_en_d = self.var( "cgra_cfg_g2f_cfg_rd_en_d", 1, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.wire(self.cgra_cfg_g2f_cfg_rd_en_d, self.cgra_cfg_g2f_cfg_rd_en) self.cgra_cfg_g2f_cfg_addr_w = self.var( "cgra_cfg_g2f_cfg_addr_w", self._params.cgra_cfg_addr_width, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.cgra_cfg_g2f_cfg_addr_d = self.var( "cgra_cfg_g2f_cfg_addr_d", self._params.cgra_cfg_addr_width, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.wire(self.cgra_cfg_g2f_cfg_addr_d, self.cgra_cfg_g2f_cfg_addr) self.cgra_cfg_g2f_cfg_data_w = self.var( "cgra_cfg_g2f_cfg_data_w", self._params.cgra_cfg_data_width, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.cgra_cfg_g2f_cfg_data_d = self.var( "cgra_cfg_g2f_cfg_data_d", self._params.cgra_cfg_data_width, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.wire(self.cgra_cfg_g2f_cfg_data_d, self.cgra_cfg_g2f_cfg_data) self.stream_data_f2g_w = self.var( "stream_data_f2g_w", self._params.cgra_data_width, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.stream_data_f2g_d = self.var( "stream_data_f2g_d", self._params.cgra_data_width, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.wire(self.stream_data_f2g, self.stream_data_f2g_w) self.stream_data_valid_f2g_w = self.var( "stream_data_valid_f2g_w", 1, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.stream_data_valid_f2g_d = self.var( "stream_data_valid_f2g_d", 1, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.wire(self.stream_data_valid_f2g, self.stream_data_valid_f2g_w) self.stream_data_g2f_w = self.var( "stream_data_g2f_w", self._params.cgra_data_width, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], 
packed=True) self.stream_data_g2f_d = self.var( "stream_data_g2f_d", self._params.cgra_data_width, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.wire(self.stream_data_g2f_d, self.stream_data_g2f) self.stream_data_valid_g2f_w = self.var( "stream_data_valid_g2f_w", 1, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.stream_data_valid_g2f_d = self.var( "stream_data_valid_g2f_d", 1, size=[self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.wire(self.stream_data_valid_g2f_d, self.stream_data_valid_g2f) # interface if_cfg_tile2tile = GlbConfigInterface( addr_width=self._params.axi_addr_width, data_width=self._params.axi_data_width) if_sram_cfg_tile2tile = GlbConfigInterface( addr_width=self._params.glb_addr_width, data_width=self._params.axi_data_width) self.if_cfg_list = [] self.if_sram_cfg_list = [] for i in range(self._params.num_glb_tiles + 1): self.if_cfg_list.append( self.interface(if_cfg_tile2tile, f"if_cfg_tile2tile_{i}")) self.if_sram_cfg_list.append( self.interface(if_sram_cfg_tile2tile, f"if_sram_cfg_tile2tile_{i}")) self.glb_tile = [] for i in range(self._params.num_glb_tiles): self.glb_tile.append(GlbTile(_params=self._params)) self.wire(self.if_cfg_list[-1].rd_data, 0) self.wire(self.if_cfg_list[-1].rd_data_valid, 0) self.wire(self.if_sram_cfg_list[-1].rd_data, 0) self.wire(self.if_sram_cfg_list[-1].rd_data_valid, 0) self.add_glb_tile() self.add_always(self.left_edge_proc_ff) self.add_always(self.left_edge_cfg_ff) self.add_always(self.left_edge_sram_cfg_ff) self.add_always(self.left_edge_cgra_cfg_ff) self.tile2tile_e2w_wiring() self.tile2tile_w2e_wiring() self.add_always(self.tile2tile_w2e_cfg_wiring) self.add_always(self.interrupt_pipeline) self.add_always(self.start_pulse_pipeline) self.add_always(self.stall_pipeline) self.add_always(self.stream_data_pipeline) self.add_always(self.cgra_cfg_pcfg_pipeline)
def __init__(self,
             interconnect_input_ports=2,
             mem_depth=32,
             num_tiles=1,
             banks=1,
             iterator_support=6,
             address_width=5,
             data_width=16,
             fetch_width=16,
             multiwrite=1,
             strg_wr_ports=2,
             config_width=16):
    """Declare the ports, variables, wiring and child address generators
    of the "input_addr_ctrl" generator.

    Args:
        interconnect_input_ports: Number of input ports. Sets the width of
            ``valid_in``/``wen_en``/``port_out`` and the number of
            ``AddrGen`` children that are instantiated.
        mem_depth: Combined with ``num_tiles`` to size the memory address:
            ``mem_addr_width = clog2(num_tiles * mem_depth)``.
        num_tiles: See ``mem_depth``.
        banks: Number of banks. Contributes ``clog2(banks)`` address bits
            when greater than 1, otherwise none.
        iterator_support: Forwarded to every ``AddrGen`` child.
        address_width: Accepted for interface compatibility only. The
            stored ``self.address_width`` is always recomputed as
            ``mem_addr_width + bank_addr_width``; this argument is not
            used.
        data_width: Bit width of one data word.
        fetch_width: Bit width of one fetch; ``fetch_width // data_width``
            words make up one entry (``self.fw_int``).
        multiwrite: Number of simultaneous writes per input port. Must
            satisfy ``1 <= multiwrite <= banks``.
        strg_wr_ports: Number of write ports per bank.
        config_width: Forwarded to every ``AddrGen`` child.

    Raises:
        AssertionError: If ``multiwrite`` is smaller than 1 or larger than
            ``banks``.
    """
    super().__init__("input_addr_ctrl", debug=True)

    assert multiwrite >= 1, "Multiwrite must be at least 1..."

    self.interconnect_input_ports = interconnect_input_ports
    self.mem_depth = mem_depth
    self.num_tiles = num_tiles
    self.banks = banks
    self.iterator_support = iterator_support
    self.port_sched_width = max(1, clog2(self.interconnect_input_ports))
    self.data_width = data_width
    self.fetch_width = fetch_width
    # Floor division keeps the ratio exact for integer widths instead of
    # routing it through a float; int() retains the original coercion
    # should a non-int width be passed in.
    self.fw_int = int(self.fetch_width // self.data_width)
    self.multiwrite = multiwrite
    self.strg_wr_ports = strg_wr_ports
    self.config_width = config_width

    self.mem_addr_width = clog2(self.num_tiles * self.mem_depth)
    # A single bank needs no bank-select bits.
    self.bank_addr_width = clog2(self.banks) if self.banks > 1 else 0
    # The full address is derived here; the ``address_width`` argument
    # never reaches this attribute.
    self.address_width = self.mem_addr_width + self.bank_addr_width

    # Clock and Reset
    self._clk = self.clock("clk")
    self._rst_n = self.reset("rst_n")

    # Inputs
    # phases = [] TODO
    # Take in the valid and data and attach an address + direct to a port
    self._valid_in = self.input("valid_in", self.interconnect_input_ports)
    self._wen_en = self.input("wen_en", self.interconnect_input_ports)
    self._wen_en_saved = self.var("wen_en_saved",
                                  self.interconnect_input_ports)
    self._data_in = self.input("data_in",
                               self.data_width,
                               size=(self.interconnect_input_ports,
                                     self.fw_int),
                               explicit_array=True,
                               packed=True)
    self._data_in_saved = self.var("data_in_saved",
                                   self.data_width,
                                   size=(self.interconnect_input_ports,
                                         self.fw_int),
                                   explicit_array=True,
                                   packed=True)

    # Outputs
    self._wen = self.output("wen_to_sram",
                            self.strg_wr_ports,
                            size=self.banks,
                            explicit_array=True,
                            packed=True)

    wen_full_size = (self.interconnect_input_ports, self.multiwrite)
    self._wen_full = self.var("wen_full",
                              self.banks,
                              size=wen_full_size,
                              explicit_array=True,
                              packed=True)
    self._wen_reduced = self.var("wen_reduced",
                                 self.banks,
                                 size=self.interconnect_input_ports,
                                 explicit_array=True,
                                 packed=True)
    self._wen_reduced_saved = self.var("wen_reduced_saved",
                                       self.banks,
                                       size=self.interconnect_input_ports,
                                       explicit_array=True,
                                       packed=True)

    self._addresses = self.output("addr_out",
                                  self.mem_addr_width,
                                  size=(self.banks, self.strg_wr_ports),
                                  explicit_array=True,
                                  packed=True)
    self._data_out = self.output("data_out",
                                 self.data_width,
                                 size=(self.banks,
                                       self.strg_wr_ports,
                                       self.fw_int),
                                 explicit_array=True,
                                 packed=True)

    self._port_out_exp = self.var("port_out_exp",
                                  self.interconnect_input_ports,
                                  size=self.banks,
                                  explicit_array=True,
                                  packed=True)
    self._port_out = self.output("port_out", self.interconnect_input_ports)
    self._counter = self.var("counter", self.port_sched_width)

    # Wire to port out: bit i of port_out is the OR-reduction of bit i of
    # port_out_exp across all banks.
    for i in range(self.interconnect_input_ports):
        per_bank_bits = [self._port_out_exp[j][i]
                         for j in range(self.banks)]
        self.wire(self._port_out[i], kts.concat(*per_bank_bits).r_or())

    self._done = self.var("done",
                          self.strg_wr_ports,
                          size=self.banks,
                          explicit_array=True,
                          packed=True)

    # LOCAL VARS
    self._local_addrs = self.var("local_addrs",
                                 self.address_width,
                                 size=(self.interconnect_input_ports,
                                       self.multiwrite),
                                 packed=True,
                                 explicit_array=True)
    self._local_addrs_saved = self.var("local_addrs_saved",
                                       self.address_width,
                                       size=(self.interconnect_input_ports,
                                             self.multiwrite),
                                       packed=True,
                                       explicit_array=True)

    # wen_reduced[i][j] is the OR-reduction over the multiwrite slots of
    # wen_full for input port i and bank j.
    for i in range(self.interconnect_input_ports):
        for j in range(self.banks):
            per_write_bits = [self._wen_full[i][k][j]
                              for k in range(self.multiwrite)]
            self.wire(self._wen_reduced[i][j],
                      kts.concat(*per_write_bits).r_or())

    # Pick how wen_full is driven from the bank / input-port counts.
    if self.banks == 1 and self.interconnect_input_ports == 1:
        self.wire(self._wen_full[0][0][0], self._valid_in)
    elif self.banks == 1 and self.interconnect_input_ports > 1:
        self.add_code(self.set_wen_single)
    else:
        self.add_code(self.set_wen_mult)

    # MAIN
    # Iterate through all banks to priority decode the wen
    self.add_code(self.decode_out_lowest)

    # Also set the write ports on the storage
    if self.strg_wr_ports > 1:
        # The counter width is fixed at 8 bits here.
        self._idx_cnt = self.var("idx_cnt",
                                 8,
                                 size=(self.banks, self.strg_wr_ports - 1),
                                 explicit_array=True,
                                 packed=True)
        for i in range(self.strg_wr_ports - 1):
            self.add_code(self.decode_out_alt, idx=i + 1)

    # Now we should instantiate the child address generators
    # (1 per input port) to send to the sram banks
    for i in range(self.interconnect_input_ports):
        self.add_child(f"address_gen_{i}",
                       AddrGen(iterator_support=self.iterator_support,
                               config_width=self.config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       clk_en=const(1, 1),
                       flush=const(0, 1),
                       step=self._valid_in[i])

    # Need to check that the address falls into the bank for implicit banking
    # Then, obey the input schedule to send the proper Aggregator to the output
    # The wen to sram should be that the valid for the selected port is high
    # Do the same thing for the output address
    assert 0 < self.multiwrite <= self.banks, \
        "Multiwrite should be between 1 and banks"

    if self.multiwrite > 1:
        size = (self.interconnect_input_ports, self.multiwrite - 1)
        self._offsets_cfg = self.input("offsets_cfg",
                                       self.address_width,
                                       size=size,
                                       packed=True,
                                       explicit_array=True)
        doc = "These offsets provide the ability to write to multiple banks explicitly"
        self._offsets_cfg.add_attribute(ConfigRegAttr(doc))
    # NOTE(review): registered for every multiwrite value, not only when
    # multiwrite > 1 - confirm this matches the intended nesting.
    self.add_code(self.set_multiwrite_addrs)

    # to handle multiple input ports going to fewer SRAM write ports
    self.add_code(self.set_int_ports_counter)
    self.add_code(self.save_mult_int_signals)