Esempio n. 1
0
    def __init__(self, edge_params, from_mem, to_mem, from_inst, to_inst):
        """Construct the ``lake_edge`` generator from an edge description.

        ``edge_params`` is indexed with the keys ``"from_signal"``,
        ``"to_signal"``, ``"dim"``, ``"max_range"`` and ``"max_stride"``; each
        value is stored on the instance under the same name.  ``from_mem``,
        ``to_mem``, ``from_inst`` and ``to_inst`` are accepted but are not
        referenced anywhere in this constructor.

        NOTE(review): the body depends on names that are not defined in the
        visible code: ``self._write`` (invoked as a callable and also passed
        as a step signal), ``self._clk``, ``self._rst_n``,
        ``self._write_addr``, ``addr_gen_dim`` and ``addr_gen_max_range``.
        Confirm they are supplied elsewhere before relying on this class.
        """
        super().__init__("lake_edge", debug=True)

        # Edge endpoints: the source end is a data_out signal and the sink
        # end is a data_in signal.
        self.from_signal = edge_params["from_signal"]
        self.to_signal = edge_params["to_signal"]

        # Limits of the iteration space handled by this edge.
        self.dim = edge_params["dim"]
        self.max_range = edge_params["max_range"]
        self.max_stride = edge_params["max_stride"]

        write_name = f"write_{self.to_signal}"
        self._write(write_name, width=1)

        loop_ctr = ForLoop(iterator_support=self.dim,
                           config_width=clog2(self.max_range))

        # TODO: obtain the memory parameters from the top-level Lake (or a
        # user-facing wrapper taking just those parameters) and then pass in
        # the memory for this signal, e.g.
        # self._write_addr(f"write_addr_{to_signal}")

        # Both children share the same clock, reset and step connections.
        edge_tag = f"{self.from_signal}_{self.to_signal}"
        shared_ports = {
            "clk": self._clk,
            "rst_n": self._rst_n,
            "step": self._write,
        }

        self.add_child("loops_" + edge_tag, loop_ctr, **shared_ports)

        write_addr_gen = AddrGen(iterator_support=addr_gen_dim,
                                 config_width=clog2(addr_gen_max_range))

        # The address generator is steered by the loop counter's mux select.
        self.add_child("AG_write_" + edge_tag,
                       write_addr_gen,
                       mux_sel=loop_ctr.ports.mux_sel_out,
                       **shared_ports)

        safe_wire(self, write_addr_gen.ports.addr_out, self._write_addr)
Esempio n. 2
0
    def __init__(
            self,
            data_width=16,  # CGRA Params
            mem_width=64,
            mem_depth=512,
            banks=1,
            input_addr_iterator_support=6,
            output_addr_iterator_support=6,
            input_sched_iterator_support=6,
            output_sched_iterator_support=6,
            config_width=16,
            #  output_config_width=16,
            interconnect_input_ports=2,  # Connection to int
            interconnect_output_ports=2,
            mem_input_ports=1,
            mem_output_ports=1,
            read_delay=1,  # Cycle delay in read (SRAM vs Register File)
            rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
            agg_height=4,
            max_line_length=128,
            max_tb_height=1,
            tb_range_inner_max=5,
            tb_sched_max=64,
            num_tiles=1):
        """Build the ``strg_ub`` generator: ports, variables and children.

        The constructor declares the clock/reset/data ports, the storage
        interface ports (``data_to_strg``, ``data_from_strg``,
        ``wen_to_strg``, ``cen_to_strg``, ``addr_out``), the ``agg`` and
        ``tb`` buffer variables, and instantiates ``ForLoop`` / ``AddrGen`` /
        ``SchedGen`` children for each input port, for the storage write and
        read paths, and for each output port.  It finishes by registering the
        code blocks ``set_sram_addr``, ``agg_ctrl``, ``agg_to_sram``,
        ``tb_ctrl`` and ``tb_to_out`` (methods defined outside this block).

        Parameters read by this constructor:
            data_width: bit width of one data word.
            mem_width: bit width of one memory word; ``mem_width //
                data_width`` gives ``fetch_width``.
            mem_depth: sizes the ``addr`` variable and ``addr_out`` port via
                ``clog2``.
            config_width: width of ``write_addr`` / ``read_addr`` and of the
                ``input_addr_gen`` / ``input_sched_gen`` children.
            input_addr_iterator_support: passed to ``input_addr_gen``.
            input_sched_iterator_support: passed to ``input_sched_gen``.
            interconnect_input_ports: number of ``data_in`` entries; one set
                of agg children is created per port.
            interconnect_output_ports: number of ``data_out`` entries; one
                set of tb children is created per port.
            agg_height: second dimension of the ``agg`` variable.

        NOTE(review): ``banks``, ``output_addr_iterator_support``,
        ``output_sched_iterator_support``, ``mem_input_ports``,
        ``mem_output_ports``, ``read_delay``, ``rw_same_cycle``,
        ``max_line_length``, ``max_tb_height``, ``tb_range_inner_max``,
        ``tb_sched_max`` and ``num_tiles`` are accepted but never referenced
        in this constructor.
        """
        super().__init__("strg_ub", debug=True)

        # Number of data words that fit in one memory word.
        self.fetch_width = mem_width // data_width
        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.agg_height = agg_height
        self.mem_depth = mem_depth
        # generation parameters
        # inputs
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        self._clk_en = self.input("clk_en", 1)
        # "flush" is declared as a synchronous, active-high reset.
        self._flush = self.reset("flush", is_async=False, active_high=True)

        self._data_in = self.input("data_in",
                                   data_width,
                                   size=self.interconnect_input_ports,
                                   packed=True,
                                   explicit_array=True)

        # Create cycle counter to share...
        # (it is handed to every SchedGen child below as ``cycle_count``)
        self._cycle_count = self.var("cycle_count", 16)
        self.add_code(self.increment_cycle_count)

        # outputs
        self._data_out = self.output("data_out",
                                     data_width,
                                     size=self.interconnect_output_ports,
                                     packed=True,
                                     explicit_array=True)

        # local variables
        self._write = self.var("write", 1)
        self._read = self.var("read", 1)
        # read_d1 is driven by the delay_read code block (defined elsewhere);
        # presumably a one-cycle delayed copy of ``read`` -- confirm there.
        self._read_d1 = self.var("read_d1", 1)
        self.add_code(self.delay_read)

        self._write_addr = self.var("write_addr", config_width)
        self._read_addr = self.var("read_addr", config_width)
        self._addr = self.var("addr", clog2(mem_depth))

        # One write-enable bit per input port.
        self._agg_write = self.var("agg_write", self.interconnect_input_ports)

        # Make this based on the size
        self._agg_write_addr = self.var("agg_write_addr",
                                        2 + clog2(self.agg_height),
                                        size=self.interconnect_input_ports,
                                        packed=True,
                                        explicit_array=True)
        self._agg_read_addr = self.var("agg_read_addr",
                                       max(1, clog2(self.agg_height)),
                                       size=self.interconnect_input_ports,
                                       packed=True,
                                       explicit_array=True)

        # The agg read address generators run at a fixed 8-bit width; only
        # the low agg_read_addr.width bits are wired to agg_read_addr in the
        # per-port loop below.
        self.agg_rd_addr_gen_width = 8
        self._agg_read_addr_gen_out = self.var(
            "agg_read_addr_gen_out",
            self.agg_rd_addr_gen_width,
            size=self.interconnect_input_ports,
            packed=True,
            explicit_array=True)

        self._sram_write_data = self.var("sram_write_data",
                                         data_width,
                                         size=self.fetch_width,
                                         packed=True)
        self._sram_read_data = self.var("sram_read_data",
                                        data_width,
                                        size=self.fetch_width,
                                        packed=True,
                                        explicit_array=True)

        # Storage-facing ports (note the "strg" spelling in the port names).
        self._data_to_sram = self.output("data_to_strg",
                                         data_width,
                                         size=self.fetch_width,
                                         packed=True)
        self._data_from_sram = self.input("data_from_strg",
                                          data_width,
                                          size=self.fetch_width,
                                          packed=True)

        self._wen_to_sram = self.output("wen_to_strg", 1, packed=True)
        self._cen_to_sram = self.output("cen_to_strg", 1, packed=True)

        self._addr_to_sram = self.output("addr_out",
                                         clog2(mem_depth),
                                         packed=True)

        # Connect the storage ports to the internal variables; the enable
        # output is the OR of the write and read strobes.
        self.wire(self._addr_to_sram, self._addr)
        self.wire(self._data_to_sram, self._sram_write_data)
        self.wire(self._data_from_sram, self._sram_read_data)
        self.wire(self._wen_to_sram, self._write)
        self.wire(self._cen_to_sram, self._write | self._read)

        # NOTE(review): width 2 and size 4 are literals here rather than
        # values derived from agg_height -- confirm this is intended.
        self._agg_write_index = self.var("agg_write_index", 2, size=4)

        self._output_port_sel_addr = self.var(
            "output_port_sel_addr",
            max(1, clog2(self.interconnect_output_ports)))

        # Handles to per-port children, kept so they can be reached later.
        self.agg_write_scheds = []
        self.agg_read_addrs = []
        self._input_port_sel_addr = self.var(
            "input_port_sel_addr", max(1,
                                       clog2(self.interconnect_input_ports)))
        # Create an input to agg write scheduler + addressor for each input
        # Also need an addressor for the mux in addition to the read addr
        self._agg = self.var(f"agg",
                             width=data_width,
                             size=(self.interconnect_input_ports,
                                   self.agg_height, self.fetch_width),
                             packed=True,
                             explicit_array=True)

        # Per input port: a write-side loop counter + address generator +
        # scheduler, and a read-side loop counter + address generator.
        for i in range(self.interconnect_input_ports):

            forloop_ctr = ForLoop(iterator_support=4,
                                  config_width=self._agg_write_addr.width)
            loop_itr = forloop_ctr.get_iter()
            loop_wth = forloop_ctr.get_cfg_width()

            self.add_child(f"loops_in2buf_{i}",
                           forloop_ctr,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._agg_write[i])

            newAG = AddrGen(iterator_support=loop_itr, config_width=loop_wth)
            self.add_child(f"agg_write_addr_gen_{i}",
                           newAG,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._agg_write[i],
                           mux_sel=forloop_ctr.ports.mux_sel_out,
                           addr_out=self._agg_write_addr[i])

            # This scheduler produces agg_write[i], which in turn steps the
            # loop counter and address generator above.
            newSG = SchedGen(iterator_support=loop_itr, config_width=loop_wth)
            self.agg_write_scheds.append(newSG)
            self.add_child(f"agg_write_sched_gen_{i}",
                           newSG,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           mux_sel=forloop_ctr.ports.mux_sel_out,
                           cycle_count=self._cycle_count,
                           valid_output=self._agg_write[i])

            forloop_ctr_rd = ForLoop(iterator_support=4,
                                     config_width=self.agg_rd_addr_gen_width)
            loop_itr = forloop_ctr_rd.get_iter()
            loop_wth = forloop_ctr_rd.get_cfg_width()

            # The read side advances only on a write strobe while the input
            # port select equals this port's index.
            self.add_child(f"loops_in2buf_autovec_read_{i}",
                           forloop_ctr_rd,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=(self._write &
                                 (self._input_port_sel_addr == const(
                                     i, self._input_port_sel_addr.width))))

            newAG = AddrGen(iterator_support=loop_itr, config_width=loop_wth)
            self.agg_read_addrs.append(newAG)
            self.add_child(f"agg_read_addr_gen_{i}",
                           newAG,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=(self._write &
                                 (self._input_port_sel_addr == const(
                                     i, self._input_port_sel_addr.width))),
                           mux_sel=forloop_ctr_rd.ports.mux_sel_out,
                           addr_out=self._agg_read_addr_gen_out[i])
            # Keep only the low bits of the 8-bit generator output.
            self.wire(
                self._agg_read_addr[i],
                self._agg_read_addr_gen_out[i][self._agg_read_addr.width - 1,
                                               0])

        # Create for loop counters that can be shared across the input port selection and SRAM write
        fl_ctr_sram_wr = ForLoop(iterator_support=6, config_width=16)
        loop_itr = fl_ctr_sram_wr.get_iter()
        loop_wth = fl_ctr_sram_wr.get_cfg_width()

        self.add_child(f"loops_in2buf_autovec_write",
                       fl_ctr_sram_wr,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._write)

        # Now we determine what data goes through to the sram...
        # If we have more than one port, we can generate a selector
        # to pick which input port should go through - then we send
        # the step signal to the appropriate input port
        if self.interconnect_input_ports > 1:
            self.add_child(f"port_sel_addr",
                           AddrGen(iterator_support=loop_itr,
                                   config_width=clog2(
                                       self.interconnect_input_ports)),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._write,
                           mux_sel=fl_ctr_sram_wr.ports.mux_sel_out,
                           addr_out=self._input_port_sel_addr)
            # Addr for port select should be driven on agg to sram write sched
        else:
            # Single input port: tie bit 0 of the select to constant zero.
            self.wire(self._input_port_sel_addr[0],
                      const(0, self._input_port_sel_addr.width))

        # Whatever comes through here should hopefully just pipe through seamlessly
        # addressor modules
        self.add_child(f"input_addr_gen",
                       AddrGen(input_addr_iterator_support, config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._write,
                       mux_sel=fl_ctr_sram_wr.ports.mux_sel_out,
                       addr_out=self._write_addr)

        # scheduler modules
        # (this scheduler's valid_output is the ``write`` strobe itself)
        self.add_child(f"input_sched_gen",
                       SchedGen(input_sched_iterator_support, config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       cycle_count=self._cycle_count,
                       mux_sel=fl_ctr_sram_wr.ports.mux_sel_out,
                       valid_output=self._write)

        # -------------------------------- Delineate new group -------------------------------
        # Storage read path: loop counter, address generator and scheduler.
        fl_ctr_sram_rd = ForLoop(iterator_support=6, config_width=16)
        loop_itr = fl_ctr_sram_rd.get_iter()
        loop_wth = fl_ctr_sram_rd.get_cfg_width()

        self.add_child(f"loops_buf2out_autovec_read",
                       fl_ctr_sram_rd,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._read)

        # NOTE(review): literal 6/16 are used for the read-side children; the
        # output_addr_iterator_support / output_sched_iterator_support /
        # config_width parameters are not consulted here -- confirm intended.
        self.add_child(f"output_addr_gen",
                       AddrGen(iterator_support=6, config_width=16),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._read,
                       mux_sel=fl_ctr_sram_rd.ports.mux_sel_out,
                       addr_out=self._read_addr)

        self.add_child(f"output_sched_gen",
                       SchedGen(iterator_support=6, config_width=16),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       cycle_count=self._cycle_count,
                       mux_sel=fl_ctr_sram_rd.ports.mux_sel_out,
                       valid_output=self._read)

        # One read strobe bit per output port; tb_height is a fixed literal.
        self._tb_read = self.var("tb_read", self.interconnect_output_ports)
        self.tb_height = 4

        self._tb_write_addr = self.var("tb_write_addr",
                                       6,
                                       size=self.interconnect_output_ports,
                                       packed=True,
                                       explicit_array=True)
        self._tb_read_addr = self.var("tb_read_addr",
                                      6,
                                      size=self.interconnect_output_ports,
                                      packed=True,
                                      explicit_array=True)

        self._tb = self.var("tb",
                            width=data_width,
                            size=(self.interconnect_output_ports,
                                  self.tb_height, self.fetch_width),
                            packed=True,
                            explicit_array=True)

        # Per output port: a write-side loop counter + address generator
        # (stepped by read_d1 when this port is selected) and a read-side
        # loop counter + address generator + scheduler.
        for i in range(self.interconnect_output_ports):
            fl_ctr_tb_wr = ForLoop(iterator_support=2, config_width=6)
            loop_itr = fl_ctr_tb_wr.get_iter()
            loop_wth = fl_ctr_tb_wr.get_cfg_width()

            self.add_child(f"loops_buf2out_autovec_write_{i}",
                           fl_ctr_tb_wr,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._read_d1 &
                           (self._output_port_sel_addr == const(
                               i, self._output_port_sel_addr.width)))

            self.add_child(f"tb_write_addr_gen_{i}",
                           AddrGen(iterator_support=loop_itr,
                                   config_width=loop_wth),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._read_d1 &
                           (self._output_port_sel_addr == const(
                               i, self._output_port_sel_addr.width)),
                           mux_sel=fl_ctr_tb_wr.ports.mux_sel_out,
                           addr_out=self._tb_write_addr[i])

            fl_ctr_tb_rd = ForLoop(iterator_support=2, config_width=16)
            loop_itr = fl_ctr_tb_rd.get_iter()
            loop_wth = fl_ctr_tb_rd.get_cfg_width()

            self.add_child(f"loops_buf2out_read_{i}",
                           fl_ctr_tb_rd,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._tb_read[i])

            # The address generator uses a 6-bit config width (matching the
            # 6-bit tb_read_addr) while the scheduler below uses 16 bits.
            self.add_child(f"tb_read_addr_gen_{i}",
                           AddrGen(iterator_support=loop_itr, config_width=6),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._tb_read[i],
                           mux_sel=fl_ctr_tb_rd.ports.mux_sel_out,
                           addr_out=self._tb_read_addr[i])

            self.add_child(f"tb_read_sched_gen_{i}",
                           SchedGen(iterator_support=loop_itr,
                                    config_width=16),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           cycle_count=self._cycle_count,
                           mux_sel=fl_ctr_tb_rd.ports.mux_sel_out,
                           valid_output=self._tb_read[i])

        # With several output ports, generate the output port select from
        # its own loop counter + address generator, stepped by read_d1.
        if self.interconnect_output_ports > 1:

            fl_ctr_out_sel = ForLoop(iterator_support=2,
                                     config_width=clog2(
                                         self.interconnect_output_ports))
            loop_itr = fl_ctr_out_sel.get_iter()
            loop_wth = fl_ctr_out_sel.get_cfg_width()

            self.add_child(f"loops_buf2out_out_sel",
                           fl_ctr_out_sel,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._read_d1)

            self.add_child(f"out_port_sel_addr",
                           AddrGen(iterator_support=loop_itr,
                                   config_width=loop_wth),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._read_d1,
                           mux_sel=fl_ctr_out_sel.ports.mux_sel_out,
                           addr_out=self._output_port_sel_addr)
            # Addr for port select should be driven on agg to sram write sched
        else:
            # Single output port: tie bit 0 of the select to constant zero.
            self.wire(self._output_port_sel_addr[0],
                      const(0, self._output_port_sel_addr.width))

        # lift_config_reg(self.internal_generator)

        # Register the code blocks; these methods are defined elsewhere in
        # the class.  agg_ctrl and tb_to_out are registered once per port
        # with the port index passed as ``idx``.
        self.add_code(self.set_sram_addr)
        for idx in range(self.interconnect_input_ports):
            self.add_code(self.agg_ctrl, idx=idx)

        self.add_code(self.agg_to_sram)
        self.add_code(self.tb_ctrl)

        for idx in range(self.interconnect_output_ports):
            self.add_code(self.tb_to_out, idx=idx)
Esempio n. 3
0
    def __init__(
            self,
            data_width=16,  # CGRA Params
            mem_width=16,
            mem_depth=512,
            banks=1,
            input_addr_iterator_support=6,
            input_sched_iterator_support=6,
            output_addr_iterator_support=6,
            output_sched_iterator_support=6,
            interconnect_input_ports=1,  # Connection to int
            interconnect_output_ports=1,
            config_width=16,
            mem_input_ports=1,
            mem_output_ports=1,
            read_delay=1,  # Cycle delay in read (SRAM vs Register File)
            rw_same_cycle=True,
            gen_addr=True):
        """Build the ``strg_ub_thin`` generator (fetch width of exactly 1).

        Two shapes are produced depending on ``gen_addr``:

        * ``gen_addr is False``: a pure pass-through.  ``ren_in``, ``wen_in``,
          ``write_addr`` and ``read_addr`` become inputs and are forwarded to
          the storage-facing outputs; no children are created.
        * otherwise: a cycle counter plus ``ForLoop`` / ``AddrGen`` /
          ``SchedGen`` children generate the write and read strobes and
          addresses internally.

        Parameters read by this constructor:
            data_width: bit width of one data word.
            mem_width: must equal ``data_width``.
            mem_depth: sizes every storage address via ``clog2``.
            interconnect_input_ports: number of ``data_in`` entries.
            interconnect_output_ports: number of ``data_out`` entries and
                width of ``read`` / ``accessor_output``.
            config_width: width of ``write_addr`` / ``read_addr``.
            read_delay: when ``1``, ``valid_out`` is driven from ``read_d1``
                instead of ``read``.
            rw_same_cycle: when true, separate ``wr_addr_out`` and
                ``rd_addr_out`` ports are emitted; otherwise a single
                ``addr_out`` driven by the ``set_sram_addr`` code block.
            gen_addr: selects between the two shapes described above.

        The four ``*_iterator_support`` arguments are stored on the instance
        but the children are built from ``default_iterator_support`` /
        ``default_config_width``.  ``banks``, ``mem_input_ports`` and
        ``mem_output_ports`` are accepted but not referenced here.

        Raises:
            AssertionError: if ``mem_width != data_width``.

        NOTE(review): ``read`` is ``interconnect_output_ports`` bits wide
        while ``valid_out``, ``read_d1``, ``cen_to_strg`` and ``ren_to_strg``
        are 1 bit; presumably only the single-output-port configuration is
        intended -- confirm.
        """

        super().__init__("strg_ub_thin", debug=True)

        assert mem_width == data_width, \
            "This module should only be used when the fetch width is 1!"

        self.fetch_width = mem_width // data_width
        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.mem_depth = mem_depth
        self.config_width = config_width
        self.data_width = data_width
        self.input_addr_iterator_support = input_addr_iterator_support
        self.input_sched_iterator_support = input_sched_iterator_support
        self.output_addr_iterator_support = output_addr_iterator_support
        self.output_sched_iterator_support = output_sched_iterator_support
        self.rw_same_cycle = rw_same_cycle
        self.read_delay = read_delay
        self.gen_addr = gen_addr
        self.default_iterator_support = 6
        self.default_config_width = 16

        # Width of a storage address; shared by every address port and by
        # the slices that truncate the config-width addresses below.
        addr_width = clog2(self.mem_depth)

        # generation parameters
        # inputs
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        self._data_in = self.input("data_in",
                                   self.data_width,
                                   size=self.interconnect_input_ports,
                                   packed=True,
                                   explicit_array=True)

        # outputs
        self._data_out = self.output("data_out",
                                     self.data_width,
                                     size=self.interconnect_output_ports,
                                     packed=True,
                                     explicit_array=True)

        self._data_to_sram = self.output("data_to_strg",
                                         self.data_width,
                                         size=self.fetch_width,
                                         packed=True)
        self._data_from_sram = self.input("data_from_strg",
                                          self.data_width,
                                          size=self.fetch_width,
                                          packed=True)
        # Early out in case...
        if self.gen_addr is False:
            # Pass through write enable, addr data and
            # read enable, addr data
            self._read = self.input("ren_in", 1)
            self._write = self.input("wen_in", 1)
            self._write_addr = self.input("write_addr", self.config_width)
            self._read_addr = self.input("read_addr", self.config_width)
            self._cen_to_sram = self.output("cen_to_strg", 1, packed=True)
            self._wen_to_sram = self.output("wen_to_strg", 1, packed=True)
            self._ren_to_sram = self.output("ren_to_strg", 1, packed=True)
            self._wr_addr_to_sram = self.output("wr_addr_out",
                                                addr_width,
                                                packed=True)
            self._rd_addr_to_sram = self.output("rd_addr_out",
                                                addr_width,
                                                packed=True)
            self._accessor_output = self.output("accessor_output",
                                                self.interconnect_output_ports)
            self.wire(self._accessor_output, self._read)
            self.wire(self._cen_to_sram, self._write | self._read)
            self.wire(self._wen_to_sram, self._write)
            self.wire(self._ren_to_sram, self._read)
            self.wire(self._data_out, self._data_from_sram)
            self.wire(self._data_in, self._data_to_sram)
            # Only the low addr_width bits of the incoming addresses are used.
            self.wire(self._wr_addr_to_sram,
                      self._write_addr[addr_width - 1, 0])
            self.wire(self._rd_addr_to_sram,
                      self._read_addr[addr_width - 1, 0])

            return

        # Create cycle counter to share...
        self._cycle_count = self.var("cycle_count", 16)
        self.add_code(self.increment_cycle_count)

        # local variables
        self._write = self.var("write", 1)
        self._read = self.var("read", self.interconnect_output_ports)
        self._accessor_output = self.output("accessor_output",
                                            self.interconnect_output_ports)
        self.wire(self._accessor_output, self._read)

        # valid_out follows the delayed read strobe when the storage has a
        # one-cycle read delay, and the raw read strobe otherwise.
        self._valid_out = self.output("valid_out", 1)
        if self.read_delay == 1:
            self._read_d1 = self.var("read_d1", 1)
            self.add_code(self.delay_read)
            self.wire(self._valid_out, self._read_d1)
        else:
            self.wire(self._valid_out, self._read)

        self._write_addr = self.var("write_addr", self.config_width)
        self._read_addr = self.var("read_addr", self.config_width)
        self._addr = self.var("addr", addr_width)

        # Create for loop counters that can be shared across the input port
        # selection and SRAM write
        fl_ctr_sram_wr = ForLoop(
            iterator_support=self.default_iterator_support,
            config_width=self.default_config_width)

        self.add_child("sram_write_loops",
                       fl_ctr_sram_wr,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._write)

        # Whatever comes through here should hopefully just pipe through
        # seamlessly
        # addressor modules
        self.add_child("sram_write_addr_gen",
                       AddrGen(iterator_support=self.default_iterator_support,
                               config_width=self.default_config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._write,
                       mux_sel=fl_ctr_sram_wr.ports.mux_sel_out,
                       restart=fl_ctr_sram_wr.ports.restart,
                       addr_out=self._write_addr)

        # scheduler modules
        # (this scheduler's valid_output is the ``write`` strobe itself)
        self.add_child("sram_write_sched_gen",
                       SchedGen(iterator_support=self.default_iterator_support,
                                config_width=self.default_config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       cycle_count=self._cycle_count,
                       mux_sel=fl_ctr_sram_wr.ports.mux_sel_out,
                       finished=fl_ctr_sram_wr.ports.restart,
                       valid_output=self._write)

        # ---------------------------- Delineate new group ----------------------------
        # Read path: same loop counter / address generator / scheduler trio.
        fl_ctr_sram_rd = ForLoop(
            iterator_support=self.default_iterator_support,
            config_width=self.default_config_width)

        self.add_child("sram_read_loops",
                       fl_ctr_sram_rd,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._read)

        self.add_child("sram_read_addr_gen",
                       AddrGen(iterator_support=self.default_iterator_support,
                               config_width=self.default_config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._read,
                       mux_sel=fl_ctr_sram_rd.ports.mux_sel_out,
                       restart=fl_ctr_sram_rd.ports.restart,
                       addr_out=self._read_addr)

        self.add_child("sram_read_sched_gen",
                       SchedGen(iterator_support=self.default_iterator_support,
                                config_width=self.default_config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       cycle_count=self._cycle_count,
                       mux_sel=fl_ctr_sram_rd.ports.mux_sel_out,
                       finished=fl_ctr_sram_rd.ports.restart,
                       valid_output=self._read)

        # Now deal with dual_port/single_port madness...
        self._cen_to_sram = self.output("cen_to_strg", 1, packed=True)
        self._wen_to_sram = self.output("wen_to_strg", 1, packed=True)
        self._ren_to_sram = self.output("ren_to_strg", 1, packed=True)
        self.wire(self._cen_to_sram, self._write | self._read)
        self.wire(self._wen_to_sram, self._write)
        self.wire(self._ren_to_sram, self._read)
        self.wire(self._data_out, self._data_from_sram)
        self.wire(self._data_in, self._data_to_sram)

        if self.rw_same_cycle:
            # If we can read and write the same cycle we
            # can pretty safely assume we have separate read/write ports...
            self._wr_addr_to_sram = self.output("wr_addr_out",
                                                addr_width,
                                                packed=True)
            self._rd_addr_to_sram = self.output("rd_addr_out",
                                                addr_width,
                                                packed=True)
            # Only the low addr_width bits of the generated addresses are
            # forwarded to the storage.
            self.wire(self._wr_addr_to_sram,
                      self._write_addr[addr_width - 1, 0])
            self.wire(self._rd_addr_to_sram,
                      self._read_addr[addr_width - 1, 0])
        else:
            # Single shared address port; ``addr`` is driven by the
            # set_sram_addr code block (defined elsewhere in the class).
            self._addr_to_sram = self.output("addr_out",
                                             addr_width,
                                             packed=True)
            self.wire(self._addr_to_sram, self._addr)
            self.add_code(self.set_sram_addr)
Esempio n. 4
0
    def __init__(
            self,
            data_width=16,  # CGRA Params
            mem_width=64,
            mem_depth=512,
            banks=1,
            input_addr_iterator_support=6,
            output_addr_iterator_support=6,
            input_sched_iterator_support=6,
            output_sched_iterator_support=6,
            config_width=16,
            #  output_config_width=16,
            interconnect_input_ports=1,  # Connection to int
            interconnect_output_ports=1,
            mem_input_ports=1,
            mem_output_ports=1,
            read_delay=1,  # Cycle delay in read (SRAM vs Register File)
            rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
            agg_height=4):
        super().__init__("tb_formal", debug=True)

        self.fetch_width = mem_width // data_width
        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.agg_height = agg_height
        self.mem_depth = mem_depth
        self.banks = banks
        self.data_width = data_width
        self.config_width = config_width
        self.input_addr_iterator_support = input_addr_iterator_support
        self.output_addr_iterator_support = output_addr_iterator_support
        self.input_sched_iterator_support = input_sched_iterator_support
        self.output_sched_iterator_support = output_sched_iterator_support

        self.default_iterator_support = 6
        self.default_config_width = 16

        # inputs
        self._clk = self.clock("clk")
        self._clk.add_attribute(
            FormalAttr(f"{self._clk.name}", FormalSignalConstraint.CLK))
        self._rst_n = self.reset("rst_n")
        self._rst_n.add_attribute(
            FormalAttr(f"{self._rst_n.name}", FormalSignalConstraint.RSTN))

        self._cycle_count = self.var("cycle_count", 16)
        self.add_code(self.increment_cycle_count)
        self._read = self.var("read", 1)
        self._valid_in = self.output("valid_in", 1)
        self.wire(self._read, self._valid_in)
        self._valid_in.add_attribute(
            FormalAttr(f"{self._valid_in.name}",
                       FormalSignalConstraint.SEQUENCE))

        self._data_in = self.input("data_in",
                                   data_width,
                                   size=self.fetch_width,
                                   packed=True,
                                   explicit_array=True)
        self._data_in.add_attribute(
            FormalAttr(f"{self._data_in.name}",
                       FormalSignalConstraint.SEQUENCE))

        # outputs
        self._data_out = self.output("data_out",
                                     self.data_width,
                                     size=self.interconnect_output_ports,
                                     packed=True,
                                     explicit_array=True)
        self._data_out.add_attribute(
            FormalAttr(f"{self._data_out.name}",
                       FormalSignalConstraint.SEQUENCE))

        self._tb_read = self.var("tb_read", self.interconnect_output_ports)
        # Break out valids for formal!
        self._valid_out = self.output("valid_out",
                                      self.interconnect_output_ports)
        self._valid_out.add_attribute(
            FormalAttr(f"{self._valid_out.name}",
                       FormalSignalConstraint.SEQUENCE))
        self.wire(self._valid_out, self._tb_read)

        self.tb_height = 4

        self._tb_write_addr = self.var("tb_write_addr",
                                       6,
                                       size=self.interconnect_output_ports,
                                       packed=True,
                                       explicit_array=True)
        self._tb_read_addr = self.var("tb_read_addr",
                                      6,
                                      size=self.interconnect_output_ports,
                                      packed=True,
                                      explicit_array=True)

        self._tb = self.var("tb",
                            width=data_width,
                            size=(self.interconnect_output_ports,
                                  self.tb_height, self.fetch_width),
                            packed=True,
                            explicit_array=True)

        self._output_port_sel_addr = self.var(
            "tb_bank_sel_addr", max(1, clog2(self.interconnect_output_ports)))

        # -------------------------------- Delineate new group -------------------------------
        fl_ctr_sram_rd = ForLoop(
            iterator_support=self.default_iterator_support,
            config_width=self.default_config_width)
        loop_itr = fl_ctr_sram_rd.get_iter()
        loop_wth = fl_ctr_sram_rd.get_cfg_width()

        self.add_child(f"tb_write_loops",
                       fl_ctr_sram_rd,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._read)

        self.add_child(f"tb_write_sched_gen",
                       SchedGen(iterator_support=self.default_iterator_support,
                                config_width=self.default_config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       cycle_count=self._cycle_count,
                       mux_sel=fl_ctr_sram_rd.ports.mux_sel_out,
                       finished=fl_ctr_sram_rd.ports.restart,
                       valid_output=self._read)

        for i in range(self.interconnect_output_ports):
            # fl_ctr_tb_wr = ForLoop(iterator_support=self.default_iterator_support,
            #                        config_width=self.default_config_width)
            # loop_itr = fl_ctr_tb_wr.get_iter()
            # loop_wth = fl_ctr_tb_wr.get_cfg_width()

            # self.add_child(f"tb_write_loops_{i}",
            #                fl_ctr_tb_wr,
            #                clk=self._clk,
            #                rst_n=self._rst_n,
            #                step=self._read & (self._output_port_sel_addr ==
            #                                   const(i, self._output_port_sel_addr.width)))

            newAG = AddrGen(iterator_support=self.default_iterator_support,
                            config_width=self.default_config_width)

            self.add_child(
                f"tb_write_addr_gen_{i}",
                newAG,
                clk=self._clk,
                rst_n=self._rst_n,
                step=self._read & (self._output_port_sel_addr == const(
                    i, self._output_port_sel_addr.width)),
                # addr_out=self._tb_write_addr[i])
                mux_sel=fl_ctr_sram_rd.ports.mux_sel_out,
                restart=fl_ctr_sram_rd.ports.restart)

            safe_wire(self, self._tb_write_addr[i], newAG.ports.addr_out)

            fl_ctr_tb_rd = ForLoop(
                iterator_support=self.default_iterator_support,
                config_width=self.default_config_width)
            loop_itr = fl_ctr_tb_rd.get_iter()
            loop_wth = fl_ctr_tb_rd.get_cfg_width()

            self.add_child(f"tb_read_loops_{i}",
                           fl_ctr_tb_rd,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._tb_read[i])

            newAG = AddrGen(iterator_support=self.default_iterator_support,
                            config_width=self.default_config_width)

            self.add_child(
                f"tb_read_addr_gen_{i}",
                newAG,
                clk=self._clk,
                rst_n=self._rst_n,
                step=self._tb_read[i],
                # addr_out=self._tb_read_addr[i])
                mux_sel=fl_ctr_tb_rd.ports.mux_sel_out,
                restart=fl_ctr_tb_rd.ports.restart)

            safe_wire(self, self._tb_read_addr[i], newAG.ports.addr_out)

            self.add_child(f"tb_read_sched_gen_{i}",
                           SchedGen(
                               iterator_support=self.default_iterator_support,
                               config_width=self.default_config_width),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           cycle_count=self._cycle_count,
                           mux_sel=fl_ctr_tb_rd.ports.mux_sel_out,
                           finished=fl_ctr_tb_rd.ports.restart,
                           valid_output=self._tb_read[i])

        if self.interconnect_output_ports > 1:

            # fl_ctr_out_sel = ForLoop(iterator_support=self.default_iterator_support,
            #                          # config_width=clog2(self.interconnect_output_ports))
            #                          config_width=self.default_config_width)
            # loop_itr = fl_ctr_out_sel.get_iter()
            # loop_wth = fl_ctr_out_sel.get_cfg_width()

            # self.add_child(f"tb_sel_loops",
            #                fl_ctr_out_sel,
            #                clk=self._clk,
            #                rst_n=self._rst_n,
            #                step=self._read)

            self.add_child(f"out_port_sel_addr",
                           AddrGen(
                               iterator_support=self.default_iterator_support,
                               config_width=self.default_config_width),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._read,
                           mux_sel=fl_ctr_sram_rd.ports.mux_sel_out,
                           addr_out=self._output_port_sel_addr)
            # Addr for port select should be driven on agg to sram write sched
        else:
            self.wire(self._output_port_sel_addr[0],
                      const(0, self._output_port_sel_addr.width))

        self.add_code(self.tb_ctrl)
        for idx in range(self.interconnect_output_ports):
            self.add_code(self.tb_to_out, idx=idx)
Esempio n. 5
0
def test_addr_gen_basic(depth=512,
                        addr_width=16,
                        iterator_support=6):
    """Co-simulate the AddrGen hardware against AddrGenModel for 1000 steps.

    A functional model is configured from ``config_dict``, the generated
    hardware is wrapped in a fault tester, and on every iteration the
    hardware's ``addr_out`` is expected to equal the model's current
    address before both are advanced.  The recorded test is finally
    compiled and run with verilator inside a temporary directory.

    Args:
        depth: Not referenced by this test; kept so the signature is stable.
        addr_width: Address width handed to both the model and the DUT.
        iterator_support: Iterator count handed to both the model and the DUT.
    """
    model_ag = AddrGenModel(iterator_support=iterator_support,
                            address_width=addr_width)

    # Golden-model configuration: dimensionality 3, ranges 3/3/3 and
    # strides 1/3/9, starting at address 0.
    config_dict = {
        "starting_addr": 0,
        "dimensionality": 3,
        "strides_0": 1,
        "strides_1": 3,
        "strides_2": 9,
        "ranges_0": 3,
        "ranges_1": 3,
        "ranges_2": 3,
    }
    model_ag.set_config(config_dict)

    dut = AddrGen(iterator_support=iterator_support,
                  address_width=addr_width)

    magma_dut = k.util.to_magma(dut, flatten_array=True,
                                check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)

    # NOTE(review): the DUT is programmed with a different configuration
    # (dimensionality 4, ranges 1/1/1/10000, strides 1/1/1/-26) than the
    # model above.  The model-matching values would be dimensionality=3,
    # strides_{0,1,2}=1/3/9 and ranges_{0,1,2}=3/3/3 -- confirm whether the
    # mismatch is intentional before relying on this test.
    tester.circuit.dimensionality = 4
    tester.circuit.starting_addr = 0
    tester.circuit.strides_0 = 1
    tester.circuit.strides_1 = 1
    tester.circuit.strides_2 = 1
    tester.circuit.strides_3 = -26
    tester.circuit.ranges_0 = 1
    tester.circuit.ranges_1 = 1
    tester.circuit.ranges_2 = 1
    tester.circuit.ranges_3 = 10000

    # Drive rst_n low, advance the tester, then release the reset.
    tester.circuit.clk = 0
    tester.circuit.clk_en = 1
    tester.circuit.rst_n = 0
    tester.eval()
    tester.step(2)
    tester.circuit.rst_n = 1
    tester.eval()
    tester.step(2)

    for _ in range(1000):
        tester.circuit.step = 1
        # Check the hardware address against the model *before* stepping
        # the model, so both are compared at the same position.
        tester.circuit.addr_out.expect(model_ag.get_address())
        model_ag.step()
        tester.eval()
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])
Esempio n. 6
0
    def __init__(self,
                 data_width=16,  # CGRA Params
                 mem_width=64,
                 mem_depth=512,
                 banks=1,
                 input_addr_iterator_support=6,
                 output_addr_iterator_support=6,
                 input_sched_iterator_support=6,
                 output_sched_iterator_support=6,
                 config_width=16,
                 interconnect_input_ports=1,  # Connection to int
                 interconnect_output_ports=1,
                 mem_input_ports=1,
                 mem_output_ports=1,
                 read_delay=1,  # Cycle delay in read (SRAM vs Register File)
                 rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
                 agg_height=4):
        """Construct the "agg_formal" generator.

        Declares the clock/reset, the ``data_in``/``valid_in`` and
        ``data_out``/``valid_out`` ports (each tagged with a ``FormalAttr``),
        the ``agg`` storage array, and the controllers around it:

        * per input port: a write ForLoop + AddrGen + SchedGen, and a read
          ForLoop + AddrGen;
        * with more than one input port: an extra shared ForLoop and an
          AddrGen that produces the input-port select address;
        * one SchedGen that produces the ``write`` strobe.

        Args:
            data_width: Bit width of one data word.
            mem_width: Only used to derive ``fetch_width`` as
                ``mem_width // data_width``.
            mem_depth: Stored on the instance as ``mem_depth``.
            interconnect_input_ports: Number of input ports; sizes
                ``data_in``, ``valid_in`` and the per-port controllers.
            interconnect_output_ports: Stored on the instance only.
            agg_height: Second dimension of the ``agg`` array; also used to
                size the aggregator address signals.
            banks, input_addr_iterator_support, output_addr_iterator_support,
            input_sched_iterator_support, output_sched_iterator_support,
            config_width, mem_input_ports, mem_output_ports, read_delay,
            rw_same_cycle: Accepted but not referenced by this constructor.
        """
        super().__init__("agg_formal", debug=True)

        self.fetch_width = mem_width // data_width
        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.agg_height = agg_height
        self.mem_depth = mem_depth

        # Every ForLoop/AddrGen/SchedGen below is built with these fixed
        # values rather than with the constructor arguments.
        self.default_iterator_support = 6
        self.default_config_width = 16

        # inputs
        self._clk = self.clock("clk")
        self._clk.add_attribute(
            FormalAttr(f"{self._clk.name}", FormalSignalConstraint.CLK))
        self._rst_n = self.reset("rst_n")
        self._rst_n.add_attribute(
            FormalAttr(f"{self._rst_n.name}", FormalSignalConstraint.RSTN))

        self._cycle_count = self.var("cycle_count", 16)
        self.add_code(self.increment_cycle_count)

        self._data_in = self.input("data_in", data_width,
                                   size=self.interconnect_input_ports,
                                   packed=True,
                                   explicit_array=True)
        self._data_in.add_attribute(
            FormalAttr(f"{self._data_in.name}", FormalSignalConstraint.SEQUENCE))

        # The per-port write strobes are exposed as the `valid_in` output.
        self._agg_write = self.var("agg_write", self.interconnect_input_ports)
        self._valid_in = self.output("valid_in", self.interconnect_input_ports)
        self._valid_in.add_attribute(
            FormalAttr(f"{self._valid_in.name}", FormalSignalConstraint.SEQUENCE))
        self.wire(self._valid_in, self._agg_write)

        # The single `write` strobe is tied to the `valid_out` output.
        self._write = self.var("write", 1)
        self._valid_out = self.output("valid_out", 1)
        self._valid_out.add_attribute(
            FormalAttr(f"{self._valid_out.name}", FormalSignalConstraint.SEQUENCE))
        self.wire(self._write, self._valid_out)

        self._data_out = self.output("data_out", data_width,
                                     size=self.fetch_width,
                                     packed=True)
        self._data_out.add_attribute(
            FormalAttr(f"{self._data_out.name}", FormalSignalConstraint.SEQUENCE))

        # TODO: make this based on the size (the literal 2 presumably stands
        # for clog2(fetch_width) at the default widths -- confirm).
        self._agg_write_addr = self.var("agg_write_addr",
                                        2 + clog2(self.agg_height),
                                        size=self.interconnect_input_ports,
                                        packed=True,
                                        explicit_array=True)
        self._agg_read_addr = self.var("agg_read_addr",
                                       max(1, clog2(self.agg_height)),
                                       size=self.interconnect_input_ports,
                                       packed=True,
                                       explicit_array=True)

        self.agg_rd_addr_gen_width = 8
        self._agg_read_addr_gen_out = self.var("agg_read_addr_gen_out",
                                               self.agg_rd_addr_gen_width,
                                               size=self.interconnect_input_ports,
                                               packed=True,
                                               explicit_array=True)
        self._input_port_sel_addr = self.var(
            "input_port_sel_addr",
            max(1, clog2(self.interconnect_input_ports)))

        # Create an input to agg write scheduler + addressor for each input
        # Also need an addressor for the mux in addition to the read addr
        self._agg = self.var("agg",
                             width=data_width,
                             size=(self.interconnect_input_ports,
                                   self.agg_height,
                                   self.fetch_width),
                             packed=True,
                             explicit_array=True)

        # ForLoop whose mux_sel/restart feed the output scheduler at the
        # end: the last port's read loop, or the shared select loop when
        # there is more than one input port.
        output_loops = None

        for i in range(self.interconnect_input_ports):

            # Write side: loop counters stepped by this port's write strobe.
            forloop_ctr = ForLoop(iterator_support=self.default_iterator_support,
                                  config_width=self.default_config_width)
            self.add_child(f"agg_write_loops_{i}",
                           forloop_ctr,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._agg_write[i])

            write_ag = AddrGen(iterator_support=self.default_iterator_support,
                               config_width=self.default_config_width)
            self.add_child(f"agg_write_addr_gen_{i}",
                           write_ag,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._agg_write[i],
                           mux_sel=forloop_ctr.ports.mux_sel_out,
                           restart=forloop_ctr.ports.restart)
            # addr_out is connected through safe_wire instead of being bound
            # directly in add_child.
            safe_wire(self, self._agg_write_addr[i], write_ag.ports.addr_out)

            # The scheduler generates this port's write strobe.
            newSG = SchedGen(iterator_support=self.default_iterator_support,
                             config_width=self.default_config_width)
            self.add_child(f"agg_write_sched_gen_{i}",
                           newSG,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           mux_sel=forloop_ctr.ports.mux_sel_out,
                           finished=forloop_ctr.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self._agg_write[i])

            # Read side: add loops for the output of each agg.  They step on
            # every `write`; they are not gated by the port selector.
            forloop_ctr_rd = ForLoop(iterator_support=self.default_iterator_support,
                                     config_width=self.default_config_width)
            self.add_child(f"agg_read_loops_{i}",
                           forloop_ctr_rd,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._write)

            output_loops = forloop_ctr_rd

            # And an associated read address...
            read_ag = AddrGen(iterator_support=self.default_iterator_support,
                              config_width=self.default_config_width)
            self.add_child(f"agg_read_addr_gen_{i}",
                           read_ag,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._write,
                           mux_sel=forloop_ctr_rd.ports.mux_sel_out,
                           restart=forloop_ctr_rd.ports.restart)

            safe_wire(self, self._agg_read_addr_gen_out[i], read_ag.ports.addr_out)
            # Only the low bits of the generated address drive agg_read_addr.
            self.wire(self._agg_read_addr[i],
                      self._agg_read_addr_gen_out[i][self._agg_read_addr.width - 1, 0])

        # Now we determine what data goes through to the sram...
        # If we have more than one port, we can generate a selector
        # to pick which input port should go through - then we send
        # the step signal to the appropriate input port
        if self.interconnect_input_ports > 1:

            # Create for loop counters that can be shared across the input
            # port selection and SRAM write
            fl_ctr_sram_wr = ForLoop(iterator_support=self.default_iterator_support,
                                     config_width=self.default_config_width)

            output_loops = fl_ctr_sram_wr

            self.add_child("agg_select_loops",
                           fl_ctr_sram_wr,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._write)

            tmp_AG = AddrGen(iterator_support=self.default_iterator_support,
                             config_width=self.default_config_width)
            self.add_child("port_sel_addr",
                           tmp_AG,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._write,
                           mux_sel=fl_ctr_sram_wr.ports.mux_sel_out)
            safe_wire(self, self._input_port_sel_addr, tmp_AG.ports.addr_out)

        else:
            # Single input port: the selector is tied to zero.
            self.wire(self._input_port_sel_addr[0],
                      const(0, self._input_port_sel_addr.width))

        # Addr for port select should be driven on agg to sram write sched
        # scheduler modules
        self.add_child("agg_read_output_sched_gen",
                       SchedGen(iterator_support=self.default_iterator_support,
                                config_width=self.default_config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       cycle_count=self._cycle_count,
                       mux_sel=output_loops.ports.mux_sel_out,
                       finished=output_loops.ports.restart,
                       valid_output=self._write)

        # One agg_ctrl code block per input port, plus a single agg_to_sram
        # block.
        for idx in range(self.interconnect_input_ports):
            self.add_code(self.agg_ctrl, idx=idx)
        self.add_code(self.agg_to_sram)
Esempio n. 7
0
    def __init__(self,
                 word_width,
                 input_ports,
                 output_ports,
                 memories,
                 edges):

        super().__init__("LakeTop", debug=True)

        # parameters
        self.word_width = word_width
        self.input_ports = input_ports
        self.output_ports = output_ports

        self.default_config_width = 16
        self.cycle_count_width = 16

        self.stencil_valid = False

        # objects
        self.memories = memories
        self.edges = edges

        # tile enable and clock
        self.tile_en = self.input("tile_en", 1)
        self.tile_en.add_attribute(ConfigRegAttr("Tile logic enable manifested as clock gate"))
        self.tile_en.add_attribute(FormalAttr(self.tile_en.name, FormalSignalConstraint.SET1))

        self.clk_mem = self.clock("clk")
        self.clk_mem.add_attribute(FormalAttr(self.clk_mem.name, FormalSignalConstraint.CLK))

        # chaining
        chain_supported = False
        for mem in self.memories.keys():
            if self.memories[mem]["chaining"]:
                chain_supported = True
                break

        if chain_supported:
            self.chain_en = self.input("chain_en", 1)
            self.chain_en.add_attribute(ConfigRegAttr("Chaining enable"))
            self.chain_en.add_attribute(FormalAttr(self.chain_en.name, FormalSignalConstraint.SET0))
        else:
            self.chain_en = self.var("chain_en", 1)
            self.wire(self.chain_en, 0)

        # gate clock with tile_en
        gclk = self.var("gclk", 1)
        self.gclk = kts.util.clock(gclk)
        self.wire(gclk, self.clk_mem & self.tile_en)

        self.clk_en = self.clock_en("clk_en", 1)

        # active low asynchornous reset
        self.rst_n = self.reset("rst_n", 1)
        self.rst_n.add_attribute(FormalAttr(self.rst_n.name, FormalSignalConstraint.RSTN))

        # data in and out of top level Lake memory object
        self.data_in = self.input("data_in",
                                  width=self.word_width,
                                  size=self.input_ports,
                                  explicit_array=True,
                                  packed=True)
        self.data_in.add_attribute(FormalAttr(self.data_in.name, FormalSignalConstraint.SEQUENCE))

        self.data_out = self.output("data_out",
                                    width=self.word_width,
                                    size=self.output_ports,
                                    explicit_array=True,
                                    packed=True)
        self.data_out.add_attribute(FormalAttr(self.data_out.name, FormalSignalConstraint.SEQUENCE))

        # global cycle count for accessor comparison
        self._cycle_count = self.var("cycle_count", 16)

        @always_ff((posedge, self.gclk), (negedge, "rst_n"))
        def increment_cycle_count(self):
            if ~self.rst_n:
                self._cycle_count = 0
            else:
                self._cycle_count = self._cycle_count + 1

        self.add_always(increment_cycle_count)

        # info about memories
        num_mem = len(memories)
        subscript_mems = list(self.memories.keys())

        # list of the data out from each memory
        self.mem_data_outs = [self.var(f"mem_data_out_{subscript_mems[i]}",
                                       width=self.word_width,
                                       size=self.memories[subscript_mems[i]]
                                       ["read_port_width" if "read_port_width" in self.memories[subscript_mems[i]]
                                        else "read_write_port_width"],
                                       explicit_array=True, packed=True) for i in range(num_mem)]

        # keep track of write, read_addr, and write_addr vars for read/write memories
        # to later check whether there is a write and what to use for the shared port
        self.mem_read_write_addrs = {}

        # create memory instance for each memory
        self.mem_insts = {}
        i = 0
        for mem in self.memories.keys():
            m = mem_inst(self.memories[mem], self.word_width)
            self.mem_insts[mem] = m

            self.add_child(mem,
                           m,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           # put data out in memory data out list
                           data_out=self.mem_data_outs[i],
                           chain_en=self.chain_en)
            i += 1

        # get input and output memories
        is_input, is_output = [], []
        for mem_name in self.memories.keys():
            mem = self.memories[mem_name]
            if mem["is_input"]:
                is_input.append(mem_name)
            if mem["is_output"]:
                is_output.append(mem_name)

        # TODO direct connection to write doesn't work (?), so have to do this...
        self.low = self.var("low", 1)
        self.wire(self.low, 0)

        # TODO adding multiple ports to 1 memory after talking about mux with compiler team

        # set up input memories
        for i in range(len(is_input)):
            in_mem = is_input[i]

            # input addressor / accessor parameters
            input_dim = self.memories[in_mem]["input_edge_params"]["dim"]
            input_range = self.memories[in_mem]["input_edge_params"]["max_range"]
            input_stride = self.memories[in_mem]["input_edge_params"]["max_stride"]
            # input port associated with memory
            input_port_index = self.memories[in_mem]["input_port"]

            self.valid = self.var(
                f"input_port{input_port_index}_2{in_mem}_accessor_valid", 1)
            self.wire(self.mem_insts[in_mem].ports.write, self.valid)

            # hook up data from the specified input port to the memory
            safe_wire(self, self.mem_insts[in_mem].ports.data_in[0],
                      self.data_in[input_port_index])

            if self.memories[in_mem]["num_read_write_ports"] > 0:
                self.mem_read_write_addrs[in_mem] = {"write": self.valid}

            # create IteratorDomain, AddressGenerator, and ScheduleGenerator
            # for writes to this input memory
            forloop = ForLoop(iterator_support=input_dim,
                              config_width=max(1, clog2(input_range)))  # self.default_config_width)
            loop_itr = forloop.get_iter()
            loop_wth = forloop.get_cfg_width()

            self.add_child(f"input_port{input_port_index}_2{in_mem}_forloop",
                           forloop,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid)

            newAG = AddrGen(iterator_support=input_dim,
                            config_width=max(1, clog2(input_stride)))  # self.default_config_width)
            self.add_child(f"input_port{input_port_index}_2{in_mem}_write_addr_gen",
                           newAG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid,
                           mux_sel=forloop.ports.mux_sel_out,
                           restart=forloop.ports.restart)

            if self.memories[in_mem]["num_read_write_ports"] == 0:
                safe_wire(self, self.mem_insts[in_mem].ports.write_addr[0], newAG.ports.addr_out)
            else:
                self.mem_read_write_addrs[in_mem]["write_addr"] = newAG.ports.addr_out

            newSG = SchedGen(iterator_support=input_dim,
                             config_width=self.cycle_count_width)
            self.add_child(f"input_port{input_port_index}_2{in_mem}_write_sched_gen",
                           newSG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           mux_sel=forloop.ports.mux_sel_out,
                           finished=forloop.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self.valid)

        # set up output memories
        for i in range(len(is_output)):
            out_mem = is_output[i]

            # output addressor / accessor parameters
            output_dim = self.memories[out_mem]["output_edge_params"]["dim"]
            output_range = self.memories[out_mem]["output_edge_params"]["max_range"]
            output_stride = self.memories[out_mem]["output_edge_params"]["max_stride"]
            # output port associated with memory
            output_port_index = self.memories[out_mem]["output_port"]

            # hook up data from the memory to the specified output port
            self.wire(self.data_out[output_port_index],
                      self.mem_insts[out_mem].ports.data_out[0][0])
            # self.mem_data_outs[subscript_mems.index(out_mem)][0])

            self.valid = self.var(f"{out_mem}2output_port{output_port_index}_accessor_valid", 1)
            if self.memories[out_mem]["rw_same_cycle"]:
                self.wire(self.mem_insts[out_mem].ports.read, self.valid)

            # create IteratorDomain, AddressGenerator, and ScheduleGenerator
            # for reads from this output memory
            forloop = ForLoop(iterator_support=output_dim,
                              config_width=max(1, clog2(output_range)))  # self.default_config_width)
            loop_itr = forloop.get_iter()
            loop_wth = forloop.get_cfg_width()

            self.add_child(f"{out_mem}2output_port{output_port_index}_forloop",
                           forloop,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid)

            newAG = AddrGen(iterator_support=output_dim,
                            config_width=max(1, clog2(output_stride)))  # self.default_config_width)
            self.add_child(f"{out_mem}2output_port{output_port_index}_read_addr_gen",
                           newAG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid,
                           mux_sel=forloop.ports.mux_sel_out,
                           restart=forloop.ports.restart)

            if self.memories[out_mem]["num_read_write_ports"] == 0:
                safe_wire(self, self.mem_insts[out_mem].ports.read_addr[0], newAG.ports.addr_out)
            else:
                self.mem_read_write_addrs[in_mem]["read_addr"] = newAG.ports.addr_out

            newSG = SchedGen(iterator_support=output_dim,
                             config_width=self.cycle_count_width)  # self.default_config_width)
            self.add_child(f"{out_mem}2output_port{output_port_index}_read_sched_gen",
                           newSG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           mux_sel=forloop.ports.mux_sel_out,
                           finished=forloop.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self.valid)

        # create shared IteratorDomains and accessors as well as
        # read/write addressors for memories connected by each edge
        for edge in self.edges:

            # see how many signals need to be selected between for
            # from and to signals for edge
            num_mux_from = len(edge["from_signal"])
            num_mux_to = len(edge["to_signal"])

            # get unique edge_name identifier for hardware modules
            edge_name = get_edge_name(edge)

            # create forloop and accessor valid output signal
            self.valid = self.var(edge_name + "_accessor_valid", 1)

            forloop = ForLoop(iterator_support=edge["dim"])
            self.forloop = forloop
            loop_itr = forloop.get_iter()
            loop_wth = forloop.get_cfg_width()

            self.add_child(edge_name + "_forloop",
                           forloop,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid)

            # create input addressor
            readAG = AddrGen(iterator_support=edge["dim"],
                             config_width=self.default_config_width)
            self.add_child(f"{edge_name}_read_addr_gen",
                           readAG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid,
                           mux_sel=forloop.ports.mux_sel_out,
                           restart=forloop.ports.restart)

            # assign read address to all from memories
            if self.memories[edge["from_signal"][0]]["num_read_write_ports"] == 0:
                # can assign same read addrs to all the memories
                for i in range(len(edge["from_signal"])):
                    safe_wire(self, self.mem_insts[edge["from_signal"][i]].ports.read_addr[0], readAG.ports.addr_out)
            else:
                for i in range(len(edge["from_signal"])):
                    self.mem_read_write_addrs[edge["from_signal"][i]]["read_addr"] = readAG.ports.addr_out

            # if needing to mux, choose which from memory we get data
            # from for to memory data in
            if num_mux_from > 1:
                num_mux_bits = clog2(num_mux_from)
                self.mux_sel = self.var(f"{edge_name}_mux_sel",
                                        width=num_mux_bits)

                read_addr_width = max(1, clog2(self.memories[edge["from_signal"][0]]["capacity"]))
                # decide which memory to get data from for to memory's data in
                safe_wire(self, self.mux_sel,
                          readAG.ports.addr_out[read_addr_width + num_mux_from - 1, read_addr_width])

                comb_mux_from = self.combinational()
                # for i in range(num_mux_from):
                # TODO want to use a switch statement here, but get add_fn_ln issue
                if_mux_sel = IfStmt(self.mux_sel == 0)
                for j in range(len(edge["to_signal"])):
                    # print("TO ", edge["to_signal"][j])
                    # print("FROM ", edge["from_signal"][i])
                    if_mux_sel.then_(self.mem_insts[edge["to_signal"][j]].ports.data_in.assign(self.mem_insts[edge["from_signal"][0]].ports.data_out))
                    if_mux_sel.else_(self.mem_insts[edge["to_signal"][j]].ports.data_in.assign(self.mem_insts[edge["from_signal"][1]].ports.data_out))
                comb_mux_from.add_stmt(if_mux_sel)

            # no muxing from, data_out from the one and only memory
            # goes to all to memories (valid determines whether it is
            # actually written)
            else:
                for j in range(len(edge["to_signal"])):
                    # print("TO ", edge["to_signal"][j])
                    # print("FROM ", edge["from_signal"][0])
                    safe_wire(self,
                              self.mem_insts[edge["to_signal"][j]].ports.data_in,
                              # only one memory to read from
                              self.mem_insts[edge["from_signal"][0]].ports.data_out)

            # create output addressor
            writeAG = AddrGen(iterator_support=edge["dim"],
                              config_width=self.default_config_width)
            # step, mux_sel, restart may need delayed signals (assigned later)
            self.add_child(f"{edge_name}_write_addr_gen",
                           writeAG,
                           clk=self.gclk,
                           rst_n=self.rst_n)

            # set write addr for to memories
            if self.memories[edge["to_signal"][0]]["num_read_write_ports"] == 0:
                for i in range(len(edge["to_signal"])):
                    safe_wire(self, self.mem_insts[edge["to_signal"][i]].ports.write_addr[0], writeAG.ports.addr_out)
            else:
                for i in range(len(edge["to_signal"])):
                    self.mem_read_write_addrs[edge["to_signal"][i]] = {"write": self.valid, "write_addr": writeAG.ports.addr_out}

            # calculate necessary delay between from_signal to to_signal
            # TODO this may need to be more sophisticated and based on II as well
            # TODO just need to add for loops for all the ports
            if self.memories[edge["from_signal"][0]]["num_read_write_ports"] == 0:
                self.delay = self.memories[edge["from_signal"][0]]["read_info"][0]["latency"]
            else:
                self.delay = self.memories[edge["from_signal"][0]]["read_write_info"][0]["latency"]

            if self.delay > 0:
                # signals that need to be delayed due to edge latency
                self.delayed_writes = self.var(f"{edge_name}_delayed_writes",
                                               width=self.delay)
                self.delayed_mux_sels = self.var(f"{edge_name}_delayed_mux_sels",
                                                 width=self.forloop.ports.mux_sel_out.width,
                                                 size=self.delay,
                                                 explicit_array=True,
                                                 packed=True)
                self.delayed_restarts = self.var(f"{edge_name}_delayed_restarts",
                                                 width=self.delay)

                # delay in valid between read from memory and write to next memory
                @always_ff((posedge, self.gclk), (negedge, "rst_n"))
                def get_delayed_write(self):
                    if ~self.rst_n:
                        self.delayed_writes = 0
                        self.delayed_mux_sels = 0
                        self.delayed_restarts = 0
                    else:
                        for i in range(self.delay - 1):
                            self.delayed_writes[i + 1] = self.delayed_writes[i]
                            self.delayed_mux_sels[i + 1] = self.delayed_mux_sels[i]
                            self.delayed_restarts[i + 1] = self.delayed_restarts[i]
                        self.delayed_writes[0] = self.valid
                        self.delayed_mux_sels[0] = self.forloop.ports.mux_sel_out
                        self.delayed_restarts[0] = self.forloop.ports.restart

                self.add_always(get_delayed_write)

            # if we have a mux for the destination memories,
            # choose which mux to write to
            if num_mux_to > 1:
                num_mux_bits = clog2(num_mux_to)
                self.mux_sel_to = self.var(f"{edge_name}_mux_sel_to",
                                           width=num_mux_bits)

                write_addr_width = max(1, clog2(self.memories[edge["to_signal"][0]]["capacity"]))
                # decide which destination memory gets written to
                safe_wire(self, self.mux_sel_to,
                          writeAG.ports.addr_out[write_addr_width + num_mux_to - 1, write_addr_width])

                # wire the write (or if needed, delayed write) signal to the selected destination memory
                # and set write enable low for all other destination memories
                comb_mux_to = self.combinational()
                for i in range(num_mux_to):
                    if_mux_sel_to = IfStmt(self.mux_sel_to == i)
                    if self.delay == 0:
                        if_mux_sel_to.then_(self.mem_insts[edge["to_signal"][i]].ports.write.assign(self.valid))
                    else:
                        if_mux_sel_to.then_(self.mem_insts[edge["to_signal"][i]].ports.write.assign(self.delayed_writes[self.delay - 1]))

                    if_mux_sel_to.else_(self.mem_insts[edge["to_signal"][i]].ports.write.assign(self.low))
                    comb_mux_to.add_stmt(if_mux_sel_to)

            # no muxing to, just write to the one destination memory
            else:
                if self.delay == 0:
                    self.wire(self.mem_insts[edge["to_signal"][0]].ports.write, self.valid)
                else:
                    self.wire(self.mem_insts[edge["to_signal"][0]].ports.write, self.delayed_writes[self.delay - 1])

            # assign delayed signals for write addressor if needed
            if self.delay == 0:
                self.wire(writeAG.ports.step, self.valid)
                self.wire(writeAG.ports.mux_sel, self.forloop.ports.mux_sel_out)
                self.wire(writeAG.ports.restart, self.forloop.ports.restart)
            else:
                self.wire(writeAG.ports.step, self.delayed_writes[self.delay - 1])
                self.wire(writeAG.ports.mux_sel, self.delayed_mux_sels[self.delay - 1])
                self.wire(writeAG.ports.restart, self.delayed_restarts[self.delay - 1])

            # create accessor for edge
            newSG = SchedGen(iterator_support=edge["dim"],
                             config_width=self.cycle_count_width)  # self.default_config_width)

            self.add_child(edge_name + "_sched_gen",
                           newSG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           mux_sel=forloop.ports.mux_sel_out,
                           finished=forloop.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self.valid)

        # for read write memories, choose either read or write address based on whether
        # we are writing to the memory (whether write enable is high)
        read_write_addr_comb = self.combinational()
        for mem_name in self.memories:
            if mem_name in self.mem_read_write_addrs:
                mem_info = self.mem_read_write_addrs[mem_name]
                if_write = IfStmt(mem_info["write"] == 1)
                addr_width = self.mem_insts[mem_name].ports.read_write_addr[0].width
                if_write.then_(self.mem_insts[mem_name].ports.read_write_addr[0].assign(mem_info["write_addr"][addr_width - 1, 0]))
                if_write.else_(self.mem_insts[mem_name].ports.read_write_addr[0].assign(mem_info["read_addr"][addr_width - 1, 0]))
                read_write_addr_comb.add_stmt(if_write)

        # clock enable and flush passes
        kts.passes.auto_insert_clock_enable(self.internal_generator)
        clk_en_port = self.internal_generator.get_port("clk_en")
        clk_en_port.add_attribute(FormalAttr(clk_en_port.name, FormalSignalConstraint.SET1))

        self.add_attribute("sync-reset=flush")
        kts.passes.auto_insert_sync_reset(self.internal_generator)
        flush_port = self.internal_generator.get_port("flush")

        # bring config registers up to top level
        lift_config_reg(self.internal_generator)
Esempio n. 8
0
    def __init__(self,
                 interconnect_output_ports,
                 mem_depth,
                 num_tiles,
                 banks,
                 iterator_support,
                 address_width,
                 config_width=16):
        """Build the ``output_addr_ctrl`` generator.

        Declares the clock/reset, the per-port ``valid_in``/``step_in``
        inputs, the ``ren`` and ``addr_out`` outputs, and instantiates one
        ``AddrGen`` child per interconnect output port whose address is
        captured in ``local_addrs``.

        Args:
            interconnect_output_ports: Number of output ports. Sets the width
                of ``valid_in``/``step_in``/``ren`` and the number of
                ``AddrGen`` children.
            mem_depth: Combined with ``num_tiles`` to size the memory address
                (``clog2(num_tiles * mem_depth)``).
            num_tiles: See ``mem_depth``.
            banks: Number of entries in the ``ren`` array; when greater than
                one, ``clog2(banks)`` extra address bits are added.
            iterator_support: Forwarded to every ``AddrGen`` child.
            address_width: Stored first, then replaced by the derived
                ``mem_addr_width + bank_addr_width``.
            config_width: Forwarded to every ``AddrGen`` child.
        """
        super().__init__("output_addr_ctrl")

        # Keep the raw constructor arguments on the instance.
        self.interconnect_output_ports = interconnect_output_ports
        self.mem_depth = mem_depth
        self.num_tiles = num_tiles
        self.banks = banks
        self.iterator_support = iterator_support
        self.config_width = config_width
        # Provisional value only: it is replaced by the derived width below.
        self.address_width = address_width

        num_ports = self.interconnect_output_ports
        self.port_sched_width = clog2(num_ports)

        # Derived widths: address within the memory, plus bank-select bits
        # that only exist when there is more than one bank.
        self.mem_addr_width = clog2(self.num_tiles * self.mem_depth)
        self.bank_addr_width = clog2(self.banks) if self.banks > 1 else 0
        self.address_width = self.mem_addr_width + self.bank_addr_width

        # Clock and reset.
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        # Inputs: one valid bit and one step bit per output port.
        self._valid_in = self.input("valid_in", num_ports)
        self._step_in = self.input("step_in", num_ports)

        # Outputs: per-bank read enables and per-port memory addresses.
        self._ren = self.output("ren",
                                num_ports,
                                size=self.banks,
                                packed=True,
                                explicit_array=True)
        self._addresses = self.output("addr_out",
                                      self.mem_addr_width,
                                      size=num_ports,
                                      packed=True,
                                      explicit_array=True)

        # Full-width (bank + memory) address produced for each port.
        self._local_addrs = self.var("local_addrs",
                                     self.address_width,
                                     size=num_ports,
                                     explicit_array=True,
                                     packed=True)

        # Read-enable generation: a direct wire in the 1-bank/1-port case,
        # the single-bank block for 1 bank with several ports, and the
        # multi-bank block for every remaining configuration.
        single_bank = self.banks == 1
        if single_bank and num_ports == 1:
            self.wire(self._ren[0][0], self._valid_in)
        elif single_bank and num_ports > 1:
            self.add_code(self.set_ren_single)
        else:
            self.add_code(self.set_ren_mult)

        self.add_code(self.set_out)

        # One address generator per port; each advances only when both the
        # step and valid bits of its port are high.
        for port_idx in range(num_ports):
            addr_gen = AddrGen(iterator_support=self.iterator_support,
                               config_width=self.config_width)

            self.add_child(f"address_gen_{port_idx}",
                           addr_gen,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           clk_en=const(1, 1),
                           flush=const(0, 1))

            step_port = addr_gen.ports.step
            self.add_stmt(
                step_port.assign(self._step_in[port_idx]
                                 & self._valid_in[port_idx]))

            # Keep only the low address_width bits of the generated address.
            self.wire(self._local_addrs[port_idx],
                      addr_gen.ports.addr_out[self.address_width - 1, 0])
Esempio n. 9
0
    def __init__(
            self,
            data_width=16,  # CGRA Params
            mem_width=64,
            mem_depth=512,
            banks=1,
            input_addr_iterator_support=6,
            output_addr_iterator_support=6,
            input_sched_iterator_support=6,
            output_sched_iterator_support=6,
            config_width=16,
            #  output_config_width=16,
            interconnect_input_ports=1,  # Connection to int
            interconnect_output_ports=1,
            mem_input_ports=1,
            mem_output_ports=1,
            read_delay=1,  # Cycle delay in read (SRAM vs Register File)
            rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
            agg_height=4):
        """Build the ``sram_formal`` generator.

        Declares the clock/reset and data ports (tagged with ``FormalAttr``
        constraints), a 16-bit ``cycle_count`` variable, and two controller
        groups -- one for writes, one for reads -- each made of a ``ForLoop``,
        an ``AddrGen`` and a ``SchedGen`` child. It also instantiates one
        ``SRAMWrapper`` (stub SRAM) per bank.

        Args:
            data_width: Width of one data word.
            mem_width: Only used to derive ``fetch_width = mem_width //
                data_width``, the number of words in ``data_in``/``data_out``.
            mem_depth: Sizes the ``addr`` variable (``clog2(mem_depth)``) and
                is forwarded to each ``SRAMWrapper``.
            banks: Number of ``SRAMWrapper`` children created.
            input_addr_iterator_support: Stored on the instance; not
                otherwise referenced in this constructor.
            output_addr_iterator_support: Same as above.
            input_sched_iterator_support: Same as above.
            output_sched_iterator_support: Same as above.
            config_width: Width of the ``write_addr``/``read_addr`` variables.
            interconnect_input_ports: Stored on the instance only.
            interconnect_output_ports: Stored on the instance only.
            mem_input_ports: Not referenced in this constructor.
            mem_output_ports: Not referenced in this constructor.
            read_delay: Not referenced in this constructor.
            rw_same_cycle: Not referenced in this constructor.
            agg_height: Stored on the instance only.
        """
        super().__init__("sram_formal", debug=True)

        # Number of data words that make up one memory word.
        self.fetch_width = mem_width // data_width
        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.agg_height = agg_height
        self.mem_depth = mem_depth
        self.banks = banks
        self.data_width = data_width
        self.config_width = config_width
        self.input_addr_iterator_support = input_addr_iterator_support
        self.output_addr_iterator_support = output_addr_iterator_support
        self.input_sched_iterator_support = input_sched_iterator_support
        self.output_sched_iterator_support = output_sched_iterator_support

        # All child controllers below are built with these fixed values
        # rather than with the per-controller constructor arguments above.
        self.default_iterator_support = 6
        self.default_config_width = 16
        # inputs
        self._clk = self.clock("clk")
        self._clk.add_attribute(
            FormalAttr(f"{self._clk.name}", FormalSignalConstraint.CLK))
        self._rst_n = self.reset("rst_n")
        self._rst_n.add_attribute(
            FormalAttr(f"{self._rst_n.name}", FormalSignalConstraint.RSTN))

        # Internal cycle counter fed to both schedule generators; its update
        # logic lives in increment_cycle_count (defined outside this block).
        self._cycle_count = self.var("cycle_count", 16)
        self.add_code(self.increment_cycle_count)

        self._data_in = self.input("data_in",
                                   self.data_width,
                                   size=self.fetch_width,
                                   packed=True)
        self._data_in.add_attribute(
            FormalAttr(f"{self._data_in.name}",
                       FormalSignalConstraint.SEQUENCE))

        self._data_out = self.output("data_out",
                                     self.data_width,
                                     size=self.fetch_width,
                                     packed=True,
                                     explicit_array=True)
        self._data_out.add_attribute(
            FormalAttr(f"{self._data_out.name}",
                       FormalSignalConstraint.SEQUENCE))

        # Address presented to the SRAM bank(s); driven by set_sram_addr,
        # which is registered at the end of this constructor.
        self._addr = self.var("addr", clog2(self.mem_depth))

        # Connect up the write to valid in for sequence
        # Note: despite its name, "valid_in" is declared as an output port.
        self._write = self.var("write", 1)
        self._valid_in = self.output("valid_in", 1)
        self._valid_in.add_attribute(
            FormalAttr(f"{self._valid_in.name}",
                       FormalSignalConstraint.SEQUENCE))
        self.wire(self._write, self._valid_in)

        self._read = self.var("read", 1)
        # Write-enable and chip-enable mirrors (RTL names use "strg").
        self._wen_to_sram = self.var("wen_to_strg", 1, packed=True)
        self._cen_to_sram = self.var("cen_to_strg", 1, packed=True)
        self._valid_out = self.output("valid_out", 1)
        self._valid_out.add_attribute(
            FormalAttr(f"{self._valid_out.name}",
                       FormalSignalConstraint.SEQUENCE))

        # Valid out should just be if a read was on the previous cycle...
        self.add_code(self.set_valid_out)

        # Write enable follows write; chip enable is high for any access.
        self.wire(self._wen_to_sram, self._write)
        self.wire(self._cen_to_sram, self._write | self._read)

        # Outputs of the write/read address generators instantiated below.
        self._write_addr = self.var("write_addr", self.config_width)
        self._read_addr = self.var("read_addr", self.config_width)

        # ------------------------------ Write controller group ------------------------------
        fl_ctr_sram_wr = ForLoop(
            iterator_support=self.default_iterator_support,
            config_width=self.default_config_width)
        # NOTE(review): loop_itr / loop_wth are not used anywhere in this
        # constructor.
        loop_itr = fl_ctr_sram_wr.get_iter()
        loop_wth = fl_ctr_sram_wr.get_cfg_width()

        # The write loop counter advances on every write.
        self.add_child(f"sram_write_loops",
                       fl_ctr_sram_wr,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._write)
        # Whatever comes through here should hopefully just pipe through seamlessly
        # addressor modules
        self.add_child(f"sram_write_addr_gen",
                       AddrGen(iterator_support=self.default_iterator_support,
                               config_width=self.default_config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._write,
                       mux_sel=fl_ctr_sram_wr.ports.mux_sel_out,
                       addr_out=self._write_addr,
                       restart=fl_ctr_sram_wr.ports.restart)

        # scheduler modules
        # The write schedule generator is what produces the write strobe.
        self.add_child(f"sram_write_sched_gen",
                       SchedGen(iterator_support=self.default_iterator_support,
                                config_width=self.default_config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       cycle_count=self._cycle_count,
                       mux_sel=fl_ctr_sram_wr.ports.mux_sel_out,
                       finished=fl_ctr_sram_wr.ports.restart,
                       valid_output=self._write)

        # -------------------------------- Delineate new group -------------------------------
        # Read controller group: same structure as the write group, stepping
        # on the read strobe instead.
        fl_ctr_sram_rd = ForLoop(
            iterator_support=self.default_iterator_support,
            config_width=self.default_config_width)
        # NOTE(review): reassigned here and still unused in this constructor.
        loop_itr = fl_ctr_sram_rd.get_iter()
        loop_wth = fl_ctr_sram_rd.get_cfg_width()

        self.add_child(f"sram_read_loops",
                       fl_ctr_sram_rd,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._read)

        # One stub SRAM wrapper per bank.
        # NOTE(review): every bank is wired to the same data_in, data_out and
        # addr signals -- confirm that only banks == 1 is intended here.
        for i in range(self.banks):
            mbank = SRAMWrapper(
                use_sram_stub=True,
                sram_name="NA",
                data_width=self.data_width,
                fw_int=self.fetch_width,
                mem_depth=self.mem_depth,
                mem_input_ports=1,
                mem_output_ports=1,
                # address_width=self.config_width,
                # NOTE(review): hard-coded; 9 bits corresponds to the default
                # mem_depth of 512 and does not follow self.mem_depth.
                address_width=9,
                bank_num=i,
                num_tiles=1)

            # Chaining inputs are tied to 0 and the clock enable to 1.
            self.add_child(
                f"mem_{i}",
                mbank,
                clk=self._clk,
                enable_chain_input=0,
                enable_chain_output=0,
                chain_idx_input=0,
                chain_idx_output=0,
                clk_en=1,
                mem_data_in_bank=self._data_in,
                mem_data_out_bank=self._data_out,
                mem_addr_in_bank=self._addr,
                mem_cen_in_bank=self._write | self._read,
                mem_wen_in_bank=self._write,
                wtsel=0,
                # valid_data=,
                rtsel=0)

        # Read address generator, driven by the read loop counter.
        self.add_child(f"sram_read_addr_gen",
                       AddrGen(iterator_support=self.default_iterator_support,
                               config_width=self.default_config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._read,
                       mux_sel=fl_ctr_sram_rd.ports.mux_sel_out,
                       addr_out=self._read_addr,
                       restart=fl_ctr_sram_rd.ports.restart)

        # The read schedule generator is what produces the read strobe.
        self.add_child(f"sram_read_sched_gen",
                       SchedGen(iterator_support=self.default_iterator_support,
                                config_width=self.default_config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       cycle_count=self._cycle_count,
                       mux_sel=fl_ctr_sram_rd.ports.mux_sel_out,
                       finished=fl_ctr_sram_rd.ports.restart,
                       valid_output=self._read)

        # Logic that drives self._addr (set_sram_addr is defined outside
        # this block).
        self.add_code(self.set_sram_addr)
Esempio n. 10
0
    def __init__(self, iterator_support=6, config_width=16, use_enable=True):
        """Build a schedule generator named ``sched_gen_<iters>_<width>``.

        Declares the ``cycle_count``, ``mux_sel`` and ``finished`` inputs and
        the ``valid_output`` output, a gate register that is set once
        ``finished`` is seen, and an internal ``AddrGen`` child whose output
        is kept in ``addr_out``. The logic that produces ``valid_out`` and
        ``valid_output`` is registered from ``set_valid_out`` and
        ``set_valid_output``, which are defined outside this block.

        Args:
            iterator_support: Forwarded to the internal ``AddrGen``; also
                sizes ``mux_sel`` (``max(clog2(iterator_support), 1)`` bits).
            config_width: Width of ``cycle_count`` and ``addr_out``; also
                forwarded to the internal ``AddrGen``.
            use_enable: When true, ``enable`` is an input port tagged as a
                configuration register; otherwise it is an internal variable
                tied to constant 1.
        """

        super().__init__(f"sched_gen_{iterator_support}_{config_width}")

        self.iterator_support = iterator_support
        self.config_width = config_width
        self.use_enable = use_enable

        # PORT DEFS: begin

        # INPUTS
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        # OUTPUTS
        self._valid_output = self.output("valid_output", 1)

        # VARS
        # (cycle_count and mux_sel below are declared as input ports, not
        # internal variables.)
        self._valid_out = self.var("valid_out", 1)
        self._cycle_count = self.input("cycle_count", self.config_width)
        self._mux_sel = self.input("mux_sel",
                                   max(clog2(self.iterator_support), 1))
        self._addr_out = self.var("addr_out", self.config_width)

        # Receive signal on last iteration of looping structure and
        # gate the output...
        self._finished = self.input("finished", 1)
        self._valid_gate_inv = self.var("valid_gate_inv", 1)
        self._valid_gate = self.var("valid_gate", 1)
        # valid_gate is simply the complement of the registered inverse.
        self.wire(self._valid_gate, ~self._valid_gate_inv)

        # Since dim = 0 is not sufficient, we need a way to prevent
        # the controllers from firing on the starting offset
        if self.use_enable:
            self._enable = self.input("enable", 1)
            self._enable.add_attribute(
                ConfigRegAttr("Disable the controller so it never fires..."))
            self._enable.add_attribute(
                FormalAttr(f"{self._enable.name}",
                           FormalSignalConstraint.SOLVE))
        # Otherwise we set it as a 1 and leave it up to synthesis...
        else:
            self._enable = self.var("enable", 1)
            self.wire(self._enable, kratos.const(1, 1))

        # Gate register: cleared by reset, set when `finished` is asserted.
        # No other assignment exists, so once set it stays set until reset.
        @always_ff((posedge, "clk"), (negedge, "rst_n"))
        def valid_gate_inv_ff():
            if ~self._rst_n:
                self._valid_gate_inv = 0
            # If we are finishing the looping structure, turn this off to implement one-shot
            elif self._finished:
                self._valid_gate_inv = 1

        self.add_code(valid_gate_inv_ff)

        # Compare based on minimum of addr + global cycle...
        # Both signals are declared with config_width above, so this is a
        # plain Python int equal to config_width.
        self.c_a_cmp = min(self._cycle_count.width, self._addr_out.width)

        # PORT DEFS: end

        # Internal address generator: steps whenever valid_out fires, writes
        # into addr_out, and has its restart input tied to constant 0.
        self.add_child(f"sched_addr_gen",
                       AddrGen(iterator_support=self.iterator_support,
                               config_width=self.config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._valid_out,
                       mux_sel=self._mux_sel,
                       addr_out=self._addr_out,
                       restart=const(0, 1))

        # Blocks driving valid_out and valid_output (defined outside this
        # block).
        self.add_code(self.set_valid_out)
        self.add_code(self.set_valid_output)
Esempio n. 11
0
    def __init__(
            self,
            data_width=16,  # CGRA Params
            mem_width=64,
            mem_depth=512,
            banks=1,
            input_addr_iterator_support=6,
            output_addr_iterator_support=6,
            input_sched_iterator_support=6,
            output_sched_iterator_support=6,
            config_width=16,
            #  output_config_width=16,
            interconnect_input_ports=2,  # Connection to int
            interconnect_output_ports=2,
            mem_input_ports=1,
            mem_output_ports=1,
            read_delay=1,  # Cycle delay in read (SRAM vs Register File)
            rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
            agg_height=4,
            tb_height=2):

        super().__init__("strg_ub_tb_only")

        ##################################################################################
        # Capture constructor parameter...
        ##################################################################################
        self.fetch_width = mem_width // data_width
        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.agg_height = agg_height
        self.tb_height = tb_height
        self.mem_width = mem_width
        self.mem_depth = mem_depth
        self.config_width = config_width
        self.data_width = data_width
        self.input_addr_iterator_support = input_addr_iterator_support
        self.input_sched_iterator_support = input_sched_iterator_support

        self.default_iterator_support = 6
        self.default_config_width = 16
        self.sram_iterator_support = 6
        self.agg_rd_addr_gen_width = 8

        ##################################################################################
        # IO
        ##################################################################################
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        self._cycle_count = self.input("cycle_count", 16)

        # data from SRAM
        self._sram_read_data = self.input("sram_read_data",
                                          self.data_width,
                                          size=self.fetch_width,
                                          packed=True,
                                          explicit_array=True)
        # read enable from SRAM
        self._t_read = self.input("t_read", self.interconnect_output_ports)

        # sram to tb for loop
        self._loops_sram2tb_mux_sel = self.input(
            "loops_sram2tb_mux_sel",
            width=max(clog2(self.default_iterator_support), 1),
            size=self.interconnect_output_ports,
            explicit_array=True,
            packed=True)

        self._loops_sram2tb_restart = self.input(
            "loops_sram2tb_restart",
            width=1,
            size=self.interconnect_output_ports,
            explicit_array=True,
            packed=True)

        self._valid_out = self.output("accessor_output",
                                      self.interconnect_output_ports)
        self._data_out = self.output("data_out",
                                     self.data_width,
                                     size=self.interconnect_output_ports,
                                     packed=True,
                                     explicit_array=True)

        ##################################################################################
        # TB RELEVANT SIGNALS
        ##################################################################################
        self._tb = self.var("tb",
                            width=self.data_width,
                            size=(self.interconnect_output_ports,
                                  self.tb_height, self.fetch_width),
                            packed=True,
                            explicit_array=True)

        self._tb_write_addr = self.var("tb_write_addr",
                                       2 + max(1, clog2(self.tb_height)),
                                       size=self.interconnect_output_ports,
                                       packed=True,
                                       explicit_array=True)

        self._tb_read_addr = self.var("tb_read_addr",
                                      2 + max(1, clog2(self.tb_height)),
                                      size=self.interconnect_output_ports,
                                      packed=True,
                                      explicit_array=True)

        # write enable to tb, delayed 1 cycle from SRAM reads
        self._t_read_d1 = self.var("t_read_d1", self.interconnect_output_ports)
        # read enable for reads from tb
        self._tb_read = self.var("tb_read", self.interconnect_output_ports)

        # Break out valids...
        self.wire(self._valid_out, self._tb_read)

        # delayed input mux_sel and restart signals from sram read/tb write
        # for loop and scheduling
        self._mux_sel_d1 = self.var("mux_sel_d1",
                                    kts.clog2(self.default_iterator_support),
                                    size=self.interconnect_output_ports,
                                    packed=True,
                                    explicit_array=True)

        self._restart_d1 = self.var("restart_d1",
                                    width=1,
                                    size=self.interconnect_output_ports,
                                    explicit_array=True,
                                    packed=True)

        for i in range(self.interconnect_output_ports):
            # signals delayed by 1 cycle from SRAM
            @always_ff((posedge, "clk"), (negedge, "rst_n"))
            def delay_read():
                if ~self._rst_n:
                    self._t_read_d1[i] = 0
                    self._mux_sel_d1[i] = 0
                    self._restart_d1[i] = 0
                else:
                    self._t_read_d1[i] = self._t_read[i]
                    self._mux_sel_d1[i] = self._loops_sram2tb_mux_sel[i]
                    self._restart_d1[i] = self._loops_sram2tb_restart[i]

            self.add_code(delay_read)

        ##################################################################################
        # TB PATHS
        ##################################################################################
        for i in range(self.interconnect_output_ports):

            self.tb_iter_support = 6
            self.tb_addr_width = 4
            self.tb_range_width = 16

            _AG = AddrGen(iterator_support=self.default_iterator_support,
                          config_width=self.tb_addr_width)

            self.add_child(f"tb_write_addr_gen_{i}",
                           _AG,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._t_read_d1[i],
                           mux_sel=self._mux_sel_d1[i],
                           restart=self._restart_d1[i])
            safe_wire(gen=self,
                      w_to=self._tb_write_addr[i],
                      w_from=_AG.ports.addr_out)

            @always_ff((posedge, "clk"))
            def tb_ctrl():
                if self._t_read_d1[i]:
                    self._tb[i][self._tb_write_addr[i][0]] = \
                        self._sram_read_data

            self.add_code(tb_ctrl)

            # READ FROM TB

            fl_ctr_tb_rd = ForLoop(iterator_support=self.tb_iter_support,
                                   config_width=self.tb_range_width)

            self.add_child(f"loops_buf2out_read_{i}",
                           fl_ctr_tb_rd,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._tb_read[i])

            _AG = AddrGen(iterator_support=self.tb_iter_support,
                          config_width=self.tb_addr_width)
            self.add_child(
                f"tb_read_addr_gen_{i}",
                _AG,
                clk=self._clk,
                rst_n=self._rst_n,
                step=self._tb_read[i],
                # addr_out=self._tb_read_addr[i])
                mux_sel=fl_ctr_tb_rd.ports.mux_sel_out,
                restart=fl_ctr_tb_rd.ports.restart)
            safe_wire(gen=self,
                      w_to=self._tb_read_addr[i],
                      w_from=_AG.ports.addr_out)

            self.add_child(
                f"tb_read_sched_gen_{i}",
                SchedGen(
                    iterator_support=self.tb_iter_support,
                    # config_width=self.tb_addr_width),
                    config_width=16),
                clk=self._clk,
                rst_n=self._rst_n,
                cycle_count=self._cycle_count,
                mux_sel=fl_ctr_tb_rd.ports.mux_sel_out,
                finished=fl_ctr_tb_rd.ports.restart,
                valid_output=self._tb_read[i])

            @always_comb
            def tb_to_out():
                self._data_out[i] = self._tb[i][self._tb_read_addr[i][
                    clog2(self.tb_height) + clog2(self.fetch_width) - 1,
                    clog2(self.fetch_width)]][self._tb_read_addr[i][
                        clog2(self.fetch_width) - 1, 0]]

            self.add_code(tb_to_out)
Esempio n. 12
0
    def __init__(self,
                 data_width=16,  # CGRA Params
                 mem_width=64,
                 mem_depth=512,
                 banks=1,
                 input_addr_iterator_support=6,
                 output_addr_iterator_support=6,
                 input_sched_iterator_support=6,
                 output_sched_iterator_support=6,
                 config_width=16,
                 #  output_config_width=16,
                 interconnect_input_ports=2,  # Connection to int
                 interconnect_output_ports=2,
                 mem_input_ports=1,
                 mem_output_ports=1,
                 read_delay=1,  # Cycle delay in read (SRAM vs Register File)
                 rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
                 agg_height=4,
                 tb_height=2):
        """Construct the ``strg_ub_agg_only`` generator.

        For every interconnect input port the constructor declares one
        aggregation buffer (``agg``) and the logic around it:

        * a ``ForLoop`` / ``AddrGen`` / ``SchedGen`` trio that drives the
          ``agg_write`` strobe and ``agg_write_addr``;
        * a clocked process that stores ``data_in`` into the addressed word
          while ``agg_write`` is high;
        * a read ``AddrGen`` stepped by the ``agg_read`` input and steered by
          the ``floop_mux_sel`` / ``floop_restart`` inputs;
        * a combinational read-out of the selected row onto ``agg_data_out``.

        Args:
            data_width: Bit width of a single data word.
            mem_width: Bit width of a memory row; ``mem_width // data_width``
                is stored as ``fetch_width`` (words per row).
            mem_depth: Stored on the instance; not otherwise used here.
            config_width: Stored on the instance; not otherwise used here.
            input_addr_iterator_support: Stored on the instance only.
            input_sched_iterator_support: Stored on the instance only.
            interconnect_input_ports: Number of input ports; one aggregator
                and one set of controllers is built per port.
            interconnect_output_ports: Stored on the instance only.
            agg_height: Number of rows in each aggregation buffer.
            tb_height: Stored on the instance only.
            banks, output_addr_iterator_support,
            output_sched_iterator_support, mem_input_ports,
            mem_output_ports, read_delay, rw_same_cycle: Accepted but not
                read by this constructor.
        """
        super().__init__("strg_ub_agg_only")

        ##################################################################################
        # Capture constructor parameter...
        ##################################################################################
        self.fetch_width = mem_width // data_width
        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.agg_height = agg_height
        self.tb_height = tb_height
        self.mem_width = mem_width
        self.mem_depth = mem_depth
        self.config_width = config_width
        self.data_width = data_width
        self.input_addr_iterator_support = input_addr_iterator_support
        self.input_sched_iterator_support = input_sched_iterator_support

        self.default_iterator_support = 6
        self.default_config_width = 16
        self.sram_iterator_support = 6
        self.agg_rd_addr_gen_width = 8

        # Sizing of the per-port aggregator controllers. None of these depend
        # on the port index, so they are set once instead of once per port.
        self.agg_iter_support = 6
        self.agg_addr_width = 4
        self.agg_range_width = 16

        ##################################################################################
        # IO
        ##################################################################################
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        self._cycle_count = self.input("cycle_count", 16)

        self._data_in = self.input("data_in", self.data_width,
                                   size=self.interconnect_input_ports,
                                   packed=True,
                                   explicit_array=True)

        # Externally generated read strobe, one bit per input port.
        self._agg_read = self.input("agg_read", self.interconnect_input_ports)

        # Loop-level select / restart for the read address generators; these
        # arrive as inputs rather than from a ForLoop built in this module.
        self._floop_mux_sel = self.input("floop_mux_sel",
                                         width=max(clog2(self.default_iterator_support), 1),
                                         size=self.interconnect_input_ports,
                                         explicit_array=True,
                                         packed=True)

        self._floop_restart = self.input("floop_restart",
                                         width=1,
                                         size=self.interconnect_input_ports,
                                         explicit_array=True,
                                         packed=True)

        self._agg_data_out = self.output("agg_data_out", self.data_width,
                                         size=(self.interconnect_input_ports,
                                               self.fetch_width),
                                         packed=True,
                                         explicit_array=True)
        self._agg_data_out.add_attribute(FormalAttr(self._agg_data_out.name, FormalSignalConstraint.SEQUENCE, "sram"))

        ##################################################################################
        # AGG RELEVANT SIGNALS
        ##################################################################################
        # Create an input to agg write scheduler + addressor for each input
        # Also need an addressor for the mux in addition to the read addr
        self._agg = self.var("agg",
                             width=self.data_width,
                             size=(self.interconnect_input_ports,
                                   self.agg_height,
                                   self.fetch_width),
                             packed=True,
                             explicit_array=True)

        self._agg_write = self.var("agg_write", self.interconnect_input_ports)
        # Make this based on the size
        # NOTE(review): the literal 2 equals clog2(fetch_width) for the
        # default 64/16 configuration -- confirm whether it should track
        # fetch_width instead of being fixed.
        self._agg_write_addr = self.var("agg_write_addr", 2 + clog2(self.agg_height),
                                        size=self.interconnect_input_ports,
                                        packed=True,
                                        explicit_array=True)
        self._agg_read_addr = self.var("agg_read_addr", max(1, clog2(self.agg_height)),
                                       size=self.interconnect_input_ports,
                                       packed=True,
                                       explicit_array=True)
        self._agg_read_addr_gen_out = self.var("agg_read_addr_gen_out", self.agg_rd_addr_gen_width,
                                               size=self.interconnect_input_ports,
                                               packed=True,
                                               explicit_array=True)

        ##################################################################################
        # AGG PATHS
        ##################################################################################
        for i in range(self.interconnect_input_ports):

            # ---- write side: loop counter -> address + schedule ----
            write_loops = ForLoop(iterator_support=self.agg_iter_support,
                                  config_width=self.agg_range_width)

            self.add_child(f"loops_in2buf_{i}",
                           write_loops,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._agg_write[i])

            write_addr_gen = AddrGen(iterator_support=self.agg_iter_support,
                                     config_width=self.agg_addr_width)
            self.add_child(f"agg_write_addr_gen_{i}",
                           write_addr_gen,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._agg_write[i],
                           mux_sel=write_loops.ports.mux_sel_out,
                           restart=write_loops.ports.restart)
            safe_wire(gen=self, w_to=self._agg_write_addr[i], w_from=write_addr_gen.ports.addr_out)

            # The schedule generator uses a fixed 16-bit configuration width
            # and produces the agg_write strobe for this port.
            write_sched_gen = SchedGen(iterator_support=self.agg_iter_support,
                                       config_width=16)

            self.add_child(f"agg_write_sched_gen_{i}",
                           write_sched_gen,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           mux_sel=write_loops.ports.mux_sel_out,
                           finished=write_loops.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self._agg_write[i])

            # Clocked store into the aggregator. The low clog2(fetch_width)
            # address bits pick the word inside a row; the remaining upper
            # bits pick the row (always row 0 when agg_height == 1).
            @always_ff((posedge, "clk"))
            def agg_ctrl():
                if self._agg_write[i]:
                    if self.agg_height == 1:
                        self._agg[i][0][self._agg_write_addr[i][clog2(self.fetch_width) - 1, 0]] = self._data_in[i]
                    else:
                        self._agg[i][self._agg_write_addr[i]
                                     [self._agg_write_addr[0].width - 1, clog2(self.fetch_width)]]\
                                    [self._agg_write_addr[i][clog2(self.fetch_width) - 1, 0]] = self._data_in[i]

            self.add_code(agg_ctrl)

            # ---- read side: address generator driven from the inputs ----
            read_addr_gen = AddrGen(iterator_support=self.default_iterator_support,
                                    config_width=self.agg_addr_width)

            # mux_sel is attached through safe_wire below rather than as an
            # add_child keyword.
            self.add_child(f"agg_read_addr_gen_{i}",
                           read_addr_gen,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._agg_read[i],
                           restart=self._floop_restart[i])
            safe_wire(gen=self, w_to=read_addr_gen.ports.mux_sel, w_from=self._floop_mux_sel[i])
            safe_wire(gen=self, w_to=self._agg_read_addr_gen_out[i], w_from=read_addr_gen.ports.addr_out)
            # Only the low bits of the generated address are used as the row
            # index into the aggregator.
            self.wire(self._agg_read_addr[i], self._agg_read_addr_gen_out[i][self._agg_read_addr.width - 1, 0])

            # Now pick out the data from the agg...
            @always_comb
            def get_agg_data():
                self._agg_data_out[i] = self._agg[i][self._agg_read_addr[i]]
            self.add_code(get_agg_data)
Esempio n. 13
0
    def __init__(self,
                 data_width=16,
                 fetch_width=4,
                 mem_depth=512,
                 config_width=9,
                 input_addr_iterator_support=6,
                 output_addr_iterator_support=6,
                 input_sched_iterator_support=6,
                 output_sched_iterator_support=6):
        """Construct the ``lake_top_test`` generator.

        Declares the top-level ports and internal signals, then instantiates
        address and schedule generators for the aggregator, the transpose
        buffer and the SRAM, plus the ``SRAMStub`` itself. Finally it lifts
        the configuration registers and registers five code blocks that are
        defined as methods elsewhere on the class.

        Args:
            data_width: Bit width of a data word (ports, ``agg``, ``tb`` and
                the SRAM data buses).
            fetch_width: Number of words in the SRAM data buses, ``agg`` and
                ``tb``.
            mem_depth: Passed to ``SRAMStub``; ``clog2(mem_depth)`` sizes
                ``addr``.
            config_width: Width of ``write_addr`` / ``read_addr`` and the
                second argument of the SRAM-side generators.
            input_addr_iterator_support: First argument of the input
                ``AddrGen``.
            output_addr_iterator_support: First argument of the output
                ``AddrGen``.
            input_sched_iterator_support: First argument of the input
                ``SchedGen``.
            output_sched_iterator_support: First argument of the output
                ``SchedGen``.
        """
        super().__init__("lake_top_test")

        # ---- ports ----
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        self._clk_en = self.input("clk_en", 1)
        self._flush = self.input("flush", 1)

        self._data_in = self.input("data_in", data_width, packed=True)
        self._data_out = self.output("data_out", data_width, packed=True)

        # ---- SRAM-side strobes and addresses ----
        self._write = self.var("write", 1)
        self._read = self.var("read", 1)
        self._write_addr = self.var("write_addr", config_width)
        self._read_addr = self.var("read_addr", config_width)
        self._addr = self.var("addr", clog2(mem_depth))

        # ---- aggregator control ----
        self._agg_write = self.var("agg_write", 1)
        self._agg_write_addr = self.var("agg_write_addr", 2)
        self._agg_read_addr = self.var("agg_read_addr", 2)

        # ---- transpose-buffer control ----
        self._tb_read = self.var("tb_read", 1)
        self._tb_write_addr = self.var("tb_write_addr", 2)
        self._tb_read_addr = self.var("tb_read_addr", 2)

        # ---- SRAM data buses ----
        self._sram_write_data = self.var("sram_write_data",
                                         data_width,
                                         size=fetch_width,
                                         packed=True)
        self._sram_read_data = self.var("sram_read_data",
                                        data_width,
                                        size=fetch_width,
                                        packed=True)

        # Start-address configuration inputs for the aggregator
        # (aggw_start_addr / agg_start_addr) are currently not declared.

        self._agg_write_index = self.var("agg_write_index", 2, size=4)

        self._agg = self.var("agg",
                             width=data_width,
                             size=fetch_width,
                             packed=True)

        # Keyword groups shared by the child instantiations. They are
        # unpacked at the same positions the explicit keywords used to
        # occupy, so every child receives its connections in the same order.
        clocking = {"clk": self._clk, "rst_n": self._rst_n}
        gating = {"clk_en": self._clk_en, "flush": self._flush}

        # ---- aggregator generators ----
        self.add_child("agg_write_addr_gen",
                       AddrGen(2, 2),
                       **clocking,
                       step=self._agg_write,
                       addr_out=self._agg_write_addr,
                       **gating)

        self.add_child("agg_read_addr_gen",
                       AddrGen(2, 2),
                       **clocking,
                       step=self._write,
                       addr_out=self._agg_read_addr,
                       **gating)

        self.add_child("agg_write_sched_gen",
                       SchedGen(2, 2),
                       **clocking,
                       **gating,
                       valid_output=self._agg_write)

        self._tb = self.var("tb", width=data_width, size=fetch_width)

        # ---- transpose-buffer generators ----
        self.add_child("tb_write_addr_gen",
                       AddrGen(2, 2),
                       **clocking,
                       step=self._read,
                       addr_out=self._tb_write_addr,
                       **gating)

        self.add_child("tb_read_addr_gen",
                       AddrGen(2, 2),
                       **clocking,
                       step=self._tb_read,
                       addr_out=self._tb_read_addr,
                       **gating)

        self.add_child("tb_read_sched_gen",
                       SchedGen(2, 2),
                       **clocking,
                       **gating,
                       valid_output=self._tb_read)

        # ---- memory ----
        # The chip enable is asserted for either a write or a read.
        self.add_child("sram",
                       SRAMStub(data_width, fetch_width, mem_depth),
                       clk=self._clk,
                       wen=self._write,
                       cen=self._write | self._read,
                       addr=self._addr,
                       data_in=self._sram_write_data,
                       data_out=self._sram_read_data)

        # ---- SRAM address generators ----
        self.add_child("input_addr_gen",
                       AddrGen(input_addr_iterator_support, config_width),
                       **clocking,
                       step=self._write,
                       addr_out=self._write_addr,
                       **gating)

        self.add_child("output_addr_gen",
                       AddrGen(output_addr_iterator_support, config_width),
                       **clocking,
                       step=self._read,
                       addr_out=self._read_addr,
                       **gating)

        # ---- SRAM schedule generators ----
        self.add_child("input_sched_gen",
                       SchedGen(input_sched_iterator_support, config_width),
                       **clocking,
                       **gating,
                       valid_output=self._write)

        self.add_child("output_sched_gen",
                       SchedGen(output_sched_iterator_support, config_width),
                       **clocking,
                       **gating,
                       valid_output=self._read)

        lift_config_reg(self.internal_generator)

        # Register the behavioural blocks in their original order.
        for code_block in (self.set_sram_addr,
                           self.agg_ctrl,
                           self.tb_ctrl,
                           self.agg_to_sram,
                           self.tb_to_out):
            self.add_code(code_block)
Esempio n. 14
0
    def __init__(self,
                 interconnect_input_ports=2,
                 mem_depth=32,
                 num_tiles=1,
                 banks=1,
                 iterator_support=6,
                 address_width=5,
                 data_width=16,
                 fetch_width=16,
                 multiwrite=1,
                 strg_wr_ports=2,
                 config_width=16):
        """Construct the ``input_addr_ctrl`` generator.

        Declares the valid/data inputs, the per-bank write-enable, address
        and data outputs, and the intermediate signals between them. It then
        instantiates one ``AddrGen`` per interconnect input port and
        registers the code blocks (defined as methods elsewhere on the class)
        that decode write enables and route ports to storage write ports.

        Args:
            interconnect_input_ports: Number of input ports; also the number
                of ``AddrGen`` children.
            mem_depth: Depth of one memory tile.
            num_tiles: Number of tiles; ``clog2(num_tiles * mem_depth)`` is
                the per-bank address width.
            banks: Number of banks; adds ``clog2(banks)`` address bits when
                greater than one.
            iterator_support: Passed to each ``AddrGen``.
            address_width: Stored, then replaced by the derived
                ``mem_addr_width + bank_addr_width``; the argument value
                therefore has no lasting effect.
            data_width: Bit width of one data word.
            fetch_width: Bit width of one fetch;
                ``fetch_width // data_width`` words are carried per port.
            multiwrite: Number of simultaneous writes per port; must satisfy
                ``1 <= multiwrite <= banks``.
            strg_wr_ports: Number of storage write ports per bank.
            config_width: Passed to each ``AddrGen``.

        Raises:
            AssertionError: If ``multiwrite`` is outside ``[1, banks]``.
        """
        super().__init__("input_addr_ctrl", debug=True)

        # Reject an invalid configuration before any hardware is declared;
        # both checks keep their original messages.
        assert multiwrite >= 1, "Multiwrite must be at least 1..."
        assert multiwrite <= banks and multiwrite > 0,\
            "Multiwrite should be between 1 and banks"

        self.interconnect_input_ports = interconnect_input_ports
        self.mem_depth = mem_depth
        self.num_tiles = num_tiles
        self.banks = banks
        self.iterator_support = iterator_support
        self.address_width = address_width
        self.port_sched_width = max(1, clog2(self.interconnect_input_ports))
        self.data_width = data_width
        self.fetch_width = fetch_width
        # Words per fetch; integer division avoids a round trip through float.
        self.fw_int = self.fetch_width // self.data_width
        self.multiwrite = multiwrite
        self.strg_wr_ports = strg_wr_ports
        self.config_width = config_width

        # The effective address is the in-bank address plus, when there is
        # more than one bank, the bank-select bits. This overrides the
        # address_width argument stored above.
        self.mem_addr_width = clog2(self.num_tiles * self.mem_depth)
        if self.banks > 1:
            self.bank_addr_width = clog2(self.banks)
        else:
            self.bank_addr_width = 0
        self.address_width = self.mem_addr_width + self.bank_addr_width

        # Clock and Reset
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        # Inputs
        # phases = [] TODO
        # Take in the valid and data and attach an address + direct to a port
        self._valid_in = self.input("valid_in", self.interconnect_input_ports)
        self._wen_en = self.input("wen_en", self.interconnect_input_ports)

        self._wen_en_saved = self.var("wen_en_saved",
                                      self.interconnect_input_ports)

        self._data_in = self.input("data_in",
                                   self.data_width,
                                   size=(self.interconnect_input_ports,
                                         self.fw_int),
                                   explicit_array=True,
                                   packed=True)

        self._data_in_saved = self.var("data_in_saved",
                                       self.data_width,
                                       size=(self.interconnect_input_ports,
                                             self.fw_int),
                                       explicit_array=True,
                                       packed=True)

        # Outputs
        self._wen = self.output("wen_to_sram",
                                self.strg_wr_ports,
                                size=self.banks,
                                explicit_array=True,
                                packed=True)

        wen_full_size = (self.interconnect_input_ports, self.multiwrite)
        self._wen_full = self.var("wen_full",
                                  self.banks,
                                  size=wen_full_size,
                                  explicit_array=True,
                                  packed=True)

        self._wen_reduced = self.var("wen_reduced",
                                     self.banks,
                                     size=self.interconnect_input_ports,
                                     explicit_array=True,
                                     packed=True)

        self._wen_reduced_saved = self.var("wen_reduced_saved",
                                           self.banks,
                                           size=self.interconnect_input_ports,
                                           explicit_array=True,
                                           packed=True)

        self._addresses = self.output("addr_out",
                                      self.mem_addr_width,
                                      size=(self.banks, self.strg_wr_ports),
                                      explicit_array=True,
                                      packed=True)

        self._data_out = self.output("data_out",
                                     self.data_width,
                                     size=(self.banks, self.strg_wr_ports,
                                           self.fw_int),
                                     explicit_array=True,
                                     packed=True)

        self._port_out_exp = self.var("port_out_exp",
                                      self.interconnect_input_ports,
                                      size=self.banks,
                                      explicit_array=True,
                                      packed=True)

        self._port_out = self.output("port_out", self.interconnect_input_ports)

        self._counter = self.var("counter", self.port_sched_width)

        # Wire to port out: port_out[i] is the OR-reduction of port i's bit
        # across every bank's expanded port vector.
        for i in range(self.interconnect_input_ports):
            per_bank_bits = [self._port_out_exp[j][i]
                             for j in range(self.banks)]
            self.wire(self._port_out[i], kts.concat(*per_bank_bits).r_or())

        self._done = self.var("done",
                              self.strg_wr_ports,
                              size=self.banks,
                              explicit_array=True,
                              packed=True)

        # LOCAL VARS
        self._local_addrs = self.var("local_addrs",
                                     self.address_width,
                                     size=(self.interconnect_input_ports,
                                           self.multiwrite),
                                     packed=True,
                                     explicit_array=True)

        self._local_addrs_saved = self.var("local_addrs_saved",
                                           self.address_width,
                                           size=(self.interconnect_input_ports,
                                                 self.multiwrite),
                                           packed=True,
                                           explicit_array=True)

        # wen_reduced[i][j] is the OR over all multiwrite slots of port i's
        # write enable for bank j.
        for i in range(self.interconnect_input_ports):
            for j in range(self.banks):
                per_write_bits = [self._wen_full[i][k][j]
                                  for k in range(self.multiwrite)]
                self.wire(self._wen_reduced[i][j],
                          kts.concat(*per_write_bits).r_or())

        # Pick how wen_full is driven: a direct wire in the 1-bank/1-port
        # case, otherwise one of two code blocks.
        if self.banks == 1 and self.interconnect_input_ports == 1:
            self.wire(self._wen_full[0][0][0], self._valid_in)
        elif self.banks == 1 and self.interconnect_input_ports > 1:
            self.add_code(self.set_wen_single)
        else:
            self.add_code(self.set_wen_mult)

        # MAIN
        # Iterate through all banks to priority decode the wen
        self.add_code(self.decode_out_lowest)
        # Also set the write ports on the storage
        if self.strg_wr_ports > 1:
            self._idx_cnt = self.var("idx_cnt",
                                     8,
                                     size=(self.banks, self.strg_wr_ports - 1),
                                     explicit_array=True,
                                     packed=True)
            # One extra decode block per additional storage write port,
            # indexed from 1.
            for i in range(self.strg_wr_ports - 1):
                self.add_code(self.decode_out_alt, idx=i + 1)

        # Now we should instantiate the child address generators
        # (1 per input port) to send to the sram banks
        for i in range(self.interconnect_input_ports):
            self.add_child(f"address_gen_{i}",
                           AddrGen(iterator_support=self.iterator_support,
                                   config_width=self.config_width),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           clk_en=const(1, 1),
                           flush=const(0, 1),
                           step=self._valid_in[i])

        # Need to check that the address falls into the bank for implicit banking

        # Then, obey the input schedule to send the proper Aggregator to the output
        # The wen to sram should be that the valid for the selected port is high
        # Do the same thing for the output address
        if self.multiwrite > 1:
            size = (self.interconnect_input_ports, self.multiwrite - 1)
            self._offsets_cfg = self.input("offsets_cfg",
                                           self.address_width,
                                           size=size,
                                           packed=True,
                                           explicit_array=True)
            doc = "These offsets provide the ability to write to multiple banks explicitly"
            self._offsets_cfg.add_attribute(ConfigRegAttr(doc))
        self.add_code(self.set_multiwrite_addrs)

        # to handle multiple input ports going to fewer SRAM write ports
        self.add_code(self.set_int_ports_counter)
        self.add_code(self.save_mult_int_signals)
Esempio n. 15
0
    def __init__(
            self,
            data_width=16,  # CGRA Params
            mem_depth=32,
            default_iterator_support=3,
            interconnect_input_ports=2,  # Connection to int
            interconnect_output_ports=2,
            mem_input_ports=1,
            mem_output_ports=1,
            config_data_width=32,
            config_addr_width=8,
            cycle_count_width=16,
            add_clk_enable=True,
            add_flush=True):
        super().__init__("pond", debug=True)

        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.mem_input_ports = mem_input_ports
        self.mem_output_ports = mem_output_ports
        self.mem_depth = mem_depth
        self.data_width = data_width
        self.config_data_width = config_data_width
        self.config_addr_width = config_addr_width
        self.add_clk_enable = add_clk_enable
        self.add_flush = add_flush
        self.cycle_count_width = cycle_count_width
        self.default_iterator_support = default_iterator_support
        self.default_config_width = kts.clog2(self.mem_depth)
        # inputs
        self._clk = self.clock("clk")
        self._clk.add_attribute(
            FormalAttr(f"{self._clk.name}", FormalSignalConstraint.CLK))
        self._rst_n = self.reset("rst_n")
        self._rst_n.add_attribute(
            FormalAttr(f"{self._rst_n.name}", FormalSignalConstraint.RSTN))
        self._clk_en = self.clock_en("clk_en", 1)

        # Enable/Disable tile
        self._tile_en = self.input("tile_en", 1)
        self._tile_en.add_attribute(
            ConfigRegAttr("Tile logic enable manifested as clock gate"))

        gclk = self.var("gclk", 1)
        self._gclk = kts.util.clock(gclk)
        self.wire(gclk, kts.util.clock(self._clk & self._tile_en))

        self._cycle_count = add_counter(self, "cycle_count",
                                        self.cycle_count_width)

        # Create write enable + addr, same for read.
        # self._write = self.input("write", self.interconnect_input_ports)
        self._write = self.var("write", self.mem_input_ports)
        # self._write.add_attribute(ControlSignalAttr(is_control=True))

        self._write_addr = self.var("write_addr",
                                    kts.clog2(self.mem_depth),
                                    size=self.interconnect_input_ports,
                                    explicit_array=True,
                                    packed=True)

        # Add "_pond" suffix to avoid error during garnet RTL generation
        self._data_in = self.input("data_in_pond",
                                   self.data_width,
                                   size=self.interconnect_input_ports,
                                   explicit_array=True,
                                   packed=True)
        self._data_in.add_attribute(
            FormalAttr(f"{self._data_in.name}",
                       FormalSignalConstraint.SEQUENCE))
        self._data_in.add_attribute(ControlSignalAttr(is_control=False))

        self._read = self.var("read", self.mem_output_ports)
        self._t_write = self.var("t_write", self.interconnect_input_ports)
        self._t_read = self.var("t_read", self.interconnect_output_ports)
        # self._read.add_attribute(ControlSignalAttr(is_control=True))

        self._read_addr = self.var("read_addr",
                                   kts.clog2(self.mem_depth),
                                   size=self.interconnect_output_ports,
                                   explicit_array=True,
                                   packed=True)

        self._s_read_addr = self.var("s_read_addr",
                                     kts.clog2(self.mem_depth),
                                     size=self.interconnect_output_ports,
                                     explicit_array=True,
                                     packed=True)

        self._data_out = self.output("data_out_pond",
                                     self.data_width,
                                     size=self.interconnect_output_ports,
                                     explicit_array=True,
                                     packed=True)
        self._data_out.add_attribute(
            FormalAttr(f"{self._data_out.name}",
                       FormalSignalConstraint.SEQUENCE))
        self._data_out.add_attribute(ControlSignalAttr(is_control=False))

        self._valid_out = self.output("valid_out_pond",
                                      self.interconnect_output_ports)
        self._valid_out.add_attribute(
            FormalAttr(f"{self._valid_out.name}",
                       FormalSignalConstraint.SEQUENCE))
        self._valid_out.add_attribute(ControlSignalAttr(is_control=False))

        self._mem_data_out = self.var("mem_data_out",
                                      self.data_width,
                                      size=self.mem_output_ports,
                                      explicit_array=True,
                                      packed=True)

        self._s_mem_data_in = self.var("s_mem_data_in",
                                       self.data_width,
                                       size=self.interconnect_input_ports,
                                       explicit_array=True,
                                       packed=True)

        self._mem_data_in = self.var("mem_data_in",
                                     self.data_width,
                                     size=self.mem_input_ports,
                                     explicit_array=True,
                                     packed=True)

        self._s_mem_write_addr = self.var("s_mem_write_addr",
                                          kts.clog2(self.mem_depth),
                                          size=self.interconnect_input_ports,
                                          explicit_array=True,
                                          packed=True)

        self._s_mem_read_addr = self.var("s_mem_read_addr",
                                         kts.clog2(self.mem_depth),
                                         size=self.interconnect_output_ports,
                                         explicit_array=True,
                                         packed=True)

        self._mem_write_addr = self.var("mem_write_addr",
                                        kts.clog2(self.mem_depth),
                                        size=self.mem_input_ports,
                                        explicit_array=True,
                                        packed=True)

        self._mem_read_addr = self.var("mem_read_addr",
                                       kts.clog2(self.mem_depth),
                                       size=self.mem_output_ports,
                                       explicit_array=True,
                                       packed=True)

        if self.interconnect_output_ports == 1:
            self.wire(self._data_out[0], self._mem_data_out[0])
        else:
            for i in range(self.interconnect_output_ports):
                self.wire(self._data_out[i], self._mem_data_out[0])

        # Valid out is simply passing the read signal through...
        self.wire(self._valid_out, self._t_read)

        # Create write addressors
        for wr_port in range(self.interconnect_input_ports):

            RF_WRITE_ITER = ForLoop(
                iterator_support=self.default_iterator_support,
                config_width=self.cycle_count_width)
            RF_WRITE_ADDR = AddrGen(
                iterator_support=self.default_iterator_support,
                config_width=self.default_config_width)
            RF_WRITE_SCHED = SchedGen(
                iterator_support=self.default_iterator_support,
                config_width=self.cycle_count_width,
                use_enable=True)

            self.add_child(f"rf_write_iter_{wr_port}",
                           RF_WRITE_ITER,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           step=self._t_write[wr_port])
            # Whatever comes through here should hopefully just pipe through seamlessly
            # addressor modules
            self.add_child(f"rf_write_addr_{wr_port}",
                           RF_WRITE_ADDR,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           step=self._t_write[wr_port],
                           mux_sel=RF_WRITE_ITER.ports.mux_sel_out,
                           restart=RF_WRITE_ITER.ports.restart)
            safe_wire(self, self._write_addr[wr_port],
                      RF_WRITE_ADDR.ports.addr_out)

            self.add_child(f"rf_write_sched_{wr_port}",
                           RF_WRITE_SCHED,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           mux_sel=RF_WRITE_ITER.ports.mux_sel_out,
                           finished=RF_WRITE_ITER.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self._t_write[wr_port])

        # Create read addressors
        for rd_port in range(self.interconnect_output_ports):

            RF_READ_ITER = ForLoop(
                iterator_support=self.default_iterator_support,
                config_width=self.cycle_count_width)
            RF_READ_ADDR = AddrGen(
                iterator_support=self.default_iterator_support,
                config_width=self.default_config_width)
            RF_READ_SCHED = SchedGen(
                iterator_support=self.default_iterator_support,
                config_width=self.cycle_count_width,
                use_enable=True)

            self.add_child(f"rf_read_iter_{rd_port}",
                           RF_READ_ITER,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           step=self._t_read[rd_port])

            self.add_child(f"rf_read_addr_{rd_port}",
                           RF_READ_ADDR,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           step=self._t_read[rd_port],
                           mux_sel=RF_READ_ITER.ports.mux_sel_out,
                           restart=RF_READ_ITER.ports.restart)
            if self.interconnect_output_ports > 1:
                safe_wire(self, self._read_addr[rd_port],
                          RF_READ_ADDR.ports.addr_out)
            else:
                safe_wire(self, self._read_addr[rd_port],
                          RF_READ_ADDR.ports.addr_out)

            self.add_child(f"rf_read_sched_{rd_port}",
                           RF_READ_SCHED,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           mux_sel=RF_READ_ITER.ports.mux_sel_out,
                           finished=RF_READ_ITER.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self._t_read[rd_port])

        self.wire(self._write, self._t_write.r_or())
        self.wire(self._mem_write_addr[0],
                  decode(self, self._t_write, self._s_mem_write_addr))

        self.wire(self._mem_data_in[0],
                  decode(self, self._t_write, self._s_mem_data_in))

        self.wire(self._read, self._t_read.r_or())
        self.wire(self._mem_read_addr[0],
                  decode(self, self._t_read, self._s_mem_read_addr))
        # ===================================
        # Instantiate config hooks...
        # ===================================
        self.fw_int = 1
        self.data_words_per_set = 2**self.config_addr_width
        self.sets = int(
            (self.fw_int * self.mem_depth) / self.data_words_per_set)

        self.sets_per_macro = max(
            1, int(self.mem_depth / self.data_words_per_set))
        self.total_sets = max(1, 1 * self.sets_per_macro)

        self._config_data_in = self.input("config_data_in",
                                          self.config_data_width)
        self._config_data_in.add_attribute(ControlSignalAttr(is_control=False))

        self._config_data_in_shrt = self.var("config_data_in_shrt",
                                             self.data_width)

        self.wire(self._config_data_in_shrt,
                  self._config_data_in[self.data_width - 1, 0])

        self._config_addr_in = self.input("config_addr_in",
                                          self.config_addr_width)
        self._config_addr_in.add_attribute(ControlSignalAttr(is_control=False))

        self._config_data_out_shrt = self.var("config_data_out_shrt",
                                              self.data_width,
                                              size=self.total_sets,
                                              explicit_array=True,
                                              packed=True)

        self._config_data_out = self.output("config_data_out",
                                            self.config_data_width,
                                            size=self.total_sets,
                                            explicit_array=True,
                                            packed=True)
        self._config_data_out.add_attribute(
            ControlSignalAttr(is_control=False))

        for i in range(self.total_sets):
            self.wire(
                self._config_data_out[i],
                self._config_data_out_shrt[i].extend(self.config_data_width))

        self._config_read = self.input("config_read", 1)
        self._config_read.add_attribute(ControlSignalAttr(is_control=False))

        self._config_write = self.input("config_write", 1)
        self._config_write.add_attribute(ControlSignalAttr(is_control=False))

        self._config_en = self.input("config_en", self.total_sets)
        self._config_en.add_attribute(ControlSignalAttr(is_control=False))

        self._mem_data_cfg = self.var("mem_data_cfg",
                                      self.data_width,
                                      explicit_array=True,
                                      packed=True)

        self._mem_addr_cfg = self.var("mem_addr_cfg",
                                      kts.clog2(self.mem_depth))

        # Add config...
        stg_cfg_seq = StorageConfigSeq(
            data_width=self.data_width,
            config_addr_width=self.config_addr_width,
            addr_width=kts.clog2(self.mem_depth),
            fetch_width=self.data_width,
            total_sets=self.total_sets,
            sets_per_macro=self.sets_per_macro)

        # The clock to config sequencer needs to be the normal clock or
        # if the tile is off, we bring the clock back in based on config_en
        cfg_seq_clk = self.var("cfg_seq_clk", 1)
        self._cfg_seq_clk = kts.util.clock(cfg_seq_clk)
        self.wire(cfg_seq_clk, kts.util.clock(self._gclk))

        self.add_child(f"config_seq",
                       stg_cfg_seq,
                       clk=self._cfg_seq_clk,
                       rst_n=self._rst_n,
                       clk_en=self._clk_en | self._config_en.r_or(),
                       config_data_in=self._config_data_in_shrt,
                       config_addr_in=self._config_addr_in,
                       config_wr=self._config_write,
                       config_rd=self._config_read,
                       config_en=self._config_en,
                       wr_data=self._mem_data_cfg,
                       rd_data_out=self._config_data_out_shrt,
                       addr_out=self._mem_addr_cfg)

        if self.interconnect_output_ports == 1:
            self.wire(stg_cfg_seq.ports.rd_data_stg, self._mem_data_out)
        else:
            self.wire(stg_cfg_seq.ports.rd_data_stg[0], self._mem_data_out[0])

        self.RF_GEN = RegisterFile(data_width=self.data_width,
                                   write_ports=self.mem_input_ports,
                                   read_ports=self.mem_output_ports,
                                   width_mult=1,
                                   depth=self.mem_depth,
                                   read_delay=0)

        # Now we can instantiate and wire up the register file
        self.add_child(f"rf",
                       self.RF_GEN,
                       clk=self._gclk,
                       rst_n=self._rst_n,
                       data_out=self._mem_data_out)

        # Opt in for config_write
        self._write_rf = self.var("write_rf", self.mem_input_ports)
        self.wire(
            self._write_rf[0],
            kts.ternary(self._config_en.r_or(), self._config_write,
                        self._write[0]))
        for i in range(self.mem_input_ports - 1):
            self.wire(
                self._write_rf[i + 1],
                kts.ternary(self._config_en.r_or(), kts.const(0, 1),
                            self._write[i + 1]))
        self.wire(self.RF_GEN.ports.wen, self._write_rf)

        # Opt in for config_data_in
        for i in range(self.interconnect_input_ports):
            self.wire(
                self._s_mem_data_in[i],
                kts.ternary(self._config_en.r_or(), self._mem_data_cfg,
                            self._data_in[i]))
        self.wire(self.RF_GEN.ports.data_in, self._mem_data_in)

        # Opt in for config_addr
        for i in range(self.interconnect_input_ports):
            self.wire(
                self._s_mem_write_addr[i],
                kts.ternary(self._config_en.r_or(), self._mem_addr_cfg,
                            self._write_addr[i]))

        self.wire(self.RF_GEN.ports.wr_addr, self._mem_write_addr[0])

        for i in range(self.interconnect_output_ports):
            self.wire(
                self._s_mem_read_addr[i],
                kts.ternary(self._config_en.r_or(), self._mem_addr_cfg,
                            self._read_addr[i]))

        self.wire(self.RF_GEN.ports.rd_addr, self._mem_read_addr[0])

        if self.add_clk_enable:
            # self.clock_en("clk_en")
            kts.passes.auto_insert_clock_enable(self.internal_generator)
            clk_en_port = self.internal_generator.get_port("clk_en")
            clk_en_port.add_attribute(ControlSignalAttr(False))

        if self.add_flush:
            self.add_attribute("sync-reset=flush")
            kts.passes.auto_insert_sync_reset(self.internal_generator)
            flush_port = self.internal_generator.get_port("flush")
            flush_port.add_attribute(ControlSignalAttr(True))

        # Finally, lift the config regs...
        lift_config_reg(self.internal_generator)
Esempio n. 16
0
    def __init__(self,
                 data_width=16,
                 fetch_width=1,
                 mem_depth=512,
                 config_width=16,
                 input_addr_iterator_support=6,
                 output_addr_iterator_support=6,
                 input_sched_iterator_support=6,
                 output_sched_iterator_support=6
                 ):
        """Build the ``lake_top_test`` generator.

        Declares the clock/reset, ``clk_en``/``flush`` and data ports,
        instantiates one ``SRAMStub`` plus an address generator and a
        schedule generator for each of the write and read sides, lifts the
        children's config registers and registers ``set_sram_addr`` as a
        code block.

        Args:
            data_width: width of ``data_in``/``data_out``; also forwarded
                to ``SRAMStub``.
            fetch_width: forwarded to ``SRAMStub``.
            mem_depth: forwarded to ``SRAMStub``; ``addr`` is
                ``clog2(mem_depth)`` bits wide.
            config_width: width of ``write_addr``/``read_addr`` and second
                positional argument of every ``AddrGen``/``SchedGen``.
            input_addr_iterator_support: first positional argument of the
                write-side ``AddrGen``.
            output_addr_iterator_support: first positional argument of the
                read-side ``AddrGen``.
            input_sched_iterator_support: first positional argument of the
                write-side ``SchedGen``.
            output_sched_iterator_support: first positional argument of the
                read-side ``SchedGen``.
        """
        super().__init__("lake_top_test")

        # ---- ports -------------------------------------------------------
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        self._clk_en = self.input("clk_en", 1)
        self._flush = self.input("flush", 1)

        self._data_in = self.input("data_in", data_width, packed=True)
        self._data_out = self.output("data_out", data_width, packed=True)

        # ---- internal signals --------------------------------------------
        self._write = self.var("write", 1)
        self._read = self.var("read", 1)
        self._write_addr = self.var("write_addr", config_width)
        self._read_addr = self.var("read_addr", config_width)
        self._addr = self.var("addr", clog2(mem_depth))

        # ---- memory ------------------------------------------------------
        # Chip enable is asserted whenever either a write or a read fires.
        sram_stub = SRAMStub(data_width, fetch_width, mem_depth)
        self.add_child("sram",
                       sram_stub,
                       clk=self._clk,
                       wen=self._write,
                       cen=self._write | self._read,
                       addr=self._addr,
                       data_in=self._data_in,
                       data_out=self._data_out)

        # ---- address generators ------------------------------------------
        # (instance name, iterator support, step signal, address signal);
        # the write side is instantiated first, then the read side.
        addr_gen_specs = (
            ("input_addr_gen", input_addr_iterator_support,
             self._write, self._write_addr),
            ("output_addr_gen", output_addr_iterator_support,
             self._read, self._read_addr),
        )
        for inst_name, iter_support, step_sig, addr_sig in addr_gen_specs:
            self.add_child(inst_name,
                           AddrGen(iter_support, config_width),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=step_sig,
                           addr_out=addr_sig,
                           clk_en=self._clk_en,
                           flush=self._flush)

        # ---- schedule generators -----------------------------------------
        # Each scheduler drives the write/read strobe consumed above.
        sched_gen_specs = (
            ("input_sched_gen", input_sched_iterator_support, self._write),
            ("output_sched_gen", output_sched_iterator_support, self._read),
        )
        for inst_name, iter_support, valid_sig in sched_gen_specs:
            self.add_child(inst_name,
                           SchedGen(iter_support, config_width),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           clk_en=self._clk_en,
                           flush=self._flush,
                           valid_output=valid_sig)

        # Config registers can only be lifted once every child is in place.
        lift_config_reg(self.internal_generator)

        self.add_code(self.set_sram_addr)
Esempio n. 17
0
    def __init__(
            self,
            data_width=16,  # CGRA Params
            mem_width=64,
            mem_depth=512,
            banks=1,
            input_addr_iterator_support=6,
            output_addr_iterator_support=6,
            input_sched_iterator_support=6,
            output_sched_iterator_support=6,
            config_width=16,
            #  output_config_width=16,
            interconnect_input_ports=2,  # Connection to int
            interconnect_output_ports=2,
            mem_input_ports=1,
            mem_output_ports=1,
            read_delay=1,  # Cycle delay in read (SRAM vs Register File)
            rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
            agg_height=4,
            tb_height=2):
        """Build the ``strg_ub_sram_only`` generator.

        Declares the control/data inputs and the ``*_to_sram`` outputs,
        instantiates one ``AddrGen`` per interconnect input port (write
        side) and one per interconnect output port (read side), and wires
        the resulting enables, data and address to the SRAM-facing outputs.

        Args:
            data_width: width of one data word.
            mem_width: memory word width; ``fetch_width`` is computed as
                ``mem_width // data_width``.
            mem_depth: number of memory entries; SRAM address width is
                ``clog2(mem_depth)``.
            config_width: width of the per-port ``s_write_addr`` /
                ``s_read_addr`` signals.
            interconnect_input_ports: number of write-side ports.
            interconnect_output_ports: number of read-side ports.
            input_addr_iterator_support: stored on the instance.
            input_sched_iterator_support: stored on the instance.
            agg_height: stored on the instance.
            tb_height: stored on the instance.

        NOTE(review): ``banks``, ``output_addr_iterator_support``,
        ``output_sched_iterator_support``, ``mem_input_ports``,
        ``mem_output_ports``, ``read_delay`` and ``rw_same_cycle`` do not
        appear to be referenced in this constructor - confirm whether they
        are kept only for signature compatibility.
        """

        super().__init__("strg_ub_sram_only")

        ##################################################################################
        # Capture constructor parameter...
        ##################################################################################
        # Number of data words packed into one memory word.
        self.fetch_width = mem_width // data_width
        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.agg_height = agg_height
        self.mem_width = mem_width
        self.tb_height = tb_height
        self.mem_depth = mem_depth
        self.config_width = config_width
        self.data_width = data_width
        self.input_addr_iterator_support = input_addr_iterator_support
        self.input_sched_iterator_support = input_sched_iterator_support

        # Hard-coded defaults; these are not derived from the constructor
        # arguments.
        self.default_iterator_support = 6
        self.default_config_width = 16
        self.sram_iterator_support = 6
        self.agg_rd_addr_gen_width = 8

        ##################################################################################
        # IO
        ##################################################################################
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        self._cycle_count = self.input("cycle_count", 16)

        # agg to sram for loop
        # One mux select per input port; width is at least 1 bit.
        self._floop_mux_sel = self.input(
            "floop_mux_sel",
            width=max(clog2(self.default_iterator_support), 1),
            size=self.interconnect_input_ports,
            explicit_array=True,
            packed=True)

        self._floop_restart = self.input("floop_restart",
                                         width=1,
                                         size=self.interconnect_input_ports,
                                         explicit_array=True,
                                         packed=True)

        # sram to tb for loop
        # One mux select per output port; width is at least 1 bit.
        self._loops_sram2tb_mux_sel = self.input(
            "loops_sram2tb_mux_sel",
            width=max(clog2(self.default_iterator_support), 1),
            size=self.interconnect_output_ports,
            explicit_array=True,
            packed=True)

        self._loops_sram2tb_restart = self.input(
            "loops_sram2tb_restart",
            width=1,
            size=self.interconnect_output_ports,
            explicit_array=True,
            packed=True)

        # Per-port enables: agg_read steps the write-side address
        # generators, t_read steps the read-side ones.
        self._agg_read = self.input("agg_read", self.interconnect_input_ports)
        self._t_read = self.input("t_read", self.interconnect_output_ports)

        # data from aggs, get decoded for sram_write_data which is wired to data_to_sram
        self._agg_data_out = self.input(f"agg_data_out",
                                        self.data_width,
                                        size=(self.interconnect_input_ports,
                                              self.fetch_width),
                                        packed=True,
                                        explicit_array=True)
        self._agg_data_out.add_attribute(
            FormalAttr(self._agg_data_out.name,
                       FormalSignalConstraint.SEQUENCE, "agg"))
        # sram attribute for data_in, comes from cut gen of agg_only for agg_data_out_top

        # SRAM-facing outputs.
        self._wen_to_sram = self.output("wen_to_sram", 1, packed=True)
        self._cen_to_sram = self.output("cen_to_sram", 1, packed=True)
        self._addr_to_sram = self.output("addr_to_sram",
                                         clog2(self.mem_depth),
                                         packed=True)
        self._data_to_sram = self.output("data_to_sram",
                                         self.data_width,
                                         size=self.fetch_width,
                                         packed=True)

        ##################################################################################
        # INTERNAL SIGNALS
        ##################################################################################
        # Per-port addresses produced by the address generators below.
        self._s_write_addr = self.var("s_write_addr",
                                      self.config_width,
                                      size=self.interconnect_input_ports,
                                      packed=True,
                                      explicit_array=True)

        self._s_read_addr = self.var("s_read_addr",
                                     self.config_width,
                                     size=self.interconnect_output_ports,
                                     packed=True,
                                     explicit_array=True)

        self._write = self.var("write", 1)
        self._read = self.var("read", 1)
        self._addr = self.var("addr", clog2(self.mem_depth))

        self._sram_write_data = self.var("sram_write_data",
                                         data_width,
                                         size=self.fetch_width,
                                         packed=True)

        self.mem_addr_width = clog2(self.mem_depth)

        # Write side: one address generator per interconnect input port,
        # stepped by that port's agg_read bit.
        for i in range(self.interconnect_input_ports):

            _AG = AddrGen(iterator_support=self.default_iterator_support,
                          config_width=self.mem_addr_width)
            self.add_child(
                f"input_addr_gen_{i}",
                _AG,
                clk=self._clk,
                rst_n=self._rst_n,
                step=self._agg_read[i],
                # mux_sel=self._floop_mux_sel[i],
                restart=self._floop_restart[i])
            # mux_sel and addr_out go through safe_wire instead of the
            # add_child keywords - presumably to tolerate width mismatches;
            # TODO confirm against safe_wire's definition.
            safe_wire(gen=self,
                      w_to=_AG.ports.mux_sel,
                      w_from=self._floop_mux_sel[i])
            safe_wire(gen=self,
                      w_to=self._s_write_addr[i],
                      w_from=_AG.ports.addr_out)

        ##################################################################################
        # TB PATHS
        ##################################################################################
        # Read side: one address generator per interconnect output port,
        # stepped by that port's t_read bit.
        for i in range(self.interconnect_output_ports):

            _AG = AddrGen(iterator_support=self.default_iterator_support,
                          config_width=self.mem_addr_width)
            self.add_child(
                f"output_addr_gen_{i}",
                _AG,
                clk=self._clk,
                rst_n=self._rst_n,
                step=self._t_read[i],
                # mux_sel=self._loops_sram2tb_mux_sel[i],
                restart=self._loops_sram2tb_restart[i])
            safe_wire(gen=self,
                      w_to=_AG.ports.mux_sel,
                      w_from=self._loops_sram2tb_mux_sel[i])
            safe_wire(gen=self,
                      w_to=self._s_read_addr[i],
                      w_from=_AG.ports.addr_out)

        ##################################################################################
        # WIRE TO SRAM INTERFACE
        ##################################################################################
        # Now select the write address as a decode of the underlying enables
        self.wire(self._addr_to_sram, self._addr)
        self.wire(self._data_to_sram, self._sram_write_data)
        self.wire(self._wen_to_sram, self._write)
        # Chip enable is asserted for either a write or a read.
        self.wire(self._cen_to_sram, self._write | self._read)

        # A write/read happens when any port's enable bit is set.
        self.wire(self._write, self._agg_read.r_or())
        self.wire(self._read, self._t_read.r_or())

        # NOTE(review): decode presumably picks the array entry whose
        # enable bit is set - confirm against decode's definition.
        self.wire(self._sram_write_data,
                  decode(self, self._agg_read, self._agg_data_out))

        # These hold the decode results directly (not declared via
        # self.var); they are expected to be consumed by set_sram_addr.
        self._write_addr = decode(self, self._agg_read, self._s_write_addr)
        self._read_addr = decode(self, self._t_read, self._s_read_addr)
        self.add_code(self.set_sram_addr)