def __init__(
        self,
        data_width=16,  # CGRA Params
        mem_depth=32,
        default_iterator_support=3,
        interconnect_input_ports=2,  # Connection to int
        interconnect_output_ports=2,
        mem_input_ports=1,
        mem_output_ports=1,
        config_data_width=32,
        config_addr_width=8,
        cycle_count_width=16,
        add_clk_enable=True,
        add_flush=True):
    """Build the "pond" generator.

    The generator instantiates a ``RegisterFile``, one
    ``ForLoop``/``AddrGen``/``SchedGen`` triple per interconnect write port
    and per interconnect read port, and a ``StorageConfigSeq`` that can take
    over the register file's write enable, data and addresses whenever any
    bit of ``config_en`` is set.

    Args:
        data_width: Bit width of each ``data_in_pond``/``data_out_pond``
            word and of the register file.
        mem_depth: Depth passed to the register file; every address signal
            is ``clog2(mem_depth)`` bits wide.
        default_iterator_support: ``iterator_support`` handed to every
            loop, address and schedule generator.
        interconnect_input_ports: Number of ``data_in_pond`` entries and of
            write controller triples.
        interconnect_output_ports: Number of ``data_out_pond`` entries and
            of read controller triples.
        mem_input_ports: ``write_ports`` of the register file.
        mem_output_ports: ``read_ports`` of the register file.
        config_data_width: Width of ``config_data_in``/``config_data_out``.
        config_addr_width: Width of ``config_addr_in``.
        cycle_count_width: Width of the cycle counter and ``config_width``
            of the loop and schedule generators.
        add_clk_enable: When true, run kratos' clock-enable insertion pass
            and tag the ``clk_en`` port.
        add_flush: When true, mark the generator ``sync-reset=flush``, run
            the sync-reset insertion pass and tag the ``flush`` port.
    """
    super().__init__("pond", debug=True)

    self.interconnect_input_ports = interconnect_input_ports
    self.interconnect_output_ports = interconnect_output_ports
    self.mem_input_ports = mem_input_ports
    self.mem_output_ports = mem_output_ports
    self.mem_depth = mem_depth
    self.data_width = data_width
    self.config_data_width = config_data_width
    self.config_addr_width = config_addr_width
    self.add_clk_enable = add_clk_enable
    self.add_flush = add_flush
    self.cycle_count_width = cycle_count_width
    self.default_iterator_support = default_iterator_support
    self.default_config_width = kts.clog2(self.mem_depth)

    # Every address signal below shares this width.
    addr_width = kts.clog2(self.mem_depth)

    # inputs
    self._clk = self.clock("clk")
    self._clk.add_attribute(
        FormalAttr(f"{self._clk.name}", FormalSignalConstraint.CLK))

    self._rst_n = self.reset("rst_n")
    self._rst_n.add_attribute(
        FormalAttr(f"{self._rst_n.name}", FormalSignalConstraint.RSTN))

    self._clk_en = self.clock_en("clk_en", 1)

    # Enable/Disable tile
    self._tile_en = self.input("tile_en", 1)
    self._tile_en.add_attribute(
        ConfigRegAttr("Tile logic enable manifested as clock gate"))

    # Gated clock: clk ANDed with tile_en.
    gclk = self.var("gclk", 1)
    self._gclk = kts.util.clock(gclk)
    self.wire(gclk, kts.util.clock(self._clk & self._tile_en))

    self._cycle_count = add_counter(self, "cycle_count",
                                    self.cycle_count_width)

    # Create write enable + addr, same for read.
    self._write = self.var("write", self.mem_input_ports)

    self._write_addr = self.var("write_addr",
                                addr_width,
                                size=self.interconnect_input_ports,
                                explicit_array=True,
                                packed=True)

    # Add "_pond" suffix to avoid error during garnet RTL generation
    self._data_in = self.input("data_in_pond",
                               self.data_width,
                               size=self.interconnect_input_ports,
                               explicit_array=True,
                               packed=True)
    self._data_in.add_attribute(
        FormalAttr(f"{self._data_in.name}", FormalSignalConstraint.SEQUENCE))
    self._data_in.add_attribute(ControlSignalAttr(is_control=False))

    self._read = self.var("read", self.mem_output_ports)
    # Per-interconnect-port write/read strobes driven by the schedulers.
    self._t_write = self.var("t_write", self.interconnect_input_ports)
    self._t_read = self.var("t_read", self.interconnect_output_ports)

    self._read_addr = self.var("read_addr",
                               addr_width,
                               size=self.interconnect_output_ports,
                               explicit_array=True,
                               packed=True)

    self._s_read_addr = self.var("s_read_addr",
                                 addr_width,
                                 size=self.interconnect_output_ports,
                                 explicit_array=True,
                                 packed=True)

    self._data_out = self.output("data_out_pond",
                                 self.data_width,
                                 size=self.interconnect_output_ports,
                                 explicit_array=True,
                                 packed=True)
    self._data_out.add_attribute(
        FormalAttr(f"{self._data_out.name}", FormalSignalConstraint.SEQUENCE))
    self._data_out.add_attribute(ControlSignalAttr(is_control=False))

    self._valid_out = self.output("valid_out_pond",
                                  self.interconnect_output_ports)
    self._valid_out.add_attribute(
        FormalAttr(f"{self._valid_out.name}",
                   FormalSignalConstraint.SEQUENCE))
    self._valid_out.add_attribute(ControlSignalAttr(is_control=False))

    self._mem_data_out = self.var("mem_data_out",
                                  self.data_width,
                                  size=self.mem_output_ports,
                                  explicit_array=True,
                                  packed=True)

    self._s_mem_data_in = self.var("s_mem_data_in",
                                   self.data_width,
                                   size=self.interconnect_input_ports,
                                   explicit_array=True,
                                   packed=True)

    self._mem_data_in = self.var("mem_data_in",
                                 self.data_width,
                                 size=self.mem_input_ports,
                                 explicit_array=True,
                                 packed=True)

    self._s_mem_write_addr = self.var("s_mem_write_addr",
                                      addr_width,
                                      size=self.interconnect_input_ports,
                                      explicit_array=True,
                                      packed=True)

    self._s_mem_read_addr = self.var("s_mem_read_addr",
                                     addr_width,
                                     size=self.interconnect_output_ports,
                                     explicit_array=True,
                                     packed=True)

    self._mem_write_addr = self.var("mem_write_addr",
                                    addr_width,
                                    size=self.mem_input_ports,
                                    explicit_array=True,
                                    packed=True)

    self._mem_read_addr = self.var("mem_read_addr",
                                   addr_width,
                                   size=self.mem_output_ports,
                                   explicit_array=True,
                                   packed=True)

    # Every interconnect output observes memory read port 0 (the
    # single-port case is just the one-iteration form of this loop).
    for i in range(self.interconnect_output_ports):
        self.wire(self._data_out[i], self._mem_data_out[0])

    # Valid out is simply passing the read signal through...
    self.wire(self._valid_out, self._t_read)

    # Create write addressors
    for wr_port in range(self.interconnect_input_ports):

        RF_WRITE_ITER = ForLoop(
            iterator_support=self.default_iterator_support,
            config_width=self.cycle_count_width)
        RF_WRITE_ADDR = AddrGen(
            iterator_support=self.default_iterator_support,
            config_width=self.default_config_width)
        RF_WRITE_SCHED = SchedGen(
            iterator_support=self.default_iterator_support,
            config_width=self.cycle_count_width,
            use_enable=True)

        self.add_child(f"rf_write_iter_{wr_port}",
                       RF_WRITE_ITER,
                       clk=self._gclk,
                       rst_n=self._rst_n,
                       step=self._t_write[wr_port])

        # Whatever comes through here should hopefully just pipe through
        # seamlessly addressor modules
        self.add_child(f"rf_write_addr_{wr_port}",
                       RF_WRITE_ADDR,
                       clk=self._gclk,
                       rst_n=self._rst_n,
                       step=self._t_write[wr_port],
                       mux_sel=RF_WRITE_ITER.ports.mux_sel_out,
                       restart=RF_WRITE_ITER.ports.restart)
        safe_wire(self, self._write_addr[wr_port],
                  RF_WRITE_ADDR.ports.addr_out)

        self.add_child(f"rf_write_sched_{wr_port}",
                       RF_WRITE_SCHED,
                       clk=self._gclk,
                       rst_n=self._rst_n,
                       mux_sel=RF_WRITE_ITER.ports.mux_sel_out,
                       finished=RF_WRITE_ITER.ports.restart,
                       cycle_count=self._cycle_count,
                       valid_output=self._t_write[wr_port])

    # Create read addressors
    for rd_port in range(self.interconnect_output_ports):

        RF_READ_ITER = ForLoop(
            iterator_support=self.default_iterator_support,
            config_width=self.cycle_count_width)
        RF_READ_ADDR = AddrGen(
            iterator_support=self.default_iterator_support,
            config_width=self.default_config_width)
        RF_READ_SCHED = SchedGen(
            iterator_support=self.default_iterator_support,
            config_width=self.cycle_count_width,
            use_enable=True)

        self.add_child(f"rf_read_iter_{rd_port}",
                       RF_READ_ITER,
                       clk=self._gclk,
                       rst_n=self._rst_n,
                       step=self._t_read[rd_port])

        self.add_child(f"rf_read_addr_{rd_port}",
                       RF_READ_ADDR,
                       clk=self._gclk,
                       rst_n=self._rst_n,
                       step=self._t_read[rd_port],
                       mux_sel=RF_READ_ITER.ports.mux_sel_out,
                       restart=RF_READ_ITER.ports.restart)
        # The wiring is the same regardless of the number of read ports.
        safe_wire(self, self._read_addr[rd_port],
                  RF_READ_ADDR.ports.addr_out)

        self.add_child(f"rf_read_sched_{rd_port}",
                       RF_READ_SCHED,
                       clk=self._gclk,
                       rst_n=self._rst_n,
                       mux_sel=RF_READ_ITER.ports.mux_sel_out,
                       finished=RF_READ_ITER.ports.restart,
                       cycle_count=self._cycle_count,
                       valid_output=self._t_read[rd_port])

    # Reduce the per-interconnect-port strobes/addresses/data down to
    # memory port 0; only index 0 of the memory-side arrays is driven here.
    self.wire(self._write, self._t_write.r_or())
    self.wire(self._mem_write_addr[0],
              decode(self, self._t_write, self._s_mem_write_addr))
    self.wire(self._mem_data_in[0],
              decode(self, self._t_write, self._s_mem_data_in))

    self.wire(self._read, self._t_read.r_or())
    self.wire(self._mem_read_addr[0],
              decode(self, self._t_read, self._s_mem_read_addr))

    # ===================================
    # Instantiate config hooks...
    # ===================================
    self.fw_int = 1
    self.data_words_per_set = 2**self.config_addr_width
    self.sets = int(
        (self.fw_int * self.mem_depth) / self.data_words_per_set)

    self.sets_per_macro = max(
        1, int(self.mem_depth / self.data_words_per_set))
    self.total_sets = max(1, 1 * self.sets_per_macro)

    self._config_data_in = self.input("config_data_in",
                                      self.config_data_width)
    self._config_data_in.add_attribute(ControlSignalAttr(is_control=False))

    # Only the low data_width bits of the config bus reach the memory.
    self._config_data_in_shrt = self.var("config_data_in_shrt",
                                         self.data_width)
    self.wire(self._config_data_in_shrt,
              self._config_data_in[self.data_width - 1, 0])

    self._config_addr_in = self.input("config_addr_in",
                                      self.config_addr_width)
    self._config_addr_in.add_attribute(ControlSignalAttr(is_control=False))

    self._config_data_out_shrt = self.var("config_data_out_shrt",
                                          self.data_width,
                                          size=self.total_sets,
                                          explicit_array=True,
                                          packed=True)

    self._config_data_out = self.output("config_data_out",
                                        self.config_data_width,
                                        size=self.total_sets,
                                        explicit_array=True,
                                        packed=True)
    self._config_data_out.add_attribute(
        ControlSignalAttr(is_control=False))

    # Widen each data_width read-back word to the config bus width.
    for i in range(self.total_sets):
        self.wire(
            self._config_data_out[i],
            self._config_data_out_shrt[i].extend(self.config_data_width))

    self._config_read = self.input("config_read", 1)
    self._config_read.add_attribute(ControlSignalAttr(is_control=False))

    self._config_write = self.input("config_write", 1)
    self._config_write.add_attribute(ControlSignalAttr(is_control=False))

    self._config_en = self.input("config_en", self.total_sets)
    self._config_en.add_attribute(ControlSignalAttr(is_control=False))

    self._mem_data_cfg = self.var("mem_data_cfg",
                                  self.data_width,
                                  explicit_array=True,
                                  packed=True)

    self._mem_addr_cfg = self.var("mem_addr_cfg", addr_width)

    # Add config...
    stg_cfg_seq = StorageConfigSeq(
        data_width=self.data_width,
        config_addr_width=self.config_addr_width,
        addr_width=addr_width,
        fetch_width=self.data_width,
        total_sets=self.total_sets,
        sets_per_macro=self.sets_per_macro)

    # The clock to config sequencer needs to be the normal clock or
    # if the tile is off, we bring the clock back in based on config_en
    # NOTE(review): as written, cfg_seq_clk is wired to the tile_en-gated
    # clock only; config_en reaches the sequencer through clk_en below.
    cfg_seq_clk = self.var("cfg_seq_clk", 1)
    self._cfg_seq_clk = kts.util.clock(cfg_seq_clk)
    self.wire(cfg_seq_clk, kts.util.clock(self._gclk))

    self.add_child("config_seq",
                   stg_cfg_seq,
                   clk=self._cfg_seq_clk,
                   rst_n=self._rst_n,
                   clk_en=self._clk_en | self._config_en.r_or(),
                   config_data_in=self._config_data_in_shrt,
                   config_addr_in=self._config_addr_in,
                   config_wr=self._config_write,
                   config_rd=self._config_read,
                   config_en=self._config_en,
                   wr_data=self._mem_data_cfg,
                   rd_data_out=self._config_data_out_shrt,
                   addr_out=self._mem_addr_cfg)

    if self.interconnect_output_ports == 1:
        self.wire(stg_cfg_seq.ports.rd_data_stg, self._mem_data_out)
    else:
        self.wire(stg_cfg_seq.ports.rd_data_stg[0], self._mem_data_out[0])

    self.RF_GEN = RegisterFile(data_width=self.data_width,
                               write_ports=self.mem_input_ports,
                               read_ports=self.mem_output_ports,
                               width_mult=1,
                               depth=self.mem_depth,
                               read_delay=0)

    # Now we can instantiate and wire up the register file
    self.add_child("rf",
                   self.RF_GEN,
                   clk=self._gclk,
                   rst_n=self._rst_n,
                   data_out=self._mem_data_out)

    # Opt in for config_write: while any config_en bit is set, port 0
    # follows config_write and every other write port is forced to 0.
    self._write_rf = self.var("write_rf", self.mem_input_ports)
    self.wire(
        self._write_rf[0],
        kts.ternary(self._config_en.r_or(), self._config_write,
                    self._write[0]))
    for i in range(self.mem_input_ports - 1):
        self.wire(
            self._write_rf[i + 1],
            kts.ternary(self._config_en.r_or(), kts.const(0, 1),
                        self._write[i + 1]))
    self.wire(self.RF_GEN.ports.wen, self._write_rf)

    # Opt in for config_data_in
    for i in range(self.interconnect_input_ports):
        self.wire(
            self._s_mem_data_in[i],
            kts.ternary(self._config_en.r_or(), self._mem_data_cfg,
                        self._data_in[i]))
    self.wire(self.RF_GEN.ports.data_in, self._mem_data_in)

    # Opt in for config_addr
    for i in range(self.interconnect_input_ports):
        self.wire(
            self._s_mem_write_addr[i],
            kts.ternary(self._config_en.r_or(), self._mem_addr_cfg,
                        self._write_addr[i]))
    self.wire(self.RF_GEN.ports.wr_addr, self._mem_write_addr[0])

    for i in range(self.interconnect_output_ports):
        self.wire(
            self._s_mem_read_addr[i],
            kts.ternary(self._config_en.r_or(), self._mem_addr_cfg,
                        self._read_addr[i]))
    self.wire(self.RF_GEN.ports.rd_addr, self._mem_read_addr[0])

    if self.add_clk_enable:
        kts.passes.auto_insert_clock_enable(self.internal_generator)
        clk_en_port = self.internal_generator.get_port("clk_en")
        clk_en_port.add_attribute(ControlSignalAttr(False))

    if self.add_flush:
        self.add_attribute("sync-reset=flush")
        kts.passes.auto_insert_sync_reset(self.internal_generator)
        flush_port = self.internal_generator.get_port("flush")
        flush_port.add_attribute(ControlSignalAttr(True))

    # Finally, lift the config regs...
    lift_config_reg(self.internal_generator)
def test_input_addr_basic(banks,
                          interconnect_input_ports,
                          mem_depth=512,
                          data_width=16,
                          fetch_width=32,
                          iterator_support=4,
                          address_width=16,
                          multiwrite=1,
                          num_tiles=1):
    """Compare ``InputAddrCtrl`` against ``InputAddrCtrlModel`` cycle by cycle.

    Both the model and the DUT are given the same two-address-generator
    configuration, then driven for 1000 iterations with seeded random
    ``valid_in``/``wen_en``/``data_in`` stimulus. After each evaluation the
    DUT's address, write-enable, data and port outputs are expected to equal
    the model's. The recorded test is finally compiled and run with
    verilator in a temporary directory.

    Args:
        banks: Number of banks; selects which DUT port names are checked.
        interconnect_input_ports: Number of input ports that are driven.
        mem_depth: Memory depth; also the start address of generator 1.
        data_width: Word width for the model, the DUT and the random data.
        fetch_width: Fetch width; ``fetch_width / data_width`` words are
            driven per port.
        iterator_support: Iterator support for model and DUT.
        address_width: Address width for model and DUT.
        multiwrite: Passed to the DUT only.
        num_tiles: Number of tiles for model and DUT.
    """
    fw_int = int(fetch_width / data_width)

    # Set up model...
    model_iac = InputAddrCtrlModel(
        interconnect_input_ports=interconnect_input_ports,
        mem_depth=mem_depth,
        banks=banks,
        num_tiles=num_tiles,
        iterator_support=iterator_support,
        max_port_schedule=64,
        address_width=address_width,
        data_width=data_width,
        fetch_width=fetch_width)

    # Two identical 3-D address generators; only the start address differs.
    new_config = {}
    for gen_idx, start_addr in enumerate((0, mem_depth)):
        prefix = f"address_gen_{gen_idx}"
        new_config[f"{prefix}_starting_addr"] = start_addr
        new_config[f"{prefix}_dimensionality"] = 3
        for dim, stride in enumerate((1, 3, 9)):
            new_config[f"{prefix}_strides_{dim}"] = stride
        for dim in range(3):
            new_config[f"{prefix}_ranges_{dim}"] = 3

    model_iac.set_config(new_config=new_config)

    # Set up dut...
    # data_width follows the parameter so the DUT always matches the model
    # and the generated stimulus (it used to be pinned to 16).
    dut = InputAddrCtrl(interconnect_input_ports=interconnect_input_ports,
                        mem_depth=mem_depth,
                        banks=banks,
                        num_tiles=num_tiles,
                        iterator_support=iterator_support,
                        address_width=address_width,
                        data_width=data_width,
                        fetch_width=fetch_width,
                        multiwrite=multiwrite,
                        strg_wr_ports=1,
                        config_width=16)

    lift_config_reg(dut.internal_generator)

    magma_dut = k.util.to_magma(dut,
                                flatten_array=True,
                                check_multiple_driver=False,
                                check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)

    for key, value in new_config.items():
        setattr(tester.circuit, key, value)

    # These lists are mutated in place every iteration.
    valid_in = [0] * interconnect_input_ports
    wen_en = [0] * interconnect_input_ports

    # initial reset
    tester.circuit.clk = 0
    tester.circuit.rst_n = 0
    tester.step(2)
    tester.circuit.rst_n = 1
    for port in range(interconnect_input_ports):
        tester.circuit.wen_en[port] = 1
    tester.step(2)

    rand.seed(0)

    # Init blank data input
    data_in = [[0] * fw_int for _ in range(interconnect_input_ports)]

    for _ in range(1000):
        # Set valid and wen enable (draw order is part of the seeded
        # sequence, so valid/wen stay interleaved per port).
        for port in range(interconnect_input_ports):
            valid_in[port] = rand.randint(0, 1)
            wen_en[port] = rand.randint(0, 1)
        # Deal with data in
        for port in range(interconnect_input_ports):
            for word in range(fw_int):
                data_in[port][word] = rand.randint(0, 2**data_width - 1)

        (wen, data_out, addrs, port_out) = model_iac.interact(valid_in,
                                                              data_in,
                                                              wen_en)

        for port in range(interconnect_input_ports):
            tester.circuit.valid_in[port] = valid_in[port]
            tester.circuit.wen_en[port] = wen_en[port]
        for port in range(interconnect_input_ports):
            for word in range(fw_int):
                setattr(tester.circuit, f"data_in_{port}_{word}",
                        data_in[port][word])

        tester.eval()

        # With a single bank the write enable port has no bank suffix.
        if banks == 1:
            tester.circuit.addr_out_0_0.expect(addrs[0])
            tester.circuit.wen_to_sram.expect(wen[0])
        else:
            for bank in range(banks):
                getattr(tester.circuit,
                        f"addr_out_{bank}_0").expect(addrs[bank])
                getattr(tester.circuit,
                        f"wen_to_sram_{bank}").expect(wen[bank])

        for bank in range(banks):
            for word in range(fw_int):
                getattr(tester.circuit,
                        f"data_out_{bank}_0_{word}").expect(
                            data_out[bank][word])

        for port in range(interconnect_input_ports):
            tester.circuit.port_out[port].expect(port_out[port])

        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])
def __init__(
        self,
        data_width=16,  # CGRA Params
        mem_width=64,
        mem_depth=512,
        banks=2,
        input_iterator_support=6,  # Addr Controllers
        output_iterator_support=6,
        interconnect_input_ports=1,  # Connection to int
        interconnect_output_ports=3,
        mem_input_ports=1,
        mem_output_ports=1,
        use_sram_stub=1,
        sram_macro_info=SRAMMacroInfo(),
        read_delay=1,  # Cycle delay in read (SRAM vs Register File)
        rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
        agg_height=4,
        max_agg_schedule=32,
        input_max_port_sched=32,
        output_max_port_sched=32,
        align_input=1,
        max_line_length=128,
        max_tb_height=1,
        tb_range_max=128,
        tb_sched_max=64,
        max_tb_stride=15,
        num_tb=1,
        tb_iterator_support=2,
        multiwrite=1,
        max_prefetch=64,
        config_data_width=16,
        config_addr_width=8,
        num_tiles=2,
        remove_tb=False,
        fifo_mode=False,
        add_clk_enable=False,
        add_flush=False):
    """Build the "LakeChain" generator: ``num_tiles`` ``LakeTop`` tiles.

    Every tile is constructed with the same parameters and receives the
    same broadcast data, address, enable and config inputs. Tile ``i`` gets
    ``chain_idx_input=i`` and drives row ``i`` of ``config_data_out``,
    ``data_out_inter``, ``valid_out_inter`` and ``tile_output_en``. The
    ``set_data_out``, ``set_valid_out`` and ``set_chain_outputs`` code
    blocks (defined elsewhere on this class) are then registered.

    All parameters other than ``num_tiles`` are forwarded unchanged to
    ``LakeTop``; ``num_tiles`` is forwarded as well and also sets how many
    tiles are instantiated. ``data_width``, ``config_*_width``,
    ``interconnect_*_ports``, ``banks``, ``mem_depth`` and
    ``config_addr_width`` additionally size this generator's own ports.

    NOTE(review): the ``sram_macro_info`` default is a single
    ``SRAMMacroInfo`` instance created at definition time and shared by
    every call that omits the argument — confirm it is never mutated.
    """
    super().__init__("LakeChain", debug=True)

    # Number of config sets exposed per tile.
    data_words_per_set = 2**config_addr_width
    sets_per_macro = max(1, int(mem_depth / data_words_per_set))
    total_sets = max(1, banks * sets_per_macro)

    self._clk = self.clock("clk")
    self._rst_n = self.reset("rst_n")

    self._data_in = self.input("data_in",
                               data_width,
                               size=interconnect_input_ports,
                               packed=True,
                               explicit_array=True)

    self._addr_in = self.input("addr_in",
                               data_width,
                               size=interconnect_input_ports,
                               packed=True,
                               explicit_array=True)

    self._wen = self.input("wen", interconnect_input_ports)
    self._ren = self.input("ren", interconnect_output_ports)

    self._config_data_in = self.input("config_data_in", config_data_width)

    self._config_addr_in = self.input("config_addr_in", config_addr_width)

    self._config_data_out = self.output("config_data_out",
                                        config_data_width,
                                        size=(num_tiles, total_sets),
                                        explicit_array=True,
                                        packed=True)

    self._config_read = self.input("config_read", 1)
    self._config_write = self.input("config_write", 1)
    self._config_en = self.input("config_en", total_sets)

    self._data_out = self.output("data_out",
                                 data_width,
                                 size=(num_tiles, interconnect_output_ports),
                                 packed=True,
                                 explicit_array=True)

    self._data_out_inter = self.var("data_out_inter",
                                    data_width,
                                    size=(num_tiles,
                                          interconnect_output_ports),
                                    packed=True,
                                    explicit_array=True)

    self._valid_out = self.output("valid_out",
                                  interconnect_output_ports,
                                  size=num_tiles,
                                  packed=True,
                                  explicit_array=True)

    self._valid_out_inter = self.var("valid_out_inter",
                                     interconnect_output_ports,
                                     size=num_tiles,
                                     packed=True,
                                     explicit_array=True)

    self._enable_chain_output = self.input("enable_chain_output", 1)

    self._chain_data_out = self.output("chain_data_out",
                                       data_width,
                                       size=interconnect_output_ports,
                                       packed=True,
                                       explicit_array=True)

    self._chain_valid_out = self.output("chain_valid_out",
                                        interconnect_output_ports)

    self._tile_output_en = self.var("tile_output_en",
                                    1,
                                    size=(num_tiles,
                                          interconnect_output_ports),
                                    packed=True,
                                    explicit_array=True)

    self.is_valid_ = self.var("is_valid",
                              1,
                              size=interconnect_output_ports,
                              packed=True,
                              explicit_array=True)

    self.valids = self.var("valids",
                           clog2(num_tiles),
                           size=interconnect_output_ports,
                           packed=True,
                           explicit_array=True)

    # Every tile is built from the same parameters, so collect them once.
    tile_params = dict(
        data_width=data_width,
        mem_width=mem_width,
        mem_depth=mem_depth,
        banks=banks,
        input_iterator_support=input_iterator_support,
        output_iterator_support=output_iterator_support,
        interconnect_input_ports=interconnect_input_ports,
        interconnect_output_ports=interconnect_output_ports,
        mem_input_ports=mem_input_ports,
        mem_output_ports=mem_output_ports,
        use_sram_stub=use_sram_stub,
        sram_macro_info=sram_macro_info,
        read_delay=read_delay,
        rw_same_cycle=rw_same_cycle,
        agg_height=agg_height,
        max_agg_schedule=max_agg_schedule,
        input_max_port_sched=input_max_port_sched,
        output_max_port_sched=output_max_port_sched,
        align_input=align_input,
        max_line_length=max_line_length,
        max_tb_height=max_tb_height,
        tb_range_max=tb_range_max,
        tb_sched_max=tb_sched_max,
        max_tb_stride=max_tb_stride,
        num_tb=num_tb,
        tb_iterator_support=tb_iterator_support,
        multiwrite=multiwrite,
        max_prefetch=max_prefetch,
        config_data_width=config_data_width,
        config_addr_width=config_addr_width,
        num_tiles=num_tiles,
        remove_tb=remove_tb,
        fifo_mode=fifo_mode,
        add_clk_enable=add_clk_enable,
        add_flush=add_flush)

    for i in range(num_tiles):
        tile = LakeTop(**tile_params)

        self.add_child(
            f"tile_{i}",
            tile,
            clk=self._clk,
            rst_n=self._rst_n,
            enable_chain_output=self._enable_chain_output,
            # tile index
            chain_idx_input=i,
            chain_idx_output=0,
            tile_output_en=self._tile_output_en[i],
            # broadcast input data to all tiles
            data_in=self._data_in,
            addr_in=self._addr_in,
            wen=self._wen,
            ren=self._ren,
            config_data_in=self._config_data_in,
            config_addr_in=self._config_addr_in,
            config_data_out=self._config_data_out[i],
            config_read=self._config_read,
            config_write=self._config_write,
            config_en=self._config_en,
            # used if output chaining not enabled
            data_out=self._data_out_inter[i],
            valid_out=self._valid_out_inter[i],
            # unused currently?
            tile_en=1,
            # UB mode
            mode=0)

    self.add_code(self.set_data_out)
    self.add_code(self.set_valid_out)
    self.add_code(self.set_chain_outputs)

    # config regs
    lift_config_reg(self.internal_generator)
def test_agg_formal():
    """Smoke-run ``AggFormal`` in verilator with a fixed configuration.

    The DUT is configured through its lifted config registers, reset with an
    active-low pulse on ``rst_n``, and then fed an incrementing ``data_in``
    value for 500 iterations. No outputs are checked; the test passes when
    the design compiles and simulates. Build products and the ``--trace``
    dump are written to a temporary directory that is removed afterwards,
    matching the other tests in this file.
    """
    agg_dut = AggFormal(
        data_width=16,  # CGRA Params
        mem_width=64,
        mem_depth=512,
        banks=1,
        input_addr_iterator_support=6,
        output_addr_iterator_support=6,
        input_sched_iterator_support=6,
        output_sched_iterator_support=6,
        config_width=16,
        interconnect_input_ports=1,  # Connection to int
        interconnect_output_ports=1,
        mem_input_ports=1,
        mem_output_ports=1,
        read_delay=1,  # Cycle delay in read (SRAM vs Register File)
        rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
        agg_height=4)

    lift_config_reg(agg_dut.internal_generator)

    magma_dut = k.util.to_magma(agg_dut,
                                flatten_array=True,
                                check_flip_flop_always_ff=False)

    tester = fault.Tester(magma_dut, magma_dut.clk)

    config = {
        "agg_write_sched_gen_0_sched_addr_gen_starting_addr": 0,
        "agg_write_addr_gen_0_strides_0": 54657,
        "agg_write_addr_gen_0_strides_1": 0,
        "agg_write_addr_gen_0_strides_2": 0,
        "agg_write_addr_gen_0_strides_3": 0,
        "agg_write_addr_gen_0_strides_4": 0,
        "agg_write_addr_gen_0_strides_5": 0,
        "agg_read_addr_gen_0_strides_0": 65032,
        "agg_read_addr_gen_0_strides_1": 0,
        "agg_read_addr_gen_0_strides_2": 0,
        "agg_read_addr_gen_0_strides_3": 0,
        "agg_read_addr_gen_0_strides_4": 0,
        "agg_read_addr_gen_0_strides_5": 0,
        "agg_write_loops_0_ranges_0": 2,
        "agg_write_loops_0_ranges_1": 0,
        "agg_write_loops_0_ranges_2": 0,
        "agg_write_loops_0_ranges_3": 0,
        "agg_write_loops_0_ranges_4": 0,
        "agg_write_loops_0_ranges_5": 0,
        "agg_write_loops_0_dimensionality": 1,
        "agg_write_addr_gen_0_starting_addr": 0,
        "agg_read_loops_0_ranges_0": 0,
        "agg_read_loops_0_ranges_1": 0,
        "agg_read_loops_0_ranges_2": 0,
        "agg_read_loops_0_ranges_3": 0,
        "agg_read_loops_0_ranges_4": 0,
        "agg_read_loops_0_ranges_5": 0,
        "agg_read_output_sched_gen_sched_addr_gen_starting_addr": 4,
        "agg_read_output_sched_gen_sched_addr_gen_strides_0": 4,
        "agg_read_output_sched_gen_sched_addr_gen_strides_1": 0,
        "agg_read_output_sched_gen_sched_addr_gen_strides_2": 0,
        "agg_read_output_sched_gen_sched_addr_gen_strides_3": 0,
        "agg_read_output_sched_gen_sched_addr_gen_strides_4": 0,
        "agg_read_output_sched_gen_sched_addr_gen_strides_5": 0,
        "agg_read_addr_gen_0_starting_addr": 0,
        "agg_write_sched_gen_0_sched_addr_gen_strides_0": 1,
    }

    # configuration registers passed through from top level
    for key, value in config.items():
        setattr(tester.circuit, key, value)

    # Pulse the active-low reset: high, low, high again.
    tester.circuit.clk = 0
    tester.circuit.rst_n = 1
    tester.step(2)
    tester.circuit.rst_n = 0
    tester.step(2)
    tester.circuit.rst_n = 1

    rand.seed(0)

    num_iters = 500
    # Drive an incrementing value, one per full clock cycle.
    for data_in in range(num_iters):
        tester.circuit.data_in = data_in
        tester.eval()
        tester.step(2)

    # Use the temporary directory itself; it was previously overwritten
    # with the fixed path "dump_agg_formal", leaving artefacts in the
    # working directory and the temp dir unused.
    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal", "--trace"],
                               disp_type="realtime")
self.add_code(self.tb_ctrl) for idx in range(self.interconnect_output_ports): self.add_code(self.tb_to_out, idx=idx) @always_ff((posedge, "clk")) def tb_ctrl(self): if self._read: self._tb[self._output_port_sel_addr][self._tb_write_addr[self._output_port_sel_addr][1, 0]] = \ self._data_in @always_comb def tb_to_out(self, idx): self._data_out[idx] = self._tb[idx][self._tb_read_addr[idx][3, 2]][ self._tb_read_addr[idx][1, 0]] @always_ff((posedge, "clk"), (negedge, "rst_n")) def increment_cycle_count(self): if ~self._rst_n: self._cycle_count = 0 else: self._cycle_count = self._cycle_count + 1 if __name__ == "__main__": tb_dut = TBFormal() lift_config_reg(tb_dut.internal_generator) extract_formal_annotation(tb_dut, 'tb_formal_annotation.txt') verilog(tb_dut, filename="tb_formal.sv", optimize_if=False)
def test_app_ctrl(sprt_stcl_valid, int_in_ports=1, int_out_ports=3, depth_width=16, stcl_cnt_width=16, stcl_iter_support=4): # Set up model.. model_ac = AppCtrlModel(int_in_ports=int_in_ports, int_out_ports=int_out_ports, sprt_stcl_valid=sprt_stcl_valid, stcl_iter_support=stcl_iter_support) new_config = {} new_config['input_port_0'] = 0 new_config['input_port_1'] = 0 new_config['input_port_2'] = 0 new_config['read_depth_0'] = 196 new_config['read_depth_1'] = 196 new_config['read_depth_2'] = 196 new_config['write_depth_0'] = 196 for i in range(stcl_iter_support): new_config[f'ranges_{i}'] = 4 new_config[f'threshold_{i}'] = 4 rand.seed(0) prefill = [] for i in range(int_out_ports): prefill_num = rand.randint(0, 1) new_config[f'prefill_{i}'] = prefill_num prefill.append(prefill_num) model_ac.set_config(new_config=new_config) # Set up dut... dut = AppCtrl(interconnect_input_ports=int_in_ports, interconnect_output_ports=int_out_ports, depth_width=depth_width, sprt_stcl_valid=sprt_stcl_valid, stcl_cnt_width=stcl_cnt_width, stcl_iter_support=stcl_iter_support) lift_config_reg(dut.internal_generator) magma_dut = kts.util.to_magma(dut, flatten_array=True, check_multiple_driver=False, check_flip_flop_always_ff=False) tester = fault.Tester(magma_dut, magma_dut.clk) ### tester.zero_inputs() for key, value in new_config.items(): setattr(tester.circuit, key, value) tester.circuit.write_depth = 196 # initial reset tester.circuit.clk = 0 tester.circuit.rst_n = 0 tester.step(2) tester.circuit.rst_n = 1 tester.step(2) # Seed for posterity wen_in = [0] * int_in_ports ren_in = [0] * int_out_ports tb_valid = [0] * int_out_ports ren_update = [0] * int_out_ports for i in range(300): # Gen random data for j in range(int_in_ports): wen_in[j] = rand.randint(0, 1) ren_in_tmp = rand.randint(0, 1) for j in range(int_out_ports): tb_valid[j] = rand.randint(0, 1) ren_in[j] = ren_in_tmp ren_update[j] = rand.randint(0, 1) # Apply stimulus to dut for j in range(int_in_ports): 
tester.circuit.wen_in[j] = wen_in[j] for j in range(int_out_ports): tester.circuit.ren_in[j] = ren_in[j] tester.circuit.tb_valid[j] = tb_valid[j] tester.circuit.ren_update[j] = ren_update[j] tester.circuit.prefill[j] = prefill[j] # Interact w/ model (wen_out, ren_out, valid_out_data, valid_out_stencil) = model_ac.interact(wen_in=wen_in, ren_in=ren_in, tb_valid=tb_valid, ren_update=ren_update) tester.eval() for j in range(int_in_ports): tester.circuit.wen_out[j].expect(wen_out[j]) for j in range(int_out_ports): # tester.circuit.ren_out[j].expect(ren_out[j]) tester.circuit.valid_out_data[j].expect(valid_out_data[j]) tester.circuit.valid_out_stencil[j].expect(valid_out_stencil[j]) tester.step(2) with tempfile.TemporaryDirectory() as tempdir: tester.compile_and_run(target="verilator", directory=tempdir, magma_output="verilog", flags=["-Wno-fatal"])
self._agg[idx][0][self._agg_write_addr[idx][clog2(self.fetch_width) - 1, 0]]\ = self._data_in[idx] else: self._agg[idx][self._agg_write_addr[idx] [self._agg_write_addr[0].width - 1, clog2(self.fetch_width)]]\ [self._agg_write_addr[idx][clog2(self.fetch_width) - 1, 0]]\ = self._data_in[idx] @always_comb def agg_to_sram(self): for i in range(self.fetch_width): self._data_out[i] = \ self._agg[self._input_port_sel_addr][self._agg_read_addr[self._input_port_sel_addr]][i] @always_ff((posedge, "clk"), (negedge, "rst_n")) def increment_cycle_count(self): if ~self._rst_n: self._cycle_count = 0 else: self._cycle_count = self._cycle_count + 1 if __name__ == "__main__": lake_dut = AggFormal() lift_config_reg(lake_dut.internal_generator) extract_formal_annotation(lake_dut, "agg_formal_annotation.txt") verilog(lake_dut, filename="agg_formal.sv", optimize_if=False)
def __init__(self,
             word_width,
             input_ports,
             output_ports,
             memories,
             edges):
    """Build the LakeTop generator from a memory/edge description.

    Args:
        word_width: bit width of one data word on the top-level ports and
            of the per-memory data-out arrays.
        input_ports: number of entries in the top-level ``data_in`` array.
        output_ports: number of entries in the top-level ``data_out`` array.
        memories: dict keyed by memory name. Each value is a dict; the keys
            read here are ``chaining``, ``is_input``, ``is_output``,
            ``num_read_write_ports``, ``capacity``, either
            ``read_port_width`` or ``read_write_port_width``, and, where
            applicable, ``input_port`` / ``input_edge_params``,
            ``output_port`` / ``output_edge_params``, ``rw_same_cycle`` and
            ``read_info`` / ``read_write_info`` (``[0]["latency"]``).
        edges: iterable of dicts with ``from_signal`` and ``to_signal``
            (lists of memory names) and ``dim``.

    For every input memory, output memory and edge this creates a ForLoop,
    an AddrGen and a SchedGen child and wires them to the memory instances.
    Memories with shared read/write ports get their address selected by a
    final combinational block driven from ``self.mem_read_write_addrs``.
    """
    super().__init__("LakeTop", debug=True)

    # parameters
    self.word_width = word_width
    self.input_ports = input_ports
    self.output_ports = output_ports

    self.default_config_width = 16
    self.cycle_count_width = 16

    self.stencil_valid = False

    # objects
    self.memories = memories
    self.edges = edges

    # tile enable and clock
    self.tile_en = self.input("tile_en", 1)
    self.tile_en.add_attribute(ConfigRegAttr("Tile logic enable manifested as clock gate"))
    self.tile_en.add_attribute(FormalAttr(self.tile_en.name, FormalSignalConstraint.SET1))

    self.clk_mem = self.clock("clk")
    self.clk_mem.add_attribute(FormalAttr(self.clk_mem.name, FormalSignalConstraint.CLK))

    # chaining: a real config register only if some memory supports it,
    # otherwise a constant-0 internal signal
    chain_supported = any(self.memories[mem]["chaining"] for mem in self.memories)

    if chain_supported:
        self.chain_en = self.input("chain_en", 1)
        self.chain_en.add_attribute(ConfigRegAttr("Chaining enable"))
        self.chain_en.add_attribute(FormalAttr(self.chain_en.name, FormalSignalConstraint.SET0))
    else:
        self.chain_en = self.var("chain_en", 1)
        self.wire(self.chain_en, 0)

    # gate clock with tile_en
    gclk = self.var("gclk", 1)
    self.gclk = kts.util.clock(gclk)
    self.wire(gclk, self.clk_mem & self.tile_en)

    self.clk_en = self.clock_en("clk_en", 1)

    # active low asynchronous reset
    self.rst_n = self.reset("rst_n", 1)
    self.rst_n.add_attribute(FormalAttr(self.rst_n.name, FormalSignalConstraint.RSTN))

    # data in and out of top level Lake memory object
    self.data_in = self.input("data_in",
                              width=self.word_width,
                              size=self.input_ports,
                              explicit_array=True,
                              packed=True)
    self.data_in.add_attribute(FormalAttr(self.data_in.name, FormalSignalConstraint.SEQUENCE))

    self.data_out = self.output("data_out",
                                width=self.word_width,
                                size=self.output_ports,
                                explicit_array=True,
                                packed=True)
    self.data_out.add_attribute(FormalAttr(self.data_out.name, FormalSignalConstraint.SEQUENCE))

    # global cycle count for accessor comparison; its width is the same
    # parameter handed to every SchedGen below so the two cannot drift apart
    self._cycle_count = self.var("cycle_count", self.cycle_count_width)

    @always_ff((posedge, self.gclk), (negedge, "rst_n"))
    def increment_cycle_count(self):
        if ~self.rst_n:
            self._cycle_count = 0
        else:
            self._cycle_count = self._cycle_count + 1

    self.add_always(increment_cycle_count)

    # info about memories
    num_mem = len(memories)
    subscript_mems = list(self.memories.keys())

    # list of the data out from each memory; the array size comes from
    # whichever of read_port_width / read_write_port_width the memory has
    self.mem_data_outs = [self.var(f"mem_data_out_{subscript_mems[i]}",
                                   width=self.word_width,
                                   size=self.memories[subscript_mems[i]]
                                   ["read_port_width" if "read_port_width" in self.memories[subscript_mems[i]]
                                    else "read_write_port_width"],
                                   explicit_array=True,
                                   packed=True) for i in range(num_mem)]

    # keep track of write, read_addr, and write_addr vars for read/write memories
    # to later check whether there is a write and what to use for the shared port
    self.mem_read_write_addrs = {}

    # create memory instance for each memory
    self.mem_insts = {}
    for i, mem in enumerate(self.memories):
        m = mem_inst(self.memories[mem], self.word_width)
        self.mem_insts[mem] = m

        self.add_child(mem,
                       m,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       # put data out in memory data out list
                       data_out=self.mem_data_outs[i],
                       chain_en=self.chain_en)

    # get input and output memories
    is_input = [name for name in self.memories if self.memories[name]["is_input"]]
    is_output = [name for name in self.memories if self.memories[name]["is_output"]]

    # TODO direct connection to write doesn't work (?), so have to do this...
    self.low = self.var("low", 1)
    self.wire(self.low, 0)

    # TODO adding multiple ports to 1 memory after talking about mux with compiler team

    # set up input memories
    for in_mem in is_input:

        # input addressor / accessor parameters
        input_dim = self.memories[in_mem]["input_edge_params"]["dim"]
        input_range = self.memories[in_mem]["input_edge_params"]["max_range"]
        input_stride = self.memories[in_mem]["input_edge_params"]["max_stride"]

        # input port associated with memory
        input_port_index = self.memories[in_mem]["input_port"]

        self.valid = self.var(
            f"input_port{input_port_index}_2{in_mem}_accessor_valid", 1)
        self.wire(self.mem_insts[in_mem].ports.write, self.valid)

        # hook up data from the specified input port to the memory
        safe_wire(self, self.mem_insts[in_mem].ports.data_in[0],
                  self.data_in[input_port_index])

        if self.memories[in_mem]["num_read_write_ports"] > 0:
            self.mem_read_write_addrs[in_mem] = {"write": self.valid}

        # create IteratorDomain, AddressGenerator, and ScheduleGenerator
        # for writes to this input memory
        forloop = ForLoop(iterator_support=input_dim,
                          config_width=max(1, clog2(input_range)))
        loop_itr = forloop.get_iter()
        loop_wth = forloop.get_cfg_width()

        self.add_child(f"input_port{input_port_index}_2{in_mem}_forloop",
                       forloop,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       step=self.valid)

        newAG = AddrGen(iterator_support=input_dim,
                        config_width=max(1, clog2(input_stride)))
        self.add_child(f"input_port{input_port_index}_2{in_mem}_write_addr_gen",
                       newAG,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       step=self.valid,
                       mux_sel=forloop.ports.mux_sel_out,
                       restart=forloop.ports.restart)

        if self.memories[in_mem]["num_read_write_ports"] == 0:
            safe_wire(self, self.mem_insts[in_mem].ports.write_addr[0], newAG.ports.addr_out)
        else:
            self.mem_read_write_addrs[in_mem]["write_addr"] = newAG.ports.addr_out

        newSG = SchedGen(iterator_support=input_dim,
                         config_width=self.cycle_count_width)
        self.add_child(f"input_port{input_port_index}_2{in_mem}_write_sched_gen",
                       newSG,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       mux_sel=forloop.ports.mux_sel_out,
                       finished=forloop.ports.restart,
                       cycle_count=self._cycle_count,
                       valid_output=self.valid)

    # set up output memories
    for out_mem in is_output:

        # output addressor / accessor parameters
        output_dim = self.memories[out_mem]["output_edge_params"]["dim"]
        output_range = self.memories[out_mem]["output_edge_params"]["max_range"]
        output_stride = self.memories[out_mem]["output_edge_params"]["max_stride"]

        # output port associated with memory
        output_port_index = self.memories[out_mem]["output_port"]

        # hook up data from the memory to the specified output port
        self.wire(self.data_out[output_port_index],
                  self.mem_insts[out_mem].ports.data_out[0][0])

        self.valid = self.var(f"{out_mem}2output_port{output_port_index}_accessor_valid", 1)
        if self.memories[out_mem]["rw_same_cycle"]:
            self.wire(self.mem_insts[out_mem].ports.read, self.valid)

        # create IteratorDomain, AddressGenerator, and ScheduleGenerator
        # for reads from this output memory
        forloop = ForLoop(iterator_support=output_dim,
                          config_width=max(1, clog2(output_range)))
        loop_itr = forloop.get_iter()
        loop_wth = forloop.get_cfg_width()

        self.add_child(f"{out_mem}2output_port{output_port_index}_forloop",
                       forloop,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       step=self.valid)

        newAG = AddrGen(iterator_support=output_dim,
                        config_width=max(1, clog2(output_stride)))
        self.add_child(f"{out_mem}2output_port{output_port_index}_read_addr_gen",
                       newAG,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       step=self.valid,
                       mux_sel=forloop.ports.mux_sel_out,
                       restart=forloop.ports.restart)

        if self.memories[out_mem]["num_read_write_ports"] == 0:
            safe_wire(self, self.mem_insts[out_mem].ports.read_addr[0], newAG.ports.addr_out)
        else:
            # The read address belongs to *this* output memory. The entry
            # may not exist yet (it is created by the input loop above or
            # by the edge loop below), hence setdefault.
            self.mem_read_write_addrs.setdefault(out_mem, {})["read_addr"] = newAG.ports.addr_out

        newSG = SchedGen(iterator_support=output_dim,
                         config_width=self.cycle_count_width)
        self.add_child(f"{out_mem}2output_port{output_port_index}_read_sched_gen",
                       newSG,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       mux_sel=forloop.ports.mux_sel_out,
                       finished=forloop.ports.restart,
                       cycle_count=self._cycle_count,
                       valid_output=self.valid)

    # create shared IteratorDomains and accessors as well as
    # read/write addressors for memories connected by each edge
    for edge in self.edges:

        # see how many signals need to be selected between for
        # from and to signals for edge
        num_mux_from = len(edge["from_signal"])
        num_mux_to = len(edge["to_signal"])

        # get unique edge_name identifier for hardware modules
        edge_name = get_edge_name(edge)

        # create forloop and accessor valid output signal
        self.valid = self.var(edge_name + "_accessor_valid", 1)

        forloop = ForLoop(iterator_support=edge["dim"])
        self.forloop = forloop
        loop_itr = forloop.get_iter()
        loop_wth = forloop.get_cfg_width()

        self.add_child(edge_name + "_forloop",
                       forloop,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       step=self.valid)

        # create input addressor
        readAG = AddrGen(iterator_support=edge["dim"],
                         config_width=self.default_config_width)
        self.add_child(f"{edge_name}_read_addr_gen",
                       readAG,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       step=self.valid,
                       mux_sel=forloop.ports.mux_sel_out,
                       restart=forloop.ports.restart)

        # assign read address to all from memories
        if self.memories[edge["from_signal"][0]]["num_read_write_ports"] == 0:
            # can assign same read addrs to all the memories
            for from_mem in edge["from_signal"]:
                safe_wire(self, self.mem_insts[from_mem].ports.read_addr[0], readAG.ports.addr_out)
        else:
            for from_mem in edge["from_signal"]:
                self.mem_read_write_addrs.setdefault(from_mem, {})["read_addr"] = readAG.ports.addr_out

        # if needing to mux, choose which from memory we get data
        # from for to memory data in
        if num_mux_from > 1:
            num_mux_bits = clog2(num_mux_from)
            self.mux_sel = self.var(f"{edge_name}_mux_sel",
                                    width=num_mux_bits)

            read_addr_width = max(1, clog2(self.memories[edge["from_signal"][0]]["capacity"]))
            # decide which memory to get data from for to memory's data in
            safe_wire(self, self.mux_sel,
                      readAG.ports.addr_out[read_addr_width + num_mux_from - 1, read_addr_width])

            comb_mux_from = self.combinational()
            # TODO want to use a switch statement here, but get add_fn_ln issue
            # NOTE: only the first two source memories are selected between.
            if_mux_sel = IfStmt(self.mux_sel == 0)
            for to_mem in edge["to_signal"]:
                if_mux_sel.then_(self.mem_insts[to_mem].ports.data_in.assign(self.mem_insts[edge["from_signal"][0]].ports.data_out))
                if_mux_sel.else_(self.mem_insts[to_mem].ports.data_in.assign(self.mem_insts[edge["from_signal"][1]].ports.data_out))
            comb_mux_from.add_stmt(if_mux_sel)

        # no muxing from, data_out from the one and only memory
        # goes to all to memories (valid determines whether it is
        # actually written)
        else:
            for to_mem in edge["to_signal"]:
                safe_wire(self,
                          self.mem_insts[to_mem].ports.data_in,
                          # only one memory to read from
                          self.mem_insts[edge["from_signal"][0]].ports.data_out)

        # create output addressor
        writeAG = AddrGen(iterator_support=edge["dim"],
                          config_width=self.default_config_width)
        # step, mux_sel, restart may need delayed signals (assigned later)
        self.add_child(f"{edge_name}_write_addr_gen",
                       writeAG,
                       clk=self.gclk,
                       rst_n=self.rst_n)

        # set write addr for to memories
        if self.memories[edge["to_signal"][0]]["num_read_write_ports"] == 0:
            for to_mem in edge["to_signal"]:
                safe_wire(self, self.mem_insts[to_mem].ports.write_addr[0], writeAG.ports.addr_out)
        else:
            for to_mem in edge["to_signal"]:
                # Merge rather than replace: the destination may already
                # carry a read_addr recorded by the output-memory loop.
                self.mem_read_write_addrs.setdefault(to_mem, {}).update(
                    {"write": self.valid, "write_addr": writeAG.ports.addr_out})

        # calculate necessary delay between from_signal to to_signal
        # TODO this may need to be more sophisticated and based on II as well
        # TODO just need to add for loops for all the ports
        if self.memories[edge["from_signal"][0]]["num_read_write_ports"] == 0:
            self.delay = self.memories[edge["from_signal"][0]]["read_info"][0]["latency"]
        else:
            self.delay = self.memories[edge["from_signal"][0]]["read_write_info"][0]["latency"]

        if self.delay > 0:
            # signals that need to be delayed due to edge latency
            self.delayed_writes = self.var(f"{edge_name}_delayed_writes",
                                           width=self.delay)
            self.delayed_mux_sels = self.var(f"{edge_name}_delayed_mux_sels",
                                             width=self.forloop.ports.mux_sel_out.width,
                                             size=self.delay,
                                             explicit_array=True,
                                             packed=True)
            self.delayed_restarts = self.var(f"{edge_name}_delayed_restarts",
                                             width=self.delay)

            # delay in valid between read from memory and write to next memory
            @always_ff((posedge, self.gclk), (negedge, "rst_n"))
            def get_delayed_write(self):
                if ~self.rst_n:
                    self.delayed_writes = 0
                    self.delayed_mux_sels = 0
                    self.delayed_restarts = 0
                else:
                    for i in range(self.delay - 1):
                        self.delayed_writes[i + 1] = self.delayed_writes[i]
                        self.delayed_mux_sels[i + 1] = self.delayed_mux_sels[i]
                        self.delayed_restarts[i + 1] = self.delayed_restarts[i]
                    self.delayed_writes[0] = self.valid
                    self.delayed_mux_sels[0] = self.forloop.ports.mux_sel_out
                    self.delayed_restarts[0] = self.forloop.ports.restart

            self.add_always(get_delayed_write)

        # if we have a mux for the destination memories,
        # choose which mux to write to
        if num_mux_to > 1:
            num_mux_bits = clog2(num_mux_to)
            self.mux_sel_to = self.var(f"{edge_name}_mux_sel_to",
                                       width=num_mux_bits)

            write_addr_width = max(1, clog2(self.memories[edge["to_signal"][0]]["capacity"]))
            # decide which destination memory gets written to
            safe_wire(self, self.mux_sel_to,
                      writeAG.ports.addr_out[write_addr_width + num_mux_to - 1, write_addr_width])

            # wire the write (or if needed, delayed write) signal to the selected destination memory
            # and set write enable low for all other destination memories
            comb_mux_to = self.combinational()
            for i in range(num_mux_to):
                if_mux_sel_to = IfStmt(self.mux_sel_to == i)
                if self.delay == 0:
                    if_mux_sel_to.then_(self.mem_insts[edge["to_signal"][i]].ports.write.assign(self.valid))
                else:
                    if_mux_sel_to.then_(self.mem_insts[edge["to_signal"][i]].ports.write.assign(self.delayed_writes[self.delay - 1]))

                if_mux_sel_to.else_(self.mem_insts[edge["to_signal"][i]].ports.write.assign(self.low))
                comb_mux_to.add_stmt(if_mux_sel_to)

        # no muxing to, just write to the one destination memory
        else:
            if self.delay == 0:
                self.wire(self.mem_insts[edge["to_signal"][0]].ports.write, self.valid)
            else:
                self.wire(self.mem_insts[edge["to_signal"][0]].ports.write, self.delayed_writes[self.delay - 1])

        # assign delayed signals for write addressor if needed
        if self.delay == 0:
            self.wire(writeAG.ports.step, self.valid)
            self.wire(writeAG.ports.mux_sel, self.forloop.ports.mux_sel_out)
            self.wire(writeAG.ports.restart, self.forloop.ports.restart)
        else:
            self.wire(writeAG.ports.step, self.delayed_writes[self.delay - 1])
            self.wire(writeAG.ports.mux_sel, self.delayed_mux_sels[self.delay - 1])
            self.wire(writeAG.ports.restart, self.delayed_restarts[self.delay - 1])

        # create accessor for edge
        newSG = SchedGen(iterator_support=edge["dim"],
                         config_width=self.cycle_count_width)
        self.add_child(edge_name + "_sched_gen",
                       newSG,
                       clk=self.gclk,
                       rst_n=self.rst_n,
                       mux_sel=forloop.ports.mux_sel_out,
                       finished=forloop.ports.restart,
                       cycle_count=self._cycle_count,
                       valid_output=self.valid)

    # for read write memories, choose either read or write address based on whether
    # we are writing to the memory (whether write enable is high)
    read_write_addr_comb = self.combinational()
    for mem_name in self.memories:
        if mem_name in self.mem_read_write_addrs:
            mem_info = self.mem_read_write_addrs[mem_name]
            if_write = IfStmt(mem_info["write"] == 1)
            addr_width = self.mem_insts[mem_name].ports.read_write_addr[0].width
            if_write.then_(self.mem_insts[mem_name].ports.read_write_addr[0].assign(mem_info["write_addr"][addr_width - 1, 0]))
            if_write.else_(self.mem_insts[mem_name].ports.read_write_addr[0].assign(mem_info["read_addr"][addr_width - 1, 0]))
            read_write_addr_comb.add_stmt(if_write)

    # clock enable and flush passes
    kts.passes.auto_insert_clock_enable(self.internal_generator)
    clk_en_port = self.internal_generator.get_port("clk_en")
    clk_en_port.add_attribute(FormalAttr(clk_en_port.name, FormalSignalConstraint.SET1))

    self.add_attribute("sync-reset=flush")
    kts.passes.auto_insert_sync_reset(self.internal_generator)
    flush_port = self.internal_generator.get_port("flush")

    # bring config registers up to top level
    lift_config_reg(self.internal_generator)
def test_prefetcher_basic(input_latency=10,
                          max_prefetch=64,
                          fetch_width=32,
                          data_width=16):
    """Co-simulate the Prefetcher generator against PrefetcherModel.

    Runs 1000 cycles of seeded random stimulus through both and expects the
    hardware's ``prefetch_step`` and ``valid_out`` to match the model every
    cycle, and ``data_out_*`` to match whenever the model reports valid data.
    """
    assert input_latency < max_prefetch, "Input latency must be smaller than fifo"

    # Number of data words carried by one fetch.
    fw_int = int(fetch_width / data_width)

    # Functional reference model.
    model_pf = PrefetcherModel(fetch_width=fetch_width,
                               data_width=data_width,
                               max_prefetch=max_prefetch)

    new_config = {'input_latency': input_latency}
    model_pf.set_config(new_config=new_config)

    # Hardware under test.
    dut = Prefetcher(fetch_width=fetch_width,
                     data_width=data_width,
                     max_prefetch=max_prefetch)
    lift_config_reg(dut.internal_generator)

    magma_dut = k.util.to_magma(dut,
                                flatten_array=True,
                                check_multiple_driver=False,
                                check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)

    for reg_name, reg_value in new_config.items():
        setattr(tester.circuit, reg_name, reg_value)

    # Reset pulse with the inputs held at zero.
    tester.circuit.clk = 0
    tester.circuit.rst_n = 0
    tester.circuit.data_in = 0
    tester.circuit.valid_read = 0
    tester.circuit.tba_rdy_in = 0
    tester.step(2)
    tester.circuit.rst_n = 1
    tester.step(2)

    # Fixed seed so every run sees the same stimulus.
    rand.seed(0)

    # Reused stimulus buffer; refilled in place every cycle.
    data_in = [0] * fw_int

    for cycle in range(1000):
        print(cycle)

        # Draw order fixes the random sequence: data words first, then the
        # three control bits.
        for word in range(fw_int):
            data_in[word] = rand.randint(0, 2**data_width - 1)
        tba_rdy_in = rand.randint(0, 1)
        valid_read = rand.randint(0, 1)
        mem_valid_data = rand.randint(0, 1)

        # The model is advanced before the hardware inputs are applied.
        (model_d,
         model_v,
         model_stp,
         model_mem_valid) = model_pf.interact(data_in, valid_read, tba_rdy_in, mem_valid_data)

        for word in range(fw_int):
            setattr(tester.circuit, f"data_in_{word}", data_in[word])
        tester.circuit.valid_read = valid_read
        tester.circuit.tba_rdy_in = tba_rdy_in

        tester.eval()

        # Step and valid are always compared; data only when it is valid.
        tester.circuit.prefetch_step.expect(model_stp)
        tester.circuit.valid_out.expect(model_v)
        if model_v:
            for word in range(fw_int):
                getattr(tester.circuit, f"data_out_{word}").expect(model_d[word])

        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])
def test_sync_groups(int_out_ports,
                     fetch_width=32,
                     data_width=16):
    """Co-simulate the SyncGroups generator against SyncGroupsModel.

    A seeded random choice picks one of two sync-group configurations (a
    fixed one is used when there is a single output port). Then 1000 cycles
    of random stimulus are applied to both, and the hardware's ``data_out``,
    ``valid_out``, ``rd_sync_gate`` and ``mem_valid_data_out`` are expected
    to match the model every cycle.
    """
    # Number of data words carried by one fetch.
    fw_int = int(fetch_width / data_width)

    # Functional reference model.
    model_sg = SyncGroupsModel(fetch_width=fetch_width,
                               data_width=data_width,
                               int_out_ports=int_out_ports)

    # The seed is set before the configuration draw so the run is repeatable.
    rand.seed(0)
    group_choice = rand.randint(0, 1)

    if int_out_ports == 1:
        new_config = {'sync_group': 1,
                      'sync_group_0': 1}
    elif group_choice == 1:
        new_config = {'sync_group_0': 1,
                      'sync_group_1': 1,
                      'sync_group_2': 1}
    else:
        new_config = {'sync_group_0': 1,
                      'sync_group_1': 1,
                      'sync_group_2': 2}

    model_sg.set_config(new_config=new_config)

    # Hardware under test.
    dut = SyncGroups(fetch_width=fetch_width,
                     data_width=data_width,
                     int_out_ports=int_out_ports)
    lift_config_reg(dut.internal_generator)

    magma_dut = kts.util.to_magma(dut,
                                  flatten_array=True,
                                  check_multiple_driver=False,
                                  check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)

    for reg_name, reg_value in new_config.items():
        setattr(tester.circuit, reg_name, reg_value)

    # Reset pulse.
    tester.circuit.clk = 0
    tester.circuit.rst_n = 0
    tester.step(2)
    tester.circuit.rst_n = 1
    tester.step(2)

    # One row of fw_int words per output port; refilled in place each cycle.
    data_in = [[0] * fw_int for _ in range(int_out_ports)]

    for _ in range(1000):
        # Draw order fixes the random sequence: ack first, then for each
        # port its three control bits followed by its data words.
        ack_in = rand.randint(0, 2 ** int_out_ports - 1)
        ren_in, valid_in, mem_valid_data = [], [], []
        for port in range(int_out_ports):
            ren_in.append(rand.randint(0, 1))
            valid_in.append(rand.randint(0, 1))
            mem_valid_data.append(rand.randint(0, 1))
            for word in range(fw_int):
                data_in[port][word] = rand.randint(0, 2 ** data_width - 1)

        # Drive the hardware inputs.
        tester.circuit.ack_in = ack_in
        for port in range(int_out_ports):
            tester.circuit.ren_in[port] = ren_in[port]
            tester.circuit.valid_in[port] = valid_in[port]
            tester.circuit.mem_valid_data[port] = mem_valid_data[port]
        for port in range(int_out_ports):
            for word in range(fw_int):
                setattr(tester.circuit, f"data_in_{port}_{word}", data_in[port][word])

        # Advance the model with the same inputs.
        (model_do,
         model_vo,
         model_rd_sync,
         model_mem_valid) = model_sg.interact(ack_in, data_in, valid_in, ren_in, mem_valid_data)

        tester.eval()

        # Compare hardware outputs with the model.
        for port in range(int_out_ports):
            for word in range(fw_int):
                getattr(tester.circuit, f"data_out_{port}_{word}").expect(model_do[port][word])
        for port in range(int_out_ports):
            tester.circuit.valid_out[port].expect(model_vo[port])
            tester.circuit.rd_sync_gate[port].expect(model_rd_sync[port])
            tester.circuit.mem_valid_data_out[port].expect(model_mem_valid[port])

        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])
def __init__(self,
             data_width=16,
             fetch_width=1,
             mem_depth=512,
             config_width=16,
             input_addr_iterator_support=6,
             output_addr_iterator_support=6,
             input_sched_iterator_support=6,
             output_sched_iterator_support=6):
    """Build the ``lake_top_test`` generator: one SRAM stub with its
    write/read address generators and schedule generators.

    The four ``*_iterator_support`` arguments and ``config_width`` are
    passed positionally to the corresponding AddrGen / SchedGen children;
    ``data_width``, ``fetch_width`` and ``mem_depth`` go to SRAMStub.
    """
    super().__init__("lake_top_test")

    # Top-level ports.
    self._clk = self.clock("clk")
    self._rst_n = self.reset("rst_n")
    self._clk_en = self.input("clk_en", 1)
    self._flush = self.input("flush", 1)
    self._data_in = self.input("data_in", data_width, packed=True)
    self._data_out = self.output("data_out", data_width, packed=True)

    # Internal control and address signals.
    self._write = self.var("write", 1)
    self._read = self.var("read", 1)
    self._write_addr = self.var("write_addr", config_width)
    self._read_addr = self.var("read_addr", config_width)
    self._addr = self.var("addr", clog2(mem_depth))

    # Memory: enabled whenever either a write or a read is scheduled.
    sram = SRAMStub(data_width, fetch_width, mem_depth)
    self.add_child("sram", sram,
                   clk=self._clk,
                   wen=self._write,
                   cen=self._write | self._read,
                   addr=self._addr,
                   data_in=self._data_in,
                   data_out=self._data_out)

    # Address generators: each steps on its own enable signal.
    addr_gens = (
        ("input_addr_gen", input_addr_iterator_support, self._write, self._write_addr),
        ("output_addr_gen", output_addr_iterator_support, self._read, self._read_addr),
    )
    for inst_name, iterator_support, step_sig, addr_sig in addr_gens:
        self.add_child(inst_name, AddrGen(iterator_support, config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=step_sig,
                       addr_out=addr_sig,
                       clk_en=self._clk_en,
                       flush=self._flush)

    # Schedule generators: each drives one of the enable signals.
    sched_gens = (
        ("input_sched_gen", input_sched_iterator_support, self._write),
        ("output_sched_gen", output_sched_iterator_support, self._read),
    )
    for inst_name, iterator_support, valid_sig in sched_gens:
        self.add_child(inst_name, SchedGen(iterator_support, config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       clk_en=self._clk_en,
                       flush=self._flush,
                       valid_output=valid_sig)

    lift_config_reg(self.internal_generator)

    self.add_code(self.set_sram_addr)
def test_tba(word_width=16,
             fetch_width=4,
             num_tb=1,
             tb_height=1,
             max_range=5,
             max_range_inner=5):
    """Co-simulate TransposeBufferAggregation against TBAModel.

    Applies 100 cycles of seeded random SRAM data and valid bits to both and
    expects the hardware's ``tb_to_interconnect_valid`` to match the model
    every cycle, and ``tb_to_interconnect_data`` to match when the model
    output is valid.
    """
    # Functional reference model and its configuration.
    model_tba = TBAModel(word_width,
                         fetch_width,
                         num_tb,
                         tb_height,
                         max_range,
                         max_range_inner)

    new_config = {
        "range_outer": 5,
        "range_inner": 3,
        "stride": 2,
        "indices": [0, 1, 2],
        "dimensionality": 2,
        "tb_height": 1,
        "starting_addr": 0,
    }
    model_tba.set_config(new_config=new_config)

    # Hardware under test.
    dut = TransposeBufferAggregation(word_width,
                                     fetch_width,
                                     num_tb,
                                     tb_height,
                                     max_range,
                                     max_range_inner,
                                     max_stride=5,
                                     tb_iterator_support=2)
    lift_config_reg(dut.internal_generator)

    magma_dut = k.util.to_magma(dut,
                                flatten_array=True,
                                check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)

    # Configuration registers of transpose buffer 0; the values are the
    # same ones handed to the model above.
    circuit = tester.circuit
    circuit.tb_0_indices_0 = 0
    circuit.tb_0_indices_1 = 1
    circuit.tb_0_indices_2 = 2
    circuit.tb_0_range_outer = 5
    circuit.tb_0_range_inner = 3
    circuit.tb_0_stride = 2
    circuit.tb_0_dimensionality = 2
    circuit.tb_0_tb_height = 1
    circuit.tb_0_starting_addr = 0

    # Reset sequence: rst_n high, then low for one clock, then released
    # together with enabling tba_ren.
    circuit.clk = 0
    circuit.rst_n = 1
    tester.step(2)
    circuit.rst_n = 0
    tester.step(2)
    circuit.tba_ren = 1
    circuit.rst_n = 1

    rand.seed(0)

    num_iters = 100
    for _ in range(num_iters):
        # Draw order fixes the random sequence: data words, valid_data,
        # then mem_valid_data.
        data = [rand.randint(0, 2**word_width - 1) for _ in range(fetch_width)]
        for word, value in enumerate(data):
            setattr(circuit, f"SRAM_to_tb_data_{word}", value)

        valid_data = rand.randint(0, 1)
        circuit.valid_data = valid_data

        mem_valid_data = rand.randint(0, 1)
        circuit.mem_valid_data = mem_valid_data

        tb_index_for_data = 0
        circuit.tb_index_for_data = tb_index_for_data

        # The acknowledge simply mirrors valid_data.
        ack_in = valid_data
        circuit.ack_in = ack_in

        model_data, model_valid = model_tba.tba_main(data,
                                                     valid_data,
                                                     ack_in,
                                                     tb_index_for_data,
                                                     1,
                                                     mem_valid_data)

        tester.eval()

        circuit.tb_to_interconnect_valid.expect(model_valid)
        if model_valid:
            circuit.tb_to_interconnect_data.expect(model_data[0])

        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])
def test_output_addr_basic(banks,
                           interconnect_output_ports,
                           enable_chain_output,
                           mem_depth=512,
                           num_tiles=1,
                           data_width=16,
                           fetch_width=32,
                           iterator_support=4,
                           address_width=16,
                           config_width=16,
                           chain_idx_output=0):
    """Co-simulate the OutputAddrCtrl generator against OutputAddrCtrlModel.

    Two address generators are configured with a 3x3x3 access pattern (the
    second one starting at ``mem_depth``). For 1000 cycles of seeded random
    ``valid_in`` / ``step_in`` the hardware's read enables and output
    addresses are expected to match the model for every bank and every
    output port.
    """
    # Functional reference model.
    model_oac = OutputAddrCtrlModel(
        interconnect_output_ports=interconnect_output_ports,
        mem_depth=mem_depth,
        banks=banks,
        num_tiles=num_tiles,
        iterator_support=iterator_support,
        address_width=address_width,
        data_width=data_width,
        fetch_width=fetch_width,
        chain_idx_output=chain_idx_output)

    new_config = {}
    new_config['address_gen_0_starting_addr'] = 0
    new_config['address_gen_0_dimensionality'] = 3
    new_config['address_gen_0_strides_0'] = 1
    new_config['address_gen_0_strides_1'] = 3
    new_config['address_gen_0_strides_2'] = 9
    new_config['address_gen_0_ranges_0'] = 3
    new_config['address_gen_0_ranges_1'] = 3
    new_config['address_gen_0_ranges_2'] = 3

    new_config['address_gen_1_starting_addr'] = mem_depth
    new_config['address_gen_1_dimensionality'] = 3
    new_config['address_gen_1_strides_0'] = 1
    new_config['address_gen_1_strides_1'] = 3
    new_config['address_gen_1_strides_2'] = 9
    new_config['address_gen_1_ranges_0'] = 3
    new_config['address_gen_1_ranges_1'] = 3
    new_config['address_gen_1_ranges_2'] = 3

    model_oac.set_config(new_config=new_config)

    # Hardware under test.
    dut = OutputAddrCtrl(interconnect_output_ports=interconnect_output_ports,
                         mem_depth=mem_depth,
                         num_tiles=num_tiles,
                         banks=banks,
                         iterator_support=iterator_support,
                         address_width=address_width,
                         config_width=config_width)
    lift_config_reg(dut.internal_generator)

    magma_dut = kts.util.to_magma(dut,
                                  flatten_array=True,
                                  check_multiple_driver=False,
                                  check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)

    for key, value in new_config.items():
        setattr(tester.circuit, key, value)

    # Reused stimulus buffer; refilled in place every cycle.
    valid_in = [0] * interconnect_output_ports

    # Reset pulse.
    tester.circuit.clk = 0
    tester.circuit.rst_n = 0
    tester.step(2)
    tester.circuit.rst_n = 1
    tester.step(2)

    # Fixed seed so every run sees the same stimulus.
    rand.seed(0)

    for _ in range(1000):
        # Draw order fixes the random sequence: per-port valids, then step.
        for port in range(interconnect_output_ports):
            valid_in[port] = rand.randint(0, 1)
        step_in = rand.randint(0, 2**interconnect_output_ports - 1)

        for port in range(interconnect_output_ports):
            tester.circuit.valid_in[port] = valid_in[port]
        tester.circuit.step_in = step_in
        # top level config regs passed down
        tester.circuit.enable_chain_output = enable_chain_output
        tester.circuit.chain_idx_output = chain_idx_output

        (ren, addrs) = model_oac.interact(valid_in, step_in, enable_chain_output)

        tester.eval()

        # With a single bank the ren array is not flattened per bank.
        if banks == 1:
            for port in range(interconnect_output_ports):
                tester.circuit.ren[port].expect(ren[0][port])
        else:
            for bank in range(banks):
                for port in range(interconnect_output_ports):
                    getattr(tester.circuit, f"ren_{bank}")[port].expect(ren[bank][port])

        # With a single port addr_out is not flattened per port. In the
        # multi-port case every port's address is checked against the model
        # using the loop's own index.
        if interconnect_output_ports == 1:
            tester.circuit.addr_out.expect(addrs[0])
        else:
            for port in range(interconnect_output_ports):
                getattr(tester.circuit, f"addr_out_{port}").expect(addrs[port])

        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])
def test_rw_arbiter_basic(int_out_ports,
                          fetch_width,
                          read_delay,
                          data_width=16,
                          memory_depth=256,
                          int_in_ports=1,
                          strg_wr_ports=1,
                          strg_rd_ports=1,
                          rw_same_cycle=False,
                          separate_addresses=False):
    """Co-simulate the RWArbiter generator against RWArbiterModel.

    Applies 100 cycles of seeded random write/read requests to both and
    expects the hardware's output valid/port/data, memory control signals,
    memory data/address, acknowledge and memory-valid outputs to match the
    model every cycle.
    """
    # Number of data words carried by one fetch.
    fw_int = int(fetch_width / data_width)

    # Functional reference model (it takes no configuration registers).
    model_rwa = RWArbiterModel(fetch_width=fetch_width,
                               data_width=data_width,
                               memory_depth=memory_depth,
                               int_out_ports=int_out_ports,
                               read_delay=read_delay)

    new_config = {}
    model_rwa.set_config(new_config=new_config)

    # Hardware under test.
    dut = RWArbiter(fetch_width=fetch_width,
                    data_width=data_width,
                    memory_depth=memory_depth,
                    int_in_ports=int_in_ports,
                    int_out_ports=int_out_ports,
                    strg_wr_ports=strg_wr_ports,
                    strg_rd_ports=strg_rd_ports,
                    read_delay=read_delay,
                    rw_same_cycle=rw_same_cycle,
                    separate_addresses=separate_addresses)
    lift_config_reg(dut.internal_generator)

    magma_dut = k.util.to_magma(dut,
                                flatten_array=True,
                                check_multiple_driver=False,
                                check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)

    for reg_name, reg_value in new_config.items():
        setattr(tester.circuit, reg_name, reg_value)

    # Reset pulse with the read addresses held at zero. The port is named
    # rd_addr for a single output port and rd_addr_<n> otherwise.
    tester.circuit.clk = 0
    tester.circuit.rst_n = 0
    if int_out_ports == 1:
        tester.circuit.rd_addr = 0
    else:
        for port in range(int_out_ports):
            setattr(tester.circuit, f"rd_addr_{port}", 0)
    tester.step(2)
    tester.circuit.rst_n = 1
    tester.step(2)

    # Fixed seed so every run sees the same stimulus.
    rand.seed(0)

    # Reused stimulus buffers; refilled in place every cycle.
    ren_in = [0] * int_out_ports
    ren_en = [0] * int_out_ports
    w_data = [0] * fw_int
    data_from_mem = [0] * fw_int

    for _ in range(100):
        # Draw order fixes the random sequence; do not reorder these draws.
        wen_in = rand.randint(0, 1)
        wen_en = 1
        for word in range(fw_int):
            w_data[word] = rand.randint(0, 2 ** data_width - 1)
            data_from_mem[word] = rand.randint(0, 2 ** data_width - 1)
        w_addr = rand.randint(0, 2 ** 9 - 1)

        # All ports share one read-enable gate per cycle.
        ren_en_base = rand.randint(0, 1)
        for port in range(int_out_ports):
            ren_in[port] = rand.randint(0, 1)
            ren_en[port] = ren_en_base

        rd_addr = [rand.randint(0, 2 ** 9 - 1) for _ in range(int_out_ports)]
        mem_valid_data = [rand.randint(0, 1) for _ in range(strg_rd_ports)]

        # Drive the hardware inputs.
        tester.circuit.wen_in = wen_in
        tester.circuit.wen_en = wen_en
        tester.circuit.w_addr = w_addr
        for word in range(fw_int):
            setattr(tester.circuit, f"w_data_0_{word}", w_data[word])
            setattr(tester.circuit, f"data_from_mem_0_{word}", data_from_mem[word])
        for port in range(int_out_ports):
            tester.circuit.ren_in[port] = ren_in[port]
            tester.circuit.ren_en[port] = ren_en[port]
        if int_out_ports == 1:
            tester.circuit.rd_addr = rd_addr[0]
        else:
            for port in range(int_out_ports):
                setattr(tester.circuit, f"rd_addr_{port}", rd_addr[port])
        if strg_rd_ports == 1:
            tester.circuit.mem_valid_data = mem_valid_data[0]
        else:
            for port in range(strg_rd_ports):
                setattr(tester.circuit, f"mem_valid_data_{port}", mem_valid_data[port])

        # Advance the model with the same inputs.
        (model_od,
         model_op,
         model_ov,
         model_cen_mem,
         model_wen_mem,
         model_mem_data,
         model_mem_addr,
         model_ack,
         model_out_mem_valid_data) = model_rwa.interact(wen_in,
                                                        wen_en,
                                                        w_data,
                                                        w_addr,
                                                        data_from_mem,
                                                        ren_in,
                                                        ren_en,
                                                        rd_addr,
                                                        mem_valid_data)

        tester.eval()

        # Output port and data are only meaningful when the output is valid.
        tester.circuit.out_valid.expect(model_ov)
        if model_ov:
            tester.circuit.out_port.expect(model_op)
            for word in range(fw_int):
                getattr(tester.circuit, f"out_data_0_{word}").expect(model_od[word])

        # Memory-side signals are compared unconditionally.
        tester.circuit.cen_mem.expect(model_cen_mem)
        tester.circuit.wen_mem.expect(model_wen_mem)
        for word in range(fw_int):
            getattr(tester.circuit, f"data_to_mem_0_{word}").expect(model_mem_data[word])
        tester.circuit.addr_to_mem.expect(model_mem_addr)
        tester.circuit.out_ack.expect(model_ack)

        print(mem_valid_data)
        print(model_out_mem_valid_data)
        if strg_rd_ports == 1:
            tester.circuit.out_mem_valid_data.expect(model_out_mem_valid_data[0])
        else:
            for port in range(strg_rd_ports):
                tester.circuit.out_mem_valid_data[port].expect(model_out_mem_valid_data[port])

        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])
def __init__(self,
             data_width=16,
             fetch_width=4,
             mem_depth=512,
             config_width=9,
             input_addr_iterator_support=6,
             output_addr_iterator_support=6,
             input_sched_iterator_support=6,
             output_sched_iterator_support=6):
    """Assemble the ``lake_top_test`` generator.

    Declares the top-level ports and internal signals, instantiates the
    address/schedule generator children and the SRAM stub, lifts the
    configuration registers, and registers the five code blocks
    (``set_sram_addr``, ``agg_ctrl``, ``tb_ctrl``, ``agg_to_sram``,
    ``tb_to_out``).

    Args:
        data_width: Bit width of ``data_in``/``data_out`` and of each
            element of the ``agg``, ``tb`` and SRAM data signals.
        fetch_width: Number of elements in the ``agg``, ``tb``,
            ``sram_write_data`` and ``sram_read_data`` arrays; also
            forwarded to ``SRAMStub``.
        mem_depth: Forwarded to ``SRAMStub``; ``addr`` is
            ``clog2(mem_depth)`` bits wide.
        config_width: Bit width of ``write_addr``/``read_addr`` and the
            second argument of the input/output ``AddrGen``/``SchedGen``.
        input_addr_iterator_support: First argument of the
            ``input_addr_gen`` ``AddrGen``.
        output_addr_iterator_support: First argument of the
            ``output_addr_gen`` ``AddrGen``.
        input_sched_iterator_support: First argument of the
            ``input_sched_gen`` ``SchedGen``.
        output_sched_iterator_support: First argument of the
            ``output_sched_gen`` ``SchedGen``.
    """
    super().__init__("lake_top_test")

    # ---- top-level ports -------------------------------------------------
    self._clk = self.clock("clk")
    self._rst_n = self.reset("rst_n")
    self._clk_en = self.input("clk_en", 1)
    self._flush = self.input("flush", 1)
    self._data_in = self.input("data_in", data_width, packed=True)
    self._data_out = self.output("data_out", data_width, packed=True)

    # ---- internal signals ------------------------------------------------
    self._write = self.var("write", 1)
    self._read = self.var("read", 1)
    self._write_addr = self.var("write_addr", config_width)
    self._read_addr = self.var("read_addr", config_width)
    self._addr = self.var("addr", clog2(mem_depth))

    self._agg_write = self.var("agg_write", 1)
    self._agg_write_addr = self.var("agg_write_addr", 2)
    self._agg_read_addr = self.var("agg_read_addr", 2)

    self._tb_read = self.var("tb_read", 1)
    self._tb_write_addr = self.var("tb_write_addr", 2)
    self._tb_read_addr = self.var("tb_read_addr", 2)

    # One SRAM word is `fetch_width` elements of `data_width` bits each.
    word_shape = {"size": fetch_width, "packed": True}
    self._sram_write_data = self.var("sram_write_data", data_width,
                                     **word_shape)
    self._sram_read_data = self.var("sram_read_data", data_width,
                                    **word_shape)

    # NOTE: the aggw_start_addr / agg_start_addr configuration inputs are
    # intentionally not declared here (they were disabled in the original).

    self._agg_write_index = self.var("agg_write_index", 2, size=4)
    self._agg = self.var("agg", width=data_width, **word_shape)

    def attach_addr_gen(inst_name, generator, step, addr_out):
        # All address generators share clk/rst_n/clk_en/flush; only the
        # step strobe and the address output differ.  Keyword order is
        # the same as in the hand-written connections.
        self.add_child(inst_name, generator,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=step,
                       addr_out=addr_out,
                       clk_en=self._clk_en,
                       flush=self._flush)

    def attach_sched_gen(inst_name, generator, valid_output):
        # All schedule generators share clk/rst_n/clk_en/flush and drive
        # a single valid strobe.
        self.add_child(inst_name, generator,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       clk_en=self._clk_en,
                       flush=self._flush,
                       valid_output=valid_output)

    # ---- aggregator side -------------------------------------------------
    attach_addr_gen("agg_write_addr_gen", AddrGen(2, 2),
                    step=self._agg_write,
                    addr_out=self._agg_write_addr)
    attach_addr_gen("agg_read_addr_gen", AddrGen(2, 2),
                    step=self._write,
                    addr_out=self._agg_read_addr)
    attach_sched_gen("agg_write_sched_gen", SchedGen(2, 2),
                     valid_output=self._agg_write)

    # ---- tb side (note: `tb` is declared without packed=True) ------------
    self._tb = self.var("tb", width=data_width, size=fetch_width)
    attach_addr_gen("tb_write_addr_gen", AddrGen(2, 2),
                    step=self._read,
                    addr_out=self._tb_write_addr)
    attach_addr_gen("tb_read_addr_gen", AddrGen(2, 2),
                    step=self._tb_read,
                    addr_out=self._tb_read_addr)
    attach_sched_gen("tb_read_sched_gen", SchedGen(2, 2),
                     valid_output=self._tb_read)

    # ---- memory: chip-enable is asserted on either a write or a read -----
    self.add_child("sram",
                   SRAMStub(data_width, fetch_width, mem_depth),
                   clk=self._clk,
                   wen=self._write,
                   cen=self._write | self._read,
                   addr=self._addr,
                   data_in=self._sram_write_data,
                   data_out=self._sram_read_data)

    # ---- SRAM address generators -----------------------------------------
    attach_addr_gen("input_addr_gen",
                    AddrGen(input_addr_iterator_support, config_width),
                    step=self._write,
                    addr_out=self._write_addr)
    attach_addr_gen("output_addr_gen",
                    AddrGen(output_addr_iterator_support, config_width),
                    step=self._read,
                    addr_out=self._read_addr)

    # ---- SRAM schedule generators ----------------------------------------
    attach_sched_gen("input_sched_gen",
                     SchedGen(input_sched_iterator_support, config_width),
                     valid_output=self._write)
    attach_sched_gen("output_sched_gen",
                     SchedGen(output_sched_iterator_support, config_width),
                     valid_output=self._read)

    lift_config_reg(self.internal_generator)

    # Register the code blocks in their original order.
    for code_block in (self.set_sram_addr,
                       self.agg_ctrl,
                       self.tb_ctrl,
                       self.agg_to_sram,
                       self.tb_to_out):
        self.add_code(code_block)
def test_sram_formal():
    """Smoke-test the ``SRAMFormal`` generator in a Verilator simulation.

    The test builds ``SRAMFormal``, lifts its configuration registers,
    converts it to a magma circuit with flattened arrays, pokes a fixed
    register configuration, drives ``rst_n`` low and back high, and then
    streams an incrementing counter into the ``data_in_<lane>`` ports for
    ``40 * 40`` iterations.  No ``expect`` checks are issued; the test passes
    when the design converts, compiles and simulates without error.
    """
    sram_dut = SRAMFormal(
        data_width=16,  # CGRA Params
        mem_width=64,
        mem_depth=512,
        banks=1,
        input_addr_iterator_support=6,
        output_addr_iterator_support=6,
        input_sched_iterator_support=6,
        output_sched_iterator_support=6,
        config_width=16,
        interconnect_input_ports=1,  # Connection to int
        interconnect_output_ports=1,
        mem_input_ports=1,
        mem_output_ports=1,
        read_delay=1,  # Cycle delay in read (SRAM vs Register File)
        rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
        agg_height=4)

    lift_config_reg(sram_dut.internal_generator)

    magma_dut = k.util.to_magma(sram_dut,
                                flatten_array=True,
                                check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)

    all_ones = 65535  # 0xFFFF == 2**16 - 1 (config_width above is 16)
    num_dims = 6      # config keys are indexed _0 .. _5
    upper_dims = range(1, num_dims)

    # Fixed register configuration.  Entry order matches the order in which
    # the values were originally assigned, so the pokes are issued in the
    # same sequence.
    config = {
        "sram_read_sched_gen_sched_addr_gen_starting_addr": 5,
        **{f"sram_write_loops_ranges_{d}": all_ones
           for d in range(num_dims)},
        "sram_read_sched_gen_sched_addr_gen_strides_0": 4,
        **{f"sram_read_sched_gen_sched_addr_gen_strides_{d}": all_ones
           for d in upper_dims},
        **{f"sram_write_addr_gen_strides_{d}": all_ones
           for d in range(num_dims)},
        "sram_write_addr_gen_starting_addr": all_ones,
        "sram_write_sched_gen_sched_addr_gen_starting_addr": 4,
        **{f"sram_read_addr_gen_strides_{d}": all_ones
           for d in range(num_dims)},
        "sram_write_loops_dimensionality": 0,
        "sram_read_loops_dimensionality": 0,
        "sram_read_loops_ranges_0": 398,
        **{f"sram_read_loops_ranges_{d}": all_ones
           for d in upper_dims},
        "sram_read_addr_gen_starting_addr": all_ones,
        "sram_write_sched_gen_sched_addr_gen_strides_0": 4,
        **{f"sram_write_sched_gen_sched_addr_gen_strides_{d}": all_ones
           for d in upper_dims},
    }

    # configuration registers passed through from top level
    for key, value in config.items():
        setattr(tester.circuit, key, value)

    # Reset sequence: rst_n high -> low -> high, stepping the clock between.
    tester.circuit.clk = 0
    tester.circuit.rst_n = 1
    tester.step(2)
    tester.circuit.rst_n = 0
    tester.step(2)
    tester.circuit.rst_n = 1

    # Seeds the shared RNG; this test itself draws no random values below.
    rand.seed(0)

    im_size = 40
    num_iters = im_size * im_size

    # Number of flattened data_in_<lane> ports driven per iteration.
    # NOTE(review): equals mem_width // data_width (64 // 16) - confirm that
    # this is how SRAMFormal sizes its data_in array.
    num_lanes = 4

    data_in = 0
    for _ in range(num_iters):
        # Each lane receives the next consecutive counter value.
        for lane in range(num_lanes):
            setattr(tester.circuit, f'data_in_{lane}', data_in + lane)

        tester.eval()

        data_in += num_lanes

        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])