def test_interconnect_point_wise(batch_size: int, run_tb, io_sides):
    """Check a point-wise multiply mapped onto a 2x2 CGRA.

    Two 16-bit IO inputs (``I0``, ``I1``) are connected to ``data0`` and
    ``data1`` of PE ``p0``; its ``alu_res`` is connected to IO output
    ``I2``. The PE is programmed with ``asm.umult0()``, every configuration
    word is read back, and ``batch_size`` random operand pairs are checked
    against their product.
    """
    side = 2
    interconnect = create_cgra(side, side, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }
    bus = {"e0": 16, "e1": 16, "e3": 16}

    placement, routing, _ = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # append the PE core configuration to the routing bitstream
    pe_x, pe_y = placement["p0"]
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    pe_bitstream = pe_tile.core.get_config_bitstream(asm.umult0())
    for reg_addr, reg_value in pe_bitstream:
        full_addr = interconnect.get_config_addr(reg_addr, 0, pe_x, pe_y)
        config_data.append((full_addr, reg_value))
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()

    # write every configuration word and read it straight back
    for cfg_addr, cfg_value in config_data:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)
    tester.done_config()

    # resolve the top-level port names from the placed IO coordinates
    in0_name = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    in1_name = interconnect.get_top_input_port_by_coord(placement["I1"], 16)
    out_name = interconnect.get_top_output_port_by_coord(placement["I2"], 16)

    # fixed seed keeps the stimulus reproducible
    random.seed(0)
    for _ in range(batch_size):
        lhs = random.randrange(0, 256)
        rhs = random.randrange(0, 256)
        tester.poke(circuit.interface[in0_name], lhs)
        tester.poke(circuit.interface[in1_name], rhs)
        tester.eval()
        tester.expect(circuit.interface[out_name], lhs * rhs)

    run_tb(tester)
def test_1x1():
    """Compile a 1x1 CGRA and check that a Verilog file is produced."""
    # this is all PE
    interconnect = create_cgra(1, 1, IOSide.None_,
                               num_tracks=3,
                               mem_ratio=(0, 1))
    circuit = interconnect.circuit()

    with tempfile.TemporaryDirectory() as out_dir:
        base_path = os.path.join(out_dir, "1x1")
        magma.compile(base_path, circuit)
        verilog_path = base_path + ".v"
        assert os.path.isfile(verilog_path)
def test_interconnect_reset(batch_size: int, run_tb, io_sides):
    """Check that reset clears the configuration of a 2x2 CGRA.

    The fabric is configured with a routed multiplier application and each
    word is read back. After a reset every configured address must read 0,
    and the same configuration must then be writable again.
    """
    side = 2
    interconnect = create_cgra(side, side, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }
    bus = {"e0": 16, "e1": 16, "e3": 16}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # PE core configuration; the address is assembled by hand here as
    # (register address << 24) | (x << 8 | y)
    pe_x, pe_y = placement["p0"]
    tile_id = (pe_x << 8) | pe_y
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    pe_bitstream = pe_tile.core.get_config_bitstream(asm.umult0())
    for reg_addr, reg_value in pe_bitstream:
        config_data.append(((reg_addr << 24) | tile_id, reg_value))
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)

    def write_and_verify():
        # write each word, then read it back immediately
        for cfg_addr, cfg_value in config_data:
            tester.configure(cfg_addr, cfg_value)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_value)

    tester.reset()
    write_and_verify()

    # reset them: every configured address must now read back as zero
    tester.reset()
    for cfg_addr, _ in config_data:
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, 0)

    # configure new one
    write_and_verify()

    run_tb(tester)
def _run(directory, width=2, height=2):
    """Generate a testbench for a short configuration sequence.

    Builds a ``width`` x ``height`` CGRA with IO on the north side, takes
    the first two entries of ``common.basic_sequence`` and hands the
    resulting tester to ``common.generate_testbench`` together with
    ``directory``.
    """
    # Create cgra generator object.
    interconnect = create_cgra(width=width,
                               height=height,
                               io_sides=IOSide.North,
                               num_tracks=5,
                               add_pd=True)

    # Reset sequence plus a short configuration sequence (2 addresses).
    short_sequence = common.basic_sequence(interconnect)[:2]

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    common.configure(tester, short_sequence, check_read_data=True)
    common.generate_testbench(tester, directory)
def test_peak_tile_sequence(sequence, seed, run_tb):
    """Tile-level test on a standalone 1x1 CGRA.

    * Routes ``data0``, ``data1`` and ``alu_res`` of the tile at (0, 0)
      with ``route_one_tile`` and loads the resulting route bitstream.
    * For every item of ``sequence`` the driver writes the PE
      configuration and drives both operands onto the routed tile ports.
    * The monitor checks the expected output on the routed result port.
    """
    tile_x, tile_y = 0, 0

    interconnect = create_cgra(1, 1, IOSide.None_,
                               num_tracks=3,
                               standalone=True)

    routing, port_mapping = route_one_tile(interconnect, 0, 0,
                                           ports=["data0", "data1",
                                                  "alu_res"],
                                           seed=seed)
    route_config = compress_config_data(
        interconnect.get_route_bitstream(routing))

    circuit = interconnect.circuit()

    # top-level port names chosen by the router for each core port
    port_a = port_mapping["data0"]
    port_b = port_mapping["data1"]
    port_out = port_mapping["alu_res"]

    class TileDriver(Driver):
        def lower(self, config_data, a, b, output):
            # core-local addresses are translated to full tile addresses
            for reg_addr, reg_value in config_data:
                full_addr = interconnect.get_config_addr(reg_addr, 0,
                                                         tile_x, tile_y)
                self.tester.configure(full_addr, reg_value)
            dut = self.tester.circuit
            setattr(dut, port_a, a)
            setattr(self.tester.circuit, port_b, b)

    class TileMonitor(Monitor):
        def observe(self, config_data, a, b, output):
            result_port = getattr(self.tester.circuit, port_out)
            result_port.expect(output)

    tester = BasicSequenceTester(circuit,
                                 TileDriver(),
                                 TileMonitor(),
                                 sequence,
                                 circuit.clk,
                                 circuit.reset)
    tester.reset()

    # load the routing configuration before the sequence runs
    for cfg_addr, cfg_value in route_config:
        tester.configure(cfg_addr, cfg_value)

    run_tb(tester)
def test_basic(run_tb):
    """Configuration sequence test on a 2x2 fabric with north-side IO."""
    # Create cgra generator object.
    side = 2
    interconnect = create_cgra(width=side,
                               height=side,
                               io_sides=IOSide.North,
                               num_tracks=5,
                               add_pd=True)

    # Reset sequence plus a short configuration sequence (2 addresses).
    short_sequence = common.basic_sequence(interconnect)[:2]

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    common.configure(tester, short_sequence, check_read_data=True)

    # Hand the tester to the fixture that compiles and runs it.
    run_tb(tester)
def test_basic(dw_files):
    """Configuration sequence test on a 2x2 fabric with north-side IO.

    The circuit is compiled to CoreIR Verilog in a temporary directory and
    the testbench is run with the verilator target.
    """
    # Create cgra generator object.
    side = 2
    interconnect = create_cgra(width=side,
                               height=side,
                               io_sides=IOSide.North,
                               num_tracks=5,
                               add_pd=True)

    # Reset sequence plus a short configuration sequence (2 addresses).
    short_sequence = common.basic_sequence(interconnect)[:2]

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    common.configure(tester, short_sequence, check_read_data=True)

    # Compile and run the test using a verilator backend.
    with tempfile.TemporaryDirectory() as tempdir:
        common.generate_scaffolding(tempdir)
        magma.compile(f"{tempdir}/{circuit.name}",
                      circuit,
                      output="coreir-verilog",
                      coreir_libs={"float_DW"})
        # the design was compiled just above, so the tester skips it
        tester.compile_and_run(skip_compile=True,
                               target="verilator",
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def test_stall(run_tb, io_sides):
    """Check that asserting ``stall`` freezes a running 2x2 CGRA.

    The application routes IO ``I0`` through register ``r1`` into both a
    memory tile ``m0`` and ``data0`` of PE ``p0``; the memory output feeds
    ``data1`` and the PE (configured with ``asm.add``) drives IO ``I1``.
    The chip is configured while stalled, run for 20 inputs, stalled again
    (the output must hold while the input keeps changing) and finally
    released.
    """
    chip_size = 2
    depth = 10
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("r1", "reg")],
        "e2": [("r1", "reg"), ("m0", "data_in_0"), ("p0", "data0")],
        "e1": [("m0", "data_out_0"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in_0"), ("m0", "ren_in_0")],
        "e5": [("m0", "valid_out_0"), ("i4", "f2io_1")]
    }
    bus = {"e0": 16, "e2": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # PE core configuration
    pe_x, pe_y = placement["p0"]
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    pe_bitstream = pe_tile.core.get_config_bitstream(
        asm.add(ra_mode=asm.Mode_t.DELAY))
    for reg_addr, reg_value in pe_bitstream:
        full_addr = interconnect.get_config_addr(reg_addr, 0, pe_x, pe_y)
        config_data.append((full_addr, reg_value))

    # memory tile configuration
    tile_en = 1
    mem_x, mem_y = placement["m0"]
    mcore = interconnect.tile_circuits[(mem_x, mem_y)].core

    configs_mem = [
        ("strg_ub_app_ctrl_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_output_port_0", 0, 0),
        ("strg_ub_app_ctrl_read_depth_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_wo_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_ss_0", depth, 0),
        ("strg_ub_app_ctrl_coarse_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_coarse_read_depth_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_wo_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_ss_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_2", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_3", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_sync_grp_sync_group_0", 1, 0),
        ("strg_ub_tba_0_tb_0_range_outer", depth, 0),
        ("strg_ub_tba_0_tb_0_starting_addr", 0, 0),
        ("strg_ub_tba_0_tb_0_stride", 1, 0),
        ("strg_ub_tba_0_tb_0_dimensionality", 1, 0),
        ("strg_ub_agg_align_0_line_length", depth, 0),
        ("strg_ub_tba_0_tb_0_indices_0", 0, 0),
        ("strg_ub_tba_0_tb_0_indices_1", 1, 0),
        ("strg_ub_tba_0_tb_0_indices_2", 2, 0),
        ("strg_ub_tba_0_tb_0_indices_3", 3, 0),
        ("strg_ub_tba_0_tb_0_range_inner", 4, 0),
        ("strg_ub_tba_0_tb_0_tb_height", 1, 0),
        ("tile_en", tile_en, 0),
        ("mode", 0, 0),
        ("flush_reg_sel", 1, 0),
        ("wen_in_1_reg_sel", 1, 0),
        ("ren_in_1_reg_sel", 1, 0)
    ]
    config_mem_tile(interconnect, config_data, configs_mem,
                    mem_x, mem_y, mcore)

    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # stall the chip while it is being configured
    tester.poke(circuit.interface["stall"], 1)
    tester.eval()

    for cfg_addr, cfg_value in config_data:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)

    # un-stall the chip
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # top-level port names built from the placed IO coordinates
    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid_x, valid_y = placement["i4"]
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    # phase 1: stream 20 values through the running chip
    for cycle in range(20):
        tester.poke(circuit.interface[src], cycle)
        tester.eval()

        if cycle >= 11:
            # data0 of PE: cycle - 1 - 1
            # data1 of PE: cycle - 1 - depth
            tester.expect(circuit.interface[dst], cycle * 2 - 3 - depth)
            tester.expect(circuit.interface[valid], 1)
        elif cycle < depth:
            tester.expect(circuit.interface[valid], 0)

        if cycle == 19:
            # now stall everything
            tester.poke(circuit.interface["stall"], 1)
            tester.eval()

        tester.step(2)

    # phase 2: while stalled, the input is ignored and the output holds
    for cycle in range(20):
        # the poked value should not matter
        tester.poke(circuit.interface[src], cycle * 20)
        tester.expect(circuit.interface[dst], 19 * 2 - 3 - depth)
        tester.step(2)

    # un-stall again
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # phase 3: resume from the value that was in flight when stalled
    for cycle in range(19, 30):
        tester.poke(circuit.interface[src], cycle)
        tester.eval()
        tester.expect(circuit.interface[dst], cycle * 2 - 3 - depth)
        tester.expect(circuit.interface[valid], 1)
        tester.step(2)

    run_tb(tester)
def __init__(self, width, height, add_pd, interconnect_only: bool = False,
             use_sram_stub: bool = True):
    """Build the top-level generator around a ``width`` x ``height`` CGRA.

    When ``interconnect_only`` is False a global controller and a global
    buffer are instantiated and wired to the interconnect and to the
    top-level ports. Otherwise only the interconnect is built and its
    ports are lifted to the top level.

    ``add_pd`` and ``use_sram_stub`` are forwarded to ``create_cgra``.
    """
    super().__init__()

    # size
    self.width = width
    self.height = height

    # configuration parameters
    config_addr_width = 32
    config_data_width = 32
    axi_addr_width = 12
    tile_id_width = 16
    config_addr_reg_width = 8
    num_tracks = 5

    # only north side has IO
    io_side = IOSide.North

    # global buffer parameters
    num_banks = 32
    bank_addr_width = 17
    bank_data_width = 64
    glb_addr_width = 32

    # one parallel configuration port and one IO channel per 4 columns
    num_parallel_cfg = math.ceil(width / 4)
    num_io = math.ceil(width / 4)

    if interconnect_only:
        wiring = GlobalSignalWiring.Meso
    else:
        wiring = GlobalSignalWiring.ParallelMeso
        self.global_controller = GlobalController(config_addr_width,
                                                  config_data_width,
                                                  axi_addr_width)
        self.global_buffer = GlobalBuffer(
            num_banks=num_banks,
            num_io=num_io,
            num_cfg=num_parallel_cfg,
            bank_addr_width=bank_addr_width,
            glb_addr_width=glb_addr_width,
            cfg_addr_width=config_addr_width,
            cfg_data_width=config_data_width,
            axi_addr_width=axi_addr_width)

    self.interconnect = create_cgra(
        width, height, io_side,
        reg_addr_width=config_addr_reg_width,
        config_data_width=config_data_width,
        tile_id_width=tile_id_width,
        num_tracks=num_tracks,
        add_pd=add_pd,
        use_sram_stub=use_sram_stub,
        global_signal_wiring=wiring,
        num_parallel_config=num_parallel_cfg,
        mem_ratio=(1, 4))

    if interconnect_only:
        # lift all the interconnect ports up
        self._lift_interconnect_ports(config_data_width)
    else:
        self.add_ports(
            jtag=JTAGType,
            clk_in=magma.In(magma.Clock),
            reset_in=magma.In(magma.AsyncReset),
            soc_data=SoCDataType(glb_addr_width, bank_data_width),
            axi4_ctrl=AXI4SlaveType(axi_addr_width, config_data_width),
            cgra_running_clk_out=magma.Out(magma.Clock),
        )

        glc_ports = self.global_controller.ports

        # top <-> global controller ports connection
        self.wire(self.ports.clk_in, glc_ports.clk_in)
        self.wire(self.ports.reset_in, glc_ports.reset_in)
        self.wire(self.ports.jtag, glc_ports.jtag)
        self.wire(self.ports.axi4_ctrl, glc_ports.axi4_ctrl)
        self.wire(self.ports.cgra_running_clk_out, glc_ports.clk_out)

        # top <-> global buffer ports connection
        self.wire(self.ports.soc_data, self.global_buffer.ports.soc_data)

        glc_interconnect_wiring(self)
        glb_glc_wiring(self)
        glb_interconnect_wiring(self, width, num_parallel_cfg)

    # set unconditionally after construction
    self.mapper_initalized = False
    self.__rewrite_rules = None
def __init__(self, width, height, add_pd, interconnect_only: bool = False,
             use_sram_stub: bool = True, standalone: bool = False):
    """Build the top-level generator around a ``width`` x ``height`` CGRA.

    When ``interconnect_only`` is False a global controller and a global
    buffer are instantiated and wired to the interconnect and to the
    top-level ports; ``width`` must then be even. Otherwise only the
    interconnect is built and its interface, clock, reset, config, stall
    and read-back ports are lifted to the top level.

    ``standalone`` requires ``interconnect_only`` and selects a fabric
    without IO tiles.
    """
    super().__init__()

    # Check consistency of @standalone and @interconnect_only parameters.
    # If @standalone is True, then interconnect_only must also be True.
    if standalone:
        assert interconnect_only

    # size
    self.width = width
    self.height = height

    # configuration parameters
    config_addr_width = 32
    config_data_width = 32
    self.config_addr_width = config_addr_width
    self.config_data_width = config_data_width
    axi_addr_width = 13
    glb_axi_addr_width = 12
    axi_data_width = 32
    # axi_data_width must be same as cgra config_data_width
    assert axi_data_width == config_data_width

    tile_id_width = 16
    config_addr_reg_width = 8
    num_tracks = 5

    # only north side has IO (none at all when standalone)
    io_side = IOSide.None_ if standalone else IOSide.North

    if interconnect_only:
        wiring = GlobalSignalWiring.Meso
    else:
        # global buffer parameters
        # width must be even number
        assert (self.width % 2) == 0
        num_glb_tiles = self.width // 2

        bank_addr_width = 17
        bank_data_width = 64
        banks_per_tile = 2

        bank_select_bits = magma.bitutils.clog2(banks_per_tile)
        tile_select_bits = magma.bitutils.clog2(num_glb_tiles)
        glb_addr_width = (bank_addr_width
                          + bank_select_bits
                          + tile_select_bits)

        # bank_data_width must be the size of bitstream
        assert bank_data_width == config_addr_width + config_data_width

        wiring = GlobalSignalWiring.ParallelMeso
        self.global_controller = GlobalController(
            addr_width=config_addr_width,
            data_width=config_data_width,
            axi_addr_width=axi_addr_width,
            axi_data_width=axi_data_width,
            num_glb_tiles=num_glb_tiles,
            glb_addr_width=glb_addr_width,
            block_axi_addr_width=glb_axi_addr_width)

        self.global_buffer = GlobalBuffer(
            num_glb_tiles=num_glb_tiles,
            num_cgra_cols=width,
            bank_addr_width=bank_addr_width,
            bank_data_width=bank_data_width,
            cfg_addr_width=config_addr_width,
            cfg_data_width=config_data_width,
            axi_addr_width=glb_axi_addr_width,
            axi_data_width=axi_data_width)

    self.interconnect = create_cgra(
        width, height, io_side,
        reg_addr_width=config_addr_reg_width,
        config_data_width=config_data_width,
        tile_id_width=tile_id_width,
        num_tracks=num_tracks,
        add_pd=add_pd,
        use_sram_stub=use_sram_stub,
        global_signal_wiring=wiring,
        mem_ratio=(1, 4),
        standalone=standalone)

    if interconnect_only:
        # lift all the interconnect ports up
        for name in self.interconnect.interface():
            self.add_port(name, self.interconnect.ports[name].type())
            self.wire(self.ports[name], self.interconnect.ports[name])

        # NOTE(review): both ConfigurationType arguments are the
        # interconnect's config_data_width -- confirm the first one is
        # not meant to be an address width.
        config_type = ConfigurationType(
            self.interconnect.config_data_width,
            self.interconnect.config_data_width)
        stall_type = magma.Bits[self.interconnect.stall_signal_width]

        self.add_ports(
            clk=magma.In(magma.Clock),
            reset=magma.In(magma.AsyncReset),
            config=magma.In(config_type),
            stall=magma.In(stall_type),
            read_config_data=magma.Out(magma.Bits[config_data_width]))

        fabric_ports = self.interconnect.ports
        self.wire(self.ports.clk, fabric_ports.clk)
        self.wire(self.ports.reset, fabric_ports.reset)
        self.wire(self.ports.config, fabric_ports.config)
        self.wire(self.ports.stall, fabric_ports.stall)
        self.wire(fabric_ports.read_config_data,
                  self.ports.read_config_data)
    else:
        self.add_ports(
            jtag=JTAGType,
            clk_in=magma.In(magma.Clock),
            reset_in=magma.In(magma.AsyncReset),
            proc_packet=ProcPacketIfc(glb_addr_width,
                                      bank_data_width).slave,
            axi4_slave=AXI4LiteIfc(axi_addr_width, axi_data_width).slave,
            interrupt=magma.Out(magma.Bit),
            cgra_running_clk_out=magma.Out(magma.Clock),
        )

        glc_ports = self.global_controller.ports

        # top <-> global controller ports connection
        self.wire(self.ports.clk_in, glc_ports.clk_in)
        self.wire(self.ports.reset_in, glc_ports.reset_in)
        self.wire(self.ports.jtag, glc_ports.jtag)
        self.wire(self.ports.axi4_slave, glc_ports.axi4_slave)
        self.wire(self.ports.interrupt, glc_ports.interrupt)
        self.wire(self.ports.cgra_running_clk_out, glc_ports.clk_out)

        # top <-> global buffer ports connection
        self.wire(self.ports.proc_packet,
                  self.global_buffer.ports.proc_packet)

        glb_glc_wiring(self)
        glb_interconnect_wiring(self)
        glc_interconnect_wiring(self)
def test_interconnect_sram(cw_files, add_pd, io_sides):
    """Check SRAM-mode reads from a memory tile on a 2x2 CGRA.

    IO ``I0`` drives the memory address, IO ``i3`` the read enable, and
    the memory data output is observed on IO ``I1``. The SRAM contents are
    loaded through the configuration bus (value ``i + 10`` for every
    fourth ``i`` below 1024) and then read back through the fabric.
    """
    side = 2
    interconnect = create_cgra(side, side, io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "addr_in")],
        "e1": [("m0", "data_out"), ("I1", "f2io_16")],
        "e2": [("i3", "io2f_1"), ("m0", "ren_in")]
    }
    bus = {"e0": 16, "e1": 16, "e2": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # put the placed memory tile into sram mode
    mem_x, mem_y = placement["m0"]
    sram_config_addr = interconnect.get_config_addr(0, 0, mem_x, mem_y)
    config_data.append((sram_config_addr, 0x00000006))

    # SRAM contents: 256 addresses per feature, features start at 1
    sram_data = []
    for i in range(0, 1024, 4):
        feat_addr, mem_addr = i // 256 + 1, i % 256
        word_addr = interconnect.get_config_addr(mem_addr, feat_addr,
                                                 mem_x, mem_y)
        sram_data.append((word_addr, i + 10))

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # routing and mode configuration is verified by read-back
    for cfg_addr, cfg_value in config_data:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)

    # currently read back doesn't work for the SRAM contents, so they
    # are only written here
    for word_addr, word in sram_data:
        tester.configure(word_addr, word)

    # top-level port names built from the placed IO coordinates
    addr_x, addr_y = placement["I0"]
    src = f"glb2io_16_X{addr_x:02X}_Y{addr_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    ren_x, ren_y = placement["i3"]
    ren = f"glb2io_1_X{ren_x:02X}_Y{ren_y:02X}"

    tester.step(2)
    tester.poke(circuit.interface[ren], 1)
    tester.eval()

    # read every written address back through the fabric
    for i in range(0, 1024, 4):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        tester.step(2)
        tester.eval()
        tester.expect(circuit.interface[dst], i + 10)

    with tempfile.TemporaryDirectory() as tempdir:
        # gather every Verilog source the simulation needs
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in cw_files:
            shutil.copy(filename, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def test_interconnect_fifo(dw_files, io_sides, depth):
    """Check FIFO-mode behaviour of a memory tile on a 2x2 CGRA.

    The memory tile ``m0`` is configured as a FIFO of ``depth`` entries.
    For 2048 iterations a random action (read, write, read-and-write or
    idle) is applied and the ``empty``, ``full`` and ``valid`` flags, plus
    the data output whenever a read is expected to be valid, are compared
    against a software model kept in a ``deque``.

    Note: the random stimulus is not seeded in this test.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in_0")],
        "e1": [("i3", "io2f_1"), ("m0", "wen_in_0")],
        "e2": [("i4", "io2f_1"), ("m0", "ren_in_0")],
        "e3": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e4": [("m0", "valid_out_0"), ("i4", "f2io_1")],
        "e5": [("m0", "empty"), ("i2", "f2io_1")],
        "e6": [("m0", "full"), ("i3", "f2io_1")]
    }
    bus = {"e0": 16, "e1": 1, "e2": 1, "e3": 16, "e4": 1, "e5": 1, "e6": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as fifo mode
    mode = 1  # Mode.FIFO
    tile_en = 1

    configs_mem = [("fifo_ctrl_fifo_depth", depth, 0),
                   ("mode", mode, 0),
                   ("tile_en", tile_en, 0),
                   ("flush_reg_sel", 1, 0)]
    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core
    config_mem_tile(interconnect, config_data, configs_mem,
                    mem_x, mem_y, mcore)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    # write every configuration word and read it straight back
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # Resolve top-level port names. IO tiles i3 and i4 each carry one
    # input and one output signal of the netlist.
    src_coord = placement["I0"]
    src = interconnect.get_top_input_port_by_coord(src_coord, 16)
    dst_coord = placement["I1"]
    dst = interconnect.get_top_output_port_by_coord(dst_coord, 16)
    wen_coord = placement["i3"]
    wen = interconnect.get_top_input_port_by_coord(wen_coord, 1)
    valid_coord = placement["i4"]
    valid = interconnect.get_top_output_port_by_coord(valid_coord, 1)
    ren_coord = placement["i4"]
    ren = interconnect.get_top_input_port_by_coord(ren_coord, 1)
    full_coord = placement["i3"]
    full = interconnect.get_top_output_port_by_coord(full_coord, 1)
    empty_coord = placement["i2"]
    empty = interconnect.get_top_output_port_by_coord(empty_coord, 1)

    tester.step(1)

    # software model: new entries enter on the left, reads pop the right
    fifo = deque()
    valid_check = 0
    most_recent_read = 0
    for _ in range(2048):
        # flags are checked against the occupancy *before* this move
        len_fifo = len(fifo)

        # Pick random from (READ, WRITE, READ_AND_WRITE, IDLE);
        # randint is inclusive on both ends.
        move = random.randint(0, 3)
        if move == 0:
            # read
            tester.poke(circuit.interface[ren], 1)
            if fifo:
                most_recent_read = fifo.pop()
                valid_check = 1
            else:
                valid_check = 0
        elif move == 1:
            # write (dropped by the model when the FIFO is already full)
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[src], write_val)
            if len(fifo) < depth:
                fifo.appendleft(write_val)
            valid_check = 0
        elif move == 2:
            # r and w
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[ren], 1)
            tester.poke(circuit.interface[src], write_val)
            fifo.appendleft(write_val)
            most_recent_read = fifo.pop()
            valid_check = 1
        else:
            # If not doing anything, valid will be low, and we expect
            # to see the same output as before
            valid_check = 0
        tester.eval()

        tester.expect(circuit.interface[empty], len_fifo == 0)
        tester.expect(circuit.interface[full], len_fifo == depth)
        tester.expect(circuit.interface[valid], valid_check)
        if valid_check:
            tester.expect(circuit.interface[dst], most_recent_read)
        tester.step(2)

        # de-assert both enables before the next move
        tester.poke(circuit.interface[wen], 0)
        tester.poke(circuit.interface[ren], 0)

    with tempfile.TemporaryDirectory() as tempdir:
        # gather every Verilog source the simulation needs
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in dw_files:
            shutil.copy(filename, tempdir)
        shutil.copy(os.path.join("tests", "test_memory_core",
                                 "sram_stub.v"),
                    os.path.join(tempdir, "sram_512w_16b.v"))
        for aoi_mux in glob.glob("tests/*.sv"):
            shutil.copy(aoi_mux, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def test_interconnect_sram(dw_files, io_sides):
    """Check SRAM-mode reads from a memory tile on a 2x2 CGRA.

    IO ``I0`` drives the memory address, IO ``i3`` the read enable, and
    the memory data output is observed on IO ``I1``. Each of the 512
    configuration addresses is written four times in a row (values
    ``4 * k`` .. ``4 * k + 3``) and read back four times; afterwards
    addresses 0..2047 are read through the fabric and must return their
    own index.
    """
    side = 2
    interconnect = create_cgra(side, side, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "addr_in_0")],
        "e1": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e2": [("i3", "io2f_1"), ("m0", "ren_in_0")]
    }
    bus = {"e0": 16, "e1": 16, "e2": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # configure the placed memory tile as sram mode
    mode = 2  # Mode.SRAM
    tile_en = 1
    configs_mem = [("mode", mode, 0),
                   ("tile_en", tile_en, 0),
                   ("flush_reg_sel", 1, 0)]
    mem_x, mem_y = placement["m0"]
    mcore = interconnect.tile_circuits[(mem_x, mem_y)].core
    config_mem_tile(interconnect, config_data, configs_mem,
                    mem_x, mem_y, mcore)
    config_data = compress_config_data(config_data)

    # SRAM contents: 256 addresses per feature, features start at 1
    sram_data = []
    for k in range(0, 512):
        feat_addr, mem_addr = k // 256 + 1, k % 256
        word_addr = interconnect.get_config_addr(mem_addr, feat_addr,
                                                 mem_x, mem_y)
        sram_data.append((word_addr, k))

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    def write_and_verify_config():
        # write each word, then read it back immediately
        for cfg_addr, cfg_value in config_data:
            tester.configure(cfg_addr, cfg_value)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_value)

    write_and_verify_config()

    for word_addr, base in sram_data:
        # four consecutive writes to the same configuration address
        for lane in range(4):
            tester.configure(word_addr, base * 4 + lane)
            tester.eval()
        # four consecutive reads return the values in the same order
        for lane in range(4):
            tester.config_read(word_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, base * 4 + lane)

    # the tile configuration is written and checked a second time
    write_and_verify_config()

    tester.done_config()

    # resolve the top-level port names from the placed IO coordinates
    src = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    dst = interconnect.get_top_output_port_by_coord(placement["I1"], 16)
    ren = interconnect.get_top_input_port_by_coord(placement["i3"], 1)

    tester.step(2)
    tester.poke(circuit.interface[ren], 1)
    tester.eval()

    # read through the fabric: each address must return its own index
    for word in range(2048):
        tester.poke(circuit.interface[src], word)
        tester.eval()
        tester.step(2)
        tester.eval()
        tester.expect(circuit.interface[dst], word)

    with tempfile.TemporaryDirectory() as tempdir:
        # gather every Verilog source the simulation needs
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in dw_files:
            shutil.copy(filename, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        for aoi_mux in glob.glob("tests/*.sv"):
            shutil.copy(aoi_mux, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def __init__(self, width, height, add_pd, interconnect_only: bool = False,
             use_sram_stub: bool = True, standalone: bool = False,
             add_pond: bool = True,
             use_io_valid: bool = False,
             pipeline_config_interval: int = 8,
             glb_params: GlobalBufferParams = GlobalBufferParams(),
             pe_fc=lassen_fc):
    """Build the top-level generator around a ``width`` x ``height`` CGRA.

    When ``interconnect_only`` is False a global controller and a global
    buffer (parameterised by ``glb_params``) are instantiated and wired to
    the interconnect and to the top-level ports; ``width`` must then be
    even. Otherwise only the interconnect is built and its interface,
    clock, reset, config, stall and read-back ports are lifted to the top
    level.

    ``standalone`` requires ``interconnect_only`` and selects a fabric
    without IO tiles. ``add_pd``, ``add_pond``, ``use_io_valid``,
    ``use_sram_stub``, ``pipeline_config_interval`` and ``pe_fc`` are
    forwarded to ``create_cgra``.
    """
    super().__init__()

    # Check consistency of @standalone and @interconnect_only parameters.
    # If @standalone is True, then interconnect_only must also be True.
    if standalone:
        assert interconnect_only

    # size
    self.width = width
    self.height = height

    # configuration parameters
    self.glb_params = glb_params
    config_addr_width = 32
    config_data_width = 32
    self.config_addr_width = config_addr_width
    self.config_data_width = config_data_width
    axi_addr_width = 13
    axi_data_width = 32
    # axi_data_width must be same as cgra config_data_width
    assert axi_data_width == config_data_width

    tile_id_width = 16
    config_addr_reg_width = 8
    num_tracks = 5

    # only north side has IO (none at all when standalone)
    io_side = IOSide.None_ if standalone else IOSide.North

    self.pe_fc = pe_fc

    if interconnect_only:
        wiring = GlobalSignalWiring.Meso
    else:
        # width must be even number
        assert (self.width % 2) == 0

        # Bank should be larger than or equal to 1KB
        assert glb_params.bank_addr_width >= 10

        # NOTE(review): `**` binds tighter than `+`, so this evaluates as
        # 2 ** (bank_addr_width - 10) + ceil(log2(banks_per_tile)).
        # Confirm the log term was not meant to be part of the exponent.
        bank_term = 2 ** (glb_params.bank_addr_width - 10)
        banks_term = math.ceil(math.log(glb_params.banks_per_tile, 2))
        glb_tile_mem_size = bank_term + banks_term

        wiring = GlobalSignalWiring.ParallelMeso
        self.global_controller = GlobalController(
            addr_width=config_addr_width,
            data_width=config_data_width,
            axi_addr_width=axi_addr_width,
            axi_data_width=axi_data_width,
            num_glb_tiles=glb_params.num_glb_tiles,
            glb_addr_width=glb_params.glb_addr_width,
            glb_tile_mem_size=glb_tile_mem_size,
            block_axi_addr_width=glb_params.axi_addr_width)

        self.global_buffer = GlobalBufferMagma(glb_params)

    self.interconnect = create_cgra(
        width, height, io_side,
        reg_addr_width=config_addr_reg_width,
        config_data_width=config_data_width,
        tile_id_width=tile_id_width,
        num_tracks=num_tracks,
        add_pd=add_pd,
        add_pond=add_pond,
        use_io_valid=use_io_valid,
        use_sram_stub=use_sram_stub,
        global_signal_wiring=wiring,
        pipeline_config_interval=pipeline_config_interval,
        mem_ratio=(1, 4),
        standalone=standalone,
        pe_fc=pe_fc)

    # make multiple stall ports
    stall_port_pass(self.interconnect)
    # make multiple configuration ports
    config_port_pass(self.interconnect)

    if interconnect_only:
        # lift all the interconnect ports up
        for name in self.interconnect.interface():
            self.add_port(name, self.interconnect.ports[name].type())
            self.wire(self.ports[name], self.interconnect.ports[name])

        # one configuration bundle per column; the stall bus is
        # width * stall_signal_width bits wide
        config_type = magma.Array[width,
                                  ConfigurationType(config_data_width,
                                                    config_data_width)]
        stall_type = magma.Bits[
            self.width * self.interconnect.stall_signal_width]

        self.add_ports(
            clk=magma.In(magma.Clock),
            reset=magma.In(magma.AsyncReset),
            config=magma.In(config_type),
            stall=magma.In(stall_type),
            read_config_data=magma.Out(magma.Bits[config_data_width])
        )

        fabric_ports = self.interconnect.ports
        self.wire(self.ports.clk, fabric_ports.clk)
        self.wire(self.ports.reset, fabric_ports.reset)
        self.wire(self.ports.config, fabric_ports.config)
        self.wire(self.ports.stall, fabric_ports.stall)
        self.wire(fabric_ports.read_config_data,
                  self.ports.read_config_data)
    else:
        self.add_ports(
            jtag=JTAGType,
            clk_in=magma.In(magma.Clock),
            reset_in=magma.In(magma.AsyncReset),
            proc_packet=ProcPacketIfc(
                glb_params.glb_addr_width,
                glb_params.bank_data_width).slave,
            axi4_slave=AXI4LiteIfc(axi_addr_width, axi_data_width).slave,
            interrupt=magma.Out(magma.Bit),
            cgra_running_clk_out=magma.Out(magma.Clock),
        )

        glc_ports = self.global_controller.ports
        glb_ports = self.global_buffer.ports
        proc = self.ports.proc_packet

        # top <-> global controller ports connection
        self.wire(self.ports.clk_in, glc_ports.clk_in)
        self.wire(self.ports.reset_in, glc_ports.reset_in)
        self.wire(self.ports.jtag, glc_ports.jtag)
        self.wire(self.ports.axi4_slave, glc_ports.axi4_slave)
        self.wire(self.ports.interrupt, glc_ports.interrupt)
        self.wire(self.ports.cgra_running_clk_out, glc_ports.clk_out)

        # top <-> global buffer ports connection; the enable and valid
        # ports on the buffer side are indexed with [0]
        self.wire(self.ports.clk_in, glb_ports.clk)
        self.wire(proc.wr_en, glb_ports.proc_wr_en[0])
        self.wire(proc.wr_strb, glb_ports.proc_wr_strb)
        self.wire(proc.wr_addr, glb_ports.proc_wr_addr)
        self.wire(proc.wr_data, glb_ports.proc_wr_data)
        self.wire(proc.rd_en, glb_ports.proc_rd_en[0])
        self.wire(proc.rd_addr, glb_ports.proc_rd_addr)
        self.wire(proc.rd_data, glb_ports.proc_rd_data)
        self.wire(proc.rd_data_valid, glb_ports.proc_rd_data_valid[0])

        # Top -> Interconnect clock port connection
        self.wire(self.ports.clk_in, self.interconnect.ports.clk)

        glb_glc_wiring(self)
        glb_interconnect_wiring(self)
        glc_interconnect_wiring(self)
def test_interconnect_reset(batch_size: int, dw_files, io_sides):
    """Check that resetting the chip clears written configuration registers.

    A 2x2 CGRA is placed and routed with two IO inputs feeding PE ``p0`` and
    the PE result driving an IO output.  The bitstream is written and read
    back, the tester is reset, every previously written address is expected
    to read back as 0, and the same bitstream is then written and verified
    a second time.

    Args:
        batch_size: not used in the body of this test.
        dw_files: iterable of file paths copied into the simulation
            directory before compiling.
        io_sides: forwarded to ``create_cgra``.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    nets = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }
    widths = {"e0": 16, "e1": 16, "e3": 16}

    placement, routing = pnr(interconnect, (nets, widths))
    bitstream = interconnect.get_route_bitstream(routing)

    # PE core registers: the register address is shifted to bit 24 and the
    # tile id (x shifted to bit 8, OR-ed with y) fills the low bits.
    pe_x, pe_y = placement["p0"]
    pe_tile_id = (pe_x << 8) | pe_y
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    for reg_addr, reg_value in pe_tile.core.get_config_bitstream(asm.umult0()):
        bitstream.append(((reg_addr << 24) | pe_tile_id, reg_value))

    bitstream = compress_config_data(bitstream)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)

    def write_and_read_back():
        # Write every entry and immediately check it reads back unchanged.
        for cfg_addr, cfg_value in bitstream:
            tester.configure(cfg_addr, cfg_value)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_value)

    tester.reset()
    write_and_read_back()

    # After a reset every address written above must read back as zero.
    tester.reset()
    for cfg_addr, _ in bitstream:
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, 0)

    # The chip must accept the same configuration again after the reset.
    write_and_read_back()

    with tempfile.TemporaryDirectory() as tempdir:
        # Stage every external source file next to the generated design.
        for generated in glob.glob("genesis_verif/*.*"):
            shutil.copy(generated, tempdir)
        for dw_path in dw_files:
            shutil.copy(dw_path, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        for sv_path in glob.glob("tests/*.sv"):
            shutil.copy(sv_path, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def test_interconnect_line_buffer(cw_files, add_pd, io_sides):
    """Stream a ramp through memory tile ``m0`` and PE ``p0`` on a 2x2 CGRA.

    The 16-bit input drives both the memory tile and ``data0`` of the PE;
    the memory output drives ``data1`` of the PE, which is configured with
    the ``asm.add()`` bitstream.  Once ``i`` exceeds ``depth + 10`` the
    output is expected to equal ``i * 2 - depth``.

    Args:
        cw_files: iterable of file paths copied into the simulation
            directory before compiling.
        add_pd: forwarded to ``create_cgra`` as ``add_pd``.
        io_sides: forwarded to ``create_cgra``.
    """
    depth = 10
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    nets = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in"), ("p0", "data0")],
        "e1": [("m0", "data_out"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in")]
    }
    widths = {"e0": 16, "e1": 16, "e3": 16, "e4": 1}

    placement, routing = pnr(interconnect, (nets, widths))
    bitstream = interconnect.get_route_bitstream(routing)

    # Memory tile m0: register 0 of feature 0 receives the constant 0x4
    # OR-ed with the depth shifted left by three (line buffer mode, per the
    # test's intent).
    mem_x, mem_y = placement["m0"]
    mem_cfg_addr = interconnect.get_config_addr(0, 0, mem_x, mem_y)
    bitstream.append((mem_cfg_addr, 0x00000004 | (depth << 3)))

    # PE p0: the add bitstream, addressed as (register << 24) | tile id.
    pe_x, pe_y = placement["p0"]
    pe_tile_id = (pe_x << 8) | pe_y
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    for reg_addr, reg_value in pe_tile.core.get_config_bitstream(asm.add()):
        bitstream.append(((reg_addr << 24) | pe_tile_id, reg_value))

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # Write each entry and check that it reads back unchanged.
    for cfg_addr, cfg_value in bitstream:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)

    # Top-level port names are derived from the placed IO coordinates.
    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    # Only check the output once i is past depth + 10.
    first_unchecked = depth + 10
    for sample in range(200):
        tester.poke(circuit.interface[src], sample)
        tester.eval()
        if sample > first_unchecked:
            tester.expect(circuit.interface[dst], sample * 2 - depth)
        # Advance the clock by two steps.
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        for generated in glob.glob("genesis_verif/*.*"):
            shutil.copy(generated, tempdir)
        for cw_path in cw_files:
            shutil.copy(cw_path, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def test_interconnect_sram(run_tb, io_sides):
    """Load memory tile ``m0`` through configuration, then read it back.

    On a 2x2 CGRA the 16-bit input drives ``addr_in_0`` of the memory tile,
    a 1-bit input drives ``ren_in_0`` and ``data_out_0`` drives the 16-bit
    output.  After the contents are written through the configuration
    interface, addresses 0..2047 are applied and the output is expected to
    equal the applied address.

    Args:
        run_tb: callable invoked with the finished tester.
        io_sides: forwarded to ``create_cgra``.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    nets = {
        "e0": [("I0", "io2f_16"), ("m0", "addr_in_0")],
        "e1": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e2": [("i3", "io2f_1"), ("m0", "ren_in_0")]
    }
    widths = {"e0": 16, "e1": 16, "e2": 1}

    placement, routing = pnr(interconnect, (nets, widths))
    bitstream = interconnect.get_route_bitstream(routing)

    # Memory tile registers: mode 2 is annotated as Mode.SRAM in this test.
    sram_mode = 2
    enable = 1
    mem_registers = [("mode", sram_mode, 0),
                     ("tile_en", enable, 0),
                     ("flush_reg_sel", 1, 0)]
    mem_x, mem_y = placement["m0"]
    mem_core = interconnect.tile_circuits[(mem_x, mem_y)].core
    config_mem_tile(interconnect, bitstream, mem_registers,
                    mem_x, mem_y, mem_core)
    bitstream = compress_config_data(bitstream)

    # 512 config addresses: entries 0..255 use feature 1 and 256..511 use
    # feature 2, each with the entry index as its base data value.
    sram_words = []
    for word in range(512):
        feature_offset, reg_addr = divmod(word, 256)
        cfg_addr = interconnect.get_config_addr(reg_addr, feature_offset + 1,
                                                mem_x, mem_y)
        sram_words.append((cfg_addr, word))

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()

    def write_and_read_back():
        # Write every routing/tile entry and check it reads back unchanged.
        for cfg_addr, cfg_value in bitstream:
            tester.configure(cfg_addr, cfg_value)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_value)

    write_and_read_back()

    # Each address gets four consecutive writes (base * 4 + 0..3), followed
    # by four reads expected to return the same values in the same order.
    # NOTE(review): an earlier comment here said read back does not work,
    # yet the read-back is asserted -- confirm which is accurate.
    for cfg_addr, base in sram_words:
        for lane in range(4):
            tester.configure(cfg_addr, base * 4 + lane)
            tester.eval()
        for lane in range(4):
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, base * 4 + lane)

    # The routing/tile configuration is written and verified a second time.
    write_and_read_back()

    tester.done_config()

    src = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    dst = interconnect.get_top_output_port_by_coord(placement["I1"], 16)
    ren = interconnect.get_top_input_port_by_coord(placement["i3"], 1)

    tester.step(2)
    tester.poke(circuit.interface[ren], 1)
    tester.eval()

    for address in range(2048):
        tester.poke(circuit.interface[src], address)
        tester.eval()
        tester.step(2)
        tester.eval()
        tester.expect(circuit.interface[dst], address)

    run_tb(tester)
def test_interconnect_point_wise(batch_size: int, cw_files, add_pd, io_sides):
    """Multiply two streamed inputs with a single PE on a 2x2 CGRA.

    Two 16-bit IO inputs are routed to ``data0``/``data1`` of PE ``p0``,
    which is configured with the ``asm.umult0()`` bitstream, and ``alu_res``
    is routed to a 16-bit IO output.  For ``batch_size`` pseudo-random
    operand pairs (seeded with 0, each operand below 256) the output is
    expected to equal their product after a combinational ``eval``.

    Args:
        batch_size: number of operand pairs to drive.
        cw_files: iterable of file paths copied into the simulation
            directory before compiling.
        add_pd: forwarded to ``create_cgra`` as ``add_pd``.
        io_sides: forwarded to ``create_cgra``.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    nets = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }
    widths = {"e0": 16, "e1": 16, "e3": 16}

    placement, routing = pnr(interconnect, (nets, widths))
    bitstream = interconnect.get_route_bitstream(routing)

    # Append the PE core configuration, addressed through the interconnect.
    pe_x, pe_y = placement["p0"]
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    for reg_addr, reg_value in pe_tile.core.get_config_bitstream(asm.umult0()):
        cfg_addr = interconnect.get_config_addr(reg_addr, 0, pe_x, pe_y)
        bitstream.append((cfg_addr, reg_value))

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # Write each entry and check that it reads back unchanged.
    for cfg_addr, cfg_value in bitstream:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)

    # Top-level port names are derived from the placed IO coordinates.
    src_x0, src_y0 = placement["I0"]
    src_x1, src_y1 = placement["I1"]
    src_name0 = f"glb2io_16_X{src_x0:02X}_Y{src_y0:02X}"
    src_name1 = f"glb2io_16_X{src_x1:02X}_Y{src_y1:02X}"
    dst_x, dst_y = placement["I2"]
    dst_name = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"

    random.seed(0)
    remaining = batch_size
    while remaining > 0:
        lhs = random.randrange(0, 256)
        rhs = random.randrange(0, 256)
        tester.poke(circuit.interface[src_name0], lhs)
        tester.poke(circuit.interface[src_name1], rhs)
        tester.eval()
        tester.expect(circuit.interface[dst_name], lhs * rhs)
        remaining -= 1

    with tempfile.TemporaryDirectory() as tempdir:
        for generated in glob.glob("genesis_verif/*.*"):
            shutil.copy(generated, tempdir)
        for cw_path in cw_files:
            shutil.copy(cw_path, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal", "--trace"])
def test_interconnect_fifo(run_tb, io_sides, depth):
    """Drive memory tile ``m0`` as a FIFO with random traffic.

    A 2x2 CGRA is placed and routed so that top-level IOs reach the memory
    tile's data, write-enable and read-enable inputs, and its data, valid,
    empty and full outputs.  For 2048 iterations one of four moves (read,
    write, read-and-write, idle) is chosen at random, and the hardware
    flags and data are compared against a software ``deque`` model.

    The random generator is seeded so that a failing sequence can be
    reproduced.

    Args:
        run_tb: callable invoked with the finished tester.
        io_sides: forwarded to ``create_cgra``.
        depth: FIFO depth written to ``fifo_ctrl_fifo_depth`` and used as
            the capacity of the software model.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in_0")],
        "e1": [("i3", "io2f_1"), ("m0", "wen_in_0")],
        "e2": [("i4", "io2f_1"), ("m0", "ren_in_0")],
        "e3": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e4": [("m0", "valid_out_0"), ("i4", "f2io_1")],
        "e5": [("m0", "empty"), ("i2", "f2io_1")],
        "e6": [("m0", "full"), ("i3", "f2io_1")]
    }
    bus = {"e0": 16, "e1": 1, "e2": 1, "e3": 16, "e4": 1, "e5": 1, "e6": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # Configure m0 in FIFO mode.
    mode = 1  # Mode.FIFO
    tile_en = 1
    configs_mem = [("fifo_ctrl_fifo_depth", depth, 0),
                   ("mode", mode, 0),
                   ("tile_en", tile_en, 0),
                   ("flush_reg_sel", 1, 0)]
    mem_x, mem_y = placement["m0"]
    mcore = interconnect.tile_circuits[(mem_x, mem_y)].core
    config_mem_tile(interconnect, config_data, configs_mem,
                    mem_x, mem_y, mcore)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()

    # Write each entry and check that it reads back unchanged.
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # "i3" carries both wen (input) and full (output); "i4" carries both
    # ren (input) and valid (output), as wired in the netlist above.
    src = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    dst = interconnect.get_top_output_port_by_coord(placement["I1"], 16)
    wen = interconnect.get_top_input_port_by_coord(placement["i3"], 1)
    valid = interconnect.get_top_output_port_by_coord(placement["i4"], 1)
    ren = interconnect.get_top_input_port_by_coord(placement["i4"], 1)
    full = interconnect.get_top_output_port_by_coord(placement["i3"], 1)
    empty = interconnect.get_top_output_port_by_coord(placement["i2"], 1)

    tester.step(1)

    # Fixed seed: the stimulus is identical on every run.
    random.seed(0)

    fifo = deque()
    valid_check = 0
    most_recent_read = 0
    for _ in range(2048):
        # Flags are checked against the occupancy *before* this move.
        len_fifo = len(fifo)

        # Pick one of READ (0), WRITE (1), READ_AND_WRITE (2), IDLE (3).
        move = random.randint(0, 3)
        if move == 0:
            # Read: data is only valid when the model holds something.
            tester.poke(circuit.interface[ren], 1)
            if fifo:
                most_recent_read = fifo.pop()
                valid_check = 1
            else:
                valid_check = 0
        elif move == 1:
            # Write: the model drops the value when it is already full.
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[src], write_val)
            if len(fifo) < depth:
                fifo.appendleft(write_val)
            valid_check = 0
        elif move == 2:
            # Simultaneous read and write.
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[ren], 1)
            tester.poke(circuit.interface[src], write_val)
            fifo.appendleft(write_val)
            most_recent_read = fifo.pop()
            valid_check = 1
        else:
            # Idle: valid is expected low.
            valid_check = 0
        tester.eval()

        tester.expect(circuit.interface[empty], len_fifo == 0)
        tester.expect(circuit.interface[full], len_fifo == depth)
        tester.expect(circuit.interface[valid], valid_check)
        if valid_check:
            tester.expect(circuit.interface[dst], most_recent_read)
        tester.step(2)

        # Return both enables to 0 before the next move.
        tester.poke(circuit.interface[wen], 0)
        tester.poke(circuit.interface[ren], 0)

    run_tb(tester)
def test_stall(dw_files, io_sides):
    """Check that asserting ``stall`` freezes the data path output.

    On a 2x2 CGRA the 16-bit input passes through register ``r1`` into both
    memory tile ``m0`` and ``data0`` of PE ``p0``; the memory output drives
    ``data1`` of the PE.  The chip is configured while stalled, un-stalled,
    run for 20 samples, stalled for 20 samples (during which the output is
    expected to stay at ``19 * 2 - 3 - depth`` whatever the input), and
    finally un-stalled and run from sample 19 onwards.

    Args:
        dw_files: iterable of file paths copied into the simulation
            directory before compiling.
        io_sides: forwarded to ``create_cgra``.
    """
    size = 2
    depth = 10
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    nets = {
        "e0": [("I0", "io2f_16"), ("r1", "reg")],
        "e2": [("r1", "reg"), ("m0", "data_in_0"), ("p0", "data0")],
        "e1": [("m0", "data_out_0"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in_0"), ("m0", "ren_in_0")],
        "e5": [("m0", "valid_out_0"), ("i4", "f2io_1")]
    }
    widths = {"e0": 16, "e2": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (nets, widths))
    bitstream = interconnect.get_route_bitstream(routing)

    # PE p0: add bitstream built with ra_mode=asm.Mode_t.DELAY.
    pe_x, pe_y = placement["p0"]
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    pe_bitstream = pe_tile.core.get_config_bitstream(
        asm.add(ra_mode=asm.Mode_t.DELAY))
    for reg_addr, reg_value in pe_bitstream:
        cfg_addr = interconnect.get_config_addr(reg_addr, 0, pe_x, pe_y)
        bitstream.append((cfg_addr, reg_value))

    # Memory tile m0: register name, value and a third field of 0 each.
    enable = 1
    mem_x, mem_y = placement["m0"]
    mem_core = interconnect.tile_circuits[(mem_x, mem_y)].core
    mem_registers = [
        ("strg_ub_app_ctrl_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_read_depth_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_wo_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_ss_0", depth, 0),
        ("strg_ub_app_ctrl_coarse_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_coarse_read_depth_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_wo_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_ss_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_2", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_3", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_sync_grp_sync_group_0", 1, 0),
        ("strg_ub_tba_0_tb_0_range_outer", depth, 0),
        ("strg_ub_tba_0_tb_0_starting_addr", 0, 0),
        ("strg_ub_tba_0_tb_0_stride", 1, 0),
        ("strg_ub_tba_0_tb_0_dimensionality", 1, 0),
        ("strg_ub_agg_align_0_line_length", depth, 0),
        ("strg_ub_tba_0_tb_0_indices_merged_0",
         (0 << 0) | (1 << 3) | (2 << 6) | (3 << 9), 0),
        ("strg_ub_tba_0_tb_0_range_inner", 4, 0),
        ("strg_ub_tba_0_tb_0_tb_height", 1, 0),
        ("tile_en", enable, 0),
        ("mode", 0, 0),
        ("flush_reg_sel", 1, 0),
        ("wen_in_1_reg_sel", 1, 0),
        ("ren_in_1_reg_sel", 1, 0)
    ]
    config_mem_tile(interconnect, bitstream, mem_registers,
                    mem_x, mem_y, mem_core)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # Configure while the chip is stalled.
    tester.poke(circuit.interface["stall"], 1)
    tester.eval()

    for cfg_addr, cfg_value in bitstream:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)

    # Release the stall before streaming data.
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # Top-level port names are derived from the placed IO coordinates.
    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid_x, valid_y = placement["i4"]
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    last_sample = 19
    for sample in range(20):
        tester.poke(circuit.interface[src], sample)
        tester.eval()
        if sample >= 10 + 1:
            # data0 of PE: sample - 1 - 1
            # data1 of PE: sample - 1 - depth
            tester.expect(circuit.interface[dst], sample * 2 - 3 - depth)
            tester.expect(circuit.interface[valid], 1)
        elif sample < depth:
            tester.expect(circuit.interface[valid], 0)
        if sample == last_sample:
            # Stall everything on the last sample of this phase.
            tester.poke(circuit.interface["stall"], 1)
            tester.eval()
        tester.step(2)

    # While stalled the input is ignored: the output must stay at the
    # value produced for sample 19.
    held_output = 19 * 2 - 3 - depth
    for sample in range(20):
        tester.poke(circuit.interface[src], sample * 20)
        tester.expect(circuit.interface[dst], held_output)
        tester.step(2)

    # Release the stall again and resume from sample 19.
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    for sample in range(19, 30):
        tester.poke(circuit.interface[src], sample)
        tester.eval()
        tester.expect(circuit.interface[dst], sample * 2 - 3 - depth)
        tester.expect(circuit.interface[valid], 1)
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        for generated in glob.glob("genesis_verif/*.*"):
            shutil.copy(generated, tempdir)
        for dw_path in dw_files:
            shutil.copy(dw_path, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        for sv_path in glob.glob("tests/*.sv"):
            shutil.copy(sv_path, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def test_interconnect_line_buffer_last_line_valid(cw_files, add_pd, io_sides,
                                                  stencil_width, depth):
    """Check the ``valid_out`` pattern of memory tile ``m0`` as a line buffer.

    The memory tile is configured with ``Mode.LINE_BUFFER``, the given
    ``depth`` and ``stencil_width``.  Over ``3 * depth`` cycles with
    write-enable held high, valid is expected to be 0 for the first
    ``depth + stencil_width - 1`` cycles, 1 up to ``2 * depth``, 0 again
    for the next ``stencil_width - 1`` cycles and 1 afterwards.

    Args:
        cw_files: iterable of file paths copied into the simulation
            directory before compiling.
        add_pd: forwarded to ``create_cgra`` as ``add_pd``.
        io_sides: forwarded to ``create_cgra``.
        stencil_width: value written to the ``stencil_width`` register.
        depth: value written to the ``depth`` register.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    nets = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in"), ("p0", "data0")],
        "e1": [("m0", "data_out"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in")],
        "e5": [("m0", "valid_out"), ("i4", "f2io_1")]
    }
    widths = {"e0": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (nets, widths))
    bitstream = interconnect.get_route_bitstream(routing)

    # Memory tile m0: line buffer mode, written register by register.
    line_buffer_mode = Mode.LINE_BUFFER
    enable = 1
    mem_x, mem_y = placement["m0"]
    mem_core = interconnect.tile_circuits[(mem_x, mem_y)].core
    mem_registers = (
        ("depth", depth),
        ("mode", line_buffer_mode.value),
        ("stencil_width", stencil_width),
        ("tile_en", enable),
    )
    for reg_name, reg_value in mem_registers:
        cfg_addr = interconnect.get_config_addr(
            mem_core.get_reg_index(reg_name), 0, mem_x, mem_y)
        bitstream.append((cfg_addr, reg_value))

    # PE p0: the add bitstream, addressed as (register << 24) | tile id.
    pe_x, pe_y = placement["p0"]
    pe_tile_id = (pe_x << 8) | pe_y
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    for reg_addr, reg_value in pe_tile.core.get_config_bitstream(asm.add()):
        bitstream.append(((reg_addr << 24) | pe_tile_id, reg_value))

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # Write each entry and check that it reads back unchanged.
    for cfg_addr, cfg_value in bitstream:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)

    # Top-level port names are derived from the placed IO coordinates.
    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid_x, valid_y = placement["i4"]
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    # The input value is held at 0 for the whole run; only valid is checked.
    held_input = 0
    first_valid_cycle = depth + stencil_width - 1
    second_gap_end = 2 * depth + stencil_width - 1
    for cycle in range(3 * depth):
        tester.poke(circuit.interface[src], held_input)
        tester.eval()
        if cycle < first_valid_cycle:
            expected_valid = 0
        elif cycle < 2 * depth:
            expected_valid = 1
        elif cycle < second_gap_end:
            expected_valid = 0
        else:
            expected_valid = 1
        tester.expect(circuit.interface[valid], expected_valid)
        # Advance the clock by two steps.
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        for generated in glob.glob("genesis_verif/*.*"):
            shutil.copy(generated, tempdir)
        for cw_path in cw_files:
            shutil.copy(cw_path, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        for sv_path in glob.glob("tests/*.sv"):
            shutil.copy(sv_path, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal", "--trace"])