def compile(self, halide_src, unconstrained_io=False, compact=False):
    """Map, place, route and emit a compressed bitstream for an application.

    :param halide_src: application path handed to ``self.map``; its
        directory is passed to ``archipelago.pnr`` as ``copy_to_dir`` and to
        ``archipelago.io.dump_meta_file``.
    :param unconstrained_io: when true, ``fixed_pos=None`` is given to the
        placer; otherwise the positions come from ``place_io_blk``.
    :param compact: forwarded to ``archipelago.pnr`` and to
        ``archipelago.power.reduce_switching``.
    :return: ``(bitstream, (input_interface, output_interface, reset, valid,
        en, delay))``; ``delay`` is 1 when ``has_rom(id_to_name)`` is truthy
        and 0 otherwise.
    """
    id_to_name, instance_to_instr, netlist, bus = self.map(halide_src)
    app_dir = os.path.dirname(halide_src)

    fixed_io = None if unconstrained_io else place_io_blk(id_to_name)

    placement, routing = archipelago.pnr(
        self.interconnect,
        (netlist, bus),
        cwd="temp",
        id_to_name=id_to_name,
        fixed_pos=fixed_io,
        compact=compact,
        copy_to_dir=app_dir,
    )

    # Overlay the routing entries returned by the switching-reduction pass.
    routing.update(
        archipelago.power.reduce_switching(routing, self.interconnect,
                                           compact=compact))

    # Routing configuration first, then per-instance placement configuration.
    bitstream = []
    bitstream += self.interconnect.get_route_bitstream(routing)
    bitstream += self.get_placement_bitstream(placement, id_to_name,
                                              instance_to_instr)
    bitstream = compress_config_data(
        bitstream, skip_compression=self.interconnect.get_skip_addr())

    inputs, outputs = self.get_input_output(netlist)
    io_info = self.get_io_interface(inputs, outputs, placement, id_to_name)
    input_interface, output_interface, (reset, valid, en) = io_info

    delay = 1 if has_rom(id_to_name) else 0

    # also write out the meta file
    archipelago.io.dump_meta_file(halide_src, "design", app_dir)

    return bitstream, (input_interface, output_interface,
                       reset, valid, en, delay)
def interconnect_route():
    """Build a 2x2 CGRA from hand-made cores and route a three-net design.

    :return: ``(interconnect, route_path)`` where ``route_path`` holds the
        first entry of the route of nets "e0", "e1" and "e2".
    """
    chip_size = 2
    edge = chip_size + 1

    # All cores are created here so that the same objects are reused rather
    # than duplicated when snapping into different interconnect graphs.
    cores = {}
    for col in range(0, edge + 1):
        for row in range(0, edge + 1):
            cores[(col, row)] = IO16bit()
    # The interior positions are replaced by dummy cores.
    for col in range(1, edge):
        for row in range(1, edge):
            cores[(col, row)] = DummyCore()
    # The four corners hold no core.
    for corner in ((0, 0), (0, edge), (edge, 0), (edge, edge)):
        cores[corner] = None

    interconnect = create_cgra(chip_size, True, cores_input=cores)

    bus = {"e0": 16, "e1": 16, "e2": 16}
    netlist = {
        "e0": [("I0", "io2f_16"), ("r0", "reg")],
        "e1": [("r0", "reg"), ("D0", "data_in_16b")],
        "e2": [("D0", "data_out_16b"), ("I1", "f2io_16")],
    }

    with tempfile.TemporaryDirectory() as tempdir:
        _, route = pnr(interconnect, (netlist, bus), cwd=tempdir)

    # First entry of every net's route, in net order.
    route_path = [route[net][0] for net in ("e0", "e1", "e2")]
    return interconnect, route_path
def test_interconnect_point_wise(batch_size: int, run_tb, io_sides):
    """Point-wise multiply through a 2x2 CGRA.

    Two 16-bit IO inputs drive ``data0``/``data1`` of ``p0`` and its
    ``alu_res`` drives a 16-bit IO output.  ``p0`` is configured with
    ``asm.umult0()``; ``batch_size`` random operand pairs are poked and their
    product is expected on the output.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }
    bus = {"e0": 16, "e1": 16, "e3": 16}

    placement, routing, _ = pnr(interconnect, (netlist, bus))
    bitstream = interconnect.get_route_bitstream(routing)

    # Append the PE instruction to the routing configuration.
    pe_x, pe_y = placement["p0"]
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    pe_config = pe_tile.core.get_config_bitstream(asm.umult0())
    for reg_addr, reg_val in pe_config:
        full_addr = interconnect.get_config_addr(reg_addr, 0, pe_x, pe_y)
        bitstream.append((full_addr, reg_val))
    bitstream = compress_config_data(bitstream)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()

    # Write every configuration word and read it straight back.
    for cfg_addr, cfg_val in bitstream:
        tester.configure(cfg_addr, cfg_val)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_val)
    tester.done_config()

    in_port0 = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    in_port1 = interconnect.get_top_input_port_by_coord(placement["I1"], 16)
    out_port = interconnect.get_top_output_port_by_coord(placement["I2"], 16)

    random.seed(0)
    for _ in range(batch_size):
        lhs = random.randrange(0, 256)
        rhs = random.randrange(0, 256)
        tester.poke(circuit.interface[in_port0], lhs)
        tester.poke(circuit.interface[in_port1], rhs)
        tester.eval()
        tester.expect(circuit.interface[out_port], lhs * rhs)

    run_tb(tester)
def test_interconnect_reset(batch_size: int, run_tb, io_sides):
    """Check that a reset clears the configuration and it can be rewritten.

    The routing and PE configuration are written and read back, the tester
    is reset and every configured address is expected to read 0, then the
    same configuration is written and verified a second time.
    ``batch_size`` is accepted but not used by the body.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }
    bus = {"e0": 16, "e1": 16, "e3": 16}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # PE configuration addresses are formed by hand here: the register
    # address goes in the bits above 24 and the tile id is x << 8 | y.
    pe_x, pe_y = placement["p0"]
    tile_id = pe_x << 8 | pe_y
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    pe_config = pe_tile.core.get_config_bitstream(asm.umult0())
    for reg_addr, reg_val in pe_config:
        config_data.append(((reg_addr << 24) | tile_id, reg_val))
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)

    def program_and_verify():
        # Write each word and expect to read the same value back.
        for cfg_addr, cfg_val in config_data:
            tester.configure(cfg_addr, cfg_val)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_val)

    tester.reset()
    program_and_verify()

    # reset them
    tester.reset()
    for cfg_addr, _ in config_data:
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, 0)

    # configure new one
    program_and_verify()

    run_tb(tester)
def place_and_route(self, halide_src, unconstrained_io=False, compact=False,
                    load_only=False):
    """Load the netlist of an application and run place and route on it.

    :param halide_src: application path handed to ``self.load_netlist``;
        its directory is used as the working directory of ``archipelago.pnr``.
    :param unconstrained_io: when true, ``fixed_pos=None`` is given to the
        placer; otherwise the positions come from ``place_io_blk``.
    :param compact: forwarded to ``archipelago.pnr``.
    :param load_only: forwarded to both ``self.load_netlist`` and
        ``archipelago.pnr``.
    :return: ``(placement, routing, id_to_name, instance_to_instr, netlist,
        bus)``; ``id_to_name`` is the one returned by ``archipelago.pnr``.
    """
    loaded = self.load_netlist(halide_src, load_only)
    id_to_name, instance_to_instr, netlist, bus = loaded
    app_dir = os.path.dirname(halide_src)

    if unconstrained_io:
        fixed_io = None
    else:
        fixed_io = place_io_blk(id_to_name)

    pnr_options = {
        "load_only": load_only,
        "cwd": app_dir,
        "id_to_name": id_to_name,
        "fixed_pos": fixed_io,
        "compact": compact,
    }
    placement, routing, id_to_name = archipelago.pnr(
        self.interconnect, (netlist, bus), **pnr_options)

    return placement, routing, id_to_name, instance_to_instr, netlist, bus
def interconnect_route():
    """Place and route a small PE-plus-memory design on a 2x2 CGRA.

    ``pnr`` is run with ``cwd="temp"``.

    :return: ``(interconnect, placement, route)`` as produced by
        ``create_cgra`` and ``pnr``.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, True, cores_input=None)

    # Net name -> bus width; four 16-bit nets and one 1-bit net.
    bus = {"e0": 16, "e1": 16, "e2": 16, "e3": 16, "e4": 1}
    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e2": [("p0", "out"), ("m0", "addr")],
        "e3": [("m0", "data_out"), ("I2", "f2io_16")],
        "e4": [("i0", "io2f_1"), ("m0", "ren")],
    }

    pnr_result = pnr(interconnect, (netlist, bus), cwd="temp")
    placement, route = pnr_result
    return interconnect, placement, route
def interconnect_route():
    """Place and route a memory-only design on a 2x2 CGRA.

    :return: ``(interconnect, placement, route_path)`` where ``route_path``
        holds the first entry of the route of nets "e0", "e1" and "e2".
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, True, cores_input=None)

    bus = {"e0": 16, "e1": 16, "e2": 1}
    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in")],
        "e1": [("m0", "data_out"), ("I1", "f2io_16")],
        "e2": [("i0", "io2f_1"), ("m0", "wen")],
    }

    placement, route = pnr(interconnect, (netlist, bus))

    # First entry of every net's route, in net order.
    route_path = [route[net][0] for net in ("e0", "e1", "e2")]
    return interconnect, placement, route_path
def compile(self, halide_src):
    """Map, place and route an application and build its bitstream.

    The mapper is initialised on first use.  IO positions are always fixed
    through ``place_io_blk(id_to_name, self.width)``.

    :param halide_src: application path handed to ``self.map``.
    :return: ``(bitstream, (input_interface, output_interface, reset,
        valid))``.
    """
    if not self.mapper_initalized:
        self.initialize_mapper(self.__rewrite_rules)

    mapped, instrs = self.map(halide_src)

    # id to name converts the id to instance name
    netlist, bus, id_to_name = self.convert_mapped_to_netlist(mapped)
    fixed_io = place_io_blk(id_to_name, self.width)

    placement, routing = archipelago.pnr(
        self.interconnect,
        (netlist, bus),
        cwd="temp",
        id_to_name=id_to_name,
        fixed_pos=fixed_io,
    )

    # Routing configuration first, then per-instance placement configuration.
    bitstream = []
    bitstream += self.interconnect.get_route_bitstream(routing)
    bitstream += self.get_placement_bitstream(placement, id_to_name, instrs)

    inputs, outputs = self.get_input_output(netlist)
    io_info = self.get_io_interface(inputs, outputs, placement, id_to_name)
    input_interface, output_interface, (reset, valid) = io_info

    return bitstream, (input_interface, output_interface, reset, valid)
def compile(self, halide_src, unconstrained_io=False):
    """Map, place and route an application and build its bitstream.

    :param halide_src: application path handed to ``self.map``.
    :param unconstrained_io: when true, ``fixed_pos=None`` is given to the
        placer; otherwise the positions come from
        ``place_io_blk(id_to_name, self.width)``.
    :return: ``(bitstream, (input_interface, output_interface, reset, valid,
        en, delay))``; ``delay`` is 1 when ``has_rom(id_to_name)`` is truthy
        and 0 otherwise.
    """
    id_to_name, instance_to_instr, netlist, bus = self.map(halide_src)

    fixed_io = (None if unconstrained_io
                else place_io_blk(id_to_name, self.width))

    placement, routing = archipelago.pnr(
        self.interconnect,
        (netlist, bus),
        cwd="temp",
        id_to_name=id_to_name,
        fixed_pos=fixed_io,
    )

    # Routing configuration first, then per-instance placement configuration.
    bitstream = []
    bitstream += self.interconnect.get_route_bitstream(routing)
    bitstream += self.get_placement_bitstream(placement, id_to_name,
                                              instance_to_instr)

    inputs, outputs = self.get_input_output(netlist)
    io_info = self.get_io_interface(inputs, outputs, placement, id_to_name)
    input_interface, output_interface, (reset, valid, en) = io_info

    delay = 1 if has_rom(id_to_name) else 0
    return bitstream, (input_interface, output_interface,
                       reset, valid, en, delay)
def test_interconnect_fifo(run_tb, io_sides, depth):
    """Drive a memory tile configured as a FIFO with random traffic.

    ``m0`` is configured with ``mode = 1`` and ``fifo_ctrl_fifo_depth =
    depth``.  For 2048 iterations one of read / write / read-and-write /
    idle is chosen at random, a software ``deque`` tracks the expected
    contents, and the ``empty``, ``full``, ``valid`` and (when valid) data
    outputs are checked against it.

    The random generator is seeded so that the stimulus, and therefore any
    failure, is reproducible from run to run.

    :param run_tb: callable invoked with the finished tester.
    :param io_sides: forwarded to ``create_cgra``.
    :param depth: FIFO depth written to the tile and used by the model.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in_0")],
        "e1": [("i3", "io2f_1"), ("m0", "wen_in_0")],
        "e2": [("i4", "io2f_1"), ("m0", "ren_in_0")],
        "e3": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e4": [("m0", "valid_out_0"), ("i4", "f2io_1")],
        "e5": [("m0", "empty"), ("i2", "f2io_1")],
        "e6": [("m0", "full"), ("i3", "f2io_1")]
    }
    bus = {"e0": 16, "e1": 1, "e2": 1, "e3": 16, "e4": 1, "e5": 1, "e6": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as fifo mode
    mode = 1  # Mode.FIFO
    tile_en = 1
    configs_mem = [("fifo_ctrl_fifo_depth", depth, 0),
                   ("mode", mode, 0),
                   ("tile_en", tile_en, 0),
                   ("flush_reg_sel", 1, 0)]

    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core
    config_mem_tile(interconnect, config_data, configs_mem,
                    mem_x, mem_y, mcore)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()

    # Write every configuration word and read it straight back.
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    src_coord = placement["I0"]
    src = interconnect.get_top_input_port_by_coord(src_coord, 16)
    dst_coord = placement["I1"]
    dst = interconnect.get_top_output_port_by_coord(dst_coord, 16)
    wen_coord = placement["i3"]
    wen = interconnect.get_top_input_port_by_coord(wen_coord, 1)
    # i4 carries both the valid output and the read-enable input, and i3
    # both the write-enable input and the full output (see the netlist).
    valid_coord = placement["i4"]
    valid = interconnect.get_top_output_port_by_coord(valid_coord, 1)
    ren_coord = placement["i4"]
    ren = interconnect.get_top_input_port_by_coord(ren_coord, 1)
    full_coord = placement["i3"]
    full = interconnect.get_top_output_port_by_coord(full_coord, 1)
    empty_coord = placement["i2"]
    empty = interconnect.get_top_output_port_by_coord(empty_coord, 1)

    tester.step(1)

    fifo = deque()
    valid_check = 0
    most_recent_read = 0

    # Fixed seed: same convention as the other tests in this file.
    random.seed(0)
    for _ in range(2048):
        # empty/full are checked against the occupancy before this move.
        len_fifo = len(fifo)

        # Pick random from (READ, WRITE, READ_AND_WRITE, IDLE)
        move = random.randint(0, 3)
        if move == 0:
            # read
            tester.poke(circuit.interface[ren], 1)
            if fifo:
                most_recent_read = fifo.pop()
                valid_check = 1
            else:
                valid_check = 0
        elif move == 1:
            # write; the model drops the value when it is already full
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[src], write_val)
            if len(fifo) < depth:
                fifo.appendleft(write_val)
            valid_check = 0
        elif move == 2:
            # r and w
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[ren], 1)
            tester.poke(circuit.interface[src], write_val)
            fifo.appendleft(write_val)
            most_recent_read = fifo.pop()
            valid_check = 1
        else:
            # If not doing anything, valid will be low, and we expect
            # to see the same output as before
            valid_check = 0

        tester.eval()
        tester.expect(circuit.interface[empty], len_fifo == 0)
        tester.expect(circuit.interface[full], len_fifo == depth)
        tester.expect(circuit.interface[valid], valid_check)
        if valid_check:
            tester.expect(circuit.interface[dst], most_recent_read)

        tester.step(2)
        tester.poke(circuit.interface[wen], 0)
        tester.poke(circuit.interface[ren], 0)

    run_tb(tester)
def test_interconnect_sram(cw_files, add_pd, io_sides):
    """Read back preloaded SRAM contents through the interconnect.

    ``m0`` gets the word 0x00000006 at config address (0, 0); the value
    ``i + 10`` is written for every ``i`` in ``range(0, 1024, 4)`` and later
    expected on the data output when ``i`` is presented as the address.
    The design is simulated with verilator in a temporary directory.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "addr_in")],
        "e1": [("m0", "data_out"), ("I1", "f2io_16")],
        "e2": [("i3", "io2f_1"), ("m0", "ren_in")],
    }
    bus = {"e0": 16, "e1": 16, "e2": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    x, y = placement["m0"]
    # in this case we configure the memory tile as sram mode
    config_data.append((interconnect.get_config_addr(0, 0, x, y),
                        0x00000006))

    # SRAM payload: feature index is i // 256 + 1, register is i % 256.
    sram_data = [
        (interconnect.get_config_addr(word % 256, word // 256 + 1, x, y),
         word + 10)
        for word in range(0, 1024, 4)
    ]

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    for cfg_addr, cfg_val in config_data:
        tester.configure(cfg_addr, cfg_val)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_val)

    # currently read back doesn't work for the SRAM payload, so these
    # writes are not verified
    for sram_addr, sram_val in sram_data:
        tester.configure(sram_addr, sram_val)

    addr_x, addr_y = placement["I0"]
    src = f"glb2io_16_X{addr_x:02X}_Y{addr_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    ren_x, ren_y = placement["i3"]
    ren = f"glb2io_1_X{ren_x:02X}_Y{ren_y:02X}"

    tester.step(2)
    tester.poke(circuit.interface[ren], 1)
    tester.eval()

    for word in range(0, 1024, 4):
        tester.poke(circuit.interface[src], word)
        tester.eval()
        tester.step(2)
        tester.eval()
        tester.expect(circuit.interface[dst], word + 10)

    with tempfile.TemporaryDirectory() as tempdir:
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in cw_files:
            shutil.copy(filename, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def test_interconnect_point_wise(batch_size: int, cw_files, add_pd, io_sides):
    """Point-wise multiply through a 2x2 CGRA, simulated with verilator.

    Two 16-bit IO inputs drive ``data0``/``data1`` of ``p0`` and its
    ``alu_res`` drives a 16-bit IO output.  ``p0`` is configured with
    ``asm.umult0()``; ``batch_size`` random operand pairs are poked and their
    product is expected on the output.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }
    bus = {"e0": 16, "e1": 16, "e3": 16}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # Append the PE instruction to the routing configuration.
    x, y = placement["p0"]
    pe_tile = interconnect.tile_circuits[(x, y)]
    pe_config = pe_tile.core.get_config_bitstream(asm.umult0())
    for reg_addr, reg_val in pe_config:
        full_addr = interconnect.get_config_addr(reg_addr, 0, x, y)
        config_data.append((full_addr, reg_val))

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # set the PE core
    for cfg_addr, cfg_val in config_data:
        tester.configure(cfg_addr, cfg_val)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_val)

    src_x0, src_y0 = placement["I0"]
    src_x1, src_y1 = placement["I1"]
    src_name0 = f"glb2io_16_X{src_x0:02X}_Y{src_y0:02X}"
    src_name1 = f"glb2io_16_X{src_x1:02X}_Y{src_y1:02X}"
    dst_x, dst_y = placement["I2"]
    dst_name = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"

    random.seed(0)
    for _ in range(batch_size):
        lhs = random.randrange(0, 256)
        rhs = random.randrange(0, 256)
        tester.poke(circuit.interface[src_name0], lhs)
        tester.poke(circuit.interface[src_name1], rhs)
        tester.eval()
        tester.expect(circuit.interface[dst_name], lhs * rhs)

    with tempfile.TemporaryDirectory() as tempdir:
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in cw_files:
            shutil.copy(filename, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal", "--trace"])
def test_interconnect_reset(batch_size: int, dw_files, io_sides):
    """Check that a reset clears the configuration and it can be rewritten.

    The routing and PE configuration are written and read back, the tester
    is reset and every configured address is expected to read 0, then the
    same configuration is written and verified a second time.  The design is
    simulated with verilator in a temporary directory.  ``batch_size`` is
    accepted but not used by the body.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }
    bus = {"e0": 16, "e1": 16, "e3": 16}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # PE configuration addresses are formed by hand here: the register
    # address goes in the bits above 24 and the tile id is x << 8 | y.
    pe_x, pe_y = placement["p0"]
    tile_id = pe_x << 8 | pe_y
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    pe_config = pe_tile.core.get_config_bitstream(asm.umult0())
    for reg_addr, reg_val in pe_config:
        config_data.append(((reg_addr << 24) | tile_id, reg_val))
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)

    def program_and_verify():
        # Write each word and expect to read the same value back.
        for cfg_addr, cfg_val in config_data:
            tester.configure(cfg_addr, cfg_val)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_val)

    tester.reset()
    program_and_verify()

    # reset them
    tester.reset()
    for cfg_addr, _ in config_data:
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, 0)

    # configure new one
    program_and_verify()

    with tempfile.TemporaryDirectory() as tempdir:
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in dw_files:
            shutil.copy(filename, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        for aoi_mux in glob.glob("tests/*.sv"):
            shutil.copy(aoi_mux, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def test_interconnect_line_buffer(cw_files, add_pd, io_sides):
    """Add a stream to a copy of itself taken from the memory tile.

    The IO input feeds both ``m0.data_in`` and ``p0.data0``; ``m0.data_out``
    feeds ``p0.data1`` and ``p0`` is configured with ``asm.add()``.  ``m0``
    gets the word ``0x00000004 | (depth << 3)`` with ``depth = 10``.  After
    a warm-up of ``depth + 10`` samples the output is expected to be
    ``i * 2 - depth``.
    """
    depth = 10
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in"), ("p0", "data0")],
        "e1": [("m0", "data_out"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in")],
    }
    bus = {"e0": 16, "e1": 16, "e3": 16, "e4": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as line buffer mode
    mem_x, mem_y = placement["m0"]
    mem_cfg_addr = interconnect.get_config_addr(0, 0, mem_x, mem_y)
    config_data.append((mem_cfg_addr, 0x00000004 | (depth << 3)))

    # then p0 is configured as add; its addresses are formed by hand with
    # the register address above bit 24 and the tile id x << 8 | y.
    pe_x, pe_y = placement["p0"]
    tile_id = pe_x << 8 | pe_y
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    pe_config = pe_tile.core.get_config_bitstream(asm.add())
    for reg_addr, reg_val in pe_config:
        config_data.append(((reg_addr << 24) | tile_id, reg_val))

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    for cfg_addr, cfg_val in config_data:
        tester.configure(cfg_addr, cfg_val)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_val)

    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    warm_up = depth + 10
    for sample in range(200):
        tester.poke(circuit.interface[src], sample)
        tester.eval()

        if sample > warm_up:
            tester.expect(circuit.interface[dst], sample * 2 - depth)

        # toggle the clock
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in cw_files:
            shutil.copy(filename, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def test_interconnect_sram(dw_files, io_sides):
    """Preload a memory tile in SRAM mode and read it back over the fabric.

    ``m0`` is configured with ``mode = 2``.  Each of 512 config addresses
    receives four consecutive writes ``data * 4 + i`` which are then read
    back in the same order.  Afterwards addresses 0..2047 are presented on
    the address input and the same value is expected on the data output.
    The design is simulated with verilator in a temporary directory.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "addr_in_0")],
        "e1": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e2": [("i3", "io2f_1"), ("m0", "ren_in_0")],
    }
    bus = {"e0": 16, "e1": 16, "e2": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as sram mode
    mode = 2  # Mode.SRAM
    tile_en = 1
    configs_mem = [("mode", mode, 0),
                   ("tile_en", tile_en, 0),
                   ("flush_reg_sel", 1, 0)]
    mem_x, mem_y = placement["m0"]
    mcore = interconnect.tile_circuits[(mem_x, mem_y)].core
    config_mem_tile(interconnect, config_data, configs_mem,
                    mem_x, mem_y, mcore)
    config_data = compress_config_data(config_data)

    # SRAM payload: feature index is i // 256 + 1, register is i % 256.
    sram_data = [
        (interconnect.get_config_addr(row % 256, row // 256 + 1,
                                      mem_x, mem_y),
         row)
        for row in range(0, 512)
    ]

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)

    def program_and_verify():
        # Write each word and expect to read the same value back.
        for cfg_addr, cfg_val in config_data:
            tester.configure(cfg_addr, cfg_val)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_val)

    tester.reset()
    program_and_verify()

    for sram_addr, row in sram_data:
        # Four writes to the same address, then four reads of it.
        for lane in range(4):
            tester.configure(sram_addr, row * 4 + lane)
            tester.eval()
        for lane in range(4):
            tester.config_read(sram_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, row * 4 + lane)

    # The tile configuration is written and verified a second time.
    program_and_verify()
    tester.done_config()

    src = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    dst = interconnect.get_top_output_port_by_coord(placement["I1"], 16)
    ren = interconnect.get_top_input_port_by_coord(placement["i3"], 1)

    tester.step(2)
    tester.poke(circuit.interface[ren], 1)
    tester.eval()

    for word in range(2048):
        tester.poke(circuit.interface[src], word)
        tester.eval()
        tester.step(2)
        tester.eval()
        tester.expect(circuit.interface[dst], word)

    with tempfile.TemporaryDirectory() as tempdir:
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in dw_files:
            shutil.copy(filename, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        for aoi_mux in glob.glob("tests/*.sv"):
            shutil.copy(aoi_mux, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def basic_tb(config_path, stream_path, run_tb, in_file_name="input",
             out_file_name="output", cwd=None, trace=False):
    """Run a memory-tile testbench driven by a config and a stream file.

    A single memory tile sits between a 16-bit IO input and a 16-bit IO
    output.  Its configuration comes from
    ``make_memory_core().get_static_bitstream(...)`` and the stimulus and
    expected values from ``generate_data_lists``.

    :param config_path: forwarded to ``get_static_bitstream``.
    :param stream_path: CSV file name forwarded to ``generate_data_lists``.
    :param run_tb: callable invoked as
        ``run_tb(tester, cwd=cwd, trace=trace, disable_ndarray=True)``.
    :param in_file_name: forwarded to ``get_static_bitstream``.
    :param out_file_name: forwarded to ``get_static_bitstream``.
    :param cwd: forwarded to ``run_tb``.
    :param trace: forwarded to ``run_tb``.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides(),
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in_0")],
        "e1": [("m0", "data_out_0"), ("I1", "f2io_16")],
    }
    bus = {"e0": 16, "e1": 16}

    placement, routing, _ = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # Regular Bootstrap
    bootstrap_core = make_memory_core()

    # Get configuration; every (name, value) pair gets a trailing 0.
    static_cfg = bootstrap_core.get_static_bitstream(
        config_path=config_path,
        in_file_name=in_file_name,
        out_file_name=out_file_name)
    config_final = [(name, value, 0) for (name, value) in static_cfg]

    mem_x, mem_y = placement["m0"]
    placed_core = interconnect.tile_circuits[(mem_x, mem_y)].core
    config_mem_tile(interconnect, config_data, config_final,
                    mem_x, mem_y, placed_core)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    tester.zero_inputs()

    # Configuration happens while stalled; read-back values are not checked.
    tester.poke(circuit.interface["stall"], 1)
    for cfg_addr, cfg_val in config_data:
        tester.configure(cfg_addr, cfg_val)
        tester.config_read(cfg_addr)
        tester.eval()
    tester.done_config()
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    in_data, out_data, _valids = generate_data_lists(
        csv_file_name=stream_path,
        data_in_width=bootstrap_core.num_data_inputs(),
        data_out_width=bootstrap_core.num_data_outputs())

    data_in_x, data_in_y = placement["I0"]
    data_in = f"glb2io_16_X{data_in_x:02X}_Y{data_in_y:02X}"
    data_out_x, data_out_y = placement["I1"]
    data_out = f"io2glb_16_X{data_out_x:02X}_Y{data_out_y:02X}"

    # NOTE(review): the bound is len(out_data) while the samples are taken
    # from out_data[0]; confirm this is the intended number of cycles.
    for cycle in range(len(out_data)):
        tester.poke(circuit.interface[data_in], in_data[0][cycle])
        tester.eval()
        tester.expect(circuit.interface[data_out], out_data[0][cycle])
        # toggle the clock
        tester.step(2)

    run_tb(tester, cwd=cwd, trace=trace, disable_ndarray=True)
def test_stall(run_tb, io_sides):
    """Check that asserting ``stall`` freezes the design's output.

    The IO input goes through register ``r1`` into both ``m0.data_in_0`` and
    ``p0.data0``; ``m0.data_out_0`` feeds ``p0.data1`` and ``p0`` is
    configured with ``asm.add(ra_mode=asm.Mode_t.DELAY)``.  The test streams
    20 samples, stalls on the last one, pokes unrelated values for 20 more
    iterations while expecting the output to stay put, then un-stalls and
    checks that the stream resumes.
    """
    chip_size = 2
    depth = 10
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("r1", "reg")],
        "e2": [("r1", "reg"), ("m0", "data_in_0"), ("p0", "data0")],
        "e1": [("m0", "data_out_0"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in_0"), ("m0", "ren_in_0")],
        "e5": [("m0", "valid_out_0"), ("i4", "f2io_1")]
    }
    bus = {"e0": 16, "e2": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # Append the PE instruction to the routing configuration.
    x, y = placement["p0"]
    tile = interconnect.tile_circuits[(x, y)]
    add_bs = tile.core.get_config_bitstream(asm.add(ra_mode=asm.Mode_t.DELAY))
    for addr, data in add_bs:
        config_data.append((interconnect.get_config_addr(addr, 0, x, y), data))

    tile_en = 1

    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core

    # (register name, value, third field) triples handed to config_mem_tile;
    # the third field is always 0 here.
    configs_mem = [
        ("strg_ub_app_ctrl_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_output_port_0", 0, 0),
        ("strg_ub_app_ctrl_read_depth_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_wo_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_ss_0", depth, 0),
        ("strg_ub_app_ctrl_coarse_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_coarse_read_depth_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_wo_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_ss_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_2", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_3", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_sync_grp_sync_group_0", 1, 0),
        ("strg_ub_tba_0_tb_0_range_outer", depth, 0),
        ("strg_ub_tba_0_tb_0_starting_addr", 0, 0),
        ("strg_ub_tba_0_tb_0_stride", 1, 0),
        ("strg_ub_tba_0_tb_0_dimensionality", 1, 0),
        ("strg_ub_agg_align_0_line_length", depth, 0),
        ("strg_ub_tba_0_tb_0_indices_0", 0, 0),
        ("strg_ub_tba_0_tb_0_indices_1", 1, 0),
        ("strg_ub_tba_0_tb_0_indices_2", 2, 0),
        ("strg_ub_tba_0_tb_0_indices_3", 3, 0),
        ("strg_ub_tba_0_tb_0_range_inner", 4, 0),
        ("strg_ub_tba_0_tb_0_tb_height", 1, 0),
        ("tile_en", tile_en, 0),
        ("mode", 0, 0),
        ("flush_reg_sel", 1, 0),
        ("wen_in_1_reg_sel", 1, 0),
        ("ren_in_1_reg_sel", 1, 0)
    ]
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y, mcore)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # stall the chip while it is being configured
    tester.poke(circuit.interface["stall"], 1)
    tester.eval()

    # Write every configuration word and read it straight back.
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # un-stall the chip
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid_x, valid_y = placement["i4"]
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    # Net e4 drives both wen_in_0 and ren_in_0 of m0 from this one input.
    tester.poke(circuit.interface[wen], 1)

    for i in range(20):
        tester.poke(circuit.interface[src], i)
        tester.eval()

        if i >= 10 + 1:
            # data0 of PE: i - 1 - 1
            # data1 of PE: i - 1 - depth
            tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
            tester.expect(circuit.interface[valid], 1)
        elif i < depth:
            tester.expect(circuit.interface[valid], 0)
        if i == 19:
            # now stall everything
            tester.poke(circuit.interface["stall"], 1)
            tester.eval()

        tester.step(2)

    for i in range(20):
        # poke arbitrary numbers. it shouldn't matter while stalled: the
        # output is expected to hold the value computed for i == 19
        tester.poke(circuit.interface[src], i * 20)
        tester.expect(circuit.interface[dst], 19 * 2 - 3 - depth)
        tester.step(2)

    # un-stall again
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # The stream is resumed from sample 19, the one on which it was stalled.
    for i in range(19, 30):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
        tester.expect(circuit.interface[valid], 1)
        tester.step(2)

    run_tb(tester)
def test_pond_pe(verilator=True):
    """Exercise a PE whose data0 operand comes from its pond buffer.

    A 2x2 CGRA with pond enabled is placed and routed so that I0 feeds
    the pond input, the pond output feeds PE ``data0``, I1 feeds PE
    ``data1`` and the ALU result leaves through I2.  The PE is
    configured with ``asm.umult0()``.

    For the first 16 iterations the values 0..15 are driven on I0; for
    the following 16 a seeded random operand is driven on I1 and the
    output is expected to equal ``(i - 16) * operand``.

    Args:
        verilator: when exactly ``False`` the testbench is compiled for
            the "system-verilog" target with the vcs simulator;
            otherwise the "verilator" target is used.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides(),
                               num_tracks=3,
                               add_pd=True,
                               add_pond=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data_in_pond")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e2": [("p0", "alu_res"), ("I2", "f2io_16")],
        "e3": [("p0", "data_out_pond"), ("p0", "data0")]
    }
    # every net in this design is 16 bits wide
    bus = dict.fromkeys(netlist, 16)

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    pe_x, pe_y = placement["p0"]
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    pond_core = pe_tile.additional_cores[0]

    # PE instruction bits, translated to global config addresses
    for reg_addr, reg_data in pe_tile.core.get_config_bitstream(asm.umult0()):
        config_data.append(
            (interconnect.get_config_addr(reg_addr, 0, pe_x, pe_y), reg_data))

    # pond read / write controller settings handed to generate_pond_api
    # (original note: ranges, strides, dimensionality, starting addr,
    # schedule starting addr)
    read_ctrl = [[16, 1], [1, 1], 2, 0, 16]
    write_ctrl = [[16, 1], [1, 1], 2, 0, 0]
    generate_pond_api(interconnect, pond_core, read_ctrl, write_ctrl,
                      pe_x, pe_y, config_data)

    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)

    # hold the fabric stalled while the bitstream is written and read back
    tester.poke(circuit.interface["stall"], 1)
    for cfg_addr, cfg_value in config_data:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)
    tester.done_config()

    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    in0_x, in0_y = placement["I0"]
    in1_x, in1_y = placement["I1"]
    src_name0 = f"glb2io_16_X{in0_x:02X}_Y{in0_y:02X}"
    src_name1 = f"glb2io_16_X{in1_x:02X}_Y{in1_y:02X}"
    out_x, out_y = placement["I2"]
    dst_name = f"io2glb_16_X{out_x:02X}_Y{out_y:02X}"

    random.seed(0)

    for i in range(32):
        if i < 16:
            # first half: stream 0..15 in through I0
            tester.poke(circuit.interface[src_name0], i)
            tester.eval()
        else:
            # second half: drive a random operand on I1 and check the product
            operand = random.randrange(0, 256)
            tester.poke(circuit.interface[src_name1], operand)
            tester.eval()
            tester.expect(circuit.interface[dst_name], (i - 16) * operand)

        tester.step(2)
        tester.eval()

    with tempfile.TemporaryDirectory() as tempdir:
        # gather every RTL source the simulator needs into one directory
        for rtl_file in glob.glob("genesis_verif/*.*"):
            shutil.copy(rtl_file, tempdir)
        for dw_file in dw_files():
            shutil.copy(dw_file, tempdir)
        shutil.copy(os.path.join("tests", "test_memory_core",
                                 "sram_stub.v"),
                    os.path.join(tempdir, "sram_512w_16b.v"))
        for sv_file in glob.glob("tests/*.sv"):
            shutil.copy(sv_file, tempdir)

        runtime_kwargs = {
            "magma_output": "coreir-verilog",
            "magma_opts": {
                "coreir_libs": {"float_DW"}
            },
            "directory": tempdir,
            "flags": ["-Wno-fatal", "--trace"]
        }
        if verilator is False:
            target = "system-verilog"
            runtime_kwargs["simulator"] = "vcs"
        else:
            target = "verilator"

        tester.compile_and_run(target=target,
                               tmp_dir=False,
                               **runtime_kwargs)
def test_interconnect_line_buffer_last_line_valid(cw_files, add_pd, io_sides,
                                                  stencil_width, depth):
    """Check the line-buffer ``valid_out`` window over three lines of input.

    A memory tile (m0) is configured in ``Mode.LINE_BUFFER`` with the
    given ``depth`` and ``stencil_width``; a PE (p0) is configured with
    ``asm.add()``.  With write-enable held high for ``3 * depth``
    iterations, only the valid signal is checked:

    * low while ``i < depth + stencil_width - 1``
    * high until ``i`` reaches ``2 * depth``
    * low again until ``2 * depth + stencil_width - 1``
    * high for the remainder

    Args:
        cw_files: iterable of file paths copied next to the generated RTL.
        add_pd: forwarded to ``create_cgra`` as ``add_pd``.
        io_sides: forwarded to ``create_cgra``.
        stencil_width: value written to the "stencil_width" register.
        depth: value written to the "depth" register.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in"), ("p0", "data0")],
        "e1": [("m0", "data_out"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in")],
        "e5": [("m0", "valid_out"), ("i4", "f2io_1")]
    }
    bus = {"e0": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # m0 runs as a line buffer
    mode = Mode.LINE_BUFFER
    tile_en = 1

    mem_x, mem_y = placement["m0"]
    mcore = interconnect.tile_circuits[(mem_x, mem_y)].core

    # memory-tile registers, written in this order
    mem_registers = (
        ("depth", depth),
        ("mode", mode.value),
        ("stencil_width", stencil_width),
        ("tile_en", tile_en),
    )
    for reg_name, reg_value in mem_registers:
        reg_addr = interconnect.get_config_addr(
            mcore.get_reg_index(reg_name), 0, mem_x, mem_y)
        config_data.append((reg_addr, reg_value))

    # p0 is configured as an adder; its address is packed by hand here
    pe_x, pe_y = placement["p0"]
    tile_id = pe_x << 8 | pe_y
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    for reg_addr, reg_data in pe_tile.core.get_config_bitstream(asm.add()):
        config_data.append(((reg_addr << 24) | tile_id, reg_data))

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # write each config word and read it straight back
    for cfg_addr, cfg_value in config_data:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)

    def port_name(prefix, instance):
        # top-level port name for the IO tile that `instance` was placed on
        x, y = placement[instance]
        return f"{prefix}_X{x:02X}_Y{y:02X}"

    src = port_name("glb2io_16", "I0")
    dst = port_name("io2glb_16", "I1")
    wen = port_name("glb2io_1", "i3")
    valid = port_name("io2glb_1", "i4")

    tester.poke(circuit.interface[wen], 1)

    # the driven data value stays constant; only `valid` is checked below
    counter = 0
    for i in range(3 * depth):
        tester.poke(circuit.interface[src], counter)
        tester.eval()

        if i < depth + stencil_width - 1:
            expected_valid = 0
        elif i < 2 * depth:
            expected_valid = 1
        elif i < 2 * depth + stencil_width - 1:
            expected_valid = 0
        else:
            expected_valid = 1
        tester.expect(circuit.interface[valid], expected_valid)

        # one full clock period
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        for rtl_file in glob.glob("genesis_verif/*.*"):
            shutil.copy(rtl_file, tempdir)
        for cw_file in cw_files:
            shutil.copy(cw_file, tempdir)
        shutil.copy(os.path.join("tests", "test_memory_core",
                                 "sram_stub.v"),
                    os.path.join(tempdir, "sram_512w_16b.v"))
        for sv_file in glob.glob("tests/*.sv"):
            shutil.copy(sv_file, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal", "--trace"])
def test_interconnect_fifo(dw_files, io_sides, depth):
    """Drive random FIFO traffic through a memory tile and check it
    against a software ``deque`` model.

    The memory tile m0 is configured in FIFO mode with the given
    ``depth``.  For 2048 iterations one of four moves is chosen at
    random (read, write, read-and-write, idle).  After each move the
    ``empty``, ``full`` and ``valid`` outputs are checked, and the data
    output is checked whenever a read is expected to be valid.  The
    ``empty``/``full`` expectations use the model occupancy sampled
    *before* the move is applied.

    Original author's note: this test was carried over from the Genesis
    to the Kratos memory core and is basically the same.

    Args:
        dw_files: iterable of file paths copied next to the generated RTL.
        io_sides: forwarded to ``create_cgra``.
        depth: FIFO depth written to "fifo_ctrl_fifo_depth".
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in_0")],
        "e1": [("i3", "io2f_1"), ("m0", "wen_in_0")],
        "e2": [("i4", "io2f_1"), ("m0", "ren_in_0")],
        "e3": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e4": [("m0", "valid_out_0"), ("i4", "f2io_1")],
        "e5": [("m0", "empty"), ("i2", "f2io_1")],
        "e6": [("m0", "full"), ("i3", "f2io_1")]
    }
    bus = {"e0": 16, "e1": 1, "e2": 1, "e3": 16, "e4": 1, "e5": 1, "e6": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as fifo mode
    mode = 1  # Mode.FIFO
    tile_en = 1

    configs_mem = [("fifo_ctrl_fifo_depth", depth, 0),
                   ("mode", mode, 0),
                   ("tile_en", tile_en, 0),
                   ("flush_reg_sel", 1, 0)]
    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core
    config_mem_tile(interconnect, config_data, configs_mem,
                    mem_x, mem_y, mcore)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # write each config word and read it straight back
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # i3 and i4 each carry one input and one output of the tile, so the
    # same placement coordinate is looked up for both directions
    src = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    dst = interconnect.get_top_output_port_by_coord(placement["I1"], 16)
    wen = interconnect.get_top_input_port_by_coord(placement["i3"], 1)
    valid = interconnect.get_top_output_port_by_coord(placement["i4"], 1)
    ren = interconnect.get_top_input_port_by_coord(placement["i4"], 1)
    full = interconnect.get_top_output_port_by_coord(placement["i3"], 1)
    empty = interconnect.get_top_output_port_by_coord(placement["i2"], 1)

    tester.step(1)

    # fixed seed so a failing stimulus sequence can be reproduced
    random.seed(0)

    # software model: new entries enter on the left, reads pop on the right
    fifo = deque()
    valid_check = 0
    most_recent_read = 0
    for i in range(2048):
        # occupancy before this iteration's move drives empty/full checks
        len_fifo = len(fifo)

        # Pick random from (READ, WRITE, READ_AND_WRITE, IDLE)
        move = random.randint(0, 3)
        if move == 0:
            # read
            tester.poke(circuit.interface[ren], 1)
            if fifo:
                most_recent_read = fifo.pop()
                valid_check = 1
            else:
                valid_check = 0
        elif move == 1:
            # write; the model drops the value when it is already full
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[src], write_val)
            if len(fifo) < depth:
                fifo.appendleft(write_val)
            valid_check = 0
        elif move == 2:
            # r and w
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[ren], 1)
            tester.poke(circuit.interface[src], write_val)
            fifo.appendleft(write_val)
            most_recent_read = fifo.pop()
            valid_check = 1
        else:
            # If not doing anything, valid will be low, and we expect
            # to see the same output as before
            valid_check = 0
        tester.eval()

        tester.expect(circuit.interface[empty], len_fifo == 0)
        tester.expect(circuit.interface[full], len_fifo == depth)
        tester.expect(circuit.interface[valid], valid_check)
        if valid_check:
            tester.expect(circuit.interface[dst], most_recent_read)
        tester.step(2)

        # de-assert both strobes before the next move
        tester.poke(circuit.interface[wen], 0)
        tester.poke(circuit.interface[ren], 0)

    with tempfile.TemporaryDirectory() as tempdir:
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in dw_files:
            shutil.copy(filename, tempdir)
        shutil.copy(os.path.join("tests", "test_memory_core",
                                 "sram_stub.v"),
                    os.path.join(tempdir, "sram_512w_16b.v"))
        for aoi_mux in glob.glob("tests/*.sv"):
            shutil.copy(aoi_mux, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def test_interconnect_sram(run_tb, io_sides):
    """Load a memory tile through configuration and read it back over
    the fabric in SRAM mode.

    The tile m0 is configured with mode value 2 (noted in the original
    as ``Mode.SRAM``).  512 configuration addresses, spread over
    features 1 and 2 with 256 addresses each, are each written four
    times with the values ``4 * k + 0 .. 4 * k + 3``.  Afterwards read
    enable is held high and addresses 0..2047 are driven on I0; the
    output on I1 is expected to equal the address.

    Original author's note: ported from the Genesis to the Kratos
    memory core, basically unchanged.

    Args:
        run_tb: callable that compiles and runs the finished tester.
        io_sides: forwarded to ``create_cgra``.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "addr_in_0")],
        "e1": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e2": [("i3", "io2f_1"), ("m0", "ren_in_0")]
    }
    bus = {"e0": 16, "e1": 16, "e2": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    mode = 2  # Mode.SRAM
    tile_en = 1
    configs_mem = [("mode", mode, 0),
                   ("tile_en", tile_en, 0),
                   ("flush_reg_sel", 1, 0)]
    mem_x, mem_y = placement["m0"]
    mcore = interconnect.tile_circuits[(mem_x, mem_y)].core
    config_mem_tile(interconnect, config_data, configs_mem,
                    mem_x, mem_y, mcore)
    config_data = compress_config_data(config_data)

    # SRAM contents: entry k lives at address k % 256 of feature k // 256 + 1
    sram_data = [
        (interconnect.get_config_addr(k % 256, k // 256 + 1, mem_x, mem_y), k)
        for k in range(0, 512)
    ]

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()

    # first pass over the tile / routing configuration
    for cfg_addr, cfg_value in config_data:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)

    for sram_addr, base in sram_data:
        # four consecutive writes to the same config address
        for lane in range(4):
            tester.configure(sram_addr, base * 4 + lane)
            tester.eval()
        # original note here: "currently read back doesn't work";
        # the read-back is still performed and checked
        for lane in range(4):
            tester.config_read(sram_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, base * 4 + lane)

    # the configuration is written and verified a second time
    for cfg_addr, cfg_value in config_data:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)

    tester.done_config()

    src = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    dst = interconnect.get_top_output_port_by_coord(placement["I1"], 16)
    ren = interconnect.get_top_input_port_by_coord(placement["i3"], 1)

    tester.step(2)
    tester.poke(circuit.interface[ren], 1)
    tester.eval()

    # drive an address, clock once, and expect that address as data
    for word in range(2048):
        tester.poke(circuit.interface[src], word)
        tester.eval()
        tester.step(2)
        tester.eval()
        tester.expect(circuit.interface[dst], word)

    run_tb(tester)
def test_stall(dw_files, io_sides):
    """Check that asserting ``stall`` freezes the fabric output.

    The input I0 goes through a register (r1) and then fans out to a
    memory tile (m0) and to PE ``data0``; the memory output feeds PE
    ``data1``.  The PE is configured with
    ``asm.add(ra_mode=asm.Mode_t.DELAY)`` and the memory tile with a
    depth of 10.

    The test runs in three phases:

    1. 20 iterations of a counting input; from iteration 11 on the
       output is expected to be ``i * 2 - 3 - depth`` with valid high.
       Stall is asserted on the last iteration.
    2. 20 stalled iterations with changing input; the output is
       expected to stay at the iteration-19 value.
    3. Stall is released and iterations 19..29 are expected to follow
       the same formula as phase 1.

    Args:
        dw_files: iterable of file paths copied next to the generated RTL.
        io_sides: forwarded to ``create_cgra``.
    """
    chip_size = 2
    depth = 10
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("r1", "reg")],
        "e2": [("r1", "reg"), ("m0", "data_in_0"), ("p0", "data0")],
        "e1": [("m0", "data_out_0"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in_0"), ("m0", "ren_in_0")],
        "e5": [("m0", "valid_out_0"), ("i4", "f2io_1")]
    }
    bus = {"e0": 16, "e2": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # PE instruction bits, translated to global config addresses
    pe_x, pe_y = placement["p0"]
    pe_core = interconnect.tile_circuits[(pe_x, pe_y)].core
    pe_bits = pe_core.get_config_bitstream(asm.add(ra_mode=asm.Mode_t.DELAY))
    for reg_addr, reg_data in pe_bits:
        config_data.append(
            (interconnect.get_config_addr(reg_addr, 0, pe_x, pe_y), reg_data))

    tile_en = 1

    mem_x, mem_y = placement["m0"]
    mcore = interconnect.tile_circuits[(mem_x, mem_y)].core

    # memory-tile register settings consumed by config_mem_tile
    configs_mem = [
        ("strg_ub_app_ctrl_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_read_depth_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_wo_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_ss_0", depth, 0),
        ("strg_ub_app_ctrl_coarse_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_coarse_read_depth_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_wo_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_ss_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_2", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_3", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_sync_grp_sync_group_0", 1, 0),
        ("strg_ub_tba_0_tb_0_range_outer", depth, 0),
        ("strg_ub_tba_0_tb_0_starting_addr", 0, 0),
        ("strg_ub_tba_0_tb_0_stride", 1, 0),
        ("strg_ub_tba_0_tb_0_dimensionality", 1, 0),
        ("strg_ub_agg_align_0_line_length", depth, 0),
        # four 3-bit indices (0, 1, 2, 3) packed into one register value
        ("strg_ub_tba_0_tb_0_indices_merged_0",
            (0 << 0) | (1 << 3) | (2 << 6) | (3 << 9), 0),
        ("strg_ub_tba_0_tb_0_range_inner", 4, 0),
        ("strg_ub_tba_0_tb_0_tb_height", 1, 0),
        ("tile_en", tile_en, 0),
        ("mode", 0, 0),
        ("flush_reg_sel", 1, 0),
        ("wen_in_1_reg_sel", 1, 0),
        ("ren_in_1_reg_sel", 1, 0)
    ]
    config_mem_tile(interconnect, config_data, configs_mem,
                    mem_x, mem_y, mcore)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # keep the chip stalled while it is being configured
    tester.poke(circuit.interface["stall"], 1)
    tester.eval()

    for cfg_addr, cfg_value in config_data:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)

    # release the stall before driving data
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    def port_name(prefix, instance):
        # top-level port name for the IO tile that `instance` was placed on
        x, y = placement[instance]
        return f"{prefix}_X{x:02X}_Y{y:02X}"

    src = port_name("glb2io_16", "I0")
    dst = port_name("io2glb_16", "I1")
    wen = port_name("glb2io_1", "i3")
    valid = port_name("io2glb_1", "i4")

    tester.poke(circuit.interface[wen], 1)

    # phase 1: normal operation
    for i in range(20):
        tester.poke(circuit.interface[src], i)
        tester.eval()

        if i >= 10 + 1:
            # data0 of PE: i - 1 - 1
            # data1 of PE: i - 1 - depth
            tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
            tester.expect(circuit.interface[valid], 1)
        elif i < depth:
            tester.expect(circuit.interface[valid], 0)

        if i == 19:
            # last iteration: stall everything before the clock edge
            tester.poke(circuit.interface["stall"], 1)
            tester.eval()

        tester.step(2)

    # phase 2: stalled; whatever is driven in must not reach the output
    for i in range(20):
        tester.poke(circuit.interface[src], i * 20)
        tester.expect(circuit.interface[dst], 19 * 2 - 3 - depth)
        tester.step(2)

    # phase 3: release the stall and resume where phase 1 stopped
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    for i in range(19, 30):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
        tester.expect(circuit.interface[valid], 1)
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        for rtl_file in glob.glob("genesis_verif/*.*"):
            shutil.copy(rtl_file, tempdir)
        for dw_file in dw_files:
            shutil.copy(dw_file, tempdir)
        shutil.copy(os.path.join("tests", "test_memory_core",
                                 "sram_stub.v"),
                    os.path.join(tempdir, "sram_512w_16b.v"))
        for sv_file in glob.glob("tests/*.sv"):
            shutil.copy(sv_file, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def test_pond_pe_acc(run_tb):
    """Check a PE/pond feedback loop against a running-sum expectation.

    On a 2x2 CGRA with pond enabled, I0 feeds PE ``data0``, the pond
    output feeds both PE ``data1`` and the output I1, and the ALU
    result is written back into the pond.  The PE is configured with
    ``asm.add()``.

    For 16 iterations the value ``i + 1`` is driven on I0 and the output
    is expected to equal the running sum of ``i`` (0, 1, 3, 6, ...).

    Args:
        run_tb: callable that compiles and runs the finished tester.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides(),
                               num_tracks=3,
                               add_pd=True,
                               add_pond=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("p0", "data_out_pond"), ("p0", "data1")],
        "e2": [("p0", "alu_res"), ("p0", "data_in_pond")],
        "e3": [("p0", "data_out_pond"), ("I1", "f2io_16")]
    }
    # every net in this design is 16 bits wide
    bus = dict.fromkeys(netlist, 16)

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    pe_x, pe_y = placement["p0"]
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    pond_core = pe_tile.additional_cores[0]

    # PE instruction bits, translated to global config addresses
    for reg_addr, reg_data in pe_tile.core.get_config_bitstream(asm.add()):
        config_data.append(
            (interconnect.get_config_addr(reg_addr, 0, pe_x, pe_y), reg_data))

    # pond read / write controller settings handed to generate_pond_api;
    # NOTE(review): field meaning is defined by generate_pond_api - confirm
    read_ctrl = [[16, 1], [0, 0], 2, 8, 0, [1, 0]]
    write_ctrl = [[16, 1], [0, 0], 2, 8, 0, [1, 0]]
    generate_pond_api(interconnect, pond_core, read_ctrl, write_ctrl,
                      pe_x, pe_y, config_data)

    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()

    # hold the fabric stalled while the bitstream is written and read back
    tester.poke(circuit.interface["stall"], 1)
    for cfg_addr, cfg_value in config_data:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)
    tester.done_config()

    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    in_x, in_y = placement["I0"]
    src_name0 = f"glb2io_16_X{in_x:02X}_Y{in_y:02X}"
    out_x, out_y = placement["I1"]
    dst_name = f"io2glb_16_X{out_x:02X}_Y{out_y:02X}"

    random.seed(0)

    running_sum = 0
    for i in range(16):
        tester.poke(circuit.interface[src_name0], i + 1)
        running_sum += i
        tester.eval()
        tester.expect(circuit.interface[dst_name], running_sum)

        tester.step(2)
        tester.eval()

    run_tb(tester)