Exemple #1
0
 def compile(self, halide_src, unconstrained_io=False, compact=False):
     """Compile ``halide_src`` into a compressed bitstream plus IO metadata.

     The design is mapped with ``self.map``, placed and routed with
     ``archipelago.pnr``, patched with the result of
     ``archipelago.power.reduce_switching`` and then serialised into
     configuration data.  A meta file named ``"design"`` is dumped into
     the directory that contains ``halide_src``.

     Args:
         halide_src: path handed to ``self.map``; its directory is also
             passed to the PnR call as ``copy_to_dir``.
         unconstrained_io: when true, no fixed IO positions are given to
             the PnR call; otherwise ``place_io_blk`` provides them.
         compact: forwarded to ``archipelago.pnr`` and
             ``archipelago.power.reduce_switching``.

     Returns:
         ``(bitstream, (input_interface, output_interface, reset, valid,
         en, delay))`` where ``delay`` is 1 if ``has_rom(id_to_name)`` is
         truthy and 0 otherwise.
     """
     id_to_name, instance_to_instr, netlist, bus = self.map(halide_src)
     app_dir = os.path.dirname(halide_src)
     fixed_io = None if unconstrained_io else place_io_blk(id_to_name)

     placement, routing = archipelago.pnr(
         self.interconnect, (netlist, bus),
         cwd="temp",
         id_to_name=id_to_name,
         fixed_pos=fixed_io,
         compact=compact,
         copy_to_dir=app_dir)

     # merge the switching-reduction result into the routing in place
     switching_fix = archipelago.power.reduce_switching(
         routing, self.interconnect, compact=compact)
     routing.update(switching_fix)

     # routing configuration first, then the per-instance configuration
     config = list(self.interconnect.get_route_bitstream(routing))
     config.extend(self.get_placement_bitstream(placement, id_to_name,
                                                instance_to_instr))
     skip_addr = self.interconnect.get_skip_addr()
     config = compress_config_data(config, skip_compression=skip_addr)

     inputs, outputs = self.get_input_output(netlist)
     io_info = self.get_io_interface(inputs, outputs, placement, id_to_name)
     input_interface, output_interface, (reset, valid, en) = io_info

     delay = 1 if has_rom(id_to_name) else 0
     # also write out the meta file
     archipelago.io.dump_meta_file(halide_src, "design", app_dir)
     return config, (input_interface, output_interface, reset, valid, en,
                     delay)
Exemple #2
0
def test_interconnect_point_wise(batch_size: int, run_tb, io_sides):
    """Check a point-wise multiply placed and routed on a 2x2 CGRA.

    Two 16-bit IO inputs (``I0``, ``I1``) feed ``data0``/``data1`` of PE
    ``p0``, which is configured with ``asm.umult0()``; ``alu_res`` is
    routed to IO output ``I2``.  After every configuration word has been
    written and read back, ``batch_size`` random operand pairs are poked
    and the output is expected to equal their product.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3, add_pd=True, mem_ratio=(1, 2))

    bus = {"e0": 16, "e1": 16, "e3": 16}
    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }

    placement, routing, _ = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # append the PE (multiplier) configuration to the routing configuration
    pe_x, pe_y = placement["p0"]
    pe_core = interconnect.tile_circuits[(pe_x, pe_y)].core
    mult_bs = pe_core.get_config_bitstream(asm.umult0())
    for reg_addr, reg_data in mult_bs:
        full_addr = interconnect.get_config_addr(reg_addr, 0, pe_x, pe_y)
        config_data.append((full_addr, reg_data))
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()
    # write each configuration word and verify it by reading it back
    for cfg_addr, cfg_value in config_data:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)

    tester.done_config()

    # resolve the top-level port names from the placed IO coordinates
    in_port_a = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    in_port_b = interconnect.get_top_input_port_by_coord(placement["I1"], 16)
    out_port = interconnect.get_top_output_port_by_coord(placement["I2"], 16)

    random.seed(0)
    for _ in range(batch_size):
        operand_a = random.randrange(0, 256)
        operand_b = random.randrange(0, 256)
        tester.poke(circuit.interface[in_port_a], operand_a)
        tester.poke(circuit.interface[in_port_b], operand_b)

        tester.eval()
        tester.expect(circuit.interface[out_port], operand_a * operand_b)

    run_tb(tester)
Exemple #3
0
def test_interconnect_reset(batch_size: int, run_tb, io_sides):
    """Check that a reset clears the configuration of a 2x2 CGRA.

    The same multiplier netlist as the point-wise test is placed and
    routed.  The configuration is written and read back, the chip is
    reset and every configured address is expected to read 0, and then
    the configuration is written and verified a second time.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3, add_pd=True, mem_ratio=(1, 2))

    bus = {"e0": 16, "e1": 16, "e3": 16}
    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    pe_x, pe_y = placement["p0"]

    # the full address is built by hand: register address above bit 24,
    # tile id (x in the upper byte, y in the lower byte) below it
    tile_id = (pe_x << 8) | pe_y
    pe_core = interconnect.tile_circuits[(pe_x, pe_y)].core
    mult_bs = pe_core.get_config_bitstream(asm.umult0())
    for reg_addr, reg_data in mult_bs:
        config_data.append(((reg_addr << 24) | tile_id, reg_data))
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    def program_and_check():
        # write every word and expect to read the same value back
        for cfg_addr, cfg_value in config_data:
            tester.configure(cfg_addr, cfg_value)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_value)

    program_and_check()

    # after a reset every configured register must read back as zero
    tester.reset()
    for cfg_addr, _ in config_data:
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, 0)

    # the chip must accept a fresh configuration after the reset
    program_and_check()

    run_tb(tester)
Exemple #4
0
 def compile_virtualize(self, halide_src, max_group):
     """Compile ``halide_src`` into one compressed bitstream per partition.

     The design is mapped with ``self.map`` and handed to
     ``archipelago.pnr_virtualize``.  For every entry of its result the
     routing and placement configuration are concatenated and compressed.

     Args:
         halide_src: path handed to ``self.map``.
         max_group: forwarded to ``archipelago.pnr_virtualize``.

     Returns:
         A dict keyed like the ``pnr_virtualize`` result, mapping each
         key to its compressed bitstream.
     """
     id_to_name, instance_to_instr, netlist, bus = self.map(halide_src)
     partitions = archipelago.pnr_virtualize(
         self.interconnect, (netlist, bus),
         cwd="temp", id_to_name=id_to_name, max_group=max_group)

     bitstreams = {}
     for cluster_id, (pnr_result, cluster_names) in partitions.items():
         placement, routing = pnr_result
         # routing configuration first, then the placed instances
         config = list(self.interconnect.get_route_bitstream(routing))
         config.extend(self.get_placement_bitstream(placement,
                                                    cluster_names,
                                                    instance_to_instr))
         skip_addr = self.interconnect.get_skip_addr()
         bitstreams[cluster_id] = compress_config_data(
             config, skip_compression=skip_addr)
     return bitstreams
def test_peak_tile_sequence(sequence, seed, run_tb):
    """
    Tile-level sequence test on a standalone 1x1 CGRA.

    * routes the ports ``data0``, ``data1`` and ``alu_res`` of tile (0, 0)
      with ``route_one_tile`` and loads the resulting routing bitstream
    * the driver configures the tile for each sequence item and drives the
      two operands onto the top-level ports that the route maps to
      ``data0`` and ``data1``
    * the monitor expects the item's output on the top-level port that the
      route maps to ``alu_res``
    """
    tile_x, tile_y = 0, 0
    interconnect = create_cgra(1, 1, IOSide.None_, num_tracks=3,
                               standalone=True)

    routing, port_mapping = route_one_tile(
        interconnect, tile_x, tile_y,
        ports=["data0", "data1", "alu_res"],
        seed=seed)
    route_config = compress_config_data(
        interconnect.get_route_bitstream(routing))

    circuit = interconnect.circuit()
    port_a = port_mapping["data0"]
    port_b = port_mapping["data1"]
    port_out = port_mapping["alu_res"]

    class TileDriver(Driver):
        def lower(self, config_data, a, b, output):
            # translate tile-local register addresses into full addresses
            for reg_addr, reg_data in config_data:
                full_addr = interconnect.get_config_addr(reg_addr, 0,
                                                         tile_x, tile_y)
                self.tester.configure(full_addr, reg_data)
            setattr(self.tester.circuit, port_a, a)
            setattr(self.tester.circuit, port_b, b)

    class TileMonitor(Monitor):
        def observe(self, config_data, a, b, output):
            getattr(self.tester.circuit, port_out).expect(output)

    tester = BasicSequenceTester(circuit, TileDriver(), TileMonitor(),
                                 sequence, circuit.clk, circuit.reset)
    tester.reset()
    # the routing is loaded once, before the sequence is run
    for cfg_addr, cfg_value in route_config:
        tester.configure(cfg_addr, cfg_value)

    run_tb(tester)
Exemple #6
0
 def generate_bitstream(self, halide_src, placement, routing, id_to_name, instance_to_instr, netlist, bus, compact=False):
     """Assemble a compressed bitstream from an existing place-and-route.

     The routing is first updated in place with the result of
     ``archipelago.power.reduce_switching``.  Routing and placement
     configuration are then concatenated and compressed, and a meta file
     named ``"design"`` is dumped into the directory of ``halide_src``.

     Args:
         halide_src: path used for the meta file and its directory.
         placement: placement result passed to the bitstream and IO
             helpers.
         routing: routing result; mutated via ``update``.
         id_to_name: passed to the placement and IO helpers.
         instance_to_instr: passed to ``self.get_placement_bitstream``.
         netlist: passed to ``self.get_input_output``.
         bus: accepted but not read by this method.
         compact: forwarded to ``archipelago.power.reduce_switching``.

     Returns:
         ``(bitstream, (input_interface, output_interface, reset, valid,
         en, delay))`` with ``delay`` always 0.
     """
     switching_fix = archipelago.power.reduce_switching(
         routing, self.interconnect, compact=compact)
     routing.update(switching_fix)

     # routing configuration first, then the per-instance configuration
     config = list(self.interconnect.get_route_bitstream(routing))
     config.extend(self.get_placement_bitstream(placement, id_to_name,
                                                instance_to_instr))
     skip_addr = self.interconnect.get_skip_addr()
     config = compress_config_data(config, skip_compression=skip_addr)

     inputs, outputs = self.get_input_output(netlist)
     io_info = self.get_io_interface(inputs, outputs, placement, id_to_name)
     input_interface, output_interface, (reset, valid, en) = io_info

     delay = 0
     # also write out the meta file
     meta_dir = os.path.dirname(halide_src)
     archipelago.io.dump_meta_file(halide_src, "design", meta_dir)
     return config, (input_interface, output_interface, reset, valid, en,
                     delay)
Exemple #7
0
def test_interconnect_reset(batch_size: int, dw_files, io_sides):
    """Check that a reset clears the configuration of a 2x2 CGRA.

    A multiplier netlist is placed and routed.  The configuration is
    written and read back, the chip is reset and every configured address
    is expected to read 0, and then the configuration is written and
    verified again.  The test bench is compiled and run with Verilator in
    a temporary directory populated with the required Verilog sources.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3, add_pd=True, mem_ratio=(1, 2))

    bus = {"e0": 16, "e1": 16, "e3": 16}
    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    pe_x, pe_y = placement["p0"]

    # the full address is built by hand: register address above bit 24,
    # tile id (x in the upper byte, y in the lower byte) below it
    tile_id = (pe_x << 8) | pe_y
    pe_core = interconnect.tile_circuits[(pe_x, pe_y)].core
    mult_bs = pe_core.get_config_bitstream(asm.umult0())
    for reg_addr, reg_data in mult_bs:
        config_data.append(((reg_addr << 24) | tile_id, reg_data))
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    def program_and_check():
        # write every word and expect to read the same value back
        for cfg_addr, cfg_value in config_data:
            tester.configure(cfg_addr, cfg_value)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_value)

    program_and_check()

    # after a reset every configured register must read back as zero
    tester.reset()
    for cfg_addr, _ in config_data:
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, 0)

    # the chip must accept a fresh configuration after the reset
    program_and_check()

    with tempfile.TemporaryDirectory() as tempdir:
        # gather every source the simulation needs into one directory
        for verilog_path in glob.glob("genesis_verif/*.*"):
            shutil.copy(verilog_path, tempdir)
        for dw_path in dw_files:
            shutil.copy(dw_path, tempdir)
        # the SRAM stub is installed under the macro's file name
        sram_stub = os.path.join("tests", "test_memory_core", "sram_stub.v")
        shutil.copy(sram_stub, os.path.join(tempdir, "sram_512w_16b.v"))
        for sv_path in glob.glob("tests/*.sv"):
            shutil.copy(sv_path, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
Exemple #8
0
def test_stall(run_tb, io_sides):
    """Check that the global ``stall`` input freezes a routed design.

    Netlist placed on a 2x2 CGRA:

    * ``I0`` -> register ``r1`` -> memory ``m0.data_in_0`` and PE
      ``p0.data0``
    * ``m0.data_out_0`` -> ``p0.data1``, ``p0.alu_res`` -> ``I1``
    * ``i3`` drives ``m0.wen_in_0`` / ``m0.ren_in_0`` and
      ``m0.valid_out_0`` goes to ``i4``

    The PE is configured with ``asm.add(ra_mode=asm.Mode_t.DELAY)`` and the
    memory tile with the register table ``configs_mem``.  The configuration
    is written while ``stall`` is 1.  The test then streams an increasing
    counter, asserts ``stall`` on the last sample and expects the output to
    hold while the input keeps changing, and finally releases ``stall`` and
    expects the stream to continue.
    """
    chip_size = 2
    # used both in the memory configuration below and in the expected
    # output values
    depth = 10
    interconnect = create_cgra(chip_size,
                               chip_size,
                               io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("r1", "reg")],
        "e2": [("r1", "reg"), ("m0", "data_in_0"), ("p0", "data0")],
        "e1": [("m0", "data_out_0"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in_0"), ("m0", "ren_in_0")],
        "e5": [("m0", "valid_out_0"), ("i4", "f2io_1")]
    }
    bus = {"e0": 16, "e2": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    x, y = placement["p0"]

    # append the PE (adder) configuration to the routing configuration
    tile = interconnect.tile_circuits[(x, y)]
    add_bs = tile.core.get_config_bitstream(asm.add(ra_mode=asm.Mode_t.DELAY))
    for addr, data in add_bs:
        config_data.append((interconnect.get_config_addr(addr, 0, x, y), data))

    tile_en = 1

    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core

    # memory-tile registers as 3-tuples handed to config_mem_tile
    # NOTE(review): presumably (register name, value, feature index) -
    # confirm against config_mem_tile
    configs_mem = [
        ("strg_ub_app_ctrl_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_output_port_0", 0, 0),
        ("strg_ub_app_ctrl_read_depth_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_wo_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_ss_0", depth, 0),
        ("strg_ub_app_ctrl_coarse_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_coarse_read_depth_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_wo_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_ss_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_2", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_3", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_sync_grp_sync_group_0", 1, 0),
        ("strg_ub_tba_0_tb_0_range_outer", depth, 0),
        ("strg_ub_tba_0_tb_0_starting_addr", 0, 0),
        ("strg_ub_tba_0_tb_0_stride", 1, 0),
        ("strg_ub_tba_0_tb_0_dimensionality", 1, 0),
        ("strg_ub_agg_align_0_line_length", depth, 0),
        ("strg_ub_tba_0_tb_0_indices_0", 0, 0),
        ("strg_ub_tba_0_tb_0_indices_1", 1, 0),
        ("strg_ub_tba_0_tb_0_indices_2", 2, 0),
        ("strg_ub_tba_0_tb_0_indices_3", 3, 0),
        ("strg_ub_tba_0_tb_0_range_inner", 4, 0),
        ("strg_ub_tba_0_tb_0_tb_height", 1, 0), ("tile_en", tile_en, 0),
        ("mode", 0, 0), ("flush_reg_sel", 1, 0), ("wen_in_1_reg_sel", 1, 0),
        ("ren_in_1_reg_sel", 1, 0)
    ]
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y,
                    mcore)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # stall the chip while it is being configured
    tester.poke(circuit.interface["stall"], 1)
    tester.eval()

    # write each configuration word and verify it by reading it back
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # un-stall the chip before streaming data
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # top-level port names are built by hand from the placed coordinates
    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid_x, valid_y = placement["i4"]
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    # i3 drives both wen_in_0 and ren_in_0 of the memory (net e4)
    tester.poke(circuit.interface[wen], 1)

    # phase 1: stream 0..19 and check the output once it becomes valid
    for i in range(20):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        # NOTE(review): the literal 10 equals depth here; presumably
        # depth + 1 was meant - confirm
        if i >= 10 + 1:
            # data0 of PE: i - 1 - 1
            # data1 of PE: i - 1 - depth
            tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
            tester.expect(circuit.interface[valid], 1)
        elif i < depth:
            tester.expect(circuit.interface[valid], 0)
        if i == 19:
            # now stall everything
            tester.poke(circuit.interface["stall"], 1)
            tester.eval()
        tester.step(2)

    # phase 2: while stalled the output must keep the value of i == 19
    for i in range(20):
        # poke arbitrary values (multiples of 20); they should not matter
        tester.poke(circuit.interface[src], i * 20)
        tester.expect(circuit.interface[dst], 19 * 2 - 3 - depth)
        tester.step(2)

    # un-stall again
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # phase 3: the stream resumes from the sample that was stalled (19)
    for i in range(19, 30):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
        tester.expect(circuit.interface[valid], 1)
        tester.step(2)

    run_tb(tester)
Exemple #9
0
def test_interconnect_fifo(run_tb, io_sides, depth):
    """Randomised check of a memory tile configured as a FIFO.

    A 2x2 CGRA is built and memory ``m0`` is wired to the IO tiles: a
    16-bit data input, 1-bit write and read enables, a 16-bit data output
    and the 1-bit ``valid``, ``empty`` and ``full`` flags.  For 2048
    iterations one of four moves (read, write, read-and-write, idle) is
    picked at random and the hardware flags and data are compared with a
    software ``deque`` model.

    Args:
        run_tb: callable that receives the finished tester.
        io_sides: forwarded to ``create_cgra``.
        depth: FIFO depth written to ``fifo_ctrl_fifo_depth`` and used as
            the capacity of the software model.
    """
    # Status note carried over from the original author (Genesis -> Kratos
    # move): passes, basically the same as before.
    chip_size = 2
    interconnect = create_cgra(chip_size,
                               chip_size,
                               io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in_0")],
        "e1": [("i3", "io2f_1"), ("m0", "wen_in_0")],
        "e2": [("i4", "io2f_1"), ("m0", "ren_in_0")],
        "e3": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e4": [("m0", "valid_out_0"), ("i4", "f2io_1")],
        "e5": [("m0", "empty"), ("i2", "f2io_1")],
        "e6": [("m0", "full"), ("i3", "f2io_1")]
    }
    bus = {"e0": 16, "e1": 1, "e2": 1, "e3": 16, "e4": 1, "e5": 1, "e6": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as fifo mode
    mode = 1  # Mode.FIFO
    tile_en = 1

    configs_mem = [("fifo_ctrl_fifo_depth", depth, 0), ("mode", mode, 0),
                   ("tile_en", tile_en, 0), ("flush_reg_sel", 1, 0)]
    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y,
                    mcore)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()
    # write each configuration word and verify it by reading it back
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # i3 and i4 each carry one input and one output signal, so the same
    # coordinate is looked up once as an input and once as an output port
    src_coord = placement["I0"]
    src = interconnect.get_top_input_port_by_coord(src_coord, 16)
    dst_coord = placement["I1"]
    dst = interconnect.get_top_output_port_by_coord(dst_coord, 16)
    wen_coord = placement["i3"]
    wen = interconnect.get_top_input_port_by_coord(wen_coord, 1)
    valid_coord = placement["i4"]
    valid = interconnect.get_top_output_port_by_coord(valid_coord, 1)
    ren_coord = placement["i4"]
    ren = interconnect.get_top_input_port_by_coord(ren_coord, 1)
    full_coord = placement["i3"]
    full = interconnect.get_top_output_port_by_coord(full_coord, 1)
    empty_coord = placement["i2"]
    empty = interconnect.get_top_output_port_by_coord(empty_coord, 1)

    tester.step(1)

    # software model: new values enter on the left, reads pop on the right
    fifo = deque()
    valid_check = 0
    most_recent_read = 0
    # fixed seed so that a failing stimulus sequence can be reproduced
    random.seed(0)
    for _ in range(2048):

        # the flags are compared against the occupancy *before* this move
        len_fifo = len(fifo)

        # Pick random from (READ, WRITE, READ_AND_WRITE, IDLE)
        move = random.randint(0, 3)
        if move == 0:
            # read
            tester.poke(circuit.interface[ren], 1)
            if fifo:
                most_recent_read = fifo.pop()
                valid_check = 1
            else:
                valid_check = 0
        elif move == 1:
            # write; a write to a full FIFO is dropped by the model
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[src], write_val)
            if len(fifo) < depth:
                fifo.appendleft(write_val)
            valid_check = 0
        elif move == 2:
            # r and w; the model occupancy is unchanged afterwards
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[ren], 1)
            tester.poke(circuit.interface[src], write_val)
            fifo.appendleft(write_val)
            most_recent_read = fifo.pop()
            valid_check = 1
        else:
            # If not doing anything, valid will be low, and we expect
            # to see the same output as before
            valid_check = 0
        tester.eval()

        tester.expect(circuit.interface[empty], len_fifo == 0)
        tester.expect(circuit.interface[full], len_fifo == depth)
        tester.expect(circuit.interface[valid], valid_check)
        if valid_check:
            tester.expect(circuit.interface[dst], most_recent_read)
        tester.step(2)

        # drop both enables before the next move is chosen
        tester.poke(circuit.interface[wen], 0)
        tester.poke(circuit.interface[ren], 0)

    run_tb(tester)
Exemple #10
0
def test_interconnect_sram(run_tb, io_sides):
    """Check a memory tile configured in SRAM mode on a 2x2 CGRA.

    ``I0`` drives ``m0.addr_in_0``, ``m0.data_out_0`` goes to ``I1`` and
    ``i3`` drives ``m0.ren_in_0``.  The memory contents are loaded through
    the configuration interface so that the word at address ``a`` holds
    ``a``; the test then reads addresses 0..2047 through the fabric and
    expects each address to return its own value.
    """
    # Status note carried over from the original author (Genesis -> Kratos
    # move): passes, basically the same as before.
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3, add_pd=True, mem_ratio=(1, 2))

    bus = {"e0": 16, "e1": 16, "e2": 1}
    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "addr_in_0")],
        "e1": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e2": [("i3", "io2f_1"), ("m0", "ren_in_0")]
    }

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    mode = 2  # Mode.SRAM
    tile_en = 1
    configs_mem = [
        ("mode", mode, 0),
        ("tile_en", tile_en, 0),
        ("flush_reg_sel", 1, 0),
    ]
    mem_x, mem_y = placement["m0"]
    mcore = interconnect.tile_circuits[(mem_x, mem_y)].core
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y,
                    mcore)
    config_data = compress_config_data(config_data)

    # 512 configuration rows: 256 per feature, features numbered from 1
    sram_data = []
    for row in range(0, 512):
        feat_addr, mem_addr = row // 256 + 1, row % 256
        row_addr = interconnect.get_config_addr(mem_addr, feat_addr,
                                                mem_x, mem_y)
        sram_data.append((row_addr, row))

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()

    def program_and_check():
        # write every word and expect to read the same value back
        for cfg_addr, cfg_value in config_data:
            tester.configure(cfg_addr, cfg_value)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_value)

    program_and_check()

    # each row takes four consecutive writes; row r receives 4r .. 4r + 3
    for row_addr, row in sram_data:
        for word in range(4):
            tester.configure(row_addr, row * 4 + word)
            tester.eval()
        # four consecutive reads of the row are checked against the same
        # four values
        for word in range(4):
            tester.config_read(row_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, row * 4 + word)

    # the tile configuration is written and verified a second time
    program_and_check()

    tester.done_config()

    src = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    dst = interconnect.get_top_output_port_by_coord(placement["I1"], 16)
    ren = interconnect.get_top_input_port_by_coord(placement["i3"], 1)

    tester.step(2)
    tester.poke(circuit.interface[ren], 1)
    tester.eval()

    # read every word back through the fabric: address in, data out
    for word_addr in range(2048):
        tester.poke(circuit.interface[src], word_addr)
        tester.eval()
        tester.step(2)
        tester.eval()
        tester.expect(circuit.interface[dst], word_addr)

    run_tb(tester)
def test_interconnect_fifo(dw_files, io_sides, depth):
    """Randomised check of a memory tile configured as a FIFO.

    A 2x2 CGRA is built and memory ``m0`` is wired to the IO tiles: a
    16-bit data input, 1-bit write and read enables, a 16-bit data output
    and the 1-bit ``valid``, ``empty`` and ``full`` flags.  For 2048
    iterations one of four moves (read, write, read-and-write, idle) is
    picked at random and the hardware flags and data are compared with a
    software ``deque`` model.  The test bench is compiled and run with
    Verilator in a temporary directory.

    Args:
        dw_files: iterable of file paths copied into the simulation
            directory.
        io_sides: forwarded to ``create_cgra``.
        depth: FIFO depth written to ``fifo_ctrl_fifo_depth`` and used as
            the capacity of the software model.
    """
    # Status note carried over from the original author (Genesis -> Kratos
    # move): passes, basically the same as before.
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in_0")],
        "e1": [("i3", "io2f_1"), ("m0", "wen_in_0")],
        "e2": [("i4", "io2f_1"), ("m0", "ren_in_0")],
        "e3": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e4": [("m0", "valid_out_0"), ("i4", "f2io_1")],
        "e5": [("m0", "empty"), ("i2", "f2io_1")],
        "e6": [("m0", "full"), ("i3", "f2io_1")]
    }
    bus = {"e0": 16, "e1": 1, "e2": 1, "e3": 16, "e4": 1, "e5": 1, "e6": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as fifo mode
    mode = 1  # Mode.FIFO
    tile_en = 1

    configs_mem = [("fifo_ctrl_fifo_depth", depth, 0),
                   ("mode", mode, 0),
                   ("tile_en", tile_en, 0),
                   ("flush_reg_sel", 1, 0)]
    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y, mcore)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    # write each configuration word and verify it by reading it back
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # i3 and i4 each carry one input and one output signal, so the same
    # coordinate is looked up once as an input and once as an output port
    src_coord = placement["I0"]
    src = interconnect.get_top_input_port_by_coord(src_coord, 16)
    dst_coord = placement["I1"]
    dst = interconnect.get_top_output_port_by_coord(dst_coord, 16)
    wen_coord = placement["i3"]
    wen = interconnect.get_top_input_port_by_coord(wen_coord, 1)
    valid_coord = placement["i4"]
    valid = interconnect.get_top_output_port_by_coord(valid_coord, 1)
    ren_coord = placement["i4"]
    ren = interconnect.get_top_input_port_by_coord(ren_coord, 1)
    full_coord = placement["i3"]
    full = interconnect.get_top_output_port_by_coord(full_coord, 1)
    empty_coord = placement["i2"]
    empty = interconnect.get_top_output_port_by_coord(empty_coord, 1)

    tester.step(1)

    # software model: new values enter on the left, reads pop on the right
    fifo = deque()
    valid_check = 0
    most_recent_read = 0
    # fixed seed so that a failing stimulus sequence can be reproduced
    random.seed(0)
    for _ in range(2048):

        # the flags are compared against the occupancy *before* this move
        len_fifo = len(fifo)

        # Pick random from (READ, WRITE, READ_AND_WRITE, IDLE)
        move = random.randint(0, 3)
        if move == 0:
            # read
            tester.poke(circuit.interface[ren], 1)
            if fifo:
                most_recent_read = fifo.pop()
                valid_check = 1
            else:
                valid_check = 0
        elif move == 1:
            # write; a write to a full FIFO is dropped by the model
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[src], write_val)
            if len(fifo) < depth:
                fifo.appendleft(write_val)
            valid_check = 0
        elif move == 2:
            # r and w; the model occupancy is unchanged afterwards
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[ren], 1)
            tester.poke(circuit.interface[src], write_val)
            fifo.appendleft(write_val)
            most_recent_read = fifo.pop()
            valid_check = 1
        else:
            # If not doing anything, valid will be low, and we expect
            # to see the same output as before
            valid_check = 0
        tester.eval()

        tester.expect(circuit.interface[empty], len_fifo == 0)
        tester.expect(circuit.interface[full], len_fifo == depth)
        tester.expect(circuit.interface[valid], valid_check)
        if valid_check:
            tester.expect(circuit.interface[dst], most_recent_read)
        tester.step(2)

        # drop both enables before the next move is chosen
        tester.poke(circuit.interface[wen], 0)
        tester.poke(circuit.interface[ren], 0)

    with tempfile.TemporaryDirectory() as tempdir:
        # gather every source the simulation needs into one directory
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in dw_files:
            shutil.copy(filename, tempdir)
        # the SRAM stub is installed under the macro's file name
        shutil.copy(os.path.join("tests", "test_memory_core",
                                 "sram_stub.v"),
                    os.path.join(tempdir, "sram_512w_16b.v"))
        for aoi_mux in glob.glob("tests/*.sv"):
            shutil.copy(aoi_mux, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def test_interconnect_sram(dw_files, io_sides):
    """Check a memory tile configured in SRAM mode on a 2x2 CGRA.

    ``I0`` drives ``m0.addr_in_0``, ``m0.data_out_0`` goes to ``I1`` and
    ``i3`` drives ``m0.ren_in_0``.  The memory contents are loaded through
    the configuration interface so that the word at address ``a`` holds
    ``a``; the test then reads addresses 0..2047 through the fabric and
    expects each address to return its own value.  The test bench is
    compiled and run with Verilator in a temporary directory.
    """
    # Status note carried over from the original author (Genesis -> Kratos
    # move): passes, basically the same as before.
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3, add_pd=True, mem_ratio=(1, 2))

    bus = {"e0": 16, "e1": 16, "e2": 1}
    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "addr_in_0")],
        "e1": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e2": [("i3", "io2f_1"), ("m0", "ren_in_0")]
    }

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    mode = 2  # Mode.SRAM
    tile_en = 1
    configs_mem = [
        ("mode", mode, 0),
        ("tile_en", tile_en, 0),
        ("flush_reg_sel", 1, 0),
    ]
    mem_x, mem_y = placement["m0"]
    mcore = interconnect.tile_circuits[(mem_x, mem_y)].core
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y, mcore)
    config_data = compress_config_data(config_data)

    # 512 configuration rows: 256 per feature, features numbered from 1
    sram_data = []
    for row in range(0, 512):
        feat_addr, mem_addr = row // 256 + 1, row % 256
        row_addr = interconnect.get_config_addr(mem_addr, feat_addr,
                                                mem_x, mem_y)
        sram_data.append((row_addr, row))

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    def program_and_check():
        # write every word and expect to read the same value back
        for cfg_addr, cfg_value in config_data:
            tester.configure(cfg_addr, cfg_value)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_value)

    program_and_check()

    # each row takes four consecutive writes; row r receives 4r .. 4r + 3
    for row_addr, row in sram_data:
        for word in range(4):
            tester.configure(row_addr, row * 4 + word)
            tester.eval()
        # four consecutive reads of the row are checked against the same
        # four values
        for word in range(4):
            tester.config_read(row_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, row * 4 + word)

    # the tile configuration is written and verified a second time
    program_and_check()

    tester.done_config()

    src = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    dst = interconnect.get_top_output_port_by_coord(placement["I1"], 16)
    ren = interconnect.get_top_input_port_by_coord(placement["i3"], 1)

    tester.step(2)
    tester.poke(circuit.interface[ren], 1)
    tester.eval()

    # read every word back through the fabric: address in, data out
    for word_addr in range(2048):
        tester.poke(circuit.interface[src], word_addr)
        tester.eval()
        tester.step(2)
        tester.eval()
        tester.expect(circuit.interface[dst], word_addr)

    with tempfile.TemporaryDirectory() as tempdir:
        # gather every source the simulation needs into one directory
        for verilog_path in glob.glob("genesis_verif/*.*"):
            shutil.copy(verilog_path, tempdir)
        for dw_path in dw_files:
            shutil.copy(dw_path, tempdir)
        # the SRAM stub is installed under the macro's file name
        sram_stub = os.path.join("tests", "test_memory_core", "sram_stub.v")
        shutil.copy(sram_stub, os.path.join(tempdir, "sram_512w_16b.v"))
        for sv_path in glob.glob("tests/*.sv"):
            shutil.copy(sv_path, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])