コード例 #1
0
ファイル: garnet.py プロジェクト: mfkiwl/garnet
 def compile(self, halide_src, unconstrained_io=False, compact=False):
     """Map, place, route and assemble a bitstream for ``halide_src``.

     Args:
         halide_src: Path of the application file handed to ``self.map``;
             its directory receives the PnR copy and the meta file.
         unconstrained_io: When true, no fixed IO positions are given to
             the placer; otherwise ``place_io_blk`` supplies them.
         compact: Forwarded to ``archipelago.pnr`` and to
             ``archipelago.power.reduce_switching``.

     Returns:
         ``(bitstream, (input_interface, output_interface, reset, valid,
         en, delay))`` where ``delay`` is 1 when ``has_rom`` is truthy for
         the design and 0 otherwise.
     """
     id_to_name, instance_to_instr, netlist, bus = self.map(halide_src)
     app_dir = os.path.dirname(halide_src)
     fixed_io = None if unconstrained_io else place_io_blk(id_to_name)

     placement, routing = archipelago.pnr(
         self.interconnect, (netlist, bus),
         cwd="temp",
         id_to_name=id_to_name,
         fixed_pos=fixed_io,
         compact=compact,
         copy_to_dir=app_dir)

     # Overlay the switching-reduction result on top of the raw routing.
     switching_patch = archipelago.power.reduce_switching(
         routing, self.interconnect, compact=compact)
     routing.update(switching_patch)

     # Routing words first, then the per-instance core configuration.
     raw_words = list(self.interconnect.get_route_bitstream(routing))
     raw_words.extend(self.get_placement_bitstream(placement, id_to_name,
                                                   instance_to_instr))
     bitstream = compress_config_data(
         raw_words, skip_compression=self.interconnect.get_skip_addr())

     inputs, outputs = self.get_input_output(netlist)
     io_info = self.get_io_interface(inputs, outputs, placement, id_to_name)
     input_interface, output_interface, control = io_info
     reset, valid, en = control

     if has_rom(id_to_name):
         delay = 1
     else:
         delay = 0

     # also write out the meta file, next to the application source
     archipelago.io.dump_meta_file(halide_src, "design", app_dir)
     return bitstream, (input_interface, output_interface, reset, valid, en,
                        delay)
コード例 #2
0
ファイル: test_model.py プロジェクト: StanfordAHA/delta
def interconnect_route():
    """Build a small CGRA with dummy cores and route a three-net design.

    Returns:
        ``(interconnect, route_path)`` where ``route_path`` holds the first
        routed segment of nets ``e0``, ``e1`` and ``e2``, in that order.
    """
    chip_size = 2
    edge = chip_size + 1

    # All cores are created up front so that the same instances are reused
    # when they are snapped into the different interconnect graphs.
    # Start with an IO core everywhere on the (chip_size + 2)^2 grid ...
    cores = {(x, y): IO16bit()
             for x in range(0, chip_size + 2)
             for y in range(0, chip_size + 2)}
    # ... replace the interior with dummy cores ...
    cores.update({(x, y): DummyCore()
                  for x in range(1, 1 + chip_size)
                  for y in range(1, 1 + chip_size)})
    # ... and leave the four corners empty.
    for corner in ((0, 0), (0, edge), (edge, 0), (edge, edge)):
        cores[corner] = None

    interconnect = create_cgra(chip_size, True, cores_input=cores)

    connections = [
        ("e0", 16, [("I0", "io2f_16"), ("r0", "reg")]),
        ("e1", 16, [("r0", "reg"), ("D0", "data_in_16b")]),
        ("e2", 16, [("D0", "data_out_16b"), ("I1", "f2io_16")]),
    ]
    netlist = {net: pins for net, _, pins in connections}
    bus = {net: width for net, width, _ in connections}

    # PnR scratch files live in a throw-away directory.
    with tempfile.TemporaryDirectory() as tempdir:
        _, route = pnr(interconnect, (netlist, bus), cwd=tempdir)

    # first routed segment of each of the three nets
    route_path = [route[net][0] for net in ("e0", "e1", "e2")]

    return interconnect, route_path
コード例 #3
0
def test_interconnect_point_wise(batch_size: int, run_tb, io_sides):
    """Check a point-wise multiply routed through a 2x2 CGRA.

    Two 16-bit IO inputs feed PE ``p0``, which is configured with
    ``asm.umult0()``; the product is expected on a third IO. The finished
    tester is handed to ``run_tb``.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    connections = [
        ("e0", 16, [("I0", "io2f_16"), ("p0", "data0")]),
        ("e1", 16, [("I1", "io2f_16"), ("p0", "data1")]),
        ("e3", 16, [("p0", "alu_res"), ("I2", "f2io_16")]),
    ]
    netlist = {net: pins for net, _, pins in connections}
    bus = {net: width for net, width, _ in connections}

    placement, routing, _ = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # Append the PE instruction words, addressed to the tile holding p0.
    pe_x, pe_y = placement["p0"]
    pe_core = interconnect.tile_circuits[(pe_x, pe_y)].core
    config_data.extend(
        (interconnect.get_config_addr(reg, 0, pe_x, pe_y), word)
        for reg, word in pe_core.get_config_bitstream(asm.umult0()))
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()
    # Write every configuration word and read it straight back.
    for addr, word in config_data:
        tester.configure(addr, word)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, word)

    tester.done_config()

    # Resolve the top-level port names from where the IOs were placed.
    src_name0 = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    src_name1 = interconnect.get_top_input_port_by_coord(placement["I1"], 16)
    dst_name = interconnect.get_top_output_port_by_coord(placement["I2"], 16)

    random.seed(0)
    for _ in range(batch_size):
        lhs = random.randrange(0, 256)
        rhs = random.randrange(0, 256)
        tester.poke(circuit.interface[src_name0], lhs)
        tester.poke(circuit.interface[src_name1], rhs)

        tester.eval()
        tester.expect(circuit.interface[dst_name], lhs * rhs)

    run_tb(tester)
コード例 #4
0
ファイル: test_reset.py プロジェクト: mfkiwl/garnet
def test_interconnect_reset(batch_size: int, run_tb, io_sides):
    """Check that reset clears the configuration and that it can be rewritten.

    The design is the point-wise multiplier (two IO inputs into PE ``p0``,
    one IO output). The configuration is written and verified, the tester
    is reset and every address is expected to read back 0, then the same
    configuration is written and verified again.
    """
    size = 2
    interconnect = create_cgra(size,
                               size,
                               io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    connections = [
        ("e0", 16, [("I0", "io2f_16"), ("p0", "data0")]),
        ("e1", 16, [("I1", "io2f_16"), ("p0", "data1")]),
        ("e3", 16, [("p0", "alu_res"), ("I2", "f2io_16")]),
    ]
    netlist = {net: pins for net, _, pins in connections}
    bus = {net: width for net, width, _ in connections}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    pe_x, pe_y = placement["p0"]

    # Hand-assembled config address: register in the top byte, tile id
    # (x in bits 15..8, y in bits 7..0) in the low half-word.
    tile_id = pe_x << 8 | pe_y
    pe_core = interconnect.tile_circuits[(pe_x, pe_y)].core
    config_data.extend(
        ((reg << 24) | tile_id, word)
        for reg, word in pe_core.get_config_bitstream(asm.umult0()))
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    def program_and_verify():
        # Write each word and expect the same value on read-back.
        for addr, word in config_data:
            tester.configure(addr, word)
            tester.config_read(addr)
            tester.eval()
            tester.expect(circuit.read_config_data, word)

    program_and_verify()

    # reset them: every configured address must now read back as zero
    tester.reset()
    for addr, _ in config_data:
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, 0)

    # configure new one
    program_and_verify()

    run_tb(tester)
コード例 #5
0
ファイル: garnet.py プロジェクト: StanfordAHA/garnet
 def place_and_route(self, halide_src, unconstrained_io=False, compact=False, load_only=False):
     """Load the netlist for ``halide_src`` and run place-and-route on it.

     Args:
         halide_src: Application path; its directory is used as the PnR
             working directory.
         unconstrained_io: When true, no fixed IO positions are passed;
             otherwise they come from ``place_io_blk``.
         compact: Forwarded to ``archipelago.pnr``.
         load_only: Forwarded to both ``self.load_netlist`` and
             ``archipelago.pnr``.

     Returns:
         ``(placement, routing, id_to_name, instance_to_instr, netlist,
         bus)``; ``id_to_name`` is the mapping returned by the PnR call.
     """
     loaded = self.load_netlist(halide_src, load_only)
     id_to_name, instance_to_instr, netlist, bus = loaded
     app_dir = os.path.dirname(halide_src)
     fixed_io = None if unconstrained_io else place_io_blk(id_to_name)

     pnr_result = archipelago.pnr(
         self.interconnect, (netlist, bus),
         load_only=load_only,
         cwd=app_dir,
         id_to_name=id_to_name,
         fixed_pos=fixed_io,
         compact=compact)
     placement, routing, id_to_name = pnr_result

     return placement, routing, id_to_name, instance_to_instr, netlist, bus
コード例 #6
0
def interconnect_route():
    """Place and route a PE-plus-memory design on a size-2 CGRA.

    Returns:
        ``(interconnect, placement, route)`` as produced by ``create_cgra``
        and ``pnr``; PnR runs with ``cwd="temp"``.
    """
    interconnect = create_cgra(2, True, cores_input=None)

    # (net name, bus width, pins) for every connection in the design
    connections = [
        ("e0", 16, [("I0", "io2f_16"), ("p0", "data0")]),
        ("e1", 16, [("I1", "io2f_16"), ("p0", "data1")]),
        ("e2", 16, [("p0", "out"), ("m0", "addr")]),
        ("e3", 16, [("m0", "data_out"), ("I2", "f2io_16")]),
        ("e4", 1, [("i0", "io2f_1"), ("m0", "ren")]),
    ]
    netlist = {net: pins for net, _, pins in connections}
    bus = {net: width for net, width, _ in connections}

    placement, route = pnr(interconnect, (netlist, bus), cwd="temp")
    return interconnect, placement, route
コード例 #7
0
ファイル: test_mem.py プロジェクト: StanfordAHA/delta
def interconnect_route():
    """Place and route a single-memory design on a size-2 CGRA.

    Returns:
        ``(interconnect, placement, route_path)`` where ``route_path``
        holds the first routed segment of nets ``e0``, ``e1`` and ``e2``.
    """
    interconnect = create_cgra(2, True, cores_input=None)

    # (net name, bus width, pins) for every connection in the design
    connections = [
        ("e0", 16, [("I0", "io2f_16"), ("m0", "data_in")]),
        ("e1", 16, [("m0", "data_out"), ("I1", "f2io_16")]),
        ("e2", 1, [("i0", "io2f_1"), ("m0", "wen")]),
    ]
    netlist = {net: pins for net, _, pins in connections}
    bus = {net: width for net, width, _ in connections}

    placement, route = pnr(interconnect, (netlist, bus))

    # first routed segment of each of the three nets
    route_path = [route[net][0] for net in ("e0", "e1", "e2")]

    return interconnect, placement, route_path
コード例 #8
0
ファイル: garnet.py プロジェクト: richmorj/garnet
 def compile(self, halide_src):
     """Map ``halide_src``, run PnR and return the bitstream and IO info.

     The mapper is initialised with the rewrite rules on first use.

     Returns:
         ``(bitstream, (input_interface, output_interface, reset, valid))``.
     """
     if not self.mapper_initalized:
         self.initialize_mapper(self.__rewrite_rules)
     mapped, instrs = self.map(halide_src)

     # id_to_name translates a netlist id into its instance name
     netlist, bus, id_to_name = self.convert_mapped_to_netlist(mapped)
     io_positions = place_io_blk(id_to_name, self.width)
     placement, routing = archipelago.pnr(
         self.interconnect, (netlist, bus),
         cwd="temp",
         id_to_name=id_to_name,
         fixed_pos=io_positions)

     # Routing words first, then the per-instance core configuration.
     bitstream = list(self.interconnect.get_route_bitstream(routing))
     bitstream.extend(self.get_placement_bitstream(placement, id_to_name,
                                                   instrs))

     inputs, outputs = self.get_input_output(netlist)
     io_info = self.get_io_interface(inputs, outputs, placement, id_to_name)
     input_interface, output_interface, control = io_info
     reset, valid = control
     return bitstream, (input_interface, output_interface, reset, valid)
コード例 #9
0
ファイル: garnet.py プロジェクト: zamyers/garnet
 def compile(self, halide_src, unconstrained_io=False):
     """Map ``halide_src``, run PnR and return the bitstream and IO info.

     Args:
         halide_src: Application path handed to ``self.map``.
         unconstrained_io: When true, no fixed IO positions are passed to
             the placer; otherwise ``place_io_blk`` supplies them.

     Returns:
         ``(bitstream, (input_interface, output_interface, reset, valid,
         en, delay))`` where ``delay`` is 1 when ``has_rom`` is truthy for
         the design and 0 otherwise.
     """
     id_to_name, instance_to_instr, netlist, bus = self.map(halide_src)
     fixed_io = (None if unconstrained_io
                 else place_io_blk(id_to_name, self.width))
     placement, routing = archipelago.pnr(
         self.interconnect, (netlist, bus),
         cwd="temp",
         id_to_name=id_to_name,
         fixed_pos=fixed_io)

     # Routing words first, then the per-instance core configuration.
     bitstream = list(self.interconnect.get_route_bitstream(routing))
     bitstream.extend(self.get_placement_bitstream(placement, id_to_name,
                                                   instance_to_instr))

     inputs, outputs = self.get_input_output(netlist)
     io_info = self.get_io_interface(inputs, outputs, placement, id_to_name)
     input_interface, output_interface, control = io_info
     reset, valid, en = control

     if has_rom(id_to_name):
         delay = 1
     else:
         delay = 0
     return bitstream, (input_interface, output_interface, reset, valid, en,
                        delay)
コード例 #10
0
def test_interconnect_fifo(run_tb, io_sides, depth):
    """Drive a memory tile configured as a FIFO with random traffic.

    A software ``deque`` mirrors the expected FIFO contents. For 2048
    iterations a random move (read, write, read+write, idle) is applied and
    the ``empty``, ``full`` and ``valid`` outputs (and the data output when
    a read is valid) are checked against the model. The finished tester is
    handed to ``run_tb``.

    Args:
        run_tb: Callable that receives the populated tester.
        io_sides: Forwarded to ``create_cgra``.
        depth: FIFO depth written to ``fifo_ctrl_fifo_depth`` and used as
            the capacity of the software model.
    """

    # NOTE (carried over from the original author): this test passes after
    # the move from Genesis to Kratos; it is basically unchanged.

    chip_size = 2
    interconnect = create_cgra(chip_size,
                               chip_size,
                               io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    # Nets e1/e6 share IO "i3" and e2/e4 share IO "i4": each of those IOs is
    # used in both directions (io2f_1 input and f2io_1 output).
    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in_0")],
        "e1": [("i3", "io2f_1"), ("m0", "wen_in_0")],
        "e2": [("i4", "io2f_1"), ("m0", "ren_in_0")],
        "e3": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e4": [("m0", "valid_out_0"), ("i4", "f2io_1")],
        "e5": [("m0", "empty"), ("i2", "f2io_1")],
        "e6": [("m0", "full"), ("i3", "f2io_1")]
    }
    bus = {"e0": 16, "e1": 1, "e2": 1, "e3": 16, "e4": 1, "e5": 1, "e6": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as fifo mode
    # NOTE(review): `mode` is assigned but never read below; the config list
    # passes the literal 1 instead.
    mode = 1  # Mode.FIFO
    tile_en = 1

    # NOTE(review): `almost_count` is computed here but never used in this
    # function.
    almost_count = 3
    if (depth < 5):
        almost_count = 0

    # Entries are (name, value, third field); the third field is always 0
    # here - its meaning is defined by config_mem_tile, not visible here.
    configs_mem = [("fifo_ctrl_fifo_depth", depth, 0), ("mode", 1, 0),
                   ("tile_en", tile_en, 0), ("flush_reg_sel", 1, 0)]
    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y,
                    mcore)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()
    # Write every configuration word and read it straight back.
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # Resolve top-level port names from the placed IO coordinates.
    src_coord = placement["I0"]
    src = interconnect.get_top_input_port_by_coord(src_coord, 16)
    dst_coord = placement["I1"]
    dst = interconnect.get_top_output_port_by_coord(dst_coord, 16)
    wen_coord = placement["i3"]
    wen = interconnect.get_top_input_port_by_coord(wen_coord, 1)
    # valid (output) and ren (input) both live on IO "i4"
    valid_coord = placement["i4"]
    valid = interconnect.get_top_output_port_by_coord(valid_coord, 1)
    ren_coord = placement["i4"]
    ren = interconnect.get_top_input_port_by_coord(ren_coord, 1)
    # full (output) shares IO "i3" with wen (input)
    full_coord = placement["i3"]
    full = interconnect.get_top_output_port_by_coord(full_coord, 1)
    empty_coord = placement["i2"]
    empty = interconnect.get_top_output_port_by_coord(empty_coord, 1)

    tester.step(1)

    # Software model: new values enter on the left, the oldest leaves on
    # the right.
    fifo = deque()
    valid_check = 0
    most_recent_read = 0
    for i in range(2048):

        # Occupancy BEFORE this iteration's move; empty/full are checked
        # against this value.
        len_fifo = len(fifo)

        # Pick random from (READ, WRITE, READ_AND_WRITE, IDLE):
        # randint(0, 3) is inclusive, so 3 selects the idle branch.
        move = random.randint(0, 3)
        if move == 0:
            # read
            tester.poke(circuit.interface[ren], 1)
            if (len(fifo) > 0):
                most_recent_read = fifo.pop()
                # tester.expect(circuit.interface[dst], most_recent_read)
                valid_check = 1
            else:
                valid_check = 0
        elif move == 1:
            # write
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[src], write_val)
            # the model drops the write when it is already at capacity
            if (len(fifo) < depth):
                fifo.appendleft(write_val)
            valid_check = 0
        elif move == 2:
            # r and w
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[ren], 1)
            tester.poke(circuit.interface[src], write_val)
            # the model pushes unconditionally and then pops, so occupancy
            # is unchanged and an empty model returns the value just written
            fifo.appendleft(write_val)
            most_recent_read = fifo.pop()
            valid_check = 1
        else:
            # If not doing anything, valid will be low, and we expect
            # to see the same output as before
            valid_check = 0
        tester.eval()

        tester.expect(circuit.interface[empty], len_fifo == 0)
        tester.expect(circuit.interface[full], len_fifo == depth)
        tester.expect(circuit.interface[valid], valid_check)
        if valid_check:
            tester.expect(circuit.interface[dst], most_recent_read)
        tester.step(2)

        # de-assert both enables before the next move
        tester.poke(circuit.interface[wen], 0)
        tester.poke(circuit.interface[ren], 0)

    run_tb(tester)
コード例 #11
0
def test_interconnect_sram(cw_files, add_pd, io_sides):
    """Preload a memory tile through configuration and read it back as SRAM.

    Word ``0x00000006`` is written to config register 0 / feature 0 of the
    tile holding ``m0`` (the original author describes this as SRAM mode).
    The memory is then filled with ``i + 10`` for every fourth ``i`` below
    1024, and each of those addresses is read through the fabric. The test
    is compiled and run with Verilator in a temporary directory.
    """
    size = 2
    interconnect = create_cgra(size,
                               size,
                               io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    connections = [
        ("e0", 16, [("I0", "io2f_16"), ("m0", "addr_in")]),
        ("e1", 16, [("m0", "data_out"), ("I1", "f2io_16")]),
        ("e2", 1, [("i3", "io2f_1"), ("m0", "ren_in")]),
    ]
    netlist = {net: pins for net, _, pins in connections}
    bus = {net: width for net, width, _ in connections}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # configure the tile that received m0 as sram mode
    x, y = placement["m0"]
    config_data.append((interconnect.get_config_addr(0, 0, x, y), 0x00000006))

    # SRAM contents: each block of 256 addresses maps to its own feature
    # (starting at feature 1); the stored value is the address plus 10.
    sram_data = [
        (interconnect.get_config_addr(i % 256, i // 256 + 1, x, y), i + 10)
        for i in range(0, 1024, 4)
    ]

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    for addr, word in config_data:
        tester.configure(addr, word)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, word)

    # The SRAM words are written without read-back (the original author
    # notes that read-back does not currently work for them).
    for addr, data in sram_data:
        tester.configure(addr, data)

    addr_x, addr_y = placement["I0"]
    src = f"glb2io_16_X{addr_x:02X}_Y{addr_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    ren_x, ren_y = placement["i3"]
    ren = f"glb2io_1_X{ren_x:02X}_Y{ren_y:02X}"

    tester.step(2)
    tester.poke(circuit.interface[ren], 1)
    tester.eval()

    # Present each preloaded address and expect its value after two steps.
    for read_addr in range(0, 1024, 4):
        tester.poke(circuit.interface[src], read_addr)
        tester.eval()
        tester.step(2)
        tester.eval()
        tester.expect(circuit.interface[dst], read_addr + 10)

    with tempfile.TemporaryDirectory() as tempdir:
        # Stage every Verilog dependency next to the generated design.
        for verilog_path in glob.glob("genesis_verif/*.*"):
            shutil.copy(verilog_path, tempdir)
        for extra_path in cw_files:
            shutil.copy(extra_path, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal"])
コード例 #12
0
def test_interconnect_point_wise(batch_size: int, cw_files, add_pd, io_sides):
    """Check a point-wise multiply routed through a 2x2 CGRA with Verilator.

    Two 16-bit IO inputs feed PE ``p0``, which is configured with
    ``asm.umult0()``; the product is expected on a third IO. The test is
    compiled and run in a temporary directory with tracing enabled.
    """
    size = 2
    interconnect = create_cgra(size,
                               size,
                               io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    connections = [
        ("e0", 16, [("I0", "io2f_16"), ("p0", "data0")]),
        ("e1", 16, [("I1", "io2f_16"), ("p0", "data1")]),
        ("e3", 16, [("p0", "alu_res"), ("I2", "f2io_16")]),
    ]
    netlist = {net: pins for net, _, pins in connections}
    bus = {net: width for net, width, _ in connections}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # Append the PE instruction words, addressed to the tile holding p0.
    x, y = placement["p0"]
    pe_core = interconnect.tile_circuits[(x, y)].core
    config_data.extend(
        (interconnect.get_config_addr(reg, 0, x, y), word)
        for reg, word in pe_core.get_config_bitstream(asm.umult0()))

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    # Write every configuration word and read it straight back.
    for addr, word in config_data:
        tester.configure(addr, word)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, word)

    # Top-level port names are derived from the placed IO coordinates.
    src_x0, src_y0 = placement["I0"]
    src_name0 = f"glb2io_16_X{src_x0:02X}_Y{src_y0:02X}"
    src_x1, src_y1 = placement["I1"]
    src_name1 = f"glb2io_16_X{src_x1:02X}_Y{src_y1:02X}"
    dst_x, dst_y = placement["I2"]
    dst_name = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"

    random.seed(0)
    for _ in range(batch_size):
        lhs = random.randrange(0, 256)
        rhs = random.randrange(0, 256)
        tester.poke(circuit.interface[src_name0], lhs)
        tester.poke(circuit.interface[src_name1], rhs)

        tester.eval()
        tester.expect(circuit.interface[dst_name], lhs * rhs)

    with tempfile.TemporaryDirectory() as tempdir:
        # Stage every Verilog dependency next to the generated design.
        for verilog_path in glob.glob("genesis_verif/*.*"):
            shutil.copy(verilog_path, tempdir)
        for extra_path in cw_files:
            shutil.copy(extra_path, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal", "--trace"])
コード例 #13
0
def test_interconnect_reset(batch_size: int, dw_files, io_sides):
    """Check with Verilator that reset clears the configuration.

    The design is the point-wise multiplier (two IO inputs into PE ``p0``,
    one IO output). The configuration is written and verified, the tester
    is reset and every address is expected to read back 0, then the same
    configuration is written and verified again.
    """
    size = 2
    interconnect = create_cgra(size,
                               size,
                               io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    connections = [
        ("e0", 16, [("I0", "io2f_16"), ("p0", "data0")]),
        ("e1", 16, [("I1", "io2f_16"), ("p0", "data1")]),
        ("e3", 16, [("p0", "alu_res"), ("I2", "f2io_16")]),
    ]
    netlist = {net: pins for net, _, pins in connections}
    bus = {net: width for net, width, _ in connections}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    pe_x, pe_y = placement["p0"]

    # Hand-assembled config address: register in the top byte, tile id
    # (x in bits 15..8, y in bits 7..0) in the low half-word.
    tile_id = pe_x << 8 | pe_y
    pe_core = interconnect.tile_circuits[(pe_x, pe_y)].core
    config_data.extend(
        ((reg << 24) | tile_id, word)
        for reg, word in pe_core.get_config_bitstream(asm.umult0()))
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    def program_and_verify():
        # Write each word and expect the same value on read-back.
        for addr, word in config_data:
            tester.configure(addr, word)
            tester.config_read(addr)
            tester.eval()
            tester.expect(circuit.read_config_data, word)

    program_and_verify()

    # reset them: every configured address must now read back as zero
    tester.reset()
    for addr, _ in config_data:
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, 0)

    # configure new one
    program_and_verify()

    with tempfile.TemporaryDirectory() as tempdir:
        # Stage every Verilog dependency next to the generated design.
        for verilog_path in glob.glob("genesis_verif/*.*"):
            shutil.copy(verilog_path, tempdir)
        for extra_path in dw_files:
            shutil.copy(extra_path, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        for sv_path in glob.glob("tests/*.sv"):
            shutil.copy(sv_path, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
コード例 #14
0
def test_interconnect_line_buffer(cw_files, add_pd, io_sides):
    """Check a memory tile used as a line buffer feeding an adder PE.

    The input stream goes both to memory ``m0`` and to PE ``p0``; the PE,
    configured with ``asm.add()``, sums the direct value with the memory
    output. Once the pipeline has settled (``i > depth + 10``) the output
    is expected to equal ``i * 2 - depth``. The test is compiled and run
    with Verilator in a temporary directory.
    """
    depth = 10
    size = 2
    interconnect = create_cgra(size,
                               size,
                               io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    connections = [
        ("e0", 16, [("I0", "io2f_16"), ("m0", "data_in"), ("p0", "data0")]),
        ("e1", 16, [("m0", "data_out"), ("p0", "data1")]),
        ("e3", 16, [("p0", "alu_res"), ("I1", "f2io_16")]),
        ("e4", 1, [("i3", "io2f_1"), ("m0", "wen_in")]),
    ]
    netlist = {net: pins for net, _, pins in connections}
    bus = {net: width for net, width, _ in connections}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as line buffer mode; the depth is packed
    # into the same word starting at bit 3
    mem_x, mem_y = placement["m0"]
    mem_config_addr = interconnect.get_config_addr(0, 0, mem_x, mem_y)
    config_data.append((mem_config_addr, 0x00000004 | (depth << 3)))

    # then p0 is configured as add, using a hand-assembled config address
    # (register in the top byte, tile id in the low half-word)
    pe_x, pe_y = placement["p0"]
    tile_id = pe_x << 8 | pe_y
    pe_core = interconnect.tile_circuits[(pe_x, pe_y)].core
    config_data.extend(
        ((reg << 24) | tile_id, word)
        for reg, word in pe_core.get_config_bitstream(asm.add()))

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    for addr, word in config_data:
        tester.configure(addr, word)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, word)

    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"

    # Write enable stays asserted for the whole stream.
    tester.poke(circuit.interface[wen], 1)

    settle = depth + 10
    for sample in range(200):
        tester.poke(circuit.interface[src], sample)
        tester.eval()

        # Only check once the buffer has filled and the output settled.
        if sample > settle:
            tester.expect(circuit.interface[dst], sample * 2 - depth)

        # toggle the clock
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        # Stage every Verilog dependency next to the generated design.
        for verilog_path in glob.glob("genesis_verif/*.*"):
            shutil.copy(verilog_path, tempdir)
        for extra_path in cw_files:
            shutil.copy(extra_path, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal"])
コード例 #15
0
def test_interconnect_sram(dw_files, io_sides):
    """Preload a memory tile through configuration and read it back as SRAM.

    The tile holding ``m0`` is configured with ``mode = 2`` (Mode.SRAM per
    the original comment). Each of 512 config addresses receives four
    consecutive writes (``4 * i`` .. ``4 * i + 3``) that are then read back
    in the same order, the fabric configuration is re-applied, and finally
    2048 addresses are read through the fabric, each expected to return its
    own address. The test is compiled and run with Verilator.

    Note (carried over from the original author): this test passes after
    the move from Genesis to Kratos; it is basically unchanged.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    connections = [
        ("e0", 16, [("I0", "io2f_16"), ("m0", "addr_in_0")]),
        ("e1", 16, [("m0", "data_out_0"), ("I1", "f2io_16")]),
        ("e2", 1, [("i3", "io2f_1"), ("m0", "ren_in_0")]),
    ]
    netlist = {net: pins for net, _, pins in connections}
    bus = {net: width for net, width, _ in connections}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # configure the tile that received m0 as sram mode
    mode = 2  # Mode.SRAM
    tile_en = 1
    configs_mem = [
        ("mode", mode, 0),
        ("tile_en", tile_en, 0),
        ("flush_reg_sel", 1, 0),
    ]
    mem_x, mem_y = placement["m0"]
    mcore = interconnect.tile_circuits[(mem_x, mem_y)].core
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y, mcore)
    config_data = compress_config_data(config_data)

    # SRAM preload: each block of 256 addresses maps to its own feature
    # (starting at feature 1); the payload is the running index.
    sram_data = [
        (interconnect.get_config_addr(i % 256, i // 256 + 1, mem_x, mem_y), i)
        for i in range(0, 512)
    ]

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    def program_and_verify():
        # Write each fabric config word and expect it on read-back.
        for addr, word in config_data:
            tester.configure(addr, word)
            tester.config_read(addr)
            tester.eval()
            tester.expect(circuit.read_config_data, word)

    program_and_verify()

    for addr, data in sram_data:
        # Four back-to-back writes to the same config address ...
        burst = [data * 4 + lane for lane in range(4)]
        for value in burst:
            tester.configure(addr, value)
            tester.eval()
        # ... followed by four reads that must return them in order.
        for value in burst:
            tester.config_read(addr)
            tester.eval()
            tester.expect(circuit.read_config_data, value)

    # Apply and verify the fabric configuration a second time.
    program_and_verify()

    tester.done_config()

    src = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    dst = interconnect.get_top_output_port_by_coord(placement["I1"], 16)
    ren = interconnect.get_top_input_port_by_coord(placement["i3"], 1)

    tester.step(2)
    tester.poke(circuit.interface[ren], 1)
    tester.eval()

    # Every address is expected to hold its own value.
    for read_addr in range(2048):
        tester.poke(circuit.interface[src], read_addr)
        tester.eval()
        tester.step(2)
        tester.eval()
        tester.expect(circuit.interface[dst], read_addr)

    with tempfile.TemporaryDirectory() as tempdir:
        # Stage every Verilog dependency next to the generated design.
        for verilog_path in glob.glob("genesis_verif/*.*"):
            shutil.copy(verilog_path, tempdir)
        for extra_path in dw_files:
            shutil.copy(extra_path, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        for sv_path in glob.glob("tests/*.sv"):
            shutil.copy(sv_path, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
コード例 #16
0
def basic_tb(config_path,
             stream_path,
             run_tb,
             in_file_name="input",
             out_file_name="output",
             cwd=None,
             trace=False):
    """Replay a recorded stream through one memory tile of a 2x2 CGRA.

    The fabric routes IO ``I0`` into ``m0.data_in_0`` and ``m0.data_out_0``
    into IO ``I1``.  The memory tile is programmed with the static bitstream
    obtained from a freshly made memory core, then stimulus / expected
    values produced by ``generate_data_lists`` are applied cycle by cycle.

    Args:
        config_path: forwarded to ``get_static_bitstream``.
        stream_path: CSV file name handed to ``generate_data_lists``.
        run_tb: callable that finally executes the recorded testbench.
        in_file_name: forwarded to ``get_static_bitstream``.
        out_file_name: forwarded to ``get_static_bitstream``.
        cwd: forwarded to ``run_tb``.
        trace: forwarded to ``run_tb``.
    """
    side = 2
    interconnect = create_cgra(side, side, io_sides(),
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    # Two 16-bit nets: IO -> memory input, memory output -> IO.
    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in_0")],
        "e1": [("m0", "data_out_0"), ("I1", "f2io_16")]
    }
    bus = {"e0": 16, "e1": 16}

    placement, routing, _ = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # Stand-alone core, used to obtain the static configuration and the
    # port counts.
    standalone_core = make_memory_core()
    static_cfg = standalone_core.get_static_bitstream(
        config_path=config_path,
        in_file_name=in_file_name,
        out_file_name=out_file_name)

    # config_mem_tile takes triples; every entry gets 0 as third field.
    mem_settings = [(name, value, 0) for (name, value) in static_cfg]

    mem_x, mem_y = placement["m0"]
    placed_core = interconnect.tile_circuits[(mem_x, mem_y)].core
    config_mem_tile(interconnect, config_data, mem_settings, mem_x, mem_y,
                    placed_core)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    tester.zero_inputs()

    # Keep the fabric stalled while the configuration is written.
    tester.poke(circuit.interface["stall"], 1)

    for cfg_addr, cfg_word in config_data:
        tester.configure(cfg_addr, cfg_word)
        tester.config_read(cfg_addr)
        tester.eval()

    tester.done_config()
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    n_inputs = standalone_core.num_data_inputs()
    n_outputs = standalone_core.num_data_outputs()
    in_data, out_data, _valids = generate_data_lists(
        csv_file_name=stream_path,
        data_in_width=n_inputs,
        data_out_width=n_outputs)

    in_x, in_y = placement["I0"]
    out_x, out_y = placement["I1"]
    data_in = f"glb2io_16_X{in_x:02X}_Y{in_y:02X}"
    data_out = f"io2glb_16_X{out_x:02X}_Y{out_y:02X}"

    # NOTE(review): the loop bound is len(out_data) while the samples come
    # from element 0 of each list -- confirm against generate_data_lists.
    for cycle in range(len(out_data)):
        tester.poke(circuit.interface[data_in], in_data[0][cycle])
        tester.eval()
        tester.expect(circuit.interface[data_out], out_data[0][cycle])
        # one full clock period
        tester.step(2)

    run_tb(tester, cwd=cwd, trace=trace, disable_ndarray=True)
コード例 #17
0
def test_stall(run_tb, io_sides):
    """Check that asserting ``stall`` freezes the fabric output.

    A 2x2 CGRA is built with a register, a memory tile and a PE
    configured as an adder whose first operand uses ``Mode_t.DELAY``.
    The input stream is driven for 20 cycles, the chip is stalled for
    another 20 cycles while unrelated data is poked, and then the stream
    resumes.  While stalled the output must keep its last value.

    Args:
        run_tb: callable that executes the recorded testbench.
        io_sides: IO side specification forwarded to ``create_cgra``.
    """
    chip_size = 2
    depth = 10
    # Cycle index at which the stall is asserted; the stream later resumes
    # from this same index.
    stall_cycle = 19
    interconnect = create_cgra(chip_size,
                               chip_size,
                               io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("r1", "reg")],
        "e2": [("r1", "reg"), ("m0", "data_in_0"), ("p0", "data0")],
        "e1": [("m0", "data_out_0"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in_0"), ("m0", "ren_in_0")],
        "e5": [("m0", "valid_out_0"), ("i4", "f2io_1")]
    }
    bus = {"e0": 16, "e2": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    x, y = placement["p0"]

    # Configure the PE as an adder.
    tile = interconnect.tile_circuits[(x, y)]
    add_bs = tile.core.get_config_bitstream(asm.add(ra_mode=asm.Mode_t.DELAY))
    for addr, data in add_bs:
        config_data.append((interconnect.get_config_addr(addr, 0, x, y), data))

    tile_en = 1

    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core

    # (register name, value, third field always 0) triples for the memory
    # tile; the depth-related registers all use ``depth``.
    configs_mem = [
        ("strg_ub_app_ctrl_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_output_port_0", 0, 0),
        ("strg_ub_app_ctrl_read_depth_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_wo_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_ss_0", depth, 0),
        ("strg_ub_app_ctrl_coarse_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_coarse_read_depth_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_wo_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_ss_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_2", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_3", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_sync_grp_sync_group_0", 1, 0),
        ("strg_ub_tba_0_tb_0_range_outer", depth, 0),
        ("strg_ub_tba_0_tb_0_starting_addr", 0, 0),
        ("strg_ub_tba_0_tb_0_stride", 1, 0),
        ("strg_ub_tba_0_tb_0_dimensionality", 1, 0),
        ("strg_ub_agg_align_0_line_length", depth, 0),
        ("strg_ub_tba_0_tb_0_indices_0", 0, 0),
        ("strg_ub_tba_0_tb_0_indices_1", 1, 0),
        ("strg_ub_tba_0_tb_0_indices_2", 2, 0),
        ("strg_ub_tba_0_tb_0_indices_3", 3, 0),
        ("strg_ub_tba_0_tb_0_range_inner", 4, 0),
        ("strg_ub_tba_0_tb_0_tb_height", 1, 0), ("tile_en", tile_en, 0),
        ("mode", 0, 0), ("flush_reg_sel", 1, 0), ("wen_in_1_reg_sel", 1, 0),
        ("ren_in_1_reg_sel", 1, 0)
    ]
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y,
                    mcore)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # stall the chip while it is being configured
    tester.poke(circuit.interface["stall"], 1)
    tester.eval()

    # Write every configuration word and read it back.
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # un-stall the chip
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid_x, valid_y = placement["i4"]
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    for i in range(stall_cycle + 1):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        # The threshold is tied to ``depth`` (previously a literal 10) so
        # both branches stay consistent if ``depth`` is changed.
        if i >= depth + 1:
            # data0 of PE: i - 1 - 1
            # data1 of PE: i - 1 - depth
            tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
            tester.expect(circuit.interface[valid], 1)
        elif i < depth:
            tester.expect(circuit.interface[valid], 0)
        if i == stall_cycle:
            # now stall everything
            tester.poke(circuit.interface["stall"], 1)
            tester.eval()
        tester.step(2)

    # Output value that must be held for the whole stalled period.
    frozen_output = stall_cycle * 2 - 3 - depth
    for i in range(20):
        # poke unrelated numbers. it shouldn't matter while stalled
        tester.poke(circuit.interface[src], i * 20)
        tester.expect(circuit.interface[dst], frozen_output)
        tester.step(2)

    # un-stall again
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # Resume the stream from the cycle at which the stall began.
    for i in range(stall_cycle, 30):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
        tester.expect(circuit.interface[valid], 1)
        tester.step(2)

    run_tb(tester)
コード例 #18
0
def test_pond_pe(verilator=True):
    """Exercise a PE together with its pond on a 2x2 CGRA.

    IO ``I0`` feeds the pond input, the pond output feeds PE ``data0``,
    IO ``I1`` feeds PE ``data1`` and the PE result leaves through ``I2``.
    The PE is configured with ``asm.umult0()``.  Sixteen values are first
    written through ``I0``; then sixteen random operands are applied on
    ``I1`` and the product with the running index is expected on ``I2``.

    Args:
        verilator: when exactly ``False`` the design is run with the
            ``system-verilog`` target and the ``vcs`` simulator; otherwise
            the ``verilator`` target is used.
    """
    side = 2
    interconnect = create_cgra(side,
                               side,
                               io_sides(),
                               num_tracks=3,
                               add_pd=True,
                               add_pond=True,
                               mem_ratio=(1, 2))

    bus = {"e0": 16, "e1": 16, "e2": 16, "e3": 16}
    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data_in_pond")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e2": [("p0", "alu_res"), ("I2", "f2io_16")],
        "e3": [("p0", "data_out_pond"), ("p0", "data0")]
    }

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    pe_x, pe_y = placement["p0"]
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    # The pond is the first additional core attached to the PE tile.
    pond = pe_tile.additional_cores[0]

    # PE instruction words, translated to global configuration addresses.
    for local_addr, word in pe_tile.core.get_config_bitstream(asm.umult0()):
        global_addr = interconnect.get_config_addr(local_addr, 0, pe_x, pe_y)
        config_data.append((global_addr, word))

    # Ranges, Strides, Dimensionality, Starting Addr, Starting Addr - Schedule
    read_ctrl = [[16, 1], [1, 1], 2, 0, 16]
    write_ctrl = [[16, 1], [1, 1], 2, 0, 0]

    generate_pond_api(interconnect, pond, read_ctrl, write_ctrl, pe_x, pe_y,
                      config_data)

    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)

    # Hold the fabric stalled during configuration.
    tester.poke(circuit.interface["stall"], 1)

    for cfg_addr, cfg_word in config_data:
        tester.configure(cfg_addr, cfg_word)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_word)

    tester.done_config()
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    src_x0, src_y0 = placement["I0"]
    src_x1, src_y1 = placement["I1"]
    dst_x, dst_y = placement["I2"]
    src_name0 = f"glb2io_16_X{src_x0:02X}_Y{src_y0:02X}"
    src_name1 = f"glb2io_16_X{src_x1:02X}_Y{src_y1:02X}"
    dst_name = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    random.seed(0)

    # Phase 1 (cycles 0..15): stream 0..15 in through I0.
    for value in range(16):
        tester.poke(circuit.interface[src_name0], value)
        tester.eval()
        tester.step(2)
        tester.eval()

    # Phase 2 (cycles 16..31): apply a random operand on I1 and expect
    # index * operand on the output.
    for index in range(16):
        operand = random.randrange(0, 256)
        tester.poke(circuit.interface[src_name1], operand)
        tester.eval()
        tester.expect(circuit.interface[dst_name], index * operand)
        tester.step(2)
        tester.eval()

    with tempfile.TemporaryDirectory() as tempdir:
        # Collect every source file the simulator needs in one directory.
        for generated in glob.glob("genesis_verif/*.*"):
            shutil.copy(generated, tempdir)
        for dw_file in dw_files():
            shutil.copy(dw_file, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        for sv_file in glob.glob("tests/*.sv"):
            shutil.copy(sv_file, tempdir)

        run_opts = {
            "magma_output": "coreir-verilog",
            "magma_opts": {
                "coreir_libs": {"float_DW"}
            },
            "directory": tempdir,
            "flags": ["-Wno-fatal", "--trace"]
        }
        if verilator is False:
            backend = "system-verilog"
            run_opts["simulator"] = "vcs"
        else:
            backend = "verilator"

        tester.compile_and_run(target=backend, tmp_dir=False, **run_opts)
コード例 #19
0
def test_interconnect_line_buffer_last_line_valid(cw_files, add_pd, io_sides,
                                                  stencil_width, depth):
    """Check the ``valid`` pattern of a line buffer over three lines.

    The memory tile ``m0`` is put in ``Mode.LINE_BUFFER`` with the given
    ``depth`` and ``stencil_width``; the PE ``p0`` is configured as an
    adder.  Only the ``valid_out`` signal is checked, over ``3 * depth``
    cycles.

    Args:
        cw_files: files copied next to the generated RTL before simulation.
        add_pd: forwarded to ``create_cgra``.
        io_sides: IO side specification forwarded to ``create_cgra``.
        stencil_width: value written to the ``stencil_width`` register.
        depth: value written to the ``depth`` register.
    """
    side = 2
    interconnect = create_cgra(side,
                               side,
                               io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    bus = {"e0": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}
    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in"), ("p0", "data0")],
        "e1": [("m0", "data_out"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in")],
        "e5": [("m0", "valid_out"), ("i4", "f2io_1")]
    }

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # m0 runs as a line buffer; registers are written in this order.
    line_buffer = Mode.LINE_BUFFER
    mem_x, mem_y = placement["m0"]
    mem_core = interconnect.tile_circuits[(mem_x, mem_y)].core
    mem_registers = (
        ("depth", depth),
        ("mode", line_buffer.value),
        ("stencil_width", stencil_width),
        ("tile_en", 1),
    )
    for reg_name, reg_value in mem_registers:
        reg_addr = interconnect.get_config_addr(
            mem_core.get_reg_index(reg_name), 0, mem_x, mem_y)
        config_data.append((reg_addr, reg_value))

    # p0 is configured as add; its address is composed by hand from the
    # register address and the tile coordinates.
    pe_x, pe_y = placement["p0"]
    tile_id = (pe_x << 8) | pe_y
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    for reg_addr, word in pe_tile.core.get_config_bitstream(asm.add()):
        config_data.append(((reg_addr << 24) | tile_id, word))

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    for cfg_addr, cfg_word in config_data:
        tester.configure(cfg_addr, cfg_word)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_word)

    src_x, src_y = placement["I0"]
    dst_x, dst_y = placement["I1"]
    wen_x, wen_y = placement["i3"]
    valid_x, valid_y = placement["i4"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    # The data value is irrelevant here; a constant 0 is driven.
    stimulus = 0
    first_valid = depth + stencil_width - 1
    second_line_end = 2 * depth
    second_valid = 2 * depth + stencil_width - 1
    for cycle in range(3 * depth):
        tester.poke(circuit.interface[src], stimulus)
        tester.eval()

        # valid is low until the first line plus the stencil has been
        # seen, high to the end of the second line, low again for the
        # next stencil_width - 1 cycles, then high.
        if cycle < first_valid:
            expected_valid = 0
        elif cycle < second_line_end:
            expected_valid = 1
        elif cycle < second_valid:
            expected_valid = 0
        else:
            expected_valid = 1
        tester.expect(circuit.interface[valid], expected_valid)

        # one full clock period
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        # Gather RTL and support files for the simulator.
        for generated in glob.glob("genesis_verif/*.*"):
            shutil.copy(generated, tempdir)
        for cw_file in cw_files:
            shutil.copy(cw_file, tempdir)
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        for sv_file in glob.glob("tests/*.sv"):
            shutil.copy(sv_file, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal", "--trace"])
コード例 #20
0
def test_interconnect_fifo(dw_files, io_sides, depth):
    """Randomly exercise a memory tile configured as a FIFO.

    A software model (a ``deque``) is kept in lock-step with the hardware
    for 2048 cycles.  Each cycle one of four moves is drawn at random:
    read, write, read-and-write, or idle.  ``empty``, ``full`` and
    ``valid`` are checked every cycle; the data output is checked whenever
    a read is expected to be valid.

    Args:
        dw_files: files copied next to the generated RTL before simulation.
        io_sides: IO side specification forwarded to ``create_cgra``.
        depth: FIFO depth written to ``fifo_ctrl_fifo_depth``.
    """
    # Status note kept from the original: passes after the move from
    # Genesis to Kratos; the test is basically unchanged.

    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in_0")],
        "e1": [("i3", "io2f_1"), ("m0", "wen_in_0")],
        "e2": [("i4", "io2f_1"), ("m0", "ren_in_0")],
        "e3": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e4": [("m0", "valid_out_0"), ("i4", "f2io_1")],
        "e5": [("m0", "empty"), ("i2", "f2io_1")],
        "e6": [("m0", "full"), ("i3", "f2io_1")]
    }
    bus = {"e0": 16, "e1": 1, "e2": 1, "e3": 16, "e4": 1, "e5": 1, "e6": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as fifo mode
    mode = 1  # Mode.FIFO
    tile_en = 1

    # The mode entry now uses ``mode`` instead of repeating the literal.
    configs_mem = [("fifo_ctrl_fifo_depth", depth, 0),
                   ("mode", mode, 0),
                   ("tile_en", tile_en, 0),
                   ("flush_reg_sel", 1, 0)]
    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y, mcore)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    # Write every configuration word and read it back.
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # Several signals share an IO tile (i3: wen in / full out,
    # i4: ren in / valid out), hence the repeated placement lookups.
    src_coord = placement["I0"]
    src = interconnect.get_top_input_port_by_coord(src_coord, 16)
    dst_coord = placement["I1"]
    dst = interconnect.get_top_output_port_by_coord(dst_coord, 16)
    wen_coord = placement["i3"]
    wen = interconnect.get_top_input_port_by_coord(wen_coord, 1)
    valid_coord = placement["i4"]
    valid = interconnect.get_top_output_port_by_coord(valid_coord, 1)
    ren_coord = placement["i4"]
    ren = interconnect.get_top_input_port_by_coord(ren_coord, 1)
    full_coord = placement["i3"]
    full = interconnect.get_top_output_port_by_coord(full_coord, 1)
    empty_coord = placement["i2"]
    empty = interconnect.get_top_output_port_by_coord(empty_coord, 1)

    tester.step(1)

    # Software model: new items enter on the left, reads pop from the right.
    fifo = deque()
    valid_check = 0
    most_recent_read = 0
    # NOTE(review): the random generator is not seeded in this function,
    # so the stimulus may differ between runs.
    for i in range(2048):

        # Occupancy before this cycle's move; empty/full are checked
        # against this snapshot.
        len_fifo = len(fifo)

        # Pick random from (READ, WRITE, READ_AND_WRITE, IDLE);
        # randint includes both end points, so 3 selects the idle branch.
        move = random.randint(0, 3)
        if move == 0:
            # read
            tester.poke(circuit.interface[ren], 1)
            if fifo:
                most_recent_read = fifo.pop()
                valid_check = 1
            else:
                valid_check = 0
        elif move == 1:
            # write; dropped by the model when the fifo is already full
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[src], write_val)
            if len(fifo) < depth:
                fifo.appendleft(write_val)
            valid_check = 0
        elif move == 2:
            # r and w; occupancy is unchanged, and on an empty fifo the
            # model returns the value that was just written
            write_val = random.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[ren], 1)
            tester.poke(circuit.interface[src], write_val)
            fifo.appendleft(write_val)
            most_recent_read = fifo.pop()
            valid_check = 1
        else:
            # If not doing anything, valid will be low, and we expect
            # to see the same output as before
            valid_check = 0
        tester.eval()

        tester.expect(circuit.interface[empty], len_fifo == 0)
        tester.expect(circuit.interface[full], len_fifo == depth)
        tester.expect(circuit.interface[valid], valid_check)
        if valid_check:
            tester.expect(circuit.interface[dst], most_recent_read)
        tester.step(2)

        # Deassert both strobes before the next move is drawn.
        tester.poke(circuit.interface[wen], 0)
        tester.poke(circuit.interface[ren], 0)

    with tempfile.TemporaryDirectory() as tempdir:
        # Gather RTL and support files for the simulator.
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in dw_files:
            shutil.copy(filename, tempdir)
        shutil.copy(os.path.join("tests", "test_memory_core",
                                 "sram_stub.v"),
                    os.path.join(tempdir, "sram_512w_16b.v"))
        for aoi_mux in glob.glob("tests/*.sv"):
            shutil.copy(aoi_mux, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
コード例 #21
0
def test_interconnect_sram(run_tb, io_sides):
    """Configure a memory tile in SRAM mode and read it through the fabric.

    IO ``I0`` drives the memory address input, ``I1`` observes the memory
    data output and ``i3`` drives the read enable.  The SRAM contents are
    loaded through the configuration interface, read back over the same
    interface, and then addresses 0..2047 are applied from the fabric side.

    Args:
        run_tb: callable that executes the recorded testbench.
        io_sides: IO side specification forwarded to ``create_cgra``.
    """
    # Status note kept from the original: passes after the move from
    # Genesis to Kratos; the test is basically unchanged.
    side = 2
    interconnect = create_cgra(side,
                               side,
                               io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    bus = {"e0": 16, "e1": 16, "e2": 1}
    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "addr_in_0")],
        "e1": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e2": [("i3", "io2f_1"), ("m0", "ren_in_0")]
    }

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    sram_mode = 2  # Mode.SRAM
    mem_settings = [("mode", sram_mode, 0), ("tile_en", 1, 0),
                    ("flush_reg_sel", 1, 0)]
    mem_x, mem_y = placement["m0"]
    mem_core = interconnect.tile_circuits[(mem_x, mem_y)].core
    config_mem_tile(interconnect, config_data, mem_settings, mem_x, mem_y,
                    mem_core)
    config_data = compress_config_data(config_data)

    # SRAM payload: 512 entries; entry i lives at memory address i % 256
    # of feature i // 256 + 1 and carries the value i.
    sram_data = [
        (interconnect.get_config_addr(entry % 256, entry // 256 + 1,
                                      mem_x, mem_y), entry)
        for entry in range(0, 512)
    ]

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()
    for cfg_addr, cfg_word in config_data:
        tester.configure(cfg_addr, cfg_word)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_word)

    for sram_addr, base in sram_data:
        # Four consecutive writes per configuration address ...
        for lane in range(4):
            tester.configure(sram_addr, base * 4 + lane)
            tester.eval()
        # ... followed by four reads that are expected to return the same
        # four values in order.
        for lane in range(4):
            tester.config_read(sram_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, base * 4 + lane)

    # The tile/route configuration is written and verified a second time
    # after the SRAM contents have been loaded.
    for cfg_addr, cfg_word in config_data:
        tester.configure(cfg_addr, cfg_word)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_word)

    tester.done_config()

    src = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    dst = interconnect.get_top_output_port_by_coord(placement["I1"], 16)
    ren = interconnect.get_top_input_port_by_coord(placement["i3"], 1)

    tester.step(2)
    tester.poke(circuit.interface[ren], 1)
    tester.eval()

    # Apply each address, advance one clock period, and expect the data
    # output to equal the applied address.
    for address in range(2048):
        tester.poke(circuit.interface[src], address)
        tester.eval()
        tester.step(2)
        tester.eval()
        tester.expect(circuit.interface[dst], address)

    run_tb(tester)
コード例 #22
0
ファイル: test_stall.py プロジェクト: zamyers/garnet
def test_stall(dw_files, io_sides):
    """Check that asserting ``stall`` freezes the fabric output.

    A 2x2 CGRA is built with a register, a memory tile and a PE
    configured as an adder whose first operand uses ``Mode_t.DELAY``.
    The input stream is driven for 20 cycles, the chip is stalled for
    another 20 cycles while unrelated data is poked, and then the stream
    resumes.  While stalled the output must keep its last value.

    Args:
        dw_files: files copied next to the generated RTL before simulation.
        io_sides: IO side specification forwarded to ``create_cgra``.
    """
    chip_size = 2
    depth = 10
    # Cycle index at which the stall is asserted; the stream later resumes
    # from this same index.
    stall_cycle = 19
    interconnect = create_cgra(chip_size,
                               chip_size,
                               io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("r1", "reg")],
        "e2": [("r1", "reg"), ("m0", "data_in_0"), ("p0", "data0")],
        "e1": [("m0", "data_out_0"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in_0"), ("m0", "ren_in_0")],
        "e5": [("m0", "valid_out_0"), ("i4", "f2io_1")]
    }
    bus = {"e0": 16, "e2": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    x, y = placement["p0"]

    # Configure the PE as an adder.
    tile = interconnect.tile_circuits[(x, y)]
    add_bs = tile.core.get_config_bitstream(asm.add(ra_mode=asm.Mode_t.DELAY))
    for addr, data in add_bs:
        config_data.append((interconnect.get_config_addr(addr, 0, x, y), data))

    tile_en = 1

    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core

    # (register name, value, third field always 0) triples for the memory
    # tile; the depth-related registers all use ``depth``.
    configs_mem = [
        ("strg_ub_app_ctrl_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_read_depth_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_wo_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_ss_0", depth, 0),
        ("strg_ub_app_ctrl_coarse_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_coarse_read_depth_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_wo_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_ss_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_2", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_3", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_sync_grp_sync_group_0", 1, 0),
        ("strg_ub_tba_0_tb_0_range_outer", depth, 0),
        ("strg_ub_tba_0_tb_0_starting_addr", 0, 0),
        ("strg_ub_tba_0_tb_0_stride", 1, 0),
        ("strg_ub_tba_0_tb_0_dimensionality", 1, 0),
        ("strg_ub_agg_align_0_line_length", depth, 0),
        # indices 0..3 packed into one register, 3 bits apart
        ("strg_ub_tba_0_tb_0_indices_merged_0",
         (0 << 0) | (1 << 3) | (2 << 6) | (3 << 9), 0),
        ("strg_ub_tba_0_tb_0_range_inner", 4, 0),
        ("strg_ub_tba_0_tb_0_tb_height", 1, 0), ("tile_en", tile_en, 0),
        ("mode", 0, 0), ("flush_reg_sel", 1, 0), ("wen_in_1_reg_sel", 1, 0),
        ("ren_in_1_reg_sel", 1, 0)
    ]
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y,
                    mcore)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # stall the chip while it is being configured
    tester.poke(circuit.interface["stall"], 1)
    tester.eval()

    # Write every configuration word and read it back.
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # un-stall the chip
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid_x, valid_y = placement["i4"]
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    for i in range(stall_cycle + 1):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        # The threshold is tied to ``depth`` (previously a literal 10) so
        # both branches stay consistent if ``depth`` is changed.
        if i >= depth + 1:
            # data0 of PE: i - 1 - 1
            # data1 of PE: i - 1 - depth
            tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
            tester.expect(circuit.interface[valid], 1)
        elif i < depth:
            tester.expect(circuit.interface[valid], 0)
        if i == stall_cycle:
            # now stall everything
            tester.poke(circuit.interface["stall"], 1)
            tester.eval()
        tester.step(2)

    # Output value that must be held for the whole stalled period.
    frozen_output = stall_cycle * 2 - 3 - depth
    for i in range(20):
        # poke unrelated numbers. it shouldn't matter while stalled
        tester.poke(circuit.interface[src], i * 20)
        tester.expect(circuit.interface[dst], frozen_output)
        tester.step(2)

    # un-stall again
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # Resume the stream from the cycle at which the stall began.
    for i in range(stall_cycle, 30):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
        tester.expect(circuit.interface[valid], 1)
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        # Gather RTL and support files for the simulator.
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in dw_files:
            shutil.copy(filename, tempdir)
        shutil.copy(os.path.join("tests", "test_memory_core", "sram_stub.v"),
                    os.path.join(tempdir, "sram_512w_16b.v"))
        for aoi_mux in glob.glob("tests/*.sv"):
            shutil.copy(aoi_mux, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
コード例 #23
0
ファイル: test_pond.py プロジェクト: mfkiwl/garnet
def test_pond_pe_acc(run_tb):
    """Check a PE/pond accumulation loop on a 2x2 CGRA.

    The netlist drives the 16-bit input IO ``I0`` into ``data0`` of PE ``p0``,
    loops ``alu_res`` back through the pond (``data_in_pond`` ->
    ``data_out_pond`` -> ``data1``) and also sends ``data_out_pond`` to the
    output IO ``I1``. The PE core is configured with ``asm.add()``. After the
    configuration has been written and read back, 16 values are poked and the
    output is compared against a running sum.

    Args:
        run_tb: callable invoked with the populated tester as the final step
            (presumably a pytest fixture -- confirm against conftest).
    """
    side = 2
    interconnect = create_cgra(
        side, side, io_sides(),
        num_tracks=3,
        add_pd=True,
        add_pond=True,
        mem_ratio=(1, 2),
    )

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("p0", "data_out_pond"), ("p0", "data1")],
        "e2": [("p0", "alu_res"), ("p0", "data_in_pond")],
        "e3": [("p0", "data_out_pond"), ("I1", "f2io_16")]
    }
    # every net in this design is 16 bits wide
    bus = {net: 16 for net in netlist}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    pe_x, pe_y = placement["p0"]
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    pond_core = pe_tile.additional_cores[0]

    # PE core configuration for the add instruction, translated to tile
    # addresses and appended after the routing configuration
    pe_config = pe_tile.core.get_config_bitstream(asm.add())
    config_data.extend(
        (interconnect.get_config_addr(reg, 0, pe_x, pe_y), value)
        for reg, value in pe_config
    )

    # Controller settings handed to generate_pond_api. The original legend
    # reads: Ranges, Strides, Dimensionality, Starting Addr,
    # Starting Addr - Schedule. NOTE(review): the legend names five fields
    # but each list has six entries -- confirm the mapping.
    ctrl_rd = [[16, 1], [0, 0], 2, 8, 0, [1, 0]]
    ctrl_wr = [[16, 1], [0, 0], 2, 8, 0, [1, 0]]

    generate_pond_api(interconnect, pond_core, ctrl_rd, ctrl_wr,
                      pe_x, pe_y, config_data)

    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()

    # hold the fabric stalled while configuration is written
    tester.poke(circuit.interface["stall"], 1)

    # write each configuration word, then read it back and check it
    for cfg_addr, cfg_word in config_data:
        tester.configure(cfg_addr, cfg_word)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_word)

    tester.done_config()
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    in_x, in_y = placement["I0"]
    src_name0 = f"glb2io_16_X{in_x:02X}_Y{in_y:02X}"
    out_x, out_y = placement["I1"]
    dst_name = f"io2glb_16_X{out_x:02X}_Y{out_y:02X}"
    random.seed(0)

    # Drive 1..16. The expected output at each step equals the sum of the
    # values driven in all earlier steps (0, 1, 3, 6, ...).
    expected = 0
    for step, stimulus in enumerate(range(1, 17)):
        tester.poke(circuit.interface[src_name0], stimulus)
        expected += step
        tester.eval()
        tester.expect(circuit.interface[dst_name], expected)
        tester.step(2)
        tester.eval()

    run_tb(tester)