예제 #1
0
def test_add(interconnect_route):
    """Simulate a routed PE ``add`` whose second operand comes from memory.

    The fixture supplies ``(interconnect, placement, route_path)``. The PE
    placed at ``"p0"`` is given an add instruction and the memory tile at
    ``"m0"`` is loaded in SRAM mode with ``(i, i + 42)`` entries. Two routed
    inputs are driven with the same value each iteration and the routed
    output is compared against ``value + value + 42 - 2`` from the second
    iteration onwards.
    """
    interconnect, placement, route_path = interconnect_route
    add_instr = asm.add()

    compiler = InterconnectModelCompiler(interconnect)
    compiler.configure_route(route_path)
    pe_x, pe_y = placement["p0"]
    compiler.set_core_instr(pe_x, pe_y, add_instr)

    # configure the memory
    sram_contents = [(addr, addr + 42) for addr in range(100)]
    sram_instr = MemoryInstruction(MemoryMode.SRAM,
                                   data_entries=sram_contents)
    mem_x, mem_y = placement["m0"]
    compiler.set_core_instr(mem_x, mem_y, sram_instr)

    model = compiler.compile()

    # Each net's first path: its first node is driven, its last is observed.
    input_1 = route_path["e0"][0][0]
    input_2 = route_path["e1"][0][0]
    input_3 = route_path["e4"][0][0]
    end = route_path["e3"][0][-1]

    # set ren to high all the time
    model.set_value(input_3, 1)

    for value in range(10):
        model.set_value(input_1, value)
        model.set_value(input_2, value)
        model.eval()
        # The very first iteration is not checked.
        if value > 0:
            assert model.get_value(end) == value + value + 42 - 2
예제 #2
0
def test_reset():
    """Check that ASYNCRESET forces ``O0`` to 0 and that it recovers after.

    An add instruction with both operands in DELAY mode is applied together
    with two non-zero random 16-bit operands. ``O0`` is expected to equal
    their sum after one step, to read 0 while ``ASYNCRESET`` is high (both
    immediately and after two more steps), and to equal the sum again two
    steps after the reset is released. The sequence runs under Verilator.
    """
    tester = fault.Tester(pe_circuit, clock=pe_circuit.CLK)

    delayed_add = add(ra_mode=Mode_t.DELAY, rb_mode=Mode_t.DELAY)
    tester.circuit.inst = assembler(delayed_add)

    # Draw each operand until it is non-zero.
    operands = []
    for _ in range(2):
        candidate = hwtypes.BitVector.random(16)
        while candidate == 0:
            candidate = hwtypes.BitVector.random(16)
        operands.append(candidate)
    expected_sum = operands[0] + operands[1]

    tester.circuit.data0 = operands[0]
    tester.circuit.data1 = operands[1]
    tester.circuit.CLK = 0
    tester.circuit.clk_en = 1
    tester.circuit.ASYNCRESET = 0
    tester.step(1)
    tester.circuit.O0.expect(expected_sum)

    # Assert reset: the output is checked before any clock edge and again
    # after two steps.
    tester.circuit.ASYNCRESET = 1
    tester.eval()
    tester.circuit.O0.expect(0)
    tester.step(2)
    tester.circuit.O0.expect(0)

    # Release reset and expect the sum to reappear.
    tester.circuit.ASYNCRESET = 0
    tester.step(2)
    tester.circuit.O0.expect(expected_sum)

    tester.compile_and_run("verilator",
                           flags=["-Wno-UNUSED", "-Wno-fatal"],
                           directory="tests/build",
                           magma_opts={"coreir_libs": {"float_DW"}})
예제 #3
0
def test_pe_stall(dw_files):
    """Check that ``alu_res`` reads 0 while the PE core is stalled.

    The core built from ``gen_pe`` is reset and ``stall`` is driven high
    before an add instruction (both operands in DELAY mode) is written
    through the configuration port. One hundred operand pairs are poked and
    ``alu_res`` is expected to be 0 after each. The recorded sequence runs
    under Verilator inside a temporary directory.

    Args:
        dw_files: Paths of files copied into the temporary build directory
            before compilation.
    """
    pe_core = PeakCore(gen_pe)
    pe_core.name = lambda: "PECore"
    dut = pe_core.circuit()

    # random test stuff
    tester = BasicTester(dut, dut.clk, dut.reset)
    tester.reset()

    tester.poke(dut.interface["stall"], 1)
    add_instr = add(ra_mode=Mode.DELAY, rb_mode=Mode.DELAY)
    bitstream = pe_core.get_config_bitstream(add_instr)

    for reg_addr, reg_value in bitstream:
        # can't read back yet
        tester.configure(reg_addr, reg_value)

    # Operands run from 1 to 100 inclusive.
    for operand in range(1, 101):
        tester.poke(dut.interface["data0"], operand)
        tester.poke(dut.interface["data1"], operand)
        tester.eval()
        tester.expect(dut.interface["alu_res"], 0)

    with tempfile.TemporaryDirectory() as build_dir:
        for support_file in dw_files:
            shutil.copy(support_file, build_dir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=build_dir,
                               flags=["-Wno-fatal"])
예제 #4
0
def write_data01(pe, data0: Data, data1: Data, instr=asm.add(), ra=Data(0)):
    """Issue a configuration write of ``data0``/``data1`` to ``pe``.

    ``data0`` and ``data1`` are joined with ``BitVector.concat`` and sent as
    ``config_data`` to address ``DATA01_ADDR`` with ``config_en`` set to 1.
    ``ra`` is forwarded as the PE's regular ``data0`` input for the same
    call.

    Returns:
        Whatever ``pe`` returns for this call.
    """
    return pe(
        instr,
        data0=ra,
        config_addr=Data8(DATA01_ADDR),
        config_data=BitVector.concat(data0, data1),
        config_en=Bit(1),
    )
예제 #5
0
def test_write_priority_data0(args):
    """Check data0 register contents when a config write and ``ra`` collide.

    ``args`` is indexed as ``(config data0, config data1, ra input)``. Each
    ``read_data0`` call below also drives a new ``ra`` value, so the order
    of the assertions matters.
    """
    instr = asm.add(ra_mode=Mode_t.DELAY)
    cfg_data0, cfg_data1, ra_input = args[0], args[1], args[2]

    write_data01(pe, data0=cfg_data0, data1=cfg_data1, instr=instr,
                 ra=ra_input)

    # The config write takes priority over the ra input.
    first_read = read_data0(pe, instr=instr, ra=ra_input)
    assert cfg_data0 == first_read

    # Now the data0 register should contain the ra input (from delay).
    second_read = read_data0(pe, instr=instr, ra=cfg_data1)
    assert ra_input == second_read

    # The previous read drove cfg_data1 on ra, so it is expected next.
    third_read = read_data0(pe, instr=instr)
    assert cfg_data1 == third_read

    # data1 should still contain cfg_data1 from the first write_data01.
    assert cfg_data1 == read_data1(pe)
예제 #6
0
def test_reg_delay(args):
    """Run the RTL tester on an add with both operands in DELAY mode.

    ``args`` unpacks into the two operands; the expected result is their
    sum, checked with ``delay=1``. One random ``DATAWIDTH``-wide value is
    supplied as ``data1_delay_values``.
    """
    lhs, rhs = args
    delayed_add = asm.add(ra_mode=Mode_t.DELAY, rb_mode=Mode_t.DELAY)
    delay_values = [UIntVector.random(DATAWIDTH)]
    rtl_tester(
        delayed_add,
        lhs,
        rhs,
        res=lhs + rhs,
        delay=1,
        data1_delay_values=delay_values,
    )
예제 #7
0
def test_stall(args):
    """Run the RTL tester on an add with the clock enable held low.

    ``ra`` is in BYPASS mode and ``rb`` in DELAY mode. With ``clk_en=0`` the
    expected result passed to the tester is ``data0`` alone. One random
    ``DATAWIDTH``-wide value is supplied as ``data1_delay_values``.
    """
    lhs, rhs = args
    stalled_add = asm.add(ra_mode=Mode_t.BYPASS, rb_mode=Mode_t.DELAY)
    delay_values = [UIntVector.random(DATAWIDTH)]
    rtl_tester(
        stalled_add,
        lhs,
        rhs,
        res=lhs,
        clk_en=0,
        data1_delay_values=delay_values,
    )
예제 #8
0
def write_bit012(pe, bit0: Bit, bit1: Bit, bit2: Bit, instr=asm.add()):
    """Issue a configuration write of the three bit values to ``pe``.

    Each bit is wrapped as a 1-bit vector; the three are concatenated in
    the order ``bit0``, ``bit1``, ``bit2`` and then joined with a 29-bit
    zero vector to form ``config_data``. The write targets ``BIT012_ADDR``
    with ``config_en`` set to 1 and ``data0`` driven with ``Data(0)``.

    Returns:
        Whatever ``pe`` returns for this call.
    """
    one_bit = BitVector[1]
    target_addr = Data8(BIT012_ADDR)

    # Build the payload step by step: bit0 ++ bit1 ++ bit2 ++ 29 zero bits.
    payload = BitVector.concat(one_bit(bit0), one_bit(bit1))
    payload = BitVector.concat(payload, one_bit(bit2))
    payload = BitVector.concat(payload, BitVector[29](0))

    write_enable = Bit(1)
    return pe(instr,
              data0=Data(0),
              config_addr=target_addr,
              config_data=payload,
              config_en=write_enable)
예제 #9
0
def test_pe_config(dw_files):
    """Configure a PE core through its config port and check its outputs.

    First an add instruction with both operands in DELAY mode is written,
    plus one extra register write, and every written word is read back and
    compared. ``alu_res`` is then expected to be ``0x42 + 0x42`` whatever is
    poked on ``data0``/``data1``. After a second reset, ``res_p`` is expected
    to be 1 with ``bit0`` and ``bit1`` poked to 0. The recorded sequence runs
    under Verilator inside a temporary directory.

    Args:
        dw_files: Paths of files copied into the temporary build directory
            before compilation.
    """
    core = PeakCore(PE_fc)
    core.name = lambda: "PECore"
    circuit = core.circuit()

    # random test stuff
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    tester.poke(circuit.interface["stall"], 1)
    config_data = core.get_config_bitstream(
        add(ra_mode=Mode_t.DELAY, rb_mode=Mode_t.DELAY))
    # hacky way to configure it as 0x42 + 0x42 from the operand register
    config_data += [(3, 0x42 << 16 | 0x42)]
    for addr, data in config_data:
        # Log each (address, data) pair, write it, then read it back.
        print("{0:08X} {1:08X}".format(addr, data))
        tester.configure(addr, data)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, data)

    # The poked inputs vary, but the expected sum is the constant written
    # above.
    for i in range(10):
        tester.poke(circuit.interface["data0"], i + 1)
        tester.poke(circuit.interface["data1"], i + 1)
        tester.eval()
        tester.expect(circuit.interface["alu_res"], 0x42 + 0x42)

    tester.reset()
    lut_val = lut_and().lut

    # NOTE(review): this second bitstream (including the extra (4, 0x7)
    # entry) is built but never passed to tester.configure in this function.
    # Confirm whether a configuration loop is missing here.
    config_data = core.get_config_bitstream(
        inst(alu=ALU_t.Add,
             lut=lut_val,
             rd_mode=Mode_t.DELAY,
             re_mode=Mode_t.DELAY,
             rf_mode=Mode_t.DELAY))
    config_data += [(4, 0x7)]
    tester.poke(circuit.interface["bit0"], 0)
    tester.poke(circuit.interface["bit1"], 0)
    tester.eval()
    tester.expect(circuit.interface["res_p"], 1)

    with tempfile.TemporaryDirectory() as tempdir:
        for filename in dw_files:
            shutil.copy(filename, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
예제 #10
0
def test_pe_stall(run_tb):
    """Check that ``alu_res`` reads 0 while the PE core is stalled.

    The core built from ``PE_fc`` is reset and ``stall`` is driven high
    before an add instruction (both operands in DELAY mode) is written
    through the configuration port. One hundred operand pairs are poked and
    ``alu_res`` is expected to be 0 after each.

    Args:
        run_tb: Callable that receives the populated tester and runs it.
    """
    pe_core = PeakCore(PE_fc)
    pe_core.name = lambda: "PECore"
    dut = pe_core.circuit()

    # random test stuff
    tester = BasicTester(dut, dut.clk, dut.reset)
    tester.reset()

    tester.poke(dut.interface["stall"], 1)
    add_instr = add(ra_mode=Mode_t.DELAY, rb_mode=Mode_t.DELAY)
    bitstream = pe_core.get_config_bitstream(add_instr)

    for reg_addr, reg_value in bitstream:
        # can't read back yet
        tester.configure(reg_addr, reg_value)

    # Operands run from 1 to 100 inclusive.
    for operand in range(1, 101):
        tester.poke(dut.interface["data0"], operand)
        tester.poke(dut.interface["data1"], operand)
        tester.eval()
        tester.expect(dut.interface["alu_res"], 0)

    run_tb(tester)
예제 #11
0
def test_stall(dw_files, io_sides):
    """Check that a 2x2 CGRA holds its output while ``stall`` is asserted.

    A netlist connecting an IO input through a register to a memory tile and
    a PE (configured as add with ``ra`` in DELAY mode) is placed, routed and
    configured. Data is streamed in, the chip is stalled at the last
    iteration of the first loop, the output is expected to stay constant
    while different inputs are poked, and streaming is then resumed. The
    recorded sequence runs under Verilator inside a temporary directory.

    Args:
        dw_files: Paths of files copied into the temporary build directory.
        io_sides: Forwarded unchanged to ``create_cgra``.
    """
    chip_size = 2
    depth = 10
    interconnect = create_cgra(chip_size,
                               chip_size,
                               io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    # Net name -> list of (block id, port) pins; `bus` gives each net's width.
    netlist = {
        "e0": [("I0", "io2f_16"), ("r1", "reg")],
        "e2": [("r1", "reg"), ("m0", "data_in_0"), ("p0", "data0")],
        "e1": [("m0", "data_out_0"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in_0"), ("m0", "ren_in_0")],
        "e5": [("m0", "valid_out_0"), ("i4", "f2io_1")]
    }
    bus = {"e0": 16, "e2": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    x, y = placement["p0"]

    # Append the PE's add configuration, translated to chip-level addresses.
    tile = interconnect.tile_circuits[(x, y)]
    add_bs = tile.core.get_config_bitstream(asm.add(ra_mode=asm.Mode_t.DELAY))
    for addr, data in add_bs:
        config_data.append((interconnect.get_config_addr(addr, 0, x, y), data))

    tile_en = 1

    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core

    # Memory-tile register settings handed to config_mem_tile below.
    configs_mem = [
        ("strg_ub_app_ctrl_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_read_depth_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_wo_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_ss_0", depth, 0),
        ("strg_ub_app_ctrl_coarse_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_coarse_read_depth_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_wo_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_ss_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_2", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_3", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_sync_grp_sync_group_0", 1, 0),
        ("strg_ub_tba_0_tb_0_range_outer", depth, 0),
        ("strg_ub_tba_0_tb_0_starting_addr", 0, 0),
        ("strg_ub_tba_0_tb_0_stride", 1, 0),
        ("strg_ub_tba_0_tb_0_dimensionality", 1, 0),
        ("strg_ub_agg_align_0_line_length", depth, 0),
        ("strg_ub_tba_0_tb_0_indices_merged_0",
         (0 << 0) | (1 << 3) | (2 << 6) | (3 << 9), 0),
        ("strg_ub_tba_0_tb_0_range_inner", 4, 0),
        ("strg_ub_tba_0_tb_0_tb_height", 1, 0), ("tile_en", tile_en, 0),
        ("mode", 0, 0), ("flush_reg_sel", 1, 0), ("wen_in_1_reg_sel", 1, 0),
        ("ren_in_1_reg_sel", 1, 0)
    ]
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y,
                    mcore)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # stall the chip
    tester.poke(circuit.interface["stall"], 1)
    tester.eval()

    # Write every configuration word and read it back to confirm it stuck.
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # un-stall the chip
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # Top-level port names are derived from each IO block's placement.
    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid_x, valid_y = placement["i4"]
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    for i in range(20):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        # NOTE(review): the literal 10 equals `depth` here; presumably the
        # threshold is meant to be depth + 1 - confirm.
        if i >= 10 + 1:
            # data0 of PE: i - 1 - 1
            # data1 of PE: i - 1 - depth
            tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
            tester.expect(circuit.interface[valid], 1)
        elif i < depth:
            tester.expect(circuit.interface[valid], 0)
        if i == 19:
            # now stall everything
            tester.poke(circuit.interface["stall"], 1)
            tester.eval()
        tester.step(2)

    # While stalled, the output is expected to keep the value from i == 19.
    for i in range(20):
        # poke random numbers. it shouldn't matter
        tester.poke(circuit.interface[src], i * 20)
        tester.expect(circuit.interface[dst], 19 * 2 - 3 - depth)
        tester.step(2)

    # un-stall again
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # Resume streaming from the value that was in flight when the stall hit.
    for i in range(19, 30):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
        tester.expect(circuit.interface[valid], 1)
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        # Gather all RTL dependencies next to the generated design.
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in dw_files:
            shutil.copy(filename, tempdir)
        # The SRAM stub is copied under the file name sram_512w_16b.v.
        shutil.copy(os.path.join("tests", "test_memory_core", "sram_stub.v"),
                    os.path.join(tempdir, "sram_512w_16b.v"))
        for aoi_mux in glob.glob("tests/*.sv"):
            shutil.copy(aoi_mux, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
예제 #12
0
파일: test_pond.py 프로젝트: mfkiwl/garnet
def test_pond_pe_acc(run_tb):
    """Route a PE's ``alu_res`` through its pond and back into ``data1``.

    A 2x2 CGRA is built with ``add_pond=True``. The PE at ``"p0"`` is
    configured as add, with its ``alu_res`` wired to ``data_in_pond`` and
    ``data_out_pond`` wired both to ``data1`` and to an IO output. Sixteen
    values are streamed in and the IO output is compared against a running
    total after each.

    Args:
        run_tb: Callable that receives the populated tester and runs it.
    """

    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides(),
                               num_tracks=3,
                               add_pd=True,
                               add_pond=True,
                               mem_ratio=(1, 2))

    # Net name -> list of (block id, port) pins; `bus` gives each net's width.
    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("p0", "data_out_pond"), ("p0", "data1")],
        "e2": [("p0", "alu_res"), ("p0", "data_in_pond")],
        "e3": [("p0", "data_out_pond"), ("I1", "f2io_16")]
    }
    bus = {"e0": 16, "e1": 16, "e2": 16, "e3": 16}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    pe_x, pe_y = placement["p0"]

    petile = interconnect.tile_circuits[(pe_x, pe_y)]

    # The pond is taken from the tile's first additional core.
    pondcore = petile.additional_cores[0]

    # Append the PE's add configuration, translated to chip-level addresses.
    add_bs = petile.core.get_config_bitstream(asm.add())
    for addr, data in add_bs:
        config_data.append((interconnect.get_config_addr(addr, 0, pe_x, pe_y), data))

    # Ranges, Strides, Dimensionality, Starting Addr, Starting Addr - Schedule
    ctrl_rd = [[16, 1], [0, 0], 2, 8, 0, [1, 0]]
    ctrl_wr = [[16, 1], [0, 0], 2, 8, 0, [1, 0]]

    generate_pond_api(interconnect, pondcore, ctrl_rd, ctrl_wr, pe_x, pe_y, config_data)

    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()

    # Keep the chip stalled while configuration words are written.
    tester.poke(circuit.interface["stall"], 1)

    # Write every configuration word and read it back to confirm it stuck.
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    tester.done_config()
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # Top-level port names are derived from each IO block's placement.
    src_x0, src_y0 = placement["I0"]
    src_name0 = f"glb2io_16_X{src_x0:02X}_Y{src_y0:02X}"
    dst_x, dst_y = placement["I1"]
    dst_name = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    # NOTE(review): no random values are drawn in this function after
    # seeding - confirm whether the seed is still needed.
    random.seed(0)

    total = 0
    for i in range(16):
        tester.poke(circuit.interface[src_name0], i + 1)
        # NOTE(review): the input is i + 1 but the total accumulates i;
        # presumably this accounts for a one-iteration lag - confirm.
        total = total + i
        tester.eval()
        tester.expect(circuit.interface[dst_name], total)
        tester.step(2)
        tester.eval()

    run_tb(tester)
예제 #13
0
def test_interconnect_line_buffer(cw_files, add_pd, io_sides):
    """Stream data through a memory tile and a PE add on a 2x2 CGRA.

    The IO input feeds both the memory tile and the PE's ``data0``; the
    memory tile's output feeds the PE's ``data1``. After ``depth + 10``
    iterations the IO output is expected to equal ``i * 2 - depth``. The
    recorded sequence runs under Verilator inside a temporary directory.

    Args:
        cw_files: Paths of files copied into the temporary build directory.
        add_pd: Forwarded to ``create_cgra`` as ``add_pd``.
        io_sides: Forwarded unchanged to ``create_cgra``.
    """
    depth = 10
    chip_size = 2
    interconnect = create_cgra(chip_size,
                               chip_size,
                               io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    # Net name -> list of (block id, port) pins; `bus` gives each net's width.
    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in"), ("p0", "data0")],
        "e1": [("m0", "data_out"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in")]
    }
    bus = {"e0": 16, "e1": 16, "e3": 16, "e4": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as line buffer mode
    mem_x, mem_y = placement["m0"]
    # A single word at register 0 carries the constant 0x4 OR-ed with the
    # depth shifted left by 3.
    config_data.append(
        (interconnect.get_config_addr(0, 0, mem_x,
                                      mem_y), 0x00000004 | (depth << 3)))
    # then p0 is configured as add
    pe_x, pe_y = placement["p0"]
    tile_id = pe_x << 8 | pe_y
    tile = interconnect.tile_circuits[(pe_x, pe_y)]

    # The chip-level address is assembled by hand: register index in the top
    # byte, tile id in the low bits.
    add_bs = tile.core.get_config_bitstream(asm.add())
    for addr, data in add_bs:
        config_data.append(((addr << 24) | tile_id, data))

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    # Write every configuration word and read it back to confirm it stuck.
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # Top-level port names are derived from each IO block's placement.
    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    for i in range(200):
        tester.poke(circuit.interface[src], i)
        tester.eval()

        # Only check once well past the configured depth.
        if i > depth + 10:
            tester.expect(circuit.interface[dst], i * 2 - depth)

        # toggle the clock
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        # Gather all RTL dependencies next to the generated design.
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in cw_files:
            shutil.copy(filename, tempdir)
        # The SRAM stub is copied under the file name sram_512w_16b.v.
        shutil.copy(os.path.join("tests", "test_memory_core", "sram_stub.v"),
                    os.path.join(tempdir, "sram_512w_16b.v"))
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal"])
예제 #14
0
def test_stall(run_tb, io_sides):
    """Check that a 2x2 CGRA holds its output while ``stall`` is asserted.

    A netlist connecting an IO input through a register to a memory tile and
    a PE (configured as add with ``ra`` in DELAY mode) is placed, routed and
    configured. Data is streamed in, the chip is stalled at the last
    iteration of the first loop, the output is expected to stay constant
    while different inputs are poked, and streaming is then resumed.

    Args:
        run_tb: Callable that receives the populated tester and runs it.
        io_sides: Forwarded unchanged to ``create_cgra``.
    """
    chip_size = 2
    depth = 10
    interconnect = create_cgra(chip_size,
                               chip_size,
                               io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    # Net name -> list of (block id, port) pins; `bus` gives each net's width.
    netlist = {
        "e0": [("I0", "io2f_16"), ("r1", "reg")],
        "e2": [("r1", "reg"), ("m0", "data_in_0"), ("p0", "data0")],
        "e1": [("m0", "data_out_0"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in_0"), ("m0", "ren_in_0")],
        "e5": [("m0", "valid_out_0"), ("i4", "f2io_1")]
    }
    bus = {"e0": 16, "e2": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    x, y = placement["p0"]

    # Append the PE's add configuration, translated to chip-level addresses.
    tile = interconnect.tile_circuits[(x, y)]
    add_bs = tile.core.get_config_bitstream(asm.add(ra_mode=asm.Mode_t.DELAY))
    for addr, data in add_bs:
        config_data.append((interconnect.get_config_addr(addr, 0, x, y), data))

    tile_en = 1

    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core

    # Memory-tile register settings handed to config_mem_tile below.
    configs_mem = [
        ("strg_ub_app_ctrl_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_output_port_0", 0, 0),
        ("strg_ub_app_ctrl_read_depth_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_wo_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_ss_0", depth, 0),
        ("strg_ub_app_ctrl_coarse_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_coarse_read_depth_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_wo_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_ss_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_2", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_3", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_sync_grp_sync_group_0", 1, 0),
        ("strg_ub_tba_0_tb_0_range_outer", depth, 0),
        ("strg_ub_tba_0_tb_0_starting_addr", 0, 0),
        ("strg_ub_tba_0_tb_0_stride", 1, 0),
        ("strg_ub_tba_0_tb_0_dimensionality", 1, 0),
        ("strg_ub_agg_align_0_line_length", depth, 0),
        ("strg_ub_tba_0_tb_0_indices_0", 0, 0),
        ("strg_ub_tba_0_tb_0_indices_1", 1, 0),
        ("strg_ub_tba_0_tb_0_indices_2", 2, 0),
        ("strg_ub_tba_0_tb_0_indices_3", 3, 0),
        ("strg_ub_tba_0_tb_0_range_inner", 4, 0),
        ("strg_ub_tba_0_tb_0_tb_height", 1, 0), ("tile_en", tile_en, 0),
        ("mode", 0, 0), ("flush_reg_sel", 1, 0), ("wen_in_1_reg_sel", 1, 0),
        ("ren_in_1_reg_sel", 1, 0)
    ]
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y,
                    mcore)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # stall the chip
    tester.poke(circuit.interface["stall"], 1)
    tester.eval()

    # Write every configuration word and read it back to confirm it stuck.
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # un-stall the chip
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # Top-level port names are derived from each IO block's placement.
    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid_x, valid_y = placement["i4"]
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    for i in range(20):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        # NOTE(review): the literal 10 equals `depth` here; presumably the
        # threshold is meant to be depth + 1 - confirm.
        if i >= 10 + 1:
            # data0 of PE: i - 1 - 1
            # data1 of PE: i - 1 - depth
            tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
            tester.expect(circuit.interface[valid], 1)
        elif i < depth:
            tester.expect(circuit.interface[valid], 0)
        if i == 19:
            # now stall everything
            tester.poke(circuit.interface["stall"], 1)
            tester.eval()
        tester.step(2)

    # While stalled, the output is expected to keep the value from i == 19.
    for i in range(20):
        # poke random numbers. it shouldn't matter
        tester.poke(circuit.interface[src], i * 20)
        tester.expect(circuit.interface[dst], 19 * 2 - 3 - depth)
        tester.step(2)

    # un-stall again
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # Resume streaming from the value that was in flight when the stall hit.
    for i in range(19, 30):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
        tester.expect(circuit.interface[valid], 1)
        tester.step(2)

    run_tb(tester)
예제 #15
0
def read_bit2(pe):
    """Read back the bit at ``BIT2_START`` from the PE's ``BIT012_ADDR`` word.

    The PE is called with an add instruction and ``Data(0)`` as its first
    operand; the third element of its result is indexed at ``BIT2_START``.
    """
    add_instr = asm.add()
    target_addr = Data8(BIT012_ADDR)
    outputs = pe(add_instr, Data(0), config_addr=target_addr)
    _, _, readback = outputs
    return readback[BIT2_START]
예제 #16
0
def test_interconnect_line_buffer_last_line_valid(cw_files, add_pd, io_sides,
                                                  stencil_width, depth):
    """Check the memory tile's ``valid_out`` pattern in line-buffer mode.

    Over ``3 * depth`` iterations ``valid`` is expected to be 0 for the
    first ``depth + stencil_width - 1`` iterations, 1 up to ``2 * depth``,
    0 again for ``stencil_width - 1`` iterations, and 1 afterwards. The
    recorded sequence runs under Verilator inside a temporary directory.

    Args:
        cw_files: Paths of files copied into the temporary build directory.
        add_pd: Forwarded to ``create_cgra`` as ``add_pd``.
        io_sides: Forwarded unchanged to ``create_cgra``.
        stencil_width: Value written to the ``stencil_width`` register.
        depth: Value written to the ``depth`` register.
    """

    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    # Net name -> list of (block id, port) pins; `bus` gives each net's width.
    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in"), ("p0", "data0")],
        "e1": [("m0", "data_out"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in")],
        "e5": [("m0", "valid_out"), ("i4", "f2io_1")],
    }
    bus = {"e0": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as line buffer mode
    mode = Mode.LINE_BUFFER
    tile_en = 1

    mem_x, mem_y = placement["m0"]
    mcore = interconnect.tile_circuits[(mem_x, mem_y)].core
    # Registers are written in this exact order.
    mem_registers = [
        ("depth", depth),
        ("mode", mode.value),
        ("stencil_width", stencil_width),
        ("tile_en", tile_en),
    ]
    for reg_name, reg_value in mem_registers:
        reg_addr = interconnect.get_config_addr(
            mcore.get_reg_index(reg_name), 0, mem_x, mem_y)
        config_data.append((reg_addr, reg_value))

    # then p0 is configured as add
    pe_x, pe_y = placement["p0"]
    tile_id = pe_x << 8 | pe_y
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]

    # The chip-level address is assembled by hand: register index in the top
    # byte, tile id in the low bits.
    for reg_index, reg_word in pe_tile.core.get_config_bitstream(asm.add()):
        config_data.append(((reg_index << 24) | tile_id, reg_word))

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    # Write every configuration word and read it back to confirm it stuck.
    for cfg_addr, cfg_word in config_data:
        tester.configure(cfg_addr, cfg_word)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_word)

    # Top-level port names are derived from each IO block's placement.
    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid_x, valid_y = placement["i4"]
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    # The data value stays at 0 throughout; only `valid` is checked.
    counter = 0
    for cycle in range(3 * depth):
        tester.poke(circuit.interface[src], counter)
        tester.eval()

        if cycle < depth + stencil_width - 1:
            expected_valid = 0
        elif cycle < 2 * depth:
            expected_valid = 1
        elif cycle < 2 * depth + stencil_width - 1:
            expected_valid = 0
        else:
            expected_valid = 1
        tester.expect(circuit.interface[valid], expected_valid)

        # toggle the clock
        tester.step(2)

    with tempfile.TemporaryDirectory() as build_dir:
        # Gather all RTL dependencies next to the generated design.
        for genesis_file in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_file, build_dir)
        for support_file in cw_files:
            shutil.copy(support_file, build_dir)
        # The SRAM stub is copied under the file name sram_512w_16b.v.
        stub_src = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_dst = os.path.join(build_dir, "sram_512w_16b.v")
        shutil.copy(stub_src, stub_dst)
        for sv_file in glob.glob("tests/*.sv"):
            shutil.copy(sv_file, build_dir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=build_dir,
                               flags=["-Wno-fatal", "--trace"])
예제 #17
0
def read_data1(pe):
    """Read back the data1 field from the PE's ``DATA01_ADDR`` word.

    The PE is called with an add instruction and ``Data(0)`` as its first
    operand; the third element of its result is sliced from ``DATA1_START``
    over ``DATA1_WIDTH`` bits.
    """
    add_instr = asm.add()
    target_addr = Data8(DATA01_ADDR)
    outputs = pe(add_instr, Data(0), config_addr=target_addr)
    _, _, readback = outputs
    field_end = DATA1_START + DATA1_WIDTH
    return readback[DATA1_START:field_end]
예제 #18
0
def read_data0(pe, instr=asm.add(), ra=Data(0)):
    """Read back the data0 field from the PE's ``DATA01_ADDR`` word.

    ``ra`` is driven on the PE's ``data0`` input for this call. The third
    element of the PE's result is sliced from ``DATA0_START`` over
    ``DATA0_WIDTH`` bits.
    """
    target_addr = Data8(DATA01_ADDR)
    outputs = pe(instr, data0=ra, config_addr=target_addr)
    _, _, readback = outputs
    field_end = DATA0_START + DATA0_WIDTH
    return readback[DATA0_START:field_end]
예제 #19
0
# Build the PE class for the Python family and create one module-level
# instance for the tests to call.
PE = PE_fc(PyFamily())
pe = PE()

# BFloat16 type built for the Python family.
BFloat16 = BFloat16_fc(PyFamily())
# Bit-vector type of width DATAWIDTH, used to wrap PE operands.
Data = BitVector[DATAWIDTH]

# Pairs an instruction with the Python function giving its expected result.
op = namedtuple("op", ["inst", "func"])
# Number of random operand pairs generated per parametrized test.
NTESTS = 4


@pytest.mark.parametrize("op", [
    op(asm.and_(), lambda a, b: a & b),
    op(asm.or_(), lambda a, b: a | b),
    op(asm.xor(), lambda a, b: a ^ b),
    op(asm.add(), lambda a, b: a + b),
    op(asm.sub(), lambda a, b: a - b),
    op(asm.lsl(), lambda a, b: a << b),
    op(asm.lsr(), lambda a, b: a >> b),
    op(asm.umin(), lambda a, b: (a < b).ite(a, b)),
    op(asm.umax(), lambda a, b: (a > b).ite(a, b))
])
@pytest.mark.parametrize(
    "args", [(UIntVector.random(DATAWIDTH), UIntVector.random(DATAWIDTH))
             for _ in range(NTESTS)])
def test_unsigned_binary(op, args):
    """Compare the PE's result with a Python reference, then run the RTL.

    Each ``op`` carries an instruction and the function giving its expected
    result; ``args`` is a pair of random unsigned ``DATAWIDTH``-wide
    operands. The value produced by the Python PE is also passed to
    ``rtl_tester`` as the expected RTL result.
    """
    lhs, rhs = args
    result, _, _ = pe(op.inst, Data(lhs), Data(rhs))
    expected = op.func(lhs, rhs)
    assert result == expected
    rtl_tester(op, lhs, rhs, res=result)
예제 #20
0
 def __call__(self, in0: Data, in1: Data, in2: Data) -> Data:
     """Chain two PEs: ``smult0`` on ``in0``/``in1``, then ``add`` with ``in2``.

     Only the first element of each PE's result is used; the value returned
     is the first element produced by ``self.pe2``.
     """
     mult_instr = asm.smult0()
     add_instr = asm.add()
     stage1_out, _, _ = self.pe1(mult_instr, in0, in1)
     stage2_out, _, _ = self.pe2(add_instr, stage1_out, in2)
     return stage2_out
예제 #21
0
def test_reg_const(args):
    """Run the RTL tester on an add whose ``rb`` operand is a constant.

    ``args`` unpacks into the first operand and the constant. A random
    ``DATAWIDTH``-wide value is still passed as ``data1``, while the
    expected result is ``data0 + const1``.
    """
    lhs, rb_constant = args
    ignored_rhs = UIntVector.random(DATAWIDTH)
    const_add = asm.add(rb_mode=Mode_t.CONST, rb_const=rb_constant)
    rtl_tester(const_add, lhs, ignored_rhs, res=lhs + rb_constant)