Esempio n. 1
0
def main():
    """Testbench coroutine: send three counts and check the collected sequence.

    For every count ``N`` in ``[0, 8, 7]`` the scoreboard expects the values
    ``0..N`` (inclusive); all runs are concatenated into one int32 column.
    """
    counts = np.array([0, 8, 7], dtype=np.int32)
    # One ascending run 0..N per count, stacked into an (n, 1) column.
    runs = [np.arange(count + 1, dtype=np.int32) for count in counts]
    expected = np.concatenate(runs)[:, np.newaxis]

    scoreboard = Scoreboard()
    checker = scoreboard.GetTest("test")
    stacker = Stacker(n=expected.shape[0], callbacks=[checker.Get])
    getter = BusGetter(callbacks=[stacker.Get])
    checker.Expect((expected, ))
    yield rst_out_ev

    # The protocol drivers are only built after the reset event was yielded.
    master = TwoWire.Master(irdy, iack, i, ck_ev)
    # Never referenced again in this function; built with the getter callback.
    slave = TwoWire.Slave(ordy, oack, o, ck_ev, callbacks=[getter.Get])

    payload = master.values

    def count_stream():
        # The same values object is updated in place and yielded again.
        for count in counts:
            payload[0][0] = count
            yield payload

    yield from master.SendIter(count_stream())

    # Yield the clock event 100 more times before ending the simulation.
    yield from repeat(ck_ev, 100)
    FinishSim()
Esempio n. 2
0
def main():
    """Testbench coroutine: stream random samples and check ``M.Downsample``.

    100 random int32 values in ``[0, 256)`` are sent through the TwoWire
    master; the scoreboard expects ``M.Downsample`` of that input as a single
    column, collected 50 entries at a time.
    """
    # Reference data
    n_samples = 100
    n_expected = n_samples >> 1
    stimulus = np.random.randint(256, size=n_samples, dtype=np.int32)
    reference = M.Downsample(stimulus)

    # Bind to the Verilog signals; every bus holds a single "dut" signal.
    signal_names = ("i_rdy", "i_ack", "o_rdy", "o_ack", "i_data", "o_data")
    irdy, iack, ordy, oack, idata, odata = CreateBuses(
        [(("dut", name), ) for name in signal_names])
    rst_out_ev, ck_ev = CreateEvents(["rst_out", "ck_ev"])

    # Checker chain: slave -> BusGetter -> Stacker -> scoreboard test.
    scoreboard = Scoreboard("Counter")
    checker = scoreboard.GetTest("Counter")
    stacker = Stacker(n_expected, callbacks=[checker.Get])
    getter = BusGetter(callbacks=[stacker.Get])
    master = TwoWire.Master(irdy, iack, idata, ck_ev, A=1, B=2)
    slave = TwoWire.Slave(
        ordy, oack, odata, ck_ev, A=1, B=2, callbacks=[getter.Get])
    payload = master.values
    checker.Expect((reference[:, np.newaxis], ))

    # Simulation starts here.
    yield rst_out_ev
    yield ck_ev

    def sample_stream():
        # Reuse one values object, overwriting its data word per sample.
        for sample in stimulus:
            payload.i_data[0] = sample
            yield payload

    yield from master.SendIter(sample_stream())

    for _ in range(100):
        yield ck_ev
    assert stacker.is_clean
    FinishSim()
Esempio n. 3
0
def main():
    """RemapCache testbench coroutine.

    Writes ``N_VEC`` vectors through the OneWire master, then issues ``NTEST``
    read requests through the TwoWire master. The scoreboard expects the
    collected read data to equal the requested addresses themselves.
    """
    scoreboard = Scoreboard("RemapCache")
    checker = scoreboard.GetTest(f"test{RMC_CONF}")

    def dump_received(mat):
        # Save the first collected array to a text file before it is checked.
        return npd.savetxt("rmc_got.txt", mat[0], fmt="%d")

    # Starts with size 0; resized once the number of reads is known.
    stacker = Stacker(0, callbacks=[dump_received, checker.Get])
    getter = BusGetter(callbacks=[stacker.Get])
    wad_master = OneWire.Master(wad_dval_bus, wad_bus, ck_ev)
    ra_master = TwoWire.Master(ra_rdy_bus, ra_ack_bus, ra_bus, ck_ev)
    rd_slave = TwoWire.Slave(
        rd_rdy_bus, rd_ack_bus, rd_bus, ck_ev, callbacks=[getter.Get])
    write_payload = wad_master.values
    read_payload = ra_master.values
    yield rst_out_ev

    # Simulation starts: program the XOR configuration first.
    npd.copyto(cfg_bus.values.i_xor_srcs[0], xsrc)
    cfg_bus.values.i_xor_swaps[0] = xswap
    cfg_bus.Write()
    yield ck_ev

    def write_stream():
        # Vector number `vec` carries the values vec*VSIZE .. (vec+1)*VSIZE-1.
        for vec in range(N_VEC):
            write_payload[0][0] = 0
            write_payload[1][0] = vec
            npd.copyto(write_payload[2],
                       npi.arange(vec * VSIZE, (vec + 1) * VSIZE))
            yield write_payload

    yield from wad_master.SendIter(write_stream())

    for _ in range(10):
        yield ck_ev

    n_reads = N_VEC * VSIZE - npd.sum(stride, dtype=npd.int32) - 1
    read_addrs = npi.arange(n_reads)[:, newaxis] + STEP
    stacker.Resize(n_reads)
    checker.Expect((read_addrs, ))

    def read_stream():
        for addr in read_addrs:
            read_payload[0][0] = 0
            npd.copyto(read_payload[1], addr)
            yield read_payload

    yield from ra_master.SendIter(read_stream())

    for _ in range(100):
        yield ck_ev
    assert stacker.is_clean
    FinishSim()
Esempio n. 4
0
def main():
    """ParallelBlockLooper testbench coroutine.

    Sends one configuration through the TwoWire master, records what each of
    the ``N_TAU`` slaves receives in its own queue, and finally checks that
    every expected block offset matches the head of some queue and that no
    queue has entries left over.
    """
    n_golden, bofs = cfg.CreateBlockTransaction()
    scb = Scoreboard("ParallelBlockLooper_mc")
    test_b = scb.GetTest("bofs")
    # master
    master = TwoWire.Master(rdy_bus_s, ack_bus_s, bus_s, ck_ev)
    i_data = master.values
    # slave: one receive queue and one set of driver objects per index
    ans = [deque() for _ in range(N_TAU)]
    bgs_b = []
    masters_bdone = []
    resps = []
    slaves_b = []
    for i in range(N_TAU):
        # `i=i` / `resp=resp` bind the current loop values into the lambdas.
        bg_b = BusGetter(copy=True,
                         callbacks=[lambda x, i=i: ans[i].append(x)])
        master_bdone = OneWire.Master(dval_buss[i], tuple(), ck_ev)
        resp = Response(master_bdone.SendIter, ck_ev, B=100)
        slave_b = TwoWire.Slave(
            rdy_buss_b[i],
            ack_buss_b[i],
            buss_b[i],
            ck_ev,
            callbacks=[bg_b.Get, lambda x, resp=resp: resp.Append(tuple())],
            A=1,
            B=100)
        bgs_b.append(bg_b)
        # Store the master just created (the original appended the list to
        # itself, so the OneWire masters were never recorded).
        masters_bdone.append(master_bdone)
        resps.append(resp)
        slaves_b.append(slave_b)
    yield rst_out_ev
    yield ck_ev

    # start simulation
    npd.copyto(i_data[0], cfg.pcfg['local'][0])
    npd.copyto(i_data[1], cfg.pcfg['end'][0])

    yield from master.Send(i_data)

    for _ in range(30):
        yield ck_ev

    # Each expected offset must equal the head of one queue; consume it there.
    for idx in range(n_golden):
        b = bofs[idx]
        popped = False
        for a in ans:
            if a and (a[0] == b).all():
                a.popleft()
                popped = True
                break
        assert popped, f"No correct bofs to match {b}"
    assert all(not a for a in ans), "Some extra bofs"
    FinishSim()
Esempio n. 5
0
def main():
    """Controller forwarding testbench coroutine.

    Sends 100 random values in ``[0, 100)`` through the source master and
    expects the same ``(100, 1)`` column on the destination side. The test is
    named "ForwardSlow" when the ``SLOW`` environment variable is set and
    "Forward" otherwise.
    """
    seed = np.random.randint(10000)
    print("Seed for this run is {}".format(seed))
    np.random.seed(seed)
    n_items = 100
    expected = np.random.randint(100, size=(n_items, 1))
    scoreboard = Scoreboard("Controller")
    slow_mode = getenv("SLOW") is not None
    checker = scoreboard.GetTest("ForwardSlow" if slow_mode else "Forward")
    stacker = Stacker(n_items, callbacks=[checker.Get])
    getter = BusGetter(callbacks=[stacker.Get])

    # Every bus is described by a one-element tuple holding its signal name.
    bus_names = ("src_rdy", "src_ack", "src_data", "dst_rdy", "dst_canack",
                 "dst_data")
    srdy, sack, sdata, drdy, dack, ddata = CreateBuses(
        [(name, ) for name in bus_names])
    master = TwoWire.Master(srdy, sack, sdata, ck_ev, A=5, B=8)
    slave = TwoWire.Slave(
        drdy, dack, ddata, ck_ev, callbacks=[getter.Get], A=4, B=8)
    yield rst_out_ev
    yield ck_ev

    def value_stream():
        # One values object, rewritten in place for each element.
        source_values = sdata.values
        for value in expected.flat:
            source_values[0][0] = value
            yield source_values

    checker.Expect((expected, ))
    yield from master.SendIter(value_stream())

    for _ in range(10):
        yield ck_ev
    assert stacker.is_clean
    FinishSim()
Esempio n. 6
0
def main():
    """ChunkAddrLooper testbench coroutine.

    For every chunk head derived from the block / accumulation-block
    transactions, the expected command stream is registered on the scoreboard,
    the matching configuration is sent through the TwoWire master, and 100
    clock events are yielded before the next one is issued.

    Setting the ``TEST0`` environment variable (to any value) selects
    ``cfg.umcfg_i0``; otherwise ``cfg.umcfg_i1`` is used.
    """
    scb = Scoreboard("ChunkAddrLooper")
    test = scb.GetTest("test")
    # Created with size 0; resized for each transaction inside the loop.
    st = Stacker(0, callbacks=[test.Get])
    bg = BusGetter(callbacks=[st.Get])
    master = TwoWire.Master(mrdy_bus, mack_bus, mofs_bus, ck_ev)
    i_data = master.values
    slave = TwoWire.Slave(crdy_bus,
                          cack_bus,
                          cmd_bus,
                          ck_ev,
                          callbacks=[bg.Get])
    yield rst_out_ev

    # simulation
    n_bofs, bofs = cfg.CreateBlockTransaction()
    # True when the TEST0 environment variable is set, whatever its value.
    TEST0 = not getenv("TEST0") is None
    print(f"Testing {0 if TEST0 else 1}...")
    TEST_UMCFG = cfg.umcfg_i0 if TEST0 else cfg.umcfg_i1
    # NOTE(review): with TEST0 set this index is 1 while the config is
    # umcfg_i0 (and 0 together with umcfg_i1) - confirm the pairing is intended.
    OFS = int(bool(TEST0))
    for i in range(n_bofs):
        (n_i, bofs_i, abeg_i, aend_i, abeg_id_i, aend_id_i,
         dummy) = cfg.CreateAccumBlockTransaction(bofs[i])[OFS]
        for j in range(n_i):
            # only use the first one
            TEST_ABMOFS = cfg.CreateChunkHead(bofs_i[j], abeg_i[j],
                                              abeg_id_i[j], aend_id_i[j],
                                              TEST_UMCFG)[0]
            ans = cfg.CreateDramReadTransaction(TEST_ABMOFS, TEST_UMCFG, 0)
            # One stacker flush per expected command row.
            st.Resize(ans.shape[0])
            npd.copyto(i_data[0], TEST_ABMOFS)
            npd.copyto(i_data[1], TEST_UMCFG["lmpad"][0])
            npd.copyto(i_data[2], TEST_UMCFG["mboundary"][0])
            npd.copyto(i_data[3], TEST_UMCFG["mboundary_lmwidth"][0])
            i_data[4][0] = TEST_UMCFG["mlinear"][0]
            # Flag is 1 only when the wrap mode equals UmiModel.MEM_WRAP.
            i_data[5][
                0] = 1 if TEST_UMCFG["mwrap"][0] == UmiModel.MEM_WRAP else 0
            # Each named field of `ans` is expected as its own column.
            test.Expect(
                tuple(ans[k][:, newaxis]
                      for k in ("cmd_type", "islast", "addr", "ofs", "len")))
            yield from master.Send(i_data)
            # NOTE: this reuses the outer loop's name `i`. It is harmless here
            # because `bofs[i]` is only read before the `j` loop starts and the
            # outer `for` rebinds `i` on its next iteration.
            for i in range(100):
                yield ck_ev

    for i in range(300):
        yield ck_ev
    assert st.is_clean
    FinishSim()
Esempio n. 7
0
def main():
    """Testbench coroutine sending one configuration per ALU accumulation range.

    The ranges come from the last entry returned by
    ``cfg.CreateAccumBlockTransaction(bofs[0])``. For each range the expected
    output is registered on ``tst`` and the configuration is sent through the
    TwoWire master.

    ``bg``, ``col`` and ``tst`` are not defined in this function; they are
    taken from the enclosing scope.
    """
    yield rst_out_ev
    n_bofs, bofs = cfg.CreateBlockTransaction()
    # Only the first block offset (bofs[0]) is exercised by this test.
    (n_alu, bofs_alu, abeg_alu, aend_alu, abeg_id_alu, aend_id_alu,
     dummy) = cfg.CreateAccumBlockTransaction(bofs[0])[-1]
    master = TwoWire.Master(src_rdy, src_ack, src_bus, ck_ev)
    inst_commit = OneWire.Master(inst_commit_dval, tuple(), ck_ev)
    resp = Response(inst_commit.SendIter, ck_ev)
    # Every received item is forwarded to `bg` and also queues an empty tuple
    # on `resp`.
    slave = TwoWire.Slave(inst_rdy,
                          inst_ack,
                          inst_bus,
                          ck_ev,
                          callbacks=[bg.Get, lambda _: resp.Append(tuple())])
    data_bus = master.values

    # start simulation
    for i in range(n_alu):
        # Expect?
        (n_aofs_alu, agofs_alu, alofs_alu, rt_i_alu, rg_li_alu,
         rg_ri_alu) = cfg.CreateAccumTransaction(abeg_alu[i], aend_alu[i])
        accum_alu, warpid_alu, rg_flat_alu = cfg.CreateAccumWarpTransaction(
            abeg_alu[i], aend_alu[i], None, rg_li_alu, rg_ri_alu, cfg.n_inst)
        # NOTE: this rebinds `bofs_alu`, replacing the value unpacked above.
        bofs_alu, blofs_alu, valid_alu = cfg.CreateBofsValidTransaction(
            bofs[0], warpid_alu)
        # /2 since in hardware, we use two warps (2x, 2x+1) to form a large warp x
        warpid_alu >>= 1
        npd.copyto(data_bus.i_bofs, bofs[0])
        npd.copyto(data_bus.i_aofs_beg, abeg_alu[i])
        npd.copyto(data_bus.i_aofs_end, aend_alu[i])
        npd.copyto(data_bus.i_dual_axis, cfg.pcfg['dual_axis'])
        npd.copyto(data_bus.i_dual_order, cfg.pcfg['dual_order'])
        npd.copyto(data_bus.i_bgrid_step, cfg.pcfg["local"][0])
        npd.copyto(data_bus.i_bsub_up_order, cfg.pcfg["lg_vsize_2x"][0])
        npd.copyto(data_bus.i_bsub_lo_order, cfg.pcfg["lg_vshuf"][0])
        npd.copyto(data_bus.i_aboundary, cfg.acfg["total"][0])
        npd.copyto(data_bus.i_inst_id_begs, cfg.n_inst[0])
        npd.copyto(data_bus.i_inst_id_ends, cfg.n_inst[1])
        # The collector is sized to the number of flattened entries.
        col.Resize(rg_flat_alu.size)
        tst.Expect((bofs_alu[:, 0, :], agofs_alu[accum_alu],
                    rg_flat_alu[:, newaxis], warpid_alu[:, newaxis]))
        yield from master.Send(data_bus)
        yield ck_ev

    for i in range(10):
        yield ck_ev
    assert col.is_clean
    FinishSim()
Esempio n. 8
0
 def __init__(self,
              aw_rdy: Bus,
              aw_ack: Bus,
              aw: Bus,
              w_rdy: Bus,
              w_ack: Bus,
              w: Bus,
              b_rdy: Bus,
              b_ack: Bus,
              b: Bus,
              ar_rdy: Bus,
              ar_ack: Bus,
              ar: Bus,
              r_rdy: Bus,
              r_ack: Bus,
              r: Bus,
              clk: int,
              A=1,
              B=5,
              read_callbacks=None):
     """Create the TwoWire drivers for the aw, w, b, ar and r channels.

     Args:
         aw_rdy, aw_ack, aw: buses given to the ``aw`` TwoWire master.
         w_rdy, w_ack, w: buses given to the ``w`` TwoWire master.
         b_rdy, b_ack, b: buses given to the ``b`` TwoWire slave.
         ar_rdy, ar_ack, ar: buses given to the ``ar`` TwoWire master.
         r_rdy, r_ack, r: buses given to the ``r`` TwoWire slave.
         clk: clock identifier, resolved to an event with ``GetEvent``.
         A, B: accepted but not used in this constructor.
         read_callbacks: callbacks passed to the ``r`` slave. ``None`` (the
             default) means an empty list.
     """
     # Build a fresh list per call: a `list()` default is evaluated once at
     # definition time and would be shared by every instance.
     if read_callbacks is None:
         read_callbacks = []
     clk = GetEvent(clk)
     self.clk = clk
     # Released (non-blocking) once by each of the aw and w master callbacks.
     self.sem = Semaphore(-1)
     self.aw = TwoWire.Master(aw_rdy,
                              aw_ack,
                              aw,
                              clk,
                              callbacks=[lambda x: self.sem.ReleaseNB()])
     self.w = TwoWire.Master(w_rdy,
                             w_ack,
                             w,
                             clk,
                             callbacks=[lambda x: self.sem.ReleaseNB()])
     self.b = TwoWire.Slave(b_rdy, b_ack, b, clk, callbacks=[self._NoErr])
     self.ar = TwoWire.Master(ar_rdy, ar_ack, ar, clk)
     self.r = TwoWire.Slave(r_rdy, r_ack, r, clk, callbacks=read_callbacks)
     self.writing = False
Esempio n. 9
0
def main():
    """AccumBlockLooper testbench coroutine.

    Five slaves (i0, i1, dma, o, alu) each feed their own
    BusGetter -> Stacker -> scoreboard test chain. A single configuration
    built from ``bofs[0]`` is sent through the TwoWire master, then 300 clock
    events are yielded before every stacker is checked for being clean.
    """
    scb = Scoreboard("AccumBlockLooper")
    test_i0 = scb.GetTest("test_i0")
    test_i1 = scb.GetTest("test_i1")
    test_dma = scb.GetTest("test_dma")
    test_o = scb.GetTest("test_o")
    test_alu = scb.GetTest("test_alu")
    # All stackers start with size 0 and are resized once the expected
    # transaction counts are known (see the Resize calls below).
    st_i0 = Stacker(0, callbacks=[test_i0.Get])
    st_i1 = Stacker(0, callbacks=[test_i1.Get])
    st_dma = Stacker(0, callbacks=[test_dma.Get])
    st_o = Stacker(0, callbacks=[test_o.Get])
    st_alu = Stacker(0, callbacks=[test_alu.Get])
    bg_i0 = BusGetter(callbacks=[st_i0.Get])
    bg_i1 = BusGetter(callbacks=[st_i1.Get])
    bg_dma = BusGetter(callbacks=[st_dma.Get])
    bg_o = BusGetter(callbacks=[st_o.Get])
    bg_alu = BusGetter(callbacks=[st_alu.Get])
    master = TwoWire.Master(s_rdy_bus, s_ack_bus, s_bus, ck_ev)
    i_data = master.values
    slave_i0 = TwoWire.Slave(i0_rdy_bus,
                             i0_ack_bus,
                             i0_bus,
                             ck_ev,
                             callbacks=[bg_i0.Get])
    slave_i1 = TwoWire.Slave(i1_rdy_bus,
                             i1_ack_bus,
                             i1_bus,
                             ck_ev,
                             callbacks=[bg_i1.Get])
    slave_dma = TwoWire.Slave(dma_rdy_bus,
                              dma_ack_bus,
                              dma_bus,
                              ck_ev,
                              callbacks=[bg_dma.Get])
    slave_o = TwoWire.Slave(o_rdy_bus,
                            o_ack_bus,
                            o_bus,
                            ck_ev,
                            callbacks=[bg_o.Get])
    slave_alu = TwoWire.Slave(alu_rdy_bus,
                              alu_ack_bus,
                              alu_bus,
                              ck_ev,
                              callbacks=[bg_alu.Get])
    yield rst_out_ev
    yield ck_ev

    # Reference data; only the first block offset (bofs[0]) is used.
    n_bofs, bofs, = cfg.CreateBlockTransaction()
    ans_i0, ans_i1, ans_dma, ans_o, ans_alu, = cfg.CreateAccumBlockTransaction(
        bofs[0])
    # The last element of each answer is ignored except for dma, where it is
    # kept as `which_dma` and checked below.
    n_i0, bofs_i0, abeg_i0, aend_i0, abeg_id_i0, aend_id_i0, dummy, = ans_i0
    n_i1, bofs_i1, abeg_i1, aend_i1, abeg_id_i1, aend_id_i1, dummy, = ans_i1
    n_dma, bofs_dma, abeg_dma, aend_dma, abeg_id_dma, aend_id_dma, which_dma = ans_dma
    n_o, bofs_o, abeg_o, aend_o, abeg_id_o, aend_id_o, dummy, = ans_o
    n_alu, bofs_alu, abeg_alu, aend_alu, abeg_id_alu, aend_id_alu, dummy, = ans_alu

    # start simulation
    npd.copyto(i_data[0], bofs[0])
    npd.copyto(i_data[1], cfg.acfg['local'][0])
    npd.copyto(i_data[2], cfg.acfg['end'][0])
    npd.copyto(i_data[3], cfg.acfg['total'][0])
    npd.copyto(i_data[4], cfg.n_i0[0])
    npd.copyto(i_data[5], cfg.n_i0[1])
    npd.copyto(i_data[6], cfg.n_i1[0])
    npd.copyto(i_data[7], cfg.n_i1[1])
    npd.copyto(i_data[8], cfg.n_o[0])
    npd.copyto(i_data[9], cfg.n_o[1])
    npd.copyto(i_data[10], cfg.n_inst[0])
    npd.copyto(i_data[11], cfg.n_inst[1])
    # The id arrays get a trailing axis so they are passed as columns.
    test_i0.Expect(
        (bofs_i0, abeg_i0, aend_i0, abeg_id_i0[:,
                                               newaxis], aend_id_i0[:,
                                                                    newaxis]))
    test_i1.Expect(
        (bofs_i1, abeg_i1, aend_i1, abeg_id_i1[:,
                                               newaxis], aend_id_i1[:,
                                                                    newaxis]))
    # The dma expectation carries `which_dma` and omits `aend_dma`.
    test_dma.Expect((bofs_dma, which_dma[:, newaxis], abeg_dma,
                     abeg_id_dma[:, newaxis], aend_id_dma[:, newaxis]))
    test_o.Expect(
        (bofs_o, abeg_o, aend_o, abeg_id_o[:, newaxis], aend_id_o[:, newaxis]))
    # The alu expectation has no id columns.
    test_alu.Expect((bofs_alu, abeg_alu, aend_alu))
    st_i0.Resize(n_i0)
    st_i1.Resize(n_i1)
    st_dma.Resize(n_dma)
    st_o.Resize(n_o)
    st_alu.Resize(n_alu)
    yield from master.Send(i_data)

    # Yield 300 clock events before the final checks.
    for i in range(300):
        yield ck_ev
    assert st_i0.is_clean
    assert st_i1.is_clean
    assert st_dma.is_clean
    assert st_o.is_clean
    assert st_alu.is_clean
    FinishSim()
Esempio n. 10
0
def main():
    """Rgb2Yuv testbench coroutine.

    Random pixel streams and their per-pixel coefficient rows are sent through
    two TwoWire masters in parallel. The Y, U and V outputs are collected by
    three slaves and each checked against one row of the ``M.Rgb2Yuv``
    reference.
    """
    # Calculate answer
    rgbs = M.RandomPixelStreams()
    yuvs = M.Rgb2Yuv(rgbs)
    coeff_test = M.coeff_test
    gold_in_px = np.hstack(rgbs)
    # Repeat each coefficient set c[1] (flattened to one row) c[0] times.
    # np.vstack needs a real sequence: generators were deprecated in
    # NumPy 1.16 and are rejected by current releases, so build a list.
    gold_in_coeff = np.vstack([
        np.repeat(np.reshape(c[1], (1, -1)), c[0], axis=0) for c in coeff_test
    ])
    gold_out = np.hstack(yuvs)
    N = gold_out.shape[1]

    # Connect to Verilog
    (
        # pixel, coeff, y, u, v
        pvalid,
        pready,
        cvalid,
        cready,
        yvalid,
        yready,
        uvalid,
        uready,
        vvalid,
        vready,
        pdata,
        cdata,
        ydata,
        udata,
        vdata,
    ) = CreateBuses([
        (("dut", "rgb_valid"), ),
        (("dut", "rgb_ready"), ),
        (("dut", "coeffs_valid"), ),
        (("dut", "coeffs_ready"), ),
        (("dut", "y_valid"), ),
        (("dut", "y_ready"), ),
        (("dut", "u_valid"), ),
        (("dut", "u_ready"), ),
        (("dut", "v_valid"), ),
        (("dut", "v_ready"), ),
        (("dut", "rgb_data", (3, )), ),
        (("dut", "coeffs_data", (9, )), ),
        (("dut", "y_data"), ),
        (("dut", "u_data"), ),
        (("dut", "v_data"), ),
    ])
    rst_out_ev, ck_ev = CreateEvents(["rst_out", "ck_ev"])

    # Initialization: one BusGetter -> Stacker -> test chain per output.
    scb = Scoreboard("Rgb2Yuv")
    testy = scb.GetTest("Y")
    testu = scb.GetTest("U")
    testv = scb.GetTest("V")
    sty = Stacker(N, callbacks=[testy.Get])
    stu = Stacker(N, callbacks=[testu.Get])
    stv = Stacker(N, callbacks=[testv.Get])
    bgy = BusGetter(callbacks=[sty.Get])
    bgu = BusGetter(callbacks=[stu.Get])
    bgv = BusGetter(callbacks=[stv.Get])
    masterc = TwoWire.Master(cvalid, cready, cdata, ck_ev, A=1, B=2)
    masterp = TwoWire.Master(pvalid, pready, pdata, ck_ev, A=1, B=2)
    slavey = TwoWire.Slave(yvalid,
                           yready,
                           ydata,
                           ck_ev,
                           A=1,
                           B=2,
                           callbacks=[bgy.Get])
    slaveu = TwoWire.Slave(uvalid,
                           uready,
                           udata,
                           ck_ev,
                           A=1,
                           B=2,
                           callbacks=[bgu.Get])
    slavev = TwoWire.Slave(vvalid,
                           vready,
                           vdata,
                           ck_ev,
                           A=1,
                           B=2,
                           callbacks=[bgv.Get])
    mdatac = masterc.values
    mdatap = masterp.values
    # Rows 0, 1, 2 of the reference are checked as the Y, U, V columns.
    testy.Expect((gold_out[0, :, np.newaxis], ))
    testu.Expect((gold_out[1, :, np.newaxis], ))
    testv.Expect((gold_out[2, :, np.newaxis], ))

    # start simulation
    yield rst_out_ev
    yield ck_ev

    def IterC():
        # One coefficient row per pixel.
        for i in range(N):
            np.copyto(mdatac.coeffs_data, gold_in_coeff[i, :])
            yield mdatac

    def IterP():
        # One pixel (a column of the stacked input) per transfer.
        for i in range(N):
            np.copyto(mdatap.rgb_data, gold_in_px[:, i])
            yield mdatap

    # Coefficients are sent from a forked coroutine while this one sends pixels.
    Fork(masterc.SendIter(IterC()))
    yield from masterp.SendIter(IterP())

    for i in range(100):
        yield ck_ev
    assert sty.is_clean
    assert stu.is_clean
    assert stv.is_clean
    FinishSim()
Esempio n. 11
0
def main():
    """Controller "Merge" testbench coroutine.

    Two masters each send one column of a random ``(250, 2)`` matrix; the
    scoreboard expects the destination side to deliver the full matrix.
    """
    seed = np.random.randint(10000)
    print("Seed for this run is {}".format(seed))
    np.random.seed(seed)
    n_rows = 250
    expected = np.random.randint(100, size=(n_rows, 2))
    scoreboard = Scoreboard("Controller")
    checker = scoreboard.GetTest("Merge")
    stacker = Stacker(n_rows, callbacks=[checker.Get])
    getter = BusGetter(callbacks=[stacker.Get])

    # Eight single-signal buses followed by the two-element destination bus.
    scalar_signals = (
        "src0_rdy",
        "src0_ack",
        "src0_data",
        "src1_rdy",
        "src1_ack",
        "src1_data",
        "dst_rdy",
        "dst_canack",
    )
    bus_specs = [(("", name), ) for name in scalar_signals]
    bus_specs.append((("", "dst_data", (2, )), ))
    (srdy0, sack0, sdata0, srdy1, sack1, sdata1, drdy, dack,
     ddata) = CreateBuses(bus_specs)

    master0 = TwoWire.Master(srdy0, sack0, sdata0, ck_ev, A=1, B=2)
    master1 = TwoWire.Master(srdy1, sack1, sdata1, ck_ev, A=1, B=2)
    slave = TwoWire.Slave(
        drdy, dack, ddata, ck_ev, callbacks=[getter.Get], A=1, B=3)
    yield rst_out_ev
    yield ck_ev

    def column_stream(target, column):
        # Write each value into the bus values object and yield it.
        for value in column:
            target[0][0] = value
            yield target

    checker.Expect((expected, ))
    # Column 0 is sent from a forked coroutine, column 1 from this one.
    Fork(master0.SendIter(column_stream(sdata0.values, expected[:, 0].flat)))
    yield from master1.SendIter(
        column_stream(sdata1.values, expected[:, 1].flat))

    for _ in range(10):
        yield ck_ev
    assert stacker.is_clean
    FinishSim()
Esempio n. 12
0
def main():
    """WriteCollector testbench coroutine.

    For every accumulation range of every block offset, the expected DRAM
    write transaction is computed with ``conf``, registered on the scoreboard,
    and the addresses plus packed valid masks are sent through ``master_a``
    while ``master_d`` sends empty tuples from a forked coroutine.
    """
    scb = Scoreboard("WriteCollector")
    test = scb.GetTest("test")
    # No size is given here; the stacker is resized per transaction below.
    st = Stacker(callbacks=[test.Get])
    bg = BusGetter(callbacks=[st.Get])
    master_a = TwoWire.Master(a_rdy_bus, a_ack_bus, a_bus, ck_ev)
    master_d = TwoWire.Master(d_rdy_bus, d_ack_bus, d_bus, ck_ev)
    slave = TwoWire.Slave(w_rdy_bus,
                          w_ack_bus,
                          w_bus,
                          ck_ev,
                          callbacks=[bg.Get])
    yield rst_out_ev

    # global
    master_ad = master_a.values
    master_dd = master_d.values

    # simulation
    n_bofs, bofs = conf.CreateBlockTransaction()
    for i in range(n_bofs):
        # Index 3 of the returned sequence is unpacked into the `_o` names.
        (n_o, bofs_o, abeg_o, aend_o, abeg_id_o, aend_id_o,
         dummy) = conf.CreateAccumBlockTransaction(bofs[i])[3]
        for j in range(n_o):
            # Expect?
            (n_aofs_o, agofs_o, alofs_o, rt_i_o, rg_li_o,
             rg_ri_o) = conf.CreateAccumTransaction(abeg_o[j], aend_o[j])
            accum_idx_o, warpid_o, rg_flat_o, rt_flat_o = conf.CreateAccumWarpTransaction(
                abeg_o[j], aend_o[j], rt_i_o, rg_li_o, rg_ri_o, conf.n_o)
            bgofs_o, blofs_o, valid_o = conf.CreateBofsValidTransaction(
                bofs[i], warpid_o)
            addr_o = conf.CreateVectorAddressTransaction(
                bgofs_o[:, 0, :], agofs_o[accum_idx_o], rg_flat_o,
                conf.umcfg_o['mlinear'], conf.umcfg_o, False)
            # Pack each row of valid flags into one word: flag k lands on bit k.
            valid_o_packed = npd.bitwise_or.reduce(
                valid_o << npi.arange(VSIZE)[newaxis, :], axis=1).astype('u4')

            # output part
            if addr_o.size:
                da, dm = conf.CreateDramWriteTransaction(valid_o, addr_o)
                # Same bit packing as above, over conf.DRAM_ALIGN columns.
                dm_packed = npd.bitwise_or.reduce(
                    dm << npi.arange(conf.DRAM_ALIGN)[newaxis, :], axis=1)
                st.Resize(da.shape[0])
                test.Expect((da[:, newaxis], dm_packed[:, newaxis]))

                # Send
                # iter_a reads addr_o / valid_o_packed of the current
                # iteration; it is handed to SendIter right below.
                def iter_a():
                    for k in range(addr_o.shape[0]):
                        npd.copyto(master_ad[0], addr_o[k])
                        master_ad[1][0] = valid_o_packed[k]
                        yield master_ad

                # master_d sends da.shape[0] empty tuples from a forked
                # coroutine while this coroutine sends the addresses.
                Fork(master_d.SendIter(repeat(tuple(), da.shape[0])))
                yield from master_a.SendIter(iter_a())

        # NOTE: reuses the outer loop's name `i`; the outer `for` rebinds it on
        # the next iteration and `bofs[i]` is not read after this point.
        for i in range(100):
            yield ck_ev

    for i in range(100):
        yield ck_ev
    assert st.is_clean
    FinishSim()
Esempio n. 13
0
def main():
    """Rgb888ToYuv422 testbench coroutine.

    Coefficients, pixel counts and pixels are sent by three masters running as
    joinable forks. The Y, U and V outputs are collected separately and
    compared with the ``M.Yuv422(M.Rgb2Yuv(...))`` reference.
    """
    # Reference data
    coeff_test = M.coeff_test
    rgbs = M.RandomPixelStreams()
    yuvs = M.Yuv422(M.Rgb2Yuv(rgbs))
    gold_rgb = np.hstack(rgbs)
    # Elements 0, 1, 2 of every image, concatenated over all images.
    gold_y, gold_u, gold_v = (np.concatenate([img[plane] for img in yuvs])
                              for plane in range(3))
    n_y = gold_y.shape[0]
    n_uv = gold_u.shape[0]

    # Bind to the Verilog signals: twelve handshake buses, then six data buses
    # (pixel, number, coeff, y, u, v).
    handshake_names = (
        "rgb_valid",
        "rgb_ready",
        "pixel_count_valid",
        "pixel_count_ready",
        "coeff_valid",
        "coeff_ready",
        "y_valid",
        "y_ready",
        "u_valid",
        "u_ready",
        "v_valid",
        "v_ready",
    )
    scalar_data_names = ("pixel_count", "coeff_data", "y_data", "u_data",
                         "v_data")
    bus_specs = [(("dut", name), ) for name in handshake_names]
    bus_specs.append((("dut", "rgb_data", (3, )), ))
    bus_specs.extend((("dut", name), ) for name in scalar_data_names)
    (pvalid, pready, nvalid, nready, cvalid, cready, yvalid, yready, uvalid,
     uready, vvalid, vready, pdata, ndata, cdata, ydata, udata,
     vdata) = CreateBuses(bus_specs)
    rst_out_ev, ck_ev = CreateEvents(["rst_out", "ck_ev"])

    # Checker chains: slave -> BusGetter -> Stacker -> scoreboard test.
    scoreboard = Scoreboard("Rgb888ToYuv422")
    check_y = scoreboard.GetTest("Y")
    check_u = scoreboard.GetTest("U")
    check_v = scoreboard.GetTest("V")
    stack_y = Stacker(n_y, callbacks=[check_y.Get])
    stack_u = Stacker(n_uv, callbacks=[check_u.Get])
    stack_v = Stacker(n_uv, callbacks=[check_v.Get])
    get_y = BusGetter(callbacks=[stack_y.Get])
    get_u = BusGetter(callbacks=[stack_u.Get])
    get_v = BusGetter(callbacks=[stack_v.Get])
    masterc = TwoWire.Master(cvalid, cready, cdata, ck_ev)
    mastern = TwoWire.Master(nvalid, nready, ndata, ck_ev)
    masterp = TwoWire.Master(pvalid, pready, pdata, ck_ev, A=1, B=2)
    slavey = TwoWire.Slave(
        yvalid, yready, ydata, ck_ev, A=1, B=2, callbacks=[get_y.Get])
    slaveu = TwoWire.Slave(
        uvalid, uready, udata, ck_ev, A=1, B=2, callbacks=[get_u.Get])
    slavev = TwoWire.Slave(
        vvalid, vready, vdata, ck_ev, A=1, B=2, callbacks=[get_v.Get])
    coeff_payload = masterc.values
    count_payload = mastern.values
    pixel_payload = masterp.values
    check_y.Expect((gold_y[:, np.newaxis], ))
    check_u.Expect((gold_u[:, np.newaxis], ))
    check_v.Expect((gold_v[:, np.newaxis], ))

    # Simulation starts here.
    yield rst_out_ev
    yield ck_ev

    def coeff_stream():
        # Every coefficient of every set is sent as its own transfer.
        for entry in coeff_test:
            for coeff in entry[1].flat:
                coeff_payload.coeff_data[0] = coeff
                yield coeff_payload

    def count_stream():
        # One pixel count per coefficient set.
        for entry in coeff_test:
            count_payload.pixel_count[0] = entry[0]
            yield count_payload

    def pixel_stream():
        # One column of the stacked RGB input per transfer.
        for col in range(n_y):
            np.copyto(pixel_payload.rgb_data, gold_rgb[:, col])
            yield pixel_payload

    senders = [
        JoinableFork(masterc.SendIter(coeff_stream())),
        JoinableFork(mastern.SendIter(count_stream())),
        JoinableFork(masterp.SendIter(pixel_stream())),
    ]
    # Join all three in creation order, then destroy them in the same order.
    for sender in senders:
        yield from sender.Join()
    for sender in senders:
        sender.Destroy()

    for _ in range(100):
        yield ck_ev
    assert stack_y.is_clean
    assert stack_u.is_clean
    assert stack_v.is_clean
    FinishSim()
Esempio n. 14
0
def main():
    """Controller "Rld" testbench coroutine.

    Sends 100 random ``(value, run_len)`` pairs and expects the concatenation
    of ``ToArr(value, run_len)`` over all pairs as one int32 column.
    """
    seed = np.random.randint(10000)
    print("Seed for this run is {}".format(seed))
    np.random.seed(seed)
    n_pairs = 100
    values = np.random.randint(256, size=n_pairs)
    run_lens = np.random.randint(4, size=n_pairs)
    # Expected output: the expansion of every pair, in input order.
    expanded = []
    for value, run_len in zip(values, run_lens):
        expanded.extend(ToArr(value, run_len))
    expected = np.array(expanded, dtype=np.int32)

    scoreboard = Scoreboard("Controller")
    checker = scoreboard.GetTest("Rld")
    stacker = Stacker(expected.shape[0], callbacks=[checker.Get])
    getter = BusGetter(callbacks=[stacker.Get])

    # The source data bus groups two signals; all other buses hold one.
    srdy, sack, sdata, drdy, dack, ddata = CreateBuses([
        (("", "src_rdy"), ),
        (("", "src_ack"), ),
        (("", "src_data"), ("", "src_run_len")),
        (("", "dst_rdy"), ),
        (("", "dst_canack"), ),
        (("", "dst_data"), ),
    ])
    master = TwoWire.Master(srdy, sack, sdata, ck_ev, A=1, B=5)
    slave = TwoWire.Slave(
        drdy, dack, ddata, ck_ev, callbacks=[getter.Get], A=1, B=2)
    yield rst_out_ev
    yield ck_ev

    def pair_stream():
        # Slot 0 takes the value, slot 1 the run length.
        source_values = sdata.values
        for value, run_len in zip(values, run_lens):
            np.copyto(source_values[0], value)
            np.copyto(source_values[1], run_len)
            yield source_values

    checker.Expect((expected[:, np.newaxis], ))
    yield from master.SendIter(pair_stream())

    for _ in range(100):
        yield ck_ev
    assert stacker.is_clean
    FinishSim()
Esempio n. 15
0
def main():
    """CoeffCollect testbench coroutine.

    Pixel counts and individual coefficients are sent by two masters running
    as joinable forks. The slave side is expected to deliver each coefficient
    set (flattened to one row) repeated ``count`` times.
    """
    # Calculate answer
    coeff_test = M.coeff_test
    N_ANS = sum(x[0] for x in coeff_test)
    # np.vstack needs a real sequence: generators were deprecated in
    # NumPy 1.16 and are rejected by current releases, so build a list.
    ans = np.vstack([
        np.repeat(np.reshape(c[1], (1, -1)), c[0], axis=0) for c in coeff_test
    ])

    # Connect to Verilog
    (pvalid, pready, cvalid, cready, csvalid, csready, pdata, cdata,
     csdata) = CreateBuses([
         (("dut", "pixel_count_valid"), ),
         (("dut", "pixel_count_ready"), ),
         (("dut", "coeff_valid"), ),
         (("dut", "coeff_ready"), ),
         (("dut", "coeffs_valid"), ),
         (("dut", "coeffs_ready"), ),
         (("dut", "pixel_count"), ),
         (("dut", "coeff_data"), ),
         (("", "csdata_sext", (9, )), ),
     ])
    rst_out_ev, ck_ev = CreateEvents(["rst_out", "ck_ev"])

    # Initialization
    scb = Scoreboard("CoeffCollect")
    test = scb.GetTest("CoeffCollect")
    st = Stacker(N_ANS, callbacks=[test.Get])
    bg = BusGetter(callbacks=[st.Get])
    masterp = TwoWire.Master(pvalid, pready, pdata, ck_ev)
    masterc = TwoWire.Master(cvalid, cready, cdata, ck_ev)
    slave = TwoWire.Slave(csvalid,
                          csready,
                          csdata,
                          ck_ev,
                          A=1,
                          B=2,
                          callbacks=[bg.Get])
    mdatap = masterp.values
    mdatac = masterc.values
    test.Expect((ans, ))

    # start simulation
    yield rst_out_ev
    yield ck_ev

    def IterP():
        # One pixel count per coefficient set.
        for i in coeff_test:
            mdatap.pixel_count[0] = i[0]
            yield mdatap

    def IterC():
        # Every coefficient of every set is sent as its own transfer.
        for i in coeff_test:
            for j in i[1].flat:
                mdatac.coeff_data[0] = j
                yield mdatac

    # Run both senders concurrently and wait for both before cleaning up.
    th_1 = JoinableFork(masterp.SendIter(IterP()))
    th_2 = JoinableFork(masterc.SendIter(IterC()))
    yield from th_1.Join()
    yield from th_2.Join()
    th_1.Destroy()
    th_2.Destroy()

    for i in range(100):
        yield ck_ev
    assert st.is_clean
    FinishSim()
def main():
    """Testbench coroutine for the "Vec2Arr" scoreboard test.

    A random ``(100, 2)`` integer array is generated; its elements are sent
    one at a time through the ``src_*`` channel, and the rows collected from
    the ``dst_*`` channel are compared against the same array.

    Yields:
        Simulator events to wait on.
    """
    # Pick a seed, print it, and reseed NumPy with it.
    run_seed = np.random.randint(10000)
    print("Seed for this run is {}".format(run_seed))
    np.random.seed(run_seed)

    n_rows = 100
    golden = np.random.randint(4, size=(n_rows, 2))

    # Checking chain: slave -> BusGetter -> Stacker -> scoreboard test.
    scb = Scoreboard("Controller")
    test = scb.GetTest("Vec2Arr")
    st = Stacker(golden.shape[0], callbacks=[test.Get])
    bg = BusGetter(callbacks=[st.Get])

    bus_specs = [
        (("", "src_rdy"), ),
        (("", "src_ack"), ),
        (("", "src_data"), ),
        (("", "dst_rdy"), ),
        (("", "dst_canack"), ),
        (("", "dst_arr", (2, )), ),
    ]
    srdy, sack, sdata, drdy, dack, ddata = CreateBuses(bus_specs)

    master = TwoWire.Master(srdy, sack, sdata, ck_ev, A=1, B=5)
    slave = TwoWire.Slave(drdy, dack, ddata, ck_ev, A=1, B=2,
                          callbacks=[bg.Get])

    yield rst_out_ev
    yield ck_ev

    def scalar_source():
        # Reuse the source bus buffer; overwrite it with each golden element.
        bus_values = sdata.values
        for element in golden.flat:
            np.copyto(bus_values[0], element)
            yield bus_values

    test.Expect((golden, ))
    yield from master.SendIter(scalar_source())

    # Let 100 more clock events pass before checking the stacker.
    for _ in range(100):
        yield ck_ev
    assert st.is_clean
    FinishSim()
Esempio n. 17
0
def main():
    """Testbench coroutine that drives configuration transactions and checks
    the generated outputs.

    For the first block returned by ``cfg.CreateBlockTransaction()`` the
    function walks every accumulation step, fills the master's bus with the
    matching configuration, registers the expected result with ``tst.Expect``
    and sends one transaction.  Names such as ``cfg``, ``tst``, ``dc``, ``bg``,
    ``ST_MODE``, ``VSIZE``, ``VDIM``, ``DIM``, ``CV_BW`` and the bus/event
    objects are not defined here and must come from the enclosing module.

    Yields:
        Simulator events to wait on.
    """
    yield rst_out_ev
    master = TwoWire.Master(bofs_rdy, bofs_ack, bofs_bus, ck_ev)
    slave = TwoWire.Slave(av_rdy, av_ack, av_bus, ck_ev, callbacks=[bg.Get])
    data_bus = master.values

    # simulation
    n_bofs, bofs = cfg.CreateBlockTransaction()
    for i in range(n_bofs):
        # Only the first element of the returned sequence is used.
        (n_i0, bofs_i0, abeg_i0, aend_i0, abeg_id_i0, aend_id_i0,
         dummy) = cfg.CreateAccumBlockTransaction(bofs[i])[0]
        for j in range(n_i0):
            (n_aofs_i0, agofs_i0, alofs_i0, rt_i_i0, rg_li_i0,
             rg_ri_i0) = cfg.CreateAccumTransaction(abeg_i0[j], aend_i0[j])
            cfg.AllocSram(0, abeg_id_i0[j], aend_id_i0[j])
            linear_i0 = cfg.umcfg_i0['local_adr'][:, 0]
            accum_idx_i0, warpid_i0, rg_flat_i0, rt_flat_i0 = cfg.CreateAccumWarpTransaction(
                abeg_i0[j], aend_i0[j], rt_i_i0, rg_li_i0, rg_ri_i0, cfg.n_i0)
            bgofs_i0, blofs_i0, valid_i0 = cfg.CreateBofsValidTransaction(
                bofs[i], warpid_i0)
            addr_i0 = cfg.CreateVectorAddressTransaction(
                blofs_i0[:, 0, :], alofs_i0[accum_idx_i0], rg_flat_i0,
                linear_i0, cfg.umcfg_i0, True)
            # Pack each row of valid flags into one integer: column k is
            # shifted to bit k and the columns are OR-ed together.
            valid_i0_packed = npd.bitwise_or.reduce(
                valid_i0 << npi.arange(VSIZE)[newaxis, :], axis=1)
            # Send source
            npd.copyto(data_bus.i_bofs, bofs[i])
            npd.copyto(data_bus.i_abeg, abeg_i0[j])
            npd.copyto(data_bus.i_aend, aend_i0[j])
            npd.copyto(data_bus.i_linears, linear_i0)
            npd.copyto(data_bus.i_bboundary, cfg.pcfg["total"][0])
            npd.copyto(data_bus.i_dual_axis, cfg.pcfg['dual_axis'])
            npd.copyto(data_bus.i_dual_order, cfg.pcfg['dual_order'])
            npd.copyto(data_bus.i_bsubofs, cfg.v_nd >> cfg.pcfg["lg_vshuf"][0])
            npd.copyto(data_bus.i_bsub_up_order, cfg.pcfg["lg_vsize_2x"][0])
            npd.copyto(data_bus.i_bsub_lo_order, cfg.pcfg["lg_vshuf"][0])
            npd.copyto(data_bus.i_aboundary, cfg.acfg["total"][0])
            npd.copyto(data_bus.i_bgrid_step, cfg.pcfg["local"][0])
            # The "b" fields take columns VDIM: of each table and the "a"
            # fields take columns :VDIM.
            npd.copyto(data_bus.i_global_bshufs, cfg.umcfg_i0["udim"][:,
                                                                      VDIM:])
            npd.copyto(data_bus.i_bstrides_frac,
                       cfg.umcfg_i0["ustride_frac"][:, VDIM:])
            npd.copyto(data_bus.i_bstrides_shamt,
                       cfg.umcfg_i0["ustride_shamt"][:, VDIM:])
            npd.copyto(data_bus.i_global_ashufs,
                       cfg.umcfg_i0["udim"][:, :VDIM])
            npd.copyto(data_bus.i_astrides_frac,
                       cfg.umcfg_i0["ustride_frac"][:, :VDIM])
            npd.copyto(data_bus.i_astrides_shamt,
                       cfg.umcfg_i0["ustride_shamt"][:, :VDIM])
            # Select the columns at power-of-two indices 1, 2, 4, ...
            # (CV_BW of them).
            npd.copyto(data_bus.i_mofs_bsubsteps,
                       cfg.umcfg_i0["vlinear"][:, 1 << npi.arange(CV_BW)])
            npd.copyto(data_bus.i_mboundaries,
                       cfg.umcfg_i0["lmalign"][:, :DIM])
            npd.copyto(data_bus.i_id_begs, cfg.n_i0[0][i])
            npd.copyto(data_bus.i_id_ends, cfg.n_i0[1][i])
            if ST_MODE:
                # Stencil mode: a two-entry LUT [0, 1] is used, so twice as
                # many result rows are expected.
                dc.Resize(accum_idx_i0.shape[0] * 2)
                npd.copyto(data_bus.i_stencil, 1)
                npd.copyto(data_bus.i_stencil_begs, 0)
                npd.copyto(data_bus.i_stencil_ends, 2)
                data_bus.i_stencil_lut[:2] = [0, 1]
                tst.Expect((
                    npd.repeat(rg_flat_i0[:, newaxis], 2, axis=0),
                    # NOTE(review): data_bus[17] is presumably the
                    # i_stencil_lut field written just above; confirm the
                    # field index against the bus definition.
                    (accum_idx_i0[:, newaxis, :] +
                     data_bus[17][:2][:, newaxis]).reshape(-1, VSIZE),
                    npd.repeat(valid_i0_packed[:, newaxis], 2, axis=0),
                    # Interleave a zero before every element:
                    # abcd --> 0a0b0c0d
                    npd.column_stack((npd.zeros_like(rt_flat_i0), rt_flat_i0)
                                     ).reshape(-1, 1),
                ))
            else:
                # Non-stencil mode: one result row per accumulation index.
                dc.Resize(accum_idx_i0.shape[0])
                npd.copyto(data_bus.i_stencil, 0)
                tst.Expect((rg_flat_i0[:, newaxis], addr_i0,
                            valid_i0_packed[:, newaxis], rt_flat_i0[:,
                                                                    newaxis]))
            yield ck_ev
            yield from master.Send(data_bus)
            # Wait 30 clock events after each transaction.
            for ck in range(30):
                yield ck_ev
        assert dc.is_clean
        FinishSim()
        # FinishSim() and this break sit inside the outer loop, so only the
        # first block (i == 0) is ever simulated.
        break
Esempio n. 18
0
def main():
    """Testbench coroutine for the broadcast scoreboard tests.

    Random two-element rows are sent through the ``src_*`` channel; column 0
    is expected on ``dst0_*`` and column 1 on ``dst1_*``.  When the
    ``IN_ORDER`` environment variable is set, the in-order test names are
    used and an extra pair of callbacks asserts that destination 1 never
    receives more items than destination 0.

    Yields:
        Simulator events to wait on.
    """
    # Pick a seed, print it, and reseed NumPy with it.
    run_seed = np.random.randint(10000)
    print("Seed for this run is {}".format(run_seed))
    np.random.seed(run_seed)

    n_rows = 250
    golden = np.random.randint(100, size=(n_rows, 2))

    scb = Scoreboard("Controller")
    parallel_brd = getenv("IN_ORDER") is None
    if parallel_brd:
        test_name0, test_name1 = "Broadcast0", "Broadcast1"
    else:
        test_name0, test_name1 = "BroadcastInOrder0", "BroadcastInOrder1"
    test0 = scb.GetTest(test_name0)
    test1 = scb.GetTest(test_name1)
    st0 = Stacker(n_rows, callbacks=[test0.Get])
    st1 = Stacker(n_rows, callbacks=[test1.Get])
    bg0 = BusGetter(callbacks=[st0.Get])
    bg1 = BusGetter(callbacks=[st1.Get])

    bus_specs = [
        (("", "src_rdy"), ),
        (("", "src_ack"), ),
        (("", "src_data", (2, )), ),
        (("", "dst0_rdy"), ),
        (("", "dst0_canack"), ),
        (("", "dst0_data"), ),
        (("", "dst1_rdy"), ),
        (("", "dst1_canack"), ),
        (("", "dst1_data"), ),
    ]
    (srdy, sack, sdata, drdy0, dack0, ddata0, drdy1, dack1,
     ddata1) = CreateBuses(bus_specs)

    callbacks0 = [bg0.Get]
    callbacks1 = [bg1.Get]
    if not parallel_brd:

        class OrderCheck:
            """Track how many more items destination 0 has received."""

            def __init__(self):
                self.received_diff = 0

            def CbAdd(self, x):
                """Count one item received on destination 0."""
                self.received_diff += 1

            def CbMinus(self, x):
                """Count one item on destination 1; it must not run ahead."""
                self.received_diff -= 1
                assert self.received_diff >= 0

        order_check = OrderCheck()
        callbacks0.append(order_check.CbAdd)
        callbacks1.append(order_check.CbMinus)

    master = TwoWire.Master(srdy, sack, sdata, ck_ev, A=1, B=2)
    slave0 = TwoWire.Slave(drdy0, dack0, ddata0, ck_ev, A=3, B=8,
                           callbacks=callbacks0)
    slave1 = TwoWire.Slave(drdy1, dack1, ddata1, ck_ev, A=5, B=8,
                           callbacks=callbacks1)

    yield rst_out_ev
    yield ck_ev

    def row_source():
        # Reuse the source bus buffer; overwrite it with each golden row.
        bus_values = sdata.values
        for row in golden:
            np.copyto(bus_values[0], row)
            yield bus_values

    test0.Expect((golden[:, 0, np.newaxis], ))
    test1.Expect((golden[:, 1, np.newaxis], ))
    yield from master.SendIter(row_source())

    # Let 10 more clock events pass before checking both stackers.
    for _ in range(10):
        yield ck_ev
    assert st0.is_clean and st1.is_clean
    FinishSim()
Esempio n. 19
0
def main():
    """Testbench coroutine that sends one full configuration transaction.

    The function advances ``verf_func_gen`` once to obtain ``ms``, creates
    the configuration master and a ``DramRespChan``, fills every field of the
    configuration bus from ``cfg`` and a few module-level tables, sends the
    transaction, waits 300 clock events and then advances ``verf_func_gen``
    a second time before reporting.  ``cfg``, ``verf_func_gen``, the bus and
    event objects, and constants such as ``SIM_MODE``, ``N_TAU``, ``CSIZE``,
    ``CV_BW`` and ``VDIM`` are not defined here and must come from the
    enclosing module.

    Yields:
        Simulator events to wait on.
    """
    # init
    # First step of the verification generator; its value is handed to the
    # DRAM response channel below.
    ms = next(verf_func_gen)
    cfg_master = TwoWire.Master(cfg_rdy_bus,
                                cfg_ack_bus,
                                cfg_bus,
                                ck_ev,
                                strict=strict)
    # Wait for the reset event and three clock events before continuing.
    yield rst_out_ev
    yield ck_ev
    yield ck_ev
    yield ck_ev
    resp_chan = DramRespChan(
        ra_rdy_bus,
        ra_ack_bus,
        ra_bus,
        rd_rdy_bus,
        rd_ack_bus,
        rd_bus,
        w_rdy_bus,
        w_ack_bus,
        w_bus,
        ck_ev,
        ms,
        # Simulation configuration: 1600MHz/200MHz
        8.0,
        N_TAU,
        CSIZE)

    i_data = cfg_master.values
    # Pack the flags "element == UmiModel.MEM_WRAP" into one integer, with
    # element k placed at bit k.
    CompressWrap = lambda x: npd.bitwise_or.reduce(
        (x == UmiModel.MEM_WRAP).astype('i2') << npi.arange(x.shape[0]))
    # Index tuple: all rows, columns at power-of-two indices 1, 2, 4, ...
    VL_IDX = slice(None), 1 << npi.arange(CV_BW)
    if SIM_MODE == 2:
        # We use the same 'step' and shrink 'end'.
        # NOTE(review): if cfg.pcfg['end'][0] is a NumPy view, the in-place
        # writes to nblk below also modify cfg.pcfg['end']; confirm that is
        # intended.
        nblk = cfg.pcfg['end'][0]
        ss0 = cfg.pcfg['syst0_skip'][0]
        sx0 = cfg.pcfg['syst0_axis'][0]
        st0 = cfg.pcfg['local'][0, sx0]
        ss1 = cfg.pcfg['syst1_skip'][0]
        sx1 = cfg.pcfg['syst1_axis'][0]
        st1 = cfg.pcfg['local'][0, sx1]
        # An axis of -1 means "no change"; otherwise, for positive values,
        # the end becomes ceil(end / (step * N_TAU_*)) * step.
        if sx0 != -1:
            nblk[sx0] = ((nblk[sx0] - 1) // (st0 * N_TAU_X) + 1) * st0
        if sx1 != -1:
            nblk[sx1] = ((nblk[sx1] - 1) // (st1 * N_TAU_Y) + 1) * st1
        npd.copyto(i_data.i_bgrid_end, nblk)
        i_data.i_i0_systolic_skip[0] = ss0
        i_data.i_i0_systolic_axis[0] = sx0
        i_data.i_i1_systolic_skip[0] = ss1
        i_data.i_i1_systolic_axis[0] = sx1
    else:
        npd.copyto(i_data.i_bgrid_end, cfg.pcfg['end'][0])
    # Fields taken from cfg.pcfg / cfg.acfg.
    npd.copyto(i_data.i_bgrid_step, cfg.pcfg['local'][0])
    npd.copyto(i_data.i_bboundary, cfg.pcfg['total'][0])
    npd.copyto(i_data.i_dual_axis, cfg.pcfg['dual_axis'])
    npd.copyto(i_data.i_dual_order, cfg.pcfg['dual_order'])
    npd.copyto(i_data.i_bsubofs, cfg.v_nd >> cfg.pcfg['lg_vshuf'][0])
    npd.copyto(i_data.i_bsub_up_order, cfg.pcfg['lg_vsize_2x'][0])
    npd.copyto(i_data.i_bsub_lo_order, cfg.pcfg['lg_vshuf'][0])
    npd.copyto(i_data.i_agrid_step, cfg.acfg['local'][0])
    npd.copyto(i_data.i_agrid_end, cfg.acfg['end'][0])
    npd.copyto(i_data.i_aboundary, cfg.acfg['total'][0])
    # i_i0_* fields, taken from cfg.umcfg_i0 and cfg.n_i0.
    npd.copyto(i_data.i_i0_local_xor_srcs, cfg.umcfg_i0['xor_src'])
    npd.copyto(i_data.i_i0_local_xor_swaps, cfg.umcfg_i0['xor_swap'])
    npd.copyto(i_data.i_i0_local_boundaries, cfg.umcfg_i0['lmalign'])
    npd.copyto(i_data.i_i0_local_bsubsteps, cfg.umcfg_i0['vlinear'][VL_IDX])
    npd.copyto(i_data.i_i0_local_pads, cfg.umcfg_i0['lmpad'])
    npd.copyto(i_data.i_i0_global_starts, cfg.umcfg_i0['mstart'])
    npd.copyto(i_data.i_i0_global_linears, cfg.umcfg_i0['mlinear'])
    npd.copyto(i_data.i_i0_global_cboundaries,
               cfg.umcfg_i0['mboundary_lmwidth'])
    npd.copyto(i_data.i_i0_global_boundaries, cfg.umcfg_i0['mboundary'])
    npd.copyto(i_data.i_i0_global_bshufs, cfg.umcfg_i0['udim'][:, VDIM:])
    npd.copyto(i_data.i_i0_global_ashufs, cfg.umcfg_i0['udim'][:, :VDIM])
    npd.copyto(i_data.i_i0_bstrides_frac, cfg.umcfg_i0['ustride_frac'][:,
                                                                       VDIM:])
    npd.copyto(i_data.i_i0_bstrides_shamt,
               cfg.umcfg_i0['ustride_shamt'][:, VDIM:])
    npd.copyto(i_data.i_i0_astrides_frac,
               cfg.umcfg_i0['ustride_frac'][:, :VDIM])
    npd.copyto(i_data.i_i0_astrides_shamt,
               cfg.umcfg_i0['ustride_shamt'][:, :VDIM])
    npd.copyto(i_data.i_i0_wrap, CompressWrap(cfg.umcfg_i0['mwrap']))
    npd.copyto(i_data.i_i0_pad_value, cfg.umcfg_i0['pad_value'])
    npd.copyto(i_data.i_i0_id_begs, cfg.n_i0[0])
    npd.copyto(i_data.i_i0_id_ends, cfg.n_i0[1])
    # TODO (begin)
    # NOTE(review): the stencil flag is derived from N_SLUT0 while the end
    # index is only written when N_I0CFG is non-zero; confirm the two
    # conditions are meant to differ.
    i_data.i_i0_stencil[0] = int(N_SLUT0 != 0)
    npd.copyto(i_data.i_i0_stencil_begs, 0)
    npd.copyto(i_data.i_i0_stencil_ends, 0)
    if N_I0CFG != 0:
        i_data.i_i0_stencil_ends[0] = N_SLUT0
    npd.copyto(i_data.i_i0_stencil_lut, slut0)
    # TODO (end)
    # i_i1_* fields, taken from cfg.umcfg_i1 and cfg.n_i1 in the same way.
    npd.copyto(i_data.i_i1_local_xor_srcs, cfg.umcfg_i1['xor_src'])
    npd.copyto(i_data.i_i1_local_xor_swaps, cfg.umcfg_i1['xor_swap'])
    npd.copyto(i_data.i_i1_local_boundaries, cfg.umcfg_i1['lmalign'])
    npd.copyto(i_data.i_i1_local_bsubsteps, cfg.umcfg_i1['vlinear'][VL_IDX])
    npd.copyto(i_data.i_i1_local_pads, cfg.umcfg_i1['lmpad'])
    npd.copyto(i_data.i_i1_global_starts, cfg.umcfg_i1['mstart'])
    npd.copyto(i_data.i_i1_global_linears, cfg.umcfg_i1['mlinear'])
    npd.copyto(i_data.i_i1_global_cboundaries,
               cfg.umcfg_i1['mboundary_lmwidth'])
    npd.copyto(i_data.i_i1_global_boundaries, cfg.umcfg_i1['mboundary'])
    npd.copyto(i_data.i_i1_global_bshufs, cfg.umcfg_i1['udim'][:, VDIM:])
    npd.copyto(i_data.i_i1_global_ashufs, cfg.umcfg_i1['udim'][:, :VDIM])
    npd.copyto(i_data.i_i1_bstrides_frac, cfg.umcfg_i1['ustride_frac'][:,
                                                                       VDIM:])
    npd.copyto(i_data.i_i1_bstrides_shamt,
               cfg.umcfg_i1['ustride_shamt'][:, VDIM:])
    npd.copyto(i_data.i_i1_astrides_frac,
               cfg.umcfg_i1['ustride_frac'][:, :VDIM])
    npd.copyto(i_data.i_i1_astrides_shamt,
               cfg.umcfg_i1['ustride_shamt'][:, :VDIM])
    npd.copyto(i_data.i_i1_wrap, CompressWrap(cfg.umcfg_i1['mwrap']))
    npd.copyto(i_data.i_i1_pad_value, cfg.umcfg_i1['pad_value'])
    npd.copyto(i_data.i_i1_id_begs, cfg.n_i1[0])
    npd.copyto(i_data.i_i1_id_ends, cfg.n_i1[1])
    # TODO (begin)
    # NOTE(review): same N_SLUT1 / N_I1CFG asymmetry as for i0 above.
    i_data.i_i1_stencil[0] = int(N_SLUT1 != 0)
    npd.copyto(i_data.i_i1_stencil_begs, 0)
    npd.copyto(i_data.i_i1_stencil_ends, 0)
    if N_I1CFG != 0:
        i_data.i_i1_stencil_ends[0] = N_SLUT1
    npd.copyto(i_data.i_i1_stencil_lut, slut1)
    # TODO (end)
    # i_o_* fields, taken from cfg.umcfg_o and cfg.n_o.
    npd.copyto(i_data.i_o_global_boundaries, cfg.umcfg_o['mboundary'])
    npd.copyto(i_data.i_o_global_bsubsteps, cfg.umcfg_o['vlinear'][VL_IDX])
    npd.copyto(i_data.i_o_global_linears, cfg.umcfg_o['mlinear'])
    npd.copyto(i_data.i_o_global_bshufs, cfg.umcfg_o['udim'][:, VDIM:])
    npd.copyto(i_data.i_o_bstrides_frac, cfg.umcfg_o['ustride_frac'][:, VDIM:])
    npd.copyto(i_data.i_o_bstrides_shamt, cfg.umcfg_o['ustride_shamt'][:,
                                                                       VDIM:])
    npd.copyto(i_data.i_o_global_ashufs, cfg.umcfg_o['udim'][:, :VDIM])
    npd.copyto(i_data.i_o_astrides_frac, cfg.umcfg_o['ustride_frac'][:, :VDIM])
    npd.copyto(i_data.i_o_astrides_shamt,
               cfg.umcfg_o['ustride_shamt'][:, :VDIM])
    npd.copyto(i_data.i_o_id_begs, cfg.n_o[0])
    npd.copyto(i_data.i_o_id_ends, cfg.n_o[1])
    # Instruction-related fields and the module-level tables clut / tlut.
    npd.copyto(i_data.i_inst_id_begs, cfg.n_inst[0])
    npd.copyto(i_data.i_inst_id_ends, cfg.n_inst[1])
    npd.copyto(i_data.i_insts, cfg.insts)
    npd.copyto(i_data.i_consts, clut)
    npd.copyto(i_data.i_const_texs, tlut)
    npd.copyto(i_data.i_reg_per_warp, cfg.n_reg)
    # Send the whole configuration as a single transaction.
    yield from cfg_master.Send(i_data)

    # Wait 300 clock events before running the check.
    for i in range(300):
        yield ck_ev
    # check
    # Advance the verification generator a second time; it is allowed to
    # finish here, so StopIteration is deliberately ignored.
    try:
        next(verf_func_gen)
    except StopIteration:
        pass
    resp_chan.Report()
    FinishSim()