Example #1
0
def main():
    """Testbench coroutine for the "RemapCache" scoreboard.

    Sequence, as driven below:
      1. wait for reset release, program the XOR configuration bus;
      2. send N_VEC write beats (index ``i`` plus the values
         ``i*VSIZE .. (i+1)*VSIZE-1``) through the OneWire master;
      3. register ``raddr`` as the expected result and send one read
         request per row of ``raddr`` through the TwoWire master;
      4. idle, then require the stacker to be clean and finish.
    """
    scoreboard = Scoreboard("RemapCache")
    checker = scoreboard.GetTest(f"test{RMC_CONF}")

    def DumpFirst(mat):
        # Save the first collected column to disk for offline inspection.
        return npd.savetxt("rmc_got.txt", mat[0], fmt="%d")

    stacker = Stacker(0, callbacks=[DumpFirst, checker.Get])
    getter = BusGetter(callbacks=[stacker.Get])
    wad_master = OneWire.Master(wad_dval_bus, wad_bus, ck_ev)
    ra_master = TwoWire.Master(ra_rdy_bus, ra_ack_bus, ra_bus, ck_ev)
    # Bound to a name so the slave object stays referenced for the whole run.
    rd_slave = TwoWire.Slave(
        rd_rdy_bus, rd_ack_bus, rd_bus, ck_ev, callbacks=[getter.Get])
    wad_data = wad_master.values
    ra_data = ra_master.values
    yield rst_out_ev

    # start simulation: program the XOR configuration
    cfg_values = cfg_bus.values
    npd.copyto(cfg_values.i_xor_srcs[0], xsrc)
    cfg_values.i_xor_swaps[0] = xswap
    cfg_bus.Write()
    yield ck_ev

    def IterWrite():
        # The same wad_data object is refilled and yielded for every beat.
        for vec in range(N_VEC):
            wad_data[0][0] = 0
            wad_data[1][0] = vec
            npd.copyto(wad_data[2],
                       npi.arange(vec * VSIZE, (vec + 1) * VSIZE))
            yield wad_data

    yield from wad_master.SendIter(IterWrite())

    for _ in range(10):
        yield ck_ev

    n_test = N_VEC * VSIZE - npd.sum(stride, dtype=npd.int32) - 1
    raddr = npi.arange(n_test)[:, newaxis] + STEP
    stacker.Resize(n_test)
    checker.Expect((raddr, ))

    def IterRead():
        # One read request per row of raddr.
        for row in raddr:
            ra_data[0][0] = 0
            npd.copyto(ra_data[1], row)
            yield ra_data

    yield from ra_master.SendIter(IterRead())

    for _ in range(100):
        yield ck_ev
    assert stacker.is_clean
    FinishSim()
Example #2
0
def main():
    """Testbench coroutine for the "ParallelBlockLooper_mc" scoreboard.

    One configuration is sent through the TwoWire master. Each of the N_TAU
    slave ports copies what it receives into its own deque and answers with
    a "block done" beat through a OneWire master. After the run, every
    golden block offset must be found at the head of some port's deque, and
    no deque may have anything left.
    """
    n_golden, bofs = cfg.CreateBlockTransaction()
    scb = Scoreboard("ParallelBlockLooper_mc")
    # Not used further in this function; the call is kept as in the original.
    test_b = scb.GetTest("bofs")
    # master
    master = TwoWire.Master(rdy_bus_s, ack_bus_s, bus_s, ck_ev)
    i_data = master.values
    # slave: one receive queue per port
    ans = [deque() for _ in range(N_TAU)]
    bgs_b = []
    masters_bdone = []
    resps = []
    slaves_b = []
    for i in range(N_TAU):
        # `i=i` / `resp=resp` bind the current value; a plain closure would
        # see only the last iteration's value.
        bg_b = BusGetter(copy=True,
                         callbacks=[lambda x, i=i: ans[i].append(x)])
        master_bdone = OneWire.Master(dval_buss[i], tuple(), ck_ev)
        resp = Response(master_bdone.SendIter, ck_ev, B=100)
        slave_b = TwoWire.Slave(
            rdy_buss_b[i],
            ack_buss_b[i],
            buss_b[i],
            ck_ev,
            callbacks=[bg_b.Get, lambda x, resp=resp: resp.Append(tuple())],
            A=1,
            B=100)
        bgs_b.append(bg_b)
        # Fix: the original appended the list to itself
        # (`masters_bdone.append(masters_bdone)`) instead of the new master.
        masters_bdone.append(master_bdone)
        resps.append(resp)
        slaves_b.append(slave_b)
    yield rst_out_ev
    yield ck_ev

    # start simulation
    npd.copyto(i_data[0], cfg.pcfg['local'][0])
    npd.copyto(i_data[1], cfg.pcfg['end'][0])

    yield from master.Send(i_data)

    for _ in range(30):
        yield ck_ev

    # Each golden offset must match the head of exactly one port's queue;
    # the first matching port is consumed.
    for i in range(n_golden):
        b = bofs[i]
        popped = False
        for a in ans:
            if a and (a[0] == b).all():
                a.popleft()
                popped = True
                break
        assert popped, f"No correct bofs to match {b}"
    assert all(not a for a in ans), "Some extra bofs"
    FinishSim()
Example #3
0
def main():
    """Testbench coroutine for the "ChunkAddrLooper" scoreboard.

    For every block offset and every accumulation chunk under it, the first
    chunk head is sent through the TwoWire master together with the selected
    memory configuration, and the commands collected from the slave side are
    checked against ``cfg.CreateDramReadTransaction``.

    The environment variable ``TEST0`` (any value, presence only) selects
    ``cfg.umcfg_i0``; otherwise ``cfg.umcfg_i1`` is used.
    """
    scb = Scoreboard("ChunkAddrLooper")
    test = scb.GetTest("test")
    st = Stacker(0, callbacks=[test.Get])
    bg = BusGetter(callbacks=[st.Get])
    master = TwoWire.Master(mrdy_bus, mack_bus, mofs_bus, ck_ev)
    i_data = master.values
    slave = TwoWire.Slave(crdy_bus,
                          cack_bus,
                          cmd_bus,
                          ck_ev,
                          callbacks=[bg.Get])
    yield rst_out_ev

    # simulation
    n_bofs, bofs = cfg.CreateBlockTransaction()
    TEST0 = getenv("TEST0") is not None
    print(f"Testing {0 if TEST0 else 1}...")
    TEST_UMCFG = cfg.umcfg_i0 if TEST0 else cfg.umcfg_i1
    # NOTE(review): with TEST0 set this picks umcfg_i0 but index 1 of the
    # CreateAccumBlockTransaction result; another testbench in this file
    # unpacks that result as (i0, i1, dma, o, alu). Confirm the index is not
    # inverted. Behaviour is kept as in the original.
    OFS = int(TEST0)
    for i in range(n_bofs):
        (n_i, bofs_i, abeg_i, _aend_i, abeg_id_i, aend_id_i,
         _dummy) = cfg.CreateAccumBlockTransaction(bofs[i])[OFS]
        for j in range(n_i):
            # only use the first one
            TEST_ABMOFS = cfg.CreateChunkHead(bofs_i[j], abeg_i[j],
                                              abeg_id_i[j], aend_id_i[j],
                                              TEST_UMCFG)[0]
            ans = cfg.CreateDramReadTransaction(TEST_ABMOFS, TEST_UMCFG, 0)
            st.Resize(ans.shape[0])
            npd.copyto(i_data[0], TEST_ABMOFS)
            npd.copyto(i_data[1], TEST_UMCFG["lmpad"][0])
            npd.copyto(i_data[2], TEST_UMCFG["mboundary"][0])
            npd.copyto(i_data[3], TEST_UMCFG["mboundary_lmwidth"][0])
            i_data[4][0] = TEST_UMCFG["mlinear"][0]
            i_data[5][0] = (
                1 if TEST_UMCFG["mwrap"][0] == UmiModel.MEM_WRAP else 0)
            test.Expect(
                tuple(ans[k][:, newaxis]
                      for k in ("cmd_type", "islast", "addr", "ofs", "len")))
            yield from master.Send(i_data)
            # Use a throwaway index: the original reused `i` here and so
            # shadowed the outer block index.
            for _ in range(100):
                yield ck_ev

    for _ in range(300):
        yield ck_ev
    assert st.is_clean
    FinishSim()
Example #4
0
def main():
    """Testbench coroutine driving the source bus with ALU accumulation ranges.

    Uses the last entry of ``cfg.CreateAccumBlockTransaction(bofs[0])``.
    For each of its ``n_alu`` ranges the expected tuple is registered on
    ``tst``, the collector ``col`` is resized, and one transaction is sent.
    Every received instruction beat is acknowledged through the
    ``inst_commit`` OneWire master. ``bg``, ``col`` and ``tst`` are taken
    from the enclosing module.
    """
    yield rst_out_ev
    _, bofs = cfg.CreateBlockTransaction()
    alu_txn = cfg.CreateAccumBlockTransaction(bofs[0])[-1]
    n_alu, _, abeg_alu, aend_alu, _, _, _ = alu_txn
    master = TwoWire.Master(src_rdy, src_ack, src_bus, ck_ev)
    inst_commit = OneWire.Master(inst_commit_dval, tuple(), ck_ev)
    resp = Response(inst_commit.SendIter, ck_ev)

    def CommitInst(_):
        # Queue one empty commit beat per received instruction.
        return resp.Append(tuple())

    slave = TwoWire.Slave(
        inst_rdy, inst_ack, inst_bus, ck_ev, callbacks=[bg.Get, CommitInst])
    data_bus = master.values

    # start simulation
    for k in range(n_alu):
        # Expect?
        (_, agofs, _, _, rg_li,
         rg_ri) = cfg.CreateAccumTransaction(abeg_alu[k], aend_alu[k])
        accum, warpid, rg_flat = cfg.CreateAccumWarpTransaction(
            abeg_alu[k], aend_alu[k], None, rg_li, rg_ri, cfg.n_inst)
        bofs_exp, _, _ = cfg.CreateBofsValidTransaction(bofs[0], warpid)
        # /2 since in hardware, we use two warps (2x, 2x+1) to form a large warp x
        warpid >>= 1
        bus_fill = (
            (data_bus.i_bofs, bofs[0]),
            (data_bus.i_aofs_beg, abeg_alu[k]),
            (data_bus.i_aofs_end, aend_alu[k]),
            (data_bus.i_dual_axis, cfg.pcfg['dual_axis']),
            (data_bus.i_dual_order, cfg.pcfg['dual_order']),
            (data_bus.i_bgrid_step, cfg.pcfg["local"][0]),
            (data_bus.i_bsub_up_order, cfg.pcfg["lg_vsize_2x"][0]),
            (data_bus.i_bsub_lo_order, cfg.pcfg["lg_vshuf"][0]),
            (data_bus.i_aboundary, cfg.acfg["total"][0]),
            (data_bus.i_inst_id_begs, cfg.n_inst[0]),
            (data_bus.i_inst_id_ends, cfg.n_inst[1]),
        )
        for dst, src in bus_fill:
            npd.copyto(dst, src)
        col.Resize(rg_flat.size)
        tst.Expect((bofs_exp[:, 0, :], agofs[accum],
                    rg_flat[:, newaxis], warpid[:, newaxis]))
        yield from master.Send(data_bus)
        yield ck_ev

    for _ in range(10):
        yield ck_ev
    assert col.is_clean
    FinishSim()
def main():
    """Testbench coroutine for the "AccumBlockLooper" scoreboard.

    Five output channels (i0, i1, dma, o, alu) each get a scoreboard test,
    a Stacker, a BusGetter and a TwoWire slave. A single transaction built
    from ``bofs[0]`` is sent, and each channel is compared against the
    matching entry of ``cfg.CreateAccumBlockTransaction(bofs[0])``.
    """
    scb = Scoreboard("AccumBlockLooper")
    # Objects are created phase by phase (tests, stackers, getters, master,
    # slaves), each phase in channel order i0, i1, dma, o, alu.
    test_names = ("test_i0", "test_i1", "test_dma", "test_o", "test_alu")
    tests = [scb.GetTest(name) for name in test_names]
    stackers = [Stacker(0, callbacks=[t.Get]) for t in tests]
    getters = [BusGetter(callbacks=[s.Get]) for s in stackers]
    master = TwoWire.Master(s_rdy_bus, s_ack_bus, s_bus, ck_ev)
    i_data = master.values
    slave_buses = (
        (i0_rdy_bus, i0_ack_bus, i0_bus),
        (i1_rdy_bus, i1_ack_bus, i1_bus),
        (dma_rdy_bus, dma_ack_bus, dma_bus),
        (o_rdy_bus, o_ack_bus, o_bus),
        (alu_rdy_bus, alu_ack_bus, alu_bus),
    )
    # Kept in a list so the slave objects stay referenced for the whole run.
    slaves = [
        TwoWire.Slave(rdy, ack, bus, ck_ev, callbacks=[getter.Get])
        for (rdy, ack, bus), getter in zip(slave_buses, getters)
    ]
    test_i0, test_i1, test_dma, test_o, test_alu = tests
    st_i0, st_i1, st_dma, st_o, st_alu = stackers
    yield rst_out_ev
    yield ck_ev

    _, bofs = cfg.CreateBlockTransaction()
    (ans_i0, ans_i1, ans_dma, ans_o,
     ans_alu) = cfg.CreateAccumBlockTransaction(bofs[0])
    n_i0, bofs_i0, abeg_i0, aend_i0, abeg_id_i0, aend_id_i0, _ = ans_i0
    n_i1, bofs_i1, abeg_i1, aend_i1, abeg_id_i1, aend_id_i1, _ = ans_i1
    (n_dma, bofs_dma, abeg_dma, _, abeg_id_dma, aend_id_dma,
     which_dma) = ans_dma
    n_o, bofs_o, abeg_o, aend_o, abeg_id_o, aend_id_o, _ = ans_o
    n_alu, bofs_alu, abeg_alu, aend_alu, _, _, _ = ans_alu

    def Column(vec):
        # Reshape a 1-D vector into a single-column 2-D array.
        return vec[:, newaxis]

    def RangeExpect(b, beg, end, beg_id, end_id):
        # Expectation layout shared by the i0, i1 and o channels.
        return (b, beg, end, Column(beg_id), Column(end_id))

    # start simulation: fill the input bus fields in index order
    bus_sources = (
        bofs[0],
        cfg.acfg['local'][0],
        cfg.acfg['end'][0],
        cfg.acfg['total'][0],
        cfg.n_i0[0],
        cfg.n_i0[1],
        cfg.n_i1[0],
        cfg.n_i1[1],
        cfg.n_o[0],
        cfg.n_o[1],
        cfg.n_inst[0],
        cfg.n_inst[1],
    )
    for idx, src in enumerate(bus_sources):
        npd.copyto(i_data[idx], src)

    test_i0.Expect(
        RangeExpect(bofs_i0, abeg_i0, aend_i0, abeg_id_i0, aend_id_i0))
    test_i1.Expect(
        RangeExpect(bofs_i1, abeg_i1, aend_i1, abeg_id_i1, aend_id_i1))
    test_dma.Expect((bofs_dma, Column(which_dma), abeg_dma,
                     Column(abeg_id_dma), Column(aend_id_dma)))
    test_o.Expect(RangeExpect(bofs_o, abeg_o, aend_o, abeg_id_o, aend_id_o))
    test_alu.Expect((bofs_alu, abeg_alu, aend_alu))
    for stacker, count in zip(stackers, (n_i0, n_i1, n_dma, n_o, n_alu)):
        stacker.Resize(count)
    yield from master.Send(i_data)

    for _ in range(300):
        yield ck_ev
    assert st_i0.is_clean
    assert st_i1.is_clean
    assert st_dma.is_clean
    assert st_o.is_clean
    assert st_alu.is_clean
    FinishSim()
Example #6
0
 def IterRead():
     """Yield ``ra_data`` once per row of ``raddr``.

     Before each yield, field 0 is set to 0 and the row is copied into
     field 1. The same ``ra_data`` object is reused for every beat.
     """
     for row in raddr:
         ra_data[0][0] = 0
         npd.copyto(ra_data[1], row)
         yield ra_data
Example #7
0
 def IterWrite():
     """Yield ``wad_data`` once per vector index in ``range(N_VEC)``.

     Before each yield, field 0 is set to 0, field 1 to the vector index,
     and field 2 to the VSIZE consecutive integers starting at
     ``index * VSIZE``. The same ``wad_data`` object is reused every beat.
     """
     for vec in range(N_VEC):
         wad_data[0][0] = 0
         wad_data[1][0] = vec
         payload = npi.arange(vec * VSIZE, (vec + 1) * VSIZE)
         npd.copyto(wad_data[2], payload)
         yield wad_data
Example #8
0
def main():
    """Top-level testbench coroutine: program the full configuration and run.

    Pulls the memory-space object from ``verf_func_gen``, attaches a
    ``DramRespChan`` to the DRAM buses, fills every field of the
    configuration bus from ``cfg`` and the module-level lookup tables, sends
    it once, idles for 300 clocks, then resumes ``verf_func_gen`` so it can
    run its checks before the report is printed.
    """
    # init
    ms = next(verf_func_gen)
    cfg_master = TwoWire.Master(
        cfg_rdy_bus, cfg_ack_bus, cfg_bus, ck_ev, strict=strict)
    yield rst_out_ev
    for _ in range(3):
        yield ck_ev
    resp_chan = DramRespChan(
        ra_rdy_bus, ra_ack_bus, ra_bus,
        rd_rdy_bus, rd_ack_bus, rd_bus,
        w_rdy_bus, w_ack_bus, w_bus,
        ck_ev,
        ms,
        # Simulation configuration: 1600MHz/200MHz
        8.0,
        N_TAU,
        CSIZE)

    i_data = cfg_master.values
    put = npd.copyto
    pcfg = cfg.pcfg
    acfg = cfg.acfg

    def CompressWrap(x):
        # Pack the per-entry "is MEM_WRAP" flags into one integer bitmask,
        # entry k going to bit k.
        flags = (x == UmiModel.MEM_WRAP).astype('i2')
        return npd.bitwise_or.reduce(flags << npi.arange(x.shape[0]))

    # All rows, power-of-two columns 1, 2, 4, ... (CV_BW of them).
    VL_IDX = slice(None), 1 << npi.arange(CV_BW)

    if SIM_MODE == 2:
        # We use the same 'step' and shrink 'end'.
        # NOTE(review): nblk is obtained by indexing cfg.pcfg['end']; if that
        # is a NumPy array this is a view, so the writes below also change
        # cfg.pcfg['end'] itself. Confirm that is intended.
        nblk = pcfg['end'][0]
        ss0 = pcfg['syst0_skip'][0]
        sx0 = pcfg['syst0_axis'][0]
        st0 = pcfg['local'][0, sx0]
        ss1 = pcfg['syst1_skip'][0]
        sx1 = pcfg['syst1_axis'][0]
        st1 = pcfg['local'][0, sx1]
        if sx0 != -1:
            nblk[sx0] = ((nblk[sx0] - 1) // (st0 * N_TAU_X) + 1) * st0
        if sx1 != -1:
            nblk[sx1] = ((nblk[sx1] - 1) // (st1 * N_TAU_Y) + 1) * st1
        put(i_data.i_bgrid_end, nblk)
        i_data.i_i0_systolic_skip[0] = ss0
        i_data.i_i0_systolic_axis[0] = sx0
        i_data.i_i1_systolic_skip[0] = ss1
        i_data.i_i1_systolic_axis[0] = sx1
    else:
        put(i_data.i_bgrid_end, pcfg['end'][0])

    # Block / accumulation grid
    put(i_data.i_bgrid_step, pcfg['local'][0])
    put(i_data.i_bboundary, pcfg['total'][0])
    put(i_data.i_dual_axis, pcfg['dual_axis'])
    put(i_data.i_dual_order, pcfg['dual_order'])
    put(i_data.i_bsubofs, cfg.v_nd >> pcfg['lg_vshuf'][0])
    put(i_data.i_bsub_up_order, pcfg['lg_vsize_2x'][0])
    put(i_data.i_bsub_lo_order, pcfg['lg_vshuf'][0])
    put(i_data.i_agrid_step, acfg['local'][0])
    put(i_data.i_agrid_end, acfg['end'][0])
    put(i_data.i_aboundary, acfg['total'][0])

    def LoadInput(tag, umcfg, id_range, n_slut, n_cfg, slut):
        # Fill every `i_<tag>_*` field of the config bus for one input
        # stream; i0 and i1 use the same field layout.
        def port(name):
            return getattr(i_data, f"i_{tag}_{name}")

        put(port("local_xor_srcs"), umcfg['xor_src'])
        put(port("local_xor_swaps"), umcfg['xor_swap'])
        put(port("local_boundaries"), umcfg['lmalign'])
        put(port("local_bsubsteps"), umcfg['vlinear'][VL_IDX])
        put(port("local_pads"), umcfg['lmpad'])
        put(port("global_starts"), umcfg['mstart'])
        put(port("global_linears"), umcfg['mlinear'])
        put(port("global_cboundaries"), umcfg['mboundary_lmwidth'])
        put(port("global_boundaries"), umcfg['mboundary'])
        put(port("global_bshufs"), umcfg['udim'][:, VDIM:])
        put(port("global_ashufs"), umcfg['udim'][:, :VDIM])
        put(port("bstrides_frac"), umcfg['ustride_frac'][:, VDIM:])
        put(port("bstrides_shamt"), umcfg['ustride_shamt'][:, VDIM:])
        put(port("astrides_frac"), umcfg['ustride_frac'][:, :VDIM])
        put(port("astrides_shamt"), umcfg['ustride_shamt'][:, :VDIM])
        put(port("wrap"), CompressWrap(umcfg['mwrap']))
        put(port("pad_value"), umcfg['pad_value'])
        put(port("id_begs"), id_range[0])
        put(port("id_ends"), id_range[1])
        # TODO (begin)
        # NOTE(review): the enable bit depends on n_slut while the end index
        # is gated on n_cfg; confirm the two conditions are meant to differ.
        port("stencil")[0] = int(n_slut != 0)
        put(port("stencil_begs"), 0)
        put(port("stencil_ends"), 0)
        if n_cfg != 0:
            port("stencil_ends")[0] = n_slut
        put(port("stencil_lut"), slut)
        # TODO (end)

    LoadInput("i0", cfg.umcfg_i0, cfg.n_i0, N_SLUT0, N_I0CFG, slut0)
    LoadInput("i1", cfg.umcfg_i1, cfg.n_i1, N_SLUT1, N_I1CFG, slut1)

    # Output stream
    um_o = cfg.umcfg_o
    put(i_data.i_o_global_boundaries, um_o['mboundary'])
    put(i_data.i_o_global_bsubsteps, um_o['vlinear'][VL_IDX])
    put(i_data.i_o_global_linears, um_o['mlinear'])
    put(i_data.i_o_global_bshufs, um_o['udim'][:, VDIM:])
    put(i_data.i_o_bstrides_frac, um_o['ustride_frac'][:, VDIM:])
    put(i_data.i_o_bstrides_shamt, um_o['ustride_shamt'][:, VDIM:])
    put(i_data.i_o_global_ashufs, um_o['udim'][:, :VDIM])
    put(i_data.i_o_astrides_frac, um_o['ustride_frac'][:, :VDIM])
    put(i_data.i_o_astrides_shamt, um_o['ustride_shamt'][:, :VDIM])
    put(i_data.i_o_id_begs, cfg.n_o[0])
    put(i_data.i_o_id_ends, cfg.n_o[1])

    # Instructions and constant tables
    put(i_data.i_inst_id_begs, cfg.n_inst[0])
    put(i_data.i_inst_id_ends, cfg.n_inst[1])
    put(i_data.i_insts, cfg.insts)
    put(i_data.i_consts, clut)
    put(i_data.i_const_texs, tlut)
    put(i_data.i_reg_per_warp, cfg.n_reg)
    yield from cfg_master.Send(i_data)

    for _ in range(300):
        yield ck_ev
    # check: resume the verification generator; it may already be exhausted,
    # which is not an error.
    next(verf_func_gen, None)
    resp_chan.Report()
    FinishSim()
Example #9
0
def main():
    """Testbench coroutine that drives block offsets and checks address/valid output.

    After reset, a TwoWire master sends one transaction per accumulation
    range of input stream 0, and a TwoWire slave forwards received beats to
    the module-level ``bg``. Expectations are registered on the module-level
    ``tst`` and the collector ``dc`` is resized before every send.

    Only the first block offset is exercised: the outer loop asserts
    ``dc.is_clean``, calls ``FinishSim()`` and breaks at the end of its first
    iteration. If ``n_bofs`` is 0 the loop body never runs and
    ``FinishSim()`` is never called from here.
    """
    yield rst_out_ev
    master = TwoWire.Master(bofs_rdy, bofs_ack, bofs_bus, ck_ev)
    slave = TwoWire.Slave(av_rdy, av_ack, av_bus, ck_ev, callbacks=[bg.Get])
    data_bus = master.values

    # simulation
    n_bofs, bofs = cfg.CreateBlockTransaction()
    for i in range(n_bofs):
        # Entry [0] of the accum-block result is used with umcfg_i0 / n_i0
        # below, i.e. the first input stream.
        (n_i0, bofs_i0, abeg_i0, aend_i0, abeg_id_i0, aend_id_i0,
         dummy) = cfg.CreateAccumBlockTransaction(bofs[i])[0]
        for j in range(n_i0):
            # Build the golden model for accumulation range j.
            (n_aofs_i0, agofs_i0, alofs_i0, rt_i_i0, rg_li_i0,
             rg_ri_i0) = cfg.CreateAccumTransaction(abeg_i0[j], aend_i0[j])
            cfg.AllocSram(0, abeg_id_i0[j], aend_id_i0[j])
            # Read after AllocSram; presumably AllocSram updates 'local_adr'
            # — TODO confirm.
            linear_i0 = cfg.umcfg_i0['local_adr'][:, 0]
            accum_idx_i0, warpid_i0, rg_flat_i0, rt_flat_i0 = cfg.CreateAccumWarpTransaction(
                abeg_i0[j], aend_i0[j], rt_i_i0, rg_li_i0, rg_ri_i0, cfg.n_i0)
            bgofs_i0, blofs_i0, valid_i0 = cfg.CreateBofsValidTransaction(
                bofs[i], warpid_i0)
            addr_i0 = cfg.CreateVectorAddressTransaction(
                blofs_i0[:, 0, :], alofs_i0[accum_idx_i0], rg_flat_i0,
                linear_i0, cfg.umcfg_i0, True)
            # Pack each row of per-lane valid flags into one integer,
            # lane k going to bit k.
            valid_i0_packed = npd.bitwise_or.reduce(
                valid_i0 << npi.arange(VSIZE)[newaxis, :], axis=1)
            # Send source
            npd.copyto(data_bus.i_bofs, bofs[i])
            npd.copyto(data_bus.i_abeg, abeg_i0[j])
            npd.copyto(data_bus.i_aend, aend_i0[j])
            npd.copyto(data_bus.i_linears, linear_i0)
            npd.copyto(data_bus.i_bboundary, cfg.pcfg["total"][0])
            npd.copyto(data_bus.i_dual_axis, cfg.pcfg['dual_axis'])
            npd.copyto(data_bus.i_dual_order, cfg.pcfg['dual_order'])
            npd.copyto(data_bus.i_bsubofs, cfg.v_nd >> cfg.pcfg["lg_vshuf"][0])
            npd.copyto(data_bus.i_bsub_up_order, cfg.pcfg["lg_vsize_2x"][0])
            npd.copyto(data_bus.i_bsub_lo_order, cfg.pcfg["lg_vshuf"][0])
            npd.copyto(data_bus.i_aboundary, cfg.acfg["total"][0])
            npd.copyto(data_bus.i_bgrid_step, cfg.pcfg["local"][0])
            npd.copyto(data_bus.i_global_bshufs, cfg.umcfg_i0["udim"][:,
                                                                      VDIM:])
            npd.copyto(data_bus.i_bstrides_frac,
                       cfg.umcfg_i0["ustride_frac"][:, VDIM:])
            npd.copyto(data_bus.i_bstrides_shamt,
                       cfg.umcfg_i0["ustride_shamt"][:, VDIM:])
            npd.copyto(data_bus.i_global_ashufs,
                       cfg.umcfg_i0["udim"][:, :VDIM])
            npd.copyto(data_bus.i_astrides_frac,
                       cfg.umcfg_i0["ustride_frac"][:, :VDIM])
            npd.copyto(data_bus.i_astrides_shamt,
                       cfg.umcfg_i0["ustride_shamt"][:, :VDIM])
            npd.copyto(data_bus.i_mofs_bsubsteps,
                       cfg.umcfg_i0["vlinear"][:, 1 << npi.arange(CV_BW)])
            npd.copyto(data_bus.i_mboundaries,
                       cfg.umcfg_i0["lmalign"][:, :DIM])
            # NOTE(review): the id range is indexed with the block index `i`
            # (not `j`); other testbenches pass cfg.n_*[0] / [1] whole —
            # confirm this indexing is intended.
            npd.copyto(data_bus.i_id_begs, cfg.n_i0[0][i])
            npd.copyto(data_bus.i_id_ends, cfg.n_i0[1][i])
            if ST_MODE:
                # Stencil mode: a two-entry LUT [0, 1] is programmed, so two
                # output rows are expected per accumulation index.
                dc.Resize(accum_idx_i0.shape[0] * 2)
                npd.copyto(data_bus.i_stencil, 1)
                npd.copyto(data_bus.i_stencil_begs, 0)
                npd.copyto(data_bus.i_stencil_ends, 2)
                data_bus.i_stencil_lut[:2] = [0, 1]
                # NOTE(review): data_bus[17] is presumably the same field as
                # i_stencil_lut written just above — TODO confirm the index.
                tst.Expect((
                    npd.repeat(rg_flat_i0[:, newaxis], 2, axis=0),
                    (accum_idx_i0[:, newaxis, :] +
                     data_bus[17][:2][:, newaxis]).reshape(-1, VSIZE),
                    npd.repeat(valid_i0_packed[:, newaxis], 2, axis=0),
                    # abcde --> 0a0b0c0d
                    npd.column_stack((npd.zeros_like(rt_flat_i0), rt_flat_i0)
                                     ).reshape(-1, 1),
                ))
            else:
                # Non-stencil mode: one output row per accumulation index.
                dc.Resize(accum_idx_i0.shape[0])
                npd.copyto(data_bus.i_stencil, 0)
                tst.Expect((rg_flat_i0[:, newaxis], addr_i0,
                            valid_i0_packed[:, newaxis], rt_flat_i0[:,
                                                                    newaxis]))
            yield ck_ev
            yield from master.Send(data_bus)
            # Idle for 30 clocks after each send before the next range.
            for ck in range(30):
                yield ck_ev
        # Stop after the first block offset (see docstring).
        assert dc.is_clean
        FinishSim()
        break
Example #10
0
 def iter_a():
     """Yield ``master_ad`` once per row of ``addr_o``.

     Before each yield, the row is copied into field 0 and the matching
     entry of ``valid_o_packed`` is stored in field 1. The same
     ``master_ad`` object is reused for every beat.
     """
     for row_idx, addr_row in enumerate(addr_o):
         npd.copyto(master_ad[0], addr_row)
         master_ad[1][0] = valid_o_packed[row_idx]
         yield master_ad