def main():
    """Testbench coroutine for the RemapCache scoreboard.

    After reset the XOR configuration is written, ``N_VEC`` vectors are
    pushed through the write master, and then ``NTEST`` read addresses are
    issued.  Data collected by the read-data slave is stacked, dumped to
    ``rmc_got.txt`` and handed to the scoreboard test, which expects it to
    equal the read addresses.
    """
    scoreboard = Scoreboard("RemapCache")
    test = scoreboard.GetTest(f"test{RMC_CONF}")

    def dump_matrix(mat):
        # Save the first collected matrix as text before it is checked.
        return npd.savetxt("rmc_got.txt", mat[0], fmt="%d")

    stacker = Stacker(0, callbacks=[dump_matrix, test.Get])
    getter = BusGetter(callbacks=[stacker.Get])
    wad_master = OneWire.Master(wad_dval_bus, wad_bus, ck_ev)
    ra_master = TwoWire.Master(ra_rdy_bus, ra_ack_bus, ra_bus, ck_ev)
    # Never used directly below; it is registered with getter.Get as callback.
    rd_slave = TwoWire.Slave(
        rd_rdy_bus, rd_ack_bus, rd_bus, ck_ev, callbacks=[getter.Get])
    write_payload = wad_master.values
    read_payload = ra_master.values
    yield rst_out_ev

    # start simulation
    npd.copyto(cfg_bus.values.i_xor_srcs[0], xsrc)
    cfg_bus.values.i_xor_swaps[0] = xswap
    cfg_bus.Write()
    yield ck_ev

    def write_vectors():
        # One write per vector: slot 1 carries the vector index and slot 2
        # carries VSIZE consecutive integers starting at index * VSIZE.
        for vec in range(N_VEC):
            write_payload[0][0] = 0
            write_payload[1][0] = vec
            npd.copyto(write_payload[2],
                       npi.arange(vec * VSIZE, (vec + 1) * VSIZE))
            yield write_payload

    yield from wad_master.SendIter(write_vectors())

    for _ in range(10):
        yield ck_ev

    n_reads = N_VEC * VSIZE - npd.sum(stride, dtype=npd.int32) - 1
    read_addrs = npi.arange(n_reads)[:, newaxis] + STEP
    stacker.Resize(n_reads)
    test.Expect((read_addrs, ))

    def read_requests():
        # One read request per row of read_addrs.
        for row in read_addrs:
            read_payload[0][0] = 0
            npd.copyto(read_payload[1], row)
            yield read_payload

    yield from ra_master.SendIter(read_requests())

    for _ in range(100):
        yield ck_ev
    assert stacker.is_clean
    FinishSim()
def main():
    """Testbench coroutine for ParallelBlockLooper with N_TAU consumers.

    One TwoWire master sends a single configuration (``local`` and ``end``
    from ``cfg.pcfg``).  Each of the ``N_TAU`` slave channels records what it
    receives in its own deque and schedules a "block done" response on the
    matching OneWire master.  After the run, every golden block offset from
    ``cfg.CreateBlockTransaction()`` must be found at the head of one of the
    deques, and no deque may contain leftovers.
    """
    n_golden, bofs = cfg.CreateBlockTransaction()
    scb = Scoreboard("ParallelBlockLooper_mc")
    test_b = scb.GetTest("bofs")
    # master
    master = TwoWire.Master(rdy_bus_s, ack_bus_s, bus_s, ck_ev)
    i_data = master.values
    # slave
    ans = [deque() for _ in range(N_TAU)]
    bgs_b = []
    masters_bdone = []
    resps = []
    slaves_b = []
    for i in range(N_TAU):
        # `i=i` / `resp=resp` bind the current loop values into the lambdas;
        # without the defaults every callback would see the last iteration.
        bg_b = BusGetter(copy=True, callbacks=[lambda x, i=i: ans[i].append(x)])
        master_bdone = OneWire.Master(dval_buss[i], tuple(), ck_ev)
        resp = Response(master_bdone.SendIter, ck_ev, B=100)
        slave_b = TwoWire.Slave(
            rdy_buss_b[i], ack_buss_b[i], buss_b[i], ck_ev,
            callbacks=[bg_b.Get, lambda x, resp=resp: resp.Append(tuple())],
            A=1, B=100)
        bgs_b.append(bg_b)
        # Fix: store the master built in this iteration (the original
        # appended the list `masters_bdone` to itself).
        masters_bdone.append(master_bdone)
        resps.append(resp)
        slaves_b.append(slave_b)
    yield rst_out_ev
    yield ck_ev

    # start simulation
    npd.copyto(i_data[0], cfg.pcfg['local'][0])
    npd.copyto(i_data[1], cfg.pcfg['end'][0])
    yield from master.Send(i_data)

    for _ in range(30):
        yield ck_ev

    # Each golden offset must match the head of some channel's queue; the
    # order across channels is not constrained, only the order within one.
    for i in range(n_golden):
        b = bofs[i]
        popped = False
        for a in ans:
            # `.all()` implies the recorded entries are array-like.
            if a and (a[0] == b).all():
                a.popleft()
                popped = True
                break
        assert popped, f"No correct bofs to match {b}"
    assert all(not a for a in ans), "Some extra bofs"
    FinishSim()
def main():
    """Testbench coroutine for ChunkAddrLooper.

    The environment variable ``TEST0`` selects the configuration: when it is
    set (to any value) ``cfg.umcfg_i0`` is used, otherwise ``cfg.umcfg_i1``.
    For every block offset and every accumulation block, the head chunk is
    sent through the master and the commands collected from the slave are
    compared with ``cfg.CreateDramReadTransaction`` by the scoreboard.
    """
    scb = Scoreboard("ChunkAddrLooper")
    test = scb.GetTest("test")
    st = Stacker(0, callbacks=[test.Get])
    bg = BusGetter(callbacks=[st.Get])
    master = TwoWire.Master(mrdy_bus, mack_bus, mofs_bus, ck_ev)
    i_data = master.values
    slave = TwoWire.Slave(crdy_bus, cack_bus, cmd_bus, ck_ev, callbacks=[bg.Get])
    yield rst_out_ev

    # simulation
    n_bofs, bofs = cfg.CreateBlockTransaction()
    TEST0 = getenv("TEST0") is not None
    print(f"Testing {0 if TEST0 else 1}...")
    TEST_UMCFG = cfg.umcfg_i0 if TEST0 else cfg.umcfg_i1
    # NOTE(review): with TEST0 set this selects element 1 of
    # CreateAccumBlockTransaction's result while TEST_UMCFG is umcfg_i0;
    # confirm that this index pairing is intended.
    OFS = int(TEST0)
    for i in range(n_bofs):
        (n_i, bofs_i, abeg_i, aend_i, abeg_id_i, aend_id_i,
         dummy) = cfg.CreateAccumBlockTransaction(bofs[i])[OFS]
        for j in range(n_i):
            # only use the first one
            TEST_ABMOFS = cfg.CreateChunkHead(
                bofs_i[j], abeg_i[j], abeg_id_i[j], aend_id_i[j],
                TEST_UMCFG)[0]
            ans = cfg.CreateDramReadTransaction(TEST_ABMOFS, TEST_UMCFG, 0)
            st.Resize(ans.shape[0])
            npd.copyto(i_data[0], TEST_ABMOFS)
            npd.copyto(i_data[1], TEST_UMCFG["lmpad"][0])
            npd.copyto(i_data[2], TEST_UMCFG["mboundary"][0])
            npd.copyto(i_data[3], TEST_UMCFG["mboundary_lmwidth"][0])
            i_data[4][0] = TEST_UMCFG["mlinear"][0]
            i_data[5][0] = (
                1 if TEST_UMCFG["mwrap"][0] == UmiModel.MEM_WRAP else 0)
            test.Expect(
                tuple(ans[k][:, newaxis]
                      for k in ("cmd_type", "islast", "addr", "ofs", "len")))
            yield from master.Send(i_data)

            # Wait for the commands of this chunk before sending the next.
            # `_` avoids rebinding the outer block index `i`.
            for _ in range(100):
                yield ck_ev

    for _ in range(300):
        yield ck_ev
    assert st.is_clean
    FinishSim()
def main():
    """Testbench coroutine that drives the ALU accumulation blocks of block 0.

    For each of the ``n_alu`` accumulation ranges of the first block offset,
    the expected instruction stream is computed from ``cfg``, registered with
    ``tst``, and the range is sent through the source master.  Every
    instruction received by the slave also queues a commit response.
    """
    yield rst_out_ev

    n_bofs, bofs = cfg.CreateBlockTransaction()
    alu_blocks = cfg.CreateAccumBlockTransaction(bofs[0])[-1]
    (n_alu, bofs_alu, abeg_alu, aend_alu, abeg_id_alu, aend_id_alu,
     dummy) = alu_blocks

    master = TwoWire.Master(src_rdy, src_ack, src_bus, ck_ev)
    inst_commit = OneWire.Master(inst_commit_dval, tuple(), ck_ev)
    resp = Response(inst_commit.SendIter, ck_ev)
    slave = TwoWire.Slave(
        inst_rdy, inst_ack, inst_bus, ck_ev,
        callbacks=[bg.Get, lambda _: resp.Append(tuple())])
    data_bus = master.values

    # start simulation
    for idx in range(n_alu):
        range_beg = abeg_alu[idx]
        range_end = aend_alu[idx]
        # Expect?
        (n_aofs_alu, agofs_alu, alofs_alu, rt_i_alu, rg_li_alu,
         rg_ri_alu) = cfg.CreateAccumTransaction(range_beg, range_end)
        accum_alu, warpid_alu, rg_flat_alu = cfg.CreateAccumWarpTransaction(
            range_beg, range_end, None, rg_li_alu, rg_ri_alu, cfg.n_inst)
        bofs_alu, blofs_alu, valid_alu = cfg.CreateBofsValidTransaction(
            bofs[0], warpid_alu)
        # /2 since in hardware, we use two warps (2x, 2x+1) to form a large warp x
        warpid_alu >>= 1

        # Destination field / source value pairs, copied in this order.
        field_values = (
            (data_bus.i_bofs, bofs[0]),
            (data_bus.i_aofs_beg, range_beg),
            (data_bus.i_aofs_end, range_end),
            (data_bus.i_dual_axis, cfg.pcfg['dual_axis']),
            (data_bus.i_dual_order, cfg.pcfg['dual_order']),
            (data_bus.i_bgrid_step, cfg.pcfg["local"][0]),
            (data_bus.i_bsub_up_order, cfg.pcfg["lg_vsize_2x"][0]),
            (data_bus.i_bsub_lo_order, cfg.pcfg["lg_vshuf"][0]),
            (data_bus.i_aboundary, cfg.acfg["total"][0]),
            (data_bus.i_inst_id_begs, cfg.n_inst[0]),
            (data_bus.i_inst_id_ends, cfg.n_inst[1]),
        )
        for dst, src in field_values:
            npd.copyto(dst, src)

        col.Resize(rg_flat_alu.size)
        expected = (
            bofs_alu[:, 0, :],
            agofs_alu[accum_alu],
            rg_flat_alu[:, newaxis],
            warpid_alu[:, newaxis],
        )
        tst.Expect(expected)
        yield from master.Send(data_bus)
        yield ck_ev

    for _ in range(10):
        yield ck_ev
    assert col.is_clean
    FinishSim()
def main():
    """Testbench coroutine for AccumBlockLooper.

    Five output channels (i0, i1, dma, o, alu) each get a scoreboard test,
    a stacker, a bus getter and a TwoWire slave.  A single configuration for
    the first block offset is sent, and after 300 clock events every stacker
    must be clean.
    """
    channels = ("i0", "i1", "dma", "o", "alu")
    scoreboard = Scoreboard("AccumBlockLooper")
    # Built stage by stage: all tests, then all stackers, then all getters.
    tests = {ch: scoreboard.GetTest(f"test_{ch}") for ch in channels}
    stackers = {ch: Stacker(0, callbacks=[tests[ch].Get]) for ch in channels}
    getters = {ch: BusGetter(callbacks=[stackers[ch].Get]) for ch in channels}
    master = TwoWire.Master(s_rdy_bus, s_ack_bus, s_bus, ck_ev)
    i_data = master.values
    slave_buses = {
        "i0": (i0_rdy_bus, i0_ack_bus, i0_bus),
        "i1": (i1_rdy_bus, i1_ack_bus, i1_bus),
        "dma": (dma_rdy_bus, dma_ack_bus, dma_bus),
        "o": (o_rdy_bus, o_ack_bus, o_bus),
        "alu": (alu_rdy_bus, alu_ack_bus, alu_bus),
    }
    slaves = {
        ch: TwoWire.Slave(*slave_buses[ch], ck_ev, callbacks=[getters[ch].Get])
        for ch in channels
    }
    yield rst_out_ev
    yield ck_ev

    n_bofs, bofs = cfg.CreateBlockTransaction()
    ans_i0, ans_i1, ans_dma, ans_o, ans_alu = cfg.CreateAccumBlockTransaction(
        bofs[0])
    (n_i0, bofs_i0, abeg_i0, aend_i0, abeg_id_i0, aend_id_i0,
     dummy) = ans_i0
    (n_i1, bofs_i1, abeg_i1, aend_i1, abeg_id_i1, aend_id_i1,
     dummy) = ans_i1
    (n_dma, bofs_dma, abeg_dma, aend_dma, abeg_id_dma, aend_id_dma,
     which_dma) = ans_dma
    (n_o, bofs_o, abeg_o, aend_o, abeg_id_o, aend_id_o,
     dummy) = ans_o
    (n_alu, bofs_alu, abeg_alu, aend_alu, abeg_id_alu, aend_id_alu,
     dummy) = ans_alu

    # start simulation
    config_words = (
        bofs[0],
        cfg.acfg['local'][0],
        cfg.acfg['end'][0],
        cfg.acfg['total'][0],
        cfg.n_i0[0],
        cfg.n_i0[1],
        cfg.n_i1[0],
        cfg.n_i1[1],
        cfg.n_o[0],
        cfg.n_o[1],
        cfg.n_inst[0],
        cfg.n_inst[1],
    )
    for slot, word in enumerate(config_words):
        npd.copyto(i_data[slot], word)

    # The id vectors are turned into columns; the dma channel additionally
    # carries `which_dma` and omits `aend`, the alu channel has no ids.
    expected = {
        "i0": (bofs_i0, abeg_i0, aend_i0,
               abeg_id_i0[:, newaxis], aend_id_i0[:, newaxis]),
        "i1": (bofs_i1, abeg_i1, aend_i1,
               abeg_id_i1[:, newaxis], aend_id_i1[:, newaxis]),
        "dma": (bofs_dma, which_dma[:, newaxis], abeg_dma,
                abeg_id_dma[:, newaxis], aend_id_dma[:, newaxis]),
        "o": (bofs_o, abeg_o, aend_o,
              abeg_id_o[:, newaxis], aend_id_o[:, newaxis]),
        "alu": (bofs_alu, abeg_alu, aend_alu),
    }
    counts = {"i0": n_i0, "i1": n_i1, "dma": n_dma, "o": n_o, "alu": n_alu}
    for ch in channels:
        tests[ch].Expect(expected[ch])
    for ch in channels:
        stackers[ch].Resize(counts[ch])
    yield from master.Send(i_data)

    for _ in range(300):
        yield ck_ev
    for ch in channels:
        assert stackers[ch].is_clean
    FinishSim()
def IterRead():
    """Yield the shared ``ra_data`` buffer once per row of ``raddr``.

    Before each yield, element 0 of slot 0 is cleared and the current row
    is copied into slot 1.  The same buffer object is yielded every time.
    """
    for addr_row in raddr:
        flag_slot = ra_data[0]
        flag_slot[0] = 0
        addr_slot = ra_data[1]
        npd.copyto(addr_slot, addr_row)
        yield ra_data
def IterWrite():
    """Yield the shared ``wad_data`` buffer once per vector index.

    For vector ``k`` (0 <= k < N_VEC): slot 0 is cleared, slot 1 receives
    ``k`` and slot 2 receives the integers ``k*VSIZE .. (k+1)*VSIZE - 1``.
    The same buffer object is yielded every time.
    """
    for vec_idx in range(N_VEC):
        wad_data[0][0] = 0
        wad_data[1][0] = vec_idx
        lo = vec_idx * VSIZE
        hi = (vec_idx + 1) * VSIZE
        npd.copyto(wad_data[2], npi.arange(lo, hi))
        yield wad_data
def main():
    """Program the full configuration and run the DRAM response model.

    Sequence: take the first value of ``verf_func_gen`` as ``ms``, wait for
    reset plus three clock events, create the ``DramRespChan``, fill every
    field of ``cfg_master.values`` from ``cfg`` and the lookup tables, send
    it, wait 300 clock events, advance ``verf_func_gen`` once more, then
    report and finish the simulation.
    """
    # init
    ms = next(verf_func_gen)
    cfg_master = TwoWire.Master(cfg_rdy_bus, cfg_ack_bus, cfg_bus, ck_ev, strict=strict)
    yield rst_out_ev
    yield ck_ev
    yield ck_ev
    yield ck_ev
    resp_chan = DramRespChan(
        ra_rdy_bus, ra_ack_bus, ra_bus,
        rd_rdy_bus, rd_ack_bus, rd_bus,
        w_rdy_bus, w_ack_bus, w_bus,
        ck_ev, ms,
        # Simulation configuration: 1600MHz/200MHz
        8.0, N_TAU, CSIZE)
    i_data = cfg_master.values
    # Pack "entry k equals MEM_WRAP" into bit k of a single integer.
    CompressWrap = lambda x: npd.bitwise_or.reduce(
        (x == UmiModel.MEM_WRAP).astype('i2') << npi.arange(x.shape[0]))
    # Index tuple: every row, columns 1, 2, 4, ... (CV_BW powers of two).
    VL_IDX = slice(None), 1 << npi.arange(CV_BW)
    if SIM_MODE == 2:
        # We use the same 'step' and shrink 'end'.
        # NOTE(review): if cfg.pcfg['end'][0] is a NumPy view, the in-place
        # writes to `nblk` below also modify cfg.pcfg -- confirm intended.
        nblk = cfg.pcfg['end'][0]
        ss0 = cfg.pcfg['syst0_skip'][0]
        sx0 = cfg.pcfg['syst0_axis'][0]
        st0 = cfg.pcfg['local'][0, sx0]
        ss1 = cfg.pcfg['syst1_skip'][0]
        sx1 = cfg.pcfg['syst1_axis'][0]
        st1 = cfg.pcfg['local'][0, sx1]
        # An axis of -1 leaves the corresponding dimension untouched.
        # Otherwise: ceil(end / (step * N_TAU_*)) * step.
        if sx0 != -1:
            nblk[sx0] = ((nblk[sx0] - 1) // (st0 * N_TAU_X) + 1) * st0
        if sx1 != -1:
            nblk[sx1] = ((nblk[sx1] - 1) // (st1 * N_TAU_Y) + 1) * st1
        npd.copyto(i_data.i_bgrid_end, nblk)
        i_data.i_i0_systolic_skip[0] = ss0
        i_data.i_i0_systolic_axis[0] = sx0
        i_data.i_i1_systolic_skip[0] = ss1
        i_data.i_i1_systolic_axis[0] = sx1
    else:
        npd.copyto(i_data.i_bgrid_end, cfg.pcfg['end'][0])
    # Block / accumulation grid parameters.
    npd.copyto(i_data.i_bgrid_step, cfg.pcfg['local'][0])
    npd.copyto(i_data.i_bboundary, cfg.pcfg['total'][0])
    npd.copyto(i_data.i_dual_axis, cfg.pcfg['dual_axis'])
    npd.copyto(i_data.i_dual_order, cfg.pcfg['dual_order'])
    npd.copyto(i_data.i_bsubofs, cfg.v_nd >> cfg.pcfg['lg_vshuf'][0])
    npd.copyto(i_data.i_bsub_up_order, cfg.pcfg['lg_vsize_2x'][0])
    npd.copyto(i_data.i_bsub_lo_order, cfg.pcfg['lg_vshuf'][0])
    npd.copyto(i_data.i_agrid_step, cfg.acfg['local'][0])
    npd.copyto(i_data.i_agrid_end, cfg.acfg['end'][0])
    npd.copyto(i_data.i_aboundary, cfg.acfg['total'][0])
    # Input 0 fields, taken from cfg.umcfg_i0.  Columns [VDIM:] feed the
    # "b" fields and columns [:VDIM] feed the "a" fields.
    npd.copyto(i_data.i_i0_local_xor_srcs, cfg.umcfg_i0['xor_src'])
    npd.copyto(i_data.i_i0_local_xor_swaps, cfg.umcfg_i0['xor_swap'])
    npd.copyto(i_data.i_i0_local_boundaries, cfg.umcfg_i0['lmalign'])
    npd.copyto(i_data.i_i0_local_bsubsteps, cfg.umcfg_i0['vlinear'][VL_IDX])
    npd.copyto(i_data.i_i0_local_pads, cfg.umcfg_i0['lmpad'])
    npd.copyto(i_data.i_i0_global_starts, cfg.umcfg_i0['mstart'])
    npd.copyto(i_data.i_i0_global_linears, cfg.umcfg_i0['mlinear'])
    npd.copyto(i_data.i_i0_global_cboundaries, cfg.umcfg_i0['mboundary_lmwidth'])
    npd.copyto(i_data.i_i0_global_boundaries, cfg.umcfg_i0['mboundary'])
    npd.copyto(i_data.i_i0_global_bshufs, cfg.umcfg_i0['udim'][:, VDIM:])
    npd.copyto(i_data.i_i0_global_ashufs, cfg.umcfg_i0['udim'][:, :VDIM])
    npd.copyto(i_data.i_i0_bstrides_frac, cfg.umcfg_i0['ustride_frac'][:, VDIM:])
    npd.copyto(i_data.i_i0_bstrides_shamt, cfg.umcfg_i0['ustride_shamt'][:, VDIM:])
    npd.copyto(i_data.i_i0_astrides_frac, cfg.umcfg_i0['ustride_frac'][:, :VDIM])
    npd.copyto(i_data.i_i0_astrides_shamt, cfg.umcfg_i0['ustride_shamt'][:, :VDIM])
    npd.copyto(i_data.i_i0_wrap, CompressWrap(cfg.umcfg_i0['mwrap']))
    npd.copyto(i_data.i_i0_pad_value, cfg.umcfg_i0['pad_value'])
    npd.copyto(i_data.i_i0_id_begs, cfg.n_i0[0])
    npd.copyto(i_data.i_i0_id_ends, cfg.n_i0[1])
    # TODO (begin)
    i_data.i_i0_stencil[0] = int(N_SLUT0 != 0)
    npd.copyto(i_data.i_i0_stencil_begs, 0)
    npd.copyto(i_data.i_i0_stencil_ends, 0)
    # NOTE(review): the guard tests N_I0CFG while the flag above tests
    # N_SLUT0 -- confirm which one is meant.
    if N_I0CFG != 0:
        i_data.i_i0_stencil_ends[0] = N_SLUT0
        npd.copyto(i_data.i_i0_stencil_lut, slut0)
    # TODO (end)
    # Input 1 fields, taken from cfg.umcfg_i1 (same layout as input 0).
    npd.copyto(i_data.i_i1_local_xor_srcs, cfg.umcfg_i1['xor_src'])
    npd.copyto(i_data.i_i1_local_xor_swaps, cfg.umcfg_i1['xor_swap'])
    npd.copyto(i_data.i_i1_local_boundaries, cfg.umcfg_i1['lmalign'])
    npd.copyto(i_data.i_i1_local_bsubsteps, cfg.umcfg_i1['vlinear'][VL_IDX])
    npd.copyto(i_data.i_i1_local_pads, cfg.umcfg_i1['lmpad'])
    npd.copyto(i_data.i_i1_global_starts, cfg.umcfg_i1['mstart'])
    npd.copyto(i_data.i_i1_global_linears, cfg.umcfg_i1['mlinear'])
    npd.copyto(i_data.i_i1_global_cboundaries, cfg.umcfg_i1['mboundary_lmwidth'])
    npd.copyto(i_data.i_i1_global_boundaries, cfg.umcfg_i1['mboundary'])
    npd.copyto(i_data.i_i1_global_bshufs, cfg.umcfg_i1['udim'][:, VDIM:])
    npd.copyto(i_data.i_i1_global_ashufs, cfg.umcfg_i1['udim'][:, :VDIM])
    npd.copyto(i_data.i_i1_bstrides_frac, cfg.umcfg_i1['ustride_frac'][:, VDIM:])
    npd.copyto(i_data.i_i1_bstrides_shamt, cfg.umcfg_i1['ustride_shamt'][:, VDIM:])
    npd.copyto(i_data.i_i1_astrides_frac, cfg.umcfg_i1['ustride_frac'][:, :VDIM])
    npd.copyto(i_data.i_i1_astrides_shamt, cfg.umcfg_i1['ustride_shamt'][:, :VDIM])
    npd.copyto(i_data.i_i1_wrap, CompressWrap(cfg.umcfg_i1['mwrap']))
    npd.copyto(i_data.i_i1_pad_value, cfg.umcfg_i1['pad_value'])
    npd.copyto(i_data.i_i1_id_begs, cfg.n_i1[0])
    npd.copyto(i_data.i_i1_id_ends, cfg.n_i1[1])
    # TODO (begin)
    i_data.i_i1_stencil[0] = int(N_SLUT1 != 0)
    npd.copyto(i_data.i_i1_stencil_begs, 0)
    npd.copyto(i_data.i_i1_stencil_ends, 0)
    # NOTE(review): same N_I1CFG vs N_SLUT1 mismatch as for input 0.
    if N_I1CFG != 0:
        i_data.i_i1_stencil_ends[0] = N_SLUT1
        npd.copyto(i_data.i_i1_stencil_lut, slut1)
    # TODO (end)
    # Output fields, taken from cfg.umcfg_o.
    npd.copyto(i_data.i_o_global_boundaries, cfg.umcfg_o['mboundary'])
    npd.copyto(i_data.i_o_global_bsubsteps, cfg.umcfg_o['vlinear'][VL_IDX])
    npd.copyto(i_data.i_o_global_linears, cfg.umcfg_o['mlinear'])
    npd.copyto(i_data.i_o_global_bshufs, cfg.umcfg_o['udim'][:, VDIM:])
    npd.copyto(i_data.i_o_bstrides_frac, cfg.umcfg_o['ustride_frac'][:, VDIM:])
    npd.copyto(i_data.i_o_bstrides_shamt, cfg.umcfg_o['ustride_shamt'][:, VDIM:])
    npd.copyto(i_data.i_o_global_ashufs, cfg.umcfg_o['udim'][:, :VDIM])
    npd.copyto(i_data.i_o_astrides_frac, cfg.umcfg_o['ustride_frac'][:, :VDIM])
    npd.copyto(i_data.i_o_astrides_shamt, cfg.umcfg_o['ustride_shamt'][:, :VDIM])
    npd.copyto(i_data.i_o_id_begs, cfg.n_o[0])
    npd.copyto(i_data.i_o_id_ends, cfg.n_o[1])
    # Instruction ranges, instructions and lookup tables.
    npd.copyto(i_data.i_inst_id_begs, cfg.n_inst[0])
    npd.copyto(i_data.i_inst_id_ends, cfg.n_inst[1])
    npd.copyto(i_data.i_insts, cfg.insts)
    npd.copyto(i_data.i_consts, clut)
    npd.copyto(i_data.i_const_texs, tlut)
    npd.copyto(i_data.i_reg_per_warp, cfg.n_reg)
    yield from cfg_master.Send(i_data)

    for i in range(300):
        yield ck_ev

    # check
    # Resume the verification generator; running off its end is not an error.
    try:
        next(verf_func_gen)
    except StopIteration:
        pass
    resp_chan.Report()
    FinishSim()
def main():
    """Testbench coroutine that checks vector addresses for input 0.

    For a block offset and one of its accumulation ranges, the expected
    register ids, addresses, packed valid bits and ``rt`` flags are derived
    from ``cfg`` and registered with ``tst``; the source description is then
    sent through the master.  ``ST_MODE`` selects a stencil variant in which
    every expected row appears twice.
    """
    yield rst_out_ev
    master = TwoWire.Master(bofs_rdy, bofs_ack, bofs_bus, ck_ev)
    slave = TwoWire.Slave(av_rdy, av_ack, av_bus, ck_ev, callbacks=[bg.Get])
    data_bus = master.values

    # simulation
    n_bofs, bofs = cfg.CreateBlockTransaction()
    for i in range(n_bofs):
        # Element 0 of the result is the one used for input 0 here.
        (n_i0, bofs_i0, abeg_i0, aend_i0, abeg_id_i0, aend_id_i0,
         dummy) = cfg.CreateAccumBlockTransaction(bofs[i])[0]
        for j in range(n_i0):
            (n_aofs_i0, agofs_i0, alofs_i0, rt_i_i0, rg_li_i0,
             rg_ri_i0) = cfg.CreateAccumTransaction(abeg_i0[j], aend_i0[j])
            # AllocSram runs before 'local_adr' is read below.
            cfg.AllocSram(0, abeg_id_i0[j], aend_id_i0[j])
            linear_i0 = cfg.umcfg_i0['local_adr'][:, 0]
            accum_idx_i0, warpid_i0, rg_flat_i0, rt_flat_i0 = cfg.CreateAccumWarpTransaction(
                abeg_i0[j], aend_i0[j], rt_i_i0, rg_li_i0, rg_ri_i0, cfg.n_i0)
            bgofs_i0, blofs_i0, valid_i0 = cfg.CreateBofsValidTransaction(
                bofs[i], warpid_i0)
            addr_i0 = cfg.CreateVectorAddressTransaction(
                blofs_i0[:, 0, :], alofs_i0[accum_idx_i0], rg_flat_i0,
                linear_i0, cfg.umcfg_i0, True)
            # Pack each row of valid flags into one integer, flag k -> bit k.
            valid_i0_packed = npd.bitwise_or.reduce(
                valid_i0 << npi.arange(VSIZE)[newaxis, :], axis=1)
            # Send source
            npd.copyto(data_bus.i_bofs, bofs[i])
            npd.copyto(data_bus.i_abeg, abeg_i0[j])
            npd.copyto(data_bus.i_aend, aend_i0[j])
            npd.copyto(data_bus.i_linears, linear_i0)
            npd.copyto(data_bus.i_bboundary, cfg.pcfg["total"][0])
            npd.copyto(data_bus.i_dual_axis, cfg.pcfg['dual_axis'])
            npd.copyto(data_bus.i_dual_order, cfg.pcfg['dual_order'])
            npd.copyto(data_bus.i_bsubofs, cfg.v_nd >> cfg.pcfg["lg_vshuf"][0])
            npd.copyto(data_bus.i_bsub_up_order, cfg.pcfg["lg_vsize_2x"][0])
            npd.copyto(data_bus.i_bsub_lo_order, cfg.pcfg["lg_vshuf"][0])
            npd.copyto(data_bus.i_aboundary, cfg.acfg["total"][0])
            npd.copyto(data_bus.i_bgrid_step, cfg.pcfg["local"][0])
            npd.copyto(data_bus.i_global_bshufs, cfg.umcfg_i0["udim"][:, VDIM:])
            npd.copyto(data_bus.i_bstrides_frac, cfg.umcfg_i0["ustride_frac"][:, VDIM:])
            npd.copyto(data_bus.i_bstrides_shamt, cfg.umcfg_i0["ustride_shamt"][:, VDIM:])
            npd.copyto(data_bus.i_global_ashufs, cfg.umcfg_i0["udim"][:, :VDIM])
            npd.copyto(data_bus.i_astrides_frac, cfg.umcfg_i0["ustride_frac"][:, :VDIM])
            npd.copyto(data_bus.i_astrides_shamt, cfg.umcfg_i0["ustride_shamt"][:, :VDIM])
            npd.copyto(data_bus.i_mofs_bsubsteps, cfg.umcfg_i0["vlinear"][:, 1 << npi.arange(CV_BW)])
            npd.copyto(data_bus.i_mboundaries, cfg.umcfg_i0["lmalign"][:, :DIM])
            npd.copyto(data_bus.i_id_begs, cfg.n_i0[0][i])
            npd.copyto(data_bus.i_id_ends, cfg.n_i0[1][i])
            if ST_MODE:
                # Stencil mode: a two-entry LUT [0, 1], so twice as many
                # results are expected as there are accumulation indices.
                dc.Resize(accum_idx_i0.shape[0] * 2)
                npd.copyto(data_bus.i_stencil, 1)
                npd.copyto(data_bus.i_stencil_begs, 0)
                npd.copyto(data_bus.i_stencil_ends, 2)
                data_bus.i_stencil_lut[:2] = [0, 1]
                tst.Expect((
                    npd.repeat(rg_flat_i0[:, newaxis], 2, axis=0),
                    # presumably data_bus[17] is i_stencil_lut -- TODO confirm
                    (accum_idx_i0[:, newaxis, :] + data_bus[17][:2][:, newaxis]).reshape(-1, VSIZE),
                    npd.repeat(valid_i0_packed[:, newaxis], 2, axis=0),
                    # abcde --> 0a0b0c0d
                    npd.column_stack((npd.zeros_like(rt_flat_i0), rt_flat_i0)
                    ).reshape(-1, 1),
                ))
            else:
                dc.Resize(accum_idx_i0.shape[0])
                npd.copyto(data_bus.i_stencil, 0)
                tst.Expect((rg_flat_i0[:, newaxis], addr_i0,
                            valid_i0_packed[:, newaxis],
                            rt_flat_i0[:, newaxis]))
            yield ck_ev
            yield from master.Send(data_bus)

            for ck in range(30):
                yield ck_ev
            # NOTE(review): the simulation is finished after the first
            # transaction and `break` only leaves the inner loop; confirm
            # that this nesting level is the intended one.
            assert dc.is_clean
            FinishSim()
            break
def iter_a():
    """Yield the shared ``master_ad`` buffer once per row of ``addr_o``.

    Before each yield, row ``k`` of ``addr_o`` is copied into slot 0 and
    ``valid_o_packed[k]`` is stored in element 0 of slot 1.  The same buffer
    object is yielded every time.
    """
    n_rows = addr_o.shape[0]
    row = 0
    while row < n_rows:
        npd.copyto(master_ad[0], addr_o[row])
        master_ad[1][0] = valid_o_packed[row]
        yield master_ad
        row += 1