def test_alloc():
    """Schedule and run a short hand-written instruction stream on a core.

    Acquires four FP registers from the core, creates five integer
    registers, registers the names ``a21``/``b21`` for FP registers 4 and 5,
    then builds the instruction list below, passes it to ``schedule`` and
    finally executes a single ``isa.inspect()`` instruction.
    """
    core = get_core()
    r21, s21, w01, w2x = core.acquire_fpregisters(range(4))
    i0, i1, ir0, is0, sixteen = (IntRegister(n) for n in range(5))
    core.name_registers(a21=FPRegister(4), b21=FPRegister(5))

    # The sections are built and concatenated in program order; the relative
    # order of the instructions is significant and must not be shuffled.
    weights = [
        isa.fpset2(w01, 1/9, 2/9),
        isa.fpset2(w2x, 1/9, -1),
    ]
    preamble = [
        isa.lfpd('a21', i0, 0),    # original note: A[0],A[1]
        isa.lfdu('a21', i0, 16),   # original note: A[2],A[1]
        isa.lfpd('b21', i1, 0),    # original note: A[0],A[1]
        isa.lfdu('b21', i1, 16),   # original note: A[2],A[1]
    ]
    # "start loads" section of the original listing.
    body = [
        isa.fxcpmadd(r21, w01, 'a21', r21),
        isa.fxcpmadd(s21, w01, 'b21', s21),
        isa.lfpd('a23', i0, 16),
        isa.lfpd('b23', i1, 16),
        isa.fxcxma(r21, w01, 'a23', r21),
        isa.fxcxma(s21, w01, 'b23', s21),
        isa.lfdu('a23', i0, 16),   # original note: a43
        isa.lfdu('b23', i1, 16),   # original note: b43
    ]
    stores = [
        isa.intset(ir0, 8*8),
        isa.intset(is0, 18*8),
        isa.intset(sixteen, 16),
        isa.stfxdux(r21, ir0, sixteen),
        isa.stfxdux(s21, is0, sixteen),
    ]
    istream = weights + preamble + body + stores

    # Debugging aids left disabled by the original author:
    #   for instr in istream: print(instr)
    #   core.trace = core.trace_none
    core.schedule(istream)
    core.execute([isa.inspect()])
def stencil():
    """Generate an interleaved instruction stream for a 3x3 block.

    Yields, in order:

    1. two ``isa.fpset2`` instructions for the symbolic registers ``'w01'``
       and ``'w2x'``;
    2. a round-robin interleaving (one instruction from each source per
       round) of the nine ``stream(i, j)`` generators for ``i, j`` in
       ``0..2`` followed by ``jam(1, 1)``.

    The generator finishes as soon as any source runs out of instructions.
    Each ``stream`` yields 7 instructions while ``jam`` yields 81, so the
    first ``stream`` is the one that ends the interleaving, after seven full
    rounds.

    All register and pointer operands are symbolic string names of the form
    ``'<c>_<i>_<j>_<kp><ks>'`` (registers) and ``'p_<i>_<j>'`` (pointers).
    """

    def label(c, i, j, kp, ks):
        # Symbolic register name, e.g. label('a', 1, 2, 0, 1) -> 'a_1_2_01'.
        return '%s_%d_%d_%d%d' % (c, i, j, kp, ks)

    def stream(i, j):
        # Load instructions for block (i, j): one initial lfpd, then an
        # lfdu/lfpd pair for each k in (2, 4, 6).
        def a(kp, ks):
            return label('a', i, j, kp, ks)

        p = 'p_%d_%d' % (i, j)
        yield isa.lfpd(a(0, 1), p, 0)
        for k in (2, 4, 6):
            yield isa.lfdu(a(k, k - 1), p, 16)
            yield isa.lfpd(a(k, k + 1), p, 0)

    def jam(i, j):
        # Multiply-add instructions accumulating into one 'r' register per k,
        # visiting the 3x3 neighbourhood (i+ii, j+jj) of block (i, j).
        for k in (2, 4, 6):
            rr = label('r', i, j, k, k - 1)
            for ii in (-1, 0, 1):
                for jj in (-1, 0, 1):
                    def a(kp, ks):
                        return label('a', i + ii, j + jj, kp, ks)

                    yield isa.fxcpmadd(rr, 'w01', a(k, k - 1), rr)
                    yield isa.fxcxma(rr, 'w01', a(k, k + 1), rr)
                    yield isa.fxcpmadd(rr, 'w2x', a(k + 2, k + 1), rr)

    instrs = [stream(i, j) for i in (0, 1, 2) for j in (0, 1, 2)]
    # NOTE(review): jam(1, 1) is assumed to be appended once, after all nine
    # streams -- confirm against the intended schedule.
    instrs.append(jam(1, 1))

    yield isa.fpset2('w01', 1/9, 2/9)
    # NOTE(review): the second 'w2x' value is 9 here, whereas test_alloc and
    # s_weights pass -1 for the same register -- confirm which is intended.
    yield isa.fpset2('w2x', 1/9, 9)

    while True:
        for ins in instrs:
            # A bare ``yield next(ins)`` lets StopIteration escape from the
            # generator body, which Python 3.7+ (PEP 479) turns into a
            # RuntimeError.  Catch it and finish cleanly instead, keeping
            # the "stop at the first exhausted source" behaviour.
            try:
                instr = next(ins)
            except StopIteration:
                return
            yield instr
def s_weights(w01, w2x):
    """Return the two ``isa.fpset2`` instructions that set the weights.

    ``w01`` is set with the pair (1/9, 2/9) and ``w2x`` with the pair
    (1/9, -1).  The result is a two-element list, in that order.
    """
    set_w01 = isa.fpset2(w01, 1/9, 2/9)
    set_w2x = isa.fpset2(w2x, 1/9, -1)
    return [set_w01, set_w2x]