def cleanup(self):
    """Emit the end-of-loop code that updates the iteration counter.

    In DEC mode the step register is added with the ``neg`` modifier applied
    to the x, y, z and w components; in INC mode the step register is added
    unmodified.  For any other mode nothing is emitted.
    """
    if self.mode == DEC:
        # Counting down: add the step with every component negated.
        delta = self.r_step(neg=('x', 'y', 'z', 'w'))
    elif self.mode == INC:
        delta = self.r_step
    else:
        return

    # r_count = r_count + delta
    self.code.add(cal.iadd(self.r_count, self.r_count, delta))
    return
def TestRelativeAddressing(): import corepy.arch.cal.platform as env import corepy.arch.cal.isa as cal proc = env.Processor(0) input_mem = proc.alloc_remote('I', 4, 16, 1) output_mem = proc.alloc_remote('I', 4, 1, 1) for i in range(16*1*4): for j in range(4): input_mem[i*4 + j] = i prgm = env.Program() code = prgm.get_stream() cal.set_active_code(code) cal.dcl_output(o0, USAGE=cal.usage.generic) cal.dcl_literal(l0, 1, 1, 1, 1) cal.dcl_literal(l1, 16, 16, 16, 16) cal.mov(r0, r0('0000')) cal.mov(r1, r1('0000')) cal.whileloop() cal.iadd(r1, r1, g[r0.x]) cal.iadd(r0, r0, l0) cal.breakc(cal.relop.ge, r0, l1) cal.endloop() cal.mov(o0, r1) prgm.set_binding('g[]', input_mem) prgm.set_binding('o0', output_mem) prgm.add(code) domain = (0, 0, 128, 128) prgm.print_code() proc.execute(prgm, domain) # code.cache_code() # print code.render_string if output_mem[0] == 120: print "Passed relative addressing test" else: print "Failed relative addressing test" proc.free(input_mem) proc.free(output_mem)
def TestRelativeAddressing(): import corepy.arch.cal.platform as env import corepy.arch.cal.isa as cal proc = env.Processor(0) input_mem = proc.alloc_remote('I', 4, 16, 1) output_mem = proc.alloc_remote('I', 4, 1, 1) for i in range(16 * 1 * 4): for j in range(4): input_mem[i * 4 + j] = i prgm = env.Program() code = prgm.get_stream() cal.set_active_code(code) cal.dcl_output(o0, USAGE=cal.usage.generic) cal.dcl_literal(l0, 1, 1, 1, 1) cal.dcl_literal(l1, 16, 16, 16, 16) cal.mov(r0, r0('0000')) cal.mov(r1, r1('0000')) cal.whileloop() cal.iadd(r1, r1, g[r0.x]) cal.iadd(r0, r0, l0) cal.breakc(cal.relop.ge, r0, l1) cal.endloop() cal.mov(o0, r1) prgm.set_binding('g[]', input_mem) prgm.set_binding('o0', output_mem) prgm.add(code) domain = (0, 0, 128, 128) prgm.print_code() proc.execute(prgm, domain) # code.cache_code() # print code.render_string if output_mem[0] == 120: print "Passed relative addressing test" else: print "Failed relative addressing test" proc.free(input_mem) proc.free(output_mem)
def block(self, d, a, value):
    """Emit ``d = a + value`` into the active code using a temporary register.

    A register initialised to (value, value, value, value) is acquired from
    the active code's program, used as the second iadd operand, and released
    again once the instruction has been added.
    """
    stream = self.get_active_code()
    program = stream.prgm

    literal = program.acquire_register((value,) * 4)
    stream.add(cal.iadd(d, a, literal))
    program.release_register(literal)
    return
def TestSynIterInc(): SIZE = 64 # build and run the kernel prgm = env.Program() code = prgm.get_stream() code.add(cal.dcl_output(reg.o0, USAGE=cal.usage.pos)) ones = prgm.acquire_register((1, 1, 1, 1)) counter = prgm.acquire_register() code.add(cal.mov(counter, ones)) for i in syn_iter(code, 4, step=1, mode=INC): code.add(cal.iadd(counter, counter, ones)) code.add(cal.mov(reg.o0, counter.x)) domain = (0, 0, SIZE, SIZE) proc = env.Processor(0) ext_output=proc.alloc_remote('i', 1, SIZE) prgm.set_binding(reg.o0, ext_output) prgm.add(code) proc.execute(prgm, domain) passed = True for i in xrange(0, SIZE): if ext_output[i] != 5: passed = False print "Passed == ", passed proc.free(ext_output) return
def test_1comp(): proc = env.Processor(0) prgm = env.Program() code = prgm.get_stream() inp = proc.alloc_remote('i', 4, 1, 1) out = proc.alloc_remote('i', 1, 4, 1) for i in xrange(0, 4): inp[i] = i + 1 out[i] = 0 print "inp", inp[0:4] print "out", out[0:4] cal.set_active_code(code) cal.dcl_output(reg.o0, USAGE=cal.usage.generic) cal.dcl_resource(0, cal.pixtex_type.oned, cal.fmt.float, UNNORM=True) # positions r = prgm.acquire_register() cal.sample(0, 0, r.x000, r('0000')) #cal.iadd(r[0], r[0], r[1]('0x00')) #cal.iadd(r[0], r[0], r[2]('00x0')) #cal.iadd(r[0], r[0], r[3]('000x')) cal.iadd(r, r, r) cal.mov(reg.o0.x, r) prgm.set_binding(reg.i0, inp) prgm.set_binding(reg.o0, out) prgm.add(code) prgm.print_code() proc.execute(prgm, (0, 0, 4, 1)) print "inp", inp[0:4] print "out", out[0:4] for i in xrange(0, 4): assert (out[i] == 2) return
def test_1comp(): proc = env.Processor(0) prgm = env.Program() code = prgm.get_stream() inp = proc.alloc_remote('i', 4, 1, 1) out = proc.alloc_remote('i', 1, 4, 1) for i in xrange(0, 4): inp[i] = i + 1 out[i] = 0 print "inp", inp[0:4] print "out", out[0:4] cal.set_active_code(code) cal.dcl_output(reg.o0, USAGE=cal.usage.generic) cal.dcl_resource(0, cal.pixtex_type.oned, cal.fmt.float, UNNORM=True) # positions r = prgm.acquire_register() cal.sample(0, 0, r.x000, r('0000')) #cal.iadd(r[0], r[0], r[1]('0x00')) #cal.iadd(r[0], r[0], r[2]('00x0')) #cal.iadd(r[0], r[0], r[3]('000x')) cal.iadd(r, r, r) cal.mov(reg.o0.x, r) prgm.set_binding(reg.i0, inp) prgm.set_binding(reg.o0, out) prgm.add(code) prgm.print_code() proc.execute(prgm, (0, 0, 4, 1)) print "inp", inp[0:4] print "out", out[0:4] for i in xrange(0, 4): assert(out[i] == 2) return
def FF(a1, b1, c1, d1, x1, s1, ac1):
    """Emit one FF step into the active CAL code, updating ``a1`` in place.

    Emitted sequence:
      a1 += F(b1, c1, d1) + x1 + ac1
      a1  = (a1 << s1) | (a1 >>> -s1)
      a1 += b1

    The shift pair is presumably a left-rotate by ``s1`` that relies on the
    hardware taking the shift count modulo the word size -- TODO confirm.

    ``ac1`` is a Python integer loaded into a temporary register; the other
    arguments are registers.  Temporaries come from the global ``xcode``
    stream and are released before returning.
    """
    global xcode

    # Acquisition order is kept as-is: it determines register assignment.
    const_reg = xcode.acquire_register((ac1,) * 4)
    scratch_a = xcode.acquire_register()
    scratch_b = xcode.acquire_register()

    # a1 += F(b1, c1, d1) + x1 + ac1
    F(b1, c1, d1, scratch_a)
    for addend in (scratch_a, x1, const_reg):
        cal.iadd(a1, a1, addend)

    # Combine a left shift with a logical right shift by the negated count.
    cal.ishl(scratch_a, a1, s1)
    cal.ushr(scratch_b, a1, s1(neg=('x', 'y', 'z', 'w')))
    cal.ior(a1, scratch_a, scratch_b)

    cal.iadd(a1, a1, b1)

    for scratch in (const_reg, scratch_a, scratch_b):
        xcode.release_register(scratch)
def TestSynIterIncFloatExtStopExtStart(): SIZE = 64 # build and run the kernel prgm = env.Program() code = prgm.get_stream() code.add(cal.dcl_output(reg.o0, USAGE=cal.usage.pos)) ones = prgm.acquire_register((1, 1, 1, 1)) counter = prgm.acquire_register() code.add(cal.mov(counter, ones)) stop = prgm.acquire_register((4.0, 4.0, 4.0, 4.0)) start = prgm.acquire_register((2.0, 2.0, 2.0, 2.0)) step = prgm.acquire_register((1.0, 1.0, 1.0, 1.0)) fiter = syn_iter_float(code, stop, step=step, mode=INC) fiter.set_start_reg(start) for i in fiter: code.add(cal.iadd(counter, counter, ones)) code.add(cal.mov(reg.o0, counter.x)) domain = (0, 0, SIZE, SIZE) proc = env.Processor(0) ext_output=proc.alloc_remote('i', 1, SIZE, 1) prgm.set_binding(reg.o0, ext_output) prgm.add(code) proc.execute(prgm, domain) passed = True for i in xrange(0, SIZE): if ext_output[i] != 3: passed = False print "Passed == ", passed proc.free(ext_output) return
def block(self, d, a, b):
    """Emit ``d = b + (-a.reg)`` into the active code.

    The second iadd operand is ``a.reg`` with the ``neg`` modifier applied to
    the x, y, z and w components.
    """
    stream = self.get_active_code()

    negated_a = a.reg(neg=('x', 'y', 'z', 'w'))
    stream.add(cal.iadd(d, b, negated_a))
    return
def MD5Transform(state, block, blocki):
    """Run the MD5 block transform on the CAL device and add it into `state`.

    Parameters:
      state  -- mutable sequence; elements 0..3 are read as input and updated
                in place with `state[i] += output[i]`.
      block  -- input data handed to Decode() together with `blocki`
                (presumably the message buffer and the offset of the 64-byte
                block inside it -- verify against Decode).
      blocki -- see `block`.

    Side effects: allocates and frees three remote buffers, builds and caches
    the kernel in the module-level `xcode`, executes it over a 1x1 domain, and
    prints the input state and the kernel output in hex.  Returns None.
    """
    proc = env.Processor(0)

    # Remote buffers: the incoming state, the decoded block, and the result.
    input_state = proc.alloc_remote('I', 4, 1, 1)
    input_block = proc.alloc_remote('I', 4, 4, 1)
    output = proc.alloc_remote('I', 4, 1, 1)

    for i in range(4):
        input_state[i] = state[i]

    Decode(input_block, block, blocki, 64)
    #print map(hex, input_block)

    global xcode
    # NOTE(review): the kernel is assumed to be built only on the first call,
    # i.e. everything down to the register releases sits under this guard and
    # later calls reuse the cached stream -- confirm against the original
    # indentation.
    # NOTE(review): `xcode is None` would be the idiomatic comparison.
    if xcode == None:
        xcode = env.InstructionStream()
        cal.set_active_code(xcode)

        # Per-round shift amounts, one value in every component of a register.
        S11 = xcode.acquire_register((7, 7, 7, 7))
        S12 = xcode.acquire_register((12, 12, 12, 12))
        S13 = xcode.acquire_register((17, 17, 17, 17))
        S14 = xcode.acquire_register((22, 22, 22, 22))
        S21 = xcode.acquire_register((5, 5, 5, 5))
        S22 = xcode.acquire_register((9, 9, 9, 9))
        S23 = xcode.acquire_register((14, 14, 14, 14))
        S24 = xcode.acquire_register((20, 20, 20, 20))
        S31 = xcode.acquire_register((4, 4, 4, 4))
        S32 = xcode.acquire_register((11, 11, 11, 11))
        S33 = xcode.acquire_register((16, 16, 16, 16))
        S34 = xcode.acquire_register((23, 23, 23, 23))
        S41 = xcode.acquire_register((6, 6, 6, 6))
        S42 = xcode.acquire_register((10, 10, 10, 10))
        S43 = xcode.acquire_register((15, 15, 15, 15))
        S44 = xcode.acquire_register((21, 21, 21, 21))

        # Working state registers and the 16 block words.
        a = xcode.acquire_register()
        b = xcode.acquire_register()
        c = xcode.acquire_register()
        d = xcode.acquire_register()
        x = [xcode.acquire_register() for i in range(16)]
        # NOTE(review): `r` is not referenced again anywhere in this function.
        r = xcode.acquire_register()

        cal.dcl_cb('cb0[1]')
        cal.dcl_cb('cb1[4]')
        cal.dcl_output('o0', USAGE=cal.usage.generic)

        # a..d come from the four components of cb0[0].
        cal.mov(a, 'cb0[0].x')
        cal.mov(b, 'cb0[0].y')
        cal.mov(c, 'cb0[0].z')
        cal.mov(d, 'cb0[0].w')

        # x[0..15] come from the four components of cb1[0..3].
        for i in range(4):
            cal.mov(x[i*4], 'cb1[' + str(i) + '].x')
            cal.mov(x[i*4+1], 'cb1[' + str(i) + '].y')
            cal.mov(x[i*4+2], 'cb1[' + str(i) + '].z')
            cal.mov(x[i*4+3], 'cb1[' + str(i) + '].w')

        # Round 1
        FF (a, b, c, d, x[ 0], S11, 0xd76aa478); # 1
        FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); # 2
        FF (c, d, a, b, x[ 2], S13, 0x242070db); # 3
        FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); # 4
        FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); # 5
        FF (d, a, b, c, x[ 5], S12, 0x4787c62a); # 6
        FF (c, d, a, b, x[ 6], S13, 0xa8304613); # 7
        FF (b, c, d, a, x[ 7], S14, 0xfd469501); # 8
        FF (a, b, c, d, x[ 8], S11, 0x698098d8); # 9
        FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); # 10
        FF (c, d, a, b, x[10], S13, 0xffff5bb1); # 11
        FF (b, c, d, a, x[11], S14, 0x895cd7be); # 12
        FF (a, b, c, d, x[12], S11, 0x6b901122); # 13
        FF (d, a, b, c, x[13], S12, 0xfd987193); # 14
        FF (c, d, a, b, x[14], S13, 0xa679438e); # 15
        FF (b, c, d, a, x[15], S14, 0x49b40821); # 16

        # Round 2
        GG (a, b, c, d, x[ 1], S21, 0xf61e2562); # 17
        GG (d, a, b, c, x[ 6], S22, 0xc040b340); # 18
        GG (c, d, a, b, x[11], S23, 0x265e5a51); # 19
        GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); # 20
        GG (a, b, c, d, x[ 5], S21, 0xd62f105d); # 21
        GG (d, a, b, c, x[10], S22, 0x2441453); # 22
        GG (c, d, a, b, x[15], S23, 0xd8a1e681); # 23
        GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); # 24
        GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); # 25
        GG (d, a, b, c, x[14], S22, 0xc33707d6); # 26
        GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); # 27
        GG (b, c, d, a, x[ 8], S24, 0x455a14ed); # 28
        GG (a, b, c, d, x[13], S21, 0xa9e3e905); # 29
        GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); # 30
        GG (c, d, a, b, x[ 7], S23, 0x676f02d9); # 31
        GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); # 32

        # Round 3
        HH (a, b, c, d, x[ 5], S31, 0xfffa3942); # 33
        HH (d, a, b, c, x[ 8], S32, 0x8771f681); # 34
        HH (c, d, a, b, x[11], S33, 0x6d9d6122); # 35
        HH (b, c, d, a, x[14], S34, 0xfde5380c); # 36
        HH (a, b, c, d, x[ 1], S31, 0xa4beea44); # 37
        HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); # 38
        HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); # 39
        HH (b, c, d, a, x[10], S34, 0xbebfbc70); # 40
        HH (a, b, c, d, x[13], S31, 0x289b7ec6); # 41
        HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); # 42
        HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); # 43
        HH (b, c, d, a, x[ 6], S34, 0x4881d05); # 44
        HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); # 45
        HH (d, a, b, c, x[12], S32, 0xe6db99e5); # 46
        HH (c, d, a, b, x[15], S33, 0x1fa27cf8); # 47
        HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); # 48

        # Round 4
        II (a, b, c, d, x[ 0], S41, 0xf4292244); # 49
        II (d, a, b, c, x[ 7], S42, 0x432aff97); # 50
        II (c, d, a, b, x[14], S43, 0xab9423a7); # 51
        II (b, c, d, a, x[ 5], S44, 0xfc93a039); # 52
        II (a, b, c, d, x[12], S41, 0x655b59c3); # 53
        II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); # 54
        II (c, d, a, b, x[10], S43, 0xffeff47d); # 55
        II (b, c, d, a, x[ 1], S44, 0x85845dd1); # 56
        II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); # 57
        II (d, a, b, c, x[15], S42, 0xfe2ce6e0); # 58
        II (c, d, a, b, x[ 6], S43, 0xa3014314); # 59
        II (b, c, d, a, x[13], S44, 0x4e0811a1); # 60
        II (a, b, c, d, x[ 4], S41, 0xf7537e82); # 61
        II (d, a, b, c, x[11], S42, 0xbd3af235); # 62
        II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); # 63
        II (b, c, d, a, x[ 9], S44, 0xeb86d391); # 64

        # Gather the x components of a, b, c and d into one register via
        # swizzled adds (presumably temp ends up as (a.x, b.x, c.x, d.x) --
        # confirm the '0' swizzle semantics) and write it to o0.
        # NOTE(review): `temp` is acquired here but never released.
        temp = xcode.acquire_register()
        #cal.mov(temp.x___, a.x)
        #cal.mov(temp._y__, b.xx)
        #cal.mov(temp.__z_, c.xxx)
        #cal.mov(temp.___w, d.xxxx)
        cal.iadd(temp, a.x000, b('0x00'))
        cal.iadd(temp, temp, c('00x0'))
        cal.iadd(temp, temp, d('000x'))
        cal.mov('o0', temp)

        # Only a..d and the block words are released; the shift-amount
        # registers stay acquired.
        xcode.release_register(a)
        xcode.release_register(b)
        xcode.release_register(c)
        xcode.release_register(d)
        for xi in x:
            xcode.release_register(xi)

    #for i, inst in enumerate(xcode._instructions):
    #  print inst.render()

    # Bind this call's buffers to the stream and run it once.
    xcode.set_remote_binding('cb0', input_state)
    xcode.set_remote_binding('cb1', input_block)
    xcode.set_remote_binding('o0', output)

    domain = (0, 0, 1, 1)
    proc.execute(xcode, domain)

    # Fold the kernel result into the caller's state, in place.
    state[0] += output[0]
    state[1] += output[1]
    state[2] += output[2]
    state[3] += output[3]

    print 'input = ', map(hex, input_state)
    print 'output = ', map(hex, output)

    proc.free_remote(input_state)
    proc.free_remote(input_block)
    proc.free_remote(output)