def TestSynIterInc(): SIZE = 64 # build and run the kernel prgm = env.Program() code = prgm.get_stream() code.add(cal.dcl_output(reg.o0, USAGE=cal.usage.pos)) ones = prgm.acquire_register((1, 1, 1, 1)) counter = prgm.acquire_register() code.add(cal.mov(counter, ones)) for i in syn_iter(code, 4, step=1, mode=INC): code.add(cal.iadd(counter, counter, ones)) code.add(cal.mov(reg.o0, counter.x)) domain = (0, 0, SIZE, SIZE) proc = env.Processor(0) ext_output=proc.alloc_remote('i', 1, SIZE) prgm.set_binding(reg.o0, ext_output) prgm.add(code) proc.execute(prgm, domain) passed = True for i in xrange(0, SIZE): if ext_output[i] != 5: passed = False print "Passed == ", passed proc.free(ext_output) return
def cal_nbody():
    """Set up the module-level state for the CAL n-body simulation.

    Assigns the globals ``step``, ``proc``, ``pos`` and ``vel``, fills the
    first position/velocity buffers with initial data, then appends the
    generated n-body kernel to the global ``prgm`` and caches its code.

    ``prgm`` is read from module scope and is not created here.
    """
    global pos, vel, mass, prgm, step, proc

    step = 0
    proc = env.Processor(1)

    # Two position and two velocity buffers, four floats per body.
    pos = [proc.alloc_remote('f', 4, SQRT_BODIES, SQRT_BODIES)
           for _ in xrange(0, 2)]
    vel = [proc.alloc_remote('f', 4, SQRT_BODIES, SQRT_BODIES)
           for _ in xrange(0, 2)]

    for which in (0, 1):
        pos[which].clear()
        vel[which].clear()

    first_pos = pos[0]
    first_vel = vel[0]

    # Random placement; every body starts with zero velocity.
    # NOTE(review): the fourth position component is presumably the body's
    # mass -- confirm against the kernel generator.
    for body in xrange(0, N_BODIES):
        base = body * 4
        first_pos[base] = random.uniform(-20.0, 20.0)
        first_pos[base + 1] = random.uniform(-12.0, 12.0)
        first_pos[base + 2] = 0.0
        first_pos[base + 3] = random.uniform(1e9, 1e12)

        for comp in xrange(0, 4):
            first_vel[base + comp] = 0.0

    # Overwrite bodies 0-3 with four fixed entries arranged on the axes,
    # each with a fourth component of 1e16.
    fixed_xy = ((-4.0, 0.0), (4.0, 0.0), (0.0, 4.0), (0.0, -4.0))
    for slot, (x_val, y_val) in enumerate(fixed_xy):
        base = slot * 4
        first_pos[base] = x_val
        first_pos[base + 1] = y_val
        first_pos[base + 2] = 0.0
        first_pos[base + 3] = 1e16

    code = nbody.cal_nb_generate_2d(prgm, SQRT_BODIES, 0.000002)
    prgm += code
    prgm.cache_code()
def TestRelativeAddressing():
    """Exercise register-relative reads from the ``g[]`` buffer.

    The kernel accumulates ``g[r0.x]`` into ``r1`` while ``r0`` counts up by
    one until it reaches 16, then moves the accumulator to ``o0``.  The input
    is filled so that every group of four consecutive elements holds its own
    group index; the test passes when the first output element is 120
    (0 + 1 + ... + 15).  Both buffers are freed before returning.
    """
    import corepy.arch.cal.platform as env
    import corepy.arch.cal.isa as cal

    proc = env.Processor(0)
    input_mem = proc.alloc_remote('I', 4, 16, 1)
    output_mem = proc.alloc_remote('I', 4, 1, 1)

    # Element k receives k // 4, i.e. the index of its four-element group.
    for flat in range(16 * 1 * 4 * 4):
        input_mem[flat] = flat // 4

    prgm = env.Program()
    code = prgm.get_stream()
    cal.set_active_code(code)

    cal.dcl_output(o0, USAGE=cal.usage.generic)
    cal.dcl_literal(l0, 1, 1, 1, 1)        # loop increment
    cal.dcl_literal(l1, 16, 16, 16, 16)    # loop bound

    # Zero the index (r0) and the accumulator (r1).
    cal.mov(r0, r0('0000'))
    cal.mov(r1, r1('0000'))

    cal.whileloop()
    cal.iadd(r1, r1, g[r0.x])
    cal.iadd(r0, r0, l0)
    cal.breakc(cal.relop.ge, r0, l1)
    cal.endloop()

    cal.mov(o0, r1)

    prgm.set_binding('g[]', input_mem)
    prgm.set_binding('o0', output_mem)
    prgm.add(code)

    prgm.print_code()
    proc.execute(prgm, (0, 0, 128, 128))

    print ("Passed relative addressing test" if output_mem[0] == 120
           else "Failed relative addressing test")

    proc.free(input_mem)
    proc.free(output_mem)
def test_4comp(): proc = env.Processor(0) prgm = env.Program() code = prgm.get_stream() inp = proc.alloc_remote('i', 1, 4, 1) out = proc.alloc_remote('i', 4, 1, 1) for i in xrange(0, 4): inp[i] = i + 1 out[i] = 0 print "inp", inp[0:4] print "out", out[0:4] cal.set_active_code(code) cal.dcl_output(reg.o0, USAGE=cal.usage.generic) cal.dcl_resource(0, cal.pixtex_type.oned, cal.fmt.float, UNNORM=True) # positions r_cnt = prgm.acquire_register() r = prgm.acquire_registers(4) cal.mov(r_cnt, r_cnt('0000')) for i in xrange(0, 4): cal.sample(0, 0, r[i].x000, r_cnt.x) cal.add(r_cnt, r_cnt, r_cnt('1111')) cal.iadd(r[0], r[0], r[1]('0x00')) cal.iadd(r[0], r[0], r[2]('00x0')) cal.iadd(r[0], r[0], r[3]('000x')) cal.iadd(r[0], r[0], r[0]) cal.mov(reg.o0, r[0]) prgm.set_binding(reg.i0, inp) prgm.set_binding(reg.o0, out) prgm.add(code) prgm.print_code() proc.execute(prgm, (0, 0, 1, 1)) print "inp", inp[0:4] print "out", out[0:4] for i in xrange(0, 4): assert (out[i] == (i + 1) * 2) return
def TestSynIterIncFloatExtStopExtStart(): SIZE = 64 # build and run the kernel prgm = env.Program() code = prgm.get_stream() code.add(cal.dcl_output(reg.o0, USAGE=cal.usage.pos)) ones = prgm.acquire_register((1, 1, 1, 1)) counter = prgm.acquire_register() code.add(cal.mov(counter, ones)) stop = prgm.acquire_register((4.0, 4.0, 4.0, 4.0)) start = prgm.acquire_register((2.0, 2.0, 2.0, 2.0)) step = prgm.acquire_register((1.0, 1.0, 1.0, 1.0)) fiter = syn_iter_float(code, stop, step=step, mode=INC) fiter.set_start_reg(start) for i in fiter: code.add(cal.iadd(counter, counter, ones)) code.add(cal.mov(reg.o0, counter.x)) domain = (0, 0, SIZE, SIZE) proc = env.Processor(0) ext_output=proc.alloc_remote('i', 1, SIZE, 1) prgm.set_binding(reg.o0, ext_output) prgm.add(code) proc.execute(prgm, domain) passed = True for i in xrange(0, SIZE): if ext_output[i] != 3: passed = False print "Passed == ", passed proc.free(ext_output) return
def test_foo(): proc = env.Processor(0) prgm = env.Program() code = prgm.get_stream() cal.set_active_code(code) cb = proc.alloc_remote('i', 1, 4, 1) out = proc.alloc_remote('i', 4, 1, 1) gb = proc.alloc_remote('i', 1, 4, 1, True) for i in xrange(0, 4): cb[i] = i + 1 out[i] = 42 gb[i] = 67 cal.dcl_output(reg.o0, USAGE=cal.usage.generic) cal.dcl_cb('cb0[4]') cal.mov('r0', 'cb0[0]') cal.mov('r1', 'cb0[1]') #cal.mov('r2', 'cb0[2]') #cal.mov('r3', 'cb0[3]') cal.mov('o0', 'r0') cal.mov('g[0]', 'r0') prgm.set_binding('cb0', cb) prgm.set_binding('o0', out) prgm.set_binding('g[]', gb) prgm.add(code) prgm.print_code() proc.execute(prgm, (0, 0, 1, 1)) print "cb ", cb[0:4] print "out", out[0:4] print "gb ", gb[0:4] return
cal.breakc(cal.relop.ge, r_count.y, r_limit) cal.add(r_sum, r_sum, r_sum('1111')) cal.add(r_count, r_count, r_count('0100')) cal.endloop() cal.add(r_count, r_count, r_count('1000')) cal.endloop() cal.mov(reg.o0, r_sum) return code if __name__ == '__main__': SIZE = 64 prgm = env.Program() prgm.add(generate(prgm)) proc = env.Processor(0) out = proc.alloc_remote('f', 4, SIZE, SIZE) prgm.set_binding(reg.o0, out) proc.execute(prgm, (0, 0, SIZE, SIZE)) print out prgm.print_code()
def ParMD5Transform(parcontext, parblock, blocki):
    """Apply one MD5 block transform to many contexts at once on the GPU.

    The per-context state words and the decoded message block are copied
    into N x N four-component remote buffers, the (cached) MD5 kernel is
    executed over an N x N domain, and the kernel outputs are added back
    into the context's state arrays in place.

    Args:
        parcontext: object providing ``number`` and the indexable state
            arrays ``statea``, ``stateb``, ``statec`` and ``stated``, which
            are updated in place.
        parblock: message data handed to ``ParDecode``.
        blocki: offset handed to ``ParDecode``.

    Side effects:
        Builds the global instruction stream ``xcode`` on the first call and
        reuses it afterwards; adds the wall-clock time spent in
        ``proc.execute`` to the global ``TIME``.
    """
    global xcode, TIME

    num = parcontext.number
    temp_block = extarray.extarray('I', 16 * num)
    ParDecode(num, temp_block, parblock, blocki, 64)

    proc = env.Processor(0)
    N = int(math.sqrt(num / 4))

    def address_4_2d(col, row, pitch=64):
        # Element offset of texel (col, row): four components per texel and
        # `pitch` texels per buffer row.
        # NOTE(review): the pitch of 64 is hard-coded; presumably it matches
        # the row pitch of the remote allocations -- confirm for N > 64.
        return col * 4 + row * pitch * 4

    input_statea = proc.alloc_remote('I', 4, N, N)
    input_stateb = proc.alloc_remote('I', 4, N, N)
    input_statec = proc.alloc_remote('I', 4, N, N)
    input_stated = proc.alloc_remote('I', 4, N, N)
    input_block = [proc.alloc_remote('I', 4, N, N) for i in range(16)]
    outputa = proc.alloc_remote('I', 4, N, N)
    outputb = proc.alloc_remote('I', 4, N, N)
    outputc = proc.alloc_remote('I', 4, N, N)
    outputd = proc.alloc_remote('I', 4, N, N)

    # Everything below runs under try/finally so the 24 remote buffers are
    # released even if code generation or execution fails.
    try:
        # Pack the four state words of every context into the input buffers.
        for j in range(N):
            for i in range(N):
                dst = address_4_2d(i, j)
                src = (i + j * N) * 4
                for k in range(4):
                    input_statea[dst + k] = parcontext.statea[src + k]
                    input_stateb[dst + k] = parcontext.stateb[src + k]
                    input_statec[dst + k] = parcontext.statec[src + k]
                    input_stated[dst + k] = parcontext.stated[src + k]

        # Scatter the decoded message words: word i of each context goes to
        # input_block[i].
        for k in range(N):
            for j in range(N):
                dst = address_4_2d(j, k)
                src = (j + k * N) * 4 * 16
                for l in range(4):
                    for i in range(16):
                        input_block[i][dst + l] = temp_block[i + src + l * 16]

        # Generate the kernel only once.  The register names below exist only
        # on the generating call, so every statement that uses them belongs
        # to this branch.
        # NOTE(review): the extent of this branch was reconstructed from name
        # usage -- confirm against the original layout.
        if xcode is None:
            xcode = env.InstructionStream()
            cal.set_active_code(xcode)

            # Per-round shift amounts, replicated across all four lanes.
            S11 = xcode.acquire_register((7, 7, 7, 7))
            S12 = xcode.acquire_register((12, 12, 12, 12))
            S13 = xcode.acquire_register((17, 17, 17, 17))
            S14 = xcode.acquire_register((22, 22, 22, 22))
            S21 = xcode.acquire_register((5, 5, 5, 5))
            S22 = xcode.acquire_register((9, 9, 9, 9))
            S23 = xcode.acquire_register((14, 14, 14, 14))
            S24 = xcode.acquire_register((20, 20, 20, 20))
            S31 = xcode.acquire_register((4, 4, 4, 4))
            S32 = xcode.acquire_register((11, 11, 11, 11))
            S33 = xcode.acquire_register((16, 16, 16, 16))
            S34 = xcode.acquire_register((23, 23, 23, 23))
            S41 = xcode.acquire_register((6, 6, 6, 6))
            S42 = xcode.acquire_register((10, 10, 10, 10))
            S43 = xcode.acquire_register((15, 15, 15, 15))
            S44 = xcode.acquire_register((21, 21, 21, 21))

            a = xcode.acquire_register()
            b = xcode.acquire_register()
            c = xcode.acquire_register()
            d = xcode.acquire_register()
            x = [xcode.acquire_register() for i in range(16)]
            # Not referenced again in this function; the acquisition is kept
            # so register allocation is unchanged.
            r = xcode.acquire_register()

            cal.dcl_resource(0, cal.pixtex_type.twod, cal.fmt.uint, UNNORM=True)  # statea
            cal.dcl_resource(1, cal.pixtex_type.twod, cal.fmt.uint, UNNORM=True)  # stateb
            cal.dcl_resource(2, cal.pixtex_type.twod, cal.fmt.uint, UNNORM=True)  # statec
            cal.dcl_resource(3, cal.pixtex_type.twod, cal.fmt.uint, UNNORM=True)  # stated
            for i in range(16):
                cal.dcl_resource(i + 4, cal.pixtex_type.twod, cal.fmt.uint, UNNORM=True)

            cal.dcl_output(reg.o0, USAGE=cal.usage.generic)
            cal.dcl_output(reg.o1, USAGE=cal.usage.generic)
            cal.dcl_output(reg.o2, USAGE=cal.usage.generic)
            cal.dcl_output(reg.o3, USAGE=cal.usage.generic)

            # Load the state words and the 16 message words for this texel.
            cal.sample(0, 0, a, reg.v0.xy)
            cal.sample(1, 0, b, reg.v0.xy)
            cal.sample(2, 0, c, reg.v0.xy)
            cal.sample(3, 0, d, reg.v0.xy)
            for i in range(16):
                cal.sample(i + 4, 0, x[i], reg.v0.xy)

            # Round 1
            FF(a, b, c, d, x[0], S11, 0xd76aa478)   # 1
            FF(d, a, b, c, x[1], S12, 0xe8c7b756)   # 2
            FF(c, d, a, b, x[2], S13, 0x242070db)   # 3
            FF(b, c, d, a, x[3], S14, 0xc1bdceee)   # 4
            FF(a, b, c, d, x[4], S11, 0xf57c0faf)   # 5
            FF(d, a, b, c, x[5], S12, 0x4787c62a)   # 6
            FF(c, d, a, b, x[6], S13, 0xa8304613)   # 7
            FF(b, c, d, a, x[7], S14, 0xfd469501)   # 8
            FF(a, b, c, d, x[8], S11, 0x698098d8)   # 9
            FF(d, a, b, c, x[9], S12, 0x8b44f7af)   # 10
            FF(c, d, a, b, x[10], S13, 0xffff5bb1)  # 11
            FF(b, c, d, a, x[11], S14, 0x895cd7be)  # 12
            FF(a, b, c, d, x[12], S11, 0x6b901122)  # 13
            FF(d, a, b, c, x[13], S12, 0xfd987193)  # 14
            FF(c, d, a, b, x[14], S13, 0xa679438e)  # 15
            FF(b, c, d, a, x[15], S14, 0x49b40821)  # 16

            # Round 2
            GG(a, b, c, d, x[1], S21, 0xf61e2562)   # 17
            GG(d, a, b, c, x[6], S22, 0xc040b340)   # 18
            GG(c, d, a, b, x[11], S23, 0x265e5a51)  # 19
            GG(b, c, d, a, x[0], S24, 0xe9b6c7aa)   # 20
            GG(a, b, c, d, x[5], S21, 0xd62f105d)   # 21
            GG(d, a, b, c, x[10], S22, 0x2441453)   # 22
            GG(c, d, a, b, x[15], S23, 0xd8a1e681)  # 23
            GG(b, c, d, a, x[4], S24, 0xe7d3fbc8)   # 24
            GG(a, b, c, d, x[9], S21, 0x21e1cde6)   # 25
            GG(d, a, b, c, x[14], S22, 0xc33707d6)  # 26
            GG(c, d, a, b, x[3], S23, 0xf4d50d87)   # 27
            GG(b, c, d, a, x[8], S24, 0x455a14ed)   # 28
            GG(a, b, c, d, x[13], S21, 0xa9e3e905)  # 29
            GG(d, a, b, c, x[2], S22, 0xfcefa3f8)   # 30
            GG(c, d, a, b, x[7], S23, 0x676f02d9)   # 31
            GG(b, c, d, a, x[12], S24, 0x8d2a4c8a)  # 32

            # Round 3
            HH(a, b, c, d, x[5], S31, 0xfffa3942)   # 33
            HH(d, a, b, c, x[8], S32, 0x8771f681)   # 34
            HH(c, d, a, b, x[11], S33, 0x6d9d6122)  # 35
            HH(b, c, d, a, x[14], S34, 0xfde5380c)  # 36
            HH(a, b, c, d, x[1], S31, 0xa4beea44)   # 37
            HH(d, a, b, c, x[4], S32, 0x4bdecfa9)   # 38
            HH(c, d, a, b, x[7], S33, 0xf6bb4b60)   # 39
            HH(b, c, d, a, x[10], S34, 0xbebfbc70)  # 40
            HH(a, b, c, d, x[13], S31, 0x289b7ec6)  # 41
            HH(d, a, b, c, x[0], S32, 0xeaa127fa)   # 42
            HH(c, d, a, b, x[3], S33, 0xd4ef3085)   # 43
            HH(b, c, d, a, x[6], S34, 0x4881d05)    # 44
            HH(a, b, c, d, x[9], S31, 0xd9d4d039)   # 45
            HH(d, a, b, c, x[12], S32, 0xe6db99e5)  # 46
            HH(c, d, a, b, x[15], S33, 0x1fa27cf8)  # 47
            HH(b, c, d, a, x[2], S34, 0xc4ac5665)   # 48

            # Round 4
            II(a, b, c, d, x[0], S41, 0xf4292244)   # 49
            II(d, a, b, c, x[7], S42, 0x432aff97)   # 50
            II(c, d, a, b, x[14], S43, 0xab9423a7)  # 51
            II(b, c, d, a, x[5], S44, 0xfc93a039)   # 52
            II(a, b, c, d, x[12], S41, 0x655b59c3)  # 53
            II(d, a, b, c, x[3], S42, 0x8f0ccc92)   # 54
            II(c, d, a, b, x[10], S43, 0xffeff47d)  # 55
            II(b, c, d, a, x[1], S44, 0x85845dd1)   # 56
            II(a, b, c, d, x[8], S41, 0x6fa87e4f)   # 57
            II(d, a, b, c, x[15], S42, 0xfe2ce6e0)  # 58
            II(c, d, a, b, x[6], S43, 0xa3014314)   # 59
            II(b, c, d, a, x[13], S44, 0x4e0811a1)  # 60
            II(a, b, c, d, x[4], S41, 0xf7537e82)   # 61
            II(d, a, b, c, x[11], S42, 0xbd3af235)  # 62
            II(c, d, a, b, x[2], S43, 0x2ad7d2bb)   # 63
            II(b, c, d, a, x[9], S44, 0xeb86d391)   # 64

            cal.mov('o0', a)
            cal.mov('o1', b)
            cal.mov('o2', c)
            cal.mov('o3', d)

            xcode.release_register(a)
            xcode.release_register(b)
            xcode.release_register(c)
            xcode.release_register(d)
            for xi in x:
                xcode.release_register(xi)

        # The buffers are fresh on every call, so bindings are always redone.
        xcode.set_remote_binding('i0', input_statea)
        xcode.set_remote_binding('i1', input_stateb)
        xcode.set_remote_binding('i2', input_statec)
        xcode.set_remote_binding('i3', input_stated)
        for i, block_buf in enumerate(input_block):
            xcode.set_remote_binding('i' + str(i + 4), block_buf)
        xcode.set_remote_binding('o0', outputa)
        xcode.set_remote_binding('o1', outputb)
        xcode.set_remote_binding('o2', outputc)
        xcode.set_remote_binding('o3', outputd)

        domain = (0, 0, N, N)

        # Only the execute call itself is timed.
        start_time = time.time()
        proc.execute(xcode, domain)
        end_time = time.time()
        TIME += (end_time - start_time)

        # Add the kernel results back into the running state.
        for j in range(N):
            for i in range(N):
                src = address_4_2d(i, j)
                dst = (i + j * N) * 4
                for k in range(4):
                    parcontext.statea[dst + k] += outputa[src + k]
                    parcontext.stateb[dst + k] += outputb[src + k]
                    parcontext.statec[dst + k] += outputc[src + k]
                    parcontext.stated[dst + k] += outputd[src + k]
    finally:
        proc.free_remote(input_statea)
        proc.free_remote(input_stateb)
        proc.free_remote(input_statec)
        proc.free_remote(input_stated)
        for block in input_block:
            proc.free_remote(block)
        proc.free_remote(outputa)
        proc.free_remote(outputb)
        proc.free_remote(outputc)
        proc.free_remote(outputd)
def MD5Transform(state, block, blocki): proc = env.Processor(0) input_state = proc.alloc_remote('I', 4, 1, 1) input_block = proc.alloc_remote('I', 4, 4, 1) output = proc.alloc_remote('I', 4, 1, 1) for i in range(4): input_state[i] = state[i] Decode(input_block, block, blocki, 64) #print map(hex, input_block) global xcode if xcode == None: xcode = env.InstructionStream() cal.set_active_code(xcode) S11 = xcode.acquire_register((7, 7, 7, 7)) S12 = xcode.acquire_register((12, 12, 12, 12)) S13 = xcode.acquire_register((17, 17, 17, 17)) S14 = xcode.acquire_register((22, 22, 22, 22)) S21 = xcode.acquire_register((5, 5, 5, 5)) S22 = xcode.acquire_register((9, 9, 9, 9)) S23 = xcode.acquire_register((14, 14, 14, 14)) S24 = xcode.acquire_register((20, 20, 20, 20)) S31 = xcode.acquire_register((4, 4, 4, 4)) S32 = xcode.acquire_register((11, 11, 11, 11)) S33 = xcode.acquire_register((16, 16, 16, 16)) S34 = xcode.acquire_register((23, 23, 23, 23)) S41 = xcode.acquire_register((6, 6, 6, 6)) S42 = xcode.acquire_register((10, 10, 10, 10)) S43 = xcode.acquire_register((15, 15, 15, 15)) S44 = xcode.acquire_register((21, 21, 21, 21)) a = xcode.acquire_register() b = xcode.acquire_register() c = xcode.acquire_register() d = xcode.acquire_register() x = [xcode.acquire_register() for i in range(16)] r = xcode.acquire_register() cal.dcl_cb('cb0[1]') cal.dcl_cb('cb1[4]') cal.dcl_output('o0', USAGE=cal.usage.generic) cal.mov(a, 'cb0[0].x') cal.mov(b, 'cb0[0].y') cal.mov(c, 'cb0[0].z') cal.mov(d, 'cb0[0].w') for i in range(4): cal.mov(x[i*4], 'cb1[' + str(i) + '].x') cal.mov(x[i*4+1], 'cb1[' + str(i) + '].y') cal.mov(x[i*4+2], 'cb1[' + str(i) + '].z') cal.mov(x[i*4+3], 'cb1[' + str(i) + '].w') # Round 1 FF (a, b, c, d, x[ 0], S11, 0xd76aa478); # 1 FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); # 2 FF (c, d, a, b, x[ 2], S13, 0x242070db); # 3 FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); # 4 FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); # 5 FF (d, a, b, c, x[ 5], S12, 0x4787c62a); # 6 FF (c, d, a, b, x[ 6], 
S13, 0xa8304613); # 7 FF (b, c, d, a, x[ 7], S14, 0xfd469501); # 8 FF (a, b, c, d, x[ 8], S11, 0x698098d8); # 9 FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); # 10 FF (c, d, a, b, x[10], S13, 0xffff5bb1); # 11 FF (b, c, d, a, x[11], S14, 0x895cd7be); # 12 FF (a, b, c, d, x[12], S11, 0x6b901122); # 13 FF (d, a, b, c, x[13], S12, 0xfd987193); # 14 FF (c, d, a, b, x[14], S13, 0xa679438e); # 15 FF (b, c, d, a, x[15], S14, 0x49b40821); # 16 # Round 2 GG (a, b, c, d, x[ 1], S21, 0xf61e2562); # 17 GG (d, a, b, c, x[ 6], S22, 0xc040b340); # 18 GG (c, d, a, b, x[11], S23, 0x265e5a51); # 19 GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); # 20 GG (a, b, c, d, x[ 5], S21, 0xd62f105d); # 21 GG (d, a, b, c, x[10], S22, 0x2441453); # 22 GG (c, d, a, b, x[15], S23, 0xd8a1e681); # 23 GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); # 24 GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); # 25 GG (d, a, b, c, x[14], S22, 0xc33707d6); # 26 GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); # 27 GG (b, c, d, a, x[ 8], S24, 0x455a14ed); # 28 GG (a, b, c, d, x[13], S21, 0xa9e3e905); # 29 GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); # 30 GG (c, d, a, b, x[ 7], S23, 0x676f02d9); # 31 GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); # 32 # Round 3 HH (a, b, c, d, x[ 5], S31, 0xfffa3942); # 33 HH (d, a, b, c, x[ 8], S32, 0x8771f681); # 34 HH (c, d, a, b, x[11], S33, 0x6d9d6122); # 35 HH (b, c, d, a, x[14], S34, 0xfde5380c); # 36 HH (a, b, c, d, x[ 1], S31, 0xa4beea44); # 37 HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); # 38 HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); # 39 HH (b, c, d, a, x[10], S34, 0xbebfbc70); # 40 HH (a, b, c, d, x[13], S31, 0x289b7ec6); # 41 HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); # 42 HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); # 43 HH (b, c, d, a, x[ 6], S34, 0x4881d05); # 44 HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); # 45 HH (d, a, b, c, x[12], S32, 0xe6db99e5); # 46 HH (c, d, a, b, x[15], S33, 0x1fa27cf8); # 47 HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); # 48 # Round 4 II (a, b, c, d, x[ 0], S41, 0xf4292244); # 49 II (d, a, b, c, x[ 
7], S42, 0x432aff97); # 50 II (c, d, a, b, x[14], S43, 0xab9423a7); # 51 II (b, c, d, a, x[ 5], S44, 0xfc93a039); # 52 II (a, b, c, d, x[12], S41, 0x655b59c3); # 53 II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); # 54 II (c, d, a, b, x[10], S43, 0xffeff47d); # 55 II (b, c, d, a, x[ 1], S44, 0x85845dd1); # 56 II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); # 57 II (d, a, b, c, x[15], S42, 0xfe2ce6e0); # 58 II (c, d, a, b, x[ 6], S43, 0xa3014314); # 59 II (b, c, d, a, x[13], S44, 0x4e0811a1); # 60 II (a, b, c, d, x[ 4], S41, 0xf7537e82); # 61 II (d, a, b, c, x[11], S42, 0xbd3af235); # 62 II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); # 63 II (b, c, d, a, x[ 9], S44, 0xeb86d391); # 64 temp = xcode.acquire_register() #cal.mov(temp.x___, a.x) #cal.mov(temp._y__, b.xx) #cal.mov(temp.__z_, c.xxx) #cal.mov(temp.___w, d.xxxx) cal.iadd(temp, a.x000, b('0x00')) cal.iadd(temp, temp, c('00x0')) cal.iadd(temp, temp, d('000x')) cal.mov('o0', temp) xcode.release_register(a) xcode.release_register(b) xcode.release_register(c) xcode.release_register(d) for xi in x: xcode.release_register(xi) #for i, inst in enumerate(xcode._instructions): # print inst.render() xcode.set_remote_binding('cb0', input_state) xcode.set_remote_binding('cb1', input_block) xcode.set_remote_binding('o0', output) domain = (0, 0, 1, 1) proc.execute(xcode, domain) state[0] += output[0] state[1] += output[1] state[2] += output[2] state[3] += output[3] print 'input = ', map(hex, input_state) print 'output = ', map(hex, output) proc.free_remote(input_state) proc.free_remote(input_block) proc.free_remote(output)