Ejemplo n.º 1
0
 def cleanup(self):
   """Append the end-of-loop counter update to self.code.

   In DEC mode r_step is applied with the neg modifier on x, y, z and w
   before being added to r_count; in INC mode r_step is added as is.  For
   any other mode nothing is appended.
   """
   if self.mode == DEC:
     delta = self.r_step(neg=('x', 'y', 'z', 'w'))
   elif self.mode == INC:
     delta = self.r_step
   else:
     # Neither DEC nor INC: no counter update is generated
     return

   # r_count = r_count + delta
   self.code.add(cal.iadd(self.r_count, self.r_count, delta))
   return
Ejemplo n.º 2
0
def TestRelativeAddressing():
  import corepy.arch.cal.platform as env
  import corepy.arch.cal.isa as cal
  
  proc = env.Processor(0)
  
  input_mem = proc.alloc_remote('I', 4, 16, 1)
  output_mem = proc.alloc_remote('I', 4, 1, 1)
  
  for i in range(16*1*4):
    for j in range(4):
      input_mem[i*4 + j] = i

  prgm = env.Program()  
  code = prgm.get_stream()
  cal.set_active_code(code)
    
  cal.dcl_output(o0, USAGE=cal.usage.generic)
  cal.dcl_literal(l0, 1, 1, 1, 1)
  cal.dcl_literal(l1, 16, 16, 16, 16)
  cal.mov(r0, r0('0000'))
  cal.mov(r1, r1('0000'))
  

  cal.whileloop()
  cal.iadd(r1, r1, g[r0.x])
  cal.iadd(r0, r0, l0)
  cal.breakc(cal.relop.ge, r0, l1)
  cal.endloop()

  cal.mov(o0, r1)
  
  prgm.set_binding('g[]', input_mem)
  prgm.set_binding('o0', output_mem)

  prgm.add(code)
  domain = (0, 0, 128, 128)

  prgm.print_code()
  proc.execute(prgm, domain)
  
  # code.cache_code()
  # print code.render_string
 
  if output_mem[0] == 120:
    print "Passed relative addressing test"
  else:
    print "Failed relative addressing test"

  proc.free(input_mem)
  proc.free(output_mem)
Ejemplo n.º 3
0
def TestRelativeAddressing():
    import corepy.arch.cal.platform as env
    import corepy.arch.cal.isa as cal

    proc = env.Processor(0)

    input_mem = proc.alloc_remote('I', 4, 16, 1)
    output_mem = proc.alloc_remote('I', 4, 1, 1)

    for i in range(16 * 1 * 4):
        for j in range(4):
            input_mem[i * 4 + j] = i

    prgm = env.Program()
    code = prgm.get_stream()
    cal.set_active_code(code)

    cal.dcl_output(o0, USAGE=cal.usage.generic)
    cal.dcl_literal(l0, 1, 1, 1, 1)
    cal.dcl_literal(l1, 16, 16, 16, 16)
    cal.mov(r0, r0('0000'))
    cal.mov(r1, r1('0000'))

    cal.whileloop()
    cal.iadd(r1, r1, g[r0.x])
    cal.iadd(r0, r0, l0)
    cal.breakc(cal.relop.ge, r0, l1)
    cal.endloop()

    cal.mov(o0, r1)

    prgm.set_binding('g[]', input_mem)
    prgm.set_binding('o0', output_mem)

    prgm.add(code)
    domain = (0, 0, 128, 128)

    prgm.print_code()
    proc.execute(prgm, domain)

    # code.cache_code()
    # print code.render_string

    if output_mem[0] == 120:
        print "Passed relative addressing test"
    else:
        print "Failed relative addressing test"

    proc.free(input_mem)
    proc.free(output_mem)
Ejemplo n.º 4
0
    def block(self, d, a, value):
        """Append an iadd of `a` and a register initialized to `value`.

        A register is acquired from the active code's program with `value`
        repeated in all four positions, used as the second iadd operand with
        destination `d`, and released again right after the instruction has
        been added.
        """
        stream = self.get_active_code()
        program = stream.prgm

        splat = program.acquire_register((value,) * 4)
        stream.add(cal.iadd(d, a, splat))
        program.release_register(splat)

        return
Ejemplo n.º 5
0
def TestSynIterInc():
  SIZE = 64

  # build and run the kernel
  prgm = env.Program()
  code = prgm.get_stream()

  code.add(cal.dcl_output(reg.o0, USAGE=cal.usage.pos))
  ones = prgm.acquire_register((1, 1, 1, 1))
  counter = prgm.acquire_register()
  code.add(cal.mov(counter, ones))

  for i in syn_iter(code, 4, step=1, mode=INC):
    code.add(cal.iadd(counter, counter, ones))

  code.add(cal.mov(reg.o0, counter.x))

  domain = (0, 0, SIZE, SIZE)
  proc = env.Processor(0)

  ext_output=proc.alloc_remote('i', 1, SIZE)
  prgm.set_binding(reg.o0, ext_output)

  prgm.add(code)
  proc.execute(prgm, domain)

  passed = True
  for i in xrange(0, SIZE):
    if ext_output[i] != 5:
      passed = False
  print "Passed == ", passed

  proc.free(ext_output)

  return
Ejemplo n.º 6
0
def test_1comp():
    proc = env.Processor(0)
    prgm = env.Program()
    code = prgm.get_stream()

    inp = proc.alloc_remote('i', 4, 1, 1)
    out = proc.alloc_remote('i', 1, 4, 1)

    for i in xrange(0, 4):
        inp[i] = i + 1
        out[i] = 0

    print "inp", inp[0:4]
    print "out", out[0:4]

    cal.set_active_code(code)

    cal.dcl_output(reg.o0, USAGE=cal.usage.generic)
    cal.dcl_resource(0, cal.pixtex_type.oned, cal.fmt.float,
                     UNNORM=True)  # positions

    r = prgm.acquire_register()

    cal.sample(0, 0, r.x000, r('0000'))

    #cal.iadd(r[0], r[0], r[1]('0x00'))
    #cal.iadd(r[0], r[0], r[2]('00x0'))
    #cal.iadd(r[0], r[0], r[3]('000x'))
    cal.iadd(r, r, r)

    cal.mov(reg.o0.x, r)

    prgm.set_binding(reg.i0, inp)
    prgm.set_binding(reg.o0, out)

    prgm.add(code)
    prgm.print_code()

    proc.execute(prgm, (0, 0, 4, 1))

    print "inp", inp[0:4]
    print "out", out[0:4]
    for i in xrange(0, 4):
        assert (out[i] == 2)

    return
Ejemplo n.º 7
0
def test_1comp():
  proc = env.Processor(0)
  prgm = env.Program()
  code = prgm.get_stream()

  inp = proc.alloc_remote('i', 4, 1, 1)
  out = proc.alloc_remote('i', 1, 4, 1)

  for i in xrange(0, 4):
    inp[i] = i + 1
    out[i] = 0

  print "inp", inp[0:4]
  print "out", out[0:4]
  
  cal.set_active_code(code)

  cal.dcl_output(reg.o0, USAGE=cal.usage.generic)
  cal.dcl_resource(0, cal.pixtex_type.oned, cal.fmt.float, UNNORM=True) # positions

  r = prgm.acquire_register()

  cal.sample(0, 0, r.x000, r('0000'))

  #cal.iadd(r[0], r[0], r[1]('0x00'))
  #cal.iadd(r[0], r[0], r[2]('00x0'))
  #cal.iadd(r[0], r[0], r[3]('000x'))
  cal.iadd(r, r, r)

  cal.mov(reg.o0.x, r)

  prgm.set_binding(reg.i0, inp)
  prgm.set_binding(reg.o0, out)

  prgm.add(code)
  prgm.print_code()

  proc.execute(prgm, (0, 0, 4, 1))

  print "inp", inp[0:4]
  print "out", out[0:4]
  for i in xrange(0, 4):
    assert(out[i] == 2)
  
  return
Ejemplo n.º 8
0
def FF(a1, b1, c1, d1, x1, s1, ac1):
    """Emit one FF step on a1 using the module-level xcode stream.

    Sequence emitted: F(b1, c1, d1, scratch); a1 += scratch; a1 += x1;
    a1 += ac1 (held in a register initialized with ac1 in all four
    positions); then a1 = (a1 << s1) | (a1 ushr s1-with-neg); a1 += b1.
    The shift pair is presumably a left-rotate by s1 -- confirm how the
    device treats the negated shift amount.  F is defined elsewhere and
    presumably writes its result to its last argument.
    All three registers acquired here are released before returning.
    """
    const_reg = xcode.acquire_register((ac1,) * 4)
    shl_part = xcode.acquire_register()
    shr_part = xcode.acquire_register()

    # a1 accumulates the F result, the message word and the constant
    F(b1, c1, d1, shl_part)
    for term in (shl_part, x1, const_reg):
        cal.iadd(a1, a1, term)

    # Combine the left-shifted and right-shifted halves, then add b1
    cal.ishl(shl_part, a1, s1)
    cal.ushr(shr_part, a1, s1(neg=('x', 'y', 'z', 'w')))
    cal.ior(a1, shl_part, shr_part)
    cal.iadd(a1, a1, b1)

    for scratch in (const_reg, shl_part, shr_part):
        xcode.release_register(scratch)
Ejemplo n.º 9
0
def FF(a1, b1, c1, d1, x1, s1, ac1):
  """Generate the instructions for a single FF operation on register a1.

  a1 receives, in order: the value F leaves in a scratch register for
  (b1, c1, d1), then x1, then the constant ac1; the sum is shifted left by
  s1 and OR-ed with an unsigned right shift by s1 carrying the neg modifier
  (presumably forming a rotate -- confirm), and finally b1 is added.
  Registers come from, and are returned to, the global xcode stream.
  """
  global xcode

  # Acquisition order is kept: constant first, then the two scratch registers
  k_reg = xcode.acquire_register((ac1, ac1, ac1, ac1))
  scratch_a = xcode.acquire_register()
  scratch_b = xcode.acquire_register()

  # Accumulate into a1
  F(b1, c1, d1, scratch_a)
  cal.iadd(a1, a1, scratch_a)
  cal.iadd(a1, a1, x1)
  cal.iadd(a1, a1, k_reg)

  # Shift both ways and merge
  negated_shift = s1(neg=('x', 'y', 'z', 'w'))
  cal.ishl(scratch_a, a1, s1)
  cal.ushr(scratch_b, a1, negated_shift)
  cal.ior(a1, scratch_a, scratch_b)

  cal.iadd(a1, a1, b1)

  xcode.release_register(k_reg)
  xcode.release_register(scratch_a)
  xcode.release_register(scratch_b)
Ejemplo n.º 10
0
def TestSynIterIncFloatExtStopExtStart():
  SIZE = 64

  # build and run the kernel
  prgm = env.Program()
  code = prgm.get_stream()

  code.add(cal.dcl_output(reg.o0, USAGE=cal.usage.pos))
  ones = prgm.acquire_register((1, 1, 1, 1))
  counter = prgm.acquire_register()
  code.add(cal.mov(counter, ones))

  stop = prgm.acquire_register((4.0, 4.0, 4.0, 4.0))
  start = prgm.acquire_register((2.0, 2.0, 2.0, 2.0))
  step = prgm.acquire_register((1.0, 1.0, 1.0, 1.0))

  fiter = syn_iter_float(code, stop, step=step, mode=INC)
  fiter.set_start_reg(start)
  for i in fiter:
    code.add(cal.iadd(counter, counter, ones))

  code.add(cal.mov(reg.o0, counter.x))

  domain = (0, 0, SIZE, SIZE)
  proc = env.Processor(0)

  ext_output=proc.alloc_remote('i', 1, SIZE, 1)
  prgm.set_binding(reg.o0, ext_output)

  prgm.add(code)
  proc.execute(prgm, domain)

  passed = True
  for i in xrange(0, SIZE):
    if ext_output[i] != 3:
      passed = False
  print "Passed == ", passed

  proc.free(ext_output)

  return
Ejemplo n.º 11
0
 def block(self, d, a, b):
     """Append an iadd with destination d and operands b and negated a.reg.

     a.reg is called with the neg modifier on x, y, z and w, so the emitted
     instruction presumably computes b - a -- confirm against the register
     modifier semantics.
     """
     stream = self.get_active_code()
     minus_a = a.reg(neg=('x', 'y', 'z', 'w'))
     stream.add(cal.iadd(d, b, minus_a))
     return
Ejemplo n.º 12
0
def MD5Transform(state, block, blocki):
  """Run one MD5 transform on the CAL device and add the result into state.

  Parameters:
    state  -- indexable holding four values; elements 0..3 are copied to the
              device and afterwards incremented in place by the four outputs.
    block, blocki -- forwarded to Decode together with the block buffer and
              the constant 64 (Decode is defined elsewhere; presumably it
              unpacks 64 bytes of `block` starting at `blocki` -- confirm).

  The instruction stream is generated only when the module-level `xcode` is
  None; later calls reuse the stream built by the first call and just rebind
  freshly allocated buffers.  Nothing is returned.
  """
  proc = env.Processor(0)
  # Remote buffers: the 4-word state, the 16-word message block, the result
  input_state = proc.alloc_remote('I', 4, 1, 1)
  input_block = proc.alloc_remote('I', 4, 4, 1)
  output = proc.alloc_remote('I', 4, 1, 1)


  for i in range(4):
    input_state[i] = state[i]
  Decode(input_block, block, blocki, 64)
  #print map(hex, input_block)

  global xcode
  # Build the kernel once and cache it in the global.
  # NOTE(review): `xcode is None` would be the idiomatic identity test.
  if xcode == None:
    xcode = env.InstructionStream()
    cal.set_active_code(xcode)


    # Per-round shift amounts, each replicated across a register.
    # NOTE(review): these registers are not released anywhere in this block.
    S11 = xcode.acquire_register((7, 7, 7, 7))
    S12 = xcode.acquire_register((12, 12, 12, 12))
    S13 = xcode.acquire_register((17, 17, 17, 17))
    S14 = xcode.acquire_register((22, 22, 22, 22))
    S21 = xcode.acquire_register((5, 5, 5, 5))
    S22 = xcode.acquire_register((9, 9, 9, 9))
    S23 = xcode.acquire_register((14, 14, 14, 14))
    S24 = xcode.acquire_register((20, 20, 20, 20))
    S31 = xcode.acquire_register((4, 4, 4, 4))
    S32 = xcode.acquire_register((11, 11, 11, 11))
    S33 = xcode.acquire_register((16, 16, 16, 16))
    S34 = xcode.acquire_register((23, 23, 23, 23))
    S41 = xcode.acquire_register((6, 6, 6, 6))
    S42 = xcode.acquire_register((10, 10, 10, 10))
    S43 = xcode.acquire_register((15, 15, 15, 15))
    S44 = xcode.acquire_register((21, 21, 21, 21))

    # Working state registers a..d and the sixteen message-word registers
    a = xcode.acquire_register()
    b = xcode.acquire_register()
    c = xcode.acquire_register()
    d = xcode.acquire_register()
    x = [xcode.acquire_register() for i in range(16)]
    # NOTE(review): r is acquired but never referenced again in this block.
    r = xcode.acquire_register()

    # cb0 carries the state (1 element), cb1 the block (4 elements)
    cal.dcl_cb('cb0[1]')
    cal.dcl_cb('cb1[4]')
    cal.dcl_output('o0', USAGE=cal.usage.generic)

    # Load a, b, c, d from the four components of cb0[0]
    cal.mov(a, 'cb0[0].x')
    cal.mov(b, 'cb0[0].y')
    cal.mov(c, 'cb0[0].z')
    cal.mov(d, 'cb0[0].w')
    # Load x[0..15] from the four components of cb1[0..3]
    for i in range(4):
      cal.mov(x[i*4], 'cb1[' + str(i) + '].x')
      cal.mov(x[i*4+1], 'cb1[' + str(i) + '].y')
      cal.mov(x[i*4+2], 'cb1[' + str(i) + '].z')
      cal.mov(x[i*4+3], 'cb1[' + str(i) + '].w')

    # Round 1
    FF (a, b, c, d, x[ 0], S11, 0xd76aa478); # 1
    FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); # 2
    FF (c, d, a, b, x[ 2], S13, 0x242070db); # 3
    FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); # 4
    FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); # 5
    FF (d, a, b, c, x[ 5], S12, 0x4787c62a); # 6
    FF (c, d, a, b, x[ 6], S13, 0xa8304613); # 7
    FF (b, c, d, a, x[ 7], S14, 0xfd469501); # 8
    FF (a, b, c, d, x[ 8], S11, 0x698098d8); # 9
    FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); # 10
    FF (c, d, a, b, x[10], S13, 0xffff5bb1); # 11
    FF (b, c, d, a, x[11], S14, 0x895cd7be); # 12
    FF (a, b, c, d, x[12], S11, 0x6b901122); # 13
    FF (d, a, b, c, x[13], S12, 0xfd987193); # 14
    FF (c, d, a, b, x[14], S13, 0xa679438e); # 15
    FF (b, c, d, a, x[15], S14, 0x49b40821); # 16

    # Round 2
    GG (a, b, c, d, x[ 1], S21, 0xf61e2562); # 17
    GG (d, a, b, c, x[ 6], S22, 0xc040b340); # 18
    GG (c, d, a, b, x[11], S23, 0x265e5a51); # 19
    GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); # 20
    GG (a, b, c, d, x[ 5], S21, 0xd62f105d); # 21
    GG (d, a, b, c, x[10], S22,  0x2441453); # 22
    GG (c, d, a, b, x[15], S23, 0xd8a1e681); # 23
    GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); # 24
    GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); # 25
    GG (d, a, b, c, x[14], S22, 0xc33707d6); # 26
    GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); # 27
    GG (b, c, d, a, x[ 8], S24, 0x455a14ed); # 28
    GG (a, b, c, d, x[13], S21, 0xa9e3e905); # 29
    GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); # 30
    GG (c, d, a, b, x[ 7], S23, 0x676f02d9); # 31
    GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); # 32

    # Round 3
    HH (a, b, c, d, x[ 5], S31, 0xfffa3942); # 33
    HH (d, a, b, c, x[ 8], S32, 0x8771f681); # 34
    HH (c, d, a, b, x[11], S33, 0x6d9d6122); # 35
    HH (b, c, d, a, x[14], S34, 0xfde5380c); # 36
    HH (a, b, c, d, x[ 1], S31, 0xa4beea44); # 37
    HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); # 38
    HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); # 39
    HH (b, c, d, a, x[10], S34, 0xbebfbc70); # 40
    HH (a, b, c, d, x[13], S31, 0x289b7ec6); # 41
    HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); # 42
    HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); # 43
    HH (b, c, d, a, x[ 6], S34,  0x4881d05); # 44
    HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); # 45
    HH (d, a, b, c, x[12], S32, 0xe6db99e5); # 46
    HH (c, d, a, b, x[15], S33, 0x1fa27cf8); # 47
    HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); # 48

    # Round 4
    II (a, b, c, d, x[ 0], S41, 0xf4292244); # 49
    II (d, a, b, c, x[ 7], S42, 0x432aff97); # 50
    II (c, d, a, b, x[14], S43, 0xab9423a7); # 51
    II (b, c, d, a, x[ 5], S44, 0xfc93a039); # 52
    II (a, b, c, d, x[12], S41, 0x655b59c3); # 53
    II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); # 54
    II (c, d, a, b, x[10], S43, 0xffeff47d); # 55
    II (b, c, d, a, x[ 1], S44, 0x85845dd1); # 56
    II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); # 57
    II (d, a, b, c, x[15], S42, 0xfe2ce6e0); # 58
    II (c, d, a, b, x[ 6], S43, 0xa3014314); # 59
    II (b, c, d, a, x[13], S44, 0x4e0811a1); # 60
    II (a, b, c, d, x[ 4], S41, 0xf7537e82); # 61
    II (d, a, b, c, x[11], S42, 0xbd3af235); # 62
    II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); # 63
    II (b, c, d, a, x[ 9], S44, 0xeb86d391); # 64

    # Pack a, b, c, d into one register via swizzled adds, then write o0.
    # NOTE(review): temp is not released in this block.
    temp = xcode.acquire_register()
    #cal.mov(temp.x___, a.x)
    #cal.mov(temp._y__, b.xx)
    #cal.mov(temp.__z_, c.xxx)
    #cal.mov(temp.___w, d.xxxx)
    cal.iadd(temp, a.x000, b('0x00'))
    cal.iadd(temp, temp, c('00x0'))
    cal.iadd(temp, temp, d('000x'))
    cal.mov('o0', temp)

    xcode.release_register(a)
    xcode.release_register(b)
    xcode.release_register(c)
    xcode.release_register(d)
    for xi in x:
      xcode.release_register(xi)

    #for i, inst in enumerate(xcode._instructions):
    #  print inst.render()

  # Bind this call's buffers (done on every call, cached kernel or not)
  xcode.set_remote_binding('cb0', input_state)
  xcode.set_remote_binding('cb1', input_block)
  xcode.set_remote_binding('o0', output)

  # Execute over a 1x1 domain
  domain = (0, 0, 1, 1)
  proc.execute(xcode, domain)

  # Add the four output values into the caller's state in place
  state[0] += output[0]
  state[1] += output[1]
  state[2] += output[2]
  state[3] += output[3]

  print 'input  = ', map(hex, input_state)
  print 'output = ', map(hex, output)

  proc.free_remote(input_state)
  proc.free_remote(input_block)
  proc.free_remote(output)