Beispiel #1
0
  def synthesize(self, code):
    """
    Emit the kernel into `code` and send its result to the outbound mailbox.

    The input vector is copied out of register 5 into a scratch register,
    self.popc() and self.reduce_word() are inlined on it, and the reduced
    value is written to the dma.SPU_WrOutMbox channel.

    `code` is made the active instruction stream for the duration of the
    call; the previously active stream is reinstated before returning.
    Every scratch register acquired here is released again, so repeated
    calls do not drain the register pool of `code`.
    """
    old_code = spu.get_active_code()
    spu.set_active_code(code)

    # Reserve two variable registers
    count  = code.acquire_register()
    result = code.acquire_register()

    # 'Load' the input vector x from register 5
    x = code.acquire_register()
    spu.ai(x, 5, 0)

    # Zero count and result
    spu.xor(count, count, count)
    spu.xor(result, result, result)

    # Inline the popc and reduce operations
    self.popc(count, x)
    self.reduce_word(result, count)

    # Send the result to the caller
    spu.wrch(result, dma.SPU_WrOutMbox)

    # The result has been handed to the mailbox above, so none of the
    # scratch registers is needed by the generated code past this point.
    # Release in reverse order of acquisition.
    code.release_register(x)
    code.release_register(result)
    code.release_register(count)

    spu.set_active_code(old_code)
    return
Beispiel #2
0
def fdiv(code, d, x, y, one = None):
  """
  Emit single-precision floating point division, d = x / y.

  The quotient is formed as x * (1/y); the reciprocal comes from the
  frest/fi estimate followed by one fnms/fma refinement step.

  code -- object the scratch registers are acquired from and released to
  d    -- destination register for the quotient
  x, y -- dividend and divisor registers
  one  -- optional register used as the constant one in the refinement
          step; when omitted, a temporary is acquired, loaded here and
          released again together with the other scratch registers

  NOTE(review): instructions are issued through the spu module without
  passing `code`; presumably the caller has already made `code` the
  active stream -- confirm.
  """
  recip = code.acquire_registers(3)
  residual = code.acquire_register()

  # Everything collected in here is handed back at the end
  scratch = list(recip)
  scratch.append(residual)

  if one is None:
    one = code.acquire_register()
    scratch.append(one)

    # Integer 1 converted to float
    # NOTE(review): 155 is the conversion's scale operand -- confirm encoding
    spu.xor(one, one, one)
    spu.ai(one, one, 1)
    spu.cuflt(one, one, 155)

  estimate = recip[0]
  interpolated = recip[1]
  inverse = recip[2]

  # 1/y (SPU ISA 1.1, p208, Normal case)
  spu.frest(estimate, y)
  spu.fi(interpolated, y, estimate)
  spu.fnms(residual, y, interpolated, one)
  spu.fma(inverse, residual, interpolated, interpolated)

  # d = x * (1/y)
  spu.fm(d, x, inverse)

  code.release_registers(scratch)
Beispiel #3
0
  def _ab(self, x, y, ab, temp):
    """
    Add a count derived from (x XOR y) to the accumulator ab.

    x, y -- input registers
    ab   -- accumulator register, updated in place
    temp -- scratch register, overwritten
    """
    # Bits in which x and y differ
    spu.xor(temp, x, y)

    # Count per byte, then sum the byte counts
    # NOTE(review): the literal 0 is the third sumb operand -- presumably
    # register 0; confirm what it is expected to hold
    spu.cntb(temp, temp)
    spu.sumb(temp, temp, 0)

    # ab += temp
    spu.a(ab, ab, temp)
Beispiel #4
0
  def synthesize_constants(self, code):
    """
    Acquire a register from `code`, load it with the constant one and keep
    it as self._one.

    `code` is the active stream while the load instructions are issued.
    The stream that was active on entry is reinstated afterwards, unless
    there was none.
    """
    previous = spu.get_active_code()
    spu.set_active_code(code)

    one = code.acquire_register()
    self._one = one

    # Integer 1 converted to float
    spu.xor(one, one, one)
    spu.ai(one, one, 1)
    spu.cuflt(one, one, 155)

    if previous is None:
      return
    spu.set_active_code(previous)
Beispiel #5
0
  def block(self):
    """
    Emit the block that packs and stores one result, then branches back.

    The current length of the active stream is recorded as the start of the
    block, the slot at self._branch_idx is overwritten, the x offset, y
    offset and score are packed into self._save_value and stored at
    self._count relative to self._md_results.r_addr, and a relative branch
    back towards self._branch_idx is appended.
    """
    code = spu.get_active_code()
    # Index of the first instruction of this block
    self._block_idx = len(code)

    # --> add the branch instruction (use brz (?) to always branch, nop to never branch)
    # The slot is currently filled with a nop; the branching variants are
    # kept below, commented out.
    code[self._branch_idx] = spu.nop(0, ignore_active = True)
    # code[self._branch_idx] = spu.brnz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)
    # code[self._branch_idx] = spu.brz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)

    # Pack result into vector
    #   [x][y][score][--]

    # Zero the save value
    spu.xor(self._save_value, self._save_value, self._save_value)

    # Copy the score
    # (select through self._word_mask, then rotate the quadword by 12 to
    # free the slot for the next field)
    spu.selb(self._save_value, self._save_value, self._score, self._word_mask)    
    spu.rotqbyi(self._save_value, self._save_value, 12)

    # Copy the y value
    spu.selb(self._save_value, self._save_value, self._y_off, self._word_mask)
    spu.rotqbyi(self._save_value, self._save_value, 12)        

    # Copy the x value
    spu.selb(self._save_value, self._save_value, self._x_off, self._word_mask)
    
    # Save value to local store
    spu.stqx(self._save_value, self._count, self._md_results.r_addr)
    
    # Advance the store offset by 16 -- presumably one quadword; confirm
    self._count.v = self._count.v + 16

    # --> MemorySave test
    cmp = self._save_value # reuse the save register
    # NOTE(review): the value returned by spu.ceq.ex(...) is discarded --
    # confirm that this call form actually emits the comparison into cmp.
    spu.ceq.ex(cmp, self._count, self._md_results.r_size)

    # Optional hook, given the comparison register and the offset
    if self._save_op is not None:
      self._save_op.test(cmp, self._count)
      
    # Just reset for now
    # (self._count is selected against the literal 0 under the cmp mask)
    spu.selb(self._count, self._count, 0, cmp)

    # Return to the loop
    # Negative relative offset measured from the end of this block back to
    # the instruction after self._branch_idx.
    idx = len(code)
    spu.br(- (idx - self._branch_idx - 1))
    
    return
Beispiel #6
0
 def _ab_c(self, x, y, ab, c, ab_temp, c_temp):
   """
   Update the ab and c accumulators from x and y in one pass.

   ab is advanced by a count derived from (x XOR y), c by a count derived
   from (x AND y).  The two instruction chains are independent of each
   other and are emitted alternately, one step of each at a time.

   ab_temp, c_temp -- scratch registers, overwritten
   """
   # Step 1: combine the inputs
   spu.xor(ab_temp, x, y)
   spu.and_(c_temp, x, y)

   # Step 2: count per byte
   spu.cntb(ab_temp, ab_temp)
   spu.cntb(c_temp, c_temp)

   # Step 3: sum the byte counts
   # NOTE(review): the literal 0 is the third sumb operand -- presumably
   # register 0; confirm what it is expected to hold
   spu.sumb(ab_temp, ab_temp, 0)
   spu.sumb(c_temp, c_temp, 0)

   # Step 4: accumulate
   spu.a(ab, ab, ab_temp)
   spu.a(c, c, c_temp)
  def row_complete(self, code):
    """
    Write the finished row out to the framebuffer and step to the next row.

    A memory descriptor of self.w 'I' elements, addressed through
    self.y_offset, is put from self.lsa.  Afterwards self.y_offset is
    advanced by self.stride and self.x_offset is cleared.

    Raises Exception when the width, the lsa or the y offset is still None.
    """
    if self.w is None:
      raise Exception('Please set width')
    if self.lsa is None:
      raise Exception('Please set lsa')
    if self.y_offset is None:
      raise Exception('Please call setup')

    row_desc = spuiter.memory_desc('I', size = self.w)
    row_desc.set_addr_reg(self.y_offset)
    row_desc.put(code, self.lsa)

    # Next row, first column
    self.y_offset.v = self.y_offset + self.stride
    spu.xor(self.x_offset, self.x_offset, self.x_offset)
Beispiel #8
0
def TestParams():
    """
    Check that ten execution parameters reach the SPU program.

    The generated program sums the ten parameter sources into the return
    register and compares the sum with 55.  The host passes 1..10 as
    p1..p10 and expects the sum 55 back together with stop code 0x200A.
    """
    prgm = Program()
    code = prgm.get_stream()
    proc = Processor()

    total = prgm.gp_return
    scratch = prgm.acquire_register()

    # total = 0
    code.add(spu.xor(total, total, total))

    # total += each parameter in turn
    sources = (
        spu_param_1, spu_param_2, spu_param_3, spu_param_4, spu_param_5,
        spu_param_6, spu_param_7, spu_param_8, spu_param_9, spu_param_10,
    )
    for source in sources:
        copy_param(code, scratch, source)
        code.add(spu.a(total, total, scratch))

    # scratch = (total == 55); on a mismatch step over the first stop
    code.add(spu.ceqi(scratch, total, 55))
    code.add(spu.brz(scratch, 2))
    code.add(spu.stop(0x200A))
    code.add(spu.stop(0x200B))

    # p1 = 1, p2 = 2, ..., p10 = 10
    params = spu_exec.ExecParams()
    for n in range(1, 11):
        setattr(params, 'p%d' % n, n)

    prgm += code
    outcome = proc.execute(prgm, params=params, stop=True)

    assert outcome[0] == 55
    assert outcome[1] == 0x200A
Beispiel #9
0
def SimpleSPU():
  """
  A very simple SPU that computes 11 + 31 and returns 0xA on success.
  """
  prgm = env.Program()
  code = prgm.get_stream()
  proc = env.Processor()

  spu.set_active_code(code)

  # Acquire two registers
  #x    = code.acquire_register()
  x = code.gp_return
  test = prgm.acquire_register(reg_name = 55)

  spu.xor(x, x, x) # zero x
  spu.ai(x, x, 11) # x = x + 11
  spu.ai(x, x, 31) # x = x + 31

  spu.ceqi(test, x, 42) # test = (x == 42)

  # If test is false (all 0s), skip the stop(0x100A) instruction
  spu.brz(test, 2)
  spu.stop(0x100A)
  spu.stop(0x100B)

  prgm.add(code) 
  prgm.print_code(hex = True) 
  r = proc.execute(prgm, mode = 'int', stop = True, debug = True) 
  assert(r[0] == 42)
  assert(r[1] == 0x100A)

  prgm = env.Program()
  code = prgm.get_stream()
  spu.set_active_code(code)

  util.load_float(code, code.fp_return, 3.14)

  prgm.add(code)
  prgm.print_code(hex = True)
  r = proc.execute(prgm, mode = 'fp')
  print r
  return
Beispiel #10
0
# Script entry: builds an SPU loop that, ITERS times, reads the inbound
# mailbox, increments a counter and writes it to the outbound interrupt
# mailbox.
if __name__ == '__main__':
  # Number of loop iterations the generated program performs
  ITERS = 500000
  #ITERS = 15

  prgm = env.Program()
  code = prgm.get_stream()
  proc = env.Processor()
  spu.set_active_code(code)
  # NOTE(review): psmap and data are not used by the statements visible
  # here -- presumably consumed further down; confirm.
  psmap = extarray.extarray('I', 131072 / 4)
  data = extarray.extarray('I', range(0, 16))

  # r_sum is the return register, r_cnt the remaining iteration count
  r_sum = prgm.gp_return
  r_cnt = prgm.acquire_register()

  # r_sum = 0, r_cnt = ITERS
  spu.xor(r_sum, r_sum, r_sum)
  load_word(code, r_cnt, ITERS)

  # Top of the loop
  lbl_loop = prgm.get_label("loop")
  code.add(lbl_loop)

  # Read one word from the inbound mailbox into a fresh register
  reg = dma.spu_read_in_mbox(code)

  # Count the message and report the running count
  spu.ai(r_sum, r_sum, 1)
  dma.spu_write_out_intr_mbox(code, r_sum)
  #dma.spu_write_out_mbox(code, reg)

  # The mailbox value itself is not needed any further
  prgm.release_register(reg)

  # Loop until the counter reaches zero
  spu.ai(r_cnt, r_cnt, -1)
  spu.brnz(r_cnt, lbl_loop)
Beispiel #11
0
# Script entry: builds an SPU loop that, ITERS times, reads the inbound
# mailbox, increments a counter and writes it to the outbound interrupt
# mailbox.
if __name__ == '__main__':
    # Number of loop iterations the generated program performs
    ITERS = 500000
    #ITERS = 15

    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()
    spu.set_active_code(code)
    # NOTE(review): psmap and data are not used by the statements visible
    # here -- presumably consumed further down; confirm.
    psmap = extarray.extarray('I', 131072 / 4)
    data = extarray.extarray('I', range(0, 16))

    # r_sum is the return register, r_cnt the remaining iteration count
    r_sum = prgm.gp_return
    r_cnt = prgm.acquire_register()

    # r_sum = 0, r_cnt = ITERS
    spu.xor(r_sum, r_sum, r_sum)
    load_word(code, r_cnt, ITERS)

    # Top of the loop
    lbl_loop = prgm.get_label("loop")
    code.add(lbl_loop)

    # Read one word from the inbound mailbox into a fresh register
    reg = dma.spu_read_in_mbox(code)

    # Count the message and report the running count
    spu.ai(r_sum, r_sum, 1)
    dma.spu_write_out_intr_mbox(code, r_sum)
    #dma.spu_write_out_mbox(code, reg)

    # The mailbox value itself is not needed any further
    prgm.release_register(reg)

    # Loop until the counter reaches zero
    spu.ai(r_cnt, r_cnt, -1)
    spu.brnz(r_cnt, lbl_loop)
Beispiel #12
0
def SimpleSPU():
    """
  A very simple SPU that computes 11 + 31 and returns 0xA on success.
  """
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    # Acquire two registers
    #x    = code.acquire_register()
    x = code.gp_return
    test = code.acquire_register()

    lbl_brz = code.get_label("BRZ")
    lbl_skip = code.get_label("SKIP")

    spu.hbrr(lbl_brz, lbl_skip)
    spu.xor(x, x, x)  # zero x
    spu.ai(x, x, 11)  # x = x + 11
    spu.ai(x, x, 31)  # x = x + 31

    spu.ceqi(test, x, 42)  # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    code.add(lbl_brz)
    spu.brz(test, lbl_skip)
    spu.stop(0x100A)
    code.add(lbl_skip)
    spu.stop(0x100B)

    code.print_code(hex=True, pro=True, epi=True)
    r = proc.execute(code, mode='int', stop=True)
    print "ret", r
    assert (r[0] == 42)
    assert (r[1] == 0x100A)

    code = InstructionStream()
    spu.set_active_code(code)

    lbl_loop = code.get_label("LOOP")
    lbl_break = code.get_label("BREAK")

    r_cnt = code.acquire_register()
    r_stop = code.acquire_register()
    r_cmp = code.acquire_register()
    r_foo = code.gp_return

    spu.ori(r_foo, code.r_zero, 0)
    spu.ori(r_cnt, code.r_zero, 0)
    util.load_word(code, r_stop, 10)

    code.add(lbl_loop)

    spu.ceq(r_cmp, r_cnt, r_stop)
    spu.brnz(r_cmp, lbl_break)
    spu.ai(r_cnt, r_cnt, 1)

    spu.a(r_foo, r_foo, r_cnt)

    spu.br(lbl_loop)
    code.add(lbl_break)

    code.print_code()
    r = proc.execute(code, mode='int', stop=True)
    print "ret", r
    assert (r[0] == 55)

    return
Beispiel #13
0
def SimpleSPU():
  """
  A very simple SPU that computes 11 + 31 and returns 0xA on success.
  """
  prgm = env.Program()
  code = prgm.get_stream()
  proc = env.Processor()

  spu.set_active_code(code)
  

  # Acquire two registers
  #x    = code.acquire_register()
  x = prgm.gp_return
  test = prgm.acquire_register()

  lbl_brz = prgm.get_label("BRZ")
  lbl_skip = prgm.get_label("SKIP")

  spu.hbrr(lbl_brz, lbl_skip)
  spu.xor(x, x, x) # zero x
  spu.ai(x, x, 11) # x = x + 11
  spu.ai(x, x, 31) # x = x + 31

  spu.ceqi(test, x, 42) # test = (x == 42)

  # If test is false (all 0s), skip the stop(0x100A) instruction
  code.add(lbl_brz)
  spu.brz(test, lbl_skip)
  spu.stop(0x100A)
  code.add(lbl_skip)
  spu.stop(0x100B)

  prgm.add(code) 
  prgm.print_code() 
  r = proc.execute(prgm, mode = 'int', stop = True) 
  print "ret", r
  assert(r[0] == 42)
  assert(r[1] == 0x100A)


  prgm = env.Program()
  code = prgm.get_stream()
  spu.set_active_code(code)

  lbl_loop = prgm.get_label("LOOP")
  lbl_break = prgm.get_label("BREAK")

  r_cnt = prgm.acquire_register()
  r_stop = prgm.acquire_register()
  r_cmp = prgm.acquire_register()
  r_foo = prgm.gp_return

  spu.ori(r_foo, prgm.r_zero, 0)
  spu.ori(r_cnt, prgm.r_zero, 0)
  util.load_word(code, r_stop, 10)

  code.add(lbl_loop)

  spu.ceq(r_cmp, r_cnt, r_stop)
  spu.brnz(r_cmp, lbl_break)
  spu.ai(r_cnt, r_cnt, 1)

  spu.a(r_foo, r_foo, r_cnt)

  spu.br(lbl_loop)
  code.add(lbl_break)

  prgm.add(code)
  prgm.print_code()
  r = proc.execute(prgm, mode = 'int', stop = True)
  print "ret", r
  assert(r[0] == 55)

  return
Beispiel #14
0
  def synthesize(self, code):
    """
    Emit instructions into `code` that compute a base-2 logarithm of self.x
    and leave it in self.result.

    The emitted sequence follows the C reference quoted in the comments:
    split x into exponent and mantissa, normalize the mantissa around
    SQRTHF, evaluate a polynomial in the normalized mantissa, and rescale
    to base 2 before adding the exponent.  Constants are read from
    self.consts.

    `code` is the active instruction stream while instructions are issued;
    the stream that was active on entry is reinstated before returning.

    Raises Exception if self.x or self.result has not been set.  Both are
    checked before the active stream is switched, so a failed check leaves
    the spu module's active stream untouched.
    """
    if self.x is None: raise Exception("Please set x")
    if self.result is None: raise Exception("Please set result")

    old_code = spu.get_active_code()
    spu.set_active_code(code)

    # exponent
    e = var.Word()

    # Working values
    x = var.Word()
    y = var.Word()
    z = var.Word()

    # Selection mask for the normalization step
    # (not called `cmp`, which would shadow the Python 2 builtin)
    lt_mask = var.Bits()
    tmp = var.Word()

    spu.xor(lt_mask, lt_mask, lt_mask)
    spu.xor(tmp, tmp, tmp)

    # Set the working x
    x.v = self.x

    # Extract the exponent
    # int e = (((*(unsigned int *) &x) >> 23) & 0xff) - 0x7e;
    e.v = x >> self.consts['_23']
    e.v = spu.andi.ex(e, 0xff)
    e.v = spu.ai.ex(e, 0x382) # 0x382 == (- 0x7E) using 10 bits

    # Extract the mantissa
    x.v = x & self.consts['M1'] # *(unsigned int*)&x &= 0x807fffff;
    x.v = x | self.consts['M2'] # *(unsigned int*)&x |= 0x3f000000;

    # Normalize.  Both branches of the C `if` are computed and the result
    # is picked with selb afterwards; y, z and tmp double as temporaries
    # for the branch values.
    x1, x2, e1 = y, z, tmp

    # if (x < SQRTHF)
    lt_mask.v = spu.fcgt.ex(self.consts['SQRTHF'], x)

    # (True) { ... }
    e1.v = spu.ai.ex(e, -1)                  #   e -= 1;
    x1.v = spu.fa.ex(x, x)                   #   x = x + x - 1.0;
    x1.v = spu.fs.ex(x1, self.consts['ONE']) #     ""  ""

    # (False) { ... }
    x2.v = spu.fs.ex(x, self.consts['ONE'])  #   x = x - 1.0;

    # Select the True/False values based on the mask
    e.v = spu.selb.ex(e,  e1, lt_mask)
    x.v = spu.selb.ex(x2, x1, lt_mask)

    # Compute polynomial
    z.v = spu.fm.ex(x, x)                      #  z = x * x;

    y.v = spu.fms.ex(self.consts['C1'], x,     #  y = (((((((( 7.0376836292E-2 * x
                     self.consts['C2'])        #          - 1.1514610310E-1) * x
    y.v = spu.fma.ex(y, x, self.consts['C3'])  #          + 1.1676998740E-1) * x
    y.v = spu.fms.ex(y, x, self.consts['C4'])  #          - 1.2420140846E-1) * x
    y.v = spu.fma.ex(y, x, self.consts['C5'])  #          + 1.4249322787E-1) * x
    y.v = spu.fms.ex(y, x, self.consts['C6'])  #          - 1.6668057665E-1) * x
    y.v = spu.fma.ex(y, x, self.consts['C7'])  #          + 2.0000714765E-1) * x
    y.v = spu.fms.ex(y, x, self.consts['C8'])  #          - 2.4999993993E-1) * x
    y.v = spu.fma.ex(y, x, self.consts['C9'])  #          + 3.3333331174E-1)
    y.v = spu.fm.ex(y, x)                      #   * x
    y.v = spu.fm.ex(y, z)                      #   * z;

    y.v = spu.fma.ex(self.consts['C10'], z, y) #  y += -0.5 * z;

    # Convert to log base 2
    z.v = spu.fm.ex( y, self.consts['LOG2EA'])     # z = y * LOG2EA;
    z.v = spu.fma.ex(x, self.consts['LOG2EA'], z)  # z += x * LOG2EA;
    z.v = spu.fa.ex(z, y)                          # z += y;
    z.v = spu.fa.ex(z, x)                          # z += x;
    e.v = spu.csflt.ex(e, 155)                     # z += (float) e;
    z.v = spu.fa.ex(z, e)                          #  ""  ""

    # Copy z into the caller's result register
    spu.ai(self.result, z, 0)       # return z

    spu.set_active_code(old_code)
    return
Beispiel #15
0
    def synthesize(self, code):
        """
        Emit instructions into `code` that compute a base-2 logarithm of
        self.x and leave it in self.result.

        The emitted sequence follows the C reference quoted in the
        comments: split x into exponent and mantissa, normalize the
        mantissa around SQRTHF, evaluate a polynomial in the normalized
        mantissa, and rescale to base 2 before adding the exponent.
        Constants are read from self.consts.

        `code` is the active instruction stream while instructions are
        issued; the stream that was active on entry is reinstated before
        returning.

        Raises Exception if self.x or self.result has not been set.  Both
        are checked before the active stream is switched, so a failed check
        leaves the spu module's active stream untouched.
        """
        if self.x is None: raise Exception("Please set x")
        if self.result is None: raise Exception("Please set result")

        old_code = spu.get_active_code()
        spu.set_active_code(code)

        # exponent
        e = var.Word()

        # Working values
        x = var.Word()
        y = var.Word()
        z = var.Word()

        # Selection mask for the normalization step
        # (not called `cmp`, which would shadow the Python 2 builtin)
        lt_mask = var.Bits()
        tmp = var.Word()

        spu.xor(lt_mask, lt_mask, lt_mask)
        spu.xor(tmp, tmp, tmp)

        # Set the working x
        x.v = self.x

        # Extract the exponent
        # int e = (((*(unsigned int *) &x) >> 23) & 0xff) - 0x7e;
        e.v = x >> self.consts['_23']
        e.v = spu.andi.ex(e, 0xff)
        e.v = spu.ai.ex(e, 0x382)  # 0x382 == (- 0x7E) using 10 bits

        # Extract the mantissa
        x.v = x & self.consts['M1']  # *(unsigned int*)&x &= 0x807fffff;
        x.v = x | self.consts['M2']  # *(unsigned int*)&x |= 0x3f000000;

        # Normalize.  Both branches of the C `if` are computed and the
        # result is picked with selb afterwards; y, z and tmp double as
        # temporaries for the branch values.
        x1, x2, e1 = y, z, tmp

        # if (x < SQRTHF)
        lt_mask.v = spu.fcgt.ex(self.consts['SQRTHF'], x)

        # (True) { ... }
        e1.v = spu.ai.ex(e, -1)                   #   e -= 1;
        x1.v = spu.fa.ex(x, x)                    #   x = x + x - 1.0;
        x1.v = spu.fs.ex(x1, self.consts['ONE'])  #     ""  ""

        # (False) { ... }
        x2.v = spu.fs.ex(x, self.consts['ONE'])   #   x = x - 1.0;

        # Select the True/False values based on the mask
        e.v = spu.selb.ex(e, e1, lt_mask)
        x.v = spu.selb.ex(x2, x1, lt_mask)

        # Compute polynomial
        z.v = spu.fm.ex(x, x)                      #  z = x * x;

        #  y = (((((((( 7.0376836292E-2 * x - 1.1514610310E-1) * x
        y.v = spu.fms.ex(self.consts['C1'], x, self.consts['C2'])
        y.v = spu.fma.ex(y, x, self.consts['C3'])  #  + 1.1676998740E-1) * x
        y.v = spu.fms.ex(y, x, self.consts['C4'])  #  - 1.2420140846E-1) * x
        y.v = spu.fma.ex(y, x, self.consts['C5'])  #  + 1.4249322787E-1) * x
        y.v = spu.fms.ex(y, x, self.consts['C6'])  #  - 1.6668057665E-1) * x
        y.v = spu.fma.ex(y, x, self.consts['C7'])  #  + 2.0000714765E-1) * x
        y.v = spu.fms.ex(y, x, self.consts['C8'])  #  - 2.4999993993E-1) * x
        y.v = spu.fma.ex(y, x, self.consts['C9'])  #  + 3.3333331174E-1)
        y.v = spu.fm.ex(y, x)                      #   * x
        y.v = spu.fm.ex(y, z)                      #   * z;

        y.v = spu.fma.ex(self.consts['C10'], z, y)  #  y += -0.5 * z;

        # Convert to log base 2
        z.v = spu.fm.ex(y, self.consts['LOG2EA'])      # z = y * LOG2EA;
        z.v = spu.fma.ex(x, self.consts['LOG2EA'], z)  # z += x * LOG2EA;
        z.v = spu.fa.ex(z, y)                          # z += y;
        z.v = spu.fa.ex(z, x)                          # z += x;
        e.v = spu.csflt.ex(e, 155)                     # z += (float) e;
        z.v = spu.fa.ex(z, e)                          #  ""  ""

        # Copy z into the caller's result register
        spu.ai(self.result, z, 0)  # return z

        spu.set_active_code(old_code)
        return