Example #1
0
  def _update_inc_count(self):
    """
    Emit the instructions that shift an incrementing iterator's counter
    (and its stop value, when it has one) to the current block.

    The total span being iterated (count - start) is stored on the program
    as raw_data_size.  The block size and block offset are not computed
    here; they are read from the program's block registers, which are
    acquired by this method and left acquired when it returns.
    """
    obj = self.obj
    code = obj.code
    prgm = code.prgm

    prgm.acquire_block_registers()
    block_size_reg = prgm.r_block_size
    offset_reg = prgm.r_offset

    # Record the total span; the per-block size is derived from it elsewhere
    # (the PPC-style inline divw/mullw computation is not used here).
    prgm.raw_data_size = self.get_count() - self.get_start()

    # count = offset + count
    # (matters for ranges whose r_count does not start at 0)
    code.add(spu.a(obj.r_count, offset_reg, obj.r_count))

    # stop = block_size + count
    if obj.r_stop is not None:
      code.add(spu.a(obj.r_stop, block_size_reg, obj.r_count))
Example #2
0
def vector_from_array(code, r_target, a):
  """
  Emit the instructions that pack a[0], a[1], a[2] and a[3] into the
  vector register r_target.

  Each element is loaded into its own register, elements 1..3 are rotated
  by 12, 8 and 4 bytes respectively, and the four registers are then added
  together into r_target.  Three temporary registers are acquired from
  code.prgm and released again before returning.
  """
  prgm = code.prgm

  # Element 0 is built directly in the target; the rest need scratch space.
  scratch = [prgm.acquire_register() for _ in range(3)]

  load_word(code, r_target, a[0], True)

  # Load each remaining element and rotate the quadword by bytes so that it
  # lands in its own word slot.
  # NOTE(review): the trailing True presumably asks load_word to clear the
  # other slots, which is what makes the adds below a pure merge -- confirm.
  for reg, index, rot_bytes in zip(scratch, (1, 2, 3), (12, 8, 4)):
    load_word(code, reg, a[index], True)
    code.add(spu.rotqbyi(reg, reg, rot_bytes))

  # Merge the partial vectors into the target.
  for reg in scratch:
    code.add(spu.a(r_target, r_target, reg))

  for reg in scratch:
    prgm.release_register(reg)
Example #3
0
def vector_from_array(code, r_target, a):
    """
    Emit the instructions that pack a[0], a[1], a[2] and a[3] into the
    vector register r_target.

    Each element is loaded into its own register, elements 1..3 are rotated
    by 12, 8 and 4 bytes respectively, and the four registers are then added
    together into r_target.  Three temporary registers are acquired from
    code.prgm and released again before returning.
    """
    prgm = code.prgm

    # Element 0 is built directly in the target; the rest need scratch space.
    scratch = [prgm.acquire_register() for _ in range(3)]

    load_word(code, r_target, a[0], True)

    # Load each remaining element and rotate the quadword by bytes so that
    # it lands in its own word slot.
    # NOTE(review): the trailing True presumably asks load_word to clear the
    # other slots, which is what makes the adds below a pure merge -- confirm.
    for reg, index, rot_bytes in zip(scratch, (1, 2, 3), (12, 8, 4)):
        load_word(code, reg, a[index], True)
        code.add(spu.rotqbyi(reg, reg, rot_bytes))

    # Merge the partial vectors into the target.
    for reg in scratch:
        code.add(spu.a(r_target, r_target, reg))

    for reg in scratch:
        prgm.release_register(reg)
Example #4
0
  def _ab(self, x, y, ab, temp):
    """
    Add the bit counts of (x XOR y) to the accumulator ab.

    temp is overwritten as scratch space.  The instructions are issued
    without an explicit stream, so an active code stream is presumably
    expected to be set on the spu module -- confirm against the caller.
    """
    spu.xor(temp, x, y)         # temp = x ^ y
    spu.cntb(temp, temp)        # count the set bits of every byte
    spu.sumb(temp, temp, 0)     # sum those per-byte counts
    spu.a(ab, ab, temp)         # ab = ab + temp
Example #5
0
  def _c(self, x, y, c, temp):
    """
    Add the bit counts of (x AND y) to the accumulator c.

    temp is overwritten as scratch space.  The instructions are issued
    without an explicit stream, so an active code stream is presumably
    expected to be set on the spu module -- confirm against the caller.
    """
    spu.and_(temp, x, y)        # temp = x & y
    spu.cntb(temp, temp)        # count the set bits of every byte
    spu.sumb(temp, temp, 0)     # sum those per-byte counts
    spu.a(c, c, temp)           # c = c + temp
Example #6
0
def TestInt2(i0 = 0, i1 = 1):
  """
  Build and execute a small SPU program that packs four Fibonacci-style
  values into one quadword register and then runs a short counted loop.

  i0, i1 -- the first two values of the sequence; i2 = i0 + i1 and
            i3 = i1 + i2 are derived from them.

  Nothing is returned; the result of proc.execute() is currently unused.
  """
  i2 = i0 + i1
  i3 = i1 + i2

  code = InstructionStream()
  proc = Processor()

  # Hard-coded register numbers used by this test
  r_loop = 4
  r_address = 5
  r0 = 6
  r1 = 7
  r2 = 8
  r3 = 9

  # Load arguments into a quadword

  #################
  # Pack quadword #
  #################

  def load_value_int32(code, reg, value, clear = False):
    # value is expected to be a 32 bit integer.  Floor division keeps the
    # upper halfword an integer regardless of the division semantics in use.
    code.add(spu.ilhu(reg, value // pow(2, 16)))  # immediate load halfword upper
    code.add(spu.iohl(reg, value % pow(2, 16)))   # immediate or halfword lower
    if clear:
      code.add(spu.shlqbyi(reg, reg, 12)) # shift left qw by bytes, clears right bytes
    return

  load_value_int32(code, r0, i0, True)
  load_value_int32(code, r1, i1, True)
  code.add(spu.rotqbyi(r1, r1, 12)) # rotate qw by bytes
  load_value_int32(code, r2, i2, True)
  code.add(spu.rotqbyi(r2, r2, 8))
  load_value_int32(code, r3, i3, True)
  code.add(spu.rotqbyi(r3, r3, 4))
  code.add(spu.a(r0, r0, r1))
  code.add(spu.a(r0, r0, r2))
  code.add(spu.a(r0, r0, r3))

  ##########

  # Main loop to calculate Fibonacci sequence

  # The helper's keyword is `clear`; the previous `clear_bits` spelling
  # raised TypeError before any loop code was generated.
  load_value_int32(code, r_address, pow(2, 16), clear = False) # start at 64K

  load_value_int32(code, r_loop, 0, clear = False)
  start_label = code.size() + 1

  code.add(spu.sfi(r_loop, r_loop, 1))

  # Position of the branch instruction, using the same convention as
  # start_label so the difference is the distance back to the loop start.
  # NOTE(review): the original subtracted from an undefined name `next`
  # (which resolved to the builtin and raised TypeError); this replacement
  # is the most plausible intent -- confirm the offset and its units.
  branch_label = code.size() + 1
  code.add(spu.brnz(r_loop, (-(branch_label - start_label) * spu.WORD_SIZE)))

  code.add(spu.stop(0x2005))

  r = proc.execute(code)
  # assert(r == 12)
  # print 'int result:', r

  return
Example #7
0
def TestInt2(i0 = 0, i1 = 1):
  """
  Build and execute a small SPU program that packs four Fibonacci-style
  values into one quadword register and then runs a short counted loop.

  i0, i1 -- the first two values of the sequence; i2 = i0 + i1 and
            i3 = i1 + i2 are derived from them.

  Nothing is returned; the result of proc.execute() is currently unused.
  """
  i2 = i0 + i1
  i3 = i1 + i2

  code = InstructionStream()
  proc = Processor()

  # Hard-coded register numbers used by this test
  r_loop = 4
  r_address = 5
  r0 = 6
  r1 = 7
  r2 = 8
  r3 = 9

  # Load arguments into a quadword

  #################
  # Pack quadword #
  #################

  def load_value_int32(code, reg, value, clear = False):
    # value is expected to be a 32 bit integer.  Floor division keeps the
    # upper halfword an integer regardless of the division semantics in use.
    code.add(spu.ilhu(reg, value // pow(2, 16)))  # immediate load halfword upper
    code.add(spu.iohl(reg, value % pow(2, 16)))   # immediate or halfword lower
    if clear:
      code.add(spu.shlqbyi(reg, reg, 12)) # shift left qw by bytes, clears right bytes
    return

  load_value_int32(code, r0, i0, True)
  load_value_int32(code, r1, i1, True)
  code.add(spu.rotqbyi(r1, r1, 12)) # rotate qw by bytes
  load_value_int32(code, r2, i2, True)
  code.add(spu.rotqbyi(r2, r2, 8))
  load_value_int32(code, r3, i3, True)
  code.add(spu.rotqbyi(r3, r3, 4))
  code.add(spu.a(r0, r0, r1))
  code.add(spu.a(r0, r0, r2))
  code.add(spu.a(r0, r0, r3))

  ##########

  # Main loop to calculate Fibonacci sequence

  # The helper's keyword is `clear`; the previous `clear_bits` spelling
  # raised TypeError before any loop code was generated.
  load_value_int32(code, r_address, pow(2, 16), clear = False) # start at 64K

  load_value_int32(code, r_loop, 0, clear = False)
  start_label = code.size() + 1

  code.add(spu.sfi(r_loop, r_loop, 1))

  # Position of the branch instruction, using the same convention as
  # start_label so the difference is the distance back to the loop start.
  # NOTE(review): the original subtracted from an undefined name `next`
  # (which resolved to the builtin and raised TypeError); this replacement
  # is the most plausible intent -- confirm the offset and its units.
  branch_label = code.size() + 1
  code.add(spu.brnz(r_loop, (-(branch_label - start_label) * spu.WORD_SIZE)))

  code.add(spu.stop(0x2005))

  r = proc.execute(code)
  # assert(r == 12)
  # print 'int result:', r

  return
Example #8
0
  def _reduce_word(self, words, result):
    """
    Sum the four word slots of `words` into the preferred slot of `result`.

    `result` is added to, not overwritten, so whatever it already holds is
    part of the sum.  `words` is rotated by 4 bytes after every add, i.e.
    by 16 bytes in total over the four steps.
    """
    # One pass per word slot: add the vector, then rotate the next word
    # into the preferred slot.
    for _ in range(4):
      spu.a(result, words, result)
      spu.rotqbyi(words, words, 4)
Example #9
0
    def popc(self, count, x):
        """
        Accumulate the number of 1 bits of each word of x into count.

        A scratch register is taken from the active code stream for the
        duration of the computation and handed back afterwards.
        """
        active = spu.get_active_code()
        scratch = active.acquire_register()

        spu.cntb(scratch, x)             # count the set bits of every byte
        spu.sumb(scratch, scratch, 0)    # sum those per-byte counts
        spu.a(count, count, scratch)     # count = count + scratch

        active.release_register(scratch)
Example #10
0
  def popc(self, count, x):
    """
    Accumulate the number of 1 bits of each word of x into count.

    A scratch register is taken from the active code stream for the
    duration of the computation and handed back afterwards.
    """
    active = spu.get_active_code()
    scratch = active.acquire_register()

    spu.cntb(scratch, x)             # count the set bits of every byte
    spu.sumb(scratch, scratch, 0)    # sum those per-byte counts
    spu.a(count, count, scratch)     # count = count + scratch

    active.release_register(scratch)
Example #11
0
 def _ab_c(self, x, y, ab, c, ab_temp, c_temp):
   """
   Update both accumulators in one pass: ab gains the bit counts of
   (x XOR y) and c gains the bit counts of (x AND y).

   The two independent instruction chains are issued alternately, one
   instruction from each at every step.  ab_temp and c_temp are
   overwritten as scratch space.
   """
   # Step 1: combine the inputs
   spu.xor(ab_temp, x, y)
   spu.and_(c_temp, x, y)

   # Step 2: count the set bits of every byte
   spu.cntb(ab_temp, ab_temp)
   spu.cntb(c_temp, c_temp)

   # Step 3: sum the per-byte counts
   spu.sumb(ab_temp, ab_temp, 0)
   spu.sumb(c_temp, c_temp, 0)

   # Step 4: fold into the accumulators
   spu.a(ab, ab, ab_temp)
   spu.a(c, c, c_temp)
Example #12
0
def TestParams():
    """
    Check that the ten execution parameters reach the SPU program.

    The generated code sums all ten parameter values into the program's
    gp_return register and stops with 0x200A when the sum equals 55, or
    with 0x200B otherwise.  The parameters are set to 1..10, so the test
    asserts a return value of 55 and a stop code of 0x200A.
    """
    prgm = Program()
    code = prgm.get_stream()
    proc = Processor()

    total_reg = prgm.gp_return
    value_reg = prgm.acquire_register()

    # total = 0
    code.add(spu.xor(total_reg, total_reg, total_reg))

    # total += each parameter in turn
    param_sources = (
        spu_param_1, spu_param_2, spu_param_3, spu_param_4, spu_param_5,
        spu_param_6, spu_param_7, spu_param_8, spu_param_9, spu_param_10,
    )
    for source in param_sources:
        copy_param(code, value_reg, source)
        code.add(spu.a(total_reg, total_reg, value_reg))

    # value_reg = (total == 55); when the comparison fails, branch over the
    # success stop.
    code.add(spu.ceqi(value_reg, total_reg, 55))
    code.add(spu.brz(value_reg, 2))
    code.add(spu.stop(0x200A))
    code.add(spu.stop(0x200B))

    # p1 .. p10 = 1 .. 10
    params = spu_exec.ExecParams()
    for n in range(1, 11):
        setattr(params, "p%d" % n, n)

    prgm += code
    outcome = proc.execute(prgm, params=params, stop=True)

    assert outcome[0] == 55
    assert outcome[1] == 0x200A
Example #13
0
  def init_address(self):
    """
    Initialize this iterator's address register and offset it by the
    wrapped iterator's count register.
    """
    # Let the wrapped iterator emit its own address initialization for us.
    self.obj.init_address(self)

    # addr = count + addr.  For variable iterators r_count already holds
    # the computed offset.
    add_offset = spu.a(self.r_addr, self.obj.r_count, self.r_addr)
    self.obj.code.add(add_offset)
Example #14
0
def mem_write_in_mbox(code, psmap, lsa, tag, cache = False):
  """Write a 32bit message stored at a local LSA to another SPU's inbound
     mailbox.

     psmap -- base address of the target SPU's PS map, either as an integer
              or as a register holding it.
     lsa   -- local store address of the message; must be 12 mod 16 for DMA
              alignment (only checked when given as an integer).
     tag   -- DMA tag passed through to mem_put().
     cache -- when True, the register holding the constant 4 (the transfer
              size) is kept in program storage for reuse instead of being
              released.

     This is a DMA operation; it must be completed using mem_complete() or
     a similar method."""

  if isinstance(lsa, (int, long)) and (lsa % 16) != 12:
    print("ERROR LSA for mem_write_mbox() must be 12 mod 16")
    assert(0)

  prgm = code.prgm

  # Destination: PS map base + 0x400C.  This register is always computed
  # fresh; only the size constant below takes part in caching.
  r_dest = prgm.acquire_register()
  if isinstance(psmap, (int, long)):
    util.load_word(code, r_dest, psmap + 0x400C)
  else:
    util.load_word(code, r_dest, 0x400C)
    code.add(spu.a(r_dest, r_dest, psmap))

  # Transfer size (4 bytes): reuse a stored register when one exists.
  size_key = "_const_val_4"
  r_size = prgm.get_storage(size_key)
  size_is_shared = isinstance(r_size, spu.Register)
  if not size_is_shared:
    r_size = prgm.acquire_register()
    util.load_word(code, r_size, 4)
    if cache == True:
      size_is_shared = True
      prgm.add_storage(size_key, r_size)

  mem_put(code, lsa, r_dest, r_size, tag)

  prgm.release_register(r_dest)
  # Only give the size register back if it is private to this call.
  if cache == False and not size_is_shared:
    prgm.release_register(r_size)
Example #15
0
    def save_ls_buffer(self, ls_size=None, branch=False):
        """
        Emit the code that writes the local store buffer out to main memory
        and advances the main memory bookkeeping.

        ls_size -- register holding the number of bytes to transfer.
                   NOTE(review): when omitted, a fresh register is acquired
                   and used as the size without being initialized in this
                   method -- confirm whether that is intended.
        branch  -- not used by this method.

        All registers acquired here are released before returning.
        """
        code = spu.get_active_code()
        scratch = []

        if ls_size is None:
            ls_size = code.acquire_register()
            scratch.append(ls_size)

        # Main memory target: the word 4 bytes into mm_buffer added to
        # mm_buffer itself (presumably offset + base address -- confirm).
        mm_addr = code.acquire_register()
        scratch.append(mm_addr)
        spu.rotqbyi(mm_addr, self.mm_buffer, 4)
        spu.a(mm_addr, mm_addr, self.mm_buffer)

        # Transfer the buffer
        desc = spuiter.memory_desc('b')
        desc.set_size_reg(ls_size)
        desc.set_addr_reg(mm_addr)
        desc.put(code, self.ls_buffer)

        # Advance the main memory offset by the word 8 bytes into mm_buffer
        mm_step = code.acquire_register()
        scratch.append(mm_step)
        spu.rotqbyi(mm_step, self.mm_buffer, 8)
        spu.rotqbyi(mm_addr, self.mm_buffer, 4)
        spu.a(mm_addr, mm_addr, mm_step)

        util.set_slot_value(code, self.mm_buffer, 2, mm_addr)

        # Reset the local store offset
        util.set_slot_value(code, self.ls_buffer, 2, 0)

        code.release_registers(scratch)
Example #16
0
  def save_ls_buffer(self, ls_size = None, branch = False):
    """
    Emit the code that writes the local store buffer out to main memory
    and advances the main memory bookkeeping.

    ls_size -- register holding the number of bytes to transfer.
               NOTE(review): when omitted, a fresh register is acquired
               and used as the size without being initialized in this
               method -- confirm whether that is intended.
    branch  -- not used by this method.

    All registers acquired here are released before returning.
    """
    code = spu.get_active_code()
    scratch = []

    if ls_size is None:
      ls_size = code.acquire_register()
      scratch.append(ls_size)

    # Main memory target: the word 4 bytes into mm_buffer added to
    # mm_buffer itself (presumably offset + base address -- confirm).
    mm_addr = code.acquire_register()
    scratch.append(mm_addr)
    spu.rotqbyi(mm_addr, self.mm_buffer, 4)
    spu.a(mm_addr, mm_addr, self.mm_buffer)

    # Transfer the buffer
    desc = spuiter.memory_desc('b')
    desc.set_size_reg(ls_size)
    desc.set_addr_reg(mm_addr)
    desc.put(code, self.ls_buffer)

    # Advance the main memory offset by the word 8 bytes into mm_buffer
    mm_step = code.acquire_register()
    scratch.append(mm_step)
    spu.rotqbyi(mm_step, self.mm_buffer, 8)
    spu.rotqbyi(mm_addr, self.mm_buffer, 4)
    spu.a(mm_addr, mm_addr, mm_step)

    util.set_slot_value(code, self.mm_buffer, 2, mm_addr)

    # Reset the local store offset
    util.set_slot_value(code, self.ls_buffer, 2, 0)

    code.release_registers(scratch)
Example #17
0
def TestParams():
    """
    Check that the ten execution parameters reach the SPU program.

    The generated code sums all ten parameter values into the program's
    gp_return register and stops with 0x200A when the sum equals 55, or
    with 0x200B otherwise.  The parameters are set to 1..10, so the test
    asserts a return value of 55 and a stop code of 0x200A.
    """
    prgm = Program()
    code = prgm.get_stream()
    proc = Processor()

    total_reg = prgm.gp_return
    value_reg = prgm.acquire_register()

    # total = 0
    code.add(spu.xor(total_reg, total_reg, total_reg))

    # total += each parameter in turn
    param_sources = (
        spu_param_1, spu_param_2, spu_param_3, spu_param_4, spu_param_5,
        spu_param_6, spu_param_7, spu_param_8, spu_param_9, spu_param_10,
    )
    for source in param_sources:
        copy_param(code, value_reg, source)
        code.add(spu.a(total_reg, total_reg, value_reg))

    # value_reg = (total == 55); when the comparison fails, branch over the
    # success stop.
    code.add(spu.ceqi(value_reg, total_reg, 55))
    code.add(spu.brz(value_reg, 2))
    code.add(spu.stop(0x200A))
    code.add(spu.stop(0x200B))

    # p1 .. p10 = 1 .. 10
    params = spu_exec.ExecParams()
    for n in range(1, 11):
        setattr(params, "p%d" % n, n)

    prgm += code
    outcome = proc.execute(prgm, params=params, stop=True)

    assert outcome[0] == 55
    assert outcome[1] == 0x200A
Example #18
0
 def cleanup(self):
   """
   Emit the end-of-loop update of the counter register.

   In DEC mode the step is subtracted from r_count, in INC mode it is
   added.  The step comes from r_step when a step register is set and
   from the immediate step_size() otherwise.  Any other mode emits
   nothing.
   """
   if self.mode == DEC:
     if self.r_step is None:
       update = spu.ai(self.r_count, self.r_count, -self.step_size())
     else:
       # sf subtracts its first source from its second:
       # count = count - step
       update = spu.sf(self.r_count, self.r_step, self.r_count)
   elif self.mode == INC:
     if self.r_step is None:
       update = spu.ai(self.r_count, self.r_count, self.step_size())
     else:
       update = spu.a(self.r_count, self.r_count, self.r_step)
   else:
     return

   self.code.add(update)
Example #19
0
def TestParams():
    """
    Check that the ten execution parameters reach the SPU program.

    The generated code sums all ten parameter values and stops with 0x200A
    when the sum equals 55, or with 0x200B otherwise.  The parameters are
    set to 1..10 and the value returned by execute() is asserted to be 0xA.
    """
    code = InstructionStream()
    proc = Processor()

    total_reg = code.acquire_register()
    value_reg = code.acquire_register()

    # total = 0
    code.add(spu.xor(total_reg, total_reg, total_reg))

    # total += each parameter in turn
    param_sources = (
        spu_param_1, spu_param_2, spu_param_3, spu_param_4, spu_param_5,
        spu_param_6, spu_param_7, spu_param_8, spu_param_9, spu_param_10,
    )
    for source in param_sources:
        copy_param(code, value_reg, source)
        code.add(spu.a(total_reg, total_reg, value_reg))

    # value_reg = (total == 55); when the comparison fails, branch over the
    # success stop.
    code.add(spu.ceqi(value_reg, total_reg, 55))
    code.add(spu.brz(value_reg, 2))
    code.add(spu.stop(0x200A))
    code.add(spu.stop(0x200B))

    # p1 .. p10 = 1 .. 10
    params = spu_exec.ExecParams()
    for n in range(1, 11):
        setattr(params, "p%d" % n, n)

    outcome = proc.execute(code, params=params)

    assert outcome == 0xA
Example #20
0
# Finish and run the program built so far.  prgm, code and proc are expected
# to have been created earlier in the script.
prgm.add(code)
prgm.print_code() # TODO  - support print prgm instead?

ret = proc.execute(prgm, mode = 'int')
print "ret", ret

# ---- Second program: one substream inserted twice into a stream ----
prgm = env.Program()
code = prgm.get_stream()

# Register holding the value the substream adds on each use
r_add = prgm.acquire_register()

# Generate substream
# Multiply gp_return by 2 (shift left by 1), then add r_add
subcode = prgm.get_stream()
subcode.add(spu.shli(subcode.gp_return, subcode.gp_return, 1))
subcode.add(spu.a(subcode.gp_return, subcode.gp_return, r_add))

# Initialize gp_return to 5 and r_add to 1, insert code
code.add(spu.il(r_add, 1))
code.add(spu.il(code.gp_return, 5))
code.add(subcode)

# Set r_add to 2, add 3 to gp_return, insert again
# NOTE(review): if the substream is expanded in place at each insertion,
# gp_return should end up as ((5 * 2 + 1) + 3) * 2 + 2 = 30 -- confirm.
code.add(spu.il(r_add, 2))
code.add(spu.ai(code.gp_return, code.gp_return, 3))
code.add(subcode)


prgm.add(code)
prgm.print_code()
Example #21
0
# Finish and run the program built so far.  prgm, code and proc are expected
# to have been created earlier in the script.
prgm.add(code)
prgm.print_code()  # TODO  - support print prgm instead?

ret = proc.execute(prgm, mode='int')
print "ret", ret

# ---- Second program: one substream inserted twice into a stream ----
prgm = env.Program()
code = prgm.get_stream()

# Register holding the value the substream adds on each use
r_add = prgm.acquire_register()

# Generate substream
# Multiply gp_return by 2 (shift left by 1), then add r_add
subcode = prgm.get_stream()
subcode.add(spu.shli(subcode.gp_return, subcode.gp_return, 1))
subcode.add(spu.a(subcode.gp_return, subcode.gp_return, r_add))

# Initialize gp_return to 5 and r_add to 1, insert code
code.add(spu.il(r_add, 1))
code.add(spu.il(code.gp_return, 5))
code.add(subcode)

# Set r_add to 2, add 3 to gp_return, insert again
# NOTE(review): if the substream is expanded in place at each insertion,
# gp_return should end up as ((5 * 2 + 1) + 3) * 2 + 2 = 30 -- confirm.
code.add(spu.il(r_add, 2))
code.add(spu.ai(code.gp_return, code.gp_return, 3))
code.add(subcode)

prgm.add(code)
prgm.print_code()

# Run the second program; the result is stored but not printed here.
ret = proc.execute(prgm, mode='int')
Example #22
0
def SimpleSPU():
    """
  A very simple SPU that computes 11 + 31 and returns 0xA on success.
  """
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    # Acquire two registers
    #x    = code.acquire_register()
    x = code.gp_return
    test = code.acquire_register()

    lbl_brz = code.get_label("BRZ")
    lbl_skip = code.get_label("SKIP")

    spu.hbrr(lbl_brz, lbl_skip)
    spu.xor(x, x, x)  # zero x
    spu.ai(x, x, 11)  # x = x + 11
    spu.ai(x, x, 31)  # x = x + 31

    spu.ceqi(test, x, 42)  # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    code.add(lbl_brz)
    spu.brz(test, lbl_skip)
    spu.stop(0x100A)
    code.add(lbl_skip)
    spu.stop(0x100B)

    code.print_code(hex=True, pro=True, epi=True)
    r = proc.execute(code, mode='int', stop=True)
    print "ret", r
    assert (r[0] == 42)
    assert (r[1] == 0x100A)

    code = InstructionStream()
    spu.set_active_code(code)

    lbl_loop = code.get_label("LOOP")
    lbl_break = code.get_label("BREAK")

    r_cnt = code.acquire_register()
    r_stop = code.acquire_register()
    r_cmp = code.acquire_register()
    r_foo = code.gp_return

    spu.ori(r_foo, code.r_zero, 0)
    spu.ori(r_cnt, code.r_zero, 0)
    util.load_word(code, r_stop, 10)

    code.add(lbl_loop)

    spu.ceq(r_cmp, r_cnt, r_stop)
    spu.brnz(r_cmp, lbl_break)
    spu.ai(r_cnt, r_cnt, 1)

    spu.a(r_foo, r_foo, r_cnt)

    spu.br(lbl_loop)
    code.add(lbl_break)

    code.print_code()
    r = proc.execute(code, mode='int', stop=True)
    print "ret", r
    assert (r[0] == 55)

    return
Example #23
0
def SimpleSPU():
  """
  A very simple SPU that computes 11 + 31 and returns 0xA on success.
  """
  prgm = env.Program()
  code = prgm.get_stream()
  proc = env.Processor()

  spu.set_active_code(code)
  

  # Acquire two registers
  #x    = code.acquire_register()
  x = prgm.gp_return
  test = prgm.acquire_register()

  lbl_brz = prgm.get_label("BRZ")
  lbl_skip = prgm.get_label("SKIP")

  spu.hbrr(lbl_brz, lbl_skip)
  spu.xor(x, x, x) # zero x
  spu.ai(x, x, 11) # x = x + 11
  spu.ai(x, x, 31) # x = x + 31

  spu.ceqi(test, x, 42) # test = (x == 42)

  # If test is false (all 0s), skip the stop(0x100A) instruction
  code.add(lbl_brz)
  spu.brz(test, lbl_skip)
  spu.stop(0x100A)
  code.add(lbl_skip)
  spu.stop(0x100B)

  prgm.add(code) 
  prgm.print_code() 
  r = proc.execute(prgm, mode = 'int', stop = True) 
  print "ret", r
  assert(r[0] == 42)
  assert(r[1] == 0x100A)


  prgm = env.Program()
  code = prgm.get_stream()
  spu.set_active_code(code)

  lbl_loop = prgm.get_label("LOOP")
  lbl_break = prgm.get_label("BREAK")

  r_cnt = prgm.acquire_register()
  r_stop = prgm.acquire_register()
  r_cmp = prgm.acquire_register()
  r_foo = prgm.gp_return

  spu.ori(r_foo, prgm.r_zero, 0)
  spu.ori(r_cnt, prgm.r_zero, 0)
  util.load_word(code, r_stop, 10)

  code.add(lbl_loop)

  spu.ceq(r_cmp, r_cnt, r_stop)
  spu.brnz(r_cmp, lbl_break)
  spu.ai(r_cnt, r_cnt, 1)

  spu.a(r_foo, r_foo, r_cnt)

  spu.br(lbl_loop)
  code.add(lbl_break)

  prgm.add(code)
  prgm.print_code()
  r = proc.execute(prgm, mode = 'int', stop = True)
  print "ret", r
  assert(r[0] == 55)

  return