Example #1
0
  def save_register(self, reg): # , branch_to_save = False):
    code = spu.get_active_code()

    offset = code.acquire_register()
    size = code.acquire_register()
    test = code.acquire_register()
    regs = [offset, size, test]
    
    spu.rotqbyi(offset, self.ls_buffer, 4)
    spu.rotqbyi(size,   self.ls_buffer, 8)

    spu.stqx(reg, self.ls_buffer, offset)
    
    spu.ai(offset, offset, 16)
    spu.ceq(test,  offset, size)

    spu.wrch(size, dma.SPU_WrOutMbox)
    spu.wrch(offset, dma.SPU_WrOutMbox)
    spu.wrch(test, dma.SPU_WrOutMbox)
    # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
    lbl_ls_full = code.size()
    spu.stop(0xB)
    self.save_ls_buffer(ls_size = size)

    spu.nop(0)
    code[lbl_ls_full] = spu.brz(test, (code.size() - lbl_ls_full), ignore_active = True)

    code.release_registers(regs)
    return
Example #2
0
def vector_from_array(code, r_target, a):
  """
  Generate the instructions to fill a vector register with the values
  from an array.
  """
  prgm = code.prgm
  r0 = r_target

  r1 = prgm.acquire_register()
  r2 = prgm.acquire_register()
  r3 = prgm.acquire_register()

  load_word(code, r0, a[0], True)
  load_word(code, r1, a[1], True)
  code.add(spu.rotqbyi(r1, r1, 12)) # rotate qw by bytes

  load_word(code, r2, a[2], True)
  code.add(spu.rotqbyi(r2, r2, 8))

  load_word(code, r3, a[3], True)
  code.add(spu.rotqbyi(r3, r3, 4))

  code.add(spu.a(r0, r0, r1))
  code.add(spu.a(r0, r0, r2))
  code.add(spu.a(r0, r0, r3))

  prgm.release_register(r1)
  prgm.release_register(r2)
  prgm.release_register(r3)

  return
Example #3
0
def set_slot_value(code, reg, slot, value):
  """
  Set the value in reg[slot] with value.  If value is a register, use
  the value from the preferred slot (value[0]).  If value is a
  constant, load it into reg[slot], preserving the values in the other
  slots.
  """
  prgm = code.prgm

  if slot not in [0,1,2,3]:
    raise Exception("Invalid SIMD slot: " + slot)

  mask = prgm.acquire_register()
  vector_from_array(code, mask, [0xFFFFFFFF, 0, 0, 0])

  if not issubclass(type(value), (spe.Register, spe.Variable)):
    r_value = prgm.acquire_register()
    load_word(code, r_value, value)
  else:
    r_value = value

  code.add(spu.rotqbyi(reg, reg, slot * 4))
  code.add(spu.selb(reg, reg, r_value, mask))
  code.add(spu.rotqbyi(reg, reg, (4 - slot) * 4))

  prgm.release_register(mask)
  if not issubclass(type(value), (spe.Register, spe.Variable)):
    prgm.release_register(r_value)
  return
Example #4
0
def TestFloatArray():
    from corepy.arch.spu.platform import InstructionStream, Processor
    import corepy.arch.spu.lib.dma as dma
    import corepy.arch.spu.platform as env

    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    x = SingleFloat([1.0, 2.0, 3.0, 4.0])
    y = SingleFloat([0.5, 1.5, 2.5, 3.5])
    sum = SingleFloat(0.0)

    sum.v = spu.fa.ex(x, y)

    r = SingleFloat([0.0, 0.0, 0.0, 0.0], reg=code.fp_return)

    for i in range(4):
        r.v = spu.fa.ex(sum, r)
        spu.rotqbyi(sum, sum, 4)

    prgm.add(code)
    proc = env.Processor()
    result = proc.execute(prgm, mode="fp")

    x_test = array.array("f", [1.0, 2.0, 3.0, 4.0])
    y_test = array.array("f", [0.5, 1.5, 2.5, 3.5])
    r_test = 0.0
    for i in range(4):
        r_test += x_test[i] + y_test[i]

    assert result == r_test

    return
Example #5
0
def set_slot_value(code, reg, slot, value):
    """
  Set the value in reg[slot] with value.  If value is a register, use
  the value from the preferred slot (value[0]).  If value is a
  constant, load it into reg[slot], preserving the values in the other
  slots.
  """
    prgm = code.prgm

    if slot not in [0, 1, 2, 3]:
        raise Exception("Invalid SIMD slot: " + slot)

    mask = prgm.acquire_register()
    vector_from_array(code, mask, [0xFFFFFFFF, 0, 0, 0])

    if not issubclass(type(value), (spe.Register, spe.Variable)):
        r_value = prgm.acquire_register()
        load_word(code, r_value, value)
    else:
        r_value = value

    code.add(spu.rotqbyi(reg, reg, slot * 4))
    code.add(spu.selb(reg, reg, r_value, mask))
    code.add(spu.rotqbyi(reg, reg, (4 - slot) * 4))

    prgm.release_register(mask)
    if not issubclass(type(value), (spe.Register, spe.Variable)):
        prgm.release_register(r_value)
    return
Example #6
0
    def save_register(self, reg):  # , branch_to_save = False):
        code = spu.get_active_code()

        offset = code.acquire_register()
        size = code.acquire_register()
        test = code.acquire_register()
        regs = [offset, size, test]

        spu.rotqbyi(offset, self.ls_buffer, 4)
        spu.rotqbyi(size, self.ls_buffer, 8)

        spu.stqx(reg, self.ls_buffer, offset)

        spu.ai(offset, offset, 16)
        spu.ceq(test, offset, size)

        spu.wrch(size, dma.SPU_WrOutMbox)
        spu.wrch(offset, dma.SPU_WrOutMbox)
        spu.wrch(test, dma.SPU_WrOutMbox)
        # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
        lbl_ls_full = code.size()
        spu.stop(0xB)
        self.save_ls_buffer(ls_size=size)

        spu.nop(0)
        code[lbl_ls_full] = spu.brz(test, (code.size() - lbl_ls_full),
                                    ignore_active=True)

        code.release_registers(regs)
        return
Example #7
0
def vector_from_array(code, r_target, a):
    """
  Generate the instructions to fill a vector register with the values
  from an array.
  """
    prgm = code.prgm
    r0 = r_target

    r1 = prgm.acquire_register()
    r2 = prgm.acquire_register()
    r3 = prgm.acquire_register()

    load_word(code, r0, a[0], True)
    load_word(code, r1, a[1], True)
    code.add(spu.rotqbyi(r1, r1, 12))  # rotate qw by bytes

    load_word(code, r2, a[2], True)
    code.add(spu.rotqbyi(r2, r2, 8))

    load_word(code, r3, a[3], True)
    code.add(spu.rotqbyi(r3, r3, 4))

    code.add(spu.a(r0, r0, r1))
    code.add(spu.a(r0, r0, r2))
    code.add(spu.a(r0, r0, r3))

    prgm.release_register(r1)
    prgm.release_register(r2)
    prgm.release_register(r3)

    return
Example #8
0
def TestSetSlotValue():
    import corepy.arch.spu.platform as synspu
    import corepy.arch.spu.types.spu_types as var
    import corepy.arch.spu.lib.dma as dma

    prgm = synspu.Program()
    code = prgm.get_stream()
    proc = synspu.Processor()
    spu.set_active_code(code)
    a = var.SignedWord(0x11)
    b = var.SignedWord(0x13)
    r = var.SignedWord(0xFFFFFFFF)

    set_slot_value(code, r, 0, 0x10)
    set_slot_value(code, r, 1, a)
    set_slot_value(code, r, 2, 0x12)
    set_slot_value(code, r, 3, b)

    for i in range(4):
        spu.wrch(r, dma.SPU_WrOutMbox)
        spu.rotqbyi(r, r, 4)

    prgm.add(code)
    spe_id = proc.execute(prgm, async=True)

    for i in range(4):
        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass
        result = synspu.spu_exec.read_out_mbox(spe_id)
        assert (result == (i + 0x10))

    proc.join(spe_id)

    return
Example #9
0
def TestFloatArray():
    from corepy.arch.spu.platform import InstructionStream, Processor
    import corepy.arch.spu.lib.dma as dma
    import corepy.arch.spu.platform as env

    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    x = SingleFloat([1.0, 2.0, 3.0, 4.0])
    y = SingleFloat([0.5, 1.5, 2.5, 3.5])
    sum = SingleFloat(0.0)

    sum.v = spu.fa.ex(x, y)

    r = SingleFloat([0.0, 0.0, 0.0, 0.0], reg=code.fp_return)

    for i in range(4):
        r.v = spu.fa.ex(sum, r)
        spu.rotqbyi(sum, sum, 4)

    prgm.add(code)
    proc = env.Processor()
    result = proc.execute(prgm, mode='fp')

    x_test = array.array('f', [1.0, 2.0, 3.0, 4.0])
    y_test = array.array('f', [0.5, 1.5, 2.5, 3.5])
    r_test = 0.0
    for i in range(4):
        r_test += x_test[i] + y_test[i]

    assert (result == r_test)

    return
Example #10
0
def TestSetSlotValue():
  import corepy.arch.spu.platform as synspu
  import corepy.arch.spu.types.spu_types as var
  import corepy.arch.spu.lib.dma as dma

  prgm = synspu.Program()
  code = prgm.get_stream()
  proc = synspu.Processor()
  spu.set_active_code(code)
  a = var.SignedWord(0x11)
  b = var.SignedWord(0x13)
  r = var.SignedWord(0xFFFFFFFF)

  set_slot_value(code, r, 0, 0x10)
  set_slot_value(code, r, 1, a)
  set_slot_value(code, r, 2, 0x12)
  set_slot_value(code, r, 3, b)

  for i in range(4):
    spu.wrch(r, dma.SPU_WrOutMbox)
    spu.rotqbyi(r, r, 4)

  prgm.add(code)
  spe_id = proc.execute(prgm, async = True)

  for i in range(4):
    while synspu.spu_exec.stat_out_mbox(spe_id) == 0: pass
    result = synspu.spu_exec.read_out_mbox(spe_id)
    assert(result == (i + 0x10))

  proc.join(spe_id)

  return
Example #11
0
def TestInt2(i0 = 0, i1 = 1):
  i2 = i0 + i1
  i3 = i1 + i2
  
  code = InstructionStream()
  proc = Processor()

  r_loop = 4
  r_address = 5
  r0 = 6
  r1 = 7
  r2 = 8
  r3 = 9
  
  # Load arguments into a quadword
  
  #################
  # Pack quadword #
  #################

  def load_value_int32(code, reg, value, clear = False):
    # obviously, value should be 32 bit integer
    code.add(spu.ilhu(reg, value / pow(2, 16)))      # immediate load halfword upper
    code.add(spu.iohl(reg, value % pow(2, 16))) # immediate or halfword lower
    if clear:
      code.add(spu.shlqbyi(reg, reg, 12)) # shift left qw by bytes, clears right bytes
    return

  load_value_int32(code, r0, i0, True)
  load_value_int32(code, r1, i1, True)
  code.add(spu.rotqbyi(r1, r1, 12)) # rotate qw by bytes
  load_value_int32(code, r2, i2, True)
  code.add(spu.rotqbyi(r2, r2, 8))
  load_value_int32(code, r3, i3, True)
  code.add(spu.rotqbyi(r3, r3, 4))
  code.add(spu.a(r0, r0, r1))
  code.add(spu.a(r0, r0, r2))
  code.add(spu.a(r0, r0, r3)) 

  ##########

  # Main loop to calculate Fibnoccai sequence

  load_value_int32(code, r_address, pow(2, 16), clear_bits = False) # start at 64K

  load_value_int32(code, r_loop, 0, clear_bits = False)
  start_label = code.size() + 1

  code.add(spu.sfi(r_loop, r_loop, 1))
  code.add(spu.brnz(r_loop, (-(next - start_label) * spu.WORD_SIZE)))

  code.add(spu.stop(0x2005))

  r = proc.execute(code)
  # assert(r == 12)
  # print 'int result:', r

  return
Example #12
0
def TestInt2(i0 = 0, i1 = 1):
  i2 = i0 + i1
  i3 = i1 + i2
  
  code = InstructionStream()
  proc = Processor()

  r_loop = 4
  r_address = 5
  r0 = 6
  r1 = 7
  r2 = 8
  r3 = 9
  
  # Load arguments into a quadword
  
  #################
  # Pack quadword #
  #################

  def load_value_int32(code, reg, value, clear = False):
    # obviously, value should be 32 bit integer
    code.add(spu.ilhu(reg, value / pow(2, 16)))      # immediate load halfword upper
    code.add(spu.iohl(reg, value % pow(2, 16))) # immediate or halfword lower
    if clear:
      code.add(spu.shlqbyi(reg, reg, 12)) # shift left qw by bytes, clears right bytes
    return

  load_value_int32(code, r0, i0, True)
  load_value_int32(code, r1, i1, True)
  code.add(spu.rotqbyi(r1, r1, 12)) # rotate qw by bytes
  load_value_int32(code, r2, i2, True)
  code.add(spu.rotqbyi(r2, r2, 8))
  load_value_int32(code, r3, i3, True)
  code.add(spu.rotqbyi(r3, r3, 4))
  code.add(spu.a(r0, r0, r1))
  code.add(spu.a(r0, r0, r2))
  code.add(spu.a(r0, r0, r3)) 

  ##########

  # Main loop to calculate Fibnoccai sequence

  load_value_int32(code, r_address, pow(2, 16), clear_bits = False) # start at 64K

  load_value_int32(code, r_loop, 0, clear_bits = False)
  start_label = code.size() + 1

  code.add(spu.sfi(r_loop, r_loop, 1))
  code.add(spu.brnz(r_loop, (-(next - start_label) * spu.WORD_SIZE)))

  code.add(spu.stop(0x2005))

  r = proc.execute(code)
  # assert(r == 12)
  # print 'int result:', r

  return
Example #13
0
  def _reduce_word(self, words, result):
    """
    Add-reduce a vector of words into the preferred
    slot of result.
    """

    for i in range(4):
      spu.a(result, words, result)
      spu.rotqbyi(words, words, 4)

    return
Example #14
0
    def block(self):
        code = spu.get_active_code()
        self._block_idx = len(code)

        # --> add the branch instruction (use brz (?) to always branch, nop to never branch)
        code[self._branch_idx] = spu.nop(0, ignore_active=True)
        # code[self._branch_idx] = spu.brnz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)
        # code[self._branch_idx] = spu.brz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)

        # Pack result into vector
        #   [x][y][score][--]

        # Zero the save value
        spu.xor(self._save_value, self._save_value, self._save_value)

        # Copy the score
        spu.selb(self._save_value, self._save_value, self._score,
                 self._word_mask)
        spu.rotqbyi(self._save_value, self._save_value, 12)

        # Copy the y value
        spu.selb(self._save_value, self._save_value, self._y_off,
                 self._word_mask)
        spu.rotqbyi(self._save_value, self._save_value, 12)

        # Copy the x value
        spu.selb(self._save_value, self._save_value, self._x_off,
                 self._word_mask)

        # Save value to local store
        spu.stqx(self._save_value, self._count, self._md_results.r_addr)

        self._count.v = self._count.v + 16

        # --> MemorySave test
        cmp = self._save_value  # reuse the save register
        spu.ceq.ex(cmp, self._count, self._md_results.r_size)

        if self._save_op is not None:
            self._save_op.test(cmp, self._count)

        # Just reset for now
        spu.selb(self._count, self._count, 0, cmp)

        # Return to the loop
        idx = len(code)
        spu.br(-(idx - self._branch_idx - 1))

        return
Example #15
0
 def _toggle(self, var):
   """
   Use rotate to toggle between two preferred slot  values in a vector.
   """
   if self.buffer_mode == 'double':
     self.code.add(spu.rotqbyi(var.reg, var.reg, 4))
   return
Example #16
0
  def block(self):
    code = spu.get_active_code()
    self._block_idx = len(code)

    # --> add the branch instruction (use brz (?) to always branch, nop to never branch)
    code[self._branch_idx] = spu.nop(0, ignore_active = True)
    # code[self._branch_idx] = spu.brnz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)
    # code[self._branch_idx] = spu.brz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)

    # Pack result into vector
    #   [x][y][score][--]

    # Zero the save value
    spu.xor(self._save_value, self._save_value, self._save_value)

    # Copy the score
    spu.selb(self._save_value, self._save_value, self._score, self._word_mask)    
    spu.rotqbyi(self._save_value, self._save_value, 12)

    # Copy the y value
    spu.selb(self._save_value, self._save_value, self._y_off, self._word_mask)
    spu.rotqbyi(self._save_value, self._save_value, 12)        

    # Copy the x value
    spu.selb(self._save_value, self._save_value, self._x_off, self._word_mask)
    
    # Save value to local store
    spu.stqx(self._save_value, self._count, self._md_results.r_addr)
    
    self._count.v = self._count.v + 16

    # --> MemorySave test
    cmp = self._save_value # reuse the save register
    spu.ceq.ex(cmp, self._count, self._md_results.r_size)

    if self._save_op is not None:
      self._save_op.test(cmp, self._count)
      
    # Just reset for now
    spu.selb(self._count, self._count, 0, cmp)

    # Return to the loop
    idx = len(code)
    spu.br(- (idx - self._branch_idx - 1))
    
    return
Example #17
0
def copy_param(code, target, source):
  """
  Copy a parameter from source reg to preferred slot in the target reg.
  For params in slot 0, this is just and add immediate.
  For params in other slots, the source is rotated.
  Note that other values in the source are copied, too.
  """
  if source[SLOT] != 0:
    code.add(spu.rotqbyi(target, source[REG], source[SLOT] * 4))
  else:
    code.add(spu.ai(target, source[REG], 0))
  return
Example #18
0
def copy_param(code, target, source):
    """
  Copy a parameter from source reg to preferred slot in the target reg.
  For params in slot 0, this is just and add immediate.
  For params in other slots, the source is rotated.
  Note that other values in the source are copied, too.
  """
    if source[SLOT] != 0:
        code.add(spu.rotqbyi(target, source[REG], source[SLOT] * 4))
    else:
        code.add(spu.ai(target, source[REG], 0))
    return
Example #19
0
  def save_ls_buffer(self, ls_size = None, branch = False):
    code = spu.get_active_code()
    
    regs = []
    if ls_size is None:
      ls_size = code.acquire_register()
      regs.append(ls_size)

    # Set the main memory address
    mm_offset = code.acquire_register()
    regs.append(mm_offset)

    spu.rotqbyi(mm_offset, self.mm_buffer, 4)
    spu.a(mm_offset, mm_offset, self.mm_buffer)

    # Tranfer the buffer
    md = spuiter.memory_desc('b')
    md.set_size_reg(ls_size)
    md.set_addr_reg(mm_offset)

    md.put(code, self.ls_buffer)

    # Increment the main memory offset
    mm_size = code.acquire_register()
    regs.append(mm_size)

    spu.rotqbyi(mm_size, self.mm_buffer, 8)        
    spu.rotqbyi(mm_offset,  self.mm_buffer, 4)
    spu.a(mm_offset, mm_offset, mm_size)

    util.set_slot_value(code, self.mm_buffer, 2, mm_offset)
    
    # Reset the ls offset
    util.set_slot_value(code, self.ls_buffer, 2, 0)
    
    code.release_registers(regs)
    
    return
Example #20
0
    def save_ls_buffer(self, ls_size=None, branch=False):
        code = spu.get_active_code()

        regs = []
        if ls_size is None:
            ls_size = code.acquire_register()
            regs.append(ls_size)

        # Set the main memory address
        mm_offset = code.acquire_register()
        regs.append(mm_offset)

        spu.rotqbyi(mm_offset, self.mm_buffer, 4)
        spu.a(mm_offset, mm_offset, self.mm_buffer)

        # Tranfer the buffer
        md = spuiter.memory_desc('b')
        md.set_size_reg(ls_size)
        md.set_addr_reg(mm_offset)

        md.put(code, self.ls_buffer)

        # Increment the main memory offset
        mm_size = code.acquire_register()
        regs.append(mm_size)

        spu.rotqbyi(mm_size, self.mm_buffer, 8)
        spu.rotqbyi(mm_offset, self.mm_buffer, 4)
        spu.a(mm_offset, mm_offset, mm_size)

        util.set_slot_value(code, self.mm_buffer, 2, mm_offset)

        # Reset the ls offset
        util.set_slot_value(code, self.ls_buffer, 2, 0)

        code.release_registers(regs)

        return