Example #1
0
    def synthesize(self, code):
        """
        Synthesize a streaming bit-count kernel into `code`.

        A stream buffer is built from self.stream_addr, self.stream_size,
        self.buffer_size and self.lsa.  For every vector produced by
        spu_vec_iter over each buffer of the stream, popc.popc(count, x)
        is emitted; popc.reduce_word(total, count) then folds `count` into
        `total`, which is written to the SPU_WrOutMbox channel.

        `code` is made the active instruction stream while synthesizing.
        The previously active stream is always restored on exit, including
        when synthesis raises.

        NOTE(review): stream_size is multiplied by 4 and buffer_size divided
        by 4, so presumably stream_size is in words and buffer_size in
        bytes -- confirm against the callers.
        """
        old_code = spu.get_active_code()
        spu.set_active_code(code)

        try:
            stream = spuiter.stream_buffer(code, self.stream_addr,
                                           self.stream_size * 4,
                                           self.buffer_size, self.lsa)
            # Floor division keeps the element count an integer regardless
            # of the division semantics in effect for this module.
            ls_data = spuiter.memory_desc('I', self.lsa,
                                          self.buffer_size // 4)
            popc = syn_popc_var()

            # NOTE(review): x is rebound by the inner loop below, so this
            # initial Word looks unused.  It is kept because constructing a
            # variable may acquire a register / emit code -- confirm before
            # removing.
            x = var.Word(0)
            count = var.Word(0)
            total = var.Word(0)

            for buf in stream:
                for x in spuiter.spu_vec_iter(code, ls_data, addr_reg=buf):
                    popc.popc(count, x)

            popc.reduce_word(total, count)

            # Send the result to the caller
            spu.wrch(total, dma.SPU_WrOutMbox)
        finally:
            # Never leave the global active stream pointing at `code`
            spu.set_active_code(old_code)
        return
Example #2
0
  def synthesize(self, code):
    """
    Synthesize code that fills a line in local store and copies it to
    the frame buffer.

    The emitted code stores the colour quadword across 256 words of
    local store (offsets 0..1023 from `addr`, which starts at 0), then
    repeats 128 times: put the 256-word line from local store address 0
    to the address held in `addr`, and advance `addr` by self.stride.
    `addr` starts at self.buffers[0] for the transfer loop.

    Raises Exception if self.buffers or self.stride is None.  Validation
    happens before the active instruction stream is switched, and the
    previously active stream is restored even if synthesis raises.
    """
    # Validate first so a configuration error cannot leave the global
    # active stream switched to `code`.
    if self.buffers is None: raise Exception('Please set buffers')
    if self.stride is None: raise Exception('Please set stride')

    old_code = spu.get_active_code()
    spu.set_active_code(code)

    try:
      # Draw a square
      color  = var.SignedWord(0x0F0F0FFF)
      fb0    = var.Word(self.buffers[0])
      # NOTE(review): fb1 is never read below; kept because creating the
      # variable may acquire a register / emit code -- confirm before
      # removing.
      fb1    = var.Word(self.buffers[1])
      stride = var.Word(self.stride)
      addr   = var.Word(0)

      # Draw one line: one quadword (16 bytes) store per iteration
      line_pixels = 256
      for i in spuiter.syn_iter(code, line_pixels*4, step = 16):
        spu.stqx(color, addr, i)

      # Transfer the line to the frame buffer
      md_fb = spuiter.memory_desc('I', size = line_pixels)
      md_fb.set_addr_reg(addr.reg)

      # addr doubles as the descriptor's address register from here on
      addr.v = fb0

      for i in spuiter.syn_iter(code, 128):
        md_fb.put(code, 0)
        addr.v = addr + stride
    finally:
      spu.set_active_code(old_code)
    return
Example #3
0
  def synthesize(self, code):
    """
    Synthesize a streaming bit-count kernel into `code`.

    A stream buffer is built from self.stream_addr, self.stream_size,
    self.buffer_size and self.lsa.  For every vector produced by
    spu_vec_iter over each buffer of the stream, popc.popc(count, x) is
    emitted; popc.reduce_word(total, count) then folds `count` into
    `total`, which is written to the SPU_WrOutMbox channel.

    `code` is made the active instruction stream while synthesizing.  The
    previously active stream is always restored on exit, including when
    synthesis raises.

    NOTE(review): stream_size is multiplied by 4 and buffer_size divided
    by 4, so presumably stream_size is in words and buffer_size in bytes
    -- confirm against the callers.
    """
    old_code = spu.get_active_code()
    spu.set_active_code(code)

    try:
      stream = spuiter.stream_buffer(code, self.stream_addr,
                                     self.stream_size * 4,
                                     self.buffer_size, self.lsa)
      # Floor division keeps the element count an integer regardless of
      # the division semantics in effect for this module.
      ls_data = spuiter.memory_desc('I', self.lsa, self.buffer_size // 4)
      popc = syn_popc_var()

      # NOTE(review): x is rebound by the inner loop below, so this
      # initial Word looks unused.  It is kept because constructing a
      # variable may acquire a register / emit code -- confirm before
      # removing.
      x = var.Word(0)
      count = var.Word(0)
      total = var.Word(0)

      for buf in stream:
        for x in spuiter.spu_vec_iter(code, ls_data, addr_reg = buf):
          popc.popc(count, x)

      popc.reduce_word(total, count)

      # Send the result to the caller
      spu.wrch(total, dma.SPU_WrOutMbox)
    finally:
      # Never leave the global active stream pointing at `code`
      spu.set_active_code(old_code)
    return
Example #4
0
    def row_complete(self, code):
        """
        Save the current row to the framebuffer.

        Emits into `code` a put of self.w 'I' elements from local store
        address self.lsa to the address held in self.y_offset, then
        advances self.y_offset by self.stride and zeroes self.x_offset.

        Raises Exception if width, lsa, y_offset or stride is unset (None).
        All checks run before any code is emitted.
        """

        if self.w is None: raise Exception('Please set width')
        if self.lsa is None: raise Exception('Please set lsa')
        if self.y_offset is None: raise Exception('Please call setup')
        # stride is consumed below; reject a missing value up front rather
        # than failing after the put has already been emitted.
        if self.stride is None: raise Exception('Please set stride')

        md = spuiter.memory_desc('I', size=self.w)

        md.set_addr_reg(self.y_offset)
        md.put(code, self.lsa)

        self.y_offset.v = self.y_offset + self.stride
        # xor of a register with itself clears it
        spu.xor(self.x_offset, self.x_offset, self.x_offset)
        return
Example #5
0
  def row_complete(self, code):
    """
    Save the current row to the framebuffer.

    Emits into `code` a put of self.w 'I' elements from local store
    address self.lsa to the address held in self.y_offset, then advances
    self.y_offset by self.stride and zeroes self.x_offset.

    Raises Exception if width, lsa, y_offset or stride is unset (None).
    All checks run before any code is emitted.
    """

    if self.w is None: raise Exception('Please set width')
    if self.lsa is None: raise Exception('Please set lsa')
    if self.y_offset is None: raise Exception('Please call setup')
    # stride is consumed below; reject a missing value up front rather
    # than failing after the put has already been emitted.
    if self.stride is None: raise Exception('Please set stride')

    md = spuiter.memory_desc('I', size = self.w)

    md.set_addr_reg(self.y_offset)
    md.put(code, self.lsa)

    self.y_offset.v = self.y_offset + self.stride
    # xor of a register with itself clears it
    spu.xor(self.x_offset, self.x_offset, self.x_offset)
    return
Example #6
0
  def save_ls_buffer(self, ls_size = None, branch = False):
    """
    Emit, into the active instruction stream, the code that writes the
    local store buffer out to main memory.

    ls_size -- register holding the transfer size; if None a scratch
               register is acquired for it.
    branch  -- not read anywhere in this method.

    After the put, the sum of two rotated views of self.mm_buffer is
    written back with set_slot_value(..., 2, ...), and slot 2 of
    self.ls_buffer is set to 0.  Every register acquired here is released
    before returning.
    """
    code = spu.get_active_code()
    held = []

    def grab():
      # Acquire one scratch register and remember it for release
      reg = code.acquire_register()
      held.append(reg)
      return reg

    # NOTE(review): when ls_size is None the freshly acquired register is
    # used as the size register without any visible initialisation in
    # this method -- confirm that is intended.
    if ls_size is None:
      ls_size = grab()

    # Set the main memory address
    # (presumably word 1 of mm_buffer rotated into the preferred slot and
    # added to word 0 -- verify against the mm_buffer layout)
    target = grab()
    spu.rotqbyi(target, self.mm_buffer, 4)
    spu.a(target, target, self.mm_buffer)

    # Transfer the buffer
    md = spuiter.memory_desc('b')
    md.set_size_reg(ls_size)
    md.set_addr_reg(target)
    md.put(code, self.ls_buffer)

    # Increment the main memory offset
    step = grab()
    spu.rotqbyi(step, self.mm_buffer, 8)
    spu.rotqbyi(target, self.mm_buffer, 4)
    spu.a(target, target, step)
    util.set_slot_value(code, self.mm_buffer, 2, target)

    # Reset the ls offset
    util.set_slot_value(code, self.ls_buffer, 2, 0)

    code.release_registers(held)
    return
Example #7
0
    def save_ls_buffer(self, ls_size=None, branch=False):
        """
        Emit, into the active instruction stream, the code that writes the
        local store buffer out to main memory.

        ls_size -- register holding the transfer size; if None a scratch
                   register is acquired for it.
        branch  -- not read anywhere in this method.

        After the put, the sum of two rotated views of self.mm_buffer is
        written back with set_slot_value(..., 2, ...), and slot 2 of
        self.ls_buffer is set to 0.  Every register acquired here is
        released before returning.
        """
        code = spu.get_active_code()
        scratch = []

        def take_register():
            # Acquire one scratch register and track it for release
            reg = code.acquire_register()
            scratch.append(reg)
            return reg

        # NOTE(review): when ls_size is None the freshly acquired register
        # is used as the size register without any visible initialisation
        # in this method -- confirm that is intended.
        if ls_size is None:
            ls_size = take_register()

        # Set the main memory address
        # (presumably word 1 of mm_buffer rotated into the preferred slot
        # and added to word 0 -- verify against the mm_buffer layout)
        mm_addr = take_register()
        spu.rotqbyi(mm_addr, self.mm_buffer, 4)
        spu.a(mm_addr, mm_addr, self.mm_buffer)

        # Transfer the buffer
        md = spuiter.memory_desc('b')
        md.set_size_reg(ls_size)
        md.set_addr_reg(mm_addr)
        md.put(code, self.ls_buffer)

        # Increment the main memory offset
        mm_step = take_register()
        spu.rotqbyi(mm_step, self.mm_buffer, 8)
        spu.rotqbyi(mm_addr, self.mm_buffer, 4)
        spu.a(mm_addr, mm_addr, mm_step)
        util.set_slot_value(code, self.mm_buffer, 2, mm_addr)

        # Reset the ls offset
        util.set_slot_value(code, self.ls_buffer, 2, 0)

        code.release_registers(scratch)
        return
Example #8
0
def TestTanimotoBlock(n_vecs = 4):
  code = synspu.InstructionStream()
  proc = synspu.Processor()

  code.set_debug(True)
  spu.set_active_code(code)
  
  tb = TanimotoBlock()
  ls_save = LocalSave()
  mm_save = MemorySave()

  code.set_debug(True)

  # Input block parameters
  m = 128
  n = 64
  # n_vecs = 9
  n_bits = 128 * n_vecs

  # Main memory results buffer
  # max_results = 2**16
  max_results = 16384
  words_per_result = 4

  mm_results_data = array.array('I', [12 for i in range(max_results * words_per_result)])
  #mm_results_buffer = synspu.aligned_memory(max_results * words_per_result, typecode = 'I')
  # mm_results_buffer.copy_to(mm_results_data.buffer_info()[0], len(mm_results_data))

  mm_results = spuiter.memory_desc('I')
  #mm_results.from_array(mm_results_buffer)
  mm_results.from_array(mm_results_data)

  mm_save.set_md_save_buffer(mm_results)
    
  # Local Results buffer
  buffer_size = var.SignedWord(16384)
  buffer_addr = var.SignedWord(m * n * n_vecs * 4)
  ls_results = spuiter.memory_desc('B')
  ls_results.set_size_reg(buffer_size)
  ls_results.set_addr_reg(buffer_addr)

  ls_save.set_md_results(ls_results)
  ls_save.set_mm_save_op(mm_save)

  # Setup the TanimotoBlock class
  tb.set_n_bits(n_bits)
  tb.set_block_size(m, n)

  tb.set_x_addr(0)
  tb.set_y_addr(m * n_vecs * 16)
  tb.set_save_op(ls_save)

  # Main test loop
  n_samples = 10000
  for samples in spuiter.syn_iter(code, n_samples):
    tb.synthesize(code)

  spu.wrch(buffer_size, dma.SPU_WrOutMbox)
  
  spu.stop(0x2000) 

  # "Function" Calls
  ls_save.block()
  mm_save.block()

  # code.print_code()
  start = time.time()
  spe_id = proc.execute(code, async=True)
  
  while synspu.spu_exec.stat_out_mbox(spe_id) == 0: pass
  # print 'tb said: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))
  stop = time.time()

  # mm_results_buffer.copy_from(mm_results_data.buffer_info()[0], len(mm_results_data))
  
  proc.join(spe_id)
  total = stop - start
  bits_sec = (m * n * n_bits * n_samples) / total / 1e9
  ops_per_compare = 48 * 4 + 8  # 48 SIMD instructions, 8 scalar
  insts_per_compare = 56
  gops = (m * n * n_vecs * n_samples * ops_per_compare ) / total / 1e9
  ginsts = (m * n * n_vecs * n_samples * insts_per_compare ) / total / 1e9  
  print '%.6f sec, %.2f Gbits/sec, %.2f GOps, %.2f GInsts, %d insts' % (
    total, bits_sec, gops, ginsts, code.size())
  return
Example #9
0
def MemoryDescExample(data_size=20000):
    """
  This example uses a memory descriptor to move 20k integers back and 
  forth between main memory and the SPU local store. Each value is
  incremented by 1 while on the SPU.
  
  Memory descriptors are a general purpose method for describing a
  region of memory.  Memory is described by a typecode, address, and
  size.  Memory descriptors can be initialized by hand or from an
  array or buffer object.

  For main memory, memory descriptors are useful for transfering data
  between main memory and an SPU's local store.  The get/put methods
  on a memory descriptor generate the SPU code to move data of any
  size between main memory and local store.

  Memory descriptors can also be used with spu_vec_iters to describe
  the region of memory to iterate over.  The typecode in the memory
  descriptor is used to determine the type for the loop induction
  variable.

  Note that there is currently no difference between memory
  descriptors for main memory and local store.  It's up to the user to
  make sure the memory descriptor settings make sense in the current
  context.  (this will probably change in the near future)

  Note: get/put currently use loops rather than display lists for
        transferring data over 16k.
  """

    code = InstructionStream()
    proc = Processor()

    code.debug = True
    spu.set_active_code(code)

    # Create a python array
    data = extarray.extarray('I', range(data_size))

    # Align the data in the array
    #a_data = aligned_memory(data_size, typecode = 'I')
    #a_data.copy_to(data.buffer_info()[0], data_size)

    # Create memory descriptor for the data in main memory
    data_desc = memory_desc('I')
    #data_desc.from_array(a_data)
    data_desc.from_array(data)

    # Transfer the data to 0x0 in the local store
    data_desc.get(code, 0)

    # Create memory descriptor for the data in the local store for use
    # in the iterator
    lsa_data = memory_desc('i', 0, data_size)

    # Add one to each value
    for x in spu_vec_iter(code, lsa_data):
        x.v = x + 1

    # Transfer the data back to main memory
    data_desc.put(code, 0)

    dma.spu_write_out_mbox(code, 0xCAFE)

    # Execute the synthetic program
    # code.print_code()

    spe_id = proc.execute(code, async=True)
    proc.join(spe_id)

    # Copy it back to the Python array
    #a_data.copy_from(data.buffer_info()[0], data_size)

    for i in xrange(data_size):
        assert (data[i] == i + 1)
    return
Example #10
0
def MemoryDescExample(data_size = 20000):
  """
  This example uses a memory descriptor to move 20k integers back and 
  forth between main memory and the SPU local store. Each value is
  incremented by 1 while on the SPU.
  
  Memory descriptors are a general purpose method for describing a
  region of memory.  Memory is described by a typecode, address, and
  size.  Memory descriptors can be initialized by hand or from an
  array or buffer object.

  For main memory, memory descriptors are useful for transfering data
  between main memory and an SPU's local store.  The get/put methods
  on a memory descriptor generate the SPU code to move data of any
  size between main memory and local store.

  Memory descriptors can also be used with spu_vec_iters to describe
  the region of memory to iterate over.  The typecode in the memory
  descriptor is used to determine the type for the loop induction
  variable.

  Note that there is currently no difference between memory
  descriptors for main memory and local store.  It's up to the user to
  make sure the memory descriptor settings make sense in the current
  context.  (this will probably change in the near future)

  Note: get/put currently use loops rather than display lists for
        transferring data over 16k.
  """
  
  code = env.InstructionStream()
  proc = env.Processor()

  code.debug = True
  spu.set_active_code(code)

  # Create a python array
  data = extarray.extarray('I', range(data_size))

  # Align the data in the array
  #a_data = aligned_memory(data_size, typecode = 'I')
  #a_data.copy_to(data.buffer_info()[0], data_size)
  
  # Create memory descriptor for the data in main memory
  data_desc = memory_desc('I')
  #data_desc.from_array(a_data)
  data_desc.from_array(data)

  # Transfer the data to 0x0 in the local store
  data_desc.get(code, 0)

  # Create memory descriptor for the data in the local store for use
  # in the iterator  
  lsa_data = memory_desc('i', 0, data_size)

  # Add one to each value
  for x in spu_vec_iter(code, lsa_data):
    x.v = x + 1

  # Transfer the data back to main memory
  data_desc.put(code, 0)

  dma.spu_write_out_mbox(code, 0xCAFE)
  
  # Execute the synthetic program
  # code.print_code()
  
  spe_id = proc.execute(code, async=True)
  proc.join(spe_id)

  # Copy it back to the Python array
  #a_data.copy_from(data.buffer_info()[0], data_size)

  for i in xrange(data_size):
    assert(data[i] == i + 1)
  return