コード例 #1
0
ファイル: util.py プロジェクト: forrestv/pyable
def TestSetSlotValue():
    import corepy.arch.spu.platform as synspu
    import corepy.arch.spu.types.spu_types as var
    import corepy.arch.spu.lib.dma as dma

    prgm = synspu.Program()
    code = prgm.get_stream()
    proc = synspu.Processor()
    spu.set_active_code(code)
    a = var.SignedWord(0x11)
    b = var.SignedWord(0x13)
    r = var.SignedWord(0xFFFFFFFF)

    set_slot_value(code, r, 0, 0x10)
    set_slot_value(code, r, 1, a)
    set_slot_value(code, r, 2, 0x12)
    set_slot_value(code, r, 3, b)

    for i in range(4):
        spu.wrch(r, dma.SPU_WrOutMbox)
        spu.rotqbyi(r, r, 4)

    prgm.add(code)
    spe_id = proc.execute(prgm, async=True)

    for i in range(4):
        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass
        result = synspu.spu_exec.read_out_mbox(spe_id)
        assert (result == (i + 0x10))

    proc.join(spe_id)

    return
コード例 #2
0
def TestStreamBufferDouble(n_spus = 1):
    """Double a 2048-word array in place via a 'double' mode stream_buffer.

    The array is streamed through 32-byte buffers; inside each buffer a
    syn_iter loop with step 16 loads a quadword, adds it to itself and
    stores it back.  After execution every element must equal twice its
    original value.

    n_spus -- number of SPUs to run on; values above 1 select a
              ParallelProgram and wrap the stream in parallel().
    """
    n = 2048
    a = extarray.extarray('I', range(n))
    buffer_size = 32

    if n_spus > 1:
        prgm = env.ParallelProgram()
    else:
        prgm = env.Program()
    code = prgm.get_stream()

    current = var.SignedWord(0, code)

    # Stream the whole array (n words of 4 bytes) starting at its address.
    stream = stream_buffer(code, a.buffer_info()[0], n * 4, buffer_size, 0,
                           buffer_mode='double', save = True)
    if n_spus > 1:
        stream = parallel(stream)

    for buf in stream:
        for lsa in syn_iter(code, buffer_size, 16):
            code.add(spu.lqx(current, lsa, buf))
            current.v = current + current
            code.add(spu.stqx(current, lsa, buf))

    prgm.add(code)
    env.Processor().execute(prgm, n_spus = n_spus)

    for i in range(len(a)):
        assert a[i] == i + i
コード例 #3
0
ファイル: spu_types.py プロジェクト: forrestv/pyable
def TestFloatArray():
    """Add two 4-element SingleFloat vectors and reduce the sum on the SPU.

    The element-wise sum of x and y is accumulated into the fp_return
    register four times, with rotqbyi(..., 4) applied to the sum between
    additions.  The value returned by execute(mode='fp') is compared
    against the same reduction computed on the host.
    """
    from corepy.arch.spu.platform import InstructionStream, Processor
    import corepy.arch.spu.lib.dma as dma
    import corepy.arch.spu.platform as env

    x_values = [1.0, 2.0, 3.0, 4.0]
    y_values = [0.5, 1.5, 2.5, 3.5]

    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    x = SingleFloat(x_values)
    y = SingleFloat(y_values)
    total = SingleFloat(0.0)

    total.v = spu.fa.ex(x, y)

    acc = SingleFloat([0.0, 0.0, 0.0, 0.0], reg=code.fp_return)

    # Accumulate, then rotate the sum by 4 bytes before the next addition.
    for _ in range(4):
        acc.v = spu.fa.ex(total, acc)
        spu.rotqbyi(total, total, 4)

    prgm.add(code)
    proc = env.Processor()
    result = proc.execute(prgm, mode='fp')

    # Host-side reference, accumulated in the same element order.
    x_test = array.array('f', x_values)
    y_test = array.array('f', y_values)
    expected = 0.0
    for x_elem, y_elem in zip(x_test, y_test):
        expected += x_elem + y_elem

    assert result == expected
コード例 #4
0
def TestVecIter(n_spus = 1):
    """Double a 1024-word array in place using spu_vec_iter over each buffer.

    The array is streamed through 16-byte buffers; for every buffer a
    spu_vec_iter built from memory_desc('i', 0, buffer_size) yields a
    variable that is added to itself.  Afterwards each element must equal
    twice its index.

    n_spus -- number of SPUs to run on; values above 1 select a
              ParallelProgram and wrap the stream in parallel().
    """
    n = 1024
    a = extarray.extarray('I', range(n))
    buffer_size = 16
    use_parallel = n_spus > 1

    if use_parallel:
        prgm = env.ParallelProgram()
    else:
        prgm = env.Program()
    code = prgm.get_stream()

    # Created before the stream, as in the original synthesis order; the
    # name is rebound by the vector iterator below.
    current = var.SignedWord(0, code)

    stream = stream_buffer(code, a.buffer_info()[0], n * 4, buffer_size, 0,
                           save = True)
    if use_parallel:
        stream = parallel(stream)

    md = memory_desc('i', 0, buffer_size)

    for buf in stream:
        for current in spu_vec_iter(code, md):
            current.v = current + current

    prgm.add(code)
    env.Processor().execute(prgm, n_spus = n_spus)

    for i in range(n):
        assert a[i] == i + i
コード例 #5
0
def TestAll():
    """Synthesize one call to each helper in this module, then run the code.

    Three registers are acquired and passed to shr, cneq, cge, cgei, lt,
    lti, a_immediate (with a small and a large immediate) and
    sf_immediate.  The resulting program is printed and executed; nothing
    is asserted about the results.
    """
    import corepy.arch.spu.platform as env

    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    # Same acquisition order as before: two operands, then the target.
    lhs = code.prgm.acquire_register()
    rhs = code.prgm.acquire_register()
    dst = code.prgm.acquire_register()

    shr(dst, lhs, rhs)
    cneq(dst, lhs, rhs)
    cge(dst, lhs, rhs)
    cgei(dst, lhs, 10)
    lt(dst, lhs, rhs)
    lti(dst, lhs, 10)

    for immediate in (10, 10000):
        a_immediate(dst, lhs, immediate)
    sf_immediate(dst, lhs, 10000)

    prgm.add(code)
    prgm.print_code()

    env.Processor().execute(prgm)
コード例 #6
0
def SimpleSPU():
    """
  A very simple SPU that computes 11 + 31 and returns 0xA on success.
  """
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()

    spu.set_active_code(code)

    # Acquire two registers
    #x    = code.acquire_register()
    x = code.gp_return
    test = prgm.acquire_register(reg_name=55)

    spu.xor(x, x, x)  # zero x
    spu.ai(x, x, 11)  # x = x + 11
    spu.ai(x, x, 31)  # x = x + 31

    spu.ceqi(test, x, 42)  # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    spu.brz(test, 2)
    spu.stop(0x100A)
    spu.stop(0x100B)

    prgm.add(code)
    prgm.print_code(hex=True)
    r = proc.execute(prgm, mode='int', stop=True, debug=True)
    assert (r[0] == 42)
    assert (r[1] == 0x100A)

    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    util.load_float(code, code.fp_return, 3.14)

    prgm.add(code)
    prgm.print_code(hex=True)
    r = proc.execute(prgm, mode='fp')
    print r
    return
コード例 #7
0
ファイル: fbdemo.py プロジェクト: maxim-tyutyunnikov/corepy
def fb_draw():
  """Draw to the Cell framebuffer forever, alternating two SPU programs.

  Two FBDraw instances are synthesized into separate programs with their
  buffer addresses swapped (draw0: buffers 0 then 1; draw1: buffers 1 then
  0).  The loop executes program 0, waits for vsync and flips to buffer 0,
  then does the same with program 1 and buffer 1.

  The loop never terminates on its own, so the framebuffer is closed in a
  ``finally`` clause; it is released when the loop is interrupted (for
  example by KeyboardInterrupt) or an exception is raised.
  """
  prgm0 = synspu.Program()
  prgm1 = synspu.Program()
  code0 = prgm0.get_stream()
  code1 = prgm1.get_stream()
  prgm0 += code0
  prgm1 += code1
  proc = synspu.Processor()

  fb = cell_fb.framebuffer()
  cell_fb.fb_open(fb)

  try:
    draw0 = FBDraw()
    draw0.set_buffers(cell_fb.fb_addr(fb, 0), cell_fb.fb_addr(fb, 1))
    draw0.set_stride(fb.stride)

    draw0.synthesize(code0)

    # Same renderer with the two buffer addresses swapped.
    draw1 = FBDraw()
    draw1.set_buffers(cell_fb.fb_addr(fb, 1), cell_fb.fb_addr(fb, 0))
    draw1.set_stride(fb.stride)

    draw1.synthesize(code1)

    while True:

      # cell_fb.fb_clear(fb, 0)
      proc.execute(prgm0)
      cell_fb.fb_wait_vsync(fb)
      cell_fb.fb_flip(fb, 0)

      # cell_fb.fb_clear(fb, 1)
      proc.execute(prgm1)
      cell_fb.fb_wait_vsync(fb)
      cell_fb.fb_flip(fb, 1)
  finally:
    # Previously placed after the infinite loop and therefore unreachable.
    cell_fb.fb_close(fb)
コード例 #8
0
ファイル: spu_types.py プロジェクト: forrestv/pyable
def RunTest(test):
    """Run ``test`` against a fresh active stream, then print and execute it.

    test -- a callable taking no arguments; it is invoked after a new
            stream has been made the active code, so any instructions it
            emits land in that stream.
    """
    import corepy.arch.spu.platform as env

    program = env.Program()
    stream = program.get_stream()
    spu.set_active_code(stream)

    test()

    program.add(stream)
    program.print_code()
    env.Processor().execute(program)
コード例 #9
0
def TestStreamBufferSingle(n_spus = 1):
    """Double a 1024-word array in place through a default-mode stream_buffer.

    The array is streamed through 128-byte buffers; inside each buffer a
    syn_iter loop with step 16 loads a quadword, adds it to itself and
    stores it back.  Afterwards each element must equal twice its index.

    n_spus -- number of SPUs to run on; values above 1 select a
              ParallelProgram and wrap the stream in parallel().
    """
    n = 1024
    a = extarray.extarray('I', range(n))
    buffer_size = 128
    use_parallel = n_spus > 1

    if use_parallel:
        prgm = env.ParallelProgram()
    else:
        prgm = env.Program()
    code = prgm.get_stream()

    current = var.SignedWord(0, code)

    stream = stream_buffer(code, a.buffer_info()[0], n * 4, buffer_size, 0,
                           save = True)
    if use_parallel:
        stream = parallel(stream)

    for buf in stream:
        # Load / double / store one quadword per syn_iter step.
        for lsa in syn_iter(code, buffer_size, 16):
            code.add(spu.lqx(current, lsa, buf))
            current.v = current + current
            code.add(spu.stqx(current, lsa, buf))

    prgm.add(code)
    env.Processor().execute(prgm, n_spus = n_spus)

    for i in range(n):
        assert a[i] == i + i
コード例 #10
0
ファイル: ispugui.py プロジェクト: maxim-tyutyunnikov/corepy
    def GenerateStream(self, step=None):
        """Build an SPU instruction stream from the editor's text.

        Each editor line is stripped and handled as follows: blank lines
        and lines starting with '#' emit nothing; a line ending in ':'
        adds the label named by the text before the colon; any other line
        is evaluated as ``spu.<line>`` to create an instruction, after
        rewriting ``Label(name)`` into a label lookup on the program.

        step -- when not None, only line number ``step`` is synthesized
                normally; every other instruction line is replaced by
                ``stop(0x2FFF)`` (labels are still added).

        A line that reaches the breakpoint check and has a breakpoint set
        (``editCtrl.IsBreakSet``) is also replaced by ``stop(0x2FFF)``.

        Returns the stream, after adding it to a new program and calling
        ``cache_code()`` on that program.
        """
        prgm = env.Program()
        # NOTE: this local must stay named `code`; the rewritten Label(...)
        # expression evaluated below refers to it by name.
        code = prgm.get_stream()
        txt = self.editCtrl.GetText().split('\n')

        for i, line in enumerate(txt):
            cmd = line.strip()
            is_source = cmd != "" and cmd[0] != '#'
            is_label = is_source and cmd[-1] == ":"

            # For the stop case, want all instructions except the current one
            # to be STOP instructions.
            if step is not None and i != step:
                if is_label:
                    code.add(code.prgm.get_label(cmd[:-1]))
                elif is_source:
                    code.add(spu.stop(0x2FFF))
                continue

            if self.editCtrl.IsBreakSet(i):
                code.add(spu.stop(0x2FFF))
                continue

            if not is_source:
                continue

            inst = None
            if is_label:
                # Label - better parsing?
                inst = code.prgm.get_label(cmd[:-1])
            else:
                # Instruction
                strcmd = re.sub(r"Label\((.*?)\)",
                                "code.prgm.get_label('\\1')", cmd)
                # SECURITY: eval() executes arbitrary editor text; this is
                # only acceptable while the text is typed by the local user.
                try:
                    inst = eval('spu.%s' % strcmd)
                except Exception:
                    # Exception (not a bare except) so KeyboardInterrupt and
                    # SystemExit are no longer swallowed here.
                    print('Error creating instruction: %s' % cmd)

            # NOTE(review): after a failed eval `inst` is None and is still
            # passed to code.add(), as before -- confirm that add() tolerates
            # None, or skip the line instead.
            code.add(inst)

        prgm.add(code)
        prgm.cache_code()
        return code
コード例 #11
0
def TestContinueLabel(n_spus = 1):
    """Check branching to a vector iterator's continue_label.

    Each vector of a 1024-word array is doubled, compared with 4 and,
    after gbb on the comparison result, a brz to the iterator's
    continue_label is emitted; a second doubling follows the branch.  The
    host then asserts that elements 0..3 were multiplied by four and all
    later elements by two.

    n_spus -- number of SPUs to run on; values above 1 select a
              ParallelProgram and wrap the stream in parallel().
    """
    n = 1024
    a = extarray.extarray('I', range(n))
    buffer_size = 16
    use_parallel = n_spus > 1

    if use_parallel:
        prgm = env.ParallelProgram()
    else:
        prgm = env.Program()
    code = prgm.get_stream()

    current = var.SignedWord(0, code)
    test    = var.SignedWord(0, code)
    four    = var.SignedWord(4, code)

    stream = stream_buffer(code, a.buffer_info()[0], n * 4, buffer_size, 0,
                           save = True)
    if use_parallel:
        stream = parallel(stream)

    md = memory_desc('i', 0, buffer_size)
    lsa_iter = spu_vec_iter(code, md)

    for buf in stream:
        for current in lsa_iter:
            current.v = current + current

            test.v = (current == four)
            code.add(spu.gbb(test, test))
            # Jump to the iterator's continue point when test is zero,
            # bypassing the second doubling below.
            code.add(spu.brz(test.reg, lsa_iter.continue_label))
            current.v = current + current

    prgm.add(code)
    env.Processor().execute(prgm, n_spus = n_spus)

    for i in range(n):
        if i < 4:
            assert a[i] == i * 4
        else:
            assert a[i] == i + i
コード例 #12
0
ファイル: spu_types.py プロジェクト: forrestv/pyable
def TestFloatScalar():
    """Add the scalars 1.0 and 2.0 on the SPU and check the fp result.

    Two SingleFloat values are added with spu.fa.ex into a SingleFloat
    bound to the fp_return register; the value returned by
    execute(mode='fp') must equal 1.0 + 2.0.
    """
    from corepy.arch.spu.platform import InstructionStream, Processor
    import corepy.arch.spu.lib.dma as dma
    import corepy.arch.spu.platform as env

    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    lhs = SingleFloat(1.0)
    rhs = SingleFloat(2.0)
    out = SingleFloat(0.0, reg=code.fp_return)

    out.v = spu.fa.ex(lhs, rhs)

    prgm.add(code)
    result = env.Processor().execute(prgm, mode='fp')
    assert result == (1.0 + 2.0)
コード例 #13
0
ファイル: ispugui.py プロジェクト: maxim-tyutyunnikov/corepy
    def _startSPU(self):
        self.ctx = ctx = env.spu_exec.alloc_context()

        # Execute a no-op instruction stream so the prolog is executed
        prgm = env.Program()
        code = prgm.get_stream()
        code.add(spu.nop(code.r_zero))

        prgm.cache_code()
        itemsize = prgm.render_code.itemsize
        code_len = len(prgm.render_code) * itemsize
        if code_len % 16 != 0:
            code_len += 16 - (code_len % 16)
        code_lsa = 0x40000 - code_len

        env.spu_exec.run_stream(ctx, prgm.inst_addr(), code_len, code_lsa,
                                code_lsa)

        self.localstore = extarray.extarray('I', 262144 / 4)
        print "spuls %x" % (ctx.spuls), ctx.spuls, type(ctx.spuls)
        self.localstore.set_memory(ctx.spuls, 262144)
        return
コード例 #14
0
def SpeedTest(n_spus=6, n_floats=6):
    """
  Get a rough estimate of the maximum flop count.
  On a PS3 using all 6 spus, this is 152 GFlops.
  """

    if n_spus > 1: prgm = env.ParallelProgram()
    else: prgm = env.Program()
    code = prgm.get_stream()

    spu.set_active_code(code)

    f_range = range(n_floats)
    a = [SingleFloat(0.0) for i in f_range]
    b = [SingleFloat(0.0) for i in f_range]
    c = [SingleFloat(0.0) for i in f_range]
    t = [SingleFloat(0.0) for i in f_range]

    outer = 2**12
    inner = 2**16
    unroll = 128
    fuse = 2
    simd = 4
    for x in syn_iter(code, outer):
        for y in syn_iter(code, inner):
            for u in xrange(unroll):
                for i in f_range:
                    t[i].v = spu.fma.ex(a[i], b[i], c[i])

    # Run the synthetic program and copy the results back to the array
    # TODO - AWF - use the SPU decrementers to time this
    proc = env.Processor()
    prgm += code

    start = time.time()
    r = proc.execute(prgm, n_spus=n_spus)
    stop = time.time()
    total = stop - start
    n_ops = long(outer) * inner * long(unroll) * long(n_floats) * long(
        fuse) * long(simd) * long(n_spus)
    print '%.6f sec, %.2f GFlops' % (total, n_ops / total / 1e9)

    #   # Run the native program and copy the results back to the array
    #   outer = 2**14
    #   inner = 2**16
    #   unroll = 1
    #   fuse = 1
    #   simd = 1

    #   proc = Processor()
    #   # ncode = NativeInstructionStream("a.out")
    #   start = time.time()
    #   r = proc.execute(ncode, n_spus = n_spus)
    #   stop = time.time()
    #   total = stop - start
    #   n_ops = long(outer) * inner * long(unroll) * long(n_floats) * long(fuse) * long(simd) * long(n_spus)
    #   print '%.6f sec, %.2f GFlops' % (total, n_ops / total / 1e9)

    results = """
  --> No optimizations
  Executing native code: a.out
  14.805322 sec, 20.89 GFlops

  --> Synthetic
  Platform: linux.spre_linux_spu
  no raw data
  65.023350 sec, 152.19 GFlops

  --> -O3 (fuse: 2, simd: 4)
  Executing native code: a.out
  7.407939 sec, 41.74 GFlops

  --> -O3 (fuse: 1, simd: 1)
  Executing native code: a.out
  7.403702 sec, 5.22 GFlops
  """
    return
コード例 #15
0
def TestSPUIter():
    """Round-trip a 32-word array through the SPU, doubling every element.

    The synthesized program issues dma.mfc_get with tag 12 and waits on
    tag mask 1<<12, walks the data with a syn_iter loop (step 16) that
    loads, doubles and stores each quadword, then issues dma.mfc_put with
    tag 13 and waits on tag mask 1<<13.  The host asserts that every
    element of the array ends up equal to twice its index.
    """
    size = 32
    n_bytes = size * 4
    data = extarray.extarray('I', range(size))
    prgm = env.Program()
    code = prgm.get_stream()

    r_ea_data = prgm.acquire_register()
    r_ls_data = prgm.acquire_register()
    r_size    = prgm.acquire_register()
    r_tag     = prgm.acquire_register()

    # Address of the array and the transfer size in bytes
    util.load_word(code, r_ea_data, data.buffer_info()[0])
    util.load_word(code, r_size, n_bytes)

    # Tag 12 for the inbound transfer; local-store address 0
    code.add(spu.ai(r_tag, code.r_zero, 12))
    code.add(spu.ai(r_ls_data, code.r_zero, 0))

    # Fetch the data, select tag 12 in the mask, wait for completion
    dma.mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)
    dma.mfc_write_tag_mask(code, 1 << 12)
    dma.mfc_read_tag_status_all(code)

    # Double each quadword in place using an SPU iterator
    current = var.SignedWord(0, code)
    for lsa in syn_iter(code, n_bytes, 16):
        code.add(spu.lqx(current, code.r_zero, lsa))
        current.v = current + current
        code.add(spu.stqx(current, code.r_zero, lsa))

    # Tag 13 for the outbound transfer
    code.add(spu.ai(r_tag, code.r_zero, 13))

    # Write the data back, select tag 13 in the mask, wait for completion
    dma.mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)
    dma.mfc_write_tag_mask(code, 1 << 13)
    dma.mfc_read_tag_status_all(code)

    # Cleanup, in acquisition order
    for reg in (r_ea_data, r_ls_data, r_size, r_tag):
        prgm.release_register(reg)

    # Execute the code
    prgm.add(code)
    env.Processor().execute(prgm)

    for i in range(size):
        assert data[i] == i + i
コード例 #16
0
    def generate(self,
                 results,
                 patterns,
                 r1_range,
                 r2_range,
                 max_init,
                 max_n,
                 size,
                 n_spus=6):
        # Connect to the framebuffer
        fb = cell_fb.framebuffer()
        cell_fb.fb_open(fb)

        # Setup the range parameter array
        r1_inc = (r1_range[1] - r1_range[0]) / size[0]
        r2_inc = (r2_range[1] - r2_range[0]) / size[1]

        ranges = [0 for i in range(n_spus)]
        #a_ranges = [0 for i in range(n_spus)]

        # Slice and dice for parallel execution
        spu_slices = [[size[0], size[1] / n_spus] for ispu in range(n_spus)]
        spu_slices[-1][1] += size[1] % n_spus

        offset = 0.0
        for ispu in range(n_spus):
            ranges[ispu] = extarray.extarray('f', [0.0] * 16)

            for i in range(4):
                ranges[ispu][
                    i] = r1_range[0] + float(i) * r1_inc  # horizontal is simd
                ranges[ispu][4 + i] = r2_range[0] + offset
                ranges[ispu][8 + i] = r1_inc * 4.0
                ranges[ispu][12 + i] = r2_inc
            # print ranges

            # Copy the paramters to aligned buffers
            #a_ranges[ispu] = synspu.aligned_memory(len(ranges[ispu]), typecode='I')
            #a_ranges[ispu].copy_to(ranges[ispu].buffer_info()[0], len(ranges[ispu]))

            offset += r2_inc * spu_slices[ispu][1]

        # Setup the pattern vector
        for pattern in patterns:
            if len(pattern) != len(patterns[0]):
                raise Exception('All patterns must be the same length')

        bits = [_pattern2vector(pattern) for pattern in patterns]
        #a_pattern = synspu.aligned_memory(len(bits[0]), typecode='I')
        pattern = extarray.extarray('I', len(bits[0]))

        # Create the instruction streams
        prgms = []

        n = len(patterns) * 10
        offset = 0
        for ispu in range(n_spus):
            renderer = FBRenderer()
            renderer.set_lsa(0x100)
            renderer.set_addr(cell_fb.fb_addr(fb, 0) + offset)
            renderer.set_width(size[0])
            renderer.set_stride(fb.stride)

            ly_block = LyapunovBlock()

            ly_block.set_size(*spu_slices[i])
            #ly_block.set_range(a_ranges[ispu])
            ly_block.set_range(ranges[ispu])
            #ly_block.set_pattern(a_pattern)
            ly_block.set_pattern(pattern)
            ly_block.set_max_init(max_init)
            ly_block.set_max_n(max_n)
            ly_block.set_renderer(renderer)

            prgm = synspu.Program()
            code = env.get_stream()
            prgm += code

            # code.set_debug(True)
            prgms.append(prgm)
            offset += spu_slices[i][1] * fb.stride * 4

            # for i in spuiter.syn_range(code, n):
            ly_block.synthesize(code)

        # code.print_code()
        proc = synspu.Processor()

        cell_fb.fb_clear(fb, 0)

        import time
        ids = [0 for i in range(n_spus)]
        start = time.time()

        ipattern = 0
        n_patterns = len(patterns)
        len_bits = len(bits[0])
        pattern_inc = 1

        for i in range(n):
            #a_pattern.copy_to(bits[ipattern].buffer_info()[0], len_bits)
            # TODO - better/faster
            for j in xrange(0, len_bits):
                pattern[j] = bits[ipattern][j]

            for ispu in range(n_spus):
                ids[ispu] = proc.execute(prgms[ispu], async=True)

            for ispu in range(n_spus):
                proc.join(ids[ispu])

            cell_fb.fb_wait_vsync(fb)
            cell_fb.fb_flip(fb, 0)

            ipattern += pattern_inc
            if (ipattern == (n_patterns - 1)) or (ipattern == 0):
                pattern_inc *= -1

            print ipattern

        stop = time.time()

        print '%.2f fps (%.6f)' % (float(n) / (stop - start), (stop - start))
        cell_fb.fb_close(fb)

        return
コード例 #17
0
    def generate(self, results, pattern, r1_range, r2_range, max_init, max_n,
                 size):

        # Setup the range parameter array
        r1_inc = (r1_range[1] - r1_range[0]) / size[0]
        r2_inc = (r2_range[1] - r2_range[0]) / size[1]

        ranges = extarray.extarray('f', [0.0] * 16)
        for i in range(4):
            ranges[i] = r1_range[0]
            ranges[4 + i] = r2_range[0]
            ranges[8 + i] = r1_inc
            ranges[12 + i] = r2_inc

        # Setup the pattern vector
        bits = _pattern2vector(pattern)

        # Copy the paramters to aligned buffers
        #a_ranges = synspu.aligned_memory(len(ranges), typecode='I')
        #a_ranges.copy_to(ranges.buffer_info()[0], len(ranges))

        #a_pattern = synspu.aligned_memory(len(bits), typecode='I')
        #a_pattern.copy_to(bits.buffer_info()[0], len(bits))

        renderer = MailboxRenderer()
        ly_block = LyapunovBlock()

        ly_block.set_size(size[0], size[1])
        #ly_block.set_range(a_ranges)
        #ly_block.set_pattern(a_pattern)
        ly_block.set_range(ranges)
        ly_block.set_pattern(bits)
        ly_block.set_max_init(max_init)
        ly_block.set_max_n(max_n)
        ly_block.set_renderer(renderer)

        prgm = synspu.Program()
        code = prgm.get_stream()
        prgm += code

        ly_block.synthesize(code)

        proc = synspu.Processor()

        spe_id = proc.execute(prgm, async=True)

        for i in range(size[0] * size[1]):
            while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
                pass
            print 'ly said: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))

        proc.join(spe_id)

        # for x in range(size[0]):
        #   r2 = r2_range[0] + r2_inc
        #   print 'col:', x, r1, r2

        #   for y in range(size[1]):
        #     results[y, x] = lyapunov_point(pattern, r1, r2, max_init, max_n)
        #     r2 += r2_inc
        #   r1 += r1_inc

        return
コード例 #18
0
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import corepy.lib.extarray as extarray
import corepy.arch.spu.isa as spu
import corepy.arch.spu.platform as env
import corepy.arch.spu.lib.dma as dma
from corepy.arch.spu.lib.util import load_word

import time

# Script entry point.  NOTE(review): this listing is truncated right after
# the loop label is added, so the loop body and the execution step are not
# visible here.
if __name__ == '__main__':
    # Value loaded into the counter register below.
    ITERS = 500000
    #ITERS = 15

    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()
    spu.set_active_code(code)
    # Two host-side arrays: one created with a length of 131072 / 4
    # unsigned ints, one holding the values 0..15.  Their use is in the
    # part of the script that is not shown.
    psmap = extarray.extarray('I', 131072 / 4)
    data = extarray.extarray('I', range(0, 16))

    # r_sum aliases the program's integer return register.
    r_sum = prgm.gp_return
    r_cnt = prgm.acquire_register()

    # Zero r_sum and load ITERS into r_cnt.
    spu.xor(r_sum, r_sum, r_sum)
    load_word(code, r_cnt, ITERS)

    # Mark the current position in the stream with the "loop" label.
    lbl_loop = prgm.get_label("loop")
    code.add(lbl_loop)
コード例 #19
0
def SimpleSPU():
    """
  A very simple SPU that computes 11 + 31 and returns 0xA on success.
  """
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()

    spu.set_active_code(code)

    # Acquire two registers
    #x    = code.acquire_register()
    x = prgm.gp_return
    test = prgm.acquire_register()

    lbl_brz = prgm.get_label("BRZ")
    lbl_skip = prgm.get_label("SKIP")

    spu.hbrr(lbl_brz, lbl_skip)
    spu.xor(x, x, x)  # zero x
    spu.ai(x, x, 11)  # x = x + 11
    spu.ai(x, x, 31)  # x = x + 31

    spu.ceqi(test, x, 42)  # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    code.add(lbl_brz)
    spu.brz(test, lbl_skip)
    spu.stop(0x100A)
    code.add(lbl_skip)
    spu.stop(0x100B)

    prgm.add(code)
    prgm.print_code()
    r = proc.execute(prgm, mode='int', stop=True)
    print "ret", r
    assert (r[0] == 42)
    assert (r[1] == 0x100A)

    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    lbl_loop = prgm.get_label("LOOP")
    lbl_break = prgm.get_label("BREAK")

    r_cnt = prgm.acquire_register()
    r_stop = prgm.acquire_register()
    r_cmp = prgm.acquire_register()
    r_foo = prgm.gp_return

    spu.ori(r_foo, prgm.r_zero, 0)
    spu.ori(r_cnt, prgm.r_zero, 0)
    util.load_word(code, r_stop, 10)

    code.add(lbl_loop)

    spu.ceq(r_cmp, r_cnt, r_stop)
    spu.brnz(r_cmp, lbl_break)
    spu.ai(r_cnt, r_cnt, 1)

    spu.a(r_foo, r_foo, r_cnt)

    spu.br(lbl_loop)
    code.add(lbl_break)

    prgm.add(code)
    prgm.print_code()
    r = proc.execute(prgm, mode='int', stop=True)
    print "ret", r
    assert (r[0] == 55)

    return