Beispiel #1
0
  def save_register(self, reg): # , branch_to_save = False):
    """
    Emit SPU instructions that store `reg` through self.ls_buffer and then
    conditionally skip the code generated by self.save_ls_buffer().

    All instructions go to the stream returned by spu.get_active_code().
    Three temporary registers are acquired for the duration of the call and
    released before returning.

    NOTE(review): the original author flagged this routine as unfinished
    (see the "STOPPED HERE" marker below); treat it as work in progress.
    """
    code = spu.get_active_code()

    # Scratch registers; collected in a list so they can be released together.
    offset = code.acquire_register()
    size = code.acquire_register()
    test = code.acquire_register()
    regs = [offset, size, test]
    
    # Derive offset and size from self.ls_buffer by rotating it 4 and 8 bytes.
    # Presumably the buffer register carries the current offset and the buffer
    # size in its second and third words -- TODO confirm against the code
    # that initializes ls_buffer.
    spu.rotqbyi(offset, self.ls_buffer, 4)
    spu.rotqbyi(size,   self.ls_buffer, 8)

    # Store reg, using ls_buffer and offset as the two address operands.
    spu.stqx(reg, self.ls_buffer, offset)
    
    # offset = offset + 16, then test = (offset == size).
    # NOTE(review): the incremented offset is not written back into
    # self.ls_buffer anywhere in this method -- confirm whether it should be.
    spu.ai(offset, offset, 16)
    spu.ceq(test,  offset, size)

    # Write the three working values to the SPU outbound mailbox channel
    # (presumably debugging output left in by the author).
    spu.wrch(size, dma.SPU_WrOutMbox)
    spu.wrch(offset, dma.SPU_WrOutMbox)
    spu.wrch(test, dma.SPU_WrOutMbox)
    # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
    # Remember the stream position and emit a stop as a placeholder; it is
    # overwritten with the real branch once the length of the save code is
    # known.
    lbl_ls_full = code.size()
    spu.stop(0xB)
    self.save_ls_buffer(ls_size = size)

    spu.nop(0)
    # Patch the placeholder: branch past everything emitted since it when
    # test is zero, i.e. when offset != size.
    code[lbl_ls_full] = spu.brz(test, (code.size() - lbl_ls_full), ignore_active = True)

    code.release_registers(regs)
    return
Beispiel #2
0
    def save_register(self, reg):  # , branch_to_save = False):
        """
        Emit SPU instructions that store `reg` through self.ls_buffer and
        then conditionally skip the code generated by self.save_ls_buffer().

        All instructions go to the stream returned by spu.get_active_code().
        Three temporary registers are acquired for the duration of the call
        and released before returning.

        NOTE(review): the original author flagged this routine as unfinished
        (see the "STOPPED HERE" marker below); treat it as work in progress.
        """
        code = spu.get_active_code()

        # Scratch registers; kept in a list so they can be released together.
        offset = code.acquire_register()
        size = code.acquire_register()
        test = code.acquire_register()
        regs = [offset, size, test]

        # Derive offset and size from self.ls_buffer by rotating it 4 and 8
        # bytes. Presumably the buffer register carries the current offset and
        # the buffer size in its second and third words -- TODO confirm
        # against the code that initializes ls_buffer.
        spu.rotqbyi(offset, self.ls_buffer, 4)
        spu.rotqbyi(size, self.ls_buffer, 8)

        # Store reg, using ls_buffer and offset as the two address operands.
        spu.stqx(reg, self.ls_buffer, offset)

        # offset = offset + 16, then test = (offset == size).
        # NOTE(review): the incremented offset is not written back into
        # self.ls_buffer anywhere in this method -- confirm whether it should.
        spu.ai(offset, offset, 16)
        spu.ceq(test, offset, size)

        # Write the three working values to the SPU outbound mailbox channel
        # (presumably debugging output left in by the author).
        spu.wrch(size, dma.SPU_WrOutMbox)
        spu.wrch(offset, dma.SPU_WrOutMbox)
        spu.wrch(test, dma.SPU_WrOutMbox)
        # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
        # Remember the stream position and emit a stop as a placeholder; it
        # is overwritten with the real branch once the length of the save
        # code is known.
        lbl_ls_full = code.size()
        spu.stop(0xB)
        self.save_ls_buffer(ls_size=size)

        spu.nop(0)
        # Patch the placeholder: branch past everything emitted since it when
        # test is zero, i.e. when offset != size.
        code[lbl_ls_full] = spu.brz(test, (code.size() - lbl_ls_full),
                                    ignore_active=True)

        code.release_registers(regs)
        return
Beispiel #3
0
def TestParams():
    """
    Check that the ten execution parameters reach the SPU program.

    The generated program adds the ten parameter registers into the integer
    return register, compares the total with 55 and stops with 0x200A when
    they match (0x200B otherwise).  The host passes 1..10 as p1..p10 and
    asserts both the returned sum and the stop code.
    """
    prgm = Program()
    code = prgm.get_stream()
    proc = Processor()

    total = prgm.gp_return
    scratch = prgm.acquire_register()

    # x xor x == 0: clear the accumulator
    code.add(spu.xor(total, total, total))

    param_regs = (
        spu_param_1, spu_param_2, spu_param_3, spu_param_4, spu_param_5,
        spu_param_6, spu_param_7, spu_param_8, spu_param_9, spu_param_10,
    )
    for param_reg in param_regs:
        copy_param(code, scratch, param_reg)
        code.add(spu.a(total, total, scratch))

    # scratch = (total == 55); when it is zero, branch over the success stop
    code.add(spu.ceqi(scratch, total, 55))
    code.add(spu.brz(scratch, 2))
    code.add(spu.stop(0x200A))
    code.add(spu.stop(0x200B))

    # p1 = 1, p2 = 2, ..., p10 = 10
    params = spu_exec.ExecParams()
    for index in range(1, 11):
        setattr(params, 'p%d' % index, index)

    prgm += code
    result = proc.execute(prgm, params=params, stop=True)

    assert result[0] == 55
    assert result[1] == 0x200A
Beispiel #4
0
def SimpleSPU():
  """
  Build and run two very small SPU programs as a smoke test.

  The first program computes 11 + 31 in the integer return register and
  stops with code 0x100A when the sum equals 42 (0x100B otherwise); both the
  returned value and the stop code are asserted.  The second program loads
  3.14 into the floating point return register and prints what execute()
  returns.
  """
  prgm = env.Program()
  code = prgm.get_stream()
  proc = env.Processor()

  spu.set_active_code(code)

  # x aliases the integer return register; test is requested by name (55)
  x = code.gp_return
  test = prgm.acquire_register(reg_name = 55)

  spu.xor(x, x, x) # zero x
  spu.ai(x, x, 11) # x = x + 11
  spu.ai(x, x, 31) # x = x + 31

  spu.ceqi(test, x, 42) # test = (x == 42)

  # If test is false (all 0s), skip the stop(0x100A) instruction
  spu.brz(test, 2)
  spu.stop(0x100A)
  spu.stop(0x100B)

  prgm.add(code)
  prgm.print_code(hex = True)
  r = proc.execute(prgm, mode = 'int', stop = True, debug = True)
  assert r[0] == 42
  assert r[1] == 0x100A

  # Second program: only loads a float constant into the fp return register
  prgm = env.Program()
  code = prgm.get_stream()
  spu.set_active_code(code)

  util.load_float(code, code.fp_return, 3.14)

  prgm.add(code)
  prgm.print_code(hex = True)
  r = proc.execute(prgm, mode = 'fp')
  # Parenthesized form prints the same under Python 2 and parses on Python 3
  print(r)
def SimpleSPU():
    """
    Build and run two very small SPU programs as a smoke test.

    The first program computes 11 + 31 in the integer return register and
    stops with code 0x100A when the sum equals 42 (0x100B otherwise); both
    the returned value and the stop code are asserted.  The second program
    loads 3.14 into the floating point return register and prints what
    execute() returns.
    """
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()

    spu.set_active_code(code)

    # x aliases the integer return register; test is requested by name (55)
    x = code.gp_return
    test = prgm.acquire_register(reg_name=55)

    spu.xor(x, x, x)  # zero x
    spu.ai(x, x, 11)  # x = x + 11
    spu.ai(x, x, 31)  # x = x + 31

    spu.ceqi(test, x, 42)  # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    spu.brz(test, 2)
    spu.stop(0x100A)
    spu.stop(0x100B)

    prgm.add(code)
    prgm.print_code(hex=True)
    r = proc.execute(prgm, mode='int', stop=True, debug=True)
    assert r[0] == 42
    assert r[1] == 0x100A

    # Second program: only loads a float constant into the fp return register
    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    util.load_float(code, code.fp_return, 3.14)

    prgm.add(code)
    prgm.print_code(hex=True)
    r = proc.execute(prgm, mode='fp')
    # Parenthesized form prints the same under Python 2 and parses on Python 3
    print(r)
Beispiel #6
0
def TestParams():
    """
    Verify that execution parameters p1..p10 are visible to SPU code.

    The SPU program accumulates the ten parameter registers in the integer
    return register, compares the total against 55 and stops with 0x200A on
    a match or 0x200B otherwise.  The host supplies the values 1 through 10
    and asserts the returned sum and the stop code.
    """
    prgm = Program()
    code = prgm.get_stream()
    proc = Processor()

    acc = prgm.gp_return
    tmp = prgm.acquire_register()

    # Clear the accumulator (x xor x == 0)
    code.add(spu.xor(acc, acc, acc))

    sources = [
        spu_param_1, spu_param_2, spu_param_3, spu_param_4, spu_param_5,
        spu_param_6, spu_param_7, spu_param_8, spu_param_9, spu_param_10
    ]
    for source in sources:
        copy_param(code, tmp, source)
        code.add(spu.a(acc, acc, tmp))

    # tmp = (acc == 55); a zero tmp branches over the 0x200A stop
    code.add(spu.ceqi(tmp, acc, 55))
    code.add(spu.brz(tmp, 2))
    code.add(spu.stop(0x200A))
    code.add(spu.stop(0x200B))

    # Host side: p<k> = k for k in 1..10
    params = spu_exec.ExecParams()
    for k in range(1, 11):
        setattr(params, 'p%d' % k, k)

    prgm += code
    outcome = proc.execute(prgm, params=params, stop=True)

    assert outcome[0] == 55
    assert outcome[1] == 0x200A
Beispiel #7
0
def TestContinueLabel(n_spus=1):
    """
    Exercise the continue label of spu_vec_iter inside a streamed loop.

    An array holding 0..1023 is streamed through the SPU in 16-byte buffers.
    Each vector is doubled, compared with 4, and a branch to the iterator's
    continue label is emitted before a second doubling.  Afterwards the
    function asserts that elements with index >= 4 hold i + i and the first
    four hold i * 4.

    n_spus -- number of SPUs to run on; values above 1 select the parallel
              instruction stream and wrap the stream iterator in parallel().
    """
    n = 1024
    a = extarray.extarray('I', range(n))

    buffer_size = 16
    use_parallel = n_spus > 1

    if use_parallel:
        code = env.ParallelInstructionStream()
    else:
        code = env.InstructionStream()

    current = var.SignedWord(0, code)
    test = var.SignedWord(0, code)
    four = var.SignedWord(4, code)

    base_addr = a.buffer_info()[0]
    stream = stream_buffer(code, base_addr, n * 4, buffer_size, 0, save=True)
    if use_parallel:
        stream = parallel(stream)

    md = memory_desc('i', 0, buffer_size)
    lsa_iter = spu_vec_iter(code, md)

    for _chunk in stream:
        for current in lsa_iter:
            current.v = current + current

            # Branch to the iterator's continue label when the gathered
            # comparison bits are zero; the second doubling is then skipped.
            test.v = (current == four)
            code.add(spu.gbb(test, test))
            code.add(spu.brz(test.reg, lsa_iter.continue_label))
            current.v = current + current

    proc = env.Processor()
    proc.execute(code, n_spus=n_spus)

    for i in range(n):
        expected = i + i if i >= 4 else i * 4
        assert a[i] == expected
Beispiel #8
0
def TestParams():
    """
    Verify that execution parameters p1..p10 are visible to SPU code.

    The SPU program accumulates the ten parameter registers, compares the
    total against 55 and stops with 0x200A on a match or 0x200B otherwise.
    The host supplies the values 1 through 10 and asserts that execute()
    returns 0xA.

    NOTE(review): the emitted stop codes are 0x200A/0x200B while the
    assertion compares against 0xA -- presumably execute() in this API
    version reports only part of the stop code; confirm.
    """
    code = InstructionStream()
    proc = Processor()

    acc = code.acquire_register()
    tmp = code.acquire_register()

    # Clear the accumulator (x xor x == 0)
    code.add(spu.xor(acc, acc, acc))

    sources = (
        spu_param_1, spu_param_2, spu_param_3, spu_param_4, spu_param_5,
        spu_param_6, spu_param_7, spu_param_8, spu_param_9, spu_param_10,
    )
    for source in sources:
        copy_param(code, tmp, source)
        code.add(spu.a(acc, acc, tmp))

    # tmp = (acc == 55); a zero tmp branches over the 0x200A stop
    code.add(spu.ceqi(tmp, acc, 55))
    code.add(spu.brz(tmp, 2))
    code.add(spu.stop(0x200A))
    code.add(spu.stop(0x200B))

    # Host side: p<k> = k for k in 1..10
    params = spu_exec.ExecParams()
    for k in range(1, 11):
        setattr(params, 'p%d' % k, k)

    status = proc.execute(code, params=params)

    assert status == 0xA
Beispiel #9
0
def TestContinueLabel(n_spus = 1):
  """
  Exercise the continue label of spu_vec_iter inside a streamed loop.

  An array holding 0..1023 is streamed through the SPU in 16-byte buffers.
  Each vector is doubled, compared with 4, and a branch to the iterator's
  continue label is emitted before a second doubling.  Afterwards the
  function asserts that elements with index >= 4 hold i + i and the first
  four hold i * 4.

  n_spus -- number of SPUs to run on; values above 1 select ParallelProgram
            and wrap the stream iterator in parallel().
  """
  n = 1024
  a = extarray.extarray('I', range(n))

  buffer_size = 16
  use_parallel = n_spus > 1

  if use_parallel:
    prgm = env.ParallelProgram()
  else:
    prgm = env.Program()
  code = prgm.get_stream()

  current = var.SignedWord(0, code)
  test = var.SignedWord(0, code)
  four = var.SignedWord(4, code)

  base_addr = a.buffer_info()[0]
  stream = stream_buffer(code, base_addr, n * 4, buffer_size, 0, save = True)
  if use_parallel:
    stream = parallel(stream)

  md = memory_desc('i', 0, buffer_size)
  lsa_iter = spu_vec_iter(code, md)

  for _chunk in stream:
    for current in lsa_iter:
      current.v = current + current

      # Branch to the iterator's continue label when the gathered comparison
      # bits are zero; the second doubling is then skipped.
      test.v = (current == four)
      code.add(spu.gbb(test, test))
      code.add(spu.brz(test.reg, lsa_iter.continue_label))
      current.v = current + current

  prgm.add(code)
  proc = env.Processor()
  proc.execute(prgm, n_spus = n_spus)

  for i in range(n):
    expected = i + i if i >= 4 else i * 4
    assert a[i] == expected
Beispiel #10
0
  r_data = prgm.acquire_register()
  r_cmp = prgm.acquire_register()
  r_lsa = prgm.acquire_register()

  spu.il(r_lsa, 0x1000)

  lbl_incloop = prgm.get_label("incloop")
  code.add(lbl_incloop)

  spu.lqx(r_data, r_cnt, r_lsa)
  spu.ai(r_data, r_data, 2)
  spu.stqx(r_data, r_cnt, r_lsa)

  spu.ai(r_cnt, r_cnt, 16)
  spu.ceq(r_cmp, r_cnt, r_sum)
  spu.brz(r_cmp, lbl_incloop)

  dma.spu_write_out_mbox(code, code.r_zero)

  prgm += code

  t3 = time.time()
  id = proc.execute(prgm, async = True, mode = 'int')


  t1 = time.time()
  for i in xrange(0, ITERS):
    #env.spu_exec.write_in_mbox(id, 1)
    #env.spu_exec.write_in_mbox(id, 1)
    env.spu_exec.write_in_mbox(id, i)
    #cnt = env.spu_exec.stat_in_mbox(id)
Beispiel #11
0
    r_data = prgm.acquire_register()
    r_cmp = prgm.acquire_register()
    r_lsa = prgm.acquire_register()

    spu.il(r_lsa, 0x1000)

    lbl_incloop = prgm.get_label("incloop")
    code.add(lbl_incloop)

    spu.lqx(r_data, r_cnt, r_lsa)
    spu.ai(r_data, r_data, 2)
    spu.stqx(r_data, r_cnt, r_lsa)

    spu.ai(r_cnt, r_cnt, 16)
    spu.ceq(r_cmp, r_cnt, r_sum)
    spu.brz(r_cmp, lbl_incloop)

    dma.spu_write_out_mbox(code, code.r_zero)

    prgm += code

    t3 = time.time()
    id = proc.execute(prgm, async=True, mode='int')

    t1 = time.time()
    for i in xrange(0, ITERS):
        #env.spu_exec.write_in_mbox(id, 1)
        #env.spu_exec.write_in_mbox(id, 1)
        env.spu_exec.write_in_mbox(id, i)
        #cnt = env.spu_exec.stat_in_mbox(id)
        #print "cnt %x" % cnt
Beispiel #12
0
def SimpleSPU():
    """
    Build and run two small SPU programs that use labelled branches.

    The first program computes 11 + 31 in the integer return register and
    stops with code 0x100A when the sum equals 42 (0x100B otherwise); both
    the returned value and the stop code are asserted.  The second program
    is a counted loop that adds 1..10 into the return register; the result
    is asserted to be 55.
    """
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    # x aliases the integer return register; test holds the comparison
    x = code.gp_return
    test = code.acquire_register()

    lbl_brz = code.get_label("BRZ")
    lbl_skip = code.get_label("SKIP")

    # Presumably a branch hint: the branch at BRZ targets SKIP -- confirm
    spu.hbrr(lbl_brz, lbl_skip)
    spu.xor(x, x, x)  # zero x
    spu.ai(x, x, 11)  # x = x + 11
    spu.ai(x, x, 31)  # x = x + 31

    spu.ceqi(test, x, 42)  # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    code.add(lbl_brz)
    spu.brz(test, lbl_skip)
    spu.stop(0x100A)
    code.add(lbl_skip)
    spu.stop(0x100B)

    code.print_code(hex=True, pro=True, epi=True)
    r = proc.execute(code, mode='int', stop=True)
    # Single-argument form: same output on Python 2, valid syntax on Python 3
    print("ret %s" % (r,))
    assert r[0] == 42
    assert r[1] == 0x100A

    # Second program: r_foo = 1 + 2 + ... + 10
    code = InstructionStream()
    spu.set_active_code(code)

    lbl_loop = code.get_label("LOOP")
    lbl_break = code.get_label("BREAK")

    r_cnt = code.acquire_register()
    r_stop = code.acquire_register()
    r_cmp = code.acquire_register()
    r_foo = code.gp_return

    # OR with the zero register and immediate 0 clears both registers
    spu.ori(r_foo, code.r_zero, 0)
    spu.ori(r_cnt, code.r_zero, 0)
    util.load_word(code, r_stop, 10)

    code.add(lbl_loop)

    # Leave the loop once the counter has reached r_stop
    spu.ceq(r_cmp, r_cnt, r_stop)
    spu.brnz(r_cmp, lbl_break)
    spu.ai(r_cnt, r_cnt, 1)

    spu.a(r_foo, r_foo, r_cnt)

    spu.br(lbl_loop)
    code.add(lbl_break)

    code.print_code()
    r = proc.execute(code, mode='int', stop=True)
    print("ret %s" % (r,))
    assert r[0] == 55
Beispiel #13
0
def SimpleSPU():
  """
  Build and run two small SPU programs that use labelled branches.

  The first program computes 11 + 31 in the integer return register and
  stops with code 0x100A when the sum equals 42 (0x100B otherwise); both the
  returned value and the stop code are asserted.  The second program is a
  counted loop that adds 1..10 into the return register; the result is
  asserted to be 55.
  """
  prgm = env.Program()
  code = prgm.get_stream()
  proc = env.Processor()

  spu.set_active_code(code)

  # x aliases the integer return register; test holds the comparison
  x = prgm.gp_return
  test = prgm.acquire_register()

  lbl_brz = prgm.get_label("BRZ")
  lbl_skip = prgm.get_label("SKIP")

  # Presumably a branch hint: the branch at BRZ targets SKIP -- confirm
  spu.hbrr(lbl_brz, lbl_skip)
  spu.xor(x, x, x) # zero x
  spu.ai(x, x, 11) # x = x + 11
  spu.ai(x, x, 31) # x = x + 31

  spu.ceqi(test, x, 42) # test = (x == 42)

  # If test is false (all 0s), skip the stop(0x100A) instruction
  code.add(lbl_brz)
  spu.brz(test, lbl_skip)
  spu.stop(0x100A)
  code.add(lbl_skip)
  spu.stop(0x100B)

  prgm.add(code)
  prgm.print_code()
  r = proc.execute(prgm, mode = 'int', stop = True)
  # Single-argument form: same output on Python 2, valid syntax on Python 3
  print("ret %s" % (r,))
  assert r[0] == 42
  assert r[1] == 0x100A

  # Second program: r_foo = 1 + 2 + ... + 10
  prgm = env.Program()
  code = prgm.get_stream()
  spu.set_active_code(code)

  lbl_loop = prgm.get_label("LOOP")
  lbl_break = prgm.get_label("BREAK")

  r_cnt = prgm.acquire_register()
  r_stop = prgm.acquire_register()
  r_cmp = prgm.acquire_register()
  r_foo = prgm.gp_return

  # OR with the zero register and immediate 0 clears both registers
  spu.ori(r_foo, prgm.r_zero, 0)
  spu.ori(r_cnt, prgm.r_zero, 0)
  util.load_word(code, r_stop, 10)

  code.add(lbl_loop)

  # Leave the loop once the counter has reached r_stop
  spu.ceq(r_cmp, r_cnt, r_stop)
  spu.brnz(r_cmp, lbl_break)
  spu.ai(r_cnt, r_cnt, 1)

  spu.a(r_foo, r_foo, r_cnt)

  spu.br(lbl_loop)
  code.add(lbl_break)

  prgm.add(code)
  prgm.print_code()
  r = proc.execute(prgm, mode = 'int', stop = True)
  print("ret %s" % (r,))
  assert r[0] == 55
Beispiel #14
0
    code = prgm.get_stream()
    spu.set_active_code(code)

    r_cnt = prgm.acquire_register()
    r_cmp = prgm.acquire_register()
    r_sum = prgm.acquire_register()

    spu.il(r_cnt, 32)
    spu.il(r_sum, 0)
    lbl_loop = prgm.get_unique_label("LOOP")
    code.add(lbl_loop)

    spu.ai(r_sum, r_sum, 1)

    spu.ceqi(r_cmp, r_cnt, 2)
    spu.brz(r_cmp, lbl_loop)

    spu.ai(r_sum, r_sum, 10)

    #src = prgm.acquire_register()
    #tmp = prgm.acquire_registers(3)
    #dst = prgm.acquire_registers(2)

    #spu.il(tmp[0], 1)
    #spu.il(tmp[1], 2)
    #spu.il(tmp[2], 3)
    #spu.fma(src, tmp[0], tmp[1], tmp[2])
    #spu.fa(dst[0], src, src)

    #spu.fnms(src, tmp[0], tmp[1], tmp[2])
    #spu.fs(dst[1], src, src)
Beispiel #15
0
  code = prgm.get_stream()
  spu.set_active_code(code)

  r_cnt = prgm.acquire_register()
  r_cmp = prgm.acquire_register()
  r_sum = prgm.acquire_register()

  spu.il(r_cnt, 32)
  spu.il(r_sum, 0)
  lbl_loop = prgm.get_unique_label("LOOP")
  code.add(lbl_loop)

  spu.ai(r_sum, r_sum, 1)

  spu.ceqi(r_cmp, r_cnt, 2)
  spu.brz(r_cmp, lbl_loop)

  spu.ai(r_sum, r_sum, 10)

  #src = prgm.acquire_register()
  #tmp = prgm.acquire_registers(3)
  #dst = prgm.acquire_registers(2)

  #spu.il(tmp[0], 1)
  #spu.il(tmp[1], 2)
  #spu.il(tmp[2], 3)
  #spu.fma(src, tmp[0], tmp[1], tmp[2])
  #spu.fa(dst[0], src, src)

  #spu.fnms(src, tmp[0], tmp[1], tmp[2])
  #spu.fs(dst[1], src, src)