Python Processor.execute 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: corepy.arch.spu.platform

클래스/타입: Processor

메소드/함수: execute

hotexamples.com에서의 예제들: 9

Python Processor.execute - 9개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 corepy.arch.spu.platform.Processor.execute에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

Processor(9)

execute(8)

join(1)

자주 사용되는 메소드들

Processor (9)

execute (8)

join (1)

예제 #1

파일 보기

def RunTest(test):
  """Run `test` against a fresh instruction stream, then print and execute it.

  A new InstructionStream is made the active code of the `spu` module before
  `test` is called with no arguments; presumably the instructions `test`
  emits are collected in that stream.  The stream is printed and handed to a
  new Processor.  The value returned by execute() is discarded.
  """
  from corepy.arch.spu.platform import InstructionStream, Processor

  stream = InstructionStream()
  spu.set_active_code(stream)

  # Let the test body emit its instructions while the stream is active.
  test()

  stream.print_code()
  runner = Processor()
  runner.execute(stream)

예제 #2

파일 보기

def TestFloatArray():
  """Check a vector float add plus a rotate-and-accumulate sum on the SPU.

  The synthetic program adds two 4-element SingleFloat values, then four
  times adds that pairwise sum into the float return register and rotates
  the pairwise sum by 4 bytes (presumably one float lane per step, so the
  returned value is the sum of all four lanes).  The value returned by
  execute(mode='fp') is compared with the same sum computed on the host.

  Raises:
    AssertionError: if the SPU result differs from the host-side reference.
  """
  from corepy.arch.spu.platform import InstructionStream, Processor
  # Not referenced below; kept because removing an import could change
  # import-time behavior -- TODO confirm it can be dropped.
  import corepy.arch.spu.lib.dma as dma

  code = InstructionStream()
  spu.set_active_code(code)

  x = SingleFloat([1.0, 2.0, 3.0, 4.0])
  y = SingleFloat([0.5, 1.5, 2.5, 3.5])
  # Named `pair_sum` rather than `sum` so the builtin is not shadowed.
  pair_sum = SingleFloat(0.0)

  pair_sum.v = spu.fa.ex(x, y)

  r = SingleFloat([0.0, 0.0, 0.0, 0.0], reg = code.fp_return)

  for _ in range(4):
    r.v = spu.fa.ex(pair_sum, r)
    spu.rotqbyi(pair_sum, pair_sum, 4)

  proc = Processor()
  result = proc.execute(code, mode='fp')

  # Host-side reference, accumulated in the same element order.
  x_test = array.array('f', [1.0, 2.0, 3.0, 4.0])
  y_test = array.array('f', [0.5, 1.5, 2.5, 3.5])
  r_test = 0.0
  for x_val, y_val in zip(x_test, y_test):
    r_test += x_val + y_val

  # Report both values so a failure is diagnosable without a debugger.
  assert result == r_test, \
      'TestFloatArray: got %r, expected %r' % (result, r_test)

예제 #3

파일 보기

파일: spu_basics.py 프로젝트: tmaone/efi

def DoubleBufferExample(n_spus=6):
    """
  stream_buffer is an iterator that streams data from main memory to
  SPU local store in blocked buffers.  The buffers can be managed
  using single or double buffering semantics.  The induction variable
  returned by the buffer returns the address of the current buffer.

  Note: stream_buffer was designed before memory descriptors and has
        not been updated to support them yet.  The interface will
        change slightly when the memory classes are finalized.
  """
    n = 30000
    buffer_size = 16

    # Create an array and align the data
    a = array.array('I', range(n))

    addr = a.buffer_info()[0]
    n_bytes = n * 4

    if n_spus > 1: code = ParallelInstructionStream()
    else: code = InstructionStream()

    current = SignedWord(0, code)
    two = SignedWord(2, code)

    # Create the stream buffer, parallelizing it if using more than 1 SPU
    stream = stream_buffer(code,
                           addr,
                           n_bytes,
                           buffer_size,
                           0,
                           buffer_mode='double',
                           save=True)
    if n_spus > 1: stream = parallel(stream)

    # Loop over the buffers
    for buffer in stream:

        # Create an iterators that computes the address offsets within the
        # buffer.  Note: this will be supported by var/vec iters soon.
        for lsa in syn_iter(code, buffer_size, 16):
            code.add(spu.lqx(current, lsa, buffer))
            current.v = current - two
            code.add(spu.stqx(current, lsa, buffer))

    # Run the synthetic program and copy the results back to the array
    proc = Processor()
    r = proc.execute(code, n_spus=n_spus)

    for i in range(2, len(a)):
        try:
            assert (a[i] == i - 2)
        except:
            print 'DoubleBuffer error:', a[i], i - 2

    return

예제 #4

파일 보기

파일: spu_basics.py 프로젝트: tmaone/efi

def SimpleSPU():
    """
  A very simple SPU that computes 11 + 31 and returns 0xA on success.
  """
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    # Acquire two registers
    #x    = code.acquire_register()
    x = code.gp_return
    test = code.acquire_register()

    spu.xor(x, x, x)  # zero x
    spu.ai(x, x, 11)  # x = x + 11
    spu.ai(x, x, 31)  # x = x + 31

    spu.ceqi(test, x, 42)  # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    spu.brz(test, 2)
    spu.stop(0x100A)
    spu.stop(0x100B)

    code.print_code(hex=True)
    r = proc.execute(code, mode='int', stop=True, debug=True)
    assert (r[0] == 42)
    assert (r[1] == 0x100A)

    code = InstructionStream()
    spu.set_active_code(code)

    util.load_float(code, code.fp_return, 3.14)

    code.print_code(hex=True)
    r = proc.execute(code, mode='fp')
    print r
    return

예제 #5

파일 보기

def TestFloatScalar():
  """Add two scalar SingleFloat values on the SPU and check the result.

  The sum of 1.0 and 2.0 is written to a SingleFloat bound to the stream's
  float return register; the value returned by execute(mode='fp') is
  asserted to equal 1.0 + 2.0.
  """
  from corepy.arch.spu.platform import InstructionStream, Processor
  import corepy.arch.spu.lib.dma as dma

  stream = InstructionStream()
  spu.set_active_code(stream)

  lhs = SingleFloat(1.0)
  rhs = SingleFloat(2.0)
  out = SingleFloat(0.0, reg = stream.fp_return)

  out.v = spu.fa.ex(lhs, rhs)

  runner = Processor()
  answer = runner.execute(stream, mode='fp')
  expected = 1.0 + 2.0
  assert answer == expected

예제 #6

파일 보기

파일: bi.py 프로젝트: tmaone/efi

def bi_bug():
    """
    Exercise an indirect branch to a stop word stored at address 0x0.

    The program stores the word 0x200D with stqa at address 0x0, branches
    through a register holding 0x0 with bi, and is followed by stop(0x200A).
    The value returned by execute() is asserted to be 0xD.

    NOTE(review): presumably 0x200D encodes a stop instruction that the
    branch lands on, so 0xD is returned instead of the code from the
    trailing stop(0x200A) -- confirm against the SPU ISA.
    """
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    # Word to be stored, and the address register used as the branch target.
    stop_word = SignedWord(0x200D)
    target = SignedWord(0x0)

    spu.stqa(stop_word, 0x0)
    spu.bi(target)
    spu.stop(0x200A)

    status = proc.execute(code)
    assert status == 0xD

예제 #7

파일 보기

파일: bi.py 프로젝트: KapilRijhwani/corepy

def bi_bug():
    """
    Store a word at address 0x0, branch indirectly to it, and check the result.

    Emits stqa of a SignedWord holding 0x200D to address 0x0, a bi through a
    SignedWord holding 0x0, and a trailing stop(0x200A).  execute() is
    expected to return 0xD.

    NOTE(review): the 0xD result presumably comes from the stored 0x200D word
    being executed as a stop instruction -- confirm against the SPU ISA.
    """
    stream = InstructionStream()
    runner = Processor()

    spu.set_active_code(stream)

    # Operands are created in this order: the word to store first, then the
    # register holding the branch target.
    word_at_zero = SignedWord(0x200D)
    branch_addr = SignedWord(0x0)

    spu.stqa(word_at_zero, 0x0)
    spu.bi(branch_addr)
    spu.stop(0x200A)

    ret = runner.execute(stream)
    assert (ret == 0xD)

예제 #8

파일 보기

파일: spu_labels.py 프로젝트: tmaone/efi

def SimpleSPU():
    """
  A very simple SPU that computes 11 + 31 and returns 0xA on success.
  """
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    # Acquire two registers
    #x    = code.acquire_register()
    x = code.gp_return
    test = code.acquire_register()

    lbl_brz = code.get_label("BRZ")
    lbl_skip = code.get_label("SKIP")

    spu.hbrr(lbl_brz, lbl_skip)
    spu.xor(x, x, x)  # zero x
    spu.ai(x, x, 11)  # x = x + 11
    spu.ai(x, x, 31)  # x = x + 31

    spu.ceqi(test, x, 42)  # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    code.add(lbl_brz)
    spu.brz(test, lbl_skip)
    spu.stop(0x100A)
    code.add(lbl_skip)
    spu.stop(0x100B)

    code.print_code(hex=True, pro=True, epi=True)
    r = proc.execute(code, mode='int', stop=True)
    print "ret", r
    assert (r[0] == 42)
    assert (r[1] == 0x100A)

    code = InstructionStream()
    spu.set_active_code(code)

    lbl_loop = code.get_label("LOOP")
    lbl_break = code.get_label("BREAK")

    r_cnt = code.acquire_register()
    r_stop = code.acquire_register()
    r_cmp = code.acquire_register()
    r_foo = code.gp_return

    spu.ori(r_foo, code.r_zero, 0)
    spu.ori(r_cnt, code.r_zero, 0)
    util.load_word(code, r_stop, 10)

    code.add(lbl_loop)

    spu.ceq(r_cmp, r_cnt, r_stop)
    spu.brnz(r_cmp, lbl_break)
    spu.ai(r_cnt, r_cnt, 1)

    spu.a(r_foo, r_foo, r_cnt)

    spu.br(lbl_loop)
    code.add(lbl_break)

    code.print_code()
    r = proc.execute(code, mode='int', stop=True)
    print "ret", r
    assert (r[0] == 55)

    return

예제 #9

파일 보기

파일: spu_basics.py 프로젝트: tmaone/efi

def SpeedTest(n_spus=6, n_floats=6):
    """
  Get a rough estimate of the maximum flop count.
  On a PS3 using all 6 spus, this is 152 GFlops.
  """

    if n_spus > 1: code = ParallelInstructionStream()
    else: code = InstructionStream()

    spu.set_active_code(code)

    f_range = range(n_floats)
    a = [SingleFloat(0.0) for i in f_range]
    b = [SingleFloat(0.0) for i in f_range]
    c = [SingleFloat(0.0) for i in f_range]
    t = [SingleFloat(0.0) for i in f_range]

    outer = 2**12
    inner = 2**16
    unroll = 128
    fuse = 2
    simd = 4
    for x in syn_iter(code, outer):
        for y in syn_iter(code, inner):
            for u in range(unroll):
                for i in f_range:
                    t[i].v = spu.fma.ex(a[i], b[i], c[i])

    # Run the synthetic program and copy the results back to the array
    # TODO - AWF - use the SPU decrementers to time this
    proc = Processor()
    start = time.time()
    r = proc.execute(code, n_spus=n_spus)
    stop = time.time()
    total = stop - start
    n_ops = long(outer) * inner * long(unroll) * long(n_floats) * long(
        fuse) * long(simd) * long(n_spus)
    print '%.6f sec, %.2f GFlops' % (total, n_ops / total / 1e9)

    #   # Run the native program and copy the results back to the array
    #   outer = 2**14
    #   inner = 2**16
    #   unroll = 1
    #   fuse = 1
    #   simd = 1

    #   proc = Processor()
    #   # ncode = NativeInstructionStream("a.out")
    #   start = time.time()
    #   r = proc.execute(ncode, n_spus = n_spus)
    #   stop = time.time()
    #   total = stop - start
    #   n_ops = long(outer) * inner * long(unroll) * long(n_floats) * long(fuse) * long(simd) * long(n_spus)
    #   print '%.6f sec, %.2f GFlops' % (total, n_ops / total / 1e9)

    results = """
  --> No optimizations
  Executing native code: a.out
  14.805322 sec, 20.89 GFlops

  --> Synthetic
  Platform: linux.spre_linux_spu
  no raw data
  65.023350 sec, 152.19 GFlops

  --> -O3 (fuse: 2, simd: 4)
  Executing native code: a.out
  7.407939 sec, 41.74 GFlops

  --> -O3 (fuse: 1, simd: 1)
  Executing native code: a.out
  7.403702 sec, 5.22 GFlops
  """
    return