Exemple #1
0
def fb_draw():
  """Draw into the framebuffer forever, alternating two SPU programs.

  Two FBDraw programs are synthesized with the two framebuffer addresses
  passed in swapped order.  Each pass of the loop runs one program, waits
  for vsync and flips to buffer 0, then runs the other program, waits for
  vsync and flips to buffer 1.

  The loop has no exit condition; the framebuffer is closed when an
  exception (for example KeyboardInterrupt) unwinds out of it.
  """
  code0 = synspu.InstructionStream()
  code1 = synspu.InstructionStream()
  proc = synspu.Processor()

  fb = cell_fb.framebuffer()
  cell_fb.fb_open(fb)

  # Everything after fb_open is guarded so the framebuffer is always closed.
  try:
    draw0 = FBDraw()
    draw0.set_buffers(cell_fb.fb_addr(fb, 0), cell_fb.fb_addr(fb, 1))
    draw0.set_stride(fb.stride)

    draw0.synthesize(code0)

    # Same program with the two buffer addresses swapped
    draw1 = FBDraw()
    draw1.set_buffers(cell_fb.fb_addr(fb, 1), cell_fb.fb_addr(fb, 0))
    draw1.set_stride(fb.stride)

    draw1.synthesize(code1)

    while True:

      # cell_fb.fb_clear(fb, 0)
      proc.execute(code0)
      cell_fb.fb_wait_vsync(fb)
      cell_fb.fb_flip(fb, 0)

      # cell_fb.fb_clear(fb, 1)
      proc.execute(code1)
      cell_fb.fb_wait_vsync(fb)
      cell_fb.fb_flip(fb, 1)
  finally:
    cell_fb.fb_close(fb)

  return
Exemple #2
0
def TestDecrementer():

    code = synspu.InstructionStream()

    spu_write_decr(code, 0x7FFFFFFFl)
    spu_start_decr(code)

    # Get a message from the PPU
    spu_read_in_mbox(code)

    reg = spu_read_decr(code)
    spu_write_out_mbox(code, reg)
    spu_stop_decr(code)

    proc = synspu.Processor()

    spe_id = proc.execute(code, async=True)

    print 'test is sleeping for 1 second'
    time.sleep(1)
    synspu.spu_exec.write_in_mbox(spe_id, 0x44CAFE)

    while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
        pass

    print 'spu said: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))

    proc.join(spe_id)

    return
Exemple #3
0
def test_stream_popc():
    code = synspu.InstructionStream()
    proc = synspu.Processor()

    bits = array.array('I', range(1024))
    for i in range(0, 1024, 4):
        bits[i] = 0x01010101  # 4 bits
        bits[i + 1] = 0xFFFFFFFF  # 32 bits
        bits[i + 2] = 0x10101010  # 4 bits
        bits[i + 3] = 0xFF0FF0F0  # 20 bits = 60 bits total


#    bits[i]   = 1
#    bits[i+1] = 2
#    bits[i+2] = 3
#    bits[i+3] = 4

#abits = synspu.aligned_memory(len(bits), typecode = 'I')
#abits.copy_to(bits.buffer_info()[0], len(bits))

    popc = syn_popc_stream()
    popc.set_stream_addr(bits.buffer_info()[0])
    popc.set_stream_size(len(bits))

    popc.synthesize(code)

    count = proc.execute(code, mode='mbox')
    print '-->', count
    assert (count == 60 * 1024 / 4)

    return
Exemple #4
0
def TestMbox():

    code = synspu.InstructionStream()

    # Send a message to the PPU
    spu_write_out_mbox(code, 0xDEADBEEFl)

    # Get a message from the PPU
    reg = spu_read_in_mbox(code)

    # And send it back
    code.add(spu.wrch(reg, SPU_WrOutMbox))

    proc = synspu.Processor()

    spe_id = proc.execute(code, async=True)
    synspu.spu_exec.write_in_mbox(spe_id, 0x88CAFE)

    while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
        pass
    print 'spe said: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))
    while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
        pass
    print 'spe said: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))

    proc.join(spe_id)

    return
Exemple #5
0
    def generate(self, results, pattern, r1_range, r2_range, max_init, max_n,
                 size):

        # Setup the range parameter array
        r1_inc = (r1_range[1] - r1_range[0]) / size[0]
        r2_inc = (r2_range[1] - r2_range[0]) / size[1]

        ranges = extarray.extarray('f', [0.0] * 16)
        for i in range(4):
            ranges[i] = r1_range[0]
            ranges[4 + i] = r2_range[0]
            ranges[8 + i] = r1_inc
            ranges[12 + i] = r2_inc

        # Setup the pattern vector
        bits = _pattern2vector(pattern)

        # Copy the paramters to aligned buffers
        #a_ranges = synspu.aligned_memory(len(ranges), typecode='I')
        #a_ranges.copy_to(ranges.buffer_info()[0], len(ranges))

        #a_pattern = synspu.aligned_memory(len(bits), typecode='I')
        #a_pattern.copy_to(bits.buffer_info()[0], len(bits))

        renderer = MailboxRenderer()
        ly_block = LyapunovBlock()

        ly_block.set_size(size[0], size[1])
        #ly_block.set_range(a_ranges)
        #ly_block.set_pattern(a_pattern)
        ly_block.set_range(ranges)
        ly_block.set_pattern(bits)
        ly_block.set_max_init(max_init)
        ly_block.set_max_n(max_n)
        ly_block.set_renderer(renderer)

        code = synspu.InstructionStream()
        ly_block.synthesize(code)

        proc = synspu.Processor()

        spe_id = proc.execute(code, async=True)

        for i in range(size[0] * size[1]):
            while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
                pass
            print 'ly said: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))

        proc.join(spe_id)

        # for x in range(size[0]):
        #   r2 = r2_range[0] + r2_inc
        #   print 'col:', x, r1, r2

        #   for y in range(size[1]):
        #     results[y, x] = lyapunov_point(pattern, r1, r2, max_init, max_n)
        #     r2 += r2_inc
        #   r1 += r1_inc

        return
Exemple #6
0
def TestVecIter(n_spus=1):
    """Double 1024 words in place using stream_buffer plus spu_vec_iter.

    With n_spus > 1 a ParallelInstructionStream is used and the stream is
    wrapped in parallel().  Afterwards every element must equal twice its
    index.
    """
    n = 1024
    buffer_size = 16
    a = extarray.extarray('I', range(n))

    if n_spus > 1:
        code = env.ParallelInstructionStream()
    else:
        code = env.InstructionStream()

    # NOTE: this name is rebound by the inner loop below, as in the original
    current = var.SignedWord(0, code)

    stream = stream_buffer(code, a.buffer_info()[0], n * 4, buffer_size, 0,
                           save=True)
    if n_spus > 1:
        stream = parallel(stream)

    # Descriptor for the local-store buffer the vector iterator walks
    md = memory_desc('i', 0, buffer_size)

    for block in stream:
        for current in spu_vec_iter(code, md):
            current.v = current + current

    proc = env.Processor()
    result = proc.execute(code, n_spus=n_spus)

    for i in range(n):
        expected = i + i
        assert (a[i] == expected)
Exemple #7
0
def TestTanimoto():
  """Synthesize a 256-bit Tanimoto kernel, print the code and execute it.

  This only checks that synthesis and execution complete; the result
  register is never inspected.
  """
  stream = synspu.InstructionStream()
  processor = synspu.Processor()

  stream.set_debug(True)

  # Two registers per operand plus one for the result
  x_regs = stream.acquire_registers(2)
  y_regs = stream.acquire_registers(2)
  result_reg = stream.acquire_register()

  kernel = Tanimoto()
  kernel.set_n_bits(256)
  kernel.set_x_regs(x_regs)
  kernel.set_y_regs(y_regs)
  kernel.set_result_reg(result_reg)
  kernel.synthesize(stream)

  stream.print_code()
  processor.execute(stream)

  # TODO: Do a real test, not just a synthesis test
Exemple #8
0
def TestSetSlotValue():
    """Fill the four slots of a register and read them back via the mailbox.

    Slots 0 and 2 are set from literals, slots 1 and 3 from registers.  The
    SPU then sends the register through the outbound mailbox four times,
    rotating it by 4 bytes after each send; the host expects 0x10..0x13.
    """
    import corepy.arch.spu.platform as synspu
    import corepy.arch.spu.isa as spu
    import corepy.arch.spu.types.spu_types as var
    import corepy.arch.spu.lib.dma as dma

    code = synspu.InstructionStream()
    proc = synspu.Processor()
    spu.set_active_code(code)

    slot1_src = var.SignedWord(0x11)
    slot3_src = var.SignedWord(0x13)
    packed = var.SignedWord(0xFFFFFFFF)

    # literal, register, literal, register -- in slot order
    for slot, value in enumerate((0x10, slot1_src, 0x12, slot3_src)):
        set_slot_value(code, packed, slot, value)

    # Send the register, then rotate by 4 bytes before the next send
    for _ in range(4):
        spu.wrch(packed, dma.SPU_WrOutMbox)
        spu.rotqbyi(packed, packed, 4)

    spe_id = proc.execute(code, mode='async')

    for expected in range(0x10, 0x14):
        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass
        result = synspu.spu_exec.read_out_mbox(spe_id)
        assert (result == expected)

    proc.join(spe_id)
Exemple #9
0
def TestAll():
    """Emit one call of each helper into a stream, print it and execute it.

    Nothing is checked beyond the helpers synthesizing and the resulting
    program running to completion.
    """
    import corepy.arch.spu.platform as env

    code = env.InstructionStream()
    spu.set_active_code(code)

    lhs = code.acquire_register()
    rhs = code.acquire_register()
    dst = code.acquire_register()

    # Shift and comparison helpers
    shr(dst, lhs, rhs)
    cneq(dst, lhs, rhs)
    cge(dst, lhs, rhs)
    cgei(dst, lhs, 10)
    lt(dst, lhs, rhs)
    lti(dst, lhs, 10)

    # Immediate-operand helpers, with a small and a large constant
    a_immediate(dst, lhs, 10)
    a_immediate(dst, lhs, 10000)
    sf_immediate(dst, lhs, 10000)

    code.print_code()

    proc = env.Processor()
    proc.execute(code)
def DoubleBufferExample(n_spus=6):
    """
  stream_buffer is an iterator that streams data from main memory to
  SPU local store in blocked buffers.  The buffers can be managed
  using single or double buffering semantics.  The induction variable
  returned by the buffer returns the address of the current buffer.

  This example streams 30000 words through a double-buffered
  stream_buffer, subtracts 2 from each value on the SPU(s), and then
  reports every element (from index 2 on) that does not equal its
  index minus 2.

  Note: stream_buffer was designed before memory descriptors and has
        not been updated to support them yet.  The interface will
        change slightly when the memory classes are finalized.
  """
    n = 30000
    buffer_size = 16

    # Create an array and align the data
    a = extarray.extarray('I', range(n))

    addr = a.buffer_info()[0]
    n_bytes = n * 4

    if n_spus > 1: code = env.ParallelInstructionStream()
    else: code = env.InstructionStream()

    current = SignedWord(0, code)
    two = SignedWord(2, code)

    # Create the stream buffer, parallelizing it if using more than 1 SPU
    stream = stream_buffer(code,
                           addr,
                           n_bytes,
                           buffer_size,
                           0,
                           buffer_mode='double',
                           save=True)
    if n_spus > 1: stream = parallel(stream)

    # Loop over the buffers
    for buffer in stream:

        # Create an iterators that computes the address offsets within the
        # buffer.  Note: this will be supported by var/vec iters soon.
        for lsa in syn_iter(code, buffer_size, 16):
            code.add(spu.lqx(current, lsa, buffer))
            current.v = current - two
            code.add(spu.stqx(current, lsa, buffer))

    # Run the synthetic program and copy the results back to the array
    proc = env.Processor()
    r = proc.execute(code, n_spus=n_spus)

    # Report mismatches with an explicit comparison: an assert wrapped in a
    # bare except disappears under -O and hides unrelated exceptions.
    for i in range(2, len(a)):
        if a[i] != i - 2:
            print 'DoubleBuffer error:', a[i], i - 2

    return
Exemple #11
0
def TestFloats():
    import math

    code = synspu.InstructionStream()
    proc = synspu.Processor()

    spu.set_active_code(code)

    code.set_debug(True)

    # Create a simple SPU program that computes log for all values bettween
    # .01 and 10.0 with .01 increments

    start = .65
    stop = .75
    inc = .01

    sp_step = 0x3C23D70A
    # r_current = var.Word(0x3C23D70A) # .01 in single precision
    r_current = var.Word(0x3F266666)
    r_step = var.Word(sp_step)  # .01 in single precision
    result = var.Word(0)
    log = SPULog()

    log.setup(code)
    log.set_result(result)
    log.set_x(r_current)

    log_iter = syn_iter(code, int((stop - start) / inc))

    for i in log_iter:

        log.synthesize(code)
        spu.fa(r_current, r_current, r_step)
        spu.wrch(result, dma.SPU_WrOutMbox)

    # code.print_code()
    spe_id = proc.execute(code, mode='async')

    x = start
    for i in range(int((stop - start) / inc)):
        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass
        slog = synspu.spu_exec.read_out_mbox(spe_id)
        print '%.3f 0x%08X  %.08f %.08f ' % (x, slog, _sp_to_float(slog),
                                             math.log(x, 2))
        x += inc

    proc.join(spe_id)

    return
Exemple #12
0
    def __init__(self):
        """Create the instruction stream and register buffer, then synthesize.

        The runtime fields (speid, reg_lsa, proc) start out as None.
        """
        # Code and memory buffers
        self.code = env.InstructionStream()

        # 128 * 4 words -- presumably 128 registers of four words; confirm
        n_words = 128 * 4
        self.regs = extarray.extarray('I', n_words)
        self.regs.clear()

        # Runtime parameters
        self.speid = None
        self.reg_lsa = None
        self.proc = None

        self.synthesize()
Exemple #13
0
def TestSaveBuffer1():
    import array

    code = synspu.InstructionStream()
    proc = synspu.Processor()

    code.set_debug(True)
    spu.set_active_code(code)

    n = 2**14
    data = array.array('I', range(n))
    #data = synspu.aligned_memory(n, typecode = 'I')
    #data.copy_to(data_array.buffer_info()[0], len(data_array))

    save_buffer = SaveBuffer()

    save_buffer.setup()
    save_buffer.init_ls_buffer(0, 128)
    save_buffer.init_mm_buffer(data.buffer_info()[0], n)

    value = var.SignedWord(0xCAFEBABE)

    for i in spuiter.syn_iter(code, n / 4):
        save_buffer.save_register(value)

    code.print_code()
    spe_id = proc.execute(code, mode='async')

    for i in range(n / 4):
        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass
        print 'size: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))

        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass
        print 'offset: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))

        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass
        print 'test: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))

    proc.join(spe_id)

    #data.copy_from(data_array.buffer_info()[0], len(data_array))

    print data[:10]
    return
Exemple #14
0
def TestContinueLabel(n_spus=1):
    """Exercise branching to an spu_vec_iter's continue label.

    Each vector is doubled, compared against 4, and the loop branches to
    the iterator's continue label when the gathered comparison is zero;
    otherwise the vector is doubled a second time.  The final checks
    expect the first four elements to be quadrupled and every other
    element to be doubled.
    """
    n = 1024
    a = extarray.extarray('I', range(n))

    buffer_size = 16

    if n_spus > 1: code = env.ParallelInstructionStream()
    else: code = env.InstructionStream()

    current = var.SignedWord(0, code)
    test = var.SignedWord(0, code)
    four = var.SignedWord(4, code)

    stream = stream_buffer(code,
                           a.buffer_info()[0],
                           n * 4,
                           buffer_size,
                           0,
                           save=True)
    if n_spus > 1: stream = parallel(stream)

    md = memory_desc('i', 0, buffer_size)
    # A single iterator object is reused for every buffer so that its
    # continue_label can be referenced from inside the loop body.
    lsa_iter = spu_vec_iter(code, md)

    for buffer in stream:
        for current in lsa_iter:
            current.v = current + current

            # Compare the doubled vector with 4 and gather the result bits
            test.v = (current == four)
            code.add(spu.gbb(test, test))
            #lbl_continue = code.add(spu.stop(0xC)) - 1 # Place holder for the continue
            #lsa_iter.add_continue(code, 0, lambda lbl, reg = test.reg: spu.brz(reg, lbl))
            # Skip the second doubling when no element matched
            code.add(spu.brz(test.reg, lsa_iter.continue_label))
            current.v = current + current

        #lsa_iter.add_continue(code, lbl_continue, lambda next, reg = test.reg: spu.brz(reg, next))

    proc = env.Processor()
    r = proc.execute(code, n_spus=n_spus)

    for i in range(0, n):
        if i >= 4:
            assert (a[i] == i + i)
        else:
            #print a[i]
            assert (a[i] == i * 4)
    return
Exemple #15
0
def test_syn(kernel):
    """Synthesize `kernel` and check that it counts 60 bits in four words.

    `kernel` is called with no arguments to build the object to synthesize.
    The four input words are passed as execution parameters p7..p10.
    """
    code = synspu.InstructionStream()
    proc = synspu.Processor()

    kernel().synthesize(code)

    params = synspu.spu_exec.ExecParams()
    words = (
        ('p7', 0x01010101),   # 4 bits
        ('p8', 0xFFFFFFFF),   # 32 bits
        ('p9', 0x10101010),   # 4 bits
        ('p10', 0xFF0FF0F0),  # 20 bits = 60 bits total
    )
    for name, word in words:
        setattr(params, name, word)

    count = proc.execute(code, mode='mbox', params=params)
    assert (count == 60)
Exemple #16
0
def TestStreamBufferSingle(n_spus=1):
    """Double 1024 words in place through a stream_buffer with 128-byte buffers.

    Each buffer is walked in 16-byte steps with syn_iter; every quadword is
    loaded, added to itself and stored back.  With n_spus > 1 the stream is
    parallelized.  Afterwards every element must equal twice its index.
    """
    n = 1024
    buffer_size = 128
    a = extarray.extarray('I', range(n))

    if n_spus > 1:
        code = env.ParallelInstructionStream()
    else:
        code = env.InstructionStream()

    current = var.SignedWord(0, code)

    stream = stream_buffer(code, a.buffer_info()[0], n * 4, buffer_size, 0,
                           save=True)
    if n_spus > 1:
        stream = parallel(stream)

    for ls_buffer in stream:
        # syn_iter replaces a hand-written counter/branch loop here
        for offset in syn_iter(code, buffer_size, 16):
            code.add(spu.lqx(current, offset, ls_buffer))
            current.v = current + current
            code.add(spu.stqx(current, offset, ls_buffer))

    proc = env.Processor()
    result = proc.execute(code, n_spus=n_spus)

    for i in range(n):
        expected = i + i
        assert (a[i] == expected)
Exemple #17
0
def TestLog():
    code = synspu.InstructionStream()
    proc = synspu.Processor()

    spu.set_active_code(code)
    # Create a simple SPU program that computes log for 10 values and
    # sends the result back using the mailbox

    log = SPULog()

    values = []
    result = code.acquire_register()

    N = 10

    x = 1
    for i in range(N):
        val = var.Word(x)
        spu.cuflt(val, val, 155)
        values.append(val)
        x = x * 10

    log.setup(code)
    log.set_result(result)

    for i in range(N):

        log.set_x(values[i])
        log.synthesize(code)

        spu.wrch(result, dma.SPU_WrOutMbox)

    spe_id = proc.execute(code, mode='async')

    x = 1
    for i in range(N):
        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass
        print 'log said: 0x%08X  (%d)' % (
            synspu.spu_exec.read_out_mbox(spe_id), x)
        x = x * 10

    proc.join(spe_id)

    return
Exemple #18
0
    def _startSPU(self):
        """Allocate an SPU context, run a no-op stream on it and map its local store.

        Sets self.ctx to the new context and self.localstore to an extarray
        whose memory is set to ctx.spuls.
        """
        self.ctx = ctx = env.spu_exec.alloc_context()

        # Execute a no-op instruction stream so the prolog is executed
        code = env.InstructionStream()
        code.add(spu.nop(code.r_zero))

        code.cache_code()
        itemsize = code.render_code.itemsize
        code_len = len(code.render_code) * itemsize
        # Round the code length up to a multiple of 16 bytes
        if code_len % 16 != 0:
            code_len += 16 - (code_len % 16)
        # Place the code so it ends at 0x40000 (262144, the size used for
        # the local-store array below)
        code_lsa = 0x40000 - code_len

        env.spu_exec.run_stream(ctx, code.inst_addr(), code_len, code_lsa,
                                code_lsa)

        # 262144 bytes viewed as 4-byte words
        self.localstore = extarray.extarray('I', 262144 / 4)
        self.localstore.set_memory(ctx.spuls)
        return
Exemple #19
0
    def GenerateStream(self, step=None):
        """Build an instruction stream from the text in the editor control.

        Each editor line is stripped; empty lines and lines starting with
        '#' produce nothing.  A line ending in ':' becomes a label, any
        other line is evaluated as ``spu.<line>`` with ``Label(x)``
        rewritten to ``code.get_label('x')``.

        step -- when not None, only the line with this index is turned
                into a real instruction; every other instruction line is
                replaced by ``stop(0x2FFF)`` (labels are still emitted).

        A line with a breakpoint set is always emitted as ``stop(0x2FFF)``.
        Lines that fail to evaluate are reported and skipped.

        Returns the stream after cache_code() has been called on it.
        """
        # NOTE: the local must stay named `code`; evaluated lines refer to it.
        code = env.InstructionStream()
        lines = self.editCtrl.GetText().split('\n')

        for i, line in enumerate(lines):
            cmd = line.strip()
            is_blank = cmd == "" or cmd[0] == '#'
            is_label = (not is_blank) and cmd[-1] == ":"

            # For the stop case, want all instructions except the current one
            # to be STOP instructions.
            if step is not None and i != step:
                if is_blank:
                    continue
                if is_label:
                    # Label - better parsing?
                    code.add(code.get_label(cmd[:-1]))
                else:
                    code.add(spu.stop(0x2FFF))
                continue

            if self.editCtrl.IsBreakSet(i):
                code.add(spu.stop(0x2FFF))
                continue

            if is_blank:
                continue

            if is_label:
                # Label - better parsing?
                inst = code.get_label(cmd[:-1])
            else:
                # Instruction
                strcmd = re.sub(r"Label\((.*?)\)", "code.get_label('\\1')",
                                cmd)
                # SECURITY: eval() runs arbitrary editor text; only use this
                # with input the user typed themselves.
                try:
                    inst = eval('spu.%s' % strcmd)
                except Exception:
                    print('Error creating instruction: %s' % cmd)
                    # Skip the line: `inst` is unbound or left over from a
                    # previous line and must not be added.
                    continue

            code.add(inst)

        code.cache_code()
        return code
Exemple #20
0
def TestSignal():

    code = synspu.InstructionStream()

    # Get a signal from the PPU
    reg = spu_read_signal1(code)

    # And send it back
    code.add(spu.wrch(reg, SPU_WrOutMbox))

    proc = synspu.Processor()

    spe_id = proc.execute(code, async=True)
    synspu.spu_exec.write_signal(spe_id, 1, 0xCAFEBABEl)

    while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
        pass

    print 'sig said: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))

    proc.join(spe_id)

    return
Exemple #21
0
def TestStreamBufferDouble(n_spus=1):
    """Double 2048 words in place through a double-buffered stream_buffer.

    Each 32-byte buffer is walked in 16-byte steps; every quadword is
    loaded, added to itself and stored back.  With n_spus > 1 the stream is
    parallelized.  Afterwards every element must equal twice its index.
    """
    n = 2048
    buffer_size = 32
    a = extarray.extarray('I', range(n))

    if n_spus > 1:
        code = env.ParallelInstructionStream()
    else:
        code = env.InstructionStream()

    current = var.SignedWord(0, code)

    stream = stream_buffer(code, a.buffer_info()[0], n * 4, buffer_size, 0,
                           buffer_mode='double', save=True)
    if n_spus > 1:
        stream = parallel(stream)

    for ls_buffer in stream:
        for offset in syn_iter(code, buffer_size, 16):
            code.add(spu.lqx(current, offset, ls_buffer))
            current.v = current + current
            code.add(spu.stqx(current, offset, ls_buffer))

    proc = env.Processor()
    result = proc.execute(code, n_spus=n_spus)

    for i in range(len(a)):
        expected = i + i
        assert (a[i] == expected)
Exemple #22
0
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import corepy.lib.extarray as extarray
import corepy.arch.spu.isa as spu
import corepy.arch.spu.platform as env
import corepy.arch.spu.lib.dma as dma
from corepy.arch.spu.lib.util import load_word

import time

if __name__ == '__main__':
    # Build a small SPU program: load a counter and decrement it in a loop
    # until it reaches zero.
    code = env.InstructionStream()
    proc = env.Processor()

    spu.set_active_code(code)

    r_cnt = code.acquire_register()
    load_word(code, r_cnt, 0x10000)

    # Stream position of the loop head, used to compute the branch offset
    br_loop = code.size()

    spu.ai(r_cnt, r_cnt, -1)
    # Branch back to the loop head while the counter is non-zero
    spu.brnz(r_cnt, br_loop - code.size())

    code.print_code()

    # NOTE(review): the body of this loop is missing from the file as seen
    # here; the script is truncated at this point.
    for i in xrange(0, 10000):
def MemoryDescExample(data_size=20000):
    """
  This example uses a memory descriptor to move 20k integers back and 
  forth between main memory and the SPU local store. Each value is
  incremented by 1 while on the SPU.
  
  Memory descriptors are a general purpose method for describing a
  region of memory.  Memory is described by a typecode, address, and
  size.  Memory descriptors can be initialized by hand or from an
  array or buffer object.

  For main memory, memory descriptors are useful for transfering data
  between main memory and an SPU's local store.  The get/put methods
  on a memory descriptor generate the SPU code to move data of any
  size between main memory and local store.

  Memory descriptors can also be used with spu_vec_iters to describe
  the region of memory to iterate over.  The typecode in the memory
  descriptor is used to determine the type for the loop induction
  variable.

  Note that there is currently no difference between memory
  descriptors for main memory and local store.  It's up to the user to
  make sure the memory descriptor settings make sense in the current
  context.  (this will probably change in the near future)

  Note: get/put currently use loops rather than display lists for
        transferring data over 16k.
  """

    code = env.InstructionStream()
    proc = env.Processor()

    code.debug = True
    spu.set_active_code(code)

    # Create a python array
    data = extarray.extarray('I', range(data_size))

    # Align the data in the array
    #a_data = aligned_memory(data_size, typecode = 'I')
    #a_data.copy_to(data.buffer_info()[0], data_size)

    # Create memory descriptor for the data in main memory
    data_desc = memory_desc('I')
    #data_desc.from_array(a_data)
    data_desc.from_array(data)

    # Transfer the data to 0x0 in the local store
    data_desc.get(code, 0)

    # Create memory descriptor for the data in the local store for use
    # in the iterator
    lsa_data = memory_desc('i', 0, data_size)

    # Add one to each value
    for x in spu_vec_iter(code, lsa_data):
        x.v = x + 1

    # Transfer the data back to main memory
    data_desc.put(code, 0)

    dma.spu_write_out_mbox(code, 0xCAFE)

    # Execute the synthetic program
    # code.print_code()

    spe_id = proc.execute(code, async=True)
    proc.join(spe_id)

    # Copy it back to the Python array
    #a_data.copy_from(data.buffer_info()[0], data_size)

    for i in xrange(data_size):
        assert (data[i] == i + 1)
    return
Exemple #24
0
def TestSPUIter():
    """DMA 32 words into local store, double them with syn_iter, DMA them back.

    The get transfer uses tag 12 and the put transfer uses tag 13.  After
    execution every element of the array must equal twice its index.
    """
    size = 32
    data = extarray.extarray('I', range(size))
    code = env.InstructionStream()

    r_zero = code.acquire_register()
    r_ea_data = code.acquire_register()
    r_ls_data = code.acquire_register()
    r_size = code.acquire_register()
    r_tag = code.acquire_register()

    # Load zero
    util.load_word(code, r_zero, 0)

    #print 'array ea: %X' % (data.buffer_info()[0])
    #print 'r_zero = %s, ea_data = %s, ls_data = %s, r_size = %s, r_tag = %s' % (
    #  str(r_zero), str(r_ea_data), str(r_ls_data), str(r_size), str(r_tag))

    # Load the effective address
    util.load_word(code, r_ea_data, data.buffer_info()[0])

    # Load the size in bytes (4 bytes per word)
    util.load_word(code, r_size, size * 4)

    # Load the tag (12) for the get transfer
    code.add(spu.ai(r_tag, r_zero, 12))

    # Load the lsa
    code.add(spu.ai(r_ls_data, r_zero, 0))

    # Load the data into address 0
    dma.mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)

    # Set the tag bit to 12
    dma.mfc_write_tag_mask(code, 1 << 12)

    # Wait for the transfer to complete
    dma.mfc_read_tag_status_all(code)

    # Double the data values, one quadword per iteration of an SPU-side
    # syn_iter loop
    # r_current = code.acquire_register()
    current = var.SignedWord(0, code)

    # Use an SPU iter stepping 16 bytes at a time over size * 4 bytes
    for lsa in syn_iter(code, size * 4, 16):
        code.add(spu.lqx(current, r_zero, lsa))
        # code.add(spu.ai(1, r_current, r_current))
        current.v = current + current
        code.add(spu.stqx(current, r_zero, lsa))

    # code.release_register(r_current)
    #current.release_register(code)

    # Store the values back to main memory

    # Load the tag (13) for the put transfer
    code.add(spu.ai(r_tag, r_zero, 13))

    # Store the data from local store address 0 back to the array
    dma.mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)

    # Set the tag bit to 13
    dma.mfc_write_tag_mask(code, 1 << 13)

    # Wait for the transfer to complete
    dma.mfc_read_tag_status_all(code)

    # Cleanup
    code.release_register(r_zero)
    code.release_register(r_ea_data)
    code.release_register(r_ls_data)
    code.release_register(r_size)
    code.release_register(r_tag)

    # Stop for debugging
    # code.add(spu.stop(0xA))

    # Execute the code
    proc = env.Processor()
    r = proc.execute(code)

    for i in range(0, size):
        assert (data[i] == i + i)

    return
Exemple #25
0
def TestTanimotoBlock(n_vecs = 4):
  """Time repeated TanimotoBlock executions on one SPU and print throughput.

  n_vecs -- multiplier for the bit-vector length (n_bits = 128 * n_vecs).

  The block is synthesized inside a 10000-iteration syn_iter loop with a
  LocalSave/MemorySave pair as its save operation.  The host measures the
  wall-clock time until the first outbound mailbox word is available and
  prints derived rates.
  """
  code = synspu.InstructionStream()
  proc = synspu.Processor()

  code.set_debug(True)
  spu.set_active_code(code)

  tb = TanimotoBlock()
  ls_save = LocalSave()
  mm_save = MemorySave()

  code.set_debug(True)

  # Input block parameters
  m = 128
  n = 64
  # n_vecs = 9
  n_bits = 128 * n_vecs

  # Main memory results buffer
  # max_results = 2**16
  max_results = 16384
  words_per_result = 4

  # Every word starts out as 12
  mm_results_data = array.array('I', [12 for i in range(max_results * words_per_result)])
  #mm_results_buffer = synspu.aligned_memory(max_results * words_per_result, typecode = 'I')
  # mm_results_buffer.copy_to(mm_results_data.buffer_info()[0], len(mm_results_data))

  mm_results = spuiter.memory_desc('I')
  #mm_results.from_array(mm_results_buffer)
  mm_results.from_array(mm_results_data)

  mm_save.set_md_save_buffer(mm_results)

  # Local Results buffer; size and address are held in registers
  buffer_size = var.SignedWord(16384)
  buffer_addr = var.SignedWord(m * n * n_vecs * 4)
  ls_results = spuiter.memory_desc('B')
  ls_results.set_size_reg(buffer_size)
  ls_results.set_addr_reg(buffer_addr)

  # Chain the save operations: local save hands off to the memory save
  ls_save.set_md_results(ls_results)
  ls_save.set_mm_save_op(mm_save)

  # Setup the TanimotoBlock class
  tb.set_n_bits(n_bits)
  tb.set_block_size(m, n)

  tb.set_x_addr(0)
  tb.set_y_addr(m * n_vecs * 16)
  tb.set_save_op(ls_save)

  # Main test loop
  n_samples = 10000
  for samples in spuiter.syn_iter(code, n_samples):
    tb.synthesize(code)

  # Signal the host through the mailbox once the loop is done
  spu.wrch(buffer_size, dma.SPU_WrOutMbox)

  spu.stop(0x2000)

  # "Function" Calls
  ls_save.block()
  mm_save.block()

  # code.print_code()
  start = time.time()
  spe_id = proc.execute(code, async=True)

  # Spin until the mailbox word written after the loop is available
  while synspu.spu_exec.stat_out_mbox(spe_id) == 0: pass
  # print 'tb said: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))
  stop = time.time()

  # mm_results_buffer.copy_from(mm_results_data.buffer_info()[0], len(mm_results_data))

  proc.join(spe_id)
  total = stop - start
  # All rates below are scaled by 1e9
  bits_sec = (m * n * n_bits * n_samples) / total / 1e9
  ops_per_compare = 48 * 4 + 8  # 48 SIMD instructions, 8 scalar
  insts_per_compare = 56
  gops = (m * n * n_vecs * n_samples * ops_per_compare ) / total / 1e9
  ginsts = (m * n * n_vecs * n_samples * insts_per_compare ) / total / 1e9
  print '%.6f sec, %.2f Gbits/sec, %.2f GOps, %.2f GInsts, %d insts' % (
    total, bits_sec, gops, ginsts, code.size())
  return
Exemple #26
0
def TestMFC():
    """DMA 32 words into local store, add 1 to each, and DMA them back.

    Both transfers use tag 2.  The increment is emitted as one
    load/add/store triple per quadword.  After execution every element of
    the array must equal its index plus 1.
    """
    size = 32
    #data_array = array.array('I', range(size))
    #data = synspu.aligned_memory(size, typecode = 'I')
    #data.copy_to(data_array.buffer_info()[0], len(data_array))
    data = extarray.extarray('I', range(size))
    code = synspu.InstructionStream()

    r_zero = code.acquire_register()
    r_ea_data = code.acquire_register()
    r_ls_data = code.acquire_register()
    r_size = code.acquire_register()
    r_tag = code.acquire_register()

    # Load zero
    util.load_word(code, r_zero, 0)

    print 'array ea: %X' % (data.buffer_info()[0])
    print 'r_zero = %s, ea_data = %s, ls_data = %s, r_size = %s, r_tag = %s' % (
        str(r_zero), str(r_ea_data), str(r_ls_data), str(r_size), str(r_tag))

    # Load the effective address
    print 'test ea: %X' % data.buffer_info()[0]
    util.load_word(code, r_ea_data, data.buffer_info()[0])

    # Load the size in bytes (4 bytes per word)
    code.add(spu.ai(r_size, r_zero, size * 4))

    # Load the tag
    code.add(spu.ai(r_tag, r_zero, 2))

    # Load the lsa
    code.add(spu.ai(r_ls_data, r_zero, 0))

    # Load the data into address 0
    mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)

    # Set the tag bit to 2
    mfc_write_tag_mask(code, 1 << 2)

    # Wait for the transfer to complete
    mfc_read_tag_status_all(code)

    # Increment the data values by 1 using an unrolled loop (no branches)
    r_current = code.acquire_register()

    # One load/add/store triple is emitted per 16-byte quadword
    for lsa in range(0, size * 4, 16):
        code.add(spu.lqa(r_current, (lsa >> 2)))
        code.add(spu.ai(r_current, r_current, 1))
        code.add(spu.stqa(r_current, (lsa >> 2)))

    code.release_register(r_current)

    # Store the values back to main memory

    # Store the data from local store address 0 back to the array
    mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)

    # Set the tag bit to 2
    mfc_write_tag_mask(code, 1 << 2)

    # Wait for the transfer to complete
    mfc_read_tag_status_all(code)

    # Cleanup
    code.release_register(r_zero)
    code.release_register(r_ea_data)
    code.release_register(r_ls_data)
    code.release_register(r_size)
    code.release_register(r_tag)

    # Stop for debugging
    # code.add(spu.stop(0xA))

    # Execute the code
    proc = synspu.Processor()
    # code.print_code()
    #print data_array
    proc.execute(code)

    #data.copy_from(data_array.buffer_info()[0], len(data_array))

    for i in range(size):
        assert (data[i] == i + 1)

    return
Exemple #27
0
    def generate(self,
                 results,
                 patterns,
                 r1_range,
                 r2_range,
                 max_init,
                 max_n,
                 size,
                 n_spus=6):
        # Connect to the framebuffer
        #fb = cell_fb.framebuffer()
        #cell_fb.fb_open(fb)
        buffer = extarray.extarray('B', size[0] * size[1] * 4)
        buffer.clear()

        # Setup the range parameter array
        r1_inc = (r1_range[1] - r1_range[0]) / size[0]
        r2_inc = (r2_range[1] - r2_range[0]) / size[1]

        ranges = [0 for i in range(n_spus)]
        #a_ranges = [0 for i in range(n_spus)]

        # Slice and dice for parallel execution
        spu_slices = [[size[0], size[1] / n_spus] for ispu in range(n_spus)]
        spu_slices[-1][1] += size[1] % n_spus

        offset = 0.0
        for ispu in range(n_spus):
            ranges[ispu] = extarray.extarray('f', [0.0] * 16)

            for i in range(4):
                ranges[ispu][
                    i] = r1_range[0] + float(i) * r1_inc  # horizontal is simd
                ranges[ispu][4 + i] = r2_range[0] + offset
                ranges[ispu][8 + i] = r1_inc * 4.0
                ranges[ispu][12 + i] = r2_inc
            # print ranges

            # Copy the paramters to aligned buffers
            #a_ranges[ispu] = synspu.aligned_memory(len(ranges[ispu]), typecode='I')
            #a_ranges[ispu].copy_to(ranges[ispu].buffer_info()[0], len(ranges[ispu]))

            offset += r2_inc * spu_slices[ispu][1]

        # Setup the pattern vector
        for pattern in patterns:
            if len(pattern) != len(patterns[0]):
                raise Exception('All patterns must be the same length')

        bits = [_pattern2vector(pattern) for pattern in patterns]
        #a_pattern = synspu.aligned_memory(len(bits[0]), typecode='I')
        pattern = extarray.extarray('I', len(bits[0]))

        # Create the instruction streams
        codes = []

        n = len(patterns) * 10
        offset = 0
        for ispu in range(n_spus):
            renderer = FBRenderer()
            renderer.set_lsa(0x100)
            #renderer.set_addr(cell_fb.fb_addr(fb, 0) + offset)
            renderer.set_addr(buffer.buffer_info()[0] + offset)
            renderer.set_width(size[0])
            #renderer.set_stride(fb.stride)
            renderer.set_stride(size[0])

            ly_block = LyapunovBlock()

            ly_block.set_size(*spu_slices[i])
            #ly_block.set_range(a_ranges[ispu])
            ly_block.set_range(ranges[ispu])
            #ly_block.set_pattern(a_pattern)
            ly_block.set_pattern(pattern)
            ly_block.set_max_init(max_init)
            ly_block.set_max_n(max_n)
            ly_block.set_renderer(renderer)

            code = synspu.InstructionStream()
            # code.set_debug(True)
            codes.append(code)
            #offset += spu_slices[i][1] * fb.stride * 4
            offset += spu_slices[i][1] * size[0] * 4

            # for i in spuiter.syn_range(code, n):
            ly_block.synthesize(code)

        # code.print_code()
        proc = synspu.Processor()

        #cell_fb.fb_clear(fb, 0)
        buffer.clear()

        import time
        ids = [0 for i in range(n_spus)]
        start = time.time()

        ipattern = 0
        n_patterns = len(patterns)
        len_bits = len(bits[0])
        pattern_inc = 1

        for i in range(n):
            #a_pattern.copy_to(bits[ipattern].buffer_info()[0], len_bits)
            # TODO - better/faster
            for j in xrange(0, len_bits):
                pattern[j] = bits[ipattern][j]

            for ispu in range(n_spus):
                ids[ispu] = proc.execute(codes[ispu], async=True)

            for ispu in range(n_spus):
                proc.join(ids[ispu])

            #cell_fb.fb_wait_vsync(fb)
            #cell_fb.fb_flip(fb, 0)
            # TODO - write buffer to image file
            #im = Image.frombuffer("RGBA", size, buffer.tostring(), "raw", "RGBA", 0, 1)
            imgbuf = Image.new("RGBA", size)

            arr = [(buffer[i + 3], buffer[i + 2], buffer[i + 1], 0xFF)
                   for i in xrange(0, len(buffer), 4)]
            imgbuf.putdata(arr)
            imgbuf.save("lyapunov_%d.png" % ipattern)

            ipattern += pattern_inc
            if (ipattern == (n_patterns - 1)) or (ipattern == 0):
                pattern_inc *= -1

            print ipattern

        stop = time.time()

        print '%.2f fps (%.6f)' % (float(n) / (stop - start), (stop - start))
        #cell_fb.fb_close(fb)

        return