def save_register(self, reg):
    """Emit SPU code that stores ``reg`` into the local-store buffer.

    The emitted sequence rotates ``self.ls_buffer`` by 4 and by 8 bytes to
    obtain the values used here as the current offset and the size, stores
    ``reg`` at ``self.ls_buffer`` indexed by that offset, adds 16 to the
    offset and compares the result with the size.  A ``brz`` on the
    comparison is then patched in ahead of the ``save_ls_buffer`` code, so
    that code is branched over whenever the comparison yields zero.

    NOTE(review): the author marked this routine as unfinished -- the
    three values pushed to the outbound mailbox are reported to be wrong.
    NOTE(review): the advanced offset only lives in a scratch register in
    this block; confirm where (or whether) it is written back.
    """
    stream = spu.get_active_code()

    # Scratch registers; released together once the sequence is emitted.
    r_offset = stream.acquire_register()
    r_size = stream.acquire_register()
    r_full = stream.acquire_register()
    scratch = [r_offset, r_size, r_full]

    spu.rotqbyi(r_offset, self.ls_buffer, 4)
    spu.rotqbyi(r_size, self.ls_buffer, 8)

    spu.stqx(reg, self.ls_buffer, r_offset)
    spu.ai(r_offset, r_offset, 16)
    spu.ceq(r_full, r_offset, r_size)

    # Diagnostic output of size, offset and comparison result, in that order.
    # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
    for r_debug in (r_size, r_offset, r_full):
        spu.wrch(r_debug, dma.SPU_WrOutMbox)

    # Reserve one instruction slot with a placeholder stop; it is replaced
    # below once the length of the buffer-save code is known.
    branch_slot = stream.size()
    spu.stop(0xB)

    self.save_ls_buffer(ls_size=r_size)
    spu.nop(0)

    # Overwrite the placeholder with a branch past everything emitted since.
    skip_distance = stream.size() - branch_slot
    stream[branch_slot] = spu.brz(r_full, skip_distance, ignore_active=True)

    stream.release_registers(scratch)
def save_register(self, reg):
    """Generate the SPU instructions that append ``reg`` to the LS buffer.

    Two values are rotated out of ``self.ls_buffer`` (by 4 and by 8 bytes)
    and used as the write offset and the size.  ``reg`` is stored at
    ``self.ls_buffer`` indexed by the offset, the offset is increased by
    16 and compared against the size.  The code emitted by
    ``save_ls_buffer`` is preceded by a patched-in ``brz`` on that
    comparison, so it is jumped over when the comparison result is zero.

    NOTE(review): flagged by the author as work in progress; the values
    written to the outbound mailbox are known to be incorrect.
    NOTE(review): nothing in this block writes the incremented offset
    back -- verify that this happens elsewhere.
    """
    active = spu.get_active_code()

    # Temporaries for this sequence only.
    offset_reg = active.acquire_register()
    size_reg = active.acquire_register()
    cmp_reg = active.acquire_register()
    temporaries = [offset_reg, size_reg, cmp_reg]

    spu.rotqbyi(offset_reg, self.ls_buffer, 4)
    spu.rotqbyi(size_reg, self.ls_buffer, 8)

    spu.stqx(reg, self.ls_buffer, offset_reg)
    spu.ai(offset_reg, offset_reg, 16)
    spu.ceq(cmp_reg, offset_reg, size_reg)

    # Debug: report size, offset and the comparison through the mailbox.
    # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!
    for reported in (size_reg, offset_reg, cmp_reg):
        spu.wrch(reported, dma.SPU_WrOutMbox)

    # A stop instruction holds the place of the branch until its distance
    # can be computed.
    placeholder_index = active.size()
    spu.stop(0xB)

    self.save_ls_buffer(ls_size=size_reg)
    spu.nop(0)

    # Replace the placeholder with the real conditional branch.
    distance = active.size() - placeholder_index
    active[placeholder_index] = spu.brz(cmp_reg, distance, ignore_active=True)

    active.release_registers(temporaries)
def TestParams():
    """Check that the ten execution parameters reach the SPU program.

    The generated program zeroes ``prgm.gp_return``, adds each of
    ``spu_param_1`` .. ``spu_param_10`` into it and compares the total
    with 55.  When the comparison is zero the branch skips ahead to
    ``stop(0x200B)``; otherwise ``stop(0x200A)`` is reached.  The program
    is executed with the parameters set to 1..10 and the test asserts the
    result ``(55, 0x200A)``.

    Run this with a stop instruction and examine the registers.
    """
    prgm = Program()
    code = prgm.get_stream()
    proc = Processor()

    total = prgm.gp_return
    scratch = prgm.acquire_register()

    # Zero the sum
    code.add(spu.xor(total, total, total))

    param_regs = (
        spu_param_1, spu_param_2, spu_param_3, spu_param_4, spu_param_5,
        spu_param_6, spu_param_7, spu_param_8, spu_param_9, spu_param_10,
    )
    for param_reg in param_regs:
        copy_param(code, scratch, param_reg)
        code.add(spu.a(total, total, scratch))

    # scratch becomes the comparison result; a zero result skips stop(0x200A).
    code.add(spu.ceqi(scratch, total, 55))
    code.add(spu.brz(scratch, 2))
    code.add(spu.stop(0x200A))
    code.add(spu.stop(0x200B))

    # p1..p10 carry the values 1..10, which sum to 55.
    params = spu_exec.ExecParams()
    for index in range(1, 11):
        setattr(params, 'p%d' % index, index)

    prgm += code
    result = proc.execute(prgm, params=params, stop=True)

    assert result[0] == 55
    assert result[1] == 0x200A
def SimpleSPU():
    """Run two very small SPU programs as a smoke test.

    The first program computes 11 + 31 in the general-purpose return
    register and reaches ``stop(0x100A)`` when the result equals 42
    (``stop(0x100B)`` otherwise); the test asserts ``(42, 0x100A)``.
    The second program loads 3.14 into the floating-point return register
    and the value returned by an ``'fp'`` mode execution is printed.
    """
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()
    spu.set_active_code(code)

    # The sum is built directly in the return register; only the
    # comparison result needs a register of its own.
    answer = code.gp_return
    is_answer = prgm.acquire_register(reg_name=55)

    spu.xor(answer, answer, answer)     # answer = 0
    for addend in (11, 31):
        spu.ai(answer, answer, addend)  # answer += addend
    spu.ceqi(is_answer, answer, 42)     # is_answer = (answer == 42)

    # If the comparison is false (all 0s), skip the stop(0x100A) instruction
    spu.brz(is_answer, 2)
    spu.stop(0x100A)
    spu.stop(0x100B)

    prgm.add(code)
    prgm.print_code(hex=True)

    int_result = proc.execute(prgm, mode='int', stop=True, debug=True)
    assert int_result[0] == 42
    assert int_result[1] == 0x100A

    # Second program: return a floating-point constant.
    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    util.load_float(code, code.fp_return, 3.14)

    prgm.add(code)
    prgm.print_code(hex=True)

    fp_result = proc.execute(prgm, mode='fp')
    print(fp_result)
def SimpleSPU():
    """Smoke-test the SPU backend with an integer and a float program.

    Program one adds 11 and 31 into the general-purpose return register,
    compares the sum with 42 and ends on ``stop(0x100A)`` when they match
    or ``stop(0x100B)`` when they do not; the expected execution result is
    ``(42, 0x100A)``.  Program two loads 3.14 into the floating-point
    return register, is executed in ``'fp'`` mode, and its result is
    printed.
    """
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()
    spu.set_active_code(code)

    # Accumulate in the return register; keep the compare result separately.
    r_value = code.gp_return
    r_match = prgm.acquire_register(reg_name=55)

    spu.xor(r_value, r_value, r_value)  # r_value = 0
    spu.ai(r_value, r_value, 11)        # r_value += 11
    spu.ai(r_value, r_value, 31)        # r_value += 31
    spu.ceqi(r_match, r_value, 42)      # r_match = (r_value == 42)

    # A false comparison (all 0s) jumps over stop(0x100A) to stop(0x100B).
    spu.brz(r_match, 2)
    spu.stop(0x100A)
    spu.stop(0x100B)

    prgm.add(code)
    prgm.print_code(hex=True)

    outcome = proc.execute(prgm, mode='int', stop=True, debug=True)
    assert outcome[0] == 42
    assert outcome[1] == 0x100A

    # Fresh program and stream for the floating-point check.
    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    util.load_float(code, code.fp_return, 3.14)

    prgm.add(code)
    prgm.print_code(hex=True)

    outcome = proc.execute(prgm, mode='fp')
    print(outcome)
def TestParams():
    """Verify parameter passing by summing all ten SPU parameters.

    The program clears ``prgm.gp_return``, copies each of ``spu_param_1``
    through ``spu_param_10`` into a scratch register and accumulates it,
    then compares the sum with 55.  A zero comparison result branches
    over ``stop(0x200A)`` to ``stop(0x200B)``.  Executed with parameters
    1..10, the expected result is ``(55, 0x200A)``.

    Run this with a stop instruction and examine the registers.
    """
    prgm = Program()
    code = prgm.get_stream()
    proc = Processor()

    r_acc = prgm.gp_return
    r_tmp = prgm.acquire_register()

    # Zero the sum
    code.add(spu.xor(r_acc, r_acc, r_acc))

    sources = [
        spu_param_1, spu_param_2, spu_param_3, spu_param_4, spu_param_5,
        spu_param_6, spu_param_7, spu_param_8, spu_param_9, spu_param_10,
    ]
    for source in sources:
        copy_param(code, r_tmp, source)
        code.add(spu.a(r_acc, r_acc, r_tmp))

    # r_tmp is reused for the comparison; zero means the sum was not 55.
    code.add(spu.ceqi(r_tmp, r_acc, 55))
    code.add(spu.brz(r_tmp, 2))
    code.add(spu.stop(0x200A))
    code.add(spu.stop(0x200B))

    # Parameter pN is given the value N.
    params = spu_exec.ExecParams()
    for value in range(1, 11):
        setattr(params, 'p%d' % value, value)

    prgm += code
    outcome = proc.execute(prgm, params=params, stop=True)

    assert outcome[0] == 55
    assert outcome[1] == 0x200A
def TestContinueLabel(n_spus=1):
    """Exercise ``continue_label`` of ``spu_vec_iter`` inside a streamed loop.

    An array holding 0..1023 is streamed through the SPU(s) in 16-byte
    buffers with ``save=True``.  Each vector is doubled; if the compare
    against 4 (reduced with ``gbb``) yields zero, the iteration branches to
    the iterator's continue label, otherwise the vector is doubled a
    second time.  The final check expects ``4 * i`` for the first four
    elements and ``2 * i`` for all the others.

    n_spus -- number of SPUs to run on; values above 1 select the
              parallel instruction stream and wrap the stream iterator
              with ``parallel``.
    """
    n = 1024
    data = extarray.extarray('I', range(n))
    buffer_size = 16

    stream_cls = (env.ParallelInstructionStream if n_spus > 1
                  else env.InstructionStream)
    code = stream_cls()

    current = var.SignedWord(0, code)
    test = var.SignedWord(0, code)
    four = var.SignedWord(4, code)

    stream = stream_buffer(code, data.buffer_info()[0], n * 4, buffer_size, 0,
                           save=True)
    if n_spus > 1:
        stream = parallel(stream)

    md = memory_desc('i', 0, buffer_size)
    lsa_iter = spu_vec_iter(code, md)

    for _chunk in stream:
        for current in lsa_iter:
            current.v = current + current

            # Branch to the continue label when the reduced compare is zero.
            test.v = (current == four)
            code.add(spu.gbb(test, test))
            code.add(spu.brz(test.reg, lsa_iter.continue_label))

            # Only reached when the branch above is not taken.
            current.v = current + current

    proc = env.Processor()
    proc.execute(code, n_spus=n_spus)

    for i in range(n):
        expected = i + i if i >= 4 else i * 4
        assert data[i] == expected
def TestParams():
    """Sum the ten SPU parameters on the SPU and check the outcome.

    Two registers are acquired from the instruction stream: one for the
    running sum and one scratch register.  Each of ``spu_param_1`` ..
    ``spu_param_10`` is copied into the scratch register and added to the
    sum, which is then compared with 55.  A zero comparison result
    branches over ``stop(0x200A)`` to ``stop(0x200B)``.  The stream is
    executed with the parameters set to 1..10.

    Run this with a stop instruction and examine the registers.

    NOTE(review): the emitted stop codes are 0x200A/0x200B while the
    assertion expects 0xA; presumably ``execute`` returns a reduced form
    of the stop code here -- confirm.
    """
    code = InstructionStream()
    proc = Processor()

    sum_reg = code.acquire_register()
    work_reg = code.acquire_register()

    # Zero the sum
    code.add(spu.xor(sum_reg, sum_reg, sum_reg))

    inputs = (
        spu_param_1, spu_param_2, spu_param_3, spu_param_4, spu_param_5,
        spu_param_6, spu_param_7, spu_param_8, spu_param_9, spu_param_10,
    )
    for input_reg in inputs:
        copy_param(code, work_reg, input_reg)
        code.add(spu.a(sum_reg, sum_reg, work_reg))

    # work_reg now holds the comparison; zero skips the first stop.
    code.add(spu.ceqi(work_reg, sum_reg, 55))
    code.add(spu.brz(work_reg, 2))
    code.add(spu.stop(0x200A))
    code.add(spu.stop(0x200B))

    # Parameters p1..p10 receive 1..10.
    params = spu_exec.ExecParams()
    for number in range(1, 11):
        setattr(params, 'p' + str(number), number)

    status = proc.execute(code, params=params)

    assert status == 0xA
def TestContinueLabel(n_spus=1):
    """Test branching to an iterator's continue label from generated code.

    The values 0..1023 are streamed through the SPU(s) 16 bytes at a time
    and written back (``save=True``).  Every vector is doubled once; when
    the ``gbb``-reduced comparison with 4 is zero the code branches to
    ``lsa_iter.continue_label``, and otherwise the vector is doubled
    again.  Afterwards the first four elements must equal ``4 * i`` and
    every other element ``2 * i``.

    n_spus -- number of SPUs to execute on; more than one selects
              ``env.ParallelProgram`` and a ``parallel`` stream iterator.
    """
    n = 1024
    values = extarray.extarray('I', range(n))
    buffer_size = 16

    program_cls = env.ParallelProgram if n_spus > 1 else env.Program
    prgm = program_cls()
    code = prgm.get_stream()

    current = var.SignedWord(0, code)
    test = var.SignedWord(0, code)
    four = var.SignedWord(4, code)

    stream = stream_buffer(code, values.buffer_info()[0], n * 4, buffer_size,
                           0, save=True)
    if n_spus > 1:
        stream = parallel(stream)

    md = memory_desc('i', 0, buffer_size)
    lsa_iter = spu_vec_iter(code, md)

    for _block in stream:
        for current in lsa_iter:
            current.v = current + current

            # A zero reduced compare jumps straight to the next iteration.
            test.v = (current == four)
            code.add(spu.gbb(test, test))
            code.add(spu.brz(test.reg, lsa_iter.continue_label))

            # Second doubling, skipped when the branch is taken.
            current.v = current + current

    prgm.add(code)

    proc = env.Processor()
    proc.execute(prgm, n_spus=n_spus)

    for i in range(n):
        expected = i + i if i >= 4 else i * 4
        assert values[i] == expected
r_data = prgm.acquire_register() r_cmp = prgm.acquire_register() r_lsa = prgm.acquire_register() spu.il(r_lsa, 0x1000) lbl_incloop = prgm.get_label("incloop") code.add(lbl_incloop) spu.lqx(r_data, r_cnt, r_lsa) spu.ai(r_data, r_data, 2) spu.stqx(r_data, r_cnt, r_lsa) spu.ai(r_cnt, r_cnt, 16) spu.ceq(r_cmp, r_cnt, r_sum) spu.brz(r_cmp, lbl_incloop) dma.spu_write_out_mbox(code, code.r_zero) prgm += code t3 = time.time() id = proc.execute(prgm, async = True, mode = 'int') t1 = time.time() for i in xrange(0, ITERS): #env.spu_exec.write_in_mbox(id, 1) #env.spu_exec.write_in_mbox(id, 1) env.spu_exec.write_in_mbox(id, i) #cnt = env.spu_exec.stat_in_mbox(id)
r_data = prgm.acquire_register() r_cmp = prgm.acquire_register() r_lsa = prgm.acquire_register() spu.il(r_lsa, 0x1000) lbl_incloop = prgm.get_label("incloop") code.add(lbl_incloop) spu.lqx(r_data, r_cnt, r_lsa) spu.ai(r_data, r_data, 2) spu.stqx(r_data, r_cnt, r_lsa) spu.ai(r_cnt, r_cnt, 16) spu.ceq(r_cmp, r_cnt, r_sum) spu.brz(r_cmp, lbl_incloop) dma.spu_write_out_mbox(code, code.r_zero) prgm += code t3 = time.time() id = proc.execute(prgm, async=True, mode='int') t1 = time.time() for i in xrange(0, ITERS): #env.spu_exec.write_in_mbox(id, 1) #env.spu_exec.write_in_mbox(id, 1) env.spu_exec.write_in_mbox(id, i) #cnt = env.spu_exec.stat_in_mbox(id) #print "cnt %x" % cnt
def SimpleSPU():
    """Run two small label-based SPU programs and check their results.

    The first program computes 11 + 31 in the general-purpose return
    register and reaches ``stop(0x100A)`` when the sum equals 42, or
    branches to the ``SKIP`` label and ``stop(0x100B)`` otherwise.  The
    test asserts the result ``(42, 0x100A)``.

    The second program counts from 0 up to 10 in a loop built from the
    ``LOOP`` and ``BREAK`` labels, adding the counter to the return
    register after each increment; the test asserts a returned sum of 55.

    Each execution result is printed as ``ret <result>``.  The print is
    written as a single parenthesised expression so the text is the same
    under the Python 2 print statement and the function also parses on
    Python 3.
    """
    code = InstructionStream()
    proc = Processor()
    spu.set_active_code(code)

    # The sum is built in the return register; ``test`` holds the compare.
    x = code.gp_return
    test = code.acquire_register()

    lbl_brz = code.get_label("BRZ")
    lbl_skip = code.get_label("SKIP")

    # NOTE(review): presumably a branch hint for the brz placed at BRZ with
    # target SKIP -- confirm against the SPU ISA.
    spu.hbrr(lbl_brz, lbl_skip)

    spu.xor(x, x, x)        # zero x
    spu.ai(x, x, 11)        # x = x + 11
    spu.ai(x, x, 31)        # x = x + 31
    spu.ceqi(test, x, 42)   # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    code.add(lbl_brz)
    spu.brz(test, lbl_skip)
    spu.stop(0x100A)
    code.add(lbl_skip)
    spu.stop(0x100B)

    code.print_code(hex=True, pro=True, epi=True)
    r = proc.execute(code, mode='int', stop=True)
    print("ret %s" % (r,))
    assert r[0] == 42
    assert r[1] == 0x100A

    # Second program: sum 1..10 with an explicit loop.
    code = InstructionStream()
    spu.set_active_code(code)

    lbl_loop = code.get_label("LOOP")
    lbl_break = code.get_label("BREAK")

    r_cnt = code.acquire_register()
    r_stop = code.acquire_register()
    r_cmp = code.acquire_register()
    r_foo = code.gp_return

    # Clear the accumulator and the counter; the loop ends at 10.
    spu.ori(r_foo, code.r_zero, 0)
    spu.ori(r_cnt, code.r_zero, 0)
    util.load_word(code, r_stop, 10)

    code.add(lbl_loop)
    spu.ceq(r_cmp, r_cnt, r_stop)
    spu.brnz(r_cmp, lbl_break)      # leave once the counter equals r_stop
    spu.ai(r_cnt, r_cnt, 1)
    spu.a(r_foo, r_foo, r_cnt)
    spu.br(lbl_loop)
    code.add(lbl_break)

    code.print_code()
    r = proc.execute(code, mode='int', stop=True)
    print("ret %s" % (r,))
    assert r[0] == 55
def SimpleSPU():
    """Run two small label-based SPU programs and check their results.

    The first program computes 11 + 31 in the general-purpose return
    register and reaches ``stop(0x100A)`` when the sum equals 42, or
    branches to the ``SKIP`` label and ``stop(0x100B)`` otherwise.  The
    test asserts the result ``(42, 0x100A)``.

    The second program counts from 0 up to 10 in a loop built from the
    ``LOOP`` and ``BREAK`` labels, adding the counter to the return
    register after each increment; the test asserts a returned sum of 55.

    Each execution result is printed as ``ret <result>``.  The print is
    written as a single parenthesised expression so the text is the same
    under the Python 2 print statement and the function also parses on
    Python 3.
    """
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()
    spu.set_active_code(code)

    # The sum is built in the return register; ``test`` holds the compare.
    x = prgm.gp_return
    test = prgm.acquire_register()

    lbl_brz = prgm.get_label("BRZ")
    lbl_skip = prgm.get_label("SKIP")

    # NOTE(review): presumably a branch hint for the brz placed at BRZ with
    # target SKIP -- confirm against the SPU ISA.
    spu.hbrr(lbl_brz, lbl_skip)

    spu.xor(x, x, x)        # zero x
    spu.ai(x, x, 11)        # x = x + 11
    spu.ai(x, x, 31)        # x = x + 31
    spu.ceqi(test, x, 42)   # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    code.add(lbl_brz)
    spu.brz(test, lbl_skip)
    spu.stop(0x100A)
    code.add(lbl_skip)
    spu.stop(0x100B)

    prgm.add(code)
    prgm.print_code()
    r = proc.execute(prgm, mode='int', stop=True)
    print("ret %s" % (r,))
    assert r[0] == 42
    assert r[1] == 0x100A

    # Second program: sum 1..10 with an explicit loop.
    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    lbl_loop = prgm.get_label("LOOP")
    lbl_break = prgm.get_label("BREAK")

    r_cnt = prgm.acquire_register()
    r_stop = prgm.acquire_register()
    r_cmp = prgm.acquire_register()
    r_foo = prgm.gp_return

    # Clear the accumulator and the counter; the loop ends at 10.
    spu.ori(r_foo, prgm.r_zero, 0)
    spu.ori(r_cnt, prgm.r_zero, 0)
    util.load_word(code, r_stop, 10)

    code.add(lbl_loop)
    spu.ceq(r_cmp, r_cnt, r_stop)
    spu.brnz(r_cmp, lbl_break)      # leave once the counter equals r_stop
    spu.ai(r_cnt, r_cnt, 1)
    spu.a(r_foo, r_foo, r_cnt)
    spu.br(lbl_loop)
    code.add(lbl_break)

    prgm.add(code)
    prgm.print_code()
    r = proc.execute(prgm, mode='int', stop=True)
    print("ret %s" % (r,))
    assert r[0] == 55
code = prgm.get_stream() spu.set_active_code(code) r_cnt = prgm.acquire_register() r_cmp = prgm.acquire_register() r_sum = prgm.acquire_register() spu.il(r_cnt, 32) spu.il(r_sum, 0) lbl_loop = prgm.get_unique_label("LOOP") code.add(lbl_loop) spu.ai(r_sum, r_sum, 1) spu.ceqi(r_cmp, r_cnt, 2) spu.brz(r_cmp, lbl_loop) spu.ai(r_sum, r_sum, 10) #src = prgm.acquire_register() #tmp = prgm.acquire_registers(3) #dst = prgm.acquire_registers(2) #spu.il(tmp[0], 1) #spu.il(tmp[1], 2) #spu.il(tmp[2], 3) #spu.fma(src, tmp[0], tmp[1], tmp[2]) #spu.fa(dst[0], src, src) #spu.fnms(src, tmp[0], tmp[1], tmp[2]) #spu.fs(dst[1], src, src)