def save_register(self, reg):
    """Emit SPU instructions that store `reg` through the local-store buffer.

    All instructions go to the stream returned by ``spu.get_active_code()``.
    The emitted sequence:

    1. rotates ``self.ls_buffer`` by 4 and by 8 bytes into two scratch
       registers, used below as the store offset and the size limit
       (presumably fields packed into the ls_buffer register -- confirm
       against the code that initialises it);
    2. stores `reg` with ``stqx`` using ``self.ls_buffer`` and the offset;
    3. adds 16 to the offset and compares it with the size;
    4. writes size, offset and the compare result to ``dma.SPU_WrOutMbox``;
    5. emits the code produced by ``self.save_ls_buffer`` behind a ``brz``
       on the compare result, so it is branched over when that result is
       zero.
    """
    code = spu.get_active_code()

    # Three scratch registers, acquired in the order offset, size, full-flag.
    scratch = [code.acquire_register() for _ in range(3)]
    r_offset, r_size, r_full = scratch

    spu.rotqbyi(r_offset, self.ls_buffer, 4)
    spu.rotqbyi(r_size, self.ls_buffer, 8)

    spu.stqx(reg, self.ls_buffer, r_offset)

    spu.ai(r_offset, r_offset, 16)
    spu.ceq(r_full, r_offset, r_size)

    spu.wrch(r_size, dma.SPU_WrOutMbox)
    spu.wrch(r_offset, dma.SPU_WrOutMbox)
    spu.wrch(r_full, dma.SPU_WrOutMbox)
    # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!

    # Remember where the next instruction lands.  The stop(0xB) emitted there
    # is only a placeholder: it is overwritten with the brz further down.
    patch_at = code.size()
    spu.stop(0xB)
    self.save_ls_buffer(ls_size=r_size)
    spu.nop(0)

    # Branch distance = everything emitted since the placeholder, as
    # reported by code.size().
    skip = code.size() - patch_at
    code[patch_at] = spu.brz(r_full, skip, ignore_active=True)

    code.release_registers(scratch)
def save_register(self, reg):
    """Emit SPU instructions that store `reg` through the local-store buffer.

    All instructions go to the stream returned by ``spu.get_active_code()``.
    The emitted sequence:

    1. rotates ``self.ls_buffer`` by 4 and by 8 bytes into two scratch
       registers, used below as the store offset and the size limit
       (presumably fields packed into the ls_buffer register -- confirm
       against the code that initialises it);
    2. stores `reg` with ``stqx`` using ``self.ls_buffer`` and the offset;
    3. adds 16 to the offset and compares it with the size;
    4. writes size, offset and the compare result to ``dma.SPU_WrOutMbox``;
    5. emits the code produced by ``self.save_ls_buffer`` behind a ``brz``
       on the compare result, so it is branched over when that result is
       zero.
    """
    code = spu.get_active_code()

    # Three scratch registers, acquired in the order offset, size, full-flag.
    scratch = [code.acquire_register() for _ in range(3)]
    r_offset, r_size, r_full = scratch

    spu.rotqbyi(r_offset, self.ls_buffer, 4)
    spu.rotqbyi(r_size, self.ls_buffer, 8)

    spu.stqx(reg, self.ls_buffer, r_offset)

    spu.ai(r_offset, r_offset, 16)
    spu.ceq(r_full, r_offset, r_size)

    spu.wrch(r_size, dma.SPU_WrOutMbox)
    spu.wrch(r_offset, dma.SPU_WrOutMbox)
    spu.wrch(r_full, dma.SPU_WrOutMbox)
    # !!! STOPPED HERE !!! THESE VALUES ARE WRONG !!!

    # Remember where the next instruction lands.  The stop(0xB) emitted there
    # is only a placeholder: it is overwritten with the brz further down.
    patch_at = code.size()
    spu.stop(0xB)
    self.save_ls_buffer(ls_size=r_size)
    spu.nop(0)

    # Branch distance = everything emitted since the placeholder, as
    # reported by code.size().
    skip = code.size() - patch_at
    code[patch_at] = spu.brz(r_full, skip, ignore_active=True)

    code.release_registers(scratch)
def block(self, d, a, b):
    """Emit a greater-than-or-equal comparison of `a` and `b` into `d`.

    Three instructions are issued: ``cgt`` into a temporary register,
    ``ceq`` into `d`, and an ``or_`` that merges the temporary into `d`.
    The temporary is acquired from, and released back to, the program that
    owns the active code stream.
    """
    prgm = self.get_active_code().prgm
    r_gt = prgm.acquire_register()

    spu.cgt(r_gt, a, b)    # r_gt = (a > b)
    spu.ceq(d, a, b)       # d    = (a == b)
    spu.or_(d, d, r_gt)    # d    = (a == b) | (a > b)

    prgm.release_register(r_gt)
def _load_buffer(self):
    """Emit a DMA get for the buffer, guarded so it is skipped at the end.

    The generated code compares ``self.r_stop`` with ``self.r_count`` and
    branches over the ``dma.mfc_get`` when they are equal, i.e. the load is
    not performed the last time through the loop.
    """
    # TODO - AWF - some optimization is possible here.
    # Rather than skipping around the DMA get on the last iteration, short
    # out of the loop completely.  Saves doing the check twice.
    # Also, as soon as we do this first check, we know we are going to go
    # through the loop again.  Again, no need for a second conditional at
    # the end, just increment counters and always branch.  A hint could be
    # added right before the DMA get.
    code = self.code
    prgm = code.prgm

    # r_last is non-zero when stop == count.
    r_last = prgm.acquire_register()
    code.add(spu.ceq(r_last, self.r_stop, self.r_count))

    # Unique label marking the instruction just past the DMA get.
    lbl_skip = prgm.get_unique_label("STREAM_BUFFER_SKIP")
    code.add(spu.brnz(r_last, lbl_skip))

    # Start the DMA get.
    dma.mfc_get(code, self.ls, syn_range.get_current(self),
                self.buffer_size, self.tag)

    code.add(lbl_skip)
    prgm.release_register(r_last)
# Scratch registers for the increment loop.  r_cnt and r_sum are set up by
# code that precedes this excerpt.
r_data = prgm.acquire_register()
r_cmp = prgm.acquire_register()
r_lsa = prgm.acquire_register()

# Base value added to r_cnt to form the load/store address.
spu.il(r_lsa, 0x1000)

lbl_incloop = prgm.get_label("incloop")
code.add(lbl_incloop)

# Load the quadword at r_cnt + r_lsa, add 2 to it, store it back.
spu.lqx(r_data, r_cnt, r_lsa)
spu.ai(r_data, r_data, 2)
spu.stqx(r_data, r_cnt, r_lsa)

# Advance by one quadword (16 bytes) and loop until r_cnt equals r_sum.
spu.ai(r_cnt, r_cnt, 16)
spu.ceq(r_cmp, r_cnt, r_sum)
spu.brz(r_cmp, lbl_incloop)

dma.spu_write_out_mbox(code, code.r_zero)

prgm += code

t3 = time.time()
# `async` is a reserved word from Python 3.7 on, so it cannot be written as
# a literal keyword argument there.  Dict unpacking passes the very same
# keyword and behaves identically on Python 2.
# NOTE: `id` shadows the builtin; the name is kept because code outside
# this excerpt may still read it.
id = proc.execute(prgm, mode='int', **{'async': True})

t1 = time.time()
for i in xrange(0, ITERS):
    #env.spu_exec.write_in_mbox(id, 1)
    #env.spu_exec.write_in_mbox(id, 1)
    env.spu_exec.write_in_mbox(id, i)
# Scratch registers for the increment loop.  r_cnt and r_sum are set up by
# code that precedes this excerpt.
r_data = prgm.acquire_register()
r_cmp = prgm.acquire_register()
r_lsa = prgm.acquire_register()

# Base value added to r_cnt to form the load/store address.
spu.il(r_lsa, 0x1000)

lbl_incloop = prgm.get_label("incloop")
code.add(lbl_incloop)

# Load the quadword at r_cnt + r_lsa, add 2 to it, store it back.
spu.lqx(r_data, r_cnt, r_lsa)
spu.ai(r_data, r_data, 2)
spu.stqx(r_data, r_cnt, r_lsa)

# Advance by one quadword (16 bytes) and loop until r_cnt equals r_sum.
spu.ai(r_cnt, r_cnt, 16)
spu.ceq(r_cmp, r_cnt, r_sum)
spu.brz(r_cmp, lbl_incloop)

dma.spu_write_out_mbox(code, code.r_zero)

prgm += code

t3 = time.time()
# `async` is a reserved word from Python 3.7 on, so it cannot be written as
# a literal keyword argument there.  Dict unpacking passes the very same
# keyword and behaves identically on Python 2.
# NOTE: `id` shadows the builtin; the name is kept because code outside
# this excerpt may still read it.
id = proc.execute(prgm, mode='int', **{'async': True})

t1 = time.time()
for i in xrange(0, ITERS):
    #env.spu_exec.write_in_mbox(id, 1)
    #env.spu_exec.write_in_mbox(id, 1)
    env.spu_exec.write_in_mbox(id, i)
    #cnt = env.spu_exec.stat_in_mbox(id)
def SimpleSPU():
    """
    Build and run two small SPU programs, asserting on what they return.

    1. Compute 11 + 31 in the return register and stop with 0x100A when the
       sum equals 42 (0x100B otherwise).
    2. Add the integers 1 through 10 in a loop and expect 55 in the return
       register.
    """
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    # x lives in the return register; only `test` needs a scratch register.
    #x = code.acquire_register()
    x = code.gp_return
    test = code.acquire_register()

    lbl_brz = code.get_label("BRZ")
    lbl_skip = code.get_label("SKIP")

    # Branch hint: the branch at BRZ targets SKIP.
    spu.hbrr(lbl_brz, lbl_skip)
    spu.xor(x, x, x)        # zero x
    spu.ai(x, x, 11)        # x = x + 11
    spu.ai(x, x, 31)        # x = x + 31
    spu.ceqi(test, x, 42)   # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    code.add(lbl_brz)
    spu.brz(test, lbl_skip)
    spu.stop(0x100A)
    code.add(lbl_skip)
    spu.stop(0x100B)

    code.print_code(hex=True, pro=True, epi=True)
    r = proc.execute(code, mode='int', stop=True)
    # Single-argument parenthesised form: prints the same text as the old
    # `print "ret", r` statement on Python 2 and also parses on Python 3.
    print("ret %s" % (r,))
    assert (r[0] == 42)
    assert (r[1] == 0x100A)

    # Second program: r_foo accumulates 1 + 2 + ... + 10.
    code = InstructionStream()
    spu.set_active_code(code)

    lbl_loop = code.get_label("LOOP")
    lbl_break = code.get_label("BREAK")

    r_cnt = code.acquire_register()
    r_stop = code.acquire_register()
    r_cmp = code.acquire_register()
    r_foo = code.gp_return

    spu.ori(r_foo, code.r_zero, 0)
    spu.ori(r_cnt, code.r_zero, 0)
    util.load_word(code, r_stop, 10)

    code.add(lbl_loop)

    # Leave the loop once the counter has reached r_stop.
    spu.ceq(r_cmp, r_cnt, r_stop)
    spu.brnz(r_cmp, lbl_break)

    spu.ai(r_cnt, r_cnt, 1)
    spu.a(r_foo, r_foo, r_cnt)

    spu.br(lbl_loop)
    code.add(lbl_break)

    code.print_code()
    r = proc.execute(code, mode='int', stop=True)
    print("ret %s" % (r,))
    assert (r[0] == 55)

    return
def SimpleSPU():
    """
    Build and run two small SPU programs, asserting on what they return.

    1. Compute 11 + 31 in the return register and stop with 0x100A when the
       sum equals 42 (0x100B otherwise).
    2. Add the integers 1 through 10 in a loop and expect 55 in the return
       register.
    """
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()

    spu.set_active_code(code)

    # x lives in the return register; only `test` needs a scratch register.
    #x = code.acquire_register()
    x = prgm.gp_return
    test = prgm.acquire_register()

    lbl_brz = prgm.get_label("BRZ")
    lbl_skip = prgm.get_label("SKIP")

    # Branch hint: the branch at BRZ targets SKIP.
    spu.hbrr(lbl_brz, lbl_skip)
    spu.xor(x, x, x)        # zero x
    spu.ai(x, x, 11)        # x = x + 11
    spu.ai(x, x, 31)        # x = x + 31
    spu.ceqi(test, x, 42)   # test = (x == 42)

    # If test is false (all 0s), skip the stop(0x100A) instruction
    code.add(lbl_brz)
    spu.brz(test, lbl_skip)
    spu.stop(0x100A)
    code.add(lbl_skip)
    spu.stop(0x100B)

    prgm.add(code)
    prgm.print_code()
    r = proc.execute(prgm, mode='int', stop=True)
    # Single-argument parenthesised form: prints the same text as the old
    # `print "ret", r` statement on Python 2 and also parses on Python 3.
    print("ret %s" % (r,))
    assert (r[0] == 42)
    assert (r[1] == 0x100A)

    # Second program: r_foo accumulates 1 + 2 + ... + 10.
    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    lbl_loop = prgm.get_label("LOOP")
    lbl_break = prgm.get_label("BREAK")

    r_cnt = prgm.acquire_register()
    r_stop = prgm.acquire_register()
    r_cmp = prgm.acquire_register()
    r_foo = prgm.gp_return

    spu.ori(r_foo, prgm.r_zero, 0)
    spu.ori(r_cnt, prgm.r_zero, 0)
    util.load_word(code, r_stop, 10)

    code.add(lbl_loop)

    # Leave the loop once the counter has reached r_stop.
    spu.ceq(r_cmp, r_cnt, r_stop)
    spu.brnz(r_cmp, lbl_break)

    spu.ai(r_cnt, r_cnt, 1)
    spu.a(r_foo, r_foo, r_cnt)

    spu.br(lbl_loop)
    code.add(lbl_break)

    prgm.add(code)
    prgm.print_code()
    r = proc.execute(prgm, mode='int', stop=True)
    print("ret %s" % (r,))
    assert (r[0] == 55)

    return
def block(self, d, a, b):
    """Emit ``ceq(d, a, a)`` followed by ``nor(d, d, d)``.

    `b` is accepted for signature compatibility but is not referenced.

    NOTE(review): the compare uses `a` for both operands, so the emitted
    pair presumably always leaves `d` cleared regardless of `b`.  If a
    not-equal test against `b` was intended, the compare would need `b` as
    its second operand -- confirm against the enclosing class before
    changing anything.
    """
    spu.ceq(d, a, a)
    spu.nor(d, d, d)
# Demo program: a few immediate loads and a store, then a counting loop,
# handed to SPUApp.
prgm = env.Program()
code = prgm.get_stream()

reg = prgm.acquire_register()
foo = prgm.acquire_register(reg_name=5)   # explicitly request register 5

code.add(prgm.get_label("FOO"))
code.add(spu.il(foo, 0xCAFE))

# Build 0xDEADBEEF in reg: upper halfword first, then OR in the lower one.
code.add(spu.ilhu(reg, 0xDEAD))
code.add(spu.iohl(reg, 0xBEEF))
code.add(spu.stqd(reg, code.r_zero, 4))

lbl_loop = prgm.get_label("LOOP")
lbl_break = prgm.get_label("BREAK")

r_cnt = code.gp_return
r_stop = prgm.acquire_register(reg_name=9)   # explicitly request register 9
r_cmp = prgm.acquire_register()

# r_cnt = 0, r_stop = 5
code.add(spu.ori(r_cnt, code.r_zero, 0))
code.add(spu.il(r_stop, 5))

# LOOP: leave when r_cnt == r_stop, otherwise increment and repeat.
code.add(lbl_loop)
code.add(spu.ceq(r_cmp, r_cnt, r_stop))
code.add(spu.brnz(r_cmp, prgm.get_label("BREAK")))
code.add(spu.ai(r_cnt, r_cnt, 1))
code.add(spu.br(prgm.get_label("LOOP")))
code.add(lbl_break)

app = SPUApp(code)
app.MainLoop()
# Demo program: a few immediate loads and a store, then a counting loop,
# handed to SPUApp.  `prgm` is created by code that precedes this excerpt.
code = prgm.get_stream()

reg = prgm.acquire_register()
foo = prgm.acquire_register(reg_name=5)   # explicitly request register 5

code.add(prgm.get_label("FOO"))
code.add(spu.il(foo, 0xCAFE))

# Build 0xDEADBEEF in reg: upper halfword first, then OR in the lower one.
code.add(spu.ilhu(reg, 0xDEAD))
code.add(spu.iohl(reg, 0xBEEF))
code.add(spu.stqd(reg, code.r_zero, 4))

lbl_loop = prgm.get_label("LOOP")
lbl_break = prgm.get_label("BREAK")

r_cnt = code.gp_return
r_stop = prgm.acquire_register(reg_name=9)   # explicitly request register 9
r_cmp = prgm.acquire_register()

# r_cnt = 0, r_stop = 5
code.add(spu.ori(r_cnt, code.r_zero, 0))
code.add(spu.il(r_stop, 5))

# LOOP: leave when r_cnt == r_stop, otherwise increment and repeat.
code.add(lbl_loop)
code.add(spu.ceq(r_cmp, r_cnt, r_stop))
code.add(spu.brnz(r_cmp, prgm.get_label("BREAK")))
code.add(spu.ai(r_cnt, r_cnt, 1))
code.add(spu.br(prgm.get_label("LOOP")))
code.add(lbl_break)

app = SPUApp(code)
app.MainLoop()