def TestDebug(): prgm = Program() code = prgm.get_stream() proc = DebugProcessor() spu.set_active_code(code) ra = code.acquire_register() rb = code.acquire_register() rc = code.acquire_register() rd = code.acquire_register() re = code.acquire_register() rf = code.acquire_register() rg = code.acquire_register() rh = code.acquire_register() spu.ai(ra, 0, 14) spu.ai(rb, 0, 13) spu.ai(rc, 0, 14) spu.brnz(14, 3) spu.ai(rd, 0, 15) spu.ai(re, 0, 16) spu.ai(rf, 0, 17) spu.ai(rg, 0, 18) spu.ai(rh, 0, 19) spu.nop(0) spu.stop(0x200A) prgm += code r = proc.execute(prgm) # , debug = True) r = proc.nexti() r = proc.nexti() r = proc.nexti() r = proc.nexti() while r != None: r = proc.nexti() if r is not None: regs = proc.dump_regs() print '******', regs[122:] assert(r == None) print 'int result:', r # while True: # pass return
def TestDebug(): prgm = Program() code = prgm.get_stream() proc = DebugProcessor() spu.set_active_code(code) ra = code.acquire_register() rb = code.acquire_register() rc = code.acquire_register() rd = code.acquire_register() re = code.acquire_register() rf = code.acquire_register() rg = code.acquire_register() rh = code.acquire_register() spu.ai(ra, 0, 14) spu.ai(rb, 0, 13) spu.ai(rc, 0, 14) spu.brnz(14, 3) spu.ai(rd, 0, 15) spu.ai(re, 0, 16) spu.ai(rf, 0, 17) spu.ai(rg, 0, 18) spu.ai(rh, 0, 19) spu.nop(0) spu.stop(0x200A) prgm += code r = proc.execute(prgm) # , debug = True) r = proc.nexti() r = proc.nexti() r = proc.nexti() r = proc.nexti() while r != None: r = proc.nexti() if r is not None: regs = proc.dump_regs() print '******', regs[122:] assert (r == None) print 'int result:', r # while True: # pass return
def _load_buffer(self):
    """Emit the code that starts the DMA get for the stream buffer.

    The emitted sequence compares ``self.r_stop`` with ``self.r_count``
    and, when they are equal, branches over the DMA get, so no load is
    issued the last time through the loop.
    """
    # TODO - AWF - some optimization is possible here.
    #   Rather than skipping around the DMA get on the last iteration,
    #   short out of the loop completely; that saves doing the check twice.
    #   Also, as soon as this first check has been done we know the loop
    #   will be executed again, so no second conditional is needed at the
    #   end: just increment the counters and always branch.  A hint could
    #   be added right before the DMA get.
    stream = self.code

    # Scratch register holding the result of the r_stop == r_count test.
    r_is_last = stream.prgm.acquire_register()
    stream.add(spu.ceq(r_is_last, self.r_stop, self.r_count))

    # Jump past the DMA get when the comparison is true.
    past_dma = stream.prgm.get_unique_label("STREAM_BUFFER_SKIP")
    stream.add(spu.brnz(r_is_last, past_dma))

    # Start the DMA get.
    dma.mfc_get(stream, self.ls, syn_range.get_current(self),
                self.buffer_size, self.tag)

    # Landing point for the skip branch.
    stream.add(past_dma)

    stream.prgm.release_register(r_is_last)
    return
def TestInt2(i0 = 0, i1 = 1):
    """Build and run an SPU test program seeded with ``i0`` and ``i1``.

    The stream first packs ``i0``, ``i1``, ``i0 + i1`` and
    ``i1 + (i0 + i1)`` into register 6: each value is loaded into its own
    register, moved with quadword byte shifts/rotates, and the four
    registers are added together.  It then emits a short loop on
    register 4 followed by ``stop(0x2005)``.

    The value returned by ``proc.execute`` is not checked; the function
    returns ``None``.
    """
    i2 = i0 + i1
    i3 = i1 + i2

    code = InstructionStream()
    proc = Processor()

    # Registers are referred to by number.
    r_loop = 4
    r_address = 5
    r0 = 6
    r1 = 7
    r2 = 8
    r3 = 9

    # Load arguments into a quadword

    #################
    # Pack quadword #
    #################

    def load_value_int32(code, reg, value, clear = False):
        """Emit the instructions that load the 32-bit ``value`` into ``reg``."""
        # obviously, value should be 32 bit integer
        # Floor division keeps the upper halfword an integer.
        code.add(spu.ilhu(reg, value // pow(2, 16)))  # immediate load halfword upper
        code.add(spu.iohl(reg, value % pow(2, 16)))   # immediate or halfword lower
        if clear:
            code.add(spu.shlqbyi(reg, reg, 12))  # shift left qw by bytes, clears right bytes
        return

    load_value_int32(code, r0, i0, True)
    load_value_int32(code, r1, i1, True)
    code.add(spu.rotqbyi(r1, r1, 12))  # rotate qw by bytes
    load_value_int32(code, r2, i2, True)
    code.add(spu.rotqbyi(r2, r2, 8))
    load_value_int32(code, r3, i3, True)
    code.add(spu.rotqbyi(r3, r3, 4))
    code.add(spu.a(r0, r0, r1))
    code.add(spu.a(r0, r0, r2))
    code.add(spu.a(r0, r0, r3))

    ##########

    # Main loop to calculate Fibonacci sequence

    # The helper's keyword is `clear`; the original passed `clear_bits`,
    # which raised TypeError before any loop code was emitted.
    load_value_int32(code, r_address, pow(2, 16), clear = False)  # start at 64K

    load_value_int32(code, r_loop, 0, clear = False)
    start_label = code.size() + 1

    code.add(spu.sfi(r_loop, r_loop, 1))

    # NOTE(review): the original used the undefined name `next` here; it
    # resolved to the builtin and raised TypeError.  It is replaced by the
    # position of the branch, computed the same way as start_label.
    # Assuming size() grows by one per added instruction, the offset is one
    # word back, i.e. the sfi above -- confirm the intended target and unit.
    branch_pos = code.size() + 1
    code.add(spu.brnz(r_loop, (-(branch_pos - start_label) * spu.WORD_SIZE)))

    code.add(spu.stop(0x2005))

    r = proc.execute(code)
    # assert(r == 12)
    # print 'int result:', r

    return
def end(self, branch = True):
    """Emit the post-loop iterator code.

    When ``self.hint`` is ``True`` a branch hint (``hbrr``) for the loop
    branch is emitted first.  Depending on ``self.mode`` the loop branch
    is then emitted (only if ``branch`` is true) and the counter is reset
    so the iterator can be used as a nested loop.  Finally the count
    register, and the stop register unless it is external, are released.
    """
    stream = self.code

    if self.hint == True:
        stream.add(spu.hbrr(self.branch_label, self.start_label))

    if self.mode == DEC:
        # Branch back while r_count is not zero (CR).
        #   Note that this relies on someone (e.g. cleanup()) setting the
        #   condition register properly.
        if branch:
            stream.add(self.branch_label)
            stream.add(spu.brnz(self.r_count, self.start_label))

        # Reset the counter in case this is a nested loop.
        util.load_word(stream, self.r_count, self.get_count())

    elif self.mode == INC:
        # Branch back while r_count < r_stop.
        if branch:
            r_below_stop = stream.prgm.acquire_register()

            stream.add(spu.cgt(r_below_stop, self.r_stop, self.r_count))
            stream.add(self.branch_label)
            stream.add(spu.brnz(r_below_stop, self.start_label))

            stream.prgm.release_register(r_below_stop)

        # Reset the current value in case this is a nested loop.
        if self._external_start:
            # Adding 0 copies the externally supplied start register.
            stream.add(spu.ai(self.r_count, self.r_start, 0))
        else:
            util.load_word(stream, self.r_count, self.get_start())

    if self.r_count is not None:
        stream.prgm.release_register(self.r_count)
    if self.r_stop is not None and not self._external_stop:
        stream.prgm.release_register(self.r_stop)

    return
def block(self):
    """Patch the reserved branch slot and emit the branch back to the loop.

    Records the current length of the active stream in
    ``self._block_idx``, overwrites the instruction stored at
    ``self._branch_idx`` with a ``brnz`` on ``self._cmp`` whose offset is
    ``self._block_idx - self._branch_idx``, and then emits a ``br`` with
    offset ``-(idx - self._branch_idx - 1)``.
    """
    code = spu.get_active_code()
    self._block_idx = len(code)

    # --> add the branch instruction
    # NOTE(review): the nop written first is replaced by the very next
    # statement; presumably a leftover -- confirm before removing.
    code[self._branch_idx] = spu.nop(0, ignore_active = True)
    code[self._branch_idx] = spu.brnz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)
    # FILL IN HERE

    # Return to the loop
    # idx is the stream length before the br below is appended.
    idx = len(code)
    spu.br(- (idx - self._branch_idx - 1))

    return
# Fragment: r_sum, r_cnt, code, prgm and ITERS are defined outside this span.

# xor of a register with itself zeroes it; r_cnt is loaded with ITERS.
spu.xor(r_sum, r_sum, r_sum)
load_word(code, r_cnt, ITERS)

# Loop executed until r_cnt has been decremented to zero.
lbl_loop = prgm.get_label("loop")
code.add(lbl_loop)

# presumably a blocking read of the inbound mailbox -- confirm in lib.dma
reg = dma.spu_read_in_mbox(code)
spu.ai(r_sum, r_sum, 1)
dma.spu_write_out_intr_mbox(code, r_sum)
#dma.spu_write_out_mbox(code, reg)
prgm.release_register(reg)

spu.ai(r_cnt, r_cnt, -1)
spu.brnz(r_cnt, lbl_loop)

# After the loop: read signal 1 and copy it (ori with 0) to the return register.
reg = dma.spu_read_signal1(code)
spu.ori(code.gp_return, reg, 0)

# r_cnt and r_sum are reused with new immediate values from here on.
spu.il(r_cnt, 0)
spu.il(r_sum, 16 * 4)

r_data = prgm.acquire_register()
r_cmp = prgm.acquire_register()
r_lsa = prgm.acquire_register()

spu.il(r_lsa, 0x1000)

lbl_incloop = prgm.get_label("incloop")
# Fragment: r_sum, r_cnt, code, prgm and ITERS are defined outside this span.

# xor of a register with itself zeroes it; r_cnt is loaded with ITERS.
spu.xor(r_sum, r_sum, r_sum)
load_word(code, r_cnt, ITERS)

# Loop executed until r_cnt has been decremented to zero.
lbl_loop = prgm.get_label("loop")
code.add(lbl_loop)

# presumably a blocking read of the inbound mailbox -- confirm in lib.dma
reg = dma.spu_read_in_mbox(code)
spu.ai(r_sum, r_sum, 1)
dma.spu_write_out_intr_mbox(code, r_sum)
#dma.spu_write_out_mbox(code, reg)
prgm.release_register(reg)

spu.ai(r_cnt, r_cnt, -1)
spu.brnz(r_cnt, lbl_loop)

# After the loop: read signal 1 and copy it (ori with 0) to the return register.
reg = dma.spu_read_signal1(code)
spu.ori(code.gp_return, reg, 0)

# r_cnt and r_sum are reused with new immediate values from here on.
spu.il(r_cnt, 0)
spu.il(r_sum, 16 * 4)

r_data = prgm.acquire_register()
r_cmp = prgm.acquire_register()
r_lsa = prgm.acquire_register()

spu.il(r_lsa, 0x1000)

# Start of the second loop; its body lies beyond this span.
lbl_incloop = prgm.get_label("incloop")
code.add(lbl_incloop)
import corepy.arch.spu.isa as spu
import corepy.arch.spu.platform as env
import corepy.arch.spu.lib.dma as dma
from corepy.arch.spu.lib.util import load_word

import time

if __name__ == '__main__':
    # Build one countdown-loop program and execute it 10000 times.
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()

    spu.set_active_code(code)

    # r_cnt starts at 0x10000 and is decremented until it reaches zero.
    r_cnt = prgm.acquire_register()
    load_word(code, r_cnt, 0x10000)

    # Remember the stream size at the top of the loop so the branch can be
    # given an offset relative to the size at the point it is emitted.
    br_loop = code.size()
    spu.ai(r_cnt, r_cnt, -1)
    spu.brnz(r_cnt, br_loop - code.size())

    prgm.add(code)
    prgm.print_code()

    for i in xrange(0, 10000):
        proc.execute(prgm)
        #if i % 25 == 0:
        #  print "sleep"
        #  time.sleep(1)
def SimpleSPU(): """ A very simple SPU that computes 11 + 31 and returns 0xA on success. """ code = InstructionStream() proc = Processor() spu.set_active_code(code) # Acquire two registers #x = code.acquire_register() x = code.gp_return test = code.acquire_register() lbl_brz = code.get_label("BRZ") lbl_skip = code.get_label("SKIP") spu.hbrr(lbl_brz, lbl_skip) spu.xor(x, x, x) # zero x spu.ai(x, x, 11) # x = x + 11 spu.ai(x, x, 31) # x = x + 31 spu.ceqi(test, x, 42) # test = (x == 42) # If test is false (all 0s), skip the stop(0x100A) instruction code.add(lbl_brz) spu.brz(test, lbl_skip) spu.stop(0x100A) code.add(lbl_skip) spu.stop(0x100B) code.print_code(hex=True, pro=True, epi=True) r = proc.execute(code, mode='int', stop=True) print "ret", r assert (r[0] == 42) assert (r[1] == 0x100A) code = InstructionStream() spu.set_active_code(code) lbl_loop = code.get_label("LOOP") lbl_break = code.get_label("BREAK") r_cnt = code.acquire_register() r_stop = code.acquire_register() r_cmp = code.acquire_register() r_foo = code.gp_return spu.ori(r_foo, code.r_zero, 0) spu.ori(r_cnt, code.r_zero, 0) util.load_word(code, r_stop, 10) code.add(lbl_loop) spu.ceq(r_cmp, r_cnt, r_stop) spu.brnz(r_cmp, lbl_break) spu.ai(r_cnt, r_cnt, 1) spu.a(r_foo, r_foo, r_cnt) spu.br(lbl_loop) code.add(lbl_break) code.print_code() r = proc.execute(code, mode='int', stop=True) print "ret", r assert (r[0] == 55) return
import corepy.arch.spu.lib.dma as dma
from corepy.arch.spu.lib.util import load_word

import time

if __name__ == '__main__':
    # Build one countdown-loop program and execute it 10000 times.
    # NOTE(review): `spu` and `env` are used below but are not imported in
    # the lines shown here -- presumably imported just above; confirm.
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()

    spu.set_active_code(code)

    # r_cnt starts at 0x10000 and is decremented until it reaches zero.
    r_cnt = prgm.acquire_register()
    load_word(code, r_cnt, 0x10000)

    # Remember the stream size at the top of the loop so the branch can be
    # given an offset relative to the size at the point it is emitted.
    br_loop = code.size()
    spu.ai(r_cnt, r_cnt, -1)
    spu.brnz(r_cnt, br_loop - code.size())

    prgm.add(code)
    prgm.print_code()

    for i in xrange(0, 10000):
        proc.execute(prgm)
        #if i % 25 == 0:
        #  print "sleep"
        #  time.sleep(1)
def SimpleSPU(): """ A very simple SPU that computes 11 + 31 and returns 0xA on success. """ prgm = env.Program() code = prgm.get_stream() proc = env.Processor() spu.set_active_code(code) # Acquire two registers #x = code.acquire_register() x = prgm.gp_return test = prgm.acquire_register() lbl_brz = prgm.get_label("BRZ") lbl_skip = prgm.get_label("SKIP") spu.hbrr(lbl_brz, lbl_skip) spu.xor(x, x, x) # zero x spu.ai(x, x, 11) # x = x + 11 spu.ai(x, x, 31) # x = x + 31 spu.ceqi(test, x, 42) # test = (x == 42) # If test is false (all 0s), skip the stop(0x100A) instruction code.add(lbl_brz) spu.brz(test, lbl_skip) spu.stop(0x100A) code.add(lbl_skip) spu.stop(0x100B) prgm.add(code) prgm.print_code() r = proc.execute(prgm, mode = 'int', stop = True) print "ret", r assert(r[0] == 42) assert(r[1] == 0x100A) prgm = env.Program() code = prgm.get_stream() spu.set_active_code(code) lbl_loop = prgm.get_label("LOOP") lbl_break = prgm.get_label("BREAK") r_cnt = prgm.acquire_register() r_stop = prgm.acquire_register() r_cmp = prgm.acquire_register() r_foo = prgm.gp_return spu.ori(r_foo, prgm.r_zero, 0) spu.ori(r_cnt, prgm.r_zero, 0) util.load_word(code, r_stop, 10) code.add(lbl_loop) spu.ceq(r_cmp, r_cnt, r_stop) spu.brnz(r_cmp, lbl_break) spu.ai(r_cnt, r_cnt, 1) spu.a(r_foo, r_foo, r_cnt) spu.br(lbl_loop) code.add(lbl_break) prgm.add(code) prgm.print_code() r = proc.execute(prgm, mode = 'int', stop = True) print "ret", r assert(r[0] == 55) return
# Build a small demo stream and hand it to SPUApp.
prgm = env.Program()
code = prgm.get_stream()

reg = prgm.acquire_register()
foo = prgm.acquire_register(reg_name=5)

code.add(prgm.get_label("FOO"))
code.add(spu.il(foo, 0xCAFE))

# ilhu + iohl fill the upper and lower halfwords: reg = 0xDEADBEEF.
code.add(spu.ilhu(reg, 0xDEAD))
code.add(spu.iohl(reg, 0xBEEF))
code.add(spu.stqd(reg, code.r_zero, 4))

lbl_loop = prgm.get_label("LOOP")
lbl_break = prgm.get_label("BREAK")

# NOTE(review): r_zero / gp_return are read from the stream here; confirm
# the stream object exposes them (they may live on the Program instead).
r_cnt = code.gp_return
r_stop = prgm.acquire_register(reg_name=9)
r_cmp = prgm.acquire_register()

# Count r_cnt up from 0 until it equals r_stop (5).
code.add(spu.ori(r_cnt, code.r_zero, 0))
code.add(spu.il(r_stop, 5))

code.add(lbl_loop)

code.add(spu.ceq(r_cmp, r_cnt, r_stop))
# NOTE(review): the labels are looked up by name again instead of reusing
# lbl_break / lbl_loop; presumably get_label returns the same label for
# the same name -- confirm.
code.add(spu.brnz(r_cmp, prgm.get_label("BREAK")))
code.add(spu.ai(r_cnt, r_cnt, 1))
code.add(spu.br(prgm.get_label("LOOP")))

code.add(lbl_break)

app = SPUApp(code)
app.MainLoop()
# Build a small demo stream and hand it to SPUApp.
# (prgm is created outside this span.)
code = prgm.get_stream()

reg = prgm.acquire_register()
foo = prgm.acquire_register(reg_name = 5)

code.add(prgm.get_label("FOO"))
code.add(spu.il(foo, 0xCAFE))

# ilhu + iohl fill the upper and lower halfwords: reg = 0xDEADBEEF.
code.add(spu.ilhu(reg, 0xDEAD))
code.add(spu.iohl(reg, 0xBEEF))
code.add(spu.stqd(reg, code.r_zero, 4))

lbl_loop = prgm.get_label("LOOP")
lbl_break = prgm.get_label("BREAK")

# NOTE(review): r_zero / gp_return are read from the stream here; confirm
# the stream object exposes them (they may live on the Program instead).
r_cnt = code.gp_return
r_stop = prgm.acquire_register(reg_name = 9)
r_cmp = prgm.acquire_register()

# Count r_cnt up from 0 until it equals r_stop (5).
code.add(spu.ori(r_cnt, code.r_zero, 0))
code.add(spu.il(r_stop, 5))

code.add(lbl_loop)

code.add(spu.ceq(r_cmp, r_cnt, r_stop))
# NOTE(review): the labels are looked up by name again instead of reusing
# lbl_break / lbl_loop; presumably get_label returns the same label for
# the same name -- confirm.
code.add(spu.brnz(r_cmp, prgm.get_label("BREAK")))
code.add(spu.ai(r_cnt, r_cnt, 1))
code.add(spu.br(prgm.get_label("LOOP")))

code.add(lbl_break)

app = SPUApp(code)
app.MainLoop()