def synthesize(self, code):
    """
    Emit the popc/reduce program into `code`.

    `code` is installed as the active instruction stream while the
    instructions are generated and the previously active stream is
    reinstated before returning.  The generated sequence copies the input
    vector out of register 5, runs the inlined `popc` and `reduce_word`
    sequences on it and writes the result to the outbound mailbox channel
    (`dma.SPU_WrOutMbox`).

    code -- the instruction stream that receives the instructions and
            from which the temporary registers are acquired.
    """
    old_code = spu.get_active_code()
    spu.set_active_code(code)

    # Reserve two variable registers
    count = code.acquire_register()
    result = code.acquire_register()

    # 'Load' the input vector x from register 5
    x = code.acquire_register()
    spu.ai(x, 5, 0)

    # Zero count and result
    spu.xor(count, count, count)
    spu.xor(result, result, result)

    # Inline the popc and reduce operations
    self.popc(count, x)
    self.reduce_word(result, count)

    # Send the result to the caller
    spu.wrch(result, dma.SPU_WrOutMbox)

    # Hand back every register acquired above.  Previously only x was
    # released, so count and result stayed reserved after each call.
    code.release_register(x)
    code.release_register(count)
    code.release_register(result)

    spu.set_active_code(old_code)
    return
def fdiv(code, d, x, y, one = None):
    """
    Emit single-precision floating point division, d = x / y.

    code -- stream used to acquire and release the scratch registers.
            NOTE(review): the spu.* calls take no stream argument, so
            presumably `code` must also be the active code -- confirm.
    d    -- destination register for the quotient.
    x, y -- dividend and divisor registers.
    one  -- optional register already holding the constant that is fed to
            fnms as the '1' term.  When omitted, a scratch register is
            built from integer 1 via cuflt and released again on exit; a
            caller-supplied register is left untouched.
    """
    recip = code.acquire_registers(3)
    residual = code.acquire_register()

    # Everything in this list is released at the end
    scratch = list(recip)
    scratch.append(residual)

    if one is None:
        one = code.acquire_register()
        spu.xor(one, one, one)
        spu.ai(one, one, 1)
        spu.cuflt(one, one, 155)
        scratch.append(one)

    estimate, interpolated, refined = recip

    # Compute 1/y (from SPU ISA 1.1, p208, Normal case)
    spu.frest(estimate, y)
    spu.fi(interpolated, y, estimate)
    spu.fnms(residual, y, interpolated, one)
    spu.fma(refined, residual, interpolated, interpolated)

    # Compute x * (1/y)
    spu.fm(d, x, refined)

    code.release_registers(scratch)
def _ab(self, x, y, ab, temp):
    """
    Accumulate into `ab` the cntb/sumb reduction of (x XOR y).

    x, y -- input registers.
    ab   -- accumulator register; the reduced value is added to it.
    temp -- scratch register, overwritten.

    NOTE(review): per the SPU ISA, cntb counts the set bits of each byte
    and sumb sums bytes, so this presumably tallies the bits in which x
    and y differ -- confirm against the caller.
    """
    # temp = x ^ y
    spu.xor(temp, x, y)

    # Reduce temp in place
    spu.cntb(temp, temp)
    spu.sumb(temp, temp, 0)

    # ab += temp
    spu.a(ab, ab, temp)
def synthesize_constants(self, code):
    """
    Acquire a register from `code`, store it in self._one and emit the
    xor / ai / cuflt sequence that initialises it from the integer 1.

    `code` becomes the active stream while emitting.  The previously
    active stream is reinstated afterwards only if there was one;
    otherwise `code` stays active.
    """
    previous = spu.get_active_code()
    spu.set_active_code(code)

    one = code.acquire_register()
    self._one = one

    spu.xor(one, one, one)      # one = 0
    spu.ai(one, one, 1)         # one = 1 (integer)
    spu.cuflt(one, one, 155)    # integer -> float conversion

    if previous is None:
        return
    spu.set_active_code(previous)
def block(self):
    """
    Emit the result-saving block at the current end of the active code.

    The method records where the block starts, overwrites the placeholder
    instruction at self._branch_idx, packs x / y / score into
    self._save_value, stores it to the results buffer, advances and
    conditionally resets self._count, and finally branches back to the
    instruction following the placeholder.
    """
    code = spu.get_active_code()

    # Position of the first instruction of this block
    self._block_idx = len(code)

    # --> add the branch instruction (use brz (?) to always branch, nop to never branch)
    # The nop variant is the one currently enabled; the two conditional
    # branches below are kept as alternatives.
    code[self._branch_idx] = spu.nop(0, ignore_active = True)
    # code[self._branch_idx] = spu.brnz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)
    # code[self._branch_idx] = spu.brz(self._cmp, self._block_idx - self._branch_idx, ignore_active = True)

    # Pack result into vector
    #   [x][y][score][--]

    # Zero the save value
    spu.xor(self._save_value, self._save_value, self._save_value)

    # Copy the score
    spu.selb(self._save_value, self._save_value, self._score, self._word_mask)
    spu.rotqbyi(self._save_value, self._save_value, 12)

    # Copy the y value
    spu.selb(self._save_value, self._save_value, self._y_off, self._word_mask)
    spu.rotqbyi(self._save_value, self._save_value, 12)

    # Copy the x value
    spu.selb(self._save_value, self._save_value, self._x_off, self._word_mask)

    # Save value to local store
    spu.stqx(self._save_value, self._count, self._md_results.r_addr)

    # Advance the store offset by 16 (presumably one quadword -- confirm)
    self._count.v = self._count.v + 16

    # --> MemorySave test
    cmp = self._save_value # reuse the save register

    # Compare the offset against the size of the results buffer
    spu.ceq.ex(cmp, self._count, self._md_results.r_size)

    # Optional hook: let the save operation act on the comparison result
    if self._save_op is not None:
        self._save_op.test(cmp, self._count)

    # Just reset for now
    spu.selb(self._count, self._count, 0, cmp)

    # Return to the loop
    # The offset is negative, relative to this br, and lands on the
    # instruction right after the placeholder at self._branch_idx.
    idx = len(code)
    spu.br(- (idx - self._branch_idx - 1))

    return
def _ab_c(self, x, y, ab, c, ab_temp, c_temp):
    """
    Accumulate two reductions at once, with their instructions interleaved.

    ab      += cntb/sumb reduction of (x XOR y)
    c       += cntb/sumb reduction of (x AND y)
    ab_temp, c_temp -- scratch registers, overwritten.

    The emission order alternates between the two chains and must be kept
    as is; each step of one chain is independent of the other chain.
    """
    # Stage 1: combine the inputs
    spu.xor(ab_temp, x, y)
    spu.and_(c_temp, x, y)

    # Stage 2: cntb on each intermediate
    spu.cntb(ab_temp, ab_temp)
    spu.cntb(c_temp, c_temp)

    # Stage 3: sumb on each intermediate
    spu.sumb(ab_temp, ab_temp, 0)
    spu.sumb(c_temp, c_temp, 0)

    # Stage 4: fold into the accumulators
    spu.a(ab, ab, ab_temp)
    spu.a(c, c, c_temp)
def row_complete(self, code):
    """
    Save the current row to the framebuffer.

    Requires self.w, self.lsa and self.y_offset to be set; an Exception
    naming the missing setting is raised otherwise.  After the row has
    been put, y_offset is advanced by self.stride and x_offset is zeroed.
    """
    prerequisites = (
        ('w', 'Please set width'),
        ('lsa', 'Please set lsa'),
        ('y_offset', 'Please call setup'),
    )
    for attr, complaint in prerequisites:
        if getattr(self, attr) is None:
            raise Exception(complaint)

    # Describe one row of self.w 'I' elements located at y_offset ...
    row = spuiter.memory_desc('I', size = self.w)
    row.set_addr_reg(self.y_offset)
    # ... and put it from local store address self.lsa
    row.put(code, self.lsa)

    # Move on to the next row and rewind the column offset
    self.y_offset.v = self.y_offset + self.stride
    spu.xor(self.x_offset, self.x_offset, self.x_offset)
def TestParams():
    """
    Sum the ten SPU execution parameters on the SPU and check the total.

    The parameters are set to 1..10, so the generated program is expected
    to return 55 and stop with code 0x200A.
    """
    # Run this with a stop instruction and examine the registers
    prgm = Program()
    code = prgm.get_stream()
    proc = Processor()

    r_sum = prgm.gp_return
    r_current = prgm.acquire_register()

    # Zero the sum
    code.add(spu.xor(r_sum, r_sum, r_sum))

    # Accumulate every parameter into the sum
    param_regs = (
        spu_param_1, spu_param_2, spu_param_3, spu_param_4, spu_param_5,
        spu_param_6, spu_param_7, spu_param_8, spu_param_9, spu_param_10,
    )
    for param in param_regs:
        copy_param(code, r_current, param)
        code.add(spu.a(r_sum, r_sum, r_current))

    # Stop with 0x200A when the sum is 55; otherwise the branch skips
    # ahead to the 0x200B stop
    code.add(spu.ceqi(r_current, r_sum, 55))
    code.add(spu.brz(r_current, 2))
    code.add(spu.stop(0x200A))
    code.add(spu.stop(0x200B))

    # p1 = 1, p2 = 2, ... p10 = 10
    params = spu_exec.ExecParams()
    for n in range(1, 11):
        setattr(params, 'p%d' % n, n)

    prgm += code
    r = proc.execute(prgm, params=params, stop=True)

    assert r[0] == 55
    assert r[1] == 0x200A
def SimpleSPU():
    """
    Smoke test: run two tiny SPU programs.

    The first adds 11 and 31 in the integer return register and must
    return 42 with stop code 0x100A.  The second loads 3.14 into the
    floating point return register and prints what execute() returns.
    """
    # --- Program 1: integer add with a relative-offset branch ---
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()

    spu.set_active_code(code)

    total = code.gp_return
    matched = prgm.acquire_register(reg_name=55)

    spu.xor(total, total, total)    # total = 0
    spu.ai(total, total, 11)        # total += 11
    spu.ai(total, total, 31)        # total += 31
    spu.ceqi(matched, total, 42)    # matched = (total == 42)

    # When matched is all zeros, jump over the stop(0x100A) instruction
    spu.brz(matched, 2)
    spu.stop(0x100A)
    spu.stop(0x100B)

    prgm.add(code)
    prgm.print_code(hex=True)

    r = proc.execute(prgm, mode='int', stop=True, debug=True)
    assert r[0] == 42
    assert r[1] == 0x100A

    # --- Program 2: floating point return value ---
    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    util.load_float(code, code.fp_return, 3.14)

    prgm.add(code)
    prgm.print_code(hex=True)

    r = proc.execute(prgm, mode='fp')
    print(r)
if __name__ == '__main__':
    # Number of mailbox round trips performed by the generated loop
    ITERS = 500000
    #ITERS = 15

    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()
    spu.set_active_code(code)

    # Element counts must be integers: use floor division so the size
    # stays an int even when true division is in effect.
    psmap = extarray.extarray('I', 131072 // 4)
    data = extarray.extarray('I', range(0, 16))

    r_sum = prgm.gp_return
    r_cnt = prgm.acquire_register()

    # sum = 0, cnt = ITERS
    spu.xor(r_sum, r_sum, r_sum)
    load_word(code, r_cnt, ITERS)

    lbl_loop = prgm.get_label("loop")
    code.add(lbl_loop)

    # Each iteration: read the inbound mailbox, bump the sum and send it
    # back through the interrupting outbound mailbox.
    reg = dma.spu_read_in_mbox(code)
    spu.ai(r_sum, r_sum, 1)
    dma.spu_write_out_intr_mbox(code, r_sum)
    #dma.spu_write_out_mbox(code, reg)
    prgm.release_register(reg)

    # Count down and loop until cnt reaches zero
    spu.ai(r_cnt, r_cnt, -1)
    spu.brnz(r_cnt, lbl_loop)
def SimpleSPU():
    """
    Smoke test: run two tiny SPU programs built on InstructionStream.

    The first adds 11 and 31 and must return 42 with stop code 0x100A,
    using labels and a branch hint.  The second sums 1..10 in a labelled
    loop and must return 55.
    """
    # --- Program 1: 11 + 31 with a labelled conditional branch ---
    code = InstructionStream()
    proc = Processor()

    spu.set_active_code(code)

    total = code.gp_return
    matched = code.acquire_register()

    lbl_brz = code.get_label("BRZ")
    lbl_skip = code.get_label("SKIP")

    # Branch hint for the brz emitted further down
    spu.hbrr(lbl_brz, lbl_skip)

    spu.xor(total, total, total)    # total = 0
    spu.ai(total, total, 11)        # total += 11
    spu.ai(total, total, 31)        # total += 31
    spu.ceqi(matched, total, 42)    # matched = (total == 42)

    # When matched is all zeros, jump over the stop(0x100A) instruction
    code.add(lbl_brz)
    spu.brz(matched, lbl_skip)
    spu.stop(0x100A)
    code.add(lbl_skip)
    spu.stop(0x100B)

    code.print_code(hex=True, pro=True, epi=True)

    r = proc.execute(code, mode='int', stop=True)
    print("ret %s" % (r,))
    assert r[0] == 42
    assert r[1] == 0x100A

    # --- Program 2: foo = 1 + 2 + ... + 10 ---
    code = InstructionStream()
    spu.set_active_code(code)

    lbl_loop = code.get_label("LOOP")
    lbl_break = code.get_label("BREAK")

    r_cnt = code.acquire_register()
    r_stop = code.acquire_register()
    r_cmp = code.acquire_register()
    r_foo = code.gp_return

    # foo = 0, cnt = 0, stop = 10
    spu.ori(r_foo, code.r_zero, 0)
    spu.ori(r_cnt, code.r_zero, 0)
    util.load_word(code, r_stop, 10)

    code.add(lbl_loop)

    # Leave the loop once cnt == stop
    spu.ceq(r_cmp, r_cnt, r_stop)
    spu.brnz(r_cmp, lbl_break)

    # cnt += 1; foo += cnt
    spu.ai(r_cnt, r_cnt, 1)
    spu.a(r_foo, r_foo, r_cnt)

    spu.br(lbl_loop)
    code.add(lbl_break)

    code.print_code()

    r = proc.execute(code, mode='int', stop=True)
    print("ret %s" % (r,))
    assert r[0] == 55
def SimpleSPU():
    """
    Smoke test: run two tiny SPU programs built on env.Program.

    The first adds 11 and 31 and must return 42 with stop code 0x100A,
    using labels and a branch hint.  The second sums 1..10 in a labelled
    loop and must return 55.
    """
    # --- Program 1: 11 + 31 with a labelled conditional branch ---
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()

    spu.set_active_code(code)

    total = prgm.gp_return
    matched = prgm.acquire_register()

    lbl_brz = prgm.get_label("BRZ")
    lbl_skip = prgm.get_label("SKIP")

    # Branch hint for the brz emitted further down
    spu.hbrr(lbl_brz, lbl_skip)

    spu.xor(total, total, total)    # total = 0
    spu.ai(total, total, 11)        # total += 11
    spu.ai(total, total, 31)        # total += 31
    spu.ceqi(matched, total, 42)    # matched = (total == 42)

    # When matched is all zeros, jump over the stop(0x100A) instruction
    code.add(lbl_brz)
    spu.brz(matched, lbl_skip)
    spu.stop(0x100A)
    code.add(lbl_skip)
    spu.stop(0x100B)

    prgm.add(code)
    prgm.print_code()

    r = proc.execute(prgm, mode='int', stop=True)
    print("ret %s" % (r,))
    assert r[0] == 42
    assert r[1] == 0x100A

    # --- Program 2: foo = 1 + 2 + ... + 10 ---
    prgm = env.Program()
    code = prgm.get_stream()
    spu.set_active_code(code)

    lbl_loop = prgm.get_label("LOOP")
    lbl_break = prgm.get_label("BREAK")

    r_cnt = prgm.acquire_register()
    r_stop = prgm.acquire_register()
    r_cmp = prgm.acquire_register()
    r_foo = prgm.gp_return

    # foo = 0, cnt = 0, stop = 10
    spu.ori(r_foo, prgm.r_zero, 0)
    spu.ori(r_cnt, prgm.r_zero, 0)
    util.load_word(code, r_stop, 10)

    code.add(lbl_loop)

    # Leave the loop once cnt == stop
    spu.ceq(r_cmp, r_cnt, r_stop)
    spu.brnz(r_cmp, lbl_break)

    # cnt += 1; foo += cnt
    spu.ai(r_cnt, r_cnt, 1)
    spu.a(r_foo, r_foo, r_cnt)

    spu.br(lbl_loop)
    code.add(lbl_break)

    prgm.add(code)
    prgm.print_code()

    r = proc.execute(prgm, mode='int', stop=True)
    print("ret %s" % (r,))
    assert r[0] == 55
def synthesize(self, code):
    """
    Emit the instruction sequence that computes a base-2 logarithm of
    self.x into self.result, following the C reference quoted in the
    trailing comments.

    code -- instruction stream that receives the instructions; it is the
            active code while emitting and the previously active stream
            is always reinstated, even when emission raises.

    Raises Exception when self.x or self.result has not been set.  The
    check runs before the active code is switched, so a configuration
    error leaves the global active stream untouched.
    """
    if self.x is None:
        raise Exception("Please set x")
    if self.result is None:
        raise Exception("Please set result")

    old_code = spu.get_active_code()
    spu.set_active_code(code)

    try:
        # exponent
        e = var.Word()

        # Working values
        x = var.Word()
        y = var.Word()
        z = var.Word()

        cmp = var.Bits()
        tmp = var.Word()

        spu.xor(cmp, cmp, cmp)
        spu.xor(tmp, tmp, tmp)

        # Set the working x
        x.v = self.x

        # Extract the exponent
        #   int e = (((*(unsigned int *) &x) >> 23) & 0xff) - 0x7e;
        e.v = x >> self.consts['_23']
        e.v = spu.andi.ex(e, 0xff)
        e.v = spu.ai.ex(e, 0x382)  # 0x382 == (- 0x7E) using 10 bits
        # 0b 111 1110

        # Extract the mantissa
        x.v = x & self.consts['M1']  # *(unsigned int*)&x &= 0x807fffff;
        x.v = x | self.consts['M2']  # *(unsigned int*)&x |= 0x3f000000;

        # Normalize
        # Both branches are computed and the right one is selected below;
        # y, z and tmp double as the branch temporaries.
        x1, x2, e1 = y, z, tmp

        # if (x < SQRTHF)
        cmp.v = spu.fcgt.ex(self.consts['SQRTHF'], x)

        # (True) { ... }
        e1.v = spu.ai.ex(e, -1)                   # e -= 1;
        x1.v = spu.fa.ex(x, x)                    # x = x + x - 1.0;
        x1.v = spu.fs.ex(x1, self.consts['ONE'])  #   ""  ""

        # (False) { ... }
        x2.v = spu.fs.ex(x, self.consts['ONE'])   # x = x - 1.0;

        # Select the True/False values based on cmp
        e.v = spu.selb.ex(e, e1, cmp)
        x.v = spu.selb.ex(x2, x1, cmp)

        # Compute polynomial
        z.v = spu.fm.ex(x, x)                             # z = x * x;

        y.v = spu.fms.ex(self.consts['C1'], x,            # y = (((((((( 7.0376836292E-2 * x
                         self.consts['C2'])               #  - 1.1514610310E-1) * x
        y.v = spu.fma.ex(y, x, self.consts['C3'])         #  + 1.1676998740E-1) * x
        y.v = spu.fms.ex(y, x, self.consts['C4'])         #  - 1.2420140846E-1) * x
        y.v = spu.fma.ex(y, x, self.consts['C5'])         #  + 1.4249322787E-1) * x
        y.v = spu.fms.ex(y, x, self.consts['C6'])         #  - 1.6668057665E-1) * x
        y.v = spu.fma.ex(y, x, self.consts['C7'])         #  + 2.0000714765E-1) * x
        y.v = spu.fms.ex(y, x, self.consts['C8'])         #  - 2.4999993993E-1) * x
        y.v = spu.fma.ex(y, x, self.consts['C9'])         #  + 3.3333331174E-1)
        y.v = spu.fm.ex(y, x)                             #  * x
        y.v = spu.fm.ex(y, z)                             #  * z;

        y.v = spu.fma.ex(self.consts['C10'], z, y)        # y += -0.5 * z;

        # Convert to log base 2
        z.v = spu.fm.ex(y, self.consts['LOG2EA'])         # z = y * LOG2EA;
        z.v = spu.fma.ex(x, self.consts['LOG2EA'], z)     # z += x * LOG2EA;
        z.v = spu.fa.ex(z, y)                             # z += y;
        z.v = spu.fa.ex(z, x)                             # z += x;
        e.v = spu.csflt.ex(e, 155)                        # z += (float) e;
        z.v = spu.fa.ex(z, e)                             #   ""  ""

        spu.ai(self.result, z, 0)                         # return z
    finally:
        spu.set_active_code(old_code)

    return
def synthesize(self, code):
    """
    Emit the instruction sequence that computes a base-2 logarithm of
    self.x into self.result, following the C reference quoted in the
    trailing comments.

    code -- instruction stream that receives the instructions; it is the
            active code while emitting and the previously active stream
            is always reinstated, even when emission raises.

    Raises Exception when self.x or self.result has not been set.  The
    check runs before the active code is switched, so a configuration
    error leaves the global active stream untouched.
    """
    if self.x is None:
        raise Exception("Please set x")
    if self.result is None:
        raise Exception("Please set result")

    old_code = spu.get_active_code()
    spu.set_active_code(code)

    try:
        # exponent
        e = var.Word()

        # Working values
        x = var.Word()
        y = var.Word()
        z = var.Word()

        cmp = var.Bits()
        tmp = var.Word()

        spu.xor(cmp, cmp, cmp)
        spu.xor(tmp, tmp, tmp)

        # Set the working x
        x.v = self.x

        # Extract the exponent
        #   int e = (((*(unsigned int *) &x) >> 23) & 0xff) - 0x7e;
        e.v = x >> self.consts['_23']
        e.v = spu.andi.ex(e, 0xff)
        e.v = spu.ai.ex(e, 0x382)  # 0x382 == (- 0x7E) using 10 bits
        # 0b 111 1110

        # Extract the mantissa
        x.v = x & self.consts['M1']  # *(unsigned int*)&x &= 0x807fffff;
        x.v = x | self.consts['M2']  # *(unsigned int*)&x |= 0x3f000000;

        # Normalize
        # Both branches are computed and the right one is selected below;
        # y, z and tmp double as the branch temporaries.
        x1, x2, e1 = y, z, tmp

        # if (x < SQRTHF)
        cmp.v = spu.fcgt.ex(self.consts['SQRTHF'], x)

        # (True) { ... }
        e1.v = spu.ai.ex(e, -1)                   # e -= 1;
        x1.v = spu.fa.ex(x, x)                    # x = x + x - 1.0;
        x1.v = spu.fs.ex(x1, self.consts['ONE'])  #   ""  ""

        # (False) { ... }
        x2.v = spu.fs.ex(x, self.consts['ONE'])   # x = x - 1.0;

        # Select the True/False values based on cmp
        e.v = spu.selb.ex(e, e1, cmp)
        x.v = spu.selb.ex(x2, x1, cmp)

        # Compute polynomial
        z.v = spu.fm.ex(x, x)                             # z = x * x;

        y.v = spu.fms.ex(self.consts['C1'], x,            # y = (((((((( 7.0376836292E-2 * x
                         self.consts['C2'])               #  - 1.1514610310E-1) * x
        y.v = spu.fma.ex(y, x, self.consts['C3'])         #  + 1.1676998740E-1) * x
        y.v = spu.fms.ex(y, x, self.consts['C4'])         #  - 1.2420140846E-1) * x
        y.v = spu.fma.ex(y, x, self.consts['C5'])         #  + 1.4249322787E-1) * x
        y.v = spu.fms.ex(y, x, self.consts['C6'])         #  - 1.6668057665E-1) * x
        y.v = spu.fma.ex(y, x, self.consts['C7'])         #  + 2.0000714765E-1) * x
        y.v = spu.fms.ex(y, x, self.consts['C8'])         #  - 2.4999993993E-1) * x
        y.v = spu.fma.ex(y, x, self.consts['C9'])         #  + 3.3333331174E-1)
        y.v = spu.fm.ex(y, x)                             #  * x
        y.v = spu.fm.ex(y, z)                             #  * z;

        y.v = spu.fma.ex(self.consts['C10'], z, y)        # y += -0.5 * z;

        # Convert to log base 2
        z.v = spu.fm.ex(y, self.consts['LOG2EA'])         # z = y * LOG2EA;
        z.v = spu.fma.ex(x, self.consts['LOG2EA'], z)     # z += x * LOG2EA;
        z.v = spu.fa.ex(z, y)                             # z += y;
        z.v = spu.fa.ex(z, x)                             # z += x;
        e.v = spu.csflt.ex(e, 155)                        # z += (float) e;
        z.v = spu.fa.ex(z, e)                             #   ""  ""

        spu.ai(self.result, z, 0)                         # return z
    finally:
        spu.set_active_code(old_code)

    return