def synthesize(self, code):
    """
    Emit the streaming population-count program into `code`.

    The program walks the buffers produced by `spuiter.stream_buffer`,
    feeds every vector of each buffer to `popc.popc`, reduces the running
    count with `popc.reduce_word` and writes the total to the SPU outbound
    mailbox.  The previously active instruction stream is restored before
    returning.
    """
    saved_code = spu.get_active_code()
    spu.set_active_code(code)

    buffers = spuiter.stream_buffer(
        code,
        self.stream_addr,
        self.stream_size * 4,
        self.buffer_size,
        self.lsa)
    local_words = spuiter.memory_desc('I', self.lsa, self.buffer_size / 4)
    counter = syn_popc_var()

    # NOTE(review): `x` is rebound by the inner loop below, so this Word is
    # never read.  It is kept (and the name reused) so that the emitted code
    # and register usage stay exactly as before.
    x = var.Word(0)
    count = var.Word(0)
    total = var.Word(0)

    for buf in buffers:
        vectors = spuiter.spu_vec_iter(code, local_words, addr_reg=buf)
        for x in vectors:
            counter.popc(count, x)

    counter.reduce_word(total, count)

    # Hand the final total back to the caller through the outbound mailbox
    spu.wrch(total, dma.SPU_WrOutMbox)

    spu.set_active_code(saved_code)
    return
def synthesize(self, code):
    """
    Emit into `code` a program that fills one line with a constant color
    and copies that line to the first frame buffer 128 times.

    Raises:
      Exception: if `self.buffers` or `self.stride` has not been set.

    The active instruction stream is switched to `code` only while the
    program is being synthesized and is always restored afterwards.
    """
    # Validate before touching the global active stream; previously a
    # missing setting raised *after* the switch and left `code` active.
    if self.buffers is None:
        raise Exception('Please set buffers')
    if self.stride is None:
        raise Exception('Please set stride')

    old_code = spu.get_active_code()
    spu.set_active_code(code)
    try:
        # Draw a square
        color = var.SignedWord(0x0F0F0FFF)

        fb0 = var.Word(self.buffers[0])
        # NOTE(review): fb1 is loaded but never used below; kept so the
        # emitted code and register usage are unchanged.
        fb1 = var.Word(self.buffers[1])
        stride = var.Word(self.stride)
        addr = var.Word(0)

        # Draw one line: store the color vector at every 16-byte step
        line_pixels = 256
        for i in spuiter.syn_iter(code, line_pixels * 4, step=16):
            spu.stqx(color, addr, i)

        # Transfer the line to the frame buffer
        md_fb = spuiter.memory_desc('I', size=line_pixels)
        md_fb.set_addr_reg(addr.reg)

        addr.v = fb0

        # One put per row, advancing the target address by the stride
        for i in spuiter.syn_iter(code, 128):
            md_fb.put(code, 0)
            addr.v = addr + stride
    finally:
        spu.set_active_code(old_code)
    return
def TestFloats():
    """
    Run SPULog on the SPU for x = start, start + inc, ... (below stop) and
    print each result next to math.log(x, 2) for comparison.

    Each printed line holds x, the raw word read from the mailbox, that
    word converted with `_sp_to_float`, and the host-side reference value.
    """
    import math

    code = synspu.InstructionStream()
    proc = synspu.Processor()

    spu.set_active_code(code)
    code.set_debug(True)

    # Create a simple SPU program that computes log for all values between
    # .65 and .75 with .01 increments
    start = .65
    stop = .75
    inc = .01

    # (stop - start) / inc evaluates to 9.999... in binary floating point,
    # so plain int() truncation would silently drop the last step.  Round
    # first, and compute the count once so the SPU loop and the host-side
    # read loop always agree.
    n_steps = int(round((stop - start) / inc))

    sp_step = 0x3C23D70A
    # r_current = var.Word(0x3C23D70A) # .01 in single precision
    r_current = var.Word(0x3F266666)  # .65 in single precision (== start)
    r_step = var.Word(sp_step)        # .01 in single precision (== inc)
    result = var.Word(0)
    log = SPULog()

    log.setup(code)
    log.set_result(result)
    log.set_x(r_current)

    log_iter = syn_iter(code, n_steps)

    for i in log_iter:
        log.synthesize(code)
        spu.fa(r_current, r_current, r_step)
        spu.wrch(result, dma.SPU_WrOutMbox)

    # code.print_code()
    spe_id = proc.execute(code, mode='async')

    x = start
    for i in range(n_steps):
        # Busy-wait until the SPU has posted the next result
        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass
        slog = synspu.spu_exec.read_out_mbox(spe_id)
        print('%.3f 0x%08X %.08f %.08f ' % (
            x, slog, _sp_to_float(slog), math.log(x, 2)))
        x += inc

    proc.join(spe_id)
    return
def setup(self, code):
    """
    Create the variables used by this object: the x/y offsets, the stride
    and two constant words (`uint2rgba` and `ff`).

    Raises:
      Exception: if `self.addr` or `self._stride` has not been set.
    """
    if self.addr is None:
        raise Exception('Please set addr')
    if self._stride is None:
        raise Exception('Please set stride')

    self.x_offset = var.Word(0)
    self.y_offset = var.Word(self.addr)
    self.stride = var.Word(self._stride * 4)

    # Mask to extract the lowest 2 bytes from each word in the first vector
    # into RGB and the first byte from the second vector into A
    rgba_mask = extarray.extarray(
        'I',
        [0x01030303,
         0x10070707,
         0x100B0B0B,
         0x100F0F0F])
    self.uint2rgba = var.Word(rgba_mask)

    self.ff = var.Word(0xFF000000)
    return
def popc(self, count, x):
    """
    Accumulate into `count` the number of 1 bits found in each word of `x`.

    The per-vector value is produced by `spu.cntb` followed by `spu.sumb`
    and is then added to the current value of `count`.
    """
    partial = var.Word()

    partial.v = spu.cntb.ex(x)
    partial.v = spu.sumb.ex(partial, 0)

    count.v = count + partial
    return
def setup(self, code):
    """
    Build `self.consts`, one `var.Word` per entry of the module-level
    `constants` mapping, with `code` as the active instruction stream.

    The previously active stream is restored before returning.
    """
    saved_code = spu.get_active_code()
    spu.set_active_code(code)

    self.consts = {}
    for name, value in constants.items():
        self.consts[name] = var.Word(value)

    spu.set_active_code(saved_code)
    return
def synthesize(self, code):
    """
    Emit into `code` a program that runs `self.popc` and
    `self.reduce_word` on one input vector and writes the result to the
    SPU outbound mailbox.

    The previously active instruction stream is restored before returning.
    """
    saved_code = spu.get_active_code()
    spu.set_active_code(code)

    # Working variables, all starting at zero
    bit_count = var.Word(0)
    total = var.Word(0)
    vec = var.Word(0)

    # 'Load' the input vector from register 5
    vec.v = spu.ai.ex(5, 0)

    # Inline the popc and reduce operations
    self.popc(bit_count, vec)
    self.reduce_word(total, bit_count)

    # Hand the result to the caller through the outbound mailbox
    spu.wrch(total, dma.SPU_WrOutMbox)

    spu.set_active_code(saved_code)
    return
def synthesize(self, code):
    """
    Emit into `code` the row/column loops that evaluate `self.ly_point`
    for every point and, when a renderer is configured, pass each result
    to it.

    The outer loop runs `self.h` times and the inner loop `self.w / 4`
    times.  r1 is advanced by r1_inc per inner step and reloaded at the
    start of each row; r2 is advanced by r2_inc per row.  The previously
    active instruction stream is restored before returning.
    """
    saved_code = spu.get_active_code()
    spu.set_active_code(code)

    self._load_parameters(code)
    log = spu_log.SPULog()
    log.setup(code)

    renderer = self.renderer
    have_renderer = renderer is not None
    if have_renderer:
        renderer.setup(code)
        renderer.set_one(log.consts['ONE'])

    r1_inc = var.SingleFloat()
    r2_inc = var.SingleFloat()
    r1 = var.SingleFloat()
    r2 = var.SingleFloat()
    result = var.SingleFloat()
    pattern = var.Word(0)

    point = self.ly_point
    point.set_pattern_reg(pattern)
    point.set_result_reg(result)
    point.set_r_regs(r1, r2)
    point.set_log(log)
    point.setup(code)

    # Load the parameters, in this order, from their fixed addresses
    parameter_slots = (
        (r1, 0),
        (r2, 4),
        (r1_inc, 8),
        (r2_inc, 12),
        (pattern, 16),
    )
    for register, address in parameter_slots:
        spu.lqa(register, address)

    for y in spuiter.syn_iter(code, self.h):
        # Start every row from the initial r1 value again
        spu.lqa(r1, 0)

        for x in spuiter.syn_iter(code, self.w / 4):
            point.synthesize(code)
            r1.v = spu.fa.ex(r1, r1_inc)

            if have_renderer:
                # result.v = spu.fm.ex(r1, r2)
                renderer.set_result_reg(result)
                renderer.synthesize(code)

        if have_renderer:
            renderer.row_complete(code)
        r2.v = spu.fa.ex(r2, r2_inc)

    # return Numeric.where(Numeric.less(results, 0), results, 0)

    spu.set_active_code(saved_code)
    return
def TestLog():
    """
    Run SPULog on the SPU for the ten inputs 1, 10, 100, ... and print the
    raw word read from the mailbox for each, next to the input value.
    """
    code = synspu.InstructionStream()
    proc = synspu.Processor()

    spu.set_active_code(code)

    # Build a small SPU program that computes log for 10 values and
    # reports every result through the mailbox
    log = SPULog()

    values = []
    result = code.acquire_register()

    N = 10
    inputs = [10 ** k for k in range(N)]

    # Load each input and convert it in place with cuflt
    for number in inputs:
        val = var.Word(number)
        spu.cuflt(val, val, 155)
        values.append(val)

    log.setup(code)
    log.set_result(result)

    for val in values:
        log.set_x(val)
        log.synthesize(code)
        spu.wrch(result, dma.SPU_WrOutMbox)

    spe_id = proc.execute(code, mode='async')

    for number in inputs:
        # Busy-wait until the SPU has posted the next result
        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass
        word = synspu.spu_exec.read_out_mbox(spe_id)
        print('log said: 0x%08X (%d)' % (word, number))

    proc.join(spe_id)
    return
def pre_setup(self, code):
    """
    Create the `_count` variable, initialised to zero.

    `code` is accepted for interface compatibility and is not used here.
    """
    self._count = var.Word(0)
def synthesize(self, code):
    """
    Emit into `code` the nested loops that run the Tanimoto kernel on
    every (x, y) pair of bit vectors: `_m` outer iterations over x and
    `_n` inner iterations over y.

    When a save op is configured, each result is compared against the
    threshold (0.0 when `_threshold` is unset) with `spu.fcgt` and the
    outcome is passed to `save_op.test`.

    Raises:
      Exception: if x_addr, y_addr, n_bits, m or n has not been set.

    The active instruction stream is switched to `code` only while the
    program is being synthesized and is always restored afterwards.
    """
    # Sanity checks.  These run before the active stream is switched;
    # previously a failed check left `code` installed as the active stream.
    required = (
        (self._x_addr, "x_addr"),
        (self._y_addr, "y_addr"),
        (self._n_bits, "n_bits"),
        (self._m, "m"),
        (self._n, "n"),
    )
    for value, label in required:
        if value is None:
            raise Exception("Please set %s" % label)

    old_code = spu.get_active_code()
    spu.set_active_code(code)
    try:
        # Acquire registers for the bit vectors and result
        n_vecs = self._n_bits // 128  # one register per 128 bits
        x_regs = [code.acquire_register() for _ in range(n_vecs)]
        y_regs = [code.acquire_register() for _ in range(n_vecs)]
        result = code.acquire_register()

        x_addr = var.Word()
        y_addr = var.Word()

        if self._save_op is not None:
            if self._threshold is not None:
                threshold = var.SingleFloat(self._threshold)
            else:
                threshold = var.SingleFloat(0.0)
            bcmp = var.Word(0)

        # Setup the Tanimoto kernel
        tan = Tanimoto()
        tan.set_n_bits(self._n_bits)
        tan.set_x_regs(x_regs)
        tan.set_y_regs(y_regs)
        tan.set_result(result)
        tan.synthesize_constants(code)

        # Setup the save op
        save_op = self._save_op
        if save_op is not None:
            save_op.setup()

        # Create the iterators
        xiter = spuiter.syn_iter(code, self._m)
        yiter = spuiter.syn_iter(code, self._n)

        # Synthesize the block comparison loops
        x_addr.v = self._x_addr

        for x_off in xiter:
            # NOTE(review): both addresses are advanced *before* the load,
            # so the first vector read is at base + 16 * n_vecs -- confirm
            # that callers account for this.
            x_addr.v = x_addr + 16 * n_vecs
            y_addr.v = self._y_addr

            self._load_bit_vector(x_addr, x_regs)

            for y_off in yiter:
                y_addr.v = y_addr + 16 * n_vecs

                self._load_bit_vector(y_addr, y_regs)
                tan.synthesize(code)

                if save_op is not None:
                    spu.fcgt(bcmp, result, threshold)
                    save_op.test(bcmp, result, x_off, y_off)

            # /x_off
    finally:
        if old_code is not None:
            spu.set_active_code(old_code)
    return
def synthesize(self, code):
    """
    Emit into `code` the computation for one point: `max_init` warm-up
    iterations of x = r * x * (1.0 - x), followed by `max_n` iterations
    that also accumulate log(abs(r - 2.0 * r * x)) into a total, which is
    finally divided and stored in `self.result`.

    Inputs are validated by `self._check_inputs()` before anything is
    emitted.  The previously active instruction stream is restored on
    exit (the original code re-installed `code` instead of restoring the
    saved stream).
    """
    self._check_inputs()

    old_code = spu.get_active_code()
    spu.set_active_code(code)
    try:
        zero = var.Word(reg=code.r_zero)
        one = self.log.consts['ONE']
        two = self.consts['TWO']

        x = var.Word(self.x0)
        r = var.Word(0)
        is_positive = var.Word(0)
        x_neg = var.Word(0)
        # NOTE(review): the divisor is built from max_init, while the
        # comment at the division below says max_n -- confirm which one
        # is intended.
        fmax = var.Word(self.max_init)
        temp = var.SingleFloat()

        fmax.v = spu.cuflt.ex(fmax, 155)

        # Init
        for i in spuiter.syn_iter(code, self.max_init):
            # x = r[i % r_max] * x * (1.0 - x)
            self._next_r(r)
            temp.v = spu.fs.ex(one, x)
            x.v = spu.fm.ex(x, temp)
            x.v = spu.fm.ex(r, x)

        # if x == float('-infinity'):
        #   return -10.0

        # Derive Exponent
        total = var.Word(0)
        logx = var.SingleFloat()

        for i in spuiter.syn_iter(code, self.max_n):
            # x = ri * x * (1.0 - x)
            self._next_r(r)
            temp.v = spu.fs.ex(one, x)
            x.v = spu.fm.ex(x, temp)
            x.v = spu.fm.ex(r, x)

            # logx = ri - 2.0 * ri * x
            logx.v = spu.fm.ex(two, x)
            logx.v = spu.fm.ex(r, logx)
            logx.v = spu.fs.ex(r, logx)

            # abs(logx): pick logx where it is > 0, otherwise 0 - logx
            x_neg.v = spu.fs.ex(zero, logx)
            is_positive.v = spu.fcgt.ex(logx, zero)
            logx.v = spu.selb.ex(x_neg, logx, is_positive)
            # logx.v = spu.selb.ex(logx, x_neg, cmp)

            # log(logx)
            self.log.set_result(logx)
            self.log.set_x(logx)
            self.log.synthesize(code)

            # total = total + x
            total.v = spu.fa.ex(total, logx)

        # return total / float(max_n)
        fdiv(code, self.result, total, fmax, one)
    finally:
        # Restore the stream that was active on entry
        spu.set_active_code(old_code)
    return
def setup(self, code):
    """
    Store one `var.Word` in `self.consts` for every entry of the
    module-level `constants` mapping.

    `self.consts` must already exist; `code` is not used here.
    """
    for name, value in constants.items():
        self.consts[name] = var.Word(value)
    return
def synthesize(self, code):
    """
    Emit into `code` the instruction sequence that computes the base-2
    logarithm of `self.x` and copies it into `self.result`.

    The sequence extracts the exponent and mantissa of x, normalizes the
    mantissa around SQRTHF (both branches are computed and the right one
    is chosen with selb), evaluates a polynomial in the mantissa, converts
    to base 2 and finally adds the exponent.

    Raises:
      Exception: if `self.x` or `self.result` has not been set.

    The active instruction stream is switched to `code` only while the
    sequence is being synthesized and is always restored afterwards.
    """
    # Validate before touching the global active stream; previously a
    # missing setting raised *after* the switch and left `code` active.
    if self.x is None:
        raise Exception("Please set x")
    if self.result is None:
        raise Exception("Please set result")

    consts = self.consts

    old_code = spu.get_active_code()
    spu.set_active_code(code)
    try:
        # exponent
        e = var.Word()

        # Working values
        x = var.Word()
        y = var.Word()
        z = var.Word()

        mask = var.Bits()
        tmp = var.Word()

        # Clear the scratch registers
        spu.xor(mask, mask, mask)
        spu.xor(tmp, tmp, tmp)

        # Set the working x
        x.v = self.x

        # Extract the exponent
        # int e = (((*(unsigned int *) &x) >> 23) & 0xff) - 0x7e;
        e.v = x >> consts['_23']
        e.v = spu.andi.ex(e, 0xff)
        e.v = spu.ai.ex(e, 0x382)  # 0x382 == (- 0x7E) using 10 bits
        #                            0b 111 1110

        # Extract the mantissa
        x.v = x & consts['M1']  # *(unsigned int*)&x &= 0x807fffff;
        x.v = x | consts['M2']  # *(unsigned int*)&x |= 0x3f000000;

        # Normalize
        # x1/x2/e1 are aliases: they reuse the y, z and tmp registers,
        # which are not needed for anything else until after the selects.
        x1, x2, e1 = y, z, tmp

        # if (x < SQRTHF)
        mask.v = spu.fcgt.ex(consts['SQRTHF'], x)

        # (True) { ... }
        e1.v = spu.ai.ex(e, -1)              # e -= 1;
        x1.v = spu.fa.ex(x, x)               # x = x + x - 1.0;
        x1.v = spu.fs.ex(x1, consts['ONE'])  #  ""  ""

        # (False) { ... }
        x2.v = spu.fs.ex(x, consts['ONE'])   # x = x - 1.0;

        # Select the True/False values based on the comparison mask
        e.v = spu.selb.ex(e, e1, mask)
        x.v = spu.selb.ex(x2, x1, mask)

        # Compute polynomial
        z.v = spu.fm.ex(x, x)                    # z = x * x;
        y.v = spu.fms.ex(consts['C1'], x,        # y = (((((((( 7.0376836292E-2 * x
                         consts['C2'])           #     - 1.1514610310E-1) * x
        y.v = spu.fma.ex(y, x, consts['C3'])     #     + 1.1676998740E-1) * x
        y.v = spu.fms.ex(y, x, consts['C4'])     #     - 1.2420140846E-1) * x
        y.v = spu.fma.ex(y, x, consts['C5'])     #     + 1.4249322787E-1) * x
        y.v = spu.fms.ex(y, x, consts['C6'])     #     - 1.6668057665E-1) * x
        y.v = spu.fma.ex(y, x, consts['C7'])     #     + 2.0000714765E-1) * x
        y.v = spu.fms.ex(y, x, consts['C8'])     #     - 2.4999993993E-1) * x
        y.v = spu.fma.ex(y, x, consts['C9'])     #     + 3.3333331174E-1)
        y.v = spu.fm.ex(y, x)                    #     * x
        y.v = spu.fm.ex(y, z)                    #     * z;

        y.v = spu.fma.ex(consts['C10'], z, y)    # y += -0.5 * z;

        # Convert to log base 2
        z.v = spu.fm.ex(y, consts['LOG2EA'])      # z = y * LOG2EA;
        z.v = spu.fma.ex(x, consts['LOG2EA'], z)  # z += x * LOG2EA;
        z.v = spu.fa.ex(z, y)                     # z += y;
        z.v = spu.fa.ex(z, x)                     # z += x;
        e.v = spu.csflt.ex(e, 155)                # z += (float) e;
        z.v = spu.fa.ex(z, e)                     #  ""  ""

        spu.ai(self.result, z, 0)                 # return z
    finally:
        spu.set_active_code(old_code)
    return