def _set_literal_value(self, value):
    """Load a literal array or integer into this variable.

    Arrays (``_array_type`` / ``_extarray_type``) are loaded with
    ``util.vector_from_array`` and registered as program storage.  ``int`` and
    ``long`` values are loaded with ``util.load_word``.  A warning is printed
    when the literal does not agree with ``self.array_typecode``.

    Raises:
        Exception: if ``value`` is neither an array nor an integer.
    """
    value_type = type(value)

    if value_type in (_array_type, _extarray_type):
        if self.array_typecode != value.typecode:
            print("Warning: array typecode does not match variable type - I hope you know what you're doing!")

        util.vector_from_array(self.code, self, value)
        # Register the array with the program (presumably to keep it alive
        # while the synthesized code refers to it -- confirm in add_storage).
        self.code.prgm.add_storage(value)
        self.storage = self.value

    elif value_type in (int, long):
        if self.array_typecode not in INT_ARRAY_TYPES:
            print("Warning: int does not match variable type - I hope you know what you're doing!")
        util.load_word(self.code, self, value)

    else:
        # Report the value that was actually rejected.  The original message
        # formatted self.value, which can be a different (stale) object.
        raise Exception(
            "Warning: unknown type for %s -> %s, defaulting to 'I'"
            % (str(value), str(value_type)))

    if self.array_typecode is not None and INT_ARRAY_SIZES[self.array_typecode] != 4:
        print("Warning: Only 4-byte integers are supported for spu variables from arrays")

    self.code.prgm.add_storage(self.storage)
    return
def _set_literal_value(self, value):
    """Load a literal array or integer into this variable.

    Arrays (``_array_type`` / ``_extarray_type``) are loaded with
    ``util.vector_from_array`` and registered as program storage.  ``int`` and
    ``long`` values are loaded with ``util.load_word``.  A warning is printed
    when the literal does not agree with ``self.array_typecode``.

    Raises:
        Exception: if ``value`` is neither an array nor an integer.
    """
    value_type = type(value)

    if value_type in (_array_type, _extarray_type):
        if self.array_typecode != value.typecode:
            print("Warning: array typecode does not match variable type - I hope you know what you're doing!")

        util.vector_from_array(self.code, self, value)
        # Register the array with the program (presumably to keep it alive
        # while the synthesized code refers to it -- confirm in add_storage).
        self.code.prgm.add_storage(value)
        self.storage = self.value

    elif value_type in (int, long):
        if self.array_typecode not in INT_ARRAY_TYPES:
            print("Warning: int does not match variable type - I hope you know what you're doing!")
        util.load_word(self.code, self, value)

    else:
        # Report the value that was actually rejected.  The original message
        # formatted self.value, which can be a different (stale) object.
        raise Exception(
            "Warning: unknown type for %s -> %s, defaulting to 'I'"
            % (str(value), str(value_type)))

    if self.array_typecode is not None and INT_ARRAY_SIZES[self.array_typecode] != 4:
        print("Warning: Only 4-byte integers are supported for spu variables from arrays")

    self.code.prgm.add_storage(self.storage)
    return
def block(self, d, a, value):
    """Emit a subtract-from using a literal operand.

    ``value`` is loaded into a scratch register, ``spu.sf`` is issued with
    ``d`` as the target, and the scratch register is handed back.
    """
    stream = self.get_active_code()
    r_literal = stream.prgm.acquire_register()
    load_word(stream, r_literal, value)

    # sf computes RD = RB - RA
    spu.sf(d, r_literal, a)

    stream.prgm.release_register(r_literal)
    return
def spu_writech(code, ch, msg):
    """Write ``msg`` to channel ``ch`` and return the result of ``code.add``.

    ``msg`` is either a register/variable that already holds the value or a
    literal, which is first loaded into a temporary register.
    """
    if isinstance(msg, (spe.Register, spe.Variable)):
        # The value is already in a register: write it directly.
        return code.add(spu.wrch(msg, ch))

    # Literal: stage it in a scratch register for the duration of the write.
    r_literal = code.acquire_register()
    util.load_word(code, r_literal, msg)
    result = code.add(spu.wrch(r_literal, ch))
    code.release_register(r_literal)
    return result
def spu_writech(code, ch, msg):
    """Write ``msg`` to channel ``ch`` and return the result of ``code.add``.

    ``msg`` is either a register/variable that already holds the value or a
    literal, which is first loaded into a temporary program register.
    """
    if isinstance(msg, (spe.Register, spe.Variable)):
        # The value is already in a register: write it directly.
        return code.add(spu.wrch(msg, ch))

    # Literal: stage it in a scratch register for the duration of the write.
    prgm = code.prgm
    r_literal = prgm.acquire_register()
    util.load_word(code, r_literal, msg)
    result = code.add(spu.wrch(r_literal, ch))
    prgm.release_register(r_literal)
    return result
def spu_mfcdma32(code, r_ls, r_ea, r_size, r_tagid, cmd):
    """Queue an MFC DMA command that uses a 32-bit effective address.

    The parameter registers are written to their MFC channels in the order
    LSA, EAL, Size, TagID; the command word is written last.  Returns the
    result of the final ``code.add``.
    """
    r_command = code.acquire_register()
    util.load_word(code, r_command, cmd)

    channel_writes = (
        (r_ls, MFC_LSA),
        (r_ea, MFC_EAL),
        (r_size, MFC_Size),
        (r_tagid, MFC_TagID),
    )
    for reg, channel in channel_writes:
        code.add(spu.wrch(reg, channel))

    # The command channel is written after all parameter channels.
    result = code.add(spu.wrch(r_command, MFC_Cmd))

    code.release_register(r_command)
    return result
def _synthesize_prologue(self):
    """
    Build the prologue stream and set up register 0.
    """
    prologue = InstructionStream()
    self._prologue = prologue

    # r0 is reserved so that it can hold the value zero
    self.acquire_register(reg=0)
    util.load_word(prologue, 0, 0, zero=False)
    return
def _synthesize_prologue(self):
    """
    Build the prologue stream and set up register 0.
    """
    prologue = InstructionStream()
    self._prologue = prologue

    # r0 is reserved so that it can hold the value zero
    self.acquire_register(reg=0)
    util.load_word(prologue, 0, 0, zero=False)
    return
def spu_mfcdma64(code, r_ls, r_eah, r_eal, r_size, r_tagid, cmd):
    """Queue an MFC DMA command that uses a 64-bit effective address.

    Args:
        code: instruction stream the channel writes are added to.
        r_ls: register holding the local store address.
        r_eah: register holding the high word of the effective address.
        r_eal: register holding the low word of the effective address.
        r_size: register holding the transfer size.
        r_tagid: register holding the DMA tag id.
        cmd: literal MFC command word; loaded into a temporary register.

    Returns:
        The result of the ``code.add`` call for the command-channel write.
    """
    r_cmd = code.prgm.acquire_register()
    util.load_word(code, r_cmd, cmd)

    code.add(spu.wrch(r_ls, MFC_LSA))
    code.add(spu.wrch(r_eah, MFC_EAH))
    code.add(spu.wrch(r_eal, MFC_EAL))
    code.add(spu.wrch(r_size, MFC_Size))
    code.add(spu.wrch(r_tagid, MFC_TagID))
    # The command channel is written after all parameter channels.
    last = code.add(spu.wrch(r_cmd, MFC_Cmd))

    # Release through the same owner the register was acquired from.  The
    # original called code.release_register() on a register obtained from
    # code.prgm, unlike the 32-bit sibling.
    code.prgm.release_register(r_cmd)
    return last
def block(self, d, a, value):
    """
    Dispatch to the proper form of the instruction.

    Values strictly between -512 and 512 go to the immediate form
    (``self.insti``).  Anything else is loaded into a temporary register and
    passed to the register form (``self.inst``).
    """
    fits_immediate = -512 < value < 512
    if fits_immediate:
        self.insti(d, a, value)
        return

    stream = self.get_active_code()
    r_value = stream.prgm.acquire_register()
    load_word(stream, r_value, value)
    self.inst(d, a, r_value)
    stream.prgm.release_register(r_value)
    return
def _set_literal_value(self, value):
    """Load a literal float, or a sequence/array of values, into this variable.

    Lists and tuples are first packed into an ``array.array`` of this
    variable's typecode.  Arrays are reinterpreted as unsigned 32-bit words
    and loaded with ``util.vector_from_array``.  A single ``float`` is packed
    to its 32-bit pattern and loaded with ``util.load_word``.

    Raises:
        Exception: if ``value`` is not a list, tuple, array or float.
    """
    # Convert lists and tuples to arrays of this variable's typecode
    if isinstance(value, (list, tuple)):
        value = array.array(self.array_typecode, value)

    if type(value) in (_array_type, _extarray_type):
        if self.array_typecode != value.typecode:
            print("Warning: array typecode does not match variable type - I hope you know what you're doing!")

        # Reinterpret the raw bytes as integers so that Python does not
        # numerically cast the floats to ints while the vector is loaded.
        int_value = array.array('I')
        int_value.fromstring(value.tostring())

        util.vector_from_array(self.code, self, int_value)
        self.code.prgm.add_storage(value)
        self.code.prgm.add_storage(int_value)
        self.storage = self.value

    elif type(value) in (float,):
        if self.array_typecode not in FLOAT_ARRAY_TYPES:
            # This branch handles floats; the original warning said "int".
            print("Warning: float does not match variable type - I hope you know what you're doing!")

        # Convert to bits: pack as a single-precision float, read back as a word
        af = array.array('f', (value,))
        int_value = array.array('I')
        int_value.fromstring(af.tostring())
        util.load_word(self.code, self, int_value[0])

    else:
        # Report the value that was actually rejected.  The original message
        # formatted self.value, which can be a different (stale) object.
        raise Exception(
            "Warning: unknown type for %s -> %s, defaulting to 'I'"
            % (str(value), str(type(value))))

    return
def _set_literal_value(self, value):
    """Load a literal float, or a sequence/array of values, into this variable.

    Lists and tuples are first packed into an ``array.array`` of this
    variable's typecode.  Arrays are reinterpreted as unsigned 32-bit words
    and loaded with ``util.vector_from_array``.  A single ``float`` is packed
    to its 32-bit pattern and loaded with ``util.load_word``.

    Raises:
        Exception: if ``value`` is not a list, tuple, array or float.
    """
    # Convert lists and tuples to arrays of this variable's typecode
    if isinstance(value, (list, tuple)):
        value = array.array(self.array_typecode, value)

    if type(value) in (_array_type, _extarray_type):
        if self.array_typecode != value.typecode:
            print("Warning: array typecode does not match variable type - I hope you know what you're doing!")

        # Reinterpret the raw bytes as integers so that Python does not
        # numerically cast the floats to ints while the vector is loaded.
        int_value = array.array('I')
        int_value.fromstring(value.tostring())

        util.vector_from_array(self.code, self, int_value)
        self.code.prgm.add_storage(value)
        self.code.prgm.add_storage(int_value)
        self.storage = self.value

    elif type(value) in (float,):
        if self.array_typecode not in FLOAT_ARRAY_TYPES:
            # This branch handles floats; the original warning said "int".
            print("Warning: float does not match variable type - I hope you know what you're doing!")

        # Convert to bits: pack as a single-precision float, read back as a word
        af = array.array('f', (value,))
        int_value = array.array('I')
        int_value.fromstring(af.tostring())
        util.load_word(self.code, self, int_value[0])

    else:
        # Report the value that was actually rejected.  The original message
        # formatted self.value, which can be a different (stale) object.
        raise Exception(
            "Warning: unknown type for %s -> %s, defaulting to 'I'"
            % (str(value), str(type(value))))

    return
def spu_mfcdma32(code, r_ls, r_ea, r_size, r_tagid, cmd):
    """Queue an MFC DMA command that uses a 32-bit effective address.

    The parameter registers are written to their MFC channels in the order
    LSA, EAL, Size, TagID; the command word is written last.  Returns the
    result of the final ``code.add``.
    """
    # NOTE: an earlier revision cached the command register in program storage
    # under "__spu_mfcdma32_cmd_<cmd>"; a fresh register is used per call now.
    prgm = code.prgm
    r_command = prgm.acquire_register()
    util.load_word(code, r_command, cmd)

    channel_writes = (
        (r_ls, MFC_LSA),
        (r_ea, MFC_EAL),
        (r_size, MFC_Size),
        (r_tagid, MFC_TagID),
    )
    for reg, channel in channel_writes:
        code.add(spu.wrch(reg, channel))

    # The command channel is written after all parameter channels.
    result = code.add(spu.wrch(r_command, MFC_Cmd))

    prgm.release_register(r_command)
    return result
def end(self, branch = True):
    """Do post-loop iterator code.

    Emits the optional branch hint, the loop-closing branch (when ``branch``
    is true), resets the counter so the loop can be nested, and releases the
    counter and stop registers owned by this iterator.
    """
    code = self.code
    prgm = code.prgm

    if self.hint == True:
        code.add(spu.hbrr(self.branch_label, self.start_label))

    if self.mode == DEC:
        # Branch back while r_count is not zero.  This relies on someone
        # (e.g. cleanup()) having updated the counter properly.
        if branch:
            code.add(self.branch_label)
            code.add(spu.brnz(self.r_count, self.start_label))

        # Restore the counter in case this is a nested loop
        util.load_word(code, self.r_count, self.get_count())

    elif self.mode == INC:
        # Branch back while r_count < r_stop
        if branch:
            r_below_stop = prgm.acquire_register()
            code.add(spu.cgt(r_below_stop, self.r_stop, self.r_count))
            code.add(self.branch_label)
            code.add(spu.brnz(r_below_stop, self.start_label))
            prgm.release_register(r_below_stop)

        # Restore the current value in case this is a nested loop
        if self._external_start:
            code.add(spu.ai(self.r_count, self.r_start, 0))
        else:
            util.load_word(code, self.r_count, self.get_start())

    if self.r_count is not None:
        prgm.release_register(self.r_count)

    # An externally supplied stop register is not ours to release.
    if self.r_stop is not None and not self._external_stop:
        prgm.release_register(self.r_stop)

    return
def start(self, align=True, branch=True):
    """Do pre-loop iteration initialization.

    Acquires the counter (and, for INC mode with ``branch``, the stop)
    register, loads their initial values, sets up the step register when the
    step does not fit an immediate, and creates the loop labels.
    """
    code = self.code

    if self.r_count is None:
        self.r_count = code.acquire_register()

    if self.mode == DEC:
        if self._external_start:
            code.add(spu.ai(self.r_count, self.r_start, 0))
        else:
            util.load_word(code, self.r_count, self.get_count())

    elif self.mode == INC:
        if self.r_stop is None and branch:
            self.r_stop = code.acquire_register()

        if self._external_start:
            code.add(spu.ai(self.r_count, self.r_start, 0))
        else:
            util.load_word(code, self.r_count, self.get_start())

        if branch and not self._external_stop:
            util.load_word(code, self.r_stop, self.get_count())

    if self.r_count is not None:
        self.current_count = var.SignedWord(code=code, reg=self.r_count)

    # A step outside the range (-512 < word < 511) is kept in a register
    # instead of being used as an immediate value.
    step = self.step_size()
    if step <= -512 or step >= 511:
        self.r_step = code.acquire_register()
        util.load_word(code, self.r_step, self.step_size())

    # The start label is placed here, at the top of the loop body
    self.start_label = code.get_label("SYN_ITER_START_%d" % random.randint(0, 2**32))
    code.add(self.start_label)

    # The branch/continue labels are only created so they can be referenced;
    # they are added to the code later, at their appropriate locations.
    self.branch_label = code.get_label("SYN_ITER_BRANCH_%d" % random.randint(0, 2**32))
    self.continue_label = code.get_label("SYN_ITER_CONTINUE_%d" % random.randint(0, 2**32))
    return
def mem_write_in_mbox(code, psmap, lsa, tag, cache = False):
    """Write a 32bit message at a local LSA from this SPU to another.

    psmap must contain the base address of the target SPU's PS map.
    lsa must be 12 mod 16 for DMA alignment purposes.
    This is a DMA operation; it must be completed using mem_complete() or
    similar method.

    Args:
        code: instruction stream the DMA is synthesized into.
        psmap: literal base address, or a register holding it.
        lsa: local store address of the message; literal values are checked
            for the 12-mod-16 alignment, registers are not.
        tag: DMA tag passed through to ``mem_put``.
        cache: when true, the register holding the constant transfer size 4
            is stored in program storage under "_const_val_4" for reuse
            instead of being released.

    Raises:
        AssertionError: if ``lsa`` is a literal that is not 12 mod 16.
    """
    if isinstance(lsa, (int, long)) and (lsa % 16) != 12:
        # Raised explicitly so the check is not stripped under "python -O".
        # The exception type is the one the original assert(0) produced.
        raise AssertionError("ERROR LSA for mem_write_in_mbox() must be 12 mod 16")

    prgm = code.prgm

    # Main-memory address the message is written to: PS map base + 0x400C
    # (presumably the inbound mailbox register offset -- TODO confirm).
    r_mbox_mma = prgm.acquire_register()
    if isinstance(psmap, (int, long)):
        util.load_word(code, r_mbox_mma, psmap + 0x400C)
    else:
        util.load_word(code, r_mbox_mma, 0x400C)
        code.add(spu.a(r_mbox_mma, r_mbox_mma, psmap))

    # Transfer size (4 bytes).  Reuse a cached register when one exists.
    ref = "_const_val_4"
    r_size = prgm.get_storage(ref)
    release_r_size = False
    if not isinstance(r_size, spu.Register):
        r_size = prgm.acquire_register()
        util.load_word(code, r_size, 4)
        if cache:
            prgm.add_storage(ref, r_size)
        else:
            # Acquired just for this call, so it is handed back below.
            release_r_size = True

    mem_put(code, lsa, r_mbox_mma, r_size, tag)

    prgm.release_register(r_mbox_mma)
    if release_r_size:
        prgm.release_register(r_size)
    return
def start(self, align = True, branch = True):
    """Do pre-loop iteration initialization.

    Acquires the counter (and, for INC mode with ``branch``, the stop)
    register, loads their initial values, sets up the step register when the
    step does not fit an immediate, and creates the loop labels.
    """
    code = self.code
    prgm = code.prgm

    if self.r_count is None:
        self.r_count = prgm.acquire_register()

    if self.mode == DEC:
        if self._external_start:
            code.add(spu.ai(self.r_count, self.r_start, 0))
        else:
            util.load_word(code, self.r_count, self.get_count())

    elif self.mode == INC:
        if self.r_stop is None and branch:
            self.r_stop = prgm.acquire_register()

        if self._external_start:
            code.add(spu.ai(self.r_count, self.r_start, 0))
        else:
            util.load_word(code, self.r_count, self.get_start())

        if branch and not self._external_stop:
            util.load_word(code, self.r_stop, self.get_count())

    if self.r_count is not None:
        self.current_count = var.SignedWord(code = code, reg = self.r_count)

    # A step outside the range (-512 < word < 511) is kept in a register
    # instead of being used as an immediate value.
    step = self.step_size()
    if step <= -512 or step >= 511:
        self.r_step = prgm.acquire_register()
        util.load_word(code, self.r_step, self.step_size())

    # The start label is placed here, at the top of the loop body
    self.start_label = prgm.get_unique_label("SYN_ITER_START")
    code.add(self.start_label)

    # The branch/continue labels are only created so they can be referenced;
    # they are added to the code later, at their appropriate locations.
    self.branch_label = prgm.get_unique_label("SYN_ITER_BRANCH")
    self.continue_label = prgm.get_unique_label("SYN_ITER_CONTINUE")
    return
# Every SPU starts up and blocks until it is signalled.  The PPU gathers all
# of the PS map addresses into one array and signals its address to the SPUs.
r_psinfo_mma = dma.spu_read_signal1(code)

# Pull the PS info table into local store at address 0 (DMA tag 17)
dma.mem_get(code, 0x0, r_psinfo_mma, SPUS * 4 * 4, 17)
dma.mem_complete(code, 17)

# One register per PS map address
r_psinfo = prgm.acquire_registers(SPUS)
for i in xrange(SPUS):
    spu.lqd(r_psinfo[i], code.r_zero, i)

# Put this rank into a register and store it at LSA 0
r_send = prgm.acquire_register()
load_word(code, r_send, rank)
spu.stqd(r_send, code.r_zero, 0)
prgm.release_register(r_send)

# Mail our rank to the next rank in the ring (DMA tag 18)
dma.mem_write_in_mbox(code, r_psinfo[(rank + 1) % SPUS], 12, 18)
dma.mem_complete(code, 18)

# Pick up the message sent by the preceding rank
r_recv = dma.spu_read_in_mbox(code)

# Forward the received value to the PPU through the interrupt mailbox
dma.spu_write_out_intr_mbox(code, r_recv)
code.prgm.release_register(r_recv)

prgm.add(code)
def _transfer_data(self, code, kernel, lsa, tag):
    """
    Load the data into the SPU memory

    Synthesizes a loop that moves this descriptor's data in chunks of at most
    16384 bytes by calling ``kernel(code, ls_addr, ea, size, tag)`` once per
    chunk, then waits on ``tag``.

    Args:
        code: the ``spe.InstructionStream`` to synthesize into.
        kernel: callable that issues one DMA transfer.
        lsa: local store address; an int, Register or Variable.
        tag: DMA tag (a literal; it is also used to build the tag mask).

    Raises:
        Exception: if ``code`` or ``lsa`` has an unsupported type.
    """
    # Check the types
    if not isinstance(code, spe.InstructionStream):
        raise Exception('Code must be an InstructionStream')
    if not (isinstance(lsa, int) or issubclass(type(lsa), (spe.Register, spe.Variable))):
        raise Exception('lsa must be an integer, Register, or Variable')

    old_code = spu.get_active_code()
    spu.set_active_code(code)

    prgm = code.prgm

    # Acquire registers for address and size, if they were not supplied by the user
    if self.r_addr is None:
        r_ea_data = prgm.acquire_register()
    else:
        r_ea_data = self.r_addr

    if self.r_size is None:
        r_size = prgm.acquire_register()
    else:
        r_size = self.r_size

    # Create variables
    ea_addr = var.SignedWord(reg = r_ea_data)
    aligned_size = var.SignedWord(0)
    mod_16 = var.SignedWord(0xF)

    # Initialize the lsa_addr variable.
    if isinstance(lsa, int):
        # From a constant
        ls_addr = var.SignedWord(lsa)
    elif issubclass(type(lsa), (spe.Register, spe.Variable)):
        # From a variable
        ls_addr = var.SignedWord()
        ls_addr.v = lsa

    tag_var = var.SignedWord(tag)
    cmp = var.SignedWord(0)

    # Load the effective address
    if self.r_addr is None:
        if self.addr % 16 != 0:
            print('[get_memory] Misaligned data')
        util.load_word(code, ea_addr, self.addr)

    # Load the size, rounding up as required to be 16-byte aligned
    if self.r_size is None:
        rnd_size = self.size * var.INT_SIZES[self.typecode]
        if rnd_size < 16:
            rnd_size = 16
        elif (rnd_size % 16) != 0:
            rnd_size += (16 - (rnd_size % 16))
        util.load_word(code, aligned_size, rnd_size)
    else:
        # TODO: !!! UNIT TEST THIS !!!
        # Same as above, but using SPU arithmetic to round
        size = var.SignedWord(reg = r_size)
        sixteen = var.SignedWord(16)
        cmp.v = ((size & mod_16) == size)
        aligned_size.v = size + (sixteen - (size & mod_16))
        spu.selb(aligned_size.reg, size.reg, aligned_size.reg, cmp.reg, order = _mi(spu.selb))
        prgm.release_register(sixteen.reg)

    # Use an auxiliary register for the moving ea value if the
    # caller supplied the address register
    if self.r_addr is not None:
        ea_load = var.SignedWord(0)
        ea_load.v = ea_addr
    else:
        ea_load = ea_addr  # note that this is reference, not .v assignment

    # Transfer parameters
    buffer_size = var.SignedWord(16384)
    remaining = var.SignedWord(0)
    transfer_size = var.SignedWord(0)
    remaining.v = aligned_size

    # Set up the iterators to transfer at most 16k at a time
    xfer_iter = syn_iter(code, 0, 16384)
    xfer_iter.set_stop_reg(aligned_size.reg)

    for offset in xfer_iter:
        cmp.v = buffer_size > remaining
        spu.selb(transfer_size, buffer_size, remaining, cmp)

        # Transfer the data
        kernel(code, ls_addr, ea_load, transfer_size, tag_var)
        ls_addr.v = ls_addr + buffer_size
        ea_load.v = ea_load + buffer_size

        remaining.v = remaining - buffer_size

    # Set the tag bit to tag
    dma.mfc_write_tag_mask(code, 1 << tag)

    # Wait for the transfer to complete
    dma.mfc_read_tag_status_all(code)

    # Release the registers through the program they were acquired from.
    # The original mixed code.prgm.acquire_register() with
    # code.release_register().
    for owned in (buffer_size, remaining, aligned_size, transfer_size,
                  cmp, ls_addr, tag_var, ea_load):
        prgm.release_register(owned.reg)

    # These two were never released by the original: the 0xF mask, and the
    # size register acquired above when the caller did not supply one.
    prgm.release_register(mod_16.reg)
    if self.r_size is None:
        prgm.release_register(r_size)

    if old_code is not None:
        spu.set_active_code(old_code)
    return
def TestSPUParallelIter(data, size, n_spus = 6, buffer_size = 16, run_code = True): import time # n_spus = 8 # buffer_size = 16 # 16 ints/buffer # n_buffers = 4 # 4 buffers/spu # n_buffers = size / buffer_size # size = buffer_size * n_buffers * n_spus # data = array.array('I', range(size + 2)) #data = env.aligned_memory(n, typecode = 'I') #data.copy_to(data_array.buffer_info()[0], len(data_array)) # print 'Data align: 0x%X, %d' % (data.buffer_info()[0], data.buffer_info()[0] % 16) code = env.ParallelInstructionStream() # code = env.InstructionStream() r_zero = code.acquire_register() r_ea_data = code.acquire_register() r_ls_data = code.acquire_register() r_size = code.acquire_register() r_tag = code.acquire_register() # Load zero util.load_word(code, r_zero, 0) # print 'array ea: 0x%X 0x%X' % (data.buffer_info()[0], long(data.buffer_info()[0])) # print 'r_zero = %d, ea_data = %d, ls_data = %d, r_size = %d, r_tag = %d' % ( # r_zero, r_ea_data, r_ls_data, r_size, r_tag) # Load the effective address if data.buffer_info()[0] % 16 == 0: util.load_word(code, r_ea_data, data.buffer_info()[0]) else: util.load_word(code, r_ea_data, data.buffer_info()[0] + 8) ea_start = data.buffer_info()[0] # Iterate over each buffer for ea in parallel(syn_range(code, ea_start, ea_start + size * 4 , buffer_size * 4)): # ea = var.SignedWord(code = code, reg = r_ea_data) # print 'n_iters:', size / buffer_size # for i in syn_range(code, size / buffer_size): # code.add(spu.stop(0xB)) # Load the size util.load_word(code, r_size, buffer_size * 4) # Load the tag code.add(spu.ai(r_tag, r_zero, 12)) # Load the lsa code.add(spu.ai(r_ls_data, r_zero, 0)) # Load the data into address 0 dma.mfc_get(code, r_ls_data, ea, r_size, r_tag) # Set the tag bit to 12 dma.mfc_write_tag_mask(code, 1<<12); # Wait for the transfer to complete dma.mfc_read_tag_status_all(code); # Increment the data values by 1 using an unrolled loop (no branches) # r_current = code.acquire_register() current = var.SignedWord(0, code) 
count = var.SignedWord(0, code) # Use an SPU iter for lsa in syn_iter(code, buffer_size * 4, 16): code.add(spu.lqx(current, r_zero, lsa)) # code.add(spu.ai(1, r_current, r_current)) current.v = current + current code.add(spu.stqx(current, r_zero, lsa)) count.v = count + 1 code.add(spu.stqx(count, r_zero, 0)) # code.release_register(r_current) current.release_registers(code) # Store the values back to main memory # Load the tag code.add(spu.ai(r_tag, r_zero, 13)) # Load the data into address 0 dma.mfc_put(code, r_ls_data, ea.reg, r_size, r_tag) # Set the tag bit to 13 dma.mfc_write_tag_mask(code, 1<<13); # Wait for the transfer to complete dma.mfc_read_tag_status_all(code); # code.add(spu.stop(0xB)) # Update ea # ea.v = ea + (buffer_size * 4) # /for ea address # Cleanup code.release_register(r_zero) code.release_register(r_ea_data) code.release_register(r_ls_data) code.release_register(r_size) code.release_register(r_tag) if not run_code: return code # Stop for debugging # code.add(spu.stop(0xA)) # Execute the code proc = env.Processor() #data.copy_from(data_array.buffer_info()[0], len(data_array)) def print_blocks(): for i in range(0, size, buffer_size): # print data[i:(i + buffer_size)] print data[i + buffer_size], print '' # print_blocks() s = time.time() r = proc.execute(code, n_spus = n_spus) # r = proc.execute(code) t = time.time() - s # print_blocks() return t
prgm = env.Program()
code = prgm.get_stream()
proc = env.Processor()

spu.set_active_code(code)

# Registers that carry the DMA parameters
r_lsa = prgm.acquire_register()   # Local Store address
r_mma = prgm.acquire_register()   # Main Memory address
r_size = prgm.acquire_register()  # Size in bytes
r_tag = prgm.acquire_register()   # DMA Tag

# Parameters for the GET command
abi = a.buffer_info()

spu.il(r_lsa, 0x1000)                # Local Store address 0x1000
load_word(code, r_mma, abi[0])       # Main Memory address of array a
spu.il(r_size, a.itemsize * abi[1])  # Size of array a in bytes
spu.il(r_tag, 12)                    # DMA tag 12

# Issue the DMA GET command
dma.mfc_get(code, r_lsa, r_mma, r_size, r_tag)

# Wait for completion: the completion mask selects tag 12
spu.il(r_tag, 1 << 12)
dma.mfc_write_tag_mask(code, r_tag)
dma.mfc_read_tag_status_all(code)

# Parameters for the PUT command
bbi = b.buffer_info()
import corepy.arch.spu.isa as spu
import corepy.arch.spu.platform as env
import corepy.arch.spu.lib.dma as dma
from corepy.arch.spu.lib.util import load_word

import time

if __name__ == '__main__':
    # Stress test: synthesize a countdown loop and execute it many times.
    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()

    spu.set_active_code(code)

    r_cnt = prgm.acquire_register()
    load_word(code, r_cnt, 0x10000)

    # Remember where the loop body starts so the branch can jump back to it
    br_loop = code.size()
    spu.ai(r_cnt, r_cnt, -1)
    spu.brnz(r_cnt, br_loop - code.size())

    prgm.add(code)
    prgm.print_code()

    for i in xrange(10000):
        proc.execute(prgm)
        #if i % 25 == 0:
        #  print "sleep"
        #  time.sleep(1)
def SimpleSPU(): """ A very simple SPU that computes 11 + 31 and returns 0xA on success. """ code = InstructionStream() proc = Processor() spu.set_active_code(code) # Acquire two registers #x = code.acquire_register() x = code.gp_return test = code.acquire_register() lbl_brz = code.get_label("BRZ") lbl_skip = code.get_label("SKIP") spu.hbrr(lbl_brz, lbl_skip) spu.xor(x, x, x) # zero x spu.ai(x, x, 11) # x = x + 11 spu.ai(x, x, 31) # x = x + 31 spu.ceqi(test, x, 42) # test = (x == 42) # If test is false (all 0s), skip the stop(0x100A) instruction code.add(lbl_brz) spu.brz(test, lbl_skip) spu.stop(0x100A) code.add(lbl_skip) spu.stop(0x100B) code.print_code(hex=True, pro=True, epi=True) r = proc.execute(code, mode='int', stop=True) print "ret", r assert (r[0] == 42) assert (r[1] == 0x100A) code = InstructionStream() spu.set_active_code(code) lbl_loop = code.get_label("LOOP") lbl_break = code.get_label("BREAK") r_cnt = code.acquire_register() r_stop = code.acquire_register() r_cmp = code.acquire_register() r_foo = code.gp_return spu.ori(r_foo, code.r_zero, 0) spu.ori(r_cnt, code.r_zero, 0) util.load_word(code, r_stop, 10) code.add(lbl_loop) spu.ceq(r_cmp, r_cnt, r_stop) spu.brnz(r_cmp, lbl_break) spu.ai(r_cnt, r_cnt, 1) spu.a(r_foo, r_foo, r_cnt) spu.br(lbl_loop) code.add(lbl_break) code.print_code() r = proc.execute(code, mode='int', stop=True) print "ret", r assert (r[0] == 55) return
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import corepy.arch.spu.isa as spu import corepy.arch.spu.platform as env import corepy.arch.spu.lib.dma as dma from corepy.arch.spu.lib.util import load_word prgm = env.Program() code = prgm.get_stream() proc = env.Processor() # Grab a register and initialize it reg = prgm.acquire_register() load_word(code, reg, 0xCAFEBABE) # Write the value to the outbound mailbox dma.spu_write_out_mbox(code, reg) # Wait for a signal sig = dma.spu_read_signal1(code) prgm.release_register(sig) prgm.release_register(reg) prgm.add(code) # Start the synthesized SPU program id = proc.execute(prgm, async = True)
def init_address(self):
    """Load the data array's address into ``self.r_addr``.

    Only done when ``self.addr_reg`` is None; otherwise nothing is emitted
    and None is returned.
    """
    if self.addr_reg is not None:
        return None

    address = _array_address(self.data)
    return util.load_word(self.code, self.r_addr, address)
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import corepy.arch.spu.isa as spu import corepy.arch.spu.platform as env import corepy.arch.spu.lib.dma as dma from corepy.arch.spu.lib.util import load_word code = env.InstructionStream() proc = env.Processor() # Grab a register and initialize it reg = code.acquire_register() load_word(code, reg, 0xCAFEBABE) # Write the value to the outbound mailbox dma.spu_write_out_mbox(code, reg) # Wait for a signal sig = dma.spu_read_signal1(code) code.release_register(sig) code.release_register(reg) # Start the synthesized SPU program id = proc.execute(code, async=True) # Spin until the mailbox can be read while env.spu_exec.stat_out_mbox(id) == 0:
if __name__ == '__main__': ITERS = 500000 #ITERS = 15 prgm = env.Program() code = prgm.get_stream() proc = env.Processor() spu.set_active_code(code) psmap = extarray.extarray('I', 131072 / 4) data = extarray.extarray('I', range(0, 16)) r_sum = prgm.gp_return r_cnt = prgm.acquire_register() spu.xor(r_sum, r_sum, r_sum) load_word(code, r_cnt, ITERS) lbl_loop = prgm.get_label("loop") code.add(lbl_loop) reg = dma.spu_read_in_mbox(code) spu.ai(r_sum, r_sum, 1) dma.spu_write_out_intr_mbox(code, r_sum) #dma.spu_write_out_mbox(code, reg) prgm.release_register(reg) spu.ai(r_cnt, r_cnt, -1) spu.brnz(r_cnt, lbl_loop)
def TestMFC():
    """Round-trip an array through local store with MFC GET/PUT.

    The synthesized code fetches 32 words into local store address 0, adds 1
    to every word using an unrolled sequence, and writes the buffer back.
    The host then asserts that every element was incremented.
    """
    import corepy.lib.extarray as extarray
    import corepy.arch.spu.platform as synspu

    size = 32
    data = extarray.extarray('I', range(size))
    code = synspu.InstructionStream()

    r_zero, r_ea_data, r_ls_data, r_size, r_tag = [
        code.acquire_register() for _ in range(5)]

    # Load zero
    util.load_word(code, r_zero, 0)

    ea = data.buffer_info()[0]
    print('array ea: %X' % (ea))
    print('r_zero = %s, ea_data = %s, ls_data = %s, r_size = %s, r_tag = %s' % (
        str(r_zero), str(r_ea_data), str(r_ls_data), str(r_size), str(r_tag)))

    # Effective address of the array
    print('test ea: %X' % ea)
    util.load_word(code, r_ea_data, ea)

    # Transfer size in bytes, DMA tag 2, local store address 0
    code.add(spu.ai(r_size, r_zero, size * 4))
    code.add(spu.ai(r_tag, r_zero, 2))
    code.add(spu.ai(r_ls_data, r_zero, 0))

    # Fetch the data into address 0 and wait on tag 2
    mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)
    mfc_write_tag_mask(code, 1 << 2)
    mfc_read_tag_status_all(code)

    # Add 1 to every quadword using an unrolled sequence (no branches)
    r_current = code.acquire_register()
    for lsa in range(0, size * 4, 16):
        word_offset = lsa >> 2
        code.add(spu.lqa(r_current, word_offset))
        code.add(spu.ai(r_current, r_current, 1))
        code.add(spu.stqa(r_current, word_offset))
    code.release_register(r_current)

    # Write the buffer back to main memory and wait on tag 2
    mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)
    mfc_write_tag_mask(code, 1 << 2)
    mfc_read_tag_status_all(code)

    # Cleanup
    for reg in (r_zero, r_ea_data, r_ls_data, r_size, r_tag):
        code.release_register(reg)

    # Execute the code
    proc = synspu.Processor()
    proc.execute(code)

    for i in range(size):
        assert data[i] == i + 1

    return
def TestSPUIter():
    """Double an array in local store using a synthetic iterator.

    The synthesized code fetches 32 words into local store address 0, adds
    each quadword to itself inside a ``syn_iter`` loop, and writes the buffer
    back.  The host then asserts that every element was doubled.
    """
    size = 32
    data = extarray.extarray('I', range(size))
    prgm = env.Program()
    code = prgm.get_stream()

    r_ea_data, r_ls_data, r_size, r_tag = [
        prgm.acquire_register() for _ in range(4)]

    # Effective address and transfer size in bytes
    util.load_word(code, r_ea_data, data.buffer_info()[0])
    util.load_word(code, r_size, size * 4)

    # DMA tag 12, local store address 0
    code.add(spu.ai(r_tag, code.r_zero, 12))
    code.add(spu.ai(r_ls_data, code.r_zero, 0))

    # Fetch the data into address 0 and wait on tag 12
    dma.mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)
    dma.mfc_write_tag_mask(code, 1 << 12)
    dma.mfc_read_tag_status_all(code)

    current = var.SignedWord(0, code)

    # Walk the buffer a quadword at a time, adding each to itself
    for lsa in syn_iter(code, size * 4, 16):
        code.add(spu.lqx(current, code.r_zero, lsa))
        current.v = current + current
        code.add(spu.stqx(current, code.r_zero, lsa))

    # Write the buffer back to main memory and wait on tag 13
    code.add(spu.ai(r_tag, code.r_zero, 13))
    dma.mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)
    dma.mfc_write_tag_mask(code, 1 << 13)
    dma.mfc_read_tag_status_all(code)

    # Cleanup
    for reg in (r_ea_data, r_ls_data, r_size, r_tag):
        prgm.release_register(reg)

    # Execute the code
    prgm.add(code)
    proc = env.Processor()
    r = proc.execute(prgm)

    for i in range(size):
        assert data[i] == i + i

    return
def TestMFC():
    """Round-trip an array through local store with MFC GET/PUT.

    The synthesized code fetches 32 words into local store address 0, adds 1
    to every word using an unrolled sequence, and writes the buffer back.
    The host then asserts that every element was incremented.
    """
    size = 32
    data = extarray.extarray('I', range(size))
    code = synspu.InstructionStream()

    r_zero, r_ea_data, r_ls_data, r_size, r_tag = [
        code.acquire_register() for _ in range(5)]

    # Load zero
    util.load_word(code, r_zero, 0)

    ea = data.buffer_info()[0]
    print('array ea: %X' % (ea))
    print('r_zero = %s, ea_data = %s, ls_data = %s, r_size = %s, r_tag = %s' % (
        str(r_zero), str(r_ea_data), str(r_ls_data), str(r_size), str(r_tag)))

    # Effective address of the array
    print('test ea: %X' % ea)
    util.load_word(code, r_ea_data, ea)

    # Transfer size in bytes, DMA tag 2, local store address 0
    code.add(spu.ai(r_size, r_zero, size * 4))
    code.add(spu.ai(r_tag, r_zero, 2))
    code.add(spu.ai(r_ls_data, r_zero, 0))

    # Fetch the data into address 0 and wait on tag 2
    mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)
    mfc_write_tag_mask(code, 1 << 2)
    mfc_read_tag_status_all(code)

    # Add 1 to every quadword using an unrolled sequence (no branches)
    r_current = code.acquire_register()
    for lsa in range(0, size * 4, 16):
        word_offset = lsa >> 2
        code.add(spu.lqa(r_current, word_offset))
        code.add(spu.ai(r_current, r_current, 1))
        code.add(spu.stqa(r_current, word_offset))
    code.release_register(r_current)

    # Write the buffer back to main memory and wait on tag 2
    mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)
    mfc_write_tag_mask(code, 1 << 2)
    mfc_read_tag_status_all(code)

    # Cleanup
    for reg in (r_zero, r_ea_data, r_ls_data, r_size, r_tag):
        code.release_register(reg)

    # Execute the code
    proc = synspu.Processor()
    proc.execute(code)

    for i in range(size):
        assert data[i] == i + 1

    return
def SimpleSPU(): """ A very simple SPU that computes 11 + 31 and returns 0xA on success. """ prgm = env.Program() code = prgm.get_stream() proc = env.Processor() spu.set_active_code(code) # Acquire two registers #x = code.acquire_register() x = prgm.gp_return test = prgm.acquire_register() lbl_brz = prgm.get_label("BRZ") lbl_skip = prgm.get_label("SKIP") spu.hbrr(lbl_brz, lbl_skip) spu.xor(x, x, x) # zero x spu.ai(x, x, 11) # x = x + 11 spu.ai(x, x, 31) # x = x + 31 spu.ceqi(test, x, 42) # test = (x == 42) # If test is false (all 0s), skip the stop(0x100A) instruction code.add(lbl_brz) spu.brz(test, lbl_skip) spu.stop(0x100A) code.add(lbl_skip) spu.stop(0x100B) prgm.add(code) prgm.print_code() r = proc.execute(prgm, mode = 'int', stop = True) print "ret", r assert(r[0] == 42) assert(r[1] == 0x100A) prgm = env.Program() code = prgm.get_stream() spu.set_active_code(code) lbl_loop = prgm.get_label("LOOP") lbl_break = prgm.get_label("BREAK") r_cnt = prgm.acquire_register() r_stop = prgm.acquire_register() r_cmp = prgm.acquire_register() r_foo = prgm.gp_return spu.ori(r_foo, prgm.r_zero, 0) spu.ori(r_cnt, prgm.r_zero, 0) util.load_word(code, r_stop, 10) code.add(lbl_loop) spu.ceq(r_cmp, r_cnt, r_stop) spu.brnz(r_cmp, lbl_break) spu.ai(r_cnt, r_cnt, 1) spu.a(r_foo, r_foo, r_cnt) spu.br(lbl_loop) code.add(lbl_break) prgm.add(code) prgm.print_code() r = proc.execute(prgm, mode = 'int', stop = True) print "ret", r assert(r[0] == 55) return