def synthesize(self, code):
    """Emit into `code` the SPU instructions that sweep the w x h grid.

    `code` is made the active instruction stream for the duration of the
    call, and the previously active stream is restored afterwards -- also
    when synthesis raises, so a failure here cannot leave the module-level
    active stream pointing at a half-built stream.

    Steps, in order:
      * load the parameters via `self._load_parameters(code)`;
      * set up an `SPULog` and, if present, `self.renderer`;
      * allocate the working variables and hand them to `self.ly_point`;
      * emit a nested loop of `self.h` rows by `self.w / 4` columns that
        synthesizes one `ly_point` evaluation per inner iteration and
        advances r1 (per column) and r2 (per row) by their increments.

    Parameters:
      code -- the instruction stream to synthesize into.

    Returns None.
    """
    old_code = spu.get_active_code()
    spu.set_active_code(code)

    try:
        self._load_parameters(code)

        log = spu_log.SPULog()
        log.setup(code)

        renderer = self.renderer
        if renderer is not None:
            renderer.setup(code)
            renderer.set_one(log.consts['ONE'])

        r1_inc = var.SingleFloat()
        r2_inc = var.SingleFloat()
        r1 = var.SingleFloat()
        r2 = var.SingleFloat()
        result = var.SingleFloat()
        pattern = var.Word(0)

        self.ly_point.set_pattern_reg(pattern)
        self.ly_point.set_result_reg(result)
        self.ly_point.set_r_regs(r1, r2)
        self.ly_point.set_log(log)
        self.ly_point.setup(code)

        # Fetch the initial values from fixed local-store addresses.
        # NOTE(review): presumably these are the slots filled in by
        # _load_parameters() -- confirm against that method.
        spu.lqa(r1, 0)
        spu.lqa(r2, 4)
        spu.lqa(r1_inc, 8)
        spu.lqa(r2_inc, 12)
        spu.lqa(pattern, 16)

        for _row in spuiter.syn_iter(code, self.h):
            # Rewind r1 to its starting value at the beginning of each row.
            spu.lqa(r1, 0)

            # NOTE(review): w / 4 presumably reflects four values handled
            # per iteration -- confirm. The division is left as '/' so the
            # Python 2 semantics of the original are untouched.
            for _col in spuiter.syn_iter(code, self.w / 4):
                self.ly_point.synthesize(code)
                r1.v = spu.fa.ex(r1, r1_inc)

                if renderer is not None:
                    renderer.set_result_reg(result)
                    renderer.synthesize(code)

            if renderer is not None:
                renderer.row_complete(code)

            r2.v = spu.fa.ex(r2, r2_inc)
    finally:
        # Always hand the active stream back to whoever owned it before.
        spu.set_active_code(old_code)
def TestMFC():
    """Round-trip an array through the SPU with MFC get/put commands.

    Builds an instruction stream that transfers a 32-word array into local
    store address 0, adds 1 to every word with an unrolled (branch-free)
    sequence, and transfers the result back to main memory.  After the
    stream has executed, every element must equal its index plus one.
    """
    size = 32
    nbytes = size * 4
    tag = 2
    tag_mask = 1 << tag

    data = extarray.extarray('I', range(size))
    data_ea = data.buffer_info()[0]

    code = synspu.InstructionStream()

    r_zero, r_ea_data, r_ls_data, r_size, r_tag = [
        code.acquire_register() for _ in range(5)]
    param_regs = (r_zero, r_ea_data, r_ls_data, r_size, r_tag)

    # A zero register serves as the base for the add-immediate loads below.
    util.load_word(code, r_zero, 0)

    # Single-argument parenthesised print: same output as the statement form.
    print('array ea: %X' % (data_ea))
    print('r_zero = %s, ea_data = %s, ls_data = %s, r_size = %s, r_tag = %s' % (
        tuple(str(reg) for reg in param_regs)))

    # Effective (main memory) address of the array.
    print('test ea: %X' % data_ea)
    util.load_word(code, r_ea_data, data_ea)

    # Transfer size in bytes, DMA tag, and local store address (0).
    code.add(spu.ai(r_size, r_zero, nbytes))
    code.add(spu.ai(r_tag, r_zero, tag))
    code.add(spu.ai(r_ls_data, r_zero, 0))

    # Main memory -> local store, then wait on the tag for completion.
    mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)
    mfc_write_tag_mask(code, tag_mask)
    mfc_read_tag_status_all(code)

    # Unrolled increment: one load / add / store per 16-byte quadword.
    r_current = code.acquire_register()
    for byte_offset in range(0, nbytes, 16):
        word_addr = byte_offset >> 2
        code.add(spu.lqa(r_current, word_addr))
        code.add(spu.ai(r_current, r_current, 1))
        code.add(spu.stqa(r_current, word_addr))
    code.release_register(r_current)

    # Local store -> main memory, then wait on the tag for completion.
    mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)
    mfc_write_tag_mask(code, tag_mask)
    mfc_read_tag_status_all(code)

    # Cleanup.
    for reg in param_regs:
        code.release_register(reg)

    # Execute the code.
    proc = synspu.Processor()
    proc.execute(code)

    for index in range(size):
        expected = index + 1
        assert (data[index] == expected)

    return
def TestMFC():
    """Exercise MFC get/put by incrementing an array on the SPU.

    A 32-word array is transferred into local store at address 0, each
    word is incremented by 1 using an unrolled loop (no branches), and the
    array is transferred back.  The test passes when every element equals
    its original index plus one.
    """
    import corepy.lib.extarray as extarray
    import corepy.arch.spu.platform as synspu

    size = 32
    transfer_bytes = size * 4
    dma_tag = 2

    data = extarray.extarray('I', range(size))
    ea = data.buffer_info()[0]

    code = synspu.InstructionStream()

    r_zero = code.acquire_register()
    r_ea_data = code.acquire_register()
    r_ls_data = code.acquire_register()
    r_size = code.acquire_register()
    r_tag = code.acquire_register()

    # Load zero; it is the base operand for the immediates loaded below.
    util.load_word(code, r_zero, 0)

    # Single-argument parenthesised print: same output as the statement form.
    print('array ea: %X' % (ea))
    reg_names = (str(r_zero), str(r_ea_data), str(r_ls_data),
                 str(r_size), str(r_tag))
    print('r_zero = %s, ea_data = %s, ls_data = %s, r_size = %s, r_tag = %s' % (
        reg_names))

    # Load the effective address of the array.
    print('test ea: %X' % ea)
    util.load_word(code, r_ea_data, ea)

    # Load the transfer size (bytes), the tag, and the local store address.
    code.add(spu.ai(r_size, r_zero, transfer_bytes))
    code.add(spu.ai(r_tag, r_zero, dma_tag))
    code.add(spu.ai(r_ls_data, r_zero, 0))

    # Bring the data into local store address 0 and wait for completion.
    mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)
    mfc_write_tag_mask(code, 1 << dma_tag)
    mfc_read_tag_status_all(code)

    # Increment every word by 1, one quadword (16 bytes) per step.
    r_current = code.acquire_register()
    quad_offsets = range(0, transfer_bytes, 16)
    for offset in quad_offsets:
        ls_addr = offset >> 2
        code.add(spu.lqa(r_current, ls_addr))
        code.add(spu.ai(r_current, r_current, 1))
        code.add(spu.stqa(r_current, ls_addr))
    code.release_register(r_current)

    # Store the values back to main memory and wait for completion.
    mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)
    mfc_write_tag_mask(code, 1 << dma_tag)
    mfc_read_tag_status_all(code)

    # Cleanup, in acquisition order.
    for reg in (r_zero, r_ea_data, r_ls_data, r_size, r_tag):
        code.release_register(reg)

    # Execute the code.
    proc = synspu.Processor()
    proc.execute(code)

    position = 0
    while position < size:
        assert (data[position] == position + 1)
        position += 1

    return