def pack_smimms_int(x):
    """Return the code associated with integer-style value *x*.

    A lookup table is rebuilt on every call with four groups of keys,
    16 entries each:

    * ``0 .. 15`` map to codes ``0 .. 15``;
    * ``-16 .. -1`` map to codes ``16 .. 31``;
    * ``pack_unpack('i', 'I', v)`` for ``v`` in ``-16 .. -1`` map to the
      same codes ``16 .. 31`` (presumably the unsigned 32-bit view of the
      negative value -- confirm against ``pack_unpack``);
    * ``pack_unpack('f', 'I', 2 ** e)`` for ``e`` in ``-8 .. 7`` map to
      codes ``32 .. 47`` (presumably the float32 bit pattern of the power
      of two -- confirm against ``pack_unpack``).

    Raises:
        KeyError: if *x* is not one of the keys above.
    """
    def _entries():
        # Yield (key, code) pairs in the order they must be inserted.
        for n in range(16):
            yield n, n
            yield n - 16, n + 16
            yield pack_unpack('i', 'I', n - 16), n + 16
            yield pack_unpack('f', 'I', 2 ** (n - 8)), n + 32

    return dict(_entries())[x]
def pack(self):
    """Assemble this object's fields into a single integer word.

    The target address is chosen as follows:

    * if ``addr_label`` is set, it is
      ``(int(addr_label) - serial - 4) * 8`` passed through
      ``pack_unpack('i', 'I', ...)`` (presumably a signed-to-unsigned
      32-bit reinterpretation of a relative offset -- confirm);
    * otherwise ``addr`` if it is set;
    * otherwise ``0``.

    Bit layout of the returned value (field -> least significant bit):

    * constant ``0b10`` -> 56
    * low 24 bits of the address, shifted right by 3 -> 35
    * ``cond_br`` -> 32
    * address bits 24 and above -> 24
    * ``set_link`` (as 0/1) -> 23
    * ``msfign`` (always 0 here) -> 21
    * ``bdu`` -> 15
    * ``ub`` -> 14
    * ``bdi`` -> 12
    * ``raddr_a`` (0 when ``None``) -> 6
    """
    if self.addr_label is not None:
        target = pack_unpack('i', 'I', (int(self.addr_label) - self.serial - 4) * 8)
    elif self.addr is not None:
        target = self.addr
    else:
        target = 0

    # The address is split: low 24 bits (dropping the bottom 3) and the rest.
    addr_low = (target & ((1 << 24) - 1)) >> 3
    addr_high = target >> 24

    link_bit = 1 if self.set_link else 0
    msfign = 0b00
    raddr_a = 0 if self.raddr_a is None else self.raddr_a

    fields = (
        (0b10, 56),
        (addr_low, 35),
        (self.cond_br, 32),
        (addr_high, 24),
        (link_bit, 23),
        (msfign, 21),
        (self.bdu, 15),
        (self.ub, 14),
        (self.bdi, 12),
        (raddr_a, 6),
    )

    word = 0
    for value, shift in fields:
        word |= value << shift
    return word
def pack_smimms_float(x):
    """Return the code associated with float-style value *x*.

    A lookup table is rebuilt on every call with two groups of keys,
    16 entries each:

    * ``pack_unpack('I', 'f', n)`` for ``n`` in ``0 .. 15`` map to codes
      ``0 .. 15`` (the original comment calls these denormal numbers;
      presumably the float32 whose bit pattern is ``n`` -- confirm against
      ``pack_unpack``);
    * ``2 ** e`` for ``e`` in ``-8 .. 7`` map to codes ``32 .. 47``.

    Raises:
        KeyError: if *x* is not one of the keys above.
    """
    table = {}
    for code in range(16):
        table.update({
            # Denormal numbers
            pack_unpack('I', 'f', code): code,
            # Powers of two from 2**-8 up to 2**7
            2 ** (code - 8): code + 32,
        })
    return table[x]
def run(drv, unif, src, dst, num_qpus, rows, cols, tile_rows, tile_cols,
        subtile_rows, subtile_cols, code_offset=0):
    """Build the ``qpu_comatcopy_t`` program, run it once and print timing.

    Steps, in order:

    1. Build the program through ``drv.program`` with the tiling options.
    2. Overwrite ``src`` and ``dst`` with ascending values ``0 .. size-1``
       laid out in each array's own shape and dtype.
    3. Store eight values into ``unif[0:8]`` (see the inline notes).
    4. Time a single ``drv.execute`` call with ``monotonic()`` and print
       the elapsed time and a throughput figure based on
       ``rows * cols * 8`` bytes.

    Nothing is returned; results are only printed.
    """
    program = drv.program(
        qpu_comatcopy_t,
        num_qpus=num_qpus,
        tile_rows=tile_rows,
        tile_cols=tile_cols,
        subtile_rows=subtile_rows,
        subtile_cols=subtile_cols,
        code_offset=code_offset,
    )

    # Give both buffers known, position-dependent contents.
    for buf in (src, dst):
        buf[:, :] = np.arange(buf.size, dtype=buf.dtype).reshape(buf.shape)

    uniforms = (
        rows,
        cols,
        pack_unpack('f', 'I', 1.),  # presumably the float32 bits of 1.0 -- confirm
        pack_unpack('f', 'I', 0.),  # presumably the float32 bits of 0.0 -- confirm
        src.addresses()[0, 0],
        cols * 8,  # NOTE(review): looks like a row stride for 8-byte elements -- confirm
        dst.addresses()[0, 0],
        rows * 8,  # NOTE(review): looks like a row stride for 8-byte elements -- confirm
    )
    for slot, value in enumerate(uniforms):
        unif[slot] = value

    start = monotonic()
    drv.execute(program, unif.addresses()[0], thread=num_qpus)
    end = monotonic()

    print(f'{num_qpus} QPUs,',
          f'{rows} x {cols} matrix,',
          f'{tile_rows:2} x {tile_cols:2} tile,',
          f'{subtile_rows:2} x {subtile_cols:2} subtile:',
          f'{end - start} seconds,',
          f'{rows * cols * 8 / (end - start) * 1e-6} MB/s')
def block_2x4_params(i, j):
    """Return the parameter list for block ``(i, j)`` of a 2 x 4 split.

    ``P`` is divided by 2 and ``R`` by 4 (integer division) to get the
    block extents. ``P``, ``Q``, ``R``, ``A``, ``B``, ``C``, ``alpha`` and
    ``beta`` are read from the enclosing scope.

    The returned list holds, in order: the block extent along ``P``, ``Q``,
    the block extent along ``R``, then an (address, ``strides[0]``) pair
    for each of ``A``, ``B`` and ``C`` at the block's origin, followed by
    the items of ``pack_unpack('f', 'I', [alpha, beta])`` (presumably the
    float32 bit patterns of ``alpha`` and ``beta`` -- confirm).
    """
    p_extent = P // 2
    r_extent = R // 4
    row_origin = p_extent * i
    col_origin = r_extent * j

    params = [p_extent, Q, r_extent]
    params += [A.addresses()[row_origin, 0], A.strides[0]]
    params += [B.addresses()[0, col_origin], B.strides[0]]
    params += [C.addresses()[row_origin, col_origin], C.strides[0]]
    params.extend(pack_unpack('f', 'I', [alpha, beta]))
    return params