def __init__(self, ncols, nrows):
    """Allocate a per-thread array of ptiMatrix buffers of shape (ncols, nrows).

    Args:
        ncols: column count passed through to ptiMakeMatrixBuff.
        nrows: row count passed through to ptiMakeMatrixBuff.
    """
    # os.cpu_count() replaces the previous `grep -c cores /proc/cpuinfo`
    # shell-out, which only worked on Linux (int('') -> ValueError elsewhere);
    # the stray debug print of the thread count is dropped as well.
    self.nthreads = os.cpu_count() or 1
    # NOTE(review): sizeof(pti.new("ptiMatrix *")) is the size of a *pointer*,
    # not of a ptiMatrix struct -- confirm ptiMakeMatrixBuff expects a buffer
    # sized in pointers rather than structs.
    self.address = pti.cast(
        "ptiMatrix *",
        PTI.malloc(self.nthreads * pti.sizeof(pti.new("ptiMatrix *"))))
    PTI.ptiMakeMatrixBuff(self.address, ncols, nrows)
def multiplyVectorBuff(self, vector, buffer, testing=False):
    """CSR sparse matrix-vector multiply using a pre-allocated reduce buffer.

    Args:
        vector: input ValueVector-like operand.
        buffer: pre-allocated per-thread reduction buffer.
        testing: when True, the result is freed and None is returned.

    Returns:
        The product as a ValueVector, or None when testing.
    """
    product = vec.ValueVector(self.address.nrows)
    PTI.ptiOmpSparseMatrixMulVectorCSR_Reduce(
        product.address, buffer.address, self.address, vector.address)
    if testing:
        PTI.ptiFreeValueVector(product.address)
        return None
    return product
def __init__(self, size):
    """Allocate a per-thread array of ptiValueVector buffers of length *size*.

    Args:
        size: vector length passed through to ptiMakeVectorBuff.
    """
    # os.cpu_count() replaces the previous `grep -c cores /proc/cpuinfo`
    # shell-out, which only worked on Linux (int('') -> ValueError elsewhere);
    # the stray debug print of the thread count is dropped as well.
    self.nthreads = os.cpu_count() or 1
    # NOTE(review): sizeof(pti.new("ptiValueVector *")) is the size of a
    # *pointer*, not of a ptiValueVector struct -- confirm ptiMakeVectorBuff
    # expects a buffer sized in pointers rather than structs.
    self.address = pti.cast(
        "ptiValueVector *",
        PTI.malloc(self.nthreads * pti.sizeof(pti.new("ptiValueVector *"))))
    PTI.ptiMakeVectorBuff(self.address, size)
def crossMulTensor(self, tensor, mode, modex, modey, type="default", testing=False):
    """Cross-mode tensor-tensor multiply -- NOT IMPLEMENTED.

    Always prints a notice and returns None. The original body contained an
    unreachable call after the early return; that dead code has been removed.
    Re-add it once the backing routine is usable:
        result = COOTensor()
        PTI.ptiSparseTensorMulTensor(result.address, self.address,
                                     tensor.address, mode, modex, modey)
        return result
    """
    print("This method is actually not implemented #SAD")
    return None
def makeMap(self):
    """Allocate self.map, a 2-entry ptiIndex** holding identity permutations.

    map[0] is an identity row permutation of length nrows; map[1] is an
    identity column permutation of length ncols. Used by reorder() before
    relabel/shuffle calls.
    """
    # The fresh cdata pointer is created only so pti.sizeof(self.map) on the
    # next line yields the size of a ptiIndex**; it is immediately replaced.
    self.map = pti.new("ptiIndex **")
    self.map = pti.cast("ptiIndex **", PTI.malloc(2 * pti.sizeof(self.map)))
    # NOTE(review): sizeof(pti.new("ptiIndex *")) is the size of a pointer,
    # not of a ptiIndex element -- over-allocates if ptiIndex is 32-bit;
    # harmless but worth confirming against the C typedef.
    self.map[0] = pti.cast(
        "ptiIndex *",
        PTI.malloc(self.address.nrows * pti.sizeof(pti.new("ptiIndex *"))))
    # Identity permutation over rows.
    for i in range(self.address.nrows):
        self.map[0][i] = i
    self.map[1] = pti.cast(
        "ptiIndex *",
        PTI.malloc(self.address.ncols * pti.sizeof(pti.new("ptiIndex *"))))
    # Identity permutation over columns.
    for i in range(self.address.ncols):
        self.map[1][i] = i
def convert(self, type="", blockbits=7, superblockbits=7):
    """Convert this sparse matrix to another storage format.

    Args:
        type: target format -- "hicoo", "csr", or "dense".
        blockbits: HiCOO block size exponent (HiCOO only).
        superblockbits: HiCOO superblock size exponent (HiCOO only).

    Returns:
        The converted matrix object, or None when *type* is unknown
        (bug fix: previously fell through to an unbound `result`,
        raising UnboundLocalError instead of reporting the error).
    """
    if type == "hicoo":
        result = HiCOOMatrix()
        nnz = pti.new("uint64_t *", self.nnz())
        PTI.ptiSparseMatrixToHiCOO(result.address, nnz, self.address,
                                   blockbits, superblockbits)
    elif type == "csr":
        result = CSRMatrix()
        PTI.ptiSparseMatrixToCSR(result.address, self.address)
    elif type == "dense":
        # Scatter each stored nonzero into a freshly allocated dense matrix.
        result = DenseMatrix(self.address.nrows, self.address.ncols)
        for i in range(self.address.nnz):
            m = self.address.rowind.data[i]
            n = self.address.colind.data[i]
            data = self.address.values.data[i]
            result.setValue(m, n, data)
    else:
        print("[ERROR] Wrong sparse matrix type.")
        return None
    return result
def reorder(self, type="lexi", niters=5):
    """Relabel and shuffle this matrix's indices in place.

    Args:
        type: "lexi" (lexicographic), "bfs" (BFS-like), or "random".
        niters: relabeling iterations (ignored for "random").

    Builds the identity map first if needed, computes the new labels, then
    applies them with ptiSparseMatrixShuffleIndices. On an unknown *type* it
    prints an error and returns without shuffling (bug fix: previously the
    shuffle ran anyway with an unmodified map).
    """
    if self.map is None:
        self.makeMap()
    if type == "lexi":
        relabel = 1
        PTI.ptiIndexRelabel(self.address, self.map, relabel, niters, 1)
    elif type == "bfs":
        relabel = 2
        PTI.ptiIndexRelabel(self.address, self.map, relabel, niters, 1)
    elif type == "random":
        PTI.ptiGetRandomShuffledIndicesMat(self.address, self.map)
    else:
        print("[ERROR] Wrong reordering type.")
        return
    PTI.ptiSparseMatrixShuffleIndices(self.address, self.map)
def dotMulTensor(self, tensor, type="default", testing=False):
    """Element-wise (Hadamard) product of two COO sparse tensors.

    Args:
        tensor: the right-hand COOTensor operand.
        type: "serial", "serial_EQ", "CPU", or "GPU". "default" prints a
            warning and then falls into the else branch, which terminates
            the process via exit() -- callers must pick a concrete type.
        testing: when True, the result tensor is freed and None is returned.

    Returns:
        The product COOTensor, or None when testing.
    """
    result = COOTensor()
    if type == "default":
        # Fixed "spedify" -> "specify" in the user-facing message.
        print(
            "Default Cases not learned yet. Make sure you specify your run type."
        )
    if type == "serial":
        PTI.ptiSparseTensorDotMul(result.address, self.address, tensor.address)
    elif type == "serial_EQ":
        PTI.ptiSparseTensorDotMulEq(result.address, self.address, tensor.address)
    elif type == "CPU":
        PTI.ptiOmpSparseTensorDotMulEq(result.address, self.address, tensor.address)
    elif type == "GPU":
        # NOTE(review): this calls DotDiv, not DotMul -- looks like a
        # copy/paste bug, left as-is pending confirmation that a CUDA
        # DotMul routine exists in the C library.
        PTI.ptiCudaSparseTensorDotDiv(result.address, self.address, tensor.address)
    else:
        exit("Invalid Type")
    if not testing:
        return result
    else:
        PTI.ptiFreeSparseTensor(result.address)
def sort(self, type="row", blockbits=1):
    """Sort this sparse matrix's nonzeros in place.

    Args:
        type: "row" (row->column), "col" (column->row), "block" (natural
            blocking), or "morton" (Z-Morton order).
        blockbits: block size exponent for "block" and "morton" sorts.
    """
    if type == "row":
        # Row-major: order by row, then column.
        PTI.ptiSparseMatrixSortIndexSingleMode(self.address, 1, 0, self.nthreads)
        return
    if type == "col":
        # Column-major: order by column, then row.
        PTI.ptiSparseMatrixSortIndexSingleMode(self.address, 1, 1, self.nthreads)
        return
    if type == "block":
        PTI.ptiSparseMatrixSortIndexRowBlock(
            self.address, 1, 0, self.address.nnz, blockbits)
        return
    if type == "morton":
        PTI.ptiSparseMatrixSortIndexMorton(
            self.address, 1, 0, self.address.nnz, blockbits)
        return
    print("[ERROR] Wrong sorting type.")
def multiplyVector(self, vector, type="default", testing=False):
    """Sparse matrix-vector product with a size-based kernel heuristic.

    Args:
        vector: the input ValueVector operand.
        type: "serial", "CPU", "CPU_Buff", or "default" to pick one of
            those three from empirically learned nnz/density thresholds.
        testing: when True, the result is freed and None is returned.

    Returns:
        The product ValueVector (unmodified if *type* is unrecognized),
        or None when testing.
    """
    out = vec.ValueVector(self.address.nrows)
    nnz = self.address.nnz
    density = nnz / self.address.ncols  # average nonzeros per column
    if type == "default":
        # Thresholds below were learned empirically for this dispatch.
        if nnz < 1246221:
            type = "serial"
        elif density < 28.8:
            type = "CPU"
        else:
            type = "CPU_Buff"
    kernels = {
        "serial": PTI.ptiSparseMatrixMulVector,
        "CPU": PTI.ptiOmpSparseMatrixMulVector,
        "CPU_Buff": PTI.ptiOmpSparseMatrixMulVectorReduce,
    }
    kernel = kernels.get(type)
    if kernel is not None:
        kernel(out.address, self.address, vector.address)
    if testing:
        PTI.ptiFreeValueVector(out.address)
        return None
    return out
def mulMatrix(self, tensor, mode, type="default", testing=False):
    """Semi-sparse tensor times dense matrix product along *mode*.

    type: "serial" or "GPU". "default" currently performs no multiplication
    at all (placeholder below), so the returned tensor is untouched.
    testing: when True, frees the result instead of returning it.
    """
    result = sCOOTensor()
    if type == "default":
        # Placeholder: no default dispatch implemented yet, so "default"
        # falls through without calling any kernel.
        serial = 0  #TODO make params
    if type == "serial":
        PTI.ptiSemiSparseTensorMulMatrix(result.address, self.address, tensor.address, mode)
    elif type == "GPU":
        PTI.ptiCudaSemiSparseTensorMulMatrix(result.address, self.address, tensor.address, mode)
    if not testing:
        return result
    else:
        PTI.ptiFreeSemiSparseTensor(result.address)
def mulMatrix(self, matrix, mode, type="default", testing=False):
    """Sparse tensor times dense matrix (TTM) along *mode*.

    Args:
        matrix: the dense matrix operand.
        mode: the tensor mode to contract.
        type: "serial", "CPU", "GPU", or "GPU-1K". "default" prints a
            warning, then falls into the else branch which terminates the
            process via exit() -- callers must pick a concrete type.
        testing: when True, the result is freed and None is returned.

    Returns:
        The product sCOOTensor, or None when testing.
    """
    result = sCOOTensor()
    if type == "default":
        # Fixed "spedify" -> "specify" in the user-facing message.
        print(
            "Default Cases not learned yet. Make sure you specify your run type."
        )
    if type == "serial":
        PTI.ptiSparseTensorMulMatrix(result.address, self.address,
                                     matrix.address, mode)
    elif type == "CPU":
        # Bug fix: was `(result.address, self.address.matrix.address, mode)` --
        # an attribute chain instead of two separate arguments, dropping the
        # matrix operand and reading a nonexistent `.matrix` attribute.
        PTI.ptiOmpSparseTensorMulMatrix(result.address, self.address,
                                        matrix.address, mode)
    elif type == "GPU":
        PTI.ptiCudaSparseTensorMulMatrix(result.address, self.address,
                                         matrix.address, mode)
    elif type == "GPU-1K":
        PTI.ptiCudaSparseTensorMulMatrixOneKernal(
            result.address, self.address, matrix.address, mode
        )  #TODO Learn more about rest of params (impl_num and smen_size)
    else:
        exit("Invalid Type")
    if not testing:
        return result
    else:
        result.free()
def subtractTensor(self, tensor, nthreads=0, testing=False):
    """Subtract *tensor* from this COO sparse tensor.

    nthreads == 1 selects the serial kernel, which writes into a fresh
    COOTensor. Any other value selects the OpenMP kernel, which -- note --
    OVERWRITES *tensor* in place and returns it; nthreads == 0 additionally
    prints a warning and substitutes self.nthreads.
    testing: when True, frees/suppresses the returned result.
    """
    if nthreads == 0:
        # NOTE(review): "spedify" typo kept -- runtime string, see sibling
        # methods with the same message.
        print(
            "Default Cases not learned yet. Make sure you spedify your run type."
        )
    if nthreads == 1:
        type = "serial"
    else:
        type = "CPU"
    if type == "serial":
        result = COOTensor()
        PTI.ptiSparseTensorSub(result.address, self.address, tensor.address)
        if not testing:
            return result
        else:
            PTI.ptiFreeSparseTensor(result.address)
    elif type == "CPU":
        if nthreads == 0:
            nthreads = self.nthreads
        # In-place: the difference is written into the *tensor* argument.
        PTI.ptiSparseTensorSubOMP(tensor.address, self.address, nthreads)
        if not testing:
            return tensor
    else:
        # Unreachable: type is always "serial" or "CPU" above.
        exit("Invalid Type")
def toCOO(self):
    """Convert this semi-sparse tensor to a fully sparse COO tensor.

    Entries with magnitude below the 1e-6 epsilon are dropped by the
    C routine.

    Returns:
        The new COOTensor (bug fix: the result was previously constructed
        and filled but never returned).
    """
    result = COOTensor()
    PTI.ptiSemiSparseTensorToSparseTensor(result.address, self.address, 1e-6)
    return result
def mulVector(self, vector, mode, testing=False):
    """Sparse tensor times dense vector (TTV) along *mode*.

    Args:
        vector: the dense vector operand.
        mode: the tensor mode to contract.
        testing: when True, the result is freed and None is returned.

    Returns:
        The product as an sCOOTensor, or None when testing.
    """
    product = sCOOTensor()
    PTI.ptiSparseTensorMulVector(
        product.address, self.address, vector.address, mode)
    if testing:
        product.free()
        return None
    return product
def divValue(self, scalar):
    """Divide every stored value of this sparse tensor by *scalar*, in place."""
    PTI.ptiSparseTensorDivScalar(self.address, scalar)
def mulValue(self, scalar):
    """Multiply every stored value of this sparse tensor by *scalar*, in place."""
    PTI.ptiSparseTensorMulScalar(self.address, scalar)
def toHiCOO(self, b=7, k=7, c=7):
    """Convert this COO sparse tensor to HiCOO format.

    Args:
        b: block size exponent.
        k: kernel size exponent.
        c: chunk size exponent.

    Returns:
        The new HiCOOTensor (bug fix: the result was previously constructed
        and filled but never returned).
    """
    result = HiCOOTensor()
    # NOTE(review): the literal 0 second argument is presumably an nnz
    # out-pointer slot (convert() in the matrix class passes a real
    # uint64_t*) -- confirm against the C signature.
    PTI.ptiSparseTensorToHiCOO(result.address, 0, self.address, b, k, c, 1)
    return result
def toSemiSparse(self, mode=0):
    """Convert this COO sparse tensor to semi-sparse form along *mode*.

    Returns:
        The new sCOOTensor (bug fix: the result was previously constructed
        and filled but never returned).
    """
    result = sCOOTensor()
    # NOTE(review): argument order here is (self, result, ...), the reverse
    # of toCOO's (dest, src) convention -- verify against the C signature
    # of ptiSparseTensorToSemiSparseTensor before relying on this.
    PTI.ptiSparseTensorToSemiSparseTensor(self.address, result.address, mode)
    return result
def dotDivTensor(self, tensor, type="default", testing=False):
    """Element-wise division of this sparse tensor by *tensor*.

    Args:
        tensor: the divisor COOTensor.
        type: accepted for signature symmetry with dotMulTensor; unused.
        testing: when True, the result is freed and None is returned.

    Returns:
        The quotient COOTensor, or None when testing.
    """
    quotient = COOTensor()
    PTI.ptiSparseTensorDotDiv(quotient.address, self.address, tensor.address)
    if testing:
        PTI.ptiFreeSparseTensor(quotient.address)
        return None
    return quotient
def __init__(self, length):
    """Allocate a new ptiValueVector with len == capacity == *length*."""
    self.address = pti.new("ptiValueVector *")
    PTI.ptiNewValueVector(self.address, length, length)
def dump(self, filename):
    """Write this sparse tensor to *filename* using 1-based indices."""
    # NOTE(review): the FILE* is never fclose'd -- resource leak; fix once
    # it is confirmed that the cffi binding exposes fclose. Also note that
    # bytes(filename, 'ascii') raises on non-ASCII paths.
    file = PTI.fopen(bytes(filename, 'ascii'), b"w")
    PTI.ptiDumpSparseTensor(self.address, 1, file)
def __init__(self, length):
    """Allocate a new ptiNnzIndexVector with len == capacity == *length*."""
    self.address = pti.new("ptiNnzIndexVector *")
    PTI.ptiNewNnzIndexVector(self.address, length, length)
def load(self, filename):
    """Load this sparse tensor from *filename* (1-based indices on disk)."""
    # NOTE(review): dump() opens a FILE* via PTI.fopen, while this passes the
    # raw path bytes -- verify ptiLoadSparseTensor really takes a path here.
    # bytes(filename, 'ascii') raises on non-ASCII paths.
    file = bytes(filename, 'ascii')
    PTI.ptiLoadSparseTensor(self.address, 1, file)
    # Cache the mode count; 'dimentions' misspelling is kept -- it is a
    # public attribute other code may already read.
    self.dimentions = self.address.nmodes
def MTTKRP(self, mode, mats=None, matsorder=None, type="default", testing=False):
    """MTTKRP on this HiCOO tensor, accumulating into *mats*.

    Only "serial" and "CPU" pass real arguments; the remaining branches call
    the C routines with no arguments and look like unfinished stubs --
    NOTE(review): confirm before using any of them.
    Returns *mats* unless testing is True (then returns None implicitly).
    """
    if mats == None:
        serial = 0  #TODO make code to make mats inside here
    if type == "default":
        # No tuned default dispatch yet; nothing is computed for "default".
        print(
            "Default Cases not learned yet. Make sure you spedify your run type."
        )
    if type == "serial":
        PTI.ptiMTTKRPHiCOO(self.address, mats.address, matsorder, mode)
    elif type == "CPU":
        PTI.ptiOMPMTTKRPHiCOO(self.address, mats.address, matsorder, mode)
    elif type == "CPU_Reduce":
        PTI.ptiOmpMTTKRP_Reduce()
    elif type == "CPU_Lock":
        PTI.ptiOmpMTTKRP_Lock()
    elif type == "CUDA":
        PTI.ptiCudaMTTKRP()
    elif type == "CUDA_1K":
        PTI.ptiCudaMTTKRPOneKernal()
    elif type == "CUDA_SM":
        PTI.ptiCudaMTTKRPSM()
    elif type == "CUDA_Device":
        PTI.ptiCudaMTTKRPDevice()
    elif type == "CUDA_Coarse":
        PTI.ptiCudaCoareMTTKRP()
    elif type == "Splitted":
        PTI.ptiSplittedMTTKRP()
    if not testing:
        return mats
def KhatriRao(self, tensor, testing=False):
    """Khatri-Rao product of this sparse tensor with *tensor*.

    Args:
        tensor: the right-hand COOTensor operand.
        testing: when True, the result is freed and None is returned.

    Returns:
        The product COOTensor, or None when testing.
    """
    product = COOTensor()
    PTI.ptiSparseTensorKhatriRaoMul(
        product.address, self.address, tensor.address)
    if testing:
        product.free()
        return None
    return product
def free(self):
    """Release the per-thread vector buffer held at self.address."""
    PTI.ptiFreeVecBuff(self.address)
def free(self):
    """Release the underlying C sparse tensor held at self.address."""
    PTI.ptiFreeSparseTensor(self.address)
def MTTKRP(self, mode, mats=None, matsorder=None, type="default", testing=False):
    """MTTKRP on this COO tensor, accumulating into *mats*.

    Only "serial" and "CPU" pass real arguments; the remaining branches call
    the C routines with no arguments and look like unfinished stubs --
    NOTE(review): confirm before using any of them.

    Returns:
        *mats* unless testing is True, in which case None (bug fix: the
        testing flag was previously accepted but ignored, unlike the HiCOO
        MTTKRP which suppresses the return when testing).
    """
    if mats is None:  # idiom fix: was `mats == None`
        serial = 0  # TODO make code to make mats inside here
    if type == "default":
        serial = 0  # TODO make params -- no default dispatch yet
    elif type == "serial":
        PTI.ptiMTTKRP(self.address, mats.address, matsorder, mode)
    elif type == "CPU":
        PTI.ptiOMPMTTKRP(self.address, mats.address, matsorder, mode)
    elif type == "CPU_Reduce":
        PTI.ptiOmpMTTKRP_Reduce()
    elif type == "CPU_Lock":
        PTI.ptiOmpMTTKRP_Lock()
    elif type == "CUDA":
        PTI.ptiCudaMTTKRP()
    elif type == "CUDA_1K":
        PTI.ptiCudaMTTKRPOneKernal()
    elif type == "CUDA_SM":
        PTI.ptiCudaMTTKRPSM()
    elif type == "CUDA_Device":
        PTI.ptiCudaMTTKRPDevice()
    elif type == "CUDA_Coarse":
        PTI.ptiCudaCoareMTTKRP()
    elif type == "Splitted":
        PTI.ptiSplittedMTTKRP()
    if not testing:
        return mats
def sort(self):
    """In-place quicksort of the full underlying nnz-index array [0, len)."""
    PTI.ptiQuickSortNnzIndexArray(self.address.data, 0, self.address.len)