def main():
    """Run distributed SVD on this rank's data partition and cross-check results.

    Each MPI rank reads its own input file (selected by process id), runs
    d4p.svd in distributed mode once on the file path and once on an
    in-memory numpy array, and asserts that both runs agree.

    Returns the loaded numpy array and the file-based result object.
    """
    # Each process gets its own data partition, numbered from 1.
    infile = "./data/distributed/svd_{}.csv".format(d4p.my_procid() + 1)

    # Configure an SVD algorithm object for SPMD (distributed) execution.
    algo = d4p.svd(distributed=True)

    # First run: hand the CSV path straight to daal4py.
    res_from_file = algo.compute(infile)

    # Second run: load the same data ourselves and pass the numpy array.
    data = loadtxt(infile, delimiter=',')
    res_from_array = algo.compute(data)

    # Result objects expose leftSingularMatrix, rightSingularMatrix and
    # singularValues; verify the expected shapes first...
    assert res_from_file.leftSingularMatrix.shape == data.shape
    assert res_from_file.singularValues.shape == (1, data.shape[1])
    assert res_from_file.rightSingularMatrix.shape == (data.shape[1], data.shape[1])

    # ...then confirm the two runs produced matching factorizations.
    assert allclose(res_from_file.leftSingularMatrix, res_from_array.leftSingularMatrix, atol=1e-05)
    assert allclose(res_from_file.rightSingularMatrix, res_from_array.rightSingularMatrix, atol=1e-05)
    assert allclose(res_from_file.singularValues, res_from_array.singularValues, atol=1e-05)

    return data, res_from_file
def svd(self, Data_Path, target, n):
    '''
    daal4py SVD SPMD Mode
    '''
    # Bring up the SPMD runtime with the requested thread count.
    d4p.daalinit(nthreads=n)

    # Each rank reads its own partition, numbered from 1, and drops
    # the target column so only features are decomposed.
    file_path = Data_Path + str(d4p.my_procid() + 1) + ".csv"
    features = pd.read_csv(file_path).drop(target, axis=1)

    algo = d4p.svd(distributed=True)
    self.logger.info('Training the SVD in pydaal SPMD Mode')

    # Time only the distributed compute step.
    start = time.time()
    result = algo.compute(features)
    self.latency["Parallel_SVD_SPMD_Time"] = time.time() - start

    # The result is available on every rank; print from root only.
    if d4p.my_procid() == 0:
        print("SVD completed", result)

    self.logger.info('Completed SVD in pydaal SPMD Mode')
    d4p.daalfini()
    return
def _daal4py_svd(X):
    """Thin wrapper over daal4py's batch SVD.

    Validates X as a 2-D float64/float32 array, computes the full
    decomposition, and returns (U, s, V) with the singular values
    flattened to 1-D.
    """
    X = check_array(X, dtype=[np.float64, np.float32])
    fptype = getFPType(X)
    svd_algo = daal4py.svd(
        fptype=fptype,
        method='defaultDense',
        leftSingularMatrix='requiredInPackedForm',
        rightSingularMatrix='requiredInPackedForm',
    )
    svd_res = svd_algo.compute(X)
    return (svd_res.leftSingularMatrix,
            np.ravel(svd_res.singularValues),
            svd_res.rightSingularMatrix)
def main(readcsv=read_csv, method='defaultDense'):
    """Batch SVD example.

    Runs d4p.svd once on a CSV path and once on a loaded array, asserts
    both results agree, checks result shapes, and verifies that
    U @ diag(s) @ V reconstructs the input.

    Returns (data, result-from-file).
    """
    infile = "./data/batch/svd.csv"

    # Run directly on the CSV path — no table/array needed.
    algo = d4p.svd()
    res_file = algo.compute(infile)

    # Run again on data we load ourselves (18 feature columns, float32).
    algo = d4p.svd()
    data = readcsv(infile, range(18), t=np.float32)
    res_array = algo.compute(data)

    # Both runs must produce matching factors...
    assert np.allclose(res_file.leftSingularMatrix, res_array.leftSingularMatrix, atol=1e-07)
    assert np.allclose(res_file.rightSingularMatrix, res_array.rightSingularMatrix, atol=1e-07)
    assert np.allclose(res_file.singularValues, res_array.singularValues, atol=1e-07)

    # ...with the expected shapes.
    assert res_file.singularValues.shape == (1, data.shape[1])
    assert res_file.rightSingularMatrix.shape == (data.shape[1], data.shape[1])
    assert res_file.leftSingularMatrix.shape == data.shape

    # Densify sparse input (e.g. scipy's csr_matrix) so the
    # reconstruction check below works.
    if hasattr(data, 'toarray'):
        data = data.toarray()

    # U @ diag(s) @ V must reproduce the original data.
    reconstructed = np.matmul(
        np.matmul(res_file.leftSingularMatrix,
                  np.diag(res_file.singularValues[0])),
        res_file.rightSingularMatrix)
    assert np.allclose(data, reconstructed)

    return (data, res_file)
def main(readcsv=read_csv, method='defaultDense'):
    """Streaming SVD example.

    Feeds four distributed CSV files one at a time into a streaming
    d4p.svd and returns the finalized result, which exposes
    leftSingularMatrix, rightSingularMatrix and singularValues.
    """
    input_files = ["./data/distributed/svd_{}.csv".format(part)
                   for part in range(1, 5)]

    # Streaming mode accumulates partial results across compute() calls.
    algo = d4p.svd(streaming=True)

    # Feed the files directly — paths, not tables/arrays — one by one.
    for path in input_files:
        algo.compute(path)

    # All files are done; finalize to obtain the actual SVD result.
    return algo.finalize()
return a = np.genfromtxt(file, delimiter=',', skip_header=s, max_rows=chunksize) if a.shape[0] == 0: return if a.ndim == 1: a = a[:, np.newaxis] # last chunk is usually smaller, if not, numpy will print warning in next iteration if chunksize > a.shape[0]: s = -1 else: s += a.shape[0] yield a # get the generator rn = read_next("./data/batch/svd.csv", 112) # creat an SVD algo object algo = d4p.svd(streaming=True) # iterate through chunks/stream for chunk in rn: algo.compute(chunk) # finalize computation res = algo.finalize() print("Singular values:\n", res.singularValues)
def daal4py_svd(A, k):
    """Compute the SVD of a dense matrix with daal4py.

    Parameters
    ----------
    A : 2-D array-like
        Dense input matrix (only dense inputs are supported here).
    k : int
        Requested rank; currently unused — daal4py's batch SVD computes
        the full decomposition. Kept for interface compatibility with
        truncated-SVD counterparts.

    Returns
    -------
    The daal4py SVD result object, exposing leftSingularMatrix,
    rightSingularMatrix and singularValues.
    """
    # only dense inputs
    algo = d4p.svd()
    result = algo.compute(A)
    # Fix: the original assigned the computed result to a local and fell
    # off the end (returning None) — return it so callers can use it.
    return result
# daal4py SVD example for distributed memory systems; SPMD mode
# run like this:
#     mpirun -n 4 python ./svd_spmd.py
import daal4py as d4p
from numpy import loadtxt, allclose

if __name__ == "__main__":
    # Initialize SPMD mode
    d4p.daalinit()

    # Each process gets its own data partition, numbered from 1.
    infile = "./data/distributed/svd_{}.csv".format(d4p.my_procid() + 1)

    # configure a SVD object for distributed execution
    algo = d4p.svd(distributed=True)

    # let's provide a file directly, not a table/array
    result1 = algo.compute(infile)

    # We can also load the data ourselves and provide the numpy array
    data = loadtxt(infile, delimiter=',')
    result2 = algo.compute(data)

    # SVD result objects provide leftSingularMatrix, rightSingularMatrix
    # and singularValues.
    # leftSingularMatrix is not yet supported in dist mode, so it comes
    # back as None (fixed: compare to None with `is`, per PEP 8 E711).
    assert result1.leftSingularMatrix is None and result2.leftSingularMatrix is None
    assert allclose(result1.rightSingularMatrix, result2.rightSingularMatrix, atol=1e-05)
    assert allclose(result1.singularValues, result2.singularValues, atol=1e-07)

    # Fix: shut down the SPMD runtime — pairs with daalinit() above
    # (consistent with the daalinit/daalfini pattern used elsewhere).
    d4p.daalfini()