def main(readcsv=read_csv, method='defaultDense'):
    """Feed a CSV file chunk-by-chunk into a streaming low order moments
    computation and return the finalized result.

    Parameters
    ----------
    readcsv : callable
        Reader with signature (path, usecols, skiprows, nrows) -> 2D array.
    method : str
        Kept for interface parity with the other examples; the streaming
        algorithm here is created with its defaults.

    Returns
    -------
    low_order_moments result object providing minimum, maximum, sum,
    sumSquares, sumSquaresCentered, mean, secondOrderRawMoment, variance,
    standardDeviation and variation.
    """
    # read data from file
    infile = "./data/batch/covcormoments_dense.csv"

    # Configure a low order moments object for streaming
    algo = d4p.low_order_moments(streaming=True)

    chunk_size = 55
    lines_read = 0
    # read and feed chunk by chunk until the reader fails (end of file)
    while True:
        # Read data in chunks.
        # NOTE(review): narrowed from a bare `except:` — the reader is
        # expected to raise when no rows are left; a bare except would also
        # swallow KeyboardInterrupt/SystemExit.
        try:
            data = readcsv(infile, range(10), lines_read, chunk_size)
        except Exception:
            break
        # Now feed chunk
        algo.compute(data)
        lines_read += data.shape[0]

    # All chunks are done, now finalize the computation
    result = algo.finalize()

    # result provides minimum, maximum, sum, sumSquares, sumSquaresCentered,
    # mean, secondOrderRawMoment, variance, standardDeviation, variation
    return result
def _daal_mean_var(X):
    """Mean of the per-column variances of X, computed via two DAAL
    low order moments passes; falls back to NumPy when this daal4py
    build has no ``low_order_moments``."""
    fptype = getFPType(X)
    try:
        column_pass = daal4py.low_order_moments(
            fptype=fptype, method='defaultDense',
            estimatesToCompute='estimatesAll')
    except AttributeError:
        # old daal4py without the algorithm: plain NumPy fallback
        return np.var(X, axis=0).mean()

    # per-column centered sums of squares, stacked into one column
    centered_sq = column_pass.compute(X).sumSquaresCentered.reshape((-1, 1))

    # second pass sums those per-column totals into a single value
    total_pass = daal4py.low_order_moments(
        fptype=fptype, method='defaultDense',
        estimatesToCompute='estimatesAll')
    grand_total = total_pass.compute(centered_sq).sum

    return (grand_total / X.size)[0, 0]
def main():
    """Distributed low order moments: each process reads and feeds its own
    slice of the input file; returns the combined result object."""
    infile = "./data/batch/covcormoments_dense.csv"

    # The file is known to hold 200 rows; carve it up between processes.
    skiprows, nrows = get_chunk_params(lines_count=200,
                                       chunks_count=d4p.num_procs(),
                                       chunk_number=d4p.my_procid())

    # Each process reads only its own chunk of the file.
    data = read_csv(infile, sr=skiprows, nr=nrows)

    # Distributed-mode algorithm; compute merges across processes.
    res = d4p.low_order_moments(method='defaultDense',
                                distributed=True).compute(data)

    # result provides minimum, maximum, sum, sumSquares, sumSquaresCentered,
    # mean, secondOrderRawMoment, variance, standardDeviation, variation
    expected_shape = (1, data.shape[1])
    for attr in ('minimum', 'maximum', 'sum', 'sumSquares',
                 'sumSquaresCentered', 'mean', 'secondOrderRawMoment',
                 'variance', 'standardDeviation', 'variation'):
        assert getattr(res, attr).shape == expected_shape

    return res
def _daal_var(X):
    """Threaded DAAL computation of the population variance of the
    flattened X (sumSquaresCentered / size).

    NOTE(review): the original docstring said "X.std()", but the returned
    quantity is ssc / n, i.e. the variance — confirm against callers.
    Falls back to ``np.var`` when daal4py lacks ``low_order_moments``.
    """
    fptype = getFPType(X)
    try:
        moments = daal4py.low_order_moments(
            fptype=fptype, method='defaultDense',
            estimatesToCompute='estimatesMeanVariance')
    except AttributeError:
        return np.var(X)

    # single-column view so DAAL produces one centered sum of squares
    centered_sq = moments.compute(X.reshape(-1, 1)).sumSquaresCentered
    return centered_sq[0, 0] / X.size
def main(readcsv=read_csv, method="defaultDense"):
    """Batch low order moments over a dense CSV file.

    Reads the first ten columns of the input, runs the algorithm with the
    given ``method`` and returns the result object.
    """
    infile = "./data/batch/covcormoments_dense.csv"
    data = readcsv(infile, range(10))

    # single batch computation
    res = d4p.low_order_moments(method=method).compute(data)

    # result provides minimum, maximum, sum, sumSquares, sumSquaresCentered,
    # mean, secondOrderRawMoment, variance, standardDeviation, variation
    expected_shape = (1, data.shape[1])
    for attr in ('minimum', 'maximum', 'sum', 'sumSquares',
                 'sumSquaresCentered', 'mean', 'secondOrderRawMoment',
                 'variance', 'standardDeviation', 'variation'):
        assert getattr(res, attr).shape == expected_shape

    return res
def main():
    """Batch low order moments with default settings; sanity-checks the
    shape of every result attribute before returning the result."""
    infile = "./data/batch/covcormoments_dense.csv"
    data = read_csv(infile, range(10))

    # compute with algorithm defaults
    res = d4p.low_order_moments().compute(data)

    # result provides minimum, maximum, sum, sumSquares, sumSquaresCentered,
    # mean, secondOrderRawMoment, variance, standardDeviation, variation —
    # each one row by number-of-columns.
    row_shape = (1, data.shape[1])
    for attr in ('minimum', 'maximum', 'sum', 'sumSquares',
                 'sumSquaresCentered', 'mean', 'secondOrderRawMoment',
                 'variance', 'standardDeviation', 'variation'):
        assert getattr(res, attr).shape == row_shape

    return res
def compute(data, method):
    """Run a batch low order moments computation on ``data`` with the
    requested ``method`` and return the result object."""
    return d4p.low_order_moments(method=method).compute(data)
def main(readcsv=None, method='defaultDense'):
    """Streaming low order moments computed three ways — classic CPU,
    SYCL GPU (best-effort) and SYCL CPU — checking that the SYCL results
    match the classic ones.

    Parameters
    ----------
    readcsv : callable or None
        Passed through to the ``read_next`` chunk generator.
    method : str
        Kept for interface parity; the streaming algorithm is created
        with its defaults.

    Returns
    -------
    The classic (non-SYCL) finalized result object.
    """
    infile = os.path.join('..', 'data', 'batch', 'covcormoments_dense.csv')

    def stream_moments(wrap_sycl=False):
        # One full streaming pass over the file: feed every chunk, then
        # finalize. When wrap_sycl is set, chunks go through a SYCL buffer.
        algo = d4p.low_order_moments(streaming=True)
        for chunk in read_next(infile, 55, readcsv):
            if wrap_sycl:
                chunk = sycl_buffer(to_numpy(chunk))
            algo.compute(chunk)
        return algo.finalize()

    result_names = ['minimum', 'maximum', 'sum', 'sumSquares',
                    'sumSquaresCentered', 'mean', 'secondOrderRawMoment',
                    'variance', 'standardDeviation', 'variation']

    # Classic way: computations on CPU, no SYCL context.
    result_classic = stream_moments()

    # Best-effort GPU run: a GPU (or GPU support) may be absent, so any
    # failure — including a mismatch assertion, as in the original code —
    # is deliberately swallowed.
    # NOTE(review): narrowed from a bare `except:` so KeyboardInterrupt /
    # SystemExit still propagate.
    try:
        with sycl_context('gpu'):
            result_gpu = stream_moments(wrap_sycl=True)
            for name in result_names:
                assert np.allclose(getattr(result_classic, name),
                                   getattr(result_gpu, name))
    except Exception:
        pass

    # SYCL CPU run: must succeed and must match the classic result.
    with sycl_context('cpu'):
        result_cpu = stream_moments(wrap_sycl=True)

    # result provides minimum, maximum, sum, sumSquares, sumSquaresCentered,
    # mean, secondOrderRawMoment, variance, standardDeviation, variation
    for name in result_names:
        assert np.allclose(getattr(result_classic, name),
                           getattr(result_cpu, name))

    return result_classic
def compute(data, method):
    """Run a single-precision (fptype='float') batch low order moments
    computation on ``data`` and return the result object."""
    algorithm = d4p.low_order_moments(method=method, fptype='float')
    return algorithm.compute(data)