import daal4py as d4p

# read_csv and get_chunk_params are assumed to come from the helper module
# shipped alongside the daal4py examples (readcsv.py); the exact import
# below is an assumption about that layout.
from readcsv import pd_read_csv as read_csv, get_chunk_params


def main():
    infile = "./data/batch/covcormoments_dense.csv"

    # We know the number of lines in the file and use this
    # to separate data between processes
    skiprows, nrows = get_chunk_params(
        lines_count=200,
        chunks_count=d4p.num_procs(),
        chunk_number=d4p.my_procid(),
    )

    # Each process reads its chunk of the file
    data = read_csv(infile, sr=skiprows, nr=nrows)

    # Create algorithm with distributed mode
    alg = d4p.low_order_moments(method="defaultDense", distributed=True)

    # Perform computation
    res = alg.compute(data)

    # The result provides minimum, maximum, sum, sumSquares, sumSquaresCentered,
    # mean, secondOrderRawMoment, variance, standardDeviation and variation;
    # each is a 1 x n matrix with one value per input column
    assert all(
        getattr(res, name).shape == (1, data.shape[1])
        for name in [
            "minimum",
            "maximum",
            "sum",
            "sumSquares",
            "sumSquaresCentered",
            "mean",
            "secondOrderRawMoment",
            "variance",
            "standardDeviation",
            "variation",
        ]
    )

    return res
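# The examples above and below rely on get_chunk_params to give each process
# a disjoint slice of the 200-row input file. The helper below is a minimal
# illustrative sketch of that row partitioning, assuming an even split with
# the remainder spread over the first processes; the actual helper shipped
# with the examples may differ.
def get_chunk_params_sketch(lines_count, chunks_count, chunk_number):
    # Base number of rows per process
    min_nrows = lines_count // chunks_count
    # Rows left over after the even split
    rest_rows = lines_count - min_nrows * chunks_count
    if chunk_number < rest_rows:
        # The first rest_rows processes take one extra row each
        nrows = min_nrows + 1
        skiprows = chunk_number * nrows
    else:
        nrows = min_nrows
        skiprows = rest_rows * (min_nrows + 1) + (chunk_number - rest_rows) * min_nrows
    return skiprows, nrows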
# Second example: distributed covariance / correlation computation
# (uses the same imports and helpers as the example above)
def main():
    infile = "./data/batch/covcormoments_dense.csv"

    # We know the number of lines in the file and use this
    # to separate data between processes
    skiprows, nrows = get_chunk_params(
        lines_count=200,
        chunks_count=d4p.num_procs(),
        chunk_number=d4p.my_procid(),
    )

    # Each process reads its chunk of the file
    data = read_csv(infile, sr=skiprows, nr=nrows)

    # Create algorithm with distributed mode
    alg = d4p.covariance(method="defaultDense", distributed=True)

    # Perform computation
    res = alg.compute(data)

    # Covariance result objects provide correlation, covariance and mean;
    # covariance and correlation are n x n matrices, mean is 1 x n
    assert res.covariance.shape == (data.shape[1], data.shape[1])
    assert res.mean.shape == (1, data.shape[1])
    assert res.correlation.shape == (data.shape[1], data.shape[1])

    return res
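# A minimal SPMD driver sketch for either example above. daal4py's
# distributed mode needs the engine initialized before and finalized after
# the computation, and the script launched with one process per chunk,
# e.g. `mpirun -n 4 python covariance_spmd.py` (the script name and the
# root-process printout are assumptions, not part of the original examples).
if __name__ == "__main__":
    d4p.daalinit()  # initialize the distributed (MPI) engine
    res = main()
    # Every process ends up with the full result; report from the root only
    if d4p.my_procid() == 0:
        print("Covariance matrix:\n", res.covariance)
        print("Mean vector:\n", res.mean)
    d4p.daalfini()  # finalize the distributed engine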