def main(readcsv=read_csv, method='defaultDense'):
    """Compute low order moments of a CSV file by streaming it in chunks.

    Parameters
    ----------
    readcsv : callable
        Reader called as ``readcsv(path, range(10), lines_read, chunk_size)``.
        Presumably the arguments are (columns, rows to skip, rows to read) --
        confirm against the reader's definition. It must return an object
        exposing ``shape``.
    method : str
        Accepted for signature parity with the other examples; this
        streaming variant does not forward it to the algorithm.

    Returns
    -------
    The finalized result, which provides minimum, maximum, sum, sumSquares,
    sumSquaresCentered, mean, secondOrderRawMoment, variance,
    standardDeviation, variation.
    """
    infile = "./data/batch/covcormoments_dense.csv"

    # Configure a low order moments object for streaming
    algo = d4p.low_order_moments(streaming=True)

    chunk_size = 55
    lines_read = 0
    # read and feed chunk by chunk
    while True:
        # Any error raised by the reader is treated as "no more data".
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate.
        try:
            data = readcsv(infile, range(10), lines_read, chunk_size)
        except Exception:
            break
        # A reader that returns an empty chunk at end of input (instead of
        # raising) would never advance lines_read; stop instead of spinning.
        if data.shape[0] == 0:
            break
        # Now feed chunk
        algo.compute(data)
        lines_read += data.shape[0]

    # All chunks are done, now finalize the computation
    return algo.finalize()
Beispiel #2
0
def _daal_mean_var(X):
    """Return the mean over columns of the per-column variance of ``X``.

    Uses two passes of daal4py low order moments: the first yields the
    centered sum of squares of ``X``, the second sums those values. The
    total is divided by ``X.size``. If constructing the daal4py algorithm
    raises ``AttributeError``, falls back to ``np.var(X, axis=0).mean()``.
    """
    fpt = getFPType(X)

    def _new_moments():
        # Both passes use an identically configured algorithm object.
        return daal4py.low_order_moments(fptype=fpt,
                                         method='defaultDense',
                                         estimatesToCompute='estimatesAll')

    try:
        first_pass = _new_moments()
    except AttributeError:
        return np.var(X, axis=0).mean()

    # Centered sums of squares, reshaped into a single column so that the
    # second pass can add them up.
    centered = first_pass.compute(X).sumSquaresCentered.reshape((-1, 1))
    second_pass = _new_moments()
    totals = second_pass.compute(centered)
    return (totals.sum / X.size)[0, 0]
Beispiel #3
0
def main():
    """Compute low order moments in distributed mode.

    Each process reads its own slice of the input file (the slice bounds
    come from ``get_chunk_params``) and passes it to an algorithm created
    with ``distributed=True``.

    Returns the result object, which provides minimum, maximum, sum,
    sumSquares, sumSquaresCentered, mean, secondOrderRawMoment, variance,
    standardDeviation, variation.
    """
    infile = "./data/batch/covcormoments_dense.csv"

    # The file is known to hold 200 lines; split them between processes.
    n_procs = d4p.num_procs()
    proc_id = d4p.my_procid()
    skiprows, nrows = get_chunk_params(lines_count=200,
                                       chunks_count=n_procs,
                                       chunk_number=proc_id)

    # This process' share of the file
    data = read_csv(infile, sr=skiprows, nr=nrows)

    # Distributed-mode algorithm, computed in one step
    res = d4p.low_order_moments(method='defaultDense',
                                distributed=True).compute(data)

    # Every moment is expected to be a single row with one entry per column
    moment_names = (
        'minimum', 'maximum', 'sum', 'sumSquares', 'sumSquaresCentered',
        'mean', 'secondOrderRawMoment', 'variance', 'standardDeviation',
        'variation',
    )
    for name in moment_names:
        assert getattr(res, name).shape == (1, data.shape[1])

    return res
Beispiel #4
0
def _daal_var(X):
    """DAAL-based computation of the variance of all elements of X.

    ``X`` is reshaped to a single column, its centered sum of squares is
    taken from daal4py low order moments and divided by ``X.size``. Falls
    back to ``np.var(X)`` if constructing the daal4py algorithm raises
    ``AttributeError``.
    """
    fpt = getFPType(X)
    try:
        moments = daal4py.low_order_moments(
            fptype=fpt,
            method='defaultDense',
            estimatesToCompute='estimatesMeanVariance',
        )
    except AttributeError:
        return np.var(X)

    # Treat every element of X as one observation of a single feature.
    as_column = X.reshape(-1, 1)
    centered = moments.compute(as_column).sumSquaresCentered
    return centered[0, 0] / X.size
def main(readcsv=read_csv, method="defaultDense"):
    """Compute low order moments of the example CSV file in batch mode.

    ``readcsv`` is called as ``readcsv(path, range(10))`` to load the data
    and ``method`` is forwarded to ``d4p.low_order_moments``.

    Returns the result object, which provides minimum, maximum, sum,
    sumSquares, sumSquaresCentered, mean, secondOrderRawMoment, variance,
    standardDeviation, variation.
    """
    # Load the input
    data = readcsv("./data/batch/covcormoments_dense.csv", range(10))

    # Run the batch computation
    res = d4p.low_order_moments(method=method).compute(data)

    # Every moment is expected to be a single row with one entry per column
    moment_names = (
        'minimum', 'maximum', 'sum', 'sumSquares', 'sumSquaresCentered',
        'mean', 'secondOrderRawMoment', 'variance', 'standardDeviation',
        'variation',
    )
    for name in moment_names:
        assert getattr(res, name).shape == (1, data.shape[1])

    return res
Beispiel #6
0
def main():
    """Compute low order moments of the example CSV file in batch mode.

    Returns the result object, which provides minimum, maximum, sum,
    sumSquares, sumSquaresCentered, mean, secondOrderRawMoment, variance,
    standardDeviation, variation.
    """
    # Load the input
    data = read_csv("./data/batch/covcormoments_dense.csv", range(10))

    # Run the computation with the library's default settings
    res = d4p.low_order_moments().compute(data)

    # Check the moments in the same order as they are listed above; each is
    # expected to be a single row with one entry per column.
    for name in ('minimum', 'maximum', 'sum', 'sumSquares',
                 'sumSquaresCentered', 'mean', 'secondOrderRawMoment',
                 'variance', 'standardDeviation', 'variation'):
        assert getattr(res, name).shape == (1, data.shape[1])

    return res
def compute(data, method):
    """Run low order moments on ``data`` with the given ``method``."""
    return d4p.low_order_moments(method=method).compute(data)
def _stream_low_order_moments(infile, readcsv, as_sycl=False):
    """Feed ``infile`` in chunks of 55 rows to a streaming moments object.

    When ``as_sycl`` is true each chunk is converted with ``to_numpy`` and
    wrapped in a ``sycl_buffer`` before being passed to the algorithm.
    Returns the finalized result.
    """
    # Configure a low order moments object for streaming
    algo = d4p.low_order_moments(streaming=True)
    # get the generator (defined in stream.py) and iterate through chunks
    for chunk in read_next(infile, 55, readcsv):
        if as_sycl:
            chunk = sycl_buffer(to_numpy(chunk))
        algo.compute(chunk)
    # finalize computation
    return algo.finalize()


def main(readcsv=None, method='defaultDense'):
    """Stream low order moments classically, on GPU (if usable) and on CPU.

    The classic result is compared with the result computed inside
    ``sycl_context('cpu')`` and, when the GPU run succeeds, with the result
    computed inside ``sycl_context('gpu')``.

    Parameters
    ----------
    readcsv : callable or None
        Passed through to ``read_next``.
    method : str
        Accepted for signature parity with the other examples; not
        forwarded to the algorithm here.

    Returns
    -------
    The classic (no sycl context) result, which provides minimum, maximum,
    sum, sumSquares, sumSquaresCentered, mean, secondOrderRawMoment,
    variance, standardDeviation, variation.

    Raises
    ------
    AssertionError
        If a sycl result differs from the classic one.
    """
    infile = os.path.join('..', 'data', 'batch', 'covcormoments_dense.csv')
    moment_names = (
        'minimum', 'maximum', 'sum', 'sumSquares', 'sumSquaresCentered',
        'mean', 'secondOrderRawMoment', 'variance', 'standardDeviation',
        'variation',
    )

    # Using of the classic way (computations on CPU)
    result_classic = _stream_low_order_moments(infile, readcsv)

    # It is possible to specify to make the computations on GPU.
    # The GPU run is best-effort: any ordinary error while running it
    # (e.g. the device being unavailable) skips the GPU check. Only the run
    # itself is guarded, so a result mismatch is no longer swallowed, and
    # KeyboardInterrupt / SystemExit still propagate.
    try:
        with sycl_context('gpu'):
            result_gpu = _stream_low_order_moments(infile, readcsv,
                                                   as_sycl=True)
    except Exception:
        result_gpu = None
    if result_gpu is not None:
        for name in moment_names:
            assert np.allclose(getattr(result_classic, name),
                               getattr(result_gpu, name))

    # It is possible to specify to make the computations on CPU
    with sycl_context('cpu'):
        result_cpu = _stream_low_order_moments(infile, readcsv, as_sycl=True)

    for name in moment_names:
        assert np.allclose(getattr(result_classic, name),
                           getattr(result_cpu, name))

    return result_classic
Beispiel #9
0
def compute(data, method):
    """Run low order moments on ``data`` with ``method`` and fptype 'float'."""
    return d4p.low_order_moments(method=method, fptype='float').compute(data)