Example #1
0
def main():
    """Run the distributed SVD on this process's CSV file and self-check it.

    The same algorithm object computes the decomposition twice: once from
    the file name and once from the array loaded with ``loadtxt``. The
    shapes of the first result are checked against the loaded array, and
    both results are asserted to agree within ``atol=1e-05``.

    Returns:
        The tuple ``(data, result1)``: the loaded array and the result of
        the file-based computation.
    """
    # The file index is the process id plus one.
    file_index = d4p.my_procid() + 1
    infile = "./data/distributed/svd_{}.csv".format(file_index)

    svd_algo = d4p.svd(distributed=True)

    # First pass: pass the file name itself rather than a table/array.
    result1 = svd_algo.compute(infile)

    # Second pass: load the data ourselves and pass the numpy array.
    data = loadtxt(infile, delimiter=',')
    result2 = svd_algo.compute(data)

    # Shape checks on leftSingularMatrix, singularValues, rightSingularMatrix.
    assert result1.leftSingularMatrix.shape == data.shape
    assert result1.singularValues.shape == (1, data.shape[1])
    assert result1.rightSingularMatrix.shape == (data.shape[1],) * 2

    # Both ways of feeding the input must give the same decomposition.
    for field in ("leftSingularMatrix", "rightSingularMatrix",
                  "singularValues"):
        from_file = getattr(result1, field)
        from_array = getattr(result2, field)
        assert allclose(from_file, from_array, atol=1e-05)

    return data, result1
Example #2
0
    def svd(self, Data_Path, target, n):
        """Time a daal4py SVD computed in SPMD mode.

        Args:
            Data_Path: Prefix of the per-process CSV files; the process id
                plus one and ``".csv"`` are appended to it.
            target: Column label(s) dropped from the loaded frame before
                the computation.
            n: Passed to ``d4p.daalinit`` as ``nthreads``.

        The elapsed compute time is stored in
        ``self.latency["Parallel_SVD_SPMD_Time"]``. Nothing is returned.
        """
        # Enter SPMD mode.
        d4p.daalinit(nthreads=n)

        # Load this process's file and remove the target column(s).
        file_suffix = str(d4p.my_procid() + 1) + ".csv"
        features = pd.read_csv(Data_Path + file_suffix).drop(target, axis=1)

        solver = d4p.svd(distributed=True)
        self.logger.info('Training the SVD in pydaal SPMD Mode')

        # Only the compute call is timed.
        started_at = time.time()
        result = solver.compute(features)
        elapsed = time.time() - started_at
        self.latency["Parallel_SVD_SPMD_Time"] = elapsed

        # Every process holds the result, but only process 0 prints it.
        is_root = d4p.my_procid() == 0
        if is_root:
            print("SVD completed", result)

        self.logger.info('Completed SVD in pydaal SPMD Mode')
        d4p.daalfini()
Example #3
0
def _daal4py_svd(X):
    """Compute the SVD of ``X`` with daal4py's ``defaultDense`` method.

    ``X`` is first passed through ``check_array`` restricted to float64 /
    float32, and the floating-point type given to the algorithm comes from
    ``getFPType`` on the checked array. Both singular matrices are
    requested with ``'requiredInPackedForm'``.

    Returns:
        A tuple ``(U, s, V)`` holding the result's ``leftSingularMatrix``,
        its ``singularValues`` flattened with ``np.ravel``, and its
        ``rightSingularMatrix``.
    """
    X = check_array(X, dtype=[np.float64, np.float32])

    svd_options = dict(
        fptype=getFPType(X),
        method='defaultDense',
        leftSingularMatrix='requiredInPackedForm',
        rightSingularMatrix='requiredInPackedForm',
    )
    outcome = daal4py.svd(**svd_options).compute(X)

    sigma = np.ravel(outcome.singularValues)
    return outcome.leftSingularMatrix, sigma, outcome.rightSingularMatrix
Example #4
0
def main(readcsv=read_csv, method='defaultDense'):
    """Run the batch SVD on ``./data/batch/svd.csv`` and verify the result.

    The decomposition is computed twice, each time with a fresh algorithm
    object: once from the file name and once from the array returned by
    ``readcsv``. The two results are asserted to agree within
    ``atol=1e-07``, the shapes of the first result are checked against the
    loaded data, and the product of the three factors is asserted to be
    close to the data.

    Args:
        readcsv: Loader called as ``readcsv(infile, range(18), t=np.float32)``.
        method: Not used by this function body.

    Returns:
        The tuple ``(data, result1)``. ``data`` is the loaded input, or its
        ``toarray()`` form when it has that method.
    """
    infile = "./data/batch/svd.csv"

    # First pass: pass the file name itself rather than a table/array.
    result1 = d4p.svd().compute(infile)

    # Second pass: load the data ourselves and pass the loaded array.
    array_algo = d4p.svd()
    data = readcsv(infile, range(18), t=np.float32)
    result2 = array_algo.compute(data)

    # Both ways of feeding the input must give the same decomposition.
    for field in ("leftSingularMatrix", "rightSingularMatrix",
                  "singularValues"):
        assert np.allclose(getattr(result1, field),
                           getattr(result2, field),
                           atol=1e-07)

    # Shape checks against the loaded data.
    assert result1.singularValues.shape == (1, data.shape[1])
    assert result1.rightSingularMatrix.shape == (data.shape[1],) * 2
    assert result1.leftSingularMatrix.shape == data.shape

    # Densify so the reconstruction check also works with scipy's csr_matrix.
    if hasattr(data, 'toarray'):
        data = data.toarray()

    # Multiply the factors back together and compare with the input.
    scaled_left = np.matmul(result1.leftSingularMatrix,
                            np.diag(result1.singularValues[0]))
    rebuilt = np.matmul(scaled_left, result1.rightSingularMatrix)
    assert np.allclose(data, rebuilt)

    return (data, result1)
Example #5
0
def main(readcsv=read_csv, method='defaultDense'):
    """Compute an SVD in streaming mode over four CSV files.

    The files ``./data/distributed/svd_1.csv`` to ``svd_4.csv`` are passed
    by name, one after another, to a single streaming algorithm object,
    which is then finalized.

    Args:
        readcsv: Not used by this function body.
        method: Not used by this function body.

    Returns:
        The object returned by ``algo.finalize()``. Per the original
        comment, SVD results provide leftSingularMatrix,
        rightSingularMatrix and singularValues.
    """
    algo = d4p.svd(streaming=True)

    # Feed the files directly by name, one at a time.
    for part in range(1, 5):
        algo.compute("./data/distributed/svd_{}.csv".format(part))

    # Every file has been consumed; finalize the computation.
    return algo.finalize()
Example #6
0
            return
        a = np.genfromtxt(file,
                          delimiter=',',
                          skip_header=s,
                          max_rows=chunksize)
        if a.shape[0] == 0:
            return
        if a.ndim == 1:
            a = a[:, np.newaxis]
        # last chunk is usually smaller, if not, numpy will print warning in next iteration
        if chunksize > a.shape[0]:
            s = -1
        else:
            s += a.shape[0]
        yield a


# Get the chunk generator over the batch CSV file.
# NOTE(review): the second argument (112) is presumably the chunk size in
# rows; the `def` line of read_next is above this view -- confirm there.
rn = read_next("./data/batch/svd.csv", 112)

# Create an SVD algorithm object in streaming mode.
algo = d4p.svd(streaming=True)

# Iterate through the chunks/stream, feeding each chunk to the algorithm.
for chunk in rn:
    algo.compute(chunk)

# All chunks have been fed; finalize the computation and print the
# singular values of the result.
res = algo.finalize()
print("Singular values:\n", res.singularValues)
Example #7
0
def daal4py_svd(A, k):  # only dense inputs
    """Run a default daal4py SVD on ``A``; the result is not returned.

    ``k`` is accepted but not used by this function body.
    """
    solver = d4p.svd()
    solver.compute(A)
Example #8
0
# daal4py SVD example for distributed memory systems; SPMD mode
# run like this:
#    mpirun -n 4 python ./svd_spmd.py

import daal4py as d4p
from numpy import loadtxt, allclose

if __name__ == "__main__":
    # Initialize SPMD mode
    d4p.daalinit()

    # Each process gets its own data: the file index is the process id
    # plus one.
    infile = "./data/distributed/svd_{}.csv".format(d4p.my_procid() + 1)

    # Configure an SVD object for distributed computation
    algo = d4p.svd(distributed=True)

    # Let's provide a file directly, not a table/array
    result1 = algo.compute(infile)

    # We can also load the data ourselves and provide the numpy array
    data = loadtxt(infile, delimiter=',')
    result2 = algo.compute(data)

    # SVD result objects provide leftSingularMatrix, rightSingularMatrix
    # and singularValues.
    # leftSingularMatrix not yet supported in dist mode.
    # Use an identity test: `== None` on a NumPy array compares elementwise
    # and makes the assert raise ValueError instead of failing cleanly.
    assert result1.leftSingularMatrix is None
    assert result2.leftSingularMatrix is None

    # Both ways of feeding the input must give the same right singular
    # matrix and singular values.
    assert allclose(result1.rightSingularMatrix,
                    result2.rightSingularMatrix,
                    atol=1e-05)
    assert allclose(result1.singularValues, result2.singularValues, atol=1e-07)