def test():
    """Compare nearest-centroid assignment between legate and NumPy.

    Seeds NumPy's RNG, draws 2,000,000 random 3-D points, picks four of
    them (without replacement) as centroids, and computes the distance from
    every point to every centroid twice: once through ``lg`` and once
    through ``np``. The test passes when both implementations agree on
    which points belong to cluster 0.
    """
    num_points, num_dims, num_centroids = 2000000, 3, 4

    np.random.seed(50)
    points_np = np.random.randn(num_points, num_dims)
    points_lg = lg.array(points_np)

    # The candidate indices come from a legate array, but the draw itself
    # is made by NumPy so both sides use the same centroids.
    picks_np = np.random.choice(
        lg.arange(len(points_lg)), num_centroids, False
    )
    picks_lg = lg.array(picks_np)

    centers_lg = points_lg[picks_lg]
    centers_np = points_np[picks_np]

    # Row k holds the Euclidean distance of every point to centroid k.
    dist_lg = lg.zeros((num_centroids, len(points_lg)))
    dist_np = np.zeros((num_centroids, len(points_np)))
    for k in range(num_centroids):
        dist_lg[k] = lg.linalg.norm(points_lg - centers_lg[k], axis=1)
        dist_np[k] = np.linalg.norm(points_np - centers_np[k], axis=1)

    nearest_lg = lg.argmin(dist_lg, axis=0)
    nearest_np = np.argmin(dist_np, axis=0)

    # Only the membership of cluster 0 is compared.
    assert lg.array_equal(
        lg.where(nearest_lg == 0), np.where(nearest_np == 0)
    )
Exemple #2
0
def test():
    """Print the sum of an all-ones matrix and a matrix of running integers.

    Builds a 100 x 100 matrix of ones and a 100 x 100 matrix holding
    0 .. 9999 in row-major order, adds them element-wise and prints the
    result.
    """
    side = 100
    ones = np.ones((side, side))
    # side * side elements are exactly what the reshape below requires.
    ramp = np.arange(side * side).reshape((side, side))
    print(ones + ramp)
Exemple #3
0
def test():
    """Check legate array-creation routines against their NumPy twins.

    Covers ``array``, ``empty``, ``zeros``, ``ones``, ``full``, the
    corresponding ``*_like`` variants and several ``arange`` call forms.
    Arrays with defined contents are compared by value and dtype; the
    ``empty`` variants are compared by shape and dtype only.
    """

    def check_values(actual, expected):
        # Contents and dtype must both agree with the reference array.
        assert np.array_equal(actual, expected)
        assert actual.dtype == expected.dtype

    def check_layout(actual, expected):
        # For uninitialized arrays only shape and dtype are compared.
        assert lg.shape(actual) == np.shape(expected)
        assert actual.dtype == expected.dtype

    seq_lg = lg.array([1, 2, 3])
    seq_np = np.array([1, 2, 3])
    # A legate array built from a NumPy array must match one built from
    # the equivalent Python list.
    check_values(seq_lg, lg.array(seq_np))

    shape = (2, 3)
    check_layout(lg.empty(shape), np.empty(shape))
    check_values(lg.zeros(shape), np.zeros(shape))
    check_values(lg.ones(shape), np.ones(shape))
    check_values(lg.full(shape, 3), np.full(shape, 3))

    check_layout(lg.empty_like(seq_lg), np.empty_like(seq_np))
    check_values(lg.zeros_like(seq_lg), np.zeros_like(seq_np))
    check_values(lg.ones_like(seq_lg), np.ones_like(seq_np))
    check_values(lg.full_like(seq_lg, 3), np.full_like(seq_np, 3))

    # arange: stop only, explicit dtype, float start/stop, start/stop/step.
    arange_cases = (
        ((10,), {}),
        ((10,), {"dtype": np.int32}),
        ((2.0, 10.0), {}),
        ((2, 30, 3), {}),
    )
    for args, kwargs in arange_cases:
        check_values(lg.arange(*args, **kwargs), np.arange(*args, **kwargs))

    # Disabled: full_like with a string dtype.
    # xfls = lg.full_like(x, '3', dtype=np.str_)
    # yfls = np.full_like(y, '3', dtype=np.str_)
    # assert(lg.array_equal(xfls, yfls))
    # assert(xfls.dtype == yfls.dtype)
Exemple #4
0
def test():
    """Exercise basic indexing and slice assignment on legate arrays.

    Checks row and column extraction from a 4 x 4 array, indexing into a
    sliced view, scalar and array assignment into slices of a 3-D array,
    selection of the leading sub-array of a 3-D array, and assignment
    between two slices of the same 1-D array. NumPy results (or literal
    lists) serve as the reference.
    """
    rows = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]]

    grid = lg.array(rows)
    assert np.array_equal(grid[0, :], [1, 2, 3, 4])
    assert np.array_equal(grid[1:2, :], [[5, 6, 7, 8]])

    expected_columns = (
        [1, 5, 9, 13],
        [2, 6, 10, 14],
        [3, 7, 11, 15],
        [4, 8, 12, 16],
    )
    for col, expected in enumerate(expected_columns):
        assert np.array_equal(grid[:, col], expected)

    # Index into a sliced view and compare row by row with NumPy.
    grid = lg.array(rows)
    window = grid[1:4, 1:3]
    grid_np = np.array(rows)
    window_np = grid_np[1:4, 1:3]
    for row in range(3):
        assert np.array_equal(window[row, :], window_np[row, :])

    # TODO: Legate needs partition by phase for this to work efficiently
    # z = x[1:4:2, 1:4:2]
    # assert(lg.array_equal(z[0, :], [6, 8]))
    # assert(lg.array_equal(z[1, :], [14, 16]))

    # Scalar and array assignment into slices of a 3-D array.
    cube_np = np.random.random((2, 3, 4))
    patch_np = np.random.random((3, 2))
    cube = lg.array(cube_np)
    patch = lg.array(patch_np)
    cube[1, :, 0] = 1
    cube[1, :, 1:3] = patch
    cube_np[1, :, 0] = 1
    cube_np[1, :, 1:3] = patch_np
    assert np.array_equal(cube, cube_np)
    print(patch_np)
    print(cube_np)
    print(cube)

    # A single integer index selects the leading sub-array.
    volume_np = np.random.random((2, 3, 4))
    volume = lg.array(volume_np)
    assert np.array_equal(volume[0], volume_np[0])

    # TODO: Legate needs 4-D arrays for this to work correctly
    # secondslice = natestg[:,np.newaxis,:,:]
    # secondslicegold = natest[:,np.newaxis,:,:]
    # assert(lg.array_equal(secondslice, secondslicegold))

    # TODO: Legate needs 4-D arrays for this to work correctly
    # thirdslice = natestg[np.newaxis]
    # thirdslicegold = natest[np.newaxis]
    # print(thirdslice)
    # print(thirdslicegold)
    # assert(lg.array_equal(thirdslice, thirdslicegold))

    # Copy the upper half of a 1-D array onto its lower half.
    seq = lg.arange(10)
    seq[0:5] = seq[5:10]
    assert np.array_equal(seq, [5, 6, 7, 8, 9, 5, 6, 7, 8, 9])
Exemple #5
0
def test(
    size_per_proc=1000,
    num_procs=1,
    num_runs=1,
    scale_lhs_only=False,
    package="legate",
    ty="int64",
    key_length=40,
    pad_side="right",
):
    if package == "legate":
        from legate import numpy as np, pandas as pd
        from legate.numpy.random import randn

    elif package == "cudf":
        import cudf as pd
        import cupy as np
        from cupy.random import randn

    elif package == "pandas":
        import numpy as np
        import pandas as pd
        from numpy.random import randn

    else:
        print("Unknown dataframe package: %s" % package)
        assert False

    if package == "legate":
        from legate.timing import time

        def block():
            pass

        def get_timestamp():
            return time()

        def compute_elapsed_time(start_ts, stop_ts):
            return (stop_ts - start_ts) / 1000.0

    else:
        import time

        def block():
            pass

        get_timestamp = time.process_time

        def compute_elapsed_time(start_ts, stop_ts):
            return (stop_ts - start_ts) * 1000.0

    size = size_per_proc * num_procs

    key = np.arange(size, dtype=np.int64) % size_per_proc
    payload = randn(size)

    df = pd.DataFrame({"key": key, "payload": payload})
    if ty == "int64":
        df["key"] = df["key"] * -1
        ascending = True
    if ty == "string":
        df["key"] = (
            df["key"]
            .astype(str)
            .str.pad(width=key_length, side=pad_side, fillchar="0")
        )
        ascending = False

    print("Size: %u, Key dtype: %s" % (size, df["key"].dtype))

    block()

    for i in range(num_runs):
        start_ts = get_timestamp()

        result = df.sort_values("key", ignore_index=True, ascending=ascending)

        stop_ts = get_timestamp()

        print(
            "[Run %d] Elapsed time: %lf ms"
            % (i + 1, compute_elapsed_time(start_ts, stop_ts))
        )

        del result
Exemple #6
0
def test(
    size_per_proc=1000,
    num_procs=1,
    num_runs=1,
    ty="int64",
    key_length=10,
    scale_lhs_only=False,
    package="legate",
):
    if package == "legate":
        from legate import numpy as np, pandas as pd
        from legate.numpy.random import randn

    elif package == "cudf":
        import cudf as pd
        import cupy as np
        from cupy.random import randn

    elif package == "pandas":
        import numpy as np
        import pandas as pd
        from numpy.random import randn

    elif package == "dask" or package == "daskcudf":
        import dask.array as da
        import dask.dataframe as df
        import numpy as np

        if package == "daskcudf":
            import cudf

    else:
        print("Unknown dataframe package: %s" % package)
        assert False

    if package == "legate":
        from legate.timing import time

        def block(*args):
            pass

        def get_timestamp():
            return time()

        def compute_elapsed_time(start_ts, stop_ts):
            return (stop_ts - start_ts) / 1000.0

    elif package == "dask" or package == "daskcudf":
        import time

        def block(*args):
            for arg in args:
                arg.compute()

        get_timestamp = time.process_time

        def compute_elapsed_time(start_ts, stop_ts):
            return (stop_ts - start_ts) * 1000.0

    else:
        import time

        def block(*args):
            pass

        get_timestamp = time.process_time

        def compute_elapsed_time(start_ts, stop_ts):
            return (stop_ts - start_ts) * 1000.0

    if scale_lhs_only:
        size = size_per_proc * num_procs
        size_rhs = size // 3

        if package == "dask" or package == "daskcudf":
            # Dask array does not have randn so use arrange
            c1 = da.arange(size, dtype=np.float64, chunks=size_per_proc)
            c2 = da.arange(
                size_rhs,
                dtype=np.float64,
                chunks=(size_per_proc + num_procs - 1) // num_procs,
            )
        else:
            c1 = randn(size)
            c2 = randn(size_rhs)

        key_dtype = np.int64
        if package == "dask" or package == "daskcudf":
            key_left = (
                da.arange(size, dtype=key_dtype, chunks=size_per_proc)
                % size_per_proc
            )
            key_right = da.arange(
                size_rhs,
                dtype=key_dtype,
                chunks=(size_per_proc + num_procs - 1) // num_procs,
            )
            da.multiply(key_right, 3, out=key_right)
        else:
            key_left = np.arange(size, dtype=key_dtype) % size_per_proc
            key_right = np.arange(size_rhs, dtype=key_dtype)
            np.multiply(key_right, 3, out=key_right)

    else:
        size = size_per_proc * num_procs
        size_rhs = size

        if package == "dask" or package == "daskcudf":
            # Dask array does not have randn so use arrange
            c1 = da.arange(size, dtype=np.float64, chunks=size_per_proc)
            c2 = da.arange(size, dtype=np.float64, chunks=size_per_proc)
        else:
            c1 = randn(size)
            c2 = randn(size)

        key_dtype = np.int64
        if package == "dask" or package == "daskcudf":
            key_left = da.arange(size, dtype=key_dtype, chunks=size_per_proc)
            key_right = da.arange(size, dtype=key_dtype, chunks=size_per_proc)
        else:
            key_left = np.arange(size, dtype=key_dtype)
            key_right = np.arange(size, dtype=key_dtype)
        # np.floor_divide(key_right, 3, out=key_right)
        # np.multiply(key_right, 3, out=key_right)

    if package == "dask" or package == "daskcudf":
        df1 = df.multi.concat(
            [df.from_dask_array(a) for a in [c1, key_left]], axis=1
        )
        df1.columns = ["c1", "key"]
        df2 = df.multi.concat(
            [df.from_dask_array(a) for a in [c2, key_right]], axis=1
        )
        df2.columns = ["c2", "key"]
        if package == "daskcudf":
            df1 = df1.map_partitions(cudf.from_pandas)
            df2 = df2.map_partitions(cudf.from_pandas)
    else:
        df1 = pd.DataFrame({"c1": c1, "key": key_left})
        df2 = pd.DataFrame({"c2": c2, "key": key_right})
    df2["key"] = df2["key"] // 3 * 3

    if ty == "string":
        df1["key"] = (
            df1["key"]
            .astype("string")
            .str.pad(width=key_length, side="both", fillchar="0")
        )
        df2["key"] = (
            df2["key"]
            .astype("string")
            .str.pad(width=key_length, side="both", fillchar="0")
        )

    print(
        "Type: inner, Size: %u x %u, Key dtype: %s"
        % (size, size_rhs, str(key_dtype))
    )

    block(df1, df2)

    for i in range(num_runs):
        start_ts = get_timestamp()

        df_result = df1.merge(df2, on="key")

        block(df_result)

        stop_ts = get_timestamp()

        print(
            "[Run %d] Elapsed time: %lf ms"
            % (i + 1, compute_elapsed_time(start_ts, stop_ts))
        )

        del df_result