def test():
    """Compare nearest-centroid assignment between ``lg`` and NumPy.

    Seeded random points are generated with NumPy and mirrored into an ``lg``
    array.  Four of the points are picked as centroids, the distance of every
    point to each centroid is computed with both libraries, and the indices
    of the points assigned to centroid 0 must match.
    """
    num_points = 2000000
    num_dims = 3
    num_centroids = 4

    np.random.seed(50)
    data_np = np.random.randn(num_points, num_dims)
    data_lg = lg.array(data_np)

    # Pick the centroid rows once and reuse the same indices on both sides.
    picks_np = np.random.choice(lg.arange(len(data_lg)), num_centroids, False)
    picks_lg = lg.array(picks_np)
    centroids_lg = data_lg[picks_lg]
    centroids_np = data_np[picks_np]

    dists_lg = lg.zeros((num_centroids, len(data_lg)))
    dists_np = np.zeros((num_centroids, len(data_np)))
    for row in range(num_centroids):
        # One row of distances per centroid.
        dists_lg[row] = lg.linalg.norm(data_lg - centroids_lg[row], axis=1)
        dists_np[row] = np.linalg.norm(data_np - centroids_np[row], axis=1)

    labels_lg = lg.argmin(dists_lg, axis=0)
    labels_np = np.argmin(dists_np, axis=0)

    assert lg.array_equal(lg.where(labels_lg == 0), np.where(labels_np == 0))
def test():
    """Add an all-ones ``N x N`` matrix to an ``N x N`` ramp and print the sum.

    The ramp holds the integers ``0 .. N*N - 1`` in row-major order, so the
    printed matrix holds ``1 .. N*N``.  Nothing is returned.
    """
    N = 100
    A = np.ones((N, N))
    # Derive the element count from N so the reshape stays valid if N changes.
    B = np.arange(N * N).reshape((N, N))
    C = A + B
    print(C)
def test():
    """Check that ``lg`` array-creation routines agree with NumPy.

    Covers ``array``, ``empty``/``zeros``/``ones``/``full``, their ``*_like``
    variants and several ``arange`` call forms.  Values and dtypes are
    compared, except for the ``empty`` variants, where only shape and dtype
    are compared.
    """

    def check_values(actual, expected):
        # Same contents and same dtype.
        assert np.array_equal(actual, expected)
        assert actual.dtype == expected.dtype

    def check_layout(actual, expected):
        # Same shape and dtype; contents are not compared.
        assert lg.shape(actual) == np.shape(expected)
        assert actual.dtype == expected.dtype

    base_lg = lg.array([1, 2, 3])
    base_np = np.array([1, 2, 3])
    # An lg array built from a list must equal one built from a NumPy array.
    check_values(base_lg, lg.array(base_np))

    shape = (2, 3)
    check_layout(lg.empty(shape), np.empty(shape))
    check_values(lg.zeros(shape), np.zeros(shape))
    check_values(lg.ones(shape), np.ones(shape))
    check_values(lg.full(shape, 3), np.full(shape, 3))

    check_layout(lg.empty_like(base_lg), np.empty_like(base_np))
    check_values(lg.zeros_like(base_lg), np.zeros_like(base_np))
    check_values(lg.ones_like(base_lg), np.ones_like(base_np))
    check_values(lg.full_like(base_lg, 3), np.full_like(base_np, 3))

    arange_cases = (
        ((10,), {}),
        ((10,), {"dtype": np.int32}),
        ((2.0, 10.0), {}),
        ((2, 30, 3), {}),
    )
    for args, kwargs in arange_cases:
        check_values(lg.arange(*args, **kwargs), np.arange(*args, **kwargs))

    # Disabled check for string-typed full_like:
    # xfls = lg.full_like(x, '3', dtype=np.str_)
    # yfls = np.full_like(y, '3', dtype=np.str_)
    # assert(lg.array_equal(xfls, yfls))
    # assert(xfls.dtype == yfls.dtype)
def test():
    """Exercise slicing reads and slice assignment on ``lg`` arrays.

    Row, column and sub-block slices of a 4x4 matrix are compared with
    expected values or with the same slice taken from a NumPy array.  Slice
    assignment is checked on a random 3-D array and on a 1-D range.
    """
    grid = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]]

    # Whole-row and whole-column reads.
    x = lg.array(grid)
    assert np.array_equal(x[0, :], [1, 2, 3, 4])
    assert np.array_equal(x[1:2, :], [[5, 6, 7, 8]])
    for col in range(4):
        assert np.array_equal(x[:, col], [row[col] for row in grid])

    # Rows of a rectangular sub-block must match the NumPy sub-block.
    x = lg.array(grid)
    xnp = np.array(grid)
    y = x[1:4, 1:3]
    ynp = xnp[1:4, 1:3]
    for row in range(3):
        assert np.array_equal(y[row, :], ynp[row, :])

    # TODO: Legate needs partition by phase for this to work efficiently
    # z = x[1:4:2, 1:4:2]
    # assert(lg.array_equal(z[0, :], [6, 8]))
    # assert(lg.array_equal(z[1, :], [14, 16]))

    # Scalar and array assignment into slices of a 3-D array.
    dnp = np.random.random((2, 3, 4))
    fnp = np.random.random((3, 2))
    d = lg.array(dnp)
    f = lg.array(fnp)

    d[1, :, 0] = 1
    d[1, :, 1:3] = f
    dnp[1, :, 0] = 1
    dnp[1, :, 1:3] = fnp

    assert np.array_equal(d, dnp)

    print(fnp)
    print(dnp)
    print(d)

    # Indexing the leading axis with a single integer.
    natest = np.random.random((2, 3, 4))
    natestg = lg.array(natest)
    assert np.array_equal(natestg[0], natest[0])

    # TODO: Legate needs 4-D arrays for this to work correctly
    # secondslice = natestg[:,np.newaxis,:,:]
    # secondslicegold = natest[:,np.newaxis,:,:]
    # assert(lg.array_equal(secondslice, secondslicegold))

    # TODO: Legate needs 4-D arrays for this to work correctly
    # thirdslice = natestg[np.newaxis]
    # thirdslicegold = natest[np.newaxis]
    # print(thirdslice)
    # print(thirdslicegold)
    # assert(lg.array_equal(thirdslice, thirdslicegold))

    # Copy the upper half of an array over its lower half.
    x = lg.arange(10)
    x[0:5] = x[5:10]
    assert np.array_equal(x, [5, 6, 7, 8, 9, 5, 6, 7, 8, 9])
def test(
    size_per_proc=1000,
    num_procs=1,
    num_runs=1,
    scale_lhs_only=False,
    package="legate",
    ty="int64",
    key_length=40,
    pad_side="right",
):
    """Time ``DataFrame.sort_values`` on a generated key/payload table.

    The table has ``size_per_proc * num_procs`` rows.  The ``key`` column
    cycles through ``0 .. size_per_proc - 1`` and ``payload`` comes from the
    selected package's ``randn``.  Each run prints its elapsed time in
    milliseconds; nothing is returned.

    Args:
        size_per_proc: Number of rows contributed per processor.
        num_procs: Multiplier applied to ``size_per_proc`` for the row count.
        num_runs: How many times the sort is executed and timed.
        scale_lhs_only: Not used by this function; kept so the signature
            stays unchanged.
        package: Dataframe implementation to benchmark: ``"legate"``,
            ``"cudf"`` or ``"pandas"``.
        ty: Key type.  ``"int64"`` sorts negated integer keys in ascending
            order; ``"string"`` sorts zero-padded string keys in descending
            order.
        key_length: Width the string keys are padded to (``ty="string"``).
        pad_side: Side passed to ``str.pad`` for string keys.

    Raises:
        ValueError: If ``package`` or ``ty`` is not a supported value.
    """
    if package == "legate":
        from legate import numpy as np, pandas as pd
        from legate.numpy.random import randn
        from legate.timing import time

        def get_timestamp():
            return time()

        def compute_elapsed_time(start_ts, stop_ts):
            # Presumably legate timestamps are in microseconds - TODO confirm.
            return (stop_ts - start_ts) / 1000.0

    elif package in ("cudf", "pandas"):
        import time

        if package == "cudf":
            import cudf as pd
            import cupy as np
            from cupy.random import randn
        else:
            import numpy as np
            import pandas as pd
            from numpy.random import randn

        # NOTE: process_time reports CPU seconds of this process, not
        # wall-clock time.
        get_timestamp = time.process_time

        def compute_elapsed_time(start_ts, stop_ts):
            return (stop_ts - start_ts) * 1000.0

    else:
        # Raise instead of assert so the check survives ``python -O``.
        raise ValueError("Unknown dataframe package: %s" % package)

    # Fail early with a clear message; otherwise ``ascending`` would be
    # unbound when the sort is finally issued.
    if ty not in ("int64", "string"):
        raise ValueError("Unknown key type: %s" % ty)

    size = size_per_proc * num_procs
    key = np.arange(size, dtype=np.int64) % size_per_proc
    payload = randn(size)
    df = pd.DataFrame({"key": key, "payload": payload})

    if ty == "int64":
        df["key"] = df["key"] * -1
        ascending = True
    else:
        df["key"] = (
            df["key"]
            .astype(str)
            .str.pad(width=key_length, side=pad_side, fillchar="0")
        )
        ascending = False

    print("Size: %u, Key dtype: %s" % (size, df["key"].dtype))

    for i in range(num_runs):
        start_ts = get_timestamp()
        result = df.sort_values("key", ignore_index=True, ascending=ascending)
        stop_ts = get_timestamp()

        print(
            "[Run %d] Elapsed time: %lf ms"
            % (i + 1, compute_elapsed_time(start_ts, stop_ts))
        )

        # Drop the sorted copy before the next run.
        del result
def test(
    size_per_proc=1000,
    num_procs=1,
    num_runs=1,
    ty="int64",
    key_length=10,
    scale_lhs_only=False,
    package="legate",
):
    """Time an inner ``merge`` on ``key`` between two generated dataframes.

    The left frame has ``size_per_proc * num_procs`` rows.  The right frame
    has the same number of rows, or a third of that when ``scale_lhs_only``
    is set.  Each run prints its elapsed time in milliseconds; nothing is
    returned.

    Args:
        size_per_proc: Number of left-hand rows per processor; also the
            chunk size of the left-hand Dask arrays.
        num_procs: Multiplier applied to ``size_per_proc`` for the row count.
        num_runs: How many times the merge is executed and timed.
        ty: ``"string"`` converts both key columns to zero-padded strings;
            any other value leaves the integer keys unchanged.
        key_length: Width the string keys are padded to (``ty="string"``).
        scale_lhs_only: If true, the right frame gets ``size // 3`` rows with
            keys that are multiples of 3, and left keys wrap at
            ``size_per_proc``.
        package: Dataframe implementation to benchmark: ``"legate"``,
            ``"cudf"``, ``"pandas"``, ``"dask"`` or ``"daskcudf"``.

    Raises:
        ValueError: If ``package`` is not a supported value.
    """
    use_dask = package in ("dask", "daskcudf")

    if package == "legate":
        from legate import numpy as np, pandas as pd
        from legate.numpy.random import randn
    elif package == "cudf":
        import cudf as pd
        import cupy as np
        from cupy.random import randn
    elif package == "pandas":
        import numpy as np
        import pandas as pd
        from numpy.random import randn
    elif use_dask:
        import dask.array as da
        import dask.dataframe as df
        import numpy as np

        if package == "daskcudf":
            import cudf
    else:
        # Raise instead of assert so the check survives ``python -O``.
        raise ValueError("Unknown dataframe package: %s" % package)

    if package == "legate":
        from legate.timing import time

        def get_timestamp():
            return time()

        def compute_elapsed_time(start_ts, stop_ts):
            # Presumably legate timestamps are in microseconds - TODO confirm.
            return (stop_ts - start_ts) / 1000.0

    else:
        import time

        # NOTE: process_time reports CPU seconds of this process, not
        # wall-clock time.
        get_timestamp = time.process_time

        def compute_elapsed_time(start_ts, stop_ts):
            return (stop_ts - start_ts) * 1000.0

    def block(*frames):
        # Dask objects are forced with ``compute()``; for the other packages
        # no explicit step is taken here.
        if use_dask:
            for frame in frames:
                frame.compute()

    size = size_per_proc * num_procs
    size_rhs = size // 3 if scale_lhs_only else size
    key_dtype = np.int64

    if use_dask:
        if scale_lhs_only:
            rhs_chunks = (size_per_proc + num_procs - 1) // num_procs
        else:
            rhs_chunks = size_per_proc

        # Dask array does not have randn so use arange
        c1 = da.arange(size, dtype=np.float64, chunks=size_per_proc)
        c2 = da.arange(size_rhs, dtype=np.float64, chunks=rhs_chunks)
        key_left = da.arange(size, dtype=key_dtype, chunks=size_per_proc)
        key_right = da.arange(size_rhs, dtype=key_dtype, chunks=rhs_chunks)
        if scale_lhs_only:
            key_left = key_left % size_per_proc
            da.multiply(key_right, 3, out=key_right)
    else:
        c1 = randn(size)
        c2 = randn(size_rhs)
        key_left = np.arange(size, dtype=key_dtype)
        key_right = np.arange(size_rhs, dtype=key_dtype)
        if scale_lhs_only:
            key_left = key_left % size_per_proc
            np.multiply(key_right, 3, out=key_right)

    if use_dask:
        df1 = df.multi.concat(
            [df.from_dask_array(a) for a in [c1, key_left]], axis=1
        )
        df1.columns = ["c1", "key"]
        df2 = df.multi.concat(
            [df.from_dask_array(a) for a in [c2, key_right]], axis=1
        )
        df2.columns = ["c2", "key"]
        if package == "daskcudf":
            df1 = df1.map_partitions(cudf.from_pandas)
            df2 = df2.map_partitions(cudf.from_pandas)
    else:
        df1 = pd.DataFrame({"c1": c1, "key": key_left})
        df2 = pd.DataFrame({"c2": c2, "key": key_right})
        # Round right-hand keys down to multiples of 3.
        # NOTE(review): the Dask path above does not apply this rounding -
        # confirm that is intended.
        df2["key"] = df2["key"] // 3 * 3

    if ty == "string":
        df1["key"] = (
            df1["key"]
            .astype("string")
            .str.pad(width=key_length, side="both", fillchar="0")
        )
        df2["key"] = (
            df2["key"]
            .astype("string")
            .str.pad(width=key_length, side="both", fillchar="0")
        )

    # NOTE(review): this reports the generated integer dtype even when the
    # keys were converted to strings above.
    print(
        "Type: inner, Size: %u x %u, Key dtype: %s"
        % (size, size_rhs, str(key_dtype))
    )

    # Evaluate the inputs before timing starts.
    block(df1, df2)

    for i in range(num_runs):
        start_ts = get_timestamp()
        df_result = df1.merge(df2, on="key")
        block(df_result)
        stop_ts = get_timestamp()

        print(
            "[Run %d] Elapsed time: %lf ms"
            % (i + 1, compute_elapsed_time(start_ts, stop_ts))
        )

        # Drop the merged frame before the next run.
        del df_result