def predict(self, X: BlockArray):
    _check_array(X, True)
    r_oid = instance().cm.call_actor_method(self.actor, "predict",
                                            X.flattened_oids()[0])
    return BlockArray.from_oid(r_oid,
                               shape=(X.shape[0],),
                               dtype=predict_dtype,
                               cm=instance().cm)
def __init__(self, *args, **kwargs):
    device_id = None
    if self.__class__ in _place_on_node_0:
        device_id = instance().cm.devices()[0]
    self.actor = instance().cm.make_actor(name, *args, device_id=device_id, **kwargs)
def train_test_split(*arrays,
                     test_size: Union[int, float] = None,
                     train_size: Union[int, float] = None,
                     random_state: Optional[Union[NumsRandomState, int]] = None,
                     shuffle: bool = True,
                     stratify=None):
    # pylint: disable = protected-access
    updated_arrays = []
    for array in arrays:
        updated_arrays.append(_check_array(array))
    syskwargs = {
        "options": {"num_returns": 2 * len(updated_arrays)},
        "grid_entry": (0,),
        "grid_shape": (1,),
    }
    if random_state is None:
        rng_params = None
    else:
        if isinstance(random_state, int):
            # It's a seed.
            random_state: NumsRandomState = instance().random_state(random_state)
        rng_params = random_state._rng.new_block_rng_params()
    array_oids = [array.flattened_oids()[0] for array in updated_arrays]
    result_oids = instance().cm.call("train_test_split",
                                     *array_oids,
                                     rng_params=rng_params,
                                     test_size=test_size,
                                     train_size=train_size,
                                     shuffle=shuffle,
                                     stratify=stratify,
                                     syskwargs=syskwargs)
    # Optimize by computing this directly.
    shape_dtype_oids = [
        instance().cm.shape_dtype(r_oid,
                                  syskwargs={"grid_entry": (0,),
                                             "grid_shape": (1,)})
        for r_oid in result_oids
    ]
    shape_dtypes = instance().cm.get(shape_dtype_oids)
    results = []
    for i, r_oid in enumerate(result_oids):
        shape, dtype = shape_dtypes[i]
        results.append(
            BlockArray.from_oid(r_oid, shape=shape, dtype=dtype, cm=instance().cm))
    return results
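# A minimal usage sketch for the train_test_split wrapper above, assuming
# nums.numpy is importable as nps; the import path for train_test_split below
# is illustrative, so point it at this function's defining module.
import numpy as np
import nums.numpy as nps
# from nums.sklearn import train_test_split  # assumed export

X = nps.array(np.random.random((100, 4)))
y = nps.array(np.random.randint(0, 2, size=100))
# Returns BlockArrays, ordered like sklearn's train_test_split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=1337, shuffle=True)
print(X_train.shape, X_test.shape)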
def fit_transform(self, X: BlockArray, y: BlockArray = None):
    _check_array(X, True)
    if y is not None:
        _check_array(y, True)
        y = y.flattened_oids()[0]
    r_oid = instance().cm.call_actor_method(self.actor, "fit_transform",
                                            X.flattened_oids()[0], y)
    return BlockArray.from_oid(r_oid, shape=X.shape, dtype=float, cm=instance().cm)
def test_rwd_s3():
    import nums
    from nums.core import application_manager
    from nums.core import settings

    settings.system_name = "serial"
    nps_app_inst = application_manager.instance()
    conn = boto3.resource('s3', region_name='us-east-1')
    assert conn.Bucket('darrays') not in conn.buckets.all()
    conn.create_bucket(Bucket='darrays')
    array: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = nps_app_inst.array(array, block_shape=(3, 4))
    filename = "s3://darrays/read_write_delete_array_test"
    write_result_ba: BlockArray = nums.write(filename, ba)
    write_result_np = write_result_ba.get()
    for grid_entry in write_result_ba.grid.get_entry_iterator():
        assert write_result_ba[grid_entry].get() == write_result_np[grid_entry]
    ba_read: BlockArray = nums.read(filename)
    assert nps_app_inst.get(nps_app_inst.allclose(ba, ba_read))
    delete_result_ba: BlockArray = nums.delete(filename)
    delete_result_np = delete_result_ba.get()
    for grid_entry in delete_result_ba.grid.get_entry_iterator():
        assert delete_result_ba[grid_entry].get() == delete_result_np[grid_entry]
def _get_and_register_block_shape(self, shape):
    # pylint: disable=import-outside-toplevel
    # Only allow this to be used if the app manager is maintaining an app instance.
    import nums.core.application_manager as am
    assert am.is_initialized(), ("Unexpected application state: "
                                 "application instance doesn't exist.")
    app = am.instance()
    return app.get_block_shape(shape, self.dtype)
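# For context, a sketch of the call this helper delegates to, assuming the
# application manager has been initialized (same names as in the snippet above).
import numpy as np
import nums.core.application_manager as am

app = am.instance()
# Ask the application for a block shape suited to this global shape and dtype.
block_shape = app.get_block_shape((10000, 1000), np.float64)
print(block_shape)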
def nps_app_inst(request):
    # This triggers initialization; it's not to be mixed with the app_inst fixture.
    # Observed (core dumped) after updating this fixture to run functions with the
    # "serial" backend. Last time this happened, it was due to poor control over
    # the scope and duration of ray resources.
    # pylint: disable = import-outside-toplevel
    from nums.core import settings
    from nums.core import application_manager

    settings.system_name = request.param
    yield application_manager.instance()
    application_manager.destroy()
def score(self, X: BlockArray, y: BlockArray, sample_weight: BlockArray = None):
    _check_array(X, True)
    _check_array(y, True)
    if sample_weight is not None:
        _check_array(sample_weight, True)
        sample_weight = sample_weight.flattened_oids()[0]
    r_oid = instance().cm.call_actor_method(
        self.actor,
        "score",
        X.flattened_oids()[0],
        y.flattened_oids()[0],
        sample_weight,
    )
    return BlockArray.from_oid(r_oid, shape=(), dtype=float, cm=instance().cm)
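# These actor-backed wrappers (__init__, predict, score above; fit in a later
# snippet) compose into the usual scikit-learn flow. A hedged sketch; the
# estimator class below is an assumed export, not necessarily what this module
# defines.
import numpy as np
import nums.numpy as nps
from nums.sklearn import LogisticRegression  # assumed export

X = nps.array(np.random.random((64, 8)))
y = nps.array(np.random.randint(0, 2, size=64))
model = LogisticRegression()  # __init__ above constructs the backing actor
model.fit(X, y)               # forwards "fit" to the actor
accuracy = model.score(X, y)  # scalar BlockArray with shape ()
print(model.predict(X).get()[:5], accuracy.get())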
def random_stub():
    # pylint: disable = unused-variable
    import numpy.random as numpy_module
    from nums.core.array.random import NumsRandomState
    from nums.core.application_manager import instance

    app = instance()
    sys = app.system
    rs_inst = NumsRandomState(system=sys, seed=1337)
    numpy_items = sorted(systems_utils.get_module_functions(numpy_module).items())
    nums_items = sorted(systems_utils.get_instance_functions(rs_inst).items())
    raise NotImplementedError()
def test_app_manager():
    for compute_name in ["numpy"]:
        for system_name in ["serial", "ray-cyclic", "ray-task"]:
            settings.compute_name = compute_name
            settings.system_name = system_name
            app: ArrayApplication = application_manager.instance()
            assert np.allclose(
                np.arange(10),
                app.arange(0, shape=(10,), block_shape=(10,)).get())
            application_manager.destroy()
            assert not application_manager.is_initialized()
            time.sleep(1)
def check_swapaxes(_np_a, axis1, axis2):
    ns_ins = application_manager.instance()
    np_swapaxes = np.__getattribute__("swapaxes")
    ns_swapaxes = nps.__getattribute__("swapaxes")
    _ns_a = nps.array(_np_a)
    _ns_ins_a = ns_ins.array(_np_a, block_shape=block_shape)
    _np_result = np_swapaxes(_np_a, axis1, axis2)
    _ns_result = ns_swapaxes(_ns_a, axis1, axis2)
    _ns_ins_result = ns_swapaxes(_ns_ins_a, axis1, axis2)
    assert np.allclose(_np_result, _ns_result.get())
    assert np.allclose(_np_result, _ns_ins_result.get())
def example(max_iters, batch_size):
    app = am.instance()
    model = LogisticRegression(app=app, cluster_shape=(1, 1), fit_intercept=False)
    X, y = sample(app, sample_size=8)
    model.init(X)
    for i in range(max_iters):
        # Take a step.
        X, y = sample(app, batch_size)
        model.partial_fit(X, y)
        print("train accuracy",
              (nps.sum(y == model.predict(X)) / X.shape[0]).get())
def nps_app_inst(request):
    # This triggers initialization; it's not to be mixed with the app_inst fixture.
    # Observed (core dumped) after updating this fixture to run functions with the
    # "serial" backend. Last time this happened, it was due to poor control over
    # the scope and duration of ray resources.
    # pylint: disable = import-outside-toplevel
    from nums.core import settings
    from nums.core import application_manager
    import nums.numpy as nps

    settings.system_name, settings.device_grid_name = request.param
    # Need to reset numpy random state.
    # It's the only stateful numpy API object.
    nps.random.reset()
    yield application_manager.instance()
    application_manager.destroy()
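# The fixture reads (system_name, device_grid_name) from request.param, so test
# modules opt in through indirect parametrization; the parameter values below
# are illustrative.
import pytest

@pytest.mark.parametrize(
    "nps_app_inst",
    [("serial", "cyclic")],
    indirect=True,
)
def test_uses_fixture(nps_app_inst):
    assert nps_app_inst is not None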
def test_app_manager(compute_name, system_name, device_grid_name):
    settings.use_head = True
    settings.compute_name = compute_name
    settings.system_name = system_name
    settings.device_grid_name = device_grid_name
    app: ArrayApplication = application_manager.instance()
    app_arange = app.arange(0, shape=(10,), block_shape=(10,))
    assert np.allclose(np.arange(10), app_arange.get())
    application_manager.destroy()
    assert not application_manager.is_initialized()
    time.sleep(1)
    # Revert for other tests.
    settings.compute_name = "numpy"
    settings.system_name = "ray"
    settings.device_grid_name = "cyclic"
def test_rwd():
    import nums
    from nums.core import application_manager
    from nums.core import settings

    settings.system_name = "serial"
    nps_app_inst = application_manager.instance()
    array: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = nps_app_inst.array(array, block_shape=(3, 4))
    filename = "/tmp/darrays/read_write_delete_array_test"
    write_result_ba: BlockArray = nums.write(filename, ba)
    write_result_np = write_result_ba.get()
    for grid_entry in write_result_ba.grid.get_entry_iterator():
        assert write_result_ba[grid_entry].get() == write_result_np[grid_entry]
    ba_read: BlockArray = nums.read(filename)
    assert nps_app_inst.get(nps_app_inst.allclose(ba, ba_read))
    delete_result_ba: bool = nums.delete(filename)
    assert delete_result_ba
def _check_array(array, strict=False):
    if not isinstance(array, BlockArray):
        if strict:
            raise TypeError("Input array is not a BlockArray.")
        # These arrays should be a single block.
        array = instance().array(array, block_shape=array.shape)
    if not array.is_single_block():
        if strict:
            raise ValueError("Input array is not a single block.")
        array_size_gb = array.nbytes / 10**9
        if array_size_gb > 100.0:
            raise MemoryError("Operating on an "
                              "array of size %sGB is not supported." % array_size_gb)
        elif array_size_gb > 10.0:
            # This is a large array of over 10GB.
            warnings.warn("Attempting to convert an array "
                          "of size %sGB to a single block." % array_size_gb)
        array = array.to_single_block()
    return array
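# Boundary behavior of _check_array, assuming an initialized application
# instance; the thresholds (warn above 10GB, error above 100GB) come from the
# code above.
import numpy as np

# Non-strict mode converts a NumPy array into a single-block BlockArray.
converted = _check_array(np.ones((8, 3)))

# Strict mode rejects anything that is not already a BlockArray.
try:
    _check_array(np.ones((8, 3)), strict=True)
except TypeError as err:
    print(err)  # Input array is not a BlockArray.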
def test_app_manager(compute_name, system_name, device_grid_name, num_cpus):
    settings.use_head = True
    settings.compute_name = compute_name
    settings.system_name = system_name
    settings.device_grid_name = device_grid_name
    settings.num_cpus = num_cpus
    app: ArrayApplication = application_manager.instance()
    print(settings.num_cpus, num_cpus, app.cm.num_cores_total())
    app_arange = app.arange(0, shape=(10,), block_shape=(10,))
    assert np.allclose(np.arange(10), app_arange.get())
    if num_cpus is None:
        assert app.cm.num_cores_total() == get_num_cores()
    else:
        assert app.cm.num_cores_total() == num_cpus
    application_manager.destroy()
    assert not application_manager.is_initialized()
    time.sleep(1)
    # Revert for other tests.
    settings.compute_name = "numpy"
    settings.system_name = "ray"
    settings.device_grid_name = "cyclic"
    settings.num_cpus = None
def benchmark_mlp(num_gpus, N_list, system_class_list, d=1000,
                  optimizer=True, dtype=np.float32):
    format_string = "%20s,%10s,%10s,%10s,%10s,%10s"
    print(format_string % ("Library", "N", "Cost", "CostOpt", "CostInit", "CV"))
    global app
    for N in N_list:
        N = int(N)
        N_block = N // num_gpus
        d_block = d // 1
        for system_class in system_class_list:
            if True:  # try:
                if system_class in ["Cupy", "Numpy"]:
                    name = system_class
                    import cupy as cp
                    arr_lib = cp if system_class == "Cupy" else np
                    arr_lib.inv = arr_lib.linalg.inv
                    app = arr_lib
                    X, y = np_sample(np, sample_size=N, feature=1000)
                    W_in_1, W_1_2, W_2_out, B_1, B_2, B_out = np_init_weights(
                        np, X, y)
                    X = cp.asarray(X)
                    y = cp.asarray(y)
                    # Move weights to the device.
                    W_in_1 = cp.asarray(W_in_1)
                    W_1_2 = cp.asarray(W_1_2)
                    W_2_out = cp.asarray(W_2_out)
                    # Move biases to the device.
                    B_1 = cp.asarray(B_1)
                    B_2 = cp.asarray(B_2)
                    B_out = cp.asarray(B_out)
                    cp.cuda.Device(0).synchronize()

                    # Benchmark one-step MLP.
                    def func():
                        tic = time.time()
                        toc_end = one_step_fit_np(arr_lib, X, y, W_in_1, W_1_2,
                                                  W_2_out, B_1, B_2, B_out)
                        cp.cuda.Device(0).synchronize()
                        toc = time.time()
                        return toc - tic, toc_end - tic, 0, None

                    costs, costs_opt, costs_init = benchmark_func(func)
                    del (X, y, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)
                else:
                    # Init system.
                    name = system_class.__name__
                    app = am.instance()
                    app.system.num_gpus = num_gpus
                    app.system.cluster_shape = (num_gpus, 1)
                    app.system.optimizer = optimizer
                    # Make dataset.
                    nps.random.seed(0)
                    X, y = sample(app, sample_size=N, feature=1000,
                                  num_gpus=num_gpus)
                    W_in_1, W_1_2, W_2_out, B_1, B_2, B_out = init_weights(
                        app, num_gpus, X, y)

                    # Benchmark one-step MLP.
                    def func():
                        tic = time.time()
                        if optimizer:
                            toc_init, toc_opt = one_step_fit_opt(
                                app, X, y, W_in_1, W_1_2, W_2_out,
                                B_1, B_2, B_out, num_gpus)
                        else:
                            toc_opt = feedforward(app, X, W_in_1, W_1_2, W_2_out,
                                                  B_1, B_2, B_out)
                            toc_init = tic
                        toc = time.time()
                        return toc - tic, toc_opt - tic, toc_init - tic, None

                    costs, costs_opt, costs_init = benchmark_func(func)
                    del (X, y, app, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)
            else:  # except Exception:
                costs = [-1]
                costs_opt = [-1]
                costs_init = [-1]
            log_str = format_string % (
                name,
                "%d" % N,
                "%.4f" % np.mean(costs),
                "%.4f" % np.mean(costs_opt),
                "%.4f" % np.mean(costs_init),
                "%.2f" % (np.std(costs) / np.mean(costs)),
            )
            print(log_str)
            with open("result_lr.csv", "a") as f:
                f.write(log_str + "\n")
def benchmark_lr(num_gpus, N_list, system_class_list, d=1000,
                 optimizer=True, dtype=np.float32):
    format_string = "%20s,%10s,%10s,%10s,%10s,%10s"
    print(format_string % ("Library", "N", "Cost", "CostOpt", "CostInit", "CV"))
    global app
    for N in N_list:
        N = int(N)
        for system_class in system_class_list:
            if True:  # try:
                if system_class in ["Cupy", "Numpy"]:
                    name = system_class
                    import cupy as cp
                    arr_lib = cp if system_class == "Cupy" else np
                    arr_lib.inv = arr_lib.linalg.inv
                    app = arr_lib
                    X = arr_lib.zeros((N, d), dtype=dtype)
                    y = arr_lib.ones((N,), dtype=dtype)
                    # Prevent the singular matrix error in np.linalg.inv.
                    arange = arr_lib.arange(N)
                    X[arange, arange % d] = 1
                    cp.cuda.Device(0).synchronize()

                    # Benchmark one-step LR.
                    def func():
                        tic = time.time()
                        one_step_fit_np(arr_lib, X, y)
                        cp.cuda.Device(0).synchronize()
                        toc = time.time()
                        return toc - tic, 0, 0, None

                    costs, costs_opt, costs_init = benchmark_func(func)
                    del (X, y, app)
                else:
                    # Init system.
                    name = system_class.__name__
                    app = am.instance(num_gpus, optimizer)
                    # Make dataset.
                    nps.random.seed(0)
                    X = app.ones((N, d), block_shape=(N // num_gpus, d), dtype=dtype)
                    y = app.ones((N,), block_shape=(N // num_gpus,), dtype=dtype)
                    theta = app.zeros((X.shape[1],), (X.block_shape[1],),
                                      dtype=X.dtype)

                    # Benchmark one-step LR.
                    def func():
                        tic = time.time()
                        if optimizer:
                            toc_init, toc_opt = one_step_fit_opt(app, X, y, theta,
                                                                 num_gpus=num_gpus)
                        else:
                            toc_init = tic
                            toc_opt = one_step_fit(app, X, y, theta)
                        toc = time.time()
                        return toc - tic, toc_opt - tic, toc_init - tic, None

                    costs, costs_opt, costs_init = benchmark_func(func)
                    del (X, y, app)
            else:  # except Exception:
                costs = [-1]
                costs_opt = [-1]
                costs_init = [-1]
            log_str = format_string % (
                name,
                "%d" % N,
                "%.4f" % np.mean(costs),
                "%.4f" % np.mean(costs_opt),
                "%.4f" % np.mean(costs_init),
                "%.2f" % (np.std(costs) / np.mean(costs)),
            )
            print(log_str)
            with open("result_lr.csv", "a") as f:
                f.write(log_str + "\n")
def benchmark_bop(num_gpus, N_list, system_class_list, d=400000,
                  optimizer=True, dtype=np.float32):
    format_string = "%20s,%10s,%10s,%10s,%10s,%10s"
    print(format_string % ("Library", "N", "Cost", "CostOpt", "CostInit", "CV"))
    for N in N_list:
        N = int(N)
        d1 = N
        d2 = d
        for system_class in system_class_list:
            if True:  # try:
                if system_class in ["Cupy", "Numpy"]:
                    name = system_class
                    import cupy as cp
                    arr_lib = cp if system_class == "Cupy" else np
                    arr_lib.inv = arr_lib.linalg.inv
                    app = arr_lib
                    W = arr_lib.ones(shape=(d1, d2), dtype=dtype)
                    D = arr_lib.ones(shape=(d2, N), dtype=dtype)
                    cp.cuda.Device(0).synchronize()

                    # Benchmark bop.
                    def func():
                        tic = time.time()
                        Z = W @ D
                        # Z = X.T @ X
                        cp.cuda.Device(0).synchronize()
                        toc = time.time()
                        return toc - tic, 0, 0, None

                    costs, costs_opt, costs_init = benchmark_func(func)
                    del (W, D, app)
                else:
                    # Init system.
                    name = system_class.__name__
                    app = am.instance(num_gpus, optimizer)
                    W = app.ones(shape=(d1, d2), block_shape=(d1, d2 // num_gpus),
                                 dtype=dtype)
                    D = app.ones(shape=(d2, N), block_shape=(d2 // num_gpus, N),
                                 dtype=dtype)

                    # Benchmark bop.
                    def func():
                        tic = time.time()
                        if optimizer:
                            toc_init, toc_opt = matmul_opt(app, W, D, num_gpus)
                        else:
                            Z = (W @ D).touch()
                            # Z = (X.T @ X).touch()
                        toc = time.time()
                        return toc - tic, 0, 0, None

                    costs, costs_opt, costs_init = benchmark_func(func)
                    del (W, D)
                    am.destroy()
            else:  # except Exception:
                costs = [-1]
                costs_opt = [-1]
                costs_init = [-1]
            log_str = format_string % (
                name,
                "%d" % N,
                "%.4f" % np.mean(costs),
                "%.4f" % np.mean(costs_opt),
                "%.4f" % np.mean(costs_init),
                "%.2f" % (np.std(costs) / np.mean(costs)),
            )
            print(log_str)
            with open("result_bop.csv", "a") as f:
                f.write(log_str + "\n")
def benchmark_mlp(num_gpus, N_list, system_class_list, d=1000,
                  optimizer=True, dtype=np.float32):
    global app
    for N in N_list:
        N = int(N)
        N_block = N // num_gpus
        d_block = d // 1
        app = am.instance()  # cupy-parallel
        app.system.num_gpus = num_gpus
        app.system.cluster_shape = (num_gpus, 1)
        app.system.optimizer = optimizer
        nps.random.seed(0)
        X, y = sample(app, sample_size=N, feature=1000, num_gpus=num_gpus)
        W_in_1, W_1_2, W_2_out = data_init_weights(app, num_gpus, X, y)
        X_cp = X.copy()
        y_cp = y.copy()
        W_in_1_cp = W_in_1.copy()
        W_1_2_cp = W_1_2.copy()
        W_2_out_cp = W_2_out.copy()
        # No partition: gather everything onto a single device for the baseline.
        import cupy as cp
        with cp.cuda.Device(0):
            X_tmp = cp.asarray(X_cp.get())
            y_tmp = cp.asarray(y_cp.get())
            W_in_1_tmp = cp.asarray(W_in_1_cp.get())
            W_1_2_tmp = cp.asarray(W_1_2_cp.get())
            W_2_out_tmp = cp.asarray(W_2_out_cp.get())
        cp.cuda.Device(0).synchronize()
        np.testing.assert_allclose(W_1_2.get(), W_1_2_tmp.get())
        print(" W_in_1 all close ")
        for system_class in system_class_list:
            if True:  # try:
                if system_class in ["Cupy", "Numpy"]:
                    name = system_class
                    arr_lib = cp if system_class == "Cupy" else np
                    arr_lib.inv = arr_lib.linalg.inv
                    print("----Cupy----")
                    y_tmp = np_feedforward(arr_lib, X_tmp, W_in_1_tmp,
                                           W_1_2_tmp, W_2_out_tmp)
                    cp.cuda.Device(0).synchronize()
                    del (X_tmp, W_in_1_tmp, W_1_2_tmp, W_2_out_tmp)
                else:
                    print("----CupyParallel----")
                    if optimizer:
                        y = feedforward_opt(app, X, W_in_1, W_1_2, W_2_out,
                                            num_gpus)
                    else:
                        W_in_1, W_1_2, W_2_out, B_1, B_2, B_out = one_step_fit(
                            app, app.one, X, y, W_in_1, W_1_2, W_2_out,
                            B_1, B_2, B_out)
                    costs = [1]
                    costs_opt = [0]
                    costs_init = [0]
                    del (X, W_in_1, W_1_2, W_2_out)
            else:  # except Exception:
                costs = [-1]
                costs_opt = [-1]
                costs_init = [-1]
        # Compare the single-device and partitioned results.
        print("assert all close ")
        np.testing.assert_allclose(y.get(), y_tmp.get())
        print(" y all close ")
        del (y_tmp, y, app, X_cp, y_cp, W_in_1_cp, W_1_2_cp, W_2_out_cp)
def test_shape(nps_app_inst):
    from nums import numpy as nps
    from nums.core import application_manager

    assert nps_app_inst is not None
    shape = (10, 20, 30, 40)
    block_shape = (10, 10, 10, 10)
    ns_ins = application_manager.instance()

    def check_expand_and_squeeze(_np_a, axes):
        np_expand_dims = np.__getattribute__("expand_dims")
        ns_expand_dims = nps.__getattribute__("expand_dims")
        np_squeeze = np.__getattribute__("squeeze")
        ns_squeeze = nps.__getattribute__("squeeze")
        _ns_a = nps.array(_np_a)
        _ns_ins_a = ns_ins.array(_np_a, block_shape=block_shape)

        _np_result = np_expand_dims(_np_a, axes)
        _ns_result = ns_expand_dims(_ns_a, axes)
        _ns_ins_result = ns_expand_dims(_ns_ins_a, axes)
        assert np.allclose(_np_result, _ns_result.get())
        assert np.allclose(_np_result, _ns_ins_result.get())
        check_dim(_np_result, _ns_result)
        check_dim(_np_result, _ns_ins_result)

        _np_result = np_squeeze(_np_a)
        _ns_result = ns_squeeze(_ns_a)
        _ns_ins_result = ns_squeeze(_ns_ins_a)
        assert np.allclose(_np_result, _ns_result.get())
        assert np.allclose(_np_result, _ns_ins_result.get())
        check_dim(_np_result, _ns_result)
        check_dim(_np_result, _ns_ins_result)

    def check_dim(_np_a, _ns_a):
        np_ndim = np.__getattribute__("ndim")
        assert np_ndim(_np_a) == np_ndim(_ns_a)

    def check_swapaxes(_np_a, axis1, axis2):
        ns_ins = application_manager.instance()
        np_swapaxes = np.__getattribute__("swapaxes")
        ns_swapaxes = nps.__getattribute__("swapaxes")
        _ns_a = nps.array(_np_a)
        _ns_ins_a = ns_ins.array(_np_a, block_shape=block_shape)
        _np_result = np_swapaxes(_np_a, axis1, axis2)
        _ns_result = ns_swapaxes(_ns_a, axis1, axis2)
        _ns_ins_result = ns_swapaxes(_ns_ins_a, axis1, axis2)
        assert np.allclose(_np_result, _ns_result.get())
        assert np.allclose(_np_result, _ns_ins_result.get())

    np_A = np.ones(shape)
    check_expand_and_squeeze(np_A, axes=0)
    check_expand_and_squeeze(np_A, axes=2)
    check_expand_and_squeeze(np_A, axes=4)
    check_expand_and_squeeze(np_A, axes=(2, 3))
    check_expand_and_squeeze(np_A, axes=(0, 5))
    check_expand_and_squeeze(np_A, axes=(0, 5, 6))
    check_expand_and_squeeze(np_A, axes=(2, 3, 5, 6, 7))
    for a1 in range(4):
        for a2 in range(4):
            check_swapaxes(np_A, axis1=a1, axis2=a2)
def init():
    # pylint: disable = import-outside-toplevel
    # Explicitly initialize the application instance.
    from nums.core.application_manager import instance
    return instance()
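# Minimal usage sketch of the explicit entry point, assuming init() is
# re-exported at the package top level.
import nums

app = nums.init()  # eagerly construct the application instance
print(type(app))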
def benchmark_mlp(num_gpus, N_list, system_class_list, d=140000,
                  optimizer=True, dtype=np.float32):
    format_string = "%20s,%10s,%10s,%10s,%10s,%10s,%10s,%10s"
    print(format_string % ("Library", "N", "d_in", "d_2", "Cost", "CostOpt",
                           "CostInit", "CV"))
    global app
    for N in N_list:
        for system_class in system_class_list:
            if True:  # try:
                if system_class in ["Cupy", "Numpy"]:
                    name = system_class
                    import cupy as cp
                    arr_lib = cp if system_class == "Cupy" else np
                    arr_lib.inv = arr_lib.linalg.inv
                    app = arr_lib
                    # d2 (the hidden-layer width) is expected at module scope.
                    X, y = np_sample(np, sample_size=N, feature=d, dtype=dtype)
                    W_in_1, W_1_2, W_2_out = np_init_weights(np, X, y, d2,
                                                             dtype=dtype)
                    X = cp.asarray(X)
                    y = cp.asarray(y)
                    W_in_1 = cp.asarray(W_in_1)
                    W_1_2 = cp.asarray(W_1_2)
                    W_2_out = cp.asarray(W_2_out)
                    cp.cuda.Device(0).synchronize()

                    # Benchmark one-step MLP.
                    def func():
                        tic = time.time()
                        toc_end = one_step_fit_np(arr_lib, X, y, W_in_1, W_1_2,
                                                  W_2_out)
                        cp.cuda.Device(0).synchronize()
                        toc = time.time()
                        return toc - tic, toc_end - tic, 0, None

                    costs, costs_opt, costs_init = benchmark_func(func)
                    del (X, y, W_in_1, W_1_2, W_2_out)
                else:
                    # Init system.
                    name = system_class.__name__
                    app = am.instance(num_gpus, optimizer)
                    # Make dataset.
                    nps.random.seed(0)
                    X, y = sample(app, sample_size=N, feature=d,
                                  num_gpus=num_gpus, dtype=dtype)
                    W_in_1, W_1_2, W_2_out = model_init_weights(
                        app, num_gpus, X, y, d2, verbose=False)

                    # Benchmark one-step MLP.
                    def func():
                        tic = time.time()
                        if optimizer:
                            toc_init, toc_opt = one_step_fit_opt(
                                app, X, y, W_in_1, W_1_2, W_2_out, num_gpus)
                        else:
                            toc_init = tic
                            toc_opt = one_step_fit(app, X, y, W_in_1, W_1_2,
                                                   W_2_out)
                        toc = time.time()
                        return toc - tic, toc_opt - tic, toc_init - tic, None

                    costs, costs_opt, costs_init = benchmark_func(func)
                    del (X, y, app, W_in_1, W_1_2, W_2_out)
            else:  # except Exception:
                costs = [-1]
                costs_opt = [-1]
                costs_init = [-1]
            log_str = format_string % (
                name,
                "%d" % N,
                "%d" % d,
                "%d" % d2,
                "%.4f" % np.mean(costs),
                "%.4f" % np.mean(costs_opt),
                "%.4f" % np.mean(costs_init),
                "%.2f" % (np.std(costs) / np.mean(costs)),
            )
            print(log_str)
            with open("result_mlp_model.csv", "a") as f:
                f.write(log_str + "\n")
def fit(self, X: BlockArray, y: BlockArray):
    _check_array(X, True)
    _check_array(y, True)
    instance().cm.call_actor_method(self.actor, "fit",
                                    X.flattened_oids()[0],
                                    y.flattened_oids()[0])
def test_nan_reductions(nps_app_inst):
    assert nps_app_inst is not None
    ba: BlockArray = nps.array([[-1, 4, np.nan, 5], [3, 2, nps.nan, 6]])
    block_shapes = [(1, 1), (1, 2), (1, 4), (2, 1), (2, 4)]
    for block_shape in block_shapes:
        ba = ba.reshape(block_shape=block_shape)
        np_arr = ba.get()
        op_params = ["nanmax", "nanmin", "nansum", "nanmean", "nanvar", "nanstd"]
        axis_params = [None, 0, 1]
        keepdims_params = [True, False]
        for op, axis, keepdims in itertools.product(op_params, axis_params,
                                                    keepdims_params):
            ns_op = nps.__getattribute__(op)
            np_op = np.__getattribute__(op)
            np_result = np_op(np_arr, axis=axis, keepdims=keepdims)
            ba_result: BlockArray = ns_op(ba, axis=axis, keepdims=keepdims)
            assert ba_result.grid.grid_shape == ba_result.blocks.shape
            assert ba_result.shape == np_result.shape
            assert np.allclose(ba_result.get(), np_result, equal_nan=True)


if __name__ == "__main__":
    from nums.core import application_manager
    import nums.core.settings

    nums.core.settings.system_name = "serial"
    nps_app_inst = application_manager.instance()
    test_nan_reductions(nps_app_inst)
from nums.core.application_manager import instance
from nums.core.array.blockarray import BlockArray
from nums.core.array.application import ArrayApplication
from nums.core.systems.systems import System
import numpy as np

app: ArrayApplication = instance()
system: System = app.system

X: BlockArray = app.random.random(shape=(2, 2), block_shape=(1, 1))
Y: BlockArray = app.random.random(shape=(2, 2), block_shape=(1, 1))
print(X.get())
print(Y.get())
print((X + Y).get())

first_entry = list(X.grid.get_entry_iterator())[0]
x_block = X.blocks[first_entry]
y_block = Y.blocks[first_entry]
print(x_block.get())
print(y_block.get())

result = app.zeros(shape=(2, 2), block_shape=(1, 1))
# system.call_with_options()
result.blocks[first_entry].oid = system.bop("add",