def test_basic_assignment_broadcasting(app_inst: ArrayApplication):
    # Test mixed-length broadcasting.
    def get_sel(num_entries, shape):
        r = []
        for i in range(num_entries):
            dim = shape[i]
            start = rs.random_integers(0, dim - 1)
            stop = rs.random_integers(start, dim)
            r.append((start, stop))
        return r

    rs = np.random.RandomState(1337)
    a_shape = (6, 7, 2, 5)
    a_block_shape = (2, 4, 2, 3)
    b_shape = (6, 7, 2, 5)
    b_block_shape = (3, 2, 1, 2)
    num_axes = len(a_shape)
    access_modes = [
        lambda a1, a2: a1,
        lambda a1, a2: slice(None, None, None),
        lambda a1, a2: slice(a1, None, None),
        lambda a1, a2: slice(None, a1, None),
        lambda a1, a2: slice(a1, a2, None),
    ]
    for a_len in range(num_axes):
        for b_len in range(num_axes):
            a_mode_iterator = list(itertools.product(access_modes, repeat=a_len))
            b_mode_iterator = list(itertools.product(access_modes, repeat=b_len))
            pbar = tqdm.tqdm(
                total=len(a_mode_iterator) * len(b_mode_iterator),
                desc="Testing assignment broadcasting %d/%d"
                % (a_len * num_axes + b_len, num_axes ** 2),
            )
            # Create some valid intervals.
            for a_mode in a_mode_iterator:
                for b_mode in b_mode_iterator:
                    pbar.update(1)
                    a_sel = get_sel(a_len, a_shape)
                    b_sel = get_sel(b_len, b_shape)
                    a_accessor = tuple(a_mode[i](*a_sel[i]) for i in range(a_len))
                    b_accessor = tuple(b_mode[i](*b_sel[i]) for i in range(b_len))
                    arr_a = np.arange(np.product(a_shape)).reshape(a_shape)
                    arr_b = np.arange(np.product(b_shape)).reshape(b_shape)
                    ba_a = app_inst.array(arr_a, a_block_shape)
                    ba_b = app_inst.array(arr_b, b_block_shape)
                    try:
                        arr_a[a_accessor] = arr_b[b_accessor]
                        broadcasted = True
                    except ValueError as _:
                        broadcasted = False
                    if broadcasted:
                        ba_a[a_accessor] = ba_b[b_accessor]
                        assert np.allclose(arr_a, ba_a.get())
                        assert np.allclose(arr_b, ba_b.get())
def test_block_grid_entry(app_inst: ArrayApplication):
    ba: BlockArray = app_inst.array(np.array([[1, 2, 3], [4, 5, 6]]), block_shape=(1, 3))
    block1: Block = ba.T.blocks[0, 1]
    assert block1.size() == 3
    assert block1.transposed
    assert block1.grid_entry == (0, 1)
    assert block1.grid_shape == (1, 2)
    assert block1.true_grid_entry() == (1, 0)
    assert block1.true_grid_shape() == (2, 1)
def mock_cluster(cluster_shape):
    scheduler: RayScheduler = MockMultiNodeScheduler(
        compute_module=numpy_compute, cluster_shape=cluster_shape, use_head=True
    )
    system: System = RaySystem(compute_module=numpy_compute, scheduler=scheduler)
    system.init()
    return ArrayApplication(system=system, filesystem=FileSystem(system))
def test_concatenate(app_inst: ArrayApplication):
    axis = 1
    real_X, _ = BimodalGaussian.get_dataset(1000, 9)
    real_ones = np.ones(shape=(1000, 1))
    X = app_inst.array(real_X, block_shape=(100, 9))
    ones = app_inst.ones((1000, 1), (100, 1), dtype=X.dtype)
    X_concated = app_inst.concatenate(
        [X, ones], axis=axis, axis_block_size=X.block_shape[axis]
    )
    real_X_concated = np.concatenate([real_X, real_ones], axis=axis)
    assert np.allclose(X_concated.get(), real_X_concated)

    real_X2 = np.random.random_sample(1000 * 17).reshape(1000, 17)
    X2 = app_inst.array(real_X2, block_shape=(X.block_shape[0], 3))
    X_concated = app_inst.concatenate(
        [X, ones, X2], axis=axis, axis_block_size=X.block_shape[axis]
    )
    real_X_concated = np.concatenate([real_X, real_ones, real_X2], axis=axis)
    assert np.allclose(X_concated.get(), real_X_concated)
def test_tensordot_large_shape(app_inst: ArrayApplication):
    a = np.arange(4 * 6 * 10 * 90).reshape((90, 10, 6, 4))
    b = np.arange(4 * 6 * 10 * 75).reshape((4, 6, 10, 75))
    c = np.tensordot(a, b, axes=1)
    block_a = app_inst.array(a, block_shape=(30, 5, 3, 2))
    block_b = app_inst.array(b, block_shape=(2, 3, 5, 25))
    block_c = block_a.tensordot(block_b, axes=1)
    assert np.allclose(block_c.get(), c)
    common.check_block_integrity(block_c)
def newton(app: ArrayApplication, model: GLM, beta,
           X: BlockArray, y: BlockArray, tol: BlockArray, max_iter: int):
    for _ in range(max_iter):
        mu: BlockArray = model.forward(X, beta)
        g = model.gradient(X, y, mu, beta=beta)
        # These are PSD, but inv is faster than psd inv.
        beta += -app.inv(model.hessian(X, y, mu)) @ g
        if g.T @ g < tol:
            break
    return beta
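# For reference, a minimal NumPy-only sketch of the Newton step performed above:
# beta <- beta - H(beta)^{-1} g(beta), stopping once the squared gradient norm
# falls below tol. The `model` object and its forward/gradient/hessian methods
# are assumed to follow the same interface as the GLM used above; the function
# name is illustrative and not part of the library under test.
def newton_numpy_sketch(model, beta, X, y, tol, max_iter):
    for _ in range(max_iter):
        mu = model.forward(X, beta)
        g = model.gradient(X, y, mu, beta=beta)
        # Dense Newton update on a single node.
        beta = beta - np.linalg.inv(model.hessian(X, y, mu)) @ g
        if g @ g < tol:
            break
    return beta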
def test_poisson_basic(nps_app_inst: ArrayApplication):
    coef = np.array([0.2, -0.1])
    X_real = np.array([[0, 1, 2, 3, 4]]).T
    y_real = np.exp(np.dot(X_real, coef[0]) + coef[1]).reshape(-1)
    X = nps_app_inst.array(X_real, block_shape=X_real.shape)
    y = nps_app_inst.array(y_real, block_shape=y_real.shape)
    model: PoissonRegression = PoissonRegression(
        **{"solver": "newton", "tol": 1e-8, "max_iter": 10}
    )
    model.fit(X, y)
    print("norm", model.grad_norm_sq(X, y).get())
    print("objective", model.objective(X, y).get())
    print("D^2", model.deviance_sqr(X, y).get())
    assert nps_app_inst.allclose(
        model._beta, nps_app_inst.array(coef[:-1], block_shape=(1,)), rtol=1e-4
    ).get()
    assert nps_app_inst.allclose(
        model._beta0, nps_app_inst.scalar(coef[-1]), rtol=1e-4
    ).get()
def test_higgs(app_inst: ArrayApplication):
    filename = os.path.join(settings.data_dir, "HIGGS.csv")
    t = time.time()
    ba: BlockArray = app_inst.read_csv(filename, num_workers=12)
    ba.touch()
    print("HIGGS nums load time", time.time() - t, ba.shape, ba.block_shape)
    t = time.time()
    np_data = _read_serially(filename, has_header=False)
    print("HIGGS serial load time", time.time() - t, np_data.shape)
    assert np.allclose(ba.get(), np_data)
def test_logistic(nps_app_inst: ArrayApplication):
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "block_sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
        {"solver": "irls", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        lr_model: LogisticRegression = LogisticRegression(**kwargs)
        lr_model.fit(X, y)
        runtime = time.time() - runtime
        y_pred = lr_model.predict(X).get()
        y_pred_proba = lr_model.predict_proba(X).get()
        # Predicted class probabilities should sum to 1.
        assert np.allclose(
            np.ones(shape=(y.shape[0],)), y_pred_proba[:, 0] + y_pred_proba[:, 1]
        )
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", lr_model.grad_norm_sq(X, y).get())
        print("objective", lr_model.objective(X, y).get())
        print("accuracy", np.sum(y.get() == y_pred) / num_samples)
def test_inv(app_inst: ArrayApplication):
    shape = (5, 5)
    for dtype in (np.float32, np.float64):
        mat = app_inst.array(
            sample_sym_pd_mat(shape=shape).astype(dtype), block_shape=shape
        )
        mat_inv = app_inst.inv_sym_psd(mat).get()
        assert np.allclose(np.linalg.inv(mat.get()), mat_inv, rtol=1e-4, atol=1e-4)
        _, r = np.linalg.qr(mat.get())
        r_inv = app_inst.inverse_triangular(
            app_inst.array(r, block_shape=shape), lower=False
        ).get()
        assert np.allclose(np.linalg.inv(r), r_inv, rtol=1e-4, atol=1e-4)
        L = app_inst.cholesky(mat).get()
        assert np.allclose(np.linalg.cholesky(mat.get()), L, rtol=1e-4, atol=1e-4)
def svd(app: ArrayApplication, X):
    # TODO(hme): Optimize by merging with direct qr to compute U directly,
    #  to avoid wasting space storing intermediate Q.
    #  This may not really help until we have operator fusion.
    assert len(X.shape) == 2
    block_shape = X.block_shape
    shape = X.shape
    R_shape = (shape[1], shape[1])
    R_block_shape = (block_shape[1], block_shape[1])
    Q, R = direct_tsqr(app, X, reshape_output=False)
    assert R.shape == R.block_shape
    R_U, S, VT = app.cm.svd(
        R.blocks[(0, 0)].oid, syskwargs={"grid_entry": (0, 0), "grid_shape": (1, 1)}
    )
    R_U: BlockArray = app.vec_from_oids([R_U], R_shape, R_block_shape, X.dtype)
    S: BlockArray = app.vec_from_oids([S], R_shape[:1], R_block_shape[:1], X.dtype)
    VT = app.vec_from_oids([VT], R_shape, R_block_shape, X.dtype)
    U = Q @ R_U
    return U, S, VT
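# For reference, a single-node NumPy sketch of the same TSQR-based SVD idea:
# factor X = Q R, take the SVD of the small R = R_U S V^T, and recover
# U = Q R_U so that X = U S V^T. Function name is illustrative only.
def svd_via_tsqr_sketch(X):
    Q, R = np.linalg.qr(X)       # R is (n_cols, n_cols), cheap to decompose
    R_U, S, VT = np.linalg.svd(R)
    U = Q @ R_U                  # left singular vectors of X
    return U, S, VT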
def test_loadtxt(app_inst: ArrayApplication):
    seed = 1337
    rs = np.random.RandomState(seed)
    fname = "test_text.out"
    header = ["field1", "field2", "field3"]
    data = rs.random_sample(99).reshape(33, 3)
    np.savetxt(
        fname=fname,
        X=data,
        fmt="%.18e",
        delimiter=",",
        newline="\n",
        header=",".join(header),
        footer="",
        comments="# ",
        encoding=None,
    )
    np_loaded_data = np.loadtxt(
        fname,
        dtype=float,
        comments="# ",
        delimiter=",",
        converters=None,
        skiprows=0,
        usecols=None,
        unpack=False,
        ndmin=0,
        encoding="bytes",
        max_rows=None,
    )
    assert np.allclose(data, np_loaded_data)
    nums_array = app_inst.loadtxt(
        fname,
        dtype=float,
        comments="# ",
        delimiter=",",
        converters=None,
        skiprows=0,
        usecols=None,
        unpack=False,
        ndmin=0,
        encoding="bytes",
        max_rows=None,
    )
    assert np.allclose(data, nums_array.get())
    os.remove(fname)
    assert not os.path.exists(fname)
def test_default_random(app_inst: ArrayApplication):
    num1 = app_inst.random_state().random()
    num2 = app_inst.random_state().random()
    num_iters = 0
    max_iters = 10
    while app_inst.allclose(num1, num2) and num_iters < max_iters:
        num_iters += 1
        num2 = app_inst.random_state().random()
    if num_iters > 0:
        warnings.warn(
            "More than one iteration required to generate unequal random numbers."
        )
    assert not app_inst.allclose(num1, num2)

    # Test default random seed.
    app_inst.random.seed(1337)
    num1 = app_inst.random.random()
    app_inst.random.seed(1337)
    num2 = app_inst.random.random()
    assert app_inst.allclose(num1, num2)
def test_top_k(app_inst: ArrayApplication):
    # Simple tests
    np_x = np.array([3, 7, 2, 4, 5, 1, 5, 6])
    ba_x = app_inst.array(np_x, block_shape=(3,))
    for k in range(1, len(np_x) + 1):
        # Largest
        ba_v, ba_i = app_inst.top_k(ba_x, k)
        np_v = np.partition(np_x, -k)[-k:]
        assert len(ba_v.get()) == k and len(ba_i.get()) == k
        for v, i in zip(ba_v.get(), ba_i.get()):
            assert v in np_v
            assert np_x[i] == v
        # Smallest
        ba_v, ba_i = app_inst.top_k(ba_x, k, largest=False)
        np_v = np.partition(np_x, k - 1)[:k]
        assert len(ba_v.get()) == k and len(ba_i.get()) == k
        for v, i in zip(ba_v.get(), ba_i.get()):
            assert v in np_v
            assert np_x[i] == v

    # Randomized tests
    shapes = [(50,), (437,), (1000,)]
    block_shapes = [(10,), (23,), (50,)]
    ks = range(1, 51, 15)
    for shape, block_shape, k in itertools.product(shapes, block_shapes, ks):
        ba_x = app_inst.random.random(shape=shape, block_shape=block_shape)
        np_x = ba_x.get()
        # Largest
        ba_v, ba_i = app_inst.top_k(ba_x, k)
        np_v = np.partition(np_x, -k)[-k:]
        assert len(ba_v.get()) == k and len(ba_i.get()) == k
        for v, i in zip(ba_v.get(), ba_i.get()):
            assert v in np_v
            assert np_x[i] == v
        # Smallest
        ba_v, ba_i = app_inst.top_k(ba_x, k, largest=False)
        np_v = np.partition(np_x, k - 1)[:k]
        assert len(ba_v.get()) == k and len(ba_i.get()) == k
        for v, i in zip(ba_v.get(), ba_i.get()):
            assert v in np_v
            assert np_x[i] == v
def sample(app: ArrayApplication, sample_size):
    X_train = nps.concatenate(
        [
            nps.random.randn(sample_size // 2, 2),
            nps.random.randn(sample_size // 2, 2) + 2.0,
        ],
        axis=0,
    )
    y_train = nps.concatenate(
        [
            nps.zeros(shape=(sample_size // 2,), dtype=nps.int),
            nps.ones(shape=(sample_size // 2,), dtype=nps.int),
        ],
        axis=0,
    )
    # We augment X with 1s for intercept term.
    X_train = app.concatenate(
        [
            X_train,
            app.ones(
                shape=(X_train.shape[0], 1),
                block_shape=(X_train.block_shape[0], 1),
                dtype=X_train.dtype,
            ),
        ],
        axis=1,
        axis_block_size=X_train.block_shape[1] + 1,
    )
    return X_train, y_train
def test_sklearn_linear_regression(nps_app_inst: ArrayApplication):
    from sklearn.linear_model import LinearRegression as SKLinearRegression

    _, num_features = 1000, 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=real_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "newton-cg", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        lr_model: LinearRegression = LinearRegression(**kwargs)
        lr_model.fit(X, y)
        y_pred = lr_model.predict(X).get()
        sk_lr_model = SKLinearRegression()
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        # Predictions should match scikit-learn's.
        assert np.allclose(sk_y_pred, y_pred)
def test_quickselect(app_inst: ArrayApplication):
    # Simple tests
    np_x = np.array([3, 7, 2, 4, 5, 1, 5, 6])
    ba_x = app_inst.array(np_x, block_shape=(3,))
    ba_oids = ba_x.flattened_oids()
    correct = [7, 6, 5, 5, 4, 3, 2, 1]
    for i in range(-8, 8):
        value_oid = app_inst.quickselect(ba_oids, i)
        value = app_inst.cm.get(value_oid)
        assert value == correct[i]

    # Randomized tests
    shapes = [(50,), (437,), (1000,)]
    block_shapes = [(10,), (23,), (50,)]
    kth = [-50, -42, -25, -13, 0, 8, 25, 36, 49]
    for shape, block_shape, k in itertools.product(shapes, block_shapes, kth):
        ba_x = app_inst.random.random(shape=shape, block_shape=block_shape)
        ba_oids = ba_x.flattened_oids()
        value_oid = app_inst.quickselect(ba_oids, k)
        value = app_inst.cm.get(value_oid)
        assert value == np.partition(ba_x.get(), -k - 1)[-k - 1]
def test_basic_select(app_inst: ArrayApplication):
    arr: np.ndarray = np.arange(5)
    block_shape = (3,)
    slice_params = list(get_slices(size=10, index_multiplier=2, basic_step=True))
    pbar = tqdm.tqdm(total=len(slice_params))
    for slice_sel in slice_params:
        pbar.set_description(str(slice_sel))
        pbar.update(1)
        ba = app_inst.array(arr, block_shape=block_shape)
        bav = ArrayView.from_block_array(ba)
        res = (arr[slice_sel], bav[slice_sel].create().get())
        assert np.allclose(*res), str(res)
def test_quantile_percentile(app_inst: ArrayApplication):
    # See https://github.com/dask/dask/blob/main/dask/array/tests/test_percentiles.py
    qs = [0, 50, 100]
    methods = ["tdigest"]
    interpolations = ["linear"]
    np_x = np.ones((10,))
    ba_x = app_inst.ones(shape=(10,), block_shape=(2,))
    for q, method, interpolation in itertools.product(qs, methods, interpolations):
        assert app_inst.quantile(
            ba_x, q / 100, method=method, interpolation=interpolation
        ).get() == np.quantile(np_x, q / 100)
        assert app_inst.percentile(
            ba_x, q, method=method, interpolation=interpolation
        ).get() == np.percentile(np_x, q)

    np_x = np.array([0, 0, 5, 5, 5, 5, 20, 20])
    ba_x = app_inst.array(np_x, block_shape=(3,))
    for q, method, interpolation in itertools.product(qs, methods, interpolations):
        assert app_inst.quantile(
            ba_x, q / 100, method=method, interpolation=interpolation
        ).get() == np.quantile(np_x, q / 100)
        assert app_inst.percentile(
            ba_x, q, method=method, interpolation=interpolation
        ).get() == np.percentile(np_x, q)
def test_rr(app_inst: ArrayApplication):
    num_features = 13
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(100, num_features, p=0.5, theta=real_theta)
    extra_X, extra_y = BimodalGaussian.get_dataset(10, num_features, p=0.5, theta=real_theta)

    # Perturb some examples.
    extra_X = extra_X * rs.random_sample(np.product(extra_X.shape)).reshape(extra_X.shape)
    extra_y = extra_y * rs.random_sample(extra_y.shape).reshape(extra_y.shape)
    real_X = np.concatenate([real_X, extra_X], axis=0)
    real_y = np.concatenate([real_y, extra_y], axis=0)

    X = app_inst.array(real_X, block_shape=(15, 5))
    y = app_inst.array(real_y, block_shape=(15,))
    theta = app_inst.ridge_regression(X, y, lamb=0.0)
    robust_theta = app_inst.ridge_regression(X, y, lamb=10000.0)

    # Generate a test set to evaluate robustness to outliers.
    test_X, test_y = BimodalGaussian.get_dataset(100, num_features, p=0.5, theta=real_theta)
    test_X = app_inst.array(test_X, block_shape=(15, 5))
    test_y = app_inst.array(test_y, block_shape=(15,))
    theta_error = np.sum((((test_X @ theta) - test_y) ** 2).get())
    robust_theta_error = np.sum((((test_X @ robust_theta) - test_y) ** 2).get())
    assert robust_theta_error < theta_error
def test_lr(app_inst: ArrayApplication):
    num_features = 13
    rs = np.random.RandomState(1337)
    for dtype in (np.float32, np.float64):
        real_theta = rs.random_sample(num_features).astype(dtype)
        real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=real_theta)
        real_X = real_X.astype(dtype)
        real_y = real_y.astype(dtype)
        X = app_inst.array(real_X, block_shape=(15, 5))
        y = app_inst.array(real_y, block_shape=(15,))

        # Direct TSQR LR
        theta = app_inst.linear_regression(X, y)
        error = app_inst.sum((((X @ theta) - y) ** 2)).get()
        if dtype == np.float64:
            assert np.allclose(0, error), error
        else:
            # Need to account for lower precision.
            assert np.allclose(0, error, rtol=1.e-4, atol=1.e-4), error

        # Fast LR
        theta = app_inst.fast_linear_regression(X, y)
        error = app_inst.sum((((X @ theta) - y) ** 2)).get()
        if dtype == np.float64:
            assert np.allclose(0, error), error
        else:
            # Need to account for lower precision.
            assert np.allclose(0, error, rtol=1.e-4, atol=1.e-4), error
def irls(
    app: ArrayApplication,
    model: LogisticRegression,
    beta,
    X: BlockArray,
    y: BlockArray,
    tol: BlockArray,
    max_iter: int,
):
    for _ in range(max_iter):
        eta: BlockArray = X @ beta
        mu: BlockArray = model.link_inv(eta)
        s = mu * (1 - mu) + 1e-16
        XT_s = X.T * s
        # These are PSD, but inv is faster than psd inv.
        XTsX_inv = linalg.inv(app, XT_s @ X)
        z = eta + (y - mu) / s
        beta = XTsX_inv @ XT_s @ z
        g = model.gradient(X, y, mu, beta)
        if app.max(app.abs(g)) <= tol:
            break
    return beta
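# A plain-NumPy sketch of the same IRLS step for logistic regression, assuming
# the canonical logit link, so link_inv is the sigmoid and the gradient is
# X^T (mu - y). Each iteration solves the weighted least-squares problem
# beta = (X^T S X)^{-1} X^T S z with working response z = eta + (y - mu) / s.
# Names here are illustrative, not the library's API.
def irls_numpy_sketch(beta, X, y, tol, max_iter):
    for _ in range(max_iter):
        eta = X @ beta
        mu = 1.0 / (1.0 + np.exp(-eta))      # sigmoid (inverse logit)
        s = mu * (1 - mu) + 1e-16            # per-sample weights
        XT_s = X.T * s                       # equivalent to X.T @ diag(s)
        z = eta + (y - mu) / s               # working response
        beta = np.linalg.inv(XT_s @ X) @ (XT_s @ z)
        if np.max(np.abs(X.T @ (mu - y))) <= tol:
            break
    return beta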
def ridge_regression(app: ArrayApplication, X: BlockArray, y: BlockArray, lamb: float):
    assert len(X.shape) == 2
    assert len(y.shape) == 1
    assert lamb >= 0
    block_shape = X.block_shape
    shape = X.shape
    R_shape = (shape[1], shape[1])
    R_block_shape = (block_shape[1], block_shape[1])
    R = indirect_tsr(app, X)
    lamb_vec = app.array(lamb * np.eye(R_shape[0]), block_shape=R_block_shape)
    # TODO (hme): A better solution exists, which inverts R by augmenting X and y.
    #  See Murphy 7.5.2.
    theta = inv(app, lamb_vec + R.T @ R) @ (X.T @ y)
    return theta
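# A NumPy sketch of the closed-form solve above. Since R comes from a (thin) QR
# factorization of X, R^T R equals X^T X up to round-off, so this computes the
# usual ridge estimate theta = (lambda * I + X^T X)^{-1} X^T y. Illustrative only.
def ridge_regression_numpy_sketch(X, y, lamb):
    _, R = np.linalg.qr(X)
    return np.linalg.inv(lamb * np.eye(X.shape[1]) + R.T @ R) @ (X.T @ y)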
def test_rwd(app_inst: ArrayApplication):
    array: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = app_inst.array(array, block_shape=(3, 4))
    filename = "darrays/read_write_delete_array_test"
    write_result: BlockArray = app_inst.write_s3(ba, filename)
    write_result_arr = app_inst.get(write_result)
    for grid_entry in write_result.grid.get_entry_iterator():
        assert 'ETag' in write_result_arr[grid_entry]
    ba_read: BlockArray = app_inst.read_s3(filename)
    assert app_inst.get(app_inst.allclose(ba, ba_read))
    delete_result: BlockArray = app_inst.delete_s3(filename)
    delete_result_arr = app_inst.get(delete_result)
    for grid_entry in delete_result.grid.get_entry_iterator():
        deleted_key = delete_result_arr[grid_entry]["Deleted"][0]["Key"]
        assert deleted_key == StoredArrayS3(filename, delete_result.grid).get_key(grid_entry)
def test_split(app_inst: ArrayApplication):
    # TODO (hme): Implement a split leveraging block_shape param in reshape op.
    x = app_inst.array(np.array([1.0, 2.0, 3.0, 4.0]), block_shape=(4,))
    syskwargs = x.blocks[0].syskwargs()
    syskwargs["options"] = {"num_returns": 2}
    res1, res2 = x.system.split(
        x.blocks[0].oid, 2, axis=0, transposed=False, syskwargs=syskwargs
    )
    ba = BlockArray(ArrayGrid((4,), (2,), x.dtype.__name__), x.system)
    ba.blocks[0].oid = res1
    ba.blocks[1].oid = res2
    assert np.allclose([1.0, 2.0, 3.0, 4.0], ba.get())
def test_reshape_ones(app_inst: ArrayApplication):
    def _strip_ones(shape, block_shape):
        indexes = np.where(np.array(shape) != 1)
        return tuple(np.array(shape)[indexes]), tuple(np.array(block_shape)[indexes])

    # Inject many different variants of ones,
    # and ensure the block shapes match at every level.
    shapes = [
        [(10, 2, 20, 5, 3), (5, 1, 4, 3, 3)],
        [(10, 1, 5, 1, 3), (5, 1, 4, 1, 3)],
        [(1, 2, 3), (1, 1, 1)],
        [(10, 1), (2, 1)],
        [(1, 100, 10), (1, 10, 10)],
        [(), ()],
        [(1,), (1,)],
        [(1, 1), (1, 1)],
        [(1, 1, 1), (1, 1, 1)],
    ]
    num_ones = [1, 2, 3]
    for shape, block_shape in shapes:
        arr = app_inst.random_state(1337).random(shape, block_shape)
        arr_np = arr.get()

        # Try removing ones.
        new_shape, new_block_shape = _strip_ones(shape, block_shape)
        new_arr = arr.reshape(new_shape, block_shape=new_block_shape)
        for grid_entry in new_arr.grid.get_entry_iterator():
            new_block: Block = new_arr.blocks[grid_entry]
            new_block_np = new_block.get()
            assert new_block.shape == new_block_np.shape
        assert np.allclose(arr_np, new_arr.get().reshape(shape))

        # Try adding ones.
        for nones in num_ones:
            for pos in range(len(shape) + 1):
                ones = [1] * nones
                new_shape = list(shape)
                new_shape = new_shape[:pos] + ones + new_shape[pos:]
                new_block_shape = list(block_shape)
                new_block_shape = new_block_shape[:pos] + ones + new_block_shape[pos:]
                new_arr = arr.reshape(new_shape, block_shape=new_block_shape)
                for grid_entry in new_arr.grid.get_entry_iterator():
                    new_block: Block = new_arr.blocks[grid_entry]
                    new_block_np = new_block.get()
                    assert new_block.shape == new_block_np.shape
                assert np.allclose(arr_np, new_arr.get().reshape(shape))
def test_poisson(nps_app_inst: ArrayApplication):
    # TODO (hme): Is there a more appropriate distribution for testing Poisson?
    num_samples, num_features = 1000, 1
    rs = np.random.RandomState(1337)
    real_beta = rs.random_sample(num_features)
    real_model: PoissonRegression = PoissonRegression(solver="newton")
    real_model._beta = nps_app_inst.array(real_beta, block_shape=(3,))
    real_model._beta0 = nps_app_inst.scalar(rs.random_sample())
    real_X = rs.random_sample(size=(num_samples, num_features))
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = real_model.predict(X)

    param_set = [{"solver": "newton", "tol": 1e-8, "max_iter": 10}]
    for kwargs in param_set:
        runtime = time.time()
        model: PoissonRegression = PoissonRegression(**kwargs)
        model.fit(X, y)
        runtime = time.time() - runtime
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", model.grad_norm_sq(X, y).get())
        print("objective", model.objective(X, y).get())
        print("D^2", model.deviance_sqr(X, y).get())
        assert nps_app_inst.allclose(real_model._beta, model._beta).get()
        assert nps_app_inst.allclose(real_model._beta0, model._beta0).get()
def test_lr(nps_app_inst: ArrayApplication):
    num_samples, num_features = 1000, 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=real_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 100},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        model: LinearRegression = LinearRegression(**kwargs)
        model.fit(X, y)
        assert model._beta.shape == real_theta.shape and model._beta0.shape == ()
        runtime = time.time() - runtime
        y_pred = model.predict(X).get()
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", model.grad_norm_sq(X, y).get())
        print("objective", model.objective(X, y).get())
        print("error", np.sum((y.get() - y_pred) ** 2) / num_samples)
        print("D^2", model.deviance_sqr(X, y).get())
def test_basic_assign(app_inst: ArrayApplication):
    from_arr: np.ndarray = np.arange(5)
    block_shape = (3,)
    slice_params = list(get_slices(size=10, index_multiplier=2, basic_step=True))
    pbar = tqdm.tqdm(total=len(slice_params))
    for slice_sel in slice_params:
        pbar.set_description(str(slice_sel))
        pbar.update(1)
        from_ba = app_inst.array(from_arr, block_shape=block_shape)
        from_bav = ArrayView.from_block_array(from_ba)
        to_arr: np.ndarray = np.zeros(5)
        to_ba = app_inst.array(to_arr, block_shape=block_shape)
        to_bav = ArrayView.from_block_array(to_ba)
        to_bav[slice_sel] = from_bav[slice_sel]
        to_arr[slice_sel] = from_arr[slice_sel]
        from_res = (from_arr, from_bav.create().get())
        assert np.allclose(*from_res), str(from_res)
        to_res = (to_arr, to_bav.create().get())
        assert np.allclose(*to_res), str(to_res)
def test_logistic(app_inst: ArrayApplication):
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = app_inst.array(real_X, block_shape=(100, 3))
    y = app_inst.array(real_y, block_shape=(100,))
    opt_param_set = [
        ("gd", {"lr": 1e-6, "tol": 1e-8, "max_iter": 10}),
        ("block_sync_sgd", {"lr": 1e-6, "tol": 1e-8, "max_iter": 10}),
        ("block_async_sgd", {"lr": 1e-6, "tol": 1e-8, "max_iter": 10}),
        ("newton", {"tol": 1e-8, "max_iter": 10}),
        ("irls", {"tol": 1e-8, "max_iter": 10}),
    ]
    for opt, opt_params in opt_param_set:
        runtime = time.time()
        lr_model: LogisticRegression = LogisticRegression(app_inst, opt, opt_params)
        lr_model.fit(X, y)
        runtime = time.time() - runtime
        y_pred = (lr_model.predict(X).get() > 0.5).astype(int)
        print("opt", opt)
        print("runtime", runtime)
        print("norm", lr_model.grad_norm_sq(X, y).get())
        print("objective", lr_model.objective(X, y).get())
        print("accuracy", np.sum(y.get() == y_pred) / num_samples)