def test_logistic(nps_app_inst: ArrayApplication):
    """Fit LogisticRegression with each listed solver and report diagnostics.

    For every solver configuration the model is fit on a BimodalGaussian
    dataset, the two predicted class-probability columns are asserted to sum
    to one, and runtime, gradient norm, objective and training accuracy are
    printed.
    """
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "block_sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
        {"solver": "irls", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        lr_model: LogisticRegression = LogisticRegression(**kwargs)
        lr_model.fit(X, y)
        runtime = time.time() - runtime
        y_pred = lr_model.predict(X).get()
        y_pred_proba = lr_model.predict_proba(X).get()
        # A bare np.allclose(...) call checks nothing; the result must be
        # asserted for the probability-sum invariant to be enforced.
        assert np.allclose(
            np.ones(shape=(y.shape[0],)), y_pred_proba[:, 0] + y_pred_proba[:, 1]
        )
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", lr_model.grad_norm_sq(X, y).get())
        print("objective", lr_model.objective(X, y).get())
        print("accuracy", np.sum(y.get() == y_pred) / num_samples)
def test_lr(nps_app_inst: ArrayApplication):
    """Fit LinearRegression with the gd and newton solvers and report diagnostics.

    Checks the shapes of the fitted coefficients, prints runtime, gradient
    norm, objective, mean squared error and D^2, and finally verifies that a
    model fit on integer-valued arrays predicts without overflowing.
    """
    # The dataset has 233 rows; the mean error below must be normalised by
    # this count rather than an unrelated constant.
    num_samples, num_features = 233, 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(
        num_samples, num_features, theta=real_theta
    )
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 100},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        model: LinearRegression = LinearRegression(**kwargs)
        model.fit(X, y)
        assert model._beta.shape == real_theta.shape and model._beta0.shape == ()
        runtime = time.time() - runtime
        y_pred = model.predict(X).get()
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", model.grad_norm_sq(X, y).get())
        print("objective", model.objective(X, y).get())
        print("error", np.sum((y.get() - y_pred) ** 2) / num_samples)
        print("D^2", model.deviance_sqr(X, y).get())

    # Test if integer array arguments will converge properly.
    X = nps_app_inst.array([[1, 2], [3, 5], [1, 5]], block_shape=(2, 2))
    y = nps_app_inst.array([1, 2, 3], block_shape=(2,))
    model: LinearRegression = LinearRegression()
    model.fit(X, y)
    try:
        pred = model.predict([1, 2]).get()
    except OverflowError as err:
        # Raise explicitly: `assert False` would be stripped under `python -O`.
        raise AssertionError(
            "LinearRegression overflows with integer array arguments."
        ) from err
    assert 0.9 < pred < 1.1
def test_sklearn_linear_regression(nps_app_inst: ArrayApplication):
    """Compare LinearRegression predictions with scikit-learn's on the same data."""
    from sklearn.linear_model import LinearRegression as SKLinearRegression

    num_features = 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=real_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "newton-cg", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        lr_model: LinearRegression = LinearRegression(**kwargs)
        lr_model.fit(X, y)
        y_pred = lr_model.predict(X).get()

        sk_lr_model = SKLinearRegression()
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        # The comparison has to be asserted; a bare np.allclose(...) call
        # discards its result and the test would pass unconditionally.
        assert np.allclose(sk_y_pred, y_pred), kwargs
def test_vecdot(app_inst: ArrayApplication):
    """Check `@` between row, column and flat block arrays against NumPy."""
    n = 9
    bs = 3

    def column():
        # (n, 1) array blocked along the first axis.
        return app_inst.array(np.arange(n).reshape(n, 1), block_shape=(bs, 1))

    def flat():
        # 1-dimensional array of length n.
        return app_inst.array(np.arange(n).reshape(n), block_shape=(bs,))

    def row():
        # (1, n) array blocked along the second axis.
        return app_inst.array(np.arange(n).reshape(1, n), block_shape=(1, bs))

    # column^T @ column
    u, v = column(), column()
    assert np.allclose((u.T @ v).get(), u.T.get() @ v.get())

    # flat^T @ flat
    u, v = flat(), flat()
    assert np.allclose((u.T @ v).get(), u.T.get() @ v.get())

    # Mixed flat / column operands, in both orders.
    u, v = flat(), column()
    assert np.allclose((u.T @ v).get(), u.T.get() @ v.get())
    assert np.allclose((v.T @ u).get(), v.T.get() @ u.get())

    # row @ column
    u, v = row(), column()
    assert np.allclose((u @ v).get(), u.get() @ v.get())

    # row @ row^T
    u, v = row(), row()
    assert np.allclose((u @ v.T).get(), u.get() @ v.T.get())
def test_concatenate(app_inst: ArrayApplication):
    """Concatenate block arrays along axis 1 and axis 0 and compare with NumPy."""
    axis = 1
    np_data, _ = BimodalGaussian.get_dataset(1000, 9)
    np_ones = np.ones(shape=(1000, 1))
    X = app_inst.array(np_data, block_shape=(100, 9))
    ones = app_inst.ones((1000, 1), (100, 1), dtype=X.dtype)

    # Two operands.
    joined = app_inst.concatenate(
        [X, ones], axis=axis, axis_block_size=X.block_shape[axis]
    )
    common.check_block_integrity(joined)
    expected = np.concatenate([np_data, np_ones], axis=axis)
    assert np.allclose(joined.get(), expected)

    # Three operands, the last with a different block width.
    np_extra = np.random.random_sample(1000 * 17).reshape(1000, 17)
    extra = app_inst.array(np_extra, block_shape=(X.block_shape[0], 3))
    joined = app_inst.concatenate(
        [X, ones, extra], axis=axis, axis_block_size=X.block_shape[axis]
    )
    common.check_block_integrity(joined)
    expected = np.concatenate([np_data, np_ones, np_extra], axis=axis)
    assert np.allclose(joined.get(), expected)

    # 1-dimensional integer arrays; only block integrity is checked here.
    zeros_1d = app_inst.zeros(shape=(50,), block_shape=(10,), dtype=int)
    ones_1d = app_inst.ones(shape=(50,), block_shape=(10,), dtype=int)
    joined_1d = app_inst.concatenate([zeros_1d, ones_1d], axis=0)
    common.check_block_integrity(joined_1d)
def test_lr(nps_app_inst: ArrayApplication):
    """Fit LinearRegression with the gd and newton solvers and report diagnostics.

    Checks the shapes of the fitted coefficients and prints runtime, gradient
    norm, objective, mean squared error and D^2 for each solver.
    """
    # The dataset has 233 rows; the mean error below must be normalised by
    # this count rather than an unrelated constant.
    num_samples, num_features = 233, 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(
        num_samples, num_features, theta=real_theta
    )
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 100},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        model: LinearRegression = LinearRegression(**kwargs)
        model.fit(X, y)
        assert model._beta.shape == real_theta.shape and model._beta0.shape == ()
        runtime = time.time() - runtime
        y_pred = model.predict(X).get()
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", model.grad_norm_sq(X, y).get())
        print("objective", model.objective(X, y).get())
        print("error", np.sum((y.get() - y_pred) ** 2) / num_samples)
        print("D^2", model.deviance_sqr(X, y).get())
def test_lr(app_inst: ArrayApplication):
    """Check both linear-regression routines reach ~zero squared error.

    Runs `linear_regression` and `fast_linear_regression` for float32 and
    float64 inputs; float32 uses looser tolerances.
    """
    num_features = 13
    rs = np.random.RandomState(1337)
    for dtype in (np.float32, np.float64):
        real_theta = rs.random_sample(num_features).astype(dtype)
        real_X, real_y = BimodalGaussian.get_dataset(
            233, num_features, theta=real_theta
        )
        X = app_inst.array(real_X.astype(dtype), block_shape=(15, 5))
        y = app_inst.array(real_y.astype(dtype), block_shape=(15,))

        # Need to account for lower precision with float32.
        if dtype == np.float64:
            tolerances = {}
        else:
            tolerances = {"rtol": 1.e-4, "atol": 1.e-4}

        # Direct TSQR LR first, then fast LR.
        for solve in (app_inst.linear_regression, app_inst.fast_linear_regression):
            theta = solve(X, y)
            error = app_inst.sum(((X @ theta) - y) ** 2).get()
            assert np.allclose(0, error, **tolerances), error
def test_subscript(app_inst: ArrayApplication):
    """Check integer and slice subscripts of block arrays against NumPy."""
    shape = 12, 21
    # np.prod replaces np.product, which was removed in NumPy 2.0.
    npX = np.arange(np.prod(shape)).reshape(*shape)
    X = app_inst.array(npX, block_shape=(6, 7))
    for i in range(12):
        assert np.allclose((X[:, i].T @ X[:, i]).get(), npX[:, i].T @ npX[:, i])

    # Aligned tests: each slice covers exactly one block column.
    for i in range(0, 21, 7):
        sel = slice(i, i + 7)
        assert np.allclose(
            (X[:, sel].T @ X[:, sel]).get(), npX[:, sel].T @ npX[:, sel]
        )

    # More basic tests.
    X_shape = 12, 21, 16
    npX = np.arange(np.prod(X_shape)).reshape(*X_shape)
    X = app_inst.array(npX, block_shape=(6, 7, 8))
    for i in range(12):
        assert np.allclose(X[i].get(), npX[i])
        assert np.allclose(X[i, 1].get(), npX[i, 1])
        assert np.allclose(X[i, :, 2].get(), npX[i, :, 2])
        assert np.allclose(X[:, 3, i].get(), npX[:, 3, i])
        assert np.allclose(X[:, :, i].get(), npX[:, :, i])
        assert np.allclose((X[i].T @ X[i]).get(), npX[i].T @ npX[i])
def test_rr(app_inst: ArrayApplication):
    """Check a heavily penalised ridge fit beats the unpenalised fit on test data.

    The training set is a BimodalGaussian sample plus ten randomly scaled
    (perturbed) examples; the assertion compares summed squared error on a
    freshly generated test set.
    """
    num_features = 13
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(
        100, num_features, p=0.5, theta=real_theta
    )
    extra_X, extra_y = BimodalGaussian.get_dataset(
        10, num_features, p=0.5, theta=real_theta
    )

    # Perturb some examples. Sampling directly with the target shape draws
    # the same values as sampling a flat vector and reshaping it, and avoids
    # np.product, which was removed in NumPy 2.0.
    extra_X = extra_X * rs.random_sample(extra_X.shape)
    extra_y = extra_y * rs.random_sample(extra_y.shape)
    real_X = np.concatenate([real_X, extra_X], axis=0)
    real_y = np.concatenate([real_y, extra_y], axis=0)

    X = app_inst.array(real_X, block_shape=(15, 5))
    y = app_inst.array(real_y, block_shape=(15,))
    theta = app_inst.ridge_regression(X, y, lamb=0.0)
    robust_theta = app_inst.ridge_regression(X, y, lamb=10000.0)

    # Generate a test set to evaluate robustness to outliers.
    test_X, test_y = BimodalGaussian.get_dataset(
        100, num_features, p=0.5, theta=real_theta
    )
    test_X = app_inst.array(test_X, block_shape=(15, 5))
    test_y = app_inst.array(test_y, block_shape=(15,))
    theta_error = np.sum((((test_X @ theta) - test_y) ** 2).get())
    robust_theta_error = np.sum((((test_X @ robust_theta) - test_y) ** 2).get())
    assert robust_theta_error < theta_error
def test_sklearn_logistic_regression(nps_app_inst: ArrayApplication):
    """Fit LogisticRegression and scikit-learn's counterpart on the same data.

    For both models the two predicted class-probability columns are asserted
    to sum to one.
    """
    from sklearn.linear_model import LogisticRegression as SKLogisticRegression

    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    expected_total = np.ones(shape=(y.shape[0],))
    param_set = [
        {"solver": "newton-cg", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        lr_model: LogisticRegression = LogisticRegression(**kwargs)
        lr_model.fit(X, y)
        y_pred = lr_model.predict(X).get()
        y_pred_proba = lr_model.predict_proba(X).get()
        # A bare np.allclose(...) call checks nothing; assert the invariant.
        assert np.allclose(expected_total, y_pred_proba[:, 0] + y_pred_proba[:, 1])

        sk_lr_model = SKLogisticRegression(**kwargs)
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        sk_y_pred_proba = sk_lr_model.predict_proba(real_X)
        assert np.allclose(
            expected_total, sk_y_pred_proba[:, 0] + sk_y_pred_proba[:, 1]
        )
        # NOTE(review): the result of this label comparison is discarded, so
        # it does not affect the test outcome. scikit-learn applies an L2
        # penalty by default, so the two models may legitimately disagree on
        # some samples -- confirm the intended tolerance before asserting.
        np.allclose(sk_y_pred, y_pred)
def test_penalties(nps_app_inst: ArrayApplication):
    """Fit Lasso, Ridge and ElasticNet with every listed solver.

    For the newton and lbfgs solvers the fitted model's `deviance_sqr` is
    additionally compared against 0.9.
    """
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))

    solver_configs = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "block_sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
        {"solver": "lbfgs", "tol": 1e-8, "max_iter": 20},
    ]
    # Each penalised model is paired with its penalty-specific arguments.
    penalised_models = [
        (Lasso, {"alpha": 0.5}),
        (Ridge, {"alpha": 0.5}),
        (ElasticNet, {"alpha": 0.5, "l1_ratio": 0.5}),
    ]
    for solver_kwargs in solver_configs:
        check_quality = solver_kwargs["solver"] in ("newton", "lbfgs")
        for model_cls, penalty_kwargs in penalised_models:
            model = model_cls(**penalty_kwargs, **solver_kwargs)
            model.fit(X, y)
            if check_quality:
                # NOTE(review): the comparison is made on the value returned
                # by deviance_sqr without .get() -- confirm it evaluates to a
                # plain boolean.
                assert model.deviance_sqr(X, y) > 0.9
def test_basic_assign(app_inst: ArrayApplication):
    """Assign slices between ArrayViews and mirror each assignment in NumPy.

    Every slice produced by `get_slices` is applied to a fresh source/target
    pair; both views must match their NumPy counterparts afterwards.
    """
    source_np: np.ndarray = np.arange(5)
    block_shape = (3,)
    selections = list(get_slices(size=10, index_multiplier=2, basic_step=True))
    progress = tqdm.tqdm(total=len(selections))
    for sel in selections:
        progress.set_description(str(sel))
        progress.update(1)

        source_view = ArrayView.from_block_array(
            app_inst.array(source_np, block_shape=block_shape)
        )
        target_np: np.ndarray = np.zeros(5)
        target_view = ArrayView.from_block_array(
            app_inst.array(target_np, block_shape=block_shape)
        )

        # Perform the same assignment on the view and on the NumPy array.
        target_view[sel] = source_view[sel]
        target_np[sel] = source_np[sel]

        source_pair = (source_np, source_view.create().get())
        assert np.allclose(*source_pair), str(source_pair)
        target_pair = (target_np, target_view.create().get())
        assert np.allclose(*target_pair), str(target_pair)
def test_inv(app_inst: ArrayApplication):
    """Compare `inv` and `cholesky` with NumPy for float32 and float64 input."""
    shape = (5, 5)
    tolerances = {"rtol": 1e-4, "atol": 1e-4}
    for dtype in (np.float32, np.float64):
        np_mat = sample_sym_pd_mat(shape=shape).astype(dtype)
        mat = app_inst.array(np_mat, block_shape=shape)

        # Invert the R factor of a QR decomposition.
        _, r = np.linalg.qr(mat.get())
        r_block = app_inst.array(r, block_shape=shape)
        r_inv = app_inst.inv(r_block).get()
        assert np.allclose(np.linalg.inv(r), r_inv, **tolerances)

        # Cholesky factor of the original matrix.
        L = app_inst.cholesky(mat).get()
        expected_L = np.linalg.cholesky(mat.get())
        assert np.allclose(expected_L, L, **tolerances)
def test_stats(app_inst: ArrayApplication):
    """Compare `mean` (axis 0) and `std` (axis 1) with NumPy on two datasets."""
    # (number of samples, number of features, block shape)
    cases = [
        (3, 2, (2, 1)),
        (100, 9, (10, 2)),
    ]
    for n_samples, n_features, block_shape in cases:
        np_data, _ = BimodalGaussian.get_dataset(n_samples, n_features)
        data = app_inst.array(np_data, block_shape=block_shape)
        col_means = app_inst.mean(data, axis=0).get()
        assert np.allclose(col_means, np.mean(np_data, axis=0))
        row_stds = app_inst.std(data, axis=1).get()
        assert np.allclose(row_stds, np.std(np_data, axis=1))
def test_quickselect(app_inst: ArrayApplication):
    """Check `quickselect` for positive and negative k on fixed and random data."""
    # Simple tests
    values = np.array([3, 7, 2, 4, 5, 1, 5, 6])
    oids = app_inst.array(values, block_shape=(3,)).flattened_oids()
    in_order = [1, 2, 3, 4, 5, 5, 6, 7]
    for k in range(-8, 8):
        selected = app_inst.quickselect(oids, k)
        # Negative k is mapped onto the equivalent non-negative position.
        position = k + 8 if k < 0 else k
        assert selected == in_order[position]

    # Randomized tests
    shapes = [(50,), (437,), (1000,)]
    block_shapes = [(10,), (23,), (50,)]
    kth = [-50, -42, -25, -13, 0, 8, 25, 36, 49]
    for shape, block_shape, k in itertools.product(shapes, block_shapes, kth):
        sample = app_inst.random.random(shape=shape, block_shape=block_shape)
        oids = sample.flattened_oids()
        selected = app_inst.quickselect(oids, k)
        position = k + shape[0] if k < 0 else k
        assert selected == np.partition(sample.get(), position)[position]
def test_quantile_percentile(app_inst: ArrayApplication):
    """Compare `quantile` and `percentile` with NumPy at q = 0, 50 and 100."""
    # see https://github.com/dask/dask/blob/main/dask/array/tests/test_percentiles.py
    qs = [0, 50, 100]
    methods = ["tdigest"]
    interpolations = ["linear"]

    def check(np_x, ba_x):
        # Every (q, method, interpolation) combination must match NumPy exactly.
        for q, method, interpolation in itertools.product(
            qs, methods, interpolations
        ):
            quantile = app_inst.quantile(
                ba_x, q / 100, method=method, interpolation=interpolation
            )
            assert quantile.get() == np.quantile(np_x, q / 100)
            percentile = app_inst.percentile(
                ba_x, q, method=method, interpolation=interpolation
            )
            assert percentile.get() == np.percentile(np_x, q)

    # Constant array.
    check(np.ones((10,)), app_inst.ones(shape=(10,), block_shape=(2,)))

    # Array with repeated values.
    repeated = np.array([0, 0, 5, 5, 5, 5, 20, 20])
    check(repeated, app_inst.array(repeated, block_shape=(3,)))
def test_sklearn_poisson_regression(nps_app_inst: ArrayApplication):
    """Print D^2 for PoissonRegression and scikit-learn's PoissonRegressor.

    Both scores are computed with the fitted PoissonRegression model's
    `deviance` function; nothing is asserted.
    """

    def d_squared(dev_func, y, _y_pred):
        # One minus the ratio of the model deviance to the deviance obtained
        # when predicting the mean of y.
        return 1 - dev_func(y, _y_pred) / dev_func(y, nps_app_inst.mean(y))

    from sklearn.linear_model import PoissonRegressor as SKPoissonRegressor

    coef = np.array([0.2, -0.1])
    real_X = np.array([[0, 1, 2, 3, 4]]).T
    linear_part = np.dot(real_X, coef[0]) + coef[1]
    real_y = np.exp(linear_part).reshape(-1)
    X = nps_app_inst.array(real_X, block_shape=real_X.shape)
    y = nps_app_inst.array(real_y, block_shape=real_y.shape)

    param_set = [
        {"tol": 1e-4, "max_iter": 100},
    ]
    for kwargs in param_set:
        lr_model: PoissonRegression = PoissonRegression(**kwargs)
        lr_model.fit(X, y)
        y_pred = lr_model.predict(X).get()
        print("D^2", d_squared(lr_model.deviance, y, y_pred).get())

        sk_lr_model = SKPoissonRegressor(**kwargs)
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        print("D^2", d_squared(lr_model.deviance, y, sk_y_pred).get())
def test_assign_complete_2dim_slices(app_inst: ArrayApplication):
    """Exhaustively test 2-dim slice assignment `A[i1:i2, j1:j2] = B[k1:k2, l1:l2]`.

    For every block shape shared by A and B, every start/stop combination
    with matching extents is assigned both in NumPy and on the block arrays,
    and the results are compared.
    """
    # All 2-dim slice assignments.
    # i_1:i_2 = k_1:k_2
    A_shape = 4, 6
    B_shape = 4, 6
    A_block_shapes = list(itertools.product(*(range(1, dim + 1) for dim in A_shape)))
    B_block_shapes = list(itertools.product(*(range(1, dim + 1) for dim in B_shape)))
    # Number of start/stop combinations visited per block-shape pair.
    num_slice_combos = len(A_block_shapes) ** 2 * len(B_block_shapes) ** 2
    # np.prod replaces np.product, which was removed in NumPy 2.0.
    pbar = tqdm.tqdm(
        total=np.prod(
            [
                len(A_block_shapes),
                len(B_block_shapes),
                len(A_block_shapes) ** 2,
                len(B_block_shapes) ** 2,
            ]
        )
    )
    for A_block_shape in A_block_shapes:
        for B_block_shape in B_block_shapes:
            if A_block_shape != B_block_shape:
                # If array shapes are equal
                # then block shapes must be equal.
                pbar.update(num_slice_combos)
                continue
            npA = np.zeros(A_shape)
            npB = np.random.random_sample(B_shape)
            A = app_inst.array(npA, block_shape=A_block_shape)
            B = app_inst.array(npB, block_shape=B_block_shape)
            # The block-shape tuples double as the candidate start/stop
            # coordinates; the iteration order matches four nested loops.
            for A_strt, A_stp, B_strt, B_stp in itertools.product(
                A_block_shapes, A_block_shapes, B_block_shapes, B_block_shapes
            ):
                pbar.update(1)
                # Skip empty or reversed target slices.
                if A_stp[0] <= A_strt[0] or A_stp[1] <= A_strt[1]:
                    continue
                # Source and target extents must agree on both axes.
                if (
                    A_stp[0] - A_strt[0] != B_stp[0] - B_strt[0]
                    or A_stp[1] - A_strt[1] != B_stp[1] - B_strt[1]
                ):
                    continue
                desc_A = "(%d, %d)[%d:%d, %d:%d]" % (
                    A.block_shape[0],
                    A.block_shape[1],
                    A_strt[0],
                    A_stp[0],
                    A_strt[1],
                    A_stp[1],
                )
                desc_B = "(%d, %d)[%d:%d, %d:%d]" % (
                    B.block_shape[0],
                    B.block_shape[1],
                    B_strt[0],
                    B_stp[0],
                    B_strt[1],
                    B_stp[1],
                )
                desc = "Testing 2dim slices. %s = %s" % (desc_A, desc_B)
                pbar.set_description(desc=desc)
                A_sel = (slice(A_strt[0], A_stp[0]), slice(A_strt[1], A_stp[1]))
                B_sel = (slice(B_strt[0], B_stp[0]), slice(B_strt[1], B_stp[1]))
                assert np.allclose(B[B_sel].get(), npB[B_sel])
                npA[A_sel] = npB[B_sel]
                A[A_sel] = B[B_sel]
                assert np.allclose(A.get(), npA)
                assert np.allclose(B.get(), npB)
def test_median(app_inst: ArrayApplication):
    """Compare `median` with `np.median` on fixed and random 1-D arrays."""
    # Simple tests: one odd-length and one even-length input.
    fixed_inputs = (
        [7, 2, 4, 5, 1, 5, 6],
        [3, 7, 2, 4, 5, 1, 5, 6],
    )
    for values in fixed_inputs:
        np_values = np.array(values)
        blocked = app_inst.array(np_values, block_shape=(3,))
        assert app_inst.median(blocked).get() == np.median(np_values)

    # Randomized tests
    shapes = [(50,), (437,), (1000,)]
    block_shapes = [(10,), (23,), (50,)]
    for shape, block_shape in itertools.product(shapes, block_shapes):
        sample = app_inst.random.random(shape=shape, block_shape=block_shape)
        assert app_inst.median(sample).get() == np.median(sample.get())
def test_qr(app_inst: ArrayApplication):
    """Check Q @ R reproduces the input for the indirect and direct TSQR routines."""
    np_data, _ = BimodalGaussian.get_dataset(2345, 9)
    data = app_inst.array(np_data, block_shape=(123, 4))
    for factorize in (app_inst.indirect_tsqr, app_inst.direct_tsqr):
        q_factor, r_factor = factorize(data)
        reconstructed = q_factor.get() @ r_factor.get()
        assert np.allclose(reconstructed, np_data)
def test_tensordot_basic(app_inst: ArrayApplication):
    """Compare `X.T.tensordot(X, axes=1)` on a 4-dim block array with NumPy."""
    shape = 2, 4, 10, 15
    # np.prod replaces np.product, which was removed in NumPy 2.0.
    npX = np.arange(np.prod(shape)).reshape(*shape)
    rX = app_inst.array(npX, block_shape=(1, 2, 10, 3))
    rResult = rX.T.tensordot(rX, axes=1)
    assert np.allclose(rResult.get(), np.tensordot(npX.T, npX, axes=1))
    common.check_block_integrity(rResult)
def test_tensordot_all_shapes(app_inst: ArrayApplication):
    """Compare block `tensordot` with NumPy over all compatible block shapes.

    For each value of `axes`, every pair of block shapes whose contracted
    block dimensions agree is multiplied and compared with `np.tensordot`.
    """
    for axes in [0, 1, 2]:
        a = np.arange(7 * 6 * 4).reshape((7, 6, 4))
        # With two contracted axes, b must lead with (6, 4); otherwise with (4,).
        b_shape = (6, 4, 9) if axes == 2 else (4, 6, 9)
        b = np.arange(6 * 4 * 9).reshape(b_shape)
        c = np.tensordot(a, b, axes=axes)
        a_block_shapes = list(
            itertools.product(*(range(1, dim + 1) for dim in a.shape))
        )
        b_block_shapes = list(
            itertools.product(*(range(1, dim + 1) for dim in b.shape))
        )
        # np.prod replaces np.product, which was removed in NumPy 2.0.
        pbar = tqdm.tqdm(total=np.prod([len(a_block_shapes), len(b_block_shapes)]))
        for a_block_shape in a_block_shapes:
            for b_block_shape in b_block_shapes:
                pbar.update(1)
                # NOTE(review): when axes == 0, a_block_shape[-0:] is the whole
                # tuple while b_block_shape[:0] is empty, so every pair is
                # skipped and the axes=0 pass exercises nothing. Behaviour is
                # kept as-is; enabling that pass would run every block-shape
                # pair as an outer product -- confirm whether that is wanted.
                if a_block_shape[-axes:] != b_block_shape[:axes]:
                    continue
                pbar.set_description(
                    "axes=%s %s @ %s"
                    % (str(axes), str(a_block_shape), str(b_block_shape))
                )
                block_a = app_inst.array(a, block_shape=a_block_shape)
                block_b = app_inst.array(b, block_shape=b_block_shape)
                block_c = block_a.tensordot(block_b, axes=axes)
                assert np.allclose(block_c.get(), c)
                common.check_block_integrity(block_c)
def test_bool_reduction(app_inst: ArrayApplication):
    """Check summing a boolean block array matches NumPy in value and dtype."""
    flags = np.array([True, False, True, True, False, False], dtype=np.bool_)
    blocked_flags = app_inst.array(flags, block_shape=(2,))
    actual = app_inst.sum(blocked_flags, axis=0).get()
    expected = np.sum(flags)
    assert actual.dtype == expected.dtype
    assert actual == expected
def test_transpose(app_inst: ArrayApplication):
    """Check `.T` matches NumPy and that transposing twice is the identity."""
    np_data, _ = BimodalGaussian.get_dataset(100, 9)
    data = app_inst.array(np_data, block_shape=(100, 1))
    transposed = data.T.get()
    assert np.allclose(transposed, np_data.T)

    # Identity.
    round_trip = data.T.T
    assert np.allclose(round_trip.get(), data.get())
    assert np.allclose(data.T.T.get(), np_data)
def test_inv_assumptions(app_inst: ArrayApplication):
    """Check the NumPy/SciPy/LAPACK behaviours the inverse routine is compared against.

    Verifies `app_inst.inv` against `np.linalg.inv`, the Cholesky-based
    inverse identity, the in-place behaviour of `lapack.dtrtri` for Fortran-
    versus C-ordered input, and finally compares wall-clock time of the
    SciPy/LAPACK Cholesky inverse with `np.linalg.inv` on a 1500 x 1500 matrix.
    """
    # pylint: disable=no-member, unused-variable
    np_Z = sample_sym_pd_mat(shape=(10, 10))

    # Compute the inverse of np_Z using sym_psd routine.
    Z = app_inst.array(np_Z, np_Z.shape)
    Z_inv = app_inst.inv(Z).get()
    Z_true_inv = np.linalg.inv(np_Z)
    assert np.allclose(Z_true_inv, Z_inv)

    # Try Cholesky approach.
    # With Z = L @ L.T, the inverse is inv(L).T @ inv(L).
    np_L = np.linalg.cholesky(np_Z)
    np_L_inv = np.linalg.inv(np_L)
    Z_cho_inv = np_L_inv.T @ np_L_inv
    assert np.allclose(Z_cho_inv, Z_true_inv)

    # Test backsub.
    # dtrtri is the double-precision routine, hence the dtype check.
    assert np_L.dtype == np.float64
    lp_L_inv, _ = lapack.dtrtri(np_L, lower=1, unitdiag=0, overwrite_c=0)
    assert np.allclose(np_L_inv, lp_L_inv)

    # Test overwrite.
    # With a Fortran-ordered input and overwrite_c=1, the test expects the
    # input buffer itself to hold the inverse afterwards.
    overwrite_L_inv = np_L.copy(order="F")
    overwrite_L_inv_res, info = lapack.dtrtri(
        overwrite_L_inv, lower=1, unitdiag=0, overwrite_c=1
    )
    assert np.allclose(overwrite_L_inv_res, overwrite_L_inv)
    assert np.allclose(np_L_inv, overwrite_L_inv)

    # This should copy.
    # With a C-ordered input the test expects the input to be left unchanged,
    # so result and input must differ.
    overwrite_L_inv = np_L.copy(order="C")
    overwrite_L_inv_res, info = lapack.dtrtri(
        overwrite_L_inv, lower=1, unitdiag=0, overwrite_c=1
    )
    assert not np.allclose(overwrite_L_inv_res, overwrite_L_inv)

    # scipy cholesky tests.
    scipy_L_inv, info = lapack.dtrtri(
        scipy.linalg.cholesky(
            np.asfortranarray(np_Z),
            lower=True,
            overwrite_a=True,
            check_finite=False,
        ),
        lower=1,
        unitdiag=0,
        overwrite_c=1,
    )
    assert np.allclose(scipy_L_inv, np_L_inv)

    # Benchmark test.
    # NOTE(review): the final assertion compares wall-clock timings of a
    # single run each, so it may be sensitive to machine load and the
    # installed BLAS/LAPACK build -- confirm this is acceptable in CI.
    np_Z = sample_sym_pd_mat((1500, 1500))
    scipy_runtime = time.time()
    scipy_L_inv, info = lapack.dtrtri(
        scipy.linalg.cholesky(
            np.asfortranarray(np_Z),
            lower=True,
            overwrite_a=True,
            check_finite=False,
        ),
        lower=1,
        unitdiag=0,
        overwrite_c=1,
    )
    scipy_Z_inv = scipy_L_inv.T @ scipy_L_inv
    scipy_runtime = time.time() - scipy_runtime

    np_runtime = time.time()
    np_Z_inv = np.linalg.inv(np_Z)
    np_runtime = time.time() - np_runtime
    assert scipy_runtime < np_runtime
def test_transposed_block(app_inst_all: ArrayApplication):
    """Inspect block (0, 1) of a transposed 2 x 3 array blocked as (1, 3)."""
    np_values = np.array([[1, 2, 3], [4, 5, 6]])
    ba: BlockArray = app_inst_all.array(np_values, block_shape=(1, 3))
    transposed_blocks = ba.T.blocks
    blk: Block = transposed_blocks[0, 1]
    assert blk.size() == 3
    assert not blk.transposed
    # The block's recorded grid position and shape must equal the "true" ones.
    assert blk.grid_entry == blk.true_grid_entry()
    assert blk.grid_shape == blk.true_grid_shape()
def test_basic_assignment_broadcasting(app_inst: ArrayApplication):
    """Test mixed-length broadcasting in basic (index/slice) assignment.

    Random index/slice accessors of varying length are applied to two
    4-dim arrays. Whenever NumPy accepts `a[a_accessor] = b[b_accessor]`,
    the same assignment is performed on the block arrays and both results
    are compared with NumPy.
    """
    rs = np.random.RandomState(1337)

    def get_sel(num_entries, shape):
        # Draw a random (start, stop) pair for each of the leading axes.
        # randint's upper bound is exclusive, so these calls draw from the
        # same inclusive ranges as the deprecated
        # random_integers(0, dim - 1) and random_integers(start, dim).
        r = []
        for dim in shape[:num_entries]:
            start = rs.randint(0, dim)
            stop = rs.randint(start, dim + 1)
            r.append((start, stop))
        return r

    a_shape = (6, 7, 2, 5)
    a_block_shape = (2, 4, 2, 3)
    b_shape = (6, 7, 2, 5)
    b_block_shape = (3, 2, 1, 2)
    num_axes = len(a_shape)
    # Each mode turns a (start, stop) pair into an index or a slice.
    access_modes = [
        lambda a1, a2: a1,
        lambda a1, a2: slice(None, None, None),
        lambda a1, a2: slice(a1, None, None),
        lambda a1, a2: slice(None, a1, None),
        lambda a1, a2: slice(a1, a2, None),
    ]
    for a_len in range(num_axes):
        for b_len in range(num_axes):
            a_mode_iterator = list(itertools.product(access_modes, repeat=a_len))
            b_mode_iterator = list(itertools.product(access_modes, repeat=b_len))
            pbar = tqdm.tqdm(
                total=len(a_mode_iterator) * len(b_mode_iterator),
                desc="Testing assignment broadcasting %d/%d"
                % (a_len * num_axes + b_len, num_axes**2),
            )
            # Create some valid intervals.
            for a_mode in a_mode_iterator:
                for b_mode in b_mode_iterator:
                    pbar.update(1)
                    a_sel = get_sel(a_len, a_shape)
                    b_sel = get_sel(b_len, b_shape)
                    a_accessor = tuple(
                        mode(*sel) for mode, sel in zip(a_mode, a_sel)
                    )
                    b_accessor = tuple(
                        mode(*sel) for mode, sel in zip(b_mode, b_sel)
                    )
                    # np.prod replaces np.product, removed in NumPy 2.0.
                    arr_a = np.arange(np.prod(a_shape)).reshape(a_shape)
                    arr_b = np.arange(np.prod(b_shape)).reshape(b_shape)
                    ba_a = app_inst.array(arr_a, a_block_shape)
                    ba_b = app_inst.array(arr_b, b_block_shape)
                    # NumPy decides whether the shapes broadcast; only then
                    # is the block-array assignment attempted.
                    try:
                        arr_a[a_accessor] = arr_b[b_accessor]
                    except ValueError:
                        broadcasted = False
                    else:
                        broadcasted = True
                    if broadcasted:
                        ba_a[a_accessor] = ba_b[b_accessor]
                        assert np.allclose(arr_a, ba_a.get())
                        assert np.allclose(arr_b, ba_b.get())
def test_poisson_basic(nps_app_inst: ArrayApplication):
    """Fit PoissonRegression on noise-free data and check the recovered coefficients.

    Targets are generated as exp(0.2 * x - 0.1); the fitted slope and
    intercept must match those values with rtol=1e-4.
    """
    coef = np.array([0.2, -0.1])
    X_real = np.array([[0, 1, 2, 3, 4]]).T
    linear_part = np.dot(X_real, coef[0]) + coef[1]
    y_real = np.exp(linear_part).reshape(-1)
    X = nps_app_inst.array(X_real, block_shape=X_real.shape)
    y = nps_app_inst.array(y_real, block_shape=y_real.shape)

    model: PoissonRegression = PoissonRegression(
        solver="newton", tol=1e-8, max_iter=10
    )
    model.fit(X, y)
    print("norm", model.grad_norm_sq(X, y).get())
    print("objective", model.objective(X, y).get())
    print("D^2", model.deviance_sqr(X, y).get())

    # Slope: every coefficient except the last.
    expected_beta = nps_app_inst.array(coef[:-1], block_shape=(1,))
    assert nps_app_inst.allclose(model._beta, expected_beta, rtol=1e-4).get()
    # Intercept: the last coefficient.
    expected_beta0 = nps_app_inst.scalar(coef[-1])
    assert nps_app_inst.allclose(model._beta0, expected_beta0, rtol=1e-4).get()
def test_tensordot_large_shape(app_inst: ArrayApplication):
    """Compare block `tensordot` (axes=1) with NumPy on two 4-dim operands."""
    lhs = np.arange(4 * 6 * 10 * 90).reshape((90, 10, 6, 4))
    rhs = np.arange(4 * 6 * 10 * 75).reshape((4, 6, 10, 75))
    expected = np.tensordot(lhs, rhs, axes=1)

    block_lhs = app_inst.array(lhs, block_shape=(30, 5, 3, 2))
    block_rhs = app_inst.array(rhs, block_shape=(2, 3, 5, 25))
    block_result = block_lhs.tensordot(block_rhs, axes=1)

    assert np.allclose(block_result.get(), expected)
    common.check_block_integrity(block_result)
def test_pca(app_inst: ArrayApplication):
    """Check `linalg.pca` against a projection onto the SVD of the covariance."""
    np_data, _ = BimodalGaussian.get_dataset(2345, 9)
    data = app_inst.array(np_data, block_shape=(123, 4))

    # Covariance matrix test: the left and right factors must agree.
    covariance = app_inst.cov(data, rowvar=False)
    left, _, right_t = linalg.svd(app_inst, covariance)
    assert app_inst.allclose(left, right_t.T)

    # Projecting the data onto the left factor must reproduce linalg.pca.
    projected = data @ left
    assert app_inst.allclose(projected, linalg.pca(app_inst, data))