def test_pca(app_inst: ArrayApplication):
    """Check ``linalg.pca`` against a projection onto the covariance SVD basis.

    Builds a block array from a ``BimodalGaussian`` dataset, decomposes its
    column-wise covariance with ``linalg.svd``, and compares ``X @ V`` with the
    output of ``linalg.pca``.
    """
    dataset, _ = BimodalGaussian.get_dataset(2345, 9)
    X = app_inst.array(dataset, block_shape=(123, 4))

    # Covariance matrix test: the left factor must equal the transposed
    # right factor of the decomposition.
    cov = app_inst.cov(X, rowvar=False)
    V, _, VT = linalg.svd(app_inst, cov)
    assert app_inst.allclose(V, VT.T)

    # Projecting the data onto V must reproduce the library's PCA result.
    projected = X @ V
    assert app_inst.allclose(projected, linalg.pca(app_inst, X))
def test_poisson_basic(nps_app_inst: ArrayApplication):
    """Fit a Newton-solved Poisson regression on five points and check its parameters.

    The targets are generated as ``exp(x * coef[0] + coef[1])``, and the fitted
    ``_beta`` / ``_beta0`` are compared against ``coef[:-1]`` / ``coef[-1]``
    with a relative tolerance of 1e-4.
    """
    coef = np.array([0.2, -0.1])
    features = np.array([[0, 1, 2, 3, 4]]).T
    targets = np.exp(np.dot(features, coef[0]) + coef[1]).reshape(-1)

    # Each array is stored as a single block covering its full shape.
    X = nps_app_inst.array(features, block_shape=features.shape)
    y = nps_app_inst.array(targets, block_shape=targets.shape)

    model: PoissonRegression = PoissonRegression(
        solver="newton", tol=1e-8, max_iter=10
    )
    model.fit(X, y)

    # Diagnostics, printed in a fixed order.
    diagnostics = (
        ("norm", model.grad_norm_sq),
        ("objective", model.objective),
        ("D^2", model.deviance_sqr),
    )
    for label, metric in diagnostics:
        print(label, metric(X, y).get())

    beta_matches = nps_app_inst.allclose(
        model._beta,
        nps_app_inst.array(coef[:-1], block_shape=(1,)),
        rtol=1e-4,
    )
    assert beta_matches.get()

    beta0_matches = nps_app_inst.allclose(
        model._beta0,
        nps_app_inst.scalar(coef[-1]),
        rtol=1e-4,
    )
    assert beta0_matches.get()
def test_default_random(app_inst: ArrayApplication):
    """Exercise unseeded random states and the application's default seeded RNG.

    Two samples drawn from fresh, unseeded random states are expected to
    differ; up to ten redraws are allowed (with a warning) before the test
    fails. Re-seeding ``app_inst.random`` with the same seed must reproduce
    the same sample.
    """
    first = app_inst.random_state().random()
    second = app_inst.random_state().random()

    retries = 0
    max_retries = 10
    # Redraw the second sample while it still matches the first.
    while app_inst.allclose(first, second) and retries < max_retries:
        retries += 1
        second = app_inst.random_state().random()
    if retries > 0:
        warnings.warn(
            "More than one iteration required to generate unequal random numbers."
        )
    assert not app_inst.allclose(first, second)

    # Test default random seed.
    app_inst.random.seed(1337)
    seeded_first = app_inst.random.random()
    app_inst.random.seed(1337)
    seeded_second = app_inst.random.random()
    assert app_inst.allclose(seeded_first, seeded_second)
def test_rwd(app_inst: ArrayApplication):
    """Round-trip a block array through the filesystem: write, read, delete."""
    source: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = app_inst.array(source, block_shape=(3, 4))
    filename = "/tmp/darrays/read_write_delete_array_test"

    # Write, then check that each block of the write result matches the
    # corresponding entry of the fully fetched result.
    written: BlockArray = app_inst.write_fs(ba, filename)
    written_np = written.get()
    for grid_entry in written.grid.get_entry_iterator():
        block_value = written[grid_entry].get()
        assert block_value == written_np[grid_entry]

    # Reading the file back must reproduce the original array.
    ba_read: BlockArray = app_inst.read_fs(filename)
    assert app_inst.get(app_inst.allclose(ba, ba_read))

    # Deletion must report a truthy result.
    deleted: bool = app_inst.delete_fs(filename)
    assert deleted
def test_np_random(app_inst: ArrayApplication):
    """Check sampling, seeding and dtype handling of the application's random states.

    Covers: a scalar sample, uniformity of a seeded sample (KS test),
    reproducibility for equal seeds, divergence for different seeds and for
    different generation block shapes, and an explicit ``float32`` dtype.
    """
    # Sample a single value.
    sample = app_inst.random_state(1337).random().get()
    assert sample.shape == ()
    # ``np.float`` was only an alias for the builtin ``float`` and has been
    # removed from NumPy (1.24+), so check against the builtin directly.
    assert isinstance(sample.item(), float)

    shape, block_shape = (15, 10), (5, 5)
    # Probably not equal if pvalue falls below this threshold.
    epsilon = 1e-2

    rs1: NumsRandomState = app_inst.random_state(1337)
    ba1: BlockArray = rs1.random(shape, block_shape)
    # The Kolmogorov–Smirnov test for arbitrary distributions.
    # Under the null hypothesis, the distributions are equal,
    # so we say distributions are neq if pvalue < epsilon.
    _, pvalue = stats.kstest(ba1.get().flatten(), stats.uniform.cdf)
    assert pvalue > epsilon

    # Same seed and block shape: identical arrays.
    rs2: NumsRandomState = app_inst.random_state(1337)
    ba2: BlockArray = rs2.random(shape, block_shape)
    assert app_inst.allclose(ba1, ba2)

    # Different seed: different arrays.
    rs3: NumsRandomState = app_inst.random_state(1338)
    ba3: BlockArray = rs3.random(shape, block_shape)
    assert not app_inst.allclose(ba2, ba3)

    # If the block shape differs, so do the generated arrays.
    # This is a non-issue since we don't expose block shape as a param.
    rs4: NumsRandomState = app_inst.random_state(1337)
    ba4: BlockArray = rs4.random(shape, block_shape=(6, 7)).reshape(
        block_shape=block_shape
    )
    assert not app_inst.allclose(ba2, ba4)

    # dtype tests.
    rs5: NumsRandomState = app_inst.random_state(1337)
    ba5: BlockArray = rs5.random(shape, block_shape, dtype=np.float32)
    assert ba5.dtype is np.float32
    assert str(ba5.get().dtype) == "float32"
def test_poisson(nps_app_inst: ArrayApplication):
    """Recover the parameters of a reference Poisson model from its own predictions.

    A reference ``PoissonRegression`` is given random ``_beta`` / ``_beta0``
    values, its predictions on random inputs are used as targets, and a
    freshly fitted model must match the reference parameters.
    """
    # TODO (hme): Is there a more appropriate distribution for testing Poisson?
    num_samples, num_features = 1000, 1
    rs = np.random.RandomState(1337)

    # Reference model with hand-assigned parameters; the draw order from
    # ``rs`` (beta, beta0, X) determines the generated values.
    real_beta = rs.random_sample(num_features)
    real_model: PoissonRegression = PoissonRegression(solver="newton")
    real_model._beta = nps_app_inst.array(real_beta, block_shape=(3,))
    real_model._beta0 = nps_app_inst.scalar(rs.random_sample())

    real_X = rs.random_sample(size=(num_samples, num_features))
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = real_model.predict(X)

    param_set = [{"solver": "newton", "tol": 1e-8, "max_iter": 10}]
    for kwargs in param_set:
        started = time.time()
        model: PoissonRegression = PoissonRegression(**kwargs)
        model.fit(X, y)
        elapsed = time.time() - started

        print("opt", kwargs["solver"])
        print("runtime", elapsed)
        diagnostics = (
            ("norm", model.grad_norm_sq),
            ("objective", model.objective),
            ("D^2", model.deviance_sqr),
        )
        for label, metric in diagnostics:
            print(label, metric(X, y).get())

        assert nps_app_inst.allclose(real_model._beta, model._beta).get()
        assert nps_app_inst.allclose(real_model._beta0, model._beta0).get()
def test_rwd(app_inst: ArrayApplication):
    """Round-trip a block array through S3: write, read back, delete.

    Every block's write response must contain an ``ETag`` entry, and every
    block's delete response must report the key that ``StoredArrayS3``
    computes for that grid entry.
    """
    source: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = app_inst.array(source, block_shape=(3, 4))
    filename = "darrays/read_write_delete_array_test"

    # Write and inspect the per-block responses.
    written: BlockArray = app_inst.write_s3(ba, filename)
    written_arr = app_inst.get(written)
    for grid_entry in written.grid.get_entry_iterator():
        assert "ETag" in written_arr[grid_entry]

    # Reading back must reproduce the original array.
    ba_read: BlockArray = app_inst.read_s3(filename)
    assert app_inst.get(app_inst.allclose(ba, ba_read))

    # Delete and verify the first deleted key reported for each block.
    deleted: BlockArray = app_inst.delete_s3(filename)
    deleted_arr = app_inst.get(deleted)
    for grid_entry in deleted.grid.get_entry_iterator():
        reported_key = deleted_arr[grid_entry]["Deleted"][0]["Key"]
        expected_key = StoredArrayS3(filename, deleted.grid).get_key(grid_entry)
        assert reported_key == expected_key
def test_rwd(app_inst_s3: ArrayApplication):
    """Round-trip a block array through a freshly created S3 bucket.

    The ``darrays`` bucket must not exist beforehand; it is created here.
    Every block's write response must contain an ``ETag`` entry, and the
    delete call must return a truthy result.
    """
    s3 = boto3.resource("s3", region_name="us-east-1")
    assert s3.Bucket("darrays") not in s3.buckets.all()
    s3.create_bucket(Bucket="darrays")

    source: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = app_inst_s3.array(source, block_shape=(3, 4))
    filename = "darrays/read_write_delete_array_test"

    # Write and inspect the per-block responses.
    written: BlockArray = app_inst_s3.write_s3(ba, filename)
    written_arr = app_inst_s3.get(written)
    for grid_entry in written.grid.get_entry_iterator():
        assert "ETag" in written_arr[grid_entry]

    # Reading back must reproduce the original array.
    ba_read: BlockArray = app_inst_s3.read_s3(filename)
    assert app_inst_s3.get(app_inst_s3.allclose(ba, ba_read))

    deleted: bool = app_inst_s3.delete_s3(filename)
    assert deleted
def test_rwd(serial_app_inst: ArrayApplication):
    """Round-trip a block array through a freshly created S3 bucket.

    The ``darrays`` bucket must not exist beforehand; it is created here.
    Every block's write response must contain an ``ETag`` entry, and every
    block's delete response must report the key that ``StoredArrayS3``
    computes for that grid entry.
    """
    s3 = boto3.resource("s3", region_name="us-east-1")
    assert s3.Bucket("darrays") not in s3.buckets.all()
    s3.create_bucket(Bucket="darrays")

    source: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = serial_app_inst.array(source, block_shape=(3, 4))
    filename = "darrays/read_write_delete_array_test"

    # Write and inspect the per-block responses.
    written: BlockArray = serial_app_inst.write_s3(ba, filename)
    written_arr = serial_app_inst.get(written)
    for grid_entry in written.grid.get_entry_iterator():
        assert "ETag" in written_arr[grid_entry]

    # Reading back must reproduce the original array.
    ba_read: BlockArray = serial_app_inst.read_s3(filename)
    assert serial_app_inst.get(serial_app_inst.allclose(ba, ba_read))

    # Delete and verify the first deleted key reported for each block.
    deleted: BlockArray = serial_app_inst.delete_s3(filename)
    deleted_arr = serial_app_inst.get(deleted)
    for grid_entry in deleted.grid.get_entry_iterator():
        reported_key = deleted_arr[grid_entry]["Deleted"][0]["Key"]
        expected_key = StoredArrayS3(filename, deleted.grid).get_key(grid_entry)
        assert reported_key == expected_key