Esempio n. 1
0
def test_rwd():
    """Exercise the S3 write -> read -> delete cycle for a block array.

    A ``darrays`` bucket is created (after asserting it is not already
    listed), a 7x5 random array is stored in (3, 4) blocks, read back and
    compared with the source, and finally deleted. Every per-block write
    result must contain an ``ETag`` entry, and every per-block delete
    result must report the key that ``StoredArrayS3.get_key`` yields for
    the same grid entry.
    """
    from tests import conftest
    app_inst: ArrayApplication = conftest.get_app("serial")

    # The bucket must not be listed before this test creates it.
    s3 = boto3.resource('s3', region_name='us-east-1')
    assert s3.Bucket('darrays') not in s3.buckets.all()
    s3.create_bucket(Bucket='darrays')

    source: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = app_inst.array(source, block_shape=(3, 4))
    filename = "darrays/read_write_delete_array_test"

    # Write: one result per grid entry, each expected to carry an 'ETag'.
    written: BlockArray = app_inst.write_s3(ba, filename)
    write_responses = app_inst.get(written)
    assert all(
        'ETag' in write_responses[entry]
        for entry in written.grid.get_entry_iterator()
    )

    # Read: the array loaded from S3 must match what was written.
    restored: BlockArray = app_inst.read_s3(filename)
    matches = app_inst.allclose(ba, restored)
    assert app_inst.get(matches)

    # Delete: the first "Deleted" record of each block names its key.
    deleted: BlockArray = app_inst.delete_s3(filename)
    delete_responses = app_inst.get(deleted)
    for entry in deleted.grid.get_entry_iterator():
        reported_key = delete_responses[entry]["Deleted"][0]["Key"]
        expected_key = StoredArrayS3(filename, deleted.grid).get_key(entry)
        assert reported_key == expected_key
Esempio n. 2
0
    extra_X = extra_X * rs.random_sample(np.product(extra_X.shape)).reshape(extra_X.shape)
    extra_y = extra_y * rs.random_sample(extra_y.shape).reshape(extra_y.shape)
    real_X = np.concatenate([real_X, extra_X], axis=0)
    real_y = np.concatenate([real_y, extra_y], axis=0)

    X = app_inst.array(real_X, block_shape=(15, 5))
    y = app_inst.array(real_y, block_shape=(15,))
    theta = app_inst.ridge_regression(X, y, lamb=0.0)
    robust_theta = app_inst.ridge_regression(X, y, lamb=10000.0)

    # Generate a test set to evaluate robustness to outliers.
    test_X, test_y = BimodalGaussian.get_dataset(100, num_features, p=0.5, theta=real_theta)
    test_X = app_inst.array(test_X, block_shape=(15, 5))
    test_y = app_inst.array(test_y, block_shape=(15,))
    theta_error = np.sum((((test_X @ theta) - test_y)**2).get())
    robust_theta_error = np.sum((((test_X @ robust_theta) - test_y)**2).get())
    assert robust_theta_error < theta_error


if __name__ == "__main__":
    # Manual entry point: when this file is executed directly, build the
    # "serial" app from tests.conftest and run the selected test against it.
    # pylint: disable=import-error
    from tests import conftest

    app_inst = conftest.get_app("serial")
    # Only test_inv is enabled; the calls left commented out below are
    # alternatives that can be switched on by hand.
    # test_inv_assumptions(app_inst)
    test_inv(app_inst)
    # test_qr(app_inst)
    # test_svd(app_inst)
    # test_lr(app_inst)
    # test_rr(app_inst)
Esempio n. 3
0
                                         k + shape[0])[k + shape[0]]
        else:
            assert value == np.partition(ba_x.get(), k)[k]


def test_median(app_inst: ArrayApplication):
    """Check ``app_inst.median`` against ``np.median`` on 1-D inputs.

    Two fixed integer arrays (7 and 8 elements) are split into blocks of 3
    and compared first. Randomly generated arrays are then compared for
    every pairing of the listed shapes and block shapes.
    """
    # Hand-picked cases: an odd-length array, then an even-length one.
    fixed_cases = (
        [7, 2, 4, 5, 1, 5, 6],
        [3, 7, 2, 4, 5, 1, 5, 6],
    )
    for values in fixed_cases:
        np_x = np.array(values)
        ba_x = app_inst.array(np_x, block_shape=(3, ))
        assert app_inst.median(ba_x) == np.median(np_x)

    # Randomized cases: shapes vary slowest, block shapes fastest.
    for shape in [(50, ), (437, ), (1000, )]:
        for block_shape in [(10, ), (23, ), (50, )]:
            ba_x = app_inst.random.random(shape=shape, block_shape=block_shape)
            assert app_inst.median(ba_x) == np.median(ba_x.get())


if __name__ == "__main__":
    # Manual entry point: when this file is executed directly, build the
    # "serial" app from tests.conftest and run both tests below against it.
    # pylint: disable=import-error
    from tests import conftest

    app_inst: ArrayApplication = conftest.get_app("serial")
    test_quickselect(app_inst)
    test_median(app_inst)
Esempio n. 4
0
    path = os.path.abspath(__file__)
    dir_path = os.path.dirname(path)
    filename = os.path.join(dir_path, "test.csv")
    ba_data: BlockArray = app_inst.read_csv(filename, has_header=True)
    np_data = _read_serially(filename, has_header=True)
    assert np.allclose(ba_data.get(), np_data)


@pytest.mark.skip
def test_higgs(app_inst: ArrayApplication):
    """Compare the block-array CSV reader with a serial read of HIGGS.csv.

    Skipped by default. Loads ``HIGGS.csv`` from ``settings.data_dir`` with
    ``app_inst.read_csv`` (12 workers) and with ``_read_serially`` (no
    header), prints the elapsed time of each load, and asserts the two
    results agree under ``np.allclose``.
    """
    higgs_path = os.path.join(settings.data_dir, "HIGGS.csv")

    # Time the block-array load; touch() is called before the clock stops
    # (presumably to force the read to complete -- TODO confirm).
    started = time.time()
    ba: BlockArray = app_inst.read_csv(higgs_path, num_workers=12)
    ba.touch()
    elapsed = time.time() - started
    print("HIGGS nums load time", elapsed, ba.shape, ba.block_shape)

    # Time the serial reference load.
    started = time.time()
    expected = _read_serially(higgs_path, has_header=False)
    elapsed = time.time() - started
    print("HIGGS serial load time", elapsed, expected.shape)

    assert np.allclose(ba.get(), expected)


if __name__ == "__main__":
    # Manual entry point: when this file is executed directly, build the
    # "ray-cyclic" app from tests.conftest and run the selected test.
    # pylint: disable=import-error
    from tests import conftest

    app_inst = conftest.get_app("ray-cyclic")
    # Only test_read_csv is enabled; the calls left commented out are
    # alternatives that can be switched on by hand.
    # test_loadtxt(app_inst)
    # test_rwd(app_inst)
    test_read_csv(app_inst)
    # test_higgs(app_inst)