Exemple #1
0
def test_basic_assignment_broadcasting(app_inst: ArrayApplication):
    """Test mixed-length assignment broadcasting.

    Enumerates combinations of access modes (integer, full slice, and
    open/closed slices) of varying lengths on both sides of an assignment.
    Whenever NumPy accepts the broadcast, the BlockArray assignment must
    produce the same result.
    """
    def get_sel(num_entries, shape):
        # Sample a valid (start, stop) pair for each of the first
        # num_entries axes, with 0 <= start <= stop <= dim.
        r = []
        for i in range(num_entries):
            dim = shape[i]
            # randint's upper bound is exclusive; these draws cover the
            # same ranges as the removed random_integers(0, dim - 1) and
            # random_integers(start, dim) calls.
            start = rs.randint(0, dim)
            stop = rs.randint(start, dim + 1)
            r.append((start, stop))
        return r

    rs = np.random.RandomState(1337)
    a_shape = (6, 7, 2, 5)
    a_block_shape = (2, 4, 2, 3)
    b_shape = (6, 7, 2, 5)
    b_block_shape = (3, 2, 1, 2)
    num_axes = len(a_shape)
    access_modes = [
        lambda a1, a2: a1,
        lambda a1, a2: slice(None, None, None),
        lambda a1, a2: slice(a1, None, None),
        lambda a1, a2: slice(None, a1, None),
        lambda a1, a2: slice(a1, a2, None),
    ]
    for a_len in range(num_axes):
        for b_len in range(num_axes):
            a_mode_iterator = list(
                itertools.product(access_modes, repeat=a_len))
            b_mode_iterator = list(
                itertools.product(access_modes, repeat=b_len))
            pbar = tqdm.tqdm(
                total=len(a_mode_iterator) * len(b_mode_iterator),
                desc="Testing assignment broadcasting %d/%d" %
                (a_len * num_axes + b_len, num_axes**2),
            )
            # Create some valid intervals.
            for a_mode in a_mode_iterator:
                for b_mode in b_mode_iterator:
                    pbar.update(1)
                    a_sel = get_sel(a_len, a_shape)
                    b_sel = get_sel(b_len, b_shape)
                    a_accessor = tuple(a_mode[i](*a_sel[i])
                                       for i in range(a_len))
                    b_accessor = tuple(b_mode[i](*b_sel[i])
                                       for i in range(b_len))
                    # np.product was removed in NumPy 2.0; np.prod is the
                    # supported spelling.
                    arr_a = np.arange(np.prod(a_shape)).reshape(a_shape)
                    arr_b = np.arange(np.prod(b_shape)).reshape(b_shape)
                    ba_a = app_inst.array(arr_a, a_block_shape)
                    ba_b = app_inst.array(arr_b, b_block_shape)
                    try:
                        arr_a[a_accessor] = arr_b[b_accessor]
                        broadcasted = True
                    except ValueError:
                        # NumPy rejected this broadcast; skip the
                        # BlockArray assignment for this combination.
                        broadcasted = False
                    if broadcasted:
                        ba_a[a_accessor] = ba_b[b_accessor]
                        assert np.allclose(arr_a, ba_a.get())
                        assert np.allclose(arr_b, ba_b.get())
            # Release the progress bar for this (a_len, b_len) pair.
            pbar.close()
Exemple #2
0
def test_block_grid_entry(app_inst: ArrayApplication):
    """Check grid metadata of a block taken from a transposed BlockArray."""
    source = app_inst.array(np.array([[1, 2, 3], [4, 5, 6]]), block_shape=(1, 3))
    blk: Block = source.T.blocks[0, 1]
    assert blk.size() == 3
    assert blk.transposed
    # The transposed view reports swapped entry/shape relative to the
    # underlying ("true") layout.
    assert blk.grid_entry == (0, 1)
    assert blk.grid_shape == (1, 2)
    assert blk.true_grid_entry() == (1, 0)
    assert blk.true_grid_shape() == (2, 1)
Exemple #3
0
def mock_cluster(cluster_shape):
    """Build an ArrayApplication backed by a mocked multi-node Ray system."""
    mock_scheduler: RayScheduler = MockMultiNodeScheduler(
        compute_module=numpy_compute,
        cluster_shape=cluster_shape,
        use_head=True,
    )
    ray_system: System = RaySystem(
        compute_module=numpy_compute,
        scheduler=mock_scheduler,
    )
    ray_system.init()
    return ArrayApplication(system=ray_system, filesystem=FileSystem(ray_system))
Exemple #4
0
def test_concatenate(app_inst: ArrayApplication):
    """Concatenate two, then three arrays along axis 1; compare to NumPy."""
    axis = 1
    real_X, _ = BimodalGaussian.get_dataset(1000, 9)
    real_ones = np.ones(shape=(1000, 1))
    X = app_inst.array(real_X, block_shape=(100, 9))
    ones = app_inst.ones((1000, 1), (100, 1), dtype=X.dtype)

    def _compare(ba_operands, np_operands):
        # Concatenate both representations and check they agree.
        ba_result = app_inst.concatenate(
            ba_operands, axis=axis, axis_block_size=X.block_shape[axis])
        np_result = np.concatenate(np_operands, axis=axis)
        assert np.allclose(ba_result.get(), np_result)

    _compare([X, ones], [real_X, real_ones])

    real_X2 = np.random.random_sample(1000 * 17).reshape(1000, 17)
    X2 = app_inst.array(real_X2, block_shape=(X.block_shape[0], 3))
    _compare([X, ones, X2], [real_X, real_ones, real_X2])
Exemple #5
0
def test_tensordot_large_shape(app_inst: ArrayApplication):
    """Single-axis tensordot on 4-d operands must match NumPy."""
    np_a = np.arange(4 * 6 * 10 * 90).reshape((90, 10, 6, 4))
    np_b = np.arange(4 * 6 * 10 * 75).reshape((4, 6, 10, 75))
    expected = np.tensordot(np_a, np_b, axes=1)

    ba_a = app_inst.array(np_a, block_shape=(30, 5, 3, 2))
    ba_b = app_inst.array(np_b, block_shape=(2, 3, 5, 25))
    ba_result = ba_a.tensordot(ba_b, axes=1)
    assert np.allclose(ba_result.get(), expected)
    common.check_block_integrity(ba_result)
Exemple #6
0
def newton(app: ArrayApplication, model: GLM, beta, X: BlockArray,
           y: BlockArray, tol: BlockArray, max_iter: int):
    """Fit GLM coefficients with Newton's method.

    Performs at most max_iter Newton updates, stopping early once the
    squared gradient norm drops below tol. Returns the updated beta.
    """
    for _ in range(max_iter):
        mu: BlockArray = model.forward(X, beta)
        grad = model.gradient(X, y, mu, beta=beta)
        # The Hessian is PSD, but the generic inverse is faster here than
        # the PSD-specific one.
        beta -= app.inv(model.hessian(X, y, mu)) @ grad
        if grad.T @ grad < tol:
            break
    return beta
Exemple #7
0
def test_poisson_basic(nps_app_inst: ArrayApplication):
    """Fit Poisson regression on a tiny noiseless dataset and recover coefs."""
    coef = np.array([0.2, -0.1])
    X_real = np.array([[0, 1, 2, 3, 4]]).T
    # y is generated exactly from the model, so the fit should recover coef.
    y_real = np.exp(np.dot(X_real, coef[0]) + coef[1]).reshape(-1)
    X = nps_app_inst.array(X_real, block_shape=X_real.shape)
    y = nps_app_inst.array(y_real, block_shape=y_real.shape)
    model: PoissonRegression = PoissonRegression(
        solver="newton", tol=1e-8, max_iter=10
    )
    model.fit(X, y)
    print("norm", model.grad_norm_sq(X, y).get())
    print("objective", model.objective(X, y).get())
    print("D^2", model.deviance_sqr(X, y).get())
    expected_beta = nps_app_inst.array(coef[:-1], block_shape=(1,))
    expected_beta0 = nps_app_inst.scalar(coef[-1])
    assert nps_app_inst.allclose(model._beta, expected_beta, rtol=1e-4).get()
    assert nps_app_inst.allclose(model._beta0, expected_beta0, rtol=1e-4).get()
Exemple #8
0
def test_higgs(app_inst: ArrayApplication):
    """Compare parallel CSV ingest of HIGGS against a serial baseline read."""
    filename = os.path.join(settings.data_dir, "HIGGS.csv")
    start = time.time()
    ba: BlockArray = app_inst.read_csv(filename, num_workers=12)
    # touch() forces materialization before timing is recorded.
    ba.touch()
    print("HIGGS nums load time", time.time() - start, ba.shape, ba.block_shape)
    start = time.time()
    np_data = _read_serially(filename, has_header=False)
    print("HIGGS serial load time", time.time() - start, np_data.shape)
    assert np.allclose(ba.get(), np_data)
Exemple #9
0
def test_logistic(nps_app_inst: ArrayApplication):
    """Fit logistic regression with several solvers and sanity-check output.

    For each solver configuration, fits the model, checks that predicted
    class probabilities sum to 1 per sample, and prints diagnostics.
    """
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100, ))
    param_set = [{
        "solver": "gd",
        "lr": 1e-6,
        "tol": 1e-8,
        "max_iter": 10
    }, {
        "solver": "sgd",
        "lr": 1e-6,
        "tol": 1e-8,
        "max_iter": 10
    }, {
        "solver": "block_sgd",
        "lr": 1e-6,
        "tol": 1e-8,
        "max_iter": 10
    }, {
        "solver": "newton",
        "tol": 1e-8,
        "max_iter": 10
    }, {
        "solver": "irls",
        "tol": 1e-8,
        "max_iter": 10
    }]
    for kwargs in param_set:
        runtime = time.time()
        lr_model: LogisticRegression = LogisticRegression(**kwargs)
        lr_model.fit(X, y)
        runtime = time.time() - runtime
        y_pred = lr_model.predict(X).get()
        y_pred_proba = lr_model.predict_proba(X).get()
        # Bug fix: the original discarded np.allclose's result, so this
        # probability-sum check could never fail the test.
        assert np.allclose(np.ones(shape=(y.shape[0], )),
                           y_pred_proba[:, 0] + y_pred_proba[:, 1])
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", lr_model.grad_norm_sq(X, y).get())
        print("objective", lr_model.objective(X, y).get())
        print("accuracy", np.sum(y.get() == y_pred) / num_samples)
Exemple #10
0
def test_inv(app_inst: ArrayApplication):
    """Check inv_sym_psd, inverse_triangular and cholesky against NumPy."""
    shape = (5, 5)
    for dtype in (np.float32, np.float64):
        np_mat = sample_sym_pd_mat(shape=shape).astype(dtype)
        mat = app_inst.array(np_mat, block_shape=shape)

        # Symmetric PSD inverse.
        mat_inv = app_inst.inv_sym_psd(mat).get()
        assert np.allclose(
            np.linalg.inv(mat.get()), mat_inv, rtol=1e-4, atol=1e-4)

        # Triangular inverse via the R factor of a QR decomposition.
        _, r = np.linalg.qr(mat.get())
        r_ba = app_inst.array(r, block_shape=shape)
        r_inv = app_inst.inverse_triangular(r_ba, lower=False).get()
        assert np.allclose(np.linalg.inv(r), r_inv, rtol=1e-4, atol=1e-4)

        # Cholesky factor.
        L = app_inst.cholesky(mat).get()
        assert np.allclose(
            np.linalg.cholesky(mat.get()), L, rtol=1e-4, atol=1e-4)
Exemple #11
0
def svd(app: ArrayApplication, X):
    """Compute an SVD of a 2-d BlockArray via direct TSQR.

    Factors X = Q R, takes the SVD of the single-block R, and recovers
    U = Q @ R_U. Returns (U, S, VT).
    """
    # TODO(hme): Optimize by merging with direct qr to compute U directly,
    #  to avoid wasting space storing intermediate Q.
    #  This may not really help until we have operator fusion.
    assert len(X.shape) == 2
    R_shape = (X.shape[1], X.shape[1])
    R_block_shape = (X.block_shape[1], X.block_shape[1])
    Q, R = direct_tsqr(app, X, reshape_output=False)
    # R must fit in a single block for the single-task SVD below.
    assert R.shape == R.block_shape
    R_U_oid, S_oid, VT_oid = app.cm.svd(
        R.blocks[(0, 0)].oid,
        syskwargs={"grid_entry": (0, 0), "grid_shape": (1, 1)})
    R_U: BlockArray = app.vec_from_oids([R_U_oid], R_shape, R_block_shape, X.dtype)
    S: BlockArray = app.vec_from_oids([S_oid], R_shape[:1], R_block_shape[:1], X.dtype)
    VT = app.vec_from_oids([VT_oid], R_shape, R_block_shape, X.dtype)
    U = Q @ R_U
    return U, S, VT
Exemple #12
0
def test_loadtxt(app_inst: ArrayApplication):
    """Round-trip data through savetxt/loadtxt for NumPy and the app.

    Saves a random matrix with np.savetxt, loads it back with both
    np.loadtxt and app_inst.loadtxt, and checks both match the original.
    """
    seed = 1337
    rs = np.random.RandomState(seed)

    fname = "test_text.out"
    header = ["field1", "field2", "field3"]
    data = rs.random_sample(99).reshape(33, 3)

    np.savetxt(
        fname=fname,
        X=data,
        fmt="%.18e",
        delimiter=",",
        newline="\n",
        header=",".join(header),
        footer="",
        comments="# ",
        encoding=None,
    )

    try:
        np_loaded_data = np.loadtxt(
            fname,
            dtype=float,
            comments="# ",
            delimiter=",",
            converters=None,
            skiprows=0,
            usecols=None,
            unpack=False,
            ndmin=0,
            encoding="bytes",
            max_rows=None,
        )
        assert np.allclose(data, np_loaded_data)

        nums_array = app_inst.loadtxt(
            fname,
            dtype=float,
            comments="# ",
            delimiter=",",
            converters=None,
            skiprows=0,
            usecols=None,
            unpack=False,
            ndmin=0,
            encoding="bytes",
            max_rows=None,
        )
        # Bug fix: the original discarded np.allclose's result here, so
        # the app loader's output was never actually verified.
        assert np.allclose(data, nums_array.get())
    finally:
        # Always clean up the temp file, even if an assertion fails.
        os.remove(fname)
    assert not os.path.exists(fname)
Exemple #13
0
def test_default_random(app_inst: ArrayApplication):
    """Fresh random states yield distinct draws; seeding reproduces them."""
    first = app_inst.random_state().random()
    second = app_inst.random_state().random()
    max_iters = 10
    num_iters = 0
    # Retry a bounded number of times in case of an (unlikely) collision.
    while app_inst.allclose(first, second) and num_iters < max_iters:
        num_iters += 1
        second = app_inst.random_state().random()
    if num_iters > 0:
        warnings.warn(
            "More than one iteration required to generate unequal random numbers."
        )
    assert not app_inst.allclose(first, second)

    # Test default random seed: seeding twice with the same value must
    # reproduce the same draw.
    app_inst.random.seed(1337)
    first = app_inst.random.random()
    app_inst.random.seed(1337)
    second = app_inst.random.random()
    assert app_inst.allclose(first, second)
Exemple #14
0
def test_top_k(app_inst: ArrayApplication):
    """Test top_k for both largest and smallest selections.

    Verifies result lengths, that every returned value belongs to the true
    top-k set (computed via np.partition), and that every returned index
    maps back to its value. The original duplicated this verification four
    times; it is factored into a helper.
    """
    def _check_top_k(np_x, ba_x, k, largest):
        # Select via top_k and compute reference values with np.partition.
        if largest:
            ba_v, ba_i = app_inst.top_k(ba_x, k)
            np_v = np.partition(np_x, -k)[-k:]
        else:
            ba_v, ba_i = app_inst.top_k(ba_x, k, largest=False)
            np_v = np.partition(np_x, k - 1)[:k]
        assert len(ba_v.get()) == k and len(ba_i.get()) == k
        for v, i in zip(ba_v.get(), ba_i.get()):
            assert v in np_v
            assert np_x[i] == v

    # Simple tests
    np_x = np.array([3, 7, 2, 4, 5, 1, 5, 6])
    ba_x = app_inst.array(np_x, block_shape=(3,))
    for k in range(1, len(np_x) + 1):
        _check_top_k(np_x, ba_x, k, largest=True)
        _check_top_k(np_x, ba_x, k, largest=False)

    # Randomized tests
    shapes = [(50,), (437,), (1000,)]
    block_shapes = [(10,), (23,), (50,)]
    ks = range(1, 51, 15)
    for shape, block_shape, k in itertools.product(shapes, block_shapes, ks):
        ba_x = app_inst.random.random(shape=shape, block_shape=block_shape)
        np_x = ba_x.get()
        _check_top_k(np_x, ba_x, k, largest=True)
        _check_top_k(np_x, ba_x, k, largest=False)
Exemple #15
0
def sample(app: ArrayApplication, sample_size):
    """Draw a two-cluster 2-d training set and append an intercept column."""
    half = sample_size // 2
    # Two Gaussian clusters, the second shifted by +2 in both dimensions.
    X_train = nps.concatenate(
        [nps.random.randn(half, 2),
         nps.random.randn(half, 2) + 2.0],
        axis=0)
    y_train = nps.concatenate(
        [nps.zeros(shape=(half, ), dtype=nps.int),
         nps.ones(shape=(half, ), dtype=nps.int)],
        axis=0)
    # We augment X with 1s for intercept term.
    intercept = app.ones(
        shape=(X_train.shape[0], 1),
        block_shape=(X_train.block_shape[0], 1),
        dtype=X_train.dtype)
    X_train = app.concatenate(
        [X_train, intercept],
        axis=1,
        axis_block_size=X_train.block_shape[1] + 1)
    return X_train, y_train
Exemple #16
0
def test_sklearn_linear_regression(nps_app_inst: ArrayApplication):
    """Compare nums LinearRegression predictions against scikit-learn."""
    from sklearn.linear_model import LinearRegression as SKLinearRegression

    _, num_features = 1000, 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=real_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "newton-cg", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        lr_model: LinearRegression = LinearRegression(**kwargs)
        lr_model.fit(X, y)
        y_pred = lr_model.predict(X).get()

        sk_lr_model = SKLinearRegression()
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        # Bug fix: the original discarded np.allclose's result, so the
        # comparison with scikit-learn never actually failed the test.
        assert np.allclose(sk_y_pred, y_pred)
Exemple #17
0
def test_quickselect(app_inst: ArrayApplication):
    """Quickselect over flattened block oids: simple then randomized cases."""
    # Simple tests
    np_x = np.array([3, 7, 2, 4, 5, 1, 5, 6])
    oids = app_inst.array(np_x, block_shape=(3, )).flattened_oids()
    # Expected values in descending order, indexed directly by i
    # (including negative i).
    expected = [7, 6, 5, 5, 4, 3, 2, 1]
    for idx in range(-8, 8):
        selected = app_inst.cm.get(app_inst.quickselect(oids, idx))
        assert selected == expected[idx]

    # Randomized tests
    shapes = [(50, ), (437, ), (1000, )]
    block_shapes = [(10, ), (23, ), (50, )]
    kth = [-50, -42, -25, -13, 0, 8, 25, 36, 49]
    for shape, block_shape, k in itertools.product(shapes, block_shapes, kth):
        ba_x = app_inst.random.random(shape=shape, block_shape=block_shape)
        selected = app_inst.cm.get(
            app_inst.quickselect(ba_x.flattened_oids(), k))
        # The reference element sits at sorted position -k - 1.
        assert selected == np.partition(ba_x.get(), -k - 1)[-k - 1]
Exemple #18
0
def test_basic_select(app_inst: ArrayApplication):
    """Basic slicing through an ArrayView must match NumPy slicing."""
    arr: np.ndarray = np.arange(5)
    block_shape = (3,)
    slice_params = list(get_slices(size=10, index_multiplier=2, basic_step=True))
    pbar = tqdm.tqdm(total=len(slice_params))
    for slice_sel in slice_params:
        pbar.set_description(str(slice_sel))
        pbar.update(1)
        bav = ArrayView.from_block_array(
            app_inst.array(arr, block_shape=block_shape))
        res = (arr[slice_sel], bav[slice_sel].create().get())
        assert np.allclose(*res), str(res)
Exemple #19
0
def test_quantile_percentile(app_inst: ArrayApplication):
    """Test quantile/percentile against NumPy at q in {0, 50, 100}.

    see https://github.com/dask/dask/blob/main/dask/array/tests/test_percentiles.py
    The original duplicated the comparison loop for each fixture; it is
    factored into a helper.
    """
    qs = [0, 50, 100]
    methods = ["tdigest"]
    interpolations = ["linear"]

    def _check_all(np_x, ba_x):
        # Compare every (q, method, interpolation) combination to NumPy.
        for q, method, interpolation in itertools.product(qs, methods, interpolations):
            assert app_inst.quantile(
                ba_x, q / 100, method=method, interpolation=interpolation
            ).get() == np.quantile(np_x, q / 100)
            assert app_inst.percentile(
                ba_x, q, method=method, interpolation=interpolation
            ).get() == np.percentile(np_x, q)

    # Constant array.
    _check_all(np.ones((10,)), app_inst.ones(shape=(10,), block_shape=(2,)))

    # Array with repeated values and uneven spacing.
    np_x = np.array([0, 0, 5, 5, 5, 5, 20, 20])
    _check_all(np_x, app_inst.array(np_x, block_shape=(3,)))
Exemple #20
0
def test_rr(app_inst: ArrayApplication):
    """Ridge regression robustness: with outliers in the training data,
    heavy regularization should generalize better than none."""
    num_features = 13
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(100, num_features, p=0.5, theta=real_theta)
    extra_X, extra_y = BimodalGaussian.get_dataset(10, num_features, p=0.5, theta=real_theta)

    # Perturb some examples.
    # np.product was removed in NumPy 2.0; np.prod is the supported spelling.
    extra_X = extra_X * rs.random_sample(np.prod(extra_X.shape)).reshape(extra_X.shape)
    extra_y = extra_y * rs.random_sample(extra_y.shape).reshape(extra_y.shape)
    real_X = np.concatenate([real_X, extra_X], axis=0)
    real_y = np.concatenate([real_y, extra_y], axis=0)

    X = app_inst.array(real_X, block_shape=(15, 5))
    y = app_inst.array(real_y, block_shape=(15,))
    theta = app_inst.ridge_regression(X, y, lamb=0.0)
    robust_theta = app_inst.ridge_regression(X, y, lamb=10000.0)

    # Generate a test set to evaluate robustness to outliers.
    test_X, test_y = BimodalGaussian.get_dataset(100, num_features, p=0.5, theta=real_theta)
    test_X = app_inst.array(test_X, block_shape=(15, 5))
    test_y = app_inst.array(test_y, block_shape=(15,))
    theta_error = np.sum((((test_X @ theta) - test_y)**2).get())
    robust_theta_error = np.sum((((test_X @ robust_theta) - test_y)**2).get())
    assert robust_theta_error < theta_error
Exemple #21
0
def test_lr(app_inst: ArrayApplication):
    """Test direct-TSQR and fast linear regression for both float dtypes.

    The residual error should be ~0 since y is generated from the model;
    the duplicated dtype-dependent tolerance check is factored out.
    """
    def _assert_zero_error(error, dtype):
        # float32 needs looser tolerances to account for lower precision.
        if dtype == np.float64:
            assert np.allclose(0, error), error
        else:
            assert np.allclose(0, error, rtol=1.e-4, atol=1.e-4), error

    num_features = 13
    rs = np.random.RandomState(1337)
    for dtype in (np.float32, np.float64):
        real_theta = rs.random_sample(num_features).astype(dtype)
        real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=real_theta)
        real_X = real_X.astype(dtype)
        real_y = real_y.astype(dtype)
        X = app_inst.array(real_X, block_shape=(15, 5))
        y = app_inst.array(real_y, block_shape=(15,))

        # Direct TSQR LR
        theta = app_inst.linear_regression(X, y)
        error = app_inst.sum((((X @ theta) - y)**2)).get()
        _assert_zero_error(error, dtype)

        # Fast LR
        theta = app_inst.fast_linear_regression(X, y)
        error = app_inst.sum((((X @ theta) - y)**2)).get()
        _assert_zero_error(error, dtype)
Exemple #22
0
def irls(
    app: ArrayApplication,
    model: LogisticRegression,
    beta,
    X: BlockArray,
    y: BlockArray,
    tol: BlockArray,
    max_iter: int,
):
    """Iteratively reweighted least squares for logistic regression.

    Runs at most max_iter updates, stopping early once the largest
    absolute gradient entry is at most tol. Returns the updated beta.
    """
    for _ in range(max_iter):
        eta: BlockArray = X @ beta
        mu: BlockArray = model.link_inv(eta)
        # Per-sample weights; the epsilon keeps the division below stable
        # when mu hits 0 or 1.
        weights = mu * (1 - mu) + 1e-16
        XT_w = X.T * weights
        # The weighted normal matrix is PSD, but plain inv is faster than
        # the PSD-specific inverse.
        normal_inv = linalg.inv(app, XT_w @ X)
        adjusted = eta + (y - mu) / weights
        beta = normal_inv @ XT_w @ adjusted
        grad = model.gradient(X, y, mu, beta)
        if app.max(app.abs(grad)) <= tol:
            break
    return beta
Exemple #23
0
def ridge_regression(app: ArrayApplication, X: BlockArray, y: BlockArray, lamb: float):
    """Solve ridge regression using the R factor from indirect TSQR.

    Solves (lamb * I + R^T R)^-1 X^T y, where R^T R equals X^T X.
    """
    assert len(X.shape) == 2
    assert len(y.shape) == 1
    assert lamb >= 0
    num_features = X.shape[1]
    feature_block = X.block_shape[1]
    R = indirect_tsr(app, X)
    # Regularization matrix lamb * I, blocked like R.T @ R.
    regularizer = app.array(
        lamb * np.eye(num_features),
        block_shape=(feature_block, feature_block))
    # TODO (hme): A better solution exists, which inverts R by augmenting X and y.
    #  See Murphy 7.5.2.
    return inv(app, regularizer + R.T @ R) @ (X.T @ y)
Exemple #24
0
def test_rwd(app_inst: ArrayApplication):
    """Round-trip an array through S3: write, read back, then delete."""
    np_arr: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = app_inst.array(np_arr, block_shape=(3, 4))
    filename = "darrays/read_write_delete_array_test"

    write_result: BlockArray = app_inst.write_s3(ba, filename)
    write_result_arr = app_inst.get(write_result)
    # Every block write should report an S3 ETag.
    for grid_entry in write_result.grid.get_entry_iterator():
        assert 'ETag' in write_result_arr[grid_entry]

    ba_read: BlockArray = app_inst.read_s3(filename)
    assert app_inst.get(app_inst.allclose(ba, ba_read))

    delete_result: BlockArray = app_inst.delete_s3(filename)
    delete_result_arr = app_inst.get(delete_result)
    for grid_entry in delete_result.grid.get_entry_iterator():
        deleted_key = delete_result_arr[grid_entry]["Deleted"][0]["Key"]
        expected_key = StoredArrayS3(
            filename, delete_result.grid).get_key(grid_entry)
        assert deleted_key == expected_key
Exemple #25
0
def test_split(app_inst: ArrayApplication):
    """Split a single block into two and reassemble into a new BlockArray."""
    # TODO (hme): Implement a split leveraging block_shape param in reshape op.
    x = app_inst.array(np.array([1.0, 2.0, 3.0, 4.0]), block_shape=(4, ))
    syskwargs = x.blocks[0].syskwargs()
    syskwargs["options"] = {"num_returns": 2}
    left_oid, right_oid = x.system.split(
        x.blocks[0].oid, 2, axis=0, transposed=False, syskwargs=syskwargs)
    result = BlockArray(ArrayGrid((4, ), (2, ), x.dtype.__name__), x.system)
    result.blocks[0].oid = left_oid
    result.blocks[1].oid = right_oid
    assert np.allclose([1.0, 2.0, 3.0, 4.0], result.get())
Exemple #26
0
def test_reshape_ones(app_inst: ArrayApplication):
    """Reshape arrays by stripping and injecting singleton dimensions.

    For each shape, verifies that removing all size-1 axes, and injecting
    runs of 1s at every position, preserves the data and keeps each
    block's metadata shape equal to its materialized shape. The original
    duplicated the verification loop; it is factored into a helper.
    """
    def _strip_ones(shape, block_shape):
        # Drop axes of size 1 from both shape and block_shape.
        indexes = np.where(np.array(shape) != 1)
        return tuple(np.array(shape)[indexes]), tuple(
            np.array(block_shape)[indexes])

    def _check_reshape(arr, arr_np, shape, new_shape, new_block_shape):
        # Reshape, verify block metadata against materialized blocks, and
        # check the data round-trips.
        new_arr = arr.reshape(new_shape, block_shape=new_block_shape)
        for grid_entry in new_arr.grid.get_entry_iterator():
            new_block: Block = new_arr.blocks[grid_entry]
            assert new_block.shape == new_block.get().shape
        assert np.allclose(arr_np, new_arr.get().reshape(shape))

    # inject many different variants of ones, and ensure the block shapes match at every level.
    shapes = [
        [(10, 2, 20, 5, 3), (5, 1, 4, 3, 3)],
        [(10, 1, 5, 1, 3), (5, 1, 4, 1, 3)],
        [(1, 2, 3), (1, 1, 1)],
        [(10, 1), (2, 1)],
        [(1, 100, 10), (1, 10, 10)],
        [(), ()],
        [(1, ), (1, )],
        [(1, 1), (1, 1)],
        [(1, 1, 1), (1, 1, 1)],
    ]
    num_ones = [1, 2, 3]

    for shape, block_shape in shapes:
        arr = app_inst.random_state(1337).random(shape, block_shape)
        arr_np = arr.get()

        # Try removing ones.
        new_shape, new_block_shape = _strip_ones(shape, block_shape)
        _check_reshape(arr, arr_np, shape, new_shape, new_block_shape)

        # Try adding ones.
        for nones in num_ones:
            for pos in range(len(shape) + 1):
                ones = [1] * nones
                new_shape = list(shape)[:pos] + ones + list(shape)[pos:]
                new_block_shape = (list(block_shape)[:pos] + ones
                                   + list(block_shape)[pos:])
                _check_reshape(arr, arr_np, shape, new_shape, new_block_shape)
Exemple #27
0
def test_poisson(nps_app_inst: ArrayApplication):
    """Fit Poisson regression on data generated by a known model instance."""
    # TODO (hme): Is there a more appropriate distribution for testing Poisson?
    num_samples, num_features = 1000, 1
    rs = np.random.RandomState(1337)
    real_beta = rs.random_sample(num_features)
    real_model: PoissonRegression = PoissonRegression(solver="newton")
    real_model._beta = nps_app_inst.array(real_beta, block_shape=(3,))
    real_model._beta0 = nps_app_inst.scalar(rs.random_sample())
    real_X = rs.random_sample(size=(num_samples, num_features))
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    # Labels come straight from the reference model, so fitting should
    # recover its parameters.
    y = real_model.predict(X)
    for kwargs in [{"solver": "newton", "tol": 1e-8, "max_iter": 10}]:
        start = time.time()
        fit_model: PoissonRegression = PoissonRegression(**kwargs)
        fit_model.fit(X, y)
        elapsed = time.time() - start
        print("opt", kwargs["solver"])
        print("runtime", elapsed)
        print("norm", fit_model.grad_norm_sq(X, y).get())
        print("objective", fit_model.objective(X, y).get())
        print("D^2", fit_model.deviance_sqr(X, y).get())
        assert nps_app_inst.allclose(real_model._beta, fit_model._beta).get()
        assert nps_app_inst.allclose(real_model._beta0, fit_model._beta0).get()
Exemple #28
0
def test_lr(nps_app_inst: ArrayApplication):
    """Fit linear regression with GD and Newton solvers; print diagnostics."""
    num_samples, num_features = 1000, 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=real_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    solver_configs = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 100},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in solver_configs:
        start = time.time()
        model: LinearRegression = LinearRegression(**kwargs)
        model.fit(X, y)
        # Fitted parameters must match the generating theta's shape; the
        # intercept is a scalar.
        assert model._beta.shape == real_theta.shape and model._beta0.shape == ()
        elapsed = time.time() - start
        y_pred = model.predict(X).get()
        print("opt", kwargs["solver"])
        print("runtime", elapsed)
        print("norm", model.grad_norm_sq(X, y).get())
        print("objective", model.objective(X, y).get())
        print("error", np.sum((y.get() - y_pred) ** 2) / num_samples)
        print("D^2", model.deviance_sqr(X, y).get())
Exemple #29
0
def test_basic_assign(app_inst: ArrayApplication):
    """Slice assignment through ArrayViews must mirror NumPy assignment."""
    from_arr: np.ndarray = np.arange(5)
    block_shape = (3,)
    slice_params = list(get_slices(size=10, index_multiplier=2, basic_step=True))
    pbar = tqdm.tqdm(total=len(slice_params))
    for slice_sel in slice_params:
        pbar.set_description(str(slice_sel))
        pbar.update(1)

        from_bav = ArrayView.from_block_array(
            app_inst.array(from_arr, block_shape=block_shape))

        to_arr: np.ndarray = np.zeros(5)
        to_bav = ArrayView.from_block_array(
            app_inst.array(to_arr, block_shape=block_shape))

        # Perform the same assignment on the view and on the ndarray.
        to_bav[slice_sel] = from_bav[slice_sel]
        to_arr[slice_sel] = from_arr[slice_sel]

        # The source must be unchanged, the destination must match NumPy.
        for expected, actual in ((from_arr, from_bav.create().get()),
                                 (to_arr, to_bav.create().get())):
            assert np.allclose(expected, actual), str((expected, actual))
Exemple #30
0
def test_logistic(app_inst: ArrayApplication):
    """Run logistic regression under several optimizers; report accuracy."""
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = app_inst.array(real_X, block_shape=(100, 3))
    y = app_inst.array(real_y, block_shape=(100, ))
    # First-order methods need a learning rate; second-order ones do not.
    first_order = {"lr": 1e-6, "tol": 1e-8, "max_iter": 10}
    second_order = {"tol": 1e-8, "max_iter": 10}
    opt_param_set = [
        ("gd", dict(first_order)),
        ("block_sync_sgd", dict(first_order)),
        ("block_async_sgd", dict(first_order)),
        ("newton", dict(second_order)),
        ("irls", dict(second_order)),
    ]
    for opt, opt_params in opt_param_set:
        start = time.time()
        lr_model: LogisticRegression = LogisticRegression(
            app_inst, opt, opt_params)
        lr_model.fit(X, y)
        elapsed = time.time() - start
        # Threshold predicted probabilities at 0.5 to get class labels.
        y_pred = (lr_model.predict(X).get() > 0.5).astype(int)
        print("opt", opt)
        print("runtime", elapsed)
        print("norm", lr_model.grad_norm_sq(X, y).get())
        print("objective", lr_model.objective(X, y).get())
        print("accuracy", np.sum(y.get() == y_pred) / num_samples)