Example #1
def test_rr(app_inst: ArrayApplication):
    num_features = 13
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(100, num_features, p=0.5, theta=real_theta)
    extra_X, extra_y = BimodalGaussian.get_dataset(10, num_features, p=0.5, theta=real_theta)

    # Perturb some examples.
    extra_X = extra_X * rs.random_sample(extra_X.shape)
    extra_y = extra_y * rs.random_sample(extra_y.shape)
    real_X = np.concatenate([real_X, extra_X], axis=0)
    real_y = np.concatenate([real_y, extra_y], axis=0)

    X = app_inst.array(real_X, block_shape=(15, 5))
    y = app_inst.array(real_y, block_shape=(15,))
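    # lamb is the L2 penalty weight: lamb=0.0 reduces to ordinary least squares,
    # while a large lamb shrinks theta and dampens the perturbed examples.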
    theta = app_inst.ridge_regression(X, y, lamb=0.0)
    robust_theta = app_inst.ridge_regression(X, y, lamb=10000.0)

    # Generate a test set to evaluate robustness to outliers.
    test_X, test_y = BimodalGaussian.get_dataset(100, num_features, p=0.5, theta=real_theta)
    test_X = app_inst.array(test_X, block_shape=(15, 5))
    test_y = app_inst.array(test_y, block_shape=(15,))
    theta_error = np.sum((((test_X @ theta) - test_y)**2).get())
    robust_theta_error = np.sum((((test_X @ robust_theta) - test_y)**2).get())
    assert robust_theta_error < theta_error
Example #2
def test_stats(app_inst: ArrayApplication):
    real_X, _ = BimodalGaussian.get_dataset(3, 2)
    X = app_inst.array(real_X, block_shape=(2, 1))
    assert np.allclose(app_inst.mean(X, axis=0).get(), np.mean(real_X, axis=0))
    assert np.allclose(app_inst.std(X, axis=1).get(), np.std(real_X, axis=1))

    real_X, _ = BimodalGaussian.get_dataset(100, 9)
    X = app_inst.array(real_X, block_shape=(10, 2))
    assert np.allclose(app_inst.mean(X, axis=0).get(), np.mean(real_X, axis=0))
    assert np.allclose(app_inst.std(X, axis=1).get(), np.std(real_X, axis=1))
Example #3
def test_logistic(nps_app_inst: ArrayApplication):
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "block_sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
        {"solver": "irls", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        lr_model: LogisticRegression = LogisticRegression(**kwargs)
        lr_model.fit(X, y)
        runtime = time.time() - runtime
        y_pred = lr_model.predict(X).get()
        y_pred_proba = lr_model.predict_proba(X).get()
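        # The two class probabilities should sum to 1 for every sample.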
        assert np.allclose(
            np.ones(shape=(y.shape[0],)), y_pred_proba[:, 0] + y_pred_proba[:, 1]
        )
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", lr_model.grad_norm_sq(X, y).get())
        print("objective", lr_model.objective(X, y).get())
        print("accuracy", np.sum(y.get() == y_pred) / num_samples)
Example #4
def test_transpose(app_inst: ArrayApplication):
    real_X, _ = BimodalGaussian.get_dataset(100, 9)
    X = app_inst.array(real_X, block_shape=(100, 1))
    assert np.allclose(X.T.get(), real_X.T)
    # Identity.
    assert np.allclose(X.T.T.get(), X.get())
    assert np.allclose(X.T.T.get(), real_X)
Example #5
def test_sklearn_linear_regression(nps_app_inst: ArrayApplication):
    from sklearn.linear_model import LinearRegression as SKLinearRegression

    num_features = 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=real_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100, ))
    param_set = [
        {"solver": "newton-cg", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        lr_model: LinearRegression = LinearRegression(**kwargs)
        lr_model.fit(X, y)
        y_pred = lr_model.predict(X).get()

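        # Fit scikit-learn's reference implementation on the same data and compare predictions.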
        sk_lr_model = SKLinearRegression()
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        assert np.allclose(sk_y_pred, y_pred)
Example #6
def test_lr(nps_app_inst: ArrayApplication):
    num_samples, num_features = 233, 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features, theta=real_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100, ))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 100},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        model: LinearRegression = LinearRegression(**kwargs)
        model.fit(X, y)
        assert model._beta.shape == real_theta.shape and model._beta0.shape == ()
        runtime = time.time() - runtime
        y_pred = model.predict(X).get()
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", model.grad_norm_sq(X, y).get())
        print("objective", model.objective(X, y).get())
        print("error", np.sum((y.get() - y_pred)**2) / num_samples)
        print("D^2", model.deviance_sqr(X, y).get())
Example #7
def test_concatenate(app_inst: ArrayApplication):
    axis = 1
    real_X, _ = BimodalGaussian.get_dataset(1000, 9)
    real_ones = np.ones(shape=(1000, 1))
    X = app_inst.array(real_X, block_shape=(100, 9))
    ones = app_inst.ones((1000, 1), (100, 1), dtype=X.dtype)
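    # Append a column of ones; axis_block_size keeps the concatenated axis blocked like X.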
    X_concated = app_inst.concatenate([X, ones],
                                      axis=axis,
                                      axis_block_size=X.block_shape[axis])
    common.check_block_integrity(X_concated)
    real_X_concated = np.concatenate([real_X, real_ones], axis=axis)
    assert np.allclose(X_concated.get(), real_X_concated)

    real_X2 = np.random.random_sample(1000 * 17).reshape(1000, 17)
    X2 = app_inst.array(real_X2, block_shape=(X.block_shape[0], 3))
    X_concated = app_inst.concatenate([X, ones, X2],
                                      axis=axis,
                                      axis_block_size=X.block_shape[axis])
    common.check_block_integrity(X_concated)
    real_X_concated = np.concatenate([real_X, real_ones, real_X2], axis=axis)
    assert np.allclose(X_concated.get(), real_X_concated)

    y1 = app_inst.zeros(shape=(50, ), block_shape=(10, ), dtype=int)
    y2 = app_inst.ones(shape=(50, ), block_shape=(10, ), dtype=int)
    y = app_inst.concatenate([y1, y2], axis=0)
    common.check_block_integrity(y)
Example #8
def test_penalties(nps_app_inst: ArrayApplication):
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "block_sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
        {"solver": "lbfgs", "tol": 1e-8, "max_iter": 20},
    ]
    for kwargs in param_set:
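        # Only the second-order solvers (newton, lbfgs) are expected to fit well
        # within so few iterations, so the deviance check is gated on them.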
        model: Lasso = Lasso(alpha=0.5, **kwargs)
        model.fit(X, y)
        if kwargs["solver"] in ("newton", "lbfgs"):
            assert model.deviance_sqr(X, y) > 0.9

        model: Ridge = Ridge(alpha=0.5, **kwargs)
        model.fit(X, y)
        if kwargs["solver"] in ("newton", "lbfgs"):
            assert model.deviance_sqr(X, y) > 0.9

        model: ElasticNet = ElasticNet(alpha=0.5, l1_ratio=0.5, **kwargs)
        model.fit(X, y)
        if kwargs["solver"] in ("newton", "lbfgs"):
            assert model.deviance_sqr(X, y) > 0.9
Example #9
def test_sklearn_logistic_regression(nps_app_inst: ArrayApplication):
    from sklearn.linear_model import LogisticRegression as SKLogisticRegression

    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "newton-cg", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        lr_model: LogisticRegression = LogisticRegression(**kwargs)
        lr_model.fit(X, y)
        runtime = time.time() - runtime
        y_pred = lr_model.predict(X).get()
        y_pred_proba = lr_model.predict_proba(X).get()
        assert np.allclose(
            np.ones(shape=(y.shape[0],)), y_pred_proba[:, 0] + y_pred_proba[:, 1]
        )

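        # Fit scikit-learn's implementation with the same solver settings as a reference.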
        sk_lr_model = SKLogisticRegression(**kwargs)
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        sk_y_pred_proba = sk_lr_model.predict_proba(real_X)
        assert np.allclose(
            np.ones(shape=(y.shape[0],)), sk_y_pred_proba[:, 0] + sk_y_pred_proba[:, 1]
        )
        assert np.allclose(sk_y_pred, y_pred)
Example #10
def test_lr(nps_app_inst: ArrayApplication):
    num_samples, num_features = 233, 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features, theta=real_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 100},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        model: LinearRegression = LinearRegression(**kwargs)
        model.fit(X, y)
        assert model._beta.shape == real_theta.shape and model._beta0.shape == ()
        runtime = time.time() - runtime
        y_pred = model.predict(X).get()
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", model.grad_norm_sq(X, y).get())
        print("objective", model.objective(X, y).get())
        print("error", np.sum((y.get() - y_pred) ** 2) / num_samples)
        print("D^2", model.deviance_sqr(X, y).get())

    # Test if integer array arguments will converge properly.
    X = nps_app_inst.array([[1, 2], [3, 5], [1, 5]], block_shape=(2, 2))
    y = nps_app_inst.array([1, 2, 3], block_shape=(2,))
    model: LinearRegression = LinearRegression()
    model.fit(X, y)
    try:
        pred = model.predict([1, 2]).get()
        assert 0.9 < pred < 1.1
    except OverflowError:
        assert False, "LinearRegression overflows with integer array arguments."
Example #11
def test_lr(app_inst: ArrayApplication):
    num_features = 13
    rs = np.random.RandomState(1337)
    for dtype in (np.float32, np.float64):
        real_theta = rs.random_sample(num_features).astype(dtype)
        real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=real_theta)
        real_X = real_X.astype(dtype)
        real_y = real_y.astype(dtype)
        X = app_inst.array(real_X, block_shape=(15, 5))
        y = app_inst.array(real_y, block_shape=(15,))

        # Direct TSQR LR
        theta = app_inst.linear_regression(X, y)
        error = app_inst.sum((((X @ theta) - y)**2)).get()
        if dtype == np.float64:
            assert np.allclose(0, error), error
        else:
            # Need to account for lower precision.
            assert np.allclose(0, error, rtol=1.e-4, atol=1.e-4), error

        # Fast LR
        theta = app_inst.fast_linear_regression(X, y)
        error = app_inst.sum((((X @ theta) - y)**2)).get()
        if dtype == np.float64:
            assert np.allclose(0, error), error
        else:
            # Need to account for lower precision.
            assert np.allclose(0, error, rtol=1.e-4, atol=1.e-4), error
Example #12
def test_qr(app_inst: ArrayApplication):
    real_X, _ = BimodalGaussian.get_dataset(2345, 9)
    X = app_inst.array(real_X, block_shape=(123, 4))
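    # Both TSQR variants should yield a factorization that reconstructs X.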
    Q, R = app_inst.indirect_tsqr(X)
    assert np.allclose(Q.get() @ R.get(), real_X)
    Q, R = app_inst.direct_tsqr(X)
    assert np.allclose(Q.get() @ R.get(), real_X)
Example #13
def test_pca(app_inst: ArrayApplication):
    real_X, _ = BimodalGaussian.get_dataset(2345, 9)
    X = app_inst.array(real_X, block_shape=(123, 4))

    # Covariance matrix test.
    C = app_inst.cov(X, rowvar=False)
    V, _, VT = linalg.svd(app_inst, C)
    assert app_inst.allclose(V, VT.T)
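    # Projecting X onto the covariance eigenvectors yields the principal components.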
    pc = X @ V
    assert app_inst.allclose(pc, linalg.pca(app_inst, X))
Example #14
def test_logistic(app_inst: ArrayApplication):
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = app_inst.array(real_X, block_shape=(100, 3))
    y = app_inst.array(real_y, block_shape=(100, ))
    opt_param_set = [
        ("gd", {"lr": 1e-6, "tol": 1e-8, "max_iter": 10}),
        ("block_sync_sgd", {"lr": 1e-6, "tol": 1e-8, "max_iter": 10}),
        ("block_async_sgd", {"lr": 1e-6, "tol": 1e-8, "max_iter": 10}),
        ("newton", {"tol": 1e-8, "max_iter": 10}),
        ("irls", {"tol": 1e-8, "max_iter": 10}),
    ]
    for opt, opt_params in opt_param_set:
        runtime = time.time()
        lr_model: LogisticRegression = LogisticRegression(app_inst, opt, opt_params)
        lr_model.fit(X, y)
        runtime = time.time() - runtime
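        # predict returns continuous scores under this API, so threshold at 0.5
        # to recover class labels.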
        y_pred = (lr_model.predict(X).get() > 0.5).astype(int)
        print("opt", opt)
        print("runtime", runtime)
        print("norm", lr_model.grad_norm_sq(X, y).get())
        print("objective", lr_model.objective(X, y).get())
        print("accuracy", np.sum(y.get() == y_pred) / num_samples)
Example #15
def test_reshape(app_inst: ArrayApplication):
    real_X, _ = BimodalGaussian.get_dataset(1000, 9)
    X = app_inst.array(real_X, block_shape=(100, 9))
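    # Reshape to the same shape with a finer block_shape; only the partitioning changes.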
    X = X.reshape(shape=(1000, 9), block_shape=(1000, 1))
    assert np.allclose(X.get(), real_X)
Example #16
def test_svd(app_inst: ArrayApplication):
    real_X, _ = BimodalGaussian.get_dataset(2345, 9)
    X = app_inst.array(real_X, block_shape=(123, 4))
    U, S, VT = app_inst.svd(X)
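    # Scaling the columns of U by the singular values and applying VT reconstructs X.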
    assert np.allclose((U.get() * S.get()) @ VT.get(), real_X)
Example #17
def test_matmul(app_inst: ArrayApplication):
    real_X, _ = BimodalGaussian.get_dataset(100, 9)
    X = app_inst.array(real_X, block_shape=(100, 1))
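    # Compare the blocked Gram matrix X.T @ X against the NumPy result.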
    X_sqr = X.T @ X
    assert np.allclose(X_sqr.get(), real_X.T @ real_X)
Example #18
def test_logistic_cv(nps_app_inst: ArrayApplication):
    num_samples, num_features = 1000, 10
    num_bad = 100
    block_shape = (200, 10)
    folds = num_samples // block_shape[0]
    rs = np.random.RandomState(1337)

    real_X, real_y = BimodalGaussian.get_dataset(
        num_samples - num_bad, num_features, p=0.5
    )
    extra_X, extra_y = BimodalGaussian.get_dataset(num_bad, num_features, p=0.5)

    # Perturb some examples.
    extra_X = extra_X * rs.random_sample(extra_X.shape)
    extra_y = rs.randint(0, 2, extra_y.shape).reshape(extra_y.shape)
    perm = rs.permutation(np.arange(num_samples))
    real_X = np.concatenate([real_X, extra_X], axis=0)[perm]
    real_y = np.concatenate([real_y, extra_y], axis=0)[perm]

    X = nps_app_inst.array(real_X, block_shape=block_shape)
    y = nps_app_inst.array(real_y, block_shape=(block_shape[0],))
    param_set = [
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
        {
            "solver": "newton",
            "penalty": "l2",
            "C": 1.0 / 0.1,
            "tol": 1e-8,
            "max_iter": 10,
        },
        {
            "solver": "newton",
            "penalty": "l2",
            "C": 1.0 / 0.2,
            "tol": 1e-8,
            "max_iter": 10,
        },
        {
            "solver": "newton",
            "penalty": "l2",
            "C": 1.0 / 0.4,
            "tol": 1e-8,
            "max_iter": 10,
        },
        {
            "solver": "newton",
            "penalty": "l2",
            "C": 1.0 / 0.8,
            "tol": 1e-8,
            "max_iter": 10,
        },
    ]
    X_train = nps_app_inst.empty(
        (num_samples - X.block_shape[0], num_features), X.block_shape, X.dtype
    )
    y_train = nps_app_inst.empty(
        (num_samples - y.block_shape[0],), y.block_shape, y.dtype
    )
    num_hps = len(param_set)
    mean_accuracies = nps_app_inst.empty((num_hps,), (num_hps,))
    for i, kwargs in enumerate(param_set):
        accuracies = nps_app_inst.empty((folds,), (folds,))
        for fold in range(folds):
            print(i, fold)
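            # Hold out the fold'th row block as the test split; the remaining
            # blocks form the training set.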
            pos = X.block_shape[0] * fold
            block_size, _ = X.grid.get_block_shape((fold, 0))
            start = pos
            stop = pos + block_size
            X_train[:start] = X[:start]
            X_train[start:] = X[stop:]
            y_train[:start] = y[:start]
            y_train[start:] = y[stop:]
            X_test, y_test = X[start:stop], y[start:stop]
            lr_model: LogisticRegression = LogisticRegression(**kwargs)
            lr_model.fit(X_train, y_train)
            y_pred = lr_model.predict(X_test)
            accuracies[fold] = nps_app_inst.sum(y_test == y_pred) / (stop - start)
        mean_accuracies[i] = nps_app_inst.mean(accuracies)
    print(mean_accuracies.get())
Example #19
def test_log(app_inst: ArrayApplication):
    real_X, _ = BimodalGaussian.get_dataset(100, 9)
    X = app_inst.array(real_X, block_shape=(10, 2))
    assert np.allclose(app_inst.log(X).get(), np.log(real_X))