Example #1
0
def test_penalties(nps_app_inst: ArrayApplication):
    """Fit Lasso, Ridge, and ElasticNet under each solver configuration.

    Only the second-order solvers ("newton", "lbfgs") are expected to reach a
    good fit within so few iterations, so goodness of fit (D^2 > 0.9) is
    asserted only for them.
    """
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "block_sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
        {"solver": "lbfgs", "tol": 1e-8, "max_iter": 20},
    ]
    # The three penalized models share the same fit/score contract, so run
    # them through one loop instead of triplicating the fit-and-check code.
    model_specs = [
        (Lasso, {"alpha": 0.5}),
        (Ridge, {"alpha": 0.5}),
        (ElasticNet, {"alpha": 0.5, "l1_ratio": 0.5}),
    ]
    for kwargs in param_set:
        for model_cls, model_kwargs in model_specs:
            model = model_cls(**model_kwargs, **kwargs)
            model.fit(X, y)
            if kwargs["solver"] in ("newton", "lbfgs"):
                # First-order solvers with lr=1e-6 cannot converge in 10
                # iterations; only check fit quality for second-order ones.
                assert model.deviance_sqr(X, y) > 0.9
Example #2
0
def test_logistic(nps_app_inst: ArrayApplication):
    """Fit LogisticRegression with each solver and sanity-check predictions.

    Verifies that predicted class probabilities sum to 1 per sample, and
    prints runtime/convergence diagnostics for each solver.
    """
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "block_sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
        {"solver": "irls", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        lr_model: LogisticRegression = LogisticRegression(**kwargs)
        lr_model.fit(X, y)
        runtime = time.time() - runtime
        y_pred = lr_model.predict(X).get()
        y_pred_proba = lr_model.predict_proba(X).get()
        # BUG FIX: np.allclose returns a bool; without `assert` this check
        # was silently discarded. Class probabilities must sum to 1.
        assert np.allclose(
            np.ones(shape=(y.shape[0],)), y_pred_proba[:, 0] + y_pred_proba[:, 1]
        )
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", lr_model.grad_norm_sq(X, y).get())
        print("objective", lr_model.objective(X, y).get())
        print("accuracy", np.sum(y.get() == y_pred) / num_samples)
Example #3
0
def test_sklearn_logistic_regression(nps_app_inst: ArrayApplication):
    """Compare nums LogisticRegression against scikit-learn's on one dataset.

    Checks that both implementations produce valid probability distributions
    and that their hard predictions agree.
    """
    from sklearn.linear_model import LogisticRegression as SKLogisticRegression

    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "newton-cg", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        lr_model: LogisticRegression = LogisticRegression(**kwargs)
        lr_model.fit(X, y)
        runtime = time.time() - runtime
        y_pred = lr_model.predict(X).get()
        y_pred_proba = lr_model.predict_proba(X).get()
        # BUG FIX: np.allclose returns a bool; the original discarded all
        # three comparison results, so this test asserted nothing.
        assert np.allclose(
            np.ones(shape=(y.shape[0],)), y_pred_proba[:, 0] + y_pred_proba[:, 1]
        )

        sk_lr_model = SKLogisticRegression(**kwargs)
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        sk_y_pred_proba = sk_lr_model.predict_proba(real_X)
        assert np.allclose(
            np.ones(shape=(y.shape[0],)), sk_y_pred_proba[:, 0] + sk_y_pred_proba[:, 1]
        )
        # Both solvers run newton-cg on the same data; predictions must agree.
        assert np.allclose(sk_y_pred, y_pred)
Example #4
0
def test_lr(nps_app_inst: ArrayApplication):
    """Fit LinearRegression with gd and newton solvers and report metrics.

    Also verifies that fitting on integer-typed arrays converges without
    overflowing (integer inputs must be promoted internally).
    """
    num_features = 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=real_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 100},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        model: LinearRegression = LinearRegression(**kwargs)
        model.fit(X, y)
        assert model._beta.shape == real_theta.shape and model._beta0.shape == ()
        runtime = time.time() - runtime
        y_pred = model.predict(X).get()
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", model.grad_norm_sq(X, y).get())
        print("objective", model.objective(X, y).get())
        # BUG FIX: the dataset has 233 samples, but the mean squared error
        # was normalized by a stale num_samples=1000; use the actual count.
        print("error", np.sum((y.get() - y_pred) ** 2) / real_y.shape[0])
        print("D^2", model.deviance_sqr(X, y).get())

    # Test if integer array arguments will converge properly.
    X = nps_app_inst.array([[1, 2], [3, 5], [1, 5]], block_shape=(2, 2))
    y = nps_app_inst.array([1, 2, 3], block_shape=(2,))
    model: LinearRegression = LinearRegression()
    model.fit(X, y)
    try:
        pred = model.predict([1, 2]).get()
        assert 0.9 < pred < 1.1
    except OverflowError:
        assert False, "LinearRegression overflows with integer array arguments."
Example #5
0
def test_pca(app_inst: ArrayApplication):
    """Principal components via covariance-matrix SVD match linalg.pca."""
    dense, _ = BimodalGaussian.get_dataset(2345, 9)
    blocked = app_inst.array(dense, block_shape=(123, 4))

    # Covariance matrix test: the covariance matrix is symmetric, so the
    # left and right singular vectors from its SVD should coincide.
    cov = app_inst.cov(blocked, rowvar=False)
    left, _, right_t = linalg.svd(app_inst, cov)
    assert app_inst.allclose(left, right_t.T)
    components = blocked @ left
    assert app_inst.allclose(components, linalg.pca(app_inst, blocked))
Example #6
0
def test_concatenate(app_inst: ArrayApplication):
    """Blocked concatenation along axis 1 matches np.concatenate."""
    axis = 1
    dense, _ = BimodalGaussian.get_dataset(1000, 9)
    dense_ones = np.ones(shape=(1000, 1))
    blocked = app_inst.array(dense, block_shape=(100, 9))
    blocked_ones = app_inst.ones((1000, 1), (100, 1), dtype=blocked.dtype)

    # Two-operand concatenation.
    result = app_inst.concatenate(
        [blocked, blocked_ones], axis=axis, axis_block_size=blocked.block_shape[axis]
    )
    expected = np.concatenate([dense, dense_ones], axis=axis)
    assert np.allclose(result.get(), expected)

    # Three-operand concatenation with a differently-blocked third operand.
    dense2 = np.random.random_sample(1000 * 17).reshape(1000, 17)
    blocked2 = app_inst.array(dense2, block_shape=(blocked.block_shape[0], 3))
    result = app_inst.concatenate(
        [blocked, blocked_ones, blocked2],
        axis=axis,
        axis_block_size=blocked.block_shape[axis],
    )
    expected = np.concatenate([dense, dense_ones, dense2], axis=axis)
    assert np.allclose(result.get(), expected)
Example #7
0
def test_sklearn_linear_regression(nps_app_inst: ArrayApplication):
    """Compare nums LinearRegression predictions against scikit-learn's."""
    from sklearn.linear_model import LinearRegression as SKLinearRegression

    _, num_features = 1000, 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=real_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "newton-cg", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        lr_model: LinearRegression = LinearRegression(**kwargs)
        lr_model.fit(X, y)
        y_pred = lr_model.predict(X).get()

        sk_lr_model = SKLinearRegression()
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        # BUG FIX: np.allclose returns a bool; without `assert` the
        # comparison against scikit-learn was silently discarded.
        assert np.allclose(sk_y_pred, y_pred)
Example #8
0
def test_lr(nps_app_inst: ArrayApplication):
    """Fit LinearRegression with gd and newton solvers and report metrics."""
    num_features = 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=real_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 100},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        model: LinearRegression = LinearRegression(**kwargs)
        model.fit(X, y)
        assert model._beta.shape == real_theta.shape and model._beta0.shape == ()
        runtime = time.time() - runtime
        y_pred = model.predict(X).get()
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", model.grad_norm_sq(X, y).get())
        print("objective", model.objective(X, y).get())
        # BUG FIX: the dataset has 233 samples, but the mean squared error
        # was normalized by a stale num_samples=1000; use the actual count.
        print("error", np.sum((y.get() - y_pred) ** 2) / real_y.shape[0])
        print("D^2", model.deviance_sqr(X, y).get())
Example #9
0
def test_logistic(app_inst: ArrayApplication):
    """Fit logistic regression with several optimizers and print diagnostics.

    Uses the positional (app, optimizer, params) constructor API; first-order
    optimizers get a learning rate, second-order ones do not.
    """
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = app_inst.array(real_X, block_shape=(100, 3))
    y = app_inst.array(real_y, block_shape=(100,))
    first_order = {"lr": 1e-6, "tol": 1e-8, "max_iter": 10}
    second_order = {"tol": 1e-8, "max_iter": 10}
    opt_param_set = [
        ("gd", dict(first_order)),
        ("block_sync_sgd", dict(first_order)),
        ("block_async_sgd", dict(first_order)),
        ("newton", dict(second_order)),
        ("irls", dict(second_order)),
    ]
    for opt, opt_params in opt_param_set:
        start = time.time()
        lr_model: LogisticRegression = LogisticRegression(
            app_inst, opt, opt_params)
        lr_model.fit(X, y)
        elapsed = time.time() - start
        # Threshold the predicted probabilities at 0.5 for hard labels.
        y_pred = (lr_model.predict(X).get() > 0.5).astype(int)
        print("opt", opt)
        print("runtime", elapsed)
        print("norm", lr_model.grad_norm_sq(X, y).get())
        print("objective", lr_model.objective(X, y).get())
        print("accuracy", np.sum(y.get() == y_pred) / num_samples)
Example #10
0
def test_logistic_cv(nps_app_inst: ArrayApplication):
    """Leave-one-block-out cross-validation over LogisticRegression penalties.

    Builds a 1000-sample dataset where 100 examples are perturbed and given
    random labels, then for each hyperparameter setting trains on all but one
    block-aligned fold, scores on the held-out fold, and prints the mean
    accuracy per setting.
    """
    num_samples, num_features = 1000, 10
    num_bad = 100
    block_shape = (200, 10)
    # Folds are block-aligned so each test fold is exactly one row-block.
    folds = num_samples // block_shape[0]
    rs = np.random.RandomState(1337)

    real_X, real_y = BimodalGaussian.get_dataset(
        num_samples - num_bad, num_features, p=0.5
    )
    extra_X, extra_y = BimodalGaussian.get_dataset(num_bad, num_features, p=0.5)

    # Perturb some examples.
    # BUG FIX: np.product is deprecated and removed in NumPy 2.0; use np.prod.
    extra_X = extra_X * rs.random_sample(np.prod(extra_X.shape)).reshape(
        extra_X.shape
    )
    extra_y = rs.randint(0, 2, extra_y.shape).reshape(extra_y.shape)
    perm = rs.permutation(np.arange(num_samples))
    real_X = np.concatenate([real_X, extra_X], axis=0)[perm]
    real_y = np.concatenate([real_y, extra_y], axis=0)[perm]

    X = nps_app_inst.array(real_X, block_shape=block_shape)
    y = nps_app_inst.array(real_y, block_shape=(block_shape[0],))
    # First entry is unpenalized; the rest sweep l2 strength via C = 1/lambda.
    param_set = [
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
        {
            "solver": "newton",
            "penalty": "l2",
            "C": 1.0 / 0.1,
            "tol": 1e-8,
            "max_iter": 10,
        },
        {
            "solver": "newton",
            "penalty": "l2",
            "C": 1.0 / 0.2,
            "tol": 1e-8,
            "max_iter": 10,
        },
        {
            "solver": "newton",
            "penalty": "l2",
            "C": 1.0 / 0.4,
            "tol": 1e-8,
            "max_iter": 10,
        },
        {
            "solver": "newton",
            "penalty": "l2",
            "C": 1.0 / 0.8,
            "tol": 1e-8,
            "max_iter": 10,
        },
    ]
    # Pre-allocated training buffers, one fold smaller than the full dataset.
    X_train = nps_app_inst.empty(
        (num_samples - X.block_shape[0], num_features), X.block_shape, X.dtype
    )
    y_train = nps_app_inst.empty(
        (num_samples - y.block_shape[0],), y.block_shape, y.dtype
    )
    num_hps = len(param_set)
    mean_accuracies = nps_app_inst.empty((num_hps,), (num_hps,))
    for i, kwargs in enumerate(param_set):
        accuracies = nps_app_inst.empty((folds,), (folds,))
        for fold in range(folds):
            print(i, fold)
            pos = X.block_shape[0] * fold
            block_size, _ = X.grid.get_block_shape((fold, 0))
            start = pos
            stop = pos + block_size
            # Train on everything outside [start, stop); test on that slice.
            X_train[:start] = X[:start]
            X_train[start:] = X[stop:]
            y_train[:start] = y[:start]
            y_train[start:] = y[stop:]
            X_test, y_test = X[start:stop], y[start:stop]
            lr_model: LogisticRegression = LogisticRegression(**kwargs)
            lr_model.fit(X_train, y_train)
            y_pred = lr_model.predict(X_test)
            accuracies[fold] = nps_app_inst.sum(y_test == y_pred) / (stop - start)
        mean_accuracies[i] = nps_app_inst.mean(accuracies)
    print(mean_accuracies.get())
Example #11
0
def test_svd(app_inst: ArrayApplication):
    """SVD factors must reconstruct the original matrix."""
    dense, _ = BimodalGaussian.get_dataset(2345, 9)
    blocked = app_inst.array(dense, block_shape=(123, 4))
    u, s, vt = app_inst.svd(blocked)
    # Broadcasting s over u's columns is equivalent to u @ diag(s).
    reconstructed = (u.get() * s.get()) @ vt.get()
    assert np.allclose(reconstructed, dense)
Example #12
0
def test_matmul(app_inst: ArrayApplication):
    """Blocked X.T @ X matches the dense NumPy Gram matrix."""
    dense, _ = BimodalGaussian.get_dataset(100, 9)
    blocked = app_inst.array(dense, block_shape=(100, 1))
    gram = blocked.T @ blocked
    assert np.allclose(gram.get(), dense.T @ dense)
Example #13
0
def test_log(app_inst: ArrayApplication):
    """Elementwise log on a blocked array matches np.log."""
    dense, _ = BimodalGaussian.get_dataset(100, 9)
    blocked = app_inst.array(dense, block_shape=(10, 2))
    result = app_inst.log(blocked).get()
    assert np.allclose(result, np.log(dense))
Example #14
0
def test_reshape(app_inst: ArrayApplication):
    """Re-blocking via reshape (same shape, new block_shape) preserves data."""
    dense, _ = BimodalGaussian.get_dataset(1000, 9)
    blocked = app_inst.array(dense, block_shape=(100, 9))
    reblocked = blocked.reshape(shape=(1000, 9), block_shape=(1000, 1))
    assert np.allclose(reblocked.get(), dense)