Exemple #1
0
def test_lbfgs(nps_app_inst):
    """Train an L-BFGS logistic regression and bound its training error.

    The model is fit on the data returned by ``sample_set`` and evaluated on
    the same data; the mean absolute difference between labels and
    predictions must stay below 0.25.

    Args:
        nps_app_inst: Array application used to build the data and to run the
            ``sum``/``abs`` reductions (presumably a pytest fixture -- confirm
            against the test configuration).
    """
    assert nps_app_inst is not None

    features, labels = sample_set(nps_app_inst)

    classifier = LogisticRegression(solver="lbfgs", max_iter=30)
    classifier.fit(features, labels)
    predictions = classifier.predict(features)

    # Sum of absolute label/prediction differences, averaged over the rows.
    total_abs_diff = nps_app_inst.sum(nps_app_inst.abs(labels - predictions))
    mean_abs_diff = total_abs_diff / features.shape[0]
    error = mean_abs_diff.astype(np.float64).get()

    print("error", error)
    assert error < 0.25
Exemple #2
0
def test_logistic(nps_app_inst: ArrayApplication):
    """Fit LogisticRegression with every configuration in ``param_set``.

    A BimodalGaussian dataset is loaded into blocked arrays, one model is
    trained per solver configuration, and the two ``predict_proba`` columns
    are asserted to sum to one for every sample. Runtime, gradient norm,
    objective value and training accuracy are printed as diagnostics only;
    they are not asserted.

    Args:
        nps_app_inst: Array application used to create the blocked arrays.
    """
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "block_sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
        {"solver": "irls", "tol": 1e-8, "max_iter": 10},
    ]
    # Expected per-sample total of the two class probabilities; loop-invariant.
    expected_total = np.ones(shape=(y.shape[0],))
    for kwargs in param_set:
        runtime = time.time()
        lr_model: LogisticRegression = LogisticRegression(**kwargs)
        lr_model.fit(X, y)
        runtime = time.time() - runtime
        y_pred = lr_model.predict(X).get()
        y_pred_proba = lr_model.predict_proba(X).get()
        # The result of np.allclose used to be discarded, so this check could
        # never fail; assert it so a broken predict_proba is actually caught.
        assert np.allclose(
            expected_total, y_pred_proba[:, 0] + y_pred_proba[:, 1]
        ), "class probabilities do not sum to 1 for solver %s" % kwargs["solver"]
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", lr_model.grad_norm_sq(X, y).get())
        print("objective", lr_model.objective(X, y).get())
        print("accuracy", np.sum(y.get() == y_pred) / num_samples)
Exemple #3
0
def test_sklearn_logistic_regression(nps_app_inst: ArrayApplication):
    """Compare this project's LogisticRegression against scikit-learn's.

    Both implementations are constructed from the same keyword arguments and
    fit on the same BimodalGaussian dataset. For each model the two
    ``predict_proba`` columns must sum to one per sample, and the predicted
    labels of the two models must agree.

    Args:
        nps_app_inst: Array application used to create the blocked arrays.
    """
    from sklearn.linear_model import LogisticRegression as SKLogisticRegression

    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "newton-cg", "tol": 1e-8, "max_iter": 10},
    ]
    # Expected per-sample total of the two class probabilities; loop-invariant.
    expected_total = np.ones(shape=(y.shape[0],))
    for kwargs in param_set:
        lr_model: LogisticRegression = LogisticRegression(**kwargs)
        lr_model.fit(X, y)
        y_pred = lr_model.predict(X).get()
        y_pred_proba = lr_model.predict_proba(X).get()
        # The np.allclose results below used to be discarded, which made the
        # whole test a no-op; each comparison is now asserted.
        assert np.allclose(
            expected_total, y_pred_proba[:, 0] + y_pred_proba[:, 1]
        ), "model class probabilities do not sum to 1"

        sk_lr_model = SKLogisticRegression(**kwargs)
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        sk_y_pred_proba = sk_lr_model.predict_proba(real_X)
        assert np.allclose(
            expected_total, sk_y_pred_proba[:, 0] + sk_y_pred_proba[:, 1]
        ), "scikit-learn class probabilities do not sum to 1"
        # NOTE(review): the two libraries may use different default
        # regularization; if this proves flaky, align penalty/C in param_set.
        assert np.allclose(
            sk_y_pred, y_pred
        ), "predictions differ from scikit-learn for solver %s" % kwargs["solver"]
Exemple #4
0
def test_logistic_cv(nps_app_inst: ArrayApplication):
    """Run block-wise cross-validation over several regularization settings.

    A BimodalGaussian dataset is combined with ``num_bad`` perturbed samples
    (features scaled by uniform noise, labels redrawn at random) and shuffled.
    Each row block of the resulting array serves once as the held-out fold
    while the remaining rows form the training set. The mean held-out
    accuracy of every hyper-parameter setting is printed; nothing is asserted
    about the accuracy values.

    Args:
        nps_app_inst: Array application used to create and reduce the arrays.
    """
    num_samples, num_features = 1000, 10
    num_bad = 100
    block_shape = (200, 10)
    folds = num_samples // block_shape[0]
    rs = np.random.RandomState(1337)

    real_X, real_y = BimodalGaussian.get_dataset(
        num_samples - num_bad, num_features, p=0.5
    )
    extra_X, extra_y = BimodalGaussian.get_dataset(num_bad, num_features, p=0.5)

    # Perturb some examples. The noise is drawn directly in the target shape:
    # this avoids np.product (removed in NumPy 2.0) and yields the same values
    # as drawing a flat vector and reshaping it.
    extra_X = extra_X * rs.random_sample(extra_X.shape)
    extra_y = rs.randint(0, 2, extra_y.shape)
    perm = rs.permutation(np.arange(num_samples))
    real_X = np.concatenate([real_X, extra_X], axis=0)[perm]
    real_y = np.concatenate([real_y, extra_y], axis=0)[perm]

    X = nps_app_inst.array(real_X, block_shape=block_shape)
    y = nps_app_inst.array(real_y, block_shape=(block_shape[0],))

    # One unpenalized baseline followed by L2-penalized models, C = 1 / lambda.
    param_set = [{"solver": "newton", "tol": 1e-8, "max_iter": 10}]
    param_set += [
        {
            "solver": "newton",
            "penalty": "l2",
            "C": 1.0 / reg_strength,
            "tol": 1e-8,
            "max_iter": 10,
        }
        for reg_strength in (0.1, 0.2, 0.4, 0.8)
    ]

    # Training buffers hold every row except one block; reused across folds.
    X_train = nps_app_inst.empty(
        (num_samples - X.block_shape[0], num_features), X.block_shape, X.dtype
    )
    y_train = nps_app_inst.empty(
        (num_samples - y.block_shape[0],), y.block_shape, y.dtype
    )
    num_hps = len(param_set)
    mean_accuracies = nps_app_inst.empty((num_hps,), (num_hps,))
    for i, kwargs in enumerate(param_set):
        accuracies = nps_app_inst.empty((folds,), (folds,))
        for fold in range(folds):
            print(i, fold)
            # Rows [start, stop) form the held-out block for this fold.
            start = X.block_shape[0] * fold
            block_size, _ = X.grid.get_block_shape((fold, 0))
            stop = start + block_size
            # Rows before the held-out block keep their position; rows after
            # it are shifted up to close the gap.
            X_train[:start] = X[:start]
            X_train[start:] = X[stop:]
            y_train[:start] = y[:start]
            y_train[start:] = y[stop:]
            X_test, y_test = X[start:stop], y[start:stop]
            lr_model: LogisticRegression = LogisticRegression(**kwargs)
            lr_model.fit(X_train, y_train)
            y_pred = lr_model.predict(X_test)
            accuracies[fold] = nps_app_inst.sum(y_test == y_pred) / (stop - start)
        mean_accuracies[i] = nps_app_inst.mean(accuracies)
    print(mean_accuracies.get())
import nums
import nums.numpy as nps
from nums.models.glms import LogisticRegression

# Initialize NumS before creating any arrays.
nums.init()

# Make dataset: two groups of 500 single-feature samples, drawn from randn
# and shifted by 5.0 and 10.0, labelled False and True respectively.

X1 = nps.random.randn(500, 1) + 5.0
X2 = nps.random.randn(500, 1) + 10.0

y1 = nps.zeros(shape=(500, ), dtype=bool)
y2 = nps.ones(shape=(500, ), dtype=bool)

X = nps.concatenate([X1, X2], axis=0)
y = nps.concatenate([y1, y2], axis=0)

# Train Logistic Regression Model.

model = LogisticRegression(solver="newton", tol=1e-8, max_iter=1)
model.fit(X, y)

# Evaluate on the training data: fraction of labels predicted correctly.

y_pred = model.predict(X)
accuracy = nps.sum(y == y_pred) / X.shape[0]

print("accuracy", accuracy.get())