import time

import numpy as np

from nums.core.array.application import ArrayApplication
from nums.core.storage.storage import BimodalGaussian
from nums.models.glms import LogisticRegression


def test_lbfgs(nps_app_inst):
    # sample_set is assumed to be a helper defined elsewhere in this module.
    assert nps_app_inst is not None
    X, y = sample_set(nps_app_inst)
    model = LogisticRegression(solver="lbfgs", max_iter=30)
    model.fit(X, y)
    y_pred = model.predict(X)
    # Mean absolute error between labels and predictions, pulled to the driver.
    error = (
        (nps_app_inst.sum(nps_app_inst.abs(y - y_pred)) / X.shape[0])
        .astype(np.float64)
        .get()
    )
    print("error", error)
    assert error < 0.25

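# Hedged aside (illustrative helper, not part of the original tests): for 0/1
# labels, the error computed in test_lbfgs is the mean absolute difference
# between labels and predictions, which equals the misclassification rate.
def _mean_abs_error_sketch(y_true, y_pred):
    return np.sum(np.abs(y_true - y_pred)) / y_true.shape[0]
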
def test_logistic(nps_app_inst: ArrayApplication):
    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "block_sgd", "lr": 1e-6, "tol": 1e-8, "max_iter": 10},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
        {"solver": "irls", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        lr_model: LogisticRegression = LogisticRegression(**kwargs)
        lr_model.fit(X, y)
        runtime = time.time() - runtime
        y_pred = lr_model.predict(X).get()
        y_pred_proba = lr_model.predict_proba(X).get()
        # Class probabilities should sum to one for each sample.
        assert np.allclose(
            np.ones(shape=(y.shape[0],)), y_pred_proba[:, 0] + y_pred_proba[:, 1]
        )
        print("opt", kwargs["solver"])
        print("runtime", runtime)
        print("norm", lr_model.grad_norm_sq(X, y).get())
        print("objective", lr_model.objective(X, y).get())
        print("accuracy", np.sum(y.get() == y_pred) / num_samples)

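# A minimal plain-NumPy sketch of the unregularized logistic objective and
# gradient that solvers such as "gd" and "newton" minimize. This is an
# illustrative assumption, not the library's exact implementation of
# `objective` and `grad_norm_sq`.
def _logistic_objective_sketch(X, y, w):
    p = 1.0 / (1.0 + np.exp(-(X @ w)))  # predicted probabilities (sigmoid)
    eps = 1e-12  # guard against log(0)
    # Negative log-likelihood of the Bernoulli model for labels in {0, 1}.
    objective = -np.sum(y * np.log(p + eps) + (1.0 - y) * np.log(1.0 - p + eps))
    grad = X.T @ (p - y)  # gradient of the negative log-likelihood
    return objective, grad
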
def test_sklearn_logistic_regression(nps_app_inst: ArrayApplication):
    from sklearn.linear_model import LogisticRegression as SKLogisticRegression

    num_samples, num_features = 1000, 10
    real_X, real_y = BimodalGaussian.get_dataset(num_samples, num_features)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "newton-cg", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        runtime = time.time()
        lr_model: LogisticRegression = LogisticRegression(**kwargs)
        lr_model.fit(X, y)
        runtime = time.time() - runtime
        y_pred = lr_model.predict(X).get()
        y_pred_proba = lr_model.predict_proba(X).get()
        assert np.allclose(
            np.ones(shape=(y.shape[0],)), y_pred_proba[:, 0] + y_pred_proba[:, 1]
        )
        # Fit the reference scikit-learn model on the same data and compare.
        sk_lr_model = SKLogisticRegression(**kwargs)
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        sk_y_pred_proba = sk_lr_model.predict_proba(real_X)
        assert np.allclose(
            np.ones(shape=(y.shape[0],)), sk_y_pred_proba[:, 0] + sk_y_pred_proba[:, 1]
        )
        assert np.allclose(sk_y_pred, y_pred)

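# Exact label agreement between the two models (the final allclose above) can
# be brittle for samples near the decision boundary. A hedged, illustrative
# alternative is to compare the agreement rate instead:
def _prediction_agreement_sketch(y_pred_a, y_pred_b):
    # Fraction of samples on which both models predict the same class.
    return np.mean(np.asarray(y_pred_a) == np.asarray(y_pred_b))
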
def test_logistic_cv(nps_app_inst: ArrayApplication):
    num_samples, num_features = 1000, 10
    num_bad = 100
    block_shape = (200, 10)
    folds = num_samples // block_shape[0]
    rs = np.random.RandomState(1337)
    real_X, real_y = BimodalGaussian.get_dataset(
        num_samples - num_bad, num_features, p=0.5
    )
    extra_X, extra_y = BimodalGaussian.get_dataset(num_bad, num_features, p=0.5)
    # Perturb some examples: rescale features randomly and assign random labels.
    extra_X = extra_X * rs.random_sample(np.prod(extra_X.shape)).reshape(
        extra_X.shape
    )
    extra_y = rs.randint(0, 2, extra_y.shape).reshape(extra_y.shape)
    perm = rs.permutation(np.arange(num_samples))
    real_X = np.concatenate([real_X, extra_X], axis=0)[perm]
    real_y = np.concatenate([real_y, extra_y], axis=0)[perm]
    X = nps_app_inst.array(real_X, block_shape=block_shape)
    y = nps_app_inst.array(real_y, block_shape=(block_shape[0],))
    # C is the inverse regularization strength, written here as 1 / lambda.
    param_set = [
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
        {
            "solver": "newton",
            "penalty": "l2",
            "C": 1.0 / 0.1,
            "tol": 1e-8,
            "max_iter": 10,
        },
        {
            "solver": "newton",
            "penalty": "l2",
            "C": 1.0 / 0.2,
            "tol": 1e-8,
            "max_iter": 10,
        },
        {
            "solver": "newton",
            "penalty": "l2",
            "C": 1.0 / 0.4,
            "tol": 1e-8,
            "max_iter": 10,
        },
        {
            "solver": "newton",
            "penalty": "l2",
            "C": 1.0 / 0.8,
            "tol": 1e-8,
            "max_iter": 10,
        },
    ]
    # Pre-allocate training buffers; one block is held out per fold.
    X_train = nps_app_inst.empty(
        (num_samples - X.block_shape[0], num_features), X.block_shape, X.dtype
    )
    y_train = nps_app_inst.empty(
        (num_samples - y.block_shape[0],), y.block_shape, y.dtype
    )
    num_hps = len(param_set)
    mean_accuracies = nps_app_inst.empty((num_hps,), (num_hps,))
    for i, kwargs in enumerate(param_set):
        accuracies = nps_app_inst.empty((folds,), (folds,))
        for fold in range(folds):
            print(i, fold)
            pos = X.block_shape[0] * fold
            block_size, _ = X.grid.get_block_shape((fold, 0))
            start = pos
            stop = pos + block_size
            # Train on everything except the held-out block.
            X_train[:start] = X[:start]
            X_train[start:] = X[stop:]
            y_train[:start] = y[:start]
            y_train[start:] = y[stop:]
            X_test, y_test = X[start:stop], y[start:stop]
            lr_model: LogisticRegression = LogisticRegression(**kwargs)
            lr_model.fit(X_train, y_train)
            y_pred = lr_model.predict(X_test)
            accuracies[fold] = nps_app_inst.sum(y_test == y_pred) / (stop - start)
        mean_accuracies[i] = nps_app_inst.mean(accuracies)
    print(mean_accuracies.get())

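# A plain-NumPy sketch of the leave-one-block-out split used above
# (illustrative assumption; nums' block bookkeeping via X.grid is omitted):
def _block_cv_split_sketch(X, block_size):
    # Yield (train, test) pairs, holding out one contiguous block per fold.
    folds = X.shape[0] // block_size
    for fold in range(folds):
        start = block_size * fold
        stop = start + block_size
        X_test = X[start:stop]
        X_train = np.concatenate([X[:start], X[stop:]], axis=0)
        yield X_train, X_test
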
# Example: end-to-end logistic regression with nums.
import nums
import nums.numpy as nps
from nums.models.glms import LogisticRegression

nums.init()

# Make dataset: two well-separated Gaussian modes, labeled 0 and 1.
X1 = nps.random.randn(500, 1) + 5.0
y1 = nps.zeros(shape=(500,), dtype=bool)
X2 = nps.random.randn(500, 1) + 10.0
y2 = nps.ones(shape=(500,), dtype=bool)
X = nps.concatenate([X1, X2], axis=0)
y = nps.concatenate([y1, y2], axis=0)

# Train logistic regression model.
model = LogisticRegression(solver="newton", tol=1e-8, max_iter=1)
model.fit(X, y)
y_pred = model.predict(X)
print("accuracy", (nps.sum(y == y_pred) / X.shape[0]).get())
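
# A hedged extension of the example (uses only calls already shown above):
# evaluate on freshly sampled held-out data instead of the training set.
X_test = nps.concatenate(
    [nps.random.randn(100, 1) + 5.0, nps.random.randn(100, 1) + 10.0], axis=0
)
y_test = nps.concatenate(
    [nps.zeros(shape=(100,), dtype=bool), nps.ones(shape=(100,), dtype=bool)], axis=0
)
y_test_pred = model.predict(X_test)
print("test accuracy", (nps.sum(y_test == y_test_pred) / X_test.shape[0]).get())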