def test_lr(nps_app_inst: ArrayApplication):
    """Fit LinearRegression with gd and newton solvers on a synthetic dataset,
    print timing/convergence diagnostics, and verify integer inputs converge.

    Args:
        nps_app_inst: Application fixture used to build block-partitioned arrays.
    """
    num_samples, num_features = 1000, 10
    rng = np.random.RandomState(1337)
    true_theta = rng.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=true_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    solver_configs = (
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 100},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
    )
    for config in solver_configs:
        start = time.time()
        model = LinearRegression(**config)
        model.fit(X, y)
        # Fitted coefficients must match the generating theta's shape; the
        # intercept is a scalar.
        assert model._beta.shape == true_theta.shape
        assert model._beta0.shape == ()
        elapsed = time.time() - start
        y_pred = model.predict(X).get()
        print("opt", config["solver"])
        print("runtime", elapsed)
        print("norm", model.grad_norm_sq(X, y).get())
        print("objective", model.objective(X, y).get())
        print("error", np.sum((y.get() - y_pred) ** 2) / num_samples)
        print("D^2", model.deviance_sqr(X, y).get())

    # Test if integer array arguments will converge properly.
    X = nps_app_inst.array([[1, 2], [3, 5], [1, 5]], block_shape=(2, 2))
    y = nps_app_inst.array([1, 2, 3], block_shape=(2,))
    model = LinearRegression()
    model.fit(X, y)
    try:
        pred = model.predict([1, 2]).get()
        assert 0.9 < pred < 1.1
    except OverflowError:
        assert False, "LinearRegression overflows with integer array arguments."
def test_sklearn_linear_regression(nps_app_inst: ArrayApplication):
    """Check that nums' LinearRegression predictions match scikit-learn's.

    Args:
        nps_app_inst: Application fixture used to build block-partitioned arrays.
    """
    from sklearn.linear_model import LinearRegression as SKLinearRegression

    _, num_features = 1000, 10
    rng = np.random.RandomState(1337)
    true_theta = rng.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=true_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "newton-cg", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        lr_model = LinearRegression(**kwargs)
        lr_model.fit(X, y)
        y_pred = lr_model.predict(X).get()
        sk_lr_model = SKLinearRegression()
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        # Bug fix: the original discarded np.allclose's return value, so the
        # sklearn comparison could never fail. Assert it instead.
        assert np.allclose(sk_y_pred, y_pred)
def test_lr(nps_app_inst: ArrayApplication):
    """Fit LinearRegression with gd and newton solvers on a synthetic dataset
    and print timing/convergence diagnostics.

    Args:
        nps_app_inst: Application fixture used to build block-partitioned arrays.
    """
    num_samples, num_features = 1000, 10
    rng = np.random.RandomState(1337)
    true_theta = rng.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=true_theta)
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    solver_configs = [
        {"solver": "gd", "lr": 1e-6, "tol": 1e-8, "max_iter": 100},
        {"solver": "newton", "tol": 1e-8, "max_iter": 10},
    ]
    for config in solver_configs:
        start = time.time()
        model = LinearRegression(**config)
        model.fit(X, y)
        # Fitted coefficients must match the generating theta's shape; the
        # intercept is a scalar.
        assert model._beta.shape == true_theta.shape
        assert model._beta0.shape == ()
        elapsed = time.time() - start
        y_pred = model.predict(X).get()
        print("opt", config["solver"])
        print("runtime", elapsed)
        print("norm", model.grad_norm_sq(X, y).get())
        print("objective", model.objective(X, y).get())
        print("error", np.sum((y.get() - y_pred) ** 2) / num_samples)
        print("D^2", model.deviance_sqr(X, y).get())