def test_sklearn_linear_regression(nps_app_inst: ArrayApplication):
    """Check that the distributed LinearRegression matches scikit-learn's predictions.

    Fits both models on the same bimodal-Gaussian dataset and compares
    their predictions on the training inputs.
    """
    from sklearn.linear_model import LinearRegression as SKLinearRegression

    _, num_features = 1000, 10
    rs = np.random.RandomState(1337)
    real_theta = rs.random_sample(num_features)
    real_X, real_y = BimodalGaussian.get_dataset(233, num_features, theta=real_theta)
    # Block-partitioned copies of the dataset for the distributed model.
    X = nps_app_inst.array(real_X, block_shape=(100, 3))
    y = nps_app_inst.array(real_y, block_shape=(100,))
    param_set = [
        {"solver": "newton-cg", "tol": 1e-8, "max_iter": 10},
    ]
    for kwargs in param_set:
        lr_model: LinearRegression = LinearRegression(**kwargs)
        lr_model.fit(X, y)
        y_pred = lr_model.predict(X).get()

        sk_lr_model = SKLinearRegression()
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        # BUG FIX: np.allclose was called without `assert`, so its result was
        # discarded and the test could never fail. Assert the comparison.
        assert np.allclose(sk_y_pred, y_pred)
def __init__(self, fit_intercept=True, normalize=False, n_jobs=-1, random_state=0, **kwargs):
    """Wrap scikit-learn's LinearRegression as a pipeline component.

    Extra keyword arguments are forwarded to the underlying estimator and
    override the named defaults when keys collide.
    """
    # Merge defaults with caller overrides; later keys win, matching
    # dict.update() semantics.
    merged_params = {
        **{'fit_intercept': fit_intercept, 'normalize': normalize, 'n_jobs': n_jobs},
        **kwargs,
    }
    estimator = SKLinearRegression(**merged_params)
    super().__init__(parameters=merged_params,
                     component_obj=estimator,
                     random_state=random_state)
def test_linear_regression(input_shape, output_shape, epochs, learn_rate, acceptable_r_squared):
    """Fit the custom model on synthetic linear data and check it tracks sklearn.

    When both `epochs` and `learn_rate` are given, gradient-descent error
    bounds apply; otherwise the (looser) basic bounds are used.
    """
    model = LinearRegressionModel(input_shape, output_shape)
    sk_model = SKLinearRegression()

    # Synthetic targets: y = X @ w + b with random weights and bias.
    features = np.random.rand(BATCH_SIZE, input_shape)
    weights = np.random.rand(input_shape, output_shape)
    bias = np.random.rand(output_shape)
    targets = features @ weights + bias

    model.fit(features, targets, epochs, learn_rate)
    sk_model.fit(features, targets)

    # R^2 of the sklearn fit evaluated against the custom model's predictions.
    r_squared = sk_model.score(features, model.predict(features))
    assert r_squared >= acceptable_r_squared

    gd_mode = (epochs is not None) and (learn_rate is not None)
    if gd_mode:
        max_error, max_loss = ACCEPTABLE_GD_ERROR, ACCEPTABLE_GD_LOSS
    else:
        max_error, max_loss = ACCEPTABLE_BASIC_ERROR, ACCEPTABLE_BASIC_LOSS

    predicted = model.predict(features)
    sk_predicted = sk_model.predict(features)
    assert abs(predicted - targets).max() < max_error
    assert abs(predicted - sk_predicted).max() < max_error
    # The model's loss should agree with sklearn's MSE to numeric precision.
    assert abs(model.loss(predicted, targets) - mean_squared_error(targets, predicted)) < ACCEPTABLE_NUMERIC_ERROR
    assert model.loss(predicted, targets) < max_loss
def test_linear_regression():
    """Bootstrap-compare simpleml's linear regressors against sklearn's.

    Prints mean/std of the r2 metric for each model and plots the
    batch-gradient-descent fit.
    """
    X, y = make_regression(n_samples=100, n_features=1, n_informative=2,
                           n_targets=1, noise=10.0)
    # Standardize features before fitting.
    X = StandardScaler().fit_transform(X)
    metric = r2_score

    # Batch gradient descent variant (also plotted).
    model = LinearRegression()
    scores = bootstrap(model, metric, X, y)
    print(f'simpleml BGD: mean={np.mean(scores)}, std={np.std(scores)}')
    plot_regression(X, y, model)

    # BFGS variant.
    model = LinearRegression(method='BFGS')
    scores = bootstrap(model, metric, X, y)
    print(f'simpleml BFGS: mean={np.mean(scores)}, std={np.std(scores)}')

    # scikit-learn reference.
    model = SKLinearRegression()
    scores = bootstrap(model, metric, X, y)
    print(f'sklearn: mean={np.mean(scores)}, std={np.std(scores)}')
def get_theta(self):
    # Accessor for the fitted parameter vector stored on the instance.
    return self.theta

# --- Demo script: compare this LinearRegression against scikit-learn on the
# --- Boston housing dataset, reporting RMSE on a held-out test split.
# NOTE(review): load_boston was removed in scikit-learn 1.2 — this script
# requires an older sklearn; confirm the pinned version.
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression as SKLinearRegression
from sklearn.metrics import mean_squared_error as mse

dataset = load_boston()
X = dataset.data
y = dataset.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Custom model: data is bound at construction time, fit() takes no arguments.
my_lr = LinearRegression(X_train, y_train)
my_lr.fit()
my_pred = my_lr.predict(X_test)

sklearn_regressor = SKLinearRegression().fit(X_train, y_train)
sklearn_pred = sklearn_regressor.predict(X_test)
# NOTE(review): these two scores are computed but never printed or used.
sklearn_train_accuracy = sklearn_regressor.score(X_train, y_train)
sklearn_test_accuracy = sklearn_regressor.score(X_test, y_test)

# squared=False → RMSE rather than MSE.
print('testing cost:')
print('My LR', mse(y_test, my_pred, squared=False))
print('SK Learn', mse(y_test, sklearn_pred, squared=False))
# ============================================================================
# Demo script: custom linear/logistic regression vs scikit-learn baselines.

print('Linear Regression')
print('---------------------------------------------------------------------')
boston = load_boston()
X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target)

# Custom implementation, iterative fit.
regressor = MyLinearRegression()
regressor.fit(X_train, y_train)
predictions = regressor.predict(X_test)
print('My MSE:', mean_squared_error(y_test, predictions))

# Same model, closed-form (normal equation) fit.
regressor.fit_analitical(X_train, y_train)
predictions = regressor.predict(X_test)
print('My MSE:', mean_squared_error(y_test, predictions), '(analytical method)')

# scikit-learn reference.
from sklearn.linear_model import LinearRegression as SKLinearRegression
regressor = SKLinearRegression()
regressor.fit(X_train, y_train)
predictions = regressor.predict(X_test)
print('Sk MSE:', mean_squared_error(y_test, predictions))

print('\nLogistic Regression')
print('---------------------------------------------------------------------')
iris = load_iris()
X = iris.data[iris.target != 2]  # only use samples with class 0 or 1
y = iris.target[iris.target != 2]  # only use samples with class 0 or 1
X_train, X_test, y_train, y_test = train_test_split(X, y)

classifier = MyLogisticRegression()
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)
print('My Accuracy:', accuracy_score(y_test, predictions))
def test_metrics():
    """Compare local metric implementations against scikit-learn references.

    Fits reference sklearn models on synthetic classification and regression
    data, then prints the delta between each local metric and the sklearn
    equivalent; every delta should be ~0 (or an all-zero array).
    """
    # --- Classification metrics -------------------------------------------
    X, y = make_classification(n_samples=500, n_features=5, n_informative=5,
                               n_redundant=0, n_repeated=0, n_classes=2)
    X = StandardScaler().fit(X).transform(X)
    model = SKLogisticRegression()
    model.fit(X, y)
    y_pred = model.predict(X)
    y_proba = model.predict_proba(X)[:, 1]  # probability of the positive class
    y_true = y

    print('Confusion Matrix:',
          confusion_matrix(y_true, y_pred) == metrics.confusion_matrix(y_true, y_pred),
          sep='\n')
    print('Delta Accuracy:',
          accuracy(y_true, y_pred) - metrics.accuracy_score(y_true, y_pred))
    # BUG FIX: the recall comparisons passed (y_true, y_true) to the local
    # implementation while sklearn received (y_true, y_pred), so the two
    # sides measured different things. Both now receive (y_true, y_pred).
    print('Delta Micro Recall:',
          recall(y_true, y_pred, kind='micro') - metrics.recall_score(y_true, y_pred, average='micro'))
    print('Delta Micro Precision:',
          precision(y_true, y_pred, kind='micro') - metrics.precision_score(y_true, y_pred, average='micro'))
    print('Delta Micro F1-score:',
          f1_score(y_true, y_pred, kind='micro') - metrics.f1_score(y_true, y_pred, average='micro'))
    print('Delta Macro Recall:',
          recall(y_true, y_pred, kind='macro') - metrics.recall_score(y_true, y_pred, average='macro'))
    print('Delta Macro Precision:',
          precision(y_true, y_pred, kind='macro') - metrics.precision_score(y_true, y_pred, average='macro'))
    print('Delta Macro F1-score:',
          f1_score(y_true, y_pred, kind='macro') - metrics.f1_score(y_true, y_pred, average='macro'))
    print('Delta All Recall:',
          recall(y_true, y_pred, kind='all') - metrics.recall_score(y_true, y_pred, average=None))
    print('Delta All Precision:',
          precision(y_true, y_pred, kind='all') - metrics.precision_score(y_true, y_pred, average=None))
    print('Delta All F1-score:',
          f1_score(y_true, y_pred, kind='all') - metrics.f1_score(y_true, y_pred, average=None))
    print('Delta log loss:',
          log_loss_score(y_true, y_proba) - metrics.log_loss(y_true, y_proba))
    # BUG FIX: zero_one_loss compared (y_true, y_true) on both sides, which
    # is trivially zero and tests nothing. Compare predictions instead.
    print('Delta zero one loss:',
          zero_one_loss(y_true, y_pred) - metrics.zero_one_loss(y_true, y_pred))

    print('*' * 80)

    # --- Regression metrics ------------------------------------------------
    X, y = make_regression(n_samples=500, n_features=5, n_informative=5, n_targets=1)
    model = SKLinearRegression()
    model.fit(X, y)
    y_pred = model.predict(X)
    y_true = y

    print('Delta mean_absolute_error:',
          mean_absolute_error(y_true, y_pred) - metrics.mean_absolute_error(y_true, y_pred))
    print('Delta mean_squared_error:',
          mean_squared_error(y_true, y_pred) - metrics.mean_squared_error(y_true, y_pred))
    print('Delta r2_score:',
          r2_score(y_true, y_pred) - metrics.r2_score(y_true, y_pred))