def PoissonReg(X_train, X_test, y_train, y_test):
    """Fit one default ``PoissonRegressor`` per target column and log results.

    ``y_train`` is indexed as a 2-D array: column 0 and column 1 each get
    their own model. Test-split metrics are printed and collected first,
    then training-split metrics, and everything is handed to ``logSave``.

    Args:
        X_train: Features used to fit both models.
        X_test: Features used for the validation predictions.
        y_train: Two-column training targets (indexed as ``y_train[:, i]``).
        y_test: Targets compared against the test predictions.
    """
    models = []
    for column in (0, 1):
        model = PoissonRegressor()
        model.fit(X_train, y_train[:, column])
        models.append(model)

    def _stacked_predictions(features):
        # One column of predictions per model, side by side.
        return np.hstack(
            [model.predict(X=features).reshape(-1, 1) for model in models]
        )

    # Validation split: print first, then collect.
    test_pred = _stacked_predictions(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    # Training split: collect first, then print.
    train_pred = _stacked_predictions(X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    logSave(
        nameOfModel="PoissonReg",
        reg=models,
        metrics=metrics,
        val_metrics=val_metrics,
    )
def PoissonRegGS(X_train, X_test, y_train, y_test):
    """Grid-search ``alpha`` for one ``PoissonRegressor`` per target column.

    ``y_train`` is indexed as a 2-D array: column 0 and column 1 are tuned
    independently with a 2-fold ``GridSearchCV`` over ``alpha`` in
    ``[1, 2]``, selecting on ``r2``. Test-split and training-split metrics
    are printed and collected, the winning parameters of both searches are
    saved via ``saveBestParams`` and the models are passed to ``logSave``.

    Args:
        X_train: Features used for the searches and the final models.
        X_test: Features used for the validation predictions.
        y_train: Two-column training targets (indexed as ``y_train[:, i]``).
        y_test: Targets compared against the test predictions.
    """
    grid_values = {'alpha': list(range(1, 3))}

    searches = []
    for column in (0, 1):
        search = GridSearchCV(
            PoissonRegressor(),
            param_grid=grid_values,
            scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
            refit='r2',
            n_jobs=-1,
            cv=2,
            verbose=100,
        )
        search.fit(X_train, y_train[:, column])
        searches.append(search)
    grid_reg1, grid_reg2 = searches

    # Each model must come from its OWN search. Taking both from the first
    # search made reg1 and reg2 the same object, so fitting the second
    # target silently overwrote the first model.
    # With refit='r2' the best estimator is already refit on the full
    # training data, so no additional fit() call is needed.
    reg1 = grid_reg1.best_estimator_
    reg2 = grid_reg2.best_estimator_

    def _stacked_predictions(features):
        # One column of predictions per target, side by side.
        return np.hstack((
            reg1.predict(X=features).reshape(-1, 1),
            reg2.predict(X=features).reshape(-1, 1),
        ))

    y_pred = _stacked_predictions(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = _stacked_predictions(X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    # Pair up the winning value of every tuned parameter: [target 0, target 1].
    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    best_params = {
        key: [best_params1[key], best_params2[key]] for key in best_params1
    }

    saveBestParams(nameOfModel="PoissonRegGS", best_params=best_params)
    logSave(
        nameOfModel="PoissonRegGS",
        reg=[reg1, reg2],
        metrics=metrics,
        val_metrics=val_metrics,
    )
def test_sklearn_poisson_regression(nps_app_inst: ArrayApplication):
    """Print the D^2 score of ``PoissonRegression`` next to scikit-learn's.

    A five-point, single-feature dataset is generated from a known
    log-linear relationship. For each parameter set, the model under test
    and scikit-learn's ``PoissonRegressor`` are fitted with the same
    keyword arguments, and D^2 of each set of predictions is printed.
    Both scores are computed with the deviance function of the model
    under test.
    """
    from sklearn.linear_model import PoissonRegressor as SKPoissonRegressor

    def dsqr(dev_func, targets, predictions):
        # D^2 = 1 - deviance(model) / deviance(mean-only predictor).
        model_deviance = dev_func(targets, predictions)
        null_deviance = dev_func(targets, nps_app_inst.mean(targets))
        return 1 - model_deviance / null_deviance

    coef = np.array([0.2, -0.1])
    slope, intercept = coef[0], coef[1]
    real_X = np.array([[0, 1, 2, 3, 4]]).T
    real_y = np.exp(np.dot(real_X, slope) + intercept).reshape(-1)

    # Each array is placed in a single block covering its full shape.
    X = nps_app_inst.array(real_X, block_shape=real_X.shape)
    y = nps_app_inst.array(real_y, block_shape=real_y.shape)

    param_set = [
        {"tol": 1e-4, "max_iter": 100},
    ]
    for params in param_set:
        model: PoissonRegression = PoissonRegression(**params)
        model.fit(X, y)
        own_pred = model.predict(X).get()
        print("D^2", dsqr(model.deviance, y, own_pred).get())

        sk_model = SKPoissonRegressor(**params)
        sk_model.fit(real_X, real_y)
        sk_pred = sk_model.predict(real_X)
        print("D^2", dsqr(model.deviance, y, sk_pred).get())
def main(lr, train_path, eval_path, save_path, save_img):
    """Fit a Poisson regression on a CSV training set and evaluate it.

    The model is a ``PoissonRegressor`` fitted on columns ``x_1``..``x_4``
    against column ``y``. Predictions for the evaluation set are written
    to ``save_path``, plotted through ``util.scatter``, and the fitted
    coefficients and evaluation score are printed.

    Args:
        lr: Learning rate. Accepted for interface compatibility; the body
            does not read it.
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        save_path: Path to save predictions.
        save_img: Forwarded as the third argument to ``util.scatter``.
    """
    feature_columns = ['x_1', 'x_2', 'x_3', 'x_4']

    def _load_split(csv_path):
        # Returns (feature frame, flat target array) for one CSV file.
        frame = pd.read_csv(csv_path)
        return frame[feature_columns], frame[['y']].values.ravel()

    # Load training set and fit
    x_train, y_train = _load_split(train_path)
    glm = PoissonRegressor(tol=1e-5, max_iter=10000000)
    glm.fit(x_train, y_train)

    # Load evaluation set, predict and report
    x_eval, y_eval = _load_split(eval_path)
    predictions = glm.predict(x_eval)
    np.savetxt(save_path, predictions)
    util.scatter(y_eval, predictions, save_img)

    print(glm.coef_)
    print(glm.score(x_eval, y_eval))
def sk_poisson_regression(X_train, X_test, y_train, y_test):
    """Fit an unpenalised, intercept-free Poisson GLM, score it and plot it.

    The model is fitted on the training split and its ``score`` on the
    test split is printed. A scatter plot of observations and model
    predictions is then shown.

    Args:
        X_train: Training features.
        X_test: Test features used for the printed score.
        y_train: Training targets.
        y_test: Test targets used for the printed score.
    """
    model = PoissonRegressor(alpha=0, fit_intercept=False, max_iter=300)
    model.fit(X_train, y_train)
    print('score: ', model.score(X_test, y_test))

    # NOTE(review): `X` and `y` are not parameters of this function; they
    # are resolved from an enclosing/module scope - confirm they are
    # defined there and are the data intended for the plot.
    fitted = model.predict(X)

    plt.figure(figsize=(6.0, 6.0))
    plt.plot(X, y, 'o')
    plt.plot(X, fitted, '*', color='r')
    plt.xlabel('x (total_bill)')
    plt.ylabel('y (tips)')
    plt.xlim(0, 60)
    plt.ylim(0, 12)
    plt.show()
def regression(transformed, train_data_index_list, test_data_index_list, combined_data, dataset_name, data_path, regression_type):
    """Fit a regressor on the ``bug`` column, then report FPA, CLC and MSE.

    Rows are split by testing ``transformed.index`` for membership in the
    train / test index lists; the same boolean masks select the targets
    from ``combined_data``. FPA and CLC of the test predictions are
    written with ``write_to_file`` and printed together with the MSE.

    Args:
        transformed: Feature frame whose index drives the train/test split.
        train_data_index_list: Index values belonging to the training split.
        test_data_index_list: Index values belonging to the test split.
        combined_data: Frame providing the ``bug`` target column.
        dataset_name: Suffix of the result path.
        data_path: Suffix of the first argument passed to ``write_to_file``.
        regression_type: ``'poisson'`` or ``'linear'``; any other value
            selects Lasso.
    """
    train_mask = transformed.index.isin(train_data_index_list)
    test_mask = transformed.index.isin(test_data_index_list)

    X_train1 = np.array(transformed[train_mask])
    X_test1 = np.array(transformed[test_mask])
    Y_train1 = combined_data[train_mask]['bug']
    Y_test1 = combined_data[test_mask]['bug']

    # Pick the estimator and the label used in the output names together.
    if regression_type == 'poisson':
        label, estimator = 'poisson', PoissonRegressor()
    elif regression_type == 'linear':
        label, estimator = 'linear', LinearRegression()
    else:
        label, estimator = 'lasso', Lasso()
    reg = estimator.fit(X_train1, Y_train1)

    predictions = reg.predict(X_test1)
    FPA_result = str(FPA(predictions))
    CLC_result = str(CLC(predictions))

    path_to_save = '../../BTP_results/ml_results/' + label + '_' + dataset_name
    write_to_file(label + '_' + data_path, FPA_result, CLC_result, path_to_save)

    print("FPA metric value obtained is: " + FPA_result)
    print("CLC metric value obtained is: " + CLC_result)
    print("MSE is: " + str(mean_squared_error(Y_test1, predictions)))
    print("success!!")
print(scores) # %% # We can visually compare observed and predicted values, aggregated by the # drivers age (``DrivAge``), vehicle age (``VehAge``) and the insurance # bonus/malus (``BonusMalus``). fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(16, 8)) fig.subplots_adjust(hspace=0.3, wspace=0.2) plot_obs_pred( df=df_train, feature="DrivAge", weight="Exposure", observed="Frequency", predicted=glm_freq.predict(X_train), y_label="Claim Frequency", title="train data", ax=ax[0, 0], ) plot_obs_pred( df=df_test, feature="DrivAge", weight="Exposure", observed="Frequency", predicted=glm_freq.predict(X_test), y_label="Claim Frequency", title="test data", ax=ax[0, 1], fill_legend=True,
def poissonregressor(self,X_train,X_test,y_train,y_test):
    """Fit a default ``PoissonRegressor`` and return its test predictions.

    Returns:
        The result of ``predict`` on ``self.X_test`` after fitting on
        ``self.X_train`` / ``self.y_train``.
    """
    # NOTE(review): the X_train/X_test/y_train/y_test arguments are not
    # read; the data comes from the attributes of the same names on
    # `self` - confirm this is intended.
    model = PoissonRegressor()
    model.fit(self.X_train, self.y_train)
    return model.predict(self.X_test)
print(scores) # %% # We can visually compare observed and predicted values, aggregated by the # drivers age (``DrivAge``), vehicle age (``VehAge``) and the insurance # bonus/malus (``BonusMalus``). fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(16, 8)) fig.subplots_adjust(hspace=0.3, wspace=0.2) plot_obs_pred( df=df_train, feature="DrivAge", weight="Exposure", observed="Frequency", predicted=glm_freq.predict(X_train), y_label="Claim Frequency", title="train data", ax=ax[0, 0], ) plot_obs_pred( df=df_test, feature="DrivAge", weight="Exposure", observed="Frequency", predicted=glm_freq.predict(X_test), y_label="Claim Frequency", title="test data", ax=ax[0, 1], fill_legend=True
regr_l2_100.fit(X_train_std, y_train)

# print(scores_length_l2_100_reg)
# The mean score and the standard deviation are hence given by:
print("%0.2f (with L2 alpha = 100) accuracy with a standard deviation of %0.2f" % (scores_length_l2_100_reg.mean(), scores_length_l2_100_reg.std()))

# print(patient)

# Modeling with Poisson Regressor
import sklearn
from sklearn.linear_model import PoissonRegressor
from sklearn.metrics import mean_squared_error, r2_score

regr = PoissonRegressor(alpha=1.0, fit_intercept=True, max_iter=100, tol=0.0001, warm_start=False, verbose=0)
regr.fit(X_train_std, y_train)

# Make predictions using the testing set
y_pred = regr.predict(X_test_std)

print(r2_score(y_test, y_pred))

# The coefficients
# print('Coefficients: \n', regr.coef_)

# NOTE: the notebook export mistook the "% ..." continuation lines of the
# next two print calls for IPython magics and commented them out, leaving
# both calls unclosed (a syntax error). The operands are restored inline.

# The mean squared error
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))

# The coefficient of determination: 1 is perfect prediction
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))

scores_length_no_reg = cross_val_score(regr, X_train_std, y_train, cv=5, scoring='r2')
regr.fit(X_train_std, y_train)