Example #1
def test_regression_metrics(n_samples=50):
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)
    assert_almost_equal(mean_squared_log_error(y_true, y_pred),
                        mean_squared_error(np.log(1 + y_true),
                                           np.log(1 + y_pred)))
    assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(median_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(max_error(y_true, y_pred), 1.)
    assert_almost_equal(r2_score(y_true, y_pred),  0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)
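For reference, max_error simply returns the largest absolute residual between y_true and y_pred. A minimal standalone sketch (an illustration, not part of the test above) using the same offset-by-one data:

import numpy as np
from sklearn.metrics import max_error

y_true = np.arange(50)
y_pred = y_true + 1   # every prediction is off by exactly 1
# max_error is the largest absolute residual, here 1
assert max_error(y_true, y_pred) == np.max(np.abs(y_pred - y_true)) == 1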
Example #2
def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_squared_log_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(max_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [-1.], [-1.])
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [1., 2., 3.], [1., -2., 3.])
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [1., -2., 3.], [1., 2., 3.])
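The same negative-target guard can be exercised with pytest.raises, which newer scikit-learn test suites use instead of assert_raises_regex (compare Examples #18 and #30 below). A small standalone sketch:

import pytest
from sklearn.metrics import mean_squared_log_error

def test_msle_rejects_negative_targets():
    # mean_squared_log_error raises ValueError whenever y_true or y_pred
    # contains a negative value.
    with pytest.raises(ValueError, match="Mean Squared Logarithmic Error"):
        mean_squared_log_error([1., -2., 3.], [1., 2., 3.])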
Example #3
def get_metrics(y_test, y_pred):
    print('R^2 Correlation: ' + str(r2_score(y_test, y_pred)))
    print('Maximum Error: ' + str(max_error(y_test, y_pred)))
    print('Absolute Error: ' + str(mean_absolute_error(y_test, y_pred)))
    print('Squared Error: ' + str(mean_squared_error(y_test, y_pred)))
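A hypothetical usage sketch for the get_metrics helper above; the LinearRegression model and the synthetic data are assumptions, not part of the original snippet:

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (max_error, mean_absolute_error,
                             mean_squared_error, r2_score)

# Synthetic regression data (assumption): 3 features, linear signal plus noise.
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = X @ np.array([1.5, -2.0, 0.5]) + rng.normal(scale=0.1, size=100)

model = LinearRegression().fit(X[:80], y[:80])
get_metrics(y[80:], model.predict(X[80:]))   # prints the four scores above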
Example #4
y1_axis = dt.Predicted
plt.scatter(x_axis, y_axis)
plt.plot(x_axis, y1_axis, color='r')
plt.title("linear regression")

plt.show()

from sklearn.linear_model import LinearRegression
from sklearn.metrics import confusion_matrix, r2_score, mean_gamma_deviance, explained_variance_score, max_error

print("  ")
print("Linear Regression:")
print("R2 Score:", r2_score(y, y_pred))
print("Root Mean Sqaure:", np.sqrt(mean_squared_error(y, y_pred)))
print("Explained Variance Score:", explained_variance_score(y, y_pred))
print("Max Error:", max_error(y, y_pred))
print("Mean Gamma Devience:", mean_gamma_deviance(y, y_pred))
print("---------------------------------------------------------------------")
print("  ")

from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(x_train)
pol_reg = LinearRegression()
pol_reg.fit(X_poly, y_train)
b = pol_reg.predict(poly_reg.fit_transform(x_test))
print("Polynomial Regression:")
print("R2 Score:", r2_score(y_test, b))
print("Root Mean Sqaure:", np.sqrt(mean_squared_error(y_test, b)))
print("Explained Variance Score:", explained_variance_score(y_test, b))
print("Max Error:", max_error(y_test, b))
Example #5
def prophetCV(df_train, df_test, exogenous_features):
    if (df_test['IP'].values == 0) & (df_test['CON'].values == 0):
        forecast = 0
        return forecast

    class suppress_stdout_stderr(object):
        '''
            A context manager for doing a "deep suppression" of stdout and stderr in
            Python, i.e. will suppress all print, even if the print originates in a
            compiled C/Fortran sub-function.
               This will not suppress raised exceptions, since exceptions are printed
            to stderr just before a script exits, and after the context manager has
            exited (at least, I think that is why it lets exceptions through).

            '''
        def __init__(self):
            # Open a pair of null files
            self.null_fds = [os.open(os.devnull, os.O_RDWR) for x in range(2)]
            # Save the actual stdout (1) and stderr (2) file descriptors.
            self.save_fds = [os.dup(1), os.dup(2)]

        def __enter__(self):
            # Assign the null pointers to stdout and stderr.
            os.dup2(self.null_fds[0], 1)
            os.dup2(self.null_fds[1], 2)

        def __exit__(self, *_):
            # Re-assign the real stdout/stderr back to (1) and (2)
            os.dup2(self.save_fds[0], 1)
            os.dup2(self.save_fds[1], 2)
            # Close the null files
            for fd in self.null_fds + self.save_fds:
                os.close(fd)

    # perform a mini grid search
    param_grid = [0.01, 0.05, 0.1]
    scores = []
    for cp_scale in param_grid:
        # split the data into a few different chunks
        training, valid, _, __ = timeseries_train_test_split(df_train,
                                                             df_test,
                                                             test_size=0.15)
        grid_model = Prophet(daily_seasonality=False,
                             weekly_seasonality=False,
                             yearly_seasonality=False,
                             changepoint_prior_scale=cp_scale,
                             seasonality_mode='multiplicative')
        for feature in exogenous_features:
            grid_model.add_regressor(feature)
        with suppress_stdout_stderr():
            forecast = grid_model.fit(training[["ds", "y"] + exogenous_features]). \
                predict(valid[["ds"] + exogenous_features])
            error = max_error(valid['y'].values, forecast['yhat'].values)
            scores.append([cp_scale, error])
    scores.sort(key=lambda tup: tup[1])
    # done with grid search
    model = Prophet(daily_seasonality=False,
                    weekly_seasonality=False,
                    yearly_seasonality=False,
                    changepoint_prior_scale=scores[0][0],
                    seasonality_mode='multiplicative')
    for feature in exogenous_features:
        model.add_regressor(feature)
    with suppress_stdout_stderr():
        model.fit(df_train[["ds", "y"] + exogenous_features])
    forecast = model.predict(df_test[["ds"] + exogenous_features])
    forecast.loc[forecast.yhat < 0, "yhat"] = 0
    forecast = forecast["yhat"].item()

    return np.round(forecast, 0)
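A hypothetical standalone use of the suppress_stdout_stderr context manager above, assuming the class were lifted to module scope. Because it swaps the OS-level file descriptors, it silences C-level output as well, which plain contextlib.redirect_stdout cannot do:

import os

with suppress_stdout_stderr():
    print("this line is swallowed")        # Python-level write goes to os.devnull
    os.system("echo 'so does this'")       # C/shell-level write is suppressed too
print("visible again")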
Example #6
def measure_error(actual, predicted):
    return {'EVC': explained_variance_score(actual, predicted),
            'ME': max_error(actual, predicted),
            'MAE': mean_absolute_error(actual, predicted),
            'MSE': mean_squared_error(actual, predicted),
            'RMSE': sqrt(mean_squared_error(actual, predicted))}
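A hypothetical call to measure_error above on a small hand-made sample; the imports shown are the ones the helper needs and are assumptions about the surrounding module:

from math import sqrt
from sklearn.metrics import (explained_variance_score, max_error,
                             mean_absolute_error, mean_squared_error)

print(measure_error([3.0, -0.5, 2.0, 7.0], [2.5, 0.0, 2.0, 8.0]))
# e.g. {'EVC': 0.957..., 'ME': 1.0, 'MAE': 0.5, 'MSE': 0.375, 'RMSE': 0.612...}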
Example #7
print('Mean Squared Error = %0.3f' % mse)

#Mean Absolute Error
mae = mean_absolute_error(y_true, y_pred)
print('Mean Absolute Error = %0.3f' % mae)

#Median Absolute Error
med_ea = median_absolute_error(y_true, y_pred)
print('Median Absolute Error = %0.3f' % med_ea)

#Mean Squared Log Error
msle = mean_squared_log_error(y_true, y_pred)
print('Mean Squared Log Error = %0.3f' % msle)

#Max Error
me = max_error(y_true, y_pred)
print('Max Error = %0.3f' % me)

#Plot Actual vs. Predicted
plt.title('Actual vs. Predicted')
plt.xlabel('YearsExperience')
plt.ylabel('Salary')
plt.scatter(x_true, y_true)
plt.scatter(x_true, y_pred)
plt.show()

#Outputs plot
plt.title('Actual vs. Predicted')
plt.xlabel('YearsExperience')
plt.ylabel('Salary')
plt.scatter(x_true, y_true)
Example #8
def main():
    st.write("""
    # Streamlit: EKA Goals Modelling App

    Statistical Estimations of Goals Scored in the England Korfball League Since 2014/15 Season"""
             )

    st.sidebar.header('Dataset Filters')
    dataset = st.sidebar.selectbox("Select Dataset", datasets)

    df = pd.read_csv(data_sources[dataset])

    seasons = st.sidebar.multiselect("Select Seasons", df.Season.unique())
    teams = st.sidebar.multiselect("Select Teams",
                                   df['Home Team'].sort_values().unique())
    venue = st.sidebar.multiselect("Select Venue", venues)
    global goals
    goals = filter_scores(df, teams, seasons, venue)['Goals']

    st.sidebar.header('Histogram')

    bin_size = st.sidebar.number_input('Bin Size', 1, 10, 1)
    x_max = goals.max() + 1

    if goals.size == 0:
        st.warning('No Data. Please update dataset filters.')
    else:

        global x
        x = np.arange(0, x_max, bin_size)
        global x_arr
        x_arr = np.arange(0, x_max + bin_size, bin_size)
        global y
        y = np.histogram(goals, bins=x_arr, density=True)[0]

        st.write(f"""
        **Observations: ** {goals.count()}

        **Mean: ** {goals.mean():.2f}
        **Var: ** {goals.var():.2f}
        **Std: ** {goals.std():.2f}
        **Skew: ** {goals.skew():.2f}
        **Kurtosis: ** {goals.kurtosis():.2f}""")

        st.sidebar.header('Distribution Parameters')

        dist_name = st.sidebar.selectbox("Select Distribution", distributions)

        params = add_parameter_ui(dist_name, y)

        st.sidebar.markdown('#### Formula')
        formula = st.sidebar.markdown(get_formula(dist_name))

        dist = get_dist_data(dist_name, params)

        # graph = plot_data(goals, dist, dist_name, params)

        graph = st.altair_chart(plot_altair(y, dist, dist_name, bin_size))

        g_data, dist_data = y, dist[:-1]

        mse = metrics.mean_squared_error(g_data, dist_data)
        rmse = mse**.5
        e_var = metrics.explained_variance_score(g_data, dist_data)
        max_error = metrics.max_error(g_data, dist_data)
        mae = metrics.mean_absolute_error(g_data, dist_data)
        # r2_score = metrics.r2_score(g_data, dist_data)
        sum_square_errors = sse(g_data, dist_data)
        total_sum_square = sst(g_data)

        st.write(f"""
        **MSE: ** {mse * 100 :.5f}
        **RMSE: ** {rmse : .5f}

        **Max Error:** {max_error: .5f}
        **MAE: ** {mae: .5f}
        **SSE: ** {sum_square_errors: .5f}
        **SST: ** {total_sum_square: .5f}

        **Explained Variance: ** {e_var:.3f}""")
Example #9
 def max_error(self):
     return max_error(self.ytrue, self.ypred)
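A hypothetical minimal class consistent with the method above; the ytrue/ypred attribute names follow the snippet, everything else is an assumption:

from sklearn.metrics import max_error

class RegressionReport:
    """Tiny wrapper that stores true/predicted values and exposes metrics."""

    def __init__(self, ytrue, ypred):
        self.ytrue = ytrue
        self.ypred = ypred

    def max_error(self):
        # The name inside the method resolves to sklearn.metrics.max_error
        # imported at module level, not to the method itself.
        return max_error(self.ytrue, self.ypred)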
Example #10
from sklearn.metrics import explained_variance_score 
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import max_error

mtrc_evs_xgb = explained_variance_score(Y_agosto_19_reg, Y_agosto_19_refit_pred_reg_xgb)
mtrc_mae_xgb = mean_absolute_error(Y_agosto_19_reg, Y_agosto_19_refit_pred_reg_xgb)
mtrc_mdae_xgb = median_absolute_error(Y_agosto_19_reg, Y_agosto_19_refit_pred_reg_xgb)
mtrc_mse_xgb = mean_squared_error(Y_agosto_19_reg, Y_agosto_19_refit_pred_reg_xgb)
mtrc_rmse_xgb = np.sqrt(mean_squared_error(Y_agosto_19_reg, Y_agosto_19_refit_pred_reg_xgb))
mtrc_r2_xgb = r2_score(Y_agosto_19_reg, Y_agosto_19_refit_pred_reg_xgb)
mtrc_r2_custom_xgb = coefficient_of_determination_by_correlation(Y_agosto_19_reg, Y_agosto_19_refit_pred_reg_xgb)
mtrc_max_error_xgb = max_error(Y_agosto_19_reg, Y_agosto_19_refit_pred_reg_xgb)

print('Explained Variance Score: {:.16f}'.format(mtrc_evs_xgb))
print('Mean Absolute Error: {:.16f}'.format(mtrc_mae_xgb)) 
print('Median Absolute Error: {:.16f}'.format(mtrc_mdae_xgb)) 
print('Mean Squared Error: {:.16f}'.format(mtrc_mse_xgb))
print('Root Mean Squared Error: {:.16f}'.format(mtrc_rmse_xgb))
print('R² Score: {:.16f}'.format(mtrc_r2_xgb))
print('R² Score Custom: {:.16f}'.format(mtrc_r2_custom_xgb))
print('Max Error: {:.16f}'.format(mtrc_max_error_xgb))

#print('Explained Variance Score: {:.4f}'.format(mtrc_evs_xgb))
#print('Mean Absolute Error: {:.4f}'.format(mtrc_mae_xgb)) 
#print('Median Absolute Error: {:.4f}'.format(mtrc_mdae_xgb)) 
#print('Mean Squared Error: {:.4f}'.format(mtrc_mse_xgb))
#print('Root Mean Squared Error: {:.4f}'.format(mtrc_rmse_xgb))
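The Y_agosto_19_* arrays are not defined in this snippet, so here is a self-contained check (an assumption, not part of the original) that the np.sqrt-based RMSE above matches mean_squared_error(..., squared=False), available in scikit-learn >= 0.22:

import numpy as np
from sklearn.metrics import mean_squared_error

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])
# Both expressions compute the root mean squared error.
assert np.isclose(np.sqrt(mean_squared_error(y_true, y_pred)),
                  mean_squared_error(y_true, y_pred, squared=False))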
Example #11
def get_scores(estimator, X, y, y_pred=None):
    """Get estimator scores on ``X``.

    If you pass ``y_pred``, then predictions are not computed from ``X`` and ``y`` data.

    Estimator should be fitted before calling this function.

    **Regressor**

    For regressors that output a single value, the following scores are logged:

    * explained variance
    * max error
    * mean absolute error
    * r2

    For multi-output regressor:

    * r2

    **Classifier**

    For classifiers, the following scores are logged:

    * precision
    * recall
    * f beta score
    * support

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        estimator (:obj:`estimator`):
            | Scikit-learn estimator to compute scores.
        X (:obj:`ndarray`):
            | Data matrix.
        y (:obj:`ndarray`):
            | Target for testing.
        y_pred (:obj:`ndarray`, optional, default is ``None``):
            | Estimator predictions on data.

    Returns:
        ``dict`` with scores.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['estimator/scores'] = npt_utils.get_scores(rfc, X, y)
    """
    assert is_regressor(estimator) or is_classifier(estimator), \
        'Estimator should be sklearn regressor or classifier.'

    scores_dict = {}

    if y_pred is None:
        y_pred = estimator.predict(X)

    if is_regressor(estimator):
        # single output
        if len(y_pred.shape) == 1:
            evs = explained_variance_score(y, y_pred)
            me = max_error(y, y_pred)
            mae = mean_absolute_error(y, y_pred)
            r2 = r2_score(y, y_pred)

            scores_dict['explained_variance_score'] = evs
            scores_dict['max_error'] = me
            scores_dict['mean_absolute_error'] = mae
            scores_dict['r2_score'] = r2

        # multi output
        if len(y_pred.shape) == 2:
            r2 = estimator.score(X, y)
            scores_dict['r2_score'] = r2

    elif is_classifier(estimator):
        precision, recall, fbeta_score, support = precision_recall_fscore_support(
            y, y_pred)
        for i, value in enumerate(precision):
            scores_dict['class_{}'.format(i)] = {
                'precision': value,
                'recall': recall[i],
                'fbeta_score': fbeta_score[i],
                'support': support[i]
            }
    return scores_dict
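A hypothetical regression call to get_scores above; the RandomForestRegressor and the synthetic data are assumptions that mirror the classifier example in the docstring:

import numpy as np
from sklearn.ensemble import RandomForestRegressor

rng = np.random.default_rng(42)
X = rng.normal(size=(200, 4))
y = 2.0 * X[:, 0] + rng.normal(scale=0.1, size=200)

rfr = RandomForestRegressor(n_estimators=50, random_state=0).fit(X, y)
scores = get_scores(rfr, X, y)
# expected keys: explained_variance_score, max_error, mean_absolute_error, r2_score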
Example #12
        regression_models = {
            "LINEAR_REG": LinearRegression(),
            "SVR": SVR(),
            "DTR": DecisionTreeRegressor(),
            "RFR": RandomForestRegressor(n_estimators=400),
            "XGBR": GradientBoostingRegressor(n_estimators=400)
        }

        metric_dict = {}
        for name, algorithm in tqdm(regression_models.items()):
            model = algorithm
            model.fit(x_train, y_train.ravel())
            y_pred = model.predict(x_test)
            metric_dict[name] = {
                "Max_error": round(max_error(y_test, y_pred), 5),
                "MAE": round(mean_absolute_error(y_test, y_pred), 3),
                "MSE": round(mean_squared_error(y_test, y_pred), 3),
                "R2-score": round(r2_score(y_test, y_pred), 5),
                "RMSE":
                round(mean_squared_error(y_test, y_pred, squared=False), 3),
                "MAPE": round(mean_absolute_percentage_error(y_test, y_pred),
                              3)
            }

        metric_df = pd.DataFrame(metric_dict)
        metric_df.reset_index(inplace=True)

        #---------------------------Presentation----------------------------------

        #-------------------------------------view data --------------------------
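A small presentation tweak (an assumption, not in the original): transposing metric_dict puts one model per row and one metric per column, which is often easier to scan than the model-per-column layout built above:

import pandas as pd

metric_df = pd.DataFrame(metric_dict).T      # models as rows, metrics as columns
metric_df.index.name = "Model"
print(metric_df)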
Example #13
from sklearn.metrics import explained_variance_score 
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import max_error

mtrc_evs_xgb = explained_variance_score(Y_agosto_19_reg, Y_agosto_19_pred_train_reg_xgb)
mtrc_mae_xgb = mean_absolute_error(Y_agosto_19_reg, Y_agosto_19_pred_train_reg_xgb)
mtrc_mdae_xgb = median_absolute_error(Y_agosto_19_reg, Y_agosto_19_pred_train_reg_xgb)
mtrc_mse_xgb = mean_squared_error(Y_agosto_19_reg, Y_agosto_19_pred_train_reg_xgb)
mtrc_rmse_xgb = np.sqrt(mean_squared_error(Y_agosto_19_reg, Y_agosto_19_pred_train_reg_xgb))
mtrc_r2_xgb = r2_score(Y_agosto_19_reg, Y_agosto_19_pred_train_reg_xgb)
mtrc_r2_custom_xgb = coefficient_of_determination_by_correlation(Y_agosto_19_reg, Y_agosto_19_pred_train_reg_xgb)
mtrc_max_error_xgb = max_error(Y_agosto_19_reg, Y_agosto_19_pred_train_reg_xgb)

print('Explained Variance Score: {:.16f}'.format(mtrc_evs_xgb))
print('Mean Absolute Error: {:.16f}'.format(mtrc_mae_xgb)) 
print('Median Absolute Error: {:.16f}'.format(mtrc_mdae_xgb)) 
print('Mean Squared Error: {:.16f}'.format(mtrc_mse_xgb))
print('Root Mean Squared Error: {:.16f}'.format(mtrc_rmse_xgb))
print('R² Score: {:.16f}'.format(mtrc_r2_xgb))
print('R² Score Custom: {:.16f}'.format(mtrc_r2_custom_xgb))
print('Max Error: {:.16f}'.format(mtrc_max_error_xgb))

#print('Explained Variance Score: {:.4f}'.format(mtrc_evs_xgb))
#print('Mean Absolute Error: {:.4f}'.format(mtrc_mae_xgb)) 
#print('Median Absolute Error: {:.4f}'.format(mtrc_mdae_xgb)) 
#print('Mean Squared Error: {:.4f}'.format(mtrc_mse_xgb))
#print('Root Mean Squared Error: {:.4f}'.format(mtrc_rmse_xgb))
Example #14
def log_scores(estimator, X, y, y_pred=None, name=None, experiment=None):
    """Log estimator scores on ``X``.

    Calculate and log scores on data and have them as metrics in Neptune.
    If you pass ``y_pred``, then predictions are not computed from ``X`` data.

    Estimator should be fitted before calling this function.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    **Regressor**

    For regressors that output a single value, the following scores are logged:

    * explained variance
    * max error
    * mean absolute error
    * r2

    For multi-output regressor:

    * r2

    **Classifier**

    For classifiers, the following scores are logged:

    * precision
    * recall
    * f beta score
    * support

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        estimator (:obj:`estimator`):
            | Scikit-learn estimator to compute scores.
        X (:obj:`ndarray`):
            | Data matrix.
        y (:obj:`ndarray`):
            | Target for testing.
        y_pred (:obj:`ndarray`, optional, default is ``None``):
            | Estimator predictions on data.
        name (`str`, optional, default is ``None``):
            | Use 'train', 'valid', 'test' to better define on what data scores are logged.
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_scores(rfc, X, y, name='test', experiment=exp)
    """
    assert is_regressor(estimator) or is_classifier(estimator),\
        'Estimator should be sklearn regressor or classifier.'
    assert isinstance(name, str), 'name should be str. {} was passed.'.format(
        type(name))

    exp = _validate_experiment(experiment)

    if y_pred is None:
        y_pred = estimator.predict(X)

    if is_regressor(estimator):
        # single output
        if len(y_pred.shape) == 1:
            evs = explained_variance_score(y, y_pred)
            me = max_error(y, y_pred)
            mae = mean_absolute_error(y, y_pred)
            r2 = r2_score(y, y_pred)

            exp.log_metric('evs_{}_sklearn'.format(name), evs)
            exp.log_metric('me_{}_sklearn'.format(name), me)
            exp.log_metric('mae_{}_sklearn'.format(name), mae)
            exp.log_metric('r2_{}_sklearn'.format(name), r2)

        # multi output
        if len(y_pred.shape) == 2:
            r2 = estimator.score(X, y)
            exp.log_metric('r2_{}_sklearn'.format(name), r2)
    elif is_classifier(estimator):
        for metric_name, values in zip(
            ['precision', 'recall', 'fbeta_score', 'support'],
                precision_recall_fscore_support(y, y_pred)):
            for i, value in enumerate(values):
                exp.log_metric(
                    '{}_class_{}_{}_sklearn'.format(metric_name, i, name),
                    value)
Example #15
    def _BuildRegrModel(self, y, X):
        """Train an ensemble regression model and assess its performance.

        Start by splitting y and X into train and test samples. Then create three regressors,
        namely a Random Forest, a Ridge and an SVM regressor, and tune their hyperparameters using
        random search with cross validation. After updating their hyperparameters, stack the three
        regressors using an ElasticNET linear regression model and fit the ensemble model to the 
        train sample. Finally, calculate its performance using the test sample and return
        both the ensemble model and the calculated metrics.

        Arguments:
            y {numpy.ndarray} -- The response variable (i.e. the LST data)
            X {numpy.ndarray} -- The explanatory variables (i.e. the LST predictors)

        Returns:
            sklearn.ensemble._stacking.StackingRegressor -- The ensemble regression model
            tuple -- A tuple with the regression performance metrics
        """

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=self.regr_test_size, random_state=self.SEED)

        regressors = [
            ("random forest",
             RandomForestRegressor(random_state=self.SEED,
                                   n_jobs=self.N_JOBS)),
            ("ridge", Ridge(random_state=self.SEED)),
            ("svr", SVR()),
        ]

        hyperparam_distributions = {
            "random forest": {
                "max_depth": stats.randint(5, 100),
                "n_estimators": stats.randint(30, 800),
                "min_samples_leaf": stats.randint(2, 20),
                "min_samples_split": stats.randint(2, 50),
            },
            "svr": {
                "kernel": ["rbf", "poly", "sigmoid", "linear"],
                "degree": stats.randint(2, 7),
                "epsilon": stats.uniform(0.05, 5.0),
                "C": stats.uniform(0.0, 25.0),
            },
            "ridge": {
                "alpha": stats.uniform(0.0001, 1.0)
            },
        }

        for name, regressor in regressors:
            print(f"{f'    Tuning the {name} hyperparameters...':<50}", end="")
            hyperparam_candidates = RandomizedSearchCV(
                regressor,
                param_distributions=[hyperparam_distributions[name]],
                scoring="r2",
                random_state=self.SEED,
                n_jobs=self.N_JOBS,
                n_iter=self.N_RANDOM_SEARCHES,
                verbose=0,
            ).fit(X_train, y_train)
            print(
                f"Done [CV R2 score = {hyperparam_candidates.best_score_:0.2f}]"
            )
            regressor.set_params(**hyperparam_candidates.best_params_)

        ensemble_regressor = StackingRegressor(
            regressors,
            final_estimator=ElasticNetCV(
                l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1.0],
                cv=10,
                n_jobs=self.N_JOBS,
                random_state=self.SEED,
            ),
            n_jobs=self.N_JOBS,
            passthrough=True,
        )

        try:
            ensemble_regressor.fit(X_train, y_train)
        except ValueError as err:
            raise ValueError(
                f"Error in _BuildRegrModel: Unable to fit ensemble regression model. {err}"
            )

        # Assess the model performance using the test data
        y_pred = ensemble_regressor.predict(X_test)

        #y_pred = regressors[1][1].predict(X_test)
        regr_metrics = (
            metrics.r2_score(y_test, y_pred),
            metrics.explained_variance_score(y_test, y_pred),
            metrics.max_error(y_test, y_pred),
            metrics.mean_absolute_error(y_test, y_pred),
            metrics.mean_squared_error(y_test, y_pred),
            metrics.median_absolute_error(y_test, y_pred),
        )

        return ensemble_regressor, regr_metrics
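A hypothetical helper for labelling the metrics tuple returned by _BuildRegrModel; the ordering follows the tuple construction at the end of the method above:

METRIC_NAMES = ("r2", "explained_variance", "max_error",
                "mean_absolute_error", "mean_squared_error",
                "median_absolute_error")

def label_regr_metrics(regr_metrics):
    # Pair each value of the performance tuple with a readable name.
    return dict(zip(METRIC_NAMES, regr_metrics))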
Example #16
                            'deaths': yTest[:, 1],
                            'r0': yTest[:, 2]
                            },
                        ),
                        epochs=1000,
                        batch_size=10,
                        verbose=1,
                        shuffle=True,
                        callbacks=callbacks)

    y_pred = model.predict(xTest)

    y_pred = np.array(y_pred).squeeze(-1).transpose()
    foldMse = list(map(lambda i: mean_squared_error(yTest[:, i], y_pred[:, i]), range(yTest.shape[1])))
    foldR2 = list(map(lambda i: r2_score(yTest[:, i], y_pred[:, i]), range(yTest.shape[1])))
    foldMaxError = list(map(lambda i: max_error(yTest[:, i], y_pred[:, i]), range(yTest.shape[1])))

    loss.append(history.history['val_loss'])
    mse.append(foldMse)
    r2.append(foldR2)
    max_e.append(foldMaxError)

    if mse[-1] == min(mse):
        shutil.copy(f'./checkpoints/{model_config}.h5', f'./checkpoints/{model_config}.best.h5')

mse = np.average(np.array(mse), axis=0)
r2 = np.average(np.array(r2), axis=0)
max_e = np.average(np.array(max_e), axis=0)

metrics = {'mse':list(mse), 'r2':list(r2), 'max_e':list(max_e)}
Example #17
    def __init__(self, prediction, y, target, threshold=4, binary=True):
        """
        Master class from which all metrics are computed

        Computed metrics:

        Classification metrics:
        - self.sensitivity: Sensitivity, hit rate, recall, or true positive rate
        - self.specificity: Specificity or true negative rate
        - self.precision: Precision or positive predictive value
        - self.NPV: Negative predictive value
        - self.FPR: Fall out or false positive rate
        - self.FNR: False negative rate
        - self.FDR: False discovery rate
        - self.accuracy: Accuracy

        - self.auc(): AUC
        - self.hitrate(): Hit rate

        Regression metrics:
        - self.explained_variance: Explained variance regression score function
        - self.max_error: Max_error metric calculates the maximum residual error
        - self.mean_absolute_error: Mean absolute error regression loss
        - self.mean_squared_error: Mean squared error regression loss
        - self.root_mean_squared_error: Root mean squared error regression loss
        - self.mean_squared_log_error: Mean squared logarithmic error regression loss
        - self.median_squared_log_error: Median absolute error regression loss
        - self.r2_score: R^2 (coefficient of determination) regression score function

        Args:
            prediction (list): predicted values
            y (list): list of target values
            target (string): irmsd, fnat, capri_class, bin_class
            binary (bool, optional): transform the data in binary vectors. Defaults to True.
            threshold (int, optional): threshold used to split the data into a binary vector. Defaults to 4.
        """
        self.prediction = prediction
        self.y = y
        self.binary = binary
        self.target = target
        self.threshold = threshold

        print('Threshold set to {}'.format(self.threshold))

        if self.binary == True:
            prediction_binary = get_binary(self.prediction, self.threshold,
                                           self.target)
            y_binary = get_binary(self.y, self.threshold, self.target)
            classes = [0, 1]
            false_positive, false_negative, true_positive, true_negative = get_comparison(
                prediction_binary, y_binary, self.binary, classes=classes)

        else:
            if target == 'capri_class':
                classes = [1, 2, 3, 4, 5]
            elif target == 'bin_class':
                classes = [0, 1]
            else:
                raise ValueError('target must be capri_class or bin_class')
            false_positive, false_negative, true_positive, true_negative = get_comparison(
                self.prediction, self.y, self.binary, classes=classes)

        try:
            # Sensitivity, hit rate, recall, or true positive rate
            self.sensitivity = true_positive / (true_positive + false_negative)
        except:
            self.sensitivity = None

        try:
            # Specificity or true negative rate
            self.specificity = true_negative / (true_negative + false_positive)
        except:
            self.specificity = None

        try:
            # Precision or positive predictive value
            self.precision = true_positive / (true_positive + false_positive)
        except:
            self.precision = None

        try:
            # Negative predictive value
            self.NPV = true_negative / (true_negative + false_negative)
        except:
            self.NPV = None

        try:
            # Fall out or false positive rate
            self.FPR = false_positive / (false_positive + true_negative)
        except:
            self.FPR = None

        try:
            # False negative rate
            self.FNR = false_negative / (true_positive + false_negative)
        except:
            self.FNR = None

        try:
            # False discovery rate
            self.FDR = false_positive / (true_positive + false_positive)
        except:
            self.FDR = None

        self.accuracy = (true_positive + true_negative) / (
            true_positive + false_positive + false_negative + true_negative)

        # regression metrics
        self.explained_variance = None
        self.max_error = None
        self.mean_absolute_error = None
        self.mean_squared_error = None
        self.root_mean_squared_error = None
        self.mean_squared_log_error = None
        self.median_squared_log_error = None
        self.r2_score = None

        if target in ['fnat', 'irmsd', 'lrmsd']:

            # Explained variance regression score function
            self.explained_variance = metrics.explained_variance_score(
                self.y, self.prediction)

            # Max_error metric calculates the maximum residual error
            self.max_error = metrics.max_error(self.y, self.prediction)

            # Mean absolute error regression loss
            self.mean_absolute_error = metrics.mean_absolute_error(
                self.y, self.prediction)

            # Mean squared error regression loss
            self.mean_squared_error = metrics.mean_squared_error(
                self.y, self.prediction, squared=True)

            # Root mean squared error regression loss
            self.root_mean_squared_error = metrics.mean_squared_error(
                self.y, self.prediction, squared=False)

            try:
                # Mean squared logarithmic error regression loss
                self.mean_squared_log_error = metrics.mean_squared_log_error(
                    self.y, self.prediction)
            except ValueError:
                print(
                    "WARNING: Mean Squared Logarithmic Error cannot be used when "
                    "targets contain negative values.")

            # Median absolute error regression loss
            self.median_squared_log_error = metrics.median_absolute_error(
                self.y, self.prediction)

            # R^2 (coefficient of determination) regression score function
            self.r2_score = metrics.r2_score(self.y, self.prediction)
Example #18
def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_squared_error([0.0], [0.0], squared=False), 0.0)
    assert_almost_equal(mean_squared_log_error([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_absolute_error([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_pinball_loss([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_absolute_percentage_error([0.0], [0.0]), 0.0)
    assert_almost_equal(median_absolute_error([0.0], [0.0]), 0.0)
    assert_almost_equal(max_error([0.0], [0.0]), 0.0)
    assert_almost_equal(explained_variance_score([0.0], [0.0]), 1.0)
    assert_almost_equal(r2_score([0.0, 1], [0.0, 1]), 1.0)
    err_msg = ("Mean Squared Logarithmic Error cannot be used when targets "
               "contain negative values.")
    with pytest.raises(ValueError, match=err_msg):
        mean_squared_log_error([-1.0], [-1.0])
    err_msg = ("Mean Squared Logarithmic Error cannot be used when targets "
               "contain negative values.")
    with pytest.raises(ValueError, match=err_msg):
        mean_squared_log_error([1.0, 2.0, 3.0], [1.0, -2.0, 3.0])
    err_msg = ("Mean Squared Logarithmic Error cannot be used when targets "
               "contain negative values.")
    with pytest.raises(ValueError, match=err_msg):
        mean_squared_log_error([1.0, -2.0, 3.0], [1.0, 2.0, 3.0])

    # Tweedie deviance error
    power = -1.2
    assert_allclose(mean_tweedie_deviance([0], [1.0], power=power),
                    2 / (2 - power),
                    rtol=1e-3)
    with pytest.raises(ValueError,
                       match="can only be used on strictly positive y_pred."):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    assert_almost_equal(mean_tweedie_deviance([0.0], [0.0], power=0), 0.00, 2)

    msg = "only be used on non-negative y and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=1.0)

    power = 1.5
    assert_allclose(mean_tweedie_deviance([0.0], [1.0], power=power),
                    2 / (2 - power))
    msg = "only be used on non-negative y and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    power = 2.0
    assert_allclose(mean_tweedie_deviance([1.0], [1.0], power=power),
                    0.00,
                    atol=1e-8)
    msg = "can only be used on strictly positive y and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    power = 3.0
    assert_allclose(mean_tweedie_deviance([1.0], [1.0], power=power),
                    0.00,
                    atol=1e-8)

    msg = "can only be used on strictly positive y and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)

    with pytest.raises(ValueError,
                       match="is only defined for power<=0 and power>=1"):
        mean_tweedie_deviance([0.0], [0.0], power=0.5)
Example #19
    def dmltest(self):
        '''dmltest should start to listen on the _npdc_hostadr bound during negotiations,
        and attempt to receive the numpy arrays (TEST BATCH) from the same donors;
        it then returns the evaluation metrics.
        Returns True upon no errors, False otherwise.'''
        self._mdmlalist = []
        self._mdmlvlist = []
        out = True
        if (self.hasNegotiated() and self.donorTrained):
            self.verbose("Hosting DML test")
            for i in range(self._hostnum):
                conn = self._mdmlconnlist[i]
                rcv = smsg.recv(conn)
                if (rcv == None):
                    self.error("aggregate rcv has failed from donor", i)
                    out = False  #Error occurred
                else:
                    ra = deserialize(rcv)
                    #ra = numpy.array( json.loads(rcv))
                    self._mdmlalist.append(ra)
                conn.send('ACKN'.encode('utf-8'))
                if (self._mnegformlist[i].primary['dflag']):
                    rcv = smsg.recv(conn)
                    if (rcv == None):
                        self.error("test targets rcv has failed from donor", i)
                        out = False  #Error occurred
                    else:
                        rt = deserialize(rcv)
                        #rt = numpy.array(json.loads(rcv))
                        self._mdmlvlist.append(rt)
                        self.verbose("received test targets from donor", i)

            if (len(self._mdmlalist) < 1):
                self.error("No aggregates received, Aborting")
                out = False  #Error occurred
                self.__abortAll()
            if (len(self._mdmlvlist) < 1):
                self.error(
                    "No verifying targets. Is target donor specified correctly?"
                )
                out = False
                self.__abortAll()
            else:
                asum = numpy.zeros(self._mdmlalist[0].shape, dtype=self.compd)
                for i, a in enumerate(self._mdmlalist):
                    self.info("aggregate", i, a.shape)
                    asum = a + asum

                # only test against the first TODO: selectable testing

                varg = self._mdmlvlist[0]
                jrep = {
                    "mse": mean_squared_error(asum, varg),
                    "mae": mean_absolute_error(asum, varg),
                    "max": max_error(asum, varg),
                    "evs": explained_variance_score(asum, varg),
                    "r2s": r2_score(asum, varg)
                }
                self.verbose("mse", jrep.get("mse"))
                self.verbose("mae", jrep.get("mae"))
                self.verbose("max", jrep.get("max"))
                self.verbose("evs", jrep.get("evs"))
                self.verbose("r2s", jrep.get("r2s"))

                for i in range(self._hostnum):
                    conn = self._mdmlconnlist[i]
                    smsg.send(conn, json.dumps(jrep))
                self.verbose("DML done. JREP pushed to donors")
        else:
            self.error(
                "Not negotiated or donor is not trained yet, unable to test")
            out = False
        return out
Example #20
output = model(train.float())
output = output.cpu()
#    output = output.detach().numpy()[:,:,-1]
output = output.detach().numpy()
#denormal_output = abs((output*(np.max(target)-np.min(target))) + np.min(target))
denormal_output = output * 100
pred = model(test.float())
pred = pred.cpu()
#    pred = pred.detach().numpy()[:,:,0]
pred = pred.detach().numpy()
#denormal_pred = abs((pred*(np.max(target)-np.min(target))) + np.min(target))
denormal_pred = pred * 100

meanse = sm.mean_squared_error(denormal_output[:, :, 0], days)
meanabse = sm.mean_absolute_error(denormal_output[:, :, 0], days)
maxe = sm.max_error(denormal_output[:, :, 0], days)
var_sc = sm.explained_variance_score(denormal_output[:, :, 0], days)
r2_coeff = sm.r2_score(denormal_output[:, :, 0], days)
adjr2_coeff = 1 - (1 - r2_coeff) * ((len(china) - 1) / (len(china) - 2 - 1))
out = denormal_output[:, :, 0]
out = out.reshape((len(out), ))
out = list(out)
days = days.reshape((len(days), ))
days = list(days)
stat1, p1 = pearsonr(out, days)
stat2, p2 = spearmanr(out, days)
for i in range(len(days)):
    if out[i] == 0:
        out[i] = 1.0
    if days[i] == 0:
        days[i] = 1.0
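The adjusted R² line above applies the standard formula 1 - (1 - R²) * (n - 1) / (n - k - 1), with n = len(china) observations and k = 2 predictors. A small self-contained sketch with made-up numbers:

def adjusted_r2(r2, n_obs, n_predictors):
    # Penalises R² for the number of predictors, as in the snippet above.
    return 1 - (1 - r2) * (n_obs - 1) / (n_obs - n_predictors - 1)

print(adjusted_r2(0.90, n_obs=60, n_predictors=2))   # ≈ 0.8965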
Example #21
    print("MAE 1 esercizio: ", mean_absolute_error(Y_Mean1, P_Mean1))
    print("MAE 2 esercizio: ", mean_absolute_error(Y_Mean2, P_Mean2))
    print("MAE 3 esercizio: ", mean_absolute_error(Y_Mean3, P_Mean3))

    # media delle differenze al quadrato tra previsioni e target.
    print("MSE 1 esercizio: ", mean_squared_error(Y_Mean1, P_Mean1))
    print("MSE 2 esercizio: ", mean_squared_error(Y_Mean2, P_Mean2))
    print("MSE 3 esercizio: ", mean_squared_error(Y_Mean3, P_Mean3))

    # proporzione tra variabilità e correttezza dei dati del modello.
    print("R2 1 esercizio: ", r2_score(Y_Mean1, P_Mean1))
    print("R2 2 esercizio: ", r2_score(Y_Mean2, P_Mean2))
    print("R2 3 esercizio: ", r2_score(Y_Mean3, P_Mean3))

    # misura di quanto i valori stimati si discostano dai valori reali
    print("Max Error 1 esercizio:  ", max_error(Y_Mean1, P_Mean1))
    print("Max Error 2 esercizio: ", max_error(Y_Mean2, P_Mean2))
    print("Max Error 3 esercizio: ", max_error(Y_Mean3, P_Mean3))

    for s in range(1, 4):
        with open(filePath + '.csv', 'a') as f:
            wtr = csv.writer(f)
            wtr.writerow(str(p))
            if s == 1:
                wtr.writerow("E1")
                wtr.writerow(Y_Mean1)
                wtr.writerow(P_Mean1)
            else:
                if s == 2:
                    wtr.writerow("E2")
                    wtr.writerow(Y_Mean2)
Example #22
    def _BuildRegrModel(self, y, X):
        """Train an ensemble regression model and assess its performance.

        Start by splitting y and X into train and test samples. Next, make an ensemble
        voting regressor by combining an AdaBoost, a RandomForest, an ElasticNet and a
        Ridge regressor and fit it to the train sample. Finally, calculate its performance
        using the test sample and return both the model and the calculated metrics.

        Arguments:
            y {numpy.ndarray} -- The response variable (i.e. the LST data)
            X {numpy.ndarray} -- The explanatory variables (i.e. the LST predictors)

        Returns:
            sklearn.ensemble.voting.VotingRegressor -- The ensemble regression model
            tuple -- A tuple with the regression performance metrics
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=self.regr_test_size, random_state=self.SEED
        )

        reg1 = AdaBoostRegressor(
            loss=self.params_ADAboost["loss"],
            n_estimators=self.params_ADAboost["n_estimators"],
            random_state=self.SEED,
        )
        reg2 = RandomForestRegressor(
            max_depth=self.params_RF["max_depth"],
            n_estimators=self.params_RF["n_estimators"],
            min_samples_split=self.params_RF["min_samples_split"],
            min_samples_leaf=self.params_RF["min_samples_leaf"],
            random_state=self.SEED,
        )
        reg3 = ElasticNetCV(
            l1_ratio=self.params_elastNet["l1_ratio"],
            n_alphas=self.params_elastNet["n_alphas"],
            cv=self.params_elastNet["cv"],
            random_state=self.SEED,
        )
        reg4 = Ridge(
                alpha=self.params_ridge["alpha"],
                random_state=self.SEED
        )
        ereg = VotingRegressor(
            estimators=[("ada", reg1), ("rf", reg2), ("net", reg3), ("ridge", reg4)]
        )

        # Train the model
        try:
            ereg.fit(X_train, y_train)
        except ValueError as err:
            raise ValueError(
                f"Error in _BuildRegrModel: Unable to fit regression model. {err}"
            )

        # Assess the model performance
        y_pred = ereg.predict(X_test)
        regr_metrics = (
            metrics.r2_score(y_test, y_pred),
            metrics.explained_variance_score(y_test, y_pred),
            metrics.max_error(y_test, y_pred),
            metrics.mean_absolute_error(y_test, y_pred),
            metrics.mean_squared_error(y_test, y_pred),
            metrics.median_absolute_error(y_test, y_pred),
        )

        return ereg, regr_metrics
Example #23
 ######### Deepcut ##########
 data_train_s_deepcut, data_test_s_deepcut, label_train_s_deepcut, label_test_s_deepcut = train_test_split(
     data_train_deepcut, label_train_deepcut, test_size=0.2)
 estimator.fit(data_train_s_deepcut, label_train_s_deepcut)
 actual_deepcut, predicted_deepcut = label_test_s_deepcut, estimator.predict(
     data_test_s_deepcut)
 y_true_deepcut, y_pred_deepcut = label_test_s_deepcut, estimator.predict(
     data_test_s_deepcut)
 print("Deepcut")
 print(
     metrics.classification_report(actual_deepcut,
                                   predicted_deepcut,
                                   target_names=target))
 print(accuracy_score(actual_deepcut, predicted_deepcut))
 print(multilabel_confusion_matrix(actual_deepcut, predicted_deepcut))
 print(max_error(actual_deepcut, predicted_deepcut))
 #plot_confusion_matrix(estimator, data_test_s_deepcut, label_test_s_deepcut)  # doctest: +SKIP
 for title, normalize in titles_options:
     disp = plot_confusion_matrix(estimator,
                                  data_test_s_deepcut,
                                  label_test_s_deepcut,
                                  display_labels=target1,
                                  cmap=plt.cm.YlOrRd,
                                  normalize=normalize)
     disp.ax_.set_title(title)
     print(title)
     print(disp.confusion_matrix)
 plt.show()  # doctest: +SKIP
 #-------------
 print("")
 #-------------
Example #24
neptune.init('shared/sklearn-integration', api_token='ANONYMOUS')

### Create an experiment and log classifier parameters

neptune.create_experiment(
    params=parameters,
    name='classification-example',
    tags=['GradientBoostingClassifier', 'classification'])

### Log scores on test data to Neptune

from sklearn.metrics import max_error, mean_absolute_error, r2_score

y_pred = gbc.predict(X_test)

neptune.log_metric('max_error', max_error(y_test, y_pred))
neptune.log_metric('mean_absolute_error', mean_absolute_error(y_test, y_pred))
neptune.log_metric('r2_score', r2_score(y_test, y_pred))

# tests
exp = neptune.get_experiment()

### Stop Neptune experiment after logging scores

neptune.stop()

# tests
# check logs
correct_logs_set = {'max_error', 'mean_absolute_error', 'r2_score'}
from_exp_logs = set(exp.get_logs().keys())
assert correct_logs_set == from_exp_logs, '{} - incorrect logs'.format(exp)
Example #25
def solve():
    if request.method == 'POST':

        #Reading data from html form from preferences.html
        algorithm = int(request.form["Algorithm"])
        testsize = (int(request.form["testsize"]) / 10)
        #print(testsize)
        #columns = [int(x) for x in request.form.getlist("selected_columns")]#while using checkboxes in preferences.html
        outcome = request.form['outcome']
        choice = int(request.form["choice"])  #represents problem type

        selected_categorical_columns = request.form.getlist(
            'selected_categorical_columns')
        selected_numerical_columns = request.form.getlist(
            'selected_numerical_columns')

        s = t.perf_counter()

        target = data[outcome]  #target column of data
        features = data[
            selected_categorical_columns +
            selected_numerical_columns]  #data after removing target column and with selected categorical and numerical columns

        #segregating categorical and numerical data and applying One-Hot Encoding
        categorical_data = features[selected_categorical_columns]
        numerical_data = features[selected_numerical_columns]

        X = features
        y = target
        labels = y.unique()

        #testsize to be given by user read from form data
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=testsize,
                                                            random_state=101)

        testsize = 0

        X_train_ohc = pd.get_dummies(data=X_train,
                                     columns=selected_categorical_columns)
        X_train_categorical_data_ohc = X_train_ohc[[
            x for x in X_train_ohc.columns
            if x not in selected_numerical_columns
        ]]  #one hot encoded categorical data
        selected_categorical_columns_ohc = X_train_categorical_data_ohc.columns
        scaler.fit(X_train[selected_numerical_columns])
        temp1 = scaler.transform(X_train[selected_numerical_columns])
        temp2 = X_train_ohc[selected_categorical_columns_ohc].values
        X_train = np.concatenate((temp1, temp2), axis=1)

        original_X_test = X_test  #to display data on results page
        X_test_ohc = pd.get_dummies(data=X_test,
                                    columns=selected_categorical_columns)
        temp1 = scaler.transform(X_test[selected_numerical_columns])
        temp2 = X_test_ohc[selected_categorical_columns_ohc].values
        X_test = np.concatenate((temp1, temp2), axis=1)

        if choice == 0:  #Classification

            #training the model, predicting values, and get the time in doing so
            Classification_Algorithm[algorithm].fit(X_train, y_train)
            pred = Classification_Algorithm[algorithm].predict(X_test)
            time_taken = t.perf_counter() - s

            print(original_X_test)

            #show the results page with the metrics
            return render_template(
                'Classification_results.html',
                Classification_Algorithm_used=str(
                    Classification_Algorithm_used[algorithm]),
                selected_columns=selected_categorical_columns +
                selected_numerical_columns,
                time_taken=time_taken,
                confusion_matrix=confusion_matrix(y_test, pred),
                labels=labels,
                cr=classification_report(y_test, pred, output_dict=True),
                accuracy=accuracy_score(y_test, pred),
                X_test=original_X_test.head().values,
                y_test=y_test.head().values,
                pred=pred[0:5])

        else:  #Regression

            #training the model, predicting values, and get the time in doing so
            Regression_Algorithm[algorithm].fit(X_train, y_train)
            pred = Regression_Algorithm[algorithm].predict(X_test)
            time_taken = t.perf_counter() - s

            #show the results page with the metrics
            return render_template(
                'Regression_results.html',
                Regression_Algorithm_used=str(
                    Regression_Algorithm_used[algorithm]),
                selected_columns=selected_categorical_columns +
                selected_numerical_columns,
                time_taken=time_taken,
                mean_squared_error=mean_squared_error(y_test, pred),
                max_error=max_error(y_test, pred),
                r2_score=r2_score(y_test, pred),
                X_test=original_X_test.head().values,
                y_test=y_test.head().values,
                pred=pred[0:5])

    #when GET request is made
    return "Wrong Method"
Example #26
            imp=SimpleImputer(missing_values=np.nan, strategy="mean")
            imp=imp.fit(train_X_2)
            ################################################################################
            grid_regressor_2 = GridSearchCV(extra_tree_regressor, param_grd, 
                                        n_jobs=-1, 
                                        verbose=0)
            ################################################################################
            grid_regressor_2.fit(imp.transform(train_X_2), train_y_2)
            ################################################################################
            best_regressor_2 = grid_regressor_2.best_estimator_

            imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
            imputer = imputer.fit(test_X_2)
            test_X_imp = imputer.transform(test_X_2)

            predicted_2 = best_regressor_2.predict(test_X_imp)
            ################################################################################
            r2=float("{:.3f}".format(r2_score(test_y_2, predicted_2)))
            rmse=float("{:.3f}".format(mean_squared_error(test_y_2, predicted_2, squared=False)))
            maxerr=float("{:.3f}".format(max_error(test_y_2, predicted_2)))
            print(f"R2: \t\t{r2}")
            print(f"RMSE: \t\t{rmse}")
            print(f"MAX ERR: \t{maxerr}")
            ################################################################################
            df_results.loc[[dataset+':'+str(rnd_state)], [coefficents[0]+'_'+feature]] = r2
            df_results.loc[[dataset+':'+str(rnd_state)], [coefficents[1]+'_'+feature]] = rmse
            df_results.loc[[dataset+':'+str(rnd_state)], [coefficents[2]+'_'+feature]] = maxerr


df_results.to_csv('covid_extra_trees_doppio_test.csv')
Example #27
print("Fitting LGBMRegressor model...")
gbm_fit = gbm0.fit(X_train, y_train, eval_metric='rmse')
print("Finished fitting LGBMRegressor model")

# In[ ]:

# Prediction
predict_lightGBM = gbm0.predict(X_test)

y_trained = np.array(trained['Production'])
y_pred = predict_lightGBM
y_truth = np.array(y_test)

print('explained_variance_score', explained_variance_score(y_truth, y_pred))
print('max_error', max_error(y_truth, y_pred))
print('mean_absolute_error', mean_absolute_error(y_truth, y_pred))
print('mean_squared_error', mean_squared_error(y_truth, y_pred))
print('mean_squared_log_error', mean_squared_log_error(y_truth**2, y_pred**2))
print('median_absolute_error', median_absolute_error(y_truth, y_pred))
print('r2_score', r2_score(y_truth, y_pred))
print('rmse', rmse(y_truth, y_pred))

# In[ ]:

optimization_dict = {'max_depth': [2, 4, 6], 'n_estimators': [50, 100, 200]}

model_gbm = GridSearchCV(gbm0,
                         optimization_dict,
                         scoring='neg_mean_absolute_error',
                         verbose=1)
Example #28
print('Root Mean Squared Error = %0.3f' % rmse)

#Mean Squared Error
mse = mean_squared_error(actual_rainfall, predictions)
print('Mean Squared Error = %0.3f' % mse)

#Mean Absolute Error
mae = mean_absolute_error(actual_rainfall, predictions)
print('Mean Absolute Error = %0.3f' % mae)

#Mean Squared Log Error
msle = mean_squared_log_error(actual_rainfall, predictions)
print('Mean Squared Log Error = %0.3f' % msle)

#Max Error
me = max_error(actual_rainfall, predictions)
print('Max Error = %0.3f' % me)

#Plot True Values vs. Predictions
a = plt.axes(aspect='equal')
plt.scatter(test_labels, predictions)
plt.xlabel('Actual Rainfall')
plt.ylabel('Predicted Rainfall')
lims = [0, 2500]
plt.xlim(lims)
plt.ylim(lims)
plt.plot(lims, lims)
plt.show()

#Plot Actual vs. Prediction
plt.title('Actual Rainfall vs. Predicted Rainfall')
Example #29
        # store the best classifier for each classifier
        pipe = grid_clf.best_estimator_
    else:
        pipe.fit(X_train, y_train)

    # just a piece of code in case we need access to the classifier in the pipe
    ## print(pipe[classifier.__class__.__name__])

    y_pred = pipe.predict(X_test)

    result = {
        'Classifier': classifier.__class__.__name__,
        'Score': pipe.score(X_test, y_test),
        'Explained variance score': explained_variance_score(y_test, y_pred),
        'Max error': max_error(y_test, y_pred),
        'Mean absolute error': mean_absolute_error(y_test, y_pred),
        'Mean squared error': mean_squared_error(y_test, y_pred),
        # 'Mean squared logarithmic error': mean_squared_log_error(y_test, y_pred),
        'Median absolute error': median_absolute_error(y_test, y_pred),
        'R^2 score': r2_score(y_test, y_pred)
    }
    results.append(result)

results_df = pd.DataFrame(data=results,
                          index=None,
                          columns=[
                              'Classifier', 'Score',
                              'Explained variance score', 'Max error',
                              'Mean absolute error', 'Mean squared error',
                              'Median absolute error', 'R^2 score'
Example #30
def test_regression_metrics_at_limits():
    # Single-sample case
    # Note: for r2 and d2_tweedie see also test_regression_single_sample
    assert_almost_equal(mean_squared_error([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_squared_error([0.0], [0.0], squared=False), 0.0)
    assert_almost_equal(mean_squared_log_error([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_absolute_error([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_pinball_loss([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_absolute_percentage_error([0.0], [0.0]), 0.0)
    assert_almost_equal(median_absolute_error([0.0], [0.0]), 0.0)
    assert_almost_equal(max_error([0.0], [0.0]), 0.0)
    assert_almost_equal(explained_variance_score([0.0], [0.0]), 1.0)

    # Perfect cases
    assert_almost_equal(r2_score([0.0, 1], [0.0, 1]), 1.0)

    # Non-finite cases
    # R² and explained variance have a fix by default for non-finite cases
    for s in (r2_score, explained_variance_score):
        assert_almost_equal(s([0, 0], [1, -1]), 0.0)
        assert_almost_equal(s([0, 0], [1, -1], force_finite=False), -np.inf)
        assert_almost_equal(s([1, 1], [1, 1]), 1.0)
        assert_almost_equal(s([1, 1], [1, 1], force_finite=False), np.nan)
    msg = ("Mean Squared Logarithmic Error cannot be used when targets "
           "contain negative values.")
    with pytest.raises(ValueError, match=msg):
        mean_squared_log_error([-1.0], [-1.0])
    msg = ("Mean Squared Logarithmic Error cannot be used when targets "
           "contain negative values.")
    with pytest.raises(ValueError, match=msg):
        mean_squared_log_error([1.0, 2.0, 3.0], [1.0, -2.0, 3.0])
    msg = ("Mean Squared Logarithmic Error cannot be used when targets "
           "contain negative values.")
    with pytest.raises(ValueError, match=msg):
        mean_squared_log_error([1.0, -2.0, 3.0], [1.0, 2.0, 3.0])

    # Tweedie deviance error
    power = -1.2
    assert_allclose(mean_tweedie_deviance([0], [1.0], power=power),
                    2 / (2 - power),
                    rtol=1e-3)
    msg = "can only be used on strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match=msg):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)

    assert_almost_equal(mean_tweedie_deviance([0.0], [0.0], power=0), 0.0, 2)

    power = 1.0
    msg = "only be used on non-negative y and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match=msg):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)

    power = 1.5
    assert_allclose(mean_tweedie_deviance([0.0], [1.0], power=power),
                    2 / (2 - power))
    msg = "only be used on non-negative y and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match=msg):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)

    power = 2.0
    assert_allclose(mean_tweedie_deviance([1.0], [1.0], power=power),
                    0.00,
                    atol=1e-8)
    msg = "can only be used on strictly positive y and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match=msg):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)

    power = 3.0
    assert_allclose(mean_tweedie_deviance([1.0], [1.0], power=power),
                    0.00,
                    atol=1e-8)
    msg = "can only be used on strictly positive y and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match=msg):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)

    power = 0.5
    with pytest.raises(ValueError,
                       match="is only defined for power<=0 and power>=1"):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError,
                       match="is only defined for power<=0 and power>=1"):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)
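
# The force_finite assertions above show how r2_score and
# explained_variance_score handle a constant y_true: by default the degenerate
# ratio is replaced with 0.0 (imperfect predictions) or 1.0 (perfect
# predictions), while force_finite=False exposes the raw -inf / nan. A small
# standalone illustration (my addition, requires a recent scikit-learn):
from sklearn.metrics import r2_score

print(r2_score([0, 0], [1, -1]))                      # 0.0
print(r2_score([0, 0], [1, -1], force_finite=False))  # -inf
print(r2_score([1, 1], [1, 1]))                       # 1.0
print(r2_score([1, 1], [1, 1], force_finite=False))   # nan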
Ejemplo n.º 31
0
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import explained_variance_score
from sklearn.metrics import max_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

# importing Dataset

# dataset = pd.read_excel(open('parking-estimation.xlsx', 'rb'))
dataset = pd.read_csv('csvParking.csv', decimal=',')

X = dataset.iloc[:, 2:6].values
y = dataset.iloc[:, 1:2].values

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=0)

regressor = LinearRegression()
regressor.fit(X_train, y_train)

y_pred = regressor.predict(X_test)

# print the scores so the evaluation is visible when the script runs
print('Explained variance score:', explained_variance_score(y_test, y_pred))
print('Max error:', max_error(y_test, y_pred))
print('R^2 score:', r2_score(y_test, y_pred))
print('Mean squared error:', mean_squared_error(y_test, y_pred))
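
# A small follow-up sketch (my addition): the fitted model's coefficients and
# an RMSE in the target's own units complement the metrics above.
import numpy as np

print('Coefficients:', regressor.coef_)
print('Intercept:', regressor.intercept_)
print('RMSE:', np.sqrt(mean_squared_error(y_test, y_pred)))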
Ejemplo n.º 32
0
def test_regression_metrics(n_samples=50):
    y_true = np.arange(n_samples)
    y_pred = y_true + 1
    y_pred_2 = y_true - 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.0)
    assert_almost_equal(
        mean_squared_log_error(y_true, y_pred),
        mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred)),
    )
    assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.0)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred), 0.5)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred_2), 0.5)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred, alpha=0.4), 0.6)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred_2, alpha=0.4), 0.4)
    assert_almost_equal(median_absolute_error(y_true, y_pred), 1.0)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    assert np.isfinite(mape)
    assert mape > 1e6
    assert_almost_equal(max_error(y_true, y_pred), 1.0)
    assert_almost_equal(r2_score(y_true, y_pred), 0.995, 2)
    assert_almost_equal(r2_score(y_true, y_pred, force_finite=False), 0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.0)
    assert_almost_equal(
        explained_variance_score(y_true, y_pred, force_finite=False), 1.0)
    assert_almost_equal(
        mean_tweedie_deviance(y_true, y_pred, power=0),
        mean_squared_error(y_true, y_pred),
    )
    assert_almost_equal(d2_tweedie_score(y_true, y_pred, power=0),
                        r2_score(y_true, y_pred))

    # Tweedie deviance needs positive y_pred, except for p=0,
    # p>=2 needs positive y_true
    # results evaluated by sympy
    y_true = np.arange(1, 1 + n_samples)
    y_pred = 2 * y_true
    n = n_samples
    assert_almost_equal(
        mean_tweedie_deviance(y_true, y_pred, power=-1),
        5 / 12 * n * (n**2 + 2 * n + 1),
    )
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=1),
                        (n + 1) * (1 - np.log(2)))
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=2),
                        2 * np.log(2) - 1)
    assert_almost_equal(
        mean_tweedie_deviance(y_true, y_pred, power=3 / 2),
        ((6 * np.sqrt(2) - 8) / n) * np.sqrt(y_true).sum(),
    )
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=3),
                        np.sum(1 / y_true) / (4 * n))

    dev_mean = 2 * np.mean(xlogy(y_true, 2 * y_true / (n + 1)))
    assert_almost_equal(
        d2_tweedie_score(y_true, y_pred, power=1),
        1 - (n + 1) * (1 - np.log(2)) / dev_mean,
    )

    dev_mean = 2 * np.log((n + 1) / 2) - 2 / n * np.log(factorial(n))
    assert_almost_equal(d2_tweedie_score(y_true, y_pred, power=2),
                        1 - (2 * np.log(2) - 1) / dev_mean)
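
# The d2_tweedie_score assertions above rest on the identity
# D^2 = 1 - dev(y_true, y_pred) / dev(y_true, mean(y_true)); a small standalone
# illustration of that identity (my addition, not part of the test suite):
import numpy as np
from sklearn.metrics import d2_tweedie_score, mean_tweedie_deviance

y_true = np.arange(1.0, 51.0)
y_pred = 2 * y_true
dev_model = mean_tweedie_deviance(y_true, y_pred, power=1)
dev_null = mean_tweedie_deviance(y_true, np.full_like(y_true, y_true.mean()), power=1)
print(d2_tweedie_score(y_true, y_pred, power=1))  # matches the line below up to float error
print(1 - dev_model / dev_null)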