Example #1
def test_poisson():
    # For Poisson distributed target, Poisson loss should give better results
    # than least squares measured in Poisson deviance as metric.
    rng = np.random.RandomState(42)
    n_train, n_test, n_features = 500, 100, 100
    X = make_low_rank_matrix(n_samples=n_train + n_test,
                             n_features=n_features,
                             random_state=rng)
    # We create a log-linear Poisson model and downscale coef as it will get
    # exponentiated.
    coef = rng.uniform(low=-2, high=2, size=n_features) / np.max(X, axis=0)
    y = rng.poisson(lam=np.exp(X @ coef))
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=n_test,
                                                        random_state=rng)
    gbdt_pois = HistGradientBoostingRegressor(loss="poisson", random_state=rng)
    gbdt_ls = HistGradientBoostingRegressor(loss="squared_error",
                                            random_state=rng)
    gbdt_pois.fit(X_train, y_train)
    gbdt_ls.fit(X_train, y_train)
    dummy = DummyRegressor(strategy="mean").fit(X_train, y_train)

    for X, y in [(X_train, y_train), (X_test, y_test)]:
        metric_pois = mean_poisson_deviance(y, gbdt_pois.predict(X))
        # squared_error might produce non-positive predictions => clip
        metric_ls = mean_poisson_deviance(
            y, np.clip(gbdt_ls.predict(X), 1e-15, None))
        metric_dummy = mean_poisson_deviance(y, dummy.predict(X))
        assert metric_pois < metric_ls
        assert metric_pois < metric_dummy
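The clip to 1e-15 above is needed because mean_poisson_deviance is only defined for strictly positive predictions. A minimal standalone sketch (toy data, not part of the original test) of the failure mode and the workaround:

import numpy as np
from sklearn.metrics import mean_poisson_deviance

y_true = np.array([0, 1, 3, 2])            # observed counts, zeros are allowed
y_pred = np.array([0.5, -0.2, 2.8, 1.9])   # a squared-error model may predict zero or negative values

# mean_poisson_deviance(y_true, y_pred) would raise ValueError here because of
# the non-positive prediction, so clip to a tiny positive floor first.
print(mean_poisson_deviance(y_true, np.clip(y_pred, 1e-15, None)))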
Example #2
    def generate_metrics(self):
        model = self.model
        target = self._target_test
        prediction = model.predict(self._x_test)

        met_dict = {
            'explained_variance_score':
            explained_variance_score(target, prediction),
            'max_error':
            max_error(target, prediction),
            'mean_absolute_error':
            mean_absolute_error(target, prediction),
            'mean_squared_error':
            mean_squared_error(target, prediction),
            'mean_squared_log_error':
            mean_squared_log_error(target, prediction),
            'median_absolute_error':
            median_absolute_error(target, prediction),
            'r2_score':
            r2_score(target, prediction),
            'mean_poisson_deviance':
            mean_poisson_deviance(target, prediction),
            'mean_gamma_deviance':
            mean_gamma_deviance(target, prediction)
        }

        self._model_metrics = pd.DataFrame.from_dict(met_dict, orient='index')
Example #3
def score_rec(metric, X, X_):

    if metric == 'rmse':
        score = np.sqrt(mean_squared_error(X.T, X_.T,
                                           multioutput='raw_values'))
    elif metric == 'mse':
        score = mean_squared_error(X.T, X_.T, multioutput='raw_values')
    elif metric == 'mae':
        score = mean_absolute_error(X.T, X_.T, multioutput='raw_values')
    elif metric == 'msle':
        score = mean_squared_log_error(X.T, X_.T, multioutput='raw_values')
    elif metric == 'evs':
        score = explained_variance_score(X.T, X_.T, multioutput='raw_values')
    elif metric == 'poisson':
        n = X.shape[0]
        score = np.zeros(n)
        X = np.abs(X)
        X_ = np.abs(X_)
        for i in range(n):
            score[i] = mean_poisson_deviance(X[i, :], X_[i, :])
    elif metric == 'gamma':
        n = X.shape[0]
        score = np.zeros(n)
        X = np.abs(X)
        X_ = np.abs(X_)
        for i in range(n):
            score[i] = mean_gamma_deviance(X[i, :], X_[i, :])
    else:
        # An unknown metric previously fell through and hit `return score`
        # with `score` undefined; fail explicitly instead.
        raise ValueError(f"Unknown metric: {metric!r}")

    return score
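Taking np.abs of both matrices above forces positivity, but it also flips the sign of genuinely negative values. A hedged alternative sketch (not the original author's approach) that clips to a small positive floor instead, so only non-positive entries are touched:

import numpy as np
from sklearn.metrics import mean_poisson_deviance

def poisson_score_rows(X, X_, eps=1e-12):
    # Clip instead of abs(): values above eps are left untouched.
    X = np.clip(X, eps, None)
    X_ = np.clip(X_, eps, None)
    return np.array([mean_poisson_deviance(x, x_) for x, x_ in zip(X, X_)])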
Example #4
    def evaluate_forecast(self):
        n = min(len(self.validation_data), len(self.forecasts))
        y_forecast = self.forecasts[:n]
        y_actual = self.validation_data.tail(n)["close"]

        mean_abs_err = learn.mean_absolute_error(y_actual, y_forecast)
        mean_sq_err = learn.mean_squared_error(y_actual, y_forecast)
        mean_sq_lg_err = learn.mean_squared_log_error(y_actual, y_forecast)
        mean_abs_percent_err = learn.mean_absolute_percentage_error(
            y_actual, y_forecast)
        median_abs_err = learn.median_absolute_error(y_actual, y_forecast)
        mean_gamma_dev = learn.mean_gamma_deviance(y_actual, y_forecast)
        mean_poisson_dev = learn.mean_poisson_deviance(y_actual, y_forecast)
        mean_tweedie_dev = learn.mean_tweedie_deviance(y_actual, y_forecast)
        explained_variance = learn.explained_variance_score(
            y_actual, y_forecast)
        max_residual = learn.max_error(y_actual, y_forecast)
        coeff_determination = learn.r2_score(y_actual, y_forecast)

        metrics = {
            "Mean Squared Error (MSE)": mean_sq_err,
            "Mean Absolute Error (MAE)": mean_abs_err,
            "Mean Squared Logarithmic Error (MSLE)": mean_sq_lg_err,
            "Mean Absolute Percentage Error (MAPE)": mean_abs_percent_err,
            "Median Absolute Error (MedAE)": median_abs_err,
            "Mean Gamma Deviance": mean_gamma_dev,
            "Mean Poisson Deviance": mean_poisson_dev,
            "Mean Tweedie Deviance Error": mean_tweedie_dev,
            "Explained Variance Regression Score": explained_variance,
            "Max Residual Error": max_residual,
            "Coefficient of Determination": coeff_determination
        }
        self.metrics = metrics
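The Poisson and gamma deviances computed above are the power=1 and power=2 special cases of the Tweedie deviance, which is why all three appear together. A small sketch of that relationship on toy data:

import numpy as np
from sklearn.metrics import (mean_tweedie_deviance, mean_poisson_deviance,
                             mean_gamma_deviance)

y_true = np.array([2.0, 1.5, 3.0])
y_pred = np.array([1.8, 1.7, 2.6])

assert np.isclose(mean_tweedie_deviance(y_true, y_pred, power=1),
                  mean_poisson_deviance(y_true, y_pred))
assert np.isclose(mean_tweedie_deviance(y_true, y_pred, power=2),
                  mean_gamma_deviance(y_true, y_pred))
# power=0 reduces to the ordinary mean squared error.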
Example #5
def train_and_test_one(Model, train, test, *args, **kwargs):
    name = get_name(Model, **kwargs)
    print(f'Training and testing {name}...')

    algorithm = Model(*args, **kwargs)
    X_train, y_train = train
    X_test, y_test = test
    regressor = algorithm.fit(X_train, y_train)
    y_predict = regressor.predict(X_test)

    mse = mean_squared_error(y_test, y_predict)
    mpd = mean_poisson_deviance(y_test, y_predict)
    mgd = mean_gamma_deviance(y_test, y_predict)

    mae = mean_absolute_error(y_test, y_predict)
    mape = mean_absolute_percentage_error(y_test, y_predict)
    evs = explained_variance_score(y_test, y_predict)
    me = max_error(y_test, y_predict)
    medae = median_absolute_error(y_test, y_predict)
    r2 = r2_score(y_test, y_predict)

    print(f'Saving {name}...\n')
    metrics = pd.DataFrame.from_dict(
        {name: [evs, r2, mape, mse, mpd, mgd, me, mae, medae]}, orient='index')
    metrics.to_csv(METRICS, mode='a', header=False)

    prediction = pd.DataFrame(y_predict, columns=['prediction'])
    prediction.index = X_test.index
    predict_path = join_path(MODELS, f'{name}.csv')
    prediction.to_csv(predict_path)
    return y_predict
Example #6
def score_estimator(estimator, df_test):
    """Score an estimator on the test set."""

    y_pred = estimator.predict(df_test)

    print("MSE: %.3f" %
          mean_squared_error(df_test["Frequency"], y_pred,
                             df_test["Exposure"]))
    print("MAE: %.3f" %
          mean_absolute_error(df_test["Frequency"], y_pred,
                              df_test["Exposure"]))

    # ignore non-positive predictions, as they are invalid for
    # the Poisson deviance
    mask = y_pred > 0
    if (~mask).any():
        warnings.warn("Estimator yields non-positive predictions for {} "
                      "samples out of {}. These will be ignored while "
                      "computing the Poisson deviance"
                      .format((~mask).sum(), mask.shape[0]))

    print("mean Poisson deviance: %.3f" %
          mean_poisson_deviance(df_test["Frequency"][mask],
                                y_pred[mask],
                                df_test["Exposure"][mask]))
Example #7
    def mpd(self) -> float:
        """
        Mean Poisson deviance error metric for regression problems

        :return: float
            Mean-Poisson-Deviance-Error Score
        """
        return mean_poisson_deviance(y_true=self.obs,
                                     y_pred=self.pred,
                                     sample_weight=None)
Example #8
    def mean_poisson_deviance_(self):
        """
        Calculate Mean Poisson deviance regression loss.

        Returns
        -------
        :mean_poisson_deviance : float
            A non-negative floating point value (the best value is 0.0).
        """
        # NOTE: the original called mean_poisson_deviance(self.Y_true, self.Y_true),
        # which is always 0; the predictions are assumed to be stored in self.Y_pred.
        return mean_poisson_deviance(self.Y_true, self.Y_pred)
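A quick way to see why the original line was a bug: the Poisson deviance of a positive target against itself is exactly zero, so comparing Y_true with Y_true can never produce a meaningful score.

import numpy as np
from sklearn.metrics import mean_poisson_deviance

y = np.array([1.0, 4.0, 2.0])
assert mean_poisson_deviance(y, y) == 0.0  # a "perfect prediction" gives zero deviance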
Example #9
def get_regression_scoring(y_test, y_pred):
    scoring = {}
    try:
        scoring['r2'] = \
            metrics.r2_score(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['explained_variance'] = \
            metrics.explained_variance_score(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['max_error'] = \
            metrics.max_error(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['neg_mean_absolute_error'] = \
            metrics.mean_absolute_error(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['neg_mean_squared_error'] = \
            metrics.mean_squared_error(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['neg_root_mean_squared_error'] = \
            metrics.mean_squared_error(y_test, y_pred, squared=False)
    except Exception:
        pass
    try:
        scoring['neg_mean_squared_log_error'] = \
            metrics.mean_squared_log_error(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['neg_median_absolute_error'] = \
            metrics.median_absolute_error(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['neg_mean_poisson_deviance'] = \
            metrics.mean_poisson_deviance(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['neg_mean_gamma_deviance'] = \
            metrics.mean_gamma_deviance(y_test, y_pred)
    except Exception:
        pass
    return scoring
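The keys above mirror scikit-learn's scorer strings, although the stored values are not negated. If cross-validated scores are wanted instead of one-off test metrics, the same names can be passed directly as scoring strings; a hedged sketch with a toy Poisson model (none of these names come from the original code):

import numpy as np
from sklearn.linear_model import PoissonRegressor
from sklearn.model_selection import cross_val_score

rng = np.random.RandomState(0)
X = rng.uniform(size=(200, 3))
y = rng.poisson(lam=np.exp(X @ np.array([0.3, -0.2, 0.5])))

# Scorer strings are negated so that "greater is better" holds during model selection.
scores = cross_val_score(PoissonRegressor(), X, y,
                         scoring="neg_mean_poisson_deviance", cv=5)
print(-scores.mean())  # back on the deviance scale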
Example #10
def scoring(estimator, df_test):
    """Score an estimator on the test set."""
    y_pred = estimator.predict(df_test)

    mse = mean_squared_error(df_test["Words"], y_pred)
    mae = mean_absolute_error(df_test["Words"], y_pred)
    # Ignore non-positive predictions, as they are invalid for
    # the Poisson deviance.
    mask = y_pred > 0
    if (~mask).any():
        n_masked, n_samples = (~mask).sum(), mask.shape[0]
        print(f"WARNING: {n_masked} non-positive predictions out of "
              f"{n_samples} are ignored when computing the Poisson deviance.")
    mpd = mean_poisson_deviance(df_test["Words"][mask], y_pred[mask])
    return mse, mae, mpd
Example #11
def set_metrics(y_pred, y_true, dict):
    try:
        dict["max_error"] = mets.max_error(y_true, y_pred)
    except Exception:
        pass
    try:
        dict["explained_variance_score"] = mets.explained_variance_score(y_true, y_pred)
    except Exception:
        pass
    try:
        dict["mean_absolute_error"] = mets.mean_absolute_error(y_true, y_pred)
    except Exception:
        pass
    try:
        dict["mean_squared_error"] = mets.mean_squared_error(y_true, y_pred)
    except Exception:
        pass
    try:
        dict["mean_squared_log_error"] = mets.mean_squared_log_error(y_true, y_pred)
    except Exception:
        pass
    try:
        dict["median_absolute_error"] = mets.median_absolute_error(y_true, y_pred)
    except Exception:
        pass
    try:
        dict["r2_score"] = mets.r2_score(y_true, y_pred)
    except Exception:
        pass
    try:
        dict["mean_poisson_deviance"] = mets.mean_poisson_deviance(y_true, y_pred)
    except Exception:
        pass
    try:
        dict["mean_gamma_deviance"] = mets.mean_gamma_deviance(y_true, y_pred)
    except Exception:
        pass
    try:
        dict["mean_tweedie_deviance"] =  mets.mean_tweedie_deviance(y_true, y_pred)
    except Exception:
        pass
    return dict
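The blanket try/except blocks above mainly guard against the metrics that are undefined outside their domain (negative targets for mean_squared_log_error, non-positive predictions for the Poisson and gamma deviances), which raise ValueError rather than returning NaN. A short sketch of the failure mode being swallowed:

import numpy as np
from sklearn import metrics as mets

y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([0.9, -0.1, 2.7])  # one non-positive prediction

try:
    mets.mean_poisson_deviance(y_true, y_pred)
except ValueError as exc:
    print("mean_poisson_deviance skipped:", exc)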
Example #12
def get_model_score(score_type, data, grid_predict):
    output = import_required_package()

    if output == 'imported':
        if score_type == 'r2':
            r2 = r2_score(data[1], grid_predict)
            adj_r2 = 1 - ((1 - r2) *
                          ((data[0].shape[0] - 1) /
                           (data[0].shape[0] - data[0].shape[1] - 1)))
            score = {"r2": r2, "adj_r2": adj_r2}
        elif score_type == 'explained_variance':
            # sklearn's function is explained_variance_score; the original
            # called a bare `explained_variance`, which is not defined here.
            exp_variance = explained_variance_score(data[1], grid_predict)
            score = {'explained_variance': exp_variance}
        elif score_type == 'max_error':
            mx_error = max_error(data[1], grid_predict)
            score = {'max_error': mx_error}
        elif score_type == 'neg_mean_absolute_error':
            mn_absolute_error = mean_absolute_error(data[1], grid_predict)
            score = {'mean_absolute_error': mn_absolute_error}
        elif score_type == 'neg_mean_squared_error':
            mn_squared_error = mean_squared_error(data[1], grid_predict)
            score = {'mean_squared_error': mn_squared_error}
        elif score_type == 'neg_root_mean_squared_error':
            # Report the root of the MSE when RMSE scoring was requested.
            rt_mn_squared_error = mean_squared_error(data[1], grid_predict,
                                                     squared=False)
            score = {'root_mean_squared_error': rt_mn_squared_error}
        elif score_type == 'neg_mean_squared_log_error':
            mn_squared_log_error = mean_squared_log_error(
                data[1], grid_predict)
            score = {'mean_squared_log_error': mn_squared_log_error}
        elif score_type == 'neg_median_absolute_error':
            med_absolute_error = median_absolute_error(data[1], grid_predict)
            score = {'median_absolute_error': med_absolute_error}
        elif score_type == 'neg_mean_poisson_deviance':
            mn_poisson_deviance = mean_poisson_deviance(data[1], grid_predict)
            score = {'mean_poisson_deviance': mn_poisson_deviance}
        elif score_type == 'neg_mean_gamma_deviance':
            mn_gamma_deviance = mean_gamma_deviance(data[1], grid_predict)
            score = {'mean_gamma_deviance': mn_gamma_deviance}
        else:
            score = {score_type: 'Not a valid ScoreType'}

        return score
    else:
        return output
Example #13
def EvaluatePerformance_fbprophet(model, test, y_true):
    """Evaluate the performance of an fbprophet model.

    Args:
        model (fbprophet model): fbprophet model fit on the training data
        test (pandas dataframe): test dataframe with a "Date" column to predict on
        y_true (pandas dataframe): dataframe of true values

    Returns:
        return_dict: dictionary containing the metrics and the predicted values
    """
    # Make sure you install a recent version of scikit-learn, e.g.:
    # conda install -c conda-forge scikit-learn
    from sklearn.metrics import mean_poisson_deviance, mean_squared_error

    test_dataset = pd.DataFrame()
    test_dataset['ds'] = pd.to_datetime(test["Date"])
    predicted_df = model.predict(test_dataset)  # predictions for the test dates
    y_pred = predicted_df['yhat']  # 'yhat' is the prediction column in fbprophet
    print("y_pred is\n", y_pred)

    rmse = mean_squared_error(y_true, y_pred, squared=False)
    print("The RMSE of the model on the test data is", rmse)

    mean_poisson_dev = mean_poisson_deviance(y_true, y_pred)
    print("The mean Poisson deviance of the model on the test data is", mean_poisson_dev)

    return_dict = {"Root mean squared error": rmse,
                   "prediction_dataframe": y_pred,
                   "mean_poisson_deviance": mean_poisson_dev}
    return return_dict
Example #14
def score_estimator(estimator, df_test):
    """Score an estimator on the test set."""
    y_pred = estimator.predict(df_test)

    print("MSE: %.3f" % mean_squared_error(
        df_test["Frequency"], y_pred, sample_weight=df_test["Exposure"]))
    print("MAE: %.3f" % mean_absolute_error(
        df_test["Frequency"], y_pred, sample_weight=df_test["Exposure"]))

    # Ignore non-positive predictions, as they are invalid for
    # the Poisson deviance.
    mask = y_pred > 0
    if (~mask).any():
        n_masked, n_samples = (~mask).sum(), mask.shape[0]
        print(f"WARNING: Estimator yields invalid, non-positive predictions "
              f" for {n_masked} samples out of {n_samples}. These predictions "
              f"are ignored when computing the Poisson deviance.")

    print("mean Poisson deviance: %.3f" %
          mean_poisson_deviance(df_test["Frequency"][mask],
                                y_pred[mask],
                                sample_weight=df_test["Exposure"][mask]))
Example #15
def _mean_poisson_deviance(y_true, y_pred):
    from sklearn.metrics import mean_poisson_deviance
    return mean_poisson_deviance(y_true, y_pred)
Example #16
def log_rf(experimentID, run_name, params, X_train, X_test, y_train, y_test):
    import os
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.metrics import explained_variance_score, max_error
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    from sklearn.metrics import mean_squared_log_error, median_absolute_error
    from sklearn.metrics import r2_score, mean_poisson_deviance
    from sklearn.metrics import mean_gamma_deviance
    import tempfile

    with mlflow.start_run(experiment_id=experimentID,
                          run_name=run_name) as run:
        # Create model, train it, and create predictions
        rf = RandomForestRegressor(**params)
        rf.fit(X_train, y_train)
        predictions = rf.predict(X_test)

        # Log model
        mlflow.sklearn.log_model(rf, "random-forest-model")

        # Log params
        for param, value in params.items():
            mlflow.log_param(param, value)

        # Create metrics
        exp_var = explained_variance_score(y_test, predictions)
        max_err = max_error(y_test, predictions)
        mae = mean_absolute_error(y_test, predictions)
        mse = mean_squared_error(y_test, predictions)
        rmse = mean_squared_error(y_test, predictions, squared=False)
        mslogerror = mean_squared_log_error(y_test, predictions)
        medianae = median_absolute_error(y_test, predictions)
        r2 = r2_score(y_test, predictions)
        mean_poisson = mean_poisson_deviance(y_test, predictions)
        mean_gamma = mean_gamma_deviance(y_test, predictions)

        # Print metrics
        print("  explained variance: {}".format(exp_var))
        print("  max error: {}".format(max_err))
        print("  mae: {}".format(mae))
        print("  mse: {}".format(mse))
        print("  rmse: {}".format(rmse))
        print("  mean square log error: {}".format(mslogerror))
        print("  median abosulte error: {}".format(medianae))
        print("  R2: {}".format(r2))
        print("  mean poisson deviance: {}".format(mean_poisson))
        print("  mean gamma deviance: {}".format(mean_gamma))

        # Log metrics
        mlflow.log_metric("explained variance", exp_var)
        mlflow.log_metric("max error", max_err)
        mlflow.log_metric("mae", mae)
        mlflow.log_metric("mse", mse)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mean square log error", mslogerror)
        mlflow.log_metric("median abosulte error", medianae)
        mlflow.log_metric("R2", r2)
        mlflow.log_metric("mean poisson deviance", mean_poisson)
        mlflow.log_metric("mean gamma deviance", mean_gamma)

        # Create feature importance
        importance = pd.DataFrame(
            list(zip(df_pits_races_4_model_encoded.columns,
                     rf.feature_importances_)),
            columns=["Feature", "Importance"]
        ).sort_values("Importance", ascending=False)

        # Log importances using a temporary file
        temp = tempfile.NamedTemporaryFile(prefix="feature-importance-",
                                           suffix=".csv")
        temp_name = temp.name
        try:
            importance.to_csv(temp_name, index=False)
            mlflow.log_artifact(temp_name, "feature-importance.csv")
        finally:
            temp.close()  # Delete the temp file

        # Create plot
        fig, ax = plt.subplots()

        sns.residplot(x=predictions, y=y_test.values.ravel(), lowess=False)
        plt.xlabel("Predicted values pit duration")
        plt.ylabel("Residual")
        plt.title("Residual Plot for pitting")

        # Log residuals using a temporary file
        temp = tempfile.NamedTemporaryFile(prefix="residuals_pit_model",
                                           suffix=".png")
        temp_name = temp.name
        try:
            fig.savefig(temp_name)
            mlflow.log_artifact(temp_name, "residuals_pit_model.png")
        finally:
            temp.close()  # Delete the temp file

        display(fig)
        return run.info.run_uuid
Example #17
# Use the regressor's predict method to predict salaries for the test dataset,
# i.e. how salary increases with years of experience.
# Y_pred is the vector of predicted salaries.
Y_pred = regressor.predict(X_test)

# Visualise the training results: the red dots are the actual values from the
# training dataset, while the blue line shows the model's predictions.
plt.scatter(X_train, Y_train, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Training set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Visualise the test dataset against the same regression line.
plt.scatter(X_test, Y_test, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Test set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Model evaluation
from sklearn import metrics
r_square = metrics.r2_score(Y_test, Y_pred)
print('R-squared:', r_square)

from sklearn.metrics import mean_poisson_deviance
MPD = mean_poisson_deviance(Y_test, Y_pred)
print('Mean Poisson deviance:', MPD)
Example #18
def EvaluatePerformance_ARIMA(model_fit_object,
                              differenced,
                              y_train,
                              y_test):
    """This function is used to evaluate performance of an ARIMA model
    
    Args:
        model_fit_object (ARIMA .fit object): The return value of the .fit call of ARIMA
        differenced (numpy array): The array with y_train differenced using d value
        y_train (pandas dataframe): pandas dataframe containing y_train 
        y_test (pandas dataframe): pandas dataframe containing y_test 
    
    
    Returns:
        return_dict: Dictionary containing the metrics and the predicted dataframe
    
    """    
  

    
    #Make sure you use the command below to install latest version of scikit learn
    # conda install -c conda-forge scikit-learn  
    
    from sklearn.metrics import mean_poisson_deviance,mean_squared_error
    days_for_prediction  = len(y_test) 
    y_test.fillna(0,inplace=True)
    y_true =np.array(y_test)
    
    
    # Multi-step out-of-sample forecast.
    start_index = len(differenced)
    end_index = start_index + days_for_prediction - 1
    forecast = model_fit_object.predict(start=start_index, end=end_index)

    # Invert the differenced forecast back to the original scale: each
    # forecasted change is added onto the previously reconstructed value.
    history = [x for x in y_train]
    y_pred = []
    day = 1
    for yhat in forecast:
        inverted = inverse_difference(history, yhat, 1)
        history.append(inverted)
        y_pred.append(inverted)
        day += 1
    print("Length of predictions is", len(y_pred))

    # Create a dataframe to store the predictions.
    # NOTE: X_test is taken from the enclosing scope; it is not a parameter.
    temp = X_test['Date'].reset_index().drop(columns='index')
    predicted_df = pd.DataFrame(index=range(0, days_for_prediction))
    predicted_df['Date'] = temp
    # The index is reset above so that it starts at 0 and lines up with temp.
    # (The original assigned the undefined name `inverted_list` here.)
    predicted_df['Predicted_value'] = y_pred

    
    # Calculate the metrics.
    rmse = mean_squared_error(y_true, y_pred, squared=False)
    print("The RMSE of the model on the test data is", rmse)

    mean_poisson_dev = mean_poisson_deviance(y_true, y_pred)
    print("The mean Poisson deviance of the model on the test data is", mean_poisson_dev)

    return_dict = {"Root mean squared error": rmse,
                   "prediction_dataframe": predicted_df,
                   "mean_poisson_deviance": mean_poisson_dev}
    return return_dict
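The ARIMA example above calls an inverse_difference helper that is not shown here. A minimal sketch of what such a helper usually looks like for first-order differencing (an assumption about the missing code, not its actual definition):

def inverse_difference(history, yhat, interval=1):
    # Assumed behaviour: add the forecasted change back onto the value
    # observed `interval` steps earlier to undo the differencing.
    return yhat + history[-interval]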